在使用python的图像中围绕整个文本块绘制边界框答案

【问题标题】：Draw bounding box around whole block of text In image using python在使用python的图像中围绕整个文本块绘制边界框
【发布时间】：2021-10-29 08:43:42
【问题描述】：

我有一张图像，已经消除了其中的噪音（背景中的点）。现在我想在图像中的文本块周围绘制一个边界框，如何使用 Python OpenCV 来做到这一点？

Input image

Noise Removed Image

这是用于消除背景噪音的代码，我该如何修改它，以保存带有文本周围边界框的图像？

import cv2
import matplotlib.pyplot as plt
import glob
import os
def remove_dots(image_path,outdir):
    """Remove small background dots from an image and save the cleaned copy.

    The image is adaptively thresholded and dilated so that nearby strokes
    merge into blobs. Blobs whose bounding box passes the area and
    aspect-ratio filter are kept; every other pixel is painted white.

    Args:
        image_path: Path of the image file to read.
        outdir: Directory the result is written to, using the base file
            name of ``image_path``.

    Files that OpenCV cannot decode are reported and skipped.
    """
    # Local import: the surrounding snippet never imports numpy, so the
    # original code raised NameError on its first `np.` reference.
    import numpy as np

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising;
        # skip the file so one bad entry does not abort a whole batch.
        print(f"skipping unreadable image: {image_path}")
        return

    mask = np.zeros(image.shape, dtype=np.uint8)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (3,3), 0)
    thresh = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,51,9)

    # Dilate with a square 5x5 kernel to connect neighbouring text strokes
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
    dilate = cv2.dilate(thresh, kernel, iterations=2)

    # Find external contours; the return tuple has 2 or 3 items depending on
    # the OpenCV version, so pick the contour list accordingly
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    # Keep only blobs whose bounding box is neither tiny (dots) nor huge,
    # and not extremely elongated
    for c in cnts:
        x,y,w,h = cv2.boundingRect(c)
        area = w * h
        ar = w / float(h)
        if 1200 < area < 50000 and ar < 8:
            cv2.drawContours(mask, [c], -1, (255,255,255), -1)

    # Bitwise-and input image and mask to get result
    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    result = cv2.bitwise_and(image, image, mask=mask)
    result[mask==0] = (255,255,255) # Color background white

    cv2.imwrite(os.path.join(outdir,os.path.basename(image_path)),result)
    
# Clean every file found in the dataset folder and write the results to the
# output folder, echoing each path as it is processed.
input_pattern = r'C:\custom\TableDetectionWork\text_detection_dataset/*'
output_dir = r'C:\custom\TableDetectionWork\textdetect/'
for jpgfile in glob.glob(input_pattern):
    print(jpgfile)
    remove_dots(jpgfile, output_dir)

【问题讨论】：

标签： python opencv image-processing computer-vision opencv-contour

【解决方案1】：

以下是修改后代码的核心部分，可在 Python/OpenCV 中实现您想要的效果。它只是把我的代码添加到了您的代码末尾。

输入：

import cv2
import numpy as np
    
# Load the input. cv2.imread returns None (it does not raise) when the file
# is missing or cannot be decoded, so fail early with a clear message.
image = cv2.imread("john.jpg")
if image is None:
    raise FileNotFoundError("could not read image: john.jpg")

mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,51,9)

# Dilate with a square 5x5 kernel to connect neighbouring text strokes
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
dilate = cv2.dilate(thresh, kernel, iterations=2)

# Find external contours; the return tuple has 2 or 3 items depending on the
# OpenCV version, so pick the contour list accordingly
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

# Keep only blobs whose bounding box passes the area / aspect-ratio filter;
# small dots fall below the area threshold and are left out of the mask
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    area = w * h
    ar = w / float(h)
    if 1200 < area < 50000 and ar < 8:
        cv2.drawContours(mask, [c], -1, (255,255,255), -1)

# Bitwise-and input image and mask to get result
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
result = cv2.bitwise_and(image, image, mask=mask)
result[mask==0] = (255,255,255) # Color background white

# NEW CODE HERE TO END _____________________________________________________________
# Binarize the cleaned image and invert it so that text becomes white (255)
gray2 = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
thresh2 = cv2.threshold(gray2, 128, 255, cv2.THRESH_BINARY)[1]
thresh2 = 255 - thresh2

# Morphological close with a wide, short kernel (5 rows x 191 columns) so
# letters on the same line merge horizontally into one blob
kernel = np.ones((5 ,191), np.uint8)
close = cv2.morphologyEx(thresh2, cv2.MORPH_CLOSE, kernel)

# get external contours
contours = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# draw a padded red bounding box around each merged blob
result2 = result.copy()
pad = 10  # margin in pixels added on every side of the box
for cntr in contours:
    # get bounding boxes
    x,y,w,h = cv2.boundingRect(cntr)
    cv2.rectangle(result2, (x-pad, y-pad), (x+w+pad, y+h+pad), (0, 0, 255), 4)

cv2.imwrite("john_bboxes.jpg", result2)

# Show every intermediate stage for visual inspection
cv2.imshow("mask",mask)
cv2.imshow("thresh",thresh)
cv2.imshow("dilate",dilate)
cv2.imshow("result",result)
cv2.imshow("gray2",gray2)
cv2.imshow("thresh2",thresh2)
cv2.imshow("close",close)
cv2.imshow("result2",result2)

# Block until a key is pressed, then close all preview windows
cv2.waitKey(0)
cv2.destroyAllWindows()

结果的边界框：

【讨论】：

【解决方案2】：

您可以通过使用水平形态过滤器来合并蒙版图像中的字母来做到这一点。然后找到轮廓。然后获取边界框。

输入：

import cv2
import numpy as np

# Load the input. cv2.imread returns None (it does not raise) when the file
# is missing or cannot be decoded, so fail early with a clear message.
img = cv2.imread("john.jpg")
if img is None:
    raise FileNotFoundError("could not read image: john.jpg")

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# threshold (Otsu selects the level automatically; the 0 passed is ignored)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]

# invert so that text becomes white (255) on a black background
thresh = 255 - thresh

# apply horizontal morphology close: the wide, short kernel (5 rows x 191
# columns) merges letters on the same line into one blob
kernel = np.ones((5 ,191), np.uint8)
morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

# get external contours; the return tuple has 2 or 3 items depending on the
# OpenCV version, so pick the contour list accordingly
contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# draw a padded red bounding box around each merged blob
result = img.copy()
pad = 10  # margin in pixels added on every side of the box
for cntr in contours:
    # get bounding boxes
    x,y,w,h = cv2.boundingRect(cntr)
    cv2.rectangle(result, (x-pad, y-pad), (x+w+pad, y+h+pad), (0, 0, 255), 4)

# save result
cv2.imwrite("john_bbox.png",result)

# display result; block until a key is pressed, then close the windows
cv2.imshow("thresh", thresh)
cv2.imshow("morph", morph)
cv2.imshow("result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()

形态闭合图像：

边界框图像：

【讨论】：

我已经采用了这种方法，但我想在上面的代码中做同样的事情，我可以改变它吗？
我不明白。你的代码有什么问题，为什么你不能在你的代码中这样做。你画出轮廓。但是您应该绘制边界框。除此之外，我看不出您的代码有什么问题。请进一步解释。
在我的代码中，我尝试在 cv2.drawContours(mask, [c], -1, (255,255,255), -1) 之后加上 cv2.rectangle(result, (x, y), (x+w, y+h), (0, 0, 255), 3) 来保存图像，但它不起作用：保存的图像中没有边界框。
在您的蒙版和结果图像上使用 cv2.imshow() 以查看它们是否符合预期。在每个步骤中使用 imshow() 并确保每个步骤都能产生您期望的结果。
计算 if 条件保留了多少轮廓。也许你把它们都扔掉了。