从边界框注释python自动裁剪图像答案

【问题标题】：Automatic image cropping from bounding boxes annotation python（在 Python 中根据边界框标注自动裁剪图像）
【发布时间】：2020-08-12 06:42:55
【问题描述】：

我正在开发用于图像识别的 CNN。我有一组不同的图像，在每张图像中我都有一组不同的边界框（每张图像至少有 3 个边界框）。

我想自动提取边界框内的部分，然后对其进行裁剪，得到一组与每个边界框内容对应的裁剪图像。我创建了一个 voc xml 和一个累积的 .csv 文件，其中包含每个图像的所有详细信息，这里是一个摘录：

,filepath,x1,x2,y1,y2,class_name
0,71.jpeg,81,118,98,122,os
1,71.jpeg,120,156,83,110,od
2,71.jpeg,107,161,136,154,m

基本上，我在专用文件夹 (\train_images) 和注释文件中有提到的 .jpeg 格式的图像。你有处理这个问题的快速实现吗？

谢谢

【问题讨论】：

你试过用 opencv 或 scikit-image 来裁剪你的图片吗？

标签： python python-imaging-library crop bounding-box

【解决方案1】：

如果有人仍在寻找答案，您可以查看以下脚本：

此脚本会裁剪每个边界框，并自动将裁剪结果保存到与其类别（class）对应的文件夹中。

from PIL import Image
import ast
import os
import cv2
import os
import glob
import xml.etree.ElementTree as ET

original_file = './images/' # your images directory
dst = './save/'  # output root; crops are saved in one sub-folder per class name beneath it


def check_folder_exists(path):
    """Create the directory *path* (and any missing parents) if it is absent.

    Prints ``'create <path>'`` when a directory was actually created.

    Args:
        path: Directory path to ensure.

    Raises:
        OSError: If the directory cannot be created for any reason other
            than the path already existing.
    """
    if os.path.exists(path):
        return
    try:
        os.makedirs(path)
    except FileExistsError:
        # The path appeared between the check and the creation (or is
        # something os.path.exists() cannot see, such as a dangling
        # symlink); there is nothing left to create.
        return
    print('create ' + path)




# Collect one record per annotated object across all Pascal VOC XML files.
seed_arr = []
# './labels/' is your XML annotation directory
for xml_file in glob.glob('./labels/*.xml'):
    root = ET.parse(xml_file).getroot()
    filename = root.find('filename').text

    # Image dimensions are kept as text; if several <size> elements exist,
    # the last one wins.
    for size_tag in root.findall('size'):
        width, height = size_tag.find('width').text, size_tag.find('height').text

    for object_tag in root.findall('object'):
        class_name = object_tag.find('name').text
        xmin, ymin, xmax, ymax = (
            object_tag.find('bndbox/' + corner).text
            for corner in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        seed_arr.append(
            [filename, width, height, class_name, xmin, ymin, xmax, ymax]
        )

# Records are lists of strings, so this orders them lexicographically,
# starting with the file name.
seed_arr.sort()
#print(str(len(seed_arr)))
#print(str(seed_arr))


# Crop every recorded bounding box, resize it to 224x224 and save it as a
# JPEG inside a folder named after the object's class.
for index, line in enumerate(seed_arr):
    # width and height are part of each record but are not needed to crop.
    filename, width, height, class_name, xmin, ymin, xmax, ymax = line

    load_img_path = os.path.join(original_file, filename)

    # The record index is prefixed to the name so that several crops taken
    # from the same source image do not overwrite each other.
    save_class_path = os.path.join(dst, class_name)
    check_folder_exists(save_class_path)
    save_img_path = os.path.join(save_class_path, str(index) + '_' + filename)

    box = (int(xmin), int(ymin), int(xmax), int(ymax))
    # The context manager closes the source file once the crop is made.
    with Image.open(load_img_path) as img:
        im1 = img.crop(box).resize((224, 224))
    # JPEG cannot store alpha or palette images; convert those to RGB first.
    if im1.mode in ('RGBA', 'LA', 'P'):
        im1 = im1.convert('RGB')
    im1.save(save_img_path, 'JPEG')
    print('save ' + save_img_path)

https://github.com/Laudarisd/Project_Root/blob/master/Data-preprocessing/img_manuplating/crop_from_xml/crop_from_xml.py

如果您尝试从 csv 进行裁剪，请查看此链接：

https://github.com/Laudarisd/Project_Root/blob/master/Data-preprocessing/img_manuplating/crop_from_csv_bbox/crop_image_from_csv.py

【讨论】：

【解决方案2】：

你可以让 bounding_box 返回一个由元组组成的列表（每个边界框对应一个元组），然后遍历该列表逐个裁剪，如下所示。

#crop images
import numpy as np # linear algebra
import xml.etree.ElementTree as ET # for parsing XML
import matplotlib.pyplot as plt # to show images
from PIL import Image # to read images
import os
import glob

# Directories holding the source images and their XML annotations.
root_images = "/content/images"
root_annots = "/content/annotation"

all_images = os.listdir("/content/images/")
print(f"Total images : {len(all_images)}")

# Gather every entry found directly beneath the annotation directory.
breeds = glob.glob('/content/annotation/')
annotation = []
for breed_dir in breeds:
    annotation.extend(glob.glob(breed_dir + "/*"))
print(f"Total annotation : {len(annotation)}")

# Map the text before the first "-" of each entry's parent directory name
# to that full directory name (the first occurrence is kept).
breed_map = {}
for annot_path in annotation:
    parent_name = annot_path.split("/")[-2]
    breed_map.setdefault(parent_name.split("-")[0], parent_name)

print(f"Total Breeds : {len(breed_map)}")

def bounding_box(image, annots_dir=None):
    """Return every bounding box annotated for *image*.

    The annotation is read from ``<annots_dir>/<stem>.xml``, where ``stem``
    is the image file name without its final extension (so ``"a.b.jpeg"``
    maps to ``"a.b.xml"``).

    Args:
        image: Image file name, e.g. ``"71.jpeg"``.
        annots_dir: Directory containing the XML annotation files. Defaults
            to the module-level ``root_annots``.

    Returns:
        A list of ``(xmin, ymin, xmax, ymax)`` integer tuples, one per
        ``<object>`` element in document order; empty when the file
        annotates no objects.

    Raises:
        OSError: If the annotation file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not valid XML.
    """
    if annots_dir is None:
        annots_dir = root_annots
    stem = os.path.splitext(image)[0]
    bpath = os.path.join(annots_dir, stem + ".xml")
    root = ET.parse(bpath).getroot()

    retval = []
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')  # coordinates of this object's box
        retval.append(tuple(
            int(bndbox.find(corner).text)
            for corner in ('xmin', 'ymin', 'xmax', 'ymax')
        ))
    return retval
    
plt.figure(figsize=(10,10))  # kept from the original; nothing is drawn on it here

# Make sure the output folder exists before the first save.
os.makedirs('/content/results_imgs', exist_ok=True)

# Write one crop per bounding box, named "<image index>-<box index>.jpeg".
for i, image in enumerate(all_images):
    bboxarray = bounding_box(image)
    # Open each image once and close it when all of its boxes are cropped.
    with Image.open(os.path.join(root_images, image)) as im:
        for x, bbox in enumerate(bboxarray):
            # bbox is a single (xmin, ymin, xmax, ymax) tuple; it must not
            # be replaced by the whole list before being passed to crop().
            print(bbox)
            im.crop(bbox).save(f'/content/results_imgs/{i}-{x}.jpeg')

【讨论】：

【解决方案3】：

我找到了一个 git 存储库，可以从 Pascal VOC 图像（带有由 LabelImg 生成的边界框的图像）中检测到的对象的所有边界框创建裁剪图像： https://github.com/giovannicimolin/PascalVOC-to-Images

代码运行良好。希望它能帮助您解决问题。

【讨论】：

【解决方案4】：

我假设您想要裁剪边界框的图像。你可以简单地使用一个 numpy 数组：

下面是一个可运行的示例。

import matplotlib.pyplot as plt

# Sample annotation record: "filename" is the image location and each entry of
# "annotations" describes one box by its starting column/row ("x", "y") and
# its extent ("width", "height"), which is how crop() slices the image.
mydic = {
  "annotations": [
  {
    "class": "rect",
    "height": 98,
    "width": 113,
    "x": 177,
    "y": 12
  },
  {
    "class": "rect",
    "height": 80,
    "width": 87,
    "x": 373,
    "y": 43
  }
 ],
   "class": "image",
   "filename": "https://i.stack.imgur.com/9qe6z.png"
}


def crop(dic, i):
    """Return the part of the image covered by annotation number *i*.

    Args:
        dic: Mapping with a ``"filename"`` entry (passed to ``plt.imread``)
            and an ``"annotations"`` list whose items carry ``"x"``,
            ``"y"``, ``"width"`` and ``"height"``.
        i: Index into ``dic["annotations"]``.

    Returns:
        The slice of the image array covering rows ``y`` to ``y + height``
        and columns ``x`` to ``x + width``; any further axes are kept whole.
    """
    image = plt.imread(dic["filename"])
    box = dic["annotations"][i]
    x0, y0 = box["x"], box["y"]
    width, height = box["width"], box["height"]
    # Index only the two spatial axes so that 2-D (greyscale) arrays work
    # as well as 3-D (colour) ones.
    return image[y0:y0 + height, x0:x0 + width]


fig = plt.figure()

# Left half of the figure: the full, uncropped image.
ax = fig.add_subplot(1, 2, 1)
full_image = plt.imread(mydic["filename"])
ax.imshow(full_image)

# Right column of a 2x2 grid, top cell: the first annotated box.
ax1 = fig.add_subplot(2, 2, 2)
first_crop = crop(mydic, 0)
ax1.imshow(first_crop)

# Right column of a 2x2 grid, bottom cell: the second annotated box.
ax2 = fig.add_subplot(2, 2, 4)
second_crop = crop(mydic, 1)
ax2.imshow(second_crop)

plt.show()

注意：这不是我的代码，但我之前在搜索相同问题时发现了它。

【讨论】：

【解决方案5】：

好吧，我找到了使用以下代码提取裁剪图像的方法：

#crop images
import numpy as np # linear algebra
import xml.etree.ElementTree as ET # for parsing XML
import matplotlib.pyplot as plt # to show images
from PIL import Image # to read images
import os
import glob

# Locations of the input images and of their XML annotation files.
root_images = "/content/images"
root_annots = "/content/annotation"

all_images = os.listdir("/content/images/")
print(f"Total images : {len(all_images)}")

# List everything that sits directly under the annotation directory.
breeds = glob.glob('/content/annotation/')
annotation = []
for folder in breeds:
    annotation.extend(glob.glob(folder + "/*"))
print(f"Total annotation : {len(annotation)}")

# Key: text before the first "-" in the parent directory name of each entry;
# value: that full parent directory name (first occurrence wins).
breed_map = {}
for entry in annotation:
    parent_dir = entry.split("/")[-2]
    prefix = parent_dir.split("-")[0]
    breed_map.setdefault(prefix, parent_dir)

print(f"Total Breeds : {len(breed_map)}")

def bounding_box(image):
    """Return the bounding box of the last object annotated for *image*.

    The annotation file is ``root_annots/<name>.xml``, where ``<name>`` is
    the part of *image* before its first ``"."``.

    Args:
        image: Image file name, e.g. ``"71.jpeg"``.

    Returns:
        A single ``(xmin, ymin, xmax, ymax)`` tuple of integers. When the
        file annotates several objects, only the last one is returned.
    """
    stem = image.split(".")[0]
    bpath = root_annots + "/" + str(stem + ".xml")
    root = ET.parse(bpath).getroot()

    # Every iteration overwrites the coordinates, so only the values of the
    # final <object> element survive the loop.
    for obj in root.findall('object'):
        box = obj.find('bndbox')
        xmin, ymin, xmax, ymax = (
            int(box.find(corner).text)
            for corner in ('xmin', 'ymin', 'xmax', 'ymax')
        )

    return (xmin, ymin, xmax, ymax)
    
plt.figure(figsize=(10, 10))
bbox = []
# Write one crop per image, named after the image's position in all_images.
for i, image in enumerate(all_images):
    # bounding_box() yields a single box, so each image produces one file.
    bbox = bounding_box(image)
    print(bbox)
    source_path = os.path.join(root_images, image)
    im = Image.open(source_path).crop(bbox)
    im.save('/content/results_imgs/{}.jpeg'.format(i))

但是，如果您运行此代码，它只会从每个 xml 中的多个边界框中提取一个图像。 我应该如何修改它以便从每个 xml 中注释的多个边界框中获取所有图像？

【讨论】：