【问题标题】:Automatic image cropping from bounding boxes annotation (Python) / 根据边界框标注自动裁剪图像(Python)
【发布时间】:2020-08-12 06:42:55
【问题描述】:

我正在开发用于图像识别的 CNN。我有一组不同的图像,在每张图像中我都有一组不同的边界框(每张图像至少有 3 个边界框)。

我想自动提取边界框内的部分,然后对其进行裁剪,得到一组与每个边界框内容对应的裁剪图像。我创建了一个 voc xml 和一个累积的 .csv 文件,其中包含每个图像的所有详细信息,这里是一个摘录:

,filepath,x1,x2,y1,y2,class_name
0,71.jpeg,81,118,98,122,os
1,71.jpeg,120,156,83,110,od
2,71.jpeg,107,161,136,154,m

基本上,我在专用文件夹 (\train_images) 和注释文件中有提到的 .jpeg 格式的图像。你有处理这个问题的快速实现吗?

谢谢

【问题讨论】:

  • 你试过用 opencv 或 scikit-image 来裁剪你的图片吗?

标签: python python-imaging-library crop bounding-box


【解决方案1】:

如果有人仍在寻找答案,您可以查看以下脚本:

此脚本会裁剪每个边界框,并自动把裁剪结果保存到与其类别(class)对应的文件夹中

from PIL import Image
import ast
import os
import cv2
import os
import glob
import xml.etree.ElementTree as ET

original_file = './images/' # directory that contains the source images
dst = './save/' # root directory that receives the cropped images


def check_folder_exists(path):
    """Create the directory ``path`` (including parents) if nothing exists there.

    Prints ``create <path>`` after creating the directory and does nothing
    when ``path`` already exists.

    Args:
        path: directory path to ensure.

    Raises:
        OSError: if the directory cannot be created for any reason other
            than it already existing.
    """
    if os.path.exists(path):
        return
    # exist_ok=True tolerates the directory appearing between the check above
    # and this call, so no errno comparison (and no errno import) is needed.
    os.makedirs(path, exist_ok=True)
    print('create ' + path)




# Collect one record per annotated object, in the form
# [filename, width, height, class_name, xmin, ymin, xmax, ymax].
# Every value is kept as the raw text read from the xml.
seed_arr = []
for xml_file in glob.glob('./labels/*.xml'):  # directory holding the xml annotations
    root = ET.parse(xml_file).getroot()
    filename = root.find('filename').text

    # Image dimensions; if several <size> elements exist the last one wins.
    for size_node in root.findall('size'):
        width, height = size_node.find('width').text, size_node.find('height').text

    for object_node in root.findall('object'):
        class_name = object_node.find('name').text
        corners = [
            object_node.find(tag_path).text
            for tag_path in ('bndbox/xmin', 'bndbox/ymin', 'bndbox/xmax', 'bndbox/ymax')
        ]
        seed_arr.append([filename, width, height, class_name] + corners)

# Records are lists of strings, so this orders them lexicographically,
# starting with the file name.
seed_arr = sorted(seed_arr)
#print(str(len(seed_arr)))
#print(str(seed_arr))


# Crop every collected bounding box, resize it to 224x224 and store it as a
# JPEG under dst/<class_name>/.
for index, line in enumerate(seed_arr):
    # Each record is [filename, width, height, class_name, xmin, ymin, xmax, ymax].
    filename, width, height, class_name, xmin, ymin, xmax, ymax = line

    load_img_path = os.path.join(original_file, filename)

    # One output folder per class name.
    save_class_path = os.path.join(dst, class_name)
    check_folder_exists(save_class_path)
    # The running index keeps several boxes from the same image from
    # overwriting each other.
    save_img_path = os.path.join(save_class_path, str(index) + '_' + filename)

    # The context manager closes the source file as soon as the crop has been
    # taken; crop() returns an independent image.
    with Image.open(load_img_path) as img:
        crop_img = img.crop((int(xmin), int(ymin), int(xmax), int(ymax)))

    # The JPEG writer rejects modes such as RGBA or P (typical for PNG
    # sources); convert those so that saving does not fail.
    if crop_img.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
        crop_img = crop_img.convert('RGB')

    newsize = (224, 224)
    im1 = crop_img.resize(newsize)
    im1.save(save_img_path, 'JPEG')
    print('save ' + save_img_path)

https://github.com/Laudarisd/Project_Root/blob/master/Data-preprocessing/img_manuplating/crop_from_xml/crop_from_xml.py

如果您尝试从 csv 进行裁剪,请查看此链接:

https://github.com/Laudarisd/Project_Root/blob/master/Data-preprocessing/img_manuplating/crop_from_csv_bbox/crop_image_from_csv.py

【讨论】:

    【解决方案2】:

    你可以让 bounding_box 返回一个由元组组成的列表(每个边界框对应一个元组),然后遍历这个列表,如下所示。

    #crop images
    import numpy as np # linear algebra
    import xml.etree.ElementTree as ET # for parsing XML
    import matplotlib.pyplot as plt # to show images
    from PIL import Image # to read images
    import os
    import glob
    
    # Directories that hold the images and their xml annotations.
    root_images="/content/images"
    root_annots="/content/annotation"

    all_images=os.listdir("/content/images/")
    print(f"Total images : {len(all_images)}")

    # Gather every entry found directly below the annotation directory.
    breeds = glob.glob('/content/annotation/')
    annotation=[entry for b in breeds for entry in glob.glob(b+"/*")]
    print(f"Total annotation : {len(annotation)}")

    # Key: text before the first "-" of the parent path component;
    # value: that parent component. The first occurrence of a key is kept.
    breed_map={}
    for annot in annotation:
        parent=annot.split("/")[-2]
        key=parent.split("-")[0]
        if key not in breed_map:
            breed_map[key]=parent

    print(f"Total Breeds : {len(breed_map)}")
    
    def bounding_box(image):
        """Return every bounding box annotated for ``image``.

        Args:
            image: file name of the image. The annotation is read from
                ``<root_annots>/<file name without extension>.xml``.

        Returns:
            A list of ``(xmin, ymin, xmax, ymax)`` integer tuples, one per
            ``<object>`` element in the xml (empty when there are none).
        """
        # splitext removes only the final extension, so names that contain
        # additional dots still map to the matching xml file.
        stem = os.path.splitext(image)[0]
        bpath = os.path.join(root_annots, stem + ".xml")
        root = ET.parse(bpath).getroot()

        retval = []
        for o in root.findall('object'):
            bndbox = o.find('bndbox')  # element holding the four corner values
            retval.append(tuple(
                int(bndbox.find(tag).text)
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            ))
        return retval
        
    plt.figure(figsize=(10,10))
    bbox=[]
    # Save one cropped file per bounding box, named <image index>-<box index>.jpeg.
    for i,image in enumerate(all_images):
        bboxarray=bounding_box(image)
        # Open each image once; crop() returns a new image, so the same
        # source can be reused for every box of that image.
        with Image.open(os.path.join(root_images,image)) as im:
            for x,bbox in enumerate(bboxarray):
                # bbox is a single (xmin, ymin, xmax, ymax) tuple here; it must
                # not be replaced by the whole list before calling crop().
                print(bbox)
                im.crop(bbox).save(f'/content/results_imgs/{i}-{x}.jpeg')

    【讨论】:

      【解决方案3】:

      我找到了一个 git 存储库,可以从 Pascal VOC 图像(带有由 LabelImg 生成的边界框的图像)中检测到的对象的所有边界框创建裁剪图像: https://github.com/giovannicimolin/PascalVOC-to-Images

      代码运行良好。希望它能帮助您解决问题。

      【讨论】:

        【解决方案4】:

        我假设您想要裁剪边界框的图像。你可以简单地使用一个 numpy 数组:

        下面是一个可运行的示例。

        import matplotlib.pyplot as plt
        
        # Sample annotation record: "filename" is the image to load and each
        # entry of "annotations" describes one rectangle through "x", "y",
        # "width" and "height".
        mydic = {
          "annotations": [
          {
            "class": "rect",
            "height": 98,
            "width": 113,
            "x": 177,
            "y": 12
          },
          {
            "class": "rect",
            "height": 80,
            "width": 87,
            "x": 373,
            "y": 43
          }
         ],
           "class": "image",
           "filename": "https://i.stack.imgur.com/9qe6z.png"
        }
        
        
        def crop(dic, i):
            """Return the pixels covered by annotation ``i`` of ``dic``.

            Args:
                dic: mapping with a "filename" entry (passed to ``plt.imread``)
                    and an "annotations" list whose items provide "x", "y",
                    "width" and "height".
                i: index into ``dic["annotations"]``.

            Returns:
                The slice ``image[y:y+height, x:x+width]`` of the loaded image.
            """
            # NOTE(review): recent matplotlib releases appear to reject URLs in
            # imread; if "filename" is a URL, confirm this still loads.
            image = plt.imread(dic["filename"])
            box = dic["annotations"][i]
            x0, y0 = box["x"], box["y"]
            # Rows are indexed by y and columns by x. Only the first two axes
            # are sliced, so 2-D (grayscale) arrays work as well as arrays
            # that carry a trailing channel axis.
            return image[y0:y0 + box["height"], x0:x0 + box["width"]]
        
        
        fig = plt.figure()

        # Left half of the figure: the complete source image.
        ax = fig.add_subplot(121)
        ax.imshow(plt.imread(mydic["filename"]))

        # Right column: one subplot per annotation, top then bottom.
        crop_axes = []
        for position, annotation_index in ((222, 0), (224, 1)):
            crop_ax = fig.add_subplot(position)
            crop_ax.imshow(crop(mydic, annotation_index))
            crop_axes.append(crop_ax)
        ax1, ax2 = crop_axes

        plt.show()
        

        注意:这不是我的代码,但我之前在搜索相同问题时发现了它。

        【讨论】:

          【解决方案5】:

          好吧,我找到了使用以下代码提取裁剪图像的方法:

          #crop images
          import numpy as np # linear algebra
          import xml.etree.ElementTree as ET # for parsing XML
          import matplotlib.pyplot as plt # to show images
          from PIL import Image # to read images
          import os
          import glob
          
          # Directories that hold the images and their xml annotations.
          root_images="/content/images"
          root_annots="/content/annotation"

          all_images=os.listdir("/content/images/")
          print(f"Total images : {len(all_images)}")

          # Gather every entry found directly below the annotation directory.
          breeds = glob.glob('/content/annotation/')
          annotation=[entry for b in breeds for entry in glob.glob(b+"/*")]
          print(f"Total annotation : {len(annotation)}")

          # Key: text before the first "-" of the parent path component;
          # value: that parent component. The first occurrence of a key is kept.
          breed_map={}
          for annot in annotation:
              parent=annot.split("/")[-2]
              key=parent.split("-")[0]
              if key not in breed_map:
                  breed_map[key]=parent

          print(f"Total Breeds : {len(breed_map)}")
          
          def bounding_box(image):
              """Return one ``(xmin, ymin, xmax, ymax)`` tuple for ``image``.

              The annotation is read from
              ``root_annots + "/" + <text before the first "."> + ".xml"``.
              Every ``<object>`` element is visited, but only the values of
              the last one are returned.
              """
              bpath=root_annots+"/"+str(image.split(".")[0]+".xml")
              annotation_root = ET.parse(bpath).getroot()

              for node in annotation_root.findall('object'):
                  box = node.find('bndbox')  # element holding the four corner values
                  # Each pass overwrites the previous values, so the last
                  # <object> determines the result.
                  xmin, ymin, xmax, ymax = (
                      int(box.find(tag).text)
                      for tag in ('xmin', 'ymin', 'xmax', 'ymax')
                  )

              return (xmin,ymin,xmax,ymax)
              
          plt.figure(figsize=(10,10))
          bbox=[]
          # One cropped file per image, named after the image's position in all_images.
          for i,image in enumerate(all_images):
              bbox=bounding_box(image)
              print(bbox)
              source_path=os.path.join(root_images,image)
              im=Image.open(source_path).crop(bbox)
              target_path='/content/results_imgs/{}.jpeg'.format(i)
              im.save(target_path)

          但是,如果您运行此代码,它只会从每个 xml 中的多个边界框中提取一个图像。 我应该如何修改它以便从每个 xml 中注释的多个边界框中获取所有图像?

          【讨论】:

            猜你喜欢
            • 1970-01-01
            • 2018-10-24
            • 2019-01-31
            • 1970-01-01
            • 1970-01-01
            • 2020-12-26
            • 1970-01-01
            • 1970-01-01
            • 2022-01-23
            相关资源
            最近更新 更多