使用 TensorFlow 进行对象检测答案

【问题标题】：Object Detection using Tensorflow（使用 TensorFlow 进行对象检测）
【发布时间】：2017-08-18 06:57:34
【问题描述】：

我正在关注 Oxford-IIIT Pets Dataset 的 TensorFlow 对象检测教程：https://github.com/tensorflow/models/blob/master/object_detection/g3doc/running_pets.md

我已经从最新的检查点成功生成了“frozen_inference_graph.pb”。如何使用推理图“frozen_inference_graph.pb”和宠物标签文件“pet_label_map.pbtxt”在图像上进行测试？

我尝试使用 Jupyter notebook，但在图像中没有检测到任何东西。我还使用以下 Python 代码来检测“狗”和“猫”，但同样没有检测到任何东西。Python 代码如下：

import os
import cv2
import time
import argparse
import multiprocessing
import numpy as np
import tensorflow as tf

from utils import FPS, WebcamVideoStream
from multiprocessing import Queue, Pool
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Path to the frozen inference graph that worker() loads.
PATH_TO_CKPT = 'frozen_inference_graph.pb'

# Path to the label map handed to label_map_util.load_labelmap().
PATH_TO_LABELS = 'pet_label_map.pbtxt'

# Passed as max_num_classes when the label map is converted to categories.
NUM_CLASSES = 37

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)


def detect_objects(image_np, sess, detection_graph):
    """Run the detection graph on one image and draw the results onto it.

    Args:
        image_np: Image array; a leading batch axis is added before it is fed
            to the graph's ``image_tensor:0`` input.
        sess: Session used to run the graph.
        detection_graph: Graph whose input and output tensors are looked up
            by name.

    Returns:
        The same ``image_np`` object that was passed in, after
        ``vis_util.visualize_boxes_and_labels_on_image_array`` has been
        called on it.
    """
    # The graph input is fed a batch, so wrap the single image in a batch of one.
    batch = np.expand_dims(image_np, axis=0)
    input_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Output tensors, in the order they are unpacked below: boxes, scores
    # (shown on the result image together with the class label), classes,
    # and the detection count.
    output_names = (
        'detection_boxes:0',
        'detection_scores:0',
        'detection_classes:0',
        'num_detections:0',
    )
    fetches = [detection_graph.get_tensor_by_name(name) for name in output_names]

    # Actual detection.
    box_out, score_out, class_out, _ = sess.run(
        fetches, feed_dict={input_tensor: batch})

    # Draw the detections onto the caller's image array.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(box_out),
        np.squeeze(class_out).astype(np.int32),
        np.squeeze(score_out),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)
    return image_np


def worker(input_q, output_q):
    """Load the frozen model, run detection on one queued frame, then exit.

    The function takes exactly one frame from ``input_q``, passes it through
    ``detect_objects`` and puts the annotated result on ``output_q``.

    Args:
        input_q: Queue from which a single frame is read (blocks until one
            is available).
        output_q: Queue onto which the annotated frame is put.
    """
    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    # Close the session even if fetching the frame or running detection
    # raises, so the session's resources are not leaked.
    try:
        frame = input_q.get()
        output_q.put(detect_objects(frame, sess, detection_graph))
    finally:
        sess.close()


if __name__ == '__main__':
    # Command-line options. Only --num-workers and --queue-size are read
    # below; the other options are parsed but not used in this script.
    parser = argparse.ArgumentParser()
    parser.add_argument('-src', '--source', dest='video_source', type=int,
                        default=0, help='Device index of the camera.')
    parser.add_argument('-wd', '--width', dest='width', type=int,
                        default=20, help='Width of the frames in the video stream.')
    parser.add_argument('-ht', '--height', dest='height', type=int,
                        default=20, help='Height of the frames in the video stream.')
    parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int,
                        default=2, help='Number of workers.')
    parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int,
                        default=5, help='Size of the queue.')
    args = parser.parse_args()

    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)

    # Read the image before starting any worker: cv2.imread returns None
    # (rather than raising) when the file is missing or unreadable, which
    # would otherwise crash the worker and leave this process blocked on
    # output_q.get().
    image_path = "image2.jpg"
    frame = cv2.imread(image_path)
    if frame is None:
        raise SystemExit('Could not read image: {}'.format(image_path))

    # OpenCV decodes images as BGR, while the detection model is fed RGB
    # images, so convert before inference.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    input_q = Queue(maxsize=args.queue_size)
    output_q = Queue(maxsize=args.queue_size)
    # worker() is passed as the pool initializer, so each pool process runs
    # it once on start-up.
    pool = Pool(args.num_workers, worker, (input_q, output_q))

    try:
        input_q.put(frame_rgb)
        annotated_rgb = output_q.get()

        # Convert back to BGR, the channel order cv2.imshow expects.
        cv2.imshow('Video', cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR))
        cv2.waitKey(0)
    finally:
        # Stop any worker still blocked waiting for a frame.
        pool.terminate()
        cv2.destroyAllWindows()

如果能就如何在实际图像上运行推理图，或者在没有检测到任何东西时如何调试提供任何帮助，我将不胜感激。

【问题讨论】：

标签： python tensorflow

【解决方案1】：

如果您使用的是 Tensorflow API，请转到文件夹 models/research，在那里打开一个控制台。

在 research 文件夹中运行命令 protoc object_detection/protos/*.proto --python_out=.，然后运行 export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim。

然后运行cd object_detection 更改控制台中的文件夹并在当前文件夹中打开jupyter notebook。

在 jupyter notebook 的主页中找到文件object_detection_tutorial.ipynb，修改它以适合您的用途。

【讨论】：

【解决方案2】：

boxes、scores 和 classes 的输出是什么？你能把它们打印出来吗？如果它们输出了数值，也许您只需要更改代码中的几行即可正确可视化结果。

为了测试，你可以使用：

        # Write the image array to ./outputImg.png via vis_util.
        # NOTE(review): `image` is defined outside this snippet; presumably the
        # annotated array produced by the visualization step — confirm.
        vis_util.save_image_array_as_png(image,'./outputImg.png')
        #print(image.shape)
        print('image saved')
        # Read the saved PNG back and display it.
        # NOTE(review): assumes `mpimg` is matplotlib.image and `plt` is
        # matplotlib.pyplot; neither import is shown here — confirm.
        img=mpimg.imread('./outputImg.png')
        imgplot = plt.imshow(img)
        plt.show()

【讨论】：