【问题标题】:How to get the output from YOLO model using tensorflow with C++ correctly?如何正确使用带有 C++ 的 tensorflow 从 YOLO 模型中获取输出?
【发布时间】:2020-04-27 20:06:15
【问题描述】:

我正在尝试使用 C++ 中的 YOLO 模型编写推理程序。我搜索了一些关于darknet的信息,但是它必须使用.cfg文件来导入模型结构(这对我来说有点太复杂了......),所以我想用tensorflow做这个程序。

(我的模型权重从.hdf5(用于python)转换为.pb(用于C++))

我找到了一些用python写的例子,好像他们在推理过程之前做了一些工作......Source

def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=50,
              score_threshold=.6,
              iou_threshold=.5):
    """Build the graph ops that turn raw YOLO outputs into filtered detections.

    Every output layer is decoded by ``yolo_boxes_and_scores``; the decoded
    boxes and scores of all layers are concatenated, then for each class the
    candidates whose score reaches ``score_threshold`` go through
    ``tf.image.non_max_suppression`` (at most ``max_boxes`` kept per class).

    Returns a ``(boxes, scores, classes)`` tuple of tensors concatenated over
    all classes.
    """
    layer_count = len(yolo_outputs)
    # default setting: three anchors per output layer
    if layer_count == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]
    # Network input size is derived from the first output's spatial dims (x32).
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    # One (boxes, scores) pair per output layer, in layer order.
    decoded = [
        yolo_boxes_and_scores(yolo_outputs[idx], anchors[anchor_mask[idx]],
                              num_classes, input_shape, image_shape)
        for idx in range(layer_count)
    ]
    boxes = K.concatenate([pair[0] for pair in decoded], axis=0)
    box_scores = K.concatenate([pair[1] for pair in decoded], axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    # One (boxes, scores, class ids) triple per class, in class order.
    kept = []
    for class_id in range(num_classes):
        # TODO: use keras backend instead of tf.
        candidate_boxes = tf.boolean_mask(boxes, mask[:, class_id])
        candidate_scores = tf.boolean_mask(box_scores[:, class_id], mask[:, class_id])
        keep_index = tf.image.non_max_suppression(
            candidate_boxes, candidate_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        candidate_boxes = K.gather(candidate_boxes, keep_index)
        candidate_scores = K.gather(candidate_scores, keep_index)
        class_ids = K.ones_like(candidate_scores, 'int32') * class_id
        kept.append((candidate_boxes, candidate_scores, class_ids))

    boxes_ = K.concatenate([item[0] for item in kept], axis=0)
    scores_ = K.concatenate([item[1] for item in kept], axis=0)
    classes_ = K.concatenate([item[2] for item in kept], axis=0)

    return boxes_, scores_, classes_

我已经打印出返回值,它看起来像这样:

boxes-> Tensor("concat_11:0", shape=(?, 4), dtype=float32)

scores-> Tensor("concat_12:0", shape=(?,), dtype=float32)

classes-> Tensor("concat_13:0", shape=(?,), dtype=int32)

我的 YOLO 模型(.hdf5)的原始输出如下(通过打印 model.output 得到):

tf.Tensor 'conv2d_59_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32

tf.Tensor 'conv2d_67_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32

tf.Tensor 'conv2d_75_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32

而python代码的推理部分是

# Values fed to the graph: the preprocessed image, the image size taken as
# [size[1], size[0]], and Keras learning phase 0.
feed = {
    yolo_model.input: image_data,
    input_image_shape: [image.size[1], image.size[0]],
    K.learning_phase(): 0,
}
out_boxes, out_scores, out_classes = sess.run([boxes, scores, classes],
                                              feed_dict=feed)

对比python版本的推理代码, C++ 部分是... (Reference)

int main()
{
    string image = "test.jpg";
    string graph = "yolo_weight.pb";
    string labels = "coco.names";
    int32 input_width = 416;
    int32 input_height = 416;
    float input_mean = 0;
    float input_std = 255;
    string input_layer = "input_1:0";
    std::vector<std::string> output_layer = {"conv2d_59/BiasAdd:0", "conv2d_67/BiasAdd:0", "conv2d_75/BiasAdd:0" };

    std::unique_ptr<tensorflow::Session> session;
    string graph_path = tensorflow::io::JoinPath(root_dir, graph);
    Status load_graph_status = LoadGraph(graph_path, &session);

    std::vector<Tensor> resized_tensors;
    string image_path = tensorflow::io::JoinPath(root_dir, image);
    Status read_tensor_status = ReadTensorFromImageFile(image_path, input_height, input_width, 
    input_mean, input_std, &resized_tensors);

    Tensor inpTensor = Tensor(DT_FLOAT, TensorShape({ 1, input_height, input_width, 3 }));
    std::vector<Tensor> outputs;
    cv::Mat srcImage = cv::imread(image);
    cv::resize(srcImage, srcImage, cv::Size(input_width, input_height));
    srcImage.convertTo(srcImage, CV_32FC3);
    srcImage = srcImage / 255;  
    string ty = type2str(srcImage.type());
    float *p = (&inpTensor)->flat<float>().data();
    cv::Mat tensorMat(input_height, input_width, CV_32FC3, p);
    srcImage.convertTo(tensorMat, CV_32FC3);
    Status run_status = session->Run({{ input_layer, inpTensor }}, { output_layer }, {}, &outputs);
    int cc = 1;
    auto output_detection_class = outputs[0].tensor<float, 4>();
    std::cout << "detection scores" << std::endl;
    std::cout << "typeid(output_detection_scoreclass).name->" << typeid(output_detection_class).name() << std::endl;
    for (int i = 0; i < 13; ++i)
    {
        for (int j = 0; j < 13; ++j)
        {
            for (int k = 0; k < 21; ++k)
            {
                // using (index_1, index_2, index_3) to access the element in a tensor
                printf("i->%d, j->%d, k->%d\t", i, j, k);
                std::cout << output_detection_class(1, i, j, k) << "\t";
                cc += 1;
                if (cc % 4 == 0)
                {
                    std::cout << "\n";
                }
            }
        }
        std::cout << std::endl;
    }
    return 0;
}

c++版本推断部分的输出是

outputs.size()-> 3

outputs[0].shape()-> [1,13,13,21]

outputs[1].shape()-> [1,26,26,21]

outputs[2].shape()-> [1,52,52,21]

但是我得到的输出很奇怪......

(outputs[0] 的输出值看起来不像是分数、类或坐标中的任何一个...)

所以我想知道是不是因为我错过了在推断之前用 python 编写的部分?还是我使用错误的方式获取输出数据?

我已经检查了一些相关的问题和答案...

1.Yolo v3 model output clarification with keras

2.Convert YoloV3 output to coordinates of bounding box, label and confidence

3.How to access tensorflow::Tensor C++

但我还是不知道怎么做:(

我还找到了一个repo,这可能会有所帮助, 我看了一下它的yolo.cpp,但是它的模型输出张量的形状和我的不一样,我不确定我是否可以直接修改代码,它的输出张量是

tf.Tensor 'import/output:0' shape=(?, 735) dtype = float32

感谢任何帮助或建议...

【问题讨论】:

    标签: python c++ tensorflow keras yolo


    【解决方案1】:

    如果您仍在为这个问题苦恼:我没有看到您在哪里对输出层的值应用 Sigmoid 和 Exp。

    你可以看看这篇文章,它描述了如何处理输出。

    https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193

    【讨论】:

    • 抱歉回复晚了,我已经想出了解决这个问题的另一种方法,有空我会更新答案。 :D
    【解决方案2】:

    正如 Bryan 所说,输出层仍然需要执行一些操作。

    所以在我的情况下(根据this repo),我将它添加到 YOLO 类(在文件yolo.py)中,以便在保存模型时添加这些后处理:

    def output_pb(self, out_dir, out_pb):
        """Freeze the session graph and write it as a binary ``.pb`` file.

        The graph is frozen with the ``boxes``, ``scores`` and ``classes``
        tensors of this object as output nodes, training-only nodes are
        removed, and the result is written to ``out_dir``/``out_pb``.
        """
        # Node names are the tensor names without the ":<index>" suffix.
        output_nodes = [
            tensor.name.split(":")[0]
            for tensor in (self.boxes, self.scores, self.classes)
        ]
        print(*output_nodes)

        graph_def = self.sess.graph.as_graph_def()
        constant_graph = tf.graph_util.convert_variables_to_constants(
            self.sess, graph_def, output_nodes)
        frozen_graph = tf.graph_util.remove_training_nodes(constant_graph)

        tf.io.write_graph(frozen_graph, out_dir, out_pb, as_text=False)
        print("===== FINISH saving new pb file =====")
    

    保存模型时,我这样调用函数:

    # Build the detector from the config, then export its frozen graph.
    detector = YOLO(**config)
    detector.output_pb(output_dir, output_pb_name)
    

    在 C++ 中进行推理时, 整个过程是这样的:

    // initialize model
    YOLO* YOLO_data = static_cast<YOLO*>(Init_DllODM_object(config));
    // do some stuff to set data in YOLO_data
    // load the picture from disk (a cv::Mat cannot be created from a file name string directly)
    cv::Mat input_pic = cv::imread("whatever_pic.png");
    // run inference; the results are stored in the vectors owned by YOLO_data
    predict(YOLO_data, input_pic, YOLO_data->bbox_res, YOLO_data->score_res, YOLO_data->class_res);
    // draw result on pic
    cv::Mat res = show_result(YOLO_data, input_pic);
    

    详细代码在这里:

    // yolo_cpp.h
    
    // State shared by Init_DllODM_object, predict and show_result.
    // Scalar and pointer members carry in-class initializers so that a
    // default-initialized YOLO never holds indeterminate values.
    struct YOLO
    {
        // Detections scoring below this are not drawn and are removed by show_result.
        float score_thres = 0.0f;
        // Results of the last predict call: one class id and one score per
        // detection, and four consecutive floats per detection in bbox_res
        // (read by show_result as y_min, x_min, y_max, x_max).
        std::vector<int> class_res;
        std::vector<float> bbox_res, score_res;

        // Graph node names picked up while scanning the loaded graph.
        std::string inp_tensor_name;
        std::string placeholder_name;
        std::vector<std::string> out_tensors;
        // Created by NewSession in Init_DllODM_object; nullptr until then.
        Session* session = nullptr;

        // t holds the two values of inp_pic_size and is fed to the placeholder;
        // inpTensor is the image tensor fed to the model input.
        Tensor t, inpTensor;
        // Output tensors of the last session run.
        std::vector<tensorflow::Tensor> outTensor;

        // Model input size (config "input_size") and picture size (config "input_pic_size").
        std::vector<int> MD_size;
        std::vector<int> inp_pic_size;
        // Per-class label text and drawing colour, indexed by class id.
        std::vector<std::string> md_class_list;
        std::vector<cv::Scalar> color_list;
        // Non-zero: append the score to the label text.
        int show_score = 0;
        // Non-zero: show the score as a percentage, otherwise as a plain value.
        int score_type = 0;
        // NOTE(review): not referenced by the code shown here.
        int return_origin = 0;
    };
    
    // yolo_cpp.cpp
    
    // Creates a YOLO object from `config`: loads the frozen graph named by
    // config["model"], records the input / placeholder / output node names,
    // prepares the input tensors and opens a TensorFlow session on the graph.
    //
    // Config keys read: "model", "input_size", "input_pic_size".
    //
    // Returns the heap-allocated YOLO object as void*. Failures are reported on
    // stdout and the object is still returned; if the session could not be
    // created its `session` member is nullptr.
    void* Init_DllODM_object(json config)
    {
        std::string model_path = config["model"].get<std::string>();
        YOLO* YOLO_data = new YOLO();
        auto options = tensorflow::SessionOptions();
        GraphDef graphdef;
        // loading model to graph
        Status status_load = ReadBinaryProto(Env::Default(), model_path, &graphdef);
        if (!status_load.ok()) {
            std::cout << "ERROR: Loading model failed..." << std::endl;
            std::cout << model_path << status_load.ToString() << "\n";
        }

        options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(0.7);
        options.config.mutable_gpu_options()->set_allow_growth(true);

        // Pick the node names by pattern and by position in the graph.
        // NOTE(review): the output nodes are assumed to be the 5th-, 3rd- and
        // last-from-the-end nodes -- this depends on how the graph was exported.
        const int node_count = graphdef.node_size();
        for (int i = 0; i < node_count; i++)
        {
            // Bind by reference: copying a node would also copy any constant
            // data stored in it.
            const auto& n = graphdef.node(i);
            if (n.name().find("input_") != string::npos)
            {
                YOLO_data->inp_tensor_name = n.name();
            }
            else if (n.name().find("Placeholder_") != string::npos)
            {
                YOLO_data->placeholder_name = n.name();
            }
            else if (i == node_count - 5 || i == node_count - 3 || i == node_count - 1)
            {
                YOLO_data->out_tensors.push_back(n.name());
            }
        }

        std::vector<int> MD_size_ = config["input_size"];
        YOLO_data->MD_size = MD_size_;
        std::vector<int> inp_pic_size_ = config["input_pic_size"];
        YOLO_data->inp_pic_size = inp_pic_size_;

        YOLO_data->inpTensor = Tensor(DT_FLOAT, TensorShape({ 1, YOLO_data->MD_size[0], YOLO_data->MD_size[1], 3 }));  // input tensor
        YOLO_data->t = Tensor(DT_FLOAT, TensorShape({ 2 }));
        //ref: https://stackoverflow.com/questions/36804714/define-a-feed-dict-in-c-for-tensorflow-models
        auto t_matrix = YOLO_data->t.tensor<float, 1>();
        t_matrix(0) = YOLO_data->inp_pic_size[0];
        t_matrix(1) = YOLO_data->inp_pic_size[1];

        // create session
        Status status_newsess = NewSession(options, &YOLO_data->session); //for the usage of gpu setting
        if (!status_newsess.ok() || YOLO_data->session == nullptr) {
            // Without a session there is nothing to load the graph into.
            std::cout << "ERROR: Creating session failed.." << status_newsess.ToString() << std::endl;
            YOLO_data->session = nullptr;
            return YOLO_data;
        }
        Status status_create = YOLO_data->session->Create(graphdef);
        if (!status_create.ok()) {
            std::cout << "ERROR: Creating graph in session failed.." << status_create.ToString() << std::endl;
        }
        else {
            std::cout << "----------- Successfully created session and load graph -------------" << std::endl;
        }

        return YOLO_data;
    }
    
    // Runs the model on `srcImage` and stores the detections in the output vectors.
    //
    // YOLO_     : object returned by Init_DllODM_object.
    // srcImage  : picture to run on; it is letterboxed to the model input size first.
    // bbox_res  : receives four consecutive floats per detection.
    // score_res : receives one score per detection.
    // class_res : receives one class id per detection.
    //
    // The three output vectors are cleared first, so they only ever hold the
    // detections of this call. Returns 0 on success and -1 on any failure.
    int predict(YOLO* YOLO_, cv::Mat srcImage, std::vector<float>& bbox_res, std::vector<float>& score_res, std::vector<int>& class_res)
    {
        // Drop results of a previous call so stale detections are never reported.
        bbox_res.clear();
        score_res.clear();
        class_res.clear();

        // read image -> input image
        if (srcImage.empty())   // check if image can open correctly
        {
            std::cout << "can't open the image!!!!!!!" << std::endl;
            return -1;
        }
        if (YOLO_ == nullptr || YOLO_->session == nullptr)
        {
            std::cout << "ERROR: model is not initialized..." << std::endl;
            return -1;
        }

        srcImage = letterbox_image(srcImage, YOLO_->MD_size[0], YOLO_->MD_size[1]);
        convertCVMatToTensor(YOLO_, srcImage);

        // Build the feed list only after the image has been written into
        // inpTensor, so the fed tensor is the current one whichever way the
        // conversion helper updates it.
        // ref: https://ppt.cc/f7ERNx
        std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
            { YOLO_->inp_tensor_name, YOLO_->inpTensor },
            { YOLO_->placeholder_name, YOLO_->t },
        };
        Status status_run = YOLO_->session->Run({ inputs }, { YOLO_->out_tensors }, {}, &YOLO_->outTensor);
        if (!status_run.ok()) {
            std::cout << "ERROR: RUN failed..." << std::endl;
            std::cout << status_run.ToString() << "\n";
            return -1;
        }
        if (YOLO_->outTensor.size() < 3) {
            std::cout << "ERROR: RUN returned fewer than 3 output tensors..." << std::endl;
            return -1;
        }

        // Outputs in order: boxes (4 floats per detection), scores, class ids.
        const auto boxes = YOLO_->outTensor[0].flat<float>();
        const auto scores = YOLO_->outTensor[1].flat<float>();
        const auto classes = YOLO_->outTensor[2].flat<int>();
        const auto detection_count = scores.size();
        if (boxes.size() < detection_count * 4 || classes.size() < detection_count) {
            std::cout << "ERROR: output tensors have inconsistent sizes..." << std::endl;
            return -1;
        }

        bbox_res.reserve(detection_count * 4);
        score_res.reserve(detection_count);
        class_res.reserve(detection_count);
        for (int i = 0; i < detection_count; i++)
        {
            const int box_idx = i * 4;
            bbox_res.push_back(boxes(box_idx));
            bbox_res.push_back(boxes(box_idx + 1));
            bbox_res.push_back(boxes(box_idx + 2));
            bbox_res.push_back(boxes(box_idx + 3));
            score_res.push_back(scores(i));
            class_res.push_back(classes(i));
        }

        return 0;
    }
    
    // Draws every detection whose score reaches inf_obj->score_thres onto
    // `inp_pic` and removes all other detections from inf_obj's result vectors.
    //
    // inf_obj : holds the detections (bbox_res / score_res / class_res) plus the
    //           label list, colour list and label-format flags.
    // inp_pic : picture to draw on.
    //
    // Each detection's four bbox_res values are read as y_min, x_min, y_max,
    // x_max, rounded to the nearest integer and clamped to be non-negative.
    // Returns the picture with the boxes drawn.
    cv::Mat show_result(YOLO* inf_obj, cv::Mat inp_pic)
    {
        std::vector<float>& boxes = inf_obj->bbox_res;
        std::vector<float>& scores = inf_obj->score_res;
        std::vector<int>& classes = inf_obj->class_res;

        const std::size_t total = classes.size();
        std::size_t kept = 0;   // number of detections retained so far
        for (std::size_t i = 0; i < total; ++i)
        {
            const float score = scores[i];
            if (!(score >= inf_obj->score_thres))
            {
                // Under the threshold: neither drawn nor kept.
                continue;
            }

            const std::size_t src = i * 4;
            const int y_min = std::max(0, static_cast<int>(floor(boxes[src] + 0.5)));
            const int x_min = std::max(0, static_cast<int>(floor(boxes[src + 1] + 0.5)));
            const int y_max = std::max(0, static_cast<int>(floor(boxes[src + 2] + 0.5)));
            const int x_max = std::max(0, static_cast<int>(floor(boxes[src + 3] + 0.5)));

            // Label: class name, optionally followed by the score as a
            // percentage or as a plain value.
            const int class_id = classes[i];
            std::string plot_str = inf_obj->md_class_list[class_id];
            if (inf_obj->show_score)
            {
                if (inf_obj->score_type)
                    plot_str += ", " + std::to_string(rounding(score * 100, 2)).substr(0, 5) + "%";
                else
                    plot_str += ", " + std::to_string(rounding(score, 2)).substr(0, 4);
            }

            inp_pic = plot_one_box(inp_pic, x_min, y_min, x_max, y_max, plot_str, inf_obj->color_list[class_id]);

            // Compact the kept detection towards the front in a single pass
            // instead of erasing rejected elements one by one.
            if (kept != i)
            {
                const std::size_t dst = kept * 4;
                boxes[dst] = boxes[src];
                boxes[dst + 1] = boxes[src + 1];
                boxes[dst + 2] = boxes[src + 2];
                boxes[dst + 3] = boxes[src + 3];
                scores[kept] = score;
                classes[kept] = class_id;
            }
            ++kept;
        }

        // Cut off the slots left behind by the rejected detections.
        if (kept != total)
        {
            boxes.erase(boxes.begin() + kept * 4, boxes.begin() + total * 4);
            scores.erase(scores.begin() + kept, scores.begin() + total);
            classes.erase(classes.begin() + kept, classes.begin() + total);
        }

        return inp_pic;
    }
    

    由于我的代码是用于dll的,所以我是这样安排的。 还有一些我没有删除的冗余代码, 但我认为到目前为止,整个过程都可以使用这些提供的代码来完成。 希望对您有所帮助:D

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 2022-08-19
      • 1970-01-01
      • 2019-05-16
      • 2021-10-13
      • 2017-02-23
      • 1970-01-01
      • 2021-03-18
      相关资源
      最近更新 更多