关于caffe转的rknn模型c++代码无法显示结果

[复制链接] · 发表于 2020-6-11 15:01:42

你好，我自己的caffe模型转换好后，用py测试结果正确。
用咱们提供的rknn_ssd.cpp代码测试咱们自己的mobilenet ssd结果也是正常的，但是：
（1）修改rknn_ssd.cpp里面的类别个数，以及模型路径等去测试咱们提供的caffe文件夹中的vgg ssd时候不会显示任何结果；
（2）修改rknn_ssd.cpp里面的类别个数，以及模型路径等却测试我自己的mobilenet ssd结果也完全不搭边。
（3）我看咱们这个rknn_ssd.cpp代码是TensorFlow文件夹中的mobilenet ssd的测试demo，是否caffe的模型不能用？
（4）可以的话，麻烦可否一下以下几个宏的具体含义。
      #define NUM_RESULTS 2183          prior box个数/4？
      #define NUM_CLASSES 21             类别个数

      #define Y_SCALE 10.0f
      #define X_SCALE 10.0f
      #define H_SCALE 5.0f
      #define W_SCALE 5.0f
代码如下：

#include <stdio.h>

#include <stdint.h>

#include <stdlib.h>

#include <fstream>

#include <iostream>

#include <sys/time.h>



#include "rknn_api.h"

#include "opencv2/core/core.hpp"

#include "opencv2/imgproc/imgproc.hpp"

#include "opencv2/highgui/highgui.hpp"



#include <time.h>



using namespace std;

using namespace cv;



#define NUM_RESULTS         2183

#define NUM_CLASSES         21



#define Y_SCALE  10.0f

#define X_SCALE  10.0f

#define H_SCALE  5.0f

#define W_SCALE  5.0f



Scalar colorArray[10] = {

        Scalar(139,   0,   0, 255),

        Scalar(139,   0, 139, 255),

        Scalar(  0,   0, 139, 255),

        Scalar(  0, 100,   0, 255),

        Scalar(139, 139,   0, 255),

        Scalar(209, 206,   0, 255),

        Scalar(  0, 127, 255, 255),

        Scalar(139,  61,  72, 255),

        Scalar(  0, 255,   0, 255),

        Scalar(255,   0,   0, 255),

};



float MIN_SCORE = 0.7f;



float NMS_THRESHOLD = 0.45f;





int loadLabelName(string locationFilename, string* labels) {

    ifstream fin(locationFilename);

    string line;

    int lineNum = 0;

    while(getline(fin, line))

    {

        labels[lineNum] = line;

        lineNum++;

    }

    return 0;

}



int loadCoderOptions(string locationFilename, float (*boxPriors)[NUM_RESULTS])

{

    const char *d = ", ";

    ifstream fin(locationFilename);

    string line;

    int lineNum = 0;

    while(getline(fin, line))

    {

        char *line_str = const_cast<char *>(line.c_str());

        char *p;

        p = strtok(line_str, d);

        int priorIndex = 0;

        while (p) {

            float number = static_cast<float>(atof(p));

            boxPriors[lineNum][priorIndex++] = number;

            p=strtok(nullptr, d);

        }

        if (priorIndex != NUM_RESULTS) {

            return -1;

        }

        lineNum++;

    }

    return 0;



}



float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1, float ymax1) {

    float w = max(0.f, min(xmax0, xmax1) - max(xmin0, xmin1));

    float h = max(0.f, min(ymax0, ymax1) - max(ymin0, ymin1));

    float i = w * h;

    float u = (xmax0 - xmin0) * (ymax0 - ymin0) + (xmax1 - xmin1) * (ymax1 - ymin1) - i;

    return u <= 0.f ? 0.f : (i / u);

}



float expit(float x) {

    return (float) (1.0 / (1.0 + exp(-x)));

}



void decodeCenterSizeBoxes(float* predictions, float (*boxPriors)[NUM_RESULTS]) {



    for (int i = 0; i < NUM_RESULTS; ++i) {

        float ycenter = predictions[i*4+0] / Y_SCALE * boxPriors[2][i] + boxPriors[0][i];

        float xcenter = predictions[i*4+1] / X_SCALE * boxPriors[3][i] + boxPriors[1][i];

        float h = (float) exp(predictions[i*4 + 2] / H_SCALE) * boxPriors[2][i];

        float w = (float) exp(predictions[i*4 + 3] / W_SCALE) * boxPriors[3][i];



        float ymin = ycenter - h / 2.0f;

        float xmin = xcenter - w / 2.0f;

        float ymax = ycenter + h / 2.0f;

        float xmax = xcenter + w / 2.0f;



        predictions[i*4 + 0] = ymin;

        predictions[i*4 + 1] = xmin;

        predictions[i*4 + 2] = ymax;

        predictions[i*4 + 3] = xmax;

    }

}



int scaleToInputSize(float * outputClasses, int (*output)[NUM_RESULTS], int numClasses)

{

    int validCount = 0;

    // Scale them back to the input size.

    for (int i = 0; i < NUM_RESULTS; ++i) {

        float topClassScore = static_cast<float>(-1000.0);

        int topClassScoreIndex = -1;



        // Skip the first catch-all class.

        for (int j = 1; j < numClasses; ++j) {

            float score = expit(outputClasses[i*numClasses+j]);

            if (score > topClassScore) {

                topClassScoreIndex = j;

                topClassScore = score;

            }

        }



        if (topClassScore >= MIN_SCORE) {

            output[0][validCount] = i;

            output[1][validCount] = topClassScoreIndex;

            ++validCount;

        }

    }



    return validCount;

}



int nms(int validCount, float* outputLocations, int (*output)[NUM_RESULTS])

{

    for (int i=0; i < validCount; ++i) {

        if (output[0][i] == -1) {

            continue;

        }

        int n = output[0][i];

        for (int j=i + 1; j<validCount; ++j) {

            int m = output[0][j];

            if (m == -1) {

                continue;

            }

            float xmin0 = outputLocations[n*4 + 1];

            float ymin0 = outputLocations[n*4 + 0];

            float xmax0 = outputLocations[n*4 + 3];

            float ymax0 = outputLocations[n*4 + 2];



            float xmin1 = outputLocations[m*4 + 1];

            float ymin1 = outputLocations[m*4 + 0];

            float xmax1 = outputLocations[m*4 + 3];

            float ymax1 = outputLocations[m*4 + 2];



            float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);



            if (iou >= NMS_THRESHOLD) {

                output[0][j] = -1;

            }

        }

    }



    return 0;

}



int main(int argc, char** argv)

{

    const char *img_path = "./vgg_ssd/road_300x300.jpg";

    const char *model_path = "./vgg_ssd/vgg_ssd.rknn";

    const char *label_path = "./vgg_ssd/label.txt";

    const char *box_priors_path = "./vgg_ssd/mbox_priorbox_97.txt";



    //const char *img_path = "/tmp/road.bmp";

    //const char *model_path = "/tmp/mobilenet_ssd.rknn";

    //const char *label_path = "/tmp/coco_labels_list.txt";

    //const char *box_priors_path = "/tmp/box_priors.txt";



    const int img_width = 300;

    const int img_height = 300;

    const int img_channels = 3;

    const int input_index = 0;      // node name "Preprocessor/sub"



    const int output_elems1 = NUM_RESULTS * 4;

    const uint32_t output_size1 = output_elems1 * sizeof(float);

    const int output_index1 = 0;    // node name "concat"



    const int output_elems2 = NUM_RESULTS * NUM_CLASSES;

    const uint32_t output_size2 = output_elems2 * sizeof(float);

    const int output_index2 = 1;    // node name "concat_1"



    // Load image

    cv::Mat img = cv::imread(img_path, 1);

    if(!img.data) {

        printf("cv::imread %s fail!\n", img_path);

        return -1;

    }

    if(img.cols != img_width || img.rows != img_height)

        cv::resize(img, img, cv::Size(img_width, img_height), (0, 0), (0, 0), cv::INTER_LINEAR);



    // Load model

    FILE *fp = fopen(model_path, "rb");

    if(fp == NULL) {

        printf("fopen %s fail!\n", model_path);

        return -1;

    }

    fseek(fp, 0, SEEK_END);

    int model_len = ftell(fp);

    void *model = malloc(model_len);

    fseek(fp, 0, SEEK_SET);

    if(model_len != fread(model, 1, model_len, fp)) {

        printf("fread %s fail!\n", model_path);

        free(model);

        return -1;

    }



    // Start Inference

    rknn_input inputs[1];

    rknn_output outputs[2];

    rknn_tensor_attr outputs_attr[2];



    int ret = 0;

    rknn_context ctx = 0;



    ret = rknn_init(&ctx, model, model_len, RKNN_FLAG_PRIOR_MEDIUM);

    if(ret < 0) {

        printf("rknn_init fail! ret=%d\n", ret);

        goto Error;

    }



    outputs_attr[0].index = 0;

    ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(outputs_attr[0]), sizeof(outputs_attr[0]));

    if(ret < 0) {

        printf("rknn_query fail! ret=%d\n", ret);

        goto Error;

    }



    outputs_attr[1].index = 1;

    ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(outputs_attr[1]), sizeof(outputs_attr[1]));

    if(ret < 0) {

        printf("rknn_query fail! ret=%d\n", ret);

        goto Error;

    }



    inputs[0].index = input_index;

    inputs[0].buf = img.data;

    inputs[0].size = img_width * img_height * img_channels;

    inputs[0].pass_through = false;

    inputs[0].type = RKNN_TENSOR_UINT8;

    inputs[0].fmt = RKNN_TENSOR_NHWC;

    ret = rknn_inputs_set(ctx, 1, inputs);

    if(ret < 0) {

        printf("rknn_input_set fail! ret=%d\n", ret);

        goto Error;

    }



    ret = rknn_run(ctx, nullptr);

    if(ret < 0) {

        printf("rknn_run fail! ret=%d\n", ret);

        goto Error;

    }



    outputs[0].want_float = true;

    outputs[0].is_prealloc = false;

    outputs[1].want_float = true;

    outputs[1].is_prealloc = false;

     



    clock_t start,finish;

    double totaltime;

    start = clock();

    ret = rknn_outputs_get(ctx, 2, outputs, nullptr);

    finish=clock();

    totaltime=(double)(finish-start)/CLOCKS_PER_SEC;

    cout<<"程序的inference时间为"<<totaltime<<"秒！"<<endl;





    if(ret < 0) {

        printf("rknn_outputs_get fail! ret=%d\n", ret);

        goto Error;

    }

     

     

    // Process output

    if(outputs[0].size == outputs_attr[0].n_elems*sizeof(float) && outputs[1].size == outputs_attr[1].n_elems*sizeof(float))

    {

        float boxPriors[4][NUM_RESULTS];

        string labels[91];



        /* load label and boxPriors */

        loadLabelName(label_path, labels);

        loadCoderOptions(box_priors_path, boxPriors);



        float* predictions = (float*)outputs[0].buf;

        float* outputClasses = (float*)outputs[1].buf;



        int output[2][NUM_RESULTS];



        /* transform */

        decodeCenterSizeBoxes(predictions, boxPriors);



        start = clock();

        int validCount = scaleToInputSize(outputClasses ,output, NUM_CLASSES);

        finish=clock();

        totaltime=(double)(finish-start)/CLOCKS_PER_SEC;

        cout<<"程序的scaleToInputSize时间为"<<totaltime<<"秒！"<<endl;





        printf("validCount: %d\n", validCount);



        if (validCount < 100) {

            /* detect nest box */

            start = clock();

            nms(validCount, predictions, output);

            finish=clock();

            totaltime=(double)(finish-start)/CLOCKS_PER_SEC;

            cout<<"程序的nms时间为"<<totaltime<<"秒！"<<endl;









            start = clock();

            Mat rgba = imread(img_path, CV_LOAD_IMAGE_UNCHANGED);

            cv::resize(rgba, rgba, cv::Size(300, 300), (0, 0), (0, 0), cv::INTER_LINEAR);

            finish=clock();

            totaltime=(double)(finish-start)/CLOCKS_PER_SEC;

            cout<<"程序的imread时间为"<<totaltime<<"秒！"<<endl;

            

            start = clock();

            /* box valid detect target */

            for (int i = 0; i < validCount; ++i) {

                if (output[0][i] == -1) {

                    continue;

                }

                int n = output[0][i];

                int topClassScoreIndex = output[1][i];



                int x1 = static_cast<int>(predictions[n * 4 + 1] * rgba.cols);

                int y1 = static_cast<int>(predictions[n * 4 + 0] * rgba.rows);

                int x2 = static_cast<int>(predictions[n * 4 + 3] * rgba.cols);

                int y2 = static_cast<int>(predictions[n * 4 + 2] * rgba.rows);



                string label = labels[topClassScoreIndex];



                std::cout << label << "\t@ (" << x1 << ", " << y1 << ") (" << x2 << ", " << y2 << ")" << "\n";



                rectangle(rgba, Point(x1, y1), Point(x2, y2), colorArray[topClassScoreIndex%10], 3);

                putText(rgba, label, Point(x1, y1 - 12), 1, 2, Scalar(0, 255, 0, 255));

            }

            imwrite("out.jpg", rgba);

            //printf("write out.jpg succ!\n");

            finish=clock();

            totaltime=(double)(finish-start)/CLOCKS_PER_SEC;

            cout<<"程序的draw时间为"<<totaltime<<"秒！"<<endl;



        } else {

            printf("validCount too much!\n");

        }

    }

    else

    {

        printf("rknn_outputs_get fail! get outputs_size = [%d, %d], but expect [%lu, %lu]!\n",

            outputs[0].size, outputs[1].size, outputs_attr[0].n_elems*sizeof(float), outputs_attr[1].n_elems*sizeof(float));

    }

    

    

    rknn_outputs_release(ctx, 2, outputs);



Error:

    if(ctx)             rknn_destroy(ctx);

    if(model)           free(model);

    if(fp)              fclose(fp);

    return 0;

}
复制代码

只看该作者 · 发表于 2020-6-12 11:59:25

你的Python代码可以的话，那你对模型的输出格式和后处理有一定的概念，有几个点可以确认一下。
1.目前最新的rknn-toolkit，输出结果时，应该是有保留结果信息的，你可以在Python打印出来看一下，你的模型和mobilenet-ssd输出的格式不一定一致。
2.后处理不是简单套进去就可以，mobilenet-ssd只是提供参考，具体的后处理你要参考你Python的写法。
3.你可以在C里面把outputs的数据n_elems打印出来，跟你Python代码的结果对比一下。

只看该作者 · 发表于 2020-6-12 16:27:30

troy 发表于 2020-6-12 11:59
你的Python代码可以的话，那你对模型的输出格式和后处理有一定的概念，有几个点可以确认一下。
1.目前最新 ...

谢谢回复。自己已经解决。

只看该作者 · 发表于 2020-10-9 14:13:07

ylc123 发表于 2020-6-12 16:27
谢谢回复。自己已经解决。

你好，请问是怎么解决的？我也遇到这个问题了，能请教一下吗？