|
本帖最后由 ylc123 于 2020-6-16 09:48 编辑
你好:
我有一个自己的 MobileNet-SSD 模型:(1)直接按照咱们的 C 代码示例进行测试,在 for 循环中反复推理,单帧耗时大约 10 ms;(2)将其集成到 Qt 中读取视频流后再进行测试,单帧耗时大约 34 ms。目前想不出造成这一差异的原因,能否解答一下可能的原因呢?非常感谢。代码模块如下:
for(int k =0;k<1000;k++){
cv::Mat img = cv::imread(img_path, 1);
cv::Mat img_clone = img.clone();
cvtColor(img, img, CV_BGR2RGB);
if(img.cols != img_width || img.rows != img_height)
cv::resize(img, img, cv::Size(img_width, img_height), (0, 0), (0, 0), cv::INTER_LINEAR);
clock_t start,finish;
double totaltime;
start = clock();
rknn_input inputs[1];
rknn_output outputs[2];
rknn_tensor_attr outputs_attr[2];
int ret = 0;
rknn_context ctx = 0;
ret = rknn_init(&ctx, model, model_len, RKNN_FLAG_PRIOR_MEDIUM);
outputs_attr[0].index = 0;
ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(outputs_attr[0]), sizeof(outputs_attr[0]));
outputs_attr[1].index = 1;
ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(outputs_attr[1]), sizeof(outputs_attr[1]));
inputs[0].index = input_index;
inputs[0].buf = img.data;
inputs[0].size = img_width * img_height * img_channels;
inputs[0].pass_through = false;
inputs[0].type = RKNN_TENSOR_UINT8;
inputs[0].fmt = RKNN_TENSOR_NHWC; //ago
ret = rknn_inputs_set(ctx, 1, inputs);
ret = rknn_run(ctx, nullptr);
outputs[0].want_float = true;
outputs[0].is_prealloc = false;
outputs[1].want_float = true;
outputs[1].is_prealloc = false;
ret = rknn_outputs_get(ctx, 2, outputs, nullptr);
if(outputs[0].size == outputs_attr[0].n_elems*sizeof(float) && outputs[1].size == outputs_attr[1].n_elems*sizeof(float)){
float boxPriors[4][NUM_RESULTS];
string labels[2];
loadLabelName(label_path, labels);
loadCoderOptions(box_priors_path, boxPriors);
float* predictions = (float*)outputs[0].buf;float* outputClasses = (float*)outputs[1].buf;
int output[2][NUM_RESULTS];
decodeCenterSizeBoxes(predictions, boxPriors);
int validCount = scaleToInputSize(outputClasses ,output, NUM_CLASSES);
if (validCount < 100) {
nms(validCount, predictions, output);
for (int i = 0; i < validCount; ++i) {
if (output[0] == -1) continue;
int n = output[0];
int topClassScoreIndex = output[1];
float score = outputClasses[n*2+topClassScoreIndex];
int x1 = static_cast<int>(predictions[n * 4 + 0] * img_clone.cols);
int y1 = static_cast<int>(predictions[n * 4 + 1] * img_clone.rows);
int x2 = static_cast<int>(predictions[n * 4 + 2] * img_clone.cols);
int y2 = static_cast<int>(predictions[n * 4 + 3] * img_clone.rows);
string label = labels[topClassScoreIndex];
rectangle(img_clone, Point(x1, y1), Point(x2, y2), colorArray[topClassScoreIndex%10], 3);
putText(img_clone, label, Point(x1, y1 - 12), 1, 2, Scalar(0, 255, 0, 255));
}
}
else
printf("validCount too much!\n");
}
else{
printf("rknn_outputs_get fail! get outputs_size = [%d, %d], but expect [%lu, %lu]!\n",
outputs[0].size, outputs[1].size, outputs_attr[0].n_elems*sizeof(float), outputs_attr[1].n_elems*sizeof(float));
}
rknn_outputs_release(ctx, 2, outputs);
finish=clock();
totaltime=(double)(finish-start)/CLOCKS_PER_SEC;
cout<<"time of process one frame is :"<<totaltime<<"秒!"<<endl;
}
|
|