Toybrick
Title: On-board inference with rknnlite2 gives normal results, but inference with C++ code differs greatly
Author: chaofengl    Time: 2025-1-6 14:48    Title: On-board inference with rknnlite2 gives normal results, but inference with C++ code differs greatly
This post was last edited by chaofengl at 2025-1-7 13:05.
Background: toolkit-lite2 version 2.3.0, RKNPU2 C API version 2.3.0, driver version 0.8.2. The Python code and the C code use the same model, and the image file has already been preprocessed with letterbox. However, the Python inference results differ greatly from the C inference results. Below is the Python code; the printed results are the raw, unprocessed output data:
# -*- coding: utf-8 -*-
import os
import sys
import cv2
from rknnlite.api import RKNNLite
from aniru.common.resize import letterbox
import numpy as np

sys.path.append(os.path.dirname(__file__) + os.sep + "./")
from common.face_detector_util import prior_box_forward, decode, nms_sorted

if __name__ == "__main__":
    RKNN_MODEL = 'fd38.rknn'
    # img = cv2.imread("./demo.jpg")
    img = cv2.imread("./letterbox_input.jpg")
    # img, ratio, (dw, dh) = letterbox(img)
    ori_img = img.copy()
    img_shape = img.shape
    print(img_shape)
    model_h = img_shape[0]
    model_w = img_shape[1]
    model_channel = img_shape[2]
    img = img.reshape((1, model_h, model_w, model_channel))
    # img = img.transpose((0, 3, 1, 2)).astype(np.float32)
    # img = img.transpose((0, 3, 1, 2))

    rknn_lite = RKNNLite()
    ret = rknn_lite.load_rknn(RKNN_MODEL)
    ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
    # debug
    ret = rknn_lite.list_support_target_platform(rknn_model=RKNN_MODEL)
    # outputs = rknn_lite.inference(inputs=[img])
    outputs = rknn_lite.inference(inputs=[img])
    print("done")

    """
    for idx, output in enumerate(outputs):
        print(f'Output {idx}: shape={output.shape}')
        print(output)
    """

    pre_output = np.array([[20., 20.], [10., 10.], [5., 5.]])
    priors = prior_box_forward(pre_output, model_h, model_w, 3)
    print('priors len:', len(priors))
    # for pr in priors:
    #     print("%.4f %.4f %.4f %.4f" % pr)
    print("\n\r")
    variance = [0.1, 0.2]
    # print("outputs[1]:")
    for idx, item in enumerate(outputs[1]):
        # print(item)
        print("index=%d %.4f %.4f" % (idx, item[0], item[1]))
C code:
ret = rknn_init(&ctx, model_data, model_data_size, 0, NULL);
rknn_sdk_version version;
ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &version, sizeof(rknn_sdk_version));
if (ret < 0)
{
    printf("rknn_init error ret=%d\n", ret);
    return -1;
}
printf("sdk version: %s driver version: %s\n", version.api_version, version.drv_version);

rknn_input_output_num io_num;
ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
if (ret < 0)
{
    printf("rknn_init error ret=%d\n", ret);
    return -1;
}
printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);

rknn_tensor_attr input_attrs[io_num.n_input];
memset(input_attrs, 0, sizeof(input_attrs));
for (int i = 0; i < io_num.n_input; i++)
{
    input_attrs[i].index = i;
    ret = rknn_query(ctx, RKNN_QUERY_NATIVE_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
    if (ret < 0)
    {
        printf("rknn_init error ret=%d\n", ret);
        return -1;
    }
    dump_tensor_attr(&(input_attrs[i]));
}

int channel = 3;
int width = 0;
int height = 0;
if (input_attrs[0].fmt == RKNN_TENSOR_NCHW)
{
    printf("model is NCHW input fmt\n");
    channel = input_attrs[0].dims[1];
    height = input_attrs[0].dims[2];
    width = input_attrs[0].dims[3];
}
else
{
    printf("model is NHWC input fmt\n");
    height = input_attrs[0].dims[1];
    width = input_attrs[0].dims[2];
    channel = input_attrs[0].dims[3];
}

rknn_input inputs[1];
memset(inputs, 0, sizeof(inputs));
inputs[0].index = 0;
inputs[0].size = width * height * channel;
inputs[0].type = RKNN_TENSOR_UINT8;
inputs[0].fmt = RKNN_TENSOR_NHWC;
inputs[0].pass_through = 0;
inputs[0].buf = resized_img.data;
rknn_inputs_set(ctx, io_num.n_input, inputs);

rknn_output outputs[io_num.n_output];
memset(outputs, 0, sizeof(outputs));
for (int i = 0; i < io_num.n_output; i++)
{
    outputs[i].index = i;
    outputs[i].want_float = 1;
}
ret = rknn_run(ctx, NULL);
ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL);

float *pBuffer = (float *)outputs[1].buf;
for (size_t i = 0; i < outputs[1].size / 4; i += 2)
{
    /* code */
    std::vector<float> tmp_box;
    auto tmp_0 = pBuffer[i / 2 + 0];
    auto tmp_1 = pBuffer[i / 2 + 1];
    printf("index=%d %.4f %.4f\n", i / 2, tmp_0, tmp_1);
}
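To compare the two outputs more precisely than by eye, the raw float buffer could be written to a file on the C side and then loaded in Python (e.g. with np.fromfile(..., dtype=np.float32)). A minimal sketch; the helper name dump_output_to_file and the file name are only examples:

#include <stdio.h>

/* Sketch: write one raw output tensor to disk so it can be diffed against the
 * Python result offline. outputs[1].buf already holds float data here because
 * want_float was set to 1 above. */
static int dump_output_to_file(const char *path, const rknn_output *out)
{
    FILE *fp = fopen(path, "wb");
    if (fp == NULL)
        return -1;
    fwrite(out->buf, 1, out->size, fp);
    fclose(fp);
    return 0;
}

/* usage, after rknn_outputs_get():
 *   dump_output_to_file("c_output1.bin", &outputs[1]);
 */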
See the attachment for the test results:
Author: chaofengl    Time: 2025-1-6 15:04
The image comparing the test results could not be uploaded.
C results:             Python results:
index=0 0.9941 0.0062 index=0 0.9941 0.0061
index=1 0.0062 0.9941 index=1 0.9941 0.0057
index=2 0.9941 0.0058 index=2 0.9961 0.0042
index=3 0.0058 0.9961 index=3 0.9971 0.0030
index=4 0.9961 0.0042 index=4 0.9951 0.0052
index=5 0.0042 0.9971 index=5 0.9951 0.0044
index=6 0.9971 0.0030 index=6 0.9971 0.0031
index=7 0.0030 0.9951 index=7 0.9980 0.0020
index=8 0.9951 0.0052 index=8 0.9951 0.0047
index=9 0.0052 0.9951 index=9 0.9961 0.0041
index=10 0.9951 0.0044 index=10 0.9971 0.0032
index=11 0.0044 0.9971 index=11 0.9971 0.0025
index=12 0.9971 0.0031 index=12 0.9961 0.0037
index=13 0.0031 0.9980 index=13 0.9971 0.0032
index=14 0.9980 0.0020 index=14 0.9971 0.0033
index=15 0.0020 0.9951 index=15 0.9971 0.0032
index=16 0.9951 0.0047 index=16 0.9951 0.0045
index=17 0.0047 0.9961 index=17 0.9971 0.0028
... ...
Author: jefferyzhang    Time: 2025-1-8 10:45
Python ultimately calls the same C librknn library underneath; with identical parameters the results will not differ.
So most likely the parameters you set are not the same.
Also, make sure both the library and the driver are the latest versions.
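For example, one way to check that the C-side input settings really match what the runtime expects is to query and print both the default and the native input attributes; a rough sketch, assuming ctx is the initialized rknn_context and dump_tensor_attr is the same helper used in the code above:

/* Sketch: compare the default and the native input attributes. If the buffer
 * passed to rknn_inputs_set() does not match the queried type/layout, the C
 * results can differ from Python even with the same model and image. */
rknn_tensor_attr in_attr, in_attr_native;
memset(&in_attr, 0, sizeof(in_attr));
memset(&in_attr_native, 0, sizeof(in_attr_native));
in_attr.index = 0;
in_attr_native.index = 0;
rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &in_attr, sizeof(in_attr));
rknn_query(ctx, RKNN_QUERY_NATIVE_INPUT_ATTR, &in_attr_native, sizeof(in_attr_native));
dump_tensor_attr(&in_attr);
dump_tensor_attr(&in_attr_native);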
Author: chaofengl    Time: 2025-1-9 09:38
This post was last edited by chaofengl at 2025-1-9 09:48.
jefferyzhang posted on 2025-1-8 10:45:
Python ultimately calls the same C librknn library underneath; with identical parameters the results will not differ.
So most likely the parameters you set are not the same.
Also, make sure ...
Hi, the driver cannot be updated beyond 0.8.2, and the libraries are already the latest 2.3.0. The Python parameters are the defaults, and the C API call parameters were set by following the technical documentation. The model is the same,
and the image is the same one, preprocessed with letterbox.
Main Python code:
rknn_lite = RKNNLite()
ret = rknn_lite.load_rknn(RKNN_MODEL)
ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
ret=rknn_lite.list_support_target_platform(rknn_model=RKNN_MODEL)
outputs = rknn_lite.inference(inputs=[img])
for idx, item in enumerate(outputs[1]):
    print("index=%d %.4f %.4f" % (idx, item[0], item[1]))
Main C code:
ret = rknn_init(&ctx, model_data, model_data_size, 0, NULL);
rknn_sdk_version version;
ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &version, sizeof(rknn_sdk_version));

rknn_input_output_num io_num;
ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));

rknn_tensor_attr input_attrs[io_num.n_input];
memset(input_attrs, 0, sizeof(input_attrs));
for (int i = 0; i < io_num.n_input; i++)
{
    input_attrs[i].index = i;
    ret = rknn_query(ctx, RKNN_QUERY_NATIVE_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
    if (ret < 0)
    {
        printf("rknn_init error ret=%d\n", ret);
        return -1;
    }
    dump_tensor_attr(&(input_attrs[i]));
}

int channel = 3;
int width = 0;
int height = 0;
if (input_attrs[0].fmt == RKNN_TENSOR_NCHW)
{
    printf("model is NCHW input fmt\n");
    channel = input_attrs[0].dims[1];
    height = input_attrs[0].dims[2];
    width = input_attrs[0].dims[3];
}
else
{
    printf("model is NHWC input fmt\n");
    height = input_attrs[0].dims[1];
    width = input_attrs[0].dims[2];
    channel = input_attrs[0].dims[3];
}

rknn_input inputs[1];
memset(inputs, 0, sizeof(inputs));
inputs[0].index = 0;
inputs[0].size = width * height * channel;
inputs[0].type = RKNN_TENSOR_UINT8;
inputs[0].fmt = RKNN_TENSOR_NHWC;
inputs[0].pass_through = 0;
inputs[0].buf = resized_img.data;
rknn_inputs_set(ctx, io_num.n_input, inputs);

rknn_output outputs[io_num.n_output];
memset(outputs, 0, sizeof(outputs));
for (int i = 0; i < io_num.n_output; i++)
{
    outputs[i].index = i;
    outputs[i].want_float = 1;
}
ret = rknn_run(ctx, NULL);
ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL);

float *pBuffer = (float *)outputs[1].buf;
for (size_t i = 0; i < outputs[1].size / 4; i += 2)
{
    /* code */
    std::vector<float> tmp_box;
    auto tmp_0 = pBuffer[i / 2 + 0];
    auto tmp_1 = pBuffer[i / 2 + 1];
    printf("index=%d %.4f %.4f\n", i / 2, tmp_0, tmp_1);
}
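A quick way to rule out a preprocessing mismatch would be to checksum the exact input buffer on both sides right before inference and compare the two numbers; a minimal sketch, where buffer_checksum is just a hypothetical helper:

/* Sketch: simple byte sum of the input buffer, to confirm that the letterboxed
 * image fed to rknn_inputs_set() is identical to the array passed to
 * rknn_lite.inference() in Python (e.g. int(img.sum()) there). */
static unsigned long long buffer_checksum(const unsigned char *buf, size_t len)
{
    unsigned long long sum = 0;
    for (size_t i = 0; i < len; i++)
        sum += buf[i];
    return sum;
}

/* usage, just before rknn_inputs_set():
 *   printf("input checksum: %llu\n",
 *          buffer_checksum((const unsigned char *)inputs[0].buf, inputs[0].size));
 */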
Could you please take a look? Many thanks.
Welcome to Toybrick (https://t.rock-chips.com/)
Powered by Discuz! X3.3