|
我把onnx转rknn的例子中的test.py修改成从摄像头输入视频流并使用opencv进行接收后循环检测每一帧并进行后处理和显示,发现FPS只有8帧左右,使用的是自己训练并修改网络结构的yolov5,使用eval_perf接口进行单张图片的rknn模型评估时fps为44,同时我还自己训练了未修改网络的yolov5进行对比,rknn评估时fps为37,但在RK3399pro上表现也只有8帧,我不知道问题出在了哪里,烦请大神解答,若个人思路有偏差,还请多多指正。以下是我对主函数的改动,后处理函数未贴。
# Load rknn models
print('--> Loading model')
ret = rknn.load_rknn(path='./rkb11_rk3399pro_out_opt.rknn')
if ret != 0:
print('Load yolov5rknn failed!')
exit(ret)
print('done')
# init runtime environment
print('--> Init runtime environment')
ret = rknn.init_runtime()
# ret = rknn.init_runtime('rk1808', device_id='1808')
if ret != 0:
print('Init runtime environment failed')
exit(ret)
print('done')
# VideoCapture inputs
capture = cv2.VideoCapture(10)
ref, frame = capture.read()
height, width = frame.shape[:2]
print(f"Image resolution: {width} x {height}")
if not ref:
raise ValueError("Could not open the camera!Please check it to ensure work correctly.")
fps = 0.0
while(True):
t1 = time.time()
# 读取某一帧
ref,frame = capture.read()
if not ref:
break
# 格式转变
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# 进行检测
img = frame
# img, ratio, (dw, dh) = letterbox(img, new_shape=(IMG_SIZE, IMG_SIZE))
# print(dw, dh)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img,(IMG_SIZE, IMG_SIZE))
'''
# Set inputs
img = cv2.imread(IMG_PATH)
# img, ratio, (dw, dh) = letterbox(img, new_shape=(IMG_SIZE, IMG_SIZE))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img,(IMG_SIZE, IMG_SIZE))
'''
# Inference
print('--> Running model')
outputs = rknn.inference(inputs=[img])
# print(outputs[0].ndim)
# print(outputs[0].shape)
# print(outputs[0].size)
# simple post process
simple_data = outputs[0]
simple_data = simple_data.reshape([3,-1]+list(simple_data.shape[-2:]))
simple_data = np.transpose(simple_data, (2, 3, 0, 1))
# print(simple_data.shape)
boxes, classes, scores = yolov5_post_process_simple(simple_data)
img_0 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
if boxes is not None:
draw(img_0, boxes, scores, classes)
cv2.imshow("direct result", img_0)
# full post process 三个输出对应yolov5三个不同尺度的检测结果
input0_data = outputs[0] # 每个边界框的信息通常包括边界框坐标(4个值,x、y、宽度、高度)、置信度分数(1个值)和类别分数(C个值,每个类别一个分数)
input1_data = outputs[1] # 在上下文中,input_data 是一个多维数组,其形状为 (batch_size, channels, height, width),分别对应(0,1,2,3)四个维度
input2_data = outputs[2]
input0_data = input0_data.reshape([3,-1]+list(input0_data.shape[-2:]))
input1_data = input1_data.reshape([3,-1]+list(input1_data.shape[-2:]))
input2_data = input2_data.reshape([3,-1]+list(input2_data.shape[-2:]))
input_data = list()
input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))
boxes, classes, scores = yolov5_post_process(input_data)
img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
if boxes is not None:
draw(img_1, boxes, scores, classes)
fps = ( fps + (1./(time.time()-t1))) / 2
print("fps= %.2f"%(fps))
cv2.imshow("viedo", img_1[:, :, : : -1])
c = cv2.waitKey(100) & 0xff
if c == 27:
capture.release()
break
print("Video Detection Done!")
capture.release()
cv2.destroyAllWindows()
rknn.release()
|
|