Toybrick

多进程版yolov3tiny

fantrix

注册会员

积分
58
发表于 2019-4-11 18:28:24    查看: 10808|回复: 8 | [复制链接]    打印 | 显示全部楼层

没试过多线程,post process都是cpu在跑,直接用了多进程搞,FPS大概20.
  1. import logging as log
  2. import time
  3. import numpy as np
  4. import cv2
  5. from rknn.api import RKNN
  6. from multiprocessing import Process, Queue

  7. GRID0 = 13
  8. GRID1 = 26
  9. LISTSIZE = 85
  10. SPAN = 3
  11. NUM_CLS = 80
  12. MAX_BOXES = 500
  13. OBJ_THRESH = 0.2
  14. NMS_THRESH = 0.2

  15. CLASSES = ("person", "bicycle", "car","motorbike ","aeroplane ","bus ","train","truck ","boat","traffic light",
  16.            "fire hydrant","stop sign ","parking meter","bench","bird","cat","dog ","horse ","sheep","cow","elephant",
  17.            "bear","zebra ","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite",
  18.            "baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife ",
  19.            "spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza ","donut","cake","chair","sofa",
  20.            "pottedplant","bed","diningtable","toilet ","tvmonitor","laptop        ","mouse        ","remote ","keyboard ","cell phone","microwave ",
  21.            "oven ","toaster","sink","refrigerator ","book","clock","vase","scissors ","teddy bear ","hair drier", "toothbrush ")

  22. def sigmoid(x):
  23.     return 1 / (1 + np.exp(-x))


  24. def process(input, mask, anchors):

  25.     anchors = [anchors[i] for i in mask]
  26.     grid_h, grid_w = map(int, input.shape[0:2])

  27.     box_confidence = sigmoid(input[..., 4])
  28.     box_confidence = np.expand_dims(box_confidence, axis=-1)

  29.     box_class_probs = sigmoid(input[..., 5:])

  30.     box_xy = sigmoid(input[..., :2])
  31.     box_wh = np.exp(input[..., 2:4])
  32.     box_wh = box_wh * anchors

  33.     col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
  34.     row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)

  35.     col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
  36.     row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
  37.     grid = np.concatenate((col, row), axis=-1)

  38.     box_xy += grid
  39.     box_xy /= (grid_w, grid_h)
  40.     box_wh /= (416, 416)
  41.     box_xy -= (box_wh / 2.)
  42.     box = np.concatenate((box_xy, box_wh), axis=-1)

  43.     return box, box_confidence, box_class_probs

  44. def filter_boxes(boxes, box_confidences, box_class_probs):
  45.     """Filter boxes with object threshold.

  46.     # Arguments
  47.         boxes: ndarray, boxes of objects.
  48.         box_confidences: ndarray, confidences of objects.
  49.         box_class_probs: ndarray, class_probs of objects.

  50.     # Returns
  51.         boxes: ndarray, filtered boxes.
  52.         classes: ndarray, classes for boxes.
  53.         scores: ndarray, scores for boxes.
  54.     """
  55.     box_scores = box_confidences * box_class_probs
  56.     box_classes = np.argmax(box_scores, axis=-1)
  57.     box_class_scores = np.max(box_scores, axis=-1)
  58.     pos = np.where(box_class_scores >= OBJ_THRESH)

  59.     boxes = boxes[pos]
  60.     classes = box_classes[pos]
  61.     scores = box_class_scores[pos]

  62.     return boxes, classes, scores

  63. def nms_boxes(boxes, scores):
  64.     """Suppress non-maximal boxes.

  65.     # Arguments
  66.         boxes: ndarray, boxes of objects.
  67.         scores: ndarray, scores of objects.

  68.     # Returns
  69.         keep: ndarray, index of effective boxes.
  70.     """
  71.     x = boxes[:, 0]
  72.     y = boxes[:, 1]
  73.     w = boxes[:, 2]
  74.     h = boxes[:, 3]

  75.     areas = w * h
  76.     order = scores.argsort()[::-1]

  77.     keep = []
  78.     while order.size > 0:
  79.         i = order[0]
  80.         keep.append(i)

  81.         xx1 = np.maximum(x[i], x[order[1:]])
  82.         yy1 = np.maximum(y[i], y[order[1:]])
  83.         xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
  84.         yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])

  85.         w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
  86.         h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
  87.         inter = w1 * h1

  88.         ovr = inter / (areas[i] + areas[order[1:]] - inter)
  89.         inds = np.where(ovr <= NMS_THRESH)[0]
  90.         order = order[inds + 1]
  91.     keep = np.array(keep)
  92.     return keep


  93. def yolov3_post_process(input_data):
  94.     # # yolov3
  95.     # masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
  96.     # anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
  97.     #            [59, 119], [116, 90], [156, 198], [373, 326]]
  98.     # yolov3-tiny
  99.     masks = [[3, 4, 5], [0, 1, 2]]
  100.     anchors = [[10, 14], [23, 27], [37, 58], [81, 82], [135, 169], [344, 319]]

  101.     boxes, classes, scores = [], [], []
  102.     for input,mask in zip(input_data, masks):
  103.         b, c, s = process(input, mask, anchors)
  104.         b, c, s = filter_boxes(b, c, s)
  105.         boxes.append(b)
  106.         classes.append(c)
  107.         scores.append(s)

  108.     boxes = np.concatenate(boxes)
  109.     classes = np.concatenate(classes)
  110.     scores = np.concatenate(scores)

  111.     # # Scale boxes back to original image shape.
  112.     # width, height = 416, 416 #shape[1], shape[0]
  113.     # image_dims = [width, height, width, height]
  114.     # boxes = boxes * image_dims

  115.     nboxes, nclasses, nscores = [], [], []
  116.     for c in set(classes):
  117.         inds = np.where(classes == c)
  118.         b = boxes[inds]
  119.         c = classes[inds]
  120.         s = scores[inds]

  121.         keep = nms_boxes(b, s)

  122.         nboxes.append(b[keep])
  123.         nclasses.append(c[keep])
  124.         nscores.append(s[keep])

  125.     if not nclasses and not nscores:
  126.         return None, None, None

  127.     boxes = np.concatenate(nboxes)
  128.     classes = np.concatenate(nclasses)
  129.     scores = np.concatenate(nscores)

  130.     return boxes, classes, scores

  131. def draw(image, boxes, scores, classes):
  132.     """Draw the boxes on the image.

  133.     # Argument:
  134.         image: original image.
  135.         boxes: ndarray, boxes of objects.
  136.         classes: ndarray, classes of objects.
  137.         scores: ndarray, scores of objects.
  138.         all_classes: all classes name.
  139.     """
  140.     for box, score, cl in zip(boxes, scores, classes):
  141.         x, y, w, h = box
  142.         # print('class: {}, score: {}'.format(CLASSES[cl], score))
  143.         # print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(x, y, x+w, y+h))
  144.         x *= image.shape[1]
  145.         y *= image.shape[0]
  146.         w *= image.shape[1]
  147.         h *= image.shape[0]
  148.         top = max(0, np.floor(x + 0.5).astype(int))
  149.         left = max(0, np.floor(y + 0.5).astype(int))
  150.         right = min(image.shape[1], np.floor(x + w + 0.5).astype(int))
  151.         bottom = min(image.shape[0], np.floor(y + h + 0.5).astype(int))

  152.         # print('class: {}, score: {}'.format(CLASSES[cl], score))
  153.         # print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(top, left, right, bottom))

  154.         cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
  155.         cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
  156.                     (top, left - 6),
  157.                     cv2.FONT_HERSHEY_SIMPLEX,
  158.                     0.6, (0, 0, 255), 2)

  159.         # print('class: {0}, score: {1:.2f}'.format(CLASSES[cl], score))
  160.         # print('box coordinate x,y,w,h: {0}'.format(box))

  161. def load_model():
  162.         rknn = RKNN()
  163.         print('-->loading model')
  164.         rknn.load_rknn('./model/yolov3_tiny.rknn')
  165.         #rknn.load_rknn('./yolov3.rknn')
  166.         print('loading model done')

  167.         print('--> Init runtime environment')
  168.         ret = rknn.init_runtime(target='rk3399pro')
  169.         if ret != 0:
  170.                 print('Init runtime environment failed')
  171.                 exit(ret)
  172.         print('done')
  173.         return rknn


  174. def video_capture(src, q_frame:Queue):
  175.     #video = VideoCapture.Video(src)
  176.     video = cv2.VideoCapture(int(src))
  177.     while True:
  178.         s = time.time()
  179.         #frame = video.get_last_frame()
  180.         ret, frame = video.read()
  181.         assert ret, 'read video frame failed.'
  182.         if q_frame.empty():
  183.             s = time.time()
  184.             q_frame.put(frame)
  185.             print("get frame and put to queue used {} ms".format((time.time()-s)*1000))
  186.         else:
  187.             print("get frame but skiped, used {} ms".format((time.time()-s)*1000))

  188. def infer_rknn(q_image:Queue, q_infer:Queue):
  189.     rknn = load_model()
  190.     rknn.get_sdk_version()

  191.     while True:
  192.         s = time.time()
  193.         image = q_image.get()
  194.         print('Infer recv frame, used time {} ms. '.format((time.time() - s) * 1000))
  195.         s = time.time()
  196.         out_boxes, out_boxes2 = rknn.inference(inputs=[image])
  197.         out_boxes = out_boxes.reshape(SPAN, LISTSIZE, GRID0, GRID0)
  198.         out_boxes2 = out_boxes2.reshape(SPAN, LISTSIZE, GRID1, GRID1)
  199.         input_data = []
  200.         input_data.append(np.transpose(out_boxes, (2, 3, 0, 1)))
  201.         input_data.append(np.transpose(out_boxes2, (2, 3, 0, 1)))

  202.         q_infer.put(input_data)
  203.         print('Infer done, used time {} ms. '.format((time.time()-s)*1000))

  204. def post_process(q_infer, q_objs):
  205.     while True:
  206.         s = time.time()
  207.         input_data = q_infer.get()
  208.         log.info('Post process recv input, used time {} ms. '.format((time.time() - s) * 1000))
  209.         s = time.time()
  210.         boxes, classes, scores = yolov3_post_process(input_data)

  211.         q_objs.put((boxes, classes, scores))
  212.         log.info('Post process done, used time {} ms. '.format((time.time()-s)*1000))

  213. if __name__ == '__main__':
  214.     log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.DEBUG)

  215.     q_frame = Queue()
  216.     q_image = Queue()
  217.     q_infer = Queue()
  218.     q_objs = Queue()

  219.     p_cap = Process(target=video_capture, args=('0',q_frame))
  220.     p_infer = Process(target=infer_rknn, args=(q_image, q_infer))
  221.     p_post = Process(target=post_process, args=(q_infer, q_objs))


  222.     p_infer.start()
  223.     p_cap.start()
  224.     p_post.start()
  225.     #p_show.start()

  226.     l_frame = []
  227.     fps = 0
  228.     l_used_time = []

  229.     while True:
  230.         s = time.time()
  231.         frame = q_frame.get()

  232.         image = cv2.resize(frame, (416, 416))
  233.         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  234.         q_image.put(image)

  235.         l_frame.append(frame)

  236.         if q_objs.empty() and (len(l_frame) < 3):
  237.             continue
  238.         else:
  239.             objects = q_objs.get()

  240.         frame = l_frame.pop(0)
  241.         boxes, classes, scores = objects
  242.         if boxes is not None:
  243.             draw(frame, boxes, scores, classes)
  244.             cv2.putText(frame, text='FPS: {}'.format(fps), org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
  245.                                      fontScale=0.50, color=(255, 0, 0), thickness=2)
  246.             cv2.imshow("results", frame)

  247.             c = cv2.waitKey(5) & 0xff
  248.             if c == 27:
  249.                 cv2.destroyAllWindows()
  250.                 break

  251.         used_time = time.time() - s
  252.         l_used_time.append(used_time)
  253.         if len(l_used_time) > 20:
  254.             l_used_time.pop(0)
  255.         fps = int(1/np.mean(l_used_time))

  256.         print('All done, used time {} ms. '.format(used_time*1000))

  257.     p_cap.terminate()
  258.     p_infer.terminate()
  259.     p_post.terminate()
  260.     exit()

复制代码


回复

使用道具 举报

zhangzj

超级版主

积分
1109
发表于 2019-4-11 18:34:27 | 显示全部楼层
20FPS的瓶颈在哪里?post process吗?
回复

使用道具 举报

fantrix

注册会员

积分
58
 楼主| 发表于 2019-4-12 09:13:39 | 显示全部楼层
zhangzj 发表于 2019-4-11 18:34
20FPS的瓶颈在哪里?post process吗?

推理和后处理并行,大概20~30ms;画图和显示大概10ms(其中waitKey等待了5ms);队列进出的开销大概10ms。
显示也可以丢到进程里面去的,有空我搞搞。
回复

使用道具 举报

MinskyLee

注册会员

积分
68
发表于 2019-4-12 15:09:56 | 显示全部楼层
为什么我运行了代码后一直显示get frame but skiped,也没有弹出摄像头的界面。
回复

使用道具 举报

fantrix

注册会员

积分
58
 楼主| 发表于 2019-4-15 15:17:39 | 显示全部楼层
MinskyLee 发表于 2019-4-12 15:09
为什么我运行了代码后一直显示get frame but skiped,也没有弹出摄像头的界面。

估计是推理进程没跑起来,看看启动阶段报了什么错。注意,rknn.init_runtime(target='rk3399pro')。
回复

使用道具 举报

magi803

新手上路

积分
32
发表于 2019-4-18 10:38:36 | 显示全部楼层
hisping 发表于 2019-4-18 09:01
感谢你提供的多进程版本,我在你的基础上做了优化,可以达到30帧,已上传,帖子里可以下载下来 ...

好的,这个30帧是tiny才能达到的吧?完整的yolov3在推理这块就用了近0.5s,实在太慢了,有办法解决吗,还是我弄的有问题
回复

使用道具 举报

abcehac

注册会员

积分
163
发表于 2019-8-7 21:18:37 | 显示全部楼层
感谢分享!
回复

使用道具 举报

shopping

中级会员

积分
410
发表于 2019-9-16 15:22:56 | 显示全部楼层
你好,请问 c++版 demo 与 python 版 demo,二者推理速度差多少,请问您试过吗?
回复

使用道具 举报

administer

中级会员

积分
305
发表于 2020-5-17 00:56:49 | 显示全部楼层
这速度有点对不起这价格啊
回复

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

产品中心 购买渠道 开源社区 Wiki教程 资料下载 关于Toybrick


快速回复 返回顶部 返回列表