Toybrick

标题: 多进程版yolov3tiny [打印本页]

作者: fantrix    时间: 2019-4-11 18:28
标题: 多进程版yolov3tiny

没试过多线程,post process都是cpu在跑,直接用了多进程搞,FPS大概20.
  1. import logging as log
  2. import time
  3. import numpy as np
  4. import cv2
  5. from rknn.api import RKNN
  6. from multiprocessing import Process, Queue

  # Grid sizes used to reshape the two raw model outputs (see infer_rknn).
  GRID0 = 13
  GRID1 = 26
  # Values per anchor box: 4 box terms + 1 objectness + NUM_CLS class scores.
  LISTSIZE = 85
  # Number of anchor boxes predicted per grid cell.
  SPAN = 3
  NUM_CLS = 80
  # NOTE(review): MAX_BOXES is not referenced by the code visible in this file.
  MAX_BOXES = 500
  # Minimum (objectness * class probability) for a box to be kept.
  OBJ_THRESH = 0.2
  # Boxes overlapping a higher-scoring box by more than this IoU are suppressed.
  NMS_THRESH = 0.2

  # Class names indexed by class id. Stray trailing whitespace was removed from
  # the names so that drawn labels do not contain padding.
  CLASSES = (
      "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
      "truck", "boat", "traffic light",
      "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
      "dog", "horse", "sheep", "cow", "elephant",
      "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
      "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
      "baseball bat", "baseball glove", "skateboard", "surfboard",
      "tennis racket", "bottle", "wine glass", "cup", "fork", "knife",
      "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
      "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa",
      "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop",
      "mouse", "remote", "keyboard", "cell phone", "microwave",
      "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
      "scissors", "teddy bear", "hair drier", "toothbrush",
  )

  def sigmoid(x):
      """Return the logistic sigmoid of x, element-wise.

      Uses the identity sigmoid(x) == 0.5 * (1 + tanh(x / 2)) so that large
      negative inputs do not overflow inside np.exp().

      # Arguments
          x: scalar or ndarray of logits.

      # Returns
          Value(s) in [0, 1] with the same shape as x.
      """
      return 0.5 * (1.0 + np.tanh(0.5 * x))


  def process(input, mask, anchors, net_size=(416, 416)):
      """Decode one raw YOLO output grid into boxes, objectness and class scores.

      # Arguments
          input: ndarray indexed as (grid_h, grid_w, anchor, value) where the
              last axis holds x, y, w, h, objectness and then the class logits.
              (The name shadows the builtin but is kept for compatibility.)
          mask: indices into `anchors` selecting the anchors of this grid.
          anchors: sequence of (width, height) anchor sizes in network pixels.
          net_size: (width, height) of the network input used to normalise
              box sizes. Defaults to the 416x416 input used by this script.

      # Returns
          box: ndarray (..., 4) of (x, y, w, h); x, y is the top-left corner
              and all values are fractions of the network input size.
          box_confidence: ndarray (..., 1) of objectness scores.
          box_class_probs: ndarray (..., num_classes) of class scores.
      """
      anchors = np.array([anchors[i] for i in mask])
      grid_h, grid_w = map(int, input.shape[0:2])

      box_confidence = np.expand_dims(sigmoid(input[..., 4]), axis=-1)
      box_class_probs = sigmoid(input[..., 5:])

      box_xy = sigmoid(input[..., :2])
      box_wh = np.exp(input[..., 2:4]) * anchors

      # Per-cell (column, row) offsets. meshgrid handles non-square grids and
      # the singleton anchor axis broadcasts over any number of anchors.
      col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
      grid = np.stack((col, row), axis=-1)[:, :, np.newaxis, :]

      box_xy = (box_xy + grid) / (grid_w, grid_h)
      box_wh = box_wh / net_size
      # Convert the box centre into its top-left corner.
      box_xy = box_xy - box_wh / 2.
      box = np.concatenate((box_xy, box_wh), axis=-1)

      return box, box_confidence, box_class_probs

  def filter_boxes(boxes, box_confidences, box_class_probs, obj_thresh=None):
      """Filter boxes with object threshold.

      # Arguments
          boxes: ndarray, boxes of objects.
          box_confidences: ndarray, confidences of objects.
          box_class_probs: ndarray, class_probs of objects.
          obj_thresh: minimum score (confidence * best class probability) a
              box needs to be kept. Defaults to the module-level OBJ_THRESH.

      # Returns
          boxes: ndarray, filtered boxes.
          classes: ndarray, classes for boxes.
          scores: ndarray, scores for boxes.
      """
      if obj_thresh is None:
          obj_thresh = OBJ_THRESH

      box_scores = box_confidences * box_class_probs
      box_classes = np.argmax(box_scores, axis=-1)
      box_class_scores = np.max(box_scores, axis=-1)

      # A boolean mask selects the same entries, in the same order, as
      # indexing with np.where().
      selected = box_class_scores >= obj_thresh

      return boxes[selected], box_classes[selected], box_class_scores[selected]

  def nms_boxes(boxes, scores, nms_thresh=None):
      """Suppress non-maximal boxes.

      # Arguments
          boxes: ndarray (N, 4) of (x, y, w, h) boxes, x, y being the top-left
              corner.
          scores: ndarray (N,), scores of objects.
          nms_thresh: boxes whose IoU with an already kept box exceeds this
              value are dropped. Defaults to the module-level NMS_THRESH.

      # Returns
          keep: integer ndarray, indices of the boxes that survive, ordered by
              descending score. Empty (but still integer-typed, so it remains
              usable as an index) when there are no boxes.
      """
      if nms_thresh is None:
          nms_thresh = NMS_THRESH

      x = boxes[:, 0]
      y = boxes[:, 1]
      w = boxes[:, 2]
      h = boxes[:, 3]

      areas = w * h
      order = scores.argsort()[::-1]

      keep = []
      while order.size > 0:
          i = order[0]
          rest = order[1:]
          keep.append(i)

          # Intersection rectangle of the best box with every remaining box.
          xx1 = np.maximum(x[i], x[rest])
          yy1 = np.maximum(y[i], y[rest])
          xx2 = np.minimum(x[i] + w[i], x[rest] + w[rest])
          yy2 = np.minimum(y[i] + h[i], y[rest] + h[rest])

          w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
          h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
          inter = w1 * h1

          ovr = inter / (areas[i] + areas[rest] - inter)
          # Only boxes that do not overlap the kept box too much stay candidates.
          order = rest[ovr <= nms_thresh]

      return np.array(keep, dtype=np.intp)


  def yolov3_post_process(input_data):
      """Turn the raw YOLOv3-tiny output grids into final detections.

      Each grid is decoded with process(), thresholded with filter_boxes() and
      the surviving boxes are then passed through per-class non-maximum
      suppression with nms_boxes().

      # Arguments
          input_data: sequence of output grids, one per entry of `masks`
              below, in the same order.

      # Returns
          boxes: ndarray of (x, y, w, h) boxes, or None if nothing was found.
          classes: ndarray of class ids, or None if nothing was found.
          scores: ndarray of scores, or None if nothing was found.
      """
      # Full YOLOv3 would instead use:
      #   masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
      #   anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
      #              [59, 119], [116, 90], [156, 198], [373, 326]]
      # yolov3-tiny
      masks = [[3, 4, 5], [0, 1, 2]]
      anchors = [[10, 14], [23, 27], [37, 58], [81, 82], [135, 169], [344, 319]]

      boxes, classes, scores = [], [], []
      for feature_map, mask in zip(input_data, masks):
          b, c, s = process(feature_map, mask, anchors)
          b, c, s = filter_boxes(b, c, s)
          boxes.append(b)
          classes.append(c)
          scores.append(s)

      boxes = np.concatenate(boxes)
      classes = np.concatenate(classes)
      scores = np.concatenate(scores)

      # Boxes stay normalised to [0, 1]; draw() scales them to the frame size.

      nboxes, nclasses, nscores = [], [], []
      # np.unique yields the class ids in ascending order, so the output order
      # is deterministic instead of depending on set hashing.
      for cls_id in np.unique(classes):
          inds = np.where(classes == cls_id)
          cls_boxes = boxes[inds]
          cls_classes = classes[inds]
          cls_scores = scores[inds]

          keep = nms_boxes(cls_boxes, cls_scores)

          nboxes.append(cls_boxes[keep])
          nclasses.append(cls_classes[keep])
          nscores.append(cls_scores[keep])

      # The three lists are always appended together, so one check is enough.
      if not nclasses:
          return None, None, None

      boxes = np.concatenate(nboxes)
      classes = np.concatenate(nclasses)
      scores = np.concatenate(nscores)

      return boxes, classes, scores

  def draw(image, boxes, scores, classes):
      """Draw the boxes on the image.

      The image is modified in place.

      # Argument:
          image: original image.
          boxes: ndarray, boxes of objects as (x, y, w, h), where x, y is the
              top-left corner and all values are fractions of the image size.
          scores: ndarray, scores of objects.
          classes: ndarray, classes of objects (indices into CLASSES).
      """
      img_h, img_w = image.shape[0], image.shape[1]
      for box, score, cl in zip(boxes, scores, classes):
          x, y, w, h = box
          x = x * img_w
          y = y * img_h
          w = w * img_w
          h = h * img_h

          # Round to the nearest pixel, clamp to the image and hand OpenCV
          # plain Python ints rather than NumPy scalars.
          left = max(0, int(np.floor(x + 0.5)))
          top = max(0, int(np.floor(y + 0.5)))
          right = min(img_w, int(np.floor(x + w + 0.5)))
          bottom = min(img_h, int(np.floor(y + h + 0.5)))

          cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)
          cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                      (left, top - 6),
                      cv2.FONT_HERSHEY_SIMPLEX,
                      0.6, (0, 0, 255), 2)

  def load_model(model_path='./model/yolov3_tiny.rknn', target='rk3399pro'):
      """Load an RKNN model and initialise the runtime.

      Terminates the calling process with the failing return code if either
      loading the model or initialising the runtime reports an error.

      # Arguments
          model_path: path of the .rknn model file to load.
          target: target platform passed to RKNN.init_runtime().

      # Returns
          rknn: the initialised RKNN object.
      """
      import sys

      rknn = RKNN()
      print('-->loading model')
      ret = rknn.load_rknn(model_path)
      if ret != 0:
          # Without this check a missing model only fails later, at inference.
          print('Load rknn model failed')
          sys.exit(ret)
      print('loading model done')

      print('--> Init runtime environment')
      ret = rknn.init_runtime(target=target)
      if ret != 0:
          print('Init runtime environment failed')
          sys.exit(ret)
      print('done')
      return rknn


  def video_capture(src, q_frame:Queue):
      """Read frames from a camera forever and hand them to the main process.

      A frame is only queued when `q_frame` is empty, so the consumer always
      gets a recent frame; frames read while the queue is occupied are dropped.

      # Arguments
          src: camera index (anything int() accepts, e.g. '0').
          q_frame: queue receiving the captured frames.

      # Raises
          RuntimeError: if the device cannot be opened or a read fails.
      """
      video = cv2.VideoCapture(int(src))
      if not video.isOpened():
          raise RuntimeError('open video source {} failed.'.format(src))

      try:
          while True:
              s = time.time()
              ret, frame = video.read()
              # Explicit check instead of assert, which is stripped under -O.
              if not ret:
                  raise RuntimeError('read video frame failed.')
              if q_frame.empty():
                  # Restart the timer so only the queue hand-off is measured.
                  s = time.time()
                  q_frame.put(frame)
                  print("get frame and put to queue used {} ms".format((time.time()-s)*1000))
              else:
                  print("get frame but skiped, used {} ms".format((time.time()-s)*1000))
      finally:
          video.release()

  def infer_rknn(q_image:Queue, q_infer:Queue):
      """Run RKNN inference on every image received from `q_image`.

      The two model outputs are reshaped to (SPAN, LISTSIZE, grid, grid),
      transposed to (grid, grid, SPAN, LISTSIZE) and put on `q_infer` as a
      two-element list. Never returns.

      # Arguments
          q_image: queue delivering the preprocessed input images.
          q_infer: queue receiving the list of transposed output grids.
      """
      model = load_model()
      model.get_sdk_version()

      while True:
          started = time.time()
          frame = q_image.get()
          print('Infer recv frame, used time {} ms. '.format((time.time() - started) * 1000))
          started = time.time()
          coarse, fine = model.inference(inputs=[frame])
          coarse = coarse.reshape(SPAN, LISTSIZE, GRID0, GRID0)
          fine = fine.reshape(SPAN, LISTSIZE, GRID1, GRID1)
          # Move the grid axes to the front: (grid, grid, anchor, values).
          grids = [
              np.transpose(coarse, (2, 3, 0, 1)),
              np.transpose(fine, (2, 3, 0, 1)),
          ]

          q_infer.put(grids)
          print('Infer done, used time {} ms. '.format((time.time()-started)*1000))

  def post_process(q_infer, q_objs):
      """Post-process every inference result received from `q_infer`.

      Each item is passed to yolov3_post_process() and the resulting
      (boxes, classes, scores) tuple is put on `q_objs`. Never returns.

      # Arguments
          q_infer: queue delivering the raw output grids.
          q_objs: queue receiving the (boxes, classes, scores) tuples.
      """
      while True:
          started = time.time()
          grids = q_infer.get()
          log.info('Post process recv input, used time {} ms. '.format((time.time() - started) * 1000))
          started = time.time()
          detections = tuple(yolov3_post_process(grids))

          q_objs.put(detections)
          log.info('Post process done, used time {} ms. '.format((time.time()-started)*1000))

  if __name__ == '__main__':
      # Pipeline: capture process -> (this process: resize) -> inference process
      # -> post-process process -> (this process: draw and display).
      log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.DEBUG)

      q_frame = Queue()
      q_image = Queue()
      q_infer = Queue()
      q_objs = Queue()

      p_cap = Process(target=video_capture, args=('0',q_frame))
      p_infer = Process(target=infer_rknn, args=(q_image, q_infer))
      p_post = Process(target=post_process, args=(q_infer, q_objs))
      workers = (p_infer, p_cap, p_post)

      for worker in workers:
          # Daemon workers cannot outlive this process if it dies unexpectedly.
          worker.daemon = True
          worker.start()

      l_frame = []
      fps = 0
      l_used_time = []

      try:
          while True:
              s = time.time()
              frame = q_frame.get()

              image = cv2.resize(frame, (416, 416))
              image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
              q_image.put(image)

              l_frame.append(frame)

              # Let up to three frames be in flight before blocking on a result,
              # so inference and post-processing overlap with capture.
              if q_objs.empty() and (len(l_frame) < 3):
                  continue
              objects = q_objs.get()

              # Results arrive in submission order, so the oldest buffered frame
              # is the one these detections belong to.
              frame = l_frame.pop(0)
              boxes, classes, scores = objects
              if boxes is not None:
                  draw(frame, boxes, scores, classes)

              # Show every frame and poll the keyboard even when nothing was
              # detected; otherwise the window freezes and ESC is never seen.
              cv2.putText(frame, text='FPS: {}'.format(fps), org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                          fontScale=0.50, color=(255, 0, 0), thickness=2)
              cv2.imshow("results", frame)

              if (cv2.waitKey(5) & 0xff) == 27:
                  break

              used_time = time.time() - s
              l_used_time.append(used_time)
              if len(l_used_time) > 20:
                  l_used_time.pop(0)
              # Rolling average over the last 20 displayed frames.
              mean_time = np.mean(l_used_time)
              fps = int(1/mean_time) if mean_time > 0 else 0

              print('All done, used time {} ms. '.format(used_time*1000))
      finally:
          # Runs on ESC, Ctrl-C and errors alike, so no worker is left behind.
          cv2.destroyAllWindows()
          for worker in workers:
              worker.terminate()
          for worker in workers:
              worker.join()

复制代码



作者: zhangzj    时间: 2019-4-11 18:34
20FPS的瓶颈在哪里?post process吗?
作者: fantrix    时间: 2019-4-12 09:13
zhangzj 发表于 2019-4-11 18:34
20FPS的瓶颈在哪里?post process吗?

推理和后处理并行,大概20~30ms, 画图和显示大概10ms(其中waitKey等待了5ms), 队列搞进搞出的大概10ms.
显示也可以丢到进程里面去的,有空我搞搞。
作者: MinskyLee    时间: 2019-4-12 15:09
为什么我运行了代码后一直显示get frame but skiped,也没有弹出摄像头的界面。
作者: fantrix    时间: 2019-4-15 15:17
MinskyLee 发表于 2019-4-12 15:09
为什么我运行了代码后一直显示get frame but skiped,也没有弹出摄像头的界面。

估计是推理进程没跑起来,看看启动阶段报了什么错。注意,rknn.init_runtime(target='rk3399pro')。
作者: magi803    时间: 2019-4-18 10:38
hisping 发表于 2019-4-18 09:01
感谢你提供的多进程版本,我在你的基础上做了优化,可以达到30帧,已上传,帖子里可以下载下来 ...

好的,这个30帧是tiny才能达到的吧?完整的yolov3在推理这块就用了近0.5s,实在太慢了,有办法解决吗,还是我弄的有问题
作者: abcehac    时间: 2019-8-7 21:18
感谢分享!
作者: shopping    时间: 2019-9-16 15:22
你好,请问 c++版 demo 与 python 版 demo,二者推理速度差多少,请问您试过吗?
作者: administer    时间: 2020-5-17 00:56
这速度有点对不起这价格啊




欢迎光临 Toybrick (https://t.rock-chips.com/) Powered by Discuz! X3.3