|
硬件:RK3588
软件:RGA:/usr/lib/aarch64-linux-gnu/librga.so.2.1.0
对比测试crop + resize 操作,RGA平均用时2ms左右,OpenCV平均用时0.2ms,部分代码如下:
TransformData
- typedef struct _TransformData
- {
- // 图像基本信息
- int width, height, format;
- int size;
- // 对齐后的宽度
- int stride;
- int size_with_stride;
- // 图像原始数据
- char *data;
- // DMA
- int dma_fd;
- // im2d
- im_rect rect;
- // RGA
- rga_buffer_t buffer;
- rga_buffer_handle_t handle;
- } TransformData;
TransformData 实例化
- std::unique_ptr<TransformData> Transformer::wrapTransformData(cv::Mat cv_img, const im_rect &rect)
- {
- m_timer.record(__func__);
- std::unique_ptr<TransformData> td(new TransformData());
- td->width = cv_img.cols;
- td->height = cv_img.rows;
- td->format = m_format;
- td->size = td->width * td->height * get_bpp_from_format(td->format);
- // 对齐后的宽度
- td->stride = td->width;
- td->size_with_stride = td->size;
- if (td->width % m_align_bit)
- {
- td->stride = (td->width / m_align_bit + 1) * m_align_bit;
- td->size_with_stride = td->size / td->width * td->stride;
- }
- // 使用dma_heap分配内存调用RGA
- td->dma_fd = -1;
- dma_buf_alloc(DMA_HEAP_PATH, td->size_with_stride, &(td->dma_fd), reinterpret_cast<void **>(&(td->data)));
- assert(td->dma_fd != -1);
- // 数据拷贝
- if (td->width == td->stride)
- {
- std::memcpy(reinterpret_cast<uchar *>(td->data), cv_img.data, td->size);
- }
- else
- {
- size_t c = get_bpp_from_format(td->format);
- size_t src_stride = td->width * c;
- size_t dst_stride = td->stride * c;
- uchar *src_ptr = cv_img.data;
- uchar *dst_ptr = reinterpret_cast<uchar *>(td->data);
- for (int i = 0; i < td->height; i++)
- {
- std::memcpy(dst_ptr, src_ptr, src_stride);
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- }
- }
- td->handle = importbuffer_fd(td->dma_fd, td->size_with_stride);
- td->buffer = wrapbuffer_handle(td->handle, td->stride, td->height, td->format);
- td->rect = rect;
- m_timer.stop(__func__);
- return td;
- }
- std::unique_ptr<TransformData> Transformer::wrapTransformData(const im_rect &rect)
- {
- m_timer.record(__func__);
- std::unique_ptr<TransformData> td(new TransformData());
- td->width = rect.width;
- td->height = rect.height;
- td->format = m_format;
- td->size = td->width * td->height * get_bpp_from_format(td->format);
- // 对齐后的宽度
- td->stride = td->width;
- td->size_with_stride = td->size;
- if (td->width % m_align_bit)
- {
- td->stride = (td->width / m_align_bit + 1) * m_align_bit;
- td->size_with_stride = td->size / td->width * td->stride;
- }
- // 使用dma_heap分配内存调用RGA
- td->dma_fd = -1;
- dma_buf_alloc(DMA_HEAP_PATH, td->size_with_stride, &(td->dma_fd), reinterpret_cast<void **>(&(td->data)));
- assert(td->dma_fd != -1);
- td->handle = importbuffer_fd(td->dma_fd, td->size_with_stride);
- td->buffer = wrapbuffer_handle(td->handle, td->stride, td->height, td->format);
- td->rect = {};
- m_timer.stop(__func__);
- return td;
- }
- void Transformer::resetTransformData(std::unique_ptr<TransformData> &td)
- {
- if (td)
- {
- m_timer.record(__func__);
- // invalid CPU cache
- dma_sync_device_to_cpu(td->dma_fd);
- std::cout << "td->dma_fd: " << td->dma_fd << std::endl;
- dma_buf_free(td->size_with_stride, &(td->dma_fd), reinterpret_cast<void **>(&(td->data)));
- // release buffer
- releasebuffer_handle(td->handle);
- // reset
- td.reset(nullptr);
- m_timer.stop(__func__);
- }
- }
crop + resize 实际上就是对原始图像中的某一个区域进行 resize 操作:
- void *Transformer::process(cv::Mat &cv_src, const im_rect &crop_rect, const im_rect &dst_info)
- {
- m_timer.record(__func__);
- // init src TransformData
- resetTransformData(m_src);
- m_src = wrapTransformData(cv_src, crop_rect);
- // init dst TransformData
- resetTransformData(m_dst);
- m_dst = wrapTransformData(dst_info);
- // rga process
- IM_STATUS ret = imcheck(m_src->buffer, m_dst->buffer, m_src->rect, m_dst->rect);
- assert(ret == IM_STATUS_NOERROR);
- ret = improcess(m_src->buffer, m_dst->buffer, {}, m_src->rect, m_dst->rect, {}, IM_SYNC);
- assert(ret == IM_STATUS_SUCCESS);
- m_timer.stop(__func__);
- return reinterpret_cast<void *>(m_dst->data);
- }
请问是代码中的哪些地方影响了处理速度?有什么不合理的地方?
|
|