|
我已经参考文档,在RV1126上用C语言部署了uint8(asymmetric_quantized-u8量化)的模型。文档说,对于有些量化模型而言,dynamic_fixed_point-i8量化的精度比asymmetric_quantized-u8高,所以我用rknn-toolkit 1.7.3转换了int8版本的模型,该int8模型用Python测试,推理结果是正确的。于是我参考uint8的C语言部署代码(RK只给出了uint8的代码,没有给出int8的代码)编写了int8的推理代码,部署到板子上进行推理,结果却是错误的(检测出很多框,但没有一个框是正确的)。
我感觉int8和uint8的推理代码的差别主要在量化和反量化上,以下是我们的量化、反量化以及后处理(process_i8)的代码:
/**
 * @brief Quantize a float to a signed 8-bit fixed-point value.
 *
 * Computes q = round(f32 * 2^fl) and saturates the result to [-128, 127].
 *
 * @param f32 Real value to quantize.
 * @param fl  Fractional length (number of fractional bits); may be negative.
 * @return The saturated, round-to-nearest int8 code. NaN input yields 0.
 */
static int8_t qnt_f32_to_i8(float f32, int8_t fl)
{
    // ldexp scales by an exact power of two; no precision is lost in double.
    const double scaled = ldexp(static_cast<double>(f32), static_cast<int>(fl));

    // NaN compares unequal to itself; converting it to an integer is undefined.
    if (scaled != scaled)
    {
        return 0;
    }

    // Saturate before rounding so the integer conversion below is always in range.
    if (scaled >= 127.0)
    {
        return static_cast<int8_t>(127);
    }
    if (scaled <= -128.0)
    {
        return static_cast<int8_t>(-128);
    }

    // Round half away from zero; plain truncation would bias codes toward zero.
    const double rounded = (scaled >= 0.0) ? (scaled + 0.5) : (scaled - 0.5);
    return static_cast<int8_t>(static_cast<int>(rounded));
}
/**
 * @brief Convert a signed 8-bit fixed-point code back to a float.
 *
 * Computes qnt / 2^fl. Despite the "affine" in the name, no zero point is
 * involved: the only parameter is the fractional length.
 *
 * @param qnt Quantized int8 code.
 * @param fl  Fractional length (number of fractional bits); may be negative.
 * @return The dequantized real value.
 */
static float deqnt_i8_affine_to_f32(int8_t qnt, int8_t fl)
{
    // Scaling by 2^-fl via ldexp is exact and avoids a general pow() call plus
    // a division for every dequantized element.
    return static_cast<float>(ldexp(static_cast<double>(qnt), -static_cast<int>(fl)));
}
- static int process_i8(int8_t *input, int *anchor, int anchor_per_branch, int grid_h, int grid_w, int height, int width, int stride,
- std::vector<float> &boxes, std::vector<float> &boxScores, std::vector<int> &classId,
- float threshold, int8_t fl, MODEL_TYPE yolo)
- {
- int validCount = 0;
- int grid_len = grid_h * grid_w;
- float thres = threshold;
- int8_t thres_i8 = qnt_f32_to_i8(thres, fl);
- // printf("threash %f\n", thres);
- // printf("thres_u8 %u\n", thres_u8);
- // printf("scale %f\n", scale);
- // printf("zp %u\n", zp);
- for (int a = 0; a < anchor_per_branch; a++)
- {
- for (int i = 0; i < grid_h; i++)
- {
- for (int j = 0; j < grid_w; j++)
- {
- int8_t box_confidence = input[(PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j];
- if (box_confidence >= thres_i8)
- {
- // printf("box_conf %u, thres_u8 %u\n", box_confidence, thres_u8);
- int offset = (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j;
- int8_t *in_ptr = input + offset;
- int8_t maxClassProbs = in_ptr[5 * grid_len];
- int maxClassId = 0;
- for (int k = 1; k < OBJ_CLASS_NUM; ++k)
- {
- int8_t prob = in_ptr[(5 + k) * grid_len];
- if (prob > maxClassProbs)
- {
- maxClassId = k;
- maxClassProbs = prob;
- }
- }
- float box_conf_f32 = deqnt_i8_affine_to_f32(box_confidence, fl);
- float class_prob_f32 = deqnt_i8_affine_to_f32(maxClassProbs, fl);
- float limit_score = 0;
- limit_score = box_conf_f32* class_prob_f32;
- // printf("limit score: %f\n", limit_score);
- if (limit_score > threshold){
- float box_x, box_y, box_w, box_h;
- if(yolo == YOLOX){
- box_x = deqnt_i8_affine_to_f32(*in_ptr, fl);
- box_y = deqnt_i8_affine_to_f32(in_ptr[grid_len], fl);
- box_w = deqnt_i8_affine_to_f32(in_ptr[2 * grid_len], fl);
- box_h = deqnt_i8_affine_to_f32(in_ptr[3 * grid_len], fl);
- box_w = exp(box_w)* stride;
- box_h = exp(box_h)* stride;
- }
- else{
- box_x = deqnt_i8_affine_to_f32(*in_ptr, fl) * 2.0 - 0.5;
- box_y = deqnt_i8_affine_to_f32(in_ptr[grid_len], fl) * 2.0 - 0.5;
- box_w = deqnt_i8_affine_to_f32(in_ptr[2 * grid_len], fl) * 2.0;
- box_h = deqnt_i8_affine_to_f32(in_ptr[3 * grid_len], fl) * 2.0;
- box_w = box_w * box_w;
- box_h = box_h * box_h;
- }
- box_x = (box_x + j) * (float)stride;
- box_y = (box_y + i) * (float)stride;
- box_w *= (float)anchor[a * 2];
- box_h *= (float)anchor[a * 2 + 1];
- box_x -= (box_w / 2.0);
- box_y -= (box_h / 2.0);
- boxes.push_back(box_x);
- boxes.push_back(box_y);
- boxes.push_back(box_w);
- boxes.push_back(box_h);
- boxScores.push_back(box_conf_f32* class_prob_f32);
- classId.push_back(maxClassId);
- validCount++;
- }
- }
- }
- }
- }
- return validCount;
- }
请问问题出在哪里? 或者要怎么在RV1126板子上实现int8模型的C语言部署? 谢谢!
|
|