使用C语言在RV1126部署int8模型的问题

[复制链接] · 发表于 2023-8-4 15:22:26

已经参考文档在RV1126上用C语言部署了uint8（asymmetric_quantized-u8量化）的模型，文档说对于有些量化模型而言，dynamic_fixed_point-i8 量化的精度比 asymmetric_quantized-u8 高。所以我用rknn-toolkit 1.7.3 转换了int8版本的模型，该int8模型用python测试推理结果是正确的。于是我参考uint8的C语言部署代码（RK没有给出int8的代码只给出了uint8的代码）编写了int8的推理代码，部署到板子上进行推理，结果是错误的（找到很多框框，没有一个框框是正确的）。

我感觉int8和uint8的推理代码差别主要是在量化和反量化上，以下是我的量化、反量化以及后处理的代码：

static int8_t qnt_f32_to_i8(float f32, int8_t fl)

{

    float dst_val = f32 * pow(2, fl);

    int8_t res = (int8_t)__clip(dst_val, -128, 127);

    return res;

}



// Convert an int8 fixed-point value back to float: returns qnt / 2^fl.
//
//   qnt  quantized value
//   fl   exponent of the power-of-two scale used when quantizing
//
// Only a power-of-two scale is applied here; no zero point is subtracted.
static float deqnt_i8_affine_to_f32(int8_t qnt, int8_t fl)
{
    // Dividing by 2^fl is the same as scaling by 2^(-fl); widen fl to int
    // before negating so that fl == -128 is handled.
    return ldexpf((float)qnt, -(int)fl);
}



static int process_i8(int8_t *input, int *anchor, int anchor_per_branch, int grid_h, int grid_w, int height, int width, int stride,

                   std::vector<float> &boxes, std::vector<float> &boxScores, std::vector<int> &classId,

                   float threshold, int8_t fl, MODEL_TYPE yolo)

{

    int validCount = 0;

    int grid_len = grid_h * grid_w;

    float thres = threshold;

    int8_t thres_i8 = qnt_f32_to_i8(thres, fl);

    // printf("threash %f\n", thres);

    // printf("thres_u8 %u\n", thres_u8);

    // printf("scale %f\n", scale);

    // printf("zp %u\n", zp);



    for (int a = 0; a < anchor_per_branch; a++)

    {

        for (int i = 0; i < grid_h; i++)

        {

            for (int j = 0; j < grid_w; j++)

            {

                int8_t box_confidence = input[(PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j];

                if (box_confidence >= thres_i8)

                {

                    // printf("box_conf %u, thres_u8 %u\n", box_confidence, thres_u8);

                    int offset = (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j;

                    int8_t *in_ptr = input + offset;



                    int8_t maxClassProbs = in_ptr[5 * grid_len];

                    int maxClassId = 0;

                    for (int k = 1; k < OBJ_CLASS_NUM; ++k)

                    {

                        int8_t prob = in_ptr[(5 + k) * grid_len];

                        if (prob > maxClassProbs)

                        {

                            maxClassId = k;

                            maxClassProbs = prob;

                        }

                    }



                    float box_conf_f32 = deqnt_i8_affine_to_f32(box_confidence, fl);

                    float class_prob_f32 = deqnt_i8_affine_to_f32(maxClassProbs, fl);

                    float limit_score = 0;

                    limit_score = box_conf_f32* class_prob_f32;



                    // printf("limit score: %f\n", limit_score);

                    if (limit_score > threshold){

                        float box_x, box_y, box_w, box_h;

                        if(yolo == YOLOX){

                            box_x = deqnt_i8_affine_to_f32(*in_ptr, fl);

                            box_y = deqnt_i8_affine_to_f32(in_ptr[grid_len], fl);

                            box_w = deqnt_i8_affine_to_f32(in_ptr[2 * grid_len], fl);

                            box_h = deqnt_i8_affine_to_f32(in_ptr[3 * grid_len], fl);

                            box_w = exp(box_w)* stride;

                            box_h = exp(box_h)* stride;

                        }   

                        else{

                            box_x = deqnt_i8_affine_to_f32(*in_ptr, fl) * 2.0 - 0.5;

                            box_y = deqnt_i8_affine_to_f32(in_ptr[grid_len], fl) * 2.0 - 0.5;

                            box_w = deqnt_i8_affine_to_f32(in_ptr[2 * grid_len], fl) * 2.0;

                            box_h = deqnt_i8_affine_to_f32(in_ptr[3 * grid_len], fl) * 2.0;

                            box_w = box_w * box_w;

                            box_h = box_h * box_h;

                        }

                        box_x = (box_x + j) * (float)stride;

                        box_y = (box_y + i) * (float)stride;

                        box_w *= (float)anchor[a * 2];

                        box_h *= (float)anchor[a * 2 + 1];

                        box_x -= (box_w / 2.0);

                        box_y -= (box_h / 2.0);



                        boxes.push_back(box_x);

                        boxes.push_back(box_y);

                        boxes.push_back(box_w);

                        boxes.push_back(box_h);

                        boxScores.push_back(box_conf_f32* class_prob_f32);

                        classId.push_back(maxClassId);

                        validCount++;

                    }

                }

            }

        }

    }

    return validCount;

}

复制代码

请问问题出在哪里？或者要怎么在RV1126板子上实现int8模型的C语言部署？谢谢！