Toybrick

标题: pytorch 转换时报错 [打印本页]

作者: JasonZhu    时间: 2020-3-23 19:37
标题: pytorch 转换时报错
rknn 1.3  pytorch 报错,如下:
--> Building model
E Catch exception when building RKNN model!
E Traceback (most recent call last):
E   File "/root/miniconda2/envs/rknn-1.3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1626, in _create_c_op
E     c_op = c_api.TF_FinishOperation(op_desc)
E tensorflow.python.framework.errors_impl.InvalidArgumentError: Dimensions must be equal, but are 2 and 8 for 'add_at_input134.1_365/Add' (op: 'Add') with input shapes: [1,1,2,128], [1,4,8,128].
E During handling of the above exception, another exception occurred:
E Traceback (most recent call last):
E   File "rknn/api/rknn_base.py", line 737, in rknn.api.rknn_base.RKNNBase.build
E   File "rknn/api/rknn_base.py", line 1644, in rknn.api.rknn_base.RKNNBase._quantize2
E   File "rknn/base/RKNNlib/app/medusa/quantization.py", line 105, in rknn.base.RKNNlib.app.medusa.quantization.Quantization.run
E   File "rknn/base/RKNNlib/app/medusa/quantization.py", line 44, in rknn.base.RKNNlib.app.medusa.quantization.Quantization._run_quantization
E   File "rknn/base/RKNNlib/app/medusa/workspace.py", line 129, in rknn.base.RKNNlib.app.medusa.workspace.Workspace.run
E   File "rknn/base/RKNNlib/app/medusa/workspace.py", line 99, in rknn.base.RKNNlib.app.medusa.workspace.Workspace._setup_graph
E   File "rknn/base/RKNNlib/app/medusa/workspace.py", line 100, in rknn.base.RKNNlib.app.medusa.workspace.Workspace._setup_graph
E   File "rknn/base/RKNNlib/RKNNnetbuilder.py", line 274, in rknn.base.RKNNlib.RKNNnetbuilder.RKNNNetBuilder.build
E   File "rknn/base/RKNNlib/RKNNnetbuilder.py", line 278, in rknn.base.RKNNlib.RKNNnetbuilder.RKNNNetBuilder.build
E   File "rknn/base/RKNNlib/RKNNnetbuilder.py", line 305, in rknn.base.RKNNlib.RKNNnetbuilder.RKNNNetBuilder.build_layer
E   File "rknn/base/RKNNlib/RKNNnetbuilder.py", line 305, in rknn.base.RKNNlib.RKNNnetbuilder.RKNNNetBuilder.build_layer
E   File "rknn/base/RKNNlib/RKNNnetbuilder.py", line 305, in rknn.base.RKNNlib.RKNNnetbuilder.RKNNNetBuilder.build_layer
E   [Previous line repeated 124 more times]
E   File "rknn/base/RKNNlib/RKNNnetbuilder.py", line 331, in rknn.base.RKNNlib.RKNNnetbuilder.RKNNNetBuilder.build_layer
E   File "rknn/base/RKNNlib/RKNNnetbuilder.py", line 336, in rknn.base.RKNNlib.RKNNnetbuilder.RKNNNetBuilder.build_layer
E   File "rknn/base/RKNNlib/layer/RKNNlayer.py", line 287, in rknn.base.RKNNlib.layer.RKNNlayer.RKNNLayer.compute_tensor
E   File "rknn/base/RKNNlib/layer/add.py", line 36, in rknn.base.RKNNlib.layer.add.Add.compute_out_tensor
E   File "/root/miniconda2/envs/rknn-1.3/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 301, in add
E     "Add", x=x, y=y, name=name)
E   File "/root/miniconda2/envs/rknn-1.3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
E     op_def=op_def)
E   File "/root/miniconda2/envs/rknn-1.3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
E     return func(*args, **kwargs)
E   File "/root/miniconda2/envs/rknn-1.3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3272, in create_op
E     op_def=op_def)
E   File "/root/miniconda2/envs/rknn-1.3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1790, in __init__
E     control_input_ops)
E   File "/root/miniconda2/envs/rknn-1.3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1629, in _create_c_op
E     raise ValueError(str(e))
E ValueError: Dimensions must be equal, but are 2 and 8 for 'add_at_input134.1_365/Add' (op: 'Add') with input shapes: [1,1,2,128], [1,4,8,128].
Build pytorch failed!


作者: JasonZhu    时间: 2020-3-23 19:39
#########################################################################
##
## Some utility for training, data processing, and network.
##
#########################################################################
import torch
import torch.nn as nn
from parameters import Parameters

p = Parameters()

def backward_hook(self, grad_input, grad_output):
    """Debug hook for nn.Module.register_backward_hook: report the L2 norm
    of the first gradient tensor flowing into the module."""
    first_grad = grad_input[0]
    print('grad_input norm:', first_grad.data.norm())

def cross_entropy2d(inputs, target, weight=None, size_average=True):
    """Per-pixel cross-entropy loss over 2D score maps.

    Args:
        inputs: (N, C, H, W) float tensor of unnormalized class scores.
        target: (N, 1, H, W) long tensor of class indices; flattened to
            one label per pixel to match the flattened predictions.
        weight: optional (C,) per-class weight tensor, forwarded to
            ``nn.CrossEntropyLoss``.
        size_average: kept for backward compatibility only; the default
            mean reduction of CrossEntropyLoss is always used.

    Returns:
        Scalar loss tensor (mean over all pixels).
    """
    # Bug fix: the original built CrossEntropyLoss() without `weight`,
    # silently ignoring the parameter. Forward it so class weighting works.
    loss = torch.nn.CrossEntropyLoss(weight=weight)

    n, c, h, w = inputs.size()
    # (N, C, H, W) -> (N*H*W, C): one row of class scores per pixel.
    prediction = inputs.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
    # (N, 1, H, W) -> (N*H*W,): class-index vector aligned with `prediction`.
    gt = target.transpose(1, 2).transpose(2, 3).contiguous().view(-1)

    return loss(prediction, gt)

######################################################################
##
## Convolution layer modules
##
######################################################################
class Conv2D_BatchNorm_Relu(nn.Module):
    """Conv2d unit, optionally followed by BatchNorm2d + ReLU.

    With ``acti=True`` (default) the unit is Conv -> BN -> ReLU(inplace);
    with ``acti=False`` it is a bare Conv2d (used for linear output heads).
    """

    def __init__(self, in_channels, n_filters, k_size, padding, stride, bias=True, acti=True):
        super(Conv2D_BatchNorm_Relu, self).__init__()

        conv = nn.Conv2d(in_channels, n_filters, k_size,
                         padding=padding, stride=stride, bias=bias)
        if acti:
            self.cbr_unit = nn.Sequential(
                conv,
                nn.BatchNorm2d(n_filters),
                nn.ReLU(inplace=True),
            )
        else:
            self.cbr_unit = conv

    def forward(self, inputs):
        """Apply the unit to a (N, C, H, W) tensor."""
        return self.cbr_unit(inputs)

class bottleneck(nn.Module):
    """Residual bottleneck: 1x1 reduce -> 3x3 -> 1x1 expand, with a 1x1
    projection shortcut.

    With ``acti=False`` the third conv is linear and the shortcut is skipped
    entirely — the main branch is returned as-is (used by output heads).
    """

    def __init__(self, in_channels, out_channels, acti=True):
        super(bottleneck, self).__init__()
        self.acti = acti
        # Reduce to a quarter of the input channels, but never below 1.
        temp_channels = in_channels if in_channels < 4 else in_channels // 4
        self.conv1 = Conv2D_BatchNorm_Relu(in_channels, temp_channels, 1, 0, 1)
        self.conv2 = Conv2D_BatchNorm_Relu(temp_channels, temp_channels, 3, 1, 1)
        self.conv3 = Conv2D_BatchNorm_Relu(temp_channels, out_channels, 1, 0, 1, acti=self.acti)

        # Projection shortcut. NOTE(review): created even when acti=False
        # (matching the original parameterization) although unused then.
        self.residual = Conv2D_BatchNorm_Relu(in_channels, out_channels, 1, 0, 1)

    def forward(self, x):
        out = self.conv3(self.conv2(self.conv1(x)))
        if not self.acti:
            return out
        return out + self.residual(x)

class bottleneck_down(nn.Module):
    """Downsampling bottleneck: stride-2 in the 3x3 stage and a stride-2
    3x3 projection shortcut; halves the spatial resolution."""

    def __init__(self, in_channels, out_channels):
        super(bottleneck_down, self).__init__()
        # Reduce to a quarter of the input channels, but never below 1.
        temp_channels = in_channels if in_channels < 4 else in_channels // 4
        self.conv1 = Conv2D_BatchNorm_Relu(in_channels, temp_channels, 1, 0, 1)
        self.conv2 = Conv2D_BatchNorm_Relu(temp_channels, temp_channels, 3, 1, 2)
        self.conv3 = Conv2D_BatchNorm_Relu(temp_channels, out_channels, 1, 0, 1)

        self.residual = Conv2D_BatchNorm_Relu(in_channels, out_channels, 3, 1, 2)

    def forward(self, x):
        main = self.conv3(self.conv2(self.conv1(x)))
        return main + self.residual(x)

class bottleneck_up(nn.Module):
    """Upsampling bottleneck: transposed conv (stride 2) in both the main
    branch and the shortcut; doubles the spatial resolution."""

    def __init__(self, in_channels, out_channels):
        super(bottleneck_up, self).__init__()
        # Reduce to a quarter of the input channels, but never below 1.
        temp_channels = in_channels if in_channels < 4 else in_channels // 4
        self.conv1 = Conv2D_BatchNorm_Relu(in_channels, temp_channels, 1, 0, 1)
        # ConvTranspose2d(k=3, stride=2, padding=1, output_padding=1): exact 2x.
        self.conv2 = nn.Sequential(
            nn.ConvTranspose2d(temp_channels, temp_channels, 3, 2, 1, 1),
            nn.BatchNorm2d(temp_channels),
            nn.ReLU(),
        )
        self.conv3 = Conv2D_BatchNorm_Relu(temp_channels, out_channels, 1, 0, 1)

        self.residual = nn.Sequential(
            nn.ConvTranspose2d(in_channels, out_channels, 3, 2, 1, 1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def forward(self, x):
        main = self.conv3(self.conv2(self.conv1(x)))
        return main + self.residual(x)

class Output(nn.Module):
    """Prediction head: a non-activated (linear-output) bottleneck."""

    def __init__(self, in_size, out_size):
        super(Output, self).__init__()
        self.conv = bottleneck(in_size, out_size, acti=False)

    def forward(self, inputs):
        return self.conv(inputs)

class hourglass_same(nn.Module):
    """Symmetric hourglass module: four stride-2 encoder stages, two
    same-resolution bottlenecks at the bottom, four decoder (upsampling)
    stages. Each decoder stage receives the sum of the upsampled path and a
    downsampled skip branch, so input and output resolutions match."""

    def __init__(self, in_channels, out_channels):
        super(hourglass_same, self).__init__()
        self.down1 = bottleneck_down(in_channels, out_channels)
        self.down2 = bottleneck_down(out_channels, out_channels)
        self.down3 = bottleneck_down(out_channels, out_channels)
        self.down4 = bottleneck_down(out_channels, out_channels)

        self.same1 = bottleneck(out_channels, out_channels)
        self.same2 = bottleneck(out_channels, out_channels)

        self.up2 = bottleneck_up(out_channels, out_channels)
        self.up3 = bottleneck_up(out_channels, out_channels)
        self.up4 = bottleneck_up(out_channels, out_channels)
        self.up5 = bottleneck_up(out_channels, out_channels)

        # Skip branches: each halves the resolution of its source so it
        # matches the decoder tensor it is added to.
        self.residual1 = bottleneck_down(in_channels, out_channels)
        self.residual2 = bottleneck_down(out_channels, out_channels)
        self.residual3 = bottleneck_down(out_channels, out_channels)
        self.residual4 = bottleneck_down(out_channels, out_channels)

    def forward(self, inputs):
        # Encoder: resolution halves at every stage.
        d1 = self.down1(inputs)
        d2 = self.down2(d1)
        d3 = self.down3(d2)
        d4 = self.down4(d3)

        # Bottom of the hourglass (resolution unchanged).
        x = self.same2(self.same1(d4))

        # Decoder: add the matching downsampled skip, then upsample 2x.
        x = self.up2(x + self.residual4(d3))
        x = self.up3(x + self.residual3(d2))
        x = self.up4(x + self.residual2(d1))
        x = self.up5(x + self.residual1(inputs))

        return x

class resize_layer(nn.Module):
    """Stem block: 7x7 stride-2 conv plus two 2x2 max-pools interleaved with
    bottlenecks, shrinking the input 8x spatially while growing channels to
    ``out_channels``.

    ``acti`` is accepted for signature compatibility but is not used.
    """

    def __init__(self, in_channels, out_channels, acti=True):
        super(resize_layer, self).__init__()
        self.conv = Conv2D_BatchNorm_Relu(in_channels, out_channels // 2, 7, 3, 2)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.re1 = bottleneck(out_channels // 2, out_channels // 2)
        self.re2 = bottleneck(out_channels // 2, out_channels // 2)
        self.re3 = bottleneck(out_channels // 2, out_channels)

    def forward(self, inputs):
        x = self.re1(self.conv(inputs))   # stride-2 conv: /2
        x = self.re2(self.maxpool(x))     # pool: /4
        x = self.re3(self.maxpool(x))     # pool: /8, channels -> out_channels
        return x

class hourglass_block(nn.Module):
    """One hourglass stage with three prediction heads.

    ``forward`` returns ``([confidence, offset, instance], features)`` where
    ``features`` feeds the next stage: ``re2(trunk) + re3(confidence)``,
    plus the stage input when ``input_re`` is set.
    """

    def __init__(self, in_channels, out_channels, acti=True, input_re=True):
        super(hourglass_block, self).__init__()
        self.layer1 = hourglass_same(in_channels, out_channels)
        self.re1 = bottleneck(out_channels, out_channels)
        self.re2 = bottleneck(out_channels, out_channels)
        # Lifts the 1-channel confidence map back to out_channels so it can
        # be merged into the next stage's input.
        self.re3 = bottleneck(1, out_channels)

        self.out_confidence = Output(out_channels, 1)
        self.out_offset = Output(out_channels, 2)
        self.out_instance = Output(out_channels, p.feature_size)
        self.input_re = input_re

    def forward(self, inputs):
        trunk = self.re1(self.layer1(inputs))

        out_confidence = self.out_confidence(trunk)
        out_offset = self.out_offset(trunk)
        out_instance = self.out_instance(trunk)

        # Features handed to the next stage; optionally residual to `inputs`.
        merged = self.re2(trunk) + self.re3(out_confidence)
        if self.input_re:
            merged = merged + inputs

        return [out_confidence, out_offset, out_instance], merged

作者: jefferyzhang    时间: 2020-3-24 08:38
E   File "/root/miniconda2/envs/rknn-1.3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1629, in _create_c_op
E     raise ValueError(str(e))

这个报错都错在tensorflow里,与rknn无关的。先检查你自己的pb文件能不能被当前这个版本tensorflow读出来并推理。
作者: JasonZhu    时间: 2020-3-24 17:49
jefferyzhang 发表于 2020-3-24 08:38
E   File "/root/miniconda2/envs/rknn-1.3/lib/python3.6/site-packages/tensorflow/python/framework/ops ...

我的模型是pytorch的,可以用tensorflow 加载?
作者: jefferyzhang    时间: 2020-3-24 17:53
JasonZhu 发表于 2020-3-24 17:49
我的模型是pytorch的,可以用tensorflow 加载?

那这就尴尬了,请把模型和转换脚本一起发给我下,我报给NPU部门debug
作者: JasonZhu    时间: 2020-3-24 18:00
jefferyzhang 发表于 2020-3-24 17:53
那这就尴尬了,请把模型和转换脚本一起发给我下,我报给NPU部门debug

我现在redmine上提上去看看,多谢版主
作者: jefferyzhang    时间: 2020-3-25 08:17
JasonZhu 发表于 2020-3-24 18:00
我现在redmine上提上去看看,多谢版主

嗯,好的
作者: Sean    时间: 2020-3-25 09:13
也遇到了同样问题,楼主能加个wx聊聊吗?

--> Building model
E Catch exception when building RKNN model!
E Traceback (most recent call last):
E   File "/home/user/.local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1567, in _create_c_op
E     c_op = c_api.TF_FinishOperation(op_desc)
E tensorflow.python.framework.errors_impl.InvalidArgumentError: Dimension 1 in both shapes must be equal, but are 6 and 26. Shapes are [1,6,6] and [1,26,26]. for 'cat_at_2304_1/cat_at_2304_1' (op: 'ConcatV2') with input shapes: [1,6,6,256], [1,26,26,512], [] and with computed input tensors: input[2] = <3>.
E During handling of the above exception, another exception occurred:
E Traceback (most recent call last):
E   File "rknn/api/rknn_base.py", line 737, in rknn.api.rknn_base.RKNNBase.build
E   File "rknn/api/rknn_base.py", line 1644, in rknn.api.rknn_base.RKNNBase._quantize2
E   File "rknn/base/RKNNlib/app/medusa/quantization.py", line 105, in rknn.base.RKNNlib.app.medusa.quantization.Quantization.run
E   File "rknn/base/RKNNlib/app/medusa/quantization.py", line 44, in rknn.base.RKNNlib.app.medusa.quantization.Quantization._run_quantization
E   File "rknn/base/RKNNlib/app/medusa/workspace.py", line 129, in rknn.base.RKNNlib.app.medusa.workspace.Workspace.run
E   File "rknn/base/RKNNlib/app/medusa/workspace.py", line 99, in rknn.base.RKNNlib.app.medusa.workspace.Workspace._setup_graph
E   File "rknn/base/RKNNlib/app/medusa/workspace.py", line 100, in rknn.base.RKNNlib.app.medusa.workspace.Workspace._setup_graph
E   File "rknn/base/RKNNlib/RKNNnetbuilder.py", line 274, in rknn.base.RKNNlib.RKNNnetbuilder.RKNNNetBuilder.build
E   File "rknn/base/RKNNlib/RKNNnetbuilder.py", line 278, in rknn.base.RKNNlib.RKNNnetbuilder.RKNNNetBuilder.build
E   File "rknn/base/RKNNlib/RKNNnetbuilder.py", line 305, in rknn.base.RKNNlib.RKNNnetbuilder.RKNNNetBuilder.build_layer
E   File "rknn/base/RKNNlib/RKNNnetbuilder.py", line 331, in rknn.base.RKNNlib.RKNNnetbuilder.RKNNNetBuilder.build_layer
E   File "rknn/base/RKNNlib/RKNNnetbuilder.py", line 336, in rknn.base.RKNNlib.RKNNnetbuilder.RKNNNetBuilder.build_layer
E   File "rknn/base/RKNNlib/layer/RKNNlayer.py", line 287, in rknn.base.RKNNlib.layer.RKNNlayer.RKNNLayer.compute_tensor
E   File "rknn/base/RKNNlib/layer/concat.py", line 44, in rknn.base.RKNNlib.layer.concat.Concat.compute_out_tensor
E   File "/home/user/.local/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1189, in concat
E     return gen_array_ops.concat_v2(values=values, axis=axis, name=name)
E   File "/home/user/.local/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 953, in concat_v2
E     "ConcatV2", values=values, axis=axis, name=name)
E   File "/home/user/.local/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
E     op_def=op_def)
E   File "/home/user/.local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
E     op_def=op_def)
E   File "/home/user/.local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1734, in __init__
E     control_input_ops)
E   File "/home/user/.local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1570, in _create_c_op
E     raise ValueError(str(e))
E ValueError: Dimension 1 in both shapes must be equal, but are 6 and 26. Shapes are [1,6,6] and [1,26,26]. for 'cat_at_2304_1/cat_at_2304_1' (op: 'ConcatV2') with input shapes: [1,6,6,256], [1,26,26,512], [] and with computed input tensors: input[2] = <3>.
Build pytorch failed!
作者: Sean    时间: 2020-3-25 09:14
Sean 发表于 2020-3-25 09:13
也遇到了同样问题,楼主能加个wx聊聊吗?

--> Building model

用的是torch的yolo3代码




欢迎光临 Toybrick (http://t.rock-chips.com/) Powered by Discuz! X3.3