Code for the model conversion:
#!/usr/bin/env python3
from rknn.api import RKNN

if __name__ == '__main__':
    # Create RKNN object
    rknn = RKNN(verbose=False, verbose_file='./speech_command_build.log')

    # Config for model input pre-processing
    #rknn.config(quantized_dtype='dynamic_fixed_point-8')
    #rknn.config(quantized_dtype='asymmetric_quantized-u8')
    rknn.config()

    # Load TensorFlow model
    print('--> Loading model')
    ret = rknn.load_tensorflow(tf_pb='./my_frozen_graph.pb',
                               inputs=['Reshape'],
                               outputs=['labels_softmax'],
                               input_size_list=[[1, 3920]])
    if ret != 0:
        print('load_tensorflow failed')
        exit(ret)
    print('done')

    # Build model (quantization disabled)
    print('--> Building model')
    #rknn.build(do_quantization=False, dataset='./dataset.txt', pre_compile=False)
    ret = rknn.build(do_quantization=False)
    if ret != 0:
        print('build failed')
        exit(ret)
    print('done')

    # Export RKNN model
    #rknn.export_rknn('./speech_command_quantized.rknn')
    rknn.export_rknn('./speech_new.rknn')
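
Before suspecting the conversion itself, the frozen graph can be cross-checked directly in TensorFlow on the same input that is later fed to the NPU. A minimal sketch, assuming the 'Reshape' tensor accepts the flattened [1, 3920] layout implied by input_size_list above, and that the MFCC array was saved beforehand as fingerprint.npy (a hypothetical file name):

import numpy as np
import tensorflow as tf

# Load the same frozen graph that was fed to the converter
graph_def = tf.GraphDef()
with tf.gfile.GFile('./my_frozen_graph.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name='')

# 'fingerprint.npy' is a hypothetical dump of fingerprint_input_npy
fingerprint = np.load('fingerprint.npy').reshape(1, 3920)
with tf.Session(graph=graph) as sess:
    # Feed the converter's input tensor and fetch its output tensor
    tf_out = sess.run('labels_softmax:0', feed_dict={'Reshape:0': fingerprint})
print('TensorFlow reference output:', tf_out)

If this reference output disagrees with what the RKNN model returns for the identical array, the problem is in the conversion or the runtime rather than in the preprocessing.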
Runtime code for speech recognition:
#!/usr/bin/env python3
import numpy as np
import tensorflow as tf
from rknn.api import RKNN
from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio

def load_labels(filename):
    """Read in labels, one label per line."""
    return [line.rstrip() for line in tf.gfile.GFile(filename)]
if __name__ == '__main__':
    # Create RKNN object
    #rknn = RKNN(verbose=True, verbose_file='./speech_command_build.log')
    rknn = RKNN()

    # Load RKNN model
    print('--> Loading model')
    ret = rknn.load_rknn(path='./speech_new.rknn')
    if ret != 0:
        print('load_rknn failed')
        exit(ret)
    print('load_rknn done')

    # Init runtime environment
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    if ret != 0:
        print('Init runtime environment failed')
        exit(ret)
    print('init_runtime done')

    # Read the wav file and compute the MFCC fingerprint with TensorFlow,
    # matching the preprocessing of the speech_commands example
    with open('on.wav', 'rb') as wav_file:
        wav_data = wav_file.read()
    decoded_sample_data = contrib_audio.decode_wav(wav_data,
                                                   desired_channels=1,
                                                   desired_samples=16000,
                                                   name='decoded_sample_data')
    spectrogram = contrib_audio.audio_spectrogram(decoded_sample_data.audio,
                                                  window_size=480,
                                                  stride=160,
                                                  magnitude_squared=True)
    fingerprint_input = contrib_audio.mfcc(spectrogram, 16000, dct_coefficient_count=40)
    with tf.Session() as sess:
        fingerprint_input_npy = fingerprint_input.eval()

    # Run inference on the NPU; inputs is a list of ndarrays
    outputs, = rknn.inference(inputs=[fingerprint_input_npy], data_type='float32')
    print('inference done')
    print('inference result: ', outputs)

    # Print the top-3 predictions
    labels = load_labels('./conv_labels.txt')
    predictions = np.array(outputs)
    top_k = predictions[0].argsort()[-3:][::-1]
    for node_id in top_k:
        human_string = labels[node_id]
        score = predictions[0][node_id]
        print('%s (score = %.5f)' % (human_string, score))
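
For reference, with window_size=480 and stride=160 on 16000 samples, audio_spectrogram yields 1 + (16000 - 480)/160 = 98 frames, and 40 MFCC coefficients per frame give 98 x 40 = 3920 values, which matches input_size_list=[[1, 3920]] in the conversion script. A small sanity check before the inference call (a sketch reusing fingerprint_input_npy and rknn from the script above):

# Sanity-check the fingerprint fed to the NPU: 98 frames x 40 coefficients = 3920
assert fingerprint_input_npy.size == 3920, fingerprint_input_npy.shape
# Hand the runtime a contiguous float32 buffer in the layout used for conversion
fingerprint_flat = np.ascontiguousarray(
    fingerprint_input_npy.reshape(1, 3920).astype(np.float32))
outputs, = rknn.inference(inputs=[fingerprint_flat], data_type='float32')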
Problems encountered:
1. The model converts to RKNN format without errors, but speech recognition with the converted model gives completely wrong results, while the same recognition run directly with TensorFlow gives the correct answer.
2. The above was tested in a virtual machine. When the same code and model are run on an RK1808 board, the results are different on every run.
What could be causing this? Any pointers would be appreciated.
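
For the nondeterminism on the board, one quick isolation step is to run the exact same buffer through the runtime twice: if the two outputs already differ, the preprocessing can be ruled out and the problem lies in the NPU runtime/driver or in how the input buffer is handled. A minimal sketch reusing rknn and fingerprint_input_npy from the runtime script:

# Run the same input twice and compare; differing outputs point at the
# runtime/driver or the input buffer rather than the model itself
out1, = rknn.inference(inputs=[fingerprint_input_npy], data_type='float32')
out2, = rknn.inference(inputs=[fingerprint_input_npy], data_type='float32')
print('max abs diff between two runs:',
      np.max(np.abs(np.array(out1) - np.array(out2))))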