Skip to content

Commit

Permalink
Add scripts to export streaming zipformer(v1) to RKNN (#1882)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Feb 27, 2025
1 parent 2ba665a commit db9fb8a
Show file tree
Hide file tree
Showing 7 changed files with 1,155 additions and 31 deletions.
26 changes: 23 additions & 3 deletions .github/scripts/docker/generate_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,17 @@ def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--min-torch-version",
help="Minimu torch version",
help="torch version",
)

parser.add_argument(
"--torch-version",
help="torch version",
)

parser.add_argument(
"--python-version",
help="python version",
)
return parser.parse_args()

Expand Down Expand Up @@ -52,7 +62,7 @@ def get_torchaudio_version(torch_version):
return torch_version


def get_matrix(min_torch_version):
def get_matrix(min_torch_version, specified_torch_version, specified_python_version):
k2_version = "1.24.4.dev20241029"
kaldifeat_version = "1.25.5.dev20241029"
version = "20241218"
Expand All @@ -71,6 +81,12 @@ def get_matrix(min_torch_version):
torch_version += ["2.5.0"]
torch_version += ["2.5.1"]

if specified_torch_version:
torch_version = [specified_torch_version]

if specified_python_version:
python_version = [specified_python_version]

matrix = []
for p in python_version:
for t in torch_version:
Expand Down Expand Up @@ -115,7 +131,11 @@ def get_matrix(min_torch_version):

def main():
args = get_args()
matrix = get_matrix(min_torch_version=args.min_torch_version)
matrix = get_matrix(
min_torch_version=args.min_torch_version,
specified_torch_version=args.torch_version,
specified_python_version=args.python_version,
)
print(json.dumps({"include": matrix}))


Expand Down
200 changes: 200 additions & 0 deletions .github/scripts/librispeech/ASR/run_rknn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
#!/usr/bin/env bash

# -e: abort as soon as any command fails (the export functions below rely on
#     this instead of checking every exit status themselves).
# -x: trace every command as it is executed.
set -ex

# Python packages installed up front, before any of the helper scripts below
# are invoked (presumably required by them -- confirm against their imports).
python3 -m pip install kaldi-native-fbank soundfile librosa

log() {
  # Borrowed from espnet.
  # Echo the given arguments (joined by spaces, backslash escapes interpreted)
  # prefixed with a timestamp and the caller's file name, line and function.
  local caller_script="${BASH_SOURCE[1]}"
  caller_script="${caller_script##*/}" # keep only the base name
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  echo -e "${stamp} (${caller_script}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# Every relative path used below (./pruned_transducer_stateless7_streaming/*,
# the exp_* download directories and the per-platform rk* staging directories)
# is resolved from this recipe directory.
cd egs/librispeech/ASR


# https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed
# sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
function export_bilingual_zh_en() {
  # Export the k2fsa-zipformer-chinese-english-mixed checkpoint to ONNX and
  # then to RKNN for each target platform listed in the loop below.
  #
  # Steps:
  #   1. Download the checkpoint, tokens.txt and test wavs 0-4 into ./exp_zh_en.
  #   2. Run export-onnx-zh.py to write the ONNX models into ./exp_zh_en.
  #   3. Decode 0.wav and 1.wav with onnx_pretrained.py.
  #   4. Per platform: run export_rknn.py and test_rknn_on_cpu_simulator.py,
  #      then copy the staged ./<platform> directory to /icefall/rknn-models.
  #
  # Arguments: none
  # Globals:   none (all variables are local)
  # Errors:    no explicit checks; relies on the script-level `set -e`.
  local d=exp_zh_en
  local hf_url=https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main
  local out_dir=/icefall/rknn-models
  local platform
  local i

  # -p: do not fail when the directory is left over from a previous run.
  mkdir -p "$d"
  pushd "$d"

  # -f: make curl exit non-zero on an HTTP error instead of silently saving
  # the server's error page as the checkpoint/tokens/wav file.
  curl -fSL -O "$hf_url/exp/pretrained.pt"
  # The export script is invoked with --epoch 99 --avg 1, hence this name.
  mv pretrained.pt epoch-99.pt

  curl -fSL -O "$hf_url/data/lang_char_bpe/tokens.txt"

  for i in 0 1 2 3 4; do
    curl -fSL -O "$hf_url/test_wavs/$i.wav"
  done
  ls -lh
  popd

  ./pruned_transducer_stateless7_streaming/export-onnx-zh.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d/" \
    --decode-chunk-len 64 \
    --num-encoder-layers "2,4,3,2,4" \
    --feedforward-dims "1024,1024,1536,1536,1024" \
    --nhead "8,8,8,8,8" \
    --encoder-dims "384,384,384,384,384" \
    --attention-dims "192,192,192,192,192" \
    --encoder-unmasked-dims "256,256,256,256,256" \
    --zipformer-downsampling-factors "1,2,4,8,2" \
    --cnn-module-kernels "31,31,31,31,31" \
    --decoder-dim 512 \
    --joiner-dim 512

  ls -lh "$d/"

  # Decode two of the downloaded test waves with the exported ONNX models.
  for i in 0 1; do
    ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
      --encoder-model-filename "$d/encoder-epoch-99-avg-1.onnx" \
      --decoder-model-filename "$d/decoder-epoch-99-avg-1.onnx" \
      --joiner-model-filename "$d/joiner-epoch-99-avg-1.onnx" \
      --tokens "$d/tokens.txt" \
      "$d/$i.wav"
  done

  mkdir -p "$out_dir"

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    # Staging directory in the current working directory.
    mkdir -p "$platform"

    # stderr of the exporter is discarded (presumably to keep the log short
    # -- confirm); a failure still aborts the script under `set -e`, but
    # without any diagnostic output.
    ./pruned_transducer_stateless7_streaming/export_rknn.py \
      --in-encoder "$d/encoder-epoch-99-avg-1.onnx" \
      --in-decoder "$d/decoder-epoch-99-avg-1.onnx" \
      --in-joiner "$d/joiner-epoch-99-avg-1.onnx" \
      --out-encoder "$platform/encoder.rknn" \
      --out-decoder "$platform/decoder.rknn" \
      --out-joiner "$platform/joiner.rknn" \
      --target-platform "$platform" 2>/dev/null

    ls -lh "$platform/"

    # NOTE(review): this invocation does not receive $platform, so it looks
    # identical on every iteration; kept inside the loop to preserve the
    # original execution order -- confirm whether it can be hoisted.
    ./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
      --encoder "$d/encoder-epoch-99-avg-1.onnx" \
      --decoder "$d/decoder-epoch-99-avg-1.onnx" \
      --joiner "$d/joiner-epoch-99-avg-1.onnx" \
      --tokens "$d/tokens.txt" \
      --wav "$d/0.wav"

    cp "$d/tokens.txt" "$platform"
    cp "$d"/*.wav "$platform"

    cp -av "$platform" "$out_dir"
  done

  ls -lh "$out_dir"
}

# https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t
# sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16
function export_bilingual_zh_en_small() {
  # Export the k2fsa-zipformer-bilingual-zh-en-t (small) checkpoint to ONNX
  # and then to RKNN for each target platform listed in the loop below.
  #
  # Steps:
  #   1. Download the checkpoint, tokens.txt and test wavs 0-4 into
  #      ./exp_zh_en_small.
  #   2. Run export-onnx-zh.py to write the ONNX models into ./exp_zh_en_small.
  #   3. Decode 0.wav and 1.wav with onnx_pretrained.py.
  #   4. Per platform: run export_rknn.py and test_rknn_on_cpu_simulator.py,
  #      then copy the staged ./<platform> directory to
  #      /icefall/rknn-models-small.
  #
  # Arguments: none
  # Globals:   none (all variables are local)
  # Errors:    no explicit checks; relies on the script-level `set -e`.
  local d=exp_zh_en_small
  local hf_url=https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main
  local out_dir=/icefall/rknn-models-small
  local platform
  local i

  # -p: do not fail when the directory is left over from a previous run.
  mkdir -p "$d"
  pushd "$d"

  # -f: make curl exit non-zero on an HTTP error instead of silently saving
  # the server's error page as the checkpoint/tokens/wav file.
  curl -fSL -O "$hf_url/exp/pretrained.pt"
  # The export script is invoked with --epoch 99 --avg 1, hence this name.
  mv pretrained.pt epoch-99.pt

  curl -fSL -O "$hf_url/data/lang_char_bpe/tokens.txt"
  for i in 0 1 2 3 4; do
    curl -fSL -O "$hf_url/test_wavs/$i.wav"
  done

  ls -lh

  popd

  # Encoder hyper-parameters of the small model, followed by the remaining
  # model options.
  ./pruned_transducer_stateless7_streaming/export-onnx-zh.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d/" \
    --decode-chunk-len 64 \
    --num-encoder-layers 2,2,2,2,2 \
    --feedforward-dims 768,768,768,768,768 \
    --nhead 4,4,4,4,4 \
    --encoder-dims 256,256,256,256,256 \
    --attention-dims 192,192,192,192,192 \
    --encoder-unmasked-dims 192,192,192,192,192 \
    --zipformer-downsampling-factors "1,2,4,8,2" \
    --cnn-module-kernels "31,31,31,31,31" \
    --decoder-dim 512 \
    --joiner-dim 512

  ls -lh "$d/"

  # Decode two of the downloaded test waves with the exported ONNX models.
  for i in 0 1; do
    ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
      --encoder-model-filename "$d/encoder-epoch-99-avg-1.onnx" \
      --decoder-model-filename "$d/decoder-epoch-99-avg-1.onnx" \
      --joiner-model-filename "$d/joiner-epoch-99-avg-1.onnx" \
      --tokens "$d/tokens.txt" \
      "$d/$i.wav"
  done

  mkdir -p "$out_dir"

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    # Staging directory in the current working directory.
    mkdir -p "$platform"

    # stderr of the exporter is discarded (presumably to keep the log short
    # -- confirm); a failure still aborts the script under `set -e`, but
    # without any diagnostic output.
    ./pruned_transducer_stateless7_streaming/export_rknn.py \
      --in-encoder "$d/encoder-epoch-99-avg-1.onnx" \
      --in-decoder "$d/decoder-epoch-99-avg-1.onnx" \
      --in-joiner "$d/joiner-epoch-99-avg-1.onnx" \
      --out-encoder "$platform/encoder.rknn" \
      --out-decoder "$platform/decoder.rknn" \
      --out-joiner "$platform/joiner.rknn" \
      --target-platform "$platform" 2>/dev/null

    ls -lh "$platform/"

    # NOTE(review): this invocation does not receive $platform, so it looks
    # identical on every iteration; kept inside the loop to preserve the
    # original execution order -- confirm whether it can be hoisted.
    ./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
      --encoder "$d/encoder-epoch-99-avg-1.onnx" \
      --decoder "$d/decoder-epoch-99-avg-1.onnx" \
      --joiner "$d/joiner-epoch-99-avg-1.onnx" \
      --tokens "$d/tokens.txt" \
      --wav "$d/0.wav"

    cp "$d/tokens.txt" "$platform"
    cp "$d"/*.wav "$platform"

    cp -av "$platform" "$out_dir"
  done

  ls -lh "$out_dir"
}

# Run both exports, the small model first. Both functions stage their files in
# the same per-platform directories (rk3562, rk3566, ...) under the current
# directory and write the same file names there, so the second call overwrites
# the first call's staged copies; the copies placed under
# /icefall/rknn-models-small and /icefall/rknn-models are separate.
export_bilingual_zh_en_small

export_bilingual_zh_en
Loading

0 comments on commit db9fb8a

Please sign in to comment.