Skip to content

Commit

Permalink
Merge pull request #246 from xinghai-sun/doc2
Browse files Browse the repository at this point in the history
Rewrite README.md for DS2 and update examples.
  • Loading branch information
xinghai-sun authored Sep 13, 2017
2 parents 717ccf5 + f071bc8 commit 848bb8a
Show file tree
Hide file tree
Showing 39 changed files with 1,067 additions and 223 deletions.
3 changes: 0 additions & 3 deletions deep_speech_2/.gitignore

This file was deleted.

428 changes: 345 additions & 83 deletions deep_speech_2/README.md

Large diffs are not rendered by default.

28 changes: 0 additions & 28 deletions deep_speech_2/data/librispeech/eng_vocab.txt

This file was deleted.

28 changes: 12 additions & 16 deletions deep_speech_2/data/librispeech/librispeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import codecs
from paddle.v2.dataset.common import md5file

DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')

URL_ROOT = "http://www.openslr.org/resources/12"
URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"
URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"
Expand All @@ -41,7 +39,7 @@
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--target_dir",
default=DATA_HOME + "/Libri",
default='~/.cache/paddle/dataset/speech/libri',
type=str,
help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
Expand All @@ -60,8 +58,7 @@


def download(url, md5sum, target_dir):
"""
Download file from url to target_dir, and check md5sum.
"""Download file from url to target_dir, and check md5sum.
"""
if not os.path.exists(target_dir): os.makedirs(target_dir)
filepath = os.path.join(target_dir, url.split("/")[-1])
Expand All @@ -77,8 +74,7 @@ def download(url, md5sum, target_dir):


def unpack(filepath, target_dir):
"""
Unpack the file to the target_dir.
"""Unpack the file to the target_dir.
"""
print("Unpacking %s ..." % filepath)
tar = tarfile.open(filepath)
Expand All @@ -87,8 +83,7 @@ def unpack(filepath, target_dir):


def create_manifest(data_dir, manifest_path):
"""
Create a manifest json file summarizing the data set, with each line
"""Create a manifest json file summarizing the data set, with each line
containing the meta data (i.e. audio filepath, transcription text, audio
duration) of each audio file within the data set.
"""
Expand Down Expand Up @@ -119,8 +114,7 @@ def create_manifest(data_dir, manifest_path):


def prepare_dataset(url, md5sum, target_dir, manifest_path):
"""
Download, unpack and create summmary manifest file.
"""Download, unpack and create summmary manifest file.
"""
if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):
# download
Expand All @@ -135,6 +129,8 @@ def prepare_dataset(url, md5sum, target_dir, manifest_path):


def main():
args.target_dir = os.path.expanduser(args.target_dir)

prepare_dataset(
url=URL_TEST_CLEAN,
md5sum=MD5_TEST_CLEAN,
Expand All @@ -145,12 +141,12 @@ def main():
md5sum=MD5_DEV_CLEAN,
target_dir=os.path.join(args.target_dir, "dev-clean"),
manifest_path=args.manifest_prefix + ".dev-clean")
prepare_dataset(
url=URL_TRAIN_CLEAN_100,
md5sum=MD5_TRAIN_CLEAN_100,
target_dir=os.path.join(args.target_dir, "train-clean-100"),
manifest_path=args.manifest_prefix + ".train-clean-100")
if args.full_download:
prepare_dataset(
url=URL_TRAIN_CLEAN_100,
md5sum=MD5_TRAIN_CLEAN_100,
target_dir=os.path.join(args.target_dir, "train-clean-100"),
manifest_path=args.manifest_prefix + ".train-clean-100")
prepare_dataset(
url=URL_TEST_OTHER,
md5sum=MD5_TEST_OTHER,
Expand Down
4 changes: 2 additions & 2 deletions deep_speech_2/deploy/demo_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import paddle.v2 as paddle
import _init_paths
from data_utils.data import DataGenerator
from models.model import DeepSpeech2Model
from model_utils.model import DeepSpeech2Model
from data_utils.utils import read_manifest
from utils.utility import add_arguments, print_arguments

Expand Down Expand Up @@ -46,7 +46,7 @@
'data/librispeech/eng_vocab.txt',
"Filepath of vocabulary.")
add_arg('model_path', str,
'./checkpoints/params.latest.tar.gz',
'./checkpoints/libri/params.latest.tar.gz',
"If None, the training starts from scratch, "
"otherwise, it resumes from the pre-trained model.")
add_arg('lang_model_path', str,
Expand Down
28 changes: 0 additions & 28 deletions deep_speech_2/examples/librispeech/generate.sh

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,19 +1,31 @@
#! /usr/bin/bash

pushd ../..
pushd ../.. > /dev/null

# download data, generate manifests
python data/librispeech/librispeech.py \
--manifest_prefix='data/librispeech/manifest' \
--full_download='True' \
--target_dir=$HOME'/.cache/paddle/dataset/speech/Libri'
--target_dir='~/.cache/paddle/dataset/speech/Libri' \
--full_download='True'

if [ $? -ne 0 ]; then
echo "Prepare LibriSpeech failed. Terminated."
exit 1
fi

#cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train
cat data/librispeech/manifest.train-* | shuf > data/librispeech/manifest.train


# build vocabulary
python tools/build_vocab.py \
--count_threshold=0 \
--vocab_path='data/librispeech/vocab.txt' \
--manifest_paths='data/librispeech/manifest.train'

if [ $? -ne 0 ]; then
echo "Build vocabulary failed. Terminated."
exit 1
fi


# compute mean and stddev for normalizer
Expand All @@ -30,3 +42,4 @@ fi


echo "LibriSpeech Data preparation done."
exit 0
45 changes: 45 additions & 0 deletions deep_speech_2/examples/librispeech/run_infer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#! /usr/bin/bash
# Run DeepSpeech2 CTC beam-search inference on the LibriSpeech test-clean
# split using a locally trained checkpoint.
# NOTE(review): intended to be launched from examples/librispeech/ — the
# pushd below assumes the repository root is exactly two levels up.

pushd ../.. > /dev/null

# download language model
# Fetch the English KenLM model used to rescore beam-search hypotheses;
# abort early if the download helper reports failure.
pushd models/lm > /dev/null
sh download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null


# infer
# Single-GPU decoding (CUDA_VISIBLE_DEVICES=0, trainer_count=1) over 10
# samples. Network shape (2 conv + 3 RNN layers, size 2048) and decoder
# weights (alpha/beta/cutoff_prob) must match the trained checkpoint.
# Paths are relative to the repository root entered above.
CUDA_VISIBLE_DEVICES=0 \
python -u infer.py \
--num_samples=10 \
--trainer_count=1 \
--beam_size=500 \
--num_proc_bsearch=8 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--infer_manifest='data/librispeech/manifest.test-clean' \
--mean_std_path='data/librispeech/mean_std.npz' \
--vocab_path='data/librispeech/vocab.txt' \
--model_path='checkpoints/libri/params.latest.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'

# Propagate inference failure to the caller with a diagnostic.
if [ $? -ne 0 ]; then
echo "Failed in inference!"
exit 1
fi


exit 0
54 changes: 54 additions & 0 deletions deep_speech_2/examples/librispeech/run_infer_golden.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#! /usr/bin/bash
# Run DeepSpeech2 CTC beam-search inference using the released ("golden")
# pre-trained LibriSpeech model instead of a local checkpoint.
# NOTE(review): intended to be launched from examples/librispeech/ — the
# pushd below assumes the repository root is exactly two levels up.

pushd ../.. > /dev/null

# download language model
# Fetch the English KenLM model used to rescore beam-search hypotheses;
# abort early if the download helper reports failure.
pushd models/lm > /dev/null
sh download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null


# download well-trained model
# Fetch the released LibriSpeech acoustic model (params, mean/std stats,
# vocabulary) consumed by the inference flags below.
pushd models/librispeech > /dev/null
sh download_model.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null


# infer
# Single-GPU decoding over 10 samples. Model artifacts come from the
# downloaded models/librispeech directory, while the input manifest points
# at data/tiny — presumably a small smoke-test subset; verify against the
# data preparation scripts. Paths are relative to the repository root.
CUDA_VISIBLE_DEVICES=0 \
python -u infer.py \
--num_samples=10 \
--trainer_count=1 \
--beam_size=500 \
--num_proc_bsearch=8 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--infer_manifest='data/tiny/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech/params.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'

# Propagate inference failure to the caller with a diagnostic.
if [ $? -ne 0 ]; then
echo "Failed in inference!"
exit 1
fi


exit 0
32 changes: 25 additions & 7 deletions deep_speech_2/examples/librispeech/run_test.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
#! /usr/bin/bash

pushd ../..
pushd ../.. > /dev/null

# download language model
pushd models/lm > /dev/null
sh download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null


# evaluate model
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
python -u evaluate.py \
python -u test.py \
--batch_size=128 \
--trainer_count=8 \
--beam_size=500 \
--num_proc_bsearch=12 \
--num_proc_data=12 \
--num_proc_bsearch=8 \
--num_proc_data=4 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
Expand All @@ -20,9 +30,17 @@ python -u evaluate.py \
--share_rnn_weights=True \
--test_manifest='data/librispeech/manifest.test-clean' \
--mean_std_path='data/librispeech/mean_std.npz' \
--vocab_path='data/librispeech/eng_vocab.txt' \
--model_path='checkpoints/params.latest.tar.gz' \
--lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \
--vocab_path='data/librispeech/vocab.txt' \
--model_path='checkpoints/libri/params.latest.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'

if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
exit 1
fi


exit 0
Loading

0 comments on commit 848bb8a

Please sign in to comment.