Skip to content

Commit

Permalink
Merge pull request #246 from xinghai-sun/doc2
Browse files Browse the repository at this point in the history
Rewrite README.md for DS2 and update examples.
  • Loading branch information
xinghai-sun authored Sep 13, 2017
2 parents 717ccf5 + f071bc8 commit 848bb8a
Show file tree
Hide file tree
Showing 39 changed files with 1,067 additions and 223 deletions.
3 changes: 0 additions & 3 deletions deep_speech_2/.gitignore

This file was deleted.

428 changes: 345 additions & 83 deletions deep_speech_2/README.md

Large diffs are not rendered by default.

28 changes: 0 additions & 28 deletions deep_speech_2/data/librispeech/eng_vocab.txt

This file was deleted.

28 changes: 12 additions & 16 deletions deep_speech_2/data/librispeech/librispeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import codecs
from paddle.v2.dataset.common import md5file

DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')

URL_ROOT = "http://www.openslr.org/resources/12"
URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"
URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"
Expand All @@ -41,7 +39,7 @@
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--target_dir",
default=DATA_HOME + "/Libri",
default='~/.cache/paddle/dataset/speech/libri',
type=str,
help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
Expand All @@ -60,8 +58,7 @@


def download(url, md5sum, target_dir):
"""
Download file from url to target_dir, and check md5sum.
"""Download file from url to target_dir, and check md5sum.
"""
if not os.path.exists(target_dir): os.makedirs(target_dir)
filepath = os.path.join(target_dir, url.split("/")[-1])
Expand All @@ -77,8 +74,7 @@ def download(url, md5sum, target_dir):


def unpack(filepath, target_dir):
"""
Unpack the file to the target_dir.
"""Unpack the file to the target_dir.
"""
print("Unpacking %s ..." % filepath)
tar = tarfile.open(filepath)
Expand All @@ -87,8 +83,7 @@ def unpack(filepath, target_dir):


def create_manifest(data_dir, manifest_path):
"""
Create a manifest json file summarizing the data set, with each line
"""Create a manifest json file summarizing the data set, with each line
containing the meta data (i.e. audio filepath, transcription text, audio
duration) of each audio file within the data set.
"""
Expand Down Expand Up @@ -119,8 +114,7 @@ def create_manifest(data_dir, manifest_path):


def prepare_dataset(url, md5sum, target_dir, manifest_path):
"""
Download, unpack and create summmary manifest file.
"""Download, unpack and create summmary manifest file.
"""
if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):
# download
Expand All @@ -135,6 +129,8 @@ def prepare_dataset(url, md5sum, target_dir, manifest_path):


def main():
args.target_dir = os.path.expanduser(args.target_dir)

prepare_dataset(
url=URL_TEST_CLEAN,
md5sum=MD5_TEST_CLEAN,
Expand All @@ -145,12 +141,12 @@ def main():
md5sum=MD5_DEV_CLEAN,
target_dir=os.path.join(args.target_dir, "dev-clean"),
manifest_path=args.manifest_prefix + ".dev-clean")
prepare_dataset(
url=URL_TRAIN_CLEAN_100,
md5sum=MD5_TRAIN_CLEAN_100,
target_dir=os.path.join(args.target_dir, "train-clean-100"),
manifest_path=args.manifest_prefix + ".train-clean-100")
if args.full_download:
prepare_dataset(
url=URL_TRAIN_CLEAN_100,
md5sum=MD5_TRAIN_CLEAN_100,
target_dir=os.path.join(args.target_dir, "train-clean-100"),
manifest_path=args.manifest_prefix + ".train-clean-100")
prepare_dataset(
url=URL_TEST_OTHER,
md5sum=MD5_TEST_OTHER,
Expand Down
4 changes: 2 additions & 2 deletions deep_speech_2/deploy/demo_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import paddle.v2 as paddle
import _init_paths
from data_utils.data import DataGenerator
from models.model import DeepSpeech2Model
from model_utils.model import DeepSpeech2Model
from data_utils.utils import read_manifest
from utils.utility import add_arguments, print_arguments

Expand Down Expand Up @@ -46,7 +46,7 @@
'data/librispeech/eng_vocab.txt',
"Filepath of vocabulary.")
add_arg('model_path', str,
'./checkpoints/params.latest.tar.gz',
'./checkpoints/libri/params.latest.tar.gz',
"If None, the training starts from scratch, "
"otherwise, it resumes from the pre-trained model.")
add_arg('lang_model_path', str,
Expand Down
28 changes: 0 additions & 28 deletions deep_speech_2/examples/librispeech/generate.sh

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,19 +1,31 @@
#! /usr/bin/bash

pushd ../..
pushd ../.. > /dev/null

# download data, generate manifests
python data/librispeech/librispeech.py \
--manifest_prefix='data/librispeech/manifest' \
--full_download='True' \
--target_dir=$HOME'/.cache/paddle/dataset/speech/Libri'
--target_dir='~/.cache/paddle/dataset/speech/Libri' \
--full_download='True'

if [ $? -ne 0 ]; then
echo "Prepare LibriSpeech failed. Terminated."
exit 1
fi

#cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train
cat data/librispeech/manifest.train-* | shuf > data/librispeech/manifest.train


# build vocabulary
python tools/build_vocab.py \
--count_threshold=0 \
--vocab_path='data/librispeech/vocab.txt' \
--manifest_paths='data/librispeech/manifest.train'

if [ $? -ne 0 ]; then
echo "Build vocabulary failed. Terminated."
exit 1
fi


# compute mean and stddev for normalizer
Expand All @@ -30,3 +42,4 @@ fi


echo "LibriSpeech Data preparation done."
exit 0
45 changes: 45 additions & 0 deletions deep_speech_2/examples/librispeech/run_infer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#! /usr/bin/bash
# Run DeepSpeech2 CTC beam-search inference on the LibriSpeech test-clean
# split using a locally trained checkpoint.
# NOTE(review): intended to be launched from examples/librispeech/ — the
# pushd below assumes the repository root is exactly two levels up.

pushd ../.. > /dev/null

# download language model
# Fetch the English KenLM model used to rescore beam-search hypotheses;
# abort early if the download helper reports failure.
pushd models/lm > /dev/null
sh download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null


# infer
# Single-GPU decoding (CUDA_VISIBLE_DEVICES=0, trainer_count=1) over 10
# samples. Network shape (2 conv + 3 RNN layers, size 2048) and decoder
# weights (alpha/beta/cutoff_prob) must match the trained checkpoint.
# Paths are relative to the repository root entered above.
CUDA_VISIBLE_DEVICES=0 \
python -u infer.py \
--num_samples=10 \
--trainer_count=1 \
--beam_size=500 \
--num_proc_bsearch=8 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--infer_manifest='data/librispeech/manifest.test-clean' \
--mean_std_path='data/librispeech/mean_std.npz' \
--vocab_path='data/librispeech/vocab.txt' \
--model_path='checkpoints/libri/params.latest.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'

# Propagate inference failure to the caller with a diagnostic.
if [ $? -ne 0 ]; then
echo "Failed in inference!"
exit 1
fi


exit 0
54 changes: 54 additions & 0 deletions deep_speech_2/examples/librispeech/run_infer_golden.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#! /usr/bin/bash
# Run DeepSpeech2 CTC beam-search inference using the released ("golden")
# pre-trained LibriSpeech model instead of a local checkpoint.
# NOTE(review): intended to be launched from examples/librispeech/ — the
# pushd below assumes the repository root is exactly two levels up.

pushd ../.. > /dev/null

# download language model
# Fetch the English KenLM model used to rescore beam-search hypotheses;
# abort early if the download helper reports failure.
pushd models/lm > /dev/null
sh download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null


# download well-trained model
# Fetch the released LibriSpeech acoustic model (params, mean/std stats,
# vocabulary) consumed by the inference flags below.
pushd models/librispeech > /dev/null
sh download_model.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null


# infer
# Single-GPU decoding over 10 samples. Model artifacts come from the
# downloaded models/librispeech directory, while the input manifest points
# at data/tiny — presumably a small smoke-test subset; verify against the
# data preparation scripts. Paths are relative to the repository root.
CUDA_VISIBLE_DEVICES=0 \
python -u infer.py \
--num_samples=10 \
--trainer_count=1 \
--beam_size=500 \
--num_proc_bsearch=8 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--infer_manifest='data/tiny/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech/params.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'

# Propagate inference failure to the caller with a diagnostic.
if [ $? -ne 0 ]; then
echo "Failed in inference!"
exit 1
fi


exit 0
32 changes: 25 additions & 7 deletions deep_speech_2/examples/librispeech/run_test.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
#! /usr/bin/bash

pushd ../..
pushd ../.. > /dev/null

# download language model
pushd models/lm > /dev/null
sh download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null


# evaluate model
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
python -u evaluate.py \
python -u test.py \
--batch_size=128 \
--trainer_count=8 \
--beam_size=500 \
--num_proc_bsearch=12 \
--num_proc_data=12 \
--num_proc_bsearch=8 \
--num_proc_data=4 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
Expand All @@ -20,9 +30,17 @@ python -u evaluate.py \
--share_rnn_weights=True \
--test_manifest='data/librispeech/manifest.test-clean' \
--mean_std_path='data/librispeech/mean_std.npz' \
--vocab_path='data/librispeech/eng_vocab.txt' \
--model_path='checkpoints/params.latest.tar.gz' \
--lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \
--vocab_path='data/librispeech/vocab.txt' \
--model_path='checkpoints/libri/params.latest.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'

if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
exit 1
fi


exit 0
Loading

0 comments on commit 848bb8a

Please sign in to comment.