install_bs (NVIDIA-NeMo#7019)

karpnv · zhehuaichen · commit a8123c2e640d · 2023-10-04T07:13:00.000-07:00
Signed-off-by: Nikolay Karpov <karpnv@gmail.com>
diff --git a/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh b/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 # Use this script to install KenLM, OpenSeq2Seq decoder, Flashlight decoder
+shopt -s expand_aliases
+
 NEMO_PATH=/workspace/nemo  # Path to NeMo folder: /workspace/nemo if you use NeMo/Dockerfile
 if [ "$#" -eq 1 ]
 then
@@ -24,10 +26,14 @@ KENLM_MAX_ORDER=10 # Maximum order of KenLM model, also specified in the setup_o
 cd $NEMO_PATH
 
 if [ $(id -u) -eq 0 ]; then
-  alias sudo=eval
+   alias aptupdate='apt-get update'
+   alias b2install='./b2'
+ else
+   alias aptupdate='sudo apt-get update'
+   alias b2install='sudo ./b2'
 fi
 
-sudo apt-get update && apt-get upgrade -y && apt-get install -y liblzma-dev && rm -rf /var/lib/apt/lists/* # liblzma needed for flashlight decoder
+aptupdate && apt-get upgrade -y && apt-get install -y liblzma-dev && rm -rf /var/lib/apt/lists/* # liblzma needed for flashlight decoder
 
 
 git clone https://github.com/NVIDIA/OpenSeq2Seq
@@ -42,7 +48,7 @@ cp $NEMO_PATH/scripts/installers/setup_os2s_decoders.py ./setup.py
 ./setup.sh
 
 # install Boost package for KenLM
-wget https://boostorg.jfrog.io/artifactory/main/release/1.80.0/source/boost_1_80_0.tar.bz2 --no-check-certificate && tar --bzip2 -xf $NEMO_PATH/decoders/boost_1_80_0.tar.bz2 && cd boost_1_80_0 && ./bootstrap.sh && sudo ./b2 --layout=tagged link=static,shared threading=multi,single install -j4 || echo FAILURE
+wget https://boostorg.jfrog.io/artifactory/main/release/1.80.0/source/boost_1_80_0.tar.bz2 --no-check-certificate && tar --bzip2 -xf $NEMO_PATH/decoders/boost_1_80_0.tar.bz2 && cd boost_1_80_0 && ./bootstrap.sh && b2install --layout=tagged link=static,shared threading=multi,single install -j4 || echo FAILURE
 export BOOST_ROOT=$NEMO_PATH/decoders/boost_1_80_0
 
 # install KenLM
diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb
@@ -3,9 +3,7 @@
   "nbformat_minor": 0,
   "metadata": {
     "colab": {
-      "name": "Offline_ASR.ipynb",
       "provenance": [],
-      "collapsed_sections": [],
       "toc_visible": true
     },
     "kernelspec": {
@@ -31,7 +29,9 @@
         "\n",
         "You may find more info on how to train and use language models for ASR models here:\n",
         "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/asr_language_modeling.html\n",
-        "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n"
+        "\n",
+        "\n",
+        "NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n"
       ]
     },
     {
@@ -41,7 +41,7 @@
       },
       "source": [
         "## Installation\n",
-        "NeMo can be installed via simple pip command. \n",
+        "NeMo can be installed via simple pip command.\n",
         "\n",
         "Optional CTC beam search decoder might require restart of Colab runtime after installation."
       ]
@@ -52,7 +52,7 @@
         "id": "I9eIxAyKHREB"
       },
       "source": [
-        "BRANCH = 'r1.20.0'\n",
+        "BRANCH = 'main'\n",
         "try:\n",
         "    # Import NeMo Speech Recognition collection\n",
         "    import nemo.collections.asr as nemo_asr\n",
@@ -77,12 +77,14 @@
         "    import ctc_decoders\n",
         "except ModuleNotFoundError:\n",
         "    # install beam search decoder\n",
+        "    import os\n",
         "    !apt-get update && apt-get install -y swig\n",
         "    !git clone https://github.com/NVIDIA/NeMo -b \"$BRANCH\"\n",
-        "    !cd NeMo && bash scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh\n",
+        "    pwd = !pwd\n",
+        "    NEMO_PATH = os.path.join(pwd[0], \"NeMo\")\n",
+        "    !cd NeMo && bash scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh $NEMO_PATH\n",
         "    print('Restarting Colab runtime to successfully import built module.')\n",
         "    print('Please re-run the notebook.')\n",
-        "    import os\n",
         "    os.kill(os.getpid(), 9)"
       ],
       "execution_count": null,
@@ -110,7 +112,7 @@
       },
       "source": [
         "## Instantiate pre-trained NeMo model\n",
-        "``from_pretrained(...)`` API downloads and initializes model directly from the cloud. \n",
+        "``from_pretrained(...)`` API downloads and initializes model directly from the cloud.\n",
         "\n",
         "Alternatively, ``restore_from(...)`` allows loading a model from a disk.\n",
         "\n",
@@ -403,7 +405,7 @@
       "source": [
         "## Offline inference with beam search decoder and N-gram language model re-scoring\n",
         "\n",
-        "It is possible to use an external [KenLM](https://kheafield.com/code/kenlm/)-based N-gram language model to rescore multiple transcription candidates. \n",
+        "It is possible to use an external [KenLM](https://kheafield.com/code/kenlm/)-based N-gram language model to rescore multiple transcription candidates.\n",
         "\n",
         "Let's download and preprocess LibriSpeech 3-gram language model."
       ]
@@ -653,4 +655,4 @@
       "outputs": []
     }
   ]
-}
+}