This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Making checklist optional (#5507)
* requirements and setup

* allow for optional dependencies

* fix script

* update changelog

* fix

* fix format

* fix

* update ci

* fix makefile

* fix makefile

* catch import errors

* install everything

* install all dependencies

* update

* add missing requirements file

* oops, missing changes

* fix ci, import warning

* update key

* fix

* remove quotation marks

* fix again

* update cache prefix

* update Makefile too

* and docs too?

* remove extra warnings

* update dockerfile

* suppress warning, mention in docs

* one more fix to Dockerfile

* fix test package

* fix

* Update README.md

Co-authored-by: Pete <petew@allenai.org>

* Update README.md

Co-authored-by: Pete <petew@allenai.org>

Co-authored-by: Pete <petew@allenai.org>
AkshitaB and epwalsh authored Feb 9, 2022
1 parent a76bf1e commit 8226e87
Showing 13 changed files with 186 additions and 62 deletions.
18 changes: 9 additions & 9 deletions .github/workflows/ci.yml
@@ -22,7 +22,7 @@ env:
TORCH_CPU_INSTALL: conda install pytorch torchvision torchaudio cpuonly -c pytorch
TORCH_GPU_INSTALL: conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
# Change this to invalidate existing cache.
CACHE_PREFIX: v5
CACHE_PREFIX: v10
# Disable tokenizers parallelism because this doesn't help, and can cause issues in distributed tests.
TOKENIZERS_PARALLELISM: 'false'
# Disable multithreading with OMP because this can lead to dead-locks in distributed tests.
@@ -146,7 +146,7 @@ jobs:
id: virtualenv-cache
with:
path: .venv
key: ${{ env.CACHE_PREFIX }}-${{ env.WEEK_NUMBER }}-${{ runner.os }}-${{ env.RUNNER_ARCH }}-${{ env.PYTHON_VERSION }}-${{ matrix.task.torch_platform }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
key: ${{ env.CACHE_PREFIX }}-${{ env.WEEK_NUMBER }}-${{ runner.os }}-${{ env.RUNNER_ARCH }}-${{ env.PYTHON_VERSION }}-${{ matrix.task.torch_platform }}-${{ hashFiles('setup.py') }}-${{ hashFiles('*requirements.txt') }}

- name: Setup virtual environment (no cache hit)
if: steps.virtualenv-cache.outputs.cache-hit != 'true'
@@ -159,7 +159,7 @@
if: steps.virtualenv-cache.outputs.cache-hit == 'true'
run: |
conda activate ./.venv
pip install --no-deps -e .
pip install --no-deps -e .[all]
make download-extras
- name: Pull and install models repo
@@ -170,7 +170,7 @@
conda activate ./.venv
git clone /~https://github.com/allenai/allennlp-models.git
cd allennlp-models
pip install -e . -r dev-requirements.txt
pip install -e .[dev,all]
- name: Debug info
run: |
@@ -271,7 +271,7 @@ jobs:
id: virtualenv-cache
with:
path: .venv
key: ${{ env.CACHE_PREFIX }}-${{ env.WEEK_NUMBER }}-${{ runner.os }}-${{ env.RUNNER_ARCH }}-${{ env.PYTHON_VERSION }}-cpu-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
key: ${{ env.CACHE_PREFIX }}-${{ env.WEEK_NUMBER }}-${{ runner.os }}-${{ env.RUNNER_ARCH }}-${{ env.PYTHON_VERSION }}-cpu-${{ hashFiles('setup.py') }}-${{ hashFiles('*requirements.txt') }}

- name: Setup virtual environment (no cache hit)
if: steps.virtualenv-cache.outputs.cache-hit != 'true'
@@ -284,7 +284,7 @@
if: steps.virtualenv-cache.outputs.cache-hit == 'true'
run: |
conda activate ./.venv
pip install --no-deps -e .
pip install --no-deps -e .[all]
make download-extras
- name: Debug info
@@ -369,7 +369,7 @@ jobs:

- name: Install core package
run: |
pip install $(ls dist/*.whl)
pip install $(ls dist/*.whl)[all]
- name: Download NLTK prerequisites
run: |
@@ -476,7 +476,7 @@ jobs:
id: virtualenv-cache
with:
path: .venv
key: ${{ env.CACHE_PREFIX }}-${{ env.WEEK_NUMBER }}-${{ runner.os }}-${{ env.RUNNER_ARCH }}-${{ env.PYTHON_VERSION }}-cpu-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
key: ${{ env.CACHE_PREFIX }}-${{ env.WEEK_NUMBER }}-${{ runner.os }}-${{ env.RUNNER_ARCH }}-${{ env.PYTHON_VERSION }}-cpu-${{ hashFiles('setup.py') }}-${{ hashFiles('*requirements.txt') }}

- name: Setup virtual environment (no cache hit)
if: steps.virtualenv-cache.outputs.cache-hit != 'true'
@@ -489,7 +489,7 @@
if: steps.virtualenv-cache.outputs.cache-hit == 'true'
run: |
conda activate ./.venv
pip install --no-deps -e .
pip install --no-deps -e .[all]
make download-extras
- name: Debug info
6 changes: 5 additions & 1 deletion CHANGELOG.md
@@ -35,8 +35,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Removed

- Removed dependency on the overrides package
- Removed Tango components, since they now live at /~https://github.com/allenai/tango.
- Removed dependency on the `overrides` package

### Changed

- Make `checklist` an optional dependency.

## [v2.8.0](/~https://github.com/allenai/allennlp/releases/tag/v2.8.0) - 2021-11-01

3 changes: 1 addition & 2 deletions CONTRIBUTING.md
@@ -79,8 +79,7 @@ When you're ready to contribute code to address an open issue, please follow the
Once your virtual environment is activated, you can install your local clone in "editable mode" with

pip install -U pip setuptools wheel
pip install -e .
pip install -r dev-requirements.txt
pip install -e .[dev,all]

The "editable mode" comes from the `-e` argument to `pip`, and essential just creates a symbolic link from the site-packages directory of your virtual environment to the source code in your local clone. That way any changes you make will be immediately reflected in your virtual environment.

6 changes: 4 additions & 2 deletions Dockerfile
@@ -17,13 +17,15 @@ WORKDIR /stage/allennlp
# necessary to install the dependencies.
COPY allennlp/version.py allennlp/version.py
COPY setup.py .
COPY requirements.txt .
COPY dev-requirements.txt .
RUN touch allennlp/__init__.py \
&& touch README.md \
&& pip install --no-cache-dir -e .
&& pip install --no-cache-dir -e .[all]

# Now add the full package source and re-install just the package.
COPY allennlp allennlp
RUN pip install --no-cache-dir --no-deps -e .
RUN pip install --no-cache-dir --no-deps -e .[all]

COPY Makefile .
RUN make download-extras
20 changes: 19 additions & 1 deletion Makefile
@@ -68,6 +68,24 @@ test :
--cov=$(SRC) \
--cov-report=xml

.PHONY : test-without-checklist
test-without-checklist :
pytest --color=yes -v -rf --durations=40 \
--cov-config=.coveragerc \
--cov=$(SRC) \
--cov-report=xml \
--ignore-glob=*checklist*

.PHONY : test-checklist
test-checklist :
pytest --color=yes -v -rf --durations=40 \
--cov-config=.coveragerc \
--cov=$(SRC) \
--cov-report=xml \
tests/ \
-k checklist


.PHONY : gpu-tests
gpu-tests : check-for-cuda
pytest --color=yes -v -rf --durations=20 \
@@ -95,7 +113,7 @@ install :
# python setup.py install_egg_info
# Install torch ecosystem first.
$(TORCH_INSTALL)
pip install -e . -r dev-requirements.txt
pip install -e .[dev,all]
# These nltk packages are used by the 'checklist' module.
$(NLTK_DOWNLOAD_CMD)

12 changes: 10 additions & 2 deletions README.md
Expand Up @@ -197,6 +197,8 @@ It's recommended that you install the PyTorch ecosystem **before** installing Al

After that, just run `pip install allennlp`.



> ⚠️ If you're using Python 3.7 or greater, you should ensure that you don't have the PyPI version of `dataclasses` installed after running the above command, as this could cause issues on certain platforms. You can quickly check this by running `pip freeze | grep dataclasses`. If you see something like `dataclasses=0.6` in the output, then just run `pip uninstall -y dataclasses`.
If you need pointers on setting up an appropriate Python environment or would like to install AllenNLP using a different method, see below.
@@ -229,6 +231,13 @@ Installing the library and dependencies is simple using `pip`.
pip install allennlp
```

To install the optional dependencies, such as `checklist`, run

```bash
pip install allennlp[checklist]
```
Or you can just install all optional dependencies with `pip install allennlp[all]`.
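
As a rough illustration of how such extras are typically declared, a `setup.py` might wire them up as below. The actual `setup.py` changes are not shown in this diff, so the structure, abridged dependency lists, and pins here are assumptions rather than the project's real values; only the `checklist` and `all` extra names and the `checklist==0.0.11` pin are taken from this commit.

```python
# Illustrative sketch only: a typical way to declare optional extras in setup.py.
# The dependency lists below are abridged assumptions for demonstration.
from setuptools import find_packages, setup

setup(
    name="allennlp",
    packages=find_packages(),
    install_requires=[
        "torch>=1.6.0,<1.11.0",  # core dependencies (abridged)
    ],
    extras_require={
        "checklist": ["checklist==0.0.11"],
        "all": ["checklist==0.0.11"],  # union of every optional extra
    },
)
```

With extras declared this way, `pip install allennlp[checklist]` pulls in only the checklist dependency, while `pip install allennlp[all]` installs every optional dependency.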

*Looking for bleeding edge features? You can install nightly releases directly from [pypi](https://pypi.org/project/allennlp/#history)*

AllenNLP installs a script when you install the python package, so you can run allennlp commands just by typing `allennlp` into a terminal. For example, you can now test your installation with `allennlp test-install`.
@@ -300,8 +309,7 @@ Create a Python 3.7 or 3.8 virtual environment, and install AllenNLP in `editabl

```bash
pip install -U pip setuptools wheel
pip install --editable .
pip install -r dev-requirements.txt
pip install --editable .[dev,all]
```

This will make `allennlp` available on your system but it will use the sources from the local clone
13 changes: 12 additions & 1 deletion allennlp/commands/__init__.py
@@ -2,6 +2,7 @@
import logging
import sys
from typing import Any, Optional, Tuple, Set
import warnings

from allennlp import __version__
from allennlp.commands.build_vocab import BuildVocab
@@ -18,10 +19,20 @@
from allennlp.commands.count_instances import CountInstances
from allennlp.common.plugins import import_plugins
from allennlp.common.util import import_module_and_submodules
from allennlp.commands.checklist import CheckList

logger = logging.getLogger(__name__)

try:
"""
The `allennlp checklist` command requires installation of the optional dependency `checklist`.
It can be installed with `pip install allennlp[checklist]`.
"""
with warnings.catch_warnings():
warnings.simplefilter("ignore")
from allennlp.commands.checklist import CheckList
except ImportError:
pass


class ArgumentParserWithDefaults(argparse.ArgumentParser):
"""
9 changes: 8 additions & 1 deletion allennlp/commands/checklist.py
@@ -8,13 +8,20 @@
import argparse
import sys
import json
import logging


from allennlp.commands.subcommand import Subcommand
from allennlp.common.checks import check_for_gpu, ConfigurationError
from allennlp.models.archival import load_archive
from allennlp.predictors.predictor import Predictor
from allennlp.confidence_checks.task_checklists.task_suite import TaskSuite

logger = logging.getLogger(__name__)

try:
from allennlp.confidence_checks.task_checklists.task_suite import TaskSuite
except ImportError:
raise


@Subcommand.register("checklist")
29 changes: 19 additions & 10 deletions allennlp/confidence_checks/task_checklists/__init__.py
@@ -1,10 +1,19 @@
from allennlp.confidence_checks.task_checklists.task_suite import TaskSuite
from allennlp.confidence_checks.task_checklists.sentiment_analysis_suite import (
SentimentAnalysisSuite,
)
from allennlp.confidence_checks.task_checklists.question_answering_suite import (
QuestionAnsweringSuite,
)
from allennlp.confidence_checks.task_checklists.textual_entailment_suite import (
TextualEntailmentSuite,
)
import warnings

try:
from allennlp.confidence_checks.task_checklists.task_suite import TaskSuite
from allennlp.confidence_checks.task_checklists.sentiment_analysis_suite import (
SentimentAnalysisSuite,
)
from allennlp.confidence_checks.task_checklists.question_answering_suite import (
QuestionAnsweringSuite,
)
from allennlp.confidence_checks.task_checklists.textual_entailment_suite import (
TextualEntailmentSuite,
)
except ImportError:
warnings.warn(
'To use the checklist integration you should install ``allennlp`` with the "checklist" '
"extra (e.g. ``pip install allennlp[checklist]``) or just install checklist after the fact."
)
raise
5 changes: 5 additions & 0 deletions allennlp/confidence_checks/task_checklists/task_suite.py
@@ -24,6 +24,11 @@ class TaskSuite(Registrable):
[Beyond Accuracy: Behavioral Testing of NLP models with CheckList (Ribeiro et al)]
(https://api.semanticscholar.org/CorpusID:218551201).
!!! Note
To use the checklist integration you should install ``allennlp`` with the
"checklist" extra (e.g. ``pip install allennlp[checklist]``) or just install
`checklist` after the fact.
Task suites are intended to be used as a form of behavioral testing
for NLP models to check for robustness across several general linguistic
capabilities; eg. Vocabulary, SRL, Negation, etc.
39 changes: 39 additions & 0 deletions requirements.txt
@@ -0,0 +1,39 @@
################################
###### Core dependencies #######
################################
torch>=1.6.0,<1.11.0
torchvision>=0.8.1,<0.12.0
cached-path>=1.0.2,<2.0.0
fairscale==0.4.5
jsonnet>=0.10.0 ; sys.platform != 'win32'
nltk<3.6.6
spacy>=2.1.0,<3.3
numpy
tensorboardX>=1.2
requests>=2.18
tqdm>=4.62
h5py
scikit-learn
scipy
pytest
transformers>=4.1,<4.17
sentencepiece
dataclasses;python_version<'3.7'
filelock>=3.3,<3.5
lmdb
more-itertools
termcolor==1.1.0
wandb>=0.10.0,<0.13.0
huggingface_hub>=0.0.16
datasets>=1.2.1,<2.0
dill
base58
sqlitedict

##################################################
###### Extra dependencies for integrations #######
##################################################
# NOTE: we use a special trailing comment on each line to denote which extras
# each package is needed by. For example, checklist is needed by the 'checklist' extra
# that you install with 'pip install allennlp[checklist]'.
checklist==0.0.11 # needed by: checklist
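
The NOTE above implies that `setup.py` derives its `install_requires` and `extras_require` from these trailing comments. That parsing logic lives in `setup.py`, which is not shown in this diff, so the following is only a hypothetical sketch of how such a parser could work; the function name `parse_requirements_file` and the details are illustrative, not the commit's actual code.

```python
# Hypothetical sketch: turn requirements.txt lines with a trailing
# "# needed by: <extra>[, <extra>...]" comment into extras_require entries,
# and every other requirement line into install_requires.
from collections import defaultdict
from typing import Dict, List, Tuple


def parse_requirements_file(path: str) -> Tuple[List[str], Dict[str, List[str]]]:
    install_requires: List[str] = []
    extras: Dict[str, List[str]] = defaultdict(list)
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue  # skip blank lines and full-line comments
            if "# needed by:" in line:
                requirement, _, extra_names = line.partition("# needed by:")
                for extra in extra_names.split(","):
                    extras[extra.strip()].append(requirement.strip())
            else:
                install_requires.append(line)
    # An "all" extra is then the union of every other extra.
    extras["all"] = sorted({req for reqs in extras.values() for req in reqs})
    return install_requires, dict(extras)


install_requires, extras_require = parse_requirements_file("requirements.txt")
```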
8 changes: 4 additions & 4 deletions scripts/check_torch_version.py
@@ -45,16 +45,16 @@ def _get_latest_torch_version() -> Tuple[str, str, str]:


def _get_torch_version_upper_limit() -> Tuple[str, str, str]:
with open("setup.py") as f:
with open("requirements.txt") as f:
for line in f:
# The torch version line should look like:
# "torch>=X.Y.Z,<X.V.0",
if '"torch>=' in line:
version = tuple(line.split('"')[1].split("<")[1].strip().split("."))
if "torch>=" in line:
version = tuple(line.split("<")[1].strip().split("."))
assert len(version) == 3, f"Bad parsed version '{version}'"
break
else:
raise RuntimeError("could not find torch version spec in setup.py")
raise RuntimeError("could not find torch version spec in requirements.txt")
return cast(Tuple[str, str, str], version)

