Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Run checklist suites in AllenNLP (#5065)
Browse files Browse the repository at this point in the history
* run checklist suites from command line

* specify output file

* separate task from checklist suite

* qa task

* adding describe, misc updates

* fix docs, TE suite

* update changelog

* bug fix

* adding default tests

* qa defaults

* typing, docs, minor updates

* more updates

* set add_default_tests to True

* remove commented lines

* capitalizing help strings

* does this work

* adding start_method to test

* skipping test

* oops, actually fix

* temp fix to check memory issues

* Skip more memory hungry tests

* fix

* fixing professions

* Update setup.py

Co-authored-by: Pete <petew@allenai.org>

* Update CHANGELOG.md

Co-authored-by: Pete <petew@allenai.org>

* Update allennlp/sanity_checks/task_checklists/task_suite.py

Co-authored-by: Pete <petew@allenai.org>

* formatting functions

Co-authored-by: Evan Pete Walsh <petew@allenai.org>
  • Loading branch information
2 people authored and dirkgr committed May 10, 2021
1 parent 7c06b49 commit 402bc78
Show file tree
Hide file tree
Showing 21 changed files with 2,345 additions and 3 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased

### Added

- Added `TaskSuite` base class and command line functionality for running [`checklist`](https://github.com/marcotcr/checklist) test suites, along with implementations for `SentimentAnalysisSuite`, `QuestionAnsweringSuite`, and `TextualEntailmentSuite`. These can be found in the `allennlp.sanity_checks.task_checklists` module.


## [v2.4.0](https://github.com/allenai/allennlp/releases/tag/v2.4.0) - 2021-04-22

Expand Down Expand Up @@ -40,7 +46,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Fixed a bug with the `ShardedDatasetReader` when used with multi-process data loading (https://github.com/allenai/allennlp/issues/5132).


## [v2.3.0](https://github.com/allenai/allennlp/releases/tag/v2.3.0) - 2021-04-14

### Added
Expand Down Expand Up @@ -103,6 +108,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `Model.get_parameters_for_histogram_tensorboard_logging` is deprecated in favor of
`Model.get_parameters_for_histogram_logging`.


### Fixed

- Makes sure tensors that are stored in `TensorCache` always live on CPUs
Expand Down
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ install :
# See https://github.com/pypa/pip/issues/4537.
python setup.py install_egg_info
pip install --upgrade --upgrade-strategy eager -e . -r dev-requirements.txt

# Docs are not built on docker, and the runner is unable to find
# the nltk_data folder. Hence, we download the requirement.
python -c 'import nltk; nltk.download("sentiwordnet")'
#
# Documentation helpers.
#
Expand Down
1 change: 1 addition & 0 deletions allennlp/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from allennlp.commands.count_instances import CountInstances
from allennlp.common.plugins import import_plugins
from allennlp.common.util import import_module_and_submodules
from allennlp.commands.checklist import CheckList

logger = logging.getLogger(__name__)

Expand Down
199 changes: 199 additions & 0 deletions allennlp/commands/checklist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
"""
The `checklist` subcommand allows you to sanity check your
model's predictions using a trained model and its
[`Predictor`](../predictors/predictor.md#predictor) wrapper.
"""

from typing import Optional, Dict, Any, List
import argparse
import sys
import json

from overrides import overrides

from allennlp.commands.subcommand import Subcommand
from allennlp.common.checks import check_for_gpu, ConfigurationError
from allennlp.models.archival import load_archive
from allennlp.predictors.predictor import Predictor
from allennlp.sanity_checks.task_checklists.task_suite import TaskSuite


@Subcommand.register("checklist")
class CheckList(Subcommand):
    """
    The `checklist` subcommand: runs an archived model through a checklist
    task suite.

    The parsed command-line arguments are dispatched to `_run_suite`.
    """

    @overrides
    def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.ArgumentParser:
        """
        Create the subparser for this subcommand and declare its arguments.

        `parser` is the sub-parsers action the new parser is attached to.
        Returns the created subparser, whose default `func` is `_run_suite`.
        """
        description = """Run the specified model through a checklist suite."""
        subparser = parser.add_parser(
            self.name,
            description=description,
            help="Run a trained model through a checklist suite.",
        )

        subparser.add_argument(
            "archive_file", type=str, help="The archived model to make predictions with"
        )

        subparser.add_argument("task", type=str, help="The name of the task suite")

        subparser.add_argument("--checklist-suite", type=str, help="The checklist suite path")

        # `nargs="+"` collects whitespace-separated values, so the example in
        # the help text must show that form rather than a bracketed,
        # comma-separated list (which would be parsed as the literal tokens
        # "[Vocabulary," and "Robustness]").
        subparser.add_argument(
            "--capabilities",
            nargs="+",
            default=[],
            help=(
                "An optional list of capability names, separated by spaces. "
                'Eg. "--capabilities Vocabulary Robustness"'
            ),
        )

        subparser.add_argument(
            "--max-examples",
            type=int,
            default=None,
            help="Maximum number of examples to check per test.",
        )

        subparser.add_argument(
            "--task-suite-args",
            type=str,
            default="",
            help=(
                "An optional JSON structure used to provide additional parameters to the task suite"
            ),
        )

        subparser.add_argument(
            "--print-summary-args",
            type=str,
            default="",
            help=(
                "An optional JSON structure used to provide additional "
                "parameters for printing test summary"
            ),
        )

        subparser.add_argument("--output-file", type=str, help="Path to output file")

        subparser.add_argument(
            "--cuda-device", type=int, default=-1, help="ID of GPU to use (if any)"
        )

        subparser.add_argument(
            "--predictor", type=str, help="Optionally specify a specific predictor to use"
        )

        subparser.add_argument(
            "--predictor-args",
            type=str,
            default="",
            help=(
                "An optional JSON structure used to provide additional parameters to the predictor"
            ),
        )

        subparser.set_defaults(func=_run_suite)

        return subparser


def _get_predictor(args: argparse.Namespace) -> Predictor:
    """
    Load the archive named in `args` and wrap it in a `Predictor`.

    Reads `cuda_device`, `archive_file`, `predictor` and `predictor_args`
    from `args`. `predictor_args` is a JSON string; when it is empty (or only
    whitespace) an empty dict of extra arguments is passed instead.
    """
    check_for_gpu(args.cuda_device)
    loaded_archive = load_archive(args.archive_file, cuda_device=args.cuda_device)

    # The JSON is decoded only after the archive has been loaded, as before.
    raw_extra_args = args.predictor_args.strip()
    extra_args = json.loads(raw_extra_args) if raw_extra_args else {}

    return Predictor.from_archive(loaded_archive, args.predictor, extra_args=extra_args)


def _get_task_suite(args: argparse.Namespace) -> TaskSuite:
    """
    Construct the `TaskSuite` selected by `args.task`.

    Raises a `ConfigurationError` when `args.task` is not among the names
    returned by `TaskSuite.list_available()`. `args.task_suite_args` is a JSON
    string; when it is empty (or only whitespace) an empty dict of extra
    arguments is used. `args.checklist_suite` is passed through as the suite
    file.
    """
    known_tasks = TaskSuite.list_available()
    if args.task not in known_tasks:
        raise ConfigurationError(
            f"'{args.task}' is not a recognized task suite. "
            f"Available tasks are: {known_tasks}."
        )

    raw_suite_args = args.task_suite_args.strip()
    suite_kwargs = json.loads(raw_suite_args) if raw_suite_args else {}

    return TaskSuite.constructor(
        name=args.task,
        suite_file=args.checklist_suite,
        extra_args=suite_kwargs,
    )


class _CheckListManager:
    """
    Runs a task suite against a predictor and prints the resulting summary.

    The summary is written to `output_file` when a path is given, otherwise
    to `sys.stdout`.
    """

    def __init__(
        self,
        task_suite: TaskSuite,
        predictor: Predictor,
        capabilities: Optional[List[str]] = None,
        max_examples: Optional[int] = None,
        output_file: Optional[str] = None,
        print_summary_args: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Store the run configuration and open the output file, if any.

        `capabilities` and `max_examples` are forwarded to the suite's `run`.
        `print_summary_args` are extra keyword arguments for the suite's
        `summary`; a non-empty `capabilities` list is added to them under the
        key "capabilities".
        """
        self._task_suite = task_suite
        self._predictor = predictor
        self._capabilities = capabilities
        self._max_examples = max_examples

        # Work on a copy so that adding "capabilities" below never mutates the
        # dictionary owned by the caller.
        self._print_summary_args = dict(print_summary_args) if print_summary_args else {}
        if capabilities:
            self._print_summary_args["capabilities"] = capabilities

        # Opened eagerly so that an unwritable path fails before the suite runs.
        self._output_file = None if output_file is None else open(output_file, "w")

    def run(self) -> None:
        """
        Run the suite on the predictor and write its summary.

        The output file (when one was opened) is closed afterwards, even if
        running the suite or printing the summary raises.
        """
        try:
            self._task_suite.run(
                self._predictor, capabilities=self._capabilities, max_examples=self._max_examples
            )

            # We pass in an IO object.
            destination = sys.stdout if self._output_file is None else self._output_file
            self._task_suite.summary(file=destination, **self._print_summary_args)
        finally:
            # If `_output_file` was None, there would be nothing to close.
            if self._output_file is not None:
                self._output_file.close()


def _run_suite(args: argparse.Namespace) -> None:
    """
    Entry point of the `checklist` subcommand.

    Builds the task suite and the predictor from `args`, decodes the optional
    JSON in `args.print_summary_args` (an empty or whitespace-only string
    yields an empty dict), and runs everything through a `_CheckListManager`.
    """
    task_suite = _get_task_suite(args)
    predictor = _get_predictor(args)

    raw_summary_args = args.print_summary_args.strip()
    summary_kwargs = json.loads(raw_summary_args) if raw_summary_args else {}

    _CheckListManager(
        task_suite,
        predictor,
        capabilities=args.capabilities,
        max_examples=args.max_examples,
        output_file=args.output_file,
        print_summary_args=summary_kwargs,
    ).run()
35 changes: 35 additions & 0 deletions allennlp/common/testing/checklist_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from typing import Optional
from checklist.test_suite import TestSuite
from checklist.test_types import MFT as MinimumFunctionalityTest
from allennlp.sanity_checks.task_checklists.task_suite import TaskSuite


@TaskSuite.register("fake-task-suite")
class FakeTaskSuite(TaskSuite):
"""
Fake checklist suite for testing purposes.

Registered with `TaskSuite` under the name "fake-task-suite". The
constructor stores `fake_arg1` and `fake_arg2` on the instance, creates a
new `TestSuite()` when `suite` is falsy, adds a `MinimumFunctionalityTest`
named "fake test 1", and hands the suite to the base-class constructor.
"""

def __init__(
self,
suite: Optional[TestSuite] = None,
fake_arg1: Optional[int] = None,
fake_arg2: Optional[int] = None,
):
# The fake arguments are only stored; nothing in this class reads them.
self._fake_arg1 = fake_arg1
self._fake_arg2 = fake_arg2

# Fall back to a fresh suite when the caller did not supply one.
if not suite:
suite = TestSuite()

# NOTE(review): indentation is not visible in this view, so it is unclear
# whether the test below is added only to a freshly created suite or also
# to a caller-provided one -- confirm against the original file.
# Adding a simple checklist test.
test = MinimumFunctionalityTest(
["sentence 1", "sentence 2"],
labels=0,
name="fake test 1",
capability="fake capability",
description="Test's description",
)
suite.add(test)

super().__init__(suite)
10 changes: 10 additions & 0 deletions allennlp/sanity_checks/task_checklists/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from allennlp.sanity_checks.task_checklists.task_suite import TaskSuite
from allennlp.sanity_checks.task_checklists.sentiment_analysis_suite import (
SentimentAnalysisSuite,
)
from allennlp.sanity_checks.task_checklists.question_answering_suite import (
QuestionAnsweringSuite,
)
from allennlp.sanity_checks.task_checklists.textual_entailment_suite import (
TextualEntailmentSuite,
)
Loading

0 comments on commit 402bc78

Please sign in to comment.