From 0c42cb979d70bff63babf0811af04550b5bc41fb Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 30 Jan 2020 12:17:12 -0800 Subject: [PATCH 01/52] example for feedback --- torch_datasets.py | 194 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 torch_datasets.py diff --git a/torch_datasets.py b/torch_datasets.py new file mode 100644 index 00000000000..55c8c766e06 --- /dev/null +++ b/torch_datasets.py @@ -0,0 +1,194 @@ +from typing import Dict, List, cast, Tuple +import json +import logging +from overrides import overrides + +from torch.utils.data import Dataset as TorchDataset +from torch.utils.data import DataLoader +from torch.utils.data import Sampler, BatchSampler, SequentialSampler, SubsetRandomSampler + +from allennlp.common.registrable import Registrable +from allennlp.common.util import add_noise_to_dict_values, lazy_groups_of +from allennlp.data.batch import Batch as AllennlpBatch +from allennlp.data.instance import Instance +from allennlp.data.vocabulary import Vocabulary +from allennlp.data import Token +from allennlp.common.file_utils import cached_path +from allennlp.data.fields import Field, TextField, LabelField, MetadataField +from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer + +logger = logging.getLogger(__name__) + +class Dataset(TorchDataset, Registrable): + def __init__(self): + self.vocab: Vocabulary = None + + def text_to_instance(self, *inputs) -> Instance: + + raise NotImplementedError + + def __getitem__(self) -> Instance: + + raise NotImplementedError + + def index_with(self, vocab: Vocabulary): + self.vocab = vocab + + +""" +Here we have two SNLI readers in both of the different styles. +They are only slightly different. +""" + + +class SnliDataset(Dataset): + def __init__( + self, file_path: str, token_indexers: Dict[str, TokenIndexer] = None, lazy: bool = False + ) -> None: + + super().__init__() + + self._tokenizer = lambda x: [Token(t) for t in x.split(" ")] + self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()} + + file_path = cached_path(file_path) + self.examples = [] + + for line in open(file_path, "r"): + example = json.loads(line) + if example["gold_label"] == "-": + # These were cases where the annotators disagreed; we'll just skip them. It's + # like 800 out of 500k examples in the training data. + continue + self.examples.append(example) + + def __len__(self): + return len(self.examples) + + def __getitem__(self, idx) -> Instance: + example = self.examples[idx] + instance = self.text_to_instance( + example["sentence1"], example["sentence2"], example["gold_label"] + ) + + # This is not ideal, we don't want a user to have to worry about this + # but at the same time, it's expensive and it would be nice if it could happen here. + # It's possible we could have this in the super class. 
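+        # One possible base-class hook (a hedged sketch, not part of this patch;
+        # the helper name `_maybe_index` is made up):
+        #
+        #     def _maybe_index(self, instance: Instance) -> Instance:
+        #         if self.vocab is not None:
+        #             instance.index_fields(self.vocab)
+        #         return instance
+        #
+        # so that every subclass's __getitem__ could simply end with
+        # `return self._maybe_index(instance)`.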
+ if self.vocab is not None: + instance.index_fields(self.vocab) + return instance + + @overrides + def text_to_instance(self, premise: str, hypothesis: str, label: str = None) -> Instance: + + fields: Dict[str, Field] = {} + premise_tokens = self._tokenizer(premise) + hypothesis_tokens = self._tokenizer(hypothesis) + fields["premise"] = TextField(premise_tokens, self._token_indexers) + fields["hypothesis"] = TextField(hypothesis_tokens, self._token_indexers) + if label: + fields["label"] = LabelField(label) + + metadata = { + "premise_tokens": [x.text for x in premise_tokens], + "hypothesis_tokens": [x.text for x in hypothesis_tokens], + } + fields["metadata"] = MetadataField(metadata) + return Instance(fields) + + +class BatchInstanceSampler(BatchSampler): + + def __init__(self, data, batch_size: int, sorting_keys: List[Tuple[str, str]] = None, padding_noise: float = 0.1): + + self.vocab = data.vocab + self._sorting_keys = sorting_keys + self._padding_noise = padding_noise + self._batch_size = batch_size + self.data = data + + def _argsort_by_padding(self, instances: List[Instance]) -> List[int]: + """ + Sorts the instances by their padding lengths, using the keys in + `sorting_keys` (in the order in which they are provided). `sorting_keys` is a list of + `(field_name, padding_key)` tuples. + """ + if not self._sorting_keys: + logger.info("No sorting keys given; trying to guess a good one") + self._guess_sorting_keys(instances) + logger.info(f"Using {self._sorting_keys} as the sorting keys") + instances_with_lengths = [] + for instance in instances: + # Make sure instance is indexed before calling .get_padding + instance.index_fields(self.vocab) + padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) + if self._padding_noise > 0.0: + noisy_lengths = {} + for field_name, field_lengths in padding_lengths.items(): + noisy_lengths[field_name] = add_noise_to_dict_values( + field_lengths, self._padding_noise + ) + padding_lengths = noisy_lengths + instance_with_lengths = ( + [ + padding_lengths[field_name][padding_key] + for (field_name, padding_key) in self._sorting_keys + ], + instance, + ) + instances_with_lengths.append(instance_with_lengths) + with_indices = [(x, i) for i, x in enumerate(instances_with_lengths)] + with_indices.sort(key=lambda x: x[0][0]) + return [instance_with_index[-1] for instance_with_index in with_indices] + + def __iter__(self): + + indices = self._argsort_by_padding(self.data) + + for group in lazy_groups_of(indices, self._batch_size): + + yield list(group) + + def _guess_sorting_keys(self, instances: List[Instance]) -> None: + max_length = 0.0 + longest_padding_key: Tuple[str, str] = None + for instance in instances: + instance.index_fields(self.vocab) + padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) + for field_name, field_padding in padding_lengths.items(): + for padding_key, length in field_padding.items(): + if length > max_length: + max_length = length + longest_padding_key = (field_name, padding_key) + if not longest_padding_key: + # This shouldn't ever happen (you basically have to have an empty instance list), but + # just in case... 
+ raise AssertionError( + "Found no field that needed padding; we are surprised you got this error, please " + "open an issue on github" + ) + self._sorting_keys = [longest_padding_key] + + +data = SnliDataset("snli_20.jsonl") +vocab = Vocabulary.from_instances(data) +data.index_with(vocab) + + +sampler = SequentialSampler(data) +batch_sampler = BatchInstanceSampler(data, 4) + + +def allennlp_collocate(batch): + + batch = AllennlpBatch(batch) + return batch.as_tensor_dict(batch.get_padding_lengths()) + +batch_generator = DataLoader(data, batch_sampler=batch_sampler, collate_fn=allennlp_collocate) + +iterator = iter(batch_generator) + +print() +for i, x in enumerate(batch_generator): + + print(x) From 80049f8af32375aed6b8e294c7b4ddcfdf3d9f3d Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Wed, 19 Feb 2020 09:42:28 -0800 Subject: [PATCH 02/52] remove all existing multiprocessing --- allennlp/data/dataset_readers/__init__.py | 1 - .../multiprocess_dataset_reader.py | 238 ----------------- allennlp/data/iterators/__init__.py | 1 - .../data/iterators/multiprocess_iterator.py | 246 ------------------ .../language_model_token_embedder.py | 2 - .../multiprocess_dataset_reader_test.py | 201 -------------- .../iterators/multiprocess_iterator_test.py | 66 ----- .../experiment_multiprocessing_reader.jsonnet | 30 --- allennlp/tests/training/trainer_test.py | 15 +- 9 files changed, 1 insertion(+), 799 deletions(-) delete mode 100644 allennlp/data/dataset_readers/multiprocess_dataset_reader.py delete mode 100644 allennlp/data/iterators/multiprocess_iterator.py delete mode 100644 allennlp/tests/data/dataset_readers/multiprocess_dataset_reader_test.py delete mode 100644 allennlp/tests/data/iterators/multiprocess_iterator_test.py delete mode 100644 allennlp/tests/fixtures/language_model/experiment_multiprocessing_reader.jsonnet diff --git a/allennlp/data/dataset_readers/__init__.py b/allennlp/data/dataset_readers/__init__.py index 23d1346e43b..229be33c1df 100644 --- a/allennlp/data/dataset_readers/__init__.py +++ b/allennlp/data/dataset_readers/__init__.py @@ -17,7 +17,6 @@ from allennlp.data.dataset_readers.interleaving_dataset_reader import InterleavingDatasetReader from allennlp.data.dataset_readers.masked_language_modeling import MaskedLanguageModelingReader from allennlp.data.dataset_readers.next_token_lm import NextTokenLmReader -from allennlp.data.dataset_readers.multiprocess_dataset_reader import MultiprocessDatasetReader from allennlp.data.dataset_readers.penn_tree_bank import PennTreeBankConstituencySpanDatasetReader from allennlp.data.dataset_readers.semantic_role_labeling import SrlReader from allennlp.data.dataset_readers.semantic_dependency_parsing import ( diff --git a/allennlp/data/dataset_readers/multiprocess_dataset_reader.py b/allennlp/data/dataset_readers/multiprocess_dataset_reader.py deleted file mode 100644 index 88d0ff53771..00000000000 --- a/allennlp/data/dataset_readers/multiprocess_dataset_reader.py +++ /dev/null @@ -1,238 +0,0 @@ -import glob -import logging -import os -from queue import Empty -from typing import List, Iterable, Iterator, Optional - -import numpy as np -from torch.multiprocessing import Process, Queue, Value, log_to_stderr - -from allennlp.data.dataset_readers.dataset_reader import DatasetReader -from allennlp.data.instance import Instance - - -class logger: - """ - multiprocessing.log_to_stderr causes some output in the logs - even when we don't use this dataset reader. 
This is a small hack - to instantiate the stderr logger lazily only when it's needed - (which is only when using the MultiprocessDatasetReader) - """ - - _logger = None - - @classmethod - def info(cls, message: str) -> None: - - if cls._logger is None: - cls._logger = log_to_stderr() - cls._logger.setLevel(logging.INFO) - - cls._logger.info(message) - - -def _worker( - reader: DatasetReader, - input_queue: Queue, - output_queue: Queue, - num_active_workers: Value, - num_inflight_items: Value, - worker_id: int, -) -> None: - """ - A worker that pulls filenames off the input queue, uses the dataset reader - to read them, and places the generated instances on the output queue. When - there are no filenames left on the input queue, it decrements - num_active_workers to signal completion. - """ - logger.info(f"Reader worker: {worker_id} PID: {os.getpid()}") - # Keep going until you get a file_path that's None. - while True: - file_path = input_queue.get() - if file_path is None: - # It's important that we close and join the queue here before - # decrementing num_active_workers. Otherwise our parent may join us - # before the queue's feeder thread has passed all buffered items to - # the underlying pipe resulting in a deadlock. - # - # See: - # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#pipes-and-queues - # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#programming-guidelines - output_queue.close() - output_queue.join_thread() - # Decrementing is not atomic. - # See https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Value. - with num_active_workers.get_lock(): - num_active_workers.value -= 1 - logger.info(f"Reader worker {worker_id} finished") - break - - logger.info(f"reading instances from {file_path}") - for instance in reader.read(file_path): - with num_inflight_items.get_lock(): - num_inflight_items.value += 1 - output_queue.put(instance) - - -class QIterable(Iterable[Instance]): - """ - You can't set attributes on Iterators, so this is just a dumb wrapper - that exposes the output_queue. - """ - - def __init__(self, output_queue_size, epochs_per_read, num_workers, reader, file_path) -> None: - self.output_queue = Queue(output_queue_size) - self.epochs_per_read = epochs_per_read - self.num_workers = num_workers - self.reader = reader - self.file_path = file_path - - # Initialized in start. - self.input_queue: Optional[Queue] = None - self.processes: List[Process] = [] - # The num_active_workers and num_inflight_items counts in conjunction - # determine whether there could be any outstanding instances. - self.num_active_workers: Optional[Value] = None - self.num_inflight_items: Optional[Value] = None - - def __iter__(self) -> Iterator[Instance]: - self.start() - - # Keep going as long as not all the workers have finished or there are items in flight. - while self.num_active_workers.value > 0 or self.num_inflight_items.value > 0: - # Inner loop to minimize locking on self.num_active_workers. - while True: - try: - # Non-blocking to handle the empty-queue case. - yield self.output_queue.get(block=False, timeout=1.0) - with self.num_inflight_items.get_lock(): - self.num_inflight_items.value -= 1 - except Empty: - # The queue could be empty because the workers are - # all finished or because they're busy processing. - # The outer loop distinguishes between these two - # cases. 
- break - - self.join() - - def start(self) -> None: - shards = glob.glob(self.file_path) - # Ensure a consistent order before shuffling for testing. - shards.sort() - num_shards = len(shards) - - # If we want multiple epochs per read, put shards in the queue multiple times. - self.input_queue = Queue(num_shards * self.epochs_per_read + self.num_workers) - for _ in range(self.epochs_per_read): - np.random.shuffle(shards) - for shard in shards: - self.input_queue.put(shard) - - # Then put a None per worker to signify no more files. - for _ in range(self.num_workers): - self.input_queue.put(None) - - assert ( - not self.processes - ), "Process list non-empty! You must call QIterable.join() before restarting." - self.num_active_workers = Value("i", self.num_workers) - self.num_inflight_items = Value("i", 0) - for worker_id in range(self.num_workers): - process = Process( - target=_worker, - args=( - self.reader, - self.input_queue, - self.output_queue, - self.num_active_workers, - self.num_inflight_items, - worker_id, - ), - ) - logger.info(f"starting worker {worker_id}") - process.start() - self.processes.append(process) - - def join(self) -> None: - for process in self.processes: - process.join() - self.processes.clear() - - def __del__(self) -> None: - """ - Terminate processes if the user hasn't joined. This is necessary as - leaving stray processes running can corrupt shared state. In brief, - we've observed shared memory counters being reused (when the memory was - free from the perspective of the parent process) while the stray - workers still held a reference to them. - - For a discussion of using destructors in Python in this manner, see - https://eli.thegreenplace.net/2009/06/12/safely-using-destructors-in-python/. - """ - for process in self.processes: - process.terminate() - - -@DatasetReader.register("multiprocess") -class MultiprocessDatasetReader(DatasetReader): - """ - Wraps another dataset reader and uses it to read from multiple input files using multiple - processes. Note that in this case the `file_path` passed to `read()` should be a glob, and - that the dataset reader will return instances from all files matching the glob. The instances - will always be read lazily. - - The order the files are processed in is a function of Numpy's random state up to non-determinism - caused by using multiple worker processes. This can be avoided by setting `num_workers` to 1. - - # Parameters - - base_reader : `DatasetReader` - Each process will use this dataset reader to read zero or more files. - num_workers : `int` - How many data-reading processes to run simultaneously. - epochs_per_read : `int`, (optional, default=1) - Normally a call to `DatasetReader.read()` returns a single epoch worth of instances, and - your `DataIterator` handles iteration over multiple epochs. However, in the - multiple-process case, it's possible that you'd want finished workers to continue on to the - next epoch even while others are still finishing the previous epoch. Passing in a value - larger than 1 allows that to happen. - output_queue_size : `int`, (optional, default=1000) - The size of the queue on which read instances are placed to be yielded. - You might need to increase this if you're generating instances too quickly. - """ - - def __init__( - self, - base_reader: DatasetReader, - num_workers: int, - epochs_per_read: int = 1, - output_queue_size: int = 1000, - **kwargs, - ) -> None: - # Multiprocess reader is intrinsically lazy. 
- kwargs["lazy"] = True - super().__init__(**kwargs) - - self.reader = base_reader - self.num_workers = num_workers - self.epochs_per_read = epochs_per_read - self.output_queue_size = output_queue_size - - def text_to_instance(self, *args, **kwargs) -> Instance: - """ - Just delegate to the base reader text_to_instance. - """ - return self.reader.text_to_instance(*args, **kwargs) # type: ignore - - def _read(self, file_path: str) -> Iterable[Instance]: - raise RuntimeError("Multiprocess reader implements read() directly.") - - def read(self, file_path: str) -> Iterable[Instance]: - return QIterable( - output_queue_size=self.output_queue_size, - epochs_per_read=self.epochs_per_read, - num_workers=self.num_workers, - reader=self.reader, - file_path=file_path, - ) diff --git a/allennlp/data/iterators/__init__.py b/allennlp/data/iterators/__init__.py index 08094270cfb..1e2c33bcf0c 100644 --- a/allennlp/data/iterators/__init__.py +++ b/allennlp/data/iterators/__init__.py @@ -7,6 +7,5 @@ from allennlp.data.iterators.basic_iterator import BasicIterator from allennlp.data.iterators.bucket_iterator import BucketIterator from allennlp.data.iterators.homogeneous_batch_iterator import HomogeneousBatchIterator -from allennlp.data.iterators.multiprocess_iterator import MultiprocessIterator from allennlp.data.iterators.pass_through_iterator import PassThroughIterator from allennlp.data.iterators.same_language_iterator import SameLanguageIterator diff --git a/allennlp/data/iterators/multiprocess_iterator.py b/allennlp/data/iterators/multiprocess_iterator.py deleted file mode 100644 index 6e6f9f4e2fa..00000000000 --- a/allennlp/data/iterators/multiprocess_iterator.py +++ /dev/null @@ -1,246 +0,0 @@ -import logging -import os -from queue import Empty -from typing import Iterable, Iterator, List, Optional - -from torch.multiprocessing import JoinableQueue, Process, Queue, get_logger - -from allennlp.common.checks import ConfigurationError -from allennlp.data.batch import Batch -from allennlp.data.dataset_readers.multiprocess_dataset_reader import QIterable -from allennlp.data.instance import Instance -from allennlp.data.iterators.data_iterator import DataIterator, TensorDict -from allennlp.data.vocabulary import Vocabulary - -logger = get_logger() -logger.setLevel(logging.INFO) - - -def _create_tensor_dicts_from_queue( - input_queue: Queue, output_queue: Queue, iterator: DataIterator, shuffle: bool, index: int -) -> None: - """ - Pulls instances from `input_queue`, converts them into `TensorDict`s - using `iterator`, and puts them on the `output_queue`. - """ - logger.info(f"Iterator worker: {index} PID: {os.getpid()}") - - def instances() -> Iterator[Instance]: - instance = input_queue.get() - while instance is not None: - yield instance - instance = input_queue.get() - - for tensor_dict in iterator(instances(), num_epochs=1, shuffle=shuffle): - output_queue.put(tensor_dict) - - output_queue.put(index) - - # We need to ensure we've gotten all the tensors out of this queue before - # this process ends. Otherwise we'll crash. See - # /~https://github.com/pytorch/pytorch/issues/7181. This appears to be an - # issue specifically with tensors, perhaps due to the refcounting involved - # in managing them in shared memory. If you're working on this code, be - # aware that I've only been able to reproduce this issue on Linux. Testing - # on a Mac alone is not sufficient. 
- output_queue.join() - - -def _create_tensor_dicts_from_qiterable( - qiterable: QIterable, output_queue: Queue, iterator: DataIterator, shuffle: bool, index: int -) -> None: - """ - Pulls instances from `qiterable.output_queue`, converts them into - `TensorDict`s using `iterator`, and puts them on the `output_queue`. - """ - logger.info(f"Iterator worker: {index} PID: {os.getpid()}") - - def instances() -> Iterator[Instance]: - while qiterable.num_active_workers.value > 0 or qiterable.num_inflight_items.value > 0: - while True: - try: - yield qiterable.output_queue.get(block=False, timeout=1.0) - with qiterable.num_inflight_items.get_lock(): - qiterable.num_inflight_items.value -= 1 - except Empty: - break - - for tensor_dict in iterator(instances(), num_epochs=1, shuffle=shuffle): - output_queue.put(tensor_dict) - - output_queue.put(index) - - # See the note above in _create_tensor_dicts_from_queue. - output_queue.join() - - -def _queuer( - instances: Iterable[Instance], input_queue: Queue, num_workers: int, num_epochs: Optional[int] -) -> None: - """ - Reads Instances from the iterable and puts them in the input_queue. - """ - logger.info(f"Iterator queuer. PID: {os.getpid()}") - epoch = 0 - - while num_epochs is None or epoch < num_epochs: - epoch += 1 - for instance in instances: - input_queue.put(instance) - - # Now put a None for each worker, since each needs to receive one - # to know that it's done. - for _ in range(num_workers): - input_queue.put(None) - - -@DataIterator.register("multiprocess") -class MultiprocessIterator(DataIterator): - """ - Wraps another `DataIterator` and uses it to generate tensor dicts - using multiple processes. - - # Parameters - - base_iterator : `DataIterator` - The `DataIterator` for generating tensor dicts. It will be shared among - processes, so it should not be stateful in any way. - num_workers : `int`, optional (default = 1) - The number of processes used for generating tensor dicts. - output_queue_size : `int`, optional (default = 1000) - The size of the output queue on which tensor dicts are placed to be consumed. - You might need to increase this if you're generating tensor dicts too quickly. - """ - - def __init__( - self, base_iterator: DataIterator, num_workers: int = 1, output_queue_size: int = 1000 - ) -> None: - - super().__init__() - self.num_workers = num_workers - self.batch_size = base_iterator._batch_size - self.output_queue_size = output_queue_size - - # These two options make the iterator stateful, which means it can't be shared - # across multiple processes. - if base_iterator._cache_instances: - raise ConfigurationError("cannot use Multiprocess iterator with cache_instances") - if base_iterator._instances_per_epoch: - raise ConfigurationError("cannot use instances_per_epoch with Multiprocess iterator") - - self.iterator = base_iterator - - self.processes: List[Process] = [] - self.queuer: Optional[Process] = None - - def _create_batches(self, instances: Iterable[Instance], shuffle: bool) -> Iterable[Batch]: - raise RuntimeError("MultiprocessIterator doesn't use create_batches") - - def index_with(self, vocab: Vocabulary): - self.iterator.index_with(vocab) - - def _call_with_instances( - self, instances: Iterable[Instance], num_epochs: int, shuffle: bool - ) -> Iterator[TensorDict]: - # JoinableQueue needed here as sharing tensors across processes - # requires that the creating process not exit prematurely. 
- output_queue = JoinableQueue(self.output_queue_size) - input_queue = Queue(self.output_queue_size * self.batch_size) - - # Start process that populates the queue. - self.queuer = Process( - target=_queuer, args=(instances, input_queue, self.num_workers, num_epochs) - ) - self.queuer.start() - - # Start the tensor-dict workers. - for i in range(self.num_workers): - args = (input_queue, output_queue, self.iterator, shuffle, i) - process = Process(target=_create_tensor_dicts_from_queue, args=args) - process.start() - self.processes.append(process) - - num_finished = 0 - while num_finished < self.num_workers: - item = output_queue.get() - output_queue.task_done() - if isinstance(item, int): - num_finished += 1 - logger.info(f"worker {item} finished ({num_finished} / {self.num_workers})") - else: - yield item - - for process in self.processes: - process.join() - self.processes.clear() - - if self.queuer is not None: - self.queuer.join() - self.queuer = None - - def _call_with_qiterable( - self, qiterable: QIterable, num_epochs: int, shuffle: bool - ) -> Iterator[TensorDict]: - # JoinableQueue needed here as sharing tensors across processes - # requires that the creating tensor not exit prematurely. - output_queue = JoinableQueue(self.output_queue_size) - - for _ in range(num_epochs): - qiterable.start() - - # Start the tensor-dict workers. - for i in range(self.num_workers): - args = (qiterable, output_queue, self.iterator, shuffle, i) - process = Process(target=_create_tensor_dicts_from_qiterable, args=args) - process.start() - self.processes.append(process) - - num_finished = 0 - while num_finished < self.num_workers: - item = output_queue.get() - output_queue.task_done() - if isinstance(item, int): - num_finished += 1 - logger.info(f"worker {item} finished ({num_finished} / {self.num_workers})") - else: - yield item - - for process in self.processes: - process.join() - self.processes.clear() - - qiterable.join() - - def __call__( - self, instances: Iterable[Instance], num_epochs: int = None, shuffle: bool = True - ) -> Iterator[TensorDict]: - - # If you run it forever, the multiprocesses won't shut down correctly. - # TODO(joelgrus) find a solution for this - if num_epochs is None: - raise ConfigurationError( - "Multiprocess Iterator must be run for a fixed number of epochs" - ) - - if isinstance(instances, QIterable): - return self._call_with_qiterable(instances, num_epochs, shuffle) - else: - return self._call_with_instances(instances, num_epochs, shuffle) - - def __del__(self) -> None: - """ - Terminate processes if the user hasn't joined implicitly by consuming - all the tensors. This is necessary as leaving stray processes running - can corrupt shared state. In brief, we've observed shared memory - counters being reused (when the memory was free from the perspective of - the parent process) while the stray workers still held a reference to - them. - - For a discussion of using destructors in Python in this manner, see - https://eli.thegreenplace.net/2009/06/12/safely-using-destructors-in-python/. 
- """ - for process in self.processes: - process.terminate() - - if self.queuer is not None: - self.queuer.terminate() diff --git a/allennlp/modules/token_embedders/language_model_token_embedder.py b/allennlp/modules/token_embedders/language_model_token_embedder.py index bf08854125a..989d761afb3 100644 --- a/allennlp/modules/token_embedders/language_model_token_embedder.py +++ b/allennlp/modules/token_embedders/language_model_token_embedder.py @@ -96,8 +96,6 @@ def __init__( # /~https://github.com/allenai/allennlp/blob/eff25a3085aa9976a7650d30d8961c3626ddc411/allennlp/modules/elmo.py#L590 if bos_eos_tokens: dataset_reader_config = config.get("dataset_reader") - if dataset_reader_config.get("type") == "multiprocess": - dataset_reader_config = dataset_reader_config.get("base_reader") token_indexer_config = dataset_reader_config.get("token_indexers").get(self._token_name) token_indexer: TokenIndexer = TokenIndexer.from_params(token_indexer_config) token_list = [Token(token) for token in bos_eos_tokens] diff --git a/allennlp/tests/data/dataset_readers/multiprocess_dataset_reader_test.py b/allennlp/tests/data/dataset_readers/multiprocess_dataset_reader_test.py deleted file mode 100644 index 976da3ae170..00000000000 --- a/allennlp/tests/data/dataset_readers/multiprocess_dataset_reader_test.py +++ /dev/null @@ -1,201 +0,0 @@ -import sys -from collections import Counter -from multiprocessing import Queue, Process -from queue import Empty -from typing import Tuple - -import numpy as np -import pytest - -from allennlp.common.testing import AllenNlpTestCase -from allennlp.data.dataset_readers import MultiprocessDatasetReader, SequenceTaggingDatasetReader -from allennlp.data.dataset_readers.multiprocess_dataset_reader import QIterable -from allennlp.data.instance import Instance -from allennlp.data.iterators import BasicIterator -from allennlp.data.vocabulary import Vocabulary - - -def fingerprint(instance: Instance) -> Tuple[str, ...]: - """ - Get a hashable representation of a sequence tagging instance - that can be put in a Counter. - """ - text_tuple = tuple(t.text for t in instance.fields["tokens"].tokens) # type: ignore - labels_tuple = tuple(instance.fields["tags"].labels) # type: ignore - return text_tuple + labels_tuple - - -@pytest.mark.skipif( - sys.platform == "darwin" and sys.version_info > (3, 6), - reason="This test causes internal Python errors on the Mac since version 3.7", -) -class TestMultiprocessDatasetReader(AllenNlpTestCase): - def setUp(self) -> None: - super().setUp() - - # use SequenceTaggingDatasetReader as the base reader - self.base_reader = SequenceTaggingDatasetReader(lazy=True) - base_file_path = AllenNlpTestCase.FIXTURES_ROOT / "data" / "sequence_tagging.tsv" - - # Make 100 copies of the data - raw_data = open(base_file_path).read() - for i in range(100): - file_path = self.TEST_DIR / f"identical_{i}.tsv" - with open(file_path, "w") as f: - f.write(raw_data) - - self.all_distinct_path = str(self.TEST_DIR / "all_distinct.tsv") - with open(self.all_distinct_path, "w") as all_distinct: - for i in range(100): - file_path = self.TEST_DIR / f"distinct_{i}.tsv" - line = f"This###DT\tis###VBZ\tsentence###NN\t{i}###CD\t.###.\n" - with open(file_path, "w") as f: - f.write(line) - all_distinct.write(line) - - self.identical_files_glob = str(self.TEST_DIR / "identical_*.tsv") - self.distinct_files_glob = str(self.TEST_DIR / "distinct_*.tsv") - - # For some of the tests we need a vocab, we'll just use the base_reader for that. 
- self.vocab = Vocabulary.from_instances(self.base_reader.read(str(base_file_path))) - - def test_multiprocess_read(self): - reader = MultiprocessDatasetReader(base_reader=self.base_reader, num_workers=4) - - all_instances = [] - - for instance in reader.read(self.identical_files_glob): - all_instances.append(instance) - - # 100 files * 4 sentences / file - assert len(all_instances) == 100 * 4 - - counts = Counter(fingerprint(instance) for instance in all_instances) - - # should have the exact same data 100 times - assert len(counts) == 4 - assert counts[("cats", "are", "animals", ".", "N", "V", "N", "N")] == 100 - assert counts[("dogs", "are", "animals", ".", "N", "V", "N", "N")] == 100 - assert counts[("snakes", "are", "animals", ".", "N", "V", "N", "N")] == 100 - assert counts[("birds", "are", "animals", ".", "N", "V", "N", "N")] == 100 - - def test_multiprocess_read_partial_does_not_hang(self): - # Use a small queue size such that the processes generating the data will block. - reader = MultiprocessDatasetReader( - base_reader=self.base_reader, num_workers=4, output_queue_size=10 - ) - - all_instances = [] - - # Half of 100 files * 4 sentences / file - i = 0 - for instance in reader.read(self.identical_files_glob): - # Stop early such that the processes generating the data remain - # active (given the small queue size). - if i == 200: - break - i += 1 - all_instances.append(instance) - - # This should be trivially true. The real test here is that we exit - # normally and don't hang due to the still active processes. - assert len(all_instances) == 200 - - def test_multiprocess_read_with_qiterable(self): - reader = MultiprocessDatasetReader(base_reader=self.base_reader, num_workers=4) - - all_instances = [] - qiterable = reader.read(self.identical_files_glob) - assert isinstance(qiterable, QIterable) - - # Essentially QIterable.__iter__. Broken out here as we intend it to be - # a public interface. - qiterable.start() - while qiterable.num_active_workers.value > 0 or qiterable.num_inflight_items.value > 0: - while True: - try: - all_instances.append(qiterable.output_queue.get(block=False, timeout=1.0)) - with qiterable.num_inflight_items.get_lock(): - qiterable.num_inflight_items.value -= 1 - except Empty: - break - qiterable.join() - - # 100 files * 4 sentences / file - assert len(all_instances) == 100 * 4 - - counts = Counter(fingerprint(instance) for instance in all_instances) - - # should have the exact same data 100 times - assert len(counts) == 4 - assert counts[("cats", "are", "animals", ".", "N", "V", "N", "N")] == 100 - assert counts[("dogs", "are", "animals", ".", "N", "V", "N", "N")] == 100 - assert counts[("snakes", "are", "animals", ".", "N", "V", "N", "N")] == 100 - assert counts[("birds", "are", "animals", ".", "N", "V", "N", "N")] == 100 - - def test_multiprocess_read_in_subprocess_is_deterministic(self): - reader = MultiprocessDatasetReader(base_reader=self.base_reader, num_workers=1) - q = Queue() - - def read(): - for instance in reader.read(self.distinct_files_glob): - q.put(fingerprint(instance)) - - # Ensure deterministic shuffling. - np.random.seed(0) - p = Process(target=read) - p.start() - p.join() - - # Convert queue to list. 
- actual_fingerprints = [] - while not q.empty(): - actual_fingerprints.append(q.get(block=False)) - - assert len(actual_fingerprints) == 100 - - expected_fingerprints = [] - for instance in self.base_reader.read(self.all_distinct_path): - expected_fingerprints.append(fingerprint(instance)) - - np.random.seed(0) - expected_fingerprints.sort() - # This should be shuffled into exactly the same order as actual_fingerprints. - np.random.shuffle(expected_fingerprints) - - assert actual_fingerprints == expected_fingerprints - - def test_multiple_epochs(self): - reader = MultiprocessDatasetReader( - base_reader=self.base_reader, num_workers=2, epochs_per_read=3 - ) - - all_instances = [] - - for instance in reader.read(self.identical_files_glob): - all_instances.append(instance) - - # 100 files * 4 sentences per file * 3 epochs - assert len(all_instances) == 100 * 4 * 3 - - counts = Counter(fingerprint(instance) for instance in all_instances) - - # should have the exact same data 100 * 3 times - assert len(counts) == 4 - assert counts[("cats", "are", "animals", ".", "N", "V", "N", "N")] == 300 - assert counts[("dogs", "are", "animals", ".", "N", "V", "N", "N")] == 300 - assert counts[("snakes", "are", "animals", ".", "N", "V", "N", "N")] == 300 - assert counts[("birds", "are", "animals", ".", "N", "V", "N", "N")] == 300 - - def test_with_iterator(self): - reader = MultiprocessDatasetReader(base_reader=self.base_reader, num_workers=2) - instances = reader.read(self.identical_files_glob) - - iterator = BasicIterator(batch_size=32) - iterator.index_with(self.vocab) - - batches = [batch for batch in iterator(instances, num_epochs=1)] - - # 400 instances / batch_size 32 = 12 full batches + 1 batch of 16 - sizes = sorted(len(batch["tags"]) for batch in batches) - assert sizes == [16] + 12 * [32] diff --git a/allennlp/tests/data/iterators/multiprocess_iterator_test.py b/allennlp/tests/data/iterators/multiprocess_iterator_test.py deleted file mode 100644 index 932ed9d9446..00000000000 --- a/allennlp/tests/data/iterators/multiprocess_iterator_test.py +++ /dev/null @@ -1,66 +0,0 @@ -from allennlp.common.testing import AllenNlpTestCase -from allennlp.data.dataset_readers import SequenceTaggingDatasetReader, MultiprocessDatasetReader -from allennlp.data.iterators import BasicIterator, MultiprocessIterator -from allennlp.data.vocabulary import Vocabulary -from allennlp.tests.data.iterators.basic_iterator_test import IteratorTest - - -class TestMultiprocessIterator(IteratorTest): - def setUp(self): - super().setUp() - - self.base_reader = SequenceTaggingDatasetReader(lazy=True) - base_file_path = AllenNlpTestCase.FIXTURES_ROOT / "data" / "sequence_tagging.tsv" - - # Make 100 copies of the data - raw_data = open(base_file_path).read() - for i in range(100): - file_path = self.TEST_DIR / f"sequence_tagging_{i}.tsv" - with open(file_path, "w") as f: - f.write(raw_data) - - self.glob = str(self.TEST_DIR / "sequence_tagging_*.tsv") - - # For some of the tests we need a vocab, we'll just use the base_reader for that. 
- self.vocab = Vocabulary.from_instances(self.base_reader.read(str(base_file_path))) - - def test_yield_one_epoch_iterates_over_the_data_once(self): - for test_instances in (self.instances, self.lazy_instances): - base_iterator = BasicIterator(batch_size=2, max_instances_in_memory=1024) - iterator = MultiprocessIterator(base_iterator, num_workers=4) - iterator.index_with(self.vocab) - batches = list(iterator(test_instances, num_epochs=1)) - # We just want to get the single-token array for the text field in the instance. - instances = [ - tuple(instance.detach().cpu().numpy()) - for batch in batches - for instance in batch["text"]["tokens"]["tokens"] - ] - assert len(instances) == 5 - - def test_multiprocess_iterate_partial_does_not_hang(self): - for test_instances in (self.instances, self.lazy_instances): - base_iterator = BasicIterator(batch_size=2, max_instances_in_memory=1024) - iterator = MultiprocessIterator(base_iterator, num_workers=4) - iterator.index_with(self.vocab) - generator = iterator(test_instances, num_epochs=1) - # We only iterate through 3 of the 5 instances causing the - # processes generating the tensors to remain active. - for _ in range(3): - next(generator) - # The real test here is that we exit normally and don't hang due to - # the still active processes. - - def test_multiprocess_reader_with_multiprocess_iterator(self): - # use SequenceTaggingDatasetReader as the base reader - reader = MultiprocessDatasetReader(base_reader=self.base_reader, num_workers=2) - base_iterator = BasicIterator(batch_size=32, max_instances_in_memory=1024) - - iterator = MultiprocessIterator(base_iterator, num_workers=2) - iterator.index_with(self.vocab) - - instances = reader.read(self.glob) - - tensor_dicts = iterator(instances, num_epochs=1) - sizes = [len(tensor_dict["tags"]) for tensor_dict in tensor_dicts] - assert sum(sizes) == 400 diff --git a/allennlp/tests/fixtures/language_model/experiment_multiprocessing_reader.jsonnet b/allennlp/tests/fixtures/language_model/experiment_multiprocessing_reader.jsonnet deleted file mode 100644 index 6615030571a..00000000000 --- a/allennlp/tests/fixtures/language_model/experiment_multiprocessing_reader.jsonnet +++ /dev/null @@ -1,30 +0,0 @@ -local config = import "experiment_unsampled.jsonnet"; - -config + { - "dataset_reader": { - "type": "multiprocess", - "base_reader": { - "type": "simple_language_modeling", - "tokenizer": { - "type": "just_spaces", - }, - "token_indexers": { - "tokens": { - "type": "single_id" - }, - "token_characters": { - "type": "elmo_characters" - } - }, - "start_tokens": [""], - "end_tokens": [""] - }, - "num_workers": 1, - "output_queue_size": 1000 - }, - - // Note the glob on the end of these paths. 
- "train_data_path": "allennlp/tests/fixtures/language_model/sentences*", - "validation_data_path": "allennlp/tests/fixtures/language_model/sentences*", - "test_data_path": "allennlp/tests/fixtures/language_model/sentences*", -} diff --git a/allennlp/tests/training/trainer_test.py b/allennlp/tests/training/trainer_test.py index b8677a72bba..04645b07f61 100644 --- a/allennlp/tests/training/trainer_test.py +++ b/allennlp/tests/training/trainer_test.py @@ -11,7 +11,7 @@ from allennlp.common.checks import ConfigurationError from allennlp.common.params import Params -from allennlp.common.testing import AllenNlpTestCase, ModelTestCase +from allennlp.common.testing import AllenNlpTestCase from allennlp.data import Vocabulary from allennlp.data.dataset_readers import SequenceTaggingDatasetReader from allennlp.data.iterators import BasicIterator @@ -842,16 +842,3 @@ def test_sparse_clip_grad(self): # Final norm should be 1.5 grad = embedding.weight.grad.coalesce() self.assertAlmostEqual(grad._values().norm(2.0).item(), 1.5, places=5) - - -class TestLanguageModelWithMultiprocessDatasetReader(ModelTestCase): - def setUp(self): - super().setUp() - self.set_up_model( - self.FIXTURES_ROOT / "language_model" / "experiment_multiprocessing_reader.jsonnet", - # Note the glob on the end of this path. - self.FIXTURES_ROOT / "language_model" / "sentences*", - ) - - def test_unidirectional_language_model_can_train_save_and_load(self): - self.ensure_model_can_train_save_and_load(self.param_file) From 6f58c2ac52c1738a1134ee55c1bb12ba30092a86 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Wed, 19 Feb 2020 10:22:43 -0800 Subject: [PATCH 03/52] sneak torch datasets inside DatasetReader --- .../data/dataset_readers/dataset_reader.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/allennlp/data/dataset_readers/dataset_reader.py b/allennlp/data/dataset_readers/dataset_reader.py index 5197b749d02..4b6414a99aa 100644 --- a/allennlp/data/dataset_readers/dataset_reader.py +++ b/allennlp/data/dataset_readers/dataset_reader.py @@ -1,10 +1,11 @@ import itertools -from typing import Iterable, Iterator, Callable, Optional +from typing import Iterable, Iterator, Callable, Optional, List import logging import os import pathlib import jsonpickle +from torch.utils.data import Dataset, IterableDataset from allennlp.data.instance import Instance from allennlp.common import Tqdm, util @@ -14,7 +15,19 @@ logger = logging.getLogger(__name__) -class _LazyInstances(Iterable): +class AllennlpDataset(Dataset): + def __init__(self, instances: List[Instance]): + self.instances = instances + + def __getitem__(self, idx): + + return self.instances[idx] + + def __len__(self): + return len(self.instances) + + +class _LazyInstances(IterableDataset): """ An `Iterable` that just wraps a thunk for generating instances and calls it for each call to `__iter__`. 
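A rough usage sketch of what the non-lazy path returns after this change (illustrative
only and not part of the diff; the reader choice and file path are made up):

    reader = SequenceTaggingDatasetReader()         # any existing non-lazy reader
    dataset = reader.read("path/to/sequences.tsv")  # hypothetical path; now an AllennlpDataset
    print(len(dataset))                             # map-style dataset: supports len() ...
    print(dataset[0])                               # ... and integer indexing into Instances

Because the return value is a `torch.utils.data.Dataset`, it can be handed directly to a
`torch.utils.data.DataLoader` with an AllenNLP-aware `collate_fn`, such as the
`allennlp_collocate` sketch in torch_datasets.py.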
@@ -168,6 +181,8 @@ def read(self, file_path: str) -> Iterable[Instance]: logger.info(f"Caching instances to {cache_file}") self._instances_to_cache_file(cache_file, instances) + instances = AllennlpDataset(instances) + return instances def _get_cache_location_for_file_path(self, file_path: str) -> str: From 1b3ad9a3cf1eca45db648974e81ed7ee86649097 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Wed, 19 Feb 2020 11:35:53 -0800 Subject: [PATCH 04/52] lint --- torch_datasets.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/torch_datasets.py b/torch_datasets.py index 55c8c766e06..6f35be0ff49 100644 --- a/torch_datasets.py +++ b/torch_datasets.py @@ -5,7 +5,7 @@ from torch.utils.data import Dataset as TorchDataset from torch.utils.data import DataLoader -from torch.utils.data import Sampler, BatchSampler, SequentialSampler, SubsetRandomSampler +from torch.utils.data import BatchSampler, SequentialSampler from allennlp.common.registrable import Registrable from allennlp.common.util import add_noise_to_dict_values, lazy_groups_of @@ -19,6 +19,7 @@ logger = logging.getLogger(__name__) + class Dataset(TorchDataset, Registrable): def __init__(self): self.vocab: Vocabulary = None @@ -98,8 +99,13 @@ def text_to_instance(self, premise: str, hypothesis: str, label: str = None) -> class BatchInstanceSampler(BatchSampler): - - def __init__(self, data, batch_size: int, sorting_keys: List[Tuple[str, str]] = None, padding_noise: float = 0.1): + def __init__( + self, + data, + batch_size: int, + sorting_keys: List[Tuple[str, str]] = None, + padding_noise: float = 0.1, + ): self.vocab = data.vocab self._sorting_keys = sorting_keys @@ -184,6 +190,7 @@ def allennlp_collocate(batch): batch = AllennlpBatch(batch) return batch.as_tensor_dict(batch.get_padding_lengths()) + batch_generator = DataLoader(data, batch_sampler=batch_sampler, collate_fn=allennlp_collocate) iterator = iter(batch_generator) From effc44518a3b60313c7ad37bcdeb10460a09463e Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Wed, 19 Feb 2020 11:36:30 -0800 Subject: [PATCH 05/52] trainer_v2, We Love To See It --- allennlp/data/samplers/__init__.py | 197 ++++++ allennlp/training/trainer_v2.py | 942 +++++++++++++++++++++++++++++ 2 files changed, 1139 insertions(+) create mode 100644 allennlp/data/samplers/__init__.py create mode 100644 allennlp/training/trainer_v2.py diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py new file mode 100644 index 00000000000..846ed9240be --- /dev/null +++ b/allennlp/data/samplers/__init__.py @@ -0,0 +1,197 @@ + +from typing import List, Iterable, Tuple, Dict, cast +import logging +from torch.utils import data + +from allennlp.common.registrable import Registrable + +from allennlp.common.util import add_noise_to_dict_values, lazy_groups_of +from allennlp.data.batch import Batch as AllennlpBatch +from allennlp.data.instance import Instance +from allennlp.data.vocabulary import Vocabulary +from allennlp.data import Token +from allennlp.common.file_utils import cached_path +from allennlp.data.fields import Field, TextField, LabelField, MetadataField +from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer + +logger = logging.getLogger(__name__) + + +class Sampler(Registrable): + + def __iter__(self) -> Iterable[int]: + + raise NotImplementedError + + +class BatchSampler(Registrable): + + def __iter__(self) -> Iterable[List[int]]: + + raise NotImplementedError + + +@Sampler.register("sequential") +class SequentialSampler(Sampler, 
data.SequentialSampler): + + def __init__(self, data_source: data.Dataset): + super().__init__(data_source) + + + +@Sampler.register("random") +class RandomSampler(Sampler, data.RandomSampler): + r"""Samples elements randomly. If without replacement, then sample from a shuffled dataset. + If with replacement, then user can specify :attr:`num_samples` to draw. + + Arguments: + data_source (Dataset): dataset to sample from + replacement (bool): samples are drawn with replacement if ``True``, default=``False`` + num_samples (int): number of samples to draw, default=`len(dataset)`. This argument + is supposed to be specified only when `replacement` is ``True``. + """ + def __init__(self, data_source: data.Dataset, replacement: bool = False, num_samples: int = None): + super().__init__(data_source, replacement, num_samples) + + +@Sampler.register("subset_random") +class SubsetRandomSampler(Sampler, data.SubsetRandomSampler): + r"""Samples elements randomly from a given list of indices, without replacement. + + Arguments: + indices (sequence): a sequence of indices + """ + def __init__(self, indices: List[int]): + super().__init__(indices) + + +@Sampler.register("weighted_random") +class WeightedRandomSampler(Sampler, data.WeightedRandomSampler): + r"""Samples elements from ``[0,..,len(weights)-1]`` with given probabilities (weights). + + Args: + weights (sequence) : a sequence of weights, not necessary summing up to one + num_samples (int): number of samples to draw + replacement (bool): if ``True``, samples are drawn with replacement. + If not, they are drawn without replacement, which means that when a + sample index is drawn for a row, it cannot be drawn again for that row. + + Example: + >>> list(WeightedRandomSampler([0.1, 0.9, 0.4, 0.7, 3.0, 0.6], 5, replacement=True)) + [0, 0, 0, 1, 0] + >>> list(WeightedRandomSampler([0.9, 0.4, 0.05, 0.2, 0.3, 0.1], 5, replacement=False)) + [0, 1, 4, 3, 2] + """ + def __init__(self, weights: List[float], num_samples: int, replacement: bool = True): + super().__init__(weights, num_samples, replacement) + + +@BatchSampler.register("basic") +class BasicBatchSampler(BatchSampler, data.BatchSampler): + r"""Wraps another sampler to yield a mini-batch of indices. + + Args: + sampler (Sampler): Base sampler. + batch_size (int): Size of mini-batch. + drop_last (bool): If ``True``, the sampler will drop the last batch if + its size would be less than ``batch_size`` + + Example: + >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] + >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=True)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + """ + + def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool): + super().__init__(sampler, batch_size, drop_last) + + +@BatchSampler.register("bucket") +class BatchInstanceSampler(BatchSampler): + def __init__( + self, + data: data.Dataset, + batch_size: int, + sorting_keys: List[Tuple[str, str]] = None, + padding_noise: float = 0.1, + ): + + self.vocab = data.vocab + self._sorting_keys = sorting_keys + self._padding_noise = padding_noise + self._batch_size = batch_size + self.data = data + + def _argsort_by_padding(self, instances: List[Instance]) -> List[int]: + """ + Sorts the instances by their padding lengths, using the keys in + `sorting_keys` (in the order in which they are provided). `sorting_keys` is a list of + `(field_name, padding_key)` tuples. 
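+        For example, `[("tokens", "num_tokens")]` would sort instances by the padded
+        token count of a `tokens` field (illustrative names; the exact field names and
+        padding keys depend on the instances' fields and token indexers).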
+ """ + if not self._sorting_keys: + logger.info("No sorting keys given; trying to guess a good one") + self._guess_sorting_keys(instances) + logger.info(f"Using {self._sorting_keys} as the sorting keys") + instances_with_lengths = [] + for instance in instances: + # Make sure instance is indexed before calling .get_padding + instance.index_fields(self.vocab) + padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) + if self._padding_noise > 0.0: + noisy_lengths = {} + for field_name, field_lengths in padding_lengths.items(): + noisy_lengths[field_name] = add_noise_to_dict_values( + field_lengths, self._padding_noise + ) + padding_lengths = noisy_lengths + instance_with_lengths = ( + [ + padding_lengths[field_name][padding_key] + for (field_name, padding_key) in self._sorting_keys + ], + instance, + ) + instances_with_lengths.append(instance_with_lengths) + with_indices = [(x, i) for i, x in enumerate(instances_with_lengths)] + with_indices.sort(key=lambda x: x[0][0]) + return [instance_with_index[-1] for instance_with_index in with_indices] + + def __iter__(self) -> Iterable[List[int]]: + + indices = self._argsort_by_padding(self.data) + for group in lazy_groups_of(indices, self._batch_size): + yield list(group) + + def _guess_sorting_keys(self, instances: List[Instance]) -> None: + max_length = 0.0 + longest_padding_key: Tuple[str, str] = None + for instance in instances: + instance.index_fields(self.vocab) + padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) + for field_name, field_padding in padding_lengths.items(): + for padding_key, length in field_padding.items(): + if length > max_length: + max_length = length + longest_padding_key = (field_name, padding_key) + if not longest_padding_key: + # This shouldn't ever happen (you basically have to have an empty instance list), but + # just in case... 
+ raise AssertionError( + "Found no field that needed padding; we are surprised you got this error, please " + "open an issue on github" + ) + self._sorting_keys = [longest_padding_key] + + +class DataLoader(Registrable, data.DataLoader): + + def __init__(self, dataset: data.Dataset, batch_size: int = 1, shuffle: bool = False, sampler: Sampler = None, + batch_sampler: BatchSampler = None, num_workers: int = 0, collate_fn=None, + pin_memory: bool = False, drop_last: bool = False, timeout: bool = 0, + worker_init_fn=None, multiprocessing_context: str = None): + + super().__init__(self, dataset=dataset, batch_size=batch_size, shuffle=shuffle, sampler=sampler, + batch_sampler=batch_sampler, num_workers=num_workers, collate_fn=collate_fn, + pin_memory=pin_memory, drop_last=drop_last, timeout=timeout, + worker_init_fn=worker_init_fn, multiprocessing_context=multiprocessing_context) diff --git a/allennlp/training/trainer_v2.py b/allennlp/training/trainer_v2.py new file mode 100644 index 00000000000..2d2fa145b26 --- /dev/null +++ b/allennlp/training/trainer_v2.py @@ -0,0 +1,942 @@ +import datetime +import logging +import math +import os +import re +import time +import traceback +from typing import Dict, List, Optional, Tuple, Union, Iterable, Any + +import torch +import torch.distributed as dist +import torch.optim.lr_scheduler +from torch.nn.parallel import DistributedDataParallel + + +from allennlp.common import Lazy, Tqdm +from allennlp.common.checks import ConfigurationError, check_for_gpu +from allennlp.common import util as common_util +from allennlp.data.instance import Instance + +from allennlp.data.samplers import DataLoader + +from allennlp.data.iterators.data_iterator import TensorDict +from allennlp.models.model import Model +from allennlp.nn import util as nn_util +from allennlp.training import util as training_util +from allennlp.training.checkpointer import Checkpointer +from allennlp.training.learning_rate_schedulers import LearningRateScheduler +from allennlp.training.metric_tracker import MetricTracker +from allennlp.training.momentum_schedulers import MomentumScheduler +from allennlp.training.moving_average import MovingAverage +from allennlp.training.optimizers import Optimizer +from allennlp.training.tensorboard_writer import TensorboardWriter +from allennlp.training.trainer_base import TrainerBase + +logger = logging.getLogger(__name__) + + +@TrainerBase.register("trainer_v2", constructor="from_partial_objects") +class TrainerV2(TrainerBase): + def __init__( + self, + model: Model, + optimizer: torch.optim.Optimizer, + data_loader: torch.utils.data.DataLoader, + train_dataset: Iterable[Instance], + validation_dataset: Optional[Iterable[Instance]] = None, + patience: Optional[int] = None, + validation_metric: str = "-loss", + validation_data_loader: torch.utils.data.DataLoader = None, + shuffle: bool = True, + num_epochs: int = 20, + serialization_dir: Optional[str] = None, + num_serialized_models_to_keep: int = 20, + keep_serialized_model_every_num_seconds: int = None, + checkpointer: Checkpointer = None, + model_save_interval: float = None, + cuda_device: int = -1, + grad_norm: Optional[float] = None, + grad_clipping: Optional[float] = None, + learning_rate_scheduler: Optional[LearningRateScheduler] = None, + momentum_scheduler: Optional[MomentumScheduler] = None, + summary_interval: int = 100, + histogram_interval: int = None, + should_log_parameter_statistics: bool = True, + should_log_learning_rate: bool = False, + log_batch_size_period: Optional[int] = None, + 
moving_average: Optional[MovingAverage] = None, + distributed: bool = False, + local_rank: int = 0, + world_size: int = 1, + num_gradient_accumulation_steps: int = 1, + ) -> None: + """ + A trainer for doing supervised learning. It just takes a labeled dataset + and a `DataIterator`, and uses the supplied `Optimizer` to learn the weights + for your model over some fixed number of epochs. You can also pass in a validation + dataset and enable early stopping. There are many other bells and whistles as well. + + # Parameters + + model : `Model`, required. + An AllenNLP model to be optimized. Pytorch Modules can also be optimized if + their `forward` method returns a dictionary with a "loss" key, containing a + scalar tensor representing the loss function to be optimized. + + If you are training your model using GPUs, your model should already be + on the correct device. (If you use `Trainer.from_params` this will be + handled for you.) + optimizer : `torch.nn.Optimizer`, required. + An instance of a Pytorch Optimizer, instantiated with the parameters of the + model to be optimized. + iterator : `DataIterator`, required. + A method for iterating over a `Dataset`, yielding padded indexed batches. + train_dataset : `Dataset`, required. + A `Dataset` to train on. The dataset should have already been indexed. + validation_dataset : `Dataset`, optional, (default = None). + A `Dataset` to evaluate on. The dataset should have already been indexed. + patience : Optional[int] > 0, optional (default=None) + Number of epochs to be patient before early stopping: the training is stopped + after `patience` epochs with no improvement. If given, it must be `> 0`. + If None, early stopping is disabled. + validation_metric : str, optional (default="loss") + Validation metric to measure for whether to stop training using patience + and whether to serialize an `is_best` model each epoch. The metric name + must be prepended with either "+" or "-", which specifies whether the metric + is an increasing or decreasing function. + validation_iterator : `DataIterator`, optional (default=None) + An iterator to use for the validation set. If `None`, then + use the training `iterator`. + shuffle : `bool`, optional (default=True) + Whether to shuffle the instances in the iterator or not. + num_epochs : int, optional (default = 20) + Number of training epochs. + serialization_dir : str, optional (default=None) + Path to directory for saving and loading model files. Models will not be saved if + this parameter is not passed. + num_serialized_models_to_keep : `int`, optional (default=20) + Number of previous model checkpoints to retain. Default is to keep 20 checkpoints. + A value of None or -1 means all checkpoints will be kept. + keep_serialized_model_every_num_seconds : `int`, optional (default=None) + If num_serialized_models_to_keep is not None, then occasionally it's useful to + save models at a given interval in addition to the last num_serialized_models_to_keep. + To do so, specify keep_serialized_model_every_num_seconds as the number of seconds + between permanently saved checkpoints. Note that this option is only used if + num_serialized_models_to_keep is not None, otherwise all checkpoints are kept. + checkpointer : `Checkpointer`, optional (default=None) + An instance of class Checkpointer to use instead of the default. If a checkpointer is specified, + the arguments num_serialized_models_to_keep and keep_serialized_model_every_num_seconds should + not be specified. 
The caller is responsible for initializing the checkpointer so that it is + consistent with serialization_dir. + model_save_interval : `float`, optional (default=None) + If provided, then serialize models every `model_save_interval` + seconds within single epochs. In all cases, models are also saved + at the end of every epoch if `serialization_dir` is provided. + cuda_device : `int`, optional (default = -1) + An integer specifying the CUDA device(s) to use for this process. If -1, the CPU is used. + Data parallelism is controlled at the allennlp train level, so each trainer will have a single + GPU. + grad_norm : `float`, optional, (default = None). + If provided, gradient norms will be rescaled to have a maximum of this value. + grad_clipping : `float`, optional (default = `None`). + If provided, gradients will be clipped `during the backward pass` to have an (absolute) + maximum of this value. If you are getting `NaNs` in your gradients during training + that are not solved by using `grad_norm`, you may need this. + learning_rate_scheduler : `LearningRateScheduler`, optional (default = None) + If specified, the learning rate will be decayed with respect to + this schedule at the end of each epoch (or batch, if the scheduler implements + the `step_batch` method). If you use `torch.optim.lr_scheduler.ReduceLROnPlateau`, + this will use the `validation_metric` provided to determine if learning has plateaued. + To support updating the learning rate on every batch, this can optionally implement + `step_batch(batch_num_total)` which updates the learning rate given the batch number. + momentum_scheduler : `MomentumScheduler`, optional (default = None) + If specified, the momentum will be updated at the end of each batch or epoch + according to the schedule. + summary_interval : `int`, optional, (default = 100) + Number of batches between logging scalars to tensorboard + histogram_interval : `int`, optional, (default = `None`) + If not None, then log histograms to tensorboard every `histogram_interval` batches. + When this parameter is specified, the following additional logging is enabled: + * Histograms of model parameters + * The ratio of parameter update norm to parameter norm + * Histogram of layer activations + We log histograms of the parameters returned by + `model.get_parameters_for_histogram_tensorboard_logging`. + The layer activations are logged for any modules in the `Model` that have + the attribute `should_log_activations` set to `True`. Logging + histograms requires a number of GPU-CPU copies during training and is typically + slow, so we recommend logging histograms relatively infrequently. + Note: only Modules that return tensors, tuples of tensors or dicts + with tensors as values currently support activation logging. + should_log_parameter_statistics : `bool`, optional, (default = True) + Whether to send parameter statistics (mean and standard deviation + of parameters and gradients) to tensorboard. + should_log_learning_rate : `bool`, optional, (default = False) + Whether to send parameter specific learning rate to tensorboard. + log_batch_size_period : `int`, optional, (default = `None`) + If defined, how often to log the average batch size. + moving_average : `MovingAverage`, optional, (default = None) + If provided, we will maintain moving averages for all parameters. During training, we + employ a shadow variable for each parameter, which maintains the moving average. 
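The `grad_norm` and `grad_clipping` options described above are easy to conflate. A rough sketch of the difference using plain PyTorch utilities (the trainer itself goes through `training_util` helpers):

import torch

model = torch.nn.Linear(10, 1)

# grad_clipping: clamp each gradient element during the backward pass.
for parameter in model.parameters():
    parameter.register_hook(lambda grad: grad.clamp(-5.0, 5.0))

loss = model(torch.randn(4, 10)).sum()
loss.backward()

# grad_norm: rescale the combined gradient so its total norm is at most 1.0.
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)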
During + evaluation, we backup the original parameters and assign the moving averages to corresponding + parameters. Be careful that when saving the checkpoint, we will save the moving averages of + parameters. This is necessary because we want the saved model to perform as well as the validated + model if we load it later. But this may cause problems if you restart the training from checkpoint. + distributed : `bool`, optional, (default = False) + If set, PyTorch's `DistributedDataParallel` is used to train the model in multiple GPUs. This also + requires `world_size` to be greater than 1. + local_rank : `int`, optional, (default = 0) + This is the unique identifier of the `Trainer` in a distributed process group. The GPU device id is + used as the rank. + world_size : `int`, (default = 1) + The number of `Trainer` workers participating in the distributed training. + num_gradient_accumulation_steps : `int`, optional, (default = 1) + Gradients are accumulated for the given number of steps before doing an optimizer step. This can + be useful to accommodate batches that are larger than the RAM size. Refer Thomas Wolf's + [post](https://tinyurl.com/y5mv44fw) for details on Gradient Accumulation. + """ + super().__init__(serialization_dir, cuda_device, distributed, local_rank, world_size) + + # I am not calling move_to_gpu here, because if the model is + # not already on the GPU then the optimizer is going to be wrong. + self.model = model + + self.iterator = iterator + self._validation_iterator = validation_iterator + self.shuffle = shuffle + self.optimizer = optimizer + self.train_data = train_dataset + self._validation_data = validation_dataset + + if patience is None: # no early stopping + if validation_dataset: + logger.warning( + "You provided a validation dataset but patience was set to None, " + "meaning that early stopping is disabled" + ) + elif (not isinstance(patience, int)) or patience <= 0: + raise ConfigurationError( + '{} is an invalid value for "patience": it must be a positive integer ' + "or None (if you want to disable early stopping)".format(patience) + ) + + # For tracking is_best_so_far and should_stop_early + self._metric_tracker = MetricTracker(patience, validation_metric) + # Get rid of + or - + self._validation_metric = validation_metric[1:] + + self._num_epochs = num_epochs + + if checkpointer is not None: + # We can't easily check if these parameters were passed in, so check against their default values. + # We don't check against serialization_dir since it is also used by the parent class. + if ( + num_serialized_models_to_keep != 20 + or keep_serialized_model_every_num_seconds is not None + ): + raise ConfigurationError( + "When passing a custom Checkpointer, you may not also pass in separate checkpointer " + "args 'num_serialized_models_to_keep' or 'keep_serialized_model_every_num_seconds'." + ) + self._checkpointer = checkpointer + else: + self._checkpointer = Checkpointer( + serialization_dir, + keep_serialized_model_every_num_seconds, + num_serialized_models_to_keep, + ) + + self._model_save_interval = model_save_interval + + self._grad_norm = grad_norm + self._grad_clipping = grad_clipping + + self._learning_rate_scheduler = learning_rate_scheduler + self._momentum_scheduler = momentum_scheduler + self._moving_average = moving_average + + # We keep the total batch number as an instance variable because it + # is used inside a closure for the hook which logs activations in + # `_enable_activation_logging`. 
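A minimal sketch of the gradient accumulation idea described in the docstring above: several small batches contribute to a single optimizer step, so the effective batch size is the per-batch size times `num_gradient_accumulation_steps` (the names below are illustrative, not the trainer's attributes):

import torch

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
batch_group = [(torch.randn(8, 10), torch.randint(0, 2, (8,))) for _ in range(4)]

optimizer.zero_grad()
for inputs, labels in batch_group:
    loss = torch.nn.functional.cross_entropy(model(inputs), labels)
    # Dividing by the group size keeps the accumulated gradient an average,
    # mirroring `loss = loss / len(batch_group)` in `_train_epoch`.
    (loss / len(batch_group)).backward()
optimizer.step()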
+ self._batch_num_total = 0 + + self._tensorboard = TensorboardWriter( + get_batch_num_total=lambda: self._batch_num_total, + serialization_dir=serialization_dir, + summary_interval=summary_interval, + histogram_interval=histogram_interval, + should_log_parameter_statistics=should_log_parameter_statistics, + should_log_learning_rate=should_log_learning_rate, + ) + + self._log_batch_size_period = log_batch_size_period + + self._last_log = 0.0 # time of last logging + + self._num_gradient_accumulation_steps = num_gradient_accumulation_steps + + # Enable activation logging. + if histogram_interval is not None: + self._tensorboard.enable_activation_logging(self.model) + + # Using `DistributedDataParallel`(ddp) brings in a quirk wrt AllenNLP's `Model` interface and its + # usage. A `Model` object is wrapped by `ddp`, but assigning the wrapped model to `self.model` + # will break the usages such as `Model.get_regularization_penalty`, `Model.get_metrics`, etc. + # + # Hence a reference to Pytorch's object is maintained in the case of distributed training and in the + # normal case, reference to `Model` is retained. This reference is only used in + # these places: `model.__call__`, `model.train` and `model.eval`. + if self._distributed: + self._pytorch_model = DistributedDataParallel( + self.model, device_ids=[self.cuda_device], find_unused_parameters=True + ) + else: + self._pytorch_model = self.model + + def rescale_gradients(self) -> Optional[float]: + return training_util.rescale_gradients(self.model, self._grad_norm) + + def batch_loss(self, batch: TensorDict, for_training: bool) -> torch.Tensor: + """ + Does a forward pass on the given batches and returns the `loss` value in the result. + If `for_training` is `True` also applies regularization penalty. + """ + batch = nn_util.move_to_device(batch, self.cuda_device) + output_dict = self._pytorch_model(**batch) + + try: + loss = output_dict["loss"] + if for_training: + loss += self.model.get_regularization_penalty() + except KeyError: + if for_training: + raise RuntimeError( + "The model you are trying to optimize does not contain a" + " 'loss' key in the output of model.forward(inputs)." + ) + loss = None + + return loss + + def _train_epoch(self, epoch: int) -> Dict[str, float]: + """ + Trains one epoch and returns metrics. + """ + logger.info("Epoch %d/%d", epoch, self._num_epochs - 1) + peak_cpu_usage = common_util.peak_memory_mb() + logger.info(f"Peak CPU memory usage MB: {peak_cpu_usage}") + gpu_usage = [] + for gpu, memory in common_util.gpu_memory_mb().items(): + gpu_usage.append((gpu, memory)) + logger.info(f"GPU {gpu} memory usage MB: {memory}") + + train_loss = 0.0 + # Set the model to "train" mode. + self._pytorch_model.train() + + # Get tqdm for the training batches + batch_generator = self.iterator(self.train_data, num_epochs=1, shuffle=self.shuffle) + batch_group_generator = common_util.lazy_groups_of( + batch_generator, self._num_gradient_accumulation_steps + ) + num_training_batches = math.ceil( + self.iterator.get_num_batches(self.train_data) / self._num_gradient_accumulation_steps + ) + # Having multiple tqdm bars in case of distributed training will be a mess. 
Hence only the master's + # progress is shown + if self._master: + batch_group_generator_tqdm = Tqdm.tqdm( + batch_group_generator, total=num_training_batches + ) + else: + batch_group_generator_tqdm = batch_group_generator + + self._last_log = time.time() + last_save_time = time.time() + + batches_this_epoch = 0 + if self._batch_num_total is None: + self._batch_num_total = 0 + + histogram_parameters = set(self.model.get_parameters_for_histogram_tensorboard_logging()) + + logger.info("Training") + + cumulative_batch_group_size = 0 + done_early = False + for batch_group in batch_group_generator_tqdm: + if self._distributed: + # Check whether the other workers have stopped already (due to differing amounts of + # data in each). If so, we can't proceed because we would hang when we hit the + # barrier implicit in Model.forward. We use a IntTensor instead a BoolTensor + # here because NCCL process groups apparently don't support BoolTensor. + done = torch.tensor(0, device=self.cuda_device) + torch.distributed.all_reduce(done, torch.distributed.ReduceOp.SUM) + if done.item() > 0: + done_early = True + logger.warning( + f"Worker {torch.distributed.get_rank()} finishing training early! " + "This implies that there is an imbalance in your training " + "data across the workers and that some amount of it will be " + "ignored. A small amount of this is fine, but a major imbalance " + "should be avoided. Note: This warning will appear unless your " + "data is perfectly balanced." + ) + break + + batches_this_epoch += 1 + self._batch_num_total += 1 + batch_num_total = self._batch_num_total + + self.optimizer.zero_grad() + + for batch in batch_group: + loss = self.batch_loss(batch, for_training=True) + if torch.isnan(loss): + raise ValueError("nan loss encountered") + loss = loss / len(batch_group) + loss.backward() + train_loss += loss.item() + + batch_grad_norm = self.rescale_gradients() + + # This does nothing if batch_num_total is None or you are using a + # scheduler which doesn't update per batch. + if self._learning_rate_scheduler: + self._learning_rate_scheduler.step_batch(batch_num_total) + if self._momentum_scheduler: + self._momentum_scheduler.step_batch(batch_num_total) + + if self._tensorboard.should_log_histograms_this_batch() and self._master: + # get the magnitude of parameter updates for logging + # We need a copy of current parameters to compute magnitude of updates, + # and copy them to CPU so large models won't go OOM on the GPU. 
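The distributed early-exit check above relies on an all-reduce of an integer flag so that every worker agrees on whether any of them ran out of data. A sketch of that pattern, assuming a process group has already been initialized:

import torch
import torch.distributed as dist

def any_worker_done(done: bool, device: torch.device) -> bool:
    # NCCL process groups do not support BoolTensor, so use an int flag.
    flag = torch.tensor(1 if done else 0, device=device)
    dist.all_reduce(flag, op=dist.ReduceOp.SUM)
    # The sum is positive on every worker if at least one worker reported done.
    return flag.item() > 0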
+ param_updates = { + name: param.detach().cpu().clone() + for name, param in self.model.named_parameters() + } + self.optimizer.step() + for name, param in self.model.named_parameters(): + param_updates[name].sub_(param.detach().cpu()) + update_norm = torch.norm(param_updates[name].view(-1)) + param_norm = torch.norm(param.view(-1)).cpu() + self._tensorboard.add_train_scalar( + "gradient_update/" + name, update_norm / (param_norm + 1e-7) + ) + else: + self.optimizer.step() + + # Update moving averages + if self._moving_average is not None: + self._moving_average.apply(batch_num_total) + + # Update the description with the latest metrics + metrics = training_util.get_metrics( + self.model, + train_loss, + batches_this_epoch, + world_size=self._world_size, + cuda_device=[self.cuda_device], + ) + + # Updating tqdm only for the master as the trainers wouldn't have one + if self._master: + description = training_util.description_from_metrics(metrics) + batch_group_generator_tqdm.set_description(description, refresh=False) + + # Log parameter values to Tensorboard (only from the master) + if self._tensorboard.should_log_this_batch() and self._master: + self._tensorboard.log_parameter_and_gradient_statistics(self.model, batch_grad_norm) + self._tensorboard.log_learning_rates(self.model, self.optimizer) + + self._tensorboard.add_train_scalar("loss/loss_train", metrics["loss"]) + self._tensorboard.log_metrics({"epoch_metrics/" + k: v for k, v in metrics.items()}) + + if self._tensorboard.should_log_histograms_this_batch() and self._master: + self._tensorboard.log_histograms(self.model, histogram_parameters) + + if self._log_batch_size_period: + batch_group_size = sum(training_util.get_batch_size(batch) for batch in batch_group) + cumulative_batch_group_size += batch_group_size + if (batches_this_epoch - 1) % self._log_batch_size_period == 0: + average = cumulative_batch_group_size / batches_this_epoch + logger.info( + f"current batch size: {batch_group_size} mean batch size: {average}" + ) + self._tensorboard.add_train_scalar("current_batch_size", batch_group_size) + self._tensorboard.add_train_scalar("mean_batch_size", average) + + # Save model if needed. + if ( + self._model_save_interval is not None + and (time.time() - last_save_time > self._model_save_interval) + and self._master + ): + last_save_time = time.time() + self._save_checkpoint( + "{0}.{1}".format(epoch, training_util.time_to_str(int(last_save_time))) + ) + if self._distributed and not done_early: + logger.warning( + f"Worker {torch.distributed.get_rank()} completed its entire epoch (training)." + ) + # Indicate that we're done so that any workers that have remaining data stop the epoch early. + done = torch.tensor(1, device=self.cuda_device) + torch.distributed.all_reduce(done, torch.distributed.ReduceOp.SUM) + assert done.item() + + # Let all workers finish their epoch before computing + # the final statistics for the epoch. + if self._distributed: + dist.barrier() + + metrics = training_util.get_metrics( + self.model, + train_loss, + batches_this_epoch, + reset=True, + world_size=self._world_size, + cuda_device=[self.cuda_device], + ) + metrics["cpu_memory_MB"] = peak_cpu_usage + for (gpu_num, memory) in gpu_usage: + metrics["gpu_" + str(gpu_num) + "_memory_MB"] = memory + return metrics + + def _validation_loss(self) -> Tuple[float, int]: + """ + Computes the validation loss. Returns it and the number of batches. 
+ """ + logger.info("Validating") + + self._pytorch_model.eval() + + # Replace parameter values with the shadow values from the moving averages. + if self._moving_average is not None: + self._moving_average.assign_average_value() + + if self._validation_iterator is not None: + val_iterator = self._validation_iterator + else: + val_iterator = self.iterator + + val_generator = val_iterator(self._validation_data, num_epochs=1, shuffle=False) + num_validation_batches = val_iterator.get_num_batches(self._validation_data) + val_generator_tqdm = Tqdm.tqdm(val_generator, total=num_validation_batches) + batches_this_epoch = 0 + val_loss = 0 + done_early = False + for batch in val_generator_tqdm: + if self._distributed: + # Check whether the other workers have stopped already (due to differing amounts of + # data in each). If so, we can't proceed because we would hang when we hit the + # barrier implicit in Model.forward. We use a IntTensor instead a BoolTensor + # here because NCCL process groups apparently don't support BoolTensor. + done = torch.tensor(0, device=self.cuda_device) + torch.distributed.all_reduce(done, torch.distributed.ReduceOp.SUM) + if done.item() > 0: + done_early = True + logger.warning( + f"Worker {torch.distributed.get_rank()} finishing validation early! " + "This implies that there is an imbalance in your validation " + "data across the workers and that some amount of it will be " + "ignored. A small amount of this is fine, but a major imbalance " + "should be avoided. Note: This warning will appear unless your " + "data is perfectly balanced." + ) + break + + loss = self.batch_loss(batch, for_training=False) + if loss is not None: + # You shouldn't necessarily have to compute a loss for validation, so we allow for + # `loss` to be None. We need to be careful, though - `batches_this_epoch` is + # currently only used as the divisor for the loss function, so we can safely only + # count those batches for which we actually have a loss. If this variable ever + # gets used for something else, we might need to change things around a bit. + batches_this_epoch += 1 + val_loss += loss.detach().cpu().numpy() + + # Update the description with the latest metrics + val_metrics = training_util.get_metrics( + self.model, + val_loss, + batches_this_epoch, + world_size=self._world_size, + cuda_device=[self.cuda_device], + ) + description = training_util.description_from_metrics(val_metrics) + val_generator_tqdm.set_description(description, refresh=False) + + if self._distributed and not done_early: + logger.warning( + f"Worker {torch.distributed.get_rank()} completed its entire epoch (validation)." + ) + # Indicate that we're done so that any workers that have remaining data stop validation early. + done = torch.tensor(1, device=self.cuda_device) + torch.distributed.all_reduce(done, torch.distributed.ReduceOp.SUM) + assert done.item() + + # Now restore the original parameter values. + if self._moving_average is not None: + self._moving_average.restore() + + return val_loss, batches_this_epoch + + def train(self) -> Dict[str, Any]: + """ + Trains the supplied model with the supplied parameters. + """ + try: + epoch_counter = self._restore_checkpoint() + except RuntimeError: + traceback.print_exc() + raise ConfigurationError( + "Could not recover training from the checkpoint. Did you mean to output to " + "a different serialization directory or delete the existing serialization " + "directory?" 
+ ) + + training_util.enable_gradient_clipping(self.model, self._grad_clipping) + + logger.info("Beginning training.") + + val_metrics: Dict[str, float] = {} + this_epoch_val_metric: float = None + metrics: Dict[str, Any] = {} + epochs_trained = 0 + training_start_time = time.time() + + metrics["best_epoch"] = self._metric_tracker.best_epoch + for key, value in self._metric_tracker.best_epoch_metrics.items(): + metrics["best_validation_" + key] = value + + for epoch in range(epoch_counter, self._num_epochs): + epoch_start_time = time.time() + train_metrics = self._train_epoch(epoch) + + # get peak of memory usage + if "cpu_memory_MB" in train_metrics: + metrics["peak_cpu_memory_MB"] = max( + metrics.get("peak_cpu_memory_MB", 0), train_metrics["cpu_memory_MB"] + ) + for key, value in train_metrics.items(): + if key.startswith("gpu_"): + metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value) + + if self._validation_data is not None: + with torch.no_grad(): + # We have a validation set, so compute all the metrics on it. + val_loss, num_batches = self._validation_loss() + + # It is safe again to wait till the validation is done. This is + # important to get the metrics right. + if self._distributed: + dist.barrier() + + val_metrics = training_util.get_metrics( + self.model, + val_loss, + num_batches, + reset=True, + world_size=self._world_size, + cuda_device=[self.cuda_device], + ) + + # Check validation metric for early stopping + this_epoch_val_metric = val_metrics[self._validation_metric] + self._metric_tracker.add_metric(this_epoch_val_metric) + + if self._metric_tracker.should_stop_early(): + logger.info("Ran out of patience. Stopping training.") + break + + if self._master: + self._tensorboard.log_metrics( + train_metrics, val_metrics=val_metrics, log_to_console=True, epoch=epoch + 1 + ) # +1 because tensorboard doesn't like 0 + + # Create overall metrics dict + training_elapsed_time = time.time() - training_start_time + metrics["training_duration"] = str(datetime.timedelta(seconds=training_elapsed_time)) + metrics["training_start_epoch"] = epoch_counter + metrics["training_epochs"] = epochs_trained + metrics["epoch"] = epoch + + for key, value in train_metrics.items(): + metrics["training_" + key] = value + for key, value in val_metrics.items(): + metrics["validation_" + key] = value + + if self._metric_tracker.is_best_so_far(): + # Update all the best_ metrics. + # (Otherwise they just stay the same as they were.) + metrics["best_epoch"] = epoch + for key, value in val_metrics.items(): + metrics["best_validation_" + key] = value + + self._metric_tracker.best_epoch_metrics = val_metrics + + if self._serialization_dir and self._master: + common_util.dump_metrics( + os.path.join(self._serialization_dir, f"metrics_epoch_{epoch}.json"), metrics + ) + + # The Scheduler API is agnostic to whether your schedule requires a validation metric - + # if it doesn't, the validation metric passed here is ignored. 
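The comment above concerns the scheduler interface: the AllenNLP wrappers always accept a metric in `step`, and each concrete schedule decides whether to use it. With raw PyTorch schedulers the split looks like this (an illustration, not the wrapped API used below):

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# ReduceLROnPlateau consumes the validation metric passed to `step`...
plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=0)
plateau.step(0.75)

# ...whereas a fixed schedule such as StepLR simply ignores any metric.
fixed = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)
fixed.step()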
+ if self._learning_rate_scheduler: + self._learning_rate_scheduler.step(this_epoch_val_metric, epoch) + if self._momentum_scheduler: + self._momentum_scheduler.step(this_epoch_val_metric, epoch) + + if self._master: + self._save_checkpoint(epoch) + + # Wait for the master to finish saving the checkpoint + if self._distributed: + dist.barrier() + + epoch_elapsed_time = time.time() - epoch_start_time + logger.info("Epoch duration: %s", datetime.timedelta(seconds=epoch_elapsed_time)) + + if epoch < self._num_epochs - 1: + training_elapsed_time = time.time() - training_start_time + estimated_time_remaining = training_elapsed_time * ( + (self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1 + ) + formatted_time = str(datetime.timedelta(seconds=int(estimated_time_remaining))) + logger.info("Estimated training time remaining: %s", formatted_time) + + epochs_trained += 1 + + # make sure pending events are flushed to disk and files are closed properly + self._tensorboard.close() + + # Load the best model state before returning + best_model_state = self._checkpointer.best_model_state() + if best_model_state: + self.model.load_state_dict(best_model_state) + + return metrics + + def _save_checkpoint(self, epoch: Union[int, str]) -> None: + """ + Saves a checkpoint of the model to self._serialization_dir. + Is a no-op if self._serialization_dir is None. + + # Parameters + + epoch : Union[int, str], required. + The epoch of training. If the checkpoint is saved in the middle + of an epoch, the parameter is a string with the epoch and timestamp. + """ + # If moving averages are used for parameters, we save + # the moving average values into checkpoint, instead of the current values. + if self._moving_average is not None: + self._moving_average.assign_average_value() + + # These are the training states we need to persist. + training_states = { + "metric_tracker": self._metric_tracker.state_dict(), + "optimizer": self.optimizer.state_dict(), + "batch_num_total": self._batch_num_total, + } + + # If we have a learning rate or momentum scheduler, we should persist them too. + if self._learning_rate_scheduler is not None: + training_states["learning_rate_scheduler"] = self._learning_rate_scheduler.state_dict() + if self._momentum_scheduler is not None: + training_states["momentum_scheduler"] = self._momentum_scheduler.state_dict() + + self._checkpointer.save_checkpoint( + model_state=self.model.state_dict(), + epoch=epoch, + training_states=training_states, + is_best_so_far=self._metric_tracker.is_best_so_far(), + ) + + # Restore the original values for parameters so that training will not be affected. + if self._moving_average is not None: + self._moving_average.restore() + + def _restore_checkpoint(self) -> int: + """ + Restores the model and training state from the last saved checkpoint. + This includes an epoch count and optimizer state, which is serialized separately + from model parameters. This function should only be used to continue training - + if you wish to load a model for inference/load parts of a model into a new + computation graph, you should use the native Pytorch functions: + ` model.load_state_dict(torch.load("/path/to/model/weights.th"))` + + If `self._serialization_dir` does not exist or does not contain any checkpointed weights, + this function will do nothing and return 0. + + # Returns + + epoch: int + The epoch at which to resume training, which should be one after the epoch + in the saved training state. 
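A small illustration of the return value described above, with a hypothetical mid-epoch checkpoint name (the actual timestamp format comes from `training_util.time_to_str`):

training_state = {"epoch": "3.2020-02-19-12-00-00"}  # hypothetical mid-epoch save
epoch = training_state["epoch"]
epoch_to_return = epoch + 1 if isinstance(epoch, int) else int(epoch.split(".")[0]) + 1
assert epoch_to_return == 4  # resume training at epoch 4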
+ """ + model_state, training_state = self._checkpointer.restore_checkpoint() + + if not training_state: + # No checkpoint to restore, start at 0 + return 0 + + self.model.load_state_dict(model_state) + self.optimizer.load_state_dict(training_state["optimizer"]) + if ( + self._learning_rate_scheduler is not None + and "learning_rate_scheduler" in training_state + ): + self._learning_rate_scheduler.load_state_dict(training_state["learning_rate_scheduler"]) + if self._momentum_scheduler is not None and "momentum_scheduler" in training_state: + self._momentum_scheduler.load_state_dict(training_state["momentum_scheduler"]) + training_util.move_optimizer_to_cuda(self.optimizer) + + # Currently the `training_state` contains a serialized `MetricTracker`. + if "metric_tracker" in training_state: + self._metric_tracker.load_state_dict(training_state["metric_tracker"]) + # It used to be the case that we tracked `val_metric_per_epoch`. + elif "val_metric_per_epoch" in training_state: + self._metric_tracker.clear() + self._metric_tracker.add_metrics(training_state["val_metric_per_epoch"]) + # And before that we didn't track anything. + else: + self._metric_tracker.clear() + + if isinstance(training_state["epoch"], int): + epoch_to_return = training_state["epoch"] + 1 + else: + epoch_to_return = int(training_state["epoch"].split(".")[0]) + 1 + + # For older checkpoints with batch_num_total missing, default to old behavior where + # it is unchanged. + batch_num_total = training_state.get("batch_num_total") + if batch_num_total is not None: + self._batch_num_total = batch_num_total + + return epoch_to_return + + @classmethod + def from_partial_objects( + cls, + model: Model, + serialization_dir: str, + data_loader: Lazy[DataLoader], + train_data: Iterable[Instance], + validation_data_loader: Lazy[DataLoader] = None, + validation_data: Iterable[Instance] = None, + local_rank: int = 0, + patience: int = None, + validation_metric: str = "-loss", + shuffle: bool = True, + num_epochs: int = 20, + cuda_device: int = -1, + grad_norm: float = None, + grad_clipping: float = None, + model_save_interval: float = None, + summary_interval: int = 100, + histogram_interval: int = None, + should_log_parameter_statistics: bool = True, + should_log_learning_rate: bool = False, + log_batch_size_period: int = None, + distributed: bool = None, + world_size: int = 1, + num_gradient_accumulation_steps: int = 1, + no_grad: List[str] = None, + optimizer: Lazy[Optimizer] = None, + learning_rate_scheduler: Lazy[LearningRateScheduler] = None, + momentum_scheduler: Lazy[MomentumScheduler] = None, + moving_average: Lazy[MovingAverage] = None, + checkpointer: Lazy[Checkpointer] = None, + ) -> "TrainerV2": + """ + This method exists so that we can have a documented method to construct this class using + `FromParams`. If you are not using `FromParams` or config files, you can safely ignore this + method. + + The reason we can't just use `__init__` with `FromParams` here is because there are + sequential dependencies to this class's arguments. Anything that has a `Lazy[]` type + annotation needs something from one of the non-`Lazy` arguments. The `Optimizer` needs to + have the parameters from the `Model` before it's constructed, and the `Schedulers` need to + have the `Optimizer`. Because of this, the typical way we construct things `FromParams` + doesn't work, so we use `Lazy` to allow for constructing the objects sequentially. 
+ + If you're not using `FromParams`, you can just construct these arguments in the right order + yourself in your code and call the constructor directly. + """ + + check_for_gpu(cuda_device) + if cuda_device >= 0: + # Moving model to GPU here so that the optimizer state gets constructed on + # the right device. + model = model.cuda(cuda_device) + + if no_grad: + for name, parameter in model.named_parameters(): + if any(re.search(regex, name) for regex in no_grad): + parameter.requires_grad_(False) + + common_util.log_frozen_and_tunable_parameter_names(model) + + parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad] + optimizer_ = optimizer.construct(model_parameters=parameters) + if not optimizer_: + optimizer_ = Optimizer.default(parameters) + + try: + batches_per_epoch = len(train_data) + except TypeError: + # If the dataset is lazy, it won't have a length. + batches_per_epoch = None + + moving_average_ = moving_average.construct(parameters=parameters) + learning_rate_scheduler_ = learning_rate_scheduler.construct( + optimizer=optimizer_, num_epochs=num_epochs, num_steps_per_epoch=batches_per_epoch + ) + momentum_scheduler_ = momentum_scheduler.construct(optimizer=optimizer_) + + checkpointer_ = checkpointer.construct() or Checkpointer(serialization_dir) + + if validation_data_loader is None and validation_data is not None: + validation_data_loader = data_loader.construct(dataset=validation_data) + + elif validation_data_loader and validation_data is not None: + validation_data_loader = validation_data_loader.construct(dataset=validation_data) + + else: + validation_data_loader = None + + data_loader = data_loader.construct(dataset=train_data) + + return cls( + model, + optimizer_, + data_loader, + train_data, + validation_data, + patience=patience, + validation_metric=validation_metric, + validation_data_loader=validation_data_loader, + shuffle=shuffle, + num_epochs=num_epochs, + serialization_dir=serialization_dir, + cuda_device=cuda_device, + grad_norm=grad_norm, + grad_clipping=grad_clipping, + learning_rate_scheduler=learning_rate_scheduler_, + momentum_scheduler=momentum_scheduler_, + checkpointer=checkpointer_, + model_save_interval=model_save_interval, + summary_interval=summary_interval, + histogram_interval=histogram_interval, + should_log_parameter_statistics=should_log_parameter_statistics, + should_log_learning_rate=should_log_learning_rate, + log_batch_size_period=log_batch_size_period, + moving_average=moving_average_, + distributed=distributed, + local_rank=local_rank, + world_size=world_size, + num_gradient_accumulation_steps=num_gradient_accumulation_steps, + ) From 9d44ad6d26f1fc33790a71c9da081f08d7cd52d0 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Wed, 19 Feb 2020 12:16:03 -0800 Subject: [PATCH 06/52] datasets have index_with now, not iterators --- .../data/dataset_readers/dataset_reader.py | 11 ++- allennlp/tests/training/trainer_v2_test.py | 75 +++++++++++++++++++ allennlp/training/trainer_v2.py | 22 +++--- 3 files changed, 97 insertions(+), 11 deletions(-) create mode 100644 allennlp/tests/training/trainer_v2_test.py diff --git a/allennlp/data/dataset_readers/dataset_reader.py b/allennlp/data/dataset_readers/dataset_reader.py index 4b6414a99aa..3b5a35299d7 100644 --- a/allennlp/data/dataset_readers/dataset_reader.py +++ b/allennlp/data/dataset_readers/dataset_reader.py @@ -8,6 +8,7 @@ from torch.utils.data import Dataset, IterableDataset from allennlp.data.instance import Instance +from allennlp.data.vocabulary import Vocabulary from 
allennlp.common import Tqdm, util from allennlp.common.checks import ConfigurationError from allennlp.common.registrable import Registrable @@ -16,8 +17,9 @@ class AllennlpDataset(Dataset): - def __init__(self, instances: List[Instance]): + def __init__(self, instances: List[Instance], vocab: Vocabulary = None): self.instances = instances + self.vocab = vocab def __getitem__(self, idx): @@ -26,6 +28,8 @@ def __getitem__(self, idx): def __len__(self): return len(self.instances) + def index_with(self, vocab: Vocabulary): + self.vocab = vocab class _LazyInstances(IterableDataset): """ @@ -39,12 +43,14 @@ def __init__( cache_file: str = None, deserialize: Callable[[str], Instance] = None, serialize: Callable[[Instance], str] = None, + vocab: Vocabulary = None ) -> None: super().__init__() self.instance_generator = instance_generator self.cache_file = cache_file self.deserialize = deserialize self.serialize = serialize + self.vocab = vocab def __iter__(self) -> Iterator[Instance]: # Case 1: Use cached instances @@ -68,6 +74,9 @@ def __iter__(self) -> Iterator[Instance]: ) yield from instances + def index_with(self, vocab: Vocabulary): + self.vocab = vocab + class DatasetReader(Registrable): """ diff --git a/allennlp/tests/training/trainer_v2_test.py b/allennlp/tests/training/trainer_v2_test.py new file mode 100644 index 00000000000..22cd9e2cdcf --- /dev/null +++ b/allennlp/tests/training/trainer_v2_test.py @@ -0,0 +1,75 @@ +import torch +from torch.utils.data import DataLoader +from allennlp.common.params import Params +from allennlp.common.testing import AllenNlpTestCase +from allennlp.data import Vocabulary +from allennlp.data.dataset_readers import SequenceTaggingDatasetReader +from allennlp.models.simple_tagger import SimpleTagger +from allennlp.training.trainer_v2 import TrainerV2 + + +class TestTrainer(AllenNlpTestCase): + def setUp(self): + super().setUp() + self.instances = SequenceTaggingDatasetReader().read( + self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv" + ) + vocab = Vocabulary.from_instances(self.instances) + self.vocab = vocab + self.model_params = Params( + { + "text_field_embedder": { + "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}} + }, + "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2}, + } + ) + self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params) + self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9) + self.data_loader = DataLoader(self.instances, batch_size=2) + self.validation_data_loader = DataLoader(self.instances, batch_size=2) + self.instances.index_with(vocab) + + def test_trainer_can_run(self): + trainer = TrainerV2( + model=self.model, + optimizer=self.optimizer, + data_loader=self.data_loader, + validation_data_loader=self.validation_data_loader, + train_dataset=self.instances, + validation_dataset=self.instances, + num_epochs=2, + ) + metrics = trainer.train() + assert "best_validation_loss" in metrics + assert isinstance(metrics["best_validation_loss"], float) + assert "best_validation_accuracy" in metrics + assert isinstance(metrics["best_validation_accuracy"], float) + assert "best_validation_accuracy3" in metrics + assert isinstance(metrics["best_validation_accuracy3"], float) + assert "best_epoch" in metrics + assert isinstance(metrics["best_epoch"], int) + + # Making sure that both increasing and decreasing validation metrics work. 
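One detail worth calling out in the test setup above: a stock `torch.utils.data.DataLoader` cannot tensorize `Instance` objects with its default collate function, so batches need a custom `collate_fn`. A minimal sketch of one (a later commit in this series registers essentially this function as `allennlp_collocate`), assuming the dataset has already been indexed via `index_with`:

from allennlp.data.batch import Batch as AllennlpBatch

def collate_instances(instances):
    # Pad the already-indexed instances to a common length and tensorize them.
    batch = AllennlpBatch(instances)
    return batch.as_tensor_dict(batch.get_padding_lengths())

# Usage sketch:
# loader = DataLoader(dataset, batch_size=2, collate_fn=collate_instances)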
+ trainer = TrainerV2( + model=self.model, + optimizer=self.optimizer, + data_loader=self.data_loader, + validation_data_loader=self.validation_data_loader, + train_dataset=self.instances, + validation_dataset=self.instances, + validation_metric="+loss", + num_epochs=2, + ) + metrics = trainer.train() + assert "best_validation_loss" in metrics + assert isinstance(metrics["best_validation_loss"], float) + assert "best_validation_accuracy" in metrics + assert isinstance(metrics["best_validation_accuracy"], float) + assert "best_validation_accuracy3" in metrics + assert isinstance(metrics["best_validation_accuracy3"], float) + assert "best_epoch" in metrics + assert isinstance(metrics["best_epoch"], int) + assert "peak_cpu_memory_MB" in metrics + assert isinstance(metrics["peak_cpu_memory_MB"], float) + assert metrics["peak_cpu_memory_MB"] > 0 diff --git a/allennlp/training/trainer_v2.py b/allennlp/training/trainer_v2.py index 2d2fa145b26..7c397df4ac3 100644 --- a/allennlp/training/trainer_v2.py +++ b/allennlp/training/trainer_v2.py @@ -202,13 +202,16 @@ def __init__( # not already on the GPU then the optimizer is going to be wrong. self.model = model - self.iterator = iterator - self._validation_iterator = validation_iterator + self.data_loader = data_loader + self._validation_data_loader = validation_data_loader self.shuffle = shuffle self.optimizer = optimizer self.train_data = train_dataset self._validation_data = validation_dataset + if validation_dataset is not None and validation_data_loader is None: + raise ConfigurationError("To pass a validation dataset, you must also pass a validation_data_loader.") + if patience is None: # no early stopping if validation_dataset: logger.warning( @@ -336,12 +339,13 @@ def _train_epoch(self, epoch: int) -> Dict[str, float]: self._pytorch_model.train() # Get tqdm for the training batches - batch_generator = self.iterator(self.train_data, num_epochs=1, shuffle=self.shuffle) + batch_generator = self.data_loader batch_group_generator = common_util.lazy_groups_of( batch_generator, self._num_gradient_accumulation_steps ) + num_training_batches = math.ceil( - self.iterator.get_num_batches(self.train_data) / self._num_gradient_accumulation_steps + len(self.data_loader) / self._num_gradient_accumulation_steps ) # Having multiple tqdm bars in case of distributed training will be a mess. 
Hence only the master's # progress is shown @@ -516,14 +520,12 @@ def _validation_loss(self) -> Tuple[float, int]: if self._moving_average is not None: self._moving_average.assign_average_value() - if self._validation_iterator is not None: - val_iterator = self._validation_iterator + if self._validation_data_loader is not None: + validation_data_loader = self._validation_data_loader else: - val_iterator = self.iterator + raise ConfigurationError("Validation results cannot be calculated without a validation_data_loader") - val_generator = val_iterator(self._validation_data, num_epochs=1, shuffle=False) - num_validation_batches = val_iterator.get_num_batches(self._validation_data) - val_generator_tqdm = Tqdm.tqdm(val_generator, total=num_validation_batches) + val_generator_tqdm = Tqdm.tqdm(validation_data_loader, total=len(validation_data_loader)) batches_this_epoch = 0 val_loss = 0 done_early = False From 7e89ea67970ca074a2aa2e420ece6c280fc59e64 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Wed, 19 Feb 2020 13:33:03 -0800 Subject: [PATCH 07/52] use iter, custom collate function in allennlp wrapper --- allennlp/data/dataset_readers/dataset_reader.py | 4 +++- allennlp/data/samplers/__init__.py | 9 +++++++++ allennlp/tests/training/trainer_v2_test.py | 5 +++-- allennlp/training/trainer_v2.py | 4 ++-- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/allennlp/data/dataset_readers/dataset_reader.py b/allennlp/data/dataset_readers/dataset_reader.py index 3b5a35299d7..9af9bbbe6bc 100644 --- a/allennlp/data/dataset_readers/dataset_reader.py +++ b/allennlp/data/dataset_readers/dataset_reader.py @@ -22,7 +22,8 @@ def __init__(self, instances: List[Instance], vocab: Vocabulary = None): self.vocab = vocab def __getitem__(self, idx): - + if self.vocab is not None: + self.instances[idx].index_fields(self.vocab) return self.instances[idx] def __len__(self): @@ -31,6 +32,7 @@ def __len__(self): def index_with(self, vocab: Vocabulary): self.vocab = vocab + class _LazyInstances(IterableDataset): """ An `Iterable` that just wraps a thunk for generating instances and calls it for diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 846ed9240be..3c0611e337f 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -184,6 +184,14 @@ def _guess_sorting_keys(self, instances: List[Instance]) -> None: self._sorting_keys = [longest_padding_key] +def allennlp_collocate(batch): + print(batch) + batch = AllennlpBatch(batch) + return batch.as_tensor_dict(batch.get_padding_lengths()) + + + + class DataLoader(Registrable, data.DataLoader): def __init__(self, dataset: data.Dataset, batch_size: int = 1, shuffle: bool = False, sampler: Sampler = None, @@ -191,6 +199,7 @@ def __init__(self, dataset: data.Dataset, batch_size: int = 1, shuffle: bool = F pin_memory: bool = False, drop_last: bool = False, timeout: bool = 0, worker_init_fn=None, multiprocessing_context: str = None): + collate_fn = allennlp_collocate super().__init__(self, dataset=dataset, batch_size=batch_size, shuffle=shuffle, sampler=sampler, batch_sampler=batch_sampler, num_workers=num_workers, collate_fn=collate_fn, pin_memory=pin_memory, drop_last=drop_last, timeout=timeout, diff --git a/allennlp/tests/training/trainer_v2_test.py b/allennlp/tests/training/trainer_v2_test.py index 22cd9e2cdcf..cf8f5751860 100644 --- a/allennlp/tests/training/trainer_v2_test.py +++ b/allennlp/tests/training/trainer_v2_test.py @@ -4,6 +4,7 @@ from allennlp.common.testing import 
AllenNlpTestCase from allennlp.data import Vocabulary from allennlp.data.dataset_readers import SequenceTaggingDatasetReader +from allennlp.data.samplers import allennlp_collocate from allennlp.models.simple_tagger import SimpleTagger from allennlp.training.trainer_v2 import TrainerV2 @@ -26,8 +27,8 @@ def setUp(self): ) self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params) self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9) - self.data_loader = DataLoader(self.instances, batch_size=2) - self.validation_data_loader = DataLoader(self.instances, batch_size=2) + self.data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) + self.validation_data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) self.instances.index_with(vocab) def test_trainer_can_run(self): diff --git a/allennlp/training/trainer_v2.py b/allennlp/training/trainer_v2.py index 7c397df4ac3..d6e5060774b 100644 --- a/allennlp/training/trainer_v2.py +++ b/allennlp/training/trainer_v2.py @@ -339,7 +339,7 @@ def _train_epoch(self, epoch: int) -> Dict[str, float]: self._pytorch_model.train() # Get tqdm for the training batches - batch_generator = self.data_loader + batch_generator = iter(self.data_loader) batch_group_generator = common_util.lazy_groups_of( batch_generator, self._num_gradient_accumulation_steps ) @@ -525,7 +525,7 @@ def _validation_loss(self) -> Tuple[float, int]: else: raise ConfigurationError("Validation results cannot be calculated without a validation_data_loader") - val_generator_tqdm = Tqdm.tqdm(validation_data_loader, total=len(validation_data_loader)) + val_generator_tqdm = Tqdm.tqdm(iter(validation_data_loader), total=len(validation_data_loader)) batches_this_epoch = 0 val_loss = 0 done_early = False From 883b6d75f06da4c689e68318b6befed8348d2930 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Wed, 19 Feb 2020 15:18:54 -0800 Subject: [PATCH 08/52] we don't even need the data in the trainer anymore --- allennlp/tests/training/trainer_v2_test.py | 4 --- allennlp/training/trainer_v2.py | 32 ++-------------------- 2 files changed, 3 insertions(+), 33 deletions(-) diff --git a/allennlp/tests/training/trainer_v2_test.py b/allennlp/tests/training/trainer_v2_test.py index cf8f5751860..37d81534c0a 100644 --- a/allennlp/tests/training/trainer_v2_test.py +++ b/allennlp/tests/training/trainer_v2_test.py @@ -37,8 +37,6 @@ def test_trainer_can_run(self): optimizer=self.optimizer, data_loader=self.data_loader, validation_data_loader=self.validation_data_loader, - train_dataset=self.instances, - validation_dataset=self.instances, num_epochs=2, ) metrics = trainer.train() @@ -57,8 +55,6 @@ def test_trainer_can_run(self): optimizer=self.optimizer, data_loader=self.data_loader, validation_data_loader=self.validation_data_loader, - train_dataset=self.instances, - validation_dataset=self.instances, validation_metric="+loss", num_epochs=2, ) diff --git a/allennlp/training/trainer_v2.py b/allennlp/training/trainer_v2.py index d6e5060774b..d3dff464ee9 100644 --- a/allennlp/training/trainer_v2.py +++ b/allennlp/training/trainer_v2.py @@ -43,8 +43,6 @@ def __init__( model: Model, optimizer: torch.optim.Optimizer, data_loader: torch.utils.data.DataLoader, - train_dataset: Iterable[Instance], - validation_dataset: Optional[Iterable[Instance]] = None, patience: Optional[int] = None, validation_metric: str = "-loss", validation_data_loader: torch.utils.data.DataLoader = None, @@ -92,10 +90,6 @@ def __init__( model to be 
optimized. iterator : `DataIterator`, required. A method for iterating over a `Dataset`, yielding padded indexed batches. - train_dataset : `Dataset`, required. - A `Dataset` to train on. The dataset should have already been indexed. - validation_dataset : `Dataset`, optional, (default = None). - A `Dataset` to evaluate on. The dataset should have already been indexed. patience : Optional[int] > 0, optional (default=None) Number of epochs to be patient before early stopping: the training is stopped after `patience` epochs with no improvement. If given, it must be `> 0`. @@ -206,14 +200,9 @@ def __init__( self._validation_data_loader = validation_data_loader self.shuffle = shuffle self.optimizer = optimizer - self.train_data = train_dataset - self._validation_data = validation_dataset - - if validation_dataset is not None and validation_data_loader is None: - raise ConfigurationError("To pass a validation dataset, you must also pass a validation_data_loader.") if patience is None: # no early stopping - if validation_dataset: + if validation_data_loader: logger.warning( "You provided a validation dataset but patience was set to None, " "meaning that early stopping is disabled" @@ -626,7 +615,7 @@ def train(self) -> Dict[str, Any]: if key.startswith("gpu_"): metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value) - if self._validation_data is not None: + if self._validation_data_loader is not None: with torch.no_grad(): # We have a validation set, so compute all the metrics on it. val_loss, num_batches = self._validation_loss() @@ -826,9 +815,7 @@ def from_partial_objects( model: Model, serialization_dir: str, data_loader: Lazy[DataLoader], - train_data: Iterable[Instance], validation_data_loader: Lazy[DataLoader] = None, - validation_data: Iterable[Instance] = None, local_rank: int = 0, patience: int = None, validation_metric: str = "-loss", @@ -888,7 +875,7 @@ def from_partial_objects( optimizer_ = Optimizer.default(parameters) try: - batches_per_epoch = len(train_data) + batches_per_epoch = len(data_loader) except TypeError: # If the dataset is lazy, it won't have a length. 
batches_per_epoch = None @@ -901,23 +888,10 @@ def from_partial_objects( checkpointer_ = checkpointer.construct() or Checkpointer(serialization_dir) - if validation_data_loader is None and validation_data is not None: - validation_data_loader = data_loader.construct(dataset=validation_data) - - elif validation_data_loader and validation_data is not None: - validation_data_loader = validation_data_loader.construct(dataset=validation_data) - - else: - validation_data_loader = None - - data_loader = data_loader.construct(dataset=train_data) - return cls( model, optimizer_, data_loader, - train_data, - validation_data, patience=patience, validation_metric=validation_metric, validation_data_loader=validation_data_loader, From 56d022a8f7ccfbf40558616f55792d59581cdcc3 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Wed, 19 Feb 2020 16:00:36 -0800 Subject: [PATCH 09/52] all trainer tests passing --- allennlp/data/samplers/__init__.py | 1 - allennlp/tests/training/trainer_test.py | 211 ++--- allennlp/tests/training/trainer_v2_test.py | 72 -- allennlp/training/trainer.py | 63 +- allennlp/training/trainer_v2.py | 918 --------------------- 5 files changed, 116 insertions(+), 1149 deletions(-) delete mode 100644 allennlp/tests/training/trainer_v2_test.py delete mode 100644 allennlp/training/trainer_v2.py diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 3c0611e337f..4c527c5496c 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -185,7 +185,6 @@ def _guess_sorting_keys(self, instances: List[Instance]) -> None: def allennlp_collocate(batch): - print(batch) batch = AllennlpBatch(batch) return batch.as_tensor_dict(batch.get_padding_lengths()) diff --git a/allennlp/tests/training/trainer_test.py b/allennlp/tests/training/trainer_test.py index 04645b07f61..788bbab359c 100644 --- a/allennlp/tests/training/trainer_test.py +++ b/allennlp/tests/training/trainer_test.py @@ -8,13 +8,13 @@ import math import pytest import torch +from torch.utils.data import DataLoader from allennlp.common.checks import ConfigurationError from allennlp.common.params import Params from allennlp.common.testing import AllenNlpTestCase from allennlp.data import Vocabulary from allennlp.data.dataset_readers import SequenceTaggingDatasetReader -from allennlp.data.iterators import BasicIterator from allennlp.models.model import Model from allennlp.models.simple_tagger import SimpleTagger from allennlp.training import Trainer @@ -22,6 +22,7 @@ from allennlp.training.momentum_schedulers import MomentumScheduler from allennlp.training.moving_average import ExponentialMovingAverage from allennlp.training.util import sparse_clip_norm +from allennlp.data.samplers import allennlp_collocate class TestTrainer(AllenNlpTestCase): @@ -42,16 +43,16 @@ def setUp(self): ) self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params) self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9) - self.iterator = BasicIterator(batch_size=2) - self.iterator.index_with(vocab) + self.data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) + self.validation_data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) + self.instances.index_with(vocab) def test_trainer_can_run(self): trainer = Trainer( model=self.model, optimizer=self.optimizer, - iterator=self.iterator, - train_dataset=self.instances, - validation_dataset=self.instances, + data_loader=self.data_loader, + 
validation_data_loader=self.validation_data_loader, num_epochs=2, ) metrics = trainer.train() @@ -68,9 +69,8 @@ def test_trainer_can_run(self): trainer = Trainer( model=self.model, optimizer=self.optimizer, - iterator=self.iterator, - train_dataset=self.instances, - validation_dataset=self.instances, + data_loader=self.data_loader, + validation_data_loader=self.validation_data_loader, validation_metric="+loss", num_epochs=2, ) @@ -92,9 +92,8 @@ def test_trainer_can_run_exponential_moving_average(self): trainer = Trainer( model=self.model, optimizer=self.optimizer, - iterator=self.iterator, - train_dataset=self.instances, - validation_dataset=self.instances, + data_loader=self.data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=2, moving_average=moving_average, ) @@ -104,7 +103,7 @@ def test_trainer_can_run_exponential_moving_average(self): def test_trainer_can_run_cuda(self): self.model.cuda() trainer = Trainer( - self.model, self.optimizer, self.iterator, self.instances, num_epochs=2, cuda_device=0 + self.model, self.optimizer, self.data_loader, num_epochs=2, cuda_device=0 ) metrics = trainer.train() assert "peak_cpu_memory_MB" in metrics @@ -117,14 +116,11 @@ def test_trainer_can_run_cuda(self): def test_passing_trainer_multiple_gpus_raises_error(self): self.model.cuda() - multigpu_iterator = BasicIterator(batch_size=4) - multigpu_iterator.index_with(self.vocab) with pytest.raises(ConfigurationError): Trainer( self.model, self.optimizer, - multigpu_iterator, - self.instances, + self.data_loader, num_epochs=2, cuda_device=[0, 1], ) @@ -133,9 +129,8 @@ def test_trainer_can_resume_training(self): trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=1, serialization_dir=self.TEST_DIR, ) @@ -143,9 +138,8 @@ def test_trainer_can_resume_training(self): new_trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, ) @@ -165,9 +159,8 @@ def test_trainer_can_resume_training_for_exponential_moving_average(self): trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, num_epochs=1, serialization_dir=self.TEST_DIR, moving_average=moving_average, @@ -178,9 +171,8 @@ def test_trainer_can_resume_training_for_exponential_moving_average(self): new_trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, moving_average=new_moving_average, @@ -201,9 +193,8 @@ def test_metric_only_considered_best_so_far_when_strictly_better_than_those_befo new_trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, @@ -237,9 +228,8 @@ def test_metric_only_considered_best_so_far_when_strictly_better_than_those_befo new_trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + 
validation_dataset_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, @@ -270,9 +260,8 @@ def test_should_stop_early_with_increasing_metric(self): new_trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, @@ -294,9 +283,8 @@ def test_should_stop_early_with_flat_lining_metric(self): tracker = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, @@ -308,9 +296,8 @@ def test_should_stop_early_with_flat_lining_metric(self): tracker = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, @@ -323,9 +310,8 @@ def test_should_stop_early_with_decreasing_metric(self): new_trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, @@ -350,9 +336,8 @@ def test_should_stop_early_with_early_stopping_disabled(self): trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, num_epochs=100, patience=None, validation_metric="+test", @@ -365,9 +350,8 @@ def test_should_stop_early_with_early_stopping_disabled(self): trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, num_epochs=100, patience=None, validation_metric="-test", @@ -387,9 +371,8 @@ def test_should_stop_early_with_invalid_patience(self): Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, num_epochs=100, patience=patience, validation_metric="+test", @@ -403,11 +386,10 @@ def test_trainer_can_run_and_resume_with_momentum_scheduler(self): trainer = Trainer( model=self.model, optimizer=self.optimizer, - iterator=self.iterator, + data_loader=self.data_loader, momentum_scheduler=scheduler, validation_metric="-loss", - train_dataset=self.instances, - validation_dataset=self.instances, + validation_dataset_loader=self.validation_data_loader, num_epochs=4, serialization_dir=self.TEST_DIR, ) @@ -420,11 +402,10 @@ def test_trainer_can_run_and_resume_with_momentum_scheduler(self): new_trainer = Trainer( model=self.model, optimizer=self.optimizer, - iterator=self.iterator, + data_loader=self.data_loader, momentum_scheduler=new_scheduler, validation_metric="-loss", - train_dataset=self.instances, - validation_dataset=self.instances, + validation_dataset_loader=self.validation_data_loader, num_epochs=6, serialization_dir=self.TEST_DIR, ) @@ -438,11 +419,10 @@ def test_trainer_can_run_with_lr_scheduler(self): trainer = Trainer( model=self.model, optimizer=self.optimizer, - iterator=self.iterator, + data_loader=self.data_loader, 
learning_rate_scheduler=lr_scheduler, validation_metric="-loss", - train_dataset=self.instances, - validation_dataset=self.instances, + validation_data_loader=self.validation_data_loader, num_epochs=2, ) trainer.train() @@ -452,10 +432,9 @@ def test_trainer_can_resume_with_lr_scheduler(self): trainer = Trainer( model=self.model, optimizer=self.optimizer, - iterator=self.iterator, + data_loader=self.data_loader, learning_rate_scheduler=lr_scheduler, - train_dataset=self.instances, - validation_dataset=self.instances, + validation_data_loader=self.validation_data_loader, num_epochs=2, serialization_dir=self.TEST_DIR, ) @@ -465,10 +444,9 @@ def test_trainer_can_resume_with_lr_scheduler(self): new_trainer = Trainer( model=self.model, optimizer=self.optimizer, - iterator=self.iterator, + data_loader=self.data_loader, learning_rate_scheduler=new_lr_scheduler, - train_dataset=self.instances, - validation_dataset=self.instances, + validation_data_loader=self.validation_data_loader, num_epochs=4, serialization_dir=self.TEST_DIR, ) @@ -486,8 +464,7 @@ def forward(self, **kwargs): trainer = Trainer( FakeModel(None), self.optimizer, - self.iterator, - self.instances, + self.data_loader, num_epochs=2, serialization_dir=self.TEST_DIR, ) @@ -501,8 +478,7 @@ def test_trainer_can_log_histograms(self): trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, + self.data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, histogram_interval=2, ) @@ -513,8 +489,7 @@ def test_trainer_respects_num_serialized_models_to_keep(self): trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, + self.data_loader, num_epochs=5, serialization_dir=self.TEST_DIR, num_serialized_models_to_keep=3, ) @@ -531,9 +506,8 @@ def test_trainer_saves_metrics_every_epoch(self): trainer = Trainer( model=self.model, optimizer=self.optimizer, - iterator=self.iterator, - train_dataset=self.instances, - validation_dataset=self.instances, + data_loader=self.data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=5, serialization_dir=self.TEST_DIR, num_serialized_models_to_keep=3, ) @@ -550,24 +524,26 @@ def test_trainer_saves_metrics_every_epoch(self): def test_trainer_respects_keep_serialized_model_every_num_seconds(self): # To test: - # Create an iterator that sleeps for 2.5 second per epoch, so the total training - # time for one epoch is slightly greater then 2.5 seconds. + # Create a fake data loader that sleeps for 2.5 seconds per epoch, so the total + # training time for one epoch is slightly greater than 2.5 seconds. # Run for 6 epochs, keeping the last 2 models, models also kept every 5 seconds. # Check the resulting checkpoints. Should then have models at epochs # 2, 4, plus the last two at 5 and 6.
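# Rough timeline (an illustrative aside, assuming the ~2.5 s sleep per epoch and the 5-second keep interval described above): checkpoints land at roughly t = 2.5, 5, 7.5, 10, 12.5 and 15 s, so the every-5-seconds rule permanently keeps the epoch-1 and epoch-3 checkpoints (0-indexed), while num_serialized_models_to_keep=2 keeps epochs 4 and 5; that is why the test below asserts sorted(epochs) == [1, 3, 4, 5].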
- class WaitingIterator(BasicIterator): - def _create_batches(self, *args, **kwargs): + + class SlowDataLoader: + data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) + + def __iter__(self): time.sleep(2.5) - return super()._create_batches(*args, **kwargs) + return iter(self.data_loader) - iterator = WaitingIterator(batch_size=2) - iterator.index_with(self.vocab) + def __len__(self): + return len(self.data_loader) trainer = Trainer( self.model, self.optimizer, - iterator, - self.instances, + SlowDataLoader(), num_epochs=6, serialization_dir=self.TEST_DIR, num_serialized_models_to_keep=2, @@ -583,14 +559,11 @@ def _create_batches(self, *args, **kwargs): assert sorted(epochs) == [1, 3, 4, 5] def test_trainer_can_log_learning_rates_tensorboard(self): - iterator = BasicIterator(batch_size=4) - iterator.index_with(self.vocab) - + data_loader = DataLoader(self.instances, batch_size=4, collate_fn=allennlp_collocate) trainer = Trainer( self.model, self.optimizer, - iterator, - self.instances, + data_loader, num_epochs=2, serialization_dir=self.TEST_DIR, should_log_learning_rate=True, @@ -600,14 +573,12 @@ def test_trainer_can_log_learning_rates_tensorboard(self): trainer.train() def test_trainer_saves_models_at_specified_interval(self): - iterator = BasicIterator(batch_size=4) - iterator.index_with(self.vocab) + data_loader = DataLoader(self.instances, batch_size=4, collate_fn=allennlp_collocate) trainer = Trainer( self.model, self.optimizer, - iterator, - self.instances, + data_loader, num_epochs=2, serialization_dir=self.TEST_DIR, model_save_interval=0.0001, @@ -636,8 +607,7 @@ def test_trainer_saves_models_at_specified_interval(self): restore_trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, + self.data_loader, num_epochs=2, serialization_dir=self.TEST_DIR, model_save_interval=0.0001, @@ -653,9 +623,8 @@ def test_trainer_saves_and_loads_best_validation_metrics_correctly_1(self): trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, validation_metric="-loss", num_epochs=1, serialization_dir=self.TEST_DIR, @@ -672,9 +641,8 @@ def test_trainer_saves_and_loads_best_validation_metrics_correctly_1(self): restore_trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, validation_metric="-loss", num_epochs=2, serialization_dir=self.TEST_DIR, @@ -694,9 +662,8 @@ def test_trainer_saves_and_loads_best_validation_metrics_correctly_2(self): trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, validation_metric="+loss", num_epochs=1, serialization_dir=self.TEST_DIR, @@ -714,9 +681,8 @@ def test_trainer_saves_and_loads_best_validation_metrics_correctly_2(self): restore_trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, validation_metric="+loss", num_epochs=2, serialization_dir=self.TEST_DIR, @@ -738,9 +704,9 @@ def test_restored_training_returns_best_epoch_metrics_even_if_no_better_epoch_is original_trainer = Trainer( self.model, self.optimizer, - self.iterator, + self.data_loader, 
self.instances, - validation_dataset=self.instances, + validation_dataset_loader=self.validation_data_loader, validation_metric="+loss", num_epochs=1, serialization_dir=self.TEST_DIR, @@ -751,9 +717,8 @@ def test_restored_training_returns_best_epoch_metrics_even_if_no_better_epoch_is restored_trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, validation_metric="+loss", num_epochs=2, serialization_dir=self.TEST_DIR, @@ -774,9 +739,8 @@ def test_restoring_works_with_older_checkpointing(self): trainer = Trainer( self.model, self.optimizer, - self.iterator, - self.instances, - validation_dataset=self.instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, ) @@ -806,9 +770,8 @@ def test_trainer_can_run_gradient_accumulation(self): trainer = Trainer( self.model, self.optimizer, - self.iterator, - instances, - validation_dataset=instances, + self.data_loader, + validation_dataset_loader=self.validation_data_loader, num_epochs=2, num_gradient_accumulation_steps=steps_to_accumulate, ) @@ -818,7 +781,7 @@ def test_trainer_can_run_gradient_accumulation(self): num_batches_trained_per_epoch = trainer._batch_num_total // (metrics["training_epochs"] + 1) num_batches_expected = math.ceil( - math.ceil(len(instances) / self.iterator._batch_size) / steps_to_accumulate + math.ceil(len(instances) / self.data_loader.batch_size) / steps_to_accumulate ) assert num_batches_trained_per_epoch == num_batches_expected diff --git a/allennlp/tests/training/trainer_v2_test.py b/allennlp/tests/training/trainer_v2_test.py deleted file mode 100644 index 37d81534c0a..00000000000 --- a/allennlp/tests/training/trainer_v2_test.py +++ /dev/null @@ -1,72 +0,0 @@ -import torch -from torch.utils.data import DataLoader -from allennlp.common.params import Params -from allennlp.common.testing import AllenNlpTestCase -from allennlp.data import Vocabulary -from allennlp.data.dataset_readers import SequenceTaggingDatasetReader -from allennlp.data.samplers import allennlp_collocate -from allennlp.models.simple_tagger import SimpleTagger -from allennlp.training.trainer_v2 import TrainerV2 - - -class TestTrainer(AllenNlpTestCase): - def setUp(self): - super().setUp() - self.instances = SequenceTaggingDatasetReader().read( - self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv" - ) - vocab = Vocabulary.from_instances(self.instances) - self.vocab = vocab - self.model_params = Params( - { - "text_field_embedder": { - "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}} - }, - "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2}, - } - ) - self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params) - self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9) - self.data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) - self.validation_data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) - self.instances.index_with(vocab) - - def test_trainer_can_run(self): - trainer = TrainerV2( - model=self.model, - optimizer=self.optimizer, - data_loader=self.data_loader, - validation_data_loader=self.validation_data_loader, - num_epochs=2, - ) - metrics = trainer.train() - assert "best_validation_loss" in metrics - assert isinstance(metrics["best_validation_loss"], float) - assert 
"best_validation_accuracy" in metrics - assert isinstance(metrics["best_validation_accuracy"], float) - assert "best_validation_accuracy3" in metrics - assert isinstance(metrics["best_validation_accuracy3"], float) - assert "best_epoch" in metrics - assert isinstance(metrics["best_epoch"], int) - - # Making sure that both increasing and decreasing validation metrics work. - trainer = TrainerV2( - model=self.model, - optimizer=self.optimizer, - data_loader=self.data_loader, - validation_data_loader=self.validation_data_loader, - validation_metric="+loss", - num_epochs=2, - ) - metrics = trainer.train() - assert "best_validation_loss" in metrics - assert isinstance(metrics["best_validation_loss"], float) - assert "best_validation_accuracy" in metrics - assert isinstance(metrics["best_validation_accuracy"], float) - assert "best_validation_accuracy3" in metrics - assert isinstance(metrics["best_validation_accuracy3"], float) - assert "best_epoch" in metrics - assert isinstance(metrics["best_epoch"], int) - assert "peak_cpu_memory_MB" in metrics - assert isinstance(metrics["peak_cpu_memory_MB"], float) - assert metrics["peak_cpu_memory_MB"] > 0 diff --git a/allennlp/training/trainer.py b/allennlp/training/trainer.py index ceec81bad8c..aebd44d88f4 100644 --- a/allennlp/training/trainer.py +++ b/allennlp/training/trainer.py @@ -12,11 +12,15 @@ import torch.optim.lr_scheduler from torch.nn.parallel import DistributedDataParallel + from allennlp.common import Lazy, Tqdm from allennlp.common.checks import ConfigurationError, check_for_gpu from allennlp.common import util as common_util from allennlp.data.instance import Instance -from allennlp.data.iterators.data_iterator import DataIterator, TensorDict + +from allennlp.data.samplers import DataLoader + +from allennlp.data.iterators.data_iterator import TensorDict from allennlp.models.model import Model from allennlp.nn import util as nn_util from allennlp.training import util as training_util @@ -32,18 +36,16 @@ logger = logging.getLogger(__name__) -@TrainerBase.register("default", constructor="from_partial_objects") +@TrainerBase.register("trainer", constructor="from_partial_objects") class Trainer(TrainerBase): def __init__( self, model: Model, optimizer: torch.optim.Optimizer, - iterator: DataIterator, - train_dataset: Iterable[Instance], - validation_dataset: Optional[Iterable[Instance]] = None, + data_loader: torch.utils.data.DataLoader, patience: Optional[int] = None, validation_metric: str = "-loss", - validation_iterator: DataIterator = None, + validation_data_loader: torch.utils.data.DataLoader = None, shuffle: bool = True, num_epochs: int = 20, serialization_dir: Optional[str] = None, @@ -88,10 +90,6 @@ def __init__( model to be optimized. iterator : `DataIterator`, required. A method for iterating over a `Dataset`, yielding padded indexed batches. - train_dataset : `Dataset`, required. - A `Dataset` to train on. The dataset should have already been indexed. - validation_dataset : `Dataset`, optional, (default = None). - A `Dataset` to evaluate on. The dataset should have already been indexed. patience : Optional[int] > 0, optional (default=None) Number of epochs to be patient before early stopping: the training is stopped after `patience` epochs with no improvement. If given, it must be `> 0`. @@ -198,15 +196,13 @@ def __init__( # not already on the GPU then the optimizer is going to be wrong. 
self.model = model - self.iterator = iterator - self._validation_iterator = validation_iterator + self.data_loader = data_loader + self._validation_data_loader = validation_data_loader self.shuffle = shuffle self.optimizer = optimizer - self.train_data = train_dataset - self._validation_data = validation_dataset if patience is None: # no early stopping - if validation_dataset: + if validation_data_loader: logger.warning( "You provided a validation dataset but patience was set to None, " "meaning that early stopping is disabled" @@ -332,12 +328,13 @@ def _train_epoch(self, epoch: int) -> Dict[str, float]: self._pytorch_model.train() # Get tqdm for the training batches - batch_generator = self.iterator(self.train_data, num_epochs=1, shuffle=self.shuffle) + batch_generator = iter(self.data_loader) batch_group_generator = common_util.lazy_groups_of( batch_generator, self._num_gradient_accumulation_steps ) + num_training_batches = math.ceil( - self.iterator.get_num_batches(self.train_data) / self._num_gradient_accumulation_steps + len(self.data_loader) / self._num_gradient_accumulation_steps ) # Having multiple tqdm bars in case of distributed training will be a mess. Hence only the master's # progress is shown @@ -512,14 +509,12 @@ def _validation_loss(self) -> Tuple[float, int]: if self._moving_average is not None: self._moving_average.assign_average_value() - if self._validation_iterator is not None: - val_iterator = self._validation_iterator + if self._validation_data_loader is not None: + validation_data_loader = self._validation_data_loader else: - val_iterator = self.iterator + raise ConfigurationError("Validation results cannot be calculated without a validation_data_loader") - val_generator = val_iterator(self._validation_data, num_epochs=1, shuffle=False) - num_validation_batches = val_iterator.get_num_batches(self._validation_data) - val_generator_tqdm = Tqdm.tqdm(val_generator, total=num_validation_batches) + val_generator_tqdm = Tqdm.tqdm(iter(validation_data_loader), total=len(validation_data_loader)) batches_this_epoch = 0 val_loss = 0 done_early = False @@ -620,7 +615,7 @@ def train(self) -> Dict[str, Any]: if key.startswith("gpu_"): metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value) - if self._validation_data is not None: + if self._validation_data_loader is not None: with torch.no_grad(): # We have a validation set, so compute all the metrics on it. val_loss, num_batches = self._validation_loss() @@ -819,10 +814,8 @@ def from_partial_objects( cls, model: Model, serialization_dir: str, - iterator: DataIterator, - train_data: Iterable[Instance], - validation_iterator: DataIterator = None, - validation_data: Iterable[Instance] = None, + data_loader: Lazy[DataLoader], + validation_data_loader: Lazy[DataLoader] = None, local_rank: int = 0, patience: int = None, validation_metric: str = "-loss", @@ -881,9 +874,12 @@ def from_partial_objects( if not optimizer_: optimizer_ = Optimizer.default(parameters) - batches_per_epoch = iterator.get_num_batches(train_data) - if batches_per_epoch == 1: # get_num_batches returns 1 when it can't determine the answer + try: + batches_per_epoch = len(data_loader) + except TypeError: + # If the dataset is lazy, it won't have a length. 
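+ # For example (an illustrative note, not part of the original patch): a lazy, iterable-style dataset typically defines no __len__, so len(data_loader) raises TypeError here and the number of batches per epoch is left unknown.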
batches_per_epoch = None + moving_average_ = moving_average.construct(parameters=parameters) learning_rate_scheduler_ = learning_rate_scheduler.construct( optimizer=optimizer_, num_epochs=num_epochs, num_steps_per_epoch=batches_per_epoch @@ -891,15 +887,14 @@ def from_partial_objects( momentum_scheduler_ = momentum_scheduler.construct(optimizer=optimizer_) checkpointer_ = checkpointer.construct() or Checkpointer(serialization_dir) + return cls( model, optimizer_, - iterator, - train_data, - validation_data, + data_loader, patience=patience, validation_metric=validation_metric, - validation_iterator=validation_iterator, + validation_data_loader=validation_data_loader, shuffle=shuffle, num_epochs=num_epochs, serialization_dir=serialization_dir, diff --git a/allennlp/training/trainer_v2.py b/allennlp/training/trainer_v2.py deleted file mode 100644 index d3dff464ee9..00000000000 --- a/allennlp/training/trainer_v2.py +++ /dev/null @@ -1,918 +0,0 @@ -import datetime -import logging -import math -import os -import re -import time -import traceback -from typing import Dict, List, Optional, Tuple, Union, Iterable, Any - -import torch -import torch.distributed as dist -import torch.optim.lr_scheduler -from torch.nn.parallel import DistributedDataParallel - - -from allennlp.common import Lazy, Tqdm -from allennlp.common.checks import ConfigurationError, check_for_gpu -from allennlp.common import util as common_util -from allennlp.data.instance import Instance - -from allennlp.data.samplers import DataLoader - -from allennlp.data.iterators.data_iterator import TensorDict -from allennlp.models.model import Model -from allennlp.nn import util as nn_util -from allennlp.training import util as training_util -from allennlp.training.checkpointer import Checkpointer -from allennlp.training.learning_rate_schedulers import LearningRateScheduler -from allennlp.training.metric_tracker import MetricTracker -from allennlp.training.momentum_schedulers import MomentumScheduler -from allennlp.training.moving_average import MovingAverage -from allennlp.training.optimizers import Optimizer -from allennlp.training.tensorboard_writer import TensorboardWriter -from allennlp.training.trainer_base import TrainerBase - -logger = logging.getLogger(__name__) - - -@TrainerBase.register("trainer_v2", constructor="from_partial_objects") -class TrainerV2(TrainerBase): - def __init__( - self, - model: Model, - optimizer: torch.optim.Optimizer, - data_loader: torch.utils.data.DataLoader, - patience: Optional[int] = None, - validation_metric: str = "-loss", - validation_data_loader: torch.utils.data.DataLoader = None, - shuffle: bool = True, - num_epochs: int = 20, - serialization_dir: Optional[str] = None, - num_serialized_models_to_keep: int = 20, - keep_serialized_model_every_num_seconds: int = None, - checkpointer: Checkpointer = None, - model_save_interval: float = None, - cuda_device: int = -1, - grad_norm: Optional[float] = None, - grad_clipping: Optional[float] = None, - learning_rate_scheduler: Optional[LearningRateScheduler] = None, - momentum_scheduler: Optional[MomentumScheduler] = None, - summary_interval: int = 100, - histogram_interval: int = None, - should_log_parameter_statistics: bool = True, - should_log_learning_rate: bool = False, - log_batch_size_period: Optional[int] = None, - moving_average: Optional[MovingAverage] = None, - distributed: bool = False, - local_rank: int = 0, - world_size: int = 1, - num_gradient_accumulation_steps: int = 1, - ) -> None: - """ - A trainer for doing supervised learning. 
It just takes a labeled dataset - and a `DataIterator`, and uses the supplied `Optimizer` to learn the weights - for your model over some fixed number of epochs. You can also pass in a validation - dataset and enable early stopping. There are many other bells and whistles as well. - - # Parameters - - model : `Model`, required. - An AllenNLP model to be optimized. Pytorch Modules can also be optimized if - their `forward` method returns a dictionary with a "loss" key, containing a - scalar tensor representing the loss function to be optimized. - - If you are training your model using GPUs, your model should already be - on the correct device. (If you use `Trainer.from_params` this will be - handled for you.) - optimizer : `torch.nn.Optimizer`, required. - An instance of a Pytorch Optimizer, instantiated with the parameters of the - model to be optimized. - iterator : `DataIterator`, required. - A method for iterating over a `Dataset`, yielding padded indexed batches. - patience : Optional[int] > 0, optional (default=None) - Number of epochs to be patient before early stopping: the training is stopped - after `patience` epochs with no improvement. If given, it must be `> 0`. - If None, early stopping is disabled. - validation_metric : str, optional (default="loss") - Validation metric to measure for whether to stop training using patience - and whether to serialize an `is_best` model each epoch. The metric name - must be prepended with either "+" or "-", which specifies whether the metric - is an increasing or decreasing function. - validation_iterator : `DataIterator`, optional (default=None) - An iterator to use for the validation set. If `None`, then - use the training `iterator`. - shuffle : `bool`, optional (default=True) - Whether to shuffle the instances in the iterator or not. - num_epochs : int, optional (default = 20) - Number of training epochs. - serialization_dir : str, optional (default=None) - Path to directory for saving and loading model files. Models will not be saved if - this parameter is not passed. - num_serialized_models_to_keep : `int`, optional (default=20) - Number of previous model checkpoints to retain. Default is to keep 20 checkpoints. - A value of None or -1 means all checkpoints will be kept. - keep_serialized_model_every_num_seconds : `int`, optional (default=None) - If num_serialized_models_to_keep is not None, then occasionally it's useful to - save models at a given interval in addition to the last num_serialized_models_to_keep. - To do so, specify keep_serialized_model_every_num_seconds as the number of seconds - between permanently saved checkpoints. Note that this option is only used if - num_serialized_models_to_keep is not None, otherwise all checkpoints are kept. - checkpointer : `Checkpointer`, optional (default=None) - An instance of class Checkpointer to use instead of the default. If a checkpointer is specified, - the arguments num_serialized_models_to_keep and keep_serialized_model_every_num_seconds should - not be specified. The caller is responsible for initializing the checkpointer so that it is - consistent with serialization_dir. - model_save_interval : `float`, optional (default=None) - If provided, then serialize models every `model_save_interval` - seconds within single epochs. In all cases, models are also saved - at the end of every epoch if `serialization_dir` is provided. - cuda_device : `int`, optional (default = -1) - An integer specifying the CUDA device(s) to use for this process. If -1, the CPU is used. 
- Data parallelism is controlled at the allennlp train level, so each trainer will have a single - GPU. - grad_norm : `float`, optional, (default = None). - If provided, gradient norms will be rescaled to have a maximum of this value. - grad_clipping : `float`, optional (default = `None`). - If provided, gradients will be clipped `during the backward pass` to have an (absolute) - maximum of this value. If you are getting `NaNs` in your gradients during training - that are not solved by using `grad_norm`, you may need this. - learning_rate_scheduler : `LearningRateScheduler`, optional (default = None) - If specified, the learning rate will be decayed with respect to - this schedule at the end of each epoch (or batch, if the scheduler implements - the `step_batch` method). If you use `torch.optim.lr_scheduler.ReduceLROnPlateau`, - this will use the `validation_metric` provided to determine if learning has plateaued. - To support updating the learning rate on every batch, this can optionally implement - `step_batch(batch_num_total)` which updates the learning rate given the batch number. - momentum_scheduler : `MomentumScheduler`, optional (default = None) - If specified, the momentum will be updated at the end of each batch or epoch - according to the schedule. - summary_interval : `int`, optional, (default = 100) - Number of batches between logging scalars to tensorboard - histogram_interval : `int`, optional, (default = `None`) - If not None, then log histograms to tensorboard every `histogram_interval` batches. - When this parameter is specified, the following additional logging is enabled: - * Histograms of model parameters - * The ratio of parameter update norm to parameter norm - * Histogram of layer activations - We log histograms of the parameters returned by - `model.get_parameters_for_histogram_tensorboard_logging`. - The layer activations are logged for any modules in the `Model` that have - the attribute `should_log_activations` set to `True`. Logging - histograms requires a number of GPU-CPU copies during training and is typically - slow, so we recommend logging histograms relatively infrequently. - Note: only Modules that return tensors, tuples of tensors or dicts - with tensors as values currently support activation logging. - should_log_parameter_statistics : `bool`, optional, (default = True) - Whether to send parameter statistics (mean and standard deviation - of parameters and gradients) to tensorboard. - should_log_learning_rate : `bool`, optional, (default = False) - Whether to send parameter specific learning rate to tensorboard. - log_batch_size_period : `int`, optional, (default = `None`) - If defined, how often to log the average batch size. - moving_average : `MovingAverage`, optional, (default = None) - If provided, we will maintain moving averages for all parameters. During training, we - employ a shadow variable for each parameter, which maintains the moving average. During - evaluation, we backup the original parameters and assign the moving averages to corresponding - parameters. Be careful that when saving the checkpoint, we will save the moving averages of - parameters. This is necessary because we want the saved model to perform as well as the validated - model if we load it later. But this may cause problems if you restart the training from checkpoint. - distributed : `bool`, optional, (default = False) - If set, PyTorch's `DistributedDataParallel` is used to train the model in multiple GPUs. This also - requires `world_size` to be greater than 1. 
- local_rank : `int`, optional, (default = 0) - This is the unique identifier of the `Trainer` in a distributed process group. The GPU device id is - used as the rank. - world_size : `int`, (default = 1) - The number of `Trainer` workers participating in the distributed training. - num_gradient_accumulation_steps : `int`, optional, (default = 1) - Gradients are accumulated for the given number of steps before doing an optimizer step. This can - be useful to accommodate batches that are larger than the RAM size. Refer Thomas Wolf's - [post](https://tinyurl.com/y5mv44fw) for details on Gradient Accumulation. - """ - super().__init__(serialization_dir, cuda_device, distributed, local_rank, world_size) - - # I am not calling move_to_gpu here, because if the model is - # not already on the GPU then the optimizer is going to be wrong. - self.model = model - - self.data_loader = data_loader - self._validation_data_loader = validation_data_loader - self.shuffle = shuffle - self.optimizer = optimizer - - if patience is None: # no early stopping - if validation_data_loader: - logger.warning( - "You provided a validation dataset but patience was set to None, " - "meaning that early stopping is disabled" - ) - elif (not isinstance(patience, int)) or patience <= 0: - raise ConfigurationError( - '{} is an invalid value for "patience": it must be a positive integer ' - "or None (if you want to disable early stopping)".format(patience) - ) - - # For tracking is_best_so_far and should_stop_early - self._metric_tracker = MetricTracker(patience, validation_metric) - # Get rid of + or - - self._validation_metric = validation_metric[1:] - - self._num_epochs = num_epochs - - if checkpointer is not None: - # We can't easily check if these parameters were passed in, so check against their default values. - # We don't check against serialization_dir since it is also used by the parent class. - if ( - num_serialized_models_to_keep != 20 - or keep_serialized_model_every_num_seconds is not None - ): - raise ConfigurationError( - "When passing a custom Checkpointer, you may not also pass in separate checkpointer " - "args 'num_serialized_models_to_keep' or 'keep_serialized_model_every_num_seconds'." - ) - self._checkpointer = checkpointer - else: - self._checkpointer = Checkpointer( - serialization_dir, - keep_serialized_model_every_num_seconds, - num_serialized_models_to_keep, - ) - - self._model_save_interval = model_save_interval - - self._grad_norm = grad_norm - self._grad_clipping = grad_clipping - - self._learning_rate_scheduler = learning_rate_scheduler - self._momentum_scheduler = momentum_scheduler - self._moving_average = moving_average - - # We keep the total batch number as an instance variable because it - # is used inside a closure for the hook which logs activations in - # `_enable_activation_logging`. - self._batch_num_total = 0 - - self._tensorboard = TensorboardWriter( - get_batch_num_total=lambda: self._batch_num_total, - serialization_dir=serialization_dir, - summary_interval=summary_interval, - histogram_interval=histogram_interval, - should_log_parameter_statistics=should_log_parameter_statistics, - should_log_learning_rate=should_log_learning_rate, - ) - - self._log_batch_size_period = log_batch_size_period - - self._last_log = 0.0 # time of last logging - - self._num_gradient_accumulation_steps = num_gradient_accumulation_steps - - # Enable activation logging. 
- if histogram_interval is not None: - self._tensorboard.enable_activation_logging(self.model) - - # Using `DistributedDataParallel`(ddp) brings in a quirk wrt AllenNLP's `Model` interface and its - # usage. A `Model` object is wrapped by `ddp`, but assigning the wrapped model to `self.model` - # will break the usages such as `Model.get_regularization_penalty`, `Model.get_metrics`, etc. - # - # Hence a reference to Pytorch's object is maintained in the case of distributed training and in the - # normal case, reference to `Model` is retained. This reference is only used in - # these places: `model.__call__`, `model.train` and `model.eval`. - if self._distributed: - self._pytorch_model = DistributedDataParallel( - self.model, device_ids=[self.cuda_device], find_unused_parameters=True - ) - else: - self._pytorch_model = self.model - - def rescale_gradients(self) -> Optional[float]: - return training_util.rescale_gradients(self.model, self._grad_norm) - - def batch_loss(self, batch: TensorDict, for_training: bool) -> torch.Tensor: - """ - Does a forward pass on the given batches and returns the `loss` value in the result. - If `for_training` is `True` also applies regularization penalty. - """ - batch = nn_util.move_to_device(batch, self.cuda_device) - output_dict = self._pytorch_model(**batch) - - try: - loss = output_dict["loss"] - if for_training: - loss += self.model.get_regularization_penalty() - except KeyError: - if for_training: - raise RuntimeError( - "The model you are trying to optimize does not contain a" - " 'loss' key in the output of model.forward(inputs)." - ) - loss = None - - return loss - - def _train_epoch(self, epoch: int) -> Dict[str, float]: - """ - Trains one epoch and returns metrics. - """ - logger.info("Epoch %d/%d", epoch, self._num_epochs - 1) - peak_cpu_usage = common_util.peak_memory_mb() - logger.info(f"Peak CPU memory usage MB: {peak_cpu_usage}") - gpu_usage = [] - for gpu, memory in common_util.gpu_memory_mb().items(): - gpu_usage.append((gpu, memory)) - logger.info(f"GPU {gpu} memory usage MB: {memory}") - - train_loss = 0.0 - # Set the model to "train" mode. - self._pytorch_model.train() - - # Get tqdm for the training batches - batch_generator = iter(self.data_loader) - batch_group_generator = common_util.lazy_groups_of( - batch_generator, self._num_gradient_accumulation_steps - ) - - num_training_batches = math.ceil( - len(self.data_loader) / self._num_gradient_accumulation_steps - ) - # Having multiple tqdm bars in case of distributed training will be a mess. Hence only the master's - # progress is shown - if self._master: - batch_group_generator_tqdm = Tqdm.tqdm( - batch_group_generator, total=num_training_batches - ) - else: - batch_group_generator_tqdm = batch_group_generator - - self._last_log = time.time() - last_save_time = time.time() - - batches_this_epoch = 0 - if self._batch_num_total is None: - self._batch_num_total = 0 - - histogram_parameters = set(self.model.get_parameters_for_histogram_tensorboard_logging()) - - logger.info("Training") - - cumulative_batch_group_size = 0 - done_early = False - for batch_group in batch_group_generator_tqdm: - if self._distributed: - # Check whether the other workers have stopped already (due to differing amounts of - # data in each). If so, we can't proceed because we would hang when we hit the - # barrier implicit in Model.forward. We use a IntTensor instead a BoolTensor - # here because NCCL process groups apparently don't support BoolTensor. 
- done = torch.tensor(0, device=self.cuda_device) - torch.distributed.all_reduce(done, torch.distributed.ReduceOp.SUM) - if done.item() > 0: - done_early = True - logger.warning( - f"Worker {torch.distributed.get_rank()} finishing training early! " - "This implies that there is an imbalance in your training " - "data across the workers and that some amount of it will be " - "ignored. A small amount of this is fine, but a major imbalance " - "should be avoided. Note: This warning will appear unless your " - "data is perfectly balanced." - ) - break - - batches_this_epoch += 1 - self._batch_num_total += 1 - batch_num_total = self._batch_num_total - - self.optimizer.zero_grad() - - for batch in batch_group: - loss = self.batch_loss(batch, for_training=True) - if torch.isnan(loss): - raise ValueError("nan loss encountered") - loss = loss / len(batch_group) - loss.backward() - train_loss += loss.item() - - batch_grad_norm = self.rescale_gradients() - - # This does nothing if batch_num_total is None or you are using a - # scheduler which doesn't update per batch. - if self._learning_rate_scheduler: - self._learning_rate_scheduler.step_batch(batch_num_total) - if self._momentum_scheduler: - self._momentum_scheduler.step_batch(batch_num_total) - - if self._tensorboard.should_log_histograms_this_batch() and self._master: - # get the magnitude of parameter updates for logging - # We need a copy of current parameters to compute magnitude of updates, - # and copy them to CPU so large models won't go OOM on the GPU. - param_updates = { - name: param.detach().cpu().clone() - for name, param in self.model.named_parameters() - } - self.optimizer.step() - for name, param in self.model.named_parameters(): - param_updates[name].sub_(param.detach().cpu()) - update_norm = torch.norm(param_updates[name].view(-1)) - param_norm = torch.norm(param.view(-1)).cpu() - self._tensorboard.add_train_scalar( - "gradient_update/" + name, update_norm / (param_norm + 1e-7) - ) - else: - self.optimizer.step() - - # Update moving averages - if self._moving_average is not None: - self._moving_average.apply(batch_num_total) - - # Update the description with the latest metrics - metrics = training_util.get_metrics( - self.model, - train_loss, - batches_this_epoch, - world_size=self._world_size, - cuda_device=[self.cuda_device], - ) - - # Updating tqdm only for the master as the trainers wouldn't have one - if self._master: - description = training_util.description_from_metrics(metrics) - batch_group_generator_tqdm.set_description(description, refresh=False) - - # Log parameter values to Tensorboard (only from the master) - if self._tensorboard.should_log_this_batch() and self._master: - self._tensorboard.log_parameter_and_gradient_statistics(self.model, batch_grad_norm) - self._tensorboard.log_learning_rates(self.model, self.optimizer) - - self._tensorboard.add_train_scalar("loss/loss_train", metrics["loss"]) - self._tensorboard.log_metrics({"epoch_metrics/" + k: v for k, v in metrics.items()}) - - if self._tensorboard.should_log_histograms_this_batch() and self._master: - self._tensorboard.log_histograms(self.model, histogram_parameters) - - if self._log_batch_size_period: - batch_group_size = sum(training_util.get_batch_size(batch) for batch in batch_group) - cumulative_batch_group_size += batch_group_size - if (batches_this_epoch - 1) % self._log_batch_size_period == 0: - average = cumulative_batch_group_size / batches_this_epoch - logger.info( - f"current batch size: {batch_group_size} mean batch size: {average}" - ) - 
self._tensorboard.add_train_scalar("current_batch_size", batch_group_size) - self._tensorboard.add_train_scalar("mean_batch_size", average) - - # Save model if needed. - if ( - self._model_save_interval is not None - and (time.time() - last_save_time > self._model_save_interval) - and self._master - ): - last_save_time = time.time() - self._save_checkpoint( - "{0}.{1}".format(epoch, training_util.time_to_str(int(last_save_time))) - ) - if self._distributed and not done_early: - logger.warning( - f"Worker {torch.distributed.get_rank()} completed its entire epoch (training)." - ) - # Indicate that we're done so that any workers that have remaining data stop the epoch early. - done = torch.tensor(1, device=self.cuda_device) - torch.distributed.all_reduce(done, torch.distributed.ReduceOp.SUM) - assert done.item() - - # Let all workers finish their epoch before computing - # the final statistics for the epoch. - if self._distributed: - dist.barrier() - - metrics = training_util.get_metrics( - self.model, - train_loss, - batches_this_epoch, - reset=True, - world_size=self._world_size, - cuda_device=[self.cuda_device], - ) - metrics["cpu_memory_MB"] = peak_cpu_usage - for (gpu_num, memory) in gpu_usage: - metrics["gpu_" + str(gpu_num) + "_memory_MB"] = memory - return metrics - - def _validation_loss(self) -> Tuple[float, int]: - """ - Computes the validation loss. Returns it and the number of batches. - """ - logger.info("Validating") - - self._pytorch_model.eval() - - # Replace parameter values with the shadow values from the moving averages. - if self._moving_average is not None: - self._moving_average.assign_average_value() - - if self._validation_data_loader is not None: - validation_data_loader = self._validation_data_loader - else: - raise ConfigurationError("Validation results cannot be calculated without a validation_data_loader") - - val_generator_tqdm = Tqdm.tqdm(iter(validation_data_loader), total=len(validation_data_loader)) - batches_this_epoch = 0 - val_loss = 0 - done_early = False - for batch in val_generator_tqdm: - if self._distributed: - # Check whether the other workers have stopped already (due to differing amounts of - # data in each). If so, we can't proceed because we would hang when we hit the - # barrier implicit in Model.forward. We use a IntTensor instead a BoolTensor - # here because NCCL process groups apparently don't support BoolTensor. - done = torch.tensor(0, device=self.cuda_device) - torch.distributed.all_reduce(done, torch.distributed.ReduceOp.SUM) - if done.item() > 0: - done_early = True - logger.warning( - f"Worker {torch.distributed.get_rank()} finishing validation early! " - "This implies that there is an imbalance in your validation " - "data across the workers and that some amount of it will be " - "ignored. A small amount of this is fine, but a major imbalance " - "should be avoided. Note: This warning will appear unless your " - "data is perfectly balanced." - ) - break - - loss = self.batch_loss(batch, for_training=False) - if loss is not None: - # You shouldn't necessarily have to compute a loss for validation, so we allow for - # `loss` to be None. We need to be careful, though - `batches_this_epoch` is - # currently only used as the divisor for the loss function, so we can safely only - # count those batches for which we actually have a loss. If this variable ever - # gets used for something else, we might need to change things around a bit. 
- batches_this_epoch += 1 - val_loss += loss.detach().cpu().numpy() - - # Update the description with the latest metrics - val_metrics = training_util.get_metrics( - self.model, - val_loss, - batches_this_epoch, - world_size=self._world_size, - cuda_device=[self.cuda_device], - ) - description = training_util.description_from_metrics(val_metrics) - val_generator_tqdm.set_description(description, refresh=False) - - if self._distributed and not done_early: - logger.warning( - f"Worker {torch.distributed.get_rank()} completed its entire epoch (validation)." - ) - # Indicate that we're done so that any workers that have remaining data stop validation early. - done = torch.tensor(1, device=self.cuda_device) - torch.distributed.all_reduce(done, torch.distributed.ReduceOp.SUM) - assert done.item() - - # Now restore the original parameter values. - if self._moving_average is not None: - self._moving_average.restore() - - return val_loss, batches_this_epoch - - def train(self) -> Dict[str, Any]: - """ - Trains the supplied model with the supplied parameters. - """ - try: - epoch_counter = self._restore_checkpoint() - except RuntimeError: - traceback.print_exc() - raise ConfigurationError( - "Could not recover training from the checkpoint. Did you mean to output to " - "a different serialization directory or delete the existing serialization " - "directory?" - ) - - training_util.enable_gradient_clipping(self.model, self._grad_clipping) - - logger.info("Beginning training.") - - val_metrics: Dict[str, float] = {} - this_epoch_val_metric: float = None - metrics: Dict[str, Any] = {} - epochs_trained = 0 - training_start_time = time.time() - - metrics["best_epoch"] = self._metric_tracker.best_epoch - for key, value in self._metric_tracker.best_epoch_metrics.items(): - metrics["best_validation_" + key] = value - - for epoch in range(epoch_counter, self._num_epochs): - epoch_start_time = time.time() - train_metrics = self._train_epoch(epoch) - - # get peak of memory usage - if "cpu_memory_MB" in train_metrics: - metrics["peak_cpu_memory_MB"] = max( - metrics.get("peak_cpu_memory_MB", 0), train_metrics["cpu_memory_MB"] - ) - for key, value in train_metrics.items(): - if key.startswith("gpu_"): - metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value) - - if self._validation_data_loader is not None: - with torch.no_grad(): - # We have a validation set, so compute all the metrics on it. - val_loss, num_batches = self._validation_loss() - - # It is safe again to wait till the validation is done. This is - # important to get the metrics right. - if self._distributed: - dist.barrier() - - val_metrics = training_util.get_metrics( - self.model, - val_loss, - num_batches, - reset=True, - world_size=self._world_size, - cuda_device=[self.cuda_device], - ) - - # Check validation metric for early stopping - this_epoch_val_metric = val_metrics[self._validation_metric] - self._metric_tracker.add_metric(this_epoch_val_metric) - - if self._metric_tracker.should_stop_early(): - logger.info("Ran out of patience. 
Stopping training.") - break - - if self._master: - self._tensorboard.log_metrics( - train_metrics, val_metrics=val_metrics, log_to_console=True, epoch=epoch + 1 - ) # +1 because tensorboard doesn't like 0 - - # Create overall metrics dict - training_elapsed_time = time.time() - training_start_time - metrics["training_duration"] = str(datetime.timedelta(seconds=training_elapsed_time)) - metrics["training_start_epoch"] = epoch_counter - metrics["training_epochs"] = epochs_trained - metrics["epoch"] = epoch - - for key, value in train_metrics.items(): - metrics["training_" + key] = value - for key, value in val_metrics.items(): - metrics["validation_" + key] = value - - if self._metric_tracker.is_best_so_far(): - # Update all the best_ metrics. - # (Otherwise they just stay the same as they were.) - metrics["best_epoch"] = epoch - for key, value in val_metrics.items(): - metrics["best_validation_" + key] = value - - self._metric_tracker.best_epoch_metrics = val_metrics - - if self._serialization_dir and self._master: - common_util.dump_metrics( - os.path.join(self._serialization_dir, f"metrics_epoch_{epoch}.json"), metrics - ) - - # The Scheduler API is agnostic to whether your schedule requires a validation metric - - # if it doesn't, the validation metric passed here is ignored. - if self._learning_rate_scheduler: - self._learning_rate_scheduler.step(this_epoch_val_metric, epoch) - if self._momentum_scheduler: - self._momentum_scheduler.step(this_epoch_val_metric, epoch) - - if self._master: - self._save_checkpoint(epoch) - - # Wait for the master to finish saving the checkpoint - if self._distributed: - dist.barrier() - - epoch_elapsed_time = time.time() - epoch_start_time - logger.info("Epoch duration: %s", datetime.timedelta(seconds=epoch_elapsed_time)) - - if epoch < self._num_epochs - 1: - training_elapsed_time = time.time() - training_start_time - estimated_time_remaining = training_elapsed_time * ( - (self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1 - ) - formatted_time = str(datetime.timedelta(seconds=int(estimated_time_remaining))) - logger.info("Estimated training time remaining: %s", formatted_time) - - epochs_trained += 1 - - # make sure pending events are flushed to disk and files are closed properly - self._tensorboard.close() - - # Load the best model state before returning - best_model_state = self._checkpointer.best_model_state() - if best_model_state: - self.model.load_state_dict(best_model_state) - - return metrics - - def _save_checkpoint(self, epoch: Union[int, str]) -> None: - """ - Saves a checkpoint of the model to self._serialization_dir. - Is a no-op if self._serialization_dir is None. - - # Parameters - - epoch : Union[int, str], required. - The epoch of training. If the checkpoint is saved in the middle - of an epoch, the parameter is a string with the epoch and timestamp. - """ - # If moving averages are used for parameters, we save - # the moving average values into checkpoint, instead of the current values. - if self._moving_average is not None: - self._moving_average.assign_average_value() - - # These are the training states we need to persist. - training_states = { - "metric_tracker": self._metric_tracker.state_dict(), - "optimizer": self.optimizer.state_dict(), - "batch_num_total": self._batch_num_total, - } - - # If we have a learning rate or momentum scheduler, we should persist them too. 
- if self._learning_rate_scheduler is not None: - training_states["learning_rate_scheduler"] = self._learning_rate_scheduler.state_dict() - if self._momentum_scheduler is not None: - training_states["momentum_scheduler"] = self._momentum_scheduler.state_dict() - - self._checkpointer.save_checkpoint( - model_state=self.model.state_dict(), - epoch=epoch, - training_states=training_states, - is_best_so_far=self._metric_tracker.is_best_so_far(), - ) - - # Restore the original values for parameters so that training will not be affected. - if self._moving_average is not None: - self._moving_average.restore() - - def _restore_checkpoint(self) -> int: - """ - Restores the model and training state from the last saved checkpoint. - This includes an epoch count and optimizer state, which is serialized separately - from model parameters. This function should only be used to continue training - - if you wish to load a model for inference/load parts of a model into a new - computation graph, you should use the native Pytorch functions: - ` model.load_state_dict(torch.load("/path/to/model/weights.th"))` - - If `self._serialization_dir` does not exist or does not contain any checkpointed weights, - this function will do nothing and return 0. - - # Returns - - epoch: int - The epoch at which to resume training, which should be one after the epoch - in the saved training state. - """ - model_state, training_state = self._checkpointer.restore_checkpoint() - - if not training_state: - # No checkpoint to restore, start at 0 - return 0 - - self.model.load_state_dict(model_state) - self.optimizer.load_state_dict(training_state["optimizer"]) - if ( - self._learning_rate_scheduler is not None - and "learning_rate_scheduler" in training_state - ): - self._learning_rate_scheduler.load_state_dict(training_state["learning_rate_scheduler"]) - if self._momentum_scheduler is not None and "momentum_scheduler" in training_state: - self._momentum_scheduler.load_state_dict(training_state["momentum_scheduler"]) - training_util.move_optimizer_to_cuda(self.optimizer) - - # Currently the `training_state` contains a serialized `MetricTracker`. - if "metric_tracker" in training_state: - self._metric_tracker.load_state_dict(training_state["metric_tracker"]) - # It used to be the case that we tracked `val_metric_per_epoch`. - elif "val_metric_per_epoch" in training_state: - self._metric_tracker.clear() - self._metric_tracker.add_metrics(training_state["val_metric_per_epoch"]) - # And before that we didn't track anything. - else: - self._metric_tracker.clear() - - if isinstance(training_state["epoch"], int): - epoch_to_return = training_state["epoch"] + 1 - else: - epoch_to_return = int(training_state["epoch"].split(".")[0]) + 1 - - # For older checkpoints with batch_num_total missing, default to old behavior where - # it is unchanged. 
- batch_num_total = training_state.get("batch_num_total") - if batch_num_total is not None: - self._batch_num_total = batch_num_total - - return epoch_to_return - - @classmethod - def from_partial_objects( - cls, - model: Model, - serialization_dir: str, - data_loader: Lazy[DataLoader], - validation_data_loader: Lazy[DataLoader] = None, - local_rank: int = 0, - patience: int = None, - validation_metric: str = "-loss", - shuffle: bool = True, - num_epochs: int = 20, - cuda_device: int = -1, - grad_norm: float = None, - grad_clipping: float = None, - model_save_interval: float = None, - summary_interval: int = 100, - histogram_interval: int = None, - should_log_parameter_statistics: bool = True, - should_log_learning_rate: bool = False, - log_batch_size_period: int = None, - distributed: bool = None, - world_size: int = 1, - num_gradient_accumulation_steps: int = 1, - no_grad: List[str] = None, - optimizer: Lazy[Optimizer] = None, - learning_rate_scheduler: Lazy[LearningRateScheduler] = None, - momentum_scheduler: Lazy[MomentumScheduler] = None, - moving_average: Lazy[MovingAverage] = None, - checkpointer: Lazy[Checkpointer] = None, - ) -> "TrainerV2": - """ - This method exists so that we can have a documented method to construct this class using - `FromParams`. If you are not using `FromParams` or config files, you can safely ignore this - method. - - The reason we can't just use `__init__` with `FromParams` here is because there are - sequential dependencies to this class's arguments. Anything that has a `Lazy[]` type - annotation needs something from one of the non-`Lazy` arguments. The `Optimizer` needs to - have the parameters from the `Model` before it's constructed, and the `Schedulers` need to - have the `Optimizer`. Because of this, the typical way we construct things `FromParams` - doesn't work, so we use `Lazy` to allow for constructing the objects sequentially. - - If you're not using `FromParams`, you can just construct these arguments in the right order - yourself in your code and call the constructor directly. - """ - - check_for_gpu(cuda_device) - if cuda_device >= 0: - # Moving model to GPU here so that the optimizer state gets constructed on - # the right device. - model = model.cuda(cuda_device) - - if no_grad: - for name, parameter in model.named_parameters(): - if any(re.search(regex, name) for regex in no_grad): - parameter.requires_grad_(False) - - common_util.log_frozen_and_tunable_parameter_names(model) - - parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad] - optimizer_ = optimizer.construct(model_parameters=parameters) - if not optimizer_: - optimizer_ = Optimizer.default(parameters) - - try: - batches_per_epoch = len(data_loader) - except TypeError: - # If the dataset is lazy, it won't have a length. 
- batches_per_epoch = None - - moving_average_ = moving_average.construct(parameters=parameters) - learning_rate_scheduler_ = learning_rate_scheduler.construct( - optimizer=optimizer_, num_epochs=num_epochs, num_steps_per_epoch=batches_per_epoch - ) - momentum_scheduler_ = momentum_scheduler.construct(optimizer=optimizer_) - - checkpointer_ = checkpointer.construct() or Checkpointer(serialization_dir) - - return cls( - model, - optimizer_, - data_loader, - patience=patience, - validation_metric=validation_metric, - validation_data_loader=validation_data_loader, - shuffle=shuffle, - num_epochs=num_epochs, - serialization_dir=serialization_dir, - cuda_device=cuda_device, - grad_norm=grad_norm, - grad_clipping=grad_clipping, - learning_rate_scheduler=learning_rate_scheduler_, - momentum_scheduler=momentum_scheduler_, - checkpointer=checkpointer_, - model_save_interval=model_save_interval, - summary_interval=summary_interval, - histogram_interval=histogram_interval, - should_log_parameter_statistics=should_log_parameter_statistics, - should_log_learning_rate=should_log_learning_rate, - log_batch_size_period=log_batch_size_period, - moving_average=moving_average_, - distributed=distributed, - local_rank=local_rank, - world_size=world_size, - num_gradient_accumulation_steps=num_gradient_accumulation_steps, - ) From 01e12f5012c703b30b109c1ed3c03c2634726c70 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 11:30:57 -0800 Subject: [PATCH 10/52] black --- allennlp/data/samplers/__init__.py | 52 +++++++++++++++++-------- allennlp/tests/training/trainer_test.py | 14 +++---- allennlp/training/trainer.py | 17 ++++---- 3 files changed, 50 insertions(+), 33 deletions(-) diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 4c527c5496c..e488bb21e1f 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -1,4 +1,3 @@ - from typing import List, Iterable, Tuple, Dict, cast import logging from torch.utils import data @@ -18,14 +17,12 @@ class Sampler(Registrable): - def __iter__(self) -> Iterable[int]: raise NotImplementedError class BatchSampler(Registrable): - def __iter__(self) -> Iterable[List[int]]: raise NotImplementedError @@ -33,12 +30,10 @@ def __iter__(self) -> Iterable[List[int]]: @Sampler.register("sequential") class SequentialSampler(Sampler, data.SequentialSampler): - def __init__(self, data_source: data.Dataset): super().__init__(data_source) - @Sampler.register("random") class RandomSampler(Sampler, data.RandomSampler): r"""Samples elements randomly. If without replacement, then sample from a shuffled dataset. @@ -50,7 +45,10 @@ class RandomSampler(Sampler, data.RandomSampler): num_samples (int): number of samples to draw, default=`len(dataset)`. This argument is supposed to be specified only when `replacement` is ``True``. 
""" - def __init__(self, data_source: data.Dataset, replacement: bool = False, num_samples: int = None): + + def __init__( + self, data_source: data.Dataset, replacement: bool = False, num_samples: int = None + ): super().__init__(data_source, replacement, num_samples) @@ -61,6 +59,7 @@ class SubsetRandomSampler(Sampler, data.SubsetRandomSampler): Arguments: indices (sequence): a sequence of indices """ + def __init__(self, indices: List[int]): super().__init__(indices) @@ -82,6 +81,7 @@ class WeightedRandomSampler(Sampler, data.WeightedRandomSampler): >>> list(WeightedRandomSampler([0.9, 0.4, 0.05, 0.2, 0.3, 0.1], 5, replacement=False)) [0, 1, 4, 3, 2] """ + def __init__(self, weights: List[float], num_samples: int, replacement: bool = True): super().__init__(weights, num_samples, replacement) @@ -189,17 +189,35 @@ def allennlp_collocate(batch): return batch.as_tensor_dict(batch.get_padding_lengths()) - - class DataLoader(Registrable, data.DataLoader): - - def __init__(self, dataset: data.Dataset, batch_size: int = 1, shuffle: bool = False, sampler: Sampler = None, - batch_sampler: BatchSampler = None, num_workers: int = 0, collate_fn=None, - pin_memory: bool = False, drop_last: bool = False, timeout: bool = 0, - worker_init_fn=None, multiprocessing_context: str = None): + def __init__( + self, + dataset: data.Dataset, + batch_size: int = 1, + shuffle: bool = False, + sampler: Sampler = None, + batch_sampler: BatchSampler = None, + num_workers: int = 0, + collate_fn=None, + pin_memory: bool = False, + drop_last: bool = False, + timeout: bool = 0, + worker_init_fn=None, + multiprocessing_context: str = None, + ): collate_fn = allennlp_collocate - super().__init__(self, dataset=dataset, batch_size=batch_size, shuffle=shuffle, sampler=sampler, - batch_sampler=batch_sampler, num_workers=num_workers, collate_fn=collate_fn, - pin_memory=pin_memory, drop_last=drop_last, timeout=timeout, - worker_init_fn=worker_init_fn, multiprocessing_context=multiprocessing_context) + super().__init__( + dataset=dataset, + batch_size=batch_size, + shuffle=shuffle, + sampler=sampler, + batch_sampler=batch_sampler, + num_workers=num_workers, + collate_fn=collate_fn, + pin_memory=pin_memory, + drop_last=drop_last, + timeout=timeout, + worker_init_fn=worker_init_fn, + multiprocessing_context=multiprocessing_context, + ) diff --git a/allennlp/tests/training/trainer_test.py b/allennlp/tests/training/trainer_test.py index 788bbab359c..46fdf2d3238 100644 --- a/allennlp/tests/training/trainer_test.py +++ b/allennlp/tests/training/trainer_test.py @@ -44,7 +44,9 @@ def setUp(self): self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params) self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9) self.data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) - self.validation_data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) + self.validation_data_loader = DataLoader( + self.instances, batch_size=2, collate_fn=allennlp_collocate + ) self.instances.index_with(vocab) def test_trainer_can_run(self): @@ -102,9 +104,7 @@ def test_trainer_can_run_exponential_moving_average(self): @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device registered.") def test_trainer_can_run_cuda(self): self.model.cuda() - trainer = Trainer( - self.model, self.optimizer, self.data_loader, num_epochs=2, cuda_device=0 - ) + trainer = Trainer(self.model, self.optimizer, self.data_loader, num_epochs=2, cuda_device=0) metrics = 
trainer.train() assert "peak_cpu_memory_MB" in metrics assert isinstance(metrics["peak_cpu_memory_MB"], float) @@ -118,11 +118,7 @@ def test_passing_trainer_multiple_gpus_raises_error(self): with pytest.raises(ConfigurationError): Trainer( - self.model, - self.optimizer, - self.data_loader, - num_epochs=2, - cuda_device=[0, 1], + self.model, self.optimizer, self.data_loader, num_epochs=2, cuda_device=[0, 1], ) def test_trainer_can_resume_training(self): diff --git a/allennlp/training/trainer.py b/allennlp/training/trainer.py index aebd44d88f4..dfc7b625f2b 100644 --- a/allennlp/training/trainer.py +++ b/allennlp/training/trainer.py @@ -5,7 +5,7 @@ import re import time import traceback -from typing import Dict, List, Optional, Tuple, Union, Iterable, Any +from typing import Dict, List, Optional, Tuple, Union, Any import torch import torch.distributed as dist @@ -16,7 +16,6 @@ from allennlp.common import Lazy, Tqdm from allennlp.common.checks import ConfigurationError, check_for_gpu from allennlp.common import util as common_util -from allennlp.data.instance import Instance from allennlp.data.samplers import DataLoader @@ -36,7 +35,7 @@ logger = logging.getLogger(__name__) -@TrainerBase.register("trainer", constructor="from_partial_objects") +@TrainerBase.register("default", constructor="from_partial_objects") class Trainer(TrainerBase): def __init__( self, @@ -512,9 +511,13 @@ def _validation_loss(self) -> Tuple[float, int]: if self._validation_data_loader is not None: validation_data_loader = self._validation_data_loader else: - raise ConfigurationError("Validation results cannot be calculated without a validation_data_loader") + raise ConfigurationError( + "Validation results cannot be calculated without a validation_data_loader" + ) - val_generator_tqdm = Tqdm.tqdm(iter(validation_data_loader), total=len(validation_data_loader)) + val_generator_tqdm = Tqdm.tqdm( + iter(validation_data_loader), total=len(validation_data_loader) + ) batches_this_epoch = 0 val_loss = 0 done_early = False @@ -814,8 +817,8 @@ def from_partial_objects( cls, model: Model, serialization_dir: str, - data_loader: Lazy[DataLoader], - validation_data_loader: Lazy[DataLoader] = None, + data_loader: DataLoader, + validation_data_loader: DataLoader = None, local_rank: int = 0, patience: int = None, validation_metric: str = "-loss", From 5aea291cc86973b3e19b2d00cfe8d6b5fc5dd059 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 11:31:28 -0800 Subject: [PATCH 11/52] make find learning rate work --- allennlp/commands/find_learning_rate.py | 20 +++++++++---------- .../data/dataset_readers/dataset_reader.py | 2 +- .../tests/commands/find_learning_rate_test.py | 14 +++++++------ 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/allennlp/commands/find_learning_rate.py b/allennlp/commands/find_learning_rate.py index 9d38cda1c8b..01536b310bc 100644 --- a/allennlp/commands/find_learning_rate.py +++ b/allennlp/commands/find_learning_rate.py @@ -49,6 +49,7 @@ import os import re from typing import List, Tuple +import itertools from overrides import overrides @@ -56,7 +57,8 @@ from allennlp.common import Params, Tqdm from allennlp.common.checks import check_for_gpu, ConfigurationError from allennlp.common.util import prepare_environment -from allennlp.data import DataIterator, Vocabulary +from allennlp.data import Vocabulary +from allennlp.data.samplers import DataLoader from allennlp.models import Model from allennlp.training import Trainer, TrainerBase from allennlp.training.util import 
create_serialization_dir, datasets_from_params @@ -211,11 +213,10 @@ def find_learning_rate_model( ), ) - model = Model.from_params(vocab=vocab, params=params.pop("model")) - iterator = DataIterator.from_params(params.pop("iterator")) - iterator.index_with(vocab) - train_data = all_datasets["train"] + train_data.index_with(vocab) + model = Model.from_params(vocab=vocab, params=params.pop("model")) + data_loader = DataLoader.from_params(dataset=train_data, params=params.pop("data_loader")) trainer_params = params.pop("trainer") @@ -230,11 +231,8 @@ def find_learning_rate_model( trainer: Trainer = TrainerBase.from_params( # type: ignore model=model, serialization_dir=serialization_dir, - iterator=iterator, - train_data=train_data, - validation_data=None, + data_loader=data_loader, params=trainer_params, - validation_iterator=None, ) logger.info( @@ -292,8 +290,8 @@ def search_learning_rate( trainer.model.train() - train_generator = trainer.iterator(trainer.train_data, shuffle=trainer.shuffle) - train_generator_tqdm = Tqdm.tqdm(train_generator, total=num_batches) + infinite_generator = itertools.cycle(trainer.data_loader) + train_generator_tqdm = Tqdm.tqdm(infinite_generator, total=num_batches) learning_rates = [] losses = [] diff --git a/allennlp/data/dataset_readers/dataset_reader.py b/allennlp/data/dataset_readers/dataset_reader.py index 9af9bbbe6bc..a117a2edbb5 100644 --- a/allennlp/data/dataset_readers/dataset_reader.py +++ b/allennlp/data/dataset_readers/dataset_reader.py @@ -45,7 +45,7 @@ def __init__( cache_file: str = None, deserialize: Callable[[str], Instance] = None, serialize: Callable[[Instance], str] = None, - vocab: Vocabulary = None + vocab: Vocabulary = None, ) -> None: super().__init__() self.instance_generator = instance_generator diff --git a/allennlp/tests/commands/find_learning_rate_test.py b/allennlp/tests/commands/find_learning_rate_test.py index 20cbbb01a7e..dddd22691f5 100644 --- a/allennlp/tests/commands/find_learning_rate_test.py +++ b/allennlp/tests/commands/find_learning_rate_test.py @@ -5,7 +5,8 @@ import torch from allennlp.common import Params -from allennlp.data import Vocabulary, DataIterator +from allennlp.data import Vocabulary +from allennlp.data.samplers import DataLoader from allennlp.models import Model from allennlp.common.checks import ConfigurationError from allennlp.common.testing import AllenNlpTestCase @@ -42,7 +43,7 @@ def setUp(self): "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"), "validation_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"), - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"cuda_device": -1, "num_epochs": 2, "optimizer": "adam"}, } ) @@ -166,7 +167,7 @@ def setUp(self): "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"), "validation_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"), - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"cuda_device": -1, "num_epochs": 2, "optimizer": "adam"}, } ) @@ -176,16 +177,17 @@ def setUp(self): instances=(instance for dataset in all_datasets.values() for instance in dataset), ) model = Model.from_params(vocab=vocab, params=params.pop("model")) - iterator = DataIterator.from_params(params.pop("iterator")) - iterator.index_with(vocab) train_data = all_datasets["train"] + train_data.index_with(vocab) + + 
data_loader = DataLoader.from_params(dataset=train_data, params=params.pop("data_loader")) trainer_params = params.pop("trainer") serialization_dir = os.path.join(self.TEST_DIR, "test_search_learning_rate") self.trainer = TrainerBase.from_params( model=model, serialization_dir=serialization_dir, - iterator=iterator, + data_loader=data_loader, train_data=train_data, params=trainer_params, validation_data=None, From f0269460e72a632abc499aea99bd022ba8783f83 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 14:10:06 -0800 Subject: [PATCH 12/52] update test fixtures to new config --- .../tests/fixtures/basic_classifier/common.jsonnet | 9 ++++++--- .../experiment_from_archive.jsonnet | 2 +- .../basic_classifier/experiment_seq2seq.jsonnet | 2 +- .../basic_classifier/experiment_seq2vec.jsonnet | 2 +- .../tests/fixtures/simple_tagger/experiment.json | 14 ++++++++------ 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/allennlp/tests/fixtures/basic_classifier/common.jsonnet b/allennlp/tests/fixtures/basic_classifier/common.jsonnet index 1ed73e06ce5..b02df38c213 100644 --- a/allennlp/tests/fixtures/basic_classifier/common.jsonnet +++ b/allennlp/tests/fixtures/basic_classifier/common.jsonnet @@ -16,9 +16,12 @@ }, "train_data_path": "allennlp/tests/fixtures/data/text_classification_json/imdb_corpus.jsonl", "validation_data_path": "allennlp/tests/fixtures/data/text_classification_json/imdb_corpus.jsonl", - "iterator": { - "type": "bucket", - "batch_size": 5 + "data_loader": { + + "batch_sampler": { + "type": "bucket", + "batch_size": 5 + }, }, "trainer": { "optimizer": { diff --git a/allennlp/tests/fixtures/basic_classifier/experiment_from_archive.jsonnet b/allennlp/tests/fixtures/basic_classifier/experiment_from_archive.jsonnet index b1fdffed75e..fd1455b7b67 100644 --- a/allennlp/tests/fixtures/basic_classifier/experiment_from_archive.jsonnet +++ b/allennlp/tests/fixtures/basic_classifier/experiment_from_archive.jsonnet @@ -9,6 +9,6 @@ local COMMON = import 'common.jsonnet'; "type": "from_archive", "archive_file": "allennlp/tests/fixtures/basic_classifier/serialization/model.tar.gz", }, - "iterator": COMMON['iterator'], + "data_loader": COMMON['data_loader'], "trainer": COMMON['trainer'], } diff --git a/allennlp/tests/fixtures/basic_classifier/experiment_seq2seq.jsonnet b/allennlp/tests/fixtures/basic_classifier/experiment_seq2seq.jsonnet index fd197050c40..609cabb898c 100644 --- a/allennlp/tests/fixtures/basic_classifier/experiment_seq2seq.jsonnet +++ b/allennlp/tests/fixtures/basic_classifier/experiment_seq2seq.jsonnet @@ -36,6 +36,6 @@ local COMMON = import 'common.jsonnet'; "dropout": 0.1 } }, - "iterator": COMMON['iterator'], + "data_loader": COMMON['data_loader'], "trainer": COMMON['trainer'] } diff --git a/allennlp/tests/fixtures/basic_classifier/experiment_seq2vec.jsonnet b/allennlp/tests/fixtures/basic_classifier/experiment_seq2vec.jsonnet index fd7f58560ec..751c1d14f8d 100644 --- a/allennlp/tests/fixtures/basic_classifier/experiment_seq2vec.jsonnet +++ b/allennlp/tests/fixtures/basic_classifier/experiment_seq2vec.jsonnet @@ -23,6 +23,6 @@ local COMMON = import 'common.jsonnet'; "output_dim": 16 } }, - "iterator": COMMON['iterator'], + "data_loader": COMMON['data_loader'], "trainer": COMMON['trainer'], } diff --git a/allennlp/tests/fixtures/simple_tagger/experiment.json b/allennlp/tests/fixtures/simple_tagger/experiment.json index f5605dcf443..166b2d125e9 100644 --- a/allennlp/tests/fixtures/simple_tagger/experiment.json +++ 
b/allennlp/tests/fixtures/simple_tagger/experiment.json @@ -22,12 +22,14 @@ "num_layers": 1 } }, - "iterator": { - "type": "bucket", - "sorting_keys": [["tokens", "tokens___tokens"]], - "padding_noise": 0.0, - "batch_size" : 80 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "sorting_keys": [["tokens", "tokens___tokens"]], + "padding_noise": 0.0, + "batch_size" : 80 + } +}, "trainer": { "num_epochs": 1, "grad_norm": 1.0, From 5973b506fcc334b515ec603d3398227932485fea Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 14:13:04 -0800 Subject: [PATCH 13/52] get train command tests mostly working --- allennlp/commands/train.py | 50 ++++++++++++++----------- allennlp/tests/commands/train_test.py | 22 +++++------ allennlp/tests/training/trainer_test.py | 14 ++++--- allennlp/training/util.py | 9 ++--- 4 files changed, 53 insertions(+), 42 deletions(-) diff --git a/allennlp/commands/train.py b/allennlp/commands/train.py index 012d5450d61..815f6e7bb5d 100644 --- a/allennlp/commands/train.py +++ b/allennlp/commands/train.py @@ -41,7 +41,7 @@ import argparse import logging import os -from typing import Any, Dict, Iterable, List, Optional +from typing import Any, Dict, List, Optional import torch import torch.distributed as dist @@ -53,7 +53,8 @@ from allennlp.common.checks import check_for_gpu, ConfigurationError from allennlp.common import util as common_util from allennlp.common.plugins import import_plugins -from allennlp.data import DataIterator, DatasetReader, Instance, Vocabulary +from allennlp.data import DatasetReader, Vocabulary +from allennlp.data.samplers import DataLoader from allennlp.models.archival import archive_model, CONFIG_NAME from allennlp.models.model import _DEFAULT_WEIGHTS, Model from allennlp.training.trainer_base import TrainerBase @@ -504,16 +505,14 @@ def __init__( serialization_dir: str, model: Model, trainer: TrainerBase, - evaluation_dataset: Iterable[Instance] = None, - evaluation_iterator: DataIterator = None, + evaluation_data_loader: DataLoader = None, evaluate_on_test: bool = False, batch_weight_key: str = "", ) -> None: self.serialization_dir = serialization_dir self.model = model self.trainer = trainer - self.evaluation_dataset = evaluation_dataset - self.evaluation_iterator = evaluation_iterator + self.evaluation_data_loader = evaluation_data_loader self.evaluate_on_test = evaluate_on_test self.batch_weight_key = batch_weight_key @@ -521,19 +520,18 @@ def run(self) -> Dict[str, Any]: return self.trainer.train() def finish(self, metrics: Dict[str, Any]): - if self.evaluation_dataset and self.evaluate_on_test: + if self.evaluation_data_loader and self.evaluate_on_test: logger.info("The model will be evaluated using the best epoch weights.") test_metrics = training_util.evaluate( self.model, - self.evaluation_dataset, - self.evaluation_iterator, + self.evaluation_data_loader, cuda_device=self.trainer.cuda_device, batch_weight_key=self.batch_weight_key, ) for key, value in test_metrics.items(): metrics["test_" + key] = value - elif self.evaluation_dataset: + elif self.evaluation_data_loader: logger.info( "To evaluate on the test set after training, pass the " "'evaluate_on_test' flag, or use the 'allennlp evaluate' command." 
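Note on the branch above: evaluation after training now hands a DataLoader straight to training_util.evaluate instead of an (instances, iterator) pair. The following is a minimal sketch of that call path, not part of the patch itself; it assumes the import locations used elsewhere in this series (DataLoader and allennlp_collocate from allennlp.data.samplers) and a model/dataset pair that already exist.

from allennlp.data.samplers import DataLoader, allennlp_collocate
from allennlp.models import Model
from allennlp.training import util as training_util


def evaluate_on_test(model: Model, test_data, batch_size: int = 32):
    # Sketch only: instances must be indexed with the model's vocabulary
    # before allennlp_collocate can tensorize them.
    test_data.index_with(model.vocab)
    loader = DataLoader(test_data, batch_size=batch_size, collate_fn=allennlp_collocate)
    # evaluate() now takes the loader directly; cuda_device=-1 keeps it on CPU.
    return training_util.evaluate(model, loader, cuda_device=-1, batch_weight_key="")
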
@@ -551,13 +549,13 @@ def from_partial_objects( dataset_reader: DatasetReader, train_data_path: str, model: Lazy[Model], - iterator: DataIterator, + data_loader: Lazy[DataLoader], trainer: Lazy[TrainerBase], vocabulary: Lazy[Vocabulary] = None, datasets_for_vocab_creation: List[str] = None, validation_dataset_reader: DatasetReader = None, validation_data_path: str = None, - validation_iterator: DataIterator = None, + validation_data_loader: Lazy[DataLoader] = None, test_data_path: str = None, evaluate_on_test: bool = False, ) -> "TrainModel": @@ -658,27 +656,37 @@ def from_partial_objects( vocabulary_path = os.path.join(serialization_dir, "vocabulary") vocabulary_.save_to_files(vocabulary_path) - iterator.index_with(model_.vocab) - validation_iterator = validation_iterator or iterator - validation_iterator.index_with(model_.vocab) # it is ok to call this twice + for dataset in datasets.values(): + dataset.index_with(model_.vocab) + + data_loader_ = data_loader.construct(dataset=datasets["train"]) + validation_data_loader = validation_data_loader or data_loader + validation_data = datasets.get("validation") + if validation_data is not None: + validation_data_loader_ = validation_data_loader.construct(dataset=validation_data) + else: + validation_data_loader_ = None + + test_data = datasets.get("test") + if test_data is not None: + test_data_loader = validation_data_loader.construct(dataset=test_data) + else: + test_data_loader = None # We don't need to pass serialization_dir and local_rank here, because they will have been # passed through the trainer by from_params already, because they were keyword arguments to # construct this class in the first place. trainer_ = trainer.construct( model=model_, - iterator=iterator, - train_data=datasets["train"], - validation_iterator=validation_iterator, - validation_data=datasets.get("validation"), + data_loader=data_loader_, + validation_data_loader=validation_data_loader_, ) return cls( serialization_dir=serialization_dir, model=model_, trainer=trainer_, - evaluation_dataset=datasets.get("test"), - evaluation_iterator=validation_iterator, + evaluation_data_loader=test_data_loader, evaluate_on_test=evaluate_on_test, batch_weight_key=batch_weight_key, ) diff --git a/allennlp/tests/commands/train_test.py b/allennlp/tests/commands/train_test.py index e751ad783a5..6f4dea1ea94 100644 --- a/allennlp/tests/commands/train_test.py +++ b/allennlp/tests/commands/train_test.py @@ -35,7 +35,7 @@ def test_train_model(self): "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": SEQUENCE_TAGGING_DATA_PATH, "validation_data_path": SEQUENCE_TAGGING_DATA_PATH, - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"num_epochs": 2, "optimizer": "adam"}, } ) @@ -97,7 +97,7 @@ def test_train_model_distributed(self): "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": SEQUENCE_TAGGING_DATA_PATH, "validation_data_path": SEQUENCE_TAGGING_DATA_PATH, - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"num_epochs": 2, "optimizer": "adam"}, "distributed": {"cuda_devices": [0, 1]}, } @@ -137,7 +137,7 @@ def test_train_model_distributed_with_sharded_reader(self): }, "train_data_path": SEQUENCE_TAGGING_SHARDS_PATH, "validation_data_path": SEQUENCE_TAGGING_SHARDS_PATH, - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"num_epochs": 2, "optimizer": "adam"}, "distributed": {"cuda_devices": [0, 1]}, } @@ -217,7 +217,7 @@ 
def test_distributed_raises_error_with_no_gpus(self): "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": SEQUENCE_TAGGING_DATA_PATH, "validation_data_path": SEQUENCE_TAGGING_DATA_PATH, - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"num_epochs": 2, "optimizer": "adam"}, "distributed": {}, } @@ -241,7 +241,7 @@ def test_train_saves_all_keys_in_config(self): "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": SEQUENCE_TAGGING_DATA_PATH, "validation_data_path": SEQUENCE_TAGGING_DATA_PATH, - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"num_epochs": 2, "optimizer": "adam"}, } ) @@ -270,7 +270,7 @@ def test_error_is_throw_when_cuda_device_is_not_available(self): "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": "allennlp/tests/fixtures/data/sequence_tagging.tsv", "validation_data_path": "allennlp/tests/fixtures/data/sequence_tagging.tsv", - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": { "num_epochs": 2, "cuda_device": torch.cuda.device_count(), @@ -297,7 +297,7 @@ def test_train_with_test_set(self): "test_data_path": SEQUENCE_TAGGING_DATA_PATH, "validation_data_path": SEQUENCE_TAGGING_DATA_PATH, "evaluate_on_test": True, - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"num_epochs": 2, "optimizer": "adam"}, } ) @@ -382,7 +382,7 @@ def test_train_model(self): "dataset_reader": {"type": "lazy-test"}, "train_data_path": SEQUENCE_TAGGING_DATA_PATH, "validation_data_path": SEQUENCE_TAGGING_DATA_PATH, - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"num_epochs": 2, "optimizer": "adam"}, } ) @@ -404,7 +404,7 @@ def test_train_with_test_set(self): "test_data_path": SEQUENCE_TAGGING_DATA_PATH, "validation_data_path": SEQUENCE_TAGGING_DATA_PATH, "evaluate_on_test": True, - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"num_epochs": 2, "optimizer": "adam"}, } ) @@ -424,7 +424,7 @@ def test_train_nograd_regex(self): "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": SEQUENCE_TAGGING_DATA_PATH, "validation_data_path": SEQUENCE_TAGGING_DATA_PATH, - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"num_epochs": 2, "optimizer": "adam"}, } ) @@ -466,7 +466,7 @@ def setUp(self): "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"), "validation_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"), - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"num_epochs": 2, "optimizer": "adam"}, } ) diff --git a/allennlp/tests/training/trainer_test.py b/allennlp/tests/training/trainer_test.py index 46fdf2d3238..788bbab359c 100644 --- a/allennlp/tests/training/trainer_test.py +++ b/allennlp/tests/training/trainer_test.py @@ -44,9 +44,7 @@ def setUp(self): self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params) self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9) self.data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) - self.validation_data_loader = DataLoader( - self.instances, batch_size=2, collate_fn=allennlp_collocate - ) + self.validation_data_loader = DataLoader(self.instances, batch_size=2, 
collate_fn=allennlp_collocate) self.instances.index_with(vocab) def test_trainer_can_run(self): @@ -104,7 +102,9 @@ def test_trainer_can_run_exponential_moving_average(self): @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device registered.") def test_trainer_can_run_cuda(self): self.model.cuda() - trainer = Trainer(self.model, self.optimizer, self.data_loader, num_epochs=2, cuda_device=0) + trainer = Trainer( + self.model, self.optimizer, self.data_loader, num_epochs=2, cuda_device=0 + ) metrics = trainer.train() assert "peak_cpu_memory_MB" in metrics assert isinstance(metrics["peak_cpu_memory_MB"], float) @@ -118,7 +118,11 @@ def test_passing_trainer_multiple_gpus_raises_error(self): with pytest.raises(ConfigurationError): Trainer( - self.model, self.optimizer, self.data_loader, num_epochs=2, cuda_device=[0, 1], + self.model, + self.optimizer, + self.data_loader, + num_epochs=2, + cuda_device=[0, 1], ) def test_trainer_can_resume_training(self): diff --git a/allennlp/training/util.py b/allennlp/training/util.py index 503fb95e050..f1880fb1c61 100644 --- a/allennlp/training/util.py +++ b/allennlp/training/util.py @@ -9,6 +9,7 @@ import torch import torch.distributed as dist +from torch.utils.data import DataLoader from allennlp.common.checks import check_for_gpu, ConfigurationError from allennlp.common.params import Params @@ -16,7 +17,6 @@ from allennlp.data import Instance, Vocabulary from allennlp.data.batch import Batch from allennlp.data.dataset_readers import DatasetReader -from allennlp.data.iterators import DataIterator from allennlp.models.archival import CONFIG_NAME from allennlp.models.model import Model from allennlp.nn import util as nn_util @@ -356,8 +356,7 @@ def get_metrics( def evaluate( model: Model, - instances: Iterable[Instance], - data_iterator: DataIterator, + data_loader: DataLoader, cuda_device: int, batch_weight_key: str, ) -> Dict[str, Any]: @@ -365,9 +364,9 @@ def evaluate( with torch.no_grad(): model.eval() - iterator = data_iterator(instances, num_epochs=1, shuffle=False) + iterator = iter(data_loader) logger.info("Iterating over dataset") - generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances)) + generator_tqdm = Tqdm.tqdm(iterator, total=len(data_loader)) # Number of batches in instances. 
batch_count = 0 From a23f47a04588a5f49a97ac4628ad49164b3279f7 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 14:13:22 -0800 Subject: [PATCH 14/52] lazily construct samplers, index lazy datasets --- .../data/dataset_readers/dataset_reader.py | 16 +++++++-- allennlp/data/samplers/__init__.py | 34 ++++++++++++------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/allennlp/data/dataset_readers/dataset_reader.py b/allennlp/data/dataset_readers/dataset_reader.py index a117a2edbb5..c61442cc90d 100644 --- a/allennlp/data/dataset_readers/dataset_reader.py +++ b/allennlp/data/dataset_readers/dataset_reader.py @@ -59,13 +59,19 @@ def __iter__(self) -> Iterator[Instance]: if self.cache_file is not None and os.path.exists(self.cache_file): with open(self.cache_file) as data_file: for line in data_file: - yield self.deserialize(line) + instance = self.deserialize(line) + if self.vocab is not None: + instance.index_fields(self.vocab) + yield instance + # Case 2: Need to cache instances elif self.cache_file is not None: with open(self.cache_file, "w") as data_file: for instance in self.instance_generator(): data_file.write(self.serialize(instance)) data_file.write("\n") + if self.vocab is not None: + instance.index_fields(self.vocab) yield instance # Case 3: No cache else: @@ -74,11 +80,17 @@ def __iter__(self) -> Iterator[Instance]: raise ConfigurationError( "For a lazy dataset reader, _read() must return a generator" ) - yield from instances + for instance in instances: + if self.vocab is not None: + instance.index_fields(self.vocab) + yield instance def index_with(self, vocab: Vocabulary): self.vocab = vocab + def __len__(self): + return 1 + class DatasetReader(Registrable): """ diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index e488bb21e1f..f0db52e3bc8 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -5,6 +5,7 @@ from allennlp.common.registrable import Registrable from allennlp.common.util import add_noise_to_dict_values, lazy_groups_of +from allennlp.common.lazy import Lazy from allennlp.data.batch import Batch as AllennlpBatch from allennlp.data.instance import Instance from allennlp.data.vocabulary import Vocabulary @@ -30,7 +31,7 @@ def __iter__(self) -> Iterable[List[int]]: @Sampler.register("sequential") class SequentialSampler(Sampler, data.SequentialSampler): - def __init__(self, data_source: data.Dataset): + def __init__(self, data_source: data.Dataset, **kwargs): super().__init__(data_source) @@ -47,7 +48,7 @@ class RandomSampler(Sampler, data.RandomSampler): """ def __init__( - self, data_source: data.Dataset, replacement: bool = False, num_samples: int = None + self, data_source: data.Dataset, replacement: bool = False, num_samples: int = None, **kwargs ): super().__init__(data_source, replacement, num_samples) @@ -60,7 +61,7 @@ class SubsetRandomSampler(Sampler, data.SubsetRandomSampler): indices (sequence): a sequence of indices """ - def __init__(self, indices: List[int]): + def __init__(self, indices: List[int], **kwargs): super().__init__(indices) @@ -82,7 +83,7 @@ class WeightedRandomSampler(Sampler, data.WeightedRandomSampler): [0, 1, 4, 3, 2] """ - def __init__(self, weights: List[float], num_samples: int, replacement: bool = True): + def __init__(self, weights: List[float], num_samples: int, replacement: bool = True, **kwargs): super().__init__(weights, num_samples, replacement) @@ -103,7 +104,7 @@ class BasicBatchSampler(BatchSampler, data.BatchSampler): [[0, 1, 
2], [3, 4, 5], [6, 7, 8]] """ - def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool): + def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool, **kwargs): super().__init__(sampler, batch_size, drop_last) @@ -111,7 +112,7 @@ def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool): class BatchInstanceSampler(BatchSampler): def __init__( self, - data: data.Dataset, + data_source: data.Dataset, batch_size: int, sorting_keys: List[Tuple[str, str]] = None, padding_noise: float = 0.1, @@ -121,7 +122,7 @@ def __init__( self._sorting_keys = sorting_keys self._padding_noise = padding_noise self._batch_size = batch_size - self.data = data + self.data_source = data_source def _argsort_by_padding(self, instances: List[Instance]) -> List[int]: """ @@ -159,7 +160,7 @@ def _argsort_by_padding(self, instances: List[Instance]) -> List[int]: def __iter__(self) -> Iterable[List[int]]: - indices = self._argsort_by_padding(self.data) + indices = self._argsort_by_padding(self.data_source) for group in lazy_groups_of(indices, self._batch_size): yield list(group) @@ -195,8 +196,8 @@ def __init__( dataset: data.Dataset, batch_size: int = 1, shuffle: bool = False, - sampler: Sampler = None, - batch_sampler: BatchSampler = None, + sampler: Lazy[Sampler] = None, + batch_sampler: Lazy[BatchSampler] = None, num_workers: int = 0, collate_fn=None, pin_memory: bool = False, @@ -207,12 +208,21 @@ def __init__( ): collate_fn = allennlp_collocate + if batch_sampler is not None: + batch_sampler_ = batch_sampler.construct(dataset=dataset) + else: + batch_sampler_ = None + if sampler is not None: + sampler_ = sampler.construct(dataset=dataset) + else: + sampler_ = None + super().__init__( dataset=dataset, batch_size=batch_size, shuffle=shuffle, - sampler=sampler, - batch_sampler=batch_sampler, + sampler=sampler_, + batch_sampler=batch_sampler_, num_workers=num_workers, collate_fn=collate_fn, pin_memory=pin_memory, From ebf3854f563fb2c607c957c3e1797ff24c03ccdf Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 15:03:09 -0800 Subject: [PATCH 15/52] update some fixtures --- .../basic_classifier/serialization/best.th | Bin 19212 -> 19212 bytes .../serialization/model.tar.gz | Bin 19005 -> 19026 bytes .../decomposable_attention/experiment.json | 10 ++++++---- .../serialization/best.th | Bin 34139 -> 34139 bytes .../serialization/model.tar.gz | Bin 31586 -> 31578 bytes .../experiment.json | 12 +++++++----- .../serialization/best.th | Bin 9270 -> 9270 bytes .../serialization/model.tar.gz | Bin 8933 -> 8912 bytes 8 files changed, 13 insertions(+), 9 deletions(-) diff --git a/allennlp/tests/fixtures/basic_classifier/serialization/best.th b/allennlp/tests/fixtures/basic_classifier/serialization/best.th index 3a5c482c9723139145cb46ae3b41ce45b7ede410..a55f032fba1145b6cb8f5015523020ddc1f6d0c1 100644 GIT binary patch literal 19212 zcma&N30O{1*YMvwPnwJ7Ii%8jpS2A|DT$O)Xim}KZbXAfg-98jWlG9ahN!bP5i$>D zC{w5?BuR$CcYD0gd%f@Tf1dCE-Pcvub@o|j|IXTH@3r?{YwZkyO%hR~r8B>4FRPlX zW|kq~-YdxapT^}g_Lbw~<699In-Cik8x@cg78f5G8^h)23HVC$?nFfSZUVl7ysOBV1g?mikgw36 zI$~p%az)*Q5)*mM5FREfEF^(PiI?z*3k{143w4SNNs!?3nRs&fjeRY6PXr`{txO0A zj|__n4G4=44h!XpnI*(74~zNl%Yc7e#+xOFMJ|m<;0lB!#Kwh0m<1#zL`KC+1jK~} zCr0v~P6&&MkBtjRvXtNo8XNnH{&`WJWYE$uuF&5{BxXj%1|`h@TPVR5_Lb$ySXh}` zTiIFJn%i62S=*R%MgE~w$cjX+sJo}woCL1eJU2eK2W}GCTyZxEH~ws{#Kwp}3P>h! 
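To make the lazy construction above concrete: a batch_sampler entry in a data_loader config is only resolved once the dataset is known, because DataLoader.__init__ calls .construct(dataset=dataset) on the Lazy wrapper. The sketch below is illustrative only; it assumes a dataset that has already been read and indexed with a vocabulary, and the "bucket" registration name used by the fixture configs in this series.

from allennlp.common import Params
from allennlp.data.samplers import DataLoader


def make_bucketed_loader(dataset):
    # `dataset` is assumed to be an AllenNLP dataset on which
    # dataset.index_with(vocab) has already been called.
    params = Params(
        {"batch_sampler": {"type": "bucket", "batch_size": 32, "padding_noise": 0.1}}
    )
    # The Lazy[BatchSampler] is constructed inside DataLoader.__init__,
    # where the dataset it needs for padding-based bucketing is available.
    return DataLoader.from_params(dataset=dataset, params=params)
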
zw=vMw^#lfnsMFI+l3_wj1*|-M9qc*}AzXjJqRnAAYSW0F>sO%KBYrHqz7Y;jHKu-_ zF#7zIQTRjFs~S2*lLqm$3vMLJ9dGL`g_>5=Tx<`T@!>a&H!t@ z=Wu_bAnpIo0Z9X_a;+_dw~xfgxc52m!t*t1xuml}X9m#Nc3n@qZY~DVn|+f?#E)b9`7{9vmawWVBBpz+Q%J>2l-E-{N*cf;kB$Heev-s;&WahQ$(oxj%z zCAG(3TZtkaC|eIp=L(WH(I;{KdmY+yZ6zFwc@2l7Wr*r2Pq=I=M(2FL#kL9UWVP&$ z;mMg6)z6$#Q9j3m-M)J#9?j;>I0u?Q}!m>I2W~=)4UB!#> zaLGdUraK>++}45IWl~hG_!?L)YKCTObF>=az>d8&48En8fWhE8aLgWx%3(UB#C;8D z)`@dY^coVU(Uz!Ms!P3=@gtwmufM0*b0!0xytD(Kd_JoD+Lo%x+R^W0UFe5vk#OPT zC%hrIgmi3ufz~UybmOc9D0{gBoD&m>tEv>W`lLjTJg-GfgF;NSP^9C1$#TbcIM+^zk(s7JKk^N56z#9Wm_rVb&a?UL%bH}wmWf1oaW>9epTM?K zF;Z_EO3uZLk*C8Jl0iOIBH*$;d7|k@>mpomFRzakpcWo_G=yf}3#E|@$I|s@ zBq=w@i}1&25Y<#SLLAfR!%?9m)vpd8yWL0mk=DfMd@>x@NTZ5pKf;7=eK`AU6U1|( zX`tU)4B6(xDGm5ky?+N}Tve2gn=UT8plTh95`RMvZ2u|er;Ei@eMv}Uc zK!-9mc;kFpE;yI$cU?+^I8Mxw5u3s0v^XhvBu_dWZD_xXJq;4KWK<^HX1H7qF$f$> z-jCF$cXk;t5~oCHjeRZ%>`SJ)PLJW`@->`ibDknw5Q-Ty#OdLATfx37mUav?C1cEF z={;d%aCmA;*S>V6Q5)Amo~Q_2K@pW`_mIYP0iR9FaZ;-mg zf@l}IVU0^ebw}U?8e|eo^5z;7!*%cR`E4&ExNAA2rcD9E=__GQogq!Xs6_c64C>J>x%f#ub>7Z#ynw=WHL^bk~K*mM4*|uhMbXCuwSYL6bVFjb^xG zbfBnmKIU{RW`=xlAO}L!Ncfkp*k<|)7rv09TK0y-W%Pc=@QgF{ZW~FIrCLB^X%*8y zJBm65e5qa)KOWB4?%-HR45QEXE{A-vsYK&=63sjH3huOQU_x|uvbibQ)yFT$6UR;& zlpCnR-PMJl9$3rF+OCSr$0s6lC7NXIQ6d+!uCnE#?{I1IH@v+H=;e_a5d3%q{Th%D zch?Lh>sEzP)8Litmcg;~(_RC7H`SRA?24sz6A3wG7*E?@7_jGk<7kp&CCj|bzzHJ~ ziQ^q3dK)yUWaT~K9Gbds(cQJf)m!dHmIT){CNLyT@$oegxSpN|r)tc@*Fm}5+ z2~gwZ^pQi*rq_fHMLSV#UL^U19%P+E7?Cdur#mVFV0niRIT-$t>2^wEG9P$TS66)| zd2IqY+@VO0^q$5Gb6ctnK8z&0tR=~@&;9tY&w<1!B#{MUhS1*IPjTwJLumUknSQbQ z04KZJVfgtJvhv#+`^ceT^kDF8(tRSB+UHA<({pr5#Q|6HaaIkSIKPnSmcFkZ;Zg|h zIFc-zdJ+!o9Ym$a&fp1$nEaq^?EaZ^Nv_HZ6n#J4!T(zX#)wR%T@n2pR=)=ltpe!U zrLVA~XbH)g*^h~i##Hp085ut4fd%XfTon*OE{!hYEZLn*n6G=ViYdYe_qW2%+YEW! z?MYT%l%wwbqe$Y)Y<5!QY`SS@Cx|7CrzFglS}b@5yI!f0#MUJwfyB|CbJ4`#-3f&s z24h3`9yqV(MVmznP$oE%&Q$j!%|~XD>*bQ<(7Us^?dTocWW-SK7pEB$!vq*{=OPsA zh|?2!vtWhnE6}V6p<5sLg7x{`%wk<&#|s?5x;^G#GcS^QJ??|6hkU6+izLdo--RhR zO-YcyDis_EC(2V-fN!!2S*<&ZxO{cOc?UE}i^WTb+2%`+F1Nv`#qmTyU@Y72CrBoa zD}XiKXJD{t06RV}pw@OixHcjRnL8Hr#`s|9SlZceilZnCQrdZGUCToSO|ksR_mi>Ve5apvv~jNFub%<@`5?@pae_r2_ddOJ%R zk`qjg^=cs1)QvoEF(8W_AEA(PFFuqupgBsXpmBF33Cu3XM{}mogK~PrT4f9-1uKx^ z1SjSJB$Li{33P&VBV4^RoQAI!pzz2Ty{?rrXI{>xQhViKvD`3Xby1X_8tF^FX3V1D zpT+3r^af^NWC$v#O(82L6yljV#>7am5*N&G$F`^uFfT?Dw~b99kCJ8KsrkM?H)ukf*74@cD#1RNn4R4qI8EQ{Mo1C`wVOqbB6c(;#xO&yKoXUB`%x zji*IB<%!mEU7{ChiRymgH1oa(xsx~%kIf5Z$6Yj}*W*S~ZC{{)TV<(fy%s&PFOo#z zIhZH@1>E>8$%C3*SUcVnm77(_jEA0ZVy+Q+`(zkxdSRVrUOfaCr?;?lYZ)6NI{`Yw zO^DaT(Ktf-6Yk8)hj%hFnf6*i>NJl-M$B`^*UAZWMDb8oNBlW_JbMO1JCPB*5=_g)W)P7Z2XX zQS|i0;f&X?NGh@;9ZXg#vq^LEK`ZPbtgA94VtdLex2*nxPG)xK+tGtIMPcx5>H@NL zQ#DpcNYdhMA91>hFL`sX9*(-Fl4F~ss9ybU)^x#qGQ&ogJ|BM%9qtn{N%JT$Z7X5z zm8(qouxre04+-M*VLa^0egW5WPD8Ih|&HT0%E_ zt>(1Go1*n)ccNg?hGzG7LAu^c_E51n8QMRB7Mihm(o31@rj8PUfMJ6%r2UW;$y_1`=jYEN zUUOgINvBNEJI9;Xk6BRz-g~B@U=zD^(k6V>?nhEDKZlC0VsJe@mcHv9Pj;Schlcma zT;k3m6F0ST<|GDFwKHd6Ln1#doMBB?wkCmSempoXZ3VkYdSuhMZ}5Jc73(S{O*S1S z^zg=Rc$gEAx=wna6v>xnmo*NpcfD2V|D8nxG$Q8?mPK# zX~Ye9lPO8o@;Q*goEGNNsRrQOwMXGkSHR+iGi_R^P8S&T<4x=Bc3p14>DK#V27|bZrd=7Vb=N4@qDfD zafc&$B5q26VhVTv1`S`|cl z)U4^oW>Y%*iUiS-ufybkaqLn1Hh4h1NL1Z1bQW_aDam}~rTt|l+GhrJk1?f5l6K6& z4KfhhYKuW{oap2}7kX=@Ct2Mzi~1YLlCU21H|shq7$xSS=UReFhyDBXZl zjx8jIS49&>Pm~I_IMIW;YM`(}iO!xAMe@yF;UxKe`1Nfc=DjwjvlYZi9bo$3CXtj@Pg*lplEiQaIM-|isFvVtkeRj~Rk}x#+5e-xD~*cc%EEvm z=4pcBhC~Gs7erY^l%?La%~3=U6cjg9sO~Ph3#z-C?gDIK6h#e8jKS!rQBmT)5K&`d zlzuOos4=J!jV5u+!I;Dab=0`UM4eYHGiN4q=FFcN|Je7Oe$?{bUB2($SJglI5>!bX zLnEM{)fWP`v*+RAxmfowht%xMa};%1jCY(kK;~7&z_XRX4(?PYBv*R@K8wKPYBqHg 
ze}+fwEP~vMDR@_TFK*)9wdCv)E9Bn>^V5?@@=9Ktqrt!?KMg+JNnNq@>6Mbj=PLr>#z{0%n*U*1%M z_f(AsLs0>oDQ`h0x7XtG#COT4Nw31Jse_>MJwLiNDuj-&T!2Tm-h^W{dkD8B0oo1T zN5)R*K|f9I4L+gMVN+f)tZoRSf7a<}*P{c;{F-1o?Td8S9_U3HJ8UCOKQnfFFw7mF z8`+KaX*%CokwqRKCyx7bdXXKm5isgR4c_?aI+Es@4sZVY3(nfGqV~tlK6J{K5NLUR z2s!V)ByM?f9QVL9fIdlFL9Y4V#SL1e3W65P}7Jd84x1>5H z5;oX;Xv3wgWX#7|aP!$ZjFTS|Pj8MEcMOJ#jy`zWxM_4o>@afcs|*+%F@YF+YJkm???7$*C z?%+!DR%LlyYrkR=Rg58g@exPMT~CO_4I*pO+@Tj~2hD~)A-i7h0q=x()A*Dxa7s<2 zV@mx*_&KJWq_vm|>pHZ6q#cL`TzHK*rUcQGF=;S(nTAU`KAgYcDwN)=83L|&PljM{9{w84`uU}e6Ruxd}X733V1!K6yx)Kuj=qBDA z_$%=(o$ZKs3nY7lBs%Z#Sh%A+#@ENE(;nSV;hGf~*6_XP(%nUI-+fjD-Wellx2&mf z_}u`KKQe+QuU$$ORz;Afy`w-od5hx&aFE;oYaFo83|MQWomTXrCokv1Bbx@+*87r* z*u{=|c>%c^*oW@$zQ}njoI-u0FXE?n2g7df1K2laD=7;OCdWp!rD#$wI_BYGM^$Ex z zylBA68pn+lG9(OaPjc>AXi1OdalRi|u+LXP&|r`_<8NtL^OdI~%MnT@OOxmlk370| zaRz8NWYA;&-oPC^h#Pc*!`CMa>USjI`PqRqU{?({`BoM^nfW?Yhb2*WLs#mV*^#aq z5=dsIt|Zx|^N8kj5skj!2DOimaNBh3dx)cmo?I8hQJ-|$y_rN8EvzSX;RP_k+k;Ts zTV(Ip{;>RPE9e=}m;Q2pHy&Qr3%t2`Byo2b*G&2WZ=F38b{$b1wWHcPK0e<8Hk&5k ziJv2qr4M$rsp5&>e)irQQpXL8>k7pakCLm+-J$-l0$KA)$c-~>fAUMDGfvpyNYC+j z|Iqz$4?db2=QXznv}k)1`wab>i@p*>4Z2a}?yluzrZEK0pL{>=f?1)D)6(d1?H#;w zaSIq17EL|h>jPFpAUv=|LOf|C6uzu&S3e2TI!}igi67!e;qTzbr)2Od{g9L@<+#Et z6JCuR2{n;laQ>B^1p@3I;B#sc-0RSQfB)eSZaDfIQCeKV)xoO?l0vAZXA;@*7YBFy zQfv5*Hp9Ng`FMX`3|F!?5BokSBkQkqq9vz(Aq{&%xC`6@A}+W^O3!wtt0EP!Y3|@| zslQ{7Gdc9o+ywaXS}e_vDWX@>0DL><((f*9ARTXH(95@P;Ca!>aN}Hc?caZQxc$OR zXt+0v#x*{4eBFK{UaxuFM_LGW~HJnZtFMn4kUQqSxH*j9ZF7mgZ2@9@*e2Cpu()50WZ9z2>>4|;+V zy7hKUZN}53#8k%1A$_0k`lzqM8(h1< zOI`m>ZBt+P^M62ORKy>j|4{?NUG@JWM59ShNlF<#Aw{D>p@>I{Q9!&|ul}{8en^p# zXk#b5f>UBxa*R)o2c*2kB+R%t>M{o38fo zR`nQUb>y=ZK{YR!P@!NIkgO`^*+I4}>zPLq)1=stUSOB4?1?6uZK76`M8%E**)lm_ zwW$|ug0(=s+Vf^KAms z@yuDZhp=63pNVXuStlTU0h=ICM>at)vAKEqqQVwh1-+i2)$k5Yr~*>zP_sr1`uvhTe=CQBkMM`I4SRXp$9yIc&=}7?|sfJ%yK$ zxwoS7pg5UfXo)6fl*ML8`DRJ5*-*A1NQ_vs&Zy2X8&rQ(dx()|u*!vIlq<57FsPjB zn0>{`WT8<-JXe$?hKyM#WDD3jX4ZLAwkk$s;-|Auu3Tu#71aY4g_%LdFyf683r$7e zrV|*<%nnNsB*`idGK+;NWNLMGNo@vglnarCSM{r;3Tmz~bQu$J%QM4@l_jWzcu7)yuqw=b=l>=^43fy`Tx1bsi=<|cGcgKO zT7>B$!$W0KRtJNiFjuTRGsdz-%Swg$q6j*pS(apj3OxhG!dujYbG9^9wd;5@OJ9~n z#(Wdc;^7S}7=|ayQN678npVMNA;~7;c@;5UWrjB5S-K&U6MV5i)ua{GuvkP4VU=I#)udG8maghWGB7z_wCX8Ut0W%^ndx#k^M~sM|qKIHZR19~i`<#EDecb22?^WG<>(+at z3RbPzJ;vAFqr*2T!V?oBW1?(kr6eW@{I6QJ{590k@#m-W(4jv+ZS8-3=Pw6GduIVV zhoN@Pwhm5q&Q1chb`Fltjsga@0{d}wNF)PK<9t3^isH%{``KR^F3 zSy@sB{ACmoni`tIO$|=whDLCcjob`=J^v|7%}nC*pBSZbGg5=Y<3dwXVj^S0LsMfC z6M}!_jgj?#>Bfc5$>i%shQ_6E|MWOMG$S~L%S+=XgmZ)AxCv3I(R?jOTiZYBq$b94 z6M|zBBDfjcT|isJ^yMEuF7|FSJ{Ha9t(?`H5nk4jBWU*h+#2J%nM{I9cqo%6>TMlq?} zT6ios_0Pc$3r!7=4o=}C^5stda#Va`1UK$)&IwJ43IFFk@{Rq|iIH4xL}X&}?9k+h zfA=9YJT)f$=e7R`ol!D3F6}R2Hi}42OiD~k<%?}??EWH)j){oiUvNZB{EzMSwto@F zBqXJ!{*dy8c0>OnOh}6ljtkBFl^1ru4L(1;?D;>z{1i)!_)k|5my#O)SG&VvB4U!c z;Xix}jr;ff{j>dlg!|u2{AK??q_+R>97yMe|Mw1rg+>J@Mh0`^!?+O<{De>Wt2d$P z{5*={M*KFxjsCgspTYipGXJP{etyJ7{DJgalYZ9hKjeQ`oPS@q|Kis_4f{>;{@!xi zzfnw04vk3&4U6Oc5rKcx%K5u${+A;9c}(z+))<@=`meunksoIyCPeYXy+zW}_?Y2*n7CgJ!!D*>6aVb)KiW`;|!{43C*ZI*C?4@|Z z78d^EKkqp4$Kd3N{MsVr5tkU6I_cL#DW0gmJYUDo(bnE|sFRDci<5(ky(3TTr&Hlc zX*_WsKZ)_FJc)_k0^Ya0r55lcy`{Vb7x1JOM*na?I+Z8m$CLFp{o%!5*osXspRlFH zasRM6I3_wc&Xy+^%{MGsB>G>0UF_{0Tx}hB^3mcxc|v?Zg&%HU(9Xrh)p;mSoewA=!Uxp&1*ouqr|GTm15oQ1pmr)x$B(D$Z~UJC z!(u{H{_s!lw{Xs`Lv4S+=|_uuyYtZuexeof47~*lc}BmGj8l0gemv9v38Y_+nf;w( z_WXAcp7|e07JndF{y-Y^3&|>#XYI!u>~Hb|=}+|7COD2iQbUvfV0AXO|428RzjMmT zf$!81ehl3O`B=6;uvC7;(C$|Z?SJ7pr1Bj7cuxLC|1I7R_jp5p@0f#~y~9u!o-^Mu 
z6+W2DPq0Fs>kr4=egSb(d5j+q{B?c+{e?~Z#Q29pFhBk9_$`Q?gWXS%F?^68neO=$q>wlEhaX>^@$JFOd_$l|b@c+^0coTDYem{%+AB}91w_u9D%HOmU z-sBwKl<1!-gZ>|N9p2O&-n3sE6@HVPJpUYCz%Q}Jf5=^4U=API4&_Ig*PjQ7yI*BT1psySj_k_pnlhn1%KO2gnnC% z!oO_?BERi^zi+}~zpXp*-!BU` zp&kpZb!pe$bDW0!Qs~a=2iLGWaND$&86;!MR=WwX(mti&zPAmAIoLx?V=gvxq{*b& z+UPyM2i6a8G3Rj)em-ta+k+2d^Eo-1c~^`+$vuX)_s4Rs)@;M=YHe7_(;?zMiX^a3 zo*a~)!+vZy3lYwGu;RlO_UeZa&Xlf3xH`HU#yahThSc2{ohHf#b{dkbMRkmS%3R#b zU5UvpE18ouDrA4%S56K0Aw01O!)Zl&G`#pEC|c-~Ro0y_n~0FWvV}OT#~ell+=E*? z3owC`f+mNmae+b+ghf@ewfDb4TKy;#ZDQEZFcyug#EAC(c{rRM36aazaP*C}8HwO_ zc5#g`opa(2iX74+TXSzPk^?J%7rhUT2oxdbtqCzOn9t~M-H(C3B6O46J&-?D4d&H? zaP6@&O|*Fe(#Z;N{i6@ezleDBvj&-T$Qw+ciizC32jXU|#)1(q*t}9%=IrC8II-#w zPAgEN9wVD@f2|J8Y1bo?3i)vKdI-kLnxUqHCNT&&hNn zJ3AYKUfg0%&6FW&3zxy;r^lf(@dmnmy9G&KPT=Xe%izU%HR`)vhI|ZAphJ6PhS1TwU5N2}jz?5h#yMwzrjo}+F zfc?_)0zy_Q5r?RQ&~UDVO^(ZAQp>-gMVAsCwE8@V++rH^KCMKzl^<}V_bzZeavy5e zoPu}^OC0*&*GD(rViV`GO zW|hEmZDm-UQ3=EEC*vH6RrvhKP*7N)LBqap!s~{MVCxknGTdwv3{I1zF;X{BeYq4p zZYoRG?Np`m2ZGqeO)pR?IFnP{y_DVgIETse`2w4F_oMyv6?iK~9#1H&XD`iFC3_04 zfro(t%JrC2)d*FR{YDR#6;1?YIZ1jvTAALsCP}L-G-=JnMov&(KdN+m!~E+q)T8ev z8o#}Zn|m!`qT?#Y@2V;ZOjjg3y)LnH0_$PH&?MMmc^f7?Scu&pq{zhb9(Y-Ei5a0* z!rrqsp@n|Cp)ztYOgJn@^$w~MuDJ)c1oH3ZjSU3kN>N`~3p#s82v{FbAu`u%>gRUL z5uu3f(4?sh8C`ENK5!G%F13e=o&D%Kb2IGR`5BTHSK`$sS@QkCTby`MfC{-Bz`3P| z*c-uy^x1w@YJ1a&%(r+Cb4<5Fm7pncd3BcUWu(cP{Le5s?KX}IISUg)`a$Z#dE9(3 z3{>{%5($-HT=q$knVrEPTUEk#r$|#dXICheR-n2!`w<5%Lfv&bRBX*ud{{D3xgcFPczx6PNW2y@!U=zM|Z-7HFv#BSCHDSo2O6kNEmAM^>s+vBmG1xI!Vi&}s!n zMU`OAftzss*#MA)yI7?&3qCilg2~UHU_tgamUF9-dAO$^&l*)Rc3t}*Gj%-_6>Y;n zdIe66Yk^iLJ=z()6ZbUVf!9u7@wwjt4B!-D@w*~8tvr!&-xAKWUGIXogN10@?QTY* zFbhc9W4JL~8y#O2aGcePan{Jsus`}W$h%0;$yep*QfnhRMpT`u6`f?h4Xa{h)UUE( zO*%}^_Z^&wz~x}4+}O}2D?%^HpTX;Ax5CbIXCc3!25zUT)3W>~SD%^ZK)2z zLFNev2~Oa22F*01 z`@Ag}&rjl{s<91}B*pRbpjfzmb{p(ktwfHxdSR{C8;&-wyE?vV+X_3TI`*LjP`3jv!zv1W6&p;wXnPdb$2chMJDb!a5lZ&VD?#VYe zOjZMD|r6Y&OkuE){oyq%ksgf?Bo zUC16;v<(a&m4edF%eZZ70^S#{V&9KmfrIWF(d>{9*s{TZOyb$$*CI=Hp05t(s~xWo z(YuS^_G*xV}gWdNE;}D79rq;J)3f8H2Cw?2SMrD#sD$^wjABc_7FU zay6!6%hYM(IxQ&Fs0EH-6q<+0QI#$Udh%c%n~|?gFR%Q{iJp{)19KE%=9G=BW5;{= zC~*tV)-Vn4zp0Qi}M!UJ5Sm9%bw?qzMz#Ap@)Z7$i z=6x>Ic`auTYkh^+aRK-wbRuK-{28RFci{ndQ#kKmkDp8Hptxj5o%NXt*tqFC?yxz{ zOhPqU5;caU+ti?LqXm1~|1;CkSq3eA9HwZ5DD~@5frAHzfz;lMpe-Rt*?v6|&0PrY zBi}cq8fD=f2U&V`n--K!Ita(aL@Bp18cJ=I$i&Anu(3>uZjQ8oc`4(dy0wChJa7vy zM9zb2Ll=X5RvLbslm!`$^3-dN1DJexkM}2>25SBg^OD~|`I;_tzh2I|+Qh*g4>NM* zl@uNLQUV7*Nz&E3ePCrK3pb?u8*V98!jd{OOHh2PPYVPyaeUo$MttFBcq}&`Oe6-_@x7IhvE&Tf+1m|u8&=_{bqC>T zlmroJe~n4c$KlXCL704E2y{j0kQbY8;dr3ED+-(CWGj(Nx!@?lr0;MQs^WMu#w?EHAsY ztk2?Ty(odX8d6mMuqkn0HI)%PC`GJ?Di8yuF%Wd>Eaqx#gFTV^Ao0y%Wi-D6ZO% zcb^*rdNcsA2=}|FlVxvOU~jVpLn@zxqtGU( zS@050Yu&{)ZRatjM}$!f7{MIfvI9>9Xwx?Zt#D!R5oU>t5!olz2uFsxVpEC-m@RKb zf&C}oT;oT^=7Bs}q}71c_C?IRLR~iSjWTiZEQM8W^>F#90y*)m64y?arYF{Ihf-rz zv>Z=x(8#@*XGSq^(|Z_r5!&E#gbPyEX0So*C62Mm!;Tswx_6iohz&L+3e_(F)*ON< zQ`PCbo9#I3>Uk_@-@wv`k71gED{M|5g)3jLhtGNG6>{-|`j;_U!_X@!y%Uar*48*8)&>ZVw)-Go|HPs|dzer?9cs8-4z4gex?R!!F<5oC+eFa8KYBSS)Zo|Uy2hl98 z6Qz!-Q1W&@oS$8UbHn$+xW)wh_}P$zSp-0Gu_V!&Y>xJJjf@|Qm~uykY!*EQi{;m0 z+w>>c$Pp*pm|E6!xe#3)qXVlf-a-1CwGATHE*P_Z66&&B;iaq*>0OzPQ9?2_UU41H zm6;EBla|9rPd!>)u)Lw;%5gj}U`z}53sI;3$uRA(09&ejpPhbgJYG3?8Uxa$K-_aL zf^Cl5e(@=6{=-;s6WIZA+BfiA$TjGnaSeTA?&I=_7h$NLHwrB|hhDo|(LbvK2AvCn zljE*}_?*iS>@o%UIJqk1cEPR`PWwsi+p+PnWTcS3kI!3~gbSr{%$uC9Y5#pUF5{Y-Hw4 z(V?RS&1tLbMzE`Lh3plg=>Fhw?T!v*y2a&WLvNoPUGORv_vC59_G$M~&{&*Iob~|@ zsfg1L!3N~oz(xGH){r{zxx}bADcD#yh)w&XgyXZHL9pxx#3fF+XrurwOy7-nO22^f zmJr+`pJu?23; 
zZb8ZN%WyBd1*LT4$(I>j$Q-BaM%z4i{rwn8_Q5=@Bl2NT2g72NpXyqL(x<-8-v!iDU${x7G?rr+Q_AS+*WABLZX$STKSmI4^+#KN3o(&XbuJ{R}(!0VDu4EN!2 z{Mw(09UEUk^La61KUR_qnzNWgt_k9U&V1ZjAVf~{IaB8~BOH;k4T5U<+&12n6x+{) zjz|-B{)ImWvb0Hei#$SxjD~PFhngLvd0UEDaZ?SE|b(aNz?yY9UDv z57MT0Z!Q4iSYbfn#kkkSm`oa;3-e~F5Kmn$T3D@OPl)M|?vid)Jl29aGFS0JGJjs~ zmm-q#2~%%K z(4;5wY|OVgP<&RI^gdh6Zh!X%JF+K(q1O$_S+Nv!edMvA<_un$^bt7w@-TLrG-3Vr z!PldTq_S3(e#|l?XRa^9s~4Q$bF2jwy;jb|ZNCbyqqbp!sX5+$ydO8G-Grn`()62> zGHsn^NUBouS?+{VY!QEhU-v!1OE=^R^Yk`0ol_@|w-v*%&5!D3&t)-zkAz72aS>9l z^af+y`5cTp8CIwvs@;pk-UUkNJ?C))XGlMrdgnf#mysbYZhg3Gun3KdmBUD*o6sP9 z1`k?%0k0EN8F`mW5a26Lvx?>6fbT}Q;3G@M_M6cZLc#NBAXJr1#yy2o@Qz9ueDM}z zHC?TtweKvOb9E!yyWRzr&cVe-o&^bAB8791!=&AFW9E6OWt2LqdprNv8Ygj zW<^;+x~V>$yXYXhPw*bRxu#4ka)e3g!6R_&U^~p?C4l0R6`;>CpzUMNp<0e6xwxQ_ z-M#QTtXe)AmQK*3#v3Mp%^^*!idUoGG_K-K*PYCe?mBXtQ`s)^>@4=~yJ zEi-yyD%-m4HXEI_0VG{Nv5KO zJF9zBlYYJ_Pac~V;FonfQFfF9b^UBi-_BPd=R+=|%8Jc+I`=rMlX;A3&m&k~WQjLn(^v5*36Z{Zxb|OnprHWanzyiN)%+cvvTVuL&P*~a(HMX^k0(26+NxYm3K=ZXzFWhRqw|wo#)^?8GzdZ zvgFMg3B3HN4qjiACLTUp*~Iu4&~jK4zZC2NsX>Bt#AVEBsGc4W?%zXq%I;%s9ax0l6IukYBMlv<@G-EeOmhz6T zgDqa!Xt6?pyi)gseFsg6!1SZ|=@k3-B^5>$hLqgWGK@*~L%t z@!k#z>e{{$b;ih&Z|{nkpxb727dF7XW{T!UVx(~9XXF!ps@ow$S1Yf^t-Xkw@is)iylAbDO zjg1an(3!RXQ1@( zNJF8}N!$^+685&<?4oOY*BA3bDz5lH*V}=t}E7KwudrV z<7h%XcNvg^lEe67bP9-1k|e?svJ`m}(erX8rq#5;%2rkK?5aL!;Zp2-KM0%0eSz2a zRB6m)7f@FXfY@D2;DvlR1a@0N&YMcO#karT^f3Eyni268HY0;0u3HIKEa;g9~3I=;+EzaQa0QislRg9CQGWxs;<;i~)@=l+BV1M43%kyyH|*Xt8#>kGC@;;t!K|zwGILI_(YM~?+FlLfdQpPJ zJg$XYxh5E`Fd9F-k){x}g^36=AWn5Mbma{NvVNQn9px7HgqKh9{^$G*J*z7Q@ z$TuPb^Y_CcgR1)FV|TDZmm1)WojkoEeVs8DF~IKavLxB008K;Jfv@VOV=oWiVAnVb z!qEk#xaxTudwS}ahHYcbAx~fyF7`i-Nox+{sw)aaW7s@oYTB`*=MKX=FG6fD!qg~ca|UWYqQYEo69&Gn5Npka3BIBVHRG(tzhUOX~ zMA1u$TC6z?lce>@^2zHtbm$8B^kxy`HRd&Y3!j;QjtaqEN1nR0s4g-khjI3U2$U@uyUi>QR6@0=?Z-q5w;YYj#$Al zszARbwm^<$FPppkD*H;N4%ggWju$k)Lv`5!rdxIw%3NHBy3HqWsG<;Tar?|JeYBT3 z86=E)NfCId+8jEAHnX<``_Ulr7^A!FI}TL8#o(ASoU0Rz9m2hMT$+oIO{B>xo$GjE zK`n^AHKh|;HR#tTg}6lBkmP=8t5aDj%ZW2lL)ZNx)Wx|6mn@UQ0aFcny*CcWR)j!U zrxsB@CrRIBX%gGvE8(4yCN-|Sf=fTP;Nrup;r70*I8*67n$U|ldQUdeC+1YjY!~=P zY-1vRXcJr3@9fTxw-qFI6fz1#Qw3nz>F$D$i|*p5 ztQ@qIk;03Diy>=-8J*9c>xzCy;mq(`Ec@D+?iiy0QwC*&Qs-uzV5LWl480i5x~(8} z;2k{N-qTP3Eg+?x59MMHVe&dLx?|f&2&)fmFnzij&UZ=id+`$Jy3>PeBNN!q5$%|r zUWsv*T`=gfIemYy0C0mT*>?1+CX0|H$@4AQDfm&ovUn@@C zGYO?U&Nbv_M}h8WAsV>U1G{|mNcr-kV7AOf>cB26*>oD8?wrGFw133w zW=3@PO%Lo;ug95>PlH0+qxxpK24>?vKMAvbEr2~|Zi5ISJbdZ^7Tb=4xBhvy{9PU#5BP+4)_=yqH`S1B# z2=E{y+b=S6cPwEJ$<89n+n#}PMjY8DHI=q}@gqsT&(R=5fUNaXBuY|dloK_c2KH!? 
zGq)JBXcw0%x@A%qK|ivU8$x<6NRc5=rAZ0<%>#ANM?*OA=QOOwBh4esyte>9efNQYYu??5iWHX{1u0mGItuYu>ij<7ACeIs!%O=29+~I&f-iRs$M#X z_9flGWm%47YnM2kHAjrf^{of5w>GrF+k?2f6~VIY^I-geRXBU80w+*E9~7x!ERztALXbKF->_Fj+tq{F$5`7gW zN!t&Ef|kHzFbFw~zIDxvpQk##+F^l9`hrnItq1~x0_e9hc4TAZWW34Dhs{f4>Gu8z zl0E(&xPW zUAv;G-@Ophf2)CMEHI*;B5|Z_MAFsxsfYH_lg}!*Q^Sd&X`9=`mpmi`JPMjK4?q|E#CK69a6`b`xiRKxv=4ezW zfzo9Y(iRoRL=X6Z`(;r!gtfuIxByxq#G{*+2a&LCLG+@&JYi0shPH4ntD!W6Ts`0k z3nGoE!J++3GiL@}C8tkjb&RLU(uOpnUWpbSw8d}y*}X(P2b0&#r59&>hS9Oln7#u6 zbU1e|o%l6{F+Ar_U)Lv6A<0X4_3doJ=u2WwXE`2H9zu-eq)6u^3%XqP7#Qv`A+u|X zF@BXdGw$6}NNCI^7SH_IPw9hMHz^geGkzNQpOV6m2uoIBV<>7h@`$fhGc$^#OS1hh z;Byf{a`kK~2`S9O?pt4u3zt~0R9gr>ap z!P-r$@WIkSWNL9PhuM}&pX&S3YUQ`Ed!iUQ@ZFhadsvX|rwz%g04E~-rW4Q02(zDi zrja+_GD+dA{V-T^ISgOPBQ~kED7?QAo^^!~TP<&LNAMUkM(qpxcBc>-veZcQfOd+ClJ%GGdZG$iE zXQ1uE0H%s{!d}buC>3W#lT5wwzzUXm{aA+Fp~9dMy#N|{u1wx$9%y_h#;q^ZY2NjB z*j8)BJYdQ&_G%|a<*3miH~4Q zZWtX*U8~-~vb$r+?lf8UAM>DYNfycYmP0GV#PFEkPM8%WO{5~6Nb-;n$~s4p_Cyi- zTu~MC?joja4?}nqphtnGd zewgx6pZ3i4ArY0{G_0%-EIkF-(xhN&aZ;9EII2Y#miv$c@>}3)Ll3^Voj)qg*~lEaiTEg9MPl6X@?lurMDT4aZ^dfZB61` zJA;1xG7>L)8*r7U&n`Y++6 zV^k_Ndpe4KX_F_RHIvC3jdaS4IS1RfM?tel0^L7i19G~pN!t`DVsLag@i5XSkFPZ2 zJ9h(m<>>>=7#L1HhcCl~Oa&XlAPlBIxYSV4}pGO}uO7F&ld; zA^)p1xn!nF#0y<0Z!?GHf0@eIYaC~EtAI@2KY~1!Goz=j3owTf}9i`#BSpW>q45^aIw( zwqZU`fu5+hBo{9hF|lo4)N8#RS1VYQkb=?{*l4&IwspJ1R0Ptk3 z5ht!4CS=9OaU`a%9uW>KCi8Z_Ui&Pay~#dY++h*HnL3i45Hcn z2={%dt8055g|hCxH1OPJj?nA3uvjyc`tf7zla)d8-@U`&1WS6icL+Ih#RtvzKSQtR zSR%RYA;)-iF5yb-z%v7Dkb7epOgqQtR07ioy(~wwzYQic{nxWPantDVEze+kwmWSI zaiWp>_u%MxH9pVG;d5FZ?K>P#9%VS=vXW%{l(!Cao=l{tZdc&pEFN9@Hi$T$3?wg? zOOTaE>af;<{~oU8PBVj7GwcSB**J`x zrbCKubi%0#L3G67Vc2waHd!AO!PXBEB*)$^2iGkJA$h@jES(-fcP@;^_e(=@Rkjm- z>pu-fpM3+R>~M0mb3FS?{Q~8MWQm-?wNVQhQBMyz8ugy_x$i-%BnDI6O;gCRuePM(+Z=3KIh&k% z*UM&D=%L$aUAps0Bwast0p_1B$1nNM&|*pl+zUAY>usixzPV$lZE^Ro3>Hk8`poqBm9*CWkz+?Y$AYtNz2O74N)bW5rqE=FHC zIiuOK%ghRwsZ<7pfL*UoG7_b!!`l#A%)is}lQLBLYA2&@G7aMbyhzub3anaULQHv$ zc)YC*2NiRmYq=J3(=&-wnI6pFejYr1`{Ce@DMV>?0NHU|iDV^p;ybUkOrG9eoV(wg zoN6zG@rTMVh0cQ@FHs6t?D5dH6EJ=KK8PANohk_qg*R{XsfEE9Vpeet>zu0a!!|x& zjI%^T>yKckBSkIuS`m@1FcRJEMh_KNF;6^G==5p@Vx3|_7Wleh-oymjJ$(u>oXOy; ziN)-rahA0E`5;;= zY%&R|zJ%$nwq(!sM-b)z65I_N*xEPaU~;56u@AJu;&X4%DR%`(_6=puToBnboEQmdXfpv8tBH}RB=@l%&9f6g z$#)UlZhXnSo3sGEt%7OmSrO{h#UuO7-@u;7;ovdn5-g8Pp(o|V$;FZs8mnzeY~w`e zMs;_3WY|b*SAQ1sH^!5O#xOeAY9x*S9!$JS9y1&oNYW!+Xs3xOJ=D09?Xmm_$=Ct&*GIfkFptYd0b% zn1Y6vZ{MPd%^0k=d4c|;GeA8qoD6Y0g=%ZXY3Ykzyz3c2ZYNxYfz&xKDdUB z`ZR;&HVM&|l z=bysHS|zIZ)QTJ{uV&p&tz?Fs)}t40&!UH)jHS{AL*UWKMbM=0PI^7X$Pmsc3<))c zQ7;mSfsj76eLabkm^{Gsbx+(fN&q*n&84b)S2G@&$5`tp^Kf$CHRyIc1s5}0V4@x0 z?qv(1J>Hjee4j|BWlV=zTXX5RDO)k-RS0Py>E!F_GA6)DpYYmJsp;vnc(1I1@jInU z>vsC%Vf#d~4;*6b|9t`*aX zir0ORm{ScUilb=e6b}+q(gV-WY+}T%CKFLVb7sIejIP%?1m9vi5 zS@~l)I>MA#s0osi!;{$epXA8Fo%|Y(=>n7WC5$g;8hcGHhaA{KXvL)m%)?NB`nYNs z@i_b$ombyPdy#A`)G1^RdmqG_&`g*B@@U*SnwF-Fdpz~;;gFk!wd5$WfUdBHcCwO=oRPy28jKIj~zvZHBw ziU!@J@)@7<-&wcvIblLm5ojItqw1J~QwwkVtke3PT=s5#yLZX;}exRWXA znhMl@0QPM5#!AQA9Kl0jbg%tOc$V)@M7zxBe*IDOhJ6CYONqnUuUT~FifFRgES>lV z-^HHnhftRxMP+oCu`X|ui1+^SG`npber$b!`OYqk-hx;%tW}O026#ZVz61z7)+Eim zb?97eN(P_Y4WX}`h|6>nQYg)%LaYKV`6|bMhXUg9=mLz<6``}L4xsUSE*;cAm0l5@ zO9c<~;=`2LWC0E(=^22DHqo?ZwF^DTm{Xx7Il|7oh|g|jvC_$}VEB%SBr#U zqkKikB7DvqwV6is+-zt}U@Bu2{GBnFYk{&p!|AT;<7iNgFIgutnYP{%!j`?FAaYcf zO1`uw%}t5S-VT3eqw^8=;(`ifJ)_C1_sPU+k0kvVJA%GFW(X6lG-%@cI3gO-j)o2! z@X3`=cz3Q1wcQ~`%Cbjc#l4}-tF6j3M7k7L9ORSD)NGO+Kb79d;P(t8J!U{bm^OX% zgX_K}IA-z?Qf;*n$KJ3fW|HcpBR`v*eLn{RvNQ3^IA?bB$hAQGn$XVx$Usp9dv5J? 
zVj3zIJjt!XXWy>^YZknDIcrhcL5NS0c!apj+16b&EMLm-splrDMi8(Mzg! z(e#!jY&{`Dp{+ZM@+P>skc=WZGAHm)n1v>3BrD4__tm*Eq@E< z3H|yzz?jK8G=IevuyRVpBPw?rMCGID>2-FrV_X+-jTSf5i-=R+qAS2{8bKc{CRl1c zi{^g)2qxQYsN_3Sa^}J=81v{Y(A6`EXXSg)it{4go(^P_Xe?G=nFTv8snERw<7uCv z7VVT0L#Mux)O+9{R8*^?!^#TWA)85xZt0WtH!R@1xd5amY=M1cN>pmj4m6$IiJy)z z*jD04Bwq7~T~a97I_v`z`Q{|jMRUl(p{6+QmOeF7PM}|-8PbwF59bsGgF!(w*&8s0 zT+mCRt*$rmxqu>F^iF}izp76L7b|ekT8?&H7*ER}g}Pb2XO~v4hL}-4Wh^VlW54-@g!YJ2bH#|&^&iz8h&~Zot@BuCq=G;%Q8a} zebkJ8%XA`b66U1r!f-bB;$n8}x2@>dHH|E_S0dSyT*x9xYjQo$isldb1UiMuuqSC8 z@zk45uL?b8!ev!y)*fjvtgS(NcMWtI6#`QwCXpU*8<5`TN)CEefMw=a76)9YQ$j51 z?J{()muefn1;JbaI{J({=^U6$ zo5P+$@#Y{ZG)I-xcm$B8{@qY}U*?YWWE z$>%$|q7(Hvu18m7`Ov4gOlb9tT(W&o42n(L32Qg!aVph>$*^ioaw5Zk%1Dg>?PWrk zE@KYSv%llrUM{?nSD+)UBdNiD6>{5p8&0UwVk%intgurf6ILz(A^*w19ce27MjJPoOdyFnxhMlhbbF}`gqNw1Bi8rOZ+sZN5a;th+Pl)ID6TAwXpFuPTu>8? zBH#{)fS@S#t`L_*Q520pz^%Hwy1S&htL?4^+66U>F~)_spm9Tu8YddpNur_ObxRD2 zON=pU)Zl^|<8Bg19d+ha%b7EiIdkUEjDOPioPJ$h_1;~+@7`C{r_Z^~(#~4oLRL7t zusj&L4>LmGulAHFS4_A%@dvv0wK!I9VLVK}`-!luc2jKFuK`Xu5sLd7izvjNWZO?w zvJJz>LuH!?sBYB}^B1SHjR$+O;w#Be*gYRNRCww5TBjN7-jA*qDBfM!91<7&=pFHn z22Gnjyf?N7%w!^5zuty4(c zzKzCy^|EmDmo8MeH4Z0z)LXc>p%FGQc7)W0S@gw5W1&im#4m^Cg5t2@pp(1k@qou{ zt@)!b8b5o3hFA4rQ)Ui>ibG@IoO?lvrP)-vvdZwV_B z2H=sFe`TY*d)b(gO>WMGWmd$OndUiWn@j(N2yr7zP%y?7y zaOYn3j4=;_Ha^2%xn#gv!*2wrE|XD?QMFa6YR!#yi~s!rj^WNMIl3Q!ZU5*@~KPSw(E}2n3xQ@ zCF&}(zd9J+?B5^v{3v4C-ek<(`VEC$bs@a|Emr*Ja2&lP6zBgm7$-(_#*zHHYVS-t zEh(*q8xIl)eziT`9}tK2b90${elUdAr8r8-f{%{1#~G6i*dpOJyY76M_T93VHXN9T zY5O@&=|MP0utNXx-7NETHp^T+2}VzA1Bv0oq33`^JTq)Jtw`UIGGwU(Pv`F_zwk@G z5Ep-jb(>Zj*mOI)Iw^?X+a3arXdA3sc$;Nsv|~#ihOidbN`#`~ zFxJ0sC`QLQailyEPE0<@rc7^v>y-0s*R^EK94p}54ecR0Q)XkwU8Q8;R3WQb8`!hL zOVhRmW2?nm=np#ZbTs(I`|JRj^s}UxZ$f>99Lv1zmJC+55Z}0!eeS;g*>yODu$3mSJ%48?;AN3M}8(3`;6ovWoqHtm-67c*B8*7L14M9d^+NNr5>3vX{1> z`6Zou_98o%EI?ytF?N7G^l{xe-mt{0Y>3GW5nTe=)Wr>;-P97gYvWcbI1Er$B|)ER z6Lo*J(3_UsmOZ<%Hx|SM(ei4y5KSZC>fX+H;nXcw7EwYw&wWgfuUs$88do39k#+EB zqq-3A=mnh6ARAwRK$sYm=iT;ZGP26gS;g9&Upxp6z(g46+fVry_?{@;0`!<;9;R_)qSRY_cIgbC&7$(8BZ4Oqe}xiV28>A zHvH{i@RW`ej^=$u!;amiTW18|VI>W6<7?s9G#v5{XL{%CGzvqHMnK(GP0-c<7(4fF zINjFqeU`F)49<)HnBCi60C9tIz&q;*Te2dtJnC2o2qhN0AJmV2+$Rbem!I~YpXOz4 ziucm3ONQW;^hUzG*RfPd#`o`bI>ypA{3dKj=|n)jXRKDa+Ml1+3C#q}?(6b2XVD__ve4%f%$ z0=t?AYa7I)(70dsVIwZnGv;tMQO?Ek?X6&QoqAXm{4CrpEW}N%gK_=0LHOux2fp|4 zJ$Cv`48DI8Zq^Wo2b^T33#S-barLVr&%K zp)O|k(jN)Qn_q+X=00XObJ{>ypc6aoNx&vGzl*)vq#KxuW1#k1z2Te5)7XzYLm(h} zENi(sOgQjaUkHqH;i`bqbXB4emw0+3ow1Y}-b#mnU7H1R=Mx(BeGF6#?@I$7on(Q* z7U)o;xAx~&c0KZt_e!@|wy8%ve7$`gU9;mu*0gOl?0)VV9ld%{`DZUS!r_04hI*%Z zGUb7}$6M85!d+VezI%TW`!x3&y>+A&rshsa>DFZ--4&e9dRs?gNbB#|`iw4c-r5k) zp8J^1EFB43uCJqX-~-mBjN*i59igOIV|rpyE+&OjcH>YU+&-CyE5b9-5s@d9W)6aO zImcPeV{m{Z+z~paw!nZtp!izixfRjr>HL4~|JE%j=|A=V^|ed;xBkCL0pUsh{%=o{ ze?>G%iYrzt(w=Vr=eW4|-}9f8*tMI#|KHOPyJR5_)kOmQ@`sdvs(-s`ALS5DCdoAF z9}TeoC)>e)um7(9TK@@gNeTY`pHD@ip6vQ0N)s*qUEmL0|21t>pZfhjfmi+S@Bc}O z3I6(j8j_kiEF(Q*Pf zq?0~PcJh%=QtcA4iUkt!sBS)mpR-#?q3Gf-59vwtp4swVD{!Zm3wVv_i2C%>bqc9(3B6o!f2OL7)SCSu^B>+LG}VLXW- zRvwW{<0{(opnNX-tn;fj4=I$*5^>1JeBU~w=yplOC~{}@9>N~IeI{|qcB4d01$=^9 zL|l?&<8#NlWsNU%N+#K;=`HHHG8YfR!$WuSa0?{6tmvw0;xx;OjRPRL2-l{#OHv~sac6lEaI>jyjvJBN&8Bw7_7nvTB9C~=s%9iAXWQFR_v9t&5<)fzpD z-VEBR77~Z3>(@z@^jzcUaxC=b)7%=*6n@1dyF`aWeKJiFbqC^;;_``yAG?S_l#388;Mb92zVif4KNJTQoLuXRe2aBX}SDYd@#tOPb zRGajqia4(-N1KQ|-H6QxzFeSdGRS&ZJR**;PPt9wC>X7(%w02!Hkp%ea*8%l_ffL* z)R&4Jif&BLUr$XQdSNRPdLtu$oHy3J$#!e+kEEivQ6YqG)hgL o>h5w+{kM9*_{A@N@rz&l;upX8#V>yG%O6<&4j=nMr2u#U0C*4e_y7O^ diff --git 
diff --git a/allennlp/tests/fixtures/decomposable_attention/experiment.json b/allennlp/tests/fixtures/decomposable_attention/experiment.json
index a7b5df9f38e..4c1acfbf3a4 100644
--- a/allennlp/tests/fixtures/decomposable_attention/experiment.json
+++ b/allennlp/tests/fixtures/decomposable_attention/experiment.json
@@ -43,10 +43,12 @@
             "activations": "linear"
         }
     },
-    "iterator": {
-        "type": "bucket",
-        "padding_noise": 0.0,
-        "batch_size": 40
+    "data_loader": {
+        "batch_sampler": {
+            "type": "bucket",
+            "padding_noise": 0.0,
+            "batch_size": 40
+        }
     },
     "trainer": {
         "num_epochs": 1,
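The fixture change above simply nests the old bucket-iterator settings under a "batch_sampler" key inside "data_loader". As a rough illustration (not part of this patch), the sketch below shows the plain-PyTorch shape that this kind of config maps onto: a DataLoader driven by a BatchSampler rather than a standalone iterator. The toy dataset and the batch size of 4 are illustrative placeholders only.

# Minimal sketch of a DataLoader configured through a batch sampler,
# mirroring the nested "data_loader" -> "batch_sampler" fixture config.
# ToyDataset stands in for an indexed dataset; values here are illustrative.
from torch.utils.data import DataLoader, Dataset, BatchSampler, SequentialSampler


class ToyDataset(Dataset):
    """Ten integer 'instances', standing in for real indexed instances."""

    def __len__(self) -> int:
        return 10

    def __getitem__(self, idx: int) -> int:
        return idx


dataset = ToyDataset()
# The batch sampler yields lists of indices; the DataLoader collates the
# corresponding items into a batch.
sampler = BatchSampler(SequentialSampler(dataset), batch_size=4, drop_last=False)
loader = DataLoader(dataset, batch_sampler=sampler)

for batch in loader:
    print(batch)  # tensors of indices: [0..3], [4..7], [8, 9]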
diff --git a/allennlp/tests/fixtures/decomposable_attention/serialization/best.th b/allennlp/tests/fixtures/decomposable_attention/serialization/best.th
index 48ae5bd7a13e8c1240637f1051b46742094feeda..457f751e41666ee685fea6324e750239fae9ff69 100644
GIT binary patch
delta 1380
[binary data omitted]

delta 1308
[remaining binary patch data omitted]

Ci&et7bUq>Whw1N2Xm-JepRsG<;BH=YLe`5PJ}e$jeK`%qe+_?j9#^dy=} zlc6>{h-}H0lI8Jugi8A*q z%ZOLr77)o8!j_IXg$B=VqhdxbYG=4GjT*ju*|Ld{@=}!Tk)Dp0$6X-Aek5}uP859= z#n`=z&p?09MGW~Zfxcgn$M0PF66AMnqr>h;LPU}@Ug7uAx%MjHF>O87s3H)cwVIBf zcn{WjS`kk*X^c12#H6je`FB?aLB|qBrmlEAwO8DU1@HC~URVmd!>XQ!yjMlt{M8)I ztYW%K)I78TZb)&Ua8M5}Q59f+w8i+=Z;&nE%Oy?4 zus4KHZEA%b_JW3mlt@YP1q9c!3PdNmwp0|#s2d0UcOx?9A$hP8AR2Vo9r+O(7n5IX?uZ_Tmw)^thQEjg3ze@VaUk*{~L>dWN%?Hn&^9zW0iTJ6?b}A&05Spqh-FFE6^5jTj6YzK;n1(NM-Eyjd@ zrEx-+aU0(OFJGENt%uG+mEh}ebo&Exxn(`m6+a%@DooM4U_RPgSHN_!(Qw3ZFthjW zH+(n$CP$MU0<>9+^}M|SRXWDPw9y0cEu%-vu8A@4`=)@e62kF^hbbf~pw@!D$n7ry z??4KdBkHM4=LQ^+o(M-eqtUBanys8wOp9FB!4%oS?Bxt)wq0WjEs@eD;ainCqd!$) z=CgJT(AFeD*Y4n$MZ3}3cMMHYlw=nu9mlNvRP`lio^Bot^p0JU4 zX{Z5=-kSjK=fv3LNps*$LmfztOTy066X4UX(danZ>xMr zzO)(miszDxy;)%U`5i}pZ328%Gp2o)+=!PDm--Fcg2e}ykO`boY`*DnacTi&IIjqDq>PvgmKI=PBFQd4R111LN+GT%2dczJG4Xx`ZP#~0Kyv~b z|7OMLitnJW$M|6ExbyUIgf@69=-~d_5tzRJIbGT;M;ez)@jI`kLjP_*=IeYp*wwk1 z?slJtb(}HGHq9&OVDgpZEZ>Iu3hK17O@UdsAR9ykH8XunRa2T?5JXJ54R%aF%FH|p z>%6Z}izhd6{G0jY{igM(TQ0(e&De{*MH%cp(dV>4o3(oSRg=v>(@w3Qh_g2R^Pyw$ zQ)K7KQtj6LurcQ{y(W_k@wYYU)8DjUyon?n4H!i$#JZ4l41|S2_RJ)OLG)SG378(T z88$4xf>-x0g@9{;ATm=17GIkSNv(x!mQVuddp*6+}<+OGjJLVOtol3yfOXuTyiyo-|x)nPx zljysDq(P&<&?VZ%WOYRk%rTP1%}gQj`K$#2bGk`{-!{De$`TeO-=u~+ro*S10{Yea z4C)U`u@VE5sZ*U3ER_&ur<-cB3MTc0`|u@n1aaxeCth&M=^-xIy@Zabtb@n%H{vJ7 z!*sw{FNj!NPxJ$}!8!GM6mI&2RT~;mI#!fORPUmj>(-&&=Q0%1Er*=YcSJ?miCOAv zLPnKTV)~r{Fm`DzKeAGlv^kqY{Wfj%xVRC5<~QN1xp(OSo->^2yhgf$uRw3#An16O zNSzC<>D2VMcr1Aq>u`G$WDm&!&!{+fp7{-ZEj2;;d<4Jl#6i5SR0>;H`@)LiPf#1K zgI)0VGSsi)MwpJGwYTTxwq6r`yD8%$#D1j=&F!sEB6*_55!rLz; zq36ggzR&K7@W#-cRy<9j#W}7-H9i}sH7U@+1I~fd1#_t6Rnv9BLJl+i^PD7w@^(TJi#Fc!t@oT{_bU%eXR|)q@0=~#d zMtidiY`5o=0Ed(m~!>KK&YboA!ki&638$u>>qP_VQP&?8Eo%+i{WIQB;2W4NrT@vP$Bk z$-?X~45I3|HE9%%-8Ptch?nt6*&rt8z6|?O!2KMSiGzek8xE{Z!QjGV(0gk_th}$2 zclWfJtT+ygeYyj=UM7r8%qr5oJ_7jS-C%QnGR93x!~>6JgNVvL`f%)1^6p>_W+n-- zCoL11K99j@VKt90bN@Nnbms~6-l2u-PVa>$Be#)&XbV(uDn*yktvKzv zA*P1tfPP>RS@ltwJy>!OGS(vJM#BueTIU80pCs7qr;e!Jl>?WaXHfCyS#a~rDCVVU zD;dO8!4da-Qgcb4;U&vLSM*Hwu>UK*fnPfsSV*&0Z5oXSMz4qU22HpzXC(9f>^Js#aK_wF5+AKEc`e{d|oq8>s5ghQODe;9C(3c8AQ^ ztilOUJ<^VIaAOlWcjFL-nFn%?&7I6d3cf37?FPTu5mY{T7Oa|Z7&cG80Xv&aQO#bH zwazbu{QR@%JY5gN_uYYW_jb@#+YUfUg(;Y)${&)x+@_8ReGqsi1x+VSz!`)3K|ORN<51QL znpK}^;fcMN)?>vQ#|7g$-+DZteuEqzw*)nvSJ6>=S`axo568qZtggxqdZkzt^Dat zE&V#QY!9Mqjv2v|8YK+Q;Sg&{3WGB%;62q~&Fix`w$-W_D?b&@WNJyXoEP~J+zb-} zUeJ&B#%zV~QH(v&0s)$j=&izLeuAnzC}c(9%KBb-ylV!3O;Qwnbo(LyQB@Uu^eBW+ z_Y?6@?EpqQbPGyle}vf&_CX5w1OL5g5c)iGL@z;K{Wvwc$<^{Y1d|1LMKhF9uK7y* zn~uZ4sX6r4uqY_HkxENll<|4G4F;D_!2r+mn0WLfhVq8fea0$i)^Q2Wcp9U9ofw(X1tyo)0V0HV;HmK|asI4V7@TMeFFEzF=&>|S)L z+sc_fT7y}CSsYLGjbWS{mZFe(GkN%0jfUuoux-cV(7dV?D(?lsWBF{^!tds4|l9;SAe(${#@HlQ2 zOgdIYW41n`hHnZu(W@TP*&&H!ZJH&_;M}FP;7yh`PD9?M&oIzt6hmH1(A42u$;=Op z{3Z`QroPq+944N_w4zFsk=TNx*JWa4vl%V(7!HO_SMj>~ToUB637kC~@qtesx)s+5 zY@5n_+upEY95;OWkc6$f%HQ-PS>+QruC1fqA#)V!?WG=INnmflb2L=`tS6A4{Uy zC1Wh}h{h#fwcvSE91hca%AcUI4t$&*VNSjsV|huLWZY2!c1RfA-QPkYj)gXDejN_y zOI^|MMvq#Sz%>VLJg$?(_ueGnrm6~Tz*JSp zPF5j}*Z1P1v^>}~FBS3QIl=62BZsjf!c-~{`r1?aLR4v-`^yIBR*Y)@b&BA z|Frj>QCUM#zb8_ph;-?q(m@a@fH=pTy>H!H@4ypT=GkpWjRZ9_&F!zKAvI_+5Yfy%RDk&!ehk z3$ctcXJ#$tx!8$2vG(T?T6n|@A9>Ct1^ME9iRZA+OuS8P-5nvp^$^smWME|737YQn z5wA4r!h;Krw0N~2m3BNtr;M`THW!Pqdv?p?vylwhQd0_NOSP!k@Ii6-<2i_QJWi8d z7QlYdUI;t#wZ8PKG`MHy(C)d*Y8`r~0aZJy#NfQLbI`Rb3Sw+DV2+&-Yj#2qXWX01 z7ls}pll$L7!JF++KH)S}X5B%-WyeTHv@Co!mgl#}7Sce6^R)KT6{>1cilt>|&~n^* zaJ9-u2OEwq-#r^<=AVS6qHX{pH!$PPV`j8!0nnZnlK)wg)%@d1rgzW69Jr0;z1nC~ 
zIS>A1kHMXrIfy#j&HsFqgG`7RTYg3wcJF&i24dwfg*k?EEiaSn@AJ{~=vnf6VmZE& ziihFj6o%st*tB9Oa}viF3Iw{IhQl_CLQcA&|2*arD9AaO_vH>rHzRCP+)& zd%@s!8qK*;M5W{ch*a@Xk|MYjPe1w1wLfhy}KMU$&k5q{}}FnJsIN8 z>_nmLd60No2X9Lo(gznKxy*!{WWkhaFuZ+K7R}}S1eNfa=Mg-0qMB1P@u8}zy70L+ z1s(>5;yjO^_2;+skkel^Y5ko~)Y<0@rSF#0*wJE)j(0cLs~iCrdQJ}; zyU)aAUJ-V>bvG*II zB5Vm&qBcmww+#D7H^|Kl$7{}2DEm_y#e_^iFX0l6jD14CMKn{tabw|O(OD9I*&n$@ zqw%0|FQr#1sN$C_V(ce~W5zn+gw}iXm71vSpqT`mo4tm(Ygy0^&3dZdzY+QyjbK9S z1_&{IiZRLqkh&m`yX>I@uf<*9Qru*m@L_Vjv#$v24yE)^M>pKBN~WvSO^MIXlce&E z5_+HO9Ih8y;E~Lr^$bz)JedZX<*9J`sW|@Xs-tP!593U+T`=wLa9Z*!$9jhpMlC*& zUU|5Ry;yP*Yoe9#c3nC%sA>xr8>&FY?lldK5{1x=6P($uG~5{7MxPWDx?Hc|#W1Z#kbVm*;>OMXY?Ho83@;qpMOHu5q+wb&oC(e*@gCC|n<6TsB;*^U>zyK02}}ldLW|2rX9Ld-QF`H{9u)i)|l^$#3DW_&w1Y%U(sAyx*Z-%_$-P=z)-iSvE!3sJzz5ohEdqZRxT6bVy+nMS2hG5rpl-%tXIHLh@D zZW6eqDZ&%4CG6{~g=kssNTWb`1hLO&mUW%`@~B~Pw|Jr;3j%2 z%Ze8;%>nx*dTf5g2>w934!fj095z^xW&N+nKX+MfOIf@XfvKD@fIgZ8?l=*8_ZJ;qH zhE~@t*u8jf){Dwu~Mvo=p8-HovuX@uCWsFAOW|=tG#Rq!g&N^K zP#h)=%SOv#Se_SLSaJ}TdnTjQvt0BZ(q+yh&gR^ls)C0Sc_E}B|>`o~J zow}#s{rNi?dnp@^U)hcV8OPA2zZ}2yyJNbFF*KNU;=JVxVAfCpwhXl5R-I@ruTP!S zSp|}Jza?3hXR$cznK@qeZJ?R)8e~_=Sv-E(43^nYY)U#%uN#|6ejj;`R&$0(SV=C* zDkqS6A#&7QHX5#rtMU~M;&@1NEspxC!@JafB$q#Sp<#_GeiCcIxyJ$)|- zn+?M2iDQtF=IOWb&*@b8Au40$1M*pE7@xibOD4Po-*=BNr{o@PyEdO>rOMzE4*`D4 zrN>k<;3|`Pi(tT=b9l+;0-d5XmKUy_00SKkBvPq^oERI6n>JoTft6ysjJy&|i&BPT zx8qRemnwT^p)l`ydJ%ho1z7!dI5u2I;fZ>GC^)qhbKYFDk*z%m)|@v~l;u)>VKI%_ zl}{#KcnPbT5FE zj28R^BH2OY^YjaJwelf+plJXax-BHP>;#%sJcJHWU6MWHzd zxbI3ZHSa%2_2#y~gZ-swp(|ADHQt@yEdBHvbAgU?S0 zqwf2CN+Ldx*Cy9<@6w?PM|`qf zj!$eGg=4)Q@=k@>oVrdpY_hD!TaTWQX}8m{M(ZckUX@_SM5*D~hxtqfdzxHnlwvPc zYSzC=&B9Ir9XhhU1DB0Py4kgqx@*=#xyU+_tazU~IhN9LT`{(f4#)9E4Ol}8_yaF8 zIPIbfwB2z#6WwzQw=VHUflPN&9OlfO{_cs=9eKDRcOo3#r_MSBDqwZbbuivA5BpR6 zU@X~B9IP+k*!8a=Wv@5rrtgN1C3-N<)P|g$lRNAaCV}(}e>nEV2`mNUU>~_bIFUWj zcASUxpA`7W!Kp;vC<7&2u7lFRR@7LP2E=Fq=GS+E>W|)G?Ii~WH zzEUZfG>CXJ39jdwux{^j;n4fN@XgW(XB!K#e;^P%zNDk(uP?-0Z4gGRD1b2WT0A}= zhfS|7!LK|E)wguu)AwO`ep3cs{{4{%3#;+H))z^8LOn!w>XGpV&*1V)2NXJU7t`Mt z59=3c&xS!_8vINPY<2mf%h< z?f*hzj)}6n^_sCl?k!0lC5lO8Hpn{-(*3^85VpY!cD^=-CcRSn%+?e}o!>%Vbj9M2 z8b$mAqgY3^tKhwKJB}Sat!8V^EZCtNT^~Aj47^D21%)Nr>_*!|^u4_hTVl5qH(oyl z3mtdD6>~>eJ2xIh?LVSGMHVy*Z{c3{$qwt|uY}Jtrp@cl@y}k}!5u|T_&(2uk>2(T zJR|M!PIfyrY4C-D^=VMK<;t)Rng=DXjM(k<&oNKAoa7sN;+Ji0;OyH?4}L13_51XA zndqhH7`C34PxB;c&M7$Y%UxQtXDp82vYdW--%e%(mk{&DSL{;lL7LZX4v}|nlJL0c z)Ge3iKLM##XjdzAGx!oA{dJFk3U5jNQ(g2!=A>#BQ`s#c;8uWF7 z^wPuhhn^=SE`3eM=Y2x^4Z>{8>Z8_w&ud&Yut76c$HSH+oCjBvG=TtUg)Y)uS3rDeZ#7m}m@lmK2+0IS>mdi-V zpClp*Eu{2P36ULfgjDn+w!2&=pF0Kc>xRYqRUUQQc^jdv^*{%A9k%D_Cqt+wY(NS63hpqt&SM?_Vch# zJx;uWQemUkElhss3PSHUlCRM>ux_xD{MaYPF3%~2+fwa#Ms_m4qG3GARbP+Yl3)Lh z1JGkT@V4P5QnsR=6G>8F`?wGk40gx5l^y)Pm+5e^TLnbz525VwM#vf?G#rP*Sn0Kx z#w(t$xAEyDiER&1>HHcT{~-|sdUlbwr9hOk7oho(l^D7!mtp4*`{@lrkhDu5&ILDc z-eU9NqofU(gcib(O&%}(z72~exuEtaWk`M~3Hv;F`gbn(4 z78kp6SZXN?BMomsU9lyfKafN5%L#1tTmX08?nZKF0KVv}qsEjZ^7r`%9XScm`YIm1 z_FbXNt6tNhL4X}a6WFCU@6o6{6Fe|31RWo3q#N)3uf? 
zXdQeC#E{)}?&)8a>Dl%F%b-tnBM+fTvny!~~aEppg;;xhF=T0-PZXTY326~o$J7Hm~G zY%wdvSzgY=^-CRENA5(GOoAZ^55e9~2R_TUG0_>*uxn~O)f#eYwPtO%eP>$0ZJ7?c)h#cBeir zcwWZcomYkx(&Z#LLIK;ZmVsYS1PZlp#hOJgxs2=@D!DflprV!d^~gZ+Txsgw7sOh3 zioiKOk{AW6Kv=sg#7FAF^4;&4%fu2!G^`WsI1k)LA$7UtNOi6^g5dp$@T;^1 zdOnWhFE*4wrPqDBSJi7+*zbmzw4HcnZ~`A3Q3_GMTIduQg;q;;{W-4y94S?AAm7wwmAa>$n5WOXXL!NPT?D(U2qD2E5 zPA@{`#wLi=*MZTx=Sbht7#J-qNi-@WpgRunsP!~*!r~hh%B`TrY02=TJ%tXWr9#!u z{kFGfJtS#rcR^$SIr1)ZJbAk6A{@S=!LloQaR0kcs9Gn@j=mm;!LhfYpx6-#q#{6u zCSldMFqo#_L@zIU3B}!JY@FXVypeqh&w420t!Y#7kgOJy`!)+6=FjD41%KyK`mS;X zYe%p%n%>hr@r@Wc?KG-hPDC&H{q$n$Ph4}hjO6s(fzt=KK#q;txR4ni21r3m4OUNB1v3>0E3se;dn>&QrC;Y_j9vn{)pUgh z*-4`1=SGAMEkf!S~rO;8^KL`0D?j$`r-6yF4|IDutDz-PkPq2el10z%uhh>P&X>mA}N;vAi9u57~p|C2HVs z<~-?@>Y_CsX;>ab*e4AtaQ46$nDY8OO*I{oC6QfkG253+`Bttzx} zl{(*gQH*_1^O3|?q+{fhF??y-A=vtT6D0kdK-?6IP)fTQ-O4|ZqdppJvh^1%NZpS* zryAgbdLG_f7B;LOcS2$8Y+hh~0Y3DY2;p;v=w!hS@YPk8)jqX?33=ax)jUj7!YIHd!i*JHA;C0*%!;;wq}Lai(AdYQAd}7Mo;eTi*6aW)J_Jfe z-bDxxhBb<|*fOZhZ|FISUVluWw&x?Uo?egN)%=*Aq_bF&wvT_XY&0*@{F-uO&OvkM zVN_Ayj>NEmB;=3avqn9EuwZQp#c8lxYZNS-okP{}1zD5jg@dc!;i%s#=&CD*gK--0 zb?r)=YPcEhEleTH#q%Ml%?7;Yad`9Y8M=^Y>9WqFWA^Ti!#M(NUZg zSObOw>#+LdHViZRfR&#j;Nxo!FO~$e{KPX5b0U_;w@ig+8vAfVZ4G$E_;Hp^jj*!d z$Z*U^(!%dO5Fk?ny+fgJ=~g)>{7i%`U!u?XmkAPK>n3!|O~f|KuQcj)3w~Pg6<#Q8 z#SG7K^eu8{XD;o4Cf{(FFD(m?))>O)PdOmePI1zU(?sW$7rXiEdOrKy2I^?uNp&@1 z!T*vxw5U#@S6z}Jz^RtR3D3ZyyJD z?Y;&xRy*TxS;1rinj_gCGo0v$sXbsAei!7DuMgM3-4Hrokat;p4*Y+yu>b4>GI)3` zuXe8h+RAE3#jK~yuqHywp5vrx!vgZ;K_j$>lwsr2TX^E=2-v*6fb1#@$B>W^Ji17e z&zzh_?zgSS>b(=t{YEP+l?-MkJ@bacx66oYSPcICtoy3qkvbW4XXZC5COC$&nLSv(c%OlR{m4)!qp zjgoM(e2dl?=; zE$38te3&O9p;w0WgCe^n#uHDR+<;}KHn959RI=?+*58_lCO=ldLzgo7Oc3Emu)$#H5B8kLLQx86d)!Ixm% zx0MFHU0Q$pyepWVUd*??NX7JmO8RV%11|Fq$MJy~;JMI-t-hMdc?@lb7jE})pU@yE zR5w#0ze4zX>`qjUhURrM*)P=&eB$XlaGb%J$fBn^j zu^QB59)WL`#_Yr8li4P1EojWMAhHot`I8p1{H*c&;ogUx%(#04^_`hJFeO8vLBds! 
z%-^RBUU6|ysiVLL$ZWv0Wj7%Gy(izMw2$71DTQeVlzDeMFU~VF2SqMd;(Vv{VNI^i z&pkedM@|5TD(*q#fE5=d*);5Tu475`0>0|CIiz@}5gUC~@E57XUc(QtCVn6NYDRH> zz%usQd3$!chaOuxl+1|zE(5V~&UK$65kJ=0!@Lt6xL-^F2JIY>Oqa#IdMcPYISx;^ zcfy-jOVM)^g}w_b;gdiH^efL~SxYhMS18AJcbmhv+Z9l9Ru7ro)9Jw*YA{gMh%V_8 zxME5@29DL^bFNK)Q|vM+6w=WT`D2ggwG#WWt5HN%eh6>NBlB;VXHOn`6|4EhcuhidmgX0ro!T#P0-&og8idY3H{D@K&2p#8>>-FJEa6* z_OAe3^tKc?G%tjS$0mV?jv(u!dkXBIB;(iQm1N_pt+y;>$cS!7=M-cjWI+ckk00HqF za^+wrTw1DtqaF!km%<8cmx`kc1GFJo7}#cyShU=77!7_jVX|U7@!WO^{p<5##h6?O zn&`(JH;7^+AKKCVYeK=fftpKt;C4nCe|zmGjNBG-Da8_(>FQzBoTp&@ zd^+Z3bIk9GXXiZPey zc#}-1yeZFTH5{q8IsSvL7S;xZraW5txRAV0?j(2BSo(TrI!-*M#>PzB!@CT2lGyhb zVW9OVmh=mwNQgcwB9V;ha-%rIJF$3m`8T4UqRo5}J&AiV3vhaYHR@J=g}o~g6l;dH zS(`e4eQy`0c+Epez8{4fGO#Mt8wW(rW6zT3uwsrCIy~A0xu?4Dh<_EV%oS$^v#t4& zzv_s4-bQrFo{XxGOR?*&BTM8~>$Z2c!yxPCT=p6hVI-imbMq$h-XHeZJ)sWSY5!v`Q#CJn0N#xGH9#_9$!VxuZ;6rz3$TB}c@*_ihJE4ZA zrlq9fOfy+CrwtA&X5i8JGQ_9ehQ`ZQ5Z6}@?1Nka-!&B3ovTN)X^#WBTL#5AvLp^= z6pSbza0vP{ra*RF3_1N=h`0IqjhnGRiZ^>JinE-=Af`#2x4bi&bsCH(Y2FGbmHQDq zy?d#9$r*5;wF$2$9wuIAufR8pUo=W{I+cDXk4KxsQR&uZ+rFL%T%4Lf*IAqaU%l7( zFiISBo^NI2|LF5J$(KNuFQSRfFX398I{uhBz|9&`fb1{XZ1HtyBOKKC?Z)w50i^*IGkg%^Vc| z5{mxAbt2gCBr!@BK-Imvc;>@i&|G|z`6j4L&gYDTg4tK$1^W;V6hFtKdfzygNm+E1 zSvI|)?Fr%X@wh~<1I~2zK|R-HTQ8H0#>q~&e&GRVzwimAcZ7rHni4E9&g9g>%i%}h zDOem(OYdArgTj~J;q1*MnB{em6VSN@-<0M+(~lD8I{fu?PtiOV(Fc0+U6<2w~vwX3Md$wYGZXBQ6jX)%irtMi9k zMzT}a7D4BXR17|3hZ;H0IbW?b*kYo|u1wH`O45S~KF&n`m;u`cbM+Y(a4AL{CSlUEIaGR?;br99{9Y#F&EsxZt+}jSaFzHmL&j&5e95v zb0z*@o)2sC>m)Ay4p!=vqVCmhaPK`$pIa7y+UN#aAJc)cj^pt0oH2ai_d8^kP(QDC zF$o?V3S*}QpTMS@NASlx7py+j`**%9#_c&o>P$uWC0-}-WknhGRO`{0r(;MVe;;mr zIe?S*@5WsbDe(LJMI0K}Ky`oKK)g);U zd+7)&3lGBDBNouw)IjAFShCA(j1!d&`%6c>!WiSy`XFsLZNj@@t3!FmnA zMr9ghkb&RQcR|*VGygHK`P#um_cgJ0?Zu|G3=cVz#ZWM6xvWuf8U-8*}*dG&b*t%TP%Rj?=J_Dq%^!C?SSi( zn(1EU>Ey|>x9Ia-5(j_C)Ap`0{L_G7wBDRdeV(2~&2n#8|4oQJ_4^MV*=t0`q!}V; zegU1z3j8waC8W+70Y6vQF%KuJ+yb+1=mu6V{Eycljca zkOcT&C9Q_Im8KM_9Viu`+qQnX$Fm7Cd53Yho&85uyoZ= zrh46G7`-zUq&sS0)`>*;EOQdCJT<~@@h_a^-VfN=7KQqHUSQ{vOI0GRFz2l@OcKt= zt(v=_cgkUSuYHKi7?}z8MG`pEwn}JEn8-`GsPnFuWbpQFIXu5fihU7s6az;Mz@%N# zMEtt|2JJCLDd!#7_BaUNWUArTK6{*Fn+Avd9obL4-rRx3a#(O~Db&CF2_h3ZF@}4$m$P! 
z@9lWf>=x4}zYz{TsE5Dt(ssNZ?#*O~lZ!FmH(La)^Nzvnbq7hz`ePV-Xcz_cY{R5iXZ;ye>#RNqjUPadgk#V_Md zGk2;lLcx=Gd@k0F={BS3DuGedKDrfei3_q;lWbV!Qa^P6Z~)b+bm(arP5$Su6pZQ- zhUUs~a5vbUR#_^czn~VRj=O+z*Oq|l;*l)eY%it{>M!9`E}vegwx>}u@?oHjQ2n%7@ODBX zCQ7x@+UH-H3YQd|ZyrD=UK`D-y^{lTqfWByP7b$HehMqu@5qmMp@k*w=`hjvHnNc` zNtNeKvU>VHOc8B?=TsLiTh7Ey^P+K$`4CQ@{Q=#$BDAel!e3g8&~4^O-nZ}^c-}k@ zn7ki$%%6>}Cjx+vG-6NB+zub|g2=v=wNy>&B@P!5Cp3TyM!l;=9HgRY|mMuzUg z(m{%i+B{v{eE@e0t2Uyv z!6|6TQ(`Tho}srvCf;B8h{)ZN!bi!{Bzyf+5cJsuUcZy@^RYT|q2HIxJol31O^Lx^ zwJacd>+q7u8)$$t5I7_Y9hxlPrlOBt8wY55!vbtK^k(s^2A|n~mWwJqOChll4R2EJ z`$RQ9tgD6yjQ&Xz=LECnGN*BbbuQs|J_G(}J2lhE$2m$`{8ewl2X5yv(eDo_=@rGF zJZH57Qg1aAyw?gE>&D`X4Hflq^RAN>dGnw= z{VC4a{Q#Oh7K4&bCPvIOVZZE5Mgx^MxMglMi80E7wpl`a_huKWmnn!-nf+K@c#`JW zTjJWmd+0NG3+;-7A?q8N-?CXz-Rh ziAD1S`E%3067LNmsN6aVe=pwzr8&Fsg@hiy5G=xG^Ak{UA|8cCErmO}Cy8s$eCl-m z6r8RZ0LMiMxO8It@P2=%JDnF$4?#&Z*&)YX8IZ>Yrx>tv9Ucc!2iT`Njx{g{gWPmS zSX7=$ua%n`Zw!Yt*mn}tu}{-nmuKDN~g1Xlge<<)OuRDa2XWP8c^Q15BB*8;O96Kv=EcR z4lW0NKdM1L+wJ)0+)2p#-2i*TRdHiwAj(B5(+@_MNc0Rz{Io`jw=wI&+8{x&cb!Wf zlxkq0;7zjW))^T1v7P!&ct;k_uE*XCZQSqIL^bO4h{W4V)8n(`AWnUm%Ta4YEQx8p_bYYLR0@}*_#-2oZHn;yOOt8Mne9&zG zuUGo;<-gGl9{$1iOs)a=Q{r;q+-uXqVQf;(-z<9xlY6 zZP|}`BPTPz&-qaqCYupxQAe+Fj+pk|AHBUs)@!9&qHj~}u-ACBm zJN^$nd+|FewN!GOKE&bd1*4JslnFm11<+VQl3(VJ;J01`^CtpZ@cTAbv3NeKA6tUL zHMYye@2+|kq#DUe>T!8T}u(v*ngOB_% zYWfEHFy}OU+hDoE<*fity%*BjpaOB|wZh4Eat_- znkdFD;N_qOh<^r7 zT{sPQ9Pq}*b8m@M7UHu&2^fl%WbIpxab003lmu15O^=bhb8aI^xsi&kmtsM~a1S&| zOAY&)LTYRL6ly;AQZRfD%PY)yxfLhjov13w2wBSO=Us;}oljv)D$7f#ZAb5!`m{;z z8CTqrOn-JNag+M)p?u;YJQX+ss|2>f{NOXBCuJ3LTvdr3bduw~sb9t9%bgID@c~b$ z2axv40&Lx%4~2F$Oi=E);aqvH{`p-=w#+UMvo;lha=$Q7qaLBL{Q($TR!Kv@p1~}M zHY!+FMMR7aK;vL5Otw0P)lH%FO{NY^$kt$^nzv!D;1mq+bD~N|U(f(|Q|z&6$Dza! zywuCW(7oNf*+~J^*;~ksy|*1rU5~@}OhMi|Ndv!RjV0w@cELkYPkjBzo$VX#Nal^* zfg`1-@>^Wz*=Ae6L7TOA;l>SLc%ZFMpAOA}Z8Kb8wXGFCdhG-0N1XW>CmCL@NetD; zZa`}#XI6b6n6H!+XFdPd7ZW}SvnJd|Uoy>IlgqSp}&rsUn)z>ng{Cj zngp3bxmCn8a~u_&98T)ye`3bhT3frmvSs9N29kK!LC&W2BeQn)QLbR8E)5si&2%3y zqq~SFS>xzT)PH0$ow}K}D-O?LMkXHNo;MrPTGuA#+}1v(=kW^SVgjT!f+06p4>Im! 
zGc))0BBJ}D+qQjHIoDxkMUFflUS1!|#0*SfB)+fXoQm2RdWYk(KTRQ}xeJ*GIt*#I zI8)zMB*#_Wa3UIMF4WY-fTU^Nuao$>h#c?qp^?`X(SGHP4F9E*Qx~~er|30_h_Cp; zDETT85BCd9&96};^eE*v+@DbA;}B6l&T9iXek+G_Hr>yhcxFc+MV-DiZM6NC6=74g zPslcOjuD9sa^X%D<=4rTAE@(G(xDc+%&D`o0y8s0iufE~Oquj?&1Bh4Li$l2TaH0o|oUHr+hob{;jWPIW7IxkHhy2Q?d=&zX0 z+*eLvBu{m5ZSfrUa-JwB;@if>T7q?7D?_IF)Y?XIB6OZZAE$1)sjfXvlQvJNv|Zr2 zgZb|4NDciGxyPq??)!$y^ITj1AY;?e!oB`*qJCdl zC-Y>Rk9Fa|b(?TsRT{Q(cYTD*LMC+bQSO{=IPs0lVPsT9nU%#Oh@W6Dm*v#PL|?4t zT;>lk=kiq;ZHJXi%7Y7BQI-UgkY~h+Wrc8M&+Rzf<|l?32YbH6- zwUBGn;j-k&kv<_}xjvM0^YWl>(u8=l3lW)%a@-op6mGo1?wavgLVC-3SI`gLuO$#gIgpb?VM3x~_n|8GRa+DcW7O2`sH*)?&0+*JulxP;p(1z)g>HPV=WUtv0Dp_q!pO{Z4 zwl6g3+B@UO&lF+qyvq!#ImD8wjs3QF7LA}?rna>E(;KF)E0eLYIAHst+Ll{5PK`db zA5Y9jD^ka4SbG8ks5a|YKSQ|Dw%@~*{0P(|$IZNHHPdMuJW89a*#DZ@Z0ti_XN@ER zMnCIAL|0O?lM0-sPLb`{ou`?aU1z!XN3U@s&&(!)rH$OA(J9=j#l!s*El%Wq4(Gev zxl9yUz|}n*|REGdFLxb1+Tc$xBDJ& z$8%mX4MRUU8@2D;F)I~n`fwBxN*qsbT5lwET2}R2BOWmBlg(}KP3hs@7M0nGpOU0M z_s^snYNxnSp0Ai_wR-MtlLHgkcZA8)-^+!J&*2{Lm_bvWXHY+G9CvhUW9`6pQ>ww- zoB=u<700%G*X%`*41}%fZTe#mh~cK>iKu zKOj%ush?-|y&1<`8R1IDN8PFqO#>2SDb5MF%^^*#!~N9U!GzrV&Gd^M;xv>?n3|gr zII-}_R5p(Di zt0J!Yp`h(1npy8LTa3t$^&{!&Lgeb;7^bvdlDv66kCZjswN+3vB8ev`b8qcLI_vaA z&OxP}+qLgICqKiUE*^iu)+6XFbJ^B}nEl$t>|6(#q?(+MrupxaKh?v#O9OzrT@@ zcbiM9-sN-lMt)5Cpc(nTeF--~q>ejsRfAlw)+Uc8jHAV-+Qd&StiE!>Ri?jm8Y$D5 zO&POCwzq?KGC~Vv$n;V%sxllKzY0saC)3X`u}PUVOJb&z*OCofN1MGo<++%;};yV`~44s}pk$A)AlwXJSNW z(N9x8GlgOT+%YK=TD{to_@v2F{bLSvrT8RLdf&bNzr6oB$Z-=>q~Fg7{LjF;@Ie27 z=rI4a8%_SZZDaB1qUr%?Y}BGGQvMF`2Ue~@ZXGoQ_KHt{F_alVrKd; z{{Kf{_@7x{!Z6BzS>(T9|NnFRM}@BpUj6?@{WmlHKh*yzX8-E{{}Al#JltH|<}7lv zvyi-H_f z$OaU3M7GWZ^4Xe^YkYd{5qidGa!4kIi8m L9panW*f|&hSshgT delta 385 zcmdnyvCU(HF(Z$OxsiprskyPCrLoy$d&U-+fTg*CfyLzWjFm6}15*Vh#f-urx92{9Lxw4 zBU7N6ek=)40SgnL5{u2dSb7-i!Dc`WEo6+~XJBA3G%-Lj*`$yORn#2p#6o5?QLxtv zSA}`$^iwFoA5KmqL|7~w%Wo#~VVR8WNdv{or$+9;B3Jf9!L=;q1lBnb$FkOwT zVgLj&p`tjz0HXuLz2k&v(v# z&%M|BV}`e@x_(vFRn}7ss`AH?1A7;VDMPid7?R!(=g1se@{T+IIXMQxq8@6@`kW z)e5WAry1jh95nsxVcqMgGwtaZH%(>+t+xrZip={gt6@ zqa@+ssxZV$##DIi>q0^-;GyitTQZ8Hd3tk z+R!hPYiqglS1GMBr#9tdWRm!50lV*H##f8_zosjHLo!AbEsl>B)htol>RkPG#$T6^ zAX)7P{hMf%SP~hfh__Khsr`RC|2a6={rUXo=-}|<{P%ZIih8y{`eBjxThkGR0nKev z)cx*is6LbBYF0rFReaUHsE~!pq#=o_W})gy@;WMkAUzdFDp4p@JwO~2E)m7Z)w6;G zs=?|8QPTnALyO{<`~)uD@$#VO=R}35Tv(J% zNP#w zuadEMvU7Io?Bwd;>g4KV*I8b##;35@1i6-9Ro&hSxpp5PH6MMS@C><*Pq8Kh7n;I@BOaWp9*b)Sv9JcXhFo>qlw%98j6ju-XjuYBL(u zn$ft{j3x@XL7=>8ko&)EMyNy-FE{+25l#-BJIkA?j8In@VN_#8y1cnhopgDNS_6y~ za+5%L%ODfgyf5czn-JAuJVA9ttMxEM5)~q~lehW~grkd-y^FlH3c^$cVOkB*&__K( z-bUrF%Fnj7Fzpm_vp~7|U%^Dxz*v0G6IW+v2f3vR##04jRSTn;A#d-aSq)=d3)4X% z?-(fW6x8AyFx7$j;)~7qd~tSka+2GsKpLt*>}o*L<@VL#a;ODxRLGqI<<8#+sP)3- zdjPJk_6~B_F94mt0Jv2HxYq)BDCELGISFc4oe-*H!|x8lH9tMRikck07ca+ZB>Dx( zvl>aWI$m9B8KHM>RJjTC5ONAE)I0HT(dppz7`??~^PKtf~E9+go3sI`KhGzFi#h ze#!FwQ8g0n|2yR)ACN2`SUb=8UoAU%P_lebtq!e!q=I~KvV2JGsQ#Z5L>`E6Pt#3AK?Qhn&j@YK*m)&rq?=F1dxd|eMT|i%-DhQgng=MwB2C;bp)?>wSXtU0m zEJ(WMCcGlRjD!0jO?m}`M`T0Hqv!Bh>LoP&qNnStpJ3eCbWH2Bpv-jpL^M5dNqP1} zcUXFOF;i+Q;ZFMrtY4cbh?uehhdsWIiw2IyM-R*3#iCT8y>*qE8!23BejX?0+bH84 z5VV7*vIC8kSg`aSEcRwBA3L2~?z%(Se_>xz;JJu- z-ODDQrWLbMuj;W5hoXe7FVtsK~>sUFh1kF01RTCOqX5Ask{*#M&PBA{Tm@Fy>QC9^HE*yjppH%ziM08FX%?+|+gn zvq-!tl%DZtUUj!J^dCY(-h5EnSELbsz>Rddw1aHD&B#v^;>kw+?PUC2t{l-)jo5G4 z$11iIvC6!4!V!Jah~tVtw^>^L%;1`%Ql#CMeF|N{9+z}s?wUnp#HN* zOy<|yq8zE;oIM>=K&*Rb2<-<4lkD3wSW)ZF!cI}@9xuWfd#Kq^=o!9*gxyJHpCvs# zoZSx-BV!4>JL8&%)w>5Chm$PX%x2SskCKX&-JOsWPt7Aq=P0YUejYnEeL0(O)|O2A zG>sJRT0&OX+ACiNPb53_?a6L|u`=T3MCGEx1!Tdjsbq6%YT3A!BUqnvZbVWv%fs48 
zDRl4X!KR(t$zI&k6poD8$yUXLk~-#jO4e~3(YqKz7B9Q4Y%Q5g+y)AmL!Ufm*l;;> z6m2G_b@PS0*10OXjqIYF^T9)TD`|?b>P-M!_hFWBT*PqZ)Neg;4L-Nu0*10k zZb(em6q1?yRx|Tf>TLSY9!%z*Ol&?H2Y;nYcENVJ}ZE6>;oM^qDyeT+Dl8e@|{xe*Z%3ZI+R*{jzvy|`(n*=yEs#{zt$kiQ(||`U zn9DWh=0LB79P&SJAy2C)?RR=J_GjIxP5C%4Kmci z9{2j1^78dA+^Ek5xY%U^^{kSRu%WZL^TTb*;yHTI@cmWcu!#sB2YSL~Nf&zhLw#B& z>Ke0KrAB8i%z~-8YHaaiFI+xz6_J%KP<9W@6OYBRU z@T>wey2ZZ;E^eGj8^(5op7XZAX`P|M`V~Vlveb#b$@zs^h6d2jAfm^YAA*-X{khUh z3--JV=iX8|l#Dgwb9@eh(caH^FSZ3WJKq~B#vaDo18lghXAXaGtS7JNVWgB+%?5AJ zL@v#J2O)KjCGZVE_#pZ{2~LmTnaQQNHe(JS)vHo@>)L84Oi3cvXLZ5% zVJg3UtOIGieu@qmtS&Oq|1HjYoAsAjV=PAj1F#Dh}i4@twu-tZ#Q1S=@v~!h1 zLQcYii^W)!GYMp8UC6UdXJAy*N<8D+3TD`B7Jf`K=ac1Cj+YXnxdyn*@c=EFMcA)KcfpjzO26hGAxE;M#T zuLo1Wq5oF8XWSsX{oZ+piTQM7M)z<9tDuW`Pnw#f$0H&vXiK$Ha*XYVCx%({Rl7|% zCiXSlwI~JeaRcd1i*oYt$tb*&mW@^8-N=}!M{x15yX5t)LmtsChNR(!xx$y*()c>P z{^(xMj4nPgh3Oa@^3~!Z5cQkMe=@nxx{pdhH{JUv=(&##-Q$WKFqO5L)sa^kO=b7p zbm+bH!}-J$%fS2o0emx6hBE7C;QivhGUlZ*iAjv)o^LYw_R|BPedZ?qHuDjDl#F2w zXLsNsiA`Zy!yU?&`G&l*bOhBj=|~>tOh9CddB8qxzOl6pyw#b=PQB~Erxs11TA!uV zsl^n!$@~&LoI8}J-@2%*IGPDd?i|J*DRt>0>0Q##_b}W%GM6O;r1HYjVhrA+!2+|L z!3bBORnmMubjo-v+M2?DsXLB_Zp*@}o4U}HPe0Sl{ubCOb0GREGQoc1N-%S4P77u& zNAq9?lZH;ELk-_S*LI6p-{?e`(XJ7^+Bz5&PqwRy`3O+om2f0r6$xy2hnzWk9d=3D zalz<)!tF*EP%kDN2JJe619Gq8fWb?k)0}}g?95~wd*cX}&TCHn5(=<@_<=@vG*0f9 zf(2FsY0Rq*uwvn7W(5}TW=sUmy>AEIU!_x>Hg{0X(}5njJ(K4j))nf$S;h8!I;yNV zRz*_gPlN(#B1Reaq~{$kpvl=?7*inTt8b5_v&b48`JgEum}P~%^I{;_Fo0f3nTo;J zI%B)Ha@c?0mJbnAeqNBq9^KiC3QKER*Mo6#3;N@hIbB?gpca8tb; zo|E|tu6&xr`o=Xt=Ysd-Nacbf&wO|DO0jU9qvbRc^)WDFWN>!d`#&E(-xS9&C0 zosOIFigoIc$|GKevw%sDSa(5d?tM2OWh+P0IIaGC&#O#sbfYzzce93{kF=-PP5RQ+ zM~`FP`zvJW;?_9)>;m}kIR%pP0wJwq9L(A5Nv*+*`(FCQJZ$u7MRqDpyP3;xZaj#? z>kptI@p^Pm+pfG)KZ?B`VS|k?ns9KPiZ4FY5!#u~hg@UM4Nn2_oNyVZK{HzJG#lMg zba;dJ{ve9b+j07hL}+|e#6$JY;m{8QSVmO_6n#!-`p?|?oYG}{+MXAfG`u&SoNhp0 zruN~t?+I|zY(w%y^_wAi+iUXNcRX$ORzN45Fs2{21;bmd4zydV5Ll|!g;#WXjL*;P zAiF!bQ;UGHsHiH(SF2}YS=?s466%0n5fwOe&rLXeeI$Ll+Xe9ATH>Fx58ZcGp;{d^ zK7DiygjIy`MkPMnbNO>@JM}uhoBImu9%{jx-buq|$8G4qRunSV_2m~b*JFdV`(fmg z2ppL#p+4stp>SOY8GBv_w>@rvab!L1;}cEZUOa$z9T6&4KVhB6H{uuCnnOZ8Z(g2_ z81L)|dOPRwWgSzQ`tIX6`QtC}0_yWh!=t#jQ7i17sub>=Ujpi)NxXi+TCUjdMVmBF zp{pDpl4oI7R6jF^WDV>B&D)HngU_shW_u1oiD4CVJ5q%`7Avv+E-%)g#SjQ~UdnGj zdx$MUUqfB*LeQL0mwx!v7+;kSAq{L_GwpTF>5i5bbgRl~53pd)|Wu@zl2T7c8{tzr%_Gq9>8ky#3T z>7e{4#OlN}eC>M?4GrEw@Tewev$g;(e{2L9u7{X?sT)0ewi|>OkHXNV{;(iK%w3Kc z!xO(IL_XMvyZzjkjSQ1259MtF_g48RH0VatAFXH2XCz~zsTMR?!x-1*pGV6To=`Qh zjNG4LtlA@Qlj)a@QC4QkH@{Kfu%&y*;cgG`r7B14wHM*ju=>z!K?ewm*QRBa4$SiP z5NdO~5)GRS!2@O*{Q0zeTvJ#F%KPPE%eQ_W8Mm?_Al4ovvO<2<>Ku-EWyiA*E)?#5 zsf}6N_OK0O&r=g~OVz$=xns@VOvueO;bRTzc@FBc8`|G#z|+SCK-!EonD5D<$uoPp zaK`EFmP`9k{kfWy=ud2`A=d3|3XZ@29vmHabGs)d&;VyZ|8uX&BceoGRX_9&>4HYXFOx5XwdUt~WWa_QcOZR51q^(^@X@Pr!d4oDFRqB;^Db%t_0Pv!&PM#& z939jS31vB*66r^0Z`E1y6WsZ9kGTuBfTjO*9`kAwxLjRMchCMA1~;C_<`0Xdae;;C z+WRs|-E#&T=QrYmQ>Kt>Rx)n>v<+{fHXEjF8I6nGEnww`9=JN-7wjvXK=V7u_+lcb zt)(kT=%+e-vSA@KGi%MVb(Vm~rg<>)NhR5AvjH1zh(u3k4ZJX1ozEO5L*v8uus~u& zWv6xn8a~C)N7qm%!xt1VR;c?tp5NVJP47h}z_c;rx%f>z8fI+F@7ivKd9Ml98QqHS z`eiM~TsTBqE4Sco7j1r}VkAGZ%x(O zbeP=cE@bd4%q-85w-{8yPre*RFI-CDlZ^Dxp|1viIsFo(#xbScz2+D>NgLOWYtFM9 z=L#=QTtZ8q6yXCGUA}yDWBywI785m|srQ3I~<5vAM8)~9V1wjHq)Oz&-_i%;s)%8#SCqJA?d^;v|@pGGiIdTTm5@*_m& z>C?G}efeBAjeOSbM-QEOOrBjd#o&^&;7U`e&wXvasrWp;&wmInn`>}`&UN|0=0~Xe zGH1GOLpNT>J_6QA)$r9oJ9;AQJv;s=9{Ub?%x(wNp|%UhaPOWgpveOTOjy8Rk>yOf zWn_DvzNaCd9k!O}n@%8-wH5UG$zPOH?&e}sognV(w-~QjHD;S{_`;AxHE6mm6Xp+( zAZtoBShITpu;@fft~b36-!k_pfPOj7+t!(G=z5H_Y0{2weW*cO?AZ+I8k4b0eO}d5fwjFG 
z^C6}6X&22CaQ9d}>ftdLgJ*1l&|xjv-4mg(WT6PkbzMQXa60|G!i;Xuea!BhS`71C zJMg@%PvH7!M;_XHFEq?(#1EhEN+yK4!7<@5dPOG_TzXKV9HS7P&$tTd9mjK{ZX2;H zGv*f#X_Gb0Y^dmW_H#g%UMpdSkMvDd;pYP&Qm<1qP>&VP&}~-Km`l@{yr%{QhuuDanB^+LuNT zeo*pf(Ya*QoLk^G&5SCr1?spM!O2zEq2lRexDr$!vF}LB zhjgMPHwe$#u$=BNI>49OPp0~&?fJm>?O1)&M)YONwRq{qVyx5^(_`;uviE5mV-HLt zCeCH>f_Eeqt@UVm{y8?xd@`;-*OJGGN<6NgjKiGP>!I7i6*%N1!|>yFyiVKsaK5V^ zp1yyPB*Yy97Wy93J6%y`7faxPy$#h4S%MBld$7~vlW2cqFJ3?T1W%+bM{lzhVVUtMX0ke9rQbtVx9F> zpw>0;YJ{!&~Y|L*4G@w@J)6rVE2}W8P)482T zvz^_#!rN~3*o5)X{B-Mq{8Xa`v{lp|R#;+6ryb1Uv+BM;!PYyfl#~0~n^V3CB(fZylNLbUK<~dH_ zSKGXR+n<;4cu^sA+P93g&ufeC{d6F~$sC$2wWV2U8OpK?Ptb6D9ACd}6}{tpg4JX_4%m#0CfRHJ*>Y{B~`8_?^%h)nQxpq1BM@u10MlIU;%x)U?d3duq> zMG&Ts(xJMQAE54D8NJ!H1J!LG0t=2l!cs>u_z&HN=7*hd?V5|;^0 zyiTzgUw_@#?i}@CgIs9v(ePM5N`^KQUB*oI92xy9uKF?)6NoI{hjHW zy_ENyP{v$c6ZtIXUg$Hck}MmlN4uRZq0U-Xyt_ugj;6ol;R(M5{QR10bWWS2ARXe% z_2jeZv5u=ybL~&8P_ET2p4~l}gE?spcy>uC zwv`s*!dpk+%0zWcZtIWZSB22l>AJjpcQ3l(USE`(%wt)Nyy%C(Vzj(^gpBwY&*X1E z;P7YxYg#%Tng@)b8*Oh8kwINDA}g0?bbX1Bn&)8MHGyC-s6=?`#8SHcz-*{I_zc-| zEn4_86kA#q!2F92WtN$@*>u~6)FP!D+9}t}ZowfZi)*_cD+VrI9?H_>W zm;b}xTl)Yvhuy;A?*}ovqyqj?t^dol`_wzJ(P>G)!|NmTYP0b7t8x* z<2!Ia9@ID36pSajD1KJB~(Yv$~PD8G52=6sL+xHtE|g%V|9qR<4XC~mav zfAs#JtF!a>-v6k%F88hXKb(Ku|NA@FzvjYleD$sHzxVy0n!o?J{?1O$jz8{y{vG5m z=-J6Xu)FFrpj)8o)7Sqe!H=%^f3p9_$YMgOUs4cDS1kYE#IBalTzNWvr{sX#nZoDeUF5{ts)1rf41L8vG)DqIvU5J?;a;+RND zj945e7$Fl&1yV_(I9?!$5yXo{)o+FTXe9p=>;FGc`u}46J3Bkr{iy#R_5Xu@&=2}S eKj;Vjpda*ue$Ws4K|ko0|R2f1(YDhRWYL?iYO{zR;H`X0V9|dbIv)ZZvfAp)$5+~-20ug z_qq34e@y7A?q5}PRdo&Y8x|8CE{m`m86Oj^@$Vv`np~ZoYu?>lU2EQjjd#qm;QNSstGk;b*{VqQC5!&8olm8wVDhQ&n7<&Nt8YTNM-OmRerTpTKu zS4%{zr~Z)_GfEmAB8!$tlcaI+-z@rf{(l_*1~gt49U+&7$RvMQ_;(?B%xGy`m|F3W z-5gktPG0`iI8c!k}@Gq8gKvmqVe_-@|Z-aovW9fdSQuO zytsNh?UYGMyNI!E{}d=@q%=$^i;4cKljENV)~F*6tcpsC+eP?o@_*;J|1}l%2a?g^C~17GxW)}_tK;EsGyZXy3X(N` z(tjh3mdYX`mGO4UNR9ud{?E}-s2=-!|L0KM|2a9k{p|n#3QEz;7RWy?_I_tRvM`{z zU5cjPJuUTXvO>cqsG*v#KChH9VKMTMM0J5t*D*yMwLp-8nj@DfmFl`7jh4v7(F)D1 zAc12P*2=1XVlr2VI8Uw@t;@x?@)x*$T}tb(0qs zX%~{9l*!`->iQd+AXCp)s%2v0LJ}PX3N0I(Af1{mj)@aTNEO<(Qv}}f7_qWn?VvzW zH^@*eE-z@Hc}RM5^_T)Q{Fl`Tm5JjO#($!O(8bMFsA#5ELQ}1TNsSWeisn9b(iJUg z6);sQ%mNiHgUrQ?PD+Jy zpu*+P0cx#q{Q-cxqoaefdOL&)x32)5zXG^d19Yhc@K7p5feI4Tt~wypt;Qc3)S90J z-+4^|f3hvkjxOq*r(cmgtC6&;{nfSBUtYCnzbF;m0u|kZ+Wa%Lnke)BF;-`1wOO4N zJ=9p*YOJ0$Sm}yh)mHVc#qm)pd;=AJ3CW5+L3--BAyHDLxVi;X_}B0QY=Ww*pQ3NF zBCsa+|5o1m`P7LIYVzacQ1nk$42Z0eX#bx{AH~3A#h}`GF8|fEQv@X|2G_2k^Y>6t z3`tfDtsOP|j{%|xPF4)77t{CjK9&Hkfh>`Ln zjy-n-HpNE3%b_VSA$Ah1zPpXRYk8COXHT$!K}V1BKE<$b!+Owtu3~1|#n5og5E!!m z4L%IcLxIzEsQ+mSIPYA98Jo9)ndCB_*qKfuy+6X;$)|{ss}cBi)q$y%t#QtVZm{^C z4}9Kr8`jR<1(AO9LBr)fYxva4wK|S7Rh&GMa z>Oo=uJ$TSxpMO3zjR#j;#31WLCXCqtYZAP<4w9IMh?r~J%eieJR$e{0YV$rr~Up_;lAG9)?&5SQS$EMA+2tF)E zxpg=Xzh*=Al)d;4{SWxO!(+1aOeB?W+(Sh<(|9{GOWxgcA#eQU3+^BA8rGzRgMHCL zI``HS)IaEg($QO4MW+l@HW?2W#vFk;A@?EiZ7wDor{X#5ATApp#5+E^3>Pqk&dM<5 zI^(CHb-mZ3l+F`rW=|vT^P&;oc%dU~nN-f>T0F<{bQR{>r-9boVjMVOAipsVVCQtg z+dRDl))6Ccm)QZFxqS;}1s{ME#}^*fEga~OsSdo+ub0`?$;NzwrU_NHTmjc#G{tAV zydk%1BpNO4!g10ND!p|H%u?4-*;6sr`%RCRy&p#xRApeBXJ`3INha=<-6IYqO?Xy; z1>Ndj1eZ3=q77p^La+H-;f&reQT>Wx7*Xm>-{$;AtwRIo7ZB4ED-Oe}UjAI=r2~84 zOSrdO0VU%s_*|buV6yKE-j8iTEiUwdig8Eq&OkeE@0r6N9`D5~dYY)@Rdc}GGm*=4 z-$O{tvB_m zIz~*o7Qo@D-pp?M3_2t23qFkcK!Vf5d1i7cuFIIqNB6E&-M+pC3R9AZ?Kyq$eU!@Y z9B)D&jedz&cP_)1Z`a}Mn1SHw_XvzHTk@3Ty_kK-lthT_VR&vkNGN#>0lK-Wp&_T> z;iY0M%9#u?=UmD2&1YeB(@H$++X`mdZ4rG+v*c6!Q(;80klbn61C{L`!}_v&a5~4H zUU|ENXlRT8kLJ$&%GD0Q7j8uFkUBW(S$kUUI0+lBYf4H3^l_xgG1%R=F^mxH0pIB# 
z*v2RKaj7B|Paj`|x7Xid-3>C(%`F@&Ti(R{b_-y=?NH9sj8G%+14^Iih!&YTq1VHy z;5c9#-8+6T-ud9N)68-PGL!qbl2y>fycbPPGT`CiRZk+pHsblrs^LE#U$Cb@`^&cJNMb51NBz@MzvJntuC|s^VBCEWLXKd#2Q-i{*zd|5Rl3XON%jhuNDi;b^#My zjW$UO_^_!HuxMKf|E=zL8oE6TuWjy1Q$GJnGY42@tIPz4O{>7dy*VwIy#g(R z8B88FjSe$@58c`=Vf~^KVP?BV@Os-2R6gCIPUfRPeOJNJfYl_h-Cc6_+zr?*YsUp+ z_KS9yTttIt2@KwS6bI&B!+}GVLZ`WdaQN9NIPT_AES=w+`Xv-#0r3MZNfb`$pMnK8 zgJ|^Y4zP037iI%i@OErC&U+w)9UYon9;ps?^-kHVokLZi^->zo+KOa+79IqlN z3noEz%RP zI|UqgV9$q2DZd~{V~_9dL#4GXE%E_$5}rrDv3_w4(52u5Id!8G)ZK0c^OEb+*W-p_7#+kO4;_o9%{r;jZwq;J%#9w+*QDcT zzGj^|r1J1r5*9G|G3y~{&AspCW6Y{iG)`v#-}^d~o7`-TmfdaP*Q4#}4YPi9&9M`h z_u(p8wxl(VIJXc!eo29(yg*3n7zcB=cv4%i;J%kXGY>mMT9KVf({APRTbmByi24I* zNW1~v+qN68G>l|#M%rQHOJ*G0rs2zvbwon*1(0jXx$$Wro)fR&bZAD)o#&u?iXLz9 z!5_ro20PEZm5EG`iFv5Oc^vj}Aj_!AfTAzy%^KDTr^pT74cCXMKWr)C(@SE+sZ zo%;gZJja+kRiEw2+ux8Ez7uG(cLF-`q$&NlJs95UbfDc^g}^eMuDqht6MS)YC)v}X z3$+RuhsvsQe7$BCmc?zstD%nQ6<&eE_TGXsH%8HCdt3o8tt0+9`?1TeD%7Z>!Doz# zhOmk--l)Wfd#-qaZKvJf_i|rj-NP+-)4OTd?1UX1)QUpp`hNUk<_2uA?f{Hh8jhoq zWz^?%IS;r1sDFpg}XeSMgTT4i&=grHr5#wE) zz+l&WzPw{9)7*0cr+oSiUP66dX?zU#HEM-@QdOc|3rav!Jek)oSjUwIyl9i=DRi~d zBl0}Vh8kw(kgP#np?RBebjaD2&}{D^C^4>r?nkSz=MojR-|fX3v=|D(F3b3x=Z~;O z=o_f(T?pC}>(Y;(8{_Npp`?NR8>YLyIo;XPiVl$q`R&*>_|UfyZ8TbQ-%Uc8 zEzg0~C0F2vs}Mq86?s&C?#$MEccD`yk1(t4KDKu8DRBFeEi%)O!B5M!aLd;xswgz!IJ2l%YIm206TIA)oupC+0q+ zE?pXy&U-YO4=qjv@P(s_&`V;;ds{bRX%+&0W6@){_O=e!O)MjZPNtyiHy@AX--ob> zuGDJGTy)z>_?q9|GJma(w1I~WPc7(63I?5nvULqm^PPk{wVO%``;G$r?HsJzH000B zviW7rT_n6;DQfN6ijSNR<3V8KK%v3oJOx-y)fpHRn>EuVrxtA}G9K}Y_iV=KCLi~wisU(Fn&XJS=JBC{6x(!u#p ziOtFB_{R4V8XLWb;L%OcZe0Of`P2w9+zvB`Qg?dpTz8NZkH*lZ{;)7a%3Y6|!c)H{ zL@~sKyZ_pcjS7>i4(Dx#F0JxWWYnFeKi&J)vq+8F?_% zRDDL?Av3O+VoaGi-||+8!uHs^8VYa!1*v`Dn)l`dv& z-^(_RyFkq>t<~qM_0F~XG9fqDjE^&_=Q+6V9%z5J0Z$(v0BJMVV!kJbCeIz{qM2uQ zSTE~K4d-c7VmKZ0gAm(4x`ZW%E?}G4_4!_}Tr3{q4v`zPA?s`o;^ka;XkW(JR4**- zBBwna53}y)QgFhJ58&jqhYO#YK?9r#1J1u8kBJKH)%DOjq$`??xI!Aaek2*nZDjBi zYuBjX5xN>lj?p#R^t^?1^gve#3sEi8Q}M3|~SNw6%N{ z3H@A$PcbfpW)`hkw%$_k*gPL*J*^~L>^5SfjS=YSqJx^l|cmK8y zqc0w&tyNobkE<@fS}}?rU3yFuv9J?p>kXo|U0&hRJVPu_?8180wDQwvuB_h-N_`fi%jc0yoZgy_iTDIjd4_bJ zaX&tfO($P;`_sc`pOEL5%rUs+9JtX`>hnOCZ!W%oAMzi;tL9qVsB>L@sQFRaWw{Go zzp*>7;}8yOmpOpRhXtb*TNKvD~}YN@(&>2@@AGSZqCuZXMO0 zr|)gZ=Y*{zhUOEAY+VJtaq2hK)O)$uR4<78`Ypk$HjUYqo4znKQ3IN8&x8dd!pYiF zE!OOQ04zS)k{iru!?(_R24Gl@^S5`V8@nARZJM;>+a77r7JIiqy4DozT3;V+bz5=q zjST+LcL%mH4&{5A%wo-S)OBcCIP4!E&pmdV&_VAeVo}~2wqSP)Ua)C8v1A+hBf#+X~y%!R@~cY9j1=9riDkl@t5=_ zIq4q2$}Bv&Z`Tdb>Ek`93ax_WJ14=C{(i*5$$=bdFCgosZTYa!&a~@JhIt11{K>}m zWd4&W^sLi#RQgXQhF_MdVznF4@Gk4nqR~lY%f_Sr=DGCH^~s>BN+FqtkHWe>jrq{h z`n0R|Nw{~s9`*2;hru&9L+J39?B2;xSh`3I<@#=*UpRw)S!qFcq;NWf#lfpo1ON4OxngMSHQ+lT+w$b06L~_7qR1E$1gTFQEp*rm(f0 z^?0v6c~CHN7HxGll-qw;0|pJEpzn~aupU(Ksp<*_8i(+`g2kw}DIE+ulwzF?RiM#M z0a=GK$fsX6@d`gnK4^0`J!Lz9S$d4%Ep&3I!SO=OuxtslbW-^T!E#)1VKxfB_`!&0 zjyQR=Hz&KwVQYRaEZ z?*{L>*JBeWMDa7N2l3O58qijeds$(LIh}qehtID25(V4tsvcKugAKxdTyRB)&b_uB zkIx(czjR;8sutAe`XPbvfX35c`y?=3(~9=rU5_u;)8%K1rlIZq-H@<$0L^on$gj0| z33t9MggCxo`)YdEK)$Bn1j^UMbCuv11Plx;Yv(?4y;Mfzqu)n)#p^Fd~ z-x6cHCnM3KV=7HdD~9Jc8c?G>IUreb8Rt*(hX(1zWZu9qT(PD-OnRydOAqdWBc7X~ zv?@?!s}T+rgRbJZQ5tmL+O7D&Y$JL-5R-|%jp=C}hrq&PkFnHA3jV{kqva82-1pfLE>91@o~s(b*2Lu^Gq2Mu z+SecV9;;6i{aa!7^zrn{5V?KE6MU<2DJOR66&I3!+U52>}>j59-j1D$Ssy)8xjH1e)VYkar*3K(-m-_ET0@Z-~t7PpRhE*iH~!*i7n>{Va7^- zxO5Cflgj#`s%{#;vC;!R_iRMv50}H@C@ZdAZbyR(hjE?m@$BBI9Lz~;z_UwAv8}ui z7u`MzS0`y=a$A3#usVdcPS@whx!ky*JsquVQd+&l;Ct_=jE!6l;8CzsI;2j@WLq36h6=+MGf zq1e)<02W+wEVItM!)Dkwq*f{2;ob*18)2d%z3ED>eU?y(NkblUIurX_BtwU5{$RLv 
z3(;1*!B_1E@ZNhn^DS2#sB4u6FQ075kEaa8SwlNxqC`gf7k0#~^#fU(Ma~#H<5^YL9?25P*wqFJuT;NF>X44t`(&57#I2S#N;XI(#xwaVp*cD-nN`-gDw>)+wu zxF(W)kLHos*F|JPZl3Db!6S(0rXn&Y4pd#n%^+90?Nkj|)Q=Q+E@oc$v&rY_#ccHJ zdaT3YNKxyH_1RR#N@kqepBa}Q5WUteVHv}@Xj$yeE{z2jMV@PO$*r4=7!B?ra_d)@ z)pgMjopucu4K*raZBKZSi@nVl^C>2e@4ppYt2{{NJRHi5I=517Zo8COCEgOr&-yd3 zy4x7~4<#XQKdKxm(g;83PP$&+Nw(c#^n1AQR~j4PLY}(FC~mU(rzg7lq@A-cT?FHSuYQlE=P!osf^v5 zdELY2{X>rZ+Ri(L;4RX{xB|Z2(*UakglD_z33Qe*Zf(czKf2;=XQNk zzUqjm#ZeEkxaml?xt&Bc$@TzwTX2{p7p-FhX1c0WyMuT1(>chj2FY3YvJIlV@|Dcg zJBcV>3D~h?r$iGH`?KQ}_H5PQ-3&?=v(LSEl|33VkIg-r%KBU?5G`=oM_yQNVJ&*> zA;(vgu>{@mBE@M7GGj#>Hsb0G+<45X#WpOIdKKd%{O%yISzkrkS?No7&2)e=3*ulmsELe6Q zmVZ75M;2tm?zan|uu~JXIDH0p`bUwQpI^cL7pHKR$3^(t|K*bQ3E>QV1}8H1k=RYU zP_nfUd{;_g#PXdeigd;apI73?5F0!vx`8F$dVn3@;HFCrK^0z(@{W$buk&_7I5@oY zBRgFgGgmxSEtxq3p1<1wRbIQ)HKaLO%zcjCM(4ryr{{@Xw-g-faT^~>CStdPzoF*9 zQuujq?tcp<#)OGO6XfE!G4}t~`+sgOE`Rp?N6mG)@1Fl~Q4g8B{Jj77SMGn!CFA(& z+uDEa^FKA8zukWqXSbjCKmQW)7xe1nAJ{|v8qhsZ{p#!gi{NKf{6AU$qhq2&s-I<& zNOSb2mdtx9bCW7e@CHb7od17#Xy;$s1pKr7 zcW`w3?)-Ok6*~RQ|G$DfJqPsh>El1BkEf@=PLN<1Z6}Bc50{0>#Bzajlr$k;5GfT) z;sxO`ae`2BM5IJ45r}1u0%>%FELtj!6O4?J$^~*+qBLG0ix$L7#nn%R{46B@6Z8My zkoy0{{C9Cti~r33pZWije$r3+Nk8c){iL7tlYY`q`bj_OUqt^8jKCpf08jt`Ee6^z From 57a67e59be42d89293003094c218115cc269ab94 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 15:03:51 -0800 Subject: [PATCH 16/52] evaluate tests passing --- allennlp/commands/evaluate.py | 16 ++++++++-------- allennlp/data/samplers/__init__.py | 9 ++++++--- allennlp/tests/commands/evaluate_test.py | 21 +++++++++------------ 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/allennlp/commands/evaluate.py b/allennlp/commands/evaluate.py index a5259dd5c63..53af3049243 100644 --- a/allennlp/commands/evaluate.py +++ b/allennlp/commands/evaluate.py @@ -62,7 +62,7 @@ from allennlp.commands.subcommand import Subcommand from allennlp.common.util import dump_metrics, prepare_environment from allennlp.data.dataset_readers.dataset_reader import DatasetReader -from allennlp.data.iterators import DataIterator +from allennlp.data.samplers import DataLoader from allennlp.models.archival import load_archive from allennlp.training.util import evaluate @@ -173,15 +173,15 @@ def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]: model.vocab.extend_from_instances(instances=instances) model.extend_embedder_vocab(embedding_sources) - iterator_params = config.pop("validation_iterator", None) - if iterator_params is None: - iterator_params = config.pop("iterator") + instances.index_with(model.vocab) + data_loader_params = config.pop("validation_data_loader", None) + if data_loader_params is None: + data_loader_params = config.pop("data_loader") if args.batch_size: - iterator_params["batch_size"] = args.batch_size - iterator = DataIterator.from_params(iterator_params) - iterator.index_with(model.vocab) + data_loader_params["batch_size"] = args.batch_size + data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params) - metrics = evaluate(model, instances, iterator, args.cuda_device, args.batch_weight_key) + metrics = evaluate(model, data_loader, args.cuda_device, args.batch_weight_key) logger.info("Finished evaluating.") diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index f0db52e3bc8..1f479bcf39d 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -118,7 +118,7 @@ def __init__( padding_noise: float = 0.1, ): - self.vocab = data.vocab + self.vocab = 
data_source.vocab self._sorting_keys = sorting_keys self._padding_noise = padding_noise self._batch_size = batch_size @@ -184,6 +184,9 @@ def _guess_sorting_keys(self, instances: List[Instance]) -> None: ) self._sorting_keys = [longest_padding_key] + def __len__(self): + return len(self.data_source) // self._batch_size + def allennlp_collocate(batch): batch = AllennlpBatch(batch) @@ -209,11 +212,11 @@ def __init__( collate_fn = allennlp_collocate if batch_sampler is not None: - batch_sampler_ = batch_sampler.construct(dataset=dataset) + batch_sampler_ = batch_sampler.construct(data_source=dataset) else: batch_sampler_ = None if sampler is not None: - sampler_ = sampler.construct(dataset=dataset) + sampler_ = sampler.construct(data_source=dataset) else: sampler_ = None diff --git a/allennlp/tests/commands/evaluate_test.py b/allennlp/tests/commands/evaluate_test.py index 4ded2eb82d7..5563aaa4731 100644 --- a/allennlp/tests/commands/evaluate_test.py +++ b/allennlp/tests/commands/evaluate_test.py @@ -7,24 +7,21 @@ from allennlp.commands.evaluate import evaluate_from_args, Evaluate, evaluate from allennlp.common.testing import AllenNlpTestCase -from allennlp.data import DataIterator, Instance -from allennlp.data.batch import Batch +from allennlp.data import Instance from allennlp.data.iterators.data_iterator import TensorDict from allennlp.models import Model -class DummyIterator(DataIterator): +class DummyDataLoader: def __init__(self, outputs: List[TensorDict]) -> None: super().__init__() self._outputs = outputs - def __call__( - self, instances: Iterable[Instance], num_epochs: int = None, shuffle: bool = True - ) -> Iterator[TensorDict]: + def __iter__(self) -> Iterator[TensorDict]: yield from self._outputs - def _create_batches(self, instances: Iterable[Instance], shuffle: bool) -> Iterable[Batch]: - raise NotImplementedError + def __len__(self): + return len(self._outputs) class DummyModel(Model): @@ -46,8 +43,8 @@ def setUp(self): def test_evaluate_calculates_average_loss(self): losses = [7.0, 9.0, 8.0] outputs = [{"loss": torch.Tensor([loss])} for loss in losses] - iterator = DummyIterator(outputs) - metrics = evaluate(DummyModel(), None, iterator, -1, "") + data_loader = DummyDataLoader(outputs) + metrics = evaluate(DummyModel(), data_loader, -1, "") self.assertAlmostEqual(metrics["loss"], 8.0) def test_evaluate_calculates_average_loss_with_weights(self): @@ -58,8 +55,8 @@ def test_evaluate_calculates_average_loss_with_weights(self): {"loss": torch.Tensor([loss]), "batch_weight": torch.Tensor([weight])} for loss, weight in inputs ] - iterator = DummyIterator(outputs) - metrics = evaluate(DummyModel(), None, iterator, -1, "batch_weight") + data_loader = DummyDataLoader(outputs) + metrics = evaluate(DummyModel(), data_loader, -1, "batch_weight") self.assertAlmostEqual(metrics["loss"], (70 + 18 + 12) / 13.5) @flaky From 7d21ed8a686d6db95122d9cc822db20c50c1cc7e Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 15:25:13 -0800 Subject: [PATCH 17/52] all command tests passing --- allennlp/tests/commands/main_test.py | 2 +- allennlp/tests/commands/no_op_train_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/allennlp/tests/commands/main_test.py b/allennlp/tests/commands/main_test.py index 0bd6da0a0fe..9880880d0e9 100644 --- a/allennlp/tests/commands/main_test.py +++ b/allennlp/tests/commands/main_test.py @@ -97,7 +97,7 @@ def test_other_modules(self): "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": "$$$", "validation_data_path": 
"$$$", - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": { "num_epochs": 2, "optimizer": "adam" diff --git a/allennlp/tests/commands/no_op_train_test.py b/allennlp/tests/commands/no_op_train_test.py index c9416b302f4..07dbb582091 100644 --- a/allennlp/tests/commands/no_op_train_test.py +++ b/allennlp/tests/commands/no_op_train_test.py @@ -24,7 +24,7 @@ def test_train_model(self): "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": SEQUENCE_TAGGING_DATA_PATH, "validation_data_path": SEQUENCE_TAGGING_DATA_PATH, - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"type": "no_op"}, } ) From 24a500c8b583b600bd386ace829f84c2880b72fe Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 15:32:37 -0800 Subject: [PATCH 18/52] lint --- allennlp/commands/train.py | 4 +--- allennlp/data/dataset_readers/dataset_reader.py | 2 +- allennlp/data/samplers/__init__.py | 8 ++++++-- allennlp/tests/commands/evaluate_test.py | 3 +-- allennlp/tests/training/trainer_test.py | 14 +++++--------- allennlp/training/util.py | 13 +++++-------- 6 files changed, 19 insertions(+), 25 deletions(-) diff --git a/allennlp/commands/train.py b/allennlp/commands/train.py index 815f6e7bb5d..cf4da7f9673 100644 --- a/allennlp/commands/train.py +++ b/allennlp/commands/train.py @@ -677,9 +677,7 @@ def from_partial_objects( # passed through the trainer by from_params already, because they were keyword arguments to # construct this class in the first place. trainer_ = trainer.construct( - model=model_, - data_loader=data_loader_, - validation_data_loader=validation_data_loader_, + model=model_, data_loader=data_loader_, validation_data_loader=validation_data_loader_, ) return cls( diff --git a/allennlp/data/dataset_readers/dataset_reader.py b/allennlp/data/dataset_readers/dataset_reader.py index c61442cc90d..ff4cc2055b7 100644 --- a/allennlp/data/dataset_readers/dataset_reader.py +++ b/allennlp/data/dataset_readers/dataset_reader.py @@ -137,7 +137,7 @@ def __init__( else: self._cache_directory = None - def read(self, file_path: str) -> Iterable[Instance]: + def read(self, file_path: str) -> Dataset: """ Returns an `Iterable` containing all the instances in the specified dataset. 
diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 1f479bcf39d..502230c8d69 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -48,7 +48,11 @@ class RandomSampler(Sampler, data.RandomSampler): """ def __init__( - self, data_source: data.Dataset, replacement: bool = False, num_samples: int = None, **kwargs + self, + data_source: data.Dataset, + replacement: bool = False, + num_samples: int = None, + **kwargs, ): super().__init__(data_source, replacement, num_samples) @@ -205,7 +209,7 @@ def __init__( collate_fn=None, pin_memory: bool = False, drop_last: bool = False, - timeout: bool = 0, + timeout: int = 0, worker_init_fn=None, multiprocessing_context: str = None, ): diff --git a/allennlp/tests/commands/evaluate_test.py b/allennlp/tests/commands/evaluate_test.py index 5563aaa4731..9bc66cc9f62 100644 --- a/allennlp/tests/commands/evaluate_test.py +++ b/allennlp/tests/commands/evaluate_test.py @@ -1,13 +1,12 @@ import argparse import json -from typing import Iterator, List, Dict, Iterable +from typing import Iterator, List, Dict import torch from flaky import flaky from allennlp.commands.evaluate import evaluate_from_args, Evaluate, evaluate from allennlp.common.testing import AllenNlpTestCase -from allennlp.data import Instance from allennlp.data.iterators.data_iterator import TensorDict from allennlp.models import Model diff --git a/allennlp/tests/training/trainer_test.py b/allennlp/tests/training/trainer_test.py index 788bbab359c..46fdf2d3238 100644 --- a/allennlp/tests/training/trainer_test.py +++ b/allennlp/tests/training/trainer_test.py @@ -44,7 +44,9 @@ def setUp(self): self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params) self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9) self.data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) - self.validation_data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) + self.validation_data_loader = DataLoader( + self.instances, batch_size=2, collate_fn=allennlp_collocate + ) self.instances.index_with(vocab) def test_trainer_can_run(self): @@ -102,9 +104,7 @@ def test_trainer_can_run_exponential_moving_average(self): @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device registered.") def test_trainer_can_run_cuda(self): self.model.cuda() - trainer = Trainer( - self.model, self.optimizer, self.data_loader, num_epochs=2, cuda_device=0 - ) + trainer = Trainer(self.model, self.optimizer, self.data_loader, num_epochs=2, cuda_device=0) metrics = trainer.train() assert "peak_cpu_memory_MB" in metrics assert isinstance(metrics["peak_cpu_memory_MB"], float) @@ -118,11 +118,7 @@ def test_passing_trainer_multiple_gpus_raises_error(self): with pytest.raises(ConfigurationError): Trainer( - self.model, - self.optimizer, - self.data_loader, - num_epochs=2, - cuda_device=[0, 1], + self.model, self.optimizer, self.data_loader, num_epochs=2, cuda_device=[0, 1], ) def test_trainer_can_resume_training(self): diff --git a/allennlp/training/util.py b/allennlp/training/util.py index f1880fb1c61..b11067c1fdb 100644 --- a/allennlp/training/util.py +++ b/allennlp/training/util.py @@ -9,7 +9,7 @@ import torch import torch.distributed as dist -from torch.utils.data import DataLoader +from torch.utils.data import DataLoader, Dataset from allennlp.common.checks import check_for_gpu, ConfigurationError from allennlp.common.params import Params @@ -133,7 +133,7 @@ def 
read_all_datasets( validation_dataset_reader: DatasetReader = None, validation_data_path: str = None, test_data_path: str = None, -) -> Dict[str, Iterable[Instance]]: +) -> Dict[str, Dataset]: """ Reads all datasets (perhaps lazily, if the corresponding dataset readers are lazy) and returns a dictionary mapping dataset name ("train", "validation" or "test") to the iterable resulting from @@ -143,7 +143,7 @@ def read_all_datasets( logger.info("Reading training data from %s", train_data_path) train_data = dataset_reader.read(train_data_path) - datasets: Dict[str, Iterable[Instance]] = {"train": train_data} + datasets: Dict[str, Dataset] = {"train": train_data} validation_dataset_reader = validation_dataset_reader or dataset_reader @@ -160,7 +160,7 @@ def read_all_datasets( return datasets -def datasets_from_params(params: Params) -> Dict[str, Iterable[Instance]]: +def datasets_from_params(params: Params) -> Dict[str, Dataset]: """ Load all the datasets specified by the config. @@ -355,10 +355,7 @@ def get_metrics( def evaluate( - model: Model, - data_loader: DataLoader, - cuda_device: int, - batch_weight_key: str, + model: Model, data_loader: DataLoader, cuda_device: int, batch_weight_key: str, ) -> Dict[str, Any]: check_for_gpu(cuda_device) with torch.no_grad(): From fb137691546bc5d026ad7b98915ad829ff0aa7a2 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 15:48:53 -0800 Subject: [PATCH 19/52] update model test case, common and module tests passing --- allennlp/common/testing/model_test_case.py | 32 +++++++++++-------- allennlp/tests/common/params_test.py | 4 +-- .../config/characters_token_embedder.json | 2 +- .../characters_token_embedder.json | 2 +- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/allennlp/common/testing/model_test_case.py b/allennlp/common/testing/model_test_case.py index 6d7de8ab918..bc2fe84a287 100644 --- a/allennlp/common/testing/model_test_case.py +++ b/allennlp/common/testing/model_test_case.py @@ -7,7 +7,8 @@ from allennlp.commands.train import train_model_from_file from allennlp.common import Params from allennlp.common.testing.test_case import AllenNlpTestCase -from allennlp.data import DataIterator, DatasetReader, Vocabulary +from allennlp.data import DatasetReader, Vocabulary +from allennlp.data.samplers import DataLoader from allennlp.data.batch import Batch from allennlp.models import load_archive, Model @@ -93,24 +94,27 @@ def ensure_model_can_train_save_and_load( params = Params.from_file(param_file, params_overrides=overrides) reader = DatasetReader.from_params(params["dataset_reader"]) - # Need to duplicate params because Iterator.from_params will consume. - iterator_params = params["iterator"] - iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict())) - - iterator = DataIterator.from_params(iterator_params) - iterator2 = DataIterator.from_params(iterator_params2) - - # We'll check that even if we index the dataset with each model separately, we still get - # the same result out. 
print("Reading with original model") model_dataset = reader.read(params["validation_data_path"]) - iterator.index_with(model.vocab) - model_batch = next(iterator(model_dataset, shuffle=False)) + model_dataset.index_with(model.vocab) print("Reading with loaded model") loaded_dataset = reader.read(params["validation_data_path"]) - iterator2.index_with(loaded_model.vocab) - loaded_batch = next(iterator2(loaded_dataset, shuffle=False)) + loaded_dataset.index_with(loaded_model.vocab) + + # Need to duplicate params because DataLoader.from_params will consume. + data_loader_params = params["data_loader"] + data_loader_params["shuffle"] = False + data_loader_params2 = Params(copy.deepcopy(data_loader_params.as_dict())) + + data_loader = DataLoader.from_params(dataset=model_dataset, params=data_loader_params) + data_loader2 = DataLoader.from_params(dataset=loaded_dataset, params=data_loader_params2) + + # We'll check that even if we index the dataset with each model separately, we still get + # the same result out. + model_batch = next(iter(data_loader)) + + loaded_batch = next((iter(data_loader2))) # Check gradients are None for non-trainable parameters and check that # trainable parameters receive some gradient if they are trainable. diff --git a/allennlp/tests/common/params_test.py b/allennlp/tests/common/params_test.py index f72b491d573..68d8d61f6db 100644 --- a/allennlp/tests/common/params_test.py +++ b/allennlp/tests/common/params_test.py @@ -38,14 +38,14 @@ def test_overrides(self): overrides = ( '{ "train_data_path": "FOO", "model": { "type": "BAR" },' '"model.text_field_embedder.tokens.type": "BAZ",' - '"iterator.sorting_keys.0.0": "question"}' + '"data_loader.batch_sampler.sorting_keys.0.0": "question"}' ) params = Params.from_file(filename, overrides) assert "dataset_reader" in params assert "trainer" in params assert params["train_data_path"] == "FOO" - assert params["iterator"]["sorting_keys"][0][0] == "question" + assert params["data_loader"]["batch_sampler"]["sorting_keys"][0][0] == "question" model_params = params.pop("model") assert model_params.pop("type") == "BAR" diff --git a/allennlp/tests/fixtures/elmo/config/characters_token_embedder.json b/allennlp/tests/fixtures/elmo/config/characters_token_embedder.json index ee0253a736c..ad709038a09 100644 --- a/allennlp/tests/fixtures/elmo/config/characters_token_embedder.json +++ b/allennlp/tests/fixtures/elmo/config/characters_token_embedder.json @@ -43,7 +43,7 @@ ] } }, - "iterator": {"type": "basic", "batch_size": 32}, + "data_loader": {"batch_size": 32}, "trainer": { "optimizer": "adam", "num_epochs": 5, diff --git a/allennlp/tests/fixtures/language_model/characters_token_embedder.json b/allennlp/tests/fixtures/language_model/characters_token_embedder.json index b72ab39d105..7fd0f3b7da4 100644 --- a/allennlp/tests/fixtures/language_model/characters_token_embedder.json +++ b/allennlp/tests/fixtures/language_model/characters_token_embedder.json @@ -46,7 +46,7 @@ ] } }, - "iterator": {"type": "basic", "batch_size": 32}, + "data_loader": {"batch_size": 32}, "trainer": { "optimizer": "adam", "num_epochs": 5, From ef5187fb07c8b7e63245a9ad8149433ef4dfcbd0 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 16:08:23 -0800 Subject: [PATCH 20/52] fix test interdependence introduced by #3762 --- allennlp/tests/predictors/masked_language_model_test.py | 2 ++ allennlp/tests/predictors/next_token_lm_test.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/allennlp/tests/predictors/masked_language_model_test.py 
b/allennlp/tests/predictors/masked_language_model_test.py index 9d8613767e4..d5f8c8d3f14 100644 --- a/allennlp/tests/predictors/masked_language_model_test.py +++ b/allennlp/tests/predictors/masked_language_model_test.py @@ -2,6 +2,8 @@ from allennlp.models.archival import load_archive from allennlp.predictors import Predictor +from ..modules.language_model_heads.linear import LinearLanguageModelHead # noqa: F401 + class TestMaskedLanguageModelPredictor(AllenNlpTestCase): def test_predictions_to_labeled_instances(self): diff --git a/allennlp/tests/predictors/next_token_lm_test.py b/allennlp/tests/predictors/next_token_lm_test.py index 9e34f4eddcf..0904aa04989 100644 --- a/allennlp/tests/predictors/next_token_lm_test.py +++ b/allennlp/tests/predictors/next_token_lm_test.py @@ -2,6 +2,8 @@ from allennlp.models.archival import load_archive from allennlp.predictors import Predictor +from ..modules.language_model_heads.linear import LinearLanguageModelHead # noqa: F401 + class TestNextTokenLMPredictor(AllenNlpTestCase): def test_predictions_to_labeled_instances(self): From b1ea84541f5e4b5ab186f650729830f760593a24 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 16:10:38 -0800 Subject: [PATCH 21/52] more test interdependence --- allennlp/tests/interpret/hotflip_test.py | 2 ++ allennlp/tests/interpret/simple_gradient_test.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/allennlp/tests/interpret/hotflip_test.py b/allennlp/tests/interpret/hotflip_test.py index 13a10d83612..ddf410ad591 100644 --- a/allennlp/tests/interpret/hotflip_test.py +++ b/allennlp/tests/interpret/hotflip_test.py @@ -5,6 +5,8 @@ from allennlp.modules.token_embedders import EmptyEmbedder from allennlp.predictors import Predictor +from ..modules.language_model_heads.linear import LinearLanguageModelHead # noqa: F401 + class TestHotflip(AllenNlpTestCase): def test_hotflip(self): diff --git a/allennlp/tests/interpret/simple_gradient_test.py b/allennlp/tests/interpret/simple_gradient_test.py index 3c0bfed4abd..a5d1bbebc9d 100644 --- a/allennlp/tests/interpret/simple_gradient_test.py +++ b/allennlp/tests/interpret/simple_gradient_test.py @@ -5,6 +5,8 @@ from allennlp.models.archival import load_archive from allennlp.predictors import Predictor +from ..modules.language_model_heads.linear import LinearLanguageModelHead # noqa: F401 + class TestSimpleGradient(AllenNlpTestCase): def test_simple_gradient_basic_text(self): From 02316164339d02bd07688e6a46c96df1e37552e3 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 16:47:16 -0800 Subject: [PATCH 22/52] tests tests tests --- .../slanted_triangular_test.py | 11 ++-- allennlp/tests/training/optimizer_test.py | 7 ++- allennlp/tests/training/trainer_test.py | 51 +++++++++---------- 3 files changed, 33 insertions(+), 36 deletions(-) diff --git a/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py b/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py index 6c872db7e9d..5099b8a79f7 100644 --- a/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py +++ b/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py @@ -4,10 +4,11 @@ import torch +from allennlp.data.dataset_readers.dataset_reader import AllennlpDataset from allennlp.common import Lazy, Params from allennlp.common.checks import ConfigurationError from allennlp.common.testing import AllenNlpTestCase -from allennlp.data.iterators import BasicIterator +from allennlp.data.samplers import DataLoader from 
allennlp.training import TrainerBase from allennlp.training.learning_rate_schedulers import LearningRateScheduler, SlantedTriangular from allennlp.training.optimizers import Optimizer @@ -112,15 +113,14 @@ def test_from_params_in_trainer(self): ) # The method called in the logic below only checks the length of this list, not its # contents, so this should be safe. - instances = [1] * 40 + instances = AllennlpDataset([1] * 40) optim = self._get_optimizer() trainer = TrainerBase.from_params( model=self.model, optimizer=Lazy(lambda **kwargs: optim), serialization_dir=self.TEST_DIR, params=params, - iterator=BasicIterator(batch_size=10), - train_data=instances, + data_loader=DataLoader(instances, batch_size=10), ) assert isinstance(trainer._learning_rate_scheduler, SlantedTriangular) @@ -150,8 +150,7 @@ def test_from_params_in_trainer(self): optimizer=Lazy(lambda **kwargs: optim), serialization_dir=self.TEST_DIR, params=params, - iterator=BasicIterator(batch_size=10), - train_data=instances, + data_loader=DataLoader(instances, batch_size=10), ) assert trainer._learning_rate_scheduler.num_epochs == 3 diff --git a/allennlp/tests/training/optimizer_test.py b/allennlp/tests/training/optimizer_test.py index d89bfc8da84..ed89bd5ecf2 100644 --- a/allennlp/tests/training/optimizer_test.py +++ b/allennlp/tests/training/optimizer_test.py @@ -2,7 +2,7 @@ from allennlp.common.testing import AllenNlpTestCase from allennlp.data import Vocabulary from allennlp.data.dataset_readers import SequenceTaggingDatasetReader -from allennlp.data.iterators import BasicIterator +from allennlp.data.samplers import DataLoader from allennlp.models.simple_tagger import SimpleTagger from allennlp.training import Trainer from allennlp.training.optimizers import Optimizer @@ -90,6 +90,5 @@ def test_can_optimise_model_with_dense_and_sparse_params(self): optimizer_params = Params({"type": "dense_sparse_adam"}) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(model_parameters=parameters, params=optimizer_params) - iterator = BasicIterator(2) - iterator.index_with(self.vocab) - Trainer(self.model, optimizer, iterator, self.instances).train() + self.instances.index_with(self.vocab) + Trainer(self.model, optimizer, DataLoader(self.instances, 2)).train() diff --git a/allennlp/tests/training/trainer_test.py b/allennlp/tests/training/trainer_test.py index 46fdf2d3238..ec249f90fea 100644 --- a/allennlp/tests/training/trainer_test.py +++ b/allennlp/tests/training/trainer_test.py @@ -156,7 +156,7 @@ def test_trainer_can_resume_training_for_exponential_moving_average(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=1, serialization_dir=self.TEST_DIR, moving_average=moving_average, @@ -168,7 +168,7 @@ def test_trainer_can_resume_training_for_exponential_moving_average(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, moving_average=new_moving_average, @@ -190,7 +190,7 @@ def test_metric_only_considered_best_so_far_when_strictly_better_than_those_befo self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, @@ -225,7 +225,7 @@ def 
test_metric_only_considered_best_so_far_when_strictly_better_than_those_befo self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, @@ -257,7 +257,7 @@ def test_should_stop_early_with_increasing_metric(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, @@ -280,7 +280,7 @@ def test_should_stop_early_with_flat_lining_metric(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, @@ -293,7 +293,7 @@ def test_should_stop_early_with_flat_lining_metric(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, @@ -307,7 +307,7 @@ def test_should_stop_early_with_decreasing_metric(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, @@ -333,7 +333,7 @@ def test_should_stop_early_with_early_stopping_disabled(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=100, patience=None, validation_metric="+test", @@ -347,7 +347,7 @@ def test_should_stop_early_with_early_stopping_disabled(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=100, patience=None, validation_metric="-test", @@ -368,7 +368,7 @@ def test_should_stop_early_with_invalid_patience(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=100, patience=patience, validation_metric="+test", @@ -385,7 +385,7 @@ def test_trainer_can_run_and_resume_with_momentum_scheduler(self): data_loader=self.data_loader, momentum_scheduler=scheduler, validation_metric="-loss", - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=4, serialization_dir=self.TEST_DIR, ) @@ -401,7 +401,7 @@ def test_trainer_can_run_and_resume_with_momentum_scheduler(self): data_loader=self.data_loader, momentum_scheduler=new_scheduler, validation_metric="-loss", - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=6, serialization_dir=self.TEST_DIR, ) @@ -418,7 +418,7 @@ def test_trainer_can_run_with_lr_scheduler(self): data_loader=self.data_loader, learning_rate_scheduler=lr_scheduler, validation_metric="-loss", - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=2, ) trainer.train() @@ -430,7 +430,7 @@ def test_trainer_can_resume_with_lr_scheduler(self): optimizer=self.optimizer, data_loader=self.data_loader, learning_rate_scheduler=lr_scheduler, - 
validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=2, serialization_dir=self.TEST_DIR, ) @@ -442,7 +442,7 @@ def test_trainer_can_resume_with_lr_scheduler(self): optimizer=self.optimizer, data_loader=self.data_loader, learning_rate_scheduler=new_lr_scheduler, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=4, serialization_dir=self.TEST_DIR, ) @@ -503,7 +503,7 @@ def test_trainer_saves_metrics_every_epoch(self): model=self.model, optimizer=self.optimizer, data_loader=self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=5, serialization_dir=self.TEST_DIR, num_serialized_models_to_keep=3, @@ -620,7 +620,7 @@ def test_trainer_saves_and_loads_best_validation_metrics_correctly_1(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, validation_metric="-loss", num_epochs=1, serialization_dir=self.TEST_DIR, @@ -638,7 +638,7 @@ def test_trainer_saves_and_loads_best_validation_metrics_correctly_1(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, validation_metric="-loss", num_epochs=2, serialization_dir=self.TEST_DIR, @@ -659,7 +659,7 @@ def test_trainer_saves_and_loads_best_validation_metrics_correctly_2(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, validation_metric="+loss", num_epochs=1, serialization_dir=self.TEST_DIR, @@ -678,7 +678,7 @@ def test_trainer_saves_and_loads_best_validation_metrics_correctly_2(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, validation_metric="+loss", num_epochs=2, serialization_dir=self.TEST_DIR, @@ -701,8 +701,7 @@ def test_restored_training_returns_best_epoch_metrics_even_if_no_better_epoch_is self.model, self.optimizer, self.data_loader, - self.instances, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, validation_metric="+loss", num_epochs=1, serialization_dir=self.TEST_DIR, @@ -714,7 +713,7 @@ def test_restored_training_returns_best_epoch_metrics_even_if_no_better_epoch_is self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, validation_metric="+loss", num_epochs=2, serialization_dir=self.TEST_DIR, @@ -736,7 +735,7 @@ def test_restoring_works_with_older_checkpointing(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=3, serialization_dir=self.TEST_DIR, ) @@ -767,7 +766,7 @@ def test_trainer_can_run_gradient_accumulation(self): self.model, self.optimizer, self.data_loader, - validation_dataset_loader=self.validation_data_loader, + validation_data_loader=self.validation_data_loader, num_epochs=2, num_gradient_accumulation_steps=steps_to_accumulate, ) From 01d76bb48aec4c52ba262e71d91b6df3dfbf1497 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 17:13:36 -0800 Subject: [PATCH 23/52] remove 
unnecessary brackets Co-Authored-By: Santiago Castro --- allennlp/common/testing/model_test_case.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/allennlp/common/testing/model_test_case.py b/allennlp/common/testing/model_test_case.py index bc2fe84a287..4100cbb0300 100644 --- a/allennlp/common/testing/model_test_case.py +++ b/allennlp/common/testing/model_test_case.py @@ -114,7 +114,7 @@ def ensure_model_can_train_save_and_load( # the same result out. model_batch = next(iter(data_loader)) - loaded_batch = next((iter(data_loader2))) + loaded_batch = next(iter(data_loader2)) # Check gradients are None for non-trainable parameters and check that # trainable parameters receive some gradient if they are trainable. From 859d3ca9daa4756ff36c06b4ee6988215624b25d Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 20 Feb 2020 18:08:00 -0800 Subject: [PATCH 24/52] update a chunk of the configs --- .../fixtures/bert_srl/experiment.jsonnet | 10 +++-- .../experiment.json | 12 ++--- .../elmo_experiment.json | 12 ++--- .../experiment.json | 12 ++--- .../feedforward_experiment.json | 12 ++--- .../output_only_elmo_experiment.json | 12 ++--- allennlp/tests/fixtures/bimpm/experiment.json | 12 ++--- .../constituency_parser.json | 12 ++--- .../constituency_parser/experiment.json | 12 ++--- .../coref/coref_bert_lstm_small.jsonnet | 13 +++--- allennlp/tests/fixtures/coref/experiment.json | 12 ++--- .../tests/fixtures/crf_tagger/experiment.json | 2 +- .../crf_tagger/experiment_ccgbank.json | 2 +- .../crf_tagger/experiment_conll2000.json | 2 +- .../composed_seq2seq/experiment.json | 12 ++--- .../experiment_transformer.json | 12 ++--- .../copynet_seq2seq/experiment.json | 12 ++--- .../simple_seq2seq/experiment.json | 12 ++--- allennlp/tests/fixtures/esim/experiment.json | 12 ++--- .../fixtures/graph_parser/experiment.json | 12 ++--- .../experiment_unsampled.jsonnet | 5 +-- .../masked_language_model/experiment.json | 7 ++- .../fixtures/next_token_lm/experiment.json | 3 +- .../experiment.json | 12 ++--- .../experiment_callback_trainer.json | 44 ------------------- .../experiment_with_regularization.json | 12 ++--- allennlp/tests/fixtures/srl/experiment.json | 12 ++--- 27 files changed, 149 insertions(+), 155 deletions(-) delete mode 100644 allennlp/tests/fixtures/simple_tagger/experiment_callback_trainer.json diff --git a/allennlp/tests/fixtures/bert_srl/experiment.jsonnet b/allennlp/tests/fixtures/bert_srl/experiment.jsonnet index 27ca236d144..9b6247f0e4c 100644 --- a/allennlp/tests/fixtures/bert_srl/experiment.jsonnet +++ b/allennlp/tests/fixtures/bert_srl/experiment.jsonnet @@ -11,10 +11,12 @@ local bert_model = "allennlp/tests/fixtures/bert/vocab.txt"; "bert_model": bert_model, "embedding_dropout": 0.0 }, - "iterator": { - "type": "bucket", - "batch_size": 5, - "padding_noise": 0.0 + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 5, + "padding_noise": 0.0 + } }, "trainer": { "optimizer": { diff --git a/allennlp/tests/fixtures/biaffine_dependency_parser/experiment.json b/allennlp/tests/fixtures/biaffine_dependency_parser/experiment.json index 9cb3fb42c1a..20bd647be71 100644 --- a/allennlp/tests/fixtures/biaffine_dependency_parser/experiment.json +++ b/allennlp/tests/fixtures/biaffine_dependency_parser/experiment.json @@ -25,11 +25,13 @@ "tag_representation_dim": 3 }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 5 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 5, + "padding_noise": 0.0 + } + }, 
"trainer": { "num_epochs": 1, "grad_norm": 1.0, diff --git a/allennlp/tests/fixtures/biattentive_classification_network/elmo_experiment.json b/allennlp/tests/fixtures/biattentive_classification_network/elmo_experiment.json index 8c1fb21c317..93795956d50 100644 --- a/allennlp/tests/fixtures/biattentive_classification_network/elmo_experiment.json +++ b/allennlp/tests/fixtures/biattentive_classification_network/elmo_experiment.json @@ -61,11 +61,13 @@ "dropout": [0.0, 0.0] } }, - "iterator": { - "type": "bucket", - "padding_noise": 0, - "batch_size": 80 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 80, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 1, "grad_norm": 1, diff --git a/allennlp/tests/fixtures/biattentive_classification_network/experiment.json b/allennlp/tests/fixtures/biattentive_classification_network/experiment.json index 7bc8ad6bbf8..5f99dff69fc 100644 --- a/allennlp/tests/fixtures/biattentive_classification_network/experiment.json +++ b/allennlp/tests/fixtures/biattentive_classification_network/experiment.json @@ -45,11 +45,13 @@ "dropout": [0.0, 0.0] } }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 80 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 80, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 1, "grad_norm": 1.0, diff --git a/allennlp/tests/fixtures/biattentive_classification_network/feedforward_experiment.json b/allennlp/tests/fixtures/biattentive_classification_network/feedforward_experiment.json index f61498110f2..1a4214d1c7b 100644 --- a/allennlp/tests/fixtures/biattentive_classification_network/feedforward_experiment.json +++ b/allennlp/tests/fixtures/biattentive_classification_network/feedforward_experiment.json @@ -45,11 +45,13 @@ "dropout": [0.0, 0.0] } }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 80 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 80, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 1, "grad_norm": 1.0, diff --git a/allennlp/tests/fixtures/biattentive_classification_network/output_only_elmo_experiment.json b/allennlp/tests/fixtures/biattentive_classification_network/output_only_elmo_experiment.json index 783110ebe41..bb8abaf94a6 100644 --- a/allennlp/tests/fixtures/biattentive_classification_network/output_only_elmo_experiment.json +++ b/allennlp/tests/fixtures/biattentive_classification_network/output_only_elmo_experiment.json @@ -60,11 +60,13 @@ "dropout": [0.0, 0.0] } }, - "iterator": { - "type": "bucket", - "padding_noise": 0, - "batch_size": 80 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 80, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 1, "grad_norm": 1, diff --git a/allennlp/tests/fixtures/bimpm/experiment.json b/allennlp/tests/fixtures/bimpm/experiment.json index 70d689584bc..9ca5c838f03 100644 --- a/allennlp/tests/fixtures/bimpm/experiment.json +++ b/allennlp/tests/fixtures/bimpm/experiment.json @@ -112,11 +112,13 @@ ] } }, - "iterator": { - "type": "bucket", - "batch_size": 64, - "padding_noise": 0.0 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 64, + "padding_noise": 0.0 + } +}, "trainer": { "checkpointer": { "num_serialized_models_to_keep": 2 diff --git a/allennlp/tests/fixtures/constituency_parser/constituency_parser.json b/allennlp/tests/fixtures/constituency_parser/constituency_parser.json index af6369c9728..32b2ba492cd 100644 --- 
a/allennlp/tests/fixtures/constituency_parser/constituency_parser.json +++ b/allennlp/tests/fixtures/constituency_parser/constituency_parser.json @@ -38,11 +38,13 @@ "input_dim": 4 } }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 5 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 5, + "padding_noise": 0.0 + } + }, "trainer": { "num_epochs": 1, "grad_norm": 1.0, diff --git a/allennlp/tests/fixtures/constituency_parser/experiment.json b/allennlp/tests/fixtures/constituency_parser/experiment.json index 5506a48db83..124bfcb561a 100644 --- a/allennlp/tests/fixtures/constituency_parser/experiment.json +++ b/allennlp/tests/fixtures/constituency_parser/experiment.json @@ -34,11 +34,13 @@ } }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 5 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 5, + "padding_noise": 0.0 + } + }, "trainer": { "num_epochs": 1, "grad_norm": 1.0, diff --git a/allennlp/tests/fixtures/coref/coref_bert_lstm_small.jsonnet b/allennlp/tests/fixtures/coref/coref_bert_lstm_small.jsonnet index 085bd5e9b83..12dd663c2c1 100644 --- a/allennlp/tests/fixtures/coref/coref_bert_lstm_small.jsonnet +++ b/allennlp/tests/fixtures/coref/coref_bert_lstm_small.jsonnet @@ -74,11 +74,14 @@ local span_pair_embedding_dim = 3 * span_embedding_dim + feature_size; "spans_per_word": 0.4, "max_antecedents": 50 }, - "iterator": { - "type": "bucket", - "sorting_keys": [["text", "tokens___token_ids"]], - "padding_noise": 0.0, - "batch_size": 1 + + "data_loader": { + "batch_sampler": { + "type": "bucket", + "sorting_keys": [["text", "tokens___token_ids"]], + "batch_size": 1, + "padding_noise": 0.0 + } }, "trainer": { "num_epochs": 1, diff --git a/allennlp/tests/fixtures/coref/experiment.json b/allennlp/tests/fixtures/coref/experiment.json index 6901fe00029..7e9b65954dc 100644 --- a/allennlp/tests/fixtures/coref/experiment.json +++ b/allennlp/tests/fixtures/coref/experiment.json @@ -68,11 +68,13 @@ "spans_per_word": 0.4, "max_antecedents": 50 }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size": 2 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 5, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 2, "grad_norm": 5.0, diff --git a/allennlp/tests/fixtures/crf_tagger/experiment.json b/allennlp/tests/fixtures/crf_tagger/experiment.json index b64c4512fb2..693c209c6d9 100644 --- a/allennlp/tests/fixtures/crf_tagger/experiment.json +++ b/allennlp/tests/fixtures/crf_tagger/experiment.json @@ -57,7 +57,7 @@ ] } }, - "iterator": {"type": "basic", "batch_size": 32}, + "data_loader": {"batch_size": 32}, "trainer": { "optimizer": "adam", "num_epochs": 5, diff --git a/allennlp/tests/fixtures/crf_tagger/experiment_ccgbank.json b/allennlp/tests/fixtures/crf_tagger/experiment_ccgbank.json index 69ea4266816..c58a2196499 100644 --- a/allennlp/tests/fixtures/crf_tagger/experiment_ccgbank.json +++ b/allennlp/tests/fixtures/crf_tagger/experiment_ccgbank.json @@ -54,7 +54,7 @@ ] } }, - "iterator": {"type": "basic", "batch_size": 32}, + "data_loader": {"batch_size": 32}, "trainer": { "optimizer": "adam", "num_epochs": 5, diff --git a/allennlp/tests/fixtures/crf_tagger/experiment_conll2000.json b/allennlp/tests/fixtures/crf_tagger/experiment_conll2000.json index 2ac92ff8e9d..7617bfc63e8 100644 --- a/allennlp/tests/fixtures/crf_tagger/experiment_conll2000.json +++ b/allennlp/tests/fixtures/crf_tagger/experiment_conll2000.json @@ -56,7 +56,7 @@ ] 
} }, - "iterator": {"type": "basic", "batch_size": 32}, + "data_loader": {"batch_size": 32}, "trainer": { "optimizer": "adam", "num_epochs": 5, diff --git a/allennlp/tests/fixtures/encoder_decoder/composed_seq2seq/experiment.json b/allennlp/tests/fixtures/encoder_decoder/composed_seq2seq/experiment.json index 5870115d7f0..069c4f5a58e 100644 --- a/allennlp/tests/fixtures/encoder_decoder/composed_seq2seq/experiment.json +++ b/allennlp/tests/fixtures/encoder_decoder/composed_seq2seq/experiment.json @@ -88,11 +88,13 @@ "beam_size": 5 } }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 80, - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 80, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 2, "patience": 10, diff --git a/allennlp/tests/fixtures/encoder_decoder/composed_seq2seq/experiment_transformer.json b/allennlp/tests/fixtures/encoder_decoder/composed_seq2seq/experiment_transformer.json index f70619bfbad..ad3c8848800 100644 --- a/allennlp/tests/fixtures/encoder_decoder/composed_seq2seq/experiment_transformer.json +++ b/allennlp/tests/fixtures/encoder_decoder/composed_seq2seq/experiment_transformer.json @@ -74,11 +74,13 @@ }, "tied_source_embedder_key": "tokens" }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 2, - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 2, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 2, "patience": 10, diff --git a/allennlp/tests/fixtures/encoder_decoder/copynet_seq2seq/experiment.json b/allennlp/tests/fixtures/encoder_decoder/copynet_seq2seq/experiment.json index 913fef7d34c..7cbea5ae5b7 100644 --- a/allennlp/tests/fixtures/encoder_decoder/copynet_seq2seq/experiment.json +++ b/allennlp/tests/fixtures/encoder_decoder/copynet_seq2seq/experiment.json @@ -51,11 +51,13 @@ "beam_size": 5, "max_decoding_steps": 50 }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 80, - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 80, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 2, "patience": 10, diff --git a/allennlp/tests/fixtures/encoder_decoder/simple_seq2seq/experiment.json b/allennlp/tests/fixtures/encoder_decoder/simple_seq2seq/experiment.json index 40853736008..1c0dcd3a38f 100644 --- a/allennlp/tests/fixtures/encoder_decoder/simple_seq2seq/experiment.json +++ b/allennlp/tests/fixtures/encoder_decoder/simple_seq2seq/experiment.json @@ -77,11 +77,13 @@ }, "beam_size": 5 }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 80, - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 80, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 2, "patience": 10, diff --git a/allennlp/tests/fixtures/esim/experiment.json b/allennlp/tests/fixtures/esim/experiment.json index 5cd36fd1030..d7c20f2737a 100644 --- a/allennlp/tests/fixtures/esim/experiment.json +++ b/allennlp/tests/fixtures/esim/experiment.json @@ -69,11 +69,13 @@ ] } }, - "iterator": { - "type": "bucket", - "batch_size": 32, - "padding_noise": 0.0, - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 32, + "padding_noise": 0.0 + } +}, "trainer": { "optimizer": { "type": "adam", diff --git a/allennlp/tests/fixtures/graph_parser/experiment.json b/allennlp/tests/fixtures/graph_parser/experiment.json index d96f60537e1..b688f59c20e 100644 --- a/allennlp/tests/fixtures/graph_parser/experiment.json +++ 
b/allennlp/tests/fixtures/graph_parser/experiment.json @@ -25,11 +25,13 @@ "tag_representation_dim": 3 }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 5 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 5, + "padding_noise": 0.0 + } + }, "trainer": { "num_epochs": 1, "grad_norm": 1.0, diff --git a/allennlp/tests/fixtures/language_model/experiment_unsampled.jsonnet b/allennlp/tests/fixtures/language_model/experiment_unsampled.jsonnet index d870a6f73bc..abb07aae3ed 100644 --- a/allennlp/tests/fixtures/language_model/experiment_unsampled.jsonnet +++ b/allennlp/tests/fixtures/language_model/experiment_unsampled.jsonnet @@ -54,9 +54,8 @@ "hidden_size": 7, } }, - "iterator": { - "type": "basic", - "batch_size": 32 + "data_loader": { + "batch_size": 32, }, "trainer": { "num_epochs": 10, diff --git a/allennlp/tests/fixtures/masked_language_model/experiment.json b/allennlp/tests/fixtures/masked_language_model/experiment.json index 27b7fd6055a..d2589d5e6d9 100644 --- a/allennlp/tests/fixtures/masked_language_model/experiment.json +++ b/allennlp/tests/fixtures/masked_language_model/experiment.json @@ -21,10 +21,9 @@ }, "target_namespace": "tokens" }, - "iterator": { - "type": "basic", - "batch_size": 32 - }, + "data_loader": { + "batch_size": 32, +}, "trainer": { "num_epochs": 1, "cuda_device" : -1, diff --git a/allennlp/tests/fixtures/next_token_lm/experiment.json b/allennlp/tests/fixtures/next_token_lm/experiment.json index 9952b7f4d7a..13c807a1261 100644 --- a/allennlp/tests/fixtures/next_token_lm/experiment.json +++ b/allennlp/tests/fixtures/next_token_lm/experiment.json @@ -21,8 +21,7 @@ "vocab_namespace": "tokens" } }, - "iterator": { - "type": "basic", + "data_loader": { "batch_size": 32 }, "trainer": { diff --git a/allennlp/tests/fixtures/open_information_extraction/experiment.json b/allennlp/tests/fixtures/open_information_extraction/experiment.json index 2ab951ac3ec..e70abad32c2 100644 --- a/allennlp/tests/fixtures/open_information_extraction/experiment.json +++ b/allennlp/tests/fixtures/open_information_extraction/experiment.json @@ -23,11 +23,13 @@ "binary_feature_dim": 50, "ignore_span_metric": true // Span metric is irrelevant for Open IE loss }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 80 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 80, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 1, diff --git a/allennlp/tests/fixtures/simple_tagger/experiment_callback_trainer.json b/allennlp/tests/fixtures/simple_tagger/experiment_callback_trainer.json deleted file mode 100644 index 58c14d326b4..00000000000 --- a/allennlp/tests/fixtures/simple_tagger/experiment_callback_trainer.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "dataset_reader":{"type":"sequence_tagging"}, - "train_data_path": "allennlp/tests/fixtures/data/sequence_tagging.tsv", - "validation_data_path": "allennlp/tests/fixtures/data/sequence_tagging.tsv", - "model": { - "type": "simple_tagger", - "text_field_embedder": { - "token_embedders": { - "tokens": { - "type": "embedding", - "projection_dim": 2, - "pretrained_file": "allennlp/tests/fixtures/embeddings/glove.6B.100d.sample.txt.gz", - "embedding_dim": 100, - "trainable": true - } - } - }, - "encoder": { - "type": "lstm", - "input_size": 2, - "hidden_size": 4, - "num_layers": 1 - } - }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 80 - }, - "trainer": { - "type": "callback", - - "optimizer": {"type": "sgd", 
"lr": 0.01, "momentum": 0.9}, - "num_epochs": 2, - "callbacks": [ - {"type": "gradient_norm_and_clip", "grad_norm": 1.0}, - "checkpoint", - {"type": "track_metrics", "patience": 500}, - "validate", - {"type": "log_to_tensorboard", "log_batch_size_period": 10} - ], - "cuda_device": -1 - } -} diff --git a/allennlp/tests/fixtures/simple_tagger/experiment_with_regularization.json b/allennlp/tests/fixtures/simple_tagger/experiment_with_regularization.json index 106c951af81..070f7221c5f 100644 --- a/allennlp/tests/fixtures/simple_tagger/experiment_with_regularization.json +++ b/allennlp/tests/fixtures/simple_tagger/experiment_with_regularization.json @@ -28,11 +28,13 @@ ] } }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 80 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 80, + "padding_noise": 0.0 + } + }, "trainer": { "num_epochs": 1, "grad_norm": 1.0, diff --git a/allennlp/tests/fixtures/srl/experiment.json b/allennlp/tests/fixtures/srl/experiment.json index 030a6bad5a8..def483e3d80 100644 --- a/allennlp/tests/fixtures/srl/experiment.json +++ b/allennlp/tests/fixtures/srl/experiment.json @@ -22,11 +22,13 @@ }, "binary_feature_dim": 50 }, - "iterator": { - "type": "bucket", - "padding_noise": 0.0, - "batch_size" : 80 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 80, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 1, From c22dee3ddd6b4d78384f91e97869bb1158a43034 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 21 Feb 2020 07:55:30 -0800 Subject: [PATCH 25/52] fix archival test, couple more configs --- .../elmo_in_text_field_embedder.json | 18 +++++++----------- .../no_elmo_tokenizer_for_elmo.json | 12 +++++++----- allennlp/tests/models/archival_test.py | 2 +- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/allennlp/tests/fixtures/biattentive_classification_network/broken_experiments/elmo_in_text_field_embedder.json b/allennlp/tests/fixtures/biattentive_classification_network/broken_experiments/elmo_in_text_field_embedder.json index 7134de6db06..4708e4f50f2 100644 --- a/allennlp/tests/fixtures/biattentive_classification_network/broken_experiments/elmo_in_text_field_embedder.json +++ b/allennlp/tests/fixtures/biattentive_classification_network/broken_experiments/elmo_in_text_field_embedder.json @@ -72,17 +72,13 @@ "dropout": [0.2, 0.0] } }, - "iterator": { - "type": "bucket", - "sorting_keys": [ - [ - "tokens", - "num_tokens" - ] - ], - "padding_noise": 0, - "batch_size": 80 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 80, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 1, "grad_norm": 1, diff --git a/allennlp/tests/fixtures/biattentive_classification_network/broken_experiments/no_elmo_tokenizer_for_elmo.json b/allennlp/tests/fixtures/biattentive_classification_network/broken_experiments/no_elmo_tokenizer_for_elmo.json index 9ca8e9a0d46..4b7a857d1d9 100644 --- a/allennlp/tests/fixtures/biattentive_classification_network/broken_experiments/no_elmo_tokenizer_for_elmo.json +++ b/allennlp/tests/fixtures/biattentive_classification_network/broken_experiments/no_elmo_tokenizer_for_elmo.json @@ -64,11 +64,13 @@ "dropout": [0.2, 0.0] } }, - "iterator": { - "type": "bucket", - "padding_noise": 0, - "batch_size": 80 - }, + "data_loader": { + "batch_sampler": { + "type": "bucket", + "batch_size": 80, + "padding_noise": 0.0 + } +}, "trainer": { "num_epochs": 1, "grad_norm": 1, diff --git a/allennlp/tests/models/archival_test.py 
b/allennlp/tests/models/archival_test.py index d5dae2d908c..56e865fd059 100644 --- a/allennlp/tests/models/archival_test.py +++ b/allennlp/tests/models/archival_test.py @@ -42,7 +42,7 @@ def setUp(self): "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"), "validation_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"), - "iterator": {"type": "basic", "batch_size": 2}, + "data_loader": {"batch_size": 2}, "trainer": {"num_epochs": 2, "optimizer": "adam"}, } ) From fe5b4706bf2a29ed98d4d37259f604ccc9a2a293 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 21 Feb 2020 07:56:24 -0800 Subject: [PATCH 26/52] rm pointless gan test --- allennlp/tests/training/gan_trainer_test.py | 410 -------------------- 1 file changed, 410 deletions(-) delete mode 100644 allennlp/tests/training/gan_trainer_test.py diff --git a/allennlp/tests/training/gan_trainer_test.py b/allennlp/tests/training/gan_trainer_test.py deleted file mode 100644 index 41175a7edb0..00000000000 --- a/allennlp/tests/training/gan_trainer_test.py +++ /dev/null @@ -1,410 +0,0 @@ -""" -A toy example of how one might train a GAN using AllenNLP. - -Based on /~https://github.com/devnag/pytorch-generative-adversarial-networks. - -We use one dataset reader to sample from the "true" distribution N(4, 1.25), -and a second to sample uniform noise. We'll then adversarially train a generator `Model` -to transform the noise into something that (hopefully) looks like the true distribution -and a discriminator `Model` to (hopefully) distinguish between the "true" and generated data. -""" -from typing import Dict, Iterable, Any - -import tqdm -import torch -import numpy as np - -from allennlp.common import Lazy, Registrable -from allennlp.common.checks import ConfigurationError -from allennlp.common.params import Params -from allennlp.common.testing import AllenNlpTestCase -from allennlp.data import Instance -from allennlp.data.iterators import DataIterator -from allennlp.data.dataset_readers import DatasetReader -from allennlp.data.fields import ArrayField -from allennlp.models import Model -from allennlp.nn.activations import Activation -from allennlp.training.optimizers import Optimizer -from allennlp.training.trainer_base import TrainerBase - - -class InputSampler(Registrable): - """ - Abstract base class for sampling from a distribution. - """ - - def sample(self, *dims: int) -> np.ndarray: - raise NotImplementedError - - -@InputSampler.register("uniform") -class UniformSampler(InputSampler): - """ - Sample from the uniform [0, 1] distribution. - """ - - def sample(self, *dims: int) -> np.ndarray: - return np.random.uniform(0, 1, dims) - - -@InputSampler.register("normal") -class NormalSampler(InputSampler): - """ - Sample from the normal distribution. - """ - - def __init__(self, mean: float = 0, stdev: float = 1.0) -> None: - self.mean = mean - self.stdev = stdev - - def sample(self, *dims: int) -> np.ndarray: - return np.random.normal(self.mean, self.stdev, dims) - - -@DatasetReader.register("sampling") -class SamplingReader(DatasetReader): - """ - A dataset reader that just samples from the provided sampler forever. 
- """ - - def __init__(self, sampler: InputSampler) -> None: - super().__init__(lazy=True) - self.sampler = sampler - - def _read(self, _: str) -> Iterable[Instance]: - while True: - example = self.sampler.sample(1) - yield self.text_to_instance(example) - - def text_to_instance(self, example: np.ndarray) -> Instance: # type: ignore - - field = ArrayField(example) - return Instance({"array": field}) - - -@Model.register("generator-test") -class Generator(Model): - """ - A model that takes random noise (batch_size, input_dim) - and transforms it to (batch_size, output_dim). - - If its forward pass is provided with a discriminator, - it computes a loss based on the idea that it wants - to trick the discriminator into predicting that its output is genuine. - """ - - def __init__( - self, - input_dim: int, - hidden_dim: int, - output_dim: int, - activation: Activation = torch.nn.Tanh(), - ) -> None: - super().__init__(None) - self.linear1 = torch.nn.Linear(input_dim, hidden_dim) - self.linear2 = torch.nn.Linear(hidden_dim, hidden_dim) - self.linear3 = torch.nn.Linear(hidden_dim, output_dim) - self.activation = activation - self.loss = torch.nn.BCELoss() - - def forward( # type: ignore - self, inputs: torch.Tensor, discriminator: Model = None - ) -> Dict[str, torch.Tensor]: - - hidden1 = self.activation(self.linear1(inputs)) - hidden2 = self.activation(self.linear2(hidden1)) - output = self.linear3(hidden2) - output_dict = {"output": output} - - if discriminator is not None: - predicted = discriminator(output)["output"] - # We desire for the discriminator to think this is real. - desired = torch.ones_like(predicted) - output_dict["loss"] = self.loss(predicted, desired) - - return output_dict - - -def get_moments(dist: torch.Tensor) -> torch.Tensor: - """ - Returns the first 4 moments of the input data. - We'll (potentially) use this as the input to our discriminator. - """ - mean = torch.mean(dist) - diffs = dist - mean - var = torch.mean(torch.pow(diffs, 2.0)) - std = torch.pow(var, 0.5) - zscores = diffs / std - skews = torch.mean(torch.pow(zscores, 3.0)) - kurtoses = ( - torch.mean(torch.pow(zscores, 4.0)) - 3.0 - ) # excess kurtosis, should be 0 for Gaussian - final = torch.cat((mean.reshape(1), std.reshape(1), skews.reshape(1), kurtoses.reshape(1))) - return final - - -@Model.register("discriminator-test") -class Discriminator(Model): - """ - A model that takes a sample (input_dim,) and tries to predict 1 - if it's from the true distribution and 0 if it's from the generator. 
- """ - - def __init__( - self, - input_dim: int, - hidden_dim: int, - activation: Activation = torch.nn.Sigmoid(), - preprocessing: str = None, - ) -> None: - super().__init__(None) - if preprocessing is None: - self.preprocess = lambda x: x - elif preprocessing == "moments": - self.preprocess = get_moments - input_dim = 4 - else: - raise ConfigurationError("unknown preprocessing") - - self.linear1 = torch.nn.Linear(input_dim, hidden_dim) - self.linear2 = torch.nn.Linear(hidden_dim, hidden_dim) - self.linear3 = torch.nn.Linear(hidden_dim, 1) - self.activation = activation - self.loss = torch.nn.BCELoss() - - def forward( # type: ignore - self, inputs: torch.Tensor, label: torch.Tensor = None - ) -> Dict[str, torch.Tensor]: - - inputs = inputs.squeeze(-1) - hidden1 = self.activation(self.linear1(self.preprocess(inputs))) - hidden2 = self.activation(self.linear2(hidden1)) - output = self.activation(self.linear3(hidden2)) - output_dict = {"output": output} - if label is not None: - output_dict["loss"] = self.loss(output, label) - - return output_dict - - -@TrainerBase.register("gan-test", constructor="from_partial_objects") -class GanTestTrainer(TrainerBase): - def __init__( - self, - serialization_dir: str, - data: Iterable[Instance], - noise: Iterable[Instance], - generator: Model, - discriminator: Model, - iterator: DataIterator, - noise_iterator: DataIterator, - generator_optimizer: torch.optim.Optimizer, - discriminator_optimizer: torch.optim.Optimizer, - batches_per_epoch: int, - num_epochs: int, - ) -> None: - super().__init__(serialization_dir, -1) - self.data = data - self.noise = noise - self.generator = generator - self.generator_optimizer = generator_optimizer - self.discriminator = discriminator - self.discriminator_optimizer = discriminator_optimizer - self.num_epochs = num_epochs - self.iterator = iterator - self.noise_iterator = noise_iterator - self.batches_per_epoch = batches_per_epoch - - def train_one_epoch(self) -> Dict[str, float]: - self.generator.train() - self.discriminator.train() - - generator_loss = 0.0 - discriminator_real_loss = 0.0 - discriminator_fake_loss = 0.0 - fake_mean = 0.0 - fake_stdev = 0.0 - - # First train the discriminator - data_iterator = self.iterator(self.data) - noise_iterator = self.noise_iterator(self.noise) - - for _ in range(self.batches_per_epoch): - self.discriminator_optimizer.zero_grad() - - batch = next(data_iterator) - noise = next(noise_iterator) - - # Real example, want discriminator to predict 1. - real_error = self.discriminator(batch["array"], torch.ones(1))["loss"] - real_error.backward() - - # Fake example, want discriminator to predict 0. 
- fake_data = self.generator(noise["array"])["output"] - fake_error = self.discriminator(fake_data, torch.zeros(1))["loss"] - fake_error.backward() - - discriminator_real_loss += real_error.sum().item() - discriminator_fake_loss += fake_error.sum().item() - - self.discriminator_optimizer.step() - - # Now train the generator - for _ in range(self.batches_per_epoch): - self.generator_optimizer.zero_grad() - - noise = next(noise_iterator) - generated = self.generator(noise["array"], self.discriminator) - fake_data = generated["output"] - fake_error = generated["loss"] - fake_error.backward() - - fake_mean += fake_data.mean() - fake_stdev += fake_data.std() - - generator_loss += fake_error.sum().item() - - self.generator_optimizer.step() - - return { - "generator_loss": generator_loss, - "discriminator_fake_loss": discriminator_fake_loss, - "discriminator_real_loss": discriminator_real_loss, - "mean": fake_mean / self.batches_per_epoch, - "stdev": fake_stdev / self.batches_per_epoch, - } - - def train(self) -> Dict[str, Any]: - with tqdm.trange(self.num_epochs) as epochs: - for _ in epochs: - metrics = self.train_one_epoch() - description = ( - f'gl: {metrics["generator_loss"]:.3f} ' - f'dfl: {metrics["discriminator_fake_loss"]:.3f} ' - f'drl: {metrics["discriminator_real_loss"]:.3f} ' - f'mean: {metrics["mean"]:.2f} ' - f'std: {metrics["stdev"]:.2f} ' - ) - epochs.set_description(description) - return metrics - - @classmethod - def from_partial_objects( - cls, - serialization_dir: str, - data_reader: DatasetReader, - noise_reader: DatasetReader, - generator: Model, - discriminator: Model, - iterator: DataIterator, - noise_iterator: DataIterator, - generator_optimizer: Lazy[Optimizer], - discriminator_optimizer: Lazy[Optimizer], - num_epochs: int, - batches_per_epoch: int, - ) -> "GanTestTrainer": - data = data_reader.read("") - noise = noise_reader.read("") - - generator_params = [[n, p] for n, p in generator.named_parameters() if p.requires_grad] - generator_optimizer_ = generator_optimizer.construct(model_parameters=generator_params) - - discriminator_params = [ - [n, p] for n, p in discriminator.named_parameters() if p.requires_grad - ] - discriminator_optimizer_ = discriminator_optimizer.construct( - model_parameters=discriminator_params - ) - - return cls( - serialization_dir, - data, - noise, - generator, - discriminator, - iterator, - noise_iterator, - generator_optimizer_, - discriminator_optimizer_, - batches_per_epoch, - num_epochs, - ) - - -class GanTrainerTest(AllenNlpTestCase): - def setUp(self): - super().setUp() - - params = Params( - { - "type": "gan-test", - "data_reader": { - "type": "sampling", - "sampler": {"type": "normal", "mean": 4.0, "stdev": 1.25}, - }, - "noise_reader": {"type": "sampling", "sampler": {"type": "uniform"}}, - "generator": { - "type": "generator-test", - "input_dim": 1, - "hidden_dim": 5, - "output_dim": 1, - }, - "discriminator": {"type": "discriminator-test", "input_dim": 500, "hidden_dim": 10}, - "iterator": {"type": "basic", "batch_size": 500}, - "noise_iterator": {"type": "basic", "batch_size": 500}, - "generator_optimizer": {"type": "sgd", "lr": 0.1}, - "discriminator_optimizer": {"type": "sgd", "lr": 0.1}, - "num_epochs": 5, - "batches_per_epoch": 2, - } - ) - - self.trainer = TrainerBase.from_params(params=params, serialization_dir=self.TEST_DIR) - - def test_gan_can_train(self): - self.trainer.train() - - -if __name__ == "__main__": - # Run it yourself, it's fun! 
- # - # python -m allennlp.tests.training.gan_trainer_test - # - - sample_size = 500 - - params_ = Params( - { - "type": "gan-test", - "data_reader": { - "type": "sampling", - "sampler": {"type": "normal", "mean": 4.0, "stdev": 1.25}, - }, - "noise_reader": {"type": "sampling", "sampler": {"type": "uniform"}}, - "generator": { - "type": "generator-test", - "input_dim": 1, - "hidden_dim": 5, - "output_dim": 1, - }, - "discriminator": { - "type": "discriminator-test", - "input_dim": sample_size, - "hidden_dim": 10, - "preprocessing": "moments", - }, - "iterator": {"type": "basic", "batch_size": sample_size}, - "noise_iterator": {"type": "basic", "batch_size": sample_size}, - "generator_optimizer": {"type": "sgd", "lr": 0.1}, - "discriminator_optimizer": {"type": "sgd", "lr": 0.1}, - "num_epochs": 1000, - "batches_per_epoch": 2, - } - ) - - import tempfile - - serialization_dir_ = tempfile.mkdtemp() - trainer_ = TrainerBase.from_params(params=params_, serialization_dir=serialization_dir_) - metrics_ = trainer_.train() - print(metrics_) From 7533c91348cfad8c8c6a6329b4abf3b06d785dbe Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 21 Feb 2020 09:17:50 -0800 Subject: [PATCH 27/52] more tests passing --- allennlp/common/testing/model_test_case.py | 5 +++-- .../models/masked_language_model_test.py | 2 ++ allennlp/tests/models/next_token_lm_test.py | 2 ++ allennlp/tests/models/simple_tagger_test.py | 20 +++++++++---------- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/allennlp/common/testing/model_test_case.py b/allennlp/common/testing/model_test_case.py index 4100cbb0300..9fdc1b190a4 100644 --- a/allennlp/common/testing/model_test_case.py +++ b/allennlp/common/testing/model_test_case.py @@ -26,7 +26,7 @@ def set_up_model(self, param_file, dataset_file): reader = DatasetReader.from_params(params["dataset_reader"]) # The dataset reader might be lazy, but a lazy list here breaks some of our tests. - instances = list(reader.read(str(dataset_file))) + instances = reader.read(str(dataset_file)) # Use parameters for vocabulary if they are present in the config file, so that choices like # "non_padded_namespaces", "min_count" etc. can be set if needed. 
if "vocabulary" in params: @@ -36,11 +36,12 @@ def set_up_model(self, param_file, dataset_file): vocab = Vocabulary.from_instances(instances) self.vocab = vocab self.instances = instances + self.instances.index_with(vocab) self.model = Model.from_params(vocab=self.vocab, params=params["model"]) # TODO(joelgrus) get rid of these # (a lot of the model tests use them, so they'll have to be changed) - self.dataset = Batch(self.instances) + self.dataset = Batch(list(self.instances)) self.dataset.index_instances(self.vocab) def ensure_model_can_train_save_and_load( diff --git a/allennlp/tests/models/masked_language_model_test.py b/allennlp/tests/models/masked_language_model_test.py index b9f47901e30..211346e0de1 100644 --- a/allennlp/tests/models/masked_language_model_test.py +++ b/allennlp/tests/models/masked_language_model_test.py @@ -1,5 +1,7 @@ from allennlp.common.testing import ModelTestCase +from ..modules.language_model_heads.linear import LinearLanguageModelHead # noqa: F401 + class TestMaskedLanguageModel(ModelTestCase): def setUp(self): diff --git a/allennlp/tests/models/next_token_lm_test.py b/allennlp/tests/models/next_token_lm_test.py index b5dca1bca7b..c8a9dff443b 100644 --- a/allennlp/tests/models/next_token_lm_test.py +++ b/allennlp/tests/models/next_token_lm_test.py @@ -1,5 +1,7 @@ from allennlp.common.testing import ModelTestCase +from ..modules.language_model_heads.linear import LinearLanguageModelHead # noqa: F401 + class TestNextTokenLanguageModel(ModelTestCase): def setUp(self): diff --git a/allennlp/tests/models/simple_tagger_test.py b/allennlp/tests/models/simple_tagger_test.py index dbe58dd1ec0..e3ebe1ffc78 100644 --- a/allennlp/tests/models/simple_tagger_test.py +++ b/allennlp/tests/models/simple_tagger_test.py @@ -7,7 +7,8 @@ from allennlp.common.checks import ConfigurationError from allennlp.common.params import Params from allennlp.data.dataset_readers import DatasetReader -from allennlp.data.iterators import DataIterator, BasicIterator +from allennlp.data.iterators import BasicIterator +from allennlp.data.samplers import DataLoader from allennlp.models import Model from allennlp.training import Trainer, TrainerBase @@ -55,13 +56,13 @@ def test_regularization(self): penalty = self.model.get_regularization_penalty() assert penalty == 0 - iterator = BasicIterator(batch_size=32) - trainer = Trainer(self.model, None, iterator, self.instances) # optimizer, + data_loader = DataLoader(self.instances, batch_size=32) + trainer = Trainer(self.model, None, data_loader) # optimizer, # You get a RuntimeError if you call `model.forward` twice on the same inputs. # The data and config are such that the whole dataset is one batch. 
- training_batch = next(iterator(self.instances, num_epochs=1)) - validation_batch = next(iterator(self.instances, num_epochs=1)) + training_batch = next(iter(data_loader)) + validation_batch = next(iter(data_loader)) training_loss = trainer.batch_loss(training_batch, for_training=True).item() validation_loss = trainer.batch_loss(validation_batch, for_training=False).item() @@ -93,12 +94,11 @@ def setUp(self): self.set_up_model(param_file, self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv") params = Params.from_file(param_file) self.reader = DatasetReader.from_params(params["dataset_reader"]) - self.iterator = DataIterator.from_params(params["iterator"]) + self.data_loader = DataLoader.from_params(dataset=self.instances, params=params["data_loader"]) self.trainer = TrainerBase.from_params( model=self.model, + data_loader=self.data_loader, serialization_dir=self.TEST_DIR, - iterator=self.iterator, - train_data=self.dataset, params=params.get("trainer"), ) @@ -125,8 +125,8 @@ def test_regularization(self): # You get a RuntimeError if you call `model.forward` twice on the same inputs. # The data and config are such that the whole dataset is one batch. - training_batch = next(self.iterator(self.instances, num_epochs=1)) - validation_batch = next(self.iterator(self.instances, num_epochs=1)) + training_batch = next(iter(self.data_loader)) + validation_batch = next(iter(self.data_loader)) training_loss = self.trainer.batch_loss(training_batch, for_training=True).data validation_loss = self.trainer.batch_loss(validation_batch, for_training=False).data From ad45659886a7e17c5b23a376da13a6547a2bab8c Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 21 Feb 2020 11:37:45 -0800 Subject: [PATCH 28/52] add current state of from params changes --- allennlp/commands/evaluate.py | 2 +- allennlp/commands/find_learning_rate.py | 2 +- allennlp/commands/train.py | 12 ++-- allennlp/common/testing/model_test_case.py | 4 +- allennlp/data/samplers/__init__.py | 56 ++++++++++++------- .../tests/commands/find_learning_rate_test.py | 2 +- allennlp/tests/models/simple_tagger_test.py | 5 +- .../slanted_triangular_test.py | 4 +- allennlp/tests/training/optimizer_test.py | 3 +- allennlp/training/trainer.py | 1 - 10 files changed, 52 insertions(+), 39 deletions(-) diff --git a/allennlp/commands/evaluate.py b/allennlp/commands/evaluate.py index 53af3049243..65954691e7f 100644 --- a/allennlp/commands/evaluate.py +++ b/allennlp/commands/evaluate.py @@ -179,7 +179,7 @@ def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]: data_loader_params = config.pop("data_loader") if args.batch_size: data_loader_params["batch_size"] = args.batch_size - data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params) + data_loader = DataLoader.from_params(data_loader_params).construct(instances) metrics = evaluate(model, data_loader, args.cuda_device, args.batch_weight_key) diff --git a/allennlp/commands/find_learning_rate.py b/allennlp/commands/find_learning_rate.py index 01536b310bc..e9af5919160 100644 --- a/allennlp/commands/find_learning_rate.py +++ b/allennlp/commands/find_learning_rate.py @@ -216,7 +216,7 @@ def find_learning_rate_model( train_data = all_datasets["train"] train_data.index_with(vocab) model = Model.from_params(vocab=vocab, params=params.pop("model")) - data_loader = DataLoader.from_params(dataset=train_data, params=params.pop("data_loader")) + data_loader = DataLoader.from_params(params.pop("data_loader")).construct(train_data) trainer_params = params.pop("trainer") diff --git 
a/allennlp/commands/train.py b/allennlp/commands/train.py index cf4da7f9673..444bab89546 100644 --- a/allennlp/commands/train.py +++ b/allennlp/commands/train.py @@ -549,13 +549,13 @@ def from_partial_objects( dataset_reader: DatasetReader, train_data_path: str, model: Lazy[Model], - data_loader: Lazy[DataLoader], + data_loader: DataLoader, trainer: Lazy[TrainerBase], vocabulary: Lazy[Vocabulary] = None, datasets_for_vocab_creation: List[str] = None, validation_dataset_reader: DatasetReader = None, validation_data_path: str = None, - validation_data_loader: Lazy[DataLoader] = None, + validation_data_loader: DataLoader = None, test_data_path: str = None, evaluate_on_test: bool = False, ) -> "TrainModel": @@ -631,7 +631,6 @@ def from_partial_objects( validation_data_path=validation_data_path, test_data_path=test_data_path, ) - if datasets_for_vocab_creation: for key in datasets_for_vocab_creation: if key not in datasets: @@ -659,17 +658,18 @@ def from_partial_objects( for dataset in datasets.values(): dataset.index_with(model_.vocab) - data_loader_ = data_loader.construct(dataset=datasets["train"]) validation_data_loader = validation_data_loader or data_loader + data_loader_ = data_loader.construct(datasets["train"]) validation_data = datasets.get("validation") + if validation_data is not None: - validation_data_loader_ = validation_data_loader.construct(dataset=validation_data) + validation_data_loader_ = validation_data_loader.construct(validation_data) else: validation_data_loader_ = None test_data = datasets.get("test") if test_data is not None: - test_data_loader = validation_data_loader.construct(dataset=test_data) + test_data_loader = validation_data_loader.construct(test_data) else: test_data_loader = None diff --git a/allennlp/common/testing/model_test_case.py b/allennlp/common/testing/model_test_case.py index 9fdc1b190a4..6a094cb1ed1 100644 --- a/allennlp/common/testing/model_test_case.py +++ b/allennlp/common/testing/model_test_case.py @@ -108,8 +108,8 @@ def ensure_model_can_train_save_and_load( data_loader_params["shuffle"] = False data_loader_params2 = Params(copy.deepcopy(data_loader_params.as_dict())) - data_loader = DataLoader.from_params(dataset=model_dataset, params=data_loader_params) - data_loader2 = DataLoader.from_params(dataset=loaded_dataset, params=data_loader_params2) + data_loader = DataLoader.from_params(data_loader_params).construct(model_dataset) + data_loader2 = DataLoader.from_params(data_loader_params2).construct(loaded_dataset) # We'll check that even if we index the dataset with each model separately, we still get # the same result out. 
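An aside on the pattern this commit experiments with (and which the next commit reverts): the call sites above stop building a DataLoader in one step from params plus a dataset, and instead build it from params alone, binding the dataset afterwards via construct(). Below is a minimal, self-contained sketch of that two-step idea using plain torch.utils.data; the LoaderSpec name and anything else not visible in the surrounding diffs is invented purely for illustration and is not part of the patch.

from dataclasses import dataclass
from torch.utils import data

@dataclass
class LoaderSpec:
    """Toy stand-in for the 'partially constructed' DataLoader idea in the diff below:
    capture the configuration first, supply the dataset later."""
    batch_size: int = 1
    shuffle: bool = False

    def construct(self, dataset: data.Dataset) -> data.DataLoader:
        # Bind the previously captured settings to a concrete dataset.
        return data.DataLoader(dataset, batch_size=self.batch_size, shuffle=self.shuffle)

# Usage mirroring the call sites changed above, e.g.
#   DataLoader.from_params(params.pop("data_loader")).construct(train_data)
# spec = LoaderSpec(batch_size=10)
# loader = spec.construct(my_dataset)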
diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 502230c8d69..abb231f2d31 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -196,15 +196,16 @@ def allennlp_collocate(batch): batch = AllennlpBatch(batch) return batch.as_tensor_dict(batch.get_padding_lengths()) +from allennlp.common.from_params import FromParams + +class DataLoader(FromParams): -class DataLoader(Registrable, data.DataLoader): def __init__( self, - dataset: data.Dataset, batch_size: int = 1, shuffle: bool = False, - sampler: Lazy[Sampler] = None, - batch_sampler: Lazy[BatchSampler] = None, + sampler: Sampler = None, + batch_sampler: BatchSampler = None, num_workers: int = 0, collate_fn=None, pin_memory: bool = False, @@ -214,27 +215,40 @@ def __init__( multiprocessing_context: str = None, ): + self.batch_size = batch_size + self.shuffle = shuffle + self.partially_constructed_sampler = sampler + self.partially_constructed_batch_sampler = batch_sampler + self.num_workers = num_workers + self.pin_memory = pin_memory + self.drop_last = drop_last + self.timeout = timeout + self.worker_init_fn = worker_init_fn + self.multiprocessing_context = multiprocessing_context + + def construct(self, dataset: data.Dataset) -> data.DataLoader: + collate_fn = allennlp_collocate - if batch_sampler is not None: - batch_sampler_ = batch_sampler.construct(data_source=dataset) + if self.partially_constructed_batch_sampler is not None: + batch_sampler = self.partially_constructed_batch_sampler.construct(data_source=dataset) else: - batch_sampler_ = None - if sampler is not None: - sampler_ = sampler.construct(data_source=dataset) + batch_sampler = None + if self.partially_constructed_sampler is not None: + sampler = self.partially_constructed_sampler.construct(data_source=dataset) else: - sampler_ = None + sampler = None - super().__init__( + data.DataLoader( dataset=dataset, - batch_size=batch_size, - shuffle=shuffle, - sampler=sampler_, - batch_sampler=batch_sampler_, - num_workers=num_workers, + batch_size=self.batch_size, + shuffle=self.shuffle, + sampler=sampler, + batch_sampler=batch_sampler, + num_workers=self.num_workers, collate_fn=collate_fn, - pin_memory=pin_memory, - drop_last=drop_last, - timeout=timeout, - worker_init_fn=worker_init_fn, - multiprocessing_context=multiprocessing_context, + pin_memory=self.pin_memory, + drop_last=self.drop_last, + timeout=self.timeout, + worker_init_fn=self.worker_init_fn, + multiprocessing_context=self.multiprocessing_context, ) diff --git a/allennlp/tests/commands/find_learning_rate_test.py b/allennlp/tests/commands/find_learning_rate_test.py index dddd22691f5..cc23637a0fd 100644 --- a/allennlp/tests/commands/find_learning_rate_test.py +++ b/allennlp/tests/commands/find_learning_rate_test.py @@ -180,7 +180,7 @@ def setUp(self): train_data = all_datasets["train"] train_data.index_with(vocab) - data_loader = DataLoader.from_params(dataset=train_data, params=params.pop("data_loader")) + data_loader = DataLoader.from_params(params.pop("data_loader")).construct(train_data) trainer_params = params.pop("trainer") serialization_dir = os.path.join(self.TEST_DIR, "test_search_learning_rate") diff --git a/allennlp/tests/models/simple_tagger_test.py b/allennlp/tests/models/simple_tagger_test.py index e3ebe1ffc78..b0b5e23c45b 100644 --- a/allennlp/tests/models/simple_tagger_test.py +++ b/allennlp/tests/models/simple_tagger_test.py @@ -7,7 +7,6 @@ from allennlp.common.checks import ConfigurationError from allennlp.common.params import 
Params from allennlp.data.dataset_readers import DatasetReader -from allennlp.data.iterators import BasicIterator from allennlp.data.samplers import DataLoader from allennlp.models import Model from allennlp.training import Trainer, TrainerBase @@ -56,7 +55,7 @@ def test_regularization(self): penalty = self.model.get_regularization_penalty() assert penalty == 0 - data_loader = DataLoader(self.instances, batch_size=32) + data_loader = torch.utils.data.DataLoader(self.instances, batch_size=32) trainer = Trainer(self.model, None, data_loader) # optimizer, # You get a RuntimeError if you call `model.forward` twice on the same inputs. @@ -94,7 +93,7 @@ def setUp(self): self.set_up_model(param_file, self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv") params = Params.from_file(param_file) self.reader = DatasetReader.from_params(params["dataset_reader"]) - self.data_loader = DataLoader.from_params(dataset=self.instances, params=params["data_loader"]) + self.data_loader = DataLoader.from_params(params["data_loader"]).construct(self.instances) self.trainer = TrainerBase.from_params( model=self.model, data_loader=self.data_loader, diff --git a/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py b/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py index 5099b8a79f7..81995597356 100644 --- a/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py +++ b/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py @@ -120,7 +120,7 @@ def test_from_params_in_trainer(self): optimizer=Lazy(lambda **kwargs: optim), serialization_dir=self.TEST_DIR, params=params, - data_loader=DataLoader(instances, batch_size=10), + data_loader=torch.utils.data.DataLoader(instances, batch_size=10), ) assert isinstance(trainer._learning_rate_scheduler, SlantedTriangular) @@ -150,7 +150,7 @@ def test_from_params_in_trainer(self): optimizer=Lazy(lambda **kwargs: optim), serialization_dir=self.TEST_DIR, params=params, - data_loader=DataLoader(instances, batch_size=10), + data_loader=torch.utils.data.DataLoader(instances, batch_size=10), ) assert trainer._learning_rate_scheduler.num_epochs == 3 diff --git a/allennlp/tests/training/optimizer_test.py b/allennlp/tests/training/optimizer_test.py index ed89bd5ecf2..d04e959030b 100644 --- a/allennlp/tests/training/optimizer_test.py +++ b/allennlp/tests/training/optimizer_test.py @@ -1,8 +1,9 @@ +from torch.utils.data import DataLoader + from allennlp.common.params import Params from allennlp.common.testing import AllenNlpTestCase from allennlp.data import Vocabulary from allennlp.data.dataset_readers import SequenceTaggingDatasetReader -from allennlp.data.samplers import DataLoader from allennlp.models.simple_tagger import SimpleTagger from allennlp.training import Trainer from allennlp.training.optimizers import Optimizer diff --git a/allennlp/training/trainer.py b/allennlp/training/trainer.py index dfc7b625f2b..02fc2fb8f53 100644 --- a/allennlp/training/trainer.py +++ b/allennlp/training/trainer.py @@ -636,7 +636,6 @@ def train(self) -> Dict[str, Any]: world_size=self._world_size, cuda_device=[self.cuda_device], ) - # Check validation metric for early stopping this_epoch_val_metric = val_metrics[self._validation_metric] self._metric_tracker.add_metric(this_epoch_val_metric) From f944840efb74eb0baf32418aa237949916fc8a2e Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 21 Feb 2020 13:20:44 -0800 Subject: [PATCH 29/52] Revert "add current state of from params changes" This reverts commit 
ad45659886a7e17c5b23a376da13a6547a2bab8c. --- allennlp/commands/evaluate.py | 2 +- allennlp/commands/find_learning_rate.py | 2 +- allennlp/commands/train.py | 12 ++-- allennlp/common/testing/model_test_case.py | 4 +- allennlp/data/samplers/__init__.py | 56 +++++++------------ .../tests/commands/find_learning_rate_test.py | 2 +- allennlp/tests/models/simple_tagger_test.py | 5 +- .../slanted_triangular_test.py | 4 +- allennlp/tests/training/optimizer_test.py | 3 +- allennlp/training/trainer.py | 1 + 10 files changed, 39 insertions(+), 52 deletions(-) diff --git a/allennlp/commands/evaluate.py b/allennlp/commands/evaluate.py index 65954691e7f..53af3049243 100644 --- a/allennlp/commands/evaluate.py +++ b/allennlp/commands/evaluate.py @@ -179,7 +179,7 @@ def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]: data_loader_params = config.pop("data_loader") if args.batch_size: data_loader_params["batch_size"] = args.batch_size - data_loader = DataLoader.from_params(data_loader_params).construct(instances) + data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params) metrics = evaluate(model, data_loader, args.cuda_device, args.batch_weight_key) diff --git a/allennlp/commands/find_learning_rate.py b/allennlp/commands/find_learning_rate.py index e9af5919160..01536b310bc 100644 --- a/allennlp/commands/find_learning_rate.py +++ b/allennlp/commands/find_learning_rate.py @@ -216,7 +216,7 @@ def find_learning_rate_model( train_data = all_datasets["train"] train_data.index_with(vocab) model = Model.from_params(vocab=vocab, params=params.pop("model")) - data_loader = DataLoader.from_params(params.pop("data_loader")).construct(train_data) + data_loader = DataLoader.from_params(dataset=train_data, params=params.pop("data_loader")) trainer_params = params.pop("trainer") diff --git a/allennlp/commands/train.py b/allennlp/commands/train.py index 444bab89546..cf4da7f9673 100644 --- a/allennlp/commands/train.py +++ b/allennlp/commands/train.py @@ -549,13 +549,13 @@ def from_partial_objects( dataset_reader: DatasetReader, train_data_path: str, model: Lazy[Model], - data_loader: DataLoader, + data_loader: Lazy[DataLoader], trainer: Lazy[TrainerBase], vocabulary: Lazy[Vocabulary] = None, datasets_for_vocab_creation: List[str] = None, validation_dataset_reader: DatasetReader = None, validation_data_path: str = None, - validation_data_loader: DataLoader = None, + validation_data_loader: Lazy[DataLoader] = None, test_data_path: str = None, evaluate_on_test: bool = False, ) -> "TrainModel": @@ -631,6 +631,7 @@ def from_partial_objects( validation_data_path=validation_data_path, test_data_path=test_data_path, ) + if datasets_for_vocab_creation: for key in datasets_for_vocab_creation: if key not in datasets: @@ -658,18 +659,17 @@ def from_partial_objects( for dataset in datasets.values(): dataset.index_with(model_.vocab) + data_loader_ = data_loader.construct(dataset=datasets["train"]) validation_data_loader = validation_data_loader or data_loader - data_loader_ = data_loader.construct(datasets["train"]) validation_data = datasets.get("validation") - if validation_data is not None: - validation_data_loader_ = validation_data_loader.construct(validation_data) + validation_data_loader_ = validation_data_loader.construct(dataset=validation_data) else: validation_data_loader_ = None test_data = datasets.get("test") if test_data is not None: - test_data_loader = validation_data_loader.construct(test_data) + test_data_loader = validation_data_loader.construct(dataset=test_data) else: 
test_data_loader = None diff --git a/allennlp/common/testing/model_test_case.py b/allennlp/common/testing/model_test_case.py index 6a094cb1ed1..9fdc1b190a4 100644 --- a/allennlp/common/testing/model_test_case.py +++ b/allennlp/common/testing/model_test_case.py @@ -108,8 +108,8 @@ def ensure_model_can_train_save_and_load( data_loader_params["shuffle"] = False data_loader_params2 = Params(copy.deepcopy(data_loader_params.as_dict())) - data_loader = DataLoader.from_params(data_loader_params).construct(model_dataset) - data_loader2 = DataLoader.from_params(data_loader_params2).construct(loaded_dataset) + data_loader = DataLoader.from_params(dataset=model_dataset, params=data_loader_params) + data_loader2 = DataLoader.from_params(dataset=loaded_dataset, params=data_loader_params2) # We'll check that even if we index the dataset with each model separately, we still get # the same result out. diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index abb231f2d31..502230c8d69 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -196,16 +196,15 @@ def allennlp_collocate(batch): batch = AllennlpBatch(batch) return batch.as_tensor_dict(batch.get_padding_lengths()) -from allennlp.common.from_params import FromParams - -class DataLoader(FromParams): +class DataLoader(Registrable, data.DataLoader): def __init__( self, + dataset: data.Dataset, batch_size: int = 1, shuffle: bool = False, - sampler: Sampler = None, - batch_sampler: BatchSampler = None, + sampler: Lazy[Sampler] = None, + batch_sampler: Lazy[BatchSampler] = None, num_workers: int = 0, collate_fn=None, pin_memory: bool = False, @@ -215,40 +214,27 @@ def __init__( multiprocessing_context: str = None, ): - self.batch_size = batch_size - self.shuffle = shuffle - self.partially_constructed_sampler = sampler - self.partially_constructed_batch_sampler = batch_sampler - self.num_workers = num_workers - self.pin_memory = pin_memory - self.drop_last = drop_last - self.timeout = timeout - self.worker_init_fn = worker_init_fn - self.multiprocessing_context = multiprocessing_context - - def construct(self, dataset: data.Dataset) -> data.DataLoader: - collate_fn = allennlp_collocate - if self.partially_constructed_batch_sampler is not None: - batch_sampler = self.partially_constructed_batch_sampler.construct(data_source=dataset) + if batch_sampler is not None: + batch_sampler_ = batch_sampler.construct(data_source=dataset) else: - batch_sampler = None - if self.partially_constructed_sampler is not None: - sampler = self.partially_constructed_sampler.construct(data_source=dataset) + batch_sampler_ = None + if sampler is not None: + sampler_ = sampler.construct(data_source=dataset) else: - sampler = None + sampler_ = None - data.DataLoader( + super().__init__( dataset=dataset, - batch_size=self.batch_size, - shuffle=self.shuffle, - sampler=sampler, - batch_sampler=batch_sampler, - num_workers=self.num_workers, + batch_size=batch_size, + shuffle=shuffle, + sampler=sampler_, + batch_sampler=batch_sampler_, + num_workers=num_workers, collate_fn=collate_fn, - pin_memory=self.pin_memory, - drop_last=self.drop_last, - timeout=self.timeout, - worker_init_fn=self.worker_init_fn, - multiprocessing_context=self.multiprocessing_context, + pin_memory=pin_memory, + drop_last=drop_last, + timeout=timeout, + worker_init_fn=worker_init_fn, + multiprocessing_context=multiprocessing_context, ) diff --git a/allennlp/tests/commands/find_learning_rate_test.py 
b/allennlp/tests/commands/find_learning_rate_test.py index cc23637a0fd..dddd22691f5 100644 --- a/allennlp/tests/commands/find_learning_rate_test.py +++ b/allennlp/tests/commands/find_learning_rate_test.py @@ -180,7 +180,7 @@ def setUp(self): train_data = all_datasets["train"] train_data.index_with(vocab) - data_loader = DataLoader.from_params(params.pop("data_loader")).construct(train_data) + data_loader = DataLoader.from_params(dataset=train_data, params=params.pop("data_loader")) trainer_params = params.pop("trainer") serialization_dir = os.path.join(self.TEST_DIR, "test_search_learning_rate") diff --git a/allennlp/tests/models/simple_tagger_test.py b/allennlp/tests/models/simple_tagger_test.py index b0b5e23c45b..e3ebe1ffc78 100644 --- a/allennlp/tests/models/simple_tagger_test.py +++ b/allennlp/tests/models/simple_tagger_test.py @@ -7,6 +7,7 @@ from allennlp.common.checks import ConfigurationError from allennlp.common.params import Params from allennlp.data.dataset_readers import DatasetReader +from allennlp.data.iterators import BasicIterator from allennlp.data.samplers import DataLoader from allennlp.models import Model from allennlp.training import Trainer, TrainerBase @@ -55,7 +56,7 @@ def test_regularization(self): penalty = self.model.get_regularization_penalty() assert penalty == 0 - data_loader = torch.utils.data.DataLoader(self.instances, batch_size=32) + data_loader = DataLoader(self.instances, batch_size=32) trainer = Trainer(self.model, None, data_loader) # optimizer, # You get a RuntimeError if you call `model.forward` twice on the same inputs. @@ -93,7 +94,7 @@ def setUp(self): self.set_up_model(param_file, self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv") params = Params.from_file(param_file) self.reader = DatasetReader.from_params(params["dataset_reader"]) - self.data_loader = DataLoader.from_params(params["data_loader"]).construct(self.instances) + self.data_loader = DataLoader.from_params(dataset=self.instances, params=params["data_loader"]) self.trainer = TrainerBase.from_params( model=self.model, data_loader=self.data_loader, diff --git a/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py b/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py index 81995597356..5099b8a79f7 100644 --- a/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py +++ b/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py @@ -120,7 +120,7 @@ def test_from_params_in_trainer(self): optimizer=Lazy(lambda **kwargs: optim), serialization_dir=self.TEST_DIR, params=params, - data_loader=torch.utils.data.DataLoader(instances, batch_size=10), + data_loader=DataLoader(instances, batch_size=10), ) assert isinstance(trainer._learning_rate_scheduler, SlantedTriangular) @@ -150,7 +150,7 @@ def test_from_params_in_trainer(self): optimizer=Lazy(lambda **kwargs: optim), serialization_dir=self.TEST_DIR, params=params, - data_loader=torch.utils.data.DataLoader(instances, batch_size=10), + data_loader=DataLoader(instances, batch_size=10), ) assert trainer._learning_rate_scheduler.num_epochs == 3 diff --git a/allennlp/tests/training/optimizer_test.py b/allennlp/tests/training/optimizer_test.py index d04e959030b..ed89bd5ecf2 100644 --- a/allennlp/tests/training/optimizer_test.py +++ b/allennlp/tests/training/optimizer_test.py @@ -1,9 +1,8 @@ -from torch.utils.data import DataLoader - from allennlp.common.params import Params from allennlp.common.testing import AllenNlpTestCase from allennlp.data import Vocabulary from 
allennlp.data.dataset_readers import SequenceTaggingDatasetReader +from allennlp.data.samplers import DataLoader from allennlp.models.simple_tagger import SimpleTagger from allennlp.training import Trainer from allennlp.training.optimizers import Optimizer diff --git a/allennlp/training/trainer.py b/allennlp/training/trainer.py index 02fc2fb8f53..dfc7b625f2b 100644 --- a/allennlp/training/trainer.py +++ b/allennlp/training/trainer.py @@ -636,6 +636,7 @@ def train(self) -> Dict[str, Any]: world_size=self._world_size, cuda_device=[self.cuda_device], ) + # Check validation metric for early stopping this_epoch_val_metric = val_metrics[self._validation_metric] self._metric_tracker.add_metric(this_epoch_val_metric) From be1f58cb8bb5f30c735a535128940a7cb4f9f6ec Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 21 Feb 2020 13:44:25 -0800 Subject: [PATCH 30/52] updated understanding of Lazy --- allennlp/commands/train.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/allennlp/commands/train.py b/allennlp/commands/train.py index cf4da7f9673..704168f6ed2 100644 --- a/allennlp/commands/train.py +++ b/allennlp/commands/train.py @@ -660,16 +660,23 @@ def from_partial_objects( dataset.index_with(model_.vocab) data_loader_ = data_loader.construct(dataset=datasets["train"]) - validation_data_loader = validation_data_loader or data_loader validation_data = datasets.get("validation") if validation_data is not None: + # Because of the way Lazy[T] works, we can't check it's existence + # _before_ we've tried to construct it. It returns None if it is not + # present, so we try to construct it first, and then afterward back off + # to the data_loader configuration used for training if it returns None. validation_data_loader_ = validation_data_loader.construct(dataset=validation_data) + if validation_data_loader_ is None: + validation_data_loader_ = data_loader.construct(dataset=validation_data) else: validation_data_loader_ = None test_data = datasets.get("test") if test_data is not None: test_data_loader = validation_data_loader.construct(dataset=test_data) + if test_data_loader is None: + test_data_loader = data_loader.construct(dataset=test_data) else: test_data_loader = None From ebdabe06dbf3e0d2b7f72fa8280965ff6011c07a Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 21 Feb 2020 13:49:28 -0800 Subject: [PATCH 31/52] add discussion of None comparison to Lazy --- allennlp/common/lazy.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/allennlp/common/lazy.py b/allennlp/common/lazy.py index cead0d251b1..188c0dd4a9f 100644 --- a/allennlp/common/lazy.py +++ b/allennlp/common/lazy.py @@ -1,4 +1,4 @@ -from typing import Callable, Generic, TypeVar +from typing import Callable, Generic, TypeVar, Optional T = TypeVar("T") @@ -20,10 +20,15 @@ class Lazy(Generic[T]): The actual implementation here is incredibly simple; the logic that handles the lazy construction is actually found in `FromParams`, where we have a special case for a `Lazy` type annotation. + + !!! Warning + The way this class is used in from_params means that optional constructor arguments CANNOT + be compared to `None` _before_ it is constructed. Instead, if the optional annotation + is indeed `None`, `construct` actually returns `None`. 
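The warning above is easiest to see with a small, self-contained sketch. The `Lazy` and `Scheduler` classes below are minimal stand-ins that mirror the behaviour described here, not the library implementations, and the names are illustrative only:

```
from typing import Callable, Generic, Optional, TypeVar

T = TypeVar("T")


class Lazy(Generic[T]):
    # Minimal mirror of the class documented above: construct() may return
    # None when the wrapped constructor produces nothing.
    def __init__(self, constructor: Callable[..., T]):
        self._constructor = constructor

    def construct(self, **kwargs) -> Optional[T]:
        return self._constructor(**kwargs)


class Scheduler:
    def __init__(self, num_epochs: int = 1):
        self.num_epochs = num_epochs


def build(scheduler: Lazy[Scheduler]) -> Scheduler:
    # WRONG: `scheduler` is always a Lazy wrapper here, never None, so a
    # `scheduler or default` check can never fall back to the default.
    # CORRECT: construct first, then check the *result* for None.
    constructed = scheduler.construct(num_epochs=3)
    return constructed if constructed is not None else Scheduler()


print(build(Lazy(Scheduler)).num_epochs)              # 3
print(build(Lazy(lambda **kwargs: None)).num_epochs)  # 1, the fallback
```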
""" def __init__(self, constructor: Callable[..., T]): self._constructor = constructor - def construct(self, **kwargs) -> T: + def construct(self, **kwargs) -> Optional[T]: return self._constructor(**kwargs) From 869373932e47cc799ce4772364ca49bf9ca4c169 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 21 Feb 2020 13:51:27 -0800 Subject: [PATCH 32/52] lint --- allennlp/tests/models/simple_tagger_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/allennlp/tests/models/simple_tagger_test.py b/allennlp/tests/models/simple_tagger_test.py index e4cfcf1aab8..08b6369f5cc 100644 --- a/allennlp/tests/models/simple_tagger_test.py +++ b/allennlp/tests/models/simple_tagger_test.py @@ -7,7 +7,6 @@ from allennlp.common.checks import ConfigurationError from allennlp.common.params import Params from allennlp.data.dataset_readers import DatasetReader -from allennlp.data.iterators import BasicIterator from allennlp.data.samplers import DataLoader from allennlp.models import Model from allennlp.training import Trainer, TrainerBase @@ -94,7 +93,9 @@ def setUp(self): self.set_up_model(param_file, self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv") params = Params.from_file(param_file) self.reader = DatasetReader.from_params(params["dataset_reader"]) - self.data_loader = DataLoader.from_params(dataset=self.instances, params=params["data_loader"]) + self.data_loader = DataLoader.from_params( + dataset=self.instances, params=params["data_loader"] + ) self.trainer = TrainerBase.from_params( model=self.model, data_loader=self.data_loader, From b9b06508ceb9cc3f3f01c87872c1dca89711dfaa Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 21 Feb 2020 15:53:26 -0800 Subject: [PATCH 33/52] it's a hard doc life --- allennlp/data/samplers/__init__.py | 118 ++++++++++++++++++++++------- 1 file changed, 90 insertions(+), 28 deletions(-) diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 502230c8d69..42b8a529722 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -18,12 +18,22 @@ class Sampler(Registrable): + """ + A wrapper around the pytorch [Sampler](https://pytorch.org/docs/stable/_modules/torch/utils/data/sampler.html) + which allows us to register it with `Registrable.` + """ + def __iter__(self) -> Iterable[int]: raise NotImplementedError class BatchSampler(Registrable): + """ + A wrapper around the pytorch [BatchSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.BatchSampler) + which allows us to register it with `Registrable.` + """ + def __iter__(self) -> Iterable[List[int]]: raise NotImplementedError @@ -37,14 +47,18 @@ def __init__(self, data_source: data.Dataset, **kwargs): @Sampler.register("random") class RandomSampler(Sampler, data.RandomSampler): - r"""Samples elements randomly. If without replacement, then sample from a shuffled dataset. - If with replacement, then user can specify :attr:`num_samples` to draw. - - Arguments: - data_source (Dataset): dataset to sample from - replacement (bool): samples are drawn with replacement if ``True``, default=``False`` - num_samples (int): number of samples to draw, default=`len(dataset)`. This argument - is supposed to be specified only when `replacement` is ``True``. + """ + Samples elements randomly. If without replacement, then sample from a shuffled dataset. + If with replacement, then user can specify `num_samples` to draw. + + # Parameters + data_source: `Dataset`, reqired + The dataset to sample from. 
+ replacement : `bool`, optional(default = False) + Samples are drawn with replacement if `True`. + num_samples: `int` (default = `len(dataset)`) + The number of samples to draw. This argument + is supposed to be specified only when `replacement` is ``True``. """ def __init__( @@ -59,10 +73,12 @@ def __init__( @Sampler.register("subset_random") class SubsetRandomSampler(Sampler, data.SubsetRandomSampler): - r"""Samples elements randomly from a given list of indices, without replacement. + """ + Samples elements randomly from a given list of indices, without replacement. - Arguments: - indices (sequence): a sequence of indices + # Parameters + indices: `List[int]` + a sequence of indices to sample from. """ def __init__(self, indices: List[int], **kwargs): @@ -71,20 +87,26 @@ def __init__(self, indices: List[int], **kwargs): @Sampler.register("weighted_random") class WeightedRandomSampler(Sampler, data.WeightedRandomSampler): - r"""Samples elements from ``[0,..,len(weights)-1]`` with given probabilities (weights). - - Args: - weights (sequence) : a sequence of weights, not necessary summing up to one - num_samples (int): number of samples to draw - replacement (bool): if ``True``, samples are drawn with replacement. - If not, they are drawn without replacement, which means that when a - sample index is drawn for a row, it cannot be drawn again for that row. + """ + Samples elements from ``[0,..,len(weights)-1]`` with given probabilities (weights). + + # Parameters: + weights : `List[float]` + A sequence of weights, not necessary summing up to one. + num_samples : `int` + The number of samples to draw. + replacement : `bool` + If ``True``, samples are drawn with replacement. + If not, they are drawn without replacement, which means that when a + sample index is drawn for a row, it cannot be drawn again for that row. Example: + ``` >>> list(WeightedRandomSampler([0.1, 0.9, 0.4, 0.7, 3.0, 0.6], 5, replacement=True)) [0, 0, 0, 1, 0] >>> list(WeightedRandomSampler([0.9, 0.4, 0.05, 0.2, 0.3, 0.1], 5, replacement=False)) [0, 1, 4, 3, 2] + ``` """ def __init__(self, weights: List[float], num_samples: int, replacement: bool = True, **kwargs): @@ -93,19 +115,25 @@ def __init__(self, weights: List[float], num_samples: int, replacement: bool = T @BatchSampler.register("basic") class BasicBatchSampler(BatchSampler, data.BatchSampler): - r"""Wraps another sampler to yield a mini-batch of indices. + """ + Wraps another sampler to yield a mini-batch of indices. - Args: - sampler (Sampler): Base sampler. - batch_size (int): Size of mini-batch. - drop_last (bool): If ``True``, the sampler will drop the last batch if - its size would be less than ``batch_size`` + # Parameters + sampler: `Sampler` + The base sampler. + batch_size : `int` + The size of the batch. + drop_last : `bool` + If `True`, the sampler will drop the last batch if + its size would be less than batch_size`. Example: + ``` >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False)) [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=True)) [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + ``` """ def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool, **kwargs): @@ -114,6 +142,40 @@ def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool, **kwargs) @BatchSampler.register("bucket") class BatchInstanceSampler(BatchSampler): + """ + An sampler which by default, argsorts batches with respect to the maximum input lengths `per + batch`. 
Additionally, you can provide a list of field names and padding keys which the dataset + will be sorted by before doing this batching, causing inputs with similar length to be batched + together, making computation more efficient (as less time is wasted on padded elements of the + batch). + + # Parameters + + sorting_keys : List[Tuple[str, str]], optional + To bucket inputs into batches, we want to group the instances by padding length, so that we + minimize the amount of padding necessary per batch. In order to do this, we need to know + which fields need what type of padding, and in what order. + + Specifying the right keys for this is a bit cryptic, so if this is not given we try to + auto-detect the right keys by iterating once through the data up front, reading all of the + padding keys and seeing which one has the longest length. We use that one for padding. + This should give reasonable results in most cases. + + When you need to specify this yourself, you can create an instance from your dataset and + call `Instance.get_padding_lengths()` to see a list of all keys used in your data. You + should give one or more of those as the sorting keys here. + padding_noise : float, optional (default=.1) + When sorting by padding length, we add a bit of noise to the lengths, so that the sorting + isn't deterministic. This parameter determines how much noise we add, as a percentage of + the actual padding value for each instance. + + Note that if you specify `max_instances_in_memory`, the first batch will only be the + biggest from among the first "max instances in memory" instances. + batch_size : int, optional, (default = 32) + The size of each batch of instances yielded when calling the iterator. + + """ + def __init__( self, data_source: data.Dataset, @@ -130,9 +192,9 @@ def __init__( def _argsort_by_padding(self, instances: List[Instance]) -> List[int]: """ - Sorts the instances by their padding lengths, using the keys in - `sorting_keys` (in the order in which they are provided). `sorting_keys` is a list of - `(field_name, padding_key)` tuples. + Argsorts the instances by their padding lengths, using the keys in + `sorting_keys` (in the order in which they are provided). `sorting_keys` + is a list of `(field_name, padding_key)` tuples. 
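A stripped-down sketch of the bucketing idea described above, using plain dictionaries of padding lengths in place of `Instance` objects (the toy field names are illustrative only):

```
import random
from typing import Dict, List, Tuple


def argsort_by_padding(
    padding_lengths: List[Dict[str, Dict[str, int]]],
    sorting_keys: List[Tuple[str, str]],
    padding_noise: float = 0.1,
) -> List[int]:
    # Add a little multiplicative noise to each length so the sort is not
    # fully deterministic, then argsort by the (noisy) sorting-key lengths.
    keyed = []
    for index, lengths in enumerate(padding_lengths):
        noisy = [
            lengths[field][key] * (1.0 + random.uniform(-padding_noise, padding_noise))
            for field, key in sorting_keys
        ]
        keyed.append((noisy, index))
    keyed.sort(key=lambda pair: pair[0])
    return [index for _, index in keyed]


toy_lengths = [
    {"premise": {"num_tokens": 12}},
    {"premise": {"num_tokens": 3}},
    {"premise": {"num_tokens": 7}},
]
print(argsort_by_padding(toy_lengths, [("premise", "num_tokens")]))  # likely [1, 2, 0]
```

Grouping the resulting indices into consecutive chunks of `batch_size` then yields batches whose members need similar amounts of padding.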
""" if not self._sorting_keys: logger.info("No sorting keys given; trying to guess a good one") From 88314c751b54403083a2c578e776a7f757188218 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 21 Feb 2020 15:59:24 -0800 Subject: [PATCH 34/52] pull samplers into separate file --- allennlp/data/samplers/__init__.py | 259 ++--------------------------- allennlp/data/samplers/samplers.py | 247 +++++++++++++++++++++++++++ 2 files changed, 259 insertions(+), 247 deletions(-) create mode 100644 allennlp/data/samplers/samplers.py diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 42b8a529722..4df3dbe42a7 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -1,261 +1,26 @@ -from typing import List, Iterable, Tuple, Dict, cast import logging from torch.utils import data from allennlp.common.registrable import Registrable -from allennlp.common.util import add_noise_to_dict_values, lazy_groups_of from allennlp.common.lazy import Lazy -from allennlp.data.batch import Batch as AllennlpBatch -from allennlp.data.instance import Instance -from allennlp.data.vocabulary import Vocabulary -from allennlp.data import Token -from allennlp.common.file_utils import cached_path -from allennlp.data.fields import Field, TextField, LabelField, MetadataField -from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer +from allennlp.data.batch import Batch +from allennlp.data.samplers.samplers import ( + Sampler, + BatchSampler, + SequentialSampler, + SubsetRandomSampler, + WeightedRandomSampler, + RandomSampler, + BasicBatchSampler, + BatchInstanceSampler, +) logger = logging.getLogger(__name__) -class Sampler(Registrable): - """ - A wrapper around the pytorch [Sampler](https://pytorch.org/docs/stable/_modules/torch/utils/data/sampler.html) - which allows us to register it with `Registrable.` - """ - - def __iter__(self) -> Iterable[int]: - - raise NotImplementedError - - -class BatchSampler(Registrable): - """ - A wrapper around the pytorch [BatchSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.BatchSampler) - which allows us to register it with `Registrable.` - """ - - def __iter__(self) -> Iterable[List[int]]: - - raise NotImplementedError - - -@Sampler.register("sequential") -class SequentialSampler(Sampler, data.SequentialSampler): - def __init__(self, data_source: data.Dataset, **kwargs): - super().__init__(data_source) - - -@Sampler.register("random") -class RandomSampler(Sampler, data.RandomSampler): - """ - Samples elements randomly. If without replacement, then sample from a shuffled dataset. - If with replacement, then user can specify `num_samples` to draw. - - # Parameters - data_source: `Dataset`, reqired - The dataset to sample from. - replacement : `bool`, optional(default = False) - Samples are drawn with replacement if `True`. - num_samples: `int` (default = `len(dataset)`) - The number of samples to draw. This argument - is supposed to be specified only when `replacement` is ``True``. - """ - - def __init__( - self, - data_source: data.Dataset, - replacement: bool = False, - num_samples: int = None, - **kwargs, - ): - super().__init__(data_source, replacement, num_samples) - - -@Sampler.register("subset_random") -class SubsetRandomSampler(Sampler, data.SubsetRandomSampler): - """ - Samples elements randomly from a given list of indices, without replacement. - - # Parameters - indices: `List[int]` - a sequence of indices to sample from. 
- """ - - def __init__(self, indices: List[int], **kwargs): - super().__init__(indices) - - -@Sampler.register("weighted_random") -class WeightedRandomSampler(Sampler, data.WeightedRandomSampler): - """ - Samples elements from ``[0,..,len(weights)-1]`` with given probabilities (weights). - - # Parameters: - weights : `List[float]` - A sequence of weights, not necessary summing up to one. - num_samples : `int` - The number of samples to draw. - replacement : `bool` - If ``True``, samples are drawn with replacement. - If not, they are drawn without replacement, which means that when a - sample index is drawn for a row, it cannot be drawn again for that row. - - Example: - ``` - >>> list(WeightedRandomSampler([0.1, 0.9, 0.4, 0.7, 3.0, 0.6], 5, replacement=True)) - [0, 0, 0, 1, 0] - >>> list(WeightedRandomSampler([0.9, 0.4, 0.05, 0.2, 0.3, 0.1], 5, replacement=False)) - [0, 1, 4, 3, 2] - ``` - """ - - def __init__(self, weights: List[float], num_samples: int, replacement: bool = True, **kwargs): - super().__init__(weights, num_samples, replacement) - - -@BatchSampler.register("basic") -class BasicBatchSampler(BatchSampler, data.BatchSampler): - """ - Wraps another sampler to yield a mini-batch of indices. - - # Parameters - sampler: `Sampler` - The base sampler. - batch_size : `int` - The size of the batch. - drop_last : `bool` - If `True`, the sampler will drop the last batch if - its size would be less than batch_size`. - - Example: - ``` - >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False)) - [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] - >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=True)) - [[0, 1, 2], [3, 4, 5], [6, 7, 8]] - ``` - """ - - def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool, **kwargs): - super().__init__(sampler, batch_size, drop_last) - - -@BatchSampler.register("bucket") -class BatchInstanceSampler(BatchSampler): - """ - An sampler which by default, argsorts batches with respect to the maximum input lengths `per - batch`. Additionally, you can provide a list of field names and padding keys which the dataset - will be sorted by before doing this batching, causing inputs with similar length to be batched - together, making computation more efficient (as less time is wasted on padded elements of the - batch). - - # Parameters - - sorting_keys : List[Tuple[str, str]], optional - To bucket inputs into batches, we want to group the instances by padding length, so that we - minimize the amount of padding necessary per batch. In order to do this, we need to know - which fields need what type of padding, and in what order. - - Specifying the right keys for this is a bit cryptic, so if this is not given we try to - auto-detect the right keys by iterating once through the data up front, reading all of the - padding keys and seeing which one has the longest length. We use that one for padding. - This should give reasonable results in most cases. - - When you need to specify this yourself, you can create an instance from your dataset and - call `Instance.get_padding_lengths()` to see a list of all keys used in your data. You - should give one or more of those as the sorting keys here. - padding_noise : float, optional (default=.1) - When sorting by padding length, we add a bit of noise to the lengths, so that the sorting - isn't deterministic. This parameter determines how much noise we add, as a percentage of - the actual padding value for each instance. 
- - Note that if you specify `max_instances_in_memory`, the first batch will only be the - biggest from among the first "max instances in memory" instances. - batch_size : int, optional, (default = 32) - The size of each batch of instances yielded when calling the iterator. - - """ - - def __init__( - self, - data_source: data.Dataset, - batch_size: int, - sorting_keys: List[Tuple[str, str]] = None, - padding_noise: float = 0.1, - ): - - self.vocab = data_source.vocab - self._sorting_keys = sorting_keys - self._padding_noise = padding_noise - self._batch_size = batch_size - self.data_source = data_source - - def _argsort_by_padding(self, instances: List[Instance]) -> List[int]: - """ - Argsorts the instances by their padding lengths, using the keys in - `sorting_keys` (in the order in which they are provided). `sorting_keys` - is a list of `(field_name, padding_key)` tuples. - """ - if not self._sorting_keys: - logger.info("No sorting keys given; trying to guess a good one") - self._guess_sorting_keys(instances) - logger.info(f"Using {self._sorting_keys} as the sorting keys") - instances_with_lengths = [] - for instance in instances: - # Make sure instance is indexed before calling .get_padding - instance.index_fields(self.vocab) - padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) - if self._padding_noise > 0.0: - noisy_lengths = {} - for field_name, field_lengths in padding_lengths.items(): - noisy_lengths[field_name] = add_noise_to_dict_values( - field_lengths, self._padding_noise - ) - padding_lengths = noisy_lengths - instance_with_lengths = ( - [ - padding_lengths[field_name][padding_key] - for (field_name, padding_key) in self._sorting_keys - ], - instance, - ) - instances_with_lengths.append(instance_with_lengths) - with_indices = [(x, i) for i, x in enumerate(instances_with_lengths)] - with_indices.sort(key=lambda x: x[0][0]) - return [instance_with_index[-1] for instance_with_index in with_indices] - - def __iter__(self) -> Iterable[List[int]]: - - indices = self._argsort_by_padding(self.data_source) - for group in lazy_groups_of(indices, self._batch_size): - yield list(group) - - def _guess_sorting_keys(self, instances: List[Instance]) -> None: - max_length = 0.0 - longest_padding_key: Tuple[str, str] = None - for instance in instances: - instance.index_fields(self.vocab) - padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) - for field_name, field_padding in padding_lengths.items(): - for padding_key, length in field_padding.items(): - if length > max_length: - max_length = length - longest_padding_key = (field_name, padding_key) - if not longest_padding_key: - # This shouldn't ever happen (you basically have to have an empty instance list), but - # just in case... 
- raise AssertionError( - "Found no field that needed padding; we are surprised you got this error, please " - "open an issue on github" - ) - self._sorting_keys = [longest_padding_key] - - def __len__(self): - return len(self.data_source) // self._batch_size - - def allennlp_collocate(batch): - batch = AllennlpBatch(batch) + batch = Batch(batch) return batch.as_tensor_dict(batch.get_padding_lengths()) diff --git a/allennlp/data/samplers/samplers.py b/allennlp/data/samplers/samplers.py new file mode 100644 index 00000000000..bb82d37b979 --- /dev/null +++ b/allennlp/data/samplers/samplers.py @@ -0,0 +1,247 @@ +from typing import List, Iterable, Tuple, Dict, cast +import logging +from torch.utils import data + +from allennlp.common.registrable import Registrable + +from allennlp.common.util import add_noise_to_dict_values, lazy_groups_of +from allennlp.data.instance import Instance + +logger = logging.getLogger(__name__) + + +class Sampler(Registrable): + """ + A wrapper around the pytorch [Sampler](https://pytorch.org/docs/stable/_modules/torch/utils/data/sampler.html) + which allows us to register it with `Registrable.` + """ + + def __iter__(self) -> Iterable[int]: + + raise NotImplementedError + + +class BatchSampler(Registrable): + """ + A wrapper around the pytorch [BatchSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.BatchSampler) + which allows us to register it with `Registrable.` + """ + + def __iter__(self) -> Iterable[List[int]]: + + raise NotImplementedError + + +@Sampler.register("sequential") +class SequentialSampler(Sampler, data.SequentialSampler): + def __init__(self, data_source: data.Dataset, **kwargs): + super().__init__(data_source) + + +@Sampler.register("random") +class RandomSampler(Sampler, data.RandomSampler): + """ + Samples elements randomly. If without replacement, then sample from a shuffled dataset. + If with replacement, then user can specify `num_samples` to draw. + + # Parameters + data_source: `Dataset`, reqired + The dataset to sample from. + replacement : `bool`, optional(default = False) + Samples are drawn with replacement if `True`. + num_samples: `int` (default = `len(dataset)`) + The number of samples to draw. This argument + is supposed to be specified only when `replacement` is ``True``. + """ + + def __init__( + self, + data_source: data.Dataset, + replacement: bool = False, + num_samples: int = None, + **kwargs, + ): + super().__init__(data_source, replacement, num_samples) + + +@Sampler.register("subset_random") +class SubsetRandomSampler(Sampler, data.SubsetRandomSampler): + """ + Samples elements randomly from a given list of indices, without replacement. + + # Parameters + indices: `List[int]` + a sequence of indices to sample from. + """ + + def __init__(self, indices: List[int], **kwargs): + super().__init__(indices) + + +@Sampler.register("weighted_random") +class WeightedRandomSampler(Sampler, data.WeightedRandomSampler): + """ + Samples elements from ``[0,..,len(weights)-1]`` with given probabilities (weights). + + # Parameters: + weights : `List[float]` + A sequence of weights, not necessary summing up to one. + num_samples : `int` + The number of samples to draw. + replacement : `bool` + If ``True``, samples are drawn with replacement. + If not, they are drawn without replacement, which means that when a + sample index is drawn for a row, it cannot be drawn again for that row. 
+ + Example: + ``` + >>> list(WeightedRandomSampler([0.1, 0.9, 0.4, 0.7, 3.0, 0.6], 5, replacement=True)) + [0, 0, 0, 1, 0] + >>> list(WeightedRandomSampler([0.9, 0.4, 0.05, 0.2, 0.3, 0.1], 5, replacement=False)) + [0, 1, 4, 3, 2] + ``` + """ + + def __init__(self, weights: List[float], num_samples: int, replacement: bool = True, **kwargs): + super().__init__(weights, num_samples, replacement) + + +@BatchSampler.register("basic") +class BasicBatchSampler(BatchSampler, data.BatchSampler): + """ + Wraps another sampler to yield a mini-batch of indices. + + # Parameters + sampler: `Sampler` + The base sampler. + batch_size : `int` + The size of the batch. + drop_last : `bool` + If `True`, the sampler will drop the last batch if + its size would be less than batch_size`. + + Example: + ``` + >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] + >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=True)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + ``` + """ + + def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool, **kwargs): + super().__init__(sampler, batch_size, drop_last) + + +@BatchSampler.register("bucket") +class BatchInstanceSampler(BatchSampler): + """ + An sampler which by default, argsorts batches with respect to the maximum input lengths `per + batch`. Additionally, you can provide a list of field names and padding keys which the dataset + will be sorted by before doing this batching, causing inputs with similar length to be batched + together, making computation more efficient (as less time is wasted on padded elements of the + batch). + + # Parameters + + sorting_keys : List[Tuple[str, str]], optional + To bucket inputs into batches, we want to group the instances by padding length, so that we + minimize the amount of padding necessary per batch. In order to do this, we need to know + which fields need what type of padding, and in what order. + + Specifying the right keys for this is a bit cryptic, so if this is not given we try to + auto-detect the right keys by iterating once through the data up front, reading all of the + padding keys and seeing which one has the longest length. We use that one for padding. + This should give reasonable results in most cases. + + When you need to specify this yourself, you can create an instance from your dataset and + call `Instance.get_padding_lengths()` to see a list of all keys used in your data. You + should give one or more of those as the sorting keys here. + padding_noise : float, optional (default=.1) + When sorting by padding length, we add a bit of noise to the lengths, so that the sorting + isn't deterministic. This parameter determines how much noise we add, as a percentage of + the actual padding value for each instance. + + Note that if you specify `max_instances_in_memory`, the first batch will only be the + biggest from among the first "max instances in memory" instances. + batch_size : int, optional, (default = 32) + The size of each batch of instances yielded when calling the iterator. 
+ + """ + + def __init__( + self, + data_source: data.Dataset, + batch_size: int, + sorting_keys: List[Tuple[str, str]] = None, + padding_noise: float = 0.1, + ): + + self.vocab = data_source.vocab + self._sorting_keys = sorting_keys + self._padding_noise = padding_noise + self._batch_size = batch_size + self.data_source = data_source + + def _argsort_by_padding(self, instances: List[Instance]) -> List[int]: + """ + Argsorts the instances by their padding lengths, using the keys in + `sorting_keys` (in the order in which they are provided). `sorting_keys` + is a list of `(field_name, padding_key)` tuples. + """ + if not self._sorting_keys: + logger.info("No sorting keys given; trying to guess a good one") + self._guess_sorting_keys(instances) + logger.info(f"Using {self._sorting_keys} as the sorting keys") + instances_with_lengths = [] + for instance in instances: + # Make sure instance is indexed before calling .get_padding + instance.index_fields(self.vocab) + padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) + if self._padding_noise > 0.0: + noisy_lengths = {} + for field_name, field_lengths in padding_lengths.items(): + noisy_lengths[field_name] = add_noise_to_dict_values( + field_lengths, self._padding_noise + ) + padding_lengths = noisy_lengths + instance_with_lengths = ( + [ + padding_lengths[field_name][padding_key] + for (field_name, padding_key) in self._sorting_keys + ], + instance, + ) + instances_with_lengths.append(instance_with_lengths) + with_indices = [(x, i) for i, x in enumerate(instances_with_lengths)] + with_indices.sort(key=lambda x: x[0][0]) + return [instance_with_index[-1] for instance_with_index in with_indices] + + def __iter__(self) -> Iterable[List[int]]: + + indices = self._argsort_by_padding(self.data_source) + for group in lazy_groups_of(indices, self._batch_size): + yield list(group) + + def _guess_sorting_keys(self, instances: List[Instance]) -> None: + max_length = 0.0 + longest_padding_key: Tuple[str, str] = None + for instance in instances: + instance.index_fields(self.vocab) + padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) + for field_name, field_padding in padding_lengths.items(): + for padding_key, length in field_padding.items(): + if length > max_length: + max_length = length + longest_padding_key = (field_name, padding_key) + if not longest_padding_key: + # This shouldn't ever happen (you basically have to have an empty instance list), but + # just in case... 
+ raise AssertionError( + "Found no field that needed padding; we are surprised you got this error, please " + "open an issue on github" + ) + self._sorting_keys = [longest_padding_key] + + def __len__(self): + return len(self.data_source) // self._batch_size From 14296a12595b6ae2b5061bf5a1eb5abdb9364227 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 21 Feb 2020 16:17:54 -0800 Subject: [PATCH 35/52] more docs updates --- allennlp/commands/train.py | 14 +++++++------- allennlp/data/samplers/samplers.py | 3 ++- allennlp/training/trainer.py | 10 +++++----- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/allennlp/commands/train.py b/allennlp/commands/train.py index 704168f6ed2..366cf0e5e8f 100644 --- a/allennlp/commands/train.py +++ b/allennlp/commands/train.py @@ -297,7 +297,7 @@ def train_model( ) # Creating `Vocabulary` objects from workers could be problematic since - # the data iterators in each worker will yield only `rank` specific + # the data loaders in each worker will yield only `rank` specific # instances. Hence it is safe to construct the vocabulary and write it # to disk before initializing the distributed context. The workers will # load the vocabulary from the path specified. @@ -593,9 +593,9 @@ def from_partial_objects( model: `Lazy[Model]` The model that we will train. This is lazy because it depends on the `Vocabulary`; after constructing the vocabulary we call `model.construct(vocab=vocabulary)`. - iterator: `DataIterator` - The iterator we use to batch instances from the dataset reader at training and (by - default) validation time. + data_loader: `Lazy[DataLoader]` + The data_loader we use to batch instances from the dataset reader at training and (by + default) validation time. This is lazy because it takes a dataset in it's constructor. trainer: `Lazy[TrainerBase]` The `Trainer` that actually implements the training loop. This is a lazy object because it depends on the model that's going to be trained. @@ -612,9 +612,9 @@ def from_partial_objects( `dataset_reader`. validation_data_path: `str`, optional (default=None) If given, we will use this data for computing validation metrics and early stopping. - validation_iterator: `DataIterator`, optional (default=None) - If given, we will use this iterator for batching and scheduling instances for the - validation data, instead of `iterator`. + validation_data_loader: `Lazy[DataLoader]`, optional (default=None) + If given, the data_loader we use to batch instances from the dataset reader at + validation and test time. This is lazy because it takes a dataset in it's constructor. test_data_path: `str`, optional (default=None) If given, we will use this as test data. This makes it available for vocab creation by default, but nothing else. 
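For reference, a sketch of how the `data_loader` and `validation_data_loader` blocks described above might be spelled in a training config. The keys and registered names here are illustrative (drawn from the samplers in this PR), not a definitive schema:

```
from allennlp.common.params import Params

params = Params({
    "data_loader": {
        "batch_sampler": {
            "type": "bucket",
            "batch_size": 32,
            "sorting_keys": [["tokens", "num_tokens"]],
        }
    },
    # Optional: when omitted, from_partial_objects falls back to the
    # training "data_loader" via the Lazy construct-then-check pattern.
    "validation_data_loader": {"batch_size": 64, "shuffle": False},
})

# Constructed only once the (indexed) dataset is available, e.g.:
# data_loader = DataLoader.from_params(dataset=train_data, params=params.pop("data_loader"))
```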
diff --git a/allennlp/data/samplers/samplers.py b/allennlp/data/samplers/samplers.py index bb82d37b979..03d46629c0a 100644 --- a/allennlp/data/samplers/samplers.py +++ b/allennlp/data/samplers/samplers.py @@ -23,7 +23,8 @@ def __iter__(self) -> Iterable[int]: class BatchSampler(Registrable): """ - A wrapper around the pytorch [BatchSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.BatchSampler) + A wrapper around the pytorch + [BatchSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.BatchSampler) which allows us to register it with `Registrable.` """ diff --git a/allennlp/training/trainer.py b/allennlp/training/trainer.py index dfc7b625f2b..dffe51f9de6 100644 --- a/allennlp/training/trainer.py +++ b/allennlp/training/trainer.py @@ -87,8 +87,8 @@ def __init__( optimizer : `torch.nn.Optimizer`, required. An instance of a Pytorch Optimizer, instantiated with the parameters of the model to be optimized. - iterator : `DataIterator`, required. - A method for iterating over a `Dataset`, yielding padded indexed batches. + data_loader : `DataLoader`, required. + A pytorch `DataLoader` containing your `Dataset`, yielding padded indexed batches. patience : Optional[int] > 0, optional (default=None) Number of epochs to be patient before early stopping: the training is stopped after `patience` epochs with no improvement. If given, it must be `> 0`. @@ -98,9 +98,9 @@ def __init__( and whether to serialize an `is_best` model each epoch. The metric name must be prepended with either "+" or "-", which specifies whether the metric is an increasing or decreasing function. - validation_iterator : `DataIterator`, optional (default=None) - An iterator to use for the validation set. If `None`, then - use the training `iterator`. + validation_iterator : `DataLoader`, optional (default=None) + A `DataLoader` to use for the validation set. If `None`, then + use the training `DataLoader` with the validation data. shuffle : `bool`, optional (default=True) Whether to shuffle the instances in the iterator or not. num_epochs : int, optional (default = 20) From 8a08899efad7ca04dd90acc19db62cfa59cac55e Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 21 Feb 2020 16:24:57 -0800 Subject: [PATCH 36/52] fold in #3812 --- allennlp/data/samplers/samplers.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/allennlp/data/samplers/samplers.py b/allennlp/data/samplers/samplers.py index 03d46629c0a..cd3334ebe10 100644 --- a/allennlp/data/samplers/samplers.py +++ b/allennlp/data/samplers/samplers.py @@ -1,3 +1,4 @@ +import itertools from typing import List, Iterable, Tuple, Dict, cast import logging from torch.utils import data @@ -192,7 +193,21 @@ def _argsort_by_padding(self, instances: List[Instance]) -> List[int]: """ if not self._sorting_keys: logger.info("No sorting keys given; trying to guess a good one") - self._guess_sorting_keys(instances) + num_instances_for_sorting = 10 + # We want to grab just a few instances to guess a sorting key, instead of using all of + # them. If `instances` is just a list, this is really easy, but if it's a generator, we + # have to be careful to not consume those instances. 
+ if isinstance(instances, list): + instances_for_sorting = instances[:num_instances_for_sorting] + else: + instances_for_sorting = [] + for instance in instances: + instances_for_sorting.append(instance) + if len(instances_for_sorting) >= num_instances_for_sorting: + break + instances = itertools.chain(instances_for_sorting, instances) # type: ignore + self._guess_sorting_keys(instances_for_sorting) + logger.info(f"Using {self._sorting_keys} as the sorting keys") instances_with_lengths = [] for instance in instances: From 35202805526cfde1ccf9f5d1b0af59a5cac0fe99 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Sat, 22 Feb 2020 12:49:49 -0800 Subject: [PATCH 37/52] remove torch dataset --- torch_datasets.py | 201 ---------------------------------------------- 1 file changed, 201 deletions(-) delete mode 100644 torch_datasets.py diff --git a/torch_datasets.py b/torch_datasets.py deleted file mode 100644 index 6f35be0ff49..00000000000 --- a/torch_datasets.py +++ /dev/null @@ -1,201 +0,0 @@ -from typing import Dict, List, cast, Tuple -import json -import logging -from overrides import overrides - -from torch.utils.data import Dataset as TorchDataset -from torch.utils.data import DataLoader -from torch.utils.data import BatchSampler, SequentialSampler - -from allennlp.common.registrable import Registrable -from allennlp.common.util import add_noise_to_dict_values, lazy_groups_of -from allennlp.data.batch import Batch as AllennlpBatch -from allennlp.data.instance import Instance -from allennlp.data.vocabulary import Vocabulary -from allennlp.data import Token -from allennlp.common.file_utils import cached_path -from allennlp.data.fields import Field, TextField, LabelField, MetadataField -from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer - -logger = logging.getLogger(__name__) - - -class Dataset(TorchDataset, Registrable): - def __init__(self): - self.vocab: Vocabulary = None - - def text_to_instance(self, *inputs) -> Instance: - - raise NotImplementedError - - def __getitem__(self) -> Instance: - - raise NotImplementedError - - def index_with(self, vocab: Vocabulary): - self.vocab = vocab - - -""" -Here we have two SNLI readers in both of the different styles. -They are only slightly different. -""" - - -class SnliDataset(Dataset): - def __init__( - self, file_path: str, token_indexers: Dict[str, TokenIndexer] = None, lazy: bool = False - ) -> None: - - super().__init__() - - self._tokenizer = lambda x: [Token(t) for t in x.split(" ")] - self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()} - - file_path = cached_path(file_path) - self.examples = [] - - for line in open(file_path, "r"): - example = json.loads(line) - if example["gold_label"] == "-": - # These were cases where the annotators disagreed; we'll just skip them. It's - # like 800 out of 500k examples in the training data. - continue - self.examples.append(example) - - def __len__(self): - return len(self.examples) - - def __getitem__(self, idx) -> Instance: - example = self.examples[idx] - instance = self.text_to_instance( - example["sentence1"], example["sentence2"], example["gold_label"] - ) - - # This is not ideal, we don't want a user to have to worry about this - # but at the same time, it's expensive and it would be nice if it could happen here. - # It's possible we could have this in the super class. 
- if self.vocab is not None: - instance.index_fields(self.vocab) - return instance - - @overrides - def text_to_instance(self, premise: str, hypothesis: str, label: str = None) -> Instance: - - fields: Dict[str, Field] = {} - premise_tokens = self._tokenizer(premise) - hypothesis_tokens = self._tokenizer(hypothesis) - fields["premise"] = TextField(premise_tokens, self._token_indexers) - fields["hypothesis"] = TextField(hypothesis_tokens, self._token_indexers) - if label: - fields["label"] = LabelField(label) - - metadata = { - "premise_tokens": [x.text for x in premise_tokens], - "hypothesis_tokens": [x.text for x in hypothesis_tokens], - } - fields["metadata"] = MetadataField(metadata) - return Instance(fields) - - -class BatchInstanceSampler(BatchSampler): - def __init__( - self, - data, - batch_size: int, - sorting_keys: List[Tuple[str, str]] = None, - padding_noise: float = 0.1, - ): - - self.vocab = data.vocab - self._sorting_keys = sorting_keys - self._padding_noise = padding_noise - self._batch_size = batch_size - self.data = data - - def _argsort_by_padding(self, instances: List[Instance]) -> List[int]: - """ - Sorts the instances by their padding lengths, using the keys in - `sorting_keys` (in the order in which they are provided). `sorting_keys` is a list of - `(field_name, padding_key)` tuples. - """ - if not self._sorting_keys: - logger.info("No sorting keys given; trying to guess a good one") - self._guess_sorting_keys(instances) - logger.info(f"Using {self._sorting_keys} as the sorting keys") - instances_with_lengths = [] - for instance in instances: - # Make sure instance is indexed before calling .get_padding - instance.index_fields(self.vocab) - padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) - if self._padding_noise > 0.0: - noisy_lengths = {} - for field_name, field_lengths in padding_lengths.items(): - noisy_lengths[field_name] = add_noise_to_dict_values( - field_lengths, self._padding_noise - ) - padding_lengths = noisy_lengths - instance_with_lengths = ( - [ - padding_lengths[field_name][padding_key] - for (field_name, padding_key) in self._sorting_keys - ], - instance, - ) - instances_with_lengths.append(instance_with_lengths) - with_indices = [(x, i) for i, x in enumerate(instances_with_lengths)] - with_indices.sort(key=lambda x: x[0][0]) - return [instance_with_index[-1] for instance_with_index in with_indices] - - def __iter__(self): - - indices = self._argsort_by_padding(self.data) - - for group in lazy_groups_of(indices, self._batch_size): - - yield list(group) - - def _guess_sorting_keys(self, instances: List[Instance]) -> None: - max_length = 0.0 - longest_padding_key: Tuple[str, str] = None - for instance in instances: - instance.index_fields(self.vocab) - padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) - for field_name, field_padding in padding_lengths.items(): - for padding_key, length in field_padding.items(): - if length > max_length: - max_length = length - longest_padding_key = (field_name, padding_key) - if not longest_padding_key: - # This shouldn't ever happen (you basically have to have an empty instance list), but - # just in case... 
- raise AssertionError( - "Found no field that needed padding; we are surprised you got this error, please " - "open an issue on github" - ) - self._sorting_keys = [longest_padding_key] - - -data = SnliDataset("snli_20.jsonl") -vocab = Vocabulary.from_instances(data) -data.index_with(vocab) - - -sampler = SequentialSampler(data) -batch_sampler = BatchInstanceSampler(data, 4) - - -def allennlp_collocate(batch): - - batch = AllennlpBatch(batch) - return batch.as_tensor_dict(batch.get_padding_lengths()) - - -batch_generator = DataLoader(data, batch_sampler=batch_sampler, collate_fn=allennlp_collocate) - -iterator = iter(batch_generator) - -print() -for i, x in enumerate(batch_generator): - - print(x) From 0f1d8a4fd3ff1560b8c2ae9dcca8f1644d930477 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Sat, 22 Feb 2020 12:53:43 -0800 Subject: [PATCH 38/52] add example to lazy --- allennlp/common/lazy.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/allennlp/common/lazy.py b/allennlp/common/lazy.py index 188c0dd4a9f..524292fc013 100644 --- a/allennlp/common/lazy.py +++ b/allennlp/common/lazy.py @@ -23,8 +23,20 @@ class Lazy(Generic[T]): !!! Warning The way this class is used in from_params means that optional constructor arguments CANNOT - be compared to `None` _before_ it is constructed. Instead, if the optional annotation - is indeed `None`, `construct` actually returns `None`. + be compared to `None` _before_ it is constructed. See the example below for correct usage. + + ``` + @classmethod + def my_constructor(cls, some_object: Lazy[MyObject] = None) -> MyClass: + ... + # WRONG! some_object will never be None at this point, it will be + # a Lazy[] that returns None + obj = some_object or MyObjectDefault() + # CORRECT: + obj = some_object.construct(kwarg=kwarg) or MyObjectDefault() + ... 
+ ``` + """ def __init__(self, constructor: Callable[..., T]): From 93e1e899687903b74dafd8c38ea0aa43f18fbfb4 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Sat, 22 Feb 2020 12:53:58 -0800 Subject: [PATCH 39/52] rename to collate --- allennlp/data/samplers/__init__.py | 4 ++-- allennlp/tests/training/trainer_test.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 4df3dbe42a7..f8a322c385a 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) -def allennlp_collocate(batch): +def allennlp_collate(batch): batch = Batch(batch) return batch.as_tensor_dict(batch.get_padding_lengths()) @@ -41,7 +41,7 @@ def __init__( multiprocessing_context: str = None, ): - collate_fn = allennlp_collocate + collate_fn = allennlp_collate if batch_sampler is not None: batch_sampler_ = batch_sampler.construct(data_source=dataset) else: diff --git a/allennlp/tests/training/trainer_test.py b/allennlp/tests/training/trainer_test.py index ec249f90fea..5909ecf8b28 100644 --- a/allennlp/tests/training/trainer_test.py +++ b/allennlp/tests/training/trainer_test.py @@ -22,7 +22,7 @@ from allennlp.training.momentum_schedulers import MomentumScheduler from allennlp.training.moving_average import ExponentialMovingAverage from allennlp.training.util import sparse_clip_norm -from allennlp.data.samplers import allennlp_collocate +from allennlp.data.samplers import allennlp_collate class TestTrainer(AllenNlpTestCase): @@ -43,9 +43,9 @@ def setUp(self): ) self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params) self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9) - self.data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) + self.data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collate) self.validation_data_loader = DataLoader( - self.instances, batch_size=2, collate_fn=allennlp_collocate + self.instances, batch_size=2, collate_fn=allennlp_collate ) self.instances.index_with(vocab) @@ -527,7 +527,7 @@ def test_trainer_respects_keep_serialized_model_every_num_seconds(self): # 2, 4, plus the last two at 5 and 6. 
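The renamed `allennlp_collate` is the piece that turns a list of `Instance`s into padded tensor dicts, so it can be handed straight to a vanilla pytorch `DataLoader`, exactly as the tests above do. A sketch of that wiring, assuming `instances` is an AllenNLP dataset that has already been indexed with a `Vocabulary`:

```
from torch.utils.data import DataLoader

from allennlp.data.samplers import allennlp_collate

data_loader = DataLoader(instances, batch_size=2, collate_fn=allennlp_collate)
for batch in data_loader:
    # Each batch is a dict of padded tensors built by Batch.as_tensor_dict().
    print({key: type(value) for key, value in batch.items()})
```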
class SlowDataLoader: - data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collocate) + data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collate) def __iter__(self): time.sleep(2.5) @@ -555,7 +555,7 @@ def __len__(self): assert sorted(epochs) == [1, 3, 4, 5] def test_trainer_can_log_learning_rates_tensorboard(self): - data_loader = DataLoader(self.instances, batch_size=4, collate_fn=allennlp_collocate) + data_loader = DataLoader(self.instances, batch_size=4, collate_fn=allennlp_collate) trainer = Trainer( self.model, self.optimizer, @@ -569,7 +569,7 @@ def test_trainer_can_log_learning_rates_tensorboard(self): trainer.train() def test_trainer_saves_models_at_specified_interval(self): - data_loader = DataLoader(self.instances, batch_size=4, collate_fn=allennlp_collocate) + data_loader = DataLoader(self.instances, batch_size=4, collate_fn=allennlp_collate) trainer = Trainer( self.model, From 40dd695e08c9679fb14a446ab68ef67072765075 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Sun, 23 Feb 2020 09:04:10 -0800 Subject: [PATCH 40/52] no kwargs --- allennlp/data/samplers/samplers.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/allennlp/data/samplers/samplers.py b/allennlp/data/samplers/samplers.py index cd3334ebe10..b87875213da 100644 --- a/allennlp/data/samplers/samplers.py +++ b/allennlp/data/samplers/samplers.py @@ -13,7 +13,7 @@ class Sampler(Registrable): """ - A wrapper around the pytorch [Sampler](https://pytorch.org/docs/stable/_modules/torch/utils/data/sampler.html) + A copy of the pytorch [Sampler](https://pytorch.org/docs/stable/_modules/torch/utils/data/sampler.html) which allows us to register it with `Registrable.` """ @@ -24,7 +24,7 @@ def __iter__(self) -> Iterable[int]: class BatchSampler(Registrable): """ - A wrapper around the pytorch + A copy of the pytorch [BatchSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.BatchSampler) which allows us to register it with `Registrable.` """ @@ -36,7 +36,7 @@ def __iter__(self) -> Iterable[List[int]]: @Sampler.register("sequential") class SequentialSampler(Sampler, data.SequentialSampler): - def __init__(self, data_source: data.Dataset, **kwargs): + def __init__(self, data_source: data.Dataset): super().__init__(data_source) @@ -57,11 +57,7 @@ class RandomSampler(Sampler, data.RandomSampler): """ def __init__( - self, - data_source: data.Dataset, - replacement: bool = False, - num_samples: int = None, - **kwargs, + self, data_source: data.Dataset, replacement: bool = False, num_samples: int = None, ): super().__init__(data_source, replacement, num_samples) @@ -76,7 +72,7 @@ class SubsetRandomSampler(Sampler, data.SubsetRandomSampler): a sequence of indices to sample from. 
""" - def __init__(self, indices: List[int], **kwargs): + def __init__(self, indices: List[int]): super().__init__(indices) @@ -104,7 +100,7 @@ class WeightedRandomSampler(Sampler, data.WeightedRandomSampler): ``` """ - def __init__(self, weights: List[float], num_samples: int, replacement: bool = True, **kwargs): + def __init__(self, weights: List[float], num_samples: int, replacement: bool = True): super().__init__(weights, num_samples, replacement) @@ -131,7 +127,7 @@ class BasicBatchSampler(BatchSampler, data.BatchSampler): ``` """ - def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool, **kwargs): + def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool): super().__init__(sampler, batch_size, drop_last) From da3b1b49dfedcdc3498e32e592352058e9839bbd Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Sun, 23 Feb 2020 09:28:38 -0800 Subject: [PATCH 41/52] Revert "fold in #3812" This reverts commit 8a08899efad7ca04dd90acc19db62cfa59cac55e. --- allennlp/data/samplers/samplers.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/allennlp/data/samplers/samplers.py b/allennlp/data/samplers/samplers.py index b87875213da..8cb04f247d6 100644 --- a/allennlp/data/samplers/samplers.py +++ b/allennlp/data/samplers/samplers.py @@ -1,4 +1,3 @@ -import itertools from typing import List, Iterable, Tuple, Dict, cast import logging from torch.utils import data @@ -189,21 +188,7 @@ def _argsort_by_padding(self, instances: List[Instance]) -> List[int]: """ if not self._sorting_keys: logger.info("No sorting keys given; trying to guess a good one") - num_instances_for_sorting = 10 - # We want to grab just a few instances to guess a sorting key, instead of using all of - # them. If `instances` is just a list, this is really easy, but if it's a generator, we - # have to be careful to not consume those instances. - if isinstance(instances, list): - instances_for_sorting = instances[:num_instances_for_sorting] - else: - instances_for_sorting = [] - for instance in instances: - instances_for_sorting.append(instance) - if len(instances_for_sorting) >= num_instances_for_sorting: - break - instances = itertools.chain(instances_for_sorting, instances) # type: ignore - self._guess_sorting_keys(instances_for_sorting) - + self._guess_sorting_keys(instances) logger.info(f"Using {self._sorting_keys} as the sorting keys") instances_with_lengths = [] for instance in instances: From 801a8f5aaf4cc4125f29dc2b5cebcf97fb01cfa9 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Sun, 23 Feb 2020 09:38:51 -0800 Subject: [PATCH 42/52] don't break up dataset --- allennlp/data/samplers/samplers.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/allennlp/data/samplers/samplers.py b/allennlp/data/samplers/samplers.py index 8cb04f247d6..549e2ea718d 100644 --- a/allennlp/data/samplers/samplers.py +++ b/allennlp/data/samplers/samplers.py @@ -180,7 +180,7 @@ def __init__( self._batch_size = batch_size self.data_source = data_source - def _argsort_by_padding(self, instances: List[Instance]) -> List[int]: + def _argsort_by_padding(self, instances: Iterable[Instance]) -> List[int]: """ Argsorts the instances by their padding lengths, using the keys in `sorting_keys` (in the order in which they are provided). 
`sorting_keys` @@ -220,10 +220,23 @@ def __iter__(self) -> Iterable[List[int]]: for group in lazy_groups_of(indices, self._batch_size): yield list(group) - def _guess_sorting_keys(self, instances: List[Instance]) -> None: + def _guess_sorting_keys(self, instances: Iterable[Instance], num_instances: int = 10) -> None: + """ + Use `num_instances` instances from the dataset to infer the keys used + for sorting the dataset for bucketing. + + # Parameters + + instances : `Iterable[Instance]`, required. + The dataset to guess sorting keys for. + num_instances : `int`, optional (default = 10) + The number of instances to use to guess sorting keys. Typically + the default value is completely sufficient, but if your instances + are not homogeneous, you might need more. + """ max_length = 0.0 longest_padding_key: Tuple[str, str] = None - for instance in instances: + for i, instance in enumerate(instances): instance.index_fields(self.vocab) padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) for field_name, field_padding in padding_lengths.items(): @@ -231,6 +244,10 @@ def _guess_sorting_keys(self, instances: List[Instance]) -> None: if length > max_length: max_length = length longest_padding_key = (field_name, padding_key) + if i > num_instances: + # Only use num_instances instances to guess the sorting keys. + break + if not longest_padding_key: # This shouldn't ever happen (you basically have to have an empty instance list), but # just in case... From 007fd0c4e943c7ffc8921f13a9a3f0fc31c2a502 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Sun, 23 Feb 2020 10:40:05 -0800 Subject: [PATCH 43/52] add comment to iterable dataset len --- allennlp/data/dataset_readers/dataset_reader.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/allennlp/data/dataset_readers/dataset_reader.py b/allennlp/data/dataset_readers/dataset_reader.py index ff4cc2055b7..17adddeab16 100644 --- a/allennlp/data/dataset_readers/dataset_reader.py +++ b/allennlp/data/dataset_readers/dataset_reader.py @@ -89,6 +89,12 @@ def index_with(self, vocab: Vocabulary): self.vocab = vocab def __len__(self): + """ + We rely in a couple of places that calling len on the dataloader + (which in turn calls len on the dataset) doesn't raise an error. + In the case that you have an IterableDataset and you call len, the pytorch dataloader + actually spits out a warning - but we need actually calling it to not crash. 
+ """ return 1 From c066804cfc00ee5a46f8c7fdd5eb7d7b6ba512ef Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Sun, 23 Feb 2020 15:52:51 -0800 Subject: [PATCH 44/52] improve docstrings, build dataloader using partial_objects --- allennlp/data/samplers/__init__.py | 67 ++++++++++++++++++++++++++---- allennlp/data/samplers/samplers.py | 21 ++++++---- 2 files changed, 74 insertions(+), 14 deletions(-) diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index f8a322c385a..6f28dfa77e2 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -1,7 +1,9 @@ +from typing import List import logging from torch.utils import data from allennlp.common.registrable import Registrable +from allennlp.data.instance import Instance from allennlp.common.lazy import Lazy from allennlp.data.batch import Batch @@ -19,19 +21,35 @@ logger = logging.getLogger(__name__) -def allennlp_collate(batch): - batch = Batch(batch) +def allennlp_collate(instances: List[Instance]): + batch = Batch(instances) return batch.as_tensor_dict(batch.get_padding_lengths()) class DataLoader(Registrable, data.DataLoader): + """ + A registrable version of the pytorch [DataLoader](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader). + The only reason this class exists is so that we can construct a DataLoader + from a configuration file. Instead of using this class directly in python code, + you should just use the pytorch dataloader with allennlp's custom collate function: + + ``` + from torch.utils.data import DataLoader + + from allennlp.data.samplers import allennlp_collate + # Construct a dataloader directly for a dataset which contains allennlp + # Instances which have _already_ been indexed. + my_loader = DataLoader(dataset, batch_size=32, collate_fn=allennlp_collate) + ``` + """ + def __init__( self, dataset: data.Dataset, batch_size: int = 1, shuffle: bool = False, - sampler: Lazy[Sampler] = None, - batch_sampler: Lazy[BatchSampler] = None, + sampler: Sampler = None, + batch_sampler: BatchSampler = None, num_workers: int = 0, collate_fn=None, pin_memory: bool = False, @@ -40,8 +58,37 @@ def __init__( worker_init_fn=None, multiprocessing_context: str = None, ): + super().__init__( + dataset=dataset, + batch_size=batch_size, + shuffle=shuffle, + sampler=sampler, + batch_sampler=batch_sampler, + num_workers=num_workers, + collate_fn=collate_fn, + pin_memory=pin_memory, + drop_last=drop_last, + timeout=timeout, + worker_init_fn=worker_init_fn, + multiprocessing_context=multiprocessing_context, + ) + + @classmethod + def from_partial_objects( + cls, + dataset: data.Dataset, + batch_size: int = 1, + shuffle: bool = False, + sampler: Lazy[Sampler] = None, + batch_sampler: Lazy[BatchSampler] = None, + num_workers: int = 0, + pin_memory: bool = False, + drop_last: bool = False, + timeout: int = 0, + worker_init_fn=None, + multiprocessing_context: str = None, + ) -> "DataLoader": - collate_fn = allennlp_collate if batch_sampler is not None: batch_sampler_ = batch_sampler.construct(data_source=dataset) else: @@ -51,17 +98,23 @@ def __init__( else: sampler_ = None - super().__init__( + return cls( dataset=dataset, batch_size=batch_size, shuffle=shuffle, sampler=sampler_, batch_sampler=batch_sampler_, num_workers=num_workers, - collate_fn=collate_fn, + # NOTE: This default is different from the normal `None`. + # We assume that if you are using this class you are using an + # allennlp dataset of instances, which would require this. 
+ collate_fn=allennlp_collate, pin_memory=pin_memory, drop_last=drop_last, timeout=timeout, worker_init_fn=worker_init_fn, multiprocessing_context=multiprocessing_context, ) + + +DataLoader.register("default", "from_partial_objects")(DataLoader) diff --git a/allennlp/data/samplers/samplers.py b/allennlp/data/samplers/samplers.py index 549e2ea718d..0297af55f2c 100644 --- a/allennlp/data/samplers/samplers.py +++ b/allennlp/data/samplers/samplers.py @@ -35,6 +35,9 @@ def __iter__(self) -> Iterable[List[int]]: @Sampler.register("sequential") class SequentialSampler(Sampler, data.SequentialSampler): + """ + A registerable version of pytorch's [SequentialSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.SequentialSampler). + """ def __init__(self, data_source: data.Dataset): super().__init__(data_source) @@ -42,6 +45,7 @@ def __init__(self, data_source: data.Dataset): @Sampler.register("random") class RandomSampler(Sampler, data.RandomSampler): """ + A registerable version of pytorch's [RandomSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.RandomSampler). Samples elements randomly. If without replacement, then sample from a shuffled dataset. If with replacement, then user can specify `num_samples` to draw. @@ -64,6 +68,7 @@ def __init__( @Sampler.register("subset_random") class SubsetRandomSampler(Sampler, data.SubsetRandomSampler): """ + A registerable version of pytorch's [SubsetRandomSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.SubsetRandomSampler). Samples elements randomly from a given list of indices, without replacement. # Parameters @@ -78,7 +83,8 @@ def __init__(self, indices: List[int]): @Sampler.register("weighted_random") class WeightedRandomSampler(Sampler, data.WeightedRandomSampler): """ - Samples elements from ``[0,..,len(weights)-1]`` with given probabilities (weights). + A registerable version of pytorch's [WeightedRandomSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.WeightedRandomSampler). + Samples elements from `[0,...,len(weights)-1]` with given probabilities (weights). # Parameters: weights : `List[float]` @@ -106,6 +112,7 @@ def __init__(self, weights: List[float], num_samples: int, replacement: bool = T @BatchSampler.register("basic") class BasicBatchSampler(BatchSampler, data.BatchSampler): """ + A registerable version of pytorch's [BatchSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.BatchSampler). Wraps another sampler to yield a mini-batch of indices. # Parameters @@ -134,10 +141,10 @@ def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool): class BatchInstanceSampler(BatchSampler): """ An sampler which by default, argsorts batches with respect to the maximum input lengths `per - batch`. Additionally, you can provide a list of field names and padding keys which the dataset - will be sorted by before doing this batching, causing inputs with similar length to be batched - together, making computation more efficient (as less time is wasted on padded elements of the - batch). + batch`. You can provide a list of field names and padding keys (or pass none, in which case they + will be inferred) which the dataset will be sorted by before doing this batching, causing inputs + with similar length to be batched together, making computation more efficient (as less time is + wasted on padded elements of the batch). 
# Parameters @@ -154,6 +161,8 @@ class BatchInstanceSampler(BatchSampler): When you need to specify this yourself, you can create an instance from your dataset and call `Instance.get_padding_lengths()` to see a list of all keys used in your data. You should give one or more of those as the sorting keys here. + batch_size : int, required. + The size of each batch of instances yielded when calling the dataloader. padding_noise : float, optional (default=.1) When sorting by padding length, we add a bit of noise to the lengths, so that the sorting isn't deterministic. This parameter determines how much noise we add, as a percentage of @@ -161,8 +170,6 @@ class BatchInstanceSampler(BatchSampler): Note that if you specify `max_instances_in_memory`, the first batch will only be the biggest from among the first "max instances in memory" instances. - batch_size : int, optional, (default = 32) - The size of each batch of instances yielded when calling the iterator. """ From 61c7b14a8081c01427ccc34d8d38a75c4d119751 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Sun, 23 Feb 2020 15:54:02 -0800 Subject: [PATCH 45/52] flake --- allennlp/data/samplers/samplers.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/allennlp/data/samplers/samplers.py b/allennlp/data/samplers/samplers.py index 0297af55f2c..fb864a79463 100644 --- a/allennlp/data/samplers/samplers.py +++ b/allennlp/data/samplers/samplers.py @@ -36,8 +36,10 @@ def __iter__(self) -> Iterable[List[int]]: @Sampler.register("sequential") class SequentialSampler(Sampler, data.SequentialSampler): """ - A registerable version of pytorch's [SequentialSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.SequentialSampler). + A registerable version of pytorch's + [SequentialSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.SequentialSampler). """ + def __init__(self, data_source: data.Dataset): super().__init__(data_source) @@ -45,7 +47,8 @@ def __init__(self, data_source: data.Dataset): @Sampler.register("random") class RandomSampler(Sampler, data.RandomSampler): """ - A registerable version of pytorch's [RandomSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.RandomSampler). + A registerable version of pytorch's + [RandomSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.RandomSampler). Samples elements randomly. If without replacement, then sample from a shuffled dataset. If with replacement, then user can specify `num_samples` to draw. @@ -68,7 +71,8 @@ def __init__( @Sampler.register("subset_random") class SubsetRandomSampler(Sampler, data.SubsetRandomSampler): """ - A registerable version of pytorch's [SubsetRandomSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.SubsetRandomSampler). + A registerable version of pytorch's + [SubsetRandomSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.SubsetRandomSampler). Samples elements randomly from a given list of indices, without replacement. # Parameters @@ -83,7 +87,8 @@ def __init__(self, indices: List[int]): @Sampler.register("weighted_random") class WeightedRandomSampler(Sampler, data.WeightedRandomSampler): """ - A registerable version of pytorch's [WeightedRandomSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.WeightedRandomSampler). + A registerable version of pytorch's + [WeightedRandomSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.WeightedRandomSampler). 
Samples elements from `[0,...,len(weights)-1]` with given probabilities (weights). # Parameters: @@ -112,7 +117,8 @@ def __init__(self, weights: List[float], num_samples: int, replacement: bool = T @BatchSampler.register("basic") class BasicBatchSampler(BatchSampler, data.BatchSampler): """ - A registerable version of pytorch's [BatchSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.BatchSampler). + A registerable version of pytorch's + [BatchSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.BatchSampler). Wraps another sampler to yield a mini-batch of indices. # Parameters From 2b56b14fc846e30853df1b05ee3fcf2c8f141ae6 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Sun, 23 Feb 2020 18:57:29 -0800 Subject: [PATCH 46/52] give dataloader a default implementation --- allennlp/data/samplers/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 6f28dfa77e2..6ee2446d063 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -118,3 +118,4 @@ def from_partial_objects( DataLoader.register("default", "from_partial_objects")(DataLoader) +DataLoader.default_implementation = "default" From 354010a2eca085fbc251b164fca50263f1110d2f Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Mon, 24 Feb 2020 08:47:42 -0800 Subject: [PATCH 47/52] safer default for DataLoader init --- allennlp/data/samplers/__init__.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 6ee2446d063..3ed54819c58 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -30,8 +30,9 @@ class DataLoader(Registrable, data.DataLoader): """ A registrable version of the pytorch [DataLoader](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader). The only reason this class exists is so that we can construct a DataLoader - from a configuration file. Instead of using this class directly in python code, - you should just use the pytorch dataloader with allennlp's custom collate function: + from a configuration file and have a different default `collate_fn`. + You can use this class directly in python code, but it is identical to using + pytorch dataloader with allennlp's custom collate function: ``` from torch.utils.data import DataLoader @@ -51,7 +52,10 @@ def __init__( sampler: Sampler = None, batch_sampler: BatchSampler = None, num_workers: int = 0, - collate_fn=None, + # NOTE: This default is different from the normal `None`. + # We assume that if you are using this class you are using an + # allennlp dataset of instances, which would require this. 
+ collate_fn=allennlp_collate, pin_memory: bool = False, drop_last: bool = False, timeout: int = 0, From 568291d4d0362c827f161aedc294831772e8c0fa Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Mon, 24 Feb 2020 08:55:55 -0800 Subject: [PATCH 48/52] more coherent dir structure --- allennlp/data/__init__.py | 1 + allennlp/data/dataloader.py | 113 ++++++++++++++++++++++++++++ allennlp/data/samplers/__init__.py | 115 ----------------------------- 3 files changed, 114 insertions(+), 115 deletions(-) create mode 100644 allennlp/data/dataloader.py diff --git a/allennlp/data/__init__.py b/allennlp/data/__init__.py index 7c799999907..ae7993e7722 100644 --- a/allennlp/data/__init__.py +++ b/allennlp/data/__init__.py @@ -1,3 +1,4 @@ +from allennlp.data.dataloader import DataLoader, allennlp_collate from allennlp.data.dataset_readers.dataset_reader import DatasetReader from allennlp.data.fields.field import DataArray, Field from allennlp.data.fields.text_field import TextFieldTensors diff --git a/allennlp/data/dataloader.py b/allennlp/data/dataloader.py new file mode 100644 index 00000000000..3e93cc01142 --- /dev/null +++ b/allennlp/data/dataloader.py @@ -0,0 +1,113 @@ +from typing import List +from torch.utils import data + +from allennlp.common.registrable import Registrable +from allennlp.data.instance import Instance + +from allennlp.common.lazy import Lazy +from allennlp.data.batch import Batch +from allennlp.data.samplers import Sampler, BatchSampler + + +def allennlp_collate(instances: List[Instance]): + batch = Batch(instances) + return batch.as_tensor_dict(batch.get_padding_lengths()) + + +class DataLoader(Registrable, data.DataLoader): + """ + A registrable version of the pytorch [DataLoader](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader). + The only reason this class exists is so that we can construct a DataLoader + from a configuration file and have a different default `collate_fn`. + You can use this class directly in python code, but it is identical to using + pytorch dataloader with allennlp's custom collate function: + + ``` + from torch.utils.data import DataLoader + + from allennlp.data.samplers import allennlp_collate + # Construct a dataloader directly for a dataset which contains allennlp + # Instances which have _already_ been indexed. + my_loader = DataLoader(dataset, batch_size=32, collate_fn=allennlp_collate) + ``` + """ + + def __init__( + self, + dataset: data.Dataset, + batch_size: int = 1, + shuffle: bool = False, + sampler: Sampler = None, + batch_sampler: BatchSampler = None, + num_workers: int = 0, + # NOTE: This default is different from the normal `None`. + # We assume that if you are using this class you are using an + # allennlp dataset of instances, which would require this. 
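+        # If your dataset does not produce allennlp Instances, you can still
+        # pass any other collate_fn here, exactly as with the pytorch DataLoader.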
+ collate_fn=allennlp_collate, + pin_memory: bool = False, + drop_last: bool = False, + timeout: int = 0, + worker_init_fn=None, + multiprocessing_context: str = None, + ): + super().__init__( + dataset=dataset, + batch_size=batch_size, + shuffle=shuffle, + sampler=sampler, + batch_sampler=batch_sampler, + num_workers=num_workers, + collate_fn=collate_fn, + pin_memory=pin_memory, + drop_last=drop_last, + timeout=timeout, + worker_init_fn=worker_init_fn, + multiprocessing_context=multiprocessing_context, + ) + + @classmethod + def from_partial_objects( + cls, + dataset: data.Dataset, + batch_size: int = 1, + shuffle: bool = False, + sampler: Lazy[Sampler] = None, + batch_sampler: Lazy[BatchSampler] = None, + num_workers: int = 0, + pin_memory: bool = False, + drop_last: bool = False, + timeout: int = 0, + worker_init_fn=None, + multiprocessing_context: str = None, + ) -> "DataLoader": + + if batch_sampler is not None: + batch_sampler_ = batch_sampler.construct(data_source=dataset) + else: + batch_sampler_ = None + if sampler is not None: + sampler_ = sampler.construct(data_source=dataset) + else: + sampler_ = None + + return cls( + dataset=dataset, + batch_size=batch_size, + shuffle=shuffle, + sampler=sampler_, + batch_sampler=batch_sampler_, + num_workers=num_workers, + # NOTE: This default is different from the normal `None`. + # We assume that if you are using this class you are using an + # allennlp dataset of instances, which would require this. + collate_fn=allennlp_collate, + pin_memory=pin_memory, + drop_last=drop_last, + timeout=timeout, + worker_init_fn=worker_init_fn, + multiprocessing_context=multiprocessing_context, + ) + + +DataLoader.register("default", "from_partial_objects")(DataLoader) +DataLoader.default_implementation = "default" diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 3ed54819c58..2dccdb600ee 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -1,12 +1,3 @@ -from typing import List -import logging -from torch.utils import data - -from allennlp.common.registrable import Registrable -from allennlp.data.instance import Instance - -from allennlp.common.lazy import Lazy -from allennlp.data.batch import Batch from allennlp.data.samplers.samplers import ( Sampler, BatchSampler, @@ -17,109 +8,3 @@ BasicBatchSampler, BatchInstanceSampler, ) - -logger = logging.getLogger(__name__) - - -def allennlp_collate(instances: List[Instance]): - batch = Batch(instances) - return batch.as_tensor_dict(batch.get_padding_lengths()) - - -class DataLoader(Registrable, data.DataLoader): - """ - A registrable version of the pytorch [DataLoader](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader). - The only reason this class exists is so that we can construct a DataLoader - from a configuration file and have a different default `collate_fn`. - You can use this class directly in python code, but it is identical to using - pytorch dataloader with allennlp's custom collate function: - - ``` - from torch.utils.data import DataLoader - - from allennlp.data.samplers import allennlp_collate - # Construct a dataloader directly for a dataset which contains allennlp - # Instances which have _already_ been indexed. 
- my_loader = DataLoader(dataset, batch_size=32, collate_fn=allennlp_collate) - ``` - """ - - def __init__( - self, - dataset: data.Dataset, - batch_size: int = 1, - shuffle: bool = False, - sampler: Sampler = None, - batch_sampler: BatchSampler = None, - num_workers: int = 0, - # NOTE: This default is different from the normal `None`. - # We assume that if you are using this class you are using an - # allennlp dataset of instances, which would require this. - collate_fn=allennlp_collate, - pin_memory: bool = False, - drop_last: bool = False, - timeout: int = 0, - worker_init_fn=None, - multiprocessing_context: str = None, - ): - super().__init__( - dataset=dataset, - batch_size=batch_size, - shuffle=shuffle, - sampler=sampler, - batch_sampler=batch_sampler, - num_workers=num_workers, - collate_fn=collate_fn, - pin_memory=pin_memory, - drop_last=drop_last, - timeout=timeout, - worker_init_fn=worker_init_fn, - multiprocessing_context=multiprocessing_context, - ) - - @classmethod - def from_partial_objects( - cls, - dataset: data.Dataset, - batch_size: int = 1, - shuffle: bool = False, - sampler: Lazy[Sampler] = None, - batch_sampler: Lazy[BatchSampler] = None, - num_workers: int = 0, - pin_memory: bool = False, - drop_last: bool = False, - timeout: int = 0, - worker_init_fn=None, - multiprocessing_context: str = None, - ) -> "DataLoader": - - if batch_sampler is not None: - batch_sampler_ = batch_sampler.construct(data_source=dataset) - else: - batch_sampler_ = None - if sampler is not None: - sampler_ = sampler.construct(data_source=dataset) - else: - sampler_ = None - - return cls( - dataset=dataset, - batch_size=batch_size, - shuffle=shuffle, - sampler=sampler_, - batch_sampler=batch_sampler_, - num_workers=num_workers, - # NOTE: This default is different from the normal `None`. - # We assume that if you are using this class you are using an - # allennlp dataset of instances, which would require this. 
- collate_fn=allennlp_collate, - pin_memory=pin_memory, - drop_last=drop_last, - timeout=timeout, - worker_init_fn=worker_init_fn, - multiprocessing_context=multiprocessing_context, - ) - - -DataLoader.register("default", "from_partial_objects")(DataLoader) -DataLoader.default_implementation = "default" From a016103f62e4db64b433984c53af6de139967f73 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Mon, 24 Feb 2020 08:56:54 -0800 Subject: [PATCH 49/52] update imports --- allennlp/commands/evaluate.py | 2 +- allennlp/commands/find_learning_rate.py | 2 +- allennlp/commands/train.py | 2 +- allennlp/common/testing/model_test_case.py | 2 +- allennlp/tests/commands/find_learning_rate_test.py | 2 +- allennlp/tests/models/simple_tagger_test.py | 2 +- .../learning_rate_schedulers/slanted_triangular_test.py | 2 +- allennlp/tests/training/optimizer_test.py | 2 +- allennlp/tests/training/trainer_test.py | 2 +- allennlp/training/trainer.py | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/allennlp/commands/evaluate.py b/allennlp/commands/evaluate.py index 53af3049243..c5b6e14a093 100644 --- a/allennlp/commands/evaluate.py +++ b/allennlp/commands/evaluate.py @@ -62,7 +62,7 @@ from allennlp.commands.subcommand import Subcommand from allennlp.common.util import dump_metrics, prepare_environment from allennlp.data.dataset_readers.dataset_reader import DatasetReader -from allennlp.data.samplers import DataLoader +from allennlp.data import DataLoader from allennlp.models.archival import load_archive from allennlp.training.util import evaluate diff --git a/allennlp/commands/find_learning_rate.py b/allennlp/commands/find_learning_rate.py index 01536b310bc..8b5f5f9632a 100644 --- a/allennlp/commands/find_learning_rate.py +++ b/allennlp/commands/find_learning_rate.py @@ -58,7 +58,7 @@ from allennlp.common.checks import check_for_gpu, ConfigurationError from allennlp.common.util import prepare_environment from allennlp.data import Vocabulary -from allennlp.data.samplers import DataLoader +from allennlp.data import DataLoader from allennlp.models import Model from allennlp.training import Trainer, TrainerBase from allennlp.training.util import create_serialization_dir, datasets_from_params diff --git a/allennlp/commands/train.py b/allennlp/commands/train.py index 366cf0e5e8f..bd6a772065c 100644 --- a/allennlp/commands/train.py +++ b/allennlp/commands/train.py @@ -54,7 +54,7 @@ from allennlp.common import util as common_util from allennlp.common.plugins import import_plugins from allennlp.data import DatasetReader, Vocabulary -from allennlp.data.samplers import DataLoader +from allennlp.data import DataLoader from allennlp.models.archival import archive_model, CONFIG_NAME from allennlp.models.model import _DEFAULT_WEIGHTS, Model from allennlp.training.trainer_base import TrainerBase diff --git a/allennlp/common/testing/model_test_case.py b/allennlp/common/testing/model_test_case.py index 9fdc1b190a4..3e8861ea3c5 100644 --- a/allennlp/common/testing/model_test_case.py +++ b/allennlp/common/testing/model_test_case.py @@ -8,7 +8,7 @@ from allennlp.common import Params from allennlp.common.testing.test_case import AllenNlpTestCase from allennlp.data import DatasetReader, Vocabulary -from allennlp.data.samplers import DataLoader +from allennlp.data import DataLoader from allennlp.data.batch import Batch from allennlp.models import load_archive, Model diff --git a/allennlp/tests/commands/find_learning_rate_test.py b/allennlp/tests/commands/find_learning_rate_test.py index dddd22691f5..c410bce6a1f 
100644 --- a/allennlp/tests/commands/find_learning_rate_test.py +++ b/allennlp/tests/commands/find_learning_rate_test.py @@ -6,7 +6,7 @@ from allennlp.common import Params from allennlp.data import Vocabulary -from allennlp.data.samplers import DataLoader +from allennlp.data import DataLoader from allennlp.models import Model from allennlp.common.checks import ConfigurationError from allennlp.common.testing import AllenNlpTestCase diff --git a/allennlp/tests/models/simple_tagger_test.py b/allennlp/tests/models/simple_tagger_test.py index 08b6369f5cc..7a0a11d9278 100644 --- a/allennlp/tests/models/simple_tagger_test.py +++ b/allennlp/tests/models/simple_tagger_test.py @@ -7,7 +7,7 @@ from allennlp.common.checks import ConfigurationError from allennlp.common.params import Params from allennlp.data.dataset_readers import DatasetReader -from allennlp.data.samplers import DataLoader +from allennlp.data import DataLoader from allennlp.models import Model from allennlp.training import Trainer, TrainerBase diff --git a/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py b/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py index 5099b8a79f7..961822c8edd 100644 --- a/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py +++ b/allennlp/tests/training/learning_rate_schedulers/slanted_triangular_test.py @@ -8,7 +8,7 @@ from allennlp.common import Lazy, Params from allennlp.common.checks import ConfigurationError from allennlp.common.testing import AllenNlpTestCase -from allennlp.data.samplers import DataLoader +from allennlp.data import DataLoader from allennlp.training import TrainerBase from allennlp.training.learning_rate_schedulers import LearningRateScheduler, SlantedTriangular from allennlp.training.optimizers import Optimizer diff --git a/allennlp/tests/training/optimizer_test.py b/allennlp/tests/training/optimizer_test.py index ed89bd5ecf2..07cda0a725b 100644 --- a/allennlp/tests/training/optimizer_test.py +++ b/allennlp/tests/training/optimizer_test.py @@ -2,7 +2,7 @@ from allennlp.common.testing import AllenNlpTestCase from allennlp.data import Vocabulary from allennlp.data.dataset_readers import SequenceTaggingDatasetReader -from allennlp.data.samplers import DataLoader +from allennlp.data import DataLoader from allennlp.models.simple_tagger import SimpleTagger from allennlp.training import Trainer from allennlp.training.optimizers import Optimizer diff --git a/allennlp/tests/training/trainer_test.py b/allennlp/tests/training/trainer_test.py index 5909ecf8b28..655f0e81ef1 100644 --- a/allennlp/tests/training/trainer_test.py +++ b/allennlp/tests/training/trainer_test.py @@ -22,7 +22,7 @@ from allennlp.training.momentum_schedulers import MomentumScheduler from allennlp.training.moving_average import ExponentialMovingAverage from allennlp.training.util import sparse_clip_norm -from allennlp.data.samplers import allennlp_collate +from allennlp.data import allennlp_collate class TestTrainer(AllenNlpTestCase): diff --git a/allennlp/training/trainer.py b/allennlp/training/trainer.py index dffe51f9de6..8efa85fa7ad 100644 --- a/allennlp/training/trainer.py +++ b/allennlp/training/trainer.py @@ -17,7 +17,7 @@ from allennlp.common.checks import ConfigurationError, check_for_gpu from allennlp.common import util as common_util -from allennlp.data.samplers import DataLoader +from allennlp.data import DataLoader from allennlp.data.iterators.data_iterator import TensorDict from allennlp.models.model import Model From 
04fdb7054b7fd7e800df2f87fa6fb4b7ef05808b Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Mon, 24 Feb 2020 09:32:09 -0800 Subject: [PATCH 50/52] add a test for the BucketBatchSampler --- allennlp/data/dataloader.py | 3 ++- allennlp/data/samplers/__init__.py | 2 +- allennlp/data/samplers/samplers.py | 38 +++++++++++++++++------------- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/allennlp/data/dataloader.py b/allennlp/data/dataloader.py index 3e93cc01142..ceeb193b48f 100644 --- a/allennlp/data/dataloader.py +++ b/allennlp/data/dataloader.py @@ -16,7 +16,8 @@ def allennlp_collate(instances: List[Instance]): class DataLoader(Registrable, data.DataLoader): """ - A registrable version of the pytorch [DataLoader](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader). + A registrable version of the pytorch + [DataLoader](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader). The only reason this class exists is so that we can construct a DataLoader from a configuration file and have a different default `collate_fn`. You can use this class directly in python code, but it is identical to using diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 2dccdb600ee..30b74e51295 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -6,5 +6,5 @@ WeightedRandomSampler, RandomSampler, BasicBatchSampler, - BatchInstanceSampler, + BucketBatchSampler, ) diff --git a/allennlp/data/samplers/samplers.py b/allennlp/data/samplers/samplers.py index fb864a79463..7149209ad68 100644 --- a/allennlp/data/samplers/samplers.py +++ b/allennlp/data/samplers/samplers.py @@ -144,7 +144,7 @@ def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool): @BatchSampler.register("bucket") -class BatchInstanceSampler(BatchSampler): +class BucketBatchSampler(BatchSampler): """ An sampler which by default, argsorts batches with respect to the maximum input lengths `per batch`. You can provide a list of field names and padding keys (or pass none, in which case they @@ -173,10 +173,9 @@ class BatchInstanceSampler(BatchSampler): When sorting by padding length, we add a bit of noise to the lengths, so that the sorting isn't deterministic. This parameter determines how much noise we add, as a percentage of the actual padding value for each instance. - - Note that if you specify `max_instances_in_memory`, the first batch will only be the - biggest from among the first "max instances in memory" instances. - + drop_last : `bool` + If `True`, the sampler will drop the last batch if + its size would be less than batch_size`. """ def __init__( @@ -185,13 +184,15 @@ def __init__( batch_size: int, sorting_keys: List[Tuple[str, str]] = None, padding_noise: float = 0.1, + drop_last: bool = False, ): self.vocab = data_source.vocab - self._sorting_keys = sorting_keys - self._padding_noise = padding_noise - self._batch_size = batch_size + self.sorting_keys = sorting_keys + self.padding_noise = padding_noise + self.batch_size = batch_size self.data_source = data_source + self.drop_last = drop_last def _argsort_by_padding(self, instances: Iterable[Instance]) -> List[int]: """ @@ -199,26 +200,26 @@ def _argsort_by_padding(self, instances: Iterable[Instance]) -> List[int]: `sorting_keys` (in the order in which they are provided). `sorting_keys` is a list of `(field_name, padding_key)` tuples. 
""" - if not self._sorting_keys: + if not self.sorting_keys: logger.info("No sorting keys given; trying to guess a good one") self._guess_sorting_keys(instances) - logger.info(f"Using {self._sorting_keys} as the sorting keys") + logger.info(f"Using {self.sorting_keys} as the sorting keys") instances_with_lengths = [] for instance in instances: # Make sure instance is indexed before calling .get_padding instance.index_fields(self.vocab) padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) - if self._padding_noise > 0.0: + if self.padding_noise > 0.0: noisy_lengths = {} for field_name, field_lengths in padding_lengths.items(): noisy_lengths[field_name] = add_noise_to_dict_values( - field_lengths, self._padding_noise + field_lengths, self.padding_noise ) padding_lengths = noisy_lengths instance_with_lengths = ( [ padding_lengths[field_name][padding_key] - for (field_name, padding_key) in self._sorting_keys + for (field_name, padding_key) in self.sorting_keys ], instance, ) @@ -230,8 +231,11 @@ def _argsort_by_padding(self, instances: Iterable[Instance]) -> List[int]: def __iter__(self) -> Iterable[List[int]]: indices = self._argsort_by_padding(self.data_source) - for group in lazy_groups_of(indices, self._batch_size): - yield list(group) + for group in lazy_groups_of(indices, self.batch_size): + batch_indices = list(group) + if self.drop_last and len(batch_indices) < self.batch_size: + continue + yield batch_indices def _guess_sorting_keys(self, instances: Iterable[Instance], num_instances: int = 10) -> None: """ @@ -268,7 +272,7 @@ def _guess_sorting_keys(self, instances: Iterable[Instance], num_instances: int "Found no field that needed padding; we are surprised you got this error, please " "open an issue on github" ) - self._sorting_keys = [longest_padding_key] + self.sorting_keys = [longest_padding_key] def __len__(self): - return len(self.data_source) // self._batch_size + return len(self.data_source) // self.batch_size From d1d5c4ace8dd85b4489d621e8e112a345fb723bf Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Mon, 24 Feb 2020 09:54:37 -0800 Subject: [PATCH 51/52] split bucket sampler into own file, tests --- allennlp/data/samplers/__init__.py | 2 +- .../data/samplers/bucket_batch_sampler.py | 146 ++++++++++++++++++ allennlp/data/samplers/samplers.py | 143 +---------------- allennlp/tests/data/samplers/__init__.py | 0 .../samplers/bucket_batch_sampler_test.py | 109 +++++++++++++ 5 files changed, 257 insertions(+), 143 deletions(-) create mode 100644 allennlp/data/samplers/bucket_batch_sampler.py create mode 100644 allennlp/tests/data/samplers/__init__.py create mode 100644 allennlp/tests/data/samplers/bucket_batch_sampler_test.py diff --git a/allennlp/data/samplers/__init__.py b/allennlp/data/samplers/__init__.py index 30b74e51295..89e266b6bf6 100644 --- a/allennlp/data/samplers/__init__.py +++ b/allennlp/data/samplers/__init__.py @@ -6,5 +6,5 @@ WeightedRandomSampler, RandomSampler, BasicBatchSampler, - BucketBatchSampler, ) +from allennlp.data.samplers.bucket_batch_sampler import BucketBatchSampler diff --git a/allennlp/data/samplers/bucket_batch_sampler.py b/allennlp/data/samplers/bucket_batch_sampler.py new file mode 100644 index 00000000000..0a1dbeb6bda --- /dev/null +++ b/allennlp/data/samplers/bucket_batch_sampler.py @@ -0,0 +1,146 @@ +from typing import List, Iterable, Tuple, Dict, cast +import logging +from torch.utils import data + +from allennlp.common.util import add_noise_to_dict_values, lazy_groups_of +from allennlp.data.instance import 
Instance +from allennlp.data.samplers import BatchSampler + +logger = logging.getLogger(__name__) + + +@BatchSampler.register("bucket") +class BucketBatchSampler(BatchSampler): + """ + An sampler which by default, argsorts batches with respect to the maximum input lengths `per + batch`. You can provide a list of field names and padding keys (or pass none, in which case they + will be inferred) which the dataset will be sorted by before doing this batching, causing inputs + with similar length to be batched together, making computation more efficient (as less time is + wasted on padded elements of the batch). + + # Parameters + + data_source: `data.Dataset`, required, + The pytorch `Dataset` of allennlp Instances to bucket. + sorting_keys : List[Tuple[str, str]], optional + To bucket inputs into batches, we want to group the instances by padding length, so that we + minimize the amount of padding necessary per batch. In order to do this, we need to know + which fields need what type of padding, and in what order. + + Specifying the right keys for this is a bit cryptic, so if this is not given we try to + auto-detect the right keys by iterating once through the data up front, reading all of the + padding keys and seeing which one has the longest length. We use that one for padding. + This should give reasonable results in most cases. + + When you need to specify this yourself, you can create an instance from your dataset and + call `Instance.get_padding_lengths()` to see a list of all keys used in your data. You + should give one or more of those as the sorting keys here. + batch_size : int, required. + The size of each batch of instances yielded when calling the dataloader. + padding_noise : float, optional (default=.1) + When sorting by padding length, we add a bit of noise to the lengths, so that the sorting + isn't deterministic. This parameter determines how much noise we add, as a percentage of + the actual padding value for each instance. + drop_last : `bool` + If `True`, the sampler will drop the last batch if + its size would be less than batch_size`. + """ + + def __init__( + self, + data_source: data.Dataset, + batch_size: int, + sorting_keys: List[Tuple[str, str]] = None, + padding_noise: float = 0.1, + drop_last: bool = False, + ): + + self.vocab = data_source.vocab + self.sorting_keys = sorting_keys + self.padding_noise = padding_noise + self.batch_size = batch_size + self.data_source = data_source + self.drop_last = drop_last + + def _argsort_by_padding(self, instances: Iterable[Instance]) -> List[int]: + """ + Argsorts the instances by their padding lengths, using the keys in + `sorting_keys` (in the order in which they are provided). `sorting_keys` + is a list of `(field_name, padding_key)` tuples. 
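+        For example, `[("text", "tokens___tokens")]` sorts instances by the
+        number of tokens in their `text` field (plus a little padding noise).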
+ """ + if not self.sorting_keys: + logger.info("No sorting keys given; trying to guess a good one") + self._guess_sorting_keys(instances) + logger.info(f"Using {self.sorting_keys} as the sorting keys") + instances_with_lengths = [] + for instance in instances: + # Make sure instance is indexed before calling .get_padding + instance.index_fields(self.vocab) + padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) + if self.padding_noise > 0.0: + noisy_lengths = {} + for field_name, field_lengths in padding_lengths.items(): + noisy_lengths[field_name] = add_noise_to_dict_values( + field_lengths, self.padding_noise + ) + padding_lengths = noisy_lengths + instance_with_lengths = ( + [ + padding_lengths[field_name][padding_key] + for (field_name, padding_key) in self.sorting_keys + ], + instance, + ) + instances_with_lengths.append(instance_with_lengths) + with_indices = [(x, i) for i, x in enumerate(instances_with_lengths)] + with_indices.sort(key=lambda x: x[0][0]) + return [instance_with_index[-1] for instance_with_index in with_indices] + + def __iter__(self) -> Iterable[List[int]]: + + indices = self._argsort_by_padding(self.data_source) + for group in lazy_groups_of(indices, self.batch_size): + batch_indices = list(group) + if self.drop_last and len(batch_indices) < self.batch_size: + continue + yield batch_indices + + def _guess_sorting_keys(self, instances: Iterable[Instance], num_instances: int = 10) -> None: + """ + Use `num_instances` instances from the dataset to infer the keys used + for sorting the dataset for bucketing. + + # Parameters + + instances : `Iterable[Instance]`, required. + The dataset to guess sorting keys for. + num_instances : `int`, optional (default = 10) + The number of instances to use to guess sorting keys. Typically + the default value is completely sufficient, but if your instances + are not homogeneous, you might need more. + """ + max_length = 0.0 + longest_padding_key: Tuple[str, str] = None + for i, instance in enumerate(instances): + instance.index_fields(self.vocab) + padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) + for field_name, field_padding in padding_lengths.items(): + for padding_key, length in field_padding.items(): + if length > max_length: + max_length = length + longest_padding_key = (field_name, padding_key) + if i > num_instances: + # Only use num_instances instances to guess the sorting keys. + break + + if not longest_padding_key: + # This shouldn't ever happen (you basically have to have an empty instance list), but + # just in case... 
+ raise AssertionError( + "Found no field that needed padding; we are surprised you got this error, please " + "open an issue on github" + ) + self.sorting_keys = [longest_padding_key] + + def __len__(self): + return len(self.data_source) // self.batch_size diff --git a/allennlp/data/samplers/samplers.py b/allennlp/data/samplers/samplers.py index 7149209ad68..7d5b1d4e9e8 100644 --- a/allennlp/data/samplers/samplers.py +++ b/allennlp/data/samplers/samplers.py @@ -1,14 +1,8 @@ -from typing import List, Iterable, Tuple, Dict, cast -import logging +from typing import List, Iterable from torch.utils import data from allennlp.common.registrable import Registrable -from allennlp.common.util import add_noise_to_dict_values, lazy_groups_of -from allennlp.data.instance import Instance - -logger = logging.getLogger(__name__) - class Sampler(Registrable): """ @@ -141,138 +135,3 @@ class BasicBatchSampler(BatchSampler, data.BatchSampler): def __init__(self, sampler: Sampler, batch_size: int, drop_last: bool): super().__init__(sampler, batch_size, drop_last) - - -@BatchSampler.register("bucket") -class BucketBatchSampler(BatchSampler): - """ - An sampler which by default, argsorts batches with respect to the maximum input lengths `per - batch`. You can provide a list of field names and padding keys (or pass none, in which case they - will be inferred) which the dataset will be sorted by before doing this batching, causing inputs - with similar length to be batched together, making computation more efficient (as less time is - wasted on padded elements of the batch). - - # Parameters - - sorting_keys : List[Tuple[str, str]], optional - To bucket inputs into batches, we want to group the instances by padding length, so that we - minimize the amount of padding necessary per batch. In order to do this, we need to know - which fields need what type of padding, and in what order. - - Specifying the right keys for this is a bit cryptic, so if this is not given we try to - auto-detect the right keys by iterating once through the data up front, reading all of the - padding keys and seeing which one has the longest length. We use that one for padding. - This should give reasonable results in most cases. - - When you need to specify this yourself, you can create an instance from your dataset and - call `Instance.get_padding_lengths()` to see a list of all keys used in your data. You - should give one or more of those as the sorting keys here. - batch_size : int, required. - The size of each batch of instances yielded when calling the dataloader. - padding_noise : float, optional (default=.1) - When sorting by padding length, we add a bit of noise to the lengths, so that the sorting - isn't deterministic. This parameter determines how much noise we add, as a percentage of - the actual padding value for each instance. - drop_last : `bool` - If `True`, the sampler will drop the last batch if - its size would be less than batch_size`. 
- """ - - def __init__( - self, - data_source: data.Dataset, - batch_size: int, - sorting_keys: List[Tuple[str, str]] = None, - padding_noise: float = 0.1, - drop_last: bool = False, - ): - - self.vocab = data_source.vocab - self.sorting_keys = sorting_keys - self.padding_noise = padding_noise - self.batch_size = batch_size - self.data_source = data_source - self.drop_last = drop_last - - def _argsort_by_padding(self, instances: Iterable[Instance]) -> List[int]: - """ - Argsorts the instances by their padding lengths, using the keys in - `sorting_keys` (in the order in which they are provided). `sorting_keys` - is a list of `(field_name, padding_key)` tuples. - """ - if not self.sorting_keys: - logger.info("No sorting keys given; trying to guess a good one") - self._guess_sorting_keys(instances) - logger.info(f"Using {self.sorting_keys} as the sorting keys") - instances_with_lengths = [] - for instance in instances: - # Make sure instance is indexed before calling .get_padding - instance.index_fields(self.vocab) - padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) - if self.padding_noise > 0.0: - noisy_lengths = {} - for field_name, field_lengths in padding_lengths.items(): - noisy_lengths[field_name] = add_noise_to_dict_values( - field_lengths, self.padding_noise - ) - padding_lengths = noisy_lengths - instance_with_lengths = ( - [ - padding_lengths[field_name][padding_key] - for (field_name, padding_key) in self.sorting_keys - ], - instance, - ) - instances_with_lengths.append(instance_with_lengths) - with_indices = [(x, i) for i, x in enumerate(instances_with_lengths)] - with_indices.sort(key=lambda x: x[0][0]) - return [instance_with_index[-1] for instance_with_index in with_indices] - - def __iter__(self) -> Iterable[List[int]]: - - indices = self._argsort_by_padding(self.data_source) - for group in lazy_groups_of(indices, self.batch_size): - batch_indices = list(group) - if self.drop_last and len(batch_indices) < self.batch_size: - continue - yield batch_indices - - def _guess_sorting_keys(self, instances: Iterable[Instance], num_instances: int = 10) -> None: - """ - Use `num_instances` instances from the dataset to infer the keys used - for sorting the dataset for bucketing. - - # Parameters - - instances : `Iterable[Instance]`, required. - The dataset to guess sorting keys for. - num_instances : `int`, optional (default = 10) - The number of instances to use to guess sorting keys. Typically - the default value is completely sufficient, but if your instances - are not homogeneous, you might need more. - """ - max_length = 0.0 - longest_padding_key: Tuple[str, str] = None - for i, instance in enumerate(instances): - instance.index_fields(self.vocab) - padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths()) - for field_name, field_padding in padding_lengths.items(): - for padding_key, length in field_padding.items(): - if length > max_length: - max_length = length - longest_padding_key = (field_name, padding_key) - if i > num_instances: - # Only use num_instances instances to guess the sorting keys. - break - - if not longest_padding_key: - # This shouldn't ever happen (you basically have to have an empty instance list), but - # just in case... 
- raise AssertionError( - "Found no field that needed padding; we are surprised you got this error, please " - "open an issue on github" - ) - self.sorting_keys = [longest_padding_key] - - def __len__(self): - return len(self.data_source) // self.batch_size diff --git a/allennlp/tests/data/samplers/__init__.py b/allennlp/tests/data/samplers/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/allennlp/tests/data/samplers/bucket_batch_sampler_test.py b/allennlp/tests/data/samplers/bucket_batch_sampler_test.py new file mode 100644 index 00000000000..a53186996e5 --- /dev/null +++ b/allennlp/tests/data/samplers/bucket_batch_sampler_test.py @@ -0,0 +1,109 @@ +from allennlp.common import Params +from allennlp.data import Instance, Token +from allennlp.data.batch import Batch +from allennlp.data.fields import TextField +from allennlp.data.samplers import BucketBatchSampler +from allennlp.tests.data.iterators.basic_iterator_test import IteratorTest +from allennlp.data.dataset_readers.dataset_reader import AllennlpDataset +from allennlp.data.dataloader import DataLoader + + +class TestBucketSampler(IteratorTest): + def test_create_batches_groups_correctly(self): + + dataset = AllennlpDataset(self.instances, vocab=self.vocab) + sampler = BucketBatchSampler( + dataset, batch_size=2, padding_noise=0, sorting_keys=[("text", "tokens___tokens")] + ) + + grouped_instances = [] + for indices in sampler: + grouped_instances.append([self.instances[idx] for idx in indices]) + assert grouped_instances == [ + [self.instances[4], self.instances[2]], + [self.instances[0], self.instances[1]], + [self.instances[3]], + ] + + def test_guess_sorting_key_picks_the_longest_key(self): + dataset = AllennlpDataset(self.instances, vocab=self.vocab) + sampler = BucketBatchSampler(dataset, batch_size=2, padding_noise=0) + instances = [] + short_tokens = [Token(t) for t in ["what", "is", "this", "?"]] + long_tokens = [Token(t) for t in ["this", "is", "a", "not", "very", "long", "passage"]] + instances.append( + Instance( + { + "question": TextField(short_tokens, self.token_indexers), + "passage": TextField(long_tokens, self.token_indexers), + } + ) + ) + instances.append( + Instance( + { + "question": TextField(short_tokens, self.token_indexers), + "passage": TextField(long_tokens, self.token_indexers), + } + ) + ) + instances.append( + Instance( + { + "question": TextField(short_tokens, self.token_indexers), + "passage": TextField(long_tokens, self.token_indexers), + } + ) + ) + assert sampler.sorting_keys is None + sampler._guess_sorting_keys(instances) + assert sampler.sorting_keys == [("passage", "tokens___tokens")] + + def test_from_params(self): + dataset = AllennlpDataset(self.instances, self.vocab) + params = Params({}) + + sorting_keys = [("s1", "nt"), ("s2", "nt2")] + params["sorting_keys"] = sorting_keys + params["batch_size"] = 32 + sampler = BucketBatchSampler.from_params(params=params, data_source=dataset) + + assert sampler.sorting_keys == sorting_keys + assert sampler.padding_noise == 0.1 + assert sampler.batch_size == 32 + + params = Params( + { + "sorting_keys": sorting_keys, + "padding_noise": 0.5, + "batch_size": 100, + "drop_last": True, + } + ) + + sampler = BucketBatchSampler.from_params(params=params, data_source=dataset) + assert sampler.sorting_keys == sorting_keys + assert sampler.padding_noise == 0.5 + assert sampler.batch_size == 100 + assert sampler.drop_last + + def test_drop_last_works(self): + dataset = AllennlpDataset(self.instances, vocab=self.vocab) + sampler = 
BucketBatchSampler( + dataset, + batch_size=2, + padding_noise=0, + sorting_keys=[("text", "tokens___tokens")], + drop_last=True, + ) + # We use a custom collate_fn for testing, which doesn't actually create tensors, + # just the allennlp Batches. + dataloader = DataLoader(dataset, batch_sampler=sampler, collate_fn=lambda x: Batch(x)) + batches = [batch for batch in iter(dataloader)] + stats = self.get_batches_stats(batches) + + # all batches have length batch_size + assert all(batch_len == 2 for batch_len in stats["batch_lengths"]) + + # we should have lost one instance by skipping the last batch + assert stats["total_instances"] == len(self.instances) - 1 From 5f0c8db80afed104ca054e10516a68654c774726 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Wed, 26 Feb 2020 09:17:28 -0800 Subject: [PATCH 52/52] PR comments --- allennlp/data/dataloader.py | 4 ++-- allennlp/data/samplers/bucket_batch_sampler.py | 12 +++++++----- allennlp/data/samplers/samplers.py | 17 ++++++++++++----- allennlp/training/trainer.py | 4 +--- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/allennlp/data/dataloader.py b/allennlp/data/dataloader.py index ceeb193b48f..6b01df69a50 100644 --- a/allennlp/data/dataloader.py +++ b/allennlp/data/dataloader.py @@ -41,7 +41,7 @@ def __init__( sampler: Sampler = None, batch_sampler: BatchSampler = None, num_workers: int = 0, - # NOTE: This default is different from the normal `None`. + # NOTE: The default for collate_fn is different from the normal `None`. # We assume that if you are using this class you are using an # allennlp dataset of instances, which would require this. collate_fn=allennlp_collate, @@ -98,7 +98,7 @@ def from_partial_objects( sampler=sampler_, batch_sampler=batch_sampler_, num_workers=num_workers, - # NOTE: This default is different from the normal `None`. + # NOTE: The default for collate_fn is different from the normal `None`. # We assume that if you are using this class you are using an # allennlp dataset of instances, which would require this. collate_fn=allennlp_collate, diff --git a/allennlp/data/samplers/bucket_batch_sampler.py b/allennlp/data/samplers/bucket_batch_sampler.py index 0a1dbeb6bda..b759c81cc08 100644 --- a/allennlp/data/samplers/bucket_batch_sampler.py +++ b/allennlp/data/samplers/bucket_batch_sampler.py @@ -22,26 +22,28 @@ class BucketBatchSampler(BatchSampler): data_source: `data.Dataset`, required, The pytorch `Dataset` of allennlp Instances to bucket. + batch_size : int, required. + The size of each batch of instances yielded when calling the dataloader. sorting_keys : List[Tuple[str, str]], optional To bucket inputs into batches, we want to group the instances by padding length, so that we minimize the amount of padding necessary per batch. In order to do this, we need to know which fields need what type of padding, and in what order. Specifying the right keys for this is a bit cryptic, so if this is not given we try to - auto-detect the right keys by iterating once through the data up front, reading all of the + auto-detect the right keys by iterating through a few instances upfront, reading all of the padding keys and seeing which one has the longest length. We use that one for padding. - This should give reasonable results in most cases. + This should give reasonable results in most cases. Some cases where it might not be the + right thing to do are when you have a `ListField[TextField]`, or when you have a really + long, constant length `ArrayField`. 
     When you need to specify this yourself, you can create an instance from your
     dataset and call `Instance.get_padding_lengths()` to see a list of all keys used
     in your data. You should give one or more of those as the sorting keys here.
-    batch_size : int, required.
-        The size of each batch of instances yielded when calling the dataloader.
     padding_noise : float, optional (default=.1)
         When sorting by padding length, we add a bit of noise to the lengths, so that the sorting
         isn't deterministic. This parameter determines how much noise we add, as a percentage of
         the actual padding value for each instance.
-    drop_last : `bool`
+    drop_last : `bool`, optional (default = False)
         If `True`, the sampler will drop the last batch if
         its size would be less than `batch_size`.
     """
diff --git a/allennlp/data/samplers/samplers.py b/allennlp/data/samplers/samplers.py
index 7d5b1d4e9e8..c31d8415065 100644
--- a/allennlp/data/samplers/samplers.py
+++ b/allennlp/data/samplers/samplers.py
@@ -3,6 +3,13 @@
 from allennlp.common.registrable import Registrable
 
+"""
+Duplicates of the pytorch Sampler classes. Broadly, these only exist
+so that we can add type hints, meaning we can construct them from configuration
+files. You can use these directly from Python code, but they are identical to the
+pytorch ones.
+"""
+
 
 class Sampler(Registrable):
     """
@@ -30,7 +37,7 @@ def __iter__(self) -> Iterable[List[int]]:
 @Sampler.register("sequential")
 class SequentialSampler(Sampler, data.SequentialSampler):
     """
-    A registerable version of pytorch's
+    A registrable version of pytorch's
     [SequentialSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.SequentialSampler).
     """
 
@@ -41,7 +48,7 @@ def __init__(self, data_source: data.Dataset):
 @Sampler.register("random")
 class RandomSampler(Sampler, data.RandomSampler):
     """
-    A registerable version of pytorch's
+    A registrable version of pytorch's
     [RandomSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.RandomSampler).
     Samples elements randomly. If without replacement, then sample from a shuffled dataset.
     If with replacement, then user can specify `num_samples` to draw.
@@ -65,7 +72,7 @@ def __init__(
 @Sampler.register("subset_random")
 class SubsetRandomSampler(Sampler, data.SubsetRandomSampler):
     """
-    A registerable version of pytorch's
+    A registrable version of pytorch's
     [SubsetRandomSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.SubsetRandomSampler).
     Samples elements randomly from a given list of indices, without replacement.
 
@@ -81,7 +88,7 @@ def __init__(self, indices: List[int]):
 @Sampler.register("weighted_random")
 class WeightedRandomSampler(Sampler, data.WeightedRandomSampler):
     """
-    A registerable version of pytorch's
+    A registrable version of pytorch's
     [WeightedRandomSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.WeightedRandomSampler).
     Samples elements from `[0,...,len(weights)-1]` with given probabilities (weights).
 
@@ -111,7 +118,7 @@ def __init__(self, weights: List[float], num_samples: int, replacement: bool = T
 @BatchSampler.register("basic")
 class BasicBatchSampler(BatchSampler, data.BatchSampler):
     """
-    A registerable version of pytorch's
+    A registrable version of pytorch's
     [BatchSampler](https://pytorch.org/docs/stable/data.html#torch.utils.data.BatchSampler).
     Wraps another sampler to yield a mini-batch of indices.
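
To give a feel for how the pieces in this series fit together, here is a rough sketch. It is illustrative only and not part of the patch: the "text" field name, the "tokens___tokens" padding key, and the toy three-instance dataset are stand-ins borrowed from the tests earlier in the series, so substitute your own data and keys.

from allennlp.common import Params
from allennlp.data import Instance, Token
from allennlp.data.dataloader import DataLoader
from allennlp.data.dataset_readers.dataset_reader import AllennlpDataset
from allennlp.data.fields import TextField
from allennlp.data.samplers import BucketBatchSampler
from allennlp.data.token_indexers import SingleIdTokenIndexer
from allennlp.data.vocabulary import Vocabulary

# A toy dataset: three single-field instances of different lengths.
token_indexers = {"tokens": SingleIdTokenIndexer()}
instances = [
    Instance({"text": TextField([Token(t) for t in sentence.split()], token_indexers)})
    for sentence in ["a short one", "a slightly longer sentence goes here", "tiny"]
]
vocab = Vocabulary.from_instances(instances)
dataset = AllennlpDataset(instances, vocab=vocab)

# Bucket by padded length; omit sorting_keys to let the sampler guess one.
sampler = BucketBatchSampler(
    dataset,
    batch_size=2,
    sorting_keys=[("text", "tokens___tokens")],
    padding_noise=0.1,
    drop_last=False,
)

# collate_fn defaults to allennlp_collate, so iterating yields padded tensor
# dictionaries in which instances of similar length end up in the same batch.
data_loader = DataLoader(dataset, batch_sampler=sampler)
for batch in data_loader:
    pass  # each `batch` is one bucketed, padded batch of the toy instances

# Because the samplers are Registrable, the same sampler can be built from configuration:
params = Params({"batch_size": 2, "padding_noise": 0.1, "drop_last": False})
sampler_from_config = BucketBatchSampler.from_params(params=params, data_source=dataset)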
diff --git a/allennlp/training/trainer.py b/allennlp/training/trainer.py index 8efa85fa7ad..2bb423d463d 100644 --- a/allennlp/training/trainer.py +++ b/allennlp/training/trainer.py @@ -515,9 +515,7 @@ def _validation_loss(self) -> Tuple[float, int]: "Validation results cannot be calculated without a validation_data_loader" ) - val_generator_tqdm = Tqdm.tqdm( - iter(validation_data_loader), total=len(validation_data_loader) - ) + val_generator_tqdm = Tqdm.tqdm(validation_data_loader) batches_this_epoch = 0 val_loss = 0 done_early = False
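
The tqdm simplification above works because the data loader is itself a sized iterable: a pytorch DataLoader driven by a batch sampler reports its length in batches, so the progress bar no longer needs an explicit total. A small sketch, continuing the toy `data_loader` from the earlier snippet; `Tqdm` comes from `allennlp.common.tqdm`, which this patch does not touch.

from allennlp.common.tqdm import Tqdm

# Wrapping the loader directly gives tqdm its total from len(data_loader),
# replacing the old Tqdm.tqdm(iter(loader), total=len(loader)) pattern.
for batch in Tqdm.tqdm(data_loader):
    pass  # same batches as before, now with a progress bar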