Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Allow user to provide opt_level to load_archive (#4027)
Browse files Browse the repository at this point in the history
* Allow user to provide opt_level to load_archive

* Moved .cuda call in merge, fix

* Revert argument order, update docstring

* More detailed warning logs

* Remove ensemble model

Co-authored-by: Dirk Groeneveld <dirkg@allenai.org>
  • Loading branch information
JohnGiorgi and dirkgr authored Apr 8, 2020
1 parent 6a940f9 commit c497103
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 69 deletions.
17 changes: 14 additions & 3 deletions allennlp/models/archival.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,11 @@ def archive_model(


def load_archive(
archive_file: str, cuda_device: int = -1, overrides: str = "", weights_file: str = None
archive_file: str,
cuda_device: int = -1,
opt_level: str = None,
overrides: str = "",
weights_file: str = None,
) -> Archive:
"""
Instantiates an Archive from an archived `tar.gz` file.
Expand All @@ -137,13 +141,19 @@ def load_archive(
archive_file : `str`
The archive file to load the model from.
weights_file : `str`, optional (default = None)
The weights file to use. If unspecified, weights.th in the archive_file will be used.
cuda_device : `int`, optional (default = -1)
If `cuda_device` is >= 0, the model will be loaded onto the
corresponding GPU. Otherwise it will be loaded onto the CPU.
opt_level : `str`, optional, (default = `None`)
Each `opt_level` establishes a set of properties that govern Amp’s implementation of pure or mixed
precision training. Must be a choice of `"O0"`, `"O1"`, `"O2"`, or `"O3"`.
See the Apex [documentation](https://nvidia.github.io/apex/amp.html#opt-levels-and-properties) for
more details. If `None`, defaults to the `opt_level` found in the model params. If `cuda_device==-1`,
Amp is not used and this argument is ignored.
overrides : `str`, optional (default = "")
JSON overrides to apply to the unarchived `Params` object.
weights_file : `str`, optional (default = None)
The weights file to use. If unspecified, weights.th in the archive_file will be used.
"""
# redirect to the cache, if necessary
resolved_archive_file = cached_path(archive_file)
Expand Down Expand Up @@ -184,6 +194,7 @@ def load_archive(
weights_file=weights_path,
serialization_dir=serialization_dir,
cuda_device=cuda_device,
opt_level=opt_level,
)

return Archive(model=model, config=config)
Expand Down
56 changes: 0 additions & 56 deletions allennlp/models/ensemble.py

This file was deleted.

45 changes: 35 additions & 10 deletions allennlp/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,12 @@ def _maybe_warn_for_unseparable_batches(self, output_key: str):

@classmethod
def _load(
cls, config: Params, serialization_dir: str, weights_file: str = None, cuda_device: int = -1
cls,
config: Params,
serialization_dir: str,
weights_file: str = None,
cuda_device: int = -1,
opt_level: str = None,
) -> "Model":
"""
Instantiates an already-trained model, based on the experiment
Expand All @@ -275,7 +280,7 @@ def _load(
model_params = config.get("model")

training_params = config.get("trainer", Params({}))
opt_level = training_params.get("opt_level")
opt_level = opt_level or training_params.get("opt_level")

# The experiment config tells us how to _train_ a model, including where to get pre-trained
# embeddings from. We're now _loading_ the model, so those embeddings will already be
Expand All @@ -291,18 +296,28 @@ def _load(
else:
model.cpu()

# If the model was trained with amp and amp is available, we should re-initialize it with
# the opt_level that was used. If the model was trained with amp but amp is not available, log a warning
# so this doesn't pass silently.
# If opt_level is not None (i.e. it exists in the loaded models params or was provided
# as argument to this method), call amp.initialize on the loaded model.
# Log a warning if amp is not installed or we are loading onto the cpu so that these
# cases do not pass silently.
if opt_level is not None:
if amp is None:
logger.warning(
(
f"This model was trained with amp (opt_level: {opt_level}) but amp is not available."
f"Apex must be installed to enable mixed-precision via amp."
f" Got opt_level is not None (opt_level={opt_level}) but Apex is not installed."
" Any further training or inference will happen at full-precision."
)
)
else:
if cuda_device == -1:
logger.warning(
(
f"A CUDA device must be specified to enable mixed-precision via amp."
f" Got cuda_device=={cuda_device} but opt_level is not None (opt_level={opt_level})."
" Any further training or inference will happen at full-precision."
)
)
if amp is not None and cuda_device >= 0:
model = amp.initialize(model, opt_level=opt_level)

# If vocab+embedding extension was done, the model initialized from from_params
Expand All @@ -320,7 +335,12 @@ def _load(

@classmethod
def load(
cls, config: Params, serialization_dir: str, weights_file: str = None, cuda_device: int = -1
cls,
config: Params,
serialization_dir: str,
weights_file: str = None,
cuda_device: int = -1,
opt_level: str = None,
) -> "Model":
"""
Instantiates an already-trained model, based on the experiment
Expand All @@ -341,7 +361,12 @@ def load(
cuda_device: int = -1
By default we load the model on the CPU, but if you want to load it
for GPU usage you can specify the id of your GPU here
opt_level : `str`, optional, (default = `None`)
Each `opt_level` establishes a set of properties that govern Amp’s implementation of pure or mixed
precision training. Must be a choice of `"O0"`, `"O1"`, `"O2"`, or `"O3"`.
See the Apex [documentation](https://nvidia.github.io/apex/amp.html#opt-levels-and-properties) for
more details. If `None`, defaults to the `opt_level` found in the model params. If `cuda_device==-1`,
Amp is not used and this argument is ignored.
# Returns
Expand All @@ -359,7 +384,7 @@ def load(
# This allows subclasses of Model to override _load.

model_class: Type[Model] = cls.by_name(model_type) # type: ignore
return model_class._load(config, serialization_dir, weights_file, cuda_device)
return model_class._load(config, serialization_dir, weights_file, cuda_device, opt_level)

def extend_embedder_vocab(self, embedding_sources_mapping: Dict[str, str] = None) -> None:
"""
Expand Down

0 comments on commit c497103

Please sign in to comment.