nv-morpheus · rapids-bot · Apr 19, 2024 · Apr 18, 2024 · Apr 18, 2024 · Apr 18, 2024
diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py
@@ -299,8 +299,10 @@ def set_output_fut(resp: TensorMemory, inner_batch, batch_future: mrc.Future):
         else:
             node = builder.make_node(self.unique_name, ops.build(py_inference_fn))
 
-        # Set the concurrency level to be up with the thread count
-        node.launch_options.pe_count = self._thread_count
+            # Set the concurrency level to match the thread count. This is intentionally only set for Python
+            # implementations to avoid #1639
+            node.launch_options.pe_count = self._thread_count
+
         builder.make_edge(input_node, node)
 
         return node

diff --git a/tests/test_abp.py b/tests/test_abp.py
@@ -51,8 +51,9 @@
 
 @pytest.mark.slow
 @pytest.mark.use_python
+@pytest.mark.parametrize('num_threads', [1, 4])
 @mock.patch('tritonclient.grpc.InferenceServerClient')
-def test_abp_no_cpp(mock_triton_client, config: Config, tmp_path):
+def test_abp_no_cpp(mock_triton_client: mock.MagicMock, config: Config, tmp_path: str, num_threads: int):
     mock_metadata = {
         "inputs": [{
             'name': 'input__0', 'datatype': 'FP32', "shape": [-1, FEATURE_LENGTH]
@@ -83,7 +84,7 @@ def test_abp_no_cpp(mock_triton_client, config: Config, tmp_path):
     config.pipeline_batch_size = 1024
     config.feature_length = FEATURE_LENGTH
     config.edge_buffer_size = 128
-    config.num_threads = 1
+    config.num_threads = num_threads
 
     config.fil = ConfigFIL()
     config.fil.feature_columns = load_labels_file(os.path.join(TEST_DIRS.data_dir, 'columns_fil.txt'))
@@ -108,21 +109,24 @@ def test_abp_no_cpp(mock_triton_client, config: Config, tmp_path):
 
     pipe.run()
     compare_class_to_scores(out_file, config.class_labels, '', 'score_', threshold=0.5)
-    results = calc_error_val(results_file_name)
-    assert results.diff_rows == 0
+
+    if num_threads == 1:
+        results = calc_error_val(results_file_name)
+        assert results.diff_rows == 0
 
 
 @pytest.mark.slow
 @pytest.mark.use_cpp
 @pytest.mark.usefixtures("launch_mock_triton")
-def test_abp_cpp(config, tmp_path):
+@pytest.mark.parametrize('num_threads', [1, 4])
+def test_abp_cpp(config: Config, tmp_path: str, num_threads: int):
     config.mode = PipelineModes.FIL
     config.class_labels = ["mining"]
     config.model_max_batch_size = MODEL_MAX_BATCH_SIZE
     config.pipeline_batch_size = 1024
     config.feature_length = FEATURE_LENGTH
     config.edge_buffer_size = 128
-    config.num_threads = 1
+    config.num_threads = num_threads
 
     config.fil = ConfigFIL()
     config.fil.feature_columns = load_labels_file(os.path.join(TEST_DIRS.data_dir, 'columns_fil.txt'))
@@ -151,14 +155,17 @@ def test_abp_cpp(config, tmp_path):
 
     pipe.run()
     compare_class_to_scores(out_file, config.class_labels, '', 'score_', threshold=0.5)
-    results = calc_error_val(results_file_name)
-    assert results.diff_rows == 0
+
+    if num_threads == 1:
+        results = calc_error_val(results_file_name)
+        assert results.diff_rows == 0
 
 
 @pytest.mark.slow
 @pytest.mark.use_python
+@pytest.mark.parametrize('num_threads', [1, 4])
 @mock.patch('tritonclient.grpc.InferenceServerClient')
-def test_abp_multi_segment_no_cpp(mock_triton_client, config: Config, tmp_path):
+def test_abp_multi_segment_no_cpp(mock_triton_client: mock.MagicMock, config: Config, tmp_path: str, num_threads: int):
     mock_metadata = {
         "inputs": [{
             'name': 'input__0', 'datatype': 'FP32', "shape": [-1, FEATURE_LENGTH]
@@ -189,7 +196,7 @@ def test_abp_multi_segment_no_cpp(mock_triton_client, config: Config, tmp_path):
     config.pipeline_batch_size = 1024
     config.feature_length = FEATURE_LENGTH
     config.edge_buffer_size = 128
-    config.num_threads = 1
+    config.num_threads = num_threads
 
     config.fil = ConfigFIL()
     config.fil.feature_columns = load_labels_file(os.path.join(TEST_DIRS.data_dir, 'columns_fil.txt'))
@@ -230,21 +237,24 @@ def test_abp_multi_segment_no_cpp(mock_triton_client, config: Config, tmp_path):
     pipe.add_stage(WriteToFileStage(config, filename=out_file, overwrite=False))
 
     pipe.run()
-    results = calc_error_val(results_file_name)
-    assert results.diff_rows == 0
+
+    if num_threads == 1:
+        results = calc_error_val(results_file_name)
+        assert results.diff_rows == 0
 
 
 @pytest.mark.slow
 @pytest.mark.use_cpp
 @pytest.mark.usefixtures("launch_mock_triton")
-def test_abp_multi_segment_cpp(config, tmp_path):
+@pytest.mark.parametrize('num_threads', [1, 4])
+def test_abp_multi_segment_cpp(config: Config, tmp_path: str, num_threads: int):
     config.mode = PipelineModes.FIL
     config.class_labels = ["mining"]
     config.model_max_batch_size = MODEL_MAX_BATCH_SIZE
     config.pipeline_batch_size = 1024
     config.feature_length = FEATURE_LENGTH
     config.edge_buffer_size = 128
-    config.num_threads = 1
+    config.num_threads = num_threads
 
     config.fil = ConfigFIL()
     config.fil.feature_columns = load_labels_file(os.path.join(TEST_DIRS.data_dir, 'columns_fil.txt'))
@@ -289,5 +299,6 @@ def test_abp_multi_segment_cpp(config, tmp_path):
 
     pipe.run()
 
-    results = calc_error_val(results_file_name)
-    assert results.diff_rows == 0
+    if num_threads == 1:
+        results = calc_error_val(results_file_name)
+        assert results.diff_rows == 0
diff --git a/tests/test_phishing.py b/tests/test_phishing.py
@@ -23,6 +23,7 @@
 from _utils import TEST_DIRS
 from _utils import calc_error_val
 from _utils import mk_async_infer
+from morpheus.config import Config
 from morpheus.config import PipelineModes
 from morpheus.pipeline import LinearPipeline
 from morpheus.stages.general.monitor_stage import MonitorStage
@@ -44,7 +45,7 @@
 @pytest.mark.slow
 @pytest.mark.use_python
 @mock.patch('tritonclient.grpc.InferenceServerClient')
-def test_email_no_cpp(mock_triton_client, config, tmp_path):
+def test_email_no_cpp(mock_triton_client: mock.MagicMock, config: Config, tmp_path: str):
     mock_metadata = {
         "inputs": [{
             "name": "input_ids", "datatype": "INT64", "shape": [-1, FEATURE_LENGTH]
@@ -104,21 +105,23 @@ def test_email_no_cpp(mock_triton_client, config, tmp_path):
     pipe.add_stage(WriteToFileStage(config, filename=out_file, overwrite=False))
 
     pipe.run()
+
     results = calc_error_val(results_file_name)
     assert results.diff_rows == 153
 
 
 @pytest.mark.slow
 @pytest.mark.use_cpp
 @pytest.mark.usefixtures("launch_mock_triton")
-def test_email_cpp(config, tmp_path):
+@pytest.mark.parametrize('num_threads', [1, 4])
+def test_email_cpp(config: Config, tmp_path: str, num_threads: int):
     config.mode = PipelineModes.NLP
     config.class_labels = load_labels_file(os.path.join(TEST_DIRS.data_dir, "labels_phishing.txt"))
     config.model_max_batch_size = MODEL_MAX_BATCH_SIZE
     config.pipeline_batch_size = 1024
     config.feature_length = FEATURE_LENGTH
     config.edge_buffer_size = 128
-    config.num_threads = 1
+    config.num_threads = num_threads
 
     val_file_name = os.path.join(TEST_DIRS.validation_data_dir, 'phishing-email-validation-data.jsonlines')
     vocab_file_name = os.path.join(TEST_DIRS.data_dir, 'bert-base-uncased-hash.txt')
@@ -147,5 +150,7 @@ def test_email_cpp(config, tmp_path):
     pipe.add_stage(WriteToFileStage(config, filename=out_file, overwrite=False))
 
     pipe.run()
-    results = calc_error_val(results_file_name)
-    assert results.diff_rows == 682
+
+    if num_threads == 1:
+        results = calc_error_val(results_file_name)
+        assert results.diff_rows == 682
diff --git a/tests/test_sid.py b/tests/test_sid.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 import os
 from unittest import mock
 
@@ -25,6 +26,7 @@
 from _utils import calc_error_val
 from _utils import compare_class_to_scores
 from _utils import mk_async_infer
+from morpheus.config import Config
 from morpheus.config import CppConfig
 from morpheus.config import PipelineModes
 from morpheus.pipeline import LinearPipeline
@@ -44,7 +46,13 @@
 MODEL_MAX_BATCH_SIZE = 32
 
 
-def _run_minibert_pipeline(config, tmp_path, model_name, truncated, data_col_name: str = "data"):
+def _run_minibert_pipeline(*,
+                           config: Config,
+                           tmp_path: str,
+                           model_name: str,
+                           truncated: bool,
+                           data_col_name: str = "data",
+                           num_threads: int = 1):
     """
     Runs just the Minibert Pipeline
     """
@@ -66,7 +74,7 @@ def _run_minibert_pipeline(config, tmp_path, model_name, truncated, data_col_nam
     config.pipeline_batch_size = 1024
     config.feature_length = FEATURE_LENGTH
     config.edge_buffer_size = 128
-    config.num_threads = 1
+    config.num_threads = num_threads
 
     val_file_name = os.path.join(TEST_DIRS.validation_data_dir, 'sid-validation-data.csv')
     vocab_file_name = os.path.join(TEST_DIRS.data_dir, 'bert-base-uncased-hash.txt')
@@ -100,7 +108,8 @@ def _run_minibert_pipeline(config, tmp_path, model_name, truncated, data_col_nam
                            column=data_col_name))
     pipe.add_stage(
         TritonInferenceStage(config, model_name=model_name, server_url='localhost:8001', force_convert_inputs=True))
-    pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf"))
+    pipe.add_stage(
+        MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=logging.INFO))
     pipe.add_stage(AddClassificationsStage(config, threshold=0.5, prefix="si_"))
     pipe.add_stage(AddScoresStage(config, prefix="score_"))
     pipe.add_stage(
@@ -113,7 +122,13 @@ def _run_minibert_pipeline(config, tmp_path, model_name, truncated, data_col_nam
     return calc_error_val(results_file_name)
 
 
-def _run_minibert(config, tmp_path, model_name, truncated, data_col_name: str = "data"):
+def _run_minibert(*,
+                  config: Config,
+                  tmp_path: str,
+                  model_name: str,
+                  truncated: bool,
+                  data_col_name: str = "data",
+                  num_threads: int = 1):
     """
     Runs the minibert pipeline and mocks the Triton Python interface
     """
@@ -145,44 +160,52 @@ def _run_minibert(config, tmp_path, model_name, truncated, data_col_name: str =
         async_infer = mk_async_infer(inf_results)
         mock_triton_client.async_infer.side_effect = async_infer
 
-        return _run_minibert_pipeline(config, tmp_path, model_name, truncated, data_col_name)
+        return _run_minibert_pipeline(config=config,
+                                      tmp_path=tmp_path,
+                                      model_name=model_name,
+                                      truncated=truncated,
+                                      data_col_name=data_col_name,
+                                      num_threads=num_threads)
 
 
 @pytest.mark.slow
 @pytest.mark.use_cpp
 @pytest.mark.usefixtures("launch_mock_triton")
-def test_minibert_no_trunc(config, tmp_path):
+@pytest.mark.parametrize("num_threads", [1, 4])
+def test_minibert_no_trunc(config: Config, tmp_path: str, num_threads: int):
 
-    results = _run_minibert(config, tmp_path, "sid-minibert-onnx-no-trunc", False)
+    results = _run_minibert(config=config,
+                            tmp_path=tmp_path,
+                            model_name="sid-minibert-onnx-no-trunc",
+                            truncated=False,
+                            num_threads=num_threads)
 
-    # Not sure why these are different
-    if (CppConfig.get_should_use_cpp()):
-        assert results.diff_rows == 18
-    else:
-        assert results.diff_rows == 1333
+    # When threading is enabled, the results returned from the mocked Triton server won't match the expected results
+    if num_threads == 1:
+        # It is unclear why the expected diff_rows differs between the C++ and Python implementations
+        if (CppConfig.get_should_use_cpp()):
+            assert results.diff_rows == 18
+        else:
+            assert results.diff_rows == 1333
 
 
 @pytest.mark.slow
 @pytest.mark.usefixtures("launch_mock_triton")
-def test_minibert_truncated(config, tmp_path):
-
-    results = _run_minibert(config, tmp_path, 'sid-minibert-onnx', True)
-
-    # Not sure why these are different
-    if (CppConfig.get_should_use_cpp()):
-        assert results.diff_rows == 1204
-    else:
-        assert results.diff_rows == 1333
-
-
-@pytest.mark.slow
-@pytest.mark.usefixtures("launch_mock_triton")
-def test_minibert_data_col_name(config, tmp_path):
-
-    results = _run_minibert(config, tmp_path, 'sid-minibert-onnx', True, "definitely_not_data")
-
-    # Not sure why these are different
-    if (CppConfig.get_should_use_cpp()):
-        assert results.diff_rows == 1204
-    else:
-        assert results.diff_rows == 1333
+@pytest.mark.parametrize("data_col_name", ["data", "definitely_not_data"])
+@pytest.mark.parametrize("num_threads", [1, 4])
+def test_minibert_truncated(config: Config, tmp_path: str, data_col_name: str, num_threads: int):
+
+    results = _run_minibert(config=config,
+                            tmp_path=tmp_path,
+                            model_name='sid-minibert-onnx',
+                            truncated=True,
+                            data_col_name=data_col_name,
+                            num_threads=num_threads)
+
+    # When threading is enabled, the results returned from the mocked Triton server won't match the expected results
+    if num_threads == 1:
+        # It is unclear why the expected diff_rows differs between the C++ and Python implementations
+        if (CppConfig.get_should_use_cpp()):
+            assert results.diff_rows == 1204
+        else:
+            assert results.diff_rows == 1333
diff --git a/tests/test_triton_inference_stage.py b/tests/test_triton_inference_stage.py
@@ -152,8 +152,9 @@ def test_stage_get_inference_worker(config: Config, pipeline_mode: PipelineModes
 @pytest.mark.slow
 @pytest.mark.use_python
 @pytest.mark.parametrize('num_records', [1000, 2000, 4000])
+@pytest.mark.parametrize('num_threads', [1, 4, 12])
 @mock.patch('tritonclient.grpc.InferenceServerClient')
-def test_triton_stage_pipe(mock_triton_client, config, num_records):
+def test_triton_stage_pipe(mock_triton_client: mock.MagicMock, config: Config, num_records: int, num_threads: int):
     mock_metadata = {
         "inputs": [{
             'name': 'input__0', 'datatype': 'FP32', "shape": [-1, 1]
@@ -185,7 +186,7 @@ def test_triton_stage_pipe(mock_triton_client, config, num_records):
     config.pipeline_batch_size = 1024
     config.feature_length = 1
     config.edge_buffer_size = 128
-    config.num_threads = 1
+    config.num_threads = num_threads
 
     config.fil = ConfigFIL()
     config.fil.feature_columns = ['v']
@@ -202,4 +203,5 @@ def test_triton_stage_pipe(mock_triton_client, config, num_records):
 
     pipe.run()
 
-    assert_results(comp_stage.get_results())
+    if num_threads == 1:
+        assert_results(comp_stage.get_results())