Skip to content

Commit

Permalink
Migrate dfencoder to morpheus repo (#763)
Browse files Browse the repository at this point in the history
* Copy dfencoder code, ensuring correct copyright headers
* Add a `manual_seed` helper method, replacing duplicated code
* Add tests for dfencoder
* Insert both our Apache license, and the original author's BSD license in each of the 4 python files (and the `__init__.py`). The original code didn't have that, but this conforms with our other 3rd party code inclusions.
* Transfer compiled C++ tests and libs (fixes #765)

fixes #753

Authors:
  - David Gardner (https://github.com/dagardner-nv)
  - Michael Demoret (https://github.com/mdemoret-nv)

Approvers:
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: #763
  • Loading branch information
dagardner-nv authored Mar 23, 2023
1 parent ca66ec0 commit 33e922c
Show file tree
Hide file tree
Showing 44 changed files with 2,643 additions and 224 deletions.
9 changes: 9 additions & 0 deletions ci/scripts/github/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ sccache --version

rapids-logger "Configuring cmake for Morpheus"
git submodule update --init --recursive

cmake -B build -G Ninja ${CMAKE_BUILD_ALL_FEATURES} \
-DCCACHE_PROGRAM_PATH=$(which sccache) \
-DMORPHEUS_PYTHON_BUILD_WHEEL=ON \
Expand All @@ -45,8 +46,16 @@ sccache --show-stats
rapids-logger "Archiving results"
tar cfj "${WORKSPACE_TMP}/wheel.tar.bz" build/dist

MORPHEUS_LIBS=($(find ${MORPHEUS_ROOT}/build/morpheus/_lib -name "*.so" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;))
tar cfj "${WORKSPACE_TMP}/morhpeus_libs.tar.bz" "${MORPHEUS_LIBS[@]}"

CPP_TESTS=($(find ${MORPHEUS_ROOT}/build/morpheus/_lib/tests -name "*.x" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;))
tar cfj "${WORKSPACE_TMP}/cpp_tests.tar.bz" "${CPP_TESTS[@]}"

rapids-logger "Pushing results to ${DISPLAY_ARTIFACT_URL}"
aws s3 cp --no-progress "${WORKSPACE_TMP}/wheel.tar.bz" "${ARTIFACT_URL}/wheel.tar.bz"
aws s3 cp --no-progress "${WORKSPACE_TMP}/morhpeus_libs.tar.bz" "${ARTIFACT_URL}/morhpeus_libs.tar.bz"
aws s3 cp --no-progress "${WORKSPACE_TMP}/cpp_tests.tar.bz" "${ARTIFACT_URL}/cpp_tests.tar.bz"

rapids-logger "Success"
exit 0
18 changes: 12 additions & 6 deletions ci/scripts/github/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,20 @@ source ${WORKSPACE}/ci/scripts/github/common.sh
update_conda_env

aws s3 cp --no-progress "${ARTIFACT_URL}/wheel.tar.bz" "${WORKSPACE_TMP}/wheel.tar.bz"
aws s3 cp --no-progress "${ARTIFACT_URL}/cpp_tests.tar.bz" "${WORKSPACE_TMP}/cpp_tests.tar.bz"
aws s3 cp --no-progress "${ARTIFACT_URL}/morhpeus_libs.tar.bz" "${WORKSPACE_TMP}/morhpeus_libs.tar.bz"

tar xf "${WORKSPACE_TMP}/wheel.tar.bz"
tar xf "${WORKSPACE_TMP}/morhpeus_libs.tar.bz"
tar xf "${WORKSPACE_TMP}/cpp_tests.tar.bz"

# Install the built Morpheus python package
cd ${MORPHEUS_ROOT}
pip install ${MORPHEUS_ROOT}/build/dist/*.whl

CPP_TESTS=($(find ${MORPHEUS_ROOT}/build -name "*.x"))

rapids-logger "Pulling LFS assets"
cd ${MORPHEUS_ROOT}

git lfs install
${MORPHEUS_ROOT}/scripts/fetch_data.py fetch tests validation
Expand All @@ -43,6 +47,12 @@ git lfs ls-files
REPORTS_DIR="${WORKSPACE_TMP}/reports"
mkdir -p ${WORKSPACE_TMP}/reports

rapids-logger "Running C++ tests"
# Running the tests from the tests dir. Normally this isn't necessary, however since
# we are testing the installed version of morpheus in site-packages and not the one
# in the repo dir, the pytest coverage module reports incorrect coverage stats.
pushd ${MORPHEUS_ROOT}/tests

TEST_RESULTS=0
for cpp_test in "${CPP_TESTS[@]}"; do
test_name=$(basename ${cpp_test})
Expand All @@ -57,11 +67,6 @@ for cpp_test in "${CPP_TESTS[@]}"; do
done

rapids-logger "Running Python tests"
# Running the tests from the tests dir. Normally this isn't necessary, however since
# we are testing the installed version of morpheus in site-packages and not the one
# in the repo dir, the pytest coverage module reports incorrect coverage stats.
cd ${MORPHEUS_ROOT}/tests

set +e

python -I -m pytest --run_slow --run_kafka \
Expand All @@ -74,6 +79,7 @@ PYTEST_RESULTS=$?
TEST_RESULTS=$(($TEST_RESULTS+$PYTEST_RESULTS))

set -e
popd

rapids-logger "Archiving test reports"
cd $(dirname ${REPORTS_DIR})
Expand Down
2 changes: 2 additions & 0 deletions docker/conda/environments/cuda11.8_dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ dependencies:
- cupy=9.5.0
- cython=0.29.24
- datacompy=0.8
- dill
- docker-compose=1.29.2
- docker-py=5.0
- faker=12.3.0
Expand Down Expand Up @@ -81,6 +82,7 @@ dependencies:
- python=3.8
- rapidjson=1.1.0
- scikit-build=0.13
- scikit-learn=0.23.1
- sphinx
- sphinx_rtd_theme
- sysroot_linux-64=2.17
Expand Down
1 change: 0 additions & 1 deletion docker/conda/environments/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

####### Pip-only runtime dependencies (keep sorted!) #######
# Packages listed here should also be listed in setup.py
git+/~https://github.com/nv-morpheus/dfencoder.git@branch-23.01#egg=dfencoder
ipywidgets
jupyter-core>=4.11.2,<5.0
jupyterlab
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
import logging

import mrc
from dfencoder import AutoEncoder
from mrc.core import operators as ops

from morpheus.messages.multi_ae_message import MultiAEMessage
from morpheus.models.dfencoder import AutoEncoder
from morpheus.utils.module_ids import MODULE_NAMESPACE
from morpheus.utils.module_utils import get_module_config
from morpheus.utils.module_utils import register_module
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import mlflow
import mrc
import requests
from dfencoder import AutoEncoder
from mlflow.exceptions import MlflowException
from mlflow.models.signature import ModelSignature
from mlflow.protos.databricks_pb2 import RESOURCE_ALREADY_EXISTS
Expand All @@ -36,6 +35,7 @@

from morpheus.config import Config
from morpheus.messages.multi_ae_message import MultiAEMessage
from morpheus.models.dfencoder import AutoEncoder
from morpheus.pipeline.single_port_stage import SinglePortStage
from morpheus.pipeline.stream_pair import StreamPair

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
import typing

import mrc
from dfencoder import AutoEncoder
from mrc.core import operators as ops
from sklearn.model_selection import train_test_split

from morpheus.config import Config
from morpheus.messages.multi_ae_message import MultiAEMessage
from morpheus.models.dfencoder import AutoEncoder
from morpheus.pipeline.single_port_stage import SinglePortStage
from morpheus.pipeline.stream_pair import StreamPair

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
from datetime import datetime

import mlflow
from dfencoder import AutoEncoder
from mlflow.entities.model_registry import RegisteredModel
from mlflow.exceptions import MlflowException
from mlflow.store.entities.paged_list import PagedList
from mlflow.tracking.client import MlflowClient

from morpheus.models.dfencoder import AutoEncoder

from .logging_timer import log_time

logger = logging.getLogger("morpheus.{}".format(__name__))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,12 @@
import logging
import typing

import numpy as np
import pandas as pd
import torch
from dfencoder import AutoEncoder
from tqdm import tqdm

from morpheus.config import Config
from morpheus.models.dfencoder import AutoEncoder
from morpheus.utils.seed import manual_seed

logger = logging.getLogger("morpheus.{}".format(__name__))

Expand Down Expand Up @@ -132,10 +131,7 @@ def train_from_batch(self, filter_func=lambda df: df):

# If the seed is set, enforce that here
if (self._seed is not None):
torch.manual_seed(self._seed)
torch.cuda.manual_seed(self._seed)
np.random.seed(self._seed)
torch.backends.cudnn.deterministic = True
manual_seed(self._seed)

model = self._model_class(
encoder_layers=[512, 500], # layers of the encoding part
Expand Down Expand Up @@ -215,10 +211,7 @@ def train(self, df: pd.DataFrame) -> AutoEncoder:

# If the seed is set, enforce that here
if (self._seed is not None):
torch.manual_seed(self._seed)
torch.cuda.manual_seed(self._seed)
np.random.seed(self._seed)
torch.backends.cudnn.deterministic = True
manual_seed(self._seed)

model = self._model_class(
encoder_layers=[512, 500], # layers of the encoding part
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
import argparse

import dill
import numpy as np
import pandas as pd
import torch
from dfencoder import AutoEncoder
from morpheus.models.dfencoder import AutoEncoder
from morpheus.utils.seed import manual_seed


def main():
Expand Down Expand Up @@ -84,10 +84,7 @@ def main():
for i in list(X_train):
if i not in list(X_val):
X_train = X_train.drop([i], axis=1)
torch.manual_seed(42)
torch.cuda.manual_seed(42)
np.random.seed(42)
torch.backends.cudnn.deterministic = True
manual_seed(42)
model = AutoEncoder(
encoder_layers=[512, 500], # layers of the encoding part
decoder_layers=[512], # layers of the decoding part
Expand Down
22 changes: 11 additions & 11 deletions models/training-tuning-scripts/dfp-models/hammah-20211017.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@
"import requests\n",
"import os.path\n",
"import torch\n",
"import numpy as np"
"import numpy as np\n",
"\n",
"from morpheus.utils.seed import manual_seed"
]
},
{
Expand Down Expand Up @@ -227,10 +229,7 @@
"outputs": [],
"source": [
"# set seeds for model reproducability\n",
"torch.manual_seed(random_seed)\n",
"torch.cuda.manual_seed(random_seed)\n",
"np.random.seed(random_seed)\n",
"torch.backends.cudnn.deterministic = True"
"manual_seed(random_seed)"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@
"from sklearn.model_selection import train_test_split\n",
"import pandas as pd\n",
"import numpy as np\n",
"import time"
"import time\n",
"\n",
"from morpheus.utils.seed import manual_seed"
]
},
{
Expand Down Expand Up @@ -324,10 +326,7 @@
}
],
"source": [
"torch.manual_seed(random_seed)\n",
"torch.cuda.manual_seed(random_seed)\n",
"np.random.seed(random_seed)\n",
"torch.backends.cudnn.deterministic = True\n",
"manual_seed(random_seed)\n",
"seq_classifier.train_model(X_train[\"log\"], y_train,batch_size=128, epochs=1,learning_rate=3.6e-4)"
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,13 @@
import argparse
import time

import numpy as np
import cudf
import pandas as pd
import torch
from binary_sequence_classifier import BinarySequenceClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

import cudf
from morpheus.utils.seed import manual_seed


def train(trainingdata, unseenerrors):
Expand Down Expand Up @@ -67,10 +66,7 @@ def train(trainingdata, unseenerrors):

seq_classifier.init_model('bert-base-uncased')

torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
np.random.seed(random_seed)
torch.backends.cudnn.deterministic = True
manual_seed(random_seed)
seq_classifier.train_model(X_train['log'], y_train, batch_size=128, epochs=1, learning_rate=3.6e-04)

timestr = time.strftime('%Y%m%d-%H%M%S')
Expand Down
6 changes: 6 additions & 0 deletions morpheus/_lib/cmake/libraries/morpheus.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ target_include_directories(morpheus
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
)

# We want to use RUNPATH instead of RPATH to allow LD_LIBRARY_PATH to take precedence over the paths specified in the
# binary. This is necessary to allow ld to find the real libcuda.so instead of the stub. Eventually, this can be removed
# once upgraded to cuda-python 12.1. Ideally, cuda-python would just load libcuda.so.1 which would take precedence over
# libcuda.so. Relevant issue: https://github.com/NVIDIA/cuda-python/issues/17
target_link_options(morpheus PUBLIC "-Wl,--enable-new-dtags")

set_target_properties(morpheus
PROPERTIES
CXX_VISIBILITY_PRESET hidden
Expand Down
3 changes: 1 addition & 2 deletions morpheus/messages/multi_ae_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@
import dataclasses
import logging

from dfencoder import AutoEncoder

from morpheus.messages.message_meta import MessageMeta
from morpheus.messages.multi_message import MultiMessage
from morpheus.models.dfencoder import AutoEncoder

logger = logging.getLogger(__name__)

Expand Down
3 changes: 1 addition & 2 deletions morpheus/messages/multi_inference_ae_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@
import dataclasses
import typing

from dfencoder.autoencoder import AutoEncoder

from morpheus.messages.memory.tensor_memory import TensorMemory
from morpheus.messages.message_meta import MessageMeta
from morpheus.messages.message_meta import UserMessageMeta
from morpheus.messages.multi_inference_message import MultiInferenceMessage
from morpheus.models.dfencoder.autoencoder import AutoEncoder


@dataclasses.dataclass
Expand Down
12 changes: 12 additions & 0 deletions morpheus/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading

0 comments on commit 33e922c

Please sign in to comment.