From 61680dc2c91677f6f0db61a28b3c1c8e15c37a56 Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Wed, 27 Apr 2022 11:11:33 -0700
Subject: [PATCH 01/23] Add/update unittests to check for issue #60

---
 tests/test_cli.py | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 0024e131b4..83a90f5a30 100755
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -146,6 +146,7 @@ def test_autocomplete(self, tmp_path):
                                env={'HOME': str(tmp_path)})
         assert result.exit_code == 0, result.output
 
+    @pytest.mark.usefixtures("chdir_tmpdir")
     @pytest.mark.replace_callback('pipeline_ae')
     def test_pipeline_ae(self, config, callback_values):
         """
@@ -189,6 +190,11 @@ def test_pipeline_ae(self, config, callback_values):
         config.ae.userid_column_name = "user_col"
         config.ae.userid_filter = "user321"
 
+        with open(os.path.join(TEST_DIRS.data_dir, 'columns_ae.txt')) as fh:
+            expected_columns = [line.strip() for line in fh]
+
+        assert config.ae.feature_columns == expected_columns
+
         pipe = callback_values['pipe']
         assert pipe is not None
 
@@ -329,6 +335,7 @@ def test_pipeline_ae_all(self, config, callback_values, tmp_path):
         assert to_kafka._kafka_conf['bootstrap.servers'] == 'kserv1:123,kserv2:321'
         assert to_kafka._output_topic == 'test_topic'
 
+    @pytest.mark.usefixtures("chdir_tmpdir")
     @pytest.mark.replace_callback('pipeline_fil')
     def test_pipeline_fil(self, config, callback_values, tmp_path):
         """
@@ -347,6 +354,11 @@ def test_pipeline_fil(self, config, callback_values, tmp_path):
         assert config.mode == PipelineModes.FIL
         assert config.class_labels == ["mining"]
 
+        with open(os.path.join(TEST_DIRS.data_dir, 'columns_fil.txt')) as fh:
+            expected_columns = [line.strip() for line in fh]
+
+        assert config.fil.feature_columns == expected_columns
+
         assert config.ae is None
 
         pipe = callback_values['pipe']
@@ -516,7 +528,7 @@ def test_pipeline_fil_all(self, config, callback_values, tmp_path, mlflow_uri):
         assert to_kafka._output_topic == 'test_topic'
 
     @pytest.mark.replace_callback('pipeline_nlp')
-    def test_pipeline_nlp(selff, config, callback_values, tmp_path):
+    def test_pipeline_nlp(self, config, callback_values, tmp_path):
         """
         Build a pipeline that roughly resembles the phishing validation script
         """
@@ -745,3 +757,33 @@ def test_pipeline_alias(self, config, callback_values, tmp_path):
         config = obj["config"]
         # Ensure our config is populated correctly
         assert config.mode == PipelineModes.NLP
+
+    @pytest.mark.usefixtures("chdir_tmpdir")
+    @pytest.mark.replace_callback('pipeline_nlp')
+    def test_pipeline_nlp_relative_paths(self, config, callback_values, tmp_path):
+        """
+        Build a pipeline that roughly resembles the phishing validation script
+        """
+
+        vocab_file_name = os.path.join(TEST_DIRS.data_dir, 'bert-base-uncased-hash.txt')
+        args = (GENERAL_ARGS + ['pipeline-nlp'] +
+            FILE_SRC_ARGS + [
+                'deserialize',
+                'preprocess',
+            ] + INF_TRITON_ARGS + MONITOR_ARGS + ['add-class'] + VALIDATE_ARGS +
+            ['serialize'] + TO_FILE_ARGS)
+
+        obj = {}
+        runner = CliRunner()
+        result = runner.invoke(cli.cli, args, obj=obj)
+        assert result.exit_code == 47, result.output
+
+        # Ensure our config is populated correctly
+        config = obj["config"]
+        assert config.class_labels == ["score", "pred"]
+
+        stages = callback_values['stages']
+        # Verify the stages are as we expect them; if there is a size mismatch, Python will raise a ValueError
+        [file_source, deserialize, process_nlp, triton_inf, monitor, add_class, validation, serialize, to_file] = stages
+
+        assert process_nlp._vocab_hash_file == vocab_file_name
From f8400742fb01e9859dacf4c6f5461f002b325e8a Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Wed, 27 Apr 2022 11:32:06 -0700
Subject: [PATCH 02/23] Ensure default path values are no longer relative to the current dir, and fix tests

---
 morpheus/cli.py   | 11 ++++++-----
 tests/test_cli.py |  9 ++++++---
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/morpheus/cli.py b/morpheus/cli.py
index 8836a9a5f7..59221bf113 100644
--- a/morpheus/cli.py
+++ b/morpheus/cli.py
@@ -20,6 +20,7 @@
 import click
 from click.globals import get_current_context
 
+import morpheus
 from morpheus.config import Config
 from morpheus.config import ConfigAutoEncoder
 from morpheus.config import ConfigBase
@@ -41,7 +42,7 @@
 # List all of the options in from morpheus._lib.file_types.FileTypes without importing the object. This slows down
 # autocomplete too much.
 FILE_TYPE_NAMES = ["auto", "csv", "json"]
-
+MORPHEUS_ROOT = os.environ.get('MORPHEUS_ROOT', os.path.dirname(os.path.dirname(morpheus.__file__)))
 
 def str_to_file_type(file_type_str: str):
     from morpheus._lib.file_types import FileTypes
@@ -324,7 +325,7 @@ def run(ctx: click.Context, **kwargs):
               "do_truncate == False, there will be multiple returned sequences containing the "
               "overflowing token-ids. Default value is 256"))
 @click.option('--labels_file',
-              default="data/labels_nlp.txt",
+              default=os.path.join(MORPHEUS_ROOT, "data/labels_nlp.txt"),
               type=click.Path(dir_okay=False, exists=True, file_okay=True),
               help=("Specifies a file to read labels from in order to convert class IDs into labels. "
                     "A label file is a simple text file where each line corresponds to a label"))
@@ -386,7 +387,7 @@ def pipeline_nlp(ctx: click.Context, **kwargs):
               "A label file is a simple text file where each line corresponds to a label. "
               "If unspecified, only a single output label is created for FIL"))
 @click.option('--columns_file',
-              default="data/columns_fil.txt",
+              default=os.path.join(MORPHEUS_ROOT, "data/columns_fil.txt"),
               type=click.Path(dir_okay=False, exists=True, file_okay=True),
               help=("Specifies a file to read column features."))
 @click.option('--viz_file',
@@ -449,7 +450,7 @@ def pipeline_fil(ctx: click.Context, **kwargs):
              cls=AliasedGroup,
              **command_kwargs)
 @click.option('--columns_file',
-              default="data/columns_ae.txt",
+              default=os.path.join(MORPHEUS_ROOT, "data/columns_ae.txt"),
               type=click.Path(dir_okay=False, exists=True, file_okay=True),
               help=(""))
 @click.option('--labels_file',
@@ -827,7 +828,7 @@ def train_ae(ctx: click.Context, **kwargs):
 
 @click.command(name="preprocess", short_help="Convert messages to tokens", **command_kwargs)
 @click.option('--vocab_hash_file',
-              default="data/bert-base-cased-hash.txt",
+              default=os.path.join(MORPHEUS_ROOT, "data/bert-base-cased-hash.txt"),
               type=click.Path(exists=True, dir_okay=False),
               help=("Path to hash file containing vocabulary of words with token-ids. "
                     "This can be created from the raw vocabulary using the cudf.utils.hash_vocab_utils.hash_vocab "
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 83a90f5a30..369324aa19 100755
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -762,10 +762,10 @@ def test_pipeline_nlp_relative_paths(self, config, callback_values, tmp_path):
         """
-        Build a pipeline that roughly resembles the phishing validation script
+        Ensure that the default paths in the nlp pipeline are valid when run from outside the morpheus repo
         """
 
-        vocab_file_name = os.path.join(TEST_DIRS.data_dir, 'bert-base-uncased-hash.txt')
+        vocab_file_name = os.path.join(TEST_DIRS.data_dir, 'bert-base-cased-hash.txt')
         args = (GENERAL_ARGS + ['pipeline-nlp'] +
             FILE_SRC_ARGS + [
                 'deserialize',
                 'preprocess',
@@ -778,9 +778,12 @@ def test_pipeline_nlp_relative_paths(self, config, callback_values, tmp_path):
         result = runner.invoke(cli.cli, args, obj=obj)
         assert result.exit_code == 47, result.output
 
+        with open(os.path.join(TEST_DIRS.data_dir, 'labels_nlp.txt')) as fh:
+            expected_labels = [line.strip() for line in fh]
+
         # Ensure our config is populated correctly
         config = obj["config"]
-        assert config.class_labels == ["score", "pred"]
+        assert config.class_labels == expected_labels
 
         stages = callback_values['stages']
         # Verify the stages are as we expect them; if there is a size mismatch, Python will raise a ValueError

From edc75bd43fceb56c57f0d0ba7f431eb74993951c Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Tue, 3 May 2022 15:11:27 -0700
Subject: [PATCH 03/23] Move simple file reads to a helper function

---
 tests/test_cli.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 369324aa19..1f631c8370 100755
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -121,6 +121,13 @@ def mlflow_uri(tmp_path):
 @pytest.mark.usefixtures("reload_modules")
 @pytest.mark.use_python
 class TestCLI:
+    def _read_data_file(self, data_file):
+        """
+        Used to read in labels and columns files
+        """
+        with open(data_file) as fh:
+            return [line.strip() for line in fh]
+
     def test_help(self):
         runner = CliRunner()
@@ -190,9 +197,7 @@ def test_pipeline_ae(self, config, callback_values):
         config.ae.userid_column_name = "user_col"
         config.ae.userid_filter = "user321"
 
-        with open(os.path.join(TEST_DIRS.data_dir, 'columns_ae.txt')) as fh:
-            expected_columns = [line.strip() for line in fh]
-
+        expected_columns = self._read_data_file(os.path.join(TEST_DIRS.data_dir, 'columns_ae.txt'))
         assert config.ae.feature_columns == expected_columns
 
         pipe = callback_values['pipe']
@@ -354,9 +359,7 @@ def test_pipeline_fil(self, config, callback_values, tmp_path):
         assert config.mode == PipelineModes.FIL
         assert config.class_labels == ["mining"]
 
-        with open(os.path.join(TEST_DIRS.data_dir, 'columns_fil.txt')) as fh:
-            expected_columns = [line.strip() for line in fh]
-
+        expected_columns = self._read_data_file(os.path.join(TEST_DIRS.data_dir, 'columns_fil.txt'))
         assert config.fil.feature_columns == expected_columns
 
         assert config.ae is None
@@ -778,9 +781,7 @@ def test_pipeline_nlp_relative_paths(self, config, callback_values, tmp_path):
         result = runner.invoke(cli.cli, args, obj=obj)
         assert result.exit_code == 47, result.output
 
-        with open(os.path.join(TEST_DIRS.data_dir, 'labels_nlp.txt')) as fh:
-            expected_labels = [line.strip() for line in fh]
+        expected_labels = self._read_data_file(os.path.join(TEST_DIRS.data_dir,
'labels_nlp.txt')) # Ensure our config is populated correctly config = obj["config"] From 06fb137d7b67bbc01a391104f1b25b9da25ee403 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 16 May 2022 13:23:20 -0700 Subject: [PATCH 04/23] WIP --- CMakeLists.txt | 5 +++++ MANIFEST.in | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index de73768d02..61345a348a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,5 +136,10 @@ install( PATTERN "py.typed" ) +install( + DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/data + DESTINATION ${MORPHEUS_PY_INSTALL_DIR} + COMPONENT Wheel +) list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/MANIFEST.in b/MANIFEST.in index d1eee2cca4..cac426df6a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,3 @@ include versioneer.py -include morpheus/_version.py \ No newline at end of file +include morpheus/_version.py +graft data From c2c467b56c573cbc683a24773f551eb52e8960b8 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 16 May 2022 16:26:43 -0700 Subject: [PATCH 05/23] Move data --- CMakeLists.txt | 4 ++-- MANIFEST.in | 3 ++- morpheus/__init__.py | 2 ++ morpheus/cli.py | 8 ++++---- {data => morpheus/data}/abp_pcap_dump.jsonlines | 0 {data => morpheus/data}/bert-base-cased-hash.txt | 0 {data => morpheus/data}/bert-base-uncased-hash.txt | 0 {data => morpheus/data}/columns_ae.txt | 0 {data => morpheus/data}/columns_fil.txt | 0 {data => morpheus/data}/email.jsonlines | 0 {data => morpheus/data}/labels_ae.txt | 0 {data => morpheus/data}/labels_nlp.txt | 0 {data => morpheus/data}/labels_phishing.txt | 0 {data => morpheus/data}/nvsmi.jsonlines | 0 {data => morpheus/data}/pcap_dump.jsonlines | 0 {data => morpheus/data}/sid_training_data_truth.csv | 0 tests/utils.py | 3 ++- 17 files changed, 12 insertions(+), 8 deletions(-) rename {data => morpheus/data}/abp_pcap_dump.jsonlines (100%) rename {data => morpheus/data}/bert-base-cased-hash.txt (100%) rename {data => morpheus/data}/bert-base-uncased-hash.txt (100%) rename {data => morpheus/data}/columns_ae.txt (100%) rename {data => morpheus/data}/columns_fil.txt (100%) rename {data => morpheus/data}/email.jsonlines (100%) rename {data => morpheus/data}/labels_ae.txt (100%) rename {data => morpheus/data}/labels_nlp.txt (100%) rename {data => morpheus/data}/labels_phishing.txt (100%) rename {data => morpheus/data}/nvsmi.jsonlines (100%) rename {data => morpheus/data}/pcap_dump.jsonlines (100%) rename {data => morpheus/data}/sid_training_data_truth.csv (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 61345a348a..3c236498d2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -137,8 +137,8 @@ install( ) install( - DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/data - DESTINATION ${MORPHEUS_PY_INSTALL_DIR} + DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/morpheus/data + DESTINATION ${MORPHEUS_PY_INSTALL_DIR}/morpheus COMPONENT Wheel ) diff --git a/MANIFEST.in b/MANIFEST.in index cac426df6a..fdb610bc2a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include versioneer.py include morpheus/_version.py -graft data +include morpheus/data +recursive-include morpheus *.so diff --git a/morpheus/__init__.py b/morpheus/__init__.py index 6ccb842156..e0bc8c6b6c 100644 --- a/morpheus/__init__.py +++ b/morpheus/__init__.py @@ -15,6 +15,7 @@ """ import logging +import os # Create a default null logger to prevent log messages from being propagated to users of this library unless otherwise # configured. Use the `utils.logging` module to configure Morpheus logging @@ -22,4 +23,5 @@ from . 
import _version +DATA_DIR = os.path.join(os.path.dirname(__file__), "data") __version__ = _version.get_versions()['version'] diff --git a/morpheus/cli.py b/morpheus/cli.py index e777effb10..32b5409599 100644 --- a/morpheus/cli.py +++ b/morpheus/cli.py @@ -325,7 +325,7 @@ def run(ctx: click.Context, **kwargs): "do_truncate == False, there will be multiple returned sequences containing the " "overflowing token-ids. Default value is 256")) @click.option('--labels_file', - default=os.path.join(MORPHEUS_ROOT, "data/labels_nlp.txt"), + default=os.path.join(morpheus.DATA_DIR, "labels_nlp.txt"), type=click.Path(dir_okay=False, exists=True, file_okay=True), help=("Specifies a file to read labels from in order to convert class IDs into labels. " "A label file is a simple text file where each line corresponds to a label")) @@ -387,7 +387,7 @@ def pipeline_nlp(ctx: click.Context, **kwargs): "A label file is a simple text file where each line corresponds to a label. " "If unspecified, only a single output label is created for FIL")) @click.option('--columns_file', - default=os.path.join(MORPHEUS_ROOT, "data/columns_fil.txt"), + default=os.path.join(morpheus.DATA_DIR, "columns_fil.txt"), type=click.Path(dir_okay=False, exists=True, file_okay=True), help=("Specifies a file to read column features.")) @click.option('--viz_file', @@ -450,7 +450,7 @@ def pipeline_fil(ctx: click.Context, **kwargs): cls=AliasedGroup, **command_kwargs) @click.option('--columns_file', - default=os.path.join(MORPHEUS_ROOT, "data/columns_ae.txt"), + default=os.path.join(morpheus.DATA_DIR, "columns_ae.txt"), type=click.Path(dir_okay=False, exists=True, file_okay=True), help=("")) @click.option('--labels_file', @@ -828,7 +828,7 @@ def train_ae(ctx: click.Context, **kwargs): @click.command(name="preprocess", short_help="Convert messages to tokens", **command_kwargs) @click.option('--vocab_hash_file', - default=os.path.join(MORPHEUS_ROOT, "data/bert-base-cased-hash.txt"), + default=os.path.join(morpheus.DATA_DIR, "bert-base-cased-hash.txt"), type=click.Path(exists=True, dir_okay=False), help=("Path to hash file containing vocabulary of words with token-ids. 
" "This can be created from the raw vocabulary using the cudf.utils.hash_vocab_utils.hash_vocab " diff --git a/data/abp_pcap_dump.jsonlines b/morpheus/data/abp_pcap_dump.jsonlines similarity index 100% rename from data/abp_pcap_dump.jsonlines rename to morpheus/data/abp_pcap_dump.jsonlines diff --git a/data/bert-base-cased-hash.txt b/morpheus/data/bert-base-cased-hash.txt similarity index 100% rename from data/bert-base-cased-hash.txt rename to morpheus/data/bert-base-cased-hash.txt diff --git a/data/bert-base-uncased-hash.txt b/morpheus/data/bert-base-uncased-hash.txt similarity index 100% rename from data/bert-base-uncased-hash.txt rename to morpheus/data/bert-base-uncased-hash.txt diff --git a/data/columns_ae.txt b/morpheus/data/columns_ae.txt similarity index 100% rename from data/columns_ae.txt rename to morpheus/data/columns_ae.txt diff --git a/data/columns_fil.txt b/morpheus/data/columns_fil.txt similarity index 100% rename from data/columns_fil.txt rename to morpheus/data/columns_fil.txt diff --git a/data/email.jsonlines b/morpheus/data/email.jsonlines similarity index 100% rename from data/email.jsonlines rename to morpheus/data/email.jsonlines diff --git a/data/labels_ae.txt b/morpheus/data/labels_ae.txt similarity index 100% rename from data/labels_ae.txt rename to morpheus/data/labels_ae.txt diff --git a/data/labels_nlp.txt b/morpheus/data/labels_nlp.txt similarity index 100% rename from data/labels_nlp.txt rename to morpheus/data/labels_nlp.txt diff --git a/data/labels_phishing.txt b/morpheus/data/labels_phishing.txt similarity index 100% rename from data/labels_phishing.txt rename to morpheus/data/labels_phishing.txt diff --git a/data/nvsmi.jsonlines b/morpheus/data/nvsmi.jsonlines similarity index 100% rename from data/nvsmi.jsonlines rename to morpheus/data/nvsmi.jsonlines diff --git a/data/pcap_dump.jsonlines b/morpheus/data/pcap_dump.jsonlines similarity index 100% rename from data/pcap_dump.jsonlines rename to morpheus/data/pcap_dump.jsonlines diff --git a/data/sid_training_data_truth.csv b/morpheus/data/sid_training_data_truth.csv similarity index 100% rename from data/sid_training_data_truth.csv rename to morpheus/data/sid_training_data_truth.csv diff --git a/tests/utils.py b/tests/utils.py index 1f770e57cd..188c9acedb 100755 --- a/tests/utils.py +++ b/tests/utils.py @@ -17,6 +17,7 @@ import json import os +import morpheus from morpheus._lib.file_types import FileTypes from morpheus.config import Config from morpheus.io.deserializers import read_file_to_df @@ -32,7 +33,7 @@ class TestDirectories(object): def __init__(self, cur_file=__file__) -> None: self.tests_dir = os.path.dirname(cur_file) self.morpheus_root = os.environ.get('MORPHEUS_ROOT', os.path.dirname(self.tests_dir)) - self.data_dir = os.path.join(self.morpheus_root, 'data') + self.data_dir = morpheus.DATA_DIR self.models_dir = os.path.join(self.morpheus_root, 'models') self.datasets_dir = os.path.join(self.models_dir, 'datasets') self.training_data_dir = os.path.join(self.datasets_dir, 'training-data') From ce01a4ad0461d3d73bcb9637c053b0b547a004f8 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 08:35:56 -0700 Subject: [PATCH 06/23] Add missing dep for pybind11-stubgen --- docker/conda/environments/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/conda/environments/requirements.txt b/docker/conda/environments/requirements.txt index 9e5f490f1c..0654517c23 100644 --- a/docker/conda/environments/requirements.txt +++ b/docker/conda/environments/requirements.txt @@ 
-11,6 +11,7 @@ git+https://github.com/efajardo-nv/dfencoder.git@nv-updates#egg=dfencoder grpcio-channelz networkx nvidia-pyindex +pybind11-stubgen==0.10.5 torch==1.10.2+cu113 tqdm tritonclient[all] From 0b6d9594577df4bf14775c4421f8b1dae205381e Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 08:37:09 -0700 Subject: [PATCH 07/23] Don't add deps for pybind11 stub files when we aren't doing an inplace build, since we only generate them when doing an inplace build --- .../cmake/utils/python_module_tools.cmake | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/morpheus/cmake/utils/python_module_tools.cmake b/morpheus/cmake/utils/python_module_tools.cmake index 86cbf5c623..3cfc530531 100644 --- a/morpheus/cmake/utils/python_module_tools.cmake +++ b/morpheus/cmake/utils/python_module_tools.cmake @@ -196,26 +196,30 @@ macro(add_python_module MODULE_NAME) # succeed add_dependencies(all_python_targets ${TARGET_NAME}) - # Before installing, create the custom command to generate the stubs - set(pybind11_stub_file "${MODULE_NAME}/__init__.pyi") + if (MORPHEUS_PYTHON_INPLACE_BUILD) + # Before installing, create the custom command to generate the stubs + set(pybind11_stub_file "${MODULE_NAME}/__init__.pyi") + + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${pybind11_stub_file} + COMMAND ${Python3_EXECUTABLE} -m pybind11_stubgen ${TARGET_NAME} --no-setup-py --log-level WARN -o ./ --root-module-suffix \"\" + DEPENDS ${TARGET_NAME} all_python_targets + COMMENT "Building stub for python module ${TARGET_NAME}..." + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + ) - add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${pybind11_stub_file} - COMMAND ${Python3_EXECUTABLE} -m pybind11_stubgen ${TARGET_NAME} --no-setup-py --log-level WARN -o ./ --root-module-suffix \"\" - DEPENDS ${TARGET_NAME} all_python_targets - COMMENT "Building stub for python module ${TARGET_NAME}..." 
- WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - ) + # Add a custom target to ensure the stub generation runs + add_custom_target(${TARGET_NAME}-stubs ALL + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${pybind11_stub_file} + ) - # Add a custom target to ensure the stub generation runs - add_custom_target(${TARGET_NAME}-stubs ALL - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${pybind11_stub_file} - ) - # Save the output as a target property - set_target_properties(${TARGET_NAME} PROPERTIES RESOURCE "${pybind11_stub_file}") + # Save the output as a target property + set_target_properties(${TARGET_NAME} PROPERTIES RESOURCE "${pybind11_stub_file}") + + unset(pybind11_stub_file) + endif() - unset(pybind11_stub_file) if (PYMOD_INSTALL_DEST) message(STATUS " Install dest: (${TARGET_NAME}) ${PYMOD_INSTALL_DEST}") From 827ee417285ecd4b1215a9d04cca1a360745338f Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 09:07:51 -0700 Subject: [PATCH 08/23] Add MANIFEST.in to list of installed files --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c236498d2..793b957fe3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,6 +93,7 @@ set(MORPHEUS_ROOT_PYTHON_FILES "setup.cfg" "setup.py" "versioneer.py" + "MANIFEST.in" ) # Add the root python files to the list From 4ef5624759c0946f62720f88d9853f63ad602371 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 09:10:05 -0700 Subject: [PATCH 09/23] Copy data dir, and files previously set by package_data --- MANIFEST.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index fdb610bc2a..c8c2c6df7a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ include versioneer.py include morpheus/_version.py -include morpheus/data -recursive-include morpheus *.so +recursive-include morpheus/data * +recursive-include morpheus *.so py.typed *.pyi From c2c597595b8f56aa9bb337df98cad648d7de4245 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 09:11:48 -0700 Subject: [PATCH 10/23] Remove package_data, unfortunately the setuptools docs are vague and unclear on this, but the internet seems to imply that either include_package_data+MANIFEST.in or package_data should be used but not both --- setup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/setup.py b/setup.py index 61f9d9eb2d..f339083d94 100644 --- a/setup.py +++ b/setup.py @@ -39,9 +39,6 @@ author="NVIDIA Corporation", include_package_data=True, packages=find_packages(include=["morpheus", "morpheus.*"], exclude=['tests']), - package_data={ - "morpheus": ["*.so", "**/*.so", "py.typed", "*.pyi", "**/*.pyi", "**/**/*.pyi"], - }, install_requires=[ "click>=8", "datacompy", From 418635746780dd463de2dc2c425908ee6df287d2 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 10:06:43 -0700 Subject: [PATCH 11/23] Remove unused MORPHEUS_ROOT attr --- morpheus/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/morpheus/cli.py b/morpheus/cli.py index 32b5409599..5ed503789d 100644 --- a/morpheus/cli.py +++ b/morpheus/cli.py @@ -42,7 +42,6 @@ # List all of the options in from morpheus._lib.file_types.FileTypes without importing the object. This slows down # autocomplete too much. 
FILE_TYPE_NAMES = ["auto", "csv", "json"] -MORPHEUS_ROOT = os.environ.get('MORPHEUS_ROOT', os.path.dirname(os.path.dirname(morpheus.__file__))) def str_to_file_type(file_type_str: str): from morpheus._lib.file_types import FileTypes From 65473c69a6e710a029ad612fd94cdc710a243f09 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 10:21:25 -0700 Subject: [PATCH 12/23] Update path in examples for new data location --- examples/abp_nvsmi_detection/README.md | 4 ++-- examples/abp_pcap_detection/README.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/abp_nvsmi_detection/README.md b/examples/abp_nvsmi_detection/README.md index a0f8e40c71..999868994a 100644 --- a/examples/abp_nvsmi_detection/README.md +++ b/examples/abp_nvsmi_detection/README.md @@ -46,7 +46,7 @@ $ nvidia-smi dmon Each line in the output represents the GPU metrics at a single point in time. As the tool progresses the GPU begins to be utilized and you can see the SM% and Mem% increase as memory is loaded into the GPU and computations are performed. The model we will be using can ingest this information and determine whether or not the GPU is mining cryptocurriences without needing additional information from the host machine. -In this example we will be using the `data/nvsmi.jsonlines` dataset that is known to contain mining behavior profiles. The dataset is in the `.jsonlines` format which means each new line represents an new JSON object. In order to parse this data, it must be ingested, split by lines into individual JSON objects, and parsed into cuDF dataframes. This will all be handled by Morpheus. +In this example we will be using the `morpheus/data/nvsmi.jsonlines` dataset that is known to contain mining behavior profiles. The dataset is in the `.jsonlines` format which means each new line represents an new JSON object. In order to parse this data, it must be ingested, split by lines into individual JSON objects, and parsed into cuDF dataframes. This will all be handled by Morpheus. 
## Pipeline Architecture @@ -100,7 +100,7 @@ morpheus --log_level=DEBUG \ `# Run a pipeline with 8 threads and a model batch size of 32 (Must be equal or less than Triton config)` \ run --num_threads=8 --pipeline_batch_size=1024 --model_max_batch_size=1024 \ `# Specify a NLP pipeline with 256 sequence length (Must match Triton config)` \ - pipeline-fil --columns_file=$MORPHEUS_ROOT/data/columns_fil.txt \ + pipeline-fil --model_fea_length 3 \ `# 1st Stage: Read from file` \ from-file --filename=$MORPHEUS_ROOT/data/nvsmi.jsonlines \ `# 2nd Stage: Deserialize from JSON strings to objects` \ diff --git a/examples/abp_pcap_detection/README.md b/examples/abp_pcap_detection/README.md index be2ecd1d0d..654b505b5d 100644 --- a/examples/abp_pcap_detection/README.md +++ b/examples/abp_pcap_detection/README.md @@ -87,7 +87,7 @@ To launch the configured Morpheus pipeline with the sample data that is provided ```bash python run.py \ - --input_file ../../data/abp_pcap_dump.jsonlines \ + --input_file ../../morpheus/data/abp_pcap_dump.jsonlines \ --output_file ./pcap_out.jsonlines \ --model_name 'abp-pcap-xgb' \ --server_url localhost:8001 From 7ae1e30b564fe4b1fce9f10588ce01d166d3a401 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 10:52:33 -0700 Subject: [PATCH 13/23] Fix import path --- examples/abp_pcap_detection/abp_pcap_preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/abp_pcap_detection/abp_pcap_preprocessing.py b/examples/abp_pcap_detection/abp_pcap_preprocessing.py index a11da78ef7..19458642a3 100644 --- a/examples/abp_pcap_detection/abp_pcap_preprocessing.py +++ b/examples/abp_pcap_detection/abp_pcap_preprocessing.py @@ -27,7 +27,7 @@ from morpheus.messages import MultiInferenceFILMessage from morpheus.messages import MultiInferenceMessage from morpheus.messages import MultiMessage -from morpheus.stages.preprocess.preprocessing import PreprocessBaseStage +from morpheus.stages.preprocess.preprocess_base_stage import PreprocessBaseStage class AbpPcapPreprocessingStage(PreprocessBaseStage): From 329a6a6a54e49e403f1ff9381f54fcb0eb79b62c Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 11:42:29 -0700 Subject: [PATCH 14/23] Update paths in examples --- examples/abp_nvsmi_detection/README.md | 2 +- examples/abp_pcap_detection/README.md | 2 +- examples/gnn_fraud_detection_pipeline/README.md | 2 +- examples/gnn_fraud_detection_pipeline/run.py | 4 ++-- examples/log_parsing/messages.py | 12 ++++++------ examples/log_parsing/preprocessing.py | 2 +- examples/log_parsing/run.py | 2 +- examples/nlp_si_detection/README.md | 6 +++--- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/examples/abp_nvsmi_detection/README.md b/examples/abp_nvsmi_detection/README.md index 999868994a..bb2160973b 100644 --- a/examples/abp_nvsmi_detection/README.md +++ b/examples/abp_nvsmi_detection/README.md @@ -102,7 +102,7 @@ morpheus --log_level=DEBUG \ `# Specify a NLP pipeline with 256 sequence length (Must match Triton config)` \ pipeline-fil --model_fea_length 3 \ `# 1st Stage: Read from file` \ - from-file --filename=$MORPHEUS_ROOT/data/nvsmi.jsonlines \ + from-file --filename=$MORPHEUS_ROOT/morpheus/data/nvsmi.jsonlines \ `# 2nd Stage: Deserialize from JSON strings to objects` \ deserialize \ `# 3rd Stage: Preprocessing converts the input data into BERT tokens` \ diff --git a/examples/abp_pcap_detection/README.md b/examples/abp_pcap_detection/README.md index 654b505b5d..7c478afd0e 100644 --- a/examples/abp_pcap_detection/README.md +++ 
b/examples/abp_pcap_detection/README.md @@ -83,7 +83,7 @@ Options: --help Show this message and exit. ``` -To launch the configured Morpheus pipeline with the sample data that is provided at `/data`, run the following: +To launch the configured Morpheus pipeline with the sample data that is provided at `/morpheus/data`, run the following: ```bash python run.py \ diff --git a/examples/gnn_fraud_detection_pipeline/README.md b/examples/gnn_fraud_detection_pipeline/README.md index 44961b173f..6bb3d3e4ba 100644 --- a/examples/gnn_fraud_detection_pipeline/README.md +++ b/examples/gnn_fraud_detection_pipeline/README.md @@ -57,7 +57,7 @@ To launch the configured Morpheus pipeline with the sample data that is provided ```bash -python run.py +python run.py ====Building Pipeline==== Added source: └─> morpheus.MessageMeta diff --git a/examples/gnn_fraud_detection_pipeline/run.py b/examples/gnn_fraud_detection_pipeline/run.py index 1f17ed88e6..3e9b915aa5 100644 --- a/examples/gnn_fraud_detection_pipeline/run.py +++ b/examples/gnn_fraud_detection_pipeline/run.py @@ -13,9 +13,9 @@ # limitations under the License. import logging +import os import click -import psutil from morpheus.config import Config from morpheus.config import CppConfig @@ -35,7 +35,7 @@ @click.command() @click.option( "--num_threads", - default=psutil.cpu_count(), + default=os.cpu_count(), type=click.IntRange(min=1), help="Number of internal pipeline threads to use", ) diff --git a/examples/log_parsing/messages.py b/examples/log_parsing/messages.py index 796e1f84d4..00cf406976 100644 --- a/examples/log_parsing/messages.py +++ b/examples/log_parsing/messages.py @@ -16,15 +16,15 @@ import cupy as cp -from morpheus.messages import DataClassProp -from morpheus.messages import InferenceMemory from morpheus.messages import MultiInferenceMessage from morpheus.messages import MultiResponseMessage from morpheus.messages import ResponseMemory -from morpheus.messages import get_input -from morpheus.messages import get_output -from morpheus.messages import set_input -from morpheus.messages import set_output +from morpheus.messages.data_class_prop import DataClassProp +from morpheus.messages.multi_inference_message import InferenceMemory +from morpheus.messages.multi_inference_message import get_input +from morpheus.messages.multi_inference_message import set_input +from morpheus.messages.multi_response_message import get_output +from morpheus.messages.multi_response_message import set_output @dataclasses.dataclass diff --git a/examples/log_parsing/preprocessing.py b/examples/log_parsing/preprocessing.py index b8fe674872..e6be76f051 100644 --- a/examples/log_parsing/preprocessing.py +++ b/examples/log_parsing/preprocessing.py @@ -26,7 +26,7 @@ from morpheus.messages import MultiInferenceMessage from morpheus.messages import MultiInferenceNLPMessage from morpheus.messages import MultiMessage -from morpheus.stages.preprocess.preprocessing import PreprocessBaseStage +from morpheus.stages.preprocess.preprocess_base_stage import PreprocessBaseStage from morpheus.utils.cudf_subword_helper import tokenize_text_series diff --git a/examples/log_parsing/run.py b/examples/log_parsing/run.py index d145ece2bd..a944b3e91c 100644 --- a/examples/log_parsing/run.py +++ b/examples/log_parsing/run.py @@ -23,7 +23,7 @@ from morpheus.config import CppConfig from morpheus.config import PipelineModes from morpheus.pipeline import LinearPipeline -from morpheus.stages.general.general_stages import BufferStage +from morpheus.stages.general.buffer_stage import BufferStage 
from morpheus.stages.general.monitor_stage import MonitorStage from morpheus.stages.input.file_source_stage import FileSourceStage from morpheus.stages.output.write_to_file_stage import WriteToFileStage diff --git a/examples/nlp_si_detection/README.md b/examples/nlp_si_detection/README.md index 5fe1eca108..9bab4cb433 100644 --- a/examples/nlp_si_detection/README.md +++ b/examples/nlp_si_detection/README.md @@ -108,13 +108,13 @@ morpheus --debug --log_level=DEBUG \ `# Run a pipeline with 8 threads and a model batch size of 32 (Must match Triton config)` \ run --num_threads=8 --pipeline_batch_size=1024 --model_max_batch_size=32 \ `# Specify a NLP pipeline with 256 sequence length (Must match Triton config)` \ - pipeline-nlp --model_seq_length=256 --labels_file=$MORPHEUS_ROOT/data/labels_nlp.txt \ + pipeline-nlp --model_seq_length=256 \ `# 1st Stage: Read from file` \ - from-file --filename=$MORPHEUS_ROOT/data/pcap_dump.jsonlines \ + from-file --filename=$MORPHEUS_ROOT/morpheus/data/pcap_dump.jsonlines \ `# 2nd Stage: Deserialize from JSON strings to objects` \ deserialize \ `# 3rd Stage: Preprocessing converts the input data into BERT tokens` \ - preprocess --vocab_hash_file=$MORPHEUS_ROOT/data/bert-base-uncased-hash.txt --do_lower_case=True --truncation=True \ + preprocess --vocab_hash_file=$MORPHEUS_ROOT/morpheus/data/bert-base-uncased-hash.txt --do_lower_case=True --truncation=True \ `# 4th Stage: Send messages to Triton for inference. Specify the model loaded in Setup` \ inf-triton --model_name=sid-minibert-onnx --server_url=localhost:8001 --force_convert_inputs=True \ `# 5th Stage: Monitor stage prints throughput information to the console` \ From 405b539a96a1b600280f5ef4f9366bd2211be657 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 11:59:16 -0700 Subject: [PATCH 15/23] Update data path in docs --- docs/source/basics/examples.rst | 10 ++++----- docs/source/morpheus_quickstart_guide.md | 28 ++++++++++++------------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/source/basics/examples.rst b/docs/source/basics/examples.rst index 78950e7d7f..db1bff25d0 100644 --- a/docs/source/basics/examples.rst +++ b/docs/source/basics/examples.rst @@ -35,7 +35,7 @@ This example will copy the values from Kafka into ``out.jsonlines``. Remove Fields from JSON Objects ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This example will only copy the fiels 'timestamp', 'src_ip' and 'dest_ip' from ``data/pcap_dump.jsonlines`` to +This example will only copy the fiels 'timestamp', 'src_ip' and 'dest_ip' from ``morpheus/data/pcap_dump.jsonlines`` to ``out.jsonlines``. .. image:: img/remove_fields_from_json_objects.png @@ -43,7 +43,7 @@ This example will only copy the fiels 'timestamp', 'src_ip' and 'dest_ip' from ` .. code-block:: bash morpheus run pipeline-nlp --viz_file=basic_usage_img/remove_fields_from_json_objects.png \ - from-file --filename data/pcap_dump.jsonlines \ + from-file --filename morpheus/data/pcap_dump.jsonlines \ deserialize \ serialize --include 'timestamp' --include 'src_ip' --include 'dest_ip' \ to-file --filename out.jsonlines @@ -58,7 +58,7 @@ This example will report the throughput on the command line. .. code-block:: console $ morpheus run pipeline-nlp --viz_file=basic_usage_img/monitor_throughput.png \ - from-file --filename data/pcap_dump.jsonlines \ + from-file --filename morpheus/data/pcap_dump.jsonlines \ deserialize \ monitor --description "Lines Throughput" --smoothing 0.1 --unit "lines" \ serialize \ @@ -79,7 +79,7 @@ decouple one stage from the next. 
Without the buffers, all montioring would show .. code-block:: console $ morpheus run pipeline-nlp --viz_file=basic_usage_img/multi_monitor_throughput.png \ - from-file --filename data/pcap_dump.jsonlines \ + from-file --filename morpheus/data/pcap_dump.jsonlines \ monitor --description "From File Throughput" \ buffer \ deserialize \ @@ -107,7 +107,7 @@ This example shows an NLP Pipeline which uses most stages available in Morpheus. $ morpheus run --num_threads=8 --pipeline_batch_size=1024 --model_max_batch_size=32 \ pipeline-nlp --viz_file=basic_usage_img/nlp_kitchen_sink.png \ - from-file --filename data/pcap_dump.jsonlines \ + from-file --filename morpheus/data/pcap_dump.jsonlines \ buffer --count=500 \ deserialize \ preprocess \ diff --git a/docs/source/morpheus_quickstart_guide.md b/docs/source/morpheus_quickstart_guide.md index c8f7a41d68..4f433aaf89 100644 --- a/docs/source/morpheus_quickstart_guide.md +++ b/docs/source/morpheus_quickstart_guide.md @@ -558,11 +558,11 @@ $ helm install --set ngc.apiKey="$API_KEY" \ --use_cpp=True \ pipeline-nlp \ --model_seq_length=128 \ - --labels_file=./data/labels_phishing.txt \ - from-file --filename=./data/email.jsonlines \ + --labels_file=./morpheus/data/labels_phishing.txt \ + from-file --filename=./morpheus/data/email.jsonlines \ monitor --description 'FromFile Rate' --smoothing=0.001 \ deserialize \ - preprocess --vocab_hash_file=./data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ + preprocess --vocab_hash_file=./morpheus/data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ monitor --description 'Preprocess Rate' \ inf-triton --model_name=phishing-bert-onnx --server_url=ai-engine:8001 --force_convert_inputs=True \ monitor --description 'Inference Rate' --smoothing=0.001 --unit inf \ @@ -588,11 +588,11 @@ $ helm install --set ngc.apiKey="$API_KEY" \ --use_cpp=True \ pipeline-nlp \ --model_seq_length=128 \ - --labels_file=./data/labels_phishing.txt \ + --labels_file=./morpheus/data/labels_phishing.txt \ from-kafka --input_topic --bootstrap_servers broker:9092 \ monitor --description 'FromKafka Rate' --smoothing=0.001 \ deserialize \ - preprocess --vocab_hash_file=./data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ + preprocess --vocab_hash_file=./morpheus/data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ monitor --description 'Preprocess Rate' \ inf-triton --force_convert_inputs=True --model_name=phishing-bert-onnx --server_url=ai-engine:8001 \ monitor --description='Inference Rate' --smoothing=0.001 --unit inf \ @@ -635,10 +635,10 @@ $ helm install --set ngc.apiKey="$API_KEY" \ --model_max_batch_size=32 \ pipeline-nlp \ --model_seq_length=256 \ - from-file --filename=./data/pcap_dump.jsonlines \ + from-file --filename=./morpheus/data/pcap_dump.jsonlines \ monitor --description 'FromFile Rate' --smoothing=0.001 \ deserialize \ - preprocess --vocab_hash_file=./data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ + preprocess --vocab_hash_file=./morpheus/data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ monitor --description='Preprocessing rate' \ inf-triton --force_convert_inputs=True --model_name=sid-minibert-onnx --server_url=ai-engine:8001 \ monitor --description='Inference rate' --smoothing=0.001 --unit inf \ @@ -667,7 +667,7 @@ $ helm install --set 
ngc.apiKey="$API_KEY" \ from-kafka --input_topic --bootstrap_servers broker:9092 \ monitor --description 'FromKafka Rate' --smoothing=0.001 \ deserialize \ - preprocess --vocab_hash_file=./data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ + preprocess --vocab_hash_file=./morpheus/data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ monitor --description='Preprocessing Rate' \ inf-triton --force_convert_inputs=True --model_name=sid-minibert-onnx --server_url=ai-engine:8001 \ monitor --description='Inference Rate' --smoothing=0.001 --unit inf \ @@ -685,7 +685,7 @@ Make sure you create input and output Kafka topics before you start the pipeline $ kubectl -n $NAMESPACE exec -it deploy/broker -c broker -- kafka-console-producer.sh \ --broker-list broker:9092 \ --topic < \ - + ``` **Note**: This should be used for development purposes only via this developer kit. Loading from the file into Kafka should not be used in production deployments of Morpheus. @@ -708,7 +708,7 @@ $ helm install --set ngc.apiKey="$API_KEY" \ --model_max_batch_size=64 \ --use_cpp=True \ pipeline-fil \ - from-file --filename=./data/nvsmi.jsonlines \ + from-file --filename=./morpheus/data/nvsmi.jsonlines \ monitor --description 'FromFile Rate' --smoothing=0.001 \ deserialize \ preprocess \ @@ -754,7 +754,7 @@ Make sure you create input and output Kafka topics before you start the pipeline $ kubectl -n $NAMESPACE exec -it deploy/broker -c broker -- kafka-console-producer.sh \ --broker-list broker:9092 \ --topic < \ - + ``` **Note**: This should be used for development purposes only via this developer kit. Loading from the file into Kafka should not be used in production deployments of Morpheus. @@ -937,7 +937,7 @@ Options: order to convert class IDs into labels. A label file is a simple text file where each line corresponds to a label [default: - data/labels_nlp.txt] + morpheus/data/labels_nlp.txt] --viz_file FILE Save a visualization of the pipeline at the specified location --help Show this message and exit. @@ -1000,7 +1000,7 @@ Options: only a single output label is created for FIL --columns_file FILE Specifies a file to read column features. - [default: data/columns_fil.txt] + [default: morpheus/data/columns_fil.txt] --viz_file FILE Save a visualization of the pipeline at the specified location --help Show this message and exit. @@ -1052,7 +1052,7 @@ Usage: morpheus run pipeline-ae [OPTIONS] COMMAND1 [ARGS]... [COMMAND2 4. The following stages must come after an inference stage: `add-class`, `filter`, `gen-viz` Options: - --columns_file FILE [default: data/columns_ae.txt] + --columns_file FILE [default: morpheus/data/columns_ae.txt] --labels_file FILE Specifies a file to read labels from in order to convert class IDs into labels. A label file is a simple text file where each line corresponds to a From 1c7f42100cfdf96062c18898efbbee42b1eadded Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 12:07:03 -0700 Subject: [PATCH 16/23] fix path --- docs/source/developer_guide/guides/2_real_world_phishing.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/developer_guide/guides/2_real_world_phishing.md b/docs/source/developer_guide/guides/2_real_world_phishing.md index 777ea75f94..92c23cba2e 100644 --- a/docs/source/developer_guide/guides/2_real_world_phishing.md +++ b/docs/source/developer_guide/guides/2_real_world_phishing.md @@ -211,9 +211,9 @@ We set our pipeline mode to NLP. 
Next, we use the third-party [psutils](https:// The `feature_length` property needs to match the length of the model inputs, which we queried from the model's config endpoint in the previous section. -Ground truth classification labels are read from the `data/labels_phishing.txt` file included in Morpheus. +Ground truth classification labels are read from the `morpheus/data/labels_phishing.txt` file included in Morpheus. -Now that our config object is populated we move on to the pipeline itself. We are using the same input file from the previous examples, and to tokenize the input data we add Morpheus' `PreprocessNLPStage` with the `data/bert-base-uncased-hash.txt` vocabulary file. +Now that our config object is populated we move on to the pipeline itself. We are using the same input file from the previous examples, and to tokenize the input data we add Morpheus' `PreprocessNLPStage` with the `morpheus/data/bert-base-uncased-hash.txt` vocabulary file. ```python pipeline.add_stage( From c0d5281ff88379cd03df2068454d9afb289b76d6 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 12:38:47 -0700 Subject: [PATCH 17/23] Update lfs to reflect data dir move --- .gitattributes | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitattributes b/.gitattributes index 325c1bfad8..2bc2023eb4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,5 @@ morpheus/_version.py export-subst data/* filter=lfs diff=lfs merge=lfs -text +morpheus/data/* filter=lfs diff=lfs merge=lfs -text tests/expected_data/* filter=lfs diff=lfs merge=lfs -text tests/mock_triton_server/payloads/** filter=lfs diff=lfs merge=lfs -text From ce37b333160446b2b9604bcd5cf6dfe1ac055385 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 17 May 2022 12:39:56 -0700 Subject: [PATCH 18/23] Remove unneded fea_length --- examples/abp_nvsmi_detection/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/abp_nvsmi_detection/README.md b/examples/abp_nvsmi_detection/README.md index bb2160973b..7a52fad963 100644 --- a/examples/abp_nvsmi_detection/README.md +++ b/examples/abp_nvsmi_detection/README.md @@ -100,7 +100,7 @@ morpheus --log_level=DEBUG \ `# Run a pipeline with 8 threads and a model batch size of 32 (Must be equal or less than Triton config)` \ run --num_threads=8 --pipeline_batch_size=1024 --model_max_batch_size=1024 \ `# Specify a NLP pipeline with 256 sequence length (Must match Triton config)` \ - pipeline-fil --model_fea_length 3 \ + pipeline-fil \ `# 1st Stage: Read from file` \ from-file --filename=$MORPHEUS_ROOT/morpheus/data/nvsmi.jsonlines \ `# 2nd Stage: Deserialize from JSON strings to objects` \ From 61ebfcfc52ae7b920cc1d5a69f1386ca65863e0a Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 18 May 2022 09:28:11 -0700 Subject: [PATCH 19/23] Style fixes --- morpheus/cli.py | 1 + tests/test_cli.py | 12 +++++------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/morpheus/cli.py b/morpheus/cli.py index 5ed503789d..c6ce7d3dfc 100644 --- a/morpheus/cli.py +++ b/morpheus/cli.py @@ -43,6 +43,7 @@ # autocomplete too much. 
FILE_TYPE_NAMES = ["auto", "csv", "json"] + def str_to_file_type(file_type_str: str): from morpheus._lib.file_types import FileTypes file_type_members = {name.lower(): t for (name, t) in FileTypes.__members__.items()} diff --git a/tests/test_cli.py b/tests/test_cli.py index 1e2dd238f4..718b167485 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -121,6 +121,7 @@ def mlflow_uri(tmp_path): @pytest.mark.usefixtures("reload_modules") @pytest.mark.use_python class TestCLI: + def _read_data_file(self, data_file): """ Used to read in labels and columns files @@ -128,7 +129,6 @@ def _read_data_file(self, data_file): with open(data_file) as fh: return [line.strip() for line in fh] - def test_help(self): runner = CliRunner() result = runner.invoke(cli.cli, ['--help']) @@ -769,12 +769,10 @@ def test_pipeline_nlp_relative_paths(self, config, callback_values, tmp_path): """ vocab_file_name = os.path.join(TEST_DIRS.data_dir, 'bert-base-cased-hash.txt') - args = (GENERAL_ARGS + ['pipeline-nlp'] + - FILE_SRC_ARGS + [ - 'deserialize', - 'preprocess', - ] + INF_TRITON_ARGS + MONITOR_ARGS + ['add-class'] + VALIDATE_ARGS + - ['serialize'] + TO_FILE_ARGS) + args = (GENERAL_ARGS + ['pipeline-nlp'] + FILE_SRC_ARGS + [ + 'deserialize', + 'preprocess', + ] + INF_TRITON_ARGS + MONITOR_ARGS + ['add-class'] + VALIDATE_ARGS + ['serialize'] + TO_FILE_ARGS) obj = {} runner = CliRunner() From 5a84ff2910672eac7b65715a17b4729c46284979 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Wed, 18 May 2022 14:17:04 -0700 Subject: [PATCH 20/23] Update docs/source/basics/examples.rst Co-authored-by: Christopher Harris --- docs/source/basics/examples.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/basics/examples.rst b/docs/source/basics/examples.rst index db1bff25d0..7502bb5c51 100644 --- a/docs/source/basics/examples.rst +++ b/docs/source/basics/examples.rst @@ -35,7 +35,7 @@ This example will copy the values from Kafka into ``out.jsonlines``. Remove Fields from JSON Objects ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This example will only copy the fiels 'timestamp', 'src_ip' and 'dest_ip' from ``morpheus/data/pcap_dump.jsonlines`` to +This example will only copy the fields 'timestamp', 'src_ip' and 'dest_ip' from ``morpheus/data/pcap_dump.jsonlines`` to ``out.jsonlines``. .. 
image:: img/remove_fields_from_json_objects.png From f59dcacca0dbd36565bab182a27bf03b61b65da7 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Mon, 23 May 2022 17:45:47 -0600 Subject: [PATCH 21/23] Fixing non-inplace builds install of stub files --- .../cmake/utils/python_module_tools.cmake | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/morpheus/cmake/utils/python_module_tools.cmake b/morpheus/cmake/utils/python_module_tools.cmake index 3cfc530531..99dfddc094 100644 --- a/morpheus/cmake/utils/python_module_tools.cmake +++ b/morpheus/cmake/utils/python_module_tools.cmake @@ -35,13 +35,16 @@ function(inplace_build_copy TARGET_NAME INPLACE_DIR) # Create the copy command for each resource foreach(resource ${target_resources}) + # Get the relative path to the build directory + file(RELATIVE_PATH relative_resource ${target_build_dir} ${resource}) + add_custom_command( - OUTPUT ${INPLACE_DIR}/${resource} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${target_build_dir}/${resource} ${INPLACE_DIR}/${resource} - DEPENDS ${target_build_dir}/${resource} - COMMENT "Copying stub ${target_build_dir}/${resource} to ${INPLACE_DIR}/${resource}" + OUTPUT ${INPLACE_DIR}/${relative_resource} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${resource} ${INPLACE_DIR}/${relative_resource} + DEPENDS ${resource} + COMMENT "Copying stub ${resource} to ${INPLACE_DIR}/${relative_resource}" ) - list(APPEND resource_outputs ${INPLACE_DIR}/${resource}) + list(APPEND resource_outputs ${INPLACE_DIR}/${relative_resource}) endforeach() # Final target to depend on the copied files @@ -196,30 +199,26 @@ macro(add_python_module MODULE_NAME) # succeed add_dependencies(all_python_targets ${TARGET_NAME}) - if (MORPHEUS_PYTHON_INPLACE_BUILD) - # Before installing, create the custom command to generate the stubs - set(pybind11_stub_file "${MODULE_NAME}/__init__.pyi") - - add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${pybind11_stub_file} - COMMAND ${Python3_EXECUTABLE} -m pybind11_stubgen ${TARGET_NAME} --no-setup-py --log-level WARN -o ./ --root-module-suffix \"\" - DEPENDS ${TARGET_NAME} all_python_targets - COMMENT "Building stub for python module ${TARGET_NAME}..." - WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - ) - - # Add a custom target to ensure the stub generation runs - add_custom_target(${TARGET_NAME}-stubs ALL - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${pybind11_stub_file} - ) + # Before installing, create the custom command to generate the stubs + set(pybind11_stub_file "${CMAKE_CURRENT_BINARY_DIR}/${MODULE_NAME}/__init__.pyi") + add_custom_command( + OUTPUT ${pybind11_stub_file} + COMMAND ${Python3_EXECUTABLE} -m pybind11_stubgen ${TARGET_NAME} --no-setup-py --log-level WARN -o ./ --root-module-suffix \"\" + DEPENDS ${TARGET_NAME} all_python_targets + COMMENT "Building stub for python module ${TARGET_NAME}..." 
+ WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + ) - # Save the output as a target property - set_target_properties(${TARGET_NAME} PROPERTIES RESOURCE "${pybind11_stub_file}") + # Add a custom target to ensure the stub generation runs + add_custom_target(${TARGET_NAME}-stubs ALL + DEPENDS ${pybind11_stub_file} + ) - unset(pybind11_stub_file) - endif() + # Save the output as a target property + set_target_properties(${TARGET_NAME} PROPERTIES RESOURCE "${pybind11_stub_file}") + unset(pybind11_stub_file) if (PYMOD_INSTALL_DEST) message(STATUS " Install dest: (${TARGET_NAME}) ${PYMOD_INSTALL_DEST}") From 780180390c57c48c1d30a8e2668d83fa108125a7 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 23 May 2022 16:54:45 -0700 Subject: [PATCH 22/23] Move data into previous install command --- CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 793b957fe3..847ada9307 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -135,12 +135,8 @@ install( FILES_MATCHING PATTERN "*.py" PATTERN "py.typed" + PATTERN "data/*" ) -install( - DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/morpheus/data - DESTINATION ${MORPHEUS_PY_INSTALL_DIR}/morpheus - COMPONENT Wheel -) list(POP_BACK CMAKE_MESSAGE_CONTEXT) From 798953a46527d2c3212f766146400b65e1b35338 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 23 May 2022 16:56:40 -0700 Subject: [PATCH 23/23] Remove lfs filter for old data location --- .gitattributes | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 2bc2023eb4..c9e36e9a1e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,5 +1,4 @@ morpheus/_version.py export-subst -data/* filter=lfs diff=lfs merge=lfs -text morpheus/data/* filter=lfs diff=lfs merge=lfs -text tests/expected_data/* filter=lfs diff=lfs merge=lfs -text tests/mock_triton_server/payloads/** filter=lfs diff=lfs merge=lfs -text
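
Usage note (illustrative, not part of the series): after [PATCH 05/23] the bundled datasets live inside the `morpheus` package and `morpheus.DATA_DIR` resolves to their installed location, so code no longer depends on the current working directory, which was the root cause of issue #60. Below is a minimal sketch under the assumption that the package built from these patches is installed; `labels_nlp.txt` is one of the files moved in PATCH 05 and the read pattern mirrors the tests' `_read_data_file` helper.

```python
# Minimal sketch (illustration only): resolve a packaged data file via the
# DATA_DIR attribute added to morpheus/__init__.py in PATCH 05.
import os

import morpheus

# DATA_DIR points inside the installed package, so this works regardless of
# the directory the process was launched from.
labels_file = os.path.join(morpheus.DATA_DIR, "labels_nlp.txt")

with open(labels_file) as fh:
    labels = [line.strip() for line in fh]

print(labels)
```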