Skip to content

Commit

Permalink
Merge pull request #1359 from nv-morpheus/fea-sherlock
Browse files Browse the repository at this point in the history
Merge fea-sherlock feature branch into branch-23.11
  • Loading branch information
mdemoret-nv authored Nov 22, 2023
2 parents 401ad6f + a343cd4 commit 4806119
Show file tree
Hide file tree
Showing 212 changed files with 15,924 additions and 1,321 deletions.
2 changes: 2 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ AllowShortBlocksOnASingleLine: true
AllowShortCaseLabelsOnASingleLine: false # Allows placing breakpoint
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Empty
AllowShortLoopsOnASingleLine: false
# This is deprecated
AlwaysBreakAfterDefinitionReturnType: None
Expand Down Expand Up @@ -103,6 +104,7 @@ ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PackConstructorInitializers: CurrentLine
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
Expand Down
8 changes: 4 additions & 4 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
docs/source/developer_guide/guides/img/** filter=lfs diff=lfs merge=lfs -text
docs/source/img/* filter=lfs diff=lfs merge=lfs -text
examples/basic_usage/img/** filter=lfs diff=lfs merge=lfs -text
examples/data/** filter=lfs diff=lfs merge=lfs -text
examples/digital_fingerprinting/production/grafana/img/** filter=lfs diff=lfs merge=lfs -text
examples/digital_fingerprinting/visualization/img/** filter=lfs diff=lfs merge=lfs -text
git filter=lfs diff=lfs merge=lfs -text
morpheus/_version.py export-subst
status filter=lfs diff=lfs merge=lfs -text
tests/mock_triton_server/payloads/** filter=lfs diff=lfs merge=lfs -text
tests/tests_data/** filter=lfs diff=lfs merge=lfs -text
examples/basic_usage/img/** filter=lfs diff=lfs merge=lfs -text
docs/source/img/* filter=lfs diff=lfs merge=lfs -text
git filter=lfs diff=lfs merge=lfs -text
status filter=lfs diff=lfs merge=lfs -text
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ if(MORPHEUS_ENABLE_DEBUG_INFO)

morpheus_utils_print_target_properties(
TARGETS
morpheus
morpheus morpheus._lib.llm
WRITE_TO_FILE
)

Expand Down
3 changes: 2 additions & 1 deletion ci/iwyu/mappings.imp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@
{ "include": [ "<google/protobuf/repeated_ptr_field.h>", private, "<google/protobuf/repeated_field.h>", "public" ] },

# pybind11
{ "include": [ "<pybind11/detail/common.h>", private, "<pybind11/pytypes.h>", "public" ] },
{ "include": [ "<pybind11/detail/common.h>", "private", "<pybind11/pytypes.h>", "public" ] },
{ "include": [ "<pybind11/cast.h>", "private", "<pybind11/pybind11.h>", "public" ] },

# rxcpp
# Hide includes that are exported by <rxcpp/rx.hpp>
Expand Down
6 changes: 4 additions & 2 deletions ci/scripts/cpp_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@ if [[ -n "${MORPHEUS_MODIFIED_FILES}" ]]; then

CLANG_TIDY_DIFF=$(find_clang_tidy_diff)

# Run using a clang-tidy wrapper to allow warnings-as-errors and to eliminate any output except errors (since clang-tidy-diff.py doesn't return the correct error codes)
CLANG_TIDY_OUTPUT=`get_unified_diff ${CPP_FILE_REGEX} | ${CLANG_TIDY_DIFF} -j 0 -path ${BUILD_DIR} -p1 -quiet 2>&1`
# Run using a clang-tidy wrapper to allow warnings-as-errors and to eliminate any output except errors (since
# clang-tidy-diff.py doesn't return the correct error codes)
CLANG_TIDY_OUTPUT=`get_unified_diff ${CPP_FILE_REGEX} | ${CLANG_TIDY_DIFF} \
-extra-arg="-Wno-ignored-optimization-argument" -j 0 -path ${BUILD_DIR} -p1 -quiet 2>&1`

if [[ -n "${CLANG_TIDY_OUTPUT}" && ${CLANG_TIDY_OUTPUT} != "No relevant changes found." ]]; then
CLANG_TIDY_RETVAL=1
Expand Down
2 changes: 1 addition & 1 deletion ci/scripts/github/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

function print_env_vars() {
rapids-logger "Environ:"
env | grep -v -E "AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|GH_TOKEN" | sort
env | grep -v -E "AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|GH_TOKEN|NGC_API_KEY" | sort
}

rapids-logger "Env Setup"
Expand Down
2 changes: 1 addition & 1 deletion ci/scripts/github/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ done
rapids-logger "Running Python tests"
set +e

python -I -m pytest --run_slow --run_kafka --fail_missing \
python -I -m pytest --run_slow --run_kafka --run_milvus --fail_missing \
--junit-xml=${REPORTS_DIR}/report_pytest.xml \
--cov=morpheus \
--cov-report term-missing \
Expand Down
19 changes: 15 additions & 4 deletions docker/conda/environments/cuda11.8_dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ channels:
dependencies:
####### Morpheus Dependencies (keep sorted!) #######
- automake=1.16.5
- beautifulsoup4=4.12
- benchmark=1.6.1
- boost-cpp=1.82
- cachetools=5.0.0
Expand All @@ -34,8 +35,8 @@ dependencies:
- configargparse=1.5
- cuda-compiler=11.8
- cuda-nvml-dev=11.8
- cuda-python>=11.8,<11.8.3 # workaround for https://github.com/nv-morpheus/Morpheus/issues/1317
- cuda-toolkit=11.8
- cuda-python=11.8.2 # 11.8.3 breaks cuda helpers, pin to 11.8.2 for now.
- cudf=23.06
- cupy>=12.0.0
- cxx-compiler
Expand Down Expand Up @@ -69,7 +70,7 @@ dependencies:
- libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863
- mlflow>=2.2.1,<3
- mrc=23.11
- networkx=3.1
- networkx>=2.8
- ninja=1.10
- nodejs=18.*
- numba>=0.56.2
Expand All @@ -85,6 +86,7 @@ dependencies:
- pydot
- pylint>=2.17.4,<2.18 # 2.17.4 contains a fix for toml support
- pytest
- pytest-asyncio
- pytest-benchmark>=4.0
- pytest-cov
- python-confluent-kafka=1.9.2
Expand All @@ -94,6 +96,7 @@ dependencies:
- pytorch=2.0.1
- rapidjson=1.1.0
- requests=2.31
- requests-cache=1.1
- scikit-build=0.17.1
- scikit-learn=1.2.2
- sphinx
Expand All @@ -106,10 +109,18 @@ dependencies:
- watchdog=2.1
- websockets
- yapf=0.40.1

####### Pip Transitive Dependencies (keep sorted!) #######
# These are dependencies that are available on conda, but are required by the pip packages listed below. It's much
# better to install them with conda than pip to allow for better dependency resolution.
- kafka-python=2.0
- port-for=0.7
- py4j=0.10

####### Morpheus Pip Dependencies (keep sorted!) #######
- pip:
# Add additional dev dependencies here
- databricks-connect
- pytest-kafka==0.6.0
- pymilvus==2.3.1
- milvus
- pymilvus==2.3.2
- milvus==2.3.2
25 changes: 25 additions & 0 deletions docker/conda/environments/cuda11.8_examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,41 @@
channels:
- rapidsai
- nvidia
- huggingface
- conda-forge
- dglteam/label/cu118
dependencies:
- arxiv=1.4
- boto3
- cuml=23.06
- dask>=2023.1.1
- dgl=1.0.2
- dill=0.3.6
- distributed>=2023.1.1
- huggingface_hub=0.10.1 # work-around for https://github.com/UKPLab/sentence-transformers/issues/1762
- langchain=0.0.190
- libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863
- mlflow>=2.2.1,<3
- newspaper3k=0.2
- openai=0.28
- papermill=2.3.4
- pypdf=3.16
- requests-cache=1.1
- s3fs>=2023.6
- sentence-transformers
- transformers

####### Pip Transitive Dependencies (keep sorted!) #######
# These are dependencies that are available on conda, but are required by the pip packages listed below. It's much
# better to install them with conda than pip to allow for better dependency resolution.
- environs=9.5
- minio=7.1
- python-dotenv=1.0
- ujson=5.8


####### Pip Dependencies (keep sorted!) #######
- pip:
- google-search-results==2.4
- grpcio-status==1.58 # To keep in sync with 1.58 grpcio which is installed for Morpheus
- nemollm
8 changes: 7 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@
BUILTIN_STL_SUPPORT = YES
DOT_IMAGE_FORMAT = svg
EXCLUDE_PATTERNS = */tests/* */include/nvtext/* */__pycache__/* */doca/*
EXCLUDE_SYMBOLS = "@*" "cudf*" "py::literals" "RdKafka" "mrc*" "std*"
EXCLUDE_SYMBOLS = "@*" "cudf*" "py::literals" "RdKafka" "mrc*" "std*" "PYBIND11_NAMESPACE*"
EXTENSION_MAPPING = cu=C++ cuh=C++
EXTRACT_ALL = YES
FILE_PATTERNS = *.c *.cc *.cpp *.h *.hpp *.cu *.cuh *.md
Expand All @@ -138,6 +138,11 @@
INPUT = ../../morpheus/_lib
INTERACTIVE_SVG = YES
SOURCE_BROWSER = YES
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = YES
EXPAND_ONLY_PREDEF = NO
PREDEFINED = "MORPHEUS_EXPORT=" \
"DOXYGEN_SHOULD_SKIP_THIS=1"
''')
}

Expand Down Expand Up @@ -165,6 +170,7 @@
"cudf", # Avoid loading GPU libraries during the documentation build
"cupy", # Avoid loading GPU libraries during the documentation build
"databricks.connect",
"langchain",
"merlin",
"morpheus.cli.commands", # Dont document the CLI in Sphinx
"nvtabular",
Expand Down
5 changes: 5 additions & 0 deletions docs/source/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,8 @@ limitations under the License.
* [Example Ransomware Detection Morpheus Pipeline for AppShield Data](./examples/ransomware_detection/README.md)
* [Root Cause Analysis Acceleration & Predictive Maintenance Example](./examples/root_cause_analysis/README.md)
* [SID Visualization Example](./examples/sid_visualization/README.md)
* [Large Language Models (LLMs)](./examples/llm/README.md)
* [Agents](./examples/llm/agents/README.md)
* [Completion](./examples/llm/completion/README.md)
* [VDB Upload](./examples/llm/vdb_upload/README.md)
* [Retrieval Augmented Generation (RAG)](./examples/llm/rag/README.md)
2 changes: 2 additions & 0 deletions docs/source/examples/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Examples
abp_nvsmi_detection/README.md
abp_pcap_detection/README.md
gnn_fraud_detection_pipeline/README.md
llm/index
log_parsing/README.md
nlp_si_detection/README.md
ransomware_detection/README.md
Expand All @@ -35,3 +36,4 @@ Examples
:hidden:

../examples

23 changes: 23 additions & 0 deletions docs/source/examples/llm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<!--
SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

# LLM

- [completion](./completion/README.md)
- [vdb_upload](./vdb_upload/README.md)
- [rag](./rag/README.md)
- [agents](./agents/README.md)
1 change: 1 addition & 0 deletions docs/source/examples/llm/agents/README.md
1 change: 1 addition & 0 deletions docs/source/examples/llm/completion/README.md
30 changes: 30 additions & 0 deletions docs/source/examples/llm/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
..
SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


===
LLM
===

.. toctree::
:maxdepth: 20

./README.md
./completion/README.md
./vdb_upload/README.md
./rag/README.md
./agents/README.md

18 changes: 18 additions & 0 deletions docs/source/examples/llm/rag/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<!--
SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

# LLM RAG
18 changes: 18 additions & 0 deletions docs/source/examples/llm/vdb_upload/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<!--
SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

# LLM VDB Upload
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ def _polling_generate_frames_fsspec(self) -> typing.Iterable[fsspec.core.OpenFil
if file.full_name not in files_seen:
filtered_files.append(file)

# Replace files_seen with the new set of files. This prevents a memory leak that could occurr if files are
# deleted from the input directory. In addition if a file with a given name was created, seen/processed by
# Replace files_seen with the new set of files. This prevents a memory leak that could occur if files are
# deleted from the input directory. In addition, if a file with a given name was created, seen/processed by
# the stage, and then deleted, and a new file with the same name appeared sometime later, the stage will
# need to re-ingest that new file.
files_seen = file_set
Expand Down
Empty file added examples/llm/__init__.py
Empty file.
Loading

0 comments on commit 4806119

Please sign in to comment.