Docs update to indicate use of conda-merge to generate install files (#1387)

Resolves #1369

Authors:
  - Devin Robison (https://github.com/drobison00)

Approvers:
  - Eli Fajardo (https://github.com/efajardo-nv)
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: #1387
drobison00 authored Nov 29, 2023
1 parent 5f00e78 commit f6537a2
Showing 34 changed files with 125 additions and 52 deletions.
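
The documentation changes below all converge on the same install pattern: merge the base dev environment file with a supplemental file using `conda-merge`, then update the target Conda environment from the merged result. A minimal sketch of that pattern, assuming a Morpheus checkout as the working directory and an existing `morpheus` environment; the `mkdir -p .tmp` step is an assumption, since the documented snippets redirect into `.tmp/` without creating it:

```bash
# One-time setup: install conda-merge into the base environment
mamba install -n base -c conda-forge conda-merge

# Merge the base dev file with a supplemental file, then update the environment
export CUDA_VER=11.8
mkdir -p .tmp  # assumed; the documented commands redirect into .tmp/ without creating it
conda run -n base --live-stream conda-merge \
    docker/conda/environments/cuda${CUDA_VER}_dev.yml \
    docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \
  && mamba env update -n morpheus --file .tmp/merged.yml
```

Each README below substitutes its own supplemental file (examples, docs, GNN fraud detection, LLM agents, and so on) as the second argument to `conda-merge`.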
2 changes: 1 addition & 1 deletion ci/conda/recipes/morpheus/meta.yaml
@@ -82,7 +82,7 @@ outputs:
- libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863
- mlflow>=2.2.1,<3
- mrc
- networkx 3.1.*
- networkx>=2.8
- numpydoc 1.4.*
- nvtabular {{ rapids_version }}.*
- pandas 1.3.*
1 change: 1 addition & 0 deletions docker/conda/environments/cuda11.8_dev.yml
@@ -21,6 +21,7 @@ channels:
- nvidia/label/dev # For pre-releases of MRC. Should still default to full releases if available
- pytorch
- conda-forge
- defaults
dependencies:
####### Morpheus Dependencies (keep sorted!) #######
- automake=1.16.5
8 changes: 5 additions & 3 deletions docker/conda/environments/cuda11.8_examples.yml
@@ -16,15 +16,17 @@
# Additional dependencies needed by some of the Morpheus examples.
# The intended usage is to first create the conda environment from the `cuda11.8_dev.yml` file, and then update the
# env with this file. ex:
# mamba env create -n morpheus --file docker/conda/environments/cuda11.8_dev.yml
# conda activate morpheus
# mamba env update -n morpheus --file docker/conda/environments/cuda11.8_examples.yml
# mamba install -n base -c conda-forge conda-merge
# conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \
# docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \
# && mamba env update -n morpheus --file .tmp/merged.yml
channels:
- rapidsai
- nvidia
- huggingface
- conda-forge
- dglteam/label/cu118
- defaults
dependencies:
- arxiv=1.4
- boto3
1 change: 1 addition & 0 deletions docker/conda/environments/cuda11.8_runtime.yml
@@ -19,6 +19,7 @@ channels:
- nvidia
- rapidsai-nightly
- conda-forge
- defaults
dependencies:
- nb_conda_kernels
- pip
6 changes: 5 additions & 1 deletion docs/README.md
@@ -22,7 +22,11 @@ Additional packages required for building the documentation are defined in `./co
## Install Additional Dependencies
From the root of the Morpheus repo:
```bash
mamba env update -f docs/conda_docs.yml
export CUDA_VER=11.8
mamba install -n base -c conda-forge conda-merge
conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \
docs/conda_docs.yml > .tmp/merged.yml \
&& mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml
```

## Build Morpheus and Documentation
5 changes: 4 additions & 1 deletion examples/digital_fingerprinting/production/Dockerfile
@@ -31,7 +31,10 @@ COPY ./conda_env.yml ./

# Install DFP dependencies
RUN source activate morpheus \
&& mamba env update -n morpheus -f ./conda_env.yml
&& mamba install -n base -c conda-forge conda-merge \
&& conda run -n base --live-stream conda-merge /workspace/docker/conda/environments/cuda11.8_dev.yml \
./conda_env.yml > ./merged.yml \
&& mamba env update -n morpheus --file ./merged.yml

# Set the tracking URI for mlflow
ENV MLFLOW_TRACKING_URI="http://mlflow:5000"
@@ -19,51 +19,55 @@
### Set up Morpheus Dev Container

If you don't already have the Morpheus Dev container, run the following to build it:
```
```bash
./docker/build_container_dev.sh
```

Now run the container:
```
```bash
./docker/run_container_dev.sh
```

Note that Morpheus containers are tagged by date. By default, `run_container_dev.sh` will try to use the current date as the tag. Therefore, if you are trying to run a container that was not built on the current date, you must set the `DOCKER_IMAGE_TAG` environment variable. For example,
```
```bash
DOCKER_IMAGE_TAG=dev-221003 ./docker/run_container_dev.sh
```

In the `/workspace` directory of the container, run the following to compile Morpheus:
```
```bash
./scripts/compile.sh
```

Now install Morpheus:
```
```bash
pip install -e /workspace
```

Install additional required dependencies:
```
```bash
export CUDA_VER=11.8
mamba env update -n morpheus --file docker/conda/environments/cuda${CUDA_VER}_examples.yml
mamba install -n base -c conda-forge conda-merge
conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \
docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \
&& mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml
```


Fetch input data for benchmarks:
```
```bash
./examples/digital_fingerprinting/fetch_example_data.py all
```

### Start MLflow

MLflow is used as the model repository where the trained DFP models will be published and used for inference by the pipelines. Run the following to start MLflow in a host terminal window (not container):

```
```bash
# from root of Morpheus repo
cd examples/digital_fingerprinting/production
```

```
```bash
docker compose up mlflow
```

6 changes: 5 additions & 1 deletion examples/gnn_fraud_detection_pipeline/README.md
@@ -21,7 +21,11 @@ limitations under the License.
Prior to running the GNN fraud detection pipeline, additional requirements must be installed into your Conda environment. A supplemental requirements file has been provided in this example directory.

```bash
mamba env update -n ${CONDA_DEFAULT_ENV} -f examples/gnn_fraud_detection_pipeline/requirements.yml
export CUDA_VER=11.8
mamba install -n base -c conda-forge conda-merge
conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \
examples/gnn_fraud_detection_pipeline/requirements.yml > .tmp/merged.yml \
&& mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml
```

## Running
1 change: 1 addition & 0 deletions examples/gnn_fraud_detection_pipeline/requirements.yml
@@ -18,6 +18,7 @@ channels:
- nvidia
- conda-forge
- dglteam/label/cu118
- defaults
dependencies:
- cuml=23.06
- dgl=1.0.2
7 changes: 6 additions & 1 deletion examples/llm/agents/README.md
@@ -95,9 +95,14 @@ export SERPAPI_API_KEY="<YOUR_SERPAPI_API_KEY>"
Install the required dependencies.

```bash
mamba env update -n morpheus --file ${MORPHEUS_ROOT}/docker/conda/environments/cuda11.8_examples.yml
export CUDA_VER=11.8
mamba install -n base -c conda-forge conda-merge
conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \
docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \
&& mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml
```


### Running the Morpheus Pipeline

The top level entrypoint to each of the LLM example pipelines is `examples/llm/main.py`. This script accepts a set
1 change: 1 addition & 0 deletions examples/llm/agents/requirements.yaml
@@ -16,6 +16,7 @@
channels:
- huggingface
- conda-forge
- defaults
dependencies:
- langchain=0.0.190
- pip
13 changes: 9 additions & 4 deletions examples/llm/completion/README.md
@@ -35,9 +35,11 @@ limitations under the License.
The primary goal of this example is to showcase the creation of a pipeline that integrates an LLM service with Morpheus. Although this example features a single implementation, the pipeline and its components are versatile and can be adapted to various scenarios with unique requirements. The following highlights different customization points within the pipeline and the specific choices made for this example:

#### LLM Service

- The pipeline is designed to support any LLM service that adheres to our LLMService interface. Compatible services include OpenAI, NeMo, or even local execution using llama-cpp-python. In this demonstration, we focus on utilizing NeMo as the LLM service, highlighting the advantages it offers over other LLM services and the seamless integration with the NeMo ecosystem. Furthermore, the pipeline can accommodate more complex configurations using NeMo + Inform without necessitating changes to the core pipeline.

#### Downstream Tasks

- Post LLM execution, the model's output can be leveraged for various tasks, including model training, analysis, or simulating an attack. In this particular example, we have simplified the implementation and focused solely on the LLMEngine.

### Pipeline Implementation
@@ -64,9 +66,14 @@ Before running the pipeline, ensure that the `NGC_API_KEY` environment variable
Install the required dependencies.

```bash
mamba env update -n morpheus --file ${MORPHEUS_ROOT}/docker/conda/environments/cuda11.8_examples.yml
export CUDA_VER=11.8
mamba install -n base -c conda-forge conda-merge
conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \
docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \
&& mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml
```


#### Setting up NGC API Key

For this example, we utilize the NeMo Service within NGC. To gain access, an NGC API key is required. Follow the
@@ -75,7 +82,6 @@ generate your NGC API key.

Configure the following environment variables, with NGC_ORG_ID being optional:


```bash
export NGC_API_KEY=<YOUR_API_KEY>
export NGC_ORG_ID=<YOUR_NGC_ORG_ID>
@@ -105,7 +111,7 @@ python examples/llm/main.py completion [OPTIONS] COMMAND [ARGS]...

- `--pipeline_batch_size INTEGER RANGE`
- **Description**: Internal batch size for the pipeline. Can be much larger than the model batch size.
Also used for Kafka consumers.
Also used for Kafka consumers.
- **Default**: `1024`

- `--model_max_batch_size INTEGER RANGE`
@@ -123,7 +129,6 @@ python examples/llm/main.py completion [OPTIONS] COMMAND [ARGS]...
- `--help`
- **Description**: Show the help message with options and commands details.


### Running Morpheus Pipeline with OpenAI LLM service

```bash
1 change: 1 addition & 0 deletions examples/llm/completion/requirements.yaml
@@ -15,6 +15,7 @@

channels:
- conda-forge
- defaults
dependencies:
- arxiv=1.4
- langchain=0.0.190
1 change: 1 addition & 0 deletions examples/llm/rag/requirements.yaml
@@ -16,6 +16,7 @@
channels:
- huggingface
- conda-forge
- defaults
dependencies:
- pip
- openai=0.28
1 change: 1 addition & 0 deletions examples/llm/vdb_upload/requirements.yaml
@@ -15,6 +15,7 @@

channels:
- conda-forge
- defaults
dependencies:
- arxiv=1.4
- onnx # required for triton model export
1 change: 1 addition & 0 deletions models/training-tuning-scripts/abp-models/requirements.yml
@@ -17,6 +17,7 @@ channels:
- rapidsai
- nvidia
- conda-forge
- defaults
dependencies:
- cuml=23.06
- jupyterlab
1 change: 1 addition & 0 deletions models/training-tuning-scripts/dfp-models/requirements.yml
@@ -17,6 +17,7 @@ channels:
- nvidia
- pytorch
- conda-forge
- defaults
dependencies:
- dill
- jupyterlab
10 changes: 7 additions & 3 deletions models/training-tuning-scripts/fraud-detection-models/README.md
@@ -23,13 +23,17 @@ limitations under the License.

Install packages for training GNN model.

```
mamba env update -n ${CONDA_DEFAULT_ENV} -f requirements.yml
```bash
export CUDA_VER=11.8
mamba install -n base -c conda-forge conda-merge
conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \
models/training-tuning-scripts/fraud-detection-models/requirements.yml > .tmp/merged.yml \
&& mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml
```

### Options for training and tuning models.

```
```bash
python training.py --help
Usage: training.py [OPTIONS]

@@ -52,7 +52,6 @@
"%autoreload 2\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pylab as plt\n",
"import os\n",
"import dgl\n",
"import numpy as np\n",
@@ -1011,7 +1010,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.10.13"
}
},
"nbformat": 4,
@@ -19,12 +19,12 @@ channels:
- dglteam/label/cu118
- pytorch
- conda-forge
- defaults
dependencies:
- click>=8
- cuml=23.06
- dgl
- jupyterlab
- matplotlib
- pytorch-cuda=11.8
- pytorch=2.0.1
- scikit-learn=1.2.2
@@ -18,6 +18,7 @@ channels:
- nvidia
- pytorch
- conda-forge
- defaults
dependencies:
- cudf=23.06
- jupyterlab
@@ -18,6 +18,7 @@ channels:
- nvidia
- pytorch
- conda-forge
- defaults
dependencies:
- cudf=23.06
- jupyterlab
@@ -15,6 +15,7 @@

channels:
- conda-forge
- defaults
dependencies:
- jupyterlab
- matplotlib
@@ -18,6 +18,7 @@ channels:
- nvidia
- pytorch
- conda-forge
- defaults
dependencies:
- cudf=23.06
- jupyterlab
1 change: 1 addition & 0 deletions models/training-tuning-scripts/sid-models/requirements.yml
@@ -18,6 +18,7 @@ channels:
- nvidia
- pytorch
- conda-forge
- defaults
dependencies:
- cudf=23.06
- jupyterlab
@@ -17,6 +17,7 @@ channels:
- rapidsai
- nvidia
- conda-forge
- defaults
dependencies:
- click==8.1.3
- cuml=23.06
2 changes: 1 addition & 1 deletion morpheus/llm/nodes/extracter_node.py
@@ -30,7 +30,7 @@ class ExtracterNode(LLMNodeBase):
"""

def get_input_names(self) -> list[str]:
# This node does not receive it's inputs from upstream nodes, but rather from the task itself
# This node does not receive its inputs from upstream nodes, but rather from the task itself
return []

async def execute(self, context: LLMContext) -> LLMContext:
5 changes: 4 additions & 1 deletion morpheus/llm/services/nemo_llm_service.py
@@ -25,7 +25,10 @@

IMPORT_ERROR_MESSAGE = (
"NemoLLM not found. Install it and other additional dependencies by running the following command:\n"
"`mamba env update -n ${CONDA_DEFAULT_ENV} --file docker/conda/environments/cuda11.8_examples.yml`")
"`mamba install -n base -c conda-forge conda-merge`\n"
"`conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml "
" docker/conda/environments/cuda${CUDA_VER}_examples.yml"
" > .tmp/merged.yml && mamba env update -n morpheus --file .tmp/merged.yml`")

try:
import nemollm