nv-morpheus · rapids-bot · Jun 5, 2023 · May 2, 2023 · May 2, 2023 · May 2, 2023
@@ -21,13 +21,24 @@ pip install -r requirements.txt
 
 ### Training
 
+#### Training data
+
 Training data consists of 116K domains labelled as DGA and 100K domains labelled as non-DGA.
 
-GPU Model: V100
-Epochs = 25
-Training batch size = 10000
-Model precision = 0.997
-Model accuracy = 0.998
+Two types of DGA domains (Banjori, Chinad) were generated using the implementations at https://github.com/baderj/domain_generation_algorithms. The 100,000 benign domains were taken from https://www.domcop.com/files/top/top10milliondomains.csv.zip.
+
+#### Training epochs
+25
+
+#### Training batch size
+10000
+
+#### GPU model
+V100
+
+#### Model accuracy
+precision = 0.995
+accuracy = 0.998
 
 #### Training script
 

@@ -16,38 +16,11 @@
 import dataclasses
 import typing
 
-import cupy as cp
-
-from morpheus.messages import InferenceMemory
 from morpheus.messages import MultiInferenceMessage
-from morpheus.messages.data_class_prop import DataClassProp
 from morpheus.messages.memory.tensor_memory import TensorMemory
 from morpheus.messages.message_meta import MessageMeta
 
 
-@dataclasses.dataclass(init=False)
-class InferenceMemoryDGA(InferenceMemory, cpp_class=None):
-    """
-    This is a container class for data that needs to be submitted to the inference server for DGA
-    use cases.
-
-    Parameters
-    ----------
-    domains : cupy.ndarray
-        The token-ids for each string padded with 0s to max_length.
-    seq_lengths : cupy.ndarray
-        Sequence lengths
-
-    """
-    domains: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory._get_tensor_prop,
-                                                             InferenceMemory.set_input)
-    seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory._get_tensor_prop,
-                                                             InferenceMemory.set_input)
-
-    def __init__(self, *, count: int, domains: cp.ndarray, seq_ids: cp.ndarray):
-        super().__init__(count=count, tensors={'domains': domains, 'seq_ids': seq_ids})
-
-
 @dataclasses.dataclass
 class MultiInferenceDGAMessage(MultiInferenceMessage, cpp_class=None):
     """

@@ -17,7 +17,7 @@
 
 import cupy as cp
 import mrc
-from messages import InferenceMemoryDGA
+# from messages import InferenceMemoryDGA
 from messages import MultiInferenceDGAMessage
 
 import cudf
@@ -26,6 +26,7 @@
 from morpheus.messages import MultiInferenceMessage
 from morpheus.messages import MultiInferenceNLPMessage
 from morpheus.messages import MultiMessage
+from morpheus.messages.memory.tensor_memory import TensorMemory
 from morpheus.stages.preprocess.preprocess_base_stage import PreprocessBaseStage
 
 
@@ -75,7 +76,7 @@ def supports_cpp_node(self):
     @staticmethod
     def pre_process_batch(x: MultiMessage, fea_len: int, column: str, truncate_len: int) -> MultiInferenceNLPMessage:
 
-        df = x.get_meta()[[column]]
+        df = x.get_meta([column])
         df[column] = df[column].str.slice_replace(truncate_len, repl='')
 
         split_ser = df[column].str.findall(r"[\w\W\d\D\s\S]")
@@ -113,7 +114,7 @@ def pre_process_batch(x: MultiMessage, fea_len: int, column: str, truncate_len:
         seg_ids[:, 2] = fea_len - 1
 
         # Create the inference memory. Keep in mind the count here could be greater than the input count
-        memory = InferenceMemoryDGA(count=input.shape[0], domains=input, seq_ids=seg_ids)
+        memory = TensorMemory(count=input.shape[0], tensors={'domains': input, 'seq_ids': seg_ids})
 
         infer_message = MultiInferenceDGAMessage.from_message(x, memory=memory)
 

@@ -101,7 +101,11 @@ RUN conda clean -afy
 COPY "./docker" "./docker"
 COPY "./anomalous-auth-detection" "./anomalous-auth-detection"
 COPY "./appshield-dga-detection" "./appshield-dga-detection"
+COPY "./asset-clustering" "./asset-clustering"
 COPY "./dga-detection" "./dga-detection"
+COPY "./ids-detection" "./ids-detection"
+COPY "./log-sequence-ad" "./log-sequence-ad"
+COPY "./operational-technology" "./operational-technology"
 COPY "./phishing-url-detection" "./phishing-url-detection"
 COPY "./string-resemblance-grouping" "./string-resemblance-grouping"
 COPY ["*.md", "LICENSE", "./"]