Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace usage of PreprocessLogParsingStage with PreprocessNLPStage #842

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions examples/log_parsing/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,13 @@ From the root of the Morpheus repo run:
```bash
PYTHONPATH="examples/log_parsing" \
morpheus --log_level INFO \
--plugin "preprocessing" \
--plugin "inference" \
--plugin "postprocessing" \
run --num_threads 1 --use_cpp False --pipeline_batch_size 1024 --model_max_batch_size 32 \
pipeline-nlp \
from-file --filename ./models/datasets/validation-data/log-parsing-validation-data-input.csv \
deserialize \
log-preprocess --vocab_hash_file ./models/training-tuning-scripts/sid-models/resources/bert-base-cased-hash.txt --stride 64 \
preprocess --vocab_hash_file ./models/training-tuning-scripts/sid-models/resources/bert-base-cased-hash.txt --stride 64 --column=raw \
monitor --description "Preprocessing rate" \
inf-logparsing --model_name log-parsing-onnx --server_url localhost:8001 --force_convert_inputs=True \
monitor --description "Inference rate" --unit inf \
Expand Down
190 changes: 0 additions & 190 deletions examples/log_parsing/preprocessing.py

This file was deleted.

15 changes: 8 additions & 7 deletions examples/log_parsing/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import click
from inference import LogParsingInferenceStage
from postprocessing import LogParsingPostProcessingStage
from preprocessing import PreprocessLogParsingStage

from morpheus.config import Config
from morpheus.config import CppConfig
Expand All @@ -27,6 +26,7 @@
from morpheus.stages.input.file_source_stage import FileSourceStage
from morpheus.stages.output.write_to_file_stage import WriteToFileStage
from morpheus.stages.preprocess.deserialize_stage import DeserializeStage
from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage


@click.command()
Expand Down Expand Up @@ -115,12 +115,13 @@ def run_pipeline(
# Add a preprocessing stage.
# This stage preprocesses the rows in the DataFrame.
pipeline.add_stage(
PreprocessLogParsingStage(config,
vocab_hash_file=model_vocab_hash_file,
truncation=False,
do_lower_case=False,
stride=64,
add_special_tokens=False))
PreprocessNLPStage(config,
vocab_hash_file=model_vocab_hash_file,
truncation=False,
do_lower_case=False,
stride=64,
add_special_tokens=False,
column="raw"))

# Add a monitor stage.
# This stage logs the metrics (msg/sec) from the above stage.
Expand Down
3 changes: 0 additions & 3 deletions morpheus/stages/preprocess/preprocess_nlp_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import mrc

import cudf
from cudf.core.subword_tokenizer import SubwordTokenizer

import morpheus._lib.stages as _stages
from morpheus.cli.register_stage import register_stage
Expand Down Expand Up @@ -94,8 +93,6 @@ def __init__(self,
self._do_lower_case = do_lower_case
self._add_special_tokens = add_special_tokens

self._tokenizer: SubwordTokenizer = None

@property
def name(self) -> str:
return "preprocess-nlp"
Expand Down