Skip to content

Commit

Permalink
Misc test improvements (#1447)
Browse files Browse the repository at this point in the history
* Update `morpheus/modules/payload_batcher.py` to avoid passing a single-element list to `cudf.DataFrame.groupby`; this avoids a warning about an upcoming behavior change.
* Ignore warnings emitted from merlin/nvt regarding tensorflow not being installed
* Ignore warnings about `distutils` being deprecated while running tests.
* Mark `tests/common/test_http_server.py` as a slow test (takes ~1m)
* Update `tests/modules/test_payload_batcher.py` to expect/filter a warning


## By Submitting this PR I confirm:
- I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md).
- When the PR is ready for review, new or existing tests cover these changes.
- When the PR is ready for review, the documentation is up to date with these changes.

Authors:
  - David Gardner (https://github.com/dagardner-nv)

Approvers:
  - Devin Robison (https://github.com/drobison00)
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: #1447
  • Loading branch information
dagardner-nv authored Jan 8, 2024
1 parent 36f1c19 commit b68d769
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 21 deletions.
11 changes: 9 additions & 2 deletions morpheus/modules/payload_batcher.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -147,7 +147,14 @@ def _batch_dataframe_by_group(df: cudf.DataFrame) -> typing.List[cudf.DataFrame]
# Period object conversion is not supported in cudf
df[period_column] = df[period_column].to_pandas().dt.to_period(period).astype('str')

groups = df.groupby(group_by_columns)
if len(group_by_columns) == 1:
# Avoid a warning from cudf regarding an upcoming change of behavior when applying a groupby to a
# single-element list.
group_by_columns_ = group_by_columns[0]
else:
group_by_columns_ = group_by_columns

groups = df.groupby(group_by_columns_)

dfs = []
for _, group in groups:
Expand Down
9 changes: 7 additions & 2 deletions morpheus/utils/column_info.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -17,10 +17,15 @@
import logging
import re
import typing
import warnings
from datetime import datetime
from functools import partial

import nvtabular as nvt
with warnings.catch_warnings():
# Ignore warning regarding tensorflow not being installed
warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning)
import nvtabular as nvt

import pandas as pd

import cudf
Expand Down
31 changes: 18 additions & 13 deletions morpheus/utils/schema_transforms.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -15,8 +15,8 @@
import logging
import os
import typing
import warnings

import nvtabular as nvt
import pandas as pd

import cudf
Expand All @@ -27,17 +27,22 @@
from morpheus.utils.nvt.extensions import morpheus_ext
from morpheus.utils.nvt.schema_converters import create_and_attach_nvt_workflow

if os.environ.get("MORPHEUS_IN_SPHINX_BUILD") is None:
# Apply patches to NVT
# TODO(Devin): Can be removed, once numpy mappings are updated in Merlin
# ========================================================================
patches.patch_numpy_dtype_registry()
# ========================================================================

# Add morpheus conversion mappings
# ========================================================================
morpheus_ext.register_morpheus_extensions()
# =========================================================================
with warnings.catch_warnings():
# Ignore warning regarding tensorflow not being installed
warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning)
import nvtabular as nvt

if os.environ.get("MORPHEUS_IN_SPHINX_BUILD") is None:
# Apply patches to NVT
# TODO(Devin): Can be removed, once numpy mappings are updated in Merlin
# ========================================================================
patches.patch_numpy_dtype_registry()
# ========================================================================

# Add morpheus conversion mappings
# ========================================================================
morpheus_ext.register_morpheus_extensions()
# =========================================================================

logger = logging.getLogger(__name__)

Expand Down
7 changes: 6 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,12 @@ filterwarnings = [
'ignore:`np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe',
'ignore:Warning the df property returns a copy, please use the copy_dataframe method or the mutable_dataframe context manager to modify the DataFrame in-place instead.',
'ignore:`np.MachAr` is deprecated \(NumPy 1.22\):DeprecationWarning',
'ignore:Please use `spmatrix` from the `scipy.sparse` namespace, the `scipy.sparse.base` namespace is deprecated:DeprecationWarning',
'ignore:Please use `spmatrix` from the `scipy.sparse` namespace, the `scipy.sparse.base` namespace is deprecated:DeprecationWarning',

# Deprecation warning from any project using distutils, currently known sources of this are:
# GPUtil https://github.com/anderskm/gputil/issues/48
# PySpark https://issues.apache.org/jira/browse/SPARK-45390
'ignore:The distutils package is deprecated and slated for removal in Python 3.12. Use setuptools or check PEP 632 for potential alternatives',
]

testpaths = ["tests"]
Expand Down
3 changes: 2 additions & 1 deletion tests/common/test_http_server.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -35,6 +35,7 @@ def make_parse_fn(status: HTTPStatus = HTTPStatus.OK,
return mock_parse_fn


@pytest.mark.slow
@pytest.mark.parametrize("endpoint", ["/test", "test/", "/a/b/c/d"])
@pytest.mark.parametrize("port", [8088, 9090])
@pytest.mark.parametrize("method", ["GET", "POST", "PUT"])
Expand Down
8 changes: 7 additions & 1 deletion tests/modules/test_payload_batcher.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -135,8 +135,10 @@ def test_custom_params(config,
expected_count,
expected_exception):

expected_warning = False
if timestamp_column_name:
filter_probs_df["timestamp"] = TIMESTAMPS
expected_warning = timestamp_pattern is None

pipe = Pipeline(config)

Expand Down Expand Up @@ -182,6 +184,10 @@ def test_custom_params(config,
if expected_exception:
with pytest.raises(type(expected_exception), match=str(expected_exception)):
pipe.run()
elif expected_warning:
with pytest.warns(UserWarning):
pipe.run()
assert len(sink_stage.get_messages()) == expected_count
else:
pipe.run()
assert len(sink_stage.get_messages()) == expected_count
Expand Down
12 changes: 11 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -16,6 +16,7 @@

import os
import shutil
import warnings
from unittest import mock

import click
Expand Down Expand Up @@ -129,6 +130,15 @@ def mlflow_uri(tmp_path):
mlflow.end_run()


@pytest.fixture(scope="function", autouse=True)
def config_warning_fixture():
# morpheus.cli.utils._apply_to_config warns about any keyword arguments that don't match a config option;
# this isn't triggered in normal production code, but is triggered in the CLI tests.
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="No config option matches for.*", category=UserWarning)
yield


@pytest.mark.reload_modules(commands)
@pytest.mark.usefixtures("chdir_tmpdir", "reload_modules")
@pytest.mark.use_python
Expand Down

0 comments on commit b68d769

Please sign in to comment.