Skip to content

Commit

Permalink
Add cluster model function traces (#700)
Browse files Browse the repository at this point in the history
* Add sklearn to tox

* Add function traces around model methods

* Support Python 2.7 & 3.7 sklearn

* Add test for multiple calls to model method

* Fixup: add comments & organize

* Add ensemble models

* Add ensemble model tests

* Edit tests

* Add ensemble library models from sklearn

* Start tests with empty commit

* Clean up tests

* Add cluster model instrumentation

* Fix tests for various versions of sklearn

* Fix ensemble tests with changes from tree PR

* [Mega-Linter] Apply linters fixes

* Fix some cluster model tests

* Fix tests after ensemble PR merge

* Add transform to tests

* Remove accidental commits

* Modify cluster tests to be more readable

* Break up instrumentation models

* Remove duplicate ensemble module defs

* Modify VotingRegressor test

Co-authored-by: Hannah Stepanek <hstepanek@newrelic.com>
Co-authored-by: lrafeei <lrafeei@users.noreply.github.com>
  • Loading branch information
3 people authored Dec 20, 2022
1 parent d9d5636 commit f33d21e
Show file tree
Hide file tree
Showing 4 changed files with 338 additions and 1 deletion.
114 changes: 114 additions & 0 deletions newrelic/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2902,6 +2902,120 @@ def _process_module_builtin_defaults():
"instrument_sklearn_ensemble_hist_models",
)

# Cluster model hooks: each call below passes a sklearn.cluster submodule path,
# the hook module "newrelic.hooks.mlmodel_sklearn", and the name of an
# instrument_sklearn_cluster_* function defined in that hook module.
# Several estimators are registered under two module paths (e.g. "_birch" and
# "birch") -- presumably the module layouts of different sklearn releases;
# TODO confirm against the supported sklearn versions.
_process_module_definition(
"sklearn.cluster._affinity_propagation",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_models",
)

_process_module_definition(
"sklearn.cluster.affinity_propagation_",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_models",
)

_process_module_definition(
"sklearn.cluster._agglomerative",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_agglomerative_models",
)

_process_module_definition(
"sklearn.cluster.hierarchical",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_agglomerative_models",
)

_process_module_definition(
"sklearn.cluster._birch",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_models",
)

_process_module_definition(
"sklearn.cluster.birch",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_models",
)

_process_module_definition(
"sklearn.cluster._bisect_k_means",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_kmeans_models",
)

_process_module_definition(
"sklearn.cluster._dbscan",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_models",
)

_process_module_definition(
"sklearn.cluster.dbscan_",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_models",
)

# NOTE(review): instrument_sklearn_cluster_models does not list
# "FeatureAgglomeration" (instrument_sklearn_cluster_agglomerative_models does)
# -- confirm this is the intended hook function for this module.
_process_module_definition(
"sklearn.cluster._feature_agglomeration",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_models",
)

_process_module_definition(
"sklearn.cluster._kmeans",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_kmeans_models",
)

_process_module_definition(
"sklearn.cluster.k_means_",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_kmeans_models",
)

_process_module_definition(
"sklearn.cluster._mean_shift",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_models",
)

_process_module_definition(
"sklearn.cluster.mean_shift_",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_models",
)

_process_module_definition(
"sklearn.cluster._optics",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_models",
)

# The spectral and bicluster modules share one hook function, which lists
# SpectralBiclustering, SpectralCoclustering and SpectralClustering.
_process_module_definition(
"sklearn.cluster._spectral",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_clustering_models",
)

_process_module_definition(
"sklearn.cluster.spectral",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_clustering_models",
)

_process_module_definition(
"sklearn.cluster._bicluster",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_clustering_models",
)

_process_module_definition(
"sklearn.cluster.bicluster",
"newrelic.hooks.mlmodel_sklearn",
"instrument_sklearn_cluster_clustering_models",
)

_process_module_definition(
"rest_framework.views",
"newrelic.hooks.component_djangorestframework",
Expand Down
37 changes: 37 additions & 0 deletions newrelic/hooks/mlmodel_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,43 @@ def instrument_sklearn_ensemble_hist_models(module):
_instrument_sklearn_models(module, model_classes)


def instrument_sklearn_cluster_models(module):
    """Instrumentation hook for general sklearn cluster estimators.

    Passes ``module`` and the estimator class names listed below to
    ``_instrument_sklearn_models``.

    :param module: the imported sklearn cluster module handed to this hook.
    """
    _instrument_sklearn_models(
        module,
        (
            "AffinityPropagation",
            "Birch",
            "DBSCAN",
            "MeanShift",
            "OPTICS",
        ),
    )


def instrument_sklearn_cluster_agglomerative_models(module):
    """Instrumentation hook for the agglomerative sklearn cluster estimators.

    Passes ``module`` and the estimator class names listed below to
    ``_instrument_sklearn_models``.

    :param module: the imported sklearn cluster module handed to this hook.
    """
    _instrument_sklearn_models(
        module,
        (
            "AgglomerativeClustering",
            "FeatureAgglomeration",
        ),
    )


def instrument_sklearn_cluster_clustering_models(module):
    """Instrumentation hook for the spectral sklearn cluster estimators.

    Passes ``module`` and the estimator class names listed below to
    ``_instrument_sklearn_models``.

    :param module: the imported sklearn cluster module handed to this hook.
    """
    _instrument_sklearn_models(
        module,
        (
            "SpectralBiclustering",
            "SpectralCoclustering",
            "SpectralClustering",
        ),
    )


def instrument_sklearn_cluster_kmeans_models(module):
    """Instrumentation hook for the k-means family of sklearn cluster estimators.

    Passes ``module`` and the estimator class names listed below to
    ``_instrument_sklearn_models``.

    :param module: the imported sklearn cluster module handed to this hook.
    """
    _instrument_sklearn_models(
        module,
        (
            "BisectingKMeans",
            "KMeans",
            "MiniBatchKMeans",
        ),
    )


def instrument_sklearn_metrics(module):
for scorer in METRIC_SCORERS:
if hasattr(module, scorer):
Expand Down
186 changes: 186 additions & 0 deletions tests/mlmodel_sklearn/test_cluster_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# Copyright 2010 New Relic, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from sklearn import __version__ # noqa: this is needed for get_package_version
from testing_support.validators.validate_transaction_metrics import (
validate_transaction_metrics,
)

from newrelic.api.background_task import background_task
from newrelic.common.package_version_utils import get_package_version
from newrelic.packages import six

# Installed sklearn version as a tuple of ints, so it can be compared against
# version tuples such as (1, 1, 0) in the tests below.
# NOTE(review): int() raises ValueError on a non-numeric component (e.g. "rc1")
# -- confirm get_package_version never returns such a version string here.
SKLEARN_VERSION = tuple(int(component) for component in get_package_version("sklearn").split("."))


@pytest.mark.parametrize(
    "cluster_model_name",
    [
        "AffinityPropagation",
        "AgglomerativeClustering",
        "Birch",
        "DBSCAN",
        "FeatureAgglomeration",
        "KMeans",
        "MeanShift",
        "MiniBatchKMeans",
        "SpectralBiclustering",
        "SpectralCoclustering",
        "SpectralClustering",
    ],
)
def test_below_v1_1_model_methods_wrapped_in_function_trace(cluster_model_name, run_cluster_model):
    """Check the function-trace metrics recorded for each parametrized cluster model.

    The model is exercised through the ``run_cluster_model`` fixture inside a
    background task, and the ``(metric name, count)`` pairs for that model are
    validated as both scoped and rollup metrics.
    """
    # The expected Birch.predict count depends on the installed sklearn version.
    birch_predict_count = 1 if SKLEARN_VERSION >= (1, 0, 0) else 3

    metrics_by_model = {
        "AffinityPropagation": [
            ("Function/MLModel/Sklearn/Named/AffinityPropagation.fit", 2),
            ("Function/MLModel/Sklearn/Named/AffinityPropagation.predict", 1),
            ("Function/MLModel/Sklearn/Named/AffinityPropagation.fit_predict", 1),
        ],
        "AgglomerativeClustering": [
            ("Function/MLModel/Sklearn/Named/AgglomerativeClustering.fit", 2),
            ("Function/MLModel/Sklearn/Named/AgglomerativeClustering.fit_predict", 1),
        ],
        "Birch": [
            ("Function/MLModel/Sklearn/Named/Birch.fit", 2),
            ("Function/MLModel/Sklearn/Named/Birch.predict", birch_predict_count),
            ("Function/MLModel/Sklearn/Named/Birch.fit_predict", 1),
            ("Function/MLModel/Sklearn/Named/Birch.transform", 1),
        ],
        "DBSCAN": [
            ("Function/MLModel/Sklearn/Named/DBSCAN.fit", 2),
            ("Function/MLModel/Sklearn/Named/DBSCAN.fit_predict", 1),
        ],
        "FeatureAgglomeration": [
            ("Function/MLModel/Sklearn/Named/FeatureAgglomeration.fit", 1),
            ("Function/MLModel/Sklearn/Named/FeatureAgglomeration.transform", 1),
        ],
        "KMeans": [
            ("Function/MLModel/Sklearn/Named/KMeans.fit", 2),
            ("Function/MLModel/Sklearn/Named/KMeans.predict", 1),
            ("Function/MLModel/Sklearn/Named/KMeans.fit_predict", 1),
            ("Function/MLModel/Sklearn/Named/KMeans.transform", 1),
        ],
        "MeanShift": [
            ("Function/MLModel/Sklearn/Named/MeanShift.fit", 2),
            ("Function/MLModel/Sklearn/Named/MeanShift.predict", 1),
            ("Function/MLModel/Sklearn/Named/MeanShift.fit_predict", 1),
        ],
        "MiniBatchKMeans": [
            ("Function/MLModel/Sklearn/Named/MiniBatchKMeans.fit", 2),
            ("Function/MLModel/Sklearn/Named/MiniBatchKMeans.predict", 1),
            ("Function/MLModel/Sklearn/Named/MiniBatchKMeans.fit_predict", 1),
        ],
        "SpectralBiclustering": [
            ("Function/MLModel/Sklearn/Named/SpectralBiclustering.fit", 1),
        ],
        "SpectralCoclustering": [
            ("Function/MLModel/Sklearn/Named/SpectralCoclustering.fit", 1),
        ],
        "SpectralClustering": [
            ("Function/MLModel/Sklearn/Named/SpectralClustering.fit", 2),
            ("Function/MLModel/Sklearn/Named/SpectralClustering.fit_predict", 1),
        ],
    }
    model_metrics = metrics_by_model[cluster_model_name]

    # On Python 3 the expected transaction name includes the enclosing test
    # function; on Python 2 only the bare inner function name is expected.
    if six.PY3:
        transaction_name = (
            "test_cluster_models:test_below_v1_1_model_methods_wrapped_in_function_trace.<locals>._test"
        )
    else:
        transaction_name = "test_cluster_models:_test"

    @validate_transaction_metrics(
        transaction_name,
        scoped_metrics=model_metrics,
        rollup_metrics=model_metrics,
        background_task=True,
    )
    @background_task()
    def _test():
        run_cluster_model(cluster_model_name)

    _test()


# The skip condition lets sklearn 1.1.0 itself run, so the reason reads ">= 1.1".
@pytest.mark.skipif(SKLEARN_VERSION < (1, 1, 0), reason="Requires sklearn >= 1.1")
@pytest.mark.parametrize(
    "cluster_model_name",
    [
        "BisectingKMeans",
        "OPTICS",
    ],
)
def test_above_v1_1_model_methods_wrapped_in_function_trace(cluster_model_name, run_cluster_model):
    """Check the function-trace metrics recorded for the models tested on sklearn >= 1.1.

    The model is exercised through the ``run_cluster_model`` fixture inside a
    background task, and the ``(metric name, count)`` pairs for that model are
    validated as both scoped and rollup metrics. The whole test is skipped when
    ``SKLEARN_VERSION`` is below ``(1, 1, 0)``.
    """
    expected_scoped_metrics = {
        "BisectingKMeans": [
            ("Function/MLModel/Sklearn/Named/BisectingKMeans.fit", 2),
            ("Function/MLModel/Sklearn/Named/BisectingKMeans.predict", 1),
            ("Function/MLModel/Sklearn/Named/BisectingKMeans.fit_predict", 1),
        ],
        "OPTICS": [
            ("Function/MLModel/Sklearn/Named/OPTICS.fit", 2),
            ("Function/MLModel/Sklearn/Named/OPTICS.fit_predict", 1),
        ],
    }

    # On Python 3 the expected transaction name includes the enclosing test
    # function; on Python 2 only the bare inner function name is expected.
    expected_transaction_name = "test_cluster_models:_test"
    if six.PY3:
        expected_transaction_name = (
            "test_cluster_models:test_above_v1_1_model_methods_wrapped_in_function_trace.<locals>._test"
        )

    @validate_transaction_metrics(
        expected_transaction_name,
        scoped_metrics=expected_scoped_metrics[cluster_model_name],
        rollup_metrics=expected_scoped_metrics[cluster_model_name],
        background_task=True,
    )
    @background_task()
    def _test():
        run_cluster_model(cluster_model_name)

    _test()


@pytest.fixture
def run_cluster_model():
    """Fixture returning a callable that fits and exercises a sklearn cluster model.

    The returned callable takes the name of a class in ``sklearn.cluster``,
    instantiates it with default arguments, fits it on a train split of the
    iris dataset, calls each optional method the fitted model exposes, and
    returns the fitted model.
    """

    def _run(cluster_model_name):
        import sklearn.cluster
        from sklearn.datasets import load_iris
        from sklearn.model_selection import train_test_split

        features, labels = load_iris(return_X_y=True)
        x_train, x_test, y_train, y_test = train_test_split(features, labels, stratify=labels, random_state=0)

        estimator = getattr(sklearn.cluster, cluster_model_name)()
        model = estimator.fit(x_train, y_train)

        # Optional methods are tried in this fixed order; each one is called
        # only if the model exposes it at that point.
        optional_calls = (
            ("predict", (x_test,)),
            ("score", (x_test, y_test)),
            ("fit_predict", (x_test,)),
            ("predict_log_proba", (x_test,)),
            ("predict_proba", (x_test,)),
            ("transform", (x_test,)),
        )
        for method_name, call_args in optional_calls:
            if hasattr(model, method_name):
                getattr(model, method_name)(*call_args)

        return model

    return _run
2 changes: 1 addition & 1 deletion tests/mlmodel_sklearn/test_ensemble_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def _run(ensemble_model_name):
"voting": "soft",
}
elif ensemble_model_name == "VotingRegressor":
kwargs = {"estimators": [("rf", RandomForestRegressor()), ("lr", LinearRegression())]}
kwargs = {"estimators": [("lr", LinearRegression())]}
elif ensemble_model_name == "StackingRegressor":
kwargs = {"estimators": [("rf", RandomForestRegressor())]}
clf = getattr(sklearn.ensemble, ensemble_model_name)(**kwargs)
Expand Down

0 comments on commit f33d21e

Please sign in to comment.