From 326410ea29b963be6ebc697e55dbbf1db1cbb49e Mon Sep 17 00:00:00 2001 From: Manu Seth Date: Thu, 14 May 2020 08:58:55 +0000 Subject: [PATCH] Revert "Changes to mxnet.metric (#18083)" This reverts commit effbb8b7498e58259bafa6010b8d042bff8d239c. --- benchmark/python/sparse/sparse_end2end.py | 2 +- example/adversary/adversary_generation.ipynb | 6 +- .../variational_autoencoder/VAE_example.ipynb | 2 +- example/caffe/caffe_net.py | 2 +- example/caffe/train_model.py | 2 +- example/capsnet/capsulenet.py | 2 +- example/ctc/lstm_ocr_train.py | 2 +- .../deep-embedded-clustering/autoencoder.py | 4 +- example/deep-embedded-clustering/dec.py | 2 +- .../gluon_mnist.py | 4 +- .../module_mnist.py | 2 +- .../resnet50_imagenet.py | 10 +- example/distributed_training/cifar10_dist.py | 2 +- .../cifar10_kvstore_hvd.py | 4 +- example/fcn-xs/solver.py | 2 +- example/gluon/audio/urban_sounds/train.py | 2 +- example/gluon/dc_gan/dcgan.py | 2 +- example/gluon/image_classification.py | 2 +- example/gluon/mnist/mnist.py | 4 +- example/gluon/sn_gan/train.py | 2 +- .../super_resolution/super_resolution.py | 2 +- example/gluon/tree_lstm/main.py | 2 +- example/image-classification/common/fit.py | 4 +- example/image-classification/score.py | 4 +- example/image-classification/test_score.py | 4 +- example/kaggle-ndsb2/Train.py | 4 +- .../matrix_factorization/train.py | 2 +- example/module/mnist_mlp.py | 2 +- example/multi-task/multi-task-learning.ipynb | 8 +- .../multivariate_time_series/src/metrics.py | 8 +- .../named_entity_recognition/src/metrics.py | 10 +- example/nce-loss/nce.py | 6 +- .../neural_collaborative_filtering/train.py | 2 +- example/quantization/imagenet_inference.py | 4 +- example/rcnn/symnet/metric.py | 12 +- example/rcnn/train.py | 2 +- example/rnn/bucketing/cudnn_rnn_bucketing.py | 6 +- example/rnn/bucketing/lstm_bucketing.py | 2 +- example/rnn/old/char-rnn.ipynb | 2 +- example/rnn/old/gru_bucketing.py | 2 +- example/rnn/old/lstm_bucketing.py | 2 +- example/rnn/old/rnn_cell_demo.py | 2 +- .../sparse/factorization_machine/metric.py | 18 +- example/sparse/factorization_machine/train.py | 2 +- example/sparse/linear_classification/train.py | 2 +- example/sparse/matrix_factorization/train.py | 2 +- example/sparse/wide_deep/inference.py | 2 +- example/sparse/wide_deep/train.py | 2 +- example/speech_recognition/stt_metric.py | 2 +- example/ssd/evaluate/eval_metric.py | 2 +- example/ssd/train/metric.py | 13 +- example/svm_mnist/svm_mnist.py | 4 +- .../api_usage_example/example_api_train.py | 2 +- .../api_usage_example/example_inference.py | 2 +- .../benchmarks/svrg_benchmark.ipynb | 4 +- .../svrg_module/linear_regression/common.py | 2 +- example/vae-gan/vaegan_mxnet.py | 8 +- python/mxnet/__init__.py | 1 + python/mxnet/callback.py | 4 +- .../contrib/svrg_optimization/svrg_module.py | 4 +- python/mxnet/gluon/__init__.py | 2 - python/mxnet/gluon/block.py | 3 +- python/mxnet/gluon/contrib/data/text.py | 2 +- .../gluon/contrib/data/vision/dataloader.py | 4 +- .../data/vision/transforms/bbox/bbox.py | 2 +- .../gluon/contrib/estimator/estimator.py | 2 +- .../gluon/contrib/estimator/event_handler.py | 4 +- python/mxnet/gluon/contrib/estimator/utils.py | 4 +- python/mxnet/gluon/contrib/nn/basic_layers.py | 2 +- python/mxnet/gluon/data/dataloader.py | 2 +- python/mxnet/gluon/data/vision/datasets.py | 2 +- python/mxnet/gluon/nn/basic_layers.py | 2 +- python/mxnet/{gluon => }/metric.py | 1135 ++++++++--------- python/mxnet/model.py | 2 +- python/mxnet/module/base_module.py | 6 +- tests/nightly/estimator/test_estimator_cnn.py | 4 +- tests/nightly/estimator/test_sentiment_rnn.py | 10 +- tests/nightly/test_optimizer.py | 2 +- tests/nightly/test_tlocal_racecondition.py | 2 +- tests/python/gpu/test_contrib_amp.py | 4 +- tests/python/tensorrt/lenet5_train.py | 2 +- tests/python/tensorrt/test_cvnets.py | 13 +- tests/python/train/test_autograd.py | 4 +- tests/python/train/test_bucketing.py | 4 +- tests/python/train/test_mlp.py | 7 +- tests/python/train/test_sparse_fm.py | 2 +- .../unittest/test_contrib_svrg_module.py | 4 +- .../unittest/test_gluon_batch_processor.py | 4 +- tests/python/unittest/test_gluon_estimator.py | 24 +- .../unittest/test_gluon_event_handler.py | 18 +- tests/python/unittest/test_loss.py | 60 +- tests/python/unittest/test_metric.py | 229 ++-- tests/python/unittest/test_metric_perf.py | 4 +- tests/python/unittest/test_module.py | 4 +- tools/caffe_converter/test_converter.py | 2 +- 95 files changed, 867 insertions(+), 938 deletions(-) rename python/mxnet/{gluon => }/metric.py (66%) diff --git a/benchmark/python/sparse/sparse_end2end.py b/benchmark/python/sparse/sparse_end2end.py index fc949b649767..d032f9d6c38e 100644 --- a/benchmark/python/sparse/sparse_end2end.py +++ b/benchmark/python/sparse/sparse_end2end.py @@ -225,7 +225,7 @@ def row_sparse_pull(kv, key, data, slices, weight_array, priority): learning_rate=0.1, rescale_grad=1.0/batch_size/num_worker) mod.init_optimizer(optimizer=sgd, kvstore=kv) # use accuracy as the metric - metric = mx.gluon.metric.create('acc') + metric = mx.metric.create('acc') index = mod._exec_group.param_names.index('w') # weight_array bound to executors of the contexts diff --git a/example/adversary/adversary_generation.ipynb b/example/adversary/adversary_generation.ipynb index 0dda371a8f41..76c5f4cff569 100644 --- a/example/adversary/adversary_generation.ipynb +++ b/example/adversary/adversary_generation.ipynb @@ -168,7 +168,7 @@ "epoch = 3\n", "for e in range(epoch):\n", " train_loss = 0.\n", - " acc = mx.gluon.metric.Accuracy()\n", + " acc = mx.metric.Accuracy()\n", " for i, (data, label) in enumerate(train_data):\n", " data = data.as_in_context(ctx)\n", " label = label.as_in_context(ctx)\n", @@ -223,7 +223,7 @@ " l = loss(output, label)\n", "l.backward()\n", "\n", - "acc = mx.gluon.metric.Accuracy()\n", + "acc = mx.metric.Accuracy()\n", "acc.update(label, output)\n", "\n", "print(\"Validation batch accuracy {}\".format(acc.get()[1]))" @@ -256,7 +256,7 @@ "\n", "output = net(data_perturbated) \n", "\n", - "acc = mx.gluon.metric.Accuracy()\n", + "acc = mx.metric.Accuracy()\n", "acc.update(label, output)\n", "\n", "print(\"Validation batch accuracy after perturbation {}\".format(acc.get()[1]))" diff --git a/example/autoencoder/variational_autoencoder/VAE_example.ipynb b/example/autoencoder/variational_autoencoder/VAE_example.ipynb index 7de336611b38..964e13725c69 100755 --- a/example/autoencoder/variational_autoencoder/VAE_example.ipynb +++ b/example/autoencoder/variational_autoencoder/VAE_example.ipynb @@ -610,7 +610,7 @@ ], "source": [ "# calculate the ELBO which is minus the loss for test set\n", - "metric = mx.gluon.metric.Loss()\n", + "metric = mx.metric.Loss()\n", "model.score(nd_iter_test, metric)" ] }, diff --git a/example/caffe/caffe_net.py b/example/caffe/caffe_net.py index d748298a2965..803efda9b68e 100644 --- a/example/caffe/caffe_net.py +++ b/example/caffe/caffe_net.py @@ -140,6 +140,6 @@ def parse_args(): # train if use_caffe_loss: - train_model.fit(args, net, get_iterator(data_shape, use_caffe_data), mx.gluon.metric.Caffe()) + train_model.fit(args, net, get_iterator(data_shape, use_caffe_data), mx.metric.Caffe()) else: train_model.fit(args, net, get_iterator(data_shape, use_caffe_data)) diff --git a/example/caffe/train_model.py b/example/caffe/train_model.py index 96e81e06add4..d7dfd5d7a31e 100644 --- a/example/caffe/train_model.py +++ b/example/caffe/train_model.py @@ -93,7 +93,7 @@ def fit(args, network, data_loader, eval_metrics=None, batch_end_callback=None): eval_metrics = ['accuracy'] # TopKAccuracy only allows top_k > 1 for top_k in [5, 10, 20]: - eval_metrics.append(mx.gluon.metric.create('top_k_accuracy', top_k=top_k)) + eval_metrics.append(mx.metric.create('top_k_accuracy', top_k=top_k)) if batch_end_callback is not None: if not isinstance(batch_end_callback, list): diff --git a/example/capsnet/capsulenet.py b/example/capsnet/capsulenet.py index 2e38d85fbdea..4d455dbc504c 100644 --- a/example/capsnet/capsulenet.py +++ b/example/capsnet/capsulenet.py @@ -122,7 +122,7 @@ def to4d(img): return img.reshape(img.shape[0], 1, 28, 28).astype(np.float32)/255 -class LossMetric(mx.gluon.metric.EvalMetric): +class LossMetric(mx.metric.EvalMetric): """Evaluate the loss function""" def __init__(self, batch_size, num_gpus): super(LossMetric, self).__init__('LossMetric') diff --git a/example/ctc/lstm_ocr_train.py b/example/ctc/lstm_ocr_train.py index e774ff73ab08..49d9531920ae 100644 --- a/example/ctc/lstm_ocr_train.py +++ b/example/ctc/lstm_ocr_train.py @@ -103,7 +103,7 @@ def main(): module.fit(train_data=data_train, eval_data=data_val, # use metrics.accuracy or metrics.accuracy_lcs - eval_metric=mx.gluon.metric.np(metrics.accuracy, allow_extra_outputs=True), + eval_metric=mx.metric.np(metrics.accuracy, allow_extra_outputs=True), optimizer='sgd', optimizer_params={'learning_rate': hp.learning_rate, 'momentum': hp.momentum, diff --git a/example/deep-embedded-clustering/autoencoder.py b/example/deep-embedded-clustering/autoencoder.py index d6c15ae19df1..c75634475e3a 100644 --- a/example/deep-embedded-clustering/autoencoder.py +++ b/example/deep-embedded-clustering/autoencoder.py @@ -165,7 +165,7 @@ def l2_norm(label, pred): return np.mean(np.square(label-pred))/2.0 solver = Solver(optimizer, momentum=0.9, wd=decay, learning_rate=l_rate, lr_scheduler=lr_scheduler) - solver.set_metric(mx.gluon.metric.CustomMetric(l2_norm)) + solver.set_metric(mx.metric.CustomMetric(l2_norm)) solver.set_monitor(Monitor(print_every)) data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size, shuffle=True, last_batch_handle='roll_over') @@ -188,7 +188,7 @@ def l2_norm(label, pred): return np.mean(np.square(label-pred))/2.0 solver = Solver(optimizer, momentum=0.9, wd=decay, learning_rate=l_rate, lr_scheduler=lr_scheduler) - solver.set_metric(mx.gluon.metric.CustomMetric(l2_norm)) + solver.set_metric(mx.metric.CustomMetric(l2_norm)) solver.set_monitor(Monitor(print_every)) data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size, shuffle=True, last_batch_handle='roll_over') diff --git a/example/deep-embedded-clustering/dec.py b/example/deep-embedded-clustering/dec.py index f67792f0fe37..8fb3891e3e99 100644 --- a/example/deep-embedded-clustering/dec.py +++ b/example/deep-embedded-clustering/dec.py @@ -122,7 +122,7 @@ def cluster(self, X, y=None, update_interval=None): def ce(label, pred): return np.sum(label*np.log(label/(pred+0.000001)))/label.shape[0] - solver.set_metric(mx.gluon.metric.CustomMetric(ce)) + solver.set_metric(mx.metric.CustomMetric(ce)) label_buff = np.zeros((X.shape[0], self.num_centers)) train_iter = mx.io.NDArrayIter({'data': X}, {'label': label_buff}, batch_size=batch_size, diff --git a/example/distributed_training-horovod/gluon_mnist.py b/example/distributed_training-horovod/gluon_mnist.py index c2e6f0bdc533..7b39f5776a42 100644 --- a/example/distributed_training-horovod/gluon_mnist.py +++ b/example/distributed_training-horovod/gluon_mnist.py @@ -104,7 +104,7 @@ def conv_nets(): # Function to evaluate accuracy for a model def evaluate(model, data_iter, context): data_iter.reset() - metric = mx.gluon.metric.Accuracy() + metric = mx.metric.Accuracy() for _, batch in enumerate(data_iter): data = batch.data[0].as_in_context(context) label = batch.label[0].as_in_context(context) @@ -149,7 +149,7 @@ def evaluate(model, data_iter, context): # Create loss function and train metric loss_fn = gluon.loss.SoftmaxCrossEntropyLoss() -metric = mx.gluon.metric.Accuracy() +metric = mx.metric.Accuracy() # Train model for epoch in range(args.epochs): diff --git a/example/distributed_training-horovod/module_mnist.py b/example/distributed_training-horovod/module_mnist.py index 74f6bc9daf21..4fcb02a46996 100644 --- a/example/distributed_training-horovod/module_mnist.py +++ b/example/distributed_training-horovod/module_mnist.py @@ -157,7 +157,7 @@ def conv_net(): num_epoch=args.epochs) # train for at most 10 dataset passes # Step 7: evaluate model accuracy -acc = mx.gluon.metric.Accuracy() +acc = mx.metric.Accuracy() model.score(val_iter, acc) if hvd.rank() == 0: diff --git a/example/distributed_training-horovod/resnet50_imagenet.py b/example/distributed_training-horovod/resnet50_imagenet.py index ae8a56100929..5e5169e98ece 100644 --- a/example/distributed_training-horovod/resnet50_imagenet.py +++ b/example/distributed_training-horovod/resnet50_imagenet.py @@ -286,8 +286,8 @@ def evaluate(epoch): return val_data.reset() - acc_top1 = mx.gluon.metric.Accuracy() - acc_top5 = mx.gluon.metric.TopKAccuracy(5) + acc_top1 = mx.metric.Accuracy() + acc_top5 = mx.metric.TopKAccuracy(5) for _, batch in enumerate(val_data): data, label = batch_fn(batch, context) output = net(data.astype(args.dtype, copy=False)) @@ -321,7 +321,7 @@ def evaluate(epoch): # Create loss function and train metric loss_fn = gluon.loss.SoftmaxCrossEntropyLoss() - metric = mx.gluon.metric.Accuracy() + metric = mx.metric.Accuracy() # Train model for epoch in range(args.num_epochs): @@ -450,8 +450,8 @@ def train_module(): # Evaluate performance if not using synthetic data if args.use_rec: - acc_top1 = mx.gluon.metric.Accuracy() - acc_top5 = mx.gluon.metric.TopKAccuracy(5) + acc_top1 = mx.metric.Accuracy() + acc_top5 = mx.metric.TopKAccuracy(5) res = mod.score(val_data, [acc_top1, acc_top5]) for name, val in res: logging.info('Epoch[%d] Rank[%d] Validation-%s=%f', diff --git a/example/distributed_training/cifar10_dist.py b/example/distributed_training/cifar10_dist.py index c89619d595f2..b66845702137 100644 --- a/example/distributed_training/cifar10_dist.py +++ b/example/distributed_training/cifar10_dist.py @@ -121,7 +121,7 @@ def evaluate_accuracy(data_iterator, network): ---------- tuple of array element """ - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() # Iterate through data and label for i, (data, label) in enumerate(data_iterator): diff --git a/example/distributed_training/cifar10_kvstore_hvd.py b/example/distributed_training/cifar10_kvstore_hvd.py index ff679864f7c3..e6780e5db85e 100644 --- a/example/distributed_training/cifar10_kvstore_hvd.py +++ b/example/distributed_training/cifar10_kvstore_hvd.py @@ -123,7 +123,7 @@ def evaluate(data_iterator, network, context): ---------- tuple of array element """ - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() # Iterate through data and label for i, (data, label) in enumerate(data_iterator): @@ -208,7 +208,7 @@ def __len__(self): optimizer_params={'learning_rate': args.lr}, kvstore=store) -train_metric = mx.gluon.metric.Accuracy() +train_metric = mx.metric.Accuracy() # Run as many epochs as required for epoch in range(args.epochs): diff --git a/example/fcn-xs/solver.py b/example/fcn-xs/solver.py index ab8964f80898..e99b31a13055 100644 --- a/example/fcn-xs/solver.py +++ b/example/fcn-xs/solver.py @@ -23,7 +23,7 @@ from collections import namedtuple from mxnet import optimizer as opt from mxnet.optimizer import get_updater -from mxnet.gluon import metric +from mxnet import metric # Parameter to pass to batch_end_callback BatchEndParam = namedtuple('BatchEndParams', ['epoch', 'nbatch', 'eval_metric']) diff --git a/example/gluon/audio/urban_sounds/train.py b/example/gluon/audio/urban_sounds/train.py index 8a55c5b5bc67..c88f9fb55187 100644 --- a/example/gluon/audio/urban_sounds/train.py +++ b/example/gluon/audio/urban_sounds/train.py @@ -28,7 +28,7 @@ def evaluate_accuracy(data_iterator, net): """Function to evaluate accuracy of any data iterator passed to it as an argument""" - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() for data, label in data_iterator: output = net(data) predictions = nd.argmax(output, axis=1) diff --git a/example/gluon/dc_gan/dcgan.py b/example/gluon/dc_gan/dcgan.py index d7c36a0a3a67..6e03aae8bed6 100644 --- a/example/gluon/dc_gan/dcgan.py +++ b/example/gluon/dc_gan/dcgan.py @@ -259,7 +259,7 @@ def main(): real_label = mx.nd.ones((opt.batch_size,), ctx=ctx) fake_label = mx.nd.zeros((opt.batch_size,), ctx=ctx) - metric = mx.gluon.metric.Accuracy() + metric = mx.metric.Accuracy() print('Training... ') stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') diff --git a/example/gluon/image_classification.py b/example/gluon/image_classification.py index de31b06655eb..44a2afea3681 100644 --- a/example/gluon/image_classification.py +++ b/example/gluon/image_classification.py @@ -27,7 +27,7 @@ from mxnet.gluon.model_zoo import vision as models from mxnet import autograd as ag from mxnet.test_utils import get_mnist_iterator -from mxnet.gluon.metric import Accuracy, TopKAccuracy, CompositeEvalMetric +from mxnet.metric import Accuracy, TopKAccuracy, CompositeEvalMetric import numpy as np from data import (get_cifar10_iterator, get_imagenet_iterator, diff --git a/example/gluon/mnist/mnist.py b/example/gluon/mnist/mnist.py index 81259db8b939..5acaf143ca60 100644 --- a/example/gluon/mnist/mnist.py +++ b/example/gluon/mnist/mnist.py @@ -70,7 +70,7 @@ def transformer(data, label): # train def test(ctx): - metric = mx.gluon.metric.Accuracy() + metric = mx.metric.Accuracy() for data, label in val_data: data = data.as_in_context(ctx) label = label.as_in_context(ctx) @@ -86,7 +86,7 @@ def train(epochs, ctx): # Trainer is for updating parameters with gradient. trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': opt.lr, 'momentum': opt.momentum}) - metric = mx.gluon.metric.Accuracy() + metric = mx.metric.Accuracy() loss = gluon.loss.SoftmaxCrossEntropyLoss() for epoch in range(epochs): diff --git a/example/gluon/sn_gan/train.py b/example/gluon/sn_gan/train.py index fc4e87d632fe..46e44791cebd 100644 --- a/example/gluon/sn_gan/train.py +++ b/example/gluon/sn_gan/train.py @@ -102,7 +102,7 @@ def facc(label, pred): g_net.collect_params().zero_grad() d_net.collect_params().zero_grad() # define evaluation metric -metric = mx.gluon.metric.CustomMetric(facc) +metric = mx.metric.CustomMetric(facc) # initialize labels real_label = nd.ones(BATCH_SIZE, CTX) fake_label = nd.zeros(BATCH_SIZE, CTX) diff --git a/example/gluon/super_resolution/super_resolution.py b/example/gluon/super_resolution/super_resolution.py index 52bfc2241f82..4a3e8d92aa39 100644 --- a/example/gluon/super_resolution/super_resolution.py +++ b/example/gluon/super_resolution/super_resolution.py @@ -156,7 +156,7 @@ def hybrid_forward(self, F, x): return x net = SuperResolutionNet(upscale_factor) -metric = mx.gluon.metric.MSE() +metric = mx.metric.MSE() def test(ctx): val_data.reset() diff --git a/example/gluon/tree_lstm/main.py b/example/gluon/tree_lstm/main.py index 41e4f4f13ed8..53af3fa019e9 100644 --- a/example/gluon/tree_lstm/main.py +++ b/example/gluon/tree_lstm/main.py @@ -96,7 +96,7 @@ net = SimilarityTreeLSTM(sim_hidden_size, rnn_hidden_size, vocab.size, vocab.embed.shape[1], num_classes) # use pearson correlation and mean-square error for evaluation -metric = mx.gluon.metric.create(['pearsonr', 'mse']) +metric = mx.metric.create(['pearsonr', 'mse']) def to_target(x): target = np.zeros((1, num_classes)) diff --git a/example/image-classification/common/fit.py b/example/image-classification/common/fit.py index 8662db3baba4..38ca296cf986 100644 --- a/example/image-classification/common/fit.py +++ b/example/image-classification/common/fit.py @@ -290,7 +290,7 @@ def fit(args, network, data_loader, **kwargs): # evaluation metrices eval_metrics = ['accuracy'] if args.top_k > 0: - eval_metrics.append(mx.gluon.metric.create( + eval_metrics.append(mx.metric.create( 'top_k_accuracy', top_k=args.top_k)) supported_loss = ['ce', 'nll_loss'] @@ -306,7 +306,7 @@ def fit(args, network, data_loader, **kwargs): logging.warning(loss_type + ' is not an valid loss type, only cross-entropy or ' \ 'negative likelihood loss is supported!') else: - eval_metrics.append(mx.gluon.metric.create(loss_type)) + eval_metrics.append(mx.metric.create(loss_type)) else: logging.warning("The output is not softmax_output, loss argument will be skipped!") diff --git a/example/image-classification/score.py b/example/image-classification/score.py index dbad44ef6981..f40e649f1f42 100644 --- a/example/image-classification/score.py +++ b/example/image-classification/score.py @@ -97,8 +97,8 @@ def score(model, data_val, metrics, gpus, batch_size, rgb_mean=None, mean_img=No logger = logging.getLogger() logger.setLevel(logging.DEBUG) - metrics = [mx.gluon.metric.create('acc'), - mx.gluon.metric.create('top_k_accuracy', top_k = 5)] + metrics = [mx.metric.create('acc'), + mx.metric.create('top_k_accuracy', top_k = 5)] (speed,) = score(metrics = metrics, **vars(args)) logging.info('Finished with %f images per second', speed) diff --git a/example/image-classification/test_score.py b/example/image-classification/test_score.py index 1a82bcff5ba3..58c5c66a7f1f 100644 --- a/example/image-classification/test_score.py +++ b/example/image-classification/test_score.py @@ -43,7 +43,7 @@ def test_imagenet1k_resnet(imagenet_val_5k_settings): models = ['imagenet1k-resnet-50', 'imagenet1k-resnet-152'] accs = [.77, .78] for (m, g) in zip(models, accs): - acc = mx.gluon.metric.create('acc') + acc = mx.metric.create('acc') (speed,) = score(model=m, data_val=imagenet_val_5k, rgb_mean='0,0,0', metrics=acc, **kwargs) r = acc.get()[1] @@ -52,7 +52,7 @@ def test_imagenet1k_resnet(imagenet_val_5k_settings): def test_imagenet1k_inception_bn(imagenet_val_5k_settings): imagenet_val_5k, kwargs = imagenet_val_5k_settings - acc = mx.gluon.metric.create('acc') + acc = mx.metric.create('acc') m = 'imagenet1k-inception-bn' g = 0.75 (speed,) = score(model=m, diff --git a/example/kaggle-ndsb2/Train.py b/example/kaggle-ndsb2/Train.py index c3ab165d11da..51e308a2e21c 100644 --- a/example/kaggle-ndsb2/Train.py +++ b/example/kaggle-ndsb2/Train.py @@ -111,7 +111,7 @@ def encode_csv(label_csv, systole_csv, diastole_csv): wd = 0.00001, momentum = 0.9) -systole_model.fit(X=data_train, eval_metric = mx.gluon.metric.np(CRPS)) +systole_model.fit(X=data_train, eval_metric = mx.metric.np(CRPS)) # # Predict systole @@ -139,7 +139,7 @@ def encode_csv(label_csv, systole_csv, diastole_csv): wd = 0.00001, momentum = 0.9) -diastole_model.fit(X=data_train, eval_metric = mx.gluon.metric.np(CRPS)) +diastole_model.fit(X=data_train, eval_metric = mx.metric.np(CRPS)) # # Predict diastole diff --git a/example/model-parallel/matrix_factorization/train.py b/example/model-parallel/matrix_factorization/train.py index fea2c153f853..591dab3a6534 100644 --- a/example/model-parallel/matrix_factorization/train.py +++ b/example/model-parallel/matrix_factorization/train.py @@ -94,7 +94,7 @@ 'rescale_grad': 1.0/batch_size} # use MSE as the metric - metric = mx.gluon.metric.create(['MSE']) + metric = mx.metric.create(['MSE']) speedometer = mx.callback.Speedometer(batch_size, print_every) diff --git a/example/module/mnist_mlp.py b/example/module/mnist_mlp.py index f6d5bf306bd8..7d63a584aec9 100644 --- a/example/module/mnist_mlp.py +++ b/example/module/mnist_mlp.py @@ -55,7 +55,7 @@ mod.init_params() mod.init_optimizer(optimizer_params={'learning_rate':0.01, 'momentum': 0.9}) -metric = mx.gluon.metric.create('acc') +metric = mx.metric.create('acc') for i_epoch in range(n_epoch): for i_iter, batch in enumerate(train_dataiter): diff --git a/example/multi-task/multi-task-learning.ipynb b/example/multi-task/multi-task-learning.ipynb index e615559441f6..048d6d9862b8 100644 --- a/example/multi-task/multi-task-learning.ipynb +++ b/example/multi-task/multi-task-learning.ipynb @@ -267,8 +267,8 @@ "outputs": [], "source": [ "def evaluate_accuracy(net, data_iterator):\n", - " acc_digits = mx.gluon.metric.Accuracy(name='digits')\n", - " acc_odd_even = mx.gluon.metric.Accuracy(name='odd_even')\n", + " acc_digits = mx.metric.Accuracy(name='digits')\n", + " acc_odd_even = mx.metric.Accuracy(name='odd_even')\n", " \n", " for i, (data, label_digit, label_odd_even) in enumerate(data_iterator):\n", " data = data.as_in_context(ctx)\n", @@ -335,8 +335,8 @@ "source": [ "for e in range(epochs):\n", " # Accuracies for each task\n", - " acc_digits = mx.gluon.metric.Accuracy(name='digits')\n", - " acc_odd_even = mx.gluon.metric.Accuracy(name='odd_even')\n", + " acc_digits = mx.metric.Accuracy(name='digits')\n", + " acc_odd_even = mx.metric.Accuracy(name='odd_even')\n", " # Accumulative losses\n", " l_digits_ = 0.\n", " l_odd_even_ = 0. \n", diff --git a/example/multivariate_time_series/src/metrics.py b/example/multivariate_time_series/src/metrics.py index 6dd8e765f0ed..4818591068f8 100644 --- a/example/multivariate_time_series/src/metrics.py +++ b/example/multivariate_time_series/src/metrics.py @@ -46,10 +46,10 @@ def get_custom_metrics(): """ :return: mxnet metric object """ - _rse = mx.gluon.metric.create(rse) - _rae = mx.gluon.metric.create(rae) - _corr = mx.gluon.metric.create(corr) - return mx.gluon.metric.create([_rae, _rse, _corr]) + _rse = mx.metric.create(rse) + _rae = mx.metric.create(rae) + _corr = mx.metric.create(corr) + return mx.metric.create([_rae, _rse, _corr]) def evaluate(pred, label): return {"RAE":rae(label, pred), "RSE":rse(label,pred),"CORR": corr(label,pred)} \ No newline at end of file diff --git a/example/named_entity_recognition/src/metrics.py b/example/named_entity_recognition/src/metrics.py index d04904c7763e..a1d270af6863 100644 --- a/example/named_entity_recognition/src/metrics.py +++ b/example/named_entity_recognition/src/metrics.py @@ -79,9 +79,9 @@ def entity_f1(label, pred): return classifer_metrics(label, pred)[2] def composite_classifier_metrics(): - metric1 = mx.gluon.metric.CustomMetric(feval=entity_precision, name='entity precision') - metric2 = mx.gluon.metric.CustomMetric(feval=entity_recall, name='entity recall') - metric3 = mx.gluon.metric.CustomMetric(feval=entity_f1, name='entity f1 score') - metric4 = mx.gluon.metric.Accuracy() + metric1 = mx.metric.CustomMetric(feval=entity_precision, name='entity precision') + metric2 = mx.metric.CustomMetric(feval=entity_recall, name='entity recall') + metric3 = mx.metric.CustomMetric(feval=entity_f1, name='entity f1 score') + metric4 = mx.metric.Accuracy() - return mx.gluon.metric.CompositeEvalMetric([metric4, metric1, metric2, metric3]) + return mx.metric.CompositeEvalMetric([metric4, metric1, metric2, metric3]) diff --git a/example/nce-loss/nce.py b/example/nce-loss/nce.py index 6764e9c20852..e59220a026a8 100644 --- a/example/nce-loss/nce.py +++ b/example/nce-loss/nce.py @@ -62,7 +62,7 @@ def nce_loss_subwords( label=label_weight) -class NceAccuracy(mx.gluon.metric.EvalMetric): +class NceAccuracy(mx.metric.EvalMetric): def __init__(self): super(NceAccuracy, self).__init__('nce-accuracy') @@ -75,7 +75,7 @@ def update(self, labels, preds): self.num_inst += 1 -class NceAuc(mx.gluon.metric.EvalMetric): +class NceAuc(mx.metric.EvalMetric): def __init__(self): super(NceAuc, self).__init__('nce-auc') @@ -105,7 +105,7 @@ def update(self, labels, preds): self.num_inst += 1 -class NceLSTMAuc(mx.gluon.metric.EvalMetric): +class NceLSTMAuc(mx.metric.EvalMetric): def __init__(self): super(NceLSTMAuc, self).__init__('nce-lstm-auc') diff --git a/example/neural_collaborative_filtering/train.py b/example/neural_collaborative_filtering/train.py index f99b16fd5b0e..c68f271a6f0d 100644 --- a/example/neural_collaborative_filtering/train.py +++ b/example/neural_collaborative_filtering/train.py @@ -124,7 +124,7 @@ def cross_entropy(label, pred, eps=1e-12): mod.init_params() mod.init_optimizer(optimizer='adam', optimizer_params=[('learning_rate', learning_rate), ('beta1',beta1), ('beta2',beta2), ('epsilon',eps)]) - metric = mx.gluon.metric.create(cross_entropy) + metric = mx.metric.create(cross_entropy) speedometer = mx.callback.Speedometer(batch_size, log_interval) best_hr, best_ndcg, best_iter = -1, -1, -1 logging.info('Training started ...') diff --git a/example/quantization/imagenet_inference.py b/example/quantization/imagenet_inference.py index 2f41fec2a9a3..4d690d37d00c 100644 --- a/example/quantization/imagenet_inference.py +++ b/example/quantization/imagenet_inference.py @@ -70,8 +70,8 @@ def advance_data_iter(data_iter, n): def score(sym, arg_params, aux_params, data, devs, label_name, max_num_examples, logger=None): - metrics = [mx.gluon.metric.create('acc'), - mx.gluon.metric.create('top_k_accuracy', top_k=5)] + metrics = [mx.metric.create('acc'), + mx.metric.create('top_k_accuracy', top_k=5)] if not isinstance(metrics, list): metrics = [metrics, ] mod = mx.mod.Module(symbol=sym, context=devs, label_names=[label_name, ]) diff --git a/example/rcnn/symnet/metric.py b/example/rcnn/symnet/metric.py index 6509ba436d75..fa8d7919e919 100644 --- a/example/rcnn/symnet/metric.py +++ b/example/rcnn/symnet/metric.py @@ -25,7 +25,7 @@ def get_names(): return pred, label -class RPNAccMetric(mx.gluon.metric.EvalMetric): +class RPNAccMetric(mx.metric.EvalMetric): def __init__(self): super(RPNAccMetric, self).__init__('RPNAcc') self.pred, self.label = get_names() @@ -49,7 +49,7 @@ def update(self, labels, preds): self.num_inst += len(pred_label.flat) -class RCNNAccMetric(mx.gluon.metric.EvalMetric): +class RCNNAccMetric(mx.metric.EvalMetric): def __init__(self): super(RCNNAccMetric, self).__init__('RCNNAcc') self.pred, self.label = get_names() @@ -66,7 +66,7 @@ def update(self, labels, preds): self.num_inst += len(pred_label.flat) -class RPNLogLossMetric(mx.gluon.metric.EvalMetric): +class RPNLogLossMetric(mx.metric.EvalMetric): def __init__(self): super(RPNLogLossMetric, self).__init__('RPNLogLoss') self.pred, self.label = get_names() @@ -93,7 +93,7 @@ def update(self, labels, preds): self.num_inst += label.shape[0] -class RCNNLogLossMetric(mx.gluon.metric.EvalMetric): +class RCNNLogLossMetric(mx.metric.EvalMetric): def __init__(self): super(RCNNLogLossMetric, self).__init__('RCNNLogLoss') self.pred, self.label = get_names() @@ -114,7 +114,7 @@ def update(self, labels, preds): self.num_inst += label.shape[0] -class RPNL1LossMetric(mx.gluon.metric.EvalMetric): +class RPNL1LossMetric(mx.metric.EvalMetric): def __init__(self): super(RPNL1LossMetric, self).__init__('RPNL1Loss') self.pred, self.label = get_names() @@ -130,7 +130,7 @@ def update(self, labels, preds): self.num_inst += num_inst -class RCNNL1LossMetric(mx.gluon.metric.EvalMetric): +class RCNNL1LossMetric(mx.metric.EvalMetric): def __init__(self): super(RCNNL1LossMetric, self).__init__('RCNNL1Loss') self.pred, self.label = get_names() diff --git a/example/rcnn/train.py b/example/rcnn/train.py index 4d89ac6e2cdd..7b1f2f7f31a5 100644 --- a/example/rcnn/train.py +++ b/example/rcnn/train.py @@ -85,7 +85,7 @@ def train_net(sym, roidb, args): eval_metric = RCNNAccMetric() cls_metric = RCNNLogLossMetric() bbox_metric = RCNNL1LossMetric() - eval_metrics = mx.gluon.metric.CompositeEvalMetric() + eval_metrics = mx.metric.CompositeEvalMetric() for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) diff --git a/example/rnn/bucketing/cudnn_rnn_bucketing.py b/example/rnn/bucketing/cudnn_rnn_bucketing.py index 8f77172087ef..38275ae3dfb8 100644 --- a/example/rnn/bucketing/cudnn_rnn_bucketing.py +++ b/example/rnn/bucketing/cudnn_rnn_bucketing.py @@ -156,7 +156,7 @@ def sym_gen(seq_len): model.fit( train_data = data_train, eval_data = data_val, - eval_metric = mx.gluon.metric.Perplexity(invalid_label), + eval_metric = mx.metric.Perplexity(invalid_label), kvstore = args.kv_store, optimizer = args.optimizer, optimizer_params = opt_params, @@ -244,14 +244,14 @@ def sym_gen(seq_len): if args.dtype == "float32": model.set_params(arg_params, aux_params) - model.score(data_val, mx.gluon.metric.Perplexity(invalid_label), + model.score(data_val, mx.metric.Perplexity(invalid_label), batch_end_callback=mx.callback.Speedometer(args.batch_size, 5)) else: assert args.dtype == "float16", "Only float32 and float16 are supported currently" model = amp.convert_bucketing_module(model, target_dtype="float16") model.bind(data_val.provide_data, data_val.provide_label, for_training=False) - model.score(data_val, mx.gluon.metric.Perplexity(invalid_label), + model.score(data_val, mx.metric.Perplexity(invalid_label), batch_end_callback=mx.callback.Speedometer(args.batch_size, 5)) if __name__ == '__main__': diff --git a/example/rnn/bucketing/lstm_bucketing.py b/example/rnn/bucketing/lstm_bucketing.py index 281aa8988ab0..7f150104f458 100644 --- a/example/rnn/bucketing/lstm_bucketing.py +++ b/example/rnn/bucketing/lstm_bucketing.py @@ -115,7 +115,7 @@ def sym_gen(seq_len): model.fit( train_data = data_train, eval_data = data_val, - eval_metric = mx.gluon.metric.Perplexity(invalid_label), + eval_metric = mx.metric.Perplexity(invalid_label), kvstore = args.kv_store, optimizer = args.optimizer, optimizer_params = { 'learning_rate': args.lr, diff --git a/example/rnn/old/char-rnn.ipynb b/example/rnn/old/char-rnn.ipynb index 4fd32d932512..1ec56cd9aa8c 100644 --- a/example/rnn/old/char-rnn.ipynb +++ b/example/rnn/old/char-rnn.ipynb @@ -347,7 +347,7 @@ "source": [ "# Fit it\n", "model.fit(X=data_train,\n", - " eval_metric = mx.gluon.metric.np(Perplexity),\n", + " eval_metric = mx.metric.np(Perplexity),\n", " batch_end_callback=mx.callback.Speedometer(batch_size, 50),\n", " epoch_end_callback=mx.callback.do_checkpoint(\"obama\"))" ] diff --git a/example/rnn/old/gru_bucketing.py b/example/rnn/old/gru_bucketing.py index 47c13ec0db43..b9f651a90dc0 100644 --- a/example/rnn/old/gru_bucketing.py +++ b/example/rnn/old/gru_bucketing.py @@ -88,6 +88,6 @@ def sym_gen(seq_len): logging.basicConfig(level=logging.DEBUG, format=head) model.fit(X=data_train, eval_data=data_val, - eval_metric = mx.gluon.metric.np(Perplexity), + eval_metric = mx.metric.np(Perplexity), batch_end_callback=mx.callback.Speedometer(batch_size, 50),) diff --git a/example/rnn/old/lstm_bucketing.py b/example/rnn/old/lstm_bucketing.py index 2bea6cc3898f..0fe4116250a2 100644 --- a/example/rnn/old/lstm_bucketing.py +++ b/example/rnn/old/lstm_bucketing.py @@ -90,6 +90,6 @@ def sym_gen(seq_len): logging.basicConfig(level=logging.DEBUG, format=head) model.fit(X=data_train, eval_data=data_val, kvstore='device', - eval_metric = mx.gluon.metric.np(Perplexity), + eval_metric = mx.metric.np(Perplexity), batch_end_callback=mx.callback.Speedometer(batch_size, 50),) diff --git a/example/rnn/old/rnn_cell_demo.py b/example/rnn/old/rnn_cell_demo.py index 64a8ee0fe72b..c5772fa3a5b7 100644 --- a/example/rnn/old/rnn_cell_demo.py +++ b/example/rnn/old/rnn_cell_demo.py @@ -144,7 +144,7 @@ def sym_gen(seq_len): logging.basicConfig(level=logging.DEBUG, format=head) mod.fit(data_train, eval_data=data_val, num_epoch=num_epoch, - eval_metric=mx.gluon.metric.np(Perplexity), + eval_metric=mx.metric.np(Perplexity), batch_end_callback=mx.callback.Speedometer(batch_size, 50), initializer=mx.init.Xavier(factor_type="in", magnitude=2.34), optimizer='sgd', diff --git a/example/sparse/factorization_machine/metric.py b/example/sparse/factorization_machine/metric.py index 8c80f0092203..a8c52c781c0f 100644 --- a/example/sparse/factorization_machine/metric.py +++ b/example/sparse/factorization_machine/metric.py @@ -19,9 +19,9 @@ import numpy as np from operator import itemgetter -@mx.gluon.metric.register -@mx.gluon.metric.alias('log_loss') -class LogLossMetric(mx.gluon.metric.EvalMetric): +@mx.metric.register +@mx.metric.alias('log_loss') +class LogLossMetric(mx.metric.EvalMetric): """Computes the negative log-likelihood loss. The negative log-likelihoodd loss over a batch of sample size :math:`N` is given by @@ -51,7 +51,7 @@ class LogLossMetric(mx.gluon.metric.EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3], [0], [0.4]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> log_loss= mx.gluon.metric.NegativeLogLikelihood() + >>> log_loss= mx.metric.NegativeLogLikelihood() >>> log_loss.update(labels, predicts) >>> print(log_loss.get()) ('log-loss', 0.57159948348999023) @@ -74,7 +74,7 @@ def update(self, labels, preds): preds : list of `NDArray` Predicted values. """ - mx.gluon.metric.check_label_shapes(labels, preds) + mx.metric.check_label_shapes(labels, preds) for label, pred in zip(labels, preds): label = label.asnumpy() @@ -88,16 +88,16 @@ def update(self, labels, preds): self.sum_metric += (-np.log(prob + self.eps)).sum() self.num_inst += num_examples -@mx.gluon.metric.register -@mx.gluon.metric.alias('auc') -class AUCMetric(mx.gluon.metric.EvalMetric): +@mx.metric.register +@mx.metric.alias('auc') +class AUCMetric(mx.metric.EvalMetric): def __init__(self, eps=1e-12): super(AUCMetric, self).__init__( 'auc') self.eps = eps def update(self, labels, preds): - mx.gluon.metric.check_label_shapes(labels, preds) + mx.metric.check_label_shapes(labels, preds) label_weight = labels[0].asnumpy() preds = preds[0].asnumpy() tmp = [] diff --git a/example/sparse/factorization_machine/train.py b/example/sparse/factorization_machine/train.py index 1e2ab0e2f0ff..b30f9cc81acf 100644 --- a/example/sparse/factorization_machine/train.py +++ b/example/sparse/factorization_machine/train.py @@ -110,7 +110,7 @@ def all_row_ids(data_batch): mod.init_optimizer(optimizer='adam', kvstore=kv, optimizer_params=optimizer_params) # metrics - metric = mx.gluon.metric.create(['log_loss', 'auc']) + metric = mx.metric.create(['log_loss', 'auc']) speedometer = mx.callback.Speedometer(batch_size, log_interval) logging.info('Training started ...') diff --git a/example/sparse/linear_classification/train.py b/example/sparse/linear_classification/train.py index 77eb2c09de28..0a8acfd87bef 100644 --- a/example/sparse/linear_classification/train.py +++ b/example/sparse/linear_classification/train.py @@ -100,7 +100,7 @@ def all_row_ids(data_batch): optim = mx.optimizer.create(optimizer, learning_rate=0.01, rescale_grad=1.0/batch_size/num_worker) mod.init_optimizer(optimizer=optim, kvstore=kv) # use accuracy as the metric - metric = mx.gluon.metric.create(['nll_loss']) + metric = mx.metric.create(['nll_loss']) # get the sparse weight parameter speedometer = mx.callback.Speedometer(batch_size, 100) diff --git a/example/sparse/matrix_factorization/train.py b/example/sparse/matrix_factorization/train.py index d9dccce89459..44bab2c416ba 100644 --- a/example/sparse/matrix_factorization/train.py +++ b/example/sparse/matrix_factorization/train.py @@ -101,7 +101,7 @@ def all_row_ids(data_batch): rescale_grad=1.0/batch_size) mod.init_optimizer(optimizer=optim, kvstore='device') # use MSE as the metric - metric = mx.gluon.metric.create(['MSE']) + metric = mx.metric.create(['MSE']) speedometer = mx.callback.Speedometer(batch_size, log_interval) logging.info('Training started ...') for epoch in range(num_epoch): diff --git a/example/sparse/wide_deep/inference.py b/example/sparse/wide_deep/inference.py index c615020200e2..e14396e50c15 100644 --- a/example/sparse/wide_deep/inference.py +++ b/example/sparse/wide_deep/inference.py @@ -93,7 +93,7 @@ else: logging.info('Inference started ...') # use accuracy as the metric - metric = mx.gluon.metric.create(['acc']) + metric = mx.metric.create(['acc']) accuracy_avg = 0.0 for batch in data_iter: nbatch += 1 diff --git a/example/sparse/wide_deep/train.py b/example/sparse/wide_deep/train.py index c8c2b157865a..eea70301660d 100644 --- a/example/sparse/wide_deep/train.py +++ b/example/sparse/wide_deep/train.py @@ -83,7 +83,7 @@ optim = mx.optimizer.create(optimizer, learning_rate=lr, rescale_grad=1.0/batch_size) mod.init_optimizer(optimizer=optim) # use accuracy as the metric - metric = mx.gluon.metric.create(['acc']) + metric = mx.metric.create(['acc']) # get the sparse weight parameter speedometer = mx.callback.Speedometer(batch_size, log_interval) diff --git a/example/speech_recognition/stt_metric.py b/example/speech_recognition/stt_metric.py index 1eb77aa301cb..26609627ea58 100644 --- a/example/speech_recognition/stt_metric.py +++ b/example/speech_recognition/stt_metric.py @@ -35,7 +35,7 @@ def check_label_shapes(labels, preds, shape=0): "predictions {}".format(label_shape, pred_shape)) -class STTMetric(mx.gluon.metric.EvalMetric): +class STTMetric(mx.metric.EvalMetric): def __init__(self, batch_size, num_gpu, is_epoch_end=False, is_logging=True): super(STTMetric, self).__init__('STTMetric') diff --git a/example/ssd/evaluate/eval_metric.py b/example/ssd/evaluate/eval_metric.py index b038d3afb376..1deb381fb859 100644 --- a/example/ssd/evaluate/eval_metric.py +++ b/example/ssd/evaluate/eval_metric.py @@ -18,7 +18,7 @@ import mxnet as mx import numpy as np -class MApMetric(mx.gluon.metric.EvalMetric): +class MApMetric(mx.metric.EvalMetric): """ Calculate mean AP for object detection task diff --git a/example/ssd/train/metric.py b/example/ssd/train/metric.py index a99c8762de16..eeb9796bf4a8 100644 --- a/example/ssd/train/metric.py +++ b/example/ssd/train/metric.py @@ -19,7 +19,7 @@ import numpy as np -class MultiBoxMetric(mx.gluon.metric.EvalMetric): +class MultiBoxMetric(mx.metric.EvalMetric): """Calculate metrics for Multibox training """ def __init__(self, eps=1e-8): super(MultiBoxMetric, self).__init__('MultiBox') @@ -39,6 +39,17 @@ def reset(self): self.num_inst = [0] * self.num self.sum_metric = [0.0] * self.num + def reset_local(self): + """ + override reset behavior + """ + if getattr(self, 'num', None) is None: + self.num_inst = 0 + self.sum_metric = 0.0 + else: + self.num_inst = [0] * self.num + self.sum_metric = [0.0] * self.num + def update(self, labels, preds): """ Implementation of updating metrics diff --git a/example/svm_mnist/svm_mnist.py b/example/svm_mnist/svm_mnist.py index 9ceae6d4588b..e166cb6ac707 100644 --- a/example/svm_mnist/svm_mnist.py +++ b/example/svm_mnist/svm_mnist.py @@ -113,8 +113,8 @@ 'momentum': 0.9, # Momentum for SGD with momentum 'wd': 0.00001, # Weight decay for regularization }) - results[output.name] = mod.score(test_iter, mx.gluon.metric.Accuracy())[0][1]*100 - print('Accuracy for %s:'%output.name, mod.score(test_iter, mx.gluon.metric.Accuracy())[0][1]*100, '%\n') + results[output.name] = mod.score(test_iter, mx.metric.Accuracy())[0][1]*100 + print('Accuracy for %s:'%output.name, mod.score(test_iter, mx.metric.Accuracy())[0][1]*100, '%\n') for key, value in results.items(): print(key, value, "%s") diff --git a/example/svrg_module/api_usage_example/example_api_train.py b/example/svrg_module/api_usage_example/example_api_train.py index cc9987fe3edb..f6cd1b2e592c 100644 --- a/example/svrg_module/api_usage_example/example_api_train.py +++ b/example/svrg_module/api_usage_example/example_api_train.py @@ -40,7 +40,7 @@ def test_svrg_intermediate_level_api(args): mod.init_params(initializer=mx.init.Uniform(0.01), allow_missing=False, force_init=False, allow_extra=False) kv = mx.kv.create("local") mod.init_optimizer(kvstore=kv, optimizer='sgd', optimizer_params=(('learning_rate', 0.025),)) - metrics = mx.gluon.metric.create("mse") + metrics = mx.metric.create("mse") for e in range(num_epoch): metrics.reset() if e % mod.update_freq == 0: diff --git a/example/svrg_module/api_usage_example/example_inference.py b/example/svrg_module/api_usage_example/example_inference.py index 7e5b7a40abe2..312f9796074d 100644 --- a/example/svrg_module/api_usage_example/example_inference.py +++ b/example/svrg_module/api_usage_example/example_inference.py @@ -42,7 +42,7 @@ def get_validation_score(args): mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label) mod.init_params(initializer=mx.init.Uniform(0.01), allow_missing=False, force_init=False, allow_extra=False) mod.init_optimizer(kvstore='local', optimizer='sgd', optimizer_params=(('learning_rate', 0.025),)) - metrics = mx.gluon.metric.create("mse") + metrics = mx.metric.create("mse") for e in range(epoch): metrics.reset() if e % mod.update_freq == 0: diff --git a/example/svrg_module/benchmarks/svrg_benchmark.ipynb b/example/svrg_module/benchmarks/svrg_benchmark.ipynb index 66f52d70be5f..54ae81281db3 100644 --- a/example/svrg_module/benchmarks/svrg_benchmark.ipynb +++ b/example/svrg_module/benchmarks/svrg_benchmark.ipynb @@ -127,7 +127,7 @@ " mod.bind(data_shapes=di.provide_data, label_shapes=di.provide_label)\n", " mod.init_params(initializer=mx.init.Zero(), allow_missing=False, force_init=False, allow_extra=False)\n", " mod.init_optimizer(kvstore='local', optimizer='sgd', optimizer_params=optimizer_params)\n", - " metrics = mx.gluon.metric.create(\"mse\")\n", + " metrics = mx.metric.create(\"mse\")\n", " \n", " results = {}\n", " for e in range(num_epoch):\n", @@ -170,7 +170,7 @@ " mod.bind(data_shapes=di.provide_data, label_shapes=di.provide_label)\n", " mod.init_params(initializer=mx.init.Zero(), allow_missing=False, force_init=False, allow_extra=False)\n", " mod.init_optimizer(kvstore='local', optimizer='sgd', optimizer_params=optimizer_params)\n", - " metrics = mx.gluon.metric.create(\"mse\")\n", + " metrics = mx.metric.create(\"mse\")\n", " \n", " results = {}\n", " for e in range(num_epoch):\n", diff --git a/example/svrg_module/linear_regression/common.py b/example/svrg_module/linear_regression/common.py index edf4f729f3e6..14a144f40ce2 100644 --- a/example/svrg_module/linear_regression/common.py +++ b/example/svrg_module/linear_regression/common.py @@ -39,7 +39,7 @@ def create_lin_reg_network(train_features, train_labels, feature_dim, batch_size def create_metrics(metrics): - metric = mx.gluon.metric.create(metrics) + metric = mx.metric.create(metrics) return metric diff --git a/example/vae-gan/vaegan_mxnet.py b/example/vae-gan/vaegan_mxnet.py index 1881f383c18b..38e7e2ecc92f 100644 --- a/example/vae-gan/vaegan_mxnet.py +++ b/example/vae-gan/vaegan_mxnet.py @@ -424,10 +424,10 @@ def kldivergence(label, pred): KLLoss = KLLoss / nElements return KLLoss - mG = mx.gluon.metric.CustomMetric(fentropy) - mD = mx.gluon.metric.CustomMetric(fentropy) - mE = mx.gluon.metric.CustomMetric(kldivergence) - mACC = mx.gluon.metric.CustomMetric(facc) + mG = mx.metric.CustomMetric(fentropy) + mD = mx.metric.CustomMetric(fentropy) + mE = mx.metric.CustomMetric(kldivergence) + mACC = mx.metric.CustomMetric(facc) print('Training...') stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index 284788fa2276..49f10aace531 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -51,6 +51,7 @@ from . import random from . import optimizer from . import model +from . import metric from . import notebook from . import initializer # use mx.init as short for mx.initializer diff --git a/python/mxnet/callback.py b/python/mxnet/callback.py index bd515707eace..4be509270fd3 100644 --- a/python/mxnet/callback.py +++ b/python/mxnet/callback.py @@ -112,7 +112,7 @@ def _callback(param): logging.info('Iter[%d] Batch[%d] Train-%s=%f', param.epoch, param.nbatch, name, value) if auto_reset: - param.eval_metric.reset() + param.eval_metric.reset_local() return _callback @@ -163,7 +163,7 @@ def __call__(self, param): if param.eval_metric is not None: name_value = param.eval_metric.get_name_value() if self.auto_reset: - param.eval_metric.reset() + param.eval_metric.reset_local() msg = 'Epoch[%d] Batch [%d-%d]\tSpeed: %.2f samples/sec' msg += '\t%s=%f'*len(name_value) logging.info(msg, param.epoch, count-self.frequent, count, speed, *sum(name_value, ())) diff --git a/python/mxnet/contrib/svrg_optimization/svrg_module.py b/python/mxnet/contrib/svrg_optimization/svrg_module.py index fc5a6c224809..eecb87cf25bb 100644 --- a/python/mxnet/contrib/svrg_optimization/svrg_module.py +++ b/python/mxnet/contrib/svrg_optimization/svrg_module.py @@ -478,8 +478,8 @@ def fit(self, train_data, eval_data=None, eval_metric='acc', if validation_metric is None: validation_metric = eval_metric - if not isinstance(eval_metric, mx.gluon.metric.EvalMetric): - eval_metric = mx.gluon.metric.create(eval_metric) + if not isinstance(eval_metric, mx.metric.EvalMetric): + eval_metric = mx.metric.create(eval_metric) ################################################################################ # training loop diff --git a/python/mxnet/gluon/__init__.py b/python/mxnet/gluon/__init__.py index 514087049edb..288937cf4a03 100644 --- a/python/mxnet/gluon/__init__.py +++ b/python/mxnet/gluon/__init__.py @@ -19,8 +19,6 @@ # pylint: disable=wildcard-import """Neural network module.""" -from . import metric - from .parameter import * from .block import * diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py index 27428e3191b8..6706abcb40de 100644 --- a/python/mxnet/gluon/block.py +++ b/python/mxnet/gluon/block.py @@ -28,8 +28,7 @@ import numpy as np from ..base import mx_real_t, MXNetError, NDArrayHandle, py_str -from .. import symbol, ndarray, initializer, autograd, _deferred_compute as dc -from ..symbol.numpy import _symbol as np_symbol +from .. import symbol, ndarray, initializer, np_symbol, autograd, _deferred_compute as dc from ..symbol import Symbol from ..ndarray import NDArray from .. import name as _name diff --git a/python/mxnet/gluon/contrib/data/text.py b/python/mxnet/gluon/contrib/data/text.py index 916b41880d45..0536ac585484 100644 --- a/python/mxnet/gluon/contrib/data/text.py +++ b/python/mxnet/gluon/contrib/data/text.py @@ -29,7 +29,7 @@ from ...data import dataset from ...utils import download, check_sha1, _get_repo_file_url from ....contrib import text -from .... import ndarray as nd, base +from .... import nd, base class _LanguageModelDataset(dataset._DownloadedDataset): # pylint: disable=abstract-method def __init__(self, root, namespace, vocabulary): diff --git a/python/mxnet/gluon/contrib/data/vision/dataloader.py b/python/mxnet/gluon/contrib/data/vision/dataloader.py index 3213398b2214..0c71d90453d8 100644 --- a/python/mxnet/gluon/contrib/data/vision/dataloader.py +++ b/python/mxnet/gluon/contrib/data/vision/dataloader.py @@ -21,9 +21,9 @@ import logging import numpy as np -from ..... import ndarray as nd +from ..... import nd from .....util import is_np_array -from ..... import numpy as _mx_np # pylint: disable=reimported +from ..... import np as _mx_np # pylint: disable=reimported from ....nn import HybridSequential, Sequential, HybridBlock, Block from ....data.vision import transforms from ....data import DataLoader diff --git a/python/mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py b/python/mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py index 65a18aaf80cd..1629c212957f 100644 --- a/python/mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py +++ b/python/mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py @@ -23,7 +23,7 @@ from .......base import numeric_types from ......block import Block from .......util import is_np_array -from ....... import ndarray as nd, numpy_extension as npx, numpy as np +from ....... import nd, npx, np from .utils import _check_bbox_shape, bbox_crop, bbox_translate from .utils import bbox_resize, bbox_random_crop_with_constraints diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py index c47e02b7213f..ed8a53d7c3a6 100644 --- a/python/mxnet/gluon/contrib/estimator/estimator.py +++ b/python/mxnet/gluon/contrib/estimator/estimator.py @@ -33,7 +33,7 @@ from ...trainer import Trainer from ...utils import split_and_load from ....context import Context, cpu, gpu, num_gpus -from ...metric import Loss as metric_loss +from ....metric import Loss as metric_loss from .batch_processor import BatchProcessor __all__ = ['Estimator'] diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py b/python/mxnet/gluon/contrib/estimator/event_handler.py index 5709a803a610..338c7f00e05e 100644 --- a/python/mxnet/gluon/contrib/estimator/event_handler.py +++ b/python/mxnet/gluon/contrib/estimator/event_handler.py @@ -25,8 +25,8 @@ import numpy as np -from ...metric import CompositeEvalMetric, EvalMetric -from ...metric import Loss as metric_loss +from ....metric import CompositeEvalMetric, EvalMetric +from ....metric import Loss as metric_loss from .utils import _check_metrics __all__ = ['TrainBegin', 'TrainEnd', 'EpochBegin', 'EpochEnd', 'BatchBegin', 'BatchEnd', diff --git a/python/mxnet/gluon/contrib/estimator/utils.py b/python/mxnet/gluon/contrib/estimator/utils.py index dc0c4bf8f081..d9126a2f6763 100644 --- a/python/mxnet/gluon/contrib/estimator/utils.py +++ b/python/mxnet/gluon/contrib/estimator/utils.py @@ -20,7 +20,7 @@ """Gluon Estimator Utility Functions""" from ...loss import SoftmaxCrossEntropyLoss -from ...metric import Accuracy, EvalMetric, CompositeEvalMetric +from ....metric import Accuracy, EvalMetric, CompositeEvalMetric def _check_metrics(metrics): if isinstance(metrics, CompositeEvalMetric): @@ -31,7 +31,7 @@ def _check_metrics(metrics): metrics = metrics or [] if not all([isinstance(metric, EvalMetric) for metric in metrics]): raise ValueError("metrics must be a Metric or a list of Metric, " - "refer to mxnet.gluon.metric.EvalMetric: {}".format(metrics)) + "refer to mxnet.metric.EvalMetric: {}".format(metrics)) return metrics def _check_handler_metric_ref(handler, known_metrics): diff --git a/python/mxnet/gluon/contrib/nn/basic_layers.py b/python/mxnet/gluon/contrib/nn/basic_layers.py index 5df1a1e83660..bc7c3ce19e09 100644 --- a/python/mxnet/gluon/contrib/nn/basic_layers.py +++ b/python/mxnet/gluon/contrib/nn/basic_layers.py @@ -24,7 +24,7 @@ 'PixelShuffle3D'] import warnings -from .... import ndarray as nd, context +from .... import nd, context from ...block import HybridBlock, Block from ...nn import Sequential, HybridSequential, BatchNorm diff --git a/python/mxnet/gluon/data/dataloader.py b/python/mxnet/gluon/data/dataloader.py index c51981678367..d991bc769ac9 100644 --- a/python/mxnet/gluon/data/dataloader.py +++ b/python/mxnet/gluon/data/dataloader.py @@ -39,7 +39,7 @@ from . import sampler as _sampler from . import batchify as _batchify -from ... import ndarray as nd, context +from ... import nd, context from ...util import is_np_shape, is_np_array, set_np from ... import numpy as _mx_np # pylint: disable=reimported diff --git a/python/mxnet/gluon/data/vision/datasets.py b/python/mxnet/gluon/data/vision/datasets.py index 028d846c6bee..c88648cbb73e 100644 --- a/python/mxnet/gluon/data/vision/datasets.py +++ b/python/mxnet/gluon/data/vision/datasets.py @@ -30,7 +30,7 @@ from .. import dataset from ...utils import download, check_sha1, _get_repo_file_url -from .... import ndarray as nd, image, recordio, base +from .... import nd, image, recordio, base from .... import numpy as _mx_np # pylint: disable=reimported from ....util import is_np_array, default_array from ....base import numeric_types diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py index 016f523be4d0..2011b5bf36b2 100644 --- a/python/mxnet/gluon/nn/basic_layers.py +++ b/python/mxnet/gluon/nn/basic_layers.py @@ -27,7 +27,7 @@ from .activations import Activation from ..block import Block, HybridBlock from ..utils import _indent -from ... import ndarray as nd, symbol as sym +from ... import nd, sym from ...util import is_np_array diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/metric.py similarity index 66% rename from python/mxnet/gluon/metric.py rename to python/mxnet/metric.py index 5b081ceac4d8..eb8f99a66d48 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/metric.py @@ -22,12 +22,11 @@ import math from collections import OrderedDict -from .. import numpy -from ..util import use_np +import numpy -from ..base import numeric_types, string_types -from .. import ndarray -from .. import registry +from .base import numeric_types, string_types +from . import ndarray +from . import registry def check_label_shapes(labels, preds, wrap=False, shape=False): @@ -90,6 +89,7 @@ def __init__(self, name, output_names=None, self.name = str(name) self.output_names = output_names self.label_names = label_names + self._has_global_stats = kwargs.pop("has_global_stats", False) self._kwargs = kwargs self.reset() @@ -148,6 +148,13 @@ def reset(self): """Resets the internal evaluation result to initial state.""" self.num_inst = 0 self.sum_metric = 0.0 + self.global_num_inst = 0 + self.global_sum_metric = 0.0 + + def reset_local(self): + """Resets the local portion of the internal evaluation results to initial state.""" + self.num_inst = 0 + self.sum_metric = 0.0 def get(self): """Gets the current evaluation result. @@ -162,13 +169,25 @@ def get(self): if self.num_inst == 0: return (self.name, float('nan')) else: - res = self.sum_metric / self.num_inst - if isinstance(res, numpy.ndarray) and len(res.shape) == 0: - # currently calling ' c = mxnet.numpy.array([1,2,3]).sum() ' would get - # ' array(6.) ', a ndarray with shape () - # In this case, returning a 'float' in .get() is more explicit. - res = res.item() - return (self.name, res) + return (self.name, self.sum_metric / self.num_inst) + + def get_global(self): + """Gets the current global evaluation result. + + Returns + ------- + names : list of str + Name of the metrics. + values : list of float + Value of the evaluations. + """ + if self._has_global_stats: + if self.global_num_inst == 0: + return (self.name, float('nan')) + else: + return (self.name, self.global_sum_metric / self.global_num_inst) + else: + return self.get() def get_name_value(self): """Returns zipped name and value pairs. @@ -185,6 +204,24 @@ def get_name_value(self): value = [value] return list(zip(name, value)) + def get_global_name_value(self): + """Returns zipped name and value pairs for global results. + + Returns + ------- + list of tuples + A (name, value) tuple list. + """ + if self._has_global_stats: + name, value = self.get_global() + if not isinstance(name, list): + name = [name] + if not isinstance(value, list): + value = [value] + return list(zip(name, value)) + else: + return self.get_name_value() + # pylint: disable=invalid-name register = registry.get_register_func(EvalMetric, 'metric') alias = registry.get_alias_func(EvalMetric, 'metric') @@ -219,9 +256,9 @@ def create(metric, *args, **kwargs): >>> def custom_metric(label, pred): ... return np.mean(np.abs(label - pred)) ... - >>> metric1 = mx.gluon.metric.create('acc') - >>> metric2 = mx.gluon.metric.create(custom_metric) - >>> metric3 = mx.gluon.metric.create([metric1, metric2, 'rmse']) + >>> metric1 = mx.metric.create('acc') + >>> metric2 = mx.metric.create(custom_metric) + >>> metric3 = mx.metric.create([metric1, metric2, 'rmse']) """ if callable(metric): return CustomMetric(metric, *args, **kwargs) @@ -256,9 +293,9 @@ class CompositeEvalMetric(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> eval_metrics_1 = mx.gluon.metric.Accuracy() - >>> eval_metrics_2 = mx.gluon.metric.F1() - >>> eval_metrics = mx.gluon.metric.CompositeEvalMetric() + >>> eval_metrics_1 = mx.metric.Accuracy() + >>> eval_metrics_2 = mx.metric.F1() + >>> eval_metrics = mx.metric.CompositeEvalMetric() >>> for child_metric in [eval_metrics_1, eval_metrics_2]: >>> eval_metrics.add(child_metric) >>> eval_metrics.update(labels = labels, preds = predicts) @@ -269,7 +306,8 @@ class CompositeEvalMetric(EvalMetric): def __init__(self, metrics=None, name='composite', output_names=None, label_names=None): super(CompositeEvalMetric, self).__init__( - name, output_names=output_names, label_names=label_names) + name, output_names=output_names, label_names=label_names, + has_global_stats=True) if metrics is None: metrics = [] self.metrics = [create(i) for i in metrics] @@ -331,6 +369,14 @@ def reset(self): except AttributeError: pass + def reset_local(self): + """Resets the local portion of the internal evaluation results to initial state.""" + try: + for metric in self.metrics: + metric.reset_local() + except AttributeError: + pass + def get(self): """Returns the current evaluation result. @@ -353,6 +399,28 @@ def get(self): values.extend(value) return (names, values) + def get_global(self): + """Returns the current evaluation result. + + Returns + ------- + names : list of str + Name of the metrics. + values : list of float + Value of the evaluations. + """ + names = [] + values = [] + for metric in self.metrics: + name, value = metric.get_global() + if isinstance(name, string_types): + name = [name] + if isinstance(value, numeric_types): + value = [value] + names.extend(name) + values.extend(value) + return (names, values) + def get_config(self): config = super(CompositeEvalMetric, self).get_config() config.update({'metrics': [i.get_config() for i in self.metrics]}) @@ -366,7 +434,6 @@ def get_config(self): @register @alias('acc') -@use_np class Accuracy(EvalMetric): """Computes accuracy classification score. @@ -393,7 +460,7 @@ class Accuracy(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> acc = mx.gluon.metric.Accuracy() + >>> acc = mx.metric.Accuracy() >>> acc.update(preds = predicts, labels = labels) >>> print acc.get() ('accuracy', 0.6666666666666666) @@ -402,7 +469,8 @@ def __init__(self, axis=1, name='accuracy', output_names=None, label_names=None): super(Accuracy, self).__init__( name, axis=axis, - output_names=output_names, label_names=label_names) + output_names=output_names, label_names=label_names, + has_global_stats=True) self.axis = axis def update(self, labels, preds): @@ -420,26 +488,25 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred_label in zip(labels, preds): - pred_label = pred_label.as_np_ndarray().as_in_ctx(label.ctx) - label = label.as_np_ndarray() if pred_label.shape != label.shape: - pred_label = pred_label.argmax(axis=self.axis) - pred_label = pred_label.astype('int32') - label = label.astype('int32') + pred_label = ndarray.argmax(pred_label, axis=self.axis) + pred_label = pred_label.asnumpy().astype('int32') + label = label.asnumpy().astype('int32') # flatten before checking shapes to avoid shape miss match - label = label.reshape(-1) - pred_label = pred_label.reshape(-1) + label = label.flat + pred_label = pred_label.flat check_label_shapes(label, pred_label) - num_correct = (pred_label == label).sum().astype('float64') + num_correct = (pred_label == label).sum() self.sum_metric += num_correct + self.global_sum_metric += num_correct self.num_inst += len(pred_label) + self.global_num_inst += len(pred_label) @register @alias('top_k_accuracy', 'top_k_acc') -@use_np class TopKAccuracy(EvalMetric): """Computes top k predictions accuracy. @@ -468,7 +535,7 @@ class TopKAccuracy(EvalMetric): >>> top_k = 3 >>> labels = [mx.nd.array([2, 6, 9, 2, 3, 4, 7, 8, 9, 6])] >>> predicts = [mx.nd.array(np.random.rand(10, 10))] - >>> acc = mx.gluon.metric.TopKAccuracy(top_k=top_k) + >>> acc = mx.metric.TopKAccuracy(top_k=top_k) >>> acc.update(labels, predicts) >>> print acc.get() ('top_k_accuracy', 0.3) @@ -478,7 +545,8 @@ def __init__(self, top_k=1, name='top_k_accuracy', output_names=None, label_names=None): super(TopKAccuracy, self).__init__( name, top_k=top_k, - output_names=output_names, label_names=label_names) + output_names=output_names, label_names=label_names, + has_global_stats=True) self.top_k = top_k assert(self.top_k > 1), 'Please use Accuracy if top_k is no more than 1' self.name += '_%d' % self.top_k @@ -502,89 +570,43 @@ def update(self, labels, preds): # we do not care about the order of top k elements. It is # much faster, which is important since that computation is # single-threaded due to Python GIL. - pred_label = pred_label.as_np_ndarray().as_in_ctx(label.ctx).astype('float32') - pred_label = numpy.argpartition(pred_label, -self.top_k) - label = label.as_np_ndarray().astype('int32') + pred_label = numpy.argpartition(pred_label.asnumpy().astype('float32'), -self.top_k) + label = label.asnumpy().astype('int32') check_label_shapes(label, pred_label) num_samples = pred_label.shape[0] num_dims = len(pred_label.shape) if num_dims == 1: - num_correct = (pred_label.reshape(-1) == label.reshape(-1)).sum() - self.sum_metric += num_correct.astype('float64') + self.sum_metric += (pred_label.flat == label.flat).sum() elif num_dims == 2: num_classes = pred_label.shape[1] top_k = min(num_classes, self.top_k) for j in range(top_k): - num_correct = (pred_label[:, num_classes - 1 - j].reshape(-1) == label.reshape(-1)).sum() - self.sum_metric += num_correct.astype('float64') + num_correct = (pred_label[:, num_classes - 1 - j].flat == label.flat).sum() + self.sum_metric += num_correct + self.global_sum_metric += num_correct self.num_inst += num_samples + self.global_num_inst += num_samples -def predict_with_threshold(pred, threshold=0.5): - """Do thresholding of predictions in binary and multilabel cases. - - Parameters - ---------- - preds : ndarray - predictions in shape of (batch_size, ...) or (batch_size, ..., num_categories) - - preds : float or ndarray - threshold(s) in shape of float or (num_categories) - """ - if isinstance(threshold, float): - return pred > threshold - elif isinstance(threshold, (numpy.ndarray, ndarray.ndarray.NDArray)): - num_classes = pred.shape[-1] - assert threshold.shape[-1] == num_classes, \ - "shape mismatch: %s vs. %s"%(pred.shape[-1], threshold.shape[-1]) - return pred > threshold - else: - raise ValueError("{} is a wrong type for threshold!".format(type(threshold))) - - -def one_hot(idx, num): - return (numpy.arange(num).astype(idx) == idx[:, None]).astype('int32') - - -@use_np -class _ClassificationMetrics(object): +class _BinaryClassificationMetrics(object): """Private container class for classification metric statistics. True/false positive and true/false negative counts are sufficient statistics for various classification metrics. This class provides the machinery to track those statistics across mini-batches of (label, prediction) pairs. - - Parameters - ---------- - class_type : str, default "binary" - "binary": f1 for binary classification. - "multiclass": f1 for multiclassification problem. - "multilabel": f1 for multilabel classification. - beta : float, default 1 - weight of precision in harmonic mean. - threshold : float, default 0.5 - threshold for deciding whether the predictions are positive or negative. - """ - def __init__(self, class_type="binary", threshold=0.5, beta=1): - self.class_type = class_type - self.threshold = threshold - self.beta = beta - self.reset_stats() - - def _set(self, num, ctx): - if self.num_classes is None: - self.num_classes = num - self.true_positives = numpy.zeros(num, dtype='float64').as_in_ctx(ctx) - self.false_negatives = numpy.zeros(num, dtype='float64').as_in_ctx(ctx) - self.false_positives = numpy.zeros(num, dtype='float64').as_in_ctx(ctx) - self.true_negatives = numpy.zeros(num, dtype='float64').as_in_ctx(ctx) - else: - assert self.num_classes == num, \ - "Input number of classes has changed from {} to {}".format(self.num_classes, num) - - def update_stats(self, label, pred): + def __init__(self): + self.true_positives = 0 + self.false_negatives = 0 + self.false_positives = 0 + self.true_negatives = 0 + self.global_true_positives = 0 + self.global_false_negatives = 0 + self.global_false_positives = 0 + self.global_true_negatives = 0 + + def update_binary_stats(self, label, pred): """Update various binary classification counts for a single (label, pred) pair. Parameters @@ -595,107 +617,92 @@ def update_stats(self, label, pred): pred : `NDArray` Predicted values. """ - pred = pred.as_np_ndarray().as_in_ctx(label.ctx) - label = label.as_np_ndarray().astype('int32') - if self.class_type == "binary": - self._set(1, label.ctx) - if label.max() > 1: - raise ValueError("Wrong label for binary classification.") - if pred.shape == label.shape: - pass - elif pred.shape[-1] > 2: - raise ValueError("The shape of prediction {} is wrong for binary classification.".format(pred.shape)) - elif pred.shape[-1] == 2: - pred = pred.reshape(-1, 2)[:, 1] - pred_label = predict_with_threshold(pred, self.threshold).reshape(-1) - label = label.reshape(-1) - - elif self.class_type == "multiclass": - num = pred.shape[-1] - self._set(num, label.ctx) - assert label.max() < num, "pred contains fewer classes than label!" - pred_label = one_hot(pred.argmax(axis=-1).reshape(-1), num) - label = one_hot(label.reshape(-1), num) - - elif self.class_type == "multilabel": - num = pred.shape[-1] - self._set(num, label.ctx) - assert pred.shape == label.shape, \ - "The shape of label should be same as that of prediction for multilabel classification." - pred_label = predict_with_threshold(pred, self.threshold).reshape(-1, num) - label = label.reshape(-1, num) - else: - raise ValueError( - "Wrong class_type {}! Only supports ['binary', 'multiclass', 'multilabel']".format(self.class_type)) - - check_label_shapes(label, pred_label) - + pred = pred.asnumpy() + label = label.asnumpy().astype('int32') + pred_label = numpy.argmax(pred, axis=1) + + check_label_shapes(label, pred) + if len(numpy.unique(label)) > 2: + raise ValueError("%s currently only supports binary classification." + % self.__class__.__name__) pred_true = (pred_label == 1) - pred_false = (pred_label == 0) + pred_false = 1 - pred_true label_true = (label == 1) - label_false = (label == 0) + label_false = 1 - label_true - true_pos = (pred_true * label_true).sum(0) - false_pos = (pred_true * label_false).sum(0) - false_neg = (pred_false * label_true).sum(0) - true_neg = (pred_false * label_false).sum(0) + true_pos = (pred_true * label_true).sum() + false_pos = (pred_true * label_false).sum() + false_neg = (pred_false * label_true).sum() + true_neg = (pred_false * label_false).sum() self.true_positives += true_pos + self.global_true_positives += true_pos self.false_positives += false_pos + self.global_false_positives += false_pos self.false_negatives += false_neg + self.global_false_negatives += false_neg self.true_negatives += true_neg + self.global_true_negatives += true_neg @property def precision(self): - if self.num_classes is not None: - return self.true_positives / numpy.maximum(self.true_positives + self.false_positives, 1e-12) + if self.true_positives + self.false_positives > 0: + return float(self.true_positives) / (self.true_positives + self.false_positives) else: return 0. @property - def micro_precision(self): - if self.num_classes is not None: - return self.true_positives.sum() / \ - numpy.maximum(self.true_positives.sum() + self.false_positives.sum(), 1e-12) + def global_precision(self): + if self.global_true_positives + self.global_false_positives > 0: + return float(self.global_true_positives) / (self.global_true_positives + self.global_false_positives) else: return 0. @property def recall(self): - if self.num_classes is not None: - return self.true_positives / numpy.maximum(self.true_positives + self.false_negatives, 1e-12) + if self.true_positives + self.false_negatives > 0: + return float(self.true_positives) / (self.true_positives + self.false_negatives) else: return 0. @property - def micro_recall(self): - if self.num_classes is not None: - return self.true_positives.sum() / \ - numpy.maximum(self.true_positives.sum() + self.false_negatives.sum(), 1e-12) + def global_recall(self): + if self.global_true_positives + self.global_false_negatives > 0: + return float(self.global_true_positives) / (self.global_true_positives + self.global_false_negatives) else: return 0. @property def fscore(self): - return (1 + self.beta ** 2) * self.precision * self.recall / \ - numpy.maximum(self.beta ** 2 * self.precision + self.recall, 1e-12) + if self.precision + self.recall > 0: + return 2 * self.precision * self.recall / (self.precision + self.recall) + else: + return 0. @property - def micro_fscore(self): - if self.micro_precision + self.micro_recall > 0: - return (1 + self.beta ** 2) * self.micro_precision * self.micro_recall / \ - (self.beta ** 2 * self.micro_precision + self.micro_recall) + def global_fscore(self): + if self.global_precision + self.global_recall > 0: + return 2 * self.global_precision * self.global_recall / (self.global_precision + self.global_recall) else: return 0. - def binary_matthewscc(self): + def matthewscc(self, use_global=False): """Calculate the Matthew's Correlation Coefficent""" - if not self.total_examples: - return 0. + if use_global: + if not self.global_total_examples: + return 0. + + true_pos = float(self.global_true_positives) + false_pos = float(self.global_false_positives) + false_neg = float(self.global_false_negatives) + true_neg = float(self.global_true_negatives) + else: + if not self.total_examples: + return 0. - true_pos = float(self.true_positives) - false_pos = float(self.false_positives) - false_neg = float(self.false_negatives) - true_neg = float(self.true_negatives) + true_pos = float(self.true_positives) + false_pos = float(self.false_positives) + false_neg = float(self.false_negatives) + true_neg = float(self.true_negatives) terms = [(true_pos + false_pos), (true_pos + false_neg), @@ -708,21 +715,32 @@ def binary_matthewscc(self): @property def total_examples(self): - if self.num_classes is None: - return 0 - return int(self.false_negatives[0] + self.false_positives[0] + \ - self.true_negatives[0] + self.true_positives[0]) + return self.false_negatives + self.false_positives + \ + self.true_negatives + self.true_positives + + @property + def global_total_examples(self): + return self.global_false_negatives + self.global_false_positives + \ + self.global_true_negatives + self.global_true_positives + + def local_reset_stats(self): + self.false_positives = 0 + self.false_negatives = 0 + self.true_positives = 0 + self.true_negatives = 0 def reset_stats(self): - self.num_classes = None - self.true_positives = None - self.false_negatives = None - self.false_positives = None - self.true_negatives = None + self.false_positives = 0 + self.false_negatives = 0 + self.true_positives = 0 + self.true_negatives = 0 + self.global_false_positives = 0 + self.global_false_negatives = 0 + self.global_true_positives = 0 + self.global_true_negatives = 0 @register -@use_np class F1(EvalMetric): """Computes the F1 score of a binary classification problem. @@ -750,34 +768,28 @@ class F1(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - class_type : str, default "binary" - "binary": f1 for binary classification. - "multiclass": f1 for multiclassification problem. - "multilabel": f1 for multilabel classification. - threshold : float, default 0.5 - threshold for postive confidence value. - average : str, default 'micro' + average : str, default 'macro' Strategy to be used for aggregating across mini-batches. - "macro": Calculate metrics for each label and return unweighted mean of f1. - "micro": Calculate metrics globally by counting the total TP, FN and FP. - None: Return f1 scores for each class (numpy.ndarray) . + "macro": average the F1 scores for each batch. + "micro": compute a single F1 score across all batches. Examples -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0., 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0., 1., 1.])] - >>> f1 = mx.gluon.metric.F1() + >>> f1 = mx.metric.F1() >>> f1.update(preds = predicts, labels = labels) >>> print f1.get() ('f1', 0.8) """ def __init__(self, name='f1', - output_names=None, label_names=None, class_type="binary", threshold=0.5, average="micro"): + output_names=None, label_names=None, average="macro"): self.average = average - self.metrics = _ClassificationMetrics(class_type=class_type, threshold=threshold) + self.metrics = _BinaryClassificationMetrics() EvalMetric.__init__(self, name=name, - output_names=output_names, label_names=label_names) + output_names=output_names, label_names=label_names, + has_global_stats=True) def update(self, labels, preds): """Updates the internal evaluation result. @@ -793,149 +805,36 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - self.metrics.update_stats(label, pred) + self.metrics.update_binary_stats(label, pred) - if self.average == "micro": - self.sum_metric = self.metrics.micro_fscore * self.metrics.total_examples - elif self.average == "macro": - self.sum_metric = self.metrics.fscore.mean() * self.metrics.total_examples + if self.average == "macro": + self.sum_metric += self.metrics.fscore + self.global_sum_metric += self.metrics.global_fscore + self.num_inst += 1 + self.global_num_inst += 1 + self.metrics.reset_stats() else: self.sum_metric = self.metrics.fscore * self.metrics.total_examples - self.num_inst = self.metrics.total_examples + self.global_sum_metric = self.metrics.global_fscore * self.metrics.global_total_examples + self.num_inst = self.metrics.total_examples + self.global_num_inst = self.metrics.global_total_examples def reset(self): """Resets the internal evaluation result to initial state.""" self.sum_metric = 0. self.num_inst = 0 + self.global_num_inst = 0 + self.global_sum_metric = 0.0 self.metrics.reset_stats() - -@register -@use_np -class Fbeta(F1): - """Computes the Fbeta score of a binary classification problem. - - The Fbeta score is equivalent to harmonic mean of the precision and recall, - where the best value is 1.0 and the worst value is 0.0. The formula for Fbeta score is:: - - Fbeta = (1 + beta ** 2) * (precision * recall) / (beta ** 2 * precision + recall) - - The formula for precision and recall is:: - - precision = true_positives / (true_positives + false_positives) - recall = true_positives / (true_positives + false_negatives) - - .. note:: - - This Fbeta score only supports binary classification. - - Parameters - ---------- - name : str - Name of this metric instance for display. - output_names : list of str, or None - Name of predictions that should be used when updating with update_dict. - By default include all predictions. - label_names : list of str, or None - Name of labels that should be used when updating with update_dict. - By default include all labels. - class_type : str, default "binary" - "binary": f1 for binary classification. - "multiclass": f1 for multiclassification problem. - "multilabel": f1 for multilabel classification. - beta : float, default 1 - weight of precision in harmonic mean. - threshold : float, default 0.5 - threshold for postive confidence value. - average : str, default 'micro' - Strategy to be used for aggregating across mini-batches. - "macro": Calculate metrics for each label and return unweighted mean of f1. - "micro": Calculate metrics globally by counting the total TP, FN and FP. - None: Return f1 scores for each class. - - Examples - -------- - >>> predicts = [mx.nd.array([[0.3, 0.7], [0., 1.], [0.4, 0.6]])] - >>> labels = [mx.nd.array([0., 1., 1.])] - >>> fbeta = mx.gluon.metric.Fbeta(beta=2) - >>> fbeta.update(preds = predicts, labels = labels) - >>> print fbeta.get() - ('fbeta', 0.9090909090909091) - """ - - def __init__(self, name='fbeta', - output_names=None, label_names=None, class_type="binary", beta=1, threshold=0.5, average="micro"): - super(Fbeta, self).__init__( - name=name, output_names=output_names, label_names=label_names, - class_type=class_type, threshold=threshold, average=average) - self.metrics = _ClassificationMetrics(class_type=class_type, threshold=threshold, beta=beta) - - -@register -@use_np -class BinaryAccuracy(EvalMetric): - """Computes the accuracy of a binary or multilabel classification problem. - - Parameters - ---------- - name : str - Name of this metric instance for display. - output_names : list of str, or None - Name of predictions that should be used when updating with update_dict. - By default include all predictions. - label_names : list of str, or None - Name of labels that should be used when updating with update_dict. - By default include all labels. - threshold : float or ndarray, default 0.5 - threshold for deciding whether the predictions are positive or negative. - - Examples - -------- - >>> predicts = [mx.nd.array([0.7, 1, 0.55])] - >>> labels = [mx.nd.array([0., 1., 0.])] - >>> bacc = mx.gluon.metric.BinaryAccuracy(threshold=0.6) - >>> bacc.update(preds = predicts, labels = labels) - >>> print bacc.get() - ('binary_accuracy', 0.6666666666666666) - """ - - def __init__(self, name='binary_accuracy', - output_names=None, label_names=None, threshold=0.5): - self.threshold = threshold - EvalMetric.__init__(self, name=name, - output_names=output_names, label_names=label_names) - - def update(self, labels, preds): - """Updates the internal evaluation result. - - Parameters - ---------- - labels : list of `NDArray` - Each label denotes positive/negative for each class. - - preds : list of `NDArray` - Each prediction value is a confidence value of being positive for each class. - """ - labels, preds = check_label_shapes(labels, preds, True) - - for label, pred_label in zip(labels, preds): - pred_label = predict_with_threshold(pred_label, self.threshold) - - pred_label = pred_label.as_np_ndarray().astype('int32').as_in_ctx(label.ctx) - label = label.as_np_ndarray().astype('int32') - # flatten before checking shapes to avoid shape miss match - label = label.reshape(-1) - pred_label = pred_label.reshape(-1) - - check_label_shapes(label, pred_label) - - num_correct = (pred_label == label).sum().astype('float64') - self.sum_metric += num_correct - self.num_inst += len(pred_label) + def reset_local(self): + """Resets the internal evaluation result to initial state.""" + self.sum_metric = 0. + self.num_inst = 0 + self.metrics.local_reset_stats() @register -@use_np class MCC(EvalMetric): """Computes the Matthews Correlation Coefficient of a binary classification problem. @@ -966,6 +865,10 @@ class MCC(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. + average : str, default 'macro' + Strategy to be used for aggregating across mini-batches. + "macro": average the MCC for each batch. + "micro": compute a single MCC across all batches. Examples -------- @@ -984,9 +887,9 @@ class MCC(EvalMetric): [0.]*(false_positives + true_negatives) + [1.]*(false_negatives + true_positives) )] - >>> f1 = mx.gluon.metric.F1() + >>> f1 = mx.metric.F1() >>> f1.update(preds = predicts, labels = labels) - >>> mcc = mx.gluon.metric.MCC() + >>> mcc = mx.metric.MCC() >>> mcc.update(preds = predicts, labels = labels) >>> print f1.get() ('f1', 0.95233560306652054) @@ -995,10 +898,12 @@ class MCC(EvalMetric): """ def __init__(self, name='mcc', - output_names=None, label_names=None): - self._metrics = _ClassificationMetrics() + output_names=None, label_names=None, average="macro"): + self._average = average + self._metrics = _BinaryClassificationMetrics() EvalMetric.__init__(self, name=name, - output_names=output_names, label_names=label_names) + output_names=output_names, label_names=label_names, + has_global_stats=True) def update(self, labels, preds): """Updates the internal evaluation result. @@ -1014,35 +919,72 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - self._metrics.update_stats(label, pred) + self._metrics.update_binary_stats(label, pred) - self.sum_metric = self._metrics.binary_matthewscc() * self._metrics.total_examples - self.num_inst = self._metrics.total_examples + if self._average == "macro": + self.sum_metric += self._metrics.matthewscc() + self.global_sum_metric += self._metrics.matthewscc(use_global=True) + self.num_inst += 1 + self.global_num_inst += 1 + self._metrics.reset_stats() + else: + self.sum_metric = self._metrics.matthewscc() * self._metrics.total_examples + self.global_sum_metric = self._metrics.matthewscc(use_global=True) * \ + self._metrics.global_total_examples + self.num_inst = self._metrics.total_examples + self.global_num_inst = self._metrics.global_total_examples def reset(self): """Resets the internal evaluation result to initial state.""" self.sum_metric = 0. self.num_inst = 0. + self.global_sum_metric = 0. + self.global_num_inst = 0. self._metrics.reset_stats() - -#################### -# REGRESSION METRICS -#################### + def reset_local(self): + """Resets the internal evaluation result to initial state.""" + self.sum_metric = 0. + self.num_inst = 0. + self._metrics.local_reset_stats() @register -@use_np -class MAE(EvalMetric): - """Computes Mean Absolute Error (MAE) loss. +class Perplexity(EvalMetric): + """Computes perplexity. - The mean absolute error is given by + Perplexity is a measurement of how well a probability distribution + or model predicts a sample. A low perplexity indicates the model + is good at predicting the sample. + + The perplexity of a model q is defined as .. math:: - \\frac{\\sum_i^n |y_i - \\hat{y}_i|}{n} + b^{\\big(-\\frac{1}{N} \\sum_{i=1}^N \\log_b q(x_i) \\big)} + = \\exp \\big(-\\frac{1}{N} \\sum_{i=1}^N \\log q(x_i)\\big) + + where we let `b = e`. + + :math:`q(x_i)` is the predicted value of its ground truth + label on sample :math:`x_i`. + + For example, we have three samples :math:`x_1, x_2, x_3` and their labels + are :math:`[0, 1, 1]`. + Suppose our model predicts :math:`q(x_1) = p(y_1 = 0 | x_1) = 0.3` + and :math:`q(x_2) = 1.0`, + :math:`q(x_3) = 0.6`. The perplexity of model q is + :math:`exp\\big(-(\\log 0.3 + \\log 1.0 + \\log 0.6) / 3\\big) = 1.77109762852`. Parameters ---------- + ignore_label : int or None + Index of invalid label to ignore when + counting. By default, sets to -1. + If set to `None`, it will include all entries. + axis : int (default -1) + The axis from prediction that was used to + compute softmax. By default use the last + axis. name : str Name of this metric instance for display. output_names : list of str, or None @@ -1054,18 +996,21 @@ class MAE(EvalMetric): Examples -------- - >>> predicts = [mx.nd.array([3, -0.5, 2, 7])] - >>> labels = [mx.nd.array([2.5, 0.0, 2, 8])] - >>> mean_absolute_error = mx.gluon.metric.MAE() - >>> mean_absolute_error.update(labels = labels, preds = predicts) - >>> print mean_absolute_error.get() - ('mae', 0.5) + >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] + >>> labels = [mx.nd.array([0, 1, 1])] + >>> perp = mx.metric.Perplexity(ignore_label=None) + >>> perp.update(labels, predicts) + >>> print perp.get() + ('Perplexity', 1.7710976285155853) """ - - def __init__(self, name='mae', + def __init__(self, ignore_label, axis=-1, name='perplexity', output_names=None, label_names=None): - super(MAE, self).__init__( - name, output_names=output_names, label_names=label_names) + super(Perplexity, self).__init__( + name, ignore_label=ignore_label, + output_names=output_names, label_names=label_names, + has_global_stats=True) + self.ignore_label = ignore_label + self.axis = axis def update(self, labels, preds): """Updates the internal evaluation result. @@ -1078,28 +1023,64 @@ def update(self, labels, preds): preds : list of `NDArray` Predicted values. """ - labels, preds = check_label_shapes(labels, preds, True) - + assert len(labels) == len(preds) + loss = 0. + num = 0 for label, pred in zip(labels, preds): - label = label.as_np_ndarray() - pred = pred.as_np_ndarray().as_in_ctx(label.ctx) + assert label.size == pred.size/pred.shape[-1], \ + "shape mismatch: %s vs. %s"%(label.shape, pred.shape) + label = label.as_in_context(pred.context).reshape((label.size,)) + pred = ndarray.pick(pred, label.astype(dtype='int32'), axis=self.axis) + if self.ignore_label is not None: + ignore = (label == self.ignore_label).astype(pred.dtype) + num -= ndarray.sum(ignore).asscalar() + pred = pred*(1-ignore) + ignore + loss -= ndarray.sum(ndarray.log(ndarray.maximum(1e-10, pred))).asscalar() + num += pred.size + self.sum_metric += loss + self.global_sum_metric += loss + self.num_inst += num + self.global_num_inst += num - num_inst = label.shape[0] - mae = numpy.abs(label - pred).reshape(num_inst, -1).mean(axis=-1).sum() + def get(self): + """Returns the current evaluation result. - self.sum_metric += mae - self.num_inst += num_inst + Returns + ------- + Tuple of (str, float) + Representing name of the metric and evaluation result. + """ + if self.num_inst == 0: + return (self.name, float('nan')) + else: + return (self.name, math.exp(self.sum_metric/self.num_inst)) + + def get_global(self): + """Returns the current global evaluation result. + + Returns + ------- + Tuple of (str, float) + Representing name of the metric and evaluation result. + """ + if self.global_num_inst == 0: + return (self.name, float('nan')) + else: + return (self.name, math.exp(self.global_sum_metric/self.global_num_inst)) + +#################### +# REGRESSION METRICS +#################### @register -@use_np -class MSE(EvalMetric): - """Computes Mean Squared Error (MSE) loss. +class MAE(EvalMetric): + """Computes Mean Absolute Error (MAE) loss. - The mean squared error is given by + The mean absolute error is given by .. math:: - \\frac{\\sum_i^n (y_i - \\hat{y}_i)^2}{n} + \\frac{\\sum_i^n |y_i - \\hat{y}_i|}{n} Parameters ---------- @@ -1114,17 +1095,19 @@ class MSE(EvalMetric): Examples -------- - >>> predicts = [mx.nd.array([3, -0.5, 2, 7])] - >>> labels = [mx.nd.array([2.5, 0.0, 2, 8])] - >>> mean_squared_error = mx.gluon.metric.MSE() - >>> mean_squared_error.update(labels = labels, preds = predicts) - >>> print mean_squared_error.get() - ('mse', 0.375) + >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] + >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] + >>> mean_absolute_error = mx.metric.MAE() + >>> mean_absolute_error.update(labels = labels, preds = predicts) + >>> print mean_absolute_error.get() + ('mae', 0.5) """ - def __init__(self, name='mse', + + def __init__(self, name='mae', output_names=None, label_names=None): - super(MSE, self).__init__( - name, output_names=output_names, label_names=label_names) + super(MAE, self).__init__( + name, output_names=output_names, label_names=label_names, + has_global_stats=True) def update(self, labels, preds): """Updates the internal evaluation result. @@ -1140,25 +1123,29 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray() - pred = pred.as_np_ndarray().as_in_ctx(label.ctx) + label = label.asnumpy() + pred = pred.asnumpy() - num_inst = label.shape[0] - mse = ((label - pred)**2.0).reshape(num_inst, -1).mean(axis=-1).sum() + if len(label.shape) == 1: + label = label.reshape(label.shape[0], 1) + if len(pred.shape) == 1: + pred = pred.reshape(pred.shape[0], 1) - self.sum_metric += mse - self.num_inst += num_inst + mae = numpy.abs(label - pred).mean() + self.sum_metric += mae + self.global_sum_metric += mae + self.num_inst += 1 # numpy.prod(label.shape) + self.global_num_inst += 1 # numpy.prod(label.shape) @register -@use_np -class RMSE(MSE): - """Computes Root Mean Squred Error (RMSE) loss. +class MSE(EvalMetric): + """Computes Mean Squared Error (MSE) loss. - The root mean squared error is given by + The mean squared error is given by .. math:: - \\sqrt{\\frac{\\sum_i^n (y_i - \\hat{y}_i)^2}{n}} + \\frac{\\sum_i^n (y_i - \\hat{y}_i)^2}{n} Parameters ---------- @@ -1173,62 +1160,18 @@ class RMSE(MSE): Examples -------- - >>> predicts = [mx.nd.array([3, -0.5, 2, 7])] - >>> labels = [mx.nd.array([2.5, 0.0, 2, 8])] - >>> root_mean_squared_error = mx.gluon.metric.RMSE() - >>> root_mean_squared_error.update(labels = labels, preds = predicts) - >>> print root_mean_squared_error.get() - ('rmse', 0.612372457981) + >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] + >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] + >>> mean_squared_error = mx.metric.MSE() + >>> mean_squared_error.update(labels = labels, preds = predicts) + >>> print mean_squared_error.get() + ('mse', 0.375) """ - def __init__(self, name='rmse', + def __init__(self, name='mse', output_names=None, label_names=None): - super(RMSE, self).__init__( - name, output_names=output_names, label_names=label_names) - - def get(self): - if self.num_inst == 0: - return (self.name, float('nan')) - else: - return (self.name, math.sqrt(self.sum_metric / self.num_inst)) - - -@register -@use_np -class MeanPairwiseDistance(EvalMetric): - """Computes Mean Pairwise Distance. - - The mean pairwise distance is given by - - .. math:: - \\sqrt{\\frac{(\\sum_i^n (y_i - \\hat{y}_i)^p)^\\frac{1}{p}}{n}} - - Parameters - ---------- - name : str - Name of this metric instance for display. - output_names : list of str, or None - Name of predictions that should be used when updating with update_dict. - By default include all predictions. - label_names : list of str, or None - Name of labels that should be used when updating with update_dict. - By default include all labels. - p : float, default 2 - calculating distance using the p-norm - - Examples - -------- - >>> predicts = [mx.nd.array([[1., 2.], [3., 4.]])] - >>> labels = [mx.nd.array([[1., 0.], [4., 2.]])] - >>> mpd = mx.gluon.metric.MeanPairwiseDistance() - >>> mpd.update(labels = labels, preds = predicts) - >>> print mpd.get() - ('mpd', 2.1180338859558105) - """ - def __init__(self, name='mpd', - output_names=None, label_names=None, p=2): - super(MeanPairwiseDistance, self).__init__( - name, output_names=output_names, label_names=label_names) - self.p = p + super(MSE, self).__init__( + name, output_names=output_names, label_names=label_names, + has_global_stats=True) def update(self, labels, preds): """Updates the internal evaluation result. @@ -1244,30 +1187,29 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray() - pred = pred.as_np_ndarray().as_in_ctx(label.ctx) + label = label.asnumpy() + pred = pred.asnumpy() - label = label.reshape(label.shape[0], -1) - pred = pred.reshape(pred.shape[0], -1) - - dis = (((label - pred) ** self.p).sum(axis=-1)) ** (1./self.p) - dis = dis.sum() - num_inst = label.shape[0] + if len(label.shape) == 1: + label = label.reshape(label.shape[0], 1) + if len(pred.shape) == 1: + pred = pred.reshape(pred.shape[0], 1) - self.sum_metric += dis - self.num_inst += num_inst + mse = ((label - pred)**2.0).mean() + self.sum_metric += mse + self.global_sum_metric += mse + self.num_inst += 1 # numpy.prod(label.shape) + self.global_num_inst += 1 # numpy.prod(label.shape) @register -@use_np -class MeanCosineSimilarity(EvalMetric): - """Computes Mean Cosine Similarity. +class RMSE(EvalMetric): + """Computes Root Mean Squred Error (RMSE) loss. - The mean cosine similarity is given by + The root mean squared error is given by .. math:: - cos_sim(label, pred) = \frac{{label}.{pred}}{max(||label||.||pred||, eps)} - (calculating on the last dimension of label and pred.) + \\sqrt{\\frac{\\sum_i^n (y_i - \\hat{y}_i)^2}{n}} Parameters ---------- @@ -1279,23 +1221,21 @@ class MeanCosineSimilarity(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - eps : float, default 1e-8 - small vale to avoid division by zero. Examples -------- - >>> predicts = [mx.nd.array([[1., 0.], [1., 1.]])] - >>> labels = [mx.nd.array([[3., 4.], [2., 2.]])] - >>> mcs = mx.gluon.metric.MeanCosineSimilarity() - >>> mcs.update(labels = labels, preds = predicts) - >>> print mcs.get() - ('cos_sim', 0.8) + >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] + >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] + >>> root_mean_squared_error = mx.metric.RMSE() + >>> root_mean_squared_error.update(labels = labels, preds = predicts) + >>> print root_mean_squared_error.get() + ('rmse', 0.612372457981) """ - def __init__(self, name='cos_sim', - output_names=None, label_names=None, eps=1e-8): - super(MeanCosineSimilarity, self).__init__( - name, output_names=output_names, label_names=label_names) - self.eps = eps + def __init__(self, name='rmse', + output_names=None, label_names=None): + super(RMSE, self).__init__( + name, output_names=output_names, label_names=label_names, + has_global_stats=True) def update(self, labels, preds): """Updates the internal evaluation result. @@ -1311,27 +1251,23 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray() - pred = pred.as_np_ndarray().as_in_ctx(label.ctx) + label = label.asnumpy() + pred = pred.asnumpy() if len(label.shape) == 1: - label = label.reshape(1, label.shape[0]) + label = label.reshape(label.shape[0], 1) if len(pred.shape) == 1: - pred = pred.reshape(1, pred.shape[0]) + pred = pred.reshape(pred.shape[0], 1) - sim = (label * pred).sum(axis=-1) - n_p = numpy.linalg.norm(pred, axis=-1) - n_l = numpy.linalg.norm(label, axis=-1) - sim = sim / numpy.maximum(n_l * n_p, self.eps) - sim = sim.sum() - num_inst = len(label.reshape(-1, label.shape[-1])) # numpy.prod(label.shape[:-1]) is not supported - self.sum_metric += sim - self.num_inst += num_inst + rmse = numpy.sqrt(((label - pred)**2.0).mean()) + self.sum_metric += rmse + self.global_sum_metric += rmse + self.num_inst += 1 + self.global_num_inst += 1 @register @alias('ce') -@use_np class CrossEntropy(EvalMetric): """Computes Cross Entropy loss. @@ -1346,15 +1282,9 @@ class :math:`k`. Parameters ---------- - eps : float, default 1e-12 - Use small constant for the case that predicted value is 0. - ignore_label : int or None, default None - Index of invalid label to ignore when - counting. By default, sets to -1. - If set to `None`, it will include all entries. - axis : int (default -1) - The axis from prediction that was used to - compute softmax. By default use the last axis. + eps : float + Cross Entropy loss is undefined for predicted value is 0 or 1, + so predicted values are added with the small constant. name : str Name of this metric instance for display. output_names : list of str, or None @@ -1368,17 +1298,17 @@ class :math:`k`. -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> ce = mx.gluon.metric.CrossEntropy() + >>> ce = mx.metric.CrossEntropy() >>> ce.update(labels, predicts) >>> print ce.get() ('cross-entropy', 0.57159948348999023) """ - def __init__(self, eps=1e-12, ignore_label=None, axis=-1, name='cross-entropy', + def __init__(self, eps=1e-12, name='cross-entropy', output_names=None, label_names=None): super(CrossEntropy, self).__init__( - name, output_names=output_names, label_names=label_names) - self.ignore_label = ignore_label - self.axis = axis + name, eps=eps, + output_names=output_names, label_names=label_names, + has_global_stats=True) self.eps = eps def update(self, labels, preds): @@ -1394,97 +1324,22 @@ def update(self, labels, preds): """ labels, preds = check_label_shapes(labels, preds, True) - loss = 0. - num = 0 for label, pred in zip(labels, preds): - assert label.size == pred.size/pred.shape[-1], \ - "shape mismatch: %s vs. %s"%(label.shape, pred.shape) - label = label.reshape((label.size,)) - pred = ndarray.pick(pred.as_in_context(label.ctx), label.astype(dtype='int32'), axis=self.axis) - label = label.as_np_ndarray() - pred = pred.as_np_ndarray() - if self.ignore_label is not None: - ignore = (label == self.ignore_label).astype(pred.dtype) - num -= ignore.sum() - pred = pred * (1 - ignore) + ignore - loss -= numpy.log(numpy.maximum(self.eps, pred)).sum() - num += pred.size - self.sum_metric += loss - self.num_inst += num + label = label.asnumpy() + pred = pred.asnumpy() + label = label.ravel() + assert label.shape[0] == pred.shape[0] -@register -@use_np -class Perplexity(CrossEntropy): - """Computes perplexity. - - Perplexity is a measurement of how well a probability distribution - or model predicts a sample. A low perplexity indicates the model - is good at predicting the sample. - - The perplexity of a model q is defined as - - .. math:: - b^{\\big(-\\frac{1}{N} \\sum_{i=1}^N \\log_b q(x_i) \\big)} - = \\exp \\big(-\\frac{1}{N} \\sum_{i=1}^N \\log q(x_i)\\big) - - where we let `b = e`. - - :math:`q(x_i)` is the predicted value of its ground truth - label on sample :math:`x_i`. - - For example, we have three samples :math:`x_1, x_2, x_3` and their labels - are :math:`[0, 1, 1]`. - Suppose our model predicts :math:`q(x_1) = p(y_1 = 0 | x_1) = 0.3` - and :math:`q(x_2) = 1.0`, - :math:`q(x_3) = 0.6`. The perplexity of model q is - :math:`exp\\big(-(\\log 0.3 + \\log 1.0 + \\log 0.6) / 3\\big) = 1.77109762852`. - - Parameters - ---------- - eps : float, default 1e-12 - Use small constant for the case that predicted value is 0. - ignore_label : int or None, default None - Index of invalid label to ignore when - counting. By default, sets to -1. - If set to `None`, it will include all entries. - axis : int (default -1) - The axis from prediction that was used to - compute softmax. By default use the last axis. - name : str - Name of this metric instance for display. - output_names : list of str, or None - Name of predictions that should be used when updating with update_dict. - By default include all predictions. - label_names : list of str, or None - Name of labels that should be used when updating with update_dict. - By default include all labels. - - Examples - -------- - >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] - >>> labels = [mx.nd.array([0, 1, 1])] - >>> perp = mx.gluon.metric.Perplexity(ignore_label=None) - >>> perp.update(labels, predicts) - >>> print perp.get() - ('Perplexity', 1.7710976285155853) - """ - def __init__(self, eps=1e-12, ignore_label=None, axis=-1, name='perplexity', - output_names=None, label_names=None): - super(Perplexity, self).__init__( - name=name, eps=eps, ignore_label=ignore_label, axis=axis, - output_names=output_names, label_names=label_names) - - def get(self): - if self.num_inst == 0: - return (self.name, float('nan')) - else: - return (self.name, math.exp(self.sum_metric/self.num_inst)) - + prob = pred[numpy.arange(label.shape[0]), numpy.int64(label)] + cross_entropy = (-numpy.log(prob + self.eps)).sum() + self.sum_metric += cross_entropy + self.global_sum_metric += cross_entropy + self.num_inst += label.shape[0] + self.global_num_inst += label.shape[0] @register @alias('nll_loss') -@use_np class NegativeLogLikelihood(EvalMetric): """Computes the negative log-likelihood loss. @@ -1515,7 +1370,7 @@ class NegativeLogLikelihood(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> nll_loss = mx.gluon.metric.NegativeLogLikelihood() + >>> nll_loss = mx.metric.NegativeLogLikelihood() >>> nll_loss.update(labels, predicts) >>> print nll_loss.get() ('nll-loss', 0.57159948348999023) @@ -1524,7 +1379,8 @@ def __init__(self, eps=1e-12, name='nll-loss', output_names=None, label_names=None): super(NegativeLogLikelihood, self).__init__( name, eps=eps, - output_names=output_names, label_names=label_names) + output_names=output_names, label_names=label_names, + has_global_stats=True) self.eps = eps def update(self, labels, preds): @@ -1541,21 +1397,21 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray() - pred = pred.as_np_ndarray().as_in_ctx(label.ctx) + label = label.asnumpy() + pred = pred.asnumpy() - label = label.reshape(-1) + label = label.ravel() num_examples = pred.shape[0] assert label.shape[0] == num_examples, (label.shape[0], num_examples) prob = pred[numpy.arange(num_examples, dtype=numpy.int64), numpy.int64(label)] nll = (-numpy.log(prob + self.eps)).sum() self.sum_metric += nll + self.global_sum_metric += nll self.num_inst += num_examples - + self.global_num_inst += num_examples @register @alias('pearsonr') -@use_np class PearsonCorrelation(EvalMetric): """Computes Pearson correlation. @@ -1574,23 +1430,30 @@ class PearsonCorrelation(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. + average : str, default 'macro' + Strategy to be used for aggregating across mini-batches. + "macro": average the pearsonr scores for each batch. + "micro": compute a single pearsonr score across all batches. Examples -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([[1, 0], [0, 1], [0, 1]])] - >>> pr = mx.gluon.metric.PearsonCorrelation() + >>> pr = mx.metric.PearsonCorrelation() >>> pr.update(labels, predicts) >>> print pr.get() ('pearsonr', 0.42163704544016178) """ def __init__(self, name='pearsonr', - output_names=None, label_names=None): + output_names=None, label_names=None, average='macro'): + self.average = average super(PearsonCorrelation, self).__init__( - name, output_names=output_names, label_names=label_names) - self.reset() + name, output_names=output_names, label_names=label_names, + has_global_stats=True) + if self.average == 'micro': + self.reset_micro() - def reset(self): + def reset_micro(self): self._sse_p = 0 self._mean_p = 0 self._sse_l = 0 @@ -1599,8 +1462,13 @@ def reset(self): self._label_nums = 0 self._conv = 0 + def reset(self): self.num_inst = 0 self.sum_metric = 0.0 + self.global_num_inst = 0 + self.global_sum_metric = 0.0 + if self.average == 'micro': + self.reset_micro() def update_variance(self, new_values, *aggregate): #Welford's online algorithm for variance update @@ -1628,26 +1496,34 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): check_label_shapes(label, pred, False, True) - label = label.as_np_ndarray().reshape(-1).astype(numpy.float64) - pred = pred.as_np_ndarray().as_in_ctx(label.ctx).reshape(-1).astype(numpy.float64) - - self.num_inst += 1 - self._label_nums, self._mean_l, self._sse_l = \ - self.update_variance(label, self._label_nums, self._mean_l, self._sse_l) - self.update_cov(label, pred) - self._pred_nums, self._mean_p, self._sse_p = \ - self.update_variance(pred, self._pred_nums, self._mean_p, self._sse_p) + label = label.asnumpy().ravel().astype(numpy.float64) + pred = pred.asnumpy().ravel().astype(numpy.float64) + if self.average == 'macro': + pearson_corr = numpy.corrcoef(pred, label)[0, 1] + self.sum_metric += pearson_corr + self.global_sum_metric += pearson_corr + self.num_inst += 1 + self.global_num_inst += 1 + else: + self.global_num_inst += 1 + self.num_inst += 1 + self._label_nums, self._mean_l, self._sse_l = \ + self.update_variance(label, self._label_nums, self._mean_l, self._sse_l) + self.update_cov(label, pred) + self._pred_nums, self._mean_p, self._sse_p = \ + self.update_variance(pred, self._pred_nums, self._mean_p, self._sse_p) def get(self): if self.num_inst == 0: return (self.name, float('nan')) - - n = self._label_nums - pearsonr = self._conv / ((n-1) * numpy.sqrt(self._sse_p / (n - 1)) * numpy.sqrt(self._sse_l / (n - 1))) - return (self.name, float(pearsonr)) + if self.average == 'macro': + return (self.name, self.sum_metric / self.num_inst) + else: + n = self._label_nums + pearsonr = self._conv / ((n-1) * numpy.sqrt(self._sse_p / (n - 1)) * numpy.sqrt(self._sse_l / (n - 1))) + return (self.name, pearsonr) @register -@use_np class PCC(EvalMetric): """PCC is a multiclass equivalent for the Matthews correlation coefficient derived from a discrete solution to the Pearson correlation coefficient. @@ -1691,9 +1567,9 @@ class PCC(EvalMetric): [0]*(false_positives + true_negatives) + [1]*(false_negatives + true_positives) )] - >>> f1 = mx.gluon.metric.F1() + >>> f1 = mx.metric.F1() >>> f1.update(preds = predicts, labels = labels) - >>> pcc = mx.gluon.metric.PCC() + >>> pcc = mx.metric.PCC() >>> pcc.update(preds = predicts, labels = labels) >>> print f1.get() ('f1', 0.95233560306652054) @@ -1701,14 +1577,18 @@ class PCC(EvalMetric): ('pcc', 0.01917751877733392) """ def __init__(self, name='pcc', - output_names=None, label_names=None): + output_names=None, label_names=None, + has_global_stats=True): self.k = 2 super(PCC, self).__init__( - name=name, output_names=output_names, label_names=label_names) + name=name, output_names=output_names, label_names=label_names, + has_global_stats=has_global_stats) def _grow(self, inc): self.lcm = numpy.pad( self.lcm, ((0, inc), (0, inc)), 'constant', constant_values=(0)) + self.gcm = numpy.pad( + self.gcm, ((0, inc), (0, inc)), 'constant', constant_values=(0)) self.k += inc def _calc_mcc(self, cmat): @@ -1719,8 +1599,7 @@ def _calc_mcc(self, cmat): cov_yy = numpy.sum(y * (n - y)) if cov_xx == 0 or cov_yy == 0: return float('nan') - # i = cmat.diagonal() # mxnet.numpy.ndarray.diagonal() is currently not available. - i = cmat[numpy.arange(self.k), numpy.arange(self.k)] + i = cmat.diagonal() cov_xy = numpy.sum(i * n - x * y) return cov_xy / (cov_xx * cov_yy) ** 0.5 @@ -1739,29 +1618,42 @@ def update(self, labels, preds): # update the confusion matrix for label, pred in zip(labels, preds): - label = label.astype('int32', copy=False).as_np_ndarray() - pred = pred.as_np_ndarray().as_in_ctx(label.ctx) + label = label.astype('int32', copy=False).asnumpy() + pred = pred.asnumpy() if pred.shape != label.shape: - pred = pred.argmax(axis=1).astype(label, copy=False) + pred = pred.argmax(axis=1) else: pred = pred.astype('int32', copy=False) - n = int(max(pred.max(), label.max())) + n = max(pred.max(), label.max()) if n >= self.k: self._grow(n + 1 - self.k) - bcm = numpy.zeros((self.k, self.k), dtype='float64') + bcm = numpy.zeros((self.k, self.k)) for i, j in zip(pred, label): bcm[i, j] += 1 self.lcm += bcm + self.gcm += bcm + self.num_inst += 1 + self.global_num_inst += 1 @property def sum_metric(self): return self._calc_mcc(self.lcm) * self.num_inst + @property + def global_sum_metric(self): + return self._calc_mcc(self.gcm) * self.global_num_inst + def reset(self): """Resets the internal evaluation result to initial state.""" + self.global_num_inst = 0. + self.gcm = numpy.zeros((self.k, self.k)) + self.reset_local() + + def reset_local(self): + """Resets the local portion of the internal evaluation results to initial state.""" self.num_inst = 0. - self.lcm = numpy.zeros((self.k, self.k), dtype='float64') + self.lcm = numpy.zeros((self.k, self.k)) @register @@ -1782,7 +1674,8 @@ class Loss(EvalMetric): def __init__(self, name='loss', output_names=None, label_names=None): super(Loss, self).__init__( - name, output_names=output_names, label_names=label_names) + name, output_names=output_names, label_names=label_names, + has_global_stats=True) def update(self, _, preds): @@ -1792,7 +1685,9 @@ def update(self, _, preds): for pred in preds: loss = ndarray.sum(pred).asscalar() self.sum_metric += loss + self.global_sum_metric += loss self.num_inst += pred.size + self.global_num_inst += pred.size @register @@ -1814,7 +1709,6 @@ def __init__(self, name='caffe', @register -@use_np class CustomMetric(EvalMetric): """Computes a customized evaluation metric. @@ -1845,7 +1739,7 @@ class CustomMetric(EvalMetric): >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] >>> feval = lambda x, y : (x + y).mean() - >>> eval_metrics = mx.gluon.metric.CustomMetric(feval=feval) + >>> eval_metrics = mx.metric.CustomMetric(feval=feval) >>> eval_metrics.update(labels, predicts) >>> print eval_metrics.get() ('custom()', 6.0) @@ -1859,7 +1753,8 @@ def __init__(self, feval, name=None, allow_extra_outputs=False, super(CustomMetric, self).__init__( name, feval=feval, allow_extra_outputs=allow_extra_outputs, - output_names=output_names, label_names=label_names) + output_names=output_names, label_names=label_names, + has_global_stats=True) self._feval = feval self._allow_extra_outputs = allow_extra_outputs @@ -1878,17 +1773,21 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for pred, label in zip(preds, labels): - label = label.as_np_ndarray() - pred = pred.as_np_ndarray().as_in_ctx(label.ctx) + label = label.asnumpy() + pred = pred.asnumpy() reval = self._feval(label, pred) if isinstance(reval, tuple): (sum_metric, num_inst) = reval self.sum_metric += sum_metric + self.global_sum_metric += sum_metric self.num_inst += num_inst + self.global_num_inst += num_inst else: self.sum_metric += reval + self.global_sum_metric += reval self.num_inst += 1 + self.global_num_inst += 1 def get_config(self): raise NotImplementedError("CustomMetric cannot be serialized") @@ -1920,7 +1819,7 @@ def np(numpy_feval, name=None, allow_extra_outputs=False): >>> def custom_metric(label, pred): ... return np.mean(np.abs(label-pred)) ... - >>> metric = mx.gluon.metric.np(custom_metric) + >>> metric = mx.metric.np(custom_metric) """ def feval(label, pred): """Internal eval function.""" diff --git a/python/mxnet/model.py b/python/mxnet/model.py index bd80ec01738b..fa247624975d 100644 --- a/python/mxnet/model.py +++ b/python/mxnet/model.py @@ -30,7 +30,7 @@ from . import ndarray as nd from . import symbol as sym from . import optimizer as opt -from .gluon import metric +from . import metric from . import kvstore as kvs from .context import Context, cpu from .initializer import Uniform diff --git a/python/mxnet/module/base_module.py b/python/mxnet/module/base_module.py index 92fb7f188bfb..053a00b3abba 100644 --- a/python/mxnet/module/base_module.py +++ b/python/mxnet/module/base_module.py @@ -24,7 +24,7 @@ import warnings import numpy as np -from ..gluon import metric +from .. import metric from .. import ndarray from ..context import cpu @@ -231,7 +231,7 @@ def score(self, eval_data, eval_metric, num_batch=None, batch_end_callback=None, -------- >>> # An example of using score for prediction. >>> # Evaluate accuracy on val_dataiter - >>> metric = mx.gluon.metric.Accuracy() + >>> metric = mx.metric.Accuracy() >>> mod.score(val_dataiter, metric) >>> mod.score(val_dataiter, ['mse', 'acc']) """ @@ -543,7 +543,7 @@ def fit(self, train_data, eval_data=None, eval_metric='acc', monitor.toc_print() if end_of_batch: - eval_name_vals = eval_metric.get_name_value() + eval_name_vals = eval_metric.get_global_name_value() if batch_end_callback is not None: batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch, diff --git a/tests/nightly/estimator/test_estimator_cnn.py b/tests/nightly/estimator/test_estimator_cnn.py index 466c01019575..0d113cdf4984 100644 --- a/tests/nightly/estimator/test_estimator_cnn.py +++ b/tests/nightly/estimator/test_estimator_cnn.py @@ -116,7 +116,7 @@ def test_estimator_cpu(): # Define estimator est = estimator.Estimator(net=net, loss=loss, - train_metrics=mx.gluon.metric.Accuracy(), + train_metrics=mx.metric.Accuracy(), trainer=trainer, context=context) # Call fit() @@ -140,7 +140,7 @@ def test_estimator_gpu(): train_data, test_data = load_data_mnist(batch_size, resize=224) loss = gluon.loss.SoftmaxCrossEntropyLoss() net.hybridize() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) # Define estimator est = estimator.Estimator(net=net, diff --git a/tests/nightly/estimator/test_sentiment_rnn.py b/tests/nightly/estimator/test_sentiment_rnn.py index 7d3561db3789..367c69b88a0b 100644 --- a/tests/nightly/estimator/test_sentiment_rnn.py +++ b/tests/nightly/estimator/test_sentiment_rnn.py @@ -190,11 +190,11 @@ def run(net, train_dataloader, test_dataloader, num_epochs, ctx, lr): trainer = mx.gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr}) # Define loss and evaluation metrics loss = gluon.loss.SoftmaxCrossEntropyLoss() - metrics = mx.gluon.metric.CompositeEvalMetric() - acc = mx.gluon.metric.Accuracy() - nested_metrics = mx.gluon.metric.CompositeEvalMetric() - metrics.add([acc, mx.gluon.metric.Loss()]) - nested_metrics.add([metrics, mx.gluon.metric.Accuracy()]) + metrics = mx.metric.CompositeEvalMetric() + acc = mx.metric.Accuracy() + nested_metrics = mx.metric.CompositeEvalMetric() + metrics.add([acc, mx.metric.Loss()]) + nested_metrics.add([metrics, mx.metric.Accuracy()]) # Define estimator est = estimator.Estimator(net=net, loss=loss, train_metrics=nested_metrics, diff --git a/tests/nightly/test_optimizer.py b/tests/nightly/test_optimizer.py index 9c2fcb8a62cf..0a87368d991e 100644 --- a/tests/nightly/test_optimizer.py +++ b/tests/nightly/test_optimizer.py @@ -83,7 +83,7 @@ def test_lars(): num_epoch=num_epochs) # predict accuracy for lenet - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() lenet_model.score(test_iter, acc) accuracy = acc.get()[1] assert accuracy > 0.98, "LeNet-5 training accuracy on MNIST was too low" diff --git a/tests/nightly/test_tlocal_racecondition.py b/tests/nightly/test_tlocal_racecondition.py index 986e1f464bfb..d43c45937c05 100644 --- a/tests/nightly/test_tlocal_racecondition.py +++ b/tests/nightly/test_tlocal_racecondition.py @@ -91,7 +91,7 @@ def infer_type(self, in_type): def create_operator(self, ctx, shapes, dtypes): return MyCustom() -class MyMetric(mx.gluon.metric.EvalMetric): +class MyMetric(mx.metric.EvalMetric): def __init__(self): super(MyMetric, self).__init__("MyMetric") self.name = ['empty'] diff --git a/tests/python/gpu/test_contrib_amp.py b/tests/python/gpu/test_contrib_amp.py index 14a737b44b86..89775c691650 100644 --- a/tests/python/gpu/test_contrib_amp.py +++ b/tests/python/gpu/test_contrib_amp.py @@ -333,7 +333,7 @@ def check_amp_convert_bucketing_module(): data_val = mx.rnn.BucketSentenceIter(val_sent, batch_size, buckets=buckets, invalid_label=invalid_label) result_model.bind(data_val.provide_data, data_val.provide_label, for_training=False) - result_model.score(data_val, mx.gluon.metric.Perplexity(invalid_label), + result_model.score(data_val, mx.metric.Perplexity(invalid_label), batch_end_callback=mx.callback.Speedometer(batch_size, 1)) # AMP conversion with cast_optional_params set to true @@ -341,7 +341,7 @@ def check_amp_convert_bucketing_module(): ''' result_model = amp.convert_bucketing_module(model, cast_optional_params=True) result_model.bind(data_val.provide_data, data_val.provide_label, for_training=False) - result_model.score(data_val, mx.gluon.metric.Perplexity(invalid_label), + result_model.score(data_val, mx.metric.Perplexity(invalid_label), batch_end_callback=mx.callback.Speedometer(batch_size, 1)) ''' diff --git a/tests/python/tensorrt/lenet5_train.py b/tests/python/tensorrt/lenet5_train.py index 5603180e1347..441729fe0d56 100644 --- a/tests/python/tensorrt/lenet5_train.py +++ b/tests/python/tensorrt/lenet5_train.py @@ -75,7 +75,7 @@ def train_lenet5(num_epochs, batch_size, train_iter, val_iter, test_iter): num_epoch=num_epochs) # predict accuracy for lenet - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() lenet_model.score(test_iter, acc) accuracy = acc.get()[1] assert accuracy > 0.95, "LeNet-5 training accuracy on MNIST was too low" diff --git a/tests/python/tensorrt/test_cvnets.py b/tests/python/tensorrt/test_cvnets.py index cd090c5e2f5c..99312d76dc7a 100644 --- a/tests/python/tensorrt/test_cvnets.py +++ b/tests/python/tensorrt/test_cvnets.py @@ -16,6 +16,7 @@ # under the License. import gc +import gluoncv import mxnet as mx import numpy as np @@ -28,12 +29,7 @@ def get_classif_model(model_name, use_tensorrt, ctx=mx.gpu(0), batch_size=128): mx.contrib.tensorrt.set_use_fp16(False) h, w = 32, 32 - model_url = "https://raw.githubusercontent.com/dmlc/web-data/221ce5b7c6d5b0777a1e3471f7f03ff98da90a0a/gluoncv/models" - param_file = "{}-0000.params".format(model_name) - symbol_file = "{}-symbol.json".format(model_name) - mx.test_utils.download("{}/{}".format(model_url, param_file), fname=param_file, overwrite=True) - mx.test_utils.download("{}/{}".format(model_url, symbol_file), fname=symbol_file, overwrite=True) - net = gluon.SymbolBlock.imports(symbol_file, ['data'], param_file) + net = gluoncv.model_zoo.get_model(model_name, pretrained=True) net.hybridize() net.forward(mx.nd.zeros((batch_size, 3, h, w))) net.export(model_name) @@ -134,7 +130,10 @@ def test_tensorrt_on_cifar_resnets(batch_size=32, tolerance=0.1, num_workers=1): 'cifar_resnet20_v2', 'cifar_resnet56_v2', 'cifar_resnet110_v2', - 'cifar_wideresnet16_10' + 'cifar_wideresnet16_10', + 'cifar_wideresnet28_10', + 'cifar_wideresnet40_8', + 'cifar_resnext29_16x64d' ] num_models = len(models) diff --git a/tests/python/train/test_autograd.py b/tests/python/train/test_autograd.py index f0fdc5ea2576..02a3601eb362 100644 --- a/tests/python/train/test_autograd.py +++ b/tests/python/train/test_autograd.py @@ -55,7 +55,7 @@ def get_net(): batch_size=batch_size, shuffle=True, flat=True, silent=False) def score(net, ctx_list): - metric = gluon.metric.Accuracy() + metric = mx.metric.Accuracy() val_data.reset() for batch in val_data: datas = gluon.utils.split_and_load(batch.data[0], ctx_list, batch_axis=0) @@ -69,7 +69,7 @@ def score(net, ctx_list): def train(net, epoch, ctx_list): net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx_list) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5}) - metric = gluon.metric.Accuracy() + metric = mx.metric.Accuracy() loss = gluon.loss.SoftmaxCrossEntropyLoss() for i in range(epoch): diff --git a/tests/python/train/test_bucketing.py b/tests/python/train/test_bucketing.py index f4b8f417a2cc..a233e46e0992 100644 --- a/tests/python/train/test_bucketing.py +++ b/tests/python/train/test_bucketing.py @@ -98,7 +98,7 @@ def sym_gen(seq_len): model.fit( train_data=data_train, eval_data=data_val, - eval_metric=mx.gluon.metric.Perplexity(invalid_label), # Use Perplexity for multiclass classification. + eval_metric=mx.metric.Perplexity(invalid_label), # Use Perplexity for multiclass classification. kvstore='device', optimizer='sgd', optimizer_params={'learning_rate': 0.01, @@ -114,7 +114,7 @@ def sym_gen(seq_len): def test_bucket_module(): # This test forecasts random sequence of words to check bucketing. # We cannot guarantee the accuracy of such an impossible task, and comments out the following line. - # assert model.score(data_val, mx.gluon.metric.MSE())[0][1] < 350, "High mean square error." + # assert model.score(data_val, mx.metric.MSE())[0][1] < 350, "High mean square error." model = train_model() diff --git a/tests/python/train/test_mlp.py b/tests/python/train/test_mlp.py index 24947cc9c476..80885b33f955 100644 --- a/tests/python/train/test_mlp.py +++ b/tests/python/train/test_mlp.py @@ -37,9 +37,8 @@ def test_mlp(tmpdir): def accuracy(label, pred): py = np.argmax(pred, axis=1) - return np.sum(py == label.astype(py)) / float(label.size) - # currently mxnet.numpy (which used in gluon.metric) did not support "==" between different types - + return np.sum(py == label) / float(label.size) + num_epoch = 4 prefix = './mlp' @@ -66,7 +65,7 @@ def accuracy(label, pred): softmax, X=train_dataiter, eval_data=val_dataiter, - eval_metric=mx.gluon.metric.np(accuracy), + eval_metric=mx.metric.np(accuracy), epoch_end_callback=mx.callback.do_checkpoint(prefix), ctx=[mx.cpu(i) for i in range(2)], num_epoch=num_epoch, diff --git a/tests/python/train/test_sparse_fm.py b/tests/python/train/test_sparse_fm.py index 0d52ab555b56..76a2705fe4e5 100644 --- a/tests/python/train/test_sparse_fm.py +++ b/tests/python/train/test_sparse_fm.py @@ -108,7 +108,7 @@ def fm(factor_size, feature_dim, init): else: raise AssertionError("Unsupported optimizer type '" + optimizer + "' specified") # use accuracy as the metric - metric = mx.gluon.metric.create('MSE') + metric = mx.metric.create('MSE') # train 'num_epochs' epoch for epoch in range(num_epochs): train_iter.reset() diff --git a/tests/python/unittest/test_contrib_svrg_module.py b/tests/python/unittest/test_contrib_svrg_module.py index 8c25742bd74c..e9509f743f73 100644 --- a/tests/python/unittest/test_contrib_svrg_module.py +++ b/tests/python/unittest/test_contrib_svrg_module.py @@ -240,7 +240,7 @@ def create_module_with_sgd(): num_epoch = 10 # Use metric MSE - metrics = mx.gluon.metric.create("mse") + metrics = mx.metric.create("mse") # Train with SVRGModule for e in range(num_epoch): @@ -297,7 +297,7 @@ def test_accumulate_kvstore(): def test_fit(): di, mod = setup() num_epoch = 100 - metric = mx.gluon.metric.create("mse") + metric = mx.metric.create("mse") mod.fit(di, eval_metric=metric, optimizer='sgd', optimizer_params=(('learning_rate', 0.025),), num_epoch=num_epoch, kvstore='local') diff --git a/tests/python/unittest/test_gluon_batch_processor.py b/tests/python/unittest/test_gluon_batch_processor.py index bff80813bb12..952ed1c4a0da 100644 --- a/tests/python/unittest/test_gluon_batch_processor.py +++ b/tests/python/unittest/test_gluon_batch_processor.py @@ -52,7 +52,7 @@ def test_batch_processor_fit(): num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() net.initialize(ctx=ctx) processor = BatchProcessor() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) @@ -83,7 +83,7 @@ def test_batch_processor_validation(): num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() val_loss = gluon.loss.L1Loss() net.initialize(ctx=ctx) processor = BatchProcessor() diff --git a/tests/python/unittest/test_gluon_estimator.py b/tests/python/unittest/test_gluon_estimator.py index 360d25544f7e..e33aa74b3ca7 100644 --- a/tests/python/unittest/test_gluon_estimator.py +++ b/tests/python/unittest/test_gluon_estimator.py @@ -58,7 +58,7 @@ def test_fit(): num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, @@ -87,7 +87,7 @@ def test_validation(): num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() val_loss = gluon.loss.L1Loss() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) @@ -126,7 +126,7 @@ def test_initializer(): ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() # no initializer est = Estimator(net=net, loss=loss, @@ -166,7 +166,7 @@ def test_trainer(): ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() net.initialize(ctx=ctx) # input no trainer with warnings.catch_warnings(record=True) as w: @@ -206,7 +206,7 @@ def test_metric(): est.fit(train_data=train_data, epochs=num_epochs) # input list of metrics - metrics = [mx.gluon.metric.Accuracy(), mx.gluon.metric.Accuracy()] + metrics = [mx.metric.Accuracy(), mx.metric.Accuracy()] est = Estimator(net=net, loss=loss, train_metrics=metrics, @@ -227,14 +227,14 @@ def test_metric(): loss=loss, trainer=trainer, context=ctx) - assert isinstance(est.train_metrics[0], mx.gluon.metric.Accuracy) + assert isinstance(est.train_metrics[0], mx.metric.Accuracy) def test_loss(): ''' test with invalid loss ''' net = _get_test_network() ctx = mx.cpu() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) # input invalid loss @@ -250,7 +250,7 @@ def test_context(): ''' test with no context, list of context, invalid context ''' net = _get_test_network() loss = gluon.loss.L2Loss() - metrics = mx.gluon.metric.Accuracy() + metrics = mx.metric.Accuracy() # input no context est = Estimator(net=net, loss=loss, @@ -332,7 +332,7 @@ def test_default_handlers(): net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) - train_acc = mx.gluon.metric.RMSE() + train_acc = mx.metric.RMSE() loss = gluon.loss.L2Loss() est = Estimator(net=net, @@ -359,7 +359,7 @@ def test_default_handlers(): # handler with mixed metrics, some handler use metrics prepared by estimator # some handler use metrics user prepared - logging = LoggingHandler(metrics=[mx.gluon.metric.RMSE("val acc")]) + logging = LoggingHandler(metrics=[mx.metric.RMSE("val acc")]) with pytest.raises(ValueError): est.fit(train_data=train_data, epochs=num_epochs, event_handlers=[logging]) @@ -383,7 +383,7 @@ def test_val_net(): ctx = mx.cpu() loss = gluon.loss.L2Loss() val_loss = gluon.loss.L2Loss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, @@ -448,7 +448,7 @@ def test_val_handlers(): net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) - train_acc = mx.gluon.metric.RMSE() + train_acc = mx.metric.RMSE() loss = gluon.loss.L2Loss() est = Estimator(net=net, diff --git a/tests/python/unittest/test_gluon_event_handler.py b/tests/python/unittest/test_gluon_event_handler.py index 4cadc9466ed1..a07282cd46dd 100644 --- a/tests/python/unittest/test_gluon_event_handler.py +++ b/tests/python/unittest/test_gluon_event_handler.py @@ -84,7 +84,7 @@ def test_checkpoint_handler(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) checkpoint_handler = event_handler.CheckpointHandler(model_dir=tmpdir, model_prefix=model_prefix, @@ -130,7 +130,7 @@ def test_resume_checkpoint(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) checkpoint_handler = event_handler.CheckpointHandler(model_dir=tmpdir, model_prefix=model_prefix, @@ -155,7 +155,7 @@ def test_early_stopping(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) early_stopping = event_handler.EarlyStoppingHandler(monitor=acc, patience=0, @@ -179,7 +179,7 @@ def test_logging(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) est.logger.addHandler(logging.FileHandler(output_dir)) @@ -226,7 +226,7 @@ def epoch_end(self, estimator, *args, **kwargs): test_data = _get_test_data() net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) custom_handler = CustomStopHandler(3, 2) est.fit(test_data, event_handlers=[custom_handler], epochs=3) @@ -249,7 +249,7 @@ def test_logging_interval(): dataloader = _get_test_data(in_size=data_size) num_epochs = 1 ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() logging = LoggingHandler(metrics=[acc], log_interval=log_interval) est = estimator.Estimator(net=net, loss=ce_loss, @@ -273,7 +273,7 @@ def test_logging_interval(): ''' test case #2: log interval is 5 ''' old_stdout = sys.stdout sys.stdout = mystdout = StringIO() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() log_interval = 5 logging = LoggingHandler(metrics=[acc], log_interval=log_interval) est = estimator.Estimator(net=net, @@ -299,7 +299,7 @@ def test_validation_handler_batch_axis(): test_data = _get_test_data() net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) est.fit(test_data, epochs=3) @@ -315,7 +315,7 @@ def test_validation_handler(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.gluon.metric.Accuracy() + acc = mx.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) val_handler = ValidationHandler(val_data=test_data, eval_fn=est.evaluate, diff --git a/tests/python/unittest/test_loss.py b/tests/python/unittest/test_loss.py index e22aa6b2061d..1f54066a0d90 100644 --- a/tests/python/unittest/test_loss.py +++ b/tests/python/unittest/test_loss.py @@ -80,9 +80,9 @@ def test_ce_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.gluon.metric.Loss(), optimizer='adam', + eval_metric=mx.metric.Loss(), optimizer='adam', initializer=mx.init.Xavier(magnitude=2)) - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 # tracked at: https://github.com/apache/incubator-mxnet/issues/11691 @with_seed() @@ -98,9 +98,9 @@ def test_bce_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.gluon.metric.Loss(), optimizer='adam', + eval_metric=mx.metric.Loss(), optimizer='adam', initializer=mx.init.Xavier(magnitude=2)) - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.01 + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.01 # Test against npy data = mx.random.uniform(-5, 5, shape=(10,)) label = mx.random.uniform(0, 1, shape=(10,)) @@ -143,8 +143,8 @@ def test_kl_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 + eval_metric=mx.metric.Loss(), optimizer='adam') + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 @with_seed() @@ -160,9 +160,9 @@ def test_l2_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 @with_seed() @@ -178,9 +178,9 @@ def test_l1_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.1 + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.1 @with_seed() @@ -223,9 +223,9 @@ def test_ctc_loss_train(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 10 + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 10 @with_seed() @@ -244,12 +244,12 @@ def test_sample_weight_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label', 'w')) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.gluon.metric.Loss(), optimizer='adam') + eval_metric=mx.metric.Loss(), optimizer='adam') data_iter = mx.io.NDArrayIter(data[10:], {'label': label, 'w': weight}, batch_size=10) - score = mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] + score = mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] assert score > 1 data_iter = mx.io.NDArrayIter(data[:10], {'label': label, 'w': weight}, batch_size=10) - score = mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] + score = mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] assert score < 0.05 @@ -267,13 +267,13 @@ def test_saveload(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=100, optimizer_params={'learning_rate': 1.}, - eval_metric=mx.gluon.metric.Loss()) + eval_metric=mx.metric.Loss()) mod.save_checkpoint('test', 100, save_optimizer_states=True) mod = mx.mod.Module.load('test', 100, load_optimizer_states=True, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=100, optimizer_params={'learning_rate': 1.}, - eval_metric=mx.gluon.metric.Loss()) - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 + eval_metric=mx.metric.Loss()) + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 @with_seed() def test_huber_loss(): @@ -288,9 +288,9 @@ def test_huber_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 @with_seed() @@ -306,9 +306,9 @@ def test_hinge_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.06 + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.06 @with_seed() @@ -324,9 +324,9 @@ def test_squared_hinge_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 @with_seed() @@ -345,9 +345,9 @@ def test_triplet_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('pos','neg')) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 @xfail_when_nonstandard_decimal_separator @with_seed() @@ -456,9 +456,9 @@ def test_poisson_nllloss_mod(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=20, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Normal(sigma=0.1), eval_metric=mx.gluon.metric.Loss(), + initializer=mx.init.Normal(sigma=0.1), eval_metric=mx.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 @with_seed() def test_bce_loss_with_pos_weight(): @@ -477,9 +477,9 @@ def test_bce_loss_with_pos_weight(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label', 'pos_w')) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.gluon.metric.Loss(), optimizer='adam', + eval_metric=mx.metric.Loss(), optimizer='adam', initializer=mx.init.Xavier(magnitude=2)) - assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.01 + assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.01 # Test against npy data = mx.nd.random.uniform(-5, 5, shape=(N, 5)) label = mx.nd.array(np.random.randint(2, size=(N, 5)), dtype='float32') diff --git a/tests/python/unittest/test_metric.py b/tests/python/unittest/test_metric.py index d72e33f0a2af..4dd2b48ff2fb 100644 --- a/tests/python/unittest/test_metric.py +++ b/tests/python/unittest/test_metric.py @@ -16,7 +16,6 @@ # under the License. import mxnet as mx -from mxnet.test_utils import use_np import numpy as np import scipy from scipy.stats import pearsonr @@ -26,9 +25,9 @@ from copy import deepcopy def check_metric(metric, *args, **kwargs): - metric = mx.gluon.metric.create(metric, *args, **kwargs) + metric = mx.metric.create(metric, *args, **kwargs) str_metric = json.dumps(metric.get_config()) - metric2 = mx.gluon.metric.create(str_metric) + metric2 = mx.metric.create(str_metric) assert metric.get_config() == metric2.get_config() @@ -36,16 +35,93 @@ def test_metrics(): check_metric('acc', axis=0) check_metric('f1') check_metric('mcc') - check_metric('perplexity', axis=-1) + check_metric('perplexity', -1) check_metric('pearsonr') check_metric('pcc') check_metric('nll_loss') check_metric('loss') - composite = mx.gluon.metric.create(['acc', 'f1']) + composite = mx.metric.create(['acc', 'f1']) check_metric(composite) +def _check_global_metric(metric, *args, **kwargs): + def _create_pred_label(): + if use_same_shape: + pred = mx.nd.random.uniform(0, 1, shape=shape) + label = mx.nd.random.uniform(0, 1, shape=shape) + else: + # Make a random prediction + idx = np.random.rand(*shape).argsort(1) + pred = mx.nd.array(1 - 0.1 * idx) + # Label is half 1 and half 0 + # Setting all 0s or all 1s would make either + # MCC or F1 metrics always produce 0 + label = mx.nd.ones(shape[0]) + label[:shape[0] // 2] = 0 + return pred, label + + def _compare_metric_result(m1, m2): + # Compare names + assert m1[0] == m2[0] + # Compare values + if isinstance(m1[1], (list, tuple)): + assert len(m1[1]) == len(m2[1]) + for r1, r2 in zip(m1[1], m2[1]): + assert r1 == r2 or \ + (math.isnan(r1) and + math.isnan(r2)) + else: + assert m1[1] == m2[1] or \ + (math.isnan(m1[1]) and + math.isnan(m2[1])) + + shape = kwargs.pop('shape', (10,10)) + use_same_shape = kwargs.pop('use_same_shape', False) + m1 = mx.metric.create(metric, *args, **kwargs) + m2 = deepcopy(m1) + # check that global stats are not reset when calling + # reset_local() + for i in range(10): + pred, label = _create_pred_label() + m1.update([label], [pred]) + m1.reset_local() + m2.update([label], [pred]) + assert m1.get_global() == m2.get() + + # check that reset_local() properly resets the local state + m1.reset_local() + m2.reset() + pred, label = _create_pred_label() + m1.update([label], [pred]) + m1.reset_local() + pred, label = _create_pred_label() + m1.update([label], [pred]) + m2.update([label], [pred]) + _compare_metric_result(m1.get(), m2.get()) + +@with_seed() +def test_global_metric(): + _check_global_metric('acc') + _check_global_metric('TopKAccuracy', top_k=3) + _check_global_metric('f1', shape=(10,2)) + _check_global_metric('f1', shape=(10,2), average='micro') + _check_global_metric('mcc', shape=(10,2)) + _check_global_metric('mcc', shape=(10,2), average='micro') + _check_global_metric('perplexity', -1) + _check_global_metric('pearsonr', use_same_shape=True) + _check_global_metric('pcc', shape=(10,2)) + _check_global_metric('nll_loss') + _check_global_metric('loss') + _check_global_metric('ce') + _check_global_metric('mae', use_same_shape=True) + _check_global_metric('mse', use_same_shape=True) + _check_global_metric('rmse', use_same_shape=True) + def custom_metric(label, pred): + return np.mean(np.abs(label-pred)) + _check_global_metric(custom_metric, use_same_shape=True) + _check_global_metric(['acc', 'f1'], shape=(10,2)) + def test_nll_loss(): - metric = mx.gluon.metric.create('nll_loss') + metric = mx.metric.create('nll_loss') pred = mx.nd.array([[0.2, 0.3, 0.5], [0.6, 0.1, 0.3]]) label = mx.nd.array([2, 1]) metric.update([label], [pred]) @@ -56,36 +132,36 @@ def test_nll_loss(): def test_acc(): pred = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]) label = mx.nd.array([0, 1, 1]) - metric = mx.gluon.metric.create('acc') + metric = mx.metric.create('acc') metric.update([label], [pred]) _, acc = metric.get() expected_acc = (np.argmax(pred, axis=1) == label).sum().asscalar() / label.size - np.testing.assert_almost_equal(acc, expected_acc) + assert acc == expected_acc def test_acc_2d_label(): # label maybe provided in 2d arrays in custom data iterator pred = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6], [0.8, 0.2], [0.3, 0.5], [0.6, 0.4]]) label = mx.nd.array([[0, 1, 1], [1, 0, 1]]) - metric = mx.gluon.metric.create('acc') + metric = mx.metric.create('acc') metric.update([label], [pred]) _, acc = metric.get() expected_acc = (np.argmax(pred, axis=1).asnumpy() == label.asnumpy().ravel()).sum() / \ float(label.asnumpy().ravel().size) - np.testing.assert_almost_equal(acc, expected_acc) + assert acc == expected_acc def test_loss_update(): pred = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]) - metric1 = mx.gluon.metric.create('loss') - metric2 = mx.gluon.metric.create('loss') + metric1 = mx.metric.create('loss') + metric2 = mx.metric.create('loss') metric1.update(None, [pred]) metric2.update(None, pred) _, acc1 = metric1.get() _, acc2 = metric2.get() assert acc1 == acc2 -def test_binary_f1(): - microF1 = mx.gluon.metric.create("f1", average="micro") - macroF1 = mx.gluon.metric.F1(average="macro") +def test_f1(): + microF1 = mx.metric.create("f1", average="micro") + macroF1 = mx.metric.F1(average="macro") assert np.isnan(macroF1.get()[1]) assert np.isnan(microF1.get()[1]) @@ -115,7 +191,7 @@ def test_binary_f1(): microF1.update([label11, label12], [pred11, pred12]) macroF1.update([label11, label12], [pred11, pred12]) assert microF1.num_inst == 4 - assert macroF1.num_inst == 4 + assert macroF1.num_inst == 1 # f1 = 2 * tp / (2 * tp + fp + fn) fscore1 = 2. * (1) / (2 * 1 + 1 + 0) np.testing.assert_almost_equal(microF1.get()[1], fscore1) @@ -124,96 +200,29 @@ def test_binary_f1(): microF1.update([label21, label22], [pred21, pred22]) macroF1.update([label21, label22], [pred21, pred22]) assert microF1.num_inst == 6 - assert macroF1.num_inst == 6 + assert macroF1.num_inst == 2 fscore2 = 2. * (1) / (2 * 1 + 0 + 0) fscore_total = 2. * (1 + 1) / (2 * (1 + 1) + (1 + 0) + (0 + 0)) np.testing.assert_almost_equal(microF1.get()[1], fscore_total) - np.testing.assert_almost_equal(macroF1.get()[1], fscore_total) - -def test_multiclass_f1(): - microF1 = mx.gluon.metric.create("f1", class_type="multiclass", average="micro") - macroF1 = mx.gluon.metric.F1(class_type="multiclass", average="macro") - - assert np.isnan(macroF1.get()[1]) - assert np.isnan(microF1.get()[1]) + np.testing.assert_almost_equal(macroF1.get()[1], (fscore1 + fscore2) / 2.) - # check one class is zero - pred = mx.nd.array([[0.9, 0.1], - [0.8, 0.2]]) - label = mx.nd.array([0, 0]) - macroF1.update([label], [pred]) - microF1.update([label], [pred]) - assert macroF1.get()[1] == 0.5 # one class is 1.0, the other is 0. (divided by 0) - assert microF1.get()[1] == 1.0 # globally f1 is 1.0 - macroF1.reset() - microF1.reset() - - # test case from sklearn, here pred is probabilistic distributions instead of predicted labels - pred11 = mx.nd.array([[1, 0, 0], [0, 1, 0]]) - label11 = mx.nd.array([0, 2]) - pred12 = mx.nd.array([[0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]]) - label12 = mx.nd.array([1, 0, 0, 1]) - - microF1.update([label11, label12], [pred11, pred12]) - macroF1.update([label11, label12], [pred11, pred12]) - assert microF1.num_inst == 6 - assert macroF1.num_inst == 6 - - # from sklearn.metrics import f1_score - # overall_pred = [0, 1, 2, 0, 1, 2] - # overall_label = [0, 2, 1, 0, 0, 1] - fmacro = 0.26666666666666666 #f1_score(overall_label, overall_pred, average="macro") - fmicro = 0.3333333333333333 #f1_score(overall_label, overall_pred, average="micro") - np.testing.assert_almost_equal(microF1.get()[1], fmicro) - np.testing.assert_almost_equal(macroF1.get()[1], fmacro) - -def test_multilabel_f1(): - microF1 = mx.gluon.metric.create("f1", class_type="multilabel", average="micro") - macroF1 = mx.gluon.metric.F1(class_type="multilabel", average="macro") - - assert np.isnan(macroF1.get()[1]) - assert np.isnan(microF1.get()[1]) - - # check one class is zero - pred = mx.nd.array([[0.9, 0.1], - [0.8, 0.2]]) - label = mx.nd.array([[1, 1], [1, 1]]) - macroF1.update([label], [pred]) - microF1.update([label], [pred]) - assert macroF1.get()[1] == 0.5 # one class is 1.0, the other is 0. (divided by 0) - np.testing.assert_almost_equal(microF1.get()[1], 2.0 / 3) - macroF1.reset() - microF1.reset() - - pred11 = mx.nd.array([[0.9, 0.4, 0.3], [0.2, 0.7, 0.8]]) - label11 = mx.nd.array([[1, 0, 1], [0, 0, 1]]) - pred12 = mx.nd.array([[0.6, 0.6, 0.7]]) - label12 = mx.nd.array([[0, 1, 1]]) - - microF1.update([label11, label12], [pred11, pred12]) - macroF1.update([label11, label12], [pred11, pred12]) - assert microF1.num_inst == 3 - assert macroF1.num_inst == 3 - #from sklearn.metrics import f1_score - #overall_pred = [[1, 0, 0], [0, 1, 1], [1, 1, 1]] - #overall_label = [[1, 0, 1], [0, 0, 1], [0, 1, 1]] - fmacro = 0.7111111111111111 #f1_score(overall_label, overall_pred, average="macro") - fmicro = 0.7272727272727272 #f1_score(overall_label, overall_pred, average="micro") - np.testing.assert_almost_equal(microF1.get()[1], fmicro) - np.testing.assert_almost_equal(macroF1.get()[1], fmacro) - def test_mcc(): - microMCC = mx.gluon.metric.create("mcc") + microMCC = mx.metric.create("mcc", average="micro") + macroMCC = mx.metric.MCC(average="macro") assert np.isnan(microMCC.get()[1]) - + assert np.isnan(macroMCC.get()[1]) + # check divide by zero pred = mx.nd.array([[0.9, 0.1], [0.8, 0.2]]) label = mx.nd.array([0, 0]) microMCC.update([label], [pred]) + macroMCC.update([label], [pred]) assert microMCC.get()[1] == 0.0 + assert macroMCC.get()[1] == 0.0 microMCC.reset() + macroMCC.reset() pred11 = mx.nd.array([[0.1, 0.9], [0.5, 0.5]]) @@ -226,40 +235,51 @@ def test_mcc(): pred22 = mx.nd.array([[0.2, 0.8]]) label22 = mx.nd.array([1]) microMCC.update([label11, label12], [pred11, pred12]) + macroMCC.update([label11, label12], [pred11, pred12]) assert microMCC.num_inst == 4 + assert macroMCC.num_inst == 1 tp1 = 1; fp1 = 0; fn1 = 1; tn1=2 mcc1 = (tp1*tn1 - fp1*fn1) / np.sqrt((tp1+fp1)*(tp1+fn1)*(tn1+fp1)*(tn1+fn1)) np.testing.assert_almost_equal(microMCC.get()[1], mcc1) + np.testing.assert_almost_equal(macroMCC.get()[1], mcc1) microMCC.update([label21, label22], [pred21, pred22]) + macroMCC.update([label21, label22], [pred21, pred22]) assert microMCC.num_inst == 6 + assert macroMCC.num_inst == 2 tp2 = 1; fp2 = 0; fn2 = 0; tn2=1 mcc2 = (tp2*tn2 - fp2*fn2) / np.sqrt((tp2+fp2)*(tp2+fn2)*(tn2+fp2)*(tn2+fn2)) tpT = tp1+tp2; fpT = fp1+fp2; fnT = fn1+fn2; tnT = tn1+tn2; mccT = (tpT*tnT - fpT*fnT) / np.sqrt((tpT+fpT)*(tpT+fnT)*(tnT+fpT)*(tnT+fnT)) np.testing.assert_almost_equal(microMCC.get()[1], mccT) + np.testing.assert_almost_equal(macroMCC.get()[1], .5*(mcc1+mcc2)) def test_perplexity(): pred = mx.nd.array([[0.8, 0.2], [0.2, 0.8], [0, 1.]]) label = mx.nd.array([0, 1, 1]) p = pred.asnumpy()[np.arange(label.size), label.asnumpy().astype('int32')] perplexity_expected = np.exp(-np.log(p).sum()/label.size) - metric = mx.gluon.metric.create('perplexity', axis=-1) + metric = mx.metric.create('perplexity', -1) metric.update([label], [pred]) _, perplexity = metric.get() - np.testing.assert_almost_equal(perplexity, perplexity_expected) + assert perplexity == perplexity_expected def test_pearsonr(): pred1 = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]) label1 = mx.nd.array([[1, 0], [0, 1], [0, 1]]) pearsonr_expected_np = np.corrcoef(pred1.asnumpy().ravel(), label1.asnumpy().ravel())[0, 1] pearsonr_expected_scipy, _ = pearsonr(pred1.asnumpy().ravel(), label1.asnumpy().ravel()) - micro_pr = mx.gluon.metric.create('pearsonr') + macro_pr = mx.metric.create('pearsonr', average='macro') + micro_pr = mx.metric.create('pearsonr', average='micro') + assert np.isnan(macro_pr.get()[1]) assert np.isnan(micro_pr.get()[1]) + macro_pr.update([label1], [pred1]) micro_pr.update([label1], [pred1]) + np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_np) + np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_scipy) np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_np) np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_scipy) @@ -272,7 +292,11 @@ def test_pearsonr(): pearsonr_expected_np = np.corrcoef(pred12.asnumpy().ravel(), label12.asnumpy().ravel())[0, 1] pearsonr_expected_scipy, _ = pearsonr(pred12.asnumpy().ravel(), label12.asnumpy().ravel()) + macro_pr.reset() micro_pr.update([label2], [pred2]) + macro_pr.update([label12], [pred12]) + np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_np) + np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_scipy) np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_np) np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_scipy) @@ -293,18 +317,18 @@ def test_pcc(): [ 7, 3 ], [ 2, 5 ], ]) - met_pcc = mx.gluon.metric.create('pcc') + met_pcc = mx.metric.create('pcc') met_pcc.update(labels, preds) _, pcc = met_pcc.get() # pcc should agree with mcc for binary classification - met_mcc = mx.gluon.metric.create('mcc') + met_mcc = mx.metric.create('mcc') met_mcc.update(labels, preds) _, mcc = met_mcc.get() np.testing.assert_almost_equal(pcc, mcc) # pcc should agree with Pearson for binary classification - met_pear = mx.gluon.metric.create('pearsonr') + met_pear = mx.metric.create('pearsonr') met_pear.update(labels, [p.argmax(axis=1) for p in preds]) _, pear = met_pear.get() np.testing.assert_almost_equal(pcc, pear) @@ -353,7 +377,7 @@ def test_pcc(): # * order # * batch size # * update frequency - labels = [ [ i.reshape(-1) ] for i in labels[0] ] + labels = [ [ i ] for i in labels[0] ] labels.reverse() preds = [ [ i.reshape((1, -1)) ] for i in preds[0] ] preds.reverse() @@ -368,20 +392,19 @@ def test_single_array_input(): pred = mx.nd.array([[1,2,3,4]]) label = pred + 0.1 - mse = mx.gluon.metric.create('mse') + mse = mx.metric.create('mse') mse.update(label, pred) _, mse_res = mse.get() np.testing.assert_almost_equal(mse_res, 0.01) - mae = mx.gluon.metric.create('mae') + mae = mx.metric.create('mae') mae.update(label, pred) mae.get() _, mae_res = mae.get() np.testing.assert_almost_equal(mae_res, 0.1) - rmse = mx.gluon.metric.create('rmse') + rmse = mx.metric.create('rmse') rmse.update(label, pred) rmse.get() _, rmse_res = rmse.get() np.testing.assert_almost_equal(rmse_res, 0.1) - diff --git a/tests/python/unittest/test_metric_perf.py b/tests/python/unittest/test_metric_perf.py index 3c9abf6e3cc0..fc0f8da5d451 100644 --- a/tests/python/unittest/test_metric_perf.py +++ b/tests/python/unittest/test_metric_perf.py @@ -66,7 +66,7 @@ def data(self): def run_metric(name, data_gen_cls, i, n, c, pred_ctx, label_ctx, **kwargs): """ Helper function for running one metric benchmark """ - metric = mx.gluon.metric.create(name, **kwargs) + metric = mx.metric.create(name, **kwargs) data_gen = data_gen_cls(n, c, pred_ctx, label_ctx) try: label, pred = data_gen.data() @@ -105,7 +105,7 @@ def test_metric_performance(): output_dims = [128, 1024, 8192] ctxs = [mx.cpu(), mx.gpu()] - print("\nmx.gluon.metric benchmarks", file=sys.stderr) + print("\nmx.metric benchmarks", file=sys.stderr) print( "{:15}{:10}{:12}{:12}{:15}{:15}{}".format( 'Metric', 'Data-Ctx', 'Label-Ctx', 'Data Size', 'Batch Size', 'Output Dim', 'Elapsed Time'), diff --git a/tests/python/unittest/test_module.py b/tests/python/unittest/test_module.py index 7941eec5004d..65d86f62baf4 100644 --- a/tests/python/unittest/test_module.py +++ b/tests/python/unittest/test_module.py @@ -275,7 +275,7 @@ def sym_gen(seq_len): mod2.fit( train_data=data_train, eval_data=data_val, - eval_metric=mx.gluon.metric.Perplexity(invalid_label), # Use Perplexity for multiclass classification. + eval_metric=mx.metric.Perplexity(invalid_label), # Use Perplexity for multiclass classification. kvstore='device', optimizer='sgd', optimizer_params={'learning_rate': 0.01, @@ -711,7 +711,7 @@ def fm(factor_size, feature_dim, init): expected_accuracy = 0.02 # use accuracy as the metric - metric = mx.gluon.metric.create('MSE') + metric = mx.metric.create('MSE') # train 'num_epochs' epoch for epoch in range(num_epochs): train_iter.reset() diff --git a/tools/caffe_converter/test_converter.py b/tools/caffe_converter/test_converter.py index 880de1be449f..49f8bdb167c2 100644 --- a/tools/caffe_converter/test_converter.py +++ b/tools/caffe_converter/test_converter.py @@ -40,7 +40,7 @@ def test_imagenet_model_performance(model_name, val_data, gpus, batch_size): meta_info = get_model_meta_info(model_name) [model_name, mean] = convert_caffe_model(model_name, meta_info) sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, 0) - acc = [mx.gluon.metric.create('acc'), mx.gluon.metric.create('top_k_accuracy', top_k=5)] + acc = [mx.metric.create('acc'), mx.metric.create('top_k_accuracy', top_k=5)] if isinstance(mean, str): mean_args = {'mean_img':mean} else: