From f8ee88664df0ba3a7460c0a103efcf6b38e70d0a Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Fri, 21 May 2021 15:35:29 -0700 Subject: [PATCH 1/2] Document ability to export cuML RF to predict on other machines --- docs/source/pickling_cuml_models.ipynb | 66 +++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/docs/source/pickling_cuml_models.ipynb b/docs/source/pickling_cuml_models.ipynb index c7da3524b7..211cdb502d 100644 --- a/docs/source/pickling_cuml_models.ipynb +++ b/docs/source/pickling_cuml_models.ipynb @@ -183,6 +183,70 @@ "source": [ "single_gpu_model.cluster_centers_" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exporting cuML Random Forest models for inferencing on machines without GPUs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Starting with cuML version 21.06, you can export cuML Random Forest models and run predictions with them on machines without an NVIDIA GPU. The [Treelite](https://github.com/dmlc/treelite) package defines an efficient exchange format that lets you portably move the cuML Random Forest models to other machines. We will refer to the exchange format as \"checkpoints.\"\n", + "\n", + "Here are the steps to export the model:\n", + "\n", + "1. Call `to_treelite_checkpoint()` to obtain the checkpoint file from the cuML Random Forest model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from cuml.ensemble import RandomForestClassifier as cumlRandomForestClassifier\n", + "from sklearn.datasets import load_iris\n", + "import numpy as np\n", + "\n", + "X, y = load_iris(return_X_y=True)\n", + "X, y = X.astype(np.float32), y.astype(np.int32)\n", + "clf = cumlRandomForestClassifier(max_depth=3, random_state=0, n_estimators=10)\n", + "clf.fit(X, y)\n", + "\n", + "checkpoint_path = './checkpoint.tl'\n", + "# Export cuML RF model as Treelite checkpoint\n", + "clf.convert_to_treelite_model().to_treelite_checkpoint(checkpoint_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. Copy the generated checkpoint file `checkpoint.tl` to another machine on which you'd like to run predictions.\n", + "\n", + "3. On the target machine, install Treelite by running `pip install treelite` or `conda install -c conda-forge treelite`. The machine does not need to have an NVIDIA GPU and does not need to have cuML installed.\n", + "\n", + "4. 
You can now load the model from the checkpoint, by running the following on the target machine:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import treelite\n", + "\n", + "# The checkpoint file has been copied over\n", + "checkpoint_path = './checkpoint.tl'\n", + "tl_model = treelite.Model.deserialize(checkpoint_path)\n", + "out_prob = treelite.gtil.predict(tl_model, X, pred_margin=True)\n", + "print(out_prob)" + ] } ], "metadata": { @@ -201,7 +265,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.6" + "version": "3.8.8" } }, "nbformat": 4, From 52d03017065eecc1237efc22306ac8c7943e5af5 Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Wed, 26 May 2021 21:57:00 -0700 Subject: [PATCH 2/2] Add link to docstring --- python/cuml/ensemble/randomforestclassifier.pyx | 5 +++++ python/cuml/ensemble/randomforestregressor.pyx | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx index 1772b9678e..36272270ab 100644 --- a/python/cuml/ensemble/randomforestclassifier.pyx +++ b/python/cuml/ensemble/randomforestclassifier.pyx @@ -134,6 +134,11 @@ class RandomForestClassifier(BaseRandomForestModel, histogram-based algorithm to determine splits, rather than an exact count. You can tune the size of the histograms with the n_bins parameter. + .. note:: You can export cuML Random Forest models and run predictions + with them on machines without an NVIDIA GPU. See + https://docs.rapids.ai/api/cuml/nightly/pickling_cuml_models.html + for more details. + **Known Limitations**: This is an early release of the cuML Random Forest code. 
It contains a few known limitations: diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index 48d066e7f1..6eca660ac1 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -117,6 +117,11 @@ class RandomForestRegressor(BaseRandomForestModel, histogram-based algorithm to determine splits, rather than an exact count. You can tune the size of the histograms with the n_bins parameter. + .. note:: You can export cuML Random Forest models and run predictions + with them on machines without an NVIDIA GPU. See + https://docs.rapids.ai/api/cuml/nightly/pickling_cuml_models.html + for more details. + **Known Limitations**: This is an early release of the cuML Random Forest code. It contains a few known limitations: