diff --git a/docs/examples/python-custom-runtime/Python-Based-Custom-Runtime-with-Model-Stored-on-PVC.md b/docs/examples/python-custom-runtime/Python-Based-Custom-Runtime-with-Model-Stored-on-PVC.md
new file mode 100644
index 00000000..2646c91f
--- /dev/null
+++ b/docs/examples/python-custom-runtime/Python-Based-Custom-Runtime-with-Model-Stored-on-PVC.md
@@ -0,0 +1,279 @@
+# Python-Based Custom Runtime with a Model Stored on a Persistent Volume Claim
+
+This document provides step-by-step instructions for writing a custom Python-based `ServingRuntime` that inherits from [MLServer's MLModel class](https://github.com/SeldonIO/MLServer/blob/master/mlserver/model.py), and for using it to deploy a model stored on a Persistent Volume Claim (PVC).
+
+This example assumes that ModelMesh Serving was deployed using the [quickstart guide](https://github.com/kserve/modelmesh-serving/blob/main/docs/quickstart.md).
+
+# Deploy a model stored on a Persistent Volume Claim
+
+Use the `modelmesh-serving` namespace for this example:
+
+```shell
+kubectl config set-context --current --namespace=modelmesh-serving
+```
+
+## 1. Create a PV and PVC for storing the model file
+
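+How the PV and PVC are provisioned is environment-specific, so the following is only a sketch; it relies on dynamic provisioning (or an existing PV) for the backing PersistentVolume. The claim name `my-models-pvc`, the helper pod name `pvc-access`, and the model file `mnist-svm.joblib` match the names used in the rest of this example, while the access mode, storage size, mount path, and the assumption that a copy of `mnist-svm.joblib` is available locally should be adapted to your cluster:
+
+```shell
+# Create the PVC that will hold the model file (size and access mode are assumptions)
+kubectl apply -f - <<EOF
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: my-models-pvc
+spec:
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: 1Gi
+EOF
+
+# Create a helper pod that mounts the PVC so the model file can be copied onto it
+kubectl apply -f - <<EOF
+apiVersion: v1
+kind: Pod
+metadata:
+  name: pvc-access
+spec:
+  containers:
+    - name: main
+      image: ubuntu
+      command: ["sleep", "infinity"]
+      volumeMounts:
+        - name: my-pvc
+          mountPath: /mnt/models
+  volumes:
+    - name: my-pvc
+      persistentVolumeClaim:
+        claimName: my-models-pvc
+EOF
+
+# Copy the local SKLearn MNIST model file onto the PVC
+kubectl cp mnist-svm.joblib pvc-access:/mnt/models/mnist-svm.joblib
+```
+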
+## 2. Build the custom ServingRuntime image
+
+Build the custom MLServer-based runtime image from the `custom-model` directory, which contains the `Dockerfile`, `custom_model.py`, and `requirements.txt` included with this example:
+
+```shell
+docker build -t <your-registry>/custom-model-server:0.1 .
+```
+
+> **Note**: If your environment requires an HTTP proxy, pass it to the build with `--build-arg`, for example:
+
+```shell
+docker build --build-arg HTTP_PROXY=http://<proxy-address>:PORT --build-arg HTTPS_PROXY=http://<proxy-address>:PORT -t <your-registry>/custom-model-server:0.1 .
+```
+
+## 3. Define and Apply the Custom ServingRuntime
+
+Below, you will create a ServingRuntime using the image built above. You can learn more about the custom `ServingRuntime` template [here](https://github.com/kserve/modelmesh-serving/blob/main/docs/runtimes/mlserver_custom.md#custom-servingruntime-template).
+
+```shell
+kubectl apply -f - <<EOF
+apiVersion: serving.kserve.io/v1alpha1
+kind: ServingRuntime
+metadata:
+  name: my-custom-model-0.x
+spec:
+  supportedModelFormats:
+    - name: custom_model
+      version: "1"
+      autoSelect: true
+  multiModel: true
+  grpcDataEndpoint: "port:8001"
+  grpcEndpoint: "port:8085"
+  containers:
+    - name: mlserver
+      image: <your-registry>/custom-model-server:0.1
+      env:
+        - name: MLSERVER_MODELS_DIR
+          value: "/models/_mlserver_models/"
+        - name: MLSERVER_GRPC_PORT
+          value: "8001"
+        - name: MLSERVER_HTTP_PORT
+          value: "8002"
+        - name: MLSERVER_LOAD_MODELS_AT_STARTUP
+          value: "false"
+        - name: MLSERVER_MODEL_NAME
+          value: dummy-model
+        - name: MLSERVER_HOST
+          value: "127.0.0.1"
+        - name: MLSERVER_GRPC_MAX_MESSAGE_LENGTH
+          value: "-1"
+      resources:
+        requests:
+          cpu: 500m
+          memory: 1Gi
+        limits:
+          cpu: "5"
+          memory: 1Gi
+  builtInAdapter:
+    serverType: mlserver
+    runtimeManagementPort: 8001
+    memBufferBytes: 134217728
+    modelLoadingTimeoutMillis: 90000
+EOF
+```
+
+Verify that the custom `ServingRuntime` is now available alongside the built-in runtimes:
+
+```shell
+kubectl get servingruntimes
+
+NAME                  DISABLED   MODELTYPE      CONTAINERS   AGE
+mlserver-1.x                     sklearn        mlserver     10m
+my-custom-model-0.x              custom_model   mlserver     10m
+ovms-1.x                         openvino_ir    ovms         10m
+torchserve-0.x                   pytorch-mar    torchserve   10m
+triton-2.x                       keras          triton       10m
+```
+
+## 4. Deploy the InferenceService using the custom ServingRuntime
+
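+With the runtime in place, deploy the `InferenceService`. The following is a sketch: the name `sklearn-pvc-example`, the model format `custom_model`, the runtime `my-custom-model-0.x`, and the claim `my-models-pvc` match the names used elsewhere in this example, while the `path` on the PVC and the details of the `storage` stanza are assumptions based on the ModelMesh PVC storage convention and should be adjusted to where you copied the model:
+
+```shell
+kubectl apply -f - <<EOF
+apiVersion: serving.kserve.io/v1beta1
+kind: InferenceService
+metadata:
+  name: sklearn-pvc-example
+  annotations:
+    serving.kserve.io/deploymentMode: ModelMesh
+spec:
+  predictor:
+    model:
+      modelFormat:
+        name: custom_model
+      runtime: my-custom-model-0.x
+      storage:
+        parameters:
+          type: pvc
+          name: my-models-pvc
+        path: mnist-svm.joblib
+EOF
+```
+
+Once the predictor reports `Ready`, forward the ModelMesh Serving REST port so the model can be reached on `localhost:8008`; the service name below is the default one created by the quickstart install and is an assumption:
+
+```shell
+kubectl port-forward --address 0.0.0.0 service/modelmesh-serving 8008
+```
+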
+Perform an inference request against the SKLearn MNIST model via `curl`. Make sure the `MODEL_NAME` variable is set correctly:
+
+```shell
+MODEL_NAME="sklearn-pvc-example"
+curl -s -X POST -k "http://localhost:8008/v2/models/${MODEL_NAME}/infer" -d '{"inputs": [{ "name": "predict", "shape": [1, 64], "datatype": "FP32", "data": [0.0, 0.0, 1.0, 11.0, 14.0, 15.0, 3.0, 0.0, 0.0, 1.0, 13.0, 16.0, 12.0, 16.0, 8.0, 0.0, 0.0, 8.0, 16.0, 4.0, 6.0, 16.0, 5.0, 0.0, 0.0, 5.0, 15.0, 11.0, 13.0, 14.0, 0.0, 0.0, 0.0, 0.0, 2.0, 12.0, 16.0, 13.0, 0.0, 0.0, 0.0, 0.0, 0.0, 13.0, 16.0, 16.0, 6.0, 0.0, 0.0, 0.0, 0.0, 16.0, 16.0, 16.0, 7.0, 0.0, 0.0, 0.0, 0.0, 11.0, 13.0, 12.0, 1.0, 0.0]}]}' | jq .
+{
+  "model_name": "sklearn-pvc-example__isvc-72fbffc584",
+  "outputs": [
+    {
+      "name": "predict",
+      "datatype": "INT64",
+      "shape": [1],
+      "data": [8]
+    }
+  ]
+}
+```
+
+> **Note**: `jq` is optional; it is only used to pretty-print the response.
+
+To delete the resources created in this example, run the following commands:
+
+```shell
+kubectl delete isvc "sklearn-pvc-example"
+kubectl delete pod "pvc-access"
+kubectl delete pvc "my-models-pvc"
+```
diff --git a/docs/examples/python-custom-runtime/custom-model/Dockerfile b/docs/examples/python-custom-runtime/custom-model/Dockerfile
new file mode 100644
index 00000000..d6176cde
--- /dev/null
+++ b/docs/examples/python-custom-runtime/custom-model/Dockerfile
@@ -0,0 +1,35 @@
+FROM python:3.9.13
+# ENV LANG C.UTF-8
+
+COPY requirements.txt ./requirements.txt
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# The custom `MLModel` implementation should be on the Python search path
+# instead of relying on the working directory of the image. If using a
+# single-file module, this can be accomplished with:
+COPY --chown=${USER} ./custom_model.py /opt/custom_model.py
+ENV PYTHONPATH=/opt/
+WORKDIR /opt
+
+# Environment variables to be compatible with ModelMesh Serving.
+# These can also be set in the ServingRuntime, but setting them here is
+# recommended for consistency when building and testing.
+# Reference: https://mlserver.readthedocs.io/en/latest/reference/settings.html
+ENV MLSERVER_MODELS_DIR=/models/_mlserver_models \
+    MLSERVER_GRPC_PORT=8001 \
+    MLSERVER_HTTP_PORT=8002 \
+    MLSERVER_METRICS_PORT=8082 \
+    MLSERVER_LOAD_MODELS_AT_STARTUP=false \
+    MLSERVER_DEBUG=false \
+    MLSERVER_PARALLEL_WORKERS=1 \
+    MLSERVER_GRPC_MAX_MESSAGE_LENGTH=33554432 \
+    # https://github.com/SeldonIO/MLServer/pull/748
+    MLSERVER__CUSTOM_GRPC_SERVER_SETTINGS='{"grpc.max_metadata_size": "32768"}' \
+    MLSERVER_MODEL_NAME=dummy-model
+
+# With this setting, the implementation field is not required in the model
+# settings, which eases integration by allowing the built-in adapter to
+# generate a basic model-settings file.
+ENV MLSERVER_MODEL_IMPLEMENTATION=custom_model.CustomMLModel
+
+CMD mlserver start $MLSERVER_MODELS_DIR
diff --git a/docs/examples/python-custom-runtime/custom-model/custom_model.py b/docs/examples/python-custom-runtime/custom-model/custom_model.py
new file mode 100644
index 00000000..40dd1e5f
--- /dev/null
+++ b/docs/examples/python-custom-runtime/custom-model/custom_model.py
@@ -0,0 +1,61 @@
+import os
+from os.path import exists
+from typing import Dict, List
+from mlserver import MLModel
+from mlserver.utils import get_model_uri
+from mlserver.types import InferenceRequest, InferenceResponse, ResponseOutput, Parameters
+from mlserver.codecs import DecodedParameterName
+from joblib import load
+
+import logging
+import numpy as np
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+_to_exclude = {
+    "parameters": {DecodedParameterName, "headers"},
+    "inputs": {"__all__": {"parameters": {DecodedParameterName, "headers"}}},
+}
+
+WELLKNOWN_MODEL_FILENAMES = ["mnist-svm.joblib"]
+
+
+class CustomMLModel(MLModel):  # pylint:disable=c-extension-no-member
+    async def load(self) -> bool:
+        # Resolve the model file location provided by the ModelMesh adapter
+        model_uri = await get_model_uri(self._settings, wellknown_filenames=WELLKNOWN_MODEL_FILENAMES)
+        logger.info(f"Model load URI: {model_uri}")
+        if exists(model_uri):
+            logger.info(f"Loading MNIST model from {model_uri}")
+            self._model = load(model_uri)
+            logger.info("Model loaded successfully")
+        else:
+            logger.error(f"Model file does not exist at {model_uri}")
+            # raise FileNotFoundError(model_uri)
+            self.ready = False
+            return self.ready
+
+        self.ready = True
+        return self.ready
+
+    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
+        # Collect the raw tensor data and names from the V2 inference request
+        input_data = [inp.data for inp in payload.inputs]
+        input_names = [inp.name for inp in payload.inputs]
+        input_data_array = np.array(input_data)
+        result = self._model.predict(input_data_array)
+        predictions = np.array(result)
+
+        logger.info(f"Predict result is: {result}")
+        return InferenceResponse(
+            id=payload.id,
+            model_name=self.name,
+            model_version=self.version,
+            outputs=[
+                ResponseOutput(
+                    name=str(input_names[0]),
+                    shape=list(predictions.shape),
+                    datatype="INT64",
+                    data=predictions.tolist(),
+                )
+            ],
+        )
diff --git a/docs/examples/python-custom-runtime/custom-model/requirements.txt b/docs/examples/python-custom-runtime/custom-model/requirements.txt
new file mode 100644
index 00000000..406ae395
--- /dev/null
+++ b/docs/examples/python-custom-runtime/custom-model/requirements.txt
@@ -0,0 +1,3 @@
+mlserver==1.3.2
+scikit-learn==0.24.2
+joblib==1.0.1