diff --git a/.gitignore b/.gitignore
index 61cff3e..91525f2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@
 .pydevproject
 *.pyc
 .venv/
-redisai.egg-info
\ No newline at end of file
+redisai.egg-info
+.idea
\ No newline at end of file
diff --git a/README.md b/README.md
index af82621..1895ad9 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 
 # RedisAI Python Client
 
-## Installing
+## Installation
 
 1. Install Redis 5.0 or above
 
@@ -20,6 +20,49 @@
 $ pip install redisai
 ```
 
-[RedisAI example repo](https://github.com/RedisAI/redisai-examples) shows few examples made using redisai-py under `python_client` section.
+4. Install the serialization/deserialization utility (optional)
+```sh
+$ pip install ml2rt
+```
+
+The [RedisAI example repo](https://github.com/RedisAI/redisai-examples) shows a few examples built with redisai-py under the `python_client` section. Check out [ml2rt](https://github.com/hhsecond/ml2rt) for convenience functions that can help with converting models (SparkML, scikit-learn, XGBoost to ONNX), serializing models to disk, loading them back into redisai-py, etc.
+
+For a quick walkthrough, check out this example:
+
+```python
+from redisai import Client
+from redisai import Tensor, BlobTensor, DType, Device, Backend
+import ml2rt
+
+client = Client()
+client.tensorset('x', Tensor(DType.float, [2], [2, 3]))
+t = client.tensorget('x')
+print(t.value)
+
+model = ml2rt.load_model('test/testdata/graph.pb')
+client.tensorset('a', Tensor.scalar(DType.float, 2, 3))
+client.tensorset('b', Tensor.scalar(DType.float, 12, 10))
+client.modelset('m', Backend.tf,
+                Device.cpu,
+                input=['a', 'b'],
+                output='mul',
+                data=model)
+client.modelrun('m', ['a', 'b'], ['mul'])
+print(client.tensorget('mul').value)
+
+# Try with a script
+script = ml2rt.load_script('test/testdata/script.txt')
+client.scriptset('ket', Device.cpu, script)
+client.scriptrun('ket', 'bar', input=['a', 'b'], output='c')
+
+b1 = client.tensorget('c', as_type=BlobTensor)
+b2 = client.tensorget('c', as_type=BlobTensor)
+
+client.tensorset('d', BlobTensor(DType.float, b1.shape, b1, b2))
+
+tnp = b1.to_numpy()
+print(tnp)
+
+```
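+
+ml2rt also provides a `save_model` helper that infers the backend (TF / PyTorch / ONNX / skl2onnx) from the object you pass it. A minimal sketch, assuming ml2rt keeps the signature of the old `redisai.model.Model.save` utility and that `my_module` is a trained `torch.jit.ScriptModule` of your own (hypothetical name):
+
+```python
+import ml2rt
+
+# Serialize a TorchScript module to disk; the backend is inferred
+# from the object's type (sketch based on the old Model.save API).
+ml2rt.save_model(my_module, 'my_module.pt')
+
+# Read the serialized bytes back, ready to pass to Client.modelset.
+model = ml2rt.load_model('my_module.pt')
+```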
diff --git a/example.py b/example.py
index 06b31a9..465a888 100644
--- a/example.py
+++ b/example.py
@@ -1,16 +1,15 @@
-from __future__ import print_function
-from redisai import Client, Tensor, ScalarTensor, \
+from redisai import Client, Tensor, \
     BlobTensor, DType, Device, Backend
-from redisai import model as raimodel
+import mlut
 
 client = Client()
 client.tensorset('x', Tensor(DType.float, [2], [2, 3]))
 t = client.tensorget('x')
 print(t.value)
 
-model = raimodel.Model.load('../RedisAI/examples/models/graph.pb')
-client.tensorset('a', ScalarTensor(DType.float, 2, 3))
-client.tensorset('b', ScalarTensor(DType.float, 12, 10))
+model = mlut.load_model('test/testdata/graph.pb')
+client.tensorset('a', Tensor.scalar(DType.float, 2, 3))
+client.tensorset('b', Tensor.scalar(DType.float, 12, 10))
 client.modelset('m', Backend.tf,
                 Device.cpu,
                 input=['a', 'b'],
@@ -20,18 +19,14 @@
 print(client.tensorget('mul').value)
 
 # Try with a script
-script = raimodel.Model.load('../RedisAI/examples/models/script.txt')
+script = mlut.load_script('test/testdata/script.txt')
 client.scriptset('ket', Device.cpu, script)
 client.scriptrun('ket', 'bar', input=['a', 'b'], output='c')
 
-b1 = client.tensorget('c', astype=BlobTensor)
-b2 = client.tensorget('c', astype=BlobTensor)
-bt = BlobTensor(DType.float, b1.shape, b1, b2)
-
-print(len(bytes(bt.blob)))
-print(bt.shape)
+b1 = client.tensorget('c', as_type=BlobTensor)
+b2 = client.tensorget('c', as_type=BlobTensor)
 
 client.tensorset('d', BlobTensor(DType.float, b1.shape, b1, b2))
 
 tnp = b1.to_numpy()
-client.tensorset('e', tnp)
\ No newline at end of file
+client.tensorset('e', tnp)
diff --git a/redisai/__init__.py b/redisai/__init__.py
index 0686a3a..8fbf61f 100644
--- a/redisai/__init__.py
+++ b/redisai/__init__.py
@@ -1,22 +1,2 @@
-from .version import __version__
-from .client import (Client, Tensor, BlobTensor, DType, Device, Backend)
-
-
-def save_model(*args, **kwargs):
-    """
-    Importing inside to avoid loading the TF/PyTorch/ONNX
-    into the scope unnecessary. This function wraps the
-    internal save model utility to make it user friendly
-    """
-    from .model import Model
-    Model.save(*args, **kwargs)
-
-
-def load_model(*args, **kwargs):
-    """
-    Importing inside to avoid loading the TF/PyTorch/ONNX
-    into the scope unnecessary. This function wraps the
-    internal load model utility to make it user friendly
-    """
-    from .model import Model
-    return Model.load(*args, **kwargs)
+from .version import __version__  # noqa
+from .client import (Client, Tensor, BlobTensor, DType, Device, Backend)  # noqa
diff --git a/redisai/client.py b/redisai/client.py
index 05212e1..dd27c8b 100644
--- a/redisai/client.py
+++ b/redisai/client.py
@@ -9,7 +9,7 @@
     np = None
 
 try:
-    from typing import Union, Any, AnyStr, ByteString, Collection, Type
+    from typing import Union, Any, AnyStr, ByteString, Collection, Type  # noqa
 except ImportError:
     pass
 
@@ -165,7 +165,7 @@ def _to_numpy_type(t):
         }
         if t in mm:
             return mm[t]
-        return t
+        return t.lower()
 
     @classmethod
     def from_resp(cls, dtype, shape, value):
@@ -225,7 +225,7 @@ def tensorset(self, key, tensor):
         return self.execute_command(*args)
 
     def tensorget(self, key, as_type=Tensor, meta_only=False):
-        # type: (AnyStr, Type[Tensor], bool) -> Tensor
+        # type: (AnyStr, Type[Tensor], bool) -> Union[Tensor, BlobTensor]
         """
         Retrieve the value of a tensor from the server
         :param key: the name of the tensor
diff --git a/redisai/model.py b/redisai/model.py
deleted file mode 100644
index 7a43a4b..0000000
--- a/redisai/model.py
+++ /dev/null
@@ -1,144 +0,0 @@
-import os
-import warnings
-import sys
-
-try:
-    import tensorflow as tf
-except (ModuleNotFoundError, ImportError):
-    pass
-
-try:
-    import torch
-except (ModuleNotFoundError, ImportError):
-    pass
-
-try:
-    import onnx
-except (ModuleNotFoundError, ImportError):
-    pass
-
-try:
-    import skl2onnx
-    import sklearn
-except (ModuleNotFoundError, ImportError):
-    pass
-
-
-class Model:
-
-    __slots__ = ['graph', 'backend', 'device', 'inputs', 'outputs']
-
-    def __init__(self, path, device=None, inputs=None, outputs=None):
-        """
-        Declare a model suitable for passing to modelset
-        :param path: Filepath from where the stored model can be read
-        :param device: Enum from `redisai.Device` represents which device
-            should the model run on, inside RedisAI
-        :param inputs: Optional parameter required only for tensorflow.
-            In the TF world, this represents the list which is being
-            passed to `sess.run` with tensors which is required for
-            TF to execute the model
-        :param outputs: Optional parameter required only for tensorflow.
-            Similr to `inputs`, `outputs` is also passed to `sess.run` but
-            to fetch the output from
-        """
-        raise NotImplementedError('Instance creation is not impelemented yet')
-
-    @classmethod
-    def save(cls, obj, path: str, input=None, output=None, as_native=True, prototype=None):
-        """
-        Infer the backend (TF/PyTorch/ONNX) by inspecting the class hierarchy
-        and calls the appropriate serialization utility. It is essentially a
-        wrapper over serialization mechanism of each backend
-        :param path: Path to which the graph/model will be saved
-        :param input: Optional parameter required only for tensorflow.
-            In the TF world, this represents the list which is being
-            passed to `sess.run` with tensors which is required for
-            TF to execute the model
-        :param output: Optional parameter required only for tensorflow.
-            Similr to `input`, `output` is also passed to `sess.run` but
-            to fetch the output from
-        :param as_native: Saves the graph/model with backend's serialization
-            mechanism if True. If False, custom saving utility will be called
-            which saves other informations required for modelset. Defaults to True
-        """
-        if 'tensorflow' in sys.modules and issubclass(type(obj), tf.Session):
-            cls._save_tf_graph(obj, path, output, as_native)
-        elif 'torch' in sys.modules and issubclass(
-                type(type(obj)), torch.jit.ScriptMeta):
-            # TODO Is there a better way to check this
-            cls._save_torch_graph(obj, path, as_native)
-        elif 'onnx' in sys.modules and issubclass(
-                type(obj), onnx.onnx_ONNX_RELEASE_ml_pb2.ModelProto):
-            cls._save_onnx_graph(obj, path, as_native)
-        elif 'skl2onnx' in sys.modules and issubclass(
-                type(obj), sklearn.base.BaseEstimator):
-            cls._save_sklearn_graph(obj, path, as_native, prototype)
-        else:
-            message = ("Could not find the required dependancy to export the graph object. "
-                       "`save_model` relies on serialization mechanism provided by the"
-                       " supported backends such as Tensorflow, PyTorch, ONNX or skl2onnx. "
-                       "Please install package required for serializing your graph. "
-                       "For more information, checkout the redisia-py documentation")
-            raise RuntimeError(message)
-
-    @classmethod
-    def _save_tf_graph(cls, sess, path, output, as_native):
-        graph_def = sess.graph_def
-        # clearing device information
-        for node in graph_def.node:
-            node.device = ""
-        frozen = tf.graph_util.convert_variables_to_constants(
-            sess, graph_def, output)
-        if as_native:
-            directory = os.path.dirname(path)
-            file = os.path.basename(path)
-            tf.io.write_graph(frozen, directory, file, as_text=False)
-            return
-        else:
-            raise NotImplementedError('Saving non-native graph is not supported yet')
-
-    @classmethod
-    def _save_torch_graph(cls, graph, path, as_native):
-        # TODO how to handle the cpu/gpu
-        if as_native:
-            if graph.training is True:
-                warnings.warn(
-                    'Graph is in training mode. Converting to evaluation mode')
-                graph.eval()
-            torch.jit.save(graph, path)
-            return
-        else:
-            raise NotImplementedError('Saving non-native graph is not supported yet')
-
-    @classmethod
-    def _save_onnx_graph(cls, graph, path, as_native):
-        if as_native:
-            with open(path, 'wb') as f:
-                f.write(graph.SerializeToString())
-        else:
-            raise NotImplementedError('Saving non-native graph is not supported yet')
-
-    @classmethod
-    def _save_sklearn_graph(cls, graph, path, as_native, prototype):
-        if not as_native:
-            raise NotImplementedError('Saving non-native graph is not supported yet')
-        if hasattr(prototype, 'shape') and hasattr(prototype, 'dtype'):
-            datatype = skl2onnx.common.data_types.guess_data_type(prototype)
-            serialized = skl2onnx.convert_sklearn(graph, initial_types=datatype)
-            cls._save_onnx_graph(serialized, path, as_native)
-        else:
-            raise TypeError(
-                "Serializing scikit learn model needs to know shape and dtype"
-                " of input data which will be inferred from `prototype` "
-                "parameter. It has to be a valid `numpy.ndarray` of shape of your input")
-
-    @classmethod
-    def load(cls, path: str):
-        """
-        Return the binary data if saved with `as_native` otherwise return the dict
-        that contains binary graph/model on `graph` key (Not implemented yet).
-        :param path: File path from where the native model or the rai models are saved
-        """
-        with open(path, 'rb') as f:
-            return f.read()
diff --git a/test-requirements.txt b/test-requirements.txt
index 2640e86..beb8604 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -1,6 +1,2 @@
 numpy
-torch
-tensorflow
-onnx
-skl2onnx
-pandas
+mlut
diff --git a/test/test.py b/test/test.py
index 077579f..7bb07c3 100644
--- a/test/test.py
+++ b/test/test.py
@@ -2,7 +2,7 @@
 import numpy as np
 import os.path
 from redisai import Client, DType, Backend, Device, Tensor, BlobTensor
-from redisai import load_model
+from mlut import load_model
 from redis.exceptions import ResponseError
 
 
diff --git a/test/test_model.py b/test/test_model.py
deleted file mode 100644
index 75a8eb2..0000000
--- a/test/test_model.py
+++ /dev/null
@@ -1,132 +0,0 @@
-import time
-import os
-
-from unittest import TestCase
-from redisai import save_model, load_model
-from redisai import Client, Backend, Device, Tensor, DType
-import tensorflow as tf
-import torch
-from sklearn import linear_model, datasets
-import onnx
-
-
-def get_tf_graph():
-    x = tf.placeholder(tf.float32, name='input')
-    W = tf.Variable(5., name='W')
-    b = tf.Variable(3., name='b')
-    y = x * W + b
-    y = tf.identity(y, name='output')
-
-
-class MyModule(torch.jit.ScriptModule):
-    def __init__(self):
-        super(MyModule, self).__init__()
-
-    @torch.jit.script_method
-    def forward(self, a, b):
-        return a + b
-
-
-def get_sklearn_model_and_prototype():
-    model = linear_model.LinearRegression()
-    boston = datasets.load_boston()
-    X, y = boston.data, boston.target
-    model.fit(X, y)
-    return model, X[0].reshape(1, -1)
-
-
-def get_onnx_model():
-    torch_model = torch.nn.ReLU()
-    # maybe there exists, but couldn't find a way to pass
-    # the onnx model without writing to disk
-    torch.onnx.export(torch_model, torch.rand(1, 1), 'model.onnx')
-    onnx_model = onnx.load('model.onnx')
-    os.remove('model.onnx')
-    return onnx_model
-
-
-class ModelTestCase(TestCase):
-
-    def get_client(self):
-        return Client()
-
-    def testTFGraph(self):
-        _ = get_tf_graph()
-        init = tf.global_variables_initializer()
-        sess = tf.Session()
-        sess.run(init)
-        path = f'{time.time()}.pb'
-        save_model(sess, path, output=['output'])
-        model = load_model(path)
-        os.remove(path)
-        con = self.get_client()
-        con.modelset(
-            'tfmodel', Backend.tf, Device.cpu, model,
-            input=['input'], output=['output'])
-        con.tensorset('a', Tensor.scalar(DType.float, 2))
-        con.modelrun('tfmodel', ['a'], 'c')
-        tensor = con.tensorget('c')
-        self.assertEqual([13], tensor.value)
-
-    def testPyTorchGraph(self):
-        torch_graph = MyModule()
-        path = f'{time.time()}.pt'
-        save_model(torch_graph, path)
-        model = load_model(path)
-        os.remove(path)
-        con = self.get_client()
-        con.modelset('ptmodel', Backend.torch, Device.cpu, model)
-        con.tensorset('a', Tensor.scalar(DType.float, 2, 5))
-        con.tensorset('b', Tensor.scalar(DType.float, 3, 7))
-        con.modelrun('ptmodel', ['a', 'b'], 'c')
-        tensor = con.tensorget('c')
-        self.assertEqual([5, 12], tensor.value)
-
-    def testFakeObjSave(self):
-        fakemodel = {}
-        self.assertRaises(
-            RuntimeError,
-            save_model, fakemodel, 'fake.pt')
-        wrongmodel_pt = torch.nn.Linear(2, 3)
-        self.assertRaises(
-            RuntimeError,
-            save_model, wrongmodel_pt, 'wrong.pt')
-
-    def testScriptLoad(self):
-        con = self.get_client()
-        dirname = os.path.dirname(__file__)
-        path = f'{dirname}/testdata/script.txt'
-        script = load_model(path)
-        con.scriptset('script', Device.cpu, script)
-        con.tensorset('a', Tensor.scalar(DType.float, 2, 5))
-        con.tensorset('b', Tensor.scalar(DType.float, 3, 7))
-        con.scriptrun('script', 'bar', ['a', 'b'], 'c')
-        tensor = con.tensorget('c')
-        self.assertEqual([5, 12], tensor.value)
-
-    def testSKLearnGraph(self):
-        sklearn_model, prototype = get_sklearn_model_and_prototype()
-        path = f'{time.time()}.onnx'
-        self.assertRaises(TypeError, save_model, sklearn_model, path)
-        save_model(sklearn_model, path, prototype=prototype)
-        model = load_model(path)
-        os.remove(path)
-        con = self.get_client()
-        con.modelset('onnx_skl_model', Backend.onnx, Device.cpu, model)
-        con.tensorset('a', Tensor.scalar(DType.float, *([1] * 13)))
-        con.modelrun('onnx_skl_model', ['a'], ['outfromonnxskl'])
-        tensor = con.tensorget('outfromonnxskl')
-        self.assertEqual(len(tensor.value), 1)
-
-    def testONNXGraph(self):
-        onnx_model = get_onnx_model()
-        path = f'{time.time()}.onnx'
-        save_model(onnx_model, path)
-        model = load_model(path)
-        os.remove(path)
-        con = self.get_client()
-        con.modelset('onnxmodel', Backend.onnx, Device.cpu, model)
-        con.tensorset('a', Tensor.scalar(DType.float, 2, -1))
-        con.modelrun('onnxmodel', ['a'], ['c'])
-        tensor = con.tensorget('c')
-        self.assertEqual([2.0, 0.0], tensor.value)