feat(outlines): Add support for outlines #1269

Open · wants to merge 3 commits into master

9 changes: 9 additions & 0 deletions .vscode/launch.json
@@ -28,6 +28,15 @@
"LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
"DEBUG": "true"
}
},
{
"name":"Launch outlines",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/backend/python/backend_outlines/backend_outlines.py",
"console": "integratedTerminal",
"justMyCode": true,
"env": {}
}
]
}
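
For reference, this debug entry runs the new backend under the VS Code Python debugger; the same entry point can be started from a shell with `python backend/python/backend_outlines/backend_outlines.py --addr localhost:50051` (the `--addr` flag is defined by the script's argument parser, shown further down).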
5 changes: 4 additions & 1 deletion Dockerfile
@@ -12,7 +12,7 @@ ARG TARGETARCH
ARG TARGETVARIANT

ENV BUILD_TYPE=${BUILD_TYPE}
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/backend/python/huggingface/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/backend/python/huggingface/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,backend_outlines:/build/backend/python/backend_outlines/run.sh"
ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
ARG GO_TAGS="stablediffusion tts"

@@ -172,6 +172,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \
; fi
RUN if [ "{IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/backend_outlines \
; fi

# Copy VALLE-X as it's not a real "lib"
RUN if [ -d /usr/lib/vall-e-x ]; then \
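For context: `EXTERNAL_GRPC_BACKENDS` is a comma-separated list of `name:launch-script` pairs, so the new `backend_outlines:/build/backend/python/backend_outlines/run.sh` entry is what lets LocalAI discover and spawn this backend over gRPC.
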
2 changes: 2 additions & 0 deletions Makefile
@@ -384,6 +384,7 @@ protogen-python:
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/diffusers/ --grpc_python_out=backend/python/diffusers/ backend/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/backend_outlines/ --grpc_python_out=backend/python/backend_outlines/ backend/backend.proto

## GRPC
# Note: it is duplicated in the Dockerfile
@@ -395,6 +396,7 @@ prepare-extra-conda-environments:
$(MAKE) -C backend/python/huggingface
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/backend_outlines

Owner commented:

> this needs to be added also in the Dockerfile here:
> ## Duplicated from Makefile to avoid having a big layer that's hard to push

Collaborator (author) replied:

> Done

backend-assets/grpc:
11 changes: 11 additions & 0 deletions backend/python/backend_outlines/Makefile
@@ -0,0 +1,11 @@
.PHONY: outlines
outlines:
@echo "Creating virtual environment..."
@conda env create --name outlines --file outlines.yml
@echo "Virtual environment created."

.PHONY: run
run:
@echo "Running outlines..."
bash run.sh
@echo "outlines run."
5 changes: 5 additions & 0 deletions backend/python/backend_outlines/README.md
@@ -0,0 +1,5 @@
# Creating a separate environment for the outlines project

```
make outlines
```
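
Once the environment has been created, the backend can be started with the Makefile's `run` target (`make run`), which simply wraps `bash run.sh`.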
80 changes: 80 additions & 0 deletions backend/python/backend_outlines/backend_outlines.py
@@ -0,0 +1,80 @@
"""
This is the extra gRPC server for the outlines backend of LocalAI.
"""
from concurrent import futures
import argparse
import os
import signal
import sys
import time

import backend_pb2
import backend_pb2_grpc

import grpc

import outlines.text.generate as generate
import outlines.models as models

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS is specified in the environment, use it; otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
"""
BackendServicer is the class that implements the gRPC service
"""
def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

def LoadModel(self, request, context):
try:
            # request.Model should be the name of the model, e.g. gpt2
            if request.Model == "":
                return backend_pb2.Result(success=False, message="Model name is empty")
            # outlines caches models internally, so no extra caching is needed here.
            self.model = models.transformers(request.Model)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Predict(self, request, context):
        try:
            # outlines' continuation API: build a generator bound to the model and
            # the stop sequences, then call it on the prompt.
            # StopPrompts is a repeated string field in backend.proto.
            output = generate.continuation(self.model, stop=list(request.StopPrompts))(str(request.Prompt))
        except Exception as err:
            # Predict returns a Reply (see backend.proto), so report errors
            # through the gRPC context rather than a Result.
            context.set_code(grpc.StatusCode.INTERNAL)
            context.set_details(f"Unexpected {err=}, {type(err)=}")
            return backend_pb2.Reply(message=b"")
        return backend_pb2.Reply(message=bytes(output, encoding='utf-8'))

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
print("Server started. Listening on: " + address, file=sys.stderr)

# Define the signal handler function
def signal_handler(sig, frame):
print("Received termination signal. Shutting down...")
server.stop(0)
sys.exit(0)

# Set the signal handlers for SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)

try:
while True:
time.sleep(_ONE_DAY_IN_SECONDS)
except KeyboardInterrupt:
server.stop(0)

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument(
"--addr", default="localhost:50051", help="The address to bind the server to."
)
args = parser.parse_args()

serve(args.addr)
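
To show how this server is driven end to end, here is a minimal client sketch. It is not part of the PR: the `BackendStub`, `HealthMessage`, `ModelOptions`, and `PredictOptions` names are assumptions based on LocalAI's backend.proto (as compiled into backend_pb2/backend_pb2_grpc by the protogen-python target), and `gpt2` is only a placeholder model name.

```python
# Hypothetical client sketch; message/stub names assume LocalAI's backend.proto.
import grpc

import backend_pb2
import backend_pb2_grpc


def main():
    # Assumes the server was started with:
    #   python backend_outlines.py --addr localhost:50051
    with grpc.insecure_channel("localhost:50051") as channel:
        stub = backend_pb2_grpc.BackendStub(channel)

        # Health check: the servicer above replies with b"OK".
        print(stub.Health(backend_pb2.HealthMessage()).message)

        # Load a transformers model by name (placeholder: gpt2).
        result = stub.LoadModel(backend_pb2.ModelOptions(Model="gpt2"))
        if not result.success:
            raise RuntimeError(result.message)

        # Ask outlines for a continuation of the prompt.
        reply = stub.Predict(backend_pb2.PredictOptions(Prompt="The capital of France is"))
        print(reply.message.decode("utf-8"))


if __name__ == "__main__":
    main()
```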
61 changes: 61 additions & 0 deletions backend/python/backend_outlines/backend_pb2.py

(backend_pb2.py is generated from backend/backend.proto by the protogen-python Makefile target; contents not rendered.)