feat(outlines): Add support for outlines #1269

Open · wants to merge 3 commits into master

9 changes: 9 additions & 0 deletions .vscode/launch.json
@@ -28,6 +28,15 @@
"LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
"DEBUG": "true"
}
},
{
"name":"Launch outlines",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/backend/python/backend_outlines/backend_outlines.py",
"console": "integratedTerminal",
"justMyCode": true,
"env": {}
}
]
}
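
For reference, this debug entry runs the new backend under the VS Code Python debugger; the same entry point can be started from a shell with `python backend/python/backend_outlines/backend_outlines.py --addr localhost:50051` (the `--addr` flag is defined by the script's argument parser, shown further down).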
5 changes: 4 additions & 1 deletion Dockerfile
@@ -12,7 +12,7 @@ ARG TARGETARCH
ARG TARGETVARIANT

ENV BUILD_TYPE=${BUILD_TYPE}
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/backend/python/huggingface/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/backend/python/huggingface/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,backend_outlines:/build/backend/python/backend_outlines/run.sh"
ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
ARG GO_TAGS="stablediffusion tts"

@@ -172,6 +172,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \
; fi
RUN if [ "{IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/backend_outlines \
; fi

# Copy VALLE-X as it's not a real "lib"
RUN if [ -d /usr/lib/vall-e-x ]; then \
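For context: `EXTERNAL_GRPC_BACKENDS` is a comma-separated list of `name:launch-script` pairs, so the new `backend_outlines:/build/backend/python/backend_outlines/run.sh` entry is what lets LocalAI discover and spawn this backend over gRPC.
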
2 changes: 2 additions & 0 deletions Makefile
@@ -384,6 +384,7 @@ protogen-python:
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/diffusers/ --grpc_python_out=backend/python/diffusers/ backend/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/backend_outlines/ --grpc_python_out=backend/python/backend_outlines/ backend/backend.proto

## GRPC
# Note: it is duplicated in the Dockerfile
@@ -395,6 +396,7 @@ prepare-extra-conda-environments:
$(MAKE) -C backend/python/huggingface
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/backend_outlines

Owner commented:

> this needs to be added also in the Dockerfile here:
> ## Duplicated from Makefile to avoid having a big layer that's hard to push

Collaborator (author) replied:

> Done

backend-assets/grpc:
11 changes: 11 additions & 0 deletions backend/python/backend_outlines/Makefile
@@ -0,0 +1,11 @@
.PHONY: outlines
outlines:
@echo "Creating virtual environment..."
@conda env create --name outlines --file outlines.yml
@echo "Virtual environment created."

.PHONY: run
run:
@echo "Running outlines..."
bash run.sh
@echo "outlines run."
5 changes: 5 additions & 0 deletions backend/python/backend_outlines/README.md
@@ -0,0 +1,5 @@
# Creating a separate environment for the outlines project

```
make outlines
```
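
Once the environment has been created, the backend can be started with the Makefile's `run` target (`make run`), which simply wraps `bash run.sh`.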
80 changes: 80 additions & 0 deletions backend/python/backend_outlines/backend_outlines.py
@@ -0,0 +1,80 @@
"""
This is the extra gRPC server for the outlines backend of LocalAI.
"""
from concurrent import futures
import argparse
import os
import signal
import sys
import time

import backend_pb2
import backend_pb2_grpc

import grpc

import outlines.text.generate as generate
import outlines.models as models

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS is specified in the environment, use it; otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
"""
BackendServicer is the class that implements the gRPC service
"""
def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

def LoadModel(self, request, context):
try:
            # request.Model should be the name of the model, e.g. gpt2
            if request.Model == "":
                return backend_pb2.Result(success=False, message="Model name is empty")
            # outlines caches models internally, so no extra caching is needed here.
            self.model = models.transformers(request.Model)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Predict(self, request, context):
        try:
            # outlines' continuation API: build a generator bound to the model and
            # the stop sequences, then call it on the prompt.
            # StopPrompts is a repeated string field in backend.proto.
            output = generate.continuation(self.model, stop=list(request.StopPrompts))(str(request.Prompt))
        except Exception as err:
            # Predict returns a Reply (see backend.proto), so report errors
            # through the gRPC context rather than a Result.
            context.set_code(grpc.StatusCode.INTERNAL)
            context.set_details(f"Unexpected {err=}, {type(err)=}")
            return backend_pb2.Reply(message=b"")
        return backend_pb2.Reply(message=bytes(output, encoding='utf-8'))

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
print("Server started. Listening on: " + address, file=sys.stderr)

# Define the signal handler function
def signal_handler(sig, frame):
print("Received termination signal. Shutting down...")
server.stop(0)
sys.exit(0)

# Set the signal handlers for SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)

try:
while True:
time.sleep(_ONE_DAY_IN_SECONDS)
except KeyboardInterrupt:
server.stop(0)

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument(
"--addr", default="localhost:50051", help="The address to bind the server to."
)
args = parser.parse_args()

serve(args.addr)
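
To show how this server is driven end to end, here is a minimal client sketch. It is not part of the PR: the `BackendStub`, `HealthMessage`, `ModelOptions`, and `PredictOptions` names are assumptions based on LocalAI's backend.proto (as compiled into backend_pb2/backend_pb2_grpc by the protogen-python target), and `gpt2` is only a placeholder model name.

```python
# Hypothetical client sketch; message/stub names assume LocalAI's backend.proto.
import grpc

import backend_pb2
import backend_pb2_grpc


def main():
    # Assumes the server was started with:
    #   python backend_outlines.py --addr localhost:50051
    with grpc.insecure_channel("localhost:50051") as channel:
        stub = backend_pb2_grpc.BackendStub(channel)

        # Health check: the servicer above replies with b"OK".
        print(stub.Health(backend_pb2.HealthMessage()).message)

        # Load a transformers model by name (placeholder: gpt2).
        result = stub.LoadModel(backend_pb2.ModelOptions(Model="gpt2"))
        if not result.success:
            raise RuntimeError(result.message)

        # Ask outlines for a continuation of the prompt.
        reply = stub.Predict(backend_pb2.PredictOptions(Prompt="The capital of France is"))
        print(reply.message.decode("utf-8"))


if __name__ == "__main__":
    main()
```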
61 changes: 61 additions & 0 deletions backend/python/backend_outlines/backend_pb2.py

(backend_pb2.py is generated from backend/backend.proto by the protogen-python Makefile target; contents not rendered.)