From 29ef8b417b1b3a666fd7d62f9e7723729d850290 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Wed, 24 Jan 2024 09:54:56 +0000
Subject: [PATCH 1/2] fix

---
 fastchat/model/model_adapter.py |  2 +-
 fastchat/serve/sglang_worker.py | 27 +++++++++++----------------
 tests/test_openai_vision_api.py |  1 +
 3 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/fastchat/model/model_adapter.py b/fastchat/model/model_adapter.py
index acdab09bd..e519e66fb 100644
--- a/fastchat/model/model_adapter.py
+++ b/fastchat/model/model_adapter.py
@@ -2198,7 +2198,7 @@ def match(self, model_path: str):
 
     def get_default_conv_template(self, model_path: str) -> Conversation:
         return get_conv_template("vicuna_v1.1")
-    
+
 
 class YuanAdapter(BaseModelAdapter):
     """The model adapter for Yuan"""
diff --git a/fastchat/serve/sglang_worker.py b/fastchat/serve/sglang_worker.py
index 18c4be361..cee82bb34 100644
--- a/fastchat/serve/sglang_worker.py
+++ b/fastchat/serve/sglang_worker.py
@@ -1,5 +1,8 @@
 """
 A model worker that executes the model based on SGLANG.
+
+Usage:
+python3 -m fastchat.serve.sglang_worker --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --port 30000 --worker-address http://localhost:30000
 """
 
 import argparse
@@ -10,16 +13,7 @@
 from fastapi import FastAPI, Request, BackgroundTasks
 from fastapi.responses import StreamingResponse, JSONResponse
 import uvicorn
-from sglang import (
-    function,
-    image,
-    system,
-    user,
-    assistant,
-    gen,
-    set_default_backend,
-    Runtime,
-)
+import sglang as sgl
 from sglang.srt.hf_transformers_utils import get_tokenizer, get_config
 from sglang.srt.utils import load_image
 
@@ -33,14 +27,14 @@
 app = FastAPI()
 
 
-@function
+@sgl.function
 def pipeline(s, prompt, max_tokens):
     for p in prompt:
         if isinstance(p, str):
             s += p
         else:
-            s += image(p)
-    s += gen("response", max_tokens=max_tokens)
+            s += sgl.image(p)
+    s += sgl.gen("response", max_tokens=max_tokens)
 
 
 class SGLWorker(BaseModelWorker):
@@ -55,7 +49,7 @@ def __init__(
         limit_worker_concurrency: int,
         no_register: bool,
         conv_template: str,
-        runtime: Runtime,
+        runtime: sgl.Runtime,
         trust_remote_code: bool,
     ):
         super().__init__(
@@ -270,14 +264,15 @@ async def api_model_details(request: Request):
         args.model_path if args.tokenizer_path == "" else args.tokenizer_path
     )
 
-    runtime = Runtime(
+    runtime = sgl.Runtime(
         model_path=args.model_path,
         tokenizer_path=args.tokenizer_path,
         trust_remote_code=args.trust_remote_code,
         mem_fraction_static=args.mem_fraction_static,
         tp_size=args.tp_size,
+        log_level="info"
     )
-    set_default_backend(runtime)
+    sgl.set_default_backend(runtime)
 
     worker = SGLWorker(
         args.controller_address,
diff --git a/tests/test_openai_vision_api.py b/tests/test_openai_vision_api.py
index b1eb0ac8b..a54d7d575 100644
--- a/tests/test_openai_vision_api.py
+++ b/tests/test_openai_vision_api.py
@@ -1,5 +1,6 @@
 """
 Test the OpenAI compatible server
+
 Launch:
 python3 launch_openai_api_test_server.py --multimodal
 """

From 9761adfe3784b3f446c47887f02f196c0e40c1e1 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Wed, 24 Jan 2024 09:55:09 +0000
Subject: [PATCH 2/2] format

---
 fastchat/serve/sglang_worker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fastchat/serve/sglang_worker.py b/fastchat/serve/sglang_worker.py
index cee82bb34..1938210d9 100644
--- a/fastchat/serve/sglang_worker.py
+++ b/fastchat/serve/sglang_worker.py
@@ -270,7 +270,7 @@ async def api_model_details(request: Request):
         trust_remote_code=args.trust_remote_code,
         mem_fraction_static=args.mem_fraction_static,
         tp_size=args.tp_size,
-        log_level="info"
+        log_level="info",
     )
     sgl.set_default_backend(runtime)
 