experimental: Cohere compatible endpoints. (#644)
* feat: add generate endpoint

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update generation

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* fix(cohere): generate endpoints

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: --wip--

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* feat: update testing clients and chat implementation

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: disable schemas for easter eggs

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
aarnphm authored Nov 14, 2023
1 parent 0bf6ec7 commit b0ab8cc
Showing 8 changed files with 638 additions and 16 deletions.
40 changes: 40 additions & 0 deletions examples/cohere_client.py
@@ -0,0 +1,40 @@
import os

import cohere
from cohere.responses.chat import StreamTextGeneration

co = cohere.Client(api_key='na', api_url=os.getenv('OPENLLM_ENDPOINT', 'http://localhost:3000') + '/cohere')

generation = co.generate(prompt='Write me a tag line for an ice cream shop.')
print(generation.generations[0].text)

for it in co.generate(prompt='Write me a tag line for an ice cream shop.', stream=True):
  print(it.text, flush=True, end='')

response = co.chat(
  message="What is Epicurus's philosophy of life?",
  temperature=0.6,
  chat_history=[
    {'role': 'User', 'message': 'What is the meaning of life?'},
    {
      'role': 'Chatbot',
      'message': "Many thinkers have proposed theories about the meaning of life. \n\nFor instance, Jean-Paul Sartre believed that existence precedes essence, meaning that the essence, or meaning, of one's life arises after birth. Søren Kierkegaard argued that life is full of absurdity and that one must make one's own values in an indifferent world. Arthur Schopenhauer stated that one's life reflects one's will, and that the will (or life) is without aim, irrational, and full of pain. \n\nEarly thinkers such as John Locke, Jean-Jacques Rousseau and Adam Smith believed that humankind should find meaning through labour, property and social contracts. \n\nAnother way of thinking about the meaning of life is to focus on the pursuit of happiness or pleasure. Aristippus of Cyrene, a student of Socrates, founded an early Socratic school that emphasised one aspect of Socrates's teachings: that happiness is the end goal of moral action and that pleasure is the supreme good. Epicurus taught that the pursuit of modest pleasures was the greatest good, as it leads to tranquility, freedom from fear and absence of bodily pain. \n\nUltimately, the meaning of life is a subjective concept and what provides life with meaning differs for each individual.",
    },
  ],
)
print(response)

for it in co.chat(
  message="What is Epicurus's philosophy of life?",
  temperature=0.6,
  chat_history=[
    {'role': 'User', 'message': 'What is the meaning of life?'},
    {
      'role': 'Chatbot',
      'message': "Many thinkers have proposed theories about the meaning of life. \n\nFor instance, Jean-Paul Sartre believed that existence precedes essence, meaning that the essence, or meaning, of one's life arises after birth. Søren Kierkegaard argued that life is full of absurdity and that one must make one's own values in an indifferent world. Arthur Schopenhauer stated that one's life reflects one's will, and that the will (or life) is without aim, irrational, and full of pain. \n\nEarly thinkers such as John Locke, Jean-Jacques Rousseau and Adam Smith believed that humankind should find meaning through labour, property and social contracts. \n\nAnother way of thinking about the meaning of life is to focus on the pursuit of happiness or pleasure. Aristippus of Cyrene, a student of Socrates, founded an early Socratic school that emphasised one aspect of Socrates's teachings: that happiness is the end goal of moral action and that pleasure is the supreme good. Epicurus taught that the pursuit of modest pleasures was the greatest good, as it leads to tranquility, freedom from fear and absence of bodily pain. \n\nUltimately, the meaning of life is a subjective concept and what provides life with meaning differs for each individual.",
    },
  ],
  stream=True,
):
  if isinstance(it, StreamTextGeneration):
    print(it.text, flush=True, end='')
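
Note: the example above drives the new endpoints through Cohere's official Python SDK by pointing api_url at the server's /cohere mount. The same mount can also be exercised without the SDK; the sketch below assumes the mounted routes mirror Cohere's public REST layout (POST /cohere/v1/generate), which is an assumption rather than something this diff pins down.

# Hedged sketch: call the Cohere-compatible mount over plain HTTP.
# The '/cohere/v1/generate' path and response shape are assumptions based on
# Cohere's public API layout, not confirmed by this diff.
import requests

resp = requests.post(
  'http://localhost:3000/cohere/v1/generate',
  json={'prompt': 'Write me a tag line for an ice cream shop.', 'max_tokens': 64, 'stream': False},
  timeout=30,
)
resp.raise_for_status()
print(resp.json())
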
43 changes: 41 additions & 2 deletions openllm-core/src/openllm_core/_configuration.py
@@ -1,6 +1,7 @@
# mypy: disable-error-code="attr-defined,no-untyped-call,type-var,operator,arg-type,no-redef,misc"
from __future__ import annotations
import copy
import importlib.util
import logging
import os
import sys
@@ -31,15 +32,17 @@
  Self,
  overload,
)
-from .exceptions import ForbiddenAttributeError
+from .exceptions import ForbiddenAttributeError, MissingDependencyError
from .utils import LazyLoader, ReprMixin, codegen, converter, dantic, field_env_key, first_not_none, lenient_issubclass
from .utils.peft import PEFT_TASK_TYPE_TARGET_MAPPING, FineTuneConfig

if t.TYPE_CHECKING:
  import click
  import transformers
  import vllm
  from attrs import AttrsInstance

  from openllm.protocol.cohere import CohereChatRequest, CohereGenerateRequest
  from openllm.protocol.openai import ChatCompletionRequest, CompletionRequest
else:
  vllm = LazyLoader('vllm', globals(), 'vllm')
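
The TYPE_CHECKING/LazyLoader split above keeps heavy dependencies such as vllm visible to type checkers while deferring the real import to first use at runtime. A minimal sketch of that pattern (not the actual openllm_core.utils.LazyLoader implementation):

# Minimal module-proxy sketch: the real import happens on first attribute access.
import importlib
import types

class _LazyModule(types.ModuleType):
  def __init__(self, name: str, parent_globals: dict, import_path: str) -> None:
    super().__init__(name)
    self._parent_globals = parent_globals
    self._import_path = import_path

  def __getattr__(self, item: str):
    module = importlib.import_module(self._import_path)
    self._parent_globals[self.__name__] = module  # swap the proxy out after loading
    return getattr(module, item)

# vllm = _LazyModule('vllm', globals(), 'vllm')  # same call shape as the line above
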
@@ -1460,7 +1463,28 @@ def to_generation_config(self, return_as_dict: bool = False) -> transformers.Gen
  def to_sampling_config(self) -> vllm.SamplingParams:
    return self.sampling_config.build()

-  def with_openai_request(self, request: ChatCompletionRequest | CompletionRequest) -> dict[str, t.Any]:
+  @overload
+  def with_request(self, request: ChatCompletionRequest | CompletionRequest) -> dict[str, t.Any]: ...

+  @overload
+  def with_request(self, request: CohereChatRequest | CohereGenerateRequest) -> dict[str, t.Any]: ...

+  def with_request(self, request: AttrsInstance) -> dict[str, t.Any]:
+    if importlib.util.find_spec('openllm') is None:
+      raise MissingDependencyError(
+        "'openllm' is required to use 'with_request'. Make sure to install with 'pip install openllm'."
+      )
+    from openllm.protocol.cohere import CohereChatRequest, CohereGenerateRequest
+    from openllm.protocol.openai import ChatCompletionRequest, CompletionRequest

+    if isinstance(request, (ChatCompletionRequest, CompletionRequest)):
+      return self._with_openai_request(request)
+    elif isinstance(request, (CohereChatRequest, CohereGenerateRequest)):
+      return self._with_cohere_request(request)
+    else:
+      raise TypeError(f'Unknown request type {type(request)}')

+  def _with_openai_request(self, request: ChatCompletionRequest | CompletionRequest) -> dict[str, t.Any]:
    d = dict(
      temperature=first_not_none(request.temperature, self['temperature']),
      top_p=first_not_none(request.top_p, self['top_p']),
@@ -1476,6 +1500,21 @@ def with_openai_request(self, request: ChatCompletionRequest | CompletionRequest
    d['logprobs'] = first_not_none(request.logprobs, default=self['logprobs'])
    return d

+  def _with_cohere_request(self, request: CohereGenerateRequest | CohereChatRequest) -> dict[str, t.Any]:
+    d = dict(
+      max_new_tokens=first_not_none(request.max_tokens, default=self['max_new_tokens']),
+      temperature=first_not_none(request.temperature, default=self['temperature']),
+      top_k=first_not_none(request.k, default=self['top_k']),
+      top_p=first_not_none(request.p, default=self['top_p']),
+    )
+    if hasattr(request, 'num_generations'):
+      d['n'] = first_not_none(request.num_generations, default=self['n'])
+    if hasattr(request, 'frequency_penalty'):
+      d['frequency_penalty'] = first_not_none(request.frequency_penalty, default=self['frequency_penalty'])
+    if hasattr(request, 'presence_penalty'):
+      d['presence_penalty'] = first_not_none(request.presence_penalty, default=self['presence_penalty'])
+    return d

  @classmethod
  def to_click_options(cls, f: AnyCallable) -> click.Command:
    """Convert current configuration to click options.
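Both _with_openai_request and _with_cohere_request above reduce to the same merge rule: a value set on the request wins, otherwise the config default fills in. A self-contained sketch of that behaviour (the real first_not_none lives in openllm_core.utils; this reimplementation is an assumption about its semantics):

import typing as t

def first_not_none(*args: t.Any, default: t.Any = None) -> t.Any:
  # return the first non-None positional value, else the default
  return next((arg for arg in args if arg is not None), default)

config = {'temperature': 0.9, 'top_k': 50, 'top_p': 1.0, 'max_new_tokens': 256}
request = {'temperature': 0.2, 'k': None, 'p': 0.75, 'max_tokens': None}  # Cohere-style field names

merged = dict(
  max_new_tokens=first_not_none(request['max_tokens'], default=config['max_new_tokens']),
  temperature=first_not_none(request['temperature'], default=config['temperature']),
  top_k=first_not_none(request['k'], default=config['top_k']),
  top_p=first_not_none(request['p'], default=config['top_p']),
)
print(merged)  # {'max_new_tokens': 256, 'temperature': 0.2, 'top_k': 50, 'top_p': 0.75}
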
15 changes: 11 additions & 4 deletions openllm-python/src/openllm/entrypoints/__init__.py
@@ -8,21 +8,28 @@
"""

from __future__ import annotations
import importlib
import typing as t

from openllm_core.utils import LazyModule

from . import hf as hf, openai as openai

if t.TYPE_CHECKING:
  import bentoml
  import openllm

-_import_structure: dict[str, list[str]] = {'openai': [], 'hf': []}

+class IntegrationModule(t.Protocol):
+  def mount_to_svc(self, svc: bentoml.Service, llm: openllm.LLM[t.Any, t.Any]) -> bentoml.Service: ...


+_import_structure: dict[str, list[str]] = {'openai': [], 'hf': [], 'cohere': []}


def mount_entrypoints(svc: bentoml.Service, llm: openllm.LLM[t.Any, t.Any]) -> bentoml.Service:
-  return openai.mount_to_svc(hf.mount_to_svc(svc, llm), llm)
+  for module_name in _import_structure:
+    module = t.cast(IntegrationModule, importlib.import_module(f'.{module_name}', __name__))
+    svc = module.mount_to_svc(svc, llm)
+  return svc


__lazy = LazyModule(
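mount_entrypoints now discovers integrations by iterating _import_structure, so adding a backend is a matter of dropping in a module that satisfies the IntegrationModule protocol. A hedged sketch of the expected module surface (the module name and the ASGI wiring in the comments are illustrative, not taken from this diff):

# entrypoints/example_backend.py -- hypothetical module name
from __future__ import annotations
import typing as t

import bentoml
import openllm

def mount_to_svc(svc: bentoml.Service, llm: openllm.LLM[t.Any, t.Any]) -> bentoml.Service:
  # Build an ASGI app exposing this backend's routes, then mount it, e.g.:
  #   svc.mount_asgi_app(app, path='/example')
  # Returning the service keeps the chaining loop in mount_entrypoints working.
  return svc
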
70 changes: 66 additions & 4 deletions openllm-python/src/openllm/entrypoints/_openapi.py
@@ -396,7 +396,7 @@
summary: Describes a model offering that can be used with the API.
tags:
- HF
-x-bentoml-name: adapters_map
+x-bentoml-name: hf_adapters
responses:
  200:
    description: Return list of LoRA adapters.
@@ -416,6 +416,65 @@
          $ref: '#/components/schemas/HFErrorResponse'
    description: Not Found
"""
COHERE_GENERATE_SCHEMA = """\
---
consumes:
- application/json
description: >-
  Given a prompt, the model will return one or more predicted completions, and
  can also return the probabilities of alternative tokens at each position.
operationId: cohere__generate
produces:
- application/json
tags:
- Cohere
x-bentoml-name: cohere_generate
summary: Creates a completion for the provided prompt and parameters.
requestBody:
  required: true
  content:
    application/json:
      schema:
        $ref: '#/components/schemas/CohereGenerateRequest'
      examples:
        one-shot:
          summary: One-shot input example
          value:
            prompt: This is a test
            max_tokens: 256
            temperature: 0.7
            p: 0.43
            k: 12
            num_generations: 2
            stream: false
        streaming:
          summary: Streaming input example
          value:
            prompt: This is a test
            max_tokens: 256
            temperature: 0.7
            p: 0.43
            k: 12
            num_generations: 2
            stream: true
            stop_sequences:
            - "\\n"
            - "<|endoftext|>"
"""
COHERE_CHAT_SCHEMA = """\
---
consumes:
- application/json
description: >-
  Given a list of messages comprising a conversation, the model will return a response.
operationId: cohere__chat
produces:
- application/json
tags:
- Cohere
x-bentoml-name: cohere_chat
summary: Creates a model response for the given chat conversation.
"""

_SCHEMAS = {k[:-7].lower(): v for k, v in locals().items() if k.endswith('_SCHEMA')}
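
The _SCHEMAS comprehension keys each docstring by stripping the 7-character '_SCHEMA' suffix and lowercasing, so COHERE_GENERATE_SCHEMA registers as 'cohere_generate'. A quick illustration of the naming rule:

names = ['COHERE_GENERATE_SCHEMA', 'COHERE_CHAT_SCHEMA']
print({k: k[:-7].lower() for k in names})
# {'COHERE_GENERATE_SCHEMA': 'cohere_generate', 'COHERE_CHAT_SCHEMA': 'cohere_chat'}
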

@@ -485,12 +544,15 @@ def get_schema(self, routes: list[BaseRoute], mount_path: str | None = None) ->


def get_generator(
-  title: str, components: list[type[AttrsInstance]] | None = None, tags: list[dict[str, t.Any]] | None = None
+  title: str,
+  components: list[type[AttrsInstance]] | None = None,
+  tags: list[dict[str, t.Any]] | None = None,
+  inject: bool = True,
) -> OpenLLMSchemaGenerator:
  base_schema: dict[str, t.Any] = dict(info={'title': title, 'version': API_VERSION}, version=OPENAPI_VERSION)
-  if components:
+  if components and inject:
    base_schema['components'] = {'schemas': {c.__name__: component_schema_generator(c) for c in components}}
-  if tags is not None and tags:
+  if tags is not None and tags and inject:
    base_schema['tags'] = tags
  return OpenLLMSchemaGenerator(base_schema)
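
The new inject flag lets a caller build a schema generator without writing components or tags into the base OpenAPI document; per the commit message, this backs the "disable schemas for easter eggs" change. A hypothetical call (passing CohereGenerateRequest as a component here is an assumption, not shown in this hunk):

# Keep the generator but leave component schemas/tags out of the rendered spec.
generator = get_generator(
  'cohere',
  components=[CohereGenerateRequest],  # assumed component
  tags=[{'name': 'Cohere', 'description': 'Cohere-compatible endpoints'}],
  inject=False,
)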

