feat: Mistral support (#571)
* feat: Mistral support

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>

* ci: auto fixes from pre-commit.ci

For more information, see https://pre-commit.ci

* chore: fix style

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update README docs about mistral

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
aarnphm and pre-commit-ci[bot] authored Nov 7, 2023
1 parent d9a7b6a commit 4d356f4
Showing 7 changed files with 100 additions and 26 deletions.
48 changes: 48 additions & 0 deletions README.md
@@ -166,6 +166,54 @@ openllm start opt --model-id facebook/opt-2.7b
OpenLLM currently supports the following models. By default, OpenLLM doesn't include dependencies to run all models. The extra model-specific dependencies can be installed with the instructions below.
<details>
<summary>Mistral</summary>
### Quickstart
Run the following commands to quickly spin up a Mistral server and send a request to it.
```bash
openllm start mistral --model-id HuggingFaceH4/zephyr-7b-beta
export OPENLLM_ENDPOINT=http://localhost:3000
openllm query 'What are large language models?'
```
> [!NOTE]
> Any Mistral variant can be deployed with OpenLLM.
> Visit the [Hugging Face Model Hub](https://huggingface.co/models?sort=trending&search=mistral) to explore more Mistral-compatible models.
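For programmatic access, the same query can be sent to the running server over plain HTTP. Below is a minimal sketch using `requests`; the `/v1/generate` endpoint path and the JSON payload shape are assumptions, so verify them against the running server's own API documentation.
```python
import requests

# A minimal sketch of querying the server over HTTP. The endpoint path
# and payload shape are assumptions -- check the server's API docs for
# the exact schema.
response = requests.post(
    'http://localhost:3000/v1/generate',
    json={'prompt': 'What are large language models?'},
    timeout=60,
)
print(response.json())
```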
### Supported models
You can specify any of the following Mistral models by using `--model-id`.
- [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
- [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
- [amazon/MistralLite](https://huggingface.co/amazon/MistralLite)
- [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
- [HuggingFaceH4/zephyr-7b-alpha](https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha)
- Any other model that strictly follows the [MistralForCausalLM](https://huggingface.co/docs/transformers/main/en/model_doc/mistral#transformers.MistralForCausalLM) architecture
### Supported backends
- PyTorch (Default):
```bash
openllm start mistral --model-id HuggingFaceH4/zephyr-7b-beta --backend pt
```
- vLLM (Recommended):
```bash
pip install "openllm[vllm]"
openllm start mistral --model-id HuggingFaceH4/zephyr-7b-beta --backend vllm
```
> [!NOTE]
> Quantization and adapters are currently not supported when using the vLLM backend.
</details>
<details>
<summary>Llama</summary>
2 changes: 2 additions & 0 deletions openllm-core/src/openllm_core/__init__.py
@@ -18,6 +18,7 @@
from .config import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
from .config import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING
from .config import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
from .config import START_MISTRAL_COMMAND_DOCSTRING as START_MISTRAL_COMMAND_DOCSTRING
from .config import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
from .config import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
from .config import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING
@@ -30,6 +31,7 @@
from .config import FlanT5Config as FlanT5Config
from .config import GPTNeoXConfig as GPTNeoXConfig
from .config import LlamaConfig as LlamaConfig
from .config import MistralConfig as MistralConfig
from .config import MPTConfig as MPTConfig
from .config import OPTConfig as OPTConfig
from .config import StableLMConfig as StableLMConfig
25 changes: 0 additions & 25 deletions openllm-core/src/openllm_core/_configuration.py
@@ -1511,31 +1511,6 @@ def to_click_options(cls, f: AnyCallable) -> click.Command:
def peft_task_type(cls) -> str:
return _PEFT_TASK_TYPE_TARGET_MAPPING[cls.__openllm_model_type__]

def sanitize_parameters(self, prompt: str, **attrs: t.Any) -> tuple[str, DictStrAny, DictStrAny]:
'''This handler will sanitize all attrs and set up the prompt text.
It takes a prompt that is given by the user, and attrs that can be parsed with the prompt.
Returns a tuple of three items:
- The processed prompt text
- The attributes dictionary that can be passed into LLMConfig to generate a GenerationConfig
- The attributes dictionary that will be passed into `self.postprocess_generate`.
`openllm.LLM` also has a sanitize_parameters that will just call this method.
'''
return prompt, attrs, attrs

def postprocess_generate(self, prompt: str, generation_result: t.Any, **attrs: t.Any) -> t.Any:
'''This handler will postprocess generation results from LLM.generate and output nicely formatted results (if the LLM decides to do so).
You can customize how the output of the LLM looks with this hook. By default, it is a simple echo.
> [!NOTE]
> This will be used from the client side.
`openllm.LLM` also has a postprocess_generate that will just call this method.
'''
return generation_result

converter.register_unstructure_hook_factory(lambda cls: lenient_issubclass(cls, LLMConfig),
lambda cls: make_dict_unstructure_fn(cls, converter, _cattrs_omit_if_default=False, _cattrs_use_linecache=True))

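For reference, the two hooks deleted above followed the contract described in their docstrings. Below is a minimal sketch reconstructed from those docstrings; the class and the prompt template are illustrative only, not the actual openllm_core implementation.
```python
import typing as t

# A minimal sketch of the removed hook contract. In the real code these
# methods lived on openllm_core.LLMConfig subclasses.
class ExampleConfig:
    def sanitize_parameters(self, prompt: str, **attrs: t.Any) -> tuple:
        # Wrap the user prompt in a (hypothetical) template, then return the
        # processed prompt plus the attrs for GenerationConfig and for
        # postprocess_generate.
        return f'### Instruction:\n{prompt}', attrs, attrs

    def postprocess_generate(self, prompt: str, generation_result: t.Any, **attrs: t.Any) -> t.Any:
        # Default behaviour per the removed docstring: a simple echo.
        return generation_result
```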
2 changes: 2 additions & 0 deletions openllm-core/src/openllm_core/config/__init__.py
@@ -17,6 +17,8 @@
from .configuration_gpt_neox import GPTNeoXConfig as GPTNeoXConfig
from .configuration_llama import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
from .configuration_llama import LlamaConfig as LlamaConfig
from .configuration_mistral import START_MISTRAL_COMMAND_DOCSTRING as START_MISTRAL_COMMAND_DOCSTRING
from .configuration_mistral import MistralConfig as MistralConfig
from .configuration_mpt import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
from .configuration_mpt import MPTConfig as MPTConfig
from .configuration_opt import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
2 changes: 1 addition & 1 deletion openllm-core/src/openllm_core/config/configuration_auto.py
@@ -37,7 +37,7 @@
# NOTE: This is the entrypoint when adding new model config
CONFIG_MAPPING_NAMES = OrderedDict([('chatglm', 'ChatGLMConfig'), ('dolly_v2', 'DollyV2Config'), ('falcon', 'FalconConfig'), ('flan_t5', 'FlanT5Config'), ('gpt_neox', 'GPTNeoXConfig'),
('llama', 'LlamaConfig'), ('mpt', 'MPTConfig'), ('opt', 'OPTConfig'), ('stablelm', 'StableLMConfig'), ('starcoder', 'StarCoderConfig'),
('baichuan', 'BaichuanConfig')])
('mistral', 'MistralConfig'), ('baichuan', 'BaichuanConfig')])

class _LazyConfigMapping(OrderedDictType, ReprMixin):
def __init__(self, mapping: OrderedDict[LiteralString, LiteralString]):
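The `_LazyConfigMapping` shown above is what turns these string entries into config classes on first lookup. Below is a minimal sketch of that lazy-import pattern; the module layout is an assumption, and this is not the actual implementation.
```python
from collections import OrderedDict
import importlib

# A minimal sketch (not the actual _LazyConfigMapping): the string class
# name is only imported when the key is first accessed.
class LazyConfigMapping:
    def __init__(self, mapping: 'OrderedDict[str, str]'):
        self._mapping = mapping
        self._cache: dict = {}

    def __getitem__(self, key: str):
        if key not in self._cache:
            # Assumed layout: openllm_core.config re-exports each class.
            module = importlib.import_module('openllm_core.config')
            self._cache[key] = getattr(module, self._mapping[key])
        return self._cache[key]

mapping = LazyConfigMapping(OrderedDict([('mistral', 'MistralConfig')]))
MistralConfig = mapping['mistral']  # import resolved here, on first access
```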
46 changes: 46 additions & 0 deletions openllm-core/src/openllm_core/config/configuration_mistral.py
@@ -0,0 +1,46 @@
from __future__ import annotations

import openllm_core

START_MISTRAL_COMMAND_DOCSTRING = '''\
Run an LLMServer for Mistral models.
\b
> See more information about Mistral at [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
\b
## Usage
By default, this model will use the PyTorch backend for inference. However, if vLLM is installed, it will be used instead.
\b
- To use vLLM, set the environment variable ``OPENLLM_BACKEND="vllm"``
\b
The Mistral Runner will use mistralai/Mistral-7B-Instruct-v0.1 as the default model. To switch to any other saved pretrained or fine-tuned Mistral, set ``OPENLLM_MODEL_ID='HuggingFaceH4/zephyr-7b-alpha'``
or pass the `--model-id` flag when running ``openllm start mistral``:
\b
$ openllm start mistral --model-id HuggingFaceH4/zephyr-7b-alpha
'''
DEFAULT_PROMPT_TEMPLATE = '''{instruction}'''

class MistralConfig(openllm_core.LLMConfig):
"""Mistral's [paper](https://arxiv.org/abs/2310.06825) and first released by [MistralAI](https://mistral.ai/news/announcing-mistral-7b/).
Mistral-7B-v0.1 is Mistral AI\'s first Large Language Model (LLM).
Refer to [Mistral's HuggingFace page](https://huggingface.co/docs/transformers/v4.35.0/en/model_doc/mistral#overview) for more information.
"""
__config__ = {
'name_type': 'lowercase',
'url': 'https://huggingface.co/docs/transformers/v4.35.0/en/model_doc/mistral#overview',
'default_id': 'mistralai/Mistral-7B-Instruct-v0.1',
'architecture': 'MistralForCausalLM',
'model_ids': ['mistralai/Mistral-7B-v0.1', 'mistralai/Mistral-7B-Instruct-v0.1', 'amazon/MistralLite', 'HuggingFaceH4/zephyr-7b-beta', 'HuggingFaceH4/zephyr-7b-alpha'],
}

class GenerationConfig:
top_k: int = 12
temperature: float = 0.75
max_new_tokens: int = 256
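A quick way to sanity-check the new class is to instantiate it and read back its defaults. The subscript access in this sketch is an assumption based on how `LLMConfig` fields are read elsewhere in the codebase; verify against `openllm_core.LLMConfig`.
```python
import openllm_core

# A minimal sketch of exercising the new config class; the subscript
# access pattern is an assumption, not confirmed by this diff.
config = openllm_core.config.MistralConfig()
print(config['default_id'])      # expected: 'mistralai/Mistral-7B-Instruct-v0.1'
print(config['max_new_tokens'])  # expected: 256, from GenerationConfig above
```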
1 change: 1 addition & 0 deletions openllm-python/src/openllm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from openllm_core.config import OPTConfig as OPTConfig
from openllm_core.config import StableLMConfig as StableLMConfig
from openllm_core.config import StarCoderConfig as StarCoderConfig
from openllm_core.config import MistralConfig as MistralConfig

from . import exceptions as exceptions
from . import utils as utils
