Skip to content

Commit

Permalink
new-release? (#494)
Browse files Browse the repository at this point in the history
* new-release?

* rm conint

* fix: optimum classifier

* lint
  • Loading branch information
michaelfeil authored Dec 10, 2024
1 parent edd9107 commit d614094
Show file tree
Hide file tree
Showing 14 changed files with 69 additions and 48 deletions.
2 changes: 1 addition & 1 deletion docs/assets/openapi.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@ class OpenAIEmbeddingInputAudio:
model (Union[Unset, str]): Default: 'default/not-specified'.
encoding_format (Union[Unset, EmbeddingEncodingFormat]):
user (Union[None, Unset, str]):
dimensions (Union[Unset, int]): Default: 0.
modality (Union[Unset, OpenAIEmbeddingInputAudioModality]): Default: OpenAIEmbeddingInputAudioModality.AUDIO.
"""

input_: Union[List[str], str]
model: Union[Unset, str] = "default/not-specified"
encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET
user: Union[None, Unset, str] = UNSET
dimensions: Union[Unset, int] = 0
modality: Union[Unset, OpenAIEmbeddingInputAudioModality] = OpenAIEmbeddingInputAudioModality.AUDIO
additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict)

Expand Down Expand Up @@ -52,6 +54,8 @@ def to_dict(self) -> Dict[str, Any]:
else:
user = self.user

dimensions = self.dimensions

modality: Union[Unset, str] = UNSET
if not isinstance(self.modality, Unset):
modality = self.modality.value
Expand All @@ -69,6 +73,8 @@ def to_dict(self) -> Dict[str, Any]:
field_dict["encoding_format"] = encoding_format
if user is not UNSET:
field_dict["user"] = user
if dimensions is not UNSET:
field_dict["dimensions"] = dimensions
if modality is not UNSET:
field_dict["modality"] = modality

Expand Down Expand Up @@ -118,6 +124,8 @@ def _parse_user(data: object) -> Union[None, Unset, str]:

user = _parse_user(d.pop("user", UNSET))

dimensions = d.pop("dimensions", UNSET)

_modality = d.pop("modality", UNSET)
modality: Union[Unset, OpenAIEmbeddingInputAudioModality]
if isinstance(_modality, Unset):
Expand All @@ -130,6 +138,7 @@ def _parse_user(data: object) -> Union[None, Unset, str]:
model=model,
encoding_format=encoding_format,
user=user,
dimensions=dimensions,
modality=modality,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@ class OpenAIEmbeddingInputImage:
model (Union[Unset, str]): Default: 'default/not-specified'.
encoding_format (Union[Unset, EmbeddingEncodingFormat]):
user (Union[None, Unset, str]):
dimensions (Union[Unset, int]): Default: 0.
modality (Union[Unset, OpenAIEmbeddingInputImageModality]): Default: OpenAIEmbeddingInputImageModality.IMAGE.
"""

input_: Union[List[str], str]
model: Union[Unset, str] = "default/not-specified"
encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET
user: Union[None, Unset, str] = UNSET
dimensions: Union[Unset, int] = 0
modality: Union[Unset, OpenAIEmbeddingInputImageModality] = OpenAIEmbeddingInputImageModality.IMAGE
additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict)

Expand Down Expand Up @@ -52,6 +54,8 @@ def to_dict(self) -> Dict[str, Any]:
else:
user = self.user

dimensions = self.dimensions

modality: Union[Unset, str] = UNSET
if not isinstance(self.modality, Unset):
modality = self.modality.value
Expand All @@ -69,6 +73,8 @@ def to_dict(self) -> Dict[str, Any]:
field_dict["encoding_format"] = encoding_format
if user is not UNSET:
field_dict["user"] = user
if dimensions is not UNSET:
field_dict["dimensions"] = dimensions
if modality is not UNSET:
field_dict["modality"] = modality

Expand Down Expand Up @@ -118,6 +124,8 @@ def _parse_user(data: object) -> Union[None, Unset, str]:

user = _parse_user(d.pop("user", UNSET))

dimensions = d.pop("dimensions", UNSET)

_modality = d.pop("modality", UNSET)
modality: Union[Unset, OpenAIEmbeddingInputImageModality]
if isinstance(_modality, Unset):
Expand All @@ -130,6 +138,7 @@ def _parse_user(data: object) -> Union[None, Unset, str]:
model=model,
encoding_format=encoding_format,
user=user,
dimensions=dimensions,
modality=modality,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ class OpenAIEmbeddingInputText:
model (Union[Unset, str]): Default: 'default/not-specified'.
encoding_format (Union[Unset, EmbeddingEncodingFormat]):
user (Union[None, Unset, str]):
dimensions (Union[Unset, int]): Default: 0.
modality (Union[Unset, OpenAIEmbeddingInputTextModality]): Default: OpenAIEmbeddingInputTextModality.TEXT.
"""

input_: Union[List[str], str]
model: Union[Unset, str] = "default/not-specified"
encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET
user: Union[None, Unset, str] = UNSET
dimensions: Union[Unset, int] = 0
modality: Union[Unset, OpenAIEmbeddingInputTextModality] = OpenAIEmbeddingInputTextModality.TEXT
additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict)

Expand All @@ -49,6 +51,8 @@ def to_dict(self) -> Dict[str, Any]:
else:
user = self.user

dimensions = self.dimensions

modality: Union[Unset, str] = UNSET
if not isinstance(self.modality, Unset):
modality = self.modality.value
Expand All @@ -66,6 +70,8 @@ def to_dict(self) -> Dict[str, Any]:
field_dict["encoding_format"] = encoding_format
if user is not UNSET:
field_dict["user"] = user
if dimensions is not UNSET:
field_dict["dimensions"] = dimensions
if modality is not UNSET:
field_dict["modality"] = modality

Expand Down Expand Up @@ -106,6 +112,8 @@ def _parse_user(data: object) -> Union[None, Unset, str]:

user = _parse_user(d.pop("user", UNSET))

dimensions = d.pop("dimensions", UNSET)

_modality = d.pop("modality", UNSET)
modality: Union[Unset, OpenAIEmbeddingInputTextModality]
if isinstance(_modality, Unset):
Expand All @@ -118,6 +126,7 @@ def _parse_user(data: object) -> Union[None, Unset, str]:
model=model,
encoding_format=encoding_format,
user=user,
dimensions=dimensions,
modality=modality,
)

Expand Down
2 changes: 1 addition & 1 deletion libs/client_infinity/infinity_client/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "infinity_client"
version = "0.0.72"
version = "0.0.73"
description = "A client library for accessing ♾️ Infinity - Embedding Inference Server"
authors = []
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class _OpenAIEmbeddingInput(BaseModel):
model: str = "default/not-specified"
encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float
user: Optional[str] = None
dimensions: Optional[Annotated[int, Field(strict=True, gt=0, lt=8193)]] = None
dimensions: int = 0


class _OpenAIEmbeddingInput_Text(_OpenAIEmbeddingInput):
Expand Down
25 changes: 16 additions & 9 deletions libs/infinity_emb/infinity_emb/inference/batch_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
ImageClassType,
ModelCapabilites,
ModelNotDeployedError,
MatryoshkaDimError,
OverloadStatus,
PredictSingle,
PrioritizedQueueItem,
Expand Down Expand Up @@ -61,6 +62,18 @@ def submit(self, *args, **kwargs):
return self._tp.submit(*args, **kwargs)


def matryososka_slice(
    embeddings: list[np.ndarray], matryoshka_dim: Optional[int]
) -> list[np.ndarray]:
    """Truncate each embedding to its first `matryoshka_dim` components.

    Matryoshka-style embedding models pack coarse-to-fine information into
    the vector, so a prefix slice is itself a usable lower-dimensional
    embedding.

    Args:
        embeddings: 1-D embedding vectors, all assumed to share one length
            (the range check only inspects the first vector).
        matryoshka_dim: target dimension. Falsy values (``None`` or ``0``,
            the API default) mean "no truncation".

    Returns:
        The embeddings, each sliced to ``matryoshka_dim`` entries, or the
        input unchanged when no truncation was requested (or the list is
        empty — previously this case raised a bare IndexError).

    Raises:
        MatryoshkaDimError: if ``matryoshka_dim`` is outside
            ``[1, len(embeddings[0])]``.
    """
    if matryoshka_dim and embeddings:
        if 1 > matryoshka_dim or matryoshka_dim > len(embeddings[0]):
            raise MatryoshkaDimError(
                f"matryoshka_dim={matryoshka_dim} is not in a valid range. Select between 1 and {len(embeddings[0])}."
            )
        return [e[:matryoshka_dim] for e in embeddings]
    return embeddings


class BatchHandler:
def __init__(
self,
Expand Down Expand Up @@ -159,9 +172,7 @@ async def embed(
input_sentences = [EmbeddingSingle(sentence=s) for s in sentences]

embeddings, usage = await self._schedule(input_sentences)
if matryoshka_dim:
embeddings = [embedding[:matryoshka_dim] for embedding in embeddings]
return embeddings, usage
return matryososka_slice(embeddings, matryoshka_dim), usage

async def rerank(
self,
Expand Down Expand Up @@ -267,9 +278,7 @@ async def image_embed(

items = await resolve_images(images)
embeddings, usage = await self._schedule(items)
if matryoshka_dim:
embeddings = [embedding[:matryoshka_dim] for embedding in embeddings]
return embeddings, usage
return matryososka_slice(embeddings, matryoshka_dim), usage

async def audio_embed(
self, *, audios: list[Union[str, bytes]], matryoshka_dim: Optional[int] = None
Expand Down Expand Up @@ -299,9 +308,7 @@ async def audio_embed(
getattr(self.model_worker[0]._model, "sampling_rate", -42),
)
embeddings, usage = await self._schedule(items)
if matryoshka_dim:
embeddings = [embedding[:matryoshka_dim] for embedding in embeddings]
return embeddings, usage
return matryososka_slice(embeddings, matryoshka_dim), usage

async def _schedule(self, list_queueitem: Sequence[AbstractSingle]) -> tuple[list[Any], int]:
"""adds list of items to the queue and awaits until these are completed."""
Expand Down
18 changes: 7 additions & 11 deletions libs/infinity_emb/infinity_emb/infinity_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
InferenceEngine,
Modality,
ModelCapabilites,
MatryoshkaDimError,
ModelNotDeployedError,
PoolingMethod,
)
Expand Down Expand Up @@ -390,14 +391,9 @@ def url_to_base64(url, modality = "image"):
f"ModelNotDeployedError: model=`{data_root.model}` does not support `embed` for modality `{modality.value}`. Reason: {ex}",
code=status.HTTP_400_BAD_REQUEST,
)
except (ImageCorruption, AudioCorruption) as ex:
# get urls_or_bytes if not defined
try:
urls_or_bytes = urls_or_bytes
except NameError:
urls_or_bytes = []
except (ImageCorruption, AudioCorruption, MatryoshkaDimError) as ex:
raise errors.OpenAIException(
f"{modality.value}Corruption, could not open {[b if isinstance(b, str) else 'bytes' for b in urls_or_bytes]} -> {ex}",
f"{ex.__class__} -> {ex}",
code=status.HTTP_400_BAD_REQUEST,
)
except Exception as ex:
Expand Down Expand Up @@ -545,9 +541,9 @@ async def _embeddings_image(data: ImageEmbeddingInput):
encoding_format=data.encoding_format,
usage=usage,
)
except ImageCorruption as ex:
except (ImageCorruption, MatryoshkaDimError) as ex:
raise errors.OpenAIException(
f"ImageCorruption, could not open {[b if isinstance(b, str) else 'bytes' for b in urls_or_bytes]} -> {ex}",
f"{ex.__class__} -> {ex}",
code=status.HTTP_400_BAD_REQUEST,
)
except ModelNotDeployedError as ex:
Expand Down Expand Up @@ -604,9 +600,9 @@ async def _embeddings_audio(data: AudioEmbeddingInput):
encoding_format=data.encoding_format,
usage=usage,
)
except AudioCorruption as ex:
except (AudioCorruption, MatryoshkaDimError) as ex:
raise errors.OpenAIException(
f"AudioCorruption, could not open {[b if isinstance(b, str) else 'bytes' for b in urls_or_bytes]} -> {ex}",
f"{ex.__class__} -> {ex}",
code=status.HTTP_400_BAD_REQUEST,
)
except ModelNotDeployedError as ex:
Expand Down
4 changes: 4 additions & 0 deletions libs/infinity_emb/infinity_emb/primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,10 @@ class ModelNotDeployedError(Exception):
pass


class MatryoshkaDimError(Exception):
    """Raised when a requested matryoshka truncation dimension is invalid.

    Mapped to an HTTP 400 response by the API layer.
    """

    pass


class ImageCorruption(Exception):
    """Raised when an input image cannot be opened or decoded.

    Mapped to an HTTP 400 response by the API layer.
    """

    pass

Expand Down
10 changes: 3 additions & 7 deletions libs/infinity_emb/infinity_emb/transformer/audio/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,9 @@ async def resolve_audios(
CHECK_AIOHTTP.mark_required()
CHECK_SOUNDFILE.mark_required()

resolved_audios: list[AudioSingle] = []
async with aiohttp.ClientSession(trust_env=True) as session:
try:
resolved_audios = await asyncio.gather(
*[resolve_audio(audio, allowed_sampling_rate, session) for audio in audio_urls]
)
except Exception as e:
raise AudioCorruption(f"Failed to resolve audio: {e}")
resolved_audios = await asyncio.gather(
*[resolve_audio(audio, allowed_sampling_rate, session) for audio in audio_urls]
)

return resolved_audios
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def __init__(self, *, engine_args: EngineArgs):
prefer_quantized=("cpu" in provider.lower() or "openvino" in provider.lower()),
)

self.model = optimize_model(
model = optimize_model(
model_name_or_path=engine_args.model_name_or_path,
model_class=ORTModelForSequenceClassification,
revision=engine_args.revision,
Expand All @@ -48,7 +48,7 @@ def __init__(self, *, engine_args: EngineArgs):
file_name=onnx_file.as_posix(),
optimize_model=not os.environ.get("INFINITY_ONNX_DISABLE_OPTIMIZE", False),
)
self.model.use_io_binding = False
model.use_io_binding = False

self.tokenizer = AutoTokenizer.from_pretrained(
engine_args.model_name_or_path,
Expand All @@ -60,12 +60,11 @@ def __init__(self, *, engine_args: EngineArgs):

self._pipe = pipeline(
task="text-classification",
model=self.model,
model=model,
trust_remote_code=engine_args.trust_remote_code,
top_k=None,
revision=engine_args.revision,
tokenizer=self.tokenizer,
device=engine_args.device,
)

def encode_pre(self, sentences: list[str]):
Expand Down
8 changes: 2 additions & 6 deletions libs/infinity_emb/infinity_emb/transformer/vision/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,7 @@ async def resolve_images(
CHECK_PIL.mark_required()

resolved_imgs = []

try:
async with aiohttp.ClientSession(trust_env=True) as session:
resolved_imgs = await asyncio.gather(*[resolve_image(img, session) for img in images])
except Exception as e:
raise ImageCorruption(f"Failed to resolve image: {images}.\nError msg: {str(e)}")
async with aiohttp.ClientSession(trust_env=True) as session:
resolved_imgs = await asyncio.gather(*[resolve_image(img, session) for img in images])

return resolved_imgs
2 changes: 1 addition & 1 deletion libs/infinity_emb/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

[tool.poetry]
name = "infinity_emb"
version = "0.0.72"
version = "0.0.73"
description = "Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip."
authors = ["michaelfeil <noreply@michaelfeil.eu>"]
license = "MIT"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,19 @@
import torch
from optimum.pipelines import pipeline # type: ignore
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers.pipelines import pipeline # type: ignore
from infinity_emb.args import EngineArgs

from infinity_emb.transformer.classifier.optimum import OptimumClassifier


def test_classifier(model_name: str = "SamLowe/roberta-base-go_emotions-onnx"):
model = OptimumClassifier(
engine_args=EngineArgs(
model_name_or_path=model_name,
device="cuda" if torch.cuda.is_available() else "cpu",
) # type: ignore
)

pipe = pipeline(
task="text-classification",
model=ORTModelForSequenceClassification.from_pretrained(
model_name, file_name="onnx/model_quantized.onnx"
),
model="SamLowe/roberta-base-go_emotions", # hoping that this is the same model as model_name
top_k=None,
)

Expand Down

0 comments on commit d614094

Please sign in to comment.