diff --git a/litellm/__init__.py b/litellm/__init__.py
index 25cae832826d..0b2ec8820b62 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -483,7 +483,12 @@ def identify(event_details):
     "azure_ai",
     "github",
 ]
-
+openai_text_completion_compatible_providers: List = (
+    [  # providers that support `/v1/completions`
+        "together_ai",
+        "fireworks_ai",
+    ]
+)
 
 # well supported replicate llms
 replicate_models: List = [
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index f57dd3b81237..d38f5137d5bd 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -2329,6 +2329,8 @@ def get_standard_logging_object_payload(
             completion_start_time_float = completion_start_time.timestamp()
         elif isinstance(completion_start_time, float):
             completion_start_time_float = completion_start_time
+        else:
+            completion_start_time_float = end_time_float
         # clean up litellm hidden params
         clean_hidden_params = StandardLoggingHiddenParams(
             model_id=None,
diff --git a/litellm/llms/OpenAI/openai.py b/litellm/llms/OpenAI/openai.py
index 8021ccd59e4b..ed4d199f675a 100644
--- a/litellm/llms/OpenAI/openai.py
+++ b/litellm/llms/OpenAI/openai.py
@@ -1263,6 +1263,7 @@ async def async_streaming(
             error_headers = getattr(e, "headers", None)
             if response is not None and hasattr(response, "text"):
+                error_headers = getattr(e, "headers", None)
                 raise OpenAIError(
                     status_code=500,
                     message=f"{str(e)}\n\nOriginal Response: {response.text}",
@@ -1800,12 +1801,11 @@ def completion(
         headers: Optional[dict] = None,
     ):
         super().completion()
-        exception_mapping_worked = False
         try:
             if headers is None:
                 headers = self.validate_environment(api_key=api_key)
             if model is None or messages is None:
-                raise OpenAIError(status_code=422, message=f"Missing model or messages")
+                raise OpenAIError(status_code=422, message="Missing model or messages")
 
             if (
                 len(messages) > 0
diff --git a/litellm/llms/azure_text.py b/litellm/llms/azure_text.py
index 9a8d462e5643..db8c516b265a 100644
--- a/litellm/llms/azure_text.py
+++ b/litellm/llms/azure_text.py
@@ -162,11 +162,10 @@ def completion(
         client=None,
     ):
         super().completion()
-        exception_mapping_worked = False
        try:
             if model is None or messages is None:
                 raise AzureOpenAIError(
-                    status_code=422, message=f"Missing model or messages"
+                    status_code=422, message="Missing model or messages"
                 )
 
             max_retries = optional_params.pop("max_retries", 2)
@@ -293,7 +292,10 @@ def completion(
                             "api-version", api_version
                         )
 
-                response = azure_client.completions.create(**data, timeout=timeout)  # type: ignore
+                raw_response = azure_client.completions.with_raw_response.create(
+                    **data, timeout=timeout
+                )
+                response = raw_response.parse()
                 stringified_response = response.model_dump()
                 ## LOGGING
                 logging_obj.post_call(
@@ -380,13 +382,15 @@ async def acompletion(
                     "complete_input_dict": data,
                 },
             )
-            response = await azure_client.completions.create(**data, timeout=timeout)
+            raw_response = await azure_client.completions.with_raw_response.create(
+                **data, timeout=timeout
+            )
+            response = raw_response.parse()
             return openai_text_completion_config.convert_to_chat_model_response_object(
                 response_object=response.model_dump(),
                 model_response_object=model_response,
             )
         except AzureOpenAIError as e:
-            exception_mapping_worked = True
             raise e
         except Exception as e:
             status_code = getattr(e, "status_code", 500)
diff --git a/litellm/main.py b/litellm/main.py
index cb355561916e..2095322328f3 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1209,6 +1209,9 @@ def completion(
             custom_llm_provider == "text-completion-openai"
             or "ft:babbage-002" in model
             or "ft:davinci-002" in model  # support for finetuned completion models
+            or custom_llm_provider
+            in litellm.openai_text_completion_compatible_providers
+            and kwargs.get("text_completion") is True
         ):
             openai.api_type = "openai"
 
@@ -4099,8 +4102,8 @@ def process_prompt(i, individual_prompt):
 
     kwargs.pop("prompt", None)
 
-    if (
-        _model is not None and custom_llm_provider == "openai"
+    if _model is not None and (
+        custom_llm_provider == "openai"
     ):  # for openai compatible endpoints - e.g. vllm, call the native /v1/completions endpoint for text completion calls
         if _model not in litellm.open_ai_chat_completion_models:
             model = "text-completion-openai/" + _model
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 335e93447545..bf86da1e12a4 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,16 +1,9 @@
 model_list:
-  - model_name: "anthropic/claude-3-5-sonnet-20240620"
+  - model_name: "gpt-turbo"
     litellm_params:
-      model: anthropic/claude-3-5-sonnet-20240620
-      # api_base: http://0.0.0.0:9000
-  - model_name: gpt-3.5-turbo
-    litellm_params:
-      model: openai/*
+      model: azure/chatgpt-v-2
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
 
-litellm_settings:
-  success_callback: ["s3"]
-  s3_callback_params:
-    s3_bucket_name: litellm-logs # AWS Bucket Name for S3
-    s3_region_name: us-west-2 # AWS Region Name for S3
-    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # us os.environ/ to pass environment variables. This is AWS Access Key ID for S3
-    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
\ No newline at end of file
+router_settings:
+  model_group_alias: {"gpt-4": "gpt-turbo"}
\ No newline at end of file
diff --git a/litellm/proxy/health_check.py b/litellm/proxy/health_check.py
index ff5ed7bfb752..215d2d8d601c 100644
--- a/litellm/proxy/health_check.py
+++ b/litellm/proxy/health_check.py
@@ -3,7 +3,7 @@
 import asyncio
 import logging
 import random
-from typing import Optional
+from typing import List, Optional
 
 import litellm
 from litellm._logging import print_verbose
@@ -36,6 +36,25 @@ def _clean_endpoint_data(endpoint_data: dict, details: Optional[bool] = True):
     )
 
 
+def filter_deployments_by_id(
+    model_list: List,
+) -> List:
+    seen_ids = set()
+    filtered_deployments = []
+
+    for deployment in model_list:
+        _model_info = deployment.get("model_info") or {}
+        _id = _model_info.get("id") or None
+        if _id is None:
+            continue
+
+        if _id not in seen_ids:
+            seen_ids.add(_id)
+            filtered_deployments.append(deployment)
+
+    return filtered_deployments
+
+
 async def _perform_health_check(model_list: list, details: Optional[bool] = True):
     """
     Perform a health check for each model in the list.
@@ -105,6 +124,9 @@ async def perform_health_check(
         _new_model_list = [x for x in model_list if x["model_name"] == model]
         model_list = _new_model_list
 
+    model_list = filter_deployments_by_id(
+        model_list=model_list
+    )  # filter duplicate deployments (e.g. when model alias'es are used)
     healthy_endpoints, unhealthy_endpoints = await _perform_health_check(
         model_list, details
     )
diff --git a/litellm/proxy/management_helpers/utils.py b/litellm/proxy/management_helpers/utils.py
index efbe667fb683..af8e85201346 100644
--- a/litellm/proxy/management_helpers/utils.py
+++ b/litellm/proxy/management_helpers/utils.py
@@ -109,8 +109,8 @@ async def add_new_member(
             where={"user_id": user_info.user_id},  # type: ignore
             data={"teams": {"push": [team_id]}},
         )
-
-        returned_user = LiteLLM_UserTable(**_returned_user.model_dump())
+        if _returned_user is not None:
+            returned_user = LiteLLM_UserTable(**_returned_user.model_dump())
     elif len(existing_user_row) > 1:
         raise HTTPException(
             status_code=400,
diff --git a/litellm/router.py b/litellm/router.py
index e1cb108ccf60..5a01f4f39584 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -4556,6 +4556,27 @@ def get_model_ids(self, model_name: Optional[str] = None) -> List[str]:
                 ids.append(id)
         return ids
 
+    def _get_all_deployments(
+        self, model_name: str, model_alias: Optional[str] = None
+    ) -> List[DeploymentTypedDict]:
+        """
+        Return all deployments of a model name
+
+        Used for accurate 'get_model_list'.
+        """
+
+        returned_models: List[DeploymentTypedDict] = []
+        for model in self.model_list:
+            if model["model_name"] == model_name:
+                if model_alias is not None:
+                    alias_model = copy.deepcopy(model)
+                    alias_model["model_name"] = model_alias
+                    returned_models.append(alias_model)
+                else:
+                    returned_models.append(model)
+
+        return returned_models
+
     def get_model_names(self) -> List[str]:
         """
         Returns all possible model names for router.
@@ -4567,15 +4588,18 @@ def get_model_list(
         self, model_name: Optional[str] = None
     ) -> Optional[List[DeploymentTypedDict]]:
+        """
+        Includes router model_group_alias'es as well
+        """
         if hasattr(self, "model_list"):
             returned_models: List[DeploymentTypedDict] = []
 
             for model_alias, model_value in self.model_group_alias.items():
-                model_alias_item = DeploymentTypedDict(
-                    model_name=model_alias,
-                    litellm_params=LiteLLMParamsTypedDict(model=model_value),
+                returned_models.extend(
+                    self._get_all_deployments(
+                        model_name=model_value, model_alias=model_alias
+                    )
                 )
-                returned_models.append(model_alias_item)
 
             if model_name is None:
                 returned_models += self.model_list
 
@@ -4583,8 +4607,7 @@ def get_model_list(
 
             for model in self.model_list:
-                if model["model_name"] == model_name:
-                    returned_models.append(model)
+                returned_models.extend(self._get_all_deployments(model_name=model_name))
 
             return returned_models
         return None
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 619d2ab5d320..92310ae3cb41 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -626,6 +626,8 @@ async def test_model_function_invoke(model, sync_mode, api_key, api_base):
             response = await litellm.acompletion(**data)
             print(f"response: {response}")
 
+    except litellm.InternalServerError:
+        pass
     except litellm.RateLimitError as e:
         pass
     except Exception as e:
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index 0388e026b9fd..a570692f6db6 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -864,7 +864,7 @@ def _pre_call_utils(
         data["messages"] = [{"role": "user", "content": "Hello world"}]
         if streaming is True:
             data["stream"] = True
-        mapped_target = client.chat.completions.with_raw_response
+        mapped_target = client.chat.completions.with_raw_response  # type: ignore
         if sync_mode:
             original_function = litellm.completion
         else:
@@ -873,7 +873,7 @@ def _pre_call_utils(
         data["prompt"] = "Hello world"
         if streaming is True:
             data["stream"] = True
-        mapped_target = client.completions.with_raw_response
+        mapped_target = client.completions.with_raw_response  # type: ignore
         if sync_mode:
             original_function = litellm.text_completion
         else:
diff --git a/litellm/tests/test_function_calling.py b/litellm/tests/test_function_calling.py
index 79db9f1623b4..f30f713eadeb 100644
--- a/litellm/tests/test_function_calling.py
+++ b/litellm/tests/test_function_calling.py
@@ -52,6 +52,7 @@ def get_current_weather(location, unit="fahrenheit"):
         # "anthropic.claude-3-sonnet-20240229-v1:0",
     ],
 )
+@pytest.mark.flaky(retries=3, delay=1)
 def test_aaparallel_function_call(model):
     try:
         litellm.set_verbose = True
diff --git a/litellm/tests/test_text_completion.py b/litellm/tests/test_text_completion.py
index 70325c44e229..e6a4a0499ccf 100644
--- a/litellm/tests/test_text_completion.py
+++ b/litellm/tests/test_text_completion.py
@@ -4239,3 +4239,14 @@ def test_completion_vllm():
         mock_call.assert_called_once()
 
         assert "hello" in mock_call.call_args.kwargs["extra_body"]
+
+
+def test_completion_fireworks_ai_multiple_choices():
+    litellm.set_verbose = True
+    response = litellm.text_completion(
+        model="fireworks_ai/llama-v3p1-8b-instruct",
+        prompt=["halo", "hi", "halo", "hi"],
+    )
+    print(response.choices)
+
+    assert len(response.choices) == 4
diff --git a/proxy_server_config.yaml b/proxy_server_config.yaml
index 57113d3509df..b1d6b3dc6610 100644
--- a/proxy_server_config.yaml
+++ b/proxy_server_config.yaml
@@ -148,6 +148,7 @@ router_settings:
   redis_password: os.environ/REDIS_PASSWORD
   redis_port: os.environ/REDIS_PORT
   enable_pre_call_checks: true
+  model_group_alias: {"my-special-fake-model-alias-name": "fake-openai-endpoint-3"}
 
 general_settings:
   master_key: sk-1234 # [OPTIONAL] Use to enforce auth on proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys
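
Reviewer note (not part of the patch): a minimal sketch of how the `model_group_alias` handling in the router change above is expected to behave when constructed locally. The deployment values and credentials below are placeholders, not taken from the patch.

```python
import litellm

# Hypothetical config mirroring litellm/proxy/_new_secret_config.yaml above:
# "gpt-4" is an alias for the "gpt-turbo" model group.
router = litellm.Router(
    model_list=[
        {
            "model_name": "gpt-turbo",
            "litellm_params": {
                "model": "azure/chatgpt-v-2",
                "api_key": "placeholder-key",  # placeholder credential
                "api_base": "https://example-endpoint.openai.azure.com",  # placeholder endpoint
            },
        }
    ],
    model_group_alias={"gpt-4": "gpt-turbo"},
)

# With the patched get_model_list(), an alias should resolve to the underlying
# deployment(s) rather than a synthetic single-entry DeploymentTypedDict, so
# health checks and /model/info see real deployment ids for aliased groups.
for deployment in router.get_model_list() or []:
    print(deployment["model_name"], "->", deployment["litellm_params"]["model"])
```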