Skip to content

Commit

Permalink
refactor: several improvements (#1044)
Browse files (browse the repository at this point in the history)
- update models.yaml
- update azure openai api version
- agent `run_instructions_fn`
  • Loading branch information
sigoden authored Dec 7, 2024
1 parent af4ff7a commit ccbc340
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 14 deletions.
90 changes: 82 additions & 8 deletions models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
output_price: 0
supports_vision: true
supports_function_calling: true
- name: gemini-exp-1121
- name: gemini-exp-1206
max_input_tokens: 32768
max_output_tokens: 8192
input_price: 0
Expand Down Expand Up @@ -351,6 +351,11 @@
# - https://console.groq.com/docs/api-reference#chat
- platform: groq
models:
- name: llama-3.3-70b-versatile
max_input_tokens: 128000
input_price: 0
output_price: 0
supports_function_calling: true
- name: llama-3.1-70b-versatile
max_input_tokens: 128000
input_price: 0
Expand Down Expand Up @@ -399,8 +404,12 @@
- name: llama3.2-vision
max_input_tokens: 128000
supports_vision: true
- name: gemma2
max_input_tokens: 8192
- name: llama3.3
max_input_tokens: 128000
supports_function_calling: true
- name: qwq
max_input_tokens: 32768
supports_function_calling: true
- name: qwen2.5
max_input_tokens: 128000
supports_function_calling: true
Expand All @@ -409,6 +418,8 @@
supports_function_calling: true
- name: deepseek-coder-v2
max_input_tokens: 32768
- name: gemma2
max_input_tokens: 8192
- name: nomic-embed-text
type: embedding
max_tokens_per_chunk: 8192
Expand Down Expand Up @@ -523,7 +534,6 @@
# Links:
# - https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
# - https://aws.amazon.com/bedrock/pricing/
# - https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html
# - https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-support.html
- platform: bedrock
models:
Expand Down Expand Up @@ -624,6 +634,23 @@
require_max_tokens: true
input_price: 0.1
output_price: 0.1
- name: us.amazon.nova-pro-v1:0
max_input_tokens: 300000
max_output_tokens: 5120
input_price: 0.8
output_price: 3.2
supports_vision: true
- name: us.amazon.nova-lite-v1:0
max_input_tokens: 300000
max_output_tokens: 5120
input_price: 0.06
output_price: 0.24
supports_vision: true
- name: us.amazon.nova-micro-v1:0
max_input_tokens: 128000
max_output_tokens: 5120
input_price: 0.035
output_price: 0.14
- name: mistral.mistral-large-2407-v1:0
max_input_tokens: 128000
input_price: 2
Expand Down Expand Up @@ -667,6 +694,12 @@
# - https://developers.cloudflare.com/workers-ai/configuration/open-ai-compatibility/
- platform: cloudflare
models:
- name: '@cf/meta/llama-3.3-70b-instruct-fp8-fast'
max_input_tokens: 6144
max_output_tokens: 2048
require_max_tokens: true
input_price: 0
output_price: 0
- name: '@cf/meta/llama-3.1-70b-instruct'
max_input_tokens: 6144
max_output_tokens: 2048
Expand Down Expand Up @@ -1044,9 +1077,15 @@
- name: ministral-3b
max_input_tokens: 128000
supports_function_calling: true
- name: cohere-command-r-plus-08-2024
max_input_tokens: 128000
supports_function_calling: true
- name: cohere-command-r-plus
max_input_tokens: 128000
supports_function_calling: true
- name: cohere-command-r-08-2024
max_input_tokens: 128000
supports_function_calling: true
- name: cohere-command-r
max_input_tokens: 128000
supports_function_calling: true
Expand Down Expand Up @@ -1079,6 +1118,10 @@
# - https://deepinfra.com/pricing
- platform: deepinfra
models:
- name: meta-llama/Llama-3.3-70B-Instruct
max_input_tokens: 128000
input_price: 0.23
output_price: 0.40
- name: meta-llama/Meta-Llama-3.1-405B-Instruct
max_input_tokens: 32000
input_price: 1.79
Expand Down Expand Up @@ -1176,6 +1219,10 @@
# - https://fireworks.ai/pricing
- platform: fireworks
models:
- name: accounts/fireworks/models/llama-v3p3-70b-instruct
max_input_tokens: 131072
input_price: 0.9
output_price: 0.9
- name: accounts/fireworks/models/llama-v3p1-405b-instruct
max_input_tokens: 131072
input_price: 3
Expand Down Expand Up @@ -1224,6 +1271,11 @@
max_input_tokens: 32768
input_price: 0.9
output_price: 0.9
- name: accounts/fireworks/models/qwen2-vl-72b-instruct
max_input_tokens: 32768
input_price: 0.9
output_price: 0.9
supports_vision: true
- name: accounts/fireworks/models/phi-3-vision-128k-instruct
max_input_tokens: 131072
input_price: 0.2
Expand Down Expand Up @@ -1385,6 +1437,10 @@
output_price: 1.25
supports_vision: true
supports_function_calling: true
- name: meta-llama/llama-3.3-70b-instruct
max_input_tokens: 131072
input_price: 0.88
output_price: 0.88
- name: meta-llama/llama-3.1-405b-instruct
max_input_tokens: 131072
input_price: 2.8
Expand Down Expand Up @@ -1550,6 +1606,23 @@
input_price: 5
output_price: 15
supports_vision: true
- name: amazon/nova-pro-v1
max_input_tokens: 300000
max_output_tokens: 5120
input_price: 0.8
output_price: 3.2
supports_vision: true
- name: amazon/nova-lite-v1
max_input_tokens: 300000
max_output_tokens: 5120
input_price: 0.06
output_price: 0.24
supports_vision: true
- name: amazon/nova-micro-v1
max_input_tokens: 128000
max_output_tokens: 5120
input_price: 0.035
output_price: 0.14

# Links
# - https://cloud.siliconflow.cn/models
Expand Down Expand Up @@ -1613,10 +1686,6 @@
input_price: 0.186
output_price: 0.186
supports_function_calling: true
- name: nvidia/Llama-3.1-Nemotron-70B-Instruct
max_input_tokens: 32768
input_price: 0.578
output_price: 0.578
- name: Tencent/Hunyuan-A52B-Instruct
max_input_tokens: 32768
input_price: 2.94
Expand Down Expand Up @@ -1650,6 +1719,11 @@
# - https://www.together.ai/pricing
- platform: together
models:
- name: meta-llama/Llama-3.3-70B-Instruct-Turbo
max_input_tokens: 32768
input_price: 0.88
output_price: 0.88
supports_function_calling: true
- name: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
max_input_tokens: 32768
input_price: 3.5
Expand Down
4 changes: 2 additions & 2 deletions src/client/azure_openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ fn prepare_chat_completions(
let api_key = self_.get_api_key()?;

let url = format!(
"{}/openai/deployments/{}/chat/completions?api-version=2024-02-01",
"{}/openai/deployments/{}/chat/completions?api-version=2024-10-21",
&api_base,
self_.model.name()
);
Expand All @@ -70,7 +70,7 @@ fn prepare_embeddings(self_: &AzureOpenAIClient, data: &EmbeddingsData) -> Resul
let api_key = self_.get_api_key()?;

let url = format!(
"{}/openai/deployments/{}/embeddings?api-version=2024-02-01",
"{}/openai/deployments/{}/embeddings?api-version=2024-10-21",
&api_base,
self_.model.name()
);
Expand Down
5 changes: 1 addition & 4 deletions src/config/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,7 @@ impl Agent {
self.variable_envs(),
)?;
match value {
Some(v) => {
println!();
Ok(v)
}
Some(v) => Ok(v),
_ => bail!("No return value from '_instructions' function"),
}
}
Expand Down

0 comments on commit ccbc340

Please sign in to comment.