From 03f802ea7dce59f8fb186927761ca10a3a289dd0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 15 Feb 2024 18:27:13 +0100 Subject: [PATCH 1/9] feat(tools): support Tools in the API Co-authored-by: =?UTF-8?q?Stephan=20A=C3=9Fmus?= --- api/openai/request.go | 10 ++++++++++ api/schema/openai.go | 3 +++ pkg/grammar/functions.go | 6 ++++++ 3 files changed, 19 insertions(+) diff --git a/api/openai/request.go b/api/openai/request.go index 382a930e1c7..05e0bc90f71 100644 --- a/api/openai/request.go +++ b/api/openai/request.go @@ -136,6 +136,16 @@ func updateRequestConfig(config *config.Config, input *schema.OpenAIRequest) { } } + if len(input.Tools) > 0 { + for _, tool := range input.Tools { + input.Functions = append(input.Functions, tool.Function) + } + } + + if input.ToolsChoice != nil { + input.FunctionCall = input.ToolsChoice + } + // Decode each request's message content index := 0 for i, m := range input.Messages { diff --git a/api/schema/openai.go b/api/schema/openai.go index 6355ff63d5e..e87f829ccea 100644 --- a/api/schema/openai.go +++ b/api/schema/openai.go @@ -117,6 +117,9 @@ type OpenAIRequest struct { Functions []grammar.Function `json:"functions" yaml:"functions"` FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object + Tools []grammar.Tool `json:"tools,omitempty" yaml:"tools"` + ToolsChoice interface{} `json:"tool_choice,omitempty" yaml:"tool_choice"` + Stream bool `json:"stream"` // Image (not supported by OpenAI) diff --git a/pkg/grammar/functions.go b/pkg/grammar/functions.go index ef56662b7b9..1038f5e6f14 100644 --- a/pkg/grammar/functions.go +++ b/pkg/grammar/functions.go @@ -11,6 +11,12 @@ type Function struct { } type Functions []Function +type Tool struct { + Type string `json:"type"` + Function Function `json:"function,omitempty"` +} +type Tools []Tool + func (f Functions) ToJSONStructure() JSONFunctionStructure { js := JSONFunctionStructure{} for _, function := range f { From ccf5faf175a9265613d2a72693418d9619f2e959 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 15 Feb 2024 19:57:35 +0100 Subject: [PATCH 2/9] feat(tools): support function streaming --- api/openai/chat.go | 320 +++++++++++++++++++++++++++++++------------ api/schema/openai.go | 14 ++ 2 files changed, 246 insertions(+), 88 deletions(-) diff --git a/api/openai/chat.go b/api/openai/chat.go index 819cd6b2d6c..94118178697 100644 --- a/api/openai/chat.go +++ b/api/openai/chat.go @@ -55,6 +55,132 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) }) close(responses) } + + /* + data: + { + "id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk", + "object":"chat.completion.chunk", + "created":1708018287, + "model":"gpt-3.5-turbo-0613", + "system_fingerprint":null, + "choices":[ + { + "index":0, + "delta": { + "role":"assistant", + "content":null, + "tool_calls": + [ + { + "index":0, + "id":"call_kL07suiDkGzYbUCLMZZ5XUIU", + "type":"function", + "function": + { + "name":"get_current_weather", + "arguments":"" + } + } + ] + }, + "logprobs":null, + "finish_reason":null + }] + } + + data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a + rguments":"{\n"}}]},"logprobs":null,"finish_reason":null}]} + + data: 
{"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a + rguments":" "}}]},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a + rguments":" \""}}]},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a + rguments":"location"}}]},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a + rguments":"\":"}}]},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a + rguments":" \""}}]},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a + rguments":"Boston"}}]},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a + rguments":","}}]},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a + rguments":" MA"}}]},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a + rguments":"\"\n"}}]},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a + rguments":"}"}}]},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool + _calls"}]} + + data: [DONE] + */ + processTools := func(prompt string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + ComputeChoices(req, prompt, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + ss := 
map[string]interface{}{} + + name, args := parseFunctionCall(s) + ss["name"], ss["arguments"] = name, args + + initialMessage := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{ + Delta: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + Index: 0, + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: name, + }, + }, + }, + }}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage + + responses <- schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{ + Delta: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + Index: 0, + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Arguments: args, + }, + }, + }, + }}}, + Object: "chat.completion.chunk", + } + return true + }) + close(responses) + } + return func(c *fiber.Ctx) error { processFunctions := false funcs := grammar.Functions{} @@ -122,7 +248,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } // functions are not supported in stream mode (yet?) - toStream := input.Stream && !processFunctions + toStream := input.Stream log.Debug().Msgf("Parameters: %+v", config) @@ -254,10 +380,15 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) log.Debug().Msgf("Grammar: %+v", config.Grammar) } - if toStream { + switch { + case toStream: responses := make(chan schema.OpenAIResponse) - go process(predInput, input, config, o.Loader, responses) + if !processFunctions { + go process(predInput, input, config, o.Loader, responses) + } else { + go processTools(predInput, input, config, o.Loader, responses) + } c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { @@ -278,13 +409,18 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) w.Flush() } + finishReason := "stop" + if processFunctions { + finishReason = "tool_calls" + } + resp := &schema.OpenAIResponse{ ID: id, Created: created, Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{ { - FinishReason: "stop", + FinishReason: finishReason, Index: 0, Delta: &schema.Message{Content: &emptyMessage}, }}, @@ -298,102 +434,110 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) w.Flush() })) return nil - } - result, tokenUsage, err := ComputeChoices(input, predInput, config, o, o.Loader, func(s string, c *[]schema.Choice) { - if processFunctions { - // As we have to change the result before processing, we can't stream the answer (yet?) 
- ss := map[string]interface{}{} - // This prevent newlines to break JSON parsing for clients - s = utils.EscapeNewLines(s) - json.Unmarshal([]byte(s), &ss) - log.Debug().Msgf("Function return: %s %+v", s, ss) - - // The grammar defines the function name as "function", while OpenAI returns "name" - func_name := ss["function"] - // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object - args := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) - d, _ := json.Marshal(args) - - ss["arguments"] = string(d) - ss["name"] = func_name - - // if do nothing, reply with a message - if func_name == noActionName { - log.Debug().Msgf("nothing to do, computing a reply") - - // If there is a message that the LLM already sends as part of the JSON reply, use it - arguments := map[string]interface{}{} - json.Unmarshal([]byte(d), &arguments) - m, exists := arguments["message"] - if exists { - switch message := m.(type) { - case string: - if message != "" { - log.Debug().Msgf("Reply received from LLM: %s", message) - message = backend.Finetune(*config, predInput, message) - log.Debug().Msgf("Reply received from LLM(finetuned): %s", message) - - *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &message}}) - return + default: + result, tokenUsage, err := ComputeChoices(input, predInput, config, o, o.Loader, func(s string, c *[]schema.Choice) { + if processFunctions { + ss := map[string]interface{}{} + + name, args := parseFunctionCall(s) + ss["name"], ss["arguments"] = name, args + + // if do nothing, reply with a message + if name == noActionName { + log.Debug().Msgf("nothing to do, computing a reply") + + // If there is a message that the LLM already sends as part of the JSON reply, use it + arguments := map[string]interface{}{} + json.Unmarshal([]byte(args), &arguments) + m, exists := arguments["message"] + if exists { + switch message := m.(type) { + case string: + if message != "" { + log.Debug().Msgf("Reply received from LLM: %s", message) + message = backend.Finetune(*config, predInput, message) + log.Debug().Msgf("Reply received from LLM(finetuned): %s", message) + + *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &message}}) + return + } } } - } - log.Debug().Msgf("No action received from LLM, without a message, computing a reply") - // Otherwise ask the LLM to understand the JSON output and the context, and return a message - // Note: This costs (in term of CPU) another computation - config.Grammar = "" - images := []string{} - for _, m := range input.Messages { - images = append(images, m.StringImages...) - } - predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil) - if err != nil { - log.Error().Msgf("inference error: %s", err.Error()) - return - } + log.Debug().Msgf("No action received from LLM, without a message, computing a reply") + // Otherwise ask the LLM to understand the JSON output and the context, and return a message + // Note: This costs (in term of CPU) another computation + config.Grammar = "" + images := []string{} + for _, m := range input.Messages { + images = append(images, m.StringImages...) 
+ } + predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil) + if err != nil { + log.Error().Msgf("inference error: %s", err.Error()) + return + } - prediction, err := predFunc() - if err != nil { - log.Error().Msgf("inference error: %s", err.Error()) - return + prediction, err := predFunc() + if err != nil { + log.Error().Msgf("inference error: %s", err.Error()) + return + } + + fineTunedResponse := backend.Finetune(*config, predInput, prediction.Response) + *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &fineTunedResponse}}) + } else { + // otherwise reply with the function call + *c = append(*c, schema.Choice{ + FinishReason: "function_call", + Message: &schema.Message{Role: "assistant", FunctionCall: ss}, + }) } - fineTunedResponse := backend.Finetune(*config, predInput, prediction.Response) - *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &fineTunedResponse}}) - } else { - // otherwise reply with the function call - *c = append(*c, schema.Choice{ - FinishReason: "function_call", - Message: &schema.Message{Role: "assistant", FunctionCall: ss}, - }) + return } + *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) + }, nil) + if err != nil { + return err + } - return + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: result, + Object: "chat.completion", + Usage: schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + }, } - *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) - }, nil) - if err != nil { - return err - } + respData, _ := json.Marshal(resp) + log.Debug().Msgf("Response: %s", respData) - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: result, - Object: "chat.completion", - Usage: schema.OpenAIUsage{ - PromptTokens: tokenUsage.Prompt, - CompletionTokens: tokenUsage.Completion, - TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, - }, + // Return the prediction in the response body + return c.JSON(resp) } - respData, _ := json.Marshal(resp) - log.Debug().Msgf("Response: %s", respData) - // Return the prediction in the response body - return c.JSON(resp) } } + +func parseFunctionCall(llmresult string) (string, string) { + // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
+ ss := map[string]interface{}{} + // This prevent newlines to break JSON parsing for clients + s := utils.EscapeNewLines(llmresult) + json.Unmarshal([]byte(s), &ss) + log.Debug().Msgf("Function return: %s %+v", s, ss) + + // The grammar defines the function name as "function", while OpenAI returns "name" + func_name := ss["function"] + // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object + args := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) + d, _ := json.Marshal(args) + + return func_name.(string), string(d) +} diff --git a/api/schema/openai.go b/api/schema/openai.go index e87f829ccea..dcd11764fba 100644 --- a/api/schema/openai.go +++ b/api/schema/openai.go @@ -76,6 +76,20 @@ type Message struct { // A result of a function call FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"` + + ToolCalls []ToolCall `json:"tool_calls,omitempty" yaml:"tool_call,omitempty"` +} + +type ToolCall struct { + Index int `json:"index"` + ID string `json:"id"` + Type string `json:"type"` + FunctionCall FunctionCall `json:"function"` +} + +type FunctionCall struct { + Name string `json:"name,omitempty"` + Arguments string `json:"arguments"` } type OpenAIModel struct { From dddd67da69eceeb2cc0230afc6beb7866f4715a0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 15 Feb 2024 20:17:28 +0100 Subject: [PATCH 3/9] Adhere to new return types when using tools instead of functions --- api/openai/chat.go | 107 ++++++++++++------------------------------ api/openai/request.go | 7 ++- 2 files changed, 35 insertions(+), 79 deletions(-) diff --git a/api/openai/chat.go b/api/openai/chat.go index 94118178697..3bde838ea6c 100644 --- a/api/openai/chat.go +++ b/api/openai/chat.go @@ -55,78 +55,6 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) }) close(responses) } - - /* - data: - { - "id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk", - "object":"chat.completion.chunk", - "created":1708018287, - "model":"gpt-3.5-turbo-0613", - "system_fingerprint":null, - "choices":[ - { - "index":0, - "delta": { - "role":"assistant", - "content":null, - "tool_calls": - [ - { - "index":0, - "id":"call_kL07suiDkGzYbUCLMZZ5XUIU", - "type":"function", - "function": - { - "name":"get_current_weather", - "arguments":"" - } - } - ] - }, - "logprobs":null, - "finish_reason":null - }] - } - - data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a - rguments":"{\n"}}]},"logprobs":null,"finish_reason":null}]} - - data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a - rguments":" "}}]},"logprobs":null,"finish_reason":null}]} - - data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a - rguments":" \""}}]},"logprobs":null,"finish_reason":null}]} - - data: 
{"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a - rguments":"location"}}]},"logprobs":null,"finish_reason":null}]} - - data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a - rguments":"\":"}}]},"logprobs":null,"finish_reason":null}]} - - data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a - rguments":" \""}}]},"logprobs":null,"finish_reason":null}]} - - data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a - rguments":"Boston"}}]},"logprobs":null,"finish_reason":null}]} - - data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a - rguments":","}}]},"logprobs":null,"finish_reason":null}]} - - data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a - rguments":" MA"}}]},"logprobs":null,"finish_reason":null}]} - - data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a - rguments":"\"\n"}}]},"logprobs":null,"finish_reason":null}]} - - data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"a - rguments":"}"}}]},"logprobs":null,"finish_reason":null}]} - - data: {"id":"chatcmpl-8sZrzBdLsWvnO2lX7Vz6glYAz8JMk","object":"chat.completion.chunk","created":1708018287,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool - _calls"}]} - - data: [DONE] - */ processTools := func(prompt string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { ComputeChoices(req, prompt, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { ss := map[string]interface{}{} @@ -391,7 +319,6 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { - usage := &schema.OpenAIUsage{} for ev := range responses { @@ -488,11 +415,35 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) fineTunedResponse := backend.Finetune(*config, predInput, prediction.Response) *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &fineTunedResponse}}) } else { - // otherwise reply with the function 
call - *c = append(*c, schema.Choice{ - FinishReason: "function_call", - Message: &schema.Message{Role: "assistant", FunctionCall: ss}, - }) + if len(input.Tools) > 0 { + // Result is different in the case we have a tool call + *c = append(*c, schema.Choice{ + FinishReason: "tool_calls", + Message: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: name, + Arguments: args, + }, + }, + }, + FunctionCall: ss, + }, + }) + } else { + // otherwise reply with the function call + *c = append(*c, schema.Choice{ + FinishReason: "function_call", + Message: &schema.Message{ + Role: "assistant", + FunctionCall: ss, + }, + }) + } } return diff --git a/api/openai/request.go b/api/openai/request.go index 05e0bc90f71..6a7a14e8502 100644 --- a/api/openai/request.go +++ b/api/openai/request.go @@ -13,6 +13,7 @@ import ( fiberContext "github.com/go-skynet/LocalAI/api/ctx" options "github.com/go-skynet/LocalAI/api/options" "github.com/go-skynet/LocalAI/api/schema" + "github.com/go-skynet/LocalAI/pkg/grammar" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -143,7 +144,11 @@ func updateRequestConfig(config *config.Config, input *schema.OpenAIRequest) { } if input.ToolsChoice != nil { - input.FunctionCall = input.ToolsChoice + var toolChoice grammar.Tool + json.Unmarshal([]byte(input.ToolsChoice.(string)), &toolChoice) + input.FunctionCall = map[string]interface{}{ + "name": toolChoice.Function.Name, + } } // Decode each request's message content From 496374ae3e89c5b051eca440a4c82be1b63e5cf4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 15 Feb 2024 20:20:45 +0100 Subject: [PATCH 4/9] Keep backward compatibility with function calling --- api/openai/chat.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/openai/chat.go b/api/openai/chat.go index 3bde838ea6c..3103a3fb18c 100644 --- a/api/openai/chat.go +++ b/api/openai/chat.go @@ -337,7 +337,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } finishReason := "stop" - if processFunctions { + if processFunctions && len(input.Tools) > 0 { finishReason = "tool_calls" } From 251045173a1cd304526637b0ae1c7fa118005d43 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 16 Feb 2024 20:41:42 +0100 Subject: [PATCH 5/9] Evaluate function names in chat templates --- api/openai/chat.go | 2 ++ api/schema/openai.go | 4 ++++ pkg/model/loader.go | 1 + 3 files changed, 7 insertions(+) diff --git a/api/openai/chat.go b/api/openai/chat.go index 3103a3fb18c..00a3b8519ec 100644 --- a/api/openai/chat.go +++ b/api/openai/chat.go @@ -199,6 +199,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } r := config.Roles[role] contentExists := i.Content != nil && i.StringContent != "" + // First attempt to populate content via a chat message specific template if config.TemplateConfig.ChatMessage != "" { chatMessageData := model.ChatMessageTemplateData{ @@ -206,6 +207,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) Role: r, RoleName: role, Content: i.StringContent, + FunctionName: i.Name, MessageIndex: messageIndex, } templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) diff --git a/api/schema/openai.go b/api/schema/openai.go index dcd11764fba..12a39b4284d 100644 --- a/api/schema/openai.go +++ b/api/schema/openai.go @@ -68,6 
+68,10 @@ type ContentURL struct { type Message struct { // The message role Role string `json:"role,omitempty" yaml:"role"` + + // The message name (used for tools calls) + Name string `json:"name,omitempty" yaml:"name"` + // The message content Content interface{} `json:"content" yaml:"content"` diff --git a/pkg/model/loader.go b/pkg/model/loader.go index 37c2a603a63..bea32fb72a4 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -33,6 +33,7 @@ type ChatMessageTemplateData struct { SystemPrompt string Role string RoleName string + FunctionName string Content string MessageIndex int } From f2e803c867123d721dce504cfc8b1132d9d29bb3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 16 Feb 2024 23:18:53 +0100 Subject: [PATCH 6/9] Disable recovery with --debug --- api/api.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/api/api.go b/api/api.go index 7ec95f1b63a..946204d2b06 100644 --- a/api/api.go +++ b/api/api.go @@ -146,7 +146,11 @@ func App(opts ...options.AppOption) (*fiber.App, error) { } // Default middleware config - app.Use(recover.New()) + + if !options.Debug { + app.Use(recover.New()) + } + if options.Metrics != nil { app.Use(metrics.APIMiddleware(options.Metrics)) } From c6d026e522bddc36e8d82718dbf0550605ebedfd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 16 Feb 2024 23:19:07 +0100 Subject: [PATCH 7/9] Correctly stream out the entire result --- api/openai/chat.go | 89 ++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/api/openai/chat.go b/api/openai/chat.go index 00a3b8519ec..86db7a2d590 100644 --- a/api/openai/chat.go +++ b/api/openai/chat.go @@ -56,56 +56,61 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) close(responses) } processTools := func(prompt string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + + result := "" ComputeChoices(req, prompt, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { - ss := map[string]interface{}{} + result += s + // TODO: Change generated BNF grammar to be compliant with the schema so we can + // stream the result token by token here. + return true + }) - name, args := parseFunctionCall(s) - ss["name"], ss["arguments"] = name, args + ss := map[string]interface{}{} + name, args := parseFunctionCall(result) + ss["name"], ss["arguments"] = name, args - initialMessage := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{ - Delta: &schema.Message{ - Role: "assistant", - ToolCalls: []schema.ToolCall{ - { - Index: 0, - ID: id, - Type: "function", - FunctionCall: schema.FunctionCall{ - Name: name, - }, + initialMessage := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
+ Choices: []schema.Choice{{ + Delta: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + Index: 0, + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: name, }, }, - }}}, - Object: "chat.completion.chunk", - } - responses <- initialMessage + }, + }}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage - responses <- schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{ - Delta: &schema.Message{ - Role: "assistant", - ToolCalls: []schema.ToolCall{ - { - Index: 0, - ID: id, - Type: "function", - FunctionCall: schema.FunctionCall{ - Arguments: args, - }, + responses <- schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{ + Delta: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + Index: 0, + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Arguments: args, }, }, - }}}, - Object: "chat.completion.chunk", - } - return true - }) + }, + }}}, + Object: "chat.completion.chunk", + } close(responses) } From dba9094d6a93e0a79e31230a3c08d5c9e202fd1a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 16 Feb 2024 23:45:58 +0100 Subject: [PATCH 8/9] Detect when llm chooses to reply and to not perform any action in SSE --- api/openai/chat.go | 136 ++++++++++++++++++++++++++++++--------------- 1 file changed, 91 insertions(+), 45 deletions(-) diff --git a/api/openai/chat.go b/api/openai/chat.go index 86db7a2d590..ac914533e8a 100644 --- a/api/openai/chat.go +++ b/api/openai/chat.go @@ -55,10 +55,9 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) }) close(responses) } - processTools := func(prompt string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { - + processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { result := "" - ComputeChoices(req, prompt, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + _, tokenUsage, _ := ComputeChoices(req, prompt, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { result += s // TODO: Change generated BNF grammar to be compliant with the schema so we can // stream the result token by token here. @@ -69,6 +68,40 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) name, args := parseFunctionCall(result) ss["name"], ss["arguments"] = name, args + if name == noAction { + initialMessage := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage + + result, err := handleQuestion(config, req, o, args, prompt) + if err != nil { + log.Error().Msgf("error handling question: %s", err.Error()) + return + } + + resp := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
+ Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}}, + Object: "chat.completion.chunk", + Usage: schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + }, + } + + responses <- resp + close(responses) + return + } + initialMessage := schema.OpenAIResponse{ ID: id, Created: created, @@ -322,14 +355,17 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) if !processFunctions { go process(predInput, input, config, o.Loader, responses) } else { - go processTools(predInput, input, config, o.Loader, responses) + go processTools(noActionName, predInput, input, config, o.Loader, responses) } c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { usage := &schema.OpenAIUsage{} - + toolsCalled := false for ev := range responses { usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it + if len(ev.Choices[0].Delta.ToolCalls) > 0 { + toolsCalled = true + } var buf bytes.Buffer enc := json.NewEncoder(&buf) enc.Encode(ev) @@ -344,8 +380,10 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } finishReason := "stop" - if processFunctions && len(input.Tools) > 0 { + if toolsCalled { finishReason = "tool_calls" + } else if toolsCalled && len(input.Tools) == 0 { + finishReason = "function_call" } resp := &schema.OpenAIResponse{ @@ -379,48 +417,12 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) // if do nothing, reply with a message if name == noActionName { - log.Debug().Msgf("nothing to do, computing a reply") - - // If there is a message that the LLM already sends as part of the JSON reply, use it - arguments := map[string]interface{}{} - json.Unmarshal([]byte(args), &arguments) - m, exists := arguments["message"] - if exists { - switch message := m.(type) { - case string: - if message != "" { - log.Debug().Msgf("Reply received from LLM: %s", message) - message = backend.Finetune(*config, predInput, message) - log.Debug().Msgf("Reply received from LLM(finetuned): %s", message) - - *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &message}}) - return - } - } - } - - log.Debug().Msgf("No action received from LLM, without a message, computing a reply") - // Otherwise ask the LLM to understand the JSON output and the context, and return a message - // Note: This costs (in term of CPU) another computation - config.Grammar = "" - images := []string{} - for _, m := range input.Messages { - images = append(images, m.StringImages...) 
- } - predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil) - if err != nil { - log.Error().Msgf("inference error: %s", err.Error()) - return - } - - prediction, err := predFunc() + result, err := handleQuestion(config, input, o, args, predInput) if err != nil { - log.Error().Msgf("inference error: %s", err.Error()) + log.Error().Msgf("error handling question: %s", err.Error()) return } - - fineTunedResponse := backend.Finetune(*config, predInput, prediction.Response) - *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &fineTunedResponse}}) + *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &result}}) } else { if len(input.Tools) > 0 { // Result is different in the case we have a tool call @@ -455,6 +457,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) return } + *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) }, nil) if err != nil { @@ -483,6 +486,49 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) } } +func handleQuestion(config *config.Config, input *schema.OpenAIRequest, o *options.Option, args, prompt string) (string, error) { + log.Debug().Msgf("nothing to do, computing a reply") + + // If there is a message that the LLM already sends as part of the JSON reply, use it + arguments := map[string]interface{}{} + json.Unmarshal([]byte(args), &arguments) + m, exists := arguments["message"] + if exists { + switch message := m.(type) { + case string: + if message != "" { + log.Debug().Msgf("Reply received from LLM: %s", message) + message = backend.Finetune(*config, prompt, message) + log.Debug().Msgf("Reply received from LLM(finetuned): %s", message) + + return message, nil + } + } + } + + log.Debug().Msgf("No action received from LLM, without a message, computing a reply") + // Otherwise ask the LLM to understand the JSON output and the context, and return a message + // Note: This costs (in term of CPU/GPU) another computation + config.Grammar = "" + images := []string{} + for _, m := range input.Messages { + images = append(images, m.StringImages...) + } + + predFunc, err := backend.ModelInference(input.Context, prompt, images, o.Loader, *config, o, nil) + if err != nil { + log.Error().Msgf("inference error: %s", err.Error()) + return "", err + } + + prediction, err := predFunc() + if err != nil { + log.Error().Msgf("inference error: %s", err.Error()) + return "", err + } + return backend.Finetune(*config, prompt, prediction.Response), nil +} + func parseFunctionCall(llmresult string) (string, string) { // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) ss := map[string]interface{}{} From d44c8bac078a2ecf0af0414bd683c32fcb0da452 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 16 Feb 2024 23:49:45 +0100 Subject: [PATCH 9/9] Feedback from code review --- api/openai/chat.go | 1 - 1 file changed, 1 deletion(-) diff --git a/api/openai/chat.go b/api/openai/chat.go index ac914533e8a..68c3a291a1b 100644 --- a/api/openai/chat.go +++ b/api/openai/chat.go @@ -440,7 +440,6 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) }, }, }, - FunctionCall: ss, }, }) } else {
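
Usage sketch: a minimal client exercising the new "tools" / "tool_choice" request fields and the streamed "tool_calls" deltas added by this series. The endpoint path, model name, and weather-function schema below are assumptions for illustration only; the field names mirror the struct tags and response shapes introduced in the patches above.

    // Illustrative client for the tools API added by this series (a sketch, not
    // part of the patches). It assumes a LocalAI instance on localhost:8080
    // exposing the OpenAI-compatible /v1/chat/completions route; the model name
    // and the weather function schema are placeholders.
    package main

    import (
    	"bufio"
    	"bytes"
    	"fmt"
    	"net/http"
    )

    func main() {
    	// "tools" and "tool_choice" are the request fields added in PATCH 1/9; the
    	// nested "function" object reuses the same schema as the pre-existing
    	// "functions" field.
    	body := []byte(`{
    	  "model": "gpt-3.5-turbo",
    	  "stream": true,
    	  "messages": [{"role": "user", "content": "What is the weather like in Boston, MA?"}],
    	  "tools": [{
    	    "type": "function",
    	    "function": {
    	      "name": "get_current_weather",
    	      "description": "Get the current weather for a location",
    	      "parameters": {
    	        "type": "object",
    	        "properties": {"location": {"type": "string"}},
    	        "required": ["location"]
    	      }
    	    }
    	  }]
    	}`)

    	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
    	if err != nil {
    		panic(err)
    	}
    	defer resp.Body.Close()

    	// With stream=true the handler first emits a chunk whose delta carries the
    	// tool call name, then one carrying the stringified arguments, and finally
    	// a closing chunk whose finish_reason is "tool_calls".
    	scanner := bufio.NewScanner(resp.Body)
    	for scanner.Scan() {
    		fmt.Println(scanner.Text())
    	}
    }

When the model instead selects the no-action function, PATCH 8/9 streams a plain assistant message produced by handleQuestion and the final chunk keeps finish_reason "stop", since no tool_calls delta was emitted.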