local server fixes (#3256)
TEMPORARY.
cebtenzzre committed Dec 11, 2024
1 parent 9d2b1d6 · commit d28c0ea
Showing 4 changed files with 21 additions and 5 deletions.
gpt4all-chat/CHANGELOG.md (7 additions, 0 deletions)
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## [Unreleased]
+
+### Fixed
+- Fix API server ignoring assistant messages in history after v3.5.0 ([#3256](https://github.com/nomic-ai/gpt4all/pull/3256))
+- Fix API server replying with incorrect token counts and stop reason after v3.5.0 ([#3256](https://github.com/nomic-ai/gpt4all/pull/3256))
+
 ## [3.5.1] - 2024-12-10
 
 ### Fixed
@@ -211,6 +217,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Fix several Vulkan resource management issues ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
 - Fix crash/hang when some models stop generating, by showing special tokens ([#2701](https://github.com/nomic-ai/gpt4all/pull/2701))
 
+[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/v3.5.1...HEAD
 [3.5.1]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0...v3.5.1
 [3.5.0]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0-rc2...v3.5.0
 [3.5.0-rc2]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0-rc1...v3.5.0-rc2
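Both new changelog entries concern the local OpenAI-compatible API server. As a minimal sketch of the request shape that regressed, the following Qt program sends a chat completion whose message history already includes an assistant turn, then prints the reply, whose "usage" token counts and "finish_reason" the fixes make accurate again. The port (4891 is the server's usual default), the route, and the model name are assumptions here; adjust them to your configuration.

#include <QCoreApplication>
#include <QEventLoop>
#include <QJsonArray>
#include <QJsonDocument>
#include <QJsonObject>
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QNetworkRequest>
#include <QTextStream>

int main(int argc, char *argv[])
{
    QCoreApplication app(argc, argv);

    // History that already contains an assistant turn: the case the
    // server mishandled after v3.5.0.
    QJsonArray messages {
        QJsonObject { {"role", "user"},      {"content", "Hello!"} },
        QJsonObject { {"role", "assistant"}, {"content", "Hi! How can I help?"} },
        QJsonObject { {"role", "user"},      {"content", "Summarize our chat so far."} },
    };
    QJsonObject body {
        {"model",    "Llama 3.2 1B Instruct"},  // assumed model name; use one you have loaded
        {"messages", messages},
    };

    // Assumed default local API server address; adjust if configured differently.
    QNetworkRequest request(QUrl(QStringLiteral("http://localhost:4891/v1/chat/completions")));
    request.setHeader(QNetworkRequest::ContentTypeHeader, QStringLiteral("application/json"));

    QNetworkAccessManager manager;
    QNetworkReply *reply = manager.post(request, QJsonDocument(body).toJson());

    // Block until the reply arrives (fine for a one-shot demo program).
    QEventLoop loop;
    QObject::connect(reply, &QNetworkReply::finished, &loop, &QEventLoop::quit);
    loop.exec();

    // With the fix, "usage" and "finish_reason" reflect the actual generation.
    QTextStream(stdout) << QJsonDocument::fromJson(reply->readAll()).toJson(QJsonDocument::Indented);
    reply->deleteLater();
    return 0;
}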
gpt4all-chat/src/chatllm.cpp (1 addition, 1 deletion)
@@ -949,7 +949,7 @@ auto ChatLLM::promptInternal(
         }
     }
 
-    PromptResult result;
+    PromptResult result {};
 
     auto handlePrompt = [this, &result](std::span<const LLModel::Token> batch, bool cached) -> bool {
        Q_UNUSED(cached)
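The one-line change above switches the local variable from default-initialization to value-initialization. For an aggregate with built-in members, "PromptResult result;" leaves those members holding indeterminate values, whereas "PromptResult result {};" zero-initializes them, which matters when some fields (such as token counters) are only conditionally written later; this lines up with the changelog entry about incorrect token counts. A self-contained sketch of the distinction, using a hypothetical Stats struct rather than the real PromptResult:

#include <iostream>

// Hypothetical aggregate standing in for PromptResult; the real struct
// lives in gpt4all-chat and is not reproduced here.
struct Stats {
    int promptTokens;
    int responseTokens;
};

int main()
{
    Stats defaulted;   // default-initialized: members hold indeterminate values
    Stats zeroed {};   // value-initialized: members are guaranteed to be 0

    (void)defaulted;   // reading defaulted.promptTokens here would be undefined behavior

    // zeroed is always safe to read and accumulate into.
    std::cout << zeroed.promptTokens + zeroed.responseTokens << '\n';  // prints 0
    return 0;
}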
gpt4all-chat/src/chatmodel.h (12 additions, 3 deletions)
@@ -178,8 +178,17 @@ class ChatItem : public QObject
         setParent(parent);
     }
 
-    ChatItem(QObject *parent, response_tag_t, bool isCurrentResponse = true)
-        : QObject(nullptr), name(u"Response: "_s), isCurrentResponse(isCurrentResponse)
+    // A new response, to be filled in
+    ChatItem(QObject *parent, response_tag_t)
+        : QObject(nullptr), name(u"Response: "_s), isCurrentResponse(true)
+    {
+        moveToThread(parent->thread());
+        setParent(parent);
+    }
+
+    // An existing response, from Server
+    ChatItem(QObject *parent, response_tag_t, const QString &value)
+        : QObject(nullptr), name(u"Response: "_s), value(value)
     {
         moveToThread(parent->thread());
         setParent(parent);
@@ -807,7 +816,7 @@ class ChatModel : public QAbstractListModel
         Q_ASSERT(currentResponse->isCurrentResponse);
 
         // Create a new response container for any text and the tool call
-        ChatItem *newResponse = new ChatItem(this, ChatItem::response_tag, true /*isCurrentResponse*/);
+        ChatItem *newResponse = new ChatItem(this, ChatItem::response_tag);
 
         // Add preceding text if any
         if (!split.first.isEmpty()) {
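The defaulted-bool constructor is split into two overloads: one builds a fresh response that the model is about to stream into (isCurrentResponse set to true), and the other rebuilds a finished assistant message from server-supplied history, now carrying its text in value. Hypothetical call sites, assuming a parent chatModel pointer and using the ChatItem::response_tag spelling visible in the second hunk:

// A fresh response the model will stream into (isCurrentResponse starts true):
auto *live = new ChatItem(chatModel, ChatItem::response_tag);

// A completed assistant turn restored from API request history; the text is
// supplied up front and the item is not treated as the current response:
auto *replayed = new ChatItem(chatModel, ChatItem::response_tag,
                              QStringLiteral("Hi! How can I help?"));

The second overload is presumably what lets the API server keep assistant messages from request history instead of ignoring them, matching the first changelog entry and the "from Server" comment in the diff.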
gpt4all-chat/tests/python/test_server_api.py (1 addition, 1 deletion)
@@ -252,7 +252,7 @@ def test_with_models(chat_server_with_model: None) -> None:
     assert response == EXPECTED_COMPLETIONS_RESPONSE
 
 
-@pytest.mark.xfail(reason='Assertion failure in GPT4All. See nomic-ai/gpt4all#3133')
+# Fixed by nomic-ai/gpt4all#3202.
 def test_with_models_temperature(chat_server_with_model: None) -> None:
     data = {
         'model': 'Llama 3.2 1B Instruct',
