infiniflow · lizheng419 · Nov 19, 2024 · Nov 19, 2024 · Nov 20, 2024 · Nov 20, 2024
diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
@@ -13,6 +13,7 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
+import logging
 import re
 import json
 from api.db import LLMType

diff --git a/api/apps/tenant_app.py b/api/apps/tenant_app.py
@@ -83,7 +83,7 @@ def create(tenant_id):
 @manager.route('/<tenant_id>/user/<user_id>', methods=['DELETE'])  # noqa: F821
 @login_required
 def rm(tenant_id, user_id):
-    if current_user.id != tenant_id and current_user.id != user_id:
+    if current_user.id != tenant_id:
         return get_json_result(
             data=False,
             message='No authorization.',
@@ -111,6 +111,12 @@ def tenant_list():
 @manager.route("/agree/<tenant_id>", methods=["PUT"])  # noqa: F821
 @login_required
 def agree(tenant_id):
+    if current_user.id != tenant_id:
+        return get_json_result(
+            data=False,
+            message='No authorization.',
+            code=settings.RetCode.AUTHENTICATION_ERROR)
+
     try:
         UserTenantService.filter_update([UserTenant.tenant_id == tenant_id, UserTenant.user_id == current_user.id], {"role": UserTenantRole.NORMAL})
         return get_json_result(data=True)

diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
@@ -280,20 +280,72 @@ def decorate_answer(answer):
             (done_tm - retrieval_tm) * 1000)
         return {"answer": answer, "reference": refs, "prompt": prompt}
 
+
+    # Original streaming implementation, commented out below
+    # if stream:
+    #     last_ans = ""
+    #     answer = ""
+    #     for ans in chat_mdl.chat_streamly(prompt, msg[1:], gen_conf):
+    #         answer = ans
+    #         logging.info("answer_stream : {}".format(ans))
+    #         delta_ans = ans[len(last_ans):]
+    #         if num_tokens_from_string(delta_ans) < 16:
+    #             continue
+    #         last_ans = answer
+    #         yield {"answer": answer, "reference": {}, "audio_binary": tts(tts_mdl, delta_ans)}
+    #     delta_ans = answer[len(last_ans):]
+    #     if delta_ans:
+    #         yield {"answer": answer, "reference": {}, "audio_binary": tts(tts_mdl, delta_ans)}
+    #     yield decorate_answer(answer)
+
     if stream:
-        last_ans = ""
+        # logging.info("stream_mode : {}".format(msg[1:]))
         answer = ""
-        for ans in chat_mdl.chat_streamly(prompt, msg[1:], gen_conf):
-            answer = ans
-            delta_ans = ans[len(last_ans):]
-            if num_tokens_from_string(delta_ans) < 16:
-                continue
-            last_ans = answer
-            yield {"answer": answer, "reference": {}, "audio_binary": tts(tts_mdl, delta_ans)}
-        delta_ans = answer[len(last_ans):]
-        if delta_ans:
-            yield {"answer": answer, "reference": {}, "audio_binary": tts(tts_mdl, delta_ans)}
-        yield decorate_answer(answer)
+        for delta in chat_mdl.chat_streamly(prompt, msg[1:], gen_conf):
+            # Check whether the item is the total token count or a notification message
+            if isinstance(delta, str):
+                if delta.isdigit():
+                    # Handle the total token count (if needed)
+                    total_tokens = int(delta)
+                    continue
+                elif "\n**ERROR**:" in delta:
+                    # Handle the error message
+                    answer += delta
+                    yield {"answer": answer, "reference": {}, "audio_binary": b''}  # 错误时不生成音频
+                    continue
+
+                # Handle the incremental text
+                delta_ans = delta
+                # if num_tokens_from_string(delta_ans) < 16:
+                #     continue  # adjust the threshold as needed
+
+                # Update the full answer
+                answer += delta_ans
+
+                # Generate audio
+                audio = tts(tts_mdl, delta_ans)
+                # logging.info(f"Generated audio for delta: {delta_ans}")
+                yield {"answer": delta_ans, "reference": {}, "audio_binary": audio}
+            elif isinstance(delta, dict):
+                # In case chat_streamly still returns a dict (not recommended)
+                # e.g. {"new_text": "added content", "position": 10}
+                new_text = delta.get("new_text", "")
+                if not new_text:
+                    continue
+                if num_tokens_from_string(new_text) < 16:
+                    continue
+
+                # Update the full answer
+                answer += new_text
+
+                # Generate audio
+                audio = tts(tts_mdl, new_text)
+                yield {"answer": answer, "reference": {}, "audio_binary": audio}
+
+        # Finally, decorate the answer
+        decorated_answer = decorate_answer(answer)
+        # logging.info(f"Final decorated answer: {decorated_answer}")
+        yield decorated_answer
     else:
         answer = chat_mdl.chat(prompt, msg[1:], gen_conf)
         logging.debug("User: {}|Assistant: {}".format(

diff --git a/poetry.toml b/poetry.toml
@@ -1,4 +1,6 @@
 [virtualenvs]
 in-project = true
 create = true
-prefer-active-python = true
+prefer-active-python = true
+[repositories.tuna]
+url = "https://pypi.tuna.tsinghua.edu.cn/simple"
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
@@ -57,9 +57,50 @@ def chat(self, system, history, gen_conf):
         except openai.APIError as e:
             return "**ERROR**: " + str(e), 0
 
+    # def chat_streamly(self, system, history, gen_conf):
+    #     if system:
+    #         history.insert(0, {"role": "system", "content": system})
+    #     ans = ""
+    #     total_tokens = 0
+    #     try:
+    #         response = self.client.chat.completions.create(
+    #             model=self.model_name,
+    #             messages=history,
+    #             stream=True,
+    #             **gen_conf)
+    #         for resp in response:
+    #             if not resp.choices:
+    #                 continue
+    #             if not resp.choices[0].delta.content:
+    #                 resp.choices[0].delta.content = ""
+    #             ans += resp.choices[0].delta.content
+    #
+    #             if not hasattr(resp, "usage") or not resp.usage:
+    #                 total_tokens = (
+    #                             total_tokens
+    #                             + num_tokens_from_string(resp.choices[0].delta.content)
+    #                     )
+    #             elif isinstance(resp.usage, dict):
+    #                 total_tokens = resp.usage.get("total_tokens", total_tokens)
+    #             else:
+    #                 total_tokens = resp.usage.total_tokens
+    #
+    #             if resp.choices[0].finish_reason == "length":
+    #                 if is_chinese(ans):
+    #                     ans += LENGTH_NOTIFICATION_CN
+    #                 else:
+    #                     ans += LENGTH_NOTIFICATION_EN
+    #             yield ans
+    #
+    #     except openai.APIError as e:
+    #         yield ans + "\n**ERROR**: " + str(e)
+    #
+    #     yield total_tokens
+
     def chat_streamly(self, system, history, gen_conf):
         if system:
             history.insert(0, {"role": "system", "content": system})
+
         ans = ""
         total_tokens = 0
         try:
@@ -71,30 +112,44 @@ def chat_streamly(self, system, history, gen_conf):
             for resp in response:
                 if not resp.choices:
                     continue
-                if not resp.choices[0].delta.content:
-                    resp.choices[0].delta.content = ""
-                ans += resp.choices[0].delta.content
 
-                if not hasattr(resp, "usage") or not resp.usage:
-                    total_tokens = (
+                finish_reason = resp.choices[0].finish_reason
+                delta_content = resp.choices[0].delta.content if resp.choices[0].delta.content else ""
+
+                # If there is new text, accumulate it and yield the increment
+                if delta_content:
+                    ans += delta_content
+
+                    # Update the token count
+                    if not hasattr(resp, "usage") or not resp.usage:
+                        total_tokens = (
                                 total_tokens
                                 + num_tokens_from_string(resp.choices[0].delta.content)
                         )
-                elif isinstance(resp.usage, dict):
-                    total_tokens = resp.usage.get("total_tokens", total_tokens)
-                else:
-                    total_tokens = resp.usage.total_tokens
+                    elif isinstance(resp.usage, dict):
+                        total_tokens = resp.usage.get("total_tokens", total_tokens)
+                    else:
+                        total_tokens = resp.usage.total_tokens
 
-                if resp.choices[0].finish_reason == "length":
+                    yield delta_content
+
+                # Check finish_reason even when delta_content is empty
+                if finish_reason == "length":
+                    # Add a notification when the output was cut off by the length limit
                     if is_chinese(ans):
-                        ans += LENGTH_NOTIFICATION_CN
+                        notification = LENGTH_NOTIFICATION_CN
                     else:
-                        ans += LENGTH_NOTIFICATION_EN
-                yield ans
+                        notification = LENGTH_NOTIFICATION_EN
+                    yield notification
+
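+                # If finish_reason is "stop" or another value, the corresponding logic can be added here
+                # ("stop" gets no extra handling in this example, since it usually means the answer ended normally)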
 
         except openai.APIError as e:
+            # Yield the error message
             yield ans + "\n**ERROR**: " + str(e)
 
+        # Finally, yield the total token count
         yield total_tokens
 
 

diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
@@ -29,6 +29,7 @@
 from rag.nlp import is_english
 from api.utils import get_uuid
 from api.utils.file_utils import get_project_base_directory
+from google.generativeai import client, GenerativeModel, GenerationConfig
 
 
 class Base(ABC):
@@ -57,6 +58,7 @@ def chat(self, system, history, gen_conf, image=""):
         except Exception as e:
             return "**ERROR**: " + str(e), 0
 
+
     def chat_streamly(self, system, history, gen_conf, image=""):
         if system:
             history[-1]["content"] = system + history[-1]["content"] + "user query: " + history[-1]["content"]
@@ -92,7 +94,8 @@ def chat_streamly(self, system, history, gen_conf, image=""):
             yield ans + "\n**ERROR**: " + str(e)
 
         yield tk_count
-
+
+
     def image2base64(self, image):
         if isinstance(image, bytes):
             return base64.b64encode(image).decode("utf-8")