julep-ai · whiterabbit1983 · May 8, 2024 · May 6, 2024 · May 6, 2024 · May 6, 2024
diff --git a/agents-api/agents_api/activities/summarization.py b/agents-api/agents_api/activities/summarization.py
@@ -10,7 +10,8 @@
     entries_summarization_query,
 )
 from agents_api.common.protocol.entries import Entry
-from ..env import summarization_model_name
+from ..model_registry import JULEP_MODELS
+from ..env import summarization_model_name, model_inference_url, model_api_key
 
 
 example_previous_memory = """
@@ -128,6 +129,12 @@ async def run_prompt(
     parser: Callable[[str], str] = lambda x: x,
     **kwargs,
 ) -> str:
+    api_base = None
+    api_key = None
+    if model in JULEP_MODELS:
+        api_base = model_inference_url
+        api_key = model_api_key
+        model = f"openai/{model}"
     prompt = make_prompt(dialog, previous_memories, **kwargs)
     response = await acompletion(
         model=model,
@@ -141,6 +148,8 @@ async def run_prompt(
         temperature=temperature,
         stop=["<", "<|"],
         stream=False,
+        api_base=api_base,
+        api_key=api_key,
     )
 
     content = response.choices[0].message.content
@@ -159,7 +168,7 @@ async def summarization(session_id: str) -> None:
     assert len(entries) > 0, "no need to summarize on empty entries list"
 
     response = await run_prompt(
-        dialog=entries, previous_memories=[], model=summarization_model_name
+        dialog=entries, previous_memories=[], model=f"openai/{summarization_model_name}"
     )
 
     new_entry = Entry(

diff --git a/agents-api/agents_api/models/entry/proc_mem_context.py b/agents-api/agents_api/models/entry/proc_mem_context.py
@@ -18,7 +18,6 @@ def proc_mem_context_query(
 
     Parameters:
         session_id (UUID),
-        tool_query_embedding (list[float]),
         doc_query_embedding (list[float]),
         tools_confidence (float),
         docs_confidence (float),
@@ -29,9 +28,8 @@ def proc_mem_context_query(
     Return type:
         A pandas DataFrame containing the query results.
     """
-    VECTOR_SIZE = 768
+    VECTOR_SIZE = 1024
     session_id = str(session_id)
-    assert len(tool_query_embedding) == len(doc_query_embedding) == VECTOR_SIZE
 
     tools_radius: float = 1.0 - tools_confidence
     docs_radius: float = 1.0 - docs_confidence
@@ -41,14 +39,14 @@ def proc_mem_context_query(
     {{
         # Input table for the query
         # (This is temporary to this query)
-        input[session_id, tool_query, doc_query] <- [[
+        input[session_id, doc_query] <- [[
             to_uuid($session_id),
-            $tool_query_embedding,
+            # $tool_query_embedding,
             $doc_query_embedding,
         ]]
 
-        ?[session_id, tool_query, doc_query, agent_id, user_id] :=
-            input[session_id, tool_query, doc_query],
+        ?[session_id, doc_query, agent_id, user_id] :=
+            input[session_id, doc_query],
             *session_lookup{{
                 session_id,
                 agent_id,
@@ -59,7 +57,7 @@ def proc_mem_context_query(
             session_id: Uuid,
             agent_id: Uuid,
             user_id: Uuid,
-            tool_query: <F32; {VECTOR_SIZE}>,
+            # tool_query: <F32; {VECTOR_SIZE}>,
             doc_query: <F32; {VECTOR_SIZE}>,
         }}
     }} {{
@@ -130,7 +128,7 @@ def proc_mem_context_query(
 
         # Search for tools
         ?[role, name, content, token_count, created_at, index] :=
-            *_input{{agent_id, tool_query}},
+            #*_input{{agent_id, tool_query}},
             # ~agent_functions:embedding_space {{
             #     agent_id,
             #     name: fn_name,

diff --git a/agents-api/agents_api/models/entry/test_entry_queries.py b/agents-api/agents_api/models/entry/test_entry_queries.py
@@ -173,10 +173,10 @@ def _():
             client=client,
         ),
         embed_docs_snippets_query(
-            agent_doc_id, snippet_indices=[0], embeddings=[[1.0] * 768], client=client
+            agent_doc_id, snippet_indices=[0], embeddings=[[1.0] * 1024], client=client
         ),
         embed_docs_snippets_query(
-            user_doc_id, snippet_indices=[0], embeddings=[[1.0] * 768], client=client
+            user_doc_id, snippet_indices=[0], embeddings=[[1.0] * 1024], client=client
         ),
     ]
 
@@ -185,7 +185,7 @@ def _():
     result = proc_mem_context_query(
         session_id=session_id,
-        tool_query_embedding=[0.9] * 768,
-        doc_query_embedding=[0.9] * 768,
+        tool_query_embedding=[0.9] * 1024,
+        doc_query_embedding=[0.9] * 1024,
         client=client,
     )
 

diff --git a/agents-api/agents_api/routers/agents/routers.py b/agents-api/agents_api/routers/agents/routers.py
@@ -45,7 +45,6 @@
     embed_docs_snippets_query,
 )
 from agents_api.models.tools.create_tools import create_function_query
-from agents_api.models.tools.embed_tools import embed_functions_query
 from agents_api.models.tools.list_tools import list_functions_by_agent_query
 from agents_api.models.tools.get_tools import get_function_by_id_query
 from agents_api.models.tools.delete_tools import delete_function_by_id_query
@@ -247,30 +246,36 @@ async def create_agent(
 
     if request.docs:
         for info in request.docs:
+            content = [
+                (c.model_dump() if isinstance(c, ContentItem) else c)
+                for c in (
+                    [info.content] if isinstance(info.content, str) else info.content
+                )
+            ]
             create_docs_query(
                 owner_type="agent",
                 owner_id=new_agent_id,
                 id=uuid4(),
                 title=info.title,
-                content=info.content,
+                content=content,
                 metadata=info.metadata or {},
             )
 
     if request.tools:
         functions = [t.function for t in request.tools]
-        embeddings = await embed(
-            [
-                function_embed_instruction
-                + f"{function.name}, {function.description}, "
-                + "required_params:"
-                + function.parameters.model_dump_json()
-                for function in functions
-            ]
-        )
+        # embeddings = await embed(
+        #     [
+        #         function_embed_instruction
+        #         + f"{function.name}, {function.description}, "
+        #         + "required_params:"
+        #         + function.parameters.model_dump_json()
+        #         for function in functions
+        #     ]
+        # )
         create_tools_query(
             new_agent_id,
             functions,
-            embeddings,
+            [[0.0] * 768],
         )
 
     return res
@@ -435,20 +440,20 @@ async def create_tool(
         created_at=resp["created_at"][0],
     )
 
-    embeddings = await embed(
-        [
-            function_embed_instruction
-            + request.function.description
-            + "\nParameters: "
-            + json.dumps(request.function.parameters.model_dump())
-        ]
-    )
-
-    embed_functions_query(
-        agent_id=agent_id,
-        tool_ids=[tool_id],
-        embeddings=embeddings,
-    )
+    # embeddings = await embed(
+    #     [
+    #         function_embed_instruction
+    #         + request.function.description
+    #         + "\nParameters: "
+    #         + json.dumps(request.function.parameters.model_dump())
+    #     ]
+    # )
+
+    # embed_functions_query(
+    #     agent_id=agent_id,
+    #     tool_ids=[tool_id],
+    #     embeddings=embeddings,
+    # )
 
     return res
 

diff --git a/agents-api/agents_api/routers/sessions/session.py b/agents-api/agents_api/routers/sessions/session.py
@@ -19,7 +19,11 @@
 from ...common.protocol.entries import Entry
 from ...common.protocol.sessions import SessionData
 from ...common.utils.template import render_template
-from ...env import summarization_tokens_threshold
+from ...env import (
+    summarization_tokens_threshold,
+    docs_embedding_service_url,
+    docs_embedding_model_id,
+)
 from ...model_registry import (
     JULEP_MODELS,
     get_extra_settings,
@@ -201,6 +205,8 @@ async def forward(
                 ]
             ],
             join_inputs=False,
+            embedding_service_url=docs_embedding_service_url,
+            embedding_model_name=docs_embedding_model_id,
         )
 
         entries: list[Entry] = []

diff --git a/model-serving/model_api/protocol.py b/model-serving/model_api/protocol.py
@@ -93,7 +93,7 @@ class Type(Enum):
 class Tool(BaseModel):
     type: Type
     function: FunctionDef
-    id: str
+    id: str | None = None
 
 
 class SamplingParams(SamplingParams):