From 8b0ffffe9b698e442b89fd5781dac6558f61e8c9 Mon Sep 17 00:00:00 2001 From: YoanSallami Date: Thu, 19 Sep 2024 19:37:46 +0200 Subject: [PATCH] Finish falkorDB notebook --- README.md | 11 +- docs/Core API/Data Types/Agent Output.md | 26 ++ docs/Core API/Data Types/Agent State.md | 64 +++++ docs/Core API/Data Types/Agent Step.md | 100 +++++++ docs/Core API/Data Types/Document.md | 20 +- docs/Core API/Data Types/Fact.md | 25 +- docs/Core API/Data Types/Graph Program.md | 152 ---------- docs/Core API/Data Types/Graph Schema.md | 62 ++++ .../Data Types/Interaction Session.md | 66 +++++ docs/Core API/Data Types/Query.md | 28 ++ docs/Core API/Data Types/Session.md | 28 -- docs/Core API/Graph Program.md | 194 +++++++++++++ .../Sentence Transformer Embeddings.md | 0 .../Local Models/Ollama.md | 0 .../Language Models API/Remote Models/Groq.md | 0 .../Remote Models/Mistral.md | 0 .../Remote Models/OpenAI.md | 1 + .../Remote Models/SynaLinks.md | 1 + docs/Memory API/Trace Memory/Trace Memory.md | 1 - hybridagi/core/datatypes.py | 4 +- .../falkordb/falkordb_fact_retriever.py | 1 - notebooks/using_falkordb.ipynb | 272 ++++++++++++++++-- 22 files changed, 844 insertions(+), 212 deletions(-) create mode 100644 docs/Core API/Data Types/Agent Output.md create mode 100644 docs/Core API/Data Types/Agent State.md delete mode 100644 docs/Core API/Data Types/Graph Program.md create mode 100644 docs/Core API/Data Types/Graph Schema.md create mode 100644 docs/Core API/Data Types/Interaction Session.md delete mode 100644 docs/Core API/Data Types/Session.md create mode 100644 docs/Core API/Graph Program.md create mode 100644 docs/Embeddings API/Sentence Transformer Embeddings.md create mode 100644 docs/Language Models API/Local Models/Ollama.md create mode 100644 docs/Language Models API/Remote Models/Groq.md create mode 100644 docs/Language Models API/Remote Models/Mistral.md create mode 100644 docs/Language Models API/Remote Models/OpenAI.md create mode 100644 docs/Language Models API/Remote Models/SynaLinks.md diff --git a/README.md b/README.md index c355732..d5f30ba 100755 --- a/README.md +++ b/README.md @@ -42,7 +42,10 @@ - [Using External Tools](notebooks/using_external_tools.ipynb) - [Add Documents (on the fly)](notebooks/updating_documents.ipynb) - [Add Facts (on the fly)](notebooks/updating_facts.ipynb) +- [Interactive ReACT](notebooks/interactive_react.ipynb) - [ReACT Agent](notebooks/react_agent.ipynb) +- [Reflexion Agent](notebooks/reflexion_agent.ipynb) +- [Using FalkorDB](notebooks/using_falkordb.ipynb) ## What is HybridAGI? @@ -139,9 +142,11 @@ You can add more tools by using the `FunctionTool` and python functions like now ### Graph Databases Integrations -- Local Graph Memory for rapid prototyping based on [NetworkX](https://networkx.org/) -- [FalkorDB](https://www.falkordb.com/) low latency in-memory hybrid vector/graph database (coming soon) -- [Kuzu](https://kuzudb.com/) A highly scalable, extremely fast, easy-to-use embeddable graph database (coming soon) +- Local Graph Memory for rapid prototyping based on [NetworkX](https://networkx.org/). +- [FalkorDB](https://www.falkordb.com/) low latency in-memory hybrid vector/graph database. +- [Kuzu](https://kuzudb.com/) A highly scalable, extremely fast, easy-to-use embeddable graph database (coming soon). + +We accept the contributions for more database integrations. Feel free to join the discord channel for more information! 
### LLM Agent as Graph VS LLM Agent as Graph Interpreter
diff --git a/docs/Core API/Data Types/Agent Output.md b/docs/Core API/Data Types/Agent Output.md
new file mode 100644
index 0000000..06332b9
--- /dev/null
+++ b/docs/Core API/Data Types/Agent Output.md
@@ -0,0 +1,26 @@
+# Agent Output
+
+This data structure represents the output of the Agent (i.e. the result of executing a graph program).
+
+`FinishReason`: The different reasons for the end of a program.
+
+`AgentOutput`: The output returned by the Agent system.
+
+## Definition
+
+```python
+class FinishReason(str, Enum):
+    MaxIters = "max_iters"
+    Finished = "finished"
+    Error = "error"
+
+class AgentOutput(BaseModel, dspy.Prediction):
+    finish_reason: FinishReason = Field(description="The finish reason", default=FinishReason.Finished)
+    final_answer: str = Field(description="The final answer or error if any", default="")
+    program_trace: AgentStepList = Field(description="The resulting program trace", default_factory=AgentStepList)
+    session: InteractionSession = Field(description="The resulting interaction session", default_factory=InteractionSession)
+
+    def __init__(self, **kwargs):
+        BaseModel.__init__(self, **kwargs)
+        dspy.Prediction.__init__(self, **kwargs)
+```
\ No newline at end of file
diff --git a/docs/Core API/Data Types/Agent State.md b/docs/Core API/Data Types/Agent State.md
new file mode 100644
index 0000000..ff55c05
--- /dev/null
+++ b/docs/Core API/Data Types/Agent State.md
@@ -0,0 +1,64 @@
+# Agent State
+
+This data structure represents the state of the Agent. It contains a program stack that allows the agent to jump to other programs by calling them.
+
+`ProgramState`: Represents the state of an executed program.
+
+`AgentState`: Represents the state of the agent.
+
+## Definition
+
+```python
+
+class ProgramState(BaseModel):
+    current_program: GraphProgram = Field(description="The current program")
+    current_step: Union[Control, Action, Decision, Program] = Field(description="The current step")
+
+class AgentState(BaseModel):
+    current_hop: int = Field(description="The current hop", default=0)
+    decision_hop: int = Field(description="The current decision hop", default=0)
+    program_trace: AgentStepList = Field(description="The program trace", default_factory=AgentStepList)
+    program_stack: Iterable[ProgramState] = Field(description="The program stack", default=deque())
+    objective: Query = Field(description="The user objective query", default_factory=Query)
+    final_answer: str = Field(description="The agent final answer", default="")
+    variables: Dict[str, Any] = Field(description="The variables of the program", default={})
+    session: InteractionSession = Field(description="The current interaction session", default_factory=InteractionSession)
+
+    def get_current_state(self) -> Optional[ProgramState]:
+        """Method to get the current program state"""
+        if len(self.program_stack) > 0:
+            return self.program_stack[-1]
+        return None
+
+    def get_current_program(self) -> Optional[GraphProgram]:
+        """Method to retrieve the current program from the stack"""
+        if len(self.program_stack) > 0:
+            return self.program_stack[-1].current_program
+        return None
+
+    def get_current_step(self) -> Optional[Union[Control, Action, Decision, Program]]:
+        """Method to retrieve the current step from the stack"""
+        if len(self.program_stack) > 0:
+            return self.program_stack[-1].current_step
+        return None
+
+    def set_current_step(self, step: Union[Control, Action, Decision, Program]):
+        """Method to set the current step on the stack"""
+        if len(self.program_stack) > 0:
+            self.program_stack[-1].current_step = step
+        else:
+            raise ValueError("Cannot set the current step when program finished")
+
+    def call_program(self, program: GraphProgram):
+        """Method to call a program"""
+        self.program_stack.append(
+            ProgramState(
+                current_program = program,
+                current_step = program.get_starting_step(),
+            )
+        )
+
+    def end_program(self):
+        """Method to end the current program (pop the stack)"""
+        self.program_stack.pop()
+```
\ No newline at end of file
diff --git a/docs/Core API/Data Types/Agent Step.md b/docs/Core API/Data Types/Agent Step.md
index e69de29..a140485 100644
--- a/docs/Core API/Data Types/Agent Step.md
+++ b/docs/Core API/Data Types/Agent Step.md
@@ -0,0 +1,100 @@
+# Agent Step
+
+This page describes the data structures used by the Agent system. You usually don't have to worry about them because they are handled automatically by the system, but for documentation purposes, here are the corresponding definitions.
+
+`AgentStep`: Represents a step performed by the Agent to be stored into memory.
+
+`AgentStepList`: Represents a list of steps performed by the Agent.
+
+`QueryWithSteps`: Represents a query associated with a step list, used by the action retrievers and rerankers.
+
+## Definition
+
+```python
+
+class AgentStepType(str, Enum):
+    Action = "Action"
+    Decision = "Decision"
+    ProgramCall = "ProgramCall"
+    ProgramEnd = "ProgramEnd"
+
+ACTION_TEMPLATE = \
+"""--- Step {hop} ---
+Action Purpose: {purpose}
+Action: {prediction}"""
+
+DECISION_TEMPLATE = \
+"""--- Step {hop} ---
+Decision Purpose: {purpose}
+Decision: {choice}"""
+
+CALL_PROGRAM_TEMPLATE = \
+"""--- Step {hop} ---
+Call Program: {program}
+Program Purpose: {purpose}"""
+
+END_PROGRAM_TEMPLATE = \
+"""--- Step {hop} ---
+End Program: {program}"""
+
+class AgentStep(BaseModel):
+    id: Union[UUID, str] = Field(description="Unique identifier for a step", default_factory=uuid4)
+    parent_id: Optional[Union[UUID, str]] = Field(description="The previous step id if any", default=None)
+    hop: int = Field(description="The step hop", default=0)
+    step_type: AgentStepType = Field(description="The step type")
+    inputs: Optional[Dict[str, Any]] = Field(description="The inputs of the step", default=None)
+    outputs: Optional[Dict[str, Any]] = Field(description="The outputs of the step", default=None)
+    vector: Optional[List[float]] = Field(description="Vector representation of the step", default=None)
+    metadata: Optional[Dict[str, Any]] = Field(description="Additional information about the step", default=None)
+    created_at: datetime = Field(description="Time when the step was created", default_factory=datetime.now)
+
+    def __str__(self):
+        if self.inputs is None:
+            self.inputs = {}
+
+        if self.step_type == AgentStepType.Action:
+            return ACTION_TEMPLATE.format(
+                hop=self.hop,
+                purpose=self.inputs.get("purpose", ""),
+                prediction=json.dumps(self.outputs, indent=2),
+            )
+        elif self.step_type == AgentStepType.Decision:
+            return DECISION_TEMPLATE.format(
+                hop=self.hop,
+                purpose=self.inputs.get("purpose", ""),
+                choice=self.outputs.get("choice", ""),
+            )
+        elif self.step_type == AgentStepType.ProgramCall:
+            return CALL_PROGRAM_TEMPLATE.format(
+                hop=self.hop,
+                purpose=self.inputs.get("purpose", ""),
+                program=self.inputs.get("program", ""),
+            )
+        elif self.step_type == AgentStepType.ProgramEnd:
+            return END_PROGRAM_TEMPLATE.format(
+                hop=self.hop,
+                program=self.inputs.get("program", ""),
+            )
+        else:
+            raise ValueError("Invalid type for AgentStep")
+
+    def
to_dict(self): + return {"step": str(self)} + +class AgentStepList(BaseModel, dspy.Prediction): + steps: List[AgentStep] = Field(description="List of agent steps", default=[]) + + def __init__(self, **kwargs): + BaseModel.__init__(self, **kwargs) + dspy.Prediction.__init__(self, **kwargs) + + def to_dict(self): + return {"steps": [s.to_dict() for s in self.steps]} + +class QueryWithSteps(BaseModel, dspy.Prediction): + queries: QueryList = Field(description="The input query list", default_factory=QueryList) + steps: List[AgentStep] = Field(description="List of agent steps", default=[]) + + def to_dict(self): + return {"queries": [q.query for q in self.queries.queries], "steps": [s.to_dict() for s in self.steps]} +``` \ No newline at end of file diff --git a/docs/Core API/Data Types/Document.md b/docs/Core API/Data Types/Document.md index 47a140b..d25e971 100644 --- a/docs/Core API/Data Types/Document.md +++ b/docs/Core API/Data Types/Document.md @@ -2,14 +2,15 @@ Documents are the atomic data used in HybridAGI's Document Memory, they are used to represent textual data and their chunks in the system. Allowing the system to implement vector-only [Retrieval Augmented Generation](https://en.wikipedia.org/wiki/Retrieval-augmented_generation) systems. -`Document`: Represent an unstructured textual data to be processed or saved into memory +`Document`: Represent an unstructured textual data to be processed or saved into memory. -`DocumentList`: A list of documents to be processed or saved into memory +`DocumentList`: A list of documents to be processed or saved into memory. + +`QueryWithDocuments`: A list of document associated with a Query used by the retrievers and rerankers. ## Definition ```python - class Document(BaseModel): id: Union[UUID, str] = Field(description="Unique identifier for the document", default_factory=uuid4) text: str = Field(description="The actual text content of the document") @@ -33,12 +34,21 @@ class DocumentList(BaseModel, dspy.Prediction): def to_dict(self): return {"documents": [d.to_dict() for d in self.docs]} +class QueryWithDocuments(BaseModel, dspy.Prediction): + queries: QueryList = Field(description="The input query list", default_factory=QueryList) + docs: Optional[List[Document]] = Field(description="List of documents", default=[]) + + def __init__(self, **kwargs): + BaseModel.__init__(self, **kwargs) + dspy.Prediction.__init__(self, **kwargs) + + def to_dict(self): + return {"queries": [q.query for q in self.queries.queries], "documents": [d.to_dict() for d in self.docs]} ``` ## Usage ```python - input_data = \ [ { @@ -60,6 +70,4 @@ for data in input_data: metadata={"title": data["title"]}, ) ) - ->>> ``` \ No newline at end of file diff --git a/docs/Core API/Data Types/Fact.md b/docs/Core API/Data Types/Fact.md index 281aa08..04654f5 100644 --- a/docs/Core API/Data Types/Fact.md +++ b/docs/Core API/Data Types/Fact.md @@ -1,6 +1,6 @@ # Fact -Facts are the atomic data of a [Knowledge Graph](https://en.wikipedia.org/wiki/Knowledge_graph). They represent the relations between two entities (a subject and object). They are the basis of knowledge based systems and allowing to represent precise and formal knowledge. With them you can implement [Knowledge Graph based Retrieval Augmented Generation](). +Facts are the atomic data of a [Knowledge Graph](https://en.wikipedia.org/wiki/Knowledge_graph). They represent the relations between two entities (respectively a subject and an object). 
They are the basis of knowledge based systems they allow to represent precise and formal knowledge. With them you can implement Knowledge Graph based Retrieval Augmented Generation. `Entity`: Represent an entity like a person, object, place or document to be processed or saved into memory @@ -44,6 +44,17 @@ class EntityList(BaseModel, dspy.Prediction): def to_dict(self): return {"entities": [e.to_dict() for e in self.entities]} +class QueryWithEntities(BaseModel, dspy.Prediction): + queries: QueryList = Field(description="The input query list", default_factory=QueryList) + entities: List[Entity] = Field(description="List of entities", default=[]) + + def __init__(self, **kwargs): + BaseModel.__init__(self, **kwargs) + dspy.Prediction.__init__(self, **kwargs) + + def to_dict(self): + return {"queries": [q.query for q in self.queries.queries], "entities": [e.to_dict() for e in self.entities]} + class Relationship(BaseModel): id: Union[UUID, str] = Field(description="Unique identifier for the relation", default_factory=uuid4) name: str = Field(description="Relationship name") @@ -117,9 +128,21 @@ class FactList(BaseModel, dspy.Prediction): def to_dict(self): return {"facts": [f.to_dict() for f in self.facts]} +class QueryWithFacts(BaseModel, dspy.Prediction): + queries: QueryList = Field(description="The input query list", default_factory=QueryList) + facts: Optional[List[Fact]] = Field(description="List of facts", default=[]) + + def __init__(self, **kwargs): + BaseModel.__init__(self, **kwargs) + dspy.Prediction.__init__(self, **kwargs) + + def to_dict(self): + return {"queries": [q.query for q in self.queries.queries], "facts": [f.to_dict() for f in self.facts]} + ``` ## Usage ``` + ``` \ No newline at end of file diff --git a/docs/Core API/Data Types/Graph Program.md b/docs/Core API/Data Types/Graph Program.md deleted file mode 100644 index df71acf..0000000 --- a/docs/Core API/Data Types/Graph Program.md +++ /dev/null @@ -1,152 +0,0 @@ -# Graph Program - -The Graph Programs are a special data type representing a workflow of actions and decisions with calls to other programs. They are used by our own custom Agent, the `GraphProgramInterpreter`. In order help you to build them, we provide two ways of doing it: Using Python or Cypher. - -The two ways are equivalent and allows you to choose the one you prefer, we recommend you however to use the pythonic way, to avoid syntax errors, and eventually save them into Cypher format for later use. - -### Python Usage - -```python -import hybridagi.core.graph_program as gp - -main = gp.GraphProgram( - name = "main", - description = "The main program", -) - -main.add("answer", gp.Action( - tool = "Speak", - purpose = "Answer the Objective's question", - prompt = "Please answer to the Objective's question", -)) - -main.connect("start", "answer") -main.connect("answer", "end") - -``` - -### Building your program - -To perform a formal verification of the graph data we provide a way to build your `GraphProgram` ensuring that the data flow is correct. If some inconsistency is detected it will raise an error. For the above program you just have to do: - -```python - -main.build() - -``` - -This operation will check that there no orphan nodes in your graph (nodes connected to any nodes), as well as checking that each node is reachable from the `start` and `end` nodes. 
- -## Using decision-making steps - -Decision making steps allow the Agent to branch over different paths in a program, like conditions in traditional programming, it allow conditional loops and multi-output decisions. - -```python -import hybridagi.core.graph_program as gp - -main=gp.GraphProgram(name="main", description="The main program") - -main.add("is_objective_unclear", gp.Decision( - purpose="Check if the question needs clarification or not", - prompt="Is the following question unclear?\n{{question}}", - inputs=["objective"], -)) - -main.add("clarify", gp.Action( - purpose="Ask one question to clarify the user's objective", - tool="AskUser", - prompt="Please pick one question to clarify the following: {{objective}}", - inputs=["objective"], - output="clarification" -)) - -main.add("answer", gp.Action( - purpose="Answer the question", - tool="Speak", - prompt="Please answer to the following question: {{objective}}", - inputs=["objective"], -)) - -main.add("refine_objective", gp.Action( - purpose="Refine the objective", - tool="Predict", - prompt= \ -"""You asked the following question: -Question: {{clarification}} - -Please refine the following objective: -Objective: {{objective}}""", - inputs=["objective", "clarification"], - output="objective" - )) - -main.connect("start", "is_objective_unclear") -main.connect("is_objective_unclear", "clarify", label="Clarify") -main.connect("is_objective_unclear", "answer", label="Answer") -main.connect("clarify", "refine_objective") -main.connect("refine_objective", "answer") -main.connect("answer", "end") - -main.build() - -``` - -## Using sub-programs - - TODO - -### Loading from Cypher - -```python - -cypher == \ -r"""// @desc: The main program -CREATE -// Nodes declaration -(start:Control {purpose: "Start"}), -(end:Control {purpose: "End"}), -(is_objective_unclear:Decision { - purpose: "is_objective_unclear", - prompt: "Is the following question unclear?\n{{question}}", - inputs: [ - "objective" - ] -}), -(clarify:Action { - purpose: "Ask one question to clarify the user's objective", - tool: "AskUser", - prompt: "Please pick one question to clarify the following: {{objective}}", - inputs: [ - "objective" - ], - output: "clarification" -}), -(answer:Action { - purpose: "Answer the question", - tool: "Speak", - prompt: "Please answer to the following question: {{objective}}", - inputs: [ - "objective" - ] -}), -(refine_objective:Action { - purpose: "Refine the objective", - tool: "Predict", - prompt: "You asked the following question:\nQuestion: {{clarification}}\n\nPlease refine the following objective:\nObjective: {{objective}}", - inputs: [ - "objective", - "clarification" - ], - output: "objective" -}), -// Structure declaration -(start)-[:NEXT]->(is_objective_unclear), -(is_objective_unclear)-[:CLARIFY]->(clarify), -(is_objective_unclear)-[:ANSWER]->(answer), -(clarify)-[:NEXT]->(refine_objective), -(refine_objective)-[:NEXT]->(answer), -(answer)-[:NEXT]->(end)""" - -main = gp.GraphProgram().from_cypher(cypher) - -``` diff --git a/docs/Core API/Data Types/Graph Schema.md b/docs/Core API/Data Types/Graph Schema.md new file mode 100644 index 0000000..e7dcf96 --- /dev/null +++ b/docs/Core API/Data Types/Graph Schema.md @@ -0,0 +1,62 @@ +# Graph Schema + +This datatype represent a Cypher oriented Graph Schema used to constrain the generation of a LLM. 
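+
+For example, here is a minimal sketch of building a schema from its Cypher form (the import path and entity labels are illustrative assumptions, see the definitions in the next section):
+
+```python
+from hybridagi.core.datatypes import GraphSchema  # assumed import path
+
+schema = GraphSchema().from_cypher(
+"""(:Pathology)-[:HAS_SYMPTOMS]->(:Symptom),
+(:Pathology)-[:HAS_TREATMENT]->(:Treatment)"""
+)
+
+# Print the parsed schema back in its Cypher form
+print(schema.to_cypher())
+```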
+
+## Definition
+
+```python
+class FactSchema(BaseModel):
+    source: str
+    predicate: str
+    target: str
+
+    def to_cypher(self) -> str:
+        return "(:"+self.source+")-[:"+self.predicate+"]->(:"+self.target+")"
+
+    def from_cypher(self, cypher_schema: str) -> "FactSchema":
+        match = re.match(CYPHER_SCHEMA_REGEX, cypher_schema)
+        if match:
+            self.source = match.group(1)
+            self.predicate = match.group(2)
+            self.target = match.group(3)
+            return self
+        else:
+            raise ValueError("Invalid Cypher schema provided")
+
+    def is_valid(self, fact: Fact):
+        if fact.subj.label != self.source:
+            return False
+        if fact.rel.name != self.predicate:
+            return False
+        if fact.obj.label != self.target:
+            return False
+        return True
+
+    def to_dict(self):
+        return {"fact_schema": self.to_cypher()}
+
+class GraphSchema(BaseModel, dspy.Prediction):
+    schemas: Optional[List[FactSchema]] = Field(description="The graph schema", default=[])
+
+    def __init__(self, **kwargs):
+        BaseModel.__init__(self, **kwargs)
+        dspy.Prediction.__init__(self, **kwargs)
+
+    def to_cypher(self) -> str:
+        return ",\n".join([s.to_cypher() for s in self.schemas])
+
+    def from_cypher(self, cypher_schema: str) -> "GraphSchema":
+        graph_schema = re.findall(CYPHER_SCHEMA_REGEX, cypher_schema)
+        self.schemas = []
+        for schema in graph_schema:
+            subject_label, predicate, object_label = schema
+            self.schemas.append(FactSchema(
+                source = subject_label,
+                predicate = predicate,
+                target = object_label,
+            ))
+        return self
+
+    def to_dict(self):
+        return {"schema": [s.to_dict() for s in self.schemas]}
+```
\ No newline at end of file
diff --git a/docs/Core API/Data Types/Interaction Session.md b/docs/Core API/Data Types/Interaction Session.md
new file mode 100644
index 0000000..90e0b2e
--- /dev/null
+++ b/docs/Core API/Data Types/Interaction Session.md
@@ -0,0 +1,66 @@
+# Interaction Session
+
+The interaction session represents the state of an interaction during the execution of a query by the agent. It provides a way for developers to personalize the interaction, build stateful chat applications, or simulate different personas to generate training data.
+
+`UserProfile`: Represents the user profile used to personalize the interaction and to simulate different user personas.
+
+`Role`: Represents the different roles in a conversation.
+
+`Message`: Represents a message in a conversation.
+
+`ChatHistory`: Represents a list of messages.
+
+`InteractionSession`: Represents the state of an interaction session.
+
+`QueryWithSession`: A query associated with an interaction session.
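+
+Here is a minimal usage sketch showing how these types fit together (the import path is an assumption and the content is only illustrative, see the definitions in the next section):
+
+```python
+from hybridagi.core.datatypes import InteractionSession, UserProfile, Message, Role  # assumed import path
+
+session = InteractionSession(
+    user = UserProfile(name="Alice", profile="A data scientist interested in graphs"),
+)
+
+# Append the exchanged messages to the chat history
+session.chat.msgs.append(Message(role=Role.User, content="Hello!"))
+session.chat.msgs.append(Message(role=Role.AI, content="Hi! How can I help you?"))
+
+print(session.to_dict())
+```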
+ +## Definition + +```python +class UserProfile(BaseModel): + id: Union[UUID, str] = Field(description="Unique identifier for the user", default_factory=uuid4) + name: Optional[str] = Field(description="The user name", default="Unknow") + profile: Optional[str] = Field(description="The user profile", default="An average user") + vector: Optional[List[float]] = Field(description="Vector representation of the user", default=None) + metadata: Optional[Dict[str, Any]] = Field(description="Additional information about the user", default={}) + + def to_dict(self): + return {"name": self.name, "profile": self.profile, "metadata": self.metadata} + +class Role(str, Enum): + AI = "AI" + User = "User" + +class Message(BaseModel): + role: Role = Field(description="The role (AI or User)") + content: str = Field(description="The message content") + created_at: datetime = Field(description="Time when the message was created", default_factory=datetime.now) + + def to_dict(self): + return {"message": "["+self.role+"]: "+self.content} + +class ChatHistory(BaseModel): + msgs: Optional[List[Message]] = Field(description="List of messages", default=[]) + + def to_dict(self): + return {"messages": [m.to_dict() for m in self.msgs]} + +class InteractionSession(BaseModel): + id: Union[UUID, str] = Field(description="Unique identifier for the interaction session", default_factory=uuid4) + user: Optional[UserProfile] = Field(description="The user profile", default_factory=UserProfile) + chat: Optional[ChatHistory] = Field(description="The chat history", default_factory=ChatHistory) + + def to_dict(self): + return {"user": self.user.to_dict(), "chat_history": [m.to_dict() for m in self.chat.msgs]} + +class QueryWithSession(BaseModel, dspy.Prediction): + query: Query = Field(description="The input user query", default_factory=Query) + session: InteractionSession = Field(description="The current interaction session", default_factory=InteractionSession) + + def __init__(self, **kwargs): + BaseModel.__init__(self, **kwargs) + dspy.Prediction.__init__(self, **kwargs) + + def to_dict(self): + return {"query": self.query.query, "session": self.session.to_dict()} +``` \ No newline at end of file diff --git a/docs/Core API/Data Types/Query.md b/docs/Core API/Data Types/Query.md index e69de29..155b472 100644 --- a/docs/Core API/Data Types/Query.md +++ b/docs/Core API/Data Types/Query.md @@ -0,0 +1,28 @@ +# Query + +The Queries represent the input data for the agent system, they are also used by the retrievers and rerankers and the query engine. + +`Query`: A user or LLM generated query. + +`QueryList`: A list of queries. 
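+
+Here is a minimal usage sketch (assuming both classes are importable from `hybridagi.core.datatypes`, the queries are only illustrative):
+
+```python
+from hybridagi.core.datatypes import Query, QueryList
+
+query = Query(query="What are the symptoms of depression?")
+
+# Group several queries together, e.g. for batched retrieval
+queries = QueryList(queries=[
+    query,
+    Query(query="What is the treatment for migraine?"),
+])
+
+print(queries.to_dict())
+```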
+
+## Definition
+
+```python
+class Query(BaseModel, dspy.Prediction):
+    query: str = Field(description="The input query", default="")
+
+    def __init__(self, **kwargs):
+        BaseModel.__init__(self, **kwargs)
+        dspy.Prediction.__init__(self, **kwargs)
+
+class QueryList(BaseModel, dspy.Prediction):
+    queries: Optional[List[Query]] = Field(description="List of queries", default=[])
+
+    def __init__(self, **kwargs):
+        BaseModel.__init__(self, **kwargs)
+        dspy.Prediction.__init__(self, **kwargs)
+
+    def to_dict(self):
+        return {"queries": [q.query for q in self.queries]}
+```
\ No newline at end of file
diff --git a/docs/Core API/Data Types/Session.md b/docs/Core API/Data Types/Session.md
deleted file mode 100644
index dba4620..0000000
--- a/docs/Core API/Data Types/Session.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# Session
-
-`UserProfile`: Represent the user profile used to personalize the interaction and by the simulation of the user.
-
-```python
-
-class UserProfile(BaseModel):
-    id: str = Field(description="Unique identifier for the user", default_factory=uuid4)
-    name: str = Field(description="The user name", default="Unknow")
-    profile: str = Field(description="The user profile", default="An average User")
-
-class RoleType(str, Enum):
-    AI = "AI"
-    User = "User"
-
-class Message(BaseModel):
-    role: RoleType
-    message: str
-
-class ChatHistory(BaseModel):
-    msgs: List[Message] = Field(description="List of messages", default=[])
-
-class InteractionSession(BaseModel):
-    id: str = Field(description="Unique identifier for the interaction session", default_factory=uuid4)
-    user_profile: UserProfile = Field(description="The user profile")
-    chat_history: ChatHistory = Field(description="The chat history")
-
-```
\ No newline at end of file
diff --git a/docs/Core API/Graph Program.md b/docs/Core API/Graph Program.md
new file mode 100644
index 0000000..29dd1fb
--- /dev/null
+++ b/docs/Core API/Graph Program.md
@@ -0,0 +1,194 @@
+# Graph Program
+
+The Graph Programs are a special data type representing a workflow of actions and decisions with calls to other programs. They are used by our own custom Agent. To help you build them, we provide two ways of doing it: using Python or Cypher.
+
+The two ways are equivalent, so you can choose the one you prefer. We recommend, however, using the pythonic way to avoid syntax errors, and eventually saving your programs into Cypher format for later use.
+
+## Python Usage
+
+```python
+import hybridagi.core.graph_program as gp
+
+main = gp.GraphProgram(
+    name = "main",
+    description = "The main program",
+)
+
+main.add(gp.Action(
+    id = "answer",
+    purpose = "Answer the Objective's question",
+    tool = "Speak",
+    prompt = "Please answer to the Objective's question",
+))
+
+main.connect("start", "answer")
+main.connect("answer", "end")
+
+```
+
+## Building your program
+
+To perform a formal verification of the graph data, we provide a way to build your `GraphProgram` and ensure that the data flow is correct. If any inconsistency is detected, an error is raised. For the above program you just have to do:
+
+```python
+main.build()
+```
+
+This operation will check that there are no orphan nodes in your graph (nodes not connected to any other node), as well as check that each node can be reached from the `start` node and can reach the `end` node.
+
+Although we verify the structure of the program, we cannot verify whether the tool names used are accurate or whether the referenced programs are correct outside of the execution environment.
+This implies that you should be careful to use the appropriate names; otherwise, the interpreter Agent will raise an error when it encounters the problematic step.
+
+## Using decision-making steps
+
+Decision-making steps allow the Agent to branch over different paths in a program. Like conditions in traditional programming, they allow conditional loops and multi-output decisions.
+
+```python
+import hybridagi.core.graph_program as gp
+
+main = gp.GraphProgram(
+    name="main",
+    description="The main program",
+)
+
+main.add(gp.Decision(
+    id="is_objective_unclear",
+    purpose="Check if the Objective's question is unclear",
+    question="Is the Objective's question unclear?",
+))
+
+main.add(gp.Action(
+    id="clarify",
+    purpose="Ask one question to clarify the user's Objective",
+    tool="AskUser",
+    prompt="Please pick one question to clarify the Objective's question",
+))
+
+main.add(gp.Action(
+    id="answer",
+    purpose="Answer the question",
+    tool="Speak",
+    prompt="Please answer to the Objective's question",
+))
+
+main.add(gp.Action(
+    id="refine_objective",
+    purpose="Refine the objective",
+    tool="UpdateObjective",
+    prompt="Please refine the user Objective",
+))
+
+main.connect("start", "is_objective_unclear")
+main.connect("is_objective_unclear", "clarify", label="Clarify")
+main.connect("is_objective_unclear", "answer", label="Answer")
+main.connect("clarify", "refine_objective")
+main.connect("refine_objective", "answer")
+main.connect("answer", "end")
+
+main.build()
+```
+
+## Using Program calls
+
+Program steps let a graph program call another program as a sub-routine, allowing you to compose workflows from reusable sub-programs.
+
+```python
+import hybridagi.core.graph_program as gp
+
+clarify_objective = gp.GraphProgram(
+    name="clarify_objective",
+    description="Clarify the objective by asking questions to the user",
+)
+
+clarify_objective.add(gp.Decision(
+    id = "is_anything_unclear",
+    purpose = "Check if the Objective is unclear",
+    question = "Is the Objective still unclear?",
+))
+
+clarify_objective.add(gp.Action(
+    id = "clarify",
+    purpose = "Ask question to clarify the user request",
+    tool = "AskUser",
+    prompt = "Pick one question to clarify the Objective",
+))
+
+clarify_objective.add(gp.Action(
+    id = "refine_objective",
+    purpose = "Refine the question",
+    tool = "UpdateObjective",
+    prompt = "Refine the Objective",
+))
+
+clarify_objective.connect("start", "is_anything_unclear")
+clarify_objective.connect("is_anything_unclear", "clarify", label="Clarify")
+clarify_objective.connect("is_anything_unclear", "end", label="Answer")
+clarify_objective.connect("clarify", "refine_objective")
+clarify_objective.connect("refine_objective", "end")
+
+clarify_objective.build()
+
+main = gp.GraphProgram(
+    name="main",
+    description="The main program",
+)
+
+main.add(gp.Program(
+    id = "clarify_objective",
+    purpose = "Clarify the user objective if needed",
+    program = "clarify_objective"
+))
+
+main.add(gp.Action(
+    id = "answer",
+    purpose = "Answer the objective's question",
+    tool = "Speak",
+    prompt = "Answer the Objective's question",
+))
+
+main.connect("start", "clarify_objective")
+main.connect("clarify_objective", "answer")
+main.connect("answer", "end")
+
+main.build()
+```
+
+## Loading from Cypher
+
+```python
+import hybridagi.core.graph_program as gp
+
+cypher = \
+"""
+// @desc: The main program
+CREATE
+// Nodes declaration
+(start:Control {id: "start"}),
+(end:Control {id: "end"}),
+(answer:Action {
+    id: "answer",
+    purpose: "Answer the Objective's question",
+    tool: "Speak",
+    prompt: "Please answer to the Objective's question"
+}),
+// Structure declaration
+(start)-[:NEXT]->(answer),
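+// NEXT relationships define the default control flow from one step to the next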
+(answer)-[:NEXT]->(end)
+"""
+
+main = gp.GraphProgram().from_cypher(cypher)
+
+```
+
+## Saving into a file
+
+```python
+# This command saves the program into the corresponding folder
+main.save("programs/")
+```
+
+## Loading from a file
+
+```python
+from hybridagi.readers import GraphProgramReader
+
+reader = GraphProgramReader()
+
+main = reader("main.cypher")
+```
diff --git a/docs/Embeddings API/Sentence Transformer Embeddings.md b/docs/Embeddings API/Sentence Transformer Embeddings.md
new file mode 100644
index 0000000..e69de29
diff --git a/docs/Language Models API/Local Models/Ollama.md b/docs/Language Models API/Local Models/Ollama.md
new file mode 100644
index 0000000..e69de29
diff --git a/docs/Language Models API/Remote Models/Groq.md b/docs/Language Models API/Remote Models/Groq.md
new file mode 100644
index 0000000..e69de29
diff --git a/docs/Language Models API/Remote Models/Mistral.md b/docs/Language Models API/Remote Models/Mistral.md
new file mode 100644
index 0000000..e69de29
diff --git a/docs/Language Models API/Remote Models/OpenAI.md b/docs/Language Models API/Remote Models/OpenAI.md
new file mode 100644
index 0000000..30404ce
--- /dev/null
+++ b/docs/Language Models API/Remote Models/OpenAI.md
@@ -0,0 +1 @@
+TODO
\ No newline at end of file
diff --git a/docs/Language Models API/Remote Models/SynaLinks.md b/docs/Language Models API/Remote Models/SynaLinks.md
new file mode 100644
index 0000000..30404ce
--- /dev/null
+++ b/docs/Language Models API/Remote Models/SynaLinks.md
@@ -0,0 +1 @@
+TODO
\ No newline at end of file
diff --git a/docs/Memory API/Trace Memory/Trace Memory.md b/docs/Memory API/Trace Memory/Trace Memory.md
index d6fa7f6..9573f08 100644
--- a/docs/Memory API/Trace Memory/Trace Memory.md
+++ b/docs/Memory API/Trace Memory/Trace Memory.md
@@ -1,7 +1,6 @@
 The trace memory is where the Agent system stores each executed step of the graph programs. It allows the system to recall actions between sessions, very much like the human episodic memory that allows us to remember past events and actions we performed.
- ```python from abc import ABC, abstractmethod from typing import Union, List diff --git a/hybridagi/core/datatypes.py b/hybridagi/core/datatypes.py index 6f9dad6..32ede9d 100644 --- a/hybridagi/core/datatypes.py +++ b/hybridagi/core/datatypes.py @@ -310,7 +310,6 @@ class AgentStepType(str, Enum): DECISION_TEMPLATE = \ """--- Step {hop} --- Decision Purpose: {purpose} -Decision Question: {question} Decision: {choice}""" CALL_PROGRAM_TEMPLATE = \ @@ -347,8 +346,7 @@ def __str__(self): return DECISION_TEMPLATE.format( hop=self.hop, purpose=self.inputs.get("purpose", ""), - question=self.inputs.get("question", ""), - choice=self.outputs["choice"] if self.outputs and "choice" in self.outputs else "", + choice=self.outputs.get("choice", ""), ) elif self.step_type == AgentStepType.ProgramCall: return CALL_PROGRAM_TEMPLATE.format( diff --git a/hybridagi/modules/retrievers/integration/falkordb/falkordb_fact_retriever.py b/hybridagi/modules/retrievers/integration/falkordb/falkordb_fact_retriever.py index 481199c..b8633c0 100644 --- a/hybridagi/modules/retrievers/integration/falkordb/falkordb_fact_retriever.py +++ b/hybridagi/modules/retrievers/integration/falkordb/falkordb_fact_retriever.py @@ -84,7 +84,6 @@ def forward(self, query_or_queries: Union[Query, QueryList]) -> QueryWithFacts: query, params = params, ) - print(query_result.result_set) if len(query_result.result_set) > 0: for record in query_result.result_set: if record[0] not in indexes: diff --git a/notebooks/using_falkordb.ipynb b/notebooks/using_falkordb.ipynb index 8887a79..b7094ec 100644 --- a/notebooks/using_falkordb.ipynb +++ b/notebooks/using_falkordb.ipynb @@ -2,9 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/yoan/.cache/pypoetry/virtualenvs/hybridagi-B1GoJrSC-py3.10/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "from hybridagi.core.datatypes import Fact, FactList, Entity, Relationship\n", "\n", @@ -51,42 +60,271 @@ "input_facts = FactList()\n", "\n", "for data in input_data:\n", - " pathology = dt.Entity(name=data[\"name\"], label=\"Pathology\")\n", - " prevalence = dt.Entity(name=str(data[\"prevalence\"]), label=\"Prevalence\")\n", - " input_facts.facts.append(dt.Fact(subj=pathology, rel=dt.Relationship(name=\"HAS_PREVALENCE_OF\"), obj=prevalence))\n", + " pathology = Entity(name=data[\"name\"], label=\"Pathology\")\n", + " prevalence = Entity(name=str(data[\"prevalence\"]), label=\"Prevalence\")\n", + " input_facts.facts.append(Fact(subj=pathology, rel=Relationship(name=\"HAS_PREVALENCE_OF\"), obj=prevalence))\n", " for symptom in data[\"symptoms\"]:\n", - " symptom_entity = dt.Entity(name=symptom, label=\"Symptom\")\n", - " input_facts.facts.append(dt.Fact(subj=pathology, rel=dt.Relationship(name=\"HAS_SYMPTOMS\"), obj=symptom_entity))\n", + " symptom_entity = Entity(name=symptom, label=\"Symptom\")\n", + " input_facts.facts.append(Fact(subj=pathology, rel=Relationship(name=\"HAS_SYMPTOMS\"), obj=symptom_entity))\n", " for treatment in data[\"treatment\"]:\n", - " treatment_entity = dt.Entity(name=treatment, label=\"Treatment\")\n", - " input_facts.facts.append(dt.Fact(subj=pathology, rel=dt.Relationship(name=\"HAS_TREATMENT\"), obj=treatment_entity))" + " treatment_entity = Entity(name=treatment, label=\"Treatment\")\n", + " input_facts.facts.append(Fact(subj=pathology, rel=Relationship(name=\"HAS_TREATMENT\"), obj=treatment_entity))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "// @desc: The main program\n", + "CREATE\n", + "// Nodes declaration\n", + "(start:Control {id: \"start\"}),\n", + "(end:Control {id: \"end\"}),\n", + "(fact_search:Action {\n", + " id: \"fact_search\",\n", + " purpose: \"Find relevant facts\",\n", + " tool: \"FactSearch\",\n", + " prompt: \"Please infer the similarity search query (only ONE item) based on the Objective's question\"\n", + "}),\n", + "(answer:Action {\n", + " id: \"answer\",\n", + " purpose: \"Answer the Objective's question the context's facts\",\n", + " tool: \"Speak\",\n", + " prompt: \"\\nPlease answer the Objective's question using the relevant facts in your context.\\nIf no facts are relevant just say that you don't know.\\nDon't state the Objective's question and only give the factual answer.\\n\"\n", + "}),\n", + "// Structure declaration\n", + "(start)-[:NEXT]->(fact_search),\n", + "(fact_search)-[:NEXT]->(answer),\n", + "(answer)-[:NEXT]->(end)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import hybridagi.core.graph_program as gp\n", + "\n", + "main = gp.GraphProgram(\n", + " name = \"main\",\n", + " description = \"The main program\",\n", + ")\n", + "\n", + "main.add(gp.Action(\n", + " id = \"fact_search\",\n", + " purpose = \"Find relevant facts\",\n", + " tool = \"FactSearch\",\n", + " prompt = \"Please infer the similarity search query (only ONE item) based on the Objective's question\",\n", + "))\n", + "\n", + "main.add(gp.Action(\n", + " id = \"answer\",\n", + " purpose = \"Answer the Objective's 
question the context's facts\",\n", + " tool = \"Speak\",\n", + " prompt = \"\"\"\n", + "Please answer the Objective's question using the relevant facts in your context.\n", + "If no facts are relevant just say that you don't know.\n", + "Don't state the Objective's question and only give the factual answer.\n", + "\"\"\",\n", + "))\n", + "\n", + "main.connect(\"start\", \"fact_search\")\n", + "main.connect(\"fact_search\", \"answer\")\n", + "main.connect(\"answer\", \"end\")\n", + "\n", + "main.build() # Verify that the graph program is correct\n", + "\n", + "# Let's look at it\n", + "\n", + "print(main) \n", + "\n", + "main.show(notebook=True)" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from hybridagi.memory.integration.falkordb import FalkorDBProgramMemory, FalkorDBFactMemory\n", "\n", - "fact_memory = FalkorDBFactMemory(index_name=\"pathology_graph_rag\")\n", + "fact_memory = FalkorDBFactMemory(index_name=\"pathology_graph_rag\", wipe_on_start=True)\n", + "\n", + "program_memory = FalkorDBProgramMemory(index_name=\"pathology_graph_rag\", wipe_on_start=True)\n", "\n", - "program_memory = FalkorDBProgramMemory(index_name=\"pathology_graph_rag\")\n", - "\n" + "program_memory.update(main)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/yoan/.cache/pypoetry/virtualenvs/hybridagi-B1GoJrSC-py3.10/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. 
For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", + " warnings.warn(\n", + "100%|██████████| 45/45 [00:00<00:00, 154961.97it/s]\n", + "100%|██████████| 45/45 [00:00<00:00, 64.68it/s]\n", + "100%|██████████| 45/45 [00:00<00:00, 160.32it/s]\n" + ] + } + ], + "source": [ + "from hybridagi.core.pipeline import Pipeline\n", + "from hybridagi.embeddings import SentenceTransformerEmbeddings\n", + "from hybridagi.modules.deduplicators import EntityDeduplicator\n", + "from hybridagi.modules.embedders import EntityEmbedder, FactEmbedder\n", + "\n", + "pipeline = Pipeline()\n", + "\n", + "embeddings = SentenceTransformerEmbeddings(\n", + " model_name_or_path = \"all-MiniLM-L6-v2\",\n", + " dim = 384, # The dimention of the embeddings vector (also called dense vector)\n", + ")\n", + "\n", + "pipeline.add(\"deduplicate_entities\", EntityDeduplicator(method=\"exact\"))\n", + "pipeline.add(\"embed_entities\", EntityEmbedder(embeddings=embeddings))\n", + "pipeline.add(\"embed_facts\", FactEmbedder(embeddings=embeddings))\n", + "\n", + "output_facts = pipeline(input_facts)\n", + "\n", + "fact_memory.update(output_facts) " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[35m--- Step 0 ---\n", + "Call Program: main\n", + "Program Purpose: I am feeling sad and I don't have appetite, these are the symptoms of any disease?\u001b[0m\n", + "\u001b[36m--- Step 1 ---\n", + "Action Purpose: Find relevant facts\n", + "Action: {\n", + " \"queries\": [\n", + " \"symptoms of depression\",\n", + " \"loss of appetite\"\n", + " ],\n", + " \"facts\": [\n", + " {\n", + " \"fact\": \"(:Pathology {name:\\\"Depression\\\"})-[:HAS_PREVALENCE_OF]->(:Prevalence {name:\\\"4.4% of the population\\\"})\"\n", + " },\n", + " {\n", + " \"fact\": \"(:Pathology {name:\\\"Depression\\\"})-[:HAS_TREATMENT]->(:Treatment {name:\\\"Lifestyle changes\\\"})\"\n", + " },\n", + " {\n", + " \"fact\": \"(:Pathology {name:\\\"Depression\\\"})-[:HAS_SYMPTOMS]->(:Symptom {name:\\\"Changes in sleep and appetite\\\"})\"\n", + " },\n", + " {\n", + " \"fact\": \"(:Pathology {name:\\\"Depression\\\"})-[:HAS_SYMPTOMS]->(:Symptom {name:\\\"Loss of interest\\\"})\"\n", + " },\n", + " {\n", + " \"fact\": \"(:Pathology {name:\\\"Depression\\\"})-[:HAS_SYMPTOMS]->(:Symptom {name:\\\"Persistent sadness\\\"})\"\n", + " }\n", + " ]\n", + "}\u001b[0m\n", + "\u001b[36m--- Step 2 ---\n", + "Action Purpose: Answer the Objective's question the context's facts\n", + "Action: {\n", + " \"message\": \"4.4% of the population may experience symptoms similar to what you described, such as changes in sleep and appetite, loss of interest, and persistent sadness. These could be signs of depression. However, it is important to consult with a healthcare professional for an accurate diagnosis.\"\n", + "}\u001b[0m\n", + "\u001b[35m--- Step 3 ---\n", + "End Program: main\u001b[0m\n", + "4.4% of the population may experience symptoms similar to what you described, such as changes in sleep and appetite, loss of interest, and persistent sadness. These could be signs of depression. 
However, it is important to consult with a healthcare professional for an accurate diagnosis.\n" + ] + } + ], + "source": [ + "import dspy\n", + "from hybridagi.core.datatypes import AgentState, Query\n", + "from hybridagi.modules.agents import GraphInterpreterAgent\n", + "from hybridagi.modules.agents.tools import SpeakTool, FactSearchTool\n", + "from hybridagi.modules.retrievers.integration.falkordb import FalkorDBFactRetriever\n", + "\n", + "agent_state = AgentState()\n", + "\n", + "tools = [\n", + " SpeakTool(\n", + " agent_state = agent_state,\n", + " ),\n", + " FactSearchTool(\n", + " retriever = FalkorDBFactRetriever(\n", + " fact_memory = fact_memory,\n", + " embeddings = embeddings,\n", + " distance = \"cosine\",\n", + " max_distance = 1.0,\n", + " k = 5,\n", + " reranker = None,\n", + " )\n", + " )\n", + "]\n", + "\n", + "agent = GraphInterpreterAgent(\n", + " agent_state = agent_state,\n", + " program_memory = program_memory,\n", + " tools = tools,\n", + ")\n", + "\n", + "# We can now setup the LLM using Ollama client from DSPy\n", + "\n", + "lm = dspy.OllamaLocal(model='mistral', max_tokens=1024, stop=[\"\\n\\n\\n\"])\n", + "dspy.configure(lm=lm)\n", + "\n", + "result = agent(Query(query=\"I am feeling sad and I don't have appetite, these are the symptoms of any disease?\"))\n", + "\n", + "print(result.final_answer)" ] } ], "metadata": { + "kernelspec": { + "display_name": "hybridagi-B1GoJrSC-py3.10", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" } }, "nbformat": 4,