diff --git a/examples/openai/fetch_multiple_links.py b/examples/openai/depth_search_graph_openai.py similarity index 89% rename from examples/openai/fetch_multiple_links.py rename to examples/openai/depth_search_graph_openai.py index c9c07877..7cde7865 100644 --- a/examples/openai/fetch_multiple_links.py +++ b/examples/openai/depth_search_graph_openai.py @@ -1,4 +1,6 @@ - +""" +depth_search_graph_openai example +""" from scrapegraphai.graphs import DepthSearchGraph  graph_config = { @@ -19,4 +21,4 @@ ) result = search_graph.run() -print(result) \ No newline at end of file +print(result) diff --git a/scrapegraphai/graphs/depth_search_graph.py b/scrapegraphai/graphs/depth_search_graph.py index 6ad3b245..a93d8fcf 100644 --- a/scrapegraphai/graphs/depth_search_graph.py +++ b/scrapegraphai/graphs/depth_search_graph.py @@ -9,13 +9,18 @@ from ..utils.save_code_to_file import save_code_to_file from ..nodes import ( FetchNodeLevelK, - ParseNodeDepthK + ParseNodeDepthK, + DescriptionNode, + RAGNode, + GenerateAnswerNodeKLevel ) class DepthSearchGraph(AbstractGraph): """ - CodeGeneratorGraph is a script generator pipeline that generates the function extract_data(html: str) -> dict() for - extracting the wanted information from a HTML page. The code generated is in Python and uses the library BeautifulSoup. + DepthSearchGraph is a scraping pipeline that explores a website + up to a configurable depth, generates a description for each + fetched document, indexes the documents in a vector database + and answers the user prompt through a RAG retrieval step. It requires a user prompt, a source URL, and an output schema. Attributes: @@ -60,7 +65,7 @@ def _create_graph(self) -> BaseGraph: BaseGraph: A graph instance representing the web scraping workflow. 
""" - fetch_node = FetchNodeLevelK( + fetch_node_k = FetchNodeLevelK( input="url| local_dir", output=["docs"], node_config={ @@ -72,8 +77,8 @@ def _create_graph(self) -> BaseGraph: "only_inside_links": self.config.get("only_inside_links", False) } ) - - parse_node = ParseNodeDepthK( + + parse_node_k = ParseNodeDepthK( input="docs", output=["docs"], node_config={ @@ -81,15 +86,52 @@ def _create_graph(self) -> BaseGraph: } ) + description_node = DescriptionNode( + input="docs", + output=["docs"], + node_config={ + "llm_model": self.llm_model, + "verbose": self.config.get("verbose", False), + "cache_path": self.config.get("cache_path", False) + } + ) + + rag_node = RAGNode ( + input="docs", + output=["vectorial_db"], + node_config={ + "llm_model": self.llm_model, + "embedder_model": self.config.get("embedder_model", False), + "verbose": self.config.get("verbose", False), + } + ) + + generate_answer_k = GenerateAnswerNodeKLevel( + input="vectorial_db", + output=["answer"], + node_config={ + "llm_model": self.llm_model, + "embedder_model": self.config.get("embedder_model", False), + "verbose": self.config.get("verbose", False), + } + + ) + return BaseGraph( nodes=[ - fetch_node, - parse_node + fetch_node_k, + parse_node_k, + description_node, + rag_node, + generate_answer_k ], edges=[ - (fetch_node, parse_node), + (fetch_node_k, parse_node_k), + (parse_node_k, description_node), + (description_node, rag_node), + (rag_node, generate_answer_k) ], - entry_point=fetch_node, + entry_point=fetch_node_k, graph_name=self.__class__.__name__ ) diff --git a/scrapegraphai/nodes/description_node.py b/scrapegraphai/nodes/description_node.py index 6175133a..97ef2e8f 100644 --- a/scrapegraphai/nodes/description_node.py +++ b/scrapegraphai/nodes/description_node.py @@ -31,12 +31,11 @@ def __init__( input: str, output: List[str], node_config: Optional[dict] = None, - node_name: str = "RAG", + node_name: str = "DESCRIPTION", ): super().__init__(node_name, "node", input, output, 2, 
node_config) self.llm_model = node_config["llm_model"] - self.embedder_model = node_config.get("embedder_model", None) self.verbose = ( False if node_config is None else node_config.get("verbose", False) )