From b84883bfd12f4d1b4a0528e2c0503b649ea1e1fb Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 21 Oct 2024 09:39:17 +0200 Subject: [PATCH] add smartscraper lite --- .../anthropic/smart_scraper_lite_anthropic.py | 32 +++++++++++++ examples/azure/smart_scraper_lite_azure.py | 31 ++++++++++++ .../bedrock/smart_scraper_lite_bedrock.py | 26 ++++++++++ .../deepseek/smart_scraper_lite_deepseek.py | 31 ++++++++++++ examples/ernie/smart_scraper_lite_ernie.py | 31 ++++++++++++ .../fireworks/smart_scraper_lite_fireworks.py | 31 ++++++++++++ .../smart_scraper_lite_google_genai.py | 31 ++++++++++++ .../smart_scraper_lite_google_vertexai.py | 33 +++++++++++++ .../smart_scraper_multi_lite_vertex.py | 47 +++++++++++++++++++ examples/groq/smart_scraper_lite_groq.py | 31 ++++++++++++ .../smart_scraper_lite_huggingfacehub.py | 31 ++++++++++++ .../local_models/smart_scraper_lite_ollama.py | 30 ++++++++++++ .../mistral/smart_scraper_lite_mistral.py | 31 ++++++++++++ .../moonshot/smart_scraper_lite_moonshot.py | 31 ++++++++++++ .../nemotron/smart_scraper_lite_nemotron.py | 32 +++++++++++++ examples/oneapi/smart_scraper_lite_oneapi.py | 32 +++++++++++++ examples/openai/smart_scraper_lite_openai.py | 32 +++++++++++++ .../together/smart_scraper_lite_together.py | 1 + 18 files changed, 544 insertions(+) create mode 100644 examples/anthropic/smart_scraper_lite_anthropic.py create mode 100644 examples/azure/smart_scraper_lite_azure.py create mode 100644 examples/bedrock/smart_scraper_lite_bedrock.py create mode 100644 examples/deepseek/smart_scraper_lite_deepseek.py create mode 100644 examples/ernie/smart_scraper_lite_ernie.py create mode 100644 examples/fireworks/smart_scraper_lite_fireworks.py create mode 100644 examples/google_genai/smart_scraper_lite_google_genai.py create mode 100644 examples/google_vertexai/smart_scraper_lite_google_vertexai.py create mode 100644 examples/groq/smart_scraper_lite_groq.py create mode 100644 
examples/huggingfacehub/smart_scraper_lite_huggingfacehub.py create mode 100644 examples/local_models/smart_scraper_lite_ollama.py create mode 100644 examples/mistral/smart_scraper_lite_mistral.py create mode 100644 examples/moonshot/smart_scraper_lite_moonshot.py create mode 100644 examples/nemotron/smart_scraper_lite_nemotron.py create mode 100644 examples/oneapi/smart_scraper_lite_oneapi.py create mode 100644 examples/openai/smart_scraper_lite_openai.py create mode 100644 examples/together/smart_scraper_lite_together.py diff --git a/examples/anthropic/smart_scraper_lite_anthropic.py b/examples/anthropic/smart_scraper_lite_anthropic.py new file mode 100644 index 00000000..698623c6 --- /dev/null +++ b/examples/anthropic/smart_scraper_lite_anthropic.py @@ -0,0 +1,32 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("ANTHROPIC_API_KEY"), + "model": "anthropic/claude-3-haiku-20240307", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/azure/smart_scraper_lite_azure.py b/examples/azure/smart_scraper_lite_azure.py new file mode 100644 index 00000000..335c4832 --- /dev/null +++ b/examples/azure/smart_scraper_lite_azure.py @@ -0,0 +1,31 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + 
+load_dotenv() + +graph_config = { + "llm": { + "api_key": os.environ["AZURE_OPENAI_KEY"], + "model": "azure_openai/gpt-4o" + }, + "verbose": True, + "headless": False +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/bedrock/smart_scraper_lite_bedrock.py b/examples/bedrock/smart_scraper_lite_bedrock.py new file mode 100644 index 00000000..2bf0471c --- /dev/null +++ b/examples/bedrock/smart_scraper_lite_bedrock.py @@ -0,0 +1,26 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import json +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +graph_config = { + "llm": { + "client": "client_name", + "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", + "temperature": 0.0 + } +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/deepseek/smart_scraper_lite_deepseek.py b/examples/deepseek/smart_scraper_lite_deepseek.py new file mode 100644 index 00000000..a70d76b0 --- /dev/null +++ b/examples/deepseek/smart_scraper_lite_deepseek.py @@ -0,0 +1,31 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("DEEPSEEK_API_KEY"), + 
"model": "deepseek/deepseek-coder-33b-instruct", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/ernie/smart_scraper_lite_ernie.py b/examples/ernie/smart_scraper_lite_ernie.py new file mode 100644 index 00000000..5d3ba9d9 --- /dev/null +++ b/examples/ernie/smart_scraper_lite_ernie.py @@ -0,0 +1,31 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("ERNIE_API_KEY"), + "model": "ernie/ernie-bot-4", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/fireworks/smart_scraper_lite_fireworks.py b/examples/fireworks/smart_scraper_lite_fireworks.py new file mode 100644 index 00000000..6c9a7745 --- /dev/null +++ b/examples/fireworks/smart_scraper_lite_fireworks.py @@ -0,0 +1,31 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("FIREWORKS_API_KEY"), + "model": 
"fireworks/llama-v2-70b-chat", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/google_genai/smart_scraper_lite_google_genai.py b/examples/google_genai/smart_scraper_lite_google_genai.py new file mode 100644 index 00000000..9b776735 --- /dev/null +++ b/examples/google_genai/smart_scraper_lite_google_genai.py @@ -0,0 +1,31 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("GOOGLE_API_KEY"), + "model": "gemini-pro", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/google_vertexai/smart_scraper_lite_google_vertexai.py b/examples/google_vertexai/smart_scraper_lite_google_vertexai.py new file mode 100644 index 00000000..eca61bbb --- /dev/null +++ b/examples/google_vertexai/smart_scraper_lite_google_vertexai.py @@ -0,0 +1,33 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "project": 
os.getenv("GOOGLE_CLOUD_PROJECT"), + "location": "us-central1", + "model": "text-bison@001", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/google_vertexai/smart_scraper_multi_lite_vertex.py b/examples/google_vertexai/smart_scraper_multi_lite_vertex.py index e69de29b..60ff3638 100644 --- a/examples/google_vertexai/smart_scraper_multi_lite_vertex.py +++ b/examples/google_vertexai/smart_scraper_multi_lite_vertex.py @@ -0,0 +1,47 @@ +""" +Basic example of a scraping pipeline using SmartScraperMultiLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperMultiLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "project": os.getenv("GOOGLE_CLOUD_PROJECT"), + "location": "us-central1", + "model": "text-bison@001", + }, + "verbose": True, + "headless": False, +} + +# ************************************************ +# Create the SmartScraperMultiLiteGraph instance and run it +# ************************************************ + +smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph( + prompt="Who is Marco Perini?", + source= [ + "https://perinim.github.io/", + "https://perinim.github.io/cv/" + ], + config=graph_config +) + +result = smart_scraper_multi_lite_graph.run() +print(json.dumps(result, indent=4)) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = 
smart_scraper_multi_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/groq/smart_scraper_lite_groq.py b/examples/groq/smart_scraper_lite_groq.py new file mode 100644 index 00000000..5fe6022f --- /dev/null +++ b/examples/groq/smart_scraper_lite_groq.py @@ -0,0 +1,31 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("GROQ_API_KEY"), + "model": "mixtral-8x7b-32768", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/huggingfacehub/smart_scraper_lite_huggingfacehub.py b/examples/huggingfacehub/smart_scraper_lite_huggingfacehub.py new file mode 100644 index 00000000..4faa8a47 --- /dev/null +++ b/examples/huggingfacehub/smart_scraper_lite_huggingfacehub.py @@ -0,0 +1,31 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("HUGGINGFACEHUB_API_TOKEN"), + "model": "huggingfacehub/meta-llama/Llama-2-70b-chat-hf", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, 
indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/local_models/smart_scraper_lite_ollama.py b/examples/local_models/smart_scraper_lite_ollama.py new file mode 100644 index 00000000..2cf6c402 --- /dev/null +++ b/examples/local_models/smart_scraper_lite_ollama.py @@ -0,0 +1,30 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph + +""" +import json +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +graph_config = { + "llm": { + "model": "ollama/llama3.1", + "temperature": 0, + "format": "json", + "base_url": "http://localhost:11434", + }, + "verbose": True, + "headless": False +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/mistral/smart_scraper_lite_mistral.py b/examples/mistral/smart_scraper_lite_mistral.py new file mode 100644 index 00000000..390371f9 --- /dev/null +++ b/examples/mistral/smart_scraper_lite_mistral.py @@ -0,0 +1,31 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("MISTRAL_API_KEY"), + "model": "mistral/mistral-medium", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = 
smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/moonshot/smart_scraper_lite_moonshot.py b/examples/moonshot/smart_scraper_lite_moonshot.py new file mode 100644 index 00000000..509027fb --- /dev/null +++ b/examples/moonshot/smart_scraper_lite_moonshot.py @@ -0,0 +1,31 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("ANTHROPIC_API_KEY"),  # NOTE(review): this Moonshot example reads the Anthropic key and model, apparently copy-pasted from the Anthropic example; confirm the intended Moonshot settings + "model": "anthropic/claude-3-haiku-20240307", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/nemotron/smart_scraper_lite_nemotron.py b/examples/nemotron/smart_scraper_lite_nemotron.py new file mode 100644 index 00000000..6c1d8528 --- /dev/null +++ b/examples/nemotron/smart_scraper_lite_nemotron.py @@ -0,0 +1,32 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("NEMOTRON_API_KEY"), + "model": "nemotron/nemotron-3.5-turbo", + "base_url": "http://127.0.0.1:3000/v1", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() 
+print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/oneapi/smart_scraper_lite_oneapi.py b/examples/oneapi/smart_scraper_lite_oneapi.py new file mode 100644 index 00000000..b271acb3 --- /dev/null +++ b/examples/oneapi/smart_scraper_lite_oneapi.py @@ -0,0 +1,32 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("ONEAPI_API_KEY"), + "model": "oneapi/gpt-3.5-turbo", + "base_url": "http://127.0.0.1:3000/v1", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() +print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/openai/smart_scraper_lite_openai.py b/examples/openai/smart_scraper_lite_openai.py new file mode 100644 index 00000000..5de725bb --- /dev/null +++ b/examples/openai/smart_scraper_lite_openai.py @@ -0,0 +1,32 @@ +""" +Basic example of a scraping pipeline using SmartScraperLiteGraph +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperLiteGraph +from scrapegraphai.utils import prettify_exec_info + +load_dotenv() + +graph_config = { + "llm": { + "api_key": os.getenv("OPENAI_API_KEY"), + "model": "openai/gpt-4o", + }, + "verbose": True, + "headless": False, +} + +smart_scraper_lite_graph = SmartScraperLiteGraph( + prompt="Who is Marco Perini?", + source="https://perinim.github.io/", + config=graph_config +) + +result = smart_scraper_lite_graph.run() 
+print(json.dumps(result, indent=4)) + +graph_exec_info = smart_scraper_lite_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + diff --git a/examples/together/smart_scraper_lite_together.py b/examples/together/smart_scraper_lite_together.py new file mode 100644 index 00000000..0519ecba --- /dev/null +++ b/examples/together/smart_scraper_lite_together.py @@ -0,0 +1 @@ + \ No newline at end of file