diff --git a/e2e/preview/pipelines/test_extractive_qa_pipeline.py b/e2e/preview/pipelines/test_extractive_qa_pipeline.py
index 7c8c10f898..b7ee11a0e5 100644
--- a/e2e/preview/pipelines/test_extractive_qa_pipeline.py
+++ b/e2e/preview/pipelines/test_extractive_qa_pipeline.py
@@ -1,3 +1,5 @@
+import json
+
 from haystack.preview import Pipeline, Document
 from haystack.preview.document_stores import MemoryDocumentStore
 from haystack.preview.components.retrievers import MemoryBM25Retriever
@@ -5,17 +7,25 @@
 
 
 def test_extractive_qa_pipeline(tmp_path):
+    # Create the pipeline
     qa_pipeline = Pipeline()
     qa_pipeline.add_component(instance=MemoryBM25Retriever(document_store=MemoryDocumentStore()), name="retriever")
     qa_pipeline.add_component(instance=ExtractiveReader(model_name_or_path="deepset/tinyroberta-squad2"), name="reader")
     qa_pipeline.connect("retriever", "reader")
 
+    # Draw the pipeline
     qa_pipeline.draw(tmp_path / "test_extractive_qa_pipeline.png")
 
-    # TODO write to JSON to make sure it's actually serializable
-    serialized_pipeline = qa_pipeline.to_dict()
-    qa_pipeline = Pipeline.from_dict(serialized_pipeline)
+    # Serialize the pipeline to JSON
+    with open(tmp_path / "test_bm25_rag_pipeline.json", "w") as f:
+        print(json.dumps(qa_pipeline.to_dict(), indent=4))
+        json.dump(qa_pipeline.to_dict(), f)
+
+    # Load the pipeline back
+    with open(tmp_path / "test_bm25_rag_pipeline.json", "r") as f:
+        qa_pipeline = Pipeline.from_dict(json.load(f))
 
+    # Populate the document store
     documents = [
         Document(text="My name is Jean and I live in Paris."),
         Document(text="My name is Mark and I live in Berlin."),
@@ -23,6 +33,7 @@ def test_extractive_qa_pipeline(tmp_path):
     ]
     qa_pipeline.get_component("retriever").document_store.write_documents(documents)
 
+    # Query and assert
     questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
     answers_spywords = ["Jean", "Mark", "Giorgio"]
 
diff --git a/e2e/preview/pipelines/test_rag_pipelines.py b/e2e/preview/pipelines/test_rag_pipelines.py
index 7ac6e268c9..23a7972245 100644
--- a/e2e/preview/pipelines/test_rag_pipelines.py
+++ b/e2e/preview/pipelines/test_rag_pipelines.py
@@ -1,4 +1,5 @@
 import os
+import json
 
 import pytest
 from haystack.preview import Pipeline, Document
@@ -16,6 +17,7 @@
     reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
 )
 def test_bm25_rag_pipeline(tmp_path):
+    # Create the RAG pipeline
     prompt_template = """
     Given these documents, answer the question.\nDocuments:
     {% for doc in documents %}
@@ -36,12 +38,18 @@ def test_bm25_rag_pipeline(tmp_path):
     rag_pipeline.connect("llm.metadata", "answer_builder.metadata")
     rag_pipeline.connect("retriever", "answer_builder.documents")
 
+    # Draw the pipeline
     rag_pipeline.draw(tmp_path / "test_bm25_rag_pipeline.png")
 
-    # TODO write to JSON to make sure it's actually serializable
-    serialized_pipeline = rag_pipeline.to_dict()
-    rag_pipeline = Pipeline.from_dict(serialized_pipeline)
+    # Serialize the pipeline to JSON
+    with open(tmp_path / "test_bm25_rag_pipeline.json", "w") as f:
+        json.dump(rag_pipeline.to_dict(), f)
 
+    # Load the pipeline back
+    with open(tmp_path / "test_bm25_rag_pipeline.json", "r") as f:
+        rag_pipeline = Pipeline.from_dict(json.load(f))
+
+    # Populate the document store
     documents = [
         Document(text="My name is Jean and I live in Paris."),
         Document(text="My name is Mark and I live in Berlin."),
@@ -49,6 +57,7 @@ def test_bm25_rag_pipeline(tmp_path):
     ]
     rag_pipeline.get_component("retriever").document_store.write_documents(documents)
 
+    # Query and assert
     questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
     answers_spywords = ["Jean", "Mark", "Giorgio"]
 
@@ -74,6 +83,7 @@ def test_bm25_rag_pipeline(tmp_path):
     reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
 )
 def test_embedding_retrieval_rag_pipeline(tmp_path):
+    # Create the RAG pipeline
     prompt_template = """
     Given these documents, answer the question.\nDocuments:
     {% for doc in documents %}
@@ -101,12 +111,18 @@ def test_embedding_retrieval_rag_pipeline(tmp_path):
     rag_pipeline.connect("llm.metadata", "answer_builder.metadata")
     rag_pipeline.connect("retriever", "answer_builder.documents")
 
+    # Draw the pipeline
     rag_pipeline.draw(tmp_path / "test_embedding_rag_pipeline.png")
 
-    # TODO write to JSON to make sure it's actually serializable
-    serialized_pipeline = rag_pipeline.to_dict()
-    rag_pipeline = Pipeline.from_dict(serialized_pipeline)
+    # Serialize the pipeline to JSON
+    with open(tmp_path / "test_bm25_rag_pipeline.json", "w") as f:
+        json.dump(rag_pipeline.to_dict(), f)
+
+    # Load the pipeline back
+    with open(tmp_path / "test_bm25_rag_pipeline.json", "r") as f:
+        rag_pipeline = Pipeline.from_dict(json.load(f))
 
+    # Populate the document store
     documents = [
         Document(text="My name is Jean and I live in Paris."),
         Document(text="My name is Mark and I live in Berlin."),
@@ -122,6 +138,7 @@ def test_embedding_retrieval_rag_pipeline(tmp_path):
     indexing_pipeline.connect("document_embedder", "document_writer")
     indexing_pipeline.run({"document_embedder": {"documents": documents}})
 
+    # Query and assert
     questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
     answers_spywords = ["Jean", "Mark", "Giorgio"]
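
For reference, the serialization round trip these tests now exercise reduces to the minimal sketch below. It is not part of the patch: it assumes only the haystack.preview calls already used in the diff (Pipeline.to_dict, Pipeline.from_dict, MemoryBM25Retriever, MemoryDocumentStore), and the "pipeline.json" path is illustrative.

import json

from haystack.preview import Pipeline
from haystack.preview.document_stores import MemoryDocumentStore
from haystack.preview.components.retrievers import MemoryBM25Retriever

# Build a one-component pipeline, mirroring the retriever setup used in the tests.
pipeline = Pipeline()
pipeline.add_component(instance=MemoryBM25Retriever(document_store=MemoryDocumentStore()), name="retriever")

# to_dict() must return only JSON-serializable types; writing it to disk is
# what proves the pipeline is actually serializable, not just dict-convertible.
with open("pipeline.json", "w") as f:  # "pipeline.json" is an illustrative path
    json.dump(pipeline.to_dict(), f, indent=4)

# from_dict() rebuilds an equivalent pipeline from the loaded dictionary.
with open("pipeline.json", "r") as f:
    pipeline = Pipeline.from_dict(json.load(f))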