Serialize e2e test pipelines to JSON

deepset-ai · Sep 28, 2023 · 63cced4
1 parent 4a45e87
commit 63cced4
Show file tree

Hide file tree

Showing 2 changed files with 37 additions and 9 deletions.
diff --git a/e2e/preview/pipelines/test_extractive_qa_pipeline.py b/e2e/preview/pipelines/test_extractive_qa_pipeline.py
@@ -1,28 +1,39 @@
+import json
+
 from haystack.preview import Pipeline, Document
 from haystack.preview.document_stores import MemoryDocumentStore
 from haystack.preview.components.retrievers import MemoryBM25Retriever
 from haystack.preview.components.readers import ExtractiveReader
 
 
 def test_extractive_qa_pipeline(tmp_path):
+    # Create the pipeline
     qa_pipeline = Pipeline()
     qa_pipeline.add_component(instance=MemoryBM25Retriever(document_store=MemoryDocumentStore()), name="retriever")
     qa_pipeline.add_component(instance=ExtractiveReader(model_name_or_path="deepset/tinyroberta-squad2"), name="reader")
     qa_pipeline.connect("retriever", "reader")
 
+    # Draw the pipeline
     qa_pipeline.draw(tmp_path / "test_extractive_qa_pipeline.png")
 
-    # TODO write to JSON to make sure it's actually serializable
-    serialized_pipeline = qa_pipeline.to_dict()
-    qa_pipeline = Pipeline.from_dict(serialized_pipeline)
+    # Serialize the pipeline to JSON
+    with open(tmp_path / "test_bm25_rag_pipeline.json", "w") as f:
+        print(json.dumps(qa_pipeline.to_dict(), indent=4))
+        json.dump(qa_pipeline.to_dict(), f)
+
+    # Load the pipeline back
+    with open(tmp_path / "test_bm25_rag_pipeline.json", "r") as f:
+        qa_pipeline = Pipeline.from_dict(json.load(f))
 
+    # Populate the document store
     documents = [
         Document(text="My name is Jean and I live in Paris."),
         Document(text="My name is Mark and I live in Berlin."),
         Document(text="My name is Giorgio and I live in Rome."),
     ]
     qa_pipeline.get_component("retriever").document_store.write_documents(documents)
 
+    # Query and assert
     questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
     answers_spywords = ["Jean", "Mark", "Giorgio"]
 

diff --git a/e2e/preview/pipelines/test_rag_pipelines.py b/e2e/preview/pipelines/test_rag_pipelines.py
@@ -1,4 +1,5 @@
 import os
+import json
 import pytest
 
 from haystack.preview import Pipeline, Document
@@ -16,6 +17,7 @@
     reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
 )
 def test_bm25_rag_pipeline(tmp_path):
+    # Create the RAG pipeline
     prompt_template = """
     Given these documents, answer the question.\nDocuments:
     {% for doc in documents %}
@@ -36,19 +38,26 @@ def test_bm25_rag_pipeline(tmp_path):
     rag_pipeline.connect("llm.metadata", "answer_builder.metadata")
     rag_pipeline.connect("retriever", "answer_builder.documents")
 
+    # Draw the pipeline
     rag_pipeline.draw(tmp_path / "test_bm25_rag_pipeline.png")
 
-    # TODO write to JSON to make sure it's actually serializable
-    serialized_pipeline = rag_pipeline.to_dict()
-    rag_pipeline = Pipeline.from_dict(serialized_pipeline)
+    # Serialize the pipeline to JSON
+    with open(tmp_path / "test_bm25_rag_pipeline.json", "w") as f:
+        json.dump(rag_pipeline.to_dict(), f)
 
+    # Load the pipeline back
+    with open(tmp_path / "test_bm25_rag_pipeline.json", "r") as f:
+        rag_pipeline = Pipeline.from_dict(json.load(f))
+
+    # Populate the document store
     documents = [
         Document(text="My name is Jean and I live in Paris."),
         Document(text="My name is Mark and I live in Berlin."),
         Document(text="My name is Giorgio and I live in Rome."),
     ]
     rag_pipeline.get_component("retriever").document_store.write_documents(documents)
 
+    # Query and assert
     questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
     answers_spywords = ["Jean", "Mark", "Giorgio"]
 
@@ -74,6 +83,7 @@ def test_bm25_rag_pipeline(tmp_path):
     reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
 )
 def test_embedding_retrieval_rag_pipeline(tmp_path):
+    # Create the RAG pipeline
     prompt_template = """
     Given these documents, answer the question.\nDocuments:
     {% for doc in documents %}
@@ -101,12 +111,18 @@ def test_embedding_retrieval_rag_pipeline(tmp_path):
     rag_pipeline.connect("llm.metadata", "answer_builder.metadata")
     rag_pipeline.connect("retriever", "answer_builder.documents")
 
+    # Draw the pipeline
     rag_pipeline.draw(tmp_path / "test_embedding_rag_pipeline.png")
 
-    # TODO write to JSON to make sure it's actually serializable
-    serialized_pipeline = rag_pipeline.to_dict()
-    rag_pipeline = Pipeline.from_dict(serialized_pipeline)
+    # Serialize the pipeline to JSON
+    with open(tmp_path / "test_bm25_rag_pipeline.json", "w") as f:
+        json.dump(rag_pipeline.to_dict(), f)
+
+    # Load the pipeline back
+    with open(tmp_path / "test_bm25_rag_pipeline.json", "r") as f:
+        rag_pipeline = Pipeline.from_dict(json.load(f))
 
+    # Populate the document store
     documents = [
         Document(text="My name is Jean and I live in Paris."),
         Document(text="My name is Mark and I live in Berlin."),
@@ -122,6 +138,7 @@ def test_embedding_retrieval_rag_pipeline(tmp_path):
     indexing_pipeline.connect("document_embedder", "document_writer")
     indexing_pipeline.run({"document_embedder": {"documents": documents}})
 
+    # Query and assert
     questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
     answers_spywords = ["Jean", "Mark", "Giorgio"]