Skip to content

Commit

Permalink
to json
Browse files Browse the repository at this point in the history
  • Loading branch information
ZanSara committed Sep 28, 2023
1 parent 4a45e87 commit 63cced4
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 9 deletions.
17 changes: 14 additions & 3 deletions e2e/preview/pipelines/test_extractive_qa_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,39 @@
import json

from haystack.preview import Pipeline, Document
from haystack.preview.document_stores import MemoryDocumentStore
from haystack.preview.components.retrievers import MemoryBM25Retriever
from haystack.preview.components.readers import ExtractiveReader


def test_extractive_qa_pipeline(tmp_path):
# Create the pipeline
qa_pipeline = Pipeline()
qa_pipeline.add_component(instance=MemoryBM25Retriever(document_store=MemoryDocumentStore()), name="retriever")
qa_pipeline.add_component(instance=ExtractiveReader(model_name_or_path="deepset/tinyroberta-squad2"), name="reader")
qa_pipeline.connect("retriever", "reader")

# Draw the pipeline
qa_pipeline.draw(tmp_path / "test_extractive_qa_pipeline.png")

# TODO write to JSON to make sure it's actually serializable
serialized_pipeline = qa_pipeline.to_dict()
qa_pipeline = Pipeline.from_dict(serialized_pipeline)
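# Serialize the pipeline to JSON (NOTE(review): the file name below says "bm25_rag" although this is the extractive QA test, and the print call looks like leftover debug output; confirm and clean up)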
with open(tmp_path / "test_bm25_rag_pipeline.json", "w") as f:
print(json.dumps(qa_pipeline.to_dict(), indent=4))
json.dump(qa_pipeline.to_dict(), f)

# Load the pipeline back
with open(tmp_path / "test_bm25_rag_pipeline.json", "r") as f:
qa_pipeline = Pipeline.from_dict(json.load(f))

# Populate the document store
documents = [
Document(text="My name is Jean and I live in Paris."),
Document(text="My name is Mark and I live in Berlin."),
Document(text="My name is Giorgio and I live in Rome."),
]
qa_pipeline.get_component("retriever").document_store.write_documents(documents)

# Query and assert
questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
answers_spywords = ["Jean", "Mark", "Giorgio"]

Expand Down
29 changes: 23 additions & 6 deletions e2e/preview/pipelines/test_rag_pipelines.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import json
import pytest

from haystack.preview import Pipeline, Document
Expand All @@ -16,6 +17,7 @@
reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
)
def test_bm25_rag_pipeline(tmp_path):
# Create the RAG pipeline
prompt_template = """
Given these documents, answer the question.\nDocuments:
{% for doc in documents %}
Expand All @@ -36,19 +38,26 @@ def test_bm25_rag_pipeline(tmp_path):
rag_pipeline.connect("llm.metadata", "answer_builder.metadata")
rag_pipeline.connect("retriever", "answer_builder.documents")

# Draw the pipeline
rag_pipeline.draw(tmp_path / "test_bm25_rag_pipeline.png")

# TODO write to JSON to make sure it's actually serializable
serialized_pipeline = rag_pipeline.to_dict()
rag_pipeline = Pipeline.from_dict(serialized_pipeline)
# Serialize the pipeline to JSON
with open(tmp_path / "test_bm25_rag_pipeline.json", "w") as f:
json.dump(rag_pipeline.to_dict(), f)

# Load the pipeline back
with open(tmp_path / "test_bm25_rag_pipeline.json", "r") as f:
rag_pipeline = Pipeline.from_dict(json.load(f))

# Populate the document store
documents = [
Document(text="My name is Jean and I live in Paris."),
Document(text="My name is Mark and I live in Berlin."),
Document(text="My name is Giorgio and I live in Rome."),
]
rag_pipeline.get_component("retriever").document_store.write_documents(documents)

# Query and assert
questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
answers_spywords = ["Jean", "Mark", "Giorgio"]

Expand All @@ -74,6 +83,7 @@ def test_bm25_rag_pipeline(tmp_path):
reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
)
def test_embedding_retrieval_rag_pipeline(tmp_path):
# Create the RAG pipeline
prompt_template = """
Given these documents, answer the question.\nDocuments:
{% for doc in documents %}
Expand Down Expand Up @@ -101,12 +111,18 @@ def test_embedding_retrieval_rag_pipeline(tmp_path):
rag_pipeline.connect("llm.metadata", "answer_builder.metadata")
rag_pipeline.connect("retriever", "answer_builder.documents")

# Draw the pipeline
rag_pipeline.draw(tmp_path / "test_embedding_rag_pipeline.png")

# TODO write to JSON to make sure it's actually serializable
serialized_pipeline = rag_pipeline.to_dict()
rag_pipeline = Pipeline.from_dict(serialized_pipeline)
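# Serialize the pipeline to JSON (NOTE(review): the file name below says "bm25_rag" although this is the embedding retrieval test; confirm and rename)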
with open(tmp_path / "test_bm25_rag_pipeline.json", "w") as f:
json.dump(rag_pipeline.to_dict(), f)

# Load the pipeline back
with open(tmp_path / "test_bm25_rag_pipeline.json", "r") as f:
rag_pipeline = Pipeline.from_dict(json.load(f))

# Populate the document store
documents = [
Document(text="My name is Jean and I live in Paris."),
Document(text="My name is Mark and I live in Berlin."),
Expand All @@ -122,6 +138,7 @@ def test_embedding_retrieval_rag_pipeline(tmp_path):
indexing_pipeline.connect("document_embedder", "document_writer")
indexing_pipeline.run({"document_embedder": {"documents": documents}})

# Query and assert
questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"]
answers_spywords = ["Jean", "Mark", "Giorgio"]

Expand Down

0 comments on commit 63cced4

Please sign in to comment.