compute better filenames for returned rdf #248

Merged: 2 commits, Jul 30, 2024
prez/renderers/renderer.py (23 additions, 29 deletions)
@@ -7,20 +7,18 @@
from fastapi import status
from fastapi.exceptions import HTTPException
from fastapi.responses import StreamingResponse
from rdflib import Graph, URIRef, Namespace, RDF
from rdflib import RDF, Graph, Namespace, URIRef
from starlette.requests import Request
from starlette.responses import Response

from prez.models.profiles_and_mediatypes import ProfilesMediatypesInfo
from prez.models.profiles_item import ProfileItem
from prez.renderers.csv_renderer import render_csv_dropdown
from prez.renderers.json_renderer import render_json_dropdown, NotFoundError
from prez.renderers.json_renderer import NotFoundError, render_json_dropdown
from prez.services.curie_functions import get_curie_id_for_uri
from prez.sparql.methods import Repo
from prez.sparql.objects_listings import (
generate_item_construct,
get_annotation_properties,
)
from prez.sparql.objects_listings import (generate_item_construct,
get_annotation_properties)

log = logging.getLogger(__name__)

@@ -33,52 +31,49 @@ async def return_from_graph(
selected_class: URIRef,
repo: Repo,
):
profile_headers["Content-Disposition"] = "inline"
# set content-disposition
profile_headers["Content-Disposition"] = (
"attachment;" if str(mediatype) == "text/csv" else "inline;"
)
iri = graph.value(None, RDF.type, selected_class)
if iri:
profile_headers[
"Content-Disposition"
] += f" filename={get_curie_id_for_uri(URIRef(str(iri)))}"
elif selected_class:
profile_headers[
"Content-Disposition"
] += f" filename={selected_class.split('#')[-1].split('/')[-1]}"

if str(mediatype) in RDF_MEDIATYPES:
return await return_rdf(graph, mediatype, profile_headers)

elif profile == URIRef("https://w3id.org/profile/dd"):
graph = await return_annotated_rdf(graph, profile, repo)

try:
# TODO: Currently, data is generated in memory, instead of in a streaming manner.
# Not possible to do a streaming response yet since we are reading the RDF
# data into an in-memory graph.
jsonld_data = await render_json_dropdown(graph, profile, selected_class)

if str(mediatype) == "text/csv":
iri = graph.value(None, RDF.type, selected_class)
if iri:
filename = get_curie_id_for_uri(URIRef(str(iri)))
else:
filename = selected_class.split("#")[-1].split("/")[-1]
stream = render_csv_dropdown(jsonld_data["@graph"])
response = StreamingResponse(stream, media_type=mediatype)
response.headers[
"Content-Disposition"
] = f"attachment;filename={filename}.csv"
return response

# application/json
stream = io.StringIO(json.dumps(jsonld_data))
return StreamingResponse(stream, media_type=mediatype)

else:
stream = io.StringIO(json.dumps(jsonld_data))
return StreamingResponse(
stream, media_type=mediatype, headers=profile_headers
)
except NotFoundError as err:
raise HTTPException(status.HTTP_404_NOT_FOUND, str(err))

else:
if "anot+" in mediatype:
non_anot_mediatype = mediatype.replace("anot+", "")
profile_headers['Content-Type'] = non_anot_mediatype
profile_headers["Content-Type"] = non_anot_mediatype
graph = await return_annotated_rdf(graph, profile, repo)
content = io.BytesIO(
graph.serialize(format=non_anot_mediatype, encoding="utf-8")
)
return StreamingResponse(
content=content, media_type=non_anot_mediatype, headers=profile_headers
)

raise HTTPException(
status.HTTP_400_BAD_REQUEST, f"Unsupported mediatype: {mediatype}."
)
@@ -91,7 +86,6 @@ async def return_rdf(graph, mediatype, profile_headers):
format=RDF_SERIALIZER_TYPES_MAP[str(mediatype)], encoding="utf-8"
)
)
profile_headers["Content-Disposition"] = "inline"
return StreamingResponse(content=obj, media_type=mediatype, headers=profile_headers)


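For reference, the Content-Disposition logic this diff adds to return_from_graph can be read in isolation as the short sketch below. The standalone helper name content_disposition and the curie_for callback are illustrative stand-ins only (in the PR the logic lives inline in return_from_graph and calls get_curie_id_for_uri directly); the branching mirrors the added lines above.

from typing import Callable

from rdflib import RDF, Graph, URIRef


def content_disposition(
    graph: Graph,
    mediatype: str,
    selected_class: URIRef,
    curie_for: Callable[[URIRef], str],
) -> str:
    # CSV responses are offered as downloads; every other mediatype renders inline.
    header = "attachment;" if str(mediatype) == "text/csv" else "inline;"
    # Prefer a compact curie for the focus node, i.e. any subject typed as the selected class.
    iri = graph.value(None, RDF.type, selected_class)
    if iri:
        header += f" filename={curie_for(URIRef(str(iri)))}"
    # Otherwise fall back to the local name of the selected class itself.
    elif selected_class:
        header += f" filename={selected_class.split('#')[-1].split('/')[-1]}"
    return header

For example, the new tests below expect a Turtle request to /v/vocab to come back with content-disposition: inline; filename=SchemesList.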
tests/test_pmt_headers.py (133 additions, 0 deletions)
@@ -0,0 +1,133 @@
"""test_pmt_headers

A set of tests to confirm that the Profile and Media Type information in the response headers is
as expected for object and listing endpoints.

Also checks the content-disposition header
"""

from pathlib import Path

import pytest
from fastapi.testclient import TestClient
from pyoxigraph.pyoxigraph import Store

from prez.app import assemble_app
from prez.dependencies import get_repo
from prez.services.curie_functions import get_curie_id_for_uri
from prez.sparql.methods import PyoxigraphRepo, Repo


@pytest.fixture(scope="session")
def test_store() -> Store:
# Create a new pyoxigraph Store
store = Store()

for file in Path(__file__).parent.glob("../tests/data/*/input/*.ttl"):
store.load(file.read_bytes(), "text/turtle")

return store


@pytest.fixture(scope="session")
def test_repo(test_store: Store) -> Repo:
# Create a PyoxigraphQuerySender using the test_store
return PyoxigraphRepo(test_store)


@pytest.fixture(scope="session")
def test_client(test_repo: Repo) -> TestClient:
# Override the dependency to use the test_repo
def override_get_repo():
return test_repo

app = assemble_app()

app.dependency_overrides[get_repo] = override_get_repo

with TestClient(app) as c:
yield c

# Remove the override to ensure subsequent tests are unaffected
app.dependency_overrides.clear()


@pytest.mark.parametrize(
"endpoint, mediatype, filename",
[
("/v/vocab", "text/turtle", "SchemesList"),
("/s/datasets", "text/turtle", "DatasetList"),
("/c/catalogs", "text/turtle", "CatalogList"),
("/v/vocab", "application/ld+json", "SchemesList"),
("/s/datasets", "application/ld+json", "DatasetList"),
("/c/catalogs", "application/ld+json", "CatalogList"),
],
)
def test_listing_endpoint(
endpoint: str, mediatype: str, filename: str, test_client: TestClient
):
"""Assert that response headers are returned correctly for a listing endpoint.

i.e. that they specify the

- Content-Type, and
- Content-Disposition

headers, and that those headers have the expected values.
"""
headers = {"accept": mediatype}
expected_headers = {
"content-type": mediatype,
"content-disposition": f"inline; filename={filename}",
}
response = test_client.get(endpoint, headers=headers)
assert all(
header in response.headers.keys() for header in expected_headers.keys()
), f"Response must specify the {expected_headers.keys()} headers."
assert all(
response.headers[header] == expected_headers[header]
for header in expected_headers.keys()
), "Required headers do not have the expected values."


@pytest.mark.parametrize(
"endpoint, mediatype, object_uri",
[
("/v/vocab", "text/turtle", "https://linked.data.gov.au/def/vocdermods"),
("/s/datasets", "text/turtle", "http://example.com/datasets/sandgate"),
("/c/catalogs", "text/turtle", "https://data.idnau.org/pid/democat"),
(
"/v/vocab",
"application/ld+json",
"https://linked.data.gov.au/def/vocdermods",
),
("/s/datasets", "application/ld+json", "http://example.com/datasets/sandgate"),
("/c/catalogs", "application/ld+json", "https://data.idnau.org/pid/democat"),
],
)
def test_object_endpoint(
endpoint: str, mediatype: str, object_uri: str, test_client: TestClient
):
"""Assert that response headers are returned correctly for an object endpoint.

i.e. that they specify the

- Content-Type, and
- Content-Disposition

headers, and that those headers have the expected values.
"""
curie = get_curie_id_for_uri(object_uri)
headers = {"accept": mediatype}
expected_headers = {
"content-type": mediatype,
"content-disposition": f"inline; filename={curie}",
}
response = test_client.get(endpoint + "/" + curie, headers=headers)
assert all(
header in response.headers.keys() for header in expected_headers.keys()
), f"Response must specify the {expected_headers.keys()} headers."
assert all(
response.headers[header] == expected_headers[header]
for header in expected_headers.keys()
), "Required headers do not have the expected values."