Skip to content

Commit

Permalink
Merge pull request #134 from RDFLib/edmond/feat/vocabs-api
Browse files Browse the repository at this point in the history
Add new vocabs API to load large vocabularies incrementally
  • Loading branch information
edmondchuc authored Aug 3, 2023
2 parents 580618e + 01a1854 commit 6a47135
Show file tree
Hide file tree
Showing 37 changed files with 1,637 additions and 424 deletions.
27 changes: 14 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,19 +84,20 @@ via python-dotenv, or directly in the environment in which Prez is run. The envi
instantiate a Pydantic `Settings` object which is used throughout Prez to configure its behaviour. To see how prez
interprets/uses these environment variables see the `prez/config.py` file.

| Environment Variable | Description |
|-------------------|-----------------------------------------------------------------------------------------------------------------------------------|
| SPARQL_ENDPOINT | Read-only SPARQL endpoint for SpacePrez |
| SPARQL_USERNAME | A username for Basic Auth against the SPARQL endpoint, if required by the SPARQL endpoint. |
| SPARQL_PASSWORD | A password for Basic Auth against the SPARQL endpoint, if required by the SPARQL endpoint. |
| PROTOCOL | The protocol used to deliver Prez. Usually 'http'. |
| HOST | The host on which to server prez, typically 'localhost'. |
| PORT | The port Prez is made accessible on. Default is 8000, could be 80 or anything else that your system has permission to use |
| SYSTEM_URI | Documentation property. An IRI for the Prez system as a whole. This value appears in the landing page RDF delivered by Prez ('/') |
| LOG_LEVEL | One of CRITICAL, ERROR, WARNING, INFO, DEBUG. Defaults to INFO. |
| LOG_OUTPUT | "file", "stdout", or "both" ("file" and "stdout"). Defaults to stdout. |
| PREZ_TITLE | The title to use for Prez instance |
| PREZ_DESC | A description to use for the Prez instance |
| Environment Variable | Description |
|---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| SPARQL_ENDPOINT | Read-only SPARQL endpoint for SpacePrez |
| SPARQL_USERNAME | A username for Basic Auth against the SPARQL endpoint, if required by the SPARQL endpoint. |
| SPARQL_PASSWORD | A password for Basic Auth against the SPARQL endpoint, if required by the SPARQL endpoint. |
| PROTOCOL | The protocol used to deliver Prez. Usually 'http'. |
| HOST | The host on which to serve Prez, typically 'localhost'. |
| PORT | The port Prez is made accessible on. Default is 8000, could be 80 or anything else that your system has permission to use |
| SYSTEM_URI | Documentation property. An IRI for the Prez system as a whole. This value appears in the landing page RDF delivered by Prez ('/') |
| LOG_LEVEL | One of CRITICAL, ERROR, WARNING, INFO, DEBUG. Defaults to INFO. |
| LOG_OUTPUT | "file", "stdout", or "both" ("file" and "stdout"). Defaults to stdout. |
| PREZ_TITLE | The title to use for the Prez instance |
| PREZ_DESC | A description to use for the Prez instance |
| DISABLE_PREFIX_GENERATION | Default value is `false`. For very large datasets, consider setting this to `true` to turn off automatic prefix generation, and instead provide a predefined set of prefixes for namespaces as described in [Link Generation](README-Dev.md#link-generation). |

### Running in a Container

Expand Down
2 changes: 2 additions & 0 deletions prez/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from prez.routers.spaceprez import router as spaceprez_router
from prez.routers.sparql import router as sparql_router
from prez.routers.vocprez import router as vocprez_router
from prez.routers.curie import router as curie_router
from prez.services.app_service import healthcheck_sparql_endpoints, count_objects
from prez.services.app_service import populate_api_info, add_prefixes_to_prefix_graph
from prez.services.exception_catchers import (
Expand Down Expand Up @@ -60,6 +61,7 @@
app.include_router(catprez_router)
app.include_router(vocprez_router)
app.include_router(spaceprez_router)
app.include_router(curie_router)


@app.middleware("http")
Expand Down
23 changes: 23 additions & 0 deletions prez/bnode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from rdflib import Graph, URIRef, BNode


def get_bnode_depth(
    graph: Graph,
    node: URIRef | BNode | None = None,
    depth: int = 0,
    seen: list[BNode] | None = None,
) -> int:
    """Get the max blank node depth of the node in the graph.

    This is a recursive function. Starting from ``node``, it follows
    outgoing triples whose object is a blank node and returns the length of
    the longest blank-node chain found.

    :param graph: The graph to inspect.
    :param node: The node to start from. Descent only continues from the
        starting node (``depth == 0``) or from blank nodes.
    :param depth: The depth of ``node`` itself; callers normally leave this
        at ``0``.
    :param seen: Blank nodes already visited, shared across the recursion so
        that each blank node is walked at most once (guards against cycles).
    :return: The maximum blank node depth reachable from ``node``.

    >>> graph = Graph().parse(...)
    >>> depth = get_bnode_depth(graph, URIRef("node-name"))
    """
    if seen is None:
        seen = []

    # Only the starting node and blank nodes are expanded further.
    if depth != 0 and not isinstance(node, BNode):
        return depth

    # Track the deepest branch separately from ``depth`` so that sibling
    # blank nodes are each measured from this node's level, rather than
    # every sibling starting one level deeper than the previous one.
    deepest = depth
    for o in graph.objects(node, None):
        if isinstance(o, BNode) and o not in seen:
            seen.append(o)
            deepest = max(deepest, get_bnode_depth(graph, o, depth + 1, seen))

    return deepest
1 change: 1 addition & 0 deletions prez/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class Settings(BaseSettings):
"Knowledge Graph data which can be subset according to information profiles."
)
prez_version: Optional[str]
disable_prefix_generation: bool = False

@root_validator()
def check_endpoint_enabled(cls, values):
Expand Down
5 changes: 4 additions & 1 deletion prez/models/profiles_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,8 @@ def populate(cls, values):
if len(r.bindings) > 0:
values["classes"] = frozenset([prof.get("class") for prof in r.bindings])

values["label"] = profiles_graph_cache.value(URIRef(values["uri"]), URIRef("http://www.w3.org/ns/dx/conneg/altr-ext#hasLabelPredicate"))
values["label"] = profiles_graph_cache.value(
URIRef(values["uri"]),
URIRef("http://www.w3.org/ns/dx/conneg/altr-ext#hasLabelPredicate"),
)
return values
5 changes: 2 additions & 3 deletions prez/models/vocprez_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

class VocabItem(BaseModel):
uri: Optional[URIRef] = None
classes: Optional[Set[URIRef]]
classes: Optional[frozenset[URIRef]]
curie_id: Optional[str] = None
general_class: Optional[URIRef] = None
scheme_curie: Optional[str] = None
Expand All @@ -29,7 +29,6 @@ def __hash__(self):
@root_validator
def populate(cls, values):
url_path = values.get("url_path")
uri = values.get("uri")
concept_curie = values.get("concept_curie")
scheme_curie = values.get("scheme_curie")
collection_curie = values.get("collection_curie")
Expand All @@ -38,7 +37,7 @@ def populate(cls, values):
return values
if url_path in ["/object", "/v/object"]:
values["link_constructor"] = f"/v/object?uri="
elif len(url_parts) == 5: # concepts
elif len(url_parts) == 5 and "/all" not in url_path: # concepts
values["general_class"] = SKOS.Concept
if scheme_curie:
values["curie_id"] = concept_curie
Expand Down
33 changes: 33 additions & 0 deletions prez/queries/object.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from textwrap import dedent

from jinja2 import Template


def object_inbound_query(iri: str, predicate: str) -> str:
    """Build a SPARQL query counting inbound links to ``iri``.

    The query binds ``iri`` to ``?iri`` and counts the matches of
    ``?other <predicate> ?iri``, exposing the result as ``?count``.

    :param iri: The IRI of the object; inserted between angle brackets as-is.
    :param predicate: The predicate IRI; inserted between angle brackets
        as-is.
    :return: The rendered, dedented SPARQL SELECT query.
    """
    template = Template(
        """
        SELECT (COUNT(?iri) as ?count)
        WHERE {
            BIND(<{{ iri }}> as ?iri)
            ?other <{{ predicate }}> ?iri .
        }
        """
    )
    rendered = template.render(iri=iri, predicate=predicate)
    return dedent(rendered)


def object_outbound_query(iri: str, predicate: str) -> str:
    """Build a SPARQL query counting outbound links from ``iri``.

    The query binds ``iri`` to ``?iri`` and counts the matches of
    ``?iri <predicate> ?other``, exposing the result as ``?count``.

    :param iri: The IRI of the object; inserted between angle brackets as-is.
    :param predicate: The predicate IRI; inserted between angle brackets
        as-is.
    :return: The rendered, dedented SPARQL SELECT query.
    """
    template = Template(
        """
        SELECT (COUNT(?iri) as ?count)
        WHERE {
            BIND(<{{ iri }}> as ?iri)
            ?iri <{{ predicate }}> ?other .
        }
        """
    )
    rendered = template.render(iri=iri, predicate=predicate)
    return dedent(rendered)
150 changes: 150 additions & 0 deletions prez/queries/vocprez.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
from textwrap import dedent

from jinja2 import Template


def get_concept_scheme_query(iri: str, bnode_depth: int) -> str:
    """Build a SPARQL CONSTRUCT query describing a concept scheme.

    The query returns the scheme's direct triples except
    ``skos:hasTopConcept``, a ``prez:childrenCount`` triple holding the
    number of top concepts, and, when ``bnode_depth`` is greater than zero,
    a chain of ``bnode_depth`` further triple patterns hanging off a blank
    node object of the scheme.

    :param iri: The IRI of the concept scheme; inserted between angle
        brackets as-is.
    :param bnode_depth: How many levels of nested triple patterns to
        generate below the scheme's blank node objects.
    :return: The rendered, dedented SPARQL CONSTRUCT query.
    """
    # The ``isBlank(?o0)`` filter does not depend on the loop variable, so it
    # is emitted once next to the pattern that introduces ``?o0`` instead of
    # once per loop iteration. A FILTER applies to its whole group, so the
    # query's meaning is unchanged.
    # NOTE(review): the original repeated this filter inside the loop; if a
    # per-level ``isBlank(?o{{ i }})`` was intended, that is a behaviour
    # change and should be confirmed separately.
    template = Template(
        """
        PREFIX prez: <https://prez.dev/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        CONSTRUCT {
            ?iri ?p ?o .
            {% if bnode_depth > 0 +%}
            ?iri ?p0 ?o0 .
            {% endif %}
            {% for i in range(bnode_depth) %}
            ?o{{ i }} ?p{{ i + 1 }} ?o{{ i + 1 }} .
            {% endfor %}
            ?iri prez:childrenCount ?childrenCount .
        }
        WHERE {
            BIND(<{{ iri }}> as ?iri)
            ?iri ?p ?o .
            FILTER (?p != skos:hasTopConcept)
            {
                SELECT (COUNT(?topConcept) AS ?childrenCount)
                WHERE {
                    BIND(<{{ iri }}> as ?iri)
                    ?iri skos:hasTopConcept ?topConcept .
                }
            }
            {% if bnode_depth > 0 %}
            ?iri ?p0 ?o0 .
            FILTER (isBlank(?o0))
            {% endif %}
            {% for i in range(bnode_depth) %}
            ?o{{ i }} ?p{{ i + 1 }} ?o{{ i + 1 }} .
            {% endfor %}
        }
        """
    )
    query = template.render(iri=iri, bnode_depth=bnode_depth)

    return dedent(query)


def get_concept_scheme_top_concepts_query(iri: str, page: int, per_page: int) -> str:
    """Build a SPARQL CONSTRUCT query for one page of a scheme's top concepts.

    For each top concept on the page the result carries its
    ``skos:prefLabel`` and a ``prez:childrenCount`` (the number of concepts
    that have it as ``skos:broader``). The scheme itself carries a
    ``prez:childrenCount`` with its total number of top concepts. Concepts
    are ordered by ``str(?label)``.

    :param iri: The IRI of the concept scheme; inserted between angle
        brackets as-is.
    :param page: The page number; page ``1`` maps to ``OFFSET 0``.
    :param per_page: The page size, used as the ``LIMIT``.
    :return: The rendered, dedented SPARQL CONSTRUCT query.
    """
    offset = (page - 1) * per_page
    template = Template(
        """
        PREFIX prez: <https://prez.dev/>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        CONSTRUCT {
            ?concept skos:prefLabel ?label .
            ?concept prez:childrenCount ?narrowerChildrenCount .
            ?iri prez:childrenCount ?childrenCount .
            ?iri skos:hasTopConcept ?concept .
        }
        WHERE {
            BIND(<{{ iri }}> as ?iri)
            ?iri skos:hasTopConcept ?concept .
            ?concept skos:prefLabel ?label .
            {
                SELECT (COUNT(?childConcept) AS ?childrenCount)
                WHERE {
                    BIND(<{{ iri }}> as ?iri)
                    ?iri skos:hasTopConcept ?childConcept .
                }
            }
            {
                SELECT ?concept ?label (COUNT(?narrowerConcept) AS ?narrowerChildrenCount)
                WHERE {
                    BIND(<{{ iri }}> as ?iri)
                    ?iri skos:hasTopConcept ?concept .
                    ?concept skos:prefLabel ?label .
                    OPTIONAL {
                        ?narrowerConcept skos:broader ?concept .
                    }
                }
                GROUP BY ?concept ?label
                ORDER BY str(?label)
                LIMIT {{ limit }}
                OFFSET {{ offset }}
            }
        }
        """
    )
    rendered = template.render(iri=iri, limit=per_page, offset=offset)
    return dedent(rendered)


def get_concept_narrowers_query(iri: str, page: int, per_page: int) -> str:
    """Build a SPARQL CONSTRUCT query for one page of a concept's narrowers.

    Narrower concepts are those with ``skos:broader`` pointing at ``iri``.
    For each one on the page the result carries its ``skos:prefLabel`` and a
    ``prez:childrenCount`` (the number of concepts that have it as
    ``skos:broader``). The parent concept carries a ``prez:childrenCount``
    with its total number of narrower concepts. Concepts are ordered by
    ``str(?label)``.

    :param iri: The IRI of the parent concept; inserted between angle
        brackets as-is.
    :param page: The page number; page ``1`` maps to ``OFFSET 0``.
    :param per_page: The page size, used as the ``LIMIT``.
    :return: The rendered, dedented SPARQL CONSTRUCT query.
    """
    offset = (page - 1) * per_page
    template = Template(
        """
        PREFIX prez: <https://prez.dev/>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        CONSTRUCT {
            ?concept skos:prefLabel ?label .
            ?concept prez:childrenCount ?narrowerChildrenCount .
            ?iri prez:childrenCount ?childrenCount .
            ?iri skos:narrower ?concept .
        }
        WHERE {
            BIND(<{{ iri }}> as ?iri)
            ?concept skos:broader ?iri .
            ?concept skos:prefLabel ?label .
            {
                SELECT (COUNT(?childConcept) AS ?childrenCount)
                WHERE {
                    BIND(<{{ iri }}> as ?iri)
                    ?childConcept skos:broader ?iri .
                }
            }
            {
                SELECT ?concept ?label (COUNT(?narrowerConcept) AS ?narrowerChildrenCount)
                WHERE {
                    BIND(<{{ iri }}> as ?iri)
                    ?concept skos:broader ?iri .
                    ?concept skos:prefLabel ?label .
                    OPTIONAL {
                        ?narrowerConcept skos:broader ?concept .
                    }
                }
                GROUP BY ?concept ?label
                ORDER BY str(?label)
                LIMIT {{ limit }}
                OFFSET {{ offset }}
            }
        }
        """
    )
    rendered = template.render(iri=iri, limit=per_page, offset=offset)
    return dedent(rendered)
13 changes: 9 additions & 4 deletions prez/renderers/renderer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import io
import logging
from typing import Optional, Dict
from typing import Optional

from connegp import RDF_MEDIATYPES, RDF_SERIALIZER_TYPES_MAP
from fastapi.responses import StreamingResponse
Expand Down Expand Up @@ -28,7 +28,7 @@ async def return_from_queries(
mediatype,
profile,
profile_headers,
predicates_for_link_addition: Dict = {},
predicates_for_link_addition: dict = None,
):
"""
Executes SPARQL queries, loads these to RDFLib Graphs, and calls the "return_from_graph" function to return the
Expand All @@ -45,7 +45,7 @@ async def return_from_graph(
mediatype,
profile,
profile_headers,
predicates_for_link_addition: dict = {},
predicates_for_link_addition: dict = None,
):
profile_headers["Content-Disposition"] = "inline"
if str(mediatype) in RDF_MEDIATYPES:
Expand Down Expand Up @@ -88,7 +88,12 @@ async def return_annotated_rdf(
queries_for_uncached, annotations_graph = await get_annotation_properties(
graph, **profile_annotation_props
)
anots_from_triplestore = await queries_to_graph([queries_for_uncached])

if queries_for_uncached is None:
anots_from_triplestore = Graph()
else:
anots_from_triplestore = await queries_to_graph([queries_for_uncached])

if len(anots_from_triplestore) > 1:
annotations_graph += anots_from_triplestore
cache += anots_from_triplestore
Expand Down
12 changes: 12 additions & 0 deletions prez/response.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from fastapi.responses import StreamingResponse


class StreamingTurtleResponse(StreamingResponse):
    """Streaming response that declares the ``text/turtle`` media type."""

    media_type = "text/turtle"

    def render(self, content: str) -> bytes:
        """Return ``content`` encoded as UTF-8 bytes.

        :param content: The text to encode.
        :return: The UTF-8 encoding of ``content``.
        """
        # NOTE(review): confirm the streaming base class actually invokes
        # render(); if it does not, this override has no effect on output.
        encoded = content.encode("utf-8")
        return encoded


class StreamingTurtleAnnotatedResponse(StreamingTurtleResponse):
    """Streaming Turtle response that declares the ``text/anot+turtle`` media type.

    Behaves like ``StreamingTurtleResponse``; only ``media_type`` differs.
    """

    media_type = "text/anot+turtle"
Loading

0 comments on commit 6a47135

Please sign in to comment.