-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
401 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,394 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "aw_lp5wfXRSl" | ||
}, | ||
"source": [ | ||
"# JSON-LD Demonstration\n", | ||
"\n", | ||
"In this notebook, we will explore the principles of JSON-LD using the example of a person. JSON-LD stands for \"JSON for Linking Data\" and it provides a method to enrich your JSON data with semantics.\n", | ||
"\n", | ||
"An operational version of this notebook can be accessed [here](https://colab.research.google.com/drive/14XqRJPWs07RUQgZmDZEu3yb2m1xGvxEQ?usp=sharing)." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "y-aalpcNb223", | ||
"outputId": "565a219c-1d10-4b55-fd75-a1fcb2257022" | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Requirement already satisfied: jsonschema in /usr/local/lib/python3.10/dist-packages (4.19.0)\n", | ||
"Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (23.1.0)\n", | ||
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (2023.7.1)\n", | ||
"Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (0.30.2)\n", | ||
"Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (0.10.2)\n", | ||
"Collecting rdflib\n", | ||
" Downloading rdflib-7.0.0-py3-none-any.whl (531 kB)\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m531.9/531.9 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[?25hCollecting isodate<0.7.0,>=0.6.0 (from rdflib)\n", | ||
" Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.7/41.7 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[?25hRequirement already satisfied: pyparsing<4,>=2.1.0 in /usr/local/lib/python3.10/dist-packages (from rdflib) (3.1.1)\n", | ||
"Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from isodate<0.7.0,>=0.6.0->rdflib) (1.16.0)\n", | ||
"Installing collected packages: isodate, rdflib\n", | ||
"Successfully installed isodate-0.6.1 rdflib-7.0.0\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Install the required library for JSON schema validation\n", | ||
"!pip install jsonschema\n", | ||
"!pip install rdflib" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "tNjkjGNEXJXP" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import jsonschema\n", | ||
"from jsonschema import validate\n", | ||
"import json\n", | ||
"import rdflib\n", | ||
"\n", | ||
"# Regular JSON representation of a person\n", | ||
"person_data = {\n", | ||
" \"@context\": {\n", | ||
" \"schema\": \"https://schema.org/\",\n", | ||
" \"firstName\": \"schema:givenName\",\n", | ||
" \"lastName\": \"schema:lastName\",\n", | ||
" \"birthdate\": \"schema:birthDate\",\n", | ||
" \"institute\": \"schema:affiliation\",\n", | ||
" \"name\": \"schema:name\",\n", | ||
" \"street\": \"schema:streetAddress\",\n", | ||
" \"city\": \"schema:locality\",\n", | ||
" \"zip\": \"schema:postalCode\"\n", | ||
" },\n", | ||
" \"@id\": \"https://www.example.com/SimonClark\",\n", | ||
" \"@type\": \"schema:Person\",\n", | ||
" \"firstName\": \"Simon\",\n", | ||
" \"lastName\": \"Clark\",\n", | ||
" \"birthdate\": \"1987-04-23\",\n", | ||
" \"institute\": {\n", | ||
" \"@id\": \"https://www.example.com/SINTEF\",\n", | ||
" \"@type\": \"schema:ResearchOrganization\",\n", | ||
" \"name\": \"SINTEF\",\n", | ||
" \"street\": \"Strindvegen 4\",\n", | ||
" \"city\": \"Trondheim\",\n", | ||
" \"zip\": \"7034\"\n", | ||
" }\n", | ||
"}\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "K221MAoHa3Nx" | ||
}, | ||
"source": [ | ||
"In the JSON-LD example, we added an `@context` that maps terms in our JSON data to their semantic meanings, using URIs (typically from established vocabularies, like schema.org). This allows machines to understand the semantics behind the data.\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "ZmQfOeFyieo9" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Regular JSON representation of a person\n", | ||
"person_data = {\n", | ||
" \"@context\": \"https://schema.org/\",\n", | ||
" \"@id\": \"https://orcid.org/0000-0002-8758-6109\",\n", | ||
" \"@type\": \"Person\",\n", | ||
" \"firstName\": \"Simon\",\n", | ||
" \"lastName\": \"Clark\",\n", | ||
" \"gender\": {\"@type\": \"Male\"},\n", | ||
" \"birthDate\": \"1987-04-23\",\n", | ||
" \"affiliation\": {\n", | ||
" \"@id\": \"https://ror.org/01f677e56\",\n", | ||
" \"@type\": \"ResearchOrganization\"\n", | ||
" }\n", | ||
"}\n", | ||
"\n", | ||
"# email to: simon.clark@sintef.no" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "HDHJuV62bTby", | ||
"outputId": "d5537f6e-4325-40ef-dfa4-c7d55159623f" | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"JSON data is valid according to the schema.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"person_schema = {\n", | ||
" \"type\": \"object\",\n", | ||
" \"properties\": {\n", | ||
" \"firstName\": {\n", | ||
" \"type\": \"string\"\n", | ||
" },\n", | ||
" \"lastName\": {\n", | ||
" \"type\": \"string\",\n", | ||
" \"minLength\": 1\n", | ||
" },\n", | ||
" \"birthDate\": { # Replacing age with birthdate\n", | ||
" \"type\": \"string\",\n", | ||
" \"format\": \"date\",\n", | ||
" \"pattern\": \"^[0-9]{4}-[0-1][0-9]-[0-3][0-9]$\"\n", | ||
" },\n", | ||
" \"gender\": {\n", | ||
" \"type\": \"object\"\n", | ||
" },\n", | ||
" \"affiliation\": {\n", | ||
" \"type\": \"object\"\n", | ||
" }\n", | ||
" },\n", | ||
" \"required\": [\"firstName\", \"lastName\", \"birthDate\", \"affiliation\"] # Updated age to birthdate\n", | ||
"}\n", | ||
"\n", | ||
"# Function to validate JSON data against the schema\n", | ||
"def validate_json(data, schema):\n", | ||
" try:\n", | ||
" validate(instance=data, schema=schema)\n", | ||
" return True, \"JSON data is valid according to the schema.\"\n", | ||
" except jsonschema.exceptions.ValidationError as ve:\n", | ||
" return False, ve.message\n", | ||
"\n", | ||
"# Validate the sample JSON data\n", | ||
"is_valid, message = validate_json(person_data, person_schema)\n", | ||
"print(message)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "R-mO4FGtbr-L", | ||
"outputId": "c0ae5e87-6ade-4ef8-a639-02bf287071a9" | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"(rdflib.term.URIRef('http://schema.org/Organization'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/PerformingGroup'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/TheaterGroup'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/MusicGroup'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/DanceGroup'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/OnlineBusiness'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/OnlineStore'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/SportsOrganization'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/SportsTeam'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/Airline'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/SearchRescueOrganization'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/FundingScheme'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/NewsMediaOrganization'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/EducationalOrganization'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/CollegeOrUniversity'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/HighSchool'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/Preschool'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/ElementarySchool'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/MiddleSchool'),)\n", | ||
"(rdflib.term.URIRef('http://schema.org/School'),)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Create a new graph\n", | ||
"g = rdflib.Graph()\n", | ||
"\n", | ||
"# Load schema.org vocabulary into the graph\n", | ||
"g.parse(\"https://schema.org/version/latest/schemaorg-current-http.jsonld\", format=\"json-ld\")\n", | ||
"\n", | ||
"person_data_str = json.dumps(person_data)\n", | ||
"g.parse(data=person_data_str, format=\"json-ld\")\n", | ||
"\n", | ||
"# Define and execute a SPARQL query for all instances of Organization\n", | ||
"sparql_query = \"\"\"\n", | ||
"PREFIX schema: <http://schema.org/>\n", | ||
"SELECT DISTINCT ?type WHERE {\n", | ||
" ?type rdfs:subClassOf* schema:Organization .\n", | ||
"}\n", | ||
"LIMIT 20\n", | ||
"\"\"\"\n", | ||
"\n", | ||
"# Execute the SPARQL query\n", | ||
"results = g.query(sparql_query)\n", | ||
"\n", | ||
"# Print the results\n", | ||
"for row in results:\n", | ||
" print(row)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "h4vvw5hNhih2", | ||
"outputId": "77828fc9-bafe-414b-f6f6-65fb117aabc5" | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"https://ror.org/01f677e56\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Define and execute a SPARQL query for all instances of Organization\n", | ||
"sparql_query = \"\"\"\n", | ||
"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n", | ||
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n", | ||
"PREFIX schema: <http://schema.org/>\n", | ||
"\n", | ||
"SELECT ?instance WHERE {\n", | ||
" ?subclass rdfs:subClassOf* schema:Organization .\n", | ||
" ?instance rdf:type ?subclass .\n", | ||
"}\n", | ||
"LIMIT 10\n", | ||
"\"\"\"\n", | ||
"\n", | ||
"# Execute the SPARQL query\n", | ||
"results = g.query(sparql_query)\n", | ||
"\n", | ||
"# Print the results\n", | ||
"for row in results:\n", | ||
" print(row[0])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "e_-Qa3Hgh2kg", | ||
"outputId": "2b9fafa4-fca4-426f-e768-6ef561921b14" | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"1\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Define and execute a SPARQL query for all instances of Organization\n", | ||
"sparql_query = \"\"\"\n", | ||
"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n", | ||
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n", | ||
"PREFIX schema: <http://schema.org/>\n", | ||
"\n", | ||
"SELECT (COUNT(?subject) AS ?numMales) WHERE {\n", | ||
" ?subject rdf:type schema:Person .\n", | ||
" ?subject schema:gender ?gender .\n", | ||
" ?gender rdf:type schema:Male .\n", | ||
"}\n", | ||
"LIMIT 10\n", | ||
"\"\"\"\n", | ||
"\n", | ||
"# Execute the SPARQL query\n", | ||
"results = g.query(sparql_query)\n", | ||
"\n", | ||
"# Print the results\n", | ||
"for row in results:\n", | ||
" print(row.numMales)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "HJfpCj4-sww3", | ||
"outputId": "c4ba6b30-7d7e-45d9-c3e4-b0d311e0fdd0" | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"1987-04-23\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Define and execute a SPARQL query for all instances of Organization\n", | ||
"sparql_query = \"\"\"\n", | ||
"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n", | ||
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n", | ||
"PREFIX schema: <http://schema.org/>\n", | ||
"\n", | ||
"SELECT ?bday WHERE {\n", | ||
" ?subject rdf:type schema:Person .\n", | ||
" ?subject schema:birthDate ?bday .\n", | ||
"}\n", | ||
"LIMIT 10\n", | ||
"\"\"\"\n", | ||
"\n", | ||
"# Execute the SPARQL query\n", | ||
"results = g.query(sparql_query)\n", | ||
"\n", | ||
"# Print the results\n", | ||
"for row in results:\n", | ||
" print(row[0])" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"colab": { | ||
"provenance": [] | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
Oops, something went wrong.