Skip to content

Commit

Permalink
added example for ipynb
Browse files Browse the repository at this point in the history
  • Loading branch information
xavierr committed Jan 30, 2024
1 parent 6c96a53 commit b1341e5
Show file tree
Hide file tree
Showing 3 changed files with 401 additions and 1 deletion.
394 changes: 394 additions & 0 deletions example_person_jsonld_nb.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,394 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "aw_lp5wfXRSl"
},
"source": [
"# JSON-LD Demonstration\n",
"\n",
"In this notebook, we will explore the principles of JSON-LD using the example of a person. JSON-LD stands for \"JSON for Linking Data\" and it provides a method to enrich your JSON data with semantics.\n",
"\n",
"An operational version of this notebook can be accessed [here](https://colab.research.google.com/drive/14XqRJPWs07RUQgZmDZEu3yb2m1xGvxEQ?usp=sharing)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "y-aalpcNb223",
"outputId": "565a219c-1d10-4b55-fd75-a1fcb2257022"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: jsonschema in /usr/local/lib/python3.10/dist-packages (4.19.0)\n",
"Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (23.1.0)\n",
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (2023.7.1)\n",
"Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (0.30.2)\n",
"Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (0.10.2)\n",
"Collecting rdflib\n",
" Downloading rdflib-7.0.0-py3-none-any.whl (531 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m531.9/531.9 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting isodate<0.7.0,>=0.6.0 (from rdflib)\n",
" Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.7/41.7 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: pyparsing<4,>=2.1.0 in /usr/local/lib/python3.10/dist-packages (from rdflib) (3.1.1)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from isodate<0.7.0,>=0.6.0->rdflib) (1.16.0)\n",
"Installing collected packages: isodate, rdflib\n",
"Successfully installed isodate-0.6.1 rdflib-7.0.0\n"
]
}
],
"source": [
"# Install the required library for JSON schema validation\n",
"!pip install jsonschema\n",
"!pip install rdflib"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "tNjkjGNEXJXP"
},
"outputs": [],
"source": [
"import jsonschema\n",
"from jsonschema import validate\n",
"import json\n",
"import rdflib\n",
"\n",
"# Regular JSON representation of a person\n",
"person_data = {\n",
" \"@context\": {\n",
" \"schema\": \"https://schema.org/\",\n",
" \"firstName\": \"schema:givenName\",\n",
" \"lastName\": \"schema:lastName\",\n",
" \"birthdate\": \"schema:birthDate\",\n",
" \"institute\": \"schema:affiliation\",\n",
" \"name\": \"schema:name\",\n",
" \"street\": \"schema:streetAddress\",\n",
" \"city\": \"schema:locality\",\n",
" \"zip\": \"schema:postalCode\"\n",
" },\n",
" \"@id\": \"https://www.example.com/SimonClark\",\n",
" \"@type\": \"schema:Person\",\n",
" \"firstName\": \"Simon\",\n",
" \"lastName\": \"Clark\",\n",
" \"birthdate\": \"1987-04-23\",\n",
" \"institute\": {\n",
" \"@id\": \"https://www.example.com/SINTEF\",\n",
" \"@type\": \"schema:ResearchOrganization\",\n",
" \"name\": \"SINTEF\",\n",
" \"street\": \"Strindvegen 4\",\n",
" \"city\": \"Trondheim\",\n",
" \"zip\": \"7034\"\n",
" }\n",
"}\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "K221MAoHa3Nx"
},
"source": [
"In the JSON-LD example, we added an `@context` that maps terms in our JSON data to their semantic meanings, using URIs (typically from established vocabularies, like schema.org). This allows machines to understand the semantics behind the data.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ZmQfOeFyieo9"
},
"outputs": [],
"source": [
"# Regular JSON representation of a person\n",
"person_data = {\n",
" \"@context\": \"https://schema.org/\",\n",
" \"@id\": \"https://orcid.org/0000-0002-8758-6109\",\n",
" \"@type\": \"Person\",\n",
" \"firstName\": \"Simon\",\n",
" \"lastName\": \"Clark\",\n",
" \"gender\": {\"@type\": \"Male\"},\n",
" \"birthDate\": \"1987-04-23\",\n",
" \"affiliation\": {\n",
" \"@id\": \"https://ror.org/01f677e56\",\n",
" \"@type\": \"ResearchOrganization\"\n",
" }\n",
"}\n",
"\n",
"# email to: simon.clark@sintef.no"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HDHJuV62bTby",
"outputId": "d5537f6e-4325-40ef-dfa4-c7d55159623f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"JSON data is valid according to the schema.\n"
]
}
],
"source": [
"person_schema = {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"firstName\": {\n",
" \"type\": \"string\"\n",
" },\n",
" \"lastName\": {\n",
" \"type\": \"string\",\n",
" \"minLength\": 1\n",
" },\n",
" \"birthDate\": { # Replacing age with birthdate\n",
" \"type\": \"string\",\n",
" \"format\": \"date\",\n",
" \"pattern\": \"^[0-9]{4}-[0-1][0-9]-[0-3][0-9]$\"\n",
" },\n",
" \"gender\": {\n",
" \"type\": \"object\"\n",
" },\n",
" \"affiliation\": {\n",
" \"type\": \"object\"\n",
" }\n",
" },\n",
" \"required\": [\"firstName\", \"lastName\", \"birthDate\", \"affiliation\"] # Updated age to birthdate\n",
"}\n",
"\n",
"# Function to validate JSON data against the schema\n",
"def validate_json(data, schema):\n",
" try:\n",
" validate(instance=data, schema=schema)\n",
" return True, \"JSON data is valid according to the schema.\"\n",
" except jsonschema.exceptions.ValidationError as ve:\n",
" return False, ve.message\n",
"\n",
"# Validate the sample JSON data\n",
"is_valid, message = validate_json(person_data, person_schema)\n",
"print(message)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "R-mO4FGtbr-L",
"outputId": "c0ae5e87-6ade-4ef8-a639-02bf287071a9"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(rdflib.term.URIRef('http://schema.org/Organization'),)\n",
"(rdflib.term.URIRef('http://schema.org/PerformingGroup'),)\n",
"(rdflib.term.URIRef('http://schema.org/TheaterGroup'),)\n",
"(rdflib.term.URIRef('http://schema.org/MusicGroup'),)\n",
"(rdflib.term.URIRef('http://schema.org/DanceGroup'),)\n",
"(rdflib.term.URIRef('http://schema.org/OnlineBusiness'),)\n",
"(rdflib.term.URIRef('http://schema.org/OnlineStore'),)\n",
"(rdflib.term.URIRef('http://schema.org/SportsOrganization'),)\n",
"(rdflib.term.URIRef('http://schema.org/SportsTeam'),)\n",
"(rdflib.term.URIRef('http://schema.org/Airline'),)\n",
"(rdflib.term.URIRef('http://schema.org/SearchRescueOrganization'),)\n",
"(rdflib.term.URIRef('http://schema.org/FundingScheme'),)\n",
"(rdflib.term.URIRef('http://schema.org/NewsMediaOrganization'),)\n",
"(rdflib.term.URIRef('http://schema.org/EducationalOrganization'),)\n",
"(rdflib.term.URIRef('http://schema.org/CollegeOrUniversity'),)\n",
"(rdflib.term.URIRef('http://schema.org/HighSchool'),)\n",
"(rdflib.term.URIRef('http://schema.org/Preschool'),)\n",
"(rdflib.term.URIRef('http://schema.org/ElementarySchool'),)\n",
"(rdflib.term.URIRef('http://schema.org/MiddleSchool'),)\n",
"(rdflib.term.URIRef('http://schema.org/School'),)\n"
]
}
],
"source": [
"# Create a new graph\n",
"g = rdflib.Graph()\n",
"\n",
"# Load schema.org vocabulary into the graph\n",
"g.parse(\"https://schema.org/version/latest/schemaorg-current-http.jsonld\", format=\"json-ld\")\n",
"\n",
"person_data_str = json.dumps(person_data)\n",
"g.parse(data=person_data_str, format=\"json-ld\")\n",
"\n",
"# Define and execute a SPARQL query for all instances of Organization\n",
"sparql_query = \"\"\"\n",
"PREFIX schema: <http://schema.org/>\n",
"SELECT DISTINCT ?type WHERE {\n",
" ?type rdfs:subClassOf* schema:Organization .\n",
"}\n",
"LIMIT 20\n",
"\"\"\"\n",
"\n",
"# Execute the SPARQL query\n",
"results = g.query(sparql_query)\n",
"\n",
"# Print the results\n",
"for row in results:\n",
" print(row)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "h4vvw5hNhih2",
"outputId": "77828fc9-bafe-414b-f6f6-65fb117aabc5"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://ror.org/01f677e56\n"
]
}
],
"source": [
"# Define and execute a SPARQL query for all instances of Organization\n",
"sparql_query = \"\"\"\n",
"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
"PREFIX schema: <http://schema.org/>\n",
"\n",
"SELECT ?instance WHERE {\n",
" ?subclass rdfs:subClassOf* schema:Organization .\n",
" ?instance rdf:type ?subclass .\n",
"}\n",
"LIMIT 10\n",
"\"\"\"\n",
"\n",
"# Execute the SPARQL query\n",
"results = g.query(sparql_query)\n",
"\n",
"# Print the results\n",
"for row in results:\n",
" print(row[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "e_-Qa3Hgh2kg",
"outputId": "2b9fafa4-fca4-426f-e768-6ef561921b14"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n"
]
}
],
"source": [
"# Define and execute a SPARQL query for all instances of Organization\n",
"sparql_query = \"\"\"\n",
"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
"PREFIX schema: <http://schema.org/>\n",
"\n",
"SELECT (COUNT(?subject) AS ?numMales) WHERE {\n",
" ?subject rdf:type schema:Person .\n",
" ?subject schema:gender ?gender .\n",
" ?gender rdf:type schema:Male .\n",
"}\n",
"LIMIT 10\n",
"\"\"\"\n",
"\n",
"# Execute the SPARQL query\n",
"results = g.query(sparql_query)\n",
"\n",
"# Print the results\n",
"for row in results:\n",
" print(row.numMales)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HJfpCj4-sww3",
"outputId": "c4ba6b30-7d7e-45d9-c3e4-b0d311e0fdd0"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1987-04-23\n"
]
}
],
"source": [
"# Define and execute a SPARQL query for all instances of Organization\n",
"sparql_query = \"\"\"\n",
"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
"PREFIX schema: <http://schema.org/>\n",
"\n",
"SELECT ?bday WHERE {\n",
" ?subject rdf:type schema:Person .\n",
" ?subject schema:birthDate ?bday .\n",
"}\n",
"LIMIT 10\n",
"\"\"\"\n",
"\n",
"# Execute the SPARQL query\n",
"results = g.query(sparql_query)\n",
"\n",
"# Print the results\n",
"for row in results:\n",
" print(row[0])"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Loading

0 comments on commit b1341e5

Please sign in to comment.