added example for ipynb

emmo-repo · Jan 30, 2024 · b1341e5 · b1341e5
1 parent 6c96a53
commit b1341e5
Show file tree

Hide file tree

Showing 3 changed files with 401 additions and 1 deletion.
diff --git a/example_person_jsonld_nb.ipynb b/example_person_jsonld_nb.ipynb
@@ -0,0 +1,394 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "aw_lp5wfXRSl"
+      },
+      "source": [
+        "# JSON-LD Demonstration\n",
+        "\n",
+        "In this notebook, we will explore the principles of JSON-LD using the example of a person. JSON-LD stands for \"JSON for Linking Data\" and it provides a method to enrich your JSON data with semantics.\n",
+        "\n",
+        "An operational version of this notebook can be accessed [here](https://colab.research.google.com/drive/14XqRJPWs07RUQgZmDZEu3yb2m1xGvxEQ?usp=sharing)."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "y-aalpcNb223",
+        "outputId": "565a219c-1d10-4b55-fd75-a1fcb2257022"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Requirement already satisfied: jsonschema in /usr/local/lib/python3.10/dist-packages (4.19.0)\n",
+            "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (23.1.0)\n",
+            "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (2023.7.1)\n",
+            "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (0.30.2)\n",
+            "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (0.10.2)\n",
+            "Collecting rdflib\n",
+            "  Downloading rdflib-7.0.0-py3-none-any.whl (531 kB)\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m531.9/531.9 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hCollecting isodate<0.7.0,>=0.6.0 (from rdflib)\n",
+            "  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.7/41.7 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hRequirement already satisfied: pyparsing<4,>=2.1.0 in /usr/local/lib/python3.10/dist-packages (from rdflib) (3.1.1)\n",
+            "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from isodate<0.7.0,>=0.6.0->rdflib) (1.16.0)\n",
+            "Installing collected packages: isodate, rdflib\n",
+            "Successfully installed isodate-0.6.1 rdflib-7.0.0\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Install the required library for JSON schema validation\n",
+        "!pip install jsonschema\n",
+        "!pip install rdflib"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "tNjkjGNEXJXP"
+      },
+      "outputs": [],
+      "source": [
+        "import jsonschema\n",
+        "from jsonschema import validate\n",
+        "import json\n",
+        "import rdflib\n",
+        "\n",
+        "# Regular JSON representation of a person\n",
+        "person_data = {\n",
+        "    \"@context\": {\n",
+        "        \"schema\": \"https://schema.org/\",\n",
+        "        \"firstName\": \"schema:givenName\",\n",
+        "        \"lastName\": \"schema:lastName\",\n",
+        "        \"birthdate\": \"schema:birthDate\",\n",
+        "        \"institute\": \"schema:affiliation\",\n",
+        "        \"name\": \"schema:name\",\n",
+        "        \"street\": \"schema:streetAddress\",\n",
+        "        \"city\": \"schema:locality\",\n",
+        "        \"zip\": \"schema:postalCode\"\n",
+        "    },\n",
+        "    \"@id\": \"https://www.example.com/SimonClark\",\n",
+        "    \"@type\": \"schema:Person\",\n",
+        "    \"firstName\": \"Simon\",\n",
+        "    \"lastName\": \"Clark\",\n",
+        "    \"birthdate\": \"1987-04-23\",\n",
+        "    \"institute\": {\n",
+        "        \"@id\": \"https://www.example.com/SINTEF\",\n",
+        "        \"@type\": \"schema:ResearchOrganization\",\n",
+        "        \"name\": \"SINTEF\",\n",
+        "        \"street\": \"Strindvegen 4\",\n",
+        "        \"city\": \"Trondheim\",\n",
+        "        \"zip\": \"7034\"\n",
+        "    }\n",
+        "}\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "K221MAoHa3Nx"
+      },
+      "source": [
+        "In the JSON-LD example, we added an `@context` that maps terms in our JSON data to their semantic meanings, using URIs (typically from established vocabularies, like schema.org). This allows machines to understand the semantics behind the data.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "ZmQfOeFyieo9"
+      },
+      "outputs": [],
+      "source": [
+        "# Regular JSON representation of a person\n",
+        "person_data = {\n",
+        "    \"@context\": \"https://schema.org/\",\n",
+        "    \"@id\": \"https://orcid.org/0000-0002-8758-6109\",\n",
+        "    \"@type\": \"Person\",\n",
+        "    \"firstName\": \"Simon\",\n",
+        "    \"lastName\": \"Clark\",\n",
+        "    \"gender\": {\"@type\": \"Male\"},\n",
+        "    \"birthDate\": \"1987-04-23\",\n",
+        "    \"affiliation\": {\n",
+        "        \"@id\": \"https://ror.org/01f677e56\",\n",
+        "        \"@type\": \"ResearchOrganization\"\n",
+        "    }\n",
+        "}\n",
+        "\n",
+        "# email to: simon.clark@sintef.no"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "HDHJuV62bTby",
+        "outputId": "d5537f6e-4325-40ef-dfa4-c7d55159623f"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "JSON data is valid according to the schema.\n"
+          ]
+        }
+      ],
+      "source": [
+        "person_schema = {\n",
+        "    \"type\": \"object\",\n",
+        "    \"properties\": {\n",
+        "        \"firstName\": {\n",
+        "            \"type\": \"string\"\n",
+        "        },\n",
+        "        \"lastName\": {\n",
+        "            \"type\": \"string\",\n",
+        "            \"minLength\": 1\n",
+        "        },\n",
+        "        \"birthDate\": {   # Replacing age with birthdate\n",
+        "            \"type\": \"string\",\n",
+        "            \"format\": \"date\",\n",
+        "            \"pattern\": \"^[0-9]{4}-[0-1][0-9]-[0-3][0-9]$\"\n",
+        "        },\n",
+        "        \"gender\": {\n",
+        "            \"type\": \"object\"\n",
+        "        },\n",
+        "        \"affiliation\": {\n",
+        "            \"type\": \"object\"\n",
+        "        }\n",
+        "    },\n",
+        "    \"required\": [\"firstName\", \"lastName\", \"birthDate\", \"affiliation\"]  # Updated age to birthdate\n",
+        "}\n",
+        "\n",
+        "# Function to validate JSON data against the schema\n",
+        "def validate_json(data, schema):\n",
+        "    try:\n",
+        "        validate(instance=data, schema=schema)\n",
+        "        return True, \"JSON data is valid according to the schema.\"\n",
+        "    except jsonschema.exceptions.ValidationError as ve:\n",
+        "        return False, ve.message\n",
+        "\n",
+        "# Validate the sample JSON data\n",
+        "is_valid, message = validate_json(person_data, person_schema)\n",
+        "print(message)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "R-mO4FGtbr-L",
+        "outputId": "c0ae5e87-6ade-4ef8-a639-02bf287071a9"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "(rdflib.term.URIRef('http://schema.org/Organization'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/PerformingGroup'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/TheaterGroup'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/MusicGroup'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/DanceGroup'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/OnlineBusiness'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/OnlineStore'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/SportsOrganization'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/SportsTeam'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/Airline'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/SearchRescueOrganization'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/FundingScheme'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/NewsMediaOrganization'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/EducationalOrganization'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/CollegeOrUniversity'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/HighSchool'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/Preschool'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/ElementarySchool'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/MiddleSchool'),)\n",
+            "(rdflib.term.URIRef('http://schema.org/School'),)\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Create a new graph\n",
+        "g = rdflib.Graph()\n",
+        "\n",
+        "# Load schema.org vocabulary into the graph\n",
+        "g.parse(\"https://schema.org/version/latest/schemaorg-current-http.jsonld\", format=\"json-ld\")\n",
+        "\n",
+        "person_data_str = json.dumps(person_data)\n",
+        "g.parse(data=person_data_str, format=\"json-ld\")\n",
+        "\n",
+        "# Define and execute a SPARQL query for all instances of Organization\n",
+        "sparql_query = \"\"\"\n",
+        "PREFIX schema: <http://schema.org/>\n",
+        "SELECT DISTINCT ?type WHERE {\n",
+        "  ?type rdfs:subClassOf* schema:Organization .\n",
+        "}\n",
+        "LIMIT 20\n",
+        "\"\"\"\n",
+        "\n",
+        "# Execute the SPARQL query\n",
+        "results = g.query(sparql_query)\n",
+        "\n",
+        "# Print the results\n",
+        "for row in results:\n",
+        "    print(row)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "h4vvw5hNhih2",
+        "outputId": "77828fc9-bafe-414b-f6f6-65fb117aabc5"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "https://ror.org/01f677e56\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Define and execute a SPARQL query for all instances of Organization\n",
+        "sparql_query = \"\"\"\n",
+        "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
+        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
+        "PREFIX schema: <http://schema.org/>\n",
+        "\n",
+        "SELECT ?instance WHERE {\n",
+        "    ?subclass rdfs:subClassOf* schema:Organization .\n",
+        "    ?instance rdf:type ?subclass .\n",
+        "}\n",
+        "LIMIT 10\n",
+        "\"\"\"\n",
+        "\n",
+        "# Execute the SPARQL query\n",
+        "results = g.query(sparql_query)\n",
+        "\n",
+        "# Print the results\n",
+        "for row in results:\n",
+        "    print(row[0])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "e_-Qa3Hgh2kg",
+        "outputId": "2b9fafa4-fca4-426f-e768-6ef561921b14"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "1\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Define and execute a SPARQL query for all instances of Organization\n",
+        "sparql_query = \"\"\"\n",
+        "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
+        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
+        "PREFIX schema: <http://schema.org/>\n",
+        "\n",
+        "SELECT (COUNT(?subject) AS ?numMales) WHERE {\n",
+        "    ?subject rdf:type schema:Person .\n",
+        "    ?subject schema:gender ?gender .\n",
+        "    ?gender rdf:type schema:Male .\n",
+        "}\n",
+        "LIMIT 10\n",
+        "\"\"\"\n",
+        "\n",
+        "# Execute the SPARQL query\n",
+        "results = g.query(sparql_query)\n",
+        "\n",
+        "# Print the results\n",
+        "for row in results:\n",
+        "    print(row.numMales)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "HJfpCj4-sww3",
+        "outputId": "c4ba6b30-7d7e-45d9-c3e4-b0d311e0fdd0"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "1987-04-23\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Define and execute a SPARQL query for all instances of Organization\n",
+        "sparql_query = \"\"\"\n",
+        "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
+        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
+        "PREFIX schema: <http://schema.org/>\n",
+        "\n",
+        "SELECT ?bday WHERE {\n",
+        "    ?subject rdf:type schema:Person .\n",
+        "    ?subject schema:birthDate ?bday .\n",
+        "}\n",
+        "LIMIT 10\n",
+        "\"\"\"\n",
+        "\n",
+        "# Execute the SPARQL query\n",
+        "results = g.query(sparql_query)\n",
+        "\n",
+        "# Print the results\n",
+        "for row in results:\n",
+        "    print(row[0])"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}