From b1341e58445c9bf7d2a36acb6c079abd647b0196 Mon Sep 17 00:00:00 2001 From: Xavier Raynaud Date: Tue, 30 Jan 2024 16:58:06 +0100 Subject: [PATCH] added example for ipynb --- example_person_jsonld_nb.ipynb | 394 +++++++++++++++++++++++++++++++++ sphinx/about.rst | 6 + sphinx/conf.py | 2 +- 3 files changed, 401 insertions(+), 1 deletion(-) create mode 100644 example_person_jsonld_nb.ipynb diff --git a/example_person_jsonld_nb.ipynb b/example_person_jsonld_nb.ipynb new file mode 100644 index 0000000..75a7cb3 --- /dev/null +++ b/example_person_jsonld_nb.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "aw_lp5wfXRSl" + }, + "source": [ + "# JSON-LD Demonstration\n", + "\n", + "In this notebook, we will explore the principles of JSON-LD using the example of a person. JSON-LD stands for \"JSON for Linking Data\" and it provides a method to enrich your JSON data with semantics.\n", + "\n", + "An operational version of this notebook can be accessed [here](https://colab.research.google.com/drive/14XqRJPWs07RUQgZmDZEu3yb2m1xGvxEQ?usp=sharing)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "y-aalpcNb223", + "outputId": "565a219c-1d10-4b55-fd75-a1fcb2257022" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: jsonschema in /usr/local/lib/python3.10/dist-packages (4.19.0)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (23.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (2023.7.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (0.30.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema) (0.10.2)\n", + "Collecting rdflib\n", + " Downloading rdflib-7.0.0-py3-none-any.whl (531 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m531.9/531.9 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting isodate<0.7.0,>=0.6.0 (from rdflib)\n", + " Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.7/41.7 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pyparsing<4,>=2.1.0 in /usr/local/lib/python3.10/dist-packages (from rdflib) (3.1.1)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from isodate<0.7.0,>=0.6.0->rdflib) (1.16.0)\n", + "Installing collected packages: isodate, rdflib\n", + "Successfully installed isodate-0.6.1 rdflib-7.0.0\n" + ] + } + ], + "source": [ + "# Install the required library for JSON schema validation\n", + "!pip install jsonschema\n", + "!pip install rdflib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tNjkjGNEXJXP" + }, + "outputs": [], + "source": [ + "import jsonschema\n", + "from jsonschema import validate\n", + "import json\n", + "import rdflib\n", + "\n", + "# Regular JSON representation of a person\n", + "person_data = {\n", + " \"@context\": {\n", + " \"schema\": \"https://schema.org/\",\n", + " \"firstName\": \"schema:givenName\",\n", + " \"lastName\": \"schema:lastName\",\n", + " \"birthdate\": \"schema:birthDate\",\n", + " \"institute\": \"schema:affiliation\",\n", + " \"name\": \"schema:name\",\n", + " \"street\": \"schema:streetAddress\",\n", + " \"city\": \"schema:locality\",\n", + " \"zip\": \"schema:postalCode\"\n", + " },\n", + " \"@id\": \"https://www.example.com/SimonClark\",\n", + " \"@type\": \"schema:Person\",\n", + " \"firstName\": \"Simon\",\n", + " \"lastName\": \"Clark\",\n", + " \"birthdate\": \"1987-04-23\",\n", + " \"institute\": {\n", + " \"@id\": \"https://www.example.com/SINTEF\",\n", + " \"@type\": \"schema:ResearchOrganization\",\n", + " \"name\": \"SINTEF\",\n", + " \"street\": \"Strindvegen 4\",\n", + " \"city\": \"Trondheim\",\n", + " \"zip\": \"7034\"\n", + " }\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K221MAoHa3Nx" + }, + "source": [ + "In the JSON-LD example, we added an `@context` that maps terms in our JSON data to their semantic meanings, using URIs (typically from established vocabularies, like schema.org). This allows machines to understand the semantics behind the data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZmQfOeFyieo9" + }, + "outputs": [], + "source": [ + "# Regular JSON representation of a person\n", + "person_data = {\n", + " \"@context\": \"https://schema.org/\",\n", + " \"@id\": \"https://orcid.org/0000-0002-8758-6109\",\n", + " \"@type\": \"Person\",\n", + " \"firstName\": \"Simon\",\n", + " \"lastName\": \"Clark\",\n", + " \"gender\": {\"@type\": \"Male\"},\n", + " \"birthDate\": \"1987-04-23\",\n", + " \"affiliation\": {\n", + " \"@id\": \"https://ror.org/01f677e56\",\n", + " \"@type\": \"ResearchOrganization\"\n", + " }\n", + "}\n", + "\n", + "# email to: simon.clark@sintef.no" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HDHJuV62bTby", + "outputId": "d5537f6e-4325-40ef-dfa4-c7d55159623f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "JSON data is valid according to the schema.\n" + ] + } + ], + "source": [ + "person_schema = {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"firstName\": {\n", + " \"type\": \"string\"\n", + " },\n", + " \"lastName\": {\n", + " \"type\": \"string\",\n", + " \"minLength\": 1\n", + " },\n", + " \"birthDate\": { # Replacing age with birthdate\n", + " \"type\": \"string\",\n", + " \"format\": \"date\",\n", + " \"pattern\": \"^[0-9]{4}-[0-1][0-9]-[0-3][0-9]$\"\n", + " },\n", + " \"gender\": {\n", + " \"type\": \"object\"\n", + " },\n", + " \"affiliation\": {\n", + " \"type\": \"object\"\n", + " }\n", + " },\n", + " \"required\": [\"firstName\", \"lastName\", \"birthDate\", \"affiliation\"] # Updated age to birthdate\n", + "}\n", + "\n", + "# Function to validate JSON data against the schema\n", + "def validate_json(data, schema):\n", + " try:\n", + " validate(instance=data, schema=schema)\n", + " return True, \"JSON data is valid according to the schema.\"\n", + " except jsonschema.exceptions.ValidationError as ve:\n", + " return False, ve.message\n", + "\n", + "# Validate the sample JSON data\n", + "is_valid, message = validate_json(person_data, person_schema)\n", + "print(message)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "R-mO4FGtbr-L", + "outputId": "c0ae5e87-6ade-4ef8-a639-02bf287071a9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(rdflib.term.URIRef('http://schema.org/Organization'),)\n", + "(rdflib.term.URIRef('http://schema.org/PerformingGroup'),)\n", + "(rdflib.term.URIRef('http://schema.org/TheaterGroup'),)\n", + "(rdflib.term.URIRef('http://schema.org/MusicGroup'),)\n", + "(rdflib.term.URIRef('http://schema.org/DanceGroup'),)\n", + "(rdflib.term.URIRef('http://schema.org/OnlineBusiness'),)\n", + "(rdflib.term.URIRef('http://schema.org/OnlineStore'),)\n", + "(rdflib.term.URIRef('http://schema.org/SportsOrganization'),)\n", + "(rdflib.term.URIRef('http://schema.org/SportsTeam'),)\n", + "(rdflib.term.URIRef('http://schema.org/Airline'),)\n", + "(rdflib.term.URIRef('http://schema.org/SearchRescueOrganization'),)\n", + "(rdflib.term.URIRef('http://schema.org/FundingScheme'),)\n", + "(rdflib.term.URIRef('http://schema.org/NewsMediaOrganization'),)\n", + "(rdflib.term.URIRef('http://schema.org/EducationalOrganization'),)\n", + "(rdflib.term.URIRef('http://schema.org/CollegeOrUniversity'),)\n", + "(rdflib.term.URIRef('http://schema.org/HighSchool'),)\n", + "(rdflib.term.URIRef('http://schema.org/Preschool'),)\n", + "(rdflib.term.URIRef('http://schema.org/ElementarySchool'),)\n", + "(rdflib.term.URIRef('http://schema.org/MiddleSchool'),)\n", + "(rdflib.term.URIRef('http://schema.org/School'),)\n" + ] + } + ], + "source": [ + "# Create a new graph\n", + "g = rdflib.Graph()\n", + "\n", + "# Load schema.org vocabulary into the graph\n", + "g.parse(\"https://schema.org/version/latest/schemaorg-current-http.jsonld\", format=\"json-ld\")\n", + "\n", + "person_data_str = json.dumps(person_data)\n", + "g.parse(data=person_data_str, format=\"json-ld\")\n", + "\n", + "# Define and execute a SPARQL query for all instances of Organization\n", + "sparql_query = \"\"\"\n", + "PREFIX schema: \n", + "SELECT DISTINCT ?type WHERE {\n", + " ?type rdfs:subClassOf* schema:Organization .\n", + "}\n", + "LIMIT 20\n", + "\"\"\"\n", + "\n", + "# Execute the SPARQL query\n", + "results = g.query(sparql_query)\n", + "\n", + "# Print the results\n", + "for row in results:\n", + " print(row)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "h4vvw5hNhih2", + "outputId": "77828fc9-bafe-414b-f6f6-65fb117aabc5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://ror.org/01f677e56\n" + ] + } + ], + "source": [ + "# Define and execute a SPARQL query for all instances of Organization\n", + "sparql_query = \"\"\"\n", + "PREFIX rdf: \n", + "PREFIX rdfs: \n", + "PREFIX schema: \n", + "\n", + "SELECT ?instance WHERE {\n", + " ?subclass rdfs:subClassOf* schema:Organization .\n", + " ?instance rdf:type ?subclass .\n", + "}\n", + "LIMIT 10\n", + "\"\"\"\n", + "\n", + "# Execute the SPARQL query\n", + "results = g.query(sparql_query)\n", + "\n", + "# Print the results\n", + "for row in results:\n", + " print(row[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "e_-Qa3Hgh2kg", + "outputId": "2b9fafa4-fca4-426f-e768-6ef561921b14" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "# Define and execute a SPARQL query for all instances of Organization\n", + "sparql_query = \"\"\"\n", + "PREFIX rdf: \n", + "PREFIX rdfs: \n", + "PREFIX schema: \n", + "\n", + "SELECT (COUNT(?subject) AS ?numMales) WHERE {\n", + " ?subject rdf:type schema:Person .\n", + " ?subject schema:gender ?gender .\n", + " ?gender rdf:type schema:Male .\n", + "}\n", + "LIMIT 10\n", + "\"\"\"\n", + "\n", + "# Execute the SPARQL query\n", + "results = g.query(sparql_query)\n", + "\n", + "# Print the results\n", + "for row in results:\n", + " print(row.numMales)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HJfpCj4-sww3", + "outputId": "c4ba6b30-7d7e-45d9-c3e4-b0d311e0fdd0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1987-04-23\n" + ] + } + ], + "source": [ + "# Define and execute a SPARQL query for all instances of Organization\n", + "sparql_query = \"\"\"\n", + "PREFIX rdf: \n", + "PREFIX rdfs: \n", + "PREFIX schema: \n", + "\n", + "SELECT ?bday WHERE {\n", + " ?subject rdf:type schema:Person .\n", + " ?subject schema:birthDate ?bday .\n", + "}\n", + "LIMIT 10\n", + "\"\"\"\n", + "\n", + "# Execute the SPARQL query\n", + "results = g.query(sparql_query)\n", + "\n", + "# Print the results\n", + "for row in results:\n", + " print(row[0])" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/sphinx/about.rst b/sphinx/about.rst index 284aa4a..a759375 100644 --- a/sphinx/about.rst +++ b/sphinx/about.rst @@ -1,3 +1,9 @@ +.. toctree:: + :hidden: + + example_person_jsonld_nb.ipynb + + About the Battery Ontology ========================================== diff --git a/sphinx/conf.py b/sphinx/conf.py index 1a98f28..ffc5c00 100644 --- a/sphinx/conf.py +++ b/sphinx/conf.py @@ -57,7 +57,7 @@ # General information about the project. project = 'BattInfo' -copyright = '2021-2023' +copyright = '2021-2024' author = 'Simon Clark' # The version info for the project you're documenting, acts as replacement for