{ "cells": [ { "cell_type": "markdown", "id": "cd3a682b-8538-49f9-88d2-f124125e06ab", "metadata": {}, "source": [ "# Query HDF metadata with SPARQL\n", "\n", "Metadata in the form of JSON-LD can be queried using SPARQL:" ] }, { "cell_type": "code", "execution_count": 3, "id": "095506ba-475e-49b5-bf24-47e4780fad06", "metadata": {}, "outputs": [], "source": [ "import rdflib\n", "from ontolutils import SSNO, PIVMETA\n", "\n", "import h5rdmtoolbox as h5tbx\n", "from h5rdmtoolbox import jsonld" ] }, { "cell_type": "markdown", "id": "b16ad4a3-323c-4492-bae0-9a4cb6b20913", "metadata": {}, "source": [ "Example file:" ] }, { "cell_type": "code", "execution_count": 4, "id": "005a9671-2940-4745-ad7c-57acb5e7e405", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "\n", " \n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "with h5tbx.File() as h5:\n", " ds = h5.create_dataset('u', data=[1,2,3,4], attrs={'standard_name': 'coeff', 'units': 'm/s'})\n", " ds.rdf.predicate['standard_name'] = SSNO.hasStandardName\n", " ds.rdf.object['standard_name'] = PIVMETA.piv_correlation_coefficient\n", " h5.dump()" ] }, { "cell_type": "markdown", "id": "62cb9ee4-c31b-4402-9596-dda8e9899140", "metadata": {}, "source": [ "Extract metadata:" ] }, { "cell_type": "code", "execution_count": 6, "id": "7c724298-6b08-4046-91b0-0f55b017938e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"@context\": {\n", " \"foaf\": \"http://xmlns.com/foaf/0.1/\",\n", " \"hdf5\": \"http://purl.allotrope.org/ontologies/hdf5/1.8#\",\n", " \"m4i\": \"http://w3id.org/nfdi4ing/metadata4ing#\",\n", " \"standard_name\": \"https://matthiasprobst.github.io/ssno#hasStandardName\"\n", " },\n", " \"@graph\": [\n", " {\n", " \"@id\": \"_:N6\",\n", " \"@type\": \"hdf5:File\",\n", " \"hdf5:rootGroup\": {\n", " \"@id\": \"_:N5\",\n", " \"@type\": \"hdf5:Group\",\n", " \"hdf5:member\": {\n", " \"@id\": \"_:N7\",\n", " \"@type\": \"hdf5:Dataset\",\n", " \"hdf5:attribute\": [\n", " {\n", " \"@id\": \"_:N8\",\n", " \"@type\": \"hdf5:Attribute\",\n", " \"hdf5:name\": \"standard_name\",\n", " \"hdf5:value\": \"coeff\"\n", " },\n", " {\n", " \"@id\": \"_:N9\",\n", " \"@type\": \"hdf5:Attribute\",\n", " \"hdf5:name\": \"units\",\n", " \"hdf5:value\": \"m/s\"\n", " }\n", " ],\n", " \"hdf5:datatype\": \"H5T_INTEGER\",\n", " \"hdf5:dimension\": 1,\n", " \"hdf5:name\": \"/u\",\n", " \"hdf5:size\": 4,\n", " \"hdf5:value\": {\n", " \"@id\": \"https://matthiasprobst.github.io/pivmeta#piv_correlation_coefficient\"\n", " },\n", " \"standard_name\": {\n", " \"@id\": \"https://matthiasprobst.github.io/pivmeta#piv_correlation_coefficient\"\n", " }\n", " },\n", " \"hdf5:name\": \"/\"\n", " }\n", " }\n", " ]\n", "}\n" ] } ], 
"source": [
 "json_str = jsonld.dumps(\n",
 "    h5.hdf_filename,\n",
 "    indent=2,\n",
 "    context={'m4i': 'http://w3id.org/nfdi4ing/metadata4ing#',\n",
 "             'foaf': 'http://xmlns.com/foaf/0.1/'}\n",
 ")\n",
 "print(json_str)"
] }, { "cell_type": "markdown", "id": "13c27fb8-1297-4dda-a3b1-783886edd409", "metadata": {}, "source": [ "SPARQL query:" ] }, { "cell_type": "code", "execution_count": 7, "id": "47615dc8-6b13-411f-af08-6c8cd3812d3a", "metadata": {}, "outputs": [], "source": [
 "# The prefix IRIs must be the ones that appear in the JSON-LD printed above.\n",
 "sparql_query_str = \"\"\"\n",
 "PREFIX hdf5: <http://purl.allotrope.org/ontologies/hdf5/1.8#>\n",
 "PREFIX ssno: <https://matthiasprobst.github.io/ssno#>\n",
 "\n",
 "SELECT ?name ?sn\n",
 "{\n",
 "    ?obj a hdf5:Dataset .\n",
 "    ?obj hdf5:name ?name .\n",
 "    ?obj ssno:hasStandardName ?sn .\n",
 "}\n",
 "\"\"\""
] }, { "cell_type": "code", "execution_count": 8, "id": "6163c77c-73e2-44bc-9c3a-91b00eb9cdff", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/u https://matthiasprobst.github.io/pivmeta#piv_correlation_coefficient\n" ] } ], "source": [
 "g = rdflib.Graph().parse(data=json_str, format='json-ld')\n",
 "qres = g.query(sparql_query_str)\n",
 "\n",
 "for name, sn in qres:\n",
 "    print(str(name), str(sn))"
] }, { "cell_type": "markdown", "id": "5936cd61-08c1-400d-b640-0939e38dd4b9", "metadata": {}, "source": [ "Find dataset with specific standard_name:" ] }, { "cell_type": "code", "execution_count": 9, "id": "540348e4-aae0-4f55-92bb-06f57ab925cf", "metadata": {}, "outputs": [], "source": [
 "def find_dataset_from_standard_name(hdf_filename, sn, limit=1):\n",
 "    \"\"\"Return the HDF5 name(s) of the datasets annotated with the standard name `sn`.\n",
 "\n",
 "    Parameters\n",
 "    ----------\n",
 "    hdf_filename : path-like\n",
 "        HDF5 file whose JSON-LD metadata is queried.\n",
 "    sn : str\n",
 "        IRI of the standard name, i.e. the object of `ssno:hasStandardName`.\n",
 "    limit : int or None, default 1\n",
 "        1 returns the first match as a string (None if nothing matches).\n",
 "        A value greater than 1 returns a list with at most `limit` names.\n",
 "        None or a non-positive value returns all matches as a list.\n",
 "\n",
 "    Returns\n",
 "    -------\n",
 "    str, None or list of str\n",
 "    \"\"\"\n",
 "    # Serialise the IRI via rdflib (<...> form) instead of pasting raw text into the query.\n",
 "    sn_iri = rdflib.URIRef(sn).n3()\n",
 "    query = f\"\"\"\n",
 "    PREFIX hdf5: <http://purl.allotrope.org/ontologies/hdf5/1.8#>\n",
 "    PREFIX ssno: <https://matthiasprobst.github.io/ssno#>\n",
 "\n",
 "    SELECT ?name\n",
 "    {{\n",
 "        ?obj a hdf5:Dataset .\n",
 "        ?obj hdf5:name ?name .\n",
 "        ?obj ssno:hasStandardName {sn_iri} .\n",
 "    }}\n",
 "    \"\"\"\n",
 "    # Build the graph from the file that was passed in, not from the notebook-global json_str.\n",
 "    graph = rdflib.Graph().parse(data=jsonld.dumps(hdf_filename), format='json-ld')\n",
 "    names = [str(row[0]) for row in graph.query(query)]\n",
 "\n",
 "    if limit == 1:\n",
 "        return names[0] if names else None\n",
 "    if limit is not None and limit > 1:\n",
 "        return names[:limit]\n",
 "    return names"
] }, { "cell_type": "code",
"execution_count": 10, "id": "0a7c2be4-d1d8-4180-a360-afb481076eee", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'/u'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "find_dataset_from_standard_name(\n",
 "    h5.hdf_filename,\n",
 "    'https://matthiasprobst.github.io/pivmeta#piv_correlation_coefficient',\n",
 "    limit=1\n",
 ")"
] }, { "cell_type": "code", "execution_count": 11, "id": "24c511cc-b002-419a-ac30-b4cb0137fd3e", "metadata": {}, "outputs": [], "source": [
 "def find_attribute_from_name(hdf_filename, name, limit=1):\n",
 "    \"\"\"Return the HDF5 name(s) of the groups/datasets that carry an attribute called `name`.\n",
 "\n",
 "    Parameters\n",
 "    ----------\n",
 "    hdf_filename : path-like\n",
 "        HDF5 file whose JSON-LD metadata is queried.\n",
 "    name : str\n",
 "        Attribute name to look for (matched against `hdf5:name` of the attribute).\n",
 "    limit : int or None, default 1\n",
 "        1 returns the first match as a string (None if nothing matches).\n",
 "        A value greater than 1 returns a list with at most `limit` names.\n",
 "        None or a non-positive value returns all matches as a list.\n",
 "\n",
 "    Returns\n",
 "    -------\n",
 "    str, None or list of str\n",
 "    \"\"\"\n",
 "    # Let rdflib quote/escape the literal so quotes in `name` cannot break the query.\n",
 "    attr_name = rdflib.Literal(str(name)).n3()\n",
 "    query = f\"\"\"\n",
 "    PREFIX hdf5: <http://purl.allotrope.org/ontologies/hdf5/1.8#>\n",
 "\n",
 "    SELECT ?name\n",
 "    {{\n",
 "        ?obj a ?type .\n",
 "        ?obj hdf5:name ?name .\n",
 "        ?obj hdf5:attribute ?attr .\n",
 "        ?attr hdf5:name {attr_name} .\n",
 "        VALUES ?type {{ hdf5:Group hdf5:Dataset }}\n",
 "    }}\n",
 "    \"\"\"\n",
 "    # Build the graph from the file that was passed in, not from the notebook-global json_str.\n",
 "    graph = rdflib.Graph().parse(data=jsonld.dumps(hdf_filename), format='json-ld')\n",
 "    owners = [str(row[0]) for row in graph.query(query)]\n",
 "\n",
 "    if limit == 1:\n",
 "        return owners[0] if owners else None\n",
 "    if limit is not None and limit > 1:\n",
 "        return owners[:limit]\n",
 "    return owners"
] }, { "cell_type": "code", "execution_count": 12, "id": "34444dda-921e-4f4c-8215-9382f6ca4fd6", "metadata": {}, "outputs": [], "source": [
 "find_attribute_from_name(h5.hdf_filename, 'codeRepository', 1)"
] }, { "cell_type": "code", "execution_count": null, "id": "89751bb0-8ced-482c-9d74-37632af850d4", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.18" } }, "nbformat": 4, "nbformat_minor": 5 }