Query HDF metadata with SPARQL
Query HDF metadata with SPARQL
Metadata in the form of JSON-LD can be queried using SPARQL:
import rdflib
from ontolutils import SSNO, PIVMETA
import h5rdmtoolbox as h5tbx
from h5rdmtoolbox import jsonld
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
Cell In[1], line 2
1 import rdflib
----> 2 from ontolutils import SSNO, PIVMETA
4 import h5rdmtoolbox as h5tbx
5 from h5rdmtoolbox import jsonld
ImportError: cannot import name 'SSNO' from 'ontolutils' (/home/docs/checkouts/readthedocs.org/user_builds/h5rdmtoolbox/envs/v1.6.0/lib/python3.8/site-packages/ontolutils/__init__.py)
Example file:
# Create the example file: one dataset 'u' carrying two attributes, where the
# 'standard_name' attribute is additionally linked to RDF terms.
with h5tbx.File() as h5:
    ds = h5.create_dataset(
        'u',
        data=[1, 2, 3, 4],
        attrs={
            'standard_name': 'coeff',
            'units': 'm/s',
        },
    )
    # Predicate IRI and object IRI associated with the 'standard_name' attribute.
    ds.rdf.predicate['standard_name'] = SSNO.hasStandardName
    ds.rdf.object['standard_name'] = PIVMETA.piv_correlation_coefficient
    # Show the file content, including the RDF annotations.
    h5.dump()
-
-
(4) [int32]
- standard_name
https://matthiasprobst.github.io/ssno#hasStandardName: coeff
https://matthiasprobst.github.io/pivmeta#piv_correlation_coefficient - units: m/s
- standard_name
Extract metadata:
# Serialise the file's metadata to a JSON-LD string; the extra prefixes are
# passed on as additional context entries.
json_str = jsonld.dumps(
    h5.hdf_filename,
    context={
        'm4i': 'http://w3id.org/nfdi4ing/metadata4ing#',
        'foaf': 'http://xmlns.com/foaf/0.1/',
    },
    indent=2,
)
print(json_str)
{
"@context": {
"foaf": "http://xmlns.com/foaf/0.1/",
"hdf5": "http://purl.allotrope.org/ontologies/hdf5/1.8#",
"m4i": "http://w3id.org/nfdi4ing/metadata4ing#",
"standard_name": "https://matthiasprobst.github.io/ssno#hasStandardName"
},
"@graph": [
{
"@id": "_:N6",
"@type": "hdf5:File",
"hdf5:rootGroup": {
"@id": "_:N5",
"@type": "hdf5:Group",
"hdf5:member": {
"@id": "_:N7",
"@type": "hdf5:Dataset",
"hdf5:attribute": [
{
"@id": "_:N8",
"@type": "hdf5:Attribute",
"hdf5:name": "standard_name",
"hdf5:value": "coeff"
},
{
"@id": "_:N9",
"@type": "hdf5:Attribute",
"hdf5:name": "units",
"hdf5:value": "m/s"
}
],
"hdf5:datatype": "H5T_INTEGER",
"hdf5:dimension": 1,
"hdf5:name": "/u",
"hdf5:size": 4,
"hdf5:value": {
"@id": "https://matthiasprobst.github.io/pivmeta#piv_correlation_coefficient"
},
"standard_name": {
"@id": "https://matthiasprobst.github.io/pivmeta#piv_correlation_coefficient"
}
},
"hdf5:name": "/"
}
}
]
}
SPARQL query:
# Select the HDF5 name and the standard-name IRI of every dataset that has one.
sparql_query_str = """
PREFIX hdf5: <http://purl.allotrope.org/ontologies/hdf5/1.8#>
PREFIX ssno: <https://matthiasprobst.github.io/ssno#>
SELECT ?name ?sn
{
?obj a hdf5:Dataset .
?obj hdf5:name ?name .
?obj ssno:hasStandardName ?sn .
}
"""

# Load the JSON-LD export into an RDF graph and run the query against it.
g = rdflib.Graph().parse(
    data=json_str,
    format='json-ld',
)
qres = g.query(sparql_query_str)

# Each result row unpacks into the two selected variables.
for name, sn in qres:
    print(
        str(name),
        str(sn),
    )
/u https://matthiasprobst.github.io/pivmeta#piv_correlation_coefficient
Find a dataset with a specific standard_name:
def find_dataset_from_standard_name(hdf_filename, sn, limit=1):
    """Return the HDF5 name(s) of datasets annotated with a standard name.

    The metadata of ``hdf_filename`` is serialised to JSON-LD, parsed into an
    RDF graph and queried with SPARQL for ``hdf5:Dataset`` nodes whose
    ``ssno:hasStandardName`` object is the IRI ``sn``.

    Parameters
    ----------
    hdf_filename
        HDF5 file to search; forwarded to ``jsonld.dumps``.
    sn : str
        IRI of the standard name. It is inserted verbatim between ``<`` and
        ``>`` in the query, so it must be a valid IRI.
    limit : int or None, optional
        ``1`` (default) returns only the first match. Any other positive
        integer returns a list with at most that many matches. ``None`` or a
        non-positive value returns all matches.

    Returns
    -------
    str or None or list of str
        For ``limit == 1`` the dataset name, or ``None`` if nothing matches;
        otherwise a (possibly empty) list of dataset names.
    """
    # Doubled braces are literal braces inside the f-string.
    sparql_query_str = f"""
PREFIX hdf5: <http://purl.allotrope.org/ontologies/hdf5/1.8#>
PREFIX ssno: <https://matthiasprobst.github.io/ssno#>
SELECT ?name
{{
?obj a hdf5:Dataset .
?obj hdf5:name ?name .
?obj ssno:hasStandardName <{sn}> .
}}"""

    # Build the graph from the file that was passed in, rather than from a
    # module-level JSON-LD string that may belong to a different file.
    graph = rdflib.Graph().parse(data=jsonld.dumps(hdf_filename), format='json-ld')
    rows = graph.query(sparql_query_str)

    if limit == 1:
        # Explicit None when the query has no result rows.
        return next((str(row[0]) for row in rows), None)

    names = [str(row[0]) for row in rows]
    if limit is not None and limit > 0:
        return names[:limit]
    return names
# Look up the dataset tagged with the PIV correlation-coefficient standard name.
find_dataset_from_standard_name(
    hdf_filename=h5.hdf_filename,
    sn='https://matthiasprobst.github.io/pivmeta#piv_correlation_coefficient',
    limit=1,
)
'/u'
def find_attribute_from_name(hdf_filename, name, limit=1):
    """Return the HDF5 name(s) of groups/datasets that carry an attribute.

    The metadata of ``hdf_filename`` is serialised to JSON-LD, parsed into an
    RDF graph and queried with SPARQL for ``hdf5:Group`` or ``hdf5:Dataset``
    nodes that have an ``hdf5:attribute`` whose ``hdf5:name`` equals ``name``.

    Parameters
    ----------
    hdf_filename
        HDF5 file to search; forwarded to ``jsonld.dumps``.
    name : str
        Attribute name to look for. It is inserted verbatim into a
        double-quoted SPARQL literal, so it must not contain ``"``.
    limit : int or None, optional
        ``1`` (default) returns only the first match. Any other positive
        integer returns a list with at most that many matches. ``None`` or a
        non-positive value returns all matches.

    Returns
    -------
    str or None or list of str
        For ``limit == 1`` the object name, or ``None`` if nothing matches;
        otherwise a (possibly empty) list of object names.
    """
    sparql_query_str = f"""
PREFIX hdf5: <http://purl.allotrope.org/ontologies/hdf5/1.8#>
SELECT ?name
{{
?obj a ?type .
?obj hdf5:name ?name .
?obj hdf5:attribute ?attr .
?attr hdf5:name "{name}" .
VALUES ?type {{ hdf5:Group hdf5:Dataset }}
}}
"""

    # Build the graph from the file that was passed in, rather than from a
    # module-level JSON-LD string that may belong to a different file.
    graph = rdflib.Graph().parse(data=jsonld.dumps(hdf_filename), format='json-ld')
    rows = graph.query(sparql_query_str)

    # Result rows get their own variable so the ``name`` parameter is not
    # shadowed while iterating.
    if limit == 1:
        # Explicit None when the query has no result rows.
        return next((str(row[0]) for row in rows), None)

    found = [str(row[0]) for row in rows]
    if limit is not None and limit > 0:
        return found[:limit]
    return found
# Search for objects that have an attribute named 'codeRepository'.
find_attribute_from_name(
    h5.hdf_filename,
    'codeRepository',
    limit=1,
)