Example Layouts#

Note: this section is a work in progress!

This section provides useful layout specifications.

from h5rdmtoolbox import layout
from h5rdmtoolbox.database import hdfdb
import h5rdmtoolbox as h5tbx

All string datasets must be one-dimensional#

lay = layout.Layout()
# Specification: search the whole file (recursive=True) for datasets that are
# one-dimensional and whose dtype matches the regular expression below.
# NOTE(review): n={'$gt': 0} presumably requires at least one hit -- confirm.
string_dataset_1D = lay.add(
    hdfdb.FileDB.find,
    flt={'$ndim': {'$eq': 1},
         # A quantifier applied directly to a zero-width lookahead ('(?!S)*')
         # matches every string, which made this filter a no-op. Use the same
         # pattern that this page checks with re.search: '^(?!S).*'.
         # NOTE(review): the negative lookahead only rejects dtype strings that
         # *start* with "S", while the printed string dtype is "|S5" -- confirm
         # how $dtype is stringified and whether this expresses the intended
         # "string dataset" condition.
         '$dtype': {'$regex': '^(?!S).*'}},
    recursive=True,
    objfilter='dataset',
    description='String dataset must be 1D',
    n={'$gt': 0}
)
with h5tbx.File() as h5:
    # Two one-dimensional string datasets: one at the root and one inside the
    # group "grp".
    words = ['one', 'two', 'three']
    h5.create_string_dataset('a string ds', data=words)
    dss = h5.create_string_dataset('grp/a string ds', data=words)
    # A scalar numeric dataset for comparison.
    dsn = h5.create_dataset('a', data=4)
    # Show the dtypes of the numeric and the string dataset.
    print(dsn.dtype)
    print(dss.dtype)
    # Keep the file name so the layout can be validated against the file later.
    hdf_filename = h5.hdf_filename
int64
|S5
# Validate the HDF5 file (referenced by its file name) against the layout.
res = lay.validate(hdf_filename)
# Print the validation summary table; 'called' and 'kwargs' are passed as keys
# to exclude (presumably columns of the table -- confirm).
res.print_summary(exclude_keys=('called', 'kwargs'))
Summary of layout validation
+--------------------------------------+--------+--------------------+---------------------------+---------------+---------------+-----------------------------------------+
| id                                   |   flag | flag description   | description               | target_type   | target_name   | func                                    |
|--------------------------------------+--------+--------------------+---------------------------+---------------+---------------+-----------------------------------------|
| e8df35aa-0fed-451d-b863-592020d2f8cd |      1 | SUCCESSFUL         | String dataset must be 1D | Group         | tmp0.hdf      | h5rdmtoolbox.database.hdfdb.filedb.find |
+--------------------------------------+--------+--------------------+---------------------------+---------------+---------------+-----------------------------------------+
--> Layout is valid
import re
# Stand-alone check of the negative-lookahead pattern: `(?!S)` rejects a string
# that begins with "S"; "M123" does not, so `.*` matches the whole string.
re.search('^(?!S).*', 'M123')
<re.Match object; span=(0, 4), match='M123'>

Defining RDF specifications#

All datasets must have the attribute units, which shall be semantically described by http://w3id.org/nfdi4ing/metadata4ing#hasUnit

rdf_lay = layout.Layout()
# Specification: search the file recursively for datasets that carry a `units`
# attribute.
# NOTE(review): n={'$gt': 0} presumably requires at least one match -- confirm.
is_dataset = rdf_lay.add(
    hdfdb.FileDB.find,
    flt={'units': {'$exists': True}},
    recursive=True,
    objfilter='dataset',
    # The description states what this filter checks (existence of `units`).
    description='Dataset must have the attribute units',
    n={'$gt': 0}
)
from h5rdmtoolbox.database import rdf_find
# Chain a second specification onto the first one; `is_dataset` is rebound to
# the specification returned by `add`.
# NOTE(review): presumably the chained specification is evaluated on the
# objects found by the parent specification -- confirm against the library docs.
is_dataset = is_dataset.add(
    rdf_find,
    rdf_predicate="http://w3id.org/nfdi4ing/metadata4ing#hasUnit",
    n=1  # required number of results; validation fails if the count differs
)

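Test the layout specification: first with a dataset whose units attribute has no RDF predicate assigned (validation fails), then with the predicate assigned (validation succeeds).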

# Case 1: the dataset has a `units` attribute, but no RDF predicate is assigned
# to it.
with h5tbx.File() as h5:
    ds = h5.create_dataset('velocity', data=4.5, attrs={'units': 'm/s'})
# Validate the written file by name and report whether the layout is satisfied.
validation = rdf_lay.validate(h5.hdf_filename)
validation.is_valid()
2025-10-31_14:47:18,543 ERROR    [core.py:330] Applying spec. "LayoutSpecification(kwargs={'rdf_predicate': 'http://w3id.org/nfdi4ing/metadata4ing#hasUnit'})" failed due to not matching the number of results: 1 != 0
False
# Case 2: the `units` attribute is additionally annotated with the hasUnit
# predicate before the file is closed.
with h5tbx.File() as h5:
    ds = h5.create_dataset('velocity', data=4.5, attrs={'units': 'm/s'})
    has_unit = "http://w3id.org/nfdi4ing/metadata4ing#hasUnit"
    ds.rdf['units'].predicate = has_unit
# Validate the written file by name and report whether the layout is satisfied.
validation = rdf_lay.validate(h5.hdf_filename)
validation.is_valid()
True