… slice a dataset based on a condition?#
It is good practice to attach dimension scales to datasets. The dimension scales can then be used to decide exactly what to slice. The following example first generates a dataset and then slices it depending on the values of its first dimension, in this case the time:
import h5rdmtoolbox as h5tbx
import numpy as np
# Build a demo file: three coordinate datasets plus a 3-D 'data' dataset
# that has them attached as dimension scales.
h5tbx.use(None)  # NOTE(review): presumably deactivates any convention -- confirm
with h5tbx.File() as h5:
    # One coordinate dataset per axis, each flagged as a dimension scale.
    h5.create_dataset('time', data=range(0, 100), make_scale=True)
    h5.create_dataset('x', data=range(0, 100), make_scale=True)
    h5.create_dataset('y', data=range(0, 200), make_scale=True)
    # Pass the array through the `data` keyword, like the calls above,
    # instead of positionally. The axis lengths (100, 200, 100) line up
    # with the attached scales (time, y, x).
    h5.create_dataset('data', data=np.random.rand(100, 200, 100),
                      attach_scale=('time', 'y', 'x'))
    # Read the full array while the file is still open.
    data = h5.data[:]
    h5.dump()
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 1
----> 1 import h5rdmtoolbox as h5tbx
2 import numpy as np
4 h5tbx.use(None)
File ~/checkouts/readthedocs.org/user_builds/h5rdmtoolbox/checkouts/v1.7.0/h5rdmtoolbox/__init__.py:129
125 with File(src) as h5:
126 return h5.dumps()
--> 129 from h5rdmtoolbox.wrapper.ld.hdf.file import get_ld as hdf_get_ld
130 from h5rdmtoolbox.wrapper.ld.user.file import get_ld as user_get_ld
133 def get_ld(
134 hdf_filename: Union[str, pathlib.Path],
135 structural: bool = True,
136 semantic: bool = True,
137 blank_node_iri_base: Optional[str] = None,
138 **kwargs) -> rdflib.Graph:
File ~/checkouts/readthedocs.org/user_builds/h5rdmtoolbox/checkouts/v1.7.0/h5rdmtoolbox/wrapper/ld/__init__.py:1
----> 1 import ssnolib.ssno.standard_name
2 from ontolutils.namespacelib import M4I
3 from ontolutils.namespacelib import SCHEMA
ModuleNotFoundError: No module named 'ssnolib'
# Reopen the file written by the previous cell and plot only the part of
# 'data' whose 'time' coordinate is greater than 5.4.
with h5tbx.File(h5.hdf_filename) as h5:
    # Boolean mask over the first axis, derived from the 'time' scale.
    after_threshold = h5.data.time > 5.4
    h5.data[after_threshold, :, :].plot()