{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "03863eaf-3771-47c3-bf76-dec842375ae0",
   "metadata": {},
   "source": [
    "# Example Layouts\n",
    "\n",
    "**Note, work in progress!**\n",
    "\n",
    "This section provides useful layout specifications."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "41a72bcd-9e6d-4330-9f41-fd0d60ab545e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "import h5rdmtoolbox as h5tbx\n",
    "from h5rdmtoolbox import layout\n",
    "from h5rdmtoolbox.database import hdfdb, rdf_find"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d34a1dc2-dff1-4b04-a728-4457e73b9c71",
   "metadata": {},
   "source": [
    "## All string datasets must be one-dimensional"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2a17a625-ca82-4db6-8c9e-2ad9f46f00e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "lay = layout.Layout()\n",
    "\n",
    "# The specification searches all datasets recursively and expects more than\n",
    "# zero hits (n > 0) for objects that are 1D and whose dtype passes the regex.\n",
    "# The lookahead must be followed by `.*`: a quantified lookahead `(?!S)*` can\n",
    "# match zero times and would therefore accept every dtype string.\n",
    "# NOTE(review): `(?!S)` rejects dtype strings starting with 'S' - confirm this\n",
    "# is the intended selection for string datasets.\n",
    "string_dataset_1D = lay.add(\n",
    "    hdfdb.FileDB.find,\n",
    "    flt={'$ndim': {'$eq': 1},\n",
    "         '$dtype': {'$regex': '^(?!S).*'}},\n",
    "    recursive=True,\n",
    "    objfilter='dataset',\n",
    "    description='String dataset must be 1D',\n",
    "    n={'$gt': 0}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "83a7959a-d879-4f7e-9981-063ccf5ca354",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "int32\n",
      "|S5\n"
     ]
    }
   ],
   "source": [
    "# Build a temporary file with two string datasets and one scalar dataset.\n",
    "with h5tbx.File() as h5:\n",
    "    h5.create_string_dataset('a string ds', data=['one', 'two', 'three'])\n",
    "    dss = h5.create_string_dataset('grp/a string ds', data=['one', 'two', 'three'])\n",
    "    dsn = h5.create_dataset('a', data=4)\n",
    "    print(dsn.dtype)\n",
    "    print(dss.dtype)\n",
    "    hdf_filename = h5.hdf_filename"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "fc671941-728f-44c5-8f7d-5399413086ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "res = lay.validate(hdf_filename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "591ba3e5-015c-46d1-8c46-ecf66f3f1232",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Summary of layout validation\n",
      "+--------------------------------------+--------+--------------------+---------------------------+---------------+---------------+-----------------------------------------+\n",
      "| id | flag | flag description | description | target_type | target_name | func |\n",
      "|--------------------------------------+--------+--------------------+---------------------------+---------------+---------------+-----------------------------------------|\n",
      "| 39344777-7032-4f34-9f1f-ad14989430f5 | 1 | SUCCESSFUL | String dataset must be 1D | Group | tmp0.hdf | h5rdmtoolbox.database.hdfdb.filedb.find |\n",
      "+--------------------------------------+--------+--------------------+---------------------------+---------------+---------------+-----------------------------------------+\n",
      "--> Layout is valid\n"
     ]
    }
   ],
   "source": [
    "res.print_summary(exclude_keys=('called', 'kwargs'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7e0f3c52-1b8a-4d6e-9c3f-2a5b7d91e604",
   "metadata": {},
   "source": [
    "Quick check of the regular expression: the negative lookahead `(?!S)` only lets strings through that do not start with `S`, so `'M123'` yields a match."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "4c43c5dc-7445-45be-bed1-59eee4fc7c6d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<re.Match object; span=(0, 4), match='M123'>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "re.search('^(?!S).*', 'M123')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6d8709ac-3676-4a56-891a-832deca11db4",
   "metadata": {},
   "source": [
    "## Defining RDF specifications\n",
    "\n",
    "All datasets must have the attribute `units`, which shall be semantically described by `http://w3id.org/nfdi4ing/metadata4ing#hasUnit`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b6bda83f-582e-48d0-a9dc-7b8fc0576520",
   "metadata": {},
   "outputs": [],
   "source": [
    "rdf_lay = layout.Layout()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a98ad451-b361-4f62-a24e-bc6840f9ae2f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Expect more than zero datasets (n > 0) that carry a `units` attribute.\n",
    "has_units = rdf_lay.add(\n",
    "    hdfdb.FileDB.find,\n",
    "    flt={'units': {'$exists': True}},\n",
    "    recursive=True,\n",
    "    objfilter='dataset',\n",
    "    description='Datasets must have the attribute units',\n",
    "    n={'$gt': 0}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "515b96d8-9025-4696-a5d7-6a2d0bc996f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sub-specification attached to `has_units`: exactly one result (n=1) is\n",
    "# expected for the hasUnit predicate. It is bound to its own name so the\n",
    "# parent specification is not overwritten when this cell is run.\n",
    "has_unit_predicate = has_units.add(\n",
    "    rdf_find,\n",
    "    rdf_predicate=\"http://w3id.org/nfdi4ing/metadata4ing#hasUnit\",\n",
    "    n=1\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "89fb9a00-eb31-42cf-9de1-e6e9785499b8",
   "metadata": {},
   "source": [
    "Test the layout specification: the first file has the attribute `units` but no RDF predicate assigned to it and fails the validation; the second file sets the predicate and passes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "4605bae5-fca9-4e0f-9249-cb77c05aab2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "with h5tbx.File() as h5:\n",
    "    ds = h5.create_dataset('velocity', data=4.5, attrs={'units': 'm/s'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "83cd6254-f597-4dad-b281-7a594a0aec83",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-06-23_15:25:31,621 ERROR [core.py:330] Applying spec. \"LayoutSpecification(kwargs={'rdf_predicate': 'http://w3id.org/nfdi4ing/metadata4ing#hasUnit'})\" failed due to not matching the number of results: 1 != 0\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rdf_lay.validate(h5.hdf_filename).is_valid()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "bb359f6d-929e-46fa-a9c6-6c33e38658a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "with h5tbx.File() as h5:\n",
    "    ds = h5.create_dataset('velocity', data=4.5, attrs={'units': 'm/s'})\n",
    "    ds.rdf['units'].predicate = \"http://w3id.org/nfdi4ing/metadata4ing#hasUnit\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "264024ed-44a5-40ad-b2da-1075c7a58c69",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rdf_lay.validate(h5.hdf_filename).is_valid()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}