Source code for h5rdmtoolbox.ld

import pathlib
from typing import Dict, Optional, Union

import h5py
import rdflib

from ._types import RDFMappingEntry
from .hdf.file import get_ld as get_hdf_ld
from .sparql import sparql
from .user.file import get_ld as get_contextual_ld
from .utils import optimize_context

# Module-level flag. It is not referenced by any code visible in this module.
# NOTE(review): confirm where it is consumed before changing or removing it.
BINARY_AS_STRING = True

# Public names of this module (what ``from <module> import *`` exports):
# the functions defined in this file plus the ``sparql`` name imported above.
__all__ = ["get_ld", "hdf2jsonld", "hdf2ttl", "sparql"]


def _validate_file_uri(file_uri: Optional[str]) -> None:
    """Reject a base URI that does not end with a namespace separator.

    A falsy ``file_uri`` (``None`` or an empty string) is accepted as-is.
    Any other value is converted with ``str()`` and must end with ``'#'``
    or ``'/'``.

    Raises
    ------
    ValueError
        If a non-empty ``file_uri`` ends with any other character.
    """
    if not file_uri:
        return
    if str(file_uri).endswith(("#", "/")):
        return
    raise ValueError("file_uri must end with '#' or '/'")


def _build_ld_graph(
        h5_file: h5py.File,
        *,
        structural: bool,
        contextual: bool,
        file_uri: Optional[str],
        skipND: Optional[int],
        rdf_mappings: Optional[Dict[str, RDFMappingEntry]],
) -> Optional[rdflib.Graph]:
    """Assemble the graph selected by the ``structural``/``contextual`` flags.

    Parameters
    ----------
    h5_file : h5py.File
        Already opened HDF5 file; it is only passed on to the extractors.
    structural : bool
        Request the graph produced by ``get_hdf_ld``.
    contextual : bool
        Request the graph produced by ``get_contextual_ld``.
    file_uri : Optional[str]
        Forwarded unchanged to whichever extractor is called.
    skipND : Optional[int]
        Forwarded to ``get_hdf_ld`` only.
    rdf_mappings : Optional[Dict[str, RDFMappingEntry]]
        Forwarded to ``get_contextual_ld`` only when both flags are set.

    Returns
    -------
    Optional[rdflib.Graph]
        The sum of both graphs when both flags are set, the single requested
        graph when only one flag is set, ``None`` when neither is set.
    """
    if not structural and not contextual:
        return None

    if not contextual:
        return get_hdf_ld(h5_file, file_uri=file_uri, skipND=skipND)

    if not structural:
        # Historical behavior, kept on purpose: a contextual-only request
        # does not pass rdf_mappings on.
        # NOTE(review): callers supplying rdf_mappings in this mode get no
        # hint that the argument is unused.
        return get_contextual_ld(h5_file, file_uri=file_uri)

    # Both requested: structural extraction runs first, then contextual.
    hierarchy_graph = get_hdf_ld(h5_file, file_uri=file_uri, skipND=skipND)
    mapping_graph = get_contextual_ld(
        h5_file,
        file_uri=file_uri,
        rdf_mappings=rdf_mappings,
    )
    return hierarchy_graph + mapping_graph


def _bind_context_to_graph(graph: rdflib.Graph, context: Optional[Dict]) -> None:
    """Bind the values of ``context`` as namespaces on ``graph``.

    Legacy behavior is reproduced unchanged: a falsy ``context`` is a no-op,
    values that already are ``rdflib.URIRef`` instances are skipped, and all
    remaining values are wrapped in ``rdflib.URIRef`` and passed to
    ``graph.bind`` with the fixed prefix ``"ex"``.

    NOTE(review): the dictionary keys are never used, so the prefixes chosen
    by the caller are not the ones requested from ``graph.bind``. This looks
    unintended; confirm before relying on it.
    """
    for namespace in (context or {}).values():
        if isinstance(namespace, rdflib.URIRef):
            continue
        graph.bind("ex", rdflib.URIRef(namespace))


def _resolve_output_suffix(fmt: str) -> str:
    """Translate a serialization format name into an output file suffix.

    ``"json"``, ``"json-ld"`` and ``"jsonld"`` map to ``".jsonld"``;
    ``"turtle"`` and ``"ttl"`` map to ``".ttl"``.

    Raises
    ------
    ValueError
        If ``fmt`` is none of the names listed above.
    """
    suffix_aliases = (
        (".jsonld", ("json", "json-ld", "jsonld")),
        (".ttl", ("turtle", "ttl")),
    )
    for suffix, aliases in suffix_aliases:
        if fmt in aliases:
            return suffix
    raise ValueError(f"Format '{fmt}' currently not supported. Use 'json-ld' or 'ttl'.")


def _resolve_metadata_filename(
        filename: Union[str, pathlib.Path],
        metadata_filename: Optional[Union[str, pathlib.Path]],
        suffix: str,
) -> pathlib.Path:
    """Return the path the serialized metadata should be written to.

    An explicitly given ``metadata_filename`` is used as-is (converted to a
    ``pathlib.Path``). Otherwise the target is ``filename`` with its suffix
    replaced by ``suffix``.
    """
    if metadata_filename is not None:
        return pathlib.Path(metadata_filename)
    # No explicit target: place the output next to the source file.
    # (For JSON-LD the recommended suffix is .jsonld.)
    source_path = pathlib.Path(filename)
    return source_path.with_suffix(suffix)


def _serialize_graph(
        graph: rdflib.Graph,
        fmt: str,
        indent: int,
        context: Optional[dict],
) -> str:
    """Serialize ``graph`` in the format ``fmt``.

    The user context (an empty dict when ``context`` is falsy) is first run
    through ``optimize_context`` together with the graph; the result is given
    to ``graph.serialize`` along with ``indent`` and ``auto_compact=True``.

    Returns
    -------
    str
        The value returned by ``graph.serialize``.
    """
    user_context = context if context else {}
    serializer_options = {
        "format": fmt,
        "indent": indent,
        "auto_compact": True,
        "context": optimize_context(graph, user_context),
    }
    return graph.serialize(**serializer_options)


def get_ld(
        hdf_filename: Union[str, pathlib.Path],
        structural: bool = True,
        contextual: bool = True,
        file_uri: Optional[str] = None,
        skipND: Optional[int] = 1,
        context: Optional[Dict] = None,
        rdf_mappings: Optional[Dict[str, RDFMappingEntry]] = None,
) -> rdflib.Graph:
    """Return the HDF file content as an RDF graph.

    Opens the HDF5 file read-only and builds an ``rdflib.Graph`` from it. The
    graph can contain structural RDF (representing the HDF5 hierarchy) and/or
    contextual RDF (semantic mappings from attributes to ontologies).

    Parameters
    ----------
    hdf_filename : Union[str, pathlib.Path]
        Path to the HDF5 file.
    structural : bool, default=True
        Include structural RDF representing HDF5 groups, datasets, and
        attributes.
    contextual : bool, default=True
        Include contextual RDF from attribute-to-ontology mappings.
    file_uri : Optional[str], default=None
        Base URI for file resources. Must end with '#' or '/'.
    skipND : Optional[int], default=1
        Forwarded to the structural extraction; unused when ``structural``
        is False.
    context : Optional[Dict], default=None
        Namespace entries handed to ``_bind_context_to_graph``. Note that
        this helper binds the values under the prefix ``"ex"`` and does not
        use the dictionary keys.
    rdf_mappings : Optional[Dict[str, RDFMappingEntry]], default=None
        Custom RDF mappings for attributes. They are only forwarded when both
        ``structural`` and ``contextual`` are True.

    Returns
    -------
    rdflib.Graph
        An RDF graph containing the HDF5 metadata.

    Raises
    ------
    ValueError
        If both ``structural`` and ``contextual`` are False, or if
        ``file_uri`` does not end with '#' or '/'.
    """
    _validate_file_uri(file_uri)
    if not structural and not contextual:
        # Nothing was requested: fail before the file is opened at all.
        raise ValueError("structural and contextual cannot both be False.")

    # Extraction only reads, so request read-only access explicitly instead
    # of depending on the default mode of the installed h5py version.
    with h5py.File(hdf_filename, mode="r") as h5:
        graph = _build_ld_graph(
            h5,
            structural=structural,
            contextual=contextual,
            file_uri=file_uri,
            skipND=skipND,
            rdf_mappings=rdf_mappings,
        )
    _bind_context_to_graph(graph, context)
    return graph


def hdf2jsonld(
        filename: Union[str, pathlib.Path],
        metadata_filename: Optional[Union[str, pathlib.Path]] = None,
        context: Optional[dict] = None,
        structural: bool = True,
        contextual: bool = True,
        indent: int = 2,
        file_uri: Optional[str] = None,
        skipND: Optional[int] = 1,
):
    """Write the metadata of an HDF5 file to a JSON-LD file.

    Thin wrapper that delegates to ``_hdf2ld`` with the format fixed to
    ``"json-ld"``.

    Parameters
    ----------
    filename : Union[str, pathlib.Path]
        Path to the HDF5 file.
    metadata_filename : Optional[Union[str, pathlib.Path]], default=None
        Output path for the JSON-LD file. If None, the HDF5 filename with
        its suffix replaced by ``.jsonld`` is used.
    context : Optional[dict], default=None
        Context entries handed to the serialization step.
    structural : bool, default=True
        Include structural RDF from the HDF5 hierarchy.
    contextual : bool, default=True
        Include contextual RDF from attribute mappings.
    indent : int, default=2
        Indentation value forwarded to the serializer.
    file_uri : Optional[str], default=None
        Base URI for the file resources. Must end with '#' or '/'.
    skipND : Optional[int], default=1
        Forwarded to the graph extraction.

    Returns
    -------
    pathlib.Path
        Path to the generated JSON-LD file.
    """
    export_options = {
        "metadata_filename": metadata_filename,
        "context": context,
        "structural": structural,
        "contextual": contextual,
        "indent": indent,
        "file_uri": file_uri,
        "skipND": skipND,
    }
    return _hdf2ld(filename=filename, fmt="json-ld", **export_options)


def hdf2ttl(
        filename: Union[str, pathlib.Path],
        metadata_filename: Optional[Union[str, pathlib.Path]] = None,
        context: Optional[dict] = None,
        structural: bool = True,
        contextual: bool = True,
        indent: int = 2,
        file_uri: Optional[str] = None,
        skipND: Optional[int] = 1,
):
    """Write the metadata of an HDF5 file to a Turtle (TTL) file.

    Thin wrapper that delegates to ``_hdf2ld`` with the format fixed to
    ``"ttl"``.

    Parameters
    ----------
    filename : Union[str, pathlib.Path]
        Path to the HDF5 file.
    metadata_filename : Optional[Union[str, pathlib.Path]], default=None
        Output path for the Turtle file. If None, the HDF5 filename with its
        suffix replaced by ``.ttl`` is used.
    context : Optional[dict], default=None
        Context entries handed to the serialization step.
    structural : bool, default=True
        Include structural RDF from the HDF5 hierarchy.
    contextual : bool, default=True
        Include contextual RDF from attribute mappings.
    indent : int, default=2
        Indentation value forwarded to the serializer.
    file_uri : Optional[str], default=None
        Base URI for the file resources. Must end with '#' or '/'.
    skipND : Optional[int], default=1
        Forwarded to the graph extraction.

    Returns
    -------
    pathlib.Path
        Path to the generated Turtle file.
    """
    export_options = {
        "metadata_filename": metadata_filename,
        "context": context,
        "structural": structural,
        "contextual": contextual,
        "indent": indent,
        "file_uri": file_uri,
        "skipND": skipND,
    }
    return _hdf2ld(filename=filename, fmt="ttl", **export_options)


def _hdf2ld(
        filename: Union[str, pathlib.Path],
        fmt: str,
        metadata_filename: Optional[Union[str, pathlib.Path]] = None,
        context: Optional[dict] = None,
        structural: bool = True,
        contextual: bool = True,
        indent: int = 2,
        file_uri: Optional[str] = None,
        skipND: Optional[int] = 1,
) -> pathlib.Path:
    """Serialize the metadata of an HDF5 file to a linked-data file.

    Shared implementation behind ``hdf2jsonld`` and ``hdf2ttl``.

    Parameters
    ----------
    filename : Union[str, pathlib.Path]
        Path to the HDF5 file.
    fmt : str
        Serialization format; must be accepted by ``_resolve_output_suffix``.
    metadata_filename : Optional[Union[str, pathlib.Path]], default=None
        Output path. If None, ``filename`` with the suffix matching ``fmt``
        is used.
    context : Optional[dict], default=None
        Context entries handed to ``_serialize_graph``. They are not passed
        to ``get_ld``.
    structural, contextual, file_uri, skipND
        Forwarded unchanged to ``get_ld``.
    indent : int, default=2
        Indentation value forwarded to the serializer.

    Returns
    -------
    pathlib.Path
        Path of the file that was written.

    Raises
    ------
    ValueError
        If ``fmt`` is not supported, or for the reasons ``get_ld`` raises.
    """
    suffix = _resolve_output_suffix(fmt)
    target_path = _resolve_metadata_filename(filename, metadata_filename, suffix)
    graph = get_ld(
        hdf_filename=filename,
        structural=structural,
        contextual=contextual,
        file_uri=file_uri,
        skipND=skipND,
    )
    # Serialize before touching the target: opening the file first would
    # truncate an existing output (or leave an empty one behind) whenever
    # serialization raises.
    serialized = _serialize_graph(graph, fmt=fmt, indent=indent, context=context)
    target_path.write_text(serialized, encoding="utf-8")
    return target_path