from __future__ import annotations
# -*- coding: utf-8 -*-
"""
This module provides utility functions for creating, checking, and loading
NumPy memory-mapped (memmap) files, which consist of a binary data file
and a corresponding JSON metadata file.
Examples
--------
"""
# =============================================================================
# METADATA
# =============================================================================
__author__ = "Yeremia Gunawan Adhisantoso"
__email__ = "adhisant@tnt.uni-hannover.de"
__license__ = "Clear BSD"
__version__ = "2.0.0"
# =============================================================================
# STANDARD LIBRARY IMPORTS
# =============================================================================
import json
import pathlib
# =============================================================================
# THIRD-PARTY IMPORTS
# =============================================================================
import numpy as np
from pydantic import validate_call
# =============================================================================
# LOCAL APPLICATION IMPORTS
# =============================================================================
from ..matrix import ContactMatrix
# =============================================================================
# MODULE CONSTANTS
# =============================================================================
# File extension (without the leading dot) of the binary memmap data file.
BIN_EXT = "npdat"
# File extension (without the leading dot) of the JSON metadata file.
META_EXT = "json"
# Default ``mode`` forwarded to ``np.memmap`` by the loader functions below.
DEFAULT_MODE = "r"
# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================
[docs]
@validate_call(config=dict(arbitrary_types_allowed=True))
def gen_memmap_fpaths(
    #? --- Input Files ---
    base_fpath: str | pathlib.Path,
) -> tuple[pathlib.Path, pathlib.Path]:
    """
    Derive the binary-data and JSON-metadata file paths of a memmap.

    Parameters
    ----------
    base_fpath : str | pathlib.Path
        Base path shared by both files, given without an extension.

    Returns
    -------
    tuple[pathlib.Path, pathlib.Path]
        ``(bin_fpath, meta_fpath)``: the path of the binary (``.npdat``) file
        followed by the path of the metadata (``.json``) file.

    Notes
    -----
    Both paths are built with ``pathlib.Path.with_suffix``, so a trailing
    suffix already present on ``base_fpath`` is replaced rather than kept.

    Examples
    --------
    >>> bin_fpath, meta_fpath = gen_memmap_fpaths("data/chr1")
    >>> bin_fpath.name, meta_fpath.name
    ('chr1.npdat', 'chr1.json')
    """
    stem_path = pathlib.Path(base_fpath)
    # Binary extension first, metadata extension second: callers unpack the
    # result in exactly this order.
    suffixes = ("." + BIN_EXT, "." + META_EXT)
    bin_fpath, meta_fpath = (stem_path.with_suffix(sfx) for sfx in suffixes)
    return bin_fpath, meta_fpath
[docs]
@validate_call(config=dict(arbitrary_types_allowed=True))
def is_memmap_exists(
    #? --- Input Files ---
    base_fpath: str | pathlib.Path,
) -> bool:
    """
    Report whether a memmap is complete on disk.

    Parameters
    ----------
    base_fpath : str | pathlib.Path
        Base path of the memmap to check, given without an extension.

    Returns
    -------
    bool
        True only when both the binary (``.npdat``) path and the metadata
        (``.json``) path exist; False if either one is missing.

    Examples
    --------
    >>> is_memmap_exists("path/that/does/not/exist")
    False
    """
    bin_fpath, meta_fpath = gen_memmap_fpaths(pathlib.Path(base_fpath))
    # Guard clause: without the binary file the metadata file is irrelevant.
    if not bin_fpath.exists():
        return False
    return meta_fpath.exists()
@validate_call(config=dict(arbitrary_types_allowed=True))
def _load_memmap_data(
    base_fpath: str | pathlib.Path,
    mode: str = DEFAULT_MODE,
) -> np.memmap:
    """
    Internal function to open the memmap stored under ``base_fpath``.

    The JSON metadata file is read first to obtain the ``dtype`` and
    ``shape`` needed to interpret the binary file, which is then opened
    with ``np.memmap``.

    Parameters
    ----------
    base_fpath : str | pathlib.Path
        The base path to the memmap files, without an extension.
    mode : str, optional
        The file open mode forwarded to ``np.memmap``. Defaults to
        ``DEFAULT_MODE``.

    Returns
    -------
    np.memmap
        A NumPy memory-mapped array with the dtype and shape recorded in
        the metadata file.

    Raises
    ------
    FileNotFoundError
        If the binary file, the metadata file, or both are missing.
    KeyError
        If the metadata does not contain a ``"dtype"`` or ``"shape"`` entry.
    """
    # Resolve both paths once and reuse them for the check, the error
    # message, and the actual loading.
    bin_fpath, meta_fpath = gen_memmap_fpaths(pathlib.Path(base_fpath))

    # Probe each file exactly once so the error message reports the same
    # state that triggered the failure.
    bin_exists = bin_fpath.exists()
    meta_exists = meta_fpath.exists()
    if not (bin_exists and meta_exists):
        raise FileNotFoundError(
            "Memmap files not found. "
            f"Binary exists: {bin_exists}. "
            f"Metadata exists: {meta_exists}."
        )

    # JSON is UTF-8 by specification; do not depend on the platform's
    # locale encoding when decoding the metadata.
    with meta_fpath.open("r", encoding="utf-8") as f:
        meta = json.load(f)

    return np.memmap(
        bin_fpath,
        mode=mode,
        dtype=np.dtype(meta["dtype"]),
        # JSON stores the shape as a list; np.memmap is given a tuple.
        shape=tuple(meta["shape"]),
    )
[docs]
@validate_call(config=dict(arbitrary_types_allowed=True))
def load_memmap(
    base_fpath: str | pathlib.Path,
    mode: str = DEFAULT_MODE,
) -> ContactMatrix:
    """
    Build a ``ContactMatrix`` backed by a memory-mapped file.

    Only the JSON metadata file is read here. The binary data is not opened
    by this function; instead ``_load_memmap_data`` and its keyword
    arguments are handed to ``ContactMatrix`` as ``loader_func`` /
    ``loader_kwargs``.

    Parameters
    ----------
    base_fpath : str | pathlib.Path
        The base path to the memmap files, without an extension.
    mode : str, optional
        The file open mode passed on to ``_load_memmap_data`` through
        ``loader_kwargs``. Defaults to ``DEFAULT_MODE``.

    Returns
    -------
    ContactMatrix
        A matrix object whose ``chromosome1``, ``chromosome2`` and
        ``resolution`` are taken from the metadata (``None`` for any key
        that is absent), with the full metadata dict attached.

    Raises
    ------
    FileNotFoundError
        If the metadata (``.json``) file does not exist.

    Notes
    -----
    NOTE(review): presumably ``ContactMatrix`` calls
    ``loader_func(**loader_kwargs)`` on first data access -- confirm against
    the ``ContactMatrix`` implementation.
    """
    # For memmap, we need to read the metadata to know the chromosome and
    # resolution, so we can't be fully lazy.
    _, meta_fpath = gen_memmap_fpaths(pathlib.Path(base_fpath))

    # JSON is UTF-8 by specification; do not depend on the platform's
    # locale encoding when decoding the metadata.
    with meta_fpath.open("r", encoding="utf-8") as f:
        meta = json.load(f)

    # Arguments with which the deferred loader will be called.
    loader_kwargs = {
        "base_fpath": base_fpath,
        "mode": mode,
    }

    return ContactMatrix(
        chromosome1=meta.get("chromosome1"),
        chromosome2=meta.get("chromosome2"),
        resolution=meta.get("resolution"),
        loader_func=_load_memmap_data,
        loader_kwargs=loader_kwargs,
        metadata=meta,
    )