# Source code for gunz_cm.loaders.pickle_loader

from __future__ import annotations
# -*- coding: utf-8 -*-
"""
Module for loading contact matrix objects, such as those stored in pickle files.


Examples
--------
"""

__author__ = "Yeremia Gunawan Adhisantoso"
__email__ = "adhisant@tnt.uni-hannover.de"
__license__ = "Clear BSD"
__version__ = "2.4"

# =============================================================================
# STANDARD LIBRARY IMPORTS
# =============================================================================
import pathlib
import pickle
import typing as t

# =============================================================================
# THIRD-PARTY IMPORTS
# =============================================================================
import pandas as pd
import scipy.sparse as ssparse
from pydantic import validate_call

# =============================================================================
# LOCAL APPLICATION IMPORTS
# =============================================================================
from ..consts import DataStructure
from ..exceptions import LoaderError
from ..matrix import ContactMatrix
from ..preprocs.converters import to_dataframe, to_coo_matrix


@validate_call(config={"arbitrary_types_allowed": True})
def _load_pickle_data(
    fpath: str | pathlib.Path,
    region1: str | None = None,
    resolution: int | None = None,
    region2: str | None = None,
    balancing: str | None = None,
    output_format: DataStructure = DataStructure.DF,
) -> t.Any:
    """
    Load a pickled contact matrix and return it in the requested format.

    The unpickled object must be a pandas DataFrame or a SciPy sparse
    matrix. Sparse matrices in any format (CSR, CSC, ...) are normalised
    to COO before use, so they are accepted alongside ``coo_matrix``.

    Parameters
    ----------
    fpath : str | pathlib.Path
        The file path to load.
    region1 : str | None
        The first genomic region (unused for direct loading).
    resolution : int | None
        The resolution (unused for direct loading).
    region2 : str | None, optional
        The second genomic region (unused).
    balancing : str | None, optional
        The balancing method (unused).
    output_format : DataStructure, optional
        The desired output format; ``DataStructure.DF`` or
        ``DataStructure.COO``.

    Returns
    -------
    t.Any
        A pandas DataFrame when ``output_format`` is ``DataStructure.DF``,
        otherwise a SciPy COO sparse matrix.

    Raises
    ------
    LoaderError
        If ``output_format`` is neither ``DataStructure.DF`` nor
        ``DataStructure.COO``.
    FileNotFoundError
        If ``fpath`` does not exist.
    TypeError
        If the unpickled object is neither a pandas DataFrame nor a SciPy
        sparse matrix.

    Warnings
    --------
    Unpickling can execute arbitrary code. Only load files that come from
    a trusted source.
    """
    if output_format not in (DataStructure.DF, DataStructure.COO):
        raise LoaderError(
            f"Unsupported output format: '{output_format}'. "
            "Must be 'df' or 'coo'."
        )

    fpath = pathlib.Path(fpath)

    if not fpath.exists():
        raise FileNotFoundError(f"File not found at the specified path: {fpath}")

    # SECURITY: pickle.load runs arbitrary code embedded in the file, so the
    # caller is responsible for passing only trusted paths.
    with fpath.open("rb") as f:
        cm_obj = pickle.load(f)

    # Normalise non-COO sparse matrices (CSR, CSC, ...) to COO so they take the
    # same path as a pickled coo_matrix. An object that is already a
    # coo_matrix is left untouched, so it is returned as-is for COO output.
    if isinstance(cm_obj, ssparse.spmatrix) and not isinstance(
        cm_obj, ssparse.coo_matrix
    ):
        cm_obj = cm_obj.tocoo()

    is_dataframe = isinstance(cm_obj, pd.DataFrame)
    is_coo = isinstance(cm_obj, ssparse.coo_matrix)

    if output_format == DataStructure.DF:
        if is_dataframe:
            return cm_obj
        if is_coo:
            return to_dataframe(cm_obj)
        raise TypeError(
            f"Loaded object of type {type(cm_obj).__name__} cannot be "
            "converted to a pandas DataFrame."
        )

    # Only DataStructure.COO can reach this point (validated above).
    if is_coo:
        return cm_obj
    if is_dataframe:
        return to_coo_matrix(cm_obj)
    raise TypeError(
        f"Loaded object of type {type(cm_obj).__name__} cannot be "
        "converted to a COO sparse matrix."
    )

@validate_call(config={"arbitrary_types_allowed": True})
def load_pickle(
    fpath: str | pathlib.Path,
    region1: str | None = None,
    resolution: int | None = None,
    region2: str | None = None,
    balancing: str | None = None,
    output_format: DataStructure = DataStructure.DF,
) -> ContactMatrix:
    """
    Create a ContactMatrix backed by a pickle file.

    The file is not read here. This function only bundles its arguments
    and hands them, together with ``_load_pickle_data``, to
    ``ContactMatrix``, which is presumably responsible for invoking the
    loader when the data is actually needed (confirm against
    ``ContactMatrix``).

    Parameters
    ----------
    fpath : str | pathlib.Path
        Path of the pickle file.
    region1 : str | None, optional
        The first genomic region; also passed on as ``chromosome1``.
    resolution : int | None, optional
        The resolution recorded on the returned object.
    region2 : str | None, optional
        The second genomic region; also passed on as ``chromosome2``.
    balancing : str | None, optional
        The balancing method, forwarded to the loader.
    output_format : DataStructure, optional
        The desired output format, forwarded to the loader.

    Returns
    -------
    ContactMatrix
        An object configured with ``_load_pickle_data`` as its loader
        function and ``{"format": "pickle"}`` as its metadata.
    """
    return ContactMatrix(
        chromosome1=region1,
        chromosome2=region2,
        resolution=resolution,
        loader_func=_load_pickle_data,
        # Every argument is forwarded verbatim to the loader function.
        loader_kwargs={
            "fpath": fpath,
            "region1": region1,
            "resolution": resolution,
            "region2": region2,
            "balancing": balancing,
            "output_format": output_format,
        },
        metadata={"format": "pickle"},
    )