# Source code for gunz_cm.metrics.ren.third_parties.hic_spector_wrapper

# -*- coding: utf-8 -*-
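"""
Wrapper around the third-party HiC-Spector code in ``.hic_spector``.

Provides :func:`comp_hic_spector_third_party`, which validates two sparse
contact matrices, converts them to LIL format, and forwards them to
``hic_spector.get_reproducibility``.
"""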
__author__ = "Yeremia Gunawan Adhisantoso"
__email__ = "adhisant@tnt.uni-hannover.de"
__license__ = "Clear BSD"
__version__ = "1.0.0"

import numpy as np
from scipy import sparse as sp
from .hic_spector import get_reproducibility

def comp_hic_spector_third_party(
    cm1: sp.spmatrix,
    cm2: sp.spmatrix,
    num_evec: int = 20,
    ipr_cut: int = 5,
) -> float:
    """
    Compute the reproducibility score between two Hi-C contact matrices.

    Both inputs are validated, converted to LIL format when they are not
    already ``lil_matrix`` instances, and then handed to
    ``get_reproducibility`` from the ``hic_spector`` module.

    Parameters
    ----------
    cm1 : scipy sparse matrix
        The first contact matrix. Any SciPy sparse format is accepted; it
        must have an integer dtype.
    cm2 : scipy sparse matrix
        The second contact matrix. Same requirements as ``cm1``.
    num_evec : int, optional
        The number of eigenvectors to use for the reproducibility score.
        Forwarded unchanged to ``get_reproducibility``. Defaults to 20.
    ipr_cut : int, optional
        The IPR cut-off value. Forwarded unchanged to
        ``get_reproducibility``. Defaults to 5.

    Returns
    -------
    float
        The value returned by ``get_reproducibility`` for the two matrices
        (presumably a scalar score; confirm against ``hic_spector``).

    Raises
    ------
    AssertionError
        If either input is not a SciPy sparse matrix, or if either input
        does not have an integer dtype.

    Notes
    -----
    Inputs that are not ``lil_matrix`` instances are converted with
    ``tolil(copy=True)``, so the caller's objects are not rebound to a
    different format. Negative entries are *not* checked here.

    Authors
    -------
    - Yeremia G. Adhisantoso (adhisant@tnt.uni-hannover.de)
    - Llama3.1 8B - 8.0bpw
    """
    #? Validate with explicit raises instead of `assert` statements, which are
    #? removed under `python -O`. AssertionError and the original messages are
    #? kept so existing callers observe the same failures.
    labelled = (("Matrix 1", cm1), ("Matrix 2", cm2))

    #? Check if input matrices are sparse (both, before touching `.dtype`)
    for label, matrix in labelled:
        if not sp.issparse(matrix):
            raise AssertionError(f"{label} is not a sparse matrix!")

    #? Check if input matrices have integer data type
    for label, matrix in labelled:
        if not np.issubdtype(matrix.dtype, np.integer):
            raise AssertionError(f"{label} must have a datatype of integer!")

    #? Convert matrices to LIL format if necessary
    if not isinstance(cm1, sp.lil_matrix):
        cm1 = cm1.tolil(copy=True)
    if not isinstance(cm2, sp.lil_matrix):
        cm2 = cm2.tolil(copy=True)

    return get_reproducibility(
        cm1,
        cm2,
        num_evec=num_evec,
        ipr_cut=ipr_cut,
    )