Source code for gunz_cm.preprocs.transforms.edm

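"""Euclidean distance matrix (EDM) helpers: paired Gram terms, squared distances, and distance-to-contact conversion.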

Examples
--------
    Not applicable.

"""
__author__ = "Yeremia Gunawan Adhisantoso"
__license__ = "Clear BSD"
__email__ = "adhisant@tnt.uni-hannover.de"

import typing as t
import numpy as np

def comp_gram_mat(
    P_rows: np.ndarray,
    P_cols: np.ndarray,
) -> np.ndarray:
    """Compute the paired (row-wise) inner products of two 2-D arrays.

    Notes
    -----
    For every index ``i`` the result holds ``dot(P_rows[i], P_cols[i])``.
    Despite the function name, the output is a 1-D vector of paired dot
    products rather than a full ``(n, n)`` Gram matrix.

    ``np.einsum`` is used so that the element-wise product
    ``P_rows * P_cols`` is never materialised as a temporary array.

    Parameters
    ----------
    P_rows : np.ndarray
        2-D array whose ``i``-th row is the first vector of pair ``i``.
    P_cols : np.ndarray
        2-D array whose ``i``-th row is the second vector of pair ``i``.

    Returns
    -------
    np.ndarray
        1-D array with one inner product per pair.

    Examples
    --------
    >>> comp_gram_mat(np.array([[1., 2.]]), np.array([[3., 4.]]))
    array([11.])
    """
    return np.einsum('ij,ij->i', P_rows, P_cols)
[docs]def comp_edm( P_rows: np.ndarray, P_cols: np.ndarray, G: np.ndarray | None = None ) -> np.ndarray: """Compute the Euclidean Distance Matrix (EDM) from the given row and column vectors. Notes ----- The EDM is computed as the sum of the squared row and column vectors, minus twice the Gram matrix. If `G` is not provided, it is computed internally. Parameters ---------- **kwargs : dict Additional keyword arguments. cm_mat : np.ndarray The numpy array. inplace : bool Inplace flag. is_triu_sym : bool Is triu sym flag. cm_coo : sp.coo_matrix The coo matrix. cm_df : pd.DataFrame The dataframe. ratio : float Ratio. min_k : int, optional Min k. data1 : np.ndarray First data. data2 : np.ndarray Second data. op : str Operation. ret_unique_ids : bool Return unique ids. axis : int Axis. cm_mat1 : np.ndarray First matrix. cm_mat2 : np.ndarray Second matrix. cm_coo1 : sp.coo_matrix First coo. cm_coo2 : sp.coo_matrix Second coo. alpha : float Alpha. data : np.ndarray Data. na_inf_val : float NA inf val. col_ids_colname : str Col ID colname. row_ids_colname : str Row ID colname. use_pseudo : bool Use pseudo. P_rows : np.ndarray The row vectors. P_cols : np.ndarray The column vectors. G : Optional[np.ndarray], optional The precomputed Gram matrix, by default None. Returns ------- np.ndarray The Euclidean Distance Matrix. Examples -------- Authors ------- - Yeremia G. Adhisantoso (adhisant@tnt.uni-hannover.de) - Qwen2.5 72B - 4.25bpw Examples -------- Not applicable. """ # Optimized: Use einsum for sum of squares to avoid temporary allocation D = np.einsum('ij,ij->i', P_rows, P_rows) D += np.einsum('ij,ij->i', P_cols, P_cols) if G is None: D -= 2*comp_gram_mat(P_rows, P_cols) else: D -= 2*G return D
def comp_trace_P(
    P: np.ndarray,
    reduction: str = 'mean'
) -> float:
    """Reduce the squared elements of ``P`` to a single value.

    Notes
    -----
    Every element of ``P`` is squared and the squares are then either
    averaged (``'mean'``) or added up (``'sum'``). For a 2-D ``P`` the
    ``'sum'`` variant equals the trace of ``P @ P.T``.

    Parameters
    ----------
    P : np.ndarray
        The input array.
    reduction : str, optional
        How the squared elements are reduced, by default 'mean'.
        Supported values are 'mean' and 'sum'.

    Returns
    -------
    float
        The reduced value.

    Raises
    ------
    NotImplementedError
        If ``reduction`` is neither 'mean' nor 'sum'.

    Examples
    --------
    >>> float(comp_trace_P(np.array([[1., 2.], [3., 4.]]), reduction='sum'))
    30.0
    """
    squared = np.square(P)

    if reduction == 'mean':
        return squared.mean()
    if reduction == 'sum':
        return squared.sum()

    raise NotImplementedError('Not yet implemented')
def comp_edm_from_p(
    P: np.ndarray,
    row_ids: list[int],
    col_ids: list[int],
) -> np.ndarray:
    """Compute paired squared distances between selected rows of ``P``.

    Notes
    -----
    Rows ``P[row_ids]`` and ``P[col_ids]`` are gathered and handed to
    ``comp_edm``, so entry ``i`` of the result relates ``P[row_ids[i]]``
    to ``P[col_ids[i]]``.

    Parameters
    ----------
    P : np.ndarray
        2-D array of points, one point per row.
    row_ids : list[int]
        Row indices of ``P`` that form the first point of each pair.
    col_ids : list[int]
        Row indices of ``P`` that form the second point of each pair.

    Returns
    -------
    np.ndarray
        The value returned by ``comp_edm`` for the gathered rows.

    Examples
    --------
    Not applicable.
    """
    return comp_edm(P[row_ids, :], P[col_ids, :])
def comp_contacts(
    D: np.ndarray,
    alpha: float
) -> np.ndarray:
    """Convert distances to contacts with an element-wise power law.

    Notes
    -----
    Each element of ``D`` is raised to the power ``1 / alpha``.

    Parameters
    ----------
    D : np.ndarray
        The distance values.
    alpha : float
        The exponent parameter; the applied exponent is ``1 / alpha``.

    Returns
    -------
    np.ndarray
        ``D`` raised element-wise to ``1 / alpha``.

    Examples
    --------
    >>> comp_contacts(np.array([4., 9.]), 2.0)
    array([2., 3.])
    """
    exponent = 1 / alpha
    return np.power(D, exponent)