"""Module.
Examples
--------
Not applicable.
"""
__author__ = "Yeremia Gunawan Adhisantoso"
__license__ = "Clear BSD"
__email__ = "adhisant@tnt.uni-hannover.de"
import typing as t
import numpy as np
def comp_gram_mat(
    P_rows: np.ndarray,
    P_cols: np.ndarray,
) -> np.ndarray:
    """Compute the row-wise inner products of two 2-D arrays.

    Notes
    -----
    Entry ``i`` of the result is the dot product of ``P_rows[i]`` with
    ``P_cols[i]``, i.e. the same values as ``(P_rows * P_cols).sum(axis=1)``.
    Only these paired products are computed: the result is a 1-D vector,
    not the full ``P_rows @ P_cols.T`` matrix.

    ``np.einsum`` is used so that the element-wise product of the two
    inputs is never allocated as an intermediate array.

    Parameters
    ----------
    P_rows : np.ndarray
        2-D array holding the first vector of every pair, one per row.
    P_cols : np.ndarray
        2-D array holding the second vector of every pair, one per row.
        It is paired with `P_rows` row by row.

    Returns
    -------
    np.ndarray
        1-D array with one inner product per row.

    Raises
    ------
    ValueError
        Raised by ``np.einsum`` when the inputs are not 2-D or their shapes
        cannot be combined.

    Examples
    --------
    >>> a = np.array([[1., 2.], [3., 4.]])
    >>> b = np.array([[1., 0.], [0., 1.]])
    >>> comp_gram_mat(a, b).tolist()
    [1.0, 4.0]

    Authors
    -------
    - Yeremia G. Adhisantoso (adhisant@tnt.uni-hannover.de)
    - Qwen2.5 72B - 4.25bpw
    """
    # 'ij,ij->i': multiply matching entries and sum over the column index j.
    return np.einsum('ij,ij->i', P_rows, P_cols)
[docs]def comp_edm(
P_rows: np.ndarray,
P_cols: np.ndarray,
G: np.ndarray | None = None
) -> np.ndarray:
"""Compute the Euclidean Distance Matrix (EDM) from the given row and column vectors.
Notes
-----
The EDM is computed as the sum of the squared row and column vectors, minus twice the Gram matrix.
If `G` is not provided, it is computed internally.
Parameters
----------
**kwargs : dict
Additional keyword arguments.
cm_mat : np.ndarray
The numpy array.
inplace : bool
Inplace flag.
is_triu_sym : bool
Is triu sym flag.
cm_coo : sp.coo_matrix
The coo matrix.
cm_df : pd.DataFrame
The dataframe.
ratio : float
Ratio.
min_k : int, optional
Min k.
data1 : np.ndarray
First data.
data2 : np.ndarray
Second data.
op : str
Operation.
ret_unique_ids : bool
Return unique ids.
axis : int
Axis.
cm_mat1 : np.ndarray
First matrix.
cm_mat2 : np.ndarray
Second matrix.
cm_coo1 : sp.coo_matrix
First coo.
cm_coo2 : sp.coo_matrix
Second coo.
alpha : float
Alpha.
data : np.ndarray
Data.
na_inf_val : float
NA inf val.
col_ids_colname : str
Col ID colname.
row_ids_colname : str
Row ID colname.
use_pseudo : bool
Use pseudo.
P_rows : np.ndarray
The row vectors.
P_cols : np.ndarray
The column vectors.
G : Optional[np.ndarray], optional
The precomputed Gram matrix, by default None.
Returns
-------
np.ndarray
The Euclidean Distance Matrix.
Examples
--------
Authors
-------
- Yeremia G. Adhisantoso (adhisant@tnt.uni-hannover.de)
- Qwen2.5 72B - 4.25bpw
Examples
--------
Not applicable.
"""
# Optimized: Use einsum for sum of squares to avoid temporary allocation
D = np.einsum('ij,ij->i', P_rows, P_rows)
D += np.einsum('ij,ij->i', P_cols, P_cols)
if G is None:
D -= 2*comp_gram_mat(P_rows, P_cols)
else:
D -= 2*G
return D
def comp_trace_P(
    P: np.ndarray,
    reduction: str = 'mean'
) -> float:
    """Reduce the squared entries of `P` to a single value.

    Notes
    -----
    Every element of `P` is squared and the squares are then either averaged
    (``'mean'``) or summed (``'sum'``) over all elements. For a 2-D `P` the
    ``'sum'`` reduction equals ``trace(P @ P.T)``.

    `reduction` is validated before any computation so that an unsupported
    value does not first allocate the squared copy of `P`.

    Parameters
    ----------
    P : np.ndarray
        The input array.
    reduction : str, optional
        How the squared entries are reduced: ``'mean'`` (default) or
        ``'sum'``.

    Returns
    -------
    float
        The reduced value, as the NumPy scalar returned by
        ``ndarray.mean`` / ``ndarray.sum``.

    Raises
    ------
    NotImplementedError
        If `reduction` is neither ``'mean'`` nor ``'sum'``.

    Examples
    --------
    >>> P = np.array([[1., 2.], [3., 4.]])
    >>> float(comp_trace_P(P, reduction='sum'))
    30.0
    >>> float(comp_trace_P(P))
    7.5

    Authors
    -------
    - Yeremia G. Adhisantoso (adhisant@tnt.uni-hannover.de)
    - Qwen2.5 72B - 4.25bpw
    """
    # Guard clause: reject unsupported reductions before doing any work.
    if reduction not in ('mean', 'sum'):
        raise NotImplementedError('Not yet implemented')

    squared = np.square(P)
    if reduction == 'mean':
        return squared.mean()
    return squared.sum()
def comp_edm_from_p(
    P: np.ndarray,
    row_ids: list[int],
    col_ids: list[int],
) -> np.ndarray:
    """Compute squared distances between pairs of rows of `P`.

    Notes
    -----
    Both `row_ids` and `col_ids` index the first axis of `P`: the function
    gathers ``P[row_ids, :]`` and ``P[col_ids, :]`` and passes the two
    gathered arrays to `comp_edm`. Entry ``k`` of the result therefore
    relates row ``row_ids[k]`` of `P` to row ``col_ids[k]`` of `P`.

    Parameters
    ----------
    P : np.ndarray
        The input matrix, one vector per row.
    row_ids : list[int]
        Row indices of `P` giving the first vector of every pair.
    col_ids : list[int]
        Row indices of `P` giving the second vector of every pair.

    Returns
    -------
    np.ndarray
        The output of `comp_edm` for the gathered rows, one value per
        ``(row_ids[k], col_ids[k])`` pair.

    Examples
    --------
    >>> P = np.array([[0., 0.], [3., 4.], [1., 1.]])
    >>> comp_edm_from_p(P, [0, 1], [1, 2]).tolist()
    [25.0, 13.0]

    Authors
    -------
    - Yeremia G. Adhisantoso (adhisant@tnt.uni-hannover.de)
    - Qwen2.5 72B - 4.25bpw
    """
    # Index lists gather copies of the selected rows (NumPy advanced indexing).
    return comp_edm(P[row_ids, :], P[col_ids, :])