import typer
import pathlib
import typing as t
from loguru import logger
import warnings
from .. import loaders
from ..converters.coo import convert_to_cm_coo, convert_all_intra_to_cm_coo
from ..converters.memmap import convert_to_memmap
# Typer application object; the converter commands in this module attach
# themselves to it through the `@app.command()` decorator.
app = typer.Typer(help="CM Library - Converters")
[docs]
# CLI command: load contact-matrix data through `loaders.load_cm_data` and hand
# the result to `convert_to_memmap`. Typer renders the function docstring as
# the command's help text, so implementation notes are kept in comments.
# A dry run only logs the intended action and exits before anything is loaded
# or written; any exception raised while loading or converting is logged and
# turned into exit status 1.
@app.command()
def to_bigmat(
    resolution: int = typer.Argument(..., help="Target resolution of the bigmat"),
    region1: str = typer.Argument(..., help="Input region path"),
    input: str = typer.Argument(..., help="Input file path"),
    output: str = typer.Argument(..., help="Output file path"),
    region2: t.Optional[str] = typer.Option(
        None,
        help="Region in the second chromosome. If not set, it is equals `region1`",
    ),
    balancing: str = typer.Option(
        "KR",
        help="Use values using selected balancing method. Defaults to 'KR'",
    ),
    no_full_matrix: bool = typer.Option(False, help="Do not output full matrix"),
    verify_result: bool = typer.Option(False, help="Verify output result"),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        "-n",
        help="Simulate execution without making changes",
    ),
):
    """
    Converts contact matrix data to a bigmat (memmap) format.
    """
    if dry_run:
        logger.info(f"[DRY-RUN] Would convert {input} to bigmat format at {output}")
        raise typer.Exit()

    # The CLI flag is phrased negatively; the converter expects the positive form.
    write_full_matrix = not no_full_matrix

    try:
        logger.info(f"Loading contact matrix data from {input}")
        source_path = pathlib.Path(input)
        contacts = loaders.load_cm_data(
            fpath=source_path,
            resolution=resolution,
            region1=region1,
            region2=region2,
            balancing=balancing,
            output_format=loaders.DataStructure.DF,
        )

        logger.info(f"Converting to bigmat at {output}")
        target_path = pathlib.Path(output)
        convert_to_memmap(
            contacts,
            target_path,
            output_full_matrix=write_full_matrix,
            check_output=verify_result,
        )
        logger.success("Successfully converted to bigmat.")
    except Exception as exc:
        # Top-level CLI boundary: report the failure and exit non-zero while
        # keeping the original exception chained as the cause.
        logger.error(f"Failed to convert to bigmat: {exc}")
        raise typer.Exit(code=1) from exc
[docs]
# CLI command: thin wrapper around `convert_to_cm_coo`. Typer renders the
# function docstring as the command's help text, so implementation notes are
# kept in comments. A dry run only logs the intended action and exits; any
# exception raised by the converter is logged and turned into exit status 1.
@app.command()
def to_coo(
    region1: str = typer.Argument(..., help="Region1"),
    resolution: int = typer.Argument(..., help="Resolution"),
    balancing: str = typer.Argument(..., help="Balancing method"),
    input: str = typer.Argument(..., help="Input file path"),
    output: str = typer.Argument(..., help="Output file path"),
    region2: t.Optional[str] = typer.Option(None, help="Region2"),
    res_to_one: bool = typer.Option(
        False, help="Normalize bin coordinates by the resolution"
    ),
    output_delimiter: str = typer.Option(
        "\t", help="The delimiter for the output text file"
    ),
    to_mcoo: bool = typer.Option(
        False, help="Create a modified COO with raw and normalized counts"
    ),
    gen_pseudo_weights: bool = typer.Option(
        False, help="Generate a corresponding .weights file"
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        "-n",
        help="Simulate execution without making changes",
    ),
):
    """
    Converts contact matrix data to a COO format.
    """
    if dry_run:
        logger.info(f"[DRY-RUN] Would convert {input} to COO format at {output}")
        raise typer.Exit()

    # Optional settings forwarded to the converter by keyword.
    converter_options = {
        "region2": region2,
        "res_to_one": res_to_one,
        "to_mcoo": to_mcoo,
        "gen_pseudo_weights": gen_pseudo_weights,
        "output_delimiter": output_delimiter,
    }

    try:
        logger.info(f"Converting {input} to COO format")
        source_path = pathlib.Path(input)
        target_path = pathlib.Path(output)
        convert_to_cm_coo(
            source_path,
            target_path,
            region1,
            resolution,
            balancing,
            **converter_options,
        )
        logger.success(f"Successfully converted to COO at {output}")
    except Exception as exc:
        # Top-level CLI boundary: report the failure and exit non-zero while
        # keeping the original exception chained as the cause.
        logger.error(f"Failed to convert to COO: {exc}")
        raise typer.Exit(code=1) from exc
[docs]
# CLI command: thin wrapper around `convert_all_intra_to_cm_coo`. Typer renders
# the function docstring as the command's help text, so implementation notes
# are kept in comments. A dry run only logs the intended action and exits; any
# exception raised by the converter is logged and turned into exit status 1.
@app.command()
def to_intra_cm_coos(
    resolution: int = typer.Argument(..., help="Resolution"),
    balancing: str = typer.Argument(..., help="Balancing method"),
    input: str = typer.Argument(..., help="Input file path"),
    output: str = typer.Argument(..., help="Output directory path"),
    region2: t.Optional[str] = typer.Option(None, help="Region2"),
    res_to_one: bool = typer.Option(False, help="Normalize bin coordinates by the resolution"),
    output_delimiter: str = typer.Option("\t", help="The delimiter for the output text file"),
    to_mcoo: bool = typer.Option(False, help="Create a modified COO with raw and normalized counts"),
    gen_pseudo_weights: bool = typer.Option(False, help="Generate a corresponding .weights file"),
    dry_run: bool = typer.Option(False, "--dry-run", "-n", help="Simulate execution without making changes")
):
    """
    Converts all intra-chromosomal contact matrices to COO format.
    """
    # `region2` is part of the command's interface but is not forwarded to the
    # converter below. Tell the user instead of silently dropping the value.
    if region2 is not None:
        logger.warning("The --region2 option is currently ignored by this command.")

    if dry_run:
        logger.info(f"[DRY-RUN] Would convert intra-chromosomal matrices from {input} to COO in {output}")
        raise typer.Exit()

    try:
        logger.info(f"Converting all intra-chromosomal matrices from {input}")
        convert_all_intra_to_cm_coo(
            pathlib.Path(input),
            pathlib.Path(output),
            resolution,
            balancing,
            res_to_one=res_to_one,
            to_mcoo=to_mcoo,
            gen_pseudo_weights=gen_pseudo_weights,
            output_delimiter=output_delimiter,
        )
        logger.success(f"Successfully converted matrices and saved to {output}")
    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero while
        # keeping the original exception chained as the cause.
        logger.error(f"Failed to convert matrices: {e}")
        raise typer.Exit(code=1) from e
[docs]
# CLI command: convert a .hic file to .cool by delegating to the optional
# `hic2cool` package, which is imported lazily inside the command. Typer
# renders the function docstring as the command's help text, so implementation
# notes are kept in comments. A dry run only logs the intended action and
# exits; a missing package or a failed conversion is logged and turned into
# exit status 1.
@app.command()
def hic2cool(
    input: str = typer.Argument(..., help="Input file path"),
    output: str = typer.Argument(..., help="Output file path"),
    resolution: int = typer.Option(0, "-r", "--resolution", help="Resolution"),
    nproc: int = typer.Option(1, "-p", "--nproc", help="Number of processors"),
    dry_run: bool = typer.Option(False, "--dry-run", "-n", help="Simulate execution without making changes")
):
    """
    Converts .hic to .cool format.
    """
    if dry_run:
        logger.info(f"[DRY-RUN] Would convert {input} to .cool format at {output} with resolution {resolution}")
        raise typer.Exit()

    if nproc != 1:
        # Emitted through both the warnings machinery and the logger.
        warnings.warn("Not recommended to change the nproc value!", RuntimeWarning)
        logger.warning("Not recommended to change the nproc value!")

    # Guard the import on its own so that only a failure to import the package
    # is reported as "not installed". An ImportError raised later, during the
    # conversion itself, is reported as a conversion failure instead.
    try:
        from hic2cool import hic2cool_convert
    except ImportError as e:
        logger.error("hic2cool is not installed. Please install it using 'pip install hic2cool'.")
        raise typer.Exit(code=1) from e
    except Exception as e:
        # Any other import-time error keeps the generic failure report.
        logger.error(f"Failed to convert using hic2cool: {e}")
        raise typer.Exit(code=1) from e

    try:
        logger.info(f"Converting {input} to {output} using hic2cool")
        hic2cool_convert(
            input,
            output,
            resolution=resolution,
            nproc=nproc,
        )
        logger.success(f"Successfully converted to {output}")
    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero while
        # keeping the original exception chained as the cause.
        logger.error(f"Failed to convert using hic2cool: {e}")
        raise typer.Exit(code=1) from e
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()