# Source code for gunz_cm.cli.converters

import typer
import pathlib
import typing as t
from loguru import logger
import warnings

from .. import loaders
from ..converters.coo import convert_to_cm_coo, convert_all_intra_to_cm_coo
from ..converters.memmap import convert_to_memmap

# Typer application object for the converter CLI; the command functions in
# this module register themselves on it through the @app.command() decorator.
app = typer.Typer(help="CM Library - Converters")

# CLI command: load a contact matrix as a DataFrame and write it out as a
# memmap-backed "bigmat" file.
# NOTE: Typer shows the function docstring as the command's --help text, so
# maintainer-facing notes are kept in comments rather than in the docstring.
@app.command()
def to_bigmat(
    resolution: int = typer.Argument(..., help="Target resolution of the bigmat"),
    region1: str = typer.Argument(..., help="Input region path"),
    input: str = typer.Argument(..., help="Input file path"),
    output: str = typer.Argument(..., help="Output file path"),
    region2: t.Optional[str] = typer.Option(None, help="Region in the second chromosome. If not set, it is equals `region1`"),
    balancing: str = typer.Option("KR", help="Use values using selected balancing method. Defaults to 'KR'"),
    no_full_matrix: bool = typer.Option(False, help="Do not output full matrix"),
    verify_result: bool = typer.Option(False, help="Verify output result"),
    dry_run: bool = typer.Option(False, "--dry-run", "-n", help="Simulate execution without making changes"),
):
    """
    Converts contact matrix data to a bigmat (memmap) format.
    """
    # A dry run only reports the intended conversion and exits with status 0;
    # nothing is loaded or written.
    if dry_run:
        logger.info(f"[DRY-RUN] Would convert {input} to bigmat format at {output}")
        raise typer.Exit()

    # The CLI flag is phrased negatively; the converter expects the positive form.
    write_full_matrix = not no_full_matrix

    try:
        logger.info(f"Loading contact matrix data from {input}")
        load_args = {
            "fpath": pathlib.Path(input),
            "resolution": resolution,
            "region1": region1,
            "region2": region2,
            "balancing": balancing,
            "output_format": loaders.DataStructure.DF,
        }
        frame = loaders.load_cm_data(**load_args)

        logger.info(f"Converting to bigmat at {output}")
        destination = pathlib.Path(output)
        convert_to_memmap(
            frame,
            destination,
            output_full_matrix=write_full_matrix,
            check_output=verify_result,
        )
        logger.success("Successfully converted to bigmat.")
    except Exception as err:
        # Top-level CLI boundary: log the failure and turn it into exit code 1.
        logger.error(f"Failed to convert to bigmat: {err}")
        raise typer.Exit(code=1) from err
# CLI command: thin wrapper that forwards its arguments to convert_to_cm_coo.
# NOTE: Typer shows the function docstring as the command's --help text, so
# maintainer-facing notes are kept in comments rather than in the docstring.
@app.command()
def to_coo(
    region1: str = typer.Argument(..., help="Region1"),
    resolution: int = typer.Argument(..., help="Resolution"),
    balancing: str = typer.Argument(..., help="Balancing method"),
    input: str = typer.Argument(..., help="Input file path"),
    output: str = typer.Argument(..., help="Output file path"),
    region2: t.Optional[str] = typer.Option(None, help="Region2"),
    res_to_one: bool = typer.Option(False, help="Normalize bin coordinates by the resolution"),
    output_delimiter: str = typer.Option("\t", help="The delimiter for the output text file"),
    to_mcoo: bool = typer.Option(False, help="Create a modified COO with raw and normalized counts"),
    gen_pseudo_weights: bool = typer.Option(False, help="Generate a corresponding .weights file"),
    dry_run: bool = typer.Option(False, "--dry-run", "-n", help="Simulate execution without making changes"),
):
    """
    Converts contact matrix data to a COO format.
    """
    # A dry run only reports the intended conversion and exits with status 0.
    if dry_run:
        logger.info(f"[DRY-RUN] Would convert {input} to COO format at {output}")
        raise typer.Exit()

    # Optional settings are passed through to the converter unchanged.
    converter_options = {
        "region2": region2,
        "res_to_one": res_to_one,
        "to_mcoo": to_mcoo,
        "gen_pseudo_weights": gen_pseudo_weights,
        "output_delimiter": output_delimiter,
    }

    try:
        logger.info(f"Converting {input} to COO format")
        source = pathlib.Path(input)
        destination = pathlib.Path(output)
        convert_to_cm_coo(
            source,
            destination,
            region1,
            resolution,
            balancing,
            **converter_options,
        )
        logger.success(f"Successfully converted to COO at {output}")
    except Exception as err:
        # Top-level CLI boundary: log the failure and turn it into exit code 1.
        logger.error(f"Failed to convert to COO: {err}")
        raise typer.Exit(code=1) from err
# CLI command: thin wrapper that forwards its arguments to
# convert_all_intra_to_cm_coo, writing results under the `output` directory.
# NOTE: Typer shows the function docstring as the command's --help text, so
# maintainer-facing notes are kept in comments rather than in the docstring.
@app.command()
def to_intra_cm_coos(
    resolution: int = typer.Argument(..., help="Resolution"),
    balancing: str = typer.Argument(..., help="Balancing method"),
    input: str = typer.Argument(..., help="Input file path"),
    output: str = typer.Argument(..., help="Output directory path"),
    region2: t.Optional[str] = typer.Option(None, help="Region2"),
    res_to_one: bool = typer.Option(False, help="Normalize bin coordinates by the resolution"),
    output_delimiter: str = typer.Option("\t", help="The delimiter for the output text file"),
    to_mcoo: bool = typer.Option(False, help="Create a modified COO with raw and normalized counts"),
    gen_pseudo_weights: bool = typer.Option(False, help="Generate a corresponding .weights file"),
    dry_run: bool = typer.Option(False, "--dry-run", "-n", help="Simulate execution without making changes"),
):
    """
    Converts all intra-chromosomal contact matrices to COO format.
    """
    # `region2` is kept in the signature for backward compatibility, but it is
    # never forwarded to the converter. Tell the user instead of silently
    # dropping a value they supplied.
    if region2 is not None:
        logger.warning(
            f"--region2={region2} is ignored: this command converts every "
            "intra-chromosomal matrix and does not use a second region"
        )

    # A dry run only reports the intended conversion and exits with status 0.
    if dry_run:
        logger.info(f"[DRY-RUN] Would convert intra-chromosomal matrices from {input} to COO in {output}")
        raise typer.Exit()

    try:
        logger.info(f"Converting all intra-chromosomal matrices from {input}")
        convert_all_intra_to_cm_coo(
            pathlib.Path(input),
            pathlib.Path(output),
            resolution,
            balancing,
            res_to_one=res_to_one,
            to_mcoo=to_mcoo,
            gen_pseudo_weights=gen_pseudo_weights,
            output_delimiter=output_delimiter,
        )
        logger.success(f"Successfully converted matrices and saved to {output}")
    except Exception as e:
        # Top-level CLI boundary: log the failure and turn it into exit code 1.
        logger.error(f"Failed to convert matrices: {e}")
        raise typer.Exit(code=1) from e
# CLI command: convert a .hic file to .cool by delegating to the optional
# third-party `hic2cool` package, which is imported lazily inside the function.
# NOTE: Typer shows the function docstring as the command's --help text, so
# maintainer-facing notes are kept in comments rather than in the docstring.
@app.command()
def hic2cool(
    input: str = typer.Argument(..., help="Input file path"),
    output: str = typer.Argument(..., help="Output file path"),
    # presumably 0 asks hic2cool for all resolutions — confirm against hic2cool docs
    resolution: int = typer.Option(0, "-r", "--resolution", help="Resolution"),
    nproc: int = typer.Option(1, "-p", "--nproc", help="Number of processors"),
    dry_run: bool = typer.Option(False, "--dry-run", "-n", help="Simulate execution without making changes"),
):
    """
    Converts .hic to .cool format.
    """
    # A dry run only reports the intended conversion and exits with status 0.
    if dry_run:
        logger.info(f"[DRY-RUN] Would convert {input} to .cool format at {output} with resolution {resolution}")
        raise typer.Exit()

    # The caveat is emitted both through the warnings machinery and the logger.
    if nproc != 1:
        warnings.warn("Not recommended to change the nproc value!", RuntimeWarning)
        logger.warning("Not recommended to change the nproc value!")

    # Guard only the import here, so that an ImportError raised later from
    # inside the conversion is not misreported as "hic2cool is not installed".
    try:
        from hic2cool import hic2cool_convert
    except ImportError as e:
        logger.error("hic2cool is not installed. Please install it using 'pip install hic2cool'.")
        raise typer.Exit(code=1) from e

    try:
        logger.info(f"Converting {input} to {output} using hic2cool")
        hic2cool_convert(
            input,
            output,
            resolution=resolution,
            nproc=nproc,
        )
        logger.success(f"Successfully converted to {output}")
    except Exception as e:
        # Top-level CLI boundary: log the failure and turn it into exit code 1.
        logger.error(f"Failed to convert using hic2cool: {e}")
        raise typer.Exit(code=1) from e
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()