# Source code for gunz_cm.io.gnz

# =============================================================================
# METADATA
# =============================================================================
"""
Reader and Writer for the .gzcm unified container format.

GZCM = GunZ Contact Matrix format
Supports GZCM v1, v2 (dense/tiled arrays), v3 (compressed tiles), and
v4 (per-region descriptors; see GzcmV4Reader / GzcmV4Writer).
"""
__author__ = "Yeremia Gunawan Adhisantoso"
__email__ = "adhisant@tnt.uni-hannover.de"
__license__ = "Clear BSD"

# =============================================================================
# STANDARD LIBRARY IMPORTS
# =============================================================================
import json
import warnings
import pathlib
import struct
import typing as t
import zlib
from dataclasses import dataclass, field, asdict
from enum import Enum

# =============================================================================
# THIRD-PARTY IMPORTS
# =============================================================================
import numpy as np
import scipy.sparse

# =============================================================================
# CONSTANTS
# =============================================================================
GZCM_MAGIC = b"GZCM"
ALIGNMENT = 4096


# =============================================================================
# ENUMS
# =============================================================================
class GZCMVersion(Enum):
    """Supported GZCM format versions.

    Each member's value is the integer version number it is named after.
    """

    # NOTE(review): this file also defines GzcmV4Reader / GzcmV4Writer for
    # version 4, which has no member here -- confirm whether that is intended.
    V1 = 1
    V2 = 2
    V3 = 3


# =============================================================================
# DATA CLASSES
# =============================================================================
@dataclass
class ArrayInfo:
    """Metadata for a single array stored in GZCM container.

    The field names match the keys of the per-array dictionaries that
    ``GZCMWriter.write`` stores under ``"arrays"`` in the JSON header, which
    is what lets ``GZCMHeader.from_dict`` build an instance with
    ``ArrayInfo(**entry)``.

    Parameters
    ----------
    offset : int
        Byte offset in file where array data starts.
    shape : tuple
        Array dimensions.
    dtype : str
        NumPy dtype string (e.g., '<f4').
    order : str, default="C"
        Memory layout order ('C' or 'F').
    compressed : bool, default=False
        Whether this is a compressed tile.
    uncompressed_size : int, default=0
        Original size before compression.
    checksum : int, default=0
        CRC32 checksum for integrity verification.
    """

    offset: int
    # Declared as ``tuple`` but never converted here: an entry decoded from
    # JSON supplies a list, so consumers should not rely on the exact type.
    shape: tuple
    dtype: str
    order: str = "C"
    compressed: bool = False
    # The writer only emits the two keys below for compressed tiles; plain
    # arrays therefore fall back to these defaults.
    uncompressed_size: int = 0
    checksum: int = 0


@dataclass
class CompressionMeta:
    """Compression configuration metadata.

    Parameters
    ----------
    codec : str
        Name of the compression codec.
    tile_size : int
        Tile size used by the codec (unit is not stated in this file --
        presumably matrix bins per tile side; TODO confirm).
    version : str, default="1.0"
        Version string of this compression configuration.
    """

    codec: str
    tile_size: int
    version: str = "1.0"


@dataclass
class GZCMHeader:
    """In-memory form of a GZCM file header.

    Bundles the format version, the free-form user metadata and the
    per-array descriptors, and converts to / from the plain-dict layout
    that is serialized as JSON.

    Parameters
    ----------
    version : int, default=1
        GZCM format version.
    metadata : dict, default={}
        User-defined metadata (e.g., bin_size_bp, region).
    arrays : dict[str, ArrayInfo], default={}
        Mapping of array names to their metadata.
    """

    version: int = 1
    metadata: dict = field(default_factory=dict)
    arrays: dict[str, ArrayInfo] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the header as a plain dictionary ready for JSON encoding.

        Returns
        -------
        dict
            ``{"version": ..., "metadata": ..., "arrays": {...}}`` where every
            ``ArrayInfo`` has been expanded with ``dataclasses.asdict``.
        """
        serialized_arrays = {}
        for array_name, array_info in self.arrays.items():
            serialized_arrays[array_name] = asdict(array_info)

        return {
            "version": self.version,
            "metadata": self.metadata,
            "arrays": serialized_arrays,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "GZCMHeader":
        """Build a header from its plain-dictionary form.

        Parameters
        ----------
        d : dict
            Dictionary with optional ``"version"``, ``"metadata"`` and
            ``"arrays"`` keys; missing keys fall back to ``1``, ``{}`` and
            ``{}`` respectively.

        Returns
        -------
        GZCMHeader
            Header whose ``arrays`` values are ``ArrayInfo`` instances built
            from the keyword fields of each entry.
        """
        raw_arrays = d.get("arrays", {})
        parsed_arrays = {
            array_name: ArrayInfo(**array_fields)
            for array_name, array_fields in raw_arrays.items()
        }
        return cls(
            version=d.get("version", 1),
            metadata=d.get("metadata", {}),
            arrays=parsed_arrays,
        )


# =============================================================================
# WRITER
# =============================================================================
[docs]class GZCMWriter:
    """Writer for GZCM container format.

    Supports writing GZCM v1, v2 (dense arrays) and GZCM v3 (compressed tiles).

    Parameters
    ----------
    fpath : str | pathlib.Path
        Output file path.
    overwrite : bool, default=False
        Overwrite existing file.
    version : int, default=1
        GZCM format version. Use 3 for compressed tiles.

    Examples
    --------
    >>> writer = GZCMWriter("output.gzcm", overwrite=True)
    >>> writer.add_array("matrix", data)
    >>> writer.write()
    """

    def __init__(
        self,
        fpath: str | pathlib.Path,
        overwrite: bool = False,
        version: int = 1,
    ):
        """Initialize GZCM writer.

        Parameters
        ----------
        fpath : str | pathlib.Path
            Output file path.
        overwrite : bool, default=False
            Overwrite existing file.
        version : int, default=1
            GZCM format version. Use 3 for compressed tiles.
        """
        self.fpath = pathlib.Path(fpath)
        if self.fpath.exists() and not overwrite:
            raise FileExistsError(f"File exists: {self.fpath}")

        #? --- Instance State ---
        self.version = version
        self.magic = GZCM_MAGIC
        self.metadata: dict = {}
        self.arrays_info: dict = {}
        self._pending_arrays: dict = {}
        self._streaming_arrays: dict = {}
        self._compressed_tiles: dict = {}

[docs]    def set_metadata(self, meta: dict) -> None:
        """Set user-defined metadata.

        Parameters
        ----------
        meta : dict
            Metadata dictionary to store in header.
        """
        self.metadata = meta

[docs]    def add_array(
        self,
        name: str,
        data: np.ndarray,
        dtype: str | np.dtype | None = None,
    ) -> None:
        """Register a complete array to be written.

        Data is not written until write() is called.

        Parameters
        ----------
        name : str
            Array name.
        data : np.ndarray
            Array data to write.
        dtype : str | np.dtype | None, optional
            Override dtype for storage.
        """
        self._pending_arrays[name] = (data, dtype or data.dtype)

[docs]    def init_streaming_array(
        self,
        name: str,
        shape: tuple[int, ...],
        dtype: str | np.dtype,
    ) -> None:
        """Reserve space for an array to be written incrementally.

        Parameters
        ----------
        name : str
            Array name.
        shape : tuple[int, ...]
            Array shape.
        dtype : str | np.dtype
            Data type.
        """
        self._streaming_arrays[name] = (shape, np.dtype(dtype))

[docs]    def add_compressed_tile(
        self,
        name: str,
        payload: bytes,
        uncompressed_size: int,
        checksum: int | None = None,
    ) -> None:
        """Add a pre-encoded compressed tile.

        Parameters
        ----------
        name : str
            Tile name (e.g., "tile_0").
        payload : bytes
            Encoded compressed data.
        uncompressed_size : int
            Original uncompressed size in bytes.
        checksum : int | None, optional
            CRC32 checksum for integrity verification.
        """
        if checksum is None:
            checksum = zlib.crc32(payload)

        self._compressed_tiles[name] = {
            "payload": payload,
            "uncompressed_size": uncompressed_size,
            "compressed_size": len(payload),
            "checksum": checksum,
        }

[docs]    def init_compressed_tile_stream(
        self,
        name: str,
        n_tiles: int,
        max_tile_size: int,
    ) -> None:
        """Reserve space for streaming tile writes.

        Parameters
        ----------
        name : str
            Base name for tiles.
        n_tiles : int
            Number of tiles to reserve.
        max_tile_size : int
            Maximum tile size in bytes.
        """
        total_size = n_tiles * max_tile_size
        self._streaming_arrays[f"_ct_{name}"] = (total_size,), np.uint8
        self.metadata.setdefault("_compressed_tiles", {})[name] = {
            "n_tiles": n_tiles,
            "max_tile_size": max_tile_size,
            "tile_size_bytes": max_tile_size,
        }

[docs]    def write(self) -> None:
        """Finalize the header and layout, and open the file for writing."""
        current_offset = ALIGNMENT

        #? --- Calculate array layouts ---
        all_arrays: dict = {}
        for name, (data, dtype) in self._pending_arrays.items():
            all_arrays[name] = (data.shape, dtype)
        for name, (shape, dtype) in self._streaming_arrays.items():
            all_arrays[name] = (shape, dtype)

        for name, (shape, dtype) in all_arrays.items():
            dt = np.dtype(dtype)
            nbytes = int(np.prod(shape) * dt.itemsize)

            self.arrays_info[name] = {
                "offset": current_offset,
                "shape": shape,
                "dtype": dt.str,
                "order": "C",
                "compressed": False,
            }

            next_offset = current_offset + nbytes
            padding = (ALIGNMENT - (next_offset % ALIGNMENT)) % ALIGNMENT
            current_offset = next_offset + padding

        #? --- Compressed tiles ---
        for name, tile_info in self._compressed_tiles.items():
            payload = tile_info["payload"]
            nbytes = len(payload)

            self.arrays_info[name] = {
                "offset": current_offset,
                "shape": (nbytes,),
                "dtype": np.dtype(np.uint8).str,
                "order": "C",
                "compressed": True,
                "uncompressed_size": tile_info["uncompressed_size"],
                "checksum": tile_info["checksum"],
            }

            next_offset = current_offset + nbytes
            padding = (ALIGNMENT - (next_offset % ALIGNMENT)) % ALIGNMENT
            current_offset = next_offset + padding

        #? --- Build and write header ---
        full_header = {
            "version": self.version,
            "metadata": self.metadata,
            "arrays": self.arrays_info,
        }

        header_json = json.dumps(full_header, sort_keys=True).encode("utf-8")
        header_len = len(header_json)

        max_header_size = ALIGNMENT - 8
        if header_len > max_header_size:
            n_pages = (header_len + 8 + ALIGNMENT - 1) // ALIGNMENT
            base_offset = n_pages * ALIGNMENT
            shift = base_offset - ALIGNMENT
            for info in self.arrays_info.values():
                info["offset"] += shift
            full_header["arrays"] = self.arrays_info
            header_json = json.dumps(full_header, sort_keys=True).encode("utf-8")
            header_len = len(header_json)
            data_start = base_offset
        else:
            data_start = ALIGNMENT

        with open(self.fpath, "wb") as f:
            f.write(self.magic)
            f.write(struct.pack("<I", header_len))
            f.write(header_json)
            current_pos = f.tell()
            f.write(b"\x00" * (data_start - current_pos))

            total_size = (
                current_offset if header_len <= max_header_size else current_offset + (data_start - ALIGNMENT)
            )
            f.truncate(total_size)

        #? --- Write pending arrays ---
        for name, (data, dtype) in self._pending_arrays.items():
            mm = self.get_array_writable(name)
            mm[:] = data.astype(dtype)
            mm.flush()

        for name, tile_info in self._compressed_tiles.items():
            mm = self.get_array_writable(name)
            mm[:] = np.frombuffer(tile_info["payload"], dtype=np.uint8)
            mm.flush()

[docs]    def get_array_writable(self, name: str) -> np.memmap:
        """Returns a writable memmap for a specific array in the container.

        Parameters
        ----------
        name : str
            Array name.

        Returns
        -------
        np.memmap
            Writable memory-mapped array.
        """
        if name not in self.arrays_info:
            raise KeyError(f"Array '{name}' not found. Call write() first to finalize layout.")

        info = self.arrays_info[name]
        return np.memmap(
            self.fpath,
            dtype=np.dtype(info["dtype"]),
            mode="r+",
            offset=info["offset"],
            shape=tuple(info["shape"]),
        )


# =============================================================================
# CHUNKED WRITER
# =============================================================================

class GZCMChunkedWriter:
    """Writer for chunked GZCM v2 format.

    Enables streaming writes where chunks can be written incrementally
    without holding the entire matrix in memory.

    Parameters
    ----------
    fpath : str | pathlib.Path
        Output file path.
    n_rows : int
        Number of matrix rows.
    n_cols : int
        Number of matrix columns.
    chunk_size : int, default=256
        Size of each chunk (rows/cols per chunk).
    dtype : str | np.dtype, default="float32"
        Data type for matrix storage.
    overwrite : bool, default=True
        Overwrite existing file.

    Examples
    --------
    >>> writer = GZCMChunkedWriter("output.gzcm", n_rows=1024, n_cols=1024)
    >>> for cr in range(writer.n_chunks_row):
    ...     for cc in range(writer.n_chunks_col):
    ...         writer.write_chunk(np.random.rand(256, 256), cr, cc)
    >>> writer.finalize()
    """

    def __init__(
        self,
        fpath: str | pathlib.Path,
        n_rows: int,
        n_cols: int,
        chunk_size: int = 256,
        dtype: str | np.dtype = "float32",
        overwrite: bool = True,
    ):
        self.fpath = pathlib.Path(fpath)
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.chunk_size = chunk_size
        self.dtype = np.dtype(dtype)

        self.n_chunks_row = (n_rows + chunk_size - 1) // chunk_size
        self.n_chunks_col = (n_cols + chunk_size - 1) // chunk_size
        self.n_chunks = self.n_chunks_row * self.n_chunks_col

        self._written_chunks: set[tuple[int, int]] = set()
        self._is_finalized = False

        self._writer = GZCMWriter(
            self.fpath, overwrite=overwrite, version=2
        )

        padded_shape = (
            self.n_chunks_row,
            self.n_chunks_col,
            chunk_size,
            chunk_size,
        )
        self._writer.init_streaming_array("matrix", padded_shape, self.dtype)
        self._writer.set_metadata({
            "original_shape": (n_rows, n_cols),
            "n_rows": n_rows,
            "n_cols": n_cols,
            "chunk_size": chunk_size,
            "n_chunks_row": self.n_chunks_row,
            "n_chunks_col": self.n_chunks_col,
        })
        self._writer.write()

        self._mm = self._writer.get_array_writable("matrix")

    @property
    def is_complete(self) -> bool:
        """Check if all chunks have been written."""
        return len(self._written_chunks) == self.n_chunks

    @property
    def n_written(self) -> int:
        """Number of chunks written."""
        return len(self._written_chunks)

    def write_chunk(self, data: np.ndarray, chunk_r: int, chunk_c: int) -> None:
        """Write a chunk to the matrix.

        Parameters
        ----------
        data : np.ndarray
            Chunk data with shape (chunk_size, chunk_size).
        chunk_r : int
            Chunk row index.
        chunk_c : int
            Chunk column index.
        """
        if self._is_finalized:
            raise RuntimeError("Cannot write after finalize()")

        if chunk_r < 0 or chunk_r >= self.n_chunks_row:
            raise ValueError(f"chunk_r out of range: {chunk_r}")
        if chunk_c < 0 or chunk_c >= self.n_chunks_col:
            raise ValueError(f"chunk_c out of range: {chunk_c}")

        r_start = chunk_r * self.chunk_size
        c_start = chunk_c * self.chunk_size
        r_end = min(r_start + self.chunk_size, self.n_rows)
        c_end = min(c_start + self.chunk_size, self.n_cols)

        self._mm[chunk_r, chunk_c, :r_end - r_start, :c_end - c_start] = data[:r_end - r_start, :c_end - c_start]
        self._written_chunks.add((chunk_r, chunk_c))

    def finalize(self) -> None:
        """Finalize the file after all chunks are written."""
        if not self.is_complete:
            raise RuntimeError(
                f"Not all chunks written: {self.n_written}/{self.n_chunks}"
            )

        self._mm.flush()
        self._is_finalized = True


# =============================================================================
# READER
# =============================================================================
[docs]class GZCMReader:
    """Reader for GZCM container format.

    Supports reading GZCM v1, v2, and v3 files.

    Parameters
    ----------
    fpath : str | pathlib.Path
        Input file path.

    Examples
    --------
    >>> reader = GZCMReader("data.gzcm")
    >>> version = reader.version
    >>> metadata = reader.get_metadata()
    >>> matrix = reader.get_array("matrix")
    """

    def __init__(self, fpath: str | pathlib.Path):
        """Initialize GZCM reader.

        Parameters
        ----------
        fpath : str | pathlib.Path
            Input file path.
        """
        self.fpath = pathlib.Path(fpath)
        if not self.fpath.exists():
            raise FileNotFoundError(f"File not found: {self.fpath}")

        # Per-instance payload cache keyed by (name, index). Avoids the
        # bytes(memmap) full copy on repeat access within the lifetime of
        # this reader. Scoped to the instance so distinct GZCM files do
        # not cross-contaminate.
        self._payload_cache: dict[tuple[str, int], bytes] = {}

        self._parse_header()

    def _parse_header(self) -> None:
        """Parse GZCM header from file."""
        with open(self.fpath, "rb") as f:
            magic = f.read(4)
            if magic != GZCM_MAGIC:
                raise ValueError(f"Invalid GZCM file: magic={magic!r}")

            len_bytes = f.read(4)
            header_len = struct.unpack("<I", len_bytes)[0]

            json_bytes = f.read(header_len)
            self.header = json.loads(json_bytes.decode("utf-8"))

        self.version = self.header.get("version", 1)
        self.metadata = self.header.get("metadata", {})
        self.arrays_info = self.header.get("arrays", {})

[docs]    def get_array(self, name: str, mode: str = "r") -> np.memmap:
        """Get a memory-mapped array from the container.

        Parameters
        ----------
        name : str
            Array name.
        mode : str, default="r"
            Memory-map mode ('r' for read-only, 'r+' for read-write).

        Returns
        -------
        np.memmap
            Memory-mapped array.
        """
        if name not in self.arrays_info:
            raise KeyError(f"Array '{name}' not found in GZCM file")

        info = self.arrays_info[name]
        return np.memmap(
            self.fpath,
            dtype=np.dtype(info["dtype"]),
            mode=mode,
            offset=info["offset"],
            shape=tuple(info["shape"]),
            order=info.get("order", "C"),
        )

[docs]    def get_compressed_tile(
        self,
        name: str,
        index: int = 0,
        return_shape: bool = False,
    ) -> bytes | tuple[bytes, tuple[int, int]]:
        """Read a compressed tile without decoding.

        v3 GZCM tiles carry an 8-byte (rows, cols) int32 header that the
        zstd/bsc decoders consume. Pass ``return_shape=True`` to also
        recover the tile shape from that header alongside the raw
        payload bytes.

        Parameters
        ----------
        name : str
            Tile name (e.g., "tile_0").
        index : int, default=0
            Tile index for tile streams.
        return_shape : bool, default=False
            When ``True``, return ``(payload, (rows, cols))``. When
            ``False`` (default), return only the raw ``payload`` bytes
            for backward compatibility with existing callers.

        Returns
        -------
        bytes, or tuple of (bytes, tuple of (int, int))
            Encoded compressed data; optionally paired with the
            (rows, cols) shape decoded from the 8-byte header.

        Raises
        ------
        ValueError
            If the payload is shorter than 8 bytes when ``return_shape``
            is requested, or if the CRC32 checksum does not match.
        """
        if name not in self.arrays_info:
            raise KeyError(f"Tile '{name}' not found in GZCM file")

        info = self.arrays_info[name]
        if not info.get("compressed", False):
            raise ValueError(f"Array '{name}' is not a compressed tile")

        offset = info["offset"]
        shape = tuple(info["shape"])

        cache_key = (name, index)
        cached = self._payload_cache.get(cache_key)
        if cached is not None:
            payload = cached
        else:
            mm = np.memmap(self.fpath, dtype=np.uint8, mode="r", offset=offset, shape=shape)
            payload = bytes(mm)
            self._payload_cache[cache_key] = payload

        if info.get("checksum"):
            actual_crc = zlib.crc32(payload)
            if actual_crc != info["checksum"]:
                raise ValueError(
                    f"CRC32 mismatch for tile '{name}': "
                    f"expected {info['checksum']}, got {actual_crc}"
                )

        if not return_shape:
            return payload

        if len(payload) < 8:
            raise ValueError(
                f"Tile '{name}' payload is too short to contain a shape "
                f"header ({len(payload)} bytes; need at least 8)."
            )
        rows = int(np.frombuffer(payload[:4], dtype=np.int32)[0])
        cols = int(np.frombuffer(payload[4:8], dtype=np.int32)[0])
        return payload, (rows, cols)

[docs]    def decode_compressed_tile(self, payload: bytes) -> np.ndarray:
        """Decode a compressed tile using CMC.

        Parameters
        ----------
        payload : bytes
            Encoded compressed data.

        Returns
        -------
        np.ndarray
            Decoded contact matrix tile.
        """
        from ..compressions import CmcDecoder

        decoder = CmcDecoder()
        return decoder.decode_tile(payload)

[docs]    def get_metadata(self) -> dict:
        """Get user-defined metadata from header.

        Returns
        -------
        dict
            Metadata dictionary.
        """
        return self.metadata

[docs]    def keys(self) -> list[str]:
        """Get names of all arrays in the container.

        Returns
        -------
        list[str]
            Array names.
        """
        return list(self.arrays_info.keys())


# =============================================================================
# GZCM V4 READER (Phase 2 / v2.15.0)
# =============================================================================
class GzcmV4Region:
    """Typed view over one region descriptor of a GZCM v4 header.

    Mirrors the contract defined in ``specs/v4_api_skeleton.py``. A region
    is one chromosome-pair (e.g. ``chr1:chr1``) with its own layout
    (``dense`` / ``sparse-tiled`` / ``sparse-tiled-intra`` / ``sparse-roaring``)
    and its own codec selection (per-tile for tiled, single for the others).

    Every field is optional in the descriptor; missing entries fall back to
    ``-1`` (id), ``""`` (name, layout), ``0`` (n_tiles, tile_size), ``[]``
    (codec_per_tile, tile_bboxes) and ``False`` (delta_encode, bit_pack).

    Parameters
    ----------
    descriptor : dict
        The ``metadata["regions"][i]`` entry parsed from the v4 header.

    Raises
    ------
    TypeError
        If ``descriptor`` is not a dict.
    ValueError
        If ``codec_per_tile`` is present, truthy, and not a list.
    """

    def __init__(self, descriptor: dict) -> None:
        """Coerce the descriptor fields into typed attributes."""
        if not isinstance(descriptor, dict):
            raise TypeError(f"GzcmV4Region descriptor must be a dict, got {type(descriptor).__name__}")

        field_of = descriptor.get
        self._descriptor = descriptor

        # Scalar fields, coerced in a fixed order so the first bad value wins.
        self.id = int(field_of("id", -1))
        self.name = str(field_of("name", ""))
        self.layout = str(field_of("layout", ""))
        self.n_tiles = int(field_of("n_tiles", 0))
        self.tile_size = int(field_of("tile_size", 0))

        # Any falsy value (missing, None, empty) counts as "no codecs".
        raw_codecs = field_of("codec_per_tile") or []
        if not isinstance(raw_codecs, list):
            raise ValueError(
                f"region {self.id}: codec_per_tile must be a list, got {type(raw_codecs).__name__}"
            )
        self.codec_per_tile = [str(codec) for codec in raw_codecs]

        self.delta_encode = bool(field_of("delta_encode", False))
        self.bit_pack = bool(field_of("bit_pack", False))
        self.tile_bboxes = list(field_of("tile_bboxes") or [])

    def __repr__(self) -> str:
        """Short identifying summary: id, name, layout and tile count."""
        summary = (
            f"id={self.id}, name={self.name!r}, "
            f"layout={self.layout!r}, n_tiles={self.n_tiles}"
        )
        return f"GzcmV4Region({summary})"


class GzcmV4Reader(GZCMReader):
    """Reader for GZCM v4 files.

    Subclasses ``GZCMReader`` so it inherits the magic / length / JSON
    header parsing. After the header is parsed, this reader validates
    the v4-specific fields (``version``, ``metadata["regions"]``,
    ``metadata["version_gzcm"]``) and exposes the regions as
    :class:`GzcmV4Region` objects; named arrays such as ``weights_*`` stay
    available through the inherited ``get_array``.

    Tile payload decoding (the per-tile compressed bytes) is deferred to
    later phases; this Phase 2 stub validates the wire format and
    region descriptors only.

    Throughout this class ``region_id`` is the 0-based position of a region
    in ``metadata["regions"]``.

    Parameters
    ----------
    fpath : str | pathlib.Path
        Input v4 ``.gzcm`` file path.

    Examples
    --------
    >>> reader = GzcmV4Reader("chr1_v4.gzcm")
    >>> reader.version
    4
    >>> reader.regions[0].layout
    'sparse-tiled-intra'
    >>> reader.get_array("weights_KR").shape
    (512,)
    """

    REQUIRED_METADATA_KEYS = ("regions",)

    @staticmethod
    def _format_error(message: str) -> Exception:
        """Build a ``GzcmV4FormatError``; imported lazily, only on failure."""
        from ..exceptions import GzcmV4FormatError

        return GzcmV4FormatError(message)

    def __init__(self, fpath: str | pathlib.Path) -> None:
        """Initialize GZCM v4 reader; validates the v4 header shape.

        Raises
        ------
        GzcmV4FormatError
            If the header version is not 4, a required metadata key is
            missing, ``metadata["version_gzcm"]`` is not ``4`` / ``"4"``, or
            ``metadata["regions"]`` is not a non-empty list.
        """
        super().__init__(fpath)

        if self.version != 4:
            raise self._format_error(
                f"Expected GZCM version 4, got {self.version} from {self.fpath}"
            )

        for key in self.REQUIRED_METADATA_KEYS:
            if key not in self.metadata:
                raise self._format_error(
                    f"GZCM v4 header missing required metadata key {key!r} in {self.fpath}"
                )

        version_gzcm = self.metadata.get("version_gzcm")
        if version_gzcm not in (4, "4"):
            raise self._format_error(
                f"GZCM v4 metadata.version_gzcm must be 4, got {version_gzcm!r} in {self.fpath}"
            )

        regions_raw = self.metadata.get("regions") or []
        if not isinstance(regions_raw, list) or not regions_raw:
            raise self._format_error(
                f"GZCM v4 metadata.regions must be a non-empty list, got {type(regions_raw).__name__}"
            )

        self.regions: list[GzcmV4Region] = [GzcmV4Region(r) for r in regions_raw]
        # If two regions share a name, the later one wins in this lookup.
        self._region_by_name: dict[str, GzcmV4Region] = {r.name: r for r in self.regions}

    def get_region(self, region_id: int) -> GzcmV4Region:
        """Return the region descriptor for ``region_id``.

        Parameters
        ----------
        region_id : int
            Region index (0-based).

        Returns
        -------
        GzcmV4Region

        Raises
        ------
        IndexError
            If ``region_id`` is out of range.
        """
        if not 0 <= region_id < len(self.regions):
            raise IndexError(
                f"region_id {region_id} out of range [0, {len(self.regions)})"
            )
        return self.regions[region_id]

    def get_region_by_name(self, name: str) -> GzcmV4Region:
        """Return the region with the given ``name`` (e.g. ``"chr1:chr1"``).

        Raises
        ------
        KeyError
            If no region with the given name exists.
        """
        if name not in self._region_by_name:
            raise KeyError(
                f"No region with name {name!r} in v4 file {self.fpath}; "
                f"known names: {sorted(self._region_by_name)}"
            )
        return self._region_by_name[name]

    def get_tile_payload(self, region_id: int, tile_index: int) -> tuple[bytes, str]:
        """Return the (payload_bytes, codec_name) for the tile at ``region_id``/``tile_index``.

        Reads from the concatenated ``arrays["tiles"]`` byte array. The
        per-tile offset table is currently a simple sequential slice:
        tile ``i`` spans ``[i * per_tile_bytes, (i + 1) * per_tile_bytes)``
        in the tiles array. A future PR can replace this with a per-region
        tile-bbox / offset table.

        Raises
        ------
        IndexError
            If ``region_id`` or ``tile_index`` is out of range, or the
            region's ``codec_per_tile`` list has no entry for ``tile_index``.
        ValueError
            If the region's ``codec_per_tile`` list is empty.
        KeyError
            If the v4 file has no ``tiles`` array.
        """
        region = self.get_region(region_id)
        if not 0 <= tile_index < region.n_tiles:
            raise IndexError(
                f"tile_index {tile_index} out of range for region "
                f"{region.name} (n_tiles={region.n_tiles})"
            )
        if not region.codec_per_tile:
            raise ValueError(
                f"region {region.name} has empty codec_per_tile list"
            )
        if tile_index >= len(region.codec_per_tile):
            # Descriptor is inconsistent: fewer codecs than declared tiles.
            raise IndexError(
                f"tile_index {tile_index} has no codec entry for region "
                f"{region.name} (codec_per_tile has "
                f"{len(region.codec_per_tile)} entries)"
            )
        codec_name = region.codec_per_tile[tile_index]

        arrays_info = getattr(self, "arrays_info", {}) or {}
        tiles_info = arrays_info.get("tiles")
        if tiles_info is None:
            raise KeyError(
                f"GZCM v4 file {self.fpath} has no 'tiles' array; "
                f"writer may pre-date the v4 tile-array layout"
            )

        offset_start, offset_end = self._compute_tile_byte_range(region_id, tile_index)
        tile_bytes = np.memmap(
            self.fpath,
            dtype=np.dtype(tiles_info["dtype"]),
            mode="r",
            offset=tiles_info["offset"],
            shape=tuple(tiles_info["shape"]),
        )
        payload = bytes(tile_bytes[offset_start:offset_end])
        del tile_bytes  # release the mapping; the payload is our own copy
        return payload, codec_name

    def _compute_tile_byte_range(self, region_id: int, tile_index: int) -> tuple[int, int]:
        """Return the (start_byte, end_byte) offsets within ``arrays['tiles']``.

        The layout is sequential: region 0's tiles come first (in order),
        then region 1's tiles, etc. Each tile spans a fixed byte range
        inferred from the total tile-array size divided by the total
        number of tiles across all regions.

        ``region_id`` is the position in ``self.regions`` -- the same
        meaning it has in ``get_region`` -- not the ``id`` field stored in
        the descriptor, which may be absent or non-sequential.

        Raises
        ------
        KeyError
            If the header has no ``tiles`` array.
        ValueError
            If the regions declare zero tiles in total.
        IndexError
            If ``region_id`` is not a valid position in ``self.regions``.
        """
        arrays_info = getattr(self, "arrays_info", {}) or {}
        tiles_info = arrays_info["tiles"]
        total_bytes = int(tiles_info["shape"][0])
        n_total_tiles = sum(r.n_tiles for r in self.regions)
        if n_total_tiles == 0:
            raise ValueError("v4 file has zero total tiles; cannot compute byte range")
        if not 0 <= region_id < len(self.regions):
            raise IndexError(f"region_id {region_id} not found")

        per_tile = total_bytes // n_total_tiles
        # Global tile number = tiles of all preceding regions + local index.
        tiles_before = sum(r.n_tiles for r in self.regions[:region_id])
        global_index = tiles_before + tile_index
        return global_index * per_tile, (global_index + 1) * per_tile


# =============================================================================
# GZCM V4 WRITER (Phase 3 / v2.16.0)
# =============================================================================
class GzcmV4Writer:
    """Writer for GZCM v4 container files.

    Emits a header JSON containing ``version=4``, ``metadata.version_gzcm=4``,
    and ``metadata.regions`` (the per-region descriptors that
    :class:`GzcmV4Reader` consumes). Each region carries its own
    ``codec_per_tile`` list, layout, and tile-bbox metadata. Per-tile
    compressed payloads are persisted as a single named array; weight
    arrays are stored under ``arrays["weights_*"]``.

    The writer is intentionally minimal — it produces a v4 wire format
    compatible with :class:`GzcmV4Reader`. The full codec-picker /
    delta+bitpack pipeline lives in ``gunz_cm.compressions``; this
    class only orchestrates serialization.

    Parameters
    ----------
    fpath : str | pathlib.Path
        Output file path.
    overwrite : bool, default=False
        Overwrite existing file.
    version : int, default=4
        GZCM format version. Must be 4.

    Examples
    --------
    >>> from gunz_cm.io.gnz import GzcmV4Writer
    >>> writer = GzcmV4Writer("out_v4.gzcm", overwrite=True)
    >>> writer.add_region(
    ...     region_id=0,
    ...     name="chr1:chr1",
    ...     layout="sparse-tiled-intra",
    ...     tile_size=256,
    ...     codec_per_tile=["zstd", "zstd", "zstd"],
    ...     delta_encode=False,
    ...     bit_pack=False,
    ...     tile_bboxes=[{"tile_name": "tile_0", "row_start": 0, "col_start": 0,
    ...                   "row_end": 256, "col_end": 256, "diagonal": 0}],
    ... )
    >>> writer.add_tile_payload(b"\\x00" * 1024)
    >>> writer.add_weights("weights_KR", np.ones(512, dtype=np.float32))
    >>> writer.write()
    """

    def __init__(
        self,
        fpath: str | pathlib.Path,
        overwrite: bool = False,
        version: int = 4,
    ) -> None:
        """Initialize the v4 writer."""
        if version != 4:
            raise ValueError(f"GzcmV4Writer requires version=4, got {version}")
        self.fpath = pathlib.Path(fpath)
        if self.fpath.exists() and not overwrite:
            raise FileExistsError(f"File exists: {self.fpath}")
        self.version = version
        self._regions: list[dict] = []
        self._pending_regions: list[dict] = []
        self._tile_payloads: list[bytes] = []
        self._weights: dict[str, np.ndarray] = {}
        self._original_shape: tuple[int, int] | None = None
        self._metadata_extra: dict = {}

    def add_region(
        self,
        region_id: int,
        name: str,
        layout: str,
        tile_size: int,
        codec_per_tile: list[str],
        delta_encode: bool = False,
        bit_pack: bool = False,
        tile_bboxes: list[dict] | None = None,
    ) -> None:
        """Register one region descriptor.

        The number of tiles for this region is the count of
        :meth:`add_tile_payload` calls AFTER this ``add_region`` call,
        ending at the next ``add_region`` call (or at ``write()`` for
        the last region). Tiles added before the first ``add_region``
        belong to no region — they are silently ignored.
        """
        self._pending_regions.append(
            {
                "id": int(region_id),
                "name": str(name),
                "layout": str(layout),
                "tile_size": int(tile_size),
                "codec_per_tile": [str(c) for c in codec_per_tile],
                "delta_encode": bool(delta_encode),
                "bit_pack": bool(bit_pack),
                "tile_bboxes": list(tile_bboxes) if tile_bboxes is not None else [],
                "_tile_start": len(self._tile_payloads),
            }
        )

    def _finalize_regions(self) -> None:
        """Convert pending regions to final descriptors with correct n_tiles.

        Called by ``write()``. For each pending region, ``n_tiles`` is
        the number of tiles added between this region's registration
        and the next (or the end).
        """
        for idx, pending in enumerate(self._pending_regions):
            start = pending["_tile_start"]
            end = (
                self._pending_regions[idx + 1]["_tile_start"]
                if idx + 1 < len(self._pending_regions)
                else len(self._tile_payloads)
            )
            descriptor = {k: v for k, v in pending.items() if not k.startswith("_")}
            descriptor["n_tiles"] = int(end - start)
            self._regions.append(descriptor)

    def add_tile_payload(self, payload: bytes) -> int:
        """Append a per-tile compressed payload. Returns the tile index."""
        idx = len(self._tile_payloads)
        self._tile_payloads.append(bytes(payload))
        return idx

    def add_weights(self, name: str, array: np.ndarray) -> None:
        """Register a named weight array (e.g. ``"weights_KR"``)."""
        self._weights[str(name)] = np.asarray(array)

    def set_original_shape(self, n_rows: int, n_cols: int) -> None:
        """Record the matrix's original (pre-padding) shape."""
        self._original_shape = (int(n_rows), int(n_cols))

    def add_metadata(self, key: str, value) -> None:
        """Set an arbitrary metadata key (forward-compat hook)."""
        self._metadata_extra[str(key)] = value

    def write(self) -> None:
        """Serialize header + payload + weight arrays to ``self.fpath``.

        The on-disk layout matches the v4 reader's expectation:

        * 4-byte magic ``GZCM``
        * 4-byte LE uint32 ``header_len``
        * JSON header with ``version=4``, ``metadata.version_gzcm=4``,
          ``metadata.regions`` (list), ``metadata.original_shape``,
          ``arrays`` (with offset/dtype/shape for each named array)
        * 4 KiB-aligned payload blobs, one per named array
        """
        if not self._pending_regions:
            raise ValueError(
                "GzcmV4Writer.write() requires at least one region; "
                "call add_region(...) before write()."
            )
        self._finalize_regions()

        # Build per-tile concatenated payload array.
        tile_bytes = b"".join(self._tile_payloads)
        tile_arr = np.frombuffer(tile_bytes, dtype=np.uint8) if tile_bytes else np.zeros(0, dtype=np.uint8)

        arrays_info: dict = {}
        current_offset = ALIGNMENT
        pending_payloads: list[tuple[str, np.ndarray, dict]] = []

        tile_layout = {
            "offset": current_offset,
            "shape": tuple(tile_arr.shape),
            "dtype": tile_arr.dtype.str,
            "order": "C",
            "compressed": True,
        }
        arrays_info["tiles"] = tile_layout
        pending_payloads.append(("tiles", tile_arr, tile_layout))
        current_offset = _aligned_next(current_offset, tile_arr.nbytes)

        for name, arr in self._weights.items():
            arr_c = np.ascontiguousarray(arr)
            info = {
                "offset": current_offset,
                "shape": tuple(arr_c.shape),
                "dtype": arr_c.dtype.str,
                "order": "C",
                "compressed": False,
            }
            arrays_info[f"weights_{name}" if not name.startswith("weights_") else name] = info
            pending_payloads.append((f"weights_{name}" if not name.startswith("weights_") else name, arr_c, info))
            current_offset = _aligned_next(current_offset, arr_c.nbytes)

        metadata: dict = {
            "version_gzcm": 4,
            "regions": list(self._regions),
        }
        if self._original_shape is not None:
            metadata["original_shape"] = list(self._original_shape)
        for k, v in self._metadata_extra.items():
            metadata.setdefault(k, v)

        header_dict = {
            "version": self.version,
            "metadata": metadata,
            "arrays": arrays_info,
        }
        header_json = json.dumps(header_dict, sort_keys=True).encode("utf-8")
        header_len = len(header_json)

        max_header_size = ALIGNMENT - 8
        if header_len > max_header_size:
            n_pages = (header_len + 8 + ALIGNMENT - 1) // ALIGNMENT
            base_offset = n_pages * ALIGNMENT
            shift = base_offset - ALIGNMENT
            for info in arrays_info.values():
                info["offset"] += shift
            header_dict["arrays"] = arrays_info
            header_json = json.dumps(header_dict, sort_keys=True).encode("utf-8")
            header_len = len(header_json)
            data_start = base_offset
        else:
            data_start = ALIGNMENT

        with open(self.fpath, "wb") as f:
            f.write(GZCM_MAGIC)
            f.write(struct.pack("<I", header_len))
            f.write(header_json)
            current_pos = f.tell()
            if data_start > current_pos:
                f.write(b"\x00" * (data_start - current_pos))
            f.truncate(data_start)

        for _name, arr, info in pending_payloads:
            mm = np.memmap(
                self.fpath,
                dtype=arr.dtype,
                mode="r+",
                offset=info["offset"],
                shape=tuple(arr.shape),
            )
            mm[:] = arr
            mm.flush()


def _aligned_next(current_offset: int, nbytes: int) -> int:
    """Round ``current_offset + nbytes`` up to the next multiple of ``ALIGNMENT``.

    A sum that already sits on an alignment boundary is returned unchanged.
    """
    end = current_offset + int(nbytes)
    # Ceiling division written as floor division of the negated value.
    return -(-end // ALIGNMENT) * ALIGNMENT


# =============================================================================
# CHUNKED READER
# =============================================================================

class GZCMChunkedReader:
    """Chunked reader for GZCM v2 tiled format.

    Provides memory-efficient chunk-based access to large matrices
    without loading the entire file into memory.

    Parameters
    ----------
    fpath : str | pathlib.Path
        Input GZCM file path.
    chunk_size : int, default=1024
        Size of chunks (rows/cols per chunk).
    buffer_size : int, default=4
        Number of pre-allocated buffers for zero-copy reads.

    Examples
    --------
    >>> reader = GZCMChunkedReader("data.gzcm", chunk_size=1024)
    >>> for chunk, r, c in reader.iter_chunks():
    ...     process(chunk)
    """

    def __init__(
        self,
        fpath: str | pathlib.Path,
        chunk_size: int = 1024,
        buffer_size: int = 4,
    ):
        self.fpath = pathlib.Path(fpath)
        self._reader = GZCMReader(self.fpath)
        self._chunk_size = chunk_size

        self.n_rows = self._reader.metadata.get("original_shape", [0, 0])[0]
        self.n_cols = self._reader.metadata.get("original_shape", [0, 0])[1]

        if self.n_rows == 0 or self.n_cols == 0:
            info = self._reader.arrays_info.get("matrix", {})
            shape = info.get("shape", (0, 0))
            if len(shape) == 4:
                n_blocks_row, n_blocks_col, bs, _ = shape
                self.n_rows = n_blocks_row * bs
                self.n_cols = n_blocks_col * bs
            else:
                self.n_rows, self.n_cols = shape[0], shape[1]

        n_chunks_row = (self.n_rows + chunk_size - 1) // chunk_size
        n_chunks_col = (self.n_cols + chunk_size - 1) // chunk_size
        self.n_chunks_row = n_chunks_row
        self.n_chunks_col = n_chunks_col

        matrix_info = self._reader.arrays_info.get("matrix", {})
        self._dtype = np.dtype(matrix_info.get("dtype", "float32"))

        self._buffers = [
            np.empty(chunk_size * chunk_size, dtype=self._dtype)
            for _ in range(buffer_size)
        ]
        self._buffer_idx = 0

    @property
    def shape(self) -> tuple[int, int]:
        """Matrix shape."""
        return self.n_rows, self.n_cols

    @property
    def chunk_size(self) -> int:
        """Chunk size."""
        return self._chunk_size

    @property
    def metadata(self) -> dict:
        """Metadata from underlying reader."""
        return self._reader.metadata

    def _get_chunk_memmap(self, row_start: int, row_end: int, col_start: int, col_end: int) -> np.memmap:
        """Get memory-mapped view of a chunk region."""
        info = self._reader.arrays_info.get("matrix", {})
        shape = info.get("shape", ())
        dtype = np.dtype(info.get("dtype", "float32"))
        offset = info.get("offset", 0)

        if len(shape) == 4:
            _, _, block_size, _ = shape
            full_shape = (self.n_rows, self.n_cols)

            mm = np.memmap(
                self.fpath,
                dtype=dtype,
                mode="r",
                offset=offset,
                shape=full_shape,
            )
            return mm[row_start:row_end, col_start:col_end]
        else:
            return np.memmap(
                self.fpath,
                dtype=dtype,
                mode="r",
                offset=offset,
                shape=(self.n_rows, self.n_cols),
            )[row_start:row_end, col_start:col_end]

    def get_chunk(
        self,
        chunk_r: int,
        chunk_c: int,
        out: np.ndarray | None = None,
    ) -> np.ndarray:
        """Get chunk data, optionally writing to provided buffer.

        Parameters
        ----------
        chunk_r : int
            Chunk row index.
        chunk_c : int
            Chunk column index.
        out : np.ndarray, optional
            Output buffer. If None, uses pre-allocated buffer.

        Returns
        -------
        np.ndarray
            Chunk data.
        """
        row_start = chunk_r * self._chunk_size
        row_end = min(row_start + self._chunk_size, self.n_rows)
        col_start = chunk_c * self._chunk_size
        col_end = min(col_start + self._chunk_size, self.n_cols)

        view = self._get_chunk_memmap(row_start, row_end, col_start, col_end)

        if out is None:
            out = self._get_buffer()

        out = out.reshape(view.shape)
        np.copyto(out, view)
        return out

    def _get_buffer(self) -> np.ndarray:
        """Get next pre-allocated buffer (round-robin)."""
        buf = self._buffers[self._buffer_idx]
        self._buffer_idx = (self._buffer_idx + 1) % len(self._buffers)
        return buf

    def iter_chunks(self) -> t.Iterator[tuple[np.ndarray, int, int]]:
        """Iterate over all chunks (row-major order).

        Yields
        ------
        chunk : np.ndarray
            Chunk data.
        chunk_r : int
            Chunk row index.
        chunk_c : int
            Chunk column index.
        """
        for chunk_r in range(self.n_chunks_row):
            for chunk_c in range(self.n_chunks_col):
                yield self.get_chunk(chunk_r, chunk_c), chunk_r, chunk_c

    def close(self) -> None:
        """Close the reader (no-op for memory-mapped files)."""
        pass


# =============================================================================
# STREAMING NORMALIZATION
# =============================================================================

def _compute_row_sums_gzcm(fpath: pathlib.Path, array_name: str) -> np.ndarray:
    """Compute per-row sums of squared entries by streaming through a GZCM array.

    Despite the name, the value for each row is ``sum(x ** 2)`` rather than
    ``sum(x)``.

    Parameters
    ----------
    fpath : pathlib.Path
        GZCM file path.
    array_name : str
        Name of the array inside the container.

    Returns
    -------
    np.ndarray
        float64 vector with one entry per row.
    """
    reader = GZCMReader(fpath)
    matrix = reader.get_array(array_name)
    n_rows = matrix.shape[0]
    row_sums = np.zeros(n_rows, dtype=np.float64)
    chunk_size = 1024

    for start in range(0, n_rows, chunk_size):
        stop = min(start + chunk_size, n_rows)
        # Square and accumulate in float64: squaring in the storage dtype
        # overflows integer count matrices and loses precision for float32.
        row_sums[start:stop] = np.square(
            matrix[start:stop, :], dtype=np.float64
        ).sum(axis=1)

    return row_sums


def _compute_col_sums_gzcm(fpath: pathlib.Path, array_name: str) -> np.ndarray:
    """Compute per-column sums of squared entries by streaming through a GZCM array.

    Despite the name, the value for each column is ``sum(x ** 2)`` rather
    than ``sum(x)``.

    Parameters
    ----------
    fpath : pathlib.Path
        GZCM file path.
    array_name : str
        Name of the array inside the container.

    Returns
    -------
    np.ndarray
        float64 vector with one entry per column.
    """
    reader = GZCMReader(fpath)
    matrix = reader.get_array(array_name)
    n_rows, n_cols = matrix.shape[0], matrix.shape[1]
    col_sums = np.zeros(n_cols, dtype=np.float64)
    chunk_size = 1024

    # Stream over blocks of rows and accumulate per column, rather than
    # slicing column slabs; presumably the array is stored row-major, in
    # which case each block is one contiguous read — TODO confirm.
    for start in range(0, n_rows, chunk_size):
        stop = min(start + chunk_size, n_rows)
        # float64 squaring avoids integer overflow / float32 precision loss.
        col_sums += np.square(matrix[start:stop, :], dtype=np.float64).sum(axis=0)

    return col_sums


def kr_normalize_gzcm(
    input_path: pathlib.Path,
    output_path: pathlib.Path,
    array_name: str = "matrix",
    overwrite: bool = True,
) -> np.ndarray:
    """
    Knight-Ruiz normalization on GZCM array.

    The output array is ``diag(w) @ M @ diag(w)`` where
    ``w[i] = (n / s[i]) ** 0.25`` and ``s[i]`` is the value returned by
    ``_compute_row_sums_gzcm`` for row ``i``; rows with ``s[i] <= 0`` keep
    weight 1. The scaling is applied in blocks of rows, so no dense
    ``n x n`` diagonal matrix is materialized.

    NOTE(review): the weights come from a single pass over the input; no
    iterative balancing is performed. Confirm this is the intended
    approximation of Knight-Ruiz.

    Parameters
    ----------
    input_path : pathlib.Path
        Input GZCM file path.
    output_path : pathlib.Path
        Output GZCM file path. Presumably must differ from ``input_path``
        — TODO confirm whether in-place normalization is supported.
    array_name : str, default="matrix"
        Array name to normalize.
    overwrite : bool, default=True
        Overwrite existing output.

    Returns
    -------
    np.ndarray
        Diagonal scaling weights.

    Raises
    ------
    ValueError
        If the array is not a square 2-D matrix.
    """
    reader_in = GZCMReader(input_path)
    writer_out = GZCMWriter(output_path, overwrite=overwrite)

    matrix = reader_in.get_array(array_name)
    # Validate before the output file is initialized so that a bad input
    # does not leave an empty output behind.
    if matrix.ndim != 2 or matrix.shape[0] != matrix.shape[1]:
        raise ValueError(
            f"kr_normalize_gzcm requires a square 2-D array, got shape {matrix.shape}"
        )
    n = matrix.shape[0]

    writer_out.set_metadata(reader_in.metadata)
    writer_out.init_streaming_array(array_name, (n, n), dtype=matrix.dtype)
    writer_out.write()

    row_sums = _compute_row_sums_gzcm(input_path, array_name)

    valid_sums = row_sums > 0
    weights = np.ones(n, dtype=np.float64)
    if np.any(valid_sums):
        weights[valid_sums] = (n / row_sums[valid_sums]) ** 0.25

    matrix_out = writer_out.get_array_writable(array_name)
    chunk_size = 1024
    for start in range(0, n, chunk_size):
        stop = min(start + chunk_size, n)
        # Element-wise w[i] * M[i, j] * w[j]; equals diag(w) @ M @ diag(w)
        # without the O(n^2) diagonal matrices and O(n^3) products.
        scaled = (weights[start:stop, None] * matrix[start:stop, :]) * weights[None, :]
        matrix_out[start:stop, :] = scaled.astype(matrix.dtype)
    matrix_out.flush()

    return weights


def ice_normalize_gzcm(
    input_path: pathlib.Path,
    output_path: pathlib.Path,
    array_name: str = "matrix",
    max_iter: int = 200,
    tolerance: float = 1e-5,
    overwrite: bool = True,
) -> np.ndarray:
    """
    Iterative Correction (ICE) normalization on GZCM array.

    The output array is ``diag(r) @ M @ diag(c)`` with
    ``r[i] = sqrt(row_s[i] / n)`` and ``c[j] = sqrt(col_s[j] / n)``, where
    ``row_s`` / ``col_s`` are the values returned by
    ``_compute_row_sums_gzcm`` / ``_compute_col_sums_gzcm``. Entries whose
    sum is not positive keep factor 1. If ``max_iter`` is 0, or no row or no
    column has a positive sum, all factors stay 1.

    NOTE(review): the factors are derived from the unmodified input matrix,
    so repeating the update cannot change them. They are therefore computed
    once, which gives the same result as repeating the update up to
    ``max_iter`` times. ``tolerance`` is kept for interface compatibility
    and does not influence the result. A true ICE would re-apply the
    accumulated bias before each pass — confirm the intended algorithm.

    Parameters
    ----------
    input_path : pathlib.Path
        Input GZCM file path.
    output_path : pathlib.Path
        Output GZCM file path. Presumably must differ from ``input_path``
        — TODO confirm whether in-place normalization is supported.
    array_name : str, default="matrix"
        Array name to normalize.
    max_iter : int, default=200
        Maximum iterations; 0 disables the correction.
    tolerance : float, default=1e-5
        Convergence tolerance (see note above).
    overwrite : bool, default=True
        Overwrite existing output.

    Returns
    -------
    np.ndarray
        Element-wise product of the row and column scaling factors.

    Raises
    ------
    ValueError
        If the array is not a square 2-D matrix.
    """
    reader_in = GZCMReader(input_path)
    writer_out = GZCMWriter(output_path, overwrite=overwrite)

    matrix = reader_in.get_array(array_name)
    # Validate before the output file is initialized so that a bad input
    # does not leave an empty output behind.
    if matrix.ndim != 2 or matrix.shape[0] != matrix.shape[1]:
        raise ValueError(
            f"ice_normalize_gzcm requires a square 2-D array, got shape {matrix.shape}"
        )
    n = matrix.shape[0]

    writer_out.set_metadata(reader_in.metadata)
    writer_out.init_streaming_array(array_name, (n, n), dtype=matrix.dtype)
    writer_out.write()

    row_weights = np.ones(n, dtype=np.float64)
    col_weights = np.ones(n, dtype=np.float64)

    if max_iter > 0:
        row_sums = _compute_row_sums_gzcm(input_path, array_name)
        col_sums = _compute_col_sums_gzcm(input_path, array_name)

        valid_rows = row_sums > 0
        valid_cols = col_sums > 0

        if np.any(valid_rows) and np.any(valid_cols):
            row_weights[valid_rows] = np.sqrt(row_sums[valid_rows] / n)
            col_weights[valid_cols] = np.sqrt(col_sums[valid_cols] / n)

    matrix_out = writer_out.get_array_writable(array_name)
    chunk_size = 1024
    for start in range(0, n, chunk_size):
        stop = min(start + chunk_size, n)
        # Element-wise r[i] * M[i, j] * c[j]; equals diag(r) @ M @ diag(c)
        # without the O(n^2) diagonal matrices and O(n^3) products.
        scaled = (row_weights[start:stop, None] * matrix[start:stop, :]) * col_weights[None, :]
        matrix_out[start:stop, :] = scaled.astype(matrix.dtype)
    matrix_out.flush()

    return row_weights * col_weights


# =============================================================================
# SPARSE MATRIX SUPPORT
# =============================================================================

def write_sparse_csr_to_gzcm(
    fpath: pathlib.Path,
    csr_matrix,
    metadata: dict | None = None,
    overwrite: bool = True,
) -> None:
    """Write a scipy CSR sparse matrix to GZCM file.

    The matrix is stored as three arrays (``indptr``, ``indices``,
    ``data``) in a version-1 container. The header metadata always carries
    ``format="csr"``, ``n_rows`` and ``n_cols``; these structural keys take
    precedence over same-named entries in ``metadata`` because
    ``read_sparse_csr_from_gzcm`` relies on them.

    Parameters
    ----------
    fpath : pathlib.Path
        Output file path.
    csr_matrix : scipy.sparse.csr_matrix
        Input sparse matrix. Other scipy sparse formats are converted to
        CSR first.
    metadata : dict, optional
        Extra metadata to store in header.
    overwrite : bool, default=True
        Overwrite existing file.

    Raises
    ------
    ValueError
        If ``csr_matrix`` is not a scipy sparse matrix.
    """
    import scipy.sparse as sp

    if not sp.issparse(csr_matrix):
        raise ValueError("Input must be a scipy sparse CSR matrix")
    if csr_matrix.format != "csr":
        csr_matrix = csr_matrix.tocsr()

    n_rows, n_cols = (int(dim) for dim in csr_matrix.shape)

    # Caller metadata first, structural keys last: a stray "format" or
    # "n_rows" in ``metadata`` must not make the file unreadable.
    meta = dict(metadata) if metadata else {}
    meta.update({"format": "csr", "n_rows": n_rows, "n_cols": n_cols})

    # Column indices only fit int32 while the largest index does.
    fits_int32 = max(n_cols - 1, 0) <= np.iinfo(np.int32).max
    indices_dtype = "int32" if fits_int32 else "int64"

    writer = GZCMWriter(fpath, overwrite=overwrite, version=1)
    writer.set_metadata(meta)

    writer.add_array("indptr", csr_matrix.indptr, dtype="int64")
    writer.add_array("indices", csr_matrix.indices, dtype=indices_dtype)
    writer.add_array("data", csr_matrix.data, dtype=csr_matrix.dtype.name)
    writer.write()


def read_sparse_csr_from_gzcm(fpath: pathlib.Path) -> "tuple[scipy.sparse.csr_matrix, dict]":
    """Load a CSR sparse matrix, plus its header metadata, from a GZCM file.

    Parameters
    ----------
    fpath : pathlib.Path
        Input GZCM file path.

    Returns
    -------
    tuple
        - csr_matrix: scipy.sparse.csr_matrix rebuilt from the stored
          ``data`` / ``indices`` / ``indptr`` arrays
        - metadata: dict taken from the file header

    Raises
    ------
    ValueError
        If the header's ``format`` entry is not ``"csr"``.
    KeyError
        If the header lacks ``n_rows`` or ``n_cols``.
    """
    import scipy.sparse as sp

    reader = GZCMReader(fpath)
    metadata = reader.metadata

    stored_format = metadata.get("format")
    if stored_format != "csr":
        raise ValueError(f"File is not a CSR GZCM: format={stored_format}")

    shape = (metadata["n_rows"], metadata["n_cols"])

    # Fetch the three CSR components in their stored naming.
    indptr, indices, data = (
        reader.get_array(component)
        for component in ("indptr", "indices", "data")
    )

    return sp.csr_matrix((data, indices, indptr), shape=shape), metadata


def dense_to_csr_sparse(
    row_ids: np.ndarray,
    col_ids: np.ndarray,
    counts: np.ndarray,
    shape: tuple[int, int],
    dtype: str = "float32",
) -> "scipy.sparse.csr_matrix":
    """Convert COO-style coordinates to a symmetric CSR sparse matrix.

    Every off-diagonal entry ``(i, j, v)`` is mirrored to ``(j, i, v)``.
    Diagonal entries are added once only: mirroring them too would create
    duplicate coordinates, which the CSR conversion sums, doubling the
    diagonal. Coordinates that occur more than once in the input are still
    summed.

    Parameters
    ----------
    row_ids : np.ndarray
        Row indices.
    col_ids : np.ndarray
        Column indices.
    counts : np.ndarray
        Values.
    shape : tuple[int, int]
        Matrix shape (n_rows, n_cols).
    dtype : str, default="float32"
        Data type.

    Returns
    -------
    scipy.sparse.csr_matrix
        CSR sparse matrix.
    """
    import scipy.sparse as sp

    out_dtype = np.dtype(dtype)
    rows = np.asarray(row_ids)
    cols = np.asarray(col_ids)
    values = np.asarray(counts)

    if rows.size == 0:
        return sp.csr_matrix(shape, dtype=out_dtype)

    # Only off-diagonal entries get a mirrored twin.
    off_diagonal = rows != cols
    all_rows = np.concatenate([rows, cols[off_diagonal]])
    all_cols = np.concatenate([cols, rows[off_diagonal]])
    all_data = np.concatenate([values, values[off_diagonal]])

    return sp.csr_matrix(
        (all_data, (all_rows, all_cols)),
        shape=shape,
        dtype=out_dtype,
    )


def csr_to_dense(csr: scipy.sparse.csr_matrix) -> np.ndarray:
    """Materialize a CSR sparse matrix as a dense array.

    Parameters
    ----------
    csr : scipy.sparse.csr_matrix
        Sparse matrix to expand.

    Returns
    -------
    np.ndarray
        Dense array with the same shape as ``csr``; entries that are not
        stored in ``csr`` are zero.
    """
    dense = csr.toarray()
    return dense

# 1-release deprecation aliases; remove in 2.30.0
# Each ``Gzcm*`` name below is bound to the corresponding ``GZCM*`` class, so
# both spellings refer to the same object (presumably the ``Gzcm*`` spellings
# are the older public names — verify against the changelog).
# NOTE(review): these are plain name bindings; using an alias does not emit a
# DeprecationWarning.
GzcmReader = GZCMReader
GzcmWriter = GZCMWriter
GzcmHeader = GZCMHeader
GzcmVersion = GZCMVersion
GzcmChunkedReader = GZCMChunkedReader
GzcmChunkedWriter = GZCMChunkedWriter