Source code for gunz_cm.compressions.zstd_encoder

"""
Zstd encoder wrapper for GZCM v3 compression.

Falls back to zlib when the optional ``zstandard`` package is not installed.
For better performance, install zstandard: pip install zstandard

Examples
--------
"""

__author__ = "Yeremia Gunawan Adhisantoso"
__email__ = "adhisant@tnt.uni-hannover.de"
__license__ = "Clear BSD"

import zlib
import numpy as np

try:
    import zstandard as zstd

    HAS_ZSTD = True
except ImportError:
    HAS_ZSTD = False


class ZstdEncoder:
    """Zstd encoder for contact matrix tiles.

    Each tile is serialized with ``ndarray.tobytes()`` and compressed with
    zstd when it is enabled and importable, otherwise with zlib. Only the raw
    element buffer is compressed; shape and dtype are not stored in the
    bitstream.

    Parameters
    ----------
    tile_size : int, default=256
        Tile size for block processing.
    resolution : int, default=50000
        Hi-C resolution in bp.
    level : int, default=3
        Compression level (1-22 for zstd, 1-9 for zlib fallback). On the zlib
        fallback, levels above 9 are capped at 9 and levels below -1 are
        mapped to 1.
    use_zstd : bool, default=True
        Use zstd if available, otherwise zlib fallback.

    Examples
    --------
    >>> import zlib
    >>> import numpy as np
    >>> enc = ZstdEncoder(tile_size=2, use_zstd=False)
    >>> tile = np.zeros((2, 2), dtype=np.uint8)
    >>> zlib.decompress(enc.encode_tile(tile)) == tile.tobytes()
    True
    """

    def __init__(
        self,
        tile_size: int = 256,
        resolution: int = 50000,
        level: int = 3,
        use_zstd: bool = True,
    ):
        """Store the encoder configuration.

        ``use_zstd`` is only honoured when the ``zstandard`` import succeeded
        at module load time; otherwise the encoder uses zlib.
        """
        self.tile_size = tile_size
        self.resolution = resolution
        self.level = level
        self.use_zstd = use_zstd and HAS_ZSTD

    def _zlib_level(self) -> int:
        """Translate the configured level into one that zlib accepts."""
        # zlib only accepts -1 (library default) and 0..9. Negative zstd
        # "fast" levels below -1 would raise zlib.error, so map them to the
        # fastest real zlib level. Levels that already worked are unchanged.
        if self.level < -1:
            return 1
        return min(self.level, 9)

    def encode_tile(self, mat: np.ndarray) -> bytes:
        """Encode a single contact matrix tile.

        Parameters
        ----------
        mat : np.ndarray
            2D contact matrix tile.

        Returns
        -------
        bytes
            Compressed bitstream of ``mat.tobytes()``.

        Examples
        --------
        >>> import zlib
        >>> import numpy as np
        >>> enc = ZstdEncoder(level=22, use_zstd=False)
        >>> tile = np.arange(4, dtype=np.int32).reshape(2, 2)
        >>> enc.encode_tile(tile) == zlib.compress(tile.tobytes(), level=9)
        True
        """
        data = mat.tobytes()
        if self.use_zstd:
            ctx = zstd.ZstdCompressor(level=self.level)
            return ctx.compress(data)
        return zlib.compress(data, level=self._zlib_level())

    def encode_tiles(self, tiles: np.ndarray) -> list[bytes]:
        """Encode multiple tiles.

        Parameters
        ----------
        tiles : np.ndarray
            4D array of shape (n_tile_rows, n_tile_cols, tile_size, tile_size).

        Returns
        -------
        list[bytes]
            List of encoded bitstreams, one per tile, in row-major order
            (all columns of tile row 0, then tile row 1, and so on).

        Examples
        --------
        >>> import numpy as np
        >>> enc = ZstdEncoder(tile_size=2, use_zstd=False)
        >>> len(enc.encode_tiles(np.zeros((2, 3, 2, 2), dtype=np.uint8)))
        6
        """
        n_tile_rows, n_tile_cols = tiles.shape[0], tiles.shape[1]
        return [
            self.encode_tile(tiles[i, j])
            for i in range(n_tile_rows)
            for j in range(n_tile_cols)
        ]

    def get_compression_info(self) -> dict:
        """Return compression metadata.

        Returns
        -------
        dict
            Compression parameters for header. ``"level"`` is the configured
            level, not the adjusted value passed to zlib on the fallback path.

        Examples
        --------
        >>> ZstdEncoder(use_zstd=False).get_compression_info()["codec"]
        'zlib'
        """
        return {
            "codec": "zstd" if self.use_zstd else "zlib",
            "version": "1.0",
            "tile_size": self.tile_size,
            "resolution": self.resolution,
            "level": self.level,
        }