"""
Zstd encoder wrapper for GZCM v3 compression.
Uses zlib as fallback since zstandard may not be installed.
For better performance, install zstandard: pip install zstandard
Examples
--------
"""
__author__ = "Yeremia Gunawan Adhisantoso"
__email__ = "adhisant@tnt.uni-hannover.de"
__license__ = "Clear BSD"
import zlib
import numpy as np
try:
import zstandard as zstd
HAS_ZSTD = True
except ImportError:
HAS_ZSTD = False
class ZstdEncoder:
    """Zstd encoder for contact matrix tiles.

    Each tile is serialised with ``ndarray.tobytes()`` and compressed with
    zstd, or with zlib when zstd is disabled or the ``zstandard`` package
    could not be imported. The compressed stream contains only the raw
    array bytes; dtype and shape are not recorded in it.

    Parameters
    ----------
    tile_size : int, default=256
        Tile size for block processing. Stored and reported by
        :meth:`get_compression_info`; it does not affect encoding.
    resolution : int, default=50000
        Hi-C resolution in bp. Stored and reported by
        :meth:`get_compression_info`; it does not affect encoding.
    level : int, default=3
        Compression level (1-22 for zstd, 1-9 for zlib fallback). In the
        zlib fallback, values outside zlib's accepted range of -1..9 are
        clamped into that range instead of raising ``zlib.error``.
    use_zstd : bool, default=True
        Use zstd if available, otherwise zlib fallback.

    Examples
    --------
    >>> enc = ZstdEncoder(tile_size=4, level=6, use_zstd=False)
    >>> enc.get_compression_info()["codec"]
    'zlib'
    """

    # Inclusive level bounds accepted by ``zlib.compress``; -1 asks zlib for
    # its built-in default level.
    _ZLIB_MIN_LEVEL = -1
    _ZLIB_MAX_LEVEL = 9

    def __init__(
        self,
        tile_size: int = 256,
        resolution: int = 50000,
        level: int = 3,
        use_zstd: bool = True,
    ):
        """Store the encoder settings.

        ``use_zstd`` is only honoured when the ``zstandard`` import at module
        level succeeded; otherwise the encoder silently uses zlib.
        """
        self.tile_size = tile_size
        self.resolution = resolution
        self.level = level
        self.use_zstd = use_zstd and HAS_ZSTD

    def encode_tile(self, mat: np.ndarray) -> bytes:
        """Encode a single contact matrix tile.

        Parameters
        ----------
        mat : np.ndarray
            2D contact matrix tile.

        Returns
        -------
        bytes
            Compressed bitstream of ``mat.tobytes()``.

        Examples
        --------
        >>> import zlib
        >>> import numpy as np
        >>> tile = np.arange(16, dtype=np.uint16).reshape(4, 4)
        >>> enc = ZstdEncoder(level=6, use_zstd=False)
        >>> zlib.decompress(enc.encode_tile(tile)) == tile.tobytes()
        True
        """
        data = mat.tobytes()
        if self.use_zstd:
            # A fresh compressor per call keeps this method free of shared
            # mutable state between calls.
            return zstd.ZstdCompressor(level=self.level).compress(data)

        # zstd accepts levels that zlib rejects (above 9, or negative "fast"
        # levels below -1); clamp at both ends so the fallback never raises
        # on a level that is valid for zstd.
        zlib_level = max(
            self._ZLIB_MIN_LEVEL, min(self.level, self._ZLIB_MAX_LEVEL)
        )
        return zlib.compress(data, level=zlib_level)

    def encode_tiles(self, tiles: np.ndarray) -> list[bytes]:
        """Encode multiple tiles.

        Parameters
        ----------
        tiles : np.ndarray
            4D array of shape (n_tile_rows, n_tile_cols, tile_size, tile_size).

        Returns
        -------
        list[bytes]
            List of encoded bitstreams, one per tile, in row-major order
            (all columns of tile row 0, then tile row 1, ...).

        Examples
        --------
        >>> import numpy as np
        >>> tiles = np.zeros((2, 3, 4, 4), dtype=np.uint16)
        >>> len(ZstdEncoder(use_zstd=False).encode_tiles(tiles))
        6
        """
        n_tile_rows, n_tile_cols = tiles.shape[0], tiles.shape[1]
        # np.ndindex yields (row, col) tuples in row-major order, matching a
        # nested "for row: for col:" traversal.
        return [
            self.encode_tile(tiles[idx])
            for idx in np.ndindex(n_tile_rows, n_tile_cols)
        ]

    def get_compression_info(self) -> dict:
        """Return compression metadata.

        Returns
        -------
        dict
            Compression parameters for header: the codec actually in use
            (``"zstd"`` or ``"zlib"``), a fixed version string, and the
            configured tile size, resolution and level. ``level`` is the
            value passed to the constructor, not the clamped zlib level.

        Examples
        --------
        >>> ZstdEncoder(tile_size=4, use_zstd=False).get_compression_info()["tile_size"]
        4
        """
        return {
            "codec": "zstd" if self.use_zstd else "zlib",
            "version": "1.0",
            "tile_size": self.tile_size,
            "resolution": self.resolution,
            "level": self.level,
        }