"""
BSC Decoder wrapper for GZCM v3 compression.
Uses BSC CLI subprocess with LD_LIBRARY_PATH for libomp.
Examples
--------
"""
__author__ = "Yeremia Gunawan Adhisantoso"
__email__ = "adhisant@tnt.uni-hannover.de"
__license__ = "Clear BSD"
import os
import subprocess
import tempfile
import pathlib
import numpy as np
class BscDecoder:
    """BSC decoder for contact matrix tiles.

    Decompresses tiles by running the ``bsc`` command-line tool in a
    subprocess (BSC = Block Sorting Compression). The subprocess runs with
    an environment whose ``LD_LIBRARY_PATH`` starts with ``lib_path`` so the
    binary can locate its shared libraries.

    Parameters
    ----------
    tile_size : int, default=512
        Edge length of a square tile; every decoded payload is reshaped to
        ``(tile_size, tile_size)``.
    resolution : int, default=50000
        Hi-C resolution in bp. Stored on the instance; the decoding methods
        of this class do not read it.
    dtype : np.dtype, default=np.uint32
        Data type for decoded tiles.
    bsc_path : str or os.PathLike, optional
        Location of the ``bsc`` executable. Defaults to the path that was
        previously hard-coded in this class.
    lib_path : str or os.PathLike, optional
        Directory placed first on ``LD_LIBRARY_PATH`` for the subprocess.
        Defaults to the directory that was previously hard-coded.
    """

    # Historical defaults, kept so existing callers behave as before.
    _DEFAULT_BSC_PATH = "/home/adhisant/tmp/bin/bsc"
    _DEFAULT_LIB_PATH = "/home/adhisant/tmp/miniforge3/envs/gunz_cm/lib"

    def __init__(
        self,
        tile_size: int = 512,
        resolution: int = 50000,
        dtype: np.dtype = np.uint32,
        bsc_path: "str | os.PathLike | None" = None,
        lib_path: "str | os.PathLike | None" = None,
    ):
        """Store the tile geometry and prepare the subprocess environment."""
        self.tile_size = tile_size
        self.resolution = resolution
        self.dtype = np.dtype(dtype)

        self._bsc_path = pathlib.Path(
            self._DEFAULT_BSC_PATH if bsc_path is None else bsc_path
        )
        lib_dir = os.fspath(
            self._DEFAULT_LIB_PATH if lib_path is None else lib_path
        )

        # Prepend rather than overwrite, so library directories already
        # configured in the parent environment stay reachable.
        self._env = os.environ.copy()
        inherited = self._env.get("LD_LIBRARY_PATH")
        self._env["LD_LIBRARY_PATH"] = (
            os.pathsep.join([lib_dir, inherited]) if inherited else lib_dir
        )

    def decode_tile(self, payload: bytes) -> np.ndarray:
        """Decode a single BSC-compressed tile.

        Parameters
        ----------
        payload : bytes
            BSC-compressed bitstream.

        Returns
        -------
        np.ndarray
            Decoded contact matrix tile of shape ``(tile_size, tile_size)``
            and dtype ``self.dtype``. The array is a read-only view over the
            decompressed bytes (``np.frombuffer`` on a ``bytes`` object).

        Raises
        ------
        subprocess.CalledProcessError
            If the ``bsc`` process exits with a non-zero status.
        ValueError
            If the decompressed size does not match one full tile.
        """
        # A private scratch directory guarantees both files are removed on
        # every exit path, including failures while writing the input.
        with tempfile.TemporaryDirectory() as work_dir:
            work = pathlib.Path(work_dir)
            in_path = work / "input.dat"
            out_path = work / "output.dat"
            in_path.write_bytes(payload)
            # The original code handed bsc an already-existing (empty)
            # output file; keep that invocation state unchanged.
            out_path.touch()

            subprocess.run(
                [str(self._bsc_path), "d", str(in_path), str(out_path)],
                check=True,
                capture_output=True,
                env=self._env,
            )
            data = out_path.read_bytes()

        side = self.tile_size
        expected_nbytes = side * side * self.dtype.itemsize
        if len(data) != expected_nbytes:
            raise ValueError(
                f"decoded tile has {len(data)} bytes, expected "
                f"{expected_nbytes} for a {side}x{side} tile of dtype "
                f"{self.dtype}"
            )
        return np.frombuffer(data, dtype=self.dtype).reshape(side, side)

    def decode_tiles(self, payloads: list[bytes]) -> np.ndarray:
        """Decode multiple tiles into a 4D array.

        The tiles are laid out row-major on a grid with
        ``floor(sqrt(len(payloads)))`` rows and
        ``len(payloads) // rows`` columns.

        Parameters
        ----------
        payloads : list[bytes]
            List of encoded bitstreams.

        Returns
        -------
        np.ndarray
            4D array of decoded tiles
            ``(n_tile_rows, n_tile_cols, tile_size, tile_size)``. For an
            empty ``payloads`` the shape is ``(0, 0, tile_size, tile_size)``.

        Warns
        -----
        RuntimeWarning
            If the number of payloads does not fill the grid exactly; the
            trailing tiles are then left out of the result.
        """
        n_tiles = len(payloads)
        if n_tiles == 0:
            # Nothing to decode; previously this failed with IndexError.
            return np.empty(
                (0, 0, self.tile_size, self.tile_size), dtype=self.dtype
            )

        decoded = [self.decode_tile(p) for p in payloads]
        tile_shape = decoded[0].shape

        tile_rows = int(np.sqrt(n_tiles))
        tile_cols = n_tiles // tile_rows
        n_used = tile_rows * tile_cols
        if n_used != n_tiles:
            import warnings

            # Truncation is kept for backward compatibility, but it is no
            # longer silent.
            warnings.warn(
                f"{n_tiles} tiles do not fill a {tile_rows}x{tile_cols} "
                f"grid; the last {n_tiles - n_used} tile(s) are dropped",
                RuntimeWarning,
                stacklevel=2,
            )

        result = np.empty((tile_rows, tile_cols, *tile_shape), dtype=self.dtype)
        for idx, tile in enumerate(decoded[:n_used]):
            # divmod maps the flat index to its (row, col) grid position.
            result[divmod(idx, tile_cols)] = tile
        return result