"""Source code for velocyto.serialization."""

import h5py
import numpy as np
import pickle
import zlib
import os
from typing import *


def _obj2uint(obj: object, compression: int=9, protocol: int=2) -> np.ndarray:
    """Transform a python object in a numpy array of uint8
    
    Arguments
    ---------
    obj: object
        The object to encode
    compression: int, default=9
        the compression level of ``zlib``
    protocol: int, default=2
        the protocol used by ``pickle.dumps``
    
    Returns
    -------
    An array encoding in bytes (uint8) the object pickled
    """
    zstr = zlib.compress(pickle.dumps(obj, protocol=protocol), compression)
    return np.fromstring(zstr, dtype=np.uint8)


def _uint2obj(uint: np.ndarray) -> object:
    """Recover the original object encoded in a uint8 numpy array
    
    Arguments
    ---------
    uint: np.ndarray
        A uint8 numpy array generated by the ``_obj2uint`` function
    
    Returns
    -------
    A python object that was encoded in the numpy array
    """
    return pickle.loads(zlib.decompress(uint.tobytes()))


def dump_hdf5(obj: object, filename: str, data_compression: int=7,
              chunks: Tuple=(2048, 2048), noarray_compression: int=9,
              pickle_protocol: int=2) -> None:
    """Dump all attributes of a python object to hdf5

    Arguments
    ---------
    obj: object
        a python object
    filename: str
        the name of the file to be saved
    data_compression: int
        the level of compression used by hdf5
    chunks: Tuple, default=(2048, 2048)
        The size of the chunks to be used for compression/random access
    noarray_compression: int, default=9
        the compression level of ``zlib``, used when the attribute is not an array
    pickle_protocol: int, default=2
        the protocol used by ``pickle.dumps`` when the attribute is not an array

    Returns
    -------
    Nothing but it creates a file ``filename``
    """
    if os.path.isfile(filename):
        os.remove(filename)
    # with-statement guarantees the hdf5 file is closed even if an attribute
    # fails to serialize (the original left the handle open on error)
    with h5py.File(filename, "w") as _file:
        for k in obj.__dict__.keys():
            attribute = getattr(obj, k)
            # Exact-type check kept on purpose: ndarray subclasses go through
            # the pickle path so their type information is preserved.
            if type(attribute) is not np.ndarray:
                # Non-array attributes are pickled+zlib-compressed into a uint8
                # vector; the "&" prefix marks them for decoding in load_hdf5.
                serialized = _obj2uint(attribute, compression=noarray_compression,
                                       protocol=pickle_protocol)
                _file.create_dataset(f"&{k}", data=serialized,
                                     chunks=tuple((min(1024, len(serialized)),)),
                                     compression="gzip",
                                     compression_opts=data_compression,
                                     fletcher32=False, shuffle=False)
            else:
                # Clamp the requested chunk shape to the array shape per axis
                chunk_size = tuple((min(chunks[i], attribute.shape[i])
                                    for i in range(len(attribute.shape))))
                _file.create_dataset(k, data=attribute, chunks=chunk_size,
                                     compression="gzip",
                                     compression_opts=data_compression,
                                     fletcher32=False, shuffle=False)
def load_hdf5(filename: str, obj_class: Type[object]) -> object:
    """Load all attributes from a hdf5 encoded python object

    Arguments
    ---------
    filename:
        the name of the file to be loaded
    obj_class:
        the type of object to be generated

    Returns
    -------
    An object that has been filled with the attributes stored in the hdf5 file

    Note
    ----
    The trick to create an empty object was adapted from Guido van Rossum answer:
    https://stackoverflow.com/questions/2168964/python-creating-class-instance-without-calling-initializer
    """
    # __new__ creates an empty instance without running __init__
    obj = obj_class.__new__(obj_class)
    # with-statement guarantees the file is closed even if decoding raises
    # (the original left the handle open on error)
    with h5py.File(filename, "r") as _file:
        for k in _file.keys():
            if k.startswith("&"):
                # "&"-prefixed datasets are pickled+zlib-compressed objects
                # written by dump_hdf5; strip the marker and decode
                setattr(obj, k[1:], _uint2obj(_file[k][:]))
            else:
                # Plain datasets are numpy arrays; [:] reads them into memory
                setattr(obj, k, _file[k][:])
    return obj