import h5py
import numpy as np
import pickle
import zlib
import os
from typing import *
def _obj2uint(obj: object, compression: int=9, protocol: int=2) -> np.ndarray:
"""Transform a python object in a numpy array of uint8
Arguments
---------
obj: object
The object to encode
compression: int, default=9
the compression level of ``zlib``
protocol: int, default=2
the protocol used by ``pickle.dumps``
Returns
-------
An array encoding in bytes (uint8) the object pickled
"""
zstr = zlib.compress(pickle.dumps(obj, protocol=protocol), compression)
return np.fromstring(zstr, dtype=np.uint8)
def _uint2obj(uint: np.ndarray) -> object:
"""Recover the original object encoded in a uint8 numpy array
Arguments
---------
uint: np.ndarray
A uint8 numpy array generated by the ``_obj2uint`` function
Returns
-------
A python object that was encoded in the numpy array
"""
return pickle.loads(zlib.decompress(uint.tobytes()))
def dump_hdf5(obj: object, filename: str,
              data_compression: int=7, chunks: Tuple =(2048, 2048),
              noarray_compression: int=9, pickle_protocol: int=2) -> None:
    """Dump all attributes of a python object to hdf5.

    ``np.ndarray`` attributes are stored directly as gzip-compressed
    datasets.  Every other attribute is pickled + zlib-compressed into a
    uint8 vector and stored under the attribute name prefixed with ``&``,
    which ``load_hdf5`` uses to know it must unpickle on the way back.

    Arguments
    ---------
    obj: object
        a python object
    filename: str
        the name of the file to be saved
    data_compression: int
        the level of compression used by hdf5
    chunks: Tuple, default=(2048, 2048)
        The size of the chunks to be used for compression/random access
    noarray_compression: int, default=9
        the compression level of ``zlib``, used when the attribute is not an array
    pickle_protocol: int, default=2
        the protocol used by ``pickle.dumps`` when the attribute is not an array

    Returns
    -------
    Nothing but it creates a file ``filename``
    """
    # "w" mode already truncates, but remove explicitly to guarantee a
    # clean file regardless of pre-existing HDF5 content.
    if os.path.isfile(filename):
        os.remove(filename)
    # Context manager guarantees the file is closed even if a dataset
    # creation below raises (the original leaked the handle on error).
    with h5py.File(filename, "w") as _file:
        for k in obj.__dict__.keys():
            attribute = getattr(obj, k)
            # NOTE: exact-type check (not isinstance) mirrors load_hdf5's
            # round-trip contract: only plain ndarrays go in as-is.
            if type(attribute) is np.ndarray:
                # Chunk size must not exceed the dataset shape in any dimension.
                chunk_size = tuple((min(chunks[i], attribute.shape[i])
                                    for i in range(len(attribute.shape))))
                _file.create_dataset(k, data=attribute, chunks=chunk_size,
                                     compression="gzip", compression_opts=data_compression,
                                     fletcher32=False, shuffle=False)
            else:
                serialized = _obj2uint(attribute, compression=noarray_compression,
                                       protocol=pickle_protocol)
                # "&" prefix marks the dataset as a pickled blob for load_hdf5.
                _file.create_dataset(f"&{k}", data=serialized,
                                     chunks=tuple((min(1024, len(serialized)),)),
                                     compression="gzip", compression_opts=data_compression,
                                     fletcher32=False, shuffle=False)
def load_hdf5(filename: str, obj_class: Type[object]) -> object:
    """Load all attributes from a hdf5 encoded python object.

    Arguments
    ---------
    filename: str
        the name of the file to be loaded
    obj_class: Type[object]
        the type of object to be generated

    Returns
    -------
    An object that has been filled with the attributes stored in the hdf5 file

    Note
    ----
    The trick to create an empty object was adapted from Guido van Rossum answer:
    https://stackoverflow.com/questions/2168964/python-creating-class-instance-without-calling-initializer
    """
    # Build a blank instance without running __init__ (which may require
    # arguments or perform work we don't want on load).
    obj = obj_class.__new__(obj_class)
    # Context manager guarantees the file is closed even if decoding a
    # dataset below raises (the original leaked the handle on error).
    with h5py.File(filename, "r") as _file:
        for k in _file.keys():
            if k.startswith("&"):
                # "&"-prefixed datasets were written by dump_hdf5 as
                # pickled+zlib'ed blobs; strip the marker and unpickle.
                setattr(obj, k[1:], _uint2obj(_file[k][:]))
            else:
                # Plain datasets are ndarray attributes; [:] materializes
                # the data as a numpy array.
                setattr(obj, k, _file[k][:])
    return obj