Source code for magni.reproducibility.io

"""
..
    Copyright (c) 2014-2017, Magni developers.
    All rights reserved.
    See LICENSE.rst for further information.

Module providing input/output functions to databases containing results from
reproducible research.

Routine listings
----------------
annotate_database(h5file)
    Function for annotating an existing HDF5 database.
chase_database(h5file)
    Function for chasing an existing HDF5 database.
create_database(path, overwrite=True)
    Function for creating a new annotated and chased HDF5 database.
read_annotations(h5file)
    Function for reading annotations in an HDF5 database.
read_chases(h5file)
    Function for reading chases in an HDF5 database.
remove_annotations(h5file)
    Function for removing annotations in an HDF5 database.
remove_chases(h5file)
    Function for removing chases in an HDF5 database.
write_custom_annotation(h5file, annotation_name, annotation_value,
    annotations_sub_group=None)
    Write a custom annotation to an HDF5 database.

See Also
--------
magni.reproducibility._annotation.get_conda_info : Conda annotation
magni.reproducibility._annotation.get_git_revision : Git annotation
magni.reproducibility._annotation.get_platform_info : Platform annotation
magni.reproducibility._annotation.get_datetime : Date and time annotation
magni.reproducibility._annotation.get_magni_config : Magni config annotation
magni.reproducibility._annotation.get_magni_info : Magni info annotation
magni.reproducibility._chase.get_main_file_name : Magni main file name chase
magni.reproducibility._chase.get_main_file_source : Magni source code chase
magni.reproducibility._chase.get_main_source : Magni main source code chase
magni.reproducibility._chase.get_stack_trace : Magni stack trace chase

"""

from __future__ import division
import json
import os

import tables

from magni.reproducibility import _annotation
from magni.reproducibility import _chase
from magni.utils.multiprocessing import File as _File
from magni.utils.validation import decorate_validation as _decorate_validation
from magni.utils.validation import validate_generic as _generic
from magni.utils.validation import validate_numeric as _numeric


def annotate_database(h5file):
    """
    Annotate an HDF5 database with information about Magni and the platform.

    A group named "annotations" is created in the root of the `h5file`. One
    array node is created in that group per annotation, each holding the
    encoded JSON representation of the value returned by the corresponding
    function in `magni.reproducibility._annotation`.

    Parameters
    ----------
    h5file : tables.file.File
        The handle to the HDF5 database that should be annotated.

    Raises
    ------
    tables.NodeError
        If creating the annotations group or one of its nodes fails with a
        `tables.NodeError`; this is reported as the database already being
        annotated.

    See Also
    --------
    magni.reproducibility._annotation.get_conda_info : Conda annotation
    magni.reproducibility._annotation.get_git_revision : Git annotation
    magni.reproducibility._annotation.get_platform_info : Platform annotation
    magni.reproducibility._annotation.get_datetime : Date and time annotation
    magni.reproducibility._annotation.get_magni_config : Magni config
        annotation
    magni.reproducibility._annotation.get_magni_info : Magni info annotation

    Notes
    -----
    The annotations of the database include the following:

    * conda_info - Information about Continuum Anaconda install
    * git_revision - Git revision and tag of Magni
    * platform_info - Information about the current platform (system)
    * datetime - The current date and time
    * magni_config - Information about the current configuration of Magni
    * magni_info - Information from `help(magni)`

    Examples
    --------
    Annotate the database named 'db.hdf5':

    >>> import magni
    >>> from magni.reproducibility.io import annotate_database
    >>> with magni.utils.multiprocessing.File('db.hdf5', mode='a') as h5file:
    ...     annotate_database(h5file)

    """

    # NOTE(review): the validation helpers receive the argument by *name*;
    # presumably they resolve it from the enclosing call frames, so the
    # nested-function layout below is kept exactly as is.
    @_decorate_validation
    def validate_input():
        _generic('h5file', tables.file.File)

    validate_input()

    # The getters are listed (and called) in the order in which the
    # annotations are collected.
    getters = (
        ('conda_info', _annotation.get_conda_info),
        ('git_revision', _annotation.get_git_revision),
        ('platform_info', _annotation.get_platform_info),
        ('datetime', _annotation.get_datetime),
        ('magni_config', _annotation.get_magni_config),
        ('magni_info', _annotation.get_magni_info),
    )

    annotations = {}
    for name, getter in getters:
        annotations[name] = json.dumps(getter())

    try:
        group = h5file.create_group('/', 'annotations')

        for name, serialised in annotations.items():
            h5file.create_array(group, name, obj=serialised.encode())

        h5file.flush()
    except tables.NodeError:
        raise tables.NodeError(
            'The database has already been annotated. '
            'Remove the existing annotation prior to '
            '(re)annotating the database.')
def chase_database(h5file):
    """
    Chase an HDF5 database to track information about stack and source code.

    A group named "chases" is created in the root of the `h5file`. One array
    node is created in that group per chase, each holding the encoded JSON
    representation of the value returned by the corresponding function in
    `magni.reproducibility._chase`.

    Parameters
    ----------
    h5file : tables.file.File
        The handle to the HDF5 database that should be chased.

    Raises
    ------
    tables.NodeError
        If creating the chases group or one of its nodes fails with a
        `tables.NodeError`; this is reported as the database already being
        chased.

    See Also
    --------
    magni.reproducibility._chase.get_main_file_name : Name of main file
    magni.reproducibility._chase.get_main_file_source : Main file source code
    magni.reproducibility._chase.get_main_source : Source code around main
    magni.reproducibility._chase.get_stack_trace : Complete stack trace

    Notes
    -----
    The chase includes the following information:

    * main_file_name - Name of the main file/script that called this function
    * main_file_source - Full source code of the main file/script
    * main_source - Extract of main file source code that called this function
    * stack_trace - Complete stack trace up until the call to this function

    Examples
    --------
    Chase the database named 'db.hdf5':

    >>> import magni
    >>> from magni.reproducibility.io import chase_database
    >>> with magni.utils.multiprocessing.File('db.hdf5', mode='a') as h5file:
    ...     chase_database(h5file)

    """

    # NOTE(review): the validation helpers receive the argument by *name*;
    # presumably they resolve it from the enclosing call frames, so the
    # nested-function layout below is kept exactly as is.
    @_decorate_validation
    def validate_input():
        _generic('h5file', tables.file.File)

    validate_input()

    # The getters are listed (and called) in the order in which the chases
    # are collected.
    getters = (
        ('main_file_name', _chase.get_main_file_name),
        ('main_file_source', _chase.get_main_file_source),
        ('main_source', _chase.get_main_source),
        ('stack_trace', _chase.get_stack_trace),
    )

    chases = {}
    for name, getter in getters:
        chases[name] = json.dumps(getter())

    try:
        group = h5file.create_group('/', 'chases')

        for name, serialised in chases.items():
            h5file.create_array(group, name, obj=serialised.encode())

        h5file.flush()
    except tables.NodeError:
        raise tables.NodeError(
            'The database has already been chased. '
            'Remove the existing chase prior to '
            '(re)chasing the database.')
def create_database(path, overwrite=True):
    """
    Create a new HDF database that is annotated and chased.

    A new HDF database is created at `path` (opened in mode 'w'). It is
    annotated using `magni.reproducibility.io.annotate_database` and chased
    using `magni.reproducibility.io.chase_database`. If the `overwrite` flag
    is true, an existing database at `path` is overwritten.

    Parameters
    ----------
    path : str
        The path to the HDF file that is to be created.
    overwrite : bool
        The flag that indicates if an existing database should be overwritten
        (the default is True).

    Raises
    ------
    IOError
        If `overwrite` is false and `path` already exists in the filesystem.

    See Also
    --------
    magni.reproducibility.io.annotate_database : Database annotation
    magni.reproducibility.io.chase_database : Database chase

    Examples
    --------
    Create a new database named 'new_db.hdf5':

    >>> from magni.reproducibility.io import create_database
    >>> create_database('new_db.hdf5')

    """

    # NOTE(review): the validation helpers receive the arguments by *name*;
    # presumably they resolve them from the enclosing call frames, so the
    # nested-function layout below is kept exactly as is.
    @_decorate_validation
    def validate_input():
        _generic('path', 'string')
        _numeric('overwrite', 'boolean')

    validate_input()

    # The filesystem is only consulted when overwriting is not allowed.
    if not overwrite:
        if os.path.exists(path):
            raise IOError('{!r} already exists in filesystem.'.format(path))

    with _File(path, mode='w') as new_db:
        annotate_database(new_db)
        chase_database(new_db)
def read_annotations(h5file):
    """
    Read the annotations to an HDF5 database.

    The "/annotations" group of the `h5file` is looked up and its content,
    including any sub groups, is collected into a dict.

    Parameters
    ----------
    h5file : tables.file.File
        The handle to the HDF5 database from which the annotations are read.

    Returns
    -------
    annotations : dict
        The annotations read from the HDF5 database.

    Raises
    ------
    tables.NoSuchNodeError
        If the database has no "/annotations" node, i.e. it has not been
        annotated.
    ValueError
        If the annotations to the HDF5 database do not conform to the Magni
        annotation standard.

    Notes
    -----
    The returned dict holds a key for each annotation in the database. The
    value corresponding to a given key is in itself a dict. See
    `magni.reproducibility.annotate_database` for examples of such
    annotations.

    Examples
    --------
    Read annotations from the database named 'db.hdf5':

    >>> import magni
    >>> from magni.reproducibility.io import read_annotations
    >>> with magni.utils.multiprocessing.File('db.hdf5', mode='r') as h5file:
    ...     annotations = read_annotations(h5file)

    """

    # NOTE(review): the validation helpers receive the argument by *name*;
    # presumably they resolve it from the enclosing call frames, so the
    # nested-function layout below is kept exactly as is.
    @_decorate_validation
    def validate_input():
        _generic('h5file', tables.file.File)

    validate_input()

    try:
        root_group = h5file.get_node('/', name='annotations')
    except tables.NoSuchNodeError:
        raise tables.NoSuchNodeError('The database has not been annotated.')

    # The helper fills the dict in place, descending into sub groups.
    collected = {}
    _recursive_annotation_read(root_group, collected)

    return collected
def read_chases(h5file):
    """
    Read the chases to an HDF5 database.

    The "/chases" group of the `h5file` is looked up and every leaf in it is
    read, decoded, and parsed as JSON.

    Parameters
    ----------
    h5file : tables.file.File
        The handle to the HDF5 database from which the chases are read.

    Returns
    -------
    chases : dict
        The chases read from the HDF5 database.

    Raises
    ------
    tables.NoSuchNodeError
        If the database has no "/chases" node, i.e. it has not been chased.
    ValueError
        If the chases to the HDF5 database do not conform to the Magni chases
        standard.

    Notes
    -----
    The returned dict holds a key for each chase in the database. The value
    corresponding to a given key is a string. See
    `magni.reproducibility.chase_database` for examples of such chases.

    Examples
    --------
    Read chases from the database named 'db.hdf5':

    >>> import magni
    >>> from magni.reproducibility.io import read_chases
    >>> with magni.utils.multiprocessing.File('db.hdf5', mode='r') as h5file:
    ...     chases = read_chases(h5file)

    """

    # NOTE(review): the validation helpers receive the argument by *name*;
    # presumably they resolve it from the enclosing call frames, so the
    # nested-function layout below is kept exactly as is.
    @_decorate_validation
    def validate_input():
        _generic('h5file', tables.file.File)

    validate_input()

    try:
        h5_chases = h5file.get_node('/', name='chases')
    except tables.NoSuchNodeError:
        raise tables.NoSuchNodeError('The database has not been chased.')

    chases = dict()

    for chase_name, chase_leaf in h5_chases._v_leaves.items():
        # Only the read/decode/parse of a single chase is guarded, so the
        # error handler always knows which chase failed.
        try:
            chases[chase_name] = json.loads(chase_leaf.read().decode())
        except ValueError as e:
            raise ValueError(
                'Unable to read the {!r} chase. '.format(chase_name) +
                'It seems that the chase does not conform to the ' +
                'Magni chase standard ({!r}).'.format(e.args[0]))

    return chases
def remove_annotations(h5file):
    """
    Remove the annotations from an HDF5 database.

    The "/annotations" node is removed recursively and the `h5file` is
    flushed. If the database holds no such node, nothing is done.

    Parameters
    ----------
    h5file : tables.file.File
        The handle to the HDF5 database from which the annotations are
        removed.

    Examples
    --------
    Remove annotations from the database named 'db.hdf5':

    >>> import magni
    >>> from magni.reproducibility.io import remove_annotations
    >>> with magni.utils.multiprocessing.File('db.hdf5', mode='a') as h5file:
    ...     remove_annotations(h5file)

    """

    # NOTE(review): the validation helpers receive the argument by *name*;
    # presumably they resolve it from the enclosing call frames, so the
    # nested-function layout below is kept exactly as is.
    @_decorate_validation
    def validate_input():
        _generic('h5file', tables.file.File)

    validate_input()

    try:
        h5file.remove_node('/', name='annotations', recursive=True)
        h5file.flush()
    except tables.NoSuchNodeError:
        # A database without annotations is already in the requested state.
        return
def remove_chases(h5file):
    """
    Remove the chases from an HDF5 database.

    The "/chases" node is removed recursively and the `h5file` is flushed. If
    the database holds no such node, nothing is done.

    Parameters
    ----------
    h5file : tables.file.File
        The handle to the HDF5 database from which the chases are removed.

    Examples
    --------
    Remove chases from the database named 'db.hdf5':

    >>> import magni
    >>> from magni.reproducibility.io import remove_chases
    >>> with magni.utils.multiprocessing.File('db.hdf5', mode='a') as h5file:
    ...     remove_chases(h5file)

    """

    # NOTE(review): the validation helpers receive the argument by *name*;
    # presumably they resolve it from the enclosing call frames, so the
    # nested-function layout below is kept exactly as is.
    @_decorate_validation
    def validate_input():
        _generic('h5file', tables.file.File)

    validate_input()

    try:
        h5file.remove_node('/', name='chases', recursive=True)
        h5file.flush()
    except tables.NoSuchNodeError:
        # A database without chases is already in the requested state.
        return
def write_custom_annotation(h5file, annotation_name, annotation_value,
                            annotations_sub_group=None):
    """
    Write a custom annotation to an HDF5 database.

    The annotation is written to the `h5file` under the `annotation_name` such
    that it holds the `annotation_value`. The value is stored as the encoded
    JSON representation of `annotation_value`.

    Parameters
    ----------
    h5file : tables.file.File
        The handle to the HDF5 database to which the annotation is written.
    annotation_name : str
        The name of the annotation to write.
    annotation_value : a JSON serialisable object
        The annotation value to write.
    annotations_sub_group : str
        The group node under "/annotations" to which the custom annotation is
        written (the default is None which implies that the custom annotation
        is written directly under "/annotations").

    Raises
    ------
    TypeError
        If `json.dumps` raises a TypeError for the `annotation_value`.
    tables.NodeError
        If creating the annotation node fails with a `tables.NodeError`; this
        is reported as the annotation already existing in the database.

    Notes
    -----
    The `annotation_value` must be a JSON serialisable object.

    Examples
    --------
    Write a custom annotation to an HDF5 database.

    >>> import magni
    >>> from magni.reproducibility.io import write_custom_annotation
    >>> annotation_name = 'custom_annotation'
    >>> annotation_value = 'the value'
    >>> with magni.utils.multiprocessing.File('db.hdf5', mode='a') as h5file:
    ...     write_custom_annotation(h5file, annotation_name, annotation_value)
    ...     annotations = magni.reproducibility.io.read_annotations(h5file)
    >>> str(annotations['custom_annotation'])
    'the value'

    """

    # NOTE(review): the validation helpers receive the arguments by *name*;
    # presumably they resolve them from the enclosing call frames, so the
    # nested-function layout below is kept exactly as is.
    @_decorate_validation
    def validate_input():
        _generic('h5file', tables.file.File)
        _generic('annotation_name', 'string')
        _generic('annotations_sub_group', 'string', ignore_none=True)

    validate_input()

    if annotations_sub_group is not None:
        annotations_group = '/'.join(['/annotations', annotations_sub_group])
    else:
        annotations_group = '/annotations'

    try:
        ann_val = json.dumps(annotation_value)
    except TypeError:
        raise TypeError('The annotation value does not have a valid JSON ' +
                        'representation. It may not be used as an ' +
                        'annotation.')

    try:
        # createparents=True lets the annotation be written even when the
        # target group does not exist yet.
        h5file.create_array(annotations_group, annotation_name,
                            obj=ann_val.encode(), createparents=True)
        h5file.flush()
    except tables.NodeError:
        # {!r} already quotes the name; no additional quotes are added.
        raise tables.NodeError(
            'The annotation {!r} already exists '.format(annotation_name) +
            'in the database. Remove the old annotation before placing a ' +
            'new one.')
def _recursive_annotation_read(h5_annotations, out_annotations_dict):
    """
    Recursively read annotations from an annotation group.

    Every leaf of the group is read, decoded, and parsed as JSON into
    `out_annotations_dict` under the name of the leaf. Every sub group is
    stored as a nested dict under the name of the sub group and filled in the
    same way.

    Parameters
    ----------
    h5_annotations : tables.group.Group
        The group to read annotations from.
    out_annotations_dict : dict
        The dictionary to store the read annotations in. It is modified in
        place.

    Raises
    ------
    ValueError
        If reading, decoding, or parsing a leaf raises a ValueError.

    """

    leaf_nodes = h5_annotations._v_leaves
    child_groups = h5_annotations._v_groups

    # Leaves first: each one holds an encoded JSON document.
    try:
        for annotation_name, leaf in leaf_nodes.items():
            raw_value = leaf.read()
            out_annotations_dict[annotation_name] = json.loads(
                raw_value.decode())
    except ValueError as e:
        message = ('Unable to read the {!r} '
                   'annotation. It seems that the annotation '
                   'does not conform to the Magni annotation '
                   'standard ({!r}).')
        raise ValueError(message.format(annotation_name, e.args[0]))

    # Then descend: each sub group becomes a nested dict of its own.
    for group_name, group in child_groups.items():
        nested = dict()
        out_annotations_dict[group_name] = nested
        _recursive_annotation_read(group, nested)