Source code for magni.cs.phase_transition._simulation

"""
..
    Copyright (c) 2014-2017, Magni developers.
    All rights reserved.
    See LICENSE.rst for further information.

Module providing the actual simulation functionality.

Routine listings
----------------
run(algorithm, path, label)
     Simulate a reconstruction algorithm.

See Also
--------
magni.cs.phase_transition._config : Configuration options.

Notes
-----
The results of the simulation are backed up throughout the simulation. In case
the simulation is interrupted during execution, the simulation will resume from
the last backup point when run again.

"""

from __future__ import division
import os
import random
import sys
import time

import numpy as np

from magni.cs.phase_transition import _backup
from magni.cs.phase_transition import config as _conf
from magni.cs.phase_transition import _data
from magni.utils.multiprocessing import File as _File
from magni.utils.multiprocessing import process as _process
from magni.utils import split_path as _split_path

if sys.version_info[0] == 2:
    iter_range = xrange(2**30)
else:
    iter_range = range(2**32)


[docs]def run(algorithm, path, label, pre_simulation_hook=None):
    """
    Simulate a reconstruction algorithm.

    The simulation results are stored in a HDF5 database rather than returned
    by the function.

    Parameters
    ----------
    algorithm : function
        A function handle to the reconstruction algorithm.
    path : str
        The path of the HDF5 database where the results should be stored.
    label : str
        The label assigned to the simulation results
    pre_simumlation_hook : callable
        A handle to a callable which should be run *just* before the call to
        the reconstruction algorithm (the default is None, which implies that
        no pre hook is run).

    """

    tmp_dir = _split_path(path)[0] + '.tmp' + os.sep
    tmp_file = tmp_dir + label.replace('/', '#') + '.hdf5'

    if not os.path.isdir(tmp_dir):
        os.mkdir(tmp_dir)

    if not os.path.isfile(tmp_file):
        _backup.create(tmp_file)

    done = _backup.get(tmp_file)
    shape = [len(_conf['delta']), len(_conf['rho'])]
    random.seed(_conf['seed'])
    seeds = np.array(
        random.sample(iter_range, shape[0] * shape[1])).reshape(shape)

    tasks = [(algorithm, (i, j), seeds[i, j], tmp_file, pre_simulation_hook)
             for i in range(shape[0]) for j in range(shape[1])
             if not done[i, j]]

    _process(_simulate, args_list=tasks, maxtasks=_conf['maxpoints'])

    with _File(tmp_file, 'r') as f:
        stat_time = f.root.time[:]
        stat_dist = f.root.dist[:]
        stat_mse = f.root.mse[:]
        stat_norm = f.root.norm[:]

    with _File(path, 'a') as f:
        f.create_array('/' + label, 'time', stat_time, createparents=True)
        f.create_array('/' + label, 'dist', stat_dist, createparents=True)
        f.create_array('/' + label, 'mse', stat_mse, createparents=True)
        f.create_array('/' + label, 'norm', stat_norm, createparents=True)

    os.remove(tmp_file)

    if len(os.listdir(tmp_dir)) == 0:
        os.removedirs(tmp_dir)


[docs]def _simulate(algorithm, ij_tuple, seed, path, pre_simulation_hook=None):
    """
    Run a number of monte carlo simulations in a single delta-rho point.

    The result of a simulation is the simulation error distance, i.e., the
    ratio between the energy of the coefficient residual and the energy of the
    coefficient vector. The time of the simulation is the execution time of the
    reconstruction attempt.

    Parameters
    ----------
    algorithm : function
        A function handle to the reconstruction algorithm.
    ij_tuple : tuple
        A tuple (i, j) containing the parameters i, j as listed below.
    i : int
        The delta-index of the point in the delta-rho grid.
    j : int
        The rho-index of the point in the delta-rho grid.
    seed : int
        The seed to use in the random number generator when generating the
        problem suite instances.
    path : str
        The path of the HDF5 backup database.
    pre_simulation_hook : callable
        A handle to a callable which should be run *just* before the call to
        the reconstruction algorithm (the default is None, which implies that
        no pre hook is run).

    See Also
    --------
    magni.cs.phase_transition._config: Configuration options.
    magni.cs.phase_transition._data.generate_matrix : Matrix generation.
    magni.cs.phase_transition._data.generate_vector : Coefficient vector
        generation.

    Notes
    -----
    The `pre_simulation_hook` may be used to setup the simulation to match the
    specfic simulation parameters, e.g. if an oracle estimator is used in the
    reconstruction algorithm. The `pre_simulation_hook` takes one argument
    which is the locals() dict.

    The following reconstruction statistics are computed:

    * time: Measured algorithm run time in seconds.
    * dist: Normalised mean squared error (NMSE) - (
      ||alpha_hat - alpha|| / ||alpha||)^2
    * mse: Mean squared error (MSE) - 1/n * ||alpha_hat - alpha||^2
    * norm: True vector norm - ||alpha||

    """

    i, j = ij_tuple

    n = _conf['problem_size']
    m = int(np.round(n * _conf['delta'][i]))
    k = int(np.round(m * _conf['rho'][j]))
    noise = _conf['noise']

    stat_time = np.zeros(_conf['monte_carlo'], dtype=np.float64)
    stat_dist = stat_time.copy()
    stat_mse = stat_time.copy()
    stat_norm = stat_time.copy()

    if k > 0:
        np.random.seed(seed)
        for l in range(_conf['monte_carlo']):
            A = _data.generate_matrix(m, n)
            alpha = _data.generate_vector(n, k)

            if noise is not None:
                e = _data.generate_noise(m, n, k)
                y = A.dot(alpha) + e
            else:
                y = A.dot(alpha)

            if pre_simulation_hook is not None:
                pre_simulation_hook(locals())

            start = time.time()
            alpha_hat = algorithm(y, A, **_conf['algorithm_kwargs'])
            end = time.time()

            error_norm = np.linalg.norm(alpha_hat - alpha)
            alpha_norm = np.linalg.norm(alpha)

            stat_time[l] = end - start
            stat_dist[l] = (error_norm / alpha_norm)**2
            stat_mse[l] = 1.0/n * error_norm**2
            stat_norm[l] = alpha_norm

    _backup.set(path, (i, j), stat_time, stat_dist, stat_mse, stat_norm)