Source code for magni.cs.phase_transition._analysis

"""
..
    Copyright (c) 2014-2017, Magni developers.
    All rights reserved.
    See LICENSE.rst for further information.

Module providing functionality for analysing the simulation results.

Routine listings
----------------
run(path, label)
    Determine the phase transition from the simulation results.

See Also
--------
magni.cs.phase_transition.config : Configuration options.

Notes
-----
For a description of the concept of phase transition, see [1]_.

References
----------
.. [1] C. S. Oxvig, P. S. Pedersen, T. Arildsen, and T. Larsen, "Surpassing the
   Theoretical 1-norm Phase Transition in Compressive Sensing by Tuning the
   Smoothed l0 Algorithm", *in IEEE International Conference on Acoustics,
   Speech and Signal Processing (ICASSP)*, Vancouver, Canada, May 26-31, 2013,
   pp. 6019-6023.

"""

from __future__ import division

import numpy as np

from magni.cs.phase_transition import config as _conf
from magni.utils.multiprocessing import File as _File


[docs]def run(path, label): """ Determine the phase transition from the simulation results. The simulation results should be present in the HDF5 database specified by `path` in the pytables group specified by `label` in an array named 'dist'. The determined phase transition (50% curve) is stored in the same HDF5 database, in the same HDF group in an array named 'phase_transition'. Additionally, the 10%, 25%, 75%, and 90% percentiles are stored in an array named 'phase_transition_percentiles'. Parameters ---------- path : str The path of the HDF5 database. label : str The path of the pytables group in the HDF5 database. See Also -------- _estimate_PT : The actual phase transition estimation. Notes ----- A simulation is considered successful if the simulation result is less than a normalised mean squared error tolerance computed as 10^(-SNR/10) wtih SNR configured in the configuration module. """ percentiles = [0.5, 0.1, 0.25, 0.75, 0.9] with _File(path, 'a') as f: if not '/' + label + '/phase_transition' in f: dist = f.get_node('/' + label + '/dist')[:] # Set NaNs to value > NMSE_tolerance, i.e. assume failure dist[np.isnan(dist)] = 10**(-_conf['SNR'] / 10) * 2 if _conf['logit_solver'] == 'built-in': # Use "simple" built-in solver rho = _built_in_logit_solver(dist, percentiles) elif _conf['logit_solver'] == 'sklearn': # Use scikit learn solver rho = _sklearn_logit_solver(dist, percentiles) f.create_array('/' + label, 'phase_transition', rho[0]) f.create_array('/' + label, 'phase_transition_percentiles', rho[1:])
[docs]def _built_in_logit_solver(dist, percentiles): """ Fit a logistic regression model using the built-in solver. Parameters ---------- dist : ndarray The simulated signal "distances" in the phase space. percentiles : list or tuple The percentiles to estimate. Returns ------- rho : ndarray The "len(percentiles)"-by-"len(delta)" array of estimated phase transition rho vectors. The phase transition rho vectors are (in order): 50% (the phase transition esitmate), smaller to larger percentiles. """ monte_carlo = dist.shape[2] points = len(_conf['rho']) * monte_carlo NMSE_tolerance = 10**(-_conf['SNR'] / 10) z = np.zeros(points) y = np.zeros(points) rho = np.zeros((len(percentiles), len(_conf['delta']))) for i in range(len(_conf['delta'])): n = np.round(_conf['delta'][i] * _conf['problem_size']) for j in range(len(_conf['rho'])): if n > 0: var = _conf['rho'][j] var = np.round(var * n) / n else: var = 0. for m in range(monte_carlo): z[j * monte_carlo + m] = var y[j * monte_carlo + m] = dist[i, j, m] < NMSE_tolerance rho[:, i] = _estimate_PT(z, y, percentiles) return rho
[docs]def _estimate_PT(rho, success, percentiles): """ Estimate the phase transition location for a given delta. The phase transition location is estimated using logistic regression. The algorithm used for this is Newton's method. Parameters ---------- rho : ndarray The rho values. success : ndarray The success indicators. percentiles : list or tuple The percentiles to estimate. Returns ------- rho : float The estimated phase transition location. Notes ----- The function includes a number of non-standard ways of handling numerical and convergence related issues. This will be changed in a future version of the code. """ points = len(success) if success.sum() < 0.5: # if none of the simulations were successful return [0] * len(percentiles) elif success.sum() > points - 0.5: # if all of the simulations were successful return [1] * len(percentiles) y = np.zeros((points + 2, 1)) y[:points, 0], y[points:, 0] = success, [1, 0] # note: z_i in the algorithm is z[i, :]^T in the implementation z = np.ones((points + 2, 2)) z[:points, 1], z[points:, 1] = rho, [0, 1] b = np.zeros((2, 1)) for l in range(100): # p = [p_i]^T # p_i = exp(b^T z_i) / (1 + exp(b^T z_i)) # p_i = 1 / (1 + exp(-z_i^T b)) p = 1 / (1 + np.exp(-z.dot(b))) # gradient # g = sum([(y_i - p_i) * z_i]) g = z.T.dot(y - p) # Hessian # H = -sum([p_i * (1 - p_i) * z_i z_i^T]) H = -(p * (1 - p) * z).T.dot(z) try: # step s = np.linalg.inv(H).dot(g) except np.linalg.LinAlgError: # it results in convergence but is hardly a standard solution det = H[0, 0] * H[1, 1] - H[0, 1] * H[1, 0] det = np.sign(np.sign(det) + 0.5) * 1e-100 Hinv = np.float64([[H[1, 1], -H[0, 1]], [-H[1, 0], H[0, 0]]]) / det s = Hinv.dot(g) b = b - s # constrained to non-positive since the model would otherwise suggest # a better chance of reconstruction for higher rho b[1] = min(b[1], 0) g_len = np.linalg.norm(g) s_len = np.linalg.norm(s) b_len = np.linalg.norm(b) # g_len < 1e-12 : convergence # s_len < 1e-3 : convergence # s_len / b_len < 1e-12 : ... in case of large coefficients # the last two should probably be replaced by # delta (-b[0] / b[1]) < 1e-12 # or something like that if g_len < 1e-12 or s_len < 1e-3 or s_len / b_len < 1e-12: val = np.zeros(len(percentiles)) for l, p in enumerate(percentiles): val[l] = (np.log(p / (1 - p)) - b[0]) / b[1] val[l] = max(val[l], 0) val[l] = min(val[l], 1) return val raise RuntimeWarning('analysis.py: phase transition does not converge.')
[docs]def _sklearn_logit_solver(dist, percentiles): """ Fit a logistic regression model using the solver from scikit-learn. Parameters ---------- dist : ndarray The simulated signal "distances" in the phase space. percentiles : list or tuple The percentiles to estimate. Returns ------- rho : ndarray The "len(percentiles)"-by-"len(delta)" array of estimated phase transition rho vectors. The phase transition rho vectors are (in order): 50% (the phase transition esitmate), smaller to larger percentiles. """ from sklearn.linear_model import LogisticRegression NMSE_tolerance = 10**(-_conf['SNR'] / 10) lr = LogisticRegression(C=1e3, fit_intercept=False, random_state=_conf['seed']) rho = np.zeros((len(percentiles), len(_conf['delta']))) monte_carlo = dist.shape[2] for k in range(len(_conf['delta'])): successes = (dist[k, :, :] < NMSE_tolerance).reshape(-1) if not np.any(successes): rho[:, k] = 0.0 elif np.all(successes): rho[:, k] = 1.0 else: X = np.column_stack( (np.ones_like(successes), np.repeat(_conf['rho'], monte_carlo))) lr.fit(X, successes) b = [lr.coef_[0, 0], lr.coef_[0, 1]] for l, p in enumerate(percentiles): rho[l, k] = (np.log(p / (1 - p)) - b[0]) / b[1] rho[l, k] = max(rho[l, k], 0.0) rho[l, k] = min(rho[l, k], 1.0) return rho