# %% IMPORTS
# Built-in imports
import os
import logging as log
import numpy as np
from multiprocessing import Pool
from os import path
# Package imports
import dynesty
# IMAGINE imports
from imagine.pipelines import Pipeline
# All declaration
__all__ = ['DynestyPipeline']
# %% CLASS DEFINITIONS
[docs]class DynestyPipeline(Pipeline):
"""
Bayesian analysis pipeline with
`dynesty <https://dynesty.readthedocs.io>`_
This pipeline may use
:py:class:`DynamicNestedSampler <dynesty.DynamicNestedSampler>` if the
sampling parameter 'dynamic' is set to `True` or
:py:class:`NestedSampler <dynesty.NestedSampler>`
if 'dynamic` is False (default).
See base class for initialization details.
The sampler behaviour is controlled using the `sampling_controllers`
property. A description of these can be found below.
Sampling controllers
--------------------
dynamic : bool
If `True`, use
:py:class:`dynesty.DynamicNestedSampler` otherwise uses
:py:class:`dynesty.NestedSampler`
dlogz : float
Iteration will stop, in the `dynamic==False` case,
when the estimated contribution of the
remaining prior volume to the total evidence falls below
this threshold. Explicitly, the stopping criterion is
`ln(z + z_est) - ln(z) < dlogz`, where `z` is the current
evidence from all saved samples and `z_est` is the estimated
contribution from the remaining volume. If `add_live` is `True`,
the default is `1e-3 * (nlive - 1) + 0.01`. Otherwise, the
default is `0.01`.
dlogz_init : float
The baseline run will stop, in the `dynamic==True` case,
when the estimated contribution of the
remaining prior volume to the total evidence falls below
this threshold. Explicitly, the stopping criterion is
`ln(z + z_est) - ln(z) < dlogz`, where `z` is the current
evidence from all saved samples and `z_est` is the estimated
contribution from the remaining volume. If `add_live` is `True`,
the default is `1e-3 * (nlive - 1) + 0.01`. Otherwise, the
default is `0.01`.
nlive : int
If `dynamic` is `False`, this sets the number of live points used.
Default is 400.
nlive_init : int
If `dynamic` is `True`, this sets the number of live points used
during the initial (“baseline”) nested sampling run. Default is 400.
nlive_batch : int
If `dynamic` is `True`, this sets the number of live points used
when adding additional samples from a nested sampling run within
each batch. Default is `400`.
logl_max : float
Iteration will stop when the sampled ln(likelihood) exceeds the
threshold set by `logl_max`. Default is no bound (`np.inf`).
logl_max_init : float
The baseline run will stop, in the `dynamic==True` case, when the
sampled ln(likelihood) exceeds this threshold.
Default is no bound (`np.inf`).
maxiter : int
Maximum number of iterations.
Iteration may stop earlier if the
termination condition is reached. Default is (no limit).
maxiter_init : int
If `dynamic` is `True`, this sets the maximum number of iterations for
the initial baseline nested sampling run. Iteration may stop earlier
if the termination condition is reached. Default is sys.maxsize (no limit).
maxiter_batch : int
If `dynamic` is `True`, this sets the maximum number of iterations
for the nested sampling run within each batch.
Iteration may stop earlier if the termination condition is reached.
Default is `sys.maxsize` (no limit).
maxcall : int
Maximum number of likelihood evaluations (without considering the
initial points, i.e. maxcall_effective = maxcall + nlive).
Iteration may stop earlier if termination condition is reached.
Default is `sys.maxsize` (no limit).
maxcall_init : int
If `dynamic` is `True`, maximum number of likelihood evaluations in
the baseline run.
maxcall_batch : int
If `dynamic` is `True`, maximum number of likelihood evaluations for
the nested sampling run within each batch. Iteration may stop earlier
if the termination condition is reached.
Default is `sys.maxsize` (no limit).
maxbatch : int
If `dynamic` is `True`, maximum number of batches allowed.
Default is `sys.maxsize` (no limit).
use_stop : bool, optional
Whether to evaluate our stopping function after each batch.
Disabling this can improve performance if other stopping criteria
such as :data:`maxcall` are already specified. Default is `True`.
n_effective: int
Minimum number of effective posterior samples. If the estimated
"effective sample size" (ESS) exceeds this number,
sampling will terminate. Default is no ESS (`np.inf`).
n_effective_init: int
Minimum number of effective posterior samples during the baseline run.
If the estimated "effective sample size" (ESS) exceeds this number,
sampling will terminate. Default is no ESS (`np.inf`).
add_live : bool
Whether or not to add the remaining set of live points to
the list of samples at the end of each run. Default is `True`.
print_progress : bool
Whether or not to output a simple summary of the current run that
updates with each iteration. Default is `True`.
print_func : function
A function that prints out the current state of the sampler.
If not provided, the default :meth:`results.print_fn` is used.
save_bounds : bool
Whether or not to save past bounding distributions used to bound
the live points internally. Default is *True*.
bound : {`'none'`, `'single'`, `'multi'`, `'balls'`, `'cubes'`}
Method used to approximately bound the prior using the current
set of live points. Conditions the sampling methods used to
propose new live points. Choices are no bound (`'none'`), a single
bounding ellipsoid (`'single'`), multiple bounding ellipsoids
(`'multi'`), balls centered on each live point (`'balls'`), and
cubes centered on each live point (`'cubes'`). Default is `'multi'`.
sample : {`'auto'`, `'unif'`, `'rwalk'`, `'rstagger'`,
`'slice'`, `'rslice'`}
Method used to sample uniformly within the likelihood constraint,
conditioned on the provided bounds. Unique methods available are:
uniform sampling within the bounds(`'unif'`),
random walks with fixed proposals (`'rwalk'`),
random walks with variable ("staggering") proposals (`'rstagger'`),
multivariate slice sampling along preferred orientations (`'slice'`),
and "random" slice sampling along all orientations (`'rslice'`).
`'auto'` selects the sampling method based on the dimensionality
of the problem (from `ndim`).
When `ndim < 10`, this defaults to `'unif'`.
When `10 <= ndim <= 20`, this defaults to `'rwalk'`.
When `ndim > 20`, this defaults to `'slice'`. `'rstagger'` and `'rslice'`
are provided as alternatives for `'rwalk'` and `'slice'`, respectively.
Default is `'auto'`. Note that Dynesty's 'hslice' option is not
supported within IMAGINE.
update_interval : int or float
If an integer is passed, only update the proposal distribution every
`update_interval`-th likelihood call. If a float is passed, update the
proposal after every `round(update_interval * nlive)`-th likelihood
call. Larger update intervals larger can be more efficient
when the likelihood function is quick to evaluate. Default behavior
is to target a roughly constant change in prior volume, with
`1.5` for `'unif'`, `0.15 * walks` for `'rwalk'` and `'rstagger'`,
`0.9 * ndim * slices` for `'slice'`, `2.0 * slices` for `'rslice'`,
and `25.0 * slices` for `'hslice'`.
enlarge : float
Enlarge the volumes of the specified bounding object(s) by this
fraction. The preferred method is to determine this organically
using bootstrapping. If `bootstrap > 0`, this defaults to `1.0`.
If `bootstrap = 0`, this instead defaults to `1.25`.
bootstrap : int
Compute this many bootstrapped realizations of the bounding
objects. Use the maximum distance found to the set of points left
out during each iteration to enlarge the resulting volumes. Can lead
to unstable bounding ellipsoids. Default is `0` (no bootstrap).
vol_dec : float
For the `'multi'` bounding option, the required fractional reduction
in volume after splitting an ellipsoid in order to to accept the split.
Default is `0.5`.
vol_check : float
For the `'multi'` bounding option, the factor used when checking if
the volume of the original bounding ellipsoid is large enough to
warrant `> 2` splits via `ell.vol > vol_check * nlive * pointvol`.
Default is `2.0`.
walks : int
For the `'rwalk'` sampling option, the minimum number of steps
(minimum 2) before proposing a new live point. Default is `25`.
facc : float
The target acceptance fraction for the `'rwalk'` sampling option.
Default is `0.5`. Bounded to be between `[1. / walks, 1.]`.
slices : int
For the `'slice'` and `'rslice'` sampling
options, the number of times to execute a "slice update"
before proposing a new live point. Default is `5`.
Note that `'slice'` cycles through **all dimensions**
when executing a "slice update".
Note
----
Instances of this class are callable.
Look at the :py:meth:`DynestyPipeline.call` for details.
"""
[docs] def call(self, **kwargs):
"""
Runs the IMAGINE pipeline using the Dynesty sampler
Returns
-------
results : dict
Dynesty sampling results
"""
log.debug('@ dynesty_pipeline::__call__')
# Common parameters for NestedSampler and DynamicNestedSampler
default_init_params = {'bound': 'multi',
'sample': 'auto',
'update_interval': None,
'enlarge' : None,
'bootstrap': 0,
'vol_dec': 0.5,
'facc': 0.5,
'slices': 5,
'walks': 25,
'vol_check': 2.0}
# Keyword arguments can alter the sampling controllers
self.sampling_controllers = kwargs # Updates the dict
dynamic = self.sampling_controllers.get('dynamic', True)
resume = self.sampling_controllers.get('resume', False)
assert (not resume), 'DynestyPipeline does not support resuming!'
if dynamic:
dynesty_sampler = dynesty.DynamicNestedSampler
default_run_params = {'nlive_init': 400,
'maxiter_init': None,
'maxcall_init': None,
'dlogz_init': 0.01,
'logl_max_init': np.inf,
'n_effective_init': np.inf,
'nlive_batch': 400,
'maxiter_batch': None,
'maxcall_batch': None,
'maxiter': None,
'maxcall': None,
'maxbatch': None,
'n_effective': np.inf,
'use_stop': True,
'save_bounds': True,
'print_progress': True,
'print_func': None}
else:
dynesty_sampler = dynesty.NestedSampler
default_init_params.update({'nlive': 400})
default_run_params = {'maxiter': None,
'maxcall': None,
'dlogz': None,
'logl_max': np.inf,
'n_effective': None,
'print_progress': True,
'print_func': None,
'save_bounds': True}
# Prepares initialization and run parameters from
# defaults and sampling controllers
init_params = { k : self.sampling_controllers.get(k, default)
for k, default in default_init_params.items()}
run_params = { k : self.sampling_controllers.get(k, default)
for k, default in default_run_params.items()}
# Updates the sampling controllers to reflect what is being used
self.sampling_controllers = init_params # Updates the dict
self.sampling_controllers = run_params # Updates the dict
rstate = np.random.RandomState(seed=self.master_seed)
self.sampler = dynesty_sampler(self._likelihood_function,
self.prior_transform,
len(self._active_parameters),
pool=None, # parallel not working
gradient=None, # gradients not supported
live_points=None,
rstate=rstate,
**init_params)
self.sampler.run_nested(**run_params)
self.results = self.sampler.results
self._samples_array = self.results['samples']
self._evidence = self.results['logz']
self._evidence_err = self.results['logzerr']
return self.results