# %% IMPORTS
# Package imports
import astropy.units as u
import numpy as np
import scipy.stats as stats
from scipy.interpolate import CubicSpline
# All declaration
__all__ = ['GeneralPrior', 'ScipyPrior']
# %% CLASS DEFINITIONS
[docs]class GeneralPrior:
"""
Allows constructing a prior from a pre-existing sampling of the parameter
space or a known probability density function (PDF).
Like in `MultiNest <https://johannesbuchner.github.io/PyMultiNest/pymultinest_run.html>`_, priors here are represented by a mapping of uniformly
distributed scaled parameter values into a distribution with the
desired properties (i.e. following the expected PDF). Such mapping is
equivalent to the inverse of the cumulative distribution function (CDF)
associated with the prior distribution.
Notes
-----
Here we make a summary of the algorithm that allows finding the inverse of
the CDF (and therefore, a IMAGINE prior) from a set of samples or PDF.
(1) If set of samples is provided, computes Kernel Density Estimate (KDE)
representation of it using Gaussian kernels.
(2) The KDE is evaluated on `pdf_npoints` and thi si used to construct
a interpolated cubic spline, which can be inspected through the method
:py:meth:`pdf`.
(4) From PDF spline, the CDF is computed, which can be accessed using
:py:meth:`cdf`.
(5) The CDF is evaluated on `inv_cdf_npoints`, and the inverse of the CDF
is, again, constructed as a interpolated cubic spline. The spline object
is available at :py:meth:`inv_cdf`.
Parameters
----------
samples : array_like
Array containing a sample of the prior distribution one wants to use.
Note: this will use :py:class:`scipy.stats.gaussian_kde` to compute
the probability density function (PDF) through kernel density
estimate using Gaussian kernels.
pdf_fun : function
A Python function containing the PDF associated with this prior.
If an no `interval` is provided, the domain of PDF(x) will be assumed to be
within the interval [0,1].
pdf_x, pdf_y : array_like
The PDF can be provided as two arrays of points following
(pdf_x, pdf_y) = (x, PDF(x)). In the absence of an `interval`, the
interval [min(pdf_x), max(pdf_x)] will be used.
interval : tuple or list
A pair of points representing, respectively, the minimum and maximum
parameter values to be considered.
bw_method: scalar or str
Used by :py:class:`scipy.stats.gaussian_kde` to select the bandwidth
employed to estimate the PDF from provided samples. Can be a number,
if using fixed bandwidth, or strings ‘scott’ or ‘silverman’ if these
rules are to be selected.
pdf_npoints : int
Number of points used to evaluate pdf_fun or the KDE constructed from
the samples.
inv_cdf_npoints : int
Number of points used to evaluate the CDF for the calculation of its
inverse.
"""
def __init__(self, samples=None, pdf_fun=None,
pdf_x=None, pdf_y=None, interval=None,
bw_method=None, pdf_npoints=1500, inv_cdf_npoints=1500):
# Ensures interval is quantity with consistent units
interval = u.Quantity(interval)
self.range = interval
if (pdf_x is None) and (pdf_y is None):
# If needed, constructs a pdf function from samples, using KDE
if samples is not None:
assert pdf_fun is None, 'Either provide the samples or the PDF, not both.'
if interval is not None:
ok = (samples > interval[0]) * (samples < interval[1])
samples = samples[ok]
pdf_fun = stats.gaussian_kde(samples, bw_method=bw_method)
if pdf_fun is not None:
xmin, xmax = interval
# Evaluates the PDF
pdf_x = np.linspace(xmin, xmax, pdf_npoints)
pdf_y = pdf_fun(pdf_x)
# Normalizes and removes units
inv_norm = pdf_y.sum()*(xmax-xmin)/pdf_npoints
pdf_y = (pdf_y/inv_norm).value
pdf_x = ((pdf_x - pdf_x.min())/(pdf_x.max() - pdf_x.min())).value
# Placeholders
self.inv_cdf_npoints = inv_cdf_npoints
self._cdf = None
self._inv_cdf = None
self.distr = None
if (pdf_x is not None) and (pdf_y is not None):
self._pdf = CubicSpline(pdf_x, pdf_y)
else:
self._pdf = None
@property
def pdf(self):
"""
Probability density function (PDF) associated with this prior.
"""
return self._pdf
[docs] def pdf_unscaled(self, x):
"""
Probability density function (PDF) associated with this prior.
"""
return self.pdf(self._scale_parameter(x))
def _scale_parameter(self, x):
return (x - self.range[0])/(self.range[1] - self.range[0])
@property
def cdf(self):
"""
Cumulative distribution function (CDF) associated with this prior.
"""
if self._cdf is None:
if self.pdf is not None:
self._cdf = self.pdf.antiderivative()
return self._cdf
@property
def inv_cdf(self):
"""
Inverse of the CDF associated with this prior,
expressed as a :py:class:`scipy.interpolate.CubicSpline` object.
"""
if (self._inv_cdf is None) and (self.cdf is not None):
t = np.linspace(0, 1, self.inv_cdf_npoints)
y = self.cdf(t)
# Rescales the image of the function to [0,1]
y = (y-y.min())/(y.max()-y.min())
# For some distributions, there will be multiple
# values of y=0 for the same t. The following corrects this
# by simply removing this part of the cdf
select_zeros = (y == 0)
if len(select_zeros) > 1:
y = y[~select_zeros]
t = t[~select_zeros]
# Creates interpolated spline
self._inv_cdf = CubicSpline(y, t)
return self._inv_cdf
[docs] def __call__(self, x):
"""
The "prior mapping", i.e. returns the value of the
inverse of the CDF at point(s) `x`.
"""
return self.inv_cdf(x)
[docs]class ScipyPrior(GeneralPrior):
"""
Constructs a prior from a continuous distribution defined in
`scipy.stats <https://docs.scipy.org/doc/scipy/reference/stats.html#continuous-distributions>`_.
Parameters
-----------
distr : scipy.stats.rv_continuous
A distribution function expressed as an instance of
:py:class:`scipy.stats.rv_continuous`.
*args :
Any positional arguments required by the function selected in
:py:data:`distr` (e.g for `scipy.stats.chi2 <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2.html>`_, one needs to
supply the number of degrees of freedom, :py:data:`df`)
loc : float
Same meaning as in :py:class:`scipy.stats.rv_continuous`: sets the
centre of the distribution (generally, the mean or mode).
scale : float
Same meaning as in :py:class:`scipy.stats.rv_continuous`: sets the
width of the distribution (e.g. the standard deviation in the normal
case).
interval : tuple or list
A pair of points representing, respectively, the minimum and maximum
parameter values to be considered (will be used to rescale the
interval).
"""
def __init__(self, distr, *args, loc=0.0, scale=1.0,
interval=[-1.0,1.0], **kwargs):
super().__init__(interval=interval)
assert isinstance(distr, stats.rv_continuous), 'distr must be instance of scipy.stats.rv_continuous'
xmin, xmax = interval
loc = self._scale_parameter(loc)
self.distr = distr(*args, loc=loc, scale=scale/(xmax - xmin), **kwargs)
self._pdf = self.distr.pdf
self._cdf = self.distr.cdf
# In principle, it should be possible to use scipy's built-in
# ppf for this (which is supposed to be accurate and fast),
# but this has not yet being implemented as it requires
# some extra rescaling to work with the truncation
# self._inv_cdf = self.distr.ppf