Source code for imagine.observables.observable

"""
In the Observable class we define three data types, i.e.,
- 'measured'
- 'simulated'
- 'covariance'
where
'measured' indicates the hosted data is from measurements,
which has a single realization,
'simulated' indicates the hosted data is from simulations,
which has multiple realizations,
'covariance' indicates the hosted data is a covariance matrix,
which has a single realization but by default should not be stored/read/written
by a single computing node.

'measred' data puts its identical copies on all computing nodes,
which means each node has a full storage of 'measured' data.

'simulated' data puts different realizations on different nodes,
which means each node has part of the full realizations,
but at least a full version of one single realization.

'covariance' data distributes itself into all computing nodes,
which means to have a full set of 'covariance' data,
we have to collect pieces from all the computing nodes.
"""

# %% IMPORTS
# Built-in imports
from copy import deepcopy
import logging as log

# Package imports
import astropy.units as u
import numpy as np

# IMAGINE imports
from imagine.tools import pmean, pshape, prosecutor, pglobal, pnewdiag, pdiag

# All declaration
__all__ = ['Observable']


# %% CLASS DEFINITIONS
[docs]class Observable(object):
    """
    Observable class is designed for storing/manipulating distributed information.
    For the testing suits, please turn to "imagine/tests/observable_tests.py".

    Parameters
    ----------
    data : numpy.ndarray
        distributed/copied data
    dtype : str
        Data type, must be either: 'measured', 'simulated' or 'covariance'
    otype : str
        Observable type, must be either: 'HEALPix', 'tabular' or 'plain'
    """
    def __init__(self, data=None, dtype=None, coords=None, otype=None):
        self.dtype = dtype
        self.otype = otype

        if isinstance(data, u.Quantity):
            self.data = data.value
            self.unit = data.unit
        elif isinstance(data, np.ndarray):
            self.data = data
            self.unit = None
        else:
            raise ValueError

        self.coords = coords
        self.rw_flag = False

    @property
    def var(self):
        """
        The stored variance, if the Observable is a variance or covariance
        """
        if self._dtype == 'variance':
            return self._data
        elif self._dtype == 'covariance':
            return pdiag(self._data)
        else:
            TypeError("Needs 'variance' or 'covariance' observables for this")

    @property
    def data(self):
        """
        Data stored in the local processor
        """
        if self._dtype == 'variance':
            # Even when a variance was stored, data will still
            # return a covariance, produced on-the-fly!
            return pnewdiag(self._data)

        return self._data

    @data.setter
    def data(self, data):
        """
        extra input format check for 'measured' and 'covariance'
        no extra check for 'simulated'
        """
        log.debug('@ observable::data')
        if data is None:
            self._data = None
        else:
            if self._dtype != 'variance':
                assert (len(data.shape) == 2)
            assert isinstance(data, np.ndarray)
            if (self._dtype == 'measured'):  # copy single-row data from memory
                assert (data.shape[0] == 1)
            self._data = np.copy(data)
            if (self._dtype == 'covariance'):
                assert np.equal(*self.shape)

    @property
    def shape(self):
        """
        Shape of the GLOBAL array, i.e. considering all processors
        (`numpy.ndarray`, read-only).
        """
        return pshape(self._data)  # estimate shape from all nodes

    @property
    def global_data(self):
        """
        Data gathered from ALL processors (`numpy.ndarray`, read-only).
        Note that only master node hosts the global data,
        while slave nodes hosts None.
        """
        return pglobal(self.data)

    @property
    def size(self):
        """
        Local data size (`int`, read-only)
        this size means the dimension of input data
        not the sample size of realizations
        """
        return self._data.shape[1]

    @property
    def ensemble_mean(self):
        log.debug('@ observable::ensemble_mean')
        if (self._dtype == 'measured'):
            assert (self._data.shape[0] == 1)  # single realization
            return self._data  # since each node has a full copy
        elif (self._dtype == 'simulated'):
            return pmean(self._data)  # calculate mean from all nodes
        else:
            raise TypeError('unsupported data type')

    @property
    def dtype(self):
        """
        Data type, can be either: 'measured', 'simulated' or 'covariance'
        """
        return self._dtype

    @dtype.setter
    def dtype(self, dtype):
        if dtype is None:
            raise ValueError('dtype cannot be None')
        else:
            assert (dtype in ('measured', 'simulated',
                              'variance', 'covariance'))
            self._dtype = str(dtype)

    @property
    def rw_flag(self):
        """
        Rewriting flag, if true, append method will perform rewriting
        """
        return self._rw_flag

    @rw_flag.setter
    def rw_flag(self, rw_flag):
        assert isinstance(rw_flag, bool)
        self._rw_flag = rw_flag

[docs]    def append(self, new_data):
        """
        appending new data happens only to SIMULATED dtype
        the new data to be appended should also be distributed
        which makes the appending operation naturally in parallel

        rewrite flag will be switched off once rewriten has been performed
        """
        log.debug('@ observable::append')
        assert isinstance(new_data, (np.ndarray, Observable))
        assert (self._dtype == 'simulated')


        if isinstance(new_data, u.Quantity):
            assert self.unit == new_data.unit
            new_data = new_data.value

        if isinstance(new_data, np.ndarray):
            prosecutor(new_data)
            if (self._rw_flag):  # rewriting
                self._data = np.copy(new_data)
                self._rw_flag = False
            else:
                self._data = np.vstack([self._data, new_data])
        elif isinstance(new_data, Observable):
            if (self._rw_flag):
                self._data = np.copy(new_data.data)
                self._rw_flag = False
            else:
                self._data = np.vstack([self._data, new_data.data])