Source code for powerbox.tools

"""
A set of tools for dealing with structured boxes, such as those output by :mod:`powerbox`. Tools include those
for averaging a field angularly, and generating the isotropic power spectrum.
"""
from . import dft
import numpy as np
import warnings


def _getbins(bins, coords, log):
    mx = coords.max()
    if not np.iterable(bins):
        if not log:
            bins = np.linspace(coords.min(), mx, bins + 1)
        else:
            mn = coords[coords>0].min()
            bins = np.logspace(np.log10(mn), np.log10(mx), bins + 1)

    return bins


[docs]def angular_average(field, coords, bins, weights=1, average=True, bin_ave=True, get_variance=False, log_bins=False):
    r"""
    Average a given field within radial bins.

    This function can be used in fields of arbitrary dimension (memory permitting), and the field need not be centred
    at the origin. The averaging assumes that the grid cells fall completely into the bin which encompasses the
    co-ordinate point for the cell (i.e. there is no weighted splitting of cells if they intersect a bin edge).

    It is optimized for applying a set of weights, and obtaining the variance of the mean, at the same time as
    averaging.

    Parameters
    ----------
    field: nd-array
        An array of arbitrary dimension specifying the field to be angularly averaged.

    coords: nd-array or list of n arrays.
        Either the *magnitude* of the co-ordinates at each point of `field`, or a list of 1D arrays specifying the
        co-ordinates in each dimension.

    bins: float or array.
        The ``bins`` argument provided to histogram. Can be an int or array specifying radial bin edges.

    weights: array, optional
        An array of the same shape as `field`, giving a weight for each entry.
        
    average: bool, optional
        Whether to take the (weighted) average. If False, returns the (unweighted) sum.

    bin_ave : bool, optional
        Whether to return the bin co-ordinates as the (weighted) average of cells within the bin (if True), or
        the regularly spaced edges of the bins.

    get_variance : bool, optional
        Whether to also return an estimate of the variance of the power in each bin.

    log_bins : bool, optional
        Whether to create bins in log-space.

    Returns
    -------
    field_1d : 1D-array
        The angularly-averaged field.

    bins : 1D-array
        Array of same shape as field_1d specifying the radial co-ordinates of the bins. Either the mean co-ordinate
        from the input data, or the regularly spaced bins, dependent on `bin_ave`.

    var : 1D-array, optional
        The variance of the averaged field (same shape as bins), estimated from the mean standard error.
        Only returned if `get_variance` is True.

    Notes
    -----
    If desired, the variance is calculated as the weight unbiased variance, using the formula at
    https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Reliability_weights for the variance in each cell, and
    normalising by a factor of :math:`V_2/V_1^2` to estimate the variance of the average.

    Examples
    --------
    Create a 3D radial function, and average over radial bins:

    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> x = np.linspace(-5,5,128)   # Setup a grid
    >>> X,Y,Z = np.meshgrid(x,x,x)
    >>> r = np.sqrt(X**2+Y**2+Z**2) # Get the radial co-ordinate of grid
    >>> field = np.exp(-r**2)       # Generate a radial field
    >>> avgfunc, bins = angular_average(field,r,bins=100)   # Call angular_average
    >>> plt.plot(bins, np.exp(-bins**2), label="Input Function")   # Plot input function versus ang. avg.
    >>> plt.plot(bins, avgfunc, label="Averaged Function")

    See Also
    --------
    angular_average_nd : Perform an angular average in a subset of the total dimensions.

    """
    if len(coords) == len(field.shape):
        # coords are a segmented list of dimensional co-ordinates
        coords = _magnitude_grid(coords)

    indx, bins, sumweight = _get_binweights(coords, weights, bins, average, bin_ave=bin_ave, log_bins=log_bins)

    if np.any(sumweight==0):
        warnings.warn("One or more radial bins had no cells within it.")

    res = _field_average(indx, field, weights, sumweight)

    if get_variance:
        var = _field_variance(indx, field, res, weights, sumweight)
        return res, bins, var
    else:
        return res, bins


def _magnitude_grid(x, dim=None):
    if dim is not None:
        return np.sqrt(np.sum(np.meshgrid(*([x ** 2] * dim)), axis=0))
    else:
        return np.sqrt(np.sum(np.meshgrid(*([X ** 2 for X in x])), axis=0))


def _get_binweights(coords, weights, bins, average=True, bin_ave=True, log_bins=False):

    # Get a vector of bin edges
    bins = _getbins(bins, coords, log_bins)

    # Minus 1 to have first index at 0
    indx = np.digitize(coords.flatten(), bins)

    if average or bin_ave:
        if not np.isscalar(weights):
            if coords.shape != weights.shape:
                raise ValueError("coords and weights must have the same shape!")
            sumweights = np.bincount(indx, weights=weights.flatten(), minlength=len(bins)+1)[1:-1]
        else:
            sumweights = np.bincount(indx, minlength=len(bins)+1)[1:-1]

        if average:
            binweight = sumweights
        else:
            binweight = 1*sumweights
            sumweights = np.ones_like(binweight)

        if bin_ave:
            bins = np.bincount(indx, weights=(weights * coords).flatten(), minlength=len(bins)+1)[1:-1] / binweight

    else:
        sumweights = np.ones(len(bins)-1)

    return indx, bins, sumweights


def _field_average(indx, field, weights, sumweights):
    if not np.isscalar(weights) and field.shape != weights.shape:
        raise ValueError("the field and weights must have the same shape!")

    field = field * weights  # Leave like this because field is mutable

    rl = np.bincount(indx, weights=np.real(field.flatten()), minlength=len(sumweights)+2)[1:-1] / sumweights
    if field.dtype.kind == "c":
        im = 1j * np.bincount(indx, weights=np.imag(field.flatten()), minlength=len(sumweights)+2)[1:-1] / sumweights
    else:
        im = 0

    return rl + im


def _field_variance(indx, field, average, weights, V1):
    if field.dtype.kind == "c":
        raise NotImplementedError("Cannot use a complex field when computing variance, yet.")

    # Create a full flattened array of the same shape as field, with the average in that bin.
    # We have to pad the average vector with 0s on either side to account for cells outside the bin range.
    average_field = np.concatenate(([0],average, [0]))[indx]

    # Create the V2 array
    if not np.isscalar(weights):
        weights = weights.flatten()
        V2 = np.bincount(indx, weights=weights**2, minlength=len(V1)+2)[1:-1]
    else:
        V2 = V1

    field = (field.flatten()-average_field)**2 * weights

    # This res is the estimated variance of each cell in the bin
    res = np.bincount(indx, weights=field, minlength=len(V1)+2)[1:-1] / (V1 - V2/V1)

    # Modify to the estimated variance of the sum of the cells in the bin.
    res *= V2 / V1**2

    return res


[docs]def angular_average_nd(field, coords, bins, n=None, weights=1, average=True, bin_ave=True, get_variance=False,
                       log_bins=False):
    """
    Average the first n dimensions of a given field within radial bins.

    This function be used to take "hyper-cylindrical" averages of fields. For a 3D field, with `n=2`, this is exactly
    a cylindrical average. This function can be used in fields of arbitrary dimension (memory permitting), and the field
    need not be centred at the origin. The averaging assumes that the grid cells fall completely into the bin which
    encompasses the co-ordinate point for the cell (i.e. there is no weighted splitting of cells if they intersect a bin
    edge).

    It is optimized for applying a set of weights, and obtaining the variance of the mean, at the same time as
    averaging.

    Parameters
    ----------
    field : md-array
        An array of arbitrary dimension specifying the field to be angularly averaged.

    coords : list of n arrays
        A list of 1D arrays specifying the co-ordinates in each dimension *to be average*.

    bins : int or array.
        Specifies the radial bins for the averaged dimensions. Can be an int or array specifying radial bin edges.

    n : int, optional
        The number of dimensions to be averaged. By default, all dimensions are averaged. Always uses
        the first `n` dimensions.
        
    weights : array, optional
        An array of the same shape as the first `n` dimensions of `field`, giving a weight for each entry.
        
    average : bool, optional
        Whether to take the (weighted) average. If False, returns the (unweighted) sum.

    bin_ave : bool, optional
        Whether to return the bin co-ordinates as the (weighted) average of cells within the bin (if True), or
        the linearly spaced edges of the bins

    get_variance : bool, optional
        Whether to also return an estimate of the variance of the power in each bin.

    log_bins : bool, optional
        Whether to create bins in log-space.

    Returns
    -------
    field : (m-n+1)-array
        The angularly-averaged field. The first dimension corresponds to `bins`, while the rest correspond to the
        unaveraged dimensions.

    bins : 1D-array
        The radial co-ordinates of the bins. Either the mean co-ordinate from the input data, or the regularly spaced
        bins, dependent on `bin_ave`.

    var : (m-n+1)-array, optional
        The variance of the averaged field (same shape as `field`), estimated from the mean standard error.
        Only returned if `get_variance` is True.

    Examples
    --------
    Create a 3D radial function, and average over radial bins. Equivalent to calling :func:`angular_average`:

    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> x = np.linspace(-5,5,128)   # Setup a grid
    >>> X,Y,Z = np.meshgrid(x,x,x)  # ""
    >>> r = np.sqrt(X**2+Y**2+Z**2) # Get the radial co-ordinate of grid
    >>> field = np.exp(-r**2)       # Generate a radial field
    >>> avgfunc, bins, _ = angular_average_nd(field,[x,x,x],bins=100)   # Call angular_average
    >>> plt.plot(bins, np.exp(-bins**2), label="Input Function")   # Plot input function versus ang. avg.
    >>> plt.plot(bins, avgfunc, label="Averaged Function")

    Create a 2D radial function, extended to 3D, and average over first 2 dimensions (cylindrical average):
    
    >>> r = np.sqrt(X**2+Y**2)
    >>> field = np.exp(-r**2)    # 2D field
    >>> field = np.repeat(field,len(x)).reshape((len(x),)*3)   # Extended to 3D
    >>> avgfunc, avbins, coords = angular_average_nd(field, [x,x,x], bins=50, n=2)
    >>> plt.plot(avbins, np.exp(-avbins**2), label="Input Function")
    >>> plt.plot(avbins, avgfunc[:,0], label="Averaged Function")
    """
    if n is None:
        n = len(coords)

    if len(coords) != len(field.shape):
        raise ValueError("coords should be a list of arrays, one for each dimension.")

    if n == len(coords):
        return angular_average(field, coords, bins, weights, average, bin_ave, get_variance, log_bins=log_bins)

    coords = _magnitude_grid([c for i, c in enumerate(coords) if i < n])

    indx, bins, sumweights = _get_binweights(coords, weights, bins, average, bin_ave=bin_ave, log_bins=log_bins)

    n1 = np.product(field.shape[:n])
    n2 = np.product(field.shape[n:])

    res = np.zeros((len(sumweights), n2), dtype=field.dtype)
    if get_variance:
        var = np.zeros_like(res)

    for i, fld in enumerate(field.reshape((n1, n2)).T):
        try:
            w = weights.flatten()
        except AttributeError:
            w = weights

        res[:, i] = _field_average(indx, fld, w, sumweights)

        if get_variance:
            var[:, i] = _field_variance(indx, fld, res[:,i], w, sumweights)

    if not get_variance:
        return res.reshape((len(sumweights),) + field.shape[n:]), bins
    else:
        return res.reshape((len(sumweights),) + field.shape[n:]), bins, var


[docs]def get_power(deltax, boxlength, deltax2=None, N=None, a=1., b=1., remove_shotnoise=True,
              vol_normalised_power=True, bins=None, res_ndim=None, weights=None, weights2=None,
              dimensionless=True, bin_ave=True, get_variance=False, log_bins=False, ignore_zero_mode=False,
              k_weights = 1,
              ):
    r"""
    Calculate the isotropic power spectrum of a given field, or cross-power of two similar fields.

    This function, by default, conforms to typical cosmological power spectrum conventions -- normalising by the volume
    of the box and removing shot noise if applicable. These options are configurable.

    Parameters
    ----------
    deltax : array-like
        The field on which to calculate the power spectrum . Can either be arbitrarily n-dimensional, or 2-dimensional with the
        first being the number of spatial dimensions, and the second the positions of discrete particles in the field. 
        The former should represent a density field, while the latter
        is a discrete sampling of a field. This function chooses which to use by checking the value of `N` (see below).
        Note that if a discrete sampling is used, the power spectrum calculated is the
        "overdensity" power spectrum, i.e. the field re-centered about zero and rescaled by the mean.

    boxlength : float or list of floats
        The length of the box side(s) in real-space.

    deltax2 : array-like
        If given, a box of the same shape as deltax, against which deltax will be cross correlated.

    N : int, optional
        The number of grid cells per side in the box. Only required if deltax is a discrete sample. If given,
        the function will assume a discrete sample.

    a,b : float, optional
        These define the Fourier convention used. See :mod:`powerbox.dft` for details. The defaults define the standard
        usage in *cosmology* (for example, as defined in Cosmological Physics, Peacock, 1999, pg. 496.). Standard
        numerical usage (eg. numpy) is (a,b) = (0,2pi).

    remove_shotnoise : bool, optional
        Whether to subtract a shot-noise term after determining the isotropic power. This only affects discrete samples.

    vol_weighted_power : bool, optional
        Whether the input power spectrum, ``pk``, is volume-weighted. Default True because of standard cosmological
        usage.

    bins : int or array, optional
        Defines the final k-bins output. If None, chooses a number based on the input resolution of the box. Otherwise,
        if int, this defines the number of kbins, or if an array, it defines the exact bin edges.
        
    res_ndim : int, optional
        Only perform angular averaging over first `res_ndim` dimensions. By default, uses all dimensions. 

    weights, weights2 : array-like, optional
        If deltax is a discrete sample, these are weights for each point.

    dimensionless: bool, optional
        Whether to normalise the cube by its mean prior to taking the power.

    bin_ave : bool, optional
        Whether to return the bin co-ordinates as the (weighted) average of cells within the bin (if True), or
        the linearly spaced edges of the bins

    get_variance : bool, optional
        Whether to also return an estimate of the variance of the power in each bin.

    log_bins : bool, optional
        Whether to create bins in log-space.

    ignore_zero_mode : bool, optional
        Whether to ignore the k=0 mode (or DC term).

    k_weights : nd-array, optional
        The weights of the n-dimensional k modes. This can be used to filter out some modes completely.

    Returns
    -------
    p_k : array
        The power spectrum averaged over bins of equal :math:`|k|`.

    meank : array
        The bin-centres for the p_k array (in k). This is the mean k-value for cells in that bin.

    var : array
        The variance of the power spectrum, estimated from the mean standard error. Only returned if `get_variance` is
        True.

    Examples
    --------
    One can use this function to check whether a box created with :class:`PowerBox` has the correct
    power spectrum:

    >>> from powerbox import PowerBox
    >>> import matplotlib.pyplot as plt
    >>> pb = PowerBox(250,lambda k : k**-2.)
    >>> p,k = get_power(pb.delta_x,pb.boxlength)
    >>> plt.plot(k,p)
    >>> plt.plot(k,k**-2.)
    >>> plt.xscale('log')
    >>> plt.yscale('log')
    """

    # Check if the input data is in sampled particle format
    if N is not None:

        if deltax.shape[1] > deltax.shape[0]:
            raise ValueError("It seems that there are more dimensions than particles! Try transposing deltax.")

        if deltax2 is not None and deltax2.shape[1] > deltax2.shape[0]:
            raise ValueError("It seems that there are more dimensions than particles! Try transposing deltax2.")

        dim = deltax.shape[1]
        if deltax2 is not None and dim != deltax2.shape[1]:
            raise ValueError("deltax and deltax2 must have the same number of dimensions!")

        if not np.iterable(N):
            N = [N] * dim

        if not np.iterable(boxlength):
            boxlength = [boxlength] * dim

        Npart1 = deltax.shape[0]

        if deltax2 is not None:
            Npart2 = deltax2.shape[0]
        else:
            Npart2 = Npart1

        # Generate a histogram of the data, with appropriate number of bins.
        edges = [np.linspace(0, L, n + 1) for L, n in zip(boxlength, N)]

        deltax = np.histogramdd(deltax % boxlength, bins=edges, weights=weights)[0].astype("float")

        if deltax2 is not None:
            deltax2 = np.histogramdd(deltax2 % boxlength, bins=edges, weights=weights2)[0].astype("float")

        # Convert sampled data to mean-zero data
        if dimensionless:
            deltax = deltax / np.mean(deltax) - 1
            if deltax2 is not None:
                deltax2 = deltax2 / np.mean(deltax2) - 1
        else:
            deltax -= np.mean(deltax)
            if deltax2 is not None:
                deltax2 -= np.mean(deltax2)
    else:
        # If input data is already a density field, just get the dimensions.
        dim = len(deltax.shape)

        if not np.iterable(boxlength):
            boxlength = [boxlength] * dim

        if deltax2 is not None and deltax.shape != len(deltax2.shape):
            raise ValueError("deltax and deltax2 must have the same shape!")

        N = deltax.shape
        Npart1 = None

    V = np.product(boxlength)

    # Calculate the n-D power spectrum and align it with the k from powerbox.
    FT, freq, k = dft.fft(deltax, L=boxlength, a=a, b=b, ret_cubegrid=True)

    if deltax2 is not None:
        FT2 = dft.fft(deltax2, L=boxlength, a=a, b=b)[0]
    else:
        FT2 = FT

    P = np.real(FT * np.conj(FT2) / V ** 2)

    if vol_normalised_power:
        P *= V

    if res_ndim is None:
        res_ndim = dim

    # Determine a nice number of bins.
    if bins is None:
        bins = int(np.product(N[:res_ndim]) ** (1. / res_ndim) / 2.2)

    # Set k_weights so that k=0 mode is ignore if desired.
    if ignore_zero_mode:
        kmag = _magnitude_grid([c for i, c in enumerate(freq) if i < res_ndim])
        if np.isscalar(k_weights):
            k_weights = np.ones_like(kmag)

        k_weights[kmag == 0] = 0

    # res is (P, k, <var>)
    res = angular_average_nd(P, freq, bins, n=res_ndim, bin_ave=bin_ave, get_variance=get_variance, log_bins=log_bins,
                             weights=k_weights)
    res = list(res)
    # Remove shot-noise
    if remove_shotnoise and Npart1:
        res[0] -= np.sqrt(V ** 2 / Npart1 / Npart2)

    if res_ndim < dim:
        return res + [freq[res_ndim:]]
    else:
        return res