Source code for powerbox.tools

"""
A set of tools for dealing with structured boxes, such as those output by :mod:`powerbox`. Tools include those
for averaging a field angularly, and generating the isotropic power spectrum.
"""
from . import dft
import numpy as np
import warnings


def _getbins(bins, coords, log):
    mx = coords.max()
    if not np.iterable(bins):
        if not log:
            bins = np.linspace(coords.min(), mx, bins + 1)
        else:
            mn = coords[coords>0].min()
            bins = np.logspace(np.log10(mn), np.log10(mx), bins + 1)

    return bins


[docs]def angular_average(field, coords, bins, weights=1, average=True, bin_ave=True, get_variance=False, log_bins=False): r""" Average a given field within radial bins. This function can be used in fields of arbitrary dimension (memory permitting), and the field need not be centred at the origin. The averaging assumes that the grid cells fall completely into the bin which encompasses the co-ordinate point for the cell (i.e. there is no weighted splitting of cells if they intersect a bin edge). It is optimized for applying a set of weights, and obtaining the variance of the mean, at the same time as averaging. Parameters ---------- field: nd-array An array of arbitrary dimension specifying the field to be angularly averaged. coords: nd-array or list of n arrays. Either the *magnitude* of the co-ordinates at each point of `field`, or a list of 1D arrays specifying the co-ordinates in each dimension. bins: float or array. The ``bins`` argument provided to histogram. Can be an int or array specifying radial bin edges. weights: array, optional An array of the same shape as `field`, giving a weight for each entry. average: bool, optional Whether to take the (weighted) average. If False, returns the (unweighted) sum. bin_ave : bool, optional Whether to return the bin co-ordinates as the (weighted) average of cells within the bin (if True), or the regularly spaced edges of the bins. get_variance : bool, optional Whether to also return an estimate of the variance of the power in each bin. log_bins : bool, optional Whether to create bins in log-space. Returns ------- field_1d : 1D-array The angularly-averaged field. bins : 1D-array Array of same shape as field_1d specifying the radial co-ordinates of the bins. Either the mean co-ordinate from the input data, or the regularly spaced bins, dependent on `bin_ave`. var : 1D-array, optional The variance of the averaged field (same shape as bins), estimated from the mean standard error. Only returned if `get_variance` is True. Notes ----- If desired, the variance is calculated as the weight unbiased variance, using the formula at https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Reliability_weights for the variance in each cell, and normalising by a factor of :math:`V_2/V_1^2` to estimate the variance of the average. Examples -------- Create a 3D radial function, and average over radial bins: >>> import numpy as np >>> import matplotlib.pyplot as plt >>> x = np.linspace(-5,5,128) # Setup a grid >>> X,Y,Z = np.meshgrid(x,x,x) >>> r = np.sqrt(X**2+Y**2+Z**2) # Get the radial co-ordinate of grid >>> field = np.exp(-r**2) # Generate a radial field >>> avgfunc, bins = angular_average(field,r,bins=100) # Call angular_average >>> plt.plot(bins, np.exp(-bins**2), label="Input Function") # Plot input function versus ang. avg. >>> plt.plot(bins, avgfunc, label="Averaged Function") See Also -------- angular_average_nd : Perform an angular average in a subset of the total dimensions. """ if len(coords) == len(field.shape): # coords are a segmented list of dimensional co-ordinates coords = _magnitude_grid(coords) indx, bins, sumweight = _get_binweights(coords, weights, bins, average, bin_ave=bin_ave, log_bins=log_bins) if np.any(sumweight==0): warnings.warn("One or more radial bins had no cells within it.") res = _field_average(indx, field, weights, sumweight) if get_variance: var = _field_variance(indx, field, res, weights, sumweight) return res, bins, var else: return res, bins
def _magnitude_grid(x, dim=None): if dim is not None: return np.sqrt(np.sum(np.meshgrid(*([x ** 2] * dim)), axis=0)) else: return np.sqrt(np.sum(np.meshgrid(*([X ** 2 for X in x])), axis=0)) def _get_binweights(coords, weights, bins, average=True, bin_ave=True, log_bins=False): # Get a vector of bin edges bins = _getbins(bins, coords, log_bins) # Minus 1 to have first index at 0 indx = np.digitize(coords.flatten(), bins) if average or bin_ave: if not np.isscalar(weights): if coords.shape != weights.shape: raise ValueError("coords and weights must have the same shape!") sumweights = np.bincount(indx, weights=weights.flatten(), minlength=len(bins)+1)[1:-1] else: sumweights = np.bincount(indx, minlength=len(bins)+1)[1:-1] if average: binweight = sumweights else: binweight = 1*sumweights sumweights = np.ones_like(binweight) if bin_ave: bins = np.bincount(indx, weights=(weights * coords).flatten(), minlength=len(bins)+1)[1:-1] / binweight else: sumweights = np.ones(len(bins)-1) return indx, bins, sumweights def _field_average(indx, field, weights, sumweights): if not np.isscalar(weights) and field.shape != weights.shape: raise ValueError("the field and weights must have the same shape!") field = field * weights # Leave like this because field is mutable rl = np.bincount(indx, weights=np.real(field.flatten()), minlength=len(sumweights)+2)[1:-1] / sumweights if field.dtype.kind == "c": im = 1j * np.bincount(indx, weights=np.imag(field.flatten()), minlength=len(sumweights)+2)[1:-1] / sumweights else: im = 0 return rl + im def _field_variance(indx, field, average, weights, V1): if field.dtype.kind == "c": raise NotImplementedError("Cannot use a complex field when computing variance, yet.") # Create a full flattened array of the same shape as field, with the average in that bin. # We have to pad the average vector with 0s on either side to account for cells outside the bin range. average_field = np.concatenate(([0],average, [0]))[indx] # Create the V2 array if not np.isscalar(weights): weights = weights.flatten() V2 = np.bincount(indx, weights=weights**2, minlength=len(V1)+2)[1:-1] else: V2 = V1 field = (field.flatten()-average_field)**2 * weights # This res is the estimated variance of each cell in the bin res = np.bincount(indx, weights=field, minlength=len(V1)+2)[1:-1] / (V1 - V2/V1) # Modify to the estimated variance of the sum of the cells in the bin. res *= V2 / V1**2 return res
[docs]def angular_average_nd(field, coords, bins, n=None, weights=1, average=True, bin_ave=True, get_variance=False, log_bins=False): """ Average the first n dimensions of a given field within radial bins. This function be used to take "hyper-cylindrical" averages of fields. For a 3D field, with `n=2`, this is exactly a cylindrical average. This function can be used in fields of arbitrary dimension (memory permitting), and the field need not be centred at the origin. The averaging assumes that the grid cells fall completely into the bin which encompasses the co-ordinate point for the cell (i.e. there is no weighted splitting of cells if they intersect a bin edge). It is optimized for applying a set of weights, and obtaining the variance of the mean, at the same time as averaging. Parameters ---------- field : md-array An array of arbitrary dimension specifying the field to be angularly averaged. coords : list of n arrays A list of 1D arrays specifying the co-ordinates in each dimension *to be average*. bins : int or array. Specifies the radial bins for the averaged dimensions. Can be an int or array specifying radial bin edges. n : int, optional The number of dimensions to be averaged. By default, all dimensions are averaged. Always uses the first `n` dimensions. weights : array, optional An array of the same shape as the first `n` dimensions of `field`, giving a weight for each entry. average : bool, optional Whether to take the (weighted) average. If False, returns the (unweighted) sum. bin_ave : bool, optional Whether to return the bin co-ordinates as the (weighted) average of cells within the bin (if True), or the linearly spaced edges of the bins get_variance : bool, optional Whether to also return an estimate of the variance of the power in each bin. log_bins : bool, optional Whether to create bins in log-space. Returns ------- field : (m-n+1)-array The angularly-averaged field. The first dimension corresponds to `bins`, while the rest correspond to the unaveraged dimensions. bins : 1D-array The radial co-ordinates of the bins. Either the mean co-ordinate from the input data, or the regularly spaced bins, dependent on `bin_ave`. var : (m-n+1)-array, optional The variance of the averaged field (same shape as `field`), estimated from the mean standard error. Only returned if `get_variance` is True. Examples -------- Create a 3D radial function, and average over radial bins. Equivalent to calling :func:`angular_average`: >>> import numpy as np >>> import matplotlib.pyplot as plt >>> x = np.linspace(-5,5,128) # Setup a grid >>> X,Y,Z = np.meshgrid(x,x,x) # "" >>> r = np.sqrt(X**2+Y**2+Z**2) # Get the radial co-ordinate of grid >>> field = np.exp(-r**2) # Generate a radial field >>> avgfunc, bins, _ = angular_average_nd(field,[x,x,x],bins=100) # Call angular_average >>> plt.plot(bins, np.exp(-bins**2), label="Input Function") # Plot input function versus ang. avg. >>> plt.plot(bins, avgfunc, label="Averaged Function") Create a 2D radial function, extended to 3D, and average over first 2 dimensions (cylindrical average): >>> r = np.sqrt(X**2+Y**2) >>> field = np.exp(-r**2) # 2D field >>> field = np.repeat(field,len(x)).reshape((len(x),)*3) # Extended to 3D >>> avgfunc, avbins, coords = angular_average_nd(field, [x,x,x], bins=50, n=2) >>> plt.plot(avbins, np.exp(-avbins**2), label="Input Function") >>> plt.plot(avbins, avgfunc[:,0], label="Averaged Function") """ if n is None: n = len(coords) if len(coords) != len(field.shape): raise ValueError("coords should be a list of arrays, one for each dimension.") if n == len(coords): return angular_average(field, coords, bins, weights, average, bin_ave, get_variance, log_bins=log_bins) coords = _magnitude_grid([c for i, c in enumerate(coords) if i < n]) indx, bins, sumweights = _get_binweights(coords, weights, bins, average, bin_ave=bin_ave, log_bins=log_bins) n1 = np.product(field.shape[:n]) n2 = np.product(field.shape[n:]) res = np.zeros((len(sumweights), n2), dtype=field.dtype) if get_variance: var = np.zeros_like(res) for i, fld in enumerate(field.reshape((n1, n2)).T): try: w = weights.flatten() except AttributeError: w = weights res[:, i] = _field_average(indx, fld, w, sumweights) if get_variance: var[:, i] = _field_variance(indx, fld, res[:,i], w, sumweights) if not get_variance: return res.reshape((len(sumweights),) + field.shape[n:]), bins else: return res.reshape((len(sumweights),) + field.shape[n:]), bins, var
[docs]def get_power(deltax, boxlength, deltax2=None, N=None, a=1., b=1., remove_shotnoise=True, vol_normalised_power=True, bins=None, res_ndim=None, weights=None, weights2=None, dimensionless=True, bin_ave=True, get_variance=False, log_bins=False, ignore_zero_mode=False, k_weights = 1, ): r""" Calculate the isotropic power spectrum of a given field, or cross-power of two similar fields. This function, by default, conforms to typical cosmological power spectrum conventions -- normalising by the volume of the box and removing shot noise if applicable. These options are configurable. Parameters ---------- deltax : array-like The field on which to calculate the power spectrum . Can either be arbitrarily n-dimensional, or 2-dimensional with the first being the number of spatial dimensions, and the second the positions of discrete particles in the field. The former should represent a density field, while the latter is a discrete sampling of a field. This function chooses which to use by checking the value of `N` (see below). Note that if a discrete sampling is used, the power spectrum calculated is the "overdensity" power spectrum, i.e. the field re-centered about zero and rescaled by the mean. boxlength : float or list of floats The length of the box side(s) in real-space. deltax2 : array-like If given, a box of the same shape as deltax, against which deltax will be cross correlated. N : int, optional The number of grid cells per side in the box. Only required if deltax is a discrete sample. If given, the function will assume a discrete sample. a,b : float, optional These define the Fourier convention used. See :mod:`powerbox.dft` for details. The defaults define the standard usage in *cosmology* (for example, as defined in Cosmological Physics, Peacock, 1999, pg. 496.). Standard numerical usage (eg. numpy) is (a,b) = (0,2pi). remove_shotnoise : bool, optional Whether to subtract a shot-noise term after determining the isotropic power. This only affects discrete samples. vol_weighted_power : bool, optional Whether the input power spectrum, ``pk``, is volume-weighted. Default True because of standard cosmological usage. bins : int or array, optional Defines the final k-bins output. If None, chooses a number based on the input resolution of the box. Otherwise, if int, this defines the number of kbins, or if an array, it defines the exact bin edges. res_ndim : int, optional Only perform angular averaging over first `res_ndim` dimensions. By default, uses all dimensions. weights, weights2 : array-like, optional If deltax is a discrete sample, these are weights for each point. dimensionless: bool, optional Whether to normalise the cube by its mean prior to taking the power. bin_ave : bool, optional Whether to return the bin co-ordinates as the (weighted) average of cells within the bin (if True), or the linearly spaced edges of the bins get_variance : bool, optional Whether to also return an estimate of the variance of the power in each bin. log_bins : bool, optional Whether to create bins in log-space. ignore_zero_mode : bool, optional Whether to ignore the k=0 mode (or DC term). k_weights : nd-array, optional The weights of the n-dimensional k modes. This can be used to filter out some modes completely. Returns ------- p_k : array The power spectrum averaged over bins of equal :math:`|k|`. meank : array The bin-centres for the p_k array (in k). This is the mean k-value for cells in that bin. var : array The variance of the power spectrum, estimated from the mean standard error. Only returned if `get_variance` is True. Examples -------- One can use this function to check whether a box created with :class:`PowerBox` has the correct power spectrum: >>> from powerbox import PowerBox >>> import matplotlib.pyplot as plt >>> pb = PowerBox(250,lambda k : k**-2.) >>> p,k = get_power(pb.delta_x,pb.boxlength) >>> plt.plot(k,p) >>> plt.plot(k,k**-2.) >>> plt.xscale('log') >>> plt.yscale('log') """ # Check if the input data is in sampled particle format if N is not None: if deltax.shape[1] > deltax.shape[0]: raise ValueError("It seems that there are more dimensions than particles! Try transposing deltax.") if deltax2 is not None and deltax2.shape[1] > deltax2.shape[0]: raise ValueError("It seems that there are more dimensions than particles! Try transposing deltax2.") dim = deltax.shape[1] if deltax2 is not None and dim != deltax2.shape[1]: raise ValueError("deltax and deltax2 must have the same number of dimensions!") if not np.iterable(N): N = [N] * dim if not np.iterable(boxlength): boxlength = [boxlength] * dim Npart1 = deltax.shape[0] if deltax2 is not None: Npart2 = deltax2.shape[0] else: Npart2 = Npart1 # Generate a histogram of the data, with appropriate number of bins. edges = [np.linspace(0, L, n + 1) for L, n in zip(boxlength, N)] deltax = np.histogramdd(deltax % boxlength, bins=edges, weights=weights)[0].astype("float") if deltax2 is not None: deltax2 = np.histogramdd(deltax2 % boxlength, bins=edges, weights=weights2)[0].astype("float") # Convert sampled data to mean-zero data if dimensionless: deltax = deltax / np.mean(deltax) - 1 if deltax2 is not None: deltax2 = deltax2 / np.mean(deltax2) - 1 else: deltax -= np.mean(deltax) if deltax2 is not None: deltax2 -= np.mean(deltax2) else: # If input data is already a density field, just get the dimensions. dim = len(deltax.shape) if not np.iterable(boxlength): boxlength = [boxlength] * dim if deltax2 is not None and deltax.shape != len(deltax2.shape): raise ValueError("deltax and deltax2 must have the same shape!") N = deltax.shape Npart1 = None V = np.product(boxlength) # Calculate the n-D power spectrum and align it with the k from powerbox. FT, freq, k = dft.fft(deltax, L=boxlength, a=a, b=b, ret_cubegrid=True) if deltax2 is not None: FT2 = dft.fft(deltax2, L=boxlength, a=a, b=b)[0] else: FT2 = FT P = np.real(FT * np.conj(FT2) / V ** 2) if vol_normalised_power: P *= V if res_ndim is None: res_ndim = dim # Determine a nice number of bins. if bins is None: bins = int(np.product(N[:res_ndim]) ** (1. / res_ndim) / 2.2) # Set k_weights so that k=0 mode is ignore if desired. if ignore_zero_mode: kmag = _magnitude_grid([c for i, c in enumerate(freq) if i < res_ndim]) if np.isscalar(k_weights): k_weights = np.ones_like(kmag) k_weights[kmag == 0] = 0 # res is (P, k, <var>) res = angular_average_nd(P, freq, bins, n=res_ndim, bin_ave=bin_ave, get_variance=get_variance, log_bins=log_bins, weights=k_weights) res = list(res) # Remove shot-noise if remove_shotnoise and Npart1: res[0] -= np.sqrt(V ** 2 / Npart1 / Npart2) if res_ndim < dim: return res + [freq[res_ndim:]] else: return res