Source code for pingouin.circular

# Author: Raphael Vallat <raphaelvallat9@gmail.com>
# Date: July 2018
# Python code inspired from the CircStats MATLAB toolbox (Berens 2009)
# and the brainpipe Python package.
# Reference:
# Berens, Philipp. 2009. CircStat: A MATLAB Toolbox for Circular Statistics.
# Journal of Statistical Software, Articles 31 (10): 1–21.
import numpy as np
from scipy.stats import norm

from .utils import remove_na

__all__ = [
    "convert_angles",
    "circ_axial",
    "circ_mean",
    "circ_r",
    "circ_corrcc",
    "circ_corrcl",
    "circ_rayleigh",
    "circ_vtest",
]


###############################################################################
# HELPER FUNCTIONS
###############################################################################


def _checkangles(angles, axis=None):
    """Internal function to check that angles are in radians."""
    msg = (
        "Angles are not in unit of radians. Please use the "
        "`pingouin.convert_angles` function to map your angles to "
        "the [-pi, pi] range."
    )
    ptp_rad = np.nanmax(angles, axis=axis) - np.nanmin(angles, axis=axis)
    ptp_mask = ptp_rad <= 2 * np.pi
    if not ptp_mask.all():
        raise ValueError(msg)


[docs] def convert_angles(angles, low=0, high=360, positive=False): """Element-wise conversion of arbitrary-unit circular quantities to radians. .. versionadded:: 0.3.4 Parameters ---------- angles : array_like Circular data. low : float or int, optional Low boundary for ``angles`` range. Default is 0. high : float or int, optional High boundary for ``angles`` range. Default is 360 (for degrees to radians conversion). positive : boolean If True, radians are mapped on the :math:`[0, 2\\pi]`. Otherwise, the resulting angles are mapped from :math:`[-\\pi, \\pi)` (default). Returns ------- radians : array_like Circular data in radians. Notes ----- The formula to convert a set of angles :math:`\\alpha` from an arbitrary range :math:`[\\text{high},\\text{low}]` to radians :math:`[0, 2\\pi]` is: .. math:: \\alpha_r = \\frac{2\\pi\\alpha}{\\text{high} - \\text{low}} If ``positive=False`` (default), the resulting angles in radians :math:`\\alpha_r` are then wrapped to the :math:`[-\\pi, \\pi)` range: .. math:: (\\text{angle} + \\pi) \\mod 2 \\pi - \\pi Examples -------- 1. Convert degrees to radians >>> from pingouin import convert_angles >>> a = [0, 360, 180, 90, 45, 270] >>> convert_angles(a, low=0, high=360) array([ 0. , 0. , -3.14159265, 1.57079633, 0.78539816, -1.57079633]) with ``positive=True``: >>> convert_angles(a, low=0, high=360, positive=True) array([0. , 6.28318531, 3.14159265, 1.57079633, 0.78539816, 4.71238898]) 2. Convert hours (24h-format) to radians >>> sleep_onset = [22.5, 23.25, 24, 0.5, 1] >>> convert_angles(sleep_onset, low=0, high=24) array([-0.39269908, -0.19634954, 0. , 0.13089969, 0.26179939]) 3. Convert radians from :math:`[0, 2\\pi]` to :math:`[-\\pi, \\pi)`: >>> import numpy as np >>> rad = [0.1, 3.14, 5, 2, 6] >>> convert_angles(rad, low=0, high=2*np.pi) array([ 0.1 , 3.14 , -1.28318531, 2. , -0.28318531]) 4. Convert degrees from a 2-D array >>> np.random.seed(123) >>> deg = np.random.randint(low=0, high=360, size=(3, 4)) >>> convert_angles(deg) array([[-0.66322512, 1.71042267, -2.26892803, 0.29670597], [ 1.44862328, 1.85004901, 2.14675498, 0.99483767], [-2.54818071, -2.35619449, 1.67551608, 1.97222205]]) """ assert isinstance(positive, bool) assert isinstance(high, (int, float)), "high must be numeric" assert isinstance(low, (int, float)), "low must be numeric" ptp = high - low assert ptp > 0, "high - low must be strictly positive." angles = np.asarray(angles) assert np.nanmin(angles) >= low, "angles cannot be >= low." assert np.nanmax(angles) <= high, "angles cannot be <= high." # Map to [0, 2pi] range rad = angles * (2 * np.pi) / ptp if not positive: # https://stackoverflow.com/a/29237626/10581531 # Map to [-pi, pi) range: rad = (rad + np.pi) % (2 * np.pi) - np.pi # [-pi, pi) # Map to (-pi, pi] range: # rad = np.angle(np.exp(1j * rad)) # rad = -1 * ((-rad + np.pi) % (2 * np.pi) - np.pi) return rad
[docs] def circ_axial(angles, n): """Transforms n-axial data to a common scale. Parameters ---------- angles : array Sample of angles in radians n : int Number of modes Returns ------- angles : float Transformed angles Notes ----- Tranform data with multiple modes (known as axial data) to a unimodal sample, for the purpose of certain analysis such as computation of a mean resultant vector (see Berens 2009). Examples -------- Transform degrees to unimodal radians in the Berens 2009 neuro dataset. >>> import numpy as np >>> from pingouin import read_dataset >>> from pingouin.circular import circ_axial >>> df = read_dataset('circular') >>> angles = df['Orientation'].to_numpy() >>> angles = circ_axial(np.deg2rad(angles), 2) """ angles = np.asarray(angles) return np.remainder(angles * n, 2 * np.pi)
############################################################################### # DESCRIPTIVE STATISTICS ###############################################################################
[docs] def circ_mean(angles, w=None, axis=0): """Mean direction for (binned) circular data. Parameters ---------- angles : array_like Samples of angles in radians. The range of ``angles`` must be either :math:`[0, 2\\pi]` or :math:`[-\\pi, \\pi]`. If ``angles`` is not expressed in radians (e.g. degrees or 24-hours), please use the :py:func:`pingouin.convert_angles` function prior to using the present function. w : array_like Number of incidences per bins (i.e. "weights"), in case of binned angle data. axis : int or None Compute along this dimension. Default is the first axis (0). Returns ------- mu : float Circular mean, in radians. See also -------- scipy.stats.circmean, scipy.stats.circstd, pingouin.circ_r Notes ----- From Wikipedia: *In mathematics, a mean of circular quantities is a mean which is sometimes better-suited for quantities like angles, daytimes, and fractional parts of real numbers. This is necessary since most of the usual means may not be appropriate on circular quantities. For example, the arithmetic mean of 0° and 360° is 180°, which is misleading because for most purposes 360° is the same thing as 0°. As another example, the "average time" between 11 PM and 1 AM is either midnight or noon, depending on whether the two times are part of a single night or part of a single calendar day.* The circular mean of a set of angles :math:`\\alpha` is defined by: .. math:: \\bar{\\alpha} = \\text{angle} \\left ( \\sum_{j=1}^n \\exp(i \\cdot \\alpha_j) \\right ) For binned angles with weights :math:`w`, this becomes: .. math:: \\bar{\\alpha} = \\text{angle} \\left ( \\sum_{j=1}^n w \\cdot \\exp(i \\cdot \\alpha_j) \\right ) Missing values in ``angles`` are omitted from the calculations. References ---------- * https://en.wikipedia.org/wiki/Mean_of_circular_quantities * Berens, P. (2009). CircStat: A MATLAB Toolbox for Circular Statistics. Journal of Statistical Software, Articles, 31(10), 1–21. https://doi.org/10.18637/jss.v031.i10 Examples -------- 1. Circular mean of a 1-D array of angles, in radians >>> import pingouin as pg >>> angles = [0.785, 1.570, 3.141, 0.839, 5.934] >>> round(pg.circ_mean(angles), 4) 1.013 Compare with SciPy: >>> from scipy.stats import circmean >>> import numpy as np >>> round(circmean(angles, low=0, high=2*np.pi), 4) 1.013 2. Using a 2-D array of angles in degrees >>> np.random.seed(123) >>> deg = np.random.randint(low=0, high=360, size=(3, 5)) >>> deg array([[322, 98, 230, 17, 83], [106, 123, 57, 214, 225], [ 96, 113, 126, 47, 73]]) We first need to convert from degrees to radians: >>> rad = np.round(pg.convert_angles(deg, low=0, high=360), 4) >>> rad array([[-0.6632, 1.7104, -2.2689, 0.2967, 1.4486], [ 1.85 , 2.1468, 0.9948, -2.5482, -2.3562], [ 1.6755, 1.9722, 2.1991, 0.8203, 1.2741]]) >>> pg.circ_mean(rad) # On the first axis (default) array([1.27532162, 1.94336576, 2.23195927, 0.52110503, 1.80240563]) >>> pg.circ_mean(rad, axis=-1) # On the last axis (default) array([0.68920819, 2.49334852, 1.5954149 ]) >>> round(pg.circ_mean(rad, axis=None), 4) # Across the entire array 1.6954 Missing values are omitted from the calculations: >>> rad[0, 0] = np.nan >>> pg.circ_mean(rad) array([1.76275 , 1.94336576, 2.23195927, 0.52110503, 1.80240563]) 3. Using binned angles >>> np.random.seed(123) >>> nbins = 18 # Number of bins to divide the unit circle >>> angles_bins = np.linspace(0, 2 * np.pi, nbins) >>> # w represents the number of incidences per bins, or "weights". >>> w = np.random.randint(low=0, high=5, size=angles_bins.size) >>> round(pg.circ_mean(angles_bins, w), 4) 0.606 """ angles = np.asarray(angles) _checkangles(angles) # Check that angles is in radians w = np.asarray(w) if w is not None else np.ones(angles.shape) assert angles.shape == w.shape, "Input dimensions do not match" return np.angle(np.nansum(np.multiply(w, np.exp(1j * angles)), axis=axis))
[docs] def circ_r(angles, w=None, d=None, axis=0): """Mean resultant vector length for circular data. Parameters ---------- angles : array_like Samples of angles in radians. The range of ``angles`` must be either :math:`[0, 2\\pi]` or :math:`[-\\pi, \\pi]`. If ``angles`` is not expressed in radians (e.g. degrees or 24-hours), please use the :py:func:`pingouin.convert_angles` function prior to using the present function. w : array_like Number of incidences per bins (i.e. "weights"), in case of binned angle data. d : float Spacing (in radians) of bin centers for binned data. If supplied, a correction factor is used to correct for bias in the estimation of r. axis : int or None Compute along this dimension. Default is the first axis (0). Returns ------- r : float Circular mean vector length. See also -------- pingouin.circ_mean Notes ----- The length of the mean resultant vector is a crucial quantity for the measurement of circular spread or hypothesis testing in directional statistics. The closer it is to one, the more concentrated the data sample is around the mean direction (Berens 2009). The circular vector length of a set of angles :math:`\\alpha` is defined by: .. math:: \\bar{\\alpha} = \\frac{1}{N}\\left \\| \\sum_{j=1}^n \\exp(i \\cdot \\alpha_j) \\right \\| Missing values in ``angles`` are omitted from the calculations. References ---------- * https://en.wikipedia.org/wiki/Mean_of_circular_quantities * Berens, P. (2009). CircStat: A MATLAB Toolbox for Circular Statistics. Journal of Statistical Software, Articles, 31(10), 1–21. https://doi.org/10.18637/jss.v031.i10 Examples -------- 1. Mean resultant vector length of a 1-D array of angles, in radians >>> import pingouin as pg >>> angles = [0.785, 1.570, 3.141, 0.839, 5.934] >>> r = pg.circ_r(angles) >>> round(r, 4) 0.4972 Note that there is a close relationship between the vector length and the circular standard deviation, i.e. :math:`\\sigma = \\sqrt{-2 \\ln R}`: >>> import numpy as np >>> round(np.sqrt(-2 * np.log(r)), 4) 1.1821 which gives similar result as SciPy built-in function: >>> from scipy.stats import circstd >>> round(circstd(angles), 4) 1.1821 Sanity check: if all angles are the same, the vector length should be one: >>> angles = [3.14, 3.14, 3.14, 3.14] >>> round(pg.circ_r(angles), 4) 1.0 2. Using a 2-D array of angles in degrees >>> np.random.seed(123) >>> deg = np.random.randint(low=0, high=360, size=(3, 5)) >>> deg array([[322, 98, 230, 17, 83], [106, 123, 57, 214, 225], [ 96, 113, 126, 47, 73]]) We first need to convert from degrees to radians: >>> rad = np.round(pg.convert_angles(deg, low=0, high=360), 4) >>> rad array([[-0.6632, 1.7104, -2.2689, 0.2967, 1.4486], [ 1.85 , 2.1468, 0.9948, -2.5482, -2.3562], [ 1.6755, 1.9722, 2.1991, 0.8203, 1.2741]]) >>> pg.circ_r(rad) # On the first axis (default) array([0.46695499, 0.98398294, 0.3723287 , 0.31103746, 0.42527149]) >>> pg.circ_r(rad, axis=-1) # On the last axis (default) array([0.28099998, 0.45456096, 0.88261161]) >>> round(pg.circ_r(rad, axis=None), 4) # Across the entire array 0.4486 Missing values are omitted from the calculations: >>> rad[0, 0] = np.nan >>> pg.circ_r(rad) array([0.99619613, 0.98398294, 0.3723287 , 0.31103746, 0.42527149]) 3. Using binned angles >>> np.random.seed(123) >>> nbins = 18 # Number of bins to divide the unit circle >>> angles_bins = np.linspace(0, 2 * np.pi, nbins) >>> # w represents the number of incidences per bins, or "weights". >>> w = np.random.randint(low=0, high=5, size=angles_bins.size) >>> round(pg.circ_r(angles_bins, w), 4) 0.3642 """ angles = np.asarray(angles) _checkangles(angles) # Check that angles is in radians w = np.asarray(w) if w is not None else np.ones(angles.shape) assert angles.shape == w.shape, "Input dimensions do not match." # Add np.nan in weight vector (otherwise nansum(w) is wrong) w = w.astype(float) w[np.isnan(angles)] = np.nan # Compute weighted sum of cos and sin of angles: r = np.nansum(np.multiply(w, np.exp(1j * angles)), axis=axis) # Calculate vector length: r = np.abs(r) / np.nansum(w, axis=axis) # For data with known spacing, apply correction factor if d is not None: c = d / 2 / np.sin(d / 2) r = c * r return r
############################################################################### # INFERENTIAL STATISTICS ###############################################################################
[docs] def circ_corrcc(x, y, correction_uniform=False): """Correlation coefficient between two circular variables. Parameters ---------- x : 1-D array_like First circular variable (expressed in radians). y : 1-D array_like Second circular variable (expressed in radians). correction_uniform : bool Use correction for uniform marginals. Returns ------- r : float Correlation coefficient. pval : float Uncorrected p-value. Notes ----- Adapted from the CircStats MATLAB toolbox [1]_. The range of ``x`` and ``y`` must be either :math:`[0, 2\\pi]` or :math:`[-\\pi, \\pi]`. If ``angles`` is not expressed in radians (e.g. degrees or 24-hours), please use the :py:func:`pingouin.convert_angles` function prior to using the present function. Please note that NaN are automatically removed. If the ``correction_uniform`` is True, an alternative equation from [2]_ (p. 177) is used. If the marginal distribution of ``x`` or ``y`` is uniform, the mean is not well defined, which leads to wrong estimates of the circular correlation. The alternative equation corrects for this by choosing the means in a way that maximizes the positive or negative correlation. References ---------- .. [1] Berens, P. (2009). CircStat: A MATLAB Toolbox for Circular Statistics. Journal of Statistical Software, Articles, 31(10), 1–21. https://doi.org/10.18637/jss.v031.i10 .. [2] Jammalamadaka, S. R., & Sengupta, A. (2001). Topics in circular statistics (Vol. 5). world scientific. Examples -------- Compute the r and p-value of two circular variables >>> from pingouin import circ_corrcc >>> x = [0.785, 1.570, 3.141, 3.839, 5.934] >>> y = [0.593, 1.291, 2.879, 3.892, 6.108] >>> r, pval = circ_corrcc(x, y) >>> print(round(r, 3), round(pval, 4)) 0.942 0.0658 With the correction for uniform marginals >>> r, pval = circ_corrcc(x, y, correction_uniform=True) >>> print(round(r, 3), round(pval, 4)) 0.547 0.2859 """ x = np.asarray(x) y = np.asarray(y) assert x.size == y.size, "x and y must have the same length." # Remove NA x, y = remove_na(x, y, paired=True) n = x.size # Compute correlation coefficient x_sin = np.sin(x - circ_mean(x)) y_sin = np.sin(y - circ_mean(y)) if not correction_uniform: # Similar to np.corrcoef(x_sin, y_sin)[0][1] r = np.sum(x_sin * y_sin) / np.sqrt(np.sum(x_sin**2) * np.sum(y_sin**2)) else: r_minus = np.abs(np.sum(np.exp((x - y) * 1j))) r_plus = np.abs(np.sum(np.exp((x + y) * 1j))) denom = 2 * np.sqrt(np.sum(x_sin**2) * np.sum(y_sin**2)) r = (r_minus - r_plus) / denom # Compute T- and p-values tval = np.sqrt((n * (x_sin**2).mean() * (y_sin**2).mean()) / np.mean(x_sin**2 * y_sin**2)) * r # Approximately distributed as a standard normal pval = 2 * norm.sf(abs(tval)) return r, pval
[docs] def circ_corrcl(x, y): """Correlation coefficient between one circular and one linear variable random variables. Parameters ---------- x : 1-D array_like First circular variable (expressed in radians). The range of ``x`` must be either :math:`[0, 2\\pi]` or :math:`[-\\pi, \\pi]`. If ``angles`` is not expressed in radians (e.g. degrees or 24-hours), please use the :py:func:`pingouin.convert_angles` function prior to using the present function. y : 1-D array_like Second circular variable (linear) Returns ------- r : float Correlation coefficient pval : float Uncorrected p-value Notes ----- Please note that NaN are automatically removed from datasets. Examples -------- Compute the r and p-value between one circular and one linear variables. >>> from pingouin import circ_corrcl >>> x = [0.785, 1.570, 3.141, 0.839, 5.934] >>> y = [1.593, 1.291, -0.248, -2.892, 0.102] >>> r, pval = circ_corrcl(x, y) >>> print(round(r, 3), round(pval, 3)) 0.109 0.971 """ from scipy.stats import pearsonr, chi2 x = np.asarray(x) y = np.asarray(y) assert x.size == y.size, "x and y must have the same length." # Remove NA x, y = remove_na(x, y, paired=True) n = x.size # Compute correlation coefficent for sin and cos independently rxs = pearsonr(y, np.sin(x))[0] rxc = pearsonr(y, np.cos(x))[0] rcs = pearsonr(np.sin(x), np.cos(x))[0] # Compute angular-linear correlation (equ. 27.47) r = np.sqrt((rxc**2 + rxs**2 - 2 * rxc * rxs * rcs) / (1 - rcs**2)) # Compute p-value pval = chi2.sf(n * r**2, 2) return r, pval
[docs] def circ_rayleigh(angles, w=None, d=None): """Rayleigh test for non-uniformity of circular data. Parameters ---------- angles : 1-D array_like Samples of angles in radians. The range of ``angles`` must be either :math:`[0, 2\\pi]` or :math:`[-\\pi, \\pi]`. If ``angles`` is not expressed in radians (e.g. degrees or 24-hours), please use the :py:func:`pingouin.convert_angles` function prior to using the present function. w : array_like Number of incidences per bins (i.e. "weights"), in case of binned angle data. d : float Spacing (in radians) of bin centers for binned data. If supplied, a correction factor is used to correct for bias in the estimation of r. Returns ------- z : float Z-statistic pval : float P-value Notes ----- The Rayleigh test asks how large the resultant vector length R must be to indicate a non-uniform distribution (Fisher 1995). H0: the population is uniformly distributed around the circle HA: the populatoin is not distributed uniformly around the circle The assumptions for the Rayleigh test are that (1) the distribution has only one mode and (2) the data is sampled from a von Mises distribution. Examples -------- 1. Simple Rayleigh test for non-uniformity of circular data. >>> from pingouin import circ_rayleigh >>> x = [0.785, 1.570, 3.141, 0.839, 5.934] >>> z, pval = circ_rayleigh(x) >>> print(round(z, 3), round(pval, 6)) 1.236 0.304844 2. Specifying w and d >>> z, pval = circ_rayleigh(x, w=[.1, .2, .3, .4, .5], d=0.2) >>> print(round(z, 3), round(pval, 6)) 0.278 0.806997 """ angles = np.asarray(angles) _checkangles(angles) # Check that angles is in radians if w is None: r = circ_r(angles) n = len(angles) else: assert len(angles) == len(w), "Input dimensions do not match" r = circ_r(angles, w, d) n = np.sum(w) # Compute Rayleigh's statistic R = n * r z = (R**2) / n # Compute p value using approxation in Zar (1999), p. 617 pval = np.exp(np.sqrt(1 + 4 * n + 4 * (n**2 - R**2)) - (1 + 2 * n)) return z, pval
[docs] def circ_vtest(angles, dir=0.0, w=None, d=None): """V test for non-uniformity of circular data with a specified mean direction. Parameters ---------- angles : 1-D array_like Samples of angles in radians. The range of ``angles`` must be either :math:`[0, 2\\pi]` or :math:`[-\\pi, \\pi]`. If ``angles`` is not expressed in radians (e.g. degrees or 24-hours), please use the :py:func:`pingouin.convert_angles` function prior to using the present function. dir : float Suspected mean direction (angle in radians). w : array_like Number of incidences per bins (i.e. "weights"), in case of binned angle data. d : float Spacing (in radians) of bin centers for binned data. If supplied, a correction factor is used to correct for bias in the estimation of r. Returns ------- V : float V-statistic pval : float P-value Notes ----- H0: the population is uniformly distributed around the circle. HA: the population is not distributed uniformly around the circle but has a mean of dir. Note: Not rejecting H0 may mean that the population is uniformly distributed around the circle OR that it has a mode but that this mode is not centered at dir. The V test has more power than the Rayleigh test and is preferred if there is reason to believe in a specific mean direction. Adapted from the Matlab Circular Statistics Toolbox. Examples -------- 1. V-test for non-uniformity of circular data. >>> from pingouin import circ_vtest >>> x = [0.785, 1.570, 3.141, 0.839, 5.934] >>> v, pval = circ_vtest(x, dir=1) >>> print(round(v, 3), pval) 2.486 0.05794648732225438 2. Specifying w and d >>> v, pval = circ_vtest(x, dir=0.5, w=[.1, .2, .3, .4, .5], d=0.2) >>> print(round(v, 3), round(pval, 5)) 0.637 0.23086 """ angles = np.asarray(angles) if w is None: r = circ_r(angles) mu = circ_mean(angles) n = len(angles) else: assert len(angles) == len(w), "Input dimensions do not match" r = circ_r(angles, w, d) mu = circ_mean(angles, w) n = np.sum(w) # Compute Rayleigh and V statistics R = n * r v = R * np.cos(mu - dir) # Compute p value u = v * np.sqrt(2 / n) pval = 1 - norm.cdf(u) return v, pval