# Author: Raphael Vallat <raphaelvallat9@gmail.com>
# Date: April 2018
import warnings
import numpy as np
from scipy import stats
from scipy.optimize import brenth
__all__ = [
"power_ttest",
"power_ttest2n",
"power_anova",
"power_rm_anova",
"power_corr",
"power_chi2",
]
[docs]
def power_ttest(
d=None, n=None, power=None, alpha=0.05, contrast="two-samples", alternative="two-sided"
):
"""
Evaluate power, sample size, effect size or significance level of a one-sample T-test,
a paired T-test or an independent two-samples T-test with equal sample sizes.
Parameters
----------
d : float
Cohen d effect size
n : int
Sample size
In case of a two-sample T-test, sample sizes are assumed to be equal.
Otherwise, see the :py:func:`power_ttest2n` function.
power : float
Test power (= 1 - type II error).
alpha : float
Significance level (type I error probability).
The default is 0.05.
contrast : str
Can be `"one-sample"`, `"two-samples"` or `"paired"`.
Note that `"one-sample"` and `"paired"` have the same behavior.
alternative : string
Defines the alternative hypothesis, or tail of the test. Must be one of
"two-sided" (default), "greater" or "less".
Notes
-----
Exactly ONE of the parameters ``d``, ``n``, ``power`` and ``alpha`` must be passed as None, and
that parameter is determined from the others.
For a paired T-test, the sample size ``n`` corresponds to the number of pairs. For an
independent two-sample T-test with equal sample sizes, ``n`` corresponds to the sample size of
each group (i.e. number of observations in one group). If the sample sizes are unequal, please
use the :py:func:`power_ttest2n` function instead.
``alpha`` has a default value of 0.05 so None must be explicitly passed if you want to
compute it.
This function is a Python adaptation of the `pwr.t.test` function implemented in the
`pwr <https://cran.r-project.org/web/packages/pwr/pwr.pdf>`_ R package.
Statistical power is the likelihood that a study will detect an effect when there is an effect
there to be detected. A high statistical power means that there is a low probability of
concluding that there is no effect when there is one. Statistical power is mainly affected by
the effect size and the sample size.
The first step is to use the Cohen's d to calculate the non-centrality parameter
:math:`\\delta` and degrees of freedom :math:`v`. In case of paired groups, this is:
.. math:: \\delta = d * \\sqrt n
.. math:: v = n - 1
and in case of independent groups with equal sample sizes:
.. math:: \\delta = d * \\sqrt{\\frac{n}{2}}
.. math:: v = (n - 1) * 2
where :math:`d` is the Cohen d and :math:`n` the sample size.
The critical value is then found using the percent point function of the T distribution with
:math:`q = 1 - alpha` and :math:`v` degrees of freedom.
Finally, the power of the test is given by the survival function of the non-central
distribution using the previously calculated critical value, degrees of freedom and
non-centrality parameter.
:py:func:`scipy.optimize.brenth` is used to solve power equations for other variables (i.e.
sample size, effect size, or significance level). If the solving fails, a nan value is
returned.
Results have been tested against GPower and the
`pwr <https://cran.r-project.org/web/packages/pwr/pwr.pdf>`_ R package.
Examples
--------
1. Compute power of a one-sample T-test given ``d``, ``n`` and ``alpha``
>>> from pingouin import power_ttest
>>> print('power: %.4f' % power_ttest(d=0.5, n=20, contrast='one-sample'))
power: 0.5645
2. Compute required sample size given ``d``, ``power`` and ``alpha``
>>> print('n: %.4f' % power_ttest(d=0.5, power=0.80, alternative='greater'))
n: 50.1508
3. Compute achieved ``d`` given ``n``, ``power`` and ``alpha`` level
>>> print('d: %.4f' % power_ttest(n=20, power=0.80, alpha=0.05, contrast='paired'))
d: 0.6604
4. Compute achieved alpha level given ``d``, ``n`` and ``power``
>>> print('alpha: %.4f' % power_ttest(d=0.5, n=20, power=0.80, alpha=None))
alpha: 0.4430
5. One-sided tests
>>> from pingouin import power_ttest
>>> print('power: %.4f' % power_ttest(d=0.5, n=20, alternative='greater'))
power: 0.4634
>>> print('power: %.4f' % power_ttest(d=0.5, n=20, alternative='less'))
power: 0.0007
"""
# Check the number of arguments that are None
n_none = sum([v is None for v in [d, n, power, alpha]])
if n_none != 1:
raise ValueError("Exactly one of n, d, power, and alpha must be None.")
# Safety checks
assert alternative in [
"two-sided",
"greater",
"less",
], "Alternative must be one of 'two-sided' (default), 'greater' or 'less'."
assert contrast.lower() in ["one-sample", "paired", "two-samples"]
tsample = 2 if contrast.lower() == "two-samples" else 1
tside = 2 if alternative == "two-sided" else 1
if d is not None and tside == 2:
d = abs(d)
if alpha is not None:
assert 0 < alpha <= 1
if power is not None:
assert 0 < power <= 1
if alternative == "less":
def func(d, n, power, alpha):
dof = (n - 1) * tsample
nc = d * np.sqrt(n / tsample)
tcrit = stats.t.ppf(alpha / tside, dof)
return stats.nct.cdf(tcrit, dof, nc)
elif alternative == "two-sided":
def func(d, n, power, alpha):
dof = (n - 1) * tsample
nc = d * np.sqrt(n / tsample)
tcrit = stats.t.ppf(1 - alpha / tside, dof)
return stats.nct.sf(tcrit, dof, nc) + stats.nct.cdf(-tcrit, dof, nc)
else: # Alternative = 'greater'
def func(d, n, power, alpha):
dof = (n - 1) * tsample
nc = d * np.sqrt(n / tsample)
tcrit = stats.t.ppf(1 - alpha / tside, dof)
return stats.nct.sf(tcrit, dof, nc)
# Evaluate missing variable
if power is None:
# Compute achieved power given d, n and alpha
return func(d, n, power=None, alpha=alpha)
elif n is None:
# Compute required sample size given d, power and alpha
def _eval_n(n, d, power, alpha):
return func(d, n, power, alpha) - power
try:
return brenth(_eval_n, 2 + 1e-10, 1e07, args=(d, power, alpha))
except ValueError: # pragma: no cover
return np.nan
elif d is None:
# Compute achieved d given sample size, power and alpha level
if alternative == "two-sided":
b0, b1 = 1e-07, 10
elif alternative == "less":
b0, b1 = -10, 5
else:
b0, b1 = -5, 10
def _eval_d(d, n, power, alpha):
return func(d, n, power, alpha) - power
try:
return brenth(_eval_d, b0, b1, args=(n, power, alpha))
except ValueError: # pragma: no cover
return np.nan
else:
# Compute achieved alpha (significance) level given d, n and power
def _eval_alpha(alpha, d, n, power):
return func(d, n, power, alpha) - power
try:
return brenth(_eval_alpha, 1e-10, 1 - 1e-10, args=(d, n, power))
except ValueError: # pragma: no cover
return np.nan
[docs]
def power_ttest2n(nx, ny, d=None, power=None, alpha=0.05, alternative="two-sided"):
"""
Evaluate power, effect size or significance level of an independent two-samples T-test
with unequal sample sizes.
Parameters
----------
nx, ny : int
Sample sizes. Must be specified. If the sample sizes are equal, you should use the
:py:func:`power_ttest` function instead.
d : float
Cohen d effect size
power : float
Test power (= 1 - type II error).
alpha : float
Significance level (type I error probability). The default is 0.05.
alternative : string
Defines the alternative hypothesis, or tail of the test. Must be one of "two-sided"
(default), "greater" or "less".
Notes
-----
Exactly ONE of the parameters ``d``, ``power`` and ``alpha`` must be passed as None, and that
parameter is determined from the others.
``alpha`` has a default value of 0.05 so None must be explicitly passed if you want to compute
it.
This function is a Python adaptation of the `pwr.t2n.test` function implemented in the
`pwr <https://cran.r-project.org/web/packages/pwr/pwr.pdf>`_ R package.
Statistical power is the likelihood that a study will detect an effect when there is an effect
there to be detected. A high statistical power means that there is a low probability of
concluding that there is no effect when there is one. Statistical power is mainly affected by
the effect size and the sample size.
The first step is to use the Cohen's d to calculate the non-centrality parameter
:math:`\\delta` and degrees of freedom :math:`v`.cIn case of two independent groups with
unequal sample sizes, this is:
.. math:: \\delta = d * \\sqrt{\\frac{n_i * n_j}{n_i + n_j}}
.. math:: v = n_i + n_j - 2
where :math:`d` is the Cohen d, :math:`n` the sample size,
:math:`n_i` the sample size of the first group and
:math:`n_j` the sample size of the second group,
The critical value is then found using the percent point function of the T distribution with
:math:`q = 1 - alpha` and :math:`v` degrees of freedom.
Finally, the power of the test is given by the survival function of the non-central
distribution using the previously calculated critical value, degrees of freedom and
non-centrality parameter.
:py:func:`scipy.optimize.brenth` is used to solve power equations for other variables (i.e.
sample size, effect size, or significance level). If the solving fails, a nan value is
returned.
Results have been tested against GPower and the
`pwr <https://cran.r-project.org/web/packages/pwr/pwr.pdf>`_ R package.
Examples
--------
1. Compute achieved power of a T-test given ``d``, ``n`` and ``alpha``
>>> from pingouin import power_ttest2n
>>> print('power: %.4f' % power_ttest2n(nx=20, ny=15, d=0.5, alternative='greater'))
power: 0.4164
2. Compute achieved ``d`` given ``n``, ``power`` and ``alpha`` level
>>> print('d: %.4f' % power_ttest2n(nx=20, ny=15, power=0.80, alpha=0.05))
d: 0.9859
3. Compute achieved alpha level given ``d``, ``n`` and ``power``
>>> print('alpha: %.4f' % power_ttest2n(nx=20, ny=15, d=0.5, power=0.80, alpha=None))
alpha: 0.5000
"""
# Check the number of arguments that are None
n_none = sum([v is None for v in [d, power, alpha]])
if n_none != 1:
raise ValueError("Exactly one of d, power, and alpha must be None")
# Safety checks
assert alternative in [
"two-sided",
"greater",
"less",
], "Alternative must be one of 'two-sided' (default), 'greater' or 'less'."
tside = 2 if alternative == "two-sided" else 1
if d is not None and tside == 2:
d = abs(d)
if alpha is not None:
assert 0 < alpha <= 1
if power is not None:
assert 0 < power <= 1
if alternative == "less":
def func(d, nx, ny, power, alpha):
dof = nx + ny - 2
nc = d * (1 / np.sqrt(1 / nx + 1 / ny))
tcrit = stats.t.ppf(alpha / tside, dof)
return stats.nct.cdf(tcrit, dof, nc)
elif alternative == "two-sided":
def func(d, nx, ny, power, alpha):
dof = nx + ny - 2
nc = d * (1 / np.sqrt(1 / nx + 1 / ny))
tcrit = stats.t.ppf(1 - alpha / tside, dof)
return stats.nct.sf(tcrit, dof, nc) + stats.nct.cdf(-tcrit, dof, nc)
else: # Alternative = 'greater'
def func(d, nx, ny, power, alpha):
dof = nx + ny - 2
nc = d * (1 / np.sqrt(1 / nx + 1 / ny))
tcrit = stats.t.ppf(1 - alpha / tside, dof)
return stats.nct.sf(tcrit, dof, nc)
# Evaluate missing variable
if power is None:
# Compute achieved power given d, n and alpha
return func(d, nx, ny, power=None, alpha=alpha)
elif d is None:
# Compute achieved d given sample size, power and alpha level
if alternative == "two-sided":
b0, b1 = 1e-07, 10
elif alternative == "less":
b0, b1 = -10, 5
else:
b0, b1 = -5, 10
def _eval_d(d, nx, ny, power, alpha):
return func(d, nx, ny, power, alpha) - power
try:
return brenth(_eval_d, b0, b1, args=(nx, ny, power, alpha))
except ValueError: # pragma: no cover
return np.nan
else:
# Compute achieved alpha (significance) level given d, n and power
def _eval_alpha(alpha, d, nx, ny, power):
return func(d, nx, ny, power, alpha) - power
try:
return brenth(_eval_alpha, 1e-10, 1 - 1e-10, args=(d, nx, ny, power))
except ValueError: # pragma: no cover
return np.nan
[docs]
def power_anova(eta_squared=None, k=None, n=None, power=None, alpha=0.05):
"""
Evaluate power, sample size, effect size or significance level of a one-way balanced ANOVA.
Parameters
----------
eta_squared : float
ANOVA effect size (eta-squared, :math:`\\eta^2`).
k : int
Number of groups
n : int
Sample size per group. Groups are assumed to be balanced (i.e. same sample size).
power : float
Test power (= 1 - type II error).
alpha : float
Significance level :math:`\\alpha` (type I error probability). The default is 0.05.
Notes
-----
Exactly ONE of the parameters ``eta_squared``, ``k``, ``n``, ``power`` and ``alpha``
must be passed as None, and that parameter is determined from the others.
``alpha`` has a default value of 0.05 so None must be explicitly passed if you want to
compute it.
This function is a Python adaptation of the `pwr.anova.test` function implemented in the
`pwr <https://cran.r-project.org/web/packages/pwr/pwr.pdf>`_ R package.
Statistical power is the likelihood that a study will detect an effect when there is an
effect there to be detected. A high statistical power means that there is a low probability of
concluding that there is no effect when there is one. Statistical power is mainly affected by
the effect size and the sample size.
For one-way ANOVA, eta-squared is the same as partial eta-squared. It can be evaluated from the
F-value (:math:`F^*`) and the degrees of freedom of the ANOVA (:math:`v_1, v_2`) using the
following formula:
.. math:: \\eta^2 = \\frac{v_1 F^*}{v_1 F^* + v_2}
GPower uses the :math:`f` effect size instead of the :math:`\\eta^2`. The formula to convert
from one to the other are given below:
.. math:: f = \\sqrt{\\frac{\\eta^2}{1 - \\eta^2}}
.. math:: \\eta^2 = \\frac{f^2}{1 + f^2}
Using :math:`\\eta^2` and the total sample size :math:`N`, the non-centrality parameter is
defined by:
.. math:: \\delta = N * \\frac{\\eta^2}{1 - \\eta^2}
Then the critical value of the non-central F-distribution is computed using the percentile
point function of the F-distribution with:
.. math:: q = 1 - \\alpha
.. math:: v_1 = k - 1
.. math:: v_2 = N - k
where :math:`k` is the number of groups.
Finally, the power of the ANOVA is calculated using the survival function of the non-central
F-distribution using the previously computed critical value, non-centrality parameter, and
degrees of freedom.
:py:func:`scipy.optimize.brenth` is used to solve power equations for other variables (i.e.
sample size, effect size, or significance level). If the solving fails, a nan value is
returned.
Results have been tested against GPower and the
`pwr <https://cran.r-project.org/web/packages/pwr/pwr.pdf>`_ R package.
Examples
--------
1. Compute achieved power
>>> from pingouin import power_anova
>>> print('power: %.4f' % power_anova(eta_squared=0.1, k=3, n=20))
power: 0.6082
2. Compute required number of groups
>>> print('k: %.4f' % power_anova(eta_squared=0.1, n=20, power=0.80))
k: 6.0944
3. Compute required sample size
>>> print('n: %.4f' % power_anova(eta_squared=0.1, k=3, power=0.80))
n: 29.9256
4. Compute achieved effect size
>>> print('eta-squared: %.4f' % power_anova(n=20, k=4, power=0.80, alpha=0.05))
eta-squared: 0.1255
5. Compute achieved alpha (significance)
>>> print('alpha: %.4f' % power_anova(eta_squared=0.1, n=20, k=4, power=0.80, alpha=None))
alpha: 0.1085
"""
# Check the number of arguments that are None
n_none = sum([v is None for v in [eta_squared, k, n, power, alpha]])
if n_none != 1:
err = "Exactly one of eta, k, n, power, and alpha must be None."
raise ValueError(err)
# Safety checks
if eta_squared is not None:
eta_squared = abs(eta_squared)
f_sq = eta_squared / (1 - eta_squared)
if alpha is not None:
assert 0 < alpha <= 1
if power is not None:
assert 0 < power <= 1
def func(f_sq, k, n, power, alpha):
nc = (n * k) * f_sq
dof1 = k - 1
dof2 = (n * k) - k
fcrit = stats.f.ppf(1 - alpha, dof1, dof2)
return stats.ncf.sf(fcrit, dof1, dof2, nc)
# Evaluate missing variable
if power is None:
# Compute achieved power
return func(f_sq, k, n, power, alpha)
elif k is None:
# Compute required number of groups
def _eval_k(k, f_sq, n, power, alpha):
return func(f_sq, k, n, power, alpha) - power
try:
return brenth(_eval_k, 2, 100, args=(f_sq, n, power, alpha))
except ValueError: # pragma: no cover
return np.nan
elif n is None:
# Compute required sample size
def _eval_n(n, f_sq, k, power, alpha):
return func(f_sq, k, n, power, alpha) - power
try:
return brenth(_eval_n, 2, 1e07, args=(f_sq, k, power, alpha))
except ValueError: # pragma: no cover
return np.nan
elif eta_squared is None:
# Compute achieved eta-squared
def _eval_eta(f_sq, k, n, power, alpha):
return func(f_sq, k, n, power, alpha) - power
try:
f_sq = brenth(_eval_eta, 1e-10, 1 - 1e-10, args=(k, n, power, alpha))
return f_sq / (f_sq + 1) # Return eta-square
except ValueError: # pragma: no cover
return np.nan
else:
# Compute achieved alpha
def _eval_alpha(alpha, f_sq, k, n, power):
return func(f_sq, k, n, power, alpha) - power
try:
return brenth(_eval_alpha, 1e-10, 1 - 1e-10, args=(f_sq, k, n, power))
except ValueError: # pragma: no cover
return np.nan
[docs]
def power_rm_anova(eta_squared=None, m=None, n=None, power=None, alpha=0.05, corr=0.5, epsilon=1):
"""
Evaluate power, sample size, effect size or significance level of a balanced one-way
repeated measures ANOVA.
Parameters
----------
eta_squared : float
ANOVA effect size (eta-squared, :math:`\\eta^2`).
m : int
Number of repeated measurements.
n : int
Sample size per measurement. All measurements must have the same sample size.
power : float
Test power (= 1 - type II error).
alpha : float
Significance level :math:`\\alpha` (type I error probability). The default is 0.05.
corr : float
Average correlation coefficient among repeated measurements. The default is :math:`r=0.5`.
epsilon : float
Epsilon adjustement factor for sphericity. This can be calculated using the
:py:func:`pingouin.epsilon` function.
Notes
-----
Exactly ONE of the parameters ``eta_squared``, ``m``, ``n``, ``power`` and ``alpha`` must be
passed as None, and that parameter is determined from the others.
``alpha`` has a default value of 0.05 so None must be explicitly passed if you want to
compute it.
Statistical power is the likelihood that a study will detect an effect when there is an effect
there to be detected. A high statistical power means that there is a low probability of
concluding that there is no effect when there is one. Statistical power is mainly affected by
the effect size and the sample size.
GPower uses the :math:`f` effect size instead of the :math:`\\eta^2`. The formula to convert
from one to the other are given below:
.. math:: f = \\sqrt{\\frac{\\eta^2}{1 - \\eta^2}}
.. math:: \\eta^2 = \\frac{f^2}{1 + f^2}
Using :math:`\\eta^2`, the sample size :math:`N`, the number of repeated measurements
:math:`m`, the epsilon correction factor :math:`\\epsilon` (see :py:func:`pingouin.epsilon`),
and the average correlation between the repeated measures :math:`c`, one can then calculate the
non-centrality parameter as follow:
.. math:: \\delta = \\frac{f^2 * N * m * \\epsilon}{1 - c}
Then the critical value of the non-central F-distribution is computed using the percentile
point function of the F-distribution with:
.. math:: q = 1 - \\alpha
.. math:: v_1 = (m - 1) * \\epsilon
.. math:: v_2 = (N - 1) * v_1
Finally, the power of the ANOVA is calculated using the survival function of the non-central
F-distribution using the previously computed critical value, non-centrality parameter,
and degrees of freedom.
:py:func:`scipy.optimize.brenth` is used to solve power equations for other variables
(i.e. sample size, effect size, or significance level). If the solving fails, a nan value is
returned.
Results have been tested against GPower and the
`pwr <https://cran.r-project.org/web/packages/pwr/pwr.pdf>`_ R package.
Examples
--------
1. Compute achieved power
>>> from pingouin import power_rm_anova
>>> print('power: %.4f' % power_rm_anova(eta_squared=0.1, m=3, n=20))
power: 0.8913
2. Compute required number of groups
>>> print('m: %.4f' % power_rm_anova(eta_squared=0.1, n=20, power=0.90))
m: 3.1347
3. Compute required sample size
>>> print('n: %.4f' % power_rm_anova(eta_squared=0.1, m=3, power=0.80))
n: 15.9979
4. Compute achieved effect size
>>> print('eta-squared: %.4f' % power_rm_anova(n=20, m=4, power=0.80, alpha=0.05))
eta-squared: 0.0680
5. Compute achieved alpha (significance)
>>> print('alpha: %.4f' % power_rm_anova(eta_squared=0.1, n=20, m=4, power=0.80, alpha=None))
alpha: 0.0081
Let's take a more concrete example. First, we'll load a repeated measures
dataset in wide-format. Each row is an observation (e.g. a subject), and
each column a successive repeated measurements (e.g t=0, t=1, ...).
>>> import pingouin as pg
>>> data = pg.read_dataset('rm_anova_wide')
>>> data.head()
Before 1 week 2 week 3 week
0 4.3 5.3 4.8 6.3
1 3.9 2.3 5.6 4.3
2 4.5 2.6 4.1 NaN
3 5.1 4.2 6.0 6.3
4 3.8 3.6 4.8 6.8
Note that this dataset has some missing values. We'll simply delete any row with one or more
missing values, and then compute a repeated measures ANOVA:
>>> data = data.dropna()
>>> pg.rm_anova(data, effsize="n2").round(3)
Source ddof1 ddof2 F p-unc n2 eps
0 Within 3 24 5.201 0.007 0.346 0.694
The repeated measures ANOVA is significant at the 0.05 level. Now, we can
easily compute the power of the ANOVA with the information in the ANOVA table:
>>> # n is the sample size and m is the number of repeated measures
>>> n, m = data.shape
>>> round(pg.power_rm_anova(eta_squared=0.346, m=m, n=n, epsilon=0.694), 3)
0.99
Our ANOVA has a very high statistical power. However, to be even more accurate in our power
calculation, we should also fill in the average correlation among repeated measurements.
Since our dataframe is in wide-format (with each column being a successive measurement), this
can be done by taking the mean of the superdiagonal of the correlation matrix, which is similar
to manually calculating the correlation between each successive pairwise measurements and then
taking the mean. Since correlation coefficients are not normally distributed, we use the
*r-to-z* transform prior to averaging (:py:func:`numpy.arctanh`), and then the *z-to-r*
transform (:py:func:`numpy.tanh`) to convert back to a correlation coefficient. This gives a
more precise estimate of the mean.
>>> import numpy as np
>>> corr = np.diag(data.corr(), k=1)
>>> avgcorr = np.tanh(np.arctanh(corr).mean())
>>> round(avgcorr, 4)
-0.1996
In this example, we're using a fake dataset and the average correlation is negative. However,
it will most likely be positive with real data. Let's now compute the final power of the
repeated measures ANOVA:
>>> round(pg.power_rm_anova(eta_squared=0.346, m=m, n=n, epsilon=0.694, corr=avgcorr), 3)
0.771
"""
# Check the number of arguments that are None
n_none = sum([v is None for v in [eta_squared, m, n, power, alpha]])
if n_none != 1:
msg = "Exactly one of eta, m, n, power, and alpha must be None."
raise ValueError(msg)
# Safety checks
assert 0 < epsilon <= 1, "epsilon must be between 0 and 1."
assert -1 < corr < 1, "corr must be between -1 and 1."
if eta_squared is not None:
eta_squared = abs(eta_squared)
f_sq = eta_squared / (1 - eta_squared)
if alpha is not None:
assert 0 < alpha <= 1, "alpha must be between 0 and 1."
if power is not None:
assert 0 < power <= 1, "power must be between 0 and 1."
if n is not None:
assert n > 1, "The sample size n must be > 1."
if m is not None:
assert m > 1, "The number of repeated measures m must be > 1."
def func(f_sq, m, n, power, alpha, corr):
dof1 = (m - 1) * epsilon
dof2 = (n - 1) * dof1
nc = (f_sq * n * m * epsilon) / (1 - corr)
fcrit = stats.f.ppf(1 - alpha, dof1, dof2)
return stats.ncf.sf(fcrit, dof1, dof2, nc)
# Evaluate missing variable
if power is None:
# Compute achieved power
return func(f_sq, m, n, power, alpha, corr)
elif m is None:
# Compute required number of repeated measures
def _eval_m(m, f_sq, n, power, alpha, corr):
return func(f_sq, m, n, power, alpha, corr) - power
try:
return brenth(_eval_m, 2, 100, args=(f_sq, n, power, alpha, corr))
except ValueError: # pragma: no cover
return np.nan
elif n is None:
# Compute required sample size
def _eval_n(n, f_sq, m, power, alpha, corr):
return func(f_sq, m, n, power, alpha, corr) - power
try:
return brenth(_eval_n, 5, 1e6, args=(f_sq, m, power, alpha, corr))
except ValueError: # pragma: no cover
return np.nan
elif eta_squared is None:
# Compute achieved eta
def _eval_eta(f_sq, m, n, power, alpha, corr):
return func(f_sq, m, n, power, alpha, corr) - power
try:
f_sq = brenth(_eval_eta, 1e-10, 1 - 1e-10, args=(m, n, power, alpha, corr))
return f_sq / (f_sq + 1) # Return eta-square
except ValueError: # pragma: no cover
return np.nan
else:
# Compute achieved alpha
def _eval_alpha(alpha, f_sq, m, n, power, corr):
return func(f_sq, m, n, power, alpha, corr) - power
try:
return brenth(_eval_alpha, 1e-10, 1 - 1e-10, args=(f_sq, m, n, power, corr))
except ValueError: # pragma: no cover
return np.nan
[docs]
def power_corr(r=None, n=None, power=None, alpha=0.05, alternative="two-sided"):
"""
Evaluate power, sample size, correlation coefficient or significance level of a correlation
test.
Parameters
----------
r : float
Correlation coefficient.
n : int
Number of observations (sample size).
power : float
Test power (= 1 - type II error).
alpha : float
Significance level (type I error probability). The default is 0.05.
alternative : string
Defines the alternative hypothesis, or tail of the correlation. Must be one of
"two-sided" (default), "greater" or "less". Both "greater" and "less" return a one-sided
p-value. "greater" tests against the alternative hypothesis that the correlation is
positive (greater than zero), "less" tests against the hypothesis that the correlation is
negative.
Notes
-----
Exactly ONE of the parameters ``r``, ``n``, ``power`` and ``alpha`` must be passed as None,
and that parameter is determined from the others.
``alpha`` has a default value of 0.05 so None must be explicitly passed if you want to
compute it.
:py:func:`scipy.optimize.brenth` is used to solve power equations for other variables (i.e.
sample size, effect size, or significance level). If the solving fails, a nan value is
returned.
This function is a Python adaptation of the `pwr.r.test` function implemented in the
`pwr <https://cran.r-project.org/web/packages/pwr/pwr.pdf>`_ R package.
Examples
--------
1. Compute achieved power given ``r``, ``n`` and ``alpha``
>>> from pingouin import power_corr
>>> print('power: %.4f' % power_corr(r=0.5, n=20))
power: 0.6379
2. Same but one-sided test
>>> print('power: %.4f' % power_corr(r=0.5, n=20, alternative="greater"))
power: 0.7510
>>> print('power: %.4f' % power_corr(r=0.5, n=20, alternative="less"))
power: 0.0000
3. Compute required sample size given ``r``, ``power`` and ``alpha``
>>> print('n: %.4f' % power_corr(r=0.5, power=0.80))
n: 28.2484
4. Compute achieved ``r`` given ``n``, ``power`` and ``alpha`` level
>>> print('r: %.4f' % power_corr(n=20, power=0.80, alpha=0.05))
r: 0.5822
5. Compute achieved alpha level given ``r``, ``n`` and ``power``
>>> print('alpha: %.4f' % power_corr(r=0.5, n=20, power=0.80, alpha=None))
alpha: 0.1377
"""
# Check the number of arguments that are None
n_none = sum([v is None for v in [r, n, power, alpha]])
if n_none != 1:
raise ValueError("Exactly one of n, r, power, and alpha must be None")
# Safety checks
assert alternative in [
"two-sided",
"greater",
"less",
], "Alternative must be one of 'two-sided' (default), 'greater' or 'less'."
if r is not None:
assert -1 <= r <= 1
if alternative == "two-sided":
r = abs(r)
if alpha is not None:
assert 0 < alpha <= 1
if power is not None:
assert 0 < power <= 1
if n is not None:
if n <= 4:
warnings.warn("Sample size is too small to estimate power (n <= 4). Returning NaN.")
return np.nan
# Define main function
if alternative == "two-sided":
def func(r, n, power, alpha):
dof = n - 2
ttt = stats.t.ppf(1 - alpha / 2, dof)
rc = np.sqrt(ttt**2 / (ttt**2 + dof))
zr = np.arctanh(r) + r / (2 * (n - 1))
zrc = np.arctanh(rc)
power = stats.norm.cdf((zr - zrc) * np.sqrt(n - 3)) + stats.norm.cdf(
(-zr - zrc) * np.sqrt(n - 3)
)
return power
elif alternative == "greater":
def func(r, n, power, alpha):
dof = n - 2
ttt = stats.t.ppf(1 - alpha, dof)
rc = np.sqrt(ttt**2 / (ttt**2 + dof))
zr = np.arctanh(r) + r / (2 * (n - 1))
zrc = np.arctanh(rc)
power = stats.norm.cdf((zr - zrc) * np.sqrt(n - 3))
return power
else: # alternative == "less":
def func(r, n, power, alpha):
r = -r
dof = n - 2
ttt = stats.t.ppf(1 - alpha, dof)
rc = np.sqrt(ttt**2 / (ttt**2 + dof))
zr = np.arctanh(r) + r / (2 * (n - 1))
zrc = np.arctanh(rc)
power = stats.norm.cdf((zr - zrc) * np.sqrt(n - 3))
return power
# Evaluate missing variable
if power is None and n is not None and r is not None:
# Compute achieved power given r, n and alpha
return func(r, n, power=None, alpha=alpha)
elif n is None and power is not None and r is not None:
# Compute required sample size given r, power and alpha
def _eval_n(n, r, power, alpha):
return func(r, n, power, alpha) - power
try:
return brenth(_eval_n, 4 + 1e-10, 1e09, args=(r, power, alpha))
except ValueError: # pragma: no cover
return np.nan
elif r is None and power is not None and n is not None:
# Compute achieved r given sample size, power and alpha level
def _eval_r(r, n, power, alpha):
return func(r, n, power, alpha) - power
try:
if alternative == "two-sided":
return brenth(_eval_r, 1e-10, 1 - 1e-10, args=(n, power, alpha))
else:
return brenth(_eval_r, -1 + 1e-10, 1 - 1e-10, args=(n, power, alpha))
except ValueError: # pragma: no cover
return np.nan
else:
# Compute achieved alpha (significance) level given r, n and power
def _eval_alpha(alpha, r, n, power):
return func(r, n, power, alpha) - power
try:
return brenth(_eval_alpha, 1e-10, 1 - 1e-10, args=(r, n, power))
except ValueError: # pragma: no cover
return np.nan
[docs]
def power_chi2(dof, w=None, n=None, power=None, alpha=0.05):
"""
Evaluate power, sample size, effect size or significance level of chi-squared tests.
Parameters
----------
dof : float
Degree of freedom (depends on the chosen test).
w : float
Cohen's w effect size [1]_.
n : int
Total number of observations.
power : float
Test power (= 1 - type II error).
alpha : float
Significance level (type I error probability). The default is 0.05.
Notes
-----
Exactly ONE of the parameters ``w``, ``n``, ``power`` and ``alpha`` must be passed as None,
and that parameter is determined from the others. The degrees of freedom ``dof`` must always
be specified.
``alpha`` has a default value of 0.05 so None must be explicitly passed if you want to
compute it.
This function is a Python adaptation of the `pwr.chisq.test` function implemented in the
`pwr <https://cran.r-project.org/web/packages/pwr/pwr.pdf>`_ R package.
Statistical power is the likelihood that a study will detect an effect when there is an effect
there to be detected. A high statistical power means that there is a low probability of
concluding that there is no effect when there is one. Statistical power is mainly affected by
the effect size and the sample size.
The non-centrality parameter is defined by:
.. math:: \\delta = N * w^2
Then the critical value is computed using the percentile point function of the :math:`\\chi^2`
distribution with the alpha level and degrees of freedom.
Finally, the power of the chi-squared test is calculated using the survival function of the
non-central :math:`\\chi^2` distribution using the previously computed critical value,
non-centrality parameter, and the degrees of freedom of the test.
:py:func:`scipy.optimize.brenth` is used to solve power equations for other variables (i.e.
sample size, effect size, or significance level). If the solving fails, a nan value is
returned.
Results have been tested against GPower and the
`pwr <https://cran.r-project.org/web/packages/pwr/pwr.pdf>`_ R package.
References
----------
.. [1] Cohen, J. (1988). Statistical power analysis for the behavioral sciences (2nd ed.).
Examples
--------
1. Compute achieved power
>>> from pingouin import power_chi2
>>> print('power: %.4f' % power_chi2(dof=1, w=0.3, n=20))
power: 0.2687
2. Compute required sample size
>>> print('n: %.4f' % power_chi2(dof=3, w=0.3, power=0.80))
n: 121.1396
3. Compute achieved effect size
>>> print('w: %.4f' % power_chi2(dof=2, n=20, power=0.80, alpha=0.05))
w: 0.6941
4. Compute achieved alpha (significance)
>>> print('alpha: %.4f' % power_chi2(dof=1, w=0.5, n=20, power=0.80, alpha=None))
alpha: 0.1630
"""
assert isinstance(dof, (int, float))
# Check the number of arguments that are None
n_none = sum([v is None for v in [w, n, power, alpha]])
if n_none != 1:
err = "Exactly one of w, n, power, and alpha must be None."
raise ValueError(err)
# Safety checks
if w is not None:
w = abs(w)
if alpha is not None:
assert 0 < alpha <= 1
if power is not None:
assert 0 < power <= 1
def func(w, n, power, alpha):
k = stats.chi2.ppf(1 - alpha, dof)
nc = n * w**2
return stats.ncx2.sf(k, dof, nc)
# Evaluate missing variable
if power is None:
# Compute achieved power
return func(w, n, power, alpha)
elif n is None:
# Compute required sample size
def _eval_n(n, w, power, alpha):
return func(w, n, power, alpha) - power
try:
return brenth(_eval_n, 1, 1e07, args=(w, power, alpha))
except ValueError: # pragma: no cover
return np.nan
elif w is None:
# Compute achieved effect size
def _eval_w(w, n, power, alpha):
return func(w, n, power, alpha) - power
try:
return brenth(_eval_w, 1e-10, 100, args=(n, power, alpha))
except ValueError: # pragma: no cover
return np.nan
else:
# Compute achieved alpha
def _eval_alpha(alpha, w, n, power):
return func(w, n, power, alpha) - power
try:
return brenth(_eval_alpha, 1e-10, 1 - 1e-10, args=(w, n, power))
except ValueError: # pragma: no cover
return np.nan