Source code for pingouin.equivalence
# Author: Antoine Weill--Duflos <antoine@weill-duflos.fr>
# Date: July 2019
import numpy as np
import pandas as pd
from pingouin.parametric import ttest
from pingouin.utils import _postprocess_dataframe
__all__ = ["tost"]
[docs]
def tost(x, y, bound=1, paired=False, correction=False):
"""Two One-Sided Test (TOST) for equivalence.
Parameters
----------
x, y : array_like
First and second set of observations. ``x`` and ``y`` should have the
same units. If ``y`` is a single value (e.g. 0), a one-sample test is
performed.
bound : float
Magnitude of region of similarity (a.k.a epsilon). Note that this
should be expressed in the same unit as ``x`` and ``y``.
paired : boolean
Specify whether the two observations are related (i.e. repeated
measures) or independent.
correction : auto or boolean
Specify whether or not to correct for unequal variances using Welch
separate variances T-test. This only applies if ``paired`` is False.
Returns
-------
stats : :py:class:`pandas.DataFrame`
* ``'bound'``: bound (= epsilon, or equivalence margin)
* ``'dof'``: degrees of freedom
* ``'pval'``: TOST p-value
See also
--------
ttest
References
----------
.. [1] Schuirmann, D.L. 1981. On hypothesis testing to determine if the
mean of a normal distribution is contained in a known interval.
Biometrics 37 617.
.. [2] https://cran.r-project.org/web/packages/equivalence/equivalence.pdf
Examples
--------
1. Independent two-sample TOST with a region of similarity of 1 (default)
>>> import pingouin as pg
>>> a = [4, 7, 8, 6, 3, 2]
>>> b = [6, 8, 7, 10, 11, 9]
>>> pg.tost(a, b)
bound dof pval
TOST 1 10 0.965097
2. Paired TOST with a different region of similarity
>>> pg.tost(a, b, bound=0.5, paired=True)
bound dof pval
TOST 0.5 5 0.954854
3. One sample TOST
>>> pg.tost(a, y=0, bound=4)
bound dof pval
TOST 4 5 0.825967
"""
x = np.asarray(x)
y = np.asarray(y)
assert isinstance(bound, (int, float)), "bound must be int or float."
# T-tests
df_a = ttest(x + bound, y, paired=paired, correction=correction, alternative="greater")
df_b = ttest(x - bound, y, paired=paired, correction=correction, alternative="less")
pval = max(df_a.at["T-test", "p-val"], df_b.at["T-test", "p-val"])
# Create output dataframe
stats = pd.DataFrame(
{"bound": bound, "dof": df_a.at["T-test", "dof"], "pval": pval}, index=["TOST"]
)
return _postprocess_dataframe(stats)