"""Bayesian functions."""importwarningsimportnumpyasnpfromscipy.integrateimportquadfrommathimportpi,exp,log,lgamma__all__=["bayesfactor_ttest","bayesfactor_pearson","bayesfactor_binom"]def_format_bf(bf,precision=3,trim="0"):"""Format BF10 to floating point or scientific notation."""iftype(bf)==str:returnbfifbf>=1e4orbf<=1e-4:out=np.format_float_scientific(bf,precision=precision,trim=trim)else:out=np.format_float_positional(bf,precision=precision,trim=trim)returnout
[docs]defbayesfactor_ttest(t,nx,ny=None,paired=False,alternative="two-sided",r=0.707):""" Bayes Factor of a T-test. Parameters ---------- t : float T-value of the T-test nx : int Sample size of first group ny : int Sample size of second group (only needed in case of an independent two-sample T-test) paired : boolean Specify whether the two observations are related (i.e. repeated measures) or independent. alternative : string Defines the alternative hypothesis, or tail of the test. Must be one of "two-sided" (default), "greater" or "less". .. warning:: One-sided Bayes Factor (BF) are simply obtained by doubling the two-sided BF, which is not the same behavior as R or JASP. Be extra careful when interpretating one-sided BF, and if you can, always double-check your results. r : float Cauchy scale factor. Smaller values of ``r`` (e.g. 0.5), may be appropriate when small effect sizes are expected a priori; larger values of ``r`` are appropriate when large effect sizes are expected (Rouder et al 2009). The default is :math:`\\sqrt{2} / 2 \\approx 0.707`. Returns ------- bf : float Scaled Jeffrey-Zellner-Siow (JZS) Bayes Factor (BF10). The Bayes Factor quantifies the evidence in favour of the alternative hypothesis. See also -------- ttest : T-test pairwise_test : Pairwise T-tests bayesfactor_pearson : Bayes Factor of a correlation bayesfactor_binom : Bayes Factor of a binomial test Notes ----- Adapted from a Matlab code found at https://github.com/anne-urai/Tools/tree/master/stats/BayesFactors If you would like to compute the Bayes Factor directly from the raw data instead of from the T-value, use the :py:func:`pingouin.ttest` function. The JZS Bayes Factor is approximated using the formula described in ref [1]_: .. math:: \\text{BF}_{10} = \\frac{\\int_{0}^{\\infty}(1 + Ngr^2)^{-1/2} (1 + \\frac{t^2}{v(1 + Ngr^2)})^{-(v+1) / 2}(2\\pi)^{-1/2}g^ {-3/2}e^{-1/2g}}{(1 + \\frac{t^2}{v})^{-(v+1) / 2}} where :math:`t` is the T-value, :math:`v` the degrees of freedom, :math:`N` the sample size, :math:`r` the Cauchy scale factor (= prior on effect size) and :math:`g` is is an auxiliary variable that is integrated out numerically. Results have been validated against JASP and the BayesFactor R package. References ---------- .. [1] Rouder, J.N., Speckman, P.L., Sun, D., Morey, R.D., Iverson, G., 2009. Bayesian t tests for accepting and rejecting the null hypothesis. Psychon. Bull. Rev. 16, 225–237. https://doi.org/10.3758/PBR.16.2.225 Examples -------- 1. Bayes Factor of an independent two-sample T-test >>> from pingouin import bayesfactor_ttest >>> bf = bayesfactor_ttest(3.5, 20, 20) >>> print("Bayes Factor: %.3f (two-sample independent)" % bf) Bayes Factor: 26.743 (two-sample independent) 2. Bayes Factor of a paired two-sample T-test >>> bf = bayesfactor_ttest(3.5, 20, 20, paired=True) >>> print("Bayes Factor: %.3f (two-sample paired)" % bf) Bayes Factor: 17.185 (two-sample paired) 3. Now specifying the direction of the test >>> tval = -3.5 >>> bf_greater = bayesfactor_ttest(tval, 20, alternative='greater') >>> bf_less = bayesfactor_ttest(tval, 20, alternative='less') >>> print("BF10-greater: %.3f | BF10-less: %.3f" % (bf_greater, bf_less)) BF10-greater: 0.029 | BF10-less: 34.369 """# Check tailassertalternativein["two-sided","greater","less",],"Alternative must be one of 'two-sided' (default), 'greater' or 'less'."one_sample=TrueifnyisNoneorny==1elseFalse# Check T-valueassertisinstance(t,(int,float)),"The T-value must be a int or a float."ifnotnp.isfinite(t):returnnp.nan# Function to be integrateddeffun(g,t,n,r,df):return((1+n*g*r**2)**(-0.5)*(1+t**2/((1+n*g*r**2)*df))**(-(df+1)/2)*(2*pi)**(-0.5)*g**(-3.0/2)*exp(-1/(2*g)))# Define n and degrees of freedomifone_sampleorpaired:n=nxdf=n-1else:n=nx*ny/(nx+ny)df=nx+ny-2# JZS Bayes factor calculation: eq. 1 in Rouder et al. (2009)integr=quad(fun,0,np.inf,args=(t,n,r,df))[0]withnp.errstate(divide="ignore"):bf10=1/((1+t**2/df)**(-(df+1)/2)/integr)# Tailtail_binary="two-sided"ifalternative=="two-sided"else"one-sided"bf10=bf10*(1/0.5)iftail_binary=="one-sided"elsebf10# Now check the direction of the testif((alternative=="greater"andt<0)or(alternative=="less"andt>0))andbf10>1:bf10=1/bf10returnbf10
[docs]defbayesfactor_pearson(r,n,alternative="two-sided",method="ly",kappa=1.0):""" Bayes Factor of a Pearson correlation. Parameters ---------- r : float Pearson correlation coefficient. n : int Sample size. alternative : string Defines the alternative hypothesis, or tail of the correlation. Must be one of "two-sided" (default), "greater" or "less". Both "greater" and "less" return a one-sided p-value. "greater" tests against the alternative hypothesis that the correlation is positive (greater than zero), "less" tests against the hypothesis that the correlation is negative. method : str Method to compute the Bayes Factor. Can be "ly" (default) or "wetzels". The former has an exact analytical solution, while the latter requires integral solving (and is therefore slower). "wetzels" was the default in Pingouin <= 0.2.5. See Notes for details. kappa : float Kappa factor. This is sometimes called the *rscale* parameter, and is only used when ``method`` is "ly". Returns ------- bf : float Bayes Factor (BF10). The Bayes Factor quantifies the evidence in favour of the alternative hypothesis. See also -------- corr : (Robust) correlation between two variables pairwise_corr : Pairwise correlation between columns of a pandas DataFrame bayesfactor_ttest : Bayes Factor of a T-test bayesfactor_binom : Bayes Factor of a binomial test Notes ----- To compute the Bayes Factor directly from the raw data, use the :py:func:`pingouin.corr` function. The two-sided **Wetzels Bayes Factor** (also called *JZS Bayes Factor*) is calculated using the equation 13 and associated R code of [1]_: .. math:: \\text{BF}_{10}(n, r) = \\frac{\\sqrt{n/2}}{\\gamma(1/2)}* \\int_{0}^{\\infty}e((n-2)/2)* log(1+g)+(-(n-1)/2)log(1+(1-r^2)*g)+(-3/2)log(g)-n/2g where :math:`n` is the sample size, :math:`r` is the Pearson correlation coefficient and :math:`g` is is an auxiliary variable that is integrated out numerically. Since the Wetzels Bayes Factor requires solving an integral, it is slower than the analytical solution described below. The two-sided **Ly Bayes Factor** (also called *Jeffreys exact Bayes Factor*) is calculated using equation 25 of [2]_: .. math:: \\text{BF}_{10;k}(n, r) = \\frac{2^{\\frac{k-2}{k}}\\sqrt{\\pi}} {\\beta(\\frac{1}{k}, \\frac{1}{k})} \\cdot \\frac{\\Gamma(\\frac{2+k(n-1)}{2k})}{\\Gamma(\\frac{2+nk}{2k})} \\cdot 2F_1(\\frac{n-1}{2}, \\frac{n-1}{2}, \\frac{2+nk}{2k}, r^2) The one-sided version is described in eq. 27 and 28 of Ly et al, 2016. Please take note that the one-sided test requires the `mpmath <http://mpmath.org/>`_ package. Results have been validated against JASP and the BayesFactor R package. References ---------- .. [1] Ly, A., Verhagen, J. & Wagenmakers, E.-J. Harold Jeffreys’s default Bayes factor hypothesis tests: Explanation, extension, and application in psychology. J. Math. Psychol. 72, 19–32 (2016). .. [2] Wetzels, R. & Wagenmakers, E.-J. A default Bayesian hypothesis test for correlations and partial correlations. Psychon. Bull. Rev. 19, 1057–1064 (2012). Examples -------- Bayes Factor of a Pearson correlation >>> from pingouin import bayesfactor_pearson >>> r, n = 0.6, 20 >>> bf = bayesfactor_pearson(r, n) >>> print("Bayes Factor: %.3f" % bf) Bayes Factor: 10.634 Compare to Wetzels method: >>> bf = bayesfactor_pearson(r, n, method='wetzels') >>> print("Bayes Factor: %.3f" % bf) Bayes Factor: 8.221 One-sided test >>> bf10pos = bayesfactor_pearson(r, n, alternative='greater') >>> bf10neg = bayesfactor_pearson(r, n, alternative='less') >>> print("BF-pos: %.3f, BF-neg: %.3f" % (bf10pos, bf10neg)) BF-pos: 21.185, BF-neg: 0.082 """fromscipy.specialimportgamma,betaln,hyp2f1assertmethod.lower()in["ly","wetzels"],"Method not recognized."assertalternativein["two-sided","greater","less",],"Alternative must be one of 'two-sided' (default), 'greater' or 'less'."# Wrong inputifnotnp.isfinite(r)orn<2:returnnp.nanassert-1<=r<=1,"r must be between -1 and 1."ifalternative!="two-sided"andmethod.lower()=="wetzels":warnings.warn("One-sided Bayes Factor are not supported by the ""Wetzels's method. Switching to method='ly'.")method="ly"ifmethod.lower()=="wetzels":# Wetzels & Wagenmakers, 2012. Integral solvingdeffun(g,r,n):returnexp(((n-2)/2)*log(1+g)+(-(n-1)/2)*log(1+(1-r**2)*g)+(-3/2)*log(g)+-n/(2*g))integr=quad(fun,0,np.inf,args=(r,n))[0]bf10=np.sqrt(n/2)/gamma(1/2)*integrelse:# Ly et al, 2016. Analytical solution.k=kappalbeta=betaln(1/k,1/k)log_hyperterm=log(hyp2f1(((n-1)/2),((n-1)/2),((n+2/k)/2),r**2))bf10=exp((1-2/k)*log(2)+0.5*log(pi)-lbeta+lgamma((n+2/k-1)/2)-lgamma((n+2/k)/2)+log_hyperterm)ifalternative!="two-sided":# Directional test.# We need mpmath for the generalized hypergeometric functionfrom.utilsimport_is_mpmath_installed_is_mpmath_installed(raise_error=True)frommpmathimporthyp3f2hyper_term=float(hyp3f2(1,n/2,n/2,3/2,(2+k*(n+1))/(2*k),r**2))log_term=2*(lgamma(n/2)-lgamma((n-1)/2))-lbetaC=2**((3*k-2)/k)*k*r/(2+(n-1)*k)*exp(log_term)*hyper_termbf10neg=bf10-Cbf10pos=2*bf10-bf10negifalternative=="greater":# We expect the correlation to be positivebf10=bf10poselse:# We expect the correlation to be negativebf10=bf10negreturnbf10
[docs]defbayesfactor_binom(k,n,p=0.5,a=1,b=1):""" Bayes factor of a binomial test with :math:`k` successes, :math:`n` trials and base probability :math:`p`. This means that the null hypothesis is that the probability is :math:`p`. It is compared against the alternative hypothesis that :math:`p` is from the Beta distribution with parameters :math:`(a, b)`. By default, both :math:`a` and :math:`b` are 1, making the alternative hypothesis equivalent to the uniform distribution, i.e., we are completely uninformed about :math:`p`. Parameters ---------- k : int Number of successes. n : int Number of trials. p : float Base probability of success (range from 0 to 1). a : float The "a" parameter of the Beta distribution. b : float The "b" parameter of the Beta distribution. Returns ------- bf10 : float The Bayes Factor quantifies the evidence in favour of the alternative hypothesis, where the null hypothesis is that the random variable is binomially distributed with base probability :math:`p`. See also -------- bayesfactor_pearson : Bayes Factor of a correlation bayesfactor_ttest : Bayes Factor of a T-test Notes ----- Adapted from a Matlab code found at https://github.com/anne-urai/Tools/blob/master/stats/BayesFactors/binombf.m The Bayes Factor is given by the formula below: .. math:: BF_{10} = \\frac{\\int_0^1 \\binom{n}{k}g^k(1-g)^{n-k}} {\\binom{n}{k} p^k (1-p)^{n-k}} References ---------- * http://pcl.missouri.edu/bf-binomial * https://en.wikipedia.org/wiki/Bayes_factor Examples -------- We want to determine if a coin if fair. After tossing the coin 200 times in a row, we report 115 heads (hereafter referred to as "successes") and 85 tails ("failures"). The Bayes Factor can be easily computed using Pingouin: >>> import pingouin as pg >>> bf = float(pg.bayesfactor_binom(k=115, n=200, p=0.5)) >>> # Note that Pingouin returns the BF-alt by default. >>> # BF-null is simply 1 / BF-alt >>> print("BF-null: %.3f, BF-alt: %.3f" % (1 / bf, bf)) BF-null: 1.197, BF-alt: 0.835 Since the Bayes Factor of the null hypothesis ("the coin is fair") is higher than the Bayes Factor of the alternative hypothesis ("the coin is not fair"), we can conclude that there is more evidence to support the fact that the coin is indeed fair. However, the strength of the evidence in favor of the null hypothesis (1.197) is "barely worth mentionning" according to Jeffreys's rule of thumb. Interestingly, a frequentist alternative to this test would give very different results. It can be performed using the :py:func:`scipy.stats.binom_test` function: >>> from scipy.stats import binomtest >>> result = binomtest(k=115, n=200, p=0.5) >>> round(result.pvalue, 5) 0.04004 The binomial test rejects the null hypothesis that the coin is fair at the 5% significance level (p=0.04). Thus, whereas a frequentist hypothesis test would yield significant results at the 5% significance level, the Bayes factor indicates preference of the null hypothesis to the alternative hypothesis that we know nothing about p. We can use a more informed alternative hypothesis too, if desirable. E.g., the original test using Beta(5, 4) as the alternative hypothesis: >>> bf = pg.bayesfactor_binom(k=115, n=200, p=0.5, a=5, b=4) >>> print("Bayes Factor: %.3f" % bf) Bayes Factor: 1.930 Using a different base probability of successes: >>> bf = pg.bayesfactor_binom(k=100, n=1000, p=0.1) >>> print("Bayes Factor: %.3f" % bf) Bayes Factor: 0.024 """fromscipy.statsimportbeta,binomassert0<p<1,"p must be between 0 and 1."assertisinstance(k,int),"k must be int."assertisinstance(n,int),"n must be int."assertk<=n,"k (successes) cannot be higher than n (trials)."asserta>0,"a must be positive."assertb>0,"b must be positive."deffun(g):returnbeta.pdf(g,a,b)*binom.pmf(k,n,g)bf10=quad(fun,0,1)[0]/binom.pmf(k,n,p)returnbf10