Source code for pingouin.datasets

import pandas as pd
import os.path as op
from pingouin.utils import print_table

# Absolute directory of this module (symlinks resolved); the CSV files read
# below and in ``read_dataset`` are looked up relative to it.
ddir = op.dirname(op.realpath(__file__))
# Table loaded once at import time from ``datasets.csv``. Its ``dataset``
# column is used by ``read_dataset`` to validate names and by ``list_dataset``
# as the index of the returned table.
dts = pd.read_csv(op.join(ddir, "datasets.csv"), sep=",")

# Names exported by ``from <module> import *``.
__all__ = ["read_dataset", "list_dataset"]


def read_dataset(dname):
    """Read one of the bundled example datasets.

    Parameters
    ----------
    dname : string
        Name of the dataset to read. A trailing ``.csv`` extension (in any
        letter case) is accepted and ignored. The name must appear in the
        ``dataset`` column of the ``datasets.csv`` table loaded by this
        module.

    Returns
    -------
    data : :py:class:`pandas.DataFrame`
        Requested dataset, read from ``<dname>.csv`` in this module's
        directory.

    Raises
    ------
    ValueError
        If ``dname`` is not one of the registered dataset names. The message
        names the rejected dataset and lists the valid names.

    Examples
    --------
    Load the `Penguin <https://github.com/allisonhorst/palmerpenguins>`_
    dataset:

    >>> import pingouin as pg
    >>> df = pg.read_dataset('penguins')
    >>> df  # doctest: +SKIP
        species  island  bill_length_mm  ...  flipper_length_mm  body_mass_g     sex
    0    Adelie  Biscoe            37.8  ...              174.0       3400.0  female
    1    Adelie  Biscoe            37.7  ...              180.0       3600.0    male
    2    Adelie  Biscoe            35.9  ...              189.0       3800.0  female
    3    Adelie  Biscoe            38.2  ...              185.0       3950.0    male
    4    Adelie  Biscoe            38.8  ...              180.0       3800.0    male
    ..      ...     ...             ...  ...                ...          ...     ...
    339  Gentoo  Biscoe             NaN  ...                NaN          NaN     NaN
    340  Gentoo  Biscoe            46.8  ...              215.0       4850.0  female
    341  Gentoo  Biscoe            50.4  ...              222.0       5750.0    male
    342  Gentoo  Biscoe            45.2  ...              212.0       5200.0  female
    343  Gentoo  Biscoe            49.9  ...              213.0       5400.0    male
    """
    # Accept "name.csv" as well as "name": strip a trailing .csv extension.
    stem, ext = op.splitext(dname)
    if ext.lower() == ".csv":
        dname = stem

    # Validate against the registry before touching the file system, so an
    # unknown name yields a helpful ValueError rather than a file error.
    valid_names = dts["dataset"].to_numpy()
    if dname not in valid_names:
        # Build a single message string: passing several positional args to
        # ValueError would make str(exc) render as a tuple repr.
        raise ValueError(
            "Dataset '{}' does not exist. Valid dataset names are: {}".format(
                dname, ", ".join(map(str, valid_names))
            )
        )

    return pd.read_csv(op.join(ddir, dname + ".csv"), sep=",")
def list_dataset():
    """Return the table of available example datasets.

    Returns
    -------
    datasets : :py:class:`pandas.DataFrame`
        The module-level datasets table re-indexed by its ``dataset`` column.
        The remaining columns are those of ``datasets.csv`` (documented
        upstream as the description and reference of each dataset included
        in Pingouin).

    Examples
    --------
    >>> import pingouin as pg
    >>> all_datasets = pg.list_dataset()
    >>> all_datasets.index.tolist()
    ['ancova',
     'anova',
     'anova2',
     'anova2_unbalanced',
     'anova3',
     'anova3_unbalanced',
     'blandaltman',
     'chi2_independence',
     'chi2_mcnemar',
     'circular',
     'cochran',
     'cronbach_alpha',
     'cronbach_wide_missing',
     'icc',
     'mediation',
     'mixed_anova',
     'mixed_anova_unbalanced',
     'multivariate',
     'pairwise_corr',
     'pairwise_tests',
     'pairwise_tests_missing',
     'partial_corr',
     'penguins',
     'rm_anova',
     'rm_anova_wide',
     'rm_anova2',
     'rm_corr',
     'rm_missing',
     'tips']
    """
    # set_index (without inplace) hands back a new frame, leaving ``dts`` as is.
    indexed_by_name = dts.set_index("dataset")
    return indexed_by_name