Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
directory. To change that, specify the filename with full path.
Returns
-------
aov : DataFrame
ANCOVA summary ::
'Source' : Names of the factors considered
'SS' : Sums of squares
'DF' : Degrees of freedom
'F' : F-values
'p-unc' : Uncorrected p-values
"""
# Check that statsmodels is installed
from pingouin.utils import _is_statsmodels_installed
_is_statsmodels_installed(raise_error=True)
from statsmodels.api import stats
from statsmodels.formula.api import ols
# Check that covariates are numeric ('float', 'int')
assert all([data[covar[i]].dtype.kind in 'fi' for i in range(len(covar))])
# Fit ANCOVA model
formula = dv + ' ~ C(' + between + ')'
for c in covar:
formula += ' + ' + c
model = ols(formula, data=data).fit()
aov = stats.anova_lm(model, typ=2).reset_index()
aov.rename(columns={'index': 'Source', 'sum_sq': 'SS',
'df': 'DF', 'PR(>F)': 'p-unc'}, inplace=True)
def anovan(data=None, dv=None, between=None, ss_type=2, export_filename=None):
"""N-way ANOVA using statsmodels.
This is an internal function. The main call to this function should be done
by the :py:func:`pingouin.anova` function.
"""
# Check that statsmodels is installed
from pingouin.utils import _is_statsmodels_installed
_is_statsmodels_installed(raise_error=True)
from statsmodels.api import stats
from statsmodels.formula.api import ols
# Validate the dataframe
_check_dataframe(dv=dv, between=between, data=data, effects='between')
all_cols = _flatten_list([dv, between])
bad_chars = [',', '(', ')', ':']
if not all([c not in v for c in bad_chars for v in all_cols]):
err_msg = "comma, bracket, and colon are not allowed in column names."
raise ValueError(err_msg)
# Drop missing values
data = data[all_cols].dropna()
assert data.shape[0] >= 5, 'Data must have at least 5 non-missing values.'
# Reset index (avoid duplicate axis error)
With some tweaks
.. plot::
>>> import pingouin as pg
>>> import seaborn as sns
>>> df = pg.read_dataset('rm_corr')
>>> sns.set(style='darkgrid', font_scale=1.2)
>>> g = pg.plot_rm_corr(data=df, x='pH', y='PacO2',
... subject='Subject', legend=True,
... kwargs_facetgrid=dict(height=4.5, aspect=1.5,
... palette='Spectral'))
"""
# Check that statsmodels is installed
from pingouin.utils import _is_statsmodels_installed
_is_statsmodels_installed(raise_error=True)
from statsmodels.formula.api import ols
# Safety check (duplicated from pingouin.rm_corr)
assert isinstance(data, pd.DataFrame), 'Data must be a DataFrame'
assert x in data.columns, 'The %s column is not in data.' % x
assert y in data.columns, 'The %s column is not in data.' % y
assert data[x].dtype.kind in 'bfi', '%s must be numeric.' % x
assert data[y].dtype.kind in 'bfi', '%s must be numeric.' % y
assert subject in data.columns, 'The %s column is not in data.' % subject
if data[subject].nunique() < 3:
raise ValueError('rm_corr requires at least 3 unique subjects.')
# Remove missing values
data = data[[x, y, subject]].dropna(axis=0)
# Calculate rm_corr