Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def anovan(data=None, dv=None, between=None, ss_type=2, export_filename=None):
"""N-way ANOVA using statsmodels.
This is an internal function. The main call to this function should be done
by the :py:func:`pingouin.anova` function.
"""
# Check that statsmodels is installed
from pingouin.utils import _is_statsmodels_installed
_is_statsmodels_installed(raise_error=True)
from statsmodels.api import stats
from statsmodels.formula.api import ols
# Validate the dataframe
_check_dataframe(dv=dv, between=between, data=data, effects='between')
all_cols = _flatten_list([dv, between])
bad_chars = [',', '(', ')', ':']
if not all([c not in v for c in bad_chars for v in all_cols]):
err_msg = "comma, bracket, and colon are not allowed in column names."
raise ValueError(err_msg)
# Drop missing values
data = data[all_cols].dropna()
assert data.shape[0] >= 5, 'Data must have at least 5 non-missing values.'
# Reset index (avoid duplicate axis error)
data = data.reset_index(drop=True)
# Create R-like formula
formula = dv + ' ~ '
for fac in between:
formula += 'C(' + fac + ', Sum) * '
>>> ancova(data=df, dv='Scores', covar=['Income', 'BMI'], between='Method')
Source SS DF F p-unc
0 Method 552.284 3 3.233 0.036113
1 Income 1573.952 1 27.637 0.000011
2 BMI 60.014 1 1.054 0.312842
3 Residual 1708.509 30 NaN NaN
"""
# Safety checks
assert isinstance(data, pd.DataFrame)
assert dv in data.columns, '%s is not in data.' % dv
assert between in data.columns, '%s is not in data.' % between
assert isinstance(covar, (str, list)), 'covar must be a str or a list.'
# Drop missing values
data = data[_flatten_list([dv, between, covar])].dropna()
# Check the number of covariates
if isinstance(covar, list):
if len(covar) > 1:
return ancovan(dv=dv, covar=covar, between=between, data=data,
export_filename=export_filename)
else:
covar = covar[0]
# Assert that covariate is numeric
assert data[covar].dtype.kind in 'fi', 'Covariate must be numeric.'
def linreg(x, y):
    """Return the slope of the simple linear regression of ``y`` on ``x``.

    The slope is obtained as the Pearson correlation between ``x`` and
    ``y`` multiplied by the ratio of their sample standard deviations
    (``ddof=1``).

    Parameters
    ----------
    x, y : array_like
        One-dimensional sequences of equal length.

    Returns
    -------
    float
        Regression slope. The result is NaN when ``x`` has zero variance,
        since the correlation and the ratio are both undefined.
    """
    # Off-diagonal entry of the 2x2 correlation matrix is r(x, y).
    pearson_r = np.corrcoef(x, y)[0, 1]
    spread_y = np.std(y, ddof=1)
    spread_x = np.std(x, ddof=1)
    # Same left-to-right evaluation order as the one-line form.
    return pearson_r * spread_y / spread_x
# Compute slopes