Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
x = x[~np.isnan(x).any(axis=1)]
else:
nx, kx = x.shape
y = np.asarray(Y)
if y.ndim == 1:
# One sample with specified null
assert y.size == kx
elif y.ndim == 2:
# Two-sample
err = 'X and Y must have the same number of features (= columns).'
assert y.shape[1] == kx, err
if paired:
err = 'X and Y must have the same number of rows if paired.'
assert y.shape[0] == nx, err
# Remove rows with missing values in both x and y
x, y = remove_na(x, y, paired=paired, axis='rows')
# Shape of arrays
nx, k = x.shape
ny = y.shape[0]
assert nx >= 5, 'At least five samples are required.'
if y.ndim == 1 or paired is True:
n = nx
if y.ndim == 1:
# One sample test
cov = np.cov(x, rowvar=False)
diff = x.mean(0) - y
else:
# Paired two sample
cov = np.cov(x - y, rowvar=False)
diff = x.mean(0) - y.mean(0)
>>> import pandas as pd
>>> data = pd.DataFrame({'x': x, 'y': y})
>>> corr(data['x'], data['y'])
          n      r          CI95%     r2  adj_r2     p-val   BF10  power
pearson  30  0.147  [-0.23, 0.48]  0.022  -0.051  0.439148  0.302  0.121
"""
x = np.asarray(x)
y = np.asarray(y)
# Check size
if x.size != y.size:
raise ValueError('x and y must have the same length.')
# Remove NA
x, y = remove_na(x, y, paired=True)
nx = x.size
# Compute correlation coefficient
if method == 'pearson':
r, pval = pearsonr(x, y)
elif method == 'spearman':
r, pval = spearmanr(x, y)
elif method == 'kendall':
r, pval = kendalltau(x, y)
elif method == 'percbend':
r, pval = percbend(x, y)
elif method == 'shepherd':
r, pval, outliers = shepherd(x, y)
elif method == 'skipped':
r, pval, outliers = skipped(x, y, method='spearman')
else:
else:
names = []
# Convert to numpy array
X = np.asarray(X)
y = np.asarray(y)
assert y.ndim == 1, 'y must be one-dimensional.'
assert 0 < alpha < 1, 'alpha must be between 0 and 1.'
# Add axis if only one-dimensional array
if X.ndim == 1:
X = X[..., np.newaxis]
# Check for NaN / Inf
if remove_na:
X, y = rm_na(X, y[..., np.newaxis], paired=True, axis='rows')
y = np.squeeze(y)
y_gd = np.isfinite(y).all()
X_gd = np.isfinite(X).all()
assert y_gd, ("Target (y) contains NaN or Inf. Please remove them "
"manually or use remove_na=True.")
assert X_gd, ("Predictors (X) contain NaN or Inf. Please remove them "
"manually or use remove_na=True.")
# Check that X and y have same length
assert y.shape[0] == X.shape[0], 'X and y must have same number of samples'
# Check that y is binary
if np.unique(y).size != 2:
raise ValueError('Dependent variable must be binary.')
if not names:
>>> pg.normality(data, dv='Performance', group='Time')
             W      pval  normal
Pre   0.967718  0.478773    True
Post  0.940728  0.095157    True
"""
assert isinstance(data, (pd.DataFrame, pd.Series, list, np.ndarray))
assert method in ['shapiro', 'normaltest']
if isinstance(data, pd.Series):
data = data.to_frame()
col_names = ['W', 'pval']
func = getattr(scipy.stats, method)
if isinstance(data, (list, np.ndarray)):
data = np.asarray(data)
assert data.ndim == 1, 'Data must be 1D.'
assert data.size > 3, 'Data must have more than 3 samples.'
data = remove_na(data)
stats = pd.DataFrame(func(data)).T
stats.columns = col_names
stats['normal'] = np.where(stats['pval'] > alpha, True, False)
else:
# Data is a Pandas DataFrame
if dv is None and group is None:
# Wide-format
# Get numeric data only
numdata = data._get_numeric_data()
stats = numdata.apply(lambda x: func(x.dropna()),
result_type='expand', axis=0).T
stats.columns = col_names
stats['normal'] = np.where(stats['pval'] > alpha, True, False)
else:
# Long-format
stats = pd.DataFrame([])
else:
names = []
# Convert input to numpy array
X = np.asarray(X)
y = np.asarray(y)
assert y.ndim == 1, 'y must be one-dimensional.'
assert 0 < alpha < 1
if X.ndim == 1:
# Convert to (n_samples, n_features) shape
X = X[..., np.newaxis]
# Check for NaN / Inf
if remove_na:
X, y = rm_na(X, y[..., np.newaxis], paired=True, axis='rows')
y = np.squeeze(y)
y_gd = np.isfinite(y).all()
X_gd = np.isfinite(X).all()
assert y_gd, ("Target (y) contains NaN or Inf. Please remove them "
"manually or use remove_na=True.")
assert X_gd, ("Predictors (X) contain NaN or Inf. Please remove them "
"manually or use remove_na=True.")
# Check that X and y have same length
assert y.shape[0] == X.shape[0], 'X and y must have same number of samples'
if not names:
names = ['x' + str(i + 1) for i in range(X.shape[1])]
if add_intercept:
# Add intercept