Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _perm_test(x, y, stat, equal_var, random_state):
    """Compute the statistic for one randomized copy of the data.

    Worker used by the parallel loop in ``perm_test``: the data are
    randomized once according to ``stat`` and then handed back to
    ``perm_test`` with ``n_perm=0`` so that only the statistic is computed.

    Args:
        x: first sample; must support ``len`` and element-wise arithmetic
            (and ``.size`` when ``y`` is a second sample).
        y: second sample, a scalar (``float``/``int``) to compare against,
            or ``None``.
        stat: name of the statistic; selects the randomization scheme.
        equal_var: forwarded unchanged to ``perm_test``.
        random_state: seed or generator, normalized by
            ``_check_random_state``.

    Returns:
        The result of ``perm_test(x, y, stat, equal_var=equal_var, n_perm=0)``
        on the randomized data.
    """
    random_state = _check_random_state(random_state)

    if stat in ["pearsonr", "spearmanr"]:
        # Correlations: break the pairing by shuffling y only.
        y = random_state.permutation(y)
    elif stat in ["tstat", "cohensd", "mean"]:
        if y is None:
            # One sample: randomly flip the sign of each observation.
            x = x * random_state.choice([1, -1], len(x))
        elif isinstance(y, (float, int)):
            # Shift by the scalar, then sign-flip. Build a NEW array rather
            # than using ``x -= y``: the in-place form mutates the caller's
            # data (the shift would accumulate whenever the same array is
            # reused across permutations) and fails for integer arrays
            # combined with a float scalar.
            x = x - y
            x = x * random_state.choice([1, -1], len(x))
        else:
            # Two samples: pool, shuffle, and split back at the original
            # size of x.
            n_x = x.size
            shuffled_combined = random_state.permutation(np.hstack([x, y]))
            x, y = shuffled_combined[:n_x], shuffled_combined[n_x:]
    elif (stat == "tstat-paired") or (y is None):
        x = x * random_state.choice([1, -1], len(x))

    return perm_test(x, y, stat, equal_var=equal_var, n_perm=0)
def _boot_func(x, y, func, func_args, paired, random_state):
    """Compute ``func`` on one bootstrap resample of the data.

    Worker used by the parallel loop in ``boot_func``: the data are resampled
    with replacement once and passed back to ``boot_func`` with ``n_boot=0``
    so that only the statistic is computed.

    Args:
        x: first sample; must support ``len``, ``.size`` and array indexing.
        y: second sample; must support ``.size`` and array indexing.
        func: callable forwarded to ``boot_func``.
        func_args: extra arguments for ``func``, forwarded to ``boot_func``.
        paired: if truthy, x and y are resampled with the same indices so
            that pairs stay together; otherwise each is resampled on its own.
        random_state: seed or generator, normalized by
            ``_check_random_state``.

    Returns:
        The result of
        ``boot_func(x, y, func, func_args, paired=paired, n_boot=0)``.
    """
    random_state = _check_random_state(random_state)

    if paired:
        # Draw the shared indices from the seeded generator. The global
        # ``np.random`` would ignore the per-bootstrap seed, making paired
        # results irreproducible.
        idx = random_state.choice(np.arange(len(x)), size=x.size, replace=True)
        x, y = x[idx], y[idx]
    else:
        x = random_state.choice(x, size=x.size, replace=True)
        y = random_state.choice(y, size=y.size, replace=True)

    return boot_func(x, y, func, func_args, paired=paired, n_boot=0)
Returns:
Multiple:
- **original_stat** (*float*): function result with given data
- **ci** (*np.array*): lower and upper bounds of 95% confidence intervals
"""
if not callable(func):
raise TypeError("func must be a valid callable function")
orig_result = func(x, y, **func_args)
if n_boot:
random_state = _check_random_state(seed)
seeds = random_state.randint(MAX_INT, size=n_boot)
par_for = Parallel(n_jobs=n_jobs, backend="multiprocessing")
boots = par_for(
delayed(_boot_func)(
x, y, func, func_args, paired, **func_args, random_state=seeds[i]
)
for i in range(n_boot)
)
ci_u = np.percentile(boots, 97.5, axis=0)
ci_l = np.percentile(boots, 2.5, axis=0)
return orig_result, (ci_l, ci_u)
else:
return orig_result
y.var(ddof=1),
y.size,
)
if equal_var:
pooled_sd = np.sqrt(np.mean([s1, s2]))
else:
pooled_sd = np.sqrt(
(((ss1 - 1) * s1 + ((ss2 - 1) * s2))) / (ss1 + ss2 - 2)
)
numerator = m1 - m2 - value
eff = numerator / pooled_sd
if n_boot:
random_state = _check_random_state(seed)
seeds = random_state.randint(MAX_INT, size=n_boot)
par_for = Parallel(n_jobs=n_jobs, backend="multiprocessing")
boots = par_for(
delayed(_cohens_d)(x, y, paired, equal_var, value, random_state=seeds[i])
for i in range(n_boot)
)
ci_u = np.percentile(boots, 97.5, axis=0)
ci_l = np.percentile(boots, 2.5, axis=0)
return eff, (ci_l, ci_u)
else:
return eff
def _cohens_d(x, y, paired, equal_var, value, random_state):
    """Compute the effect size on one bootstrap resample of the data.

    Worker used by the parallel loop in ``cohens_d``: the data are resampled
    with replacement once and passed back to ``cohens_d``.

    Args:
        x: first sample; must support ``len``, ``.size`` and array indexing.
        y: second sample, or ``None`` (only resampled when not ``None`` in
            the unpaired case).
        paired: if truthy, x and y are resampled with the same indices so
            that pairs stay together.
        equal_var: forwarded to ``cohens_d``.
        value: forwarded to ``cohens_d``.
        random_state: seed or generator, normalized by
            ``_check_random_state``.

    Returns:
        The result of ``cohens_d`` on the resampled data.
    """
    random_state = _check_random_state(random_state)

    if paired:
        # Draw the shared indices from the seeded generator. The global
        # ``np.random`` would ignore the per-bootstrap seed, making paired
        # results irreproducible.
        idx = random_state.choice(np.arange(len(x)), size=x.size, replace=True)
        x, y = x[idx], y[idx]
    else:
        x = random_state.choice(x, size=x.size, replace=True)
        if y is not None:
            y = random_state.choice(y, size=y.size, replace=True)

    # NOTE(review): these arguments are passed positionally and the signature
    # of cohens_d is not visible from here -- confirm that 0, equal_var and
    # value bind to the intended parameters.
    return cohens_d(x, y, 0, equal_var, value)
raise ValueError("x and y must be the same length")
else:
raise ValueError(
"stat must be in ['tstat', 'tstat-paired', 'mean', 'cohensd', 'pearsonr', 'spearmanr']"
)
# Get original statistic
original_stat = func(x, y)
if multi_return:
original_stat = original_stat[0]
# Permute
if n_perm == 0:
return func(x, y)
else:
random_state = _check_random_state(seed)
seeds = random_state.randint(MAX_INT, size=n_perm)
par_for = Parallel(n_jobs=n_jobs, backend="multiprocessing")
perms = par_for(
delayed(_perm_test)(x, y, stat, equal_var, random_state=seeds[i])
for i in range(n_perm)
)
if multi_return:
perms = [elem[0] for elem in perms]
denom = float(len(perms)) + 1
if tails == 2:
numer = np.sum(np.abs(perms) >= np.abs(original_stat)) + 1
elif tails == 1:
if original_stat >= 0:
numer = np.sum(perms >= original_stat) + 1