# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
std_errs = {}
std_errs_no = {}
std_errs_u = {}
std_errs_u_no = {}
std_errs_r = {}
std_errs_r_no = {}
vals = np.zeros((NUM_REPS, 5, 7))
for b in range(NUM_REPS):
if b % 25 == 0:
print(key, n, b)
data = generate_data(0.00, 'pandas', ntk=(n, 3, 5), other_effects=1, const=False, rng=rs)
mo, fo = options[key]
mod_type, cluster_type = key.split(':')
y = PanelData(data.y)
random_effects = np.random.randint(0, n // 3, size=y.dataframe.shape)
other_random = np.random.randint(0, n // 5, size=y.dataframe.shape)
if mod_type == 'random':
effects = y.copy()
effects.dataframe.iloc[:, :] = random_effects
mo['other_effects'] = effects
if cluster_type in ('random', 'other-random', 'entity-nested', 'random-nested'):
clusters = y.copy()
if cluster_type == 'random':
clusters.dataframe.iloc[:, :] = random_effects
elif cluster_type == 'other-random':
clusters.dataframe.iloc[:, :] = other_random
elif cluster_type == 'entity_nested':
eid = y.entity_ids
return PanelData(current)
exclude = np.ptp(np.asarray(self._frame), 0) == 0
max_rmse = np.sqrt(np.asarray(self._frame).var(0).max())
scale = np.asarray(self._frame.std())
exclude = exclude | (scale < 1e-14 * max_rmse)
replacement = np.maximum(scale, 1)
scale[exclude] = replacement[exclude]
scale = scale[None, :]
while np.max(np.abs(np.asarray(current) - np.asarray(previous)) / scale) > 1e-8:
previous = current
current = demean_pass(previous, weights, root_w)
current.index = self._frame.index
return PanelData(current)
Parameters
----------
weights : PanelData, optional
Weights to use in demeaning
"""
if self.nentity > self.nobs:
group = 'entity'
dummy = 'time'
else:
group = 'time'
dummy = 'entity'
e = self.demean(group, weights=weights)
d = self.dummies(dummy, drop_first=True)
d.index = e.index
d = PanelData(d).demean(group, weights=weights)
d = d.values2d
e = e.values2d
resid = e - d @ lstsq(d, e)[0]
resid = DataFrame(resid, index=self._frame.index,
columns=self._frame.columns)
return PanelData(resid)
def copy(self):
    """Return a deep copy of this panel, preserving construction options.

    The underlying frame is copied, and the variable name, dummy-conversion
    flag, and drop-first flag are carried over so the clone is configured
    identically to the original.
    """
    frame_clone = self._frame.copy()
    return PanelData(
        frame_clone,
        var_name=self._var_name,
        convert_dummies=self._convert_dummies,
        drop_first=self._drop_first,
    )
def __init__(self, dependent, exog, *, weights=None):
    """Initialize the panel model from dependent and exogenous data.

    Parameters
    ----------
    dependent : array_like
        Dependent variable; wrapped in ``PanelData`` with name ``'Dep'``.
    exog : array_like
        Exogenous regressors; wrapped in ``PanelData`` with name ``'Exog'``.
    weights : array_like, optional
        Observation weights, keyword-only; adapted to the panel shape by
        ``_adapt_weights`` (``None`` produces default weighting there).
    """
    self.dependent = PanelData(dependent, 'Dep')
    self.exog = PanelData(exog, 'Exog')
    # Shape before any observations are dropped; used later for validation.
    self._original_shape = self.dependent.shape
    self._constant = None
    self._formula = None
    self._is_weighted = True
    self._name = self.__class__.__name__
    self.weights = self._adapt_weights(weights)
    # FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin bool yields the identical boolean dtype without the error.
    self._not_null = np.ones(self.dependent.values2d.shape[0], dtype=bool)
    self._cov_estimators = CovarianceManager(self.__class__.__name__, HomoskedasticCovariance,
                                             HeteroskedasticCovariance, ClusteredCovariance,
                                             DriscollKraay, ACCovariance)
    # Keep a copy of the pre-drop index so results can be re-aligned to it.
    self._original_index = self.dependent.index.copy()
    self._validate_data()
    self._singleton_index = None
if effects.shape[1:] != self._original_shape[1:]:
raise ValueError('other_effects must have the same number of '
'entities and time periods as dependent.')
num_effects = effects.nvar
if num_effects + self.entity_effects + self.time_effects > 2:
raise ValueError('At most two effects supported.')
cats = {}
effects_frame = effects.dataframe
for col in effects_frame:
cat = pd.Categorical(effects_frame[col])
cats[col] = cat.codes.astype(np.int64)
cats = pd.DataFrame(cats, index=effects_frame.index)
cats = cats[effects_frame.columns]
other_effects = PanelData(cats)
other_effects.drop(~self.not_null)
self._other_effect_cats = other_effects
cats = other_effects.values2d
nested = False
if cats.shape[1] == 2:
nested = self._is_effect_nested(cats[:, [0]], cats[:, [1]])
nested |= self._is_effect_nested(cats[:, [1]], cats[:, [0]])
nesting_effect = 'other effects'
elif self.entity_effects:
nested = self._is_effect_nested(cats[:, [0]], self.dependent.entity_ids)
nested |= self._is_effect_nested(self.dependent.entity_ids, cats[:, [0]])
nesting_effect = 'entity effects'
elif self.time_effects:
nested = self._is_effect_nested(cats[:, [0]], self.dependent.time_ids)
nested |= self._is_effect_nested(self.dependent.time_ids, cats[:, [0]])
nesting_effect = 'time effects'
return frame
# Swap out the index for better performance
init_index = DataFrame(groups)
init_index.set_index(list(init_index.columns), inplace=True)
root_w = np.sqrt(weights)
weights = DataFrame(weights, index=init_index.index)
wframe = root_w * self._frame
wframe.index = init_index.index
previous = wframe
current = demean_pass(previous, weights, root_w)
if groups.shape[1] == 1:
current.index = self._frame.index
return PanelData(current)
exclude = np.ptp(np.asarray(self._frame), 0) == 0
max_rmse = np.sqrt(np.asarray(self._frame).var(0).max())
scale = np.asarray(self._frame.std())
exclude = exclude | (scale < 1e-14 * max_rmse)
replacement = np.maximum(scale, 1)
scale[exclude] = replacement[exclude]
scale = scale[None, :]
while np.max(np.abs(np.asarray(current) - np.asarray(previous)) / scale) > 1e-8:
previous = current
current = demean_pass(previous, weights, root_w)
current.index = self._frame.index
return PanelData(current)
weights : PanelData, optional
Weights to use in the weighted demeaning
Returns
-------
demeaned : PanelData
Weighted, demeaned data according to groups
Notes
-----
Iterates until convergence
"""
if not isinstance(groups, PanelData):
groups = PanelData(groups)
if weights is None:
weights = PanelData(pd.DataFrame(np.ones((self._frame.shape[0], 1)),
index=self.index,
columns=['weights']))
weights = weights.values2d
groups = groups.values2d.astype(np.int64)
weight_sum = {}
def weighted_group_mean(df, weights, root_w, level):
    """Broadcast the weighted group mean of ``root_w * df`` at ``level``.

    The numerator is the group-wise sum of the root-weighted data; the
    denominator (group-wise sum of weights) is computed once per level and
    memoized in the enclosing ``weight_sum`` dict, since it does not depend
    on ``df``.
    """
    numerator = (root_w * df).groupby(level=level).transform('sum')
    denominator = weight_sum.get(level)
    if denominator is None:
        denominator = weights.groupby(level=level).transform('sum')
        weight_sum[level] = denominator
    return numerator.values / denominator.values
np.any(np.isnan(w), axis=1))
missing_warning(all_missing ^ missing)
if np.any(missing):
self.dependent.drop(missing)
self.exog.drop(missing)
self.weights.drop(missing)
x = self.exog.values2d
self._not_null = ~missing
w = self.weights.dataframe
if np.any(np.asarray(w) <= 0):
raise ValueError('weights must be strictly positive.')
w = w / w.mean()
self.weights = PanelData(w)
rank_of_x = self._check_exog_rank()
self._constant, self._constant_index = has_constant(x, rank_of_x)