Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self, df):
    """
    Build a panel-like (items, major, minor) view of a MultiIndex DataFrame.

    Parameters
    ----------
    df : DataFrame
        DataFrame with a two-level MultiIndex; level 0 is treated as the
        minor axis and level 1 as the major axis.
    """
    self._items = df.columns
    index = df.index
    # Unique level labels in order of first appearance in the index
    self._major_axis = Index(index.levels[1][get_codes(index)[1]]).unique()
    self._minor_axis = Index(index.levels[0][get_codes(index)[0]]).unique()
    # Reindex onto the full cartesian product so every (minor, major)
    # combination has a row; combinations absent from df become NaN
    self._full_index = MultiIndex.from_product([self._minor_axis,
                                                self._major_axis])
    new_df = df.reindex(self._full_index)
    new_df.index.names = df.index.names
    self._frame = new_df
    # FIX: use the private attribute assigned above (was `self.minor_axis`,
    # which depends on an external property), consistent with the other axes
    i, j, k = len(self._items), len(self._major_axis), len(self._minor_axis)
    self._shape = (i, j, k)
    # Rearrange the flattened column-major values into an (items, major,
    # minor) cube matching self._shape
    self._values = np.swapaxes(
        np.reshape(np.asarray(new_df).copy().T, (i, k, j)), 1, 2)
def entity_ids(self):
    """
    Get array containing entity group membership information

    Returns
    -------
    id : ndarray
        2d array containing entity ids corresponding dataframe view
    """
    # Level-0 codes of the MultiIndex identify the entity of each row;
    # append a trailing axis to return a column vector
    level0_codes = get_codes(self._frame.index)[0]
    return np.asarray(level0_codes)[:, None]
def category_product(cats):
    """
    Combine categorical columns into a single categorical via their product.

    Parameters
    ----------
    cats : {Series, DataFrame}
        DataFrame whose columns are all categorical. A Series input is
        returned unchanged.

    Returns
    -------
    cp : Series
        Categorical series containing the cartesian product of the categories
        in cats

    Raises
    ------
    TypeError
        If any column of cats is not categorical.
    ValueError
        If the combined categories require 63 or more bits to encode.
    """
    if isinstance(cats, Series):
        return cats
    # Bits required to encode the codes of each categorical column
    sizes = []
    for c in cats:
        if not is_categorical(cats[c]):
            raise TypeError('cats must contain only categorical variables')
        col = cats[c]
        max_code = get_codes(col.cat).max()
        size = 1
        while max_code >= 2 ** size:
            size += 1
        sizes.append(size)
    nobs = cats.shape[0]
    total_size = sum(sizes)
    if total_size >= 63:
        raise ValueError('There are too many cats with too many states to use this method.')
    # Smallest signed integer dtype whose non-sign bits fit all packed codes
    dtype_size = min(filter(lambda v: total_size < (v - 1), (8, 16, 32, 64)))
    dtype_str = 'int{0:d}'.format(dtype_size)
    dtype_val = dtype(dtype_str)
    codes = zeros(nobs, dtype=dtype_val)
    cum_size = 0
    # Pack each column's codes into disjoint bit ranges of a single integer
    for i, col in enumerate(cats):
        codes += (get_codes(cats[col].cat).astype(dtype_val) << SCALAR_DTYPES[dtype_str](cum_size))
        cum_size += sizes[i]
    return Series(Categorical(codes), index=cats.index)
def hash(self):
    """
    Hashes of the data contained in the instance.

    Returns
    -------
    hashes : tuple
        Sorted tuple of 1-element tuples of hex digests, one per categorical
        column, continuous column and interaction, plus one for the weights
        when present.
    """
    digests = []
    hasher = hash_func()
    if self._cat is not None:
        for name in self._cat:
            codes = get_codes(self._cat[name].cat)
            hasher.update(ascontiguousarray(to_numpy(codes).data))
            digests.append((hasher.hexdigest(),))
            hasher = _reset(hasher)
    if self._cont is not None:
        for name in self._cont:
            hasher.update(ascontiguousarray(to_numpy(self._cont[name]).data))
            digests.append((hasher.hexdigest(),))
            hasher = _reset(hasher)
    if self._interactions is not None:
        for interaction in self._interactions:
            digests.extend(interaction.hash)
    # Weights contribute an additional digest when provided
    if self._weights is not None:
        hasher = hash_func()
        hasher.update(ascontiguousarray(self._weights.data))
        digests.append((hasher.hexdigest(),))
    return tuple(sorted(digests))
def time_ids(self):
    """
    Get array containing time membership information

    Returns
    -------
    id : ndarray
        2d array containing time ids corresponding dataframe view
    """
    # Level-1 codes of the MultiIndex identify the time period of each row;
    # append a trailing axis to return a column vector
    level1_codes = get_codes(self._frame.index)[1]
    return np.asarray(level1_codes)[:, None]
"""
Parameters
----------
cat : Series
Categorical series to convert to dummy variables
cont : {Series, DataFrame}
Continuous variable values to use in the dummy interaction
precondition : bool
Flag whether dummies should be preconditioned
Returns
-------
interact : csc_matrix
Sparse matrix of dummy interactions with unit column norm
"""
codes = get_codes(category_product(cat).cat)
interact = csc_matrix((to_numpy(cont).flat, (arange(codes.shape[0]), codes)))
if not precondition:
return interact
else:
return preconditioner(interact)[0]
def time(self):
    """List of time index names"""
    frame_index = self._frame.index
    # Map level-1 codes back to labels, then deduplicate preserving order
    labels = frame_index.levels[1][get_codes(frame_index)[1]]
    return list(labels.unique())