Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def convert_columns(s, drop_first):
    """
    Expand a categorical column into dummy variables.

    Parameters
    ----------
    s : Series
        Column to inspect. Its ``name`` is used to label the dummy columns.
    drop_first : bool
        Forwarded unchanged to ``pd.get_dummies``.

    Returns
    -------
    {Series, DataFrame}
        ``s`` itself when ``is_categorical(s)`` is false. Otherwise the
        frame produced by ``pd.get_dummies`` with each column relabeled
        as ``<s.name>.<original label>``.
    """
    # Anything that is not categorical passes through untouched
    if not is_categorical(s):
        return s
    dummies = pd.get_dummies(s, drop_first=drop_first)
    # Prefix every dummy label with the source column name
    prefix = str(s.name) + '.'
    dummies.columns = [prefix + str(level) for level in dummies]
    return dummies
cats : {Series, DataFrame}
DataFrame containing categorical variables. If cats is a Series, cats
is returned unmodified.
Returns
-------
cp : Series
Categorical series containing the cartesian product of the categories
in cats
"""
if isinstance(cats, Series):
return cats
sizes = []
for c in cats:
if not is_categorical(cats[c]):
raise TypeError('cats must contain only categorical variables')
col = cats[c]
max_code = get_codes(col.cat).max()
size = 1
while max_code >= 2 ** size:
size += 1
sizes.append(size)
nobs = cats.shape[0]
total_size = sum(sizes)
if total_size >= 63:
raise ValueError('There are too many cats with too many states to use this method.')
dtype_size = min(filter(lambda v: total_size < (v - 1), (8, 16, 32, 64)))
dtype_str = 'int{0:d}'.format(dtype_size)
dtype_val = dtype(dtype_str)
codes = zeros(nobs, dtype=dtype_val)
cum_size = 0
def convert_columns(s, drop_first):
    """
    Convert string or categorical columns to dummy variables.

    Parameters
    ----------
    s : Series
        Column to inspect. Its ``name`` is used to label the dummy columns.
    drop_first : bool
        Forwarded unchanged to ``pd.get_dummies``.

    Returns
    -------
    {Series, DataFrame}
        ``s`` when it is not categorical after the optional string
        conversion. Otherwise the frame produced by ``pd.get_dummies``
        with each column relabeled as ``<s.name>.<original label>``.

    Notes
    -----
    A column is cast to ``'category'`` only when ``is_string_dtype``
    accepts its dtype and ``is_string_like`` is true for every element.
    """
    # The dtype test runs first so the element-wise scan is skipped for
    # dtypes that is_string_dtype rejects. is_string_like is passed
    # directly; wrapping it in a lambda added nothing.
    if is_string_dtype(s.dtype) and s.map(is_string_like).all():
        s = s.astype('category')
    if not is_categorical(s):
        return s
    out = pd.get_dummies(s, drop_first=drop_first)
    # Prefix every dummy label with the source column name
    out.columns = [str(s.name) + '.' + str(c) for c in out]
    return out
>>> import numpy as np
>>> from linearmodels.iv.absorbing import Interaction
>>> import pandas as pd
>>> rs = np.random.RandomState(0)
>>> n = 100000
>>> cats = pd.concat([pd.Series(pd.Categorical(rs.randint(i+2,size=n)))
... for i in range(4)],1)
>>> cats.columns = ['cat{0}'.format(i) for i in range(4)]
>>> columns = ['cont{0}'.format(i) for i in range(6)]
>>> cont = pd.DataFrame(rs.standard_normal((n, 6)), columns=columns)
>>> frame = pd.concat([cats, cont], 1)
>>> interact = Interaction.from_frame(frame)
>>> interact.sparse.shape # Cart product of all cats, 5!, times ncont, 6
(100000, 720)
"""
cat_cols = [col for col in frame if is_categorical(frame[col])]
cont_cols = [col for col in frame if col not in cat_cols]
return Interaction(frame[cat_cols], frame[cont_cols], nobs=frame.shape[0])
def convert_columns(s, drop_first):
    """
    Turn a string or categorical column into dummy variables.

    Parameters
    ----------
    s : Series
        Column to inspect. Its ``name`` is used to label the dummy columns.
    drop_first : bool
        Forwarded unchanged to ``get_dummies``.

    Returns
    -------
    {Series, DataFrame}
        ``s`` when it is not categorical after the optional string
        conversion. Otherwise the frame produced by ``get_dummies`` with
        each column relabeled as ``<s.name>.<original label>``.
    """
    # Short-circuits: values are scanned only if the dtype check passes
    all_strings = is_string_dtype(s.dtype) and s.map(is_string_like).all()
    if all_strings:
        s = s.astype('category')
    if not is_categorical(s):
        return s
    dummies = get_dummies(s, drop_first=drop_first)
    # Prefix every dummy label with the source column name
    labels = ['.'.join((str(s.name), str(level))) for level in dummies]
    dummies.columns = labels
    return dummies
cont_nobs = getattr(cont, 'shape', (0,))[0]
nobs = max(cat_nobs, cont_nobs)
if cat is None and cont is None:
if self._nobs is not None:
self._cont_data = self._cat_data = IVData(None, 'none', nobs=self._nobs)
else:
raise ValueError('nobs must be provided when cat and cont are None')
return
self._nobs = nobs
self._cat_data = IVData(cat, 'cat', nobs=nobs, convert_dummies=False)
self._cont_data = IVData(cont, 'cont', nobs=nobs, convert_dummies=False)
if self._cat_data.shape[1] == self._cont_data.shape[1] == 0:
raise ValueError('Both cat and cont are empty arrays')
cat_data = self._cat_data.pandas
convert = [col for col in cat_data if not (is_categorical(cat_data[col]))]
if convert:
cat_data = DataFrame({col: cat_data[col].astype('category') for col in cat_data})
self._cat_data = IVData(cat_data, 'cat', convert_dummies=False)