# we do not include PyQt because it is only an indirect dependency. It might
# be a good idea to include indirect dependencies too, but the list would
# get long in that case.
print("""
python {py}
larray {la}
numexpr {ne}
numpy {np}
pandas {pd}
pytables {pt}
pyyaml {yml}
bcolz {bc}
bottleneck {bn}
matplotlib {mpl}
vitables {vt}
""".format(py=py_version, la=la.__version__, ne=ne.__version__,
np=np.__version__, pd=pd.__version__, pt=pt.__version__,
yml=yaml.__version__,
bc=bcolz_version, bn=bn_version, mpl=mpl_version, vt=vt_version,
))
parser.exit()
def gettype(value):
    # return the normalized type of a value: an array's dtype scalar type,
    # the type of the first element for a tuple or list, or the value's own
    # type otherwise
if isinstance(value, (np.ndarray, la.LArray)):
type_ = value.dtype.type
elif isinstance(value, (tuple, list)):
type_ = type(value[0])
else:
type_ = type(value)
return normalize_type(type_)
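
# A standalone sketch of the idea above (hypothetical: normalize_type is
# project-specific; the stand-in below maps numpy scalar types to plain
# Python types, which is one plausible behaviour):
import numpy as np

def normalize_type_demo(type_):
    if issubclass(type_, np.bool_):
        return bool
    if issubclass(type_, np.integer):
        return int
    if issubclass(type_, np.floating):
        return float
    return type_

print(normalize_type_demo(np.asarray([1, 2]).dtype.type))   # <class 'int'>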
        # (fragment) tail of a formatted message combining the file path, the
        # number of cells actually read, the expected number of cells and the
        # per-dimension sizes
        % (fpath,
           len(str_table),
           num_possible_values,
           ' * '.join(str(len(values))
                      for values in possible_values)))
    # TODO: compare time with numpy's built-in conversion:
    # if dtype is None, numpy tries to detect the best type itself, which it
    # does well when the values are already numeric; if dtype is provided,
    # numpy does a good job converting from string values
    # (see the quick checks after this snippet)
if celltype is None:
celltype = detect_column_type(str_table)
data = convert_1darray(str_table, celltype)
array = np.array(data, dtype=celltype)
return la.LArray(array.reshape(shape), header, possible_values)
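
# Quick standalone checks (assumed examples, not project code) of the numpy
# behaviour the TODO above refers to:
import numpy as np

print(np.array(['1', '2', '3']).dtype)         # <U1: strings stay strings
print(np.array(['1', '2', '3'], dtype=int))    # [1 2 3]: explicit conversion
print(np.array([1, 2.5]).dtype)                # float64: best type detected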
    # convert to a 1d array. We do not simply use data = np.array(data),
    # because if data is a list of ndarrays (for example if we use
    # groupby(a, expr=id)) *and* all the ndarrays have the same length, the
    # result is a 2d array instead of the 1d array of ndarrays we need at
    # this point (see the illustration after this snippet).
arr = np.empty(len(data), dtype=type(data[0]))
arr[:] = data
data = arr
# and reshape it
data = data.reshape(len_pvalues)
axes = [la.Axis(axis_labels, axis_name)
for axis_name, axis_labels in zip(labels, possible_values)]
# FIXME: also handle totals
return la.LArray(data, axes)
# return la.LArray(data, labels, possible_values,
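
# Standalone illustration (assumed example) of the 2d-vs-object pitfall the
# comment above describes: with equal-length sub-arrays, np.array() builds a
# 2d array, while assigning into an object array keeps an array *of* arrays.
import numpy as np

chunks = [np.arange(3), np.arange(3, 6)]     # equal-length ndarrays
print(np.array(chunks).shape)                # (2, 3): unwanted 2d array

arr = np.empty(len(chunks), dtype=object)    # dtype=object keeps them separate
arr[:] = chunks
print(arr.shape)                             # (2,): 1d array of ndarrays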
    def store_result(self, result, context):
        # store a computed result either as a temporary variable or in the
        # entity's persistent array, refusing to store it into a field of a
        # narrower type
if isinstance(result, (np.ndarray, la.LArray)):
res_type = result.dtype.type
else:
res_type = type(result)
if self.temporary:
target = self.entity.temp_variables
else:
            # we cannot store/cache self.entity.array[self.name] because the
            # array object can change (e.g. when enlarging it due to births)
target = self.entity.array
result = np.asarray(result)
# TODO: assert type for temporary variables too
target_type_idx = type_to_idx[target[self.name].dtype.type]
res_type_idx = type_to_idx[res_type]
if res_type_idx > target_type_idx:
            raise Exception(  # snippet truncated: the original error message is elided
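
# Hypothetical standalone sketch of the widening check above; the real
# type_to_idx mapping is project code, so the ordering below is an assumption:
import numpy as np

type_order = {np.bool_: 0, np.int64: 1, np.float64: 2}

def check_compatible(res_type, target_type):
    # a result of a "wider" type must not be stored into a "narrower" field
    if type_order[res_type] > type_order[target_type]:
        raise TypeError("cannot store %s result into %s field"
                        % (res_type.__name__, target_type.__name__))

check_compatible(np.int64, np.float64)     # ok: int result into float field
# check_compatible(np.float64, np.int64)   # would raise TypeError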
def prepare(self, args, kwargs):
ndim_req = self.ndim_req
dimerror = ValueError("%s() only works on %d or %d dimensional data"
% (self.funcname, ndim_req - 1, ndim_req))
if self.check_length and len(args) > 1:
if all(np.isscalar(a) for a in args):
args = [np.asarray(args)]
else:
length = len(args[0])
if any(len(a) != length for a in args):
raise ValueError("when plotting multiple arrays, they must "
"have compatible axes")
if len(args) == 1:
data = args[0]
if not isinstance(data, (np.ndarray, la.LArray)):
data = np.asarray(data)
if ndim(data) == ndim_req:
# move the last axis first so that the last dimension is stacked
axes = list(range(data.ndim))
data = data.transpose(axes[-1], *axes[:-1])
elif ndim(data) == ndim_req - 1:
if isinstance(data, la.LArray):
# add dummy axis and move it as the first axis
data = data.expand(la.Axis(1, '__dummy__')).transpose('__dummy__')
else:
data = data[np.newaxis]
else:
raise dimerror
elif all(ndim(a) == ndim_req - 1 for a in args):
data = args
        else:
            raise dimerror
        return np.asarray(data), aslabeledarray(data).axes
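
# Standalone illustration (assumed example) of the axis shuffle done in
# prepare() above: transpose(axes[-1], *axes[:-1]) moves the last axis to
# the front, equivalent to np.moveaxis(data, -1, 0).
import numpy as np

data = np.zeros((3, 4, 5))
axes = list(range(data.ndim))                        # [0, 1, 2]
print(data.transpose(axes[-1], *axes[:-1]).shape)    # (5, 3, 4)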
    def compute(self, context, a, size=None, replace=True, p=None):
        # np.random.choice-style draw: 'a' lists the possible outcomes and
        # 'p' their probabilities; 'a' may also be an LArray with an
        # 'outcomes' axis, in which case the probabilities are taken from
        # the array itself and p must not be given
if isinstance(a, la.LArray):
assert p is None
outcomes_axis = a.axes['outcomes']
outcomes = outcomes_axis.labels
other_axes = a.axes - outcomes_axis
if other_axes:
a = index_array_by_variables(a, context, other_axes)
p = np.asarray(a.transpose('outcomes'))
else:
p = np.asarray(a)
a = outcomes
if isinstance(p, (list, np.ndarray)) and len(p) and not np.isscalar(p[0]):
assert len(p) == len(a)
assert all(len(px) == size for px in p)
assert len(a) >= 2
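
# Standalone sketch (assumed example) of the scalar-probability case the
# method above ultimately maps to: np.random.choice draws 'size' outcomes
# with probabilities 'p'.
import numpy as np

outcomes = np.array([0, 1, 2])
draws = np.random.choice(outcomes, size=10, replace=True, p=[0.2, 0.5, 0.3])
print(draws)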
def aslabeledarray(data):
    # wrap data in a labelled array: LArray inputs pass through unchanged,
    # sequences of LArrays get an extra anonymous axis, and anything else is
    # converted directly
sequence = (tuple, list)
if isinstance(data, la.LArray):
return data
elif (isinstance(data, sequence) and len(data) and
isinstance(data[0], la.LArray)):
# XXX: use la.stack?
# TODO: check that all arrays have the same axes
axes = [la.Axis(len(data))] + list(data[0].axes)
return la.LArray(data, axes)
else:
return la.LArray(data)
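
# Hedged usage sketch of aslabeledarray(); assumes the larray package (note
# that LArray is named Array in recent larray releases):
import numpy as np

wrapped = aslabeledarray(np.zeros((2, 3)))   # plain ndarray gets anonymous axes
print(wrapped.axes)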
        # (fragment) drop the fields which are present in the input file but
        # were not asked for by the modeller. They are not accessible anyway.
array = add_and_drop_fields(array, global_type)
attrs = global_data.attrs
dim_names = getattr(attrs, 'dimensions', None)
if dim_names is not None:
            # we serialise dim_names as a numpy array so that it is stored as
            # a native hdf type and not a pickle, but we prefer to work with
            # simple lists; also, files serialized using Python 2 contain
            # "bytes" instead of "str"
dim_names = [str(dim_name) for dim_name in dim_names]
pvalues = [getattr(attrs, 'dim%d_pvalues' % i)
for i in range(len(dim_names))]
axes = [la.Axis(labels, axis_name)
for axis_name, labels in zip(dim_names, pvalues)]
array = la.LArray(array, axes)
globals_data[name] = array
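
# Minimal standalone sketch (made-up data) of the axis reconstruction above:
import larray as la

dim_names = ['period', 'gender']                # from the 'dimensions' attr
pvalues = [[2015, 2016], ['male', 'female']]    # from the 'dim%d_pvalues' attrs
axes = [la.Axis(labels, axis_name)
        for axis_name, labels in zip(dim_names, pvalues)]
print(axes)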
input_entities = input_root.entities
entities_tables = {}
print(" * indexing tables")
for ent_name, entity in entities.items():
print(" -", ent_name, "...", end=' ')
table = getattr(input_entities, ent_name)
assert_valid_type(table, list(entity.fields.in_input.name_types))
rows_per_period, id_to_rownum_per_period = \
timed(index_table, table)
indexed_table = IndexedTable(table, rows_per_period,
id_to_rownum_per_period)
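
# Hypothetical, simplified stand-in for the indexing step; the real
# index_table is project code and all names below are assumptions. The idea:
# split a table into per-period rows and map each id to its row number.
import numpy as np

def index_table_demo(periods, ids):
    rows_per_period = {}
    id_to_rownum_per_period = {}
    for period in np.unique(periods):
        rows = np.nonzero(periods == period)[0]
        rows_per_period[period] = rows
        id_to_rownum = np.full(ids.max() + 1, -1, dtype=np.int64)
        id_to_rownum[ids[rows]] = np.arange(len(rows))
        id_to_rownum_per_period[period] = id_to_rownum
    return rows_per_period, id_to_rownum_per_period

rows, idmap = index_table_demo(np.array([2015, 2015, 2016]),
                               np.array([0, 1, 1]))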