Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def manual_narrow_condense_back_to_valid_size_slow(self):
"""This test is really slow so we don't want it run by default.
"""
# tests that we don't try to create an 'int24' (which is meaningless)
categories = self.create_categories(24, plus_one=False)
categories.append(categories[0])
arr = LabelArray(categories, missing_value=categories[0])
assert_equal(arr.itemsize, 4)
self.check_roundtrip(arr)
# Generated with:
# data = arange(25).reshape(5, 5).transpose() % 4
data = array([[0, 1, 2, 3, 0],
[1, 2, 3, 0, 1],
[2, 3, 0, 1, 2],
[3, 0, 1, 2, 3],
[0, 1, 2, 3, 0]], dtype=factor_dtype)
# Generated with:
# classifier_data = arange(25).reshape(5, 5).transpose() % 2
classifier_data = array([[0, 1, 0, 1, 0],
[1, 0, 1, 0, 1],
[0, 1, 0, 1, 0],
[1, 0, 1, 0, 1],
[0, 1, 0, 1, 0]], dtype=int64_dtype)
string_classifier_data = LabelArray(
classifier_data.astype(str).astype(object),
missing_value=None,
)
expected_ranks = {
'ordinal': array(
[[1., 1., 3., 2., 2.],
[1., 2., 3., 1., 2.],
[2., 2., 1., 1., 3.],
[2., 1., 1., 2., 3.],
[1., 1., 3., 2., 2.]]
),
'average': array(
[[1.5, 1., 3., 2., 1.5],
[1.5, 2., 3., 1., 1.5],
[2.5, 2., 1., 1., 2.5],
check_arrays(
self.strs == value,
arr1d.as_int_array() == idx,
)
# It should be equivalent to pass the same set of categories manually.
arr1d_explicit_categories = LabelArray(
self.strs,
missing_value='',
categories=arr1d.categories,
)
check_arrays(arr1d, arr1d_explicit_categories)
for shape in (9, 3), (3, 9), (3, 3, 3):
strs2d = self.strs.reshape(shape)
arr2d = LabelArray(strs2d, missing_value='')
codes2d = arr2d.as_int_array()
self.assertEqual(arr2d.shape, shape)
check_arrays(arr2d.categories, categories)
for idx, value in enumerate(arr2d.categories):
check_arrays(strs2d == value, codes2d == idx)
lambda a: LabelArray(
a.astype(unicode).astype(object),
None,
)
def test_string_eq(self, compval, labelarray_dtype):
compval = labelarray_dtype.type(compval)
class C(Classifier):
dtype = categorical_dtype
missing_value = ''
inputs = ()
window_length = 0
c = C()
# There's no significance to the values here other than that they
# contain a mix of the comparison value and other values.
data = LabelArray(
np.asarray(
[['', 'a', 'ab', 'ba'],
['z', 'ab', 'a', 'ab'],
['aa', 'ab', '', 'ab'],
['aa', 'a', 'ba', 'ba']],
dtype=labelarray_dtype,
),
missing_value='',
)
self.check_terms(
terms={
'eq': c.eq(compval),
},
expected={
'eq': (data == compval),
op=vectorized_is_element,
opargs=(choices,),
)
else:
raise TypeError(
"Found non-int in choices for {typename}.element_of.\n"
"Supplied choices were {choices}.".format(
typename=type(self).__name__,
choices=choices,
)
)
elif self.dtype == categorical_dtype:
if only_contains((bytes, unicode), choices):
return ArrayPredicate(
term=self,
op=LabelArray.element_of,
opargs=(choices,),
)
else:
raise TypeError(
"Found non-string in choices for {typename}.element_of.\n"
"Supplied choices were {choices}.".format(
typename=type(self).__name__,
choices=choices,
)
)
assert False, "Unknown dtype in Classifier.element_of %s." % self.dtype
def view(self, dtype=_NotPassed, type=_NotPassed):
if type is _NotPassed and dtype not in (_NotPassed, self.dtype):
raise TypeError("Can't view LabelArray as another dtype.")
# The text signature on ndarray.view makes it look like the default
# values for dtype and type are `None`, but passing None explicitly has
# different semantics than not passing an arg at all, so we reconstruct
# the kwargs dict here to simulate the args not being passed at all.
kwargs = {}
if dtype is not _NotPassed:
kwargs['dtype'] = dtype
if type is not _NotPassed:
kwargs['type'] = type
return super(LabelArray, self).view(**kwargs)
return data, {}
data_dtype = data.dtype
if data_dtype == bool_:
return data.astype(uint8), {'dtype': dtype(bool_)}
elif data_dtype in FLOAT_DTYPES:
return data.astype(float64), {'dtype': dtype(float64)}
elif data_dtype in INT_DTYPES:
return data.astype(int64), {'dtype': dtype(int64)}
elif is_categorical(data_dtype):
if not isinstance(missing_value, LabelArray.SUPPORTED_SCALAR_TYPES):
raise TypeError(
"Invalid missing_value for categorical array.\n"
"Expected None, bytes or unicode. Got %r." % missing_value,
)
return LabelArray(data, missing_value), {}
elif data_dtype.kind == 'M':
try:
outarray = data.astype('datetime64[ns]').view('int64')
return outarray, {'dtype': datetime64ns_dtype}
except OverflowError:
raise ValueError(
"AdjustedArray received a datetime array "
"not representable as datetime64[ns].\n"
"Min Date: %s\n"
"Max Date: %s\n"
% (data.min(), data.max())
)
else:
raise TypeError(
"Don't know how to construct AdjustedArray "
"on data of type %s." % data_dtype
Returns
-------
coerced, view_kwargs : (np.ndarray, np.dtype)
"""
if isinstance(data, LabelArray):
return data, {}
data_dtype = data.dtype
if data_dtype == bool_:
return data.astype(uint8), {'dtype': dtype(bool_)}
elif data_dtype in FLOAT_DTYPES:
return data.astype(float64), {'dtype': dtype(float64)}
elif data_dtype in INT_DTYPES:
return data.astype(int64), {'dtype': dtype(int64)}
elif is_categorical(data_dtype):
if not isinstance(missing_value, LabelArray.SUPPORTED_SCALAR_TYPES):
raise TypeError(
"Invalid missing_value for categorical array.\n"
"Expected None, bytes or unicode. Got %r." % missing_value,
)
return LabelArray(data, missing_value), {}
elif data_dtype.kind == 'M':
try:
outarray = data.astype('datetime64[ns]').view('int64')
return outarray, {'dtype': datetime64ns_dtype}
except OverflowError:
raise ValueError(
"AdjustedArray received a datetime array "
"not representable as datetime64[ns].\n"
"Min Date: %s\n"
"Max Date: %s\n"
% (data.min(), data.max())