def test_incremental_basic(scheduler, dataframes):
    # Create observations that we know linear models can recover
    n, d = 100, 3
    rng = da.random.RandomState(42)
    X = rng.normal(size=(n, d), chunks=30)
    coef_star = rng.uniform(size=d, chunks=d)
    y = da.sign(X.dot(coef_star))
    y = (y + 1) / 2
    if dataframes:
        X = dd.from_array(X)
        y = dd.from_array(y)

    with scheduler() as (s, [_, _]):
        est1 = SGDClassifier(random_state=0, tol=1e-3, average=True)
        est2 = clone(est1)

        clf = Incremental(est1, random_state=0)
        result = clf.fit(X, y, classes=[0, 1])
        assert result is clf
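# Outside the test harness, the same pattern works on plain dask arrays.
# A minimal sketch, assuming dask_ml.wrappers.Incremental as used above;
# the data and hyperparameters here are illustrative, not from the test:
import dask.array as da
from sklearn.linear_model import SGDClassifier
from dask_ml.wrappers import Incremental

X = da.random.random((100, 3), chunks=30)
y = (da.random.random(100, chunks=30) > 0.5).astype(int)

clf = Incremental(SGDClassifier(random_state=0, tol=1e-3), random_state=0)
clf.fit(X, y, classes=[0, 1])  # extra fit kwargs are forwarded to partial_fit
print(clf.score(X, y))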
def test_basic(self, output_distribution):
    rs = da.random.RandomState(0)
    a = dpp.QuantileTransformer(output_distribution=output_distribution)
    b = spp.QuantileTransformer(output_distribution=output_distribution)
    X = rs.uniform(size=(1000, 3), chunks=50)
    a.fit(X)
    b.fit(X)
    assert_estimator_equal(a, b, atol=0.02)

    # set the quantiles, so that from here out, we're exact
    a.quantiles_ = b.quantiles_
    assert_eq_ar(a.transform(X), b.transform(X), atol=1e-7)
    assert_eq_ar(X, a.inverse_transform(a.transform(X)))
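# A self-contained sketch of the API the test exercises, assuming
# dask_ml.preprocessing.QuantileTransformer (aliased `dpp` above):
import dask.array as da
from dask_ml.preprocessing import QuantileTransformer

X = da.random.uniform(size=(1000, 3), chunks=50)
qt = QuantileTransformer(output_distribution='normal')
Xt = qt.fit_transform(X)        # stays lazy on dask input
print(Xt.mean().compute())      # roughly 0 for a normal output distribution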
def fit(self, y):
    y = self._check_array(y)

    if isinstance(y, da.Array):
        classes_ = _encode_dask_array(y)
        self.classes_ = classes_.compute()
        self.dtype_ = None
    elif _is_categorical(y):
        self.classes_ = _encode_categorical(y)
        self.dtype_ = y.dtype
    else:
        # Fall back to scikit-learn's LabelEncoder for plain NumPy input.
        self.dtype_ = None
        return super(LabelEncoder, self).fit(y)

    return self
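# Usage sketch for the fit() above, assuming dask_ml.preprocessing.LabelEncoder;
# classes_ is computed eagerly while transform() stays lazy on dask input:
import numpy as np
import dask.array as da
from dask_ml.preprocessing import LabelEncoder

y = da.from_array(np.array(['a', 'b', 'a', 'c']), chunks=2)
le = LabelEncoder()
le.fit(y)
print(le.classes_)                # array(['a', 'b', 'c'], dtype='<U1')
print(le.transform(y).compute())  # integer codes, e.g. [0 1 0 2]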
def predict_log_proba(self, X):
    """Log of probability estimates.

    For dask inputs, a dask array or dataframe is returned. For other
    inputs (NumPy array, pandas dataframe, scipy sparse matrix), the
    regular return value is returned.

    If the underlying estimator does not have a ``predict_proba``
    method, then an ``AttributeError`` is raised.

    Parameters
    ----------
    X : array or dataframe

    Returns
    -------
    y : array-like
    """
    self._check_method("predict_log_proba")
    return da.log(self.predict_proba(X))
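# End-to-end sketch: predict_log_proba is just da.log over predict_proba,
# so dask input yields a lazy dask array. Assumes dask_ml.wrappers.ParallelPostFit,
# which defines this method; the data here is illustrative:
import numpy as np
import dask.array as da
from sklearn.linear_model import LogisticRegression
from dask_ml.wrappers import ParallelPostFit

X = np.random.RandomState(0).normal(size=(100, 3))
y = (X[:, 0] > 0).astype(int)

clf = ParallelPostFit(LogisticRegression()).fit(X, y)
log_p = clf.predict_log_proba(da.from_array(X, chunks=25))
print(log_p.compute()[:2])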
# Extract dataset bands needed for calculations
blue = dataset_in.blue
green = dataset_in.green
red = dataset_in.red
nir = dataset_in.nir
swir1 = dataset_in.swir1
swir2 = dataset_in.swir2

classified = _run_regression(blue.data, green.data, red.data,
                             nir.data, swir1.data, swir2.data)

# Start from an array of no_data values with the same shape and type as
# `classified`, then keep the classified values wherever the mask is clean.
classified_clean = classified - classified + no_data
if isinstance(classified_clean, np.ndarray):
    classified_clean = np.where(clean_mask, classified, classified_clean)
elif isinstance(classified_clean, dask.array.core.Array):
    classified_clean = dask.array.where(clean_mask, classified, classified_clean)

# Create xarray of data
x_coords = dataset_in[x_coord]
y_coords = dataset_in[y_coord]

time = None
coords = None
dims = None
if mosaic:
    coords = [y_coords, x_coords]
    dims = [y_coord, x_coord]
else:
    time_coords = dataset_in[time_coord]
    coords = [time_coords, y_coords, x_coords]
    dims = [time_coord, y_coord, x_coord]
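# The coords/dims computed above feed an xarray constructor. A minimal
# sketch with made-up sizes and a hypothetical output variable name:
import numpy as np
import xarray as xr

y_coords = np.linspace(0.0, 1.0, 4)
x_coords = np.linspace(0.0, 1.0, 5)
classified_clean = np.zeros((4, 5))

data_array = xr.DataArray(classified_clean,
                          coords=[y_coords, x_coords],
                          dims=['latitude', 'longitude'])
dataset_out = data_array.to_dataset(name='classified')  # name is illustrative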
import dask.array as da
kwargs = {'bgcolor': '#00000000',
          'rankdir': 'BT',
          'node_attr': {'color': 'white',
                        'fontcolor': '#FFFFFF',
                        'penwidth': '3'},
          'edge_attr': {'color': 'white', 'penwidth': '3'}}
x = da.ones((15,), chunks=(5,))
x.visualize('array-1d.svg', **kwargs)
x.sum().visualize('array-1d-sum.svg', **kwargs)
x = da.ones((15, 15), chunks=(5, 5))
x.sum(axis=1).visualize('array-sum.svg', **kwargs)
(x + x.T).visualize('array-xxT.svg', **kwargs)
(x.dot(x.T + 1)).visualize('array-xdotxT.svg', **kwargs)
(x.dot(x.T + 1) - x.mean()).visualize('array-xdotxT-mean.svg', **kwargs)
(x.dot(x.T + 1) - x.mean()).std().visualize('array-xdotxT-mean-std.svg', **kwargs)
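# The kwargs above are graphviz styling (white-on-transparent, bottom-to-top
# layout) forwarded through visualize(). A variant that also fuses tasks
# before rendering; optimize_graph is a documented visualize() option, and
# graphviz must be installed for any of these calls to work:
import dask.array as da

x = da.ones((15, 15), chunks=(5, 5))
x.sum(axis=1).visualize('array-sum-optimized.svg', optimize_graph=True)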
def event_page(self, doc):
    @dask.delayed
    def delayed_fill(event_page, key):
        self.fill_event_page(event_page, include=key)
        return numpy.asarray(event_page['data'][key])

    descriptor = self._descriptor_cache[doc['descriptor']]
    needs_filling = {key for key, val in descriptor['data_keys'].items()
                     if 'external' in val}
    filled_doc = copy.deepcopy(doc)
    for key in needs_filling:
        shape = extract_shape(descriptor, key)
        dtype = extract_dtype(descriptor, key)
        filled_doc['data'][key] = array.from_delayed(
            delayed_fill(filled_doc, key), shape=shape, dtype=dtype)
    return filled_doc
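# The core pattern above in isolation: wrap a delayed loader with a known
# shape/dtype so downstream code sees an ordinary dask array. The names here
# are illustrative, not from databroker:
import numpy as np
import dask
import dask.array as da

@dask.delayed
def load_block():
    # Stand-in for an expensive fill/IO call.
    return np.arange(6, dtype='float64').reshape(2, 3)

lazy = da.from_delayed(load_block(), shape=(2, 3), dtype='float64')
print(lazy.sum().compute())  # 15.0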
lines = [str(type(self))]

if len(self.columns) == 0:
    lines.append('Index: 0 entries')
    lines.append('Empty %s' % type(self).__name__)
    put_lines(buf, lines)
    return

# Group and execute the required computations
computations = {}
if verbose:
    computations.update({'index': self.index, 'count': self.count()})
if memory_usage:
    computations.update({'memory_usage':
                         self.map_partitions(M.memory_usage, index=True)})
computations = dict(zip(computations.keys(), da.compute(*computations.values())))

column_template = "{0:<%d} {1}" % (self.columns.str.len().max() + 5)

if verbose:
    index = computations['index']
    counts = computations['count']
    lines.append(index.summary())
    column_template = column_template.format('{0}', '{1} non-null {2}')
    column_info = [column_template.format(*x)
                   for x in zip(self.columns, counts, self.dtypes)]
else:
    column_info = [column_template.format(*x)
                   for x in zip(self.columns, self.dtypes)]

lines.append('Data columns (total {} columns):'.format(len(self.columns)))
lines.extend(column_info)
dtype_counts = ['%s(%d)' % k
                for k in sorted(self.dtypes.value_counts().iteritems(), key=str)]
lines.append('dtypes: {}'.format(', '.join(dtype_counts)))
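# Calling side, as a minimal sketch: dask.dataframe exposes this as
# DataFrame.info(buf=None, verbose=False, memory_usage=False).
import pandas as pd
import dask.dataframe as dd

ddf = dd.from_pandas(pd.DataFrame({'a': [1, 2, 3, 4], 'b': list('xyzw')}),
                     npartitions=2)
ddf.info(verbose=True, memory_usage=True)  # triggers the computations above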
def inner_stat(array, axis=-1, mdtol=None, **kwargs):
    # Call the statistic to get the basic result (missing-data tolerant).
    dask_result = dask_stats_function(array, axis=axis, **kwargs)
    if mdtol is None or mdtol >= 1.0:
        result = dask_result
    else:
        # Build a lazy computation to compare the fraction of missing
        # input points at each output point to the 'mdtol' threshold.
        point_mask_counts = da.sum(da.ma.getmaskarray(array), axis=axis)
        points_per_calc = array.size / dask_result.size
        masked_point_fractions = point_mask_counts / points_per_calc
        boolean_mask = masked_point_fractions > mdtol
        # Return an mdtol-masked version of the basic result.
        result = da.ma.masked_array(
            da.ma.getdata(dask_result), boolean_mask
        )
    return result
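# The same mdtol masking written directly against dask.array.ma; a sketch
# with an mdtol of 0.5, not iris's public API (iris wraps this in its
# lazy-statistics helpers):
import dask.array as da

arr = da.ma.masked_less(da.arange(12, chunks=4).reshape(3, 4), 3)
mean = da.mean(arr, axis=-1)
frac = da.sum(da.ma.getmaskarray(arr), axis=-1) / arr.shape[-1]
result = da.ma.masked_array(da.ma.getdata(mean), frac > 0.5)
print(result.compute())  # row 0 is masked: 3 of its 4 inputs were masked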