Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Fill the missing entries of X_incomplete with several imputation methods and
# report the mean squared error of some of them against X.
# NOTE(review): X, X_incomplete, missing_mask and X_filled_mean are defined
# outside this excerpt -- presumably X is the complete matrix and missing_mask
# flags the entries that were removed from it; confirm against the setup code.
# Use 3 nearest rows which have a feature to fill in each row's missing features
knnImpute = KNN(k=3)
X_filled_knn = knnImpute.fit_transform(X_incomplete)
# matrix completion using convex optimization to find low-rank solution
# that still matches observed values. Slow!
X_filled_nnm = NuclearNormMinimization().fit_transform(X_incomplete)
# Instead of solving the nuclear norm objective directly,
# induce sparsity using singular value thresholding
softImpute = SoftImpute()
# simultaneously normalizes the rows and columns of your observed data,
# sometimes useful for low-rank imputation methods
biscaler = BiScaler()
# rescale both rows and columns to have zero mean and unit variance
X_incomplete_normalized = biscaler.fit_transform(X_incomplete)
# impute on the normalized data, then undo the BiScaler transform on the result
X_filled_softimpute_normalized = softImpute.fit_transform(X_incomplete_normalized)
X_filled_softimpute = biscaler.inverse_transform(X_filled_softimpute_normalized)
# for comparison, the same SoftImpute instance is fit again on the unscaled data
X_filled_softimpute_no_biscale = softImpute.fit_transform(X_incomplete)
# print mean squared error for the imputation methods above; the error is
# computed only over the entries selected by missing_mask
meanfill_mse = ((X_filled_mean[missing_mask] - X[missing_mask]) ** 2).mean()
print("meanFill MSE: %f" % meanfill_mse)
nnm_mse = ((X_filled_nnm[missing_mask] - X[missing_mask]) ** 2).mean()
print("Nuclear norm minimization MSE: %f" % nnm_mse)
self.flattened_images = self.images_array.reshape(self.flattened_array_shape)
n_missing_pixels = int(self.n_pixels * percent_missing)
missing_square_size = int(np.sqrt(n_missing_pixels))
print("[ResultsTable] n_missing_pixels = %d, missing_square_size = %d" % (
n_missing_pixels, missing_square_size))
self.incomplete_images = remove_pixels(
self.images_array,
missing_square_size=missing_square_size)
print("[ResultsTable] Incomplete images shape = %s" % (
self.incomplete_images.shape,))
self.flattened_incomplete_images = self.incomplete_images.reshape(
self.flattened_array_shape)
self.missing_mask = np.isnan(self.flattened_incomplete_images)
self.normalizer = BiScaler(
scale_rows=scale_rows,
center_rows=center_rows,
min_value=self.images_array.min(),
max_value=self.images_array.max())
self.incomplete_normalized = self.normalizer.fit_transform(
self.flattened_incomplete_images)
self.saved_image_indices = list(
range(0, self.n_images, saved_image_stride))
self.saved_images = defaultdict(dict)
self.dirname = dirname
self.mse_dict = {}
self.mae_dict = {}
self.save_images(self.images_array, "original", flattened=False)
self.save_images(self.incomplete_images, "incomplete", flattened=False)
if (not has_fancyimpute) and (method in _fancyimpute_options):
raise ValueError('You must install `fancyimpute` (pip install fancyimpute) to use this method')
_base_options = {'mean', 'median', 'constant'}
if (method not in _base_options) and (method not in _fancyimpute_options) and (not isinstance(method, (int,float))):
raise ValueError('method not understood.. Use `mean`, `median`, a scalar, or an option from `fancyimpute`')
X_incomplete = data.copy()
if method == 'KNN':
if value is None:
value = 3
X_filled = KNN(k=value, verbose=False).complete(X_incomplete)
elif method == 'BiScaler':
X_filled = BiScaler(verbose=False).fit_transform(X_incomplete)
elif method == 'SoftImpute':
X_filled = SoftImpute(verbose=False).complete(X_incomplete)
elif method == 'IterativeSVD':
if value is None:
rank = min(10, X_incomplete.shape[0]-2)
else:
rank = value
X_filled = IterativeSVD(rank=rank, verbose=False).complete(X_incomplete)
elif method == 'mean':
col_means = np.nanmean(X_incomplete, axis=0)
for i in range(X_incomplete.shape[1]):
X_incomplete[:,i][np.isnan(X_incomplete[:,i])] = col_means[i]
X_filled = X_incomplete