Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_mice_row_with_low_rank_random_matrix_approximate():
    """Check approximate PMM imputation keeps the missing-entry MAE below 0.1.

    Runs MICE with ``impute_type='pmm'`` restricted to 5 nearest columns on
    ``XY_incomplete`` and compares the result against ``XY`` on the entries
    selected by ``missing_mask``. Those three fixtures and
    ``reconstruction_error`` are defined outside this function.
    """
    imputer = MICE(n_imputations=100, impute_type='pmm', n_nearest_columns=5)
    completed = imputer.complete(XY_incomplete)
    # NOTE(review): the label says "row" while impute_type is 'pmm' — confirm
    # whether the label is intentional.
    label = "MICE (impute_type=row)"
    errors = reconstruction_error(XY, completed, missing_mask, name=label)
    # Only the second returned value (MAE over missing entries) is checked.
    _, missing_mae = errors
    assert missing_mae < 0.1, "Error too high with approximate PMM method!"
def test_mice_column_with_low_rank_random_matrix_approximate():
    """Check approximate column imputation keeps the missing-entry MAE below 0.1.

    Runs MICE with ``impute_type='col'`` restricted to 5 nearest columns on
    ``XY_incomplete`` and compares the result against ``XY`` on the entries
    selected by ``missing_mask``. Those three fixtures and
    ``reconstruction_error`` are defined outside this function.
    """
    imputer = MICE(n_imputations=100, impute_type='col', n_nearest_columns=5)
    completed = imputer.complete(XY_incomplete)
    label = "MICE (impute_type=col)"
    errors = reconstruction_error(XY, completed, missing_mask, name=label)
    # Only the second returned value (MAE over missing entries) is checked.
    _, missing_mae = errors
    assert missing_mae < 0.1, "Error too high with approximate column method!"
def test_mice_row_with_low_rank_random_matrix():
    """Check PMM imputation keeps the missing-entry MAE below 0.1.

    Runs MICE with ``impute_type='pmm'`` on ``XY_incomplete`` and compares the
    result against ``XY`` on the entries selected by ``missing_mask``. Those
    three fixtures and ``reconstruction_error`` are defined outside this
    function.
    """
    imputer = MICE(n_imputations=100, impute_type='pmm')
    completed = imputer.complete(XY_incomplete)
    # NOTE(review): the label says "row" while impute_type is 'pmm' — confirm
    # whether the label is intentional.
    label = "MICE (impute_type=row)"
    errors = reconstruction_error(XY, completed, missing_mask, name=label)
    # Only the second returned value (MAE over missing entries) is checked.
    _, missing_mae = errors
    assert missing_mae < 0.1, "Error too high with PMM method!"
def test_mice_column_with_low_rank_random_matrix():
    """Check column imputation keeps the missing-entry MAE below 0.1.

    Runs MICE with ``impute_type='col'`` on ``XY_incomplete`` and compares the
    result against ``XY`` on the entries selected by ``missing_mask``. Those
    three fixtures and ``reconstruction_error`` are defined outside this
    function.
    """
    imputer = MICE(n_imputations=100, impute_type='col')
    completed = imputer.complete(XY_incomplete)
    label = "MICE (impute_type=col)"
    errors = reconstruction_error(XY, completed, missing_mask, name=label)
    # Only the second returned value (MAE over missing entries) is checked.
    _, missing_mae = errors
    assert missing_mae < 0.1, "Error too high with column method!"
def test(self, flag, data):
    """Return ``data`` as-is when fully flagged, otherwise impute it with MICE.

    ``data`` is returned untouched when the number of entries in ``flag``
    equal to 1 matches ``self.data.m_num``. In every other case a fresh
    ``MICE`` solver is handed to ``self.imputate`` together with ``flag`` and
    ``data``, and that call's result is returned.
    """
    flagged_count = (flag == 1).sum()
    if flagged_count == self.data.m_num:
        # Nothing is marked as missing, so no imputation is needed.
        return data

    solver = MICE()
    return self.imputate(flag, data, solver)
def impute_missing_values(numerical_features):
    """Complete ``numerical_features`` with MICE, keeping its labels.

    The output of ``MICE().complete`` is wrapped in a new DataFrame, which is
    then given the column labels and the index of the input.

    Returns:
        A new DataFrame holding the completed values.
    """
    completed_values = MICE().complete(numerical_features)
    completed_frame = pd.DataFrame(completed_values)
    # The wrapped result does not carry the input's labels; restore them.
    completed_frame.columns = numerical_features.columns
    return completed_frame.set_index(numerical_features.index)
def estimate_by_mice(df):
    """Return a copy of ``df`` whose columns are overwritten with MICE output.

    The frame's values are cast to a float array, completed with a default
    ``MICE`` instance, and written back into a copy of ``df``; the input frame
    itself is not assigned to.
    """
    estimated = df.copy()
    # NOTE(review): this seeds only the stdlib ``random`` module; whether MICE
    # draws from it is not visible here — confirm the seed has any effect.
    random.seed(129)
    solver = MICE()
    completed = solver.complete(np.asarray(df.values, dtype=float))
    estimated.loc[:, df.columns] = completed
    return estimated
def get_predict(self, flag, in_data):
    """Impute the masked entries of ``in_data`` using ``self.t_measure``.

    A copy of ``in_data`` is reshaped to ``(utils.M_NUM, 1)`` and the entries
    selected by ``~flag`` are set to NaN (presumably ``flag`` is a boolean
    mask — verify against the caller). The column is appended to a copy of
    ``self.t_measure``, the stacked matrix is transposed and completed with
    MICE, and the completed last row is returned.

    Returns:
        An array of shape ``(utils.M_NUM, 1)``.
    """
    column = in_data.copy()
    column.shape = (utils.M_NUM, 1)
    column[~flag] = np.nan

    solver = MICE()
    history = self.t_measure.copy()
    # After the transpose the appended column becomes the last row.
    stacked = np.column_stack((history, column)).T
    completed = solver.complete(stacked)

    last_row = np.array(completed[-1, :])
    return last_row.reshape(utils.M_NUM, 1)
def calculate_imputation_error(feature, numerical_data, numerical_features):
    """Estimate how well MICE recovers ``feature`` on the first 200 rows.

    Works on a deep copy of ``numerical_data``: the first 200 values of
    ``feature`` are saved, replaced with NaN, and re-imputed with MICE. The
    mean absolute percentage difference between the saved and the imputed
    values is printed and returned.

    Args:
        feature: Label of the column to hide and re-impute.
        numerical_data: DataFrame containing ``feature``; not modified.
        numerical_features: Column labels assigned to the completed frame.

    Returns:
        A two-element list ``[feature, imputation_error]`` where
        ``imputation_error`` is the mean percentage error. Rows whose real
        value is 0 divide by zero and therefore contribute inf/NaN.
    """
    numerical_data = numerical_data.copy(deep=True)
    held_out = slice(0, 200)

    feature_data = (
        numerical_data[feature].iloc[held_out].copy().reset_index(drop=True)
    )

    # Mask the held-out rows with ONE positional assignment on the frame.
    # The chained form ``frame[col][0:200] = nan`` writes through an
    # intermediate Series, which triggers SettingWithCopyWarning and, with
    # pandas Copy-on-Write enabled, leaves the frame unchanged.
    feature_position = numerical_data.columns.get_loc(feature)
    numerical_data.iloc[held_out, feature_position] = np.nan

    completed_numerical_data = pd.DataFrame(
        MICE(verbose=False).complete(numerical_data)
    )
    completed_numerical_data.columns = numerical_features
    imputed_feature = completed_numerical_data[feature].iloc[held_out]

    # Side-by-side frame: one row per held-out sample.
    imputed_data = pd.DataFrame([feature_data, imputed_feature]).T
    imputed_data.columns = ['Real value', 'Imputed value']
    imputed_data['Imputation error (%)'] = np.abs(
        (imputed_data['Real value'] - imputed_data['Imputed value'])
        / imputed_data['Real value']
    ) * 100

    imputation_error = np.mean(imputed_data['Imputation error (%)'])
    print('Imputation error for', feature, ': ', imputation_error)
    return [feature, imputation_error]