def score_model_baseline(fm, labels, fl, hyperparams):
    # keep only the most recent feature-matrix row per customer
    baseline_fm = (fm.reset_index('customer_id', drop=False)
                     .drop_duplicates('customer_id', keep='last')
                     .set_index('customer_id'))
    baseline_fm, baseline_fl = ft.encode_features(baseline_fm, fl)
    baseline_fm, baseline_fl = remove_low_information_features(baseline_fm, baseline_fl)
    hyperparams = parse_hyperparams_baseline(hyperparams)
    print("HYPERPARAMS:", hyperparams)
    cv_score = []
    n_splits = 5
    splitter = StratifiedKFold(n_splits=n_splits, shuffle=True)
    for train_index, test_index in splitter.split(labels, labels):
        baseline_train_labels = labels.iloc[train_index]
        baseline_test_labels = labels.iloc[test_index]
        baseline_train_fm = baseline_fm.loc[baseline_train_labels.index, :]
        baseline_test_fm = baseline_fm.loc[baseline_test_labels.index, :]
        score = score_baseline_pipeline(baseline_train_fm, baseline_train_labels,
                                        baseline_test_fm, baseline_test_labels,
                                        **hyperparams)
        cv_score.append(score)
    # mean cross-validation score
    return sum(cv_score) / n_splits
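For context, ft.encode_features one-hot encodes the categorical columns of a feature matrix and returns both the encoded matrix and the encoded feature definitions. A minimal, self-contained sketch, assuming featuretools' pre-1.0 API (target_entity) as used in these snippets and its bundled mock-customer demo data:

import featuretools as ft

# Build a small feature matrix from the bundled demo EntitySet.
es = ft.demo.load_mock_customer(return_entityset=True)
feature_matrix, features = ft.dfs(entityset=es,
                                  target_entity="customers",
                                  max_depth=1)

# One-hot encode the categorical features; returns the encoded matrix and
# the corresponding encoded feature definitions.
fm_encoded, features_encoded = ft.encode_features(feature_matrix, features)
print(fm_encoded.shape)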
def produce(self, X, **kwargs):
    # calculate features at the given cutoff times, then one-hot encode them
    feature_matrix = ft.calculate_feature_matrix(self.__features, cutoff_time=X, **kwargs)
    fm_encoded, features_encoded = ft.encode_features(feature_matrix, self.__features)
    fm_encoded = fm_encoded.fillna(0)
    return fm_encoded
def produce(self, X, **kwargs):
    feature_matrix = ft.calculate_feature_matrix(
        self.features, cutoff_time=X, **kwargs)
    fm_encoded, features_encoded = ft.encode_features(
        feature_matrix, self.features)
    return fm_encoded.fillna(0)
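Both produce methods above re-run ft.encode_features on whatever matrix they are given, which can yield a different set of one-hot columns than the model saw at training time. A hedged sketch of one way to keep the columns consistent, by storing the encoded feature definitions once and calculating them directly later; the class name, the fit/produce split, and the features_encoded attribute are illustrative assumptions, not part of the original code:

import featuretools as ft

class EncodedFeaturizer:
    """Illustrative only: fit/produce and features_encoded are assumed names."""

    def __init__(self, entityset, features):
        self.es = entityset
        self.features = features
        self.features_encoded = None

    def fit(self, cutoff_times):
        fm = ft.calculate_feature_matrix(self.features, entityset=self.es,
                                         cutoff_time=cutoff_times)
        # remember the encoded feature definitions so produce() yields the
        # same one-hot columns that were present during training
        fm_encoded, self.features_encoded = ft.encode_features(fm, self.features)
        return fm_encoded.fillna(0)

    def produce(self, cutoff_times):
        fm_encoded = ft.calculate_feature_matrix(self.features_encoded,
                                                 entityset=self.es,
                                                 cutoff_time=cutoff_times)
        return fm_encoded.fillna(0)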
cutoffs_ft = []
for _id, row in cutoffs.iterrows():
    cutoffs_ft.append((row[self.entity_col], row['cutoff_st'] - timedelta(days=1)))
cutoffs_ft = pd.DataFrame(cutoffs_ft, columns=['instance_id', 'time'])
feature_matrix, features = ft.dfs(target_entity=self.entity_col,
                                  cutoff_time=cutoffs_ft,
                                  training_window="%dday" % feature_window,  # same as above
                                  entityset=self.es,
                                  cutoff_time_in_index=True,
                                  verbose=True)
# encode categorical values
fm_encoded, features_encoded = ft.encode_features(feature_matrix,
                                                  features)
self.features = fm_encoded.fillna(0)
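The cutoff-time table above pairs each instance id with a feature cutoff one day before its label cutoff. A small sketch of the same construction done with vectorized pandas instead of iterrows, using made-up example data:

import pandas as pd
from datetime import timedelta

# Assumed example input: one row per customer with its label cutoff time.
cutoffs = pd.DataFrame({
    'customer_id': [1, 2, 3],
    'cutoff_st': pd.to_datetime(['2014-01-15', '2014-02-01', '2014-03-10']),
})

# Same result as the iterrows loop: feature cutoff is one day before the label cutoff.
cutoffs_ft = pd.DataFrame({
    'instance_id': cutoffs['customer_id'],
    'time': cutoffs['cutoff_st'] - timedelta(days=1),
})
print(cutoffs_ft)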
                  max_depth=max_depth, features_only=True)  # tail of a truncated ft.dfs(...) call that returns feature definitions
# keep only the features whose generated names contain one of the substrings
applicable_features = []
for feat in features:
    for x in feature_substrings:
        if x in feat.get_name():
            applicable_features.append(feat)
if len(applicable_features) == 0:
    raise ValueError('No feature names with %s, verify the name attribute '
                     'is defined and/or generate_name() is defined to '
                     'return %s' % (feature_substrings, feature_substrings))
df = ft.calculate_feature_matrix(entityset=es,
                                 features=applicable_features,
                                 instance_ids=instance_ids)
# ensure the selected features can be encoded without error
ft.encode_features(df, applicable_features)
# TODO: check the multi_output shape by checking
# feature.number_output_features for each feature
# and comparing it with the matrix shape
if not multi_output:
    assert len(applicable_features) == df.shape[1]
return
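The TODO above hints at the multi-output case: a feature definition can contribute several columns to the matrix, reported by its number_output_features attribute. A sketch of that check, written as a helper purely for illustration:

def check_matrix_shape(applicable_features, df):
    # Sum the number of columns each feature contributes; multi-output
    # features (e.g. NMostCommon) produce more than one column each.
    expected_columns = sum(f.number_output_features for f in applicable_features)
    assert expected_columns == df.shape[1]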