Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): excerpt from the body of a dataset loader. The enclosing `def` and the
# original indentation are not visible here; `data`, `features`, `labels` and
# `return_X_y` are all defined outside this excerpt.
# Treat every feature whose column dtype is 'O' (object) as categorical.
categorical_features = [f for f in features if data[f].dtype == 'O']
# Maps a feature's position in `features` to the list of classes its encoder found.
category_map = {}
for f in categorical_features:
# A fresh encoder per column, so each column gets its own class list.
le = LabelEncoder()
data_tmp = le.fit_transform(data[f].values)
# Overwrite the original column with the encoder's output.
data[f] = data_tmp
# Keyed by column position (not by name) via features.index(f).
category_map[features.index(f)] = list(le.classes_)
# only return data values
data = data.values
target_names = ['<=50K', '>50K']
# Return the bare (data, labels) pair when return_X_y is truthy; otherwise wrap the
# data, labels, feature names, target names and category map in a Bunch.
if return_X_y:
return data, labels
return Bunch(data=data, target=labels, feature_names=features, target_names=target_names, category_map=category_map)
# NOTE(review): excerpt from the body of a text dataset loader. The enclosing `def`,
# the original indentation, and the definitions of `tar`, `data` and `return_X_y`
# are not visible here.
labels = []
# The first name reported by the archive is skipped ([1:]); the position of each
# remaining member is used as the label for every line read from it.
for i, member in enumerate(tar.getnames()[1:]):
f = tar.extractfile(member)
for line in f.readlines():
# Probe whether the raw line is valid UTF-8; lines that are not are dropped.
try:
line.decode('utf8')
except UnicodeDecodeError:
continue
# NOTE(review): the line is decoded a second time here; the call above only
# served as a validity check.
data.append(line.decode('utf8').strip())
labels.append(i)
tar.close()
# Return the bare (data, labels) pair when return_X_y is truthy; otherwise wrap
# them in a Bunch together with the target names.
if return_X_y:
return data, labels
# presumably label 0 corresponds to 'negative' and label 1 to 'positive' - this
# depends on the member order inside the archive; TODO confirm.
target_names = ['negative', 'positive']
return Bunch(data=data, target=labels, target_names=target_names)
# NOTE(review): excerpt from the body of an image dataset loader. The enclosing `def`,
# the original indentation, and the `try:` that the `except` below belongs to are
# not visible here; `url_labels`, `category`, `nb_images`, `data` and `return_X_y`
# are defined outside this excerpt.
# Download the label mapping and fail on a non-success HTTP status.
resp = requests.get(url_labels)
resp.raise_for_status()
# SECURITY NOTE(review): unpickling downloaded bytes can execute arbitrary code if
# the remote content is not trusted; confirm the URL points to a trusted source.
label_dict = pickle.load(BytesIO(resp.content))
except RequestException:
# Log with traceback, then re-raise so the caller still sees the failure.
logger.exception("Could not download labels, URL may be out of service")
raise
# Invert the mapping so the label index can be looked up by its value; if several
# keys share a value, the last one iterated wins.
inv_label = {v: k for k, v in label_dict.items()}
# Raises KeyError when `category` is not among the values of label_dict.
label_idx = inv_label[category]
# The same label index is repeated once per image.
labels = np.array([label_idx for _ in range(nb_images)])
# Return the bare (data, labels) pair when return_X_y is truthy; otherwise wrap
# them in a Bunch together with the target names.
if return_X_y:
return data, labels
# The category name is likewise repeated once per image.
target_names = [category for _ in range(nb_images)]
return Bunch(data=data, target=labels, target_names=target_names)