"""UMLs Dataset.
The data consists of information about a 135 Graph and the relations between
their nodes given as a DataFrame with three columns, source, target and type,
indicating which nodes are related and with which type of link. The target is
a 1d numpy binary integer array indicating whether the indicated link exists
or not.
"""
dataset_path = _load('umls')
X = _load_csv(dataset_path, 'data')
y = X.pop('label').values
graph = nx.Graph(nx.read_gml(os.path.join(dataset_path, 'graph.gml')))
return Dataset(load_umls.__doc__, X, y, accuracy_score, stratify=True, graph=graph)
def load_boston():
    """Boston House Prices Dataset."""
    # NOTE(review): sklearn.datasets.load_boston was removed in scikit-learn 1.2;
    # confirm the project pins an older scikit-learn, or this loader will raise.
    bunch = datasets.load_boston()
    features, prices = bunch.data, bunch.target
    return Dataset(load_boston.__doc__, features, prices, r2_score)
def load_newsgroups():
    """20 News Groups Dataset.
    The data of this dataset is a 1d numpy array vector containing the texts
    from 11314 newsgroups posts, and the target is a 1d numpy integer array
    containing the label of one of the 20 topics that they are about.
    """
    # fetch_20newsgroups returns the raw post texts as a Python list;
    # convert to a numpy array so Dataset receives array-like data.
    bunch = datasets.fetch_20newsgroups()
    texts = np.array(bunch.data)
    return Dataset(
        load_newsgroups.__doc__, texts, bunch.target, accuracy_score, stratify=True)
def load_jester():
    """Ratings from the Jester Online Joke Recommender System.
    This dataset consists of over 1.7 million instances of (user_id, item_id, rating)
    triples, which is split 50-50 into train and test data.
    source: "University of California Berkeley, CA"
    sourceURI: "http://eigentaste.berkeley.edu/dataset/"
    """
    # Download/locate the dataset, then split off the regression target.
    path = _load('jester')
    features = _load_csv(path, 'data')
    ratings = features.pop('rating').values
    return Dataset(load_jester.__doc__, features, ratings, r2_score)
def load_usps():
    """USPs Digits Dataset.
    The data of this dataset is a 4d numpy array with shape (9298, 224, 224, 3)
    containing 9298 224x224 RGB photos of handwritten digits, and the target is
    a 1d numpy integer array containing the label of the digit represented in
    the image.
    """
    # Fix: the previous docstring described the data as a 3d array with shape
    # (224, 224, 3), but a stack of 9298 RGB images is 4d. The docstring is
    # forwarded to Dataset as its description, so the wrong rank was user-facing.
    dataset_path = _load('usps')
    df = _load_csv(dataset_path, 'data')
    # df.image holds per-row image file names under <dataset_path>/images.
    X = _load_images(os.path.join(dataset_path, 'images'), df.image)
    y = df.label.values
    return Dataset(load_usps.__doc__, X, y, accuracy_score, stratify=True)
"""Amazon product co-purchasing network and ground-truth communities.
Network was collected by crawling Amazon website. It is based on Customers Who Bought
This Item Also Bought feature of the Amazon website. If a product i is frequently
co-purchased with product j, the graph contains an undirected edge from i to j.
Each product category provided by Amazon defines each ground-truth community.
"""
dataset_path = _load('amazon')
X = _load_csv(dataset_path, 'data')
y = X.pop('label').values
graph = nx.Graph(nx.read_gml(os.path.join(dataset_path, 'graph.gml')))
return Dataset(load_amazon.__doc__, X, y, normalized_mutual_info_score, graph=graph)
vocabulary = _load_csv(dataset_path, 'vocabulary', set_index=True)
entities = {
'data': (data, 'd3mIndex', None),
'questions': (questions, 'qIndex', None),
'sentences': (sentences, 'sIndex', None),
'vocabulary': (vocabulary, 'index', None)
}
relationships = [
('questions', 'qIndex', 'data', 'qIndex'),
('sentences', 'sIndex', 'data', 'sIndex')
]
target = data.pop('isAnswer').values
return Dataset(load_wikiqa.__doc__, data, target, accuracy_score, startify=True,
entities=entities, relationships=relationships)
def load_iris():
    """Iris Dataset."""
    bunch = datasets.load_iris()
    return Dataset(
        load_iris.__doc__, bunch.data, bunch.target, accuracy_score, stratify=True)
def load_handgeometry():
    """Hand Geometry Dataset.
    The data of this dataset is a 4d numpy array with shape (112, 224, 224, 3)
    containing 112 224x224 RGB photos of hands, and the target is a 1d numpy
    float array containing the width of the wrist in centimeters.
    """
    # Fix: the previous docstring described the data as a 3d array with shape
    # (224, 224, 3), but a stack of 112 RGB images is 4d. The docstring is
    # forwarded to Dataset as its description, so the wrong rank was user-facing.
    dataset_path = _load('handgeometry')
    df = _load_csv(dataset_path, 'data')
    # df.image holds per-row image file names under <dataset_path>/images.
    X = _load_images(os.path.join(dataset_path, 'images'), df.image)
    y = df.target.values
    return Dataset(load_handgeometry.__doc__, X, y, r2_score)
y = X.pop('label').values
graph1 = nx.Graph(nx.read_gml(os.path.join(dataset_path, 'graph1.gml')))
graph2 = nx.Graph(nx.read_gml(os.path.join(dataset_path, 'graph2.gml')))
graph = graph1.copy()
graph.add_nodes_from(graph2.nodes(data=True))
graph.add_edges_from(graph2.edges)
graph.add_edges_from(X[['graph1', 'graph2']].values)
graphs = {
'graph1': graph1,
'graph2': graph2,
}
return Dataset(load_dic28.__doc__, X, y, accuracy_score,
stratify=True, graph=graph, graphs=graphs)