import community
import networkx as nx
from karateclub.estimator import Estimator


class EgoNetSplitter(Estimator):
    r"""An implementation of "Ego-Splitting"
    from the KDD '17 paper "Ego-Splitting Framework: from Non-Overlapping to Overlapping Clusters".
    The tool first creates the ego-nets of nodes. A persona graph is then created and clustered by
    the Louvain method. The resulting overlapping cluster memberships are stored as a dictionary.

    Args:
        resolution (float): Resolution parameter of Python Louvain. Default is 1.0.
        seed (int): Random seed value. Default is 42.
    """
    def __init__(self, resolution=1.0, seed=42):
        self.resolution = resolution
        self.seed = seed

    def _create_egonet(self, node):
        """
        Creating an ego-net, extracting personas and partitioning it.
        """
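# A minimal usage sketch (assumes the full karateclub implementation with its
# standard Estimator API: fit() followed by get_memberships(); the karate club
# graph is illustrative data only).
g = nx.karate_club_graph()
splitter = EgoNetSplitter(resolution=1.0)
splitter.fit(g)
memberships = splitter.get_memberships()  # dict: node -> list of overlapping cluster ids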
import numpy as np
import networkx as nx
from karateclub.estimator import Estimator
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from karateclub.utils.treefeatures import WeisfeilerLehmanHashing


class GL2Vec(Estimator):
    r"""An implementation of "GL2Vec"
    from the ICONIP '19 paper "GL2vec: Graph Embedding Enriched by Line Graphs with Edge Features".
    First, the algorithm creates the line graph of each graph in the graph dataset.
    The procedure then creates Weisfeiler-Lehman tree features for nodes in the graphs. Using
    these features, a document (graph) - feature co-occurrence matrix is decomposed in order
    to generate representations for the graphs.

    The procedure assumes that nodes have no string features present and the WL-hashing
    defaults to the degree centrality. However, if a node feature with the key "feature"
    is provided for the nodes, the feature extraction happens based on the values of this key.

    Args:
        wl_iterations (int): Number of Weisfeiler-Lehman iterations. Default is 2.
        dimensions (int): Dimensionality of embedding. Default is 128.
        workers (int): Number of cores. Default is 4.
        down_sampling (float): Down sampling frequency. Default is 0.0001.
    """
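# The first step the docstring describes: turning each graph into its line
# graph, where nodes represent the original edges. A minimal sketch using
# networkx (illustrative only; the library's own transformation may differ).
def to_line_graph(graph):
    line_graph = nx.line_graph(graph)
    # Relabel edge-tuples to integers so downstream WL hashing sees plain node ids.
    return nx.convert_node_labels_to_integers(line_graph)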
import numpy as np
import networkx as nx
from karateclub.estimator import Estimator
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from karateclub.utils.treefeatures import WeisfeilerLehmanHashing


class Graph2Vec(Estimator):
    r"""An implementation of "Graph2Vec"
    from the MLGWorkshop '17 paper "Graph2Vec: Learning Distributed Representations of Graphs".
    The procedure creates Weisfeiler-Lehman tree features for nodes in graphs. Using
    these features, a document (graph) - feature co-occurrence matrix is decomposed in order
    to generate representations for the graphs.

    The procedure assumes that nodes have no string features present and the WL-hashing
    defaults to the degree centrality. However, if a node feature with the key "feature"
    is provided for the nodes, the feature extraction happens based on the values of this key.

    Args:
        wl_iterations (int): Number of Weisfeiler-Lehman iterations. Default is 2.
        attributed (bool): Presence of graph attributes. Default is False.
        dimensions (int): Dimensionality of embedding. Default is 128.
        workers (int): Number of cores. Default is 4.
        down_sampling (float): Down sampling frequency. Default is 0.0001.
    """
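# Usage sketch, assuming karateclub's standard whole-graph embedding API
# (fit on a list of graphs, then get_embedding). Illustrative data only.
graphs = [nx.newman_watts_strogatz_graph(50, 5, 0.3) for _ in range(10)]
model = Graph2Vec(wl_iterations=2, dimensions=128)
model.fit(graphs)
embedding = model.get_embedding()  # one row of length `dimensions` per graph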
import math
import numpy as np
import networkx as nx
from scipy import sparse
from sklearn.decomposition import TruncatedSVD
from karateclub.estimator import Estimator


class GraRep(Estimator):
    r"""An implementation of "GraRep"
    from the CIKM '15 paper "GraRep: Learning Graph Representations with Global
    Structural Information". The procedure uses sparse truncated SVD to learn
    embeddings for the powers of the PMI matrix computed from powers of the
    normalized adjacency matrix.

    Args:
        dimensions (int): Number of individual embedding dimensions. Default is 32.
        iteration (int): Number of SVD iterations. Default is 10.
        order (int): Number of PMI matrix powers. Default is 5.
        seed (int): SVD random seed. Default is 42.
    """
    def __init__(self, dimensions=32, iteration=10, order=5, seed=42):
        self.dimensions = dimensions
        self.iterations = iteration
        self.order = order
        self.seed = seed
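# A compact sketch of the decomposition the docstring describes: take powers
# of the row-normalized adjacency matrix, turn each power into a positive
# log/PMI-style target, and factorize it with truncated SVD. Deliberately
# simplified and dense for readability; the library works on sparse matrices.
def grarep_sketch(graph, dimensions=32, order=5):
    A = nx.to_numpy_array(graph)
    P = A / A.sum(axis=1, keepdims=True)       # random-walk transition matrix
    step, blocks = np.eye(A.shape[0]), []
    for _ in range(order):
        step = step @ P                        # k-th transition power
        target = np.log(np.maximum(step / step.sum(axis=0), 1e-10))
        target[target < 0] = 0                 # keep the positive PMI part
        svd = TruncatedSVD(n_components=dimensions)
        blocks.append(svd.fit_transform(target))
    return np.concatenate(blocks, axis=1)      # order * dimensions per node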
import math
import numpy as np
import networkx as nx
import scipy.sparse as sparse
from karateclub.estimator import Estimator


class FeatherGraph(Estimator):
    r"""An implementation of "FEATHER-G"
    from the CIKM '20 paper "Characteristic Functions on Graphs: Birds of a Feather,
    from Statistical Descriptors to Parametric Models". The procedure
    uses characteristic functions of node features with random walk weights to describe
    node neighborhoods. These node-level features are pooled by mean pooling to
    create graph-level statistics.

    Args:
        order (int): Adjacency matrix powers. Default is 5.
        eval_points (int): Number of evaluation points. Default is 25.
        theta_max (float): Maximal evaluation point value. Default is 2.5.
        seed (int): Random seed value. Default is 42.
    """
    def __init__(self, order=5, eval_points=25, theta_max=2.5, seed=42):
        self.order = order
        self.eval_points = eval_points
        self.theta_max = theta_max
        self.seed = seed
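# A sketch of the characteristic function idea from the docstring: for a node
# feature vector x and evaluation points theta, average cos/sin(theta * x)
# over random-walk neighborhoods given by the transition matrix P. The real
# procedure repeats this for several powers of P and several node features.
def characteristic_function(P, x, theta):
    arg = np.outer(x, theta)           # (n_nodes, eval_points) argument grid
    real = P @ np.cos(arg)             # neighborhood mean of cos(theta * x)
    imag = P @ np.sin(arg)             # neighborhood mean of sin(theta * x)
    return np.concatenate([real, imag], axis=1)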
import numpy as np
import networkx as nx
from scipy import sparse
from karateclub.estimator import Estimator


class TENE(Estimator):
    r"""An implementation of "TENE"
    from the ICPR '18 paper "Enhanced Network Embedding with Text Information". The
    procedure jointly factorizes the adjacency and node feature matrices using alternating
    least squares.

    Args:
        dimensions (int): Number of embedding dimensions. Default is 32.
        lower_control (float): Embedding score minimal value. Default is 10**-15.
        alpha (float): Adjacency matrix regularization coefficient. Default is 0.1.
        beta (float): Feature matrix regularization coefficient. Default is 0.1.
        iterations (int): ALS iterations. Default is 200.
        seed (int): Random seed value. Default is 42.
    """
    def __init__(self, dimensions=32, lower_control=10**-15,
                 alpha=0.1, beta=0.1, iterations=200, seed=42):
        self.dimensions = dimensions
        self.lower_control = lower_control
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations
        self.seed = seed
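# Usage sketch for an attributed node embedding model: fit is assumed to take
# the graph plus a node feature matrix, matching karateclub's attributed
# estimators; the random features here are illustrative only.
g = nx.newman_watts_strogatz_graph(100, 10, 0.2)
X = np.random.uniform(0, 1, (100, 200))
tene = TENE(dimensions=32)
tene.fit(g, X)
node_embedding = tene.get_embedding()  # learned node representations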
import numpy as np
import networkx as nx
import scipy.sparse as sps
from karateclub.estimator import Estimator


class NetLSD(Estimator):
    r"""An implementation of "NetLSD"
    from the KDD '18 paper "NetLSD: Hearing the Shape of a Graph". The procedure
    calculates the heat kernel trace of the normalized Laplacian matrix over a
    vector of time scales. If the matrix is large, it switches to an approximation
    of the eigenvalues.

    Args:
        scale_min (float): Time scale interval minimum. Default is -2.0.
        scale_max (float): Time scale interval maximum. Default is 2.0.
        scale_steps (int): Number of steps in time scale. Default is 250.
        approximations (int): Number of eigenvalue approximations. Default is 200.
        seed (int): Random seed value. Default is 42.
    """
    def __init__(self, scale_min=-2.0, scale_max=2.0,
                 scale_steps=250, approximations=200, seed=42):
        self.scale_min = scale_min
        self.scale_max = scale_max
        self.scale_steps = scale_steps
        self.approximations = approximations
        self.seed = seed
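# A minimal sketch of the heat kernel trace the docstring describes, using an
# exact eigendecomposition (feasible only for small graphs; the library falls
# back to eigenvalue approximations for large ones).
def heat_trace_signature(graph, scale_min=-2.0, scale_max=2.0, scale_steps=250):
    L = nx.normalized_laplacian_matrix(graph).toarray()
    eigenvalues = np.linalg.eigvalsh(L)
    timescales = np.logspace(scale_min, scale_max, scale_steps)
    # h(t) = trace(exp(-t * L)) = sum_i exp(-t * lambda_i)
    return np.array([np.exp(-t * eigenvalues).sum() for t in timescales])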
import numpy as np
import networkx as nx
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from karateclub.utils.walker import RandomWalker
from karateclub.estimator import Estimator
from karateclub.utils.treefeatures import WeisfeilerLehmanHashing


class Role2Vec(Estimator):
    r"""An implementation of "Role2vec"
    from the IJCAI '18 paper "Learning Role-based Graph Embeddings".
    The procedure uses random walks to approximate the pointwise mutual information
    matrix obtained by multiplying the pooled adjacency power matrix with a
    structural feature matrix (in this case Weisfeiler-Lehman features). This way
    one gets structural node embeddings.

    Args:
        walk_number (int): Number of random walks. Default is 10.
        walk_length (int): Length of random walks. Default is 80.
        dimensions (int): Dimensionality of embedding. Default is 128.
        workers (int): Number of cores. Default is 4.
        window_size (int): Matrix power order. Default is 2.
        epochs (int): Number of epochs. Default is 1.
        learning_rate (float): HogWild! learning rate. Default is 0.05.
        down_sampling (float): Down sampling frequency. Default is 0.0001.
    """
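# Usage sketch (assumes karateclub's standard node embedding API: fit on a
# single graph with nodes indexed 0..n-1, then get_embedding).
g = nx.karate_club_graph()
role_model = Role2Vec(walk_number=10, walk_length=80, dimensions=128)
role_model.fit(g)
structural_embedding = role_model.get_embedding()  # (n_nodes, dimensions)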
from scipy import sparse
import numpy as np
import networkx as nx
from karateclub.estimator import Estimator


class NNSED(Estimator):
    r"""An implementation of "NNSED"
    from the CIKM '17 paper "A Non-negative Symmetric Encoder-Decoder Approach
    for Community Detection". The procedure uses non-negative matrix factorization
    in order to learn an unnormalized cluster membership distribution over nodes.
    The method can be used in an overlapping and non-overlapping way.

    Args:
        dimensions (int): Embedding layer size. Default is 32.
        iterations (int): Number of training epochs. Default is 10.
        seed (int): Random seed for weight initializations. Default is 42.
    """
    def __init__(self, dimensions=32, iterations=10, seed=42):
        self.dimensions = dimensions
        self.iterations = iterations
        self.seed = seed
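# Usage sketch: the non-negative encoder weights double as soft (overlapping)
# cluster scores, while get_memberships() gives a hard assignment (assumed
# API, matching karateclub's community detection estimators).
g = nx.karate_club_graph()
nnsed = NNSED(dimensions=32, iterations=10)
nnsed.fit(g)
soft_scores = nnsed.get_embedding()       # unnormalized membership matrix
hard_labels = nnsed.get_memberships()     # node -> strongest cluster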
import numpy as np
import networkx as nx
from sklearn.decomposition import NMF
from karateclub.estimator import Estimator


class DANMF(Estimator):
    r"""An implementation of "DANMF"
    from the CIKM '18 paper "Deep Autoencoder-like Nonnegative Matrix Factorization for
    Community Detection". The procedure uses telescopic non-negative matrix factorization
    in order to learn a cluster membership distribution over nodes. The method can be
    used in an overlapping and non-overlapping way.

    Args:
        layers (list): Autoencoder layer sizes in a list of integers. Default is [32, 8].
        pre_iterations (int): Number of pre-training epochs. Default is 100.
        iterations (int): Number of training epochs. Default is 100.
        seed (int): Random seed for weight initializations. Default is 42.
        lamb (float): Regularization parameter. Default is 0.01.
    """
    def __init__(self, layers=[32, 8], pre_iterations=100,
                 iterations=100, seed=42, lamb=0.01):
        self.layers = layers
        self.pre_iterations = pre_iterations
        self.iterations = iterations
        self.seed = seed
        self.lamb = lamb
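# A sketch of the telescopic pre-training idea behind DANMF: factorize the
# adjacency matrix one layer at a time, passing each coefficient matrix to
# the next, deeper NMF. Simplified and illustrative, not the library's
# internals.
def telescopic_pretrain(A, layers=(32, 8), pre_iterations=100):
    target, bases = A, []
    for size in layers:
        nmf = NMF(n_components=size, init="random", max_iter=pre_iterations)
        U = nmf.fit_transform(target)   # per-layer basis matrix
        target = nmf.components_        # deeper representation, factorized next
        bases.append(U)
    return bases, target                # columns of `target`: per-node cluster scores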