Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): block indentation was lost in this excerpt, and `i`, `s`,
# `togetherness`, `file_to_id` and `id_to_file` are set up before the first
# visible line. The nesting described in the comments below is inferred from
# the statements themselves -- confirm against the original file.
#
# Step 1: give every file an integer id and count, per pair of ids, how many
# batches yielded by iter_files_per_commit contained both files
# (presumably one batch per commit -- confirm).
for related_files in iter_files_per_commit(repo, limit):
related_files_by_id = []
for f in filter_files(related_files):
try:
# file already has an id: reuse it
related_files_by_id.append(file_to_id[f])
except KeyError:
# first time this file is seen: hand out the next id `i` and record
# the mapping in both directions
related_files_by_id.append(i)
file_to_id[f] = i
id_to_file[i] = f
i += 1
# every 2-combination of ids in this batch bumps that pair's counter
# (`combinations` is presumably itertools.combinations -- confirm)
for edge in combinations(related_files_by_id, 2):
togetherness[edge] += 1
finish(s)
# Step 2: build a weighted graph with one addNode() call per known file.
s = start("building networkit graph")
g = graph.Graph(weighted=True)
# NOTE(review): this loop rebinds `i`, the id counter used above; the loop
# variable itself is never read. Using e[0]/e[1] as node ids below assumes
# file ids started at 0 and that addNode() numbers nodes 0..n-1 -- confirm.
for i in range(len(file_to_id)):
g.addNode()
for e, t in togetherness.items():
# edge weight is the reciprocal of the co-occurrence count, so pairs that
# were seen together more often get a smaller weight
g.addEdge(e[0], e[1], 1 / t)
finish(s)
# Step 3: run betweenness centrality on the graph and keep its ranking in `bb`.
s = start("computing betweenness")
# accurate, slow calculation
b = centrality.Betweenness(g, normalized=True)
# TODO - maybe allow toggling between accurate and estimate methods
# faster but not as precise (10x better in a benchmark test)
# b = networkit.centrality.EstimateBetweenness(g, 128, normalized=True, parallel=True)
b.run()
bb = b.ranking()
finish(s)
def fit(self, data=None):
    """Estimate ``self.alpha`` from ``data`` and update the fit diagnostics.

    Args:
        data: observations to fit. When it is None and this object has a
            ``parent_Fit`` attribute, ``self.parent_Fit.data`` is used.

    Side effects:
        Sets ``self.n`` to the number of observations left after
        ``trim_to_range`` and sets ``self.noise_flag`` to True when the
        fitted parameters are outside ``self.in_range()``. ``self.alpha``
        is set directly in the closed-form branches; the fallback
        ``Distribution.fit`` (defined elsewhere) is expected to set the
        parameters otherwise.
    """
    # `is None`, not `== None`: on a numpy array `==` compares elementwise,
    # and truth-testing the resulting array raises for more than one element.
    if data is None and hasattr(self, 'parent_Fit'):
        data = self.parent_Fit.data
    data = trim_to_range(data, xmin=self.xmin, xmax=self.xmax)
    self.n = len(data)

    # numpy's sum/log deliberately shadow the builtins inside this method
    from numpy import log, sum

    if not self.discrete and not self.xmax:
        # continuous data without an upper bound: closed-form estimate
        self.alpha = 1 + (self.n / sum(log(data / self.xmin)))
        if not self.in_range():
            # estimate landed outside the allowed range: use the generic fit
            Distribution.fit(self, data, suppress_output=True)
        self.KS(data)
    elif self.discrete and self.estimate_discrete and not self.xmax:
        # discrete data without an upper bound: same closed form with the
        # lower bound shifted down by 0.5
        self.alpha = 1 + (self.n / sum(log(data / (self.xmin - .5))))
        if not self.in_range():
            Distribution.fit(self, data, suppress_output=True)
        self.KS(data)
    else:
        Distribution.fit(self, data, suppress_output=True)

    # flag the fit when the final parameters are still out of range
    self.noise_flag = not self.in_range()
# NOTE(review): block indentation was lost in this excerpt and `line` comes
# from code before the first visible line (presumably a loop over an input
# file -- confirm). Nesting described below is inferred.
#
# Each line must split into exactly two whitespace-separated labels;
# anything else makes the tuple unpacking raise ValueError.
lblA, lblB = line.split()
# only pairs whose labels are both present in lbl2idx are translated to indices
if lblA in lbl2idx and lblB in lbl2idx:
idxA, idxB = (lbl2idx[i] for i in line.split())
# always go from smaller -> larger, helps merge
if(idxA > idxB):
(idxA, idxB) = (idxB, idxA)
# `problem` maps the smaller index to the list of larger indices paired with it
if idxA not in problem:
problem[idxA] = []
problem[idxA].append(idxB)
# NOTE(review): `count` is not updated anywhere in the visible lines.
# NOTE(review): without indentation it is unclear how many of the following
# print calls belong to `if verbose:` -- confirm.
if verbose:
print("Loaded ", count, " prob pairs.")
print("Grouped into ", len(problem), " query groups.")
print("Constructing network from ", edgePath)
graph = nk.readGraph(edgePath, nk.Format.EdgeListSpaceZero)
# outLock and outFile are not used in the visible lines; presumably
# runProbGroup reads them as shared state -- confirm.
outLock = Lock()
outFile = open(outPath, "w")
# runProbGroup is called once per (smaller index, list of larger indices) item
with ThreadPool() as pool:
pool.map(runProbGroup, problem.items())
# NOTE(review): nothing visible here closes outFile if pool.map raises.
outFile.close()
def inspectCommunities(zeta, G):
    """ Print a small table of statistics about a community structure.

    :param zeta: communities; queried via subsetSizes() and numberOfSubsets()
    :param G: graph, passed together with zeta to Modularity().getQuality()
    :raises MissingDependencyError: if tabulate is not available
    """
    if not have_tabulate:
        raise MissingDependencyError("tabulate")

    sizes = zeta.subsetSizes()
    modularity = Modularity().getQuality(zeta, G)

    # An "imbalance" row (zeta.getImbalance()) is currently disabled.
    labels = (
        "# communities",
        "min community size",
        "max community size",
        "avg. community size",
        "modularity",
    )
    values = (
        zeta.numberOfSubsets(),
        min(sizes),
        max(sizes),
        sum(sizes) / len(sizes),
        modularity,
    )
    # one [label, value] row per statistic
    table = [[label, value] for label, value in zip(labels, values)]
    print(tabulate.tabulate(table))
def plotSummary2(self, figsize=None, groupby="framework", palette="Greens_d"):
    """ Plot a summary of algorithm performances

    Draws a seaborn box plot of the "edges/s" column of self.dataFrame, one
    row per "algorithm", on a logarithmic x axis.

    Args:
        figsize: if truthy, a new figure of this size is created first
        groupby: column of self.dataFrame used as the hue of the boxes
        palette: palette passed on to seaborn.boxplot

    Raises:
        MissingDependencyError: matplotlib or seaborn is not available

    If self.save is set, the figure is written to "epsSummary.pdf" inside
    self.plotDir.
    """
    if not have_plt:
        raise MissingDependencyError("matplotlib")
    if not have_seaborn:
        raise MissingDependencyError("seaborn")
    if figsize:
        plt.figure(figsize=figsize)
    plt.gca().xaxis.get_major_formatter().set_powerlimits((3, 3))
    plt.xscale("log")
    plt.xlabel("edges/s")
    # the returned axes object is not needed here
    seaborn.boxplot(y="algorithm", x="edges/s", hue=groupby, data=self.dataFrame, linewidth=1, width=.5, palette=palette)
    if self.save:
        # the file name has no placeholders, so it is used as a plain literal
        plt.savefig(os.path.join(self.plotDir, "epsSummary.pdf"), bbox_inches="tight")
def plotSummary(self, algoNames=None, figsize=None):
    """ Plot a summary of algorithm performances

    Collects the "edges/s" measurements of the selected algorithms into
    self.epsSummary (one column per algorithm, columns sorted by name).

    Args:
        algoNames: names of the algorithms to include; defaults to every
            key of self.data
        figsize: if truthy, a new figure of this size is created

    Raises:
        MissingDependencyError: matplotlib, pandas or seaborn is not available

    If self.save is set, the table is written to "epsSummary.csv" inside
    self.outDataDir.
    """
    if not have_plt:
        raise MissingDependencyError("matplotlib")
    if not have_pandas:
        raise MissingDependencyError("pandas")
    if not have_seaborn:
        raise MissingDependencyError("seaborn")
    if algoNames is None:
        algoNames = list(self.data.keys())
    # columns are added one at a time so that each Series is aligned to the
    # index established by the first column, exactly as before
    epsSummary = pandas.DataFrame()
    for (algoName, algoData) in self.data.items():
        if algoName in algoNames:
            epsSummary[algoName] = pandas.Series(algoData["edges/s"])
    # data frame
    # DataFrame.reindex_axis no longer exists in current pandas;
    # reindex(columns=...) performs the same column reordering.
    self.epsSummary = epsSummary.reindex(columns=sorted(epsSummary.columns))
    if self.save:
        self.epsSummary.to_csv(os.path.join(self.outDataDir, "epsSummary.csv"))
    # plot
    if figsize:
        plt.figure(figsize=figsize)
    plt.gca().xaxis.get_major_formatter().set_powerlimits((3, 3))
def finalize(self):
    """ Turn the collected self.data into self.dataFrame.

    Raises:
        MissingDependencyError: pandas is not available

    If self.save is set, the frame is also written to "data.csv" inside
    self.outDataDir.
    """
    if not have_pandas:
        raise MissingDependencyError("pandas")
    self.dataFrame = pandas.DataFrame(self.data)
    if self.save:
        # the file name has no placeholders, so it is used as a plain literal
        self.dataFrame.to_csv(os.path.join(self.outDataDir, "data.csv"))
def plotSummary2(self, figsize=None, groupby="framework", palette="Greens_d"):
    """ Plot a summary of algorithm performances

    Draws a seaborn box plot of the "edges/s" column of self.dataFrame, one
    row per "algorithm", on a logarithmic x axis.

    Args:
        figsize: if truthy, a new figure of this size is created first
        groupby: column of self.dataFrame used as the hue of the boxes
        palette: palette passed on to seaborn.boxplot

    Raises:
        MissingDependencyError: matplotlib or seaborn is not available

    If self.save is set, the figure is written to "epsSummary.pdf" inside
    self.plotDir.
    """
    if not have_plt:
        raise MissingDependencyError("matplotlib")
    if not have_seaborn:
        raise MissingDependencyError("seaborn")
    if figsize:
        plt.figure(figsize=figsize)
    plt.gca().xaxis.get_major_formatter().set_powerlimits((3, 3))
    plt.xscale("log")
    plt.xlabel("edges/s")
    # the returned axes object is not needed here
    seaborn.boxplot(y="algorithm", x="edges/s", hue=groupby, data=self.dataFrame, linewidth=1, width=.5, palette=palette)
    if self.save:
        # the file name has no placeholders, so it is used as a plain literal
        plt.savefig(os.path.join(self.plotDir, "epsSummary.pdf"), bbox_inches="tight")
def set(self, style="light", color=(0, 0, 1)):
    """ Configure style and plot color of the theme.

    Args:
        style: "light" or "system"; "system" is not implemented and raises
        color: RGB tuple (exactly three components)

    Raises:
        MissingDependencyError: matplotlib is not available
        ValueError: unknown style, color without three components, or
            style "system"
    """
    if not have_mpl:
        raise MissingDependencyError("matplotlib")

    knownStyles = ["light", "system"]
    if style not in knownStyles:
        raise ValueError("possible style options: " + str(knownStyles))
    if len(color) != 3:
        raise ValueError("(r,g,b) tuple required")

    if style == "system":
        # the rcParams reference is stored before the error is raised
        self.__rcParams = mpl.rcParams
        raise ValueError("not implemented, yet")

    # only "light" gets past the checks above
    white = (1, 1, 1)
    self.__defaultColor = (0, 0, 0)
    self.__defaultWidth = 1
    self.__backgroundColor = white
    # plot color: the requested color at 0.6 alpha over the background
    self.__plotColor = Theme.RGBA2RGB(color, 0.6, white)
    self.__plotWidth = 3
class bFail:
    """ Entry with a `name` and a `run(G)` that unconditionally raises.

    Presumably used to exercise failure handling -- confirm against callers.
    """

    name = "Fail"

    def run(self, G):
        """ Always raise Exception("FAIL!"); G is not used. """
        message = "FAIL!"
        raise Exception(message)
# Plots
## plot settings
# seaborn is required for everything below
if not have_seaborn:
    raise MissingDependencyError("seaborn")
seaborn.set_style("whitegrid")
### Colors
# each name is bound to the seaborn.xkcd_rgb entry listed at the same position
lightred, darkred, green, orange = [
    seaborn.xkcd_rgb[xkcdName]
    for xkcdName in ("red", "crimson", "teal", "bright orange")
]
# plot functions
def timePlot(data, size=(6,3)):
if not have_plt:
raise MissingDependencyError("matplotlib")
pos = numpy.arange(len(data))+.5 # the bar centers on the y axis
labels = list(data["graph"])
plt.figure(figsize=size)