Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
# Plot the PHAD threshold-tuning results: a precision/recall scatter saved to
# pr_curve.pdf, followed by an F1-versus-threshold scatter.
sns.set(font_scale=2.0, style="whitegrid")
data = pd.read_csv("../data/phad_tuning.csv")
DOT_SIZE = 75

# Derived metrics, computed column-wise instead of with one Python call per
# row. `attacksdetected` is divided by 100 to give recall, and 201 is the
# factor the script uses to turn recall into a true-positive count
# (presumably the number of attacks in the evaluation set -- TODO confirm).
# Column-wise division yields NaN/inf for a zero denominator instead of
# aborting the whole script.
data["tp"] = data["attacksdetected"] / 100 * 201
data["recall"] = data["attacksdetected"] / 100
data["prec"] = data["tp"] / (data["tp"] + data["TotalNumFP"])
data["f1"] = 2 * (data["recall"] * data["prec"]) / (data["recall"] + data["prec"])

# Precision/recall scatter (no regression line).
plot = sns.lmplot(x="recall", y="prec", data=data, fit_reg=False,
                  markers=".", scatter_kws={"s": DOT_SIZE}, aspect=2)
plot.set(ylim=(0, 1))
plot.set(xlim=(0, 1))
plot.set(ylabel="Precision")
plot.set(xlabel="Recall")
# The context manager finalises the PDF even if savefig() raises.
with PdfPages("pr_curve.pdf") as pp:
    pp.savefig(plot.fig)

# F1 score as a function of the detection threshold.
plot = sns.lmplot(x="threshold", y="f1", data=data, fit_reg=False,
                  markers=".", scatter_kws={"s": DOT_SIZE}, aspect=2)
plot.set(ylim=(0, 1))
plot.set(xlim=(0.5, 1))
plot.set(ylabel="F1 Score")
# Scatter RDR against the mirrored BAF (0.5 - BAF) for every bin listed in
# `pos`, coloured by cluster and faceted by sample, then save the figure to
# `out`. `pos`, `clusters`, `bbc`, `args`, `out` and `coordinates` come from
# the surrounding code.
lx = 'RDR'
ly = '0.5 - BAF'
g = 'Sample'
lh = 'Cluster'

# Number of bins assigned to each cluster, kept as floats.
size = {}
for bin_key in pos:
    cluster_id = clusters[bin_key[0]][bin_key[1]]
    size[cluster_id] = size.get(cluster_id, 0.0) + 1.0

# One record per (bin, sample) pair.
data = [
    {
        lx: bbc[chrom][seg][p]['RDR'],
        ly: 0.5 - bbc[chrom][seg][p]['BAF'],
        g: p,
        lh: clusters[chrom][seg],
        'size': size[clusters[chrom][seg]],
    }
    for chrom, seg in ((b[0], b[1]) for b in pos)
    for p in bbc[chrom][seg]
]
df = pd.DataFrame(data)

# Largest clusters first, so they take the first palette colours.
order = sorted(set(df[lh]), key=size.__getitem__, reverse=True)

figsize = (8, 1.5) if args['figsize'] is None else args['figsize']
if args['markersize'] > 0:
    s = args['markersize']
else:
    s = 20

# Samples are laid out in wrapped columns when a wrap width above 1 is
# requested, otherwise one sample per row.
if args['colwrap'] > 1:
    facet_kws = {'col': g, 'col_wrap': args['colwrap']}
else:
    facet_kws = {'row': g}

# NOTE(review): `size=` is the pre-0.9 seaborn spelling of `height=`;
# confirm which seaborn version this code is pinned to.
g = sns.lmplot(data=df, x=lx, y=ly, hue=lh, hue_order=order,
               palette=args['cmap'], fit_reg=False, size=figsize[0],
               aspect=figsize[1], scatter_kws={"s": s}, legend=False,
               **facet_kws)
coordinates(args, g)
plt.savefig(out, bbox_inches='tight')
plt.close()
def plot_data(dataDict):
    """
    Takes a dictionary (output from traverseBam) and outputs histograms and
    genome-wide plots of various metrics.

    dataDict must provide the keys "windows", "depth_freq", "readbal_freq"
    and "mapq_freq". Six PNG files are written to the current working
    directory: depth_windows.png, balance_windows.png, mapq_windows.png,
    depth_hist.png, readbalance_hist.png and mapq_hist.png. Every figure is
    closed once it has been saved. Returns None.
    """
    window_df = dataDict["windows"]

    # Genome-wide scatter plots based on window means.
    _save_window_scatter(window_df, 'Depth', "depth_windows.png")
    _save_window_scatter(window_df, 'ReadBalance', "balance_windows.png")
    # The original plotted 'ReadBalance' a second time here. The column name
    # follows the 'Mapq' spelling of the histogram table.
    # NOTE(review): confirm that the window table carries a 'Mapq' column.
    _save_window_scatter(window_df, 'Mapq', "mapq_windows.png")

    # Histograms drawn from the pre-computed frequency tables.
    _save_count_bars(dataDict["depth_freq"], 'Depth', "depth_hist.png")
    _save_count_bars(dataDict["readbal_freq"], 'ReadBalance', "readbalance_hist.png")
    _save_count_bars(dataDict["mapq_freq"], 'Mapq', "mapq_hist.png")


def _save_window_scatter(window_df, column, filename):
    """Scatter `column` against 'Position' (no regression line) and save it."""
    grid = sns.lmplot(x='Position', y=column, data=window_df, fit_reg=False)
    grid.savefig(filename)
    plt.close(grid.fig)


def _save_count_bars(freq_df, column, filename):
    """Draw the 'Count' recorded for each `column` value as bars and save it."""
    # The tables already hold counts, so barplot (not countplot, which
    # counts rows itself and rejects x and y together) is the matching call.
    # A dedicated figure keeps the bars off whatever figure was drawn last.
    fig, ax = plt.subplots()
    sns.barplot(x=column, y='Count', data=freq_df, ax=ax)
    fig.savefig(filename)
    plt.close(fig)
def __main(infile_path: str, outfile_path: str) -> None:
    """Read a tab-separated table and save a scatter plot of its first two columns.

    The file's second column is drawn on the horizontal axis and its first
    column on the vertical axis; both column names are printed to stdout.

    :param infile_path: Path of the UTF-8 encoded, tab-separated input file.
    :param outfile_path: Path the figure is written to.
    """
    sns.set()
    hue_token_overlap = pd.read_csv(infile_path, sep=csv.excel_tab.delimiter, dialect=csv.excel_tab,
                                    float_precision="round_trip", encoding="utf-8", memory_map=True)
    xaxis_name = hue_token_overlap.columns[1]
    yaxis_name = hue_token_overlap.columns[0]
    print("X axis: {}; Y axis: {}".format(xaxis_name, yaxis_name))

    # Create scatterplot of dataframe
    hue_plot = sns.lmplot(x=xaxis_name,  # Horizontal axis (keyword: positional x/y are rejected by current seaborn)
                          y=yaxis_name,  # Vertical axis
                          data=hue_token_overlap,  # Data source
                          fit_reg=False,  # Don't fit a regression line
                          # hue="z", # Set color
                          # Marker style goes through `markers=`: a "marker"
                          # entry in scatter_kws is overwritten by the marker
                          # lmplot hands to regplot.
                          markers="D",
                          scatter_kws={"s": 10})  # Marker size
    hue_plot.savefig(outfile_path)
# prettier plot
# `pos` supplies the two plotted coordinates as columns 0 and 1, and every
# entry of `labels` indexes into `legends`; both are defined outside this
# snippet.
df = pd.DataFrame()
df['x'] = pos[:, 0]
df['y'] = pos[:, 1]
legends = ['comp.graphics', 'rec.sport.baseball', 'talk.politics.guns']
df['class'] = [legends[l] for l in labels]

sns.set_context("notebook", font_scale=1.5)
sns.set_style("ticks")

# Create scatterplot of dataframe
sns.lmplot(x='x',  # Horizontal axis (keyword: positional x/y are rejected by current seaborn)
           y='y',  # Vertical axis
           data=df,  # Data source
           fit_reg=False,  # Don't fit a regression line
           hue="class",  # Set color
           legend=True,
           scatter_kws={"s": 25, 'alpha': 0.5})  # Marker size and transparency

# Strip spines, ticks and axis labels so that only the points and the legend remain.
sns.despine(top=True, left=True, right=True, bottom=True)
plt.xticks([])
plt.yticks([])
plt.xlabel('')
plt.ylabel('')

plt.savefig('figs/20newsgroup_viz.png')
"""
Faceted logistic regression
===========================
_thumb: .58, .5
"""
import seaborn as sns
sns.set(style="darkgrid")

# Titanic passenger records from seaborn's example datasets
df = sns.load_dataset("titanic")

# One colour per value of the "sex" column
pal = {"male": "#6495ED", "female": "#F08080"}

# Logistic fit of survival against age, with one facet and one colour per sex
g = sns.lmplot(
    data=df, x="age", y="survived",
    col="sex", hue="sex", palette=pal,
    y_jitter=.02, logistic=True, truncate=False,
)

# Fix the visible age range and leave a small margin around the 0/1 outcomes
g.set(xlim=(0, 80), ylim=(-.05, 1.05))
def plot_embeddings(embs, reverse_dictionary):
    """Project embeddings to two dimensions with t-SNE and draw a labelled scatter.

    Args:
        embs: Embeddings handed to ``TSNE.fit_transform``; row ``i`` of the
            projection is labelled with ``reverse_dictionary[i]``.
        reverse_dictionary: Container mapping a row index to its label text.
            Rows whose index is not in it are plotted without a label.
    """
    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    low_dim_embs = tsne.fit_transform(embs)
    df = pd.DataFrame(low_dim_embs, columns=['x1', 'x2'])
    # x/y are passed by keyword: current seaborn rejects them positionally.
    sns.lmplot(x='x1', y='x2', data=df, scatter=True, fit_reg=False)
    # Annotate each projected point that has a known label.
    for i, (x, y) in enumerate(low_dim_embs):
        if i not in reverse_dictionary:
            continue
        plt.annotate(reverse_dictionary[i],
                     xy=(x, y),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom')
# Scatter `what` against the chosen time column for one source, one baseline
# or triangle, one band and one polarization, coloured by observing night.
# `data`, `sour`, `base`, `band`, `polar`, `what`, `time_type`, `markers`,
# `dict_night` and `Z2SMT` come from the surrounding code.

# A two-character `base` is matched against the `baseline` column and a
# three-character one against the `triangle` column.
if len(base) == 2:
    station_col = 'baseline'
elif len(base) == 3:  # was `len(base==3)`, i.e. len() of the comparison result
    station_col = 'triangle'
else:
    raise ValueError("base must have length 2 (baseline) or 3 (triangle), got %r" % (base,))

# Add the derived columns BEFORE taking the subset: boolean indexing returns
# a new frame, so columns added to `data` afterwards never reach `foo`.
if 'Night' not in data.columns:
    data['Night'] = [dict_night[expt] for expt in data.expt_no]
if time_type == 'gmst':
    # NOTE(review): this branch uses `util` while the next one uses `ut`;
    # confirm that both aliases are imported.
    util.add_gmst(data)
elif time_type == 'fmjd':
    data = ut.add_mjd(data)
    data = ut.add_fmjd(data)

foo = data[(data.source == sour) & (data[station_col] == base)
           & (data.band == band) & (data.polarization == polar)]

# One marker per night that is present in the selection.
markers = markers[:len(foo.Night.unique())]
sns.set_style('darkgrid')
# NOTE(review): `size=` is the pre-0.9 seaborn spelling of `height=`;
# confirm which seaborn version this code is pinned to.
sns.lmplot(x=time_type, y=what, data=foo, hue='Night', fit_reg=False,
           size=5, aspect=1.5, markers=markers, scatter_kws={"s": 80})
plt.title(sour+', '+Z2SMT[base[0]]+'-'+Z2SMT[base[1]]+', '+band+' band', fontsize=13)
# The original called plt.grid() twice; each bare call toggles the grid, so
# the pair cancelled out. Request the grid explicitly instead.
plt.grid(True)
plt.show()
def plot_pore_speed(dataset, name, plots_dir):
# Plot setting start_time_float as axis index
# Seaborn nomenclature for lmplots/regplots are a little different
sns.set_style('darkgrid')
g = sns.lmplot(x='start_time_float_by_sample', y='pore_speed', data=dataset,
hue='qualitative_pass', hue_order=["Passed", "Failed"],
x_estimator=np.mean, truncate=True, x_bins=10, scatter_kws={'alpha': 0.1},
legend=False)
# Create legend with new alpha
leg_title = "Read Quality"
leg = g.ax.legend(title=leg_title, framealpha=0.5)
for lh in leg.legendHandles:
lh.set_alpha(1)
# Zero base y-axis
y_max = dataset['pore_speed'].mean() * 2
g.set(ylim=(0, y_max))
# Set axis labels
g.set_axis_labels("Time (HH:MM)", "Pore Speed (bases / second)")