Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): the first three statements read `header` and `seq_str` and
# append to `ids` / `sequences`; they look like the body of a loop over
# FASTA entries whose header is not part of this excerpt - confirm against
# the full script.
# Extract the UniProt Entry name from header:
# take the last "|"-separated field and keep its first
# whitespace-separated token
identifier = header.split("|")[-1].split()[0]
ids.append(identifier)
sequences.append(seq.ProteinSequence(seq_str))
# Multiple sequence alignment of all collected sequences;
# the result is unpacked into the alignment, the sequence order,
# a tree and the distances
matrix = align.SubstitutionMatrix.std_protein_matrix()
alignment, order, tree, distances = align.align_multiple(
sequences, matrix, gap_penalty=(-10,-1), terminal_penalty=False
)
# Order alignment according to the guide tree
alignment = alignment[:, order]
# Reorder the labels the same way, so they still match the alignment rows
ids = [ids[i] for i in order]
# Plot the reordered alignment, using the entry names as row labels
fig = plt.figure(figsize=(8.0, 20.0))
ax = fig.add_subplot(111)
graphics.plot_alignment_type_based(
ax, alignment, labels=ids, show_numbers=True, spacing=2.0
)
fig.tight_layout()
plt.show()
# NOTE(review): this `return` is the last statement of a function whose
# definition is outside this excerpt - presumably `hydropathy_to_color()`,
# which is called with a hydropathy value and a colormap below; confirm.
return colormap(norm_hydropathy)
# Create a color scheme highlighting the hydropathy
colormap = plt.get_cmap("coolwarm")
# One entry per symbol of the alphabet of the first sequence;
# symbols without an entry in `hydropathy_dict` get None instead of a color
colorscheme = [
hydropathy_to_color(hydropathy_dict[symbol], colormap)
if symbol in hydropathy_dict else None
for symbol in sequences[0].get_alphabet()
]
# Show only the first 600 alignment columns for the sake of brevity
# This part contains all transmembrane helices
fig = plt.figure(figsize=(8.0, 15))
ax = fig.add_subplot(111)
# Color the symbols instead of the background
# NOTE(review): the call below passes no argument that selects symbol
# coloring (e.g. a `color_symbols` flag) - confirm whether this comment is
# stale or the argument is missing.
graphics.plot_alignment_type_based(
ax, alignment[:600], labels=names, show_numbers=True,
color_scheme=colorscheme
)
plt.show()
def plot_alignment_shapes(axes, alignment, symbols_per_line=30,
                          show_numbers=False, number_size=None,
                          number_functions=None,
                          labels=None, label_size=None,
                          show_line_position=False,
                          spacing=1, color_symbols=False,
                          symbol_size=None, symbol_param=None):
    """
    A thin wrapper around the 'ShapePlotter' and 'plot_alignment()'
    function.

    The alignment is drawn onto `axes` using a 'ShapePlotter' as symbol
    plotter. Afterwards the y tick labels are colored white and the
    background of the figure is set to a dark gray (``#181818``).

    Parameters
    ----------
    axes
        The axes to draw on.
    alignment
        The alignment to be plotted.
    symbols_per_line, show_numbers, number_size, number_functions, \
    labels, label_size, show_line_position, spacing
        Forwarded unchanged to ``graphics.plot_alignment()``.
    color_symbols
        Accepted for interface compatibility, but currently not used
        by this function.
    symbol_size, symbol_param
        Forwarded to the 'ShapePlotter' as `font_size` and `font_param`,
        respectively.
    """
    symbol_plotter = ShapePlotter(
        axes, font_size=symbol_size, font_param=symbol_param
    )
    graphics.plot_alignment(
        axes=axes, alignment=alignment, symbol_plotter=symbol_plotter,
        symbols_per_line=symbols_per_line,
        show_numbers=show_numbers, number_size=number_size,
        number_functions=number_functions,
        labels=labels, label_size=label_size,
        show_line_position=show_line_position,
        spacing=spacing
    )

    # The first axes sharing the x-axis with 'axes' is restyled as well
    # NOTE(review): presumably this is the twin axes that carries the
    # sequence numbers - confirm against 'plot_alignment()'
    twin = axes.get_shared_x_axes().get_siblings(axes)[0]
    for ax in (axes, twin):
        # White tick labels stay readable on the dark background set below
        ax.set_yticklabels(ax.get_yticklabels(), fontdict={"color":"white"})
    axes.get_figure().patch.set_facecolor("#181818")
# NOTE(review): the next two statements appear to be the tail of the helper
# that is called as `get_distance(similarities, i, j)` below; its `def` line
# is outside this excerpt - confirm.
# Distance = mean of the two self-similarity scores minus the pairwise score
s_max = (similarities[i,i] + similarities[j,j]) / 2
return s_max - similarities[i,j]
# Fill a distance matrix that has the same shape as the similarity matrix
distances = np.zeros(similarities.shape)
for i in range(distances.shape[0]):
for j in range(distances.shape[1]):
distances[i,j] = get_distance(similarities, i, j)
# Build a tree from the pairwise distances via UPGMA
tree = phylo.upgma(distances)
fig = plt.figure(figsize=(8.0, 5.0))
ax = fig.add_subplot(111)
# Use the 3-letter amino acid code as label
labels = [seq.ProteinSequence.convert_letter_1to3(letter).capitalize()
for letter in matrix.get_alphabet1()]
graphics.plot_dendrogram(
ax, tree, orientation="top", labels=labels
)
ax.set_ylabel("Distance")
# Add grid for clearer distance perception
ax.yaxis.grid(color="lightgray")
plt.show()
# Pick the avidin and streptavidin sequences out of the file entries,
# identified by the accession contained in the entry name
for name, sequence in file.items():
    if "CAC34569" in name:
        avidin_seq = seq.ProteinSequence(sequence)
        continue
    if "ACL82594" in name:
        streptavidin_seq = seq.ProteinSequence(sequence)

# BLOSUM62 substitution matrix
matrix = align.SubstitutionMatrix.std_protein_matrix()

# Pairwise alignment using an affine gap penalty;
# gaps at the sequence ends are not penalized
alignments = align.align_optimal(
    avidin_seq,
    streptavidin_seq,
    matrix,
    gap_penalty=(-10, -1),
    terminal_penalty=False,
)

# Only the first alignment is drawn;
# the color intensity indicates the similarity
fig, ax = plt.subplots(figsize=(8.0, 2.5))
graphics.plot_alignment_similarity_based(
    ax,
    alignments[0],
    matrix=matrix,
    labels=["Avidin", "Streptavidin"],
    show_numbers=True,
    show_line_position=True,
)
fig.tight_layout()
plt.show()
{"regulatory_class" : "ribosome_binding_site",
"note" : "RBS1"})
# Remaining features of the operon
# NOTE(review): `strand`, `prom` and `rbs1` are defined before this excerpt;
# the two numbers of each Location are presumably its first and last
# position - confirm.
gene1 = Feature("gene", [Location(81, 380, strand)],
{"gene" : "gene1"})
rbs2 = Feature("regulatory", [Location(400, 415, strand)],
{"regulatory_class" : "ribosome_binding_site",
"note" : "RBS2"})
gene2 = Feature("gene", [Location(421, 1020, strand)],
{"gene" : "gene2"})
term = Feature("regulatory", [Location(1050, 1080, strand)],
{"regulatory_class" : "terminator"})
# Collect all features into a single annotation
annotation = Annotation([prom, rbs1, gene1, rbs2, gene2, term])
# Draw the annotation as a feature map on a single line
fig = plt.figure(figsize=(8.0, 0.8))
ax = fig.add_subplot(111)
graphics.plot_feature_map(
ax, annotation, multi_line=False, loc_range=(1, 1101),
)
fig.tight_layout()
plt.show()
# Ungapped alignment of the two consensus sequences
alignment = align.align_ungapped(
    drug_type_consensus,
    fiber_type_consensus,
    matrix=matrix,
)

# A colormap for highlighting sequence dissimilarity:
# symbols are colored reddish at low similarity
# and white at high similarity
cmap = LinearSegmentedColormap.from_list(
    "custom",
    colors=[
        (1.0, 0.3, 0.3),  # reddish
        (1.0, 1.0, 1.0),  # white
    ],
)

fig, ax = plt.subplots(figsize=(8.0, 6.0))
graphics.plot_alignment_similarity_based(
    ax,
    alignment,
    matrix=matrix,
    symbols_per_line=50,
    labels=["Drug-type", "Fiber-type"],
    show_numbers=True,
    cmap=cmap,
    symbol_size=8,
)
fig.tight_layout()
plt.show()
# Run the MSA application and wait until it has finished
app.start()
app.join()
alignment = app.get_alignment()
# Print the MSA with hit IDs
print("MSA results:")
gapped_seqs = alignment.get_gapped_sequences()
# The i-th gapped sequence belongs to the i-th hit
for i, gapped_seq in enumerate(gapped_seqs):
    print(hits[i], " "*3, gapped_seq)
# Visualize the first 200 columns of the alignment
# Reorder alignments to reflect sequence distance
fig = plt.figure(figsize=(8.0, 8.0))
ax = fig.add_subplot(111)
order = app.get_alignment_order()
# The labels are reordered in the same way as the alignment rows
graphics.plot_alignment_type_based(
    ax, alignment[:200, order.tolist()], labels=[hits[i] for i in order],
    show_numbers=True, color_scheme="clustalx"
)
fig.tight_layout()
plt.show()
# Perform a multiple sequence alignment of the PB sequences,
# using a substitution matrix parsed from its string representation
matrix_dict = align.SubstitutionMatrix.dict_from_str(matrix_str)
matrix = align.SubstitutionMatrix(pb_alphabet, pb_alphabet, matrix_dict)
alignment, order, _, _ = align.align_multiple(
    pb_seqs,
    matrix,
    gap_penalty=(-500, -100),
    terminal_penalty=False,
)

# Bring alignment rows and their labels into the order of the guide tree
alignment = alignment[:, order.tolist()]
labels = [organisms[i] for i in order]

# Visualize the alignment
fig, ax = plt.subplots(figsize=(8.0, 4.0))
# The color scheme was generated with the 'Gecos' software
graphics.plot_alignment_type_based(
    ax,
    alignment,
    labels=labels,
    symbols_per_line=45,
    spacing=2,
    show_numbers=True,
    color_scheme="flower",
)
# Organism names in italic
italic_font = {"fontstyle":"italic"}
ax.set_yticklabels(ax.get_yticklabels(), fontdict=italic_font)
fig.tight_layout()
plt.show()
# NOTE(review): the conditional statements below look like they sit inside
# loops over features and their locations (`feature`, `loc`); the loop
# headers and the initial values of `min_loc` / `max_loc` are outside this
# excerpt - confirm against the full script.
# and check if feature is lacA gene (begin of lac operon)
if "gene" in feature.qual \
and "pseudo" not in feature.qual \
and feature.qual["gene"] == "lacA":
# Widen the tracked range so that it covers this location
if min_loc > loc.first:
min_loc = loc.first
if max_loc < loc.last:
max_loc = loc.last
# Extend the location range by 10000 (arbitrary) in each direction
min_loc -= 10000
max_loc += 10000
# Visualize the region as feature map
fig = plt.figure(figsize=(8.0, 8.0))
ax = fig.add_subplot(111)
graphics.plot_feature_map(
ax, annotation, loc_range=(min_loc, max_loc), symbols_per_line=2000,
show_numbers=True, show_line_position=True
)
fig.tight_layout()
plt.show()