Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_annotation_concatenation():
feature1 = Feature("CDS", [seq.Location(1,1)], qual={"gene" : "test1"})
feature2 = Feature("CDS", [seq.Location(2,2)], qual={"gene" : "test2"})
annot1 = Annotation([feature1, feature2])
feature3 = Feature("CDS", [seq.Location(3,3)], qual={"gene" : "test3"})
feature4 = Feature("CDS", [seq.Location(4,4)], qual={"gene" : "test4"})
annot2 = Annotation([feature3, feature4])
feature5 = Feature("CDS", [seq.Location(5,5)], qual={"gene" : "test5"})
concat = annot1 + annot2 + feature5
assert set([f.qual["gene"] for f in concat]) \
== set(["test1", "test2", "test3", "test4", "test5"])
def test_genbank_utility_gp():
"""
Check whether the high-level utility functions return the expected
content of a known GenPept file.
"""
gp_file = gb.GenBankFile.read(join(data_dir("sequence"), "bt_lysozyme.gp"))
#[print(e) for e in gp_file._field_pos]
assert gb.get_locus(gp_file) \
== ("AAC37312", 147, "", False, "MAM", "27-APR-1993")
assert gb.get_definition(gp_file) == "lysozyme [Bos taurus]."
assert gb.get_version(gp_file) == "AAC37312.1"
assert gb.get_gi(gp_file) == 163334
annotation = gb.get_annotation(gp_file)
feature = seq.Feature(
"Site",
[seq.Location(start, stop) for start, stop in zip(
[52,55,62,76,78,81,117,120,125],
[53,55,62,76,78,81,117,120,126]
)],
{"note": "lysozyme catalytic cleft [active]", "site_type": "active"}
)
in_annotation = False
for f in annotation:
if f.key == feature.key and f.locs == feature.locs and \
all([(key, val in f.qual.items())
for key, val in feature.qual.items()]):
in_annotation = True
assert in_annotation
assert len(gb.get_sequence(gp_file, format="gp")) == 147
Test whether the same annotation (if reasonable) can be read from a
GFF3 file and a GenBank file.
"""
file = gb.GenBankFile.read(join(data_dir("sequence"), path))
ref_annot = gb.get_annotation(file)
file = gff.GFFFile.read(join(data_dir("sequence"), path[:-3] + ".gff3"))
test_annot = gff.get_annotation(file)
# Remove qualifiers, since they will be different
# in GFF3 and GenBank
ref_annot = seq.Annotation(
[seq.Feature(feature.key, feature.locs) for feature in ref_annot]
)
test_annot = seq.Annotation(
[seq.Feature(feature.key, feature.locs) for feature in test_annot]
)
for feature in test_annot:
# Only CDS, gene, intron and exon should be equal
# in GenBank and GFF3
if feature.key in ["CDS", "gene", "intron", "exon"]:
try:
assert feature in test_annot
except AssertionError:
print(feature.key)
for loc in feature.locs:
print(loc)
raise
def test_annotation_indexing():
feature1 = Feature("CDS", [Location(-10,30 )], qual={"gene" : "test1"})
feature2 = Feature("CDS", [Location(20, 50 )], qual={"gene" : "test2"})
feature3 = Feature("CDS", [Location(100,130)], qual={"gene" : "test3"})
feature4 = Feature("CDS", [Location(150,250)], qual={"gene" : "test4"})
feature5 = Feature("CDS", [Location(-50,200)], qual={"gene" : "test5"})
annotation = Annotation([feature1,feature2,feature3,feature4,feature5])
sub_annot = annotation[40:150]
# Only one location per feature
assert set([list(f.locs)[0].defect for f in sub_annot]) \
== set([Location.Defect.MISS_LEFT, Location.Defect.NONE,
(Location.Defect.MISS_LEFT | Location.Defect.MISS_RIGHT)])
assert set([f.qual["gene"] for f in sub_annot]) \
== set(["test2", "test3", "test5"])
Feature map of a synthetic operon
=================================
This script shows how to create a picture of an synthetic operon for
publication purposes.
"""
# Code source: Patrick Kunzmann
# License: BSD 3 clause
import matplotlib.pyplot as plt
from biotite.sequence import Annotation, Feature, Location
import biotite.sequence.graphics as graphics
strand = Location.Strand.FORWARD
prom = Feature("regulatory", [Location(10, 50, strand)],
{"regulatory_class" : "promoter",
"note" : "T7"})
rbs1 = Feature("regulatory", [Location(60, 75, strand)],
{"regulatory_class" : "ribosome_binding_site",
"note" : "RBS1"})
gene1 = Feature("gene", [Location(81, 380, strand)],
{"gene" : "gene1"})
rbs2 = Feature("regulatory", [Location(400, 415, strand)],
{"regulatory_class" : "ribosome_binding_site",
"note" : "RBS2"})
gene2 = Feature("gene", [Location(421, 1020, strand)],
{"gene" : "gene2"})
term = Feature("regulatory", [Location(1050, 1080, strand)],
{"regulatory_class" : "terminator"})
annotation = Annotation([prom, rbs1, gene1, rbs2, gene2, term])
plasmid map by using a custom 'toy' :class:`Annotation`.
"""
# Code source: Patrick Kunzmann
# License: BSD 3 clause
import matplotlib.pyplot as plt
import numpy as np
import biotite.sequence as seq
import biotite.sequence.io.genbank as gb
import biotite.sequence.graphics as graphics
import biotite.database.entrez as entrez
annotation = seq.Annotation([
seq.Feature(
"source",
[seq.Location(0, 1500)],
{"organism": "Escherichia coli"}
),
# Ori
seq.Feature(
"rep_origin",
[seq.Location(600, 700, seq.Location.Strand.REVERSE)],
{"regulatory_class": "promoter", "note": "MyProm"}
),
# Promoter
seq.Feature(
"regulatory",
[seq.Location(1000, 1060)],
seq.Feature(
"regulatory",
[seq.Location(310, 350)],
{"regulatory_class": "terminator", "note": "MyTerm"}
),
# Primers
# The labels will be too long to be displayed on the map
# If you want to display them nevertheless, set the
# 'omit_oversized_labels' to False
seq.Feature(
"primer_bind",
[seq.Location(1385, 1405)],
{"note": "geneC"}
),
seq.Feature(
"primer_bind",
[seq.Location(345, 365, seq.Location.Strand.REVERSE)],
{"note": "geneC_R"}
),
# Terminator
seq.Feature(
"regulatory",
[seq.Location(310, 350)],
{"regulatory_class": "terminator", "note": "MyTerm"}
),
])
fig = plt.figure(figsize=(8.0, 8.0))
ax = fig.add_subplot(111, projection="polar")
[seq.Location(1000, 1060)],
{"regulatory_class": "promoter", "note": "MyProm"}
),
seq.Feature(
"protein_bind",
[seq.Location(1025, 1045)],
{"note": "repr"}
),
# Gene A
seq.Feature(
"regulatory",
[seq.Location(1070, 1080)],
{"regulatory_class": "ribosome_binding_site"}
),
seq.Feature(
"CDS",
[seq.Location(1091, 1150)],
{"product": "geneA"}
),
# Gene B
seq.Feature(
"regulatory",
[seq.Location(1180, 1190)],
{"regulatory_class": "ribosome_binding_site"}
),
seq.Feature(
"CDS",
[seq.Location(1201, 1350)],
{"product": "geneB"}
),
else:
draw_head = True
axes.add_patch(biotite.AdaptiveFancyArrow(
x, y, dx, dy,
self._tail_width*bbox.height, self._head_width*bbox.height,
# Create head with 90 degrees tip
# -> head width/length ratio = 1/2
head_ratio=0.5, draw_head=draw_head,
color=biotite.colors["orange"], linewidth=0
))
# Test our drawing functions with example annotation
annotation = seq.Annotation([
seq.Feature("SecStr", [seq.Location(10, 40)], {"sec_str_type" : "helix"}),
seq.Feature("SecStr", [seq.Location(60, 90)], {"sec_str_type" : "sheet"}),
])
fig = plt.figure(figsize=(8.0, 0.8))
ax = fig.add_subplot(111)
graphics.plot_feature_map(
ax, annotation, multi_line=False, loc_range=(1,100),
# Register our drawing functions
feature_plotters=[HelixPlotter(), SheetPlotter()]
)
fig.tight_layout()
########################################################################
# Now let us do some serious application.
# We want to visualize the secondary structure of one monomer of the
# homodimeric transketolase (PDB: 1QGD).