Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
type="ERROR")
if region_bo.file_type == 'gff':
is_gtf = True
else:
is_gtf = False
if is_gtf:
gtf = GTF(inputfile.name, check_ensembl_format=False)
bed_obj = gtf.select_by_key("feature",
ft_type).get_midpoints(name=names.split(","),
sep=separator)
for line in bed_obj:
write_properly(chomp(str(line)), outputfile)
else:
for line in region_bo:
diff = line.end - line.start
if diff % 2 != 0:
# e.g 10-13 (zero based) -> 11-13 one based
# mipoint is 12 (one-based) -> 11-12 (zero based)
# e.g 949-1100 (zero based) -> 950-1100 one based
# mipoint is 1025 (one-based) -> 1024-1025 (zero based)
# floored division (python 2)...
line.end = line.start + int(diff // 2) + 1
line.start = line.end - 1
else:
# e.g 10-14 (zero based) -> 11-14 one based
tx_size = gtf.get_transcript_size()
if bed:
bed_obj = gtf.select_by_key("feature",
'transcript').to_bed(['transcript_id'] + names,
add_feature_type=False,
sep=separator,
more_name=['mature_rna'])
for i in bed_obj:
names = i.name.split(separator)
tx_id = names.pop(0)
i.score = tx_size[tx_id]
i.name = separator.join(names)
write_properly(chomp(str(i)), outputfile)
else:
if len(tx_size):
gtf = gtf.add_attr_from_dict(feat="transcript",
key="transcript_id",
a_dict=tx_size,
new_key=key_name)
gtf.write(outputfile, gc_off=True)
close_properly(outputfile, inputfile)
""" Select lines from a GTF file based using a Gene Ontology ID (e.g GO:0050789).
"""
if not go_id.startswith("GO:"):
go_id = "GO:" + go_id
is_associated = OrderedDict()
bm = Biomart(http_proxy=http_proxy,
https_proxy=https_proxy)
bm.get_datasets('ENSEMBL_MART_ENSEMBL')
if list_datasets:
for i in sorted(bm.datasets):
write_properly(i.replace("_gene_ensembl", ""), outputfile)
sys.exit()
else:
if species + "_gene_ensembl" not in bm.datasets:
message("Unknow dataset/species.", type="ERROR")
bm.query({'query': XML.format(species=species, go=go_id)})
for i in bm.response.content.decode().split("\n"):
i = i.rstrip("\n")
if i != '':
is_associated[i] = 1
gtf = GTF(inputfile)
gtf_associated = gtf.select_by_key("gene_id",
",".join(list(is_associated.keys())),
def bed_to_gtf(
inputfile=None,
outputfile=None,
ft_type="transcript",
source="Unknown"):
"""
Convert a bed file to a gtf. This will make the poor bed feel as if it was a
nice gtf (but with lots of empty fields...). May be helpful sometimes...
"""
message("Converting the bed file into GTF file.")
if inputfile.name == '':
tmp_file = make_tmp_file(prefix="input_bed", suffix=".bed")
for i in inputfile:
write_properly(chomp(str(i)), tmp_file)
tmp_file.close()
inputfile.close()
bed_obj = BedTool(tmp_file.name)
else:
bed_obj = BedTool(inputfile.name)
n = 1
for i in bed_obj:
if i.strand == "":
i.strand = "."
if i.name == "":
i.name = str("feature_" + str(n))
if i.score == "":
def write(self, file_out="-"):
    """Serialize this Feature to a file, or to stdout when *file_out* is '-'.

    :param file_out: Target file object, or the string '-' for standard output.

    :Example:

    >>> from pygtftk.utils import get_example_feature
    >>> from pygtftk.utils import make_tmp_file
    >>> feat = get_example_feature()
    >>> tmp_file = make_tmp_file()
    >>> feat.write(tmp_file)
    >>> tmp_file.close()
    >>> from pygtftk.utils import simple_line_count
    >>> assert simple_line_count(tmp_file) == 1
    """
    # Render the feature to its textual form first, then delegate the
    # file-vs-stdout decision to the shared writer helper.
    rendered = self.format()
    pygtftk.utils.write_properly(rendered, file_out)
pos_list.remove(pos)
else:
message("Column " + columns[i] + " not found",
type="ERROR")
if not no_header:
out = separator.join([line[k] for k in pos_list])
write_properly(out, outputfile)
else:
out = separator.join([line[k] for k in pos_list])
if unique:
if out not in line_set:
write_properly(out, outputfile)
line_set[out] = 1
else:
write_properly(out, outputfile)
else:
bed_obj = gtf.get_3p_end(feat_type=ft_type,
name=nms,
sep=separator,
more_name=more_names,
explicit=explicit)
if not len(bed_obj):
message("Requested feature could not be found. Use convert_ensembl maybe.",
type="ERROR")
if transpose == 0:
for i in bed_obj:
write_properly(chomp(str(i)), outputfile)
else:
for i in bed_obj:
out_list = list()
if i.strand == "+":
out_list = [i.chrom,
str(i.start + transpose),
str(i.end + transpose),
i.name,
i.score,
i.strand]
elif i.strand == "-":
out_list = [i.chrom,
str(i.start - transpose),
str(i.end - transpose),
i.name,
i.score,
pos_list = list(range(len(line)))
for i in range(len(columns)):
pos = line.index(columns[i]) if columns[i] in line else -1
if pos > -1:
pos_list.remove(pos)
else:
message("Column " + columns[i] + " not found",
type="ERROR")
if not no_header:
out = separator.join([line[k] for k in pos_list])
write_properly(out, outputfile)
else:
out = separator.join([line[k] for k in pos_list])
if unique:
if out not in line_set:
write_properly(out, outputfile)
line_set[out] = 1
else:
write_properly(out, outputfile)