Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#
# -------------------------------------------------------------------------
# Map each transcript to its class (tx_to_class) and record the distinct
# classes in order of first appearance (keys of tx_class_list).
tx_to_class = OrderedDict()
tx_class_list = OrderedDict()

if y_factor == 'tx_classes':
    if transcript_file is not None:
        message("Reading transcript class file (--transcript_file).")

        for line in transcript_file:
            # Ignore empty lines.
            if line in ('\n', '\r\n'):
                continue

            line = chomp(line)
            fields = line.split("\t")
            tx_name = fields[0].strip()

            # Only a missing second column is an expected failure here, so
            # catch IndexError alone instead of a bare ``except`` that would
            # also trap SystemExit/KeyboardInterrupt and unrelated bugs.
            try:
                tx_class = fields[1]
            except IndexError:
                # NOTE(review): the option is called --transcript_file in the
                # message above but --target-tx-file here; confirm which
                # name is the right one.
                message("The file provided to --target-tx-file"
                        " should contain two columns (transcript and "
                        "class).", type="ERROR")
            else:
                tx_to_class[tx_name] = chomp(tx_class)
                tx_class_list[tx_class] = 1

        # Dictionary keys are already unique: no need for list(set(...)).
        nb_class = len(tx_class_list)

        if nb_class == 0:
            message("No transcript found in file provided through "
                    "--target-tx-file.", type="ERROR")
invert_match=False,
no_header=False,
unique=False,
separator=None):
"""Select columns from a tabulated file based on their names."""
line_set = dict()
if re.search(",", columns):
columns = columns.split(",")
else:
columns = [columns]
for p, line in enumerate(inputfile):
line = chomp(line)
line = line.split(separator)
if p == 0:
if not invert_match:
pos_list = list()
for i in range(len(columns)):
pos = line.index(columns[i]) if columns[i] in line else -1
if pos > -1:
pos_list.append(pos)
else:
message("Column " + columns[i] + " not found",
# Load the GTF with the Ensembl format check turned off.
gtf = GTF(inputfile, check_ensembl_format=False)

# Select the intron set: either the default one, or one computed per
# transcript and named from the comma-separated keys in `names`.
if by_transcript:
    introns_bo = gtf.get_introns(by_transcript=True,
                                 name=names.split(","),
                                 sep=separator,
                                 intron_nb_in_name=intron_nb_in_name,
                                 feat_name=not no_feature_name)
else:
    introns_bo = gtf.get_introns()

# Write one record per line, without its trailing line ending.
for intron in introns_bo:
    write_properly(chomp(str(intron)), outputfile)

# NOTE(review): garbage collection is disabled right before the files are
# closed; the reason is not visible from this fragment.
gc.disable()
close_properly(outputfile, inputfile)
def __call__(self, parser, namespace, values, option_string=None):
    """Print version and system information, then exit the interpreter.

    The signature matches ``argparse.Action.__call__``; none of the
    arguments are read by this method.

    The report lists the pygtftk version and path, the Python version and
    prefix, the pandas, bedtools, pybedtools and pyBigWig versions, and
    the result of ``os.uname()``.

    Raises:
        SystemExit: always, through ``sys.exit()`` once the report has
            been printed.
    """
    from pandas import __version__ as pandas_ver
    from pybedtools import __version__ as pybedtools_ver
    from pyBigWig import __version__ as bigwig_ver
    from pygtftk import __path__ as pygtftk_path
    import subprocess
    from pygtftk.utils import chomp

    # subprocess.run() waits for the child and closes its pipe, unlike a
    # bare Popen(...).stdout.read() which leaves both dangling. The shell
    # invocation is kept so that a missing 'bedtools' binary still gives
    # an empty version string rather than an exception.
    bedtools_proc = subprocess.run("bedtools --version",
                                   shell=True,
                                   stdout=subprocess.PIPE)
    bedtools_ver = chomp(bedtools_proc.stdout.decode())

    info_sys = [
        '\n- pygtftk version : ' + __version__,
        '- pygtftk path : ' + pygtftk_path[0],
        '- python version : ' + str(sys.version_info),
        '- python path : ' + str(sys.prefix),
        '- pandas version : ' + pandas_ver,
        '- Bedtools version : ' + bedtools_ver,
        '- pybedtools version : ' + pybedtools_ver,
        '- pyBigWig version : ' + bigwig_ver,
        '- uname : ' + str(os.uname()),
    ]

    print("\n".join(info_sys))
    sys.exit()
# Distinct transcript ids, chromosomes and bigWig labels seen in the file.
input_file_tx = set()
infile_chrom = set()
infile_bwig = set()
header = ""

for line_number, line in enumerate(inputfile_main):
    if line_number == 0:
        # Line 0 is a comment: drop the leading '#', the line ending and
        # any trailing ';' to keep the raw header string.
        header = chomp(line.lstrip("#")).rstrip(";")
    elif line_number > 1:
        # Line 1 (column header) is skipped; the rest are data rows.
        field = chomp(line).split("\t")
        # Read every needed column before touching the sets.
        bwig, chrom, tx_id = field[0], field[1], field[4]
        input_file_tx.add(tx_id)
        infile_chrom.add(chrom)
        infile_bwig.add(bwig)

message("BigWigs found : " + ",".join(infile_bwig))

# -------------------------------------------------------------------------
#
# Parse the header
#
# -------------------------------------------------------------------------

# Split the header on ';', then split each item on ':'.
header = [item.split(":") for item in header.split(";")]
message("Calling add_attr_from_matrix_file", type="DEBUG")
if feat is None:
feat = ",".join(self.get_feature_list(nr=True))
if inputfile is None:
raise GTFtkError("Need an input/join file.")
if isinstance(inputfile, io.IOBase):
inputfile = inputfile.name
id_to_val = defaultdict(lambda: defaultdict(list))
for line_nb, line in enumerate(open(inputfile, "r")):
line = chomp(line)
if line_nb == 0:
tokens = line.split("\t")
if len(tokens) < 2:
raise GTFtkError(
"Found less than 2 columns. Is the file tabulated ?")
key_names = tokens[1:]
else:
token = line.split("\t")
if len(token) < 2:
raise GTFtkError(
"Unable to split the line. Is the file tabulated ?")
if len(token[1:]) != len(key_names):
raise GTFtkError(
def control_list(in_file=None,
out_dir=None,
referenceGeneFile=None,
log2=False,
pseudo_count=1,
tmp_dir=None,
logger_file=None,
verbosity=None):
for p, line in enumerate(in_file):
line = chomp(line)
line = line.split("\t")
try:
fl = float(line[1])
if log2:
fl = fl + pseudo_count
if fl <= 0:
message("Can not log transform negative/zero values.",
type="ERROR")
except:
msg = "It seems that column 2 of input file"
msg += " contains non numeric values."
msg += "Check that no header is present and that "
msg += "columns are ordered properly."