How to use the pygtftk.utils.chomp function in pygtftk

To help you get started, we’ve selected a few examples of pygtftk.utils.chomp, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dputhier / pygtftk / pygtftk / plugins / heatmap.py View on Github external
#
    # -------------------------------------------------------------------------

    tx_to_class = OrderedDict()
    tx_class_list = OrderedDict()

    if y_factor == 'tx_classes':
        if transcript_file is not None:

            message("Reading transcript class file (--transcript_file).")

            for line in transcript_file:
                if line in ('\n', '\r\n'):
                    continue

                line = chomp(line)
                fields = line.split("\t")
                tx_name = fields[0].strip()

                try:
                    tx_to_class[tx_name] = chomp(fields[1])
                    tx_class_list[fields[1]] = 1
                except:
                    message("The file provided to --target-tx-file"
                            " should contain two columns (transcript and "
                            "class).", type="ERROR")

            nb_class = len(list(set(tx_class_list.keys())))

            if nb_class == 0:
                message("No transcript found in file provided through "
                        "--target-tx-file.", type="ERROR")
github dputhier / pygtftk / pygtftk / plugins / col_from_tab.py View on Github external
invert_match=False,
                 no_header=False,
                 unique=False,
                 separator=None):
    """Select columns from a tabulated file based on their names."""

    line_set = dict()

    if re.search(",", columns):
        columns = columns.split(",")
    else:
        columns = [columns]

    for p, line in enumerate(inputfile):

        line = chomp(line)
        line = line.split(separator)

        if p == 0:

            if not invert_match:

                pos_list = list()

                for i in range(len(columns)):

                    pos = line.index(columns[i]) if columns[i] in line else -1

                    if pos > -1:
                        pos_list.append(pos)
                    else:
                        message("Column " + columns[i] + " not found",
github dputhier / pygtftk / pygtftk / plugins / intronic.py View on Github external
gtf = GTF(inputfile, check_ensembl_format=False)

    if not by_transcript:
        introns_bo = gtf.get_introns()

        for i in introns_bo:
            write_properly(chomp(str(i)), outputfile)
    else:

        introns_bo = gtf.get_introns(by_transcript=True,
                                     name=names.split(","),
                                     sep=separator,
                                     intron_nb_in_name=intron_nb_in_name,
                                     feat_name=not no_feature_name)
        for i in introns_bo:
            write_properly(chomp(str(i)), outputfile)

    gc.disable()
    close_properly(outputfile, inputfile)
github dputhier / pygtftk / pygtftk / cmd_manager.py View on Github external
def __call__(self, parser, namespace, values, option_string=None):
        """Print pygtftk / dependency / system information, then exit.

        The signature matches the ``argparse.Action.__call__`` protocol;
        none of the four arguments is read by this method.

        :param parser: Unused.
        :param namespace: Unused.
        :param values: Unused.
        :param option_string: Unused.
        :raises SystemExit: Always, once the report has been printed.
        """
        # Imports are kept local so their cost is only paid when this action
        # is actually triggered.
        import subprocess

        from pandas import __version__ as pandas_ver
        from pybedtools import __version__ as pybedtools_ver
        from pyBigWig import __version__ as bigwig_ver
        from pygtftk import __path__ as pygtftk_path
        from pygtftk.utils import chomp

        # Query bedtools directly (argument list, no intermediate shell).
        # subprocess.run waits for the child and closes the pipe, which the
        # former Popen(...).stdout.read() call did not do.
        try:
            bedtools_out = subprocess.run(["bedtools", "--version"],
                                          stdout=subprocess.PIPE,
                                          check=False).stdout.decode()
        except OSError:
            # bedtools is missing or not executable: report an empty version,
            # which is what the shell-based call produced in that situation.
            bedtools_out = ""

        bedtools_ver = chomp(bedtools_out)

        info_sys = [
            '\n- pygtftk version : ' + __version__,
            '- pygtftk path : ' + pygtftk_path[0],
            '- python version : ' + str(sys.version_info),
            '- python path : ' + str(sys.prefix),
            '- pandas version : ' + pandas_ver,
            '- Bedtools version : ' + bedtools_ver,
            '- pybedtools version : ' + pybedtools_ver,
            '- pyBigWig version : ' + bigwig_ver,
            # NOTE(review): os.uname() is not available on Windows — confirm
            # that only POSIX platforms are targeted.
            '- uname : ' + str(os.uname()),
        ]

        print("\n".join(info_sys))
        # Reporting is all this action does, so exit successfully (status 0)
        # rather than returning control to the caller.
        sys.exit(0)
github dputhier / pygtftk / pygtftk / plugins / heatmap.py View on Github external
input_file_tx = set()
    infile_chrom = set()
    infile_bwig = set()
    header = ""

    for line_number, line in enumerate(inputfile_main):

        # comment (line 0)
        if line_number == 0:
            header = chomp(line.lstrip("#"))
            header = header.rstrip(";")
            continue
        # skip header (line 1)
        elif line_number > 1:
            line = chomp(line)
            field = line.split("\t")
            tx_id = field[4]
            chrom = field[1]
            input_file_tx.add(tx_id)
            infile_chrom.add(chrom)
            infile_bwig.add(field[0])

    message("BigWigs found : " + ",".join(list(infile_bwig)))

    # -------------------------------------------------------------------------
    #
    # Parse the header
    #
    # -------------------------------------------------------------------------
    header = [x.split(":") for x in header.split(";")]
github dputhier / pygtftk / pygtftk / gtf_interface.py View on Github external
message("Calling add_attr_from_matrix_file", type="DEBUG")

        if feat is None:
            feat = ",".join(self.get_feature_list(nr=True))

        if inputfile is None:
            raise GTFtkError("Need an input/join file.")

        if isinstance(inputfile, io.IOBase):
            inputfile = inputfile.name

        id_to_val = defaultdict(lambda: defaultdict(list))

        for line_nb, line in enumerate(open(inputfile, "r")):

            line = chomp(line)

            if line_nb == 0:
                tokens = line.split("\t")

                if len(tokens) < 2:
                    raise GTFtkError(
                        "Found less than 2 columns. Is the file tabulated ?")
                key_names = tokens[1:]
            else:
                token = line.split("\t")

                if len(token) < 2:
                    raise GTFtkError(
                        "Unable to split the line. Is the file tabulated ?")
                if len(token[1:]) != len(key_names):
                    raise GTFtkError(
github dputhier / pygtftk / pygtftk / plugins / control_list.py View on Github external
def control_list(in_file=None,
                 out_dir=None,
                 referenceGeneFile=None,
                 log2=False,
                 pseudo_count=1,
                 tmp_dir=None,
                 logger_file=None,
                 verbosity=None):
    for p, line in enumerate(in_file):

        line = chomp(line)
        line = line.split("\t")

        try:
            fl = float(line[1])

            if log2:
                fl = fl + pseudo_count
                if fl <= 0:
                    message("Can not log transform negative/zero values.",
                            type="ERROR")

        except:
            msg = "It seems that column 2 of input file"
            msg += " contains non numeric values."
            msg += "Check that no header is present and that "
            msg += "columns are ordered properly."