How to use the pygtftk.utils.write_properly function in pygtftk

To help you get started, we’ve selected a few pygtftk examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dputhier / pygtftk / pygtftk / plugins / midpoints.py View on Github external
type="ERROR")

        if region_bo.file_type == 'gff':
            is_gtf = True
        else:
            is_gtf = False

    if is_gtf:

        gtf = GTF(inputfile.name, check_ensembl_format=False)

        bed_obj = gtf.select_by_key("feature",
                                    ft_type).get_midpoints(name=names.split(","),
                                                           sep=separator)
        for line in bed_obj:
            write_properly(chomp(str(line)), outputfile)

    else:
        for line in region_bo:

            diff = line.end - line.start

            if diff % 2 != 0:
                # e.g. 10-13 (zero based) -> 11-13 one based
                # midpoint is 12 (one-based) -> 11-12 (zero based)
                # e.g. 949-1100 (zero based) -> 950-1100 one based
                # midpoint is 1025 (one-based) -> 1024-1025 (zero based)
                # floored division (python 2)...
                line.end = line.start + int(diff // 2) + 1
                line.start = line.end - 1
            else:
                # e.g 10-14 (zero based) -> 11-14 one based
github dputhier / pygtftk / pygtftk / plugins / feat_size.py View on Github external
tx_size = gtf.get_transcript_size()

        if bed:
            bed_obj = gtf.select_by_key("feature",
                                        'transcript').to_bed(['transcript_id'] + names,
                                                             add_feature_type=False,
                                                             sep=separator,
                                                             more_name=['mature_rna'])

            for i in bed_obj:
                names = i.name.split(separator)
                tx_id = names.pop(0)
                i.score = tx_size[tx_id]
                i.name = separator.join(names)
                write_properly(chomp(str(i)), outputfile)
        else:

            if len(tx_size):
                gtf = gtf.add_attr_from_dict(feat="transcript",
                                             key="transcript_id",
                                             a_dict=tx_size,
                                             new_key=key_name)

            gtf.write(outputfile, gc_off=True)

    close_properly(outputfile, inputfile)
github dputhier / pygtftk / pygtftk / plugins / select_by_go.py View on Github external
""" Select lines from a GTF file based using a Gene Ontology ID (e.g GO:0050789).
    """

    if not go_id.startswith("GO:"):
        go_id = "GO:" + go_id

    is_associated = OrderedDict()

    bm = Biomart(http_proxy=http_proxy,
                 https_proxy=https_proxy)

    bm.get_datasets('ENSEMBL_MART_ENSEMBL')

    if list_datasets:
        for i in sorted(bm.datasets):
            write_properly(i.replace("_gene_ensembl", ""), outputfile)
        sys.exit()
    else:
        if species + "_gene_ensembl" not in bm.datasets:
            message("Unknow dataset/species.", type="ERROR")

    bm.query({'query': XML.format(species=species, go=go_id)})

    for i in bm.response.content.decode().split("\n"):
        i = i.rstrip("\n")
        if i != '':
            is_associated[i] = 1

    gtf = GTF(inputfile)

    gtf_associated = gtf.select_by_key("gene_id",
                                       ",".join(list(is_associated.keys())),
github dputhier / pygtftk / pygtftk / plugins / bed_to_gtf.py View on Github external
def bed_to_gtf(
        inputfile=None,
        outputfile=None,
        ft_type="transcript",
        source="Unknown"):
    """
 Convert a bed file to a gtf. This will make the poor bed feel as if it was a
 nice gtf (but with lots of empty fields...). May be helpful sometimes...
    """

    message("Converting the bed file into GTF file.")

    if inputfile.name == '':
        tmp_file = make_tmp_file(prefix="input_bed", suffix=".bed")
        for i in inputfile:
            write_properly(chomp(str(i)), tmp_file)

        tmp_file.close()
        inputfile.close()

        bed_obj = BedTool(tmp_file.name)
    else:
        bed_obj = BedTool(inputfile.name)

    n = 1
    for i in bed_obj:

        if i.strand == "":
            i.strand = "."
        if i.name == "":
            i.name = str("feature_" + str(n))
        if i.score == "":
github dputhier / pygtftk / pygtftk / Line.py View on Github external
def write(self, file_out="-"):
        """Send the formatted representation of this Feature to *file_out*.

        The string produced by ``self.format()`` is handed, together with
        *file_out*, to ``pygtftk.utils.write_properly``.

        :param file_out: Output destination, forwarded unchanged to \
        ``write_properly``. The default ``'-'`` stands for stdout.

        :Example:

        >>> from pygtftk.utils import get_example_feature
        >>> from pygtftk.utils import make_tmp_file
        >>> feat = get_example_feature()
        >>> tmp_file =  make_tmp_file()
        >>> feat.write(tmp_file)
        >>> tmp_file.close()
        >>> from pygtftk.utils import  simple_line_count
        >>> assert simple_line_count(tmp_file) == 1
        """

        # Resolve the writer first, then render, then emit: same evaluation
        # order as a direct one-line call.
        emit = pygtftk.utils.write_properly
        rendered = self.format()
        emit(rendered, file_out)
github dputhier / pygtftk / pygtftk / plugins / col_from_tab.py View on Github external
pos_list.remove(pos)
                    else:
                        message("Column " + columns[i] + " not found",
                                type="ERROR")

            if not no_header:
                out = separator.join([line[k] for k in pos_list])
                write_properly(out, outputfile)
        else:
            out = separator.join([line[k] for k in pos_list])
            if unique:
                if out not in line_set:
                    write_properly(out, outputfile)
                    line_set[out] = 1
            else:
                write_properly(out, outputfile)
github dputhier / pygtftk / pygtftk / plugins / get_5p_3p_coords.py View on Github external
else:

        bed_obj = gtf.get_3p_end(feat_type=ft_type,
                                 name=nms,
                                 sep=separator,
                                 more_name=more_names,
                                 explicit=explicit)

    if not len(bed_obj):
        message("Requested feature could not be found. Use convert_ensembl maybe.",
                type="ERROR")

    if transpose == 0:
        for i in bed_obj:
            write_properly(chomp(str(i)), outputfile)
    else:
        for i in bed_obj:
            out_list = list()
            if i.strand == "+":
                out_list = [i.chrom,
                            str(i.start + transpose),
                            str(i.end + transpose),
                            i.name,
                            i.score,
                            i.strand]
            elif i.strand == "-":
                out_list = [i.chrom,
                            str(i.start - transpose),
                            str(i.end - transpose),
                            i.name,
                            i.score,
github dputhier / pygtftk / pygtftk / plugins / col_from_tab.py View on Github external
pos_list = list(range(len(line)))

                for i in range(len(columns)):

                    pos = line.index(columns[i]) if columns[i] in line else -1

                    if pos > -1:
                        pos_list.remove(pos)
                    else:
                        message("Column " + columns[i] + " not found",
                                type="ERROR")

            if not no_header:
                out = separator.join([line[k] for k in pos_list])
                write_properly(out, outputfile)
        else:
            out = separator.join([line[k] for k in pos_list])
            if unique:
                if out not in line_set:
                    write_properly(out, outputfile)
                    line_set[out] = 1
            else:
                write_properly(out, outputfile)