How to use the pygtftk.utils.message function in pygtftk

To help you get started, we’ve selected a few pygtftk examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dputhier / pygtftk / pygtftk / plugins / divergent.py View on Github external
downstream=1500,
        chrom_info=None,
        no_strandness=False,
        no_annotation=False):
    """
Find transcript with divergent promoters.
    """

    message("Using -u " + str(upstream) + ".")
    message("Using -d " + str(downstream) + ".")

    tx_with_divergent = dict()
    dist_to_divergent = dict()
    tss_pos = dict()

    message("Loading GTF.")

    gtf = GTF(inputfile)

    message("Getting transcript coordinates.")

    tx_feat = gtf.select_by_key("feature",
                                "transcript")
    message("Getting tss coordinates.")

    tss_bo = tx_feat.get_tss(name=["transcript_id", "gene_id"],
                             sep="||")

    # get tss position
    for i in tss_bo:
        tx_id_tss, gn_id_tss = i.name.split("||")
        tss_pos[tx_id_tss] = int(i.start)
github dputhier / pygtftk / pygtftk / plugins / rm_dup_tss.py View on Github external
for line in sorted(lines, key=operator.itemgetter(3)):
        tmp_file.write('\t'.join(line))

    tmp_file.close()

    tss_bo = BedTool(tmp_file.name)

    # ----------------------------------------------------------------------
    # Get the list of non redundant TSSs
    # ----------------------------------------------------------------------

    gene_dict = defaultdict(dict)
    to_delete = []

    message("Looking for redundant TSS (gene-wise).")

    for line in tss_bo:

        tss = line.start
        name = line.name
        gene_id, tx_id = name.split("|")

        if gene_id in gene_dict:
            if tss not in gene_dict[gene_id]:
                gene_dict[gene_id][tss] = tx_id
            else:
                to_delete += [tx_id]
        else:
            gene_dict[gene_id][tss] = tx_id

    message("Deleted transcripts: " + ",".join(to_delete[1:min(10,
github dputhier / pygtftk / pygtftk / plugins / profile.py View on Github external
# -------------------------------------------------------------------------
    #
    # Colors orders
    #
    # -------------------------------------------------------------------------

    if color_order is None:
        if group_by == 'bwig':
            color_order = ",".join(input_file_bwig)
        elif group_by == 'tx_classes':
            color_order = ",".join(class_list)
        elif group_by == 'chrom':
            color_order = ",".join(list(input_file_chrom))
        else:
            message("color_order is undefined.", type="ERROR")
        color_order = color_order.split(",")

    else:
        color_order = color_order.split(",")
        color_order_pb = False

        if group_by == 'bwig':
            if len(color_order) != len(input_file_bwig):
                color_order_pb = True
            if len(set(color_order)) != len(set(input_file_bwig)):
                color_order_pb = True
            for co in color_order:
                if co not in input_file_bwig:
                    color_order_pb = True

        elif group_by == 'tx_classes':
github dputhier / pygtftk / pygtftk / plugins / alt_prom.py View on Github external
Takes a GTF as input to search for genes with alternative promoters.
    """

    # -------------------------------------------------------------------------
    # Create a list of labels.
    # Take user input in account
    # -------------------------------------------------------------------------

    bed_list = [x.name for x in bed_list]

    if len(bed_list) != len(set(bed_list)):
        message("Found the same BED file several times.",
                type="ERROR")

    if len(bed_list) < 2:
        message("At least two bed files are needed.",
                type="ERROR")

    message('Checking labels.')

    if labels is not None:
        labels = labels.split(",")
        # Ensure the number of labels is the same as the number of bed files.
        if len(labels) != len(bed_list):
            message("The number of labels should be the same as the number of"
                    " bed files.", type="ERROR")
        # Ensure labels are non-redondant
        if len(labels) > len(set(labels)):
            message("Labels must be unique.", type="ERROR")
    else:
        labels = []
        for i in range(len(bed_list)):
github dputhier / pygtftk / pygtftk / plugins / ologram.py View on Github external
warnings.filterwarnings("ignore", category=matplotlib.cbook.MatplotlibDeprecationWarning)

    def fxn():
        warnings.warn("deprecated", DeprecationWarning)

    # -------------------------------------------------------------------------
    # Saving
    # -------------------------------------------------------------------------

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fxn()

        message("Saving diagram to file : " + pdf_file.name)
        message("Be patient. This may be long for large datasets.")

        # NOTE : We must manually specify figure size with save_as_pdf_pages
        save_as_pdf_pages(filename=pdf_file.name,
                          plots=[p1 + theme(figure_size=figsize),
                                 p2 + theme(figure_size=figsize),
                                 p3 + theme(figure_size=figsize)],
                          width=pdf_width,
                          height=pdf_height)

    gc.disable()
github dputhier / pygtftk / pygtftk / plugins / bed_to_gtf.py View on Github external
def bed_to_gtf(
        inputfile=None,
        outputfile=None,
        ft_type="transcript",
        source="Unknown"):
    """
 Convert a bed file to a gtf. This will make the poor bed feel as if it was a
 nice gtf (but with lots of empty fields...). May be helpful sometimes...
    """

    message("Converting the bed file into GTF file.")

    if inputfile.name == '':
        tmp_file = make_tmp_file(prefix="input_bed", suffix=".bed")
        for i in inputfile:
            write_properly(chomp(str(i)), tmp_file)

        tmp_file.close()
        inputfile.close()

        bed_obj = BedTool(tmp_file.name)
    else:
        bed_obj = BedTool(inputfile.name)

    n = 1
    for i in bed_obj:
github dputhier / pygtftk / pygtftk / plugins / discretize_key.py View on Github external
message("The number of labels should be the same as the number of levels.",
                    type="ERROR")
        if len(labels) != len(set(labels)):
            message("Redundant labels not allowed.", type="ERROR")

    # -------------------------------------------------------------------------
    #
    # Load GTF. Retrieve values for src-key
    #
    # -------------------------------------------------------------------------

    gtf = GTF(inputfile, check_ensembl_format=False)
    src_values = gtf.extract_data(src_key, as_list=True)

    if len([x for x in src_values if x not in ['.', '?']]) == 0:
        message('The key was not found in this GTF.',
                type="ERROR")

    min_val = None
    max_val = None

    dest_values = []
    dest_pos = []

    for p, v in enumerate(src_values):
        try:
            a = float(v)
            if min_val is not None:
                if a > max_val:
                    max_val = a
                if a < min_val:
                    min_val = a
github dputhier / pygtftk / pygtftk / plugins / retrieve.py View on Github external
message("Successfully change directory to " + ensembl_collection)
        except:
            message("Unable to change directory to '%s'." % ensembl_collection,
                    type="ERROR")

    try:
        all_releases = ftp.listdir(ftp.curdir)
    except Exception as e:
        print(str(e))
        message("Unable to list directory.",
                type="ERROR")

    if release is not None:
        release_dir = "release-" + release
        if release_dir not in all_releases:
            message("This release number could not be found. Aborting",
                    type="ERROR")
    else:

        version_list = []

        for ver in all_releases:
            regexp = re.compile("release-(\d+)")
            hit = regexp.search(ver)
            if hit:
                version_list += [int(hit.group(1))]
        release = max(version_list)
        release_dir = "release-" + str(release)
        message("Latest version is %d." % release)

    try:
        ftp.chdir(release_dir)
github dputhier / pygtftk / pygtftk / plugins / get_example.py View on Github external
for i in file_dataset:
            print("\t" + i)
    else:
        if format == "gtf":
            example_file = get_example_file(datasetname=dataset,
                                            ext=format)

            if example_file:
                example_file = example_file[0]
            else:
                example_file = get_example_file(datasetname=dataset,
                                                ext=format + ".gz")
                if example_file:
                    example_file = example_file[0]
                else:
                    message("No GTF file found for this dataset.",
                            type="ERROR")
            GTF(example_file, check_ensembl_format=False).write(outputfile, gc_off=True)

        elif format in ["fa", "join", "join_mat", "genome", "chromInfo", "genes", "geneList"]:
            try:
                infile = open(get_example_file(datasetname=dataset,
                                               ext=format)[0], "r")
            except:
                message("Unable to find example file.", type="ERROR")

            for line in infile:
                outputfile.write(line)

        elif format == "*":

            file_path = glob.glob(os.path.join(pygtftk.__path__[0],
github dputhier / pygtftk / pygtftk / gtf_interface.py View on Github external
def select_5p_transcript(self):
        """Select the most 5' transcript of each gene. Note that the genomic
        boundaries for each gene could differ from that displayed by its
        associated transcripts. The gene features may be subsequently deleted
        using select_by_key.

        :Example:

        >>> from  pygtftk.utils import get_example_file
        >>> from pygtftk.gtf_interface import GTF
        >>> a_file = get_example_file()[0]
        >>> a_gtf = GTF(a_file)

        """

        message("Calling select_5p_transcript.", type="DEBUG")

        new_data = self._dll.select_transcript(self._data, 3)

        return self._clone(new_data)