How to use the pygtftk.utils.GTFtkError exception class in pygtftk

To help you get started, we’ve selected a few pygtftk examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dputhier / pygtftk / pygtftk / gtf_interface.py View on Github external
"""

        if keys is None:
            raise GTFtkError("Please provide a key.")

        if [as_list, as_dict, as_dict_of_lists,
            as_list_of_list,
            as_dict_of_values, as_dict_of_merged_list].count(True) > 1:
            msg = "Choose between as_list, as_dict_of_values, as_dict_of_merged_list, as_dict_of_list or as_dict"
            raise GTFtkError(msg)

        if not isinstance(keys, list):
            if isinstance(keys, str):
                keys = keys.split(",")
            else:
                raise GTFtkError("Please provide a key as str or list.")

        if zero_based:
            base = 0
        else:
            base = 1

        if nr:
            nr = 1
        else:
            nr = 0

        keys = [x if x not in ['chrom', 'chr'] else 'seqid' for x in keys]
        keys_csv = ",".join(keys)

        message("Calling extract_data (" + ",".join(keys) + ").", type="DEBUG")
github dputhier / pygtftk / pygtftk / gtf_interface.py View on Github external
>>> assert a_gtf.select_by_key("feature", "transcript").extract_data("bla", as_list=True, nr=False, hide_undef=False).count('?') == 15
        >>> assert a_gtf.select_by_key("feature", "transcript").extract_data("bla", as_list=True, nr=True, hide_undef=False).count('?') == 1
        >>> assert len(a_gtf.select_by_key("feature", "transcript").extract_data("start", as_dict=True)) == 11
        >>> assert len(a_gtf.select_by_key("feature", "transcript").extract_data("seqid", as_dict=True)) == 1
        >>> assert [len(x) for x in a_gtf.select_by_key("feature", "transcript").extract_data("seqid,start", as_list_of_list=True)].count(2) == 15
        >>> assert len(a_gtf.select_by_key("feature", "transcript").extract_data("seqid,start", as_list_of_list=True, nr=True)) == 11
        """

        if keys is None:
            raise GTFtkError("Please provide a key.")

        if [as_list, as_dict, as_dict_of_lists,
            as_list_of_list,
            as_dict_of_values, as_dict_of_merged_list].count(True) > 1:
            msg = "Choose between as_list, as_dict_of_values, as_dict_of_merged_list, as_dict_of_list or as_dict"
            raise GTFtkError(msg)

        if not isinstance(keys, list):
            if isinstance(keys, str):
                keys = keys.split(",")
            else:
                raise GTFtkError("Please provide a key as str or list.")

        if zero_based:
            base = 0
        else:
            base = 1

        if nr:
            nr = 1
        else:
            nr = 0
github dputhier / pygtftk / pygtftk / gtf_interface.py View on Github external
>>> from  pygtftk.utils import get_example_file
        >>> from pygtftk.gtf_interface import GTF
        >>> a_file = get_example_file()[0]
        >>> a_gtf = GTF(a_file)
        >>> a_dict = a_gtf.nb_exons()
        >>> a_gtf = a_gtf.add_attr_from_dict(feat="transcript", a_dict=a_dict, new_key="exon_nb")
        >>> b_dict = a_gtf.select_by_key("feature", "transcript").extract_data("transcript_id,exon_nb", as_dict_of_values=True)
        >>> assert a_dict['G0006T001'] == int(b_dict['G0006T001'])
        >>> assert a_dict['G0008T001'] == int(b_dict['G0008T001'])
        """

        message("Calling add_attr_from_dict", type="DEBUG")

        if len(a_dict) == 0:
            raise GTFtkError(
                "Need some data to join.")

        tmp_file = make_tmp_file("add_attr_from_dict", ".txt")

        for i, j in list(a_dict.items()):
            if isinstance(j, list):
                j = ",".join([str(x) for x in j])
            tmp_file.write("\t".join([str(i), str(j)]) + "\n")
        tmp_file.close()

        new_data = self._dll.add_attributes(self._data,
                                            native_str(feat),
                                            native_str(key),
                                            native_str(new_key),
                                            native_str(tmp_file.name))
github dputhier / pygtftk / pygtftk / gtf_interface.py View on Github external
>>> from  pygtftk.utils import get_example_file
        >>> from pygtftk.gtf_interface import GTF
        >>> a_file = get_example_file()[0]
        >>> chr_info_path = get_example_file(ext="chromInfo")[0]
        >>> chr_info_file = open(chr_info_path, "r")
        >>> a_gtf = GTF(a_file)
        >>> a_bed = a_gtf.get_intergenic(chrom_file=chr_info_file)
        >>> assert len(a_bed) == 10

        """

        message("Calling 'get_intergenic'.", type="DEBUG")

        if not isinstance(chrom_file, io.IOBase):
            raise GTFtkError('chrom_file should be a file object.')

        if not os.path.exists(chrom_file.name):
            raise GTFtkError('chrom_file could not be found.')

        gtf = self.select_by_key("feature",
                                 "transcript")

        tx_bo = gtf.to_bed(name=["gene_id",
                                 "transcript_id"]).slop(s=True,
                                                        l=upstream,
                                                        r=downstream,
                                                        g=chrom_file.name).cut([0, 1,
                                                                                2, 3,
                                                                                4, 5])

        if chr_list is None:
github dputhier / pygtftk / pygtftk / gtf_interface.py View on Github external
parsed_exp_str = flatten_list_recur(parsed_exp.asList())

        result = []
        pos = 0

        for i in tab:
            if not any([True if x in na_omit else False for x in i]):
                try:
                    [float(x) for x in i]
                    if eval(parsed_exp_str):
                        result += [pos]
                except:
                    msg = "Found non numeric values in: '%s'." % ",".join(
                        i)
                    GTFtkError(msg)
            pos += 1
        # Call C function

        if len(result) < 1:
            tmp_f = make_tmp_file()
            a_gtf = GTF(tmp_f.name, check_ensembl_format=False)
            a_gtf.fn = self.fn
            return a_gtf

        return self.select_by_positions(result)
github dputhier / pygtftk / pygtftk / gtf_interface.py View on Github external
group_1 = identifier + comparison_operator + value
        group_2 = value + comparison_operator + identifier
        comparison = group_1 | group_2
        boolean_expr = operatorPrecedence(comparison,
                                          [(and_operator, 2, opAssoc.LEFT),
                                           (or_operator, 2, opAssoc.LEFT)])

        boolean_expr_par = lparen + boolean_expr + rparen

        expression = Forward()
        expression << boolean_expr | boolean_expr_par

        try:
            parsed_exp = expression.parseString(bool_exp, parseAll=True)
        except:
            raise GTFtkError("Expression not supported.")

        # Strip the 'float(i.' prefix and the ')' suffix from each key: 'float(i.' + .* + ')'
        attr_used = [x[8:-1] for x in _find_keys(parsed_exp, res=[])]

        for i in attr_used:
            if i not in [x for x in attr_list]:
                GTFtkError("Your expression seems to contain an unknow key.")

        tab = self.extract_data(",".join(attr_used), hide_undef=False)

        parsed_exp_str = flatten_list_recur(parsed_exp.asList())

        result = []
        pos = 0

        for i in tab:
github dputhier / pygtftk / pygtftk / gtf_interface.py View on Github external
:param feat: The target features.
        :param keys: The source keys.
        :param new_key: The destination key.
        :param sep: The separator.

        >>> from  pygtftk.utils import get_example_file
        >>> from pygtftk.gtf_interface import GTF
        >>> a_file = get_example_file()[0]
        >>> a_gtf = GTF(a_file)
        >>> a_list = a_gtf.merge_attr(feat="exon,transcript,CDS", keys="gene_id,transcript_id", new_key="merge").extract_data("merge", hide_undef=True, as_list=True, nr=True)
        >>> assert a_list[0] == 'G0001|G0001T002'
        """

        if sep == "\t":
            raise GTFtkError("Tabulation is not allowed as a separator.")

        if new_key in self.get_attr_list(add_basic=False, as_dict=True):

            tmp_file = make_tmp_file(prefix="merge_attr",
                                     suffix=".txt")
            if feat == "*":
                self.extract_data(keys,
                                  no_na=False,
                                  hide_undef=False).write(tmp_file,
                                                          sep=sep)
                self = self.del_attr("*", new_key, force=True)
                self = self.add_attr_column(tmp_file, new_key)
                return self

            else:
github dputhier / pygtftk / pygtftk / gtf_interface.py View on Github external
>>> from pygtftk.gtf_interface import GTF
        >>> from pygtftk.utils import TAB
        >>> a_file = get_example_file()[0]
        >>> a_gtf = GTF(a_file)
        >>> b_gtf = a_gtf.add_attr_from_list(feat="gene", key="gene_id", key_value=("G0001", "G0002"), new_key="coding_pot", new_key_value=("0.5", "0.8"))
        >>> assert b_gtf.extract_data(keys="coding_pot", as_list=True, no_na=True, hide_undef=True) == ['0.5', '0.8']
        >>> b_gtf = a_gtf.add_attr_from_list(feat="gene", key="gene_id", key_value=("G0002", "G0001"), new_key="coding_pot", new_key_value=("0.8", "0.5"))
        >>> assert b_gtf.extract_data(keys="coding_pot", as_list=True, no_na=True, hide_undef=True) == ['0.5', '0.8']
        >>> key_value = tuple(a_gtf.extract_data("transcript_id", no_na=True, as_list=True, nr=True))
        >>> b=a_gtf.add_attr_from_list(None, key="transcript_id", key_value=key_value, new_key="bla", new_key_value=tuple([str(x) for x in range(len(key_value))]))
        """

        message("Calling add_attr_from_list", type="DEBUG")

        if not isinstance(key_value, tuple) or not isinstance(new_key_value, tuple):
            raise GTFtkError("key_value and new_key_value should be tuple.")

        if feat is None:
            feat = ",".join(self.get_feature_list(nr=True))

        if len(set(key_value)) != len(key_value):
            raise GTFtkError("Each key should appear once in key_value.")

        if len(key_value) != len(new_key_value):
            raise GTFtkError(
                "key_value and new_key_value should have the same length.")

        if len(key_value) == 0:
            raise GTFtkError(
                "Need some data to join.")

        tmp_file = make_tmp_file("add_attr", ".txt")
github dputhier / pygtftk / pygtftk / Line.py View on Github external
if format not in ['bed6', 'bed', 'bed3']:
            raise GTFtkError('Unsupported bed format')

        if pygtftk.utils.ADD_CHR == 1:
            chrom_out = "chr" + self.chrom
        else:
            chrom_out = self.chrom

        token = [chrom_out,
                 str(int(self.get_5p_end()) - 1),
                 str(self.get_5p_end())]

        if format == 'bed6' or format == 'bed':
            if name is None:
                raise GTFtkError("Need a name (column 4) to write a BED6 format.")
            token += [name,
                      str(self.score),
                      self.strand]

        pygtftk.utils.write_properly('\t'.join(token), outputfile)
github dputhier / pygtftk / pygtftk / gtf_interface.py View on Github external
if input_obj == '-':
                self.fn = "-"
            else:
                if input_obj != '':
                    check_file_or_dir_exists(input_obj)
                    self.fn = input_obj
                    self._data = 0
                else:
                    self.fn = "-"
                    self._data = 0
        elif isinstance(input_obj, GTF):
            self.fn = input_obj.fn
            self._data = input_obj._data
        else:
            raise GTFtkError("Unsupported input type.")

        message("Instantiating a GTF.")

        if new_data is None:

            self._data = self._dll.load_GTF(native_str(self.fn))

            if check_ensembl_format:

                tab = self.extract_data_iter_list("feature")

                not_found = True
                n = 0

                for i in tab:
                    n += 1