How to use the kipoiseq.utils.DNA alphabet constant in kipoiseq

To help you get started, we’ve selected a few kipoiseq examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github kipoi / kipoiseq / tests / test_0_transforms_functional.py View on Github external
def test_tokenize_one_hot():
    """Check one-hot output shape and token values for short DNA strings."""
    # Three input characters one-hot encode to a (3, 4) array.
    encoded = one_hot("ACG", DNA, "N")
    assert encoded.shape == (3, 4)

    # Tokenizing yields one integer per input character.
    tokens = tokenize("ACG", DNA, "N")
    assert tokens.shape == (3,)
    expected = np.array([0, 1, 2])
    assert np.array_equal(tokens, expected)

    # N mapped to -1
    tokens = tokenize("TGTN", DNA, "N")
    expected = np.array([3, 2, 3, -1])
    assert np.array_equal(tokens, expected)
github kipoi / kipoiseq / tests / test_0_transforms_functional.py View on Github external
def test_tokenize():
    """Exercise tokenize with single-letter and three-letter alphabets."""
    # Single-character alphabet: every letter becomes its index in DNA.
    single = tokenize("ACGTTA", DNA, neutral_alphabet="N")
    assert np.all(single == [0, 1, 2, 3, 3, 0])

    # Three-character alphabet entries: the sequence is read as triplets.
    triplets = tokenize("ACGTGATGA", ["ACG", "TGA"], neutral_alphabet="NNN")
    assert np.all(triplets == [0, 1, 1])

    # Triplets that belong to the neutral alphabet come back as -1.
    with_neutral = tokenize("ACGTGATGA", ["ACG"], neutral_alphabet="TGA")
    assert np.all(with_neutral == [0, -1, -1])

    # "TGA" is in neither the alphabet nor the neutral alphabet here,
    # so tokenize is expected to raise.
    with pytest.raises(Exception):
        tokenize("ACGTGATGA", ["ACG"], neutral_alphabet="NNN")
github kipoi / kipoiseq / kipoiseq / transforms / functional.py View on Github external
def tokenize(seq, alphabet=DNA, neutral_alphabet=["N"]):
    """Convert sequence to integers

    # Arguments
       seq: Sequence to encode
       alphabet: Alphabet to use
       neutral_alphabet: Neutral alphabet -> assign those values to -1

    # Returns
       List of length `len(seq)` with integers from `-1` to `len(alphabet) - 1`
    """
    # Req: all alphabets have the same length
    if isinstance(neutral_alphabet, str):
        neutral_alphabet = [neutral_alphabet]

    nchar = len(alphabet[0])
    for l in alphabet + neutral_alphabet:
github kipoi / kipoiseq / kipoiseq / transforms / transforms.py View on Github external
    def __init__(self, alphabet=DNA, neutral_alphabet='N', neutral_value=0.25, dtype=None):
        """Store the encoding configuration on the instance.

        # Arguments
           alphabet: kept as `self.alphabet`
           neutral_alphabet: kept as `self.neutral_alphabet`; a single
              string is wrapped into a one-element list
           neutral_value: kept as `self.neutral_value`
           dtype: kept as `self.dtype`
        """
        # Normalise so the neutral alphabet is always stored as a list,
        # even when the caller passes a bare string such as 'N'.
        wrapped = [neutral_alphabet] if isinstance(neutral_alphabet, str) else neutral_alphabet

        self.alphabet = alphabet
        self.neutral_alphabet = wrapped
        self.neutral_value = neutral_value
        self.dtype = dtype
github kipoi / kipoiseq / kipoiseq / transforms / functional.py View on Github external
def one_hot2string(arr, alphabet=DNA):
    """Convert a one-hot encoded array back to strings

    # Arguments
       arr: one-hot encoded array, passed to `one_hot2token`
       alphabet: alphabet used to build the index -> letter lookup

    # Returns
       List with one string per row of the token array
    """
    token_rows = one_hot2token(arr)
    letter_for = _get_index_dict(alphabet)

    decoded = []
    for token_row in token_rows:
        # Map every token in the row to its letter and glue them together.
        decoded.append(''.join(letter_for[token] for token in token_row))
    return decoded
github kipoi / kipoiseq / kipoiseq / transforms / transforms.py View on Github external
def __call__(self, seq):
        """One-hot encode `seq` with the settings stored on the instance.

        Dispatches to `F.one_hot_dna` when the instance holds the default
        DNA configuration, and to the generic `F.one_hot` otherwise.
        """
        # The DNA-specific path only applies to the exact default setup:
        # DNA alphabet, ['N'] as neutral alphabet and 0.25 as neutral value.
        is_default_dna = (self.alphabet == DNA
                          and self.neutral_alphabet == ['N']
                          and self.neutral_value == 0.25)
        if not is_default_dna:
            return F.one_hot(seq,
                             alphabet=self.alphabet,
                             neutral_alphabet=self.neutral_alphabet,
                             neutral_value=self.neutral_value,
                             dtype=self.dtype)
        return F.one_hot_dna(seq, self.dtype)
github kipoi / kipoiseq / kipoiseq / transforms / functional.py View on Github external
def one_hot_dna(seq, dtype=None):
    """One-hot encode DNA sequence

    # Arguments
       seq: sequence to encode; must be a `str`
       dtype: dtype requested for the returned array

    # Returns
       One-hot encoded array for `seq`

    # Raises
       ValueError: if `seq` is not a string
    """
    if not isinstance(seq, str):
        raise ValueError("seq needs to be a string")

    if one_hot_encode_sequence is None:
        # No accelerated encoder available: use the generic implementation
        # with the DNA alphabet and N as the neutral character.
        return one_hot(seq, alphabet=DNA, neutral_alphabet=['N'], neutral_value=.25, dtype=dtype)

    # genomelake's one_hot_encode_sequence could be imported
    n_positions = len(seq)
    encoded = np.zeros((n_positions, 4), dtype=np.float32)
    # Return value is ignored; presumably the encoder fills `encoded` in place.
    one_hot_encode_sequence(seq, encoded)
    return encoded.astype(dtype)