How to use the ngram.LetterModels.Unigram class in ngram

To help you get started, we’ve selected a few ngram examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Ezhil-Language-Foundation / open-tamil / ngram / LetterModels.py View on Github external
prev = next_letter #update always
        return
        
    def save(self,filename):
        """ write every non-zero bigram count to filename as UTF-8, highest count first; returns True """
        with codecs.open(filename,"w","utf-8") as fp:
            # flatten the two-level table: key is the outer key followed by the inner key
            flat = {}
            for first,row in self.letter2.items():
                for second,count in row.items():
                    if count != 0:
                        flat[first+second] = count
            ranked = sorted(flat.items(),key=operator.itemgetter(1),reverse=True)
            # one "<pair> - <count>" line per surviving entry
            for pair,count in ranked:
                fp.write(u"%s - %d\n"%(pair,count))
        return True

class Trigram(Unigram):
    """ counts three-letter sequences taken from the letter stream of self.corpus """
    def __init__(self,filename):
        Unigram.__init__(self,filename)
        # maps a concatenated three-letter string to the number of times it was seen
        self.letter3 = {}

    def language_model(self,verbose=True):
        """ builds a Tamil trigram letter model; verbose is accepted but not used here """
        # sliding window holding the two letters that precede the current one
        older, newer = None, None
        for current in self.corpus.next_tamil_letter():
            # counting starts only once two earlier letters are available
            if older:
                key = older+newer+current
                self.letter3[key] = self.letter3.get(key,0) + 1
            # shift the window forward on every letter
            older, newer = newer, current
github vasurenganathan / tamil-tts / python / pos / tamilvu_ngram.py View on Github external
def __init__(self):
        """ build unigram, bigram and trigram letter models from the file named in self.filename """
        # the input path is fixed here; every model below is constructed from it
        self.filename = u'tamilvu_dictionary_words.txt'
        # NOTE(review): Unigram/Bigram/Trigram are presumably the ngram.LetterModels classes - confirm against this file's imports
        self.unigram = Unigram(self.filename)
        self.unigram.frequency_model()
        print(u"--- completed Unigram model ---")
        # verbose=False is passed to both language_model calls
        self.bigram = Bigram(self.filename)
        self.bigram.language_model(verbose=False)
        self.trigram = Trigram(self.filename)
        self.trigram.language_model(verbose=False)
        
        print(u"--- completed Bigram,Trigram model ---")
github Ezhil-Language-Foundation / open-tamil / ngram / LetterModels.py View on Github external
def frequency_model( self ):
        """ count how often each letter yielded by the corpus occurs, accumulating into self.letter """
        # the corpus hands out letters one at a time through a generator
        for symbol in self.corpus.next_tamil_letter():
            # the entry must already exist in self.letter; a missing key raises KeyError
            self.letter[symbol] += 1
    
    def save(self,filename):
        """ write every non-zero letter count to filename as UTF-8, highest count first; returns True """
        with codecs.open(filename,"w","utf-8") as fp:
            # drop zero counts up front; the stable sort keeps ties in their original order
            seen = [(letter,count) for letter,count in self.letter.items() if count != 0]
            seen.sort(key=operator.itemgetter(1),reverse=True)
            # one "<letter> - <count>" line per surviving entry
            for letter,count in seen:
                fp.write(u"%s - %d\n"%(letter,count))
        return True
    
class Bigram(Unigram):
    def __init__(self,filename):
        """ run the Unigram setup, then give every Tamil letter its own copy of the letter table """
        Unigram.__init__(self,filename)
        # outer key: a letter from tamil.utf8.tamil_letters; value: an independent shallow copy of self.letter
        self.letter2 = {
            first: copy.copy( self.letter )
            for first in tamil.utf8.tamil_letters
        }
    
    def language_model(self,verbose=True):
        """ builds a Tamil bigram letter model """
        # use a generator in corpus
        prev = None
        for next_letter in self.corpus.next_tamil_letter():
            # update frequency from corpus
            if prev:
                self.letter2[prev][next_letter] += 1
                if ( verbose ) :
                    print(prev)