How to use the wordfreq.top_n_list function in wordfreq

To help you get started, we’ve selected a few wordfreq examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github OWASP / passfault / wordlists / wordlist scripts / wordlists.py View on Github external
from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
import matplotlib.pyplot as plt

# Directory prefix for the generated word list files.
# NOTE(review): dest already ends with '/' and the file names below start
# with '/', so the resulting paths contain a double slash.
dest = "../src/org/owasp/passfault/wordlists/"

# One word list per language, each requesting up to 1e5 (100,000) entries
# from wordfreq's 'large' wordlist. The count is passed as a float literal.
de = top_n_list('de', 1e5, wordlist='large')
en = top_n_list('en', 1e5, wordlist='large')
es = top_n_list('es', 1e5, wordlist='large')
fr = top_n_list('fr', 1e5, wordlist='large')
it = top_n_list('it', 1e5, wordlist='large')
nl = top_n_list('nl', 1e5, wordlist='large')
pt = top_n_list('pt', 1e5, wordlist='large')

#---------------------------------------------------------------
# Output files for the German list, opened for writing (truncating any
# existing content). They are not closed anywhere in this excerpt.
dePopular = open(dest + '/dePopular.words', 'w')
deLongTail = open(dest + '/deLongTail.words', 'w')

# Total frequency mass: the sum of word_frequency() over every word in `de`.
integral100 = 0
for i in range(len(de)):
    integral100 += word_frequency(de[i], 'de', wordlist='large')

print(integral100)

# Running (cumulative) frequency sum over the same words, in list order.
# NOTE(review): the excerpt ends inside this loop; presumably the running sum
# is compared against a fraction of integral100 further down -- confirm.
integral80 = 0
for i in range(len(de)):
    integral80 += word_frequency(de[i], 'de', wordlist='large')
github OWASP / passfault / wordlists / languageWordlists.py View on Github external
#Written by Bernardo Rodrigues (bernardoaraujor@gmail.com)
#Based on Luminoso Insight's wordfreq module (https://github.com/LuminosoInsight/wordfreq/)

from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
#import matplotlib.pyplot as plt

# Directory prefix for the generated word list files.
# NOTE(review): dest already ends with '/' and the file names below start
# with '/', so the resulting paths contain a double slash.
dest = "./wordlists/"

# One word list per language, each requesting up to 1e5 (100,000) entries.
# The count is passed as a float literal. Calls without a `wordlist` argument
# (fi, hi, ja, sv, zh) fall back to top_n_list's default wordlist; the rest
# explicitly ask for 'large'.
ar = top_n_list('ar', 1e5, wordlist='large')
de = top_n_list('de', 1e5, wordlist='large')
en = top_n_list('en', 1e5, wordlist='large')
es = top_n_list('es', 1e5, wordlist='large')
fi = top_n_list('fi', 1e5)
fr = top_n_list('fr', 1e5, wordlist='large')
hi = top_n_list('hi', 1e5)
it = top_n_list('it', 1e5, wordlist='large')
ja = top_n_list('ja', 1e5)
nl = top_n_list('nl', 1e5, wordlist='large')
sv = top_n_list('sv', 1e5)
pt = top_n_list('pt', 1e5, wordlist='large')
zh = top_n_list('zh', 1e5)

#---------------------------------------------------------------
# Output files for the Arabic list, opened for writing (truncating any
# existing content). They are not closed anywhere in this excerpt.
arPopular = open(dest + '/arPopular.txt', 'w')
arLongTail = open(dest + '/arLongTail.txt', 'w')
github LuminosoInsight / wordfreq / scripts / top_n.py View on Github external
"""
A quick script to output the top N words (1000 for now) in each language.
You can send the output to a file and diff it to see changes between wordfreq
versions.
"""
import wordfreq


# Number of top-ranked words to print for each language.
N = 1000

if __name__ == '__main__':
    # Sort the language codes so the output order is stable between runs,
    # which keeps diffs between wordfreq versions meaningful.
    for lang in sorted(wordfreq.available_languages()):
        # Use the N constant rather than a duplicated literal, so changing N
        # actually changes how many words are printed.
        for word in wordfreq.top_n_list(lang, N):
            # One "<lang>\t<word>" line per word.
            print('{}\t{}'.format(lang, word))
github OWASP / passfault / wordlists / languageWordlists.py View on Github external
#Written by Bernardo Rodrigues (bernardoaraujor@gmail.com)
#Based on Luminoso Insight's wordfreq module (https://github.com/LuminosoInsight/wordfreq/)

from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
#import matplotlib.pyplot as plt

# Directory prefix for the generated word list files.
# NOTE(review): dest already ends with '/' and the file names below start
# with '/', so the resulting paths contain a double slash.
dest = "./wordlists/"

# One word list per language, each requesting up to 1e5 (100,000) entries.
# The count is passed as a float literal. Calls without a `wordlist` argument
# (fi, hi, ja, sv, zh) fall back to top_n_list's default wordlist; the rest
# explicitly ask for 'large'.
ar = top_n_list('ar', 1e5, wordlist='large')
de = top_n_list('de', 1e5, wordlist='large')
en = top_n_list('en', 1e5, wordlist='large')
es = top_n_list('es', 1e5, wordlist='large')
fi = top_n_list('fi', 1e5)
fr = top_n_list('fr', 1e5, wordlist='large')
hi = top_n_list('hi', 1e5)
it = top_n_list('it', 1e5, wordlist='large')
ja = top_n_list('ja', 1e5)
nl = top_n_list('nl', 1e5, wordlist='large')
sv = top_n_list('sv', 1e5)
pt = top_n_list('pt', 1e5, wordlist='large')
zh = top_n_list('zh', 1e5)

#---------------------------------------------------------------
# Output files for the Arabic list, opened for writing (truncating any
# existing content). They are not closed anywhere in this excerpt.
arPopular = open(dest + '/arPopular.txt', 'w')
arLongTail = open(dest + '/arLongTail.txt', 'w')
github OWASP / passfault / wordlists / wordlist scripts / wordlists.py View on Github external
from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
import matplotlib.pyplot as plt

# Directory prefix for the generated word list files.
# NOTE(review): dest already ends with '/' and the file names below start
# with '/', so the resulting paths contain a double slash.
dest = "../src/org/owasp/passfault/wordlists/"

# One word list per language, each requesting up to 1e5 (100,000) entries
# from wordfreq's 'large' wordlist. The count is passed as a float literal.
de = top_n_list('de', 1e5, wordlist='large')
en = top_n_list('en', 1e5, wordlist='large')
es = top_n_list('es', 1e5, wordlist='large')
fr = top_n_list('fr', 1e5, wordlist='large')
it = top_n_list('it', 1e5, wordlist='large')
nl = top_n_list('nl', 1e5, wordlist='large')
pt = top_n_list('pt', 1e5, wordlist='large')

#---------------------------------------------------------------
# Output files for the German list, opened for writing (truncating any
# existing content). They are not closed anywhere in this excerpt.
dePopular = open(dest + '/dePopular.words', 'w')
deLongTail = open(dest + '/deLongTail.words', 'w')

# Total frequency mass: the sum of word_frequency() over every word in `de`.
integral100 = 0
for i in range(len(de)):
    integral100 += word_frequency(de[i], 'de', wordlist='large')

print(integral100)

# Accumulator for a second pass over `de`; the loop that fills it starts on
# the next line of the excerpt.
integral80 = 0
for i in range(len(de)):
github OWASP / passfault / wordlists / wordlist scripts / wordlists.py View on Github external
from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
import matplotlib.pyplot as plt

# Directory prefix for the generated word list files.
# NOTE(review): dest already ends with '/' and the file names below start
# with '/', so the resulting paths contain a double slash.
dest = "../src/org/owasp/passfault/wordlists/"

# One word list per language, each requesting up to 1e5 (100,000) entries
# from wordfreq's 'large' wordlist. The count is passed as a float literal.
de = top_n_list('de', 1e5, wordlist='large')
en = top_n_list('en', 1e5, wordlist='large')
es = top_n_list('es', 1e5, wordlist='large')
fr = top_n_list('fr', 1e5, wordlist='large')
it = top_n_list('it', 1e5, wordlist='large')
nl = top_n_list('nl', 1e5, wordlist='large')
pt = top_n_list('pt', 1e5, wordlist='large')

#---------------------------------------------------------------
# Output files for the German list, opened for writing (truncating any
# existing content). They are not closed anywhere in this excerpt.
dePopular = open(dest + '/dePopular.words', 'w')
deLongTail = open(dest + '/deLongTail.words', 'w')

# Total frequency mass: the sum of word_frequency() over every word in `de`.
integral100 = 0
for i in range(len(de)):
    integral100 += word_frequency(de[i], 'de', wordlist='large')

print(integral100)

# Second pass: keep a running (cumulative) frequency sum in list order.
integral80 = 0
for i in range(len(de)):
    integral80 += word_frequency(de[i], 'de', wordlist='large')
    # While the running sum is still within 80% of the total mass, the word
    # is written to the "popular" file, one word per line.
    # NOTE(review): the excerpt ends here; the remaining words presumably go
    # to deLongTail -- confirm against the full script.
    if (integral80 <= 0.80*integral100):
        dePopular.write(de[i] + '\n')
github OWASP / passfault / wordlists / languageWordlists.py View on Github external
from wordfreq import zipf_frequency
from wordfreq import top_n_list
#import matplotlib.pyplot as plt

# Directory prefix for the generated word list files.
# NOTE(review): dest already ends with '/' and the file names below start
# with '/', so the resulting paths contain a double slash.
dest = "./wordlists/"

# One word list per language, each requesting up to 1e5 (100,000) entries.
# The count is passed as a float literal. Calls without a `wordlist` argument
# (fi, hi, ja, sv, zh) fall back to top_n_list's default wordlist; the rest
# explicitly ask for 'large'.
ar = top_n_list('ar', 1e5, wordlist='large')
de = top_n_list('de', 1e5, wordlist='large')
en = top_n_list('en', 1e5, wordlist='large')
es = top_n_list('es', 1e5, wordlist='large')
fi = top_n_list('fi', 1e5)
fr = top_n_list('fr', 1e5, wordlist='large')
hi = top_n_list('hi', 1e5)
it = top_n_list('it', 1e5, wordlist='large')
ja = top_n_list('ja', 1e5)
nl = top_n_list('nl', 1e5, wordlist='large')
sv = top_n_list('sv', 1e5)
pt = top_n_list('pt', 1e5, wordlist='large')
zh = top_n_list('zh', 1e5)

#---------------------------------------------------------------
# Output files for the Arabic list, opened for writing (truncating any
# existing content). They are not closed anywhere in this excerpt.
arPopular = open(dest + '/arPopular.txt', 'w')
arLongTail = open(dest + '/arLongTail.txt', 'w')

# Total frequency mass: the sum of word_frequency() over every word in `ar`.
# NOTE(review): `ar` was fetched with wordlist='large', but the frequencies
# here are looked up without a `wordlist` argument (the function's default)
# -- confirm the two are meant to match.
# NOTE(review): word_frequency is not imported in the visible part of this
# excerpt; presumably the import sits just above it -- confirm.
integral100 = 0
for i in range(len(ar)):
    integral100 += word_frequency(ar[i], 'ar')

# Second pass: running (cumulative) frequency sum over the same words; the
# excerpt continues with a threshold test on the next line.
integral80 = 0
for i in range(len(ar)):
    integral80 += word_frequency(ar[i], 'ar')
    if (integral80 <= 0.80*integral100):
github OWASP / passfault / wordlists / wordlist scripts / wordlists.py View on Github external
from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
import matplotlib.pyplot as plt

# Directory prefix for the generated word list files.
# NOTE(review): dest already ends with '/' and the file names below start
# with '/', so the resulting paths contain a double slash.
dest = "../src/org/owasp/passfault/wordlists/"

# One word list per language, each requesting up to 1e5 (100,000) entries
# from wordfreq's 'large' wordlist. The count is passed as a float literal.
de = top_n_list('de', 1e5, wordlist='large')
en = top_n_list('en', 1e5, wordlist='large')
es = top_n_list('es', 1e5, wordlist='large')
fr = top_n_list('fr', 1e5, wordlist='large')
it = top_n_list('it', 1e5, wordlist='large')
nl = top_n_list('nl', 1e5, wordlist='large')
pt = top_n_list('pt', 1e5, wordlist='large')

#---------------------------------------------------------------
# Output files for the German list, opened for writing (truncating any
# existing content). They are not closed anywhere in this excerpt.
dePopular = open(dest + '/dePopular.words', 'w')
deLongTail = open(dest + '/deLongTail.words', 'w')

# Total frequency mass: the sum of word_frequency() over every word in `de`.
integral100 = 0
for i in range(len(de)):
    integral100 += word_frequency(de[i], 'de', wordlist='large')

print(integral100)

# Accumulator initialised for a later pass; the excerpt ends before it is
# used.
integral80 = 0
github OWASP / passfault / wordlists / languageWordlists.py View on Github external
#Written by Bernardo Rodrigues (bernardoaraujor@gmail.com)
#Based on Luminoso Insight's wordfreq module (https://github.com/LuminosoInsight/wordfreq/)

from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
#import matplotlib.pyplot as plt

# Directory prefix for the generated word list files.
# NOTE(review): dest already ends with '/' and the file names below start
# with '/', so the resulting paths contain a double slash.
dest = "./wordlists/"

# One word list per language, each requesting up to 1e5 (100,000) entries.
# The count is passed as a float literal. Calls without a `wordlist` argument
# (fi, hi, ja, sv, zh) fall back to top_n_list's default wordlist; the rest
# explicitly ask for 'large'.
ar = top_n_list('ar', 1e5, wordlist='large')
de = top_n_list('de', 1e5, wordlist='large')
en = top_n_list('en', 1e5, wordlist='large')
es = top_n_list('es', 1e5, wordlist='large')
fi = top_n_list('fi', 1e5)
fr = top_n_list('fr', 1e5, wordlist='large')
hi = top_n_list('hi', 1e5)
it = top_n_list('it', 1e5, wordlist='large')
ja = top_n_list('ja', 1e5)
nl = top_n_list('nl', 1e5, wordlist='large')
sv = top_n_list('sv', 1e5)
pt = top_n_list('pt', 1e5, wordlist='large')
zh = top_n_list('zh', 1e5)

#---------------------------------------------------------------
# Output files for the Arabic list, opened for writing (truncating any
# existing content). They are not closed anywhere in this excerpt.
arPopular = open(dest + '/arPopular.txt', 'w')
arLongTail = open(dest + '/arLongTail.txt', 'w')

# Total frequency mass: the sum of word_frequency() over every word in `ar`.
# NOTE(review): `ar` was fetched with wordlist='large', but the frequencies
# here are looked up without a `wordlist` argument (the function's default)
# -- confirm the two are meant to match.
integral100 = 0
for i in range(len(ar)):
    integral100 += word_frequency(ar[i], 'ar')