Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
import matplotlib.pyplot as plt
# Directory that receives the generated word lists.  It already ends in '/'
# and the file names below start with '/', so the joined paths contain '//'.
dest = "../src/org/owasp/passfault/wordlists/"

# One top_n_list call per language, each asking for 1e5 entries of wordfreq's
# 'large' list; calls run in the same order as the names on the left.
de, en, es, fr, it, nl, pt = (
    top_n_list(code, 1e5, wordlist='large')
    for code in ('de', 'en', 'es', 'fr', 'it', 'nl', 'pt')
)

#---------------------------------------------------------------
# Output files for the German split, opened for writing.
# NOTE(review): no matching close() is visible in this excerpt.
dePopular = open(dest + '/dePopular.words', mode='w')
deLongTail = open(dest + '/deLongTail.words', mode='w')
# Add up the frequency of every word in the German list; this total is the
# 100% reference for the split.  A plain running sum is kept on purpose so the
# floating-point result matches a left-to-right accumulation exactly.
integral100 = 0
for word in de:
    integral100 += word_frequency(word, 'de', wordlist='large')
print(integral100)
# Running total of word frequencies, restarted from zero for a second pass
# over the German list.
# NOTE(review): only the accumulation is visible here; nothing in this excerpt
# uses the running total, so the snippet appears to be cut off.
integral80 = 0
for i in range(len(de)):
integral80 += word_frequency(de[i], 'de', wordlist='large')
#Written by Bernardo Rodrigues (bernardoaraujor@gmail.com)
#Based on Luminoso Insight's wordfreq module (https://github.com/LuminosoInsight/wordfreq/)
from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
#import matplotlib.pyplot as plt
# Directory that receives the generated word lists (relative path).
dest = "./wordlists/"
# One top_n_list call per language, each asking for 1e5 entries.  Some calls
# request wordfreq's 'large' list explicitly; the rest use top_n_list's default.
# NOTE(review): presumably the default is used where no 'large' list exists
# for that language — confirm against the wordfreq documentation.
ar = top_n_list('ar', 1e5, wordlist='large')
de = top_n_list('de', 1e5, wordlist='large')
en = top_n_list('en', 1e5, wordlist='large')
es = top_n_list('es', 1e5, wordlist='large')
fi = top_n_list('fi', 1e5)
fr = top_n_list('fr', 1e5, wordlist='large')
hi = top_n_list('hi', 1e5)
it = top_n_list('it', 1e5, wordlist='large')
ja = top_n_list('ja', 1e5)
nl = top_n_list('nl', 1e5, wordlist='large')
sv = top_n_list('sv', 1e5)
pt = top_n_list('pt', 1e5, wordlist='large')
zh = top_n_list('zh', 1e5)
#---------------------------------------------------------------
# Output files for the Arabic split, opened for writing.  dest ends in '/' and
# the names start with '/', so the joined paths contain '//'.
# NOTE(review): no matching close() is visible in this excerpt.
arPopular = open(dest + '/arPopular.txt', 'w')
arLongTail = open(dest + '/arLongTail.txt', 'w')
"""
A quick script to output the top N words (1000 for now) in each language.
You can send the output to a file and diff it to see changes between wordfreq
versions.
"""
import wordfreq

# Number of top-ranked words printed per language; the only knob for the
# size of the output.
N = 1000

if __name__ == '__main__':
    # Sorted language codes keep the output order stable between runs, which
    # is what makes diffing two outputs meaningful.
    for lang in sorted(wordfreq.available_languages()):
        for word in wordfreq.top_n_list(lang, N):
            # One "<language code><TAB><word>" line per entry.
            print('{}\t{}'.format(lang, word))
#Written by Bernardo Rodrigues (bernardoaraujor@gmail.com)
#Based on Luminoso Insight's wordfreq module (https://github.com/LuminosoInsight/wordfreq/)
from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
#import matplotlib.pyplot as plt
# Directory that receives the generated word lists (relative path).
dest = "./wordlists/"
# One top_n_list call per language, each asking for 1e5 entries.  Some calls
# request wordfreq's 'large' list explicitly; the rest use top_n_list's default.
# NOTE(review): presumably the default is used where no 'large' list exists
# for that language — confirm against the wordfreq documentation.
ar = top_n_list('ar', 1e5, wordlist='large')
de = top_n_list('de', 1e5, wordlist='large')
en = top_n_list('en', 1e5, wordlist='large')
es = top_n_list('es', 1e5, wordlist='large')
fi = top_n_list('fi', 1e5)
fr = top_n_list('fr', 1e5, wordlist='large')
hi = top_n_list('hi', 1e5)
it = top_n_list('it', 1e5, wordlist='large')
ja = top_n_list('ja', 1e5)
nl = top_n_list('nl', 1e5, wordlist='large')
sv = top_n_list('sv', 1e5)
pt = top_n_list('pt', 1e5, wordlist='large')
zh = top_n_list('zh', 1e5)
#---------------------------------------------------------------
# Output files for the Arabic split, opened for writing.  dest ends in '/' and
# the names start with '/', so the joined paths contain '//'.
# NOTE(review): no matching close() is visible in this excerpt.
arPopular = open(dest + '/arPopular.txt', 'w')
arLongTail = open(dest + '/arLongTail.txt', 'w')
from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
import matplotlib.pyplot as plt
# Directory that receives the generated word lists.  It already ends in '/'
# and the file names below start with '/', so the joined paths contain '//'.
dest = "../src/org/owasp/passfault/wordlists/"

# One top_n_list call per language, each asking for 1e5 entries of wordfreq's
# 'large' list; calls run in the same order as the names on the left.
de, en, es, fr, it, nl, pt = (
    top_n_list(code, 1e5, wordlist='large')
    for code in ('de', 'en', 'es', 'fr', 'it', 'nl', 'pt')
)

#---------------------------------------------------------------
# Output files for the German split, opened for writing.
# NOTE(review): no matching close() is visible in this excerpt.
dePopular = open(dest + '/dePopular.words', mode='w')
deLongTail = open(dest + '/deLongTail.words', mode='w')
# Add up the frequency of every word in the German list; this total is the
# 100% reference for the split.  A plain running sum is kept on purpose so the
# floating-point result matches a left-to-right accumulation exactly.
integral100 = 0
for word in de:
    integral100 += word_frequency(word, 'de', wordlist='large')
print(integral100)
# Running total for a second pass over the German list, starting from zero.
integral80 = 0
# NOTE(review): no loop body is visible after this header — the excerpt
# appears to be cut off at this point.
for i in range(len(de)):
from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
import matplotlib.pyplot as plt
# Directory that receives the generated word lists.  It already ends in '/'
# and the file names below start with '/', so the joined paths contain '//'.
dest = "../src/org/owasp/passfault/wordlists/"

# One top_n_list call per language, each asking for 1e5 entries of wordfreq's
# 'large' list; calls run in the same order as the names on the left.
de, en, es, fr, it, nl, pt = (
    top_n_list(code, 1e5, wordlist='large')
    for code in ('de', 'en', 'es', 'fr', 'it', 'nl', 'pt')
)

#---------------------------------------------------------------
# Output files for the German split, opened for writing.
# NOTE(review): no matching close() is visible in this excerpt.
dePopular = open(dest + '/dePopular.words', mode='w')
deLongTail = open(dest + '/deLongTail.words', mode='w')
# First pass: add up the frequency of every word in the German list.  The
# total is the 100% reference for the split below.
integral100 = 0
for word in de:
    integral100 += word_frequency(word, 'de', wordlist='large')
print(integral100)

# Second pass: keep a running total in list order and write each word to the
# "popular" file for as long as that running total stays within 80% of the
# overall sum.
cutoff = 0.80*integral100
integral80 = 0
for word in de:
    integral80 += word_frequency(word, 'de', wordlist='large')
    if integral80 <= cutoff:
        dePopular.write(word + '\n')
from wordfreq import zipf_frequency
from wordfreq import top_n_list
#import matplotlib.pyplot as plt
# Directory that receives the generated word lists (relative path).
dest = "./wordlists/"
# One top_n_list call per language, each asking for 1e5 entries.  Some calls
# request wordfreq's 'large' list explicitly; the rest use top_n_list's default.
# NOTE(review): presumably the default is used where no 'large' list exists
# for that language — confirm against the wordfreq documentation.
ar = top_n_list('ar', 1e5, wordlist='large')
de = top_n_list('de', 1e5, wordlist='large')
en = top_n_list('en', 1e5, wordlist='large')
es = top_n_list('es', 1e5, wordlist='large')
fi = top_n_list('fi', 1e5)
fr = top_n_list('fr', 1e5, wordlist='large')
hi = top_n_list('hi', 1e5)
it = top_n_list('it', 1e5, wordlist='large')
ja = top_n_list('ja', 1e5)
nl = top_n_list('nl', 1e5, wordlist='large')
sv = top_n_list('sv', 1e5)
pt = top_n_list('pt', 1e5, wordlist='large')
zh = top_n_list('zh', 1e5)
#---------------------------------------------------------------
# Output files for the Arabic split, opened for writing.  dest ends in '/' and
# the names start with '/', so the joined paths contain '//'.
# NOTE(review): no matching close() is visible in this excerpt.
arPopular = open(dest + '/arPopular.txt', 'w')
arLongTail = open(dest + '/arLongTail.txt', 'w')
# Add up the frequency of every word in the Arabic list; this total is the
# 100% reference for the split.  word_frequency is called without a wordlist
# argument here, so it uses its default list.
integral100 = 0
for word in ar:
    integral100 += word_frequency(word, 'ar')
# Second pass over the Arabic list, keeping a running frequency total.
integral80 = 0
for i in range(len(ar)):
integral80 += word_frequency(ar[i], 'ar')
# True while the running total is within 80% of the full sum (integral100).
# NOTE(review): the body of this branch is not visible — the excerpt appears
# to be cut off at this point.
if (integral80 <= 0.80*integral100):
from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
import matplotlib.pyplot as plt
# Directory that receives the generated word lists.  It already ends in '/'
# and the file names below start with '/', so the joined paths contain '//'.
dest = "../src/org/owasp/passfault/wordlists/"

# One top_n_list call per language, each asking for 1e5 entries of wordfreq's
# 'large' list; calls run in the same order as the names on the left.
de, en, es, fr, it, nl, pt = (
    top_n_list(code, 1e5, wordlist='large')
    for code in ('de', 'en', 'es', 'fr', 'it', 'nl', 'pt')
)

#---------------------------------------------------------------
# Output files for the German split, opened for writing.
# NOTE(review): no matching close() is visible in this excerpt.
dePopular = open(dest + '/dePopular.words', mode='w')
deLongTail = open(dest + '/deLongTail.words', mode='w')
# Add up the frequency of every word in the German list; this total is the
# 100% reference for the split.  A plain running sum is kept on purpose so the
# floating-point result matches a left-to-right accumulation exactly.
integral100 = 0
for word in de:
    integral100 += word_frequency(word, 'de', wordlist='large')
print(integral100)

# Running total for the second pass starts from zero.
integral80 = 0
#Written by Bernardo Rodrigues (bernardoaraujor@gmail.com)
#Based on Luminoso Insight's wordfreq module (https://github.com/LuminosoInsight/wordfreq/)
from wordfreq import word_frequency
from wordfreq import zipf_frequency
from wordfreq import top_n_list
#import matplotlib.pyplot as plt
# Directory that receives the generated word lists (relative path).
dest = "./wordlists/"
# One top_n_list call per language, each asking for 1e5 entries.  Some calls
# request wordfreq's 'large' list explicitly; the rest use top_n_list's default.
# NOTE(review): presumably the default is used where no 'large' list exists
# for that language — confirm against the wordfreq documentation.
ar = top_n_list('ar', 1e5, wordlist='large')
de = top_n_list('de', 1e5, wordlist='large')
en = top_n_list('en', 1e5, wordlist='large')
es = top_n_list('es', 1e5, wordlist='large')
fi = top_n_list('fi', 1e5)
fr = top_n_list('fr', 1e5, wordlist='large')
hi = top_n_list('hi', 1e5)
it = top_n_list('it', 1e5, wordlist='large')
ja = top_n_list('ja', 1e5)
nl = top_n_list('nl', 1e5, wordlist='large')
sv = top_n_list('sv', 1e5)
pt = top_n_list('pt', 1e5, wordlist='large')
zh = top_n_list('zh', 1e5)
#---------------------------------------------------------------
# Output files for the Arabic split, opened for writing.  dest ends in '/' and
# the names start with '/', so the joined paths contain '//'.
# NOTE(review): no matching close() is visible in this excerpt.
arPopular = open(dest + '/arPopular.txt', 'w')
arLongTail = open(dest + '/arLongTail.txt', 'w')
integral100 = 0
for i in range(len(ar)):
integral100 += word_frequency(ar[i], 'ar')