Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_languages():
    """Check which languages each wordlist covers, then look up '2' in each.

    The coverage assertions compare the default ('best'), 'small',
    'combined' and 'large' wordlists. The final loop looks up the digit '2'
    under every available language code, both as given and as a
    deliberately verbose variant of the same code.
    """
    # Make sure we get all the languages when looking for the default
    # 'best' wordlist
    avail = available_languages()
    assert len(avail) >= 34

    # 'small' covers the same languages, but with some different lists
    avail_small = available_languages('small')
    assert len(avail_small) == len(avail)
    assert avail_small != avail

    # 'combined' is the same as 'small'
    avail_old_name = available_languages('combined')
    assert avail_old_name == avail_small

    # 'large' covers fewer languages
    avail_large = available_languages('large')
    assert len(avail_large) >= 14
    assert len(avail) > len(avail_large)

    # Look up the digit '2' in the main word list for each language
    for lang in avail:
        assert word_frequency('2', lang) > 0

        # Make up a weirdly verbose language code and make sure
        # we still get it
        new_lang_code = f'{lang.upper()}-001-x-fake-extension'
        assert word_frequency('2', new_lang_code) > 0
def test_languages():
    """Verify wordlist language coverage and per-language lookups of '2'."""
    # Calling without arguments consults the default 'best' wordlist,
    # which should report every language
    best_langs = available_languages()
    assert len(best_langs) >= 34

    # The 'small' wordlist spans just as many languages, though the
    # result is not identical to the 'best' one
    small_langs = available_languages('small')
    assert len(small_langs) == len(best_langs)
    assert small_langs != best_langs

    # Asking for 'combined' must give the same result as 'small'
    combined_langs = available_languages('combined')
    assert combined_langs == small_langs

    # The 'large' wordlist is available for fewer languages
    large_langs = available_languages('large')
    assert len(large_langs) >= 14
    assert len(large_langs) < len(best_langs)

    for code in best_langs:
        # The digit '2' should have a nonzero frequency in each language
        assert word_frequency('2', code) > 0

        # An upper-cased code padded with extra subtags should still
        # find the same language
        verbose_code = '%s-001-x-fake-extension' % code.upper()
        assert word_frequency('2', verbose_code) > 0
def test_languages():
    """Compare the language coverage of the different wordlists."""
    # Default lookup uses the 'best' wordlist; all languages are expected
    default_set = available_languages()
    assert len(default_set) >= 34

    # 'small' has the same number of languages but is not an identical result
    small_set = available_languages('small')
    assert len(small_set) == len(default_set)
    assert small_set != default_set

    # 'combined' and 'small' must agree exactly
    combined_set = available_languages('combined')
    assert combined_set == small_set

    # 'large' exists for a smaller number of languages
    large_set = available_languages('large')
    assert len(large_set) >= 14
    assert len(large_set) < len(default_set)
"""
A quick script to output the top N words (1000 for now) in each language.
You can send the output to a file and diff it to see changes between wordfreq
versions.
"""
import wordfreq
# Number of top-ranked words to print for each language.
N = 1000

if __name__ == '__main__':
    # Iterate language codes in sorted order so the output order is
    # deterministic and two runs can be diffed.
    for lang in sorted(wordfreq.available_languages()):
        # Pass N instead of repeating the literal, so that changing the
        # constant above actually changes how many words are printed.
        for word in wordfreq.top_n_list(lang, N):
            # One "language<TAB>word" line per entry.
            print('{}\t{}'.format(lang, word))
def _check_language_settings(self, lang_freq: str):
    """Check that the supplied language is compatible with the wordfreq package.

    On success the language is stored on ``self.lang_freq`` (as a ``str``)
    and an informational message is logged.

    Parameters
    ----------
    lang_freq : str
        The language used to induce the frequencies into the wv.vocab object.

    Raises
    ------
    ValueError
        If ``lang_freq`` is not among the languages reported by
        ``available_languages(wordlist='best')``.
    """
    supported = available_languages(wordlist='best')

    # Reject unknown languages first so the success path stays unindented.
    if lang_freq not in supported:
        raise ValueError(f"Language {lang_freq} is not available in wordfreq")

    self.lang_freq = str(lang_freq)
    logger.info("no frequency mode: using wordfreq for estimation "
                f"of frequency for language: {self.lang_freq}")