import codecs
from collections import defaultdict

# standardize_word, NUMBER_RE, CONN, _scale_freqs, and ftfy are assumed to be
# defined or imported elsewhere in this module; they are not shown here.


def _read_csv_basic(filename):
    infile = codecs.open(filename, encoding='utf-8')
    counts = {}
    for line in infile:
        if ',' in line:
            line = line.rstrip('\n')
            word, count = line.rsplit(',', 1)
            count = float(count)
            counts[standardize_word(word)] = count
    return counts
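
# Illustrative sketch of the input _read_csv_basic expects (values and path are
# made up): one "word,count" pair per line. rsplit(',', 1) splits on the last
# comma only, so the word itself may contain commas.
#
#     the,23135851162
#     of,13151942776
#
#     counts = _read_csv_basic('counts-en.csv')   # hypothetical path
#     counts['the']  # -> 23135851162.0, keyed by standardize_word(word)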
"""
Load word frequencies from a "Web as Corpus" file, collected and
provided by the University of Leeds.
For more information, see: http://corpus.leeds.ac.uk/list.html
"""
infile = codecs.open(filename, encoding='utf-8')
counts = defaultdict(float)
for line in infile:
line = line.rstrip()
if line:
rank = line.split(' ')[0]
if NUMBER_RE.match(rank) and line.count(' ') == 2:
_, freq, token = line.split(' ')
token = standardize_word(ftfy(token))
freq = float(freq)
counts[token] += freq
return _scale_freqs(counts)
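
# Illustrative sketch of the Leeds "Web as Corpus" lines this parser accepts
# (the values and filename are made up): each data line is "rank frequency
# token" separated by single spaces; lines whose first field is not a number,
# or that don't contain exactly two spaces, are skipped.
#
#     1 43399.95 the
#     2 26071.82 of
#
#     counts = read_leeds_corpus('internet-en.num')   # hypothetical path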


def word_frequency(word, lang, wordlist='multi', offset=0.):
    """
    Get the frequency of `word` in the language with code `lang`, from the
    specified `wordlist`.

    The offset is added to every value, to account for the fact that we have
    not observed all possible words, without changing their relative ranking.
    """
    c = CONN.cursor()
    c.execute("SELECT freq FROM words WHERE word=? AND lang=? AND wordlist=?",
              (standardize_word(word), lang, wordlist))
    row = c.fetchone()
    if row is None:
        return offset
    else:
        return row[0] + offset
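

# A minimal usage sketch, assuming CONN points at a populated frequency
# database (the words and offset below are illustrative):
if __name__ == '__main__':
    print(word_frequency('the', 'en'))                        # stored frequency, or 0.0
    print(word_frequency('notarealword', 'en', offset=1e-9))  # unseen word: just the offset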