Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""
from tempfile import TemporaryDirectory
if upload_path is None:
upload_path = config.UPLOAD_PATH
with TemporaryDirectory('.wordfreq') as build_dir:
version_dir = os.path.join(build_dir, config.MINOR_VERSION)
os.makedirs(version_dir)
source_filename = os.path.join(version_dir, 'wordfreq-data.tar.gz')
logger.info("Creating %s" % source_filename)
with tarfile.open(source_filename, 'w:gz') as tarf:
tarf.add(config.RAW_DATA_DIR)
logger.info("Copying database file %s" % config.DB_FILENAME)
subprocess.call([
'/bin/cp',
config.DB_FILENAME,
version_dir
])
logger.info("Uploading to %s" % upload_path)
subprocess.call([
'/usr/bin/rsync',
'-avz',
version_dir,
upload_path
])
from functools32 import lru_cache
PY2 = True
else:
from functools import lru_cache
PY2 = False
# Message raised (as an IOError) when the wordlist database cannot be opened;
# the configured database path is interpolated into it up front.
SQLITE_ERROR_TEXT = """
Couldn't open the wordlist database.
You may need to run wordfreq's setup.py script.
I was expecting to find the database at:
%(path)s
This can be configured by setting the WORDFREQ_DATA environment variable.
""" % {'path': DB_FILENAME}

# Open the shared module-level connection. Outside of Python 2 the
# connection is created with check_same_thread=False; on Python 2 the
# sqlite3 defaults are used.
try:
    CONN = (
        sqlite3.connect(DB_FILENAME)
        if PY2
        else sqlite3.connect(DB_FILENAME, check_same_thread=False)
    )
except sqlite3.OperationalError:
    # Surface a friendlier, actionable message than the raw sqlite error.
    raise IOError(SQLITE_ERROR_TEXT)
@lru_cache(maxsize=CACHE_SIZE)
def word_frequency(word, lang, wordlist='multi', offset=0.):
"""
Get the frequency of `word` in the language with code `lang`, from the
specified `wordlist`.
if upload_path is None:
upload_path = config.UPLOAD_PATH
with TemporaryDirectory('.wordfreq') as build_dir:
version_dir = os.path.join(build_dir, config.MINOR_VERSION)
os.makedirs(version_dir)
source_filename = os.path.join(version_dir, 'wordfreq-data.tar.gz')
logger.info("Creating %s" % source_filename)
with tarfile.open(source_filename, 'w:gz') as tarf:
tarf.add(config.RAW_DATA_DIR)
logger.info("Copying database file %s" % config.DB_FILENAME)
subprocess.call([
'/bin/cp',
config.DB_FILENAME,
version_dir
])
logger.info("Uploading to %s" % upload_path)
subprocess.call([
'/usr/bin/rsync',
'-avz',
version_dir,
upload_path
])
# Message raised (as an IOError) when the wordlist database cannot be opened;
# the configured database path is interpolated into it up front.
SQLITE_ERROR_TEXT = """
Couldn't open the wordlist database.
You may need to run wordfreq's setup.py script.
I was expecting to find the database at:
%(path)s
This can be configured by setting the WORDFREQ_DATA environment variable.
""" % {'path': DB_FILENAME}

# Open the shared module-level connection. Outside of Python 2 the
# connection is created with check_same_thread=False; on Python 2 the
# sqlite3 defaults are used.
try:
    CONN = (
        sqlite3.connect(DB_FILENAME)
        if PY2
        else sqlite3.connect(DB_FILENAME, check_same_thread=False)
    )
except sqlite3.OperationalError:
    # Surface a friendlier, actionable message than the raw sqlite error.
    raise IOError(SQLITE_ERROR_TEXT)
@lru_cache(maxsize=CACHE_SIZE)
def word_frequency(word, lang, wordlist='multi', offset=0.):
"""
Get the frequency of `word` in the language with code `lang`, from the
specified `wordlist`.
The offset gets added to all values, to monotonically account for the
fact that we have not observed all possible words.
"""
c = CONN.cursor()
c.execute("SELECT freq from words where word=? and lang=? and wordlist=?",
(standardize_word(word), lang, wordlist))
def download_db(url=None, dest_filename=None):
    """
    Fetch a prebuilt copy of the database instead of building it locally.

    `url` falls back to `config.DB_URL` and `dest_filename` falls back to
    `config.DB_FILENAME` when they are left as None.
    """
    source_url = config.DB_URL if url is None else url
    target = config.DB_FILENAME if dest_filename is None else dest_filename

    # The destination path is handed to ensure_dir_exists before the
    # download is started.
    ensure_dir_exists(target)
    download(source_url, target)