Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_build():
"""
Ensure that the build process builds the same DB that gets distributed.
"""
if not os.path.exists(config.RAW_DATA_DIR):
download_and_extract_raw_data()
tempdir = tempfile.mkdtemp('.wordfreq')
try:
db_file = os.path.join(tempdir, 'test.db')
load_all_data(config.RAW_DATA_DIR, db_file, do_it_anyway=True)
conn = sqlite3.connect(db_file)
# Compare the information we got to the information in the default DB.
new_info = flatten_list_of_dicts(wordlist_info(conn))
old_info = flatten_list_of_dicts(wordlist_info(None))
eq_(len(new_info), len(old_info))
for i in range(len(new_info)):
# Don't test Greek and emoji on Python 2; we can't make them
# consistent with Python 3.
if PYTHON2 and ((u'lang', u'el') in new_info[i]):
def test_build():
    """
    Ensure that the build process builds the same DB that gets distributed.

    If `config.RAW_DATA_DIR` does not exist, the raw data is fetched first
    with `download_and_extract_raw_data()`. A fresh database is then built
    into a temporary directory, and the flattened `wordlist_info` of that
    database is compared, entry by entry, with the flattened result of
    `wordlist_info(None)` (the information in the default DB).

    On Python 2, entries for Greek (`el`) and for the `twitter` wordlist are
    skipped, because they can't be made consistent with Python 3.

    The temporary directory is always removed, and the connection to the
    freshly built database is always closed before that happens.
    """
    if not os.path.exists(config.RAW_DATA_DIR):
        download_and_extract_raw_data()

    tempdir = tempfile.mkdtemp('.wordfreq')
    try:
        db_file = os.path.join(tempdir, 'test.db')
        # `do_it_anyway=True` lets the build run even on Python 2.
        load_all_data(config.RAW_DATA_DIR, db_file, do_it_anyway=True)
        conn = sqlite3.connect(db_file)
        try:
            # Compare the information we got to the information in the
            # default DB.
            new_info = flatten_list_of_dicts(wordlist_info(conn))
            old_info = flatten_list_of_dicts(wordlist_info(None))
            eq_(len(new_info), len(old_info))

            # The lengths were just asserted equal, so zip() pairs up every
            # entry without dropping any.
            for new_entry, old_entry in zip(new_info, old_info):
                # Don't test Greek and emoji on Python 2; we can't make them
                # consistent with Python 3.
                if PYTHON2 and ((u'lang', u'el') in new_entry):
                    continue
                if PYTHON2 and ((u'wordlist', u'twitter') in new_entry):
                    continue
                eq_(new_entry, old_entry)
        finally:
            # Release the handle on test.db before the directory is deleted;
            # an open connection leaks the file handle and can make rmtree
            # fail on platforms that refuse to delete open files.
            conn.close()
    finally:
        shutil.rmtree(tempdir)
up as actual differences in the set of words. For the sake of consistency,
we say that the data is only valid when built on Python 3.
Python 2 can still *use* wordfreq, by downloading the database that was
built on Python 3.
If you insist on building the Python 2 version, pass `do_it_anyway=True`.
"""
if sys.version_info.major == 2 and not do_it_anyway:
raise UnicodeError(
"Python 2.x has insufficient Unicode support, and will build "
"the wrong database. Pass `do_it_anyway=True` to do it anyway."
)
if source_dir is None:
source_dir = config.RAW_DATA_DIR
if filename is None:
filename = config.DB_FILENAME
def wordlist_path(*pieces):
return os.path.join(source_dir, *pieces)
logger.info("Creating database")
conn = create_db(filename)
for lang in LEEDS_LANGUAGES:
filename = wordlist_path('leeds', 'internet-%s-forms.num' % lang)
read_leeds_wordlist_into_db(conn, filename, 'leeds-internet', lang)
read_wordlist_into_db(conn, wordlist_path('google', 'google-books-english.csv'), 'google-books', 'en')
read_wordlist_into_db(conn, wordlist_path('luminoso', 'twitter-52M.csv'), 'twitter', 'xx')
3, and more notably, that it has the proper SSH keys to upload to that
server.
"""
from tempfile import TemporaryDirectory
if upload_path is None:
upload_path = config.UPLOAD_PATH
with TemporaryDirectory('.wordfreq') as build_dir:
version_dir = os.path.join(build_dir, config.MINOR_VERSION)
os.makedirs(version_dir)
source_filename = os.path.join(version_dir, 'wordfreq-data.tar.gz')
logger.info("Creating %s" % source_filename)
with tarfile.open(source_filename, 'w:gz') as tarf:
tarf.add(config.RAW_DATA_DIR)
logger.info("Copying database file %s" % config.DB_FILENAME)
subprocess.call([
'/bin/cp',
config.DB_FILENAME,
version_dir
])
logger.info("Uploading to %s" % upload_path)
subprocess.call([
'/usr/bin/rsync',
'-avz',
version_dir,
upload_path
])