import pytest
from pathlib import Path
from tempfile import TemporaryDirectory
from unittest.mock import patch

def test_download_files_with_bad_urls():
    bad_urls = ["http://foobar"] * 6
    with TemporaryDirectory() as tmpdir, patch("requests.Session.get") as mock_get:
        mock_get.return_value.ok = False
        with pytest.raises(RuntimeError):
            download_files(bad_urls, Path(tmpdir))
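# download_files itself is imported from the project under test. Below is a
# minimal sketch of behavior consistent with these tests (an assumption, not
# the project's actual implementation): GET each URL through a requests.Session,
# raise RuntimeError on any non-ok response, and skip writing when dry_run is set.
import requests

def download_files_sketch(urls, destination, dry_run=False):
    destination.mkdir(parents=True, exist_ok=True)
    with requests.Session() as session:
        for url in urls:
            response = session.get(url)
            if not response.ok:
                raise RuntimeError(f"Unable to download {url}")
            if not dry_run:
                # Name the local file after the last URL path segment
                (destination / url.rsplit("/", 1)[-1]).write_bytes(response.content)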
def test_download_files(directory_of_mbox_files, dry_run):
    assert directory_of_mbox_files  # so that the files are already present

    # Try to re-download files already downloaded by the fixture
    url_template = (
        "https://mail-archives.apache.org/mod_mbox/httpd-users/20190{month}.mbox"
    )
    path = Path("/tmp/libratom/test_data/httpd-users")
    urls = [url_template.format(month=i) for i in range(1, 7)]

    download_files(urls, path, dry_run=dry_run)
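# The directory_of_mbox_files and dry_run arguments above are pytest fixtures.
# One possible way to supply both dry_run values (an assumption, not necessarily
# how the project's conftest.py defines it) is a parametrized fixture:
@pytest.fixture(params=[False, True], ids=["real_run", "dry_run"])
def dry_run(request):
    return request.param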
"message",
"model",
"multipart",
"text",
"video",
]
# CSV files to download
urls = [
f"https://www.iana.org/assignments/media-types/{registry}.csv"
for registry in media_type_registries
]
with TemporaryDirectory() as tmpdir:
directory = Path(tmpdir)
download_files(urls, directory, dry_run=False)
for file in directory.glob("*.csv"):
with file.open(newline="") as csvfile:
reader = csv.reader(csvfile)
# Use the first token (Name) in each row, skip headers
# The split is to strip DEPRECATED/OBSOLETED/... mentions appended to the name
for [name, *_] in reader:
if name != "Name":
media_types.append(f"{file.stem}/{name.split(maxsplit=1)[0]}")
with out.open(mode="w") as f:
json.dump(sorted(media_types), f, indent=4)
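# A short usage sketch: reading the generated list back and checking that each
# entry is a well-formed "registry/name" pair. The "media_types.json" filename
# here is hypothetical; substitute the actual `out` path used above.
with open("media_types.json") as f:
    for media_type in json.load(f):
        registry, _, name = media_type.partition("/")
        assert registry and name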
names = [
    "albert_meyers",
    "andrea_ring",
    "andrew_lewis",
    "andy_zipper",
    "chris_dorland",
    "jason_wolfe",
    "vkaminski",
]

# Zip files to download; ENRON_DATASET_URL and CACHED_ENRON_DATA_DIR are
# constants defined elsewhere in the project
urls = [f"{ENRON_DATASET_URL}/{name}.zip" for name in names]

download_files(urls, CACHED_ENRON_DATA_DIR, dry_run=False)
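# A small follow-up check, assuming CACHED_ENRON_DATA_DIR is a pathlib.Path and
# that downloaded files keep their URL basenames (both are assumptions here):
for name in names:
    assert (CACHED_ENRON_DATA_DIR / f"{name}.zip").is_file()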