import os

from gdown import download


def test_download():
    url = "https://raw.githubusercontent.com/wkentaro/gdown/3.1.0/gdown/__init__.py"  # NOQA
    output = "/tmp/gdown_r"
    # Usage before https://github.com/wkentaro/gdown/pull/32
    assert download(url, output, quiet=False) == output
    os.remove(output)
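For comparison, downloading from Google Drive instead of a plain HTTP URL works the same way; a minimal sketch, with a placeholder file id rather than a real one:

import gdown

# Hypothetical Drive file id, used only for illustration.
url = "https://drive.google.com/uc?id=FILE_ID_PLACEHOLDER"
output = "/tmp/example_download"

# gdown resolves the Drive download page and streams the file to `output`.
gdown.download(url, output, quiet=False)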
            (file_id, False),
            True,
        ),
        (
            "https://drive.google.com/a/jsk.imi.i.u-tokyo.ac.jp/uc?id={}&export=download".format(  # NOQA
                file_id
            ),
            (file_id, True),
            False,
        ),
    ]
    for url, expected, check_warn in urls:
        if check_warn:
            with pytest.warns(UserWarning):
                assert parse_url(url) == expected
        else:
            assert parse_url(url) == expected
import csv
import io
import sys
import tarfile

import gdown


def list_creator(path):
    # `url` and `key` come from the enclosing scope; list_creator is used
    # as a nested dataset-creator function.
    dataset = {}
    archive_path = gdown.cached_download(url)

    # Raise the csv field size limit as high as the platform allows.
    maxsize = sys.maxsize
    while True:
        try:
            csv.field_size_limit(maxsize)
            break
        except OverflowError:
            maxsize = int(maxsize / 10)
            csv.field_size_limit(maxsize)

    with tarfile.open(archive_path, 'r') as archive:
        for split in ('train', 'test'):
            filename = f'{key}_csv/{split}.csv'
            print(f'Processing {filename}...')
            reader = csv.reader(
                io.TextIOWrapper(archive.extractfile(filename), encoding='utf-8'))
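The excerpt above stops right after building the csv reader. A minimal sketch of how the rows might be gathered per split, assuming each row is a label followed by text columns (that layout is an assumption, not shown in the excerpt):

            rows = []
            for row in reader:
                label = int(row[0])         # assumed: first column holds the label
                text = ' '.join(row[1:])    # assumed: remaining columns hold the text
                rows.append((text, label))
            dataset[split] = rows
    return dataset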
import os

import gdown


def maybe_download_files(data_dir: str = "data") -> None:
    # IS_TEST and the *_URL / *_DATA constants come from the enclosing module.
    if not os.path.exists(data_dir):
        os.makedirs(data_dir, exist_ok=True)
    if IS_TEST:
        # Sample data pickle
        gdown.download(SMALL_DATA_URL, output=SAMPLE_DATA, quiet=None)
    else:
        # Books
        gdown.download(YA_BOOKS_URL, output=BOOK_DATA, quiet=None)
        # Interactions
        gdown.download(YA_INTERACTIONS_URL, output=INTERACTIONS_DATA, quiet=None)
        # Reviews
        gdown.download(YA_REVIEWS_URL, output=REVIEWS_DATA, quiet=None)
import os

import gdown


def download(url, filename, cachedir='~/hashtag/'):
    f_cachedir = os.path.expanduser(cachedir)
    os.makedirs(f_cachedir, exist_ok=True)
    file_path = os.path.join(f_cachedir, filename)
    if os.path.isfile(file_path):
        print('Using cached model')
        return file_path
    gdown.download(url, file_path, quiet=False)
    return file_path
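A possible call pattern for the caching helper above, with a placeholder URL and filename; the second call finds the cached copy and skips the download:

# Hypothetical Drive URL and target filename, for illustration only.
model_url = "https://drive.google.com/uc?id=FILE_ID_PLACEHOLDER"
path = download(model_url, "model.bin")   # downloads into ~/hashtag/
path = download(model_url, "model.bin")   # prints 'Using cached model'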
import hashlib
import os
import os.path as osp

import gdown


def cached_download(url, path, md5=None, quiet=False):
    def check_md5(path, md5):
        print('[{:s}] Checking md5 ({:s})'.format(path, md5))
        return md5sum(path) == md5

    if osp.exists(path) and not md5:
        print('[{:s}] File exists ({:s})'.format(path, md5sum(path)))
        return path
    elif osp.exists(path) and md5 and check_md5(path, md5):
        return path
    else:
        dirpath = osp.dirname(path)
        if not osp.exists(dirpath):
            os.makedirs(dirpath)
        return gdown.download(url, path, quiet=quiet)


def md5sum(filename, blocksize=65536):
    # Block-wise md5 of a file; the source shows only the signature, body completed here.
    hash_ = hashlib.md5()
    with open(filename, 'rb') as f:
        for block in iter(lambda: f.read(blocksize), b''):
            hash_.update(block)
    return hash_.hexdigest()
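A possible call to this helper, with placeholder URL and checksum; when the file already exists and the md5 matches, the download is skipped:

weights = cached_download(
    url="https://drive.google.com/uc?id=FILE_ID_PLACEHOLDER",   # placeholder id
    path="data/weights.pth",
    md5="0123456789abcdef0123456789abcdef",                     # placeholder checksum
)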
import os
import os.path as osp

import gdown


def cached_download(url, path, md5=None, quiet=False, postprocess=None):
    def check_md5(path, md5):
        print('[{:s}] Checking md5 ({:s})'.format(path, md5))
        return md5sum(path) == md5

    if osp.exists(path) and not md5:
        print('[{:s}] File exists ({:s})'.format(path, md5sum(path)))
    elif osp.exists(path) and md5 and check_md5(path, md5):
        pass
    else:
        dirpath = osp.dirname(path)
        if not osp.exists(dirpath):
            os.makedirs(dirpath)
        gdown.download(url, path, quiet=quiet)

    if postprocess is not None:
        postprocess(path)

    return path
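One way the postprocess hook might be used is to unpack an archive right after the download completes; a sketch with placeholder values, reusing the imports from the snippet above:

import tarfile

def _extract(path):
    # Unpack the downloaded tarball next to the archive itself.
    with tarfile.open(path, 'r:gz') as archive:
        archive.extractall(osp.dirname(path))

cached_download(
    url="https://drive.google.com/uc?id=FILE_ID_PLACEHOLDER",   # placeholder id
    path="data/dataset.tar.gz",
    postprocess=_extract,
)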
# Body of a pretrained-weight loader; `key`, `model`, `pretrained_urls`,
# `_get_torch_home` and the imports (os, errno, torch, gdown, OrderedDict)
# come from the surrounding module.
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
    os.makedirs(model_dir)
except OSError as e:
    if e.errno == errno.EEXIST:
        # Directory already exists, ignore.
        pass
    else:
        # Unexpected OSError, re-raise.
        raise
filename = key + '_imagenet.pth'
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
    gdown.download(pretrained_urls[key], cached_file, quiet=False)
state_dict = torch.load(cached_file)
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
    if k.startswith('module.'):
        k = k[7:]  # discard module.
    if k in model_dict and model_dict[k].size() == v.size():
        new_state_dict[k] = v
        matched_layers.append(k)
    else:
        discarded_layers.append(k)
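The excerpt ends before the weights are applied; a sketch of the usual continuation (an assumption, not shown above), which merges only the matched entries back into the model:

# Assumed continuation: load the layers whose names and shapes matched.
model_dict.update(new_state_dict)
model.load_state_dict(model_dict)
if discarded_layers:
    print('Discarded layers (name or shape mismatch): {}'.format(discarded_layers))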
import os
import zipfile


def download_data_gdown(path):
    import gdown

    file_id = "1efHsY16pxK0lBD2gYCgCTnv1Swstq771"
    url = f"https://drive.google.com/uc?id={file_id}"
    data_zip = os.path.join(path, "data.zip")
    gdown.download(url, data_zip, quiet=False)
    with zipfile.ZipFile(data_zip, "r") as zip_ref:
        zip_ref.extractall(path)
    return
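Typical usage of the helper above, downloading data.zip and unpacking it into a local directory (the target directory name is arbitrary):

os.makedirs("datasets", exist_ok=True)
download_data_gdown("datasets")  # fetches data.zip and extracts it into datasets/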