        # 'download' branch of the Google Book Search helper: for each source,
        # build a Volumes API query and cache the JSON response to a file.
        # (This block runs inside the loop over sources.)
        title = source.title or source.booktitle
        if filepath.exists():
            continue
        q = [
            'inauthor:' + quote_plus(source.author.encode('utf8')),
            'intitle:' + quote_plus(title.encode('utf8')),
        ]
        if source.publisher:
            q.append('inpublisher:' + quote_plus(
                source.publisher.encode('utf8')))
        url = api_url + 'q=%s&key=%s' % ('+'.join(q), args.api_key)
        count += 1
        r = requests.get(url, headers={'accept': 'application/json'})
        log.info('%s - %s' % (r.status_code, url))
        if r.status_code == 200:
            with open(as_posix(filepath), 'w') as fp:
                fp.write(r.text.encode('utf8'))
        elif r.status_code == 403:
            log.warn("limit reached")
            break

    # summary logging after the loop over sources
    if command == 'update':
        log.info('assigned gbs ids for %s out of %s sources' % (count, i))
    elif command == 'download':
        log.info('queried gbs for %s sources' % count)
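
# A minimal, self-contained sketch of the query construction used above,
# assuming only the public Google Books Volumes API endpoint; the helper name
# `build_gbs_url` and the example values are hypothetical, not part of the
# code above.
from urllib.parse import quote_plus

def build_gbs_url(author, title, api_key, publisher=None):
    # The API takes a single `q` parameter made of field-scoped terms
    # such as inauthor:, intitle: and inpublisher:, joined by '+'.
    q = ['inauthor:' + quote_plus(author), 'intitle:' + quote_plus(title)]
    if publisher:
        q.append('inpublisher:' + quote_plus(publisher))
    return 'https://www.googleapis.com/books/v1/volumes?q=%s&key=%s' % (
        '+'.join(q), api_key)

# Example (hypothetical values):
# build_gbs_url('Doe', 'A Grammar of Example', 'MY_API_KEY')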
def llod_func(args):  # pragma: no cover
    """Create an RDF dump and compute some statistics about it."""
    tmp = Path(mkdtemp())
    count_rsc = 0
    count_triples = 0

    tmp_dump = tmp.joinpath('rdf.n3')
    with open(as_posix(tmp_dump), 'w') as fp:
        for rsc in RESOURCES:
            args.log.info('Resource type %s ...' % rsc.name)
            try:
                q = DBSession.query(rsc.model)
            except InvalidRequestError:
                args.log.info('... skipping')
                continue
            for obj in page_query(q.order_by(rsc.model.pk), n=10000, verbose=True):
                graph = get_graph(obj, args.env['request'], rsc.name)
                count_triples += len(graph)
                count_rsc += 1
                fp.write(n3(graph, with_head=count_rsc == 1))
            args.log.info('... finished')

    # put in args.data_file('..', 'static', 'download')?
    md = {'path': as_posix(tmp), 'resources': count_rsc, 'triples': count_triples}
    md.update(count_links(as_posix(tmp_dump)))
    jsonlib.dump(md, args.data_file('rdf-metadata.json'))
    print(md)

    dataset = Dataset.first()
    rdf_dump = args.module_dir.joinpath(
        'static', 'download', '%s-dataset.n3' % dataset.id)
    tmp_dump.copy(rdf_dump)
    check_call('gzip -f %s' % rdf_dump, shell=True)
    print(str(rdf_dump))
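
# `page_query` above is assumed to iterate a large SQLAlchemy query in
# fixed-size batches so the whole result set never has to be materialised at
# once. A minimal sketch of that pattern (not the actual implementation; the
# name `batched` is made up):
def batched(query, n=1000):
    # The query should carry a stable ORDER BY (the caller orders by the
    # primary key) so limit/offset windows do not overlap or skip rows.
    offset = 0
    while True:
        chunk = query.limit(n).offset(offset).all()
        if not chunk:
            break
        for row in chunk:
            yield row
        offset += n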
def unfreeze_func(args, engine=None):
    try:
        importlib.import_module(args.module.__name__)
    except ImportError:
        pass  # pragma: no cover
    engine = engine or DBSession.get_bind()
    data_dir = Path(mkdtemp())
    with ZipFile(as_posix(args.module_dir.joinpath('..', 'data.zip'))) as fp:
        fp.extractall(as_posix(data_dir))

    db_version = None
    for table in Base.metadata.sorted_tables:
        csv = data_dir.joinpath('%s.csv' % table.name)
        if csv.exists():
            db_version = load(table, csv, engine)

    if db_version:
        set_alembic_version(engine, db_version)  # pragma: no cover

    rmtree(data_dir)
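
# `set_alembic_version` above is assumed to stamp the database with the
# alembic revision recorded in the dump, so later migrations resume from the
# right point. A sketch of one way to do that (hypothetical helper, not the
# actual implementation):
from sqlalchemy import text

def stamp_alembic_version(engine, revision):
    with engine.begin() as conn:
        conn.execute(text("DELETE FROM alembic_version"))
        conn.execute(
            text("INSERT INTO alembic_version (version_num) VALUES (:rev)"),
            {"rev": revision})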
    # Tail of the corresponding freeze/dump routine: inside its loop over
    # Base.metadata.sorted_tables, each table is written as CSV plus a CSVW
    # metadata document, and everything is finally packed into data.zip.
        csv = dump_dir.joinpath('%s.csv' % table.name)
        if with_history or not table.name.endswith('_history'):
            _freeze(table, csv)

        if csv.exists():
            csvm = '%s.%s' % (table.name, CsvmJsonAdapter.extension)
            doc = CsvmJsonAdapter.csvm_doc(
                csvm, args.env['request'], [(col.name, col) for col in table.columns])
            if db_version:
                # We (ab)use a dc:identifier property to pass the alembic revision of the
                # database to the unfreeze script.
                doc["dc:identifier"] = db_version  # pragma: no cover
            jsonlib.dump(doc, dump_dir.joinpath(csvm))

    with ZipFile(
            as_posix(args.data_file('..', 'data.zip')), 'w', ZIP_DEFLATED) as zipfile:
        for f in dump_dir.iterdir():
            if f.is_file():
                with f.open('rb') as fp:
                    zipfile.writestr(f.name, fp.read())
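
# The block above packs one CSV (and one CSVW metadata JSON) per table into
# data.zip. A quick stdlib-only way to inspect such an archive (illustrative
# only; the path and the member name 'language.csv' are hypothetical):
from io import TextIOWrapper
from zipfile import ZipFile
import csv as csvlib

with ZipFile('data.zip') as zf:
    print(zf.namelist())  # e.g. ['language.csv', ...]
    with zf.open('language.csv') as raw:
        for row in csvlib.reader(TextIOWrapper(raw, encoding='utf8')):
            print(row)  # header row
            break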