# Build package metadata for the ``datahub`` (CKAN) client used below.
md = {'tags': [
    {'name': 'linguistics'},
    {'name': 'lod'},
    {'name': 'llod'},
]}
if dataset.contact:
    md['maintainer_email'] = dataset.contact
if dataset.license:
    # Map the dataset's license URL to a CKAN license id and title.
    if 'creativecommons.org/licenses/by/' in dataset.license:
        md['license_id'] = 'cc-by-sa'
        md['license_title'] = "Creative Commons Attribution Share-Alike"
    elif 'creativecommons.org/' in dataset.license and '-nc' in dataset.license:
        md['license_id'] = 'cc-nc'
        md['license_title'] = "Creative Commons Non-Commercial (Any)"

# If statistics from the RDF dump are available, pass them along as CKAN "extras".
rdf_md = args.data_file('rdf-metadata.json')
if rdf_md.exists():
    rdf_md = jsonlib.load(rdf_md)
    md['extras'] = [
        {'key': k, 'value': str(rdf_md[k])} for k in rdf_md.keys()
        if k.split(':')[0] in ['triples', 'resources', 'links']]

package = datahub('package_update', id=name, **md)
resources = [rsc['name'] for rsc in package['resources']]
if 'VoID description' not in resources:
    # Attach the dataset's VoID description as a package resource.
    rsc = datahub(
        'resource_create',
        package_id=package['id'],
        name='VoID description',
        url='http://%s/void.ttl' % dataset.domain,
        format='meta/void',
        mimetype='text/turtle')
    assert rsc
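# A minimal sketch of what a CKAN client like the ``datahub`` callable used above
# could look like: a thin wrapper around the CKAN "action" API. The site URL,
# environment variable and error handling here are assumptions for illustration,
# not the project's actual implementation.
import os
import requests

def datahub_sketch(action, **params):
    """POST ``params`` to a CKAN action endpoint and return the ``result`` payload."""
    res = requests.post(
        'https://datahub.io/api/3/action/%s' % action,  # assumed CKAN site URL
        json=params,
        headers={'Authorization': os.environ['DATAHUB_API_KEY']},  # assumed env var
        timeout=30)
    res.raise_for_status()
    data = res.json()
    assert data['success']
    return data['result']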
def gbs_func(command, args, sources=None):  # pragma: no cover
    """Run a Google Book Search (GBS) related maintenance ``command``."""
    def words(s):
        # Normalize a title into a set of slugified words for fuzzy comparison.
        return set(slug(s.strip(), remove_whitespace=False).split())

    log = args.log
    count = 0
    api_url = "https://www.googleapis.com/books/v1/volumes?"

    if command == 'cleanup':
        # Remove cached GBS responses which are empty or not valid JSON.
        for fname in args.data_file('gbs').glob('*.json'):
            try:
                fname = Path(fname)
                data = jsonlib.load(fname)
                if data.get('totalItems') == 0:
                    fname.unlink()
            except ValueError:
                fname.unlink()
        return

    if not sources:
        # Default to all sources in the database, with their data eagerly loaded.
        sources = DBSession.query(common.Source)\
            .order_by(common.Source.id)\
            .options(joinedload(common.Source.data))
    if callable(sources):
        sources = sources()

    for i, source in enumerate(page_query(sources, verbose=True, commit=True)):
        filepath = args.data_file('gbs', 'source%s.json' % source.id)
    # From the RDF dump routine: ``rsc`` iterates over the app's resource types and
    # ``fp`` is the open N3 dump file; both are set up by the enclosing code (not shown).
    try:
        q = DBSession.query(rsc.model)
    except InvalidRequestError:
        # Resource type without a mapped table in this app.
        args.log.info('... skipping')
        continue
    for obj in page_query(q.order_by(rsc.model.pk), n=10000, verbose=True):
        graph = get_graph(obj, args.env['request'], rsc.name)
        count_triples += len(graph)
        count_rsc += 1
        # Only the first graph is written with the prefix declarations ("head").
        fp.write(n3(graph, with_head=count_rsc == 1))
    args.log.info('... finished')

# put in args.data_file('..', 'static', 'download')?
md = {'path': as_posix(tmp), 'resources': count_rsc, 'triples': count_triples}
md.update(count_links(as_posix(tmp_dump)))
jsonlib.dump(md, args.data_file('rdf-metadata.json'))
print(md)

# Publish the dump in the app's static download directory and gzip it.
dataset = Dataset.first()
rdf_dump = args.module_dir.joinpath(
    'static', 'download', '%s-dataset.n3' % dataset.id)
tmp_dump.copy(rdf_dump)
check_call('gzip -f %s' % rdf_dump, shell=True)
print(str(rdf_dump))
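# The dump above is compressed by shelling out to ``gzip -f``. A pure-Python
# alternative using only the standard library would look like this (a sketch,
# not the project's code; unlike ``gzip``, it keeps the uncompressed original):
import gzip
import shutil

def gzip_file(path):
    """Write a gzipped copy of ``path`` next to it as ``path + '.gz'``."""
    with open(str(path), 'rb') as src, gzip.open(str(path) + '.gz', 'wb') as dst:
        shutil.copyfileobj(src, dst)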
# Build per-version dumps of Glottolog languages and a glottocode -> version map.
langs, identifiers = {}, {}
for version in versions:
    aggregate(version, langs, identifiers)
for version in versions:
    dump(
        out.joinpath('glottolog-{0}'.format(version)),
        version,
        langs,
        # Group the version's identifiers by the pk of the language they belong to
        # (itertools.groupby requires the identifiers to be ordered by lpk).
        {pk: list(c) for pk, c in itertools.groupby(identifiers[version], lambda i: i.lpk)})

# Record, for each glottocode, the (last) version in which it occurs.
gc2v = {}
for v in versions:
    for gc in sorted(langs[v].keys()):
        gc2v[gc] = v
jsonlib.dump(gc2v, out.joinpath('glottocode2version.json'), indent=4)
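# The mapping written above lets consumers look up the version in which a given
# glottocode last appeared. A minimal sketch, assuming ``out`` is the output
# directory used above and behaves like a ``pathlib.Path``:
import json

def version_of(glottocode, out):
    with out.joinpath('glottocode2version.json').open(encoding='utf8') as fp:
        return json.load(fp).get(glottocode)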
    # In the branch that checks cached GBS results: ``item`` is the candidate GBS
    # volume, compared against the local ``source`` record.
    log.info('%s' % sorted(iwords))
    if needs_check:
        # Show both records side by side so the match can be confirmed manually.
        log.info('------- %s -> %s' % (
            source.id, item['volumeInfo'].get('industryIdentifiers')))
        log.info('%s %s' % (
            item['volumeInfo']['title'], item['volumeInfo'].get('subtitle', '')))
        log.info(stitle)
        log.info(item['volumeInfo'].get('publishedDate'))
        log.info(source.year)
        log.info(item['volumeInfo'].get('authors'))
        log.info(source.author)
        log.info(item['volumeInfo'].get('publisher'))
        log.info(source.publisher)
        if not confirm('Are the records the same?'):
            # Cache an empty result so this volume is not suggested again.
            log.warning('---- removing ----')
            jsonlib.dump({"totalItems": 0}, filepath)
elif command == 'update':
    # Attach the verified GBS volume to the source record.
    source.google_book_search_id = item['id']
    source.update_jsondata(gbs=item)
    count += 1
elif command == 'download':
    if source.author and (source.title or source.booktitle):
        title = source.title or source.booktitle
        if filepath.exists():
            # Response already cached; skip this source.
            continue
        # Build the GBS query from author, title and (optionally) publisher.
        q = [
            'inauthor:' + quote_plus(source.author.encode('utf8')),
            'intitle:' + quote_plus(title.encode('utf8')),
        ]
        if source.publisher:
            q.append('inpublisher:' + quote_plus(
                source.publisher.encode('utf8')))
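# The remainder of the 'download' branch (not shown) presumably joins ``q`` into
# the Google Books volumes query and caches the JSON response under ``filepath``.
# A minimal sketch of that step under those assumptions, using ``requests`` and
# assuming ``filepath`` behaves like a ``pathlib.Path``:
import requests

def fetch_gbs(api_url, q, filepath):
    # The terms in ``q`` are already percent-encoded above, so join them verbatim.
    res = requests.get(api_url + 'q=' + '+'.join(q), timeout=30)
    res.raise_for_status()
    filepath.write_text(res.text, encoding='utf8')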
def load(table, csv, engine):
    """Insert the rows serialized in ``csv`` into ``table`` using ``engine``."""
    # The CSVW table description sits next to the CSV file, sharing its stem.
    schema = jsonlib.load(csv.parent.joinpath(csv.stem + '.' + CsvmJsonAdapter.extension))
    converter = get_converter(schema['tableSchema'], table)
    engine.execute(
        table.insert(), [converted(d, converter) for d in reader(csv, dicts=True)])
    # Return the alembic revision recorded when the data was frozen, if any.
    return schema.get("dc:identifier")
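# A sketch of how ``load`` could be driven when unfreezing a dump: iterate the
# tables in dependency order and load every one whose CSV is present. ``dump_dir``
# and the SQLAlchemy ``engine`` are assumed to be provided by the caller; this is
# illustrative, not the project's actual unfreeze routine.
def unfreeze_sketch(dump_dir, engine):
    for table in Base.metadata.sorted_tables:
        csv = dump_dir.joinpath('%s.csv' % table.name)
        if csv.exists():
            load(table, csv, engine)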
# Freeze: dump every table to CSV plus a CSVW description, then zip everything up.
db_version = get_alembic_version(DBSession)

for table in Base.metadata.sorted_tables:
    csv = dump_dir.joinpath('%s.csv' % table.name)
    if with_history or not table.name.endswith('_history'):
        _freeze(table, csv)

    if csv.exists():
        csvm = '%s.%s' % (table.name, CsvmJsonAdapter.extension)
        doc = CsvmJsonAdapter.csvm_doc(
            csvm, args.env['request'], [(col.name, col) for col in table.columns])
        if db_version:
            # We (ab)use a dc:identifier property to pass the alembic revision of the
            # database to the unfreeze script.
            doc["dc:identifier"] = db_version  # pragma: no cover
        jsonlib.dump(doc, dump_dir.joinpath(csvm))

with ZipFile(
        as_posix(args.data_file('..', 'data.zip')), 'w', ZIP_DEFLATED) as zipfile:
    for f in dump_dir.iterdir():
        if f.is_file():
            with f.open('rb') as fp:
                zipfile.writestr(f.name, fp.read())
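# The resulting ``data.zip`` can be unpacked with the standard library before
# feeding the CSV files back to ``load`` above. A minimal sketch; ``data_zip``
# and ``target_dir`` are assumed paths, not names used by the project:
from zipfile import ZipFile

def unpack_dump(data_zip, target_dir):
    with ZipFile(str(data_zip)) as zf:
        zf.extractall(str(target_dir))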
def __json__(self, req):
    """Custom JSON serialization of an object.

    :param req: pyramid Request object.
    :return: ``dict`` suitable for serialization as JSON.
    """
    exclude = {'active', 'version', 'created', 'updated', 'polymorphic_type'}
    cols = [
        col.key for om in inspect(self).mapper.iterate_to_root()
        for col in om.local_table.c
        # ``set.add`` returns None, so the second clause is always true; it just
        # records each key so columns shared by joined tables appear only once.
        if col.key not in exclude and not exclude.add(col.key)]
    return {col: jsonlib.format(getattr(self, col)) for col in cols}
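# Pyramid's built-in ``json`` renderer looks for exactly this ``__json__(request)``
# hook, so a view can return the model instance directly. A minimal sketch; the
# route name and the ``get_object`` lookup helper are hypothetical:
from pyramid.view import view_config

@view_config(route_name='thing_json', renderer='json')  # hypothetical route
def thing_json(request):
    obj = get_object(request)  # hypothetical lookup returning a model instance
    # The renderer calls ``obj.__json__(request)`` to serialize the return value.
    return obj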