How to use the clldutils.jsonlib.load function in clldutils

To help you get started, we’ve selected a few clldutils examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github clld / clld / src / clld / scripts / util.py View on Github external
.order_by(common.Source.id)\
            .options(joinedload(common.Source.data))
    if callable(sources):
        sources = sources()

    for i, source in enumerate(page_query(sources, verbose=True, commit=True)):
        filepath = args.data_file('gbs', 'source%s.json' % source.id)

        if command == 'update':
            source.google_book_search_id = None
            source.update_jsondata(gbs={})

        if command in ['verify', 'update']:
            if filepath.exists():
                try:
                    data = jsonlib.load(filepath)
                except ValueError:
                    log.warn('no JSON object found in: %s' % filepath)
                    continue
                if not data['totalItems']:
                    continue
                item = data['items'][0]
            else:
                continue

        if command == 'verify':
            stitle = source.description or source.title or source.booktitle
            needs_check = False
            year = item['volumeInfo'].get('publishedDate', '').split('-')[0]
            if not year or year != slug(source.year or ''):
                needs_check = True
            twords = words(stitle)
github clld / glottolog3 / glottolog3 / initdb.py View on Github external
#
    lgsources = defaultdict(list)
    # Note: We rely on languoids() yielding languoids in the "right" order, i.e. such that top-level
    # nodes will precede nested nodes. This order must be preserved using an `OrderedDict`:
    nodemap = OrderedDict([(l.id, l) for l in glottolog.languoids()])
    lgcodes = {k: v.id for k, v in args.repos.languoids_by_code(nodemap).items()}
    for lang in nodemap.values():
        for ref in lang.sources:
            lgsources['{0.provider}#{0.bibkey}'.format(ref)].append(lang.id)
        load_languoid(glottolog, data, lang, nodemap)

    for gc in glottolog.glottocodes:
        if gc not in data['Languoid'] and gc not in legacy:
            common.Config.add_replacement(gc, None, model=common.Language)

    for obj in jsonlib.load(glottolog.references_path('replacements.json')):
        common.Config.add_replacement(
            '{0}'.format(obj['id']),
            '{0}'.format(obj['replacement']) if obj['replacement'] else None,
            model=common.Source)

    DBSession.flush()

    for doctype in glottolog.hhtypes:
        data.add(
            models.Doctype, doctype.id, id=doctype.id,
            name=doctype.name,
            description=doctype.description,
            abbr=doctype.abbv,
            ord=doctype.rank)

    for bib in glottolog.bibfiles:
github clld / glottolog3 / glottolog3 / initdb.py View on Github external
description=de.description,
            jsondata=dict(geojson=read_macroarea_geojson(args.repos, de.name, de.description)),
        ),
    )
    add('ltype',
        args.repos.language_types.values(),
        name='Language Type',
        dekw=lambda de: dict(name=de.category, description=de.description),
        delookup='category',
    )
    add('country',
        args.repos.countries,
        dekw=lambda de: dict(name=de.id, description=de.name),
    )

    legacy = jsonlib.load(gc2version(args))
    for gc, version in legacy.items():
        data.add(models.LegacyCode, gc, id=gc, version=version)

    #
    # Now load languoid data, keeping track of relations that can only be inserted later.
    #
    lgsources = defaultdict(list)
    # Note: We rely on languoids() yielding languoids in the "right" order, i.e. such that top-level
    # nodes will precede nested nodes. This order must be preserved using an `OrderedDict`:
    nodemap = OrderedDict([(l.id, l) for l in glottolog.languoids()])
    lgcodes = {k: v.id for k, v in args.repos.languoids_by_code(nodemap).items()}
    for lang in nodemap.values():
        for ref in lang.sources:
            lgsources['{0.provider}#{0.bibkey}'.format(ref)].append(lang.id)
        load_languoid(glottolog, data, lang, nodemap)
github clld / glottolog3 / glottolog3 / scripts / fetch_downloads.py View on Github external
"""
Script called from a fab task (running on the server) to fetch downloads after deploying
a new release.
"""
from urllib.request import urlretrieve

from clldutils.jsonlib import load
from clldutils.path import Path, md5

import glottolog3

# Download directory inside the installed glottolog3 package: <package>/static/download
DOWNLOAD_DIR = Path(glottolog3.__file__).parent.joinpath('static', 'download')

# downloads.json sits next to DOWNLOAD_DIR. As consumed below, it maps a release name
# to a spec providing an 'oid' and a list of 'bitstreams', each with a 'bitstreamid'
# and a 'checksum'.
for rel, spec in load(DOWNLOAD_DIR.parent / 'downloads.json').items():
    d = DOWNLOAD_DIR / rel
    # Create the per-release directory. `parents=True` also creates DOWNLOAD_DIR on a
    # fresh deployment, and `exist_ok=True` avoids the check-then-create race.
    d.mkdir(parents=True, exist_ok=True)
    for bs in spec['bitstreams']:
        url = 'https://cdstar.shh.mpg.de//bitstreams/{0}/{1}'.format(
            spec['oid'], bs['bitstreamid'])
        # Local file names use '-' where the bitstream id uses '_'.
        target = d.joinpath(bs['bitstreamid'].replace('_', '-'))
        # Fetch only when there is no local copy or its md5() value differs from the
        # checksum recorded in downloads.json.
        # NOTE(review): the freshly downloaded file is not re-verified — confirm
        # whether that is intended.
        if (not target.exists()) or bs['checksum'] != md5(target):
            print('retrieving {0} {1}'.format(rel, target))
            urlretrieve(url, str(target))
github clld / glottolog3 / glottolog3 / commands / cdstar.py View on Github external
else:
                    bitstream = None
                if bitstream:
                    if bitstream._properties['checksum'] != md5(fname):
                        bitstream.delete()
                    else:
                        skip = True
                        print('skipping {0}'.format(fname.name))
                if not skip:
                    print(fname.name)
                    obj.add_bitstream(fname=fname.as_posix(), name=bsname)
        obj.read()
        cat.add(obj, update=True)

    with update(dlfname, default=collections.OrderedDict(), indent=4, sort_keys=True) as downloads:
        for oid, spec in load(args.catalog).items():
            if 'metadata' in spec and 'title' in spec['metadata']:
                match = title_pattern.match(spec['metadata']['title'])
                if match:
                    if (match.group('version') not in downloads) or match.group('version') == release:
                        args.log.info('update info for release {0}'.format(match.group('version')))
                        spec['oid'] = oid
                        downloads[match.group('version')] = spec
    args.log.info('{0} written'.format(dlfname))
    args.log.info('{0}'.format(args.catalog))