How to use pycldf - 10 common examples

To help you get started, we’ve selected a few pycldf examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github clld / clld / tests / test_web_adapters_cldf.py View on Github external
def test_CldfDownload(env, tmppath, mocker, capsys):
    """Create a CLDF download archive and read the extracted dataset back."""
    from clld.web.adapters.cldf import CldfDownload

    # Swap out the ``transaction`` name used by the adapter module.
    mocker.patch('clld.web.adapters.cldf.transaction')
    archive_path = tmppath / 'dl.zip'
    download = CldfDownload(Dataset, 'clld')
    download.create(env['request'], outfile=archive_path, verbose=True)
    captured_out, _ = capsys.readouterr()
    assert 'Value' in captured_out

    metadata_name = 'Wordlist-metadata.json'
    extract_dir = tmppath / 'cldf'
    with ZipFile(archive_path.as_posix()) as archive:
        # The metadata file must be part of the archive before unpacking it.
        assert metadata_name in archive.namelist()
        archive.extractall(str(extract_dir))

    ds = CldfDataset.from_metadata(extract_dir.joinpath(metadata_name))
    assert ds.module == 'Wordlist'
    rows = list(ds[ds.primary_table])
    assert len(rows) == 3
    for row in rows:
        # list() consumes whatever expand_refs yields for this row's sources.
        list(ds.sources.expand_refs(row['Source']))
github lingpy / lingpy / lingpy / basic / wordlist.py View on Github external
}
        kwargs.update(kw)
        
        if isinstance(namespace, tuple):
            namespace = dict(namespace)

        # get the datatypes from configuration as to namespace
        datatypes = read_conf(kwargs['conf'])[1]

        # Load the dataset.
        fname = Path(path)
        if not fname.exists():
            raise compat.FileNotFoundError(
                '{:} does not exist'.format(fname))
        if fname.suffix == '.json':
            dataset = pycldf.dataset.Dataset.from_metadata(fname)
        else:
            dataset = pycldf.dataset.Dataset.from_data(fname)

        if dataset.module == "Wordlist":
            # First, make a list of cognate codes if they are in a separate table.
            cognateset_assignments = {}
            try:
                form_reference = dataset["CognateTable", "formReference"].name
                for row in dataset["CognateTable"].iterdicts():
                    cognateset_assignments[row[form_reference]] = row
            except KeyError:
                # Either there are no cognate codes, or they are in the form
                # table. Both options are fine.
                pass

            f_id = dataset["FormTable", "id"].name
github lingpy / lingpy / lingpy / basic / wordlist.py View on Github external
if isinstance(namespace, tuple):
            namespace = dict(namespace)

        # get the datatypes from configuration as to namespace
        datatypes = read_conf(kwargs['conf'])[1]

        # Load the dataset.
        fname = Path(path)
        if not fname.exists():
            raise compat.FileNotFoundError(
                '{:} does not exist'.format(fname))
        if fname.suffix == '.json':
            dataset = pycldf.dataset.Dataset.from_metadata(fname)
        else:
            dataset = pycldf.dataset.Dataset.from_data(fname)

        if dataset.module == "Wordlist":
            # First, make a list of cognate codes if they are in a separate table.
            cognateset_assignments = {}
            try:
                form_reference = dataset["CognateTable", "formReference"].name
                for row in dataset["CognateTable"].iterdicts():
                    cognateset_assignments[row[form_reference]] = row
            except KeyError:
                # Either there are no cognate codes, or they are in the form
                # table. Both options are fine.
                pass

            f_id = dataset["FormTable", "id"].name

            # Access columns by type, not by name.
github cldf / cldf / examples / wals / cldf.py View on Github external
def write_cldf(req, contrib, valuesets, features, outdir):
    """Configure the CLDF dataset for one WALS chapter.

    The dataset is named ``wals-chapter-<contrib.id>``, gets a fixed set of
    column names, URL templates for the row and for the ``Language_ID`` and
    ``Parameter_ID`` columns, and a bibliographic citation in its metadata.

    Parameters
    ----------
    req
        Request object; handed to ``url_template`` and ``text_citation``.
    contrib
        Contribution whose ``id`` names the dataset and which is cited.
    valuesets, features, outdir
        Not used in the portion of the function shown here.
    """
    ds = Dataset('wals-chapter-%s' % contrib.id)
    ds.fields = (
        'ID',
        'Language_ID',
        'Language_name',
        'Parameter_ID',
        'Value',
        'DomainElement',
        'Source',
        'Comment')
    ds.table.schema.aboutUrl = url_template(req, 'valueset', 'ID')
    ds.table.schema.columns['Language_ID'].valueUrl = Identifier(
        type='glottolog', name='{Language_ID}').url()
    ds.table.schema.columns['Parameter_ID'].valueUrl = url_template(
        req, 'parameter', 'Parameter_ID')

    # The Dublin Core term is written without trailing whitespace; a key with
    # a stray space would not be found under the standard property name.
    ds.metadata['dc:bibliographicCitation'] = text_citation(req, contrib)
github lmaurits / BEASTling / beastling / fileio / datareaders.py View on Github external
Parameters
    ----------
    fname : str or Path
        Path to a CLDF dataset

    Returns
    -------
    Dataset
    """
    fname = Path(fname)
    if not fname.exists():
        raise FileNotFoundError(
            '{:} does not exist'.format(fname))
    if fname.suffix == '.json':
        return pycldf.dataset.Dataset.from_metadata(fname)
    return pycldf.dataset.Dataset.from_data(fname)
github lmaurits / BEASTling / beastling / fileio / datareaders.py View on Github external
Parameters
    ----------
    fname : str or Path
        Path to a CLDF dataset

    Returns
    -------
    Dataset
    """
    fname = Path(fname)
    if not fname.exists():
        raise FileNotFoundError(
            '{:} does not exist'.format(fname))
    if fname.suffix == '.json':
        return pycldf.dataset.Dataset.from_metadata(fname)
    return pycldf.dataset.Dataset.from_data(fname)
github cldf / cldf / examples / wals / cldf.py View on Github external
def bibrecord2source(req, src):
    """Build a ``Source`` from the BibTeX record of *src*.

    The record is extended with a ``wals_url`` field holding the resource URL
    of *src*; the genre falls back to ``'misc'`` when the record has none.
    """
    record = src.bibtex()
    record['wals_url'] = req.resource_url(src)
    if record.genre:
        genre = record.genre.value
    else:
        genre = 'misc'
    source_id = record.id
    fields = dict(record.items())
    return Source(genre, source_id, **fields)
github clld / clld / src / clld / web / adapters / cldf.py View on Github external
def source2source(req, source):
    """Convert a clld source into a ``sources.Source`` object.

    The first field is ``<dataset id>_url`` with the resource URL of *source*;
    the remaining fields come from the BibTeX record, with list values joined
    by ``'; '``. The genre falls back to ``'misc'`` when the record has none.
    """
    record = source.bibtex()
    url_key = '%s_url' % req.dataset.id
    fields = OrderedDict({url_key: req.resource_url(source)})
    for name, val in record.items():
        if isinstance(val, list):
            val = '; '.join(val)
        fields[name] = val

    genre = record.genre
    if genre:
        # Use the ``value`` attribute when the genre has one, else the genre itself.
        genre = getattr(genre, 'value', genre)
    else:
        genre = 'misc'
    return sources.Source(genre, source.id, **fields)
github clld / clld / src / clld / web / adapters / cldf.py View on Github external
def create(self, req, filename=None, verbose=True, outfile=None):
        """Set up a CLDF dataset for ``req.dataset`` in a temporary directory.

        The dataset class is looked up by name (``cldf_cfg.module``) on the
        ``dataset`` module, instantiated in the temporary directory, given
        Dublin Core / DCAT properties taken from ``req.dataset``, and extended
        with table components.

        NOTE(review): ``filename``, ``verbose`` and ``outfile`` are not used in
        the lines visible here; presumably they are used further down -- confirm.
        """
        # Configuration object registered for the ICldfConfig interface.
        cldf_cfg = req.registry.getUtility(ICldfConfig)

        with TemporaryDirectory() as tmpd:
            # Resolve the dataset class from its configured name.
            cls = getattr(dataset, cldf_cfg.module)
            ds = cls.in_dir(tmpd)
            # Dataset-level metadata, all derived from the request's dataset.
            ds.properties['dc:bibliographicCitation'] = text_citation(req, req.dataset)
            ds.properties['dc:publisher'] = '%s, %s' % (
                req.dataset.publisher_name, req.dataset.publisher_place)
            ds.properties['dc:license'] = req.dataset.license
            ds.properties['dc:issued'] = req.dataset.published.isoformat()
            ds.properties['dc:title'] = req.dataset.name
            ds.properties['dc:creator'] = req.dataset.formatted_editors()
            ds.properties['dc:identifier'] = req.resource_url(req.dataset)
            ds.properties['dcat:accessURL'] = req.route_url('download')
            # Optional components are added only when the database has matching rows.
            if DBSession.query(Sentence).count():
                ds.add_component('ExampleTable')
            if DBSession.query(DomainElement).count():
                # CodeTable gets an extra integer column named 'Number'.
                ds.add_component('CodeTable', {'name': 'Number', 'datatype': 'integer'})
            # These two components are added unconditionally.
            ds.add_component('ParameterTable')
            ds.add_component('LanguageTable')
github lingpy / lingpy / lingpy / convert / cldf.py View on Github external
form : str (default="ipa")
        The column in which the unsegmented phonetic strings are stored.
    note : str (default=None)
        The column in which you store your comments.
    form_in_source : str (default=None)
        The column in which you store the original form in the source.
    source : str (default=None)
        The column in which you store your source information. 
    alignment : str (default="alignment")
        The column in which you store the alignments.
    """
    if not cldf:
        raise ValueError('The package pycldf needs to be installed')

    # create cldf-dataset
    ds = CLDF_Wordlist.in_dir(path)
    # add sources if they are available
    ds.add_sources(
            read_text(source_path) if source_path else '')
    # add components
    ds.add_component('LanguageTable')
    ds.add_component('ParameterTable', 'Concepticon_ID')
    ds.add_component('CognateTable')
    ds.add_columns('FormTable', 'form_in_source')

    languages, parameters, forms, cognates = {}, {}, [], []
    for idx in wordlist:
        lid = slug(wordlist[idx, 'doculect'])
        if lid not in languages:
            languages[lid] = dict(
                    ID=lid,
                    Name=wordlist[idx, 'doculect'],

pycldf

A python library to read and write CLDF datasets

Apache-2.0
Latest version published 6 days ago

Package Health Score

70 / 100
Full package analysis