How to use the langcodes.util.data_filename function in langcodes

To help you get started, we’ve selected a few langcodes examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: LuminosoInsight/langcodes — langcodes/load_subtags.py (view on GitHub)
def main():
    """Create the subtag database and populate it from every bundled
    data source: CLDR, the IANA registry, custom aliases, and
    Wiktionary language codes (English)."""
    with LanguageDB(data_filename('subtags.db')) as subtag_db:
        # Fresh schema first, then load each source in order.
        subtag_db.setup()
        load_cldr(subtag_db, Path(data_filename('cldr')))
        load_registry(subtag_db, parse_registry(), 'en')
        load_custom_aliases(subtag_db, Path(data_filename('aliases.csv')))
        load_wiktionary_codes(subtag_db, 'en', Path(data_filename('wiktionary/codes-en.csv')))
Source: LuminosoInsight/langcodes — langcodes/load_subtags.py (view on GitHub)
def main():
    """Build and fill the subtag database from all shipped data
    sources (CLDR, IANA registry, alias overrides, Wiktionary codes)."""
    database = LanguageDB(data_filename('subtags.db'))
    with database as conn:
        conn.setup()  # create tables before loading anything
        load_cldr(conn, Path(data_filename('cldr')))
        load_registry(conn, parse_registry(), 'en')
        load_custom_aliases(conn, Path(data_filename('aliases.csv')))
        load_wiktionary_codes(conn, 'en', Path(data_filename('wiktionary/codes-en.csv')))
Source: LuminosoInsight/langcodes — langcodes/db.py (view on GitHub)
def parent_locales(self):
        """
        CLDR's list of which locales are "parents" of other locales.

        Returns the 'parentLocale' mapping extracted from CLDR's
        supplemental parentLocales.json data file.
        """
        # Use a context manager so the data file is closed promptly;
        # the original passed open() directly to json.load and leaked
        # the handle until garbage collection.
        with open(data_filename('cldr/supplemental/parentLocales.json'),
                  encoding='ascii') as pl_file:
            pl_json = json.load(pl_file)
        return pl_json['supplemental']['parentLocales']['parentLocale']
Source: LuminosoInsight/langcodes — langcodes/registry_parser.py (view on GitHub)
def parse_registry():
    """
    Generate dictionaries of record data parsed from the bundled IANA
    language-subtag registry file.
    """
    registry_path = data_filename('language-subtag-registry.txt')
    with open(registry_path, encoding='utf-8') as registry_file:
        # Yield each record lazily so the file stays open only while
        # the consumer is still iterating.
        for record in parse_file(registry_file):
            yield record
Source: LuminosoInsight/langcodes — langcodes/names.py (view on GitHub)
A small amount of fuzzy matching is supported: if the name can be
    shortened or lengthened to match a single language name, you get that
    language. This allows, for example, "Hakka Chinese" to match "Hakka".

    Occasionally, names are ambiguous in a way that can be resolved by
    specifying what name the language is supposed to be in. For example,
    there is a language named 'Malayo' in English, but it's different from
    the language named 'Malayo' in Spanish (which is Malay). Specifying the
    language will look up the name in a trie that is only in that language.
    """
    assert '/' not in language, "Language codes cannot contain slashes"
    assert '-' not in language, "This code should be reduced to a language subtag only"
    trie_name = '{}/name_to_{}'.format(language, category)
    if trie_name not in TRIES:
        TRIES[trie_name] = load_trie(data_filename('trie/{}.marisa'.format(trie_name)))

    trie = TRIES[trie_name]
    lookup = normalize_name(name)
    if lookup in trie:
        return get_trie_value(trie, lookup)
    else:
        # Is this a language plus extra junk? Maybe it has "...isch", "... language",
        # or "... Chinese" attached to it, for example.
        prefixes = trie.prefixes(lookup)
        if prefixes and len(prefixes[-1]) >= 4:
            return get_trie_value(trie, prefixes[-1])
        else:
            return None
Source: LuminosoInsight/langcodes — langcodes/load_subtags.py (view on GitHub)
def main():
    """Set up the subtag database and import every data source it
    depends on, in dependency order."""
    with LanguageDB(data_filename('subtags.db')) as db_handle:
        db_handle.setup()
        # Load sources one at a time: CLDR data, the parsed IANA
        # registry, custom aliases, then Wiktionary's English codes.
        load_cldr(db_handle, Path(data_filename('cldr')))
        load_registry(db_handle, parse_registry(), 'en')
        load_custom_aliases(db_handle, Path(data_filename('aliases.csv')))
        load_wiktionary_codes(db_handle, 'en', Path(data_filename('wiktionary/codes-en.csv')))
Source: LuminosoInsight/langcodes — langcodes/load_subtags.py (view on GitHub)
def main():
    """Initialize subtags.db and populate it from the packaged
    language data (CLDR, IANA registry, aliases, Wiktionary)."""
    with LanguageDB(data_filename('subtags.db')) as store:
        store.setup()
        load_cldr(store, Path(data_filename('cldr')))
        load_registry(store, parse_registry(), 'en')
        load_custom_aliases(store, Path(data_filename('aliases.csv')))
        load_wiktionary_codes(store, 'en', Path(data_filename('wiktionary/codes-en.csv')))