# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def main():
    """
    Build the language-subtag database from all bundled data sources.

    Creates (or recreates) subtags.db and populates it, in order, from:
    CLDR data, the IANA subtag registry, custom aliases, and Wiktionary
    language codes. English ('en') is used as the reference language.
    """
    # Create the database; LanguageDB is a context manager, so the
    # connection is committed/closed even if a loader raises.
    with LanguageDB(data_filename('subtags.db')) as db:
        db.setup()
        load_cldr(db, Path(data_filename('cldr')))
        load_registry(db, parse_registry(), 'en')
        load_custom_aliases(db, Path(data_filename('aliases.csv')))
        load_wiktionary_codes(db, 'en', Path(data_filename('wiktionary/codes-en.csv')))
def main():
    """
    Build the language-subtag database from all bundled data sources.

    Creates (or recreates) subtags.db and populates it, in order, from:
    CLDR data, the IANA subtag registry, custom aliases, and Wiktionary
    language codes. English ('en') is used as the reference language.
    """
    # NOTE(review): this definition duplicates an identical `main()` earlier
    # in the file; the later definition shadows the earlier one. Consider
    # removing the duplicate.
    with LanguageDB(data_filename('subtags.db')) as db:
        db.setup()
        load_cldr(db, Path(data_filename('cldr')))
        load_registry(db, parse_registry(), 'en')
        load_custom_aliases(db, Path(data_filename('aliases.csv')))
        load_wiktionary_codes(db, 'en', Path(data_filename('wiktionary/codes-en.csv')))
def parent_locales(self):
    """
    CLDR's list of which locales are "parents" of other locales.

    Reads CLDR's parentLocales.json supplemental data and returns the
    inner 'parentLocale' mapping from child locale codes to their
    parent locale codes.
    """
    # Use a context manager so the file handle is closed deterministically;
    # the original passed an open file object to json.load and relied on
    # garbage collection to close it.
    filename = data_filename('cldr/supplemental/parentLocales.json')
    with open(filename, encoding='ascii') as pl_file:
        pl_json = json.load(pl_file)
    return pl_json['supplemental']['parentLocales']['parentLocale']
def parse_registry():
    """
    Yield a sequence of dictionaries, containing the info in the included
    IANA subtag registry file.
    """
    with open(data_filename('language-subtag-registry.txt'),
              encoding='utf-8') as data_file:
        # 'yield from' instead of returning, so that we only close the file
        # when finished.
        yield from parse_file(data_file)
A small amount of fuzzy matching is supported: if the name can be
shortened or lengthened to match a single language name, you get that
language. This allows, for example, "Hakka Chinese" to match "Hakka".
Occasionally, names are ambiguous in a way that can be resolved by
specifying what name the language is supposed to be in. For example,
there is a language named 'Malayo' in English, but it's different from
the language named 'Malayo' in Spanish (which is Malay). Specifying the
language will look up the name in a trie that is only in that language.
"""
# NOTE(review): the enclosing `def` line is not visible in this view; the
# body reads parameters `language`, `category`, and `name` — confirm the
# signature against the full file.
# NOTE(review): `assert` is stripped under `python -O`; if these are real
# input-validation checks, consider raising ValueError instead.
assert '/' not in language, "Language codes cannot contain slashes"
assert '-' not in language, "This code should be reduced to a language subtag only"
# Tries are loaded lazily from disk and memoized in the module-level
# TRIES cache, keyed by "<language>/name_to_<category>".
trie_name = '{}/name_to_{}'.format(language, category)
if trie_name not in TRIES:
TRIES[trie_name] = load_trie(data_filename('trie/{}.marisa'.format(trie_name)))
trie = TRIES[trie_name]
lookup = normalize_name(name)
if lookup in trie:
return get_trie_value(trie, lookup)
else:
# Is this a language plus extra junk? Maybe it has "...isch", "... language",
# or "... Chinese" attached to it, for example.
# Fall back to the longest trie key that is a prefix of the lookup,
# but only accept it if that prefix is at least 4 characters —
# presumably to avoid spurious matches on very short names (TODO
# confirm the rationale for the threshold).
prefixes = trie.prefixes(lookup)
if prefixes and len(prefixes[-1]) >= 4:
return get_trie_value(trie, prefixes[-1])
else:
return None
def main():
    """
    Build the language-subtag database from all bundled data sources.

    Creates (or recreates) subtags.db and populates it, in order, from:
    CLDR data, the IANA subtag registry, custom aliases, and Wiktionary
    language codes. English ('en') is used as the reference language.
    """
    # NOTE(review): this is the third identical definition of `main()` in
    # this file; only the last one survives at import time. Consider
    # deduplicating.
    with LanguageDB(data_filename('subtags.db')) as db:
        db.setup()
        load_cldr(db, Path(data_filename('cldr')))
        load_registry(db, parse_registry(), 'en')
        load_custom_aliases(db, Path(data_filename('aliases.csv')))
        load_wiktionary_codes(db, 'en', Path(data_filename('wiktionary/codes-en.csv')))
def main():
    """
    Build the language-subtag database from all bundled data sources.

    Creates (or recreates) subtags.db and populates it, in order, from:
    CLDR data, the IANA subtag registry, custom aliases, and Wiktionary
    language codes. English ('en') is used as the reference language.
    """
    # NOTE(review): this is the fourth identical definition of `main()` in
    # this file; only the last one survives at import time. Consider
    # deduplicating.
    with LanguageDB(data_filename('subtags.db')) as db:
        db.setup()
        load_cldr(db, Path(data_filename('cldr')))
        load_registry(db, parse_registry(), 'en')
        load_custom_aliases(db, Path(data_filename('aliases.csv')))
        load_wiktionary_codes(db, 'en', Path(data_filename('wiktionary/codes-en.csv')))