For each short language name, figure out its long name.
Arguments:
short_langs --- Array of strings. Each string is the short name of
a language and should be 3 characters long (longer values are fine
as well; only the first 3 characters are used for the lookup).
Returns:
A dictionary: keys are the short language names, values are the
corresponding long language names.
"""
long_langs = {}
for short_lang in short_langs:
try:
try:
country = pycountry.languages.get(terminology=short_lang[:3])
except KeyError:
country = pycountry.languages.get(bibliographic=short_lang[:3])
extra = None
if "_" in short_lang:
extra = short_lang.split("_")[1]
long_lang = country.name
if extra is not None:
long_lang += " (%s)" % (extra)
long_langs[short_lang] = long_lang
except KeyError as exc:
print ("Warning: Long name not found for language '%s'."
% (short_lang))
print (" Exception was: %s" % (str(exc)))
print (" Will use short name as long name.")
long_langs[short_lang] = short_lang
return long_langs
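# A minimal sketch of the lookup performed above, assuming a recent pycountry
# where ISO 639-3 codes are exposed via the 'alpha_3' field (the
# 'terminology'/'bibliographic' keywords used above belong to older releases).
# The helper name and the sample codes below are illustrative only.
import pycountry

def _short_to_long(short_lang):
    lang = pycountry.languages.get(alpha_3=short_lang[:3])
    if lang is None:
        return short_lang  # fall back to the short name, like the code above
    if "_" in short_lang:
        return "%s (%s)" % (lang.name, short_lang.split("_")[1])
    return lang.name

# _short_to_long("fra")    -> "French"
# _short_to_long("deu_DE") -> "German (DE)"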
def _try_get_language(param, param_name=None):
try:
if param_name == 'alpha2':
return pycountry.languages.get(alpha2=param)
elif param_name == 'bibliographic':
return pycountry.languages.get(bibliographic=param)
elif param_name == 'terminology':
return pycountry.languages.get(terminology=param)
elif param_name == 'common_name':
return pycountry.languages.get(common_name=param)
else:
return pycountry.languages.get(name=param)
except Exception:
return None
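# Hypothetical usage of _try_get_language above: each call returns either a
# pycountry record or None, so lookups can be chained without try/except.
# The field names mirror the older pycountry keywords used in this snippet.
lang = _try_get_language('en', 'alpha2') or _try_get_language('eng', 'terminology')
if lang is not None:
    print(lang.name)  # e.g. "English" when either lookup succeeds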
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.dialects.postgresql import UUID
from critiquebrainz.data.model.vote import Vote
from critiquebrainz.data.model.revision import Revision
from critiquebrainz.data.model.mixins import DeleteMixin
from brainzutils import cache
from werkzeug.exceptions import BadRequest
from flask_babel import lazy_gettext
from datetime import datetime, timedelta
from random import shuffle
import pycountry
DEFAULT_LICENSE_ID = u"CC BY-SA 3.0"
supported_languages = []
for lang in list(pycountry.languages):
if hasattr(lang, 'iso639_1_code'):
supported_languages.append(lang.iso639_1_code)
ENTITY_TYPES = [
'event',
'place',
'release_group',
]
class Review(db.Model, DeleteMixin):
__tablename__ = 'review'
CACHE_NAMESPACE = 'Review'
id = db.Column(UUID, primary_key=True, server_default=db.text('uuid_generate_v4()'))
entity_id = db.Column(UUID, index=True, nullable=False)
'languages': ('/language', lambda x: [pycountry.languages.get(alpha2=x).terminology])
}
rlz_pointer = 8
# Examples of longer RLZ values (2- and 5-char lang codes):
# 1C1CHBF_en-GBGB901GB901
# 1C1GCEU_enUS820US820
if len(rlz_string) >= 11:
# A dash at this position means it is a 5-char lang code rather than the 2-char code
if rlz_string[10] == '-':
language_code = rlz_string[rlz_pointer:rlz_pointer+5]
rlz_pointer += 5
# langcodes was having install issues on macOS; not using it for now in
# order to not complicate Unfurl's install. Pycountry's languages isn't
# as good (only alpha_2 and alpha_3) but better than nothing for now.
# Old implementation:
# language_name = langcodes.Language.get(language_code).language_name()
language_name = pycountry.languages.get(alpha_2=language_code[:2]).name
else:
language_code = rlz_string[rlz_pointer:rlz_pointer+2]
# language_name = langcodes.Language.get(language_code).language_name()
language_name = pycountry.languages.get(alpha_2=language_code).name
rlz_pointer += 2
# Example of RLZ value without cohorts
# 1C1GCEV_en
elif len(rlz_string) == 10:
language_code = rlz_string[rlz_pointer:rlz_pointer + 2]
# language_name = langcodes.Language.get(language_code).language_name()
language_name = pycountry.languages.get(alpha_2=language_code).name
rlz_pointer += 2
else:
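# A small self-contained walk-through of the two RLZ layouts described in the
# comments above, using the sample strings from those comments: the language
# code starts at offset 8, and a dash at offset 10 signals a 5-char code.
import pycountry

for sample in ('1C1CHBF_en-GBGB901GB901', '1C1GCEV_en'):
    if len(sample) >= 11 and sample[10] == '-':
        code = sample[8:13]   # e.g. 'en-GB'
    else:
        code = sample[8:10]   # e.g. 'en'
    # Only the first two characters map to an ISO 639-1 entry in pycountry.
    print(code, pycountry.languages.get(alpha_2=code[:2]).name)  # -> English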
def get_iso3(languages):
if languages is None:
languages = []
supported = []
for lang in languages:
if lang is None or len(lang.strip()) not in [2, 3]:
continue
lang = lang.lower().strip()
if len(lang) == 2:
try:
c = pycountry.languages.get(iso639_1_code=lang)
lang = c.iso639_3_code
except KeyError:
continue
supported.append(lang)
supported.append('eng')
return '+'.join(sorted(set(supported)))
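# Hedged usage notes for get_iso3 above (expected values assume the older
# pycountry attribute names used in the snippet):
#   get_iso3(['en', 'fr']) -> 'eng+fra'
#   get_iso3(['deu'])      -> 'deu+eng'
#   get_iso3(None)         -> 'eng'
# On recent pycountry releases the same 2-to-3 letter promotion looks like:
import pycountry
assert pycountry.languages.get(alpha_2='en').alpha_3 == 'eng'
assert pycountry.languages.get(alpha_2='fr').alpha_3 == 'fra'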
def get_language(self, obj):
"""Export language to the Alpha-2 code (if available)."""
lang = obj['metadata'].get('language', None)
if lang:
lang_res = pycountry.languages.get(alpha_3=lang)
if not lang_res or not hasattr(lang_res, 'alpha_2'):
return None
return lang_res.alpha_2
return None
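# Quick illustration of the alpha_3 -> alpha_2 mapping used by get_language
# above; some ISO 639-3 entries (e.g. 'ast', Asturian) carry no two-letter
# code, which is what the hasattr() guard protects against.
import pycountry
assert pycountry.languages.get(alpha_3='deu').alpha_2 == 'de'
assert not hasattr(pycountry.languages.get(alpha_3='ast'), 'alpha_2')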
st_lang = self.st_util.get_subtitle_elem(st_dict, 'Language')
if not st_lang:
return
try:
st_lang_obj = pycountry.languages.lookup(st_lang)
except LookupError:
raise CheckException("Subtitle language from XML could not "
"be detected : {}".format(st_lang))
cpl_lang = asset.get('Language')
if not cpl_lang:
return
try:
    cpl_lang_obj = pycountry.languages.lookup(cpl_lang)
except LookupError:
    raise CheckException("Subtitle language from CPL could not "
                         "be detected : {}".format(cpl_lang))
if st_lang_obj != cpl_lang_obj:
raise CheckException(
"Subtitle language mismatch, CPL claims {} but XML {}".format(
cpl_lang_obj.name, st_lang_obj.name))
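# pycountry.languages.lookup() (used above) matches case-insensitively across
# all fields, so the XML and CPL values can be spelled differently and still
# resolve to the same record; unknown values raise LookupError.
import pycountry
assert (pycountry.languages.lookup('de')
        is pycountry.languages.lookup('German')
        is pycountry.languages.lookup('DEU'))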
def apply(self, text, evaluation):
'LanguageIdentify[text_String]'
import langid # see https://github.com/saffsd/langid.py
# an alternative: https://github.com/Mimino666/langdetect
import pycountry
code, _ = langid.classify(text.get_string_value())
try:
language = pycountry.languages.get(alpha_2=code)
except KeyError:
evaluation.message('LanguageIdentify', 'langnotfound', String(code))
return Symbol("$Failed")
return String(language.name)
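# Standalone sketch of the same identify-then-name flow outside Mathics.
# Assumes the optional langid package is installed; recent pycountry returns
# None (rather than raising KeyError) for unknown codes, hence the guard.
import langid
import pycountry

code, _score = langid.classify("Bonjour tout le monde")
language = pycountry.languages.get(alpha_2=code)
print(language.name if language is not None else code)  # typically "French"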