Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def is_eligible(text, n, lng):
    """Return True if *text* holds more than *n* words estimated to be in *lng*.

    The estimate is the probability that ``detect_langs`` assigns to *lng*
    multiplied by the number of whitespace-separated words in *text*.

    Args:
        text: The text to inspect.
        n: Threshold that the estimated word count must exceed.
        lng: Language code compared against each detected ``lang`` value.

    Returns:
        True when the estimate is strictly greater than *n*; False when it is
        not, or when *lng* is not among the detected languages.
    """
    for language in detect_langs(text):
        if language.lang != lng:
            continue
        # split() without arguments collapses runs of whitespace and ignores
        # leading/trailing blanks, so repeated spaces, tabs and newlines do
        # not inflate the word count the way split(" ") does.
        word_count = len(text.split())
        # Only the first entry matching *lng* is considered.
        return language.prob * word_count > n
    return False
def is_in_language(targetlang, text):
    """Report whether *text* appears to be written in *targetlang*.

    The FINNISH, SWEDISH and ENGLISH patterns are tried first, in that order;
    the first one that matches decides the answer by comparing *targetlang*
    with 'fi', 'sv' or 'en' respectively. Only when none of them matches is
    ``detect_langs`` consulted.

    Args:
        targetlang: Language code to test for.
        text: The text to inspect.

    Returns:
        True if *targetlang* is the language implied by the first matching
        pattern, or (when no pattern matches) is among the languages reported
        by ``detect_langs``. False otherwise, including when detection fails.
    """
    # Quick and dirty regex shortcuts for detecting the most common languages
    for pattern, code in ((FINNISH, 'fi'), (SWEDISH, 'sv'), (ENGLISH, 'en')):
        if pattern.search(text) is not None:
            return targetlang == code
    try:
        return any(lang.lang == targetlang for lang in detect_langs(text))
    except Exception:
        # Best effort: a detection failure counts as "not in the language".
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
def get_lang(text):
    """Pick a language code for *text*, with a bias toward Russian.

    Each candidate returned by ``langdetect.detect_langs`` is examined in
    order; a candidate whose code is "ru" has 0.2 added to its probability
    before it is compared. The candidate that compares greatest wins.

    Returns:
        The ``lang`` of the winning candidate, or "ru" when no candidate was
        selected (for example because detection raised
        ``LangDetectException``).
    """
    best = None
    try:
        for candidate in langdetect.detect_langs(text):
            if candidate.lang == "ru":
                # The boost is applied to the candidate object in place.
                candidate.prob += 0.2
            if best is None or best < candidate:
                best = candidate
    except langdetect.lang_detect_exception.LangDetectException:
        pass
    return "ru" if best is None else best.lang
def is_english(text):
    """Return True if *text* passes ``only_roman_chars`` and is detected as English.

    Args:
        text: The text to inspect.

    Returns:
        False when ``only_roman_chars(text)`` is falsy or when
        ``langdetect.detect_langs`` raises ``LangDetectException``; otherwise
        True exactly when 'en' is among the detected languages.
    """
    if not only_roman_chars(text):
        return False
    try:
        candidates = langdetect.detect_langs(text)
    except LangDetectException:
        return False
    # Always return a bool: the negative case used to fall off the end of the
    # function and yield None instead of False.
    return any(candidate.lang == 'en' for candidate in candidates)
subtitle = pysrt.open(path=srt_path, encoding='iso-8859-1')
except Exception:
try:
subtitle = pysrt.open(path=srt_path, encoding='utf-8')
except Exception:
# If we can't read it, we can't detect, so return
return None
# Read first 5 subtitle lines to determine the language
if len(subtitle) >= 5:
text = ''
for sub in subtitle[0:5]:
text += sub.text
# Detect the language with highest probability and return it if it's more than the required minimum probability
detected_languages = langdetect.detect_langs(text)
log.debug('Detected subtitle language(s): %s', detected_languages)
if len(detected_languages) > 0:
# Get first detected language (list is sorted according to probability, highest first)
detected_language = detected_languages[0]
language_probability = detected_language.prob
if language_probability >= autosubliminal.DETECTEDLANGUAGEPROBABILITY:
log.debug('Probability of detected subtitle language accepted: %s', detected_language)
return Language.fromietf(detected_language.lang)
else:
log.debug('Probability of detected subtitle language too low: %s', detected_language)
return None
keychain_module.device = self.device
keychain_module.output = None
keychain_result = keychain_module.run()
keychain_data = keychain_result["keychain_data"]
data = []
for key in keychain_data:
if (key["entitlement_group"] and \
keychain_id in key["entitlement_group"]) or (key["account"] and \
keychain_id in key["account"]) or (key["service"] and \
keychain_id in key["service"]):
data += [str(key['keychain_data'])]
report_data = []
for item in data:
lang = detect_langs(item)[0]
if lang.prob > float("0.{}".format(self.min_percentage)):
report_data += [item]
if report_data:
result.update({
"report": True,
"details": "The following data was found:\n* {}".format(
"\n* ".join(report_data))
})
return {
"{}_result".format(self.name()): result
}
subtitle = pysrt.open(path=srt_path, encoding='iso-8859-1')
except:
try:
subtitle = pysrt.open(path=srt_path, encoding='utf-8')
except:
# If we can't read it, we can't detect, so return
return None
# Read first 5 subtitle lines to determine the language
if len(subtitle) >= 5:
text = ""
for sub in subtitle[0:5]:
text += sub.text
# Detect the language with highest probability and return it if it's more than the required minimum probability
detected_languages = langdetect.detect_langs(text)
log.debug("Detected subtitle language(s): %s", detected_languages)
if len(detected_languages) > 0:
# Get first detected language (list is sorted according to probability, highest first)
detected_language = detected_languages[0]
language_probability = detected_language.prob
if language_probability >= autosubliminal.DETECTEDLANGUAGEPROBABILITY:
log.debug("Probability of detected subtitle language accepted: %s" % detected_language)
return Language.fromietf(detected_language.lang)
else:
log.debug("Probability of detected subtitle language too low: %s" % detected_language)
return None
def predict_proba(self, text: str) -> List[Dict[str, Any]]:
    """
    Estimate the language probabilities of a text.

    Parameters
    ----------
    text : str
        Text handed to ``detect_langs``.

    Returns
    -------
    List[Dict[str, Any]]
        One dict per detected language, in the order ``detect_langs``
        produced them. Each has a "lang" entry (the detected code passed
        through ``self.map2wili``) and a "prob" entry (the detected
        probability).
    """
    return [
        {"lang": self.map2wili(guess.lang), "prob": guess.prob}
        for guess in detect_langs(text)
    ]