Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def determine_spoken_language(comments):
    """Detect the spoken language of comments extracted from source code.

    Args:
        comments: String of comment text to analyse.

    Returns:
        The detected language as an ISO 639-1 code (ex: 'en'), or
        "unknown" when detection raises.
    """
    # DetectorFactory seed forces deterministic results on language assessment.
    DetectorFactory.seed = 0
    try:
        # Attempt language detection
        return detect(comments)
    except Exception as e:
        # A LangDetectException (matched by class name) means there is not
        # enough information to detect the language; that case silently
        # falls through to "unknown". Anything else is unexpected and logged.
        if e.__class__.__name__ != 'LangDetectException':
            # Use a format string with placeholders: assuming `logger` is a
            # standard logging.Logger, the message is built as `msg % args`,
            # so extra positional arguments without placeholders fail to
            # format and the error would never be logged properly.
            logger.info("%s - %s", e.__class__.__name__, e)
    return "unknown"
import re
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
import chardet
import cchardet
import langdetect
import langid
from wordless_text import wordless_matching
from wordless_utils import wordless_conversion, wordless_misc
# Force consistent results for language detection by fixing the seed on
# langdetect's DetectorFactory at module level
langdetect.DetectorFactory.seed = 0
def detect_encoding(main, file_path):
    """Read the bytes of ``file_path`` that the auto-detection settings allow.

    Args:
        main: Object exposing ``settings_custom``; its
            ``['auto_detection']['detection_settings']`` entry is read for
            ``number_lines_no_limit`` and ``number_lines``.
        file_path: Path of the file, opened in binary mode.

    NOTE(review): the visible body ends once the sample has been read;
    ``text`` and ``success`` are neither used nor returned here. Presumably
    the detection step and the return statement follow - confirm against the
    complete function.
    """
    text = b''
    success = True

    with open(file_path, 'rb') as f:
        detection_settings = main.settings_custom['auto_detection']['detection_settings']

        if detection_settings['number_lines_no_limit']:
            # No line limit: take the whole file in a single read instead of
            # concatenating it line by line.
            text = f.read()
        else:
            # Collect the leading lines and join them once; repeated
            # ``bytes +=`` copies the accumulated buffer on every line.
            line_limit = detection_settings['number_lines']
            head = []
            for i, line in enumerate(f):
                if i >= line_limit:
                    break
                head.append(line)
            text = b''.join(head)
def __init__(self, max_length=5000, seed=42):
    """Store the length limit and set up a seeded langdetect factory.

    Args:
        max_length: Value stored on ``self.max_length``.
        seed: Seed passed to the factory's ``set_seed``.
    """
    self.max_length = max_length

    # Create the factory, expose it on the instance, then seed it and load
    # the profiles from langdetect's PROFILES_DIRECTORY.
    detector_factory = langdetect.DetectorFactory()
    self.factory = detector_factory
    detector_factory.set_seed(seed)
    detector_factory.load_profile(langdetect.PROFILES_DIRECTORY)