Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Normalise `name` with clean_name(), run it through the callable `cat`, and
# build a list of token strings whose form depends on `version`.
#
#   cat     -- callable applied to the cleaned name; its result is iterated
#              token by token (presumably a spaCy pipeline -- confirm).
#   name    -- the raw name string to process.
#   version -- compared case-insensitively (via .lower()) against
#              'clean', 'raw' and 'none'.
#
# NOTE(review): this excerpt has lost its indentation and ends without a
# visible return statement; whatever consumes `tokens` is not shown here.
# NOTE(review): the three checks are independent `if`s, not an if/elif chain.
# At most one can match, and if none matches `sc_name` and `tokens` are never
# assigned in the visible code.
def prepare_name(cat, name, version='CLEAN'):
""" Cleans up the name
"""
name = clean_name(name)
# 'clean': lower-cased lemma of every token that is neither flagged
# `_.is_punct` nor `_.to_skip`.
if version.lower() == 'clean':
sc_name = cat(name)
tokens = [str(t.lemma_).lower() for t in sc_name if not t._.is_punct
and not t._.to_skip]
# 'raw': `lower_` of every non-punctuation token; a token flagged
# `_.to_skip` is dropped only when it is not also a stop word (`is_stop`).
if version.lower() == 'raw':
sc_name = cat(name)
tokens = [t.lower_ for t in sc_name if not t._.is_punct
and not (t._.to_skip and not t.is_stop)]
# 'none': `lower_` of every token, with no filtering at all.
if version.lower() == 'none':
sc_name = cat(name)
tokens = [t.lower_ for t in sc_name]
# NOTE(review): fragment from the inside of a loop in a method -- it uses
# `self` and `continue`, but the enclosing `def`/`for` is not part of this
# excerpt and the indentation has been lost. `cui`, `_name`, `_new_cuis`,
# `version`, `skip_raw`, `only_existing` and `add_cleaner` are all defined
# outside the visible lines.
#
# Remember concept ids that are not yet present in self.cdb.cui2names.
if cui not in self.cdb.cui2names:
_new_cuis.add(cui)
# Skip this iteration when RAW processing is disabled via `skip_raw`, or when
# `only_existing` is set and the concept id is unknown to the database.
# `version` is compared case-sensitively against upper-case literals here.
if (version == "RAW" and skip_raw) or \
(only_existing and cui not in self.cdb.cui2names):
continue
# Save originals
# (`pretty_name` and `original_name` are not read again within this excerpt.)
pretty_name = _name
original_name = _name
name = _name
# The optional extra cleaner is applied only for the CLEAN version.
if version == "CLEAN" and add_cleaner is not None:
name = add_cleaner(name)
# NOTE(review): presumably runs for every version, not only inside the `if`
# above -- nesting cannot be confirmed without the original indentation.
name = clean_name(name)
# Clean and preprocess the name
sc_name = self.nlp(name)
# CLEAN uses lower-cased lemmas, RAW uses `lower_`; both drop tokens flagged
# `_.is_punct` or `_.to_skip`. For any other `version` value `tokens` is not
# assigned in the visible code.
if version == 'CLEAN':
tokens = [str(t.lemma_).lower() for t in sc_name if not t._.is_punct
and not t._.to_skip]
elif version == 'RAW':
tokens = [str(t.lower_) for t in sc_name if not t._.is_punct
and not t._.to_skip]
# `lower_` of every non-punctuation token, with `_.to_skip` tokens kept.
# NOTE(review): presumably computed for every version rather than only in the
# RAW branch -- confirm against the original indentation.
tokens_vocab = [t.lower_ for t in sc_name if not t._.is_punct]
# Don't allow concept names to be above concept_length_limit
# (the limit is read from self.CONCEPT_LENGTH_LIMIT and counts filtered tokens)
if len(tokens) > self.CONCEPT_LENGTH_LIMIT:
continue