Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""
Args:
patstr (str)
Returns:
List[dict]
"""
pattern = []
for tokpatstr in constants.RE_MATCHER_TOKPAT_DELIM.split(patstr):
parts = tokpatstr.split(":")
if 2 <= len(parts) <= 3:
attr = parts[0]
attr_val = parts[1]
if attr and attr_val:
# handle special bool and int attribute values
special_val = constants.RE_MATCHER_SPECIAL_VAL.match(attr_val)
if special_val:
attr_val = eval(special_val.group(0))
tokpat = {attr: attr_val}
# handle wildcard tokens
else:
tokpat = {}
# handle quantifier ops
try:
op_val = parts[2]
if op_val in constants.MATCHER_VALID_OPS:
tokpat["OP"] = op_val
else:
raise ValueError(
"op={} invalid; valid choices are {}".format(
op_val, constants.MATCHER_VALID_OPS)
)
def __init__(
    self,
    data_dir=constants.DEFAULT_DATA_DIR.joinpath(NAME),
    lang="en",
    word_rep="lemmapos",
    min_freq=3,
):
    """
    Validate the requested language and word representation, then store
    the language on the instance.

    Args:
        data_dir: Defaults to ``constants.DEFAULT_DATA_DIR.joinpath(NAME)``.
        lang (str): Must be a member of ``self._lang_map``.
        word_rep (str): Must be a member of ``self._word_reps``.
        min_freq (int)

    Raises:
        ValueError: If ``lang`` or ``word_rep`` is not a valid option.
    """
    super().__init__(NAME, meta=META)
    # NOTE(review): ``data_dir``, ``word_rep`` and ``min_freq`` are accepted
    # but not stored anywhere in the lines visible here -- confirm against
    # the full source that they are handled further down.
    if lang not in self._lang_map:
        valid_langs = sorted(self._lang_map.keys())
        lang_msg = "lang='{}' is invalid; valid options are {}".format(
            lang, valid_langs
        )
        raise ValueError(lang_msg)
    if word_rep not in self._word_reps:
        word_rep_msg = "word_rep='{}' is invalid; valid options are {}".format(
            word_rep, self._word_reps
        )
        raise ValueError(word_rep_msg)
    self.lang = lang
def normalize_whitespace(text):
    """
    Collapse whitespace in ``text``: runs of linebreaks become a single
    newline and runs of other spacing become a single space. Leading and
    trailing whitespace is stripped from the result.

    Args:
        text (str)

    Returns:
        str
    """
    # Linebreaks are collapsed first; the spacing pattern is then applied
    # to that intermediate result.
    with_single_newlines = constants.RE_LINEBREAK.sub(r"\n", text)
    with_single_spaces = constants.RE_NONBREAKING_SPACE.sub(
        " ", with_single_newlines
    )
    return with_single_spaces.strip()
def replace_urls(text, replace_with="*URL*"):
    """
    Substitute ``replace_with`` for every URL found in ``text``.

    Args:
        text (str)
        replace_with (str)

    Returns:
        str
    """
    # Matches of the short-URL pattern are substituted first, then the
    # general URL pattern runs over that result.
    without_short_urls = constants.RE_SHORT_URL.sub(replace_with, text)
    return constants.RE_URL.sub(replace_with, without_short_urls)
def replace_emails(text, replace_with="*EMAIL*"):
    """
    Substitute ``replace_with`` for every email address found in ``text``.

    Args:
        text (str)
        replace_with (str)

    Returns:
        str
    """
    email_pattern = constants.RE_EMAIL
    return email_pattern.sub(replace_with, text)
def replace_phone_numbers(text, replace_with="*PHONE*"):
    """
    Substitute ``replace_with`` for every phone number found in ``text``.

    Args:
        text (str)
        replace_with (str)

    Returns:
        str
    """
    phone_pattern = constants.RE_PHONE
    return phone_pattern.sub(replace_with, text)
def __init__(self, data_dir=constants.DEFAULT_DATA_DIR.joinpath(NAME)):
    """
    Record where the dataset lives on disk; nothing is read here.

    Args:
        data_dir: Directory for this dataset; converted with
            ``utils.to_path`` and resolved. Defaults to
            ``constants.DEFAULT_DATA_DIR.joinpath(NAME)``.
    """
    super().__init__(NAME, meta=META)
    resolved_dir = utils.to_path(data_dir).resolve()
    texts_dir = resolved_dir.joinpath("udhr_txt")
    self.data_dir = resolved_dir
    self._texts_dirpath = texts_dir
    self._index_filepath = texts_dir.joinpath("index.xml")
    # Both start unset; this method does not populate them.
    self._index = None
    self.langs = None
def get_subjects_of_verb(verb):
    """
    Collect every subject of ``verb`` from the dependency parse.

    Args:
        verb: Token whose ``lefts`` are searched for subject dependencies.

    Returns:
        list: Direct subjects (left children whose ``dep_`` is in
        ``constants.SUBJ_DEPS``) followed by their conjuncts.
    """
    subjects = []
    for left_tok in verb.lefts:
        if left_tok.dep_ in constants.SUBJ_DEPS:
            subjects.append(left_tok)
    # Walk the list by index while appending to it, so conjuncts that were
    # just added are themselves checked for further conjuncts.
    pos = 0
    while pos < len(subjects):
        subjects.extend(_get_conjuncts(subjects[pos]))
        pos += 1
    return subjects
def get_objects_of_verb(verb):
    """
    Collect every object of ``verb`` from the dependency parse, counting
    open clausal complements (``xcomp``) as objects too.

    Args:
        verb: Token whose ``rights`` are searched for object dependencies.

    Returns:
        list: Right children whose ``dep_`` is in ``constants.OBJ_DEPS``,
        then right children whose ``dep_`` is ``"xcomp"``, then conjuncts
        of the tokens collected so far.
    """
    direct_objects = [
        right_tok for right_tok in verb.rights
        if right_tok.dep_ in constants.OBJ_DEPS
    ]
    # Open clausal complements are gathered in a second pass so that they
    # come after all the direct objects.
    open_complements = [
        right_tok for right_tok in verb.rights if right_tok.dep_ == "xcomp"
    ]
    objects = direct_objects + open_complements
    # Walk the list by index while appending to it, so conjuncts that were
    # just added are themselves checked for further conjuncts.
    pos = 0
    while pos < len(objects):
        objects.extend(_get_conjuncts(objects[pos]))
        pos += 1
    return objects
weighting used in building DepecheMood matrix.
Returns:
Dict[dict]: Top-level keys are Lemma#POS strings, values are nested dicts
with emotion names as keys and weights as floats.
References:
Staiano, J., & Guerini, M. (2014). "DepecheMood: a Lexicon for Emotion
Analysis from Crowd-Annotated News". Proceedings of ACL-2014. (arXiv:1405.1605)
Data available at https://github.com/marcoguerini/DepecheMood/releases .
See Also:
:func:`download_depechemood`
"""
if data_dir is None:
data_dir = os.path.join(constants.DEFAULT_DATA_DIR, "depechemood", "DepecheMood_V1.0")
filepath = os.path.join(
data_dir, "DepecheMood_{weighting}.txt".format(weighting=weighting)
)
delimiter = b"\t" if compat.PY2 else "\t"
# HACK: Py2's csv module fail
try:
with io.open(filepath, mode="rt") as csvfile:
csvreader = csv.reader(csvfile, delimiter=delimiter)
rows = list(csvreader)
except (OSError, IOError):
LOGGER.exception(
"Unable to load DepecheMood from %s."
"\n\nHave you downloaded the data? If not, you can use the "
"`textacy.lexicon_methods.download_depechemood()` function."
"\n\nIf so, have you given the correct `data_dir`? The directory "
"should have a `DepecheMood_V1.0` subdirectory, within which are "