Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get_groups_by_member(
self, member: Union[int, EntryType]) -> Set[GroupType]:
groups: Set[GroupType] = set()
if isinstance(member, Entry):
tid = member.tid
if tid is None:
raise ValueError("Argument member has no tid. "
"Have you add this entry into the datapack?")
elif isinstance(member, int):
tid = member
else:
raise TypeError("Can only get group via entry id (int) or the "
"group object itself (Entry).")
if not self.index.group_index_on:
self.index.build_group_index(self.groups)
for tid in self.index.group_index(tid):
entry: EntryType = self.get_entry(tid)
if self.validate_group(entry):
pack_name: A name to identify the data pack, which is helpful in
situation like serialization. It is suggested that the packs should
have different doc ids.
language: The language used by this data pack, default is English.
span_unit: The unit used for interpreting the Span object of this
data pack. Default is character.
"""
def __init__(self, pack_name: Optional[str] = None,
             language: str = 'eng', span_unit: str = 'character'):
    """Initialize the meta record and remember its language and span unit.

    Args:
        pack_name: Name handed to the parent initializer.
        language: Value stored on ``self.language``; defaults to ``'eng'``.
        span_unit: Value stored on ``self.span_unit``; defaults to
            ``'character'``.
    """
    super().__init__(pack_name)
    # Assigned left to right, so ``language`` is still set first.
    self.language, self.span_unit = language, span_unit
class DataPack(BasePack[Entry, Link, Group]):
# pylint: disable=too-many-public-methods
r"""A :class:`DataPack` contains a piece of natural language text and a
collection of NLP entries (annotations, links, and groups). The natural
language text could be a document, paragraph or in any other granularity.
Args:
pack_manager(PackManager): A manager that records global
information of packs, such as pack ids.
pack_name (str, optional): A name for this data pack.
"""
def __init__(self, pack_manager: PackManager,
pack_name: Optional[str] = None):
super().__init__(pack_manager, pack_name)
self._text = ""
"Generics",
"Annotation",
"Group",
"Link",
"MultiPackGeneric",
"MultiPackGroup",
"MultiPackLink",
"Query",
"SinglePackEntries",
"MultiPackEntries",
]
# Type alias: either a dict with string keys or a numpy array.
QueryType = Union[Dict[str, Any], np.ndarray]
class Generics(Entry):
    """Entry subclass whose constructor takes only the owning pack."""

    def __init__(self, pack: PackType):
        """Forward ``pack`` to the parent initializer as a keyword argument.

        Args:
            pack: The pack handed to the parent ``__init__``.
        """
        super().__init__(pack=pack)
@total_ordering
class Annotation(Entry):
r"""Annotation type entries, such as "token", "entity mention" and
"sentence". Each annotation has a :class:`Span` corresponding to its offset
in the text.
Args:
pack (PackType): The container that this annotation
will be added to.
begin (int): The offset of the first character in the annotation.
end (int): The offset of the last character in the annotation + 1.
"""
"MultiPackLink",
"Query",
"SinglePackEntries",
"MultiPackEntries",
]
# Type alias: either a dict with string keys or a numpy array.
QueryType = Union[Dict[str, Any], np.ndarray]
class Generics(Entry):
    """Entry subclass whose constructor takes only the owning pack."""

    def __init__(self, pack: PackType):
        """Forward ``pack`` to the parent initializer as a keyword argument.

        Args:
            pack: The pack handed to the parent ``__init__``.
        """
        super().__init__(pack=pack)
@total_ordering
class Annotation(Entry):
r"""Annotation type entries, such as "token", "entity mention" and
"sentence". Each annotation has a :class:`Span` corresponding to its offset
in the text.
Args:
pack (PackType): The container that this annotation
will be added to.
begin (int): The offset of the first character in the annotation.
end (int): The offset of the last character in the annotation + 1.
"""
# Constructor: records the span given by begin and end, then runs the parent
# initializer with the pack.
def __init__(self, pack: PackType, begin: int, end: int):
# Annotation-only declaration: no value is bound to _span on this line
# (presumably set_span assigns it; confirm against set_span).
self._span: Optional[Span]
# set_span is called before the parent initializer runs.
self.set_span(begin, end)
super().__init__(pack)
def get_child(self) -> Entry:
    r"""Get the child entry of the link.

    Returns:
        An instance of :class:`Entry` that is the child of the link.

    Raises:
        IncompleteEntryError: If the child of this link has not been set.
    """
    if self._child is None:
        # This is the child accessor, so the message must name the child;
        # it previously said "parent", which misdirected debugging.
        raise IncompleteEntryError("The child of this link is not set.")
    # The stored child is a (pack index, tid) pair; the pack resolves it.
    pack_idx, child_tid = self._child
    return self.pack.get_subentry(pack_idx, child_tid)
# pylint: disable=duplicate-bases
class MultiPackGroup(MultiEntry, BaseGroup[Entry]):
r"""Group type entries, such as "coreference group". Each group has a set
of members.
"""
MemberType: Type[Entry] = Entry
# Constructor: starts with an empty member list, runs the parent initializer
# with the pack, then adds any members supplied by the caller.
def __init__(
self, pack: PackType, members: Optional[Iterable[Entry]] = None
): # pylint: disable=useless-super-delegation
# Members are kept as a list of int pairs, created empty before the parent
# initializer runs.
self._members: List[Tuple[int, int]] = []
super().__init__(pack)
# When members is None the group is left empty.
if members is not None:
self.add_members(members)
def add_member(self, member: Entry):
if not isinstance(member, self.MemberType):
raise TypeError(
Returns:
An instance of :class:`Entry` that is the child of the link.
"""
if self._child is None:
raise IncompleteEntryError("The parent of this link is not set.")
pack_idx, child_tid = self._child
return self.pack.get_subentry(pack_idx, child_tid)
# pylint: disable=duplicate-bases
class MultiPackGroup(MultiEntry, BaseGroup[Entry]):
r"""Group type entries, such as "coreference group". Each group has a set
of members.
"""
MemberType: Type[Entry] = Entry
# Constructor: starts with an empty member list, runs the parent initializer
# with the pack, then adds any members supplied by the caller.
def __init__(
self, pack: PackType, members: Optional[Iterable[Entry]] = None
): # pylint: disable=useless-super-delegation
# Members are kept as a list of int pairs, created empty before the parent
# initializer runs.
self._members: List[Tuple[int, int]] = []
super().__init__(pack)
# When members is None the group is left empty.
if members is not None:
self.add_members(members)
def add_member(self, member: Entry):
if not isinstance(member, self.MemberType):
raise TypeError(
f"The members of {type(self)} should be "
f"instances of {self.MemberType}, but got {type(member)}")
self._members.append(
]
# Type alias: a dict keyed by MultiPackLink or MultiPackGroup types, whose
# values are either a dict or a list.
MdRequest = Dict[
Type[Union[MultiPackLink, MultiPackGroup]],
Union[Dict, List]
]
class MultiPackMeta(BaseMeta):
    r"""Meta information of a MultiPack."""
    # Defines nothing of its own; everything comes from BaseMeta.
# pylint: disable=too-many-public-methods
class MultiPack(BasePack[Entry, MultiPackLink, MultiPackGroup]):
r"""A :class:`MultiPack' contains multiple DataPacks and a collection of
cross-pack entries (links, and groups)
"""
def __init__(self, pack_manager: PackManager,
pack_name: Optional[str] = None):
super().__init__(pack_manager, pack_name)
# Store the global ids.
self._pack_ref: List[int] = []
# Store the reverse mapping from global id to the pack index.
self._inverse_pack_ref: Dict[int, int] = {}
# Store the pack names.
self._pack_names: List[str] = []
# Store the reverse mapping from name to the pack index.
"""
return (str(type(self)), self._tid) < (str(type(other)), other.tid)
def __hash__(self) -> int:
    r"""Hash this object from its concrete type and its ``_tid``.

    Returns:
        The hash of the ``(type(self), self._tid)`` pair.
    """
    identity = (type(self), self._tid)
    return hash(identity)
@property
def index_key(self) -> Hashable:
    """Return the key under which this object is indexed: its ``_tid``.

    Note: think carefully about how the index key is used.
    """
    key: Hashable = self._tid
    return key
class MultiEntry(Entry, ABC):
def __setattr__(self, key, value):
"""
Handle the special sub-entry case in the multi pack case.
Args:
key:
value:
Returns:
"""
self._check_attr_type(key, value)
if isinstance(value, Entry):
# Save a pointer of the value.
self.__dict__[key] = value.as_pointer(self)
"""
if self.pack is None:
raise ValueError("Cannot get child because link is not"
" attached to any data pack.")
if self._child is None:
raise ValueError("The child of this entry is not set.")
return self.pack.get_entry(self._child)
# pylint: disable=duplicate-bases
class Group(BaseGroup[Entry]):
r"""Group is an entry that represent a group of other entries. For example,
a "coreference group" is a group of coreferential entities. Each group will
store a set of members, no duplications allowed.
"""
MemberType: Type[Entry] = Entry
# Constructor: starts with an empty member set, then passes the pack and the
# optional members on to the parent initializer.
def __init__(
self,
pack: PackType,
members: Optional[Set[Entry]] = None,
): # pylint: disable=useless-super-delegation
# Members are tracked as a set of ints (presumably entry tids; confirm),
# created empty before the parent initializer runs.
self._members: Set[int] = set()
super().__init__(pack, members)
def add_member(self, member: Entry):
r"""Add one entry to the group.
Args:
member: One member to be added to the group.
"""
if not isinstance(member, self.MemberType):
def as_pointer(self, from_entry: "Entry") -> "Pointer":
"""
Get a pointer of the entry relative to this entry
Args:
from_entry: The entry relative from.
Returns:
A pointer relative to this entry.
"""
if isinstance(from_entry, MultiEntry):
return Pointer(self.tid)
elif isinstance(from_entry, Entry):
raise ValueError(
"Do not support reference a multi pack entry from an entry.")