How to use the forte.data.span.Span class in forte

To help you get started, we’ve selected a few forte examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github asyml / forte / forte / data / readers / html_reader.py View on Github external
def collect_span(self, begin, end):
        """Record the span ``[begin, end)`` paired with an empty string.

        The pair is appended to ``self.spans``.
        """
        span_entry = (Span(begin, end), '')
        self.spans.append(span_entry)
github asyml / forte / forte / data / data_pack.py View on Github external
inverse_span, original_span = self.processed_original_spans[-1]
                if inverse_span.end <= input_index < len_processed_text:
                    increment = original_span.end - inverse_span.end
                    orig_index = input_index + increment
                else:
                    # check whether the input_index is invalid given the
                    # alignment mode or lies outside the processed string
                    raise ValueError(f"The input span either does not adhere "
                                     f"to the {align_mode} alignment mode or "
                                     f"lies outside to the processed string.")
            return orig_index

        orig_begin = get_original_index(req_begin, True, align_mode)
        orig_end = get_original_index(req_end - 1, False, align_mode) + 1

        return Span(orig_begin, orig_end)
github asyml / forte / forte / data / data_utils_io.py View on Github external
Returns:
        modified_text: Text after modification.
        replace_back_operations: A list of spans and the corresponding
            replacement string that the span in the modified string is to be
            replaced with to obtain the original string.
        processed_original_spans: List of processed span and its corresponding
            original span.
        orig_text_len: length of original text.
    """
    orig_text_len: int = len(original_text)
    mod_text: str = original_text
    increment: int = 0
    prev_span_end: int = 0
    replace_back_operations: List[Tuple[Span, str]] = []
    processed_original_spans: List[Tuple[Span, Span]] = []

    # Sorting the spans such that the order of replacement strings
    # is maintained -> utilizing the stable sort property of python sort
    replace_operations.sort(key=lambda item: item[0])

    for span, replacement in replace_operations:
        if span.begin < 0 or span.end < 0:
            raise ValueError(
                "Negative indexing not supported")
        if span.begin > len(original_text) or span.end > len(original_text):
            raise ValueError(
                "One of the span indices are outside the string length")
        if span.end < span.begin:
            print(span.begin, span.end)
            raise ValueError(
                "One of the end indices is lesser than start index")
github asyml / forte / forte / data / types.py View on Github external
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, List, Tuple, Type, Union

from forte.data.span import Span
from forte.data.ontology.core import Entry

# Public names exported by this module.
__all__ = [
    "ReplaceOperationsType",
    "DataRequest",
]

# A list of replace operations, each pairing a Span with a string.
ReplaceOperationsType = List[Tuple[Span, str]]

# Maps an Entry subclass to either a dict or a list. The meaning of the value
# is not visible here -- presumably per-type request details; confirm against
# the callers that build or consume a DataRequest.
DataRequest = Dict[Type[Entry], Union[Dict, List]]
github asyml / forte / forte / processors / srl_predictor.py View on Github external
length = torch.tensor(length, dtype=torch.long, device=self.device)
        batch_size = len(text)
        batch = tx.data.Batch(batch_size, text=text, text_ids=text_ids,
                              length=length, srl=[[]] * batch_size)
        self.model = self.model.to(self.device)
        batch_srl_spans = self.model.decode(batch)

        # Convert predictions into annotations.
        batch_predictions: List[Prediction] = []
        for idx, srl_spans in enumerate(batch_srl_spans):
            word_spans = data_batch["Token"]["span"][idx]
            predictions: Prediction = []
            for pred_idx, pred_args in srl_spans.items():
                begin, end = word_spans[pred_idx]
                # TODO cannot create annotation here.
                pred_span = Span(begin, end)
                arguments = []
                for arg in pred_args:
                    begin = word_spans[arg.start][0]
                    end = word_spans[arg.end][1]
                    arg_annotation = Span(begin, end)
                    arguments.append((arg_annotation, arg.label))
                predictions.append((pred_span, arguments))
            batch_predictions.append(predictions)
        return {"predictions": batch_predictions}
github asyml / forte / forte / data / data_utils_io.py View on Github external
if span.begin > len(original_text) or span.end > len(original_text):
            raise ValueError(
                "One of the span indices are outside the string length")
        if span.end < span.begin:
            print(span.begin, span.end)
            raise ValueError(
                "One of the end indices is lesser than start index")
        if span.begin < prev_span_end:
            raise ValueError(
                "The replacement spans should be mutually exclusive")
        span_begin = span.begin + increment
        span_end = span.end + increment
        original_span_text = mod_text[span_begin: span_end]
        mod_text = mod_text[:span_begin] + replacement + mod_text[span_end:]
        increment += len(replacement) - (span.end - span.begin)
        replacement_span = Span(span_begin, span_begin + len(replacement))
        replace_back_operations.append((replacement_span, original_span_text))
        processed_original_spans.append((replacement_span, span))
        prev_span_end = span.end

    return (mod_text, replace_back_operations, sorted(processed_original_spans),
            orig_text_len)
github asyml / forte / forte / data / data_pack.py View on Github external
def __init__(self, pack_manager: PackManager,
                 pack_name: Optional[str] = None):
        """Set up empty text, entry containers, replacement bookkeeping
        and an index.

        Args:
            pack_manager: Forwarded unchanged to the parent initializer.
            pack_name: Optional name, forwarded unchanged to the parent
                initializer. Defaults to ``None``.
        """
        super().__init__(pack_manager, pack_name)
        # The text starts out empty.
        self._text = ""

        # One initially empty sorted container per kind of entry.
        self.annotations: SortedList[Annotation] = SortedList()
        self.links: SortedList[Link] = SortedList()
        self.groups: SortedList[Group] = SortedList()
        self.generics: SortedList[Generics] = SortedList()

        # Text-replacement bookkeeping; everything starts empty / zero.
        # NOTE(review): presumably filled in when the text is set with
        # replace operations -- confirm against the text setter.
        self.replace_back_operations: ReplaceOperationsType = []
        self.processed_original_spans: List[Tuple[Span, Span]] = []
        self.orig_text_len: int = 0

        # A fresh index object for this instance.
        self.index: DataIndex = DataIndex()