How to use the depccg.utils module in depccg

To help you get started, we’ve selected a few depccg examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github masashi-y / depccg / depccg / models / my_allennlp / dataset / supertagging_dataset.py View on Github external
def text_to_instance(self,
                         sentence: str,
                         tags: List[str] = None,
                         deps: List[int] = None,
                         weight: float = 1.0) -> Instance:  # type: ignore
        # pylint: disable=arguments-differ
        """Build an AllenNLP ``Instance`` from one space-tokenized sentence.

        Parameters
        ----------
        sentence : str
            Space-separated tokens; each token is normalized via ``utils.normalize``.
        tags : List[str], optional
            Per-token supertag labels; only used when ``deps`` is also given.
        deps : List[int], optional
            Per-token head indices; only used when ``tags`` is also given.
        weight : float
            Scalar sample weight, stored as a length-1 float32 array field.

        Returns
        -------
        Instance
            Always contains 'words', 'metadata' and 'weight' fields; adds
            'head_tags' and 'head_indices' only when both labels are supplied.
        """
        tokens = [Token(utils.normalize(token)) for token in sentence.split(' ')]
        token_field = TextField(tokens, self._token_indexers)
        metadata = MetadataField({'words': sentence})
        # Use a separate name instead of rebinding the float parameter to an
        # ArrayField (the original shadowed `weight`, confusing its type).
        weight_field = ArrayField(numpy.array([weight], 'f'))
        fields = {
            'words': token_field,
            'metadata': metadata,
            'weight': weight_field,
        }
        if tags is not None and deps is not None:
            fields['head_tags'] = SequenceLabelField(
                tags, token_field, label_namespace='head_tags')
            fields['head_indices'] = SequenceLabelField(
                deps, token_field, label_namespace='head_indices')
        return Instance(fields)
github masashi-y / depccg / depccg / tools / evaluate.py View on Github external
try:
        # NOTE(review): fragment — the enclosing generator's `def` line was lost
        # in extraction; indentation of this `try:` is a scrape artifact.
        lines = open(file)
    except IOError as e:
        die(f'could not open gold_deps file ({e.strerror})')

    # Accumulate labelled and unlabelled dependencies for the current sentence.
    deps, udeps = set(), set()
    for line in lines:
        line = line.strip()
        if line.startswith('<s>'):
            # A sentence marker flushes the previous sentence's dependencies.
            # NOTE(review): the very first '<s>' yields an empty pair —
            # presumably callers skip it; confirm against the consumer.
            yield deps, udeps
            deps, udeps = set(), set()
            continue
        # Expected line shape: arg_index pred_index cat slot arg pred [...]
        arg_index, pred_index, cat, slot, arg, pred = line.split()[:6]
        # Indices are converted to 1-based and fused into the word forms.
        pred = f'{utils.normalize(pred)}_{int(pred_index) + 1}'
        arg = f'{utils.normalize(arg)}_{int(arg_index) + 1}'
        deps.add((pred, cat, slot, arg))   # labelled: (pred, category, slot, arg)
        udeps.add((pred, arg))             # unlabelled: (pred, arg) only
    # The file is expected to end right after a flush — no trailing deps left.
    assert len(deps) == 0 and len(udeps) == 0
</s>
github masashi-y / depccg / depccg / tools / evaluate.py View on Github external
lines = open(file)
    # NOTE(review): duplicated scrape of the same evaluate.py fragment above;
    # the leading `try:` and enclosing `def` were lost in extraction.
    except IOError as e:
        die(f'could not open gold_deps file ({e.strerror})')

    # Labelled / unlabelled dependency sets for the current sentence.
    deps, udeps = set(), set()
    for line in lines:
        line = line.strip()
        if line.startswith('<s>'):
            # Sentence marker: flush and reset the accumulated dependencies.
            yield deps, udeps
            deps, udeps = set(), set()
            continue
        # Line shape: arg_index pred_index cat slot arg pred [...]
        arg_index, pred_index, cat, slot, arg, pred = line.split()[:6]
        pred = f'{utils.normalize(pred)}_{int(pred_index) + 1}'
        arg = f'{utils.normalize(arg)}_{int(arg_index) + 1}'
        deps.add((pred, cat, slot, arg))
        udeps.add((pred, arg))
    # No dependencies may remain unflushed at end of file.
    assert len(deps) == 0 and len(udeps) == 0
</s>
github masashi-y / depccg / depccg / tools / data.py View on Github external
def _create_samples(self, trees):
        """Turn parsed trees into (sentence, [categories, dependencies]) samples.

        For every tree, the leaf words are normalized and joined into one
        sentence string, which is appended to ``self.sents``; the matching
        (sentence, [supertags, head indices]) pair goes into ``self.samples``.
        """
        for tree in trees:
            leaves = tree.leaves
            normalized_words = []
            categories = []
            # Single pass over the leaves collects both words and categories.
            for leaf in leaves:
                normalized_words.append(utils.normalize(leaf.word))
                categories.append(str(leaf.cat))
            dependencies = self._get_dependencies(tree, len(leaves))
            sentence = ' '.join(normalized_words)
            self.sents.append(sentence)
            self.samples.append((sentence, [categories, dependencies]))
github masashi-y / depccg / depccg / models / my_allennlp / dataset / supertagging_dataset.py View on Github external
def read_dataset_auto_or_json(file_path: str):
    """Load dataset instances from ``file_path``.

    The file is read directly with ``json.load`` when ``utils.is_json`` says
    it is JSON; otherwise it is treated as an AUTO-format tree file and
    converted via ``convert_auto_to_json``. Returns the loaded instances.
    """
    if not utils.is_json(file_path):
        logger.info(f'Reading trees in auto file at: {file_path}')
        json_data = convert_auto_to_json(file_path)
    else:
        logger.info(f'Reading instances from lines in json file at: {file_path}')
        with open(file_path, 'r') as data_file:
            json_data = json.load(data_file)
    logger.info(f'loaded {len(json_data)} instances')
    return json_data