# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def text_to_instance(self,
                     sentence: str,
                     tags: List[str] = None,
                     deps: List[int] = None,
                     weight: float = 1.0) -> Instance:  # type: ignore
    # pylint: disable=arguments-differ
    """Build an ``Instance`` from one space-separated sentence.

    Parameters
    ----------
    sentence : str
        Raw sentence; tokens are obtained by splitting on single spaces.
    tags : List[str], optional
        Gold head tags, one per token.
    deps : List[int], optional
        Gold head indices, one per token.
    weight : float
        Per-instance weight, stored as a length-1 float32 array field.

    Returns
    -------
    Instance
        Always carries 'words', 'metadata' and 'weight' fields; the gold
        'head_tags' / 'head_indices' fields are attached only when BOTH
        ``tags`` and ``deps`` are supplied.
    """
    normalized_tokens = [Token(utils.normalize(word)) for word in sentence.split(' ')]
    words_field = TextField(normalized_tokens, self._token_indexers)
    fields = {
        'words': words_field,
        'metadata': MetadataField({'words': sentence}),
        'weight': ArrayField(numpy.array([weight], 'f')),
    }
    if tags is not None and deps is not None:
        fields['head_tags'] = SequenceLabelField(
            tags, words_field, label_namespace='head_tags')
        fields['head_indices'] = SequenceLabelField(
            deps, words_field, label_namespace='head_indices')
    return Instance(fields)
# NOTE(review): headless fragment — the enclosing generator's `def` header is
# missing from this chunk, so `file` presumably comes from its signature.
# Yields one (labeled-deps, unlabeled-deps) pair of sets per sentence block.
try:
    lines = open(file)  # NOTE(review): handle is never closed — consider `with`
except IOError as e:
    die(f'could not open gold_deps file ({e.strerror})')
deps, udeps = set(), set()
for line in lines:
    line = line.strip()
    # A line starting with '<s>' terminates the current sentence: flush the
    # accumulated dependency sets and start fresh ones.
    if line.startswith('<s>'):
        yield deps, udeps
        deps, udeps = set(), set()
        continue
    # Expected columns: arg_index pred_index cat slot arg pred (extras ignored).
    arg_index, pred_index, cat, slot, arg, pred = line.split()[:6]
    # Words are normalized and fused with their 1-based index ('word_3').
    pred = f'{utils.normalize(pred)}_{int(pred_index) + 1}'
    arg = f'{utils.normalize(arg)}_{int(arg_index) + 1}'
    deps.add((pred, cat, slot, arg))  # labeled dependency
    udeps.add((pred, arg))            # unlabeled dependency
# Every sentence must have been flushed by a trailing '<s>' marker; leftover
# entries here would mean un-yielded dependencies at end of file.
assert len(deps) == 0 and len(udeps) == 0
# </s>  (stray sequence-separator token — not code)
# NOTE(review): broken duplicate of the fragment above — the opening `try:`
# (and the enclosing function header) are missing, so this span is not
# syntactically valid as-is; it appears to be paste/extraction residue.
lines = open(file)
except IOError as e:
    die(f'could not open gold_deps file ({e.strerror})')
deps, udeps = set(), set()
for line in lines:
    line = line.strip()
    # '<s>' marker ends the current sentence: flush and reset the sets.
    if line.startswith('<s>'):
        yield deps, udeps
        deps, udeps = set(), set()
        continue
    # Expected columns: arg_index pred_index cat slot arg pred (extras ignored).
    arg_index, pred_index, cat, slot, arg, pred = line.split()[:6]
    pred = f'{utils.normalize(pred)}_{int(pred_index) + 1}'
    arg = f'{utils.normalize(arg)}_{int(arg_index) + 1}'
    deps.add((pred, cat, slot, arg))  # labeled dependency
    udeps.add((pred, arg))            # unlabeled dependency
# Leftover entries would mean un-yielded dependencies at end of file.
assert len(deps) == 0 and len(udeps) == 0
# </s>  (stray sequence-separator token — not code)
def _create_samples(self, trees):
    """Turn parsed trees into (sentence, [categories, dependencies]) samples.

    Side effects: appends each space-joined, normalized sentence to
    ``self.sents`` and the paired sample to ``self.samples``.
    """
    for parse in trees:
        leaves = parse.leaves
        normalized_words = [utils.normalize(leaf.word) for leaf in leaves]
        categories = [str(leaf.cat) for leaf in leaves]
        dependencies = self._get_dependencies(parse, len(leaves))
        sentence = ' '.join(normalized_words)
        self.sents.append(sentence)
        self.samples.append((sentence, [categories, dependencies]))
def read_dataset_auto_or_json(file_path: str):
    """Load dataset instances from ``file_path``.

    JSON files are parsed directly; any other file is assumed to be in AUTO
    format and converted via ``convert_auto_to_json``. Returns the loaded
    (or converted) JSON data.
    """
    if not utils.is_json(file_path):
        logger.info(f'Reading trees in auto file at: {file_path}')
        json_data = convert_auto_to_json(file_path)
    else:
        logger.info(f'Reading instances from lines in json file at: {file_path}')
        with open(file_path, 'r') as data_file:
            json_data = json.load(data_file)
    logger.info(f'loaded {len(json_data)} instances')
    return json_data