# NOTE(review): the line below is a leftover web-page banner from wherever
# this snippet was scraped; kept as a comment so the module can parse.
# Secure your code as it's written. Use Snyk Code to scan source code in
# minutes - no build needed - and fix issues immediately.
import argparse
import logging
import os

import yaml

from forte.common.configuration import Config
from forte.data.data_pack import DataPack
from forte.data.readers.conll03_reader import CoNLL03Reader
from forte.pipeline import Pipeline
from forte.processors.ner_predictor import CoNLLNERPredictor
from ft.onto.base_ontology import Token, Sentence, EntityMention
# Load the data and model configurations from YAML files; context managers
# close the files promptly (the original left both file handles open).
with open("config_data.yml", "r") as data_file:
    config_data = yaml.safe_load(data_file)
with open("config_model.yml", "r") as model_file:
    config_model = yaml.safe_load(model_file)

# Merge both configs into a single Config object for the NER predictor.
config = Config({}, default_hparams=None)
config.add_hparam('config_data', config_data)
config.add_hparam('config_model', config_model)

# Pipeline: read CoNLL-2003 data, tag named entities on each DataPack.
pl = Pipeline[DataPack]()
pl.set_reader(CoNLL03Reader())
pl.add(CoNLLNERPredictor(), config=config)
pl.initialize()

# Iterate over the test set and print each sentence with the predicted
# NER tag of every token.
for pack in pl.process_dataset(config.config_data.test_path):
    for pred_sentence in pack.get_data(
            context_type=Sentence,
            request={
                Token: {"fields": ["ner"]},
                Sentence: [],  # span by default
                EntityMention: {},
            }):
        print("============================")
        print(pred_sentence["context"])
        print(pred_sentence["Token"]["ner"])
def main():
    """Demo entry point: build an NLP pipeline over a sample text, then
    write multi-pack results to disk and read them back.

    NOTE(review): this body appears stitched together from more than one
    example script -- `StringReader`, `NLTKSentenceSegmenter`,
    `NLTKWordTokenizer`, `NLTKPOSTagger`, `SRLPredictor`, `coref_pl`,
    `ExampleCorefCounter`, `MultiPackWriter`, `MultiPackDiskReader`,
    `output_path` and `input_path` are all used but never defined or
    imported in this file. Confirm against the original Forte examples
    before running.
    """
    # DataPack pipeline: raw string -> sentence split -> tokenize ->
    # POS tag -> NER -> semantic role labeling.
    pl = Pipeline[DataPack]()
    pl.set_reader(StringReader())
    pl.add(NLTKSentenceSegmenter())
    pl.add(NLTKWordTokenizer())
    pl.add(NLTKPOSTagger())
    pl.add(CoNLLNERPredictor(), config=config.NER)  # `config` is module-level
    pl.add(SRLPredictor(), config=config.SRL)
    pl.initialize()
    # Sample input text. NOTE(review): `text` is never fed to the pipeline
    # in the visible code -- presumably passed to something like
    # `pl.process(text)` in the original example; confirm.
    text = (
        "So I was excited to see Journey to the Far Side of the Sun finally "
        "get released on an affordable DVD (the previous print had been "
        "fetching $100 on eBay - I'm sure those people wish they had their "
        "money back - but more about that in a second).")
    # Multi-pack fragment: count coreference links, then write the
    # multi-packs under `output_dir`.
    coref_pl.add(ExampleCorefCounter())
    coref_pl.add(
        MultiPackWriter(),
        config={
            'output_dir': output_path,
            'indent': 2,
            'overwrite': True,
        }
    )
    coref_pl.run(input_path)
    print("We can then load the saved results, and see if everything is OK. "
          "We should see the same number of multi packs there. ")
    # Read the serialized multi-packs back and re-count them, verifying
    # the round trip.
    reading_pl = Pipeline()
    reading_pl.set_reader(MultiPackDiskReader(), {'data_path': output_path})
    reading_pl.add(ExampleCorefCounter())
    reading_pl.run()
from forte.data.readers import MSMarcoPassageReader
from forte.pipeline import Pipeline
from forte.processors.ir import ElasticSearchIndexProcessor
logging.basicConfig(level=logging.INFO)

if __name__ == "__main__":
    # Parse the path of the YAML file describing the index configuration.
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_file", default="./config.yml",
                        help="Config YAML filepath")
    args = parser.parse_args()

    # Load the config; the context manager closes the file promptly
    # (the original leaked the open file handle).
    with open(args.config_file, "r") as config_file:
        config = Config(yaml.safe_load(config_file), default_hparams=None)

    # Pipeline: read MS MARCO passages and index them via Elasticsearch.
    nlp: Pipeline[DataPack] = Pipeline()
    nlp.set_reader(MSMarcoPassageReader())
    nlp.add(ElasticSearchIndexProcessor(), config=config.create_index)
    nlp.initialize()

    # Resolve the data directory relative to this script's own location.
    data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             config.data.relative_path)

    for idx, pack in enumerate(nlp.process_dataset(data_path)):
        # idx + 1 is the running pack count (enumerate starts at 0);
        # report progress every 10,000 packs. The original's extra
        # `idx + 1 > 0` guard was always true and has been dropped.
        if (idx + 1) % 10000 == 0:
            print(f"Indexed {idx + 1} packs")
"""
This example reads data from input path, and write multi pack output
to output path.
Args:
input_path:
output_path:
Returns:
"""
print("Multi Pack serialization example.")
print("We first read the data, and add multi-packs to them, and then "
"save the results.")
coref_pl = Pipeline()
coref_pl.set_reader(DirPackReader())
coref_pl.add(MultiPackBoxer())
coref_pl.add(PackCopier())
coref_pl.add(ExampleCoreferencer())
coref_pl.add(ExampleCorefCounter())
coref_pl.add(
MultiPackWriter(),
config={
'output_dir': output_path,
'indent': 2,
'overwrite': True,
}
)
coref_pl.run(input_path)
def prepare(self):
    """Run every configured preprocessor once over the training data.

    Builds a throwaway pipeline from this object's training reader and
    preprocessor list, then drives it over the configured train path.
    """
    preprocess_pipeline = Pipeline()
    preprocess_pipeline.set_reader(self.train_reader)
    for preprocessor in self.preprocessors:
        preprocess_pipeline.add(preprocessor)
    preprocess_pipeline.run(self.configs.config_data.train_path)