def setup(config: Config) -> Pipeline:
    resource = Resources()
    query_pipeline = Pipeline[MultiPack](resource=resource)

    # Read a query from the terminal, translate it, build a BERT-based
    # search query, and retrieve candidate responses.
    query_pipeline.set_reader(
        reader=MultiPackTerminalReader(), config=config.reader)
    query_pipeline.add(
        component=MicrosoftBingTranslator(), config=config.translator)
    query_pipeline.add(
        component=BertBasedQueryCreator(), config=config.query_creator)
    query_pipeline.add(
        component=SearchProcessor(), config=config.searcher)

    # Run the NLP processors only on the top-ranked response pack.
    top_response_pack_name = config.indexer.response_pack_name + '_0'
    query_pipeline.add(
        component=NLTKSentenceSegmenter(),
        selector=NameMatchSelector(select_name=top_response_pack_name))
    query_pipeline.add(
        component=NLTKWordTokenizer(),
        selector=NameMatchSelector(select_name=top_response_pack_name))
    query_pipeline.add(
        component=NLTKPOSTagger(),
        selector=NameMatchSelector(select_name=top_response_pack_name))
    query_pipeline.add(
        component=SRLPredictor(), config=config.SRL,
        selector=NameMatchSelector(select_name=top_response_pack_name))

    # Translate the selected response back to the user's language.
    query_pipeline.add(
        component=MicrosoftBingTranslator(), config=config.back_translator)

    query_pipeline.initialize()
    return query_pipeline
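# A minimal driver sketch for setup() above. The config.yml section names
# (reader, translator, query_creator, searcher, indexer, SRL,
# back_translator) are assumptions inferred from the config attributes the
# function reads; adjust them to your own file.
import yaml

from forte.common.configuration import Config

config = Config(yaml.safe_load(open("config.yml", "r")), default_hparams=None)
query_pipeline = setup(config)

# MultiPackTerminalReader takes queries from stdin, so process_dataset()
# needs no source argument; each iteration yields one processed MultiPack.
for m_pack in query_pipeline.process_dataset():
    pass  # Inspect or display the back-translated response here.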
def pack_example(input_path, output_path):
    """
    This example reads data from the input path and serializes the
    processed packs to the output path.

    Args:
        input_path: Directory containing the input OntoNotes dataset.
        output_path: Directory to write the serialized data packs to.

    Returns:
        None
    """
    print("Pack serialization example.")
    nlp = Pipeline[DataPack]()

    nlp.set_reader(OntonotesReader())
    nlp.add(NLTKSentenceSegmenter())
    nlp.add(NLTKWordTokenizer())
    nlp.add(NLTKPOSTagger())

    # This is a simple writer that serializes the result to the output
    # directory and uses the DocID field in the data pack as the file name.
    nlp.add(
        PackNameJsonPackWriter(),
        {
            'output_dir': output_path,
            'indent': 2,
            'overwrite': True,
        })

    nlp.run(input_path)
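# A hedged invocation of pack_example(); both paths are placeholders.
# OntonotesReader expects a directory of *.gold_conll files, and the
# writer creates one JSON file per document under output_path.
pack_example("data_samples/ontonotes/00", "output_packs")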
def main():
    # `config` was not defined in the original snippet; it is loaded here
    # the same way as in main(dataset_dir) below so the example runs.
    config = Config(yaml.safe_load(open("config.yml", "r")), default_hparams=None)

    pl = Pipeline[DataPack]()
    pl.set_reader(StringReader())
    pl.add(NLTKSentenceSegmenter())
    pl.add(NLTKWordTokenizer())
    pl.add(NLTKPOSTagger())
    pl.add(CoNLLNERPredictor(), config=config.NER)
    pl.add(SRLPredictor(), config=config.SRL)
    pl.initialize()

    text = (
        "So I was excited to see Journey to the Far Side of the Sun finally "
        "get released on an affordable DVD (the previous print had been "
        "fetching $100 on eBay - I'm sure those people wish they had their "
        "money back - but more about that in a second).")

    pack = pl.process_one(text)
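# A sketch of inspecting the returned pack, assuming the processors above
# populate the standard ft.onto.base_ontology types (the same types the
# next example iterates over).
from ft.onto.base_ontology import Sentence, Token

for sentence in pack.get(Sentence):
    print(sentence.text)
    print([(token.text, token.pos) for token in pack.get(Token, sentence)])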
def main(dataset_dir: str):
    config = yaml.safe_load(open("config.yml", "r"))
    config = Config(config, default_hparams=None)

    pl = Pipeline[DataPack]()
    pl.set_reader(PlainTextReader())
    pl.add(NLTKSentenceSegmenter())
    pl.add(NLTKWordTokenizer())
    pl.add(NLTKPOSTagger())
    pl.add(CoNLLNERPredictor(), config=config.NER)
    pl.add(SRLPredictor(), config=config.SRL)
    pl.initialize()

    for pack in pl.process_dataset(dataset_dir):
        print(colored("Document", 'red'), pack.meta.pack_name)
        for sentence in pack.get(Sentence):
            sent_text = sentence.text
            print(colored("Sentence:", 'red'), sent_text, "\n")
            # First method to get entries in a sentence: query the pack
            # for annotations covered by the sentence span.
            tokens = [(token.text, token.pos)
                      for token in pack.get(Token, sentence)]
            # EntityMention (from ft.onto.base_ontology) is assumed as the
            # entity type here, matching the entity.ner_type field used.
            entities = [(entity.text, entity.ner_type)
                        for entity in pack.get(EntityMention, sentence)]
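# A hypothetical entry point for main(dataset_dir); the path is a
# placeholder for any directory of plain text files, which is what
# PlainTextReader consumes.
if __name__ == "__main__":
    main("data_samples/plain_text")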