def _parse_pack(self, data_source: str) -> Iterator[MultiPack]:
    """
    Take a raw query string and convert it into a MultiPack.

    Args:
        data_source: The text of the current query.

    Returns: MultiPack containing a datapack for the current query.
    """
    multi_pack = MultiPack()

    # Use the conversation context to build the query: attach the most
    # recent user and bot utterances when they are available.
    if self.resource.get("user_utterance"):
        user_pack = self.resource.get("user_utterance")[-1]
        multi_pack.update_pack({"user_utterance": user_pack})

    if self.resource.get("bot_utterance"):
        bot_pack = self.resource.get("bot_utterance")[-1]
        multi_pack.update_pack({"bot_utterance": bot_pack})

    # Wrap the raw query text in a fresh DataPack, annotated with a single
    # Utterance spanning the whole text.
    pack = DataPack()
    utterance = Utterance(pack, 0, len(data_source))
    pack.add_or_get_entry(utterance)
    pack.set_text(data_source, replace_func=self.text_replace_operation)

    multi_pack.update_pack({self.config.pack_name: pack})
    yield multi_pack
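Downstream components can then look up each pack in the yielded MultiPack by the name it was registered under. A minimal sketch of that access pattern, assuming MultiPack can be constructed and queried standalone as in the snippet above, that get_pack and pack_names are its name-based accessors, and that the reader's pack_name config is "query":

query_pack = multi_pack.get_pack("query")
print(query_pack.text)  # the raw query text set via set_text above

if "user_utterance" in multi_pack.pack_names:
    context_pack = multi_pack.get_pack("user_utterance")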
def add_pack(self, ref_name: Optional[str] = None) -> DataPack:
    """
    Create a new data pack and add it to this multi pack.

    Args:
        ref_name (str): The pack name used to reference this data pack from
            the multi pack.

    Returns: The newly created data pack.
    """
    if ref_name in self._name_index:
        raise ValueError(f"The name {ref_name} has already been taken.")
    if ref_name is not None and not isinstance(ref_name, str):
        raise ValueError(
            f"The name of the pack should be a str, but got {type(ref_name)}"
        )
    pack: DataPack = DataPack(self._pack_manager)
    self.add_pack_(pack, ref_name)
    return pack
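A minimal usage sketch for add_pack; the pack name and text are made up, get_pack is assumed to be the matching name-based accessor, and MultiPack is assumed to be constructible standalone as in the reader snippet above:

multi_pack = MultiPack()
query_pack = multi_pack.add_pack("query")  # register a new pack as "query"
query_pack.set_text("Where is the nearest pharmacy?")

assert multi_pack.get_pack("query") is query_pack
# Calling add_pack("query") again raises ValueError, since the name is taken.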
def new_pack(self, pack_name: Optional[str] = None) -> DataPack:
"""
Create a new pack based using the current pack manager.
Args:
pack_name (str, Optional): The name to be used for the pack. If not
set, the pack name will remained unset.
Returns:
"""
return DataPack(self._pack_manager, pack_name)
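For illustration, a component holding this method might call it like this; the pack name "round_1" is made up:

pack = self.new_pack("round_1")  # a fresh DataPack named "round_1"
anonymous = self.new_pack()      # the pack name stays unset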
def main(dataset_dir: str):
    # Load the pipeline configuration from the YAML file.
    with open("config.yml", "r") as f:
        config = yaml.safe_load(f)
    config = Config(config, default_hparams=None)
pl = Pipeline[DataPack]()
pl.set_reader(PlainTextReader())
pl.add(NLTKSentenceSegmenter())
pl.add(NLTKWordTokenizer())
pl.add(NLTKPOSTagger())
pl.add(CoNLLNERPredictor(), config=config.NER)
pl.add(SRLPredictor(), config=config.SRL)
pl.initialize()
for pack in pl.process_dataset(dataset_dir):
print(colored("Document", 'red'), pack.meta.pack_name)
for sentence in pack.get(Sentence):
sent_text = sentence.text
print(colored("Sentence:", 'red'), sent_text, "\n")
            # First method to get entries in a sentence: iterate entries of a
            # given type (here Token) within the sentence span.
            tokens = [(token.text, token.pos)
                      for token in pack.get(Token, sentence)]
            print(colored("Tokens:", 'red'), tokens, "\n")
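            # The pipeline also runs CoNLLNERPredictor, so entity mentions can
            # be read back the same way; this sketch assumes the standard
            # ft.onto.base_ontology.EntityMention type with its ner_type field.
            entities = [(entity.text, entity.ner_type)
                        for entity in pack.get(EntityMention, sentence)]
            print(colored("EntityMentions:", 'red'), entities, "\n")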
def pack_example(input_path, output_path):
"""
This example read data from input path and serialize to output path.
Args:
input_path:
output_path:
Returns:
"""
print("Pack serialization example.")
nlp = Pipeline[DataPack]()
nlp.set_reader(OntonotesReader())
nlp.add(NLTKSentenceSegmenter())
nlp.add(NLTKWordTokenizer())
nlp.add(NLTKPOSTagger())
    # This is a simple writer that serializes the result to the given output
    # directory, using the pack name of each data pack as the file name.
    nlp.add(
        PackNameJsonPackWriter(),
        {
            'output_dir': output_path,
            'indent': 2,
            'overwrite': True,
        }
    )

    nlp.run(input_path)
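A hypothetical invocation of the example above; both paths are made up, and the input should point at data readable by OntonotesReader:

if __name__ == "__main__":
    pack_example("data/ontonotes_sample", "output_packs")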