Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_entities_with_bad_model(enron_dataset_part001):
    """Entity extraction with an unknown spaCy model name reports failure.

    Runs ``subcommands.entities`` on the ``enron_dataset_part001`` fixture
    with the model name ``"no_such_model"`` and expects a return value of 1,
    then checks that ``load_spacy_model`` returns nothing truthy for that
    same name.
    """
    missing_model = "no_such_model"

    with tempfile.TemporaryDirectory() as workdir:
        status = subcommands.entities(
            out=Path(workdir),
            spacy_model_name=missing_model,
            jobs=2,
            src=enron_dataset_part001,
            progress=False,
        )
        assert status == 1

    # This check does not touch the temporary directory, so it can run after
    # the directory has been cleaned up.
    assert not any(load_spacy_model(spacy_model_name=missing_model))
def test_file_report(enron_dataset_part012):
    """Entity extraction on a single PST file records one FileReport row.

    Picks the second ``*.pst`` file (in sorted order) from the
    ``enron_dataset_part012`` fixture, runs ``subcommands.entities`` on it
    with a SQLite output file inside a temporary directory, and checks that
    the resulting database holds exactly one ``FileReport`` whose ``path``
    equals the source file's path.
    """
    pst_file = sorted(enron_dataset_part012.glob("*.pst"))[1]

    with tempfile.TemporaryDirectory() as tmpdir:
        out = Path(tmpdir) / "entities.sqlite3"

        # Extract entities
        status = subcommands.entities(
            out=out,
            spacy_model_name=SPACY_MODELS.en_core_web_sm,
            jobs=2,
            src=pst_file,
            progress=False,
        )
        assert status == 0

        # Connect to DB file. `out` is already an absolute path, so the URL is
        # "sqlite:///" followed by the path; a fourth literal slash would
        # prepend a redundant "/" to the database path.
        engine = create_engine(f"sqlite:///{out}")
        session = sessionmaker(bind=engine)()
        try:
            # There should be one FileReport instance for this run;
            # .one() raises if there are zero or several.
            file_report = session.query(FileReport).one()  # pylint: disable=no-member

            # Path
            assert file_report.path == str(pst_file)
        finally:
            # Release the database file before the temporary directory is
            # removed, even when an assertion above fails.
            session.close()
            engine.dispose()
def entities(out, spacy_model, include_message_contents, jobs, src, progress):
    """
    Extract named entities from a PST or mbox file, or a directory of one or more PST and mbox files.
    If SOURCE is a directory it will be walked recursively. Non-PST and non-mbox files will be skipped.
    Upon success the result will be a new .sqlite3 database file. If an output path is provided
    it will be either the output file's parent directory or the file itself.
    If no output path is provided the file will be written in the current working directory.
    """
    # NOTE(review): the docstring above is kept word for word; it presumably
    # doubles as the command's CLI help text -- confirm before rewording it.

    # Forward every option to the subcommand (the CLI's `spacy_model` maps to
    # its `spacy_model_name` argument) and use the returned status as the
    # process exit code.
    sys.exit(
        subcommands.entities(
            src=src,
            out=out,
            spacy_model_name=spacy_model,
            include_message_contents=include_message_contents,
            jobs=jobs,
            progress=progress,
        )
    )
# Keep only the action options that were actually supplied (truthy)
requested = [option for option in (_list, install, upgrade) if option]

# Show help if no action option is passed
if not requested:
    with click.get_current_context() as ctx:
        click.echo(ctx.get_help())
    sys.exit(0)

# Error out if multiple actions are selected
if len(requested) > 1:
    raise click.UsageError(
        "Only one of [list|install|upgrade] can be selected at once."
    )

# Exactly one action remains: delegate to the subcommand and exit with the
# status it returns.
sys.exit(
    subcommands.model(
        _list=_list, install=install, upgrade=upgrade, version=version
    )
)