# Examples of plac.annotations usage. Each annotation value is a tuple of
# (help, kind, abbrev, type, choices, metavar); trailing slots may be omitted.

# Example: a generator-style main that yields its output lines.
import plac


@plac.annotations(
    opt=('some option', 'option'),
    args='default arguments',
    kw='keyword arguments')
def main(opt, *args, **kw):
    if opt:
        yield 'opt=%s' % opt
    if args:
        yield 'args=%s' % str(args)
    if kw:
        yield 'kw=%s' % kw
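
# plac.call parses sys.argv (or an explicit argument list), invokes the
# function, and returns an iterable result as a list, so a generator-style
# main like the one above can be driven like this:
if __name__ == '__main__':
    for output in plac.call(main):
        print(output)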

# Example: flags, options, and a variadic positional in one interface.
# Note: session_types_incl_auto is defined elsewhere in the original script.
@plac.annotations(
    # arg=(helptext, kind, abbrev, type, choices, metavar)
    # [INSERT ARGS HERE]
    quiet=("Do not print informational messages.", "flag", "q"),
    verbose=("Print debug messages that are probably only useful if something is going wrong.", "flag", "v"),
    session_type=("Which terminal multiplexer to use. Currently supported are 'screen' and 'tmux'. Use 'auto' to automatically select the right one, based on which one is currently running.", "option", "t", str, session_types_incl_auto.keys()),
    program_path=("Path to the multiplexer executable. Only required if it is not in $PATH.", "option", "p", str, None, 'PATH'),
    socket=("Socket name", "option", "S", str, None, "SOCKNAME"),
    session=("Session number. Only meaningful for tmux.", "option", "s", int, None, 'NUMBER'),
    unset_empty=("Unset variables instead of setting them to the empty string.", "flag", "u"),
    list=("Just list the session where variables would be sent. Any variables specified will be ignored.", "flag", "l"),
    vars=("Variables to send to the multiplexer. If no value is specified for a variable, its value is taken from the current environment.", "positional", None, str, None, "VAR[=VALUE]"),
)
def main(unset_empty, list=False,
         session_type="auto", session=None,
         socket=None, program_path=None,
         quiet=False, verbose=False,
         *vars):  # plac requires every annotated name in the signature;
                  # the annotated `vars` positional collects the rest
    ...
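
# A quick way to exercise the parser without touching sys.argv is an explicit
# argument list (the tokens below are made up for illustration): "-l" sets
# the `list` flag, "-t tmux" fills the `session_type` option, and the
# remaining tokens are collected into `vars`.
plac.call(main, ["-l", "-t", "tmux", "FOO=bar", "PATH"])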

# Example: typed positionals and abbreviated options driving a build step.
@plac.annotations(
    glove_dir=("Directory containing the GloVe build", "positional", None, str),
    in_dir=("Directory with preprocessed .s2v files", "positional", None, str),
    out_dir=("Path to output directory", "positional", None, str),
    min_count=("Minimum count for inclusion in vocab", "option", "c", int),
    memory=("Soft limit for memory consumption, in GB", "option", "m", float),
    window_size=("Number of context words on either side", "option", "w", int),
    verbose=("Set verbosity: 0, 1, or 2", "option", "v", int),
)
def main(
    glove_dir, in_dir, out_dir, min_count=5, memory=4.0, window_size=15, verbose=2
):
    """
    Step 3: Build vocabulary and frequency counts.
    Expects a directory of preprocessed .s2v input files and uses GloVe to
    collect unigram counts and to construct and shuffle the cooccurrence data.
    """
    ...
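
# The decorator only stores metadata; plac then builds a standard argparse
# parser from it. To inspect what was generated (abbreviations, defaults,
# help text), ask for the parser directly:
parser = plac.parser_from(main)
parser.print_help()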

# Example: hyperparameters as options; short abbreviations are case-sensitive,
# so "-b"/"-B" and "-d"/"-D" can coexist.
@plac.annotations(
    width=("Width of the hidden layers", "option", "w", int),
    vector_length=("Width of the word vectors", "option", "V", int),
    depth=("Depth of the hidden layers", "option", "d", int),
    min_batch_size=("Minimum minibatch size during training", "option", "b", int),
    max_batch_size=("Maximum minibatch size during training", "option", "B", int),
    learn_rate=("Learning rate", "option", "e", float),
    momentum=("Momentum", "option", "m", float),
    dropout=("Dropout rate", "option", "D", float),
    dropout_decay=("Dropout decay", "option", "C", float),
    nb_epoch=("Maximum passes over the training data", "option", "i", int),
    L2=("L2 regularization penalty", "option", "L", float),
)
def main(
    width=100,
    depth=4,
    vector_length=64,
    # plac needs every annotated name in the signature; the defaults below
    # are placeholders, since the original snippet breaks off here.
    min_batch_size=None,
    max_batch_size=None,
    learn_rate=None,
    momentum=None,
    dropout=None,
    dropout_decay=None,
    nb_epoch=None,
    L2=None,
):
    ...
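
# Illustrative invocation: because short options are case-sensitive, "-b" and
# "-B" address different parameters (the values here are made up).
plac.call(main, ["-b", "16", "-B", "256", "-e", "0.001"])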

# Example: pathlib.Path as the option type (spaCy's init_model CLI).
@plac.annotations(
    lang=("Model language", "positional", None, str),
    output_dir=("Model output directory", "positional", None, Path),
    freqs_loc=("Location of word frequencies file", "option", "f", Path),
    jsonl_loc=("Location of JSONL-formatted attributes file", "option", "j", Path),
    clusters_loc=("Optional location of Brown clusters data", "option", "c", str),
    vectors_loc=("Optional vectors file in Word2Vec format", "option", "v", str),
    prune_vectors=("Optional number of vectors to prune to", "option", "V", int),
    vectors_name=(
        "Optional name for the word vectors, e.g. en_core_web_lg.vectors",
        "option",
        "vn",
        str,
    ),
    model_name=("Optional name for the model meta", "option", "mn", str),
)
def init_model(
    # The original snippet breaks off after the opening parenthesis; the
    # parameter list below follows the annotations, with placeholder defaults.
    lang, output_dir, freqs_loc=None, jsonl_loc=None, clusters_loc=None,
    vectors_loc=None, prune_vectors=None, vectors_name=None, model_name=None,
):
    ...
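
# The type slot accepts any callable that takes the raw string, which is how
# Path works above. A minimal self-contained illustration (hypothetical
# command, not part of the original code):
from pathlib import Path

@plac.annotations(out=("output directory", "positional", None, Path))
def show(out):
    print(type(out))  # a pathlib.Path, not a str

plac.call(show, ["/tmp/models"])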

# Example: boolean flags controlling prompts and overwriting
# (spaCy's package CLI).
@plac.annotations(
    input_dir=("directory with model data", "positional", None, str),
    output_dir=("output parent directory", "positional", None, str),
    meta_path=("path to meta.json", "option", "m", str),
    create_meta=("create meta.json, even if one exists in directory – if "
                 "existing meta is found, entries are shown as defaults in "
                 "the command line prompt", "flag", "c", bool),
    force=("force overwriting of existing model directory in output directory",
           "flag", "f", bool))
def package(input_dir, output_dir, meta_path=None, create_meta=False,
            force=False):
    """
    Generate a Python package for model data, including meta and required
    installation files. A new directory will be created in the specified
    output directory, and the model data will be copied over.
    """
    input_path = util.ensure_path(input_dir)
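
# Typical entry point for a plac-decorated command like this one (a sketch;
# the original project dispatches through its own CLI wrapper):
if __name__ == "__main__":
    plac.call(package)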

# Example: training options with hardcoded defaults (pyresparser's custom
# NER training script).
@plac.annotations(
    model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
    new_model_name=("New model name for model meta.", "option", "nm", str),
    output_dir=("Optional output directory", "option", "o", Path),
    n_iter=("Number of training iterations", "option", "n", int),
)
def main(
    model=None,
    new_model_name="training",
    output_dir='/home/omkarpathak27/Downloads/zipped/pyresparser/pyresparser',
    n_iter=30,
):
    """Set up the pipeline and entity recognizer, and train the new entity."""
    random.seed(0)
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)

# Example: os.path.abspath as a converter; a bare string annotation is help
# text only, and a defaulted positional becomes optional.
@plac.annotations(
    out_path=("path to results file", "positional", None, os.path.abspath),
    domain_name="name of problem domain",
    budget=("CPU seconds per instance", "positional", None, float),
    tasks_root=("path to task files", "positional", None, os.path.abspath),
    tests_root=("optional separate test set", "positional", None, os.path.abspath),
    live=("don't simulate the domain", "flag", "l"),
    runs=("number of runs", "option", "r", int),
    workers=("submit jobs?", "option", "w", int),
)
def main(out_path, domain_name, budget, tasks_root, tests_root=None,
         live=False, runs=16, workers=0):
    """Collect validation results."""
    cargo.enable_default_logging()
    cargo.get_logger("borg.portfolios", level="DETAIL")
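
# Because tests_root has a default, plac makes that positional optional on
# the command line, so it can simply be omitted (illustrative arguments):
plac.call(main, ["results.json", "sat", "3600.0", "tasks/"])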

# Example: a multi-character abbreviation ("t2v") for the tok2vec option.
@plac.annotations(
    model=("The base model to load or blank:lang", "positional", None, str),
    train_path=("The training data (Prodigy JSONL)", "positional", None, str),
    eval_path=("The evaluation data (Prodigy JSONL)", "positional", None, str),
    n_iter=("Number of iterations", "option", "n", int),
    output=("Optional output directory", "option", "o", str),
    tok2vec=("Pretrained tok2vec weights to initialize model", "option", "t2v", str),
)
def train_model(
    model, train_path, eval_path, n_iter=10, output=None, tok2vec=None,
):
    """
    Train a model from Prodigy annotations and optionally save out the best
    model to disk.
    """
    spacy.util.fix_random_seed(0)
    with msg.loading(f"Loading '{model}'..."):
        ...

# Example: the same metadata written with plac.Annotation instead of tuples.
@plac.annotations(
    lang=plac.Annotation('Language', 'option', 'l', str),
    input_file=plac.Annotation('Input file', 'option', 'i', str),
    output_file=plac.Annotation('Output file', 'option', 'o', str),
    tokenize=plac.Annotation('Tokenize', 'flag', 't', bool),
)
def main(lang, input_file, output_file, tokenize=False):
    nlp = spacy.load(lang)


def repr_word(word, tokenize=False):
    if tokenize:
        text = word.text
    else:
        text = word.text_with_ws
    if word.pos_ == 'DET':
        text = text.lower()
    elif word.pos_ != 'PROPN':
        ...
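
# plac.Annotation is a namedtuple over the same six slots,
# (help, kind, abbrev, type, choices, metavar), so the named form above is
# equivalent to the plain tuples used in the earlier examples:
assert plac.Annotation('Language', 'option', 'l', str).abbrev == 'l'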