Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
max_ngram=plac.Annotation("Max N-gram length", 'option', 'm', int)
)
def main(input_file, alignment_file, output_file, max_ngram=10):
    """Extract aligned phrase pairs and write those within the n-gram limit.

    Reads parallel sentence pairs (``source ||| target``) from *input_file*
    and the corresponding word alignments from *alignment_file*, extracts
    phrase pairs, and writes every pair whose two sides each contain between
    1 and *max_ngram* tokens to *output_file* in ``a ||| b`` format.

    Raises:
        ValueError: if any of the three file paths is missing/empty.
    """
    # Explicit validation instead of `assert`, which is silently stripped
    # when Python runs with optimizations (-O).
    if not (input_file and alignment_file and output_file):
        raise ValueError('missing arguments')
    with io.open(output_file, 'w', encoding='utf-8') as out, \
            io.open(input_file, 'r', encoding='utf-8') as input_f, \
            io.open(alignment_file, 'r', encoding='utf-8') as alignment_f:
        # izip pairs each sentence line with its alignment line lazily.
        for pair, alignment in izip(input_f, alignment_f):
            source, target = pair.split(' ||| ')
            for a, b in phrase_extraction(source, target, alignment):
                a, b = whitespace_tokenizer(a), whitespace_tokenizer(b)
                # Keep only pairs where both sides fit the n-gram limit.
                if 1 <= len(a) <= max_ngram and 1 <= len(b) <= max_ngram:
                    out.write('{0} ||| {1}\n'.format(' '.join(a), ' '.join(b)))
    # NOTE(review): original logged here with redundant double parens;
    # placement after the `with` block assumed — confirm against upstream.
    logging.info(output_file)
job_wd=Annotation("working dir for job", 'option'),
)
def runs(self, script, params=None, group=1, grid="mas", jobname="job", job_cwd=False, job_wd=None):
if not os.path.exists(params) and not os.path.isfile(script):
print("Parameter space file not found: {path}".format(path=params), file=sys.stderr)
sys.exit(1)
ps = ParamSpace(filename=params)
ps.write_grid_summary(jobname + '_params.csv')
grid_cfg = DEFAULT_CONFIGS[grid]
param_grid = ps.param_grid(include_id=True, id_param="id")
job_files = write_job_files(grid_cfg, script, jobname, param_grid, group, jobname, job_cwd, job_wd)
for job_file in job_files:
try:
assign_fpath=plac.Annotation('Series assignment file',
type=str),
out_foldpath=plac.Annotation('Output folder', type=str))
def main(partial_features_fpath, tag_categ_fpath, tseries_fpath,
num_days_to_use, assign_fpath, out_foldpath):
X, feature_ids, feature_names = \
create_input_table(partial_features_fpath, tseries_fpath,
tag_categ_fpath, num_pts = num_days_to_use)
#Sort X by upload date
up_date_col = feature_names['A_UPLOAD_DATE']
sort_by_date = X[:,up_date_col].argsort()
X = X[sort_by_date].copy()
y_clf = np.genfromtxt(assign_fpath)[sort_by_date]
y_regr = np.genfromtxt(tseries_fpath)[:,1:].sum(axis=1)[sort_by_date]
is_text_features=plac.Annotation('Indicates file type',
kind='flag', abbrev='t',
type=bool))
def main(features_fpath, classes_fpath, out_fpath,
trans_fpath, col_to_use=2, is_text_features=False):
initialize_matplotlib()
classes = np.loadtxt(classes_fpath)
if is_text_features:
to_plot, sum_classes, labels = \
load_text_file(features_fpath, col_to_use, classes)
ref=False
else:
to_plot, sum_classes, labels = \
load_svm_file(features_fpath, classes)
ref=True
reset_all=plac.Annotation('combines all reset options', 'flag'),
no_confirm=plac.Annotation('skip confirm dialogs', 'flag')
)
def cli(cfg_file_path, resume, reset_elasticsearch, reset_mysql, reset_json, reset_all, no_confirm):
    "A generic news crawler and extractor."
    # --reset-all is shorthand that switches on every individual reset flag.
    if reset_all:
        reset_elasticsearch = reset_json = reset_mysql = True
    # Ensure the config path ends with the platform separator so later
    # string concatenation treats it as a directory.
    if cfg_file_path and not cfg_file_path.endswith(os.path.sep):
        cfg_file_path = cfg_file_path + os.path.sep
    NewsPleaseLauncher(cfg_file_path, resume, reset_elasticsearch, reset_json, reset_mysql, no_confirm)
names=plac.Annotation(
'Test case name(s) to use, comma-separated',
),
testfile=plac.Annotation(
'Test file(s) to use',
'option',
'f',
str,
metavar='GLOB'
),
record=plac.Annotation(
'Record a new test',
'flag',
'r'
),
playback_only=plac.Annotation(
'Don\'t write new screenshots',
'flag',
'p'
),
concurrency=plac.Annotation(
'Number of tests to run in parallel',
'option',
'c',
int,
metavar='NUMBER'
),
save_diff=plac.Annotation(
'Save information about failures as last.png and diff.png',
'flag',
'e'
),
classes_fpath=plac.Annotation('Video classes file', type=str),
tseries_fpath=plac.Annotation('Time Series file', type=str))
def main(features_fpath, classes_fpath, tseries_fpath):
X = np.genfromtxt(features_fpath)[:,1:].copy()
y = np.loadtxt(classes_fpath)
T = np.genfromtxt(tseries_fpath)[:,1:].copy()
bah = T.sum(axis=1) / X[:,-1]
print(np.mean(bah))
print(np.median(bah))
print(np.std(bah))
print(stats.scoreatpercentile(bah, 25))
num_clusters = len(set(y))
for k in xrange(num_clusters):
@plac.annotations(partial_features_fpath=plac.Annotation('Partial Features',
type=str),
tag_categ_fpath=plac.Annotation('Tags file', type=str),
tseries_fpath=plac.Annotation('Time series file', type=str),
num_days_to_use=plac.Annotation('Num Days Series', type=int),
assign_fpath=plac.Annotation('Series assignment file',
type=str),
out_foldpath=plac.Annotation('Output folder', type=str))
def main(partial_features_fpath, tag_categ_fpath, tseries_fpath,
num_days_to_use, assign_fpath, out_foldpath):
X, feature_ids, feature_names = \
create_input_table(partial_features_fpath, tseries_fpath,
tag_categ_fpath, num_pts = num_days_to_use)
#Sort X by upload date
up_date_col = feature_names['A_UPLOAD_DATE']
'option',
'f',
str,
metavar='GLOB'
),
record=plac.Annotation(
'Record a new test',
'flag',
'r'
),
playback_only=plac.Annotation(
'Don\'t write new screenshots',
'flag',
'p'
),
concurrency=plac.Annotation(
'Number of tests to run in parallel',
'option',
'c',
int,
metavar='NUMBER'
),
save_diff=plac.Annotation(
'Save information about failures as last.png and diff.png',
'flag',
'e'
),
version=plac.Annotation(
'Get the current version',
'flag',
'v'
)
out_foldpath=plac.Annotation('Output folder', type=str))
def main(features_fpath, tag_categ_fpath, tseries_fpath, num_days_to_use,
         assign_fpath, out_foldpath):
    """Build the scaled feature table, derive both targets, run the experiment."""
    # Assemble the input matrix from the feature/tag/time-series files.
    feature_matrix, feature_ids, _ = \
        create_input_table(features_fpath, tseries_fpath, tag_categ_fpath,
                           num_days_to_use)
    feature_matrix = scale(feature_matrix)

    # Classification target: one assignment label per row of the matrix.
    class_labels = np.genfromtxt(assign_fpath)

    # Regression target: row-wise total of the time series (first column is
    # presumably an id — skipped), standardized with the same scaler.
    series_totals = np.genfromtxt(tseries_fpath)[:, 1:].sum(axis=1)
    regression_target = scale(series_totals)

    run_experiment(feature_matrix, class_labels, regression_target,
                   feature_ids, out_foldpath)