Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// Options forwarded to the contextual generation worker.
// NOTE(review): this object literal opens above this excerpt — fragment is incomplete.
locale: args.locale,
thingpedia: './thingpedia.tt',
entities: './entities.json',
dataset: './dataset.tt',
flags: args.flags,
template: 'contextual.genie',
// Fixed seed so generation is reproducible across runs.
random_seed: 'almond is awesome',
maxDepth: args.maxdepth,
targetPruningSize: args.target_pruning_size,
debug: false, // no debugging, ever, because debugging also goes to stdout
};
// Fan the input out across PARALLEL_GENERATION worker processes running the
// contextual-generation worker, serialize the results, and stream to stdout.
inputFile
.pipe(Genie.parallelize(PARALLEL_GENERATION,
require.resolve('./workers/generate-contextual-worker.js'), options))
.pipe(new Genie.DatasetStringifier())
.pipe(process.stdout);
// Wait for stdout to be fully flushed before detaching from the parent,
// otherwise output could be truncated when the IPC channel closes.
await StreamUtils.waitFinish(process.stdout);
process.disconnect();
}
// Options for the basic (non-contextual) sentence generator.
// NOTE(review): this object literal opens above this excerpt — fragment is incomplete.
templateFile: 'index.genie',
rng: rng,
locale: args.locale,
flags: args.flags || {},
maxDepth: args.maxdepth,
targetPruningSize: args.target_pruning_size,
debug: false, // no debugging, ever, because debugging also goes to stdout
};
const generator = new Genie.BasicSentenceGenerator(options);
// Relay generation progress to the parent process over the IPC channel,
// so the parent can display/aggregate progress across workers.
generator.on('progress', (value) => {
process.send({ cmd:'progress', v: value });
});
const stringifier = new Genie.DatasetStringifier();
// Serialize generated sentences and stream them to stdout.
generator.pipe(stringifier).pipe(process.stdout);
// Flush stdout completely before detaching from the parent process.
await StreamUtils.waitFinish(process.stdout);
process.disconnect();
}
// Paraphrase data for the contextual dataset, type-checked against the schemas.
const contextualParaphrase = this._downloadParaphrase(true)
.pipe(new TypecheckStream(this._schemas));
// Merge paraphrase and synthetic sources into one object-mode stream.
const basicSource = StreamUtils.chain([basicParaphrase, basicSynthetic], { objectMode: true });
// Spool the basic (non-contextual, not augmented) dataset to disk
// We need to do this because:
// 1) We don't want to run too many generation/processing steps as a pipeline, because that
// would use too much memory
// 2) We need to do multiple passes over the basic dataset for different reasons, and
// we can't cache it in memory
// mode 0o600: owner-only access for the temporary spool file.
const { path: basicDataset, fd: basicDatasetFD } =
await tmp.file({ mode: 0o600, dir: '/var/tmp' });
// First pass: write the merged basic dataset out to the temp file.
await StreamUtils.waitFinish(basicSource
.pipe(new Genie.DatasetStringifier())
.pipe(fs.createWriteStream(basicDataset, { fd: basicDatasetFD })));
// basicDatasetFD is closed here
// Second pass: re-read the spooled dataset and extract the set of contexts.
let contexts = await
fs.createReadStream(basicDataset, { encoding: 'utf8' })
.pipe(byline())
.pipe(new Genie.DatasetParser({ contextual: false }))
.pipe(new Genie.ContextExtractor(this._schemas))
.read();
// Third pass: re-read the spooled dataset and attach the extracted contexts
// to each example. NOTE(review): the Contextualizer options object is cut off
// here — this excerpt ends mid-literal.
const contextualized =
fs.createReadStream(basicDataset, { encoding: 'utf8' })
.pipe(byline())
.pipe(new Genie.DatasetParser({ contextual: false }))
.pipe(new Genie.Contextualizer(contexts, {
locale: this._language,
// Select non-obsolete, fully-preprocessed examples of the given types.
// NOTE(review): this assignment sits in an if-branch whose condition is above
// this excerpt (presumably "types was provided").
query = dbClient.query(`select id,flags,preprocessed,target_code from example_utterances
where language = ? and not find_in_set('obsolete',flags)
and target_code<>'' and preprocessed<>'' and type in (?)
order by id asc`,
[language, types]);
} else {
// Same query without the type filter: all types for this language.
query = dbClient.query(`select id,flags,preprocessed,target_code from example_utterances
where language = ? and not find_in_set('obsolete',flags)
and target_code<>'' and preprocessed<>''
order by id asc`,
[language]);
}
// In test mode, double the probability of sending an example to the eval set.
if (argv.test)
argv.eval_prob *= 2;
const writer = new Genie.DatasetStringifier();
writer.pipe(argv.output);
// Stream each row from the DB into the dataset writer as it arrives,
// normalizing the flags column from its string form.
query.on('result', (row) => {
row.flags = parseFlags(row.flags);
row.flags.replaced = false;
writer.write(row);
});
// When the query completes, close the writer and release the DB connection.
query.on('end', () => {
writer.end();
dbDone();
});
// Wait for the output stream to flush before tearing down the DB pool.
await StreamUtils.waitFinish(argv.output);
await db.tearDown();
}
// Augmentation options (PPDB paraphrasing + parameter replacement).
// NOTE(review): this object literal opens above this excerpt — fragment is incomplete.
untypedStringProbability: 0,
maxSpanLength: MAX_SPAN_LENGTH,
ppdbProbabilitySynthetic: this._options.ppdbProbabilitySynthetic,
ppdbProbabilityParaphrase: this._options.ppdbProbabilityParaphrase,
// Expansion factors: how many augmented variants to emit per source sentence.
syntheticExpandFactor: 1,
paraphrasingExpandFactor: 30,
noQuoteExpandFactor: 10,
ppdbFile: ppdb,
locale: this._language,
rng: this._rng,
debug: this._options.debug,
});
// Split the dataset into train/eval streams and wait for both sinks to flush.
const train = new Genie.DatasetStringifier();
const eval_ = new Genie.DatasetStringifier();
const promises = [];
promises.push(StreamUtils.waitFinish(train.pipe(this._options.train)));
promises.push(StreamUtils.waitFinish(eval_.pipe(this._options.eval)));
const splitter = new Genie.DatasetSplitter({
rng: this._rng,
locale: this._language,
train,
eval: eval_,
evalProbability: this._options.evalProbability,
forDevices: this._forDevices,
splitStrategy: this._options.splitStrategy,
// NOTE(review): the excerpt is spliced here — `useEvalFlag: true` (splitter
// options) is immediately followed by a `help:` line that belongs to a
// different file's argparser.addArgument call. The intervening code is missing.
useEvalFlag: true
help: `Start from the nth line of the input tsv file.`
});
// --locale: which natural language to process; defaults to US English.
argparser.addArgument(['-l', '--locale'], {
required: false,
defaultValue: 'en-US',
// Fixed typo: the locale-tag standard is BCP 47 (RFC 5646), not "BGP 47".
help: `BCP 47 locale tag of the natural language being processed (defaults to en-US).`
});
// --thingpedia: path to the Thingpedia snapshot used for type information.
argparser.addArgument('--thingpedia', {
required: false,
defaultValue: 'thingpedia.json',
help: `The path to the thingpedia.json file.`
});
const args = argparser.parseArgs();
const learned = new Genie.DatasetStringifier();
// When resuming from a non-zero offset, append to the output files instead of
// truncating them, so previously processed lines are preserved.
learned.pipe(fs.createWriteStream(args.learned, { flags: (args.offset > 0 ? 'a' : 'w') }));
const dropped = fs.createWriteStream(args.dropped, { flags: (args.offset > 0 ? 'a' : 'w') });
// Buffer the entire TSV input in memory before processing.
let lines = [];
args.input.setEncoding('utf8');
const input = args.input.pipe(csv.parse({ columns: true, relax: true, delimiter: '\t' }));
input.on('data', (line) => {
lines.push(line);
});
await waitEnd(input);
// Skip already-processed lines; offset is 1-based (offset 1 == start of file).
if (args.offset > 1)
lines = lines.slice(args.offset-1);
// Interactive prompt on the controlling terminal for the manual-review loop.
const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
rl.setPrompt('$ ');