Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""
Convert DOAJ into an intermediate schema. Takes 10 minutes.
"""
def requires(self):
    """Declare the single upstream dependency: a ``DOAJInput`` task."""
    upstream = DOAJInput()
    return upstream
def run(self):
    """
    Run ``span-import -i doaj`` over the input file and move the result
    to this task's output path.
    """
    source_path = self.input().path
    # The value returned by shellout is used as the path of the produced file.
    converted = shellout("span-import -i doaj {input} > {output}", input=source_path)
    destination = self.output().path
    luigi.LocalTarget(converted).move(destination)
def output(self):
    """Return a local target whose path is ``self.path(ext='ldj')``."""
    target_path = self.path(ext='ldj')
    return luigi.LocalTarget(path=target_path)
if __name__ == '__main__':
    # Script entry point: hand luigi a fixed command line naming the task,
    # one worker and the local scheduler flag.
    luigi.run(cmdline_args=['DOAJIntermediateSchema', '--workers', '1', '--local-scheduler'])
weight_multicut_edges=weight_mc_edges,
weight_merge_edges=weight_merge_edges,
n_threads=n_threads)
# For the 'ws_dt' watershed type, additionally write the dt-components
# config (boundary and distance thresholds) to config_path.
if ws_type == 'ws_dt':
write_dt_components_config(config_path,
boundary_threshold=boundary_threshold,
distance_threshold=distance_threshold)
# The segmentation dataset key is taken from mc_string.
seg_key = mc_string
# Skeleton datasets passed to the workflow (JSON-encoded below).
skeleton_keys = ['skeletons/for_eval_20180523',
'skeletons/neurons_of_interest']
# skeleton_keys = []
# NOTE(review): hard-coded absolute path; block_id is formatted in right
# after a literal '0' (e.g. block_id=2 -> ".../02/workspace.n5").
path = '/nrs/saalfeld/lauritzen/0%i/workspace.n5' % block_id
# Pick the workflow class: the 2D variant only for 'ws_2d'.
wf = Workflow2DWS if ws_type == 'ws_2d' else Workflow
# Run luigi with an explicit argument list; wf is passed as the second
# positional argument (presumably luigi's main task class - confirm).
luigi.run(['--local-scheduler',
'--path', path,
'--aff-key', 'raw/predictions/affs_glia',
'--mask-key', 'raw/masks/minfilter_mask',
'--ws-key', 'raw/segmentation/' + ws_type,
'--seg-key', 'raw/segmentation/' + seg_key,
'--max-jobs', str(n_jobs),
'--config-path', config_path,
'--tmp-folder-ws', cache_folder_ws,
'--tmp-folder-seg', cache_folder_mc,
'--skeleton-keys', json.dumps(skeleton_keys),
'--time-estimate', '10'], wf)
def out_replatot(self):
    """Return the output target: the input data path with ``.atot`` appended."""
    replaced_path = self.in_data().path + '.atot'
    return sl.TargetInfo(self, replaced_path)
# ------------------------------------------------
def run(self):
    """
    Replace every "A" with "T" in the input file using ``sed`` and write
    the result to the ``out_replatot`` target.

    The command is logged before it is executed through the shell.
    """
    try:
        from shlex import quote  # Python 3
    except ImportError:
        from pipes import quote  # Python 2 fallback

    # Quote both paths: the command goes through the shell, so an unquoted
    # path containing whitespace or shell metacharacters would be split or
    # interpreted instead of being treated as a single file name.
    source = quote(self.in_data().path)
    destination = quote(self.out_replatot().path)
    cmd = 'cat ' + source + ' | sed "s/A/T/g" > ' + destination
    log.info("COMMAND TO EXECUTE: " + cmd)
    call(cmd, shell=True)
# Run this file as script
# ------------------------------------------------------------------------
if __name__ == '__main__':
    # Script entry point: run MyWorkflow as the main task with the
    # local_scheduler option enabled.
    luigi.run(main_task_cls=MyWorkflow, local_scheduler=True)
"""
Once we have a better workflow in place, we can replace the inputs,
e.g. with a deduplicated version (TaggedAndDeduplicatedIntermediateSchema).
"""
return TaggedIntermediateSchema()
def run(self):
    """
    Decompress the input, pipe it through ``span-export`` with the
    configured format, gzip the result and move it to the output path.
    """
    pipeline = "gunzip -c {input} | span-export -o {format} | gzip -c > {output} "
    # The value returned by shellout is used as the path of the produced file.
    exported = shellout(pipeline, format=self.format, input=self.input().path)
    destination = self.output().path
    luigi.LocalTarget(exported).move(destination)
def output(self):
    """Return the local target ``outputs/export-<format>.ldj.gz``."""
    export_path = 'outputs/export-%s.ldj.gz' % self.format
    return luigi.LocalTarget(path=export_path)
if __name__ == "__main__":
    # Script entry point: let luigi parse the command line, with the
    # local_scheduler option enabled.
    luigi.run(local_scheduler=True)
port = kwargs['scheduler_port']
else:
port = 8082
# Unless the caller already chose, probe host:port with a TCP connect to
# see whether a luigid scheduler is reachable; if the connect fails, fall
# back to the local scheduler.
if 'local_scheduler' not in kwargs:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
sock.connect((host,port))
log.info("Using scheduler at " + host + ":" + str(port))
# NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit;
# catching socket.error would be narrower. The socket is also never closed
# in the code visible here.
except:
kwargs['local_scheduler'] = True
log.info("Using local scheduler")
# Without explicit task arguments defer to luigi.run (command-line driven);
# otherwise build the given tasks directly.
if not args:
success = luigi.run(**kwargs)
else:
success = luigi.build(args,**kwargs)
# Report the outcome and hand the success flag back to the caller.
if not success:
log.error("LuigiNLP: There were errors in scheduling the workflow, inspect the log at %s for more details", logfile)
else:
log.info("LuigiNLP: Workflow run completed succesfully (logged to %s)", logfile)
return success
def requires(self):
    """Depend on a ``HarvestLive`` task with this task's endpoint and date."""
    upstream = HarvestLive(endpoint=self.endpoint, date=self.date)
    return upstream
def run(self):
    """
    Run ``flux.sh`` with the harvest flux script over the input file,
    pointing MAP_DIR at the ``maps/`` asset directory, and move the result
    to the output path.
    """
    map_url = 'file:///%s' % self.assets("maps/")
    flux_script = self.assets("harvest/flux.flux")
    # The value returned by shellout is used as the path of the produced file.
    transformed = shellout(
        """flux.sh {flux} in={input} MAP_DIR={mapdir} > {output}""",
        flux=flux_script,
        mapdir=map_url,
        input=self.input().path,
    )
    luigi.LocalTarget(transformed).move(self.output().path)
def output(self):
    """Return a local target whose path is ``self.path(ext='ldj')``."""
    target_path = self.path(ext='ldj')
    return luigi.LocalTarget(path=target_path)
if __name__ == '__main__':
    # Script entry point: hand luigi a fixed command line naming the task,
    # one worker and the local scheduler flag.
    luigi.run(cmdline_args=['HarvestIntermediateSchema', '--workers', '1', '--local-scheduler'])
def requires(self):
    """
    Depend on the Crossref, DOAJ and Arxiv intermediate schema tasks,
    in that order.
    """
    upstream_classes = (
        CrossrefIntermediateSchema,
        DOAJIntermediateSchema,
        ArxivIntermediateSchema,
    )
    return [task_cls() for task_cls in upstream_classes]
def run(self):
    """
    Pass all inputs to ``merge`` and move the file it returns to this
    task's output path.
    """
    inputs = self.input()
    combined_path = merge(inputs)
    destination = self.output().path
    luigi.LocalTarget(combined_path).move(destination)
def output(self):
    """Return the Gzip-format local target ``outputs/combined.is.ldj.gz``."""
    combined_path = 'outputs/combined.is.ldj.gz'
    return luigi.LocalTarget(path=combined_path, format=Gzip)
if __name__ == "__main__":
    # Script entry point: let luigi parse the command line, with the
    # local_scheduler option enabled.
    luigi.run(local_scheduler=True)
return 1
class GetVttFiles(luigi.Task):
    """
    Placeholder luigi task: it has no dependencies, declares ``test.txt``
    as its output, and its ``run`` only returns 1.
    """

    def requires(self):
        """Return an empty list: there are no upstream tasks."""
        return list()

    def output(self):
        """Return the local target ``test.txt``."""
        target = luigi.LocalTarget('test.txt')
        return target

    def run(self):
        """Return 1; nothing is written to the output target here."""
        return 1
if __name__ == '__main__':
    # Script entry point: run IdentifyCursor as the main task, passing the
    # local scheduler flag as the only command-line argument.
    luigi.run(cmdline_args=["--local-scheduler"], main_task_cls=IdentifyCursor)
only converting one JSON format into another.
"""
def requires(self):
    """Declare the single upstream dependency: a ``CrossrefItems`` task."""
    upstream = CrossrefItems()
    return upstream
def run(self):
"""
TODO: convert input to intermediate schema via span-import.

The conversion step is still missing; only the final move of the
converted file to this task's output path is in place.
"""
# NOTE(review): `output` is not assigned anywhere in this method as shown,
# so this line raises NameError unless a module-level `output` exists.
# The conversion step from the TODO is expected to produce it.
luigi.File(output).move(self.output().path)
def output(self):
    """Return a local target whose path is ``self.path(ext='ldj.gz')``."""
    target_path = self.path(ext='ldj.gz')
    return luigi.LocalTarget(path=target_path)
if __name__ == '__main__':
luigi.run(['CrossrefIntermediateSchema', '--workers', '1', '--local-scheduler'])
# 3. Open the input, then count and sum the occurrences of each song for the
# given country. (Hint: collections.Counter can be helpful).
# 4. Write an output TSV with two columns [song name, plays] ordered by
# plays descending.
def output(self):
    """Stub: no output target is defined yet, so this returns ``None``."""
    return None
# 5. Use different outputs for different tlds.
if __name__ == "__main__":
    # Script entry point: let luigi parse the command line, with the
    # local_scheduler option enabled.
    luigi.run(local_scheduler=True)