Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def run(self):
    """
    Write the distinct first fields of all input files to the output.

    All inputs are concatenated, the first awk field of every line is
    kept and the result is sorted and deduplicated under ``LANG=C``.
    The file produced by ``shellout`` is then moved to the output location.
    """
    paths = [target.fn for target in self.input()]
    command = """cat {files} | LANG=C awk '{{print $1}}' | LANG=C sort -u > {output}"""
    # The doubled braces in the template survive formatting as literal
    # braces around the awk program.
    result = shellout(command, files=' '.join(paths))
    luigi.File(result).move(self.output().fn)
def run(self):
    """
    Copy the first listed file matching this task's language and format.

    The input is read as a one-column TSV of paths. The first path that
    ends with ``short_abstracts_<language>.<format>`` is copied to the
    output path.

    Raises:
        RuntimeError: if no listed path has the expected suffix.
    """
    wanted_suffix = 'short_abstracts_%s.%s' % (self.language, self.format)
    with self.input().open() as listing:
        candidates = (
            entry.path
            for entry in listing.iter_tsv(cols=('path',))
            if entry.path.endswith(wanted_suffix)
        )
        # Only the first match matters; later rows are never read.
        first_match = next(candidates, None)
        if first_match is None:
            raise RuntimeError('no file found')
        luigi.File(first_match).copy(self.output().path)
def run(self):
    """
    Download the YAGO3 TSV archive with wget and move it to the output path.

    wget is invoked with ``--retry-connrefused`` and writes to the
    temporary file that ``shellout`` substitutes for ``{output}``.
    """
    archive_url = "http://resources.mpi-inf.mpg.de/yago-naga/yago/download/yago/yago3_entire_tsv.7z"
    download_cmd = """wget --retry-connrefused -O {output} {url}"""
    downloaded = shellout(download_cmd, url=archive_url)
    luigi.File(downloaded).move(self.output().path)
def requires(self):
    """
    Harvest the ProQuest OAI endpoint with ``oaimi`` into the output path.

    The harvested stream is redirected into the temporary file that
    ``shellout`` substitutes for ``{output}`` and then moved into place.

    NOTE(review): this method performs the harvest itself and returns
    nothing, which looks like the body of a ``run`` method -- confirm
    that ``requires`` is really the intended name.
    """
    harvest_cmd = "oaimi -verbose http://pqdtoai.proquest.com/OAIHandler > {output}"
    harvested = shellout(harvest_cmd)
    luigi.File(harvested).move(self.output().path)
def run(self):
    """
    TODO: For each file, we want to run a jq command.

    Placeholder implementation: the input is opened but not read, and an
    empty temporary file is moved to the output path.
    """
    import os  # function-scope import so the block does not rely on file-level imports

    fd, temp = tempfile.mkstemp(prefix='byoi-')
    # mkstemp hands back an already-open OS-level descriptor; only the path
    # is used here, so close it right away instead of leaking it.
    os.close(fd)
    with self.input().open() as handle:
        # TODO: insert code here
        pass
    luigi.File(temp).move(self.output().path)
def run(self):
    """
    Find similar titles with ``esmlt`` and write a condensed TSV.

    ``esmlt`` is run against the configured host, port and target indices
    for the ``file`` input. Its eleven-column output is reduced to nine
    columns: on each side the ``til``/``sl`` (left) and ``tir``/``sr``
    (right) values are merged into one space-separated field. Empty
    values and the ``NOT_AVAILABLE`` marker are dropped, and the marker is
    used when nothing remains.
    """
    import os  # function-scope import so the block does not rely on file-level imports

    def merged(*values):
        # Join the usable values; fall back to the marker if none are left.
        usable = [v for v in values if v and v != "NOT_AVAILABLE"]
        return ' '.join(usable) or "NOT_AVAILABLE"

    # The template text is kept byte-identical, including the line break
    # inside it. NOTE(review): presumably shellout normalizes whitespace
    # before running the command -- confirm.
    template = (
        'esmlt -host {host} -port {port} -indices "{target}" '
        '-fields "content.245.a content.245.b"\n'
        '-file "{file}" -columns "4,5" > {output} '
    )
    # find similar titles
    similar = shellout(template, host=self.es_host, port=self.es_port,
                       file=self.input().get('file').path, target=self.target)

    fd, stopover = tempfile.mkstemp(prefix='siskin-')
    # Only the path is needed; close the descriptor mkstemp opened so it
    # does not leak.
    os.close(fd)

    columns = ('idl', 'il', 'tl', 'til', 'sl', 'idr', 'ir', 'tr', 'score', 'tir', 'sr')
    with luigi.File(similar, format=TSV).open() as handle:
        # A distinct name for the writer avoids rebinding the shellout
        # result path while it is still conceptually in use.
        with luigi.File(stopover, format=TSV).open('w') as sink:
            for row in handle.iter_tsv(cols=columns):
                sink.write_tsv(row.idl, row.il, row.tl, merged(row.til, row.sl),
                               row.idr, row.ir, row.tr, merged(row.tir, row.sr),
                               row.score)
    luigi.File(stopover).move(self.output().path)
def run(self):
    """
    Convert the geo records whose URI appears in the gnd input to N-Triples.

    Steps:
      1. Collect all URIs from the ``gnd`` input (trailing slashes removed).
      2. Scan the ``geo`` input: a line starting with ``http://`` is treated
         as a URI and the line that follows it as that URI's content. The
         content is kept when the URI is in the collected set.
      3. Feed every kept content line through ``rapper`` (rdfxml in,
         ntriples out) and move the accumulated result to the output path.
    """
    import os  # function-scope import so the block does not rely on file-level imports

    ids = set()
    with self.input().get('gnd').open() as handle:
        for row in handle.iter_tsv(cols=('uri',)):
            ids.add(row.uri.rstrip('/'))

    fd, stopover = tempfile.mkstemp(prefix='siskin-')
    # mkstemp returns an open descriptor; only the path is used, so close it.
    os.close(fd)
    with self.input().get('geo').open() as handle:
        with luigi.File(stopover).open('w') as sink:
            while True:
                # Keep the try limited to the two reads that can exhaust
                # the input. The builtin next() replaces the Python-2-only
                # ``.next()`` method.
                try:
                    line = next(handle).strip()
                    if not line.startswith('http://'):
                        continue
                    content = next(handle)
                except StopIteration:
                    break
                if line.rstrip('/') in ids:
                    sink.write(content)

    fd, t = tempfile.mkstemp(prefix='siskin-')
    os.close(fd)
    # The template text is kept byte-identical, including the line break.
    # NOTE(review): ``read`` without -r and the unquoted ``$r`` let the
    # shell alter backslashes, whitespace and glob characters in the
    # content -- confirm whether that matters for this data.
    template = (
        'while read r; do echo $r > {t} &&\n'
        'rapper -q -i rdfxml -o ntriples {t} >> {output}; done < {input} '
    )
    output = shellout(template, t=t, input=stopover)
    luigi.File(output).move(self.output().path)
def run(self):
    """
    Extract fields 2-3 of the input, sort them uniquely, and store the result.

    ``cut -f 2-3`` selects the fields and ``LANG=C sort -u`` removes
    duplicates. The file produced by ``shellout`` is moved to the output
    path.
    """
    source_path = self.input().path
    command = "cut -f 2-3 < {input} | LANG=C sort -u > {output}"
    result = shellout(command, input=source_path)
    luigi.File(result).move(self.output().path)
def run(self):
    """
    Run ``span-export`` over the decompressed input and store it gzipped.

    The input is decompressed on the fly with ``unpigz -c`` via process
    substitution, and the export is recompressed with ``pigz -c``. The file
    produced by ``shellout`` is moved to the output path.
    """
    export_cmd = "span-export <(unpigz -c {input}) | pigz -c > {output}"
    compressed = shellout(export_cmd, input=self.input().path)
    luigi.File(compressed).move(self.output().path)
def run(self):
    """
    Concatenate all inputs and sort the combined file into the output.

    Each input is appended to one temporary file with ``cat``. That file
    is then sorted under ``LANG=C`` on keys 1 and 3
    (``sort -k1,1 -k3,3``) and the sorted result is moved to the output
    location.
    """
    import os  # function-scope import so the block does not rely on file-level imports

    fd, combined = tempfile.mkstemp(prefix='tasktree-')
    # Only the path is needed (the shell appends to it); close the
    # descriptor mkstemp opened so it does not leak.
    os.close(fd)
    for target in self.input():
        shellout("cat {input} >> {output}", input=target.path,
                 output=combined)
    output = shellout("LANG=C sort -k1,1 -k3,3 {input} > {output}", input=combined)
    luigi.File(output).move(self.output().fn)