Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
allow_retry = False
graph_path = luigi.Parameter()
graph_key = luigi.Parameter()
node_label_path = luigi.Parameter()
node_label_key = luigi.Parameter()
output_path = luigi.Parameter()
output_key = luigi.Parameter()
prefix = luigi.Parameter()
nh_graph_depth = luigi.IntParameter()
node_ignore_label = luigi.IntParameter(default=0)
# different modes for adding lifted edges:
# "all": add lifted edges between all nodes that have a label
# "same": add lifted edges only between nodes with the same label
# "different": add lifted edges only between nodes with different labels
mode = luigi.Parameter(default='all')
dependency = luigi.TaskParameter(default=DummyTask())
modes = ('all', 'same', 'different')
def requires(self):
    """Return the task stored in ``self.dependency`` as this task's requirement."""
    upstream = self.dependency
    return upstream
def run_impl(self):
# get the global config and init configs
shebang = self.global_config_values()[0]
self.init(shebang)
assert self.mode in self.modes, "Invalid mode %s" % self.mode
# load the task config
config = self.get_task_config()
#
# Region Center Tasks
#
# Luigi task (subclass of luigi.Task) with task_name 'region_centers'.
# NOTE(review): run_impl appears to be cut off in this view; only its first
# two statements are visible.
class RegionCentersBase(luigi.Task):
""" RegionCenters base class
"""
task_name = 'region_centers'
# absolute path of the file that defines this task
src_file = os.path.abspath(__file__)
allow_retry = False
# path/key pairs for the input, the morphology and the output
input_path = luigi.Parameter()
input_key = luigi.Parameter()
morphology_path = luigi.Parameter()
morphology_key = luigi.Parameter()
output_path = luigi.Parameter()
output_key = luigi.Parameter()
# None by default; presumably the label to skip -- TODO confirm
ignore_label = luigi.Parameter(default=None)
# defaults to [1, 1, 1]; presumably one entry per axis -- TODO confirm
resolution = luigi.ListParameter(default=[1, 1, 1])
#
# task returned by requires(); declared without a default
dependency = luigi.TaskParameter()
def requires(self):
# the task passed in as ``dependency`` is the only requirement
return self.dependency
def run_impl(self):
# get the global config and init configs
# the first element of global_config_values() is handed to init()
shebang = self.global_config_values()[0]
self.init(shebang)
from cluster_tools.cluster_tasks import SlurmTask, LocalTask, LSFTask
#
# Watershed Tasks
#
class WatershedBase(luigi.Task):
""" Watershed base class
"""
task_name = 'watershed'
src_file = os.path.abspath(__file__)
# input and output volumes
input_path = luigi.Parameter()
input_key = luigi.Parameter()
output_path = luigi.Parameter()
output_key = luigi.Parameter()
mask_path = luigi.Parameter(default='')
mask_key = luigi.Parameter(default='')
@staticmethod
def default_task_config():
# we use this to also get the common default config
config = LocalTask.default_task_config()
config.update({'threshold': .5,
'apply_dt_2d': True, 'pixel_pitch': None,
'apply_ws_2d': True, 'sigma_seeds': 2., 'size_filter': 25,
'sigma_weights': 2., 'halo': [0, 0, 0],
'channel_begin': 0, 'channel_end': None,
'agglomerate_channels': 'mean', 'alpha': 0.8,
#! /usr/bin/python
import os
import argparse
import subprocess
import json
import luigi
import z5py
from cremi_tools.skeletons import build_skeleton_metrics
from production.util import DummyTask
# Luigi task (subclass of luigi.Task); per its name it evaluates skeletons.
# NOTE(review): the body of run() is not visible in this view.
class SkeletonEvaluationTask(luigi.Task):
# parameters declared without defaults
path = luigi.Parameter()
seg_key = luigi.Parameter()
skeleton_keys = luigi.ListParameter()
n_threads = luigi.IntParameter()
tmp_folder = luigi.Parameter()
# upstream task; a DummyTask instance is used when none is given
dependency = luigi.TaskParameter(default=DummyTask())
# defaults to 10; the unit is not stated here -- TODO confirm
time_estimate = luigi.IntParameter(default=10)
# defaults to False
run_local = luigi.BoolParameter(default=False)
def requires(self):
# the task passed in as ``dependency`` is the only requirement
return self.dependency
# TODO enable ROIs
def run(self):
from .. import util
luigi.LocalTarget(stopover).move(self.output().path)
def output(self):
    """Return a local target at the path ``self.path`` yields for extension ``mrc``."""
    target_path = self.path(ext='mrc')
    return luigi.LocalTarget(path=target_path)
class B3KatFilterSSG(B3KatTask):
"""
Slice out a binary MARC file from the complete dump based on SSG.
List of shortcuts for SSG can be found here:
* https://www.dfg.de/download/pdf/foerderung/programme/lis/fid_zwischenbilanz_umstrukturierung_foerderung_sondersammelgebiete.pdf
* https://web.archive.org/web/20190503122507/https://www.dfg.de/download/pdf/foerderung/programme/lis/fid_zwischenbilanz_umstrukturierung_foerderung_sondersammelgebiete.pdf
"""
ssg = luigi.Parameter(default='9,2', description='ssgn designation to be matched against 84.a')
date = ClosestDateParameter(default=datetime.date.today())
def requires(self):
    """Require the ``B3KatDownload`` task for this task's ``date``."""
    download = B3KatDownload(date=self.date)
    return download
def run(self):
"""
Taken from 012_filter.sh
unzip -p $f | tr -d '\t' | sed -e 's/\(\)/\t\1/g' | tr -d
'\n' | tr '\t' '\n' | grep '9,2ssgn' | grep 'digit' >$t
import luigi
from citeomatic import file_util, features, training, corpus
from citeomatic.features import Featurizer
from citeomatic.models import layers
from citeomatic.models.options import ModelOptions
from citeomatic.serialization import import_from
from luigi.util import inherits
logger = logging.getLogger('citeomatic.tasks')
import faulthandler
faulthandler.enable()
class SharedParameters(luigi.Task):
    """Base task for tasks that share the ``base_dir`` parameter.

    ``data_dir`` and ``model_dir`` are derived from ``base_dir``.
    """

    base_dir = luigi.Parameter(default=path.expanduser('~/citeomatic-data/'))

    @property
    def data_dir(self):
        """Path of the ``data`` directory under ``base_dir``."""
        # path.join avoids the doubled separator that plain string
        # concatenation produced when base_dir ends with '/' (as the
        # default value does).
        return path.join(self.base_dir, 'data')

    @property
    def model_dir(self):
        """Path of the ``model`` directory under ``base_dir``."""
        return path.join(self.base_dir, 'model')

    def log(self, msg, *args):
        """Log ``msg`` at INFO level on the module logger.

        ``args`` are passed through as the %-style format arguments.
        """
        logger.info(msg, *args)
class DownloadCorpus(SharedParameters):
corpus_url = luigi.Parameter(
default=
def write_output(self, content):
    """Write ``content`` to this task's output target as indented JSON.

    Values the JSON encoder cannot handle natively are converted with ``str``.
    """
    with self.output().open('w') as handle:
        # serialise inside the ``with`` so the target is opened first
        payload = json.dumps(content, indent=4, default=str)
        handle.write(payload)
class GetSSMParamTask(PuppetTask):
parameter_name = luigi.Parameter()
name = luigi.Parameter()
region = luigi.Parameter(default=None)
def params_for_results_display(self):
    """Return this task's ``parameter_name``, ``name`` and ``region`` as a dict."""
    shown = ("parameter_name", "name", "region")
    return {field: getattr(self, field) for field in shown}
@property
def uid(self):
    """Identifier of the form ``<region>-<parameter_name>-<name>``."""
    return "{}-{}-{}".format(self.region, self.parameter_name, self.name)
def output(self):
return luigi.LocalTarget(
f"output/{self.__class__.__name__}/"
f"{self.uid}.json"
index (str): Override the elasticsearch config with this index.
process_batch_size (int): Number of documents per batch.
intermediate_bucket (str): S3 bucket where batch chunks are stored.
sql_config_filename (str): SQL config path/filename in the batch task.
'''
routine_id = luigi.Parameter()
db_config_path = luigi.Parameter('mysqldb.config')
endpoint = luigi.Parameter()
dataset = luigi.Parameter()
entity_type = luigi.Parameter()
kwargs = luigi.DictParameter(default={})
index = luigi.Parameter(default=None)
process_batch_size = luigi.IntParameter(default=5000)
intermediate_bucket = luigi.Parameter('nesta-production'
'-intermediate')
sql_config_filename = luigi.Parameter('mysqldb.config')
@property
@functools.lru_cache()
def _done_ids(self):
    """Cached result of :meth:`done_ids`.

    The cache is keyed on ``self``, so repeated access on the same
    instance reuses the first result while it remains in the cache.
    """
    ids = self.done_ids()
    return ids
@abstractmethod
def done_ids(self):
    '''Return all document ids which do not require processing.

    Subclasses have to implement this method. To avoid writing it
    yourself, see :obj:`LazyElasticsearchTask`.

    Returns:
        done_ids (set): A set of document ids, not to be processed.
    '''
# Lifted Multicut Tasks
#
class SolveLiftedGlobalBase(luigi.Task):
    """ SolveLiftedGlobal base class
    """

    task_name = 'solve_lifted_global'
    # absolute path of the file that defines this task
    src_file = os.path.abspath(__file__)
    allow_retry = False

    # input volumes and graph
    problem_path = luigi.Parameter()
    assignment_path = luigi.Parameter()
    assignment_key = luigi.Parameter()
    scale = luigi.IntParameter()
    lifted_prefix = luigi.Parameter()

    # task returned by requires(); declared without a default
    dependency = luigi.TaskParameter()

    def requires(self):
        """Return the task stored in ``self.dependency`` as the requirement."""
        upstream = self.dependency
        return upstream

    @staticmethod
    def default_task_config():
        """Return the common default config extended with this task's defaults."""
        # start from the common defaults so they are included as well
        defaults = LocalTask.default_task_config()
        solver_defaults = {
            'agglomerator': 'kernighan-lin',
            'time_limit_solver': None,
        }
        defaults.update(solver_defaults)
        return defaults
import luigi
import pandas as pd
from playbyplays import ProcessPlayByPlay, get_season_game_ids
class PbPToMatchups(luigi.Task):
game_id = luigi.Parameter()
output_file_format = 'matchups_{game_id}.csv'
def requires(self):
    """Require the ``ProcessPlayByPlay`` task for this task's ``game_id``."""
    play_by_play = ProcessPlayByPlay(game_id=self.game_id)
    return play_by_play
def run(self):
with self.input().open() as in_file:
game_pbp = pd.read_csv(in_file,
engine='c',
dtype={'game_id': str},
converters={'period_elapsed_time': pd.to_timedelta,
'overall_elapsed_time': pd.to_timedelta,
'period_remaining_time': pd.to_timedelta,
'overall_remaining_time': pd.to_timedelta})
# fill up the score columns for better tabulation