Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _upsert(self, session, obj, ignore=False):
    """Add ``obj`` to ``session`` and commit, rolling back on a database error.

    If the add/commit raises ``SQLAlchemyError`` the session is rolled back
    and a warning is logged. The failure is then re-raised as ``DBError``
    (chained to the original error) unless ``ignore`` is truthy, in which
    case it is suppressed and the method returns ``None``.

    :param session: session object exposing ``add``, ``commit`` and ``rollback``
    :param obj:     object to persist; its class name is used in the messages
    :param ignore:  when truthy, swallow the database error after rollback

    :raises DBError: on ``SQLAlchemyError`` when ``ignore`` is falsy
    """
    try:
        session.add(obj)
        session.commit()
    except SQLAlchemyError as err:
        # undo the failed transaction before reporting anything
        session.rollback()
        cls = obj.__class__.__name__
        logger.warning(f"conflict adding {cls}, {err}")
        if ignore:
            return
        raise DBError(f"duplicate {cls} - {err}") from err
if resp:
txt = get_in(resp, 'status.status_text')
if txt:
logger.info(txt)
if watch:
runspec.logs(True, self._get_db())
resp = self._get_db_run(runspec)
except Exception as err:
logger.error('got remote run err, {}'.format(err))
result = self._post_run(task=runspec, err=err)
return self._wrap_result(result, runspec, err=err)
return self._wrap_result(resp, runspec)
elif self._is_remote and not self._is_api_server and not self.kfp:
logger.warning(
'warning!, Api url not set, ' 'trying to exec remote runtime locally'
)
execution = MLClientCtx.from_dict(runspec.to_dict(), db, autocommit=False)
# create task generator (for child runs) from spec
task_generator = None
if not self._is_nested:
task_generator = get_generator(spec, execution)
last_err = None
if task_generator:
# multiple runs (based on hyper params or params file)
generator = task_generator.generate(runspec)
results = self._run_many(generator, execution, runspec)
results_to_iter(results, runspec, execution)
def client(self):
from dask.distributed import Client, default_client
if self.spec.remote and not self.status.scheduler_address:
if not self._load_db_status():
self._start()
if self.status.scheduler_address:
addr, dash = self._remote_addresses()
logger.info('trying dask client at: {}'.format(addr))
try:
client = Client(addr)
except OSError as e:
logger.warning(
'remote scheduler at {} not ready, will try to restart {}'.format(
addr, e
)
)
# todo: figure out if test is needed
# if self._is_remote_api():
# raise Exception('no access to Kubernetes API')
status = self.get_status()
if status != 'running':
self._start()
addr, dash = self._remote_addresses()
client = Client(addr)
logger.info(
if spec.secret_sources:
self._secrets = SecretsStore.from_list(spec.secret_sources)
# update run metadata (uid, labels) and store in DB
meta = runspec.metadata
meta.uid = meta.uid or uuid.uuid4().hex
runspec.spec.output_path = runspec.spec.output_path or config.artifact_path
if runspec.spec.output_path:
runspec.spec.output_path = runspec.spec.output_path.replace(
'{{run.uid}}', meta.uid
)
runspec.spec.output_path = runspec.spec.output_path.replace(
'{{run.project}}', runspec.metadata.project
)
if is_local(runspec.spec.output_path):
logger.warning(
'artifact path is not defined or is local,'
' artifacts will not be visible in the UI'
)
if self.kind not in ['', 'local', 'handler', 'dask']:
raise ValueError(
'absolute artifact_path must be specified'
' when running remote tasks'
)
db = self._get_db()
if not self.is_deployed:
raise RunError(
"function image is not built/ready, use .build() method first"
)
if self.verbose:
def get_builder_status(self, func, offset=0, logs=True):
    """Fetch the build status (and optionally the build log) of a function.

    Issues ``GET build/status`` through ``self.api_call`` with the function's
    name, project and tag. When the response carries headers,
    ``func.status.state``, ``func.status.build_pod`` and ``func.spec.image``
    are updated in place from the ``function_status``, ``builder_pod`` and
    ``function_image`` headers (empty string when a header is absent).

    :param func:   function object; its ``metadata`` is read and its
                   ``status``/``spec`` are updated
    :param offset: sent as the ``offset`` query parameter (stringified)
    :param logs:   sent as the ``logs`` query parameter (via ``bool2str``)

    :returns: the raw response body (``resp.content``)
    :raises OSError:    when the API call fails with an ``OSError``
    :raises RunDBError: when the server reply is not ok
    """
    try:
        params = {
            'name': func.metadata.name,
            'project': func.metadata.project,
            'tag': func.metadata.tag,
            'logs': bool2str(logs),
            'offset': str(offset),
        }
        resp = self.api_call('GET', 'build/status', params=params)
    except OSError as err:
        logger.error('error getting build status: {}'.format(err))
        # chain explicitly so the original error is kept as __cause__
        raise OSError(
            'error: cannot get build status, {}'.format(err)
        ) from err

    if not resp.ok:
        logger.warning('failed resp, {}'.format(resp.text))
        raise RunDBError('bad function build response')

    if resp.headers:
        # the server reports build state through response headers
        func.status.state = resp.headers.get('function_status', '')
        func.status.build_pod = resp.headers.get('builder_pod', '')
        func.spec.image = resp.headers.get('function_image', '')

    return resp.content
def connect(self, secrets=None):
    """Call the server's ``healthz`` endpoint and adopt its reported defaults.

    The ``healthz`` call itself is not guarded, so errors raised by
    ``self.api_call`` propagate to the caller. Reading the response is
    best-effort: any failure while parsing or applying it is logged as a
    warning and otherwise ignored.

    From the response this stores ``self.server_version``, warns when the
    server version or namespace differs from the client ``config``, fills
    ``config.remote_host``, ``config.mpijob_crd_version``, ``config.ui_url``
    and ``config.artifact_path`` only where they are not already set, and
    sets the ``DEFAULT_DOCKER_REGISTRY`` environment variable when the server
    reports ``docker_registry`` and the variable is not already defined.

    :param secrets: not used by this method

    :returns: ``self``
    """
    resp = self.api_call('GET', 'healthz', timeout=5)
    try:
        server_cfg = resp.json()
        self.server_version = server_cfg['version']
        if self.server_version != config.version:
            logger.warning(
                'warning!, server ({}) and client ({}) ver dont match'
                .format(self.server_version, config.version))
        if ('namespace' in server_cfg
                and server_cfg['namespace'] != config.namespace):
            logger.warning(
                'warning!, server ({}) and client ({}) namespace dont match'
                .format(server_cfg['namespace'], config.namespace))

        # get defaults from remote server (local settings take precedence)
        config.remote_host = (
            config.remote_host or server_cfg.get('remote_host'))
        config.mpijob_crd_version = (
            config.mpijob_crd_version or server_cfg.get('mpijob_crd_version'))
        config.ui_url = config.ui_url or server_cfg.get('ui_url')
        config.artifact_path = (
            config.artifact_path or server_cfg.get('artifact_path'))
        if ('docker_registry' in server_cfg
                and 'DEFAULT_DOCKER_REGISTRY' not in environ):
            environ['DEFAULT_DOCKER_REGISTRY'] = server_cfg['docker_registry']
    except Exception as err:
        # still best-effort, but report the failure instead of hiding it
        logger.warning(
            'failed to read server configuration from healthz response, {}'
            .format(err))
    return self
'state': state,
'iter': id,
}
if state == 'error':
failed += 1
err = get_in(task, ['status', 'error'], '')
logger.error('error in task {}:{} - {}'.format(
execution.uid, id, err))
elif state != 'completed':
running += 1
iter.append(struct)
if not iter:
execution.set_state('completed', commit=True)
logger.warning('warning!, zero iteration results')
return
if hasattr(pd, 'json_normalize'):
df = pd.json_normalize(iter).sort_values('iter')
else:
df = pd.io.json.json_normalize(iter).sort_values('iter')
header = df.columns.values.tolist()
summary = [header] + df.values.tolist()
if not runspec:
return summary
criteria = runspec.spec.selector
item, id = selector(results, criteria)
if runspec.spec.selector and not id:
logger.warning(f'no best result selected, check selector ({criteria}) or results')
if id:
pod = self.get_pod(pod_name, namespace)
if not pod:
return 'error'
status = pod.status.phase.lower()
if status in ['running', 'completed', 'succeeded']:
print('')
break
if status == 'failed':
return 'failed'
elapsed_time = (datetime.now() - start_time).seconds
if elapsed_time > timeout:
return 'timeout'
time.sleep(2)
stdout.write('.')
if status != 'pending':
logger.warning(f'pod state in loop is {status}')
except ApiException as e:
logger.error('failed waiting for pod: {}\n'.format(str(e)))
return 'error'
outputs = self.v1api.read_namespaced_pod_log(
name=pod_name, namespace=namespace, follow=True,
_preload_content=False)
for out in outputs:
print(out.decode('utf-8'), end='')
if writer:
writer.write(out)
for i in range(5):
pod_state = self.get_pod(pod_name, namespace).status.phase.lower()
if pod_state != 'running':
break
logger.warning('pod still running, waiting 2 sec')
'error',
'MpiJob {} finished with state {}'.format(
meta.name, status
),
)
else:
txt = 'MpiJob {} launcher pod {} state {}'.format(
meta.name, launcher, state
)
logger.info(txt)
runobj.status.status_text = txt
else:
txt = 'MpiJob status unknown or failed, check pods: {}'.format(
self.get_pods(meta.name, meta.namespace)
)
logger.warning(txt)
runobj.status.status_text = txt
if self.kfp:
execution.set_state('error', txt)
return None