Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
package_wheel_path = os.path.join(WHEELHOUSE_PATH, package_name)
requirement = cmd_partial[package_name]
logger.info("Building: {}".format(package_name))
completed_process = run(
"pip3 wheel -w {} {}".format(package_wheel_path, requirement).split()
)
if completed_process.returncode != 0:
raise UserException("creating wheels", package_name)
for wheelname in os.listdir(package_wheel_path):
name_split = wheelname.split("-")
dist_name, version = name_split[0], name_split[1]
expected_version_specs = restricted_packages.get(dist_name, None)
if expected_version_specs is not None and not expected_version_specs.contains(version):
raise UserException(
"when installing {}, found {}=={} which conflicts with cortex's requirements {}{}".format(
package_name, dist_name, version, dist_name, expected_version_specs
)
)
logger.info("Validating packages")
for package_name in build_order:
requirement = cmd_partial[package_name]
logger.info("Installing: {}".format(package_name))
completed_process = run(
"pip3 install --no-index --find-links={} {}".format(
os.path.join(WHEELHOUSE_PATH, package_name), requirement
).split()
)
if completed_process.returncode != 0:
cmd_partial[package_name] = os.path.join(LOCAL_PACKAGE_PATH, package_name)
logger.info("Setting up packages")
restricted_packages = get_restricted_packages()
for package_name in build_order:
package_wheel_path = os.path.join(WHEELHOUSE_PATH, package_name)
requirement = cmd_partial[package_name]
logger.info("Building: {}".format(package_name))
completed_process = run(
"pip3 wheel -w {} {}".format(package_wheel_path, requirement).split()
)
if completed_process.returncode != 0:
raise UserException("creating wheels", package_name)
for wheelname in os.listdir(package_wheel_path):
name_split = wheelname.split("-")
dist_name, version = name_split[0], name_split[1]
expected_version_specs = restricted_packages.get(dist_name, None)
if expected_version_specs is not None and not expected_version_specs.contains(version):
raise UserException(
"when installing {}, found {}=={} which conflicts with cortex's requirements {}{}".format(
package_name, dist_name, version, dist_name, expected_version_specs
)
)
logger.info("Validating packages")
for package_name in build_order:
requirement = cmd_partial[package_name]
if "requirements.txt" in python_packages:
storage.download_file(python_packages["requirements.txt"]["src_key"], "/requirements.txt")
for package_name in build_order:
cmd = package_name
if package_name == "requirements.txt":
cmd = "-r /requirements.txt"
completed_process = run(
"pip3 install --no-cache-dir --no-index --find-links={} {}".format(
os.path.join(WHEELHOUSE_PATH, package_name), cmd
).split()
)
if completed_process.returncode != 0:
raise UserException("installing package", package_name)
util.rm_file("/requirements.txt")
util.rm_dir(WHEELHOUSE_PATH)
package_name, dist_name, version, dist_name, expected_version_specs
)
)
logger.info("Validating packages")
for package_name in build_order:
requirement = cmd_partial[package_name]
logger.info("Installing: {}".format(package_name))
completed_process = run(
"pip3 install --no-index --find-links={} {}".format(
os.path.join(WHEELHOUSE_PATH, package_name), requirement
).split()
)
if completed_process.returncode != 0:
raise UserException("installing package", package_name)
logger.info("Caching built packages")
for package_name in build_order:
storage.zip_and_upload(
os.path.join(WHEELHOUSE_PATH, package_name),
python_packages[package_name]["package_key"],
)
def _validate_required_fn_args(impl, fn_name, args):
fn = getattr(impl, fn_name, None)
if not fn:
raise UserException('required function "{}" is not defined'.format(fn_name))
if not callable(fn):
raise UserException('"{}" is defined, but is not a function'.format(fn_name))
argspec = inspect.getargspec(fn)
if argspec.args != args:
raise UserException(
'invalid signature for function "{}": expected arguments ({}) but found ({})'.format(
fn_name, ", ".join(args), ", ".join(argspec.args)
)
def validate_dataset(ctx, raw_df, cols_to_validate):
    """Run value checks over ``cols_to_validate`` of ``raw_df``; log every
    violated condition and raise UserException if any check failed."""
    row_total = ctx.get_metadata(ctx.raw_dataset["key"])["dataset_size"]
    violations = spark_util.value_check_data(ctx, raw_df, cols_to_validate)

    # empty dict means every validation passed
    if not violations:
        return

    for _column, checks in violations.items():
        for check, failed_count in checks:
            logger.error(
                "Data validation {} has been violated in {}/{} samples".format(
                    check, failed_count, row_total
                )
            )
    raise UserException("raw column validations failed")
def read_parquet(ctx, spark):
    """Load the environment's parquet dataset and alias its columns to the
    corresponding raw-column names declared in the schema config."""
    data_conf = ctx.environment["data"]
    df = spark.read.parquet(data_conf["path"])

    # raw column name -> parquet column name, restricted to known raw columns
    aliases = {}
    for col_conf in data_conf["schema"]:
        raw_name = util.get_resource_ref(col_conf["raw_column"])
        if raw_name in ctx.raw_columns:
            aliases[raw_name] = col_conf["parquet_column_name"]

    # NOTE(review): this compares raw-column names against the dataframe's
    # columns, while the select below reads the parquet-side names — confirm
    # the parquet files are expected to carry the raw names here.
    absent = set(aliases) - set(df.columns)
    if absent:
        logger.error("found schema:")
        log_df_schema(df, logger.error)
        raise UserException("missing column(s) in input dataset", str(absent))

    exprs = ["{} as {}".format(parq, raw) for raw, parq in aliases.items()]
    return df.selectExpr(*exprs)
def load_module(self, module_prefix, module_name, impl_path):
    """Load a user implementation module from either a dill pickle
    (``*.pickle``) or a Python source file, returning the module object.

    Raises:
        UserException: wrapping any error raised while loading.
    """
    qualified_name = "{}_{}".format(module_prefix, module_name)

    # plain .py source: compile and import it under the qualified name
    if not impl_path.endswith(".pickle"):
        try:
            return imp.load_source(qualified_name, impl_path)
        except Exception as e:
            raise UserException(str(e)) from e

    # pickle path: rebuild a fresh module and populate it from the dill dict.
    # NOTE: dill.load executes arbitrary code — only safe for trusted input.
    try:
        module = imp.new_module(qualified_name)
        with open(impl_path, "rb") as pickle_file:
            attributes = dill.load(pickle_file)
            for name in attributes:
                setattr(module, name, attributes[name])
    except Exception as e:
        raise UserException("unable to load pickle", str(e)) from e
    return module