Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
x_df = pd.merge(x_df, y_df, on=bids_comps_x, how="left")
if merged_name is not None:
x_df.to_csv(merged_name, index=False)
# Drop samples with invalid rating
nan_labels = x_df[x_df[rate_label].isnull()].index.ravel().tolist()
if nan_labels:
config.loggers.interface.info(
f"Dropping {len(nan_labels)} samples for having non-numerical labels,"
)
x_df = x_df.drop(nan_labels)
# Print out some info
nsamples = len(x_df)
config.loggers.interface.info(
f'Created dataset X="{feat_file}", Y="{label_file}" (N={nsamples} valid samples)'
)
# Inform about ratings distribution
labels = sorted(list(set(x_df[rate_label].values.ravel().tolist())))
ldist = []
for l in labels:
ldist.append(int(np.sum(x_df[rate_label] == l)))
config.loggers.interface.info(
"Ratings distribution: %s (%s, %s)",
"/".join(["%d" % x for x in ldist]),
"/".join(["%.2f%%" % (100 * x / nsamples) for x in ldist]),
"accept/exclude" if len(ldist) == 2 else "exclude/doubtful/accept",
)
def zscore_dataset(dataframe, excl_columns=None, by="site", njobs=-1):
""" Returns a dataset zscored by the column given as argument """
from multiprocessing import Pool, cpu_count
config.loggers.interface.info("z-scoring dataset ...")
if njobs <= 0:
njobs = cpu_count()
sites = list(set(dataframe[[by]].values.ravel().tolist()))
columns = list(dataframe.select_dtypes([np.number]).columns.ravel())
if excl_columns is None:
excl_columns = []
for col in columns:
if not np.isfinite(np.sum(dataframe[[col]].values.ravel())):
excl_columns.append(col)
if excl_columns:
for col in excl_columns:
port=port,
email=email,
)
try:
self._results["api_id"] = response.json()["_id"]
except (AttributeError, KeyError, ValueError):
# response did not give us an ID
errmsg = (
"QC metrics upload failed to create an ID for the record "
"uplOADED. rEsponse from server follows: {}".format(response.text)
)
config.loggers.interface.warning(errmsg)
if response.status_code == 201:
config.loggers.interface.info("QC metrics successfully uploaded.")
return runtime
errmsg = "QC metrics failed to upload. Status %d: %s" % (
response.status_code,
response.text,
)
config.loggers.interface.warning(errmsg)
if self.inputs.strict:
raise RuntimeError(response.text)
return runtime