"""Filter the given DataFrame by a single defect attribute.

Parameters:
    field_name (str): Name of the field to filter by.
    field_value (str): Value of the field to filter by.
    dataframe (DataFrame): Data to be filtered.
    defect_attributes (dict): Defect attributes.

Returns:
    Filtered DataFrame.
"""
filtered_df = pandas.DataFrame()
if field_name in session['config.ini']['DEFECT_ATTRIBUTES']['areas_of_testing']:
field_value = 1 if field_value == 'Yes' else 0
filtered_df = dataframe[dataframe[field_name+'_lab'].swifter.apply(bool) == field_value]
if field_name in get_fields('Categorical', defect_attributes):
filtered_df = dataframe[dataframe[field_name].swifter.apply(apply_categorical_filter, args=(field_value,))]
if field_name in get_fields('Boolean', defect_attributes):
field_value = 1 if field_value == 'Yes' else 0
filtered_df = dataframe[dataframe[field_name].swifter.apply(bool) == field_value]
if field_name in get_fields('String', defect_attributes):
filtered_df = dataframe[dataframe[field_name] == field_value]
if field_name in get_fields('Substring', defect_attributes):
filtered_df = dataframe[dataframe[field_name].str.contains(
    field_value, case=False, na=False, regex=False)]
if field_name in get_fields('Substring_array', defect_attributes):
filtered_df = dataframe
for pattern in field_value.split(','):
filtered_df = apply_substring_array_filter(
filtered_df, field_name, pattern.strip())
if field_name[:-1] in get_fields('Numeric', defect_attributes):
if field_name[-1] == '0':
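# The apply_categorical_filter helper used in the categorical branch above is
# not part of this snippet. A minimal sketch of what such a helper could look
# like, assuming it only needs to check whether a (possibly comma-separated)
# categorical cell contains the requested value; this is a hypothetical
# stand-in, not the project's actual implementation:
def apply_categorical_filter(cell_value, field_value):
    """Return True when the categorical cell matches the requested value."""
    if cell_value is None:
        return False
    return field_value in [part.strip() for part in str(cell_value).split(',')]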
extreme = np.max(
np.abs(
self.x_train[row.feature].tolist()
+ self.x_test[row.feature].tolist()
)
)
self.x_train.loc[:, row.feature].swifter.apply(np.log1p).hist(
ax=ax[i],
alpha=0.6,
label="Train",
density=True,
bins=np.arange(-extreme, extreme, 0.25),
)
self.x_test.loc[:, row.feature].swifter.apply(np.log1p).hist(
ax=ax[i],
alpha=0.6,
label="Train",
density=True,
bins=np.arange(-extreme, extreme, 0.25),
)
ax[i].set_title(f"Statistic = {row.statistic}, p = {row.p}")
ax[i].set_xlabel(f"Log({row.feature})")
ax[i].legend()
plt.tight_layout()
plt.show()
if self.report is not None:
self.report.report_technique(report_info, [])
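# Every snippet on this page leans on swifter's drop-in accessor: importing
# swifter registers a `.swifter` attribute on pandas Series and DataFrames, and
# `.swifter.apply(...)` keeps the pandas apply semantics while trying to
# vectorize or parallelize the call. A small, self-contained sketch of the
# element-wise pattern used above (the column name and data are made up):
import numpy as np
import pandas as pd
import swifter  # noqa: F401  (the import registers the .swifter accessor)

df = pd.DataFrame({"duration": np.random.exponential(scale=10.0, size=1000)})
# Same result as df["duration"].apply(np.log1p); swifter may run it vectorized
# or in parallel when it estimates that to be faster.
df["log_duration"] = df["duration"].swifter.apply(np.log1p)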
def create(self):
"""Creates a :class:`corporacreator.Corpus` for each locale.
"""
_logger.info("Creating corpora...")
corpora_data = self._parse_tsv()
corpora_data[["sentence", "up_votes", "down_votes"]] = corpora_data[
["sentence", "up_votes", "down_votes"]
].swifter.apply(func=lambda arg: common_wrapper(*arg), axis=1)
if self.args.langs:
# Check that every language requested on the command line actually
# appears in clips.tsv; otherwise raise an error.
if set(self.args.langs).issubset(set(corpora_data.locale.unique())):
locales = self.args.langs
else:
raise argparse.ArgumentTypeError("ERROR: You have requested languages which do not exist in clips.tsv")
else:
locales = corpora_data.locale.unique()
for locale in locales:
_logger.info("Selecting %s corpus data..." % locale)
corpus_data = corpora_data.loc[
lambda df: df.locale == locale,
def _convert_csv_data_to_raw_data(self):
self.raw[["wav_filename","wav_filesize","transcript"]] = self.csv_data[
["audio_url","transcript","audio_length"]
].swifter.apply(func=lambda arg: self._convert_csv_data_to_raw_data_impl(*arg), axis=1, raw=True)
self.raw.reset_index()
def p(df: pd.DataFrame) -> pd.DataFrame:
if is_vectorized:
return df.assign(**{col: transformation_function(df[col]) for col in columns_to_transform})
return df.assign(**{col: df[col].swifter.apply(transformation_function) for col in columns_to_transform})
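# The closure above relies on columns_to_transform, transformation_function and
# is_vectorized from an enclosing scope that the snippet does not show. A
# hedged sketch of a factory that could produce such a closure (the factory
# name and signature are assumptions, not the original project's API):
from typing import Callable, List

import pandas as pd
import swifter  # noqa: F401

def make_column_transformer(columns_to_transform: List[str],
                            transformation_function: Callable,
                            is_vectorized: bool = True) -> Callable[[pd.DataFrame], pd.DataFrame]:
    def p(df: pd.DataFrame) -> pd.DataFrame:
        if is_vectorized:
            # The function accepts a whole Series, so call it once per column.
            return df.assign(**{col: transformation_function(df[col])
                                for col in columns_to_transform})
        # Otherwise fall back to element-wise application through swifter.
        return df.assign(**{col: df[col].swifter.apply(transformation_function)
                            for col in columns_to_transform})
    return p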
def _pre_process_corpus_data(self):
self.corpus_data[["sentence", "up_votes", "down_votes"]] = self.corpus_data[
["client_id", "sentence", "up_votes", "down_votes"]
].swifter.apply(func=lambda arg: self._preprocessor_wrapper(*arg), axis=1)
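# Several snippets above share the same row-wise pattern: select a few columns,
# call .swifter.apply(..., axis=1) so each row is handed to a wrapper, and let
# the wrapper unpack the row into a plain function that returns the replacement
# values. A self-contained sketch of that pattern (column names and the
# cleaning rules are invented for illustration, not taken from the projects):
import pandas as pd
import swifter  # noqa: F401

def _clean_row(sentence, up_votes, down_votes):
    # Return a Series whose index matches the target columns so the
    # assignment lines up by name.
    return pd.Series({
        "sentence": sentence.strip(),
        "up_votes": int(up_votes),
        "down_votes": int(down_votes),
    })

corpus = pd.DataFrame({
    "sentence": ["  Hello world ", "Good morning"],
    "up_votes": ["3", "1"],
    "down_votes": ["0", "2"],
})
corpus[["sentence", "up_votes", "down_votes"]] = corpus[
    ["sentence", "up_votes", "down_votes"]
].swifter.apply(lambda row: _clean_row(*row), axis=1)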