Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def apply_substring_array_filter(df, series, pattern):
    """ Keeps only the rows whose series value matches the pattern.

    Rows are selected in two passes: first by applying compare_words
    to every value of the series (through swifter, progress bar
    disabled), then by a case-insensitive, literal (non-regex)
    substring test in which missing values count as "no match".

    Parameters:
        df(DataFrame): defect reports' file parsed to pandas DataFrame;
        series(str): series name;
        pattern(str): the value you're looking for.

    Returns:
        filtered_df(DataFrame): filtered dataframe.
    """
    # First pass: row mask produced by compare_words(value, pattern).
    quiet_column = df[series].swifter.progress_bar(enable=False, desc=None)
    word_mask = quiet_column.apply(compare_words, args=(pattern,))
    word_matches = df[word_mask]

    # Second pass: plain substring containment on the surviving rows.
    substring_mask = word_matches[series].str.contains(
        pattern, case=False, na=False, regex=False)
    return word_matches[substring_mask]
def get_statistical_info(df):
""" Statistical info calculation.
Parameters:
df (DataFrame): defect reports' file parsed to pandas DataFrame.
Returns:
dict object filled in calculated statistics.
"""
comments = df['Comments'].swifter.progress_bar(
enable=False, desc=None).apply(int)
attachments = df['Attachments'].swifter.progress_bar(
enable=False, desc=None).apply(int)
return {'comments_stat': {
'min': str(comments.min()),
'max': str(comments.max()),
'mean': str(int(math_round(comments.mean()))),
'std': str(int(math_round(numpy.nan_to_num(comments.std()))))
},
'attachments_stat': {
'min': str(attachments.min()),
'max': str(attachments.max()),
'mean': str(int(math_round(attachments.mean()))),
'std': str(int(math_round(numpy.nan_to_num(attachments.std()))))
},
'ttr_stat': {
'min': str(df['ttr'].min()),
'max': str(df['ttr'].max()),
def transform_series(df, defect_attributes):
""" Transforms series to make theirs' data ready for analysis.
Parameters:
df (DataFrame): defect reports' file parsed to pandas DataFrame;
defect_attributes (dict): defect attributes configurations;.
Returns:
DataFrame with transformed series appended.
"""
df['Resolved'] = df['Resolved'].fillna(
value='').astype(str).swifter.progress_bar(
enable=False, desc=None).apply(
convert_date)
df['Created'] = df['Created'].fillna(
value='').astype(str).swifter.progress_bar(
enable=False, desc=None).apply(
convert_date)
for group in ['special_attributes', 'mandatory_attributes']:
for attribute in defect_attributes[group]:
df[attribute] = apply_datatype(
df[attribute], attribute, defect_attributes[group][attribute]['type'])
pool = Pool()
df['Description_tr'] = pool.map(clean_description, df['Description'])
pool.close()
pool.join()
df['Resolved_tr'] = df['Resolved'].fillna(
value=datetime.date.today())
df['ttr'] = (df['Resolved_tr'] - df['Created']).dt.days
defect_attributes['special_attributes']['ttr'] = {
series,
area_of_testing,
patterns):
""" Appends binarized series to df.
Parameters:
df (DataFrame): defect reports' file parsed to pandas DataFrame;
series (str): df series name;
area_of_testing (str):
patterns (str): searching elements.
Returns:
The whole df with binarized series.
"""
df[area_of_testing] = df[series].swifter.progress_bar(
enable=False, desc=None).apply(
binarize_value, args=(
patterns,))
return df