    :param column_value: The name for the column keeping the value itself.
    :type column_value: str
    """
    dd, column_id, column_kind, column_value = \
        _normalize_input_to_internal_representation(ts, column_id, column_sort, column_kind, column_value)

    def create_bins(v):
        # One bin label per sample: 0 for the first interval_length samples, 1 for the next, ...
        n_bins = int(np.ceil(len(v) / interval_length))
        return np.repeat(np.arange(n_bins), interval_length)[:len(v)]

    # Append the bin number to the id so every (id, bin) pair becomes its own time series
    dd[column_id] = dd[column_id].apply(str) + "_bin_" + \
        dd.groupby([column_id, column_kind])[column_value].transform(create_bins).apply(str)

    dd = extract_features(dd,
                          column_id=column_id,
                          column_value=column_value,
                          column_kind=column_kind,
                          default_fc_parameters=compression_functions)

    dd.columns = [x.replace("__", "_") for x in dd.columns]
    dd.columns = [x.replace("feature", "map") for x in dd.columns]

    dd.reset_index(drop=False, inplace=True)

    # Split the synthetic id back into the original id and the bin number
    ids = dd[column_id].str.split("_bin_").apply(lambda s: s[0])
    bin_number = dd[column_id].str.split("_bin_").apply(lambda s: int(s[1]))

    dd[column_id] = ids
    dd["bin"] = bin_number

    return dd.sort_values(by=[column_id, "bin"])
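For orientation, here is a self-contained sketch of the same binning idea on toy data; the column names, interval length, and use of MinimalFCParameters are illustrative assumptions rather than part of the snippet above.

import numpy as np
import pandas as pd
from tsfresh import extract_features
from tsfresh.feature_extraction import MinimalFCParameters

interval_length = 5

ts = pd.DataFrame({
    "id": ["a"] * 10,
    "time": range(10),
    "value": np.sin(np.linspace(0, 3, 10)),
})

def create_bins(v):
    # One bin label per sample, so each fixed-length interval gets its own id
    n_bins = int(np.ceil(len(v) / interval_length))
    return np.repeat(np.arange(n_bins), interval_length)[:len(v)]

# Turn every (id, bin) pair into a synthetic id of its own
ts["id"] = ts["id"] + "_bin_" + ts.groupby("id")["value"].transform(create_bins).astype(int).astype(str)

# One feature row per bin
features = extract_features(ts, column_id="id", column_sort="time",
                            default_fc_parameters=MinimalFCParameters())
print(features)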
local_dt = local.localize(naive, is_dst=None)
utc_dt = local_dt.astimezone(pytz.utc)
timestamp = utc_dt.strftime('%s')
utc_ts_line = '%s,%s,%s\n' % (metric, str(timestamp), value)
with open(tmp_csv, 'a') as fh:
    fh.write(utc_ts_line)

df = pd.read_csv(tmp_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])
# if os.path.isfile(tmp_csv):
#     os.remove(tmp_csv)
df.columns = ['metric', 'timestamp', 'value']

start_feature_extraction = timer()
try:
    df_features = extract_features(df, column_id='metric', column_sort='timestamp',
                                   column_kind=None, column_value=None)
except Exception:
    traceback.print_exc()
    print(colored('error: extracting features with tsfresh', 'red'))
    sys.exit(1)
end_feature_extraction = timer()
print(colored('notice: extracting features with tsfresh took %.6f seconds', 'cyan') % (end_feature_extraction - start_feature_extraction))

# write the feature matrix to disk
fname_out = fname_in + '.features.csv'
df_features.to_csv(fname_out)

# also write a transposed copy (one feature per row)
df_t = df_features.transpose()
t_fname_out = fname_in + '.features.transposed.csv'
df_t.to_csv(t_fname_out)
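Note that strftime('%s') is not a standard format code and is unsupported on some platforms; below is a small, portable sketch of the same local-to-UTC epoch conversion, where the timezone and datetime values are illustrative assumptions.

# Portable alternative to utc_dt.strftime('%s'): compute the Unix epoch
# directly from the timezone-aware UTC datetime.
import calendar
from datetime import datetime
import pytz

local = pytz.timezone('Europe/Berlin')    # assumed local timezone
naive = datetime(2017, 1, 1, 12, 0, 0)    # assumed naive local time

local_dt = local.localize(naive, is_dst=None)
utc_dt = local_dt.astimezone(pytz.utc)
timestamp = calendar.timegm(utc_dt.timetuple())  # integer Unix epoch in UTC
print(timestamp)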
utc_ts_line = '%s,%s,%s\n' % (metric, str(timestamp), value)
with open(tmp_csv, 'a') as fh:
    fh.write(utc_ts_line)
# TO HERE

start = timer()
df = pd.read_csv(tmp_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])
if os.path.isfile(tmp_csv):
    os.remove(tmp_csv)
df.columns = ['metric', 'timestamp', 'value']

start_feature_extraction = timer()
try:
    df_features = extract_features(df, column_id='metric', column_sort='timestamp',
                                   column_kind=None, column_value=None)
except Exception:
    traceback.print_exc()
    print(colored('error: extracting features with tsfresh', 'red'))
    sys.exit(1)
end_feature_extraction = timer()
print(colored('notice: extracting features with tsfresh took %.6f seconds', 'cyan') % (end_feature_extraction - start_feature_extraction))

# write the feature matrix to disk
fname_out = fname_in + '.features.csv'
df_features.to_csv(fname_out)

# also write a transposed copy (one feature per row)
df_t = df_features.transpose()
t_fname_out = fname_in + '.features.transposed.csv'
df_t.to_csv(t_fname_out)
def transform(self, X, y=None):
    # Unzip the (series, label) pairs, then extract one tsfresh feature row
    # per id; missing values are imputed in place.
    X, y = unzip(X, y)
    return extract_features(X, column_id="id", column_sort="time", column_value="value",
                            impute_function=impute, show_warnings=False), y
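Outside of a transformer class, the same call can be exercised directly; a minimal sketch with toy long-format data (the data and column names are assumptions).

import pandas as pd
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import impute

X_long = pd.DataFrame({
    "id":    [1, 1, 1, 2, 2, 2],
    "time":  [0, 1, 2, 0, 1, 2],
    "value": [1.0, 2.0, 1.5, 3.0, 2.5, 4.0],
})

# One feature row per id; NaN/inf feature columns are imputed in place
features = extract_features(X_long, column_id="id", column_sort="time",
                            column_value="value", impute_function=impute,
                            show_warnings=False)
print(features.shape)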
"length": None,
"absolute_sum_of_changes": None,
"abs_energy": None,
# "sample_entropy": None,
"number_peaks": [{"n": 2}],
"number_cwt_peaks": [{"n": 2}, {"n": 3}],
"autocorrelation": [{"lag": 2}, {"lag": 3}]
# "value_count": #"large_standard_deviation": [{"r": 0.05}, {"r": 0.1}]
}
# For convenience, three dictionaries are predefined and can be used right away:
# ComprehensiveFCParameters, MinimalFCParameters, EfficientFCParameters.
# MinimalFCParameters is set by default.
else:
    extraction_settings = MinimalFCParameters()

extracted_features = extract_features(dataframe_df,
                                      column_id=ref_column,
                                      column_sort=time_column,
                                      default_fc_parameters=extraction_settings)
extracted_features[ref_column] = extracted_features.index
dataframe_json = extracted_features.to_json(orient='split').encode()
compressed_data = bz2.compress(dataframe_json)
dataframe_id = str(uuid.uuid4())
variables.put(dataframe_id, compressed_data)
print("dataframe id: ", dataframe_id)
print('dataframe size (original): ', sys.getsizeof(dataframe_json), " bytes")
print('dataframe size (compressed): ', sys.getsizeof(compressed_data), " bytes")
resultMetadata.put("task.name", __file__)