import os
import time

import numpy as np
import pandas as pd

import tsam.timeseriesaggregation as tsam


def test_hierarchical():
    raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples', 'testdata.csv'), index_col=0)
    orig_raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples', 'results', 'testperiods_hierarchical.csv'), index_col=[0, 1])
    starttime = time.time()
    aggregation = tsam.TimeSeriesAggregation(raw, noTypicalPeriods=8, hoursPerPeriod=24,
                                             clusterMethod='hierarchical',
                                             extremePeriodMethod='new_cluster_center',
                                             addPeakMin=['T'], addPeakMax=['Load'])
    typPeriods = aggregation.createTypicalPeriods()
    print('Clustering took ' + str(time.time() - starttime))
    # sort the typical days in order to avoid an assertion error due to a different order
    sortedDaysOrig = orig_raw.sum(axis=0, level=0).sort_values('GHI').index
    sortedDaysTest = typPeriods.sum(axis=0, level=0).sort_values('GHI').index
    # rearrange their order
    orig = orig_raw[typPeriods.columns].unstack().loc[sortedDaysOrig, :].stack()
    test = typPeriods.unstack().loc[sortedDaysTest, :].stack()
    np.testing.assert_array_almost_equal(orig.values, test.values, decimal=4)
def test_cluster_order():
    raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples', 'testdata.csv'), index_col=0)
    raw_wind = raw.loc[:, 'Wind'].to_frame()
    orig_raw_predefClusterOrder = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples', 'results', 'testperiods_predefClusterOrder.csv'), index_col=[0, 1])
    orig_raw_predefClusterOrderAndClusterCenters = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples', 'results', 'testperiods_predefClusterOrderAndClusterCenters.csv'), index_col=[0, 1])
    starttime = time.time()
    aggregation_wind = tsam.TimeSeriesAggregation(raw_wind, noTypicalPeriods=8, hoursPerPeriod=24,
                                                  clusterMethod='hierarchical')
    typPeriods_wind = aggregation_wind.createTypicalPeriods()
    aggregation_predefClusterOrder = tsam.TimeSeriesAggregation(raw, noTypicalPeriods=8, hoursPerPeriod=24,
                                                                clusterMethod='hierarchical',
                                                                predefClusterOrder=aggregation_wind.clusterOrder)
    typPeriods_predefClusterOrder = aggregation_predefClusterOrder.createTypicalPeriods()
    aggregation_predefClusterOrderAndClusterCenters = tsam.TimeSeriesAggregation(raw,
                                                                                 noTypicalPeriods=8, hoursPerPeriod=24,
                                                                                 clusterMethod='hierarchical',
                                                                                 predefClusterOrder=aggregation_wind.clusterOrder,
                                                                                 predefClusterCenterIndices=aggregation_wind.clusterCenterIndices)
    typPeriods_predefClusterOrderAndClusterCenters = aggregation_predefClusterOrderAndClusterCenters.createTypicalPeriods()
    print('Clustering took ' + str(time.time() - starttime))
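    # Hedged sketch of the checks one would expect at this point (the snippet ends with the
    # timing print, and the two orig_raw_* DataFrames loaded above are otherwise unused);
    # the tolerance is an assumption, not taken from the source:
    # np.testing.assert_array_almost_equal(typPeriods_predefClusterOrder.values,
    #                                      orig_raw_predefClusterOrder.values, decimal=4)
    # np.testing.assert_array_almost_equal(typPeriods_predefClusterOrderAndClusterCenters.values,
    #                                      orig_raw_predefClusterOrderAndClusterCenters.values, decimal=4)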
def test_preprocess():
    raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples', 'testdata.csv'), index_col=0)
    raw_wind = raw.loc[:, 'Wind'].to_frame()
    aggregation_wind = tsam.TimeSeriesAggregation(raw_wind, noTypicalPeriods=8, hoursPerPeriod=24,
                                                  clusterMethod='hierarchical')
    aggregation_wind._preProcessTimeSeries()
    test = aggregation_wind.normalizedPeriodlyProfiles
    orig = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples', 'results', 'preprocessed_wind.csv'), index_col=[0], header=[0, 1])
    np.testing.assert_array_almost_equal(test.values, orig.values, decimal=15)
def test_k_medoids():
    raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples', 'testdata.csv'), index_col=0)
    orig_raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples', 'results', 'testperiods_kmedoids.csv'), index_col=[0, 1])
    starttime = time.time()
    aggregation = tsam.TimeSeriesAggregation(raw, noTypicalPeriods=8, hoursPerPeriod=24 * 7,
                                             clusterMethod='k_medoids')
    typPeriods = aggregation.createTypicalPeriods()
    print('Clustering took ' + str(time.time() - starttime))
    # sort the typical periods in order to avoid an assertion error due to a different order
    sortedDaysOrig = orig_raw.sum(axis=0, level=0).sort_values('GHI').index
    sortedDaysTest = typPeriods.sum(axis=0, level=0).sort_values('GHI').index
    # rearrange their order
    orig = orig_raw[typPeriods.columns].unstack().loc[sortedDaysOrig, :].stack()
    test = typPeriods.unstack().loc[sortedDaysTest, :].stack()
    np.testing.assert_array_almost_equal(orig.values, test.values, decimal=4)
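
# Hedged follow-up sketch (not part of the tests above): tsam's TimeSeriesAggregation also
# provides accuracyIndicators(), which reports error measures (e.g. RMSE) per attribute for
# the chosen aggregation; the exact set of reported indicators is treated as an assumption here.
def check_aggregation_accuracy():
    raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples', 'testdata.csv'), index_col=0)
    aggregation = tsam.TimeSeriesAggregation(raw, noTypicalPeriods=8, hoursPerPeriod=24,
                                             clusterMethod='hierarchical')
    aggregation.createTypicalPeriods()
    print(aggregation.accuracyIndicators())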
        # (b) thereby collect the weights which should be considered for each time series as well in a dictionary
        timeSeriesData, weightDict = [], {}
        for mdlName, mdl in self.componentModelingDict.items():
            for compName, comp in mdl.componentsDict.items():
                compTimeSeriesData, compWeightDict = comp.getDataForTimeSeriesAggregation()
                if compTimeSeriesData is not None:
                    timeSeriesData.append(compTimeSeriesData)
                    weightDict.update(compWeightDict)
        timeSeriesData = pd.concat(timeSeriesData, axis=1)
        # Note: sets the index of the time series data. The index is of no further relevance in the energy system model.
        timeSeriesData.index = pd.date_range('2050-01-01 00:30:00', periods=len(self.totalTimeSteps),
                                             freq=(str(self.hoursPerTimeStep) + 'H'), tz='Europe/Berlin')
        # Cluster data with the tsam package (the reindex call is here for reproducibility of the
        # TimeSeriesAggregation call)
        timeSeriesData = timeSeriesData.reindex(sorted(timeSeriesData.columns), axis=1)
        clusterClass = TimeSeriesAggregation(timeSeries=timeSeriesData, noTypicalPeriods=numberOfTypicalPeriods,
                                             hoursPerPeriod=hoursPerPeriod,
                                             clusterMethod=clusterMethod, sortValues=sortValues, weightDict=weightDict,
                                             **kwargs)
        # Convert the clustered data to a pandas DataFrame and store the respective clustered time series data in the
        # associated components
        data = pd.DataFrame.from_dict(clusterClass.clusterPeriodDict)
        for mdlName, mdl in self.componentModelingDict.items():
            for compName, comp in mdl.componentsDict.items():
                comp.setAggregatedTimeSeriesData(data)
        # Store the time series aggregation parameters in the class instance
        if storeTSAinstance:
            self.tsaInstance = clusterClass
        self.typicalPeriods = clusterClass.clusterPeriodIdx
        self.timeStepsPerPeriod = list(range(numberOfTimeStepsPerPeriod))
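
# Hedged illustrative sketch (separate from the FINE method above): a tiny standalone run of
# tsam showing what the attributes used there contain. The demo data and parameter values
# below are assumptions chosen only for illustration.
import pandas as pd
from tsam.timeseriesaggregation import TimeSeriesAggregation

demoData = pd.DataFrame({'Load': [float(h % 24) + (h // 24) for h in range(96)]},
                        index=pd.date_range('2050-01-01', periods=96, freq='H'))
demoAggregation = TimeSeriesAggregation(timeSeries=demoData, noTypicalPeriods=2, hoursPerPeriod=24,
                                        clusterMethod='hierarchical', rescaleClusterPeriods=False)
demoAggregation.createTypicalPeriods()
print(demoAggregation.clusterPeriodIdx)      # index of the typical periods
print(demoAggregation.clusterOrder)          # typical period assigned to each original period
print(demoAggregation.clusterPeriodNoOccur)  # number of original periods each typical period represents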
import tsam.timeseriesaggregation as tsam
import pandas as pd

raw = pd.read_csv('testdata.csv', index_col=0, parse_dates=True)
aggregation = tsam.TimeSeriesAggregation(raw, noTypicalPeriods=8,
                                         hoursPerPeriod=24,
                                         clusterMethod='hierarchical')
df = aggregation.createTypicalPeriods()
weights = aggregation.clusterPeriodNoOccur
clusterOrder = aggregation.clusterOrder
timesteps = [i for i in range(0, len(df.index.get_level_values('TimeStep')))]
print(aggregation.clusterCenterIndices)
# get the day-of-year values of all hours that fall on one of the cluster-center days
days = [d for d in raw.index.dayofyear
        if d in aggregation.clusterCenterIndices]
# select the dates based on this
dates = raw.iloc[days].index
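
# Hedged follow-up (not part of the snippet above): tsam can also map the typical periods
# back onto the full original index via predictOriginalData(); the exact column layout of
# the returned DataFrame is treated as an assumption here.
reconstructed = aggregation.predictOriginalData()
print(reconstructed.head())
# per-column root-mean-square deviation between the raw input and its aggregated representation
print(((raw - reconstructed) ** 2).mean() ** 0.5)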
dfs = {
    r.name: pd.DataFrame(r.read(keyed="True"))
    .set_index("timeindex")
    .astype(float)
    for r in sequence_resources
}
sequences = pd.concat(dfs.values(), axis=1)
if how == "daily":
hoursPerPeriod = 24
elif how == "hourly":
hoursPerPeriod = 1
elif how == "weekly":
hoursPerPeriod = 24 * 7
aggregation = tsam.TimeSeriesAggregation(
    sequences,
    noTypicalPeriods=n,
    rescaleClusterPeriods=False,
    hoursPerPeriod=hoursPerPeriod,
    clusterMethod="hierarchical",
)
# map each cluster-center period index to the number of original periods it represents
cluster_weights = {
    aggregation.clusterCenterIndices[cluster]: weight
    for cluster, weight in aggregation.clusterPeriodNoOccur.items()
}
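# Hedged sanity check (illustrative, not from the source above): the occurrence counts of the
# typical periods should add up to the number of original periods in the input.
assert sum(cluster_weights.values()) == len(sequences) // hoursPerPeriod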
if how == "daily":
temporal = pd.Series(
{
d: cluster_weights[d.dayofyear]
for d in sequences.index
clusterCenters.append(currentMean)

if clusterMethod == 'k_means':
    from sklearn.cluster import KMeans
    k_means = KMeans(
        n_clusters=n_clusters,
        max_iter=1000,
        n_init=n_iter,
        tol=1e-4)
    clusterOrder = k_means.fit_predict(candidates)
    clusterCenters = k_means.cluster_centers_

elif clusterMethod == 'k_medoids':
    from tsam.utils.k_medoids_exact import KMedoids
    k_medoid = KMedoids(n_clusters=n_clusters, solver=solver)
    clusterOrder = k_medoid.fit_predict(candidates)
    clusterCenters = k_medoid.cluster_centers_

elif clusterMethod == 'hierarchical':
    from sklearn.cluster import AgglomerativeClustering
    clustering = AgglomerativeClustering(
        n_clusters=n_clusters, linkage='ward')
    clusterOrder = clustering.fit_predict(candidates)
    from sklearn.metrics.pairwise import euclidean_distances
    # set cluster center as medoid
    clusterCenters = []
    for clusterNum in np.unique(clusterOrder):
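        # Hedged sketch of the loop body that is cut off above (tsam's own implementation may
        # differ): choose as representative the cluster member closest to the cluster mean.
        members = np.where(clusterOrder == clusterNum)[0]
        currentMean = candidates[members].mean(axis=0)
        distances = euclidean_distances(candidates[members], currentMean.reshape(1, -1))
        clusterCenters.append(candidates[members[np.argmin(distances)]])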
# make a numpy array with rows containing the segmentation candidates (time steps)
# and columns containing the attributes as dimensions
segmentationCandidates = np.asarray(normalizedTypicalPeriods.loc[i, :])
# produce adjacency matrix: each time step is only connected to its preceding and succeeding one
adjacencyMatrix = np.eye(timeStepsPerPeriod, k=1) + np.eye(timeStepsPerPeriod, k=-1)
# execute clustering of adjacent time steps
if noSegments == 1:
    clusterOrder = np.asarray([0] * len(segmentationCandidates))
else:
    clustering = AgglomerativeClustering(n_clusters=noSegments, linkage='ward', connectivity=adjacencyMatrix)
    clusterOrder = clustering.fit_predict(segmentationCandidates)
# determine the indices where the segments change and the number of time steps in each segment
segNo, indices, segmentNoOccur = np.unique(clusterOrder, return_index=True, return_counts=True)
clusterOrderUnique = [clusterOrder[index] for index in sorted(indices)]
# determine the segments' values
clusterCenters = meanRepresentation(segmentationCandidates, clusterOrder)
# predict each time step of the period by representing it with the corresponding segment's values
predictedSegmentedNormalizedTypicalPeriods = pd.DataFrame(
    clusterCenters,
    columns=normalizedTypicalPeriods.columns).reindex(clusterOrder).reset_index(drop=True)
# represent the period by the segments in the right order only, instead of by each time step
segmentedNormalizedTypicalPeriods = pd.DataFrame(
    clusterCenters,
    columns=normalizedTypicalPeriods.columns).reindex(clusterOrderUnique).set_index(np.sort(indices))
# keep additional information on the lengths of the segments in the right order
segmentDuration = pd.DataFrame(segmentNoOccur, columns=['Segment Duration']).reindex(clusterOrderUnique).set_index(np.sort(indices))
# create DataFrame with reduced number of segments together with three indices per period:
# 1. The segment number
# 2. The segment duration
# 3. The index of the original time step at which the segment starts
result = segmentedNormalizedTypicalPeriods.set_index(
    [pd.Index(segNo, name='Segment Step'),
     segmentDuration['Segment Duration'],
     pd.Index(np.sort(indices), name='Original Start Step')])
# append predicted and segmented DataFrame to list to create a big DataFrame for all periods
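# Hedged continuation sketch (the variable names below are assumptions, not tsam's own): each
# period's result would be appended to a list and, once all periods are processed, concatenated
# into a single DataFrame keyed by the period number, e.g.:
# predictedPeriodsList.append(predictedSegmentedNormalizedTypicalPeriods)
# segmentedPeriodsList.append(result)
# segmentedTypicalPeriods = pd.concat(segmentedPeriodsList, keys=range(len(segmentedPeriodsList)))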