print('Estimated number of clusters: %d' % n_clusters_)
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))
# plt.rcParams.update(pd.tools.plotting.mpl_stylesheet)
plt.figure(301)
plt.clf()
plt.jet()
ax = plt.gca()
nvtools.nvtools.add_attraction_grid(ax, attractmV, attractFreq)
if 0:
    df.plot(kind='scatter', x='gate jump', y='yellow jump', ax=plt.gca(), c=0 * labels, cmap=cm.jet, linewidths=0, colorbar=False, grid=False, zorder=3)
    plt.savefig(os.path.join(qcodes.config['user']['nvDataDir'], 'results', 'clustering0.png'))
df.plot(kind='scatter', x='gate jump', y='yellow jump', ax=plt.gca(), c=labels, cmap=cm.jet, linewidths=0, colorbar=False, grid=False, zorder=3)
plt.title('Clustering of jumps', fontsize=15)
plt.savefig(os.path.join(qcodes.config['user']['nvDataDir'], 'results', 'clustering.png'))
np.save(os.path.join(qcodes.config['user']['nvDataDir'], 'labels.npy'), labels)
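# Context (sketch): `labels`, `n_clusters_`, `metrics` and `X` are assumed to
# come from an upstream clustering step not shown here. The -1 "no class"
# label used later matches DBSCAN's noise convention, so something like the
# following is plausible (eps/min_samples are placeholders, not the original
# settings):
if 0:
    from sklearn import metrics
    from sklearn.cluster import DBSCAN
    X = df[['gate jump', 'yellow jump']].values
    db = DBSCAN(eps=0.3, min_samples=10).fit(X)
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)  # -1 = noise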
#%% Find dense 0 cluster
from sklearn.neighbors import KernelDensity
densityKern = KernelDensity().fit(X)
s = densityKern.score_samples(X)  # log-density of each sample
plt.figure()
plt.subplot(121)
plt.scatter(df['gate jump'], s)
plt.subplot(122)
plt.scatter(df['yellow jump'], s)
X = X[s < -2.5, :]  # drop the dense 0 cluster by keeping only low log-density samples
#%%
# translate by mean and scale with std
datascaler = StandardScaler()
dataS = datascaler.fit_transform(data)
dfS = df.copy()
dfS[:] = datascaler.transform(df)
Xbase = dataS[:, 4:]  # base data
datascalerBase = StandardScaler().fit(data[:, 4:])
x = dataS[:, 4]
y = dataS[:, 5]
#%% Create data set with 100 data points -> 1 label
lag = 100
ran = range(0, lag)  # only the first `lag` shifts are used below; shifting the full series would build an N-by-N matrix
lagSquare = np.concatenate([dfS[['gate jump']].shift(i) for i in ran], axis=1)
gateSet = lagSquare[lag:, :lag]
lagSquare = np.concatenate([dfS[['yellow jump']].shift(i) for i in ran], axis=1)
yellowSet = lagSquare[lag:, :lag]
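# What the shift trick builds: each row of gateSet/yellowSet is a length-`lag`
# window of the series, newest sample first. Toy example (values hypothetical):
if 0:
    toy = pd.DataFrame({'v': [0., 1., 2., 3., 4.]})
    sq = np.concatenate([toy[['v']].shift(i) for i in range(2)], axis=1)
    print(sq[2:, :2])  # [[2. 1.] [3. 2.] [4. 3.]]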
#%%
labels = np.load(os.path.join(qcodes.config['user']['nvDataDir'], 'labels.npy'))
dataSet = np.dstack((gateSet, yellowSet))[:-1, :, :] # I don't know the label for the final sequence, so drop it
lbls = labels[lag + 1:]
lbls[lbls == -1] = 5  # map the -1 'no class' label to 5 so it can be one-hot encoded and cut off easily
if 1:  # to make training a little bit easier for now
    dataSet = dataSet[lbls < 5, :, :]  # remove all the points that do not belong to a class
    lbls = lbls[lbls < 5]
if 0:  # throw out the 0 cluster
    dataSet = dataSet[lbls > 0, :, :]
    lbls = lbls[lbls > 0]
if 0:  # only classify 0 cluster vs not-0 cluster
    lbls[lbls > 0] = 1
from sklearn.preprocessing import OneHotEncoder
lbls = OneHotEncoder(sparse=False).fit_transform(lbls.reshape(-1, 1))
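# lbls is now a dense (n_samples, n_classes) one-hot matrix; with observed
# classes [0, 1, 2], for example, a label of 2 becomes [0., 0., 1.].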
from sklearn.preprocessing import StandardScaler
from statsmodels.graphics.gofplots import qqplot
from scipy.interpolate import interp1d
interpolated = False
rmvZeroClust = False
#%%
print('Generating Data')
data = np.load(os.path.join(qcodes.config['user']['nvDataDir'], 'jdata.npy')).T
df = pd.DataFrame(data, columns=['time', 'gate', 'yellow', 'new', 'gate jump', 'yellow jump', 'jump index'])
#plt.figure(300); plt.clf()
#df.plot(kind='scatter', x='gate jump', y='yellow jump', ax=plt.gca(), linewidths=0)
labels = np.load(os.path.join(qcodes.config['user']['nvDataDir'], 'labels.npy'))
#%% Remove the 0 cluster (optional)
if rmvZeroClust:
    strippedLabels = labels[labels != 0]
    df = df.iloc[labels != 0]
#%% Data needs to be scaled for almost any machine learning algorithm to work
# translate by mean and scale with std
datascaler = StandardScaler()
dataS = datascaler.fit_transform(data)
dfS = df.copy()
dfS[:] = datascaler.transform(df)
Xbase = dataS[:, 4:] # base data
datascalerBase = StandardScaler().fit(data[:, 4:])
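# datascalerBase presumably exists so that model output on the jump columns can
# be mapped back to physical units via datascalerBase.inverse_transform(...).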
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import qcodes
from pynufft import pynufft  # pip3 install pynufft --user
from scipy.interpolate import interp1d
import nufftpy
#%%
print('Generating Data')
data = np.load(os.path.join(qcodes.config['user']['nvDataDir'], 'jdata.npy')).T
df = pd.DataFrame(data, columns=['time', 'gate', 'yellow', 'new', 'gate jump', 'yellow jump'])
#plt.figure(300); plt.clf()
#df.plot(kind='scatter', x='gate jump', y='yellow jump', ax=plt.gca(), linewidths=0)
labels = np.load(os.path.join(qcodes.config['user']['nvDataDir'], 'labels.npy'))
#%% Check the time steps
time = df[['time']].values.ravel()
dTime = np.diff(time)  # sampling intervals between consecutive data points
#%% interpolation test
interp = interp1d(df[['time']].values.ravel(), df[['gate']].values.ravel(), kind='nearest')
x = range(8000, 10000, 10)
ind = np.where((time >= 8000) & (time <= 10000))
plt.figure()
plt.scatter(time[ind], df[['gate']].values.ravel()[ind])
plt.plot(x, interp(x))
#%% fft as is
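# (cell body missing) A hedged sketch of what "fft as is" could be: a plain FFT
# of the gate signal that ignores the non-uniform sampling, as a baseline for
# the NUFFT approaches imported above. Names below are hypothetical.
if 0:
    gate = df[['gate']].values.ravel()
    spectrum = np.fft.rfft(gate - gate.mean())
    plt.figure()
    plt.plot(np.abs(spectrum))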
import os
import h5py
import qcodes
from datetime import datetime

# (def line reconstructed from the docstring; default values are assumptions)
def save_state(station, tag=None, overwrite=False, data=None, verbose=1):
    """ Save the current state of the system

    Args:
        station (qcodes station)
        tag (str or None)
        overwrite (bool): If True, overwrite existing data; otherwise raise an error
        data (None or object): optional extra data
        verbose (int)

    Example:
        >>> save_state(station, tag='tripledot1')

    The data is written to an HDF5 file. The default location is the user
    home directory with name qtt_statefile.hdf5.
    To install hickle: pip install git+https://github.com/telegraphic/hickle.git@dev
    """
    statefile = qcodes.config.get('statefile', None)
    if statefile is None:
        statefile = os.path.join(os.path.expanduser('~'), 'qtt_statefile.hdf5')
    snapshot = station.snapshot()
    gv = station.gates.allvalues()
    datestring = "{:%Y%m%d-%H%M%S}".format(datetime.now())
    if verbose >= 2:
        print(datestring)
    if tag is None:
        tag = datestring
    obj = {'gatevalues': gv, 'snapshot': snapshot,
           'datestring': datestring, 'data': data}
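    # The continuation of this function is not shown; presumably `obj` is
    # written to `statefile` under `tag` with hickle, e.g. (hypothetical):
    #   import hickle
    #   hickle.dump(obj, statefile, path='/' + tag)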
def datasetFromFile(path, runId):
    qc.config['core']['db_location'] = path
    ds = DataSet(path)
    ds.run_id = runId
    return ds
def list_states(verbose=1):
    """ List available states of the system

    Args:
        verbose (int)

    Returns:
        states (list): List of string tags

    See also:
        load_state
    """
    statefile = qcodes.config.get('statefile', None)
    if statefile is None:
        statefile = os.path.join(os.path.expanduser('~'), 'qtt_statefile.hdf5')
    if not os.path.exists(statefile):
        return []
    tags = []
    with h5py.File(statefile, 'r') as h5group:
        tags = list(h5group.keys())
    if verbose:
        print('states on system from file %s: ' % (statefile, ), end='')
        print(', '.join([str(x) for x in tags]))
    return tags
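# Example usage (sketch, assuming a configured qcodes station named `station`):
#   save_state(station, tag='tripledot1')
#   list_states()  # -> ['tripledot1', ...]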
#%% Global settings
dataSelection = ['yellow jump', 'gate jump', 'yellow', 'gate']
lblsAsInput = True
lag = 100
keepNoClass = True
#%%
print('Generating Data')
data = np.load(os.path.join(qcodes.config['user']['nvDataDir'], 'jdata.npy')).T
data = data[:, 0:6]
df = pd.DataFrame(data, columns=['time', 'gate', 'yellow', 'new', 'gate jump', 'yellow jump'])
jumps = df[['gate jump', 'yellow jump']]
labels = np.load(os.path.join(qcodes.config['user']['nvDataDir'], 'labels.npy'))
labels[labels == -1] = 5 # this makes it a bit nicer to handle
#%% Data needs to be scaled for almost any machine learning algorithm to work
# translate by mean and scale with std
datascaler = StandardScaler()
dataS = datascaler.fit_transform(data)
dfS = df.copy()
dfS[:] = datascaler.transform(df)
#%% Select the subset of data to use:
selectedData = dfS[dataSelection]
ran = range(0, lag)  # only the first `lag` shifts are used when building laggedData
laggedData = np.zeros((len(dataSelection), selectedData.shape[0] - lag, lag))
for i in range(len(dataSelection)):
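    # (loop body reconstructed as a sketch: it follows the shift pattern used
    # for gateSet/yellowSet earlier; treat it as an assumption, not the original)
    lagSquare = np.concatenate([selectedData[[dataSelection[i]]].shift(j) for j in ran], axis=1)
    laggedData[i, :, :] = lagSquare[lag:, :lag]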
#%% Global settings
lblsAsInput = True
lag = 100
keepNoClass = False
keepZeroCluster = True
zeroOrNotZero = False # Classify only zero cluster vs not-zero cluster (so don't remove that zero cluster)
sequentialTesting = False # The stateful LSTM will likely work better with this set to True
LSTMtype = 3
doPCA = True
batchSize = 1
nbEpochs = 300
learningRate = 0.00001
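# A sketch of the kind of model these settings suggest (assumed: a Keras-style
# stateful LSTM classifier; unit counts and variable names are hypothetical):
if 0:
    from keras.models import Sequential
    from keras.layers import LSTM, Dense
    from keras.optimizers import Adam
    nFeatures = len(dataSelection)  # number of input channels (hypothetical)
    nClasses = 6                    # clusters 0-4 plus the "no class" label 5
    model = Sequential()
    model.add(LSTM(32, batch_input_shape=(batchSize, lag, nFeatures),
                   stateful=sequentialTesting))
    model.add(Dense(nClasses, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=learningRate), metrics=['accuracy'])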
#%%
print('Generating Data')
data = np.load(os.path.join(qcodes.config['user']['nvDataDir'], 'jdata.npy')).T
data = data[:, 0:6]
df = pd.DataFrame(data, columns=['time', 'gate', 'yellow', 'new', 'gate jump', 'yellow jump'])
jumps = df[['gate jump', 'yellow jump']]
labels = np.load(os.path.join(qcodes.config['user']['nvDataDir'], 'labels.npy'))
labels[labels == -1] = 5 # this makes it a bit nicer to handle
#%% Data needs to be scaled for almost any machine learning algorithm to work
# translate by mean and scale with std
datascaler = StandardScaler()
dataS = datascaler.fit_transform(data)
dfS = df.copy()
dfS[:] = datascaler.transform(df)
#%% Select the subset of data to use:
#%% Global settings
dataSelection = ['yellow jump', 'gate jump', 'yellow', 'gate']
lblsAsInput = True
lag = 100
keepNoClass = True
colors = ['r', 'b', 'g', 'y', 'c', 'm']
#%%
print('Generating Data')
data = np.load(os.path.join(qcodes.config['user']['nvDataDir'], 'jdata.npy')).T
data = data[:, 0:6]
df = pd.DataFrame(data, columns=['time', 'gate', 'yellow', 'new', 'gate jump', 'yellow jump'])
jumps = df[['gate jump', 'yellow jump']]
labels = np.load(os.path.join(qcodes.config['user']['nvDataDir'], 'labels.npy'))
labels[labels == -1] = 5 # this makes it a bit nicer to handle
#%% Data needs to be scaled for almost any machine learning algorithm to work
# translate by mean and scale with std
datascaler = StandardScaler()
dataS = datascaler.fit_transform(data)
dfS = df.copy()
dfS[:] = datascaler.transform(df)
#%% Select the subset of data to use:
selectedData = dfS[dataSelection]
ran = range(0, lag)  # only the first `lag` shifts are used when building laggedData
laggedData = np.zeros((len(dataSelection), selectedData.shape[0] - lag, lag))
for i in range(len(dataSelection)):
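    # (loop body reconstructed as a sketch: it follows the shift pattern used
    # for gateSet/yellowSet earlier; treat it as an assumption, not the original)
    lagSquare = np.concatenate([selectedData[[dataSelection[i]]].shift(j) for j in ran], axis=1)
    laggedData[i, :, :] = lagSquare[lag:, :lag]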