def load_metadata(self):
    try:
        import resource
        # Raise the stack size and recursion limits so large pickled models can be deserialized
        resource.setrlimit(resource.RLIMIT_STACK, [0x10000000, resource.RLIM_INFINITY])
        sys.setrecursionlimit(0x100000)
    except Exception:
        pass

    fn = os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.lmd['name'] + '_light_model_metadata.pickle')
    try:
        with open(fn, 'rb') as fp:
            self.lmd = pickle.load(fp)
    except Exception:
        self.log.error(f'Could not load mindsdb light metadata from the file: {fn}')

    fn = os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.hmd['name'] + '_heavy_model_metadata.pickle')
    try:
        with open(fn, 'rb') as fp:
            self.hmd = pickle.load(fp)
    except Exception:
        self.log.error(f'Could not load mindsdb heavy metadata from the file: {fn}')
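# A hypothetical save counterpart (not shown in the source snippets) is sketched below to make
# explicit the file-naming convention the loader above assumes; the method name and the pickle
# protocol choice are assumptions for illustration only.
def save_metadata(self):
    fn = os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.lmd['name'] + '_light_model_metadata.pickle')
    with open(fn, 'wb') as fp:
        pickle.dump(self.lmd, fp, protocol=pickle.HIGHEST_PROTOCOL)

    fn = os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.hmd['name'] + '_heavy_model_metadata.pickle')
    with open(fn, 'wb') as fp:
        pickle.dump(self.hmd, fp, protocol=pickle.HIGHEST_PROTOCOL)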
for key in self.transaction.input_data.all_indexes:
    # If this is a group by, skip the `KEY_NO_GROUP_BY` key
    if len(self.transaction.input_data.all_indexes) > 1 and key == KEY_NO_GROUP_BY:
        continue

    length = len(self.transaction.input_data.all_indexes[key])
    if self.transaction.lmd['type'] == TRANSACTION_LEARN:
        sample_size = int(calculate_sample_size(population_size=length,
                                                margin_error=self.transaction.lmd['sample_margin_of_error'],
                                                confidence_level=self.transaction.lmd['sample_confidence_level']))

        # This evals True if it should send the entire group data into test, train or validation
        # as opposed to breaking the group into the subsets
        should_split_by_group = type(group_by) == list and len(group_by) > 0

        if should_split_by_group:
            self.transaction.input_data.train_indexes[key] = self.transaction.input_data.all_indexes[key][0:round(length - length*CONFIG.TEST_TRAIN_RATIO)]
            self.transaction.input_data.train_indexes[KEY_NO_GROUP_BY].extend(self.transaction.input_data.train_indexes[key])

            self.transaction.input_data.test_indexes[key] = self.transaction.input_data.all_indexes[key][round(length - length*CONFIG.TEST_TRAIN_RATIO):int(round(length - length*CONFIG.TEST_TRAIN_RATIO) + round(length*CONFIG.TEST_TRAIN_RATIO/2))]
            self.transaction.input_data.test_indexes[KEY_NO_GROUP_BY].extend(self.transaction.input_data.test_indexes[key])

            self.transaction.input_data.validation_indexes[key] = self.transaction.input_data.all_indexes[key][(round(length - length*CONFIG.TEST_TRAIN_RATIO) + round(length*CONFIG.TEST_TRAIN_RATIO/2)):]
            self.transaction.input_data.validation_indexes[KEY_NO_GROUP_BY].extend(self.transaction.input_data.validation_indexes[key])
        else:
            # make sure that the last in the time series are also the subset used for test
            train_window = (0, int(length*(1 - 2*CONFIG.TEST_TRAIN_RATIO)))
            self.transaction.input_data.train_indexes[key] = self.transaction.input_data.all_indexes[key][train_window[0]:train_window[1]]

            validation_window = (train_window[1], train_window[1] + int(length*CONFIG.TEST_TRAIN_RATIO))
            test_window = (validation_window[1], length)
            self.transaction.input_data.test_indexes[key] = self.transaction.input_data.all_indexes[key][test_window[0]:test_window[1]]
            self.transaction.input_data.validation_indexes[key] = self.transaction.input_data.all_indexes[key][validation_window[0]:validation_window[1]]
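# A minimal, self-contained sketch of the windowing arithmetic used above, with a hypothetical
# group of 100 rows and an assumed CONFIG.TEST_TRAIN_RATIO of 0.1: train takes the first 80 rows,
# validation the next 10, and test the final 10, so the most recent rows of a time series always
# land in the test subset.
length = 100
TEST_TRAIN_RATIO = 0.1

train_window = (0, int(length * (1 - 2 * TEST_TRAIN_RATIO)))
validation_window = (train_window[1], train_window[1] + int(length * TEST_TRAIN_RATIO))
test_window = (validation_window[1], length)

assert train_window == (0, 80)
assert validation_window == (80, 90)
assert test_window == (90, 100)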
if self.lmd['type'] == TRANSACTION_BAD_QUERY:
    self.log.error(self.errorMsg)
    self.error = True
    return

if self.lmd['type'] == TRANSACTION_DROP_MODEL:
    self._execute_drop_model()
    return

if self.lmd['type'] == TRANSACTION_LEARN:
    self.output_data.data_array = [['Model ' + self.lmd['name'] + ' training.']]
    self.output_data.columns = ['Status']

    if CONFIG.EXEC_LEARN_IN_THREAD == False:
        self._execute_learn()
    else:
        _thread.start_new_thread(self._execute_learn, ())
    return

elif self.lmd['type'] == TRANSACTION_PREDICT:
    self._execute_predict()
elif self.lmd['type'] == TRANSACTION_NORMAL_SELECT:
    self._execute_normal_select()
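# _thread.start_new_thread is the low-level threading API; the sketch below shows an equivalent
# using the higher-level threading module. The helper name is made up for illustration; this is
# not how MindsDB itself schedules learning.
import threading

def start_learn_in_background(transaction):
    worker = threading.Thread(target=transaction._execute_learn, daemon=True)
    worker.start()
    return worker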
def check_for_updates():
    """
    Check for updates of mindsdb: ask the mindsdb server whether newer versions exist
    and, if there are any, log a message.
    :return: None
    """
    # tmp files
    uuid_file = CONFIG.MINDSDB_STORAGE_PATH + '/../uuid.mdb_base'
    mdb_file = CONFIG.MINDSDB_STORAGE_PATH + '/start.mdb_base'

    uuid_file_path = Path(uuid_file)
    if uuid_file_path.is_file():
        uuid_str = open(uuid_file).read()
    else:
        uuid_str = str(uuid.uuid4())
        try:
            open(uuid_file, 'w').write(uuid_str)
        except Exception:
            log.warning('Cannot store token, please add write permissions to file: ' + uuid_file)
            uuid_str = uuid_str + '.NO_WRITE'
    file_path = Path(mdb_file)
    if file_path.is_file():
        token = open(mdb_file).read()
    else:
        # Token generation and persistence for mdb_file is omitted from this excerpt
        pass
# move indexes to corresponding train, test, validation, etc and trim input data accordingly
if self.transaction.lmd['type'] == TRANSACTION_LEARN:
    for key in all_indexes:
        should_split_by_group = type(group_by) == list and len(group_by) > 0

        # If this is a group by, skip the `KEY_NO_GROUP_BY` key
        if should_split_by_group and key == KEY_NO_GROUP_BY:
            continue

        length = len(all_indexes[key])
        # This evals True if it should send the entire group data into test, train or validation
        # as opposed to breaking the group into the subsets
        if should_split_by_group:
            train_indexes[key] = all_indexes[key][0:round(length - length*CONFIG.TEST_TRAIN_RATIO)]
            train_indexes[KEY_NO_GROUP_BY].extend(train_indexes[key])

            test_indexes[key] = all_indexes[key][round(length - length*CONFIG.TEST_TRAIN_RATIO):int(round(length - length*CONFIG.TEST_TRAIN_RATIO) + round(length*CONFIG.TEST_TRAIN_RATIO/2))]
            test_indexes[KEY_NO_GROUP_BY].extend(test_indexes[key])

            validation_indexes[key] = all_indexes[key][(round(length - length*CONFIG.TEST_TRAIN_RATIO) + round(length*CONFIG.TEST_TRAIN_RATIO/2)):]
            validation_indexes[KEY_NO_GROUP_BY].extend(validation_indexes[key])
        else:
            # make sure that the last in the time series are also the subset used for test
            train_window = (0, int(length*(1 - 2*CONFIG.TEST_TRAIN_RATIO)))
            train_indexes[key] = all_indexes[key][train_window[0]:train_window[1]]

            validation_window = (train_window[1], train_window[1] + int(length*CONFIG.TEST_TRAIN_RATIO))
            test_window = (validation_window[1], length)
            test_indexes[key] = all_indexes[key][test_window[0]:test_window[1]]
            validation_indexes[key] = all_indexes[key][validation_window[0]:validation_window[1]]
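# calculate_sample_size (called with margin_error and confidence_level in the earlier copy of
# this split logic) is not shown in these snippets. The sketch below is a common way such a
# helper is implemented (Cochran's formula with a finite-population correction); it is an
# assumption for illustration, not MindsDB's actual implementation.
import math

def cochran_sample_size(population_size, margin_error=0.05, confidence_level=0.95, p=0.5):
    # Two-tailed z-scores for the usual confidence levels
    z = {0.90: 1.645, 0.95: 1.96, 0.99: 2.576}.get(confidence_level, 1.96)
    n0 = (z ** 2) * p * (1 - p) / (margin_error ** 2)          # infinite-population sample size
    return math.ceil(n0 / (1 + (n0 - 1) / population_size))    # finite-population correction

# e.g. cochran_sample_size(10000) -> 370: a 10,000-row group needs roughly 370 sampled rows
# for a 5% margin of error at 95% confidence.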
from mindsdb.config import CONFIG

import torch
import torch.nn as nn
import torch.nn.functional as F

device = 'cuda' if CONFIG.USE_CUDA else 'cpu'


class AttnDecoderRNN(nn.Module):

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=200):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)
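    # The snippet above only shows the constructor. The forward pass below is a sketch following
    # the standard PyTorch attention-decoder pattern these layers imply (single-token input,
    # encoder_outputs of shape (max_length, hidden_size)); it is an assumption about how the
    # class is used, not necessarily the original implementation.
    def forward(self, input, hidden, encoder_outputs):
        embedded = self.dropout(self.embedding(input).view(1, 1, -1))

        # Attention weights over the encoder outputs, from the current input and hidden state
        attn_weights = F.softmax(self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))

        # Combine the embedded input with the attention context, then run one GRU step
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = F.relu(self.attn_combine(output).unsqueeze(0))
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)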
def run(self):
    """
    Dispatch the transaction to the appropriate execution method based on its type.
    :return: None
    """
    if self.lmd['type'] == TRANSACTION_BAD_QUERY:
        self.log.error(self.errorMsg)
        self.error = True
        return

    if self.lmd['type'] == TRANSACTION_LEARN:
        if CONFIG.EXEC_LEARN_IN_THREAD == False:
            self._execute_learn()
        else:
            _thread.start_new_thread(self._execute_learn, ())
        return

    if self.lmd['type'] == TRANSACTION_ANALYSE:
        self._execute_analyze()
    elif self.lmd['type'] == TRANSACTION_PREDICT:
        self._execute_predict()
    elif self.lmd['type'] == TRANSACTION_NORMAL_SELECT:
        self._execute_normal_select()
# After the split loop above, the collected index lists are used to materialise the actual DataFrames
self.transaction.input_data.train_df = self.transaction.input_data.data_frame.loc[train_indexes[KEY_NO_GROUP_BY]].copy()
self.transaction.input_data.test_df = self.transaction.input_data.data_frame.loc[test_indexes[KEY_NO_GROUP_BY]].copy()
self.transaction.input_data.validation_df = self.transaction.input_data.data_frame.loc[validation_indexes[KEY_NO_GROUP_BY]].copy()
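# A minimal, self-contained illustration (toy data, not MindsDB code) of how .loc[...] with a
# list of index labels followed by .copy() yields independent split DataFrames:
import pandas as pd

data_frame = pd.DataFrame({'x': range(10), 'y': range(10, 20)})
train_idx, validation_idx, test_idx = list(range(0, 8)), [8], [9]

train_df = data_frame.loc[train_idx].copy()
validation_df = data_frame.loc[validation_idx].copy()
test_df = data_frame.loc[test_idx].copy()

assert len(train_df) == 8 and len(validation_df) == 1 and len(test_df) == 1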