# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Settings for the bAbI RNN script. The names suggest layer widths, batch
# size and epoch count; where they are consumed is not visible in this section.
EMBED_HIDDEN_SIZE = 50
SENT_HIDDEN_SIZE = 100
QUERY_HIDDEN_SIZE = 100
BATCH_SIZE = 32
EPOCH = 2
save_model_path = 'babi_rnn_model.h5'
# NOTE(review): RNN is not assigned in this part of the file; it must already
# be bound before this line or the print raises NameError -- confirm.
print("RNN,Embed,Sent,Query={},{},{},{}".format(RNN, EMBED_HIDDEN_SIZE, SENT_HIDDEN_SIZE, QUERY_HIDDEN_SIZE))

# Archive member name template; '{}' is filled with 'train' or 'test'.
challenge = 'tasks_1-20_v1-2/en/qa2_two-supporting-facts_{}.txt'

# Resolve the data archive relative to this file, not the working directory.
pwd_path = os.path.abspath(os.path.dirname(__file__))
print('pwd_path:', pwd_path)
path = os.path.join(pwd_path, '../../data/babi_tasks_1-20_v1-2.tar.gz')
print('path:', path)

# Both members are read while the archive is open; the context manager closes
# it afterwards.
with tarfile.open(path) as tar:
    train = get_stories(tar.extractfile(challenge.format('train')))
    test = get_stories(tar.extractfile(challenge.format('test')))

# Concatenate once and reuse for the vocabulary and the max-length scans.
all_stories = train + test

# Collect every token that appears in a story, a query or an answer.
vocab = set()
for story, q, a in all_stories:
    vocab.update(story + q + [a])
vocab = sorted(vocab)

# Word indices start at 1, so index 0 is never assigned to a word
# (presumably reserved for padding by vectorize_stories -- confirm).
vocab_size = len(vocab) + 1
word_idx = {word: index for index, word in enumerate(vocab, start=1)}

# Longest story / query across both splits, passed to vectorize_stories.
story_maxlen = max(len(story) for story, _, _ in all_stories)
query_maxlen = max(len(query) for _, query, _ in all_stories)

idx_story, idx_query, idx_answer = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
test_idx_story, test_idx_query, test_idx_answer = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)
print('vocab:', vocab)
print('idx_story.shape:', idx_story.shape)
print('idx_query.shape:', idx_query.shape)
# Recurrent layer class used by the script; only printed in this section.
RNN = keras.layers.recurrent.LSTM

# Settings for the bAbI RNN script. The names suggest layer widths, batch
# size and epoch count; where they are consumed is not visible in this section.
EMBED_HIDDEN_SIZE = 50
SENT_HIDDEN_SIZE = 100
QUERY_HIDDEN_SIZE = 100
BATCH_SIZE = 32
EPOCH = 2
save_model_path = 'babi_rnn_model.h5'
print("RNN,Embed,Sent,Query={},{},{},{}".format(RNN, EMBED_HIDDEN_SIZE, SENT_HIDDEN_SIZE, QUERY_HIDDEN_SIZE))

# Archive member name template; '{}' is filled with 'train' or 'test'.
challenge = 'tasks_1-20_v1-2/en/qa2_two-supporting-facts_{}.txt'

# Resolve the data archive relative to this file, not the working directory.
pwd_path = os.path.abspath(os.path.dirname(__file__))
print('pwd_path:', pwd_path)
path = os.path.join(pwd_path, '../../data/babi_tasks_1-20_v1-2.tar.gz')
print('path:', path)

# Both members are read while the archive is open; the context manager closes
# it afterwards.
with tarfile.open(path) as tar:
    train = get_stories(tar.extractfile(challenge.format('train')))
    test = get_stories(tar.extractfile(challenge.format('test')))

# Concatenate once and reuse for the vocabulary and the max-length scans.
all_stories = train + test

# Collect every token that appears in a story, a query or an answer.
vocab = set()
for story, q, a in all_stories:
    vocab.update(story + q + [a])
vocab = sorted(vocab)

# Word indices start at 1, so index 0 is never assigned to a word
# (presumably reserved for padding by vectorize_stories -- confirm).
vocab_size = len(vocab) + 1
word_idx = {word: index for index, word in enumerate(vocab, start=1)}

# Longest story / query across both splits, passed to vectorize_stories.
story_maxlen = max(len(story) for story, _, _ in all_stories)
query_maxlen = max(len(query) for _, query, _ in all_stories)

idx_story, idx_query, idx_answer = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
test_idx_story, test_idx_query, test_idx_answer = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)
print('vocab:', vocab)
print('idx_story.shape:', idx_story.shape)