# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
import numpy as np
import os
import kaggle
# --- Story data preparation (LSTM variant) --------------------------------
# Input locations; the relative DATA_DIR is resolved against the current
# working directory.
DATA_DIR = "../data/comp_data"
QA_TRAIN_FILE = "8thGr-NDMC-Train.csv"
STORY_FILE = "studystack_qa_cleaner_no_qm.txt"

# File names for the story weights/bias. Not referenced in the visible part
# of this script (presumably written after training -- confirm downstream).
STORY_WEIGHTS = "lstm-story-weights.txt"
STORY_BIAS = "lstm-story-bias.txt"

# Hyperparameters; likewise unused in the visible part of this script.
EMBED_SIZE = 64
BATCH_SIZE = 256
NBR_EPOCHS = 20

# Load the stories and record the length of the longest one.
stories = kaggle.get_stories(os.path.join(DATA_DIR, STORY_FILE))
story_maxlen = max(len(story) for story in stories)

# The QA pairs are loaded to find the longest question (element 0) and the
# longest answer (element 1); they are also handed to build_vocab below.
qapairs = kaggle.get_question_answer_pairs(
    os.path.join(DATA_DIR, QA_TRAIN_FILE)
)
question_maxlen = max(len(pair[0]) for pair in qapairs)
answer_maxlen = max(len(pair[1]) for pair in qapairs)

# A single sequence length shared by stories, questions and answers.
seq_maxlen = max(story_maxlen, question_maxlen, answer_maxlen)

# Vocabulary built from the stories and QA pairs; the meaning of the empty
# third argument is defined by kaggle.build_vocab (not visible here).
word2idx = kaggle.build_vocab(stories, qapairs, [])
vocab_size = len(word2idx)

# seq_maxlen is passed as the target length -- presumably used for padding;
# confirm against kaggle.vectorize_stories.
Xs = kaggle.vectorize_stories(stories, word2idx, seq_maxlen)

# 70/30 split with a fixed seed so the partition is reproducible.
# NOTE(review): train_test_split is not imported in the visible lines --
# confirm the import exists elsewhere in the file.
Xstrain, Xstest = train_test_split(Xs, test_size=0.3, random_state=42)
print(Xstrain.shape, Xstest.shape)
import numpy as np
import os
import kaggle
# --- Story data preparation (dense variant) -------------------------------
# Where the competition data lives and which files are read from it.
DATA_DIR = "../data/comp_data"
QA_TRAIN_FILE = "8thGr-NDMC-Train.csv"
STORY_FILE = "studystack_qa_cleaner_no_qm.txt"

# Weight/bias file names for the dense story model. They are only defined
# here; nothing in the visible lines reads or writes them.
STORY_WEIGHTS = "dense-story-weights.txt"
STORY_BIAS = "dense-story-bias.txt"

# Training settings; only defined (not consumed) in the visible lines.
EMBED_SIZE = 64
BATCH_SIZE = 256
NBR_EPOCHS = 20

# Read every story, then measure the longest.
stories = kaggle.get_stories(os.path.join(DATA_DIR, STORY_FILE))
story_maxlen = max(map(len, stories))

# Question/answer pairs: needed for the maximum sequence length and, further
# down, for building the vocabulary.
qapairs = kaggle.get_question_answer_pairs(
    os.path.join(DATA_DIR, QA_TRAIN_FILE)
)
question_maxlen = max(len(qa[0]) for qa in qapairs)
answer_maxlen = max(len(qa[1]) for qa in qapairs)

# The longest of the three lengths becomes the common sequence length.
seq_maxlen = max(story_maxlen, question_maxlen, answer_maxlen)

# Word -> index mapping over stories and QA pairs. The third argument is an
# empty list; its role is defined by kaggle.build_vocab (TODO confirm).
word2idx = kaggle.build_vocab(stories, qapairs, [])
vocab_size = len(word2idx)

# Turn the stories into index sequences, giving seq_maxlen as the length
# argument (assumed to control padding/truncation -- verify in the helper).
Xs = kaggle.vectorize_stories(stories, word2idx, seq_maxlen)

# Hold out 30% of the vectorized stories; random_state pins the shuffle.
# NOTE(review): no import of train_test_split is visible in this chunk --
# verify it is imported elsewhere before running.
Xstrain, Xstest = train_test_split(Xs, test_size=0.3, random_state=42)
print(Xstrain.shape, Xstest.shape)