Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def load_data_imdb(batch_size, num_steps=500):
    """Build train/test iterators and a vocabulary for the IMDb dataset.

    The function downloads the data through ``d2l.download_imdb``, reads the
    'train' and 'test' splits, tokenizes element 0 of each split with
    ``token='word'``, and builds an ``nlp.Vocab`` from the training tokens
    only (``min_freq=5``). Every tokenized line is then mapped to indices
    and passed through ``d2l.trim_pad`` with ``num_steps``.

    Args:
        batch_size: Forwarded to ``d2l.load_array`` for both iterators.
        num_steps: Forwarded to ``d2l.trim_pad`` for every line
            (presumably the fixed sequence length -- confirm in d2l).

    Returns:
        A tuple ``(train_iter, test_iter, vocab)``.
    """
    d2l.download_imdb()
    train_split = d2l.read_imdb('train')
    test_split = d2l.read_imdb('test')

    # Element 0 of each split is what gets tokenized; element 1 is paired
    # with the features below (presumably texts and labels -- confirm).
    train_tokens = d2l.tokenize(train_split[0], token='word')
    test_tokens = d2l.tokenize(test_split[0], token='word')

    # The vocabulary is counted over the training tokens only.
    token_counts = nlp.data.count_tokens(
        itertools.chain.from_iterable(train_tokens))
    vocab = nlp.Vocab(token_counts, min_freq=5)

    def _to_features(token_lines):
        # Index each line, then let trim_pad bring it to num_steps.
        # NOTE(review): the fill value is the unknown-token index, not a
        # dedicated padding index -- confirm this is intended.
        rows = []
        for tokens in token_lines:
            rows.append(d2l.trim_pad(vocab[tokens], num_steps,
                                     vocab[vocab.unknown_token]))
        return mx.nd.array(rows)

    train_features = _to_features(train_tokens)
    test_features = _to_features(test_tokens)

    train_iter = d2l.load_array((train_features, train_split[1]), batch_size)
    test_iter = d2l.load_array((test_features, test_split[1]), batch_size,
                               is_train=False)
    return train_iter, test_iter, vocab
def load_data_imdb(batch_size, num_steps=500):
    """Return IMDb data iterators together with the vocabulary.

    Steps, in order: ``d2l.download_imdb()``; read the 'train' and 'test'
    splits with ``d2l.read_imdb``; word-tokenize element 0 of each split;
    build an ``nlp.Vocab`` (``min_freq=5``) from the flattened training
    tokens; convert each token line to indices and run it through
    ``d2l.trim_pad``; wrap features and element 1 of each split with
    ``d2l.load_array``.

    Args:
        batch_size: Batch size handed to ``d2l.load_array``.
        num_steps: Second argument to ``d2l.trim_pad`` (presumably the
            target length of every sequence -- confirm in d2l).

    Returns:
        ``(train_iter, test_iter, vocab)``. The test iterator is created
        with ``is_train=False``.
    """
    d2l.download_imdb()
    train_data = d2l.read_imdb('train')
    test_data = d2l.read_imdb('test')
    train_tokens = d2l.tokenize(train_data[0], token='word')
    test_tokens = d2l.tokenize(test_data[0], token='word')

    # Flatten the per-line token lists before counting; only the training
    # split contributes to the vocabulary.
    flat_train_tokens = itertools.chain.from_iterable(train_tokens)
    vocab = nlp.Vocab(nlp.data.count_tokens(flat_train_tokens), min_freq=5)

    # NOTE(review): trim_pad is given the unknown-token index as its third
    # argument (presumably the fill value) -- confirm this is intended.
    train_rows = [
        d2l.trim_pad(vocab[tokens], num_steps, vocab[vocab.unknown_token])
        for tokens in train_tokens
    ]
    train_features = mx.nd.array(train_rows)
    test_rows = [
        d2l.trim_pad(vocab[tokens], num_steps, vocab[vocab.unknown_token])
        for tokens in test_tokens
    ]
    test_features = mx.nd.array(test_rows)

    train_set = (train_features, train_data[1])
    test_set = (test_features, test_data[1])
    train_iter = d2l.load_array(train_set, batch_size)
    test_iter = d2l.load_array(test_set, batch_size, is_train=False)
    return train_iter, test_iter, vocab