How to use the tflearn.data_utils.pad_sequences function in tflearn

To help you get started, we've selected a few tflearn examples that show popular ways pad_sequences is used in public projects.
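
Before looking at the project excerpts, here is a minimal, self-contained sketch of what pad_sequences does. The toy sequences and the maxlen value are purely illustrative; they are not taken from any of the projects shown below.

from tflearn.data_utils import pad_sequences

# Toy token-id sequences of unequal length (illustrative values only).
sequences = [[17, 25, 10], [406, 26, 14, 56, 61], [62]]

# Pad or truncate every sequence to a fixed length of 6, filling missing positions with 0.
padded = pad_sequences(sequences, maxlen=6, value=0.)

print(padded.shape)  # (3, 6): a dense numpy array, ready to feed into a network

If maxlen is left at its default of None, every sequence is padded to the length of the longest one in the batch.
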


github DMALab / autotf / autotf / model / TestCode.py View on Github
continue
                if token in dictionary:
                    curline.append(dictionary[token])
                else:
                    curline.append(1)
            result.append(curline)
        return result

    sentences = ProcessSentence(sentences)
    testsentence = ProcessSentence(testsentence)
    testsentence, TestY = GetTestIndex(testsentence, 0, dftest)
    dictionary, MaxTokens = GetDictionary(sentences)
    TrainX = BuildNumber(sentences, dictionary)
    TestX = BuildNumber(testsentence, dictionary)
    TrainY = GetLabel(0, df)
    TrainX = pad_sequences(TrainX, maxlen=100, value=0.)
    TestX = pad_sequences(TestX, maxlen=100, value=0.)
    TrainX = np.array(TrainX)
    TrainY = np.array(TrainY)
    TestX = np.array(TestX)
    TestY = np.array(TestY)

    params = {
        "metrics": [],
        "batch_size": 64,
        "class_num": 2,
        "num_epochs": 1,
        "learning_rate": 1e-3,
        "hidden_dimension": 128,
        "sentence_len":100,
        "embdding_dimension":128,
        "vocab_size":45647,
github sliderSun / pynlp / text-classification / a01_FastText / old_single_label / p5_fastTextB_predict.py View on Github
# 1.load data with vocabulary of words and labels
    vocabulary_word2index, vocabulary_index2word = create_voabulary()
    vocab_size = len(vocabulary_word2index)
    vocabulary_word2index_label,vocabulary_index2word_label = create_voabulary_label()
    questionid_question_lists=load_final_test_data(FLAGS.predict_source_file)
    test= load_data_predict(vocabulary_word2index,vocabulary_word2index_label,questionid_question_lists)
    testX=[]
    question_id_list=[]
    for tuple in test:
        question_id,question_string_list=tuple
        question_id_list.append(question_id)
        testX.append(question_string_list)

    # 2.Data preprocessing: Sequence padding
    print("start padding....")
    testX2 = pad_sequences(testX, maxlen=FLAGS.sentence_len, value=0.)  # padding to max length
    print("end padding...")

    # 3.create session.
    config=tf.ConfigProto()
    config.gpu_options.allow_growth=True
    with tf.Session(config=config) as sess:
        # 4.Instantiate Model
        fast_text=fastText(FLAGS.label_size, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps, FLAGS.decay_rate,FLAGS.num_sampled,FLAGS.sentence_len,vocab_size,FLAGS.embed_size,FLAGS.is_training)
        saver=tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir+"checkpoint"):
            print("Restoring Variables from Checkpoint")
            saver.restore(sess,tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print("Can't find the checkpoint.going to stop")
            return
        # 5.feed data, to get logits
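
The important detail in this prediction script is that the incoming questions are padded to FLAGS.sentence_len, the same fixed length used when the fastText model was trained, so the resulting batch matches the model's input placeholder. A hedged sketch of that step in isolation (the length and the index-encoded questions are stand-ins, not the project's real FLAGS or data):

from tflearn.data_utils import pad_sequences

sentence_len = 100                        # assumed to match the trained model
testX = [[12, 7, 993], [5, 44]]           # hypothetical index-encoded questions

testX2 = pad_sequences(testX, maxlen=sentence_len, value=0.)
print(testX2.shape)                       # (2, 100), one padded row per question
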
github acadTags / Automated-Social-Annotation / 0 JMAN / JMAN_train.py View on Github
output_csv_test = "fold,loss,loss_ce,loss_l2,loss_sim,loss_sub,hamming_loss,acc,prec,rec,f1,acc@k,hamming_loss@k,prec@k,rec@k,f1@k"
        # start iterating over k-folds for training and testing  
        num_run = 0
        time_train = [0]*num_runs # get time spent in training        
        for train, valid in zip(trainlist, validlist):
            print('\n--RUN',num_run,'START--\n')
            start_time_train = time.time() # starting time of training
            # k-fold dataset creation
            trainX, trainX_title, trainY = train
            validX, validX_title, validY = valid
            # Data preprocessing.Sequence padding
            print("start padding & transform to one hot...")
            trainX = pad_sequences(trainX, maxlen=FLAGS.sequence_length, value=0.)  # padding to max length
            trainX_title = pad_sequences(trainX_title, maxlen=FLAGS.sequence_length_title, value=0.)
            validX = pad_sequences(validX, maxlen=FLAGS.sequence_length, value=0.)  # padding to max length
            validX_title = pad_sequences(validX_title, maxlen=FLAGS.sequence_length_title, value=0.)
            #with open(FLAGS.cache_path, 'w') as data_f: #save data to cache file, so we can use it next time quickly.
            #    pickle.dump((trainX,trainY,testX,testY,vocabulary_index2word),data_f)
            print("trainX[0]:", trainX[0]) ;#print("trainY[0]:", trainY[0])
            #print("validX[0]:", validX[0])
            # Converting labels to binary vectors
            print("end padding & transform to one hot...")
            
            saver=tf.train.Saver()            
            if os.path.exists(FLAGS.ckpt_dir+"checkpoint"):
                print("Restoring Variables from Checkpoint")
                saver.restore(sess,tf.train.latest_checkpoint(FLAGS.ckpt_dir))
            else:
                print('Initializing Variables')
                sess.run(tf.global_variables_initializer()) # which initialise parameters
                if FLAGS.use_embedding: #load pre-trained word embedding
                    assign_pretrained_word_embedding(sess, vocabulary_index2word, vocab_size, model,num_run,word2vec_model_path=word2vec_model_path)
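
Here pad_sequences is applied twice, once to the document body and once to the title, each with its own maximum length, since the body and the title are kept as separate inputs of different widths. A sketch of that pattern with placeholder lengths (the real values come from FLAGS.sequence_length and FLAGS.sequence_length_title):

from tflearn.data_utils import pad_sequences

trainX       = [[3, 9, 27, 81], [5]]      # hypothetical body token ids
trainX_title = [[3, 9], [5, 7, 11]]       # hypothetical title token ids

trainX       = pad_sequences(trainX, maxlen=300, value=0.)        # body width
trainX_title = pad_sequences(trainX_title, maxlen=30, value=0.)   # title width

print(trainX.shape, trainX_title.shape)   # (2, 300) (2, 30)
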
github chaitjo / lstm-context-embeddings / tflearn / model.py View on Github
rnn_hidden_size = FLAGS.rnn_hidden_size
num_filters = FLAGS.num_filters
dropout_prob = FLAGS.dropout_prob
learning_rate = FLAGS.learning_rate
batch_size = FLAGS.batch_size
num_epochs = FLAGS.num_epochs


# IMDB Dataset loading
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=vocab_size, valid_portion=0.1)
trainX, trainY = train
testX, testY = test

# Sequence padding
trainX = pad_sequences(trainX, maxlen=maxlen, value=0.)
testX = pad_sequences(testX, maxlen=maxlen, value=0.)

# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)


# Building network
network = input_data(shape=[None, maxlen], name='input')   

network = embedding(
    network, 
    input_dim=vocab_size, 
    output_dim=embedding_dim, 
    trainable=True)    

network = bidirectional_rnn(
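
The excerpt is cut off mid-call, but it already shows the essential coupling: the arrays produced by pad_sequences have shape (batch, maxlen), and the input layer is declared with exactly that width. A reduced sketch of that relationship (the hyperparameter values are placeholders, not the project's FLAGS):

from tflearn.data_utils import pad_sequences
from tflearn.layers.core import input_data
from tflearn.layers.embedding_ops import embedding

maxlen, vocab_size, embedding_dim = 200, 20000, 128   # assumed hyperparameters

trainX = pad_sequences([[1, 2, 3], [4, 5]], maxlen=maxlen, value=0.)

network = input_data(shape=[None, maxlen], name='input')   # width must equal maxlen
network = embedding(network, input_dim=vocab_size, output_dim=embedding_dim)
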
github brightmart / text_classification / a01_FastText / p5_fastTextB_predict_multilabel.py View on Github
print("vocab_size:",vocab_size)
    #iii=0
    #iii/0
    vocabulary_word2index_label,vocabulary_index2word_label = create_voabulary_label()
    questionid_question_lists=load_final_test_data(FLAGS.predict_source_file) #TODO
    test= load_data_predict(vocabulary_word2index,vocabulary_word2index_label,questionid_question_lists) #TODO
    testX=[]
    question_id_list=[]
    for tuple in test:
        question_id,question_string_list=tuple
        question_id_list.append(question_id)
        testX.append(question_string_list)

    # 2.Data preprocessing: Sequence padding
    print("start padding....")
    testX2 = pad_sequences(testX, maxlen=FLAGS.sentence_len, value=0.)  # padding to max length
    print("end padding...")

    # 3.create session.
    config=tf.ConfigProto()
    config.gpu_options.allow_growth=True
    with tf.Session(config=config) as sess:
        # 4.Instantiate Model
        fast_text=fastText(FLAGS.label_size, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps, FLAGS.decay_rate,FLAGS.num_sampled,FLAGS.sentence_len,vocab_size,FLAGS.embed_size,FLAGS.is_training)
        saver=tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir+"checkpoint"):
            print("Restoring Variables from Checkpoint")
            saver.restore(sess,tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print("Can't find the checkpoint.going to stop")
            return
        # 5.feed data, to get logits
github mindgarage / Ovation / datasets / __init__.py View on Github
def padseq(data, pad=0, raw=False):
    if pad == 0:
        return data
    elif raw:
        padded_data = []
        for d in data:
            diff = pad - len(d)
            if diff > 0:
                pads = ['PAD'] * diff
                d = d + pads
                padded_data.append(d[:pad])
            else:
                padded_data.append(d[:pad])
        return padded_data
    else:
        return tflearn.data_utils.pad_sequences(data, maxlen=pad,
                dtype='int32', padding='post', truncating='post', value=0)
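
This wrapper spells out the full keyword signature: dtype sets the element type of the returned array, padding and truncating choose whether zeros are added (and over-long sequences cut) at the front ('pre') or at the back ('post'), and value is the fill token. A small sketch contrasting the two modes (toy data, not from the Ovation datasets):

from tflearn.data_utils import pad_sequences

data = [[7, 8, 9]]

post = pad_sequences(data, maxlen=5, padding='post', truncating='post', value=0)
pre = pad_sequences(data, maxlen=5, padding='pre', truncating='pre', value=0)

print(post[0])   # [7 8 9 0 0] -- zeros appended
print(pre[0])    # [0 0 7 8 9] -- zeros prepended
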
github brightmart / text_classification / a07_Transformer / a2_predict.py View on Github
# 1.load data with vocabulary of words and labels
    vocabulary_word2index, vocabulary_index2word = create_voabulary(word2vec_model_path=FLAGS.word2vec_model_path,name_scope="transformer")  # simple='simple'
    vocab_size = len(vocabulary_word2index)
    print("transformer.vocab_size:", vocab_size)
    vocabulary_word2index_label, vocabulary_index2word_label = create_voabulary_label(name_scope="transformer",use_seq2seq=True)
    questionid_question_lists=load_final_test_data(FLAGS.predict_source_file)
    test= load_data_predict(vocabulary_word2index,vocabulary_word2index_label,questionid_question_lists)
    testX=[]
    question_id_list=[]
    for tuple in test:
        question_id,question_string_list=tuple
        question_id_list.append(question_id)
        testX.append(question_string_list)
    # 2.Data preprocessing: Sequence padding
    print("start padding....")
    testX2 = pad_sequences(testX, maxlen=FLAGS.sequence_length, value=0.)  # padding to max length
    print("end padding...")
   # 3.create session.
    config=tf.ConfigProto()
    config.gpu_options.allow_growth=True
    with tf.Session(config=config) as sess:
        # 4.Instantiate Model
        model=Transformer(FLAGS.num_classes, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps, FLAGS.decay_rate, FLAGS.sequence_length,
                 vocab_size, FLAGS.embed_size,FLAGS.d_model,FLAGS.d_k,FLAGS.d_v,FLAGS.h,FLAGS.num_layer,FLAGS.is_training,decoder_sent_length=FLAGS.decoder_sent_length,l2_lambda=FLAGS.l2_lambda)
        saver=tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir+"checkpoint"):
            print("Restoring Variables from Checkpoint")
            saver.restore(sess,tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print("Can't find the checkpoint.going to stop")
            return
        # 5.feed data, to get logits
github darksigma / Fundamentals-of-Deep-Learning-Book / archive / read_imdb_data.py View on Github
import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb
import numpy as np

# IMDB Dataset loading
train, test, _ = imdb.load_data(path='data/imdb.pkl', n_words=30000,
                                valid_portion=0.1)
trainX, trainY = train
testX, testY = test

# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=500, value=0.)
testX = pad_sequences(testX, maxlen=500, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)



class IMDBDataset():
    def __init__(self, X, Y):
        self.num_examples = len(X)
        self.inputs = X
        self.tags = Y
        self.ptr = 0


    def minibatch(self, size):
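
The excerpt stops before the body of minibatch, but the surrounding code suggests the class simply slices fixed-size batches out of the padded inputs and the one-hot labels. A generic sketch of that kind of batching over arrays produced by pad_sequences and to_categorical (this is an assumption about the helper's intent, not the book's actual implementation):

from tflearn.data_utils import pad_sequences, to_categorical

X = pad_sequences([[1, 2, 3], [4, 5], [6]], maxlen=500, value=0.)
Y = to_categorical([0, 1, 0], nb_classes=2)

def minibatches(X, Y, size):
    # Yield consecutive (inputs, labels) slices of the padded arrays.
    for start in range(0, len(X), size):
        yield X[start:start + size], Y[start:start + size]

for batch_x, batch_y in minibatches(X, Y, size=2):
    print(batch_x.shape, batch_y.shape)   # (2, 500) (2, 2), then (1, 500) (1, 2)
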
github sliderSun / pynlp / text-classification / a01_FastText / old_single_label / p5_fastTextB_train.py View on Github
vocabulary_word2index, vocabulary_index2word = create_voabulary()
        vocab_size = len(vocabulary_word2index)
        vocabulary_word2index_label,_ = create_voabulary_label()
        train, test, _ = load_data(vocabulary_word2index, vocabulary_word2index_label,data_type='train')
        trainX, trainY = train
        testX, testY = test
        print("testX.shape:", np.array(testX).shape)  # 2500个list.每个list代表一句话
        print("testY.shape:", np.array(testY).shape)  # 2500个label
        print("testX[0]:", testX[0])  # [17, 25, 10, 406, 26, 14, 56, 61, 62, 323, 4]
        print("testX[1]:", testX[1]);
        print("testY[0]:", testY[0])  # 0 ;print("testY[1]:",testY[1]) #0

        # 2.Data preprocessing
        # Sequence padding
        print("start padding & transform to one hot...")
        trainX = pad_sequences(trainX, maxlen=FLAGS.sentence_len, value=0.)  # padding to max length
        testX = pad_sequences(testX, maxlen=FLAGS.sentence_len, value=0.)  # padding to max length
        ###############################################################################################
        #with open(FLAGS.cache_path, 'w') as data_f: #save data to cache file, so we can use it next time quickly.
        #    pickle.dump((trainX,trainY,testX,testY,vocabulary_index2word),data_f)
        ###############################################################################################
    print("testX[0]:", testX[0]) ;print("testX[1]:", testX[1]); #[17, 25, 10, 406, 26, 14, 56, 61, 62, 323, 4]
    # Converting labels to binary vectors
    print("testY[0]:", testY[0])  # 0 ;print("testY[1]:",testY[1]) #0
    print("end padding & transform to one hot...")
    #2.create session.
    config=tf.ConfigProto()
    config.gpu_options.allow_growth=True
    with tf.Session(config=config) as sess:
        #Instantiate Model
        fast_text=fastText(FLAGS.label_size, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps, FLAGS.decay_rate,FLAGS.num_sampled,FLAGS.sentence_len,vocab_size,FLAGS.embed_size,FLAGS.is_training)
        #Initialize Save
github brightmart / text_classification / a03_TextRNN / p8_TextRNN_predict.py View on Github
def main(_):
    # 1.load data with vocabulary of words and labels
    vocabulary_word2index, vocabulary_index2word = create_voabulary(simple='simple',word2vec_model_path=FLAGS.word2vec_model_path,name_scope="rnn")
    vocab_size = len(vocabulary_word2index)
    vocabulary_word2index_label, vocabulary_index2word_label = create_voabulary_label(name_scope="rnn")
    questionid_question_lists=load_final_test_data(FLAGS.predict_source_file)
    test= load_data_predict(vocabulary_word2index,vocabulary_word2index_label,questionid_question_lists)
    testX=[]
    question_id_list=[]
    for tuple in test:
        question_id,question_string_list=tuple
        question_id_list.append(question_id)
        testX.append(question_string_list)
    # 2.Data preprocessing: Sequence padding
    print("start padding....")
    testX2 = pad_sequences(testX, maxlen=FLAGS.sequence_length, value=0.)  # padding to max length
    print("end padding...")
   # 3.create session.
    config=tf.ConfigProto()
    config.gpu_options.allow_growth=True
    with tf.Session(config=config) as sess:
        # 4.Instantiate Model
        textRNN=TextRNN(FLAGS.num_classes, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps, FLAGS.decay_rate, FLAGS.sequence_length,
                        vocab_size, FLAGS.embed_size, FLAGS.is_training)
        saver=tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir+"checkpoint"):
            print("Restoring Variables from Checkpoint for TextRNN")
            saver.restore(sess,tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print("Can't find the checkpoint.going to stop")
            return
        # 5.feed data, to get logits