How to use the dataset.DataSet function in dataset

To help you get started, we've selected a few dataset.DataSet examples, based on popular ways it's used in public projects.

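Each project below defines its own DataSet, so the constructor arguments differ from snippet to snippet, but the overall pattern is the same: wrap your features and labels in a DataSet, then draw mini-batches from it during training. The following is a minimal, hypothetical sketch of that pattern; the features, labels, and next_batch names are modelled on the examples further down, not a guaranteed API.

import numpy as np
from dataset import DataSet  # project-specific module; signatures vary by repo

# Hypothetical toy data: 1000 samples, 32 features each, binary labels.
features = np.random.rand(1000, 32).astype(np.float32)
labels = np.random.randint(0, 2, size=(1000, 1)).astype(np.float32)

# Wrap the arrays in a DataSet and draw mini-batches, assuming the class
# exposes a next_batch(batch_size) method as in the FD-CNN example below.
data = DataSet(features, labels)
for step in range(100):
    batch_x, batch_y = data.next_batch(32)
    # feed batch_x / batch_y to your model's training step here

Most of the projects below follow some variant of this flow, whether the DataSet holds MNIST images, stock features, or parse trees.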

github xueyouluo / fsauor2018 / main.py
def inference(flags):
    print_out("inference data file {0}".format(flags.data_files))
    dataset = DataSet(flags.data_files, flags.vocab_file, flags.label_file, flags.batch_size, reverse=flags.reverse, split_word=flags.split_word, max_len=flags.max_len)
    hparams = load_hparams(flags.checkpoint_dir,{"mode":'inference','checkpoint_dir':flags.checkpoint_dir+"/best_eval",'embed_file':None})
    with tf.Session(config = get_config_proto(log_device_placement=False)) as sess:
        model = Model(hparams)
        model.build()
        
        try:
            model.restore_model(sess)  #restore best solution
        except Exception as e:
            print("unable to restore model with exception",e)
            exit(1)

        scalars = model.scalars.eval(session=sess)
        print("Scalars:", scalars)
        weight = model.weight.eval(session=sess)
        print("Weight:",weight)
        cnt = 0
github xueyouluo / fsauor2018 / main.py
def train_clf(flags):
    dataset = DataSet(flags.data_files, flags.vocab_file, flags.label_file, flags.batch_size, reverse=flags.reverse, split_word=flags.split_word, max_len=flags.max_len)
    eval_dataset = DataSet(flags.eval_files, flags.vocab_file, flags.label_file, 5 * flags.batch_size, reverse=flags.reverse, split_word=flags.split_word, max_len=flags.max_len)

    params = vars(flags)
    params['vocab_size'] = len(dataset.w2i)
    hparams = convert_to_hparams(params)

    save_hparams(flags.checkpoint_dir, hparams)
    print(hparams)

    train_graph = tf.Graph()
    eval_graph = tf.Graph()

    with train_graph.as_default():
        train_model = Model(hparams)
        train_model.build()
        initializer = tf.global_variables_initializer()
github ebonilla / AutoGP / autogp / datasets / mnist.py
    # keep the images left over after carving out the validation split
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    # process images
    train_images = process_mnist(train_images)
    validation_images = process_mnist(validation_images)
    test_images = process_mnist(test_images)

    # standardize data
    train_mean, train_std = get_data_info(train_images)
    train_images = standardize_data(train_images, train_mean, train_std)
    validation_images = standardize_data(validation_images, train_mean, train_std)
    test_images = standardize_data(test_images, train_mean, train_std)

    data = DataSet(train_images, train_labels)
    test = DataSet(test_images, test_labels)
    val = DataSet(validation_images, validation_labels)

    return data, test, val
github happynoom / DeepTrade_keras / gossip.py
def read_ultimate(path, input_shape):
    ultimate_features = numpy.loadtxt(path + "ultimate_feature." + str(input_shape[0]))
    ultimate_features = numpy.reshape(ultimate_features, [-1, input_shape[0], input_shape[1]])
    ultimate_labels = numpy.loadtxt(path + "ultimate_label." + str(input_shape[0]))
    # ultimate_labels = numpy.reshape(ultimate_labels, [-1, 1])
    train_set = DataSet(ultimate_features, ultimate_labels)
    test_features = numpy.loadtxt(path + "ultimate_feature.test." + str(input_shape[0]))
    test_features = numpy.reshape(test_features, [-1, input_shape[0], input_shape[1]])
    test_labels = numpy.loadtxt(path + "ultimate_label.test." + str(input_shape[0]))
    # test_labels = numpy.reshape(test_labels, [-1, 1])
    test_set = DataSet(test_features, test_labels)
    return train_set, test_set
github pcyin / NL2code / dataset.py
            if is_builtin_type(rule.parent.type):
                assert rule.parent.label is None
                assert len(rule.children) == 1
                terminal_val = rule.children[0].label

                terminal_str = str(terminal_val)
                # print idx, terminal_str
                terminal_tokens = get_terminal_tokens(terminal_str)

                for terminal_token in terminal_tokens:
                    assert len(terminal_token) > 0
                    terminal_token_seq.append(terminal_token)

    terminal_vocab = gen_vocab(terminal_token_seq, vocab_size=4830, freq_cutoff=5)

    train_data = DataSet(annot_vocab, terminal_vocab, grammar, 'train_data')
    dev_data = DataSet(annot_vocab, terminal_vocab, grammar, 'dev_data')
    test_data = DataSet(annot_vocab, terminal_vocab, grammar, 'test_data')

    all_examples = []

    can_fully_gen_num = 0

    # second pass
    for entry in data:
        idx = entry['id']
        query_tokens = entry['query_tokens']
        code = entry['code']
        str_map = entry['str_map']

        parse_tree = parse(code)
        rule_list = parse_tree.get_rule_list(include_leaf=True, leaf_val=True)
github Ekim-Yurtsever / DeepTL-Lane-Change-Classification / example_for_naren.py
from dataset import DataSet
from keras.optimizers import Adam
from models import Models


data = DataSet()
data.read_features(feature_size=2048, feature_path='/media/ekim-hpc/hdd1/lane_change_risk_detection/extracted_features/res_net_50_imagenet', number_of_frames=20)
data.read_risk_data("LCTable.csv")
data.convert_risk_to_one_hot(risk_threshold=0.05)
#data = DataSet.loader("/media/ekim-hpc/hdd1/lane_change_risk_detection/saved data/dataset_resnet_features_5percent.pickle")
filename = "resnet_f_03"

timesteps = data.video_features.shape[1]
nb_samples = data.video_features.shape[0]
nb_features = data.video_features.shape[2]

class_weight = {0: 0.05, 1: 0.95}
training_to_all_data_ratio = 0.9
nb_epoch = 100
batch_size = 32
optimizer = Adam(lr=1e-4, decay=1e-2)
github happynoom / DeepTrade_keras / gossip.py
    train_labels = []
    test_features = []
    test_labels = []
    with open(path, "rb") as fp:
        while True:
            try:
                train_map = pickle.load(fp)
                test_map = pickle.load(fp)
                train_features.extend(train_map["feature"])
                train_labels.extend(train_map["label"])
                test_features.extend(test_map["feature"])
                test_labels.extend(test_map["label"])
                print("read %s successfully. " % train_map["code"])
            except Exception as e:
                break
    return DataSet(numpy.transpose(numpy.asarray(train_features), [0, 2, 1]), numpy.asarray(train_labels)), \
           DataSet(numpy.transpose(numpy.asarray(test_features), [0, 2, 1]), numpy.asarray(test_labels))
github happynoom / DeepTrade_keras / gossip.py
def read_separate_feature(path):
    train_sets = {}
    test_sets = {}
    with open(path, "rb") as fp:
        while True:
            try:
                train_map = pickle.load(fp)
                test_map = pickle.load(fp)
                train_sets[train_map["code"]] = DataSet(numpy.transpose(numpy.asarray(train_map["feature"]), [0, 2, 1]),
                                                        numpy.asarray(train_map["label"]))
                test_sets[test_map["code"]] = DataSet(numpy.transpose(numpy.asarray(test_map["feature"]), [0, 2, 1]),
                                                      numpy.asarray(test_map["label"]))
                print("read %s successfully. " % train_map["code"])
            except Exception as e:
                break
    return train_sets, test_sets
github mauriziofilippone / deep_gp_random_features / code / mcmc / compare_variational_mcmc.py
def generate_toy_data():

    N = 50
    DATA_X = np.random.uniform(-5.0, 5.0, [N, 1])

    true_log_lambda = -2.0
    true_std = np.exp(true_log_lambda) / 2.0  # 0.1
    DATA_y = f(DATA_X) + np.random.normal(0.0, true_std, [N, 1])

    Xtest = np.asarray(np.arange(-10.0, 10.0, 0.1))
    Xtest = Xtest[:, np.newaxis]
    ytest = f(Xtest) # + np.random.normal(0, true_std, [Xtest.shape[0], 1])

    data = DataSet(DATA_X, DATA_y)
    test = DataSet(Xtest, ytest, shuffle=False)

    return data, test
github chizhanyuefeng / FD-CNN / src / cnn.py
    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_)
        loss = tf.reduce_mean(cross_entropy)
        tf.summary.scalar("loss", loss)

    with tf.name_scope('optimizer'):
        train = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_,1),tf.argmax(y,1))
        correct_prediction = tf.cast(correct_prediction,tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)
        tf.summary.scalar("accuracy", accuracy)

    data = dataset.DataSet('../data/dataset',CLASS_LIST)
    saver = tf.train.Saver()
    merged = tf.summary.merge_all()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_writer = tf.summary.FileWriter("../log/", sess.graph)

        for step in range(1, TRAIN_STEP+1):
            batch_x, batch_y = data.next_batch(BATCH_SIZE)
            if step%100==0:
                train_accuracy = accuracy.eval(feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0})
                print('Training step %d, accuracy %f' % (step, train_accuracy))
                summ = sess.run(merged, feed_dict={x: batch_x, y: batch_y,keep_prob: 1.0})
                train_writer.add_summary(summ, global_step=step)

            train.run(feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})