How to use the `reader.data_reader` function from the `reader` module

To help you get started, we’ve selected a few `reader.data_reader` examples, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github PaddlePaddle / paddle-ce-latest-kpis / ce_models / __sequence_tagging_for_ner / infer.py View on Github external
def infer(model_path, batch_size, test_data_file, vocab_file, target_file,
          use_gpu):
    """
    Use the model saved under `model_path` to predict the test data;
    the result is printed on the screen.

    :param model_path: directory holding the saved inference model.
    :param batch_size: number of samples per inference batch.
    :param test_data_file: path to the test data file.
    :param vocab_file: path to the word vocabulary file.
    :param target_file: path to the label vocabulary file.
    :param use_gpu: run on CUDA device 0 when True, otherwise on CPU.

    Returns nothing.
    """
    # Forward (token -> id) and reverse (id -> token) vocabulary lookups;
    # the reverse dicts are presumably used to render predictions as text.
    word_dict = load_dict(vocab_file)
    word_reverse_dict = load_reverse_dict(vocab_file)

    # Same pair of lookups for the label set.
    label_dict = load_dict(target_file)
    label_reverse_dict = load_reverse_dict(target_file)

    # Wrap the per-sample reader so each iteration yields `batch_size` samples.
    test_data = paddle.batch(
        reader.data_reader(test_data_file, word_dict, label_dict),
        batch_size=batch_size)
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Dedicated scope so the loaded parameters do not pollute the global scope.
    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        [inference_program, feed_target_names,
         fetch_targets] = fluid.io.load_inference_model(model_path, exe)
        for data in test_data():
            # Each sample is a 3-tuple; columns 0/1/2 are converted to
            # LoDTensors (variable-length sequences) and fed as
            # word / mark / target. NOTE: bare `map` implies Python 2,
            # where it returns a list.
            word = to_lodtensor(map(lambda x: x[0], data), place)
            mark = to_lodtensor(map(lambda x: x[1], data), place)
            target = to_lodtensor(map(lambda x: x[2], data), place)
            crf_decode = exe.run(
                inference_program,
                feed={"word": word,
                      "mark": mark,
                      # NOTE(review): the snippet is truncated here in the source page.
github PaddlePaddle / paddle-ce-latest-kpis / ce_models / __sequence_tagging_for_ner / train.py View on Github external
sgd_optimizer.minimize(avg_cost)

    # Viterbi decoding over the CRF emissions; shares the transition
    # parameters ('crfw') with the CRF cost layer built earlier (not shown).
    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    # Chunk-level (entity-level) evaluation for IOB tagging.
    # (label_dict_len - 1) / 2 recovers the number of chunk types from
    # paired B-/I- tags plus the single "O" tag.
    chunk_evaluator = fluid.evaluator.ChunkEvaluator(
        input=crf_decode,
        label=target,
        chunk_scheme="IOB",
        num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))

    # Batched readers; drop_last=False keeps the final partial batch.
    train_reader = paddle.batch(
            reader.data_reader(train_data_file, word_dict, label_dict),
        batch_size=BATCH_SIZE, drop_last=False)
    test_reader = paddle.batch(
            reader.data_reader(test_data_file, word_dict, label_dict),
        batch_size=BATCH_SIZE, drop_last=False)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    feeder = fluid.DataFeeder(feed_list=[word, mark, target], place=place)
    exe = fluid.Executor(place)

    # Initialize all parameters first, then overwrite the word embedding
    # parameter 'emb' with pre-trained vectors.
    exe.run(fluid.default_startup_program())

    embedding_name = 'emb'
    embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor(
    )
    embedding_param.set(word_vector_values, place)

    batch_id = 0
    total_time = 0.0
    # NOTE: `xrange` implies Python 2. Snippet is truncated after this line.
    for pass_id in xrange(num_passes):
github shibing624 / text-classifier / text-classifier / classic / train.py View on Github external
def train(model_type, data_path=None, pr_figure_path=None,
          model_save_path=None, vectorizer_path=None, col_sep=',',
          thresholds=0.5, num_classes=2):
    """Train a classic text classifier and persist its artifacts.

    Reads the labelled corpus from ``data_path`` (columns separated by
    ``col_sep``), builds TF-IDF features, fits the model named by
    ``model_type``, pickles both the feature matrix/vectorizer and the
    fitted model, then evaluates on a held-out 10% validation split.
    """
    content, raw_labels = data_reader(data_path, col_sep)

    # Build the TF-IDF feature matrix and persist it for later reuse.
    features = tfidf(content)
    dump_pkl(features, vectorizer_path)

    # Encode the raw label column into numeric class ids.
    encoded_labels = label_encoder(raw_labels)

    # Fixed random_state keeps the 90/10 split reproducible across runs.
    X_train, X_val, y_train, y_val = train_test_split(
        features, encoded_labels, test_size=0.1, random_state=42)

    # Fit the requested classifier and save it to disk.
    classifier = get_model(model_type)
    classifier.fit(X_train, y_train)
    dump_pkl(classifier, model_save_path)

    # Score on the validation split (also writes the PR figure).
    eval(classifier, X_val, y_val, thresholds=thresholds,
         num_classes=num_classes, model_type=model_type,
         pr_figure_path=pr_figure_path)
github PaddlePaddle / models / legacy / globally_normalized_reader / infer.py View on Github external
paddle.init(use_gpu=use_gpu, trainer_count=trainer_count)

    # Reverse dictionary (id -> word), used when rendering predictions.
    ids_2_word = load_reverse_dict(config.dict_path)

    # Build the GNR network graph in inference mode.
    outputs = GNR(config, is_infer=True)

    # load the trained models
    parameters = paddle.parameters.Parameters.from_tar(
        gzip.open(model_path, "r"))
    logger.info("loading parameter is done.")

    inferer = paddle.inference.Inference(
        output_layer=outputs, parameters=parameters)

    # Inference runs on the validation split only; the train split is discarded.
    _, valid_samples = choose_samples(data_dir)
    test_reader = reader.data_reader(valid_samples, is_train=False)

    # Accumulate samples into fixed-size batches; run each full batch.
    test_batch = []
    for i, item in enumerate(test_reader()):
        test_batch.append(item)
        if len(test_batch) == batch_size:
            infer_a_batch(inferer, test_batch, ids_2_word, len(outputs))
            test_batch = []

    # Flush the final partial batch, if any.
    if len(test_batch):
        infer_a_batch(inferer, test_batch, ids_2_word, len(outputs))
        test_batch = []
github PaddlePaddle / models / legacy / sequence_tagging_for_ner / infer.py View on Github external
# initialize PaddlePaddle
    paddle.init(use_gpu=False, trainer_count=1)
    # Load trained parameters from the gzipped tar archive.
    parameters = paddle.parameters.Parameters.from_tar(
        gzip.open(model_path, "r"))

    # Build the NER network graph in inference mode.
    predict = ner_net(
        word_dict_len=word_dict_len,
        label_dict_len=label_dict_len,
        is_train=False)

    inferer = paddle.inference.Inference(
        output_layer=predict, parameters=parameters)

    # Group samples into batches of `batch_size`, predicting one batch at a
    # time. Only columns 0 and 1 of each sample (presumably word ids and the
    # mark feature) are fed; the label column is dropped.
    test_data = []
    for i, item in enumerate(
            reader.data_reader(test_data_file, word_dict, label_dict)()):
        test_data.append([item[0], item[1]])
        if len(test_data) == batch_size:
            _infer_a_batch(inferer, test_data, word_reverse_dict,
                           label_reverse_dict)
            test_data = []

    # Flush the final partial batch (a no-op if test_data is empty —
    # assuming _infer_a_batch tolerates an empty list; TODO confirm).
    _infer_a_batch(inferer, test_data, word_reverse_dict, label_reverse_dict)
    test_data = []
github PaddlePaddle / models / conv_seq_to_seq / infer.py View on Github external
prob = conv_seq2seq(
        src_dict_size=src_dict_size,
        trg_dict_size=trg_dict_size,
        pos_size=pos_size,
        emb_dim=emb_dim,
        enc_conv_blocks=enc_conv_blocks,
        dec_conv_blocks=dec_conv_blocks,
        drop_rate=drop_rate,
        is_infer=True)

    # load parameters
    parameters = paddle.parameters.Parameters.from_tar(gzip.open(model_path))

    # Total decoder padding: sum of (context_len - 1) over the decoder's
    # conv blocks. NOTE: bare `reduce` implies Python 2.
    padding_list = [context_len - 1 for (size, context_len) in dec_conv_blocks]
    padding_num = reduce(lambda x, y: x + y, padding_list)
    infer_reader = reader.data_reader(
        data_file=infer_data_path,
        src_dict=src_dict,
        trg_dict=trg_dict,
        pos_size=pos_size,
        padding_num=padding_num)

    inferer = paddle.inference.Inference(
        output_layer=prob, parameters=parameters)

    # Beam-search decoder built on top of the inferer; the snippet is
    # truncated after its construction.
    searcher = BeamSearch(
        inferer=inferer,
        trg_dict=trg_dict,
        pos_size=pos_size,
        padding_num=padding_num,
        max_len=max_len,
        beam_size=beam_size)
github PaddlePaddle / models / conv_seq_to_seq / train.py View on Github external
# Training reader: shuffle within a 10240-sample buffer, then batch.
train_reader = paddle.batch(
        reader=paddle.reader.shuffle(
            reader=reader.data_reader(
                data_file=train_data_path,
                src_dict=src_dict,
                trg_dict=trg_dict,
                pos_size=pos_size,
                padding_num=padding_num),
            buf_size=10240),
        batch_size=batch_size)

    # The test reader is optional: built only when a test set path is given,
    # otherwise callers receive None for it.
    test_reader = None
    if test_data_path:
        test_reader = paddle.batch(
            reader=paddle.reader.shuffle(
                reader=reader.data_reader(
                    data_file=test_data_path,
                    src_dict=src_dict,
                    trg_dict=trg_dict,
                    pos_size=pos_size,
                    padding_num=padding_num),
                buf_size=10240),
            batch_size=batch_size)

    return train_reader, test_reader
github PaddlePaddle / models / legacy / globally_normalized_reader / train.py View on Github external
"""Build the data reader for this model.

    Arguments:
        - data_dir:   The path of training data.
        - batch_size:   batch size for the training task.
    """
    train_samples, valid_samples = choose_samples(data_dir)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            reader.data_reader(train_samples), buf_size=102400),
        batch_size=batch_size)

    # testing data is not shuffled
    test_reader = paddle.batch(
        reader.data_reader(
            valid_samples, is_train=False),
        batch_size=batch_size)
    return train_reader, test_reader, len(train_samples)
github PaddlePaddle / models / fluid / PaddleNLP / sequence_tagging_for_ner / train.py View on Github external
num_correct_chunks) = fluid.layers.chunk_eval(
         input=crf_decode,
         label=target,
         chunk_scheme="IOB",
         num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
    chunk_evaluator = fluid.metrics.ChunkEvaluator()

    # Clone the main program for evaluation BEFORE attaching the optimizer,
    # so the test program carries no backward/optimization ops.
    inference_program = fluid.default_main_program().clone(for_test=True)
    test_fetch_list = [num_infer_chunks, num_label_chunks, num_correct_chunks]
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
    sgd_optimizer.minimize(avg_cost)

    # When CE_MODE_X is set (presumably continuous-evaluation mode — TODO
    # confirm), shuffling is skipped so runs are deterministic.
    if "CE_MODE_X" not in os.environ:
        train_reader = paddle.batch(
            paddle.reader.shuffle(
                reader.data_reader(train_data_file, word_dict, label_dict),
                buf_size=20000),
            batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.reader.shuffle(
                reader.data_reader(test_data_file, word_dict, label_dict),
                buf_size=20000),
            batch_size=batch_size)
    else:
        train_reader = paddle.batch(
            reader.data_reader(train_data_file, word_dict, label_dict),
            batch_size=batch_size)
        test_reader = paddle.batch(
            reader.data_reader(test_data_file, word_dict, label_dict),
            batch_size=batch_size)

    # Snippet is truncated after device selection.
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()