How to use the parlai.core.build_data.built function in parlai

To help you get started, we’ve selected a few parlai examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github facebookresearch / ParlAI / parlai / tasks / booktest / build.py View on Github external
def build(opt):
    """Download the BookTest resources into ``<datapath>/BookTest`` when needed.

    Returns immediately if ``build_data.built`` already reports the target
    directory as built for the current version string.

    :param opt: options mapping; only ``opt['datapath']`` is read here.
    """
    version = None
    dpath = os.path.join(opt['datapath'], 'BookTest')

    # Nothing to do when the directory is already marked for this version.
    if build_data.built(dpath, version_string=version):
        return

    banner = '[building data: ' + dpath + ']'
    print(banner)

    # A build marker that does not match the requested version means the
    # files on disk are outdated, so wipe them before starting over.
    if build_data.built(dpath):
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Fetch every entry of RESOURCES into the data directory.
    for resource in RESOURCES:
        resource.download_file(dpath)

    # Record the completed build.
    build_data.mark_done(dpath, version_string=version)
github facebookresearch / ParlAI / projects / memnn_feedback / tasks / dbll_babi / build.py View on Github external
def build(opt):
    """Download and unpack the DBLL archive into ``<datapath>/DBLL`` when needed.

    Returns immediately if ``build_data.built`` already reports the target
    directory as built for the current version string.

    :param opt: options mapping; only ``opt['datapath']`` is read here.
    """
    version = None
    dpath = os.path.join(opt['datapath'], 'DBLL')

    # Nothing to do when the directory is already marked for this version.
    if build_data.built(dpath, version_string=version):
        return

    banner = '[building data: ' + dpath + ']'
    print(banner)

    # A build marker that does not match the requested version means the
    # files on disk are outdated, so wipe them before starting over.
    if build_data.built(dpath):
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Fetch the archive, then extract it inside the data directory.
    archive_name = 'dbll.tgz'
    archive_url = 'https://s3.amazonaws.com/fair-data/parlai/dbll/' + archive_name
    build_data.download(archive_url, dpath, archive_name)
    build_data.untar(dpath, archive_name)

    # Record the completed build.
    build_data.mark_done(dpath, version_string=version)
github facebookresearch / ParlAI / parlai / tasks / taskntalk / build.py View on Github external
def build(opt):
    """Create train and validation data for synthetic shapes described by attributes.

    The data directory is ``<datapath>/taskntalk`` and gets ``large`` and
    ``small`` subdirectories. Work is only done when
    ``build_data.built(dpath)`` is falsy.

    NOTE(review): this excerpt ends after ``to_save`` is assembled; how the
    data is written and whether the build is marked done is not visible here.

    :param opt: options mapping; only ``opt['datapath']`` is read in the
        visible portion.
    """
    dpath = os.path.join(opt['datapath'], 'taskntalk')

    if not build_data.built(dpath):
        print('[building data: ' + dpath + ']')
        # One subdirectory per dataset size.
        build_data.make_dir(os.path.join(dpath, 'large'))
        build_data.make_dir(os.path.join(dpath, 'small'))

        # save training and validation data
        # 'attributes' names the three attributes; 'task_defn' enumerates
        # all six ordered pairs of distinct attributes.
        to_save = {
            'attributes': ['color', 'shape', 'style'],
            'task_defn': [
                ['color', 'shape'],
                ['shape', 'color'],
                ['color', 'style'],
                ['style', 'color'],
                ['shape', 'style'],
                ['style', 'shape'],
            ],
        }
github facebookresearch / ParlAI / parlai / tasks / wmt / build.py View on Github external
def build(opt):
    """Download the WMT resources into ``<datapath>/wmt`` when needed.

    Work is only done when ``build_data.built`` does not report the directory
    as built for ``version``.

    NOTE(review): this excerpt ends after the file-name variables are
    assigned; the conversion step and the final ``mark_done`` call are not
    visible here.

    :param opt: options mapping; only ``opt['datapath']`` is read in the
        visible portion.
    """
    dpath = os.path.join(opt['datapath'], 'wmt')
    # NOTE(review): this is the string 'None', not the None object, so a
    # literal version string is passed to build_data.built -- confirm intended.
    version = 'None'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        # English/German file names for the train and test splits, plus the
        # names of the train/valid/test output files.
        # Presumably "_r_" marks files to read and "_w_" files to write;
        # their use is outside this excerpt -- TODO confirm.
        train_r_fnames = ('train.en', 'train.de')
        train_w_fname = 'en_de_train.txt'
        valid_w_fname = 'en_de_valid.txt'
        test_r_fnames = ('newstest2014.en', 'newstest2014.de')
        test_w_fname = 'en_de_test.txt'
github facebookresearch / ParlAI / parlai / tasks / qangaroo / build.py View on Github external
def build(opt):
    """Download and unpack the QAngaroo archive into ``<datapath>/qangaroo``.

    Returns immediately if ``build_data.built`` already reports the target
    directory as built for version ``'v1.1'``.

    :param opt: options mapping; only ``opt['datapath']`` is read here.
    """
    version = 'v1.1'
    dpath = os.path.join(opt['datapath'], 'qangaroo')

    # Nothing to do when the directory is already marked for this version.
    if build_data.built(dpath, version_string=version):
        return

    banner = '[building data: ' + dpath + ']'
    print(banner)

    # A build marker that does not match the requested version means the
    # files on disk are outdated, so wipe them before starting over.
    if build_data.built(dpath):
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # The archive is fetched from Google Drive by file id, then extracted
    # inside the data directory.
    archive_name = 'qangaroo.zip'
    drive_file_id = "1ytVZ4AhubFDOEL7o7XrIRIyhU8g9wvKA"
    archive_path = os.path.join(dpath, archive_name)

    print("downloading ...")
    build_data.download_from_google_drive(drive_file_id, archive_path)
    build_data.untar(dpath, archive_name)

    # Record the completed build.
    build_data.mark_done(dpath, version_string=version)
github facebookresearch / ParlAI / parlai / tasks / mwsc / build.py View on Github external
def build(opt):
    """Download the MWSC ``schema.txt`` file into ``<datapath>/MWSC`` when needed.

    Work is only done when ``build_data.built`` does not report the directory
    as built for ``version``.

    NOTE(review): this excerpt ends right after ``pattern`` is assigned; the
    parsing of the downloaded file and the final ``mark_done`` call are not
    visible here.

    :param opt: options mapping; only ``opt['datapath']`` is read in the
        visible portion.
    """
    dpath = os.path.join(opt['datapath'], 'MWSC')
    # NOTE(review): this is the string 'None', not the None object, so a
    # literal version string is passed to build_data.built -- confirm intended.
    version = 'None'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        fname = 'schema.txt'
        # The URL embeds a specific commit hash of the decanlp repository.
        url = (
            'https://raw.githubusercontent.com/salesforce/decanlp/'
            'd594b2bf127e13d0e61151b6a2af3bf63612f380/local_data/' + fname
        )
        build_data.download(url, dpath, fname)

        # Regex source text for a square-bracketed span ("[...]"); where it is
        # applied is outside this excerpt.
        pattern = '\\[.*\\]'
github facebookresearch / ParlAI / parlai / tasks / vqa_v1 / build.py View on Github external
def build(opt):
    """Download the VQA-v1 question archives into ``<datapath>/VQA-v1`` when needed.

    Work is only done when ``build_data.built`` does not report the directory
    as built for ``version``.

    NOTE(review): this excerpt ends after the three question archives are
    downloaded; the annotation downloads, extraction and the final
    ``mark_done`` call are not visible here.

    :param opt: options mapping; only ``opt['datapath']`` is read in the
        visible portion.
    """
    dpath = os.path.join(opt['datapath'], 'VQA-v1')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        # Question archives for the train / val / test splits.
        fname1 = 'Questions_Train_mscoco.zip'
        fname2 = 'Questions_Val_mscoco.zip'
        fname3 = 'Questions_Test_mscoco.zip'

        # Annotation archives; not referenced again in the visible portion.
        fname4 = 'Annotations_Val_mscoco.zip'
        fname5 = 'Annotations_Train_mscoco.zip'

        # Every archive name is appended to this common base URL.
        url = 'http://visualqa.org/data/mscoco/vqa/'
        build_data.download(url + fname1, dpath, fname1)
        build_data.download(url + fname2, dpath, fname2)
        build_data.download(url + fname3, dpath, fname3)
github facebookresearch / ParlAI / parlai / tasks / ccpe / build.py View on Github external
def build(opt):
    """Download the CCPE resources into ``<datapath>/CCPE`` when needed.

    Returns immediately if ``build_data.built`` already reports the target
    directory as built for version ``'1.0'``.

    :param opt: options mapping; only ``opt['datapath']`` is read here.
    """
    version = '1.0'
    dpath = os.path.join(opt['datapath'], 'CCPE')

    # Nothing to do when the directory is already marked for this version.
    if build_data.built(dpath, version_string=version):
        return

    banner = '[building data: ' + dpath + ']'
    print(banner)

    # A build marker that does not match the requested version means the
    # files on disk are outdated, so wipe them before starting over.
    if build_data.built(dpath):
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Fetch every entry of RESOURCES into the data directory.
    for resource in RESOURCES:
        resource.download_file(dpath)

    # Record the completed build.
    build_data.mark_done(dpath, version_string=version)
github facebookresearch / ParlAI / parlai / tasks / personality_captions / build.py View on Github external
def build(opt):
    """Prepare the personality_captions data and, if required, its images.

    Two independent steps are performed:

    1. The resources are downloaded into ``<datapath>/personality_captions``
       unless ``build_data.built`` reports that directory as built for
       version ``'2.0'``.
    2. ``download_images(opt)`` is called unless ``<datapath>/yfcc_images``
       is reported as built for the same version or ``opt`` carries a truthy
       ``'yfcc_path'`` entry.

    :param opt: options mapping; ``opt['datapath']`` is required and
        ``opt.get('yfcc_path')`` is consulted.
    """
    version = '2.0'
    datapath = opt['datapath']
    dpath = os.path.join(datapath, 'personality_captions')
    image_path = os.path.join(datapath, 'yfcc_images')

    data_ready = build_data.built(dpath, version)
    if not data_ready:
        banner = '[building data: ' + dpath + ']'
        print(banner)

        # A build marker that does not match the requested version means the
        # files on disk are outdated, so wipe them before starting over.
        if build_data.built(dpath):
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Fetch every entry of RESOURCES into the data directory.
        for resource in RESOURCES:
            resource.download_file(dpath)

        build_data.mark_done(dpath, version)

    # Images are fetched only when they are neither already built nor
    # supplied through opt['yfcc_path'].
    if not (build_data.built(image_path, version) or opt.get('yfcc_path')):
        download_images(opt)
github facebookresearch / ParlAI / parlai / tasks / coqa / build.py View on Github external
def build(opt):
    dpath = os.path.join(opt['datapath'], 'CoQA')
    version = VERSION

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        with open(os.path.join(dpath, 'coqa-train-v1.0.json')) as f:
            data = json.load(f)['data']
            make_parlai_format(dpath, 'train', data)

        with open(os.path.join(dpath, 'coqa-dev-v1.0.json')) as f:
            data = json.load(f)['data']