How to use the parlai.core.build_data.remove_dir function in parlai

To help you get started, we’ve selected a few parlai examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github facebookresearch / ParlAI / parlai / tasks / mutualfriends / build.py View on Github external
def build(opt):
    """
    Fetch the MutualFriends resources into ``<datapath>/MutualFriends``.

    Does nothing when ``build_data.built`` already reports the target
    directory as built for the current version string.

    :param opt:
        options dict; only ``opt['datapath']`` is read here.
    """
    target_dir = os.path.join(opt['datapath'], 'MutualFriends')
    version = None

    if build_data.built(target_dir, version_string=version):
        # Already built for this version: nothing to do.
        return

    print('[building data: ' + target_dir + ']')
    if build_data.built(target_dir):
        # Files from an older version are present; clear them out first.
        build_data.remove_dir(target_dir)
    build_data.make_dir(target_dir)

    # Pull every entry listed in the module-level RESOURCES.
    for resource in RESOURCES:
        resource.download_file(target_dir)

    # Record that the build finished.
    build_data.mark_done(target_dir, version_string=version)
github facebookresearch / ParlAI / parlai / tasks / woz / build.py View on Github external
def build(opt):
    """
    Download the WoZ dialogue JSON files into ``<datapath>/WoZ``.

    The download is skipped when ``build_data.built`` already reports the
    directory as built for the current version string.

    :param opt:
        options dict; only ``opt['datapath']`` is read here.
    """
    dpath = os.path.join(opt['datapath'], 'WoZ')
    # NOTE(review): this is the string 'None', not the None object, so it is
    # handed to build_data as an actual version string -- confirm intended.
    version = 'None'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data: one JSON file per split.
        fnames = ['woz_test_en.json', 'woz_train_en.json', 'woz_validate_en.json']
        for fname in fnames:
            url = (
                'https://github.com/nmrksic/'
                'neural-belief-tracker/raw/master/data/woz/' + fname
            )
            build_data.download(url, dpath, fname)

        # Mark the data as built. This belongs inside the rebuild branch so
        # that an already-built directory is not re-marked on every call.
        build_data.mark_done(dpath, version_string=version)
github facebookresearch / ParlAI / parlai / tasks / squad2 / build.py View on Github external
def build(opt):
    """
    Make sure the SQuAD2 resources are present under ``<datapath>/SQuAD2``.

    Returns early if ``build_data.built`` says the directory is already built
    for the current version string; otherwise the directory is recreated and
    every entry of ``RESOURCES`` is downloaded into it.

    :param opt:
        options dict; only ``opt['datapath']`` is read here.
    """
    version = None
    squad_dir = os.path.join(opt['datapath'], 'SQuAD2')

    already_built = build_data.built(squad_dir, version_string=version)
    if already_built:
        return

    print('[building data: ' + squad_dir + ']')
    if build_data.built(squad_dir):
        # Outdated files from an earlier version: delete before rebuilding.
        build_data.remove_dir(squad_dir)
    build_data.make_dir(squad_dir)

    # Download each resource into the fresh directory.
    for item in RESOURCES:
        item.download_file(squad_dir)

    # Flag the directory as built.
    build_data.mark_done(squad_dir, version_string=version)
github facebookresearch / ParlAI / parlai / tasks / talkthewalk / build.py View on Github external
def build(opt):
    """
    Download and unpack the TalkTheWalk archive into ``<datapath>/TalkTheWalk``.

    Skipped entirely when ``build_data.built`` reports the directory as built
    for the current version string.

    :param opt:
        options dict; only ``opt['datapath']`` is read here.
    """
    out_dir = os.path.join(opt['datapath'], 'TalkTheWalk')
    version = None

    if build_data.built(out_dir, version_string=version):
        return

    print('[building data: ' + out_dir + ']')
    if build_data.built(out_dir):
        # Leftovers from an older version: remove them before rebuilding.
        build_data.remove_dir(out_dir)
    build_data.make_dir(out_dir)

    # Fetch the tarball, then extract it in place.
    archive = 'talkthewalk.tgz'
    archive_url = (
        'https://dl.fbaipublicfiles.com/parlai/projects/talkthewalk/' + archive
    )
    build_data.download(archive_url, out_dir, archive)
    build_data.untar(out_dir, archive)

    # Record the successful build.
    build_data.mark_done(out_dir, version_string=version)
github facebookresearch / ParlAI / parlai / tasks / qazre / build.py View on Github external
def build(opt):
    """
    Populate ``<datapath>/QA-ZRE`` with the files listed in ``RESOURCES``.

    No work is done if ``build_data.built`` already reports the directory as
    built for the current version string.

    :param opt:
        options dict; only ``opt['datapath']`` is read here.
    """
    qazre_dir = os.path.join(opt['datapath'], 'QA-ZRE')
    version = None

    if build_data.built(qazre_dir, version_string=version):
        # Up to date already.
        return

    print('[building data: ' + qazre_dir + ']')

    if build_data.built(qazre_dir):
        # A previous version is on disk; discard its files.
        build_data.remove_dir(qazre_dir)
    build_data.make_dir(qazre_dir)

    # Download every resource into the new directory.
    for entry in RESOURCES:
        entry.download_file(qazre_dir)

    # Mark completion so later calls can skip the build.
    build_data.mark_done(qazre_dir, version_string=version)
github facebookresearch / ParlAI / parlai / tasks / nlvr / build.py View on Github external
def build(opt):
    """
    Download the nlvr repository archive from GitHub into ``<datapath>/nlvr``.

    Returns immediately when ``build_data.built`` reports the directory as
    built for the current version string.

    :param opt:
        options dict; only ``opt['datapath']`` is read here.
    """
    nlvr_dir = os.path.join(opt['datapath'], 'nlvr')
    version = None

    if build_data.built(nlvr_dir, version_string=version):
        return

    print('[building data: ' + nlvr_dir + ']')

    # Start from a clean directory: drop files of any older build first.
    if build_data.built(nlvr_dir):
        build_data.remove_dir(nlvr_dir)
    build_data.make_dir(nlvr_dir)

    # The master-branch archive is saved locally as nlvr.zip and unpacked.
    archive_name = 'nlvr.zip'
    archive_url = 'https://github.com/clic-lab/nlvr/archive/master.zip'
    print('[downloading data from: ' + archive_url + ']')
    build_data.download(archive_url, nlvr_dir, archive_name)
    build_data.untar(nlvr_dir, archive_name)

    # Mark as done.
    build_data.mark_done(nlvr_dir, version_string=version)
github facebookresearch / ParlAI / parlai / tasks / opensubtitles / build_2018.py View on Github external
def build(datapath, use_history):
    """
    Build the OpenSubtitles2018 data and return the directory it lives in.

    The directory name gets a ``_no_history`` suffix when ``use_history`` is
    falsy. If ``build_data.built`` already reports that directory as built for
    the current version string, only the path is returned.

    :param datapath:
        root directory under which the dataset directory is created.
    :param use_history:
        selects the target directory and is forwarded to ``create_fb_format``.

    :return:
        path of the dataset directory.
    """
    dir_name = 'OpenSubtitles2018'
    if not use_history:
        dir_name += '_no_history'
    dpath = os.path.join(datapath, dir_name)
    version = '1'

    if build_data.built(dpath, version_string=version):
        return dpath

    print('[building data: ' + dpath + ']')
    if build_data.built(dpath):
        # Files from an older version are present; remove them.
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    untar_path = os.path.join(dpath, 'OpenSubtitles', 'xml', 'en')

    # Only download and extract when the expected number of XML files is not
    # already present under the extraction directory.
    extracted_xml = glob.glob(untar_path + '/*/*/*.xml')
    if len(extracted_xml) != NUM_SUBTITLES_FILES:
        zip_name = 'OpenSubtitles2018.zip'
        url = 'https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/xml/en.zip'
        build_data.download(url, dpath, zip_name)
        build_data.untar(dpath, zip_name)

    create_fb_format(untar_path, dpath, use_history)

    # Record the successful build.
    build_data.mark_done(dpath, version_string=version)
    return dpath
github facebookresearch / ParlAI / parlai / tasks / taskmaster / build.py View on Github external
def build(opt):
    """
    Download the taskmaster-1 resources into ``<datapath>/taskmaster-1``.

    The build is skipped when ``build_data.built`` reports the directory as
    built for version ``"1.0"``.

    :param opt:
        options dict; only ``opt['datapath']`` is read here.
    """
    version = "1.0"
    task_dir = os.path.join(opt['datapath'], 'taskmaster-1')

    # Bail out if this version has been built before.
    if build_data.built(task_dir, version_string=version):
        return

    print('[building data: ' + task_dir + ']')

    # Some other version was built earlier: clear it so we start clean.
    if build_data.built(task_dir):
        build_data.remove_dir(task_dir)
    build_data.make_dir(task_dir)

    # Download each file listed in RESOURCES.
    for resource in RESOURCES:
        resource.download_file(task_dir)

    # Remember that this version is now built.
    build_data.mark_done(task_dir, version_string=version)
github facebookresearch / ParlAI / parlai / tasks / cnn_dm / build.py View on Github external
def build(opt):
    """
    Download the CNN_DM resources and write one story list per data split.

    After downloading everything in ``RESOURCES``, the entries
    ``RESOURCES[2:5]`` are read as URL lists. For each URL, a line
    ``cnn/stories/<sha1 of the stripped URL>.story`` is appended to
    ``<split>.txt``, with split names taken from ``data_type`` by position.

    :param opt:
        options dict; only ``opt['datapath']`` is read here.
    """
    cnn_dm_dir = os.path.join(opt['datapath'], 'CNN_DM')
    version = None

    if build_data.built(cnn_dm_dir, version_string=version):
        return

    print('[building data: ' + cnn_dm_dir + ']')
    if build_data.built(cnn_dm_dir):
        # Outdated files from an older version: remove them.
        build_data.remove_dir(cnn_dm_dir)
    build_data.make_dir(cnn_dm_dir)

    # Download the data.
    for resource in RESOURCES:
        resource.download_file(cnn_dm_dir)

    for idx, url_resource in enumerate(RESOURCES[2:5]):
        split_name = data_type[idx]
        urls_path = os.path.join(cnn_dm_dir, url_resource.file_name)
        split_path = os.path.join(cnn_dm_dir, split_name + '.txt')
        # The split file is opened in append mode, so existing content is kept.
        with open(urls_path) as urls_file, open(split_path, 'a') as split_file:
            for url in urls_file:
                # Story files are named by the SHA-1 hex digest of the URL.
                encoded_url = url.strip().encode('utf-8')
                file_name = hashlib.sha1(encoded_url).hexdigest()
                split_file.write("cnn/stories/{}.story\n".format(file_name))
    # NOTE(review): no build_data.mark_done call is visible in this excerpt;
    # confirm the full function marks the directory as built.
github facebookresearch / ParlAI / parlai / tasks / twitter / build.py View on Github external
def build(opt):
    version = 'v1.1'
    dpath = os.path.join(opt['datapath'], 'Twitter')

    if not build_data.built(dpath, version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        fname1 = "twitter_en_big.txt.gz.partaa"
        fname2 = "twitter_en_big.txt.gz.partab"
        url = 'https://github.com/Marsan-Ma/chat_corpus/raw/master/'
        build_data.download(url + fname1, dpath, fname1)
        build_data.download(url + fname2, dpath, fname2)

        file1 = os.path.join(dpath, fname1)
        file2 = os.path.join(dpath, fname2)
        file3 = "twitter_en_big.txt.gz"
        outzipfile = os.path.join(dpath, file3)
        build_data.cat(file1, file2, outzipfile)

        import gzip