How to use the danlp.utils.extract_single_file_from_zip function in danlp

To help you get started, we’ve selected a few danlp examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alexandrainst / danlp / danlp / datasets / sentiment.py View on GitHub external
def _twitter_data_process_func(tmp_file_path: str, meta_info: dict,
                               cache_dir: str = DEFAULT_CACHE_DIR,
                               clean_up_raw_data: bool = True,
                               verbose: bool = True):
    """Unpack a zipped CSV of tweet ids and join it with fetched tweet texts.

    The visible part of this function opens the zip archive at
    ``tmp_file_path``, hands its first member to
    ``extract_single_file_from_zip``, reads a CSV from ``cache_dir``, looks
    the ids in its ``twitterid`` column up through ``lookup_tweets``, and
    merges the returned texts back onto the CSV rows.

    NOTE(review): this excerpt appears to be truncated. ``dataset_path`` is
    computed at the end but never used here, nothing is returned, and
    ``clean_up_raw_data`` / ``verbose`` are not read in the visible code.

    :param tmp_file_path: path of the zip archive to open
    :param meta_info: dict; the keys ``'name'`` and ``'file_extension'``
        are read
    :param cache_dir: directory used for the extracted file and for the
        CSV that is read back
    :param clean_up_raw_data: not used in the visible portion
    :param verbose: not used in the visible portion
    """
    from zipfile import ZipFile

    twitter_api = construct_twitter_api_connection()
    
    model_name = meta_info['name']
    # The extension is appended after the join, so it is expected to carry
    # its own leading dot (presumably e.g. '.csv' -- confirm against callers).
    full_path = os.path.join(cache_dir, model_name) + meta_info['file_extension']

    with ZipFile(tmp_file_path, 'r') as zip_file:  # Extract files to cache_dir
        file_list = zip_file.namelist()
        # Only the first archive member is passed on; any further members
        # are ignored. An empty archive would raise IndexError here.
        extract_single_file_from_zip(cache_dir, file_list[0], full_path, zip_file)
    # NOTE(review): the CSV is read from a hard-coded name rather than from
    # full_path. The two only coincide when meta_info['name'] is
    # 'twitter.sentiment' and meta_info['file_extension'] is '.csv' -- confirm.
    file_path = os.path.join(cache_dir, 'twitter.sentiment' + '.csv')
    df = pd.read_csv(file_path)

    twitter_ids = list(df['twitterid'])
    
    full_t = lookup_tweets(twitter_ids, twitter_api)
    # Build [id, text] pairs, then transpose them into two parallel tuples.
    tweet_texts = [[tweet.id, tweet.full_text] for tweet in full_t]
    # If lookup_tweets yields nothing, zip(*[]) is empty and this two-name
    # unpacking raises ValueError.
    tweet_ids, t_texts = list(zip(*tweet_texts))
    tweet_texts_df = pd.DataFrame({'twitterid': tweet_ids, 'text': t_texts})

    # No `on=` is given, so pandas joins on the columns the two frames have
    # in common (at least 'twitterid'); the default join type is inner, so
    # ids without a fetched tweet are dropped from the result.
    resulting_df = pd.merge(df, tweet_texts_df)

    # Same location as full_path above, rebuilt from meta_info.
    dataset_path = os.path.join(cache_dir,
                                meta_info['name'] + meta_info[
                                    'file_extension'])
github alexandrainst / danlp / danlp / download.py View on GitHub external
full_path = os.path.join(cache_dir, model_name) + meta_info['file_extension']

    
    if verbose:
        print("Unzipping {} ".format(model_name))

    with ZipFile(tmp_file_path, 'r') as zip_file:  # Extract files to cache_dir
        

        file_list = zip_file.namelist()

        if len(file_list) == 1:
            extract_single_file_from_zip(cache_dir, file_list[0], full_path, zip_file)

        elif file_in_zip:
            extract_single_file_from_zip(cache_dir, file_in_zip, full_path, zip_file)

        else:  # Extract all the files to the name of the model/dataset
            destination = os.path.join(cache_dir, meta_info['name'])
            zip_file.extractall(path=destination)
github alexandrainst / danlp / danlp / download.py View on GitHub external
model_name = meta_info['name']
    
    
    full_path = os.path.join(cache_dir, model_name) + meta_info['file_extension']

    
    if verbose:
        print("Unzipping {} ".format(model_name))

    with ZipFile(tmp_file_path, 'r') as zip_file:  # Extract files to cache_dir
        

        file_list = zip_file.namelist()

        if len(file_list) == 1:
            extract_single_file_from_zip(cache_dir, file_list[0], full_path, zip_file)

        elif file_in_zip:
            extract_single_file_from_zip(cache_dir, file_in_zip, full_path, zip_file)

        else:  # Extract all the files to the name of the model/dataset
            destination = os.path.join(cache_dir, meta_info['name'])
            zip_file.extractall(path=destination)