be dynamically added to the Layout every time a new Entity is
created. This is implemented by creating a partial function of
the get() function that sets the target argument to the
entity name.
absolute_paths (bool): If True, grabbit uses absolute file paths
everywhere (including when returning query results). If False,
the input path will determine the behavior (i.e., relative if
a relative path was passed, absolute if an absolute path was
passed).
regex_search (bool): Whether to require exact matching (False,
default) or regex search (True) when comparing the query
string to each entity in .get() calls. This sets a default for
the instance, but can be overridden in individual .get()
requests.
"""
self._hdfs_client = Config().get_client()
path = abspath(path) if absolute_paths and self._hdfs_client is None \
else path
# Preprocess the config file
if isinstance(config, six.string_types):
# Drop the leading "hdfs://host:port" portion, keeping only the path.
config = '/'.join(config.split('hdfs://', 1)[-1].split('/')[1:])
config = config.replace(self._hdfs_client.root[1:], '')
with self._hdfs_client.read(config) as reader:
config = json.load(reader)
super(HDFSLayout, self).__init__(path, config, dynamic_getters,
absolute_paths, regex_search)
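For context, here is a hedged usage sketch of the parameters documented in the docstring above; the import path, the dataset and config locations, and the 'subject' entity name are assumptions for illustration, not taken from the snippet.
# Hypothetical usage of HDFSLayout (import path and paths are assumptions).
from grabbit.extensions import HDFSLayout

layout = HDFSLayout(
    'hdfs://namenode:8020/data/project',              # dataset root (hypothetical)
    'hdfs://namenode:8020/data/project/config.json',  # layout config (hypothetical)
    dynamic_getters=True,     # adds a get_<entity>() helper for each Entity
    absolute_paths=True,      # return absolute paths from query results
    regex_search=False,       # instance default: exact matching
)

# With dynamic_getters=True, each helper is a functools.partial of get()
# with target fixed to the entity name (assuming a 'subject' entity is
# defined in the config).
files = layout.get(subject='01', regex_search=True)  # per-call override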
#!/usr/bin/env python
# encoding: utf-8
"""Dataframe extension example."""
from hdfs import Config
from hdfs.ext.dataframe import read_dataframe, write_dataframe
import pandas as pd
# Get the default alias' client.
client = Config().get_client()
# A sample dataframe.
df = pd.DataFrame.from_records([
{'A': 1, 'B': 2},
{'A': 11, 'B': 23}
])
# Write dataframe to HDFS using Avro serialization.
write_dataframe(client, 'data.avro', df, overwrite=True)
# Read the Avro file back from HDFS.
_df = read_dataframe(client, 'data.avro')
# The frames match!
pd.testing.assert_frame_equal(df, _df)
#!/usr/bin/env python
# encoding: utf-8
"""Avro extension example."""
from hdfs import Config
from hdfs.ext.avro import AvroReader, AvroWriter
# Get the default alias' client.
client = Config().get_client()
# Some sample data.
records = [
{'name': 'Ann', 'age': 23},
{'name': 'Bob', 'age': 22},
]
# Write an Avro file to HDFS. Since our records' schema is very simple, we let
# the writer infer it automatically; otherwise we would pass it as an argument
# (an explicit-schema variant is sketched after this snippet).
with AvroWriter(client, 'names.avro', overwrite=True) as writer:
for record in records:
writer.write(record)
# Read it back.
with AvroReader(client, 'names.avro') as reader:
schema = reader.schema # The inferred schema.
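The comment before the write above mentions passing a schema explicitly instead of letting the writer infer it. A minimal sketch of that variant, assuming AvroWriter accepts a schema keyword; the output file name is hypothetical:
# Same write as above, but with an explicit Avro schema instead of inference
# (assumes AvroWriter's `schema` keyword; the file name is hypothetical).
schema = {
    'type': 'record',
    'name': 'Person',
    'fields': [
        {'name': 'name', 'type': 'string'},
        {'name': 'age', 'type': 'int'},
    ],
}
with AvroWriter(client, 'names_explicit.avro', schema=schema, overwrite=True) as writer:
    for record in records:
        writer.write(record)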
#!/usr/bin/env python
# encoding: utf-8
"""Sample HdfsCLI script.
This example shows how to write files to HDFS, read them back, and perform a
few other simple filesystem operations.
"""
from hdfs import Config
from json import dump, load
# Get the default alias' client. (See the quickstart section in the
# documentation to learn more about this.)
client = Config().get_client()
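# The alias is resolved from HdfsCLI's configuration file (by default
# ~/.hdfscli.cfg). A minimal sketch of such a file, with a hypothetical
# alias name and namenode URL, might look like:
#
#   [global]
#   default.alias = dev
#
#   [dev.alias]
#   url = http://namenode:50070
#   user = ann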
# Some fake data that we are interested in uploading to HDFS.
model = {
'(intercept)': 48.,
'first_feature': 2.,
'second_feature': 12.,
}
# First, we delete any existing `models/` folder on HDFS.
client.delete('models', recursive=True)
# We can now upload the data, first as CSV.
with client.write('models/1.csv', encoding='utf-8') as writer:
for item in model.items():
writer.write(u'%s,%s\n' % item)