Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def handle_datapackage(self):
    """Call ``commit()`` on the datapackage and copy its summary values into ``self.stats``.

    After the commit, three values are read from the datapackage descriptor
    through ``DumperBase.get_attr``, using the attribute names configured on
    this instance (``datapackage_rowcount``, ``datapackage_bytes`` and
    ``datapackage_hash``).  They are stored under the fixed stats keys
    ``count_of_rows``, ``bytes`` and ``hash`` respectively.  Finally the
    descriptor's ``name`` entry is stored as ``dataset_name``.
    """
    self.datapackage.commit()

    # (key in self.stats, descriptor attribute name configured on the instance)
    stat_sources = (
        ('count_of_rows', self.datapackage_rowcount),
        ('bytes', self.datapackage_bytes),
        ('hash', self.datapackage_hash),
    )
    for stat_key, descriptor_attr in stat_sources:
        self.stats[stat_key] = DumperBase.get_attr(self.datapackage.descriptor, descriptor_attr)

    self.stats['dataset_name'] = self.datapackage.descriptor.get('name')
resource_descriptor = resource.res.descriptor
for descriptor in self.datapackage.descriptor['resources']:
if descriptor['name'] == resource.res.descriptor['name']:
resource_descriptor = descriptor
# File size:
filesize = temp_file.tell()
DumperBase.inc_attr(self.datapackage.descriptor, self.datapackage_bytes, filesize)
DumperBase.inc_attr(resource_descriptor, self.resource_bytes, filesize)
# File Hash:
if self.resource_hash:
hasher = FileDumper.hash_handler(temp_file)
# Update path with hash
if self.add_filehash_to_path:
DumperBase.insert_hash_in_path(resource_descriptor, hasher.hexdigest())
DumperBase.set_attr(resource_descriptor, self.resource_hash, hasher.hexdigest())
# Finalise
filename = temp_file.name
temp_file.close()
self.write_file_to_output(filename, resource.res.source)
os.unlink(filename)
elif isinstance(obj, decimal.Decimal):
return float(obj)
elif isinstance(obj, (list, set)):
return [strize(x) for x in obj]
elif obj is None:
return None
assert False, "Don't know how to handle object %r" % obj
# Lists of fixer callables keyed by 'sqlite' / 'postgresql'.
# NOTE(review): the keys look like database dialect names and each list is
# presumably applied in order to outgoing values - confirm at the lookup site.
OBJECT_FIXERS = dict(
    sqlite=[strize, jsonize],
    postgresql=[strize],
)
class SQLDumper(DumperBase):
def __init__(self,
tables,
engine='env://DATAFLOWS_DB_ENGINE',
updated_column=None, updated_id_column=None,
**options):
super(SQLDumper, self).__init__(options)
table_to_resource = tables
if isinstance(engine, str):
if engine.startswith('env://'):
env_var = engine[6:]
engine = os.environ.get(env_var)
if engine is None:
raise ValueError("Couldn't connect to DB - "
"Please set your '%s' environment variable" % env_var)
for descriptor in self.datapackage.descriptor['resources']:
if descriptor['name'] == resource.res.descriptor['name']:
resource_descriptor = descriptor
# File size:
filesize = temp_file.tell()
DumperBase.inc_attr(self.datapackage.descriptor, self.datapackage_bytes, filesize)
DumperBase.inc_attr(resource_descriptor, self.resource_bytes, filesize)
# File Hash:
if self.resource_hash:
hasher = FileDumper.hash_handler(temp_file)
# Update path with hash
if self.add_filehash_to_path:
DumperBase.insert_hash_in_path(resource_descriptor, hasher.hexdigest())
DumperBase.set_attr(resource_descriptor, self.resource_hash, hasher.hexdigest())
# Finalise
filename = temp_file.name
temp_file.close()
self.write_file_to_output(filename, resource.res.source)
os.unlink(filename)
import os
import json
import tempfile
import hashlib
from datapackage import Resource
from .dumper_base import DumperBase
from .file_formats import CSVFormat, JSONFormat
class FileDumper(DumperBase):
def __init__(self, options):
    """Initialise the dumper from an ``options`` mapping.

    ``options`` is passed unchanged to the parent initialiser; afterwards
    these keys are read from it with ``.get`` (default in parentheses):

    * ``force_format`` (``True``) -> ``self.force_format``
    * ``format`` (``'csv'``) -> ``self.forced_format``
    * ``temporal_format_property`` (``None``) -> ``self.temporal_format_property``
    * ``use_titles`` (``False``) -> ``self.use_titles``
    """
    super(FileDumper, self).__init__(options)

    # (instance attribute, key in options, default when the key is absent)
    option_table = (
        ('force_format', 'force_format', True),
        ('forced_format', 'format', 'csv'),
        ('temporal_format_property', 'temporal_format_property', None),
        ('use_titles', 'use_titles', False),
    )
    for attr_name, option_key, fallback in option_table:
        setattr(self, attr_name, options.get(option_key, fallback))
def process_datapackage(self, datapackage):
datapackage = \
super(FileDumper, self).process_datapackage(datapackage)
self.file_formatters = {}
# Make sure all resources are proper CSVs
resource: Resource = None
schema_validator(resource.res, resource,
**self.schema_validator_options)
)
)
ret = self.row_counter(resource, ret)
yield ret
# Calculate datapackage hash
if self.datapackage_hash:
datapackage_hash = hashlib.md5(
json.dumps(self.datapackage.descriptor,
indent=2 if self.pretty_descriptor else None,
sort_keys=True,
ensure_ascii=True).encode('ascii')
).hexdigest()
DumperBase.set_attr(self.datapackage.descriptor, self.datapackage_hash, datapackage_hash)
self.handle_datapackage()
self.finalize()
def __init__(self, options=None):
    """Initialise counter attribute names and dumper settings from ``options``.

    Args:
        options: Mapping of settings.  May be omitted (or ``None``), in
            which case every setting takes its default.  Keys read:

            * ``counters`` - mapping that overrides the descriptor
              attribute names used for the counters:
              ``datapackage-rowcount`` (``'count_of_rows'``),
              ``datapackage-bytes`` (``'bytes'``),
              ``datapackage-hash`` (``'hash'``),
              ``resource-rowcount`` (``'count_of_rows'``),
              ``resource-bytes`` (``'bytes'``),
              ``resource-hash`` (``'hash'``).
            * ``add_filehash_to_path`` (``False``)
            * ``pretty_descriptor`` (``True``)
            * ``validator_options`` (``{}``) - stored as
              ``self.schema_validator_options``.
    """
    # A ``{}`` default in the signature is a single dict object shared by
    # every call; use ``None`` as the sentinel and build a fresh dict here.
    if options is None:
        options = {}

    super(DumperBase, self).__init__()

    counters = options.get('counters', {})
    self.datapackage_rowcount = counters.get('datapackage-rowcount', 'count_of_rows')
    self.datapackage_bytes = counters.get('datapackage-bytes', 'bytes')
    self.datapackage_hash = counters.get('datapackage-hash', 'hash')
    self.resource_rowcount = counters.get('resource-rowcount', 'count_of_rows')
    self.resource_bytes = counters.get('resource-bytes', 'bytes')
    self.resource_hash = counters.get('resource-hash', 'hash')

    self.add_filehash_to_path = options.get('add_filehash_to_path', False)
    self.pretty_descriptor = options.get('pretty_descriptor', True)
    self.schema_validator_options = options.get('validator_options', {})