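# For each organization, count the broken resources and broken datasets
# (those with a failed archival) alongside the organization's totals.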
for org in add_progress_bar(
orgs, 'Part 1/2' if include_sub_organizations else None):
archivals = (model.Session.query(Archival)
.filter(Archival.is_broken == True) # noqa
.join(model.Package, Archival.package_id == model.Package.id)
.filter(model.Package.owner_org == org.id)
.filter(model.Package.state == 'active')
.join(model.Resource, Archival.resource_id == model.Resource.id)
.filter(model.Resource.state == 'active'))
broken_resources = archivals.count()
broken_datasets = archivals.distinct(model.Package.id).count()
num_datasets = model.Session.query(model.Package)\
.filter_by(owner_org=org.id)\
.filter_by(state='active')\
.count()
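# On CKAN <= 2.2 resources hang off packages via ResourceGroup (removed in
# CKAN 2.3), so the join path depends on the running CKAN version.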
num_resources = model.Session.query(model.Package)\
.filter_by(owner_org=org.id)\
.filter_by(state='active')
if p.toolkit.check_ckan_version(max_version='2.2.99'):
num_resources = num_resources.join(model.ResourceGroup)
num_resources = num_resources \
.join(model.Resource)\
.filter_by(state='active')\
.count()
counts[org.name] = {
'organization_title': org.title,
'broken_packages': broken_datasets,
'broken_resources': broken_resources,
'packages': num_datasets,
'resources': num_resources
}
def upload_multipart(context, data_dict):
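    '''Upload a single part of a multipart upload to cloud storage and
    record its ETag, so the parts can later be combined.'''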
h.check_access('cloudstorage_upload_multipart', data_dict)
upload_id, part_number, part_content = toolkit.get_or_bust(
data_dict, ['uploadId', 'partNumber', 'upload'])
uploader = ResourceCloudStorage({})
upload = model.Session.query(MultipartUpload).get(upload_id)
resp = uploader.driver.connection.request(
_get_object_url(
uploader, upload.name) + '?partNumber={0}&uploadId={1}'.format(
part_number, upload_id),
method='PUT',
data=bytearray(_get_underlying_file(part_content).read())
)
if resp.status != 200:
raise toolkit.ValidationError('Upload failed: part %s' % part_number)
_save_part_info(part_number, resp.headers['etag'], upload)
return {
'partNumber': part_number,
'ETag': resp.headers['etag']
}
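# Orphaned archive files (on disk but with no matching resource) are logged
# to the CSV report; the first branch also deletes them from disk.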
os.unlink(archived_path)
self.log.info("Unlinked {0}".format(archived_path))
os.rmdir(root)
self.log.info("Unlinked {0}".format(root))
writer.writerow([m.groups(0)[0], archived_path, "Resource not found, file deleted"])
except Exception as e:
self.log.error("Failed to unlink {0}: {1}".format(archived_path, e))
else:
writer.writerow([m.groups(0)[0], archived_path, "Resource not found"])
continue
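# Print a summary of the scan: files on disk vs. database records.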
print "General info:"
print " Permission error reading file: {0}".format(perm_error)
print " file on disk but no resource: {0}".format(file_no_resource)
print " Total resources: {0}".format(model.Session.query(model.Resource).count())
print "Active resource info:"
print " No cache_filepath: {0}".format(not_cached_active)
print " cache_filepath not on disk: {0}".format(file_not_found_active)
print "Deleted resource info:"
print " No cache_filepath: {0}".format(not_cached_deleted)
print " cache_filepath not on disk: {0}".format(file_not_found_deleted)
('reason', archival.reason),
('status', archival.status),
('failure_count', archival.failure_count),
))
results.append(row_data)
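# Summarise the report: broken package/resource counts against the totals
# for the selected organizations.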
num_broken_packages = archivals.distinct(model.Package.name).count()
num_broken_resources = len(results)
# Get total number of packages & resources
num_packages = model.Session.query(model.Package)\
.filter(model.Package.owner_org.in_(org_ids))\
.filter_by(state='active')\
.count()
num_resources = model.Session.query(model.Resource)\
.filter_by(state='active')
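# As above, CKAN <= 2.2 links resources to packages via ResourceGroup.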
if p.toolkit.check_ckan_version(max_version='2.2.99'):
num_resources = num_resources.join(model.ResourceGroup)
num_resources = num_resources \
.join(model.Package)\
.filter(model.Package.owner_org.in_(org_ids))\
.filter_by(state='active').count()
return {'organization_name': name,
'organization_title': title,
'num_broken_packages': num_broken_packages,
'num_broken_resources': num_broken_resources,
'num_packages': num_packages,
'num_resources': num_resources,
'broken_package_percent': lib.percent(num_broken_packages, num_packages),
'broken_resource_percent': lib.percent(num_broken_resources, num_resources),
}
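# Decide how the resource's file is stored: a direct file upload, a
# completed multipart upload, or clearing a cancelled upload.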
if isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
self.filename = munge.munge_filename(upload_field_storage.filename)
self.file_upload = _get_underlying_file(upload_field_storage)
resource['url'] = self.filename
resource['url_type'] = 'upload'
elif multipart_name and self.can_use_advanced_aws:
# This means the file was successfully uploaded and is already
# stored in the cloud. Currently only the AWS version is
# implemented.
resource['url'] = munge.munge_filename(multipart_name)
resource['url_type'] = 'upload'
elif self._clear and resource.get('id'):
# Apparently, this is a created-but-not-committed resource whose
# file upload has been canceled. We're copying the behaviour of
# ckanext-s3filestore here.
old_resource = model.Session.query(
model.Resource
).get(
resource['id']
)
self.old_filename = old_resource.url
resource['url_type'] = ''
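# Migration: rewrite resource cache_urls from the old layout to the new
# <base>/<2-hex-chars>/<resource-id>/<filename> layout; the regex below
# recognises urls that have already been migrated.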
new_dir_regex = re.compile(r'(.*)/[a-f0-9]{2}/[a-f0-9\-]{36}/[^/]*$')
for resource in model.Session.query(model.Resource).\
filter(model.Resource.state != model.State.DELETED):
if not resource.cache_url or resource.cache_url == 'None':
continue
if new_dir_regex.match(resource.cache_url):
print('Resource already has a new-style url: %s' % resource.cache_url)
continue
match = old_dir_regex.match(resource.cache_url)
if not match:
print('ERROR Could not match url: %s' % resource.cache_url)
continue
url_base, res_id, filename = match.groups()
# check the package isn't deleted
# Need to refresh the resource's session
resource = model.Session.query(model.Resource).get(resource.id)
if p.toolkit.check_ckan_version(max_version='2.2.99'):
package = None
if resource.resource_group:
package = resource.resource_group.package
else:
package = resource.package
if package and package.state == model.State.DELETED:
print('Package is deleted')
continue
if url_base != site_url_base:
print('ERROR Base URL is incorrect: %r != %r' % (url_base, site_url_base))
continue
# move the file
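# Print overall archival statistics, plus the per-resource archivals for a
# single package when package_ref is given.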
def view(self, package_ref=None):
    from ckan import model
    from ckanext.archiver.model import Archival
    r_q = model.Session.query(model.Resource).filter_by(state='active')
    print('Resources: %i total' % r_q.count())
    a_q = model.Session.query(Archival)
    print('Archived resources: %i total' % a_q.count())
    num_with_cache_url = a_q.filter(Archival.cache_url != '').count()
    print('  %i with cache_url' % num_with_cache_url)
    last_updated_res = a_q.order_by(Archival.updated.desc()).first()
    print('Latest archival: %s' % (
        last_updated_res.updated.strftime('%Y-%m-%d %H:%M')
        if last_updated_res else '(none)'))
    if package_ref:
        pkg = model.Package.get(package_ref)
        print('Package %s %s' % (pkg.name, pkg.id))
        for res in pkg.resources:
            print('Resource %s' % res.id)
            for archival in a_q.filter_by(resource_id=res.id):
                print('* %r' % archival)
def all_organizations(include_none=False):
'''Yields all the organization names, and also None if requested. Useful
when assembling option_combinations'''
from ckan import model
if include_none:
yield None
organizations = model.Session.query(model.Group).\
    filter(model.Group.type == 'organization').\
    filter(model.Group.state == 'active').order_by('name')
for organization in organizations:
yield organization.name
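# A minimal usage sketch (assumed, not from the original source): pair each
# organization name with both values of a boolean option when assembling
# option_combinations, e.g.
#
#   option_combinations = [
#       {'organization': org_name, 'include_sub_organizations': subs}
#       for org_name in all_organizations(include_none=True)
#       for subs in (False, True)]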
or res.mimetype):
add_stat('No archive data', res, stats)
continue
for field_name in ('status_id', 'is_broken', 'reason',
'last_success', 'first_failure',
'failure_count', 'url_redirected_to',
'updated', 'created'):
fields[field_name] = None
fields['cache_filepath'] = res.extras.get('cache_filepath')
fields['cache_url'] = res.cache_url
fields['hash'] = res.hash
fields['size'] = res.size
fields['mimetype'] = res.mimetype
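# Approximate the resource's created / last-modified times from its
# revision history, falling back to the archival timestamps below.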
revisions_with_hash = model.Session.query(model.ResourceRevision)\
    .filter_by(id=res.id)\
    .filter(model.ResourceRevision.hash != '')\
    .order_by(model.ResourceRevision.revision_timestamp)\
    .all()
if revisions_with_hash:
# these are not perfect but not far off
fields['created'] = revisions_with_hash[0].revision_timestamp
fields['resource_timestamp'] = revisions_with_hash[-1].revision_timestamp
else:
fields['created'] = min(fields['updated'] or END_OF_TIME,
fields['first_failure'] or END_OF_TIME,
fields['last_success'] or END_OF_TIME)
fields['resource_timestamp'] = max(
fields['updated'] or START_OF_TIME,
fields['first_failure'] or START_OF_TIME,
fields['last_success'] or START_OF_TIME)
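# END_OF_TIME / START_OF_TIME act as sentinels so that min()/max() simply
# ignore any timestamps that are missing.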