# Sum the sizes of all archived files above the size threshold.
total_size = int(model.Session.query(func.sum(Archival.size))
                 .filter(Archival.size > max_size)
                 .all()[0][0] or 0)
print '{} archivals above the {:,} threshold with total size {:,}'.format(
    len(archivals), max_size, total_size)
raw_input('Press Enter to DELETE them')
for archival in archivals:
    print 'Deleting %r' % archival
    resource = model.Resource.get(archival.resource_id)
    if resource.state == 'deleted':
        print 'Nothing to delete - Resource is deleted - deleting archival'
        model.Session.delete(archival)
        model.Session.commit()
        model.Session.flush()
        continue
    pkg = model.Package.get(archival.package_id)
    if pkg.state == 'deleted':
        print 'Nothing to delete - Dataset is deleted - deleting archival'
        model.Session.delete(archival)
        model.Session.commit()
        model.Session.flush()
        continue
    # Remove the cached file from disk, then clear the pointer to it.
    filepath = archival.cache_filepath
    if not os.path.exists(filepath):
        print 'Skipping - file not on disk'
        continue
    try:
        os.unlink(filepath)
    except OSError:
        print 'ERROR deleting %s' % filepath.decode('utf8')
    else:
        archival.cache_filepath = None
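
# The excerpt above assumes `archivals`, `max_size`, `func`, `model` and
# `os` are already in scope. A minimal sketch of that setup, as an
# assumption (the threshold value is hypothetical):
import os
from sqlalchemy import func
from ckan import model
from ckanext.archiver.model import Archival

max_size = 100 * 1024 * 1024  # hypothetical 100 MB threshold
archivals = model.Session.query(Archival).filter(Archival.size > max_size).all()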
# This excerpt begins mid-query; the line below is the tail of a larger
# query over Archival/Package/Group rows (a hypothetical reconstruction of
# its head appears after this excerpt):
#     .filter(model.Resource.state == 'active'))
if not include_sub_organizations:
    org_ids = [org.id]
    archivals = archivals.filter(model.Package.owner_org == org.id)
else:
    # We want any organization_id that is part of this organization's tree
    org_ids = ['%s' % child_org.id for child_org in lib.go_down_tree(org)]
    archivals = archivals.filter(model.Package.owner_org.in_(org_ids))
archivals = archivals.join(model.Group, model.Package.owner_org == model.Group.id)
results = []
for archival, pkg, org in archivals.all():
    pkg = model.Package.get(archival.package_id)
    resource = model.Resource.get(archival.resource_id)
    # Record how the dataset arrived, based on its external_reference extra.
    via = ''
    er = pkg.extras.get('external_reference', '')
    if er == 'ONSHUB':
        via = "Stats Hub"
    elif er.startswith("DATA4NR"):
        via = "Data4nr"
    # Use the revision of the resource as it was when archived, if available.
    archived_resource = model.Session.query(model.ResourceRevision)\
        .filter_by(id=resource.id)\
        .filter_by(revision_timestamp=archival.resource_timestamp)\
        .first() or resource
    row_data = OrderedDict((
        ('dataset_title', pkg.title),
        ('dataset_name', pkg.name),
        # further row fields are elided in this excerpt
        ))
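
# A hypothetical reconstruction of the query head continued at the top of
# the excerpt above; the real joins are not shown in the source, so this is
# illustration only:
archivals = (model.Session.query(Archival, model.Package, model.Group)
             .join(model.Package, Archival.package_id == model.Package.id)
             .filter(model.Package.state == 'active')
             .join(model.Resource, Archival.resource_id == model.Resource.id)
             .filter(model.Resource.state == 'active'))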
def package(self):
    # Look up the parent dataset of this resource dict.
    return model.Package.get(self.resource['package_id'])
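
# A minimal sketch of a class this helper could live on; the class name and
# constructor are illustrative assumptions, not from the original.
class ResourceTask(object):
    def __init__(self, resource):
        # `resource` is a resource dict, e.g. {'id': ..., 'package_id': ...}
        self.resource = resource

    def package(self):
        return model.Package.get(self.resource['package_id'])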
def update(self, package_id=None):
    """
    Process all resources, or just those belonging to
    package_id if provided.
    """
    # check that archive and webstore folders exist
    if not os.path.exists(self.archive_folder):
        log.error("No archived resources available to process")
        return
    if not os.path.exists(self.webstore_folder):
        os.mkdir(self.webstore_folder)
    if package_id:
        package = Package.get(package_id)
        if package:
            packages = [package]
        else:
            log.error("Package not found: %s" % package_id)
            return
    else:
        # All resources that we can process should be stored
        # in a folder with the same name as their package in the
        # ckan.qa_archive folder. Get a list of package names from
        # these folders, then use the name to get the package object
        # from the database.
        files = os.listdir(self.archive_folder)
        package_names = [f for f in files
                         if os.path.isdir(os.path.join(self.archive_folder, f))]
        package_names = [unicode(p) for p in package_names]
        packages = [Package.get(p) for p in package_names]
    log.info("Total packages to update: %d" % len(packages))
def _get_issue_vars(issue, issue_subject, user_obj, recipient):
    # Template variables for rendering an issue notification.
    return {'issue': issue,
            'issue_subject': issue_subject,
            'dataset': model.Package.get(issue.dataset_id),
            'user': user_obj,
            'site_title': get_site_title(),
            'recipient': recipient,
            'h': h}
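
# A minimal sketch of feeding these vars into an email template;
# render_jinja2 is CKAN's template renderer, but the template path and
# helper name are assumptions:
from ckan.lib.base import render_jinja2

def _render_issue_email(issue, issue_subject, user_obj, recipient):
    extra_vars = _get_issue_vars(issue, issue_subject, user_obj, recipient)
    return render_jinja2('issues/email/new_issue.txt', extra_vars)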
def import_stage(self, harvest_object):
    # The import stage actually creates the dataset.
    log.debug('In %s import_stage' % repr(self))

    # Get default values.
    harvester_config = self.load_config(harvest_object.source)

    # Get the metadata that we stored in the HarvestObject's content field.
    dataset = json.loads(harvest_object.content)

    # We need to get the owner organization (if any) from the harvest
    # source dataset.
    owner_org = None
    source_dataset = model.Package.get(harvest_object.source.id)
    if source_dataset.owner_org:
        owner_org = source_dataset.owner_org

    # Assemble basic information about the dataset.
    pkg = {
        "name": self.make_package_name(dataset["title"], harvest_object.guid, False),
        "state": "active",  # in case was previously deleted
        "owner_org": owner_org,
        "extras": [
            {
                "key": "source_url",
                "value": harvest_object.source.url,
            },
            {
                "key": "source_title",
                "value": harvest_object.source.title,
            },
            # further extras and package fields are elided in this excerpt
        ],
    }
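
    # One common way to persist the assembled dict, sketched here as an
    # assumption - this is not necessarily what the original harvester does:
    import ckan.plugins.toolkit as toolkit
    context = {'model': model, 'session': model.Session,
               'user': 'harvest', 'ignore_auth': True}
    pkg_dict = toolkit.get_action('package_create')(context, pkg)
    harvest_object.package_id = pkg_dict['id']
    harvest_object.save()
    return True  # ckanext-harvest treats a True return as success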
def view(self, package_ref=None):
    from ckan import model
    from ckanext.archiver.model import Archival

    # Overall counts of resources and their archivals.
    r_q = model.Session.query(model.Resource).filter_by(state='active')
    print 'Resources: %i total' % r_q.count()
    a_q = model.Session.query(Archival)
    print 'Archived resources: %i total' % a_q.count()
    num_with_cache_url = a_q.filter(Archival.cache_url != '').count()
    print ' %i with cache_url' % num_with_cache_url
    last_updated_res = a_q.order_by(Archival.updated.desc()).first()
    print 'Latest archival: %s' % (
        last_updated_res.updated.strftime('%Y-%m-%d %H:%M')
        if last_updated_res else '(none)')
    # Optionally drill into a single dataset.
    if package_ref:
        pkg = model.Package.get(package_ref)
        print 'Package %s %s' % (pkg.name, pkg.id)
        for res in pkg.resources:
            print 'Resource %s' % res.id
            for archival in a_q.filter_by(resource_id=res.id):
                print '* %r' % archival
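
# Hypothetical invocation as a paster-style admin command; the exact command
# name and config path are assumptions:
#
#   paster archiver view [dataset-name] -c /etc/ckan/default/production.ini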