Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def filter_by_organizations(query, organization, include_sub_organizations):
    """Restrict an SQLAlchemy ORM query to packages owned by an organization.

    :param query: the query to filter; returned untouched when
        ``organization`` is falsy.
    :param organization: an organization object, or a string that is looked
        up with ``model.Group.get``.
    :param include_sub_organizations: when true, packages owned by any
        organization yielded by ``go_down_tree(organization)`` are matched
        as well.
    :returns: the (possibly filtered) query.
    :raises AssertionError: if a string reference does not resolve to an
        organization.
    """
    from ckan import model

    if not organization:
        return query

    if isinstance(organization, basestring):
        organization = model.Group.get(organization)
        assert organization

    if not include_sub_organizations:
        return query.filter(model.Package.owner_org == organization.id)

    # Order the organizations by name before collecting their ids.
    subtree = list(go_down_tree(organization))
    subtree.sort(key=lambda node: node.name)
    owner_ids = [node.id for node in subtree]
    return query.filter(model.Package.owner_org.in_(owner_ids))
def __repr__(self):
    """Return a one-line debugging summary of this record.

    The summary contains the openness score and format, the package name
    and resource id written as a ``/dataset/<name>/resource/<id>`` path,
    and the escaped openness score reason.
    """
    summary = 'score=%s format=%s' % (self.openness_score, self.format)
    # unicode_escape turns newlines and non-ASCII characters into escape
    # sequences, so the reason stays on one line.
    details = unicode(self.openness_score_reason).encode('unicode_escape')
    package = model.Package.get(self.package_id)
    # Fall back to the raw package id, wrapped in '?', when the lookup
    # returns nothing.
    package_name = package.name if package else '?%s?' % self.package_id
    # The format string needs one placeholder per argument: formatting an
    # empty string with a non-empty tuple raises TypeError.
    return '<%s %s /dataset/%s/resource/%s %s>' % (
        type(self).__name__, summary, package_name, self.resource_id,
        details)
def __repr__(self):
    """Return a one-line debugging summary of this record.

    The summary contains the ``broken_enum`` label for ``is_broken``, the
    package name and resource id written as a
    ``/dataset/<name>/resource/<id>`` path, and the failure count when the
    record is marked broken.
    """
    # The failure count is only shown when is_broken is truthy.
    if self.is_broken:
        broken_details = '%d failures' % self.failure_count
    else:
        broken_details = ''
    package = model.Package.get(self.package_id)
    # Fall back to the raw package id, wrapped in '?', when the lookup
    # returns nothing.
    package_name = package.name if package else '?%s?' % self.package_id
    # The format string needs one placeholder per argument: formatting an
    # empty string with a non-empty tuple raises TypeError.
    return '<%s %s /dataset/%s/resource/%s %s>' % (
        type(self).__name__, broken_enum[self.is_broken], package_name,
        self.resource_id, broken_details)
# Is this slug already in use (and if we're updating a package, is it in
# use by a different package?).
pkg_obj = Session.query(Package).filter(Package.name == name).filter(Package.id != exclude_existing_package).first()
if not pkg_obj:
# The name is available, so use it. Note that if we're updating an
# existing package we will be updating this package's URL, so incoming
# links may break.
return name
if exclude_existing_package:
# The name is not available, and we're updating a package. Chances
# are the package's name already had some random string attached
# to it last time. Prevent spurious updates to the package's URL
# (choosing new random text) by just reusing the existing package's
# name.
pkg_obj = Session.query(Package).filter(Package.id == exclude_existing_package).first()
if pkg_obj: # the package may not exist yet because we may be passed the desired package GUID before a new package is instantiated
return pkg_obj.name
# Append some random text to the URL. Hope that with five characters
# there will be no collision.
return name + "-" + str(uuid.uuid4())[:5]
Column('identifier', types.UnicodeText, primary_key=True),
Column('package_id', types.UnicodeText, ForeignKey('package.id', onupdate='CASCADE', ondelete='CASCADE'), nullable=False, unique=True),
Column('published', types.DateTime, nullable=True), # Date DOI was published to DataCite
)
class DOI(DomainObject):
    """DOI domain object.

    Declares no attributes or methods of its own; everything it offers is
    inherited from ``DomainObject``.
    """
meta.mapper(DOI, doi_table, properties={
'dataset': relation(model.Package,
backref=backref('doi', cascade='all, delete-orphan'),
primaryjoin=doi_table.c.package_id.__eq__(Package.id)
)
('resource_id', resource.id),
('resource_url', archived_resource.url),
('url_up_to_date', resource.url == archived_resource.url),
('via', via),
('first_failure', archival.first_failure.isoformat() if archival.first_failure else None),
('last_updated', archival.updated.isoformat() if archival.updated else None),
('last_success', archival.last_success.isoformat() if archival.last_success else None),
('url_redirected_to', archival.url_redirected_to),
('reason', archival.reason),
('status', archival.status),
('failure_count', archival.failure_count),
))
results.append(row_data)
num_broken_packages = archivals.distinct(model.Package.name).count()
num_broken_resources = len(results)
# Get total number of packages & resources
num_packages = model.Session.query(model.Package)\
.filter(model.Package.owner_org.in_(org_ids))\
.filter_by(state='active')\
.count()
num_resources = model.Session.query(model.Resource)\
.filter_by(state='active')
if p.toolkit.check_ckan_version(max_version='2.2.99'):
num_resources = num_resources.join(model.ResourceGroup)
num_resources = num_resources \
.join(model.Package)\
.filter(model.Package.owner_org.in_(org_ids))\
.filter_by(state='active').count()
(gb*gb, '>100 GB'),
]
previous_bin = (0, '')
counts = []
total_sizes = []
print '{:>15}{:>10}{:>20}'.format(
'file size', 'no. files', 'files size (bytes)')
for size_bin in size_bins:
q = model.Session.query(Archival) \
.filter(Archival.size > previous_bin[0]) \
.filter(Archival.size <= size_bin[0]) \
.filter(Archival.cache_filepath != '') \
.join(model.Resource,
Archival.resource_id == model.Resource.id) \
.filter(model.Resource.state != 'deleted') \
.join(model.Package,
Archival.package_id == model.Package.id) \
.filter(model.Package.state != 'deleted')
count = q.count()
counts.append(count)
total_size = model.Session.query(func.sum(Archival.size)) \
.filter(Archival.size > previous_bin[0]) \
.filter(Archival.size <= size_bin[0]) \
.filter(Archival.cache_filepath != '') \
.join(model.Resource,
Archival.resource_id == model.Resource.id) \
.filter(model.Resource.state != 'deleted') \
.join(model.Package,
Archival.package_id == model.Package.id) \
.filter(model.Package.state != 'deleted') \
.all()[0][0]
total_size = int(total_size or 0)
def make_package_name(self, title, exclude_existing_package, for_deletion):
    '''
    Creates a URL friendly name from a title.

    If the name already exists, it will add some random characters at the end.

    :param title: the title to derive the name from.
    :param exclude_existing_package: id of the package being updated, or a
        falsy value when no existing package should be excluded from the
        "is this name taken?" check.
    :param for_deletion: when true, the name is prefixed with "deleted-".
    :returns: the name to use for the package.
    '''
    # Local import: only the collision fallback at the end needs it.
    import uuid

    name = munge_title_to_name(title).replace('_', '-')
    if for_deletion:
        name = "deleted-" + name
    # Collapse runs of dashes left over from munging or from the prefix.
    while '--' in name:
        name = name.replace('--', '-')
    # max length is 100; cutting at 90 leaves room for the "-xxxxx" suffix
    # that may be appended below.
    name = name[0:90]

    # Is this slug already in use (and if we're updating a package, is it in
    # use by a different package?).
    pkg_obj = Session.query(Package).filter(Package.name == name).filter(Package.id != exclude_existing_package).first()
    if not pkg_obj:
        # The name is available, so use it. Note that if we're updating an
        # existing package we will be updating this package's URL, so incoming
        # links may break.
        return name

    if exclude_existing_package:
        # The name is not available, and we're updating a package. Chances
        # are the package's name already had some random string attached
        # to it last time. Prevent spurious updates to the package's URL
        # (choosing new random text) by just reusing the existing package's
        # name.
        pkg_obj = Session.query(Package).filter(Package.id == exclude_existing_package).first()
        if pkg_obj:  # the package may not exist yet because we may be passed the desired package GUID before a new package is instantiated
            return pkg_obj.name

    # The name is taken and there is no existing name to reuse. Without this
    # fallback the function would return None. Append some random text and
    # hope that five characters are enough to avoid a collision.
    return name + "-" + str(uuid.uuid4())[:5]
def get_resources(state='active', publisher_ref=None, resource_id=None,
                  dataset_name=None):
    """Fetch resources in the given state, optionally narrowed by criteria.

    :param state: resource state to match (``'active'`` by default).
    :param publisher_ref: reference passed to ``model.Group.get``; only
        resources of packages owned by that group are kept.
    :param resource_id: keep only the resource with this id.
    :param dataset_name: keep only resources of the package with this name.
    :returns: list of matching resources. A one-line summary of the count
        and the applied criteria is also printed.
    :raises AssertionError: if ``publisher_ref`` does not resolve to a group.
    """
    from ckan import model

    query = model.Session.query(model.Resource).filter_by(state=state)
    if p.toolkit.check_ckan_version(max_version='2.2.99'):
        # earlier CKANs had ResourceGroup
        query = query.join(model.ResourceGroup)
    query = query.join(model.Package).filter_by(state='active')

    # Human-readable labels for the summary line printed at the end.
    labels = [state]

    if publisher_ref:
        publisher = model.Group.get(publisher_ref)
        assert publisher
        query = query.filter(model.Package.owner_org == publisher.id)
        labels.append('Publisher:%s' % publisher.name)

    if dataset_name:
        query = query.filter(model.Package.name == dataset_name)
        labels.append('Dataset:%s' % dataset_name)

    if resource_id:
        query = query.filter(model.Resource.id == resource_id)
        labels.append('Resource:%s' % resource_id)

    found = query.all()
    # Parenthesised single-argument form: same output as the print statement.
    print('%i resources (%s)' % (len(found), ' '.join(labels)))
    return found