Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
error message (string) describing the attempts.'''
def format_date(date):
    '''Return `date` formatted as day/month/year (dd/mm/YYYY).

    A falsy value (e.g. None) gives an empty string instead.
    '''
    return date.strftime('%d/%m/%Y') if date else ''
messages = [_('File could not be downloaded.'),
_('Reason') + ':', unicode(archival.status) + '.',
_('Error details: %s.') % archival.reason,
_('Attempted on %s.') % format_date(archival.updated)]
last_success = format_date(archival.last_success)
if archival.failure_count == 1:
if last_success:
messages.append(_('This URL last worked on: %s.') % last_success)
else:
messages.append(_('This was the first attempt.'))
else:
messages.append(_('Tried %s times since %s.') %
(archival.failure_count,
format_date(archival.first_failure)))
if last_success:
messages.append(_('This URL last worked on: %s.') % last_success)
else:
messages.append(_('This URL has not worked in the history of this tool.'))
return ' '.join(messages)
except httplib.InvalidURL, ve:
log.error("Could not make a head request to %r, error is: %s."
" Package is: %r. This sometimes happens when using an old version of requests on a URL"
" which issues a 301 redirect. Version=%s", url, ve, data.get('package'), requests.__version__)
raise LinkHeadRequestError(_("Invalid URL or Redirect Link"))
except ValueError, ve:
log.error("Could not make a head request to %r, error is: %s. Package is: %r.", url, ve, data.get('package'))
raise LinkHeadRequestError(_("Could not make HEAD request"))
except requests.exceptions.ConnectionError, e:
raise LinkHeadRequestError(_('Connection error: %s') % e)
except requests.exceptions.HTTPError, e:
raise LinkHeadRequestError(_('Invalid HTTP response: %s') % e)
except requests.exceptions.Timeout, e:
raise LinkHeadRequestError(_('Connection timed out after %ss') % url_timeout)
except requests.exceptions.TooManyRedirects, e:
raise LinkHeadRequestError(_('Too many redirects'))
except requests.exceptions.RequestException, e:
raise LinkHeadRequestError(_('Error during request: %s') % e)
except Exception, e:
raise LinkHeadRequestError(_('Error with the request: %s') % e)
else:
if res.status_code == 405:
# this suggests a GET request may be ok, so proceed to that
# in the download
raise LinkHeadMethodNotSupported()
if not res.ok or res.status_code >= 400:
error_message = _('Server returned HTTP error status: %s %s') % \
(res.status_code, res.reason)
raise LinkHeadRequestError(error_message)
return json.dumps(dict(headers))
score_reasons.append(_('Could not determine a file extension in the URL.'))
return (None, None)
for extension in extension_variants_:
format_ = format_get(extension)
if format_:
score = lib.resource_format_scores().get(format_)
if score:
score_reasons.append(_('URL extension "%s" relates to format "%s" and receives score: %s.') % (extension, format_, score))
return score, format_
else:
score = 1
score_reasons.append(_('URL extension "%s" relates to format "%s"'
' but a score for that format is not configured, so giving it default score %s.')
% (extension, format_, score))
return score, format_
score_reasons.append(_('URL extension "%s" is an unknown format.') % extension)
return (None, None)
e.__class__.__name__, unicode(e), traceback.format_exc())
score_reason = _("Unknown error: %s") % str(e)
raise
# Even if we can get the link, we should still treat the resource
# as having a score of 0 if the license isn't open.
#
# It is important we do this check after the link check, otherwise
# the link checker won't get the chance to see if the resource
# is broken.
if toolkit.check_ckan_version(max_version='2.2.99'):
package = resource.resource_group.package
else:
package = resource.package
if score > 0 and not package.isopen():
score_reason = _('License not open')
score = 0
log.info('Score: %s Reason: %s', score, score_reason)
archival_updated = archival.updated.isoformat() \
if archival and archival.updated else None
result = {
'openness_score': score,
'openness_score_reason': score_reason,
'format': format_,
'archival_timestamp': archival_updated
}
return result
def score_by_url_extension(resource, score_reasons):
    '''
    Derive a format and an openness score from the extension of the
    resource's URL.

    Each step of the decision is recorded by appending a translated
    message to the score_reasons list.

    Return values:
      * A tuple: (score, format_string)
      * (None, None) if no extension could be found in the URL, or none
        of the candidate extensions maps to a known format
      * When a format is found but no (truthy) score is available for it,
        the score defaults to 1
    '''
    candidates = extension_variants(resource.url.strip())
    if not candidates:
        score_reasons.append(_('Could not determine a file extension in the URL.'))
        return (None, None)

    for extension in candidates:
        format_ = format_get(extension)
        if not format_:
            # Note the miss and move on to the next candidate extension.
            score_reasons.append(_('URL extension "%s" is an unknown format.') % extension)
            continue

        # The first extension that maps to a format decides the result.
        score = lib.resource_format_scores().get(format_)
        if score:
            reason = _('URL extension "%s" relates to format "%s" and receives score: %s.') % (extension, format_, score)
        else:
            # Missing (or falsy) score for this format: use the default of 1.
            score = 1
            reason = _('URL extension "%s" relates to format "%s"'
                       ' but a score for that format is not configured, so giving it default score %s.') \
                % (extension, format_, score)
        score_reasons.append(reason)
        return score, format_

    # Every candidate extension was unrecognised.
    return (None, None)
url = tidy_url(data['url'])
# Send a head request
try:
res = requests.head(url, timeout=url_timeout)
headers = res.headers
except httplib.InvalidURL, ve:
log.error("Could not make a head request to %r, error is: %s."
" Package is: %r. This sometimes happens when using an old version of requests on a URL"
" which issues a 301 redirect. Version=%s", url, ve, data.get('package'), requests.__version__)
raise LinkHeadRequestError(_("Invalid URL or Redirect Link"))
except ValueError, ve:
log.error("Could not make a head request to %r, error is: %s. Package is: %r.", url, ve, data.get('package'))
raise LinkHeadRequestError(_("Could not make HEAD request"))
except requests.exceptions.ConnectionError, e:
raise LinkHeadRequestError(_('Connection error: %s') % e)
except requests.exceptions.HTTPError, e:
raise LinkHeadRequestError(_('Invalid HTTP response: %s') % e)
except requests.exceptions.Timeout, e:
raise LinkHeadRequestError(_('Connection timed out after %ss') % url_timeout)
except requests.exceptions.TooManyRedirects, e:
raise LinkHeadRequestError(_('Too many redirects'))
except requests.exceptions.RequestException, e:
raise LinkHeadRequestError(_('Error during request: %s') % e)
except Exception, e:
raise LinkHeadRequestError(_('Error with the request: %s') % e)
else:
if res.status_code == 405:
# this suggests a GET request may be ok, so proceed to that
# in the download
raise LinkHeadMethodNotSupported()
if not res.ok or res.status_code >= 400:
'''
if not archival or not archival.cache_filepath:
score_reasons.append(_('This file had not been downloaded at the time of scoring it.'))
return (None, None)
# Analyse the cached file
filepath = archival.cache_filepath
if not os.path.exists(filepath):
score_reasons.append(_('Cache filepath does not exist: "%s".') % filepath)
return (None, None)
else:
if filepath:
sniffed_format = sniff_file_format(filepath)
score = lib.resource_format_scores().get(sniffed_format['format']) \
if sniffed_format else None
if sniffed_format:
score_reasons.append(_('Content of file appeared to be format "%s" which receives openness score: %s.')
% (sniffed_format['format'], score))
return score, sniffed_format['format']
else:
score_reasons.append(_('The format of the file was not recognized from its contents.'))
return (None, None)
else:
# No cache_url
if archival.status_id == Status.by_text('Chose not to download'):
score_reasons.append(_('File was not downloaded deliberately') + '. '
+ _('Reason') + ': %s. ' % archival.reason + _('Using other methods to determine file openness.'))
return (None, None)
elif archival.is_broken is None and archival.status_id:
# i.e. 'Download failure' or 'System error during archival'
score_reasons.append(_('A system error occurred during downloading this file') + '. '
+ _('Reason') + ': %s. ' % archival.reason + _('Using other methods to determine file openness.'))
return (None, None)
if date:
return date.strftime('%d/%m/%Y')
else:
return ''
messages = [_('File could not be downloaded.'),
_('Reason') + ':', unicode(archival.status) + '.',
_('Error details: %s.') % archival.reason,
_('Attempted on %s.') % format_date(archival.updated)]
last_success = format_date(archival.last_success)
if archival.failure_count == 1:
if last_success:
messages.append(_('This URL last worked on: %s.') % last_success)
else:
messages.append(_('This was the first attempt.'))
else:
messages.append(_('Tried %s times since %s.') %
(archival.failure_count,
format_date(archival.first_failure)))
if last_success:
messages.append(_('This URL last worked on: %s.') % last_success)
else:
messages.append(_('This URL has not worked in the history of this tool.'))
return ' '.join(messages)
if score is None:
score, format_ = score_by_format_field(resource, score_reasons)
if score is None:
log.warning('Could not score resource: "%s" with url: "%s"',
resource.id, resource.url)
score_reasons.append(_('Could not understand the file format, therefore score is 1.'))
score = 1
if format_ is None:
# use any previously stored format value for this resource
format_ = get_qa_format(resource.id)
score_reason = ' '.join(score_reasons)
format_ = format_ or None
except Exception, e:
log.error('Unexpected error while calculating openness score %s: %s\nException: %s',
e.__class__.__name__, unicode(e), traceback.format_exc())
score_reason = _("Unknown error: %s") % str(e)
raise
# Even if we can get the link, we should still treat the resource
# as having a score of 0 if the license isn't open.
#
# It is important we do this check after the link check, otherwise
# the link checker won't get the chance to see if the resource
# is broken.
if toolkit.check_ckan_version(max_version='2.2.99'):
package = resource.resource_group.package
else:
package = resource.package
if score > 0 and not package.isopen():
score_reason = _('License not open')
score = 0