Commit 39ed7098 authored by Marko Kuder's avatar Marko Kuder
Browse files

added proxy settings + update archival status even if ETag is the same

parent c1e42065
......@@ -224,6 +224,11 @@ def _update_resource(resource_id, queue, log):
resource,
queue,
archive_result.get('cache_filename') if archive_result else None)
def _save_if_status_changed(status_id, reason, resource):
    '''Ask update_archival_status() to refresh the stored status of
    `resource` and, when it hands back a truthy cache filename, pass the
    resource on through notify_resource().

    `log` and `queue` are not parameters; they are expected to come from
    the surrounding scope.
    '''
    updated_cache_path = update_archival_status(
        resource, status_id, reason, log)
    if not updated_cache_path:
        # Nothing was returned, so there is nothing to notify about
        return
    notify_resource(resource, queue, updated_cache_path)
# Download
try_as_api = False
......@@ -281,7 +286,8 @@ def _update_resource(resource_id, queue, log):
return
if not requires_archive:
# We don't need to archive if the remote content has not changed
# We don't need to archive if the remote content has not changed, only update status
_save_if_status_changed(Status.by_text('Content has not changed'), '', resource)
return None
# Archival
......@@ -609,7 +615,6 @@ def _save_resource(resource, response, max_file_size, chunk_size=1024*16):
content_hash = unicode(resource_hash.hexdigest())
return length, content_hash, tmp_resource_file_path
def save_archival(resource, status_id, reason, url_redirected_to,
download_result, archive_result, log):
'''Writes to the archival table the result of an attempt to download
......@@ -671,6 +676,35 @@ def save_archival(resource, status_id, reason, url_redirected_to,
log.info('Archival saved: %r', archival)
model.repo.commit_and_remove()
def update_archival_status(resource, status_id, reason, log):
    '''Marks a previously broken archival as working again, without a
    new download.

    This is only needed when a resource is not redownloaded because its
    ETag still matches: if the link was inaccessible for a while, the
    stored archival is still flagged as broken although the current
    check succeeded.

    :param resource: resource dict; only resource['id'] is read
    :param status_id: status id of the current check
    :param reason: text stored as the archival's reason
    :param log: logger that receives the info messages
    :returns: the archival's cache_filepath when the archival was
        updated (the attribute itself may be None), otherwise None
    '''
    # Imported here rather than at module level, as in the original code
    from ckanext.archiver.model import Archival, Status
    from ckan import model

    archival = Archival.get_for_resource(resource['id'])
    if not archival:
        return None

    previous_archival_was_broken = archival.is_broken
    current_archival_is_ok = Status.is_ok(status_id)
    if not (previous_archival_was_broken and current_archival_is_ok):
        # Only a broken -> ok transition needs the stored record fixing
        return None

    log.info('Archival from before: %r', archival)
    now = datetime.datetime.now()

    # Reset the failure bookkeeping and record the success
    archival.status_id = Status.by_text('Archived successfully')
    archival.is_broken = Status.is_status_broken(status_id)
    archival.reason = reason
    archival.last_success = now
    archival.first_failure = None
    archival.failure_count = 0
    archival.updated = now
    model.Session.add(archival)
    log.info('Archival status updated: %r', archival)

    # Read the value before the session is committed and removed.
    # The original read it afterwards behind hasattr(), which in
    # Python 2 swallows every exception, so a failed attribute load on
    # the no-longer-attached object would silently turn into None.
    # NOTE(review): whether the load can actually fail depends on how
    # the CKAN session is configured - confirm.
    cache_filepath = getattr(archival, 'cache_filepath', None)
    model.repo.commit_and_remove()
    return cache_filepath
def requests_wrapper(log, func, *args, **kwargs):
'''
......@@ -683,14 +717,26 @@ def requests_wrapper(log, func, *args, **kwargs):
res = requests.get(url, timeout=url_timeout)
'''
from requests_ssl import SSLv3Adapter
from pylons import config
proxies = {}
if config.get('ckanext-archiver.proxy_url_http', ''):
proxies['http'] = config.get('ckanext-archiver.proxy_url_http')
if config.get('ckanext-archiver.proxy_url_https', ''):
proxies['https'] = config.get('ckanext-archiver.proxy_url_https')
log.debug("Setting proxies: "+str(proxies))
try:
try:
response = func(*args, **kwargs)
if func.__name__ != 'get_content':
response = func(*args, proxies=proxies, **kwargs)
else:
response = func(*args, **kwargs)
except requests.exceptions.ConnectionError, e:
if 'SSL23_GET_SERVER_HELLO' not in str(e):
raise
log.info('SSLv23 failed so trying again using SSLv3: %r', args)
requests_session = requests.Session()
if proxies:
requests_session.proxies.update(proxies)
requests_session.mount('https://', SSLv3Adapter())
func = {requests.get: requests_session.get,
requests.post: requests_session.post}[func]
......@@ -869,4 +915,3 @@ def link_checker(context, data):
raise LinkHeadRequestError(error_message)
return json.dumps(dict(headers))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment