Commit 4b2601ed authored by David Read's avatar David Read
Browse files

Only archive if a resource URL changes (or added/deleted), not if something else changes.

parent a677677b
......@@ -33,10 +33,92 @@ class ArchiverPlugin(p.SingletonPlugin, p.toolkit.DefaultDatasetForm):
if not isinstance(entity, model.Package):
return
log.debug('Notified of package event: %s %s', entity.id, operation)
log.debug('Notified of package event: %s %s', entity.name, operation)
run_archiver = \
self._is_it_sufficient_change_to_run_archiver(entity, operation)
if not run_archiver:
return
log.debug('Creating archiver task: %s', entity.name)
lib.create_archiver_package_task(entity, 'priority')
def _is_it_sufficient_change_to_run_archiver(self, package, operation):
''' Returns True if it is a new dataset or there are resources that
have been added, deleted or URL changed in this revision.
'''
if operation == 'new':
log.debug('New package - will archive')
# even if it has no resources, QA needs to show 0 stars against it
return True
elif operation == 'deleted':
log.debug('Deleted package - won\'t archive')
return False
# therefore operation=changed
# check to see if resources are added, deleted or URL changed
# look for the latest revision
rev_list = package.all_related_revisions
if not rev_list:
log.debug('No sign of previous revisions - will archive')
return True
# I am not confident we can rely on the info about the current
# revision, because we are still in the 'before_commit' stage. So
# simply ignore that if it's returned.
if rev_list[0][0].id == model.Session.revision.id:
rev_list = rev_list[1:]
if not rev_list:
log.warn('No sign of previous revisions - will archive')
return True
previous_revision = rev_list[0][0]
log.debug('Comparing with revision: %s %s',
previous_revision.timestamp, previous_revision.id)
# get the package as it was at that previous revision
context = {'model': model, 'session': model.Session,
#'user': c.user or c.author,
'ignore_auth': True,
'revision_id': previous_revision.id}
data_dict = {'id': package.id}
try:
old_pkg_dict = p.toolkit.get_action('package_show')(
context, data_dict)
except p.toolkit.NotFound:
log.warn('No sign of previous package - will archive anyway')
return True
# have any resources been added or deleted?
old_resources = dict((res['id'], res)
for res in old_pkg_dict['resources'])
old_res_ids = set(old_resources.keys())
new_res_ids = set((res.id for res in package.resources))
deleted_res_ids = old_res_ids - new_res_ids
if deleted_res_ids:
log.debug('Deleted resources - will archive. res_ids=%r',
deleted_res_ids)
return True
added_res_ids = new_res_ids - old_res_ids
if added_res_ids:
log.debug('Added resources - will archive. res_ids=%r',
added_res_ids)
return True
# have any resource urls changed?
for res in package.resources:
old_res_url = old_resources[res.id]['url']
if old_res_url != res.url:
log.debug('Resource url changed - will archive. '
'id=%s pos=%s url="%s"->"%s"',
res.id[:4], res.position, old_res_url, res.url)
return True
log.debug('Resource unchanged. pos=%s id=%s',
res.position, res.id[:4])
log.debug('No new, deleted or changed resources - won\'t archive')
return False
# IReport
def register_reports(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment