Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MJU-POPS
ckanext-archiver
Commits
c1e42065
Commit
c1e42065
authored
May 19, 2016
by
David Read
Browse files
Merge pull request #28 from ckan/only-archive-on-url-change
Only archive if a resource URL changes
parents
5bb00c04
223ed0aa
Changes
2
Hide whitespace changes
Inline
Side-by-side
ckanext/archiver/lib.py
View file @
c1e42065
...
...
@@ -2,7 +2,6 @@ import os
import
logging
import
ckan.plugins
as
p
from
ckan
import
model
from
ckan.model.types
import
make_uuid
from
ckan.lib.celery_app
import
celery
...
...
@@ -34,3 +33,10 @@ def create_archiver_package_task(package, queue):
task_id
=
task_id
,
queue
=
queue
)
log
.
debug
(
'Archival of package put into celery queue %s: %s'
,
queue
,
package
.
name
)
def get_extra_from_pkg_dict(pkg_dict, key, default=None):
    '''Return the value of the extra called `key` in a package dict.

    The extras are read from ``pkg_dict['extras']``, which is iterated as a
    sequence of dicts each having a 'key' and a 'value' item. The value of
    the first entry whose 'key' equals `key` is returned.

    :param pkg_dict: dictized package; its 'extras' item may be missing or
        None, in which case it is treated as having no extras
    :param key: name of the extra to look up
    :param default: returned when no extra with that key exists
    :returns: the matching extra's value, or `default`
    '''
    # A package dict without an extras list simply has no extras, so fall
    # through to the default rather than raising KeyError/TypeError.
    for extra in pkg_dict.get('extras') or ():
        if extra['key'] == key:
            return extra['value']
    return default
ckanext/archiver/plugin.py
View file @
c1e42065
...
...
@@ -33,10 +33,109 @@ class ArchiverPlugin(p.SingletonPlugin, p.toolkit.DefaultDatasetForm):
if
not
isinstance
(
entity
,
model
.
Package
):
return
log
.
debug
(
'Notified of package event: %s %s'
,
entity
.
id
,
operation
)
log
.
debug
(
'Notified of package event: %s %s'
,
entity
.
name
,
operation
)
run_archiver
=
\
self
.
_is_it_sufficient_change_to_run_archiver
(
entity
,
operation
)
if
not
run_archiver
:
return
log
.
debug
(
'Creating archiver task: %s'
,
entity
.
name
)
lib
.
create_archiver_package_task
(
entity
,
'priority'
)
def _is_it_sufficient_change_to_run_archiver(self, package, operation):
    ''' Decide whether a package event justifies running the archiver.

    Returns True if in this revision any of these happened:
    * it is a new dataset
    * dataset licence changed (affects qa)
    * there are resources that have been added or deleted
    * resources have changed their URL or format (affects qa)

    It also returns True whenever there is nothing to compare against (no
    previous revision, or the previous version of the package cannot be
    shown). It returns False for a deleted package and when none of the
    above changes are detected.

    :param package: the package object being saved; this method reads its
        id, license_id, extras, resources and all_related_revisions
    :param operation: 'new' or 'deleted'; any other value is treated as
        a change to an existing package
    :returns: bool - whether to create an archiver task
    '''
    if operation == 'new':
        log.debug('New package - will archive')
        # even if it has no resources, QA needs to show 0 stars against it
        return True
    elif operation == 'deleted':
        log.debug('Deleted package - won\'t archive')
        return False
    # therefore operation=changed

    # check to see if resources are added, deleted or URL changed

    # look for the latest revision
    # NOTE(review): presumably a newest-first list of
    # (revision, changed_objects) tuples - confirm against the model.
    rev_list = package.all_related_revisions
    if not rev_list:
        log.debug('No sign of previous revisions - will archive')
        return True
    # I am not confident we can rely on the info about the current
    # revision, because we are still in the 'before_commit' stage. So
    # simply ignore that if it's returned.
    if rev_list[0][0].id == model.Session.revision.id:
        rev_list = rev_list[1:]
    if not rev_list:
        log.warning('No sign of previous revisions - will archive')
        return True
    previous_revision = rev_list[0][0]
    log.debug('Comparing with revision: %s %s',
              previous_revision.timestamp, previous_revision.id)

    # get the package as it was at that previous revision
    # (no 'user' is supplied in the context; auth checks are skipped)
    context = {'model': model, 'session': model.Session,
               'ignore_auth': True,
               'revision_id': previous_revision.id}
    data_dict = {'id': package.id}
    try:
        old_pkg_dict = p.toolkit.get_action('package_show')(
            context, data_dict)
    except p.toolkit.NotFound:
        log.warning('No sign of previous package - will archive anyway')
        return True

    # has the licence changed?
    # Both the core license_id and the 'licence' extra are compared; an
    # empty/missing extra is normalised to None on both sides.
    old_licence = (old_pkg_dict['license_id'],
                   lib.get_extra_from_pkg_dict(old_pkg_dict, 'licence')
                   or None)
    new_licence = (package.license_id,
                   package.extras.get('licence') or None)
    if old_licence != new_licence:
        log.debug('Licence has changed - will archive: %r->%r',
                  old_licence, new_licence)
        return True

    # have any resources been added or deleted?
    old_resources = dict((res['id'], res)
                         for res in old_pkg_dict['resources'])
    old_res_ids = set(old_resources)
    new_res_ids = set(res.id for res in package.resources)
    deleted_res_ids = old_res_ids - new_res_ids
    if deleted_res_ids:
        log.debug('Deleted resources - will archive. res_ids=%r',
                  deleted_res_ids)
        return True
    added_res_ids = new_res_ids - old_res_ids
    if added_res_ids:
        log.debug('Added resources - will archive. res_ids=%r',
                  added_res_ids)
        return True

    # have any resources' url/format changed?
    # Every current resource id is known to be in old_resources here,
    # because the added/deleted checks above have already returned.
    for res in package.resources:
        for key in ('url', 'format'):
            old_res_value = old_resources[res.id][key]
            new_res_value = getattr(res, key)
            if old_res_value != new_res_value:
                # label the old->new values with the field that actually
                # changed (it is not always the url)
                log.debug('Resource %s changed - will archive. '
                          'id=%s pos=%s %s="%s"->"%s"',
                          key, res.id[:4], res.position, key,
                          old_res_value, new_res_value)
                return True
        log.debug('Resource unchanged. pos=%s id=%s',
                  res.position, res.id[:4])

    log.debug('No new, deleted or changed resources - won\'t archive')
    return False
# IReport
def
register_reports
(
self
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment