Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MJU-POPS
ckanext-archiver
Commits
4b2601ed
Commit
4b2601ed
authored
May 04, 2016
by
David Read
Browse files
Only archive if a resource URL changes (or added/deleted), not if something else changes.
parent
a677677b
Changes
1
Hide whitespace changes
Inline
Side-by-side
ckanext/archiver/plugin.py
View file @
4b2601ed
...
...
@@ -33,10 +33,92 @@ class ArchiverPlugin(p.SingletonPlugin, p.toolkit.DefaultDatasetForm):
if
not
isinstance
(
entity
,
model
.
Package
):
return
log
.
debug
(
'Notified of package event: %s %s'
,
entity
.
id
,
operation
)
log
.
debug
(
'Notified of package event: %s %s'
,
entity
.
name
,
operation
)
run_archiver
=
\
self
.
_is_it_sufficient_change_to_run_archiver
(
entity
,
operation
)
if
not
run_archiver
:
return
log
.
debug
(
'Creating archiver task: %s'
,
entity
.
name
)
lib
.
create_archiver_package_task
(
entity
,
'priority'
)
def
_is_it_sufficient_change_to_run_archiver
(
self
,
package
,
operation
):
''' Returns True if it is a new dataset or there are resources that
have been added, deleted or URL changed in this revision.
'''
if
operation
==
'new'
:
log
.
debug
(
'New package - will archive'
)
# even if it has no resources, QA needs to show 0 stars against it
return
True
elif
operation
==
'deleted'
:
log
.
debug
(
'Deleted package - won
\'
t archive'
)
return
False
# therefore operation=changed
# check to see if resources are added, deleted or URL changed
# look for the latest revision
rev_list
=
package
.
all_related_revisions
if
not
rev_list
:
log
.
debug
(
'No sign of previous revisions - will archive'
)
return
True
# I am not confident we can rely on the info about the current
# revision, because we are still in the 'before_commit' stage. So
# simply ignore that if it's returned.
if
rev_list
[
0
][
0
].
id
==
model
.
Session
.
revision
.
id
:
rev_list
=
rev_list
[
1
:]
if
not
rev_list
:
log
.
warn
(
'No sign of previous revisions - will archive'
)
return
True
previous_revision
=
rev_list
[
0
][
0
]
log
.
debug
(
'Comparing with revision: %s %s'
,
previous_revision
.
timestamp
,
previous_revision
.
id
)
# get the package as it was at that previous revision
context
=
{
'model'
:
model
,
'session'
:
model
.
Session
,
#'user': c.user or c.author,
'ignore_auth'
:
True
,
'revision_id'
:
previous_revision
.
id
}
data_dict
=
{
'id'
:
package
.
id
}
try
:
old_pkg_dict
=
p
.
toolkit
.
get_action
(
'package_show'
)(
context
,
data_dict
)
except
p
.
toolkit
.
NotFound
:
log
.
warn
(
'No sign of previous package - will archive anyway'
)
return
True
# have any resources been added or deleted?
old_resources
=
dict
((
res
[
'id'
],
res
)
for
res
in
old_pkg_dict
[
'resources'
])
old_res_ids
=
set
(
old_resources
.
keys
())
new_res_ids
=
set
((
res
.
id
for
res
in
package
.
resources
))
deleted_res_ids
=
old_res_ids
-
new_res_ids
if
deleted_res_ids
:
log
.
debug
(
'Deleted resources - will archive. res_ids=%r'
,
deleted_res_ids
)
return
True
added_res_ids
=
new_res_ids
-
old_res_ids
if
added_res_ids
:
log
.
debug
(
'Added resources - will archive. res_ids=%r'
,
added_res_ids
)
return
True
# have any resource urls changed?
for
res
in
package
.
resources
:
old_res_url
=
old_resources
[
res
.
id
][
'url'
]
if
old_res_url
!=
res
.
url
:
log
.
debug
(
'Resource url changed - will archive. '
'id=%s pos=%s url="%s"->"%s"'
,
res
.
id
[:
4
],
res
.
position
,
old_res_url
,
res
.
url
)
return
True
log
.
debug
(
'Resource unchanged. pos=%s id=%s'
,
res
.
position
,
res
.
id
[:
4
])
log
.
debug
(
'No new, deleted or changed resources - won
\'
t archive'
)
return
False
# IReport
def
register_reports
(
self
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment