model.py 6.77 KB
Newer Older
Ross Jones's avatar
Ross Jones committed
1
2
3
4
5
6
7
8
import uuid
from datetime import datetime

from sqlalchemy import Column, MetaData
from sqlalchemy import types
from sqlalchemy.ext.declarative import declarative_base

import ckan.model as model
9
10
import ckan.plugins as p

11
from ckan.lib import dictization
Ross Jones's avatar
Ross Jones committed
12
13
14
15
16

# Module-level logger named after this module. ``logging`` is obtained via
# ``__import__`` rather than through a top-level import statement.
log = __import__('logging').getLogger(__name__)

# Declarative base class for the ORM models declared in this module; its
# metadata is what init_tables() uses to create the database tables.
Base = declarative_base()

17

Ross Jones's avatar
Ross Jones committed
18
19
20
21
22
def make_uuid():
    """Return a newly generated random (version 4) UUID as a text string.

    Used as the default value generator for the ``id`` primary key column.
    Works on both Python 2 (returns ``unicode``) and Python 3 (returns
    ``str``).
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        # Python 3 has no ``unicode`` builtin; ``str`` is the text type.
        text_type = str
    return text_type(uuid.uuid4())

# Standalone SQLAlchemy MetaData object.
# NOTE(review): nothing in the visible part of this module references it
# (tables are created through Base.metadata) - confirm whether any other
# module imports it before considering removal.
metadata = MetaData()

23
24
25
26
27
28
29
30
31
32
33

# enum of all the archival statuses (singleton)
# NB Be very careful changing these status strings. They are also used in
# ckanext-qa tasks.py.
class Status:
    """Enum-like singleton mapping archival status ids to their text.

    Status ids are grouped into numeric ranges:

    * ``0-9``   - not broken (``is_broken`` is False)
    * ``10-19`` - broken (``is_broken`` is True)
    * ``20+``   - not sure (``is_broken`` is None)

    All lookups go through the classmethods, which lazily create and share a
    single instance.
    """
    _instance = None

    def __init__(self):
        not_broken = {
            # is_broken = False
            0: 'Archived successfully',
            1: 'Content has not changed',
        }
        broken = {
            # is_broken = True
            10: 'URL invalid',
            11: 'URL request failed',
            12: 'Download error',
        }
        not_sure = {
            # is_broken = None i.e. not sure
            21: 'Chose not to download',
            22: 'Download failure',
            23: 'System error during archival',
        }
        # Merge with update() rather than dict(a, **b): keyword expansion of
        # non-string keys is rejected by Python 3.
        self._by_id = {}
        for group in (not_broken, broken, not_sure):
            self._by_id.update(group)
        # Reverse lookup: status text -> status id. items() works on both
        # Python 2 and Python 3.
        self._by_text = dict((value, key)
                             for key, value in self._by_id.items())

    @classmethod
    def instance(cls):
        """Return the shared Status instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @classmethod
    def by_text(cls, status_txt):
        """Return the status id for the given status text.

        Raises KeyError if the text is not a known status.
        """
        return cls.instance()._by_text[status_txt]

    @classmethod
    def by_id(cls, status_id):
        """Return the status text for the given status id.

        Raises KeyError if the id is not a known status.
        """
        return cls.instance()._by_id[status_id]

    @classmethod
    def is_status_broken(cls, status_id):
        """Return whether the given status id means the resource is broken.

        :returns: False for the "not broken" statuses (ids below 10), True
                  for the "broken" statuses (ids 10-19) and None ("not
                  sure") for anything from 20 upwards.
        """
        # The whole 0-9 range is "not broken", so that status 1 ('Content
        # has not changed') is not reported as broken.
        if status_id < 10:
            return False
        elif status_id < 20:
            return True
        else:
            return None  # not sure

    @classmethod
    def is_ok(cls, status_id):
        """Return True if the status id is one of the success statuses."""
        return status_id in [0, 1]
79

80
81
82
83
# Printable description for each possible value of an archival's
# ``is_broken`` flag (True / False / None meaning "not sure").
broken_enum = {
    True: 'Broken',
    None: 'Not sure if broken',
    False: 'Downloaded OK',
}

84
85

class Archival(Base):
    """
    Details of the archival of resources. Has the filepath for successfully
    archived resources. Basic error history provided for unsuccessful ones.
    """
    __tablename__ = 'archival'

    id = Column(types.UnicodeText, primary_key=True, default=make_uuid)
    package_id = Column(types.UnicodeText, nullable=False, index=True)
    resource_id = Column(types.UnicodeText, nullable=False, index=True)
    resource_timestamp = Column(types.DateTime)  # key to resource_revision

    # Details of the latest archival attempt
    status_id = Column(types.Integer)
    is_broken = Column(types.Boolean)  # Based on status_id. None = not sure
    reason = Column(types.UnicodeText)  # Extra detail explaining the status (cannot be translated)
    url_redirected_to = Column(types.UnicodeText)

    # Details of last successful archival
    cache_filepath = Column(types.UnicodeText)
    cache_url = Column(types.UnicodeText)
    size = Column(types.BigInteger, default=0)
    mimetype = Column(types.UnicodeText)
    hash = Column(types.UnicodeText)
    etag = Column(types.UnicodeText)
    last_modified = Column(types.UnicodeText)

    # History
    first_failure = Column(types.DateTime)
    last_success = Column(types.DateTime)
    failure_count = Column(types.Integer, default=0)

    created = Column(types.DateTime, default=datetime.now)
    updated = Column(types.DateTime)

    def __repr__(self):
        """Return a debug representation: broken state, dataset name,
        resource id and (when broken) the number of failures.

        Note that this looks the package up in the database to show its name.
        """
        if self.is_broken:
            # The column default (0) is only applied at INSERT time, so an
            # object that has not been flushed yet may still hold None here.
            broken_details = '%d failures' % (self.failure_count or 0)
        else:
            broken_details = ''
        package = model.Package.get(self.package_id)
        package_name = package.name if package else '?%s?' % self.package_id
        return '<Archival %s /dataset/%s/resource/%s %s>' % \
            (broken_enum[self.is_broken], package_name, self.resource_id,
             broken_details)

    @classmethod
    def get_for_resource(cls, resource_id):
        '''Returns the archival for the given resource, or if it doesn't exist,
        returns None.'''
        return model.Session.query(cls).filter(cls.resource_id==resource_id).first()

    @classmethod
    def get_for_package(cls, package_id):
        '''Returns the archivals for the given package. May not be any if the
        package has no resources or has not been archived. It checks the
        resources are not deleted.'''
        return model.Session.query(cls) \
                    .filter(cls.package_id==package_id) \
                    .join(model.Resource, cls.resource_id==model.Resource.id) \
                    .filter(model.Resource.state=='active') \
                    .all()

    @classmethod
    def create(cls, resource_id):
        '''Returns a new Archival for the given resource, with its package_id
        looked up from the database. The object is not added to the session
        here.

        The lookup uses ``.one()``, so it raises if the resource cannot be
        matched to exactly one dataset.'''
        c = cls()
        c.resource_id = resource_id

        # Find the package_id for the resource.
        dataset = model.Session.query(model.Package)
        if p.toolkit.check_ckan_version(max_version='2.2.99'):
            # earlier CKANs had ResourceGroup
            dataset = dataset.join(model.ResourceGroup)
        dataset = dataset \
            .join(model.Resource) \
            .filter_by(id=resource_id) \
            .one()
        c.package_id = dataset.id
        return c

    @property
    def status(self):
        '''The status text for status_id, or None if no status is set.'''
        if self.status_id is None:
            return None
        return Status.by_id(self.status_id)

    def as_dict(self):
        '''Returns the archival's columns as a dict, with two extra keys:
        "status" (the status text) and "is_broken_printable".'''
        context = {'model': model}
        archival_dict = dictization.table_dictize(self, context)
        archival_dict['status'] = self.status
        archival_dict['is_broken_printable'] = broken_enum[self.is_broken]
        return archival_dict


def aggregate_archivals_for_a_dataset(archivals):
    '''Returns aggregated archival info for a dataset, given the archivals for
    its resources (returned by get_for_package).

    Archivals that have no status yet (status_id is None) are ignored. If
    there are no archivals, or none of them has a status, every value in the
    returned dict is None.

    :param archivals: A list of the archivals for a dataset's resources
    :type archivals: A list of Archival objects
    :returns: Archival dict about the dataset, with keys:
                status_id
                status
                reason
                is_broken
    '''
    archival_dict = {'status_id': None, 'status': None,
                     'reason': None, 'is_broken': None}
    for archival in archivals:
        if archival.status_id is None:
            # No status recorded for this archival: it cannot be ranked
            # (and None does not compare with ints on Python 3).
            continue
        # status_id takes the highest id i.e. pessimistic
        # reason matches the status_id
        if archival_dict['status_id'] is None or \
                archival.status_id > archival_dict['status_id']:
            archival_dict['status_id'] = archival.status_id
            archival_dict['reason'] = archival.reason

    # Only look the status up when one was found; this covers both an empty
    # list and archivals that all lack a status.
    if archival_dict['status_id'] is not None:
        archival_dict['status'] = Status.by_id(archival_dict['status_id'])
        archival_dict['is_broken'] = \
            Status.is_status_broken(archival_dict['status_id'])
    return archival_dict


206
207
208
def init_tables(engine):
    """Create the tables of every model declared on ``Base`` using the given
    engine, then log that the set-up is done."""
    tables = Base.metadata
    tables.create_all(engine)
    log.info('Archiver database tables are set-up')