Merge pull request #6 from guewen/9.0-attachment-fast-web-access

Store files that need fast access in the database
2026-06-24 16:48:36 +00:00 · 2017-01-16 17:27:31 +01:00
parent 022f9003e8 624cc0b743
commit 4cd8c214ec
4 changed files with 145 additions and 16 deletions
@@ -36,7 +36,6 @@ This addon must be added in the server wide addons with (``--load`` option):
 Limitations
 -----------

-When the ``ir.attachment`` model is started, it will automatically migrate
-the attachments which are not stored in S3 yet. This might be an issue when
-the number of attachments is huge. In that case, you might have more control
-by calling yourself ``env['ir.attachment'].force_storage()``.
+* You need to call ``env['ir.attachment'].force_storage()`` after
+  having changed the ``ir_attachment.location`` configuration in order to
+  migrate the existing attachments to S3.
@@ -5,7 +5,7 @@

 {'name': 'Attachments on S3 storage',
 'summary': 'Store assets and attachments on a S3 compatible object storage',
- 'version': '9.0.1.1.0',
+ 'version': '9.0.1.2.0',
 'author': 'Camptocamp,Odoo Community Association (OCA)',
 'license': 'AGPL-3',
 'category': 'Knowledge Management',
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+# Copyright 2016 Camptocamp SA
+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html)
+
+import logging
+import os
+
+from contextlib import closing
+
+import openerp
+
+_logger = logging.getLogger(__name__)
+
+
+def migrate(cr, version):
+    if not version:
+        return
+    cr.execute("""
+        SELECT value FROM ir_config_parameter
+        WHERE key = 'ir_attachment.location'
+    """)
+    row = cr.fetchone()
+    bucket = os.environ.get('AWS_BUCKETNAME')
+
+    if row[0] == 's3' and bucket:
+        uid = openerp.SUPERUSER_ID
+        registry = openerp.modules.registry.RegistryManager.get(cr.dbname)
+        new_cr = registry.cursor()
+        with closing(new_cr):
+            with openerp.api.Environment.manage():
+                env = openerp.api.Environment(new_cr, uid, {})
+                store_local = env['ir.attachment'].search(
+                    [('store_fname', '=like', 's3://%'),
+                     '|', ('res_model', '=', 'ir.ui.view'),
+                          ('res_field', 'in', ['image_small',
+                                               'image_medium',
+                                               'web_icon_data'])
+                     ],
+                )
+
+                _logger.info(
+                    'Moving %d attachments from S3 to DB for fast access',
+                    len(store_local)
+                )
+                for attachment_id in store_local.ids:
+                    # force re-storing the document, will move
+                    # it from the object storage to the database
+
+                    # This is a trick to avoid having the 'datas' function
+                    # fields computed for every attachment on each
+                    # iteration of the loop.  The former issue being that
+                    # it reads the content of the file of ALL the
+                    # attachments on each loop.
+                    try:
+                        env.clear()
+                        attachment = env['ir.attachment'].browse(attachment_id)
+                        _logger.info('Moving attachment %s (id: %s)',
+                                     attachment.name, attachment.id)
+                        attachment.write({'datas': attachment.datas})
+                        new_cr.commit()
+                    except:
+                        new_cr.rollback()
@@ -13,7 +13,7 @@ from functools import partial
 import psycopg2

 import openerp
-from openerp import _, api, exceptions, models, SUPERUSER_ID
+from openerp import _, api, exceptions, fields, models
 from ..s3uri import S3Uri

 _logger = logging.getLogger(__name__)
@@ -30,6 +30,84 @@ except ImportError:
 class IrAttachment(models.Model):
    _inherit = "ir.attachment"

+    # this field is in old API, we need to override the 'inverse'
+    # field to modify the behavior when using S3, so we adapt
+    # the calls from a new API field
+    datas = fields.Binary(
+        compute='_compute_datas',
+        inverse='_inverse_datas',
+        string='File Content',
+        nodrop=True,
+    )
+
+    @api.depends('store_fname', 'db_datas')
+    def _compute_datas(self):
+        values = self._data_get('datas', None)
+        for attach in self:
+            attach.datas = values.get(attach.id)
+
+    def _inverse_datas(self):
+        # override in order to store files that need fast access,
+        # we keep them in the database instead of the object storage
+        location = self._storage()
+        for attach in self:
+            if location == 's3' and self._store_in_db_when_s3():
+                # compute the fields that depend on datas
+                value = attach.datas
+                bin_data = value and value.decode('base64') or ''
+                vals = {
+                    'file_size': len(bin_data),
+                    'checksum': self._compute_checksum(bin_data),
+                    'db_datas': value,
+                    # we seriously don't need index content on those fields
+                    'index_content': False,
+                    'store_fname': False,
+                }
+                fname = attach.store_fname
+                # write as superuser, as user probably does not
+                # have write access
+                super(IrAttachment, attach.sudo()).write(vals)
+                if fname:
+                    self._file_delete(fname)
+                continue
+            self._data_set('datas', attach.datas, None)
+
+    @api.multi
+    def _store_in_db_when_s3(self):
+        """ Return whether an attachment must be stored in db
+
+        When we are using S3. This is sometimes required because
+        the object storage is slower than the database/filesystem.
+
+        We store image_small and image_medium from 'Binary' fields
+        because they should be fast to read as they are often displayed
+        in kanbans / lists. The same for web_icon_data.
+
+        We store the assets locally as well. Not only for performance,
+        but also because it improves the portability of the database:
+        when assets are invalidated, they are deleted so we don't have
+        an old database with attachments pointing to deleted assets.
+
+        """
+        self.ensure_one()
+
+        # assets
+        if self.res_model == 'ir.ui.view':
+            # assets are stored in 'ir.ui.view'
+            return True
+
+        # Binary fields
+        if self.res_field:
+            # Binary fields are stored with the name of the field in
+            # 'res_field'
+            local_fields = ('image_small', 'image_medium', 'web_icon_data')
+            # 'image' fields can be rather large and should usually
+            # not be requested in bulk in lists
+            if self.res_field and self.res_field in local_fields:
+                return True
+
+        return False
+
    @api.model
    def _get_s3_bucket(self, name=None):
        """Connect to S3 and return the bucket
@@ -288,13 +366,3 @@ class IrAttachment(models.Model):
            self._force_storage_s3()
        else:
            return super(IrAttachment, self).force_storage()
-
-    @api.cr
-    def _register_hook(self, cr):
-        # We need to call the migration on the loading of the model
-        # because when we are upgrading addons, some of them might
-        # add attachments, and to be sure the are migrated to S3,
-        # we need to call the migration here.
-        super(IrAttachment, self)._register_hook(cr)
-        env = api.Environment(cr, SUPERUSER_ID, {})
-        env['ir.attachment']._force_storage_s3()