Store files that need fast access in the database

An Object Storage read is slower than a disk or database access. It might take ~200 to 300ms to retrieve a file's content. This is not an issue for attachments such as the pdf files or any attachment that we want to read on demand. But that's too slow for files needed to render a web page. We'll store in the database: * Assets (js, css, ...). As a side effect, the databases will be more portable: as assets are rebuilt frequently, storing them in the Object Storage led the integration server to try to read assets deleted long ago. * Attachments linked to Binary fields named 'image_small', 'image_medium', 'web_icon_data'. Those fields are often used on kanban views that display a lot of images and retrieving them all was then very slow (Odoo does not do async requests). The migration to S3 is no longer called during initialization of the registry: it would be too slow as we would have to determine whether the attachments must be kept in the database or sent to S3 on each new start. It means we have to call `env['ir.attachment'].force_storage()` to run the migration.
2026-06-24 02:08:36 +00:00 · 2017-01-16 14:04:24 +01:00
parent 4eecb899f3
commit 795692ad78
5 changed files with 128 additions and 34 deletions
@@ -36,7 +36,6 @@ This addon must be added in the server wide addons with (``--load`` option):
 Limitations
 -----------

-When the ``ir.attachment`` model is started, it will automatically migrate
-the attachments which are not stored in S3 yet. This might be an issue when
-the number of attachments is huge. In that case, you might have more control
-by calling yourself ``env['ir.attachment'].force_storage()``.
+* You need to call ``env['ir.attachment'].force_storage()`` after
+  having changed the ``ir_attachment.location`` configuration in order to
+  migrate the existing attachments to S3.
@@ -5,7 +5,7 @@

 {'name': 'Attachments on S3 storage',
 'summary': 'Store assets and attachments on a S3 compatible object storage',
- 'version': '10.0.1.0.0',
+ 'version': '10.0.1.1.0',
 'author': 'Camptocamp,Odoo Community Association (OCA)',
 'license': 'AGPL-3',
 'category': 'Knowledge Management',
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+# Copyright 2016 Camptocamp SA
+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html)
+
+import logging
+import os
+
+from contextlib import closing
+
+import odoo
+
+_logger = logging.getLogger(__name__)
+
+
+def migrate(cr, version):
+    """Move fast-access attachments from the object storage to the database.
+
+    Nothing is done when ``version`` is falsy, when the
+    ``ir_attachment.location`` parameter is missing or is not ``s3``, or
+    when the ``AWS_BUCKETNAME`` environment variable is not set.
+
+    Otherwise, the attachments whose ``store_fname`` starts with
+    ``s3://`` and which belong to ``ir.ui.view`` or to one of the Binary
+    fields ``image_small``, ``image_medium``, ``web_icon_data`` are
+    re-written one by one on a dedicated cursor.  Each attachment is
+    committed on its own; a failing attachment is logged, rolled back
+    and skipped so the remaining ones are still processed.
+
+    :param cr: database cursor of the running migration
+    :param version: version passed by the migration machinery
+    """
+    if not version:
+        return
+    cr.execute("""
+        SELECT value FROM ir_config_parameter
+        WHERE key = 'ir_attachment.location'
+    """)
+    row = cr.fetchone()
+    bucket = os.environ.get('AWS_BUCKETNAME')
+
+    # fetchone() returns None when the parameter does not exist, so
+    # check the row itself before reading its first column
+    if not row or row[0] != 's3' or not bucket:
+        return
+
+    uid = odoo.SUPERUSER_ID
+    registry = odoo.modules.registry.RegistryManager.get(cr.dbname)
+    new_cr = registry.cursor()
+    with closing(new_cr):
+        with odoo.api.Environment.manage():
+            env = odoo.api.Environment(new_cr, uid, {})
+            store_local = env['ir.attachment'].search(
+                [('store_fname', '=like', 's3://%'),
+                 '|', ('res_model', '=', 'ir.ui.view'),
+                      ('res_field', 'in', ['image_small',
+                                           'image_medium',
+                                           'web_icon_data'])
+                 ],
+            )
+
+            _logger.info(
+                'Moving %d attachments from S3 to DB for fast access',
+                len(store_local)
+            )
+            for attachment_id in store_local.ids:
+                # force re-storing the document, will move
+                # it from the object storage to the database
+
+                # This is a trick to avoid having the 'datas' function
+                # fields computed for every attachment on each
+                # iteration of the loop.  The former issue being that
+                # it reads the content of the file of ALL the
+                # attachments on each loop.
+                try:
+                    env.clear()
+                    attachment = env['ir.attachment'].browse(attachment_id)
+                    _logger.info('Moving attachment %s (id: %s)',
+                                 attachment.name, attachment.id)
+                    attachment.write({'datas': attachment.datas})
+                    new_cr.commit()
+                except Exception:
+                    # best effort: keep going with the other
+                    # attachments, but leave a trace of the failure
+                    # instead of swallowing it silently
+                    _logger.exception(
+                        'Could not move attachment (id: %s) from S3 to DB',
+                        attachment_id
+                    )
+                    new_cr.rollback()
@@ -1,20 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2016 Camptocamp SA
-# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html)
-
-import os
-
-
-def migrate(cr, version):
-    cr.execute("""
-        SELECT value FROM ir_config_parameter
-        WHERE key = 'ir_attachment.location'
-    """)
-    row = cr.fetchone()
-    bucket = os.environ.get('AWS_BUCKETNAME')
-    if row[0] == 's3' and bucket:
-        cr.execute("""
-            UPDATE ir_attachment
-            SET store_fname = 's3://' || %s || '/' || store_fname
-            WHERE store_fname IS NOT NULL AND store_fname NOT LIKE '%%/%%'
-        """, (os.environ['AWS_BUCKETNAME'],))
@@ -30,6 +30,68 @@ except ImportError:
 class IrAttachment(models.Model):
    _inherit = "ir.attachment"

+    @api.multi
+    def _store_in_db_when_s3(self):
+        """ Tell whether this attachment stays in the database under S3
+
+        The object storage is slower than the database/filesystem, so
+        some attachments are kept in the database even when S3 is the
+        configured storage:
+
+        * assets, which are attached to 'ir.ui.view'. Besides speed,
+          this makes the database more portable: invalidated assets
+          are deleted, so an old database does not end up with
+          attachments pointing to deleted assets.
+        * the 'Binary' fields image_small, image_medium and
+          web_icon_data, which must be fast to read as they are often
+          displayed in kanbans / lists.
+
+        Must be called on a single record.
+
+        :return: True when the attachment must be stored in db
+        """
+        self.ensure_one()
+
+        # assets are stored in 'ir.ui.view'
+        if self.res_model == 'ir.ui.view':
+            return True
+
+        # Binary fields are stored with the name of the field in
+        # 'res_field'. 'image' fields can be rather large and should
+        # usually not be requested in bulk in lists, so they are not
+        # part of the fields kept in the database.
+        fast_access_fields = ('image_small', 'image_medium', 'web_icon_data')
+        return self.res_field in fast_access_fields
+
+    def _inverse_datas(self):
+        """Store ``datas``, keeping files that need fast access in the db
+
+        When the storage location is 's3' and ``_store_in_db_when_s3()``
+        selects the attachment, its content is written to ``db_datas``
+        together with the recomputed ``file_size`` and ``checksum``,
+        ``index_content`` and ``store_fname`` are reset, and the file
+        previously referenced by ``store_fname`` (if any) is deleted.
+        Every other attachment goes through the parent implementation.
+        """
+        location = self._storage()
+        for attach in self:
+            # ask the current record, not ``self``: the check calls
+            # ensure_one() and so cannot run on a multi-record set
+            if location == 's3' and attach._store_in_db_when_s3():
+                # compute the fields that depend on datas
+                value = attach.datas
+                bin_data = value.decode('base64') if value else ''
+                vals = {
+                    'file_size': len(bin_data),
+                    'checksum': self._compute_checksum(bin_data),
+                    'db_datas': value,
+                    # we seriously don't need index content on those fields
+                    'index_content': False,
+                    'store_fname': False,
+                }
+                # read the former file name before it is reset by the
+                # write below, so the stored file can be removed after
+                fname = attach.store_fname
+                # write as superuser, as user probably does not
+                # have write access
+                super(IrAttachment, attach.sudo()).write(vals)
+                if fname:
+                    self._file_delete(fname)
+                continue
+            super(IrAttachment, attach)._inverse_datas()
+
    @api.model
    def _get_s3_bucket(self, name=None):
        """Connect to S3 and return the bucket
@@ -288,12 +350,3 @@ class IrAttachment(models.Model):
            self._force_storage_s3()
        else:
            return super(IrAttachment, self).force_storage()
-
-    @api.model_cr
-    def _register_hook(self):
-        # We need to call the migration on the loading of the model
-        # because when we are upgrading addons, some of them might
-        # add attachments, and to be sure the are migrated to S3,
-        # we need to call the migration here.
-        super(IrAttachment, self)._register_hook()
-        self.sudo()._force_storage_s3()