Store files that need fast access in the database

An Object Storage read is slower than a disk or database access.
It might take ~200 to 300ms to retrieve a file content.

This is not an issue for attachments such as the pdf files or any
attachment that we want to read on demand. But that's too slow for
files needed to render a web page.

We'll store in the database:

* Assets (js, css, ...). As a side effect, the databases will be more
  portable: assets are rebuilt frequently, and storing them in the Object
  Storage led the integration server to try to read assets that had been
  deleted long ago.
* Attachments linked to Binary fields named 'image_small',
  'image_medium', 'web_icon_data'. Those fields are often used on kanban
  views that display a lot of images and retrieving them all was then
  very slow (Odoo does not do async requests).

The migration to S3 is no longer called during initialization of the
registry: it would be too slow as we would have to determine whether the
attachments must be kept in database or sent to S3 on each new start. It
means we have to call `env['ir.attachment'].force_storage()` to run the
migration.
This commit is contained in:
Guewen Baconnier
2017-01-16 14:04:24 +01:00
parent 4eecb899f3
commit 795692ad78
5 changed files with 128 additions and 34 deletions
+3 -4
View File
@@ -36,7 +36,6 @@ This addon must be added in the server wide addons with (``--load`` option):
Limitations
-----------
When the ``ir.attachment`` model is started, it will automatically migrate
the attachments which are not stored in S3 yet. This might be an issue when
the number of attachments is huge. In that case, you might have more control
by calling yourself ``env['ir.attachment'].force_storage()``.
* You need to call ``env['ir.attachment'].force_storage()`` after
having changed the ``ir_attachment.location`` configuration in order to
migrate the existing attachments to S3.
+1 -1
View File
@@ -5,7 +5,7 @@
{'name': 'Attachments on S3 storage',
'summary': 'Store assets and attachments on a S3 compatible object storage',
'version': '10.0.1.0.0',
'version': '10.0.1.1.0',
'author': 'Camptocamp,Odoo Community Association (OCA)',
'license': 'AGPL-3',
'category': 'Knowledge Management',
@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
# Copyright 2016 Camptocamp SA
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html)
import logging
import os
from contextlib import closing
import odoo
_logger = logging.getLogger(__name__)
def migrate(cr, version):
    """Move the attachments needing fast access from S3 to the database.

    The attachments concerned are the ones already stored on S3 (their
    ``store_fname`` starts with ``s3://``) which are either linked to
    ``ir.ui.view`` (assets) or to one of the Binary fields
    ``image_small``, ``image_medium`` or ``web_icon_data``.

    Nothing is done when ``version`` is empty, when the
    ``ir_attachment.location`` parameter is missing or is not ``s3``,
    or when the ``AWS_BUCKETNAME`` environment variable is not set.

    Each attachment is moved and committed on its own, using a
    dedicated cursor. A failure on one attachment is logged and rolled
    back, then the migration carries on with the next one.

    :param cr: database cursor of the migration
    :param version: version passed to the migration script
    """
    if not version:
        return
    cr.execute("""
        SELECT value FROM ir_config_parameter
        WHERE key = 'ir_attachment.location'
    """)
    row = cr.fetchone()
    bucket = os.environ.get('AWS_BUCKETNAME')
    # the parameter may not exist at all, in which case no row is returned
    if not row or row[0] != 's3' or not bucket:
        return

    uid = odoo.SUPERUSER_ID
    registry = odoo.modules.registry.RegistryManager.get(cr.dbname)
    new_cr = registry.cursor()
    with closing(new_cr):
        with odoo.api.Environment.manage():
            env = odoo.api.Environment(new_cr, uid, {})
            store_local = env['ir.attachment'].search(
                [('store_fname', '=like', 's3://%'),
                 '|', ('res_model', '=', 'ir.ui.view'),
                      ('res_field', 'in', ['image_small',
                                           'image_medium',
                                           'web_icon_data'])
                 ],
            )

            _logger.info(
                'Moving %d attachments from S3 to DB for fast access',
                len(store_local)
            )
            for attachment_id in store_local.ids:
                # force re-storing the document, will move
                # it from the object storage to the database

                # This is a trick to avoid having the 'datas' function
                # fields computed for every attachment on each
                # iteration of the loop. The former issue being that
                # it reads the content of the file of ALL the
                # attachments on each loop.
                try:
                    env.clear()
                    attachment = env['ir.attachment'].browse(attachment_id)
                    _logger.info('Moving attachment %s (id: %s)',
                                 attachment.name, attachment.id)
                    attachment.write({'datas': attachment.datas})
                    new_cr.commit()
                except Exception:
                    # best effort: keep going with the other attachments,
                    # but leave a trace of what could not be moved
                    new_cr.rollback()
                    _logger.exception(
                        'Could not move attachment (id: %s) from S3 to DB',
                        attachment_id
                    )
@@ -1,20 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2016 Camptocamp SA
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html)
import os
def migrate(cr, version):
    """Prefix the S3 attachments' ``store_fname`` with the bucket.

    When the ``ir_attachment.location`` parameter is ``s3`` and the
    ``AWS_BUCKETNAME`` environment variable is set, every
    ``store_fname`` which is set and contains no ``/`` is rewritten as
    ``s3://<bucket>/<store_fname>``.

    Nothing is done when the parameter is missing or is not ``s3``, or
    when the bucket name is not set.

    :param cr: database cursor of the migration
    :param version: version passed to the migration script (unused)
    """
    cr.execute("""
        SELECT value FROM ir_config_parameter
        WHERE key = 'ir_attachment.location'
    """)
    row = cr.fetchone()
    bucket = os.environ.get('AWS_BUCKETNAME')
    # the parameter may not exist at all, in which case no row is returned
    if not row or row[0] != 's3' or not bucket:
        return
    cr.execute("""
        UPDATE ir_attachment
        SET store_fname = 's3://' || %s || '/' || store_fname
        WHERE store_fname IS NOT NULL AND store_fname NOT LIKE '%%/%%'
    """, (bucket,))
+62 -9
View File
@@ -30,6 +30,68 @@ except ImportError:
class IrAttachment(models.Model):
_inherit = "ir.attachment"
@api.multi
def _store_in_db_when_s3(self):
    """ Tell whether the attachment must stay in the database

    This applies when S3 is used as storage: the object storage is
    slower than the database/filesystem, so some attachments are
    better kept in the database.

    The 'image_small' and 'image_medium' 'Binary' fields are kept
    in the database because they should be fast to read: they are
    often displayed in kanbans / lists. The same goes for
    'web_icon_data'.

    The assets are kept in the database as well. Not only for
    performance, but also because it improves the portability of the
    database: when assets are invalidated, they are deleted so we
    don't have an old database with attachments pointing to deleted
    assets.

    :return: True if the attachment must be stored in the database,
             False otherwise
    """
    self.ensure_one()

    # the assets are the attachments linked to 'ir.ui.view'
    if self.res_model == 'ir.ui.view':
        return True

    # For Binary fields, 'res_field' holds the name of the field.
    # 'image' fields are left out on purpose: they can be rather large
    # and should usually not be requested in bulk in lists
    fast_access_fields = ('image_small', 'image_medium', 'web_icon_data')
    field_name = self.res_field
    return bool(field_name) and field_name in fast_access_fields
def _inverse_datas(self):
    """ Store the content of 'datas', keeping some files in database

    Override in order to store the files that need fast access in the
    database instead of the object storage.

    When the storage location is 's3' and the attachment must be kept
    in the database (see ``_store_in_db_when_s3``), the content is
    written in 'db_datas' and the file previously referenced by
    'store_fname', if any, is deleted. Any other attachment goes
    through the standard implementation.
    """
    location = self._storage()
    for attach in self:
        # the check is done record by record: it calls ensure_one(),
        # so it cannot be called on the whole recordset
        if location == 's3' and attach._store_in_db_when_s3():
            # compute the fields that depend on datas
            value = attach.datas
            bin_data = value.decode('base64') if value else ''
            vals = {
                'file_size': len(bin_data),
                'checksum': self._compute_checksum(bin_data),
                'db_datas': value,
                # we seriously don't need index content on those fields
                'index_content': False,
                'store_fname': False,
            }

            # keep the former file name before it is reset by the
            # write, so the file can be deleted afterwards
            fname = attach.store_fname
            # write as superuser, as user probably does not
            # have write access
            super(IrAttachment, attach.sudo()).write(vals)
            if fname:
                self._file_delete(fname)
            continue
        super(IrAttachment, attach)._inverse_datas()
@api.model
def _get_s3_bucket(self, name=None):
"""Connect to S3 and return the bucket
@@ -288,12 +350,3 @@ class IrAttachment(models.Model):
self._force_storage_s3()
else:
return super(IrAttachment, self).force_storage()
@api.model_cr
def _register_hook(self):
    """ Run the migration to S3 when the model is loaded

    The migration has to be called on the loading of the model
    because, when addons are upgraded, some of them might add
    attachments. Calling the migration here makes sure that they are
    migrated to S3.
    """
    super(IrAttachment, self)._register_hook()
    attachments_as_superuser = self.sudo()
    attachments_as_superuser._force_storage_s3()