Abstract object storage in attachment_s3

Using the base_attachment_object_storage module, the same way
attachment_swift is done. Fixed a few issues along the way in
attachment_swift.
This commit is contained in:
Guewen Baconnier
2017-09-20 15:42:25 +02:00
parent edc700c807
commit cbef735c76
4 changed files with 77 additions and 241 deletions
+2 -2
View File
@@ -9,11 +9,11 @@
'author': 'Camptocamp,Odoo Community Association (OCA)', 'author': 'Camptocamp,Odoo Community Association (OCA)',
'license': 'AGPL-3', 'license': 'AGPL-3',
'category': 'Knowledge Management', 'category': 'Knowledge Management',
'depends': ['base'], 'depends': ['base', 'base_attachment_object_storage'],
'external_dependencies': { 'external_dependencies': {
'python': ['boto'], 'python': ['boto'],
}, },
'website': 'http://www.camptocamp.com', 'website': 'https://www.camptocamp.com',
'data': [], 'data': [],
'installable': True, 'installable': True,
} }
+30 -219
View File
@@ -7,12 +7,8 @@ import base64
import logging import logging
import os import os
import xml.dom.minidom import xml.dom.minidom
from contextlib import closing, contextmanager
from functools import partial from functools import partial
import psycopg2
import odoo
from odoo import _, api, exceptions, models from odoo import _, api, exceptions, models
from ..s3uri import S3Uri from ..s3uri import S3Uri
@@ -30,67 +26,10 @@ except ImportError:
class IrAttachment(models.Model): class IrAttachment(models.Model):
_inherit = "ir.attachment" _inherit = "ir.attachment"
@api.multi def _get_stores(self):
def _store_in_db_when_s3(self): l = ['s3']
""" Return whether an attachment must be stored in db l += super(IrAttachment, self)._get_stores()
return l
When we are using S3. This is sometimes required because
the object storage is slower than the database/filesystem.
We store image_small and image_medium from 'Binary' fields
because they should be fast to read as they are often displayed
in kanbans / lists. The same for web_icon_data.
We store the assets locally as well. Not only for performance,
but also because it improves the portability of the database:
when assets are invalidated, they are deleted so we don't have
an old database with attachments pointing to deleted assets.
"""
self.ensure_one()
# assets
if self.res_model == 'ir.ui.view':
# assets are stored in 'ir.ui.view'
return True
# Binary fields
if self.res_field:
# Binary fields are stored with the name of the field in
# 'res_field'
local_fields = ('image_small', 'image_medium', 'web_icon_data')
# 'image' fields can be rather large and should usually
# not be requests in bulk in lists
if self.res_field and self.res_field in local_fields:
return True
return False
def _inverse_datas(self):
# override in order to store files that need fast access,
# we keep them in the database instead of the object storage
location = self._storage()
for attach in self:
if location == 's3' and self._store_in_db_when_s3():
# compute the fields that depend on datas
value = attach.datas
bin_data = value and value.decode('base64') or ''
vals = {
'file_size': len(bin_data),
'checksum': self._compute_checksum(bin_data),
'db_datas': value,
# we seriously don't need index content on those fields
'index_content': False,
'store_fname': False,
}
fname = attach.store_fname
# write as superuser, as user probably does not
# have write access
super(IrAttachment, attach.sudo()).write(vals)
if fname:
self._file_delete(fname)
continue
super(IrAttachment, attach)._inverse_datas()
@api.model @api.model
def _get_s3_bucket(self, name=None): def _get_s3_bucket(self, name=None):
@@ -157,38 +96,32 @@ class IrAttachment(models.Model):
return msg return msg
@api.model @api.model
def _file_read_s3(self, fname, bin_size=False): def _store_file_read(self, fname, bin_size=False):
s3uri = S3Uri(fname)
try:
bucket = self._get_s3_bucket(name=s3uri.bucket())
except exceptions.UserError:
_logger.exception(
"error reading attachment '%s' from object storage", fname
)
return ''
filekey = bucket.get_key(s3uri.item())
if filekey:
read = base64.b64encode(filekey.get_contents_as_string())
else:
read = ''
_logger.info("attachment '%s' missing on object storage", fname)
return read
@api.model
def _file_read(self, fname, bin_size=False):
if fname.startswith('s3://'): if fname.startswith('s3://'):
return self._file_read_s3(fname, bin_size=bin_size) s3uri = S3Uri(fname)
try:
bucket = self._get_s3_bucket(name=s3uri.bucket())
except exceptions.UserError:
_logger.exception(
"error reading attachment '%s' from object storage", fname
)
return ''
filekey = bucket.get_key(s3uri.item())
if filekey:
read = base64.b64encode(filekey.get_contents_as_string())
else:
read = ''
_logger.info(
"attachment '%s' missing on object storage", fname
)
return read
else: else:
_super = super(IrAttachment, self) return super(IrAttachment, self)._store_file_read(fname, bin_size)
return _super._file_read(fname, bin_size=bin_size)
@api.model @api.model
def _file_write(self, value, checksum): def _store_file_write(self, key, bin_data):
storage = self._storage() if self._storage() == 's3':
if storage == 's3':
bucket = self._get_s3_bucket() bucket = self._get_s3_bucket()
bin_data = value.decode('base64')
key = self._compute_checksum(bin_data)
filekey = bucket.get_key(key) or bucket.new_key(key) filekey = bucket.get_key(key) or bucket.new_key(key)
filename = 's3://%s/%s' % (bucket.name, key) filename = 's3://%s/%s' % (bucket.name, key)
try: try:
@@ -203,18 +136,13 @@ class IrAttachment(models.Model):
(self._parse_s3_error(error),) (self._parse_s3_error(error),)
) )
else: else:
filename = super(IrAttachment, self)._file_write(value, checksum) _super = super(IrAttachment, self)
filename = _super._store_file_write(key, bin_data)
return filename return filename
@api.model @api.model
def _file_delete(self, fname): def _store_file_delete(self, fname):
if fname.startswith('s3://'): if fname.startswith('s3://'):
# using SQL to include files hidden through unlink or due to record
# rules
cr = self.env.cr
cr.execute("SELECT COUNT(*) FROM ir_attachment "
"WHERE store_fname = %s", (fname,))
count = cr.fetchone()[0]
s3uri = S3Uri(fname) s3uri = S3Uri(fname)
bucket_name = s3uri.bucket() bucket_name = s3uri.bucket()
item_name = s3uri.item() item_name = s3uri.item()
@@ -223,7 +151,7 @@ class IrAttachment(models.Model):
if bucket_name == os.environ.get('AWS_BUCKETNAME'): if bucket_name == os.environ.get('AWS_BUCKETNAME'):
bucket = self._get_s3_bucket() bucket = self._get_s3_bucket()
filekey = bucket.get_key(item_name) filekey = bucket.get_key(item_name)
if not count and filekey: if filekey:
try: try:
filekey.delete() filekey.delete()
_logger.info( _logger.info(
@@ -236,121 +164,4 @@ class IrAttachment(models.Model):
'Error during deletion of the file %s' % fname 'Error during deletion of the file %s' % fname
) )
else: else:
super(IrAttachment, self)._file_delete(fname) super(IrAttachment, self)._store_file_delete(fname)
@api.multi
def _move_attachment_to_s3(self):
self.ensure_one()
_logger.info('inspecting attachment %s (%d)',
self.name, self.id)
fname = self.store_fname
if fname:
# migrating from filesystem filestore
# or from the old 'store_fname' without the bucket name
_logger.info('moving %s on the object storage', fname)
self.write({'datas': self.datas,
# this is required otherwise the
# mimetype gets overriden with
# 'application/octet-stream'
# on assets
'mimetype': self.mimetype})
_logger.info('moved %s on the object storage', fname)
full_path = self._full_path(fname)
_logger.info('cleaning fs self')
if os.path.exists(full_path):
try:
os.unlink(full_path)
except OSError:
_logger.info(
"_file_delete could not unlink %s",
full_path, exc_info=True
)
except IOError:
# Harmless and needed for race conditions
_logger.info(
"_file_delete could not unlink %s",
full_path, exc_info=True
)
elif self.db_datas:
_logger.info('moving on the object storage from database')
self.write({'datas': self.datas})
@api.model
def _force_storage_s3(self, new_cr=False):
if not self.env['res.users'].browse(self.env.uid)._is_admin():
raise exceptions.AccessError(
_('Only administrators can execute this action.')
)
storage = self._storage()
if storage != 's3':
return
_logger.info('migrating files to the object storage')
domain = ['!', ('store_fname', '=like', 's3://%'),
'|',
('res_field', '=', False),
('res_field', '!=', False)]
# We do a copy of the environment so we can workaround the
# cache issue below. We do not create a new cursor because
# it causes serialization issues due to concurrent updates on
# attachments during the installation
with self.do_in_new_env(new_cr=new_cr) as new_env:
attachment_model_env = new_env['ir.attachment']
ids = attachment_model_env.search(domain).ids
for attachment_id in ids:
try:
with new_env.cr.savepoint():
# check that no other transaction has
# locked the row, don't send a file to S3
# in that case
self.env.cr.execute("SELECT id "
"FROM ir_attachment "
"WHERE id = %s "
"FOR UPDATE NOWAIT",
(attachment_id,),
log_exceptions=False)
# This is a trick to avoid having the 'datas' function
# fields computed for every attachment on each
# iteration of the loop. The former issue being that
# it reads the content of the file of ALL the
# attachments on each loop.
new_env.clear()
attachment = attachment_model_env.browse(attachment_id)
attachment._move_attachment_to_s3()
except psycopg2.OperationalError:
_logger.error('Could not migrate attachment %s to S3',
attachment_id)
@contextmanager
def do_in_new_env(self, new_cr=False):
""" Context manager that yields a new environment
Using a new Odoo Environment thus a new PG transaction.
"""
with api.Environment.manage():
if new_cr:
registry = odoo.modules.registry.RegistryManager.get(
self.env.cr.dbname
)
with closing(registry.cursor()) as cr:
try:
yield self.env(cr=cr)
except:
cr.rollback()
raise
else:
# disable pylint error because this is a valid commit,
# we are in a new env
cr.commit() # pylint: disable=invalid-commit
else:
# make a copy
yield self.env()
@api.model
def force_storage(self):
storage = self._storage()
if storage == 's3':
self._force_storage_s3()
else:
return super(IrAttachment, self).force_storage()
+13 -14
View File
@@ -24,7 +24,10 @@ except ImportError:
class IrAttachment(models.Model): class IrAttachment(models.Model):
_inherit = 'ir.attachment' _inherit = 'ir.attachment'
store_name = 'swift' def _get_stores(self):
l = ['swift']
l += super(IrAttachment, self)._get_stores()
return l
@api.model @api.model
def _get_swift_connection(self): def _get_swift_connection(self):
@@ -67,13 +70,11 @@ class IrAttachment(models.Model):
else: else:
return super(IrAttachment, self)._store_file_read(fname, bin_size) return super(IrAttachment, self)._store_file_read(fname, bin_size)
def _store_file_write(self, value, checksum): def _store_file_write(self, key, bin_data):
if self._storage() == self.store_name: if self._storage() == 'swift':
container = os.environ.get('SWIFT_WRITE_CONTAINER') container = os.environ.get('SWIFT_WRITE_CONTAINER')
conn = self._get_swift_connection() conn = self._get_swift_connection()
conn.put_container(container) conn.put_container(container)
bin_data = value.decode('base64')
key = self._compute_checksum(bin_data)
filename = 'swift://{}/{}'.format(container, key) filename = 'swift://{}/{}'.format(container, key)
try: try:
conn.put_object(container, key, bin_data) conn.put_object(container, key, bin_data)
@@ -82,14 +83,16 @@ class IrAttachment(models.Model):
raise exceptions.UserError(_('Error writing to Swift')) raise exceptions.UserError(_('Error writing to Swift'))
else: else:
_super = super(IrAttachment, self) _super = super(IrAttachment, self)
filename = _super._store_file_write(value, checksum) filename = _super._store_file_write(key, bin_data)
return filename return filename
@api.model @api.model
def _file_delete_from_store(self, fname): def _store_file_delete(self, fname):
if fname.startswith('swift://'): if fname.startswith('swift://'):
swifturi = SwiftUri(fname) swifturi = SwiftUri(fname)
container = swifturi.container() container = swifturi.container()
# delete the file only if it is on the current configured bucket
# otherwise, we might delete files used on a different environment
if container == os.environ.get('SWIFT_WRITE_CONTAINER'): if container == os.environ.get('SWIFT_WRITE_CONTAINER'):
conn = self._get_swift_connection() conn = self._get_swift_connection()
try: try:
@@ -97,11 +100,7 @@ class IrAttachment(models.Model):
except ClientException: except ClientException:
_logger.exception( _logger.exception(
_('Error deleting an object on the Swift store')) _('Error deleting an object on the Swift store'))
raise exceptions.UserError(_('Error deleting on Swift')) # we ignore the error, file will stay on the object
# storage but won't disrupt the process
else: else:
super(IrAttachment, self)._file_delete(fname) super(IrAttachment, self)._file_delete_from_store(fname)
def _get_stores(self):
l = [self.store_name]
l += super(IrAttachment, self)._get_stores()
return l
@@ -87,10 +87,29 @@ class IrAttachment(models.Model):
_super = super(IrAttachment, self) _super = super(IrAttachment, self)
return _super._file_read(fname, bin_size=bin_size) return _super._file_read(fname, bin_size=bin_size)
def _store_file_read(self, fname, bin_size=False):
storage = fname.partition('://')[0]
raise NotImplementedError(
'No implementation for %s' % (storage,)
)
def _store_file_write(self, key, bin_data):
raise NotImplementedError(
'No implementation for %s' % (self.storage(),)
)
def _store_file_delete(self, fname):
storage = fname.partition('://')[0]
raise NotImplementedError(
'No implementation for %s' % (storage,)
)
@api.model @api.model
def _file_write(self, value, checksum): def _file_write(self, value, checksum):
if self._storage() in self._get_stores(): if self._storage() in self._get_stores():
filename = self._store_file_write(value, checksum) bin_data = value.decode('base64')
key = self._compute_checksum(bin_data)
filename = self._store_file_write(key, bin_data)
else: else:
filename = super(IrAttachment, self)._file_write(value, checksum) filename = super(IrAttachment, self)._file_write(value, checksum)
return filename return filename
@@ -99,6 +118,8 @@ class IrAttachment(models.Model):
def _file_delete(self, fname): def _file_delete(self, fname):
if self._is_file_from_a_store(fname): if self._is_file_from_a_store(fname):
cr = self.env.cr cr = self.env.cr
# using SQL to include files hidden through unlink or due to record
# rules
cr.execute("SELECT COUNT(*) FROM ir_attachment " cr.execute("SELECT COUNT(*) FROM ir_attachment "
"WHERE store_fname = %s", (fname,)) "WHERE store_fname = %s", (fname,))
count = cr.fetchone()[0] count = cr.fetchone()[0]
@@ -184,16 +205,21 @@ class IrAttachment(models.Model):
storage = self._storage() storage = self._storage()
if storage not in self._get_stores(): if storage not in self._get_stores():
return super(IrAttachment, self).force_storage() return super(IrAttachment, self).force_storage()
self._force_storage_to_object_storage()
@api.model
def _force_storage_to_object_storage(self, new_cr=False):
_logger.info('migrating files to the object storage') _logger.info('migrating files to the object storage')
storage = self._storage()
domain = ['!', ('store_fname', '=like', '{}://%'.format(storage)), domain = ['!', ('store_fname', '=like', '{}://%'.format(storage)),
'|', '|',
('res_field', '=', False), ('res_field', '=', False),
('res_field', '!=', False)] ('res_field', '!=', False)]
# We do a copy of the environment so we can workaround the # We do a copy of the environment so we can workaround the cache issue
# cache issue below. We do not create a new cursor because # below. We do not create a new cursor by default because it causes
# it causes serialization issues due to concurrent updates on # serialization issues due to concurrent updates on attachments during
# attachments during the installation # the installation
with self.do_in_new_env() as new_env: with self.do_in_new_env(new_cr=new_cr) as new_env:
model_env = new_env['ir.attachment'] model_env = new_env['ir.attachment']
ids = model_env.search(domain).ids ids = model_env.search(domain).ids
for attachment_id in ids: for attachment_id in ids: