diff --git a/attachment_s3/__manifest__.py b/attachment_s3/__manifest__.py index 1e7e26e..611b76f 100644 --- a/attachment_s3/__manifest__.py +++ b/attachment_s3/__manifest__.py @@ -9,11 +9,11 @@ 'author': 'Camptocamp,Odoo Community Association (OCA)', 'license': 'AGPL-3', 'category': 'Knowledge Management', - 'depends': ['base'], + 'depends': ['base', 'base_attachment_object_storage'], 'external_dependencies': { 'python': ['boto'], }, - 'website': 'http://www.camptocamp.com', + 'website': 'https://www.camptocamp.com', 'data': [], 'installable': True, } diff --git a/attachment_s3/models/ir_attachment.py b/attachment_s3/models/ir_attachment.py index 9dbff2d..84318db 100644 --- a/attachment_s3/models/ir_attachment.py +++ b/attachment_s3/models/ir_attachment.py @@ -7,12 +7,8 @@ import base64 import logging import os import xml.dom.minidom -from contextlib import closing, contextmanager from functools import partial -import psycopg2 - -import odoo from odoo import _, api, exceptions, models from ..s3uri import S3Uri @@ -30,67 +26,10 @@ except ImportError: class IrAttachment(models.Model): _inherit = "ir.attachment" - @api.multi - def _store_in_db_when_s3(self): - """ Return whether an attachment must be stored in db - - When we are using S3. This is sometimes required because - the object storage is slower than the database/filesystem. - - We store image_small and image_medium from 'Binary' fields - because they should be fast to read as they are often displayed - in kanbans / lists. The same for web_icon_data. - - We store the assets locally as well. Not only for performance, - but also because it improves the portability of the database: - when assets are invalidated, they are deleted so we don't have - an old database with attachments pointing to deleted assets. - - """ - self.ensure_one() - - # assets - if self.res_model == 'ir.ui.view': - # assets are stored in 'ir.ui.view' - return True - - # Binary fields - if self.res_field: - # Binary fields are stored with the name of the field in - # 'res_field' - local_fields = ('image_small', 'image_medium', 'web_icon_data') - # 'image' fields can be rather large and should usually - # not be requests in bulk in lists - if self.res_field and self.res_field in local_fields: - return True - - return False - - def _inverse_datas(self): - # override in order to store files that need fast access, - # we keep them in the database instead of the object storage - location = self._storage() - for attach in self: - if location == 's3' and self._store_in_db_when_s3(): - # compute the fields that depend on datas - value = attach.datas - bin_data = value and value.decode('base64') or '' - vals = { - 'file_size': len(bin_data), - 'checksum': self._compute_checksum(bin_data), - 'db_datas': value, - # we seriously don't need index content on those fields - 'index_content': False, - 'store_fname': False, - } - fname = attach.store_fname - # write as superuser, as user probably does not - # have write access - super(IrAttachment, attach.sudo()).write(vals) - if fname: - self._file_delete(fname) - continue - super(IrAttachment, attach)._inverse_datas() + def _get_stores(self): + l = ['s3'] + l += super(IrAttachment, self)._get_stores() + return l @api.model def _get_s3_bucket(self, name=None): @@ -157,38 +96,32 @@ class IrAttachment(models.Model): return msg @api.model - def _file_read_s3(self, fname, bin_size=False): - s3uri = S3Uri(fname) - try: - bucket = self._get_s3_bucket(name=s3uri.bucket()) - except exceptions.UserError: - _logger.exception( - "error reading attachment '%s' from object storage", fname - ) - return '' - filekey = bucket.get_key(s3uri.item()) - if filekey: - read = base64.b64encode(filekey.get_contents_as_string()) - else: - read = '' - _logger.info("attachment '%s' missing on object storage", fname) - return read - - @api.model - def _file_read(self, fname, bin_size=False): + def _store_file_read(self, fname, bin_size=False): if fname.startswith('s3://'): - return self._file_read_s3(fname, bin_size=bin_size) + s3uri = S3Uri(fname) + try: + bucket = self._get_s3_bucket(name=s3uri.bucket()) + except exceptions.UserError: + _logger.exception( + "error reading attachment '%s' from object storage", fname + ) + return '' + filekey = bucket.get_key(s3uri.item()) + if filekey: + read = base64.b64encode(filekey.get_contents_as_string()) + else: + read = '' + _logger.info( + "attachment '%s' missing on object storage", fname + ) + return read else: - _super = super(IrAttachment, self) - return _super._file_read(fname, bin_size=bin_size) + return super(IrAttachment, self)._store_file_read(fname, bin_size) @api.model - def _file_write(self, value, checksum): - storage = self._storage() - if storage == 's3': + def _store_file_write(self, key, bin_data): + if self._storage() == 's3': bucket = self._get_s3_bucket() - bin_data = value.decode('base64') - key = self._compute_checksum(bin_data) filekey = bucket.get_key(key) or bucket.new_key(key) filename = 's3://%s/%s' % (bucket.name, key) try: @@ -203,18 +136,13 @@ class IrAttachment(models.Model): (self._parse_s3_error(error),) ) else: - filename = super(IrAttachment, self)._file_write(value, checksum) + _super = super(IrAttachment, self) + filename = _super._store_file_write(key, bin_data) return filename @api.model - def _file_delete(self, fname): + def _store_file_delete(self, fname): if fname.startswith('s3://'): - # using SQL to include files hidden through unlink or due to record - # rules - cr = self.env.cr - cr.execute("SELECT COUNT(*) FROM ir_attachment " - "WHERE store_fname = %s", (fname,)) - count = cr.fetchone()[0] s3uri = S3Uri(fname) bucket_name = s3uri.bucket() item_name = s3uri.item() @@ -223,7 +151,7 @@ class IrAttachment(models.Model): if bucket_name == os.environ.get('AWS_BUCKETNAME'): bucket = self._get_s3_bucket() filekey = bucket.get_key(item_name) - if not count and filekey: + if filekey: try: filekey.delete() _logger.info( @@ -236,121 +164,4 @@ class IrAttachment(models.Model): 'Error during deletion of the file %s' % fname ) else: - super(IrAttachment, self)._file_delete(fname) - - @api.multi - def _move_attachment_to_s3(self): - self.ensure_one() - _logger.info('inspecting attachment %s (%d)', - self.name, self.id) - fname = self.store_fname - if fname: - # migrating from filesystem filestore - # or from the old 'store_fname' without the bucket name - _logger.info('moving %s on the object storage', fname) - self.write({'datas': self.datas, - # this is required otherwise the - # mimetype gets overriden with - # 'application/octet-stream' - # on assets - 'mimetype': self.mimetype}) - _logger.info('moved %s on the object storage', fname) - full_path = self._full_path(fname) - _logger.info('cleaning fs self') - if os.path.exists(full_path): - try: - os.unlink(full_path) - except OSError: - _logger.info( - "_file_delete could not unlink %s", - full_path, exc_info=True - ) - except IOError: - # Harmless and needed for race conditions - _logger.info( - "_file_delete could not unlink %s", - full_path, exc_info=True - ) - elif self.db_datas: - _logger.info('moving on the object storage from database') - self.write({'datas': self.datas}) - - @api.model - def _force_storage_s3(self, new_cr=False): - if not self.env['res.users'].browse(self.env.uid)._is_admin(): - raise exceptions.AccessError( - _('Only administrators can execute this action.') - ) - - storage = self._storage() - if storage != 's3': - return - _logger.info('migrating files to the object storage') - domain = ['!', ('store_fname', '=like', 's3://%'), - '|', - ('res_field', '=', False), - ('res_field', '!=', False)] - # We do a copy of the environment so we can workaround the - # cache issue below. We do not create a new cursor because - # it causes serialization issues due to concurrent updates on - # attachments during the installation - with self.do_in_new_env(new_cr=new_cr) as new_env: - attachment_model_env = new_env['ir.attachment'] - ids = attachment_model_env.search(domain).ids - for attachment_id in ids: - try: - with new_env.cr.savepoint(): - # check that no other transaction has - # locked the row, don't send a file to S3 - # in that case - self.env.cr.execute("SELECT id " - "FROM ir_attachment " - "WHERE id = %s " - "FOR UPDATE NOWAIT", - (attachment_id,), - log_exceptions=False) - - # This is a trick to avoid having the 'datas' function - # fields computed for every attachment on each - # iteration of the loop. The former issue being that - # it reads the content of the file of ALL the - # attachments on each loop. - new_env.clear() - attachment = attachment_model_env.browse(attachment_id) - attachment._move_attachment_to_s3() - except psycopg2.OperationalError: - _logger.error('Could not migrate attachment %s to S3', - attachment_id) - - @contextmanager - def do_in_new_env(self, new_cr=False): - """ Context manager that yields a new environment - - Using a new Odoo Environment thus a new PG transaction. - """ - with api.Environment.manage(): - if new_cr: - registry = odoo.modules.registry.RegistryManager.get( - self.env.cr.dbname - ) - with closing(registry.cursor()) as cr: - try: - yield self.env(cr=cr) - except: - cr.rollback() - raise - else: - # disable pylint error because this is a valid commit, - # we are in a new env - cr.commit() # pylint: disable=invalid-commit - else: - # make a copy - yield self.env() - - @api.model - def force_storage(self): - storage = self._storage() - if storage == 's3': - self._force_storage_s3() - else: - return super(IrAttachment, self).force_storage() + super(IrAttachment, self)._store_file_delete(fname) diff --git a/attachment_swift/models/ir_attachment.py b/attachment_swift/models/ir_attachment.py index 37c9e8e..428f4d7 100644 --- a/attachment_swift/models/ir_attachment.py +++ b/attachment_swift/models/ir_attachment.py @@ -24,7 +24,10 @@ except ImportError: class IrAttachment(models.Model): _inherit = 'ir.attachment' - store_name = 'swift' + def _get_stores(self): + l = ['swift'] + l += super(IrAttachment, self)._get_stores() + return l @api.model def _get_swift_connection(self): @@ -67,13 +70,11 @@ class IrAttachment(models.Model): else: return super(IrAttachment, self)._store_file_read(fname, bin_size) - def _store_file_write(self, value, checksum): - if self._storage() == self.store_name: + def _store_file_write(self, key, bin_data): + if self._storage() == 'swift': container = os.environ.get('SWIFT_WRITE_CONTAINER') conn = self._get_swift_connection() conn.put_container(container) - bin_data = value.decode('base64') - key = self._compute_checksum(bin_data) filename = 'swift://{}/{}'.format(container, key) try: conn.put_object(container, key, bin_data) @@ -82,14 +83,16 @@ class IrAttachment(models.Model): raise exceptions.UserError(_('Error writing to Swift')) else: _super = super(IrAttachment, self) - filename = _super._store_file_write(value, checksum) + filename = _super._store_file_write(key, bin_data) return filename @api.model - def _file_delete_from_store(self, fname): + def _store_file_delete(self, fname): if fname.startswith('swift://'): swifturi = SwiftUri(fname) container = swifturi.container() + # delete the file only if it is on the current configured bucket + # otherwise, we might delete files used on a different environment if container == os.environ.get('SWIFT_WRITE_CONTAINER'): conn = self._get_swift_connection() try: @@ -97,11 +100,7 @@ class IrAttachment(models.Model): except ClientException: _logger.exception( _('Error deleting an object on the Swift store')) - raise exceptions.UserError(_('Error deleting on Swift')) + # we ignore the error, file will stay on the object + # storage but won't disrupt the process else: - super(IrAttachment, self)._file_delete(fname) - - def _get_stores(self): - l = [self.store_name] - l += super(IrAttachment, self)._get_stores() - return l + super(IrAttachment, self)._file_delete_from_store(fname) diff --git a/base_attachment_object_storage/models/ir_attachment.py b/base_attachment_object_storage/models/ir_attachment.py index 535986a..9063cc6 100644 --- a/base_attachment_object_storage/models/ir_attachment.py +++ b/base_attachment_object_storage/models/ir_attachment.py @@ -87,10 +87,29 @@ class IrAttachment(models.Model): _super = super(IrAttachment, self) return _super._file_read(fname, bin_size=bin_size) + def _store_file_read(self, fname, bin_size=False): + storage = fname.partition('://')[0] + raise NotImplementedError( + 'No implementation for %s' % (storage,) + ) + + def _store_file_write(self, key, bin_data): + raise NotImplementedError( + 'No implementation for %s' % (self.storage(),) + ) + + def _store_file_delete(self, fname): + storage = fname.partition('://')[0] + raise NotImplementedError( + 'No implementation for %s' % (storage,) + ) + @api.model def _file_write(self, value, checksum): if self._storage() in self._get_stores(): - filename = self._store_file_write(value, checksum) + bin_data = value.decode('base64') + key = self._compute_checksum(bin_data) + filename = self._store_file_write(key, bin_data) else: filename = super(IrAttachment, self)._file_write(value, checksum) return filename @@ -99,6 +118,8 @@ class IrAttachment(models.Model): def _file_delete(self, fname): if self._is_file_from_a_store(fname): cr = self.env.cr + # using SQL to include files hidden through unlink or due to record + # rules cr.execute("SELECT COUNT(*) FROM ir_attachment " "WHERE store_fname = %s", (fname,)) count = cr.fetchone()[0] @@ -184,16 +205,21 @@ class IrAttachment(models.Model): storage = self._storage() if storage not in self._get_stores(): return super(IrAttachment, self).force_storage() + self._force_storage_to_object_storage() + + @api.model + def _force_storage_to_object_storage(self, new_cr=False): _logger.info('migrating files to the object storage') + storage = self._storage() domain = ['!', ('store_fname', '=like', '{}://%'.format(storage)), '|', ('res_field', '=', False), ('res_field', '!=', False)] - # We do a copy of the environment so we can workaround the - # cache issue below. We do not create a new cursor because - # it causes serialization issues due to concurrent updates on - # attachments during the installation - with self.do_in_new_env() as new_env: + # We do a copy of the environment so we can workaround the cache issue + # below. We do not create a new cursor by default because it causes + # serialization issues due to concurrent updates on attachments during + # the installation + with self.do_in_new_env(new_cr=new_cr) as new_env: model_env = new_env['ir.attachment'] ids = model_env.search(domain).ids for attachment_id in ids: