diff --git a/attachment_s3/README.rst b/attachment_s3/README.rst new file mode 100644 index 0000000..409fa30 --- /dev/null +++ b/attachment_s3/README.rst @@ -0,0 +1,33 @@ +Attachments on S3 storage +========================= + +This addon allows to store the attachments (documents and assets) on S3 or any +other S3-compatible Object Storage. + +Configuration +------------- + +With system parameters: + +* Create or set the system parameter with the key ``ir_attachment.location`` + and the value in the form ``s3://:@`` +* If the host is not AWS services, you can set the key + ``ir_attachment.location.s3host`` to the hostname of the Object Storage + service + +With environment variables: + +* Create or set the system parameter with the key ``ir_attachment.location`` + to ``s3://`` and configure the following environment variables: + * ``AWS_HOST`` (not required if using AWS services) + * ``AWS_ACCESS_KEY_ID`` + * ``AWS_SECRET_ACCESS_KEY`` + * ``AWS_BUCKETNAME`` + +Limitations +----------- + +When the addon is installed, files have already been created in the filesystem +or in the database. The addon won't automatically move them over the Object +storage. You can move them by calling the method ``force_storage`` on +``ir.attachment`` though (it might take time if you have many attachments). diff --git a/attachment_s3/__init__.py b/attachment_s3/__init__.py new file mode 100644 index 0000000..cde864b --- /dev/null +++ b/attachment_s3/__init__.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +from . import models diff --git a/attachment_s3/__openerp__.py b/attachment_s3/__openerp__.py new file mode 100644 index 0000000..8841fea --- /dev/null +++ b/attachment_s3/__openerp__.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Camptocamp SA +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html) + + +{'name': 'Attachments on S3 storage', + 'summary': 'Store assets and attachments on a S3 compatible object storage', + 'version': '9.0.1.0.0', + 'author': 'Camptocamp', + 'license': 'AGPL-3', + 'category': 'Knowledge Management', + 'depends': ['base'], + 'external_dependencies': { + 'python': ['boto'], + }, + 'website': 'http://www.camptocamp.com', + 'data': [], + 'installable': True, + } diff --git a/attachment_s3/models/__init__.py b/attachment_s3/models/__init__.py new file mode 100644 index 0000000..d44b7af --- /dev/null +++ b/attachment_s3/models/__init__.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +from . import ir_attachment diff --git a/attachment_s3/models/ir_attachment.py b/attachment_s3/models/ir_attachment.py new file mode 100644 index 0000000..80dbe63 --- /dev/null +++ b/attachment_s3/models/ir_attachment.py @@ -0,0 +1,242 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Camptocamp SA +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html) + + +import base64 +import logging +import os +import xml.dom.minidom +from functools import partial + +import boto +from boto.exception import S3ResponseError + +from openerp import _, api, exceptions, models + +_logger = logging.getLogger(__name__) + + +class IrAttachment(models.Model): + _inherit = "ir.attachment" + + @api.model + def _get_s3_bucket(self, bucket_url): + """Connect to S3 and return the bucket + + It expects the ``bucket_url`` to be in the form: + + ``s3://:@`` + + Alternatively, we can also use environment variables, in that case, + you must set the url to ``s3://``. + + If the S3 provider is not AWS, the key + ``ir_attachment.location.s3host`` can be configured in the System + Parameters with the hostname. + + The following environment variable can be set: + * ``AWS_HOST`` + * ``AWS_ACCESS_KEY_ID`` + * ``AWS_SECRET_ACCESS_KEY`` + * ``AWS_BUCKETNAME`` + + """ + assert bucket_url.startswith('s3://') + host = os.environ.get('AWS_HOST') + if not host: + host = self.env['ir.config_parameter'].get_param( + 'ir_attachment.location.s3host', default=None + ) + if host: + connect_s3 = partial(boto.connect_s3, host=host) + else: + connect_s3 = boto.connect_s3 + + if bucket_url == 's3://': + access_key = os.environ.get('AWS_ACCESS_KEY_ID') + secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY') + bucket_name = os.environ.get('AWS_BUCKETNAME') + if not (access_key and secret_key and bucket_name): + raise exceptions.UserError( + _('The following environment variables must be set:\n' + '* AWS_ACCESS_KEY_ID\n' + '* AWS_SECRET_ACCESS_KEY\n' + '* AWS_BUCKETNAME\n' + ) + ) + else: + malformed_msg = _( + 'S3 bucket %s is malformed, the expected form is ' + 's3://:@' + ) + params = bucket_url[5:].split('@') + if not len(params) == 2: + raise exceptions.UserError(malformed_msg % bucket_url) + keys, bucket_name = params + keys = keys.split(':') + if not len(keys) == 2: + raise exceptions.UserError(malformed_msg % bucket_url) + access_key, secret_key = keys + + try: + conn = connect_s3(aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + + except S3ResponseError as error: + # log verbose error from s3, return short message for user + _logger.exception('Error during connection on S3') + raise exceptions.UserError(self._parse_s3_error(error)) + + bucket = conn.lookup(bucket_name) + if not bucket: + bucket = conn.create_bucket(bucket_name) + return bucket + + @staticmethod + def _parse_s3_error(s3error): + msg = s3error.reason + # S3 error message is a XML message... + doc = xml.dom.minidom.parseString(s3error.body) + msg_node = doc.getElementsByTagName('Message') + if msg_node: + msg = '%s: %s' % (msg, msg_node[0].childNodes[0].data) + return msg + + @api.model + def _file_read(self, fname, bin_size=False): + _super = super(IrAttachment, self) + storage = self._storage() + if storage.startswith('s3://'): + bucket = self._get_s3_bucket(storage) + filekey = bucket.get_key(fname) + if filekey: + read = base64.b64encode(filekey.get_contents_as_string()) + else: + # If the attachment has been created before the installation + # of the addon, it might be stored on the filesystem. + # Fallback on the filesystem read. + # Consider running ``force_storage()`` to move all the + # attachments on the Object Storage + try: + read = _super._file_read(fname, bin_size=bin_size) + except (IOError, OSError): + # File is missing + return '' + else: + read = _super._file_read(fname, bin_size=bin_size) + return read + + @api.model + def _file_write(self, value, checksum): + storage = self._storage() + if storage.startswith('s3://'): + bucket = self._get_s3_bucket(storage) + bin_data = value.decode('base64') + filename = self._compute_checksum(bin_data) + filekey = bucket.get_key(filename) or bucket.new_key(filename) + try: + filekey.set_contents_from_string(bin_data) + except S3ResponseError as error: + # log verbose error from s3, return short message for user + _logger.exception( + 'Error during storage of the file %s' % filename + ) + raise exceptions.UserError( + _('The file could not be stored: %s') % + (self._parse_s3_error(error),) + ) + else: + filename = super(IrAttachment, self)._file_write(value, checksum) + return filename + + @api.model + def _file_delete(self, fname): + storage = self._storage() + if storage.startswith('s3://'): + bucket = self._get_s3_bucket(storage) + filekey = bucket.get_key(fname) + if filekey: + try: + filekey.delete() + except S3ResponseError as error: + # log verbose error from s3, return short message for user + _logger.exception( + 'Error during deletion of the file %s' % fname + ) + raise exceptions.UserError( + _('The file could not be deleted: %s') % + (self._parse_s3_error(error),) + ) + else: + # If the attachment has been created before the installation + # of the addon, it might be stored on the filesystem. + # Fallback on the filesystem delete method. + # Consider running ``force_storage()`` to move all the + # attachments on the Object Storage + super(IrAttachment, self)._file_delete(fname) + else: + super(IrAttachment, self)._file_delete(fname) + + @api.model + def force_storage(self): + if not self.env['res.users'].browse(self.env.uid)._is_admin(): + raise exceptions.AccessError( + _('Only administrators can execute this action.') + ) + + storage = self._storage() + if storage.startswith('s3://'): + _logger.info('migrating files to the object storage') + s3_bucket = self._get_s3_bucket(storage) + domain = ['|', + ('res_field', '=', False), + ('res_field', '!=', False)] + ids = self.search(domain).ids + for attachment_id in ids: + # This is a trick to avoid having the 'datas' function fields + # computed for every attachment on each iteration of the loop. + # The former issue being that it reads the content of the file + # of ALL the attachments on each loop. + self.env.clear() + attachment = self.browse(attachment_id) + _logger.info('inspecting attachment %s (%d)', + attachment.name, attachment.id) + fname = attachment.store_fname + if fname: + # migrating from filestore + s3_key = s3_bucket.get_key(fname) + if s3_key: + _logger.info('file %s already on the object storage', + fname) + else: + _logger.info('moving %s on the object storage', fname) + attachment.write({'datas': attachment.datas, + # this is required otherwise the + # mimetype gets overriden with + # 'application/octet-stream' + # on assets + 'mimetype': attachment.mimetype}) + _logger.info('moved %s on the object storage', fname) + full_path = self._full_path(fname) + _logger.info('cleaning fs attachment') + if os.path.exists(full_path): + try: + os.unlink(full_path) + except OSError: + _logger.info( + "_file_delete could not unlink %s", + full_path, exc_info=True + ) + except IOError: + # Harmless and needed for race conditions + _logger.info( + "_file_delete could not unlink %s", + full_path, exc_info=True + ) + elif attachment.db_datas: + _logger.info('moving on the object storage from database') + attachment.write({'datas': attachment.datas}) + self.env.cr.commit() + else: + return super(IrAttachment, self).force_storage()