session_redis_public/attachment_s3/models/ir_attachment.py

# -*- coding: utf-8 -*-
# Copyright 2016 Camptocamp SA
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html)


import base64
import inspect
import logging
import os
from contextlib import closing, contextmanager
import io

import psycopg2

import openerp
from openerp import _, api, exceptions, fields, models
from ..s3uri import S3Uri

_logger = logging.getLogger(__name__)

try:
    import boto3
    from botocore.exceptions import ClientError, EndpointConnectionError
except ImportError:
    boto3 = None  # noqa
    ClientError = None  # noqa
    EndpointConnectionError = None  # noqa
    _logger.debug("Cannot 'import boto3'.")


def clean_fs(files):
    _logger.info('cleaning old files from filestore')
    for full_path in files:
        if os.path.exists(full_path):
            try:
                os.unlink(full_path)
            except OSError:
                _logger.info(
                    "_file_delete could not unlink %s",
                    full_path, exc_info=True
                )
            except IOError:
                # Harmless and needed for race conditions
                _logger.info(
                    "_file_delete could not unlink %s",
                    full_path, exc_info=True
                )


class IrAttachment(models.Model):
    _inherit = "ir.attachment"

    # this field is in old API, we need to override the 'inverse'
    # field to modify the behavior when using S3, so we adapt
    # the calls from a new API field
    datas = fields.Binary(
        compute='_compute_datas',
        inverse='_inverse_datas',
        string='File Content',
        nodrop=True,
    )

    @api.depends('store_fname', 'db_datas')
    def _compute_datas(self):
        values = self._data_get('datas', None)
        for attach in self:
            attach.datas = values.get(attach.id)

    def _inverse_datas(self):
        # override in order to store files that need fast access,
        # we keep them in the database instead of the object storage
        location = self._storage()
        for attach in self:
            if location == 's3' and self._store_in_db_when_s3():
                # compute the fields that depend on datas
                value = attach.datas
                bin_data = value and value.decode('base64') or ''
                vals = {
                    'file_size': len(bin_data),
                    'checksum': self._compute_checksum(bin_data),
                    'db_datas': value,
                    # we seriously don't need index content on those fields
                    'index_content': False,
                    'store_fname': False,
                }
                fname = attach.store_fname
                # write as superuser, as user probably does not
                # have write access
                super(IrAttachment, attach.sudo()).write(vals)
                if fname:
                    self._file_delete(fname)
                continue
            attach._data_set('datas', attach.datas, None)

    def _register_hook(self, cr):
        super(IrAttachment, self)._register_hook(cr)
        curframe = inspect.currentframe()
        calframe = inspect.getouterframes(curframe, 2)
        # the caller of _register_hook is 'load_modules' in
        # odoo/modules/loading.py
        # We have to go up 2 stacks because of the old api wrapper
        load_modules_frame = calframe[2][0]
        # 'update_module' is an argument that 'load_modules' receives with a
        # True-ish value meaning that an install or upgrade of addon has been
        # done during the initialization. We need to move the attachments that
        # could have been created or updated in other addons before this addon
        # was loaded
        update_module = load_modules_frame.f_locals.get('update_module')

        # We need to call the migration on the loading of the model because
        # when we are upgrading addons, some of them might add attachments.
        # To be sure they are migrated to the storage we need to call the
        # migration here.
        # Typical example is images of ir.ui.menu which are updated in
        # ir.attachment at every upgrade of the addons
        if update_module:
            env = api.Environment(cr, openerp.SUPERUSER_ID, {})
            env['ir.attachment']._force_storage_s3()

    @api.multi
    def _store_in_db_when_s3(self):
        """ Return whether an attachment must be stored in db

        When we are using S3. This is sometimes required because
        the object storage is slower than the database/filesystem.

        We store image_small and image_medium from 'Binary' fields
        because they should be fast to read as they are often displayed
        in kanbans / lists. The same for web_icon_data.

        We store the assets locally as well. Not only for performance,
        but also because it improves the portability of the database:
        when assets are invalidated, they are deleted so we don't have
        an old database with attachments pointing to deleted assets.

        """
        self.ensure_one()

        # assets
        if self.res_model == 'ir.ui.view':
            # assets are stored in 'ir.ui.view'
            return True

        # Binary fields
        if self.res_field:
            # Binary fields are stored with the name of the field in
            # 'res_field'
            local_fields = ('image_small', 'image_medium', 'web_icon_data')
            # 'image' fields can be rather large and should usually
            # not be requested in bulk in lists
            if self.res_field and self.res_field in local_fields:
                return True

        return False

    @api.model
    def _get_s3_bucket(self, name=None):
        """Connect to S3 and return the bucket

        The following environment variables can be set:
        * ``AWS_HOST``
        * ``AWS_REGION``
        * ``AWS_ACCESS_KEY_ID``
        * ``AWS_SECRET_ACCESS_KEY``
        * ``AWS_BUCKETNAME``

        If a name is provided, we'll read this bucket, otherwise, the bucket
        from the environment variable ``AWS_BUCKETNAME`` will be read.

        """
        host = os.environ.get('AWS_HOST')
        region_name = os.environ.get('AWS_REGION')
        access_key = os.environ.get('AWS_ACCESS_KEY_ID')
        secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
        bucket_name = name or os.environ.get('AWS_BUCKETNAME')

        params = {
            'aws_access_key_id': access_key,
            'aws_secret_access_key': secret_key,
        }

        if host:
            params['endpoint_url'] = host
        if region_name:
            params['region_name'] = region_name
        if not (access_key and secret_key and bucket_name):
            msg = _('If you want to read from the %s S3 bucket, the following '
                    'environment variables must be set:\n'
                    '* AWS_ACCESS_KEY_ID\n'
                    '* AWS_SECRET_ACCESS_KEY\n'
                    'If you want to write in the %s S3 bucket, this variable '
                    'must be set as well:\n'
                    '* AWS_BUCKETNAME\n'
                    'Optionally, the S3 host can be changed with:\n'
                    '* AWS_HOST\n'
                    ) % (bucket_name, bucket_name)

            raise exceptions.UserError(msg)
        # try:
        s3 = boto3.resource('s3', **params)
        bucket = s3.Bucket(bucket_name)
        exists = True
        try:
            s3.meta.client.head_bucket(Bucket=bucket_name)
        except ClientError as e:
            # If a client error is thrown, then check that it was a 404 error.
            # If it was a 404 error, then the bucket does not exist.
            error_code = e.response['Error']['Code']
            if error_code == '404':
                exists = False
        except EndpointConnectionError as error:
            # log verbose error from s3, return short message for user
            _logger.exception('Error during connection on S3')
            raise exceptions.UserError(str(error))

        if not exists:
            if not region_name:
                bucket = s3.create_bucket(Bucket=bucket_name)
            else:
                bucket = s3.create_bucket(
                    Bucket=bucket_name,
                    CreateBucketConfiguration={
                        'LocationConstraint': region_name
                    })
        return bucket

    @api.model
    def _file_read_s3(self, fname, bin_size=False):
        s3uri = S3Uri(fname)
        try:
            bucket = self._get_s3_bucket(name=s3uri.bucket())
        except exceptions.UserError:
            _logger.exception(
                "error reading attachment '%s' from object storage", fname
            )
            return ''
        try:
            key = s3uri.item()
            bucket.meta.client.head_object(
                Bucket=bucket.name,  Key=key
            )
            res = io.BytesIO()
            bucket.download_fileobj(key, res)
            res.seek(0)
            read = base64.b64encode(res.read())
        except ClientError:
            read = ''
            _logger.info(
                "attachment '%s' missing on object storage", fname
            )
        return read

    @api.model
    def _file_read(self, fname, bin_size=False):
        if fname.startswith('s3://'):
            return self._file_read_s3(fname, bin_size=bin_size)
        else:
            _super = super(IrAttachment, self)
            return _super._file_read(fname, bin_size=bin_size)

    @api.model
    def _file_write(self, value, checksum):
        storage = self._storage()
        if storage == 's3':
            bucket = self._get_s3_bucket()
            bin_data = value.decode('base64')
            key = self._compute_checksum(bin_data)
            obj = bucket.Object(key=key)
            file = io.BytesIO()
            file.write(bin_data)
            file.seek(0)
            filename = 's3://%s/%s' % (bucket.name, key)
            try:
                obj.upload_fileobj(file)
            except ClientError as error:
                # log verbose error from s3, return short message for user
                _logger.exception(
                    'Error during storage of the file %s' % filename
                )
                raise exceptions.UserError(
                    _('The file could not be stored: %s') % str(error)
                )
        else:
            filename = super(IrAttachment, self)._file_write(value, checksum)
        return filename

    @api.model
    def _file_delete(self, fname):
        if fname.startswith('s3://'):
            s3uri = S3Uri(fname)
            bucket_name = s3uri.bucket()
            item_name = s3uri.item()
            # delete the file only if it is on the current configured bucket
            # otherwise, we might delete files used on a different environment
            if bucket_name == os.environ.get('AWS_BUCKETNAME'):
                bucket = self._get_s3_bucket()
                obj = bucket.Object(key=item_name)
                try:
                    bucket.meta.client.head_object(
                        Bucket=bucket.name, Key=item_name
                    )
                    obj.delete()
                    _logger.info(
                        'file %s deleted on the object storage' % (fname,)
                    )
                except ClientError:
                    # log verbose error from s3, return short message for
                    # user
                    _logger.exception(
                        'Error during deletion of the file %s' % fname
                    )
        else:
            super(IrAttachment, self)._file_delete(fname)

    @api.multi
    def _move_attachment_to_s3(self):
        self.ensure_one()
        _logger.info('inspecting attachment %s (%d)',
                     self.name, self.id)
        fname = self.store_fname
        if fname:
            # migrating from filesystem filestore
            # or from the old 'store_fname' without the bucket name
            _logger.info('moving %s on the object storage', fname)
            self.write({'datas': self.datas,
                        # this is required otherwise the
                        # mimetype gets overriden with
                        # 'application/octet-stream'
                        # on assets
                        'mimetype': self.mimetype})
            _logger.info('moved %s on the object storage', fname)
            return self._full_path(fname)
        elif self.db_datas:
            _logger.info('moving on the object storage from database')
            self.write({'datas': self.datas})

    @api.model
    def _force_storage_s3(self, new_cr=False):
        if not self.env['res.users'].browse(self.env.uid)._is_admin():
            raise exceptions.AccessError(
                _('Only administrators can execute this action.')
            )

        storage = self._storage()
        if storage != 's3':
            return
        _logger.info('migrating files to the object storage')
        # The weird "res_field = False OR res_field != False" domain
        # is required! It's because of an override of _search in ir.attachment
        # which adds ('res_field', '=', False) when the domain does not
        # contain 'res_field'.
        # https://github.com/odoo/odoo/blob/9032617120138848c63b3cfa5d1913c5e5ad76db/odoo/addons/base/ir/ir_attachment.py#L344-L347
        domain = ['!', ('store_fname', '=like', 's3://%'),
                  '|',
                  ('res_field', '=', False),
                  ('res_field', '!=', False)]
        # We do a copy of the environment so we can workaround the
        # cache issue below. We do not create a new cursor because
        # it causes serialization issues due to concurrent updates on
        # attachments during the installation
        with self.do_in_new_env(new_cr=new_cr) as new_env:
            attachment_model_env = new_env['ir.attachment']
            ids = attachment_model_env.search(domain).ids
            files_to_clean = []
            for attachment_id in ids:
                try:
                    with new_env.cr.savepoint():
                        # check that no other transaction has
                        # locked the row, don't send a file to S3
                        # in that case
                        self.env.cr.execute("SELECT id "
                                            "FROM ir_attachment "
                                            "WHERE id = %s "
                                            "FOR UPDATE NOWAIT",
                                            (attachment_id,),
                                            log_exceptions=False)

                        # This is a trick to avoid having the 'datas' function
                        # fields computed for every attachment on each
                        # iteration of the loop.  The former issue being that
                        # it reads the content of the file of ALL the
                        # attachments on each loop.
                        new_env.clear()
                        attachment = attachment_model_env.browse(attachment_id)
                        path = attachment._move_attachment_to_s3()
                        if path:
                            files_to_clean.append(path)
                except psycopg2.OperationalError:
                    _logger.error('Could not migrate attachment %s to S3',
                                  attachment_id)

            def clean():
                clean_fs(files_to_clean)

            # delete the files from the filesystem once we know the changes
            # have been committed in ir.attachment
            if files_to_clean:
                new_env.cr.after('commit', clean)

    @contextmanager
    def do_in_new_env(self, new_cr=False):
        """ Context manager that yields a new environment

        Using a new Odoo Environment thus a new PG transaction.
        """
        with api.Environment.manage():
            if new_cr:
                registry = openerp.modules.registry.RegistryManager.get(
                    self.env.cr.dbname
                )
                with closing(registry.cursor()) as cr:
                    try:
                        yield self.env(cr=cr)
                    except Exception:
                        cr.rollback()
                        raise
                    else:
                        # disable pylint error because this is a valid commit,
                        # we are in a new env
                        cr.commit()  # pylint: disable=invalid-commit
            else:
                # make a copy
                yield self.env()

    @api.model
    def force_storage(self):
        storage = self._storage()
        if storage == 's3':
            self._force_storage_s3()
        else:
            return super(IrAttachment, self).force_storage()