swift/swift/obj/updater.py

# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import six.moves.cPickle as pickle
import errno
import os
import signal
import sys
import time
from random import random, shuffle

from eventlet import spawn, Timeout

from swift.common.bufferedhttp import http_connect
from swift.common.constraints import check_drive
from swift.common.exceptions import ConnectionTimeout
from swift.common.ring import Ring
from swift.common.utils import get_logger, renamer, write_pickle, \
    dump_recon_cache, config_true_value, RateLimitedIterator, split_path, \
    eventlet_monkey_patch, get_redirect_data, ContextPool
from swift.common.daemon import Daemon
from swift.common.header_key_dict import HeaderKeyDict
from swift.common.storage_policy import split_policy_string, PolicyError
from swift.common.recon import RECON_OBJECT_FILE, DEFAULT_RECON_CACHE_PATH
from swift.obj.diskfile import get_tmp_dir, ASYNCDIR_BASE
from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR, \
    HTTP_MOVED_PERMANENTLY


class SweepStats(object):
    """
    Stats bucket for an update sweep
    """
    def __init__(self, errors=0, failures=0, quarantines=0, successes=0,
                 unlinks=0, redirects=0):
        self.errors = errors
        self.failures = failures
        self.quarantines = quarantines
        self.successes = successes
        self.unlinks = unlinks
        self.redirects = redirects

    def copy(self):
        return type(self)(self.errors, self.failures, self.quarantines,
                          self.successes, self.unlinks)

    def since(self, other):
        return type(self)(self.errors - other.errors,
                          self.failures - other.failures,
                          self.quarantines - other.quarantines,
                          self.successes - other.successes,
                          self.unlinks - other.unlinks,
                          self.redirects - other.redirects)

    def reset(self):
        self.errors = 0
        self.failures = 0
        self.quarantines = 0
        self.successes = 0
        self.unlinks = 0
        self.redirects = 0

    def __str__(self):
        keys = (
            (self.successes, 'successes'),
            (self.failures, 'failures'),
            (self.quarantines, 'quarantines'),
            (self.unlinks, 'unlinks'),
            (self.errors, 'errors'),
            (self.redirects, 'redirects'),
        )
        return ', '.join('%d %s' % pair for pair in keys)


class ObjectUpdater(Daemon):
    """Update object information in container listings."""

    def __init__(self, conf, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(conf, log_route='object-updater')
        self.devices = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.interval = float(conf.get('interval', 300))
        self.container_ring = None
        self.concurrency = int(conf.get('concurrency', 8))
        self.updater_workers = int(conf.get('updater_workers', 1))
        if 'slowdown' in conf:
            self.logger.warning(
                'The slowdown option is deprecated in favor of '
                'objects_per_second. This option may be ignored in a '
                'future release.')
            objects_per_second = 1 / (
                float(conf.get('slowdown', '0.01')) + 0.01)
        else:
            objects_per_second = 50
        self.objects_running_time = 0
        self.max_objects_per_second = \
            float(conf.get('objects_per_second',
                           objects_per_second))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.report_interval = float(conf.get('report_interval', 300))
        self.recon_cache_path = conf.get('recon_cache_path',
                                         DEFAULT_RECON_CACHE_PATH)
        self.rcache = os.path.join(self.recon_cache_path, RECON_OBJECT_FILE)
        self.stats = SweepStats()

    def _listdir(self, path):
        try:
            return os.listdir(path)
        except OSError as e:
            self.stats.errors += 1
            self.logger.increment('errors')
            self.logger.error('ERROR: Unable to access %(path)s: '
                              '%(error)s',
                              {'path': path, 'error': e})
            return []

    def get_container_ring(self):
        """Get the container ring.  Load it, if it hasn't been yet."""
        if not self.container_ring:
            self.container_ring = Ring(self.swift_dir, ring_name='container')
        return self.container_ring

    def run_forever(self, *args, **kwargs):
        """Run the updater continuously."""
        time.sleep(random() * self.interval)
        while True:
            self.logger.info('Begin object update sweep')
            begin = time.time()
            pids = []
            # read from container ring to ensure it's fresh
            self.get_container_ring().get_nodes('')
            for device in self._listdir(self.devices):
                try:
                    dev_path = check_drive(self.devices, device,
                                           self.mount_check)
                except ValueError as err:
                    # We don't count this as an error. The occasional
                    # unmounted drive is part of normal cluster operations,
                    # so a simple warning is sufficient.
                    self.logger.warning('Skipping: %s', err)
                    continue
                while len(pids) >= self.updater_workers:
                    pids.remove(os.wait()[0])
                pid = os.fork()
                if pid:
                    pids.append(pid)
                else:
                    signal.signal(signal.SIGTERM, signal.SIG_DFL)
                    eventlet_monkey_patch()
                    self.stats.reset()
                    forkbegin = time.time()
                    self.object_sweep(dev_path)
                    elapsed = time.time() - forkbegin
                    self.logger.info(
                        ('Object update sweep of %(device)s '
                         'completed: %(elapsed).02fs, %(stats)s'),
                        {'device': device, 'elapsed': elapsed,
                         'stats': self.stats})
                    sys.exit()
            while pids:
                pids.remove(os.wait()[0])
            elapsed = time.time() - begin
            self.logger.info('Object update sweep completed: %.02fs',
                             elapsed)
            dump_recon_cache({'object_updater_sweep': elapsed},
                             self.rcache, self.logger)
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)

    def run_once(self, *args, **kwargs):
        """Run the updater once."""
        self.logger.info('Begin object update single threaded sweep')
        begin = time.time()
        self.stats.reset()
        for device in self._listdir(self.devices):
            try:
                dev_path = check_drive(self.devices, device, self.mount_check)
            except ValueError as err:
                # We don't count this as an error. The occasional unmounted
                # drive is part of normal cluster operations, so a simple
                # warning is sufficient.
                self.logger.warning('Skipping: %s', err)
                continue
            self.object_sweep(dev_path)
        elapsed = time.time() - begin
        self.logger.info(
            ('Object update single-threaded sweep completed: '
             '%(elapsed).02fs, %(stats)s'),
            {'elapsed': elapsed, 'stats': self.stats})
        dump_recon_cache({'object_updater_sweep': elapsed},
                         self.rcache, self.logger)

    def _iter_async_pendings(self, device):
        """
        Locate and yield all the async pendings on the device. Multiple updates
        for the same object will come out in reverse-chronological order
        (i.e. newest first) so that callers can skip stale async_pendings.

        Tries to clean up empty directories as it goes.
        """
        # loop through async pending dirs for all policies
        for asyncdir in self._listdir(device):
            # we only care about directories
            async_pending = os.path.join(device, asyncdir)
            if not asyncdir.startswith(ASYNCDIR_BASE):
                # skip stuff like "accounts", "containers", etc.
                continue
            if not os.path.isdir(async_pending):
                continue
            try:
                base, policy = split_policy_string(asyncdir)
            except PolicyError as e:
                # This isn't an error, but a misconfiguration. Logging a
                # warning should be sufficient.
                self.logger.warning('Directory %(directory)r does not map '
                                    'to a valid policy (%(error)s)', {
                                        'directory': asyncdir, 'error': e})
                continue
            prefix_dirs = self._listdir(async_pending)
            shuffle(prefix_dirs)
            for prefix in prefix_dirs:
                prefix_path = os.path.join(async_pending, prefix)
                if not os.path.isdir(prefix_path):
                    continue
                last_obj_hash = None
                for update in sorted(self._listdir(prefix_path), reverse=True):
                    update_path = os.path.join(prefix_path, update)
                    if not os.path.isfile(update_path):
                        continue
                    try:
                        obj_hash, timestamp = update.split('-')
                    except ValueError:
                        self.stats.errors += 1
                        self.logger.increment('errors')
                        self.logger.error(
                            'ERROR async pending file with unexpected '
                            'name %s', update_path)
                        continue
                    # Async pendings are stored on disk like this:
                    #
                    # <device>/async_pending/<suffix>/<obj_hash>-<timestamp>
                    #
                    # If there are multiple updates for a given object,
                    # they'll look like this:
                    #
                    # <device>/async_pending/<obj_suffix>/<obj_hash>-<timestamp1>
                    # <device>/async_pending/<obj_suffix>/<obj_hash>-<timestamp2>
                    # <device>/async_pending/<obj_suffix>/<obj_hash>-<timestamp3>
                    #
                    # Async updates also have the property that newer
                    # updates contain all the information in older updates.
                    # Since we sorted the directory listing in reverse
                    # order, we'll see timestamp3 first, yield it, and then
                    # unlink timestamp2 and timestamp1 since we know they
                    # are obsolete.
                    #
                    # This way, our caller only gets useful async_pendings.
                    if obj_hash == last_obj_hash:
                        self.stats.unlinks += 1
                        self.logger.increment('unlinks')
                        try:
                            os.unlink(update_path)
                        except OSError as e:
                            if e.errno != errno.ENOENT:
                                raise
                    else:
                        last_obj_hash = obj_hash
                        yield {'device': device, 'policy': policy,
                               'path': update_path,
                               'obj_hash': obj_hash, 'timestamp': timestamp}

    def object_sweep(self, device):
        """
        If there are async pendings on the device, walk each one and update.

        :param device: path to device
        """
        start_time = time.time()
        last_status_update = start_time
        start_stats = self.stats.copy()
        my_pid = os.getpid()
        self.logger.info("Object update sweep starting on %s (pid: %d)",
                         device, my_pid)

        ap_iter = RateLimitedIterator(
            self._iter_async_pendings(device),
            elements_per_second=self.max_objects_per_second)
        with ContextPool(self.concurrency) as pool:
            for update in ap_iter:
                pool.spawn(self.process_object_update,
                           update['path'], update['device'], update['policy'])
                now = time.time()
                if now - last_status_update >= self.report_interval:
                    this_sweep = self.stats.since(start_stats)
                    self.logger.info(
                        ('Object update sweep progress on %(device)s: '
                         '%(elapsed).02fs, %(stats)s (pid: %(pid)d)'),
                        {'device': device,
                         'elapsed': now - start_time,
                         'pid': my_pid,
                         'stats': this_sweep})
                    last_status_update = now
            pool.waitall()

        self.logger.timing_since('timing', start_time)
        sweep_totals = self.stats.since(start_stats)
        self.logger.info(
            ('Object update sweep completed on %(device)s '
             'in %(elapsed).02fs seconds:, '
             '%(successes)d successes, %(failures)d failures, '
             '%(quarantines)d quarantines, '
             '%(unlinks)d unlinks, %(errors)d errors, '
             '%(redirects)d redirects '
             '(pid: %(pid)d)'),
            {'device': device,
             'elapsed': time.time() - start_time,
             'pid': my_pid,
             'successes': sweep_totals.successes,
             'failures': sweep_totals.failures,
             'quarantines': sweep_totals.quarantines,
             'unlinks': sweep_totals.unlinks,
             'errors': sweep_totals.errors,
             'redirects': sweep_totals.redirects})

    def process_object_update(self, update_path, device, policy):
        """
        Process the object information to be updated and update.

        :param update_path: path to pickled object update file
        :param device: path to device
        :param policy: storage policy of object update
        """
        try:
            update = pickle.load(open(update_path, 'rb'))
        except Exception as e:
            if getattr(e, 'errno', None) == errno.ENOENT:
                return
            self.logger.exception(
                'ERROR Pickle problem, quarantining %s', update_path)
            self.stats.quarantines += 1
            self.logger.increment('quarantines')
            target_path = os.path.join(device, 'quarantined', 'objects',
                                       os.path.basename(update_path))
            renamer(update_path, target_path, fsync=False)
            try:
                # If this was the last async_pending in the directory,
                # then this will succeed. Otherwise, it'll fail, and
                # that's okay.
                os.rmdir(os.path.dirname(update_path))
            except OSError:
                pass
            return

        def do_update():
            successes = update.get('successes', [])
            headers_out = HeaderKeyDict(update['headers'].copy())
            headers_out['user-agent'] = 'object-updater %s' % os.getpid()
            headers_out.setdefault('X-Backend-Storage-Policy-Index',
                                   str(int(policy)))
            headers_out.setdefault('X-Backend-Accept-Redirect', 'true')
            headers_out.setdefault('X-Backend-Accept-Quoted-Location', 'true')
            container_path = update.get('container_path')
            if container_path:
                acct, cont = split_path('/' + container_path, minsegs=2)
            else:
                acct, cont = update['account'], update['container']
            part, nodes = self.get_container_ring().get_nodes(acct, cont)
            obj = '/%s/%s/%s' % (acct, cont, update['obj'])
            events = [spawn(self.object_update,
                            node, part, update['op'], obj, headers_out)
                      for node in nodes if node['id'] not in successes]
            success = True
            new_successes = rewrite_pickle = False
            redirect = None
            redirects = set()
            for event in events:
                event_success, node_id, redirect = event.wait()
                if event_success is True:
                    successes.append(node_id)
                    new_successes = True
                else:
                    success = False
                if redirect:
                    redirects.add(redirect)

            if success:
                self.stats.successes += 1
                self.logger.increment('successes')
                self.logger.debug('Update sent for %(obj)s %(path)s',
                                  {'obj': obj, 'path': update_path})
                self.stats.unlinks += 1
                self.logger.increment('unlinks')
                os.unlink(update_path)
                try:
                    # If this was the last async_pending in the directory,
                    # then this will succeed. Otherwise, it'll fail, and
                    # that's okay.
                    os.rmdir(os.path.dirname(update_path))
                except OSError:
                    pass
            elif redirects:
                # erase any previous successes
                update.pop('successes', None)
                redirect = max(redirects, key=lambda x: x[-1])[0]
                redirect_history = update.setdefault('redirect_history', [])
                if redirect in redirect_history:
                    # force next update to be sent to root, reset history
                    update['container_path'] = None
                    update['redirect_history'] = []
                else:
                    update['container_path'] = redirect
                    redirect_history.append(redirect)
                self.stats.redirects += 1
                self.logger.increment("redirects")
                self.logger.debug(
                    'Update redirected for %(obj)s %(path)s to %(shard)s',
                    {'obj': obj, 'path': update_path,
                     'shard': update['container_path']})
                rewrite_pickle = True
            else:
                self.stats.failures += 1
                self.logger.increment('failures')
                self.logger.debug('Update failed for %(obj)s %(path)s',
                                  {'obj': obj, 'path': update_path})
                if new_successes:
                    update['successes'] = successes
                    rewrite_pickle = True

            return rewrite_pickle, redirect

        rewrite_pickle, redirect = do_update()
        if redirect:
            # make one immediate retry to the redirect location
            rewrite_pickle, redirect = do_update()
        if rewrite_pickle:
            write_pickle(update, update_path, os.path.join(
                device, get_tmp_dir(policy)))

    def object_update(self, node, part, op, obj, headers_out):
        """
        Perform the object update to the container

        :param node: node dictionary from the container ring
        :param part: partition that holds the container
        :param op: operation performed (ex: 'PUT' or 'DELETE')
        :param obj: object name being updated
        :param headers_out: headers to send with the update
        :return: a tuple of (``success``, ``node_id``, ``redirect``)
            where ``success`` is True if the update succeeded, ``node_id`` is
            the_id of the node updated and ``redirect`` is either None or a
            tuple of (a path, a timestamp string).
        """
        redirect = None
        try:
            with ConnectionTimeout(self.conn_timeout):
                conn = http_connect(
                    node['replication_ip'], node['replication_port'],
                    node['device'], part, op, obj, headers_out)
            with Timeout(self.node_timeout):
                resp = conn.getresponse()
                resp.read()

            if resp.status == HTTP_MOVED_PERMANENTLY:
                try:
                    redirect = get_redirect_data(resp)
                except ValueError as err:
                    self.logger.error(
                        'Container update failed for %r; problem with '
                        'redirect location: %s' % (obj, err))

            success = is_success(resp.status)
            if not success:
                self.logger.debug(
                    'Error code %(status)d is returned from remote '
                    'server %(ip)s: %(port)s / %(device)s',
                    {'status': resp.status, 'ip': node['replication_ip'],
                     'port': node['replication_port'],
                     'device': node['device']})
            return success, node['id'], redirect
        except (Exception, Timeout):
            self.logger.exception(
                'ERROR with remote server '
                '%(replication_ip)s:%(replication_port)s/%(device)s', node)
        return HTTP_INTERNAL_SERVER_ERROR, node['id'], redirect