Merge "Fixes backup with multiple volume backend"

This commit is contained in:
Jenkins 2013-10-01 21:00:49 +00:00 committed by Gerrit Code Review
commit a2673b0feb
4 changed files with 166 additions and 76 deletions

View File

@ -84,14 +84,14 @@ class API(base.Base):
return backups return backups
def _is_backup_service_enabled(self, volume): def _is_backup_service_enabled(self, volume, volume_host):
"""Check if there is an backup service available""" """Check if there is an backup service available"""
topic = CONF.backup_topic topic = CONF.backup_topic
ctxt = context.get_admin_context() ctxt = context.get_admin_context()
services = self.db.service_get_all_by_topic(ctxt, topic) services = self.db.service_get_all_by_topic(ctxt, topic)
for srv in services: for srv in services:
if (srv['availability_zone'] == volume['availability_zone'] and if (srv['availability_zone'] == volume['availability_zone'] and
srv['host'] == volume['host'] and not srv['disabled'] and srv['host'] == volume_host and not srv['disabled'] and
utils.service_is_up(srv)): utils.service_is_up(srv)):
return True return True
return False return False
@ -104,7 +104,8 @@ class API(base.Base):
if volume['status'] != "available": if volume['status'] != "available":
msg = _('Volume to be backed up must be available') msg = _('Volume to be backed up must be available')
raise exception.InvalidVolume(reason=msg) raise exception.InvalidVolume(reason=msg)
if not self._is_backup_service_enabled(volume): volume_host = volume['host'].partition('@')[0]
if not self._is_backup_service_enabled(volume, volume_host):
raise exception.ServiceNotFound(service_id='cinder-backup') raise exception.ServiceNotFound(service_id='cinder-backup')
self.db.volume_update(context, volume_id, {'status': 'backing-up'}) self.db.volume_update(context, volume_id, {'status': 'backing-up'})
@ -117,9 +118,7 @@ class API(base.Base):
'status': 'creating', 'status': 'creating',
'container': container, 'container': container,
'size': volume['size'], 'size': volume['size'],
# TODO(DuncanT): This will need de-managling once 'host': volume_host, }
# multi-backend lands
'host': volume['host'], }
backup = self.db.backup_create(context, options) backup = self.db.backup_create(context, options)

View File

@ -43,7 +43,6 @@ from cinder.openstack.common import importutils
from cinder.openstack.common import log as logging from cinder.openstack.common import log as logging
from cinder import utils from cinder import utils
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
backup_manager_opts = [ backup_manager_opts = [
@ -70,12 +69,10 @@ class BackupManager(manager.SchedulerDependentManager):
def __init__(self, service_name=None, *args, **kwargs): def __init__(self, service_name=None, *args, **kwargs):
self.service = importutils.import_module(self.driver_name) self.service = importutils.import_module(self.driver_name)
self.az = CONF.storage_availability_zone self.az = CONF.storage_availability_zone
self.volume_manager = importutils.import_object( self.volume_managers = {}
CONF.volume_manager) self._setup_volume_drivers()
self.driver = self.volume_manager.driver
super(BackupManager, self).__init__(service_name='backup', super(BackupManager, self).__init__(service_name='backup',
*args, **kwargs) *args, **kwargs)
self.driver.db = self.db
@property @property
def driver_name(self): def driver_name(self):
@ -90,40 +87,114 @@ class BackupManager(manager.SchedulerDependentManager):
return mapper[service] return mapper[service]
return service return service
def init_host(self): @property
"""Do any initialization that needs to be run if this is a def driver(self):
standalone service. return self._get_driver()
"""
ctxt = context.get_admin_context() def _get_volume_backend(self, host=None, allow_null_host=False):
LOG.info(_("Starting volume driver %(driver_name)s (%(version)s)") % if host is None:
{'driver_name': self.driver.__class__.__name__, if not allow_null_host:
'version': self.driver.get_version()}) msg = _("NULL host not allowed for volume backend lookup.")
raise exception.BackupFailedToGetVolumeBackend(msg)
else:
LOG.debug(_("Checking hostname '%s' for backend info.") % (host))
part = host.partition('@')
if (part[1] == '@') and (part[2] != ''):
backend = part[2]
LOG.debug("Got backend '%s'." % (backend))
return backend
LOG.info(_("Backend not found in hostname (%s) so using default.") %
(host))
if 'default' not in self.volume_managers:
# For multi-backend we just pick the top of the list.
return self.volume_managers.keys()[0]
return 'default'
def _get_manager(self, backend):
LOG.debug(_("Manager requested for volume_backend '%s'.") %
(backend))
if backend is None:
LOG.debug(_("Fetching default backend."))
backend = self._get_volume_backend(allow_null_host=True)
if backend not in self.volume_managers:
msg = (_("Volume manager for backend '%s' does not exist.") %
(backend))
raise exception.BackupFailedToGetVolumeBackend(msg)
return self.volume_managers[backend]
def _get_driver(self, backend=None):
LOG.debug(_("Driver requested for volume_backend '%s'.") %
(backend))
if backend is None:
LOG.debug(_("Fetching default backend."))
backend = self._get_volume_backend(allow_null_host=True)
mgr = self._get_manager(backend)
mgr.driver.db = self.db
return mgr.driver
def _setup_volume_drivers(self):
if CONF.enabled_backends:
for backend in CONF.enabled_backends:
host = "%s@%s" % (CONF.host, backend)
mgr = importutils.import_object(CONF.volume_manager,
host=host,
service_name=backend)
config = mgr.configuration
backend_name = config.safe_get('volume_backend_name')
LOG.debug(_("Registering backend %(backend)s (host=%(host)s "
"backend_name=%(backend_name)s).") %
{'backend': backend, 'host': host,
'backend_name': backend_name})
self.volume_managers[backend] = mgr
else:
default = importutils.import_object(CONF.volume_manager)
LOG.debug(_("Registering default backend %s.") % (default))
self.volume_managers['default'] = default
def _init_volume_driver(self, ctxt, driver):
LOG.info(_("Starting volume driver %(driver_name)s (%(version)s).") %
{'driver_name': driver.__class__.__name__,
'version': driver.get_version()})
try: try:
self.driver.do_setup(ctxt) driver.do_setup(ctxt)
self.driver.check_for_setup_error() driver.check_for_setup_error()
except Exception as ex: except Exception as ex:
LOG.error(_("Error encountered during " LOG.error(_("Error encountered during initialization of driver: "
"initialization of driver: %(name)s") % "%(name)s.") %
{'name': self.driver.__class__.__name__}) {'name': driver.__class__.__name__})
LOG.exception(ex) LOG.exception(ex)
# we don't want to continue since we failed # we don't want to continue since we failed
# to initialize the driver correctly. # to initialize the driver correctly.
return return
self.driver.set_initialized() driver.set_initialized()
LOG.info(_("Cleaning up incomplete backup operations")) def init_host(self):
"""Do any initialization that needs to be run if this is a
standalone service.
"""
ctxt = context.get_admin_context()
for mgr in self.volume_managers.itervalues():
self._init_volume_driver(ctxt, mgr.driver)
LOG.info(_("Cleaning up incomplete backup operations."))
volumes = self.db.volume_get_all_by_host(ctxt, self.host) volumes = self.db.volume_get_all_by_host(ctxt, self.host)
for volume in volumes: for volume in volumes:
backend = self._get_volume_backend(host=volume['host'])
if volume['status'] == 'backing-up': if volume['status'] == 'backing-up':
LOG.info(_('Resetting volume %s to available ' LOG.info(_('Resetting volume %s to available '
'(was backing-up)') % volume['id']) '(was backing-up).') % volume['id'])
self.volume_manager.detach_volume(ctxt, volume['id']) mgr = self._get_manager(backend)
mgr.detach_volume(ctxt, volume['id'])
if volume['status'] == 'restoring-backup': if volume['status'] == 'restoring-backup':
LOG.info(_('Resetting volume %s to error_restoring ' LOG.info(_('Resetting volume %s to error_restoring '
'(was restoring-backup)') % volume['id']) '(was restoring-backup).') % volume['id'])
self.volume_manager.detach_volume(ctxt, volume['id']) mgr = self._get_manager(backend)
mgr.detach_volume(ctxt, volume['id'])
self.db.volume_update(ctxt, volume['id'], self.db.volume_update(ctxt, volume['id'],
{'status': 'error_restoring'}) {'status': 'error_restoring'})
@ -132,18 +203,18 @@ class BackupManager(manager.SchedulerDependentManager):
backups = self.db.backup_get_all_by_host(ctxt, self.host) backups = self.db.backup_get_all_by_host(ctxt, self.host)
for backup in backups: for backup in backups:
if backup['status'] == 'creating': if backup['status'] == 'creating':
LOG.info(_('Resetting backup %s to error ' LOG.info(_('Resetting backup %s to error (was creating).')
'(was creating)') % backup['id']) % backup['id'])
err = 'incomplete backup reset on manager restart' err = 'incomplete backup reset on manager restart'
self.db.backup_update(ctxt, backup['id'], {'status': 'error', self.db.backup_update(ctxt, backup['id'], {'status': 'error',
'fail_reason': err}) 'fail_reason': err})
if backup['status'] == 'restoring': if backup['status'] == 'restoring':
LOG.info(_('Resetting backup %s to available ' LOG.info(_('Resetting backup %s to available (was restoring).')
'(was restoring)') % backup['id']) % backup['id'])
self.db.backup_update(ctxt, backup['id'], self.db.backup_update(ctxt, backup['id'],
{'status': 'available'}) {'status': 'available'})
if backup['status'] == 'deleting': if backup['status'] == 'deleting':
LOG.info(_('Resuming delete on backup: %s') % backup['id']) LOG.info(_('Resuming delete on backup: %s.') % backup['id'])
self.delete_backup(ctxt, backup['id']) self.delete_backup(ctxt, backup['id'])
@utils.require_driver_initialized @utils.require_driver_initialized
@ -152,9 +223,11 @@ class BackupManager(manager.SchedulerDependentManager):
backup = self.db.backup_get(context, backup_id) backup = self.db.backup_get(context, backup_id)
volume_id = backup['volume_id'] volume_id = backup['volume_id']
volume = self.db.volume_get(context, volume_id) volume = self.db.volume_get(context, volume_id)
LOG.info(_('create_backup started, backup: %(backup_id)s for ' LOG.info(_('Create backup started, backup: %(backup_id)s '
'volume: %(volume_id)s') % 'volume: %(volume_id)s.') %
{'backup_id': backup_id, 'volume_id': volume_id}) {'backup_id': backup_id, 'volume_id': volume_id})
backend = self._get_volume_backend(host=volume['host'])
self.db.backup_update(context, backup_id, {'host': self.host, self.db.backup_update(context, backup_id, {'host': self.host,
'service': 'service':
self.driver_name}) self.driver_name})
@ -162,8 +235,8 @@ class BackupManager(manager.SchedulerDependentManager):
expected_status = 'backing-up' expected_status = 'backing-up'
actual_status = volume['status'] actual_status = volume['status']
if actual_status != expected_status: if actual_status != expected_status:
err = _('create_backup aborted, expected volume status ' err = _('Create backup aborted, expected volume status '
'%(expected_status)s but got %(actual_status)s') % { '%(expected_status)s but got %(actual_status)s.') % {
'expected_status': expected_status, 'expected_status': expected_status,
'actual_status': actual_status, 'actual_status': actual_status,
} }
@ -174,8 +247,8 @@ class BackupManager(manager.SchedulerDependentManager):
expected_status = 'creating' expected_status = 'creating'
actual_status = backup['status'] actual_status = backup['status']
if actual_status != expected_status: if actual_status != expected_status:
err = _('create_backup aborted, expected backup status ' err = _('Create backup aborted, expected backup status '
'%(expected_status)s but got %(actual_status)s') % { '%(expected_status)s but got %(actual_status)s.') % {
'expected_status': expected_status, 'expected_status': expected_status,
'actual_status': actual_status, 'actual_status': actual_status,
} }
@ -186,7 +259,8 @@ class BackupManager(manager.SchedulerDependentManager):
try: try:
backup_service = self.service.get_backup_driver(context) backup_service = self.service.get_backup_driver(context)
self.driver.backup_volume(context, backup, backup_service) self._get_driver(backend).backup_volume(context, backup,
backup_service)
except Exception as err: except Exception as err:
with excutils.save_and_reraise_exception(): with excutils.save_and_reraise_exception():
self.db.volume_update(context, volume_id, self.db.volume_update(context, volume_id,
@ -200,24 +274,26 @@ class BackupManager(manager.SchedulerDependentManager):
'size': volume['size'], 'size': volume['size'],
'availability_zone': 'availability_zone':
self.az}) self.az})
LOG.info(_('create_backup finished. backup: %s'), backup_id) LOG.info(_('Create backup finished. backup: %s.'), backup_id)
@utils.require_driver_initialized @utils.require_driver_initialized
def restore_backup(self, context, backup_id, volume_id): def restore_backup(self, context, backup_id, volume_id):
"""Restore volume backups from configured backup service.""" """Restore volume backups from configured backup service."""
LOG.info(_('restore_backup started, restoring backup: %(backup_id)s' LOG.info(_('Restore backup started, backup: %(backup_id)s '
' to volume: %(volume_id)s') % 'volume: %(volume_id)s.') %
{'backup_id': backup_id, 'volume_id': volume_id}) {'backup_id': backup_id, 'volume_id': volume_id})
backup = self.db.backup_get(context, backup_id) backup = self.db.backup_get(context, backup_id)
volume = self.db.volume_get(context, volume_id) volume = self.db.volume_get(context, volume_id)
backend = self._get_volume_backend(host=volume['host'])
self.db.backup_update(context, backup_id, {'host': self.host}) self.db.backup_update(context, backup_id, {'host': self.host})
expected_status = 'restoring-backup' expected_status = 'restoring-backup'
actual_status = volume['status'] actual_status = volume['status']
if actual_status != expected_status: if actual_status != expected_status:
err = _('restore_backup aborted, expected volume status ' err = _('Restore backup aborted: expected volume status '
'%(expected_status)s but got %(actual_status)s') % { '%(expected_status)s but got %(actual_status)s.') % {
'expected_status': expected_status, 'expected_status': expected_status,
'actual_status': actual_status 'actual_status': actual_status
} }
@ -227,8 +303,8 @@ class BackupManager(manager.SchedulerDependentManager):
expected_status = 'restoring' expected_status = 'restoring'
actual_status = backup['status'] actual_status = backup['status']
if actual_status != expected_status: if actual_status != expected_status:
err = _('restore_backup aborted, expected backup status ' err = _('Restore backup aborted: expected backup status '
'%(expected_status)s but got %(actual_status)s') % { '%(expected_status)s but got %(actual_status)s.') % {
'expected_status': expected_status, 'expected_status': expected_status,
'actual_status': actual_status 'actual_status': actual_status
} }
@ -238,18 +314,18 @@ class BackupManager(manager.SchedulerDependentManager):
raise exception.InvalidBackup(reason=err) raise exception.InvalidBackup(reason=err)
if volume['size'] > backup['size']: if volume['size'] > backup['size']:
LOG.warn('volume: %s, size: %d is larger than backup: %s, ' LOG.warn('Volume: %s, size: %d is larger than backup: %s, '
'size: %d, continuing with restore', 'size: %d, continuing with restore.',
volume['id'], volume['size'], volume['id'], volume['size'],
backup['id'], backup['size']) backup['id'], backup['size'])
backup_service = self._map_service_to_driver(backup['service']) backup_service = self._map_service_to_driver(backup['service'])
configured_service = self.driver_name configured_service = self.driver_name
if backup_service != configured_service: if backup_service != configured_service:
err = _('restore_backup aborted, the backup service currently' err = _('Restore backup aborted, the backup service currently'
' configured [%(configured_service)s] is not the' ' configured [%(configured_service)s] is not the'
' backup service that was used to create this' ' backup service that was used to create this'
' backup [%(backup_service)s]') % { ' backup [%(backup_service)s].') % {
'configured_service': configured_service, 'configured_service': configured_service,
'backup_service': backup_service, 'backup_service': backup_service,
} }
@ -259,7 +335,8 @@ class BackupManager(manager.SchedulerDependentManager):
try: try:
backup_service = self.service.get_backup_driver(context) backup_service = self.service.get_backup_driver(context)
self.driver.restore_backup(context, backup, volume, self._get_driver(backend).restore_backup(context, backup,
volume,
backup_service) backup_service)
except Exception as err: except Exception as err:
with excutils.save_and_reraise_exception(): with excutils.save_and_reraise_exception():
@ -270,22 +347,22 @@ class BackupManager(manager.SchedulerDependentManager):
self.db.volume_update(context, volume_id, {'status': 'available'}) self.db.volume_update(context, volume_id, {'status': 'available'})
self.db.backup_update(context, backup_id, {'status': 'available'}) self.db.backup_update(context, backup_id, {'status': 'available'})
LOG.info(_('restore_backup finished, backup: %(backup_id)s restored' LOG.info(_('Restore backup finished, backup %(backup_id)s restored'
' to volume: %(volume_id)s') % ' to volume %(volume_id)s.') %
{'backup_id': backup_id, 'volume_id': volume_id}) {'backup_id': backup_id, 'volume_id': volume_id})
@utils.require_driver_initialized @utils.require_driver_initialized
def delete_backup(self, context, backup_id): def delete_backup(self, context, backup_id):
"""Delete volume backup from configured backup service.""" """Delete volume backup from configured backup service."""
LOG.info(_('delete_backup started, backup: %s'), backup_id) LOG.info(_('Delete backup started, backup: %s.'), backup_id)
backup = self.db.backup_get(context, backup_id) backup = self.db.backup_get(context, backup_id)
self.db.backup_update(context, backup_id, {'host': self.host}) self.db.backup_update(context, backup_id, {'host': self.host})
expected_status = 'deleting' expected_status = 'deleting'
actual_status = backup['status'] actual_status = backup['status']
if actual_status != expected_status: if actual_status != expected_status:
err = _('delete_backup aborted, expected backup status ' err = _('Delete_backup aborted, expected backup status '
'%(expected_status)s but got %(actual_status)s') % { '%(expected_status)s but got %(actual_status)s.') % {
'expected_status': expected_status, 'expected_status': expected_status,
'actual_status': actual_status, 'actual_status': actual_status,
} }
@ -297,10 +374,10 @@ class BackupManager(manager.SchedulerDependentManager):
if backup_service is not None: if backup_service is not None:
configured_service = self.driver_name configured_service = self.driver_name
if backup_service != configured_service: if backup_service != configured_service:
err = _('delete_backup aborted, the backup service currently' err = _('Delete backup aborted, the backup service currently'
' configured [%(configured_service)s] is not the' ' configured [%(configured_service)s] is not the'
' backup service that was used to create this' ' backup service that was used to create this'
' backup [%(backup_service)s]') % { ' backup [%(backup_service)s].') % {
'configured_service': configured_service, 'configured_service': configured_service,
'backup_service': backup_service, 'backup_service': backup_service,
} }
@ -320,4 +397,4 @@ class BackupManager(manager.SchedulerDependentManager):
context = context.elevated() context = context.elevated()
self.db.backup_destroy(context, backup_id) self.db.backup_destroy(context, backup_id)
LOG.info(_('delete_backup finished, backup %s deleted'), backup_id) LOG.info(_('Delete backup finished, backup %s deleted.'), backup_id)

View File

@ -546,6 +546,10 @@ class BackupNotFound(NotFound):
message = _("Backup %(backup_id)s could not be found.") message = _("Backup %(backup_id)s could not be found.")
class BackupFailedToGetVolumeBackend(NotFound):
message = _("Failed to identify volume backend.")
class InvalidBackup(Invalid): class InvalidBackup(Invalid):
message = _("Invalid backup: %(reason)s") message = _("Invalid backup: %(reason)s")

View File

@ -503,64 +503,74 @@ class BackupsAPITestCase(test.TestCase):
def empty_service(ctxt, topic): def empty_service(ctxt, topic):
return [] return []
test_host = 'test_host'
alt_host = 'strange_host'
#service host not match with volume's host #service host not match with volume's host
def host_not_match(context, topic): def host_not_match(context, topic):
return [{'availability_zone': "fake_az", 'host': 'strange_host', return [{'availability_zone': "fake_az", 'host': alt_host,
'disabled': 0, 'updated_at': timeutils.utcnow()}] 'disabled': 0, 'updated_at': timeutils.utcnow()}]
#service az not match with volume's az #service az not match with volume's az
def az_not_match(context, topic): def az_not_match(context, topic):
return [{'availability_zone': "strange_az", 'host': 'test_host', return [{'availability_zone': "strange_az", 'host': test_host,
'disabled': 0, 'updated_at': timeutils.utcnow()}] 'disabled': 0, 'updated_at': timeutils.utcnow()}]
#service disabled #service disabled
def disabled_service(context, topic): def disabled_service(context, topic):
return [{'availability_zone': "fake_az", 'host': 'test_host', return [{'availability_zone': "fake_az", 'host': test_host,
'disabled': 1, 'updated_at': timeutils.utcnow()}] 'disabled': 1, 'updated_at': timeutils.utcnow()}]
#dead service that last reported at 20th centry #dead service that last reported at 20th centry
def dead_service(context, topic): def dead_service(context, topic):
return [{'availability_zone': "fake_az", 'host': 'strange_host', return [{'availability_zone': "fake_az", 'host': alt_host,
'disabled': 0, 'updated_at': '1989-04-16 02:55:44'}] 'disabled': 0, 'updated_at': '1989-04-16 02:55:44'}]
#first service's host not match but second one works. #first service's host not match but second one works.
def multi_services(context, topic): def multi_services(context, topic):
return [{'availability_zone': "fake_az", 'host': 'strange_host', return [{'availability_zone': "fake_az", 'host': alt_host,
'disabled': 0, 'updated_at': timeutils.utcnow()}, 'disabled': 0, 'updated_at': timeutils.utcnow()},
{'availability_zone': "fake_az", 'host': 'test_host', {'availability_zone': "fake_az", 'host': test_host,
'disabled': 0, 'updated_at': timeutils.utcnow()}] 'disabled': 0, 'updated_at': timeutils.utcnow()}]
volume_id = utils.create_volume(self.context, size=2)['id'] volume_id = utils.create_volume(self.context, size=2,
host=test_host)['id']
volume = self.volume_api.get(context.get_admin_context(), volume_id) volume = self.volume_api.get(context.get_admin_context(), volume_id)
#test empty service #test empty service
self.stubs.Set(cinder.db, 'service_get_all_by_topic', empty_service) self.stubs.Set(cinder.db, 'service_get_all_by_topic', empty_service)
self.assertEqual(self.backup_api._is_backup_service_enabled(volume), self.assertEqual(self.backup_api._is_backup_service_enabled(volume,
test_host),
False) False)
#test host not match service #test host not match service
self.stubs.Set(cinder.db, 'service_get_all_by_topic', host_not_match) self.stubs.Set(cinder.db, 'service_get_all_by_topic', host_not_match)
self.assertEqual(self.backup_api._is_backup_service_enabled(volume), self.assertEqual(self.backup_api._is_backup_service_enabled(volume,
test_host),
False) False)
#test az not match service #test az not match service
self.stubs.Set(cinder.db, 'service_get_all_by_topic', az_not_match) self.stubs.Set(cinder.db, 'service_get_all_by_topic', az_not_match)
self.assertEqual(self.backup_api._is_backup_service_enabled(volume), self.assertEqual(self.backup_api._is_backup_service_enabled(volume,
test_host),
False) False)
#test disabled service #test disabled service
self.stubs.Set(cinder.db, 'service_get_all_by_topic', disabled_service) self.stubs.Set(cinder.db, 'service_get_all_by_topic', disabled_service)
self.assertEqual(self.backup_api._is_backup_service_enabled(volume), self.assertEqual(self.backup_api._is_backup_service_enabled(volume,
test_host),
False) False)
#test dead service #test dead service
self.stubs.Set(cinder.db, 'service_get_all_by_topic', dead_service) self.stubs.Set(cinder.db, 'service_get_all_by_topic', dead_service)
self.assertEqual(self.backup_api._is_backup_service_enabled(volume), self.assertEqual(self.backup_api._is_backup_service_enabled(volume,
test_host),
False) False)
#test multi services and the last service matches #test multi services and the last service matches
self.stubs.Set(cinder.db, 'service_get_all_by_topic', multi_services) self.stubs.Set(cinder.db, 'service_get_all_by_topic', multi_services)
self.assertEqual(self.backup_api._is_backup_service_enabled(volume), self.assertEqual(self.backup_api._is_backup_service_enabled(volume,
test_host),
True) True)
def test_delete_backup_available(self): def test_delete_backup_available(self):