diff --git a/cinder/backup/manager.py b/cinder/backup/manager.py index 75b8472efeb..84bc3953983 100644 --- a/cinder/backup/manager.py +++ b/cinder/backup/manager.py @@ -111,6 +111,7 @@ class BackupManager(manager.ThreadPoolManager): super(BackupManager, self).__init__(*args, **kwargs) self.is_initialized = False self._set_tpool_size(CONF.backup_native_threads_pool_size) + self._process_number = kwargs.get('process_number', 1) @property def driver_name(self): @@ -175,7 +176,17 @@ class BackupManager(manager.ThreadPoolManager): self.backup_rpcapi = backup_rpcapi.BackupAPI() self.volume_rpcapi = volume_rpcapi.VolumeAPI() + @utils.synchronized('backup-pgid-%s' % os.getpgrp(), + external=True, delay=0.1) def _cleanup_incomplete_backup_operations(self, ctxt): + # Only the first launched process should do the cleanup, the others + # have waited on the lock for the first one to finish the cleanup and + # can now continue with the start process. + if self._process_number != 1: + LOG.debug("Process #%s (pgid=%s) skips cleanup.", + self._process_number, os.getpgrp()) + return + LOG.info("Cleaning up incomplete backup operations.") # TODO(smulcahy) implement full resume of backup and restore diff --git a/cinder/cmd/backup.py b/cinder/cmd/backup.py index d6d4b8c3daf..ffe7cb9460a 100644 --- a/cinder/cmd/backup.py +++ b/cinder/cmd/backup.py @@ -27,6 +27,7 @@ import sys import eventlet eventlet.monkey_patch() +from oslo_concurrency import processutils from oslo_config import cfg from oslo_log import log as logging from oslo_privsep import priv_context @@ -39,6 +40,7 @@ i18n.enable_lazy() # Need to register global_opts from cinder.common import config # noqa +from cinder.db import api as session from cinder import objects from cinder import service from cinder import utils @@ -47,6 +49,15 @@ from cinder import version CONF = cfg.CONF +backup_workers_opt = cfg.IntOpt( + 'backup_workers', + default=1, min=1, max=processutils.get_worker_count(), + help='Number of backup processes to 
launch. Improves performance with ' + 'concurrent backups.') +CONF.register_opt(backup_workers_opt) + +LOG = None + # NOTE(mriedem): The default backup driver uses swift and performs read/write # operations in a thread. swiftclient will log requests and responses at DEBUG # level, which can cause a thread switch and break the backup operation. So we @@ -54,6 +65,22 @@ CONF = cfg.CONF _EXTRA_DEFAULT_LOG_LEVELS = ['swiftclient=WARN'] +def _launch_backup_process(launcher, num_process): + try: + server = service.Service.create(binary='cinder-backup', + coordination=True, + process_number=num_process + 1) + except Exception: + LOG.exception('Backup service %s failed to start.', CONF.host) + sys.exit(1) + else: + # Dispose of the whole DB connection pool here before + # starting another process. Otherwise we run into cases where + # child processes share DB connections which results in errors. + session.dispose_engine() + launcher.launch_service(server) + + def main(): objects.register_all() gmr_opts.set_defaults(CONF) @@ -67,7 +94,21 @@ def main(): priv_context.init(root_helper=shlex.split(utils.get_root_helper())) utils.monkey_patch() gmr.TextGuruMeditation.setup_autorun(version, conf=CONF) - server = service.Service.create(binary='cinder-backup', - coordination=True) - service.serve(server) - service.wait() + global LOG + LOG = logging.getLogger(__name__) + + if CONF.backup_workers > 1: + LOG.info('Backup running with %s processes.', CONF.backup_workers) + launcher = service.get_launcher() + + for i in range(CONF.backup_workers): + _launch_backup_process(launcher, i) + + launcher.wait() + else: + LOG.info('Backup running in single process mode.') + server = service.Service.create(binary='cinder-backup', + coordination=True, + process_number=1) + service.serve(server) + service.wait() diff --git a/cinder/opts.py b/cinder/opts.py index 8c1d5c8f5ed..e1bcd2da566 100644 --- a/cinder/opts.py +++ b/cinder/opts.py @@ -42,6 +42,7 @@ from cinder.backup.drivers import posix as 
cinder_backup_drivers_posix from cinder.backup.drivers import swift as cinder_backup_drivers_swift from cinder.backup.drivers import tsm as cinder_backup_drivers_tsm from cinder.backup import manager as cinder_backup_manager +from cinder.cmd import backup as cinder_cmd_backup from cinder.cmd import volume as cinder_cmd_volume from cinder.common import config as cinder_common_config import cinder.compute @@ -221,6 +222,7 @@ def list_opts(): cinder_backup_drivers_swift.swiftbackup_service_opts, cinder_backup_drivers_tsm.tsm_opts, cinder_backup_manager.backup_manager_opts, + [cinder_cmd_backup.backup_workers_opt], [cinder_cmd_volume.cluster_opt], cinder_common_config.core_opts, cinder_common_config.global_opts, diff --git a/cinder/service.py b/cinder/service.py index 903688edfd2..f8658f9afc5 100644 --- a/cinder/service.py +++ b/cinder/service.py @@ -366,7 +366,7 @@ class Service(service.Service): def create(cls, host=None, binary=None, topic=None, manager=None, report_interval=None, periodic_interval=None, periodic_fuzzy_delay=None, service_name=None, - coordination=False, cluster=None): + coordination=False, cluster=None, **kwargs): """Instantiates class and passes back application object. 
:param host: defaults to CONF.host @@ -400,7 +400,7 @@ class Service(service.Service): periodic_fuzzy_delay=periodic_fuzzy_delay, service_name=service_name, coordination=coordination, - cluster=cluster) + cluster=cluster, **kwargs) return service_obj diff --git a/cinder/tests/unit/backup/test_backup.py b/cinder/tests/unit/backup/test_backup.py index 94e3e5ed8e2..ede7cd5e8ad 100644 --- a/cinder/tests/unit/backup/test_backup.py +++ b/cinder/tests/unit/backup/test_backup.py @@ -379,6 +379,21 @@ class BackupTestCase(BaseBackupTest): self.assertEqual(len(fake_backup_list), mock_backup_cleanup.call_count) self.assertEqual(len(fake_backup_list), mock_temp_cleanup.call_count) + @mock.patch('cinder.objects.BackupList') + @mock.patch.object(manager.BackupManager, '_cleanup_one_backup') + @mock.patch.object(manager.BackupManager, + '_cleanup_temp_volumes_snapshots_for_one_backup') + def test_cleanup_non_primary_process(self, temp_cleanup_mock, + backup_cleanup_mock, backup_ovo_mock): + """Test cleanup doesn't run on non primary processes.""" + self.backup_mgr._process_number = 2 + + self.backup_mgr._cleanup_incomplete_backup_operations(self.ctxt) + + backup_ovo_mock.get_all_by_host.assert_not_called() + backup_cleanup_mock.assert_not_called() + temp_cleanup_mock.assert_not_called() + def test_cleanup_one_backing_up_volume(self): """Test cleanup_one_volume for volume status 'backing-up'.""" diff --git a/cinder/tests/unit/test_cmd.py b/cinder/tests/unit/test_cmd.py index 20f7d3b7044..2b612ff0d52 100644 --- a/cinder/tests/unit/test_cmd.py +++ b/cinder/tests/unit/test_cmd.py @@ -93,13 +93,14 @@ class TestCinderBackupCmd(test.TestCase): super(TestCinderBackupCmd, self).setUp() sys.argv = ['cinder-backup'] + @mock.patch('cinder.cmd.backup._launch_backup_process') @mock.patch('cinder.service.wait') @mock.patch('cinder.service.serve') @mock.patch('cinder.service.Service.create') @mock.patch('cinder.utils.monkey_patch') @mock.patch('oslo_log.log.setup') def test_main(self, log_setup, 
monkey_patch, service_create, service_serve, - service_wait): + service_wait, launch_mock): server = service_create.return_value cinder_backup.main() @@ -109,9 +110,35 @@ class TestCinderBackupCmd(test.TestCase): log_setup.assert_called_once_with(CONF, "cinder") monkey_patch.assert_called_once_with() service_create.assert_called_once_with(binary='cinder-backup', - coordination=True) + coordination=True, + process_number=1) service_serve.assert_called_once_with(server) service_wait.assert_called_once_with() + launch_mock.assert_not_called() + + @mock.patch('cinder.service.get_launcher') + @mock.patch('cinder.service.Service.create') + @mock.patch('cinder.utils.monkey_patch') + @mock.patch('oslo_log.log.setup') + def test_main_multiprocess(self, log_setup, monkey_patch, service_create, + get_launcher): + CONF.set_override('backup_workers', 2) + cinder_backup.main() + + self.assertEqual('cinder', CONF.project) + self.assertEqual(CONF.version, version.version_string()) + + c1 = mock.call(binary=constants.BACKUP_BINARY, + coordination=True, + process_number=1) + c2 = mock.call(binary=constants.BACKUP_BINARY, + coordination=True, + process_number=2) + service_create.assert_has_calls([c1, c2]) + + launcher = get_launcher.return_value + self.assertEqual(2, launcher.launch_service.call_count) + launcher.wait.assert_called_once_with() class TestCinderSchedulerCmd(test.TestCase): diff --git a/releasenotes/notes/feature-multi-process-backup-8cf5ad5a0cf9b2d5.yaml b/releasenotes/notes/feature-multi-process-backup-8cf5ad5a0cf9b2d5.yaml new file mode 100644 index 00000000000..c6231defdc5 --- /dev/null +++ b/releasenotes/notes/feature-multi-process-backup-8cf5ad5a0cf9b2d5.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + Cinder backup now supports running multiple processes to make the most of + the available CPU cores. Performance gains will be significant when + running multiple concurrent backups/restores with compression. 
The number + of processes is set with the ``backup_workers`` configuration option.