From e570436d1cca5cfa89388aec8b2daa63d01d0250 Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Thu, 8 Mar 2018 15:40:49 +0100 Subject: [PATCH] Allow configuring tpool size The RBD driver and almost all backup drivers rely heavily on eventlet's tpool, which has a default of 20 threads, which will be too low. Currently the only way to change this is using the environmental variable EVENTLET_THREADPOOL_SIZE, which isn't very clean for the openstack services. This patch adds the possibility of setting specific values for each backend and for the backup service, and increases the default for the backup service from 20 threads to 60. The backup service can be configured under [DEFAULT] section with option backup_native_threads_pool_size, and the backends under their specific sections with backend_native_threads_pool_size. Change-Id: I5d7c1e8d7f1c6592ded1f74eea42d76ab523df92 Closes-Bug: #1754354 --- cinder/backup/manager.py | 7 ++++++ cinder/manager.py | 6 +++++ cinder/test.py | 6 +++++ cinder/tests/unit/backup/test_backup.py | 22 ++++++++++++++++++ cinder/tests/unit/volume/test_volume.py | 23 +++++++++++++++++++ cinder/volume/manager.py | 8 +++++++ .../notes/tpool-size-11121f78df24db39.yaml | 15 ++++++++++++ 7 files changed, 87 insertions(+) create mode 100644 releasenotes/notes/tpool-size-11121f78df24db39.yaml diff --git a/cinder/backup/manager.py b/cinder/backup/manager.py index b7acf3960fe..f6791ef7d30 100644 --- a/cinder/backup/manager.py +++ b/cinder/backup/manager.py @@ -71,6 +71,12 @@ backup_manager_opts = [ 'backup service startup. If false, the backup service ' 'will remain down until all pending backups are ' 'deleted.',), + cfg.IntOpt('backup_native_threads_pool_size', + default=60, + min=20, + help='Size of the native threads pool for the backups. 
' + 'Most backup drivers rely heavily on this, it can be ' + 'decreased for specific drivers that don\'t.'), ] # This map doesn't need to be extended in the future since it's only @@ -104,6 +110,7 @@ class BackupManager(manager.ThreadPoolManager): self.volume_rpcapi = volume_rpcapi.VolumeAPI() super(BackupManager, self).__init__(*args, **kwargs) self.is_initialized = False + self._set_tpool_size(CONF.backup_native_threads_pool_size) @property def driver_name(self): diff --git a/cinder/manager.py b/cinder/manager.py index ec673e77922..e478c0b438b 100644 --- a/cinder/manager.py +++ b/cinder/manager.py @@ -68,6 +68,7 @@ from cinder.scheduler import rpcapi as scheduler_rpcapi from cinder import utils from eventlet import greenpool +from eventlet import tpool CONF = cfg.CONF @@ -94,6 +95,11 @@ class Manager(base.Base, PeriodicTasks): self.availability_zone = CONF.storage_availability_zone super(Manager, self).__init__(db_driver) + def _set_tpool_size(self, nthreads): + # NOTE(geguileo): Until PR #472 is merged we have to be very careful + # not to call "tpool.execute" before calling this method. + tpool.set_num_threads(nthreads) + @property def service_topic_queue(self): return self.cluster or self.host diff --git a/cinder/test.py b/cinder/test.py index 18f82ef7c0a..c8c9e6ccd3b 100644 --- a/cinder/test.py +++ b/cinder/test.py @@ -26,6 +26,7 @@ import os import sys import uuid +from eventlet import tpool import fixtures import mock from oslo_concurrency import lockutils @@ -296,6 +297,11 @@ class TestCase(testtools.TestCase): # added. self.assertRaisesRegexp = self.assertRaisesRegex + # Ensure we have the default tpool size value and we don't carry + # threads from other test runs. 
+ tpool.killall() + tpool._nthreads = 20 + def _restore_obj_registry(self): objects_base.CinderObjectRegistry._registry._obj_classes = \ self._base_test_obj_backup diff --git a/cinder/tests/unit/backup/test_backup.py b/cinder/tests/unit/backup/test_backup.py index 24582163666..38ea0482469 100644 --- a/cinder/tests/unit/backup/test_backup.py +++ b/cinder/tests/unit/backup/test_backup.py @@ -19,6 +19,7 @@ import ddt import os import uuid +from eventlet import tpool import mock from os_brick.initiator.connectors import fake as fake_connectors from oslo_config import cfg @@ -1641,6 +1642,27 @@ class BackupTestCase(BaseBackupTest): backup = self._create_backup_db_entry(volume_id=vol_id) self.assertFalse(backup.has_dependent_backups) + def test_default_tpool_size(self): + """Test we can set custom tpool size.""" + tpool._nthreads = 20 + self.assertListEqual([], tpool._threads) + + self.backup_mgr = importutils.import_object(CONF.backup_manager) + + self.assertEqual(60, tpool._nthreads) + self.assertListEqual([], tpool._threads) + + def test_tpool_size(self): + """Test we can set custom tpool size.""" + self.assertNotEqual(100, tpool._nthreads) + self.assertListEqual([], tpool._threads) + + self.override_config('backup_native_threads_pool_size', 100) + self.backup_mgr = importutils.import_object(CONF.backup_manager) + + self.assertEqual(100, tpool._nthreads) + self.assertListEqual([], tpool._threads) + class BackupTestCaseWithVerify(BaseBackupTest): """Test Case for backups.""" diff --git a/cinder/tests/unit/volume/test_volume.py b/cinder/tests/unit/volume/test_volume.py index 0c02b5ab196..855981cfcb9 100644 --- a/cinder/tests/unit/volume/test_volume.py +++ b/cinder/tests/unit/volume/test_volume.py @@ -2987,6 +2987,29 @@ class VolumeTestCase(base.BaseVolumeTestCase): self.assertRaises(exception.ProgrammingError, manager._append_volume_stats, {'pools': 'bad_data'}) + def test_default_tpool_size(self): + """Test we can set custom tpool size.""" + eventlet.tpool._nthreads = 
10 + self.assertListEqual([], eventlet.tpool._threads) + + vol_manager.VolumeManager() + + self.assertEqual(20, eventlet.tpool._nthreads) + self.assertListEqual([], eventlet.tpool._threads) + + def test_tpool_size(self): + """Test we can set custom tpool size.""" + self.assertNotEqual(100, eventlet.tpool._nthreads) + self.assertListEqual([], eventlet.tpool._threads) + + self.override_config('backend_native_threads_pool_size', 100, + group='backend_defaults') + vol_manager.VolumeManager() + + self.assertEqual(100, eventlet.tpool._nthreads) + self.assertListEqual([], eventlet.tpool._threads) + eventlet.tpool._nthreads = 20 + class VolumeTestCaseLocks(base.BaseVolumeTestCase): MOCK_TOOZ = False diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 4635f1a5d0e..da95bf5243a 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -143,6 +143,12 @@ volume_backend_opts = [ cfg.BoolOpt('suppress_requests_ssl_warnings', default=False, help='Suppress requests library SSL certificate warnings.'), + cfg.IntOpt('backend_native_threads_pool_size', + default=20, + min=20, + help='Size of the native threads pool for the backend. ' + 'Increase for backends that heavily rely on this, like ' + 'the RBD driver.'), ] CONF = cfg.CONF @@ -190,6 +196,8 @@ class VolumeManager(manager.CleanableManager, service_name = service_name or 'backend_defaults' self.configuration = config.Configuration(volume_backend_opts, config_group=service_name) + self._set_tpool_size( + self.configuration.backend_native_threads_pool_size) self.stats = {} self.service_uuid = None diff --git a/releasenotes/notes/tpool-size-11121f78df24db39.yaml b/releasenotes/notes/tpool-size-11121f78df24db39.yaml new file mode 100644 index 00000000000..de06f2400f4 --- /dev/null +++ b/releasenotes/notes/tpool-size-11121f78df24db39.yaml @@ -0,0 +1,15 @@ +--- +features: + - Adds support to configure the size of the native thread pool used by the + cinder volume and backup services. 
For the backup we use + `backup_native_threads_pool_size` in the `[DEFAULT]` section, and for the + backends we use `backend_native_threads_pool_size` in the driver section. + +fixes: + - Fixes a concurrency issue on backups, where only 20 native threads could be + executed concurrently. Now the default will be 60, and it can be changed with + `backup_native_threads_pool_size`. + - The RBD driver can have bottlenecks if too many slow operations are happening + at the same time (for example many huge volume deletions). We can now use + the `backend_native_threads_pool_size` option in the RBD driver section to + resolve the issue.