Merge "Cinder replication V2"

commit 30cd678b57

@@ -255,6 +255,85 @@ class VolumeAdminController(AdminController):
                                                       new_volume, error)
        return {'save_volume_id': ret}

    @wsgi.action('os-enable_replication')
    def _enable_replication(self, req, id, body):
        """Enable/Re-enable replication on a replication capable volume.

        Admin only method, used primarily for cases like disable/re-enable
        of the replication process on a replicated volume for maintenance
        or testing.
        """

        context = req.environ['cinder.context']
        self.authorize(context, 'enable_replication')
        try:
            volume = self._get(context, id)
        except exception.VolumeNotFound as e:
            raise exc.HTTPNotFound(explanation=e.msg)
        self.volume_api.enable_replication(context, volume)
        return webob.Response(status_int=202)

    @wsgi.action('os-disable_replication')
    def _disable_replication(self, req, id, body):
        """Disable replication on a replication capable volume.

        Admin only method, used to instruct a backend to
        disable the replication process on a replicated volume.
        """

        context = req.environ['cinder.context']
        self.authorize(context, 'disable_replication')
        try:
            volume = self._get(context, id)
        except exception.VolumeNotFound as e:
            raise exc.HTTPNotFound(explanation=e.msg)
        self.volume_api.disable_replication(context, volume)
        return webob.Response(status_int=202)

    @wsgi.action('os-failover_replication')
    def _failover_replication(self, req, id, body):
        """Failover a replicating volume to its secondary.

        Admin only method, used to force a fail-over to
        a replication target. Optional secondary param to
        indicate what device to promote in case of multiple
        replication targets.
        """

        context = req.environ['cinder.context']
        self.authorize(context, 'failover_replication')
        try:
            volume = self._get(context, id)
        except exception.VolumeNotFound as e:
            raise exc.HTTPNotFound(explanation=e.msg)
        secondary = body['os-failover_replication'].get('secondary', None)
        self.volume_api.failover_replication(context, volume, secondary)
        return webob.Response(status_int=202)

    @wsgi.action('os-list_replication_targets')
    def _list_replication_targets(self, req, id, body):
        """Show replication targets for the specified volume.

        Admin only method, used to display configured
        replication target devices for the specified volume.

        """

        # TODO(jdg): We'll want an equivalent type of command
        # to query a backend host (show configuration for a
        # specified backend), but priority here is for
        # a volume as it's likely to be more useful.
        context = req.environ['cinder.context']
        self.authorize(context, 'list_replication_targets')
        try:
            volume = self._get(context, id)
        except exception.VolumeNotFound as e:
            raise exc.HTTPNotFound(explanation=e.msg)

        # Expected response is a dict with unknown
        # keys. Should be of the form:
        # {'volume_id': xx, 'replication_targets':[{k: v, k1: v1...}]}
        return self.volume_api.list_replication_targets(context, volume)


class SnapshotAdminController(AdminController):
    """AdminController for Snapshots."""
@@ -34,7 +34,10 @@
    "volume:update_readonly_flag": "",
    "volume:retype": "",
    "volume:copy_volume_to_image": "",

    "volume:enable_replication": "rule:admin_api",
    "volume:disable_replication": "rule:admin_api",
    "volume:failover_replication": "rule:admin_api",
    "volume:list_replication_targets": "rule:admin_api",
    "volume_extension:volume_admin_actions:reset_status": "rule:admin_api",
    "volume_extension:snapshot_admin_actions:reset_status": "rule:admin_api",
    "volume_extension:backup_admin_actions:reset_status": "rule:admin_api",
@@ -5849,6 +5849,61 @@ class GenericVolumeDriverTestCase(DriverTestCase):
                                                   volume_file)
            self.assertEqual(i, backup_service.restore.call_count)

    def test_enable_replication_invalid_state(self):
        volume_api = cinder.volume.api.API()
        ctxt = context.get_admin_context()
        volume = tests_utils.create_volume(ctxt,
                                           size=1,
                                           host=CONF.host,
                                           replication_status='enabled')

        self.assertRaises(exception.InvalidVolume,
                          volume_api.enable_replication,
                          ctxt, volume)

    def test_enable_replication(self):
        volume_api = cinder.volume.api.API()
        ctxt = context.get_admin_context()

        volume = tests_utils.create_volume(self.context,
                                           size=1,
                                           host=CONF.host,
                                           replication_status='disabled')
        with mock.patch.object(volume_rpcapi.VolumeAPI,
                               'enable_replication') as mock_enable_rep:
            volume_api.enable_replication(ctxt, volume)
            self.assertTrue(mock_enable_rep.called)

    def test_disable_replication_invalid_state(self):
        volume_api = cinder.volume.api.API()
        ctxt = context.get_admin_context()
        volume = tests_utils.create_volume(ctxt,
                                           size=1,
                                           host=CONF.host,
                                           replication_status='invalid-state')

        self.assertRaises(exception.InvalidVolume,
                          volume_api.disable_replication,
                          ctxt, volume)

    def test_disable_replication(self):
        volume_api = cinder.volume.api.API()
        ctxt = context.get_admin_context()

        volume = tests_utils.create_volume(self.context,
                                           size=1,
                                           host=CONF.host,
                                           replication_status='disabled')

        with mock.patch.object(volume_rpcapi.VolumeAPI,
                               'disable_replication') as mock_disable_rep:
            volume_api.disable_replication(ctxt, volume)
            self.assertTrue(mock_disable_rep.called)

            volume['replication_status'] = 'enabled'
            volume_api.disable_replication(ctxt, volume)
            self.assertTrue(mock_disable_rep.called)


class LVMISCSIVolumeDriverTestCase(DriverTestCase):
    """Test case for VolumeDriver"""
@@ -794,3 +794,11 @@ class VolumeUtilsTestCase(test.TestCase):
            mock_db, 'volume-d8cd1fe')

        self.assertFalse(result)

    def test_convert_config_string_to_dict(self):
        test_string = "{'key-1'='val-1' 'key-2'='val-2' 'key-3'='val-3'}"
        expected_dict = {'key-1': 'val-1', 'key-2': 'val-2', 'key-3': 'val-3'}

        self.assertEqual(
            expected_dict,
            volume_utils.convert_config_string_to_dict(test_string))
@@ -1505,6 +1505,121 @@ class API(base.Base):
                      resource=vol_ref)
        return vol_ref

    # Replication V2 methods ##

    # NOTE(jdg): It might be kinda silly to propagate the named
    # args with defaults all the way down through rpc into manager
    # but for now the consistency is useful, and there may be
    # some usefulness in the future (direct calls in manager?)

    # NOTE(jdg): We're relying solely on the volume-type quota mechanism;
    # we need to consider looking at how we handle configured backends
    # WRT quotas, do they count against normal quotas or not? For
    # now they're a special resource, so no.

    @wrap_check_policy
    def enable_replication(self, ctxt, volume):

        # NOTE(jdg): details like sync vs async
        # and replica count are to be set via the
        # volume-type and config files.

        # Get a fresh ref from db and check status
        volume = self.db.volume_get(ctxt, volume['id'])

        # NOTE(jdg): Set the valid statuses as a var to minimize errors
        # via typos; also, use a list, we may want to add to it some day

        # TODO(jdg): Move these up to a global list for each call and ban the
        # free-form typing of states and state checks going forward

        # NOTE(jdg): There may be a need for some backends to allow this
        # call to the driver regardless of replication_status; most likely
        # this indicates an issue with the driver, but there might be useful
        # cases to consider modifying this for in the future.
        valid_rep_status = ['disabled']
        rep_status = volume.get('replication_status', valid_rep_status[0])

        if rep_status not in valid_rep_status:
            msg = _("Invalid status to enable replication. "
                    "Valid states are: %(valid_states)s, "
                    "current replication-state is: %(curr_state)s.") % {
                'valid_states': valid_rep_status,
                'curr_state': rep_status}

            raise exception.InvalidVolume(reason=msg)

        vref = self.db.volume_update(ctxt,
                                     volume['id'],
                                     {'replication_status': 'enabling'})
        self.volume_rpcapi.enable_replication(ctxt, vref)

    @wrap_check_policy
    def disable_replication(self, ctxt, volume):

        valid_disable_status = ['disabled', 'enabled']

        # NOTE(jdg): Just use disabled here (item 1 in the list); this
        # way if someone says disable_rep on a volume that's not being
        # replicated we just say "ok, done"
        rep_status = volume.get('replication_status', valid_disable_status[0])

        if rep_status not in valid_disable_status:
            msg = _("Invalid status to disable replication. "
                    "Valid states are: %(valid_states)s, "
                    "current replication-state is: %(curr_state)s.") % {
                'valid_states': valid_disable_status,
                'curr_state': rep_status}

            raise exception.InvalidVolume(reason=msg)

        vref = self.db.volume_update(ctxt,
                                     volume['id'],
                                     {'replication_status': 'disabling'})

        self.volume_rpcapi.disable_replication(ctxt, vref)

    @wrap_check_policy
    def failover_replication(self,
                             ctxt,
                             volume,
                             secondary=None):

        # FIXME(jdg): What is the secondary argument?
        # For managed secondaries that's easy; it's a host.
        # For others, it's tricky; will propose a format for
        # secondaries that includes an ID/Name that can be
        # used as a handle.
        valid_failover_status = ['enabled']
        rep_status = volume.get('replication_status', 'na')

        if rep_status not in valid_failover_status:
            msg = _("Invalid status to failover replication. "
                    "Valid states are: %(valid_states)s, "
                    "current replication-state is: %(curr_state)s.") % {
                'valid_states': valid_failover_status,
                'curr_state': rep_status}

            raise exception.InvalidVolume(reason=msg)

        vref = self.db.volume_update(
            ctxt,
            volume['id'],
            {'replication_status': 'enabling_secondary'})

        self.volume_rpcapi.failover_replication(ctxt,
                                                vref,
                                                secondary)

    @wrap_check_policy
    def list_replication_targets(self, ctxt, volume):

        # NOTE(jdg): This collects info for the specified volume;
        # it is NOT an error if the volume is not being replicated.
        # Also, it would be worth having something at a backend/host
        # level to show an admin how a backend is configured.
        return self.volume_rpcapi.list_replication_targets(ctxt, volume)


class HostAPI(base.Base):
    def __init__(self):
@@ -221,6 +221,20 @@ volume_opts = [
               help='List of options that control which trace info '
                    'is written to the DEBUG log level to assist '
                    'developers. Valid values are method and api.'),
    cfg.BoolOpt('managed_replication_target',
                default=True,
                help='There are two types of target configurations: '
                     'managed (replicate to another configured backend) '
                     'or unmanaged (replicate to a device not managed '
                     'by Cinder).'),
    cfg.ListOpt('replication_devices',
                default=None,
                help="List of k/v pairs representing a replication target "
                     "for this backend device. For unmanaged the format "
                     "is: {'key-1'='val1' 'key-2'='val2'...},{...} "
                     "and for managed devices it's simply a list of valid "
                     "configured backend_names that the driver supports "
                     "replicating to: backend-a,backend-b...")
]

# for backward compatibility

@@ -291,6 +305,7 @@ class BaseVD(object):
        self.configuration.append_config_values(volume_opts)
        self.configuration.append_config_values(iser_opts)
        utils.setup_tracing(self.configuration.safe_get('trace_flags'))

        self.set_execute(execute)
        self._stats = {}
@@ -1384,6 +1399,187 @@ class ManageableVD(object):
        pass


@six.add_metaclass(abc.ABCMeta)
class ReplicaV2VD(object):
    """Cinder replication functionality.

    The Cinder replication functionality is set up primarily through
    the use of volume-types in conjunction with the filter scheduler.
    This requires:
    1. The driver reports "replication = True" in its capabilities
    2. The cinder.conf file includes the replication_devices section

    The driver configuration is expected to take one of the following two
    forms; see the devref replication docs for details.

    Note we provide cinder.volume.utils.convert_config_string_to_dict
    to parse this out into a usable proper dictionary.

    """

    @abc.abstractmethod
    def replication_enable(self, context, volume):
        """Enable replication on a replication capable volume.

        If the volume was created on a replication_enabled host this method
        is used to re-enable replication for the volume.

        Primarily we only want this for testing/admin purposes. The idea
        being that the bulk of the replication details are handled by the
        type definition and the driver; however, disable/enable (re-enable)
        is provided for admins to test or do maintenance, which is a
        requirement for some cloud providers.

        NOTE: This is intended as an ADMIN only call and is not
        intended to be used by end users to enable replication. We're
        leaving that to volume-type info; this is for things like
        maintenance or testing.


        :param context: security context
        :param volume: volume object returned by DB
        :response: {replication_driver_data: vendor-data} DB update

        The replication_driver_data response is vendor unique,
        data returned/used by the driver. It is expected that
        the response from the driver is in the appropriate db update
        format, in the form of a dict, where the vendor data is
        stored under the key 'replication_driver_data'

        """

        # TODO(jdg): Put a check in at the API layer to verify the host is
        # replication capable before even issuing this call (can just
        # check against the volume-type for said volume as well)

        raise NotImplementedError()

    @abc.abstractmethod
    def replication_disable(self, context, volume):
        """Disable replication on the specified volume.

        If the specified volume is currently replication enabled,
        this method can be used to disable the replication process
        on the backend.

        Note that we still send this call to a driver whose volume
        may report replication-disabled already. We do this as a
        safety mechanism to allow a driver to clean up any mismatch
        in state between Cinder and itself.

        This is intended as an ADMIN only call to allow for
        maintenance and testing. If a driver receives this call
        and the process fails for some reason the driver should
        return a status update to "replication_status=disable_failed"

        :param context: security context
        :param volume: volume object returned by DB
        :response: {replication_driver_data: vendor-data} DB update

        The replication_driver_data response is vendor unique,
        data returned/used by the driver. It is expected that
        the response from the driver is in the appropriate db update
        format, in the form of a dict, where the vendor data is
        stored under the key 'replication_driver_data'

        """

        raise NotImplementedError()

    @abc.abstractmethod
    def replication_failover(self, context, volume, secondary):
        """Force failover to a secondary replication target.

        Forces the failover action of a replicated volume to one of its
        secondary/target devices. By default the choice of target devices
        is left up to the driver. In particular we expect one-way
        replication here, but are providing a mechanism for 'n' way
        if supported/configured.

        Currently we leave it up to the driver to figure out how/what
        to do here. Rather than doing things like ID swaps, we instead
        just let the driver figure out how/where to route things.

        In cases where we might want to drop a volume-service node and
        the replication target is a configured cinder backend, we'll
        just update the host column for the volume.

        A very important point here is that in the case of a successful
        failover, we want to update the replication_status of the
        volume to "failed-over". This way there's an indication that
        things worked as expected, and that it's evident that the volume
        may no longer be replicating to another backend (primary burst
        into flames). This status will be set by the manager.

        :param context: security context
        :param volume: volume object returned by DB
        :param secondary: Specifies rep target to fail over to
        :response: dict of updates

        So the response would take the form:
            {host: <properly formatted host string for db update>,
             model_update: {standard_model_update_KVs},
             replication_driver_data: xxxxxxx}

        It is expected that the format of these responses is directly
        consumable in a db.update call.

        Additionally we utilize exception catching to report back to the
        manager when things went wrong and to inform the caller on how
        to proceed.

        """

        raise NotImplementedError()

    @abc.abstractmethod
    def list_replication_targets(self, context, vref):
        """Provide a means to obtain replication targets for a volume.

        This method is used to query a backend to get the current
        replication config info for the specified volume.

        In the case of a volume that isn't being replicated,
        the driver should return an empty list.


        Example response for replicating to a managed backend:
            {'volume_id': volume['id'],
             'targets':[{'type': 'managed',
                         'backend_name': 'backend_name'}...]}

        Example response for replicating to an unmanaged backend:
            {'volume_id': volume['id'],
             'targets':[{'type': 'unmanaged',
                         'vendor-key-1': 'value-1'}...]}

        NOTE: It's the responsibility of the driver to mask out any
        passwords or sensitive information. Also the format of the
        response allows mixed (managed/unmanaged) targets, even though
        the first iteration does not support configuring the driver in
        such a manner.

        """

        raise NotImplementedError()

    @abc.abstractmethod
    def get_replication_updates(self, context):
        """Provide a means to obtain status updates from the backend.

        Provides a concise update for backends to report any errors
        or problems with replicating volumes. The intent is we only
        return something here if there's an error or a problem, and to
        notify where the backend thinks the volume is.

        :param context: context of caller (probably don't need)
        :returns: [{volid: n, status: ok|error,...}]
        """
        # NOTE(jdg): flesh this out with implementations so we all
        # have something usable here
        raise NotImplementedError()


@six.add_metaclass(abc.ABCMeta)
class ReplicaVD(object):
    @abc.abstractmethod
@@ -1928,6 +2124,7 @@ class ISCSIDriver(VolumeDriver):
        data["driver_version"] = '1.0'
        data["storage_protocol"] = 'iSCSI'
        data["pools"] = []
        data["replication_enabled"] = False

        self._update_pools_and_stats(data)
@@ -189,7 +189,7 @@ def locked_snapshot_operation(f):
class VolumeManager(manager.SchedulerDependentManager):
    """Manages attachable block storage devices."""

    RPC_API_VERSION = '1.26'
    RPC_API_VERSION = '1.27'

    target = messaging.Target(version=RPC_API_VERSION)
@@ -405,6 +405,10 @@ class VolumeManager(manager.SchedulerDependentManager):
        self.publish_service_capabilities(ctxt)

        # conditionally run replication status task

        # FIXME(jdg): This should go away or be handled differently
        # if/when we're ready for V2 replication

        stats = self.driver.get_volume_stats(refresh=True)
        if stats and stats.get('replication', False):

@@ -413,6 +417,7 @@ class VolumeManager(manager.SchedulerDependentManager):
                self._update_replication_relationship_status(ctxt)

            self.add_periodic_task(run_replication_task)

        LOG.info(_LI("Driver initialization completed successfully."),
                 resource={'type': 'driver',
                           'id': self.driver.__class__.__name__})
@@ -1538,6 +1543,24 @@ class VolumeManager(manager.SchedulerDependentManager):
            # queue it to be sent to the Schedulers.
            self.update_service_capabilities(volume_stats)

            if volume_stats.get('replication_enabled', False):
                # replication_status provides a concise update of
                # replicating volumes and any error conditions
                # detected by the driver. The intent is we don't
                # expect/worry about updates so long as nothing
                # changes, but if something goes wrong this is a
                # handy mechanism to update the manager and the db
                # and possibly let the admin/user be notified

                # TODO(jdg): Refactor the check/update pieces to a
                # helper method we can share
                # We want to leverage some of the same update model
                # that we have in the targets update call

                replication_updates = \
                    self.driver.get_replication_updates(context)
                for update in replication_updates:
                    pass

    def _append_volume_stats(self, vol_stats):
        pools = vol_stats.get('pools', None)
        if pools and isinstance(pools, list):
@@ -2706,3 +2729,204 @@ class VolumeManager(manager.SchedulerDependentManager):
                            for key in model_update.iterkeys()}
        self.db.volume_update(ctxt.elevated(), new_volume['id'],
                              model_update_new)

    # Replication V2 methods
    def enable_replication(self, context, volume):
        """Enable replication on a replication capable volume.

        If the volume was created on a replication_enabled host this method
        is used to enable replication for the volume. Primarily used for
        testing and maintenance.

        :param context: security context
        :param volume: volume object returned by DB
        """

        # NOTE(jdg): We're going to do a fresh get from the DB and verify
        # that we are in an expected state ('enabling')
        volume = self.db.volume_get(context, volume['id'])
        if volume['replication_status'] != 'enabling':
            raise exception.InvalidVolume()

        try:
            rep_driver_data = self.driver.replication_enable(context,
                                                             volume)
        except exception.CinderException:
            err_msg = (_("Enable replication for volume failed."))
            LOG.exception(err_msg, resource=volume)
            raise exception.VolumeBackendAPIException(data=err_msg)
        try:
            if rep_driver_data:
                volume = self.db.volume_update(context,
                                               volume['id'],
                                               rep_driver_data)
        except exception.CinderException as ex:
            LOG.exception(_LE("Driver replication data update failed."),
                          resource=volume)
            raise exception.VolumeBackendAPIException(reason=ex)
        self.db.volume_update(context, volume['id'],
                              {'replication_status': 'enabled'})

    def disable_replication(self, context, volume):
        """Disable replication on the specified volume.

        If the specified volume is currently replication enabled,
        this method can be used to disable the replication process
        on the backend. This method assumes that we checked
        replication status in the API layer to ensure we should
        send this call to the driver.

        :param context: security context
        :param volume: volume object returned by DB
        """

        volume = self.db.volume_get(context, volume['id'])
        if volume['replication_status'] != 'disabling':
            raise exception.InvalidVolume()

        try:
            rep_driver_data = self.driver.replication_disable(context,
                                                              volume)
        except exception.CinderException:
            err_msg = (_("Disable replication for volume failed."))
            LOG.exception(err_msg, resource=volume)
            raise exception.VolumeBackendAPIException(data=err_msg)
        try:
            if rep_driver_data:
                volume = self.db.volume_update(context,
                                               volume['id'],
                                               rep_driver_data)
        except exception.CinderException as ex:
            LOG.exception(_LE("Driver replication data update failed."),
                          resource=volume)
            raise exception.VolumeBackendAPIException(reason=ex)
        self.db.volume_update(context,
                              volume['id'],
                              {'replication_status': 'disabled'})

    def failover_replication(self, context, volume, secondary=None):
        """Force failover to a secondary replication target.

        Forces the failover action of a replicated volume to one of its
        secondary/target devices. By default the choice of target devices
        is left up to the driver. In particular we expect one-way
        replication here, but are providing a mechanism for 'n' way
        if supported/configured.

        Currently we leave it up to the driver to figure out how/what
        to do here. Rather than doing things like ID swaps, we instead
        just let the driver figure out how/where to route things.

        In cases where we might want to drop a volume-service node and
        the replication target is a configured cinder backend, we'll
        just update the host column for the volume.

        :param context: security context
        :param volume: volume object returned by DB
        :param secondary: Specifies rep target to fail over to
        """
        try:
            volume_updates = self.driver.replication_failover(context,
                                                              volume,
                                                              secondary)

            # volume_updates is a dict containing a report of relevant
            # items based on the backend and how it operates or what it needs
            # {'host': 'secondary-configured-cinder-backend',
            #  'model_update': {'update-all-the-provider-info-etc'},
            #  'replication_driver_data': 'driver-specific-stuff-for-db'}
            # Where 'host' is a valid cinder host string like
            # 'foo@bar#baz'
            # model_update and replication_driver_data are required

        except exception.CinderException:

            # FIXME(jdg): We need to create a few different exceptions here
            # and handle each differently:
            # 1. I couldn't failover, but the original setup is ok so proceed
            #    as if this were never called
            # 2. I ran into a problem and I have no idea what state things
            #    are in, so set volume to error
            # 3. I ran into a problem and a human needs to come fix me up

            err_msg = (_("Replication failover for volume failed."))
            LOG.exception(err_msg, resource=volume)
            self.db.volume_update(context,
                                  volume['id'],
                                  {'replication_status': 'error'})
            raise exception.VolumeBackendAPIException(data=err_msg)

        # TODO(jdg): Come back and condense these into a single update
        update = {}
        model_update = volume_updates.get('model_update', None)
        driver_update = volume_updates.get('replication_driver_data', None)
        host_update = volume_updates.get('host', None)

        if model_update:
            update['model'] = model_update
        if driver_update:
            update['replication_driver_data'] = driver_update
        if host_update:
            update['host'] = host_update

        if update:
            try:
                volume = self.db.volume_update(
                    context,
                    volume['id'],
                    update)

            except exception.CinderException as ex:
                LOG.exception(_LE("Driver replication data update failed."),
                              resource=volume)
                raise exception.VolumeBackendAPIException(reason=ex)

        # NOTE(jdg): We're setting replication status to failed-over
        # which indicates the volume is ok, things went as expected but
        # we're likely not replicating any longer because... well we
        # did a fail-over. In the case of the admin bringing the primary
        # back online, he/she can use enable_replication to get this
        # state set back to enabled.

        # Also, in the case of multiple targets, the driver can update
        # status in the rep-status checks if it still has valid replication
        # targets that the volume is being replicated to.

        self.db.volume_update(context,
                              volume['id'],
                              {'replication_status': 'failed-over'})

    def list_replication_targets(self, context, volume):
        """Provide a means to obtain replication targets for a volume.

        This method is used to query a backend to get the current
        replication config info for the specified volume.

        In the case of a volume that isn't being replicated,
        the driver should return an empty list.


        Example response for replicating to a managed backend:
            {'volume_id': volume['id'],
             'targets':[{'managed_host': 'backend_name'}...]}

        Example response for replicating to an unmanaged backend:
            {'volume_id': volume['id'], 'targets':[{'san_ip': '1.1.1.1',
                                                    'san_login': 'admin'},
                                                    ....]}

        NOTE: It's the responsibility of the driver to mask out any
        passwords or sensitive information.

        """

        try:
            replication_targets = self.driver.list_replication_targets(
                context, volume)

        except exception.CinderException:
            err_msg = (_("Get replication targets failed."))
            LOG.exception(err_msg)
            raise exception.VolumeBackendAPIException(data=err_msg)

        return replication_targets
@@ -72,6 +72,7 @@ class VolumeAPI(object):
        1.26 - Adds support for sending objects over RPC in
               create_consistencygroup(), create_consistencygroup_from_src(),
               update_consistencygroup() and delete_consistencygroup().
        1.27 - Adds support for replication V2
    """

    BASE_RPC_API_VERSION = '1.0'

@@ -81,7 +82,7 @@ class VolumeAPI(object):
        target = messaging.Target(topic=CONF.volume_topic,
                                  version=self.BASE_RPC_API_VERSION)
        serializer = objects_base.CinderObjectSerializer()
        self.client = rpc.get_client(target, '1.26', serializer=serializer)
        self.client = rpc.get_client(target, '1.27', serializer=serializer)

    def create_consistencygroup(self, ctxt, group, host):
        new_host = utils.extract_host(host)

@@ -260,3 +261,29 @@ class VolumeAPI(object):
                          volume=volume,
                          new_volume=new_volume,
                          volume_status=original_volume_status)

    def enable_replication(self, ctxt, volume):
        new_host = utils.extract_host(volume['host'])
        cctxt = self.client.prepare(server=new_host, version='1.27')
        cctxt.cast(ctxt, 'enable_replication', volume=volume)

    def disable_replication(self, ctxt, volume):
        new_host = utils.extract_host(volume['host'])
        cctxt = self.client.prepare(server=new_host, version='1.27')
        cctxt.cast(ctxt, 'disable_replication',
                   volume=volume)

    def failover_replication(self,
                             ctxt,
                             volume,
                             secondary=None):
        new_host = utils.extract_host(volume['host'])
        cctxt = self.client.prepare(server=new_host, version='1.27')
        cctxt.cast(ctxt, 'failover_replication',
                   volume=volume,
                   secondary=secondary)

    def list_replication_targets(self, ctxt, volume):
        new_host = utils.extract_host(volume['host'])
        cctxt = self.client.prepare(server=new_host, version='1.27')
        return cctxt.call(ctxt, 'list_replication_targets', volume=volume)
@@ -15,6 +15,7 @@
"""Volume-related Utilities and helpers."""


import ast
import math
import re
import uuid

@@ -569,3 +570,27 @@ def check_already_managed_volume(db, vol_name):
    except (exception.VolumeNotFound, ValueError):
        return False
    return False


def convert_config_string_to_dict(config_string):
    """Convert a config file replication string to a dict.

    The only supported form is as follows:
    "{'key-1'='val-1' 'key-2'='val-2'...}"

    :param config_string: Properly formatted string to convert to dict.
    :response: dict of string values
    """

    resultant_dict = {}

    try:
        st = config_string.replace("=", ":")
        st = st.replace(" ", ", ")
        resultant_dict = ast.literal_eval(st)
    except Exception:
        LOG.warning(_LW("Error encountered translating config_string: "
                        "%(config_string)s to dict"),
                    {'config_string': config_string})

    return resultant_dict
@@ -31,6 +31,7 @@ Programming HowTos and Tutorials
   addmethod.openstackapi
   drivers
   gmr
   replication


Background Concepts for Cinder
doc/source/devref/replication.rst (new file, 166 lines)

@@ -0,0 +1,166 @@
Replication
============

How to implement replication features in a backend driver.

For backend devices that offer replication features, Cinder
provides a common mechanism for exposing that functionality
on a per-volume basis while still trying to allow
flexibility for the varying implementations and requirements
of all the different backend devices.

Most of the configuration is done via the cinder.conf file
under the driver section and through the use of volume types.

Config file examples
--------------------

The cinder.conf file is used to specify replication target
devices for a specific driver. There are two types of target
devices that can be configured:

   1. Cinder Managed (represented by the volume-backend name)
   2. External devices (require vendor specific data to configure)

NOTE that it is expected to be an error to have both the managed and
unmanaged replication config variables set for a single driver.

Cinder managed target device
-----------------------------

In the case of a Cinder managed target device, we simply
use another Cinder configured backend as the replication
target.

For example, if we have two backend devices foo and biz that
can replicate to each other, we can set up backend biz as
a replication target for device foo using the following
config entries::

    .....
    [driver-biz]
    volume_driver=xxxx
    volume_backend_name=biz

    [driver-foo]
    volume_driver=xxxx
    volume_backend_name=foo
    managed_replication_target=True
    replication_devices=volume_backend_name-1,volume_backend_name-2....

Notice that the only change from the usual driver configuration
section here is the addition of the replication_devices option.


Unmanaged target device
------------------------

In some cases the replication target device may not be a
configured Cinder backend. In this case it's the configured
driver's responsibility to route commands to the active device
and to update provider info to ensure the proper iSCSI targets
are being used.

This type of config changes only slightly, and instead of using
a backend_name, it takes the vendor unique config options::

    .....
    [driver-foo]
    volume_driver=xxxx
    volume_backend_name=foo
    managed_replication_target=False
    replication_devices={'key1'='val1' 'key2'='val2' ...},
                        {'key7'='val7'....},...

Note the key/value entries can be whatever the device requires; we treat the actual
variable in the config parser as a comma delimited list, the {} and = notations are
convenient/common parser delimiters, and the K/V entries are space separated.

We provide a literal evaluator to convert these entries into a proper dict, so
the format is extremely important here.
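
As a sketch of what that evaluator produces (the helper is
cinder.volume.utils.convert_config_string_to_dict from this change; the
sample string mirrors the unit test above)::

    from cinder.volume import utils as volume_utils

    config_string = "{'key-1'='val-1' 'key-2'='val-2'}"
    # '=' becomes ':' and the spaces become ',' before ast.literal_eval()
    result = volume_utils.convert_config_string_to_dict(config_string)
    # result == {'key-1': 'val-1', 'key-2': 'val-2'}
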

Volume Types / Extra Specs
---------------------------
In order for a user to specify they'd like a replicated volume, there needs to be
a corresponding Volume Type created by the Cloud Administrator.

There's a good deal of flexibility in using volume types. The scheduler can
send the create request to a backend that provides replication by simply
providing the replication=enabled key in the extra-specs of the volume type.

For example, if the type was set to simply create the volume on any (or the only)
backend that supports replication, the extra-specs entry would be::

    {replication: enabled}

If you need to specify a particular backend device (multiple backends supporting
replication)::

    {replication: enabled, volume_backend_name: foo}

Additionally you could provide further details using scoped keys::

    {replication: enabled, volume_backend_name: foo,
     replication:replication_type: async}

Again, it's up to the driver to parse the volume type info on create and set things up
as requested. While the scoping key can be anything, it's strongly recommended that all
backends utilize the same key (replication) for consistency and to make things easier for
the Cloud Administrator.
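
As an illustration, a hypothetical admin workflow for creating such a type with
the cinder CLI might look like the following (the type name replicated-foo is
just an example)::

    cinder type-create replicated-foo
    cinder type-key replicated-foo set replication=enabled \
        volume_backend_name=foo replication:replication_type=async

A volume created with --volume-type replicated-foo would then be scheduled to
backend foo, with the driver interpreting the replication keys at create time.
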

Capabilities reporting
----------------------
The following entries are expected to be added to the stats/capabilities update for
replication configured devices::

    stats["replication_enabled"] = True|False
    stats["replication_type"] = ['async', 'sync'...]
    stats["replication_count"] = len(self.cluster_pairs)

Required methods
-----------------
The number of API methods associated with replication is intentionally very limited,
and they are Admin only methods.

They include::

    replication_enable(self, context, volume)
    replication_disable(self, context, volume)
    replication_failover(self, context, volume, secondary)
    list_replication_targets(self, context, vref)

**replication_enable**

Used to notify the driver that we would like to enable replication on a replication capable volume.
NOTE this is NOT used as the initial create replication command; that's handled by the volume-type at
create time. This is provided as a method for an Admin that may have needed to disable replication
on a volume for maintenance or whatever reason to signify that they'd like to "resume" replication on
the given volume.

**replication_disable**

Used to notify the driver that we would like to disable replication on a replication capable volume.
This again would be used by a Cloud Administrator for things like maintenance etc.

**replication_failover**

Used to instruct the backend to fail over to the secondary/target device on a replication capable volume.
This may be used for triggering a fail-over manually or for testing purposes.

Note that ideally drivers will know how to update the volume reference properly so that Cinder is now
pointing to the secondary. Also, while it's not required at this time, ideally the command would
act as a toggle, allowing the admin to switch back and forth between primary and secondary and back to primary.
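
For context, these driver methods are driven by the new admin volume actions.
A failover request, shown here as a hypothetical HTTP call against the v2
volumes API (the target name backend-biz is a placeholder), would look
roughly like::

    POST /v2/{project_id}/volumes/{volume_id}/action
    {"os-failover_replication": {"secondary": "backend-biz"}}

The "secondary" key is optional; the other actions (os-enable_replication,
os-disable_replication, os-list_replication_targets) take an empty body,
e.g. {"os-enable_replication": {}}.
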

**list_replication_targets**

Used by the admin to query a volume for a list of configured replication targets.
The return for this call is expected to mimic the form used in the config file.

For a volume replicating to managed replication targets::

    {'volume_id': volume['id'], 'targets':[{'type': 'managed',
                                            'backend_name': 'backend_name'}...]}

For a volume replicating to external/unmanaged targets::

    {'volume_id': volume['id'], 'targets':[{'type': 'unmanaged',
                                            'san_ip': '127.0.0.1',
                                            'san_login': 'admin'...}...]}
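
To tie the pieces together, here is a minimal, hypothetical skeleton of a
driver implementing the new interface (class, backend, and target names are
illustrative, and the real replication work is elided)::

    class FooDriver(driver.VolumeDriver, driver.ReplicaV2VD):

        def replication_enable(self, context, volume):
            # Start/resume replication on the backend, then hand
            # vendor-specific state back for the DB update.
            return {'replication_driver_data': 'vendor-state'}

        def replication_disable(self, context, volume):
            return {'replication_driver_data': ''}

        def replication_failover(self, context, volume, secondary):
            # Route I/O to the secondary; 'host' lets the manager
            # repoint the volume at a managed cinder backend.
            return {'host': 'stack@biz#pool',
                    'model_update': {},
                    'replication_driver_data': 'vendor-state'}

        def list_replication_targets(self, context, vref):
            return {'volume_id': vref['id'],
                    'targets': [{'type': 'managed',
                                 'backend_name': 'biz'}]}

        def get_replication_updates(self, context):
            # Nothing to report unless there's an error.
            return []
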

@@ -64,6 +64,11 @@
    "volume_extension:replication:promote": "rule:admin_api",
    "volume_extension:replication:reenable": "rule:admin_api",

    "volume:enable_replication": "rule:admin_api",
    "volume:disable_replication": "rule:admin_api",
    "volume:failover_replication": "rule:admin_api",
    "volume:list_replication_targets": "rule:admin_api",

    "backup:create" : "",
    "backup:delete": "",
    "backup:get": "",