
Now that multiple c-vol services can share the same storage backend under one cluster, a restarted service no longer cleans up every resource in the DB that was left in an ongoing status on that backend; it only cleans the ones from its own host, since those are the operations that were left "in the air" when the service was stopped. We therefore need a way to trigger cleanup of resources that were being processed by another c-vol service in the same cluster when it failed.

This patch adds a new API endpoint (/workers/cleanup), introduced as microversion 3.19, that triggers cleanup for c-vol services. The cleanup is performed by other services that share the same cluster, so at least one of them must be up for the cleanup to happen. Cleanup cannot be triggered during a cloud upgrade, although a restarted service will still clean up its own resources during an upgrade.

If no arguments are provided, cleanup will be requested for all nodes that are down, but the target nodes can be restricted with the `service_id`, `cluster_name`, `host`, `binary`, and `disabled` parameters. Specific resources can be targeted with the `resource_type` and `resource_id` parameters. Cleanup can even be forced on nodes that are up with `is_up`, but that is not recommended and should only be used if you know what you are doing, for example when you know a specific cinder-volume service is down even though it is not yet reported as down when listing the services, and you know the cluster has at least one other service that can do the cleanup.

The API returns a dictionary with two lists: services for which a cleanup request has been issued (`cleaning` key) and services that cannot be cleaned right now because there is no alternative service in their cluster to do the cleanup (`unavailable` key). Each service element in these lists contains the `id`, `host`, `binary`, and `cluster_name`. These are not the services that will perform the cleanup, but the services that will be (or could not be) cleaned up.

Specs: https://specs.openstack.org/openstack/cinder-specs/specs/newton/ha-aa-cleanup.html

APIImpact: New /workers/cleanup entry
Implements: blueprint cinder-volume-active-active-support
Change-Id: If336b6569b171846954ed6eb73f5a4314c6c7e2e
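For illustration only, a cleanup request against the new endpoint could look like the minimal sketch below. The endpoint URL, project id, and token are placeholders, and the exact request schema is an assumption; only the parameter names and the `cleaning`/`unavailable` response keys come from this change.

    import requests

    # Hypothetical deployment values; adjust to your environment.
    url = 'http://cinder-api:8776/v3/<project_id>/workers/cleanup'
    headers = {
        'X-Auth-Token': '<token>',
        # Ask for the microversion that introduces the endpoint.
        'OpenStack-API-Version': 'volume 3.19',
    }
    # Restrict the cleanup to a single down host (all filters are optional).
    body = {'cluster_name': 'cluster@backend', 'host': 'node1'}

    resp = requests.post(url, json=body, headers=headers)
    # Expected response shape:
    # {'cleaning': [{'id': ..., 'host': ..., 'binary': ..., 'cluster_name': ...}],
    #  'unavailable': [...]}
    print(resp.json())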
# Copyright 2012, Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""
Client side of the scheduler manager RPC API.
"""

from oslo_serialization import jsonutils
from oslo_utils import timeutils

from cinder.common import constants
from cinder import exception
from cinder.i18n import _
from cinder import rpc

class SchedulerAPI(rpc.RPCAPI):
    """Client side of the scheduler RPC API.

    API version history:

    .. code-block:: none

        1.0 - Initial version.
        1.1 - Add create_volume() method
        1.2 - Add request_spec, filter_properties arguments to
              create_volume()
        1.3 - Add migrate_volume_to_host() method
        1.4 - Add retype method
        1.5 - Add manage_existing method
        1.6 - Add create_consistencygroup method
        1.7 - Add get_active_pools method
        1.8 - Add sending object over RPC in create_consistencygroup method
        1.9 - Adds support for sending objects over RPC in create_volume()
        1.10 - Adds support for sending objects over RPC in retype()
        1.11 - Adds support for sending objects over RPC in
               migrate_volume_to_host()

        ... Mitaka supports messaging 1.11. Any changes to existing methods in
        1.x after this point should be done so that they can handle version cap
        set to 1.11.

        2.0 - Remove 1.x compatibility
        2.1 - Adds support for sending objects over RPC in manage_existing()
        2.2 - Sends request_spec as object in create_volume()
        2.3 - Add create_group method

        ... Newton supports messaging 2.3. Any changes to existing methods in
        2.x after this point should be done so that they can handle version cap
        set to 2.3.

        3.0 - Remove 2.x compatibility
        3.1 - Adds notify_service_capabilities()
        3.2 - Adds extend_volume()
        3.3 - Add cluster support to migrate_volume, and to
              update_service_capabilities and send the timestamp from the
              capabilities.
        3.4 - Adds work_cleanup and do_cleanup methods.
    """

    RPC_API_VERSION = '3.4'
    RPC_DEFAULT_VERSION = '3.0'
    TOPIC = constants.SCHEDULER_TOPIC
    BINARY = 'cinder-scheduler'

    def create_consistencygroup(self, ctxt, group, request_spec_list=None,
                                filter_properties_list=None):
        cctxt = self._get_cctxt()
        request_spec_p_list = [jsonutils.to_primitive(rs)
                               for rs in request_spec_list]
        msg_args = {
            'group': group, 'request_spec_list': request_spec_p_list,
            'filter_properties_list': filter_properties_list,
        }

        return cctxt.cast(ctxt, 'create_consistencygroup', **msg_args)

    def create_group(self, ctxt, group, group_spec=None,
                     request_spec_list=None, group_filter_properties=None,
                     filter_properties_list=None):
        cctxt = self._get_cctxt()
        request_spec_p_list = [jsonutils.to_primitive(rs)
                               for rs in request_spec_list]
        group_spec_p = jsonutils.to_primitive(group_spec)
        msg_args = {
            'group': group, 'group_spec': group_spec_p,
            'request_spec_list': request_spec_p_list,
            'group_filter_properties': group_filter_properties,
            'filter_properties_list': filter_properties_list,
        }

        return cctxt.cast(ctxt, 'create_group', **msg_args)

    def create_volume(self, ctxt, volume, snapshot_id=None, image_id=None,
                      request_spec=None, filter_properties=None):
        volume.create_worker()
        cctxt = self._get_cctxt()
        msg_args = {'snapshot_id': snapshot_id, 'image_id': image_id,
                    'request_spec': request_spec,
                    'filter_properties': filter_properties, 'volume': volume}
        return cctxt.cast(ctxt, 'create_volume', **msg_args)

    def migrate_volume(self, ctxt, volume, backend, force_copy=False,
                       request_spec=None, filter_properties=None):
        request_spec_p = jsonutils.to_primitive(request_spec)
        msg_args = {'request_spec': request_spec_p,
                    'filter_properties': filter_properties, 'volume': volume}
        version = '3.3'
        if self.client.can_send_version(version):
            msg_args['backend'] = backend
            msg_args['force_copy'] = force_copy
            method = 'migrate_volume'
        else:
            version = '3.0'
            msg_args['host'] = backend
            msg_args['force_host_copy'] = force_copy
            method = 'migrate_volume_to_host'

        cctxt = self._get_cctxt(version=version)
        return cctxt.cast(ctxt, method, **msg_args)

    def retype(self, ctxt, volume, request_spec=None, filter_properties=None):
        cctxt = self._get_cctxt()
        request_spec_p = jsonutils.to_primitive(request_spec)
        msg_args = {'request_spec': request_spec_p,
                    'filter_properties': filter_properties, 'volume': volume}
        return cctxt.cast(ctxt, 'retype', **msg_args)

    def manage_existing(self, ctxt, volume, request_spec=None,
                        filter_properties=None):
        cctxt = self._get_cctxt()
        request_spec_p = jsonutils.to_primitive(request_spec)
        msg_args = {
            'request_spec': request_spec_p,
            'filter_properties': filter_properties, 'volume': volume,
        }
        return cctxt.cast(ctxt, 'manage_existing', **msg_args)

    def extend_volume(self, ctxt, volume, new_size, reservations,
                      request_spec, filter_properties=None):
        cctxt = self._get_cctxt()
        if not cctxt.can_send_version('3.2'):
            msg = _('extend_volume requires cinder-scheduler '
                    'RPC API version >= 3.2.')
            raise exception.ServiceTooOld(msg)

        request_spec_p = jsonutils.to_primitive(request_spec)
        msg_args = {
            'volume': volume,
            'new_size': new_size,
            'reservations': reservations,
            'request_spec': request_spec_p,
            'filter_properties': filter_properties,
        }

        return cctxt.cast(ctxt, 'extend_volume', **msg_args)

    def get_pools(self, ctxt, filters=None):
        cctxt = self._get_cctxt()
        return cctxt.call(ctxt, 'get_pools', filters=filters)

    def update_service_capabilities(self, ctxt, service_name, host,
                                    capabilities, cluster_name,
                                    timestamp=None):
        msg_args = dict(service_name=service_name, host=host,
                        capabilities=capabilities)

        version = '3.3'
        # If server accepts timestamping the capabilities and the cluster name
        if self.client.can_send_version(version):
            # Serialize the timestamp
            timestamp = timestamp or timeutils.utcnow()
            msg_args.update(cluster_name=cluster_name,
                            timestamp=jsonutils.to_primitive(timestamp))
        else:
            version = '3.0'

        cctxt = self._get_cctxt(fanout=True, version=version)
        cctxt.cast(ctxt, 'update_service_capabilities', **msg_args)

    def notify_service_capabilities(self, ctxt, service_name,
                                    host, capabilities):
        # TODO(geguileo): Make this work with Active/Active
        cctxt = self._get_cctxt(version='3.1')
        if not cctxt.can_send_version('3.1'):
            msg = _('notify_service_capabilities requires cinder-scheduler '
                    'RPC API version >= 3.1.')
            raise exception.ServiceTooOld(msg)
        cctxt.cast(ctxt, 'notify_service_capabilities',
                   service_name=service_name, host=host,
                   capabilities=capabilities)

    def work_cleanup(self, ctxt, cleanup_request):
        """Generate individual service cleanup requests from user request."""
        if not self.client.can_send_version('3.4'):
            msg = _('One of cinder-scheduler services is too old to accept '
                    'such a request. Are you running mixed Newton-Ocata '
                    'cinder-schedulers?')
            raise exception.ServiceTooOld(msg)

        cctxt = self.client.prepare(version='3.4')
        # Response will have services that are receiving the cleanup request
        # and services that couldn't receive it since they are down.
        return cctxt.call(ctxt, 'work_cleanup',
                          cleanup_request=cleanup_request)

    def do_cleanup(self, ctxt, cleanup_request):
        """Perform this scheduler's resource cleanup as per cleanup_request."""
        if not self.client.can_send_version('3.4'):
            msg = _('One of cinder-scheduler services is too old to accept '
                    'such a request. Are you running mixed Newton-Ocata '
                    'cinder-schedulers?')
            raise exception.ServiceTooOld(msg)

        cctxt = self.client.prepare(version='3.4')
        cctxt.cast(ctxt, 'do_cleanup', cleanup_request=cleanup_request)
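
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the module: how a caller (for example the
# new /workers/cleanup API extension) might use work_cleanup(). The names
# `ctxt` and `cleanup_request` below are placeholders for a request context
# and the cleanup filter object built by the API layer; they are assumptions
# for illustration, not the exact objects Cinder uses.
#
#     rpcapi = SchedulerAPI()
#     result = rpcapi.work_cleanup(ctxt, cleanup_request)
#     # The scheduler answers with the services that are being issued a
#     # cleanup request and the services that cannot be cleaned right now
#     # because no alternative service in their cluster is available.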