Support new storage driver for backup objects

Story: 2010956
Task: 49038

Change-Id: Ia3283a547f14547b0d7c595cc8c0ff8cbc0b88c5
This commit is contained in:
Bo Tran 2023-12-12 16:36:03 +07:00 committed by wu.chunyang
parent e9fa651ac0
commit e4113c556f
34 changed files with 850 additions and 70 deletions

View File

@ -54,6 +54,7 @@ Response Parameters
- status: backup_status
- updated: updated
- project_id: project_uuid
- storage_driver: storage_driver
Response Example
@ -104,6 +105,7 @@ Request
- description: backup_description
- swift_container: swift_container
- restore_from: backup_restore_from
- storage_driver: storage_driver
Request Example
---------------
@ -132,6 +134,7 @@ Response Parameters
- status: backup_status
- updated: updated
- project_id: project_uuid
- storage_driver: storage_driver
Response Example
@ -181,6 +184,7 @@ Response Parameters
- status: backup_status
- updated: updated
- project_id: project_uuid
- storage_driver: storage_driver
Response Example

View File

@ -855,6 +855,12 @@ slave_of:
in: body
required: false
type: string
storage_driver:
description: |
The storage driver used to store the backup data.
type: string
in: body
required: false
swift_container:
description: |
User defined swift container name. When creating backups, the swift

View File

@ -3,6 +3,7 @@
"description": "My Backup",
"incremental": 0,
"instance": "44b277eb-39be-4921-be31-3d61b43651d7",
"name": "snapshot"
"name": "snapshot",
"storage_driver": "cinder"
}
}

View File

@ -14,6 +14,7 @@
"parent_id": null,
"size": null,
"status": "NEW",
"updated": "2014-10-30T12:30:00"
"updated": "2014-10-30T12:30:00",
"storage_driver": "cinder"
}
}

View File

@ -14,6 +14,7 @@
"parent_id": null,
"size": 0.14,
"status": "COMPLETED",
"updated": "2014-10-30T12:30:00"
"updated": "2014-10-30T12:30:00",
"storage_driver": "cinder"
}
}

View File

@ -15,7 +15,8 @@
"parent_id": null,
"size": 0.14,
"status": "COMPLETED",
"updated": "2014-10-30T12:30:00"
"updated": "2014-10-30T12:30:00",
"storage_driver": "cinder"
},
{
"created": "2014-10-30T12:30:00",
@ -32,7 +33,8 @@
"parent_id": "a9832168-7541-4536-b8d9-a8a9b79cf1b4",
"size": 0.14,
"status": "COMPLETED",
"updated": "2014-10-30T12:30:00"
"updated": "2014-10-30T12:30:00",
"storage_driver": "swift"
}
]
}

View File

@ -365,6 +365,48 @@ your-registry/your-repo/db-backup-mariadb:10.3 & your-registry/your-repo/db-back
Finally, when trove-guestagent does backup/restore, it will pull this image with the tag equals datastore version.
Configure backup storage strategy
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
By default, Trove uses Swift as the backup storage backend. When using the Swift storage backend, it is relatively
slow to back up and restore instances, as the Trove guest agent needs to run the backup container first, then perform
the backup action and finally save the result to Swift.
Trove also supports a Cinder storage strategy. Compared to Swift, Cinder can perform backup/restore actions quickly,
because the guest agent doesn't need to do the backup inside the instance; instead, the Trove taskmanager calls the
Cinder API to take a snapshot of the data disk, and then restores from the snapshot later.
pros and cons of swift and cinder:
* The Swift backend supports restoring from any accessible location, but is slower.
* The Cinder backend backs up more quickly, but doesn't support restoring across storage systems.
Administrators can configure different strategies for backups and replication according to their demands.
For example:
Setting the relevant opts in `/etc/trove/trove-guestagent.conf`
.. path /etc/trove/trove-guestagent.conf
.. code-block:: ini
[DEFAULT]
storage_strategy = swift
replica_snapshot_driver = cinder
Setting the relevant opts in `/etc/trove/trove.conf` as well.
.. path /etc/trove/trove.conf
.. code-block:: ini
[DEFAULT]
storage_strategy = swift
replica_snapshot_driver = cinder
.. note::
Administrators need to set these opts in both `trove-guestagent.conf` and `trove.conf`.
Initialize Trove Database
~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -0,0 +1,6 @@
---
features:
- |
Add support for the new backup storage driver "cinder".
Provisioning a new replica instance or creating a backup is
faster when cinder is used as the backend storage.

View File

@ -52,7 +52,7 @@ class Backup(object):
@classmethod
def create(cls, context, instance, name, description=None,
parent_id=None, incremental=False, swift_container=None,
restore_from=None):
restore_from=None, storage_driver=None):
"""
create db record for Backup
:param cls:
@ -66,6 +66,8 @@ class Backup(object):
:param swift_container: Swift container name.
:param restore_from: A dict that contains backup information of another
region.
:param storage_driver: The storage driver that is used
    to save the backup data.
:return:
"""
backup_state = BackupState.NEW
@ -77,6 +79,9 @@ class Backup(object):
backup_type = constants.BACKUP_TYPE_FULL
size = None
if not storage_driver:
storage_driver = CONF.storage_strategy
if restore_from:
# Check location and datastore version.
LOG.info(f"Restoring backup, restore_from: {restore_from}")
@ -112,7 +117,8 @@ class Backup(object):
cls.validate_can_perform_action(instance_model, 'backup_create')
cls.verify_swift_auth_token(context)
if storage_driver == "swift":
cls.verify_swift_auth_token(context)
ds = instance_model.datastore
ds_version = instance_model.datastore_version
@ -151,7 +157,8 @@ class Backup(object):
location=location,
checksum=checksum,
backup_type=backup_type,
size=size
size=size,
storage_driver=storage_driver
)
except exception.InvalidModelError as ex:
LOG.exception("Unable to create backup record for "
@ -169,6 +176,7 @@ class Backup(object):
'parent': parent,
'datastore': ds.name,
'datastore_version': ds_version.name,
'storage_driver': storage_driver,
'swift_container': swift_container
}
api.API(context).create_backup(backup_info, instance_id)
@ -412,7 +420,7 @@ class DBBackup(DatabaseModelBase):
'size', 'tenant_id', 'state', 'instance_id',
'checksum', 'backup_timestamp', 'deleted', 'created',
'updated', 'deleted_at', 'parent_id',
'datastore_version_id']
'datastore_version_id', 'storage_driver']
_table_name = 'backups'
@property
@ -457,6 +465,16 @@ class DBBackup(DatabaseModelBase):
return datastore_models.DatastoreVersion.load_by_uuid(
self.datastore_version_id)
@property
def is_snapshot(self):
    """Whether this backup is stored as a Cinder volume snapshot.

    Determined from the location URI scheme: a location beginning with
    ``cinder://`` denotes a snapshot-based backup; anything else
    (including an empty/missing location) is treated as an
    object-storage backup.
    """
    location = self.location
    return bool(location) and location.startswith("cinder://")
def check_swift_object_exist(self, context, verify_checksum=False):
try:
parts = self.location.split('/')
@ -481,6 +499,24 @@ class DBBackup(DatabaseModelBase):
else:
raise exception.SwiftAuthError(tenant_id=context.project_id)
def check_volume_snapshot_exist(self, context, verify_checksum=False):
    """Check that the Cinder snapshot backing this backup still exists.

    :param context: request context used to build the cinder client.
    :param verify_checksum: accepted for interface parity with
        check_swift_object_exist; not used for snapshots.
    :return: True if the snapshot can be fetched from Cinder, else False.
    """
    try:
        # The location is expected to end with the snapshot UUID,
        # e.g. cinder://<snapshot-id>.
        snapshot_id = self.location.split('/')[-1]
        client = clients.create_cinder_client(context)
        LOG.debug("Checking if backup exists in %s", self.location)
        client.volume_snapshots.get(snapshot_id)
        return True
    except Exception as e:
        # Best-effort: any failure (missing snapshot, auth or API error)
        # is reported as "does not exist".
        LOG.error(e)
        return False
def check_location_exist(self, context, verify_checksum=False):
    """Verify the backup artifact still exists in its storage backend.

    Dispatches to the Cinder snapshot check for snapshot-based backups
    and to the Swift object check otherwise.
    """
    checker = (self.check_volume_snapshot_exist if self.is_snapshot
               else self.check_swift_object_exist)
    return checker(context, verify_checksum)
class DBBackupStrategy(DatabaseModelBase):
"""A table for backup strategy records."""

View File

@ -87,6 +87,7 @@ class BackupController(wsgi.Controller):
incremental = data.get('incremental')
swift_container = data.get('swift_container')
restore_from = data.get('restore_from')
storage_driver = data.get('storage_driver')
if swift_container:
utils.validate_command(swift_container)
@ -108,7 +109,8 @@ class BackupController(wsgi.Controller):
backup = Backup.create(context, instance, name, desc,
parent_id=parent, incremental=incremental,
swift_container=swift_container,
restore_from=restore_from)
restore_from=restore_from,
storage_driver=storage_driver)
return wsgi.Result(views.BackupView(backup).data(), 202)

View File

@ -536,7 +536,16 @@ common_opts = [
cfg.BoolOpt(
'enable_volume_az', default=False,
help='If true create the volume in the same availability-zone as the '
'instance')
'instance'),
cfg.StrOpt(
'replica_snapshot_driver',
choices=['cinder', 'swift'],
help='By default, this is the same as storage_strategy. It may be '
     'useful to set a different driver to avoid the limitations of '
     'backup drivers. For example, we can set this to cinder when '
     'storage_strategy is swift to increase the backup speed, and '
     'set it to swift when storage_strategy is cinder to support '
     'backup across AZs or regions'),
]
# Mysql

View File

@ -0,0 +1,50 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""add_storage_driver_to_backups_table
Revision ID: 7ee6154548a6
Revises: cee1bcba3541
Create Date: 2024-06-18 16:14:38.561592
"""
from typing import Sequence, Union
from alembic import op
from sqlalchemy import Column
from sqlalchemy import Text, String
from sqlalchemy.sql import table, column
# revision identifiers, used by Alembic.
revision: str = '7ee6154548a6'
down_revision: Union[str, None] = 'cee1bcba3541'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the backups.storage_driver column and backfill existing rows.

    Pre-existing backups were all stored in Swift, so rows with a NULL
    storage_driver are backfilled with 'swift'.
    """
    op.add_column("backups",
                  Column('storage_driver', Text(), nullable=True))

    backups = table(
        "backups",
        column("storage_driver", String)
    )
    op.execute(
        backups.update()
        .where(backups.c.storage_driver.is_(None))
        .values(storage_driver="swift")
    )
def downgrade() -> None:
    """Remove the storage_driver column added by this revision."""
    op.drop_column("backups", 'storage_driver')

View File

@ -46,7 +46,7 @@ class API(object):
# API_LATEST_VERSION should bump the minor number each time
# a method signature is added or changed
API_LATEST_VERSION = '1.1'
API_LATEST_VERSION = '1.2'
# API_BASE_VERSION should only change on major version upgrade
API_BASE_VERSION = '1.0'
@ -60,6 +60,7 @@ class API(object):
'newton': '1.0',
'ussuri': '1.0',
'victoria': '1.1',
'epoxy': '1.2',
'latest': API_LATEST_VERSION
}
@ -699,3 +700,23 @@ class API(object):
self._cast("rebuild", version=version,
ds_version=ds_version, config_contents=config_contents,
config_overrides=config_overrides)
def pre_create_backup(self, **kwargs):
    """Synchronously ask the guest to prepare for a snapshot backup.

    Requires guest RPC API >= 1.2 (the version that introduced the
    pre/post create-backup hooks).

    :raises GuestError: if the guest cannot speak version 1.2.
    :return: datastore-specific state reported by the guest
        (e.g. binlog coordinates).
    """
    version = "1.2"
    if not self.client.can_send_version(version):
        raise exception.GuestError(
            original_message="Operation requires guest version 1.2 or "
                             "later")
    return self._call("pre_create_backup", self.agent_high_timeout,
                      version=version, **kwargs)
def post_create_backup(self, **kwargs):
    """Synchronously ask the guest to clean up after a snapshot backup.

    Requires guest RPC API >= 1.2 (the version that introduced the
    pre/post create-backup hooks).

    :raises GuestError: if the guest cannot speak version 1.2.
    :return: datastore-specific result reported by the guest.
    """
    version = "1.2"
    if not self.client.can_send_version(version):
        raise exception.GuestError(
            original_message="Operation requires guest version 1.2 or "
                             "later")
    return self._call("post_create_backup", self.agent_high_timeout,
                      version=version, **kwargs)

View File

@ -921,3 +921,27 @@ def get_filesystem_size(path):
"""
ret = os.statvfs(path)
return ret.f_blocks * ret.f_frsize
def sync(path):
    """Flush pending writes for the filesystem holding *path* to disk.

    :param path: directory whose filesystem should be synced.
    :raises UnprocessableEntity: if *path* is not an existing directory.
    """
    if exists(path, is_directory=True, as_root=True):
        execute_shell_cmd('sync', [], path, as_root=True)
        return
    raise exception.UnprocessableEntity(
        _("Invalid path: %s") % path)
def fsfreeze(mount_point):
    """Suspend writes on the filesystem mounted at *mount_point*.

    :param mount_point: mount point of the filesystem to freeze.
    :raises UnprocessableEntity: if *mount_point* is not an existing
        directory.
    """
    if exists(mount_point, is_directory=True, as_root=True):
        # fsfreeze -f <mount_point>
        execute_shell_cmd('fsfreeze', [('f', True)], mount_point,
                          as_root=True)
        return
    raise exception.UnprocessableEntity(
        _("Invalid path: %s") % mount_point)
def fsunfreeze(mount_point):
    """Resume writes on the filesystem mounted at *mount_point*.

    :param mount_point: mount point of the filesystem to unfreeze.
    :raises UnprocessableEntity: if *mount_point* is not an existing
        directory.
    """
    if exists(mount_point, is_directory=True, as_root=True):
        # fsfreeze -u <mount_point>
        execute_shell_cmd('fsfreeze', [('u', True)], mount_point,
                          as_root=True)
        return
    raise exception.UnprocessableEntity(
        _("Invalid path: %s") % mount_point)

View File

@ -792,7 +792,13 @@ class Manager(periodic_task.PeriodicTasks):
'encrypted backup.')
try:
self.app.restore_backup(context, backup_info, restore_location)
storage_driver = backup_info.get(
'storage_driver', CONF.storage_strategy)
if storage_driver in ["cinder"]:
self.app.restore_snapshot(
context, backup_info, restore_location)
else:
self.app.restore_backup(context, backup_info, restore_location)
except Exception:
LOG.error("Failed to restore from backup %s.", backup_info['id'])
self.status.set_status(service_status.ServiceStatuses.FAILED)
@ -941,3 +947,32 @@ class Manager(periodic_task.PeriodicTasks):
def wait_for_txn(self, context, txn):
raise exception.DatastoreOperationNotSupported(
operation='wait_for_txn', datastore=self.manager)
def pre_create_backup(self, context, **kwargs):
    """Default guest-side hook run before a snapshot-based backup.

    Syncs dirty pages to disk and freezes the data filesystem so the
    Cinder snapshot taken afterwards is crash-consistent.  Datastore
    managers may override this to also record replication state.

    :return: dict of datastore-specific backup metadata (empty here).
    """
    # The original message claimed "No pre_create_backup work has been
    # defined", which was false -- this default implementation does
    # sync + fsfreeze.
    LOG.info('Running default pre_create_backup (sync and freeze '
             'filesystem).')
    try:
        mount_point = CONF.get(CONF.datastore_manager).mount_point
        # Flush buffered writes before freezing.
        operating_system.sync(mount_point)
        # Freeze the FS; post_create_backup is expected to unfreeze it.
        operating_system.fsfreeze(mount_point)
    except Exception as e:
        # NOTE(review): errors are swallowed here while datastore-specific
        # overrides raise BackupCreationError -- confirm this best-effort
        # behavior is intended.
        LOG.error("Run pre_create_backup failed, error: %s" % str(e))
    return {}
def post_create_backup(self, context, **kwargs):
    """Default guest-side hook run after a snapshot-based backup.

    Unfreezes the data filesystem frozen by pre_create_backup.  Errors
    are logged but not raised.

    :return: dict of datastore-specific metadata (empty here).
    """
    # The original message claimed "No post_create_backup work has been
    # defined", which was false -- this default implementation unfreezes
    # the filesystem.
    LOG.info('Running default post_create_backup (unfreeze filesystem).')
    try:
        mount_point = CONF.get(CONF.datastore_manager).mount_point
        operating_system.fsunfreeze(mount_point)
    except Exception as e:
        LOG.error("Run post_create_backup failed, error: %s" % str(e))
    return {}

View File

@ -1,4 +1,5 @@
# Copyright 2020 Catalyst Cloud
# Copyright 2023 Bizfly Cloud
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -11,11 +12,22 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_log import log as logging
from trove.common import cfg
from trove.common import exception
from trove.guestagent.common import operating_system
from trove.guestagent.datastore.mariadb import service
from trove.guestagent.datastore.mysql_common import manager
from trove.guestagent.datastore.mysql_common import service as mysql_service
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
class Manager(manager.MySqlManager):
def __init__(self):
status = mysql_service.BaseMySqlAppStatus(self.docker_client)
@ -32,3 +44,36 @@ class Manager(manager.MySqlManager):
"""
return (f'--ignore-db-dir=lost+found --ignore-db-dir=conf.d '
f'--datadir={data_dir}')
def pre_create_backup(self, context, **kwargs):
    """Prepare MariaDB for a snapshot-based backup.

    Takes a global read lock, records the current binlog file/position
    and MariaDB GTID into the xtrabackup_binlog_info file inside the
    data directory (same layout xtrabackup writes, so restore/replica
    tooling can reuse it), then syncs and freezes the data filesystem.
    The lock is released and the filesystem unfrozen by
    post_create_backup.

    :return: dict with log_file, log_position and log_executed_gtid_set.
    :raises BackupCreationError: if any step fails.
    """
    LOG.info("Running pre_create_backup")
    status = {}
    try:
        INFO_FILE = "%s/xtrabackup_binlog_info" % self.app.get_data_dir()
        # Block writes so the recorded coordinates stay consistent with
        # the frozen filesystem.
        self.app.execute_sql("FLUSH TABLES WITH READ LOCK;")
        stt = self.app.execute_sql("SHOW MASTER STATUS;")
        for row in stt:
            status = {
                'log_file': row._mapping['File'],
                'log_position': row._mapping['Position']
            }
        # MariaDB exposes its GTID through this global variable (there is
        # no Executed_Gtid_Set column as in MySQL).
        for g in self.app.execute_sql(
                "select @@global.gtid_current_pos;"):
            gtid = g._mapping['@@global.gtid_current_pos']
            status['log_executed_gtid_set'] = gtid
        # Tab-separated: <file>\t<position>\t<gtid>
        binlog = "\t".join(map(str, [
            status['log_file'],
            status['log_position'],
            status['log_executed_gtid_set']]))
        operating_system.write_file(INFO_FILE, binlog, as_root=True)
        mount_point = CONF.get(CONF.datastore_manager).mount_point
        operating_system.sync(mount_point)
        operating_system.fsfreeze(mount_point)
    except Exception as e:
        LOG.error("Run pre_create_backup failed, error: %s" % str(e))
        raise exception.BackupCreationError(str(e))
    return status

View File

@ -16,11 +16,16 @@
from oslo_log import log as logging
from trove.common import cfg
from trove.common import constants
from trove.common import exception
from trove.common import utils
from trove.guestagent.datastore.mysql_common import service as mysql_service
from trove.guestagent.utils import docker as docker_util
from trove.guestagent.utils import mysql as mysql_util
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
@ -80,6 +85,32 @@ class MariaDBApp(mysql_service.BaseMySqlApp):
with mysql_util.SqlClient(self.get_engine()) as client:
client.execute(cmd)
def reset_data_for_restore_snapshot(self, data_dir):
    """Clear replica state left in a data dir restored from a snapshot.

    Starts a temporary MariaDB container against *data_dir* with
    --skip-slave-start so no replication thread runs, calls
    stop_slave(for_failover=False) to remove the slave status, and
    always removes the temporary container afterwards.

    :param data_dir: path of the restored data directory on the host.
    """
    command = "mysqld --skip-slave-start=ON --datadir=%s" % data_dir
    # Mounts the container needs: config, socket directory, and the
    # restored data directory itself.
    extra_volumes = {
        "/etc/mysql": {"bind": "/etc/mysql", "mode": "rw"},
        constants.MYSQL_HOST_SOCKET_PATH: {
            "bind": "/var/run/mysqld", "mode": "rw"},
        data_dir: {"bind": data_dir, "mode": "rw"},
    }
    try:
        self.start_db(ds_version=CONF.datastore_version, command=command,
                      extra_volumes=extra_volumes)
        self.stop_slave(for_failover=False)
    except Exception as e:
        # Best-effort: failure leaves stale replica state but does not
        # abort the restore.
        LOG.error("Failed to start db to restore snapshot: %s", str(e))
    finally:
        try:
            LOG.debug(
                'The init container log: %s',
                docker_util.get_container_logs(self.docker_client))
            docker_util.remove_container(self.docker_client)
        except Exception as err:
            LOG.error('Failed to remove container. error: %s', str(err))
class MariaDBRootAccess(mysql_service.BaseMySqlRootAccess):
def __init__(self, app):

View File

@ -13,11 +13,19 @@
# limitations under the License.
import semantic_version
from oslo_log import log as logging
from trove.common import cfg
from trove.common import exception
from trove.guestagent.common import operating_system
from trove.guestagent.datastore.mysql import service
from trove.guestagent.datastore.mysql_common import manager
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
class Manager(manager.MySqlManager):
@ -48,3 +56,32 @@ class Manager(manager.MySqlManager):
f"--ignore-db-dir=conf.d")
return params
def pre_create_backup(self, context, **kwargs):
    """Prepare MySQL for a snapshot-based backup.

    Takes a global read lock, writes the current binlog coordinates and
    executed GTID set to xtrabackup_binlog_info in the data directory
    (same layout xtrabackup writes, so restore/replica tooling can
    reuse it), then syncs and freezes the data filesystem.
    post_create_backup releases the lock and unfreezes the filesystem.

    :return: dict with log_file, log_position and log_executed_gtid_set.
    :raises BackupCreationError: if any step fails.
    """
    LOG.info("Running pre_create_backup")
    status = {}
    try:
        INFO_FILE = "%s/xtrabackup_binlog_info" % self.app.get_data_dir()
        # Block writes so the recorded coordinates match the snapshot.
        self.app.execute_sql("FLUSH TABLES WITH READ LOCK;")
        stt = self.app.execute_sql("SHOW MASTER STATUS;")
        for row in stt:
            status = {
                'log_file': row._mapping['File'],
                'log_position': row._mapping['Position'],
                'log_executed_gtid_set': row._mapping['Executed_Gtid_Set'],
            }
        # Tab-separated: <file>\t<position>\t<gtid-set>
        binlog = "\t".join(map(str, [
            status['log_file'],
            status['log_position'],
            status['log_executed_gtid_set']]))
        operating_system.write_file(INFO_FILE, binlog, as_root=True)
        mount_point = CONF.get(CONF.datastore_manager).mount_point
        operating_system.sync(mount_point)
        operating_system.fsfreeze(mount_point)
    except Exception as e:
        LOG.error("Run pre_create_backup failed, error: %s" % str(e))
        raise exception.BackupCreationError(str(e))
    return status

View File

@ -15,11 +15,16 @@ import semantic_version
from sqlalchemy.sql.expression import text
from oslo_log import log as logging
from trove.common import cfg
from trove.common import constants
from trove.guestagent.datastore.mysql_common import service
from trove.guestagent.utils import docker as docker_util
from trove.guestagent.utils import mysql as mysql_util
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
class MySqlAppStatus(service.BaseMySqlAppStatus):
@ -96,6 +101,36 @@ class MySqlApp(service.BaseMySqlApp):
return strategy
def reset_data_for_restore_snapshot(self, data_dir):
    """Clear replica state left in a data dir restored from a snapshot.

    Starts a temporary MySQL container against *data_dir* with
    replication disabled, calls stop_slave(for_failover=False) to
    remove the slave status, and always removes the temporary container
    afterwards.

    :param data_dir: path of the restored data directory on the host.
    """
    mysql_8 = semantic_version.Version('8.0.0')
    cur_ver = semantic_version.Version.coerce(CONF.datastore_version)
    command = "mysqld --skip-slave-start=ON --datadir=%s" % data_dir
    # NOTE(review): --skip-replica-start was only introduced in MySQL
    # 8.0.26; using it for every 8.0.x may fail on 8.0.0-8.0.25 --
    # confirm the supported version range.
    if cur_ver >= mysql_8:
        command = "mysqld --skip-replica-start=ON --datadir=%s" % data_dir
    # Mounts the container needs: config, socket directory, and the
    # restored data directory itself.
    extra_volumes = {
        "/etc/mysql": {"bind": "/etc/mysql", "mode": "rw"},
        constants.MYSQL_HOST_SOCKET_PATH: {
            "bind": "/var/run/mysqld", "mode": "rw"},
        data_dir: {"bind": data_dir, "mode": "rw"},
    }
    try:
        self.start_db(ds_version=CONF.datastore_version, command=command,
                      extra_volumes=extra_volumes)
        self.stop_slave(for_failover=False)
    except Exception as e:
        # Best-effort: failure leaves stale replica state but does not
        # abort the restore.
        LOG.error("Failed to start db to restore snapshot: %s", str(e))
    finally:
        try:
            LOG.debug(
                'The init container log: %s',
                docker_util.get_container_logs(self.docker_client))
            docker_util.remove_container(self.docker_client)
        except Exception as err:
            LOG.error('Failed to remove container. error: %s', str(err))
class MySqlRootAccess(service.BaseMySqlRootAccess):
def __init__(self, app):

View File

@ -363,3 +363,15 @@ class MySqlManager(manager.Manager):
raise
finally:
self.status.end_install(error_occurred=self.prepare_error)
def post_create_backup(self, context, **kwargs):
    """Release resources taken by pre_create_backup.

    Unlocks tables (releasing the FLUSH TABLES WITH READ LOCK taken in
    pre_create_backup) and unfreezes the data filesystem.  Errors are
    logged but not raised so a cleanup failure does not fail an
    otherwise completed backup.

    :return: empty dict (no metadata to report).
    """
    LOG.info("Running post_create_backup")
    try:
        self.app.execute_sql("UNLOCK TABLES;")
        mount_point = CONF.get(CONF.datastore_manager).mount_point
        operating_system.fsunfreeze(mount_point)
    except Exception as e:
        LOG.error("Run post_create_backup failed, error: %s" % str(e))
    return {}

View File

@ -698,7 +698,7 @@ class BaseMySqlApp(service.BaseDbApp):
def restore_backup(self, context, backup_info, restore_location):
backup_id = backup_info['id']
storage_driver = CONF.storage_strategy
storage_driver = backup_info.get('storage_driver', 'swift')
backup_driver = self.get_backup_strategy()
user_token = context.auth_token
auth_url = CONF.service_credentials.auth_url
@ -855,6 +855,29 @@ class BaseMySqlApp(service.BaseDbApp):
new_version)
self.start_db(update_db=True, ds_version=new_version)
def restore_snapshot(self, context, backup_info, restore_location):
    """Restore the MySQL data directory from a Cinder volume snapshot.

    The snapshot contents are already present at *restore_location*
    (the data volume was created from the snapshot); this method stops
    the database, fixes ownership, removes per-instance files that must
    not survive the restore, and clears replica state.

    :param backup_info: dict describing the backup (used for its 'id').
    :param restore_location: path of the restored data directory.
    """
    LOG.info('Doing restore snapshot.')
    backup_id = backup_info['id']
    LOG.debug('Stop the database before restore from %s', backup_id)
    self.stop_db()

    # The original debug message claimed ib_logfile files were deleted,
    # but the code below removes the saved admin credential file and
    # auto.cnf (which stores the server UUID and must be unique per
    # instance) -- the message is corrected to match.
    LOG.debug('Deleting stale instance files before restore from '
              'snapshot %s', backup_id)
    operating_system.chown(restore_location, self.database_service_uid,
                           self.database_service_gid, force=True,
                           as_root=True)
    files = [
        "%s/%s.cnf" % (guestagent_utils.get_conf_dir(), ADMIN_USER_NAME),
        "%s/%s.cnf" % (self.get_data_dir(), 'auto')
    ]
    for file in files:
        operating_system.remove(
            path=file, force=True, recursive=True, as_root=True)

    # Clear replica state by starting a throwaway container against the
    # restored data directory.
    LOG.info('Starting to restore snapshot %s', backup_id)
    self.reset_data_for_restore_snapshot(restore_location)
class BaseMySqlRootAccess(object):

View File

@ -16,6 +16,7 @@ import re
from oslo_log import log as logging
from oslo_service import periodic_task
import semantic_version
from trove.common import cfg
from trove.common import constants
@ -29,6 +30,7 @@ from trove.guestagent import guest_log
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
PG_FILE_AUTOCONF = 'postgresql.auto.conf'
class PostgresManager(manager.Manager):
@ -143,7 +145,9 @@ class PostgresManager(manager.Manager):
# Restore data from backup and reset root password
if backup_info:
self.perform_restore(context, self.app.datadir, backup_info)
if not snapshot:
is_swift = backup_info.get(
'storage_driver', 'swift') == "swift"
if not snapshot and is_swift:
signal_file = f"{self.app.datadir}/recovery.signal"
operating_system.execute_shell_cmd(
f"touch {signal_file}", [], shell=True, as_root=True)
@ -301,3 +305,75 @@ class PostgresManager(manager.Manager):
raise
finally:
self.status.end_install(error_occurred=self.prepare_error)
def pre_create_backup(self, context, **kwargs):
    """Prepare PostgreSQL for a snapshot-based backup.

    On a primary, puts the server into backup mode; on a replica, only
    forces a CHECKPOINT and pins max_connections in postgresql.auto.conf.
    Then syncs and freezes the data filesystem.  post_create_backup is
    expected to unfreeze the filesystem and end backup mode.

    :return: empty dict (no metadata to report).
    :raises BackupCreationError: if sync/freeze fails.
    """
    LOG.info("Running pre_create_backup")
    cur_version = semantic_version.Version.coerce(CONF.datastore_version)

    def _start_backup():
        # pg_start_backup was renamed to pg_backup_start in PG 15.
        # See this commit:
        # https://git.postgresql.org/gitweb/?p=postgresql.git;
        # a=commit;h=58c41712d55fadd35477b2ec3a02d12eca2bfbf2
        # Avoid:
        # https://www.postgresql.org/message-id/
        # CAB7nPqTQ7KkijePPtXjGQ65QunKx_KQfc03AzBnO5
        # %2B4bLSbObQ%40mail.gmail.com
        if not self.app.is_replica():
            if cur_version < semantic_version.Version('15.0.0'):
                cmd = "SELECT pg_start_backup('snapshot backup', true)"
            else:
                cmd = "SELECT pg_backup_start('snapshot backup', true)"
            self.app.adm.query(cmd)
        else:
            # Backup mode cannot be started on a replica; a checkpoint
            # is used instead.
            self.app.adm.psql("CHECKPOINT;")
            # Avoid:
            # https://www.postgresql.org/message-id/
            # 20220203094727.w3ca3sukfu5xu7hk%40jrouhaud
            # NOTE(review): pinning max_connections here presumably keeps
            # the restored server's setting compatible with its primary
            # -- confirm intent.
            autoconf_file = (f"{self.app.datadir}/"
                             f"{PG_FILE_AUTOCONF}")
            cmd = "SHOW max_connections;"
            result = self.app.adm.query(cmd)[0][0]
            max_connections = f'max_connections={result}'
            operating_system.write_file(autoconf_file,
                                        max_connections, as_root=True)

    _start_backup()
    try:
        mount_point = CONF.get(CONF.datastore_manager).mount_point
        operating_system.sync(mount_point)
        operating_system.fsfreeze(mount_point)
    except Exception as e:
        LOG.error("Run pre_create_backup failed, error: %s" % str(e))
        raise exception.BackupCreationError(str(e))
    return {}
def post_create_backup(self, context, **kwargs):
    """Clean up after a snapshot-based backup of PostgreSQL.

    Unfreezes the data filesystem and takes the server out of backup
    mode.  Backup mode is always ended -- even when unfreezing fails --
    so the server is never left with a backup in progress (previously a
    fsunfreeze failure raised before _stop_backup() could run).

    :return: empty dict (no metadata to report).
    :raises BackupCreationError: if the filesystem cannot be unfrozen.
    """
    cur_version = semantic_version.Version.coerce(CONF.datastore_version)

    def _stop_backup():
        # pg_stop_backup was renamed to pg_backup_stop in PG 15.
        try:
            if cur_version < semantic_version.Version('15.0.0'):
                command = "SELECT pg_stop_backup(true)"
            else:
                command = "SELECT pg_backup_stop(true)"
            self.app.adm.query(command)
        except Exception as e:
            # Best-effort: e.g. no backup in progress on a replica.
            LOG.error("Run _stop_backup failed, error: %s" % str(e))

    try:
        mount_point = CONF.get(CONF.datastore_manager).mount_point
        operating_system.fsunfreeze(mount_point)
    except Exception as e:
        LOG.warning('Failed to run post_create_backup %s' % e)
        raise exception.BackupCreationError(str(e))
    finally:
        # Always end backup mode, even if unfreezing failed.
        _stop_backup()
    return {}

View File

@ -264,7 +264,7 @@ class PgSqlApp(service.BaseDbApp):
def restore_backup(self, context, backup_info, restore_location):
backup_id = backup_info['id']
storage_driver = CONF.storage_strategy
storage_driver = backup_info.get('storage_driver', 'swift')
backup_driver = self.get_backup_strategy()
image = self.get_backup_image()
name = 'db_restore'
@ -366,6 +366,18 @@ class PgSqlApp(service.BaseDbApp):
LOG.error(msg)
raise Exception(msg)
def reset_data_for_restore_snapshot(self, restore_location):
    """Remove stale recovery state from a snapshot-restored data dir.

    Deletes recovery.signal so the restored server does not start in
    archive-recovery mode.  Removal failures are logged, not raised.
    """
    LOG.info('Run reset_data_for_restore_snapshot work has been defined.')
    stale_files = (f"{self.datadir}/recovery.signal",)
    for stale in stale_files:
        try:
            operating_system.remove(
                path=stale, force=True, recursive=True, as_root=True)
        except Exception as exc:
            LOG.warning(exc)
class PgSqlAdmin(object):
# Default set of options of an administrative account.

View File

@ -447,7 +447,8 @@ class BaseDbApp(object):
def create_backup(self, context, backup_info, volumes_mapping={},
need_dbuser=True, extra_params=''):
storage_driver = CONF.storage_strategy
storage_driver = backup_info.get(
'storage_driver', CONF.storage_strategy)
backup_driver = self.get_backup_strategy()
incremental = ''
backup_type = 'full'
@ -572,3 +573,11 @@ class BaseDbApp(object):
def database_service_gid(self):
return cfg.get_configuration_property(
'database_service_gid') or self.database_service_uid
def restore_snapshot(self, context, backup_info, restore_location):
    """Default snapshot restore.

    The snapshot data is already present on the restored volume, so only
    datastore-specific cleanup of the data directory is required.
    """
    LOG.info('Doing restore snapshot.')
    self.reset_data_for_restore_snapshot(restore_location)
def reset_data_for_restore_snapshot(self, restore_location):
    """Hook for datastores to clean up a snapshot-restored data dir.

    Default implementation does nothing; datastore apps override it.
    (Removed the redundant ``pass`` that followed the log call.)
    """
    LOG.info('No reset_data_for_restore_snapshot work has been defined.')

View File

@ -99,8 +99,10 @@ class MysqlReplicationBase(base.Replication):
"mode": "ro"},
'/tmp': {'bind': '/tmp', 'mode': 'rw'}
}
service.create_backup(context, snapshot_info,
volumes_mapping=volumes_mapping)
if snapshot_info.get('storage_driver') not in ["cinder"]:
service.create_backup(context, snapshot_info,
volumes_mapping=volumes_mapping)
LOG.info('Creating replication user')
replication_user = self._create_replication_user(service, adm)

View File

@ -22,13 +22,17 @@ LOG = logging.getLogger(__name__)
class MysqlGTIDReplication(mysql_base.MysqlReplicationBase):
"""MySql Replication coordinated by GTIDs."""
def connect_to_master(self, service, master_info):
if 'dataset' in master_info:
# pull the last executed GTID from the master via
# the xtrabackup metadata file. If that value is
# provided we need to set the gtid_purged variable
# before executing the CHANGE MASTER TO command
last_gtid = self.read_last_master_gtid(service)
if master_info.get('dataset', {}).get('log_executed_gtid_set'):
last_gtid = master_info['dataset']['log_executed_gtid_set']
else:
last_gtid = self.read_last_master_gtid(service)
LOG.info("last_gtid value is %s", last_gtid)
if '-' in last_gtid:
# See

View File

@ -132,10 +132,11 @@ class PostgresqlReplicationStreaming(base.Replication):
{"bind": "/var/run/postgresql", "mode": "ro"},
}
extra_params = f"--pg-wal-archive-dir {pg_service.WAL_ARCHIVE_DIR}"
service.create_backup(context, snapshot_info,
volumes_mapping=volumes_mapping,
need_dbuser=False,
extra_params=extra_params)
if snapshot_info.get('storage_driver') not in ["cinder"]:
service.create_backup(context, snapshot_info,
volumes_mapping=volumes_mapping,
need_dbuser=False,
extra_params=extra_params)
LOG.info('Getting or creating replication user')
replication_user = self._get_or_create_replication_user(service)

View File

@ -1304,7 +1304,7 @@ class Instance(BuiltInstance):
raise exception.BackupTooLarge(
backup_size=backup_info.size, disk_size=target_size)
if not backup_info.check_swift_object_exist(
if not backup_info.check_location_exist(
context,
verify_checksum=CONF.verify_swift_checksum_on_restore):
raise exception.BackupFileNotFound(

View File

@ -339,13 +339,16 @@ class Manager(periodic_task.PeriodicTasks):
LOG.debug("Using scheduler hints %s for creating instance %s",
scheduler_hints, instance_id)
snapshot_driver = CONF.replica_snapshot_driver or CONF.storage_strategy
# Create backup for master
snapshot = None
try:
instance_tasks = FreshInstanceTasks.load(context, ids[0])
snapshot = instance_tasks.get_replication_master_snapshot(
context, slave_of_id, flavor,
parent_backup_id=replica_backup_id)
parent_backup_id=replica_backup_id,
snapshot_driver=snapshot_driver)
LOG.info('Snapshot info for creating replica of %s: %s',
slave_of_id, snapshot)
except Exception as err:

193
trove/taskmanager/models.py Executable file → Normal file
View File

@ -20,9 +20,8 @@ import traceback
from eventlet import greenthread
from eventlet.timeout import Timeout
from oslo_log import log as logging
from swiftclient.client import ClientException
from oslo_utils import timeutils as otimeutils
from trove.backup import models as bkup_models
from trove.backup.models import Backup
from trove.backup.models import DBBackup
from trove.backup.state import BackupState
@ -56,6 +55,7 @@ from trove.common import template
from trove.common import timeutils
from trove.common import utils
from trove.common.utils import try_recover
from trove.conductor import api as conductor_api
from trove.configuration import models as config_models
from trove.extensions.common import models as common_models
from trove.instance import models as inst_models
@ -627,26 +627,31 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin):
else:
files = self.get_injected_files(datastore_manager, ds_version)
backup_info = None
cinder_snapshot_id = None
if backup_id is not None:
backup = Backup.get_by_id(self.context, backup_id)
backup_info = {
'id': backup_id,
'instance_id': backup.instance_id,
'location': backup.location,
'type': backup.backup_type,
'checksum': backup.checksum,
'storage_driver': backup.storage_driver
}
if backup.storage_driver in ["cinder"]:
cinder_snapshot_id = backup.location.split("/")[-1]
cinder_volume_type = volume_type or CONF.cinder_volume_type
volume_info = self._create_server_volume(
flavor['id'], image_id,
datastore_manager, volume_size,
availability_zone, networks,
files, cinder_volume_type,
scheduler_hints
files, cinder_snapshot_id,
cinder_volume_type, scheduler_hints
)
config = self._render_config(flavor)
backup_info = None
if backup_id is not None:
backup = bkup_models.Backup.get_by_id(self.context, backup_id)
backup_info = {'id': backup_id,
'instance_id': backup.instance_id,
'location': backup.location,
'type': backup.backup_type,
'checksum': backup.checksum,
}
self._guest_prepare(flavor['ram'], volume_info,
packages, databases, users, backup_info,
config.config_contents, root_password,
@ -682,7 +687,8 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin):
self._log_and_raise(e, log_fmt, exc_fmt, self.id, err)
def get_replication_master_snapshot(self, context, slave_of_id, flavor,
parent_backup_id=None):
parent_backup_id=None,
snapshot_driver='swift'):
# First check to see if we need to take a backup
master = BuiltInstanceTasks.load(context, slave_of_id)
backup_required = master.backup_required_for_replication()
@ -707,6 +713,8 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin):
'datastore_version_id': self.datastore_version.id,
'deleted': False,
'replica_number': 1,
'backup_info': {},
'storage_driver': snapshot_driver
}
replica_backup_id = None
@ -744,6 +752,12 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin):
snapshot.update({
'config': self._render_replica_config(flavor).config_contents
})
# Create a cinder snapshot without call agent
if snapshot_driver == 'cinder':
master.create_backup(snapshot_info)
snapshot['dataset'].update(snapshot_info.get('backup_info'))
return snapshot
except Exception as e_create:
create_log_fmt = (
@ -880,11 +894,12 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin):
def _create_server_volume(self, flavor_id, image_id, datastore_manager,
volume_size, availability_zone, nics, files,
volume_type, scheduler_hints):
snapshot_id, volume_type, scheduler_hints):
LOG.debug("Begin _create_server_volume for instance: %s", self.id)
server = None
volume_info = self._build_volume_info(
datastore_manager,
snapshot_id=snapshot_id,
volume_size=volume_size,
volume_type=volume_type,
availability_zone=availability_zone)
@ -919,7 +934,8 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin):
LOG.debug("End _create_server_volume for instance: %s", self.id)
return volume_info
def _build_volume_info(self, datastore_manager, volume_size=None,
def _build_volume_info(self, datastore_manager,
snapshot_id=None, volume_size=None,
volume_type=None, availability_zone=None):
volume_info = None
volume_support = self.volume_support
@ -930,7 +946,7 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin):
try:
volume_info = self._create_volume(
volume_size, volume_type, datastore_manager,
availability_zone)
availability_zone, snapshot_id=snapshot_id)
except Exception as e:
log_fmt = "Failed to create volume for instance %s"
exc_fmt = _("Failed to create volume for instance %s")
@ -1004,7 +1020,7 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin):
return block_device_mapping_v2
def _create_volume(self, volume_size, volume_type, datastore_manager,
availability_zone):
availability_zone, snapshot_id=None):
LOG.debug("Begin _create_volume for id: %s", self.id)
volume_client = create_cinder_client(self.context, self.region_name)
volume_desc = ("datastore volume for %s" % self.id)
@ -1013,6 +1029,10 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin):
'name': "trove-%s" % self.id,
'description': volume_desc,
'volume_type': volume_type}
if snapshot_id:
volume_kwargs.update({'snapshot_id': snapshot_id})
if CONF.enable_volume_az:
volume_kwargs['availability_zone'] = availability_zone
volume_ref = volume_client.volumes.create(**volume_kwargs)
@ -1257,7 +1277,11 @@ class BuiltInstanceTasks(Instance, NotifyMixin, ConfigurationMixin):
def create_backup(self, backup_info):
    """Start a backup of this instance.

    Cinder-backed backups are taken as volume snapshots driven from the
    taskmanager (see SnapshotTasks); every other storage driver delegates
    the actual backup work to the guest agent.

    :param backup_info: dict describing the backup; may carry a
                        'storage_driver' key ('swift' when absent).
    """
    LOG.info("Initiating backup for instance %s, backup_info: %s", self.id,
             backup_info)
    # Default to 'swift' for callers that predate storage_driver support.
    # (Fixes the previous 'swfit' typo, which would break any future
    # comparison or logging against the real driver name.)
    storage_driver = backup_info.get('storage_driver', 'swift')
    if storage_driver == "cinder":
        SnapshotTasks(self, backup_info)._create_snapshot()
    else:
        self.guest.create_backup(backup_info)
def backup_required_for_replication(self):
LOG.debug("Check if replication backup is required for instance %s.",
@ -1569,6 +1593,14 @@ class BackupTasks(object):
{'cont': container, 'filename': filename})
client.delete_object(container, filename)
@classmethod
def delete_snapshot_from_cinder(cls, context, snapshot_id):
    """Delete a backup's volume snapshot in Cinder, best effort.

    Failures are logged rather than raised so that backup deletion can
    proceed even if the snapshot is already gone.
    """
    cinder = clients.create_cinder_client(context)
    try:
        cinder.volume_snapshots.delete(snapshot_id)
    except Exception as exc:
        LOG.error(exc)
@classmethod
def delete_backup(cls, context, backup_id):
"""Delete backup from swift."""
@ -1581,33 +1613,116 @@ class BackupTasks(object):
backup.save()
LOG.info("Deleting backup %s.", backup_id)
backup = bkup_models.Backup.get_by_id(context, backup_id)
backup = Backup.get_by_id(context, backup_id)
try:
filename = backup.filename
# Do not remove the object if the backup was restored from remote
# location.
if filename and backup.state != bkup_models.BackupState.RESTORED:
BackupTasks.delete_files_from_swift(context,
backup.container_name,
filename)
except ValueError:
_delete(backup)
except ClientException as e:
if e.http_status == 404:
# Backup already deleted in swift
_delete(backup)
else:
LOG.exception("Error occurred when deleting from swift. "
"Details: %s", e)
backup.state = bkup_models.BackupState.DELETE_FAILED
backup.save()
raise TroveError(_("Failed to delete swift object for backup "
"%s.") % backup_id)
else:
if filename and backup.state != BackupState.RESTORED:
if backup.storage_driver in ["cinder"]:
backup_id = backup.location.split("/")[-1]
BackupTasks.delete_snapshot_from_cinder(context, backup_id)
else:
BackupTasks.delete_files_from_swift(context,
backup.container_name,
filename)
except Exception as e:
LOG.exception("Error occurred when deleting. "
"Details: %s", e)
backup.state = BackupState.DELETE_FAILED
backup.save()
raise TroveError(_("Failed to delete object for backup "
"%s.") % backup_id)
finally:
_delete(backup)
LOG.info("Deleted backup %s successfully.", backup_id)
class SnapshotTasks(object):
    """Performs volume snapshot action.

    Creates an instance backup by snapshotting the instance's Cinder
    volume instead of streaming data through the guest agent. The guest's
    pre_create_backup/post_create_backup hooks are invoked around the
    snapshot, and backup state is reported via the conductor API.
    """

    def __init__(self, instance, backup_info):
        # backup_info is the caller-built dict (see create_backup); it is
        # mutated in _create_snapshot to carry guest data back to callers.
        self.backup_id = backup_info.get('id')
        self.instance = instance
        self.cinder = self.instance.volume_client
        self.snapshot_id = None  # assigned once the snapshot is created
        self.snapshot_info = backup_info
        self.backup_type = backup_info.get(
            'backup_type', constants.BACKUP_TYPE_FULL)
        self.ds_version = instance.ds_version
        self.description = backup_info.get('description')
        # Attached to the Cinder snapshot so it can be traced back to the
        # backup and instance it belongs to.
        self.metadata = {
            'backup_id': self.backup_id,
            'backup_type': self.backup_type,
            'datastore': self.ds_version.manager,
            'datastore_version': self.ds_version.version,
            'instance_id': self.instance.id,
            'tenant_id': self.instance.tenant_id,
        }
        self.name = "trove-%s-backup" % self.backup_id

    def _wait_snapshot_available(self):
        # Poll predicate for utils.poll_until: True once Cinder reports
        # the snapshot as available.
        snap = self.cinder.volume_snapshots.get(self.snapshot_id)
        return snap.status == "available"

    def _create_snapshot(self):
        """Take the volume snapshot and report the result to the conductor.

        The backup record is first marked BUILDING, then COMPLETED with a
        ``cinder://<snapshot_id>`` location on success, or FAILED on error;
        the final state is always sent in the ``finally`` block.
        """
        backup_state = {
            'backup_id': self.backup_id,
            'size': 0.0,
            'state': BackupState.BUILDING,
            'backup_type': self.backup_type
        }
        conductor = conductor_api.API(self.instance.context)
        # Announce the BUILDING state before any work starts.
        conductor.update_backup(self.instance.id,
                                sent=otimeutils.utcnow_ts(microsecond=True),
                                **backup_state)
        try:
            bak = self.instance.guest.pre_create_backup()
            # !IMPORTANT update backup_info to pass guestagent
            self.snapshot_info.update({'backup_info': bak})
            # force=True allows snapshotting an in-use (attached) volume.
            snap = self.cinder.volume_snapshots.create(
                self.instance.volume_id,
                force=True, name=self.name,
                description=self.description, metadata=self.metadata)
            self.snapshot_id = snap.id
            utils.poll_until(self._wait_snapshot_available,
                             sleep_time=2,
                             time_out=CONF.volume_time_out)
            backup_state.update({
                'checksum': None,
                'location': "cinder://%s" % self.snapshot_id,
                'size': snap.size,
                'state': BackupState.COMPLETED,
                'success': True,
            })
            self.instance.guest.post_create_backup()
            LOG.info("Completed backup %s.", self.backup_id)
        except GuestError as err:
            LOG.error("Failed to create backup %s: %s" % (self.backup_id, err))
            # NOTE(review): unlike the generic handler below, this path does
            # not call post_create_backup() -- confirm the guest is left in a
            # consistent state when the guest call itself fails.
            backup_state.update({
                'success': False,
                'state': BackupState.FAILED,
            })
        except Exception as e:
            LOG.error("Failed to create backup %s: %s" % (self.backup_id, e))
            backup_state.update({
                'success': False,
                'state': BackupState.FAILED,
            })
            # Let the guest resume normal operation after the failure.
            self.instance.guest.post_create_backup()
        finally:
            # Always push the terminal state to the conductor.
            conductor.update_backup(
                self.instance.id,
                sent=otimeutils.utcnow_ts(microsecond=True),
                **backup_state)
class ModuleTasks(object):
@classmethod

View File

@ -66,7 +66,7 @@ class BackupCreateTest(trove_testtools.TestCase):
tenant_id=self.context.project_id).delete()
@patch.object(api.API, 'get_client', MagicMock(return_value=MagicMock()))
def test_create(self):
def test_create_backup(self):
instance = MagicMock()
with patch.object(instance_models.BuiltInstance, 'load',
return_value=instance):
@ -394,6 +394,69 @@ class BackupORMTest(trove_testtools.TestCase):
def test_filename(self):
    # The fixture backup's derived filename matches the expected constant.
    self.assertEqual(BACKUP_FILENAME, self.backup.filename)
def test_is_failed(self):
    # A failed backup with no location must not expose a container name.
    self.backup.state = state.BackupState.FAILED
    self.backup.location = None
    self.backup.save()
    self.assertIsNone(self.backup.container_name)
def test_create_snapshot(self):
    """A cinder-backed backup record reports itself as a snapshot."""
    backup = models.DBBackup.create(tenant_id=self.context.project_id,
                                    name=BACKUP_NAME,
                                    state=BACKUP_STATE,
                                    instance_id=self.instance_id,
                                    deleted=False,
                                    size=2.0,
                                    storage_driver="cinder",
                                    location="cinder://snapshot_id")
    # assertTrue, for consistency with the other storage_driver tests
    # in this class (instead of assertEqual(..., True)).
    self.assertTrue(backup.is_snapshot)
def test_create_without_snapshot(self):
    """A swift-backed backup is not a snapshot and keeps its location."""
    backup = models.DBBackup.create(tenant_id=self.context.project_id,
                                    name=BACKUP_NAME,
                                    state=BACKUP_STATE,
                                    instance_id=self.instance_id,
                                    deleted=False,
                                    size=2.0,
                                    storage_driver="swift",
                                    location=BACKUP_LOCATION)
    # assertFalse, for consistency with the other storage_driver tests
    # in this class (instead of assertEqual(..., False)).
    self.assertFalse(backup.is_snapshot)
    self.assertEqual(backup.location, BACKUP_LOCATION)
def test_create_without_storage_driver_swift(self):
    """With storage_strategy 'swift', an unset driver is not a snapshot."""
    self.patch_conf_property('storage_strategy', 'swift')
    backup_kwargs = dict(tenant_id=self.context.project_id,
                         name=BACKUP_NAME,
                         state=BACKUP_STATE,
                         instance_id=self.instance_id,
                         deleted=False,
                         size=2.0,
                         location=BACKUP_LOCATION)
    backup = models.DBBackup.create(**backup_kwargs)
    self.assertFalse(backup.is_snapshot)
def test_create_without_storage_driver_cinder(self):
    """With storage_strategy 'cinder', an unset driver is a snapshot."""
    self.patch_conf_property('storage_strategy', 'cinder')
    backup_kwargs = dict(tenant_id=self.context.project_id,
                         name=BACKUP_NAME,
                         state=BACKUP_STATE,
                         instance_id=self.instance_id,
                         deleted=False,
                         size=2.0,
                         location="cinder://snapshot_id")
    backup = models.DBBackup.create(**backup_kwargs)
    self.assertTrue(backup.is_snapshot)
def test_check_location_exist_cinder(self):
    """check_location_exist is False for a nonexistent cinder snapshot."""
    self.patch_conf_property('storage_strategy', 'cinder')
    backup_kwargs = dict(tenant_id=self.context.project_id,
                         name=BACKUP_NAME,
                         state=BACKUP_STATE,
                         instance_id=self.instance_id,
                         deleted=False,
                         size=2.0,
                         location="cinder://snapshot_id")
    backup = models.DBBackup.create(**backup_kwargs)
    self.assertTrue(backup.is_snapshot)
    self.assertFalse(backup.check_location_exist(self.context))
def test_filename_bad(self):
def _set_bad_filename():

View File

@ -0,0 +1,42 @@
# Copyright 2024 Bizfly Cloud, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
from trove.common import cfg
from trove.common.clients import normalize_url
from trove.common import context
from trove.common import timeutils
from trove.common import trove_remote
from trove.tests.unittests import trove_testtools
CONF = cfg.CONF
class TestTroveClient(trove_testtools.TestCase):
    """Tests for the remote Trove client factory."""

    def setUp(self):
        super(TestTroveClient, self).setUp()
        # Timestamp-suffixed identifiers keep fixtures unique per run.
        self.context = context.TroveContext(
            project_id='TENANT-%s' % timeutils.utcnow(),
            user='USER-%s' % timeutils.utcnow(),
            auth_token='AUTH_TOKEN-%s' % timeutils.utcnow())

    def test_trove_with_remote_client(self):
        """The client's management URL is trove_url plus the tenant id."""
        self.patch_conf_property('trove_url', 'trove_url')
        client = trove_remote.trove_client(self.context)
        expected_url = (normalize_url(CONF.trove_url)
                        + self.context.project_id)
        self.assertEqual(expected_url, client.client.management_url)

View File

@ -17,6 +17,7 @@
from unittest.mock import MagicMock, Mock, patch, PropertyMock
from trove.backup.models import Backup
from trove.common import cfg
from trove.common.exception import TroveError, ReplicationSlaveAttachError
from trove.common import server_group as srv_grp
from trove.instance.tasks import InstanceTasks
@ -25,6 +26,8 @@ from trove.taskmanager import models
from trove.taskmanager import service
from trove.tests.unittests import trove_testtools
CONF = cfg.CONF
class TestManager(trove_testtools.TestCase):
@ -208,7 +211,33 @@ class TestManager(trove_testtools.TestCase):
None, None)
mock_tasks.get_replication_master_snapshot.assert_called_with(
self.context, 'some-master-id', mock_flavor,
parent_backup_id='temp-backup-id')
parent_backup_id='temp-backup-id', snapshot_driver='swift')
mock_backup_delete.assert_called_with(self.context, 'test-id')
@patch.object(Backup, 'delete')
@patch.object(models.BuiltInstanceTasks, 'load')
def test_create_replication_cinder(self, mock_load, mock_backup_delete):
    """Replica creation passes the configured cinder snapshot driver."""
    # patch_conf_property (instead of a bare CONF.set_override) restores
    # the option automatically, so the override cannot leak into other
    # tests in this run.
    self.patch_conf_property('replica_snapshot_driver', 'cinder')
    mock_tasks = Mock()
    mock_snapshot = {'dataset': {'snapshot_id': 'test-id'}}
    mock_tasks.get_replication_master_snapshot = Mock(
        return_value=mock_snapshot)
    mock_flavor = Mock()
    mock_volume = mock_tasks.volume_client.volumes.get.return_value
    mock_volume.availability_zone = 'nova'
    mock_tasks.get_instance()
    with patch.object(models.FreshInstanceTasks, 'load',
                      return_value=mock_tasks):
        self.manager._create_replication_slave(
            self.context, ['id_slave'], 'mysql-server',
            mock_flavor, Mock(), None, None,
            'mysql', None, 2, 'nova', ['password'], None,
            None, 'some-master-id', None, None, None, None, None)
    mock_tasks.get_replication_master_snapshot.assert_called_with(
        self.context, 'some-master-id', mock_flavor,
        parent_backup_id=None, snapshot_driver='cinder')
    mock_backup_delete.assert_called_with(self.context, 'test-id')
@patch.object(models.FreshInstanceTasks, 'load')

View File

@ -452,7 +452,7 @@ class FreshInstanceTasksTest(BaseFreshInstanceTasksTest):
mock_build_volume_info.assert_called_with(
'mysql', availability_zone=None, volume_size=2,
volume_type='volume_type'
volume_type='volume_type', snapshot_id=None
)
mock_guest_prepare.assert_called_with(
768, mock_build_volume_info(), 'mysql-server', None, None, None,
@ -536,7 +536,7 @@ class FreshInstanceTasksTest(BaseFreshInstanceTasksTest):
image_id = None
mock_build_volume_info.assert_called_with(
'mysql', availability_zone=None, volume_size=2,
volume_type='volume_type'
volume_type='volume_type', snapshot_id=None
)
mock_guest_prepare.assert_called_with(
768, mock_build_volume_info(), 'mysql-server', None, None, None,
@ -1093,6 +1093,7 @@ class BackupTasksTest(trove_testtools.TestCase):
self.backup.updated = 'today'
self.backup.size = 2.0
self.backup.state = state.BackupState.NEW
self.backup.storage_driver = 'backup'
self.bm_backup_patches = patch.multiple(
backup_models.Backup,
delete=MagicMock(return_value=None),