[mariadb] Use service IP to discover endpoints

It was observed that under certain circumstances
Galera instances can keep using the old IP address of a node
after a pod restart. This patch changes the value of the
wsrep_cluster_address variable: instead of listing
the DNS names of all cluster nodes, the discovery service
IP address is used. In this case wsrep_node_address is set to the
node's IP address instead of its DNS name; otherwise the SST method will fail.

Co-Authored-By: Oleksii Grudev <ogrudev@mirantis.com>

Change-Id: I8059f28943150785abd48316514c0ffde56dfde5
This commit is contained in:
Vasyl Saienko 2024-11-18 08:17:12 +00:00
parent 65ec71b939
commit c2269d70a2
4 changed files with 15 additions and 20 deletions

View File

@ -15,7 +15,7 @@ apiVersion: v1
appVersion: v10.6.7 appVersion: v10.6.7
description: OpenStack-Helm MariaDB description: OpenStack-Helm MariaDB
name: mariadb name: mariadb
version: 0.2.62 version: 0.2.63
home: https://mariadb.com/kb/en/ home: https://mariadb.com/kb/en/
icon: http://badges.mariadb.org/mariadb-badge-180x60.png icon: http://badges.mariadb.org/mariadb-badge-180x60.png
sources: sources:

View File

@ -49,6 +49,10 @@ logger.addHandler(ch)
local_hostname = socket.gethostname() local_hostname = socket.gethostname()
logger.info("This instance hostname: {0}".format(local_hostname)) logger.info("This instance hostname: {0}".format(local_hostname))
# Get local node IP address
local_ip = socket.gethostbyname(local_hostname)
logger.info("This instance IP address: {0}".format(local_ip))
# Get the instance number # Get the instance number
instance_number = local_hostname.split("-")[-1] instance_number = local_hostname.split("-")[-1]
logger.info("This instance number: {0}".format(instance_number)) logger.info("This instance number: {0}".format(instance_number))
@ -270,18 +274,14 @@ def mysqld_write_cluster_conf(mode='run'):
for node in range(int(mariadb_replicas)): for node in range(int(mariadb_replicas)):
node_hostname = "{0}-{1}".format(pod_name_prefix, node) node_hostname = "{0}-{1}".format(pod_name_prefix, node)
if local_hostname == node_hostname: if local_hostname == node_hostname:
wsrep_node_address = "{0}.{1}:{2}".format( cluster_config_params['wsrep_node_address'] = local_ip
node_hostname, discovery_domain, wsrep_port)
cluster_config_params['wsrep_node_address'] = wsrep_node_address
wsrep_node_name = "{0}.{1}".format(node_hostname, discovery_domain) wsrep_node_name = "{0}.{1}".format(node_hostname, discovery_domain)
cluster_config_params['wsrep_node_name'] = wsrep_node_name cluster_config_params['wsrep_node_name'] = wsrep_node_name
else:
addr = "{0}.{1}:{2}".format(node_hostname, discovery_domain, if mode == 'run':
wsrep_port) cluster_config_params['wsrep_cluster_address'] = "gcomm://{0}:{1}".format(
wsrep_cluster_members.append(addr) discovery_domain, wsrep_port)
if wsrep_cluster_members and mode == 'run':
cluster_config_params['wsrep_cluster_address'] = "gcomm://{0}".format(
",".join(wsrep_cluster_members))
else: else:
cluster_config_params['wsrep_cluster_address'] = "gcomm://" cluster_config_params['wsrep_cluster_address'] = "gcomm://"
cluster_config_file = '/etc/mysql/conf.d/10-cluster-config.cnf' cluster_config_file = '/etc/mysql/conf.d/10-cluster-config.cnf'
@ -913,14 +913,6 @@ def run_mysqld(cluster='existing'):
"This is a fresh node joining the cluster for the 1st time, not attempting to set admin passwords or upgrading" "This is a fresh node joining the cluster for the 1st time, not attempting to set admin passwords or upgrading"
) )
# Node ready to start MariaDB, update cluster state to live and remove
# reboot node info, if set previously.
if cluster == 'new':
set_configmap_annotation(
key='openstackhelm.openstack.org/cluster.state', value='live')
set_configmap_annotation(
key='openstackhelm.openstack.org/reboot.node', value='')
logger.info("Launching MariaDB") logger.info("Launching MariaDB")
run_cmd_with_logging(mysqld_cmd, logger) run_cmd_with_logging(mysqld_cmd, logger)
@ -1003,6 +995,8 @@ elif get_cluster_state() == 'live':
"it") "it")
while not check_for_active_nodes(): while not check_for_active_nodes():
time.sleep(default_sleep) time.sleep(default_sleep)
set_configmap_annotation(
key='openstackhelm.openstack.org/cluster.state', value='live')
run_mysqld() run_mysqld()
elif get_cluster_state() == 'reboot': elif get_cluster_state() == 'reboot':
reboot_node = get_configmap_value( reboot_node = get_configmap_value(

View File

@ -30,7 +30,7 @@ spec:
- name: sst - name: sst
port: {{ tuple "oslo_db" "direct" "sst" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }} port: {{ tuple "oslo_db" "direct" "sst" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
clusterIP: None clusterIP: None
publishNotReadyAddresses: true publishNotReadyAddresses: false
selector: selector:
{{ tuple $envAll "mariadb" "server" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }} {{ tuple $envAll "mariadb" "server" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
{{ .Values.network.mariadb_discovery | include "helm-toolkit.snippets.service_params" | indent 2 }} {{ .Values.network.mariadb_discovery | include "helm-toolkit.snippets.service_params" | indent 2 }}

View File

@ -78,4 +78,5 @@ mariadb:
- 0.2.60 Refactor liveness/readiness probes - 0.2.60 Refactor liveness/readiness probes
- 0.2.61 Avoid using deprecated isAlive() - 0.2.61 Avoid using deprecated isAlive()
- 0.2.62 Implement mariadb upgrade during start - 0.2.62 Implement mariadb upgrade during start
- 0.2.63 Use service IP for endpoint discovery
... ...