Mark Goddard 86f373a198 Fixes for MariaDB bootstrap and recovery
* Fix wsrep sequence number detection. Log message format is
  'WSREP: Recovered position: <UUID>:<seqno>' but we were picking out
  the UUID rather than the sequence number. This is as good as random.

* Add become: true to log file reading and removal since
  I4a5ebcedaccb9261dbc958ec67e8077d7980e496 added become: true to the
  'docker cp' command which creates it.

* Don't run handlers during recovery. If the config files change we
  would end up restarting the cluster twice.

* Wait for wsrep recovery container completion (don't detach). This
  avoids a potential race between wsrep recovery and the subsequent
  'stop_container'.

* Finally, we now wait for the bootstrap host to report that it is in
  an OPERATIONAL state. Without this we can see errors where the
  MariaDB cluster is not ready when used by other services.

Change-Id: Iaf7862be1affab390f811fc485fd0eb6879fd583
Closes-Bug: #1834467
2019-07-05 09:20:34 +00:00

194 lines
5.5 KiB
YAML

---
- name: Starting first MariaDB container
vars:
service_name: "mariadb"
service: "{{ mariadb_services[service_name] }}"
become: true
kolla_docker:
action: "start_container"
common_options: "{{ docker_common_options }}"
environment:
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
BOOTSTRAP_ARGS: "--wsrep-new-cluster"
image: "{{ service.image }}"
labels:
BOOTSTRAP:
name: "{{ service.container_name }}"
restart_policy: "never"
volumes: "{{ service.volumes }}"
dimensions: "{{ service.dimensions }}"
when:
- bootstrap_host is defined
- bootstrap_host == inventory_hostname
listen: Bootstrap MariaDB cluster
notify:
- restart mariadb
# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
# https://github.com/ansible/ansible-modules-core/issues/2788
- name: wait first mariadb container
wait_for:
host: "{{ api_interface_address }}"
port: "{{ mariadb_port }}"
connect_timeout: 1
timeout: 60
search_regex: "MariaDB"
register: check_mariadb_port
until: check_mariadb_port is success
retries: 10
delay: 6
when:
- bootstrap_host is defined
- bootstrap_host == inventory_hostname
listen: Bootstrap MariaDB cluster
- name: Wait for MariaDB to become operational
become: true
command: >-
docker exec {{ mariadb_service.container_name }}
mysql -uroot -p{{ database_password }}
--silent --skip-column-names
-e 'SHOW STATUS LIKE "wsrep_evs_state"'
changed_when: false
register: result
until: '"OPERATIONAL" in result.stdout'
retries: 10
delay: 6
no_log: true
when:
- bootstrap_host is defined
- bootstrap_host == inventory_hostname
listen: Bootstrap MariaDB cluster
- name: restart slave mariadb
vars:
service_name: "mariadb"
service: "{{ mariadb_services[service_name] }}"
become: true
kolla_docker:
action: "recreate_or_restart_container"
common_options: "{{ docker_common_options }}"
name: "{{ service.container_name }}"
image: "{{ service.image }}"
volumes: "{{ service.volumes }}"
dimensions: "{{ service.dimensions }}"
when:
- kolla_action != "config"
- inventory_hostname != master_host
- not mariadb_recover | default(false)
listen: restart mariadb
# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
# https://github.com/ansible/ansible-modules-core/issues/2788
- name: wait for slave mariadb
wait_for:
host: "{{ api_interface_address }}"
port: "{{ mariadb_port }}"
connect_timeout: 1
timeout: 60
search_regex: "MariaDB"
register: check_mariadb_port
until: check_mariadb_port is success
retries: 10
delay: 6
when:
- kolla_action != "config"
- inventory_hostname != master_host
- not mariadb_recover | default(false)
listen: restart mariadb
- name: run upgrade on slave
vars:
service_name: "mariadb"
service: "{{ mariadb_services[service_name] }}"
become: true
kolla_docker:
action: "start_container"
common_options: "{{ docker_common_options }}"
detach: False
dimensions: "{{ service.dimensions }}"
environment:
KOLLA_UPGRADE:
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
DB_HOST: "{{ api_interface_address }}"
DB_PORT: "{{ mariadb_port }}"
DB_ROOT_PASSWORD: "{{ database_password }}"
image: "{{ service.image }}"
labels:
UPGRADE:
name: "upgrade_mariadb"
restart_policy: "never"
volumes: "{{ service.volumes }}"
no_log: true
when:
- kolla_action == "upgrade"
- inventory_hostname != master_host
- not mariadb_recover | default(false)
listen: restart mariadb
- name: restart master mariadb
vars:
service_name: "mariadb"
service: "{{ mariadb_services[service_name] }}"
become: true
kolla_docker:
action: "recreate_or_restart_container"
common_options: "{{ docker_common_options }}"
name: "{{ service.container_name }}"
image: "{{ service.image }}"
volumes: "{{ service.volumes }}"
dimensions: "{{ service.dimensions }}"
when:
- kolla_action != "config"
- inventory_hostname == master_host
- not mariadb_recover | default(false)
listen: restart mariadb
# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
# https://github.com/ansible/ansible-modules-core/issues/2788
- name: Waiting for master mariadb
wait_for:
host: "{{ api_interface_address }}"
port: "{{ mariadb_port }}"
connect_timeout: 1
timeout: 60
search_regex: "MariaDB"
register: check_mariadb_port
until: check_mariadb_port is success
retries: 10
delay: 6
when:
- kolla_action != "config"
- inventory_hostname == master_host
- not mariadb_recover | default(false)
listen: restart mariadb
- name: run upgrade on master
vars:
service_name: "mariadb"
service: "{{ mariadb_services[service_name] }}"
become: true
kolla_docker:
action: "start_container"
common_options: "{{ docker_common_options }}"
detach: False
dimensions: "{{ service.dimensions }}"
environment:
KOLLA_UPGRADE:
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
DB_HOST: "{{ api_interface_address }}"
DB_PORT: "{{ mariadb_port }}"
DB_ROOT_PASSWORD: "{{ database_password }}"
image: "{{ service.image }}"
labels:
UPGRADE:
name: "upgrade_mariadb"
restart_policy: "never"
volumes: "{{ service.volumes }}"
no_log: true
when:
- kolla_action == "upgrade"
- inventory_hostname == master_host
- not mariadb_recover | default(false)
listen: restart mariadb