
Back up and restore tftpboot, as Ironic does not recreate the files necessary for overcloud nodes to boot. Change-Id: Ibdc8b41be480f9344e0ba014bb0017591c603257
473 lines
20 KiB
YAML
473 lines
20 KiB
YAML
# Copyright (c) 2014 Hewlett-Packard Development Company, L.P.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
---
|
|
# Pull in step_ping.yml before any host is touched
# (presumably a reachability check -- confirm against that file).
- include: step_ping.yml
|
|
# Play: stop and disable the undercloud services ahead of the rebuild.
# Every task is skipped unless the instance is reported as ACTIVE.
- name: Disable Undercloud
  hosts: undercloud
  sudo: true
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    # Presumably populates existing_services, which the service tasks
    # below filter on -- confirm against the service_facts module.
    - service_facts:
      when: instance_status == "ACTIVE"
    # Rename the MySQL init job file; removes= skips the task when the
    # source file is already gone.
    - command: mv -f /etc/init/mysql.conf /etc/init/mysql-boot-control.conf removes=/etc/init/mysql.conf
      when: instance_status == "ACTIVE"
    # Service list used when the helion variable is defined.
    - service: name={{ item }} enabled=no state=stopped
      with_items: helion_undercloud_services
      when: helion is defined and instance_status == "ACTIVE" and item in existing_services
    # Service list used when the helion variable is not defined.
    - service: name={{ item }} enabled=no state=stopped
      with_items: undercloud_services
      when: helion is not defined and instance_status == "ACTIVE" and item in existing_services
|
|
# Play: stop the VMs and then the compute services on each nova-compute
# host. Every task is skipped unless the instance is ACTIVE.
- name: Disable Overcloud Compute
  hosts: nova-compute
  sudo: true
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    - include: stop_vms.yml
      when: instance_status == "ACTIVE"
    # Presumably populates existing_services used by the filters below.
    - service_facts:
      when: instance_status == "ACTIVE"
    # Service list used when the helion variable is defined.
    - service: name={{ item }} enabled=no state=stopped
      with_items: helion_overcloud_compute_services
      when: helion is defined and instance_status == "ACTIVE" and item in existing_services
    # Service list used when the helion variable is not defined.
    - service: name={{ item }} enabled=no state=stopped
      with_items: overcloud_compute_services
      when: helion is not defined and instance_status == "ACTIVE" and item in existing_services
|
|
# Play: stop and disable the swift services on the swift-storage hosts.
# Every task is skipped unless the instance is ACTIVE.
- name: swift-storage
  hosts: swift-storage
  sudo: true
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    # Presumably populates existing_services used by the filters below.
    - service_facts:
      when: instance_status == "ACTIVE"
    # Service list used when the helion variable is defined.
    - service: name={{ item }} enabled=no state=stopped
      with_items: helion_overcloud_swift_services
      when: helion is defined and instance_status == "ACTIVE" and item in existing_services
    # Service list used when the helion variable is not defined.
    - service: name={{ item }} enabled=no state=stopped
      with_items: overcloud_swift_services
      when: helion is not defined and instance_status == "ACTIVE" and item in existing_services
|
|
# Play: stop and disable the VSA services. Only a helion service list
# exists for this group, so nothing is stopped when helion is undefined.
- name: "Stop services on VSA"
  hosts: vsa
  sudo: true
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    # Presumably populates existing_services used by the filter below.
    - service_facts:
      when: instance_status == "ACTIVE"
    - service: name={{ item }} enabled=no state=stopped
      with_items: helion_overcloud_vsa_services
      when: helion is defined and instance_status == "ACTIVE" and item in existing_services
|
|
# Play: stop and disable the controller services, then run stop_tgt.yml.
# Every task is skipped unless the instance is ACTIVE.
- name: Disable Overcloud Controller
  hosts: controller
  sudo: true
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    # Presumably populates existing_services used by the filters below.
    - service_facts:
      when: instance_status == "ACTIVE"
    # Service list used when the helion variable is defined.
    - service: name={{ item }} enabled=no state=stopped
      with_items: helion_overcloud_controller_services
      when: helion is defined and instance_status == "ACTIVE" and item in existing_services
    # Service list used when the helion variable is not defined.
    - service: name={{ item }} enabled=no state=stopped
      with_items: overcloud_controller_services
      when: helion is not defined and instance_status == "ACTIVE" and item in existing_services
    - include: stop_tgt.yml
      when: instance_status == "ACTIVE"
|
|
# Play: stop and disable the services on the controllerMgmt node, then
# run stop_tgt.yml. Only a helion service list exists for this group.
- name: Disable Overcloud Controller Mgmt node
  hosts: controllerMgmt
  sudo: true
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    # Presumably populates existing_services used by the filter below.
    - service_facts:
      when: instance_status == "ACTIVE"
    - service: name={{ item }} enabled=no state=stopped
      with_items: helion_overcloudmgmt_controller_services
      when: helion is defined and instance_status == "ACTIVE" and item in existing_services
    - include: stop_tgt.yml
      when: instance_status == "ACTIVE"
|
|
# Critically, a single node of the Galera cluster has to be the 'last'
# one to go down, and controllerMgmt fits that bill for now. Eventually
# the "special" node will have to be selected explicitly, which can be
# done with host facts and conditionals. The last node to go down must
# have the Galera bootstrap run on it, or none of them will come up.
#
# Play: stop MySQL and RabbitMQ on the ordinary controllers, one host at
# a time (serial: 1). Every task is skipped unless the instance is ACTIVE.
- name: Stop MySQL/RabbitMQ on controller nodes
  hosts: controller
  sudo: true
  gather_facts: false
  max_fail_percentage: 0
  serial: 1
  tasks:
    # Presumably sets galera_status and wsrep_cluster_size, which the
    # conditions below read -- confirm against galera_status.yml.
    - include: galera_status.yml
      when: instance_status == "ACTIVE"
    - name: Stop MySQL under normal circumstances
      service: name=mysql enabled=no state=stopped
      when: instance_status == "ACTIVE" and galera_status == "Synced" and wsrep_cluster_size.stdout != "1"
    - name: Stop MySQL if last node in cluster and single_controller flag has been set.
      service: name=mysql enabled=no state=stopped
      when: instance_status == "ACTIVE" and single_controller is defined and galera_status == "Synced" and wsrep_cluster_size.stdout == "1"
    # Abort unless single_controller overrides the safety checks.
    - fail: msg="Galera Replication is out of sync - cannot safely proceed"
      when: single_controller is not defined and instance_status == "ACTIVE" and galera_status == "Out of Sync"
    - fail: msg="Galera Replication - Node appears to be the last node in a cluster - cannot safely proceed unless overriden via single_controller setting - See README.rst"
      when: instance_status == "ACTIVE" and single_controller is not defined and wsrep_cluster_size.stdout == "1"
    # Best effort: errors from either RabbitMQ stop attempt are ignored.
    - service: name=rabbitmq-server state=stopped
      when: instance_status == "ACTIVE"
      ignore_errors: true
    # shell, not command: command does not go through a shell, so
    # $(hostname) would be handed to rabbitmqctl as a literal node name.
    - shell: rabbitmqctl -n "rabbit@$(hostname)" stop
      when: instance_status == "ACTIVE"
      ignore_errors: true
    - name: "Waiting for MySQL to stop"
      wait_for: port=3307 state=stopped timeout=60 delay=10
      when: instance_status == "ACTIVE" and helion is defined and single_controller is not defined and galera_status == 'Synced'
    - name: "Waiting for rabbitmq-server to stop"
      wait_for: port=5672 state=stopped timeout=60 delay=10
      when: instance_status == "ACTIVE"
|
|
# Play: stop MySQL and RabbitMQ on the controllerMgmt node. Unless
# single_controller is defined, the play fails when this node is not
# Synced or when wsrep_cluster_size is anything other than "1".
- name: Stop MySQL/RabbitMQ on Overcloud Controller Mgmt node
  hosts: controllerMgmt
  sudo: true
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    # Presumably sets galera_status and wsrep_cluster_size, which the
    # conditions below read -- confirm against galera_status.yml.
    - include: galera_status.yml
      when: instance_status == "ACTIVE"
    - fail: msg="Galera Replication on controller Management is out of sync - cannot safely proceed"
      when: instance_status == "ACTIVE" and single_controller is not defined and galera_status != "Synced"
    - fail: msg="Galera Replication on controller Management - cannot safely proceed as another MySQL cluster node is active."
      when: instance_status == "ACTIVE" and single_controller is not defined and wsrep_cluster_size.stdout != "1"
    - service: name=mysql enabled=no state=stopped
      when: instance_status == "ACTIVE"
    # Best effort: errors from either RabbitMQ stop attempt are ignored.
    - service: name=rabbitmq-server enabled=no state=stopped
      when: instance_status == "ACTIVE"
      ignore_errors: true
    # shell, not command: command does not go through a shell, so
    # $(hostname) would be handed to rabbitmqctl as a literal node name.
    - shell: rabbitmqctl -n "rabbit@$(hostname)" stop
      when: instance_status == "ACTIVE"
      ignore_errors: true
    - name: "Waiting for rabbitmq-server to stop"
      wait_for: port=5672 state=stopped timeout=60 delay=10
      when: instance_status == "ACTIVE"
|
|
# Play (unnamed): run disable_os_collect_config.yml on every host whose
# instance is ACTIVE.
- hosts: all
  max_fail_percentage: 0
  gather_facts: false
  tasks:
    - include: disable_os_collect_config.yml
      when: instance_status == "ACTIVE"
|
|
# Play: preserve state from the undercloud, rebuild it, wait for SSH to
# come back and then refresh its configuration.
- name: Rebuild and Refresh Undercloud
  hosts: undercloud
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    # Preservation steps only run while the instance is still ACTIVE.
    - include: step_preserve_password_file.yml
      when: instance_status == "ACTIVE"
    - include: step_undercloud_backup_tftpboot.yml
      when: instance_status == "ACTIVE"
    - include: preserve_ssh_host_keys.yml
      when: instance_status == "ACTIVE"
    - include: step_unmount.yml
      when: instance_status == "ACTIVE"
    # Skipped when the instance already reports REBUILD.
    - include: rebuild.yml
      instance_id: "{{ instance_id }}"
      rebuild_image_id: "{{ undercloud_rebuild_image_id }}"
      when: instance_status != "REBUILD"
    # Probe port 22 from the control host. The expected banner text
    # depends on wait_for_hostkey; probe failures are ignored.
    - local_action: wait_for port=22 timeout="{{ ssh_timeout }}" host="{{ inventory_hostname }}" search_regex=TRIPLEO_HK_RESTORED delay=10
      when: wait_for_hostkey is defined
      ignore_errors: true
    - local_action: wait_for port=22 timeout="{{ ssh_timeout }}" host="{{ inventory_hostname }}" search_regex=OpenSSH delay=10
      when: wait_for_hostkey is not defined
      ignore_errors: true
    - include: refresh_config.yml
|
|
# Play: bring the rebuilt undercloud back into service. Facts are
# gathered here (gather_facts is not disabled for this play).
- name: Enable Undercloud
  hosts: undercloud
  sudo: true
  max_fail_percentage: 0
  tasks:
    - include: disable_os_collect_config.yml
    # Presumably populates existing_services used by the filters below.
    - service_facts:
      when: instance_status == "ACTIVE"
    - include: stop_mysql.yml
    - include: step_reset_mnt_state_permissions.yml
    # Call os-apply-config directly to write out the configuration files.
    - include: step_os-apply-config.yml
    - include: step_undercloud_restore_tftpboot.yml
    - include: start_mysql.yml
    - include: start_rabbitmq.yml
    # Fix Ironic reservations, needed because of this bug:
    # https://bugs.launchpad.net/ironic/+bug/1382698
    - include: step_undercloud_ironic_release_reservations.yml
    - name: "Run os-collect-config"
      command: os-collect-config --force --one
    - service: name=os-collect-config state=started
    # Service list used when the helion variable is defined.
    - service: name={{ item }} enabled=yes state=started
      with_items: helion_undercloud_services
      when: helion is defined and item in existing_services
    # Service list used when the helion variable is not defined.
    - service: name={{ item }} enabled=yes state=started
      with_items: undercloud_services
      when: helion is not defined and item in existing_services
|
|
# Play: preserve state from the controllerMgmt node, rebuild it, wait
# for SSH to come back and then pause for 30 seconds.
- name: Rebuild and Refresh ControllerMgmt
  hosts: controllerMgmt
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    # Preservation/cleanup steps only run while the instance is ACTIVE.
    - include: step_preserve_password_file.yml
      when: instance_status == "ACTIVE"
    - include: preserve_ssh_host_keys.yml
      when: instance_status == "ACTIVE"
    - include: cleanup_cinder_volumes.yml
      when: instance_status == "ACTIVE"
    - include: step_unmount.yml
      when: instance_status == "ACTIVE"
    # Skipped when the instance already reports REBUILD.
    - include: rebuild.yml
      instance_id: "{{ instance_id }}"
      rebuild_image_id: "{{ controllermgmt_rebuild_image_id }}"
      when: instance_status != "REBUILD"
    # Probe port 22 from the control host. The expected banner text
    # depends on wait_for_hostkey; probe failures are ignored.
    - local_action: wait_for port=22 timeout="{{ ssh_timeout }}" host="{{ inventory_hostname }}" search_regex=TRIPLEO_HK_RESTORED delay=10
      when: wait_for_hostkey is defined
      ignore_errors: true
    - local_action: wait_for port=22 timeout="{{ ssh_timeout }}" host="{{ inventory_hostname }}" search_regex=OpenSSH delay=10
      when: wait_for_hostkey is not defined
      ignore_errors: true
    - pause: seconds=30 prompt="Allowing controllerMgmt node to settle"
|
|
# Play: configure the rebuilt controllerMgmt node and start MySQL and
# RabbitMQ on it. The MySQL cluster is bootstrapped here unless
# single_controller is defined.
- name: Start initial cluster node
  hosts: controllerMgmt
  sudo: true
  max_fail_percentage: 0
  tasks:
    - include: mysql_init_fix.yml
    - include: stop_mysql.yml
    - include: rabbitmq_occ_disable.yml
    - include: refresh_config.yml
    - name: Stop os-collect-config to avoid collission
      service: name=os-collect-config state=stopped
    - name: "Work around apache2 starting up at boot w/o config..."
      service: name=apache2 enabled=no state=stopped
    - name: "Remove os-collect-config disable sentinel file"
      file: path=/mnt/state/disable-os-collect-config state=absent
    - name: "Run os-collect-config"
      command: os-collect-config --force --one
    - include: step_reset_mnt_state_permissions.yml
    # Call os-apply-config directly to write out the configuration files.
    - include: step_os-apply-config.yml
    - name: Bootstrap the MySQL cluster
      command: /etc/init.d/mysql bootstrap-pxc
      when: single_controller is not defined
    - include: start_mysql.yml
    - include: step_create_databases.yml
    - include: start_rabbitmq.yml
    - name: "Run os-collect-config"
      command: os-collect-config --force --one
    - name: Wait for Rabbit to listen on its usual port
      wait_for: port=5672 state=started timeout=90 delay=10
    - name: Restart os-collect-config
      service: name=os-collect-config state=started
|
|
# Play: preserve state from each controller, rebuild it, wait for SSH to
# come back and then pause for 30 seconds.
- name: Rebuild and Refresh Controller
  hosts: controller
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    # Preservation/cleanup steps only run while the instance is ACTIVE.
    - include: step_preserve_password_file.yml
      when: instance_status == "ACTIVE"
    - include: preserve_ssh_host_keys.yml
      when: instance_status == "ACTIVE"
    - include: cleanup_cinder_volumes.yml
      when: instance_status == "ACTIVE"
    - include: step_unmount.yml
      when: instance_status == "ACTIVE"
    # Skipped when the instance already reports REBUILD.
    - include: rebuild.yml
      instance_id: "{{ instance_id }}"
      rebuild_image_id: "{{ controller_rebuild_image_id }}"
      when: instance_status != "REBUILD"
    # Probe port 22 from the control host. The expected banner text
    # depends on wait_for_hostkey; probe failures are ignored.
    - local_action: wait_for port=22 timeout="{{ ssh_timeout }}" host="{{ inventory_hostname }}" search_regex=TRIPLEO_HK_RESTORED delay=10
      when: wait_for_hostkey is defined
      ignore_errors: true
    - local_action: wait_for port=22 timeout="{{ ssh_timeout }}" host="{{ inventory_hostname }}" search_regex=OpenSSH delay=10
      when: wait_for_hostkey is not defined
      ignore_errors: true
    - pause: seconds=30 prompt="Allowing controller node to settle."
|
|
# Play: apply configuration to the rebuilt controllers and start MySQL.
- name: Stop and setup for controller refresh
  hosts: controller
  sudo: true
  max_fail_percentage: 0
  tasks:
    # MySQL is stopped before it is started so that, should it have
    # started at boot, it will hopefully pick up the new config that
    # os-collect-config and os-apply-config put in place.
    - include: mysql_init_fix.yml
    - include: stop_mysql.yml
    - include: rabbitmq_occ_disable.yml
    - include: step_reset_mnt_state_permissions.yml
    - include: refresh_config.yml
    - name: Stop os-collect-config to avoid collission
      service: name=os-collect-config state=stopped
    - name: "Work around apache2 starting up at boot w/o config..."
      service: name=apache2 enabled=no state=stopped
    - name: "Remove os-collect-config disable sentinel file"
      file: path=/mnt/state/disable-os-collect-config state=absent
    - name: "Run os-collect-config"
      command: os-collect-config --force --one
    # Call os-apply-config directly to write out the configuration files
    # in case os-collect-config failed to reach that step.
    - include: step_os-apply-config.yml
    - include: start_mysql.yml
|
|
# Play: run step_create_databases.yml on the controllers, one host at a
# time (serial: 1).
- name: Initiate Database Creation
  hosts: controller
  sudo: true
  serial: 1
  max_fail_percentage: 0
  tasks:
    - include: step_create_databases.yml
|
|
# Play: start RabbitMQ on the controllers, re-run os-collect-config once
# and wait for port 5672 to open.
- name: Complete Controller Refresh
  hosts: controller
  sudo: true
  max_fail_percentage: 0
  tasks:
    - include: start_rabbitmq.yml
    - name: Re-run os-collect-config in case first one failed due to a race condition
      command: os-collect-config --noforce --one
    - name: "Restart os-collect-config"
      service: name=os-collect-config state=started
    - name: Wait for Rabbit to listen on its usual port
      wait_for: port=5672 state=started timeout=120 delay=10
|
|
# Play: after a 30 second pause, query the RabbitMQ cluster status on
# all controller and controllerMgmt hosts, then run
# cleanup_rabbitmq_start.yml. The play has no play-level sudo; only the
# status task escalates.
- name: Check RabbitMQ
  hosts: controller:controllerMgmt
  max_fail_percentage: 0
  tasks:
    - pause: seconds=30 prompt="Giving RabbitMQ time to start-up."
    # Skipped when single_controller is defined.
    - name: Checking rabbitmq cluster status
      command: rabbitmqctl cluster_status
      sudo: true
      when: single_controller is not defined
    - include: cleanup_rabbitmq_start.yml
|
|
# Play: start os-collect-config and re-enable the services on the
# controllerMgmt node. Only a helion service list exists for this group.
- name: Enable Overcloud ControllerMgmt
  hosts: controllerMgmt
  sudo: true
  max_fail_percentage: 0
  tasks:
    - service: name=os-collect-config state=started
    # Presumably populates existing_services used by the filter below.
    - service_facts:
      when: instance_status == "ACTIVE"
    # Must name the same list the "Disable Overcloud Controller Mgmt
    # node" play stops (helion_overcloudmgmt_controller_services, plural);
    # the previous singular spelling referenced a different variable.
    - service: name={{ item }} enabled=yes state=started
      with_items: helion_overcloudmgmt_controller_services
      when: helion is defined and item in existing_services
|
|
# Play: start os-collect-config and re-enable the controller services.
- name: Enable Overcloud Controller
  hosts: controller
  sudo: true
  max_fail_percentage: 0
  tasks:
    - service: name=os-collect-config state=started
    # Presumably populates existing_services used by the filters below.
    - service_facts:
      when: instance_status == "ACTIVE"
    # Service list used when the helion variable is defined.
    - service: name={{ item }} enabled=yes state=started
      with_items: helion_overcloud_controller_services
      when: helion is defined and item in existing_services
    # Service list used when the helion variable is not defined.
    - service: name={{ item }} enabled=yes state=started
      with_items: overcloud_controller_services
      when: helion is not defined and item in existing_services
|
|
# Play: preserve the SSH host keys of each swift-storage node, rebuild
# it, wait for SSH to come back and then refresh its configuration.
- name: Rebuild and Refresh swift-storage
  hosts: swift-storage
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    # Preservation steps only run while the instance is still ACTIVE.
    - include: preserve_ssh_host_keys.yml
      when: instance_status == "ACTIVE"
    - include: step_unmount.yml
      when: instance_status == "ACTIVE"
    # Skipped when the instance already reports REBUILD.
    - include: rebuild.yml
      instance_id: "{{ instance_id }}"
      rebuild_image_id: "{{ swift_storage_rebuild_image_id }}"
      when: instance_status != "REBUILD"
    # Probe port 22 from the control host. The expected banner text
    # depends on wait_for_hostkey; probe failures are ignored.
    - local_action: wait_for port=22 timeout="{{ ssh_timeout }}" host="{{ inventory_hostname }}" search_regex=TRIPLEO_HK_RESTORED delay=10
      when: wait_for_hostkey is defined
      ignore_errors: true
    - local_action: wait_for port=22 timeout="{{ ssh_timeout }}" host="{{ inventory_hostname }}" search_regex=OpenSSH delay=10
      when: wait_for_hostkey is not defined
      ignore_errors: true
    - include: refresh_config.yml
|
|
# Play: run os-collect-config once on the rebuilt swift-storage nodes,
# start it as a service and re-enable the swift services.
- name: Enable Swift Storage
  hosts: swift-storage
  sudo: true
  max_fail_percentage: 0
  tasks:
    - include: disable_os_collect_config.yml
    - name: "Run os-collect-config"
      command: os-collect-config --force --one
      sudo: true
    - service: name=os-collect-config state=started
    # Presumably populates existing_services used by the filters below.
    - service_facts:
      when: instance_status == "ACTIVE"
    # Service list used when the helion variable is defined.
    - service: name={{ item }} enabled=yes state=started
      with_items: helion_overcloud_swift_services
      when: helion is defined and item in existing_services
    # Service list used when the helion variable is not defined.
    - service: name={{ item }} enabled=yes state=started
      with_items: overcloud_swift_services
      when: helion is not defined and item in existing_services
|
|
# Play: preserve the SSH host keys of each VSA node, rebuild it, wait
# for SSH to come back and then refresh its configuration.
- name: Rebuild and Refresh vsa
  hosts: vsa
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    # Preservation steps only run while the instance is still ACTIVE.
    - include: preserve_ssh_host_keys.yml
      when: instance_status == "ACTIVE"
    - include: step_unmount.yml
      when: instance_status == "ACTIVE"
    # Skipped when the instance already reports REBUILD.
    - include: rebuild.yml
      instance_id: "{{ instance_id }}"
      rebuild_image_id: "{{ vsa_rebuild_image_id }}"
      when: instance_status != "REBUILD"
    # Probe port 22 from the control host. The expected banner text
    # depends on wait_for_hostkey; probe failures are ignored.
    - local_action: wait_for port=22 timeout="{{ ssh_timeout }}" host="{{ inventory_hostname }}" search_regex=TRIPLEO_HK_RESTORED delay=10
      when: wait_for_hostkey is defined
      ignore_errors: true
    - local_action: wait_for port=22 timeout="{{ ssh_timeout }}" host="{{ inventory_hostname }}" search_regex=OpenSSH delay=10
      when: wait_for_hostkey is not defined
      ignore_errors: true
    - include: refresh_config.yml
|
|
# Play: run os-collect-config once on the rebuilt VSA nodes, start it as
# a service and re-enable the VSA services. Only a helion service list
# exists for this group.
- name: Enable VSA
  hosts: vsa
  sudo: true
  max_fail_percentage: 0
  tasks:
    - include: disable_os_collect_config.yml
    - name: "Run os-collect-config"
      command: os-collect-config --force --one
      sudo: true
    - service: name=os-collect-config state=started
    # Presumably populates existing_services used by the filter below.
    - service_facts:
      when: instance_status == "ACTIVE"
    - service: name={{ item }} enabled=yes state=started
      with_items: helion_overcloud_vsa_services
      when: helion is defined and item in existing_services
|
|
# Play: preserve state from each nova-compute node, rebuild it, wait for
# SSH to come back and then refresh its configuration.
- name: Rebuild and Refresh Nova Compute
  hosts: nova-compute
  gather_facts: false
  max_fail_percentage: 0
  tasks:
    # Preservation steps only run while the instance is still ACTIVE.
    - include: step_preserve_password_file.yml
      when: instance_status == "ACTIVE"
    - include: preserve_ssh_host_keys.yml
      when: instance_status == "ACTIVE"
    - include: step_unmount.yml
      when: instance_status == "ACTIVE"
    # Skipped when the instance already reports REBUILD.
    - include: rebuild.yml
      instance_id: "{{ instance_id }}"
      rebuild_image_id: "{{ nova_compute_rebuild_image_id }}"
      when: instance_status != "REBUILD"
    # Probe port 22 from the control host. The expected banner text
    # depends on wait_for_hostkey; probe failures are ignored.
    - local_action: wait_for port=22 timeout="{{ ssh_timeout }}" host="{{ inventory_hostname }}" search_regex=TRIPLEO_HK_RESTORED delay=10
      when: wait_for_hostkey is defined
      ignore_errors: true
    - local_action: wait_for port=22 timeout="{{ ssh_timeout }}" host="{{ inventory_hostname }}" search_regex=OpenSSH delay=10
      when: wait_for_hostkey is not defined
      ignore_errors: true
    - include: refresh_config.yml
|
|
# Play: run os-collect-config once on the rebuilt nova-compute nodes,
# start it as a service and re-enable the compute services.
- name: Enable Overcloud Compute
  hosts: nova-compute
  sudo: true
  max_fail_percentage: 0
  tasks:
    - include: disable_os_collect_config.yml
    - name: "Run os-collect-config"
      command: os-collect-config --force --one
      sudo: true
    - service: name=os-collect-config state=started
    # Presumably populates existing_services used by the filters below.
    - service_facts:
      when: instance_status == "ACTIVE"
    # Service list used when the helion variable is defined.
    - service: name={{ item }} enabled=yes state=started
      with_items: helion_overcloud_compute_services
      when: helion is defined and item in existing_services
    # Service list used when the helion variable is not defined.
    - service: name={{ item }} enabled=yes state=started
      with_items: overcloud_compute_services
      when: helion is not defined and item in existing_services
|