tripleo-ansible/playbooks/step_update_online.yml
Julia Kreger 0175c378eb Update online update for newer Ansible and base playbooks
Converted the previously working string checks to boolean checks
so Ansible will properly act upon the plays instead of skip them.

Added code to set the instance_rebuilt fact which is checked in
the steps to execute os-collect-config to prevent harm to a
running system.

Change-Id: I91e1fa822655056ceb88a860367ca40183d1db58
2015-01-29 12:17:49 -05:00

198 lines
9.4 KiB
YAML

# Copyright (c) 2014 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
# Online upgrades are only attempted against nodes reported as ACTIVE; any
# other instance_status aborts here with an explanatory message.
- name: "Fail upgrade if node status is not ACTIVE"
  # The text must be passed via `msg`; a bare string after `fail:` is not a
  # recognised argument and the custom message would not be reported.
  fail:
    msg: "Failure: Node MUST be in ACTIVE state to proceed. Online upgrade cannot be completed, please consider a standard image rebuild upgrade."
  when: instance_status != "ACTIVE"
# Seed the two facts consulted by the rest of this file: instance_rebuilt
# starts out false and test_run_upgrade starts out true.
- name: "Setting default fact indicating no rebuild has yet taken place"
  set_fact: instance_rebuilt=false

- name: "Setting default policy to run upgrade"
  set_fact: test_run_upgrade=true

# Unless a rebuild is being forced, look for the requested image id in
# /etc/tripleo_image_id. grep exits non-zero when the id is absent, which
# must not abort the play, hence ignore_errors.
- name: "Compare, if applicable, the currently deployed image id"
  when: force_rebuild is not defined
  command: grep -q "{{ rebuild_image_id }}" /etc/tripleo_image_id
  ignore_errors: true
  register: test_check_version

# A zero exit status from grep means the requested image id was found, so
# the upgrade steps below are switched off.
- name: "Set action to false if rebuild is not being forced and currently deployed version matches what is being deployed"
  when: force_rebuild is not defined and test_check_version.rc == 0
  set_fact: test_run_upgrade=false
# Tooling needed on the node: qemu-utils (provides the qemu-nbd binary used
# below) and rsync.
- name: "Ensure qemu-utils and rsync are installed"
  when: test_run_upgrade == true
  sudo: true
  # apt-only for now; supporting operating systems without apt will need
  # gather_facts-driven logic at some point.
  apt:
    pkg: "{{ item }}"
    state: present
  with_items:
    - qemu-utils
    - rsync

# Load the nbd kernel module with max_part=16.
- name: "Load nbd kernel module"
  when: test_run_upgrade == true
  sudo: true
  modprobe:
    params: "max_part=16"
    state: present
    name: nbd
# Delegate the image upload to step_upload_image.yml, passing it the
# destination path on the node. Only done for online upgrades.
- name: "Triggering upload of image to node"
  when: online_upgrade is defined and test_run_upgrade == true
  sudo: true
  include: step_upload_image.yml
  vars:
    update_image: "/tmp/image-{{ rebuild_image_id }}.qcow2"
# Mount point the update image will be mounted on.
- name: "Creating directory for update to run from on node"
  when: test_run_upgrade == true
  sudo: true
  file:
    state: directory
    path: /tmp/update_image

# Best-effort cleanup before attaching: both commands are allowed to fail
# (ignore_errors), e.g. when nothing is mounted or attached.
- name: "Ensuring that the mountpoint is not already mounted"
  when: test_run_upgrade == true
  sudo: true
  command: umount /tmp/update_image
  ignore_errors: true

- name: "Ensuring that the image is not already attached"
  when: test_run_upgrade == true
  sudo: true
  command: /usr/bin/qemu-nbd -d /dev/nbd4
  ignore_errors: true

# Connect the image file to the fixed device /dev/nbd4.
- name: "Attaching update image"
  when: test_run_upgrade == true
  sudo: true
  command: /usr/bin/qemu-nbd -c /dev/nbd4 "{{ default_update_file_location }}"
# Safety net taken before any files are changed: ssh host keys are preserved
# by an included task file, and /etc plus /var/log are copied into a fresh
# /mnt/state/_upgrade_recovery folder.
- name: "Preserving ssh keys in case online upgrade fails and full rebuild is required to recover"
  include: preserve_ssh_host_keys.yml
  when: online_upgrade is defined and test_run_upgrade == true

# Remove any leftover recovery folder, then recreate it root-only.
- name: "Ensure that a previous online upgrade folder does not exist"
  sudo: yes
  file:
    path: /mnt/state/_upgrade_recovery
    state: absent
  when: test_run_upgrade == true

- name: "Make an upgrade recovery folder on the ephemeral disk"
  sudo: yes
  file:
    path: /mnt/state/_upgrade_recovery
    state: directory
    owner: root
    group: root
    mode: "0700"
  when: test_run_upgrade == true

# The backups are guarded like every other upgrade step: the recovery folder
# is only (re)created when test_run_upgrade is true, so copying into it when
# the upgrade is skipped would be incorrect.
- name: "Backup /etc to /mnt/state/_upgrade_recovery"
  sudo: yes
  shell: cp -a /etc /mnt/state/_upgrade_recovery/
  when: test_run_upgrade == true

- name: "Backup /var/log to /mnt/state/_upgrade_recovery"
  sudo: yes
  shell: mkdir /mnt/state/_upgrade_recovery/var ; cp -a /var/log /mnt/state/_upgrade_recovery/var/
  when: test_run_upgrade == true
# Mount the attached image so its contents can be inspected and synced.
- name: "Attaching image to mount point"
  sudo: yes
  command: mount /dev/nbd4 /tmp/update_image
  when: test_run_upgrade == true

# The next several steps are intended as a feature to prevent an
# image from being utilized in this fashion that is known to be
# incompatible (i.e. incompatible base library changes that would
# normally result in a system being left in a damaged state).
- name: "Verify that the disk image does not contain a warning flag"
  shell: test -x /tmp/update_image/boot/tripleo_incompatible_upgrade
  when: test_run_upgrade == true
  register: test_abort_incompatible
  # A non-zero exit status means the flag is absent, which is the normal
  # case and must not fail the play.
  ignore_errors: yes

# rc == 0 means the flag file exists: unmount, detach, then abort.
- name: "If aborting, detach update image mountpoint"
  sudo: yes
  command: umount /tmp/update_image
  when: test_run_upgrade == true and test_abort_incompatible.rc == 0

- name: "If aborting, detach image"
  sudo: yes
  command: /usr/bin/qemu-nbd -d /dev/nbd4
  when: test_run_upgrade == true and test_abort_incompatible.rc == 0

- name: "If aborting, fail."
  # The text must be passed via `msg`; a bare string after `fail:` is not a
  # recognised argument and the custom message would not be reported.
  fail:
    msg: "Image is marked as being incompatible for online upgrades. Please attempt a normal upgrade."
  when: test_run_upgrade == true and test_abort_incompatible.rc == 0
# Directory that receives rsync.log from the file sync step.
- name: "Ensure folder exists for rsync log to be housed"
  sudo: yes
  file:
    path: /mnt/state/var/log/online_upgrade
    state: directory
    owner: root
    group: root
    mode: "0700"
  when: test_run_upgrade == true

# Sanity check: a mounted image is expected to contain /boot. The result is
# only recorded here (ignore_errors); the next task decides whether to abort.
- name: "Pre-flight check that we appear to have an image mounted"
  shell: test -e /tmp/update_image/boot
  when: test_run_upgrade == true
  register: test_is_update_image
  ignore_errors: yes

- name: "Fail if image does not appear to be valid"
  # The text must be passed via `msg`; a bare string after `fail:` is not a
  # recognised argument and the custom message would not be reported.
  fail:
    msg: "ERROR: Upgrade image did not mount, or is not valid."
  when: test_run_upgrade == true and test_is_update_image.rc != 0
# Sync the mounted image over the running root filesystem, then restore the
# ssh host keys from /mnt/state/_ssh_host_keys and /etc/hosts and /etc/mtab
# from the recovery backup.
- name: "Initiating update of files, this may take a while."
  sudo: yes
  # The following list of files excluded are to ensure access by Ansible
  # and continuation of existing processes with minimal impact.
  #   * /tmp - Excluded as this is the source folder
  #   * /etc/hosts - Excluded as it is normally populated with host
  #     information by os-collect-config, and being in a reverted state
  #     causes latency while the host is attempting to resolve the IP address
  #     of the node connecting.
  #   * /etc/ssh/ssh_host_* - This is to preserve the ssh host keys so Ansible
  #     is able to reconnect.
  #   * /etc/passwd /etc/shadow /etc/group - These are preserved so Ansible
  #     is able to log back into the system after the sync has been completed,
  #     as the heat-admin user is not burned into the image.
  #   * /etc/sudoers - Excluded by the command below.
  #     NOTE(review): presumably preserved for the same heat-admin access
  #     reason as above - confirm.
  #   * /home/heat-admin - Same case as /etc/passwd above.
  #   * /mnt - Excluded as it is the ephemeral volume mount point.
  #   * /var/log - Excluded to preserve logs.
  #   * /lib/modules/$(uname -r) - The current running kernel's modules, which
  #     need to be kept on disk until a kernel upgrade takes place.
  #   * /var/lib/heat-* /var/lib/os-collect-config /var/lib/cloud - These
  #     folders are preserved in order to give the instance a head start on
  #     starting back up.  Additionally /var/lib/cloud contains semaphore
  #     locks that block operations that would be unsafe to perform at the
  #     time of an upgrade.
  #   * /dev - Excluded as the image contains a basic /dev folder, but the
  #     host operates with devfs.  Removal of files from devfs will result in
  #     unpredictable behavior.
  #   * /var/cache/libvirt/qemu/capabilities /var/lib/libvirt - Removal of
  #     these files will cause the current virtual machines to stop as they
  #     contain state information.
  #   * /var/lib/dhcp/*.leases - Excluded as we want the DHCP client to
  #     attempt a renewal instead of requesting a new address.
  #   * /var/lib/openvswitch - Excluded as it contains the state information
  #     for openvswitch and we want openvswitch to be running as long as
  #     possible for minimal connectivity impact to the running virtual
  #     machines.
  #
  # NOTE(review): the commands are chained with ';' and rsync is piped into
  # tee, so the registered rc is the exit status of the final cp, not of
  # rsync itself - confirm this is the intended success signal.
  shell: rsync -axHAXv /tmp/update_image/ / --exclude=/tmp --exclude=/etc/hosts --exclude='/etc/ssh/ssh_host_*' --exclude=/etc/passwd --exclude=/etc/shadow --exclude=/etc/group --exclude=/etc/sudoers --exclude=/home/heat-admin --exclude=/mnt --exclude=/var/log --exclude="/lib/modules/$(uname -r)" --exclude='/var/lib/heat-*' --exclude='/dev/' --exclude='/var/cache/libvirt/qemu/capabilities' --exclude='/var/lib/os-collect-config' --exclude='/var/lib/cloud' --exclude='/var/lib/libvirt' --exclude='/var/lib/dhcp/*.leases' --exclude='/var/lib/openvswitch' --delete-after | tee -a /mnt/state/var/log/online_upgrade/rsync.log ; cp -at /etc/ssh/ /mnt/state/_ssh_host_keys/ssh_host_* ; cp -at /etc/ /mnt/state/_upgrade_recovery/etc/hosts /mnt/state/_upgrade_recovery/etc/mtab
  when: test_run_upgrade == true
  register: test_rsync_completed

# Flip instance_rebuilt only when the sync step actually ran and exited 0.
# The test_run_upgrade guard comes first because a skipped sync task leaves
# test_rsync_completed without an rc attribute.
- name: "Record that instance was rebuilt under normal circumstances"
  set_fact: instance_rebuilt=true
  when: test_run_upgrade == true and test_rsync_completed.rc == 0
# Post-sync housekeeping: restart sshd, unmount the image and remove the
# temporary mount point.
- name: "Restart ssh service"
  when: test_run_upgrade == true
  sudo: true
  # The service name "ssh" is hard-coded; gather_facts will be required to
  # support systems that name their ssh service differently.
  service:
    name: ssh
    state: restarted

- name: "Detach update image mountpoint"
  when: test_run_upgrade == true
  sudo: true
  command: umount /tmp/update_image

# TODO: bounce any core services like ssh

- name: "Remove the post-rsync folder"
  when: test_run_upgrade == true
  sudo: true
  file:
    state: absent
    path: /tmp/update_image
# qemu-utils is ensured a second time before detaching the image.
# NOTE(review): presumably because the sync with --delete-after may have
# removed qemu-nbd if the new image does not ship it - confirm.
- name: "Ensure qemu-utils is installed"
  when: test_run_upgrade == true
  sudo: true
  # apt-only for now; supporting operating systems without apt will need
  # gather_facts-driven logic at some point.
  apt:
    pkg: "{{ item }}"
    state: present
  with_items:
    - qemu-utils

# Disconnect the image from /dev/nbd4 now that it is unmounted.
- name: "Detach upgrade image"
  when: test_run_upgrade == true
  sudo: true
  command: /usr/bin/qemu-nbd -d /dev/nbd4
# NOTE: Should TripleO one day support booting from the local block
# device, then this is roughly the location where support for updating
# the boot loader would be added.