Improve multi-node AIO robustness
To improve the readability and robustness of the mnaio feature, I have replaced the tasks that shell out to virsh with the virt module where available. I have also created a vm-status play that will hopefully help resolve SSH failures into the VMs. This play uses the block/rescue pattern to restart a VM once if it fails the initial SSH check. Hopefully this will reduce the SSH failures caused by a stuck VM. This adds a new variable called vm_ssh_timeout, which gives the deployer an easy place to override the default timeout. The python-lxml package is needed for the virt module. Change-Id: I027556b71a8c26d08a56b4ffa56b2eeaf1cbabe9
This commit is contained in:
parent
d0b0668657
commit
482e845d92
@ -1,5 +1,5 @@
|
||||
---
|
||||
# Copyright 2017, Rackspace US, Inc.
|
||||
# Copyright 2018, Rackspace US, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -32,7 +32,9 @@
|
||||
- always
|
||||
|
||||
- name: Stop running VMs
|
||||
command: "virsh destroy {{ hostvars[item]['server_hostname'] }}"
|
||||
virt:
|
||||
name: "{{ hostvars[item]['server_hostname'] }}"
|
||||
command: destroy
|
||||
failed_when: false
|
||||
when:
|
||||
- hostvars[item]['server_vm'] | default(false) | bool
|
||||
@ -96,17 +98,6 @@
|
||||
- hostvars[item]['server_vm'] | default(false) | bool
|
||||
with_items: "{{ groups['pxe_servers'] }}"
|
||||
|
||||
- name: Create the VM template
|
||||
template:
|
||||
src: kvm/kvm-vm.xml
|
||||
dest: "/etc/libvirt/qemu/{{ hostvars[item]['server_hostname'] }}.xml"
|
||||
mode: 0644
|
||||
owner: root
|
||||
group: root
|
||||
when:
|
||||
- hostvars[item]['server_vm'] | default(false) | bool
|
||||
with_items: "{{ groups['pxe_servers'] }}"
|
||||
|
||||
- name: Wait for guest capabilities to appear
|
||||
command: "virsh capabilities"
|
||||
register: virsh_caps
|
||||
@ -115,21 +106,35 @@
|
||||
delay: 10
|
||||
|
||||
- name: Define the VM
|
||||
command: "virsh define /etc/libvirt/qemu/{{ hostvars[item]['server_hostname'] }}.xml"
|
||||
virt:
|
||||
name: "{{ hostvars[item]['server_hostname'] }}"
|
||||
command: define
|
||||
xml: "{{ lookup('template', 'kvm/kvm-vm.xml.j2') }}"
|
||||
failed_when: false
|
||||
when:
|
||||
- hostvars[item]['server_vm'] | default(false) | bool
|
||||
with_items: "{{ groups['pxe_servers'] }}"
|
||||
|
||||
- name: Create the VM
|
||||
command: "virsh create /etc/libvirt/qemu/{{ hostvars[item]['server_hostname'] }}.xml"
|
||||
failed_when: false
|
||||
- name: Get the VM xml
|
||||
virt:
|
||||
command: get_xml
|
||||
name: "{{ hostvars[item]['server_hostname'] }}"
|
||||
register: vm_xml
|
||||
when:
|
||||
- hostvars[item]['server_vm'] | default(false) | bool
|
||||
with_items: "{{ groups['pxe_servers'] }}"
|
||||
|
||||
- name: Write the VM xml
|
||||
copy:
|
||||
content: "{{ item.get_xml }}"
|
||||
dest: "/etc/libvirt/qemu/{{ item.item }}.xml"
|
||||
with_items: "{{ vm_xml.results }}"
|
||||
|
||||
- name: Start the VM
|
||||
command: "virsh start {{ hostvars[item]['server_hostname'] }}"
|
||||
virt:
|
||||
name: "{{ hostvars[item]['server_hostname'] }}"
|
||||
command: start
|
||||
state: running
|
||||
failed_when: false
|
||||
when:
|
||||
- hostvars[item]['server_vm'] | default(false) | bool
|
||||
@ -144,6 +149,10 @@
|
||||
with_items: "{{ groups['pxe_servers'] }}"
|
||||
|
||||
|
||||
- name: Check VM Connectivity
|
||||
import_playbook: vm-status.yml
|
||||
|
||||
|
||||
- name: Create vm_servers group
|
||||
hosts: localhost
|
||||
gather_facts: false
|
||||
@ -160,23 +169,12 @@
|
||||
with_items: "{{ groups['pxe_servers'] }}"
|
||||
|
||||
|
||||
- name: Wait for deploy host
|
||||
- name: VM Host Setup
|
||||
hosts: vm_servers
|
||||
gather_facts: false
|
||||
any_errors_fatal: true
|
||||
tasks:
|
||||
- name: Wait for connectivity 1
|
||||
local_action:
|
||||
module: wait_for
|
||||
host: "{{ ansible_host }}"
|
||||
connect_timeout: 10
|
||||
port: 22
|
||||
sleep: 20
|
||||
timeout: 1500
|
||||
state: started
|
||||
search_regex: OpenSSH
|
||||
|
||||
- name: copy host keys
|
||||
- name: Copy Host Keys
|
||||
copy:
|
||||
src: "{{ item.src }}"
|
||||
dest: "{{ item.dest }}"
|
||||
|
@ -30,7 +30,10 @@ default_container_tech: "{{ container_tech | default('lxc') }}"
|
||||
|
||||
ipxe_kernel_url: "http://boot.ipxe.org/ipxe.lkrn"
|
||||
|
||||
# IP address, or domain name of the TFTP server
|
||||
# The timeout for the SSH check to the vm_servers
|
||||
vm_ssh_timeout: 1500
|
||||
|
||||
# IP address, or domain name of the TFTP server
|
||||
tftp_server: "{{ hostvars[groups['pxe_hosts'][0]]['ansible_host'] | default(ansible_host) }}"
|
||||
# tftp_ssh_key: '' # user defined ssh key, used to access the host
|
||||
tftp_port: 69
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
# Copyright 2017, Rackspace US, Inc.
|
||||
# Copyright 2018, Rackspace US, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -215,30 +215,32 @@
|
||||
with_dict: "{{ mnaio_host_networks }}"
|
||||
when: mnaio_bridges is changed
|
||||
|
||||
- name: Disable virsh default network
|
||||
shell: |
|
||||
if virsh net-list | grep -qw "default"; then
|
||||
virsh net-autostart default --disable
|
||||
virsh net-destroy default
|
||||
fi
|
||||
- name: Disable default virt network
|
||||
virt_net:
|
||||
name: "default"
|
||||
state: inactive
|
||||
autostart: no
|
||||
|
||||
- name: Drop virsh network configs
|
||||
template:
|
||||
src: "kvm/libvirt-network-template.xml"
|
||||
dest: "/etc/libvirt/qemu/networks/{{ item.value.iface }}.xml"
|
||||
mode: "0644"
|
||||
owner: root
|
||||
group: root
|
||||
with_dict: "{{ mnaio_host_networks }}"
|
||||
- name: List virt network(s)
|
||||
virt_net:
|
||||
command: list_nets
|
||||
register: vm_networks
|
||||
|
||||
- name: Enable new virsh network(s)
|
||||
shell: |
|
||||
if ! virsh net-list | grep -qw "{{ item.value.iface }}"; then
|
||||
virsh net-define --file /etc/libvirt/qemu/networks/{{ item.value.iface }}.xml
|
||||
virsh net-create --file /etc/libvirt/qemu/networks/{{ item.value.iface }}.xml
|
||||
virsh net-autostart {{ item.value.iface }} || ture
|
||||
fi
|
||||
- name: Define virt network(s)
|
||||
virt_net:
|
||||
command: define
|
||||
name: "{{ item.value.iface }}"
|
||||
xml: "{{ lookup('template', 'kvm/libvirt-network-template.xml.j2') }}"
|
||||
with_dict: "{{ mnaio_host_networks }}"
|
||||
when: "item.value.iface not in vm_networks.list_nets"
|
||||
|
||||
- name: Create virt network(s)
|
||||
virt_net:
|
||||
command: create
|
||||
name: "{{ item.value.iface }}"
|
||||
autostart: true
|
||||
with_dict: "{{ mnaio_host_networks }}"
|
||||
when: "item.value.iface not in vm_networks.list_nets"
|
||||
|
||||
- name: Locate data volume
|
||||
command: "vgdisplay vg01"
|
||||
@ -276,27 +278,47 @@
|
||||
- default_vm_disk_mode | default('lvm') == "lvm"
|
||||
- data_volume.rc != 0
|
||||
|
||||
- name: Locate virsh data volume
|
||||
command: "virsh pool-info vg01"
|
||||
- name: Locate virt data volume
|
||||
virt_pool:
|
||||
name: "vg01"
|
||||
command: info
|
||||
failed_when: false
|
||||
when:
|
||||
- default_vm_disk_mode | default('lvm') == "lvm"
|
||||
register: virsh_data_volume
|
||||
register: virt_data_volume
|
||||
|
||||
- name: Create /etc/libvirt/storage directory
|
||||
file:
|
||||
path: "/etc/libvirt/storage/"
|
||||
state: "directory"
|
||||
|
||||
- name: Create virsh data volume
|
||||
shell: |
|
||||
virsh pool-create-as vg01 logical
|
||||
virsh pool-dumpxml vg01 > /etc/libvirt/storage/vg01.xml
|
||||
virsh pool-define /etc/libvirt/storage/vg01.xml
|
||||
virsh pool-autostart vg01 || true
|
||||
- name: Create virt data volume
|
||||
block:
|
||||
- name: Create virt pool
|
||||
virt_pool:
|
||||
command: create
|
||||
name: vg01
|
||||
|
||||
- name: Get virt pool xml
|
||||
virt_pool:
|
||||
command: get_xml
|
||||
name: vg01
|
||||
register: virt_pool_xml
|
||||
|
||||
- name: Write data volume xml
|
||||
copy:
|
||||
content: "{{ virt_pool_xml.get_xml }}"
|
||||
dest: "/etc/libvirt/storage/vg01.xml"
|
||||
|
||||
- name: Define virt data volume
|
||||
virt_pool:
|
||||
command: define
|
||||
name: vg01
|
||||
xml: "/etc/libvirt/storage/vg01.xml"
|
||||
autostart: true
|
||||
when:
|
||||
- default_vm_disk_mode | default('lvm') == "lvm"
|
||||
- virsh_data_volume.rc != 0
|
||||
- virt_data_volume.pools is not defined
|
||||
|
||||
- name: Load virtio kernel modules
|
||||
shell: |
|
||||
|
@ -31,6 +31,7 @@ mnaio_host_distro_packages:
|
||||
- ntp
|
||||
- openssh-server
|
||||
- python2.7
|
||||
- python-lxml
|
||||
- python-software-properties
|
||||
- qemu-kvm
|
||||
- qemu-utils
|
||||
@ -54,4 +55,3 @@ mnaio_pkg_cache_server_distro_packages:
|
||||
mnaio_host_iptables_service: "{{ (ansible_distribution | lower + '-' + ansible_distribution_version | lower == 'ubuntu-14.04') | ternary('iptables-persistent', 'netfilter-persistent') }}"
|
||||
|
||||
ssh_service_name: ssh
|
||||
|
||||
|
77
multi-node-aio/playbooks/vm-status.yml
Normal file
77
multi-node-aio/playbooks/vm-status.yml
Normal file
@ -0,0 +1,77 @@
|
||||
---
|
||||
# Copyright 2018, Rackspace US, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Build the in-memory vm_servers group from the pxe_servers inventory group.
- name: Create vm_servers group
  gather_facts: false
  hosts: localhost
  tasks:
    # Only hosts whose server_vm hostvar is truthy are added to the group.
    - name: VM Servers group
      with_items: "{{ groups['pxe_servers'] }}"
      when: hostvars[item]['server_vm'] | default(false) | bool
      add_host:
        groups: vm_servers
        name: "{{ item }}"
|
||||
|
||||
# Check that every host in vm_servers accepts connections. If the first wait
# fails, the rescue section power-cycles the VM once through the virt module
# and waits again before deciding whether to fail.
- name: VM Status
  hosts: vm_servers
  gather_facts: false
  tasks:
    - name: VM Connectivity Check
      block:
        # wait_for_connection probes through the host's own connection
        # settings; it documents no `port` option, so none is passed.
        - name: Wait for VM
          wait_for_connection:
            connect_timeout: 10
            sleep: 20
            timeout: "{{ vm_ssh_timeout }}"
      rescue:
        # The virt tasks below use a local connection rather than connecting
        # to the unreachable guest.
        - name: Gather VM info (rescue)
          virt:
            command: status
            name: "{{ inventory_hostname }}"
          connection: local
          register: vm_info

        # Only a running domain is destroyed before the restart.
        - name: Stop VM (rescue)
          virt:
            command: destroy
            name: "{{ inventory_hostname }}"
          connection: local
          when: vm_info.status == 'running'

        - name: Start VM (rescue)
          virt:
            command: start
            name: "{{ inventory_hostname }}"
          connection: local

        # Errors are ignored here so the explicit fail task below can make
        # the final decision.
        - name: Wait for VM (rescue)
          wait_for_connection:
            connect_timeout: 10
            sleep: 20
            timeout: "{{ vm_ssh_timeout }}"
          register: vm_rescue
          ignore_errors: true

        - name: Gather VM info 2nd pass (rescue)
          virt:
            command: status
            name: "{{ inventory_hostname }}"
          connection: local
          register: vm_info_2

        # `failed` is a boolean in the registered result, so it is checked
        # with the `failed` test; comparing it to the string 'true' never
        # matches and would leave this task permanently skipped.
        # NOTE(review): both conditions must hold, so a VM that is running
        # but still unreachable does not fail here - confirm this is intended.
        - name: Fail if VM still offline (rescue)
          fail:
            msg: "{{ inventory_hostname }} is not responding and cannot be rescued"
          when:
            - vm_info_2.status != 'running'
            - vm_rescue is failed
|
Loading…
x
Reference in New Issue
Block a user