
This adds a weekly cronjob that will reboot and update our entire zuul cluster gracefully. The time frame chosen for this should be after North America begins its weekend and before Europe starts their week. The idea is that we're doing this during the quiet time of our week. Change-Id: Ib9a54f273e11744fa1ddbf367c291289f86bddb7
111 lines
3.6 KiB
YAML
111 lines
3.6 KiB
YAML
# This relies on flock -n /var/run/ansible/zuul_reboot.lock to ensure
# we don't run multiple copies of this playbook concurrently.
|
|
|
|
# TODO: stop pulling in the hourly job if we do this
|
|
# Run zuul_pull.yaml before any host is restarted or rebooted.
- import_playbook: zuul_pull.yaml
  name: Ensure we are going to restart/reboot on the same image
|
|
|
|
# TODO Do we want to force disabled servers to be rebooted too?
|
|
# Stop, upgrade, reboot and restart every executor that is not in the
# "disabled" group. serial: 1 processes a single host at a time.
- hosts: "zuul-executor:!disabled"
  name: "Reboot zuul-executors gracefully one at a time"
  serial: 1
  tasks:
    # Runs the "graceful" task file of the zuul-executor role.
    - name: Gracefully stop the executor
      include_role:
        name: zuul-executor
        tasks_from: graceful

    - name: Upgrade executor server packages
      apt:
        update_cache: true
        # "yes" is one of the apt module's upgrade-mode strings, not a
        # boolean; quote it so YAML does not retype it.
        upgrade: "yes"

    - name: Reboot the executor server
      reboot:

    # Runs the "start" task file of the zuul-executor role.
    - name: Start the executor
      include_role:
        name: zuul-executor
        tasks_from: start
|
|
|
|
# Stop, upgrade, reboot and restart every merger that is not in the
# "disabled" group. serial: 1 processes a single host at a time.
- hosts: "zuul-merger:!disabled"
  name: "Reboot zuul-mergers gracefully one at a time"
  serial: 1
  tasks:
    # Runs the "graceful" task file of the zuul-merger role.
    - name: Gracefully stop the merger
      include_role:
        name: zuul-merger
        tasks_from: graceful

    - name: Upgrade merger server packages
      apt:
        update_cache: true
        # "yes" is one of the apt module's upgrade-mode strings, not a
        # boolean; quote it so YAML does not retype it.
        upgrade: "yes"

    - name: Reboot the merger server
      reboot:

    # Runs the "start" task file of the zuul-merger role.
    - name: Start the merger
      include_role:
        name: zuul-merger
        tasks_from: start
|
|
|
|
# TODO: should we reboot both schedulers first and then restart the web
# processes without rebooting?
|
|
# Stop the scheduler and web processes on each scheduler host that is not
# in the "disabled" group, upgrade packages, reboot, start both again, and
# then poll the components API until this host's scheduler, web and
# fingergw entries report "running". serial: 1 processes a single host at
# a time.
- hosts: "zuul-scheduler:!disabled"
  name: "Reboot zuul-schedulers gracefully one at a time"
  serial: 1
  tasks:
    - name: Stop the scheduler process
      include_role:
        name: zuul-scheduler
        tasks_from: stop

    - name: Stop the web processes
      include_role:
        name: zuul-web
        tasks_from: stop

    - name: Upgrade scheduler server packages
      apt:
        update_cache: true
        # "yes" is one of the apt module's upgrade-mode strings, not a
        # boolean; quote it so YAML does not retype it.
        upgrade: "yes"

    - name: Reboot the scheduler server
      reboot:

    - name: Start the scheduler process
      include_role:
        name: zuul-scheduler
        tasks_from: start

    - name: Start the web processes
      include_role:
        name: zuul-web
        tasks_from: start

    # Each wait below succeeds once the API answers 200 and the JMESPath
    # query matches exactly one entry for this host whose state is
    # "running". The until expressions are bare: Ansible already evaluates
    # conditionals as Jinja, so they must not be wrapped in {{ }}.
    - name: Wait for scheduler to be running
      uri:
        url: https://zuul.opendev.org/api/components
        method: GET
        return_content: true
      register: components
      # 360 retries x 30 s delay = 3 hours
      retries: 360
      delay: 30
      until: >-
        components.status == 200
        and components.content | from_json | json_query(scheduler_query) | length == 1
        and components.content | from_json | json_query(scheduler_query) | first == 'running'
      vars:
        scheduler_query: "scheduler[?hostname=='{{ inventory_hostname }}'].state"

    - name: Wait for web to be running
      uri:
        url: https://zuul.opendev.org/api/components
        method: GET
        return_content: true
      register: components
      # 360 retries x 30 s delay = 3 hours
      retries: 360
      delay: 30
      until: >-
        components.status == 200
        and components.content | from_json | json_query(web_query) | length == 1
        and components.content | from_json | json_query(web_query) | first == 'running'
      vars:
        web_query: "web[?hostname=='{{ inventory_hostname }}'].state"

    - name: Wait for fingergw to be running
      uri:
        url: https://zuul.opendev.org/api/components
        method: GET
        return_content: true
      register: components
      # 180 retries x 15 s delay = 45 minutes
      retries: 180
      delay: 15
      until: >-
        components.status == 200
        and components.content | from_json | json_query(finger_query) | length == 1
        and components.content | from_json | json_query(finger_query) | first == 'running'
      vars:
        finger_query: "fingergw[?hostname=='{{ inventory_hostname }}'].state"
|