From 741f5b333d41fd7186b5aa0844437c70c673cb1d Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Mon, 12 Sep 2022 09:01:15 -0700 Subject: [PATCH] Fixup zuul merger and executor graceful shutdowns There are two issues in the zuul merger and executor shutdowns. The first is that `docker-compose ps -q` will report exited containers unlike `docker ps -q`. This means we may try to exec into a non running container which is an error. Handle this by checking the error message and proceeding if the 'is not running' string is present. The second issue is a race between stopping a container and running an exec in the container. If a container stops while an exec is running in it that exec appears to be treated with some equivalent of kill -9. The result is the exec returns 137. While theoretically possible for both executor and merger graceful stop command we seem to only hit this with the merger so we handle exit code 137 for the merger only. This way we'll get info if the executors start running into this too. Change-Id: Ia6dc2d7e397631d72968ffa89c4492b803c89c47 --- playbooks/roles/zuul-executor/tasks/graceful.yaml | 8 +++++++- playbooks/roles/zuul-merger/tasks/graceful.yaml | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/playbooks/roles/zuul-executor/tasks/graceful.yaml b/playbooks/roles/zuul-executor/tasks/graceful.yaml index 997699169e..a2bca090a6 100644 --- a/playbooks/roles/zuul-executor/tasks/graceful.yaml +++ b/playbooks/roles/zuul-executor/tasks/graceful.yaml @@ -1,5 +1,6 @@ - name: Check if Zuul Executor containers are running - # It is possible they are stopped due to some external circumstance + # It is possible they are stopped due to some external circumstance. + # NOTE: docker-compose ps -q reports exited containers unlike docker ps -q command: cmd: docker-compose ps -q chdir: /etc/zuul-executor @@ -14,6 +15,11 @@ become_user: root # Only run the docker exec command if a container is running when: executor_container_list.stdout_lines | length > 0 + register: ze_graceful + failed_when: + - ze_graceful.rc != 0 + # If the exec fails because the container is not running we continue. + - "'is not running' not in ze_graceful.stderr" - name: Wait for Zuul Executor to stop shell: cmd: docker-compose ps -q | xargs docker wait diff --git a/playbooks/roles/zuul-merger/tasks/graceful.yaml b/playbooks/roles/zuul-merger/tasks/graceful.yaml index 7c801088f1..233f17c636 100644 --- a/playbooks/roles/zuul-merger/tasks/graceful.yaml +++ b/playbooks/roles/zuul-merger/tasks/graceful.yaml @@ -1,5 +1,6 @@ - name: Check if Zuul merger containers are running - # It is possible they are stopped due to some external circumstance + # It is possible they are stopped due to some external circumstance. + # NOTE: docker-compose ps -q reports exited containers unlike docker ps -q command: cmd: docker-compose ps -q chdir: /etc/zuul-merger @@ -14,6 +15,18 @@ become_user: root # Only run the docker exec command if a container is running when: merger_container_list.stdout_lines | length > 0 + register: zm_graceful + failed_when: + - zm_graceful.rc != 0 + # There is a fun race with docker exec and shutting down the + # container running the exec. If the container is stopped while + # the exec is running then the command in the exec effectively gets + # kill -9'd and the docker exec command rc is 137. Since this + # should only happen when the container is stopped we proceed with + # the overall graceful shutdown. + - zm_graceful.rc != 137 + # If the exec fails because the container is not running we continue. + - "'is not running' not in zm_graceful.stderr" - name: Wait for Zuul Merger to stop shell: cmd: docker-compose ps -q | xargs docker wait