From 6bdf202658e08bb9f43ca50334587b05dc4bac03 Mon Sep 17 00:00:00 2001
From: Mark Goddard
Date: Thu, 24 Oct 2019 15:01:42 +0100
Subject: [PATCH] Fix nova scheduler down after first docker restart
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Due to a Docker bug [1] we cannot use Docker to send SIGHUP to the
container because it will mark it as stopped. This patch sends the
signal directly to the process, bypassing Docker.

'changed_when: false' is also removed from the relevant task as it
definitely changes the state. In the future we could do the refresh
only if there really is a need for another one.

[1] https://github.com/moby/moby/issues/11065

Change-Id: Ief73bbd24568d6941384ea3330ab45f11aa42d37
Co-authored-by: Radosław Piliszek
Closes-Bug: #1845244
---
 ansible/roles/nova/tasks/refresh_scheduler_cell_cache.yml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/nova/tasks/refresh_scheduler_cell_cache.yml b/ansible/roles/nova/tasks/refresh_scheduler_cell_cache.yml
index 286b111c43..a8ece23601 100644
--- a/ansible/roles/nova/tasks/refresh_scheduler_cell_cache.yml
+++ b/ansible/roles/nova/tasks/refresh_scheduler_cell_cache.yml
@@ -3,7 +3,10 @@
 # in nova scheduler.
 - name: Refresh cell cache in nova scheduler
   become: true
-  command: docker kill --signal HUP nova_scheduler
-  changed_when: False
+  # NOTE(yoctozepto): Normally we would send the signal via Docker but, due to a
+  # Docker bug (https://github.com/moby/moby/issues/11065), this might cause the
+  # container to be stopped if we restart Docker or reboot the server as we
+  # use the 'unless-stopped' restart policy by default.
+  shell: "kill -HUP `docker inspect -f '{% raw %}{{.State.Pid}}{% endraw %}' nova_scheduler`"
   when:
     - inventory_hostname in groups['nova-scheduler']