From 6969a5d596f0c124ae3a776f4ec37a6a7654a3be Mon Sep 17 00:00:00 2001
From: Oleksii Grudev <ogrudev@mirantis.com>
Date: Mon, 10 Feb 2020 17:41:40 +0200
Subject: [PATCH] [neutron] Unhardcode probes timings

This patch makes the readiness/liveness probe timings
configurable for the probes that were still hardcoded
(bagpipe-bgp and l2gw-agent). It also introduces the
RPC_PROBE_TIMEOUT and RPC_PROBE_RETRIES environment
variables, which are passed to the health probe script
so that the RPC test timeout and the number of retries
can be configured instead of being hardcoded.

Change-Id: I2e48eed26abb82208a4ac4ae596d27ca8db99c90
---
 neutron/templates/bin/_health-probe.py.tpl    |  6 +-
 neutron/templates/daemonset-bagpipe-bgp.yaml  | 19 +++--
 neutron/templates/daemonset-dhcp-agent.yaml   |  5 ++
 neutron/templates/daemonset-l2gw-agent.yaml   | 75 ++++++++++---------
 neutron/templates/daemonset-l3-agent.yaml     |  5 ++
 .../templates/daemonset-metadata-agent.yaml   |  5 ++
 neutron/templates/daemonset-ovs-agent.yaml    |  5 ++
 neutron/templates/daemonset-sriov-agent.yaml  |  5 ++
 neutron/values.yaml                           | 25 +++++++
 9 files changed, 106 insertions(+), 44 deletions(-)

diff --git a/neutron/templates/bin/_health-probe.py.tpl b/neutron/templates/bin/_health-probe.py.tpl
index 0aa4a5647d..01ace1e8e1 100644
--- a/neutron/templates/bin/_health-probe.py.tpl
+++ b/neutron/templates/bin/_health-probe.py.tpl
@@ -51,6 +51,8 @@ from oslo_context import context
 from oslo_log import log
 import oslo_messaging
 
+rpc_timeout = int(os.getenv('RPC_PROBE_TIMEOUT', '60'))
+rpc_retries = int(os.getenv('RPC_PROBE_RETRIES', '2'))
 rabbit_port = 5672
 tcp_established = "ESTABLISHED"
 log.logging.basicConfig(level=log.ERROR)
@@ -69,8 +71,8 @@ def check_agent_status(transport):
             topic=cfg.CONF.agent_queue_name,
             server=_get_hostname(use_fqdn))
         client = oslo_messaging.RPCClient(transport, target,
-                                          timeout=60,
-                                          retry=2)
+                                          timeout=rpc_timeout,
+                                          retry=rpc_retries)
         client.call(context.RequestContext(),
                     'pod_health_probe_method_ignore_errors')
     except oslo_messaging.exceptions.MessageDeliveryFailure:
diff --git a/neutron/templates/daemonset-bagpipe-bgp.yaml b/neutron/templates/daemonset-bagpipe-bgp.yaml
index ebd02738e4..5131fc4b19 100644
--- a/neutron/templates/daemonset-bagpipe-bgp.yaml
+++ b/neutron/templates/daemonset-bagpipe-bgp.yaml
@@ -14,6 +14,16 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */}}
 
+{{- define "bagpipeBgpLivenessProbeTemplate" }}
+tcpSocket:
+  port: {{ tuple "network" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
+{{- end }}
+
+{{- define "bagpipeBgpReadinessProbeTemplate" }}
+tcpSocket:
+  port: {{ tuple "network" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
+{{- end }}
+
 {{- define "neutron.bagpipe_bgp.daemonset" }}
 {{- $daemonset := index . 0 }}
 {{- $configMapName := index . 1 }}
@@ -81,13 +91,8 @@ spec:
 {{ tuple $envAll "neutron_bagpipe_bgp" | include "helm-toolkit.snippets.image" | indent 10 }}
 {{ tuple $envAll $envAll.Values.pod.resources.agent.bagpipe_bgp | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
 {{ dict "envAll" $envAll "application" "neutron_bagpipe_bgp" "container" "neutron_bagpipe_bgp" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
-          readinessProbe:
-            tcpSocket:
-              port: {{ tuple "network" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
-          livenessProbe:
-            tcpSocket:
-              port: {{ tuple "network" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
-            initialDelaySeconds: 60
+{{ dict "envAll" $envAll "component" "bagpipe_bgp" "container" "bagpipe_bgp" "type" "liveness" "probeTemplate" (include "bagpipeBgpLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
+{{ dict "envAll" $envAll "component" "bagpipe_bgp" "container" "bagpipe_bgp" "type" "readiness" "probeTemplate" (include "bagpipeBgpReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
           command:
             - /tmp/neutron-bagpipe-bgp.sh
           volumeMounts:
diff --git a/neutron/templates/daemonset-dhcp-agent.yaml b/neutron/templates/daemonset-dhcp-agent.yaml
index 187e491469..7a7496601d 100644
--- a/neutron/templates/daemonset-dhcp-agent.yaml
+++ b/neutron/templates/daemonset-dhcp-agent.yaml
@@ -157,6 +157,11 @@ spec:
 {{ tuple $envAll "neutron_dhcp" | include "helm-toolkit.snippets.image" | indent 10 }}
 {{ tuple $envAll $envAll.Values.pod.resources.agent.dhcp | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
 {{ dict "envAll" $envAll "application" "neutron_dhcp_agent" "container" "neutron_dhcp_agent" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
+          env:
+            - name: RPC_PROBE_TIMEOUT
+              value: "{{ .Values.pod.probes.rpc_timeout }}"
+            - name: RPC_PROBE_RETRIES
+              value: "{{ .Values.pod.probes.rpc_retries }}"
 {{ dict "envAll" $envAll "component" "dhcp_agent" "container" "dhcp_agent" "type" "readiness" "probeTemplate" (include "dhcpAgentReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
 {{ dict "envAll" $envAll "component" "dhcp_agent" "container" "dhcp_agent" "type" "liveness" "probeTemplate" (include "dhcpAgentLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
           command:
diff --git a/neutron/templates/daemonset-l2gw-agent.yaml b/neutron/templates/daemonset-l2gw-agent.yaml
index 3f673990cd..8affb30bd5 100644
--- a/neutron/templates/daemonset-l2gw-agent.yaml
+++ b/neutron/templates/daemonset-l2gw-agent.yaml
@@ -14,6 +14,39 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */}}
 
+{{- define "l2gwAgentLivenessProbeTemplate" }}
+exec:
+  command:
+    - python
+    - /tmp/health-probe.py
+    - --config-file
+    - /etc/neutron/neutron.conf
+    - --config-file
+    - /etc/neutron/l2gw_agent.ini
+    - --agent-queue-name
+    - l2gateway_agent
+    - --liveness-probe
+{{- if .Values.pod.use_fqdn.neutron_agent }}
+    - --use-fqdn
+{{- end }}
+{{- end }}
+
+{{- define "l2gwAgentReadinessProbeTemplate" }}
+exec:
+  command:
+    - python
+    - /tmp/health-probe.py
+    - --config-file
+    - /etc/neutron/neutron.conf
+    - --config-file
+    - /etc/neutron/l2gw_agent.ini
+    - --agent-queue-name
+    - l2gateway_agent
+{{- if .Values.pod.use_fqdn.neutron_agent }}
+    - --use-fqdn
+{{- end }}
+{{- end }}
+
 {{- define "neutron.l2gw_agent.daemonset" }}
 {{- $daemonset := index . 0 }}
 {{- $configMapName := index . 1 }}
@@ -66,41 +99,13 @@ spec:
 {{ tuple $envAll $envAll.Values.pod.resources.agent.l2gw | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
           securityContext:
             privileged: true
-          readinessProbe:
-            exec:
-              command:
-                - python
-                - /tmp/health-probe.py
-                - --config-file
-                - /etc/neutron/neutron.conf
-                - --config-file
-                - /etc/neutron/l2gw_agent.ini
-                - --agent-queue-name
-                - l2gateway_agent
-{{- if .Values.pod.use_fqdn.neutron_agent }}
-                - --use-fqdn
-{{- end }}
-            initialDelaySeconds: 30
-            periodSeconds: 190
-            timeoutSeconds: 185
-          livenessProbe:
-            exec:
-              command:
-                - python
-                - /tmp/health-probe.py
-                - --config-file
-                - /etc/neutron/neutron.conf
-                - --config-file
-                - /etc/neutron/l2gw_agent.ini
-                - --agent-queue-name
-                - l2gateway_agent
-                - --liveness-probe
-{{- if .Values.pod.use_fqdn.neutron_agent }}
-                - --use-fqdn
-{{- end }}
-            initialDelaySeconds: 120
-            periodSeconds: 600
-            timeoutSeconds: 580
+          env:
+            - name: RPC_PROBE_TIMEOUT
+              value: "{{ .Values.pod.probes.rpc_timeout }}"
+            - name: RPC_PROBE_RETRIES
+              value: "{{ .Values.pod.probes.rpc_retries }}"
+{{ dict "envAll" $envAll "component" "l2gw_agent" "container" "l2gw_agent" "type" "liveness" "probeTemplate" (include "l2gwAgentLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
+{{ dict "envAll" $envAll "component" "l2gw_agent" "container" "l2gw_agent" "type" "readiness" "probeTemplate" (include "l2gwAgentReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
           command:
             - /tmp/neutron-l2gw-agent.sh
           volumeMounts:
diff --git a/neutron/templates/daemonset-l3-agent.yaml b/neutron/templates/daemonset-l3-agent.yaml
index 15861486d1..d85dc557ee 100644
--- a/neutron/templates/daemonset-l3-agent.yaml
+++ b/neutron/templates/daemonset-l3-agent.yaml
@@ -158,6 +158,11 @@ spec:
 {{ tuple $envAll "neutron_l3" | include "helm-toolkit.snippets.image" | indent 10 }}
 {{ tuple $envAll $envAll.Values.pod.resources.agent.l3 | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
 {{ dict "envAll" $envAll "application" "neutron_l3_agent" "container" "neutron_l3_agent" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
+          env:
+            - name: RPC_PROBE_TIMEOUT
+              value: "{{ .Values.pod.probes.rpc_timeout }}"
+            - name: RPC_PROBE_RETRIES
+              value: "{{ .Values.pod.probes.rpc_retries }}"
 {{ dict "envAll" $envAll "component" "l3_agent" "container" "l3_agent" "type" "readiness" "probeTemplate" (include "l3AgentReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
 {{ dict "envAll" $envAll "component" "l3_agent" "container" "l3_agent" "type" "liveness" "probeTemplate" (include "l3AgentLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
           command:
diff --git a/neutron/templates/daemonset-metadata-agent.yaml b/neutron/templates/daemonset-metadata-agent.yaml
index 215508c8a6..84cbe1fd90 100644
--- a/neutron/templates/daemonset-metadata-agent.yaml
+++ b/neutron/templates/daemonset-metadata-agent.yaml
@@ -115,6 +115,11 @@ spec:
         - name: neutron-metadata-agent
 {{ tuple $envAll "neutron_metadata" | include "helm-toolkit.snippets.image" | indent 10 }}
 {{ tuple $envAll $envAll.Values.pod.resources.agent.metadata | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
+          env:
+            - name: RPC_PROBE_TIMEOUT
+              value: "{{ .Values.pod.probes.rpc_timeout }}"
+            - name: RPC_PROBE_RETRIES
+              value: "{{ .Values.pod.probes.rpc_retries }}"
 {{ dict "envAll" $envAll "component" "metadata_agent" "container" "metadata_agent" "type" "readiness" "probeTemplate" (include "metadataAgentReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
 {{ dict "envAll" $envAll "component" "metadata_agent" "container" "metadata_agent" "type" "liveness" "probeTemplate" (include "metadataAgentLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
           securityContext:
diff --git a/neutron/templates/daemonset-ovs-agent.yaml b/neutron/templates/daemonset-ovs-agent.yaml
index 22c13a4aa6..c0f1766aab 100644
--- a/neutron/templates/daemonset-ovs-agent.yaml
+++ b/neutron/templates/daemonset-ovs-agent.yaml
@@ -179,6 +179,11 @@ spec:
         - name: neutron-ovs-agent
 {{ tuple $envAll "neutron_openvswitch_agent" | include "helm-toolkit.snippets.image" | indent 10 }}
 {{ tuple $envAll $envAll.Values.pod.resources.agent.ovs | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
+          env:
+            - name: RPC_PROBE_TIMEOUT
+              value: "{{ .Values.pod.probes.rpc_timeout }}"
+            - name: RPC_PROBE_RETRIES
+              value: "{{ .Values.pod.probes.rpc_retries }}"
 {{ dict "envAll" $envAll "component" "ovs_agent" "container" "ovs_agent" "type" "readiness" "probeTemplate" (include "ovsAgentReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
 {{ dict "envAll" $envAll "component" "ovs_agent" "container" "ovs_agent" "type" "liveness" "probeTemplate" (include "ovsAgentLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
 {{ dict "envAll" $envAll "application" "neutron_ovs_agent" "container" "neutron_ovs_agent" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
diff --git a/neutron/templates/daemonset-sriov-agent.yaml b/neutron/templates/daemonset-sriov-agent.yaml
index 86b835a1ed..4ebac26798 100644
--- a/neutron/templates/daemonset-sriov-agent.yaml
+++ b/neutron/templates/daemonset-sriov-agent.yaml
@@ -137,6 +137,11 @@ spec:
 {{ tuple $envAll "neutron_sriov_agent" | include "helm-toolkit.snippets.image" | indent 10 }}
 {{ tuple $envAll $envAll.Values.pod.resources.agent.sriov | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
 {{ dict "envAll" $envAll "application" "neutron_sriov_agent" "container" "neutron_sriov_agent" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
+          env:
+            - name: RPC_PROBE_TIMEOUT
+              value: "{{ .Values.pod.probes.rpc_timeout }}"
+            - name: RPC_PROBE_RETRIES
+              value: "{{ .Values.pod.probes.rpc_retries }}"
 {{ dict "envAll" $envAll "component" "sriov_agent" "container" "sriov_agent" "type" "readiness" "probeTemplate" (include "sriovAgentReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
           command:
             - /tmp/neutron-sriov-agent.sh
diff --git a/neutron/values.yaml b/neutron/values.yaml
index 3a9ecd7e1b..3a4691faa2 100644
--- a/neutron/values.yaml
+++ b/neutron/values.yaml
@@ -339,6 +339,8 @@ pod:
   use_fqdn:
     neutron_agent: true
   probes:
+    rpc_timeout: 60
+    rpc_retries: 2
     dhcp_agent:
       dhcp_agent:
         readiness:
@@ -404,6 +406,29 @@ pod:
             initialDelaySeconds: 30
             periodSeconds: 190
             timeoutSeconds: 185
+    bagpipe_bgp:
+      bagpipe_bgp:
+        readiness:
+          enabled: true
+          params:
+        liveness:
+          enabled: true
+          params:
+            initialDelaySeconds: 60
+    l2gw_agent:
+      l2gw_agent:
+        readiness:
+          enabled: true
+          params:
+            initialDelaySeconds: 30
+            periodSeconds: 15
+            timeoutSeconds: 65
+        liveness:
+          enabled: true
+          params:
+            initialDelaySeconds: 120
+            periodSeconds: 90
+            timeoutSeconds: 70
     server:
       server:
         readiness: