From a2273026d740c1eef5105bcf4e250bd401054d26 Mon Sep 17 00:00:00 2001 From: Doug Szumski Date: Mon, 29 Apr 2024 16:13:43 +0100 Subject: [PATCH] Define retry_tag for unprocessed Fluentd logs The Kolla Ansible Fluentd config is not idempotent. In practice this can be an issue if some logs are rejected by the OpenSearch / ElasticSearch bulk API. In this case, by default, the unprocessed logs will be reprocessed from the start of the Fluentd pipeline, leading to error messages. The solution proposed here is to explicitly set the retry_tag as documented in [1,2], and add a dedicated output for retried logs. An alternative fix could be to make the pipeline idempotent. This would require a compromise of either duplicating content, or having non-standard fields for the log payload/message. [1] https://github.com/fluent/fluent-plugin-opensearch?tab=readme-ov-file#retry_tag [2] https://github.com/uken/fluent-plugin-elasticsearch?tab=readme-ov-file#retry_tag Closes-Bug: #2064104 Change-Id: I310fc1b8e002ce9f0ba60f8bb67b7f372a589314 --- .../common/templates/conf/output/00-local.conf.j2 | 2 ++ .../roles/common/templates/conf/output/01-es.conf.j2 | 11 ++++++++++- .../templates/conf/output/03-opensearch.conf.j2 | 11 ++++++++++- .../bugfix-fluentd-retry-tag-383dd788a42fddd6.yaml | 7 +++++++ 4 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/bugfix-fluentd-retry-tag-383dd788a42fddd6.yaml diff --git a/ansible/roles/common/templates/conf/output/00-local.conf.j2 b/ansible/roles/common/templates/conf/output/00-local.conf.j2 index 62c7965bfa..2c82f539fc 100644 --- a/ansible/roles/common/templates/conf/output/00-local.conf.j2 +++ b/ansible/roles/common/templates/conf/output/00-local.conf.j2 @@ -37,6 +37,7 @@ logstash_format true logstash_prefix {{ opensearch_log_index_prefix }} reconnect_on_error true + retry_tag retry_es request_timeout {{ fluentd_elasticsearch_request_timeout }} suppress_type_name true bulk_message_request_threshold {{ 
fluentd_bulk_message_request_threshold }} @@ -70,6 +71,7 @@ logstash_format true logstash_prefix {{ opensearch_log_index_prefix }} reconnect_on_error true + retry_tag retry_os request_timeout {{ fluentd_opensearch_request_timeout }} suppress_type_name true bulk_message_request_threshold {{ fluentd_bulk_message_request_threshold }} diff --git a/ansible/roles/common/templates/conf/output/01-es.conf.j2 b/ansible/roles/common/templates/conf/output/01-es.conf.j2 index 91d011391b..6c4831ef75 100644 --- a/ansible/roles/common/templates/conf/output/01-es.conf.j2 +++ b/ansible/roles/common/templates/conf/output/01-es.conf.j2 @@ -1,4 +1,5 @@ - +{% for match_pattern in ['retry_es', '**',] %} + @type copy @type elasticsearch @@ -22,14 +23,22 @@ logstash_format true logstash_prefix {{ opensearch_log_index_prefix }} reconnect_on_error true +{% if match_pattern != 'retry_es' %} + retry_tag retry_es +{% endif %} request_timeout {{ fluentd_elasticsearch_request_timeout }} suppress_type_name true bulk_message_request_threshold {{ fluentd_bulk_message_request_threshold }} @type file +{% if match_pattern == 'retry_es' %} + path /var/lib/fluentd/data/elasticsearch.buffer/openstack_retry.* +{% else %} path /var/lib/fluentd/data/elasticsearch.buffer/openstack.* +{% endif %} flush_interval 15s chunk_limit_size {{ fluentd_buffer_chunk_limit_size }} +{% endfor %} diff --git a/ansible/roles/common/templates/conf/output/03-opensearch.conf.j2 b/ansible/roles/common/templates/conf/output/03-opensearch.conf.j2 index 6f4beb2d64..dd60f1e1cc 100644 --- a/ansible/roles/common/templates/conf/output/03-opensearch.conf.j2 +++ b/ansible/roles/common/templates/conf/output/03-opensearch.conf.j2 @@ -1,4 +1,5 @@ - +{% for match_pattern in ['retry_os', '**',] %} + @type copy @type opensearch @@ -22,14 +23,22 @@ logstash_format true logstash_prefix {{ opensearch_log_index_prefix }} reconnect_on_error true +{% if match_pattern != 'retry_os' %} + retry_tag retry_os +{% endif %} request_timeout {{ 
fluentd_opensearch_request_timeout }} suppress_type_name true bulk_message_request_threshold {{ fluentd_bulk_message_request_threshold }} @type file +{% if match_pattern == 'retry_os' %} + path /var/lib/fluentd/data/opensearch.buffer/openstack_retry.* +{% else %} path /var/lib/fluentd/data/opensearch.buffer/openstack.* +{% endif %} flush_interval 15s chunk_limit_size {{ fluentd_buffer_chunk_limit_size }} +{% endfor %} diff --git a/releasenotes/notes/bugfix-fluentd-retry-tag-383dd788a42fddd6.yaml b/releasenotes/notes/bugfix-fluentd-retry-tag-383dd788a42fddd6.yaml new file mode 100644 index 0000000000..0a5370e4ba --- /dev/null +++ b/releasenotes/notes/bugfix-fluentd-retry-tag-383dd788a42fddd6.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Set retry_tag in ElasticSearch/OpenSearch Fluentd output plugins. This + is to prevent log messages from being re-processed by non-idempotent + Fluentd pipeline configuration. See + `LP#2064104 <https://launchpad.net/bugs/2064104>`__