diff --git a/elk_metrics_6x/createElasticIndexes.yml b/elk_metrics_6x/createElasticIndexes.yml
index 7e4528b7..c02a8463 100644
--- a/elk_metrics_6x/createElasticIndexes.yml
+++ b/elk_metrics_6x/createElasticIndexes.yml
@@ -16,6 +16,10 @@
         body: "{{ item.index_options | to_json }}"
         status_code: 200,400
         body_format: json
+      register: elk_indexes
+      until: elk_indexes is success
+      retries: 3
+      delay: 5
       with_items:
         - name: "osprofiler-notifications"
           index_options:
diff --git a/elk_metrics_6x/installCurator.yml b/elk_metrics_6x/installCurator.yml
index 5294bcb8..7573d02b 100644
--- a/elk_metrics_6x/installCurator.yml
+++ b/elk_metrics_6x/installCurator.yml
@@ -16,6 +16,7 @@
 - name: Install Curator
   hosts: "elastic-logstash"
   become: true
+  gather_facts: false
   vars:
     haproxy_ssl: false
 
@@ -25,6 +26,55 @@
   environment: "{{ deployment_environment_variables | default({}) }}"
 
   pre_tasks:
+    - include_tasks: common_task_data_node_hosts.yml
+      tags:
+        - always
+
+    # Ask the local ES node for the filesystem stats of every data node.
+    - name: Query es storage
+      uri:
+        url: "http://127.0.0.1:9200/_nodes/{{ (data_nodes | map('extract', hostvars, 'ansible_host') | list) | join(',') }}/stats/fs"
+        method: GET
+      register: elk_data
+      until: elk_data is success
+      retries: 3
+      delay: 5
+      run_once: true
+
+    # Sum total_in_bytes across the returned nodes and convert bytes to MiB.
+    - name: Set available storage fact
+      set_fact:
+        es_total_available_storage: "{{ ((elk_data['json']['nodes'].values() | list) | map(attribute='fs.total.total_in_bytes') | list | sum) // 1024 // 1024 }}"
+
+    # Hold back 25% of the total as a buffer.
+    - name: Set assumed buffer storage fact
+      set_fact:
+        es_assumed_buffer_storage: "{{ ((es_total_available_storage | int) * 0.25) | round | int }}"
+
+    - name: Set usable buffer storage fact(s)
+      set_fact:
+        es_usable_buffer_storage: "{{ (es_total_available_storage | int) - (es_assumed_buffer_storage | int) }}"
+        es_expected_storage: "{{ ((elastic_beat_retention_policy_hosts.values() | map('int') | list) | sum) * (elastic_beat_storage_constant | int) }}"
+
+    - name: Set buffer storage fact
+      set_fact:
+        es_assumed_usable_storage_per_node: "{{ (es_usable_buffer_storage | int) // (data_nodes | length | int) }}"
+
+    # Double the constant when the expected storage exceeds the usable storage.
+    - name: Set storage the mulitplier
+      set_fact:
+        es_storage_multiplier: "{{ ((es_usable_buffer_storage | int) < (es_expected_storage | int)) | ternary(((elastic_beat_storage_constant | int) * 2), elastic_beat_storage_constant | int) }}"
+
+    # Only set a retention fact when the deployer has not defined one. Both the
+    # host count and the resulting retention are clamped to a minimum of 1 so
+    # that an empty inventory group cannot cause a division by zero and a small
+    # cluster never yields a zero-day retention.
+    - name: Set retention facts
+      set_fact: "elastic_{{ item.key }}_retention={{ [(es_assumed_usable_storage_per_node | int) // (([item.value | int, 1] | max) * (es_storage_multiplier | int)), 1] | max }}"
+      when:
+        - hostvars[inventory_hostname]["elastic_" + item.key + "_retention"] is undefined
+      with_dict: "{{ elastic_beat_retention_policy_hosts }}"
+
     - name: Ensure virtualenv is installed
       apt:
         name: "{{ item }}"
diff --git a/elk_metrics_6x/site-elka.yml b/elk_metrics_6x/site-elka.yml
index f2e33ede..423a9342 100644
--- a/elk_metrics_6x/site-elka.yml
+++ b/elk_metrics_6x/site-elka.yml
@@ -14,8 +14,8 @@
 # limitations under the License.
 
 - import_playbook: installElastic.yml
-- import_playbook: installCurator.yml
 - import_playbook: installLogstash.yml
+- import_playbook: installCurator.yml
 - import_playbook: installKibana.yml
 - import_playbook: installAPMserver.yml
 - import_playbook: createElasticIndexes.yml
diff --git a/elk_metrics_6x/templates/curator-actions.yml.j2 b/elk_metrics_6x/templates/curator-actions.yml.j2
index e8791b14..42667be5 100644
--- a/elk_metrics_6x/templates/curator-actions.yml.j2
+++ b/elk_metrics_6x/templates/curator-actions.yml.j2
@@ -13,156 +13,115 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-actions:
-  1:
-    action: delete_indices
-    description: >-
-      Delete indices older than 60 days (based on index name), for logstash-
-      prefixed indices. Ignore the error if the filter does not result in an
-      actionable list of indices (ignore_empty_list) and exit cleanly.
-    options:
-      ignore_empty_list: True
-      disable_action: False
-    filters:
-    - filtertype: pattern
-      kind: prefix
-      value: logstash-
-    - filtertype: age
-      source: name
-      direction: older
-      timestring: '%Y.%m.%d'
-      unit: days
-      unit_count: {{ elastic_logstash_retention }}
-  2:
-    action: delete_indices
-    description: >-
-      Delete indices older than 10 days (based on index name), for apm-
-      prefixed indices. Ignore the error if the filter does not result in an
-      actionable list of indices (ignore_empty_list) and exit cleanly.
-    options:
-      ignore_empty_list: True
-      disable_action: False
-    filters:
-    - filtertype: pattern
-      kind: prefix
-      value: apm-
-    - filtertype: age
-      source: name
-      direction: older
-      timestring: '%Y.%m.%d'
-      unit: days
-      unit_count: {{ elastic_apm_retention }}
-  3:
-    action: delete_indices
-    description: >-
-      Delete indices older than 15 days (based on index name), for auditbeat-
-      prefixed indices. Ignore the error if the filter does not result in an
-      actionable list of indices (ignore_empty_list) and exit cleanly.
-    options:
-      ignore_empty_list: True
-      disable_action: False
-    filters:
-    - filtertype: pattern
-      kind: prefix
-      value: auditbeat-
-    - filtertype: age
-      source: name
-      direction: older
-      timestring: '%Y.%m.%d'
-      unit: days
-      unit_count: {{ elastic_auditbeat_retention }}
-  4:
-    action: delete_indices
-    description: >-
-      Delete indices older than 15 days (based on index name), for filebeat-
-      prefixed indices. Ignore the error if the filter does not result in an
-      actionable list of indices (ignore_empty_list) and exit cleanly.
-    options:
-      ignore_empty_list: True
-      disable_action: False
-    filters:
-    - filtertype: pattern
-      kind: prefix
-      value: filebeat-
-    - filtertype: age
-      source: name
-      direction: older
-      timestring: '%Y.%m.%d'
-      unit: days
-      unit_count: {{ elastic_filebeat_retention }}
-  5:
-    action: delete_indices
-    description: >-
-      Delete indices older than 10 days (based on index name), for heartbeat-
-      prefixed indices. Ignore the error if the filter does not result in an
-      actionable list of indices (ignore_empty_list) and exit cleanly.
-    options:
-      ignore_empty_list: True
-      disable_action: False
-    filters:
-    - filtertype: pattern
-      kind: prefix
-      value: heartbeat-
-    - filtertype: age
-      source: name
-      direction: older
-      timestring: '%Y.%m.%d'
-      unit: days
-      unit_count: {{ elastic_heartbeat_retention }}
-  6:
-    action: delete_indices
-    description: >-
-      Delete indices older than 15 days (based on index name), for journalbeat-
-      prefixed indices. Ignore the error if the filter does not result in an
-      actionable list of indices (ignore_empty_list) and exit cleanly.
-    options:
-      ignore_empty_list: True
-      disable_action: False
-    filters:
-    - filtertype: pattern
-      kind: prefix
-      value: journalbeat-
-    - filtertype: age
-      source: name
-      direction: older
-      timestring: '%Y.%m.%d'
-      unit: days
-      unit_count: {{ elastic_journalbeat_retention }}
-  7:
-    action: delete_indices
-    description: >-
-      Delete indices older than 10 days (based on index name), for metricbeat-
-      prefixed indices. Ignore the error if the filter does not result in an
-      actionable list of indices (ignore_empty_list) and exit cleanly.
-    options:
-      ignore_empty_list: True
-      disable_action: False
-    filters:
-    - filtertype: pattern
-      kind: prefix
-      value: metricbeat-
-    - filtertype: age
-      source: name
-      direction: older
-      timestring: '%Y.%m.%d'
-      unit: days
-      unit_count: {{ elastic_metricbeat_retention }}
-  8:
-    action: delete_indices
-    description: >-
-      Delete indices older than 5 days (based on index name), for packetbeat-
-      prefixed indices. Ignore the error if the filter does not result in an
-      actionable list of indices (ignore_empty_list) and exit cleanly.
-    options:
-      ignore_empty_list: True
-      disable_action: False
-    filters:
-    - filtertype: pattern
-      kind: prefix
-      value: packetbeat-
-    - filtertype: age
-      source: name
-      direction: older
-      timestring: '%Y.%m.%d'
-      unit: days
-      unit_count: {{ elastic_packetbeat_retention }}
+{% set action_items = [] -%}
+{# Delete index loop #}
+{% for key in elastic_beat_retention_policy_hosts.keys() -%}
+{%   set delete_indices = {} -%}
+{#   Clamp retention to at least one day so a zero-valued fact never targets live indices #}
+{%   set index_retention = [hostvars[inventory_hostname]['elastic_' + key + '_retention'] | int, 1] | max -%}
+{%   set _ = delete_indices.update(
+       {
+         'action': 'delete_indices',
+         'description': 'Prune indices for ' + key + ' after ' ~ index_retention ~ ' days.',
+         'options': {
+           'ignore_empty_list': true,
+           'disable_action': false
+         }
+       }
+     )
+-%}
+{# add the filter loop #}
+{%   set filters = [] -%}
+{%   set _ = filters.append(
+       {
+         'filtertype': 'pattern',
+         'kind': 'prefix',
+         'value': key + '-'
+       }
+     )
+-%}
+{%   set _ = filters.append(
+       {
+         'filtertype': 'age',
+         'source': 'name',
+         'direction': 'older',
+         'timestring': '%Y.%m.%d',
+         'unit': 'days',
+         'unit_count': (index_retention | int)
+       }
+     )
+-%}
+{%   set _ = delete_indices.update({'filters': filters}) -%}
+{%   set _ = action_items.append(delete_indices) -%}
+
+{# Set shrink curator options #}
+{#   Shrink after a quarter of the retention period, but never before one day #}
+{%   set shrink_retention = [(index_retention | int) // 4, 1] | max -%}
+{%   set shrink_indices = {} -%}
+{%   set _ = shrink_indices.update(
+       {
+         'action': 'shrink',
+         'description': 'Shrink ' + key + ' indices older than ' ~ shrink_retention ~ ' days',
+         'options': {
+           "disable_action": false,
+           "ignore_empty_list": true,
+           "shrink_node": "DETERMINISTIC",
+           "node_filters": {
+             "permit_masters": ((master_nodes | length) < (data_nodes | length)) | ternary(true, false),
+             "exclude_nodes": (groups['kibana'] | map('extract', hostvars, 'ansible_host') | list)
+           },
+           "number_of_shards": 1,
+           "number_of_replicas": 1,
+           "shrink_suffix": '-shrink',
+           "copy_aliases": true,
+           "delete_after": true,
+           "post_allocation": {
+             "allocation_type": "include",
+             "key": "node_tag",
+             "value": "cold"
+           },
+           "wait_for_active_shards": 1,
+           "extra_settings": {
+             "settings": {
+               "index.codec": "best_compression"
+             }
+           },
+           "wait_for_completion": true,
+           "wait_for_rebalance": true,
+           "wait_interval": 9,
+           "max_wait": -1
+         }
+       }
+     )
+-%}
+{%   set filters = [] -%}
+{%   set _ = filters.append(
+       {
+         'filtertype': 'pattern',
+         'kind': 'prefix',
+         'value': key + '-'
+       }
+     )
+-%}
+{%   set _ = filters.append(
+       {
+         'filtertype': 'age',
+         'source': 'creation_date',
+         'direction': 'older',
+         'unit': 'days',
+         'unit_count': (shrink_retention | int)
+       }
+     )
+-%}
+{%   set _ = shrink_indices.update({'filters': filters}) -%}
+{%   set _ = action_items.append(shrink_indices) -%}
+{% endfor -%}
+
+{% set actions = {} -%}
+{% for action_item in action_items -%}
+{%   set _ = actions.update({loop.index: action_item}) -%}
+{% endfor -%}
+
+{# Render all actions #}
+{% set curator_actions = {'actions': actions} -%}
+{{ curator_actions | to_nice_yaml(indent=2) }}
diff --git a/elk_metrics_6x/templates/es-log4j2.properties.j2 b/elk_metrics_6x/templates/es-log4j2.properties.j2
index 7091d480..8a33fe3b 100644
--- a/elk_metrics_6x/templates/es-log4j2.properties.j2
+++ b/elk_metrics_6x/templates/es-log4j2.properties.j2
@@ -28,7 +28,7 @@ appender.rolling.strategy.action.basepath = ${sys:es.logs.base_path}
 appender.rolling.strategy.action.condition.type = IfFileName
 appender.rolling.strategy.action.condition.glob = ${sys:es.logs.cluster_name}-*
 appender.rolling.strategy.action.condition.nested_condition.type = IfLastModified
-appender.rolling.strategy.action.condition.nested_condition.age = {{ elastic_logstash_retention }}D
+appender.rolling.strategy.action.condition.nested_condition.age = {{ elastic_logstash_retention | default(1) }}D
 
 rootLogger.level = info
 
diff --git a/elk_metrics_6x/templates/filebeat.yml.j2 b/elk_metrics_6x/templates/filebeat.yml.j2
index c9dd7ccb..763790e1 100644
--- a/elk_metrics_6x/templates/filebeat.yml.j2
+++ b/elk_metrics_6x/templates/filebeat.yml.j2
@@ -1011,8 +1011,11 @@ filebeat.prospectors:
   # Make sure not file is defined twice as this can lead to unexpected behaviour.
   paths:
     - /var/log/beats/*.log
+    - /openstack/log/*/beats/*.log
     - /var/log/curator/curator
+    - /openstack/log/*/curator/curator
     - /var/log/elasticsearch/*.log
+    - /openstack/log/*/elasticsearch/*.log
 
   # Optional additional fields. These fields can be freely picked
   # to add additional information to the crawled log files for filtering
diff --git a/elk_metrics_6x/vars/variables.yml b/elk_metrics_6x/vars/variables.yml
index ae51514f..75c88243 100644
--- a/elk_metrics_6x/vars/variables.yml
+++ b/elk_metrics_6x/vars/variables.yml
@@ -13,16 +13,39 @@ elastic_hap_port: 9201
 cluster_name: openstack_elk
 node_name: ${HOSTNAME}
 
-# elastic curator vars
-# all retention options are in days
-elastic_logstash_retention: 14
-elastic_apm_retention: 3
-elastic_auditbeat_retention: 7
-elastic_filebeat_retention: 7
-elastic_heartbeat_retention: 7
-elastic_journalbeat_retention: 14
-elastic_metricbeat_retention: 3
-elastic_packetbeat_retention: 3
+### Elastic curator variables
+## Default retention policy options. All retention options are in days.
+# elastic_logstash_retention: 1
+# elastic_apm_retention: 1
+# elastic_auditbeat_retention: 1
+# elastic_filebeat_retention: 1
+# elastic_heartbeat_retention: 1
+# elastic_journalbeat_retention: 1
+# elastic_metricbeat_retention: 1
+# elastic_packetbeat_retention: 1
+
+# This is used to calculate the storage a beat could generate per node, per day.
+# This constant is used as a multiplier. If the expected storage is larger than
+# the actual available storage after the buffer is calculated the multiplier
+# will be doubled, thereby cutting the potential storage days in half.
+elastic_beat_storage_constant: 512
+
+## If any retention policy option is undefined a dynamic fact will be
+## generated for it. The calculation takes the total storage reported by the
+## ES data nodes, subtracts a 25% buffer, and divides the remainder by the
+## number of data nodes. That per-node figure is then divided by the number of
+## hosts assumed to be a beat target multiplied by the storage constant. The
+## generated retention is never lower than one day. The values below are the
+## assumed number of beat target hosts for each index type.
+elastic_beat_retention_policy_hosts:
+  logstash: "{{ groups['elastic-logstash'] | default([null]) | length }}"
+  apm: "{{ groups['apm-server'] | default([null]) | length }}"
+  auditbeat: "{{ groups['hosts'] | default([null]) | length }}"
+  filebeat: "{{ groups['hosts'] | default([null]) | length }}"
+  heartbeat: "{{ groups['utility_all'] | default([null]) | length }}"
+  journalbeat: "{{ groups['all'] | default([null]) | length }}"
+  metricbeat: "{{ groups['all'] | default([null]) | length }}"
+  packetbeat: "{{ groups['hosts'] | default([null]) | length }}"
 
 # This is the URL external services can use to communicate with the
 # elasticsearch cluster.