Merge "Add dynamic retention policies to curator"

This commit is contained in:
Zuul 2018-07-15 04:18:41 +00:00 committed by Gerrit Code Review
commit 93463d6efc
7 changed files with 193 additions and 165 deletions

View File

@ -16,6 +16,10 @@
body: "{{ item.index_options | to_json }}"
status_code: 200,400
body_format: json
register: elk_indexes
until: elk_indexes is success
retries: 3
delay: 5
with_items:
- name: "osprofiler-notifications"
index_options:

View File

@ -16,6 +16,7 @@
- name: Install Curator
hosts: "elastic-logstash"
become: true
gather_facts: false
vars:
haproxy_ssl: false
@ -25,6 +26,47 @@
environment: "{{ deployment_environment_variables | default({}) }}"
pre_tasks:
- include_tasks: common_task_data_node_hosts.yml
tags:
- always
# Fetch filesystem statistics for every Elasticsearch data node through the
# local API endpoint. The request is retried up to 3 times, 5 seconds apart,
# and is only executed once for the whole play.
- name: Query es storage
  run_once: true
  uri:
    method: GET
    url: >-
      http://127.0.0.1:9200/_nodes/{{ (data_nodes | map('extract', hostvars, 'ansible_host') | list) | join(',') }}/stats/fs
  register: elk_data
  retries: 3
  delay: 5
  until: elk_data is success
# Sum the reported filesystem size of all data nodes; two integer divisions
# by 1024 convert the byte count to MiB.
- name: Set available storage fact
  set_fact:
    es_total_available_storage: >-
      {{ ((elk_data['json']['nodes'].values() | list) | map(attribute='fs.total.total_in_bytes') | list | sum) // 1024 // 1024 }}

# Hold back a quarter of the total as a safety buffer.
- name: Set assumed buffer storage fact
  set_fact:
    es_assumed_buffer_storage: >-
      {{ ((es_total_available_storage | int) * 0.25) | round | int }}

# Usable storage is the total minus the buffer; expected storage is the sum of
# all per-beat host counts multiplied by the storage constant.
- name: Set usable buffer storage fact(s)
  set_fact:
    es_usable_buffer_storage: >-
      {{ (es_total_available_storage | int) - (es_assumed_buffer_storage | int) }}
    es_expected_storage: >-
      {{ ((elastic_beat_retention_policy_hosts.values() | map('int') | list) | sum) * (elastic_beat_storage_constant | int) }}

# Split the usable storage evenly across the data nodes.
- name: Set buffer storage fact
  set_fact:
    es_assumed_usable_storage_per_node: >-
      {{ (es_usable_buffer_storage | int) // (data_nodes | length | int) }}

# Double the storage constant when the expected storage exceeds what is
# usable; otherwise use the constant unchanged.
- name: Set storage the mulitplier
  set_fact:
    es_storage_multiplier: >-
      {{ ((es_usable_buffer_storage | int) < (es_expected_storage | int)) | ternary(((elastic_beat_storage_constant | int) * 2), elastic_beat_storage_constant | int) }}
# Generate an elastic_<beat>_retention fact (in days) for every beat that has
# no explicitly defined retention value on this host.
#
# retention = usable storage per node // (beat host count * storage multiplier)
#
# The divisor is clamped to at least 1 so that an existing-but-empty inventory
# group (host count 0) cannot trigger a division by zero, and the result is
# clamped to at least 1 so that a small cluster never ends up with a 0 day
# retention policy.
- name: Set retention facts
  set_fact:
    "elastic_{{ item.key }}_retention": >-
      {{
        [
          (es_assumed_usable_storage_per_node | int)
          // ([((item.value | int) * (es_storage_multiplier | int)), 1] | max),
          1
        ] | max
      }}
  when:
    - hostvars[inventory_hostname]["elastic_" + item.key + "_retention"] is undefined
  with_dict: "{{ elastic_beat_retention_policy_hosts }}"
- name: Ensure virtualenv is installed
apt:
name: "{{ item }}"

View File

@ -14,8 +14,8 @@
# limitations under the License.
- import_playbook: installElastic.yml
- import_playbook: installCurator.yml
- import_playbook: installLogstash.yml
- import_playbook: installCurator.yml
- import_playbook: installKibana.yml
- import_playbook: installAPMserver.yml
- import_playbook: createElasticIndexes.yml

View File

@ -13,156 +13,112 @@
# See the License for the specific language governing permissions and
# limitations under the License.
actions:
1:
action: delete_indices
description: >-
Delete indices older than 60 days (based on index name), for logstash-
prefixed indices. Ignore the error if the filter does not result in an
actionable list of indices (ignore_empty_list) and exit cleanly.
options:
ignore_empty_list: True
disable_action: False
filters:
- filtertype: pattern
kind: prefix
value: logstash-
- filtertype: age
source: name
direction: older
timestring: '%Y.%m.%d'
unit: days
unit_count: {{ elastic_logstash_retention }}
2:
action: delete_indices
description: >-
Delete indices older than 10 days (based on index name), for apm-
prefixed indices. Ignore the error if the filter does not result in an
actionable list of indices (ignore_empty_list) and exit cleanly.
options:
ignore_empty_list: True
disable_action: False
filters:
- filtertype: pattern
kind: prefix
value: apm-
- filtertype: age
source: name
direction: older
timestring: '%Y.%m.%d'
unit: days
unit_count: {{ elastic_apm_retention }}
3:
action: delete_indices
description: >-
Delete indices older than 15 days (based on index name), for auditbeat-
prefixed indices. Ignore the error if the filter does not result in an
actionable list of indices (ignore_empty_list) and exit cleanly.
options:
ignore_empty_list: True
disable_action: False
filters:
- filtertype: pattern
kind: prefix
value: auditbeat-
- filtertype: age
source: name
direction: older
timestring: '%Y.%m.%d'
unit: days
unit_count: {{ elastic_auditbeat_retention }}
4:
action: delete_indices
description: >-
Delete indices older than 15 days (based on index name), for filebeat-
prefixed indices. Ignore the error if the filter does not result in an
actionable list of indices (ignore_empty_list) and exit cleanly.
options:
ignore_empty_list: True
disable_action: False
filters:
- filtertype: pattern
kind: prefix
value: filebeat-
- filtertype: age
source: name
direction: older
timestring: '%Y.%m.%d'
unit: days
unit_count: {{ elastic_filebeat_retention }}
5:
action: delete_indices
description: >-
Delete indices older than 10 days (based on index name), for heartbeat-
prefixed indices. Ignore the error if the filter does not result in an
actionable list of indices (ignore_empty_list) and exit cleanly.
options:
ignore_empty_list: True
disable_action: False
filters:
- filtertype: pattern
kind: prefix
value: heartbeat-
- filtertype: age
source: name
direction: older
timestring: '%Y.%m.%d'
unit: days
unit_count: {{ elastic_heartbeat_retention }}
6:
action: delete_indices
description: >-
Delete indices older than 15 days (based on index name), for journalbeat-
prefixed indices. Ignore the error if the filter does not result in an
actionable list of indices (ignore_empty_list) and exit cleanly.
options:
ignore_empty_list: True
disable_action: False
filters:
- filtertype: pattern
kind: prefix
value: journalbeat-
- filtertype: age
source: name
direction: older
timestring: '%Y.%m.%d'
unit: days
unit_count: {{ elastic_journalbeat_retention }}
7:
action: delete_indices
description: >-
Delete indices older than 10 days (based on index name), for metricbeat-
prefixed indices. Ignore the error if the filter does not result in an
actionable list of indices (ignore_empty_list) and exit cleanly.
options:
ignore_empty_list: True
disable_action: False
filters:
- filtertype: pattern
kind: prefix
value: metricbeat-
- filtertype: age
source: name
direction: older
timestring: '%Y.%m.%d'
unit: days
unit_count: {{ elastic_metricbeat_retention }}
8:
action: delete_indices
description: >-
Delete indices older than 5 days (based on index name), for packetbeat-
prefixed indices. Ignore the error if the filter does not result in an
actionable list of indices (ignore_empty_list) and exit cleanly.
options:
ignore_empty_list: True
disable_action: False
filters:
- filtertype: pattern
kind: prefix
value: packetbeat-
- filtertype: age
source: name
direction: older
timestring: '%Y.%m.%d'
unit: days
unit_count: {{ elastic_packetbeat_retention }}
{# Build the Curator action file. For every index prefix listed in
   elastic_beat_retention_policy_hosts two actions are generated: a
   "delete_indices" action followed by a "shrink" action. The collected
   actions are then numbered from 1 and rendered as YAML. -#}
{% set action_items = [] -%}
{# Delete index loop #}
{% for key in elastic_beat_retention_policy_hosts.keys() -%}
{% set index_retention = hostvars[inventory_hostname]['elastic_' + key + '_retention'] | int -%}
{# Shrink after a quarter of the retention period. The value is clamped to a
   minimum of one day so that a retention below 4 days cannot floor-divide to
   a 0 day threshold. -#}
{% set shrink_after = [index_retention // 4, 1] | max -%}
{# The description reports the same day count the age filter enforces. -#}
{% set delete_indices = {
     'action': 'delete_indices',
     'description': 'Prune indices for ' ~ key ~ ' after ' ~ index_retention ~ ' days.',
     'options': {
       'ignore_empty_list': true,
       'disable_action': false
     },
     'filters': [
       {
         'filtertype': 'pattern',
         'kind': 'prefix',
         'value': key ~ '-'
       },
       {
         'filtertype': 'age',
         'source': 'name',
         'direction': 'older',
         'timestring': '%Y.%m.%d',
         'unit': 'days',
         'unit_count': index_retention
       }
     ]
   }
-%}
{% set _ = action_items.append(delete_indices) -%}
{# Set shrink curator options #}
{% set shrink_indices = {
     'action': 'shrink',
     'description': 'Shrink ' ~ key ~ ' indices older than ' ~ shrink_after ~ ' days',
     'options': {
       'disable_action': false,
       'ignore_empty_list': true,
       'shrink_node': 'DETERMINISTIC',
       'node_filters': {
         'permit_masters': (master_nodes | length) < (data_nodes | length),
         'exclude_nodes': (groups['kibana'] | map('extract', hostvars, 'ansible_host') | list)
       },
       'number_of_shards': 1,
       'number_of_replicas': 1,
       'shrink_suffix': '-shrink',
       'copy_aliases': true,
       'delete_after': true,
       'post_allocation': {
         'allocation_type': 'include',
         'key': 'node_tag',
         'value': 'cold'
       },
       'wait_for_active_shards': 1,
       'extra_settings': {
         'settings': {
           'index.codec': 'best_compression'
         }
       },
       'wait_for_completion': true,
       'wait_for_rebalance': true,
       'wait_interval': 9,
       'max_wait': -1
     },
     'filters': [
       {
         'filtertype': 'pattern',
         'kind': 'prefix',
         'value': key ~ '-'
       },
       {
         'filtertype': 'age',
         'source': 'creation_date',
         'direction': 'older',
         'unit': 'days',
         'unit_count': shrink_after
       }
     ]
   }
-%}
{% set _ = action_items.append(shrink_indices) -%}
{% endfor -%}
{# Key every action by its 1-based position in the list. -#}
{% set actions = {} -%}
{% for action_item in action_items -%}
{% set _ = actions.update({loop.index: action_item}) -%}
{% endfor -%}
{# Render all actions #}
{% set curator_actions = {'actions': actions} -%}
{{ curator_actions | to_nice_yaml(indent=2) }}

View File

@ -28,7 +28,7 @@ appender.rolling.strategy.action.basepath = ${sys:es.logs.base_path}
appender.rolling.strategy.action.condition.type = IfFileName
appender.rolling.strategy.action.condition.glob = ${sys:es.logs.cluster_name}-*
appender.rolling.strategy.action.condition.nested_condition.type = IfLastModified
appender.rolling.strategy.action.condition.nested_condition.age = {{ elastic_logstash_retention }}D
appender.rolling.strategy.action.condition.nested_condition.age = {{ elastic_logstash_retention | default(1) }}D
rootLogger.level = info

View File

@ -1011,8 +1011,11 @@ filebeat.prospectors:
# Make sure no file is defined twice as this can lead to unexpected behaviour.
paths:
- /var/log/beats/*.log
- /openstack/log/*/beats/*.log
- /var/log/curator/curator
- /openstack/log/*/curator/curator
- /var/log/elasticsearch/*.log
- /openstack/log/*/elasticsearch/*.log
# Optional additional fields. These fields can be freely picked
# to add additional information to the crawled log files for filtering

View File

@ -13,16 +13,39 @@ elastic_hap_port: 9201
cluster_name: openstack_elk
node_name: ${HOSTNAME}
# elastic curator vars
# all retention options are in days
elastic_logstash_retention: 14
elastic_apm_retention: 3
elastic_auditbeat_retention: 7
elastic_filebeat_retention: 7
elastic_heartbeat_retention: 7
elastic_journalbeat_retention: 14
elastic_metricbeat_retention: 3
elastic_packetbeat_retention: 3
### Elastic curator variables
## Default retention policy options. All retention options are in days.
# elastic_logstash_retention: 1
# elastic_apm_retention: 1
# elastic_auditbeat_retention: 1
# elastic_filebeat_retention: 1
# elastic_heartbeat_retention: 1
# elastic_journalbeat_retention: 1
# elastic_metricbeat_retention: 1
# elastic_packetbeat_retention: 1
# This is used to calculate the storage a beat could generate per node, per day.
# This constant is used as a multiplier. If the expected storage is larger than
# the actual available storage after the buffer is calculated, the multiplier
# will be doubled, thereby cutting the potential storage days in half.
elastic_beat_storage_constant: 512
## If any retention policy option is undefined, a dynamic fact will be generated
## for it using the storage constant per node, per index, where a given
## collector is expected to be deployed. The equation takes the total storage
## reported by the ES data nodes, subtracts a 25% buffer, and divides the result
## by the total number of data nodes. That per-node figure is then divided by
## the number of hosts assumed to be a beat target multiplied by the storage
## constant:
##   retention = ((total - 25%) / data nodes) / (beat hosts * storage constant)
# Maps each beat / index prefix to the number of hosts in the inventory group
# expected to ship data for it. When a group is not defined in the inventory
# the default single-element list makes the count 1.
elastic_beat_retention_policy_hosts:
logstash: "{{ groups['elastic-logstash'] | default([null]) | length }}"
apm: "{{ groups['apm-server'] | default([null]) | length }}"
auditbeat: "{{ groups['hosts'] | default([null]) | length }}"
filebeat: "{{ groups['hosts'] | default([null]) | length }}"
heartbeat: "{{ groups['utility_all'] | default([null]) | length }}"
journalbeat: "{{ groups['all'] | default([null]) | length }}"
metricbeat: "{{ groups['all'] | default([null]) | length }}"
packetbeat: "{{ groups['hosts'] | default([null]) | length }}"
# This is the URL external services can use to communicate with the
# elasticsearch cluster.