Add dynamic retention policies to curator
The curator retention policies will now query the storage nodes within a given deployment and set a suitable index retention policy based on the total amount of storage each index is assumed to produce every day. To ensure we're minimizing the storage required and optimizing search performance, several actions are now being taken: * Indexes will be shrunk after a quarter of their retention time. * Indexes will be deleted should they exceed the retention time. Change-Id: I8bf548620b5404d25deaadba8fda93452ef64fa0 Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
This commit is contained in:
parent
316f527243
commit
b6a9a6fc7a
@ -16,6 +16,10 @@
|
|||||||
body: "{{ item.index_options | to_json }}"
|
body: "{{ item.index_options | to_json }}"
|
||||||
status_code: 200,400
|
status_code: 200,400
|
||||||
body_format: json
|
body_format: json
|
||||||
|
register: elk_indexes
|
||||||
|
until: elk_indexes is success
|
||||||
|
retries: 3
|
||||||
|
delay: 5
|
||||||
with_items:
|
with_items:
|
||||||
- name: "osprofiler-notifications"
|
- name: "osprofiler-notifications"
|
||||||
index_options:
|
index_options:
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
- name: Install Curator
|
- name: Install Curator
|
||||||
hosts: "elastic-logstash"
|
hosts: "elastic-logstash"
|
||||||
become: true
|
become: true
|
||||||
|
gather_facts: false
|
||||||
vars:
|
vars:
|
||||||
haproxy_ssl: false
|
haproxy_ssl: false
|
||||||
|
|
||||||
@ -25,6 +26,47 @@
|
|||||||
environment: "{{ deployment_environment_variables | default({}) }}"
|
environment: "{{ deployment_environment_variables | default({}) }}"
|
||||||
|
|
||||||
pre_tasks:
|
pre_tasks:
|
||||||
|
- include_tasks: common_task_data_node_hosts.yml
|
||||||
|
tags:
|
||||||
|
- always
|
||||||
|
|
||||||
|
- name: Query es storage
|
||||||
|
uri:
|
||||||
|
url: "http://127.0.0.1:9200/_nodes/{{ (data_nodes | map('extract', hostvars, 'ansible_host') | list) | join(',') }}/stats/fs"
|
||||||
|
method: GET
|
||||||
|
register: elk_data
|
||||||
|
until: elk_data is success
|
||||||
|
retries: 3
|
||||||
|
delay: 5
|
||||||
|
run_once: true
|
||||||
|
|
||||||
|
- name: Set available storage fact
|
||||||
|
set_fact:
|
||||||
|
es_total_available_storage: "{{ ((elk_data['json']['nodes'].values() | list) | map(attribute='fs.total.total_in_bytes') | list | sum) // 1024 // 1024 }}"
|
||||||
|
|
||||||
|
- name: Set assumed buffer storage fact
|
||||||
|
set_fact:
|
||||||
|
es_assumed_buffer_storage: "{{ ((es_total_available_storage | int) * 0.25) | round | int }}"
|
||||||
|
|
||||||
|
- name: Set usable buffer storage fact(s)
|
||||||
|
set_fact:
|
||||||
|
es_usable_buffer_storage: "{{ (es_total_available_storage | int) - (es_assumed_buffer_storage | int) }}"
|
||||||
|
es_expected_storage: "{{ ((elastic_beat_retention_policy_hosts.values() | map('int') | list) | sum) * (elastic_beat_storage_constant | int) }}"
|
||||||
|
|
||||||
|
- name: Set buffer storage fact
|
||||||
|
set_fact:
|
||||||
|
es_assumed_usable_storage_per_node: "{{ (es_usable_buffer_storage | int) // (data_nodes | length | int) }}"
|
||||||
|
|
||||||
|
- name: Set storage the mulitplier
|
||||||
|
set_fact:
|
||||||
|
es_storage_multiplier: "{{ ((es_usable_buffer_storage | int) < (es_expected_storage | int)) | ternary(((elastic_beat_storage_constant | int) * 2), elastic_beat_storage_constant | int) }}"
|
||||||
|
|
||||||
|
- name: Set retention facts
|
||||||
|
set_fact: "elastic_{{ item.key }}_retention={{ (es_assumed_usable_storage_per_node | int) // ((item.value | int) * (es_storage_multiplier | int)) }}"
|
||||||
|
when:
|
||||||
|
- hostvars[inventory_hostname]["elastic_" + item.key + "_retention"] is undefined
|
||||||
|
with_dict: "{{ elastic_beat_retention_policy_hosts }}"
|
||||||
|
|
||||||
- name: Ensure virtualenv is installed
|
- name: Ensure virtualenv is installed
|
||||||
apt:
|
apt:
|
||||||
name: "{{ item }}"
|
name: "{{ item }}"
|
||||||
|
@ -14,8 +14,8 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
- import_playbook: installElastic.yml
|
- import_playbook: installElastic.yml
|
||||||
- import_playbook: installCurator.yml
|
|
||||||
- import_playbook: installLogstash.yml
|
- import_playbook: installLogstash.yml
|
||||||
|
- import_playbook: installCurator.yml
|
||||||
- import_playbook: installKibana.yml
|
- import_playbook: installKibana.yml
|
||||||
- import_playbook: installAPMserver.yml
|
- import_playbook: installAPMserver.yml
|
||||||
- import_playbook: createElasticIndexes.yml
|
- import_playbook: createElasticIndexes.yml
|
||||||
|
@ -13,156 +13,112 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
actions:
|
{% set action_items = [] -%}
|
||||||
1:
|
{# Delete index loop #}
|
||||||
action: delete_indices
|
{% for key in elastic_beat_retention_policy_hosts.keys() -%}
|
||||||
description: >-
|
{% set delete_indices = {} -%}
|
||||||
Delete indices older than 60 days (based on index name), for logstash-
|
{% set index_retention = hostvars[inventory_hostname]['elastic_' + key + '_retention'] -%}
|
||||||
prefixed indices. Ignore the error if the filter does not result in an
|
{% set _ = delete_indices.update(
|
||||||
actionable list of indices (ignore_empty_list) and exit cleanly.
|
{
|
||||||
options:
|
'action': 'delete_indices',
|
||||||
ignore_empty_list: True
|
'description': 'Prune indices for ' + key + ' after ' ~ ((index_retention | int) * 2) ~ ' days.',
|
||||||
disable_action: False
|
'options': {
|
||||||
filters:
|
'ignore_empty_list': true,
|
||||||
- filtertype: pattern
|
'disable_action': false
|
||||||
kind: prefix
|
}
|
||||||
value: logstash-
|
}
|
||||||
- filtertype: age
|
)
|
||||||
source: name
|
-%}
|
||||||
direction: older
|
{# add the filter loop #}
|
||||||
timestring: '%Y.%m.%d'
|
{% set filters = [] -%}
|
||||||
unit: days
|
{% set _ = filters.append(
|
||||||
unit_count: {{ elastic_logstash_retention }}
|
{
|
||||||
2:
|
'filtertype': 'pattern',
|
||||||
action: delete_indices
|
'kind': 'prefix',
|
||||||
description: >-
|
'value': key + '-'
|
||||||
Delete indices older than 10 days (based on index name), for apm-
|
}
|
||||||
prefixed indices. Ignore the error if the filter does not result in an
|
)
|
||||||
actionable list of indices (ignore_empty_list) and exit cleanly.
|
-%}
|
||||||
options:
|
{% set _ = filters.append(
|
||||||
ignore_empty_list: True
|
{
|
||||||
disable_action: False
|
'filtertype': 'age',
|
||||||
filters:
|
'source': 'name',
|
||||||
- filtertype: pattern
|
'direction': 'older',
|
||||||
kind: prefix
|
'timestring': '%Y.%m.%d',
|
||||||
value: apm-
|
'unit': 'days',
|
||||||
- filtertype: age
|
'unit_count': (index_retention | int)
|
||||||
source: name
|
}
|
||||||
direction: older
|
)
|
||||||
timestring: '%Y.%m.%d'
|
-%}
|
||||||
unit: days
|
{% set _ = delete_indices.update({'filters': filters}) -%}
|
||||||
unit_count: {{ elastic_apm_retention }}
|
{% set _ = action_items.append(delete_indices) -%}
|
||||||
3:
|
|
||||||
action: delete_indices
|
{# Set shrink curator options #}
|
||||||
description: >-
|
{% set shrink_indices = {} -%}
|
||||||
Delete indices older than 15 days (based on index name), for auditbeat-
|
{% set _ = shrink_indices.update(
|
||||||
prefixed indices. Ignore the error if the filter does not result in an
|
{
|
||||||
actionable list of indices (ignore_empty_list) and exit cleanly.
|
'action': 'shrink',
|
||||||
options:
|
'description': 'Shrink ' + key + ' indices older than ' ~ (index_retention | int) // 4 ~ ' days',
|
||||||
ignore_empty_list: True
|
'options': {
|
||||||
disable_action: False
|
"disable_action": false,
|
||||||
filters:
|
"ignore_empty_list": true,
|
||||||
- filtertype: pattern
|
"shrink_node": "DETERMINISTIC",
|
||||||
kind: prefix
|
"node_filters": {
|
||||||
value: auditbeat-
|
"permit_masters": ((master_nodes | length) < (data_nodes | length)) | ternary(true, false),
|
||||||
- filtertype: age
|
"exclude_nodes": (groups['kibana'] | map('extract', hostvars, 'ansible_host') | list)
|
||||||
source: name
|
},
|
||||||
direction: older
|
"number_of_shards": 1,
|
||||||
timestring: '%Y.%m.%d'
|
"number_of_replicas": 1,
|
||||||
unit: days
|
"shrink_suffix": '-shrink',
|
||||||
unit_count: {{ elastic_auditbeat_retention }}
|
"copy_aliases": true,
|
||||||
4:
|
"delete_after": true,
|
||||||
action: delete_indices
|
"post_allocation": {
|
||||||
description: >-
|
"allocation_type": "include",
|
||||||
Delete indices older than 15 days (based on index name), for filebeat-
|
"key": "node_tag",
|
||||||
prefixed indices. Ignore the error if the filter does not result in an
|
"value": "cold"
|
||||||
actionable list of indices (ignore_empty_list) and exit cleanly.
|
},
|
||||||
options:
|
"wait_for_active_shards": 1,
|
||||||
ignore_empty_list: True
|
"extra_settings": {
|
||||||
disable_action: False
|
"settings": {
|
||||||
filters:
|
"index.codec": "best_compression"
|
||||||
- filtertype: pattern
|
}
|
||||||
kind: prefix
|
},
|
||||||
value: filebeat-
|
"wait_for_completion": true,
|
||||||
- filtertype: age
|
"wait_for_rebalance": true,
|
||||||
source: name
|
"wait_interval": 9,
|
||||||
direction: older
|
"max_wait": -1
|
||||||
timestring: '%Y.%m.%d'
|
}
|
||||||
unit: days
|
}
|
||||||
unit_count: {{ elastic_filebeat_retention }}
|
)
|
||||||
5:
|
-%}
|
||||||
action: delete_indices
|
{% set filters = [] -%}
|
||||||
description: >-
|
{% set _ = filters.append(
|
||||||
Delete indices older than 10 days (based on index name), for heartbeat-
|
{
|
||||||
prefixed indices. Ignore the error if the filter does not result in an
|
'filtertype': 'pattern',
|
||||||
actionable list of indices (ignore_empty_list) and exit cleanly.
|
'kind': 'prefix',
|
||||||
options:
|
'value': key + '-'
|
||||||
ignore_empty_list: True
|
}
|
||||||
disable_action: False
|
)
|
||||||
filters:
|
-%}
|
||||||
- filtertype: pattern
|
{% set _ = filters.append(
|
||||||
kind: prefix
|
{
|
||||||
value: heartbeat-
|
'filtertype': 'age',
|
||||||
- filtertype: age
|
'source': 'creation_date',
|
||||||
source: name
|
'direction': 'older',
|
||||||
direction: older
|
'unit': 'days',
|
||||||
timestring: '%Y.%m.%d'
|
'unit_count': (index_retention | int) // 4
|
||||||
unit: days
|
}
|
||||||
unit_count: {{ elastic_heartbeat_retention }}
|
)
|
||||||
6:
|
-%}
|
||||||
action: delete_indices
|
{% set _ = shrink_indices.update({'filters': filters}) -%}
|
||||||
description: >-
|
{% set _ = action_items.append(shrink_indices) -%}
|
||||||
Delete indices older than 15 days (based on index name), for journalbeat-
|
{% endfor -%}
|
||||||
prefixed indices. Ignore the error if the filter does not result in an
|
|
||||||
actionable list of indices (ignore_empty_list) and exit cleanly.
|
{% set actions = {} -%}
|
||||||
options:
|
{% for action_item in action_items -%}
|
||||||
ignore_empty_list: True
|
{% set _ = actions.update({loop.index: action_item}) -%}
|
||||||
disable_action: False
|
{% endfor -%}
|
||||||
filters:
|
|
||||||
- filtertype: pattern
|
{# Render all actions #}
|
||||||
kind: prefix
|
{% set curator_actions = {'actions': actions} -%}
|
||||||
value: journalbeat-
|
{{ curator_actions | to_nice_yaml(indent=2) }}
|
||||||
- filtertype: age
|
|
||||||
source: name
|
|
||||||
direction: older
|
|
||||||
timestring: '%Y.%m.%d'
|
|
||||||
unit: days
|
|
||||||
unit_count: {{ elastic_journalbeat_retention }}
|
|
||||||
7:
|
|
||||||
action: delete_indices
|
|
||||||
description: >-
|
|
||||||
Delete indices older than 10 days (based on index name), for metricbeat-
|
|
||||||
prefixed indices. Ignore the error if the filter does not result in an
|
|
||||||
actionable list of indices (ignore_empty_list) and exit cleanly.
|
|
||||||
options:
|
|
||||||
ignore_empty_list: True
|
|
||||||
disable_action: False
|
|
||||||
filters:
|
|
||||||
- filtertype: pattern
|
|
||||||
kind: prefix
|
|
||||||
value: metricbeat-
|
|
||||||
- filtertype: age
|
|
||||||
source: name
|
|
||||||
direction: older
|
|
||||||
timestring: '%Y.%m.%d'
|
|
||||||
unit: days
|
|
||||||
unit_count: {{ elastic_metricbeat_retention }}
|
|
||||||
8:
|
|
||||||
action: delete_indices
|
|
||||||
description: >-
|
|
||||||
Delete indices older than 5 days (based on index name), for packetbeat-
|
|
||||||
prefixed indices. Ignore the error if the filter does not result in an
|
|
||||||
actionable list of indices (ignore_empty_list) and exit cleanly.
|
|
||||||
options:
|
|
||||||
ignore_empty_list: True
|
|
||||||
disable_action: False
|
|
||||||
filters:
|
|
||||||
- filtertype: pattern
|
|
||||||
kind: prefix
|
|
||||||
value: packetbeat-
|
|
||||||
- filtertype: age
|
|
||||||
source: name
|
|
||||||
direction: older
|
|
||||||
timestring: '%Y.%m.%d'
|
|
||||||
unit: days
|
|
||||||
unit_count: {{ elastic_packetbeat_retention }}
|
|
||||||
|
@ -28,7 +28,7 @@ appender.rolling.strategy.action.basepath = ${sys:es.logs.base_path}
|
|||||||
appender.rolling.strategy.action.condition.type = IfFileName
|
appender.rolling.strategy.action.condition.type = IfFileName
|
||||||
appender.rolling.strategy.action.condition.glob = ${sys:es.logs.cluster_name}-*
|
appender.rolling.strategy.action.condition.glob = ${sys:es.logs.cluster_name}-*
|
||||||
appender.rolling.strategy.action.condition.nested_condition.type = IfLastModified
|
appender.rolling.strategy.action.condition.nested_condition.type = IfLastModified
|
||||||
appender.rolling.strategy.action.condition.nested_condition.age = {{ elastic_logstash_retention }}D
|
appender.rolling.strategy.action.condition.nested_condition.age = {{ elastic_logstash_retention | default(1) }}D
|
||||||
|
|
||||||
|
|
||||||
rootLogger.level = info
|
rootLogger.level = info
|
||||||
|
@ -1011,8 +1011,11 @@ filebeat.prospectors:
|
|||||||
# Make sure no file is defined twice as this can lead to unexpected behaviour.
|
# Make sure no file is defined twice as this can lead to unexpected behaviour.
|
||||||
paths:
|
paths:
|
||||||
- /var/log/beats/*.log
|
- /var/log/beats/*.log
|
||||||
|
- /openstack/log/*/beats/*.log
|
||||||
- /var/log/curator/curator
|
- /var/log/curator/curator
|
||||||
|
- /openstack/log/*/curator/curator
|
||||||
- /var/log/elasticsearch/*.log
|
- /var/log/elasticsearch/*.log
|
||||||
|
- /openstack/log/*/elasticsearch/*.log
|
||||||
|
|
||||||
# Optional additional fields. These fields can be freely picked
|
# Optional additional fields. These fields can be freely picked
|
||||||
# to add additional information to the crawled log files for filtering
|
# to add additional information to the crawled log files for filtering
|
||||||
|
@ -13,16 +13,39 @@ elastic_hap_port: 9201
|
|||||||
cluster_name: openstack_elk
|
cluster_name: openstack_elk
|
||||||
node_name: ${HOSTNAME}
|
node_name: ${HOSTNAME}
|
||||||
|
|
||||||
# elastic curator vars
|
### Elastic curator variables
|
||||||
# all retention options are in days
|
## Default retention policy options. All retention options are in days.
|
||||||
elastic_logstash_retention: 14
|
# elastic_logstash_retention: 1
|
||||||
elastic_apm_retention: 3
|
# elastic_apm_retention: 1
|
||||||
elastic_auditbeat_retention: 7
|
# elastic_auditbeat_retention: 1
|
||||||
elastic_filebeat_retention: 7
|
# elastic_filebeat_retention: 1
|
||||||
elastic_heartbeat_retention: 7
|
# elastic_heartbeat_retention: 1
|
||||||
elastic_journalbeat_retention: 14
|
# elastic_journalbeat_retention: 1
|
||||||
elastic_metricbeat_retention: 3
|
# elastic_metricbeat_retention: 1
|
||||||
elastic_packetbeat_retention: 3
|
# elastic_packetbeat_retention: 1
|
||||||
|
|
||||||
|
# This is used to calculate the storage a beat could generate per node, per day.
|
||||||
|
# This constant is used as a multiplier. If the expected storage is larger than
|
||||||
|
# the actual available storage after the buffer is calculated, the multiplier
|
||||||
|
# will be doubled, thereby cutting the potential storage days in half.
|
||||||
|
elastic_beat_storage_constant: 512
|
||||||
|
|
||||||
|
## If any retention policy option is undefined, a dynamic fact will be generated.
|
||||||
|
## The fact will be generated for the general retention using the storage constant
|
||||||
|
## per node, per index, where a given collector is expected to be deployed. The
|
||||||
|
## equation used will take the total available storage from the ES data nodes
|
||||||
|
## subtract 25%, then divide by the total number of data nodes. That is then divided
|
||||||
|
## by the number of hosts assumed to be a beat target, multiplied by the
|
||||||
|
## storage constant.
|
||||||
|
elastic_beat_retention_policy_hosts:
|
||||||
|
logstash: "{{ groups['elastic-logstash'] | default([null]) | length }}"
|
||||||
|
apm: "{{ groups['apm-server'] | default([null]) | length }}"
|
||||||
|
auditbeat: "{{ groups['hosts'] | default([null]) | length }}"
|
||||||
|
filebeat: "{{ groups['hosts'] | default([null]) | length }}"
|
||||||
|
heartbeat: "{{ groups['utility_all'] | default([null]) | length }}"
|
||||||
|
journalbeat: "{{ groups['all'] | default([null]) | length }}"
|
||||||
|
metricbeat: "{{ groups['all'] | default([null]) | length }}"
|
||||||
|
packetbeat: "{{ groups['hosts'] | default([null]) | length }}"
|
||||||
|
|
||||||
# This is the URL external services can use to communicate with the
|
# This is the URL external services can use to communicate with the
|
||||||
# elasticsearch cluster.
|
# elasticsearch cluster.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user