diff --git a/cluster_metrics/playbook-kapacitor.yml b/cluster_metrics/playbook-kapacitor.yml
new file mode 100644
index 00000000..7646ec17
--- /dev/null
+++ b/cluster_metrics/playbook-kapacitor.yml
@@ -0,0 +1,48 @@
+---
+# Copyright 2016, Rackspace US, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Installs Kapacitor from the InfluxData apt repository on every host in the
+# "cluster-metrics" group, renders its configuration from
+# templates/kapacitor.conf.j2 and (re)starts the packaged service.
+- name: Deploy kapacitor
+  hosts: "cluster-metrics"
+  gather_facts: true
+  user: root
+  tasks:
+    # gather_facts must stay enabled: the repo line is built from
+    # ansible_distribution and ansible_distribution_release.
+    - name: Add kapacitor repo
+      apt_repository:
+        repo: "deb https://repos.influxdata.com/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} stable"
+        state: "present"
+    - name: Install kapacitor
+      apt:
+        pkg: "kapacitor"
+        state: "latest"
+    - name: Drop kapacitor config file
+      template:
+        src: templates/kapacitor.conf.j2
+        dest: /etc/kapacitor/kapacitor.conf
+    # The service restart is the only thing that launches the daemon. Do not
+    # also background "kapacitord" from a shell task: a second process using
+    # /etc/kapacitor/kapacitor.conf would compete with the service-managed
+    # one for the HTTP port and data directory configured there.
+    - name: Enable and restart kapacitor
+      service:
+        name: "kapacitor"
+        enabled: true
+        state: restarted
+  vars_files:
+    - vars.yml
diff --git a/cluster_metrics/readme.rst b/cluster_metrics/readme.rst
index 7a66663d..da950838 100644
--- a/cluster_metrics/readme.rst
+++ b/cluster_metrics/readme.rst
@@ -8,7 +8,8 @@ Gather and visualize cluster wide metrics
 About this repository
 ---------------------
 
-This set of playbooks will deploy InfluxDB, Telegraf, and Grafana for the purpose of collecting metrics on an OpenStack cluster.
+This set of playbooks will deploy InfluxDB, Telegraf, Grafana, and Kapacitor for the purpose of collecting
+metrics on an OpenStack cluster.
 
 Process
 -------
@@ -66,3 +67,9 @@ If you're proxy'ing grafana you will need to provide the full ``root_path`` when
     openstack-ansible playbook-grafana.yml -e galera_root_user=root -e galera_address='127.0.0.1'
 
 Once that last playbook is completed you will have a functioning InfluxDB, Telegraf, and Grafana metric collection system active and collecting metrics. Grafana will need some setup, however functional dash boards have been provided in the ``grafana-dashboards`` directory.
+
+Install Kapacitor
+
+.. code-block:: bash
+
+    openstack-ansible playbook-kapacitor.yml
diff --git a/cluster_metrics/templates/kapacitor.conf.j2 b/cluster_metrics/templates/kapacitor.conf.j2
new file mode 100644
index 00000000..6b0f85e4
--- /dev/null
+++ b/cluster_metrics/templates/kapacitor.conf.j2
@@ -0,0 +1,178 @@
+#jinja2:variable_start_string:'[%' , variable_end_string:'%]', trim_blocks: False
+# Address InfluxDB pushes subscription data back to, so it must be reachable
+# from the InfluxDB host; falls back to "localhost" when the inventory does
+# not define ansible_ssh_host for this node.
+hostname = "[% ansible_ssh_host | default('localhost') %]"
+data_dir = "/var/lib/kapacitor"
+
+[http]
+  bind-address = ":[% kapacitor_port %]"
+  auth-enabled = false
+  log-enabled = true
+  write-tracing = false
+  pprof-enabled = false
+  https-enabled = false
+  https-certificate = "/etc/ssl/kapacitor.pem"
+  shutdown-timeout = "10s"
+  shared-secret = ""
+
+[replay]
+  dir = "/var/lib/kapacitor/replay"
+
+[storage]
+  boltdb = "/var/lib/kapacitor/kapacitor.db"
+
+[task]
+  dir = "/var/lib/kapacitor/tasks"
+  snapshot-interval = "1m0s"
+
+[[influxdb]]
+  enabled = true
+  name = "[% influxdb_db_name %]"
+  default = true
+  # InfluxDB is addressed on the first host of the cluster-metrics group.
+  urls = ["http://[% hostvars[groups['cluster-metrics'][0]]['ansible_ssh_host'] %]:[% influxdb_port %]"]
+  username = "[% influxdb_db_root_name %]"
+  password = "[% influxdb_db_root_password %]"
+  ssl-ca = ""
+  ssl-cert = ""
+  ssl-key = ""
+  insecure-skip-verify = false
+  timeout = "0"
+  disable-subscriptions = false
+  subscription-protocol = "http"
+  udp-bind = ""
+  udp-buffer = 1000
+  udp-read-buffer = 0
+  startup-timeout = "5m0s"
+  subscriptions-sync-interval = "1m0s"
+  [influxdb.subscriptions]
+  [influxdb.excluded-subscriptions]
+    _kapacitor = ["autogen"]
+
+[logging]
+  file = "/var/log/kapacitor/kapacitor.log"
+  level = "INFO"
+
+[collectd]
+  enabled = false
+  bind-address = ":25826"
+  database = "collectd"
+  retention-policy = ""
+  batch-size = 1000
+  batch-pending = 5
+  batch-timeout = "10s"
+  read-buffer = 0
+  typesdb = "/usr/share/collectd/types.db"
+
+[opentsdb]
+  enabled = false
+  bind-address = ":4242"
+  database = "opentsdb"
+  retention-policy = ""
+  consistency-level = "one"
+  tls-enabled = false
+  certificate = "/etc/ssl/influxdb.pem"
+  batch-size = 1000
+  batch-pending = 5
+  batch-timeout = "1s"
+  log-point-errors = true
+
+[smtp]
+  enabled = false
+  host = "localhost"
+  port = 25
+  username = ""
+  password = ""
+  no-verify = false
+  global = false
+  state-changes-only = false
+  from = ""
+  idle-timeout = "30s"
+
+[opsgenie]
+  enabled = false
+  api-key = ""
+  url = "https://api.opsgenie.com/v1/json/alert"
+  recovery_url = "https://api.opsgenie.com/v1/json/alert/note"
+  global = false
+
+[victorops]
+  enabled = false
+  api-key = ""
+  routing-key = ""
+  url = "https://alert.victorops.com/integrations/generic/20131114/alert"
+  global = false
+
+[pagerduty]
+  enabled = false
+  url = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"
+  service-key = ""
+  global = false
+
+[sensu]
+  enabled = false
+  addr = "sensu-client:3030"
+  source = "Kapacitor"
+
+[slack]
+  enabled = false
+  url = ""
+  channel = ""
+  global = false
+  state-changes-only = false
+
+[telegram]
+  enabled = false
+  url = "https://api.telegram.org/bot"
+  token = ""
+  chat-id = ""
+  parse-mode = ""
+  disable-web-page-preview = false
+  disable-notification = false
+  global = false
+  state-changes-only = false
+
+[hipchat]
+  enabled = false
+  url = "https://subdomain.hipchat.com/v2/room"
+  token = ""
+  room = ""
+  global = false
+  state-changes-only = false
+
+[alerta]
+  enabled = false
+  url = ""
+  token = ""
+  environment = ""
+  origin = "kapacitor"
+
+[reporting]
+  enabled = true
+  url = "https://usage.influxdata.com"
+
+[stats]
+  enabled = true
+  stats-interval = "10s"
+  database = "_kapacitor"
+  retention-policy = "autogen"
+  timing-sample-rate = 0.1
+  timing-movavg-size = 1000
+
+[udf]
+  [udf.functions]
+
+[deadman]
+  interval = "10s"
+  threshold = 0.0
+  # The double-brace expressions below are meant for Kapacitor, not Ansible;
+  # the delimiter override on line 1 makes Ansible emit them untouched.
+  id = "node 'NODE_NAME' in task '{{ .TaskName }}'"
+  message = "{{ .ID }} is {{ if eq .Level \"OK\" }}alive{{ else }}dead{{ end }}: {{ index .Fields \"collected\" | printf \"%0.3f\" }} points/INTERVAL."
+  global = false
+
+[talk]
+  enabled = false
+  url = "https://jianliao.com/v2/services/webhook/uuid"
+  author_name = "Kapacitor"
diff --git a/cluster_metrics/vars.yml b/cluster_metrics/vars.yml
index 09d8a44c..a9a1604c 100644
--- a/cluster_metrics/vars.yml
+++ b/cluster_metrics/vars.yml
@@ -32,3 +32,7 @@ influxdb_db_root_name: root
 influxdb_db_root_password: SuperSecrete
 influxdb_db_metric_user: openstack
 influxdb_db_metric_password: SuperDuperSecrete
+
+# Kapacitor Vars
+# Port rendered into the [http] bind-address of kapacitor.conf.
+kapacitor_port: 9092