diff --git a/cluster_metrics/playbook-influx-telegraf.yml b/cluster_metrics/playbook-influx-telegraf.yml
index e5e616f6..07d46a61 100644
--- a/cluster_metrics/playbook-influx-telegraf.yml
+++ b/cluster_metrics/playbook-influx-telegraf.yml
@@ -44,18 +44,8 @@
       with_dict: "{{ command_plugins }}"
       when:
         - item.value.when_group | bool
-        - item.value.group == inventory_hostname or
+        - item.value.group == inventory_hostname or
           inventory_hostname in item.value.group | default([])
-    - name: Add to command plugins
-      set_fact:
-        commands: "{{ commands | union(item.value.command) }}"
-      with_dict: "{{ command_plugins }}"
-      when:
-        - item.value.when_group | bool
-        - item.value.group == inventory_hostname or
-          inventory_hostname in item.value.group | default([])
-      tags:
-        - always
     - name: Store my_cnf
       slurp:
         src: "/root/.my.cnf"
@@ -96,5 +86,11 @@
           - "python /opt/telegraf/ironic_nodes.py"
         group: "{{ groups['utility_all'][0] }}"
         when_group: "{{ (groups['ironic_api'] | length) > 0 }}"
+      vm_quota:
+        plugin_name: "vm_quota.py"
+        command:
+          - "python /opt/telegraf/vm_quota.py"
+        group: "{{ groups['utility_all'][0] }}"
+        when_group: "{{ (groups['nova_compute'] | length) > 0 }}"
     influx_telegraf_targets:
       - "{{ influxdb_host|default(internal_lb_vip_address) }}:{{ influxdb_port }}"
diff --git a/cluster_metrics/templates/telegraf-plugins/vm_consumers.py b/cluster_metrics/templates/telegraf-plugins/vm_consumers.py
new file mode 100644
index 00000000..f9b4fc7a
--- /dev/null
+++ b/cluster_metrics/templates/telegraf-plugins/vm_consumers.py
@@ -0,0 +1,184 @@
+#!/bin/python
+#
+# Copyright 2016, Rackspace US, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Telegraf plugin that reports compute quota and usage per project.
+
+Each metric is printed as one InfluxDB line-protocol record whose field keys
+are project names.
+"""
+
+import collections
+
+from openstack import connection as os_conn
+
+
+OS_AUTH_ARGS = {
+    'auth_url': '{{ keystone_service_internalurl }}',
+    'project_name': '{{ keystone_admin_tenant_name }}',
+    'user_domain_name': '{{ openrc_os_domain_name }}',
+    'project_domain_name': '{{ openrc_os_domain_name }}',
+    'username': '{{ keystone_admin_user_name }}',
+    'password': '{{ keystone_auth_admin_password }}',
+}
+
+# Holds the lazily created connection so every helper shares one session.
+OS_CONNECTION = {'conn': None}
+
+
+def line_return(collection, metric_name):
+    """Render a mapping as a single InfluxDB line-protocol record.
+
+    :param collection: mapping of field key to value.
+    :param metric_name: measurement name placed at the start of the line.
+    :returns: ``<metric_name> key=value,key=value``. Spaces in keys become
+              underscores; commas and equals signs are backslash-escaped
+              so they are not read as field separators.
+    """
+    fields = list()
+    for key, value in collection.items():
+        key = key.replace(' ', '_').replace(',', '\\,').replace('=', '\\=')
+        fields.append('%s=%s' % (key, value))
+    return '%s %s' % (metric_name, ','.join(fields))
+
+
+def _connect():
+    """Return the shared OpenStack connection, creating it on first use."""
+    if not OS_CONNECTION['conn']:
+        OS_CONNECTION['conn'] = os_conn.Connection(**OS_AUTH_ARGS)
+    return OS_CONNECTION['conn']
+
+
+def get_consumers():
+    """Return all projects except Heat stack-user projects.
+
+    A project is excluded when its description, compared case-insensitively,
+    is ``heat stack user project``. A missing description is treated as an
+    empty string so the project is kept rather than raising an error.
+    """
+    conn = _connect()
+    consumers = list()
+    for project in conn.identity.projects():
+        description = project['description'] or ''
+        if description.lower() != 'heat stack user project':
+            consumers.append(project)
+    return consumers
+
+
+def get_consumer_limits(consumer_id):
+    """Return the compute ``quota_set`` mapping for a project.
+
+    :param consumer_id: ID of the project to look up.
+    """
+    conn = _connect()
+    url = conn.compute.session.get_endpoint(
+        interface='internal',
+        service_type='compute'
+    )
+    quota_data = conn.compute.session.get(
+        url + '/os-quota-sets/' + consumer_id
+    )
+    return quota_data.json()['quota_set']
+
+
+def get_consumer_usage():
+    """Return detailed server records requested across all tenants."""
+    conn = _connect()
+    tenant_kwargs = {'all_tenants': True, 'limit': 5000}
+    return conn.compute.servers(details=True, **tenant_kwargs)
+
+
+def get_flavors():
+    """Map each flavor ID to its ``ram``, ``cores`` and ``disk``."""
+    conn = _connect()
+    flavor_cache = dict()
+    for flavor in conn.compute.flavors():
+        flavor_cache[flavor['id']] = {
+            'ram': flavor['ram'],
+            'cores': flavor['vcpus'],
+            'disk': flavor['disk'],
+        }
+    return flavor_cache
+
+
+def main():
+    """Gather quota and usage for every project and print the metrics.
+
+    Metrics without any data point are not printed because a record with no
+    fields is not valid line protocol.
+    """
+    consumer_quota_instance = dict()
+    consumer_quota_cores = dict()
+    consumer_quota_ram = dict()
+    consumer_used_instances = collections.Counter()
+    consumer_used_cores = collections.Counter()
+    consumer_used_ram = collections.Counter()
+    consumer_used_disk = collections.Counter()
+
+    flavor_cache = get_flavors()
+    consumer_id_cache = dict()
+    for consumer in get_consumers():
+        consumer_name = consumer['name']
+        consumer_id = consumer['id']
+        _quota = get_consumer_limits(consumer_id)
+        consumer_id_cache[consumer_id] = consumer_name
+        consumer_quota_instance[consumer_name] = int(_quota['instances'])
+        consumer_quota_cores[consumer_name] = int(_quota['cores'])
+        consumer_quota_ram[consumer_name] = int(_quota['ram'])
+
+    for used_instance in get_consumer_usage():
+        # Servers owned by a project that get_consumers() did not return
+        # have no name to be reported under, so they are left out.
+        consumer_name = consumer_id_cache.get(used_instance['tenant_id'])
+        if consumer_name is None:
+            continue
+        consumer_used_instances[consumer_name] += 1
+        # A server's flavor may be absent from the flavor listing; the
+        # server is still counted, but its resources cannot be sized.
+        flavor = flavor_cache.get(used_instance['flavor']['id'])
+        if flavor is None:
+            continue
+        consumer_used_cores[consumer_name] += int(flavor['cores'])
+        consumer_used_ram[consumer_name] += int(flavor['ram'])
+        consumer_used_disk[consumer_name] += int(flavor['disk'])
+
+    consumer_quota_totals = {
+        'total_quota_instance': sum(consumer_quota_instance.values()),
+        'total_quota_cores': sum(consumer_quota_cores.values()),
+        'total_quota_ram': sum(consumer_quota_ram.values()),
+        'total_used_instances': sum(consumer_used_instances.values()),
+        'total_used_cores': sum(consumer_used_cores.values()),
+        'total_used_ram': sum(consumer_used_ram.values()),
+        'total_used_disk': sum(consumer_used_disk.values()),
+    }
+
+    # Records are printed in the order listed here.
+    metrics = (
+        ('consumer_quota_instance', consumer_quota_instance),
+        ('consumer_quota_cores', consumer_quota_cores),
+        ('consumer_quota_ram', consumer_quota_ram),
+        ('consumer_used_instances', consumer_used_instances),
+        ('consumer_used_cores', consumer_used_cores),
+        ('consumer_used_ram', consumer_used_ram),
+        ('consumer_used_disk', consumer_used_disk),
+        ('consumer_quota_totals', consumer_quota_totals),
+    )
+    for metric_name, collection in metrics:
+        if collection:
+            print(line_return(collection=collection, metric_name=metric_name))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/cluster_metrics/templates/telegraf.conf.j2 b/cluster_metrics/templates/telegraf.conf.j2
index 0341f04b..580a956e 100644
--- a/cluster_metrics/templates/telegraf.conf.j2
+++ b/cluster_metrics/templates/telegraf.conf.j2
@@ -5,13 +5,26 @@
   node_type = "physical_host"
 {% endif %}
 
+{% set run_commands = [] %}
+{% for key, value in command_plugins.items() %}
+{% if value.when_group | bool and (value.group == inventory_hostname or inventory_hostname in value.group | default([])) %}
+{% set _ = run_commands.extend(value.command) %}
+{% endif %}
+{% endfor %}
+
+{# The run_int adds padding to the interval so that plugins being added to the system have #}
+{# enough time to execute. 
Every added plugin will add 8 seconds to the interval with a #}
+{# default of 24. This value is later used as the flush interval which needs to be 2x the agent. #}
+{% set run_int = run_commands | length %}
+{% set interval = ((run_int < 1) | ternary(0, run_int * 8)) + 24 %}
+
 [agent]
-  interval = "24s"
+  interval = "{{ interval }}s"
   round_interval = false
   metric_batch_size = 1024
   metric_buffer_limit = 10240
   collection_jitter = "8s"
-  flush_interval = "48s"
+  flush_interval = "{{ interval * 2 }}s"
   flush_jitter = "8s"
   debug = false
   quiet = true
@@ -33,10 +46,17 @@
 
 [[inputs.system]]
 
-{% if commands %}
+{% set run_commands = [] %}
+{% for key, value in command_plugins.items() %}
+{% if value.when_group | bool and (value.group == inventory_hostname or inventory_hostname in value.group | default([])) %}
+{% set _ = run_commands.extend(value.command) %}
+{% endif %}
+{% endfor %}
+
+{% if run_commands %}
 [[inputs.exec]]
-  commands = [{{ commands | map('quote') | join(',') }}]
-  timeout = "15s"
+  commands = [{{ run_commands | map('quote') | join(',') }}]
+  timeout = "{{ (run_commands | length) * 8 }}s"
   data_format = "influx"
 {% endif %}