diff --git a/roles/telegraf/README.md b/roles/telegraf/README.md new file mode 100644 index 0000000..2a99954 --- /dev/null +++ b/roles/telegraf/README.md @@ -0,0 +1,56 @@ +Telegraf +======== + +An Ansible role to install, configure, and manage [Telegraf](https://github.com/influxdata/telegraf), the plugin-driven server agent for reporting metrics into InfluxDB. + +Requirements +------------ + +Prior knowledge/experience with InfluxDB and Telegraf is highly recommended. Full documentation is available [here](https://docs.influxdata.com). + +Installation +------------ + +Either clone this repository, or install through Ansible Galaxy directly using the command: + +``` +ansible-galaxy install rossmcdonald.telegraf +``` + +Role Variables +-------------- + +The high-level variables are stored in the `defaults/main.yml` file. The most important one is: + +``` +# Channel of Telegraf to install (currently only 'stable' is supported) +telegraf_install_version: stable +``` + +More advanced configuration options are stored in the `vars/main.yml` file, which includes all of the necessary bells and whistles to tweak your configuration. + +Dependencies +------------ + +No other Ansible dependencies are required. This role was tested and developed with Ansible 1.9.4. + +Example Playbook +---------------- + +An example playbook is included in the `test.yml` file. There is also a `Vagrantfile`, which can be used for quick local testing leveraging [Vagrant](https://www.vagrantup.com/). + +Contributions and Feedback +-------------------------- + +Any contributions are welcome. For any bugs or feature requests, please open an issue through GitHub. + +License +------- + +MIT + +Author +------ + +Created by [Ross McDonald](https://github.com/rossmcdonald). 
+ diff --git a/roles/telegraf/Vagrantfile b/roles/telegraf/Vagrantfile new file mode 100644 index 0000000..1c38fd6 --- /dev/null +++ b/roles/telegraf/Vagrantfile @@ -0,0 +1,40 @@ +# -*- mode: ruby -*- +# vi: set ft=ruby : + +Vagrant.configure(2) do |config| + config.vm.box = "ubuntu/trusty64" + # config.vm.box = "ubuntu/vivid64" + # config.vm.box = "relativkreativ/centos-7-minimal" + # config.vm.box = "box-cutter/fedora22" + # config.vm.box = "puppetlabs/centos-6.6-64-nocm" + # config.vm.box = "debian/jessie64" + + box_count = 1 # local, not a constant, so re-evaluating this file does not warn about redefinition + (1..box_count).each do |machine_id| + config.vm.define "telegraf#{machine_id}" do |machine| + machine.vm.hostname = "telegraf#{machine_id}" + # machine.vm.network "private_network", ip: "10.0.3.#{1+machine_id}", virtualbox__intnet: true + # machine.vm.network "public_network" + machine.vm.network "public_network", :bridge => 'en0: Wi-Fi (AirPort)' + + machine.vm.provider "virtualbox" do |v| + v.memory = 512 + v.cpus = 1 + end + + if machine_id == box_count + machine.vm.provision "ansible" do |ansible| + # ansible.verbose = 'vvvv' + ansible.limit = 'all' + ansible.playbook = "test.yml" + ansible.sudo = true + ansible.host_key_checking = false + ansible.extra_vars = { + is_vagrant: true, + } + end + end + + end + end +end diff --git a/roles/telegraf/defaults/main.yml b/roles/telegraf/defaults/main.yml new file mode 100644 index 0000000..08d7b28 --- /dev/null +++ b/roles/telegraf/defaults/main.yml @@ -0,0 +1,85 @@ +--- +# Channel of Telegraf to install +telegraf_install_version: stable + +# The user and group telegraf should run under (should be set to telegraf unless needed otherwise) +telegraf_runas_user: telegraf +telegraf_runas_group: telegraf + +# Configuration Template +telegraf_configuration_template: telegraf.conf.j2 + +# Configuration Variables +telegraf_tags: +telegraf_aws_tags: false +telegraf_aws_tags_prefix: + +telegraf_agent_interval: 10s +telegraf_round_interval: "true" +telegraf_metric_batch_size: "1000" 
+telegraf_metric_buffer_limit: "10000" + +telegraf_collection_jitter: 0s +telegraf_flush_interval: 10s +telegraf_flush_jitter: 0s +telegraf_debug: "false" +telegraf_quiet: "false" +telegraf_hostname: +telegraf_omit_hostname: "false" +telegraf_install_url: + + +telegraf_influxdb_url: http://stats.regensburg.freifunk.net:8086 +telegraf_influxdb_database: telegraf +telegraf_influxdb_precision: s +telegraf_influxdb_retention_policy: autogen +telegraf_influxdb_write_consistency: any +telegraf_influxdb_ssl_ca: +telegraf_influxdb_ssl_cert: +telegraf_influxdb_ssl_key: +telegraf_influxdb_insecure_skip_verify: + +telegraf_influxdb_timeout: 5s +telegraf_influxdb_username: telegraf +telegraf_influxdb_password: +telegraf_influxdb_user_agent: +telegraf_influxdb_udp_payload: + +telegraf_plugins_base: + - name: swap + - name: processes + - name: kernel + - name: netstat + - name: mem + - name: system + - name: cpu + options: + percpu: "true" + totalcpu: "true" + collect_cpu_time: "false" + report_active: "false" + fielddrop: + - "time_*" + - name: disk + options: + mountpoints: + - "/" + ignore_fs: + - "tmpfs" + - "devtmpfs" + - "devfs" + - name: diskio + options: + skip_serial_number: "true" + - name: procstat + options: + exe: "influxd" + prefix: "influxdb" + - name: net + options: + interfaces: + - "eth0" + +telegraf_plugins: "{{ telegraf_plugins_base + (telegraf_plugins_extra | default([])) }}" + +telegraf_influxdata_base_url: "https://repos.influxdata.com" diff --git a/roles/telegraf/handlers/install.yml b/roles/telegraf/handlers/install.yml new file mode 100644 index 0000000..e69de29 diff --git a/roles/telegraf/handlers/main.yml b/roles/telegraf/handlers/main.yml new file mode 100644 index 0000000..04dc39f --- /dev/null +++ b/roles/telegraf/handlers/main.yml @@ -0,0 +1,30 @@ +--- +# The order here matters +- name: restart telegraf + service: + name: telegraf + state: restarted + become: true + when: telegraf_start_service + +- name: pause + pause: + seconds: "{{ 
telegraf_start_delay }}" + when: telegraf_start_service + +## After version 2.2 of ansible 'listen' could be used to +## group 'check status' and 'assert running' into a single listener +- name: check status + command: service telegraf status + args: + warn: false + ignore_errors: yes + register: telegraf_service_status + become: true + when: telegraf_start_service + +- name: assert running + assert: + that: + - "telegraf_service_status.rc == 0" + when: telegraf_start_service diff --git a/roles/telegraf/meta/main.yml b/roles/telegraf/meta/main.yml new file mode 100644 index 0000000..9b8fe5b --- /dev/null +++ b/roles/telegraf/meta/main.yml @@ -0,0 +1,24 @@ +--- +galaxy_info: + author: Ross McDonald + description: Install and configure Telegraf, the plugin-driven server agent for reporting metrics into InfluxDB + company: InfluxData + license: MIT + min_ansible_version: "2.5" # package_facts and the 'is success' test used by the tasks need 2.5 + platforms: + - name: EL + versions: + - 6 + - 7 + - name: Ubuntu + versions: + - trusty + - utopic + - vivid + - name: Debian + versions: + - jessie + - wheezy + categories: + - monitoring +dependencies: [] diff --git a/roles/telegraf/tasks/configure.yml b/roles/telegraf/tasks/configure.yml new file mode 100644 index 0000000..ef07f23 --- /dev/null +++ b/roles/telegraf/tasks/configure.yml @@ -0,0 +1,70 @@ +--- +- name: Retrieve ec2 facts + ec2_metadata_facts: + when: telegraf_aws_tags + +- name: Retrieve all ec2 tags on the instance + ec2_tag: + region: "{{ ansible_ec2_placement_region }}" + resource: "{{ ansible_ec2_instance_id }}" + state: list + when: telegraf_aws_tags + register: ec2_tags + +- name: get the rpm or apt package facts + package_facts: + manager: "auto" + +- name: Set templatized Telegraf configuration + template: + src: "{{ telegraf_configuration_template }}" + dest: "{{ telegraf_configuration_dir }}/telegraf.conf" + force: yes + backup: yes + owner: telegraf + group: telegraf + mode: "0644" # plain config file; no execute bit + when: telegraf_template_configuration + # If config changes, restart telegraf and 
confirm it remained running + notify: + - "restart telegraf" + - "pause" + - "check status" + - "assert running" + +- name: Test for sysvinit script + stat: + path: /etc/init.d/telegraf + register: telegraf_sysvinit_script + +- name: Modify user Telegraf should run as [sysvinit] + replace: + path: /etc/init.d/telegraf + regexp: USER=.* + replace: USER={{ telegraf_runas_user }} + when: telegraf_runas_user != "telegraf" and telegraf_sysvinit_script.stat.exists + +- name: Modify group Telegraf should run as [sysvinit] + replace: + path: /etc/init.d/telegraf + regexp: GROUP=.* + replace: GROUP={{ telegraf_runas_group }} + when: telegraf_runas_group != "telegraf" and telegraf_sysvinit_script.stat.exists + +- name: Create systemd service directory [systemd] + file: + path: /etc/systemd/system/telegraf.service.d + state: directory + when: telegraf_runas_user != "telegraf" and not telegraf_sysvinit_script.stat.exists + +- name: Modify user Telegraf should run as [systemd] + template: + src: systemd/system/telegraf.service.d/override.conf + dest: /etc/systemd/system/telegraf.service.d/override.conf + when: telegraf_runas_user != "telegraf" and not telegraf_sysvinit_script.stat.exists + register: telegraf_unit_file_updated + +- name: Reload systemd configuration [systemd] + systemd: + daemon_reload: yes + when: telegraf_unit_file_updated is defined and telegraf_unit_file_updated.changed diff --git a/roles/telegraf/tasks/install-debian.yml b/roles/telegraf/tasks/install-debian.yml new file mode 100644 index 0000000..5aeac2e --- /dev/null +++ b/roles/telegraf/tasks/install-debian.yml @@ -0,0 +1,55 @@ +--- +- name: Install any necessary dependencies [Debian/Ubuntu] + apt: + name: + - python-httplib2 + - python-apt + - curl + - apt-transport-https + state: present + update_cache: yes + cache_valid_time: 3600 + register: apt_result + until: apt_result is success + retries: 2 + delay: 5 + +- name: Import InfluxData GPG signing key [Debian/Ubuntu] + apt_key: + url: "{{ 
telegraf_influxdata_base_url }}/influxdb.key" + state: present + when: telegraf_install_url is not defined or telegraf_install_url == None + +- name: Add InfluxData repository [Debian/Ubuntu] + apt_repository: + repo: deb {{ telegraf_influxdata_base_url }}/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} {{ telegraf_install_version }} + state: present + when: telegraf_install_url is not defined or telegraf_install_url == None + +- name: Install Telegraf packages [Debian/Ubuntu] + apt: + name: telegraf + state: latest + update_cache: yes + cache_valid_time: 3600 + register: apt_result + until: apt_result is success + retries: 2 + delay: 5 + when: telegraf_install_url is not defined or telegraf_install_url == None + +- name: Download Telegraf package via URL [Debian/Ubuntu] + get_url: + url: "{{ telegraf_install_url }}" + dest: /tmp/telegraf-ansible-download.deb + when: telegraf_install_url is defined and telegraf_install_url != None + +- name: Install downloaded Telegraf package [Debian/Ubuntu] + apt: + deb: /tmp/telegraf-ansible-download.deb + state: present + register: apt_result + until: apt_result is success + retries: 2 + delay: 5 + when: telegraf_install_url is defined and telegraf_install_url != None diff --git a/roles/telegraf/tasks/install-redhat.yml b/roles/telegraf/tasks/install-redhat.yml new file mode 100644 index 0000000..dbe5ca1 --- /dev/null +++ b/roles/telegraf/tasks/install-redhat.yml @@ -0,0 +1,21 @@ +--- +- name: Add InfluxData repository file [RHEL/CentOS] + template: + src: etc/yum.repos.d/influxdata.repo.j2 + dest: /etc/yum.repos.d/influxdata.repo + force: yes + backup: yes + when: telegraf_install_url is not defined or telegraf_install_url == None + +- name: Install Telegraf packages [RHEL/CentOS] + yum: + name: telegraf + state: latest + update_cache: yes + when: telegraf_install_url is not defined or telegraf_install_url == None + +- name: Install Telegraf from URL [RHEL/CentOS] + yum: + name: "{{ telegraf_install_url 
}}" + state: present + when: telegraf_install_url is defined and telegraf_install_url != None diff --git a/roles/telegraf/tasks/install.yml b/roles/telegraf/tasks/install.yml new file mode 100644 index 0000000..b192444 --- /dev/null +++ b/roles/telegraf/tasks/install.yml @@ -0,0 +1,6 @@ +--- +- import_tasks: install-redhat.yml + when: ansible_os_family == "RedHat" + +- import_tasks: install-debian.yml + when: ansible_os_family == "Debian" diff --git a/roles/telegraf/tasks/main.yml b/roles/telegraf/tasks/main.yml new file mode 100644 index 0000000..bafa60b --- /dev/null +++ b/roles/telegraf/tasks/main.yml @@ -0,0 +1,10 @@ +--- +- import_tasks: install.yml + tags: [telegraf, install] + +- import_tasks: configure.yml + tags: [telegraf, configure] + +- import_tasks: start.yml + tags: [telegraf, start] + when: telegraf_start_service diff --git a/roles/telegraf/tasks/start.yml b/roles/telegraf/tasks/start.yml new file mode 100644 index 0000000..49c497b --- /dev/null +++ b/roles/telegraf/tasks/start.yml @@ -0,0 +1,12 @@ +--- +- name: Start the Telegraf service + service: + name: telegraf + state: started + enabled: yes + # Only care to check the status if the state changed to 'started' + notify: + - "pause" + - "check status" + - "assert running" + become: true diff --git a/roles/telegraf/templates/etc/yum.repos.d/influxdata.repo.j2 b/roles/telegraf/templates/etc/yum.repos.d/influxdata.repo.j2 new file mode 100644 index 0000000..a9af113 --- /dev/null +++ b/roles/telegraf/templates/etc/yum.repos.d/influxdata.repo.j2 @@ -0,0 +1,13 @@ +[influxdb] +name = InfluxDB Repository - {{ ansible_distribution }} $releasever +{% if ansible_distribution|lower == "amazon" %} +baseurl = "{{ telegraf_influxdata_base_url }}/centos/6/amd64/{{ telegraf_install_version }}" +{% elif ansible_distribution|lower == "redhat" %} +baseurl = {{ telegraf_influxdata_base_url }}/rhel/$releasever/$basearch/{{ telegraf_install_version }} +{% else %} +baseurl = {{ telegraf_influxdata_base_url }}/{{ ansible_distribution|lower 
}}/$releasever/$basearch/{{ telegraf_install_version }} +{% endif %} +enabled = 1 +gpgcheck = 1 +gpgkey = {{ telegraf_influxdata_base_url }}/influxdb.key +sslverify = 1 diff --git a/roles/telegraf/templates/systemd/system/telegraf.service.d/override.conf b/roles/telegraf/templates/systemd/system/telegraf.service.d/override.conf new file mode 100644 index 0000000..967ffc3 --- /dev/null +++ b/roles/telegraf/templates/systemd/system/telegraf.service.d/override.conf @@ -0,0 +1,2 @@ +[Service] +User={{ telegraf_runas_user }} diff --git a/roles/telegraf/templates/telegraf.conf.j2 b/roles/telegraf/templates/telegraf.conf.j2 new file mode 100644 index 0000000..3c83b76 --- /dev/null +++ b/roles/telegraf/templates/telegraf.conf.j2 @@ -0,0 +1,181 @@ +# Telegraf configuration + +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared plugins. + +# Even if a plugin has no configuration, it must be declared in here +# to be active. Declaring a plugin means just specifying the name +# as a section with no variables. To deactivate a plugin, comment +# out the name and any variables. + +# Use 'telegraf -config telegraf.toml -test' to see what metrics a config +# file would generate. + +# One rule that plugins conform to is wherever a connection string +# can be passed, the values '' and 'localhost' are treated specially. +# They indicate to the plugin to use their own builtin configuration to +# connect to the local system. + +# NOTE: The configuration has a few required parameters. They are marked +# with 'required'. Be sure to edit those to make this configuration work. 
+ +# Tags can also be specified via a normal map, but only one form at a time: +[global_tags] +{% if telegraf_tags is defined and telegraf_tags != None %} +{% for key, value in telegraf_tags.items()%} + {{ key }} = "{{ value }}" +{% endfor %} +{% endif %} +{% if telegraf_aws_tags == true and ec2_tags is defined and ec2_tags != None %} +{% for key, value in ec2_tags.tags.items()%} + {{ telegraf_aws_tags_prefix }}{{ key }} = "{{ value }}" +{% endfor %} +{% endif %} + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "{{ telegraf_agent_interval }}" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = {{ telegraf_round_interval }} + + ## Telegraf will send metrics to outputs in batches of at + ## most metric_batch_size metrics. + metric_batch_size = {{ telegraf_metric_batch_size }} + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. Oldest metrics + ## are dropped first when this buffer fills. + metric_buffer_limit = {{ telegraf_metric_buffer_limit }} + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "{{ telegraf_collection_jitter }}" + + ## Default flushing interval for all outputs. You shouldn't set this below + ## interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "{{ telegraf_flush_interval }}" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. 
+ ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "{{ telegraf_flush_jitter }}" + + ## Run telegraf in debug mode + debug = {{ telegraf_debug }} + ## Run telegraf in quiet mode + quiet = {{ telegraf_quiet }} + + hostname = "{{ telegraf_hostname | default(ansible_fqdn, true) }}" + + ## If set to true, do not set the "host" tag in the telegraf agent. + omit_hostname = {{ telegraf_omit_hostname }} + +############################################################################### +# OUTPUTS # +############################################################################### + +[outputs] + +# Configuration for influxdb server to send metrics to +[[outputs.influxdb]] + # The full HTTP or UDP endpoint URL for your InfluxDB instance. + # Multiple urls can be specified but it is assumed that they are part of the same + # cluster, this means that only ONE of the urls will be written to each interval. + # urls = ["udp://localhost:8089"] # UDP endpoint example + ## urls = [ "" ] # required + urls = ["{{ telegraf_influxdb_url }}"] + # The target database for metrics (telegraf will create it if not exists) + database = "{{ telegraf_influxdb_database }}" # required + # Precision of writes, valid values are n, u, ms, s, m, and h + # note: using second precision greatly helps InfluxDB compression + precision = "{{ telegraf_influxdb_precision }}" + + ## Retention policy to write to. + retention_policy = "{{ telegraf_influxdb_retention_policy }}" + ## Write consistency (clusters only), can be: "any", "one", "quorum", "all" + write_consistency = "{{ telegraf_influxdb_write_consistency }}" + + # Connection timeout (for the connection with InfluxDB), formatted as a string. 
+ # If not provided, will default to 0 (no timeout) + timeout = "{{ telegraf_influxdb_timeout }}" +{% if telegraf_influxdb_username is defined and telegraf_influxdb_username != None %} + username = "{{ telegraf_influxdb_username }}" +{% endif %} + password = "{{ telegraf_influxdb_password }}" + # Set the user agent for HTTP POSTs (can be useful for log differentiation) +{% if telegraf_influxdb_user_agent is defined and telegraf_influxdb_user_agent != None %} + user_agent = "{{ telegraf_influxdb_user_agent }}" +{% endif %} + # Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes) +{% if telegraf_influxdb_udp_payload is defined and telegraf_influxdb_udp_payload != None %} + udp_payload = {{ telegraf_influxdb_udp_payload }} +{% endif %} + + ## Optional SSL Config +{% if telegraf_influxdb_ssl_ca is defined and telegraf_influxdb_ssl_ca != None %} + ssl_ca = "{{ telegraf_influxdb_ssl_ca }}" +{% endif %} +{% if telegraf_influxdb_ssl_cert is defined and telegraf_influxdb_ssl_cert != None %} + ssl_cert = "{{ telegraf_influxdb_ssl_cert }}" +{% endif %} +{% if telegraf_influxdb_ssl_key is defined and telegraf_influxdb_ssl_key != None %} + ssl_key = "{{ telegraf_influxdb_ssl_key }}" +{% endif %} + +{% if telegraf_influxdb_insecure_skip_verify is defined and telegraf_influxdb_insecure_skip_verify != None %} + ## Use SSL but skip chain & host verification + insecure_skip_verify = {{ telegraf_influxdb_insecure_skip_verify }} +{% endif %} + +############################################################################### +# PLUGINS # +############################################################################### + +{% for plugin in telegraf_plugins %} +[[inputs.{{ plugin.name }}]] +{% if plugin.options is defined %} +{% for key, value in plugin.options.items() %} +{% if value is not mapping %} +{% if value is sequence and value is not string %} +{% if value[0] is number %} + {{ key }} = [ {{ value|join(', ') }} ] +{% else %} + {{ key }} = [ "{{ 
value|join('", "') }}" ] +{% endif %} +{% else %} +{% if value == "true" or value == "false" or value is number %} + {{ key }} = {{ value | lower }} +{% else %} + {{ key }} = "{{ value }}" +{% endif %} +{% endif %} +{% endif %} +{% endfor %} +{% for key, value in plugin.options.items() %} +{% if value is mapping %} + [inputs.{{ plugin.name }}.{{ key }}] +{% for lv2_key, lv2_value in value.items() %} +{% if lv2_value is sequence and lv2_value is not string %} +{% if lv2_value[0] is number %} + {{ lv2_key }} = [ {{ lv2_value|join(', ') }} ] +{% else %} + {{ lv2_key }} = [ "{{ lv2_value|join('", "') }}" ] +{% endif %} +{% else %} +{% if lv2_value == "true" or lv2_value == "false" or lv2_value is number %} + {{ lv2_key }} = {{ lv2_value | lower }} +{% else %} + {{ lv2_key }} = "{{ lv2_value }}" +{% endif %} +{% endif %} +{% endfor %} +{% endif %} +{% endfor %} +{% endif %} +{% endfor %} + +############################################################################### +# service PLUGINS # +############################################################################### diff --git a/roles/telegraf/test.yml b/roles/telegraf/test.yml new file mode 100644 index 0000000..e0e8b5e --- /dev/null +++ b/roles/telegraf/test.yml @@ -0,0 +1,8 @@ +- hosts: all + vars_files: + - defaults/main.yml + - vars/main.yml + tasks: + - import_tasks: tasks/main.yml + handlers: + - import_tasks: handlers/main.yml diff --git a/roles/telegraf/vars/main.yml b/roles/telegraf/vars/main.yml new file mode 100644 index 0000000..d2d07b3 --- /dev/null +++ b/roles/telegraf/vars/main.yml @@ -0,0 +1,16 @@ +--- + +# Whether or not the playbook is run locally +# This should only be set in the Vagrantfile and not modified elsewhere +is_vagrant: no + +# If yes, service will be started. Will not be started if set to no. 
+telegraf_start_service: yes +telegraf_start_delay: 6 + +# If yes, will overwrite the packaged configuration with an Ansible/Jinja2 template +telegraf_template_configuration: yes + +# Path for finding Telegraf data. Added for backwards-compatibility. +telegraf_binary_path: /usr/bin/telegraf +telegraf_configuration_dir: /etc/telegraf