Add Telegraf Role

This commit is contained in:
Bastian Mäuser 2021-01-10 19:11:32 +01:00
parent 879cfc0f40
commit 755c1c5af1
17 changed files with 629 additions and 0 deletions

56
roles/telegraf/README.md Normal file
View File

@ -0,0 +1,56 @@
Telegraf
========
An Ansible role to install, configure, and manage [Telegraf](https://github.com/influxdb/telegraf), the plugin-driven server agent for reporting metrics into InfluxDB.
Requirements
------------
Prior knowledge/experience with InfluxDB and Telegraf is highly recommended. Full documentation is available [here](https://docs.influxdata.com).
Installation
------------
Either clone this repository, or install through Ansible Galaxy directly using the command:
```
ansible-galaxy install rossmcdonald.telegraf
```
Role Variables
--------------
The high-level variables are stored in the `defaults/main.yml` file. The most important one is:
```
# Channel of Telegraf to install (currently only 'stable' is supported)
telegraf_install_version: stable
```
More advanced configuration options are stored in the `vars/main.yml` file, which includes all of the necessary bells and whistles to tweak your configuration.
Dependencies
------------
No other Ansible dependencies are required. This role was tested and developed with Ansible 1.9.4.
Example Playbook
----------------
An example playbook is included in the `test.yml` file. There is also a `Vagrantfile`, which can be used for quick local testing leveraging [Vagrant](https://www.vagrantup.com/).
Contributions and Feedback
--------------------------
Any contributions are welcome. For any bugs or feature requests, please open an issue through GitHub.
License
-------
MIT
Author
------
Created by [Ross McDonald](https://github.com/rossmcdonald).

40
roles/telegraf/Vagrantfile vendored Normal file
View File

@ -0,0 +1,40 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Local test environment for the Telegraf role. Defines BOX_COUNT VirtualBox
# guests named telegraf1..telegrafN and attaches the Ansible provisioner
# (playbook test.yml, limit 'all') to the last guest only.
Vagrant.configure(2) do |config|
  # Base box under test; the commented entries are alternative boxes.
  config.vm.box = "ubuntu/trusty64"
  # config.vm.box = "ubuntu/vivid64"
  # config.vm.box = "relativkreativ/centos-7-minimal"
  # config.vm.box = "box-cutter/fedora22"
  # config.vm.box = "puppetlabs/centos-6.6-64-nocm"
  # config.vm.box = "debian/jessie64"

  BOX_COUNT = 1

  1.upto(BOX_COUNT) do |machine_id|
    box_name = "telegraf#{machine_id}"

    config.vm.define box_name do |machine|
      machine.vm.hostname = box_name

      # machine.vm.network "private_network", ip: "10.0.3.#{1+machine_id}", virtualbox__intnet: true
      # machine.vm.network "public_network"
      # NOTE(review): the bridge name looks host-specific — confirm it exists on your machine.
      machine.vm.network "public_network", bridge: 'en0: Wi-Fi (AirPort)'

      machine.vm.provider "virtualbox" do |vbox|
        vbox.cpus = 1
        vbox.memory = 512
      end

      # The provisioner is declared on the final machine only; with
      # limit 'all' that single declaration targets every defined guest.
      if machine_id == BOX_COUNT
        machine.vm.provision "ansible" do |ansible|
          # ansible.verbose = 'vvvv'
          ansible.playbook = "test.yml"
          ansible.limit = 'all'
          ansible.host_key_checking = false
          ansible.sudo = true
          ansible.extra_vars = { is_vagrant: true }
        end
      end
    end
  end
end

View File

@ -0,0 +1,85 @@
---
# ---------------------------------------------------------------------------
# Installation
# ---------------------------------------------------------------------------
# Channel of Telegraf to install
telegraf_install_version: stable
# Direct package URL; when null the package is installed from the repository.
telegraf_install_url: null

# The user and group telegraf should run under (should be set to telegraf
# unless needed otherwise)
telegraf_runas_user: telegraf
telegraf_runas_group: telegraf

# Configuration Template
telegraf_configuration_template: telegraf.conf.j2

# ---------------------------------------------------------------------------
# Configuration Variables
# Values that end up as bare TOML booleans/integers are kept as quoted
# strings so they are rendered verbatim by the template.
# ---------------------------------------------------------------------------
# [global_tags]
telegraf_tags: null
telegraf_aws_tags: false
telegraf_aws_tags_prefix: null

# [agent]
telegraf_agent_interval: '10s'
telegraf_round_interval: 'true'
telegraf_metric_batch_size: '1000'
telegraf_metric_buffer_limit: '10000'
telegraf_collection_jitter: '0s'
telegraf_flush_interval: '10s'
telegraf_flush_jitter: '0s'
telegraf_debug: 'false'
telegraf_quiet: 'false'
telegraf_hostname: null
telegraf_omit_hostname: 'false'

# [[outputs.influxdb]]
telegraf_influxdb_url: 'http://stats.regensburg.freifunk.net:8086'
telegraf_influxdb_database: telegraf
telegraf_influxdb_precision: s
telegraf_influxdb_retention_policy: autogen
telegraf_influxdb_write_consistency: any
telegraf_influxdb_timeout: '5s'
telegraf_influxdb_username: telegraf
telegraf_influxdb_password: null
telegraf_influxdb_user_agent: null
telegraf_influxdb_udp_payload: null
# Optional SSL settings; null means the option is left out of the config.
telegraf_influxdb_ssl_ca: null
telegraf_influxdb_ssl_cert: null
telegraf_influxdb_ssl_key: null
telegraf_influxdb_insecure_skip_verify: null
# Input plugins enabled on every host. Each entry becomes one
# [[inputs.<name>]] section; the optional 'options' mapping supplies the
# key/value pairs rendered inside that section.
telegraf_plugins_base:
  # Plugins used with their built-in defaults
  - name: swap
  - name: processes
  - name: kernel
  - name: netstat
  - name: mem
  - name: system

  # Plugins with explicit options
  - name: cpu
    options:
      percpu: 'true'
      totalcpu: 'true'
      collect_cpu_time: 'false'
      report_active: 'false'
      fielddrop:
        - 'time_*'

  - name: disk
    options:
      mountpoints:
        - '/'
      ignore_fs:
        - tmpfs
        - devtmpfs
        - devfs

  - name: diskio
    options:
      skip_serial_number: 'true'

  - name: procstat
    options:
      exe: influxd
      prefix: influxdb

  - name: net
    options:
      interfaces:
        - eth0
# Effective plugin list handed to the configuration template: the base set
# followed by any entries supplied in 'telegraf_plugins_extra' (optional,
# treated as an empty list when undefined). The two lists are concatenated
# inside a single Jinja expression so the result is a real list, instead of
# a "[...] + [...]" string that Ansible would have to re-evaluate.
telegraf_plugins: "{{ telegraf_plugins_base + (telegraf_plugins_extra | default([])) }}"
# Base URL of the InfluxData package repository; the signing key and the
# per-distribution repository paths are derived from it.
telegraf_influxdata_base_url: "https://repos.influxdata.com"

View File

View File

@ -0,0 +1,30 @@
---
# The order here matters: handlers run in the order they are defined in this
# file, so the sequence is restart -> pause -> check status -> assert running.
# Every handler is a no-op unless 'telegraf_start_service' is true.

- name: restart telegraf
  service:
    name: telegraf
    state: restarted
  become: true
  when: telegraf_start_service

# Wait 'telegraf_start_delay' seconds before the status is inspected.
- name: pause
  pause:
    seconds: "{{ telegraf_start_delay }}"
  when: telegraf_start_service

## After version 2.2 of ansible 'listen' could be used to
## group 'check status' and 'assert running' into a single listener
# The command module's 'warn' argument is intentionally not used: it was
# removed in ansible-core 2.14 and makes the task fail there before any
# return code is registered.
- name: check status
  command: service telegraf status
  # A non-zero exit is tolerated here; 'assert running' evaluates the result.
  ignore_errors: true
  # Querying the status never changes the host.
  changed_when: false
  register: telegraf_service_status
  become: true
  when: telegraf_start_service

- name: assert running
  assert:
    that:
      - "telegraf_service_status.rc == 0"
  when: telegraf_start_service

View File

@ -0,0 +1,24 @@
---
# Ansible Galaxy metadata for the role.
galaxy_info:
  author: Ross McDonald
  description: Install and configure Telegraf, the plugin-driven server agent for reporting metrics into InfluxDB
  company: InfluxData
  license: MIT
  # Quoted so the version is not parsed as a float. The tasks use
  # 'package_facts' and the 'result is success' test syntax, both of which
  # need Ansible 2.5 or newer.
  min_ansible_version: "2.5"
  platforms:
    - name: EL
      versions:
        - 6
        - 7
    - name: Ubuntu
      versions:
        - trusty
        - utopic
        - vivid
    - name: Debian
      versions:
        - jessie
        - wheezy
  categories:
    - monitoring
# The role does not depend on any other role.
dependencies: []

View File

@ -0,0 +1,70 @@
---
# EC2 metadata and tags are only looked up when 'telegraf_aws_tags' is
# enabled; the tag listing is registered as 'ec2_tags' for the template.
- name: Retrieve ec2 facts
  when: telegraf_aws_tags
  ec2_metadata_facts:

- name: Retrieve all ec2 tags on the instance
  when: telegraf_aws_tags
  register: ec2_tags
  ec2_tag:
    state: list
    resource: "{{ ansible_ec2_instance_id }}"
    region: "{{ ansible_ec2_placement_region }}"

# Collect installed-package facts, letting Ansible pick the package manager.
- name: get the rpm or apt package facts
  package_facts:
    manager: auto
# Render the Telegraf configuration from the selected template. Skipped
# entirely when 'telegraf_template_configuration' is false, in which case the
# packaged configuration is left untouched.
- name: Set templatized Telegraf configuration
  template:
    src: "{{ telegraf_configuration_template }}"
    dest: "{{ telegraf_configuration_dir }}/telegraf.conf"
    force: true
    # Keep a timestamped copy of the previous file.
    backup: true
    owner: telegraf
    group: telegraf
    # Quoted so the mode is passed as an octal string instead of relying on
    # YAML 1.1 octal-integer parsing.
    # NOTE(review): the file holds the InfluxDB password and does not need
    # the execute bit — consider a tighter mode such as "0640".
    mode: "0744"
  when: telegraf_template_configuration
  # If config changes, restart telegraf and confirm it remained running
  notify:
    - "restart telegraf"
    - "pause"
    - "check status"
    - "assert running"
# The presence of /etc/init.d/telegraf decides whether the run-as account is
# patched into the init script (here) or set through a systemd drop-in.
- name: Test for sysvinit script
  register: telegraf_sysvinit_script
  stat:
    path: /etc/init.d/telegraf

# Rewrite the USER= assignment only when a non-default user is requested.
- name: Modify user Telegraf should run as [sysvinit]
  when:
    - telegraf_runas_user != "telegraf"
    - telegraf_sysvinit_script.stat.exists
  replace:
    path: /etc/init.d/telegraf
    regexp: 'USER=.*'
    replace: "USER={{ telegraf_runas_user }}"

# Rewrite the GROUP= assignment only when a non-default group is requested.
- name: Modify group Telegraf should run as [sysvinit]
  when:
    - telegraf_runas_group != "telegraf"
    - telegraf_sysvinit_script.stat.exists
  replace:
    path: /etc/init.d/telegraf
    regexp: 'GROUP=.*'
    replace: "GROUP={{ telegraf_runas_group }}"
# systemd path: taken when no sysvinit script exists and a non-default
# run-as user is requested. A drop-in override is written and systemd is
# reloaded if that file changed.
- name: Create systemd service directory [systemd]
  when:
    - telegraf_runas_user != "telegraf"
    - not telegraf_sysvinit_script.stat.exists
  file:
    state: directory
    path: /etc/systemd/system/telegraf.service.d

- name: Modify user Telegraf should run as [systemd]
  when:
    - telegraf_runas_user != "telegraf"
    - not telegraf_sysvinit_script.stat.exists
  register: telegraf_unit_file_updated
  template:
    src: systemd/system/telegraf.service.d/override.conf
    dest: /etc/systemd/system/telegraf.service.d/override.conf

- name: Reload systemd configuration [systemd]
  when:
    - telegraf_unit_file_updated is defined
    - telegraf_unit_file_updated.changed
  systemd:
    daemon_reload: true

View File

@ -0,0 +1,55 @@
---
# Install the helper packages needed before the InfluxData repository can be
# used. The Python bindings are chosen to match the interpreter Ansible runs
# with on the target: 'python3-*' for Python 3, 'python-*' for Python 2
# (the Python 2 packages are not available on newer Debian/Ubuntu releases).
# The task is retried because apt can fail transiently (e.g. a held lock).
- name: Install any necessary dependencies [Debian/Ubuntu]
  apt:
    name:
      - "{{ telegraf_python_pkg_prefix }}-httplib2"
      - "{{ telegraf_python_pkg_prefix }}-apt"
      - curl
      - apt-transport-https
    state: present
    update_cache: true
    cache_valid_time: 3600
  vars:
    telegraf_python_pkg_prefix: "{{ (ansible_python.version.major | int >= 3) | ternary('python3', 'python') }}"
  register: apt_result
  until: apt_result is success
  retries: 2
  delay: 5
# Repository-based installation: used when no direct package URL
# ('telegraf_install_url') has been provided.
- name: Import InfluxData GPG signing key [Debian/Ubuntu]
  when: telegraf_install_url is not defined or telegraf_install_url is none
  apt_key:
    state: present
    url: "{{ telegraf_influxdata_base_url }}/influxdb.key"

- name: Add InfluxData repository [Debian/Ubuntu]
  when: telegraf_install_url is not defined or telegraf_install_url is none
  apt_repository:
    state: present
    repo: "deb {{ telegraf_influxdata_base_url }}/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} {{ telegraf_install_version }}"

# Retried up to two times, five seconds apart, until apt reports success.
- name: Install Telegraf packages [Debian/Ubuntu]
  when: telegraf_install_url is not defined or telegraf_install_url is none
  apt:
    name: telegraf
    state: latest
    cache_valid_time: 3600
    update_cache: true
  register: apt_result
  until: apt_result is success
  delay: 5
  retries: 2
# URL-based installation: used when 'telegraf_install_url' points at a .deb.
# The package is fetched to a fixed path in /tmp and installed from there.
- name: Download Telegraf package via URL [Debian/Ubuntu]
  when: telegraf_install_url is defined and telegraf_install_url is not none
  get_url:
    dest: /tmp/telegraf-ansible-download.deb
    url: "{{ telegraf_install_url }}"

# Retried up to two times, five seconds apart, until apt reports success.
- name: Install downloaded Telegraf package [Debian/Ubuntu]
  when: telegraf_install_url is defined and telegraf_install_url is not none
  apt:
    state: present
    deb: /tmp/telegraf-ansible-download.deb
  register: apt_result
  until: apt_result is success
  delay: 5
  retries: 2

View File

@ -0,0 +1,21 @@
---
# Repository-based installation (no 'telegraf_install_url' given): write the
# InfluxData yum repository definition, then install the package from it.
- name: Add InfluxData repository file [RHEL/CentOS]
  when: telegraf_install_url is not defined or telegraf_install_url is none
  template:
    src: etc/yum.repos.d/influxdata.repo.j2
    dest: /etc/yum.repos.d/influxdata.repo
    backup: true
    force: true

- name: Install Telegraf packages [RHEL/CentOS]
  when: telegraf_install_url is not defined or telegraf_install_url is none
  yum:
    name: telegraf
    state: latest
    update_cache: true

# URL-based installation: yum installs the package straight from the URL.
- name: Install Telegraf from URL [RHEL/CentOS]
  when: telegraf_install_url is defined and telegraf_install_url is not none
  yum:
    name: "{{ telegraf_install_url }}"
    state: present

View File

@ -0,0 +1,6 @@
---
# Dispatch to the installer matching the target's OS family. 'import_tasks'
# replaces the deprecated bare 'include'; being static, it applies the 'when'
# condition to every imported task, as the old include did.
- import_tasks: install-redhat.yml
  when: ansible_os_family == "RedHat"

- import_tasks: install-debian.yml
  when: ansible_os_family == "Debian"

View File

@ -0,0 +1,10 @@
---
# Role entry point: install, configure, then (optionally) start Telegraf.
# 'import_tasks' replaces the deprecated bare 'include'. Static imports pass
# their tags and 'when' condition down to every imported task, so running
# with e.g. '--tags configure' still selects the tasks inside the file.
- import_tasks: install.yml
  tags: [telegraf, install]

- import_tasks: configure.yml
  tags: [telegraf, configure]

# Starting the service can be switched off via 'telegraf_start_service'.
- import_tasks: start.yml
  tags: [telegraf, start]
  when: telegraf_start_service

View File

@ -0,0 +1,12 @@
---
# Ensure the Telegraf service is running and enabled.
- name: Start the Telegraf service
  become: true
  service:
    name: telegraf
    enabled: true
    state: started
  # Only care to check the status if the state changed to 'started'
  notify:
    - pause
    - check status
    - assert running

View File

@ -0,0 +1,13 @@
# InfluxData yum repository definition (generated by Ansible).
# The base URL depends on the distribution: Amazon Linux is pointed at the
# CentOS 6 tree, Red Hat uses the 'rhel' tree, and every other distribution
# uses a tree named after itself. Values are written without quotes because
# quote characters would become part of the URL.
[influxdb]
name = InfluxDB Repository - {{ ansible_distribution }} $releasever
{% if ansible_distribution|lower == "amazon" %}
baseurl = {{ telegraf_influxdata_base_url }}/centos/6/amd64/{{ telegraf_install_version }}
{% elif ansible_distribution|lower == "redhat" %}
baseurl = {{ telegraf_influxdata_base_url }}/rhel/$releasever/$basearch/{{ telegraf_install_version }}
{% else %}
baseurl = {{ telegraf_influxdata_base_url }}/{{ ansible_distribution|lower }}/$releasever/$basearch/{{ telegraf_install_version }}
{% endif %}
enabled = 1
gpgcheck = 1
gpgkey = {{ telegraf_influxdata_base_url }}/influxdb.key
sslverify = 1

View File

@ -0,0 +1,2 @@
# systemd drop-in for the telegraf unit: sets the account the service runs as.
# NOTE(review): only the user is set here; telegraf_runas_group is not
# applied on this path — confirm whether a Group= line is wanted.
[Service]
User={{ telegraf_runas_user }}

View File

@ -0,0 +1,181 @@
# Telegraf configuration
# Telegraf is entirely plugin driven. All metrics are gathered from the
# declared plugins.
# Even if a plugin has no configuration, it must be declared in here
# to be active. Declaring a plugin means just specifying the name
# as a section with no variables. To deactivate a plugin, comment
# out the name and any variables.
# Use 'telegraf -config telegraf.toml -test' to see what metrics a config
# file would generate.
# One rule that plugins conform to is wherever a connection string
# can be passed, the values '' and 'localhost' are treated specially.
# They indicate to the plugin to use their own builtin configuration to
# connect to the local system.
# NOTE: The configuration has a few required parameters. They are marked
# with 'required'. Be sure to edit those to make this configuration work.
# Tags can also be specified via a normal map, but only one form at a time:
[global_tags]
{# Static tags supplied through the telegraf_tags mapping. #}
{% if telegraf_tags is defined and telegraf_tags is not none %}
{% for tag_name, tag_value in telegraf_tags.items() %}
{{ tag_name }} = "{{ tag_value }}"
{% endfor %}
{% endif %}
{# EC2 instance tags, each key prefixed with telegraf_aws_tags_prefix. #}
{% if telegraf_aws_tags == true and ec2_tags is defined and ec2_tags is not none %}
{% for tag_name, tag_value in ec2_tags.tags.items() %}
{{ telegraf_aws_tags_prefix }}{{ tag_name }} = "{{ tag_value }}"
{% endfor %}
{% endif %}
# Configuration for telegraf agent
[agent]
## Default data collection interval for all inputs
interval = "{{ telegraf_agent_interval }}"
## Rounds collection interval to 'interval'
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = {{ telegraf_round_interval }}
## Telegraf will send metrics to outputs in batches of at
## most metric_batch_size metrics.
metric_batch_size = {{ telegraf_metric_batch_size }}
## For failed writes, telegraf will cache metric_buffer_limit metrics for each
## output, and will flush this buffer on a successful write. Oldest metrics
## are dropped first when this buffer fills.
metric_buffer_limit = {{ telegraf_metric_buffer_limit }}
## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
## This can be used to avoid many plugins querying things like sysfs at the
## same time, which can have a measurable effect on the system.
collection_jitter = "{{ telegraf_collection_jitter }}"
## Default flushing interval for all outputs. You shouldn't set this below
## interval. Maximum flush_interval will be flush_interval + flush_jitter
flush_interval = "{{ telegraf_flush_interval }}"
## Jitter the flush interval by a random amount. This is primarily to avoid
## large write spikes for users running a large number of telegraf instances.
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "{{ telegraf_flush_jitter }}"
## Run telegraf in debug mode
debug = {{ telegraf_debug }}
## Run telegraf in quiet mode
quiet = {{ telegraf_quiet }}
## Hostname reported with every metric: telegraf_hostname when it is set to
## a non-empty value, otherwise the fully qualified name of the host.
hostname = "{{ telegraf_hostname | default(ansible_fqdn, true) }}"
## If set to true, do no set the "host" tag in the telegraf agent.
omit_hostname = {{ telegraf_omit_hostname }}
###############################################################################
# OUTPUTS #
###############################################################################
[outputs]
# Configuration for influxdb server to send metrics to
[[outputs.influxdb]]
# The full HTTP or UDP endpoint URL for your InfluxDB instance.
# Multiple urls can be specified but it is assumed that they are part of the same
# cluster, this means that only ONE of the urls will be written to each interval.
# urls = ["udp://localhost:8089"] # UDP endpoint example
## urls = [ "" ] # required
urls = ["{{ telegraf_influxdb_url }}"]
# The target database for metrics (telegraf will create it if not exists)
database = "{{ telegraf_influxdb_database }}" # required
# Precision of writes, valid values are n, u, ms, s, m, and h
# note: using second precision greatly helps InfluxDB compression
precision = "{{ telegraf_influxdb_precision }}"
## Retention policy to write to.
retention_policy = "{{ telegraf_influxdb_retention_policy }}"
## Write consistency (clusters only), can be: "any", "one", "quorum", "all"
write_consistency = "{{ telegraf_influxdb_write_consistency }}"
# Connection timeout (for the connection with InfluxDB), formatted as a string.
# If not provided, will default to 0 (no timeout)
timeout = "{{ telegraf_influxdb_timeout }}"
# Credentials are only written when the corresponding variable is set.
{% if telegraf_influxdb_username is defined and telegraf_influxdb_username != None %}
username = "{{ telegraf_influxdb_username }}"
{% endif %}
{% if telegraf_influxdb_password is defined and telegraf_influxdb_password != None %}
password = "{{ telegraf_influxdb_password }}"
{% endif %}
# Set the user agent for HTTP POSTs (can be useful for log differentiation)
{% if telegraf_influxdb_user_agent is defined and telegraf_influxdb_user_agent != None %}
user_agent = "{{ telegraf_influxdb_user_agent }}"
{% endif %}
# Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes)
{% if telegraf_influxdb_udp_payload is defined and telegraf_influxdb_udp_payload != None %}
udp_payload = {{ telegraf_influxdb_udp_payload }}
{% endif %}
## Optional SSL Config
## Each option is emitted as an active setting only when its variable is set.
{% if telegraf_influxdb_ssl_ca is defined and telegraf_influxdb_ssl_ca != None %}
ssl_ca = "{{ telegraf_influxdb_ssl_ca }}"
{% endif %}
{% if telegraf_influxdb_ssl_cert is defined and telegraf_influxdb_ssl_cert != None %}
ssl_cert = "{{ telegraf_influxdb_ssl_cert }}"
{% endif %}
{% if telegraf_influxdb_ssl_key is defined and telegraf_influxdb_ssl_key != None %}
ssl_key = "{{ telegraf_influxdb_ssl_key }}"
{% endif %}
{% if telegraf_influxdb_insecure_skip_verify is defined and telegraf_influxdb_insecure_skip_verify != None %}
## Use SSL but skip chain & host verification
insecure_skip_verify = {{ telegraf_influxdb_insecure_skip_verify }}
{% endif %}
###############################################################################
# PLUGINS #
###############################################################################
{#
  Renders one [[inputs.NAME]] section per entry of telegraf_plugins.

  For a plugin that has an 'options' mapping, the options are walked twice:
    1. first pass  - every option whose value is NOT a mapping is written as
       'key = value' directly below the plugin header;
    2. second pass - every option whose value IS a mapping is written as a
       sub-table [inputs.NAME.KEY] followed by that mapping's entries.
  Presumably the scalar pass comes first so those keys stay in the plugin's
  own table rather than in a preceding sub-table.

  Value formatting (identical in both passes):
    - list (non-string sequence): rendered as an array; elements are written
      bare when the FIRST element is a number, otherwise each is quoted;
    - the strings "true"/"false" and numbers: written bare, lower-cased;
    - anything else: written as a double-quoted string.
  Only one level of nested mapping is handled.
#}
{% for plugin in telegraf_plugins %}
[[inputs.{{ plugin.name }}]]
{% if plugin.options is defined %}
{% for key, value in plugin.options.items() %}
{% if value is not mapping %}
{% if value is sequence and value is not string %}
{% if value[0] is number %}
{{ key }} = [ {{ value|join(', ') }} ]
{% else %}
{{ key }} = [ "{{ value|join('", "') }}" ]
{% endif %}
{% else %}
{% if value == "true" or value == "false" or value is number %}
{{ key }} = {{ value | lower }}
{% else %}
{{ key }} = "{{ value }}"
{% endif %}
{% endif %}
{% endif %}
{% endfor %}
{% for key, value in plugin.options.items() %}
{% if value is mapping %}
[inputs.{{ plugin.name }}.{{ key }}]
{% for lv2_key, lv2_value in value.items() %}
{% if lv2_value is sequence and lv2_value is not string %}
{% if lv2_value[0] is number %}
{{ lv2_key }} = [ {{ lv2_value|join(', ') }} ]
{% else %}
{{ lv2_key }} = [ "{{ lv2_value|join('", "') }}" ]
{% endif %}
{% else %}
{% if lv2_value == "true" or lv2_value == "false" or lv2_value is number %}
{{ lv2_key }} = {{ lv2_value | lower }}
{% else %}
{{ lv2_key }} = "{{ lv2_value }}"
{% endif %}
{% endif %}
{% endfor %}
{% endif %}
{% endfor %}
{% endif %}
{% endfor %}
###############################################################################
# service PLUGINS #
###############################################################################

8
roles/telegraf/test.yml Normal file
View File

@ -0,0 +1,8 @@
---
# Stand-alone test playbook (used by the Vagrantfile): loads the role's
# defaults and vars explicitly and runs its tasks and handlers directly.
# 'import_tasks' replaces the deprecated bare 'include'.
- hosts: all
  vars_files:
    - defaults/main.yml
    - vars/main.yml
  tasks:
    - import_tasks: tasks/main.yml
  handlers:
    - import_tasks: handlers/main.yml

View File

@ -0,0 +1,16 @@
---
# Whether or not the playbook is run locally
# This should only be set in the Vagrantfile and not modified elsewhere
is_vagrant: false

# If true, the service will be started. Will not be started if set to false.
telegraf_start_service: true
# Seconds the 'pause' handler waits before the service status is checked.
telegraf_start_delay: 6

# If true, will overwrite the packaged configuration with an Ansible/jinja2 template
telegraf_template_configuration: true

# Path for finding Telegraf data. Added for backwards-compatibility.
telegraf_binary_path: /usr/bin/telegraf
telegraf_configuration_dir: /etc/telegraf