From 1b5b6d8b9d3e57ac2f2db3402b35b183972302e7 Mon Sep 17 00:00:00 2001 From: Christian Poessinger Date: Thu, 25 Aug 2022 18:51:15 +0200 Subject: telegraf: T4617: add VRF support --- data/templates/monitoring/override.conf.j2 | 7 -- data/templates/monitoring/syslog_telegraf.j2 | 5 - .../monitoring/systemd_vyos_telegraf_service.j2 | 16 --- data/templates/monitoring/telegraf.j2 | 122 --------------------- data/templates/telegraf/override.conf.j2 | 15 +++ data/templates/telegraf/syslog_telegraf.j2 | 5 + data/templates/telegraf/telegraf.j2 | 122 +++++++++++++++++++++ .../service-monitoring-telegraf.xml.in | 1 + .../cli/test_service_monitoring_telegraf.py | 2 +- src/conf_mode/service_monitoring_telegraf.py | 60 +++++----- src/systemd/telegraf.service | 15 +++ 11 files changed, 189 insertions(+), 181 deletions(-) delete mode 100644 data/templates/monitoring/override.conf.j2 delete mode 100644 data/templates/monitoring/syslog_telegraf.j2 delete mode 100644 data/templates/monitoring/systemd_vyos_telegraf_service.j2 delete mode 100644 data/templates/monitoring/telegraf.j2 create mode 100644 data/templates/telegraf/override.conf.j2 create mode 100644 data/templates/telegraf/syslog_telegraf.j2 create mode 100644 data/templates/telegraf/telegraf.j2 create mode 100644 src/systemd/telegraf.service diff --git a/data/templates/monitoring/override.conf.j2 b/data/templates/monitoring/override.conf.j2 deleted file mode 100644 index 9f1b4ebec..000000000 --- a/data/templates/monitoring/override.conf.j2 +++ /dev/null @@ -1,7 +0,0 @@ -[Unit] -After=vyos-router.service -ConditionPathExists=/run/telegraf/vyos-telegraf.conf -[Service] -Environment=INFLUX_TOKEN={{ influxdb.authentication.token }} -CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_SYS_ADMIN -AmbientCapabilities=CAP_NET_RAW CAP_NET_ADMIN diff --git a/data/templates/monitoring/syslog_telegraf.j2 b/data/templates/monitoring/syslog_telegraf.j2 deleted file mode 100644 index cdcbd92a4..000000000 --- a/data/templates/monitoring/syslog_telegraf.j2 +++ /dev/null @@ -1,5 +0,0 @@ -# Generated by /usr/libexec/vyos/conf_mode/service_monitoring_telegraf.py - -$ModLoad omuxsock -$OMUxSockSocket /run/telegraf/telegraf_syslog.sock -*.notice :omuxsock: diff --git a/data/templates/monitoring/systemd_vyos_telegraf_service.j2 b/data/templates/monitoring/systemd_vyos_telegraf_service.j2 deleted file mode 100644 index 234ef5586..000000000 --- a/data/templates/monitoring/systemd_vyos_telegraf_service.j2 +++ /dev/null @@ -1,16 +0,0 @@ -[Unit] -Description=The plugin-driven server agent for reporting metrics into InfluxDB -Documentation=https://github.com/influxdata/telegraf -After=network.target - -[Service] -EnvironmentFile=-/etc/default/telegraf -User=telegraf -ExecStart=/usr/bin/telegraf -config /run/telegraf/vyos-telegraf.conf -config-directory /etc/telegraf/telegraf.d $TELEGRAF_OPTS -ExecReload=/bin/kill -HUP $MAINPID -Restart=on-failure -RestartForceExitStatus=SIGPIPE -KillMode=control-group - -[Install] -WantedBy=multi-user.target diff --git a/data/templates/monitoring/telegraf.j2 b/data/templates/monitoring/telegraf.j2 deleted file mode 100644 index 6b395692b..000000000 --- a/data/templates/monitoring/telegraf.j2 +++ /dev/null @@ -1,122 +0,0 @@ -# Generated by /usr/libexec/vyos/conf_mode/service_monitoring_telegraf.py - -[agent] - interval = "15s" - round_interval = true - metric_batch_size = 1000 - metric_buffer_limit = 10000 - collection_jitter = "5s" - flush_interval = "15s" - flush_jitter = "0s" - precision = "" - debug = false - quiet = false - logfile = "" - hostname = "" - omit_hostname = false -{% if azure_data_explorer is vyos_defined %} -### Azure Data Explorer ### -[[outputs.azure_data_explorer]] - ## The URI property of the Azure Data Explorer resource on Azure - endpoint_url = "{{ azure_data_explorer.url }}" - - ## The Azure Data Explorer database that the metrics will be ingested into. - ## The plugin will NOT generate this database automatically, it's expected that this database already exists before ingestion. - database = "{{ azure_data_explorer.database }}" - metrics_grouping_type = "{{ azure_data_explorer.group_metrics }}" - - ## Name of the single table to store all the metrics (Only needed if metrics_grouping_type is "SingleTable"). -{% if azure_data_explorer.table is vyos_defined and azure_data_explorer.group_metrics == 'SingleTable' %} - table_name = "{{ azure_data_explorer.table }}" -{% endif %} -### End Azure Data Explorer ### -{% endif %} -{% if influxdb is vyos_defined %} -### InfluxDB2 ### -[[outputs.influxdb_v2]] - urls = ["{{ influxdb.url }}:{{ influxdb.port }}"] - insecure_skip_verify = true - token = "$INFLUX_TOKEN" - organization = "{{ influxdb.authentication.organization }}" - bucket = "{{ influxdb.bucket }}" -### End InfluxDB2 ### -{% endif %} -{% if prometheus_client is vyos_defined %} -### Prometheus ### -[[outputs.prometheus_client]] - ## Address to listen on - listen = "{{ prometheus_client.listen_address if prometheus_client.listen_address is vyos_defined else '' }}:{{ prometheus_client.port }}" - metric_version = {{ prometheus_client.metric_version }} -{% if prometheus_client.authentication.username is vyos_defined and prometheus_client.authentication.password is vyos_defined %} - ## Use HTTP Basic Authentication - basic_username = "{{ prometheus_client.authentication.username }}" - basic_password = "{{ prometheus_client.authentication.password }}" -{% endif %} -{% if prometheus_client.allow_from is vyos_defined %} - ip_range = {{ prometheus_client.allow_from }} -{% endif %} -### End Prometheus ### -{% endif %} -{% if splunk is vyos_defined %} -### Splunk ### -[[outputs.http]] - ## URL is the address to send metrics to - url = "{{ splunk.url }}" - ## Timeout for HTTP message - # timeout = "5s" - ## Use TLS but skip chain & host verification -{% if splunk.authentication.insecure is vyos_defined %} - insecure_skip_verify = true -{% endif %} - ## Data format to output - data_format = "splunkmetric" - ## Provides time, index, source overrides for the HEC - splunkmetric_hec_routing = true - ## Additional HTTP headers - [outputs.http.headers] - # Should be set manually to "application/json" for json data_format - Content-Type = "application/json" - Authorization = "Splunk {{ splunk.authentication.token }}" - X-Splunk-Request-Channel = "{{ splunk.authentication.token }}" -### End Splunk ### -{% endif %} -[[inputs.cpu]] - percpu = true - totalcpu = true - collect_cpu_time = false - report_active = false -[[inputs.disk]] - ignore_fs = ["devtmpfs", "devfs"] -[[inputs.diskio]] -[[inputs.mem]] -[[inputs.net]] -[[inputs.system]] -[[inputs.netstat]] -[[inputs.processes]] -[[inputs.kernel]] -[[inputs.interrupts]] -[[inputs.linux_sysctl_fs]] -[[inputs.systemd_units]] -[[inputs.conntrack]] - files = ["ip_conntrack_count","ip_conntrack_max","nf_conntrack_count","nf_conntrack_max"] - dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"] -[[inputs.ethtool]] - interface_include = {{ interfaces_ethernet }} -[[inputs.ntpq]] - dns_lookup = true -[[inputs.internal]] -[[inputs.nstat]] -[[inputs.syslog]] - server = "unixgram:///run/telegraf/telegraf_syslog.sock" - best_effort = true - syslog_standard = "RFC3164" -{% if influxdb_configured is vyos_defined %} -[[inputs.exec]] - commands = [ - "{{ custom_scripts_dir }}/show_firewall_input_filter.py", - "{{ custom_scripts_dir }}/show_interfaces_input_filter.py", - "{{ custom_scripts_dir }}/vyos_services_input_filter.py" - ] - timeout = "10s" - data_format = "influx" -{% endif %} diff --git a/data/templates/telegraf/override.conf.j2 b/data/templates/telegraf/override.conf.j2 new file mode 100644 index 000000000..d30bb19de --- /dev/null +++ b/data/templates/telegraf/override.conf.j2 @@ -0,0 +1,15 @@ +{% set vrf_command = 'ip vrf exec ' ~ vrf ~ ' ' if vrf is vyos_defined else '' %} +[Unit] +After= +After=vyos-router.service +ConditionPathExists=/run/telegraf/telegraf.conf + +[Service] +ExecStart= +ExecStart={{ vrf_command }}/usr/bin/telegraf --config /run/telegraf/telegraf.conf --config-directory /etc/telegraf/telegraf.d --pidfile /run/telegraf/telegraf.pid +PIDFile=/run/telegraf/telegraf.pid +EnvironmentFile= +Environment=INFLUX_TOKEN={{ influxdb.authentication.token }} +CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_SYS_ADMIN CAP_BPF CAP_DAC_OVERRIDE +AmbientCapabilities=CAP_NET_RAW CAP_NET_ADMIN + diff --git a/data/templates/telegraf/syslog_telegraf.j2 b/data/templates/telegraf/syslog_telegraf.j2 new file mode 100644 index 000000000..cdcbd92a4 --- /dev/null +++ b/data/templates/telegraf/syslog_telegraf.j2 @@ -0,0 +1,5 @@ +# Generated by /usr/libexec/vyos/conf_mode/service_monitoring_telegraf.py + +$ModLoad omuxsock +$OMUxSockSocket /run/telegraf/telegraf_syslog.sock +*.notice :omuxsock: diff --git a/data/templates/telegraf/telegraf.j2 b/data/templates/telegraf/telegraf.j2 new file mode 100644 index 000000000..6b395692b --- /dev/null +++ b/data/templates/telegraf/telegraf.j2 @@ -0,0 +1,122 @@ +# Generated by /usr/libexec/vyos/conf_mode/service_monitoring_telegraf.py + +[agent] + interval = "15s" + round_interval = true + metric_batch_size = 1000 + metric_buffer_limit = 10000 + collection_jitter = "5s" + flush_interval = "15s" + flush_jitter = "0s" + precision = "" + debug = false + quiet = false + logfile = "" + hostname = "" + omit_hostname = false +{% if azure_data_explorer is vyos_defined %} +### Azure Data Explorer ### +[[outputs.azure_data_explorer]] + ## The URI property of the Azure Data Explorer resource on Azure + endpoint_url = "{{ azure_data_explorer.url }}" + + ## The Azure Data Explorer database that the metrics will be ingested into. + ## The plugin will NOT generate this database automatically, it's expected that this database already exists before ingestion. + database = "{{ azure_data_explorer.database }}" + metrics_grouping_type = "{{ azure_data_explorer.group_metrics }}" + + ## Name of the single table to store all the metrics (Only needed if metrics_grouping_type is "SingleTable"). +{% if azure_data_explorer.table is vyos_defined and azure_data_explorer.group_metrics == 'SingleTable' %} + table_name = "{{ azure_data_explorer.table }}" +{% endif %} +### End Azure Data Explorer ### +{% endif %} +{% if influxdb is vyos_defined %} +### InfluxDB2 ### +[[outputs.influxdb_v2]] + urls = ["{{ influxdb.url }}:{{ influxdb.port }}"] + insecure_skip_verify = true + token = "$INFLUX_TOKEN" + organization = "{{ influxdb.authentication.organization }}" + bucket = "{{ influxdb.bucket }}" +### End InfluxDB2 ### +{% endif %} +{% if prometheus_client is vyos_defined %} +### Prometheus ### +[[outputs.prometheus_client]] + ## Address to listen on + listen = "{{ prometheus_client.listen_address if prometheus_client.listen_address is vyos_defined else '' }}:{{ prometheus_client.port }}" + metric_version = {{ prometheus_client.metric_version }} +{% if prometheus_client.authentication.username is vyos_defined and prometheus_client.authentication.password is vyos_defined %} + ## Use HTTP Basic Authentication + basic_username = "{{ prometheus_client.authentication.username }}" + basic_password = "{{ prometheus_client.authentication.password }}" +{% endif %} +{% if prometheus_client.allow_from is vyos_defined %} + ip_range = {{ prometheus_client.allow_from }} +{% endif %} +### End Prometheus ### +{% endif %} +{% if splunk is vyos_defined %} +### Splunk ### +[[outputs.http]] + ## URL is the address to send metrics to + url = "{{ splunk.url }}" + ## Timeout for HTTP message + # timeout = "5s" + ## Use TLS but skip chain & host verification +{% if splunk.authentication.insecure is vyos_defined %} + insecure_skip_verify = true +{% endif %} + ## Data format to output + data_format = "splunkmetric" + ## Provides time, index, source overrides for the HEC + splunkmetric_hec_routing = true + ## Additional HTTP headers + [outputs.http.headers] + # Should be set manually to "application/json" for json data_format + Content-Type = "application/json" + Authorization = "Splunk {{ splunk.authentication.token }}" + X-Splunk-Request-Channel = "{{ splunk.authentication.token }}" +### End Splunk ### +{% endif %} +[[inputs.cpu]] + percpu = true + totalcpu = true + collect_cpu_time = false + report_active = false +[[inputs.disk]] + ignore_fs = ["devtmpfs", "devfs"] +[[inputs.diskio]] +[[inputs.mem]] +[[inputs.net]] +[[inputs.system]] +[[inputs.netstat]] +[[inputs.processes]] +[[inputs.kernel]] +[[inputs.interrupts]] +[[inputs.linux_sysctl_fs]] +[[inputs.systemd_units]] +[[inputs.conntrack]] + files = ["ip_conntrack_count","ip_conntrack_max","nf_conntrack_count","nf_conntrack_max"] + dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"] +[[inputs.ethtool]] + interface_include = {{ interfaces_ethernet }} +[[inputs.ntpq]] + dns_lookup = true +[[inputs.internal]] +[[inputs.nstat]] +[[inputs.syslog]] + server = "unixgram:///run/telegraf/telegraf_syslog.sock" + best_effort = true + syslog_standard = "RFC3164" +{% if influxdb_configured is vyos_defined %} +[[inputs.exec]] + commands = [ + "{{ custom_scripts_dir }}/show_firewall_input_filter.py", + "{{ custom_scripts_dir }}/show_interfaces_input_filter.py", + "{{ custom_scripts_dir }}/vyos_services_input_filter.py" + ] + timeout = "10s" + data_format = "influx" +{% endif %} diff --git a/interface-definitions/service-monitoring-telegraf.xml.in b/interface-definitions/service-monitoring-telegraf.xml.in index 36f40a539..dc014ee16 100644 --- a/interface-definitions/service-monitoring-telegraf.xml.in +++ b/interface-definitions/service-monitoring-telegraf.xml.in @@ -306,6 +306,7 @@ + #include diff --git a/smoketest/scripts/cli/test_service_monitoring_telegraf.py b/smoketest/scripts/cli/test_service_monitoring_telegraf.py index 1c8cc9759..c1c4044e6 100755 --- a/smoketest/scripts/cli/test_service_monitoring_telegraf.py +++ b/smoketest/scripts/cli/test_service_monitoring_telegraf.py @@ -24,7 +24,7 @@ from vyos.util import process_named_running from vyos.util import read_file PROCESS_NAME = 'telegraf' -TELEGRAF_CONF = '/run/telegraf/vyos-telegraf.conf' +TELEGRAF_CONF = '/run/telegraf/telegraf.conf' base_path = ['service', 'monitoring', 'telegraf'] org = 'log@in.local' token = 'GuRJc12tIzfjnYdKRAIYbxdWd2aTpOT9PVYNddzDnFV4HkAcD7u7-kndTFXjGuXzJN6TTxmrvPODB4mnFcseDV==' diff --git a/src/conf_mode/service_monitoring_telegraf.py b/src/conf_mode/service_monitoring_telegraf.py index 62f5e1ddf..18b32edab 100755 --- a/src/conf_mode/service_monitoring_telegraf.py +++ b/src/conf_mode/service_monitoring_telegraf.py @@ -22,6 +22,8 @@ from shutil import rmtree from vyos.config import Config from vyos.configdict import dict_merge +from vyos.configdict import is_node_changed +from vyos.configverify import verify_vrf from vyos.ifconfig import Section from vyos.template import render from vyos.util import call @@ -32,20 +34,14 @@ from vyos import ConfigError from vyos import airbag airbag.enable() - -base_dir = '/run/telegraf' cache_dir = f'/etc/telegraf/.cache' -config_telegraf = f'{base_dir}/vyos-telegraf.conf' +config_telegraf = f'/run/telegraf/telegraf.conf' custom_scripts_dir = '/etc/telegraf/custom_scripts' syslog_telegraf = '/etc/rsyslog.d/50-telegraf.conf' -systemd_telegraf_service = '/etc/systemd/system/vyos-telegraf.service' -systemd_telegraf_override_dir = '/etc/systemd/system/vyos-telegraf.service.d' -systemd_override = f'{systemd_telegraf_override_dir}/10-override.conf' - +systemd_override = '/etc/systemd/system/telegraf.service.d/10-override.conf' def get_interfaces(type='', vlan=True): """ - Get interfaces get_interfaces() ['dum0', 'eth0', 'eth1', 'eth1.5', 'lo', 'tun0'] @@ -62,9 +58,7 @@ def get_interfaces(type='', vlan=True): return interfaces def get_nft_filter_chains(): - """ - Get nft chains for table filter - """ + """ Get nft chains for table filter """ nft = cmd('nft --json list table ip filter') nft = json.loads(nft) chain_list = [] @@ -78,7 +72,6 @@ def get_nft_filter_chains(): def get_config(config=None): - if config: conf = config else: @@ -87,8 +80,12 @@ def get_config(config=None): if not conf.exists(base): return None - monitoring = conf.get_config_dict(base, key_mangling=('-', '_'), get_first_key=True, - no_tag_node_value_mangle=True) + monitoring = conf.get_config_dict(base, key_mangling=('-', '_'), + get_first_key=True, + no_tag_node_value_mangle=True) + + tmp = is_node_changed(conf, base + ['vrf']) + if tmp: monitoring.update({'restart_required': {}}) # We have gathered the dict representation of the CLI, but there are default # options which we need to update into the dictionary retrived. @@ -131,6 +128,8 @@ def verify(monitoring): if not monitoring: return None + verify_vrf(monitoring) + # Verify influxdb if 'influxdb' in monitoring: if 'authentication' not in monitoring['influxdb'] or \ @@ -173,7 +172,7 @@ def verify(monitoring): def generate(monitoring): if not monitoring: # Delete config and systemd files - config_files = [config_telegraf, systemd_telegraf_service, systemd_override, syslog_telegraf] + config_files = [config_telegraf, systemd_override, syslog_telegraf] for file in config_files: if os.path.isfile(file): os.unlink(file) @@ -190,33 +189,34 @@ def generate(monitoring): chown(cache_dir, 'telegraf', 'telegraf') - # Create systemd override dir - if not os.path.exists(systemd_telegraf_override_dir): - os.mkdir(systemd_telegraf_override_dir) - # Create custome scripts dir if not os.path.exists(custom_scripts_dir): os.mkdir(custom_scripts_dir) # Render telegraf configuration and systemd override - render(config_telegraf, 'monitoring/telegraf.j2', monitoring) - render(systemd_telegraf_service, 'monitoring/systemd_vyos_telegraf_service.j2', monitoring) - render(systemd_override, 'monitoring/override.conf.j2', monitoring, permission=0o640) - render(syslog_telegraf, 'monitoring/syslog_telegraf.j2', monitoring) - - chown(base_dir, 'telegraf', 'telegraf') + render(config_telegraf, 'telegraf/telegraf.j2', monitoring, user='telegraf', group='telegraf') + render(systemd_override, 'telegraf/override.conf.j2', monitoring) + render(syslog_telegraf, 'telegraf/syslog_telegraf.j2', monitoring) return None def apply(monitoring): # Reload systemd manager configuration + systemd_service = 'telegraf.service' call('systemctl daemon-reload') - if monitoring: - call('systemctl restart vyos-telegraf.service') - else: - call('systemctl stop vyos-telegraf.service') + if not monitoring: + call(f'systemctl stop {systemd_service}') + return + + # we need to restart the service if e.g. the VRF name changed + systemd_action = 'reload-or-restart' + if 'restart_required' in monitoring: + systemd_action = 'restart' + + call(f'systemctl {systemd_action} {systemd_service}') + # Telegraf include custom rsyslog config changes - call('systemctl restart rsyslog') + call('systemctl reload-or-restart rsyslog') if __name__ == '__main__': try: diff --git a/src/systemd/telegraf.service b/src/systemd/telegraf.service new file mode 100644 index 000000000..553942ac6 --- /dev/null +++ b/src/systemd/telegraf.service @@ -0,0 +1,15 @@ +[Unit] +Description=The plugin-driven server agent for reporting metrics into InfluxDB +Documentation=https://github.com/influxdata/telegraf +After=network.target + +[Service] +EnvironmentFile=-/etc/default/telegraf +ExecStart=/usr/bin/telegraf --config /run/telegraf/vyos-telegraf.conf --config-directory /etc/telegraf/telegraf.d +ExecReload=/bin/kill -HUP $MAINPID +Restart=on-failure +RestartForceExitStatus=SIGPIPE +KillMode=control-group + +[Install] +WantedBy=multi-user.target -- cgit v1.2.3