diff options
-rw-r--r-- | data/templates/monitoring/override.conf.tmpl | 7 | ||||
-rw-r--r-- | data/templates/monitoring/syslog_telegraf.tmpl | 5 | ||||
-rw-r--r-- | data/templates/monitoring/systemd_vyos_telegraf_service.tmpl | 16 | ||||
-rw-r--r-- | data/templates/monitoring/telegraf.tmpl | 60 | ||||
-rw-r--r-- | debian/control | 1 | ||||
-rw-r--r-- | debian/vyos-1x.install | 1 | ||||
-rw-r--r-- | interface-definitions/service_monitoring_telegraf.xml.in | 113 | ||||
-rwxr-xr-x | smoketest/scripts/cli/test_service_monitoring_telegraf.py | 65 | ||||
-rwxr-xr-x | src/conf_mode/service_monitoring_telegraf.py | 175 | ||||
-rwxr-xr-x | src/etc/telegraf/custom_scripts/show_firewall_input_filter.py | 73 | ||||
-rwxr-xr-x | src/etc/telegraf/custom_scripts/show_interfaces_input_filter.py | 88 | ||||
-rwxr-xr-x | src/etc/telegraf/custom_scripts/vyos_services_input_filter.py | 61 |
12 files changed, 665 insertions, 0 deletions
diff --git a/data/templates/monitoring/override.conf.tmpl b/data/templates/monitoring/override.conf.tmpl new file mode 100644 index 000000000..f8f150791 --- /dev/null +++ b/data/templates/monitoring/override.conf.tmpl @@ -0,0 +1,7 @@ +[Unit] +After=vyos-router.service +ConditionPathExists=/run/telegraf/vyos-telegraf.conf +[Service] +Environment=INFLUX_TOKEN={{ authentication.token }} +CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_SYS_ADMIN +AmbientCapabilities=CAP_NET_RAW CAP_NET_ADMIN diff --git a/data/templates/monitoring/syslog_telegraf.tmpl b/data/templates/monitoring/syslog_telegraf.tmpl new file mode 100644 index 000000000..cdcbd92a4 --- /dev/null +++ b/data/templates/monitoring/syslog_telegraf.tmpl @@ -0,0 +1,5 @@ +# Generated by /usr/libexec/vyos/conf_mode/service_monitoring_telegraf.py + +$ModLoad omuxsock +$OMUxSockSocket /run/telegraf/telegraf_syslog.sock +*.notice :omuxsock: diff --git a/data/templates/monitoring/systemd_vyos_telegraf_service.tmpl b/data/templates/monitoring/systemd_vyos_telegraf_service.tmpl new file mode 100644 index 000000000..234ef5586 --- /dev/null +++ b/data/templates/monitoring/systemd_vyos_telegraf_service.tmpl @@ -0,0 +1,16 @@ +[Unit] +Description=The plugin-driven server agent for reporting metrics into InfluxDB +Documentation=https://github.com/influxdata/telegraf +After=network.target + +[Service] +EnvironmentFile=-/etc/default/telegraf +User=telegraf +ExecStart=/usr/bin/telegraf -config /run/telegraf/vyos-telegraf.conf -config-directory /etc/telegraf/telegraf.d $TELEGRAF_OPTS +ExecReload=/bin/kill -HUP $MAINPID +Restart=on-failure +RestartForceExitStatus=SIGPIPE +KillMode=control-group + +[Install] +WantedBy=multi-user.target diff --git a/data/templates/monitoring/telegraf.tmpl b/data/templates/monitoring/telegraf.tmpl new file mode 100644 index 000000000..d3145a500 --- /dev/null +++ b/data/templates/monitoring/telegraf.tmpl @@ -0,0 +1,60 @@ +# Generated by /usr/libexec/vyos/conf_mode/service_monitoring_telegraf.py + +[agent] + interval = "10s" + round_interval = true + metric_batch_size = 1000 + metric_buffer_limit = 10000 + collection_jitter = "0s" + flush_interval = "10s" + flush_jitter = "0s" + precision = "" + debug = false + quiet = false + logfile = "" + hostname = "" + omit_hostname = false +[[outputs.influxdb_v2]] + urls = ["{{ url }}:{{ port }}"] + insecure_skip_verify = true + token = "$INFLUX_TOKEN" + organization = "{{ authentication.organization }}" + bucket = "{{ bucket }}" +[[inputs.cpu]] + percpu = true + totalcpu = true + collect_cpu_time = false + report_active = false +[[inputs.disk]] + ignore_fs = ["devtmpfs", "devfs"] +[[inputs.diskio]] +[[inputs.mem]] +[[inputs.net]] +[[inputs.system]] +[[inputs.netstat]] +[[inputs.processes]] +[[inputs.kernel]] +[[inputs.interrupts]] +[[inputs.linux_sysctl_fs]] +[[inputs.systemd_units]] +[[inputs.conntrack]] + files = ["ip_conntrack_count","ip_conntrack_max","nf_conntrack_count","nf_conntrack_max"] + dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"] +[[inputs.ethtool]] + interface_include = {{ interfaces_ethernet }} +[[inputs.ntpq]] + dns_lookup = true +[[inputs.internal]] +[[inputs.nstat]] +[[inputs.syslog]] + server = "unixgram:///run/telegraf/telegraf_syslog.sock" + best_effort = true + syslog_standard = "RFC3164" +[[inputs.exec]] + commands = [ + "{{ custom_scripts_dir }}/show_firewall_input_filter.py", + "{{ custom_scripts_dir }}/show_interfaces_input_filter.py", + "{{ custom_scripts_dir }}/vyos_services_input_filter.py" + ] + timeout = "10s" + data_format = "influx" diff --git a/debian/control b/debian/control index 8cafd8257..7706c6523 100644 --- a/debian/control +++ b/debian/control @@ -130,6 +130,7 @@ Depends: ssl-cert, sudo, systemd, + telegraf (>= 1.20), tcpdump, tcptraceroute, telnet, diff --git a/debian/vyos-1x.install b/debian/vyos-1x.install index 082c5a62f..38a032a63 100644 --- a/debian/vyos-1x.install +++ b/debian/vyos-1x.install @@ -5,6 +5,7 @@ etc/ppp etc/rsyslog.d etc/systemd etc/sysctl.d +etc/telegraf etc/udev etc/update-motd.d etc/vyos diff --git a/interface-definitions/service_monitoring_telegraf.xml.in b/interface-definitions/service_monitoring_telegraf.xml.in new file mode 100644 index 000000000..0db9052ff --- /dev/null +++ b/interface-definitions/service_monitoring_telegraf.xml.in @@ -0,0 +1,113 @@ +<?xml version="1.0"?> +<interfaceDefinition> + <node name="service"> + <children> + <node name="monitoring"> + <properties> + <help>Monitoring services</help> + <priority>1280</priority> + </properties> + <children> + <node name="telegraf" owner="${vyos_conf_scripts_dir}/service_monitoring_telegraf.py"> + <properties> + <help>Telegraf monitoring</help> + </properties> + <children> + <node name="authentication"> + <properties> + <help>Authentication parameters</help> + </properties> + <children> + <leafNode name="organization"> + <properties> + <help>Authentication organization for InfluxDB v2 [REQUIRED]</help> + <constraint> + <regex>^[a-zA-Z][1-9a-zA-Z@_\-.]{2,50}$</regex> + </constraint> + <constraintErrorMessage>Organization name must be alphanumeric and can contain hyphens, underscores and at symbol.</constraintErrorMessage> + </properties> + </leafNode> + <leafNode name="token"> + <properties> + <help>Authentication token for InfluxDB v2 [REQUIRED]</help> + <valueHelp> + <format>txt</format> + <description>Authentication token</description> + </valueHelp> + <constraint> + <regex>^[a-zA-Z0-9-_]{86}==$</regex> + </constraint> + <constraintErrorMessage>Token must be 88 characters long and must contain only [a-zA-Z0-9-_] and '==' characters.</constraintErrorMessage> + </properties> + </leafNode> + </children> + </node> + <leafNode name="bucket"> + <properties> + <help>Remote bucket, by default (main)</help> + </properties> + <defaultValue>main</defaultValue> + </leafNode> + <leafNode name="source"> + <properties> + <help>Source parameters for monitoring (default: all)</help> + <completionHelp> + <list>all hardware-utilization logs network system telegraf</list> + </completionHelp> + <valueHelp> + <format>all</format> + <description>All parameters (default)</description> + </valueHelp> + <valueHelp> + <format>hardware-utilization</format> + <description>Hardware-utilization parameters (CPU, disk, memory)</description> + </valueHelp> + <valueHelp> + <format>logs</format> + <description>Logs parameters</description> + </valueHelp> + <valueHelp> + <format>network</format> + <description>Network parameters (net, netstat, nftables)</description> + </valueHelp> + <valueHelp> + <format>system</format> + <description>System parameters (system, processes, interrupts)</description> + </valueHelp> + <valueHelp> + <format>telegraf</format> + <description>Telegraf internal statistics</description> + </valueHelp> + <constraint> + <regex>^(all|hardware-utilization|logs|network|system|telegraf)$</regex> + </constraint> + <multi/> + </properties> + <defaultValue>all</defaultValue> + </leafNode> + <leafNode name="url"> + <properties> + <help>Remote URL [REQUIRED]</help> + <valueHelp> + <format>url</format> + <description>Remote URL to InfluxDB v2</description> + </valueHelp> + <constraint> + <regex>^(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}?(\/.*)?$</regex> + </constraint> + <constraintErrorMessage>Incorrect URL format.</constraintErrorMessage> + </properties> + </leafNode> + <leafNode name="port"> + <properties> + <help>Remote port (default: 8086)</help> + </properties> + <defaultValue>8086</defaultValue> + </leafNode> + </children> + </node> + </children> + </node> + </children> + </node> +</interfaceDefinition> diff --git a/smoketest/scripts/cli/test_service_monitoring_telegraf.py b/smoketest/scripts/cli/test_service_monitoring_telegraf.py new file mode 100755 index 000000000..b857926e2 --- /dev/null +++ b/smoketest/scripts/cli/test_service_monitoring_telegraf.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2021 VyOS maintainers and contributors +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 or later as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import unittest + +from base_vyostest_shim import VyOSUnitTestSHIM + +from vyos.configsession import ConfigSession +from vyos.configsession import ConfigSessionError +from vyos.util import process_named_running +from vyos.util import read_file + +PROCESS_NAME = 'telegraf' +TELEGRAF_CONF = '/run/telegraf/vyos-telegraf.conf' +base_path = ['service', 'monitoring', 'telegraf'] +org = 'log@in.local' +token = 'GuRJc12tIzfjnYdKRAIYbxdWd2aTpOT9PVYNddzDnFV4HkAcD7u7-kndTFXjGuXzJN6TTxmrvPODB4mnFcseDV==' +port = '8888' +url = 'https://foo.local' +bucket = 'main' +inputs = ['cpu', 'disk', 'mem', 'net', 'system', 'kernel', 'interrupts', 'syslog'] + +class TestMonitoringTelegraf(VyOSUnitTestSHIM.TestCase): + def tearDown(self): + self.cli_delete(base_path) + self.cli_commit() + + def test_01_basic_config(self): + self.cli_set(base_path + ['authentication', 'organization', org]) + self.cli_set(base_path + ['authentication', 'token', token]) + self.cli_set(base_path + ['port', port]) + self.cli_set(base_path + ['url', url]) + + # commit changes + self.cli_commit() + + # Check for running process + self.assertTrue(process_named_running(PROCESS_NAME)) + + config = read_file(TELEGRAF_CONF) + + # Check telegraf config + self.assertIn(f'organization = "{org}"', config) + self.assertIn(token, config) + self.assertIn(f'urls = ["{url}:{port}"]', config) + self.assertIn(f'bucket = "{bucket}"', config) + + for input in inputs: + self.assertIn(input, config) + +if __name__ == '__main__': + unittest.main(verbosity=2) diff --git a/src/conf_mode/service_monitoring_telegraf.py b/src/conf_mode/service_monitoring_telegraf.py new file mode 100755 index 000000000..8a972b9fe --- /dev/null +++ b/src/conf_mode/service_monitoring_telegraf.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2021-2022 VyOS maintainers and contributors +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 or later as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import os +import json + +from sys import exit +from shutil import rmtree + +from vyos.config import Config +from vyos.configdict import dict_merge +from vyos.ifconfig import Section +from vyos.template import render +from vyos.util import call +from vyos.util import chown +from vyos.util import cmd +from vyos.xml import defaults +from vyos import ConfigError +from vyos import airbag +airbag.enable() + + +base_dir = '/run/telegraf' +cache_dir = f'/etc/telegraf/.cache' +config_telegraf = f'{base_dir}/vyos-telegraf.conf' +custom_scripts_dir = '/etc/telegraf/custom_scripts' +syslog_telegraf = '/etc/rsyslog.d/50-telegraf.conf' +systemd_telegraf_service = '/etc/systemd/system/vyos-telegraf.service' +systemd_telegraf_override_dir = '/etc/systemd/system/vyos-telegraf.service.d' +systemd_override = f'{systemd_telegraf_override_dir}/10-override.conf' + + +def get_interfaces(type='', vlan=True): + """ + Get interfaces + get_interfaces() + ['dum0', 'eth0', 'eth1', 'eth1.5', 'lo', 'tun0'] + + get_interfaces("dummy") + ['dum0'] + """ + interfaces = [] + ifaces = Section.interfaces(type) + for iface in ifaces: + if vlan == False and '.' in iface: + continue + interfaces.append(iface) + + return interfaces + +def get_nft_filter_chains(): + """ + Get nft chains for table filter + """ + nft = cmd('nft --json list table ip filter') + nft = json.loads(nft) + chain_list = [] + + for output in nft['nftables']: + if 'chain' in output: + chain = output['chain']['name'] + chain_list.append(chain) + + return chain_list + + +def get_config(config=None): + + if config: + conf = config + else: + conf = Config() + base = ['service', 'monitoring', 'telegraf'] + if not conf.exists(base): + return None + + monitoring = conf.get_config_dict(base, key_mangling=('-', '_'), get_first_key=True, + no_tag_node_value_mangle=True) + + # We have gathered the dict representation of the CLI, but there are default + # options which we need to update into the dictionary retrived. + default_values = defaults(base) + monitoring = dict_merge(default_values, monitoring) + + monitoring['custom_scripts_dir'] = custom_scripts_dir + monitoring['interfaces_ethernet'] = get_interfaces('ethernet', vlan=False) + monitoring['nft_chains'] = get_nft_filter_chains() + + return monitoring + +def verify(monitoring): + # bail out early - looks like removal from running config + if not monitoring: + return None + + if 'authentication' not in monitoring or \ + 'organization' not in monitoring['authentication'] or \ + 'token' not in monitoring['authentication']: + raise ConfigError(f'Authentication "organization and token" are mandatory!') + + if 'url' not in monitoring: + raise ConfigError(f'Monitoring "url" is mandatory!') + + return None + +def generate(monitoring): + if not monitoring: + # Delete config and systemd files + config_files = [config_telegraf, systemd_telegraf_service, systemd_override, syslog_telegraf] + for file in config_files: + if os.path.isfile(file): + os.unlink(file) + + # Delete old directories + if os.path.isdir(cache_dir): + rmtree(cache_dir, ignore_errors=True) + + return None + + # Create telegraf cache dir + if not os.path.exists(cache_dir): + os.makedirs(cache_dir) + + chown(cache_dir, 'telegraf', 'telegraf') + + # Create systemd override dir + if not os.path.exists(systemd_telegraf_override_dir): + os.mkdir(systemd_telegraf_override_dir) + + # Create custome scripts dir + if not os.path.exists(custom_scripts_dir): + os.mkdir(custom_scripts_dir) + + # Render telegraf configuration and systemd override + render(config_telegraf, 'monitoring/telegraf.tmpl', monitoring) + render(systemd_telegraf_service, 'monitoring/systemd_vyos_telegraf_service.tmpl', monitoring) + render(systemd_override, 'monitoring/override.conf.tmpl', monitoring, permission=0o640) + render(syslog_telegraf, 'monitoring/syslog_telegraf.tmpl', monitoring) + + chown(base_dir, 'telegraf', 'telegraf') + + return None + +def apply(monitoring): + # Reload systemd manager configuration + call('systemctl daemon-reload') + if monitoring: + call('systemctl restart vyos-telegraf.service') + else: + call('systemctl stop vyos-telegraf.service') + # Telegraf include custom rsyslog config changes + call('systemctl restart rsyslog') + +if __name__ == '__main__': + try: + c = get_config() + verify(c) + generate(c) + apply(c) + except ConfigError as e: + print(e) + exit(1) diff --git a/src/etc/telegraf/custom_scripts/show_firewall_input_filter.py b/src/etc/telegraf/custom_scripts/show_firewall_input_filter.py new file mode 100755 index 000000000..bf4bfd05d --- /dev/null +++ b/src/etc/telegraf/custom_scripts/show_firewall_input_filter.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +import json +import re +import time + +from vyos.util import cmd + + +def get_nft_filter_chains(): + """ + Get list of nft chains for table filter + """ + nft = cmd('/usr/sbin/nft --json list table ip filter') + nft = json.loads(nft) + chain_list = [] + + for output in nft['nftables']: + if 'chain' in output: + chain = output['chain']['name'] + chain_list.append(chain) + + return chain_list + + +def get_nftables_details(name): + """ + Get dict, counters packets and bytes for chain + """ + command = f'/usr/sbin/nft list chain ip filter {name}' + try: + results = cmd(command) + except: + return {} + + # Trick to remove 'NAME_' from chain name in the comment + # It was added to any chain T4218 + # counter packets 0 bytes 0 return comment "FOO default-action accept" + comment_name = name.replace("NAME_", "") + out = {} + for line in results.split('\n'): + comment_search = re.search(rf'{comment_name}[\- ](\d+|default-action)', line) + if not comment_search: + continue + + rule = {} + rule_id = comment_search[1] + counter_search = re.search(r'counter packets (\d+) bytes (\d+)', line) + if counter_search: + rule['packets'] = counter_search[1] + rule['bytes'] = counter_search[2] + + rule['conditions'] = re.sub(r'(\b(counter packets \d+ bytes \d+|drop|reject|return|log)\b|comment "[\w\-]+")', '', line).strip() + out[rule_id] = rule + return out + + +def get_nft_telegraf(name): + """ + Get data for telegraf in influxDB format + """ + for rule, rule_config in get_nftables_details(name).items(): + print(f'nftables,table=filter,chain={name},' + f'ruleid={rule} ' + f'pkts={rule_config["packets"]}i,' + f'bytes={rule_config["bytes"]}i ' + f'{str(int(time.time()))}000000000') + + +chains = get_nft_filter_chains() + +for chain in chains: + get_nft_telegraf(chain) diff --git a/src/etc/telegraf/custom_scripts/show_interfaces_input_filter.py b/src/etc/telegraf/custom_scripts/show_interfaces_input_filter.py new file mode 100755 index 000000000..0c7474156 --- /dev/null +++ b/src/etc/telegraf/custom_scripts/show_interfaces_input_filter.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 + +from vyos.ifconfig import Section +from vyos.ifconfig import Interface + +import time + +def get_interfaces(type='', vlan=True): + """ + Get interfaces: + ['dum0', 'eth0', 'eth1', 'eth1.5', 'lo', 'tun0'] + """ + interfaces = [] + ifaces = Section.interfaces(type) + for iface in ifaces: + if vlan == False and '.' in iface: + continue + interfaces.append(iface) + + return interfaces + +def get_interface_addresses(iface, link_local_v6=False): + """ + Get IP and IPv6 addresses from interface in one string + By default don't get IPv6 link-local addresses + If interface doesn't have address, return "-" + """ + addresses = [] + addrs = Interface(iface).get_addr() + + for addr in addrs: + if link_local_v6 == False: + if addr.startswith('fe80::'): + continue + addresses.append(addr) + + if not addresses: + return "-" + + return (" ".join(addresses)) + +def get_interface_description(iface): + """ + Get interface description + If none return "empty" + """ + description = Interface(iface).get_alias() + + if not description: + return "empty" + + return description + +def get_interface_admin_state(iface): + """ + Interface administrative state + up => 0, down => 2 + """ + state = Interface(iface).get_admin_state() + if state == 'up': + admin_state = 0 + if state == 'down': + admin_state = 2 + + return admin_state + +def get_interface_oper_state(iface): + """ + Interface operational state + up => 0, down => 1 + """ + state = Interface(iface).operational.get_state() + if state == 'down': + oper_state = 1 + else: + oper_state = 0 + + return oper_state + +interfaces = get_interfaces() + +for iface in interfaces: + print(f'show_interfaces,interface={iface} ' + f'ip_addresses="{get_interface_addresses(iface)}",' + f'state={get_interface_admin_state(iface)}i,' + f'link={get_interface_oper_state(iface)}i,' + f'description="{get_interface_description(iface)}" ' + f'{str(int(time.time()))}000000000') diff --git a/src/etc/telegraf/custom_scripts/vyos_services_input_filter.py b/src/etc/telegraf/custom_scripts/vyos_services_input_filter.py new file mode 100755 index 000000000..df4eed131 --- /dev/null +++ b/src/etc/telegraf/custom_scripts/vyos_services_input_filter.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2021 VyOS maintainers and contributors +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 or later as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +import time +from vyos.configquery import ConfigTreeQuery +from vyos.util import is_systemd_service_running, process_named_running + +# Availible services and prouceses +# 1 - service +# 2 - process +services = { + "protocols bgp" : "bgpd", + "protocols ospf" : "ospfd", + "protocols ospfv3" : "ospf6d", + "protocols rip" : "ripd", + "protocols ripng" : "ripngd", + "protocols isis" : "isisd", + "service pppoe" : "accel-ppp@pppoe.service", + "vpn l2tp remote-access" : "accel-ppp@l2tp.service", + "vpn pptp remote-access" : "accel-ppp@pptp.service", + "vpn sstp" : "accel-ppp@sstp.service", + "vpn ipsec" : "charon" +} + +# Configured services +conf_services = { + 'zebra' : 0, + 'staticd' : 0, +} +# Get configured service and create list to check if process running +config = ConfigTreeQuery() +for service in services: + if config.exists(service): + conf_services[services[service]] = 0 + +for conf_service in conf_services: + status = 0 + if ".service" in conf_service: + # Check systemd service + if is_systemd_service_running(conf_service): + status = 1 + else: + # Check process + if process_named_running(conf_service): + status = 1 + print(f'vyos_services,service="{conf_service}" ' + f'status={str(status)}i {str(int(time.time()))}000000000') |