summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Poessinger <christian.poessinger@rohde-schwarz.com>2022-01-03 17:35:28 +0100
committerGitHub <noreply@github.com>2022-01-03 17:35:28 +0100
commitca3cd970f2973ebfc8c8784ead73bbd582d30d54 (patch)
tree8d53d23723015b51f72e4f259a65176d293fa29b
parent4743b91f4eb98bc2b4d5eee1d2f4d06e10ec032e (diff)
parent605cac35526c8dfe409891f777d50547fb94392f (diff)
downloadvyos-1x-ca3cd970f2973ebfc8c8784ead73bbd582d30d54.tar.gz
vyos-1x-ca3cd970f2973ebfc8c8784ead73bbd582d30d54.zip
Merge pull request #1018 from sever-sever/T3872
monitoring: T3872: Add a new feature service monitoring
-rw-r--r--data/templates/monitoring/override.conf.tmpl7
-rw-r--r--data/templates/monitoring/syslog_telegraf.tmpl5
-rw-r--r--data/templates/monitoring/systemd_vyos_telegraf_service.tmpl16
-rw-r--r--data/templates/monitoring/telegraf.tmpl63
-rw-r--r--debian/control1
-rw-r--r--debian/vyos-1x.install1
-rw-r--r--interface-definitions/service_monitoring_telegraf.xml.in113
-rwxr-xr-xsmoketest/scripts/cli/test_service_monitoring_telegraf.py65
-rwxr-xr-xsrc/conf_mode/service_monitoring_telegraf.py154
-rwxr-xr-xsrc/etc/telegraf/custom_scripts/show_interfaces_input_filter.py47
-rwxr-xr-xsrc/etc/telegraf/custom_scripts/vyos_services_input_filter.py61
11 files changed, 533 insertions, 0 deletions
diff --git a/data/templates/monitoring/override.conf.tmpl b/data/templates/monitoring/override.conf.tmpl
new file mode 100644
index 000000000..63f6d7391
--- /dev/null
+++ b/data/templates/monitoring/override.conf.tmpl
@@ -0,0 +1,7 @@
+[Unit]
+After=vyos-router.service
+ConditionPathExists=/run/telegraf/vyos-telegraf.conf
+[Service]
+Environment=INFLUX_TOKEN={{ authentication.token }}
+CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN
+AmbientCapabilities=CAP_NET_RAW CAP_NET_ADMIN
diff --git a/data/templates/monitoring/syslog_telegraf.tmpl b/data/templates/monitoring/syslog_telegraf.tmpl
new file mode 100644
index 000000000..cdcbd92a4
--- /dev/null
+++ b/data/templates/monitoring/syslog_telegraf.tmpl
@@ -0,0 +1,5 @@
+# Generated by /usr/libexec/vyos/conf_mode/service_monitoring_telegraf.py
+
+$ModLoad omuxsock
+$OMUxSockSocket /run/telegraf/telegraf_syslog.sock
+*.notice :omuxsock:
diff --git a/data/templates/monitoring/systemd_vyos_telegraf_service.tmpl b/data/templates/monitoring/systemd_vyos_telegraf_service.tmpl
new file mode 100644
index 000000000..234ef5586
--- /dev/null
+++ b/data/templates/monitoring/systemd_vyos_telegraf_service.tmpl
@@ -0,0 +1,16 @@
+[Unit]
+Description=The plugin-driven server agent for reporting metrics into InfluxDB
+Documentation=https://github.com/influxdata/telegraf
+After=network.target
+
+[Service]
+EnvironmentFile=-/etc/default/telegraf
+User=telegraf
+ExecStart=/usr/bin/telegraf -config /run/telegraf/vyos-telegraf.conf -config-directory /etc/telegraf/telegraf.d $TELEGRAF_OPTS
+ExecReload=/bin/kill -HUP $MAINPID
+Restart=on-failure
+RestartForceExitStatus=SIGPIPE
+KillMode=control-group
+
+[Install]
+WantedBy=multi-user.target
diff --git a/data/templates/monitoring/telegraf.tmpl b/data/templates/monitoring/telegraf.tmpl
new file mode 100644
index 000000000..62fa4df7a
--- /dev/null
+++ b/data/templates/monitoring/telegraf.tmpl
@@ -0,0 +1,63 @@
+# Generated by /usr/libexec/vyos/conf_mode/service_monitoring_telegraf.py
+
+[agent]
+ interval = "10s"
+ round_interval = true
+ metric_batch_size = 1000
+ metric_buffer_limit = 10000
+ collection_jitter = "0s"
+ flush_interval = "10s"
+ flush_jitter = "0s"
+ precision = ""
+ debug = false
+ quiet = false
+ logfile = ""
+ hostname = ""
+ omit_hostname = false
+[[outputs.influxdb_v2]]
+ urls = ["{{ url }}:{{ port }}"]
+ insecure_skip_verify = true
+ token = "{{ authentication.token }}"
+ organization = "{{ authentication.organization }}"
+ bucket = "{{ bucket }}"
+[[inputs.cpu]]
+ percpu = true
+ totalcpu = true
+ collect_cpu_time = false
+ report_active = false
+[[inputs.disk]]
+ ignore_fs = ["devtmpfs", "devfs"]
+[[inputs.diskio]]
+[[inputs.mem]]
+[[inputs.net]]
+[[inputs.system]]
+[[inputs.netstat]]
+[[inputs.processes]]
+[[inputs.kernel]]
+[[inputs.interrupts]]
+[[inputs.linux_sysctl_fs]]
+[[inputs.systemd_units]]
+[[inputs.conntrack]]
+ files = ["ip_conntrack_count","ip_conntrack_max","nf_conntrack_count","nf_conntrack_max"]
+ dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"]
+[[inputs.ethtool]]
+[[inputs.iptables]]
+ use_sudo = false
+ table = "filter"
+ chains = {{ nft_chains }}
+ use_lock = true
+[[inputs.ntpq]]
+ dns_lookup = true
+[[inputs.internal]]
+[[inputs.nstat]]
+[[inputs.syslog]]
+ server = "unixgram:///run/telegraf/telegraf_syslog.sock"
+ best_effort = true
+ syslog_standard = "RFC3164"
+[[inputs.exec]]
+ commands = [
+ "{{ custom_scripts_dir }}/show_interfaces_input_filter.py",
+ "cat /tmp/vyos_services_input_filter"
+ ]
+ timeout = "10s"
+ data_format = "influx"
diff --git a/debian/control b/debian/control
index ef124679b..3d33a48a6 100644
--- a/debian/control
+++ b/debian/control
@@ -151,6 +151,7 @@ Depends:
strongswan-swanctl (>= 5.9),
sudo,
systemd,
+ telegraf (>= 1.20),
tcpdump,
tcptraceroute,
telnet,
diff --git a/debian/vyos-1x.install b/debian/vyos-1x.install
index 29d74390f..63dff43a5 100644
--- a/debian/vyos-1x.install
+++ b/debian/vyos-1x.install
@@ -12,6 +12,7 @@ etc/security
etc/sudoers.d
etc/systemd
etc/sysctl.d
+etc/telegraf
etc/udev
etc/update-motd.d
etc/vyos
diff --git a/interface-definitions/service_monitoring_telegraf.xml.in b/interface-definitions/service_monitoring_telegraf.xml.in
new file mode 100644
index 000000000..0db9052ff
--- /dev/null
+++ b/interface-definitions/service_monitoring_telegraf.xml.in
@@ -0,0 +1,113 @@
+<?xml version="1.0"?>
+<interfaceDefinition>
+ <node name="service">
+ <children>
+ <node name="monitoring">
+ <properties>
+ <help>Monitoring services</help>
+ <priority>1280</priority>
+ </properties>
+ <children>
+ <node name="telegraf" owner="${vyos_conf_scripts_dir}/service_monitoring_telegraf.py">
+ <properties>
+ <help>Telegraf monitoring</help>
+ </properties>
+ <children>
+ <node name="authentication">
+ <properties>
+ <help>Authentication parameters</help>
+ </properties>
+ <children>
<leafNode name="organization">
  <properties>
    <help>Authentication organization for InfluxDB v2 [REQUIRED]</help>
    <constraint>
      <!-- First character must be a letter, followed by 2-50 characters
           from: digits 0-9, letters, '@', '_', '-' and '.' -->
      <regex>^[a-zA-Z][0-9a-zA-Z@_\-.]{2,50}$</regex>
    </constraint>
    <constraintErrorMessage>Organization name must be alphanumeric and can contain hyphens, underscores and at symbol.</constraintErrorMessage>
  </properties>
</leafNode>
+ <leafNode name="token">
+ <properties>
+ <help>Authentication token for InfluxDB v2 [REQUIRED]</help>
+ <valueHelp>
+ <format>txt</format>
+ <description>Authentication token</description>
+ </valueHelp>
+ <constraint>
+ <regex>^[a-zA-Z0-9-_]{86}==$</regex>
+ </constraint>
+ <constraintErrorMessage>Token must be 88 characters long and must contain only [a-zA-Z0-9-_] and '==' characters.</constraintErrorMessage>
+ </properties>
+ </leafNode>
+ </children>
+ </node>
+ <leafNode name="bucket">
+ <properties>
+ <help>Remote bucket, by default (main)</help>
+ </properties>
+ <defaultValue>main</defaultValue>
+ </leafNode>
+ <leafNode name="source">
+ <properties>
+ <help>Source parameters for monitoring (default: all)</help>
+ <completionHelp>
+ <list>all hardware-utilization logs network system telegraf</list>
+ </completionHelp>
+ <valueHelp>
+ <format>all</format>
+ <description>All parameters (default)</description>
+ </valueHelp>
+ <valueHelp>
+ <format>hardware-utilization</format>
+ <description>Hardware-utilization parameters (CPU, disk, memory)</description>
+ </valueHelp>
+ <valueHelp>
+ <format>logs</format>
+ <description>Logs parameters</description>
+ </valueHelp>
+ <valueHelp>
+ <format>network</format>
+ <description>Network parameters (net, netstat, nftables)</description>
+ </valueHelp>
+ <valueHelp>
+ <format>system</format>
+ <description>System parameters (system, processes, interrupts)</description>
+ </valueHelp>
+ <valueHelp>
+ <format>telegraf</format>
+ <description>Telegraf internal statistics</description>
+ </valueHelp>
+ <constraint>
+ <regex>^(all|hardware-utilization|logs|network|system|telegraf)$</regex>
+ </constraint>
+ <multi/>
+ </properties>
+ <defaultValue>all</defaultValue>
+ </leafNode>
+ <leafNode name="url">
+ <properties>
+ <help>Remote URL [REQUIRED]</help>
+ <valueHelp>
+ <format>url</format>
+ <description>Remote URL to InfluxDB v2</description>
+ </valueHelp>
+ <constraint>
+ <regex>^(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}?(\/.*)?$</regex>
+ </constraint>
+ <constraintErrorMessage>Incorrect URL format.</constraintErrorMessage>
+ </properties>
+ </leafNode>
+ <leafNode name="port">
+ <properties>
+ <help>Remote port (default: 8086)</help>
+ </properties>
+ <defaultValue>8086</defaultValue>
+ </leafNode>
+ </children>
+ </node>
+ </children>
+ </node>
+ </children>
+ </node>
+</interfaceDefinition>
diff --git a/smoketest/scripts/cli/test_service_monitoring_telegraf.py b/smoketest/scripts/cli/test_service_monitoring_telegraf.py
new file mode 100755
index 000000000..b857926e2
--- /dev/null
+++ b/smoketest/scripts/cli/test_service_monitoring_telegraf.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2021 VyOS maintainers and contributors
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 or later as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import unittest
+
+from base_vyostest_shim import VyOSUnitTestSHIM
+
+from vyos.configsession import ConfigSession
+from vyos.configsession import ConfigSessionError
+from vyos.util import process_named_running
+from vyos.util import read_file
+
# Process expected to be running once the configuration is committed
PROCESS_NAME = 'telegraf'
# Configuration file rendered by the conf-mode script
TELEGRAF_CONF = '/run/telegraf/vyos-telegraf.conf'

# CLI path under test
base_path = ['service', 'monitoring', 'telegraf']

# Values committed by the test and looked up again in the rendered file
org = 'log@in.local'
token = 'GuRJc12tIzfjnYdKRAIYbxdWd2aTpOT9PVYNddzDnFV4HkAcD7u7-kndTFXjGuXzJN6TTxmrvPODB4mnFcseDV=='
port = '8888'
url = 'https://foo.local'
# Never set on the CLI by the test; compared against the rendered bucket value
bucket = 'main'

# Input plugin names that must show up in the rendered configuration
inputs = [
    'cpu',
    'disk',
    'mem',
    'net',
    'system',
    'kernel',
    'interrupts',
    'syslog',
]
+
class TestMonitoringTelegraf(VyOSUnitTestSHIM.TestCase):
    """Smoketest for the `service monitoring telegraf` CLI subtree."""

    def tearDown(self):
        # Remove the telegraf configuration again after every test
        self.cli_delete(base_path)
        self.cli_commit()

    def test_01_basic_config(self):
        """Commit a minimal configuration and inspect the rendered file."""
        self.cli_set(base_path + ['authentication', 'organization', org])
        self.cli_set(base_path + ['authentication', 'token', token])
        self.cli_set(base_path + ['port', port])
        self.cli_set(base_path + ['url', url])

        # commit changes
        self.cli_commit()

        # Check for running process
        self.assertTrue(process_named_running(PROCESS_NAME))

        config = read_file(TELEGRAF_CONF)

        # Check telegraf config
        self.assertIn(f'organization = "{org}"', config)
        self.assertIn(token, config)
        self.assertIn(f'urls = ["{url}:{port}"]', config)
        self.assertIn(f'bucket = "{bucket}"', config)

        # Match the complete table header rather than the bare plugin name:
        # 'net' and 'system' are also substrings of 'netstat' and
        # 'systemd_units', so a bare substring check would still pass if the
        # plugin section itself were missing.
        for plugin in inputs:
            self.assertIn(f'[[inputs.{plugin}]]', config)
+
if __name__ == '__main__':
    # Run every test case of this module with verbose per-test output
    unittest.main(
        verbosity=2,
    )
diff --git a/src/conf_mode/service_monitoring_telegraf.py b/src/conf_mode/service_monitoring_telegraf.py
new file mode 100755
index 000000000..a1e7a7286
--- /dev/null
+++ b/src/conf_mode/service_monitoring_telegraf.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2021 VyOS maintainers and contributors
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 or later as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import json
+
+from sys import exit
+from shutil import rmtree
+
+from vyos.config import Config
+from vyos.configdict import dict_merge
+from vyos.template import render
+from vyos.util import call
+from vyos.util import chown
+from vyos.util import cmd
+from vyos.xml import defaults
+from vyos import ConfigError
+from vyos import airbag
+airbag.enable()
+
+
# Directory holding the rendered telegraf configuration
base_dir = '/run/telegraf'
config_telegraf = f'{base_dir}/vyos-telegraf.conf'

# Directories below /etc/telegraf used by this script
cache_dir = '/etc/telegraf/.cache'
custom_scripts_dir = '/etc/telegraf/custom_scripts'

# rsyslog snippet rendered from monitoring/syslog_telegraf.tmpl
syslog_telegraf = '/etc/rsyslog.d/50-telegraf.conf'

# systemd unit and its drop-in override
systemd_telegraf_service = '/etc/systemd/system/vyos-telegraf.service'
systemd_telegraf_override_dir = '/etc/systemd/system/vyos-telegraf.service.d'
systemd_override = f'{systemd_telegraf_override_dir}/10-override.conf'
+
+
def get_nft_filter_chains():
    """
    Return the chain names of the nftables "ip filter" table.

    The table is listed as JSON with `nft --json list table ip filter`.
    Every element of the top-level 'nftables' list that has a 'chain' key
    contributes its chain name, in the order the elements are listed.
    """
    ruleset = json.loads(cmd('nft --json list table ip filter'))
    return [
        entry['chain']['name']
        for entry in ruleset['nftables']
        if 'chain' in entry
    ]
+
def get_config(config=None):
    """
    Build the dictionary used to render the telegraf templates.

    config: optional configuration object; a new Config() is created when
            it is omitted or falsy.

    Returns None when `service monitoring telegraf` does not exist.
    Otherwise returns the CLI dictionary merged with the node defaults via
    dict_merge(), extended with the keys 'nft_chains' and
    'custom_scripts_dir'.
    """
    conf = config if config else Config()

    base = ['service', 'monitoring', 'telegraf']
    if not conf.exists(base):
        return None

    cli_settings = conf.get_config_dict(base, key_mangling=('-', '_'),
                                        get_first_key=True,
                                        no_tag_node_value_mangle=True)

    # The dict representation of the CLI has been gathered; the default
    # options retrieved for this node still have to be merged into it.
    monitoring = dict_merge(defaults(base), cli_settings)

    # Extra values consumed by monitoring/telegraf.tmpl
    monitoring['nft_chains'] = get_nft_filter_chains()
    monitoring['custom_scripts_dir'] = custom_scripts_dir

    return monitoring
+
def verify(monitoring):
    """
    Validate the telegraf configuration dictionary.

    A falsy dictionary (service removed) is accepted as is. Otherwise
    'authentication' must contain both 'organization' and 'token', and
    'url' must be present.

    Raises ConfigError when a mandatory option is missing.
    """
    # bail out early - looks like removal from running config
    if not monitoring:
        return None

    if 'authentication' not in monitoring:
        credentials_complete = False
    else:
        authentication = monitoring['authentication']
        credentials_complete = ('organization' in authentication
                                and 'token' in authentication)

    if not credentials_complete:
        raise ConfigError('Authentication "organization and token" are mandatory!')

    if 'url' not in monitoring:
        raise ConfigError('Monitoring "url" is mandatory!')

    return None
+
def generate(monitoring):
    """
    Write or remove every file belonging to the telegraf service.

    With a falsy dictionary the rendered telegraf configuration, the systemd
    unit, its override and the rsyslog snippet are deleted when present, and
    the cache directory is removed. Otherwise the required directories are
    created and all four files are rendered from their templates.
    """
    if not monitoring:
        # Delete config and systemd files
        generated_files = (config_telegraf, systemd_telegraf_service,
                           systemd_override, syslog_telegraf)
        for path in generated_files:
            if os.path.isfile(path):
                os.unlink(path)

        # Delete old directories
        if os.path.isdir(cache_dir):
            rmtree(cache_dir, ignore_errors=True)

        return None

    # Create telegraf cache dir and hand it over to the telegraf user
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    chown(cache_dir, 'telegraf', 'telegraf')

    # Create the systemd override dir and the custom scripts dir
    for directory in (systemd_telegraf_override_dir, custom_scripts_dir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    # Render telegraf configuration, systemd unit and override, rsyslog snippet
    render(config_telegraf, 'monitoring/telegraf.tmpl', monitoring)
    render(systemd_telegraf_service,
           'monitoring/systemd_vyos_telegraf_service.tmpl', monitoring)
    # The override is rendered with mode 0640; its template contains the
    # authentication token.
    render(systemd_override, 'monitoring/override.conf.tmpl', monitoring,
           permission=0o640)
    render(syslog_telegraf, 'monitoring/syslog_telegraf.tmpl', monitoring)

    chown(base_dir, 'telegraf', 'telegraf')

    return None
+
def apply(monitoring):
    """
    Activate the generated configuration.

    Reloads the systemd manager configuration, restarts vyos-telegraf when
    a configuration exists (stops it otherwise) and finally restarts
    rsyslog.
    """
    # Reload systemd manager configuration
    call('systemctl daemon-reload')

    action = 'restart' if monitoring else 'stop'
    call(f'systemctl {action} vyos-telegraf.service')

    # Telegraf include custom rsyslog config changes
    call('systemctl restart rsyslog')
+
if __name__ == '__main__':
    # get_config -> verify -> generate -> apply; a ConfigError raised by any
    # step is printed and the script exits with status 1
    try:
        monitoring_config = get_config()
        verify(monitoring_config)
        generate(monitoring_config)
        apply(monitoring_config)
    except ConfigError as error:
        print(error)
        exit(1)
diff --git a/src/etc/telegraf/custom_scripts/show_interfaces_input_filter.py b/src/etc/telegraf/custom_scripts/show_interfaces_input_filter.py
new file mode 100755
index 000000000..0f5e366cd
--- /dev/null
+++ b/src/etc/telegraf/custom_scripts/show_interfaces_input_filter.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+
+import subprocess
+import time
+
def status_to_int(status):
    """
    Translate a one-letter status flag into its numeric code.

    'u' -> '0', 'D' -> '1', 'A' -> '2'; any other value yields ''.
    The code is returned as a string.
    """
    codes = {'u': '0', 'D': '1', 'A': '2'}
    if status in codes:
        return codes[status]
    return ""
+
def description_check(line):
    """
    Return the description part of an already split interface row.

    Everything from the fourth field onwards is joined with single spaces;
    when nothing is left the placeholder "empty" is returned.
    """
    return " ".join(line[3:]) or "empty"
+
def gen_ip_list(index, interfaces):
    """
    Collect all addresses belonging to the interface row at `index`.

    index:      position of the interface row inside `interfaces`
    interfaces: list of text rows; an interface row carries its first
                address in the second field, and each directly following
                row consisting of exactly one field is a further address
                of the same interface

    Returns the addresses joined by single spaces.

    The rows after `index` are walked via a slice, so an interface row that
    is the very last row of the list is handled instead of indexing past
    the end of the list.
    """
    addresses = [interfaces[index].split()[1]]
    for row in interfaces[index + 1:]:
        fields = row.split()
        # The first row that is not a lone address ends this interface
        if len(fields) != 1:
            break
        addresses.append(fields[0])
    return " ".join(addresses)
+
# Fetch the brief interface table from the op-mode script, one row per line
raw_output = subprocess.check_output(
    "/usr/libexec/vyos/op_mode/show_interfaces.py --action=show-brief",
    shell=True,
)
# The first three rows are not interface data (presumably the table header -
# confirm against the op-mode output) and are dropped
interfaces = raw_output.decode('utf-8').splitlines()[3:]

# Print one measurement per interface row
for index, row in enumerate(interfaces):
    line = row.split()
    # Rows with at most one field are not interface rows; gen_ip_list()
    # attaches single-field rows to the interface row preceding them
    if len(line) <= 1:
        continue
    print(f'show_interfaces,interface={line[0]} '
          f'ip_addresses="{gen_ip_list(index,interfaces)}",'
          f'state={status_to_int(line[2][0])}i,'
          f'link={status_to_int(line[2][2])}i,'
          f'description="{description_check(line)}" '
          f'{str(int(time.time()))}000000000')
diff --git a/src/etc/telegraf/custom_scripts/vyos_services_input_filter.py b/src/etc/telegraf/custom_scripts/vyos_services_input_filter.py
new file mode 100755
index 000000000..df4eed131
--- /dev/null
+++ b/src/etc/telegraf/custom_scripts/vyos_services_input_filter.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2021 VyOS maintainers and contributors
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 or later as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import time
+from vyos.configquery import ConfigTreeQuery
+from vyos.util import is_systemd_service_running, process_named_running
+
# Available services and processes, keyed by the CLI path that enables them.
# A value containing ".service" is checked as a systemd unit, any other
# value is checked as a process name.
services = {
    "protocols bgp" : "bgpd",
    "protocols ospf" : "ospfd",
    "protocols ospfv3" : "ospf6d",
    "protocols rip" : "ripd",
    "protocols ripng" : "ripngd",
    "protocols isis" : "isisd",
    "service pppoe" : "accel-ppp@pppoe.service",
    "vpn l2tp remote-access" : "accel-ppp@l2tp.service",
    "vpn pptp remote-access" : "accel-ppp@pptp.service",
    "vpn sstp" : "accel-ppp@sstp.service",
    "vpn ipsec" : "charon"
}

# Services to report on; zebra and staticd are always included
conf_services = {
    'zebra' : 0,
    'staticd' : 0,
}

# Add the daemon of every service whose CLI path exists in the configuration
config = ConfigTreeQuery()
for cli_path, daemon in services.items():
    if config.exists(cli_path):
        conf_services[daemon] = 0

# Print one measurement per service: status 1 when running, 0 otherwise
for conf_service in conf_services:
    if ".service" in conf_service:
        # Check systemd service
        running = is_systemd_service_running(conf_service)
    else:
        # Check process
        running = process_named_running(conf_service)
    status = 1 if running else 0
    print(f'vyos_services,service="{conf_service}" '
          f'status={str(status)}i {str(int(time.time()))}000000000')