diff options
| author | Christian Poessinger <christian.poessinger@rohde-schwarz.com> | 2022-01-03 17:35:28 +0100 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-01-03 17:35:28 +0100 | 
| commit | ca3cd970f2973ebfc8c8784ead73bbd582d30d54 (patch) | |
| tree | 8d53d23723015b51f72e4f259a65176d293fa29b | |
| parent | 4743b91f4eb98bc2b4d5eee1d2f4d06e10ec032e (diff) | |
| parent | 605cac35526c8dfe409891f777d50547fb94392f (diff) | |
| download | vyos-1x-ca3cd970f2973ebfc8c8784ead73bbd582d30d54.tar.gz vyos-1x-ca3cd970f2973ebfc8c8784ead73bbd582d30d54.zip | |
Merge pull request #1018 from sever-sever/T3872
monitoring: T3872: Add a new feature service monitoring
| -rw-r--r-- | data/templates/monitoring/override.conf.tmpl | 7 | ||||
| -rw-r--r-- | data/templates/monitoring/syslog_telegraf.tmpl | 5 | ||||
| -rw-r--r-- | data/templates/monitoring/systemd_vyos_telegraf_service.tmpl | 16 | ||||
| -rw-r--r-- | data/templates/monitoring/telegraf.tmpl | 63 | ||||
| -rw-r--r-- | debian/control | 1 | ||||
| -rw-r--r-- | debian/vyos-1x.install | 1 | ||||
| -rw-r--r-- | interface-definitions/service_monitoring_telegraf.xml.in | 113 | ||||
| -rwxr-xr-x | smoketest/scripts/cli/test_service_monitoring_telegraf.py | 65 | ||||
| -rwxr-xr-x | src/conf_mode/service_monitoring_telegraf.py | 154 | ||||
| -rwxr-xr-x | src/etc/telegraf/custom_scripts/show_interfaces_input_filter.py | 47 | ||||
| -rwxr-xr-x | src/etc/telegraf/custom_scripts/vyos_services_input_filter.py | 61 | 
11 files changed, 533 insertions, 0 deletions
| diff --git a/data/templates/monitoring/override.conf.tmpl b/data/templates/monitoring/override.conf.tmpl new file mode 100644 index 000000000..63f6d7391 --- /dev/null +++ b/data/templates/monitoring/override.conf.tmpl @@ -0,0 +1,7 @@ +[Unit] +After=vyos-router.service +ConditionPathExists=/run/telegraf/vyos-telegraf.conf +[Service] +Environment=INFLUX_TOKEN={{ authentication.token }} +CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN +AmbientCapabilities=CAP_NET_RAW CAP_NET_ADMIN diff --git a/data/templates/monitoring/syslog_telegraf.tmpl b/data/templates/monitoring/syslog_telegraf.tmpl new file mode 100644 index 000000000..cdcbd92a4 --- /dev/null +++ b/data/templates/monitoring/syslog_telegraf.tmpl @@ -0,0 +1,5 @@ +# Generated by /usr/libexec/vyos/conf_mode/service_monitoring_telegraf.py + +$ModLoad omuxsock +$OMUxSockSocket /run/telegraf/telegraf_syslog.sock +*.notice :omuxsock: diff --git a/data/templates/monitoring/systemd_vyos_telegraf_service.tmpl b/data/templates/monitoring/systemd_vyos_telegraf_service.tmpl new file mode 100644 index 000000000..234ef5586 --- /dev/null +++ b/data/templates/monitoring/systemd_vyos_telegraf_service.tmpl @@ -0,0 +1,16 @@ +[Unit] +Description=The plugin-driven server agent for reporting metrics into InfluxDB +Documentation=https://github.com/influxdata/telegraf +After=network.target + +[Service] +EnvironmentFile=-/etc/default/telegraf +User=telegraf +ExecStart=/usr/bin/telegraf -config /run/telegraf/vyos-telegraf.conf -config-directory /etc/telegraf/telegraf.d $TELEGRAF_OPTS +ExecReload=/bin/kill -HUP $MAINPID +Restart=on-failure +RestartForceExitStatus=SIGPIPE +KillMode=control-group + +[Install] +WantedBy=multi-user.target diff --git a/data/templates/monitoring/telegraf.tmpl b/data/templates/monitoring/telegraf.tmpl new file mode 100644 index 000000000..62fa4df7a --- /dev/null +++ b/data/templates/monitoring/telegraf.tmpl @@ -0,0 +1,63 @@ +# Generated by /usr/libexec/vyos/conf_mode/service_monitoring_telegraf.py + +[agent] +  interval = "10s" +  round_interval = true +  metric_batch_size = 1000 +  metric_buffer_limit = 10000 +  collection_jitter = "0s" +  flush_interval = "10s" +  flush_jitter = "0s" +  precision = "" +  debug = false +  quiet = false +  logfile = "" +  hostname = "" +  omit_hostname = false +[[outputs.influxdb_v2]] +  urls = ["{{ url }}:{{ port }}"] +  insecure_skip_verify = true +  token = "{{ authentication.token }}" +  organization = "{{ authentication.organization }}" +  bucket = "{{ bucket }}" +[[inputs.cpu]] +    percpu = true +    totalcpu = true +    collect_cpu_time = false +    report_active = false +[[inputs.disk]] +    ignore_fs = ["devtmpfs", "devfs"] +[[inputs.diskio]] +[[inputs.mem]] +[[inputs.net]] +[[inputs.system]] +[[inputs.netstat]] +[[inputs.processes]] +[[inputs.kernel]] +[[inputs.interrupts]] +[[inputs.linux_sysctl_fs]] +[[inputs.systemd_units]] +[[inputs.conntrack]] +  files = ["ip_conntrack_count","ip_conntrack_max","nf_conntrack_count","nf_conntrack_max"] +  dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"] +[[inputs.ethtool]] +[[inputs.iptables]] +  use_sudo = false +  table = "filter" +  chains = {{ nft_chains }} +  use_lock = true +[[inputs.ntpq]] +  dns_lookup = true +[[inputs.internal]] +[[inputs.nstat]] +[[inputs.syslog]] +  server = "unixgram:///run/telegraf/telegraf_syslog.sock" +  best_effort = true +  syslog_standard = "RFC3164" +[[inputs.exec]] +  commands = [ +    "{{ custom_scripts_dir }}/show_interfaces_input_filter.py", +    "cat /tmp/vyos_services_input_filter" +  ] +  timeout = "10s" +  data_format = "influx" diff --git a/debian/control b/debian/control index ef124679b..3d33a48a6 100644 --- a/debian/control +++ b/debian/control @@ -151,6 +151,7 @@ Depends:    strongswan-swanctl (>= 5.9),    sudo,    systemd, +  telegraf (>= 1.20),    tcpdump,    tcptraceroute,    telnet, diff --git a/debian/vyos-1x.install b/debian/vyos-1x.install index 29d74390f..63dff43a5 100644 --- a/debian/vyos-1x.install +++ b/debian/vyos-1x.install @@ -12,6 +12,7 @@ etc/security  etc/sudoers.d  etc/systemd  etc/sysctl.d +etc/telegraf  etc/udev  etc/update-motd.d  etc/vyos diff --git a/interface-definitions/service_monitoring_telegraf.xml.in b/interface-definitions/service_monitoring_telegraf.xml.in new file mode 100644 index 000000000..0db9052ff --- /dev/null +++ b/interface-definitions/service_monitoring_telegraf.xml.in @@ -0,0 +1,113 @@ +<?xml version="1.0"?> +<interfaceDefinition> +  <node name="service"> +    <children> +      <node name="monitoring"> +        <properties> +          <help>Monitoring services</help> +          <priority>1280</priority> +        </properties> +        <children> +          <node name="telegraf" owner="${vyos_conf_scripts_dir}/service_monitoring_telegraf.py"> +            <properties> +              <help>Telegraf monitoring</help> +            </properties> +            <children> +              <node name="authentication"> +                <properties> +                  <help>Authentication parameters</help> +                </properties> +                <children> +                  <leafNode name="organization"> +                    <properties> +                      <help>Authentication organization for InfluxDB v2 [REQUIRED]</help> +                      <constraint> +                        <regex>^[a-zA-Z][1-9a-zA-Z@_\-.]{2,50}$</regex> +                      </constraint> +                      <constraintErrorMessage>Organization name must be alphanumeric and can contain hyphens, underscores and at symbol.</constraintErrorMessage> +                    </properties> +                  </leafNode> +                  <leafNode name="token"> +                    <properties> +                      <help>Authentication token for InfluxDB v2 [REQUIRED]</help> +                      <valueHelp> +                        <format>txt</format> +                        <description>Authentication token</description> +                      </valueHelp> +                      <constraint> +                        <regex>^[a-zA-Z0-9-_]{86}==$</regex> +                      </constraint> +                      <constraintErrorMessage>Token must be 88 characters long and must contain only [a-zA-Z0-9-_] and '==' characters.</constraintErrorMessage> +                    </properties> +                  </leafNode> +                </children> +              </node> +              <leafNode name="bucket"> +                <properties> +                  <help>Remote bucket, by default (main)</help> +                </properties> +                <defaultValue>main</defaultValue> +              </leafNode> +              <leafNode name="source"> +                <properties> +                  <help>Source parameters for monitoring (default: all)</help> +                  <completionHelp> +                    <list>all hardware-utilization logs network system telegraf</list> +                  </completionHelp> +                  <valueHelp> +                    <format>all</format> +                    <description>All parameters (default)</description> +                  </valueHelp> +                  <valueHelp> +                    <format>hardware-utilization</format> +                    <description>Hardware-utilization parameters (CPU, disk, memory)</description> +                  </valueHelp> +                  <valueHelp> +                    <format>logs</format> +                    <description>Logs parameters</description> +                  </valueHelp> +                  <valueHelp> +                    <format>network</format> +                    <description>Network parameters (net, netstat, nftables)</description> +                  </valueHelp> +                  <valueHelp> +                    <format>system</format> +                    <description>System parameters (system, processes, interrupts)</description> +                  </valueHelp> +                  <valueHelp> +                    <format>telegraf</format> +                    <description>Telegraf internal statistics</description> +                  </valueHelp> +                  <constraint> +                    <regex>^(all|hardware-utilization|logs|network|system|telegraf)$</regex> +                  </constraint> +                  <multi/> +                </properties> +                <defaultValue>all</defaultValue> +              </leafNode> +              <leafNode name="url"> +                <properties> +                  <help>Remote URL [REQUIRED]</help> +                  <valueHelp> +                    <format>url</format> +                    <description>Remote URL to InfluxDB v2</description> +                  </valueHelp> +                  <constraint> +                    <regex>^(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}?(\/.*)?$</regex> +                  </constraint> +                  <constraintErrorMessage>Incorrect URL format.</constraintErrorMessage> +                </properties> +              </leafNode> +              <leafNode name="port"> +                <properties> +                  <help>Remote port (default: 8086)</help> +                </properties> +                <defaultValue>8086</defaultValue> +              </leafNode> +            </children> +          </node> +        </children> +      </node> +    </children> +  </node> +</interfaceDefinition> diff --git a/smoketest/scripts/cli/test_service_monitoring_telegraf.py b/smoketest/scripts/cli/test_service_monitoring_telegraf.py new file mode 100755 index 000000000..b857926e2 --- /dev/null +++ b/smoketest/scripts/cli/test_service_monitoring_telegraf.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2021 VyOS maintainers and contributors +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 or later as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. + +import unittest + +from base_vyostest_shim import VyOSUnitTestSHIM + +from vyos.configsession import ConfigSession +from vyos.configsession import ConfigSessionError +from vyos.util import process_named_running +from vyos.util import read_file + +PROCESS_NAME = 'telegraf' +TELEGRAF_CONF = '/run/telegraf/vyos-telegraf.conf' +base_path = ['service', 'monitoring', 'telegraf'] +org = 'log@in.local' +token = 'GuRJc12tIzfjnYdKRAIYbxdWd2aTpOT9PVYNddzDnFV4HkAcD7u7-kndTFXjGuXzJN6TTxmrvPODB4mnFcseDV==' +port = '8888' +url = 'https://foo.local' +bucket = 'main' +inputs = ['cpu', 'disk', 'mem', 'net', 'system', 'kernel', 'interrupts', 'syslog'] + +class TestMonitoringTelegraf(VyOSUnitTestSHIM.TestCase): +    def tearDown(self): +        self.cli_delete(base_path) +        self.cli_commit() + +    def test_01_basic_config(self): +        self.cli_set(base_path + ['authentication', 'organization', org]) +        self.cli_set(base_path + ['authentication', 'token', token]) +        self.cli_set(base_path + ['port', port]) +        self.cli_set(base_path + ['url', url]) + +        # commit changes +        self.cli_commit() + +        # Check for running process +        self.assertTrue(process_named_running(PROCESS_NAME)) + +        config = read_file(TELEGRAF_CONF) + +        # Check telegraf config +        self.assertIn(f'organization = "{org}"', config) +        self.assertIn(token, config) +        self.assertIn(f'urls = ["{url}:{port}"]', config) +        self.assertIn(f'bucket = "{bucket}"', config) + +        for input in inputs: +            self.assertIn(input, config) + +if __name__ == '__main__': +    unittest.main(verbosity=2) diff --git a/src/conf_mode/service_monitoring_telegraf.py b/src/conf_mode/service_monitoring_telegraf.py new file mode 100755 index 000000000..a1e7a7286 --- /dev/null +++ b/src/conf_mode/service_monitoring_telegraf.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2021 VyOS maintainers and contributors +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 or later as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. + +import os +import json + +from sys import exit +from shutil import rmtree + +from vyos.config import Config +from vyos.configdict import dict_merge +from vyos.template import render +from vyos.util import call +from vyos.util import chown +from vyos.util import cmd +from vyos.xml import defaults +from vyos import ConfigError +from vyos import airbag +airbag.enable() + + +base_dir = '/run/telegraf' +cache_dir = f'/etc/telegraf/.cache' +config_telegraf = f'{base_dir}/vyos-telegraf.conf' +custom_scripts_dir = '/etc/telegraf/custom_scripts' +syslog_telegraf = '/etc/rsyslog.d/50-telegraf.conf' +systemd_telegraf_service = '/etc/systemd/system/vyos-telegraf.service' +systemd_telegraf_override_dir = '/etc/systemd/system/vyos-telegraf.service.d' +systemd_override = f'{systemd_telegraf_override_dir}/10-override.conf' + + +def get_nft_filter_chains(): +    """ +    Get nft chains for table filter +    """ +    nft = cmd('nft --json list table ip filter') +    nft = json.loads(nft) +    chain_list = [] + +    for output in nft['nftables']: +        if 'chain' in output: +            chain = output['chain']['name'] +            chain_list.append(chain) + +    return chain_list + +def get_config(config=None): + +    if config: +        conf = config +    else: +        conf = Config() +    base = ['service', 'monitoring', 'telegraf'] +    if not conf.exists(base): +        return None + +    monitoring = conf.get_config_dict(base, key_mangling=('-', '_'), get_first_key=True, +                                    no_tag_node_value_mangle=True) + +    # We have gathered the dict representation of the CLI, but there are default +    # options which we need to update into the dictionary retrived. +    default_values = defaults(base) +    monitoring = dict_merge(default_values, monitoring) + +    monitoring['nft_chains'] = get_nft_filter_chains() +    monitoring['custom_scripts_dir'] = custom_scripts_dir + +    return monitoring + +def verify(monitoring): +    # bail out early - looks like removal from running config +    if not monitoring: +        return None + +    if 'authentication' not in monitoring or \ +       'organization' not in monitoring['authentication'] or \ +       'token' not in monitoring['authentication']: +        raise ConfigError(f'Authentication "organization and token" are mandatory!') + +    if 'url' not in monitoring: +        raise ConfigError(f'Monitoring "url" is mandatory!') + +    return None + +def generate(monitoring): +    if not monitoring: +        # Delete config and systemd files +        config_files = [config_telegraf, systemd_telegraf_service, systemd_override, syslog_telegraf] +        for file in config_files: +            if os.path.isfile(file): +                os.unlink(file) + +        # Delete old directories +        if os.path.isdir(cache_dir): +            rmtree(cache_dir, ignore_errors=True) + +        return None + +    # Create telegraf cache dir +    if not os.path.exists(cache_dir): +        os.makedirs(cache_dir) + +    chown(cache_dir, 'telegraf', 'telegraf') + +    # Create systemd override dir +    if not os.path.exists(systemd_telegraf_override_dir): +        os.mkdir(systemd_telegraf_override_dir) + +    # Create custome scripts dir +    if not os.path.exists(custom_scripts_dir): +        os.mkdir(custom_scripts_dir) + +    # Render telegraf configuration and systemd override +    render(config_telegraf, 'monitoring/telegraf.tmpl', monitoring) +    render(systemd_telegraf_service, 'monitoring/systemd_vyos_telegraf_service.tmpl', monitoring) +    render(systemd_override, 'monitoring/override.conf.tmpl', monitoring, permission=0o640) +    render(syslog_telegraf, 'monitoring/syslog_telegraf.tmpl', monitoring) + +    chown(base_dir, 'telegraf', 'telegraf') + +    return None + +def apply(monitoring): +    # Reload systemd manager configuration +    call('systemctl daemon-reload') +    if monitoring: +        call('systemctl restart vyos-telegraf.service') +    else: +        call('systemctl stop vyos-telegraf.service') +    # Telegraf include custom rsyslog config changes +    call('systemctl restart rsyslog') + +if __name__ == '__main__': +    try: +        c = get_config() +        verify(c) +        generate(c) +        apply(c) +    except ConfigError as e: +        print(e) +        exit(1) diff --git a/src/etc/telegraf/custom_scripts/show_interfaces_input_filter.py b/src/etc/telegraf/custom_scripts/show_interfaces_input_filter.py new file mode 100755 index 000000000..0f5e366cd --- /dev/null +++ b/src/etc/telegraf/custom_scripts/show_interfaces_input_filter.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 + +import subprocess +import time + +def status_to_int(status): +    switcher={ +        'u':'0', +        'D':'1', +        'A':'2' +        } +    return switcher.get(status,"") + +def description_check(line): +    desc=" ".join(line[3:]) +    if desc == "": +        return "empty" +    else: +        return desc + +def gen_ip_list(index,interfaces): +    line=interfaces[index].split() +    ip_list=line[1] +    if index < len(interfaces): +        index += 1 +        while len(interfaces[index].split())==1: +            ip = interfaces[index].split() +            ip_list = ip_list + " " + ip[0] +            index += 1 +            if index == len(interfaces): +                break +    return ip_list + +interfaces = subprocess.check_output("/usr/libexec/vyos/op_mode/show_interfaces.py --action=show-brief", shell=True).decode('utf-8').splitlines() +del interfaces[:3] +lines_count=len(interfaces) +index=0 +while index<lines_count: +    line=interfaces[index].split() +    if len(line)>1: +        print(f'show_interfaces,interface={line[0]} ' +              f'ip_addresses="{gen_ip_list(index,interfaces)}",' +              f'state={status_to_int(line[2][0])}i,' +              f'link={status_to_int(line[2][2])}i,' +              f'description="{description_check(line)}" ' +              f'{str(int(time.time()))}000000000') +    index += 1 diff --git a/src/etc/telegraf/custom_scripts/vyos_services_input_filter.py b/src/etc/telegraf/custom_scripts/vyos_services_input_filter.py new file mode 100755 index 000000000..df4eed131 --- /dev/null +++ b/src/etc/telegraf/custom_scripts/vyos_services_input_filter.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2021 VyOS maintainers and contributors +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 or later as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. + + +import time +from vyos.configquery import ConfigTreeQuery +from vyos.util import is_systemd_service_running, process_named_running + +# Availible services and prouceses +# 1 - service +# 2 - process +services = { +    "protocols bgp"          : "bgpd", +    "protocols ospf"         : "ospfd", +    "protocols ospfv3"       : "ospf6d", +    "protocols rip"          : "ripd", +    "protocols ripng"        : "ripngd", +    "protocols isis"         : "isisd", +    "service pppoe"          : "accel-ppp@pppoe.service", +    "vpn l2tp remote-access" : "accel-ppp@l2tp.service", +    "vpn pptp remote-access" : "accel-ppp@pptp.service", +    "vpn sstp"               : "accel-ppp@sstp.service", +    "vpn ipsec"              : "charon" +} + +# Configured services +conf_services = { +    'zebra'   : 0, +    'staticd' : 0, +} +# Get configured service and create list to check if process running +config = ConfigTreeQuery() +for service in services: +    if config.exists(service): +        conf_services[services[service]] = 0 + +for conf_service in conf_services: +    status = 0 +    if ".service" in conf_service: +        # Check systemd service +        if is_systemd_service_running(conf_service): +            status = 1 +    else: +        # Check process +        if process_named_running(conf_service): +            status = 1 +    print(f'vyos_services,service="{conf_service}" ' +          f'status={str(status)}i {str(int(time.time()))}000000000') | 
