From 932af7f098808009f626c788deb9e1d1c8bf3426 Mon Sep 17 00:00:00 2001 From: Viacheslav Hletenko Date: Mon, 13 Jun 2022 15:40:11 +0000 Subject: routing: T1237: Add new feature failover route Failover route allows to install static routes to the kernel routing table only if required target or gateway is alive When target or gateway doesn't respond for ICMP/ARP checks this route deleted from the routing table Routes are marked as protocol 'failover' (rt_protos) cat /etc/iproute2/rt_protos.d/failover.conf 111 failover ip route add 203.0.113.1 metric 2 via 192.0.2.1 dev eth0 proto failover $ sudo ip route show proto failover 203.0.113.1 via 192.0.2.1 dev eth0 metric 1 So we can safely flush such routes --- src/conf_mode/protocols_failover.py | 121 ++++++++++++++++++++++++ src/helpers/vyos-failover.py | 184 ++++++++++++++++++++++++++++++++++++ 2 files changed, 305 insertions(+) create mode 100755 src/conf_mode/protocols_failover.py create mode 100755 src/helpers/vyos-failover.py (limited to 'src') diff --git a/src/conf_mode/protocols_failover.py b/src/conf_mode/protocols_failover.py new file mode 100755 index 000000000..048ba7a89 --- /dev/null +++ b/src/conf_mode/protocols_failover.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2022 VyOS maintainers and contributors +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 or later as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import json + +from pathlib import Path + +from vyos.config import Config +from vyos.configdict import dict_merge +from vyos.template import render +from vyos.util import call +from vyos.xml import defaults +from vyos import ConfigError +from vyos import airbag + +airbag.enable() + + +service_name = 'vyos-failover' +service_conf = Path(f'/run/{service_name}.conf') +systemd_service = '/etc/systemd/system/vyos-failover.service' +rt_proto_failover = '/etc/iproute2/rt_protos.d/failover.conf' + + +def get_config(config=None): + if config: + conf = config + else: + conf = Config() + + base = ['protocols', 'failover'] + failover = conf.get_config_dict(base, key_mangling=('-', '_'), get_first_key=True) + + # Set default values only if we set config + if failover.get('route'): + for route, route_config in failover.get('route').items(): + for next_hop, next_hop_config in route_config.get('next_hop').items(): + default_values = defaults(base + ['route']) + failover['route'][route]['next_hop'][next_hop] = dict_merge( + default_values['next_hop'], failover['route'][route]['next_hop'][next_hop]) + + return failover + +def verify(failover): + # bail out early - looks like removal from running config + if not failover: + return None + + if 'route' not in failover: + raise ConfigError(f'Failover "route" is mandatory!') + + for route, route_config in failover['route'].items(): + if not route_config.get('next_hop'): + raise ConfigError(f'Next-hop for "{route}" is mandatory!') + + for next_hop, next_hop_config in route_config.get('next_hop').items(): + if 'interface' not in next_hop_config: + raise ConfigError(f'Interface for route "{route}" next-hop "{next_hop}" is mandatory!') + + if not next_hop_config.get('check'): + raise ConfigError(f'Check target for next-hop "{next_hop}" is mandatory!') + + if 'target' not in next_hop_config['check']: + raise ConfigError(f'Check target for next-hop "{next_hop}" is mandatory!') + + check_type = next_hop_config['check']['type'] + if check_type == 'tcp' and 'port' not in next_hop_config['check']: + raise ConfigError(f'Check port for next-hop "{next_hop}" and type TCP is mandatory!') + + return None + +def generate(failover): + if not failover: + service_conf.unlink(missing_ok=True) + return None + + # Add own rt_proto 'failover' + # Helps to detect all own routes 'proto failover' + with open(rt_proto_failover, 'w') as f: + f.write('111 failover\n') + + # Write configuration file + conf_json = json.dumps(failover, indent=4) + service_conf.write_text(conf_json) + render(systemd_service, 'protocols/systemd_vyos_failover_service.j2', failover) + + return None + +def apply(failover): + if not failover: + call(f'systemctl stop {service_name}.service') + call('ip route flush protocol failover') + else: + call('systemctl daemon-reload') + call(f'systemctl restart {service_name}.service') + call(f'ip route flush protocol failover') + + return None + +if __name__ == '__main__': + try: + c = get_config() + verify(c) + generate(c) + apply(c) + except ConfigError as e: + print(e) + exit(1) diff --git a/src/helpers/vyos-failover.py b/src/helpers/vyos-failover.py new file mode 100755 index 000000000..1ac193423 --- /dev/null +++ b/src/helpers/vyos-failover.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2022 VyOS maintainers and contributors +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 or later as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import argparse +import json +import subprocess +import socket +import time + +from vyos.util import rc_cmd +from pathlib import Path +from systemd import journal + + +my_name = Path(__file__).stem + + +def get_best_route_options(route, debug=False): + """ + Return current best route ('gateway, interface, metric) + + % get_best_route_options('203.0.113.1') + ('192.168.0.1', 'eth1', 1) + + % get_best_route_options('203.0.113.254') + (None, None, None) + """ + rc, data = rc_cmd(f'ip --detail --json route show protocol failover {route}') + if rc == 0: + data = json.loads(data) + if len(data) == 0: + print(f'\nRoute {route} for protocol failover was not found') + return None, None, None + # Fake metric 999 by default + # Search route with the lowest metric + best_metric = 999 + for entry in data: + if debug: print('\n', entry) + metric = entry.get('metric') + gateway = entry.get('gateway') + iface = entry.get('dev') + if metric < best_metric: + best_metric = metric + best_gateway = gateway + best_interface = iface + if debug: + print(f'### Best_route exists: {route}, best_gateway: {best_gateway}, ' + f'best_metric: {best_metric}, best_iface: {best_interface}') + return best_gateway, best_interface, best_metric + +def is_port_open(ip, port): + """ + Check connection to remote host and port + Return True if host alive + + % is_port_open('example.com', 8080) + True + """ + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) + s.settimeout(2) + try: + s.connect((ip, int(port))) + s.shutdown(socket.SHUT_RDWR) + return True + except: + return False + finally: + s.close() + +def is_target_alive(target=None, iface='', proto='icmp', port=None, debug=False): + """ + Host availability check by ICMP, ARP, TCP + Return True if target checks is successful + + % is_target_alive('192.0.2.1', 'eth1', proto='arp') + True + """ + if iface != '': + iface = f'-I {iface}' + if proto == 'icmp': + command = f'/usr/bin/ping -q {target} {iface} -n -c 2 -W 1' + rc, response = rc_cmd(command) + if debug: print(f' [ CHECK-TARGET ]: [{command}] -- return-code [RC: {rc}]') + if rc == 0: + return True + elif proto == 'arp': + command = f'/usr/bin/arping -b -c 2 -f -w 1 -i 1 {iface} {target}' + rc, response = rc_cmd(command) + if debug: print(f' [ CHECK-TARGET ]: [{command}] -- return-code [RC: {rc}]') + if rc == 0: + return True + elif proto == 'tcp' and port is not None: + return True if is_port_open(target, port) else False + else: + return False + + +if __name__ == '__main__': + # Parse command arguments and get config + parser = argparse.ArgumentParser() + parser.add_argument('-c', + '--config', + action='store', + help='Path to protocols failover configuration', + required=True, + type=Path) + + args = parser.parse_args() + try: + config_path = Path(args.config) + config = json.loads(config_path.read_text()) + except Exception as err: + print( + f'Configuration file "{config_path}" does not exist or malformed: {err}' + ) + exit(1) + + # Useful debug info to console, use debug = True + # sudo systemctl stop vyos-failover.service + # sudo /usr/libexec/vyos/vyos-failover.py --config /run/vyos-failover.conf + debug = False + + while(True): + + for route, route_config in config.get('route').items(): + + exists_route = exists_gateway, exists_iface, exists_metric = get_best_route_options(route, debug=debug) + + for next_hop, nexthop_config in route_config.get('next_hop').items(): + conf_iface = nexthop_config.get('interface') + conf_metric = int(nexthop_config.get('metric')) + port = nexthop_config.get('check').get('port') + port_opt = f'port {port}' if port else '' + proto = nexthop_config.get('check').get('type') + target = nexthop_config.get('check').get('target') + timeout = nexthop_config.get('check').get('timeout') + + # Best route not fonund in the current routing table + if exists_route == (None, None, None): + if debug: print(f" [NEW_ROUTE_DETECTED] route: [{route}]") + # Add route if check-target alive + if is_target_alive(target, conf_iface, proto, port, debug=debug): + if debug: print(f' [ ADD ] -- ip route add {route} via {next_hop} dev {conf_iface} ' + f'metric {conf_metric} proto failover\n###') + rc, command = rc_cmd(f'ip route add {route} via {next_hop} dev {conf_iface} ' + f'metric {conf_metric} proto failover') + # If something is wrong and gateway not added + # Example: Error: Next-hop has invalid gateway. + if rc !=0: + if debug: print(f'{command} -- return-code [RC: {rc}] {next_hop} dev {conf_iface}') + else: + journal.send(f'ip route add {route} via {next_hop} dev {conf_iface} ' + f'metric {conf_metric} proto failover', SYSLOG_IDENTIFIER=my_name) + else: + if debug: print(f' [ TARGET_FAIL ] target checks fails for [{target}], do nothing') + journal.send(f'Check fail for route {route} target {target} proto {proto} ' + f'{port_opt}', SYSLOG_IDENTIFIER=my_name) + + # Route was added, check if the target is alive + # We should delete route if check fails only if route exists in the routing table + if not is_target_alive(target, conf_iface, proto, port, debug=debug) and \ + exists_route != (None, None, None): + if debug: + print(f'Nexh_hop {next_hop} fail, target not response') + print(f' [ DEL ] -- ip route del {route} via {next_hop} dev {conf_iface} ' + f'metric {conf_metric} proto failover [DELETE]') + rc_cmd(f'ip route del {route} via {next_hop} dev {conf_iface} metric {conf_metric} proto failover') + journal.send(f'ip route del {route} via {next_hop} dev {conf_iface} ' + f'metric {conf_metric} proto failover', SYSLOG_IDENTIFIER=my_name) + + time.sleep(int(timeout)) -- cgit v1.2.3