summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorViacheslav Hletenko <v.gletenko@vyos.io>2022-06-13 15:40:11 +0000
committerViacheslav Hletenko <v.gletenko@vyos.io>2022-12-14 18:19:22 +0000
commit932af7f098808009f626c788deb9e1d1c8bf3426 (patch)
tree3845e39f7ac9c92c4ad5011524aab1e17b063468
parent256ad3fc329a846f1a2b7378bbeb8777357b433a (diff)
downloadvyos-1x-932af7f098808009f626c788deb9e1d1c8bf3426.tar.gz
vyos-1x-932af7f098808009f626c788deb9e1d1c8bf3426.zip
routing: T1237: Add new feature failover route
Failover route allows static routes to be installed in the kernel routing table only if the required target or gateway is alive.
When the target or gateway doesn't respond to ICMP/ARP checks, the route is deleted from the routing table.
Routes are marked as protocol 'failover' (rt_protos):
cat /etc/iproute2/rt_protos.d/failover.conf
111 failover
ip route add 203.0.113.1 metric 2 via 192.0.2.1 dev eth0 proto failover
$ sudo ip route show proto failover
203.0.113.1 via 192.0.2.1 dev eth0 metric 1
So we can safely flush such routes.
-rw-r--r--data/templates/protocols/systemd_vyos_failover_service.j211
-rw-r--r--interface-definitions/protocols-failover.xml.in114
-rwxr-xr-xsrc/conf_mode/protocols_failover.py121
-rwxr-xr-xsrc/helpers/vyos-failover.py184
4 files changed, 430 insertions, 0 deletions
diff --git a/data/templates/protocols/systemd_vyos_failover_service.j2 b/data/templates/protocols/systemd_vyos_failover_service.j2
new file mode 100644
index 000000000..e6501e0f5
--- /dev/null
+++ b/data/templates/protocols/systemd_vyos_failover_service.j2
@@ -0,0 +1,11 @@
+# systemd unit that runs the vyos-failover.py helper as a long-running service
+[Unit]
+Description=Failover route service
+# Ordering only: start after vyos-router.service
+After=vyos-router.service
+
+[Service]
+Type=simple
+# Restart the helper whenever it exits
+Restart=always
+# The helper reads its settings from the JSON file given with --config
+ExecStart=/usr/bin/python3 /usr/libexec/vyos/vyos-failover.py --config /run/vyos-failover.conf
+
+[Install]
+WantedBy=multi-user.target
diff --git a/interface-definitions/protocols-failover.xml.in b/interface-definitions/protocols-failover.xml.in
new file mode 100644
index 000000000..900c76eab
--- /dev/null
+++ b/interface-definitions/protocols-failover.xml.in
@@ -0,0 +1,114 @@
+<?xml version="1.0"?>
+<!-- CLI definition of "protocols failover": IPv4 routes with one or more health-checked next-hops -->
+<interfaceDefinition>
+  <node name="protocols">
+    <children>
+      <node name="failover" owner="${vyos_conf_scripts_dir}/protocols_failover.py">
+        <properties>
+          <help>Failover Routing</help>
+          <priority>490</priority>
+        </properties>
+        <children>
+          <!-- One entry per destination prefix -->
+          <tagNode name="route">
+            <properties>
+              <help>Failover IPv4 route</help>
+              <valueHelp>
+                <format>ipv4net</format>
+                <description>IPv4 failover route</description>
+              </valueHelp>
+              <constraint>
+                <validator name="ipv4-prefix"/>
+              </constraint>
+            </properties>
+            <children>
+              <!-- One entry per candidate gateway for the prefix -->
+              <tagNode name="next-hop">
+                <properties>
+                  <help>Next-hop IPv4 router address</help>
+                  <valueHelp>
+                    <format>ipv4</format>
+                    <description>Next-hop router address</description>
+                  </valueHelp>
+                  <constraint>
+                    <validator name="ipv4-address"/>
+                  </constraint>
+                </properties>
+                <children>
+                  <!-- Health check that decides whether the route is installed -->
+                  <node name="check">
+                    <properties>
+                      <help>Check target options</help>
+                    </properties>
+                    <children>
+                      #include <include/port-number.xml.i>
+                      <leafNode name="target">
+                        <properties>
+                          <help>Check target address</help>
+                          <valueHelp>
+                            <format>ipv4</format>
+                            <description>Address to check</description>
+                          </valueHelp>
+                          <constraint>
+                            <validator name="ipv4-address"/>
+                          </constraint>
+                        </properties>
+                      </leafNode>
+                      <leafNode name="timeout">
+                        <properties>
+                          <help>Timeout between checks</help>
+                          <valueHelp>
+                            <!-- Advertised range matches the range enforced by the numeric validator below -->
+                            <format>u32:1-255</format>
+                            <description>Timeout in seconds between checks</description>
+                          </valueHelp>
+                          <constraint>
+                            <validator name="numeric" argument="--range 1-255"/>
+                          </constraint>
+                        </properties>
+                        <defaultValue>10</defaultValue>
+                      </leafNode>
+                      <leafNode name="type">
+                        <properties>
+                          <help>Check type</help>
+                          <completionHelp>
+                            <list>arp icmp tcp</list>
+                          </completionHelp>
+                          <valueHelp>
+                            <format>arp</format>
+                            <description>Check target by ARP</description>
+                          </valueHelp>
+                          <valueHelp>
+                            <format>icmp</format>
+                            <description>Check target by ICMP</description>
+                          </valueHelp>
+                          <valueHelp>
+                            <format>tcp</format>
+                            <description>Check target by TCP</description>
+                          </valueHelp>
+                          <constraint>
+                            <regex>(arp|icmp|tcp)</regex>
+                          </constraint>
+                        </properties>
+                        <defaultValue>icmp</defaultValue>
+                      </leafNode>
+                    </children>
+                  </node>
+                  #include <include/static/static-route-interface.xml.i>
+                  <leafNode name="metric">
+                    <properties>
+                      <help>Route metric for this gateway</help>
+                      <valueHelp>
+                        <format>u32:1-255</format>
+                        <description>Route metric</description>
+                      </valueHelp>
+                      <constraint>
+                        <validator name="numeric" argument="--range 1-255"/>
+                      </constraint>
+                    </properties>
+                    <defaultValue>1</defaultValue>
+                  </leafNode>
+                </children>
+              </tagNode>
+            </children>
+          </tagNode>
+        </children>
+      </node>
+    </children>
+  </node>
+</interfaceDefinition>
diff --git a/src/conf_mode/protocols_failover.py b/src/conf_mode/protocols_failover.py
new file mode 100755
index 000000000..048ba7a89
--- /dev/null
+++ b/src/conf_mode/protocols_failover.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2022 VyOS maintainers and contributors
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 or later as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import json
+
+from pathlib import Path
+
+from vyos.config import Config
+from vyos.configdict import dict_merge
+from vyos.template import render
+from vyos.util import call
+from vyos.xml import defaults
+from vyos import ConfigError
+from vyos import airbag
+
+# Enable vyos.airbag for this script
+airbag.enable()
+
+
+# Name of the systemd unit that is stopped/restarted in apply()
+service_name = 'vyos-failover'
+# JSON configuration written by generate(); the unit template passes this
+# path to the helper with --config
+service_conf = Path(f'/run/{service_name}.conf')
+# Unit file rendered by generate()
+systemd_service = '/etc/systemd/system/vyos-failover.service'
+# File where generate() writes the '111 failover' routing protocol entry
+rt_proto_failover = '/etc/iproute2/rt_protos.d/failover.conf'
+
+
+def get_config(config=None):
+    """
+    Build the 'protocols failover' configuration dictionary.
+
+    The given Config instance is used when one is passed, otherwise a new
+    Config() is created. Every configured next-hop is merged with the CLI
+    default values; defaults are only looked up when at least one route is
+    configured.
+
+    Returns the dictionary produced by get_config_dict() with the next-hop
+    defaults merged in.
+    """
+    if config:
+        conf = config
+    else:
+        conf = Config()
+
+    base = ['protocols', 'failover']
+    failover = conf.get_config_dict(base, key_mangling=('-', '_'), get_first_key=True)
+
+    # Set default values only if we set config
+    if failover.get('route'):
+        # The defaults do not depend on the route, so fetch them only once
+        default_values = defaults(base + ['route'])
+
+        for route_config in failover['route'].values():
+            # A route without next-hops must not crash here: verify() reports
+            # it afterwards with a proper ConfigError
+            next_hops = route_config.get('next_hop')
+            if not next_hops:
+                continue
+
+            for next_hop, next_hop_config in next_hops.items():
+                next_hops[next_hop] = dict_merge(
+                    default_values['next_hop'], next_hop_config)
+
+    return failover
+
+def verify(failover):
+    """
+    Validate the failover configuration.
+
+    An empty configuration (node removed) is accepted as is. Otherwise every
+    route needs at least one next-hop, and every next-hop needs an interface
+    and a check target; a TCP check additionally needs a port.
+
+    Raises ConfigError on the first violation found, returns None otherwise.
+    """
+    # Nothing to validate when the node was removed from the running config
+    if not failover:
+        return None
+
+    if 'route' not in failover:
+        raise ConfigError('Failover "route" is mandatory!')
+
+    for route, route_config in failover['route'].items():
+        next_hops = route_config.get('next_hop')
+        if not next_hops:
+            raise ConfigError(f'Next-hop for "{route}" is mandatory!')
+
+        for next_hop, next_hop_config in next_hops.items():
+            if 'interface' not in next_hop_config:
+                raise ConfigError(f'Interface for route "{route}" next-hop "{next_hop}" is mandatory!')
+
+            check = next_hop_config.get('check')
+            if not check:
+                raise ConfigError(f'Check target for next-hop "{next_hop}" is mandatory!')
+
+            if 'target' not in check:
+                raise ConfigError(f'Check target for next-hop "{next_hop}" is mandatory!')
+
+            # A TCP probe cannot work without a destination port
+            if check['type'] == 'tcp' and 'port' not in check:
+                raise ConfigError(f'Check port for next-hop "{next_hop}" and type TCP is mandatory!')
+
+    return None
+
+def generate(failover):
+    """
+    Write the files needed by the failover service.
+
+    With a configuration present this writes the 'failover' routing protocol
+    entry, the JSON configuration for the helper and the systemd unit file.
+    With an empty configuration only the JSON configuration file is removed.
+    """
+    if failover:
+        # Add own rt_proto 'failover'
+        # Helps to detect all own routes 'proto failover'
+        with open(rt_proto_failover, 'w') as proto_file:
+            proto_file.write('111 failover\n')
+
+        # Configuration consumed by the helper script
+        service_conf.write_text(json.dumps(failover, indent=4))
+        render(systemd_service, 'protocols/systemd_vyos_failover_service.j2', failover)
+    else:
+        service_conf.unlink(missing_ok=True)
+
+    return None
+
+def apply(failover):
+    """
+    Stop or restart the failover service and flush its routes.
+
+    With a configuration present systemd is reloaded and the service is
+    restarted; without one the service is stopped. In both cases all routes
+    of protocol 'failover' are flushed afterwards.
+    """
+    unit = f'{service_name}.service'
+
+    if failover:
+        call('systemctl daemon-reload')
+        call(f'systemctl restart {unit}')
+    else:
+        call(f'systemctl stop {unit}')
+
+    # Both paths finish by removing every route marked 'proto failover'
+    call('ip route flush protocol failover')
+
+    return None
+
+if __name__ == '__main__':
+    # Run the usual get_config/verify/generate/apply sequence; a ConfigError
+    # is printed and turned into exit status 1
+    try:
+        failover_config = get_config()
+        verify(failover_config)
+        generate(failover_config)
+        apply(failover_config)
+    except ConfigError as error:
+        print(error)
+        exit(1)
diff --git a/src/helpers/vyos-failover.py b/src/helpers/vyos-failover.py
new file mode 100755
index 000000000..1ac193423
--- /dev/null
+++ b/src/helpers/vyos-failover.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2022 VyOS maintainers and contributors
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 or later as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import argparse
+import json
+import subprocess
+import socket
+import time
+
+from vyos.util import rc_cmd
+from pathlib import Path
+from systemd import journal
+
+
+# File name of this script without extension, passed as SYSLOG_IDENTIFIER
+# in the journal.send() calls below
+my_name = Path(__file__).stem
+
+
+def get_best_route_options(route, debug=False):
+    """
+    Return current best route (gateway, interface, metric)
+
+    The best route is the 'proto failover' entry for the given prefix with
+    the lowest metric; the first one wins on equal metrics.
+    (None, None, None) is returned when no such route is installed or when
+    the 'ip' command fails.
+
+    % get_best_route_options('203.0.113.1')
+    ('192.168.0.1', 'eth1', 1)
+
+    % get_best_route_options('203.0.113.254')
+    (None, None, None)
+    """
+    rc, data = rc_cmd(f'ip --detail --json route show protocol failover {route}')
+    if rc != 0:
+        # The caller always unpacks three values, so never fall through with
+        # a bare None when the command fails
+        if debug: print(f'\nCould not read routes for {route} -- return-code [RC: {rc}]')
+        return None, None, None
+
+    data = json.loads(data)
+    if len(data) == 0:
+        print(f'\nRoute {route} for protocol failover was not found')
+        return None, None, None
+
+    if debug:
+        for entry in data:
+            print('\n', entry)
+
+    # Search route with the lowest metric
+    # An entry without a 'metric' key is treated as metric 0, so the
+    # comparison can never fail on None
+    best = min(data, key=lambda entry: entry.get('metric', 0))
+    best_gateway = best.get('gateway')
+    best_interface = best.get('dev')
+    best_metric = best.get('metric', 0)
+
+    if debug:
+        print(f'### Best_route exists: {route}, best_gateway: {best_gateway}, '
+              f'best_metric: {best_metric}, best_iface: {best_interface}')
+    return best_gateway, best_interface, best_metric
+
+def is_port_open(ip, port, timeout=2):
+    """
+    Check connection to remote host and port
+    Return True if a TCP connection to ip:port can be established within
+    'timeout' seconds, False otherwise (including an unusable port value)
+
+    % is_port_open('example.com', 8080)
+    True
+    """
+    try:
+        # The context manager closes the socket on every exit path
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) as s:
+            s.settimeout(timeout)
+            s.connect((ip, int(port)))
+            try:
+                s.shutdown(socket.SHUT_RDWR)
+            except OSError:
+                # The connect succeeded, so the port is open even if the
+                # orderly shutdown fails afterwards
+                pass
+            return True
+    except (OSError, ValueError, TypeError, OverflowError):
+        # Connection refused, timeout, resolver failure or invalid port.
+        # KeyboardInterrupt/SystemExit are deliberately not caught here
+        return False
+
+def is_target_alive(target=None, iface='', proto='icmp', port=None, debug=False):
+    """
+    Host availability check by ICMP, ARP, TCP
+    Return True if the target check is successful, False in every other
+    case (failed check, missing target, TCP without port, unknown proto)
+
+    % is_target_alive('192.0.2.1', 'eth1', proto='arp')
+    True
+    """
+    # Without a target there is nothing to probe
+    if not target:
+        return False
+
+    # Both ping and arping take the source interface as '-I <iface>'
+    if iface != '':
+        iface = f'-I {iface}'
+
+    if proto == 'icmp':
+        command = f'/usr/bin/ping -q {target} {iface} -n -c 2 -W 1'
+    elif proto == 'arp':
+        command = f'/usr/bin/arping -b -c 2 -f -w 1 -i 1 {iface} {target}'
+    elif proto == 'tcp' and port is not None:
+        return bool(is_port_open(target, port))
+    else:
+        return False
+
+    rc, _ = rc_cmd(command)
+    if debug: print(f'    [ CHECK-TARGET ]: [{command}] -- return-code [RC: {rc}]')
+    # Always return a real boolean, also when the check fails
+    return rc == 0
+
+
+if __name__ == '__main__':
+ # Parse command arguments and get config
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-c',
+ '--config',
+ action='store',
+ help='Path to protocols failover configuration',
+ required=True,
+ type=Path)
+
+ args = parser.parse_args()
+ # Load the JSON configuration; exit with status 1 if it cannot be read or parsed
+ try:
+ config_path = Path(args.config)
+ config = json.loads(config_path.read_text())
+ except Exception as err:
+ print(
+ f'Configuration file "{config_path}" does not exist or malformed: {err}'
+ )
+ exit(1)
+
+ # Useful debug info to console, use debug = True
+ # sudo systemctl stop vyos-failover.service
+ # sudo /usr/libexec/vyos/vyos-failover.py --config /run/vyos-failover.conf
+ debug = False
+
+ # Main loop: process every configured route and its next-hops, forever
+ while(True):
+
+ for route, route_config in config.get('route').items():
+
+ # Best installed 'proto failover' route for this prefix, read once
+ # before the next-hops of the route are processed
+ exists_route = exists_gateway, exists_iface, exists_metric = get_best_route_options(route, debug=debug)
+
+ for next_hop, nexthop_config in route_config.get('next_hop').items():
+ conf_iface = nexthop_config.get('interface')
+ conf_metric = int(nexthop_config.get('metric'))
+ port = nexthop_config.get('check').get('port')
+ # port_opt is only used in the 'Check fail' journal message below
+ port_opt = f'port {port}' if port else ''
+ proto = nexthop_config.get('check').get('type')
+ target = nexthop_config.get('check').get('target')
+ timeout = nexthop_config.get('check').get('timeout')
+
+ # Best route not found in the current routing table
+ if exists_route == (None, None, None):
+ if debug: print(f" [NEW_ROUTE_DETECTED] route: [{route}]")
+ # Add route if check-target alive
+ if is_target_alive(target, conf_iface, proto, port, debug=debug):
+ if debug: print(f' [ ADD ] -- ip route add {route} via {next_hop} dev {conf_iface} '
+ f'metric {conf_metric} proto failover\n###')
+ rc, command = rc_cmd(f'ip route add {route} via {next_hop} dev {conf_iface} '
+ f'metric {conf_metric} proto failover')
+ # If something is wrong and gateway not added
+ # Example: Error: Next-hop has invalid gateway.
+ if rc !=0:
+ if debug: print(f'{command} -- return-code [RC: {rc}] {next_hop} dev {conf_iface}')
+ else:
+ journal.send(f'ip route add {route} via {next_hop} dev {conf_iface} '
+ f'metric {conf_metric} proto failover', SYSLOG_IDENTIFIER=my_name)
+ else:
+ if debug: print(f' [ TARGET_FAIL ] target checks fails for [{target}], do nothing')
+ journal.send(f'Check fail for route {route} target {target} proto {proto} '
+ f'{port_opt}', SYSLOG_IDENTIFIER=my_name)
+
+ # Route was added, check if the target is alive
+ # We should delete route if check fails only if route exists in the routing table
+ # NOTE(review): is_target_alive() is the first operand of the condition,
+ # so the check is also executed when no route exists
+ if not is_target_alive(target, conf_iface, proto, port, debug=debug) and \
+ exists_route != (None, None, None):
+ if debug:
+ print(f'Nexh_hop {next_hop} fail, target not response')
+ print(f' [ DEL ] -- ip route del {route} via {next_hop} dev {conf_iface} '
+ f'metric {conf_metric} proto failover [DELETE]')
+ rc_cmd(f'ip route del {route} via {next_hop} dev {conf_iface} metric {conf_metric} proto failover')
+ journal.send(f'ip route del {route} via {next_hop} dev {conf_iface} '
+ f'metric {conf_metric} proto failover', SYSLOG_IDENTIFIER=my_name)
+
+ # Pause using the most recently read check 'timeout' value
+ # NOTE(review): confirm at which loop level this sleep is meant to run
+ time.sleep(int(timeout))