From 2cec431e5caf9df85640f707cd6dc3077c17c238 Mon Sep 17 00:00:00 2001 From: Christian Poessinger Date: Sun, 13 Feb 2022 20:29:25 +0100 Subject: vrf: T4191: bugfix for "ip rule" when VRFs are created We always mangled and worked on the "ip rule" singleton even when nothing needed to be changed. This resulted in a VRF hickup when the same VRF was added and removed multiple times. set interfaces ethernet eth1 vrf foo set vrf name foo table '1000' commit delete interfaces ethernet eth1 vrf delete vrf commit set interfaces ethernet eth1 vrf foo set vrf name foo table '1000' commit broke reachability on eth1 - a reboot was required. This change will now only alter the ip rule tables once when VRF instances are created for the first time and will not touch the Kernel "ip rule" representation afterwards. --- src/conf_mode/vrf.py | 108 ++++++++++++++++++++++++++------------------------- 1 file changed, 56 insertions(+), 52 deletions(-) (limited to 'src') diff --git a/src/conf_mode/vrf.py b/src/conf_mode/vrf.py index 38c0c4463..cfe0f4d8e 100755 --- a/src/conf_mode/vrf.py +++ b/src/conf_mode/vrf.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# Copyright (C) 2020-2021 VyOS maintainers and contributors +# Copyright (C) 2020-2022 VyOS maintainers and contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 or later as @@ -29,6 +29,7 @@ from vyos.util import dict_search from vyos.util import get_interface_config from vyos.util import popen from vyos.util import run +from vyos.util import sysctl from vyos import ConfigError from vyos import frr from vyos import airbag @@ -37,10 +38,16 @@ airbag.enable() config_file = '/etc/iproute2/rt_tables.d/vyos-vrf.conf' nft_vrf_config = '/tmp/nftables-vrf-zones' -def list_rules(): - command = 'ip -j -4 rule show' - answer = loads(cmd(command)) - return [_ for _ in answer if _] +def has_rule(af : str, priority : int, table : str): + """ Check if a given ip rule exists """ + if af not in ['-4', '-6']: + raise ValueError() + command = f'ip -j {af} rule show' + for tmp in loads(cmd(command)): + if {'priority', 'table'} <= set(tmp): + if tmp['priority'] == priority and tmp['table'] == table: + return True + return False def vrf_interfaces(c, match): matched = [] @@ -69,7 +76,6 @@ def vrf_routing(c, match): c.set_level(old_level) return matched - def get_config(config=None): if config: conf = config @@ -148,13 +154,11 @@ def apply(vrf): bind_all = '0' if 'bind-to-all' in vrf: bind_all = '1' - call(f'sysctl -wq net.ipv4.tcp_l3mdev_accept={bind_all}') - call(f'sysctl -wq net.ipv4.udp_l3mdev_accept={bind_all}') + sysctl('net.ipv4.tcp_l3mdev_accept', bind_all) + sysctl('net.ipv4.udp_l3mdev_accept', bind_all) for tmp in (dict_search('vrf_remove', vrf) or []): if os.path.isdir(f'/sys/class/net/{tmp}'): - call(f'ip -4 route del vrf {tmp} unreachable default metric 4278198272') - call(f'ip -6 route del vrf {tmp} unreachable default metric 4278198272') call(f'ip link delete dev {tmp}') # Remove nftables conntrack zone map item nft_del_element = f'delete element inet vrf_zones ct_iface_map {{ "{tmp}" }}' @@ -165,31 +169,59 @@ def apply(vrf): # check if table already exists _, err = popen('nft list table inet vrf_zones') # If not, create a table - if err: - if os.path.exists(nft_vrf_config): - cmd(f'nft -f {nft_vrf_config}') - os.unlink(nft_vrf_config) + if err and os.path.exists(nft_vrf_config): + cmd(f'nft -f {nft_vrf_config}') + os.unlink(nft_vrf_config) + + # Linux routing uses rules to find tables - routing targets are then + # looked up in those tables. If the lookup got a matching route, the + # process ends. + # + # TL;DR; first table with a matching entry wins! + # + # You can see your routing table lookup rules using "ip rule", sadly the + # local lookup is hit before any VRF lookup. Pinging an addresses from the + # VRF will usually find a hit in the local table, and never reach the VRF + # routing table - this is usually not what you want. Thus we will + # re-arrange the tables and move the local lookup further down once VRFs + # are enabled. + # + # Thanks to https://stbuehler.de/blog/article/2020/02/29/using_vrf__virtual_routing_and_forwarding__on_linux.html + + for afi in ['-4', '-6']: + # move lookup local to pref 32765 (from 0) + if not has_rule(afi, 32765, 'local'): + call(f'ip {afi} rule add pref 32765 table local') + if has_rule(afi, 0, 'local'): + call(f'ip {afi} rule del pref 0') + # make sure that in VRFs after failed lookup in the VRF specific table + # nothing else is reached + if not has_rule(afi, 1000, 'l3mdev'): + # this should be added by the kernel when a VRF is created + # add it here for completeness + call(f'ip {afi} rule add pref 1000 l3mdev protocol kernel') + + # add another rule with an unreachable target which only triggers in VRF context + # if a route could not be reached + if not has_rule(afi, 2000, 'l3mdev'): + call(f'ip {afi} rule add pref 2000 l3mdev unreachable') for name, config in vrf['name'].items(): table = config['table'] - if not os.path.isdir(f'/sys/class/net/{name}'): # For each VRF apart from your default context create a VRF # interface with a separate routing table call(f'ip link add {name} type vrf table {table}') - # The kernel Documentation/networking/vrf.txt also recommends - # adding unreachable routes to the VRF routing tables so that routes - # afterwards are taken. - call(f'ip -4 route add vrf {name} unreachable default metric 4278198272') - call(f'ip -6 route add vrf {name} unreachable default metric 4278198272') - # We also should add proper loopback IP addresses to the newly - # created VRFs for services bound to the loopback address (SNMP, NTP) - call(f'ip -4 addr add 127.0.0.1/8 dev {name}') - call(f'ip -6 addr add ::1/128 dev {name}') # set VRF description for e.g. SNMP monitoring vrf_if = Interface(name) + # We also should add proper loopback IP addresses to the newly + # created VRFs for services bound to the loopback address (SNMP, NTP) + vrf_if.add_addr('127.0.0.1/8') + vrf_if.add_addr('::1/128') + # add VRF description if available vrf_if.set_alias(config.get('description', '')) + # Enable/Disable of an interface must always be done at the end of the # derived class to make use of the ref-counting set_admin_state() # function. We will only enable the interface if 'up' was called as @@ -203,37 +235,9 @@ def apply(vrf): nft_add_element = f'add element inet vrf_zones ct_iface_map {{ "{name}" : {table} }}' cmd(f'nft {nft_add_element}') - # Linux routing uses rules to find tables - routing targets are then - # looked up in those tables. If the lookup got a matching route, the - # process ends. - # - # TL;DR; first table with a matching entry wins! - # - # You can see your routing table lookup rules using "ip rule", sadly the - # local lookup is hit before any VRF lookup. Pinging an addresses from the - # VRF will usually find a hit in the local table, and never reach the VRF - # routing table - this is usually not what you want. Thus we will - # re-arrange the tables and move the local lookup furhter down once VRFs - # are enabled. - - # get current preference on local table - local_pref = [r.get('priority') for r in list_rules() if r.get('table') == 'local'][0] - - # change preference when VRFs are enabled and local lookup table is default - if not local_pref and 'name' in vrf: - for af in ['-4', '-6']: - call(f'ip {af} rule add pref 32765 table local') - call(f'ip {af} rule del pref 0') # return to default lookup preference when no VRF is configured if 'name' not in vrf: - for af in ['-4', '-6']: - call(f'ip {af} rule add pref 0 table local') - call(f'ip {af} rule del pref 32765') - - # clean out l3mdev-table rule if present - if 1000 in [r.get('priority') for r in list_rules() if r.get('priority') == 1000]: - call(f'ip {af} rule del pref 1000') # Remove VRF zones table from nftables tmp = run('nft list table inet vrf_zones') if tmp == 0: -- cgit v1.2.3