diff options
author | Stephen Hemminger <stephen.hemminger@vyatta.com> | 2009-10-28 15:29:40 -0700 |
---|---|---|
committer | Stephen Hemminger <stephen.hemminger@vyatta.com> | 2009-10-28 15:29:40 -0700 |
commit | 97d93e0ff2b196aecdf71dc60468b7c38c06fef4 (patch) | |
tree | 17544f85a5f59f97b673638f256ee61729e155b9 | |
parent | 0339aea4c50a35b737e7c7b574739885913d7cea (diff) | |
parent | 1f17200053dcf6fa9e02fe0b065f382dc78aed13 (diff) | |
download | vyatta-cfg-97d93e0ff2b196aecdf71dc60468b7c38c06fef4.tar.gz vyatta-cfg-97d93e0ff2b196aecdf71dc60468b7c38c06fef4.zip |
Merge branch 'kenwood' of suva.vyatta.com:/git/vyatta-cfg into kenwood
-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | debian/changelog | 17 | ||||
-rw-r--r-- | scripts/vyatta-auto-irqaffin.pl | 172 | ||||
-rw-r--r-- | templates/priority | 106 |
4 files changed, 145 insertions, 151 deletions
diff --git a/Makefile.am b/Makefile.am index 54ef9d6..cd8ce56 100644 --- a/Makefile.am +++ b/Makefile.am @@ -46,6 +46,7 @@ src_check_tmpl_SOURCES = src/check_tmpl.c sbin_SCRIPTS = scripts/vyatta-cfg-cmd-wrapper sbin_SCRIPTS += scripts/vyatta-validate-type.pl sbin_SCRIPTS += scripts/vyatta-find-type.pl +sbin_SCRIPTS += scripts/priority.pl sbin_SCRIPTS += scripts/vyatta-config-loader.pl sbin_SCRIPTS += scripts/vyatta-config-gen-sets.pl sbin_SCRIPTS += scripts/vyatta-cli-expand-var.pl diff --git a/debian/changelog b/debian/changelog index b119c4e..80bc7e3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,20 @@ +vyatta-cfg (0.15.33) unstable; urgency=low + + [ slioch ] + * removed out of date priority values from priority file + + -- root <root@eng-140.vyatta.com> Tue, 27 Oct 2009 17:13:25 -0700 + +vyatta-cfg (0.15.32) unstable; urgency=low + + [ slioch ] + * dismantle old priority file parsing and node matching code. removed + + [ Bob Gilligan ] + * Generalize handling of multi-queue NICs. + + -- Bob Gilligan <gilligan@vyatta.com> Mon, 26 Oct 2009 17:09:30 -0700 + vyatta-cfg (0.15.31) unstable; urgency=low [ slioch ] diff --git a/scripts/vyatta-auto-irqaffin.pl b/scripts/vyatta-auto-irqaffin.pl index 521a181..fa58420 100644 --- a/scripts/vyatta-auto-irqaffin.pl +++ b/scripts/vyatta-auto-irqaffin.pl @@ -23,23 +23,34 @@ # **** End License **** # -# This script attempts to perform a static affinity assignment for network -# interfaces. It is primarily targeted at supporting multi-queue NICs. +# This script attempts to set up a static CPU affinity for the IRQs +# used by network interfaces. It is primarily targeted at supporting +# multi-queue NICs, but does include code to handle single-queue NICs. # Since different NICs may have different queue organizations, and # because there is no standard API for learning the mapping between -# queues and IRQ numbers, different code is required for each driver. 
+# queues and IRQ numbers, different code is required for each of the +# queue naming conventions. +# +# The general strategy involves trying to achieve the following goals: # -# The general strategy includes: # - Spread the receive load among as many CPUs as possible. -# - For NICs that provide both rx and tx queue, keep the tx queue -# on the same CPU as the corresponding rx queue. -# - For all multi-queue NICs in the system, the same tx and rx queue -# numbers should interrupt the same CPUs. I.e. tx and rx queue 0 -# of all NICs should interrupt the same CPU. +# +# - For all multi-queue NICs in the system that provide both tx and +# rx queues, keep all of the queues that share the same queue +# number on the same CPU. I.e. tx and rx queue 0 of all such NICs +# should interrupt one CPU; tx and rx queue 1 should interrupt a +# different CPU, etc. +# # - If hyperthreading is supported and enabled, avoid assigning # queues to both CPUs of a hyperthreaded pair if there are enough # CPUs available to do that. # +# This strategy yields the greatest MP scaling possible for +# multi-queue NICs. It also ensures that an individual skb is +# processed on the same CPU for the entirety of its lifecycle, +# including transmit time, which optimally utilizes the cache and +# keeps performance high. +# use lib "/opt/vyatta/share/perl5"; @@ -71,24 +82,26 @@ sub log_msg { } -# Affinity strategy function for the igb driver. NICs using this -# driver have an equal number of rx and tx queues. The first part of -# the strategy for optimal performance is to assign irq of each queue -# in a pair of tx and rx queues that have the same queue number to the -# same CPU. I.e., assign queue 0 to CPU X, queue 1 to CPU Y, etc. -# The second part is to avoid assigning any queues to the second CPU -# in a hyper-threaded pair, if posible. I.e., if CPU 0 and 1 are -# hyper-threaded pairs, then assign a queue to CPU 0, but try to avoid -# assigning one to to CPU 1. 
But if we have more queues than CPUs, then -# it is OK to assign some to the second CPU in a hyperthreaded pair. +# Affinity assignment function for the Intel igb, ixgb and ixgbe +# drivers, and any other NICs that follow their queue naming +# convention. These NICs have an equal number of rx and tx queues. +# The first part of the strategy for optimal performance is to select +# the CPU to assign the IRQs to by mapping from the queue number. +# This ensures that all queues with the same queue number are assigned +# to the same CPU. The second part is to avoid assigning any queues +# to the second CPU in a hyper-threaded pair, if possible. I.e., if +# CPU 0 and 1 are hyper-threaded pairs, then assign a queue to CPU 0, +# but try to avoid assigning one to CPU 1. But if we have more +# queues than CPUs, then it is OK to assign some to the second CPU in +# a hyperthreaded pair. # -sub igb_func{ +sub intel_func{ my ($ifname, $numcpus, $numcores) = @_; my $rx_queues; # number of rx queues my $tx_queues; # number of tx queues my $ht_factor; # 2 if HT enabled, 1 if not - log_msg("igb_func was called.\n"); + log_msg("intel_func was called.\n"); if ($numcpus > $numcores) { $ht_factor = 2; @@ -157,14 +170,18 @@ sub igb_func{ } }; -# Similar strategy as for igb driver, but Broadcom NICs do not have -# separate receive and transmit queues. -sub bnx2_func{ +# Affinity assignment function for Broadcom NICs using the bnx2 driver +# or other multi-queue NICs that follow their queue naming convention. +# This strategy is similar to that for Intel drivers. But since +# Broadcom NICs do not have separate receive and transmit queues we +# perform one affinity assignment per queue. 
+# +sub broadcom_func{ my ($ifname, $numcpus, $numcores) = @_; my $num_queues; # number of queues my $ht_factor; # 2 if HT enabled, 1 if not - log_msg("bnx2_func was called.\n"); + log_msg("broadcom_func was called.\n"); # Figure out how many queues we have $num_queues=`grep "$ifname-" /proc/interrupts | wc -l`; @@ -219,9 +236,59 @@ sub bnx2_func{ } } -my %driver_hash = ( 'igb' => \&igb_func, - 'ixbg' => \&igb_func, - 'bnx2' =>\&bnx2_func ); + +# Affinity assignment function for single-queue NICs. The strategy +# here is to just spread the interrupts of different NICs evenly +# across all CPUs. That is the best we can do without monitoring the +# load and traffic patterns. So we just directly map the NIC unit +# number into a CPU number. +# +sub single_func { + my ($ifname, $numcpus, $numcores) = @_; + my $cpu; + use integer; + + log_msg("single_func was calledn.\n"); + + $ifname =~ m/^eth(.*)$/; + + my $ifunit = $1; + log_msg ("ifunit = $ifunit\n"); + + # Get the IRQ number for the queue + my $irq=`grep "$ifname" /proc/interrupts | awk -F: '{print \$1}'`; + $irq =~ s/\n//; + $irq =~ s/ //g; + + log_msg("irq = $irq.\n"); + + # Figure out what CPU to assign it to + if ($numcpus > $numcores) { + # Hyperthreaded + $cpu = (2 * $ifunit) % $numcpus; + + # every other time it wraps, add one to use the hyper-thread pair + # of the CPU selected. + my $use_ht = ((2 * $ifunit) / $numcpus) % 2; + $cpu += $use_ht; + } else { + # Not hyperthreaded. Map it to unit number MOD number of linux CPUs. + $cpu = $ifunit % $numcpus; + } + + # Generate the hex string for the bitmask representing this CPU + my $cpu_bit = 1 << $cpu; + my $cpu_hex = sprintf("%x", $cpu_bit); + log_msg ("cpu=$cpu cpu_bit=$cpu_bit cpu_hex=$cpu_hex\n"); + + # Assign CPU affinity for this IRQ + system "echo $cpu_hex > /proc/irq/$irq/smp_affinity"; +} + +# Mapping from driver type to function that handles it. 
+my %driver_hash = ( 'intel' => \&intel_func, + 'broadcom' => \&broadcom_func, + 'single' => \&single_func); if (defined $setup_ifname) { # Set up automatic IRQ affinity for the named interface @@ -233,8 +300,10 @@ if (defined $setup_ifname) { my $numcpus; # Number of Linux "cpus" my $numcores; # Number of unique CPU cores my $driver_func; # Pointer to function specific to a driver + my $driver_style; # Style of the driver. Whether it is multi-queue + # or not, and if it is, how it names its queues. - # Determine how many CPUs the machine has + # Determine how many CPUs the machine has. $numcpus=`grep "^processor" /proc/cpuinfo | wc -l`; $numcpus =~ s/\n//; @@ -245,32 +314,39 @@ if (defined $setup_ifname) { exit 0; } + # Determine how many cores the machine has. Could be less than + # the number of CPUs if processor supports hyperthreading. + $numcores=`grep "^core id" /proc/cpuinfo | uniq | wc -l`; + $numcores =~ s/\n//; + + log_msg("numcores is $numcores.\n"); + # Verify that interface exists if (! (-e "/proc/sys/net/ipv4/conf/$ifname")) { printf("Error: Interface $ifname does not exist\n"); exit 1; } - # Figure out what driver this NIC is using. - $drivername=`ethtool -i $ifname | grep "^driver" | awk '{print \$2}'`; - $drivername =~ s/\n//; - - log_msg("drivername is $drivername\n"); - - $driver_func = $driver_hash{$drivername}; - - # We only support a couple of drivers at this time, so just exit - # if its not one we support. - if (! defined($driver_func)) { - printf("Automatic SMP affinity not supported for NICs using the $drivername driver.\n"); - exit 0; # not an error + # Figure out what style of driver this NIC is using. + my $numints=`grep $ifname /proc/interrupts | wc -l`; + $numints =~ s/\n//; + if ($numints > 1) { + # It is a multiqueue NIC. Now figure out which one. 
+ my $rx_queues=`grep "$ifname-rx-" /proc/interrupts | wc -l`; + $rx_queues =~ s/\n//; + if ($rx_queues > 0) { + # Driver is following the Intel queue naming style + $driver_style="intel"; + } else { + # The only other queue naming style that we have seen is the + # one used by Broadcom NICs. + $driver_style="broadcom"; + } + } else { + # It is a single queue NIC. + $driver_style="single"; } - - # Determine whether machine has hyperthreading enabled - $numcores=`grep "^core id" /proc/cpuinfo | uniq | wc -l`; - $numcores =~ s/\n//; - - log_msg("numcores is $numcores.\n"); + $driver_func = $driver_hash{$driver_style}; &$driver_func($ifname, $numcpus, $numcores); diff --git a/templates/priority b/templates/priority index e32a5c0..2359178 100644 --- a/templates/priority +++ b/templates/priority @@ -92,107 +92,7 @@ # "active config" tree at the time the lower-level node is committed. # -200 firewall/group/address-group -200 firewall/group/network-group -200 firewall/group/port-group -210 firewall/name/node.tag -210 firewall/modify/node.tag -210 firewall/ipv6-name/node.tag -210 firewall/ipv6-modify/node.tag -215 firewall -310 interfaces/bridge -315 interfaces/bonding -318 interfaces/ethernet -319 interfaces/ethernet/node.tag/vif -319 interfaces/ethernet/node.tag/bond-group -320 interfaces/ethernet/node.tag/vif/node.tag/bridge-group -320 interfaces/bonding/node.tag/bridge-group -320 interfaces/bonding/node.tag/vif -320 interfaces/bridge/node.tag/address -320 interfaces/loopback -330 interfaces/adsl -340 interfaces/serial -350 interfaces/wirelessmodem -350 interfaces/wireless -380 interfaces/tunnel -380 interfaces/openvpn -390 interfaces/pseudo-ethernet -391 interfaces/pseudo-ethernet/node.tag/vif -400 system/domain-name -400 system/domain-search -400 system/gateway-address -400 system/host-name -400 system/ip -400 system/ipv6 -400 system/login -400 system/name-server -400 system/ntp-server -400 system/options -400 system/package -400 system/static-host-mapping -400 
system/syslog -400 system/time-zone -405 system -450 protocols/static -470 policy -500 protocols/bgp/node.tag/parameters -510 protocols/bgp/node.tag/neighbor -520 protocols/bgp -610 protocols/ospf/parameters -620 protocols/ospf -630 protocols/ospfv3/parameters -640 protocols/ospfv3 -650 protocols/rip -660 protocols/ripng -800 interfaces/ethernet/node.tag/vrrp -800 interfaces/ethernet/node.tag/vif/node.tag/vrrp -810 interfaces/serial/node.tag/frame-relay/vif -810 interfaces/serial/node.tag/ppp -810 interfaces/serial/node.tag/ppp/vif -810 interfaces/serial/node.tag/cisco-hdlc/vif -850 interfaces -# Router advertisement daemon startup should take place after interfaces -# have been fully configured. We have a router-advert node under just about -# every interface type, hence the large number of priority nodes in this -# source file. They can be removed from this source file once bug 4903 -# is fixed -860 interfaces/ethernet/node.tag/ipv6/router-advert -860 interfaces/ethernet/node.tag/pppoe/node.tag/ipv6/router-advert -860 interfaces/ethernet/node.tag/vif/node.tag/ipv6/router-advert -860 interfaces/ethernet/node.tag/vif/node.tag/pppoe/node.tag/ipv6/router-advert -860 interfaces/bonding/node.tag/ipv6/router-advert -860 interfaces/bonding/node.tag/vif/node.tag/ipv6/router-advert -860 interfaces/tunnel/node.tag/ipv6/router-advert -860 interfaces/bridge/node.tag/ipv6/router-advert -860 interfaces/openvpn/node.tag/ipv6/router-advert -860 interfaces/wirelessmodem/node.tag/ipv6/router-advert -860 interfaces/multilink/node.tag/vif/node.tag/ipv6/router-advert -860 interfaces/adsl/node.tag/pvc/node.tag/bridged-ethernet/ipv6/router-advert -860 interfaces/adsl/node.tag/pvc/node.tag/classical-ipoa/ipv6/router-advert -860 interfaces/adsl/node.tag/pvc/node.tag/pppoa/node.tag/ipv6/router-advert -860 interfaces/adsl/node.tag/pvc/node.tag/pppoe/node.tag/ipv6/router-advert -860 interfaces/serial/node.tag/cisco-hdlc/vif/node.tag/ipv6/router-advert -860 
interfaces/serial/node.tag/frame-relay/vif/node.tag/ipv6/router-advert -860 interfaces/serial/node.tag/ppp/vif/node.tag/ipv6/router-advert - -900 vpn -900 qos-policy -900 test-definition -900 content-inspection -900 load-balancing -900 protocols -900 service -910 service/dhcp-relay -911 service/dhcp-server -913 service/https -914 service/nat -915 service/ssh -916 service/telnet -917 service/webproxy -918 service/dns/forwarding -919 service/dns/dynamic -960 cluster -970 zone-policy/zone/node.tag/from -975 zone-policy -980 protocols/snmp +# +# RUN perl /opt/vyatta/sbin/priority.pl to generate the current priority listings +#
\ No newline at end of file |