diff options
-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | scripts/vyatta-auto-irqaffin.pl | 283 | ||||
-rw-r--r-- | templates/interfaces/ethernet/node.tag/smp_affinity/node.def | 26 |
3 files changed, 304 insertions, 6 deletions
diff --git a/Makefile.am b/Makefile.am index faa68f7..54ef9d6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -55,6 +55,7 @@ sbin_SCRIPTS += scripts/vyatta-load-config.pl sbin_SCRIPTS += scripts/vyatta-cfg-notify sbin_SCRIPTS += scripts/vyatta-interfaces.pl sbin_SCRIPTS += scripts/vyatta-irqaffin +sbin_SCRIPTS += scripts/vyatta-auto-irqaffin.pl sbin_SCRIPTS += scripts/vyatta-check-typeless-node.pl sbin_SCRIPTS += scripts/vyatta-exists diff --git a/scripts/vyatta-auto-irqaffin.pl b/scripts/vyatta-auto-irqaffin.pl new file mode 100644 index 0000000..521a181 --- /dev/null +++ b/scripts/vyatta-auto-irqaffin.pl @@ -0,0 +1,283 @@ +#!/usr/bin/perl +# +# Module: vyatta-auto-irqaffin.pl +# +# **** License **** +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# This code was originally developed by Vyatta, Inc. +# Portions created by Vyatta are Copyright (C) 2009 Vyatta, Inc. +# All Rights Reserved. +# +# Author: Bob Gilligan (gilligan@vyatta.com) +# Date: October 2009 +# Description: Script to configure optimal IRQ affinity for NICs. +# +# **** End License **** +# + +# This script attempts to perform a static affinity assignment for network +# interfaces. It is primarily targeted at supporting multi-queue NICs. +# Since different NICs may have different queue organizations, and +# because there is no standard API for learning the mapping between +# queues and IRQ numbers, different code is required for each driver. +# +# The general strategy includes: +# - Spread the receive load among as many CPUs as possible. +# - For NICs that provide both rx and tx queue, keep the tx queue +# on the same CPU as the corresponding rx queue. +# - For all multi-queue NICs in the system, the same tx and rx queue +# numbers should interrupt the same CPUs. I.e. tx and rx queue 0 +# of all NICs should interrupt the same CPU. +# - If hyperthreading is supported and enabled, avoid assigning +# queues to both CPUs of a hyperthreaded pair if there are enough +# CPUs available to do that. +# + + +use lib "/opt/vyatta/share/perl5"; +use Getopt::Long; + +use warnings; +use strict; + +# Send output of shell commands to syslog for debugging and so that +# the user is not confused by it. Log at debug level, which is supressed +# by default, so that we don't unnecessarily fill up the syslog file. +my $logger = 'logger -t firewall-cfg -p local0.debug --'; + +# Enable printing debug output to stdout. +my $debug_flag = 0; +my $syslog_flag = 0; + +my $setup_ifname; + +GetOptions("setup=s" => \$setup_ifname, + "debug" => \$debug_flag + ); + +sub log_msg { + my $message = shift; + + print "DEBUG: $message" if $debug_flag; + system("$logger DEBUG: \"$message\"") if $syslog_flag; +} + + +# Affinity strategy function for the igb driver. NICs using this +# driver have an equal number of rx and tx queues. The first part of +# the strategy for optimal performance is to assign irq of each queue +# in a pair of tx and rx queues that have the same queue number to the +# same CPU. I.e., assign queue 0 to CPU X, queue 1 to CPU Y, etc. +# The second part is to avoid assigning any queues to the second CPU +# in a hyper-threaded pair, if posible. I.e., if CPU 0 and 1 are +# hyper-threaded pairs, then assign a queue to CPU 0, but try to avoid +# assigning one to to CPU 1. But if we have more queues than CPUs, then +# it is OK to assign some to the second CPU in a hyperthreaded pair. +# +sub igb_func{ + my ($ifname, $numcpus, $numcores) = @_; + my $rx_queues; # number of rx queues + my $tx_queues; # number of tx queues + my $ht_factor; # 2 if HT enabled, 1 if not + + log_msg("igb_func was called.\n"); + + if ($numcpus > $numcores) { + $ht_factor = 2; + } else { + $ht_factor = 1; + } + + log_msg("ht_factor is $ht_factor.\n"); + + # Figure out how many queues we have + + $rx_queues=`grep "$ifname-rx-" /proc/interrupts | wc -l`; + $rx_queues =~ s/\n//; + + $tx_queues=`grep "$ifname-tx-" /proc/interrupts | wc -l`; + $tx_queues =~ s/\n//; + + log_msg("rx_queues is $rx_queues. tx_queues is $tx_queues\n"); + + if ($rx_queues != $tx_queues) { + printf("Error: rx and tx queues don't match for igb driver.\n"); + exit 1; + } + + # For i = 0 to number of queues: + # Affinity of rx and tx queue $i gets CPU ($i * (2 if HT, 1 if no HT)) + # % number_of_cpus + for (my $queue = 0, my $cpu = 0; ($queue < $rx_queues) ; $queue++) { + # Generate the hex string for the bitmask representing this CPU + my $cpu_bit = 1 << $cpu; + my $cpu_hex = sprintf("%x", $cpu_bit); + log_msg ("queue=$queue cpu=$cpu cpu_bit=$cpu_bit cpu_hex=$cpu_hex\n"); + + # Get the IRQ number for RX queue + my $rx_irq=`grep "$ifname-rx-$queue" /proc/interrupts | awk -F: '{print \$1}'`; + $rx_irq =~ s/\n//; + $rx_irq =~ s/ //g; + + # Get the IRQ number for TX queue + my $tx_irq=`grep "$ifname-tx-$queue" /proc/interrupts | awk -F: '{print \$1}'`; + $tx_irq =~ s/\n//; + $tx_irq =~ s/ //g; + + log_msg("rx_irq = $rx_irq. tx_irq = $tx_irq\n"); + + # Assign CPU affinity for both IRQs + system "echo $cpu_hex > /proc/irq/$rx_irq/smp_affinity"; + system "echo $cpu_hex > /proc/irq/$tx_irq/smp_affinity"; + + $cpu += $ht_factor; + + if ($cpu >= $numcpus) { + # Must "wrap" + $cpu %= $numcpus; + + if ($ht_factor > 1) { + # Next time through, select the other CPU in a hyperthreaded + # pair. + if ($cpu == 0) { + $cpu++; + } else { + $cpu--; + } + } + } + } +}; + +# Similar strategy as for igb driver, but Broadcom NICs do not have +# separate receive and transmit queues. +sub bnx2_func{ + my ($ifname, $numcpus, $numcores) = @_; + my $num_queues; # number of queues + my $ht_factor; # 2 if HT enabled, 1 if not + + log_msg("bnx2_func was called.\n"); + + # Figure out how many queues we have + $num_queues=`grep "$ifname-" /proc/interrupts | wc -l`; + $num_queues =~ s/\n//; + + log_msg("num_queues=$num_queues\n"); + + if ($num_queues <=0) { + printf("ERROR: No queues found for $ifname\n"); + exit 1; + } + + if ($numcpus > $numcores) { + $ht_factor = 2; + } else { + $ht_factor = 1; + } + + log_msg("ht_factor is $ht_factor.\n"); + + for (my $queue = 0, my $cpu = 0; ($queue < $num_queues) ; $queue++) { + # Generate the hex string for the bitmask representing this CPU + my $cpu_bit = 1 << $cpu; + my $cpu_hex = sprintf("%x", $cpu_bit); + log_msg ("queue=$queue cpu=$cpu cpu_bit=$cpu_bit cpu_hex=$cpu_hex\n"); + + # Get the IRQ number for the queue + my $irq=`grep "$ifname-$queue" /proc/interrupts | awk -F: '{print \$1}'`; + $irq =~ s/\n//; + $irq =~ s/ //g; + + log_msg("irq = $irq.\n"); + + # Assign CPU affinity for this IRQs + system "echo $cpu_hex > /proc/irq/$irq/smp_affinity"; + + $cpu += $ht_factor; + if ($cpu >= $numcpus) { + # Must "wrap" + $cpu %= $numcpus; + + if ($ht_factor > 1) { + # Next time through, select the other CPU in a hyperthreaded + # pair. + if ($cpu == 0) { + $cpu++; + } else { + $cpu--; + } + } + } + } +} + +my %driver_hash = ( 'igb' => \&igb_func, + 'ixbg' => \&igb_func, + 'bnx2' =>\&bnx2_func ); + +if (defined $setup_ifname) { + # Set up automatic IRQ affinity for the named interface + + log_msg("setup $setup_ifname\n"); + + my $ifname = $setup_ifname; # shorter variable name + my $drivername; # Name of the NIC driver, e.g. "igb". + my $numcpus; # Number of Linux "cpus" + my $numcores; # Number of unique CPU cores + my $driver_func; # Pointer to fuction specific to a driver + + # Determine how many CPUs the machine has + $numcpus=`grep "^processor" /proc/cpuinfo | wc -l`; + $numcpus =~ s/\n//; + + log_msg("numcpus is $numcpus\n"); + + if ($numcpus == 1) { + # Nothing to do if we only have one CPU, so just exit quietly. + exit 0; + } + + # Verify that interface exists + if (! (-e "/proc/sys/net/ipv4/conf/$ifname")) { + printf("Error: Interface $ifname does not exist\n"); + exit 1; + } + + # Figure out what driver this NIC is using. + $drivername=`ethtool -i $ifname | grep "^driver" | awk '{print \$2}'`; + $drivername =~ s/\n//; + + log_msg("drivername is $drivername\n"); + + $driver_func = $driver_hash{$drivername}; + + # We only support a couple of drivers at this time, so just exit + # if its not one we support. + if (! defined($driver_func)) { + printf("Automatic SMP affinity not supported for NICs using the $drivername driver.\n"); + exit 0; # not an error + } + + # Determine whether machine has hyperthreading enabled + $numcores=`grep "^core id" /proc/cpuinfo | uniq | wc -l`; + $numcores =~ s/\n//; + + log_msg("numcores is $numcores.\n"); + + &$driver_func($ifname, $numcpus, $numcores); + + exit 0; +} + +printf("Must specify options.\n"); +exit(1); + + diff --git a/templates/interfaces/ethernet/node.tag/smp_affinity/node.def b/templates/interfaces/ethernet/node.tag/smp_affinity/node.def index c07fa1d..a444ae9 100644 --- a/templates/interfaces/ethernet/node.tag/smp_affinity/node.def +++ b/templates/interfaces/ethernet/node.tag/smp_affinity/node.def @@ -13,15 +13,29 @@ type: txt help: Set CPU interrupt affinity mask for this interface -comp_help: Hexidecimal bitmask representing CPUs that this NIC will interrupt +comp_help: Possible completions: + XX\tHexidecimal bitmask representing CPUs that this NIC will interrupt + auto\tSet affinity automatically -syntax:expression: exec "/opt/vyatta/sbin/vyatta-irqaffin check $VAR(../@) $VAR(@)" +default: "auto" + +syntax:expression: exec " \ + if [ $VAR(@) = auto ]; then \ + exit 0; + else \ + /opt/vyatta/sbin/vyatta-irqaffin check $VAR(../@) $VAR(@); \ + fi" update: - sudo /opt/vyatta/sbin/vyatta-irqaffin set $VAR(../@) $VAR(@) - if [ $? -ne 0 ]; then - echo "Error setting CPU affinity mask $VAR(@) on interface $VAR(../@)" - exit 1 + if [ "$VAR(@)" = "auto" ]; then + echo "Setting SMP affinity for $VAR(../@) automatically." + sudo /opt/vyatta/sbin/vyatta-auto-irqaffin.pl --setup $VAR(../@) + else + sudo /opt/vyatta/sbin/vyatta-irqaffin set $VAR(../@) $VAR(@) + if [ $? -ne 0 ]; then + echo "Error setting CPU affinity mask $VAR(@) on interface $VAR(../@)" + exit 1 + fi fi delete: [ -d /sys/class/net/$VAR(../@) ] || exit 0 |