summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.am1
-rw-r--r--scripts/vyatta-auto-irqaffin.pl283
-rw-r--r--templates/interfaces/ethernet/node.tag/smp_affinity/node.def26
3 files changed, 304 insertions, 6 deletions
diff --git a/Makefile.am b/Makefile.am
index faa68f7..54ef9d6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -55,6 +55,7 @@ sbin_SCRIPTS += scripts/vyatta-load-config.pl
sbin_SCRIPTS += scripts/vyatta-cfg-notify
sbin_SCRIPTS += scripts/vyatta-interfaces.pl
sbin_SCRIPTS += scripts/vyatta-irqaffin
+sbin_SCRIPTS += scripts/vyatta-auto-irqaffin.pl
sbin_SCRIPTS += scripts/vyatta-check-typeless-node.pl
sbin_SCRIPTS += scripts/vyatta-exists
diff --git a/scripts/vyatta-auto-irqaffin.pl b/scripts/vyatta-auto-irqaffin.pl
new file mode 100644
index 0000000..521a181
--- /dev/null
+++ b/scripts/vyatta-auto-irqaffin.pl
@@ -0,0 +1,283 @@
+#!/usr/bin/perl
+#
+# Module: vyatta-auto-irqaffin.pl
+#
+# **** License ****
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# This code was originally developed by Vyatta, Inc.
+# Portions created by Vyatta are Copyright (C) 2009 Vyatta, Inc.
+# All Rights Reserved.
+#
+# Author: Bob Gilligan (gilligan@vyatta.com)
+# Date: October 2009
+# Description: Script to configure optimal IRQ affinity for NICs.
+#
+# **** End License ****
+#
+
+# This script attempts to perform a static affinity assignment for network
+# interfaces. It is primarily targeted at supporting multi-queue NICs.
+# Since different NICs may have different queue organizations, and
+# because there is no standard API for learning the mapping between
+# queues and IRQ numbers, different code is required for each driver.
+#
+# The general strategy includes:
+# - Spread the receive load among as many CPUs as possible.
+# - For NICs that provide both rx and tx queue, keep the tx queue
+# on the same CPU as the corresponding rx queue.
+# - For all multi-queue NICs in the system, the same tx and rx queue
+# numbers should interrupt the same CPUs. I.e. tx and rx queue 0
+# of all NICs should interrupt the same CPU.
+# - If hyperthreading is supported and enabled, avoid assigning
+# queues to both CPUs of a hyperthreaded pair if there are enough
+# CPUs available to do that.
+#
+
+
+use lib "/opt/vyatta/share/perl5";
+use Getopt::Long;
+
+use warnings;
+use strict;
+
+# Command used to send debug output to syslog, both for debugging and so
+# that the user is not confused by it.  Messages are logged at debug
+# level, which is suppressed by default, so that we don't unnecessarily
+# fill up the syslog file.
+# NOTE(review): the "firewall-cfg" tag looks inherited from another
+# script -- confirm whether a tag specific to this script is wanted.
+my $logger = 'logger -t firewall-cfg -p local0.debug --';
+
+# Enable printing debug output to stdout (--debug).
+my $debug_flag = 0;
+
+# Enable sending debug output to syslog through $logger (--syslog).
+my $syslog_flag = 0;
+
+# Name of the interface to set up (--setup <ifname>).
+my $setup_ifname;
+
+# GetOptions returns false when it meets an unknown or malformed option.
+# Stop in that case instead of running with a partly parsed command line.
+if (!GetOptions("setup=s" => \$setup_ifname,
+                "debug"   => \$debug_flag,
+                "syslog"  => \$syslog_flag)) {
+    print "Usage: $0 --setup <ifname> [--debug] [--syslog]\n";
+    exit 1;
+}
+
+# Emit one debug message.
+#
+# Arguments:
+#   $message - text to report; callers supply their own trailing newline.
+#
+# The message is printed to stdout when $debug_flag is set and is handed
+# to the $logger command when $syslog_flag is set.
+sub log_msg {
+    my $message = shift;
+
+    print "DEBUG: $message" if $debug_flag;
+
+    if ($syslog_flag) {
+        # Run the logger command in list form so that no shell ever parses
+        # the message; quotes or metacharacters in it (for instance from an
+        # interface name) are then passed through literally.
+        system(split(' ', $logger), "DEBUG: $message");
+    }
+}
+
+
+# Affinity strategy function for the igb driver. NICs using this
+# driver have an equal number of rx and tx queues. The first part of
+# the strategy for optimal performance is to assign irq of each queue
+# in a pair of tx and rx queues that have the same queue number to the
+# same CPU. I.e., assign queue 0 to CPU X, queue 1 to CPU Y, etc.
+# The second part is to avoid assigning any queues to the second CPU
+# in a hyper-threaded pair, if possible. I.e., if CPUs 0 and 1 form a
+# hyper-threaded pair, then assign a queue to CPU 0, but try to avoid
+# assigning one to CPU 1. But if we have more queues than CPUs, then
+# it is OK to assign some to the second CPU in a hyperthreaded pair.
+#
+# Arguments:
+#   $ifname   - name of the interface, e.g. "eth0"
+#   $numcpus  - number of Linux "cpus"
+#   $numcores - number of CPU cores; fewer cores than cpus is taken to
+#               mean that hyperthreading is enabled
+#
+# Exits with status 1 if the interface does not have the same number of
+# rx and tx queues.  Failures to set an individual IRQ are reported on
+# stderr and do not stop the remaining assignments.
+sub igb_func{
+    my ($ifname, $numcpus, $numcores) = @_;
+    my %rx_irq_of;      # rx queue number => IRQ number
+    my %tx_irq_of;      # tx queue number => IRQ number
+
+    log_msg("igb_func was called.\n");
+
+    # 2 if HT enabled, 1 if not
+    my $ht_factor = ($numcpus > $numcores) ? 2 : 1;
+
+    log_msg("ht_factor is $ht_factor.\n");
+
+    # Read /proc/interrupts once and record the IRQ of every
+    # "<ifname>-rx-<n>" and "<ifname>-tx-<n>" entry.  The queue number is
+    # captured as a whole number so that queue 1 is never confused with
+    # queues 10-19, and the look-behind keeps e.g. "veth0-rx-0" from
+    # matching when the interface is "eth0".
+    my $intr_fh;
+    if (!open($intr_fh, '<', '/proc/interrupts')) {
+        print STDERR "Warning: cannot read /proc/interrupts: $!\n";
+        return;
+    }
+    while (my $line = <$intr_fh>) {
+        if ($line =~ /^\s*(\d+):.*(?<![\w.-])\Q$ifname\E-(rx|tx)-(\d+)\b/) {
+            my ($irq, $dir, $queue) = ($1, $2, $3 + 0);
+            if ($dir eq 'rx') {
+                $rx_irq_of{$queue} = $irq;
+            } else {
+                $tx_irq_of{$queue} = $irq;
+            }
+        }
+    }
+    close($intr_fh);
+
+    # Figure out how many queues we have
+    my $rx_queues = scalar(keys %rx_irq_of);
+    my $tx_queues = scalar(keys %tx_irq_of);
+
+    log_msg("rx_queues is $rx_queues. tx_queues is $tx_queues\n");
+
+    if ($rx_queues != $tx_queues) {
+        printf("Error: rx and tx queues don't match for igb driver.\n");
+        exit 1;
+    }
+
+    # For i = 0 to number of queues:
+    #    Affinity of rx and tx queue $i gets CPU ($i * (2 if HT, 1 if no HT))
+    #    % number_of_cpus
+    my $cpu = 0;
+    for my $queue (0 .. $rx_queues - 1) {
+        # Generate the hex string for the bitmask representing this CPU.
+        # The kernel parses the mask as comma-separated groups of 32 bits,
+        # so CPUs 32 and above need trailing all-zero groups.
+        my $cpu_hex = sprintf("%x", 1 << ($cpu % 32))
+            . (",00000000" x int($cpu / 32));
+        log_msg("queue=$queue cpu=$cpu cpu_hex=$cpu_hex\n");
+
+        my $rx_irq = $rx_irq_of{$queue};
+        my $tx_irq = $tx_irq_of{$queue};
+
+        log_msg("rx_irq = " . (defined($rx_irq) ? $rx_irq : '')
+                . ". tx_irq = " . (defined($tx_irq) ? $tx_irq : '') . "\n");
+
+        # Assign CPU affinity for both IRQs
+        for my $irq ($rx_irq, $tx_irq) {
+            if (!defined($irq)) {
+                # Queue numbers were not contiguous; nothing to write to.
+                print STDERR
+                    "Warning: missing rx or tx IRQ for queue $queue of $ifname\n";
+                next;
+            }
+
+            my $path = "/proc/irq/$irq/smp_affinity";
+            my $aff_fh;
+            if (!open($aff_fh, '>', $path)) {
+                print STDERR "Warning: cannot open $path: $!\n";
+                next;
+            }
+            print {$aff_fh} "$cpu_hex\n";
+            # A rejected mask is only reported when the buffer is flushed.
+            close($aff_fh)
+                or print STDERR "Warning: cannot write $cpu_hex to $path: $!\n";
+        }
+
+        $cpu += $ht_factor;
+
+        if ($cpu >= $numcpus) {
+            # Must "wrap"
+            $cpu %= $numcpus;
+
+            if ($ht_factor > 1) {
+                # Next time through, select the other CPU in a hyperthreaded
+                # pair.
+                if ($cpu == 0) {
+                    $cpu++;
+                } else {
+                    $cpu--;
+                }
+            }
+        }
+    }
+}
+
+# Similar strategy as for igb driver, but Broadcom NICs do not have
+# separate receive and transmit queues.
+#
+# Arguments:
+#   $ifname   - name of the interface, e.g. "eth0"
+#   $numcpus  - number of Linux "cpus"
+#   $numcores - number of CPU cores; fewer cores than cpus is taken to
+#               mean that hyperthreading is enabled
+#
+# Exits with status 1 if no queues are found for the interface.  Failures
+# to set an individual IRQ are reported on stderr and do not stop the
+# remaining assignments.
+sub bnx2_func{
+    my ($ifname, $numcpus, $numcores) = @_;
+    my %irq_of;         # queue number => IRQ number
+
+    log_msg("bnx2_func was called.\n");
+
+    # Read /proc/interrupts once and record the IRQ of every
+    # "<ifname>-<n>" entry.  The queue number is captured as a whole number
+    # so that queue 1 is never confused with queues 10-19, and the
+    # look-behind keeps e.g. "veth0-0" from matching when the interface is
+    # "eth0".
+    my $intr_fh;
+    if (open($intr_fh, '<', '/proc/interrupts')) {
+        while (my $line = <$intr_fh>) {
+            if ($line =~ /^\s*(\d+):.*(?<![\w.-])\Q$ifname\E-(\d+)\b/) {
+                $irq_of{$2 + 0} = $1;
+            }
+        }
+        close($intr_fh);
+    } else {
+        print STDERR "Warning: cannot read /proc/interrupts: $!\n";
+    }
+
+    # Figure out how many queues we have
+    my $num_queues = scalar(keys %irq_of);
+
+    log_msg("num_queues=$num_queues\n");
+
+    if ($num_queues <= 0) {
+        print "ERROR: No queues found for $ifname\n";
+        exit 1;
+    }
+
+    # 2 if HT enabled, 1 if not
+    my $ht_factor = ($numcpus > $numcores) ? 2 : 1;
+
+    log_msg("ht_factor is $ht_factor.\n");
+
+    my $cpu = 0;
+    for my $queue (0 .. $num_queues - 1) {
+        # Generate the hex string for the bitmask representing this CPU.
+        # The kernel parses the mask as comma-separated groups of 32 bits,
+        # so CPUs 32 and above need trailing all-zero groups.
+        my $cpu_hex = sprintf("%x", 1 << ($cpu % 32))
+            . (",00000000" x int($cpu / 32));
+        log_msg("queue=$queue cpu=$cpu cpu_hex=$cpu_hex\n");
+
+        # Get the IRQ number for the queue
+        my $irq = $irq_of{$queue};
+
+        if (defined($irq)) {
+            log_msg("irq = $irq.\n");
+
+            # Assign CPU affinity for this IRQ
+            my $path = "/proc/irq/$irq/smp_affinity";
+            my $aff_fh;
+            if (open($aff_fh, '>', $path)) {
+                print {$aff_fh} "$cpu_hex\n";
+                # A rejected mask is only reported when the buffer is flushed.
+                close($aff_fh)
+                    or print STDERR
+                        "Warning: cannot write $cpu_hex to $path: $!\n";
+            } else {
+                print STDERR "Warning: cannot open $path: $!\n";
+            }
+        } else {
+            # Queue numbers were not contiguous; nothing to write to.
+            print STDERR "Warning: no IRQ found for queue $queue of $ifname\n";
+        }
+
+        $cpu += $ht_factor;
+        if ($cpu >= $numcpus) {
+            # Must "wrap"
+            $cpu %= $numcpus;
+
+            if ($ht_factor > 1) {
+                # Next time through, select the other CPU in a hyperthreaded
+                # pair.
+                if ($cpu == 0) {
+                    $cpu++;
+                } else {
+                    $cpu--;
+                }
+            }
+        }
+    }
+}
+
+# Map from NIC driver name, as reported by "ethtool -i", to the function
+# that implements the affinity strategy for that driver.
+my %driver_hash = ( 'igb'   => \&igb_func,
+                    'ixgbe' => \&igb_func,
+                    'bnx2'  => \&bnx2_func );
+
+if (defined $setup_ifname) {
+    # Set up automatic IRQ affinity for the named interface
+
+    log_msg("setup $setup_ifname\n");
+
+    my $ifname = $setup_ifname;  # shorter variable name
+    my $drivername = '';         # Name of the NIC driver, e.g. "igb".
+    my $numcpus;                 # Number of Linux "cpus"
+    my $numcores;                # Number of unique CPU cores
+    my $driver_func;             # Pointer to function specific to a driver
+
+    # Determine how many CPUs the machine has
+    $numcpus = `grep "^processor" /proc/cpuinfo | wc -l`;
+    $numcpus = ($numcpus =~ /(\d+)/) ? $1 : 0;
+
+    log_msg("numcpus is $numcpus\n");
+
+    if ($numcpus <= 1) {
+        # Nothing to do if we only have one CPU, so just exit quietly.
+        # A count of zero (no "processor" lines found) is treated the same
+        # way, because the driver functions take values modulo $numcpus.
+        exit 0;
+    }
+
+    # Verify that interface exists
+    if (! (-e "/proc/sys/net/ipv4/conf/$ifname")) {
+        print "Error: Interface $ifname does not exist\n";
+        exit 1;
+    }
+
+    # Figure out what driver this NIC is using.  ethtool is run in list
+    # form so that the interface name is never parsed by a shell.
+    if (open(my $ethtool_fh, '-|', 'ethtool', '-i', $ifname)) {
+        while (my $line = <$ethtool_fh>) {
+            if ($line =~ /^driver\S*\s+(\S+)/) {
+                $drivername = $1;
+                last;
+            }
+        }
+        close($ethtool_fh);
+    }
+
+    log_msg("drivername is $drivername\n");
+
+    $driver_func = $driver_hash{$drivername};
+
+    # We only support a couple of drivers at this time, so just exit
+    # if it's not one we support.
+    if (! defined($driver_func)) {
+        print "Automatic SMP affinity not supported for NICs using the $drivername driver.\n";
+        exit 0;                  # not an error
+    }
+
+    # Determine whether machine has hyperthreading enabled.  "uniq" only
+    # collapses adjacent duplicate lines, so this counts runs of
+    # consecutive CPUs that report the same core id.
+    # NOTE(review): that appears to match the driver functions, which
+    # treat CPUs N and N+1 as a hyperthreaded pair -- confirm before
+    # changing this to a true count of distinct cores.
+    $numcores = `grep "^core id" /proc/cpuinfo | uniq | wc -l`;
+    $numcores = ($numcores =~ /(\d+)/) ? $1 : 0;
+
+    log_msg("numcores is $numcores.\n");
+
+    $driver_func->($ifname, $numcpus, $numcores);
+
+    exit 0;
+}
+
+print "Must specify options.\n";
+exit(1);
+
+
diff --git a/templates/interfaces/ethernet/node.tag/smp_affinity/node.def b/templates/interfaces/ethernet/node.tag/smp_affinity/node.def
index c07fa1d..a444ae9 100644
--- a/templates/interfaces/ethernet/node.tag/smp_affinity/node.def
+++ b/templates/interfaces/ethernet/node.tag/smp_affinity/node.def
@@ -13,15 +13,29 @@ type: txt
help: Set CPU interrupt affinity mask for this interface
-comp_help: Hexidecimal bitmask representing CPUs that this NIC will interrupt
+comp_help: Possible completions:
+ XX\tHexidecimal bitmask representing CPUs that this NIC will interrupt
+ auto\tSet affinity automatically
-syntax:expression: exec "/opt/vyatta/sbin/vyatta-irqaffin check $VAR(../@) $VAR(@)"
+default: "auto"
+
+syntax:expression: exec " \
+ if [ $VAR(@) = auto ]; then \
+ exit 0; \
+ else \
+ /opt/vyatta/sbin/vyatta-irqaffin check $VAR(../@) $VAR(@); \
+ fi"
update:
- sudo /opt/vyatta/sbin/vyatta-irqaffin set $VAR(../@) $VAR(@)
- if [ $? -ne 0 ]; then
- echo "Error setting CPU affinity mask $VAR(@) on interface $VAR(../@)"
- exit 1
+ if [ "$VAR(@)" = "auto" ]; then
+ echo "Setting SMP affinity for $VAR(../@) automatically."
+ sudo /opt/vyatta/sbin/vyatta-auto-irqaffin.pl --setup $VAR(../@)
+ else
+ sudo /opt/vyatta/sbin/vyatta-irqaffin set $VAR(../@) $VAR(@)
+ if [ $? -ne 0 ]; then
+ echo "Error setting CPU affinity mask $VAR(@) on interface $VAR(../@)"
+ exit 1
+ fi
fi
delete: [ -d /sys/class/net/$VAR(../@) ] || exit 0