summaryrefslogtreecommitdiff
path: root/scripts/system/irq-affinity.pl
diff options
context:
space:
mode:
authorStephen Hemminger <stephen.hemminger@vyatta.com>2010-11-12 10:41:31 -0800
committerStephen Hemminger <stephen.hemminger@vyatta.com>2010-11-12 11:07:09 -0800
commitac9042acae2579b03d39fa6d5b7a8dbef1a8a048 (patch)
treed71efb8490456c3b1ec74c6e9cac3e517633e024 /scripts/system/irq-affinity.pl
parent500c844e27d7ff989ae73a8cff506552844cdf70 (diff)
downloadvyatta-cfg-system-ac9042acae2579b03d39fa6d5b7a8dbef1a8a048.tar.gz
vyatta-cfg-system-ac9042acae2579b03d39fa6d5b7a8dbef1a8a048.zip
New IRQ affinity script and RPS support
Replace old script with new cleaner script that handles both IRQ affinity and Receive Packet Steering. Instead of two scripts (one for mask and one for auto), do it all with one script. Receive Packet Steering is supported in two ways. If 'auto' is used, then both threads on HT system will be used for receive processing. If explicit mask is given, then two masks can be used to set both IRQ cpus and RPS cpus.
Diffstat (limited to 'scripts/system/irq-affinity.pl')
-rwxr-xr-xscripts/system/irq-affinity.pl289
1 files changed, 289 insertions, 0 deletions
diff --git a/scripts/system/irq-affinity.pl b/scripts/system/irq-affinity.pl
new file mode 100755
index 00000000..eaad95cc
--- /dev/null
+++ b/scripts/system/irq-affinity.pl
@@ -0,0 +1,289 @@
+#!/usr/bin/perl
+
+# **** License ****
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# This code was originally developed by Vyatta, Inc.
+# Portions created by Vyatta are Copyright (C) 2009,2010 Vyatta, Inc.
+# All Rights Reserved.
+#
+# **** End License ****
+#
+use warnings;
+use strict;
+use Sys::Syslog qw(:standard :macros);
+
+die "Usage: $0 ifname {auto | mask}\n" if ($#ARGV < 1);
+
+my ($ifname, $mask) = @ARGV;
+
+die "Error: Interface $ifname does not exist\n"
+ unless -d "/sys/class/net/$ifname";
+
+openlog("irq-affinity","",LOG_LOCAL0);
+
+my ( $cpus, $cores ) = cpuinfo();
+
+if ($mask eq 'auto') {
+ affinity_auto($ifname);
+} else {
+ affinity_mask($ifname, $mask);
+}
+
+exit 0;
+
+# Get current irq assignments by reading /proc/interrupts
+sub irqinfo {
+ my $irqmap;
+
+ open( my $f, '<', "/proc/interrupts" )
+ or die "Can't read /proc/interrupts";
+
+ while (<$f>) {
+ chomp;
+ my @cols = split;
+
+ # First column is IRQ number (and colon)
+ next unless /^\s*(\d+):\s/;
+ my $irq = $1;
+
+ # Skip columns for IRQ's per CPU
+ foreach my $name ( @cols[ $cpus+1 .. $#cols ] ) {
+ $name =~ s/,$//;
+ $irqmap->{$name} = $irq;
+ }
+ }
+ close $f;
+
+ return $irqmap;
+}
+
+# Determine number of cpus and cores
+sub cpuinfo {
+ my ( $cpu, $core );
+
+ open( my $f, '<', "/proc/cpuinfo" )
+ or die "Can't read /proc/cpuinfo";
+
+ while (<$f>) {
+ chomp;
+ if (/^cpu cores\s+:\s(\d+)$/) {
+ $core = $1;
+ }
+ elsif (/^processor\s+:\s+(\d)$/) {
+ $cpu = $1;
+ }
+ }
+ close $f;
+
+ return ( $cpu + 1, $core );
+}
+
+# Set affinity value for a irq
+sub set_affinity {
+ my ( $ifname, $irq, $mask ) = @_;
+ my $smp_affinity = "/proc/irq/$irq/smp_affinity";
+
+ syslog(LOG_INFO, "%s: irq %d affinity set to 0x%x", $ifname, $irq, $mask);
+
+ open( my $f, '>', $smp_affinity )
+ or die "Can't open: $smp_affinity : $!\n";
+ printf {$f} "%x\n", $mask;
+ close $f;
+}
+
+# set Receive Packet Steering mask
+sub set_rps {
+ my ( $ifname, $q, $mask ) = @_;
+
+ # ignore if older kernel without RPS
+ my $rxq = "/sys/class/net/$ifname/queues";
+ return unless ( -d $rxq );
+
+ syslog(LOG_INFO, "%s: receive queue %d cpus set to 0x%x",
+ $ifname, $q, $mask);
+
+ my $rps_cpus = "$rxq/rx-$q/rps_cpus";
+ open( my $f, '>', $rps_cpus )
+ or die "Can't open: $rps_cpus : $!\n";
+ printf {$f} "%x\n", $mask;
+ close $f;
+}
+
+# For multi-queue NIC choose next cpu to be on next core
+# FIXME assumes all cpu's online
+sub next_cpu {
+ my $cpu = shift;
+ my $threads = $cpus / $cores; # threads per core
+
+ $cpu += $threads;
+ if ( $cpu >= $cpus ) {
+ $cpu = ($cpu + 1) % $threads; # next thread on core 0
+ }
+ return $cpu;
+}
+
+# First cpu to assign for the queues
+sub first_cpu {
+ my ($ifname, $numq) = @_;
+
+ # For multi-queue nic's always starts with 0
+ # This is less than ideal when there are more core's available
+ # than number of queues (probably should barber pole);
+ # but the Intel IXGBE needs CPU 0 <-> queue 0 because of flow director
+ return 0 if ($numq > 1);
+
+ # For single-queue nic choose IRQ based on name
+ # Ideally should make decision on least loaded CPU
+ my ($ifunit) = ($ifname =~ m/^[a-z]*(\d+)$/);
+ die "can't find number for $ifname\n"
+ unless defined($ifunit);
+
+ return ( $ifunit * ($cpus / $cores) ) % $cpus;
+}
+
+# Assignment for multi-queue NICs
+# Assign each queue to successive cores
+sub assign_multiqueue {
+ my ( $ifname, $numq, $irqmap, $irqfmt ) = @_;
+ my $cpu = 0;
+
+ for ( my $q = first_cpu($ifname, $numq) ; $q < $numq ; $q++ ) {
+ # handles multiple irq's per interface (tx/rx)
+ foreach my $fmt (@$irqfmt) {
+ my $name = sprintf( $fmt, $ifname, $q );
+ my $irq = $irqmap->{$name};
+
+ syslog(LOG_INFO, "%s: queue %d assign %s to cpu %d",
+ $ifname, $q, $name, $cpu );
+
+ # Assign CPU affinity for both IRQs
+ set_affinity( $ifname, $irq, 1 << $cpu );
+
+ # TODO use RPS to steer data if cores > queues?
+ }
+ $cpu = next_cpu($cpu);
+ }
+}
+
+# Affinity assignment function for single-queue NICs. The strategy
+# here is to just spread the interrupts of different NICs evenly
+# across all CPUs. That is the best we can do without monitoring the
+# load and traffic patterns. So we just directly map the NIC unit
+# number into a CPU number.
+sub assign_single {
+ my ( $ifname, $irq ) = @_;
+ my $cpu = first_cpu($ifname, 1);
+
+ syslog( LOG_INFO, "%s: assign irq %d to cpu %d", $ifname, $irq, $cpu );
+
+ set_affinity( $ifname, $irq, 1 << $cpu );
+
+ my $threads = $cpus / $cores;
+ if ($threads > 1) {
+ # Use both threads on this cpu if hyperthreading
+ my $mask = ((1 << $threads) - 1) << $cpu;
+ set_rps($ifname, 0, $mask);
+ }
+ # MAYBE - Use all cpu's if no HT
+}
+
+# find irq number used for given interface using sysfs
+sub get_irq {
+ my $ifname = shift;
+
+ open( my $irqf, '<', "/sys/class/net/$ifname/device/irq" )
+ or warn "$ifname: can't find irq : $!\n";
+ my $irq = <$irqf>;
+ chomp $irq;
+ close $irqf;
+
+ return $irq;
+}
+
+# Mask must contain at least one CPU and
+# no bits outside of range of available CPU's
+sub check_mask {
+ my ($ifname, $name, $mask) = @_;
+ my $m = hex($mask);
+
+ die "$ifname: $name mask $mask has no bits set\n"
+ if ($m == 0);
+
+ die "$ifname: $name mask $mask to large for number of CPU's: $cpus\n"
+ if ($m >= 1 << $cpus);
+}
+
+# Set affinity (and RPS) based on mask
+sub affinity_mask {
+ my ($ifname, $mask) = @_;
+
+ # match on <hex> or <hex>,<hex>
+ unless ($mask =~ /^([0-9a-f]+)(|,([0-9a-f]+))$/) {
+ die "$ifname: irq mask $mask is not a valid affinity mask\n"
+ }
+
+ my $irq = $1;
+ my $rps = $3;
+
+ check_mask($ifname, "irq", $irq);
+ check_mask($ifname, "rps", $rps) if $rps;
+
+ set_affinity($ifname, get_irq($ifname), hex($irq));
+ set_rps($ifname, 0, hex($rps)) if $rps;
+}
+
+# The auto strategy involves trying to achieve the following goals:
+#
+# - Spread the receive load among as many CPUs as possible.
+#
+# - For all multi-queue NICs in the system that provide both tx and
+# rx queues, keep all of the queues that share the same queue
+# number on same CPUs. I.e. tx and rx queue 0 of all such NICs
+# should interrupt one CPU; tx and rx queue 1 should interrupt a
+# different CPU, etc.
+#
+# - If hyperthreading is supported and enabled, avoid assigning
+# queues to both CPUs of a hyperthreaded pair if there are enough
+# CPUs available to do that.
+sub affinity_auto {
+ my $ifname = shift;
+ my $irqmap = irqinfo();
+ my @irqnames = keys %{$irqmap};
+
+ # Figure out what style of irq naming is being used
+ my $numirq = grep { /$ifname/ } @irqnames;
+ if ( $numirq > 1 ) {
+ my $nq = grep { /$ifname-rx-/ } @irqnames;
+
+ if ( $nq > 0 ) {
+ my $ntx = grep { /$ifname-tx-/ } @irqnames;
+ die "$ifname: rx queues $nq != tx queues $ntx"
+ unless ( $nq == $ntx );
+
+ return assign_multiqueue( $ifname, $nq, $irqmap,
+ [ '%s-rx-%d', '%s-tx-%d' ] );
+ }
+
+ $nq = grep { /$ifname-TxRx-/ } @irqnames;
+ if ( $nq > 0 ) {
+ return assign_multiqueue( $ifname, $nq, $irqmap, ['%s-TxRx-%d'] );
+ }
+
+ $nq = grep { /$ifname-\d$/ } @irqnames;
+ if ( $nq > 0 ) {
+ return assign_multiqueue( $ifname, $nq, $irqmap, ['%s-%d'] );
+ }
+
+ die "Unknown multiqueue device naming for $ifname\n";
+ }
+
+ assign_single( $ifname, get_irq($ifname) );
+}