summaryrefslogtreecommitdiff
path: root/scripts/system/irq-affinity.pl
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/system/irq-affinity.pl')
-rwxr-xr-xscripts/system/irq-affinity.pl289
1 files changed, 289 insertions, 0 deletions
diff --git a/scripts/system/irq-affinity.pl b/scripts/system/irq-affinity.pl
new file mode 100755
index 00000000..eaad95cc
--- /dev/null
+++ b/scripts/system/irq-affinity.pl
@@ -0,0 +1,289 @@
+#!/usr/bin/perl
+
+# **** License ****
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# This code was originally developed by Vyatta, Inc.
+# Portions created by Vyatta are Copyright (C) 2009,2010 Vyatta, Inc.
+# All Rights Reserved.
+#
+# **** End License ****
+#
+use warnings;
+use strict;
+use Sys::Syslog qw(:standard :macros);
+
+die "Usage: $0 ifname {auto | mask}\n" if ($#ARGV < 1);
+
+my ($ifname, $mask) = @ARGV;
+
+die "Error: Interface $ifname does not exist\n"
+ unless -d "/sys/class/net/$ifname";
+
+openlog("irq-affinity","",LOG_LOCAL0);
+
+my ( $cpus, $cores ) = cpuinfo();
+
+if ($mask eq 'auto') {
+ affinity_auto($ifname);
+} else {
+ affinity_mask($ifname, $mask);
+}
+
+exit 0;
+
+# Get current irq assignments by reading /proc/interrupts
+sub irqinfo {
+ my $irqmap;
+
+ open( my $f, '<', "/proc/interrupts" )
+ or die "Can't read /proc/interrupts";
+
+ while (<$f>) {
+ chomp;
+ my @cols = split;
+
+ # First column is IRQ number (and colon)
+ next unless /^\s*(\d+):\s/;
+ my $irq = $1;
+
+ # Skip columns for IRQ's per CPU
+ foreach my $name ( @cols[ $cpus+1 .. $#cols ] ) {
+ $name =~ s/,$//;
+ $irqmap->{$name} = $irq;
+ }
+ }
+ close $f;
+
+ return $irqmap;
+}
+
+# Determine number of cpus and cores
+sub cpuinfo {
+ my ( $cpu, $core );
+
+ open( my $f, '<', "/proc/cpuinfo" )
+ or die "Can't read /proc/cpuinfo";
+
+ while (<$f>) {
+ chomp;
+ if (/^cpu cores\s+:\s(\d+)$/) {
+ $core = $1;
+ }
+ elsif (/^processor\s+:\s+(\d)$/) {
+ $cpu = $1;
+ }
+ }
+ close $f;
+
+ return ( $cpu + 1, $core );
+}
+
+# Set affinity value for a irq
+sub set_affinity {
+ my ( $ifname, $irq, $mask ) = @_;
+ my $smp_affinity = "/proc/irq/$irq/smp_affinity";
+
+ syslog(LOG_INFO, "%s: irq %d affinity set to 0x%x", $ifname, $irq, $mask);
+
+ open( my $f, '>', $smp_affinity )
+ or die "Can't open: $smp_affinity : $!\n";
+ printf {$f} "%x\n", $mask;
+ close $f;
+}
+
+# set Receive Packet Steering mask
+sub set_rps {
+ my ( $ifname, $q, $mask ) = @_;
+
+ # ignore if older kernel without RPS
+ my $rxq = "/sys/class/net/$ifname/queues";
+ return unless ( -d $rxq );
+
+ syslog(LOG_INFO, "%s: receive queue %d cpus set to 0x%x",
+ $ifname, $q, $mask);
+
+ my $rps_cpus = "$rxq/rx-$q/rps_cpus";
+ open( my $f, '>', $rps_cpus )
+ or die "Can't open: $rps_cpus : $!\n";
+ printf {$f} "%x\n", $mask;
+ close $f;
+}
+
+# For multi-queue NIC choose next cpu to be on next core
+# FIXME assumes all cpu's online
+sub next_cpu {
+ my $cpu = shift;
+ my $threads = $cpus / $cores; # threads per core
+
+ $cpu += $threads;
+ if ( $cpu >= $cpus ) {
+ $cpu = ($cpu + 1) % $threads; # next thread on core 0
+ }
+ return $cpu;
+}
+
+# First cpu to assign for the queues
+sub first_cpu {
+ my ($ifname, $numq) = @_;
+
+ # For multi-queue nic's always starts with 0
+ # This is less than ideal when there are more core's available
+ # than number of queues (probably should barber pole);
+ # but the Intel IXGBE needs CPU 0 <-> queue 0 because of flow director
+ return 0 if ($numq > 1);
+
+ # For single-queue nic choose IRQ based on name
+ # Ideally should make decision on least loaded CPU
+ my ($ifunit) = ($ifname =~ m/^[a-z]*(\d+)$/);
+ die "can't find number for $ifname\n"
+ unless defined($ifunit);
+
+ return ( $ifunit * ($cpus / $cores) ) % $cpus;
+}
+
+# Assignment for multi-queue NICs
+# Assign each queue to successive cores
+sub assign_multiqueue {
+ my ( $ifname, $numq, $irqmap, $irqfmt ) = @_;
+ my $cpu = 0;
+
+ for ( my $q = first_cpu($ifname, $numq) ; $q < $numq ; $q++ ) {
+ # handles multiple irq's per interface (tx/rx)
+ foreach my $fmt (@$irqfmt) {
+ my $name = sprintf( $fmt, $ifname, $q );
+ my $irq = $irqmap->{$name};
+
+ syslog(LOG_INFO, "%s: queue %d assign %s to cpu %d",
+ $ifname, $q, $name, $cpu );
+
+ # Assign CPU affinity for both IRQs
+ set_affinity( $ifname, $irq, 1 << $cpu );
+
+ # TODO use RPS to steer data if cores > queues?
+ }
+ $cpu = next_cpu($cpu);
+ }
+}
+
+# Affinity assignment function for single-queue NICs. The strategy
+# here is to just spread the interrupts of different NICs evenly
+# across all CPUs. That is the best we can do without monitoring the
+# load and traffic patterns. So we just directly map the NIC unit
+# number into a CPU number.
+sub assign_single {
+ my ( $ifname, $irq ) = @_;
+ my $cpu = first_cpu($ifname, 1);
+
+ syslog( LOG_INFO, "%s: assign irq %d to cpu %d", $ifname, $irq, $cpu );
+
+ set_affinity( $ifname, $irq, 1 << $cpu );
+
+ my $threads = $cpus / $cores;
+ if ($threads > 1) {
+ # Use both threads on this cpu if hyperthreading
+ my $mask = ((1 << $threads) - 1) << $cpu;
+ set_rps($ifname, 0, $mask);
+ }
+ # MAYBE - Use all cpu's if no HT
+}
+
+# find irq number used for given interface using sysfs
+sub get_irq {
+ my $ifname = shift;
+
+ open( my $irqf, '<', "/sys/class/net/$ifname/device/irq" )
+ or warn "$ifname: can't find irq : $!\n";
+ my $irq = <$irqf>;
+ chomp $irq;
+ close $irqf;
+
+ return $irq;
+}
+
+# Mask must contain at least one CPU and
+# no bits outside of range of available CPU's
+sub check_mask {
+ my ($ifname, $name, $mask) = @_;
+ my $m = hex($mask);
+
+ die "$ifname: $name mask $mask has no bits set\n"
+ if ($m == 0);
+
+ die "$ifname: $name mask $mask to large for number of CPU's: $cpus\n"
+ if ($m >= 1 << $cpus);
+}
+
+# Set affinity (and RPS) based on mask
+sub affinity_mask {
+ my ($ifname, $mask) = @_;
+
+ # match on <hex> or <hex>,<hex>
+ unless ($mask =~ /^([0-9a-f]+)(|,([0-9a-f]+))$/) {
+ die "$ifname: irq mask $mask is not a valid affinity mask\n"
+ }
+
+ my $irq = $1;
+ my $rps = $3;
+
+ check_mask($ifname, "irq", $irq);
+ check_mask($ifname, "rps", $rps) if $rps;
+
+ set_affinity($ifname, get_irq($ifname), hex($irq));
+ set_rps($ifname, 0, hex($rps)) if $rps;
+}
+
+# The auto strategy involves trying to achieve the following goals:
+#
+# - Spread the receive load among as many CPUs as possible.
+#
+# - For all multi-queue NICs in the system that provide both tx and
+# rx queues, keep all of the queues that share the same queue
+# number on same CPUs. I.e. tx and rx queue 0 of all such NICs
+# should interrupt one CPU; tx and rx queue 1 should interrupt a
+# different CPU, etc.
+#
+# - If hyperthreading is supported and enabled, avoid assigning
+# queues to both CPUs of a hyperthreaded pair if there are enough
+# CPUs available to do that.
+sub affinity_auto {
+ my $ifname = shift;
+ my $irqmap = irqinfo();
+ my @irqnames = keys %{$irqmap};
+
+ # Figure out what style of irq naming is being used
+ my $numirq = grep { /$ifname/ } @irqnames;
+ if ( $numirq > 1 ) {
+ my $nq = grep { /$ifname-rx-/ } @irqnames;
+
+ if ( $nq > 0 ) {
+ my $ntx = grep { /$ifname-tx-/ } @irqnames;
+ die "$ifname: rx queues $nq != tx queues $ntx"
+ unless ( $nq == $ntx );
+
+ return assign_multiqueue( $ifname, $nq, $irqmap,
+ [ '%s-rx-%d', '%s-tx-%d' ] );
+ }
+
+ $nq = grep { /$ifname-TxRx-/ } @irqnames;
+ if ( $nq > 0 ) {
+ return assign_multiqueue( $ifname, $nq, $irqmap, ['%s-TxRx-%d'] );
+ }
+
+ $nq = grep { /$ifname-\d$/ } @irqnames;
+ if ( $nq > 0 ) {
+ return assign_multiqueue( $ifname, $nq, $irqmap, ['%s-%d'] );
+ }
+
+ die "Unknown multiqueue device naming for $ifname\n";
+ }
+
+ assign_single( $ifname, get_irq($ifname) );
+}