summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Bryars <mark@darkskiez.co.uk>2012-05-04 22:19:13 +0100
committerMark Bryars <mark@darkskiez.co.uk>2012-05-04 22:19:13 +0100
commite756c7948078bd5109c5b8a0f252851efc4532d6 (patch)
tree39c4c6d660d7c377989e1adc1492ec198cdaa084
downloadvyos-opennhrp-e756c7948078bd5109c5b8a0f252851efc4532d6.tar.gz
vyos-opennhrp-e756c7948078bd5109c5b8a0f252851efc4532d6.zip
Imported Upstream version 0.13
-rw-r--r--.gitignore6
-rw-r--r--AUTHORS7
-rw-r--r--Make.rules289
-rw-r--r--Makefile34
-rw-r--r--NEWS289
-rw-r--r--README112
-rw-r--r--TODO27
-rw-r--r--contrib/init-scripts/debian/opennhrp.init160
-rw-r--r--doc/draft-ietf-ion-r2r-nhrp-03.txt837
-rw-r--r--doc/rfc2332.txt2915
-rw-r--r--etc/Makefile5
-rwxr-xr-xetc/opennhrp-script38
-rwxr-xr-xetc/opennhrp-script.cert71
-rw-r--r--etc/opennhrp.conf9
-rwxr-xr-xetc/racoon-ph1dead.sh3
-rwxr-xr-xetc/racoon-ph1down.sh6
-rw-r--r--libev/LICENSE36
-rw-r--r--libev/README58
-rw-r--r--libev/VERSION1
-rw-r--r--libev/ev.c3694
-rw-r--r--libev/ev.h705
-rw-r--r--libev/ev_epoll.c228
-rw-r--r--libev/ev_kqueue.c196
-rw-r--r--libev/ev_poll.c144
-rw-r--r--libev/ev_port.c165
-rw-r--r--libev/ev_select.c308
-rw-r--r--libev/ev_vars.h187
-rw-r--r--libev/ev_wrap.h178
-rw-r--r--man/Makefile7
-rw-r--r--man/opennhrp-script.8146
-rw-r--r--man/opennhrp.8119
-rw-r--r--man/opennhrp.conf.5227
-rw-r--r--man/opennhrpctl.8124
-rw-r--r--nhrp/Makefile27
-rw-r--r--nhrp/admin.c609
-rw-r--r--nhrp/afnum.h29
-rw-r--r--nhrp/libev.c3
-rw-r--r--nhrp/libev.h22
-rw-r--r--nhrp/list.h184
-rw-r--r--nhrp/nhrp_address.c454
-rw-r--r--nhrp/nhrp_address.h80
-rw-r--r--nhrp/nhrp_common.h78
-rw-r--r--nhrp/nhrp_defines.h87
-rw-r--r--nhrp/nhrp_interface.c188
-rw-r--r--nhrp/nhrp_interface.h78
-rw-r--r--nhrp/nhrp_packet.c1331
-rw-r--r--nhrp/nhrp_packet.h128
-rw-r--r--nhrp/nhrp_peer.c2106
-rw-r--r--nhrp/nhrp_peer.h194
-rw-r--r--nhrp/nhrp_protocol.h130
-rw-r--r--nhrp/nhrp_server.c566
-rw-r--r--nhrp/opennhrp.c524
-rw-r--r--nhrp/opennhrpctl.c121
-rw-r--r--nhrp/sysdep_netlink.c1159
-rw-r--r--nhrp/sysdep_pfpacket.c388
-rw-r--r--nhrp/sysdep_syslog.c55
-rw-r--r--patches/ipsec-tools-0.7.diff1832
-rw-r--r--patches/linux-2.6.19-ipgre.diff44
-rw-r--r--patches/linux-2.6.20-ipgre.diff44
-rw-r--r--patches/linux-2.6.22-ipgre.diff53
60 files changed, 21845 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..74f6e20
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+opennhrp
+opennhrpctl
+*.o
+*.d
+*.cmd
+*~
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..b0122ba
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,7 @@
+Author:
+Timo Teräs <timo.teras@iki.fi>
+
+Patches from:
+Steffen Schmidt
+Natanael Copa
+Halil Goektepe (Deutsche Telekom DTAG Laboratories)
diff --git a/Make.rules b/Make.rules
new file mode 100644
index 0000000..5c30966
--- /dev/null
+++ b/Make.rules
@@ -0,0 +1,289 @@
+##
+# A set of makefile rules loosely based on kbuild.
+
+all: compile
+
+ifndef build
+
+toplevelrun:=yes
+
+##
+# Disable default rules and make output pretty.
+
+MAKEFLAGS += -rR --no-print-directory
+
+Makefile: ;
+
+ifdef V
+ ifeq ("$(origin V)", "command line")
+ VERBOSE = $(V)
+ endif
+endif
+ifndef VERBOSE
+ VERBOSE = 0
+endif
+
+ifeq ($(VERBOSE),1)
+ quiet =
+ Q =
+else
+ quiet=quiet_
+ Q = @
+endif
+
+ifneq ($(findstring s,$(MAKEFLAGS)),)
+ quiet=silent_
+endif
+
+export quiet Q VERBOSE
+
+##
+# Recursion helpers.
+srctree := $(CURDIR)
+objtree := $(CURDIR)
+
+export srctree objtree
+
+##
+# Consult SCM for better version string.
+
+TAGPREFIX ?= v
+
+GIT_REV := $(shell test -d .git && git describe || echo exported)
+ifneq ($(GIT_REV), exported)
+FULL_VERSION := $(patsubst $(TAGPREFIX)%,%,$(GIT_REV))
+else
+FULL_VERSION := $(VERSION)
+endif
+
+RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS -o -name .pc -o -name .hg -o -name .git \) -prune -o
+
+export FULL_VERSION RCS_FIND_IGNORE
+
+##
+# Utilities and default flags for them.
+
+CROSS_COMPILE ?=
+CC := $(CROSS_COMPILE)gcc
+LD := $(CROSS_COMPILE)ld
+INSTALL := install
+INSTALLDIR := $(INSTALL) -d
+
+CFLAGS ?= -g -O2
+CFLAGS_ALL := -Wall -Wstrict-prototypes -D_GNU_SOURCE -std=gnu99
+CFLAGS_ALL += $(CFLAGS)
+
+LDFLAGS ?= -g
+LDFLAGS_ALL += $(LDFLAGS)
+
+export CC LD INSTALL INSTALLDIR CFLAGS_ALL LDFLAGS_ALL
+
+build :=
+
+endif
+
+##
+# Reset all variables.
+ifneq ($(origin targets),file)
+targets :=
+endif
+
+src :=
+obj :=
+
+src += $(build)
+obj := $(build)
+
+##
+# Include directory specific stuff
+
+ifneq ($(build),)
+$(build)/Makefile: ;
+include $(build)/Makefile
+endif
+
+##
+# Rules and helpers
+
+PHONY += all compile install clean FORCE
+
+# Convenient variables
+comma := ,
+squote := '
+empty :=
+space := $(empty) $(empty)
+
+# The temporary file to save gcc -MD generated dependencies must not
+# contain a comma
+depfile = $(subst $(comma),_,$(@D)/.$(@F).d)
+
+build-dir = $(patsubst %/,%,$(dir $@))
+target-dir = $(dir $@)
+
+##
+# Build rules
+
+ifneq ($(NOCMDDEP),1)
+# arg-check compares the saved command line of the target with the current
+# one; it expands to an empty string when they are identical.
+# User may override this check using make NOCMDDEP=1
+# (the command-line comparison is then skipped entirely).
+arg-check = $(strip $(filter-out $(cmd_$(1)), $(cmd_$@)) \
+ $(filter-out $(cmd_$@), $(cmd_$(1))) )
+endif
+
+# echo command.
+# Short version is used, if $(quiet) equals `quiet_', otherwise full one.
+echo-cmd = $(if $($(quiet)cmd_$(1)),\
+ echo ' $(call escsq,$($(quiet)cmd_$(1)))$(echo-why)';)
+
+make-cmd = $(subst \#,\\\#,$(subst $$,$$$$,$(call escsq,$(cmd_$(1)))))
+
+# printing commands
+cmd = @$(echo-cmd) $(cmd_$(1))
+
+# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o
+dot-target = $(dir $@).$(notdir $@)
+
+# The temporary file to save gcc -MD generated dependencies must not
+# contain a comma
+depfile = $(subst $(comma),_,$(dot-target).d)
+
+# Escape single quote for use in echo statements
+escsq = $(subst $(squote),'\$(squote)',$1)
+
+# Find any prerequisites that is newer than target or that does not exist.
+# PHONY targets skipped in both cases.
+local-target-prereqs = %
+any-prereq = $(filter $(local-target-prereqs), $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^), $^))
+
+# Execute command if command has changed or prerequisite(s) are updated.
+#
+if_changed = $(if $(strip $(any-prereq) $(arg-check)), \
+ @set -e; \
+ $(echo-cmd) $(cmd_$(1)); \
+ echo 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd)
+
+# Usage: $(call if_changed_rule,foo)
+# Will check if $(cmd_foo) or any of the prerequisites changed,
+# and if so will execute $(rule_foo).
+if_changed_rule = $(if $(strip $(any-prereq) $(arg-check) ), \
+ @set -e; \
+ $(rule_$(1)))
+
+#####
+# Handle options to gcc.
+
+c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS_ALL) $(CFLAGS_EXTRA) \
+ $(CFLAGS_$(notdir $@))
+ld_flags = $(LDFLAGS_ALL) $(LDFLAGS_EXTRA) $(LDFLAGS_$(notdir $@))
+
+#####
+# Compile c-files.
+quiet_cmd_cc_o_c = CC $@
+
+cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
+
+define rule_cc_o_c
+ $(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \
+ (echo 'cmd_$@ := $(call make-cmd,cc_o_c)'; echo; cat $(depfile)) \
+ > $(dot-target).cmd ; \
+ rm $(depfile)
+endef
+
+$(obj)/%.o: override local-target-prereqs=%
+
+$(obj)/%.o: $(src)/%.c FORCE
+ $(call if_changed_rule,cc_o_c)
+
+#####
+# Link programs
+
+# Link an executable based on list of .o files, all plain c
+# host-cmulti -> executable
+__progs := $(addprefix $(obj)/,$(sort $(progs-y)))
+cobjs := $(addprefix $(obj)/,$(sort $(foreach m,$(progs-y),$($(m)-objs))))
+
+quiet_cmd_ld = LD $@
+ cmd_ld = $(CC) $(ld_flags) -o $@ \
+ $(addprefix $(obj)/,$($(@F)-objs)) \
+ $(LIBS) $(LIBS_$(@F))
+
+$(__progs): override local-target-prereqs=$(addprefix $(obj)/,$($(*F)-objs))
+
+$(__progs): $(obj)/%: $(cobjs) FORCE
+ $(call if_changed,ld)
+
+targets += $(__progs) $(cobjs)
+
+###
+# why - tell why a target got built
+ifeq ($(VERBOSE),2)
+why = \
+ $(if $(filter $@, $(PHONY)),- due to target is PHONY, \
+ $(if $(wildcard $@), \
+ $(if $(strip $(any-prereq)),- due to: $(any-prereq), \
+ $(if $(arg-check), \
+ $(if $(cmd_$@),- due to command line change: $(arg-check), \
+ $(if $(filter $@, $(targets)), \
+ - due to missing .cmd file, \
+ - due to $(notdir $@) not in $$(targets) \
+ ) \
+ ) \
+ ) \
+ ), \
+ - due to target missing \
+ ) \
+ )
+
+echo-why = $(call escsq, $(strip $(why)))
+endif
+
+##
+# Top level rules.
+
+%/: FORCE
+ $(Q)$(MAKE) -f Make.rules build=$(build-dir) $(MAKECMDGOALS)
+
+compile: $(targets)
+ @:
+
+install: $(targets) FORCE
+
+clean: $(filter %/,$(targets))
+ifeq ($(toplevelrun),yes)
+ $(Q)find . $(RCS_FIND_IGNORE) \
+ \( -name '*.[oas]' -o -name '.*.cmd' -o -name '.*.d' \) \
+ -type f -print | xargs rm -f
+endif
+ $(Q)rm -rf $(addprefix $(obj)/,$(sort $(progs-y) $(progs-n) $(progs-)))
+
+ifeq ($(origin VERSION),command line)
+DIST_VERSION=$(VERSION)
+else
+DIST_VERSION=$(FULL_VERSION)
+endif
+
+dist:
+ git archive --format tar --prefix=$(PACKAGE)-$(DIST_VERSION)/ \
+ $(TAGPREFIX)$(DIST_VERSION) \
+ | bzip2 -9 > $(PACKAGE)-$(DIST_VERSION).tar.bz2
+
+FORCE:
+
+# Read all saved command lines and dependencies for the $(targets) we
+# may be building above, using $(if_changed{,_rule}). As an
+# optimization, we don't need to read them if the target does not
+# exist, we will rebuild anyway in that case.
+
+targets := $(wildcard $(sort $(targets)))
+cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
+
+ifneq ($(cmd_files),)
+ include $(cmd_files)
+endif
+
+# Declare the contents of the .PHONY variable as phony. We keep that
+# information in a variable so we can use it in if_changed and friends.
+
+.PHONY: $(PHONY)
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0c85d98
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,34 @@
+##
+# Building opennhrp
+
+PACKAGE := opennhrp
+VERSION := 0.13
+
+##
+# Default directories
+
+DESTDIR=
+SBINDIR=/usr/sbin
+CONFDIR=/etc/opennhrp
+MANDIR=/usr/share/man
+DOCDIR=/usr/share/doc/opennhrp
+STATEDIR=/var/run
+
+export DESTDIR SBINDIR CONFDIR MANDIR DOCDIR STATEDIR
+
+##
+# Top-level rules and targets
+
+targets := nhrp/ etc/ man/
+
+##
+# Include all rules and stuff
+
+include Make.rules
+
+##
+# Top-level targets
+
+install:
+ $(INSTALLDIR) $(DESTDIR)$(DOCDIR)
+ $(INSTALL) README $(DESTDIR)$(DOCDIR)
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..f386771
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,289 @@
+=============================================================================
+OpenNHRP ChangeLog http://sourceforge.net/projects/opennhrp
+=============================================================================
+
+Detailed changelog is available via Git history via web:
+http://opennhrp.git.sf.net/git/gitweb.cgi?p=opennhrp;a=blob;f=NEWS;hb=HEAD
+
+-----------------------------------------------------------------------------
+ opennhrp 0.13 - released 25/Dec/2011
+-----------------------------------------------------------------------------
+ - feature: add admin "interfaces show" command to display information about
+ the interface cache
+ - feature: support GRE interface binding changes (update gre nbma address
+ properly, and purge peer cache) to support dual ISP setups with failover
+ - fix: send registration reply even when all bindings are rejected
+ - fix: fix really the holding-time to apply to shortcut-target
+ - fix: fix hop count handling
+ - fix: various memory leaks fixed
+ - fix: fix memory corruption in the hlist structure (would be visible when
+ opennhrp is acting as NHS with heavy traffic)
+
+-----------------------------------------------------------------------------
+ opennhrp 0.12.3 - released 19/Aug/2011
+-----------------------------------------------------------------------------
+ - feature: export reason why peer-down trigger was executed; and implement
+ 'lowerdown' opennhrpctl command for racoon hook to indicate that the
+ IPsec SA has died (opennhrp-script can then avoid the unnecessary and
+ possibly harmful call to racoonctl)
+ - fix: route NHRP queries always via NHS (because ipsec initial-contact
+ mechanism for purging dead IPsec SAs triggers after NHRP rediscovery,
+ and if remote peer was rebooted, the direct link might be dead)
+ - fix: don't negative cache entries on timeout (timeout is indication of
+ temporary error: none of NHS' is reachable)
+ - fix: don't reply to kernel's ARP queries using local route entries.
+ this also prevents bad shortcut-routes if the local GRE prefix is
+ a sub-prefix of routed subnet over the GRE
+
+-----------------------------------------------------------------------------
+ opennhrp 0.12.2 - released 07/Jul/2011
+-----------------------------------------------------------------------------
+ - fix: regression introduced in 0.12's policy routing changes that
+ shortcuts for in-NBMA network would not work unless using dynamic-map
+ NHS configuration (from David Ward)
+
+-----------------------------------------------------------------------------
+ opennhrp 0.12.1 - released 24/Mar/2011
+-----------------------------------------------------------------------------
+ - feature: export tunnel GRE key to opennhrp-script
+ - fix: build error against certain kernel versions and architectures
+ - fix: update registrations when 1/3 of the holding-time has passed as
+ per rfc recommendation
+ - fix: fix holding-time to apply properly to shortcut-target blocks
+
+-----------------------------------------------------------------------------
+ opennhrp 0.12 - released 01/Nov/2010
+-----------------------------------------------------------------------------
+ - feature: preliminary support for policy routing. cache kernel routes for
+ each gre device and use them for routing lookups. nhrp shortcut routes
+ should be in separate routing table. this allows nhrp message routing to
+ always happen using bgp/ospf routes (for shortcut refreshes) and fixes
+ shortcuts to converge with the main routing information.
+ - feature: shortcut-target config option for subnet specific holding-time
+ overrides and aggregation of local subnet to "summary shortcut"
+ - fix: delete shortcut-routes if their gateway is removed to force renewal
+ of the route (shortcut gateway can change due to bgp/ospf routing change)
+ - fix: actually remove dynamic-nhs from peers if its A entry is removed
+ - fix: disallow duplicate cached entries with dynamic-nhs entries
+ - randomize retry timer and increase script timeouts
+ - improve logging a bit
+
+-----------------------------------------------------------------------------
+ opennhrp 0.11.5 - released 16/Mar/2010
+-----------------------------------------------------------------------------
+ - clear negative cached entries for peers which sends resolution request
+ - use several netlink sockets to receive notifications so we don't lose
+ sync on all of them
+ - fix shortcut renewals
+ - libev updated to version 3.9
+ - signal handling fixed
+
+-----------------------------------------------------------------------------
+ opennhrp 0.11.4 - released 04/Mar/2010
+-----------------------------------------------------------------------------
+ - multicast packet relay fix
+ - netlink buffer sizes increased
+
+-----------------------------------------------------------------------------
+ opennhrp 0.11.3 - released 30/Oct/2009
+-----------------------------------------------------------------------------
+ - handle dns lookup failures properly
+ - fix failover for shortcut routes
+ - detect forwarding loops for indications
+ - some code cleanups
+
+-----------------------------------------------------------------------------
+ opennhrp 0.11.2 - released 25/Sep/2009
+-----------------------------------------------------------------------------
+ - fixed libev usage bug that could cause crash on script timeout
+ - make lock file closed on exec so opennhrp-script instances won't keep
+ opennhrp daemon lock
+ - fixes traffic indications to work again (captured packet length was
+ not right)
+
+-----------------------------------------------------------------------------
+ opennhrp 0.11.1 - released 31/Aug/2009
+-----------------------------------------------------------------------------
+ - update libev version to 3.8
+ - more permissive build for warnings (libev generates some warnings)
+ - fix packet filter installation timer
+ - fix a false assert for peer deletion
+ - disable icmp redirect properly
+ - minor fixes to documentation and example script
+
+-----------------------------------------------------------------------------
+ opennhrp 0.11 - released 18/Jun/2009
+-----------------------------------------------------------------------------
+ - introduce 'dynamic-map' directive to autodetect all next hop servers
+ from a domain name with multiple A entries
+ - 'multicast' directive to better control softswitching of multicast
+ packets
+ - use libev instead of the self written event handling code
+ - enable Forward NHS extension for Traffic Indications to drop the message
+ after it has visited all NHS:es (otherwise it would loop between them
+ until ttl expires)
+ - performance optimizations to packet capturing, multicast packet process
+ switching, handling of registration requests and logging
+ - fix 64-bit compatibility issues
+ - some code documentation and clean ups
+
+-----------------------------------------------------------------------------
+ opennhrp 0.10.3 - released 04/May/2009
+-----------------------------------------------------------------------------
+ - fix handling of c-ares timeouts
+ - fix cancellation of asynchronous operations in peer cache
+ - fix control socket default location (broke on makefile rewrite)
+ - code clean up (rename reference counting functions)
+
+-----------------------------------------------------------------------------
+ opennhrp 0.10.2 - released 28/Apr/2009
+-----------------------------------------------------------------------------
+ - various safety measures in case of off-nbma routing loops
+ - fix a bug which caused static entries without 'register' to get deleted
+ - try to combine shortcut routes to get less nhrp cache entries
+
+-----------------------------------------------------------------------------
+ opennhrp 0.10.1 - released 22/Apr/2009
+-----------------------------------------------------------------------------
+ - fix the breakage in build system after the rewrite
+ - fix registration to servers when using domain names
+
+-----------------------------------------------------------------------------
+ opennhrp 0.10 - released 21/Apr/2009
+-----------------------------------------------------------------------------
+ - use c-ares library to make dns queries asynchronous
+ - fix mtu handling from registration requests
+ - avoid opennhrp-script zombie floods by reaping children between
+ processing registration request packets
+ - rewrite build system to something similar to kbuild
+ - migrate to git
+
+-----------------------------------------------------------------------------
+ opennhrp 0.9.3 - released 20/Feb/2009
+-----------------------------------------------------------------------------
+ - when public IP changes purge all related peer entries (opennhrp should
+ now survive and automatically re-register when dhcp enforces IP change)
+ - remove an assertion that was invalid (could cause opennhrp to abort
+ when acting as NHS in some situation)
+ - make monotonic clock work with old uclibc
+
+-----------------------------------------------------------------------------
+ opennhrp 0.9.2 - released 31/Dec/2008
+-----------------------------------------------------------------------------
+ - pid file locking change in 0.9.1 broke daemonization, make it work again
+
+-----------------------------------------------------------------------------
+ opennhrp 0.9.1 - released 31/Dec/2008
+-----------------------------------------------------------------------------
+ - fix a crash in peer cache enumeration
+ - update opennhrp-script to show how to add host route with mtu
+ - lock pid file as first thing (to prevent accidental startup when opennhrp
+ is already running)
+
+-----------------------------------------------------------------------------
+ opennhrp 0.9 - released 26/Dec/2008
+-----------------------------------------------------------------------------
+ - use monotonic system clock if available
+ - allow startup even if dns names are unresolveable
+ - make nhrp holding time configurable
+ - Cisco NHS specific feature: send cisco compatible purge if unique NBMA
+ mapping already exists (to re-register when NBMA address changes)
+ - additional opennhrp-script example with ipsec certificate checking
+ - some effort to make opennhrp compile on old system (in limited mode)
+ - detect NBMA MTU from interface and transmit it over NHRP and pass it to
+ opennhrp-script (to insert manual NBMA routes if path MTU discovery
+ does not work)
+
+-----------------------------------------------------------------------------
+ opennhrp 0.8 - released 03/Oct/2008
+-----------------------------------------------------------------------------
+ - licensing terms changed to GPL version 2 or later
+ - send purge request to shortcut subnets after registration
+ - clear redirection rate limiting cache for purge request addresses
+ - new admin commands: "redirect purge" and "schedule"
+ - rename admin commands: "flush", "purge" and "show" to have "cache" prefix
+ (accepts still old style commands for a while)
+ - make logging a bit less verbose
+ - minor fixes to renewals of peers and shortcut routes
+ - fix a memory leak
+
+-----------------------------------------------------------------------------
+ opennhrp 0.7.1 - released 18/Jun/2008
+-----------------------------------------------------------------------------
+ - use only primary interface addresses as nbma source address
+ - fix an access to freed memory in certain special cases of peer cache
+ enumeration
+ - fix a memory leak
+
+-----------------------------------------------------------------------------
+ opennhrp 0.7 - released 30/Apr/2008
+-----------------------------------------------------------------------------
+ - catch multicast packets and send them as multiple unicast packets
+ to all known peers
+ - new script events: interface-up (to clear neighbor and route caches
+ on startup) and peer-register (to e.g. validate peer protocol ip address
+ from the ipsec certificate)
+ - parse nat-oa for cached entries
+ - routing regression fixes (don't try to resolve unreachable statically
+ mapped peers)
+ - fix deletion of multiple cache entries from enumeration code
+ (crashed in some rare circumstances)
+ - check for IFA_LOCAL attribute presence before using it (fixes a crash)
+ - fix bug which caused negative cache entries to prevent registration
+ of the protocol address
+ - code cleanups and some optimizations
+
+-----------------------------------------------------------------------------
+ opennhrp 0.6.2 - released 04/Apr/2008
+-----------------------------------------------------------------------------
+ - accept shortcuts when a route to shortcut-destination interface exists
+ (in addition to local addresses in that interface)
+ - handle netlink link, address and route deleted notifications properly
+ - print error if opennhrp-script fails for some reason
+ - change peer flags: 'lower-up' means opennhrp-script was run successfully,
+ 'up' means registration has been also done (if it was required)
+ - fix matching of local-nbma selector when gre interface has no remote
+ address and is not explicitly bound to other interface
+ - fix admin interface to give 'Affected-Entries' result correctly
+ - fix config file reading bug; handle last keyword even if there is no
+ final new line
+ - code cleanups and optimizations
+
+-----------------------------------------------------------------------------
+ opennhrp 0.6.1 - released 20/Mar/2008
+-----------------------------------------------------------------------------
+ - fix a crash in error path of packet forwarding
+ - fix routing of locally generated traffic indications
+
+-----------------------------------------------------------------------------
+ opennhrp 0.6 - released 19/Mar/2008
+-----------------------------------------------------------------------------
+ - accept hostname (domain name) as an NBMA address in config file
+ - sanitize admin interface: accept cache entry selectors on
+ flush, purge and show commands; slight changes to unix socket protocol
+ - multiple gre interfaces do not share nhrp cache anymore
+ - opennhrp-script: NHRP_SRCADDR and NHRP_SRCNBMA added
+ - do not let opennhrp-script inherit sockets file descriptors
+ - run peer-down script when peer was purged via admin interface
+ - add option -V to show version
+ - add option -v to show debug log messages (to see nl-arp messages)
+ - performance improvements
+
+-----------------------------------------------------------------------------
+ opennhrp 0.5 - released 05/Mar/2008
+-----------------------------------------------------------------------------
+ - opennhrpctl command line tool
+ - list nhrp cache
+ - purge entries by protocol or nbma address
+ - flush entries
+ - daemon mode
+ - allow comments in configuration file
+ - various bug fixes
+ - flush neighbor cache when interface is found
+ - do not create proxy arp entries when static mapping exists
+
+-----------------------------------------------------------------------------
+ opennhrp 0.4 - released 04/Jan/2008
+-----------------------------------------------------------------------------
+ - first announced release
+
diff --git a/README b/README
new file mode 100644
index 0000000..0c8673f
--- /dev/null
+++ b/README
@@ -0,0 +1,112 @@
+OpenNHRP Release Notes
+======================
+
+OpenNHRP is an NHRP implementation for Linux. It has most of the RFC2332
+and Cisco IOS extensions.
+
+Project homepage: http://sourceforge.net/projects/opennhrp
+
+Git repository: git://opennhrp.git.sourceforge.net/gitroot/opennhrp
+
+ KERNEL REQUIREMENTS
+
+You need a kernel with ip_gre patched to support sending and receiving
+using NBMA address.
+
+The support was originally added to 2.6.24-rc2, but it contains a bug
+that prevents NAT detection. The latest fix is present in 2.6.24-rc7.
+
+Gentoo kernels: gentoo-sources-2.6.23-r1 and gentoo-sources-2.6.22-r10
+have partial support too (no NAT there either).
+
+For the brave who compile their own kernels, there are patches against
+vanilla 2.6.20 and 2.6.22 kernels in the patches directory. Or just
+upgrade to 2.6.24 or later and no patching is required. However, there
+have been major performance fixes in newer kernels, so 2.6.35 or later
+is strongly recommended.
+
+Also remember to turn on CONFIG_ARPD and CONFIG_NET_IPGRE in your kernel
+configuration.
+
+ SYSTEM REQUIREMENTS
+
+To compile OpenNHRP you need:
+- GNU make (3.81 or later works)
+- GCC
+- pkg-config
+- c-ares library (Ubuntu package: libc-ares-dev)
+
+ COMPILING
+
+Just type 'make' and 'make install'.
+
+ CONFIGURATION
+
+OpenNHRP currently supports only IPv4 over IPv4 using NBMA GRE tunnels.
+To create NBMA GRE tunnel you might use following:
+
+ ip tunnel add gre1 mode gre key 1234 ttl 64
+ ip addr add 10.255.255.2/24 dev gre1
+ ip link set gre1 up
+
+This should work with the configuration example in opennhrp.conf(5).
+
+ IPSEC ENCRYPTION OF GRE PACKETS
+
+ipsec-tools 0.8.0 or later is recommended. Earlier versions need patching
+for dmvpn to work properly.
+
+The ipsec-tools configuration I prefer to use is: encrypt all GRE
+traffic in transport mode. IPsec policy for that should be defined in
+/etc/ipsec.conf:
+ spdflush;
+ spdadd 0.0.0.0/0 0.0.0.0/0 gre -P out ipsec esp/transport//require;
+ spdadd 0.0.0.0/0 0.0.0.0/0 gre -P in ipsec esp/transport//require;
+
+And ipsec-tools configuration with pre-shared key could look something
+like this:
+
+/etc/racoon/racoon.conf:
+ path pre_shared_key "/etc/racoon/psk.txt";
+ remote anonymous {
+ exchange_mode aggressive;
+ lifetime time 24 hour;
+ my_identifier user_fqdn "my-user-name@my-domain.example";
+ nat_traversal on;
+ # For ipsec-tools snapshot 2010-10-10 or later
+ script "/etc/opennhrp/racoon-ph1dead.sh" phase1_dead;
+ # For earlier ipsec-tools
+ # script "/etc/opennhrp/racoon-ph1down.sh" phase1_down;
+ proposal {
+ encryption_algorithm 3des;
+ hash_algorithm sha1;
+ authentication_method pre_shared_key;
+ dh_group 2;
+ }
+ }
+ sainfo anonymous {
+ pfs_group 2;
+ lifetime time 12 hour;
+ encryption_algorithm 3des, blowfish 448, rijndael;
+ authentication_algorithm hmac_sha1, hmac_md5;
+ compression_algorithm deflate;
+ }
+
+And /etc/racoon/psk.txt:
+ my-user-name@my-domain.example "my-secret-pre-shared-key"
+
+It is of course more secure to use certificates for authentication.
+And using aggressive mode is not recommended either, but it is
+required to make FQDN pre-shared authentication work. This setup is
+fast to do and can get you started with testing OpenNHRP.
+
+ DOCUMENTATION
+
+Most of the OpenNHRP documentation is in the manpages. Read them.
+
+Also some general NHRP documents can be found from Cisco website
+(www.cisco.com).
+
+ BUGS
+
+Use the SourceForge bug tracker or mailing list.
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..8b2a0ac
--- /dev/null
+++ b/TODO
@@ -0,0 +1,27 @@
+Open items that need work on OpenNHRP:
+
+- interface-up, nhs-up, nhs-down need to be serialized for quagga
+ management. alternatively, the script could return some special
+ value meaning "try again soon".
+
+- offload multicast packet forwarding to kernel
+
+- use mmapped pf_packet interface
+
+- nhrp_peer should be split to more files, it's relatively large now.
+ might split nhrp_peer to separate types.
+
+- Proper handling of unique bit. Currently registration of unique address
+ overwrites previous registration, but this is against the RFC.
+
+- Load balancing: return multiple CIE entries, when we have multiple
+ local IP addresses. When receiving multi CIE next-hop, balance traffic
+ or for shortcut routes, create a multi nexthop route.
+
+- Create some logic to detect if NBMA and public IPs are mixed up in
+ the "map" directive. Issue a warning about this.
+
+- Support reloading of configuration (via SIGHUP or "opennhrpctl reload")
+
+- Clean shutdown: send purge request to registration servers, dynamic
+ clients and possibly track resolution requests and purge those too.
diff --git a/contrib/init-scripts/debian/opennhrp.init b/contrib/init-scripts/debian/opennhrp.init
new file mode 100644
index 0000000..4a0fe94
--- /dev/null
+++ b/contrib/init-scripts/debian/opennhrp.init
@@ -0,0 +1,160 @@
+#! /bin/sh
+### BEGIN INIT INFO
+# Provides: opennhrp
+# Required-Start: $remote_fs
+# Required-Stop: $remote_fs
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: RFC 2332 2333 daemon
+# Description: This file supports one instance of opennhrp
+### END INIT INFO
+
+# Author: Robin David Hammond <rhammond+nhrp@databit7.com>
+#
+# Do NOT "set -e"
+
+# PATH should only include /usr/* if it runs after the mountnfs.sh script
+PATH=/sbin:/usr/sbin:/bin:/usr/bin
+DESC="OpenNextHopResolutionProtocol"
+NAME=opennhrp
+DAEMON=/usr/sbin/$NAME
+PIDPATH=/var/run/$NAME
+PIDFILE=$PIDPATH/pid
+SCRIPTNAME=/etc/init.d/$NAME
+CTRLPATH=/var/run/$NAME
+CTRLPIPE=$CTRLPATH/ctrl
+
+CONFFILE=/etc/opennhrp/opennhrp.conf
+SCRIPTFILE=/etc/opennhrp/opennhrp-script
+
+DAEMON_ARGS=" -d -a $CTRLPIPE -c $CONFFILE -s $SCRIPTFILE -p $PIDFILE"
+# Expands to: -d -a /var/run/opennhrp/ctrl -c /etc/opennhrp/opennhrp.conf -s /etc/opennhrp/opennhrp-script -p /var/run/opennhrp/pid
+# Exit if the package is not installed
+[ -x "$DAEMON" ] || exit 0
+
+# Read configuration variable file if it is present (DAEMON_ARGS was expanded above)
+[ -r /etc/default/$NAME ] && . /etc/default/$NAME
+
+# Load the VERBOSE setting and other rcS variables
+. /lib/init/vars.sh
+
+# Define LSB log_* functions.
+# Depend on lsb-base (>= 3.0-6) to ensure that this file is present.
+. /lib/lsb/init-functions
+
+#
+# do_start: create $PIDPATH and $CTRLPATH, then launch $DAEMON with
+# $DAEMON_ARGS unless the --test probe finds it is already running.
+do_start()
+{
+ mkdir -p $PIDPATH
+ mkdir -p $CTRLPATH
+ # Return
+ # 0 if daemon has been started
+ # 1 if daemon was already running
+ # 2 if daemon could not be started
+ start-stop-daemon --start --quiet --pidfile $PIDFILE --exec $DAEMON --test > /dev/null \
+ || return 1
+ start-stop-daemon --start --quiet --pidfile $PIDFILE --exec $DAEMON -- \
+ $DAEMON_ARGS \
+ || return 2
+ # Add code here, if necessary, that waits for the process to be ready
+ # to handle requests from services started subsequently which depend
+ # on this one. As a last resort, sleep for some time.
+}
+
+# do_stop: stop the process matching $PIDFILE and $NAME (TERM, then KILL),
+# then wait for/KILL any process still executing $DAEMON and remove $PIDFILE.
+# The status of the first attempt is returned unless either attempt gives 2.
+do_stop()
+{
+ # Return
+ # 0 if daemon has been stopped
+ # 1 if daemon was already stopped
+ # 2 if daemon could not be stopped
+ # other if a failure occurred
+ start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $PIDFILE --name $NAME
+ RETVAL="$?"
+ [ "$RETVAL" = 2 ] && return 2
+ # Wait for children to finish too if this is a daemon that forks
+ # and if the daemon is only ever run from this initscript.
+ # If the above conditions are not satisfied then add some other code
+ # that waits for the process to drop all resources that could be
+ # needed by services started subsequently. A last resort is to
+ # sleep for some time.
+ start-stop-daemon --stop --quiet --oknodo --retry=0/30/KILL/5 --exec $DAEMON
+ [ "$?" = 2 ] && return 2
+ # Many daemons don't delete their pidfiles when they exit.
+ rm -f $PIDFILE
+ return "$RETVAL"
+}
+
+#
+# Function that sends a SIGHUP (signal 1) to the daemon/service
+#
+do_reload() {
+ #
+ # Signal 1 is sent to the process matching $PIDFILE and $NAME.
+ # The result of start-stop-daemon is discarded: this always returns 0.
+ # Not called at present; the reload action below is commented out.
+ #
+ start-stop-daemon --stop --signal 1 --quiet --pidfile $PIDFILE --name $NAME
+ return 0
+}
+
+case "$1" in
+ start)
+ [ "$VERBOSE" != no ] && log_daemon_msg "Starting $DESC" "$NAME"
+ do_start
+ case "$?" in
+ 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;;
+ 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;;
+ esac
+ ;;
+ stop)
+ [ "$VERBOSE" != no ] && log_daemon_msg "Stopping $DESC" "$NAME"
+ do_stop
+ case "$?" in
+ 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;;
+ 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;;
+ esac
+ ;;
+ #reload|force-reload)
+ #
+ # If do_reload() is not implemented then leave this commented out
+ # and leave 'force-reload' as an alias for 'restart'.
+ #
+ #log_daemon_msg "Reloading $DESC" "$NAME"
+ #do_reload
+ #log_end_msg $?
+ #;;
+ restart|force-reload)
+ #
+ # Stop, then start only if do_stop returned 0 or 1. If the "reload"
+ # option is implemented then remove the 'force-reload' alias.
+ #
+ log_daemon_msg "Restarting $DESC" "$NAME"
+ do_stop
+ case "$?" in
+ 0|1)
+ do_start
+ case "$?" in
+ 0) log_end_msg 0 ;;
+ 1) log_end_msg 1 ;; # Old process is still running
+ *) log_end_msg 1 ;; # Failed to start
+ esac
+ ;;
+ *)
+ # Failed to stop
+ log_end_msg 1
+ ;;
+ esac
+ ;;
+ *)
+ #echo "Usage: $SCRIPTNAME {start|stop|restart|reload|force-reload}" >&2
+ echo "Usage: $SCRIPTNAME {start|stop|restart|force-reload}" >&2
+ exit 3
+ ;;
+esac
+
+:
diff --git a/doc/draft-ietf-ion-r2r-nhrp-03.txt b/doc/draft-ietf-ion-r2r-nhrp-03.txt
new file mode 100644
index 0000000..8f80b36
--- /dev/null
+++ b/doc/draft-ietf-ion-r2r-nhrp-03.txt
@@ -0,0 +1,837 @@
+Internetworking Over NBMA Yakov Rekhter
+INTERNET-DRAFT Cisco Systems
+<draft-ietf-ion-r2r-nhrp-03.txt> Joel Halpern
+Expiration Date: November 1999 Institutional Venture Partners
+ May 1998
+
+
+ NHRP for Destinations off the NBMA Subnetwork
+
+ draft-ietf-ion-r2r-nhrp-03.txt
+
+
+1. Status of this Memo
+
+ This document is an Internet-Draft and is in full conformance with
+ all provisions of Section 10 of RFC2026. Internet-Drafts are working
+ documents of the Internet Engineering Task Force (IETF), its areas,
+ and its working groups. Note that other groups may also distribute
+ working documents as Internet-Drafts.
+
+ Internet-Drafts are draft documents valid for a maximum of six months
+ and may be updated, replaced, or obsoleted by other documents at any
+ time. It is inappropriate to use Internet-Drafts as reference
+ material or to cite them other than as ``work in progress.''
+
+ The list of current Internet-Drafts can be accessed at
+ http://www.ietf.org/ietf/1id-abstracts.txt
+
+ The list of Internet-Draft Shadow Directories can be accessed at
+ http://www.ietf.org/shadow.html.
+
+
+2. Abstract
+
+ The NBMA Next Hop Resolution Protocol (NHRP) [1] specifies a
+ mechanism that allows a source station (e.g., a host or a router) on
+ an NBMA subnetwork to find the NBMA subnetwork address of a
+ destination station when the destination station is connected to the
+ NBMA subnetwork. For the case where the destination station is off
+ the NBMA subnetwork the mechanism described in [1] allows a node to
+ determine the NBMA subnetwork address of an egress router from the
+ NBMA subnetwork that is ``nearest'' to the destination station. If
+ used to locate an egress router wherein the destination station is
+ directly behind the egress router, the currently documented NHRP
+ behaviors are sufficient. However, as documented elsewhere [2],
+ there are cases where if used between routers for generalized
+ transit, NHRP can produce loops.
+
+
+
+
+Joel Halpern [Page 1]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+ This document describes extensions to the NBMA Next Hop Resolution
+ Protocol (NHRP) [1] that allow a node to acquire and maintain the
+ information about the egress router without constraining the
+ destination(s) to be directly connected to the egress router.
+
+
+3. CONVENTIONS
+
+ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+ "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+ document are to be interpreted as described in RFC 2119 [3].
+
+
+4. NHRP Target Information
+
+ The mechanism described in this document allows a node to find an
+ egress router for either a single destination, or a set of
+ destinations (where the set is expressed as a single address prefix).
+ Since a single destination is just a special case of a set of
+ destinations, for the rest of the document we will always talk about
+ a set of destinations, and will refer to this set as an ``NHRP
+ target''.
+
+ The NHRP target is carried in the NHRP Request, Reply, and Purge
+ messages as an address prefix (using the Prefix Length field of the
+ NHRP Client Information Extension). In order to ensure correctness,
+ a target may be replaced by an identical target with a longer prefix
+ length. This replacement may be done at an intermediate or
+ responding NHS. Other than this increase of prefix length, no NHS
+ shall modify the NHRP target information in an NHRP message.
+
+ In general a router may maintain in its Forwarding Information Base
+ (FIB) routes whose Network Layer Reachability Information (NLRI)
+ exhibits a subset relation. Such routes are called overlapping
+ routes. To expand upon this, entries in a FIB are often related, with
+ one entry being a prefix of another entry. The longer prefix
+ therefore covers a set of routes that are a subset of the shorter
+ prefix. To provide correct forwarding in the presence of such
+ overlapping (or nested) routes this document constrains an NHRP
+ target by requiring that all the destinations covered by the target
+ must form a subset of the NLRI of at least one route in the
+ Forwarding Information Base (FIB) of the router that either
+ originates, or propagates an NHRP Request. That is, there must be at
+ least one route in the FIB which is a prefix of (or equal to) the
+ target of the request. For the rest of the document we'll refer to
+ this as the ``first NHRP target constraint''. A station can
+ originate an NHRP Request, and a router can propagate an NHRP Request
+ only if the NHRP target of the Request does not violate the first
+
+
+
+Joel Halpern [Page 2]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+ NHRP target constraint.
+
+ If a received NHRP request does not meet this ``first NHRP target
+ constraint'' when received, the receiving router has two choices. It
+ may answer the request, defining itself as the egress. This is
+ compatible with the base NHRP specification, and preserves the
+ ``first NHRP target constraint''. Alternatively, the router may
+ lengthen the received prefix until the first constraint is met. The
+ prefix is lengthened until the target falls within (or becomes equal
+ to) a FIB entry.
+
+ A route (from a local FIB) whose NLRI forms a minimal superset of all
+ the destinations covered by the NHRP target is called an ``NHRP
+ forwarding route''. This is the longest FIB entry that covers the
+ entire target. Observe that by definition the set of destinations
+ covered by an NHRP target always exhibits a subset relation to the
+ set of destinations covered by the NHRP forwarding route associated
+ with the target.
+
+ This document further constrains origination/propagation of NHRP
+ Requests by prohibiting the NHRP target (carried by a Request) to
+ form a superset of the destinations covered by any of the routes in
+ the local FIB. Remembering that there are nested FIB entries, this
+ constraint says that there must not be a FIB entry which is itself a
+ subset of the target of the NHRP request. If there were, there would
+ be some destinations within the request which would be forwarded
+ differently than others, preventing a single answer from being
+ correct. The constraint applies both to the station that originates
+ an NHRP Request and to the routers that propagate the Request. For
+ the rest of the document we'll refer to this constraint as the
+ ``second NHRP target constraint''. A station can originate an NHRP
+ Request, and a router can propagate an NHRP Request only if the NHRP
+ target of the Request does not violate the second NHRP target
+ constraint. The second NHRP target constraint guarantees that
+ forwarding to all the destinations covered by the NHRP target would
+ be accomplished via a single (common) route, and this route would be
+ the NHRP forwarding route for the target.
+
+ Again, if a received NHRP request does not meet the ``second NHRP
+ target constraint'', the router may either respond to the request,
+ providing its own NBMA address, or it may lengthen the prefix in the
+ request so as to meet the second constraint.
+
+
+
+
+
+
+
+
+
+Joel Halpern [Page 3]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+5. NHRP Requester and Terminator Processing
+
+ The issue being addressed with the behaviors being mandated in this
+ document is to ensure that sufficient information is present and
+ processed to avoid NHRP shortcuts causing packet forwarding loops.
+
+ In order to do this, the requester and responder of the request must
+ undertake certain work, and any "border routers" in the forwarding
+ path must also perform certain additional work beyond checking the
+ target consistency with the FIB during request processing. This
+ border work suffices to detect any changes that would cause the path
+ selection to have failed the target constraints.
+
+ The work performed by the requester and responder consists of two
+ kinds of work. One set is requester only work, and is required in
+ order to determine where the protocol boundaries are. The other set
+ is the route monitoring work.
+
+
+5.1. NHRP IGP information
+
+ The primary cause of NHRP forwarding loops is the loss of information
+ at a routing protocol boundary. Normally, such boundaries are
+ detected by the router at the boundary. However, it is possible for
+ IGP boundaries to overlap. Therefore, NHRP requesting Routers MUST
+ include the NHRP IGP Information extension (as defined in section 9).
+ This extension indicates what IGP the originator of the request uses.
+ A requesting router must always include this extension, since it is
+ not possible to tell a priori whether the eventual resolution of the
+ request will be a host or a router.
+
+ Because the entire BGP domain is considered one routing domain, the
+ extension also contains an indication as to whether the originator
+ was a BGP speaker.
+
+
+5.2. NHRP Requestor and Responder monitoring
+
+ NHRP requestors and responders are required to monitor routing to
+ maintain correct shortcut information.
+
+ Once a router that originates an NHRP Request acquires the shortcut
+ next hop information, it is essential for the router to be able to
+ detect any changes that would affect the correctness of this
+ information. The following measures are intended to provide the
+ correctness.
+
+ Both ends of a shortcut have to monitor the status of the route that
+
+
+
+Joel Halpern [Page 4]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+ was associated with the shortcut (the NHRP forwarding route). If the
+ status changes at the router that generated the NHRP Reply, this
+ router should send a Purge message, so that the NHRP Requester would
+ issue another NHRP. If the status changes at the Requester, the
+ Requester must issue another NHRP. This ensures that when both ends
+ of a shortcut are up, any changes in routing that impact forwarding
+ to any of the destinations in the NHRP target would result in a
+ revalidation (via NHRP) of the shortcut. Note that in addition to
+ sending purges/reverifies in response to routing changes which
+ directly affect the NHRP target, there is one other case.
+
+ A router MUST perform the appropriate purge/reverification process if
+ it receives routing updates that cause an issued NHRP request to
+ violate either of the target constraints defined earlier. This is
+ possible at an NHRP originator, and is more likely at border devices.
+
+ Once a shortcut is established, the Requester needs to have some
+ mechanism(s) to ensure that the other end of the shortcut is alive.
+ Among the possible mechanisms are: (a) indications from the Data Link
+ layer, (b) presence of traffic in the reverse direction that comes
+ with the Link Layer address of the other end, (c) keepalives sent by
+ the other end. This is intended to suppress black holes, when the
+ next hop router in the shortcut (the router that generated Reply)
+ goes down.
+
+ A requester should establish a shortcut only after the requester
+ determines that the information provided by NHRP is fairly stable.
+ This is necessary in order to avoid initiating shortcuts that are
+ based on transients in the routing information, and thus would need
+ to be revalidated almost immediately anyway. Thus, a router may wait
+ to use NHRP information if the underlying routing information has
+ recently changed. If the routing protocol being used has a notion of
+ stability, it should be used. Information in a transient or
+ holddown state SHOULD NOT be used, and requests which need to be
+ processed based on such information SHOULD be discarded.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Joel Halpern [Page 5]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+6. Border Processing of NHRP Request
+
+ Processing of an NHRP Request is covered by two sets of rules: the
+ first set for IGP related processing, and the second set for BGP
+ related processing. The rules for IGP processing relate to
+ determining where the IGP borders are (in particular in the case of
+ overlapping IGPs), and then for what must happen at said borders.
+
+
+6.1. Border Determination
+
+ When a router receives a request, and determines that it is not the
+ NBMA exit router, it must perform a series of checks before
+ forwarding the request.
+
+ When a router receives such a Request, the router uses the NHRP
+ target and the NHRP IGP information to check whether (a) the first
+ and the second NHRP target constraints are satisfied, (b) the router
+ is in the same routing domain as the originator of the Request,
+ and if yes, then whether (c) it is a border router for that domain.
+
+ When the NHRP target is checked against the forwarding database, a
+ determination must be made as to whether either of the target
+ constraints has been violated. If they are violated, then the router
+ MAY either
+
+ o Extend the prefix so as to meet the constraints.
+
+ o reply to the request indicating that it is the destination
+
+ o return an error indicating which constraint was violated.
+
+ If the NHRP forwarding route indicates a next hop that is not on the
+ same NBMA as the interface on which the Request was received, the
+ router sends back an NHRP Reply and terminates the query.
+
+ If a router receives a request without IGP information, then it was
+ originated within this domain by a host. If the router is an AS
+ Border Router (i.e. running BGP), and if the forwarding path exits
+ the AS, then it must behave as a border router for this request.
+ Otherwise, for requests without IGP information, the router is not a
+ border router.
+
+ For requests with IGP information, the router compares the forwarding
+ information against the IGP in the request. If the forwarding entry
+ indicates that the next hop is to exit the AS (an AS Border Router),
+ then check the BGP behaviors below.
+
+
+
+
+Joel Halpern [Page 6]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+ When the IGP the next hop was learned from is the same IGP as
+ indicated in the request, then the NHS simply forwards the request.
+ [Of course, as per NHRP, it is free to respond indicating it is the
+ termination of the shortcut, for example when the Router/NHS is a
+ firewall.]
+
+ When the IGP the next hop was learned from is different from that
+ listed in the NHRP request, then this NHS is a border router for this
+ request.
+
+
+6.2. Border Behavior
+
+ In all cases, a border router has two choices. It MAY terminate and
+ respond to the request, responding with its IP and NBMA address.
+
+ Alternatively, it MAY perform border propagation.
+
+
+6.2.1. Reorigination
+
+ Upon receiving an NHRP request for which the NHS is a border router,
+ if it chooses to propagate the request, it MUST originate a new NHRP
+ request. This request will have a locally generated request
+ identifier, and the same NHRP target information as in the received
+ request. The NHRP IGP Information will be the correct indication for
+ the outgoing interface, with BGP indication if the received request
+ had the BGP indication, or if this transition crosses the AS border.
+ All other extensions are copied from the incoming request to the new
+ request.
+
+
+6.2.2. Response Propagation
+
+ When an NHRP response is received for a propagated request, the
+ information is copied from the received request, and passed on in a
+ new NHRP response, responding to the originally received request.
+ The prefix length in the received response is copied to the new
+ response. All extensions except the NHRP IGP Information are copied
+ to the new response.
+
+ In addition, the border router saves state about this information
+ exchange. The saved state includes the NHRP target from the
+ response, with the NHRP prefix length that resulted from the
+ exchange. It also includes both the original requester, and the
+ identity of the responder. These are used to generate appropriate
+ reverification and purges whenever routing changes in a way that
+ could affect the resolution.
+
+
+
+Joel Halpern [Page 7]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+6.3. Border Information
+
+ Sometimes the routing protocol will have provided the border router
+ with enough information to generate a response to an incoming NHRP
+ request. In particular, the border router may have information about
+ IP prefix to NBMA address bindings. If such information is present,
+ it may be used by a border router to produce an NHRP response without
+ actually propagating the request. In such a case, that information
+ must be monitored for stability to maintain the correctness of the
+ shortcut.
+
+
+7. BGP Operation
+
+ While the NHRP mechanism described above is mostly constrained to the
+ routers within a single routing domain, the same mechanisms can be
+ used for shortcuts that span multiple domains. In doing so, one
+ wants to produce as little additional overhead in the BGP space as
+ possible.
+
+ Therefore, we will treat the space over which BGP runs as a single
+ routing domain. Care must be taken to propagate information across
+ the individual AS without error, and to indicate that one has
+ properly entered the BGP space.
+
+ Additional complexity in handling multi-domain shortcuts arise if
+ routing information gets aggregated at the border routers (which
+ certainly happens in practice). Since BGP is the major protocol that
+ is used to exchange routing information across multiple routing
+ domains, we'll restrict our proposal to the case where the routing
+ information exchange across domains' boundaries is controlled by BGP.
+
+ If both the source and the destination domains are on a common NBMA
+ network, and the path between these two domains is also fully within
+ the same NBMA network, then we have only three routing domains to
+ deal with: source routing domain, BGP routing domain, and destination
+ routing domain. If the destination domain is not on the same NBMA as
+ the source domain, then we need to deal only with two domains - the
+ source and the BGP. Note that we treat all routers that participate
+ in a single (common) instance of BGP as a single BGP routing domain,
+ even if these routers participate in different intra-domain routing
+ protocols, or in different instances of the same intra-domain routing
+ protocol. There are three aspects to consider.
+
+
+ (a) how a border router in the domain that the originator of
+ the Request is in handles the Request (crossing IGP/BGP
+ boundary),
+
+
+
+Joel Halpern [Page 8]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+ (b) how the Request is handled across the BGP domain, and
+ finally
+
+ (c) how a border router in the domain where the NHRP target is
+ in handles the Request (crossing BGP/IGP boundary).
+
+
+
+7.1. Handling NHRP Request at the source domain border router
+
+ When a border router receives an NHRP Request originated from within
+ its own (IGP) routing domain, the border router determines the NHRP
+ forwarding route for the NHRP target carried by the Request. If the
+ router already has the shortcut information for the forwarding route,
+ then the router uses this information to construct a Reply to the
+ source of the NHRP Request. Otherwise, the router originates its own
+ NHRP Request. The Request contains exactly the same NHRP target, as
+ was carried by the original Request; The NHRP IGP Information will
+ indicate that the request was generated by BGP, and will indicate the
+ IGP of the BGP AS being entered. While it is assumed that a BGP
+ transit AS will generally use only one IGP, the IGP information (and
+ border processing) is included to allow all cases. The newly
+ originated Request is sent to the next hop of the NHRP forwarding
+ route. Once the border router receives a Reply to its own Request,
+ the border router uses the next hop information from the Reply to
+ construct its own Reply to the source of the original NHRP Request.
+
+ If the border router later on receives a Purge message for the NHRP
+ forwarding route, the border router treats this event as if there was
+ a local change in the NHRP forwarding route (even if there were no
+ changes in the route).
+
+ This is exactly the same behavior as all other border cases, and is
+ described here for completeness.
+
+
+7.2. Handling NHRP Request within the BGP domain
+
+ Routers within an AS will check the IGP, and perform appropriate
+ processing based on the IGP match. In general, this will result in
+ normal forwarding of the NHRP request.
+
+ Therefore, the significant cases occur at the BGP speaking routers.
+ There are two conditions to check for, early exit of the NBMA, and
+ reachability aggregation. Both of these conditions apply to
+ Autonomous systems that do not contain the NHRP target.
+
+
+
+
+
+Joel Halpern [Page 9]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+7.2.1. NBMA exit
+
+ The BGP router in deciding where to send the NHRP request will
+ determine what the correct exit from the autonomous system is. It
+ will determine if that exit is within the NBMA. If it is not within
+ the NBMA, then the router MUST respond to the NHRP request,
+ indicating its own IP and NBMA addresses as the correct termination
+ of the shortcut. This is because the actual NBMA border device is
+ not in a position to monitor the topology properly.
+
+ BGP routers within an NBMA which are supporting R2R NHRP SHOULD be
+ configured to know where the NBMA border is. In the absence of such
+ configuration, requests from other routers SHOULD be terminated at the
+ BGP router, since it can not tell what will be crossing the border.
+ A BGP router supporting R2R NHRP may be configured to assume that all
+ of its neighbors are within the NBMA, and therefore not perform such
+ early termination.
+
+
+7.2.2. Reachability Aggregation
+
+ BGP routers aggregate reachability. If the router aggregates
+ reachability that includes the NHRP target, only this router has the
+ visibility to some of the topology changes that can affect the
+ correctness of the route. Therefore, this router is a border router
+ for this NHRP request.
+
+ It must originate a new request, place the correct information in the
+ request, receive the response, and generate the correct response
+ towards the requester. This aggregating router must also monitor
+ routing in case of changes which affect the request.
+
+ If the router later on receives a Purge message for the NHRP
+ forwarding route, the router treats this event as if there was a
+ change in the NHRP forwarding route (even if there were no changes
+ in the route).
+
+ It should be noted that this condition applies if the router COULD
+ aggregate relevant routing information, even if it currently does
+ not.
+
+
+
+
+
+
+
+
+
+
+
+Joel Halpern [Page 10]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+7.3. Handling NHRP Request at the destination domain border router
+
+ When a border router receives an NHRP Request from a BGP speaker, and
+ the border router determines that all the destinations covered by the
+ NHRP target of the Request are within the (IGP) domain of that border
+ router, the border router determines the NHRP forwarding route for
+ the NHRP target carried by the Request. The newly formed Request
+ contains exactly the same NHRP target as the received Request; the
+ NHRP IGP Information indicates the IGP this router is using to select
+ the route to the destination. The newly originated Request is sent
+ to the next hop of the NHRP forwarding route. Once the border router
+ receives a Reply to its own Request, the border router uses the next
+ hop information from the Reply to construct its own Reply to the
+ source of the original NHRP Request.
+
+ If the border router later on receives a Purge message for the NHRP
+ forwarding route, the border router treats this event as if there was
+ a change in the NHRP forwarding route (even if there were no
+ changes in the route).
+
+
+8. More state, less messages
+
+ It should be possible to reduce the number of Purge messages and
+ subsequent NHRP messages (caused by the Purge messages) by
+ maintaining more state on the border routers at the source and
+ destination domains, and the BGP routers that perform aggregation
+ along the path from the source to the destination.
+
+ Specifically, on these routers it would be necessary to keep the
+ information about all the NHRP targets for which the routers maintain
+ the shortcut information. This way when such a router determines
+ that the NHRP forwarding route (for which the router maintains the
+ shortcut information) changes due to some local routing changes, the
+ router could check whether these local changes impact forwarding to
+ the destinations covered by the NHRP targets. For the targets that
+ are impacted by the changes the router would send Purge messages.
+
+ Note that this mechanism (maintaining NHRP targets) precludes the use
+ of Address Prefix Extension - the shortcut will be determined only
+ for the destinations covered by the NHRP target (so, if the target is
+ a single IP address, then the shortcut would be determined only for
+ this address).
+
+
+
+
+
+
+
+
+Joel Halpern [Page 11]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+9. NHRP IGP Information Extension Format
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ 0-3 |C|u| Type = 9 | Length = 4 |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ 4-7 | flags |b| Reserved | IGP ID |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+ C "Compulsory." If clear, and the NHS does not recognize the
+ type code, the extension may safely be ignored. For
+ the IGP Information extension, this bit is clear.
+
+ u Unused and must be set to zero
+
+ Type The extension type code. For the IGP Information
+ extension, this is 9.
+
+ Length the length in octets of the value. For this extension,
+ this is 4.
+
+ flags Other than the "b" flag, these are reserved, SHALL be set
+ to 0 on transmission, and SHALL be ignored on reception.
+
+ b This flag indicates whether the request (or a predecessor
+ thereof) was originated by a BGP speaker. Set (to 1) to
+ indicate that the BGP speaker has operated on this.
+ Clear (to 0) if not.
+
+ IGP ID This field indicates the IGP used by the request
+ originator. The currently defined values are:
+
+ 1 = RIP
+ 2 = RIPv2
+ 3 = OSPF
+ 4 = Dual IS-IS
+
+
+
+
+
+
+
+
+
+
+
+
+
+Joel Halpern [Page 12]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+10. IANA Considerations
+
+ This document defines an enumerated field for identifying IGPs in
+ router-to-router NHRP requests. Since there may be additional IGPs
+ in use, a procedure is needed for allocating additional values. The
+ IANA shall allocate values for this field as needed. Specifically,
+ when requested a value shall be allocated for an IGP for any layer 3
+ protocol for which there is a clear and stable definition of the
+ protocol. An RFC is the best example of such stability. Vendor
+ published specifications are also acceptable. The IANA should avoid
+ issuing two values for the same protocol. However, it is not
+ incumbent upon the IANA to determine if two similar protocols are
+ actually the same.
+
+
+11. Open issues
+
+ The mechanisms described in this document assume that certain routers
+ along a path taken by an NHRP Request would be required to maintain
+ state associated with the NHRP forwarding route associated with the
+ NHRP target carried by the Request. However, it is quite clear that
+ the router(s) may also lose this state. Further study of the impact
+ of losing the state is needed before advancing the use of NHRP for
+ establishing shortcuts among routers beyond Proposed Standard.
+
+ The mechanisms described in this document may result in a situation
+ where a router would be required to maintain NHRP peering with
+ potentially a fairly large number of other routers. Further study is
+ needed to understand the implications of this on the scalability of
+ the approach where NHRP is used to establish shortcuts among routers.
+
+ This document doesn't have a proof that the mechanisms described here
+ result in loop-free steady state forwarding when NHRP is used to
+ establish shortcuts among routers, however, a counterexample has not
+ yet been found. Further analysis should be done as part of advancing
+ beyond Proposed Standard.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Joel Halpern [Page 13]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+12. Security Considerations
+
+ Security is provided in the base NHRP protocol, using hop-by-hop
+ authentication. There is no change to the fundamental security
+ capabilities provided therein when these extensions are used. It
+ should be noted that the assumption of transitive trust that is the
+ basis of such security may well be significantly weaker in an inter-
+ domain environment, and administrators of border routers should take
+ this into consideration. The hop-by-hop security model is used by
+ NHRP originally because there is no end-to-end security association
+ between the requesting and responding NHRP entities. In this
+ environment there is the additional facet that intermediate NHS are
+ modifying the prefix length field of the CIE, thus changing the end-
+ to-end information.
+
+
+13. References
+
+ [1] J. Luciani, D. Katz, D. Piscitello, B. Cole, N. Doraswamy.,
+ "NBMA Next Hop Resolution Protocol", RFC-2332, USC/Information
+ Sciences Institute, April 1998.
+
+ [2] D. Cansever., "NHRP Protocol Applicability Statement", RFC-2333,
+ USC/Information Sciences Institute, April 1998
+
+ [3] S. Bradner., "Key words for use in RFCs to Indicate Requirement
+ Levels", RFC-2119, USC/Information Sciences Institute, March 1997.
+
+
+14. Acknowledgements
+
+ The authors wish to thank Curtis Villamizer for his contributions
+ emphasizing both the importance of the looping cases, and some
+ examples of when loops can occur.
+
+
+15. Author Information
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Joel Halpern [Page 14]
+
+Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998
+
+
+ Joel M. Halpern
+ Institutional Venture Partners
+ 3000 Sand Hill Road
+ Menlo Park, CA
+ Phone: (650) 926-5633
+ email: joel@mcquillan.com
+
+ Yakov Rekhter
+ cisco Systems, Inc.
+ 170 Tasman Dr.
+ San Jose, CA 95134
+ Phone: (914) 528-0090
+ email: yakov@cisco.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Joel Halpern [Page 15]
+ \ No newline at end of file
diff --git a/doc/rfc2332.txt b/doc/rfc2332.txt
new file mode 100644
index 0000000..eb79ee3
--- /dev/null
+++ b/doc/rfc2332.txt
@@ -0,0 +1,2915 @@
+
+
+
+
+
+
+Network Working Group J. Luciani
+Request for Comments: 2332 Bay Networks
+Category: Standards Track D. Katz
+ cisco Systems
+ D. Piscitello
+ Core Competence, Inc.
+ B. Cole
+ Juniper Networks
+ N. Doraswamy
+ Bay Networks
+ April 1998
+
+
+ NBMA Next Hop Resolution Protocol (NHRP)
+
+Status of this Memo
+
+ This document specifies an Internet standards track protocol for the
+ Internet community, and requests discussion and suggestions for
+ improvements. Please refer to the current edition of the "Internet
+ Official Protocol Standards" (STD 1) for the standardization state
+ and status of this protocol. Distribution of this memo is unlimited.
+
+Copyright Notice
+
+ Copyright (C) The Internet Society (1998). All Rights Reserved.
+
+Abstract
+
+ This document describes the NBMA Next Hop Resolution Protocol (NHRP).
+ NHRP can be used by a source station (host or router) connected to a
+ Non-Broadcast, Multi-Access (NBMA) subnetwork to determine the
+ internetworking layer address and NBMA subnetwork addresses of the
+ "NBMA next hop" towards a destination station. If the destination is
+ connected to the NBMA subnetwork, then the NBMA next hop is the
+ destination station itself. Otherwise, the NBMA next hop is the
+ egress router from the NBMA subnetwork that is "nearest" to the
+ destination station. NHRP is intended for use in a multiprotocol
+ internetworking layer environment over NBMA subnetworks.
+
+ Note that while this protocol was developed for use with NBMA
+ subnetworks, it is possible, if not likely, that it will be applied
+ to BMA subnetworks as well. However, this usage of NHRP is for
+ further study.
+
+ This document is intended to be a functional superset of the NBMA
+ Address Resolution Protocol (NARP) documented in [1].
+
+
+
+
+Luciani, et. al. Standards Track [Page 1]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ Operation of NHRP as a means of establishing a transit path across an
+ NBMA subnetwork between two routers will be addressed in a separate
+ document (see [13]).
+
+1. Introduction
+
+ The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD,
+ SHOULD NOT, RECOMMENDED, MAY, and OPTIONAL, when they appear in this
+ document, are to be interpreted as described in [15].
+
+ The NBMA Next Hop Resolution Protocol (NHRP) allows a source station
+ (a host or router), wishing to communicate over a Non-Broadcast,
+ Multi-Access (NBMA) subnetwork, to determine the internetworking
+ layer addresses and NBMA addresses of suitable "NBMA next hops"
+ toward a destination station. A subnetwork can be non-broadcast
+ either because it technically doesn't support broadcasting (e.g., an
+ X.25 subnetwork) or because broadcasting is not feasible for one
+ reason or another (e.g., an SMDS multicast group or an extended
+ Ethernet would be too large). If the destination is connected to the
+ NBMA subnetwork, then the NBMA next hop is the destination station
+ itself. Otherwise, the NBMA next hop is the egress router from the
+ NBMA subnetwork that is "nearest" to the destination station.
+
+ One way to model an NBMA network is by using the notion of logically
+ independent IP subnets (LISs). LISs, as defined in [3] and [4], have
+ the following properties:
+
+ 1) All members of a LIS have the same IP network/subnet number
+ and address mask.
+
+ 2) All members of a LIS are directly connected to the same
+ NBMA subnetwork.
+
+ 3) All hosts and routers outside of the LIS are accessed via
+ a router.
+
+ 4) All members of a LIS access each other directly (without
+ routers).
+
+ Address resolution as described in [3] and [4] only resolves the next
+ hop address if the destination station is a member of the same LIS as
+ the source station; otherwise, the source station must forward
+ packets to a router that is a member of multiple LIS's. In multi-LIS
+
+
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 2]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ configurations, hop-by-hop address resolution may not be sufficient
+ to resolve the "NBMA next hop" toward the destination station, and IP
+ packets may have multiple IP hops through the NBMA subnetwork.
+
+ Another way to model NBMA is by using the notion of Local Address
+ Groups (LAGs) [10]. The essential difference between the LIS and the
+ LAG models is that while with the LIS model the outcome of the
+ "local/remote" forwarding decision is driven purely by addressing
+ information, with the LAG model the outcome of this decision is
+ decoupled from the addressing information and is coupled with the
+ Quality of Service and/or traffic characteristics. With the LAG
+ model any two entities on a common NBMA network could establish a
+ direct communication with each other, irrespective of the entities'
+ addresses.
+
+ Support for the LAG model assumes the existence of a mechanism that
+ allows any entity (i.e., host or router) connected to an NBMA network
+ to resolve an internetworking layer address to an NBMA address for
+ any other entity connected to the same NBMA network. This resolution
+ would take place regardless of the address assignments to these
+ entities. Within the parameters described in this document, NHRP
+ describes such a mechanism. For example, when the internetworking
+ layer address is of type IP, once the NBMA next hop has been
+ resolved, the source may either start sending IP packets to the
+ destination (in a connectionless NBMA subnetwork such as SMDS) or may
+ first establish a connection to the destination with the desired
+ bandwidth (in a connection-oriented NBMA subnetwork such as ATM).
+
+ Use of NHRP may be sufficient for hosts doing address resolution when
+ those hosts are directly connected to an NBMA subnetwork, allowing
+ for straightforward implementations in NBMA stations. NHRP also has
+ the capability of determining the egress point from an NBMA
+ subnetwork when the destination is not directly connected to the NBMA
+ subnetwork and the identity of the egress router is not learned by
+ other methods (such as routing protocols). Optional extensions to
+ NHRP provide additional robustness and diagnosability.
+
+ Address resolution techniques such as those described in [3] and [4]
+ may be in use when NHRP is deployed. ARP servers and services over
+ NBMA subnetworks may be required to support hosts that are not
+ capable of dealing with any model for communication other than the
+ LIS model, and deployed hosts may not implement NHRP but may continue
+ to support ARP variants such as those described in [3] and [4]. NHRP
+ is intended to reduce or eliminate the extra router hops required by
+ the LIS model, and can be deployed in a non-interfering manner with
+ existing ARP services [14].
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 3]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ The operation of NHRP to establish transit paths across NBMA
+ subnetworks between two routers requires additional mechanisms to
+ avoid stable routing loops, and will be described in a separate
+ document (see [13]).
+
+2. Overview
+
+2.1 Terminology
+
+ The term "network" is highly overloaded, and is especially confusing
+ in the context of NHRP. We use the following terms:
+
+ Internetwork layer--the media-independent layer (IP in the case of
+ TCP/IP networks).
+
+ Subnetwork layer--the media-dependent layer underlying the
+ internetwork layer, including the NBMA technology (ATM, X.25, SMDS,
+ etc.)
+
+ The term "server", unless explicitly stated to the contrary, refers
+ to a Next Hop Server (NHS). An NHS is an entity performing the
+ Next Hop Resolution Protocol service within the NBMA cloud. An NHS
+ is always tightly coupled with a routing entity (router, route
+ server or edge device) although the converse is not yet guaranteed
+ until ubiquitous deployment of this functionality occurs. Note
+ that the presence of intermediate routers that are not coupled with
+ an NHS entity may preclude the use of NHRP when source and
+ destination stations are on different sides of such routers and thus
+ such routers may partition NHRP reachability within an NBMA
+ network.
+
+ The term "client", unless explicitly stated to the contrary, refers
+ to a Next Hop Resolution Protocol client (NHC). An NHC is an
+ entity which initiates NHRP requests of various types in order to
+ obtain access to the NHRP service.
+
+ The term "station" generally refers to a host or router which
+ contains an NHRP entity. Occasionally, the term station will
+ describe a "user" of the NHRP client or service functionality; the
+ difference in usage is largely semantic.
+
+2.2 Protocol Overview
+
+ In this section, we briefly describe how a source S (which
+ potentially can be either a router or a host) uses NHRP to determine
+ the "NBMA next hop" to destination D.
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 4]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ For administrative and policy reasons, a physical NBMA subnetwork may
+ be partitioned into several, disjoint "Logical NBMA subnetworks". A
+ Logical NBMA subnetwork is defined as a collection of hosts and
+ routers that share unfiltered subnetwork connectivity over an NBMA
+ subnetwork. "Unfiltered subnetwork connectivity" refers to the
+ absence of closed user groups, address screening or similar features
+ that may be used to prevent direct communication between stations
+ connected to the same NBMA subnetwork. (Hereafter, unless otherwise
+ specified, we use the term "NBMA subnetwork" to mean *logical* NBMA
+ subnetwork.)
+
+ Placed within the NBMA subnetwork are one or more entities that
+ implement the NHRP protocol. Such stations which are capable of
+ answering NHRP Resolution Requests are known as "Next Hop Servers"
+ (NHSs). Each NHS serves a set of destination hosts, which may or may
+ not be directly connected to the NBMA subnetwork. NHSs cooperatively
+ resolve the NBMA next hop within their logical NBMA subnetwork. In
+ addition to NHRP, NHSs may support "classical" ARP service; however,
+ this will be the subject of a separate document [14].
+
+ An NHS maintains a cache which contains protocol layer address to
+ NBMA subnetwork layer address resolution information. This cache can
+ be constructed from information obtained from NHRP Register packets
+ (see Section 5.2.3 and 5.2.4), from NHRP Resolution Request/Reply
+ packets, or through mechanisms outside the scope of this document
+ (examples of such mechanisms might include ARP[3] and pre-configured
+ tables). Section 6.2 further describes cache management issues.
+
+ For a station within a given LIS to avoid providing NHS
+ functionality, there must be one or more NHSs within the NBMA
+ subnetwork which are providing authoritative address resolution
+ information on its behalf. Such an NHS is said to be "serving" the
+ station. A station on a LIS that lacks NHS functionality and is a
+ client of the NHRP service is known as an NHRP Client or just NHC. If
+ a serving NHS is to be able to supply the address resolution
+ information for an NHC then NHSs must exist at each hop along all
+ routed paths between the NHC making the resolution request and the
+ destination NHC. The last NHRP entity along the routed path is the
+ serving NHS; that is, NHRP Resolution Requests are not forwarded to
+ destination NHCs but rather are processed by the serving NHS.
+
+ An NHC also maintains a cache of protocol address to NBMA address
+ resolution information. This cache is populated through information
+ obtained from NHRP Resolution Reply packets, from manual
+ configuration, or through mechanisms outside the scope of this
+ document.
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 5]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ The protocol proceeds as follows. An event occurs triggering station
+ S to want to resolve the NBMA address of a path to D. This is most
+ likely to be when a data packet addressed to station D is to be
+ emitted from station S (either because station S is a host, or
+ station S is a transit router), but the address resolution could also
+ be triggered by other means (a routing protocol update packet, for
+ example). Station S first determines the next hop to station D
+ through normal routing processes (for a host, the next hop may simply
+ be the default router; for routers, this is the "next hop" to the
+ destination internetwork layer address). If the destination's
+ address resolution information is already available in S's cache then
+ that information is used to forward the packet. Otherwise, if the
+ next hop is reachable through one of its NBMA interfaces, S
+ constructs an NHRP Resolution Request packet (see Section 5.2.1)
+ containing station D's internetwork layer address as the (target)
+ destination address, S's own internetwork layer address as the source
+ address (Next Hop Resolution Request initiator), and station S's NBMA
+ addressing information. Station S may also indicate that it prefers
+ an authoritative NHRP Resolution Reply (i.e., station S only wishes
+ to receive an NHRP Resolution Reply from an NHS serving the
+ destination NHC). Station S emits the NHRP Resolution Request packet
+ towards the destination.
+
+ If the NHRP Resolution Request is triggered by a data packet then S
+ may, while awaiting an NHRP Resolution Reply, choose to dispose of
+ the data packet in one of the following ways:
+
+ (a) Drop the packet
+ (b) Retain the packet until the NHRP Resolution Reply arrives
+ and a more optimal path is available
+ (c) Forward the packet along the routed path toward D
+
+ The choice of which of the above to perform is a local policy matter,
+ though option (c) is the recommended default, since it may allow data
+ to flow to the destination while the NBMA address is being resolved.
+ Note that an NHRP Resolution Request for a given destination MUST NOT
+ be triggered on every packet.
+
+ When the NHS receives an NHRP Resolution Request, a check is made to
+ see if it serves station D. If the NHS does not serve D, the NHS
+ forwards the NHRP Resolution Request to another NHS. Mechanisms for
+ determining how to forward the NHRP Resolution Request are discussed
+ in Section 3.
+
+ If this NHS serves D, the NHS resolves station D's NBMA address
+ information, and generates a positive NHRP Resolution Reply on D's
+ behalf. NHRP Resolution Replies in this scenario are always marked
+ as "authoritative". The NHRP Resolution Reply packet contains the
+
+
+
+Luciani, et. al. Standards Track [Page 6]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ address resolution information for station D which is to be sent back
+ to S. Note that if station D is not on the NBMA subnetwork, the next
+ hop internetwork layer address will be that of the egress router
+ through which packets for station D are forwarded.
+
+ A transit NHS receiving an NHRP Resolution Reply may cache the
+ address resolution information contained therein. To a subsequent
+ NHRP Resolution Request, this NHS may respond with the cached, "non-
+ authoritative" address resolution information if the NHS is permitted
+ to do so (see Sections 5.2.2 and 6.2 for more information on non-
+ authoritative versus authoritative NHRP Resolution Replies). Non-
+ authoritative NHRP Resolution Replies are distinguished from
+ authoritative NHRP Resolution Replies so that if a communication
+ attempt based on non-authoritative information fails, a source
+ station can choose to send an authoritative NHRP Resolution Request.
+ NHSs MUST NOT respond to authoritative NHRP Resolution Requests with
+ cached information.
+
+ If the determination is made that no NHS in the NBMA subnetwork can
+ reply to the NHRP Resolution Request for D then a negative NHRP
+ Resolution Reply (NAK) is returned. This occurs when (a) no next-hop
+ resolution information is available for station D from any NHS, or
+ (b) an NHS is unable to forward the NHRP Resolution Request (e.g.,
+ connectivity is lost).
+
+ NHRP Registration Requests, NHRP Purge Requests, NHRP Purge Replies,
+ and NHRP Error Indications follow a routed path in the same fashion
+ that NHRP Resolution Requests and NHRP Resolution Replies do.
+ Specifically, "requests" and "indications" follow the routed path
+ from Source Protocol Address (which is the address of the station
+ initiating the communication) to the Destination Protocol Address.
+ "Replies", on the other hand, follow the routed path from the
+ Destination Protocol Address back to the Source Protocol Address with
+ the following exceptions: in the case of an NHRP Registration Reply
+ and in the case of an NHC initiated NHRP Purge Request, the packet is
+ always returned via a direct VC (see Sections 5.2.4 and 5.2.5); if
+ one does not exist then one MUST be created.
+
+ NHRP Requests and NHRP Replies do NOT cross the borders of an NBMA
+ subnetwork; however, further study is being done in this area (see
+ Section 7). Thus, the internetwork layer data traffic out of and
+ into an NBMA subnetwork always traverses an internetwork layer router
+ at its border.
+
+ NHRP optionally provides a mechanism to send an NHRP Resolution Reply
+ which contains aggregated address resolution information. For
+ example, suppose that router X is the next hop from station S to
+ station D and that X is an egress router for all stations sharing an
+
+
+
+Luciani, et. al. Standards Track [Page 7]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ internetwork layer address prefix with station D. When an NHRP
+ Resolution Reply is generated in response to an NHRP Resolution
+ Request, the responder may augment the internetwork layer address of
+ station D with a prefix length (see Section 5.2.0.1). A subsequent
+ (non-authoritative) NHRP Resolution Request for some destination that
+ shares an internetwork layer address prefix (for the number of bits
+ specified in the prefix length) with D may be satisfied with this
+ cached information. See section 6.2 regarding caching issues.
+
+ To dynamically detect subnetwork-layer filtering in NBMA subnetworks
+ (e.g., X.25 closed user group facility, or SMDS address screens), to
+ trace the routed path that an NHRP packet takes, or to provide loop
+ detection and diagnostic capabilities, a "Route Record" may be
+ included in NHRP packets (see Sections 5.3.2 and 5.3.3). The Route
+ Record extensions are the NHRP Forward Transit NHS Record Extension
+ and the NHRP Reverse Transit NHS Record Extension. They contain the
+ internetwork (and subnetwork layer) addresses of all intermediate
+ NHSs between source and destination and between destination and
+ source respectively. When a source station is unable to communicate
+ with the responder (e.g., an attempt to open an SVC fails), it may
+ attempt to do so successively with other subnetwork layer addresses
+ in the NHRP Forward Transit NHS Record Extension until it succeeds
+ (if authentication policy permits such action). This approach can
+ find a suitable egress point in the presence of subnetwork-layer
+ filtering (which may be source/destination sensitive, for instance,
+ without necessarily creating separate logical NBMA subnetworks) or
+ subnetwork-layer congestion (especially in connection-oriented
+ media).
+
+3. Deployment
+
+ NHRP Resolution Requests traverse one or more hops within an NBMA
+ subnetwork before reaching the station that is expected to generate a
+ response. Each station, including the source station, chooses a
+ neighboring NHS to which it will forward the NHRP Resolution Request.
+ The NHS selection procedure typically involves applying a destination
+ protocol layer address to the protocol layer routing table which
+ causes a routing decision to be returned. This routing decision is
+ then used to forward the NHRP Resolution Request to the downstream
+ NHS. The destination protocol layer address previously mentioned is
+ carried within the NHRP Resolution Request packet. Note that even
+ though a protocol layer address was used to acquire a routing
+ decision, NHRP packets are not encapsulated within a protocol layer
+ header but rather are carried at the NBMA layer using the
+ encapsulation described in Section 5.
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 8]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ Each NHS/router examines the NHRP Resolution Request packet on its
+ way toward the destination. Each NHS which the NHRP packet traverses
+ on the way to the packet's destination might modify the packet (e.g.,
+ updating the Forward Record extension). Ignoring error situations,
+ the NHRP Resolution Request eventually arrives at a station that is
+ to generate an NHRP Resolution Reply. This responding station
+ "serves" the destination. The responding station generates an NHRP
+ Resolution Reply using the source protocol address from within the
+ NHRP packet to determine where the NHRP Resolution Reply should be
+ sent.
+
+ Rather than use routing to determine the next hop for an NHRP packet,
+ an NHS may use other applicable means (such as static configuration
+ information) in order to determine to which neighboring NHSs to
+ forward the NHRP Resolution Request packet as long as such other
+ means would not cause the NHRP packet to arrive at an NHS which is
+ not along the routed path. The use of static configuration
+ information for this purpose is beyond the scope of this document.
+
+ The NHS serving a particular destination must lie along the routed
+ path to that destination. In practice, this means that all egress
+ routers must double as NHSs serving the destinations beyond them, and
+ that hosts on the NBMA subnetwork are served by routers that double
+ as NHSs. Also, this implies that forwarding of NHRP packets within
+ an NBMA subnetwork requires a contiguous deployment of NHRP capable
+ routers. It is important that, in a given LIS/LAG which is using
+ NHRP, all NHSs within the LIS/LAG have at least some portion of their
+ resolution databases synchronized so that a packet arriving at one
+ router/NHS in a given LIS/LAG will be forwarded in the same fashion
+ as a packet arriving at a different router/NHS for the given LIS/LAG.
+ One method, among others, is to use the Server Cache Synchronization
+ Protocol (SCSP) [12]. It is RECOMMENDED that SCSP be the method used
+ when a LIS/LAG contains two or more router/NHSs.
+
+ During migration to NHRP, it cannot be expected that all routers
+ within the NBMA subnetwork are NHRP capable. Thus, NHRP traffic
+ which would otherwise need to be forwarded through such routers can
+ be expected to be dropped due to the NHRP packet not being
+ recognized. In this case, NHRP will be unable to establish any
+ transit paths whose discovery requires the traversal of the non-NHRP
+ speaking routers. If the client has tried and failed to acquire a
+ cut through path then the client should use the network layer routed
+ path as a default.
+
+ If an NBMA technology offers a group, an anycast, or a multicast
+ addressing feature then the NHC may be configured with such an
+ address (appropriate to the routing realm it participates in) which
+ would be assigned to all NHS serving that routing realm. This
+
+
+
+Luciani, et. al. Standards Track [Page 9]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ address can then be used for establishing an initial connection to an
+ NHS to transmit a registration request. This address may not be used
+ for sending NHRP requests. The resulting VC may be used for NHRP
+ requests if and only if the registration response is received over
+ that VC, thereby indicating that one happens to have anycast
+ connected to an NHS serving the LIS/LAG. In the case of non-
+ connection oriented networks, or of multicast (rather than anycast)
+ addresses, the address MUST NOT be used for sending NHRP resolution
+ requests.
+
+ When an NHS "serves" an NHC, the NHS MUST send NHRP messages destined
+ for the NHC directly to the NHC. That is, the NHRP message MUST NOT
+ transit through any NHS which is not serving the NHC when the NHRP
+ message is currently at an NHS which does serve the NHC (this, of
+ course, assumes the NHRP message is destined for the NHC). Further,
+ an NHS which serves an NHC SHOULD have a direct NBMA level connection
+ to that NHC (see Section 5.2.3 and 5.2.4 for examples).
+
+ With the exception of NHRP Registration Requests (see Section 5.2.3
+ and 5.2.4 for details of the NHRP Registration Request case), an NHC
+ MUST send NHRP messages over a direct NBMA level connection between
+ the serving NHS and the served NHC.
+
+ It may not be desirable to maintain semi-permanent NBMA level
+ connectivity between the NHC and the NHS. In this case, when NBMA
+ level connectivity is initially setup between the NHS and the NHC (as
+ described in Section 5.2.4), the NBMA address of the NHS should be
+ obtained through the NBMA level signaling technology. This address
+ should be stored for future use in setting up subsequent NBMA level
+ connections. A somewhat more information rich technique to obtain
+ the address information (and more) of the serving NHS would be for
+ the NHC to include the Responder Address extension (see Section
+ 5.3.1) in the NHRP Registration Request and to store the information
+ returned to the NHC in the Responder Address extension which is
+ subsequently included in the NHRP Registration Reply. Note also
+ that, in practice, a client's default router should also be its NHS;
+ thus a client may be able to know the NBMA address of its NHS from
+ the configuration which was already required for the client to be
+ able to communicate. Further, as mentioned in Section 4, NHCs may be
+ configured with the addressing information of one or more NHSs.
+
+4. Configuration
+
+ Next Hop Clients
+
+ An NHC connected to an NBMA subnetwork MAY be configured with the
+ Protocol address(es) and NBMA address(es) of its NHS(s). The
+ NHS(s) will likely also represent the NHC's default or peer
+
+
+
+Luciani, et. al. Standards Track [Page 10]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ routers, so their NBMA addresses may be obtained from the NHC's
+ existing configuration. If the NHC is attached to several
+ subnetworks (including logical NBMA subnetworks), the NHC should
+ also be configured to receive routing information from its NHS(s)
+ and peer routers so that it can determine which internetwork layer
+ networks are reachable through which subnetworks.
+
+ Next Hop Servers
+
+ An NHS is configured with knowledge of its own internetwork layer
+ and NBMA addresses. An NHS MAY also be configured with a set of
+ internetwork layer address prefixes that correspond to the
+ internetwork layer addresses of the stations it serves. The NBMA
+ addresses of the stations served by the NHS may be learned via NHRP
+ Registration packets.
+
+ If a served NHC is attached to several subnetworks, the
+ router/route-server coresident with the serving NHS may also need
+ to be configured to advertise routing information to such NHCs.
+
+ If an NHS acts as an egress router for stations connected to other
+ subnetworks than the NBMA subnetwork, the NHS must, in addition to
+ the above, be configured to exchange routing information between
+ the NBMA subnetwork and these other subnetworks.
+
+ In all cases, routing information is exchanged using conventional
+ intra-domain and/or inter-domain routing protocols.
+
+5. NHRP Packet Formats
+
+ This section describes the format of NHRP packets. In the following,
+ unless otherwise stated explicitly, the unqualified term "request"
+ refers generically to any of the NHRP packet types which are
+ "requests". Further, unless otherwise stated explicitly, the
+ unqualified term "reply" refers generically to any of the NHRP packet
+ types which are "replies".
+
+ An NHRP packet consists of a Fixed Part, a Mandatory Part, and an
+ Extensions Part. The Fixed Part is common to all NHRP packet types.
+ The Mandatory Part MUST be present, but varies depending on packet
+ type. The Extensions Part also varies depending on packet type, and
+ need not be present.
+
+ The length of the Fixed Part is fixed at 20 octets. The length of
+ the Mandatory Part is determined by the contents of the extensions
+ offset field (ar$extoff). If ar$extoff=0x0 then the mandatory part
+ length is equal to total packet length (ar$pktsz) minus 20; otherwise
+ the mandatory part length is equal to ar$extoff minus 20. The length
+
+
+
+Luciani, et. al. Standards Track [Page 11]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ of the Extensions Part is implied by ar$pktsz minus ar$extoff. NHSs
+ may increase the size of an NHRP packet as a result of extension
+ processing, but not beyond the offered maximum packet size of the
+ NBMA network.
+
+ NHRP packets are actually members of a wider class of address mapping
+ and management protocols being developed by the IETF. A specific
+ encapsulation, based on the native formats used on the particular
+ NBMA network over which NHRP is carried, indicates the generic IETF
+ mapping and management protocol. For example, SMDS networks always
+ use LLC/SNAP encapsulation at the NBMA layer [4], and an NHRP packet
+ is preceded by the following LLC/SNAP encapsulation:
+
+ [0xAA-AA-03] [0x00-00-5E] [0x00-03]
+
+ The first three octets are LLC, indicating that SNAP follows. The
+ SNAP OUI portion is the IANA's OUI, and the SNAP PID portion
+ identifies the mapping and management protocol. A field in the Fixed
+ Header following the encapsulation indicates that it is NHRP.
+
+ ATM uses either LLC/SNAP encapsulation of each packet (including
+ NHRP), or uses no encapsulation on VCs dedicated to a single protocol
+ (see [7]). Frame Relay and X.25 both use NLPID/SNAP encapsulation or
+ identification of NHRP, using a NLPID of 0x0080 and the same SNAP
+ contents as above (see [8], [9]).
+
+ Fields marked "unused" MUST be set to zero on transmission, and
+ ignored on receipt.
+
+ Most packet types (ar$op.type) have both internetwork layer
+ protocol-independent fields and protocol-specific fields. The
+ protocol type/snap fields (ar$pro.type/snap) qualify the format of
+ the protocol-specific fields.
+
+5.1 NHRP Fixed Header
+
+ The Fixed Part of the NHRP packet contains those elements of the NHRP
+ packet which are always present and do not vary in size with the type
+ of packet.
+
+
+
+
+
+
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 12]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ar$afn | ar$pro.type |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ar$pro.snap |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ar$pro.snap | ar$hopcnt | ar$pktsz |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ar$chksum | ar$extoff |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ar$op.version | ar$op.type | ar$shtl | ar$sstl |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+ ar$afn
+ Defines the type of "link layer" addresses being carried. This
+ number is taken from the 'address family number' list specified in
+ [6]. This field has implications to the coding of ar$shtl and
+ ar$sstl as described below.
+
+ ar$pro.type
+ field is a 16 bit unsigned integer representing the following
+ number space:
+
+ 0x0000 to 0x00FF Protocols defined by the equivalent NLPIDs.
+ 0x0100 to 0x03FF Reserved for future use by the IETF.
+ 0x0400 to 0x04FF Allocated for use by the ATM Forum.
+ 0x0500 to 0x05FF Experimental/Local use.
+ 0x0600 to 0xFFFF Protocols defined by the equivalent Ethertypes.
+
+ (based on the observations that valid Ethertypes are never smaller
+ than 0x600, and NLPIDs never larger than 0xFF.)
+
+ ar$pro.snap
+ When ar$pro.type has a value of 0x0080, a SNAP encoded extension is
+ being used to encode the protocol type. This snap extension is
+ placed in the ar$pro.snap field. This is termed the 'long form'
+ protocol ID. If ar$pro.type != 0x0080 then the ar$pro.snap field MUST be
+ zero on transmit and ignored on receive. The ar$pro.type field
+ itself identifies the protocol being referred to. This is termed
+ the 'short form' protocol ID.
+
+ In all cases, where a protocol has an assigned number in the
+ ar$pro.type space (excluding 0x0080) the short form MUST be used
+ when transmitting NHRP messages; i.e., if Ethertype or NLPID
+ codings exist then they are used on transmit rather than the
+
+
+
+Luciani, et. al. Standards Track [Page 13]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ SNAP coding. If both Ethertype and NLPID codings exist then when
+ transmitting NHRP messages, the Ethertype coding MUST be used (this
+ is consistent with RFC 1483 coding). So, for example, the
+ following codings exist for IP:
+
+ SNAP: ar$pro.type = 0x00-80, ar$pro.snap = 0x00-00-00-08-00
+ NLPID: ar$pro.type = 0x00-CC, ar$pro.snap = 0x00-00-00-00-00
+ Ethertype: ar$pro.type = 0x08-00, ar$pro.snap = 0x00-00-00-00-00
+
+ and thus, since the Ethertype coding exists, it is used in
+ preference.
+
+ ar$hopcnt
+ The Hop count indicates the maximum number of NHSs that an NHRP
+ packet is allowed to traverse before being discarded. This field
+ is used in a similar fashion to the way that a TTL is used in an IP
+ packet and should be set accordingly. Each NHS decrements the hop count
+ as the NHRP packet transits the NHS on the way to the next hop
+ along the routed path to the destination. If an NHS receives an
+ NHRP packet which it would normally forward to a next hop and that
+ packet contains an ar$hopcnt set to zero then the NHS sends an
+ error indication message back to the source protocol address
+ stating that the hop count has been exceeded (see Section 5.2.7)
+ and the NHS drops the packet in error; however, an error
+ indication is never sent as a result of receiving an error
+ indication. When a responding NHS replies to an NHRP request, that
+ NHS places a value in ar$hopcnt as if it were sending a request of
+ its own.
+
+ ar$pktsz
+ The total length of the NHRP packet, in octets (excluding link
+ layer encapsulation).
+
+ ar$chksum
+ The standard IP checksum over the entire NHRP packet starting at
+ the fixed header. If the packet is an odd number of bytes in
+ length then this calculation is performed as if a byte set to 0x00
+ is appended to the end of the packet.
+
+ ar$extoff
+ This field identifies the existence and location of NHRP
+ extensions. If this field is 0 then no extensions exist otherwise
+ this field represents the offset from the beginning of the NHRP
+ packet (i.e., starting from the ar$afn field) of the first
+ extension.
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 14]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ ar$op.version
+ This field indicates what version of generic address mapping and
+ management protocol is represented by this message.
+
+ 0 MARS protocol [11].
+ 1 NHRP as defined in this document.
+ 0x02 - 0xEF Reserved for future use by the IETF.
+ 0xF0 - 0xFE Allocated for use by the ATM Forum.
+ 0xFF Experimental/Local use.
+
+ ar$op.type
+ When ar$op.version == 1, this is the NHRP packet type: NHRP
+ Resolution Request(1), NHRP Resolution Reply(2), NHRP Registration
+ Request(3), NHRP Registration Reply(4), NHRP Purge Request(5), NHRP
+ Purge Reply(6), or NHRP Error Indication(7). Use of NHRP packet
+ Types in the range 128 to 255 are reserved for research or use in
+ other protocol development and will be administered by IANA as
+ described in Section 9.
+
+ ar$shtl
+ Type & length of source NBMA address interpreted in the context of
+ the 'address family number'[6] indicated by ar$afn. See below for
+ more details.
+
+ ar$sstl
+ Type & length of source NBMA subaddress interpreted in the context
+ of the 'address family number'[6] indicated by ar$afn. When an
+ NBMA technology has no concept of a subaddress, the subaddress
+ length is always coded ar$sstl = 0 and no storage is allocated for
+ the subaddress in the appropriate mandatory part. See below for
+ more details.
+
+ Subnetwork layer address type/length fields (e.g., ar$shtl, Cli Addr
+ T/L) and subnetwork layer subaddresses type/length fields (e.g.,
+ ar$sstl, Cli SAddr T/L) are coded as follows:
+
+ 7 6 5 4 3 2 1 0
+ +-+-+-+-+-+-+-+-+
+ |0|x| length |
+ +-+-+-+-+-+-+-+-+
+
+ The most significant bit is reserved and MUST be set to zero. The
+ second most significant bit (x) is a flag indicating whether the
+ address being referred to is in:
+
+ - NSAP format (x = 0).
+ - Native E.164 format (x = 1).
+
+
+
+
+Luciani, et. al. Standards Track [Page 15]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ For NBMA technologies that use neither NSAP nor E.164 format
+ addresses, x = 0 SHALL be used to indicate the native form for the
+ particular NBMA technology.
+
+ If the NBMA network is ATM and a subaddress (e.g., Source NBMA
+ SubAddress, Client NBMA SubAddress) is to be included in any part of
+ the NHRP packet then ar$afn MUST be set to 0x000F; further, the
+ subnetwork layer address type/length fields (e.g., ar$shtl, Cli Addr
+ T/L) and subnetwork layer subaddress type/length fields (e.g.,
+ ar$sstl, Cli SAddr T/L) MUST be coded as in [11]. If the NBMA
+ network is ATM and no subaddress field is to be included in any part
+ of the NHRP packet then ar$afn MAY be set to 0x0003 (NSAP) or 0x0008
+ (E.164) accordingly.
+
+ The bottom 6 bits is an unsigned integer value indicating the length
+ of the associated NBMA address in octets. If this value is zero the
+ flag x is ignored.
+
+5.2.0 Mandatory Part
+
+ The Mandatory Part of the NHRP packet contains the operation specific
+ information (e.g., NHRP Resolution Request/Reply, etc.) and variable
+ length data which is pertinent to the packet type.
+
+5.2.0.1 Mandatory Part Format
+
+ Sections 5.2.1 through 5.2.6 have a very similar mandatory part.
+ This mandatory part includes a common header and zero or more Client
+ Information Entries (CIEs). Section 5.2.7 has a different format
+ which is specified in that section.
+
+ The common header looks like the following:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Src Proto Len | Dst Proto Len | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Request ID |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Source NBMA Address (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Source NBMA Subaddress (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Source Protocol Address (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Destination Protocol Address (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+Luciani, et. al. Standards Track [Page 16]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ And the CIEs have the following format:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Code | Prefix Length | unused |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Maximum Transmission Unit | Holding Time |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Cli Addr T/L | Cli SAddr T/L | Cli Proto Len | Preference |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Client NBMA Address (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Client NBMA Subaddress (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Client Protocol Address (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ .....................
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Code | Prefix Length | unused |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Maximum Transmission Unit | Holding Time |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Cli Addr T/L | Cli SAddr T/L | Cli Proto Len | Preference |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Client NBMA Address (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Client NBMA Subaddress (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Client Protocol Address (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ The meanings of the fields are as follows:
+
+ Src Proto Len
+ This field holds the length in octets of the Source Protocol
+ Address.
+
+ Dst Proto Len
+ This field holds the length in octets of the Destination Protocol
+ Address.
+
+ Flags
+ These flags are specific to the given message type and they are
+ explained in each section.
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 17]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ Request ID
+ A value which, when coupled with the address of the source,
+ provides a unique identifier for the information contained in a
+ "request" packet. This value is copied directly from a "request"
+ packet into the associated "reply". When a sender of a "request"
+ receives a "reply", it will compare the Request ID and source address
+ information in the received "reply" against that found in its
+ outstanding "request" list. When a match is found then the
+ "request" is considered to be acknowledged.
+
+ The value is taken from a 32 bit counter that is incremented each
+ time a new "request" is transmitted. The same value MUST be used
+ when resending a "request", i.e., when a "reply" has not been
+ received for a "request" and a retry is sent after an appropriate
+ interval.
+
+ It is RECOMMENDED that the initial value for this number be 0. A
+ node MAY reuse a sequence number if and only if the reuse of the
+ sequence number is not precluded by use of a particular method of
+ synchronization (e.g., as described in Appendix A).
+
+ The NBMA address/subaddress form specified below allows combined
+ E.164/NSAPA form of NBMA addressing. For NBMA technologies without a
+ subaddress concept, the subaddress field is always ZERO length and
+ ar$sstl = 0.
+
+ Source NBMA Address
+ The Source NBMA address field is the address of the source station
+ which is sending the "request". If the field's length as specified
+ in ar$shtl is 0 then no storage is allocated for this address at
+ all.
+
+ Source NBMA SubAddress
+ The Source NBMA subaddress field is the address of the source
+ station which is sending the "request". If the field's length as
+ specified in ar$sstl is 0 then no storage is allocated for this
+ address at all.
+
+ For those NBMA technologies which have a notion of "Calling Party
+ Addresses", the Source NBMA Addresses above are the addresses used
+ when signaling for an SVC.
+
+ "Requests" and "indications" follow the routed path from Source
+ Protocol Address to the Destination Protocol Address. "Replies", on
+ the other hand, follow the routed path from the Destination Protocol
+ Address back to the Source Protocol Address with the following
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 18]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ exceptions: in the case of a NHRP Registration Reply and in the case
+ of an NHC initiated NHRP Purge Request, the packet is always returned
+ via a direct VC (see Sections 5.2.4 and 5.2.5).
+
+ Source Protocol Address
+ This is the protocol address of the station which is sending the
+ "request". This is also the protocol address of the station toward
+ which a "reply" packet is sent.
+
+ Destination Protocol Address
+ This is the protocol address of the station toward which a
+ "request" packet is sent.
+
+ Code
+ This field is message specific. See the relevant message sections
+ below. In general, this field is a NAK code; i.e., when the field
+ is 0 in a reply then the packet is acknowledging a request and if
+ it contains any other value the packet contains a negative
+ acknowledgment.
+
+ Prefix Length
+ This field is message specific. See the relevant message sections
+ below. In general, however, this field is used to indicate that
+ the information carried in an NHRP message pertains to an
+ equivalence class of internetwork layer addresses rather than just
+ a single internetwork layer address specified. All internetwork
+ layer addresses that match the first "Prefix Length" bit positions
+ for the specific internetwork layer address are included in the
+ equivalence class. If this field is set to 0x00 then this field
+ MUST be ignored and no equivalence information is assumed (note
+ that 0x00 is thus equivalent to 0xFF).
+
+ Maximum Transmission Unit
+ This field gives the maximum transmission unit for the relevant
+ client station. If this value is 0 then either the default MTU is
+ used or the MTU negotiated via signaling is used if such
+ negotiation is possible for the given NBMA.
+
+ Holding Time
+ The Holding Time field specifies the number of seconds for which
+ the Next Hop NBMA information specified in the CIE is considered to
+ be valid. Cached information SHALL be discarded when the holding
+ time expires. This field must be set to 0 on a NAK.
+
+
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 19]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ Cli Addr T/L
+ Type & length of next hop NBMA address specified in the CIE. This
+ field is interpreted in the context of the 'address family
+ number'[6] indicated by ar$afn (e.g., ar$afn=0x0003 for ATM).
+
+ Cli SAddr T/L
+ Type & length of next hop NBMA subaddress specified in the CIE.
+ This field is interpreted in the context of the 'address family
+ number'[6] indicated by ar$afn (e.g., ar$afn=0x000F for ATM makes
+ the address an E.164 and the subaddress an ATM Forum NSAP address).
+ When an NBMA technology has no concept of a subaddress, the
+ subaddress is always null with a length of 0. When the address
+ length is specified as 0 no storage is allocated for the address.
+
+ Cli Proto Len
+ This field holds the length in octets of the Client Protocol
+ Address specified in the CIE.
+
+ Preference
+ This field specifies the preference for use of the specific CIE
+ relative to other CIEs. Higher values indicate higher preference.
+ Action taken when multiple CIEs have equal or highest preference
+ value is a local matter.
+
+ Client NBMA Address
+ This is the client's NBMA address.
+
+ Client NBMA SubAddress
+ This is the client's NBMA subaddress.
+
+ Client Protocol Address
+ This is the client's internetworking layer address specified.
+
+ Note that an NHS may cache source address binding information from an
+ NHRP Resolution Request if and only if the conditions described in
+ Section 6.2 are met for the NHS. In all other cases, source address
+ binding information appearing in an NHRP message MUST NOT be cached.
+
+5.2.1 NHRP Resolution Request
+
+ The NHRP Resolution Request packet has a Type code of 1. Its
+ mandatory part is coded as described in Section 5.2.0.1 and the
+ message specific meanings of the fields are as follows:
+
+ Flags - The flags field is coded as follows:
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 20]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ 0 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |Q|A|D|U|S| unused |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Q
+ Set if the station sending the NHRP Resolution Request is a
+ router; clear if it is a host.
+
+ A
+ This bit is set in a NHRP Resolution Request if only
+ authoritative next hop information is desired and is clear
+ otherwise. See the NHRP Resolution Reply section below for
+ further details on the "A" bit and its usage.
+
+ D
+ Unused (clear on transmit)
+
+ U
+ This is the Uniqueness bit. This bit aids in duplicate address
+ detection. When this bit is set in an NHRP Resolution Request
+ and one or more entries exist in the NHS cache which meet the
+ requirements of the NHRP Resolution Request then only the CIE in
+ the NHS's cache with this bit set will be returned. Note that
+ even if this bit was set at registration time, there may still be
+ multiple CIEs that might fulfill the NHRP Resolution Request
+ because an entire subnet can be registered through use of the
+ Prefix Length in the CIE and the address of interest might be
+ within such a subnet. If the "uniqueness" bit is set and the
+ responding NHS has one or more cache entries which match the
+ request but no such cache entry has the "uniqueness" bit set,
+ then the NHRP Resolution Reply returns with a NAK code of "13 -
+ Binding Exists But Is Not Unique" and no CIE is included. If a
+ client wishes to receive non-unique Next Hop Entries, then
+ the client must have the "uniqueness" bit set to zero in its NHRP
+ Resolution Request. Note that when this bit is set in an NHRP
+ Registration Request, only a single CIE may be specified in the
+ NHRP Registration Request and that CIE must have the Prefix
+ Length field set to 0xFF.
+
+ S
+ Set if the binding between the Source Protocol Address and the
+ Source NBMA information in the NHRP Resolution Request is
+ guaranteed to be stable and accurate (e.g., these addresses are
+ those of an ingress router which is connected to an ethernet stub
+ network or the NHC is an NBMA attached host).
+
+
+
+
+Luciani, et. al. Standards Track [Page 21]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ Zero or one CIEs (see Section 5.2.0.1) may be specified in an NHRP
+ Resolution Request. If one is specified then that entry carries the
+ pertinent information for the client sourcing the NHRP Resolution
+ Request. Usage of the CIE in the NHRP Resolution Request is
+ described below:
+
+ Prefix Length
+ If a CIE is specified in the NHRP Resolution Request then the
+ Prefix Length field may be used to qualify the widest acceptable
+ prefix which may be used to satisfy the NHRP Resolution Request.
+ In the case of NHRP Resolution Request/Reply, the Prefix Length
+ specifies the equivalence class of addresses which match the
+ first "Prefix Length" bit positions of the Destination Protocol
+ Address. If the "U" bit is set in the common header then this
+ field MUST be set to 0xFF.
+
+ Maximum Transmission Unit
+ This field gives the maximum transmission unit for the source
+ station. A possible use of this field in the NHRP Resolution
+ Request packet is for the NHRP Resolution Requester to ask for a
+ target MTU.
+
+ Holding Time
+ The Holding Time specified in the one CIE permitted to be
+ included in an NHRP Resolution Request is the amount of time
+ which the source address binding information in the NHRP
+ Resolution Request is permitted to be cached by transit and
+ responding NHSs. Note that this field may only have a non-zero
+ value if the S bit is set.
+
+ All other fields in the CIE MUST be ignored and SHOULD be set to 0.
+
+ The Destination Protocol Address in the common header of the
+ Mandatory Part of this message contains the protocol address of the
+ station for which resolution is desired. An NHC MUST send the NHRP
+ Resolution Request directly to one of its serving NHSs (see Section 3
+ for more information).
+
+5.2.2 NHRP Resolution Reply
+
+ The NHRP Resolution Reply packet has a Type code of 2. CIEs
+ correspond to Next Hop Entries in an NHS's cache which match the
+ criteria in the NHRP Resolution Request. Its mandatory part is coded
+ as described in Section 5.2.0.1. The message specific meanings of
+ the fields are as follows:
+
+ Flags - The flags field is coded as follows:
+
+
+
+
+Luciani, et. al. Standards Track [Page 22]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ 0 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |Q|A|D|U|S| unused |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Q
+ Copied from the NHRP Resolution Request. Set if the NHRP
+ Resolution Requester is a router; clear if it is a host.
+
+ A
+ Set if the next hop CIE in the NHRP Resolution Reply is
+ authoritative; clear if the NHRP Resolution Reply is non-
+ authoritative.
+
+ When an NHS receives a NHRP Resolution Request for authoritative
+ information for which it is the authoritative source, it MUST
+ respond with a NHRP Resolution Reply containing all and only
+ those next hop CIEs which are contained in the NHS's cache which
+ both match the criteria of the NHRP Resolution Request and are
+ authoritative cache entries. An NHS is an authoritative source
+ for a NHRP Resolution Request if the information in the NHS's
+ cache matches the NHRP Resolution Request criteria and that
+ information was obtained through a NHRP Registration Request or
+ through synchronization with an NHS which obtained this
+ information through a NHRP Registration Request. An
+ authoritative cache entry is one which is obtained through a NHRP
+ Registration Request or through synchronization with an NHS which
+ obtained this information through a NHRP Registration Request.
+
+ An NHS obtains non-authoritative CIEs through promiscuous
+ listening to NHRP packets other than NHRP Registrations which are
+ directed at it. A NHRP Resolution Request which indicates a
+ request for non-authoritative information should cause a NHRP
+ Resolution Reply which contains all entries in the replying NHS's
+ cache (i.e., both authoritative and non-authoritative) which
+ match the criteria specified in the request.
+
+ D
+ Set if the association between destination and the associated next
+ hop information included in all CIEs of the NHRP Resolution Reply
+ is guaranteed to be stable for the lifetime of the information
+ (the holding time). This is the case if the Next Hop protocol
+ address in a CIE identifies the destination (though it may be
+ different in value than the Destination address if the
+ destination system has multiple addresses) or if the destination
+ is not connected directly to the NBMA subnetwork but the egress
+ router to that destination is guaranteed to be stable (such as
+
+
+
+Luciani, et. al. Standards Track [Page 23]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ when the destination is immediately adjacent to the egress router
+ through a non-NBMA interface).
+
+ U
+ This is the Uniqueness bit. See the NHRP Resolution Request
+ section above for details. When this bit is set, only one CIE is
+ included since only one unique binding should exist in an NHS's
+ cache.
+
+ S
+ Copied from NHRP Resolution Request message.
+
+ One or more CIEs are specified in the NHRP Resolution Reply. Each CIE
+ contains NHRP next hop information which the responding NHS has
+ cached and which matches the parameters specified in the NHRP
+ Resolution Request. If no match is found by the NHS issuing the NHRP
+ Resolution Reply then a single CIE is enclosed with the CIE Code
+ set appropriately (see below) and all other fields MUST be ignored
+ and SHOULD be set to 0. In order to facilitate the use of NHRP by
+ minimal client implementations, the first CIE MUST contain the next
+ hop with the highest preference value so that such an implementation
+ need parse only a single CIE.
+
+ Code
+ If this field is set to zero then this packet contains a
+ positively acknowledged NHRP Resolution Reply. If this field
+ contains any other value then this message contains an NHRP
+ Resolution Reply NAK which means that an appropriate
+ internetworking layer to NBMA address binding was not available
+ in the responding NHS's cache. If NHRP Resolution Reply contains
+ a Client Information Entry with a NAK Code other than 0 then it
+ MUST NOT contain any other CIE. Currently defined NAK Codes are
+ as follows:
+
+ 4 - Administratively Prohibited
+
+ An NHS may refuse an NHRP Resolution Request attempt for
+ administrative reasons (due to policy constraints or routing
+ state). If so, the NHS MUST send an NHRP Resolution Reply
+ which contains a NAK code of 4.
+
+ 5 - Insufficient Resources
+
+ If an NHS cannot serve a station due to a lack of resources
+ (e.g., can't store sufficient information to send a purge if
+ routing changes), the NHS MUST reply with a NAKed NHRP
+ Resolution Reply which contains a NAK code of 5.
+
+
+
+
+Luciani, et. al. Standards Track [Page 24]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ 12 - No Internetworking Layer Address to NBMA Address Binding
+ Exists
+
+ This code states that there were absolutely no internetworking
+ layer address to NBMA address bindings found in the responding
+ NHS's cache.
+
+ 13 - Binding Exists But Is Not Unique
+
+ This code states that there were one or more internetworking
+ layer address to NBMA address bindings found in the responding
+ NHS's cache, however none of them had the uniqueness bit set.
+
+ Prefix Length
+ In the case of NHRP Resolution Reply, the Prefix Length specifies
+ the equivalence class of addresses which match the first "Prefix
+ Length" bit positions of the Destination Protocol Address.
+
+ Holding Time
+ The Holding Time specified in a CIE of an NHRP Resolution Reply
+ is the amount of time remaining before the expiration of the
+ client information which is cached at the replying NHS. It is
+ not the value which was registered by the client.
+
+ The remainder of the fields for the CIE for each next hop are
+ filled out as they were defined when the next hop was registered
+ with the responding NHS (or one of the responding NHS's
+ synchronized servers) via the NHRP Registration Request.
+
+ Load-splitting may be performed when more than one Client Information
+ Entry is returned to a requester when equal preference values are
+ specified. Also, the alternative addresses may be used in case of
+ connectivity failure in the NBMA subnetwork (such as a failed call
+ attempt in connection-oriented NBMA subnetworks).
+
+ Any extensions present in the NHRP Resolution Request packet MUST be
+ present in the NHRP Resolution Reply even if the extension is non-
+ Compulsory.
+
+ If an unsolicited NHRP Resolution Reply packet is received, an Error
+ Indication of type Invalid NHRP Resolution Reply Received SHOULD be
+ sent in response.
+
+ When an NHS that serves a given NHC receives an NHRP Resolution Reply
+ destined for that NHC then the NHS MUST send the NHRP Resolution
+ Reply directly to the NHC (see Section 3).
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 25]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+5.2.3 NHRP Registration Request
+
+ The NHRP Registration Request is sent from a station to an NHS to
+ notify the NHS of the station's NBMA information. It has a Type code
+ of 3. Each CIE corresponds to Next Hop information which is to be
+ cached at an NHS. The mandatory part of an NHRP Registration Request
+ is coded as described in Section 5.2.0.1. The message specific
+ meanings of the fields are as follows:
+
+ Flags - The flags field is coded as follows:
+
+ 0 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |U| unused |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ U
+ This is the Uniqueness bit. When set in an NHRP Registration
+ Request, this bit indicates that the registration of the protocol
+ address is unique within the confines of the set of synchronized
+ NHSs. This "uniqueness" qualifier MUST be stored in the NHS/NHC
+ cache. Any attempt to register a binding between the protocol
+ address and an NBMA address when this bit is set MUST be rejected
+ with a Code of "14 - Unique Internetworking Layer Address Already
+ Registered" if the replying NHS already has a cache entry for the
+ protocol address and the cache entry has the "uniqueness" bit
+ set. A registration of a CIE's information is rejected when the
+ CIE is returned with the Code field set to anything other than
+ 0x00. See the description of the uniqueness bit in NHRP
+ Resolution Request section above for further details. When this
+ bit is set, only one CIE MAY be included in the NHRP
+ Registration Request.
+
+ Request ID
+ The request ID has the same meaning as described in Section
+ 5.2.0.1. However, the request ID for NHRP Registrations which is
+ maintained at each client MUST be kept in non-volatile memory so
+ that when a client crashes and reregisters there will be no
+ inconsistency in the NHS's database. In order to reduce the
+ overhead associated with updating non-volatile memory, the actual
+ updating need not be done with every increment of the Request ID
+ but could be done, for example, every 50 or 100 increments. In
+ this scenario, when a client crashes and reregisters it knows to
+ add 100 to the value of the Request ID in the non-volatile memory
+ before using the Request ID for subsequent registrations.
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 26]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ One or more CIEs are specified in the NHRP Registration Request.
+ Each CIE contains next hop information which a client is attempting
+ to register with its servers. Generally, all fields in CIEs enclosed
+ in NHRP Registration Requests are coded as described in Section
+ 5.2.0.1. However, if a station is only registering itself with the
+ NHRP Registration Request then it MAY code the Cli Addr T/L, Cli
+ SAddr T/L, and Cli Proto Len as zero which signifies that the client
+ address information is to be taken from the source information in the
+ common header (see Section 5.2.0.1). Below, further clarification is
+ given for some fields in a CIE in the context of a NHRP Registration
+ Request.
+
+ Code
+ This field is set to 0x00 in NHRP Registration Requests.
+
+ Prefix Length
+
+ This field may be used in a NHRP Registration Request to register
+ equivalence information for the Client Protocol Address specified
+ in the CIE of an NHRP Registration Request. In the case of NHRP
+ Registration Request, the Prefix Length specifies the equivalence
+ class of addresses which match the first "Prefix Length" bit
+ positions of the Client Protocol Address. If the "U" bit is set
+ in the common header then this field MUST be set to 0xFF.
+
+ The NHRP Registration Request is used to register an NHC's NHRP
+ information with its NHSs. If an NHC is configured with the protocol
+ address of a serving NHS then the NHC may place the NHS's protocol
+ address in the Destination Protocol Address field of the NHRP
+ Registration Request common header otherwise the NHC must place its
+ own protocol address in the Destination Protocol Address field.
+
+ When an NHS receives an NHRP Registration Request which has the
+ Destination Protocol Address field set to an address which belongs to
+ a LIS/LAG for which the NHS is serving then if the Destination
+ Protocol Address field is equal to the Source Protocol Address field
+ (which would happen if the NHC put its protocol address in the
+ Destination Protocol Address) or the Destination Protocol Address
+ field is equal to the protocol address of the NHS then the NHS
+ processes the NHRP Registration Request after doing appropriate error
+ checking (including any applicable policy checking).
+
+ When an NHS receives an NHRP Registration Request which has the
+ Destination Protocol Address field set to an address which does not
+ belong to a LIS/LAG for which the NHS is serving then the NHS
+ forwards the packet down the routed path toward the appropriate
+ LIS/LAG.
+
+
+
+
+Luciani, et. al. Standards Track [Page 27]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ When an NHS receives an NHRP Registration Request which has the
+ Destination Protocol Address field set to an address which belongs to
+ a LIS/LAG for which the NHS is serving then if the Destination
+ Protocol Address field does not equal the Source Protocol Address
+ field and the Destination Protocol Address field does not equal the
+ protocol address of the NHS then the NHS forwards the message to the
+ appropriate NHS within the LIS/LAG as specified by Destination
+ Protocol Address field.
+
+ It is possible that a misconfigured station will attempt to register
+ with the wrong NHS (i.e., one that cannot serve it due to policy
+ constraints or routing state). If this is the case, the NHS MUST
+ reply with a NAK-ed Registration Reply of type Can't Serve This
+ Address.
+
+ If an NHS cannot serve a station due to a lack of resources, the NHS
+ MUST reply with a NAK-ed Registration Reply of type Registration
+ Overflow.
+
+ In order to keep the registration entry from being discarded, the
+ station MUST re-send the NHRP Registration Request packet often
+ enough to refresh the registration, even in the face of occasional
+ packet loss. It is recommended that the NHRP Registration Request
+ packet be sent at an interval equal to one-third of the Holding Time
+ specified therein.
+
+5.2.4 NHRP Registration Reply
+
+ The NHRP Registration Reply is sent by an NHS to a client in response
+ to that client's NHRP Registration Request. If the Code field of a
+ CIE in the NHRP Registration Reply has anything other than zero in it
+ then the NHRP Registration Reply is a NAK otherwise the reply is an
+ ACK. The NHRP Registration Reply has a Type code of 4.
+
+ An NHRP Registration Reply is formed from an NHRP Registration
+ Request by changing the type code to 4, updating the CIE Code field,
+ and filling in the appropriate extensions if they exist. The message
+ specific meanings of the fields are as follows:
+
+ Attempts to register the information in the CIEs of an NHRP
+ Registration Request may fail for various reasons. If this is the
+ case then each failed attempt to register the information in a CIE of
+ an NHRP Registration Request is logged in the associated NHRP
+ Registration Reply by setting the CIE Code field to the appropriate
+ error code as shown below:
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 28]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ CIE Code
+
+ 0 - Successful Registration
+
+ The information in the CIE was successfully registered with the
+ NHS.
+
+ 4 - Administratively Prohibited
+
+ An NHS may refuse an NHRP Registration Request attempt for
+ administrative reasons (due to policy constraints or routing
+ state). If so, the NHS MUST send an NHRP Registration Reply
+ which contains a NAK code of 4.
+
+ 5 - Insufficient Resources
+
+ If an NHS cannot serve a station due to a lack of resources,
+ the NHS MUST reply with a NAKed NHRP Registration Reply which
+ contains a NAK code of 5.
+
+ 14 - Unique Internetworking Layer Address Already Registered
+ If a client tries to register a protocol address to NBMA
+ address binding with the uniqueness bit on and the protocol
+ address already exists in the NHS's cache then if that cache
+ entry also has the uniqueness bit on then this NAK Code is
+ returned in the CIE in the NHRP Registration Reply.
+
+ Due to the possible existence of asymmetric routing, an NHRP
+ Registration Reply may not be able to merely follow the routed path
+ back to the source protocol address specified in the common header of
+ the NHRP Registration Reply. As a result, there MUST exist a direct
+ NBMA level connection between the NHC and its NHS on which to send
+ the NHRP Registration Reply before the NHRP Registration Reply may be
+ returned to the NHC. If such a connection does not exist then the
+ NHS must set up such a connection to the NHC by using the source NBMA
+ information supplied in the common header of the NHRP Registration
+ Request.
+
+5.2.5 NHRP Purge Request
+
+ The NHRP Purge Request packet is sent in order to invalidate cached
+ information in a station. The NHRP Purge Request packet has a type
+ code of 5. The mandatory part of an NHRP Purge Request is coded as
+ described in Section 5.2.0.1. The message specific meanings of the
+ fields are as follows:
+
+ Flags - The flags field is coded as follows:
+
+
+
+
+Luciani, et. al. Standards Track [Page 29]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ 0 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |N| unused |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ N
+ When set, this bit tells the receiver of the NHRP Purge Request
+ that the requester does not expect to receive an NHRP Purge
+ Reply. If an unsolicited NHRP Purge Reply is received by a
+ station where that station is identified in the Source Protocol
+ Address of the packet then that packet must be ignored.
+
+ One or more CIEs are specified in the NHRP Purge Request. Each CIE
+ contains next hop information which is to be purged from an NHS/NHC
+ cache. Generally, all fields in CIEs enclosed in NHRP Purge Requests
+ are coded as described in Section 5.2.0.1. Below, further
+ clarification is given for some fields in a CIE in the context of a
+ NHRP Purge Request.
+
+ Code
+ This field is set to 0x00 in NHRP Purge Requests.
+
+ Prefix Length
+
+ In the case of NHRP Purge Requests, the Prefix Length specifies
+ the equivalence class of addresses which match the first "Prefix
+ Length" bit positions of the Client Protocol Address specified in
+ the CIE. All next hop information which contains a protocol
+ address which matches an element of this equivalence class is to
+ be purged from the receiver's cache.
+
+ The Maximum Transmission Unit and Preference fields of the CIE are
+ coded as zero. The Holding Time should be coded as zero but there
+ may be some utility in supplying a "short" holding time to be
+ applied to the matching next hop information before that
+ information would be purged; this usage is for further study. The
+ Client Protocol Address field and the Cli Proto Len field MUST be
+ filled in. The Client Protocol Address is filled in with the
+ protocol address to be purged from the receiving station's cache
+ while the Cli Proto Len is set to the length of the purged client's
+ protocol address. All remaining fields in the CIE MAY be set to
+ zero although the client NBMA information (and associated length
+ fields) MAY be specified to narrow the scope of the NHRP Purge
+ Request if the requester desires. However, the receiver of an NHRP
+ Purge Request may choose to ignore the Client NBMA information if
+ it is supplied.
+
+
+
+
+Luciani, et. al. Standards Track [Page 30]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ An NHRP Purge Request packet is sent from an NHS to a station to
+ cause it to delete previously cached information. This is done when
+ the information may be no longer valid (typically when the NHS has
+ previously provided next hop information for a station that is not
+ directly connected to the NBMA subnetwork, and the egress point to
+ that station may have changed).
+
+ An NHRP Purge Request packet may also be sent from an NHC to an NHS
+ with which the NHC had previously registered. This allows for an NHC
+ to invalidate its registration with NHRP before it would otherwise
+ expire via the holding timer. If an NHC does not have knowledge of a
+ protocol address of a serving NHS then the NHC must place its own
+ protocol address in the Destination Protocol Address field and
+ forward the packet along the routed path. Otherwise, the NHC must
+ place the protocol address of a serving NHS in this field.
+
+ Serving NHSs may need to send one or more new NHRP Purge Requests as
+ a result of receiving a purge from one of their served NHCs since the
+ NHS may have previously responded to NHRP Resolution Requests for
+ that NHC's NBMA information. These purges are "new" in that they are
+ sourced by the NHS and not the NHC; that is, for each NHC that
+ previously sent a NHRP Resolution Request for the purged NHC NBMA
+ information, an NHRP Purge Request is sent which contains the Source
+ Protocol/NBMA Addresses of the NHS and the Destination Protocol
+ Address of the NHC which previously sent an NHRP Resolution Request
+ prior to the purge.
+
+ The station sending the NHRP Purge Request MAY periodically
+ retransmit the NHRP Purge Request until either the NHRP Purge Request is
+ acknowledged or until the holding time of the information being
+ purged has expired. Retransmission strategies for NHRP Purge Requests
+ are a local matter.
+
+ When a station receives an NHRP Purge Request, it MUST discard any
+ previously cached information that matches the information in the
+ CIEs.
+
+ An NHRP Purge Reply MUST be returned for the NHRP Purge Request even
+ if the station does not have a matching cache entry assuming that the
+ "N" bit is off in the NHRP Purge Request.
+
+ If the station wishes to reestablish communication with the
+ destination shortly after receiving an NHRP Purge Request, it should
+ make an authoritative NHRP Resolution Request in order to avoid any
+ stale cache entries that might be present in intermediate NHSs (See
+ section 6.2.2.). It is recommended that authoritative NHRP
+ Resolution Requests be made for the duration of the holding time of
+ the old information.
+
+
+
+Luciani, et. al. Standards Track [Page 31]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+5.2.6 NHRP Purge Reply
+
+ The NHRP Purge Reply packet is sent in order to assure the sender of
+ an NHRP Purge Request that all cached information of the specified
+ type has been purged from the station sending the reply. The NHRP
+ Purge Reply has a type code of 6.
+
+ An NHRP Purge Reply is formed from an NHRP Purge Request by merely
+ changing the type code in the request to 6. The packet is then
+ returned to the requester after filling in the appropriate extensions
+ if they exist.
+
+5.2.7 NHRP Error Indication
+
+ The NHRP Error Indication is used to convey error indications to the
+ sender of an NHRP packet. It has a type code of 7. The Mandatory
+ Part has the following format:
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Src Proto Len | Dst Proto Len | unused |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Error Code | Error Offset |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Source NBMA Address (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Source NBMA Subaddress (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Source Protocol Address (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Destination Protocol Address (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Contents of NHRP Packet in error (variable length) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Src Proto Len
+ This field holds the length in octets of the Source Protocol
+ Address.
+
+ Dst Proto Len
+ This field holds the length in octets of the Destination Protocol
+ Address.
+
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 32]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ Error Code
+ An error code indicating the type of error detected, chosen from
+ the following list:
+
+ 1 - Unrecognized Extension
+
+ When the Compulsory bit of an extension in an NHRP packet is set,
+ the NHRP packet cannot be processed unless the extension has
+ been processed. The responder MUST return an NHRP Error
+ Indication of type Unrecognized Extension if it is incapable of
+ processing the extension. However, if a transit NHS (one which
+ is not going to generate a reply) detects an unrecognized
+ extension, it SHALL ignore the extension.
+
+ 3 - NHRP Loop Detected
+
+ A Loop Detected error is generated when it is determined that
+ an NHRP packet is being forwarded in a loop.
+
+ 6 - Protocol Address Unreachable
+
+ This error occurs when a packet is moving along the routed path
+ and it reaches a point such that the protocol address of
+ interest is not reachable.
+
+ 7 - Protocol Error
+
+ A generic packet processing error has occurred (e.g., invalid
+ version number, invalid protocol type, failed checksum, etc.)
+
+ 8 - NHRP SDU Size Exceeded
+
+ If the SDU size of the NHRP packet exceeds the MTU size of the
+ NBMA network then this error is returned.
+
+ 9 - Invalid Extension
+
+ If an NHS finds an extension in a packet which is inappropriate
+ for the packet type, an error is sent back to the sender with
+ Invalid Extension as the code.
+
+ 10 - Invalid NHRP Resolution Reply Received
+
+ If a client receives a NHRP Resolution Reply for a Next Hop
+ Resolution Request which it believes it did not make then an
+ error packet is sent to the station making the reply with an
+ error code of Invalid Reply Received.
+
+
+
+
+Luciani, et. al. Standards Track [Page 33]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ 11 - Authentication Failure
+
+ If a received packet fails an authentication test then this
+ error is returned.
+
+ 15 - Hop Count Exceeded
+
+ The hop count which was specified in the Fixed Header of an
+ NHRP message has been exceeded.
+
+ Error Offset
+ The offset in octets into the original NHRP packet in which an
+ error was detected. This offset is calculated starting from the
+ NHRP Fixed Header.
+
+ Source NBMA Address
+ The Source NBMA address field is the address of the station which
+ observed the error.
+
+ Source NBMA SubAddress
+ The Source NBMA subaddress field is the subaddress of the station
+ which observed the error. If the field's length as specified in
+ ar$sstl is 0 then no storage is allocated for this address at all.
+
+ Source Protocol Address
+ This is the protocol address of the station which issued the Error
+ packet.
+
+ Destination Protocol Address
+ This is the protocol address of the station which sent the packet
+ which was found to be in error.
+
+ An NHRP Error Indication packet SHALL NEVER be generated in response
+ to another NHRP Error Indication packet. When an NHRP Error
+ Indication packet is generated, the offending NHRP packet SHALL be
+ discarded. In no case should more than one NHRP Error Indication
+ packet be generated for a single NHRP packet.
+
+ If an NHS sees its own Protocol and NBMA Addresses in the Source NBMA
+ and Source Protocol address fields of a transiting NHRP Error
+ Indication packet then the NHS will quietly drop the packet and do
+ nothing (this scenario would occur when the NHRP Error Indication
+ packet was itself in a loop).
+
+ Note that no extensions may be added to an NHRP Error Indication.
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 34]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+5.3 Extensions Part
+
+ The Extensions Part, if present, carries one or more extensions in
+ {Type, Length, Value} triplets.
+
+ Extensions have the following format:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |C|u| Type | Length |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Value... |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ C
+ "Compulsory." If clear, and the NHS does not recognize the type
+ code, the extension may safely be ignored. If set, and the NHS
+ does not recognize the type code, the NHRP "request" is considered
+ to be in error. (See below for details.)
+
+ u
+ Unused and must be set to zero.
+
+ Type
+ The extension type code (see below). The extension type is not
+ qualified by the Compulsory bit, but is orthogonal to it.
+
+ Length
+ The length in octets of the value (not including the Type and
+ Length fields; a null extension will have only an extension header
+ and a length of zero).
+
+ When extensions exist, the extensions list is terminated by the Null
+ TLV, having Type = 0 and Length = 0.
+
+ Extensions may occur in any order, but any particular extension type
+ may occur only once in an NHRP packet unless explicitly stated to the
+ contrary in the extensions definition. For example, the vendor-
+ private extension may occur multiple times in a packet in order to
+ allow for extensions which do not share the same vendor ID to be
+ represented. It is RECOMMENDED that a given vendor include no more
+ than one Vendor Private Extension.
+
+ An NHS MUST NOT change the order of extensions. That is, the order
+ of extensions placed in an NHRP packet by an NHC (or by an NHS when
+ an NHS sources a packet) MUST be preserved as the packet moves
+ between NHSs. Minimal NHC implementations MUST only recognize, but
+
+
+
+Luciani, et. al. Standards Track [Page 35]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ not necessarily parse, the Vendor Private extension and the End Of
+ Extensions extension. Extensions are only present in a "reply" if
+ they were present in the corresponding "request" with the exception
+ of Vendor Private extensions. The previous statement is not intended
+ to preclude the creation of NHS-only extensions which might be added
+ to and removed from NHRP packets by the same NHS; such extensions
+ MUST NOT be propagated to NHCs.
+
+ The Compulsory bit provides for a means to add to the extension set.
+ If the bit is set in an extension then the station responding to the
+ NHRP message which contains that extension MUST be able to understand
+ the extension (in this case, the station responding to the message is
+ the station that would issue an NHRP reply in response to a NHRP
+ request). As a result, a responder that is incapable of processing
+ the extension MUST return an NHRP Error Indication of type
+ Unrecognized Extension. If the Compulsory bit is
+ clear then the extension can be safely ignored; however, if an
+ ignored extension is in a "request" then it MUST be returned,
+ unchanged, in the corresponding "reply" packet type.
+
+ If a transit NHS (one which is not going to generate a "reply")
+ detects an unrecognized extension, it SHALL ignore the extension. If
+ the Compulsory bit is set, the transit NHS MUST NOT cache the
+ information contained in the packet and MUST NOT identify itself as
+ an egress router (in the Forward Record or Reverse Record
+ extensions). Effectively, this means, if a transit NHS encounters an
+ extension which it cannot process and which has the Compulsory bit
+ set then that NHS MUST NOT participate in any way in the protocol
+ exchange other than acting as a forwarding agent.
+
+ The NHRP extension Type space is subdivided to encourage use outside
+ the IETF.
+
+ 0x0000 - 0x0FFF Reserved for NHRP.
+ 0x1000 - 0x11FF Allocated to the ATM Forum.
+ 0x1200 - 0x37FF Reserved for the IETF.
+ 0x3800 - 0x3FFF Experimental use.
+
+ IANA will administer the ranges reserved for the IETF as described in
+ Section 9. Values in the 'Experimental use' range have only local
+ significance.
+
+5.3.0 The End Of Extensions
+
+ Compulsory = 1
+ Type = 0
+ Length = 0
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 36]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ When extensions exist, the extensions list is terminated by the End
+ Of Extensions/Null TLV.
+
+5.3.1 Responder Address Extension
+
+ Compulsory = 1
+ Type = 3
+ Length = variable
+
+ This extension is used to determine the address of the NHRP
+ responder; i.e., the entity that generates the appropriate "reply"
+ packet for a given "request" packet. In the case of an NHRP
+ Resolution Request, the station responding may be different (in the
+ case of cached replies) than the system identified in the Next Hop
+ field of the NHRP Resolution Reply. Further, this extension may aid
+ in detecting loops in the NHRP forwarding path.
+
+ This extension uses a single CIE with the extension specific meanings
+ of the fields set as follows:
+
+ The Prefix Length fields MUST be set to 0 and ignored.
+
+ CIE Code
+ 5 - Insufficient Resources
+ If the responder to an NHRP Resolution Request is an egress point
+ for the target of the address resolution request (i.e., it is one
+ of the stations identified in the list of CIEs in an NHRP
+ Resolution Reply) and the Responder Address extension is included
+ in the NHRP Resolution Request and insufficient resources to
+ setup a cut-through VC exist at the responder then the Code field
+ of the Responder Address Extension is set to 5 in order to tell
+ the client that a VC setup attempt would in all likelihood be
+ rejected; otherwise this field MUST be coded as a zero. NHCs MAY
+ use this field to influence whether they attempt to setup a cut-
+ through to the egress router.
+
+ Maximum Transmission Unit
+ This field gives the maximum transmission unit preferred by the
+ responder. If this value is 0 then either the default MTU is used
+ or the MTU negotiated via signaling is used if such negotiation is
+ possible for the given NBMA.
+
+ Holding Time
+ The Holding Time field specifies the number of seconds for which
+ the NBMA information of the responder is considered to be valid.
+ Cached information SHALL be discarded when the holding time
+ expires.
+
+
+
+
+Luciani, et. al. Standards Track [Page 37]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ "Client Address" information is actually "Responder Address"
+ information for this extension. Thus, for example, Cli Addr T/L is
+ the responder NBMA address type and length field.
+
+ If a "requester" desires this information, the "requester" SHALL
+ include this extension with a value of zero. Note that this implies
+ that no storage is allocated for the Holding Time and Type/Length
+ fields until the "Value" portion of the extension is filled out.
+
+ If an NHS is generating a "reply" packet in response to a "request"
+ containing this extension, the NHS SHALL include this extension,
+ containing its protocol address in the "reply". If an NHS has more
+ than one protocol address, it SHALL use the same protocol address
+ consistently in all of the Responder Address, Forward Transit NHS
+ Record, and Reverse Transit NHS Record extensions. The choice of
+ which of several protocol addresses to include in this extension is a
+ local matter.
+
+ If an NHRP Resolution Reply packet being forwarded by an NHS contains
+ a protocol address of that NHS in the Responder Address Extension
+ then that NHS SHALL generate an NHRP Error Indication of type "NHRP
+ Loop Detected" and discard the NHRP Resolution Reply.
+
+ If an NHRP Resolution Reply packet is being returned by an
+ intermediate NHS based on cached data, it SHALL place its own address
+ in this extension (differentiating it from the address in the Next
+ Hop field).
+
+5.3.2 NHRP Forward Transit NHS Record Extension
+
+ Compulsory = 1
+ Type = 4
+ Length = variable
+
+ The NHRP Forward Transit NHS record contains a list of transit NHSs
+ through which a "request" has traversed. Each NHS SHALL append to
+ the extension a Forward Transit NHS element (as specified below)
+ containing its Protocol address. The extension length field and the
+ ar$chksum fields SHALL be adjusted appropriately.
+
+ The responding NHS, as described in Section 5.3.1, SHALL NOT update
+ this extension.
+
+ In addition, NHSs that are willing to act as egress routers for
+ packets from the source to the destination SHALL include information
+ about their NBMA Address.
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 38]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ This extension uses a single CIE per NHS Record element with the
+ extension specific meanings of the fields set as follows:
+
+ The Prefix Length fields MUST be set to 0 and ignored.
+
+ CIE Code
+ 5 - Insufficient Resources
+ If an NHRP Resolution Request contains an NHRP Forward Transit
+ NHS Record Extension and insufficient resources to setup a cut-
+ through VC exist at the current transit NHS then the CIE Code
+ field for NHRP Forward Transit NHS Record Extension is set to 5
+ in order to tell the client that a VC setup attempt would in all
+ likelihood be rejected; otherwise this field MUST be coded as a
+ zero. NHCs MAY use this field to influence whether they attempt
+ to setup a cut-through as described in Section 2.2. Note that
+ the NHRP Reverse Transit NHS Record Extension MUST always have
+ this field set to zero.
+
+ Maximum Transmission Unit
+ This field gives the maximum transmission unit preferred by the
+ transit NHS. If this value is 0 then either the default MTU is
+ used or the MTU negotiated via signaling is used if such
+ negotiation is possible for the given NBMA.
+
+ Holding Time
+ The Holding Time field specifies the number of seconds for which
+ the NBMA information of the transit NHS is considered to be valid.
+ Cached information SHALL be discarded when the holding time
+ expires.
+
+ "Client Address" information is actually "Forward Transit NHS
+ Address" information for this extension. Thus, for example, Cli Addr
+ T/L is the transit NHS NBMA address type and length field.
+
+ If a "requester" wishes to obtain this information, it SHALL include
+ this extension with a length of zero. Note that this implies that no
+ storage is allocated for the Holding Time and Type/Length fields
+ until the "Value" portion of the extension is filled out.
+
+ If an NHS has more than one Protocol address, it SHALL use the same
+ Protocol address consistently in all of the Responder Address,
+ Forward NHS Record, and Reverse NHS Record extensions. The choice of
+ which of several Protocol addresses to include in this extension is a
+ local matter.
+
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 39]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ If a "request" that is being forwarded by an NHS contains the
+ Protocol Address of that NHS in one of the Forward Transit NHS
+ elements then the NHS SHALL generate an NHRP Error Indication of type
+ "NHRP Loop Detected" and discard the "request".
+
+5.3.3 NHRP Reverse Transit NHS Record Extension
+
+ Compulsory = 1
+ Type = 5
+ Length = variable
+
+ The NHRP Reverse Transit NHS record contains a list of transit NHSs
+ through which a "reply" has traversed. Each NHS SHALL append a
+ Reverse Transit NHS element (as specified below) containing its
+ Protocol address to this extension. The extension length field and
+ ar$chksum SHALL be adjusted appropriately.
+
+ The responding NHS, as described in Section 5.3.1, SHALL NOT update
+ this extension.
+
+ In addition, NHSs that are willing to act as egress routers for
+ packets from the source to the destination SHALL include information
+ about their NBMA Address.
+
+ This extension uses a single CIE per NHS Record element with the
+ extension specific meanings of the fields set as follows:
+
+ The CIE Code and Prefix Length fields MUST be set to 0 and ignored.
+
+ Maximum Transmission Unit
+ This field gives the maximum transmission unit preferred by the
+ transit NHS. If this value is 0 then either the default MTU is
+ used or the MTU negotiated via signaling is used if such
+ negotiation is possible for the given NBMA.
+
+ Holding Time
+ The Holding Time field specifies the number of seconds for which
+ the NBMA information of the transit NHS is considered to be valid.
+ Cached information SHALL be discarded when the holding time
+ expires.
+
+ "Client Address" information is actually "Reverse Transit NHS
+ Address" information for this extension. Thus, for example, Cli Addr
+ T/L is the transit NHS NBMA address type and length field.
+
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 40]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ If a "requester" wishes to obtain this information, it SHALL include
+ this extension with a length of zero. Note that this implies that no
+ storage is allocated for the Holding Time and Type/Length fields
+ until the "Value" portion of the extension is filled out.
+
+ If an NHS has more than one Protocol address, it SHALL use the same
+ Protocol address consistently in all of the Responder Address,
+ Forward NHS Record, and Reverse NHS Record extensions. The choice of
+ which of several Protocol addresses to include in this extension is a
+ local matter.
+
+ If a "reply" that is being forwarded by an NHS contains the Protocol
+ Address of that NHS in one of the Reverse Transit NHS elements then
+ the NHS SHALL generate an NHRP Error Indication of type "NHRP Loop
+ Detected" and discard the "reply".
+
+ Note that this information may be cached at intermediate NHSs; if
+ so, the cached value SHALL be used when generating a reply.
+
+5.3.4 NHRP Authentication Extension
+
+ Compulsory = 1 Type = 7 Length = variable
+
+ The NHRP Authentication Extension is carried in NHRP packets to
+ convey authentication information between NHRP speakers. The
+ Authentication Extension may be included in any NHRP "request" or
+ "reply" only.
+
+ The authentication is always done pairwise on an NHRP hop-by-hop
+ basis; i.e., the authentication extension is regenerated at each
+ hop. If a received packet fails the authentication test, the station
+ SHALL generate an Error Indication of type "Authentication Failure"
+ and discard the packet. Note that one possible authentication failure
+ is the lack of an Authentication Extension; the presence or absence
+ of the Authentication Extension is a local matter.
+
+5.3.4.1 Header Format
+
+ The authentication header has the following format:
+
+
+
+
+
+
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 41]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Reserved | Security Parameter Index (SPI)|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Src Addr... |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ +-+-+-+-+-+-+-+-+-+-+ Authentication Data... -+-+-+-+-+-+-+-+-+-+
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Security Parameter Index (SPI) can be thought of as an index into a
+ table that maintains the keys and other information such as hash
+ algorithm. Src and Dst communicate either offline using manual keying
+ or online using a key management protocol to populate this table. The
+ sending NHRP entity always allocates the SPI and the parameters
+ associated with it.
+
+ Src Addr, a variable length field, is the address assigned to the
+ outgoing interface. The length of the addr is obtained from the
+ source protocol length field in the mandatory part of the NHRP
+ header. The tuple <spi, src addr> uniquely identifies the key and
+ other parameters that are used in authentication.
+
+ The length of the authentication data field is dependent on the hash
+ algorithm used. The data field contains the keyed hash calculated
+ over the entire NHRP payload. The authentication data field is zeroed
+ out before the hash is calculated.
+
+5.3.4.2 SPI and Security Parameters Negotiation
+
+ SPI's can be negotiated either manually or using an Internet Key
+ Management protocol. Manual keying MUST be supported. The following
+ parameters are associated with the tuple <SPI, src>: lifetime,
+ Algorithm, Key. Lifetime indicates the duration in seconds for which
+ the key is valid. In case of manual keying, this duration can be
+ infinite. Also, in order to better support manual keying, there may
+ be multiple tuples active at the same time (Dst being the same).
+
+ Algorithm specifies the hash algorithm agreed upon by the two
+ entities. HMAC-MD5-128 [16] is the default algorithm. Other
+ algorithms MAY be supported by defining new values. IANA will assign
+ the numbers to identify the algorithm being used as described in
+ Section 9.
+
+ Any Internet standard key management protocol MAY also be used to
+ negotiate the SPI and parameters.
+
+
+
+Luciani, et. al. Standards Track [Page 42]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+5.3.4.3 Message Processing
+
+ At the time of adding the authentication extension header, src looks
+ up in a table to fetch the SPI and the security parameters based on
+ the outgoing interface address. If there are no entries in the table
+ and if there is support for key management, the src initiates the key
+ management protocol to fetch the necessary parameters. The src
+ constructs the Authentication Extension payload and calculates the
+ hash by zeroing the authentication data field. The result replaces the
+ zeroed authentication data field. The src address field in the
+ payload is the IP address assigned to the outgoing interface.
+
+ If key management is not supported and authentication is mandatory,
+ the packet is dropped and this information is logged.
+
+ On the receiving end, dst fetches the parameters based on the SPI and
+ the ip address in the authentication extension payload. The
+ authentication data field is extracted before zeroing out to
+ calculate the hash. It computes the hash on the entire payload and if
+ the hash does not match, then an "abnormal event" has occurred.
+
+5.3.4.4 Security Considerations
+
+ It is important that the keys chosen are strong as the security of
+ the entire system depends on the keys being chosen properly and the
+ correct implementation of the algorithms.
+
+ The security is performed on a hop by hop basis. The data received
+ can be trusted only so much as one trusts all the entities in the
+ path traversed. A chain of trust is established amongst NHRP entities
+ in the path of the NHRP Message. If the security in an NHRP entity
+ is compromised, then security in the entire NHRP domain is
+ compromised.
+
+ Data integrity covers the entire NHRP payload. This guarantees that
+ the message was not modified and the source is authenticated as well.
+ If authentication extension is not used or if the security is
+ compromised, then NHRP entities are liable to both spoofing attacks,
+ active attacks and passive attacks.
+
+ There is no mechanism to encrypt the messages. It is assumed that a
+ standard layer 3 confidentiality mechanism will be used to encrypt
+ and decrypt messages. It is recommended to use an Internet standard
+ key management protocol to negotiate the keys between the neighbors.
+ Transmitting the keys in clear text, if other methods of negotiation
+ are used, compromises the security completely.
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 43]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ Any NHS is susceptible to Denial of Service (DOS) attacks that cause
+ it to become overloaded, preventing legitimate packets from being
+ acted upon properly. A rogue host can send request and registration
+ packets to the first hop NHS. If the authentication option is not
+ used, the registration packet is forwarded along the routed path
+ requiring processing along each NHS. If the authentication option is
+ used, then only the first hop NHS is susceptible to DOS attacks
+ (i.e., unauthenticated packets will be dropped rather than forwarded
+ on). If security of any host is compromised (i.e., the keys it is
+ using to communicate with an NHS become known), then a rogue host can
+ send NHRP packets to the first hop NHS of the host whose keys were
+ compromised, which will then forward them along the routed path as in
+ the case of unauthenticated packets. However, this attack requires
+ that the rogue host have the same first hop NHS as that of the
+ compromised host. Finally, it should be noted that denial of service
+ attacks that cause routers on the routed path to expend resources
+ processing NHRP packets are also susceptible to attacks that flood
+ packets at the same destination as contained in an NHRP packet's
+ Destination Protocol Address field.
+
+5.3.5 NHRP Vendor-Private Extension
+
+ Compulsory = 0
+ Type = 8
+ Length = variable
+
+ The NHRP Vendor-Private Extension is carried in NHRP packets to
+ convey vendor-private information or NHRP extensions between NHRP
+ speakers.
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Vendor ID | Data.... |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Vendor ID
+ 802 Vendor ID as assigned by the IEEE [6]
+
+ Data
+ The remaining octets after the Vendor ID in the payload are
+ vendor-dependent data.
+
+ This extension may be added to any "request" or "reply" packet and it
+ is the only extension that may be included multiple times. If the
+ receiver does not handle this extension, or does not match the Vendor
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 44]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ ID in the extension then the extension may be completely ignored by
+ the receiver. If a Vendor Private Extension is included in a
+ "request" then it must be copied to the corresponding "reply".
+
+6. Protocol Operation
+
+ In this section, we discuss certain operational considerations of
+ NHRP.
+
+6.1 Router-to-Router Operation
+
+ In practice, the initiating and responding stations may be either
+ hosts or routers. However, there is a possibility under certain
+ conditions that a stable routing loop may occur if NHRP is used
+ between two routers. In particular, attempting to establish an NHRP
+ path across a boundary where information used in route selection is
+ lost may result in a routing loop. Such situations include the loss
+ of BGP path vector information, the interworking of multiple routing
+ protocols with dissimilar metrics (e.g, RIP and OSPF), etc. In such
+ circumstances, NHRP should not be used. This situation can be
+ avoided if there are no "back door" paths between the entry and
+ egress router outside of the NBMA subnetwork. Protocol mechanisms to
+ relax these restrictions are under investigation.
+
+ In general it is preferable to use mechanisms, if they exist, in
+ routing protocols to resolve the egress point when the destination
+ lies outside of the NBMA subnetwork, since such mechanisms will be
+ more tightly coupled to the state of the routing system and will
+ probably be less likely to create loops.
+
+6.2 Cache Management Issues
+
+ The management of NHRP caches in the source station, the NHS serving
+ the destination, and any intermediate NHSs is dependent on a number
+ of factors.
+
+6.2.1 Caching Requirements
+
+ Source Stations
+
+ Source stations MUST cache all received NHRP Resolution Replies
+ that they are actively using. They also must cache "incomplete"
+ entries, i.e., those for which a NHRP Resolution Request has been
+ sent but those for which an NHRP Resolution Reply has not been
+ received. This is necessary in order to preserve the Request ID
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 45]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ for retries, and provides the state necessary to avoid triggering
+ NHRP Resolution Requests for every data packet sent to the
+ destination.
+
+ Source stations MUST purge expired information from their caches.
+ Source stations MUST purge the appropriate cached information upon
+ receipt of an NHRP Purge Request packet.
+
+ When a station has a co-resident NHC and NHS, the co-resident NHS
+ may reply to NHRP Resolution Requests from the co-resident NHC with
+ information which the station cached as a result of the co-resident
+ NHC making its own NHRP Resolution Requests as long as the co-
+ resident NHS follows the rules for Transit NHSs as seen below.
+
+ Serving NHSs
+
+ The NHS serving the destination (the one which responds
+ authoritatively to NHRP Resolution Requests) SHOULD cache protocol
+ address information from all NHRP Resolution Requests to which it
+ has responded if the information in the NHRP Resolution Reply has
+ the possibility of changing during its lifetime (so that an NHRP
+ Purge Request packet can be issued). The internetworking to NBMA
+ binding information provided by the source station in the NHRP
+ Resolution Request may also be cached if and only if the "S" bit is
+ set, the NHRP Resolution Request has included a CIE with the
+ Holding Time field set greater than zero (this is the valid Holding
+ Time for the source binding), and only for non-authoritative use
+ for a period not to exceed the Holding Time.
+
+ Transit NHSs
+
+ A Transit NHS (lying along the NHRP path between the source station
+ and the responding NHS) may cache source binding information
+ contained in NHRP Resolution Request packets that it forwards if
+ and only if the "S" bit is set, the NHRP Resolution Request has
+ included a CIE with the Holding Time field set greater than zero
+ (this is the valid Holding Time for the source binding), and only
+ for non-authoritative use for a period not to exceed the Holding
+ Time.
+
+ A Transit NHS may cache destination information contained in NHRP
+ Resolution Reply CIE if and only if the D bit is set and then only for
+ non-authoritative use for a period not to exceed the Holding Time
+ value contained in the CIE. A Transit NHS MUST NOT cache source
+ binding information contained in an NHRP Resolution Reply.
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 46]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ Further, a transit NHS MUST discard any cached information when the
+ prescribed time has expired. It may return cached information in
+ response to non-authoritative NHRP Resolution Requests only.
+
+6.2.2 Dynamics of Cached Information
+
+ NBMA-Connected Destinations
+
+ NHRP's most basic function is that of simple NBMA address
+ resolution of stations directly attached to the NBMA subnetwork.
+ These mappings are typically very static, and appropriately chosen
+ holding times will minimize problems in the event that the NBMA
+ address of a station must be changed. Stale information will cause
+ a loss of connectivity, which may be used to trigger an
+ authoritative NHRP Resolution Request and bypass the old data. In
+ the worst case, connectivity will fail until the cache entry times
+ out.
+
+ This applies equally to information marked in NHRP Resolution
+ Replies as being "stable" (via the "D" bit).
+
+ Destinations Off of the NBMA Subnetwork
+
+ If the source of an NHRP Resolution Request is a host and the
+ destination is not directly attached to the NBMA subnetwork, and
+ the route to that destination is not considered to be "stable," the
+ destination mapping may be very dynamic (except in the case of a
+ subnetwork where each destination is only singly homed to the NBMA
+ subnetwork). As such the cached information may very likely become
+ stale. The consequence of stale information in this case will be a
+ suboptimal path (unless the internetwork has partitioned or some
+ other routing failure has occurred).
+
+6.3 Use of the Prefix Length field of a CIE
+
+ A certain amount of care needs to be taken when using the Prefix
+ Length field of a CIE, in particular with regard to the prefix length
+ advertised (and thus the size of the equivalence class specified by
+ it). Assuming that the routers on the NBMA subnetwork are exchanging
+ routing information, it should not be possible for an NHS to create a
+ black hole by advertising too large of a set of destinations, but
+ suboptimal routing (e.g., extra internetwork layer hops through the
+ NBMA) can result. To avoid this situation an NHS that wants to send
+ the Prefix Length MUST obey the following rule:
+
+ The NHS examines the Network Layer Reachability Information (NLRI)
+ associated with the route that the NHS would use to forward towards
+ the destination (as specified by the Destination internetwork layer
+
+
+
+Luciani, et. al. Standards Track [Page 47]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ address in the NHRP Resolution Request), and extracts from this
+ NLRI the shortest address prefix such that: (a) the Destination
+ internetwork layer address (from the NHRP Resolution Request) is
+ covered by the prefix, (b) the NHS does not have any routes with
+ NLRI which form a subset of what is covered by the prefix. The
+ prefix may then be used in the CIE.
+
+ The Prefix Length field of the CIE should be used with restraint, in
+ order to avoid NHRP stations choosing suboptimal transit paths when
+ overlapping prefixes are available. This document specifies the use
+ of the prefix length only when all the destinations covered by the
+ prefix are "stable". That is, either:
+
+ (a) All destinations covered by the prefix are on the NBMA network,
+ or
+ (b) All destinations covered by the prefix are directly attached to
+ the NHRP responding station.
+
+ Use of the Prefix Length field of the CIE in other circumstances is
+ outside the scope of this document.
+
+6.4 Domino Effect
+
+ One could easily imagine a situation where a router, acting as an
+ ingress station to the NBMA subnetwork, receives a data packet, such
+ that this packet triggers an NHRP Resolution Request. If the router
+ forwards this data packet without waiting for an NHRP transit path to
+ be established, then when the next router along the path receives the
+ packet, the next router may do exactly the same - originate its own
+ NHRP Resolution Request (as well as forward the packet). In fact
+ such a data packet may trigger NHRP Resolution Request generation at
+ every router along the path through an NBMA subnetwork. We refer to
+ this phenomenon as the NHRP "domino" effect.
+
+ The NHRP domino effect is clearly undesirable. At best it may result
+ in excessive NHRP traffic. At worst it may result in an excessive
+ number of virtual circuits being established unnecessarily.
+ Therefore, it is important to take certain measures to avoid or
+ suppress this behavior. NHRP implementations for NHSs MUST provide a
+ mechanism to address this problem. One possible strategy to address
+ this problem would be to configure a router in such a way that NHRP
+ Resolution Request generation by the router would be driven only by
+ the traffic the router receives over its non-NBMA interfaces
+ (interfaces that are not attached to an NBMA subnetwork). Traffic
+ received by the router over its NBMA-attached interfaces would not
+ trigger NHRP Resolution Requests. Such a router avoids the NHRP
+ domino effect through administrative means.
+
+
+
+
+Luciani, et. al. Standards Track [Page 48]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+7. NHRP over Legacy BMA Networks
+
+ There would appear to be no significant impediment to running NHRP
+ over legacy broadcast subnetworks. There may be issues around
+ running NHRP across multiple subnetworks. Running NHRP on broadcast
+ media has some interesting possibilities; especially when setting up
+ a cut-through for inter-ELAN inter-LIS/LAG traffic when one or both
+ end stations are legacy attached. This use for NHRP requires further
+ research.
+
+8. Discussion
+
+ The result of an NHRP Resolution Request depends on how routing is
+ configured among the NHSs of an NBMA subnetwork. If the destination
+ station is directly connected to the NBMA subnetwork and the routed
+ path to it lies entirely within the NBMA subnetwork, the NHRP
+ Resolution Replies always return the NBMA address of the destination
+ station itself rather than the NBMA address of some egress router.
+ On the other hand, if the routed path exits the NBMA subnetwork, NHRP
+ will be unable to resolve the NBMA address of the destination, but
+ rather will return the address of the egress router. For
+ destinations outside the NBMA subnetwork, egress routers and routers
+ in the other subnetworks should exchange routing information so that
+ the optimal egress router may be found.
+
+ In addition to NHSs, an NBMA station could also be associated with
+ one or more regular routers that could act as "connectionless
+ servers" for the station. The station could then choose to resolve
+ the NBMA next hop or just send the packets to one of its
+ connectionless servers. The latter option may be desirable if
+ communication with the destination is short-lived and/or doesn't
+ require much network resources. The connectionless servers could, of
+ course, be physically integrated in the NHSs by augmenting them with
+ internetwork layer switching functionality.
+
+9. IANA Considerations
+
+ IANA will take advice from the Area Director appointed designated
+ subject matter expert, in order to assign numbers from the various
+ number spaces described herein. In the event that the Area Director
+ appointed designated subject matter expert is unavailable, the
+ relevant IESG Area Director will appoint another expert. Any and all
+ requests for value assignment within a given number space will be
+ accepted when the usage of the value assignment is documented. Possible
+ forms of documentation include, but are not limited to, RFCs or the
+ product of another cooperative standards body (e.g., the MPOA and
+ LANE subworking group of the ATM Forum).
+
+
+
+
+Luciani, et. al. Standards Track [Page 49]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+References
+
+ [1] Heinanen, J., and R. Govindan, "NBMA Address Resolution Protocol
+ (NARP)", RFC 1735, December 1994.
+
+ [2] Plummer, D., "Address Resolution Protocol", STD 37, RFC 826,
+ November 1982.
+
+ [3] Laubach, M., and J. Halpern, "Classical IP and ARP over ATM", RFC
+ 2225, April 1998.
+
+ [4] Piscitello, D., and J. Lawrence, "Transmission of IP datagrams
+ over the SMDS service", RFC 1209, March 1991.
+
+ [5] Protocol Identification in the Network Layer, ISO/IEC TR
+ 9577:1990.
+
+ [6] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, RFC 1700,
+ October 1994.
+
+ [7] Heinanen, J., "Multiprotocol Encapsulation over ATM Adaptation
+ Layer 5", RFC 1483, July 1993.
+
+ [8] Malis, A., Robinson, D., and R. Ullmann, "Multiprotocol
+ Interconnect on X.25 and ISDN in the Packet Mode", RFC 1356, August
+ 1992.
+
+ [9] Bradley, T., Brown, C., and A. Malis, "Multiprotocol Interconnect
+ over Frame Relay", RFC 1490, July 1993.
+
+ [10] Rekhter, Y., and D. Kandlur, ""Local/Remote" Forwarding Decision
+ in Switched Data Link Subnetworks", RFC 1937, May 1996.
+
+ [11] Armitage, G., "Support for Multicast over UNI 3.0/3.1 based ATM
+ Networks", RFC 2022, November 1996.
+
+ [12] Luciani, J., Armitage, G., and J. Halpern, "Server Cache
+ Synchronization Protocol (SCSP) - NBMA", RFC 2334, April 1998.
+
+ [13] Rekhter, Y., "NHRP for Destinations off the NBMA Subnetwork",
+ Work In Progress.
+
+ [14] Luciani, J., et. al., "Classical IP and ARP over ATM to NHRP
+ Transition", Work In Progress.
+
+ [15] Bradner, S., "Key words for use in RFCs to Indicate Requirement
+ Levels", BCP 14, RFC 2119, March 1997.
+
+
+
+
+Luciani, et. al. Standards Track [Page 50]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+ [16] Krawczyk, H., Bellare, M., and R. Canetti, "HMAC: Keyed Hashing
+ for Message Authentication", RFC 2104, February 1997.
+
+Acknowledgments
+
+ We would like to thank (in no particular order) Thomas Narten of IBM
+ for his comments in the role of Internet AD, Juha Heinanen of Telecom
+ Finland and Ramesh Govindan of ISI for their work on NBMA ARP and the
+ original NHRP draft, which served as the basis for this work.
+ Russell Gardo of IBM, John Burnett of Adaptive, Dennis Ferguson of
+ ANS, Andre Fredette of Bay Networks, Joel Halpern of Newbridge, Paul
+ Francis of NTT, Tony Li, Bryan Gleeson, and Yakov Rekhter of cisco,
+ and Grenville Armitage of Bellcore should also be acknowledged for
+ comments and suggestions that improved this work substantially. We
+ would also like to thank the members of the ION working group of the
+ IETF, whose review and discussion of this document have been
+ invaluable.
+
+Authors' Addresses
+
+ James V. Luciani Dave Katz
+ Bay Networks cisco Systems
+ 3 Federal Street 170 W. Tasman Dr.
+ Mail Stop: BL3-03 San Jose, CA 95134 USA
+ Billerica, MA 01821 Phone: +1 408 526 8284
+ Phone: +1 978 916 4734 EMail: dkatz@cisco.com
+ EMail: luciani@baynetworks.com
+
+ David Piscitello Bruce Cole
+ Core Competence Juniper Networks
+ 1620 Tuckerstown Road 3260 Jay St.
+ Dresher, PA 19025 USA Santa Clara, CA 95054
+ Phone: +1 215 830 0692 Phone: +1 408 327 1900
+ EMail: dave@corecom.com EMail: bcole@jnx.com
+
+ Naganand Doraswamy
+ Bay Networks, Inc.
+ 3 Federal Street
+ Mail Stop: Bl3-03
+ Billerica, MA 01801
+ Phone: +1 978 916 1323
+ EMail: naganand@baynetworks.com
+
+
+
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 51]
+
+RFC 2332 NBMA NHRP April 1998
+
+
+Full Copyright Statement
+
+ Copyright (C) The Internet Society (1998). All Rights Reserved.
+
+ This document and translations of it may be copied and furnished to
+ others, and derivative works that comment on or otherwise explain it
+ or assist in its implementation may be prepared, copied, published
+ and distributed, in whole or in part, without restriction of any
+ kind, provided that the above copyright notice and this paragraph are
+ included on all such copies and derivative works. However, this
+ document itself may not be modified in any way, such as by removing
+ the copyright notice or references to the Internet Society or other
+ Internet organizations, except as needed for the purpose of
+ developing Internet standards in which case the procedures for
+ copyrights defined in the Internet Standards process must be
+ followed, or as required to translate it into languages other than
+ English.
+
+ The limited permissions granted above are perpetual and will not be
+ revoked by the Internet Society or its successors or assigns.
+
+ This document and the information contained herein is provided on an
+ "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+ TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+ BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+ HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+ MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Luciani, et. al. Standards Track [Page 52]
+
diff --git a/etc/Makefile b/etc/Makefile
new file mode 100644
index 0000000..d9fdd8b
--- /dev/null
+++ b/etc/Makefile
@@ -0,0 +1,5 @@
+# Files from this directory that the install rule copies into the
+# configuration directory.
+# NOTE(review): opennhrp-script.cert lives in this directory too but is not
+# listed here, so this rule does not install it.
+ETCFILES := opennhrp.conf opennhrp-script racoon-ph1down.sh racoon-ph1dead.sh
+
+# Runs $(INSTALLDIR) on $(DESTDIR)$(CONFDIR), then $(INSTALL) to copy every
+# ETCFILES entry (taken from $(src)) into that directory.
+# INSTALLDIR, INSTALL, CONFDIR, DESTDIR and src are not defined in this file;
+# presumably they come from the top-level build rules -- confirm.
+install:
+ $(INSTALLDIR) $(DESTDIR)$(CONFDIR)
+ $(INSTALL) $(addprefix $(src)/,$(ETCFILES)) $(DESTDIR)$(CONFDIR)
diff --git a/etc/opennhrp-script b/etc/opennhrp-script
new file mode 100755
index 0000000..8d5e2d3
--- /dev/null
+++ b/etc/opennhrp-script
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+case $1 in
+interface-up)
+ ip route flush proto 42 dev $NHRP_INTERFACE
+ ip neigh flush dev $NHRP_INTERFACE
+ ;;
+peer-register)
+ ;;
+peer-up)
+ if [ -n "$NHRP_DESTMTU" ]; then
+ ARGS=`ip route get $NHRP_DESTNBMA from $NHRP_SRCNBMA | head -1`
+ ip route add $ARGS proto 42 mtu $NHRP_DESTMTU
+ fi
+ echo "Create link from $NHRP_SRCADDR ($NHRP_SRCNBMA) to $NHRP_DESTADDR ($NHRP_DESTNBMA)"
+ racoonctl establish-sa -w isakmp inet $NHRP_SRCNBMA $NHRP_DESTNBMA || exit 1
+ racoonctl establish-sa -w esp inet $NHRP_SRCNBMA $NHRP_DESTNBMA gre || exit 1
+ ;;
+peer-down)
+ echo "Delete link from $NHRP_SRCADDR ($NHRP_SRCNBMA) to $NHRP_DESTADDR ($NHRP_DESTNBMA)"
+ if [ "$NHRP_PEER_DOWN_REASON" != "lower-down" ]; then
+ racoonctl delete-sa isakmp inet $NHRP_SRCNBMA $NHRP_DESTNBMA
+ fi
+ ip route del $NHRP_DESTNBMA src $NHRP_SRCNBMA proto 42
+ ;;
+route-up)
+ echo "Route $NHRP_DESTADDR/$NHRP_DESTPREFIX is up"
+ ip route replace $NHRP_DESTADDR/$NHRP_DESTPREFIX proto 42 via $NHRP_NEXTHOP dev $NHRP_INTERFACE
+ ip route flush cache
+ ;;
+route-down)
+ echo "Route $NHRP_DESTADDR/$NHRP_DESTPREFIX is down"
+ ip route del $NHRP_DESTADDR/$NHRP_DESTPREFIX proto 42
+ ip route flush cache
+ ;;
+esac
+
+exit 0
diff --git a/etc/opennhrp-script.cert b/etc/opennhrp-script.cert
new file mode 100755
index 0000000..d013511
--- /dev/null
+++ b/etc/opennhrp-script.cert
@@ -0,0 +1,71 @@
+#!/bin/sh
+#
+# This version of the script check the X509 certificate used to authenticate
+# the IPsec connection. It parses a special format subject field, and verifies
+# the claimed GRE is bound to that certificate, before allowing NHRP
+# registration or direct tunnel to succeed.
+#
+# It also reconfigure BGP filters according to certificate contents. This is
+# only useful for hub nodes.
+#
+# Example of certificate:
+# subjectAltName: DirName:/OU=GRE=192.168.1.1/NET=10.1.0.0/16
+
+case $1 in
+interface-up)
+ ip route flush proto 42 dev $NHRP_INTERFACE
+ ip neigh flush dev $NHRP_INTERFACE
+ ;;
+peer-register)
+ (
+ flock -x 200
+
+ CERT=`racoonctl get-cert inet $NHRP_SRCNBMA $NHRP_DESTNBMA | openssl x509 -inform der -text -noout | egrep -o "/OU=[^/]*(/[0-9]+)?" | cut -b 5-`
+ if [ -z "`echo "$CERT" | grep "^GRE=$NHRP_DESTADDR"`" ]; then
+ echo "ERROR: IP $NHRP_DESTADDR at $NHRP_DESTNBMA NOT verified"
+ exit 1
+ fi
+
+ AS=`echo "$CERT" | grep "^AS=" | cut -b 4-`
+ vtysh -d bgpd -c "configure terminal" \
+ -c "router bgp $MY_AS" \
+ -c "neighbor $NHRP_DESTADDR remote-as $AS" \
+ -c "neighbor $NHRP_DESTADDR peer-group leaf" \
+ -c "neighbor $NHRP_DESTADDR prefix-list net-$AS-in in"
+
+ SEQ=5
+ (echo "$CERT" | grep "^NET=" | cut -b 5-) | while read NET; do
+ vtysh -d bgpd -c "configure terminal" \
+ -c "ip prefix-list net-$AS-in seq $SEQ permit $NET"
+ SEQ=$(($SEQ+5)
+ done
+ ) 200>/var/lock/opennhrp-script.lock
+ ;;
+peer-up)
+ echo "Create link from $NHRP_SRCADDR ($NHRP_SRCNBMA) to $NHRP_DESTADDR ($NHRP_DESTNBMA)"
+ racoonctl establish-sa -w isakmp inet $NHRP_SRCNBMA $NHRP_DESTNBMA || exit 1
+ racoonctl establish-sa -w esp inet $NHRP_SRCNBMA $NHRP_DESTNBMA gre || exit 1
+
+ CERT=`racoonctl get-cert inet $NHRP_SRCNBMA $NHRP_DESTNBMA | openssl x509 -inform der -text -noout | egrep -o "/OU=[^/]*(/[0-9]+)?" | cut -b 5-`
+ if [ -z "`echo "$CERT" | grep "^GRE=$NHRP_DESTADDR"`" ]; then
+ echo "ERROR: IP $NHRP_DESTADDR at $NHRP_DESTNBMA NOT verified"
+ exit 1
+ fi
+ ;;
+peer-down)
+ echo "Delete link from $NHRP_SRCADDR ($NHRP_SRCNBMA) to $NHRP_DESTADDR ($NHRP_DESTNBMA)"
+ racoonctl delete-sa isakmp inet $NHRP_SRCNBMA $NHRP_DESTNBMA
+ ;;
+route-up)
+ echo "Route $NHRP_DESTADDR/$NHRP_DESTPREFIX is up"
+ ip route replace $NHRP_DESTADDR/$NHRP_DESTPREFIX proto 42 via $NHRP_NEXTHOP dev $NHRP_INTERFACE
+ ip route flush cache
+ ;;
+route-down)
+ echo "Route $NHRP_DESTADDR/$NHRP_DESTPREFIX is down"
+ ip route del $NHRP_DESTADDR/$NHRP_DESTPREFIX proto 42
+ ip route flush cache
+ ;;
+esac
+
+exit 0
diff --git a/etc/opennhrp.conf b/etc/opennhrp.conf
new file mode 100644
index 0000000..6451cb0
--- /dev/null
+++ b/etc/opennhrp.conf
@@ -0,0 +1,9 @@
+interface gre1
+ map 10.255.255.1/24 192.168.200.1 register
+ cisco-authentication secret
+ shortcut
+ redirect
+ non-caching
+
+interface lo
+ shortcut-destination
diff --git a/etc/racoon-ph1dead.sh b/etc/racoon-ph1dead.sh
new file mode 100755
index 0000000..942e4c2
--- /dev/null
+++ b/etc/racoon-ph1dead.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+opennhrpctl cache lowerdown nbma $REMOTE_ADDR local-nbma $LOCAL_ADDR
diff --git a/etc/racoon-ph1down.sh b/etc/racoon-ph1down.sh
new file mode 100755
index 0000000..c98d985
--- /dev/null
+++ b/etc/racoon-ph1down.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+# Purge opennhrp entries only if this was the last ISAKMP phase1
+if [ -z "`racoonctl -ll show-sa isakmp | grep "$LOCAL_ADDR\.[0-9]* * $REMOTE_ADDR\.[0-9]* "`" ]; then
+ opennhrpctl cache purge nbma $REMOTE_ADDR local-nbma $LOCAL_ADDR
+fi
diff --git a/libev/LICENSE b/libev/LICENSE
new file mode 100644
index 0000000..df62c4f
--- /dev/null
+++ b/libev/LICENSE
@@ -0,0 +1,36 @@
+All files in libev are Copyright (C)2007,2008 Marc Alexander Lehmann.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Alternatively, the contents of this package may be used under the terms
+of the GNU General Public License ("GPL") version 2 or any later version,
+in which case the provisions of the GPL are applicable instead of the
+above. If you wish to allow the use of your version of this package only
+under the terms of the GPL and not to allow others to use your version of
+this file under the BSD license, indicate your decision by deleting the
+provisions above and replace them with the notice and other provisions
+required by the GPL in this and the other files of this package. If you do
+not delete the provisions above, a recipient may use your version of this
+file under either the BSD or the GPL.
diff --git a/libev/README b/libev/README
new file mode 100644
index 0000000..ca403c6
--- /dev/null
+++ b/libev/README
@@ -0,0 +1,58 @@
+libev is a high-performance event loop/event model with lots of features.
+(see benchmark at http://libev.schmorp.de/bench.html)
+
+
+ABOUT
+
+ Homepage: http://software.schmorp.de/pkg/libev
+ Mailinglist: libev@lists.schmorp.de
+ http://lists.schmorp.de/cgi-bin/mailman/listinfo/libev
+ Library Documentation: http://pod.tst.eu/http://cvs.schmorp.de/libev/ev.pod
+
+ Libev is modelled (very loosely) after libevent and the Event perl
+ module, but is faster, scales better and is more correct, and also more
+ featureful. And also smaller. Yay.
+
+ Some of the specialties of libev not commonly found elsewhere are:
+
+ - extensive and detailed, readable documentation (not doxygen garbage).
+ - fully supports fork, can detect fork in various ways and automatically
+ re-arms kernel mechanisms that do not support fork.
+ - highly optimised select, poll, epoll, kqueue and event ports backends.
+ - filesystem object (path) watching (with optional linux inotify support).
+ - wallclock-based times (using absolute time, cron-like).
+ - relative timers/timeouts (handle time jumps).
+ - fast intra-thread communication between multiple
+ event loops (with optional fast linux eventfd backend).
+ - extremely easy to embed.
+ - very small codebase, no bloated library.
+ - fully extensible by being able to plug into the event loop,
+ integrate other event loops, integrate other event loop users.
+ - very little memory use (small watchers, small event loop data).
+ - optional C++ interface allowing method and function callbacks
+ at no extra memory or runtime overhead.
+ - optional Perl interface with similar characteristics (capable
+ of running Glib/Gtk2 on libev, interfaces with Net::SNMP and
+ libadns).
+ - support for other languages (multiple C++ interfaces, D, Ruby,
+ Python) available from third-parties.
+
+ Examples of programs that embed libev: the EV perl module,
+ rxvt-unicode, gvpe (GNU Virtual Private Ethernet), the Deliantra MMORPG
+ server (http://www.deliantra.net/), Rubinius (a next-generation Ruby
+ VM), the Ebb web server, the Rev event toolkit.
+
+
+CONTRIBUTORS
+
+ libev was written and designed by Marc Lehmann and Emanuele Giaquinta.
+
+ The following people sent in patches or made other noteworthy
+ contributions to the design (for minor patches, see the Changes
+ file. If I forgot to include you, please shout at me, it was an
+ accident):
+
+ W.C.A. Wijngaards
+ Christopher Layne
+ Chris Brody
+
diff --git a/libev/VERSION b/libev/VERSION
new file mode 100644
index 0000000..666ac08
--- /dev/null
+++ b/libev/VERSION
@@ -0,0 +1 @@
+libev 3.9
diff --git a/libev/ev.c b/libev/ev.c
new file mode 100644
index 0000000..ccd202b
--- /dev/null
+++ b/libev/ev.c
@@ -0,0 +1,3694 @@
+/*
+ * libev event processing core, watcher management
+ *
+ * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* this big block deduces configuration from config.h */
+#ifndef EV_STANDALONE
+# ifdef EV_CONFIG_H
+# include EV_CONFIG_H
+# else
+# include "config.h"
+# endif
+
+# if HAVE_CLOCK_SYSCALL
+# ifndef EV_USE_CLOCK_SYSCALL
+# define EV_USE_CLOCK_SYSCALL 1
+# ifndef EV_USE_REALTIME
+# define EV_USE_REALTIME 0
+# endif
+# ifndef EV_USE_MONOTONIC
+# define EV_USE_MONOTONIC 1
+# endif
+# endif
+# elif !defined(EV_USE_CLOCK_SYSCALL)
+# define EV_USE_CLOCK_SYSCALL 0
+# endif
+
+# if HAVE_CLOCK_GETTIME
+# ifndef EV_USE_MONOTONIC
+# define EV_USE_MONOTONIC 1
+# endif
+# ifndef EV_USE_REALTIME
+# define EV_USE_REALTIME 0
+# endif
+# else
+# ifndef EV_USE_MONOTONIC
+# define EV_USE_MONOTONIC 0
+# endif
+# ifndef EV_USE_REALTIME
+# define EV_USE_REALTIME 0
+# endif
+# endif
+
+# ifndef EV_USE_NANOSLEEP
+# if HAVE_NANOSLEEP
+# define EV_USE_NANOSLEEP 1
+# else
+# define EV_USE_NANOSLEEP 0
+# endif
+# endif
+
+# ifndef EV_USE_SELECT
+# if HAVE_SELECT && HAVE_SYS_SELECT_H
+# define EV_USE_SELECT 1
+# else
+# define EV_USE_SELECT 0
+# endif
+# endif
+
+# ifndef EV_USE_POLL
+# if HAVE_POLL && HAVE_POLL_H
+# define EV_USE_POLL 1
+# else
+# define EV_USE_POLL 0
+# endif
+# endif
+
+# ifndef EV_USE_EPOLL
+# if HAVE_EPOLL_CTL && HAVE_SYS_EPOLL_H
+# define EV_USE_EPOLL 1
+# else
+# define EV_USE_EPOLL 0
+# endif
+# endif
+
+# ifndef EV_USE_KQUEUE
+# if HAVE_KQUEUE && HAVE_SYS_EVENT_H && HAVE_SYS_QUEUE_H
+# define EV_USE_KQUEUE 1
+# else
+# define EV_USE_KQUEUE 0
+# endif
+# endif
+
+# ifndef EV_USE_PORT
+# if HAVE_PORT_H && HAVE_PORT_CREATE
+# define EV_USE_PORT 1
+# else
+# define EV_USE_PORT 0
+# endif
+# endif
+
+# ifndef EV_USE_INOTIFY
+# if HAVE_INOTIFY_INIT && HAVE_SYS_INOTIFY_H
+# define EV_USE_INOTIFY 1
+# else
+# define EV_USE_INOTIFY 0
+# endif
+# endif
+
+# ifndef EV_USE_SIGNALFD
+# if HAVE_SIGNALFD && HAVE_SYS_SIGNALFD_H
+# define EV_USE_SIGNALFD 1
+# else
+# define EV_USE_SIGNALFD 0
+# endif
+# endif
+
+# ifndef EV_USE_EVENTFD
+# if HAVE_EVENTFD
+# define EV_USE_EVENTFD 1
+# else
+# define EV_USE_EVENTFD 0
+# endif
+# endif
+
+#endif
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stddef.h>
+
+#include <stdio.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <time.h>
+
+#include <signal.h>
+
+#ifdef EV_H
+# include EV_H
+#else
+# include "ev.h"
+#endif
+
+#ifndef _WIN32
+# include <sys/time.h>
+# include <sys/wait.h>
+# include <unistd.h>
+#else
+# include <io.h>
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+# ifndef EV_SELECT_IS_WINSOCKET
+# define EV_SELECT_IS_WINSOCKET 1
+# endif
+#endif
+
+/* this block tries to deduce configuration from header-defined symbols and defaults */
+
+/* try to deduce the maximum number of signals on this platform */
+#if defined (EV_NSIG)
+/* use what's provided */
+#elif defined (NSIG)
+# define EV_NSIG (NSIG)
+#elif defined(_NSIG)
+# define EV_NSIG (_NSIG)
+#elif defined (SIGMAX)
+# define EV_NSIG (SIGMAX+1)
+#elif defined (SIG_MAX)
+# define EV_NSIG (SIG_MAX+1)
+#elif defined (_SIG_MAX)
+# define EV_NSIG (_SIG_MAX+1)
+#elif defined (MAXSIG)
+# define EV_NSIG (MAXSIG+1)
+#elif defined (MAX_SIG)
+# define EV_NSIG (MAX_SIG+1)
+#elif defined (SIGARRAYSIZE)
+# define EV_NSIG SIGARRAYSIZE /* Assume ary[SIGARRAYSIZE] */
+#elif defined (_sys_nsig)
+# define EV_NSIG (_sys_nsig) /* Solaris 2.5 */
+#else
+# error "unable to find value for NSIG, please report"
+/* to make it compile regardless, just remove the above line */
+# define EV_NSIG 65
+#endif
+
+#ifndef EV_USE_CLOCK_SYSCALL
+# if __linux && __GLIBC__ >= 2
+# define EV_USE_CLOCK_SYSCALL 1
+# else
+# define EV_USE_CLOCK_SYSCALL 0
+# endif
+#endif
+
+#ifndef EV_USE_MONOTONIC
+# if defined (_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0
+# define EV_USE_MONOTONIC 1
+# else
+# define EV_USE_MONOTONIC 0
+# endif
+#endif
+
+#ifndef EV_USE_REALTIME
+# define EV_USE_REALTIME !EV_USE_CLOCK_SYSCALL
+#endif
+
+#ifndef EV_USE_NANOSLEEP
+# if _POSIX_C_SOURCE >= 199309L
+# define EV_USE_NANOSLEEP 1
+# else
+# define EV_USE_NANOSLEEP 0
+# endif
+#endif
+
+#ifndef EV_USE_SELECT
+# define EV_USE_SELECT 1
+#endif
+
+#ifndef EV_USE_POLL
+# ifdef _WIN32
+# define EV_USE_POLL 0
+# else
+# define EV_USE_POLL 1
+# endif
+#endif
+
+#ifndef EV_USE_EPOLL
+# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 4))
+# define EV_USE_EPOLL 1
+# else
+# define EV_USE_EPOLL 0
+# endif
+#endif
+
+#ifndef EV_USE_KQUEUE
+# define EV_USE_KQUEUE 0
+#endif
+
+#ifndef EV_USE_PORT
+# define EV_USE_PORT 0
+#endif
+
+#ifndef EV_USE_INOTIFY
+# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 4))
+# define EV_USE_INOTIFY 1
+# else
+# define EV_USE_INOTIFY 0
+# endif
+#endif
+
+#ifndef EV_PID_HASHSIZE
+# if EV_MINIMAL
+# define EV_PID_HASHSIZE 1
+# else
+# define EV_PID_HASHSIZE 16
+# endif
+#endif
+
+#ifndef EV_INOTIFY_HASHSIZE
+# if EV_MINIMAL
+# define EV_INOTIFY_HASHSIZE 1
+# else
+# define EV_INOTIFY_HASHSIZE 16
+# endif
+#endif
+
+#ifndef EV_USE_EVENTFD
+# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 7))
+# define EV_USE_EVENTFD 1
+# else
+# define EV_USE_EVENTFD 0
+# endif
+#endif
+
+#ifndef EV_USE_SIGNALFD
+# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 7))
+# define EV_USE_SIGNALFD 1
+# else
+# define EV_USE_SIGNALFD 0
+# endif
+#endif
+
+#if 0 /* debugging */
+# define EV_VERIFY 3
+# define EV_USE_4HEAP 1
+# define EV_HEAP_CACHE_AT 1
+#endif
+
+#ifndef EV_VERIFY
+# define EV_VERIFY !EV_MINIMAL
+#endif
+
+#ifndef EV_USE_4HEAP
+# define EV_USE_4HEAP !EV_MINIMAL
+#endif
+
+#ifndef EV_HEAP_CACHE_AT
+# define EV_HEAP_CACHE_AT !EV_MINIMAL
+#endif
+
+/* on linux, we can use a (slow) syscall to avoid a dependency on pthread, */
+/* which makes programs even slower. might work on other unices, too. */
+#if EV_USE_CLOCK_SYSCALL
+# include <syscall.h>
+# ifdef SYS_clock_gettime
+# define clock_gettime(id, ts) syscall (SYS_clock_gettime, (id), (ts))
+# undef EV_USE_MONOTONIC
+# define EV_USE_MONOTONIC 1
+# else
+# undef EV_USE_CLOCK_SYSCALL
+# define EV_USE_CLOCK_SYSCALL 0
+# endif
+#endif
+
+/* this block fixes any misconfiguration where we know we run into trouble otherwise */
+
+#ifndef CLOCK_MONOTONIC
+# undef EV_USE_MONOTONIC
+# define EV_USE_MONOTONIC 0
+#endif
+
+#ifndef CLOCK_REALTIME
+# undef EV_USE_REALTIME
+# define EV_USE_REALTIME 0
+#endif
+
+#if !EV_STAT_ENABLE
+# undef EV_USE_INOTIFY
+# define EV_USE_INOTIFY 0
+#endif
+
+#if !EV_USE_NANOSLEEP
+# ifndef _WIN32
+# include <sys/select.h>
+# endif
+#endif
+
+#if EV_USE_INOTIFY
+# include <sys/utsname.h>
+# include <sys/statfs.h>
+# include <sys/inotify.h>
+/* some very old inotify.h headers don't have IN_DONT_FOLLOW */
+# ifndef IN_DONT_FOLLOW
+# undef EV_USE_INOTIFY
+# define EV_USE_INOTIFY 0
+# endif
+#endif
+
+#if EV_SELECT_IS_WINSOCKET
+# include <winsock.h>
+#endif
+
+#if EV_USE_EVENTFD
+/* our minimum requirement is glibc 2.7 which has the stub, but not the header */
+# include <stdint.h>
+# ifndef EFD_NONBLOCK
+# define EFD_NONBLOCK O_NONBLOCK
+# endif
+# ifndef EFD_CLOEXEC
+# ifdef O_CLOEXEC
+# define EFD_CLOEXEC O_CLOEXEC
+# else
+# define EFD_CLOEXEC 02000000
+# endif
+# endif
+# ifdef __cplusplus
+extern "C" {
+# endif
+int eventfd (unsigned int initval, int flags);
+# ifdef __cplusplus
+}
+# endif
+#endif
+
+#if EV_USE_SIGNALFD
+/* our minimum requirement is glibc 2.7 which has the stub, but not the header */
+# include <stdint.h>
+# ifndef SFD_NONBLOCK
+# define SFD_NONBLOCK O_NONBLOCK
+# endif
+# ifndef SFD_CLOEXEC
+# ifdef O_CLOEXEC
+# define SFD_CLOEXEC O_CLOEXEC
+# else
+# define SFD_CLOEXEC 02000000
+# endif
+# endif
+# ifdef __cplusplus
+extern "C" {
+# endif
+int signalfd (int fd, const sigset_t *mask, int flags);
+
+struct signalfd_siginfo
+{
+ uint32_t ssi_signo;
+ char pad[128 - sizeof (uint32_t)];
+};
+# ifdef __cplusplus
+}
+# endif
+#endif
+
+
+/**/
+
+#if EV_VERIFY >= 3
+# define EV_FREQUENT_CHECK ev_loop_verify (EV_A)
+#else
+# define EV_FREQUENT_CHECK do { } while (0)
+#endif
+
+/*
+ * This is used to avoid floating point rounding problems.
+ * It is added to ev_rt_now when scheduling periodics
+ * to ensure progress, time-wise, even when rounding
+ * errors are against us.
+ * This value is good at least till the year 4000.
+ * Better solutions welcome.
+ */
+#define TIME_EPSILON 0.0001220703125 /* 1/8192 */
+
+#define MIN_TIMEJUMP 1. /* minimum timejump that gets detected (if monotonic clock available) */
+#define MAX_BLOCKTIME 59.743 /* never wait longer than this time (to detect time jumps) */
+
+#if __GNUC__ >= 4
+# define expect(expr,value) __builtin_expect ((expr),(value))
+# define noinline __attribute__ ((noinline))
+#else
+# define expect(expr,value) (expr)
+# define noinline
+# if __STDC_VERSION__ < 199901L && __GNUC__ < 2
+# define inline
+# endif
+#endif
+
+#define expect_false(expr) expect ((expr) != 0, 0)
+#define expect_true(expr) expect ((expr) != 0, 1)
+#define inline_size static inline
+
+#if EV_MINIMAL
+# define inline_speed static noinline
+#else
+# define inline_speed static inline
+#endif
+
+#define NUMPRI (EV_MAXPRI - EV_MINPRI + 1)
+
+#if EV_MINPRI == EV_MAXPRI
+# define ABSPRI(w) (((W)w), 0)
+#else
+# define ABSPRI(w) (((W)w)->priority - EV_MINPRI)
+#endif
+
+#define EMPTY /* required for microsofts broken pseudo-c compiler */
+#define EMPTY2(a,b) /* used to suppress some warnings */
+
+typedef ev_watcher *W;
+typedef ev_watcher_list *WL;
+typedef ev_watcher_time *WT;
+
+#define ev_active(w) ((W)(w))->active
+#define ev_at(w) ((WT)(w))->at
+
+#if EV_USE_REALTIME
+/* sig_atomic_t is used to avoid per-thread variables or locking but still */
+/* giving it a reasonably high chance of working on typical architectures */
+static EV_ATOMIC_T have_realtime; /* did clock_gettime (CLOCK_REALTIME) work? */
+#endif
+
+#if EV_USE_MONOTONIC
+static EV_ATOMIC_T have_monotonic; /* did clock_gettime (CLOCK_MONOTONIC) work? */
+#endif
+
+#ifndef EV_FD_TO_WIN32_HANDLE
+/* map a C runtime file descriptor to the underlying OS handle */
+# define EV_FD_TO_WIN32_HANDLE(fd) _get_osfhandle (fd)
+#endif
+#ifndef EV_WIN32_HANDLE_TO_FD
+/* map an OS handle back to a C runtime file descriptor; the expansion has */
+/* to use the macro's own parameter rather than an unrelated name "fd" */
+# define EV_WIN32_HANDLE_TO_FD(handle) _open_osfhandle (handle, 0)
+#endif
+#ifndef EV_WIN32_CLOSE_FD
+/* close a C runtime file descriptor */
+# define EV_WIN32_CLOSE_FD(fd) close (fd)
+#endif
+
+#ifdef _WIN32
+# include "ev_win32.c"
+#endif
+
+/*****************************************************************************/
+
+static void (*syserr_cb)(const char *msg); /* handler invoked by ev_syserr; zero-initialised, i.e. unset by default */
+
+void
+ev_set_syserr_cb (void (*cb)(const char *msg)) /* store CB as the handler kept in syserr_cb */
+{
+ syserr_cb = cb;
+}
+
+static void noinline
+ev_syserr (const char *msg)
+{
+ if (!msg)
+ msg = "(libev) system error";
+
+ if (syserr_cb)
+ syserr_cb (msg);
+ else
+ {
+ perror (msg);
+ abort ();
+ }
+}
+
+/* default allocator: forwards to realloc, except that a SIZE of zero */
+/* always frees PTR and yields a null pointer */
+static void *
+ev_realloc_emul (void *ptr, long size)
+{
+  /* some systems, notably openbsd and darwin, fail to properly
+   * implement realloc (x, 0) (as required by both ansi c-98 and
+   * the single unix specification), so work around them here.
+   */
+  if (!size)
+    {
+      free (ptr);
+      return 0;
+    }
+
+  return realloc (ptr, size);
+}
+
+static void *(*alloc)(void *ptr, long size) = ev_realloc_emul; /* allocator used by ev_realloc, ev_realloc_emul unless replaced */
+
+void
+ev_set_allocator (void *(*cb)(void *ptr, long size)) /* make CB the allocator that ev_realloc calls */
+{
+ alloc = cb;
+}
+
+/* (re)allocate SIZE bytes through the configured allocator. a failed */
+/* allocation of a nonzero SIZE is fatal: a diagnostic is written to stderr */
+/* and the process aborts, so callers never see null for a nonzero SIZE */
+inline_speed void *
+ev_realloc (void *ptr, long size)
+{
+  ptr = alloc (ptr, size);
+
+  if (!ptr && size)
+    {
+      /* end the line so the message is not run together with later output */
+      fprintf (stderr, "libev: cannot allocate %ld bytes, aborting.\n", size);
+      abort ();
+    }
+
+  return ptr;
+}
+
+#define ev_malloc(size) ev_realloc (0, (size))
+#define ev_free(ptr) ev_realloc ((ptr), 0)
+
+/*****************************************************************************/
+
+/* set in reify when reification needed */
+#define EV_ANFD_REIFY 1
+
+/* file descriptor info structure */
+typedef struct
+{
+ WL head;
+ unsigned char events; /* the events watched for */
+ unsigned char reify; /* flag set when this ANFD needs reification (EV_ANFD_REIFY, EV__IOFDSET) */
+ unsigned char emask; /* the epoll backend stores the actual kernel mask in here */
+ unsigned char unused;
+#if EV_USE_EPOLL
+ unsigned int egen; /* generation counter to counter epoll bugs */
+#endif
+#if EV_SELECT_IS_WINSOCKET
+ SOCKET handle;
+#endif
+} ANFD;
+
+/* stores the pending event set for a given watcher */
+typedef struct
+{
+ W w;
+ int events; /* the pending event set for the given watcher */
+} ANPENDING;
+
+#if EV_USE_INOTIFY
+/* hash table entry per inotify-id */
+typedef struct
+{
+ WL head;
+} ANFS;
+#endif
+
+/* Heap Entry */
+#if EV_HEAP_CACHE_AT
+ /* a heap element */
+ typedef struct {
+ ev_tstamp at;
+ WT w;
+ } ANHE;
+
+ #define ANHE_w(he) (he).w /* access watcher, read-write */
+ #define ANHE_at(he) (he).at /* access cached at, read-only */
+ #define ANHE_at_cache(he) (he).at = (he).w->at /* update at from watcher */
+#else
+ /* a heap element */
+ typedef WT ANHE;
+
+ #define ANHE_w(he) (he)
+ #define ANHE_at(he) (he)->at
+ #define ANHE_at_cache(he)
+#endif
+
+#if EV_MULTIPLICITY
+
+ struct ev_loop
+ {
+ ev_tstamp ev_rt_now;
+ #define ev_rt_now ((loop)->ev_rt_now)
+ #define VAR(name,decl) decl;
+ #include "ev_vars.h"
+ #undef VAR
+ };
+ #include "ev_wrap.h"
+
+ static struct ev_loop default_loop_struct;
+ struct ev_loop *ev_default_loop_ptr;
+
+#else
+
+ ev_tstamp ev_rt_now;
+ #define VAR(name,decl) static decl;
+ #include "ev_vars.h"
+ #undef VAR
+
+ static int ev_default_loop_ptr;
+
+#endif
+
+#if EV_MINIMAL < 2
+# define EV_RELEASE_CB if (expect_false (release_cb)) release_cb (EV_A)
+# define EV_ACQUIRE_CB if (expect_false (acquire_cb)) acquire_cb (EV_A)
+# define EV_INVOKE_PENDING invoke_cb (EV_A)
+#else
+# define EV_RELEASE_CB (void)0
+# define EV_ACQUIRE_CB (void)0
+# define EV_INVOKE_PENDING ev_invoke_pending (EV_A)
+#endif
+
+#define EVUNLOOP_RECURSE 0x80
+
+/*****************************************************************************/
+
+#ifndef EV_HAVE_EV_TIME
+/* return the current time in seconds as an ev_tstamp: taken from */
+/* clock_gettime (CLOCK_REALTIME) when have_realtime is set, otherwise */
+/* from gettimeofday */
+ev_tstamp
+ev_time (void)
+{
+#if EV_USE_REALTIME
+  if (expect_true (have_realtime))
+    {
+      struct timespec now;
+
+      clock_gettime (CLOCK_REALTIME, &now);
+      return now.tv_sec + now.tv_nsec * 1e-9;
+    }
+#endif
+
+  {
+    struct timeval now;
+
+    gettimeofday (&now, 0);
+    return now.tv_sec + now.tv_usec * 1e-6;
+  }
+}
+#endif
+
+/* return a timestamp in seconds: read from CLOCK_MONOTONIC when */
+/* have_monotonic is set, otherwise whatever ev_time returns */
+inline_size ev_tstamp
+get_clock (void)
+{
+#if EV_USE_MONOTONIC
+  if (expect_true (have_monotonic))
+    {
+      struct timespec mono;
+
+      clock_gettime (CLOCK_MONOTONIC, &mono);
+      return mono.tv_sec + mono.tv_nsec * 1e-9;
+    }
+#endif
+
+  return ev_time ();
+}
+
+#if EV_MULTIPLICITY
+ev_tstamp
+ev_now (EV_P) /* returns the ev_rt_now value stored in the given loop */
+{
+ return ev_rt_now;
+}
+#endif
+
+/* suspend the caller for DELAY seconds using nanosleep, Sleep (win32) or */
+/* select, depending on configuration; does nothing unless DELAY > 0 */
+void
+ev_sleep (ev_tstamp delay)
+{
+  if (!(delay > 0.))
+    return;
+
+  {
+#if EV_USE_NANOSLEEP
+    struct timespec req;
+
+    req.tv_sec  = (time_t)delay;
+    req.tv_nsec = (long)((delay - (ev_tstamp)(req.tv_sec)) * 1e9);
+
+    nanosleep (&req, 0);
+#elif defined(_WIN32)
+    Sleep ((unsigned long)(delay * 1e3));
+#else
+    struct timeval tmo;
+
+    tmo.tv_sec  = (time_t)delay;
+    tmo.tv_usec = (long)((delay - (ev_tstamp)(tmo.tv_sec)) * 1e6);
+
+    /* here we rely on sys/time.h + sys/types.h + unistd.h providing select */
+    /* something not guaranteed by newer posix versions, but guaranteed */
+    /* by older ones */
+    select (0, 0, 0, 0, &tmo);
+#endif
+  }
+}
+
+/*****************************************************************************/
+
+#define MALLOC_ROUND 4096 /* prefer to allocate in chunks of this size, must be 2**n and >> 4 longs */
+
+/* find a suitable new size (in elements) for an array of ELEM-byte items */
+/* that currently holds CUR slots and must hold at least CNT, */
+/* hopefully by rounding to a nice-to-malloc size */
+inline_size int
+array_nextsize (int elem, int cur, int cnt)
+{
+  const size_t slack = sizeof (void *) * 4;
+  int want = cur + 1;
+
+  /* double at least once, then keep doubling until CNT fits */
+  do
+    want <<= 1;
+  while (cnt > want);
+
+  /* if size is large, round to MALLOC_ROUND - 4 * longs to accommodate malloc overhead */
+  if (elem * want > MALLOC_ROUND - slack)
+    {
+      int bytes = want * elem;
+
+      bytes = (bytes + elem + (MALLOC_ROUND - 1) + slack) & ~(MALLOC_ROUND - 1);
+      bytes = bytes - slack;
+      want  = bytes / elem;
+    }
+
+  return want;
+}
+
+/* grow the array at BASE (ELEM bytes per item) so it can hold CNT items: */
+/* *CUR is updated to the new capacity and the reallocated base is returned */
+static noinline void *
+array_realloc (int elem, void *base, int *cur, int cnt)
+{
+  int grown = array_nextsize (elem, *cur, cnt);
+
+  *cur = grown;
+
+  return ev_realloc (base, elem * grown);
+}
+
+#define array_init_zero(base,count) \
+ memset ((void *)(base), 0, sizeof (*(base)) * (count))
+
+#define array_needsize(type,base,cur,cnt,init) \
+ if (expect_false ((cnt) > (cur))) \
+ { \
+ int ocur_ = (cur); \
+ (base) = (type *)array_realloc \
+ (sizeof (type), (base), &(cur), (cnt)); \
+ init ((base) + (ocur_), (cur) - ocur_); \
+ }
+
+#if 0
+#define array_slim(type,stem) \
+ if (stem ## max < array_roundsize (stem ## cnt >> 2)) \
+ { \
+ stem ## max = array_roundsize (stem ## cnt >> 1); \
+ base = (type *)ev_realloc (base, sizeof (type) * (stem ## max));\
+ fprintf (stderr, "slimmed down " # stem " to %d\n", stem ## max);/*D*/\
+ }
+#endif
+
+#define array_free(stem, idx) \
+ ev_free (stem ## s idx); stem ## cnt idx = stem ## max idx = 0; stem ## s idx = 0
+
+/*****************************************************************************/
+
+/* dummy callback for pending events: has an empty body and ignores
+ * both of its arguments */
+static void noinline
+pendingcb (EV_P_ ev_prepare *w, int revents)
+{
+}
+
+/* queue the event bits REVENTS for watcher W: a watcher that is already */
+/* pending just gets the bits merged into its existing entry, otherwise a */
+/* new entry is appended to the pending array of the watcher's priority */
+void noinline
+ev_feed_event (EV_P_ void *w, int revents)
+{
+  W watcher = (W)w;
+  int pri = ABSPRI (watcher);
+
+  if (expect_false (watcher->pending))
+    {
+      pendings [pri][watcher->pending - 1].events |= revents;
+      return;
+    }
+
+  /* pending holds the 1-based position of the watcher's entry */
+  watcher->pending = ++pendingcnt [pri];
+  array_needsize (ANPENDING, pendings [pri], pendingmax [pri], watcher->pending, EMPTY2);
+
+  {
+    ANPENDING *slot = &pendings [pri][watcher->pending - 1];
+
+    slot->w      = watcher;
+    slot->events = revents;
+  }
+}
+
+/* append watcher W to the rfeeds array, growing it when necessary */
+inline_speed void
+feed_reverse (EV_P_ W w)
+{
+  array_needsize (W, rfeeds, rfeedmax, rfeedcnt + 1, EMPTY2);
+
+  rfeeds [rfeedcnt] = w;
+  ++rfeedcnt;
+}
+
+/* feed REVENTS to every watcher stored in rfeeds, last stored first, */
+/* leaving rfeedcnt at zero; at least one entry is always consumed */
+inline_size void
+feed_reverse_done (EV_P_ int revents)
+{
+  for (;;)
+    {
+      --rfeedcnt;
+      ev_feed_event (EV_A_ rfeeds [rfeedcnt], revents);
+
+      if (!rfeedcnt)
+        break;
+    }
+}
+
+/* feed the event TYPE to each of the first EVENTCNT watchers in EVENTS */
+inline_speed void
+queue_events (EV_P_ W *events, int eventcnt, int type)
+{
+  int idx = 0;
+
+  while (idx < eventcnt)
+    {
+      ev_feed_event (EV_A_ events [idx], type);
+      ++idx;
+    }
+}
+
+/*****************************************************************************/
+
+/* feed REVENTS to the io watchers of FD: each watcher receives only the */
+/* bits it also has in its own events mask, and none if that is empty */
+inline_speed void
+fd_event_nc (EV_P_ int fd, int revents)
+{
+  ev_io *w = (ev_io *)anfds [fd].head;
+
+  while (w)
+    {
+      int hit = w->events & revents;
+
+      if (hit)
+        ev_feed_event (EV_A_ (W)w, hit);
+
+      w = (ev_io *)((WL)w)->next;
+    }
+}
+
+/* like fd_event_nc, but do not submit kernel events for fds that have */
+/* reify set, because that means they changed while we were polling for */
+/* new events */
+inline_speed void
+fd_event (EV_P_ int fd, int revents)
+{
+  if (expect_true (!anfds [fd].reify))
+    fd_event_nc (EV_A_ fd, revents);
+}
+
+/* feed REVENTS to the watchers of FD; fds outside 0 .. anfdmax - 1 are */
+/* silently ignored */
+void
+ev_feed_fd_event (EV_P_ int fd, int revents)
+{
+  if (fd < 0 || fd >= anfdmax)
+    return;
+
+  fd_event_nc (EV_A_ fd, revents);
+}
+
+/* make sure the external fd watch events are in-sync
+ * with the kernel/libev internal state: for every fd recorded in
+ * fdchanges the event masks of all its watchers are or'ed together,
+ * stored in the ANFD, and the backend is told when the mask changed
+ * or EV__IOFDSET was set in the reify flags */
+/* afterwards the change list is empty */
+inline_size void
+fd_reify (EV_P)
+{
+ int i;
+
+ for (i = 0; i < fdchangecnt; ++i)
+ {
+ int fd = fdchanges [i];
+ ANFD *anfd = anfds + fd;
+ ev_io *w;
+
+ unsigned char events = 0; /* union of the event masks of all watchers on this fd */
+
+ for (w = (ev_io *)anfd->head; w; w = (ev_io *)((WL)w)->next)
+ events |= (unsigned char)w->events;
+
+#if EV_SELECT_IS_WINSOCKET
+ if (events)
+ {
+ unsigned long arg;
+ anfd->handle = EV_FD_TO_WIN32_HANDLE (fd);
+ assert (("libev: only socket fds supported in this configuration", ioctlsocket (anfd->handle, FIONREAD, &arg) == 0));
+ }
+#endif
+
+ {
+ unsigned char o_events = anfd->events; /* mask before this update */
+ unsigned char o_reify = anfd->reify; /* flags before they are cleared below */
+
+ anfd->reify = 0;
+ anfd->events = events;
+
+ if (o_events != events || o_reify & EV__IOFDSET)
+ backend_modify (EV_A_ fd, o_events, events);
+ }
+ }
+
+ fdchangecnt = 0;
+}
+
+/* something about the given fd changed: merge FLAGS into its reify flags */
+/* and, if those flags were clear before, append the fd to fdchanges */
+inline_size void
+fd_change (EV_P_ int fd, int flags)
+{
+  unsigned char was = anfds [fd].reify;
+
+  anfds [fd].reify |= flags;
+
+  /* the fd is only queued when its reify flags were previously clear */
+  if (expect_false (was))
+    return;
+
+  ++fdchangecnt;
+  array_needsize (int, fdchanges, fdchangemax, fdchangecnt, EMPTY2);
+  fdchanges [fdchangecnt - 1] = fd;
+}
+
+/* the given fd is invalid/unusable, so make sure it doesn't hurt us anymore: */
+/* every io watcher on it is stopped and fed EV_ERROR | EV_READ | EV_WRITE */
+inline_speed void
+fd_kill (EV_P_ int fd)
+{
+  for (;;)
+    {
+      ev_io *w = (ev_io *)anfds [fd].head;
+
+      if (!w)
+        break;
+
+      ev_io_stop (EV_A_ w);
+      ev_feed_event (EV_A_ (W)w, EV_ERROR | EV_READ | EV_WRITE);
+    }
+}
+
+/* check whether the given fd is atcually valid, for error recovery */
+inline_size int
+fd_valid (int fd)
+{
+#ifdef _WIN32
+ return _get_osfhandle (fd) != -1;
+#else
+ return fcntl (fd, F_GETFD) != -1;
+#endif
+}
+
+/* called on EBADF to verify fds: every fd with a nonzero event mask that */
+/* fails fd_valid with errno == EBADF is passed to fd_kill */
+static void noinline
+fd_ebadf (EV_P)
+{
+  int fd;
+
+  for (fd = 0; fd < anfdmax; ++fd)
+    {
+      if (anfds [fd].events && !fd_valid (fd) && errno == EBADF)
+        fd_kill (EV_A_ fd);
+    }
+}
+
+/* called on ENOMEM in select/poll to kill some fds and retry: only the */
+/* highest-numbered fd with a nonzero event mask is killed */
+static void noinline
+fd_enomem (EV_P)
+{
+  int fd = anfdmax;
+
+  while (fd--)
+    {
+      if (!anfds [fd].events)
+        continue;
+
+      fd_kill (EV_A_ fd);
+      break;
+    }
+}
+
+/* usually called after fork if backend needs to re-arm all fds from scratch: */
+/* each fd with a nonzero event mask has its masks cleared and is queued */
+/* for reification with EV__IOFDSET set */
+static void noinline
+fd_rearm_all (EV_P)
+{
+  int fd;
+
+  for (fd = 0; fd < anfdmax; ++fd)
+    {
+      if (!anfds [fd].events)
+        continue;
+
+      anfds [fd].events = 0;
+      anfds [fd].emask  = 0;
+      fd_change (EV_A_ fd, EV__IOFDSET | EV_ANFD_REIFY);
+    }
+}
+
+/*****************************************************************************/
+
+/*
+ * the heap functions want a real array index. array index 0 is guaranteed to not
+ * be in-use at any time. the first heap entry is at array [HEAP0]. DHEAP gives
+ * the branching factor of the d-tree.
+ */
+
+/*
+ * at the moment we allow libev the luxury of two heaps,
+ * a small-code-size 2-heap one and a ~1.5kb larger 4-heap
+ * which is more cache-efficient.
+ * the difference is about 5% with 50000+ watchers.
+ */
+#if EV_USE_4HEAP
+
+#define DHEAP 4
+#define HEAP0 (DHEAP - 1) /* index of first element in heap */
+#define HPARENT(k) ((((k) - HEAP0 - 1) / DHEAP) + HEAP0)
+#define UPHEAP_DONE(p,k) ((p) == (k))
+
+/* away from the root: sift the element at index k of a heap with N
+ * entries (stored from heap [HEAP0] on) down until none of its up to
+ * DHEAP children has a smaller timestamp; every entry that is moved
+ * gets its watcher's active field set to its new index */
+inline_speed void
+downheap (ANHE *heap, int N, int k)
+{
+ ANHE he = heap [k]; /* the element being moved */
+ ANHE *E = heap + N + HEAP0; /* one past the last heap entry */
+
+ for (;;)
+ {
+ ev_tstamp minat;
+ ANHE *minpos;
+ ANHE *pos = heap + DHEAP * (k - HEAP0) + HEAP0 + 1; /* first child of k */
+
+ /* find minimum child */
+ if (expect_true (pos + DHEAP - 1 < E))
+ {
+ /* fast path */ (minpos = pos + 0), (minat = ANHE_at (*minpos));
+ if ( ANHE_at (pos [1]) < minat) (minpos = pos + 1), (minat = ANHE_at (*minpos));
+ if ( ANHE_at (pos [2]) < minat) (minpos = pos + 2), (minat = ANHE_at (*minpos));
+ if ( ANHE_at (pos [3]) < minat) (minpos = pos + 3), (minat = ANHE_at (*minpos));
+ }
+ else if (pos < E)
+ {
+ /* slow path */ (minpos = pos + 0), (minat = ANHE_at (*minpos));
+ if (pos + 1 < E && ANHE_at (pos [1]) < minat) (minpos = pos + 1), (minat = ANHE_at (*minpos));
+ if (pos + 2 < E && ANHE_at (pos [2]) < minat) (minpos = pos + 2), (minat = ANHE_at (*minpos));
+ if (pos + 3 < E && ANHE_at (pos [3]) < minat) (minpos = pos + 3), (minat = ANHE_at (*minpos));
+ }
+ else
+ break; /* k has no children */
+
+ if (ANHE_at (he) <= minat)
+ break; /* heap order already holds at k */
+
+ heap [k] = *minpos;
+ ev_active (ANHE_w (*minpos)) = k;
+
+ k = minpos - heap;
+ }
+
+ heap [k] = he;
+ ev_active (ANHE_w (he)) = k;
+}
+
+#else /* 4HEAP */
+
+#define HEAP0 1
+#define HPARENT(k) ((k) >> 1)
+#define UPHEAP_DONE(p,k) (!(p))
+
+/* away from the root: binary-heap variant. sifts the element at index k
+ * of a heap with N entries down until neither child has a smaller
+ * timestamp; every entry that is moved gets its watcher's active field
+ * set to its new index */
+inline_speed void
+downheap (ANHE *heap, int N, int k)
+{
+ ANHE he = heap [k]; /* the element being moved */
+
+ for (;;)
+ {
+ int c = k << 1; /* index of the first child */
+
+ if (c >= N + HEAP0)
+ break; /* k has no children */
+
+ c += c + 1 < N + HEAP0 && ANHE_at (heap [c]) > ANHE_at (heap [c + 1])
+ ? 1 : 0; /* pick the second child when it exists and is smaller */
+
+ if (ANHE_at (he) <= ANHE_at (heap [c]))
+ break;
+
+ heap [k] = heap [c];
+ ev_active (ANHE_w (heap [k])) = k;
+
+ k = c;
+ }
+
+ heap [k] = he;
+ ev_active (ANHE_w (he)) = k;
+}
+#endif
+
+/* towards the root: move the element at index k up for as long as its */
+/* parent has a larger timestamp, keeping the active index of every */
+/* moved watcher in sync with its new position */
+inline_speed void
+upheap (ANHE *heap, int k)
+{
+  ANHE moving = heap [k];
+  int parent = HPARENT (k);
+
+  while (!UPHEAP_DONE (parent, k)
+         && !(ANHE_at (heap [parent]) <= ANHE_at (moving)))
+    {
+      heap [k] = heap [parent];
+      ev_active (ANHE_w (heap [k])) = k;
+
+      k = parent;
+      parent = HPARENT (k);
+    }
+
+  heap [k] = moving;
+  ev_active (ANHE_w (moving)) = k;
+}
+
+/* move an element suitably so it is in a correct place: it goes up when it */
+/* is not the first entry and is no larger than its parent, down otherwise */
+inline_size void
+adjustheap (ANHE *heap, int N, int k)
+{
+  int rises = k > HEAP0 && ANHE_at (heap [k]) <= ANHE_at (heap [HPARENT (k)]);
+
+  if (rises)
+    upheap (heap, k);
+  else
+    downheap (heap, N, k);
+}
+
+/* rebuild the heap: this function is used only once and executed rarely */
+inline_size void
+reheap (ANHE *heap, int N)
+{
+  int k;
+
+  /* we don't use floyds algorithm, upheap is simpler and is more cache-efficient */
+  /* also, this is easy to implement and correct for both 2-heaps and 4-heaps */
+  for (k = HEAP0; k < N + HEAP0; ++k)
+    upheap (heap, k);
+}
+
+/*****************************************************************************/
+
+/* associate signal watchers to a signal */
+typedef struct
+{
+ EV_ATOMIC_T pending;
+#if EV_MULTIPLICITY
+ EV_P;
+#endif
+ WL head;
+} ANSIG;
+
+static ANSIG signals [EV_NSIG - 1];
+
+/*****************************************************************************/
+
+/* used to prepare libev internal fd's: on win32 the underlying socket is
+ * switched to non-blocking mode via FIONBIO, elsewhere FD_CLOEXEC is set
+ * on the fd and its status flags are set to O_NONBLOCK. the results of
+ * these calls are not checked */
+/* this is not fork-safe */
+inline_speed void
+fd_intern (int fd)
+{
+#ifdef _WIN32
+ unsigned long arg = 1;
+ ioctlsocket (_get_osfhandle (fd), FIONBIO, &arg);
+#else
+ fcntl (fd, F_SETFD, FD_CLOEXEC);
+ fcntl (fd, F_SETFL, O_NONBLOCK);
+#endif
+}
+
+static void noinline /* create the wakeup fd (an eventfd when available, otherwise a pipe)
+ * and start the pipe_w watcher on it, unless pipe_w is already active */
+evpipe_init (EV_P)
+{
+ if (!ev_is_active (&pipe_w))
+ {
+#if EV_USE_EVENTFD
+ evfd = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (evfd < 0 && errno == EINVAL) /* retry without flags when the flagged call was rejected */
+ evfd = eventfd (0, 0);
+
+ if (evfd >= 0)
+ {
+ evpipe [0] = -1;
+ fd_intern (evfd); /* doing it twice doesn't hurt */
+ ev_io_set (&pipe_w, evfd, EV_READ);
+ }
+ else
+#endif
+ {
+ while (pipe (evpipe)) /* retried for as long as pipe fails and ev_syserr returns */
+ ev_syserr ("(libev) error creating signal/async pipe");
+
+ fd_intern (evpipe [0]);
+ fd_intern (evpipe [1]);
+ ev_io_set (&pipe_w, evpipe [0], EV_READ); /* watch the read end */
+ }
+
+ ev_io_start (EV_A_ &pipe_w);
+ ev_unref (EV_A); /* watcher should not keep loop alive */
+ }
+}
+
+inline_size void /* set *flag and write to the wakeup fd (eventfd or pipe write end), but only
+ * if *flag was not already set; errno is restored afterwards */
+evpipe_write (EV_P_ EV_ATOMIC_T *flag)
+{
+ if (!*flag)
+ {
+ int old_errno = errno; /* save errno because write might clobber it */
+
+ *flag = 1;
+
+#if EV_USE_EVENTFD
+ if (evfd >= 0)
+ {
+ uint64_t counter = 1;
+ write (evfd, &counter, sizeof (uint64_t));
+ }
+ else
+#endif
+ write (evpipe [1], &old_errno, 1); /* one arbitrary byte is written, its value is never inspected by pipecb */
+
+ errno = old_errno;
+ }
+}
+
+/* called whenever the libev signal pipe */
+/* got some events (signal, async): reads from the wakeup fd, then feeds
+ * every signal whose pending flag is set and, with EV_ASYNC_ENABLE,
+ * every async watcher whose sent flag is set */
+static void
+pipecb (EV_P_ ev_io *iow, int revents)
+{
+ int i;
+
+#if EV_USE_EVENTFD
+ if (evfd >= 0)
+ {
+ uint64_t counter;
+ read (evfd, &counter, sizeof (uint64_t)); /* value read is not used */
+ }
+ else
+#endif
+ {
+ char dummy;
+ read (evpipe [0], &dummy, 1); /* value read is not used */
+ }
+
+ if (sig_pending)
+ {
+ sig_pending = 0;
+
+ for (i = EV_NSIG - 1; i--; ) /* i is the index into signals, i + 1 the signal number */
+ if (expect_false (signals [i].pending))
+ ev_feed_signal_event (EV_A_ i + 1);
+ }
+
+#if EV_ASYNC_ENABLE
+ if (async_pending)
+ {
+ async_pending = 0;
+
+ for (i = asynccnt; i--; )
+ if (asyncs [i]->sent)
+ {
+ asyncs [i]->sent = 0;
+ ev_feed_event (EV_A_ asyncs [i], EV_ASYNC);
+ }
+ }
+#endif
+}
+
+/*****************************************************************************/
+
+static void /* signal handler: marks signals [signum - 1] as pending and
+ * calls evpipe_write with the sig_pending flag */
+ev_sighandler (int signum)
+{
+#if EV_MULTIPLICITY
+ EV_P = signals [signum - 1].loop; /* the loop recorded for this signal */
+#endif
+
+#if _WIN32
+ signal (signum, ev_sighandler); /* install this handler again for signum */
+#endif
+
+ signals [signum - 1].pending = 1;
+ evpipe_write (EV_A_ &sig_pending);
+}
+
+/* feed an EV_SIGNAL event to every watcher registered for SIGNUM and clear */
+/* the signal's pending flag. signal numbers outside 1 .. EV_NSIG - 1 are */
+/* ignored, as are (with EV_MULTIPLICITY) signals recorded for another loop */
+void noinline
+ev_feed_signal_event (EV_P_ int signum)
+{
+  WL w;
+
+  /* signals [] has EV_NSIG - 1 entries indexed by signum - 1, so */
+  /* signum == EV_NSIG would already index one past the end */
+  if (expect_false (signum <= 0 || signum >= EV_NSIG))
+    return;
+
+  --signum;
+
+#if EV_MULTIPLICITY
+  /* it is permissible to try to feed a signal to the wrong loop */
+  /* or, likely more useful, feeding a signal nobody is waiting for */
+
+  if (expect_false (signals [signum].loop != EV_A))
+    return;
+#endif
+
+  signals [signum].pending = 0;
+
+  for (w = signals [signum].head; w; w = w->next)
+    ev_feed_event (EV_A_ (W)w, EV_SIGNAL);
+}
+
+#if EV_USE_SIGNALFD
+/* io callback for the signalfd: reads signalfd_siginfo records from sigfd */
+/* and feeds each reported signal number to ev_feed_signal_event, until a */
+/* read returns fewer bytes than the buffer holds (or fails) */
+static void
+sigfdcb (EV_P_ ev_io *iow, int revents)
+{
+  struct signalfd_siginfo si[2], *sip; /* these structs are big */
+
+  for (;;)
+    {
+      ssize_t res = read (sigfd, si, sizeof (si));
+
+      /* only walk the buffer after a successful read: with res == -1 the */
+      /* bound (char *)si + res would point before the array */
+      if (res > 0)
+        for (sip = si; (char *)sip < (char *)si + res; ++sip)
+          ev_feed_signal_event (EV_A_ sip->ssi_signo);
+
+      if (res < (ssize_t)sizeof (si))
+        break;
+    }
+}
+#endif
+
+/*****************************************************************************/
+
+static WL childs [EV_PID_HASHSIZE];
+
+#ifndef _WIN32
+
+static ev_signal childev;
+
+#ifndef WIFCONTINUED
+# define WIFCONTINUED(status) 0
+#endif
+
+/* handle a single child status event: walks the childs bucket selected by */
+/* CHAIN and feeds EV_CHILD to every watcher whose pid is PID or zero; */
+/* stopped/continued statuses are only delivered to watchers with bit 0 */
+/* of their flags set */
+inline_speed void
+child_reap (EV_P_ int chain, int pid, int status)
+{
+  int traced = WIFSTOPPED (status) || WIFCONTINUED (status);
+  WL node;
+
+  for (node = childs [chain & (EV_PID_HASHSIZE - 1)]; node; node = node->next)
+    {
+      ev_child *w = (ev_child *)node;
+
+      if (w->pid != pid && w->pid)
+        continue;
+
+      if (traced && !(w->flags & 1))
+        continue;
+
+      ev_set_priority (w, EV_MAXPRI); /* need to do it *now*, this *must* be the same prio as the signal watcher itself */
+      w->rpid    = pid;
+      w->rstatus = status;
+      ev_feed_event (EV_A_ (W)w, EV_CHILD);
+    }
+}
+
+#ifndef WCONTINUED
+# define WCONTINUED 0
+#endif
+
+/* called on sigchld etc., calls waitpid: collects the status of one child
+ * without blocking, re-feeds the signal watcher so it runs again, and
+ * passes the status to child_reap. returns without doing anything when
+ * waitpid reports no child */
+static void
+childcb (EV_P_ ev_signal *sw, int revents)
+{
+ int pid, status;
+
+ /* some systems define WCONTINUED but then fail to support it (linux 2.4) */
+ if (0 >= (pid = waitpid (-1, &status, WNOHANG | WUNTRACED | WCONTINUED)))
+ if (!WCONTINUED
+ || errno != EINVAL
+ || 0 >= (pid = waitpid (-1, &status, WNOHANG | WUNTRACED))) /* retry without WCONTINUED only after EINVAL */
+ return;
+
+ /* make sure we are called again until all children have been reaped */
+ /* we need to do it this way so that the callback gets called before we continue */
+ ev_feed_event (EV_A_ (W)sw, EV_SIGNAL);
+
+ child_reap (EV_A_ pid, pid, status); /* bucket selected by the pid */
+ if (EV_PID_HASHSIZE > 1)
+ child_reap (EV_A_ 0, pid, status); /* this might trigger a watcher twice, but feed_event catches that */
+}
+
+#endif
+
+/*****************************************************************************/
+
+#if EV_USE_PORT
+# include "ev_port.c"
+#endif
+#if EV_USE_KQUEUE
+# include "ev_kqueue.c"
+#endif
+#if EV_USE_EPOLL
+# include "ev_epoll.c"
+#endif
+#if EV_USE_POLL
+# include "ev_poll.c"
+#endif
+#if EV_USE_SELECT
+# include "ev_select.c"
+#endif
+
+/* returns the compile-time constant EV_VERSION_MAJOR */
+int
+ev_version_major (void)
+{
+ return EV_VERSION_MAJOR;
+}
+
+/* returns the compile-time constant EV_VERSION_MINOR */
+int
+ev_version_minor (void)
+{
+ return EV_VERSION_MINOR;
+}
+
+/* nonzero when real and effective user or group ids differ, i.e. we run with */
+/* elevated privileges and should ignore env variables; always 0 on _WIN32 */
+int inline_size
+enable_secure (void)
+{
+#ifdef _WIN32
+  return 0;
+#else
+  if (getuid () != geteuid ())
+    return 1;
+
+  return getgid () != getegid ();
+#endif
+}
+
+/* bitmask of EVBACKEND_* values for every backend enabled at compile time */
+unsigned int
+ev_supported_backends (void)
+{
+  unsigned int compiled_in = 0;
+
+  compiled_in |= EV_USE_PORT   ? EVBACKEND_PORT   : 0;
+  compiled_in |= EV_USE_KQUEUE ? EVBACKEND_KQUEUE : 0;
+  compiled_in |= EV_USE_EPOLL  ? EVBACKEND_EPOLL  : 0;
+  compiled_in |= EV_USE_POLL   ? EVBACKEND_POLL   : 0;
+  compiled_in |= EV_USE_SELECT ? EVBACKEND_SELECT : 0;
+
+  return compiled_in;
+}
+
+/* supported backends minus those considered unreliable on this platform */
+unsigned int
+ev_recommended_backends (void)
+{
+  unsigned int unwanted = 0;
+
+#ifndef __NetBSD__
+  /* kqueue is borked on everything but netbsd apparently */
+  /* it usually doesn't work correctly on anything but sockets and pipes */
+  unwanted |= EVBACKEND_KQUEUE;
+#endif
+#ifdef __APPLE__
+  /* only select works correctly on that "unix-certified" platform */
+  unwanted |= EVBACKEND_KQUEUE; /* horribly broken, even for sockets */
+  unwanted |= EVBACKEND_POLL;   /* poll is based on kqueue from 10.5 onwards */
+#endif
+
+  return ev_supported_backends () & ~unwanted;
+}
+
+/* bitmask of backends considered embeddable: kqueue and port, with epoll masked out */
+unsigned int
+ev_embeddable_backends (void)
+{
+  int candidates = EVBACKEND_EPOLL | EVBACKEND_KQUEUE | EVBACKEND_PORT;
+
+  /* epoll embeddability broken on all linux versions up to at least 2.6.23 */
+  /* please fix it and tell me how to detect the fix */
+  return candidates & ~EVBACKEND_EPOLL;
+}
+
+/* returns the loop's backend value as chosen by loop_init (0 if none was set up) */
+unsigned int
+ev_backend (EV_P)
+{
+ return backend;
+}
+
+#if EV_MINIMAL < 2
+/* returns loop_count, which ev_loop increments once per iteration before polling */
+unsigned int
+ev_loop_count (EV_P)
+{
+ return loop_count;
+}
+
+/* returns loop_depth, which ev_loop increments on entry and decrements on exit */
+unsigned int
+ev_loop_depth (EV_P)
+{
+ return loop_depth;
+}
+
+/* sets io_blocktime; when nonzero, ev_loop sleeps before polling so that */
+/* roughly this much time passes between iterations */
+void
+ev_set_io_collect_interval (EV_P_ ev_tstamp interval)
+{
+ io_blocktime = interval;
+}
+
+/* sets timeout_blocktime, the lower bound ev_loop applies to its computed wait time */
+void
+ev_set_timeout_collect_interval (EV_P_ ev_tstamp interval)
+{
+ timeout_blocktime = interval;
+}
+
+/* stores an opaque pointer in the loop; retrieve it with ev_userdata */
+void
+ev_set_userdata (EV_P_ void *data)
+{
+ userdata = data;
+}
+
+/* returns the pointer last stored with ev_set_userdata */
+void *
+ev_userdata (EV_P)
+{
+ return userdata;
+}
+
+/* replaces invoke_cb, which loop_init sets to ev_invoke_pending by default */
+/* NOTE(review): presumably this is what EV_INVOKE_PENDING calls - confirm against the macro */
+void ev_set_invoke_pending_cb (EV_P_ void (*invoke_pending_cb)(EV_P))
+{
+ invoke_cb = invoke_pending_cb;
+}
+
+/* stores the release and acquire callbacks in the loop */
+/* NOTE(review): where they are invoked is not visible here - presumably around the backend poll */
+void ev_set_loop_release_cb (EV_P_ void (*release)(EV_P), void (*acquire)(EV_P))
+{
+ release_cb = release;
+ acquire_cb = acquire;
+}
+#endif
+
+/* initialise a loop structure, must be zero-initialised */
+/* does nothing when a backend is already set; otherwise probes the clocks, */
+/* applies LIBEV_FLAGS, resets loop state and selects the first backend that initialises */
+static void noinline
+loop_init (EV_P_ unsigned int flags)
+{
+ if (!backend)
+ {
+#if EV_USE_REALTIME
+ /* probe once whether clock_gettime (CLOCK_REALTIME) works */
+ if (!have_realtime)
+ {
+ struct timespec ts;
+
+ if (!clock_gettime (CLOCK_REALTIME, &ts))
+ have_realtime = 1;
+ }
+#endif
+
+#if EV_USE_MONOTONIC
+ /* probe once whether clock_gettime (CLOCK_MONOTONIC) works */
+ if (!have_monotonic)
+ {
+ struct timespec ts;
+
+ if (!clock_gettime (CLOCK_MONOTONIC, &ts))
+ have_monotonic = 1;
+ }
+#endif
+
+ /* pid check not overridable via env */
+#ifndef _WIN32
+ if (flags & EVFLAG_FORKCHECK)
+ curpid = getpid ();
+#endif
+
+ /* LIBEV_FLAGS replaces the caller's flags entirely, unless EVFLAG_NOENV */
+ /* is set or we run with elevated privileges */
+ if (!(flags & EVFLAG_NOENV)
+ && !enable_secure ()
+ && getenv ("LIBEV_FLAGS"))
+ flags = atoi (getenv ("LIBEV_FLAGS"));
+
+ ev_rt_now = ev_time ();
+ mn_now = get_clock ();
+ now_floor = mn_now;
+ rtmn_diff = ev_rt_now - mn_now;
+#if EV_MINIMAL < 2
+ invoke_cb = ev_invoke_pending;
+#endif
+
+ io_blocktime = 0.;
+ timeout_blocktime = 0.;
+ backend = 0;
+ backend_fd = -1;
+ sig_pending = 0;
+#if EV_ASYNC_ENABLE
+ async_pending = 0;
+#endif
+#if EV_USE_INOTIFY
+ fs_fd = flags & EVFLAG_NOINOTIFY ? -1 : -2;
+#endif
+#if EV_USE_SIGNALFD
+ /* -2 makes ev_signal_start create the signalfd on first use, -1 disables it */
+ sigfd = flags & EVFLAG_SIGNALFD ? -2 : -1;
+#endif
+
+ /* no backend bits requested in the low 16 bits: use the recommended set */
+ if (!(flags & 0x0000ffffU))
+ flags |= ev_recommended_backends ();
+
+ /* try the backends in this fixed order, the first init returning nonzero wins */
+#if EV_USE_PORT
+ if (!backend && (flags & EVBACKEND_PORT )) backend = port_init (EV_A_ flags);
+#endif
+#if EV_USE_KQUEUE
+ if (!backend && (flags & EVBACKEND_KQUEUE)) backend = kqueue_init (EV_A_ flags);
+#endif
+#if EV_USE_EPOLL
+ if (!backend && (flags & EVBACKEND_EPOLL )) backend = epoll_init (EV_A_ flags);
+#endif
+#if EV_USE_POLL
+ if (!backend && (flags & EVBACKEND_POLL )) backend = poll_init (EV_A_ flags);
+#endif
+#if EV_USE_SELECT
+ if (!backend && (flags & EVBACKEND_SELECT)) backend = select_init (EV_A_ flags);
+#endif
+
+ ev_prepare_init (&pending_w, pendingcb);
+
+ ev_init (&pipe_w, pipecb);
+ ev_set_priority (&pipe_w, EV_MAXPRI);
+ }
+}
+
+/* free up a loop structure */
+/* closes the loop's file descriptors, runs the backend's destroy function, */
+/* frees the watcher arrays and resets backend to 0; the structure itself is not freed */
+static void noinline
+loop_destroy (EV_P)
+{
+ int i;
+
+ /* the wakeup fds are only closed if the pipe watcher was started */
+ if (ev_is_active (&pipe_w))
+ {
+ /*ev_ref (EV_A);*/
+ /*ev_io_stop (EV_A_ &pipe_w);*/
+
+#if EV_USE_EVENTFD
+ if (evfd >= 0)
+ close (evfd);
+#endif
+
+ if (evpipe [0] >= 0)
+ {
+ EV_WIN32_CLOSE_FD (evpipe [0]);
+ EV_WIN32_CLOSE_FD (evpipe [1]);
+ }
+ }
+
+#if EV_USE_SIGNALFD
+ if (ev_is_active (&sigfd_w))
+ close (sigfd);
+#endif
+
+#if EV_USE_INOTIFY
+ if (fs_fd >= 0)
+ close (fs_fd);
+#endif
+
+ if (backend_fd >= 0)
+ close (backend_fd);
+
+ /* backend-specific cleanup for whichever backend is in use */
+#if EV_USE_PORT
+ if (backend == EVBACKEND_PORT ) port_destroy (EV_A);
+#endif
+#if EV_USE_KQUEUE
+ if (backend == EVBACKEND_KQUEUE) kqueue_destroy (EV_A);
+#endif
+#if EV_USE_EPOLL
+ if (backend == EVBACKEND_EPOLL ) epoll_destroy (EV_A);
+#endif
+#if EV_USE_POLL
+ if (backend == EVBACKEND_POLL ) poll_destroy (EV_A);
+#endif
+#if EV_USE_SELECT
+ if (backend == EVBACKEND_SELECT) select_destroy (EV_A);
+#endif
+
+ /* per-priority arrays */
+ for (i = NUMPRI; i--; )
+ {
+ array_free (pending, [i]);
+#if EV_IDLE_ENABLE
+ array_free (idle, [i]);
+#endif
+ }
+
+ ev_free (anfds); anfds = 0; anfdmax = 0;
+
+ /* have to use the microsoft-never-gets-it-right macro */
+ array_free (rfeed, EMPTY);
+ array_free (fdchange, EMPTY);
+ array_free (timer, EMPTY);
+#if EV_PERIODIC_ENABLE
+ array_free (periodic, EMPTY);
+#endif
+#if EV_FORK_ENABLE
+ array_free (fork, EMPTY);
+#endif
+ array_free (prepare, EMPTY);
+ array_free (check, EMPTY);
+#if EV_ASYNC_ENABLE
+ array_free (async, EMPTY);
+#endif
+
+ /* a zero backend lets loop_init run again on this structure */
+ backend = 0;
+}
+
+#if EV_USE_INOTIFY
+inline_size void infy_fork (EV_P);
+#endif
+
+/* re-establish kernel state after a fork: calls the backend and inotify fork */
+/* handlers, recreates the wakeup pipe/eventfd if it was in use, and clears postfork */
+inline_size void
+loop_fork (EV_P)
+{
+#if EV_USE_PORT
+ if (backend == EVBACKEND_PORT ) port_fork (EV_A);
+#endif
+#if EV_USE_KQUEUE
+ if (backend == EVBACKEND_KQUEUE) kqueue_fork (EV_A);
+#endif
+#if EV_USE_EPOLL
+ if (backend == EVBACKEND_EPOLL ) epoll_fork (EV_A);
+#endif
+#if EV_USE_INOTIFY
+ infy_fork (EV_A);
+#endif
+
+ if (ev_is_active (&pipe_w))
+ {
+ /* this "locks" the handlers against writing to the pipe */
+ /* while we modify the fd vars */
+ sig_pending = 1;
+#if EV_ASYNC_ENABLE
+ async_pending = 1;
+#endif
+
+ /* the ref balances the unref that ev_io_stop performs on the pipe watcher */
+ ev_ref (EV_A);
+ ev_io_stop (EV_A_ &pipe_w);
+
+#if EV_USE_EVENTFD
+ if (evfd >= 0)
+ close (evfd);
+#endif
+
+ if (evpipe [0] >= 0)
+ {
+ EV_WIN32_CLOSE_FD (evpipe [0]);
+ EV_WIN32_CLOSE_FD (evpipe [1]);
+ }
+
+ evpipe_init (EV_A);
+ /* now iterate over everything, in case we missed something */
+ pipecb (EV_A_ &pipe_w, EV_READ);
+ }
+
+ postfork = 0;
+}
+
+#if EV_MULTIPLICITY
+
+/* allocate and initialise a new event loop using the given flags */
+/* returns the loop, or 0 when no backend could be initialised */
+struct ev_loop *
+ev_loop_new (unsigned int flags)
+{
+  EV_P = (struct ev_loop *)ev_malloc (sizeof (struct ev_loop));
+
+  /* loop_init requires a zero-initialised structure */
+  memset (EV_A, 0, sizeof (struct ev_loop));
+  loop_init (EV_A_ flags);
+
+  if (ev_backend (EV_A))
+    return EV_A;
+
+  /* no usable backend: release the structure instead of leaking it */
+  ev_free (EV_A);
+  return 0;
+}
+
+/* tears down a loop via loop_destroy and then frees the loop structure itself */
+void
+ev_loop_destroy (EV_P)
+{
+ loop_destroy (EV_A);
+ ev_free (loop);
+}
+
+/* only sets the postfork flag; ev_loop calls loop_fork on its next iteration */
+void
+ev_loop_fork (EV_P)
+{
+ postfork = 1; /* must be in line with ev_default_fork */
+}
+#endif /* multiplicity */
+
+#if EV_VERIFY
+/* sanity-check a watcher: priority in range, and if pending, present in its pending slot */
+static void noinline
+verify_watcher (EV_P_ W w)
+{
+  assert (("libev: watcher has invalid priority", ABSPRI (w) >= 0 && ABSPRI (w) < NUMPRI));
+
+  if (!w->pending)
+    return;
+
+  assert (("libev: pending watcher not on pending queue", pendings [ABSPRI (w)][w->pending - 1].w == w));
+}
+
+/* check the heap invariants for the N elements stored from index HEAP0 onwards */
+static void noinline
+verify_heap (EV_P_ ANHE *heap, int N)
+{
+  int i = HEAP0;
+
+  while (i < N + HEAP0)
+    {
+      assert (("libev: active index mismatch in heap", ev_active (ANHE_w (heap [i])) == i));
+      assert (("libev: heap condition violated", i == HEAP0 || ANHE_at (heap [HPARENT (i)]) <= ANHE_at (heap [i])));
+      assert (("libev: heap at cache mismatch", ANHE_at (heap [i]) == ev_at (ANHE_w (heap [i]))));
+
+      verify_watcher (EV_A_ (W)ANHE_w (heap [i]));
+
+      ++i;
+    }
+}
+
+/* check a plain watcher array: element at index k must have active == k + 1 */
+/* walks from the last element down to the first */
+static void noinline
+array_verify (EV_P_ W *ws, int cnt)
+{
+ while (cnt--)
+ {
+ assert (("libev: active index mismatch", ev_active (ws [cnt]) == cnt + 1));
+ verify_watcher (EV_A_ ws [cnt]);
+ }
+}
+#endif
+
+#if EV_MINIMAL < 2
+/* run consistency checks over the loop's data structures */
+/* the body is compiled only when EV_VERIFY is set; otherwise this is a no-op */
+void
+ev_loop_verify (EV_P)
+{
+#if EV_VERIFY
+ int i;
+ WL w;
+
+ assert (activecnt >= -1);
+
+ assert (fdchangemax >= fdchangecnt);
+ for (i = 0; i < fdchangecnt; ++i)
+ assert (("libev: negative fd in fdchanges", fdchanges [i] >= 0));
+
+ /* every io watcher on an fd list must be active and carry that fd */
+ assert (anfdmax >= 0);
+ for (i = 0; i < anfdmax; ++i)
+ for (w = anfds [i].head; w; w = w->next)
+ {
+ verify_watcher (EV_A_ (W)w);
+ assert (("libev: inactive fd watcher on anfd list", ev_active (w) == 1));
+ assert (("libev: fd mismatch between watcher and anfd", ((ev_io *)w)->fd == i));
+ }
+
+ assert (timermax >= timercnt);
+ verify_heap (EV_A_ timers, timercnt);
+
+#if EV_PERIODIC_ENABLE
+ assert (periodicmax >= periodiccnt);
+ verify_heap (EV_A_ periodics, periodiccnt);
+#endif
+
+ for (i = NUMPRI; i--; )
+ {
+ assert (pendingmax [i] >= pendingcnt [i]);
+#if EV_IDLE_ENABLE
+ assert (idleall >= 0);
+ assert (idlemax [i] >= idlecnt [i]);
+ array_verify (EV_A_ (W *)idles [i], idlecnt [i]);
+#endif
+ }
+
+#if EV_FORK_ENABLE
+ assert (forkmax >= forkcnt);
+ array_verify (EV_A_ (W *)forks, forkcnt);
+#endif
+
+#if EV_ASYNC_ENABLE
+ assert (asyncmax >= asynccnt);
+ array_verify (EV_A_ (W *)asyncs, asynccnt);
+#endif
+
+ assert (preparemax >= preparecnt);
+ array_verify (EV_A_ (W *)prepares, preparecnt);
+
+ assert (checkmax >= checkcnt);
+ array_verify (EV_A_ (W *)checks, checkcnt);
+
+ /* disabled: child and signal watcher checks are not implemented */
+# if 0
+ for (w = (ev_child *)childs [chain & (EV_PID_HASHSIZE - 1)]; w; w = (ev_child *)((WL)w)->next)
+ for (signum = EV_NSIG; signum--; ) if (signals [signum].pending)
+# endif
+#endif
+}
+#endif
+
+/* initialise the default loop on first call and return it (0 on failure) */
+/* with EV_MULTIPLICITY this is ev_default_loop_init and returns a pointer, */
+/* otherwise it is ev_default_loop and returns an int flag */
+#if EV_MULTIPLICITY
+struct ev_loop *
+ev_default_loop_init (unsigned int flags)
+#else
+int
+ev_default_loop (unsigned int flags)
+#endif
+{
+ if (!ev_default_loop_ptr)
+ {
+#if EV_MULTIPLICITY
+ EV_P = ev_default_loop_ptr = &default_loop_struct;
+#else
+ ev_default_loop_ptr = 1;
+#endif
+
+ loop_init (EV_A_ flags);
+
+ if (ev_backend (EV_A))
+ {
+#ifndef _WIN32
+ /* the default loop is the one that handles SIGCHLD / child watchers */
+ ev_signal_init (&childev, childcb, SIGCHLD);
+ ev_set_priority (&childev, EV_MAXPRI);
+ ev_signal_start (EV_A_ &childev);
+ ev_unref (EV_A); /* child watcher should not keep loop alive */
+#endif
+ }
+ else
+ /* no backend could be initialised: report failure */
+ ev_default_loop_ptr = 0;
+ }
+
+ return ev_default_loop_ptr;
+}
+
+/* destroy the default loop: clears ev_default_loop_ptr, stops the SIGCHLD */
+/* watcher and releases the loop's resources via loop_destroy */
+void
+ev_default_destroy (void)
+{
+#if EV_MULTIPLICITY
+ EV_P = ev_default_loop_ptr;
+#endif
+
+ ev_default_loop_ptr = 0;
+
+#ifndef _WIN32
+ /* undo the ev_unref done when the child watcher was started */
+ ev_ref (EV_A); /* child watcher */
+ ev_signal_stop (EV_A_ &childev);
+#endif
+
+ loop_destroy (EV_A);
+}
+
+/* sets the postfork flag on the default loop; ev_loop calls loop_fork on its next iteration */
+void
+ev_default_fork (void)
+{
+#if EV_MULTIPLICITY
+ EV_P = ev_default_loop_ptr;
+#endif
+
+ postfork = 1; /* must be in line with ev_loop_fork */
+}
+
+/*****************************************************************************/
+
+/* invoke the callback of watcher w with the given event mask via EV_CB_INVOKE */
+void
+ev_invoke (EV_P_ void *w, int revents)
+{
+ EV_CB_INVOKE ((W)w, revents);
+}
+
+/* total number of pending events, summed over all priorities */
+unsigned int
+ev_pending_count (EV_P)
+{
+  unsigned int total = 0;
+  int pri = 0;
+
+  while (pri < NUMPRI)
+    {
+      total += pendingcnt [pri];
+      ++pri;
+    }
+
+  return total;
+}
+
+/* run the callbacks of all pending events, highest priority first, */
+/* taking entries from the end of each priority's pending array */
+void noinline
+ev_invoke_pending (EV_P)
+{
+  int pri;
+
+  for (pri = NUMPRI - 1; pri >= 0; --pri)
+    while (pendingcnt [pri])
+      {
+        ANPENDING *entry;
+
+        --pendingcnt [pri];
+        entry = &pendings [pri][pendingcnt [pri]];
+
+        /*assert (("libev: non-pending watcher on pending list", entry->w->pending));*/
+        /* ^ this is no longer true, as pending_w could be here */
+
+        entry->w->pending = 0;
+        EV_CB_INVOKE (entry->w, entry->events);
+        EV_FREQUENT_CHECK;
+      }
+}
+
+#if EV_IDLE_ENABLE
+/* queue idle watchers of the highest priority that has any, but only when */
+/* nothing is pending at that or any higher priority */
+inline_size void
+idle_reify (EV_P)
+{
+  int pri;
+
+  if (expect_true (!idleall))
+    return;
+
+  for (pri = NUMPRI - 1; pri >= 0; --pri)
+    {
+      /* something with at least this priority is pending: not idle */
+      if (pendingcnt [pri])
+        return;
+
+      if (!idlecnt [pri])
+        continue;
+
+      queue_events (EV_A_ (W *)idles [pri], idlecnt [pri], EV_IDLE);
+      return;
+    }
+}
+#endif
+
+/* make timers pending */
+/* takes every timer whose expiry lies before mn_now off the heap top, reschedules */
+/* (repeating) or stops (one-shot) it, and feeds EV_TIMEOUT for all of them at the end */
+inline_size void
+timers_reify (EV_P)
+{
+ EV_FREQUENT_CHECK;
+
+ if (timercnt && ANHE_at (timers [HEAP0]) < mn_now)
+ {
+ do
+ {
+ ev_timer *w = (ev_timer *)ANHE_w (timers [HEAP0]);
+
+ /*assert (("libev: inactive timer on timer heap detected", ev_is_active (w)));*/
+
+ /* first reschedule or stop timer */
+ if (w->repeat)
+ {
+ ev_at (w) += w->repeat;
+ /* never schedule into the past, even if several periods were missed */
+ if (ev_at (w) < mn_now)
+ ev_at (w) = mn_now;
+
+ assert (("libev: negative ev_timer repeat value found while processing timers", w->repeat > 0.));
+
+ ANHE_at_cache (timers [HEAP0]);
+ downheap (timers, timercnt, HEAP0);
+ }
+ else
+ ev_timer_stop (EV_A_ w); /* nonrepeating: stop timer */
+
+ EV_FREQUENT_CHECK;
+ feed_reverse (EV_A_ (W)w);
+ }
+ while (timercnt && ANHE_at (timers [HEAP0]) < mn_now);
+
+ feed_reverse_done (EV_A_ EV_TIMEOUT);
+ }
+}
+
+#if EV_PERIODIC_ENABLE
+/* make periodics pending */
+/* like timers_reify, but compares against ev_rt_now and recomputes the next */
+/* trigger time from the reschedule callback or from offset/interval */
+inline_size void
+periodics_reify (EV_P)
+{
+ EV_FREQUENT_CHECK;
+
+ while (periodiccnt && ANHE_at (periodics [HEAP0]) < ev_rt_now)
+ {
+ /* NOTE(review): feed_count is never read in this block */
+ int feed_count = 0;
+
+ do
+ {
+ ev_periodic *w = (ev_periodic *)ANHE_w (periodics [HEAP0]);
+
+ /*assert (("libev: inactive timer on periodic heap detected", ev_is_active (w)));*/
+
+ /* first reschedule or stop timer */
+ if (w->reschedule_cb)
+ {
+ ev_at (w) = w->reschedule_cb (w, ev_rt_now);
+
+ assert (("libev: ev_periodic reschedule callback returned time in the past", ev_at (w) >= ev_rt_now));
+
+ ANHE_at_cache (periodics [HEAP0]);
+ downheap (periodics, periodiccnt, HEAP0);
+ }
+ else if (w->interval)
+ {
+ ev_at (w) = w->offset + ceil ((ev_rt_now - w->offset) / w->interval) * w->interval;
+ /* if next trigger time is not sufficiently in the future, put it there */
+ /* this might happen because of floating point inexactness */
+ if (ev_at (w) - ev_rt_now < TIME_EPSILON)
+ {
+ ev_at (w) += w->interval;
+
+ /* if interval is unreasonably low we might still have a time in the past */
+ /* so correct this. this will make the periodic very inexact, but the user */
+ /* has effectively asked to get triggered more often than possible */
+ if (ev_at (w) < ev_rt_now)
+ ev_at (w) = ev_rt_now;
+ }
+
+ ANHE_at_cache (periodics [HEAP0]);
+ downheap (periodics, periodiccnt, HEAP0);
+ }
+ else
+ ev_periodic_stop (EV_A_ w); /* nonrepeating: stop timer */
+
+ EV_FREQUENT_CHECK;
+ feed_reverse (EV_A_ (W)w);
+ }
+ while (periodiccnt && ANHE_at (periodics [HEAP0]) < ev_rt_now);
+
+ feed_reverse_done (EV_A_ EV_PERIODIC);
+ }
+}
+
+/* simply recalculate all periodics */
+/* TODO: maybe ensure that at least one event happens when jumping forward? */
+/* recompute the trigger time of every periodic from ev_rt_now, then rebuild the heap */
+static void noinline
+periodics_reschedule (EV_P)
+{
+  int idx;
+
+  /* adjust periodics after time jump */
+  for (idx = 0; idx < periodiccnt; ++idx)
+    {
+      ANHE *he = periodics + idx + HEAP0;
+      ev_periodic *w = (ev_periodic *)ANHE_w (*he);
+
+      if (w->reschedule_cb)
+        ev_at (w) = w->reschedule_cb (w, ev_rt_now);
+      else if (w->interval)
+        ev_at (w) = w->offset + ceil ((ev_rt_now - w->offset) / w->interval) * w->interval;
+
+      ANHE_at_cache (*he);
+    }
+
+  reheap (periodics, periodiccnt);
+}
+#endif
+
+/* shift the expiry time of every active timer by the same offset */
+static void noinline
+timers_reschedule (EV_P_ ev_tstamp adjust)
+{
+  int idx = 0;
+
+  while (idx < timercnt)
+    {
+      ANHE *slot = &timers [HEAP0 + idx];
+
+      ANHE_w (*slot)->at += adjust;
+      ANHE_at_cache (*slot);
+
+      ++idx;
+    }
+}
+
+/* fetch new monotonic and realtime times from the kernel */
+/* also detect if there was a timejump, and act accordingly */
+/* refreshes mn_now and ev_rt_now; on a detected time jump it reschedules periodics */
+/* (and, without a monotonic clock, shifts the timers too) */
+/* max_block is only used on the non-monotonic path, as the longest expected forward step */
+inline_speed void
+time_update (EV_P_ ev_tstamp max_block)
+{
+#if EV_USE_MONOTONIC
+ if (expect_true (have_monotonic))
+ {
+ int i;
+ ev_tstamp odiff = rtmn_diff;
+
+ mn_now = get_clock ();
+
+ /* only fetch the realtime clock every 0.5*MIN_TIMEJUMP seconds */
+ /* interpolate in the meantime */
+ if (expect_true (mn_now - now_floor < MIN_TIMEJUMP * .5))
+ {
+ ev_rt_now = rtmn_diff + mn_now;
+ return;
+ }
+
+ now_floor = mn_now;
+ ev_rt_now = ev_time ();
+
+ /* loop a few times, before making important decisions.
+ * on the choice of "4": one iteration isn't enough,
+ * in case we get preempted during the calls to
+ * ev_time and get_clock. a second call is almost guaranteed
+ * to succeed in that case, though. and looping a few more times
+ * doesn't hurt either as we only do this on time-jumps or
+ * in the unlikely event of having been preempted here.
+ */
+ for (i = 4; --i; )
+ {
+ rtmn_diff = ev_rt_now - mn_now;
+
+ if (expect_true (fabs (odiff - rtmn_diff) < MIN_TIMEJUMP))
+ return; /* all is well */
+
+ ev_rt_now = ev_time ();
+ mn_now = get_clock ();
+ now_floor = mn_now;
+ }
+
+ /* no timer adjustment, as the monotonic clock doesn't jump */
+ /* timers_reschedule (EV_A_ rtmn_diff - odiff) */
+# if EV_PERIODIC_ENABLE
+ periodics_reschedule (EV_A);
+# endif
+ }
+ else
+#endif
+ {
+ ev_rt_now = ev_time ();
+
+ /* realtime went backwards, or forwards by more than we could have blocked */
+ if (expect_false (mn_now > ev_rt_now || ev_rt_now > mn_now + max_block + MIN_TIMEJUMP))
+ {
+ /* adjust timers. this is easy, as the offset is the same for all of them */
+ timers_reschedule (EV_A_ ev_rt_now - mn_now);
+#if EV_PERIODIC_ENABLE
+ periodics_reschedule (EV_A);
+#endif
+ }
+
+ mn_now = ev_rt_now;
+ }
+}
+
+/* the main event loop */
+/* each iteration: detect forks, run fork and prepare watchers, reify fd changes, */
+/* compute the blocking time, poll the backend, update the time, queue expired */
+/* timers/periodics, idle and check watchers, then invoke everything pending. */
+/* flags: EVLOOP_NONBLOCK polls without blocking; it and EVLOOP_ONESHOT both */
+/* end the loop after a single iteration. */
+void
+ev_loop (EV_P_ int flags)
+{
+#if EV_MINIMAL < 2
+ ++loop_depth;
+#endif
+
+ assert (("libev: ev_loop recursion during release detected", loop_done != EVUNLOOP_RECURSE));
+
+ loop_done = EVUNLOOP_CANCEL;
+
+ EV_INVOKE_PENDING; /* in case we recurse, ensure ordering stays nice and clean */
+
+ do
+ {
+#if EV_VERIFY >= 2
+ ev_loop_verify (EV_A);
+#endif
+
+#ifndef _WIN32
+ /* curpid is only nonzero when EVFLAG_FORKCHECK was requested */
+ if (expect_false (curpid)) /* penalise the forking check even more */
+ if (expect_false (getpid () != curpid))
+ {
+ curpid = getpid ();
+ postfork = 1;
+ }
+#endif
+
+#if EV_FORK_ENABLE
+ /* we might have forked, so queue fork handlers */
+ if (expect_false (postfork))
+ if (forkcnt)
+ {
+ queue_events (EV_A_ (W *)forks, forkcnt, EV_FORK);
+ EV_INVOKE_PENDING;
+ }
+#endif
+
+ /* queue prepare watchers (and execute them) */
+ if (expect_false (preparecnt))
+ {
+ queue_events (EV_A_ (W *)prepares, preparecnt, EV_PREPARE);
+ EV_INVOKE_PENDING;
+ }
+
+ /* a callback above may have called ev_unloop */
+ if (expect_false (loop_done))
+ break;
+
+ /* we might have forked, so reify kernel state if necessary */
+ if (expect_false (postfork))
+ loop_fork (EV_A);
+
+ /* update fd-related kernel structures */
+ fd_reify (EV_A);
+
+ /* calculate blocking time */
+ {
+ ev_tstamp waittime = 0.;
+ ev_tstamp sleeptime = 0.;
+
+ /* only block when allowed to, no idle watchers exist and something keeps the loop alive */
+ if (expect_true (!(flags & EVLOOP_NONBLOCK || idleall || !activecnt)))
+ {
+ /* remember old timestamp for io_blocktime calculation */
+ ev_tstamp prev_mn_now = mn_now;
+
+ /* update time to cancel out callback processing overhead */
+ time_update (EV_A_ 1e100);
+
+ waittime = MAX_BLOCKTIME;
+
+ if (timercnt)
+ {
+ ev_tstamp to = ANHE_at (timers [HEAP0]) - mn_now + backend_fudge;
+ if (waittime > to) waittime = to;
+ }
+
+#if EV_PERIODIC_ENABLE
+ if (periodiccnt)
+ {
+ ev_tstamp to = ANHE_at (periodics [HEAP0]) - ev_rt_now + backend_fudge;
+ if (waittime > to) waittime = to;
+ }
+#endif
+
+ /* don't let timeouts decrease the waittime below timeout_blocktime */
+ if (expect_false (waittime < timeout_blocktime))
+ waittime = timeout_blocktime;
+
+ /* extra check because io_blocktime is commonly 0 */
+ if (expect_false (io_blocktime))
+ {
+ sleeptime = io_blocktime - (mn_now - prev_mn_now);
+
+ if (sleeptime > waittime - backend_fudge)
+ sleeptime = waittime - backend_fudge;
+
+ if (expect_true (sleeptime > 0.))
+ {
+ ev_sleep (sleeptime);
+ waittime -= sleeptime;
+ }
+ }
+ }
+
+#if EV_MINIMAL < 2
+ ++loop_count;
+#endif
+ /* loop_done is set to EVUNLOOP_RECURSE around the poll (assert-enabled builds only) */
+ /* so that the assert at the top catches re-entry during the poll */
+ assert ((loop_done = EVUNLOOP_RECURSE, 1)); /* assert for side effect */
+ backend_poll (EV_A_ waittime);
+ assert ((loop_done = EVUNLOOP_CANCEL, 1)); /* assert for side effect */
+
+ /* update ev_rt_now, do magic */
+ time_update (EV_A_ waittime + sleeptime);
+ }
+
+ /* queue pending timers and reschedule them */
+ timers_reify (EV_A); /* relative timers called last */
+#if EV_PERIODIC_ENABLE
+ periodics_reify (EV_A); /* absolute timers called first */
+#endif
+
+#if EV_IDLE_ENABLE
+ /* queue idle watchers unless other events are pending */
+ idle_reify (EV_A);
+#endif
+
+ /* queue check watchers, to be executed first */
+ if (expect_false (checkcnt))
+ queue_events (EV_A_ (W *)checks, checkcnt, EV_CHECK);
+
+ EV_INVOKE_PENDING;
+ }
+ while (expect_true (
+ activecnt
+ && !loop_done
+ && !(flags & (EVLOOP_ONESHOT | EVLOOP_NONBLOCK))
+ ));
+
+ /* EVUNLOOP_ONE only ends this invocation; any other nonzero value is left for outer loops */
+ if (loop_done == EVUNLOOP_ONE)
+ loop_done = EVUNLOOP_CANCEL;
+
+#if EV_MINIMAL < 2
+ --loop_depth;
+#endif
+}
+
+/* request loop exit by storing how in loop_done; ev_loop checks it once per iteration */
+void
+ev_unloop (EV_P_ int how)
+{
+ loop_done = how;
+}
+
+/* increment activecnt, the count that keeps ev_loop iterating while nonzero */
+void
+ev_ref (EV_P)
+{
+ ++activecnt;
+}
+
+/* decrement activecnt; ev_loop stops iterating once it reaches zero */
+void
+ev_unref (EV_P)
+{
+ --activecnt;
+}
+
+/* refresh the loop's cached times via time_update; the huge max_block */
+/* means a forward step is never treated as a jump on the non-monotonic path */
+void
+ev_now_update (EV_P)
+{
+ time_update (EV_A_ 1e100);
+}
+
+/* suspending only brings the cached time up to date, so that ev_resume */
+/* can measure how long the loop was suspended */
+void
+ev_suspend (EV_P)
+{
+ ev_now_update (EV_A);
+}
+
+/* after a suspension: update the time, push all timers back by the */
+/* amount mn_now advanced, and recompute the periodics */
+void
+ev_resume (EV_P)
+{
+  ev_tstamp before = mn_now;
+  ev_tstamp elapsed;
+
+  ev_now_update (EV_A);
+
+  elapsed = mn_now - before;
+  timers_reschedule (EV_A_ elapsed);
+#if EV_PERIODIC_ENABLE
+  /* TODO: really do this? */
+  periodics_reschedule (EV_A);
+#endif
+}
+
+/*****************************************************************************/
+/* singly-linked list management, used when the expected list length is short */
+
+/* push elem onto the front of the list */
+inline_size void
+wlist_add (WL *head, WL elem)
+{
+  WL first = *head;
+
+  elem->next = first;
+  *head      = elem;
+}
+
+/* unlink the first occurrence of elem from the list; no-op if it is not on it */
+inline_size void
+wlist_del (WL *head, WL elem)
+{
+  WL *link;
+
+  for (link = head; *link; link = &(*link)->next)
+    if (expect_true (*link == elem))
+      {
+        *link = elem->next;
+        return;
+      }
+}
+
+/* internal version of ev_clear_pending: if w is pending, redirect its */
+/* pending slot to the dummy pending_w watcher and mark w as not pending */
+inline_speed void
+clear_pending (EV_P_ W w)
+{
+  int slot = w->pending;
+
+  if (!slot)
+    return;
+
+  pendings [ABSPRI (w)][slot - 1].w = (W)&pending_w;
+  w->pending = 0;
+}
+
+/* cancel a pending event on watcher w; returns the event mask that was */
+/* pending, or 0 if the watcher was not pending */
+int
+ev_clear_pending (EV_P_ void *w)
+{
+  W watcher = (W)w;
+  int slot = watcher->pending;
+  ANPENDING *entry;
+
+  if (!expect_true (slot))
+    return 0;
+
+  /* the slot stays in the array but now points at the dummy watcher */
+  entry = &pendings [ABSPRI (watcher)][slot - 1];
+  entry->w = (W)&pending_w;
+  watcher->pending = 0;
+
+  return entry->events;
+}
+
+/* clamp the watcher's priority into the range EV_MINPRI..EV_MAXPRI */
+inline_size void
+pri_adjust (EV_P_ W w)
+{
+  int pri = ev_priority (w);
+
+  if (pri < EV_MINPRI)
+    pri = EV_MINPRI;
+
+  if (pri > EV_MAXPRI)
+    pri = EV_MAXPRI;
+
+  ev_set_priority (w, pri);
+}
+
+/* common part of starting any watcher: clamp its priority, record the */
+/* given active value and add a reference to the loop */
+inline_speed void
+ev_start (EV_P_ W w, int active)
+{
+ pri_adjust (EV_A_ w);
+ w->active = active;
+ ev_ref (EV_A);
+}
+
+/* common part of stopping any watcher: drop the loop reference and mark it inactive */
+inline_size void
+ev_stop (EV_P_ W w)
+{
+ ev_unref (EV_A);
+ w->active = 0;
+}
+
+/*****************************************************************************/
+
+/* start an io watcher: put it on the watcher list of its fd and schedule */
+/* the fd for re-evaluation; no-op if the watcher is already active */
+void noinline
+ev_io_start (EV_P_ ev_io *w)
+{
+ int fd = w->fd;
+
+ if (expect_false (ev_is_active (w)))
+ return;
+
+ assert (("libev: ev_io_start called with negative fd", fd >= 0));
+ assert (("libev: ev_io start called with illegal event mask", !(w->events & ~(EV__IOFDSET | EV_READ | EV_WRITE))));
+
+ EV_FREQUENT_CHECK;
+
+ ev_start (EV_A_ (W)w, 1);
+ /* presumably grows anfds so that index fd is valid, zero-filling new entries */
+ array_needsize (ANFD, anfds, anfdmax, fd + 1, array_init_zero);
+ wlist_add (&anfds[fd].head, (WL)w);
+
+ /* pass the EV__IOFDSET bit on to fd_change, then clear it from the watcher */
+ fd_change (EV_A_ fd, w->events & EV__IOFDSET | EV_ANFD_REIFY);
+ w->events &= ~EV__IOFDSET;
+
+ EV_FREQUENT_CHECK;
+}
+
+/* stop an io watcher: clear any pending event, remove it from its fd's */
+/* watcher list and schedule the fd for re-evaluation */
+void noinline
+ev_io_stop (EV_P_ ev_io *w)
+{
+ /* pending events are cleared even if the watcher is not active */
+ clear_pending (EV_A_ (W)w);
+ if (expect_false (!ev_is_active (w)))
+ return;
+
+ assert (("libev: ev_io_stop called with illegal fd (must stay constant after start!)", w->fd >= 0 && w->fd < anfdmax));
+
+ EV_FREQUENT_CHECK;
+
+ wlist_del (&anfds[w->fd].head, (WL)w);
+ ev_stop (EV_A_ (W)w);
+
+ fd_change (EV_A_ w->fd, 1);
+
+ EV_FREQUENT_CHECK;
+}
+
+/* start a timer: convert its relative timeout to an absolute mn_now-based */
+/* time and insert it into the timer heap; no-op if already active */
+void noinline
+ev_timer_start (EV_P_ ev_timer *w)
+{
+ if (expect_false (ev_is_active (w)))
+ return;
+
+ /* relative -> absolute */
+ ev_at (w) += mn_now;
+
+ assert (("libev: ev_timer_start called with negative timer repeat value", w->repeat >= 0.));
+
+ EV_FREQUENT_CHECK;
+
+ /* the watcher's active value doubles as its heap index */
+ ++timercnt;
+ ev_start (EV_A_ (W)w, timercnt + HEAP0 - 1);
+ array_needsize (ANHE, timers, timermax, ev_active (w) + 1, EMPTY2);
+ ANHE_w (timers [ev_active (w)]) = (WT)w;
+ ANHE_at_cache (timers [ev_active (w)]);
+ upheap (timers, ev_active (w));
+
+ EV_FREQUENT_CHECK;
+
+ /*assert (("libev: internal timer heap corruption", timers [ev_active (w)] == (WT)w));*/
+}
+
+/* stop a timer: clear any pending event, remove it from the timer heap */
+/* and convert its time back to a relative value */
+void noinline
+ev_timer_stop (EV_P_ ev_timer *w)
+{
+ clear_pending (EV_A_ (W)w);
+ if (expect_false (!ev_is_active (w)))
+ return;
+
+ EV_FREQUENT_CHECK;
+
+ {
+ int active = ev_active (w);
+
+ assert (("libev: internal timer heap corruption", ANHE_w (timers [active]) == (WT)w));
+
+ --timercnt;
+
+ /* unless w was the last element, move the last element into its slot and fix the heap */
+ if (expect_true (active < timercnt + HEAP0))
+ {
+ timers [active] = timers [timercnt + HEAP0];
+ adjustheap (timers, timercnt, active);
+ }
+ }
+
+ EV_FREQUENT_CHECK;
+
+ /* absolute -> relative (remaining time) */
+ ev_at (w) -= mn_now;
+
+ ev_stop (EV_A_ (W)w);
+}
+
+/* restart a timer using its repeat value: */
+/*  - active with repeat: rescheduled to mn_now + repeat */
+/*  - active without repeat: stopped */
+/*  - inactive with repeat: started with repeat as the timeout */
+/*  - inactive without repeat: left alone */
+void noinline
+ev_timer_again (EV_P_ ev_timer *w)
+{
+  EV_FREQUENT_CHECK;
+
+  if (!ev_is_active (w))
+    {
+      if (w->repeat)
+        {
+          ev_at (w) = w->repeat;
+          ev_timer_start (EV_A_ w);
+        }
+    }
+  else if (!w->repeat)
+    ev_timer_stop (EV_A_ w);
+  else
+    {
+      ev_at (w) = mn_now + w->repeat;
+      ANHE_at_cache (timers [ev_active (w)]);
+      adjustheap (timers, timercnt, ev_active (w));
+    }
+
+  EV_FREQUENT_CHECK;
+}
+
+/* time left until the timer fires: an active timer stores an absolute */
+/* time (so mn_now is subtracted), an inactive one stores a relative time */
+ev_tstamp
+ev_timer_remaining (EV_P_ ev_timer *w)
+{
+  ev_tstamp base;
+
+  if (ev_is_active (w))
+    base = mn_now;
+  else
+    base = 0.;
+
+  return ev_at (w) - base;
+}
+
+#if EV_PERIODIC_ENABLE
+/* start a periodic watcher: compute its first trigger time (from the reschedule */
+/* callback, from offset/interval, or the plain offset) and insert it into the */
+/* periodic heap; no-op if already active */
+void noinline
+ev_periodic_start (EV_P_ ev_periodic *w)
+{
+ if (expect_false (ev_is_active (w)))
+ return;
+
+ if (w->reschedule_cb)
+ ev_at (w) = w->reschedule_cb (w, ev_rt_now);
+ else if (w->interval)
+ {
+ assert (("libev: ev_periodic_start called with negative interval value", w->interval >= 0.));
+ /* this formula differs from the one in periodic_reify because we do not always round up */
+ ev_at (w) = w->offset + ceil ((ev_rt_now - w->offset) / w->interval) * w->interval;
+ }
+ else
+ ev_at (w) = w->offset;
+
+ EV_FREQUENT_CHECK;
+
+ /* the watcher's active value doubles as its heap index */
+ ++periodiccnt;
+ ev_start (EV_A_ (W)w, periodiccnt + HEAP0 - 1);
+ array_needsize (ANHE, periodics, periodicmax, ev_active (w) + 1, EMPTY2);
+ ANHE_w (periodics [ev_active (w)]) = (WT)w;
+ ANHE_at_cache (periodics [ev_active (w)]);
+ upheap (periodics, ev_active (w));
+
+ EV_FREQUENT_CHECK;
+
+ /*assert (("libev: internal periodic heap corruption", ANHE_w (periodics [ev_active (w)]) == (WT)w));*/
+}
+
+/* stop a periodic watcher: clear any pending event and remove it from the periodic heap */
+void noinline
+ev_periodic_stop (EV_P_ ev_periodic *w)
+{
+ clear_pending (EV_A_ (W)w);
+ if (expect_false (!ev_is_active (w)))
+ return;
+
+ EV_FREQUENT_CHECK;
+
+ {
+ int active = ev_active (w);
+
+ assert (("libev: internal periodic heap corruption", ANHE_w (periodics [active]) == (WT)w));
+
+ --periodiccnt;
+
+ /* unless w was the last element, move the last element into its slot and fix the heap */
+ if (expect_true (active < periodiccnt + HEAP0))
+ {
+ periodics [active] = periodics [periodiccnt + HEAP0];
+ adjustheap (periodics, periodiccnt, active);
+ }
+ }
+
+ EV_FREQUENT_CHECK;
+
+ ev_stop (EV_A_ (W)w);
+}
+
+/* restart a periodic watcher by stopping and starting it, which recomputes its trigger time */
+void noinline
+ev_periodic_again (EV_P_ ev_periodic *w)
+{
+ /* TODO: use adjustheap and recalculation */
+ ev_periodic_stop (EV_A_ w);
+ ev_periodic_start (EV_A_ w);
+}
+#endif
+
+#ifndef SA_RESTART
+# define SA_RESTART 0
+#endif
+
+/* start a signal watcher: bind the signal to this loop, add the watcher to the */
+/* signal's list and, for the first watcher of a signal, arrange delivery */
+/* either through signalfd or through a handler (ev_sighandler) plus the wakeup pipe */
+void noinline
+ev_signal_start (EV_P_ ev_signal *w)
+{
+ if (expect_false (ev_is_active (w)))
+ return;
+
+ assert (("libev: ev_signal_start called with illegal signal number", w->signum > 0 && w->signum < EV_NSIG));
+
+#if EV_MULTIPLICITY
+ assert (("libev: a signal must not be attached to two different loops",
+ !signals [w->signum - 1].loop || signals [w->signum - 1].loop == loop));
+
+ signals [w->signum - 1].loop = EV_A;
+#endif
+
+ EV_FREQUENT_CHECK;
+
+#if EV_USE_SIGNALFD
+ /* -2 means: signalfd requested (EVFLAG_SIGNALFD) but not yet created */
+ if (sigfd == -2)
+ {
+ sigfd = signalfd (-1, &sigfd_set, SFD_NONBLOCK | SFD_CLOEXEC);
+ if (sigfd < 0 && errno == EINVAL)
+ sigfd = signalfd (-1, &sigfd_set, 0); /* retry without flags */
+
+ if (sigfd >= 0)
+ {
+ fd_intern (sigfd); /* doing it twice will not hurt */
+
+ sigemptyset (&sigfd_set);
+
+ ev_io_init (&sigfd_w, sigfdcb, sigfd, EV_READ);
+ ev_set_priority (&sigfd_w, EV_MAXPRI);
+ ev_io_start (EV_A_ &sigfd_w);
+ ev_unref (EV_A); /* signalfd watcher should not keep loop alive */
+ }
+ }
+
+ if (sigfd >= 0)
+ {
+ /* TODO: check .head */
+ /* block the signal for normal delivery and add it to the signalfd's set */
+ sigaddset (&sigfd_set, w->signum);
+ sigprocmask (SIG_BLOCK, &sigfd_set, 0);
+
+ signalfd (sigfd, &sigfd_set, 0);
+ }
+#endif
+
+ ev_start (EV_A_ (W)w, 1);
+ wlist_add (&signals [w->signum - 1].head, (WL)w);
+
+ /* w was just added at the head, so no next means it is the first watcher for this signal */
+ if (!((WL)w)->next)
+# if EV_USE_SIGNALFD
+ if (sigfd < 0) /*TODO*/
+# endif
+ {
+# if _WIN32
+ evpipe_init (EV_A);
+
+ signal (w->signum, ev_sighandler);
+# else
+ struct sigaction sa;
+
+ evpipe_init (EV_A);
+
+ sa.sa_handler = ev_sighandler;
+ sigfillset (&sa.sa_mask);
+ sa.sa_flags = SA_RESTART; /* if restarting works we save one iteration */
+ sigaction (w->signum, &sa, 0);
+
+ /* make sure the signal is not blocked, so the handler can run */
+ sigemptyset (&sa.sa_mask);
+ sigaddset (&sa.sa_mask, w->signum);
+ sigprocmask (SIG_UNBLOCK, &sa.sa_mask, 0);
+#endif
+ }
+
+ EV_FREQUENT_CHECK;
+}
+
+/* stop a signal watcher; when it was the last watcher for its signal, detach */
+/* the signal from the loop and restore default handling (or drop it from the signalfd) */
+void noinline
+ev_signal_stop (EV_P_ ev_signal *w)
+{
+ clear_pending (EV_A_ (W)w);
+ if (expect_false (!ev_is_active (w)))
+ return;
+
+ EV_FREQUENT_CHECK;
+
+ wlist_del (&signals [w->signum - 1].head, (WL)w);
+ ev_stop (EV_A_ (W)w);
+
+ /* no watchers left for this signal */
+ if (!signals [w->signum - 1].head)
+ {
+#if EV_MULTIPLICITY
+ signals [w->signum - 1].loop = 0; /* unattach from signal */
+#endif
+#if EV_USE_SIGNALFD
+ if (sigfd >= 0)
+ {
+ sigset_t ss;
+
+ sigemptyset (&ss);
+ sigaddset (&ss, w->signum);
+ sigdelset (&sigfd_set, w->signum);
+
+ /* update the signalfd's set first, then unblock the signal again */
+ signalfd (sigfd, &sigfd_set, 0);
+ sigprocmask (SIG_UNBLOCK, &ss, 0);
+ }
+ else
+#endif
+ signal (w->signum, SIG_DFL);
+ }
+
+ EV_FREQUENT_CHECK;
+}
+
+/* start a child watcher by adding it to the childs hash chain for its pid */
+/* (pid 0 lands in chain 0); only supported on the default loop */
+void
+ev_child_start (EV_P_ ev_child *w)
+{
+#if EV_MULTIPLICITY
+ assert (("libev: child watchers are only supported in the default loop", loop == ev_default_loop_ptr));
+#endif
+ if (expect_false (ev_is_active (w)))
+ return;
+
+ EV_FREQUENT_CHECK;
+
+ ev_start (EV_A_ (W)w, 1);
+ wlist_add (&childs [w->pid & (EV_PID_HASHSIZE - 1)], (WL)w);
+
+ EV_FREQUENT_CHECK;
+}
+
+/* stop a child watcher: clear any pending event and remove it from its pid's hash chain */
+void
+ev_child_stop (EV_P_ ev_child *w)
+{
+ clear_pending (EV_A_ (W)w);
+ if (expect_false (!ev_is_active (w)))
+ return;
+
+ EV_FREQUENT_CHECK;
+
+ wlist_del (&childs [w->pid & (EV_PID_HASHSIZE - 1)], (WL)w);
+ ev_stop (EV_A_ (W)w);
+
+ EV_FREQUENT_CHECK;
+}
+
+#if EV_STAT_ENABLE
+
+# ifdef _WIN32
+# undef lstat
+# define lstat(a,b) _stati64 (a,b)
+# endif
+
+#define DEF_STAT_INTERVAL 5.0074891
+#define NFS_STAT_INTERVAL 30.1074891 /* for filesystems potentially failing inotify */
+#define MIN_STAT_INTERVAL 0.1074891
+
+static void noinline stat_timer_cb (EV_P_ ev_timer *w_, int revents);
+
+#if EV_USE_INOTIFY
+# define EV_INOTIFY_BUFSIZE 8192
+
+/* try to watch w->path with inotify and pick the polling interval of the stat */
+/* watcher's timer accordingly; if the path itself cannot be watched, watch the */
+/* nearest watchable parent directory as a hint and keep polling */
+static void noinline
+infy_add (EV_P_ ev_stat *w)
+{
+ w->wd = inotify_add_watch (fs_fd, w->path, IN_ATTRIB | IN_DELETE_SELF | IN_MOVE_SELF | IN_MODIFY | IN_DONT_FOLLOW | IN_MASK_ADD);
+
+ if (w->wd >= 0)
+ {
+ struct statfs sfs;
+
+ /* now local changes will be tracked by inotify, but remote changes won't */
+ /* unless the filesystem is known to be local, we therefore still poll */
+ /* also do poll on <2.6.25, but with normal frequency */
+
+ if (!fs_2625)
+ w->timer.repeat = w->interval ? w->interval : DEF_STAT_INTERVAL;
+ else if (!statfs (w->path, &sfs)
+ && (sfs.f_type == 0x1373 /* devfs */
+ || sfs.f_type == 0xEF53 /* ext2/3 */
+ || sfs.f_type == 0x3153464a /* jfs */
+ || sfs.f_type == 0x52654973 /* reiser3 */
+ || sfs.f_type == 0x01021994 /* tmpfs */
+ || sfs.f_type == 0x58465342 /* xfs */))
+ w->timer.repeat = 0.; /* filesystem is local, kernel new enough */
+ else
+ w->timer.repeat = w->interval ? w->interval : NFS_STAT_INTERVAL; /* remote, use reduced frequency */
+ }
+ else
+ {
+ /* can't use inotify, continue to stat */
+ w->timer.repeat = w->interval ? w->interval : DEF_STAT_INTERVAL;
+
+ /* if path is not there, monitor some parent directory for speedup hints */
+ /* note that exceeding the hardcoded path limit is not a correctness issue, */
+ /* but an efficiency issue only */
+ if ((errno == ENOENT || errno == EACCES) && strlen (w->path) < 4096)
+ {
+ char path [4096];
+ /* safe: the length was checked against the buffer size just above */
+ strcpy (path, w->path);
+
+ /* strip one path component per iteration until a watch succeeds */
+ do
+ {
+ int mask = IN_MASK_ADD | IN_DELETE_SELF | IN_MOVE_SELF
+ | (errno == EACCES ? IN_ATTRIB : IN_CREATE | IN_MOVED_TO);
+
+ char *pend = strrchr (path, '/');
+
+ if (!pend || pend == path)
+ break;
+
+ *pend = 0;
+ w->wd = inotify_add_watch (fs_fd, path, mask);
+ }
+ while (w->wd < 0 && (errno == ENOENT || errno == EACCES));
+ }
+ }
+
+ if (w->wd >= 0)
+ wlist_add (&fs_hash [w->wd & (EV_INOTIFY_HASHSIZE - 1)].head, (WL)w);
+
+ /* now re-arm timer, if required */
+ /* the ref/unref pair keeps the loop's reference count unchanged across the restart */
+ if (ev_is_active (&w->timer)) ev_ref (EV_A);
+ ev_timer_again (EV_A_ &w->timer);
+ if (ev_is_active (&w->timer)) ev_unref (EV_A);
+}
+
+/* Drops the inotify watch of w, if it has one, and unlinks w from the wd */
+/* hash. Afterwards w->wd is -2, the value ev_stat_set starts out with. */
+static void noinline
+infy_del (EV_P_ ev_stat *w)
+{
+  const int old_wd = w->wd;
+
+  if (old_wd >= 0)
+    {
+      w->wd = -2;
+      wlist_del (&fs_hash [old_wd & (EV_INOTIFY_HASHSIZE - 1)].head, (WL)w);
+
+      /* other watchers sharing this wd will rearm once they get notified */
+      inotify_rm_watch (fs_fd, old_wd);
+    }
+}
+
+/* Dispatches one inotify event to the stat watchers registered for wd. */
+/* A negative slot means "unknown" (queue overflow): every hash slot is */
+/* scanned then, and a wd of -1 matches every watcher. */
+static void noinline
+infy_wd (EV_P_ int slot, int wd, struct inotify_event *ev)
+{
+  WL cur, nxt;
+
+  if (slot < 0)
+    {
+      /* overflow, need to check for all hash slots */
+      int s;
+
+      for (s = 0; s < EV_INOTIFY_HASHSIZE; ++s)
+        infy_wd (EV_A_ s, wd, ev);
+
+      return;
+    }
+
+  for (cur = fs_hash [slot & (EV_INOTIFY_HASHSIZE - 1)].head; cur; cur = nxt)
+    {
+      ev_stat *st = (ev_stat *)cur;
+
+      /* remember the successor first, the current watcher may get unlinked */
+      nxt = cur->next;
+
+      if (st->wd != wd && wd != -1)
+        continue;
+
+      if (ev->mask & (IN_IGNORED | IN_UNMOUNT | IN_DELETE_SELF))
+        {
+          /* the kernel watch is gone: unlink and register from scratch */
+          wlist_del (&fs_hash [slot & (EV_INOTIFY_HASHSIZE - 1)].head, (WL)st);
+          st->wd = -1;
+          infy_add (EV_A_ st); /* re-add, no matter what */
+        }
+
+      /* let the regular stat comparison decide whether to raise an event */
+      stat_timer_cb (EV_A_ &st->timer, 0);
+    }
+}
+
+/* io callback for the inotify descriptor: reads one batch of events and */
+/* hands each of them to infy_wd. A failed or empty read is ignored. */
+static void
+infy_cb (EV_P_ ev_io *w, int revents)
+{
+  char buf [EV_INOTIFY_BUFSIZE];
+  int ofs;
+  int len = read (fs_fd, buf, sizeof (buf));
+
+  /* events are variable-sized records packed back to back: each header is */
+  /* followed by ev->len name bytes, so the event pointer has to be */
+  /* recomputed from the running offset for every record */
+  for (ofs = 0; ofs < len; )
+    {
+      struct inotify_event *ev = (struct inotify_event *)(buf + ofs);
+
+      infy_wd (EV_A_ ev->wd, ev->wd, ev);
+      ofs += sizeof (struct inotify_event) + ev->len;
+    }
+}
+
+/* Sets fs_2625 when the running kernel is version 2.6.25 or newer. */
+/* If the version cannot be determined, fs_2625 is left untouched. */
+inline_size void
+check_2625 (EV_P)
+{
+  /* kernels < 2.6.25 are borked
+   * http://www.ussg.indiana.edu/hypermail/linux/kernel/0711.3/1208.html
+   */
+  struct utsname buf;
+  int major = 0, minor = 0, micro = 0;
+
+  if (uname (&buf))
+    return;
+
+  /* release strings such as "3.0-rc1" have fewer than three numeric */
+  /* components; treat the missing ones as 0 instead of giving up, so that */
+  /* newer kernels are not misclassified as old */
+  if (sscanf (buf.release, "%d.%d.%d", &major, &minor, &micro) < 1)
+    return;
+
+  if (major < 2
+      || (major == 2 && minor < 6)
+      || (major == 2 && minor == 6 && micro < 25))
+    return;
+
+  fs_2625 = 1;
+}
+
+/* Creates a new inotify descriptor, preferring the close-on-exec, */
+/* non-blocking variant where the headers offer it. Returns the descriptor */
+/* or the (negative) result of inotify_init on failure. */
+inline_size int
+infy_newfd (void)
+{
+  int fd = -1;
+
+#if defined (IN_CLOEXEC) && defined (IN_NONBLOCK)
+  fd = inotify_init1 (IN_CLOEXEC | IN_NONBLOCK);
+#endif
+
+  /* inotify_init1 missing or unsuccessful: fall back to the plain call */
+  if (fd < 0)
+    fd = inotify_init ();
+
+  return fd;
+}
+
+/* One-time inotify setup for the loop. Runs only while fs_fd still holds */
+/* -2; afterwards fs_fd is either a valid descriptor or negative. */
+inline_size void
+infy_init (EV_P)
+{
+  if (fs_fd == -2)
+    {
+      fs_fd = -1;
+
+      check_2625 (EV_A);
+
+      fs_fd = infy_newfd ();
+
+      if (fs_fd < 0)
+        return;
+
+      fd_intern (fs_fd);
+      ev_io_init (&fs_w, infy_cb, fs_fd, EV_READ);
+      ev_set_priority (&fs_w, EV_MAXPRI);
+      ev_io_start (EV_A_ &fs_w);
+      /* the internal watcher gives its loop reference back */
+      ev_unref (EV_A);
+    }
+}
+
+/* Re-creates the inotify state after a fork: replaces the descriptor and */
+/* registers every hashed stat watcher again, or switches the watchers to */
+/* plain polling when no new descriptor can be obtained. */
+inline_size void
+infy_fork (EV_P)
+{
+  int slot;
+
+  if (fs_fd < 0)
+    return;
+
+  /* take back the reference dropped when fs_w was started, then stop it */
+  ev_ref (EV_A);
+  ev_io_stop (EV_A_ &fs_w);
+  close (fs_fd);
+  fs_fd = infy_newfd ();
+
+  if (fs_fd >= 0)
+    {
+      fd_intern (fs_fd);
+      ev_io_set (&fs_w, fs_fd, EV_READ);
+      ev_io_start (EV_A_ &fs_w);
+      ev_unref (EV_A);
+    }
+
+  for (slot = 0; slot < EV_INOTIFY_HASHSIZE; ++slot)
+    {
+      WL cur = fs_hash [slot].head;
+      WL nxt;
+
+      /* detach the whole chain, infy_add files the watchers anew */
+      fs_hash [slot].head = 0;
+
+      for (; cur; cur = nxt)
+        {
+          ev_stat *st = (ev_stat *)cur;
+
+          nxt = cur->next; /* lets us add this watcher */
+          st->wd = -1;
+
+          if (fs_fd < 0)
+            {
+              /* inotify is gone, poll at the normal frequency */
+              st->timer.repeat = st->interval ? st->interval : DEF_STAT_INTERVAL;
+              if (ev_is_active (&st->timer)) ev_ref (EV_A);
+              ev_timer_again (EV_A_ &st->timer);
+              if (ev_is_active (&st->timer)) ev_unref (EV_A);
+            }
+          else
+            infy_add (EV_A_ st); /* re-add, no matter what */
+        }
+    }
+}
+
+#endif
+
+#ifdef _WIN32
+# define EV_LSTAT(p,b) _stati64 (p, b)
+#else
+# define EV_LSTAT(p,b) lstat (p, b)
+#endif
+
+/* Refreshes w->attr from the file system. st_nlink doubles as an existence */
+/* flag: 0 when lstat fails, and never 0 when it succeeds. */
+void
+ev_stat_stat (EV_P_ ev_stat *w)
+{
+  int failed = lstat (w->path, &w->attr) < 0;
+
+  if (failed)
+    w->attr.st_nlink = 0;
+  else if (w->attr.st_nlink == 0)
+    w->attr.st_nlink = 1;
+}
+
+/* Timer callback of a stat watcher (also called directly by infy_wd): */
+/* re-stats the path and feeds EV_STAT when any compared field differs */
+/* from the previous snapshot. */
+static void noinline
+stat_timer_cb (EV_P_ ev_timer *w_, int revents)
+{
+  ev_stat *w = (ev_stat *)(((char *)w_) - offsetof (ev_stat, timer));
+  ev_statdata before = w->attr;
+  int changed;
+
+  ev_stat_stat (EV_A_ w);
+
+  /* memcmp doesn't work on netbsd, they.... do stuff to their struct stat */
+  changed =
+       before.st_dev   != w->attr.st_dev
+    || before.st_ino   != w->attr.st_ino
+    || before.st_mode  != w->attr.st_mode
+    || before.st_nlink != w->attr.st_nlink
+    || before.st_uid   != w->attr.st_uid
+    || before.st_gid   != w->attr.st_gid
+    || before.st_rdev  != w->attr.st_rdev
+    || before.st_size  != w->attr.st_size
+    || before.st_atime != w->attr.st_atime
+    || before.st_mtime != w->attr.st_mtime
+    || before.st_ctime != w->attr.st_ctime;
+
+  if (!changed)
+    return;
+
+  /* w->prev is only replaced on a real difference, so that prev always */
+  /* differs from attr even when we test more often than we invoke the */
+  /* callback */
+  w->prev = before;
+
+#if EV_USE_INOTIFY
+  if (fs_fd >= 0)
+    {
+      /* the path may refer to a different object now: renew the watch */
+      infy_del (EV_A_ w);
+      infy_add (EV_A_ w);
+      ev_stat_stat (EV_A_ w); /* avoid race... */
+    }
+#endif
+
+  ev_feed_event (EV_A_ w, EV_STAT);
+}
+
+/* Starts the stat watcher w: takes an initial stat snapshot, clamps the */
+/* interval, prepares the polling timer and, where inotify is usable, */
+/* adds a watch. Does nothing when w is already active. */
+void
+ev_stat_start (EV_P_ ev_stat *w)
+{
+ if (expect_false (ev_is_active (w)))
+ return;
+
+ ev_stat_stat (EV_A_ w);
+
+ /* a non-zero interval below the minimum is raised to the minimum */
+ if (w->interval < MIN_STAT_INTERVAL && w->interval)
+ w->interval = MIN_STAT_INTERVAL;
+
+ /* an interval of 0 selects the default; the timer inherits w's priority */
+ ev_timer_init (&w->timer, stat_timer_cb, 0., w->interval ? w->interval : DEF_STAT_INTERVAL);
+ ev_set_priority (&w->timer, ev_priority (w));
+
+#if EV_USE_INOTIFY
+ infy_init (EV_A);
+
+ /* infy_add also arms the timer, so only the other branch does it here */
+ if (fs_fd >= 0)
+ infy_add (EV_A_ w);
+ else
+#endif
+ {
+ /* polling only; the unref hands back the timer's loop reference */
+ ev_timer_again (EV_A_ &w->timer);
+ ev_unref (EV_A);
+ }
+
+ ev_start (EV_A_ (W)w, 1);
+
+ EV_FREQUENT_CHECK;
+}
+
+/* Stops the stat watcher w: discards a pending event, removes the inotify */
+/* watch and stops the polling timer. Only the pending event is cleared */
+/* when w is not active. */
+void
+ev_stat_stop (EV_P_ ev_stat *w)
+{
+  ev_timer *timer_w = &w->timer;
+
+  clear_pending (EV_A_ (W)w);
+
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+#if EV_USE_INOTIFY
+  infy_del (EV_A_ w);
+#endif
+
+  if (ev_is_active (timer_w))
+    {
+      /* the timer ran without a loop reference, restore it before stopping */
+      ev_ref (EV_A);
+      ev_timer_stop (EV_A_ timer_w);
+    }
+
+  ev_stop (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+}
+#endif
+
+#if EV_IDLE_ENABLE
+/* Starts the idle watcher w by appending it to the idle array of its */
+/* priority. Does nothing when w is already active. */
+void
+ev_idle_start (EV_P_ ev_idle *w)
+{
+  if (expect_false (ev_is_active (w)))
+    return;
+
+  /* adjust the priority first, it selects the array used below */
+  pri_adjust (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+
+  {
+    int pri = ABSPRI (w);
+    int pos = ++idlecnt [pri];
+
+    ++idleall;
+    ev_start (EV_A_ (W)w, pos);
+
+    array_needsize (ev_idle *, idles [pri], idlemax [pri], pos, EMPTY2);
+    idles [pri][pos - 1] = w;
+  }
+
+  EV_FREQUENT_CHECK;
+}
+
+/* Stops the idle watcher w: clears a pending event and removes w from the */
+/* idle array of its priority by moving the last entry into its place. */
+void
+ev_idle_stop (EV_P_ ev_idle *w)
+{
+  clear_pending (EV_A_ (W)w);
+
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    int pri = ABSPRI (w);
+    int pos = ev_active (w) - 1;
+    ev_idle *moved = idles [pri][--idlecnt [pri]];
+
+    /* the former last entry takes over the freed position (index + 1) */
+    idles [pri][pos] = moved;
+    ev_active (moved) = pos + 1;
+
+    ev_stop (EV_A_ (W)w);
+    --idleall;
+  }
+
+  EV_FREQUENT_CHECK;
+}
+#endif
+
+/* Starts the prepare watcher w by appending it to the prepares array. */
+/* Does nothing when w is already active. */
+void
+ev_prepare_start (EV_P_ ev_prepare *w)
+{
+  if (expect_false (ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    /* the active value is the array index + 1 */
+    int pos = ++preparecnt;
+
+    ev_start (EV_A_ (W)w, pos);
+    array_needsize (ev_prepare *, prepares, preparemax, pos, EMPTY2);
+    prepares [pos - 1] = w;
+  }
+
+  EV_FREQUENT_CHECK;
+}
+
+/* Stops the prepare watcher w: clears a pending event and removes w from */
+/* the prepares array by moving the last entry into its place. */
+void
+ev_prepare_stop (EV_P_ ev_prepare *w)
+{
+  clear_pending (EV_A_ (W)w);
+
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    int pos = ev_active (w) - 1;
+    ev_prepare *moved = prepares [--preparecnt];
+
+    prepares [pos] = moved;
+    ev_active (moved) = pos + 1;
+  }
+
+  ev_stop (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+}
+
+/* Starts the check watcher w by appending it to the checks array. */
+/* Does nothing when w is already active. */
+void
+ev_check_start (EV_P_ ev_check *w)
+{
+  if (expect_false (ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    /* the active value is the array index + 1 */
+    int pos = ++checkcnt;
+
+    ev_start (EV_A_ (W)w, pos);
+    array_needsize (ev_check *, checks, checkmax, pos, EMPTY2);
+    checks [pos - 1] = w;
+  }
+
+  EV_FREQUENT_CHECK;
+}
+
+/* Stops the check watcher w: clears a pending event and removes w from */
+/* the checks array by moving the last entry into its place. */
+void
+ev_check_stop (EV_P_ ev_check *w)
+{
+  clear_pending (EV_A_ (W)w);
+
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    int pos = ev_active (w) - 1;
+    ev_check *moved = checks [--checkcnt];
+
+    checks [pos] = moved;
+    ev_active (moved) = pos + 1;
+  }
+
+  ev_stop (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+}
+
+#if EV_EMBED_ENABLE
+/* Runs one non-blocking iteration of the loop embedded by w. */
+void noinline
+ev_embed_sweep (EV_P_ ev_embed *w)
+{
+  struct ev_loop *inner = w->other;
+
+  ev_loop (inner, EVLOOP_NONBLOCK);
+}
+
+/* io callback of an embed watcher, called when the embedded loop's backend */
+/* fd is readable. Without a user callback the embedded loop is swept right */
+/* away, otherwise EV_EMBED is fed to the embed watcher. */
+static void
+embed_io_cb (EV_P_ ev_io *io, int revents)
+{
+  ev_embed *w = (ev_embed *)(((char *)io) - offsetof (ev_embed, io));
+
+  if (!ev_cb (w))
+    {
+      ev_loop (w->other, EVLOOP_NONBLOCK);
+      return;
+    }
+
+  ev_feed_event (EV_A_ (W)w, EV_EMBED);
+}
+
+/* Prepare callback of an embed watcher: flushes the queued fd changes of */
+/* the embedded loop w->other. */
+static void
+embed_prepare_cb (EV_P_ ev_prepare *prepare, int revents)
+{
+ ev_embed *w = (ev_embed *)(((char *)prepare) - offsetof (ev_embed, prepare));
+
+ {
+ /* redeclares the loop variable: below, EV_A means the embedded loop */
+ EV_P = w->other;
+
+ /* repeat until a non-blocking iteration leaves no fd change queued */
+ while (fdchangecnt)
+ {
+ fd_reify (EV_A);
+ ev_loop (EV_A_ EVLOOP_NONBLOCK);
+ }
+ }
+}
+
+/* Fork callback of an embed watcher: stops the watcher, lets the embedded */
+/* loop process the fork and run one non-blocking iteration, then starts */
+/* the watcher again. */
+static void
+embed_fork_cb (EV_P_ ev_fork *fork_w, int revents)
+{
+ ev_embed *w = (ev_embed *)(((char *)fork_w) - offsetof (ev_embed, fork));
+
+ ev_embed_stop (EV_A_ w);
+
+ {
+ /* redeclares the loop variable: inside this block EV_A is the embedded loop */
+ EV_P = w->other;
+
+ ev_loop_fork (EV_A);
+ ev_loop (EV_A_ EVLOOP_NONBLOCK);
+ }
+
+ /* back in the outer loop; ev_embed_start re-reads backend_fd of w->other */
+ ev_embed_start (EV_A_ w);
+}
+
+/* compiled out: idle callback that would merely stop its own idle watcher; */
+/* nothing in ev_embed_start installs it at present */
+#if 0
+static void
+embed_idle_cb (EV_P_ ev_idle *idle, int revents)
+{
+ ev_idle_stop (EV_A_ idle);
+}
+#endif
+
+/* Starts the embed watcher w: watches the backend fd of the embedded loop */
+/* w->other for readability and installs the helper prepare and fork */
+/* watchers. Does nothing when w is already active. */
+void
+ev_embed_start (EV_P_ ev_embed *w)
+{
+ if (expect_false (ev_is_active (w)))
+ return;
+
+ {
+ /* inside this block the loop variables refer to the embedded loop */
+ EV_P = w->other;
+ assert (("libev: loop to be embedded is not embeddable", backend & ev_embeddable_backends ()));
+ ev_io_init (&w->io, embed_io_cb, backend_fd, EV_READ);
+ }
+
+ EV_FREQUENT_CHECK;
+
+ /* the io watcher runs at the priority of the embed watcher itself */
+ ev_set_priority (&w->io, ev_priority (w));
+ ev_io_start (EV_A_ &w->io);
+
+ /* the prepare watcher gets the lowest priority */
+ ev_prepare_init (&w->prepare, embed_prepare_cb);
+ ev_set_priority (&w->prepare, EV_MINPRI);
+ ev_prepare_start (EV_A_ &w->prepare);
+
+ ev_fork_init (&w->fork, embed_fork_cb);
+ ev_fork_start (EV_A_ &w->fork);
+
+ /*ev_idle_init (&w->idle, embed_idle_cb);*/
+
+ ev_start (EV_A_ (W)w, 1);
+
+ EV_FREQUENT_CHECK;
+}
+
+/* Stops the embed watcher w: clears a pending event, stops the helper io, */
+/* prepare and fork watchers and finally marks w itself as inactive. Only */
+/* the pending event is cleared when w is not active. */
+void
+ev_embed_stop (EV_P_ ev_embed *w)
+{
+  clear_pending (EV_A_ (W)w);
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  ev_io_stop (EV_A_ &w->io);
+  ev_prepare_stop (EV_A_ &w->prepare);
+  ev_fork_stop (EV_A_ &w->fork);
+
+  /* counterpart of the ev_start in ev_embed_start: without it the watcher */
+  /* stays active, so a later ev_embed_start returns early and the loop */
+  /* reference taken at start is never given back */
+  ev_stop (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+}
+#endif
+
+#if EV_FORK_ENABLE
+/* Starts the fork watcher w by appending it to the forks array. */
+/* Does nothing when w is already active. */
+void
+ev_fork_start (EV_P_ ev_fork *w)
+{
+  if (expect_false (ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    /* the active value is the array index + 1 */
+    int pos = ++forkcnt;
+
+    ev_start (EV_A_ (W)w, pos);
+    array_needsize (ev_fork *, forks, forkmax, pos, EMPTY2);
+    forks [pos - 1] = w;
+  }
+
+  EV_FREQUENT_CHECK;
+}
+
+/* Stops the fork watcher w: clears a pending event and removes w from the */
+/* forks array by moving the last entry into its place. */
+void
+ev_fork_stop (EV_P_ ev_fork *w)
+{
+  clear_pending (EV_A_ (W)w);
+
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    int pos = ev_active (w) - 1;
+    ev_fork *moved = forks [--forkcnt];
+
+    forks [pos] = moved;
+    ev_active (moved) = pos + 1;
+  }
+
+  ev_stop (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+}
+#endif
+
+#if EV_ASYNC_ENABLE
+/* Starts the async watcher w: sets up the event pipe and appends w to the */
+/* asyncs array. Does nothing when w is already active. */
+void
+ev_async_start (EV_P_ ev_async *w)
+{
+  if (expect_false (ev_is_active (w)))
+    return;
+
+  evpipe_init (EV_A);
+
+  EV_FREQUENT_CHECK;
+
+  {
+    /* the active value is the array index + 1 */
+    int pos = ++asynccnt;
+
+    ev_start (EV_A_ (W)w, pos);
+    array_needsize (ev_async *, asyncs, asyncmax, pos, EMPTY2);
+    asyncs [pos - 1] = w;
+  }
+
+  EV_FREQUENT_CHECK;
+}
+
+/* Stops the async watcher w: clears a pending event and removes w from the */
+/* asyncs array by moving the last entry into its place. */
+void
+ev_async_stop (EV_P_ ev_async *w)
+{
+  clear_pending (EV_A_ (W)w);
+
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    int pos = ev_active (w) - 1;
+    ev_async *moved = asyncs [--asynccnt];
+
+    asyncs [pos] = moved;
+    ev_active (moved) = pos + 1;
+  }
+
+  ev_stop (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+}
+
+/* Marks the async watcher w as sent and signals the loop through the */
+/* event pipe. */
+void
+ev_async_send (EV_P_ ev_async *w)
+{
+ /* set the per-watcher flag before signalling the loop */
+ w->sent = 1;
+ evpipe_write (EV_A_ &async_pending);
+}
+#endif
+
+/*****************************************************************************/
+
+/* state of one ev_once request; allocated by ev_once, freed by once_cb */
+struct ev_once
+{
+ ev_io io; /* fd watcher, started only when fd >= 0 */
+ ev_timer to; /* timeout watcher, started only when timeout >= 0. */
+ void (*cb)(int revents, void *arg); /* user callback */
+ void *arg; /* passed through to cb unchanged */
+};
+
+/* Finishes an ev_once request: stops both watchers, frees the request and */
+/* then calls the user callback with the collected events. */
+static void
+once_cb (EV_P_ struct ev_once *once, int revents)
+{
+  /* copy what is still needed, the request is freed before the call */
+  void (*user_cb)(int revents, void *arg) = once->cb;
+  void *user_arg = once->arg;
+
+  ev_io_stop (EV_A_ &once->io);
+  ev_timer_stop (EV_A_ &once->to);
+  ev_free (once);
+
+  user_cb (revents, user_arg);
+}
+
+/* io callback of an ev_once request; merges in a timeout event that is */
+/* pending at the same time. */
+static void
+once_cb_io (EV_P_ ev_io *w, int revents)
+{
+  struct ev_once *once = (struct ev_once *)(((char *)w) - offsetof (struct ev_once, io));
+  int all_events = revents | ev_clear_pending (EV_A_ &once->to);
+
+  once_cb (EV_A_ once, all_events);
+}
+
+/* timer callback of an ev_once request; merges in an io event that is */
+/* pending at the same time. */
+static void
+once_cb_to (EV_P_ ev_timer *w, int revents)
+{
+  struct ev_once *once = (struct ev_once *)(((char *)w) - offsetof (struct ev_once, to));
+  int all_events = revents | ev_clear_pending (EV_A_ &once->io);
+
+  once_cb (EV_A_ once, all_events);
+}
+
+/* Waits once for events on fd (when fd >= 0) and/or for timeout (when */
+/* timeout >= 0.) and then calls cb (revents, arg). If the request cannot */
+/* be allocated, cb is called at once with EV_ERROR and all event bits set. */
+void
+ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, void *arg), void *arg)
+{
+  struct ev_once *req = (struct ev_once *)ev_malloc (sizeof (struct ev_once));
+
+  if (expect_false (!req))
+    {
+      cb (EV_ERROR | EV_READ | EV_WRITE | EV_TIMEOUT, arg);
+      return;
+    }
+
+  req->cb  = cb;
+  req->arg = arg;
+
+  /* initialise both watchers, once_cb stops both whether started or not */
+  ev_init (&req->io, once_cb_io);
+  ev_init (&req->to, once_cb_to);
+
+  if (fd >= 0)
+    {
+      ev_io_set (&req->io, fd, events);
+      ev_io_start (EV_A_ &req->io);
+    }
+
+  if (timeout >= 0.)
+    {
+      ev_timer_set (&req->to, timeout, 0.);
+      ev_timer_start (EV_A_ &req->to);
+    }
+}
+
+/*****************************************************************************/
+
+#if EV_WALK_ENABLE
+/* Calls cb for every watcher registered with the loop whose type is */
+/* selected in types (a mask of EV_IO, EV_TIMER, ... values). Watchers */
+/* libev uses internally (the inotify and pipe io watchers, the helper */
+/* watchers of ev_embed) are not reported themselves; embed and stat */
+/* watchers are reported through their io watcher and timer respectively. */
+void
+ev_walk (EV_P_ int types, void (*cb)(EV_P_ int type, void *w))
+{
+  int i, j;
+  ev_watcher_list *wl, *wn;
+
+  if (types & (EV_IO | EV_EMBED))
+    for (i = 0; i < anfdmax; ++i)
+      for (wl = anfds [i].head; wl; )
+        {
+          wn = wl->next; /* fetched first, cb might stop the watcher */
+
+#if EV_EMBED_ENABLE
+          if (ev_cb ((ev_io *)wl) == embed_io_cb)
+            {
+              if (types & EV_EMBED)
+                cb (EV_A_ EV_EMBED, ((char *)wl) - offsetof (struct ev_embed, io));
+            }
+          else
+#endif
+#if EV_USE_INOTIFY
+          if (ev_cb ((ev_io *)wl) == infy_cb)
+            ;
+          else
+#endif
+          if ((ev_io *)wl != &pipe_w)
+            if (types & EV_IO)
+              cb (EV_A_ EV_IO, wl);
+
+          wl = wn;
+        }
+
+  if (types & (EV_TIMER | EV_STAT))
+    for (i = timercnt + HEAP0; i-- > HEAP0; )
+#if EV_STAT_ENABLE
+      /*TODO: timer is not always active*/
+      if (ev_cb ((ev_timer *)ANHE_w (timers [i])) == stat_timer_cb)
+        {
+          if (types & EV_STAT)
+            cb (EV_A_ EV_STAT, ((char *)ANHE_w (timers [i])) - offsetof (struct ev_stat, timer));
+        }
+      else
+#endif
+      if (types & EV_TIMER)
+        cb (EV_A_ EV_TIMER, ANHE_w (timers [i]));
+
+#if EV_PERIODIC_ENABLE
+  if (types & EV_PERIODIC)
+    for (i = periodiccnt + HEAP0; i-- > HEAP0; )
+      cb (EV_A_ EV_PERIODIC, ANHE_w (periodics [i]));
+#endif
+
+#if EV_IDLE_ENABLE
+  if (types & EV_IDLE)
+    /* the outer loop must count j down; testing i here walked off the */
+    /* per-priority arrays */
+    for (j = NUMPRI; j--; )
+      for (i = idlecnt [j]; i--; )
+        cb (EV_A_ EV_IDLE, idles [j][i]);
+#endif
+
+#if EV_FORK_ENABLE
+  if (types & EV_FORK)
+    for (i = forkcnt; i--; )
+#if EV_EMBED_ENABLE
+      /* embed_fork_cb only exists when embedding is compiled in */
+      if (ev_cb (forks [i]) != embed_fork_cb)
+#endif
+        cb (EV_A_ EV_FORK, forks [i]);
+#endif
+
+#if EV_ASYNC_ENABLE
+  if (types & EV_ASYNC)
+    for (i = asynccnt; i--; )
+      cb (EV_A_ EV_ASYNC, asyncs [i]);
+#endif
+
+  if (types & EV_PREPARE)
+    for (i = preparecnt; i--; )
+#if EV_EMBED_ENABLE
+      if (ev_cb (prepares [i]) != embed_prepare_cb)
+#endif
+        cb (EV_A_ EV_PREPARE, prepares [i]);
+
+  if (types & EV_CHECK)
+    for (i = checkcnt; i--; )
+      cb (EV_A_ EV_CHECK, checks [i]);
+
+  if (types & EV_SIGNAL)
+    for (i = 0; i < EV_NSIG - 1; ++i)
+      for (wl = signals [i].head; wl; )
+        {
+          wn = wl->next;
+          cb (EV_A_ EV_SIGNAL, wl);
+          wl = wn;
+        }
+
+  if (types & EV_CHILD)
+    for (i = EV_PID_HASHSIZE; i--; )
+      for (wl = childs [i]; wl; )
+        {
+          wn = wl->next;
+          cb (EV_A_ EV_CHILD, wl);
+          wl = wn;
+        }
+/* EV_STAT  0x00001000 - stat data changed */
+/* EV_EMBED 0x00010000 - embedded event loop needs sweep */
+}
+#endif
+
+#if EV_MULTIPLICITY
+ #include "ev_wrap.h"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/libev/ev.h b/libev/ev.h
new file mode 100644
index 0000000..c7c44ff
--- /dev/null
+++ b/libev/ev.h
@@ -0,0 +1,705 @@
+/*
+ * libev native API header
+ *
+ * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#ifndef EV_H_
+#define EV_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef double ev_tstamp;
+
+/* these priorities are inclusive, higher priorities will be called earlier */
+#ifndef EV_MINPRI
+# define EV_MINPRI -2
+#endif
+#ifndef EV_MAXPRI
+# define EV_MAXPRI +2
+#endif
+
+#ifndef EV_MULTIPLICITY
+# define EV_MULTIPLICITY 1
+#endif
+
+#ifndef EV_PERIODIC_ENABLE
+# define EV_PERIODIC_ENABLE 1
+#endif
+
+#ifndef EV_STAT_ENABLE
+# define EV_STAT_ENABLE 1
+#endif
+
+#ifndef EV_IDLE_ENABLE
+# define EV_IDLE_ENABLE 1
+#endif
+
+#ifndef EV_FORK_ENABLE
+# define EV_FORK_ENABLE 1
+#endif
+
+#ifndef EV_EMBED_ENABLE
+# define EV_EMBED_ENABLE 1
+#endif
+
+#ifndef EV_ASYNC_ENABLE
+# define EV_ASYNC_ENABLE 1
+#endif
+
+#ifndef EV_WALK_ENABLE
+# define EV_WALK_ENABLE 0 /* not yet */
+#endif
+
+#ifndef EV_ATOMIC_T
+# include <signal.h>
+# define EV_ATOMIC_T sig_atomic_t volatile
+#endif
+
+/*****************************************************************************/
+
+#if EV_STAT_ENABLE
+# ifdef _WIN32
+# include <time.h>
+# include <sys/types.h>
+# endif
+# include <sys/stat.h>
+#endif
+
+/* support multiple event loops? */
+#if EV_MULTIPLICITY
+struct ev_loop;
+# define EV_P struct ev_loop *loop
+# define EV_P_ EV_P,
+# define EV_A loop
+# define EV_A_ EV_A,
+# define EV_DEFAULT_UC ev_default_loop_uc ()
+# define EV_DEFAULT_UC_ EV_DEFAULT_UC,
+# define EV_DEFAULT ev_default_loop (0)
+# define EV_DEFAULT_ EV_DEFAULT,
+#else
+# define EV_P void
+# define EV_P_
+# define EV_A
+# define EV_A_
+# define EV_DEFAULT
+# define EV_DEFAULT_
+# define EV_DEFAULT_UC
+# define EV_DEFAULT_UC_
+# undef EV_EMBED_ENABLE
+#endif
+
+#if __STDC_VERSION__ >= 199901L || __GNUC__ >= 3
+# define EV_INLINE static inline
+#else
+# define EV_INLINE static
+#endif
+
+/*****************************************************************************/
+
+/* eventmask, revents, events... */
+#define EV_UNDEF -1 /* guaranteed to be invalid */
+#define EV_NONE 0x00 /* no events */
+#define EV_READ 0x01 /* ev_io detected read will not block */
+#define EV_WRITE 0x02 /* ev_io detected write will not block */
+#define EV__IOFDSET 0x80 /* internal use only */
+#define EV_IO EV_READ /* alias for type-detection */
+#define EV_TIMEOUT 0x00000100 /* timer timed out */
+#define EV_TIMER EV_TIMEOUT /* alias for type-detection */
+#define EV_PERIODIC 0x00000200 /* periodic timer timed out */
+#define EV_SIGNAL 0x00000400 /* signal was received */
+#define EV_CHILD 0x00000800 /* child/pid had status change */
+#define EV_STAT 0x00001000 /* stat data changed */
+#define EV_IDLE 0x00002000 /* event loop is idling */
+#define EV_PREPARE 0x00004000 /* event loop about to poll */
+#define EV_CHECK 0x00008000 /* event loop finished poll */
+#define EV_EMBED 0x00010000 /* embedded event loop needs sweep */
+#define EV_FORK 0x00020000 /* event loop resumed in child */
+#define EV_ASYNC 0x00040000 /* async intra-loop signal */
+#define EV_CUSTOM 0x01000000 /* for use by user code */
+#define EV_ERROR 0x80000000 /* sent when an error occurs */
+
+/* can be used to add custom fields to all watchers, while losing binary compatibility */
+#ifndef EV_COMMON
+# define EV_COMMON void *data;
+#endif
+#ifndef EV_PROTOTYPES
+# define EV_PROTOTYPES 1
+#endif
+
+#define EV_VERSION_MAJOR 3
+#define EV_VERSION_MINOR 9
+
+#ifndef EV_CB_DECLARE
+# define EV_CB_DECLARE(type) void (*cb)(EV_P_ struct type *w, int revents);
+#endif
+#ifndef EV_CB_INVOKE
+# define EV_CB_INVOKE(watcher,revents) (watcher)->cb (EV_A_ (watcher), (revents))
+#endif
+
+/*
+ * struct member types:
+ * private: you may look at them, but not change them,
+ * and they might not mean anything to you.
+ * ro: can be read anytime, but only changed when the watcher isn't active.
+ * rw: can be read and modified anytime, even when the watcher is active.
+ *
+ * some internal details that might be helpful for debugging:
+ *
+ * active is either 0, which means the watcher is not active,
+ * or the array index of the watcher (periodics, timers)
+ * or the array index + 1 (most other watchers)
+ * or simply 1 for watchers that aren't in some array.
+ * pending is either 0, in which case the watcher isn't,
+ * or the array index + 1 in the pendings array.
+ */
+
+#if EV_MINPRI == EV_MAXPRI
+# define EV_DECL_PRIORITY
+#else
+# define EV_DECL_PRIORITY int priority;
+#endif
+
+/* shared by all watchers */
+#define EV_WATCHER(type) \
+ int active; /* private */ \
+ int pending; /* private */ \
+ EV_DECL_PRIORITY /* private */ \
+ EV_COMMON /* rw */ \
+ EV_CB_DECLARE (type) /* private */
+
+#define EV_WATCHER_LIST(type) \
+ EV_WATCHER (type) \
+ struct ev_watcher_list *next; /* private */
+
+#define EV_WATCHER_TIME(type) \
+ EV_WATCHER (type) \
+ ev_tstamp at; /* private */
+
+/* base class, nothing to see here unless you subclass */
+/* holds just the fields shared by all watchers: active, pending, the */
+/* optional priority, EV_COMMON and the callback */
+typedef struct ev_watcher
+{
+ EV_WATCHER (ev_watcher)
+} ev_watcher;
+
+/* base class, nothing to see here unless you subclass */
+/* common watcher fields followed by a next pointer for chaining watchers */
+typedef struct ev_watcher_list
+{
+ EV_WATCHER_LIST (ev_watcher_list)
+} ev_watcher_list;
+
+/* base class, nothing to see here unless you subclass */
+/* common watcher fields followed by the ev_tstamp member "at" */
+typedef struct ev_watcher_time
+{
+ EV_WATCHER_TIME (ev_watcher_time)
+} ev_watcher_time;
+
+/* invoked when fd is either EV_READable or EV_WRITEable */
+/* revent EV_READ, EV_WRITE */
+typedef struct ev_io
+{
+ EV_WATCHER_LIST (ev_io)
+
+ int fd; /* ro, the file descriptor being watched */
+ int events; /* ro, requested event mask; ev_io_set also sets EV__IOFDSET in it */
+} ev_io;
+
+/* invoked after a specific time, repeatable (based on monotonic clock) */
+/* revent EV_TIMEOUT */
+typedef struct ev_timer
+{
+ EV_WATCHER_TIME (ev_timer)
+
+ ev_tstamp repeat; /* rw, repeat interval as passed to ev_timer_set */
+} ev_timer;
+
+/* invoked at some specific time, possibly repeating at regular intervals (based on UTC) */
+/* revent EV_PERIODIC */
+/* all three members below are assigned together by ev_periodic_set */
+typedef struct ev_periodic
+{
+ EV_WATCHER_TIME (ev_periodic)
+
+ ev_tstamp offset; /* rw */
+ ev_tstamp interval; /* rw */
+ ev_tstamp (*reschedule_cb)(struct ev_periodic *w, ev_tstamp now); /* rw */
+} ev_periodic;
+
+/* invoked when the given signal has been received */
+/* revent EV_SIGNAL */
+typedef struct ev_signal
+{
+ EV_WATCHER_LIST (ev_signal)
+
+ int signum; /* ro, the signal number to watch for */
+} ev_signal;
+
+/* invoked when sigchld is received and waitpid indicates the given pid */
+/* revent EV_CHILD */
+/* does not support priorities */
+typedef struct ev_child
+{
+ EV_WATCHER_LIST (ev_child)
+
+ int flags; /* private, ev_child_set stores !!trace here */
+ int pid; /* ro */
+ int rpid; /* rw, holds the received pid */
+ int rstatus; /* rw, holds the exit status, use the macros from sys/wait.h */
+} ev_child;
+
+#if EV_STAT_ENABLE
+/* st_nlink = 0 means missing file or other error */
+# ifdef _WIN32
+typedef struct _stati64 ev_statdata;
+# else
+typedef struct stat ev_statdata;
+# endif
+
+/* invoked each time the stat data changes for a given path */
+/* revent EV_STAT */
+typedef struct ev_stat
+{
+ EV_WATCHER_LIST (ev_stat)
+
+ ev_timer timer; /* private, polling timer */
+ ev_tstamp interval; /* ro, requested poll interval, 0 selects a default */
+ const char *path; /* ro, the pointer is stored, the string is not copied */
+ ev_statdata prev; /* ro, stat data before the most recent change */
+ ev_statdata attr; /* ro, most recent stat data, st_nlink == 0 if the stat failed */
+
+ int wd; /* wd for inotify, fd for kqueue */
+} ev_stat;
+#endif
+
+#if EV_IDLE_ENABLE
+/* invoked when nothing else needs to be done, keeps the process from blocking */
+/* revent EV_IDLE */
+/* has no members beyond the common watcher fields */
+typedef struct ev_idle
+{
+ EV_WATCHER (ev_idle)
+} ev_idle;
+#endif
+
+/* invoked for each run of the mainloop, just before the blocking call */
+/* you can still change events in any way you like */
+/* revent EV_PREPARE */
+/* has no members beyond the common watcher fields */
+typedef struct ev_prepare
+{
+ EV_WATCHER (ev_prepare)
+} ev_prepare;
+
+/* invoked for each run of the mainloop, just after the blocking call */
+/* revent EV_CHECK */
+/* has no members beyond the common watcher fields */
+typedef struct ev_check
+{
+ EV_WATCHER (ev_check)
+} ev_check;
+
+#if EV_FORK_ENABLE
+/* the callback gets invoked before check in the child process when a fork was detected */
+/* revent EV_FORK */
+typedef struct ev_fork
+{
+ EV_WATCHER (ev_fork)
+} ev_fork;
+#endif
+
+#if EV_EMBED_ENABLE
+/* used to embed an event loop inside another */
+/* the callback gets invoked when the event loop has handled events, and can be 0 */
+/* revent EV_EMBED */
+typedef struct ev_embed
+{
+ EV_WATCHER (ev_embed)
+
+ struct ev_loop *other; /* ro, the embedded loop */
+ ev_io io; /* private, watches the embedded loop's backend fd */
+ ev_prepare prepare; /* private */
+ ev_check check; /* unused */
+ ev_timer timer; /* unused */
+ ev_periodic periodic; /* unused */
+ ev_idle idle; /* unused */
+ ev_fork fork; /* private */
+} ev_embed;
+#endif
+
+#if EV_ASYNC_ENABLE
+/* invoked when somebody calls ev_async_send on the watcher */
+/* revent EV_ASYNC */
+typedef struct ev_async
+{
+ EV_WATCHER (ev_async)
+
+ EV_ATOMIC_T sent; /* private, set by ev_async_send, read by ev_async_pending */
+} ev_async;
+
+# define ev_async_pending(w) (+(w)->sent)
+#endif
+
+/* the presence of this union forces similar struct layout */
+/* it has one member per watcher type; the optional types are present */
+/* only when their EV_*_ENABLE switch is set */
+union ev_any_watcher
+{
+ struct ev_watcher w;
+ struct ev_watcher_list wl;
+
+ struct ev_io io;
+ struct ev_timer timer;
+ struct ev_periodic periodic;
+ struct ev_signal signal;
+ struct ev_child child;
+#if EV_STAT_ENABLE
+ struct ev_stat stat;
+#endif
+#if EV_IDLE_ENABLE
+ struct ev_idle idle;
+#endif
+ struct ev_prepare prepare;
+ struct ev_check check;
+#if EV_FORK_ENABLE
+ struct ev_fork fork;
+#endif
+#if EV_EMBED_ENABLE
+ struct ev_embed embed;
+#endif
+#if EV_ASYNC_ENABLE
+ struct ev_async async;
+#endif
+};
+
+/* bits for ev_default_loop and ev_loop_new */
+/* the default */
+#define EVFLAG_AUTO 0x00000000U /* not quite a mask */
+/* flag bits */
+#define EVFLAG_NOENV 0x01000000U /* do NOT consult environment */
+#define EVFLAG_FORKCHECK 0x02000000U /* check for a fork in each iteration */
+/* debugging/feature disable */
+#define EVFLAG_NOINOTIFY 0x00100000U /* do not attempt to use inotify */
+#define EVFLAG_NOSIGFD 0 /* compatibility to pre-3.9 */
+#define EVFLAG_SIGNALFD 0x00200000U /* attempt to use signalfd */
+/* method bits to be ored together */
+#define EVBACKEND_SELECT 0x00000001U /* about anywhere */
+#define EVBACKEND_POLL 0x00000002U /* !win */
+#define EVBACKEND_EPOLL 0x00000004U /* linux */
+#define EVBACKEND_KQUEUE 0x00000008U /* bsd */
+#define EVBACKEND_DEVPOLL 0x00000010U /* solaris 8 */ /* NYI */
+#define EVBACKEND_PORT 0x00000020U /* solaris 10 */
+#define EVBACKEND_ALL 0x0000003FU
+
+#if EV_PROTOTYPES
+int ev_version_major (void);
+int ev_version_minor (void);
+
+unsigned int ev_supported_backends (void);
+unsigned int ev_recommended_backends (void);
+unsigned int ev_embeddable_backends (void);
+
+ev_tstamp ev_time (void);
+void ev_sleep (ev_tstamp delay); /* sleep for a while */
+
+/* Sets the allocation function to use, works like realloc.
+ * It is used to allocate and free memory.
+ * If it returns zero when memory needs to be allocated, the library might abort
+ * or take some potentially destructive action.
+ * The default is your system realloc function.
+ */
+void ev_set_allocator (void *(*cb)(void *ptr, long size));
+
+/* set the callback function to call on a
+ * retryable syscall error
+ * (such as failed select, poll, epoll_wait)
+ */
+void ev_set_syserr_cb (void (*cb)(const char *msg));
+
+#if EV_MULTIPLICITY
+/* returns the current default loop pointer as is, without initialising */
+/* the default loop; ev_default_loop treats a null result as "not */
+/* initialised yet" */
+EV_INLINE struct ev_loop *
+ev_default_loop_uc (void)
+{
+ extern struct ev_loop *ev_default_loop_ptr;
+
+ return ev_default_loop_ptr;
+}
+
+/* the default loop is the only one that handles signals and child watchers */
+/* you can call this as often as you like: the loop is initialised with */
+/* flags on first use only, later calls return the existing loop */
+EV_INLINE struct ev_loop *
+ev_default_loop (unsigned int flags)
+{
+  extern struct ev_loop *ev_default_loop_init (unsigned int flags);
+  struct ev_loop *existing = ev_default_loop_uc ();
+
+  return existing ? existing : ev_default_loop_init (flags);
+}
+
+/* create and destroy alternative loops that don't handle signals */
+struct ev_loop *ev_loop_new (unsigned int flags);
+void ev_loop_destroy (EV_P);
+void ev_loop_fork (EV_P);
+
+ev_tstamp ev_now (EV_P); /* time w.r.t. timers and the eventloop, updated after each poll */
+
+#else
+
+int ev_default_loop (unsigned int flags); /* returns true when successful */
+
+/* single-loop build: returns the value of the global ev_rt_now */
+EV_INLINE ev_tstamp
+ev_now (void)
+{
+ extern ev_tstamp ev_rt_now;
+
+ return ev_rt_now;
+}
+#endif /* multiplicity */
+
+/* returns 1 when the given loop is the default loop and 0 otherwise; */
+/* always 1 in builds without multiple loops */
+EV_INLINE int
+ev_is_default_loop (EV_P)
+{
+#if EV_MULTIPLICITY
+  extern struct ev_loop *ev_default_loop_ptr;
+
+  return EV_A == ev_default_loop_ptr ? 1 : 0;
+#else
+  return 1;
+#endif
+}
+
+void ev_default_destroy (void); /* destroy the default loop */
+/* this needs to be called after fork, to duplicate the default loop */
+/* if you create alternative loops you have to call ev_loop_fork on them */
+/* you can call it in either the parent or the child */
+/* you can actually call it at any time, anywhere :) */
+void ev_default_fork (void);
+
+unsigned int ev_backend (EV_P); /* backend in use by loop */
+
+void ev_now_update (EV_P); /* update event loop time */
+
+#if EV_WALK_ENABLE
+/* walk (almost) all watchers in the loop of a given type, invoking the */
+/* callback on every such watcher. The callback might stop the watcher, */
+/* but do nothing else with the loop */
+void ev_walk (EV_P_ int types, void (*cb)(EV_P_ int type, void *w));
+#endif
+
+#endif /* prototypes */
+
+#define EVLOOP_NONBLOCK 1 /* do not block/wait */
+#define EVLOOP_ONESHOT 2 /* block *once* only */
+#define EVUNLOOP_CANCEL 0 /* undo unloop */
+#define EVUNLOOP_ONE 1 /* unloop once */
+#define EVUNLOOP_ALL 2 /* unloop all loops */
+
+#if EV_PROTOTYPES
+void ev_loop (EV_P_ int flags);
+void ev_unloop (EV_P_ int how); /* set to 1 to break out of event loop, set to 2 to break out of all event loops */
+
+/*
+ * ref/unref can be used to add or remove a refcount on the mainloop. every watcher
+ * keeps one reference. if you have a long-running watcher you never unregister that
+ * should not keep ev_loop from running, unref() after starting, and ref() before stopping.
+ */
+void ev_ref (EV_P);
+void ev_unref (EV_P);
+
+/*
+ * convenience function, wait for a single event, without registering an event watcher
+ * if timeout is < 0, do wait indefinitely
+ */
+void ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, void *arg), void *arg);
+
+# if EV_MINIMAL < 2
+unsigned int ev_loop_count (EV_P); /* number of loop iterations */
+unsigned int ev_loop_depth (EV_P); /* #ev_loop enters - #ev_loop leaves */
+void ev_loop_verify (EV_P); /* abort if loop data corrupted */
+
+void ev_set_io_collect_interval (EV_P_ ev_tstamp interval); /* sleep at least this time, default 0 */
+void ev_set_timeout_collect_interval (EV_P_ ev_tstamp interval); /* sleep at least this time, default 0 */
+
+/* advanced stuff for threading etc. support, see docs */
+void ev_set_userdata (EV_P_ void *data);
+void *ev_userdata (EV_P);
+void ev_set_invoke_pending_cb (EV_P_ void (*invoke_pending_cb)(EV_P));
+void ev_set_loop_release_cb (EV_P_ void (*release)(EV_P), void (*acquire)(EV_P));
+
+unsigned int ev_pending_count (EV_P); /* number of pending events, if any */
+void ev_invoke_pending (EV_P); /* invoke all pending watchers */
+
+/*
+ * stop/start the timer handling.
+ */
+void ev_suspend (EV_P);
+void ev_resume (EV_P);
+#endif
+
+#endif
+
+/* these may evaluate ev multiple times, and the other arguments at most once */
+/* either use ev_init + ev_TYPE_set, or the ev_TYPE_init macro, below, to first initialise a watcher */
+#define ev_init(ev,cb_) do { /* type-independent part of watcher initialisation */ \
+ ((ev_watcher *)(void *)(ev))->active = /* the watcher is neither started ... */ \
+ ((ev_watcher *)(void *)(ev))->pending = 0; /* ... nor pending */ \
+ ev_set_priority ((ev), 0); /* priority 0 */ \
+ ev_set_cb ((ev), cb_); /* install the callback */ \
+} while (0)
+
+#define ev_io_set(ev,fd_,events_) do { (ev)->fd = (fd_); (ev)->events = (events_) | EV__IOFDSET; } while (0) /* the stored event mask additionally carries EV__IOFDSET */
+#define ev_timer_set(ev,after_,repeat_) do { ((ev_watcher_time *)(ev))->at = (after_); (ev)->repeat = (repeat_); } while (0) /* after_ is stored in the ev_watcher_time "at" member */
+#define ev_periodic_set(ev,ofs_,ival_,rcb_) do { (ev)->offset = (ofs_); (ev)->interval = (ival_); (ev)->reschedule_cb = (rcb_); } while (0) /* stores offset, interval and reschedule callback */
+#define ev_signal_set(ev,signum_) do { (ev)->signum = (signum_); } while (0) /* stores the signal number */
+#define ev_child_set(ev,pid_,trace_) do { (ev)->pid = (pid_); (ev)->flags = !!(trace_); } while (0) /* trace_ is normalised to 0 or 1 */
+#define ev_stat_set(ev,path_,interval_) do { (ev)->path = (path_); (ev)->interval = (interval_); (ev)->wd = -2; } while (0) /* also sets wd to -2; NOTE(review): the meaning of -2 is not visible here, check ev.c */
+#define ev_idle_set(ev) /* nop, yes, this is a serious in-joke */
+#define ev_prepare_set(ev) /* nop, yes, this is a serious in-joke */
+#define ev_check_set(ev) /* nop, yes, this is a serious in-joke */
+#define ev_embed_set(ev,other_) do { (ev)->other = (other_); } while (0) /* stores other_ in the "other" member */
+#define ev_fork_set(ev) /* nop, yes, this is a serious in-joke */
+#define ev_async_set(ev) do { (ev)->sent = 0; } while (0) /* clears the "sent" member */
+
+#define ev_io_init(ev,cb,fd,events) do { ev_init ((ev), (cb)); ev_io_set ((ev),(fd),(events)); } while (0)
+#define ev_timer_init(ev,cb,after,repeat) do { ev_init ((ev), (cb)); ev_timer_set ((ev),(after),(repeat)); } while (0)
+#define ev_periodic_init(ev,cb,ofs,ival,rcb) do { ev_init ((ev), (cb)); ev_periodic_set ((ev),(ofs),(ival),(rcb)); } while (0)
+#define ev_signal_init(ev,cb,signum) do { ev_init ((ev), (cb)); ev_signal_set ((ev), (signum)); } while (0)
+#define ev_child_init(ev,cb,pid,trace) do { ev_init ((ev), (cb)); ev_child_set ((ev),(pid),(trace)); } while (0)
+#define ev_stat_init(ev,cb,path,interval) do { ev_init ((ev), (cb)); ev_stat_set ((ev),(path),(interval)); } while (0)
+#define ev_idle_init(ev,cb) do { ev_init ((ev), (cb)); ev_idle_set ((ev)); } while (0)
+#define ev_prepare_init(ev,cb) do { ev_init ((ev), (cb)); ev_prepare_set ((ev)); } while (0)
+#define ev_check_init(ev,cb) do { ev_init ((ev), (cb)); ev_check_set ((ev)); } while (0)
+#define ev_embed_init(ev,cb,other) do { ev_init ((ev), (cb)); ev_embed_set ((ev),(other)); } while (0)
+#define ev_fork_init(ev,cb) do { ev_init ((ev), (cb)); ev_fork_set ((ev)); } while (0)
+#define ev_async_init(ev,cb) do { ev_init ((ev), (cb)); ev_async_set ((ev)); } while (0)
+
+#define ev_is_pending(ev) (0 + ((ev_watcher *)(void *)(ev))->pending) /* ro, true when watcher is waiting for callback invocation */
+#define ev_is_active(ev) (0 + ((ev_watcher *)(void *)(ev))->active) /* ro, true when the watcher has been started */
+
+#define ev_cb(ev) (ev)->cb /* rw */
+
+#if EV_MINPRI == EV_MAXPRI /* only one priority level exists: nothing is read from or written to the watcher */
+# define ev_priority(ev) ((ev), EV_MINPRI) /* ev is still evaluated, the result is always EV_MINPRI */
+# define ev_set_priority(ev,pri) ((ev), (pri)) /* evaluates both arguments, stores nothing */
+#else /* the priority is kept in the watcher's priority member */
+# define ev_priority(ev) (+(((ev_watcher *)(void *)(ev))->priority)) /* unary + keeps the result from being an lvalue */
+# define ev_set_priority(ev,pri) ( (ev_watcher *)(void *)(ev))->priority = (pri)
+#endif
+
+#define ev_periodic_at(ev) (+((ev_watcher_time *)(ev))->at)
+
+#ifndef ev_set_cb
+# define ev_set_cb(ev,cb_) ev_cb (ev) = (cb_)
+#endif
+
+/* starting (enabling, adding) a watcher does nothing if it is already running */
+/* stopping (disabling, deleting) a watcher does nothing unless it's already running */
+#if EV_PROTOTYPES
+
+/* feeds an event into a watcher as if the event actually occurred */
+/* accepts any ev_watcher type */
+void ev_feed_event (EV_P_ void *w, int revents);
+void ev_feed_fd_event (EV_P_ int fd, int revents);
+void ev_feed_signal_event (EV_P_ int signum);
+void ev_invoke (EV_P_ void *w, int revents);
+int ev_clear_pending (EV_P_ void *w);
+
+void ev_io_start (EV_P_ ev_io *w);
+void ev_io_stop (EV_P_ ev_io *w);
+
+void ev_timer_start (EV_P_ ev_timer *w);
+void ev_timer_stop (EV_P_ ev_timer *w);
+/* stops if active and no repeat, restarts if active and repeating, starts if inactive and repeating */
+void ev_timer_again (EV_P_ ev_timer *w);
+/* return remaining time */
+ev_tstamp ev_timer_remaining (EV_P_ ev_timer *w);
+
+#if EV_PERIODIC_ENABLE
+void ev_periodic_start (EV_P_ ev_periodic *w);
+void ev_periodic_stop (EV_P_ ev_periodic *w);
+void ev_periodic_again (EV_P_ ev_periodic *w);
+#endif
+
+/* only supported in the default loop */
+void ev_signal_start (EV_P_ ev_signal *w);
+void ev_signal_stop (EV_P_ ev_signal *w);
+
+/* only supported in the default loop */
+void ev_child_start (EV_P_ ev_child *w);
+void ev_child_stop (EV_P_ ev_child *w);
+
+# if EV_STAT_ENABLE
+void ev_stat_start (EV_P_ ev_stat *w);
+void ev_stat_stop (EV_P_ ev_stat *w);
+void ev_stat_stat (EV_P_ ev_stat *w);
+# endif
+
+# if EV_IDLE_ENABLE
+void ev_idle_start (EV_P_ ev_idle *w);
+void ev_idle_stop (EV_P_ ev_idle *w);
+# endif
+
+void ev_prepare_start (EV_P_ ev_prepare *w);
+void ev_prepare_stop (EV_P_ ev_prepare *w);
+
+void ev_check_start (EV_P_ ev_check *w);
+void ev_check_stop (EV_P_ ev_check *w);
+
+# if EV_FORK_ENABLE
+void ev_fork_start (EV_P_ ev_fork *w);
+void ev_fork_stop (EV_P_ ev_fork *w);
+# endif
+
+# if EV_EMBED_ENABLE
+/* only supported when loop to be embedded is in fact embeddable */
+void ev_embed_start (EV_P_ ev_embed *w);
+void ev_embed_stop (EV_P_ ev_embed *w);
+void ev_embed_sweep (EV_P_ ev_embed *w);
+# endif
+
+# if EV_ASYNC_ENABLE
+void ev_async_start (EV_P_ ev_async *w);
+void ev_async_stop (EV_P_ ev_async *w);
+void ev_async_send (EV_P_ ev_async *w);
+# endif
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/libev/ev_epoll.c b/libev/ev_epoll.c
new file mode 100644
index 0000000..f7e3d60
--- /dev/null
+++ b/libev/ev_epoll.c
@@ -0,0 +1,228 @@
+/*
+ * libev epoll fd activity backend
+ *
+ * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+/*
+ * general notes about epoll:
+ *
+ * a) epoll silently removes fds from the fd set. as nothing tells us
+ * that an fd has been removed otherwise, we have to continually
+ * "rearm" fds that we suspect *might* have changed (same
+ * problem with kqueue, but much less costly there).
+ * b) the fact that ADD != MOD creates a lot of extra syscalls due to a)
+ * and seems not to have any advantage.
+ * c) the inability to handle fork or file descriptors (think dup)
+ * limits the applicability over poll, so this is not a generic
+ * poll replacement.
+ *
+ * lots of "weird code" and complication handling in this file is due
+ * to these design problems with epoll, as we try very hard to avoid
+ * epoll_ctl syscalls for common usage patterns and handle the breakage
+ * ensuing from receiving events for closed and otherwise long gone
+ * file descriptors.
+ */
+
+#include <sys/epoll.h>
+
+/* backend_modify hook: make the kernel watch fd for the event set nev; */
+/* a non-zero oev means the fd was watched before, so MOD is tried first. */
+static void
+epoll_modify (EV_P_ int fd, int oev, int nev)
+{
+  struct epoll_event ev;
+  unsigned char oldmask;
+
+  /*
+   * we handle EPOLL_CTL_DEL by ignoring it here on the assumption that
+   * the fd is gone anyways. if that is wrong, we have to handle the
+   * spurious event in epoll_poll. if the fd is added again, we try to
+   * ADD it, and, if that fails, we assume it still has the same eventmask.
+   */
+  if (!nev)
+    return;
+
+  oldmask = anfds [fd].emask;
+  anfds [fd].emask = nev;
+
+  /* store the generation counter in the upper 32 bits, the fd in the lower 32 bits */
+  ev.data.u64 = (uint64_t)(uint32_t)fd
+              | ((uint64_t)(uint32_t)++anfds [fd].egen << 32);
+  ev.events   = (nev & EV_READ  ? EPOLLIN  : 0)
+              | (nev & EV_WRITE ? EPOLLOUT : 0);
+
+  if (expect_true (!epoll_ctl (backend_fd, oev ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, fd, &ev)))
+    return;
+
+  if (expect_true (errno == ENOENT))
+    {
+      /* if ENOENT then the fd went away, so try to do the right thing. */
+      /* nev cannot be zero at this point (that case returned at the top */
+      /* of the function), so the only thing left to try is to ADD the */
+      /* fd afresh. */
+      if (!epoll_ctl (backend_fd, EPOLL_CTL_ADD, fd, &ev))
+        return;
+    }
+  else if (expect_true (errno == EEXIST))
+    {
+      /* EEXIST means we ignored a previous DEL, but the fd is still active */
+      /* if the kernel mask is the same as the new mask, we assume it hasn't changed */
+      if (oldmask == nev)
+        goto dec_egen;
+
+      if (!epoll_ctl (backend_fd, EPOLL_CTL_MOD, fd, &ev))
+        return;
+    }
+
+  fd_kill (EV_A_ fd); /* the retry failed as well, or the error was neither ENOENT nor EEXIST */
+
+dec_egen:
+  /* we didn't successfully call epoll_ctl, so decrement the generation counter again */
+  --anfds [fd].egen;
+}
+
+static void
+epoll_poll (EV_P_ ev_tstamp timeout) /* backend_poll hook: wait for fd activity and pass it on to fd_event */
+{
+ int i;
+ int eventcnt;
+
+ /* epoll wait times cannot be larger than (LONG_MAX - 999UL) / HZ msecs, which is below */
+ /* the default libev max wait time, however. */
+ EV_RELEASE_CB;
+ eventcnt = epoll_wait (backend_fd, epoll_events, epoll_eventmax, (int)ceil (timeout * 1000.)); /* timeout is rounded up to whole milliseconds */
+ EV_ACQUIRE_CB;
+
+ if (expect_false (eventcnt < 0))
+ {
+ if (errno != EINTR) /* EINTR is ignored silently, anything else is reported */
+ ev_syserr ("(libev) epoll_wait");
+
+ return;
+ }
+
+ for (i = 0; i < eventcnt; ++i)
+ {
+ struct epoll_event *ev = epoll_events + i;
+
+ int fd = (uint32_t)ev->data.u64; /* keep only the lower 32 bits, which hold the fd */
+ int want = anfds [fd].events; /* event set currently recorded for this fd */
+ int got = (ev->events & (EPOLLOUT | EPOLLERR | EPOLLHUP) ? EV_WRITE : 0)
+ | (ev->events & (EPOLLIN | EPOLLERR | EPOLLHUP) ? EV_READ : 0); /* errors and hangups count as both readable and writable */
+
+ /* check for spurious notification: the generation in the upper 32 bits must match the fd's current egen */
+ if (expect_false ((uint32_t)anfds [fd].egen != (uint32_t)(ev->data.u64 >> 32)))
+ {
+ /* recreate kernel state */
+ postfork = 1;
+ continue;
+ }
+
+ if (expect_false (got & ~want))
+ {
+ anfds [fd].emask = want;
+
+ /* we received an event but are not interested in it, try mod or del */
+ /* I don't think we ever need MOD, but let's handle it anyways */
+ ev->events = (want & EV_READ ? EPOLLIN : 0)
+ | (want & EV_WRITE ? EPOLLOUT : 0);
+
+ /* pre-2.6.9 kernels require a non-null pointer with EPOLL_CTL_DEL, */
+ /* which is fortunately easy to do for us. */
+ if (epoll_ctl (backend_fd, want ? EPOLL_CTL_MOD : EPOLL_CTL_DEL, fd, ev))
+ {
+ postfork = 1; /* an error occurred, recreate kernel state */
+ continue;
+ }
+ }
+
+ fd_event (EV_A_ fd, got);
+ }
+
+ /* if the receive array was full, increase its size */
+ if (expect_false (eventcnt == epoll_eventmax))
+ {
+ ev_free (epoll_events); /* the old contents are not needed any more, so free + malloc instead of realloc */
+ epoll_eventmax = array_nextsize (sizeof (struct epoll_event), epoll_eventmax, epoll_eventmax + 1);
+ epoll_events = (struct epoll_event *)ev_malloc (sizeof (struct epoll_event) * epoll_eventmax);
+ }
+}
+
+/* set up the epoll backend; returns EVBACKEND_EPOLL, or 0 if no epoll fd could be created */
+int inline_size
+epoll_init (EV_P_ int flags)
+{
+#ifdef EPOLL_CLOEXEC
+  backend_fd = epoll_create1 (EPOLL_CLOEXEC);
+  /* 0 is a valid descriptor: only a negative result means epoll_create1 failed */
+  if (backend_fd < 0)
+#endif
+    backend_fd = epoll_create (256);
+
+  if (backend_fd < 0)
+    return 0;
+
+  fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* needed when the fd came from plain epoll_create */
+
+  backend_fudge  = 0.; /* kernel sources seem to indicate this to be zero */
+  backend_modify = epoll_modify;
+  backend_poll   = epoll_poll;
+
+  epoll_eventmax = 64; /* initial number of events receivable per poll */
+  epoll_events = (struct epoll_event *)ev_malloc (sizeof (struct epoll_event) * epoll_eventmax);
+  return EVBACKEND_EPOLL;
+}
+
+void inline_size
+epoll_destroy (EV_P) /* free the event receive array; backend_fd is not closed in this function */
+{
+ ev_free (epoll_events);
+}
+
+void inline_size
+epoll_fork (EV_P) /* replace the epoll fd with a newly created one and rearm all fds */
+{
+ close (backend_fd); /* the old epoll fd is given up unconditionally */
+
+ while ((backend_fd = epoll_create (256)) < 0) /* tried again after every failure, provided ev_syserr returns */
+ ev_syserr ("(libev) epoll_create");
+
+ fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* close-on-exec; the result is not checked */
+
+ fd_rearm_all (EV_A); /* re-register interest in fds */
+}
+
diff --git a/libev/ev_kqueue.c b/libev/ev_kqueue.c
new file mode 100644
index 0000000..0fe340b
--- /dev/null
+++ b/libev/ev_kqueue.c
@@ -0,0 +1,196 @@
+/*
+ * libev kqueue backend
+ *
+ * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/queue.h>
+#include <sys/event.h>
+#include <string.h>
+#include <errno.h>
+
+void inline_speed
+kqueue_change (EV_P_ int fd, int filter, int flags, int fflags)
+{
+  int slot = kqueue_changecnt++; /* index of the entry appended by this call */
+  array_needsize (struct kevent, kqueue_changes, kqueue_changemax, kqueue_changecnt, EMPTY2);
+  /* only queue the change here; kqueue_poll passes the whole list to kevent () */
+  EV_SET (kqueue_changes + slot, fd, filter, flags, fflags, 0, 0);
+}
+
+#ifndef NOTE_EOF
+# define NOTE_EOF 0
+#endif
+
+static void
+kqueue_modify (EV_P_ int fd, int oev, int nev)
+{
+  /* filters of the old set are deleted only when the event set really changes */
+  int drop = oev != nev ? oev : 0;
+
+  if (drop & EV_READ)
+    kqueue_change (EV_A_ fd, EVFILT_READ , EV_DELETE, 0);
+
+  if (drop & EV_WRITE)
+    kqueue_change (EV_A_ fd, EVFILT_WRITE, EV_DELETE, 0);
+
+  /* the new set is (re-)added in every case, even when oev == nev: */
+  /* this is what makes close/reopen of an fd reliably detectable */
+
+  if (nev & EV_READ)
+    kqueue_change (EV_A_ fd, EVFILT_READ , EV_ADD, NOTE_EOF);
+
+  if (nev & EV_WRITE)
+    kqueue_change (EV_A_ fd, EVFILT_WRITE, EV_ADD, NOTE_EOF);
+}
+
+static void
+kqueue_poll (EV_P_ ev_tstamp timeout) /* backend_poll hook: submit the queued changes and collect events in one kevent () call */
+{
+ int res, i;
+ struct timespec ts;
+
+ /* need to resize so there is enough space for errors */
+ if (kqueue_changecnt > kqueue_eventmax)
+ {
+ ev_free (kqueue_events);
+ kqueue_eventmax = array_nextsize (sizeof (struct kevent), kqueue_eventmax, kqueue_changecnt);
+ kqueue_events = (struct kevent *)ev_malloc (sizeof (struct kevent) * kqueue_eventmax);
+ }
+
+ EV_RELEASE_CB;
+ ts.tv_sec = (time_t)timeout; /* whole seconds of the timeout */
+ ts.tv_nsec = (long)((timeout - (ev_tstamp)ts.tv_sec) * 1e9); /* remaining fraction, in nanoseconds */
+ res = kevent (backend_fd, kqueue_changes, kqueue_changecnt, kqueue_events, kqueue_eventmax, &ts);
+ EV_ACQUIRE_CB;
+ kqueue_changecnt = 0; /* the change list is emptied whether or not kevent succeeded */
+
+ if (expect_false (res < 0))
+ {
+ if (errno != EINTR) /* EINTR is ignored silently, anything else is reported */
+ ev_syserr ("(libev) kevent");
+
+ return;
+ }
+
+ for (i = 0; i < res; ++i)
+ {
+ int fd = kqueue_events [i].ident;
+
+ if (expect_false (kqueue_events [i].flags & EV_ERROR)) /* this entry reports a failed change, not an event */
+ {
+ int err = kqueue_events [i].data; /* for EV_ERROR entries the data member is used as the error code */
+
+ /* we are only interested in errors for fds that we are interested in :) */
+ if (anfds [fd].events)
+ {
+ if (err == ENOENT) /* resubmit changes on ENOENT */
+ kqueue_modify (EV_A_ fd, 0, anfds [fd].events);
+ else if (err == EBADF) /* on EBADF, we re-check the fd */
+ {
+ if (fd_valid (fd))
+ kqueue_modify (EV_A_ fd, 0, anfds [fd].events);
+ else
+ fd_kill (EV_A_ fd);
+ }
+ else /* on all other errors, we error out on the fd */
+ fd_kill (EV_A_ fd);
+ }
+ }
+ else
+ fd_event (
+ EV_A_
+ fd,
+ kqueue_events [i].filter == EVFILT_READ ? EV_READ
+ : kqueue_events [i].filter == EVFILT_WRITE ? EV_WRITE
+ : 0
+ );
+ }
+
+ if (expect_false (res == kqueue_eventmax)) /* the receive array was full, so enlarge it for the next call */
+ {
+ ev_free (kqueue_events);
+ kqueue_eventmax = array_nextsize (sizeof (struct kevent), kqueue_eventmax, kqueue_eventmax + 1);
+ kqueue_events = (struct kevent *)ev_malloc (sizeof (struct kevent) * kqueue_eventmax);
+ }
+}
+
+int inline_size
+kqueue_init (EV_P_ int flags)
+{
+  /* create the kernel queue; without one this backend cannot be used */
+  backend_fd = kqueue ();
+  if (backend_fd < 0)
+    return 0;
+
+  fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* not sure if necessary, hopefully doesn't hurt */
+
+  backend_poll   = kqueue_poll;
+  backend_modify = kqueue_modify;
+  backend_fudge  = 0.;
+
+  /* the change list starts out empty and unallocated */
+  kqueue_changecnt = kqueue_changemax = 0;
+  kqueue_changes   = 0;
+
+  kqueue_eventmax = 64; /* initial number of events receivable per poll */
+  kqueue_events   = (struct kevent *)ev_malloc (sizeof (struct kevent) * kqueue_eventmax);
+  return EVBACKEND_KQUEUE;
+}
+
+void inline_size
+kqueue_destroy (EV_P) /* free the two arrays owned by the kqueue backend */
+{
+ ev_free (kqueue_events); /* event receive array */
+ ev_free (kqueue_changes); /* pending change list; may still be 0 as set by kqueue_init */
+}
+
+void inline_size
+kqueue_fork (EV_P) /* replace the kqueue fd with a newly created one and rearm all fds */
+{
+ close (backend_fd); /* the old kqueue fd is given up unconditionally */
+
+ while ((backend_fd = kqueue ()) < 0) /* tried again after every failure, provided ev_syserr returns */
+ ev_syserr ("(libev) kqueue");
+
+ fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* close-on-exec; the result is not checked */
+
+ /* re-register interest in fds */
+ fd_rearm_all (EV_A);
+}
+
diff --git a/libev/ev_poll.c b/libev/ev_poll.c
new file mode 100644
index 0000000..178e458
--- /dev/null
+++ b/libev/ev_poll.c
@@ -0,0 +1,144 @@
+/*
+ * libev poll fd activity backend
+ *
+ * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#include <poll.h>
+
+void inline_size
+pollidx_init (int *base, int count)
+{
+  int i;
+  /* -1 is the "no pollfd allocated" marker tested by poll_modify; */
+  /* memset (.., -1, ...) is practically guaranteed to work just as well */
+  for (i = 0; i != count; ++i) base [i] = -1;
+}
+
+static void
+poll_modify (EV_P_ int fd, int oev, int nev) /* backend_modify hook: keep the pollfd array in sync with the event set of fd */
+{
+ int idx;
+
+ if (oev == nev) /* nothing changed, nothing to do */
+ return;
+
+ array_needsize (int, pollidxs, pollidxmax, fd + 1, pollidx_init); /* make sure pollidxs [fd] exists, new slots are filled by pollidx_init */
+
+ idx = pollidxs [fd]; /* position of this fd inside polls, negative if it has none */
+
+ if (idx < 0) /* need to allocate a new pollfd */
+ {
+ pollidxs [fd] = idx = pollcnt++; /* append at the end of polls */
+ array_needsize (struct pollfd, polls, pollmax, pollcnt, EMPTY2);
+ polls [idx].fd = fd;
+ }
+
+ assert (polls [idx].fd == fd); /* pollidxs and polls must agree */
+
+ if (nev)
+ polls [idx].events =
+ (nev & EV_READ ? POLLIN : 0)
+ | (nev & EV_WRITE ? POLLOUT : 0);
+ else /* remove pollfd */
+ {
+ pollidxs [fd] = -1;
+
+ if (expect_true (idx < --pollcnt)) /* unless it was the last entry, fill the hole with the last one */
+ {
+ polls [idx] = polls [pollcnt];
+ pollidxs [polls [idx].fd] = idx; /* and update the index of the entry that was moved */
+ }
+ }
+}
+
+static void
+poll_poll (EV_P_ ev_tstamp timeout) /* backend_poll hook: run poll () over the pollfd array and dispatch the results */
+{
+ struct pollfd *p;
+ int res;
+
+ EV_RELEASE_CB;
+ res = poll (polls, pollcnt, (int)ceil (timeout * 1000.)); /* timeout is rounded up to whole milliseconds */
+ EV_ACQUIRE_CB;
+
+ if (expect_false (res < 0))
+ {
+ if (errno == EBADF)
+ fd_ebadf (EV_A); /* NOTE(review): presumably weeds out invalid fds -- defined elsewhere, confirm */
+ else if (errno == ENOMEM && !syserr_cb) /* only taken when no syserr_cb is set */
+ fd_enomem (EV_A);
+ else if (errno != EINTR) /* EINTR is ignored silently */
+ ev_syserr ("(libev) poll");
+ }
+ else
+ for (p = polls; res; ++p) /* res counts the entries still to be found; p is not checked against pollcnt */
+ if (expect_false (p->revents)) /* this expect is debatable */
+ {
+ --res;
+
+ if (expect_false (p->revents & POLLNVAL)) /* poll flagged this entry with POLLNVAL: kill the fd */
+ fd_kill (EV_A_ p->fd);
+ else
+ fd_event (
+ EV_A_
+ p->fd,
+ (p->revents & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0)
+ | (p->revents & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0) /* errors and hangups count as both readable and writable */
+ );
+ }
+}
+
+int inline_size
+poll_init (EV_P_ int flags) /* set up the poll backend; always returns EVBACKEND_POLL, flags is not used */
+{
+ backend_fudge = 0.; /* posix says this is zero */
+ backend_modify = poll_modify;
+ backend_poll = poll_poll;
+
+ pollidxs = 0; pollidxmax = 0; /* fd -> polls index table, grown on demand in poll_modify */
+ polls = 0; pollmax = 0; pollcnt = 0; /* pollfd array, grown on demand in poll_modify */
+
+ return EVBACKEND_POLL;
+}
+
+void inline_size
+poll_destroy (EV_P) /* free the two arrays owned by the poll backend */
+{
+ ev_free (pollidxs); /* fd -> polls index table */
+ ev_free (polls); /* pollfd array */
+}
+
diff --git a/libev/ev_port.c b/libev/ev_port.c
new file mode 100644
index 0000000..47da929
--- /dev/null
+++ b/libev/ev_port.c
@@ -0,0 +1,165 @@
+/*
+ * libev solaris event port backend
+ *
+ * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <poll.h>
+#include <port.h>
+#include <string.h>
+#include <errno.h>
+
+void inline_speed
+port_associate_and_check (EV_P_ int fd, int ev)
+{
+  /* translate the libev event set into the poll-style mask port_associate takes */
+  int pollmask = (ev & EV_READ  ? POLLIN  : 0)
+               | (ev & EV_WRITE ? POLLOUT : 0);
+
+  /* a non-negative result means the association worked */
+  if (port_associate (backend_fd, PORT_SOURCE_FD, fd, pollmask, 0) >= 0)
+    return;
+
+  /* an fd rejected with EBADFD is killed, every other failure */
+  /* is reported through ev_syserr */
+  if (errno == EBADFD)
+    fd_kill (EV_A_ fd);
+  else
+    ev_syserr ("(libev) port_associate");
+}
+
+static void
+port_modify (EV_P_ int fd, int oev, int nev)
+{
+  /* a non-empty event set is (re)associated no matter what, as closes */
+  /* are once more silently being discarded */
+  if (nev)
+    {
+      port_associate_and_check (EV_A_ fd, nev);
+      return;
+    }
+
+  if (oev) /* nothing is wanted any more: dissociate, but only if the fd was watched */
+    port_dissociate (backend_fd, PORT_SOURCE_FD, fd);
+}
+
+/* backend_poll hook: wait up to timeout seconds for port events and dispatch the fd events */
+static void
+port_poll (EV_P_ ev_tstamp timeout)
+{
+  int res, i;
+  struct timespec ts;
+  uint_t nget = 1; /* passed by reference to port_getn and used as the event count afterwards */
+
+  EV_RELEASE_CB;
+  ts.tv_sec  = (time_t)timeout;
+  /* the cast has to cover the whole product: casting the sub-second remainder alone always yields 0 */
+  ts.tv_nsec = (long)((timeout - (ev_tstamp)ts.tv_sec) * 1e9);
+  res = port_getn (backend_fd, port_events, port_eventmax, &nget, &ts);
+  EV_ACQUIRE_CB;
+
+  if (res == -1)
+    {
+      if (errno != EINTR && errno != ETIME) /* interruption and timeout are not errors */
+        ev_syserr ("(libev) port_getn (see http://bugs.opensolaris.org/view_bug.do?bug_id=6268715, try LIBEV_FLAGS=3 env variable)");
+      return;
+    }
+
+  for (i = 0; i < nget; ++i)
+    {
+      if (port_events [i].portev_source == PORT_SOURCE_FD) /* events from any other source are skipped */
+        {
+          int fd = port_events [i].portev_object;
+
+          fd_event (
+            EV_A_ fd,
+            (port_events [i].portev_events & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0)
+            | (port_events [i].portev_events & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0)
+          );
+
+          port_associate_and_check (EV_A_ fd, anfds [fd].events); /* associate the fd again with its recorded event set */
+        }
+    }
+
+  if (expect_false (nget == port_eventmax)) /* the receive array was full, so enlarge it for the next call */
+    {
+      ev_free (port_events);
+      port_eventmax = array_nextsize (sizeof (port_event_t), port_eventmax, port_eventmax + 1);
+      port_events = (port_event_t *)ev_malloc (sizeof (port_event_t) * port_eventmax);
+    }
+}
+
+int inline_size
+port_init (EV_P_ int flags)
+{
+ /* Initialize the event port; without one this backend is not used (returns 0) */
+ if ((backend_fd = port_create ()) < 0)
+ return 0;
+
+ fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* not sure if necessary, hopefully doesn't hurt */
+
+ backend_fudge = 1e-3; /* needed to compensate for port_getn returning early */
+ backend_modify = port_modify;
+ backend_poll = port_poll;
+
+ port_eventmax = 64; /* initial number of events receivable per poll */
+ port_events = (port_event_t *)ev_malloc (sizeof (port_event_t) * port_eventmax);
+
+ return EVBACKEND_PORT;
+}
+
+void inline_size
+port_destroy (EV_P) /* free the event receive array; backend_fd is not closed in this function */
+{
+ ev_free (port_events);
+}
+
+void inline_size
+port_fork (EV_P) /* replace the event port with a newly created one and rearm all fds */
+{
+ close (backend_fd); /* the old port fd is given up unconditionally */
+
+ while ((backend_fd = port_create ()) < 0) /* tried again after every failure, provided ev_syserr returns */
+ ev_syserr ("(libev) port");
+
+ fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* close-on-exec; the result is not checked */
+
+ /* re-register interest in fds */
+ fd_rearm_all (EV_A);
+}
+
diff --git a/libev/ev_select.c b/libev/ev_select.c
new file mode 100644
index 0000000..818a63e
--- /dev/null
+++ b/libev/ev_select.c
@@ -0,0 +1,308 @@
+/*
+ * libev select fd activity backend
+ *
+ * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#ifndef _WIN32
+/* for unix systems */
+# include <sys/select.h>
+# include <inttypes.h>
+#endif
+
+#ifndef EV_SELECT_USE_FD_SET
+# ifdef NFDBITS
+# define EV_SELECT_USE_FD_SET 0
+# else
+# define EV_SELECT_USE_FD_SET 1
+# endif
+#endif
+
+#if EV_SELECT_IS_WINSOCKET
+# undef EV_SELECT_USE_FD_SET
+# define EV_SELECT_USE_FD_SET 1
+# undef NFDBITS
+# define NFDBITS 0
+#endif
+
+#if !EV_SELECT_USE_FD_SET
+# define NFDBYTES (NFDBITS / 8)
+#endif
+
+#include <string.h>
+/*
+ * Update the select interest sets for fd when its event mask changes from
+ * oev to nev (EV_READ / EV_WRITE bits). Does nothing when oev == nev.
+ *
+ * With EV_SELECT_USE_FD_SET the interest is kept in fd_sets (vec_ri for
+ * reading, vec_wi for writing) and fd must be below FD_SETSIZE. Otherwise
+ * it is kept in arrays of fd_mask words that are grown here on demand so
+ * that the word containing fd exists.
+ */
+static void
+select_modify (EV_P_ int fd, int oev, int nev)
+{
+ if (oev == nev)
+ return;
+
+ {
+#if EV_SELECT_USE_FD_SET
+
+ #if EV_SELECT_IS_WINSOCKET
+ SOCKET handle = anfds [fd].handle;
+ #else
+ int handle = fd;
+ #endif
+
+ assert (("libev: fd >= FD_SETSIZE passed to fd_set-based select backend", fd < FD_SETSIZE));
+
+ /* FD_SET is broken on windows (it adds the fd to a set twice or more,
+ * which eventually leads to overflows). Need to call it only on changes.
+ * The extra "if" below therefore guards the whole if/else that follows it.
+ */
+ #if EV_SELECT_IS_WINSOCKET
+ if ((oev ^ nev) & EV_READ)
+ #endif
+ if (nev & EV_READ)
+ FD_SET (handle, (fd_set *)vec_ri);
+ else
+ FD_CLR (handle, (fd_set *)vec_ri);
+
+ #if EV_SELECT_IS_WINSOCKET
+ if ((oev ^ nev) & EV_WRITE)
+ #endif
+ if (nev & EV_WRITE)
+ FD_SET (handle, (fd_set *)vec_wi);
+ else
+ FD_CLR (handle, (fd_set *)vec_wi);
+
+#else
+
+ int word = fd / NFDBITS; /* index of the fd_mask word holding this fd */
+ fd_mask mask = 1UL << (fd % NFDBITS); /* bit for this fd within that word */
+
+ if (expect_false (vec_max <= word))
+ {
+ int new_max = word + 1;
+
+ vec_ri = ev_realloc (vec_ri, new_max * NFDBYTES);
+ vec_ro = ev_realloc (vec_ro, new_max * NFDBYTES); /* could free/malloc */
+ vec_wi = ev_realloc (vec_wi, new_max * NFDBYTES);
+ vec_wo = ev_realloc (vec_wo, new_max * NFDBYTES); /* could free/malloc */
+ #ifdef _WIN32
+ vec_eo = ev_realloc (vec_eo, new_max * NFDBYTES); /* could free/malloc */
+ #endif
+
+ for (; vec_max < new_max; ++vec_max) /* clear only the newly added words of the two input sets */
+ ((fd_mask *)vec_ri) [vec_max] =
+ ((fd_mask *)vec_wi) [vec_max] = 0;
+ }
+
+ ((fd_mask *)vec_ri) [word] |= mask; /* set the bit, the next statement clears it again if reading is not wanted */
+ if (!(nev & EV_READ))
+ ((fd_mask *)vec_ri) [word] &= ~mask;
+
+ ((fd_mask *)vec_wi) [word] |= mask; /* same set-then-clear pattern for writing */
+ if (!(nev & EV_WRITE))
+ ((fd_mask *)vec_wi) [word] &= ~mask;
+#endif
+ }
+}
+/*
+ * Wait up to timeout seconds for fd activity using select () and feed the
+ * resulting events into the loop through fd_event.
+ *
+ * The interest sets (vec_ri/vec_wi) are first copied into the output sets
+ * (vec_ro/vec_wo), and only the copies are passed to select (). The call is
+ * bracketed by EV_RELEASE_CB / EV_ACQUIRE_CB.
+ *
+ * When select () fails no events are fed: EBADF is handed to fd_ebadf,
+ * ENOMEM to fd_enomem (only if no syserr_cb is set), EINTR is ignored and
+ * every other error goes to ev_syserr.
+ */
+static void
+select_poll (EV_P_ ev_tstamp timeout)
+{
+ struct timeval tv;
+ int res;
+ int fd_setsize;
+
+ EV_RELEASE_CB;
+ tv.tv_sec = (long)timeout; /* whole seconds */
+ tv.tv_usec = (long)((timeout - (ev_tstamp)tv.tv_sec) * 1e6); /* remaining fraction in microseconds */
+
+#if EV_SELECT_USE_FD_SET
+ fd_setsize = sizeof (fd_set); /* number of bytes to copy per set */
+#else
+ fd_setsize = vec_max * NFDBYTES;
+#endif
+
+ memcpy (vec_ro, vec_ri, fd_setsize);
+ memcpy (vec_wo, vec_wi, fd_setsize);
+
+#ifdef _WIN32
+ /* pass in the write set as except set.
+ * the idea behind this is to work around a windows bug that causes
+ * errors to be reported as an exception and not by setting
+ * the writable bit. this is so uncontrollably lame.
+ */
+ memcpy (vec_eo, vec_wi, fd_setsize);
+ res = select (vec_max * NFDBITS, (fd_set *)vec_ro, (fd_set *)vec_wo, (fd_set *)vec_eo, &tv);
+#elif EV_SELECT_USE_FD_SET
+ fd_setsize = anfdmax < FD_SETSIZE ? anfdmax : FD_SETSIZE; /* variable reused: now the nfds argument for select */
+ res = select (fd_setsize, (fd_set *)vec_ro, (fd_set *)vec_wo, 0, &tv);
+#else
+ res = select (vec_max * NFDBITS, (fd_set *)vec_ro, (fd_set *)vec_wo, 0, &tv);
+#endif
+ EV_ACQUIRE_CB;
+
+ if (expect_false (res < 0))
+ {
+ #if EV_SELECT_IS_WINSOCKET
+ errno = WSAGetLastError ();
+ #endif
+ #ifdef WSABASEERR
+ /* on windows, select returns incompatible error codes, fix this */
+ if (errno >= WSABASEERR && errno < WSABASEERR + 1000)
+ if (errno == WSAENOTSOCK)
+ errno = EBADF;
+ else
+ errno -= WSABASEERR;
+ #endif
+
+ #ifdef _WIN32
+ /* select on windows erroneously returns EINVAL when no fd sets have been
+ * provided (this is documented). what microsoft doesn't tell you is that this bug
+ * exists even when the fd sets _are_ provided, so we have to check for this bug
+ * here and emulate by sleeping manually.
+ * we also get EINVAL when the timeout is invalid, but we ignore this case here
+ * and assume that EINVAL always means: you have to wait manually.
+ */
+ if (errno == EINVAL)
+ {
+ ev_sleep (timeout);
+ return;
+ }
+ #endif
+
+ if (errno == EBADF)
+ fd_ebadf (EV_A);
+ else if (errno == ENOMEM && !syserr_cb)
+ fd_enomem (EV_A);
+ else if (errno != EINTR)
+ ev_syserr ("(libev) select");
+
+ return;
+ }
+
+#if EV_SELECT_USE_FD_SET
+
+ {
+ int fd;
+
+ for (fd = 0; fd < anfdmax; ++fd)
+ if (anfds [fd].events) /* only fds that have an event mask set are tested */
+ {
+ int events = 0;
+ #if EV_SELECT_IS_WINSOCKET
+ SOCKET handle = anfds [fd].handle;
+ #else
+ int handle = fd;
+ #endif
+
+ if (FD_ISSET (handle, (fd_set *)vec_ro)) events |= EV_READ;
+ if (FD_ISSET (handle, (fd_set *)vec_wo)) events |= EV_WRITE;
+ #ifdef _WIN32
+ if (FD_ISSET (handle, (fd_set *)vec_eo)) events |= EV_WRITE;
+ #endif
+
+ if (expect_true (events))
+ fd_event (EV_A_ fd, events);
+ }
+ }
+
+#else
+
+ {
+ int word, bit;
+ for (word = vec_max; word--; ) /* walk the words from the highest index down to 0 */
+ {
+ fd_mask word_r = ((fd_mask *)vec_ro) [word];
+ fd_mask word_w = ((fd_mask *)vec_wo) [word];
+ #ifdef _WIN32
+ word_w |= ((fd_mask *)vec_eo) [word];
+ #endif
+
+ if (word_r || word_w) /* skip words in which no result bit is set */
+ for (bit = NFDBITS; bit--; )
+ {
+ fd_mask mask = 1UL << bit;
+ int events = 0;
+
+ events |= word_r & mask ? EV_READ : 0;
+ events |= word_w & mask ? EV_WRITE : 0;
+
+ if (expect_true (events))
+ fd_event (EV_A_ word * NFDBITS + bit, events);
+ }
+ }
+ }
+
+#endif
+}
+/*
+ * Install the select backend for this loop and set up its fd-set vectors.
+ *
+ * With EV_SELECT_USE_FD_SET the vectors are allocated here as fd_sets and
+ * the two input sets (vec_ri, vec_wi) are cleared. Otherwise all vectors
+ * start out as null pointers with vec_max 0 and are sized by select_modify.
+ *
+ * flags is accepted but not used in this function.
+ * Always returns EVBACKEND_SELECT.
+ */
+int inline_size
+select_init (EV_P_ int flags)
+{
+ backend_fudge = 0.; /* posix says this is zero */
+ backend_modify = select_modify;
+ backend_poll = select_poll;
+
+#if EV_SELECT_USE_FD_SET
+ vec_ri = ev_malloc (sizeof (fd_set)); FD_ZERO ((fd_set *)vec_ri);
+ vec_ro = ev_malloc (sizeof (fd_set));
+ vec_wi = ev_malloc (sizeof (fd_set)); FD_ZERO ((fd_set *)vec_wi);
+ vec_wo = ev_malloc (sizeof (fd_set));
+ #ifdef _WIN32
+ vec_eo = ev_malloc (sizeof (fd_set));
+ #endif
+#else
+ vec_max = 0;
+ vec_ri = 0;
+ vec_ro = 0;
+ vec_wi = 0;
+ vec_wo = 0;
+ #ifdef _WIN32
+ vec_eo = 0;
+ #endif
+#endif
+
+ return EVBACKEND_SELECT;
+}
+/*
+ * Free the fd-set vectors of the select backend (vec_ri, vec_ro, vec_wi,
+ * vec_wo and, on _WIN32, vec_eo).
+ */
+void inline_size
+select_destroy (EV_P)
+{
+ ev_free (vec_ri);
+ ev_free (vec_ro);
+ ev_free (vec_wi);
+ ev_free (vec_wo);
+ #ifdef _WIN32
+ ev_free (vec_eo);
+ #endif
+}
+
+
diff --git a/libev/ev_vars.h b/libev/ev_vars.h
new file mode 100644
index 0000000..da53ee8
--- /dev/null
+++ b/libev/ev_vars.h
@@ -0,0 +1,187 @@
+/*
+ * loop member variable declarations
+ *
+ * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+/*
+ * VARx(type,name) is shorthand for VAR(name, type name). VAR itself is not
+ * defined in this file and has to be supplied by whoever includes it.
+ * VARx is undefined again at the end of the file.
+ */
+#define VARx(type,name) VAR(name, type name)
+
+VARx(ev_tstamp, now_floor) /* last time we refreshed rt_time */
+VARx(ev_tstamp, mn_now) /* monotonic clock "now" */
+VARx(ev_tstamp, rtmn_diff) /* difference realtime - monotonic time */
+
+VARx(ev_tstamp, io_blocktime)
+VARx(ev_tstamp, timeout_blocktime)
+
+VARx(int, backend)
+VARx(int, activecnt) /* total number of active events ("refcount") */
+VARx(unsigned char, loop_done) /* signal by ev_unloop */
+
+VARx(int, backend_fd) /* descriptor of the backend where it has one (the port backend stores its port here) */
+VARx(ev_tstamp, backend_fudge) /* assumed typical timer resolution */
+VAR (backend_modify, void (*backend_modify)(EV_P_ int fd, int oev, int nev)) /* set by the backend's init function */
+VAR (backend_poll , void (*backend_poll)(EV_P_ ev_tstamp timeout)) /* set by the backend's init function */
+
+VARx(ANFD *, anfds) /* per-fd state, indexed by file descriptor */
+VARx(int, anfdmax) /* upper bound for fd indices into anfds */
+
+VAR (pendings, ANPENDING *pendings [NUMPRI])
+VAR (pendingmax, int pendingmax [NUMPRI])
+VAR (pendingcnt, int pendingcnt [NUMPRI])
+VARx(ev_prepare, pending_w) /* dummy pending watcher */
+
+/* for reverse feeding of events */
+VARx(W *, rfeeds)
+VARx(int, rfeedmax)
+VARx(int, rfeedcnt)
+
+#if EV_USE_EVENTFD || EV_GENWRAP
+VARx(int, evfd)
+#endif
+VAR (evpipe, int evpipe [2])
+VARx(ev_io, pipe_w)
+
+#if !defined(_WIN32) || EV_GENWRAP
+VARx(pid_t, curpid)
+#endif
+
+VARx(char, postfork) /* true if we need to recreate kernel state after fork */
+
+#if EV_USE_SELECT || EV_GENWRAP
+VARx(void *, vec_ri) /* select: read interest set */
+VARx(void *, vec_ro) /* select: copy of vec_ri that is passed to select () */
+VARx(void *, vec_wi) /* select: write interest set */
+VARx(void *, vec_wo) /* select: copy of vec_wi that is passed to select () */
+#if defined(_WIN32) || EV_GENWRAP
+VARx(void *, vec_eo) /* select: except set passed to select () on windows */
+#endif
+VARx(int, vec_max) /* select: number of fd_mask words per vector (fd_mask mode) */
+#endif
+
+#if EV_USE_POLL || EV_GENWRAP
+VARx(struct pollfd *, polls)
+VARx(int, pollmax)
+VARx(int, pollcnt)
+VARx(int *, pollidxs) /* maps fds into structure indices */
+VARx(int, pollidxmax)
+#endif
+
+#if EV_USE_EPOLL || EV_GENWRAP
+VARx(struct epoll_event *, epoll_events)
+VARx(int, epoll_eventmax)
+#endif
+
+#if EV_USE_KQUEUE || EV_GENWRAP
+VARx(struct kevent *, kqueue_changes)
+VARx(int, kqueue_changemax)
+VARx(int, kqueue_changecnt)
+VARx(struct kevent *, kqueue_events)
+VARx(int, kqueue_eventmax)
+#endif
+
+#if EV_USE_PORT || EV_GENWRAP
+VARx(struct port_event *, port_events) /* port: buffer that port_getn fills with events */
+VARx(int, port_eventmax) /* port: capacity of port_events, in events */
+#endif
+
+VARx(int *, fdchanges)
+VARx(int, fdchangemax)
+VARx(int, fdchangecnt)
+
+VARx(ANHE *, timers)
+VARx(int, timermax)
+VARx(int, timercnt)
+
+#if EV_PERIODIC_ENABLE || EV_GENWRAP
+VARx(ANHE *, periodics)
+VARx(int, periodicmax)
+VARx(int, periodiccnt)
+#endif
+
+#if EV_IDLE_ENABLE || EV_GENWRAP
+VAR (idles, ev_idle **idles [NUMPRI])
+VAR (idlemax, int idlemax [NUMPRI])
+VAR (idlecnt, int idlecnt [NUMPRI])
+#endif
+VARx(int, idleall) /* total number */
+
+VARx(struct ev_prepare **, prepares)
+VARx(int, preparemax)
+VARx(int, preparecnt)
+
+VARx(struct ev_check **, checks)
+VARx(int, checkmax)
+VARx(int, checkcnt)
+
+#if EV_FORK_ENABLE || EV_GENWRAP
+VARx(struct ev_fork **, forks)
+VARx(int, forkmax)
+VARx(int, forkcnt)
+#endif
+
+#if EV_ASYNC_ENABLE || EV_GENWRAP
+VARx(EV_ATOMIC_T, async_pending)
+VARx(struct ev_async **, asyncs)
+VARx(int, asyncmax)
+VARx(int, asynccnt)
+#endif
+
+#if EV_USE_INOTIFY || EV_GENWRAP
+VARx(int, fs_fd)
+VARx(ev_io, fs_w)
+VARx(char, fs_2625) /* whether we are running in linux 2.6.25 or newer */
+VAR (fs_hash, ANFS fs_hash [EV_INOTIFY_HASHSIZE])
+#endif
+
+VARx(EV_ATOMIC_T, sig_pending)
+#if EV_USE_SIGNALFD || EV_GENWRAP
+VARx(int, sigfd)
+VARx(ev_io, sigfd_w)
+VARx(sigset_t, sigfd_set)
+#endif
+
+#if EV_MINIMAL < 2 || EV_GENWRAP
+VARx(unsigned int, loop_count) /* total number of loop iterations/blocks */
+VARx(unsigned int, loop_depth) /* #ev_loop enters - #ev_loop leaves */
+
+VARx(void *, userdata)
+VAR (release_cb, void (*release_cb)(EV_P))
+VAR (acquire_cb, void (*acquire_cb)(EV_P))
+VAR (invoke_cb , void (*invoke_cb) (EV_P))
+#endif
+
+#undef VARx
+
diff --git a/libev/ev_wrap.h b/libev/ev_wrap.h
new file mode 100644
index 0000000..03b6b87
--- /dev/null
+++ b/libev/ev_wrap.h
@@ -0,0 +1,178 @@
+/* DO NOT EDIT, automatically generated by update_ev_wrap */
+#ifndef EV_WRAP_H
+#define EV_WRAP_H
+#define now_floor ((loop)->now_floor)
+#define mn_now ((loop)->mn_now)
+#define rtmn_diff ((loop)->rtmn_diff)
+#define io_blocktime ((loop)->io_blocktime)
+#define timeout_blocktime ((loop)->timeout_blocktime)
+#define backend ((loop)->backend)
+#define activecnt ((loop)->activecnt)
+#define loop_done ((loop)->loop_done)
+#define backend_fd ((loop)->backend_fd)
+#define backend_fudge ((loop)->backend_fudge)
+#define backend_modify ((loop)->backend_modify)
+#define backend_poll ((loop)->backend_poll)
+#define anfds ((loop)->anfds)
+#define anfdmax ((loop)->anfdmax)
+#define pendings ((loop)->pendings)
+#define pendingmax ((loop)->pendingmax)
+#define pendingcnt ((loop)->pendingcnt)
+#define pending_w ((loop)->pending_w)
+#define rfeeds ((loop)->rfeeds)
+#define rfeedmax ((loop)->rfeedmax)
+#define rfeedcnt ((loop)->rfeedcnt)
+#define evfd ((loop)->evfd)
+#define evpipe ((loop)->evpipe)
+#define pipe_w ((loop)->pipe_w)
+#define curpid ((loop)->curpid)
+#define postfork ((loop)->postfork)
+#define vec_ri ((loop)->vec_ri)
+#define vec_ro ((loop)->vec_ro)
+#define vec_wi ((loop)->vec_wi)
+#define vec_wo ((loop)->vec_wo)
+#define vec_eo ((loop)->vec_eo)
+#define vec_max ((loop)->vec_max)
+#define polls ((loop)->polls)
+#define pollmax ((loop)->pollmax)
+#define pollcnt ((loop)->pollcnt)
+#define pollidxs ((loop)->pollidxs)
+#define pollidxmax ((loop)->pollidxmax)
+#define epoll_events ((loop)->epoll_events)
+#define epoll_eventmax ((loop)->epoll_eventmax)
+#define kqueue_changes ((loop)->kqueue_changes)
+#define kqueue_changemax ((loop)->kqueue_changemax)
+#define kqueue_changecnt ((loop)->kqueue_changecnt)
+#define kqueue_events ((loop)->kqueue_events)
+#define kqueue_eventmax ((loop)->kqueue_eventmax)
+#define port_events ((loop)->port_events)
+#define port_eventmax ((loop)->port_eventmax)
+#define fdchanges ((loop)->fdchanges)
+#define fdchangemax ((loop)->fdchangemax)
+#define fdchangecnt ((loop)->fdchangecnt)
+#define timers ((loop)->timers)
+#define timermax ((loop)->timermax)
+#define timercnt ((loop)->timercnt)
+#define periodics ((loop)->periodics)
+#define periodicmax ((loop)->periodicmax)
+#define periodiccnt ((loop)->periodiccnt)
+#define idles ((loop)->idles)
+#define idlemax ((loop)->idlemax)
+#define idlecnt ((loop)->idlecnt)
+#define idleall ((loop)->idleall)
+#define prepares ((loop)->prepares)
+#define preparemax ((loop)->preparemax)
+#define preparecnt ((loop)->preparecnt)
+#define checks ((loop)->checks)
+#define checkmax ((loop)->checkmax)
+#define checkcnt ((loop)->checkcnt)
+#define forks ((loop)->forks)
+#define forkmax ((loop)->forkmax)
+#define forkcnt ((loop)->forkcnt)
+#define async_pending ((loop)->async_pending)
+#define asyncs ((loop)->asyncs)
+#define asyncmax ((loop)->asyncmax)
+#define asynccnt ((loop)->asynccnt)
+#define fs_fd ((loop)->fs_fd)
+#define fs_w ((loop)->fs_w)
+#define fs_2625 ((loop)->fs_2625)
+#define fs_hash ((loop)->fs_hash)
+#define sig_pending ((loop)->sig_pending)
+#define sigfd ((loop)->sigfd)
+#define sigfd_w ((loop)->sigfd_w)
+#define sigfd_set ((loop)->sigfd_set)
+#define loop_count ((loop)->loop_count)
+#define loop_depth ((loop)->loop_depth)
+#define userdata ((loop)->userdata)
+#define release_cb ((loop)->release_cb)
+#define acquire_cb ((loop)->acquire_cb)
+#define invoke_cb ((loop)->invoke_cb)
+#else
+#undef EV_WRAP_H
+#undef now_floor
+#undef mn_now
+#undef rtmn_diff
+#undef io_blocktime
+#undef timeout_blocktime
+#undef backend
+#undef activecnt
+#undef loop_done
+#undef backend_fd
+#undef backend_fudge
+#undef backend_modify
+#undef backend_poll
+#undef anfds
+#undef anfdmax
+#undef pendings
+#undef pendingmax
+#undef pendingcnt
+#undef pending_w
+#undef rfeeds
+#undef rfeedmax
+#undef rfeedcnt
+#undef evfd
+#undef evpipe
+#undef pipe_w
+#undef curpid
+#undef postfork
+#undef vec_ri
+#undef vec_ro
+#undef vec_wi
+#undef vec_wo
+#undef vec_eo
+#undef vec_max
+#undef polls
+#undef pollmax
+#undef pollcnt
+#undef pollidxs
+#undef pollidxmax
+#undef epoll_events
+#undef epoll_eventmax
+#undef kqueue_changes
+#undef kqueue_changemax
+#undef kqueue_changecnt
+#undef kqueue_events
+#undef kqueue_eventmax
+#undef port_events
+#undef port_eventmax
+#undef fdchanges
+#undef fdchangemax
+#undef fdchangecnt
+#undef timers
+#undef timermax
+#undef timercnt
+#undef periodics
+#undef periodicmax
+#undef periodiccnt
+#undef idles
+#undef idlemax
+#undef idlecnt
+#undef idleall
+#undef prepares
+#undef preparemax
+#undef preparecnt
+#undef checks
+#undef checkmax
+#undef checkcnt
+#undef forks
+#undef forkmax
+#undef forkcnt
+#undef async_pending
+#undef asyncs
+#undef asyncmax
+#undef asynccnt
+#undef fs_fd
+#undef fs_w
+#undef fs_2625
+#undef fs_hash
+#undef sig_pending
+#undef sigfd
+#undef sigfd_w
+#undef sigfd_set
+#undef loop_count
+#undef loop_depth
+#undef userdata
+#undef release_cb
+#undef acquire_cb
+#undef invoke_cb
+#endif
diff --git a/man/Makefile b/man/Makefile
new file mode 100644
index 0000000..c3027f9
--- /dev/null
+++ b/man/Makefile
@@ -0,0 +1,7 @@
+MAN5PAGES = opennhrp.conf.5
+MAN8PAGES = opennhrp.8 opennhrpctl.8 opennhrp-script.8
+# install: run $(INSTALLDIR) on the man5/man8 directories below $(DESTDIR)$(MANDIR), then $(INSTALL) the pages listed above into them (INSTALLDIR, INSTALL, MANDIR and src are not defined in this file).
+install:
+ $(INSTALLDIR) $(DESTDIR)$(MANDIR)/man5 $(DESTDIR)$(MANDIR)/man8
+ $(INSTALL) $(addprefix $(src)/,$(MAN5PAGES)) $(DESTDIR)$(MANDIR)/man5
+ $(INSTALL) $(addprefix $(src)/,$(MAN8PAGES)) $(DESTDIR)$(MANDIR)/man8
diff --git a/man/opennhrp-script.8 b/man/opennhrp-script.8
new file mode 100644
index 0000000..0af32b1
--- /dev/null
+++ b/man/opennhrp-script.8
@@ -0,0 +1,146 @@
+.TH OPENNHRP-SCRIPT 8 "20 May 2009" "" "OpenNHRP Documentation"
+
+.SH NAME
+opennhrp-script \- NHRP peer configuration script
+
+.SH DESCRIPTION
+The NHRP peer configuration script is invoked by
+.BR opennhrp (8).
+.PP
+This script can be used to establish a direct NBMA peer to peer connection
+after NHRP Resolution Reply has been received, but prior to injecting the
+peer address to kernel neighbor table. This could be to insert firewall rules
+allowing the traffic and/or establishing an IPsec connection (or some other
+secure communication channel). The script is also called when the cached peer
+information expires.
+
+.SH OPERATION
+When
+.B opennhrp
+needs to invoke the peer configuration script, it defines a set of variables
+in the environment and then executes the script with exactly one argument.
+The argument is set to the name of the reason why the script has been invoked.
+The following reasons are currently defined:
+.BR "interface-up" , " peer-register" , " peer-up" , " peer-down" ,
+.BR " nhs-up" , " nhs-down" , " route-up" " and " route-down .
+
+.SH INTERFACE-UP
+The interface has just been discovered, or it has changed state from down
+to up. This is the place to clean up old routes if needed.
+
+.SH PEER-REGISTER
+A peer registration request has been received. The script is run before the
+internal peer cache is altered and this allows the script to reject
+registration without it deleting old peers. This could be used to check that
+IPsec connection is up or one might encode allowed protocol-addresses in the
+certificate and it could be enforced here. This hook is executed synchronously
+so it should be fast.
+
+.SH PEER-UP
+A peer has been discovered (either by means of static configuration, dynamic
+client registration or resolution reply arrival to initiate shortcut).
+This hook is invoked right after the peer's NBMA address is available. For all
+other than dynamic-map entries the protocol address is available too.
+The information will not be injected to the kernel ARP cache until the script
+has returned zero. If a non-zero value is returned, the peer entry is
+marked as invalid and negatively cached for a short period of time.
+
+.SH PEER-DOWN
+A peer connection is about to be cleared. This can happen for dynamic client
+registrations or cached information. Dynamic client registrations are torn
+down when the registration holding time expires (and no re-registration has
+occurred) or if they are explicitly removed using a Purge Request. Cached entries are
+removed when the holding time expires (and there has been no traffic to trigger
+renewal of the peer address information) or when they are explicitly removed
+with a Purge Request.
+
+.SH NHS-UP
+This is called for an NHS right after the first successful Registration Reply
+is received.
+This can be used to update application level configuration about which
+servers to use.
+
+.SH NHS-DOWN
+Informs that the specified NHS is no longer available.
+
+.SH ROUTE-UP
+In reply to resolution request we have received a shortcut route with
+destination off the NBMA subnetwork. The script should insert appropriate
+entry to kernel routing table.
+
+.SH ROUTE-DOWN
+The associated shortcut route information is no longer valid and should be
+removed from kernel routing table.
+
+.SH ENVIRONMENT
+.B NHRP_TYPE
+.RS
+For peer-up and peer-down reasons this can be:
+\fBstatic\fR (configured information),
+\fBdynamic-nhs\fR (configured NHS with only NBMA address known),
+\fBdynamic\fR (client registered) or
+\fBcached\fR (resolved since we had packets going there).
+
+The nhs-up and nhs-down reasons are called for \fBstatic\fR entries with
+register keyword and \fBdynamic-nhs\fR entries.
+
+For peer-register this is always \fBdynamic\fR.
+
+For route-up and route-down reasons this is always defined as \fBroute\fR.
+
+For interface-up reason this is irrelevant, but always defined as
+\fBinterface\fR.
+.RE
+
+.B NHRP_INTERFACE
+.RS
+The network interface to which this event is related.
+.RE
+
+.B NHRP_GRE_KEY
+.RS
+The GRE key assigned to the related network interface.
+.RE
+
+.B NHRP_DESTADDR
+.RS
+Destination protocol address. E.g. for NBMA GRE tunnels this is the IP address
+assigned to the tunnel interface being used.
+.RE
+
+.B NHRP_DESTPREFIX
+.RS
+Subnet prefix length for destination protocol address.
+.RE
+
+.B NHRP_DESTNBMA
+.RS
+Defined only for \fBpeer-up\fR and \fBpeer-down\fR reasons. This contains the
+NBMA address of the destination. E.g. for NBMA GRE this contains the public IP
+of the peer.
+.RE
+
+.B NHRP_DESTMTU
+.RS
+Defined only for \fBpeer-up\fR reasons. This contains the MTU for NBMA
+address of the destination.
+.RE
+
+.B NHRP_NEXTHOP
+.RS
+Defined only for \fBroute-up\fR and \fBroute-down\fR reasons. This is the
+protocol address of the next hop to be used in routing.
+.RE
+
+.B NHRP_PEER_DOWN_REASON
+.RS
+Defined only for \fBpeer-down\fR reason. This describes why the peer has
+been deleted. Currently it is one of \fBexpired\fR, \fBuser-request\fR or
+\fBlower-down\fR.
+.RE
+
+.SH "SEE ALSO"
+.BR opennhrp (8)
+
+.SH AUTHORS
+Timo Teras <timo.teras@iki.fi>
diff --git a/man/opennhrp.8 b/man/opennhrp.8
new file mode 100644
index 0000000..b83b94b
--- /dev/null
+++ b/man/opennhrp.8
@@ -0,0 +1,119 @@
+.TH OPENNHRP 8 "16 November 2007" "" "OpenNHRP Documentation"
+
+.SH NAME
+opennhrp \- daemon to resolve next hop address in NBMA network
+
+.SH SYNOPSIS
+.BI "opennhrp [" "option" "]..."
+
+.SH DESCRIPTION
+.B opennhrp
+implements the Next Hop Resolution Protocol (NHRP) which is used to
+improve the efficiency of routing computer network traffic over
+Non-Broadcast, Multiple Access (NBMA) Networks.
+.PP
+NHRP provides an ARP-like solution that allows a system to dynamically
+learn the NBMA address of the other systems that are part of that network,
+allowing these systems to directly communicate without requiring traffic
+to use an intermediate hop.
+.PP
+.B opennhrp
+implementation is based on RFC2332, but contains some modifications and
+extensions to be compatible with Cisco NHRP/DMVPN implementation.
+Modifications have been made for authentication extension, Cisco NAT
+address extension and shortcut switching enhancements support.
+
+.SH OPTIONS
+The following options are recognized:
+
+.IP "\fB\-a \fIadmin\-socket"
+Specify management interface socket as
+.IR admin\-socket .
+The default is
+.IR /var/run/opennhrp.socket .
+
+.IP "\fB\-c \fIconfig\-file"
+Use
+.I config\-file
+instead of
+.I /etc/opennhrp/opennhrp.conf
+for configuration.
+
+.IP "\fB\-s \fIscript\-file"
+Execute
+.I script\-file
+instead of
+.I /etc/opennhrp/opennhrp\-script
+on important events.
+
+.IP "\fB\-p \fIpid\-file"
+Store process id in
+.I pid\-file
+instead of
+.IR /var/run/opennhrp.pid .
+This file is also used to detect if opennhrp daemon is already running.
+Pid-file is not created unless
+.B -d
+is specified too.
+
+.IP "\fB-d"
+Run in daemon mode, forking to background after initialization.
+
+.IP "\fB-v"
+Verbose. Print more log messages.
+
+.IP "\fB-V"
+Print version and exit.
+
+.SH SIGNALS
+.IP \fBSIGHUP
+Forget all cached information about other system addresses.
+.IP \fBSIGUSR1
+Dump NHRP peer database to system log.
+
+.SH FILES
+.I /etc/opennhrp/opennhrp.conf
+.RS
+The system wide configuration file. See
+.BR opennhrp.conf (5)
+for further details.
+.RE
+
+.I /etc/opennhrp/opennhrp\-script
+.RS
+Script executed by
+.B opennhrp
+on important events. See
+.BR opennhrp\-script (8)
+for more information on how the script is executed.
+.RE
+
+.I /var/run/opennhrp.socket
+.RS
+.BR opennhrp "(8) control socket"
+.RE
+
+.SH BUGS
+Currently only IPv4 over IPv4 networks using NBMA GRE tunnels are
+supported (you need Linux kernel 2.6.24-rc2 or later).
+.PP
+Replying with cached information to non-authoritative resolution
+requests is not implemented.
+.PP
+Please send bug reports to OpenNHRP issue tracker in SourceForge.
+
+.SH "SEE ALSO"
+.BR opennhrp.conf (5),
+.BR opennhrpctl (8),
+.BR opennhrp\-script (8)
+.br
+http://sourceforge.net/projects/opennhrp
+.PP
+For more information about the protocol see:
+.br
+RFC2332 NBMA Next Hop Resolution Protocol (NHRP)
+.br
+RFC2333 NHRP Protocol Applicability Statement
+
+.SH AUTHORS
+Timo Teras <timo.teras@iki.fi>
diff --git a/man/opennhrp.conf.5 b/man/opennhrp.conf.5
new file mode 100644
index 0000000..aacec80
--- /dev/null
+++ b/man/opennhrp.conf.5
@@ -0,0 +1,227 @@
+.TH OPENNHRP.CONF 5 "27 Oct 2010" "" "OpenNHRP Documentation"
+
+.SH NAME
+opennhrp.conf \- NHRP daemon configuration file
+
+.SH DESCRIPTION
+The
+.I opennhrp.conf
+file contains information for the
+.BR opennhrp .
+.PP
+This configuration file is a free-form ASCII text file. It is parsed by the
+word-by-word parser built into
+.BR opennhrp .
+The file may contain extra whitespace, tabs and newlines for formatting
+purposes. Keywords and contents are case-sensitive. Comments can be marked
+with a hash sign
+.RB ( # )
+and everything following it until newline is ignored.
+
+.SH "DIRECTIVES"
+Directives are keywords that can appear in any context of the configuration
+file and they select a new context.
+
+.PP
+.BI "interface " interface-name
+.RS
+Marks the start of configuration for network interface
+.IR interface-name .
+Even if no interface specific configuration is required, the
+.B interface
+directive must be present to enable NHRP on that interface.
+.RE
+
+.SH "INTERFACE CONTEXT"
+These configuration keywords can appear only in the interface context.
+
+.PP
+.BI "map " protocol-address[/prefix] " " nbma-address " [register] [cisco]"
+.RS
+Creates static peer mapping of
+.I protocol-address
+to
+.IR nbma-address .
+.PP
+If the
+.I prefix
+parameter is present, it directs
+.B opennhrp
+to use this peer as a next hop server when sending Resolution Requests
+matching this subnet.
+.PP
+The optional parameter
+.I register
+specifies that Registration Request should be sent to this peer on
+startup.
+.PP
+If the statically mapped peer is running Cisco IOS, specify the
+.B cisco
+keyword. It is used to fix statically the Registration Request ID
+so that a matching Purge Request can be sent if NBMA address has changed.
+This is to work around broken IOS which requires Purge Request ID to
+match the original Registration Request ID.
+.RE
+
+.BI "dynamic-map " protocol-address/prefix " " nbma-domain-name
+.RS
+Specifies that the NBMA addresses of the next hop servers are defined in the
+domain name
+.IR nbma-domain-name .
+For each A record opennhrp creates a dynamic NHS entry.
+
+Each dynamic NHS will get a peer entry with the configured network address
+and the discovered NBMA address.
+
+The first registration request is sent to the protocol broadcast address,
+and the server's real protocol address is dynamically detected from the first
+registration reply (requires opennhrp 0.11 or newer).
+
+Alternatively, if
+.BR peer-up
+script hook can determine the protocol address from the NBMA address (e.g.
+by doing an additional DNS lookup or by parsing the IPsec certificate) it can
+inform this mapping via
+.BR opennhrpctl "(8) " "update nbma " command.
+.RE
+
+.PP
+.BI "shortcut-target " protocol-address/prefix " [holding-time " holdtime "]"
+.RS
+Defines an off-NBMA network prefix for which the GRE interface will act
+as a gateway. This is an alternative to defining local interfaces with
+shortcut-destination flag.
+.RE
+
+.BR multicast " " dynamic "|" nhs
+.br
+.BI "multicast " protocol-address
+.RS
+Determines how opennhrp daemon should soft switch the multicast traffic.
+Currently, multicast traffic is captured by opennhrp daemon using a packet
+socket, and resent back to proper destinations. This means that multicast
+packet sending is CPU intensive.
+
+Specifying
+.B nhs
+makes all multicast packets to be repeated to each statically configured
+next hop.
+.B dynamic
+instructs to forward to all peers which we have a direct connection with.
+Alternatively, you can specify the directive multiple times for each
+.I protocol-address
+the multicast traffic should be sent to.
+
+.B "WARNING:"
+It is very easy to misconfigure multicast repeating if you have multiple
+NHS:es.
+.RE
+
+.BI "holding-time " holdtime
+.RS
+Specifies the holding time for NHRP Registration Requests and
+Resolution Replies sent from this interface or shortcut-target.
+The
+.I holdtime
+is specified in seconds and defaults to two hours.
+.RE
+
+.BI "route-table " routetable
+.RS
+Specifies the kernel routing table to be monitored for outgoing routes
+to this interface. This is required to do routing lookups excluding
+active shortcut routes (for existing shortcut route renewal). The
+default is main table.
+
+If you use
+.B table
+directive in
+.B zebra.conf
+to put Quagga routes in alternate table, this should match with it.
+.RE
+
+.BI "cisco-authentication " secret
+.RS
+Enables Cisco style authentication on NHRP packets. This embeds the
+.I secret
+plaintext password to the outgoing NHRP packets. Incoming NHRP packets
+on this interface are discarded unless the
+.I secret
+password is present. Maximum length of the
+.I secret
+is 8 characters.
+.RE
+
+.B redirect
+.RS
+Enable sending of Cisco style NHRP Traffic Indication packets. If
+this is enabled and
+.B opennhrp
+detects a forwarded packet, it will send a message to the original sender
+of the packet instructing it to create a direct connection with the
+destination. This is basically a protocol independent equivalent of ICMP
+redirect.
+.RE
+
+.B shortcut
+.RS
+Enable creation of shortcut routes. A received NHRP Traffic Indication
+will trigger the resolution and establishment of a shortcut route.
+.PP
+.B IMPORTANT:
+You still need to run some routing protocol or have static routes
+to some hub node in your NBMA network. NHRP does not advertise routes;
+it can create shortcut route only for an already routable subnet.
+.RE
+
+.B non-caching
+.RS
+Disables caching of peer information from forwarded NHRP Resolution
+Reply packets. This can be used to reduce memory consumption on big
+NBMA subnets.
+.PP
+NOTE: currently does not do much as caching is not implemented.
+.RE
+
+.B shortcut-destination
+.RS
+This instructs
+.B opennhrp
+to reply with authoritative answers on NHRP Resolution Requests destined
+to addresses in this interface (instead of forwarding the packets). This
+effectively allows the creation of shortcut routes to subnets located
+on the interface.
+.PP
+When specified, this should be the only keyword for the interface.
+.RE
+
+.SH EXAMPLE
+The following configuration file was used for testing OpenNHRP on a machine
+with two ethernet network interfaces. GRE tunnel was configured with tunnel
+IP 10.255.255.2/24. Configuration enables registration to hub node at
+10.255.255.1 and resolution of other nodes in the subnet using that hub.
+.PP
+It also enables creation of shortcut routes to networks behind other
+hosts (with holding-time override for the defined shortcut-target)
+in our NBMA network and allows incoming shortcut routes.
+.PP
+.nf
+interface gre1
+ holding-time 3600
+ map 10.255.255.1/24 192.168.200.1 register
+ shortcut-target 172.16.0.0/16 holding-time 1800
+ cisco-authentication secret
+ shortcut
+ redirect
+ non-caching
+
+interface eth1
+ shortcut-destination
+
+.fi
+
+.SH "SEE ALSO"
+.BR opennhrp (8)
+
+.SH AUTHORS
+Timo Teras <timo.teras@iki.fi>
diff --git a/man/opennhrpctl.8 b/man/opennhrpctl.8
new file mode 100644
index 0000000..611c6f7
--- /dev/null
+++ b/man/opennhrpctl.8
@@ -0,0 +1,124 @@
+.TH OPENNHRPCTL 8 "20 May 2009" "" "OpenNHRP Documentation"
+
+.SH NAME
+opennhrpctl \- opennhrp administrative control tool
+
+.SH SYNOPSIS
+.B opennhrpctl
+.BI "[\-a " admin\-socket "]" " command " "[" "arguments" "]..."
+
+.SH DESCRIPTION
+.B opennhrpctl
+is a utility to control
+.BR opennhrp (8)
+daemon operation. A UNIX socket is used for communication between
+.B opennhrpctl
+and
+.BR opennhrp (8).
+Administration privileges for a non-root user can be granted by modifying
+the permissions and ownership of the socket.
+
+The following commands are available:
+
+.BI "[cache] show [" selector "]..."
+.RS
+Show contents of next hop cache (configured and resolved entries).
+.RE
+
+.BI "[cache] flush [" selector "]..."
+.RS
+Clear all non-permanent entries which match the selector specifiers.
+.RE
+
+.BI "[cache] purge [" selector "]..."
+.RS
+Purge entries from NHRP cache: cached entries are removed and permanent
+entries are forced down, up and finally reregistered.
+.RE
+
+.BI "[cache] lowerdown [" selector "]..."
+.RS
+Purge entries from NHRP cache with indication that lower layer failed:
+e.g. IPsec daemon detected dead-peer or received INITIAL-CONTACT
+notification.
+.RE
+
+.BI "route show [" selector "]..."
+.RS
+Show the contents of locally cached kernel routing information
+(outbound routing base to do route lookups excluding active shortcut
+routes).
+.RE
+
+.B "interface show"
+.RS
+Show the contents of interface configuration table, and the cached information
+from kernel (like protocol and NBMA IP addresses in use currently).
+.RE
+
+.BI "redirect purge [" protocol-address "/" prefix-length "]"
+.RS
+Clear redirection cache from all entries matching the specified address.
+.RE
+
+.BI "update nbma " nbma-address " " protocol-address
+.RS
+This command can be used from
+.BR opennhrp-script "(8)"
+to inform
+.BR opennhrp
+daemon of the real
+.IR protocol-address
+of dynamically discovered NHS.
+.RE
+
+The following selectors can be used to limit which cache entries will
+be affected:
+
+.BI nbma " nbma-address"
+.RS
+Matches entries where the remote has NBMA address
+.IR nbma-address .
+.RE
+
+.BI protocol " protocol-address" "[/" "prefix-length" "]"
+.RS
+Matches entries where the remote has protocol address
+.IR protocol-address " with at least prefix length " prefix-length .
+.RE
+
+.BI local-nbma " nbma-address"
+.RS
+Matches entries from local interface which owns the NBMA address
+.IR nbma-address .
+.RE
+
+.BI local-protocol " protocol-address"
+.RS
+Matches entries only from local interface which owns the protocol address
+.IR protocol-address .
+.RE
+
+.BI interface " interface-name"
+.br
+.BI iface " interface-name"
+.br
+.BI dev " interface-name"
+.RS
+Search entries only from local interface with name
+.IR interface-name .
+.RE
+
+.RE
+
+.SH FILES
+.I /var/run/opennhrp.socket
+.RS
+.BR opennhrp "(8) control socket"
+.RE
+
+.SH "SEE ALSO"
+.BR opennhrp (8)
+
+.SH AUTHORS
+Timo Teras <timo.teras@iki.fi>
diff --git a/nhrp/Makefile b/nhrp/Makefile
new file mode 100644
index 0000000..7c2560e
--- /dev/null
+++ b/nhrp/Makefile
@@ -0,0 +1,27 @@
+# The opennhrp daemon and the object files it is linked from.
+progs-y += opennhrp
+opennhrp-objs += libev.o opennhrp.o nhrp_address.o nhrp_packet.o \
+ nhrp_peer.o nhrp_server.o nhrp_interface.o admin.o \
+ sysdep_netlink.o sysdep_pfpacket.o \
+ sysdep_syslog.o
+
+# libev.o is built with several warning classes disabled.
+CFLAGS_libev.o += -Wno-unused -Wno-comment -Wno-parentheses
+# Version string and admin socket path are passed in as string macros.
+CFLAGS_opennhrp.o += -DOPENNHRP_VERSION=\"$(FULL_VERSION)\" \
+ -DOPENNHRP_ADMIN_SOCKET=\"$(STATEDIR)/opennhrp.socket\"
+LIBS_opennhrp += -lm
+# Take the c-ares flags from pkg-config when it knows libcares; otherwise
+# fall back to a plain -lcares.
+ifeq ($(shell pkg-config --exists libcares && echo "yes"),yes)
+CFLAGS +=$(shell pkg-config --cflags libcares)
+LIBS_opennhrp +=$(shell pkg-config --libs libcares)
+else
+LIBS_opennhrp += -lcares
+endif
+
+# The opennhrpctl tool; it reuses the per-object flags of opennhrp.o.
+progs-y += opennhrpctl
+opennhrpctl-objs += opennhrpctl.o
+CFLAGS_opennhrpctl.o += $(CFLAGS_opennhrp.o)
+
+CFLAGS_EXTRA += -I$(srctree)/include -Wno-strict-aliasing
+
+# Install every program listed in progs-y into $(DESTDIR)$(SBINDIR).
+install:
+ $(INSTALLDIR) $(DESTDIR)$(SBINDIR)
+ $(INSTALL) $(addprefix $(obj)/,$(progs-y)) $(DESTDIR)$(SBINDIR)
+
+
diff --git a/nhrp/admin.c b/nhrp/admin.c
new file mode 100644
index 0000000..68a3e9e
--- /dev/null
+++ b/nhrp/admin.c
@@ -0,0 +1,609 @@
+/* admin.c - OpenNHRP administrative interface implementation
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <malloc.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <sys/socket.h>
+
+#include "nhrp_common.h"
+#include "nhrp_peer.h"
+#include "nhrp_address.h"
+#include "nhrp_interface.h"
+
+static struct ev_io accept_io;
+
+/* State of one accepted admin socket connection. */
+struct admin_remote {
+ /* Timer armed in admin_accept_cb(); when it fires the connection is freed. */
+ struct ev_timer timeout;
+ /* Read watcher for the connection; io.fd is the connected socket. */
+ struct ev_io io;
+ /* Number of command bytes received so far. */
+ int num_read;
+ /* Receive buffer for the command line. */
+ char cmd[512];
+};
+
+/* Extract the next whitespace-delimited word from the string at *bufptr.
+ *
+ * Leading whitespace is skipped, but a newline or the terminating NUL ends
+ * the scan. At most len-1 characters are copied into word, which is then
+ * NUL-terminated; a longer word is split and its remainder is returned by
+ * the next call.
+ *
+ * Returns TRUE and advances *bufptr past the consumed characters when a
+ * word was found. Returns FALSE, leaving *bufptr and word untouched, when
+ * nothing is left on the line (or when len is 0).
+ */
+static int parse_word(const char **bufptr, size_t len, char *word)
+{
+	const char *buf = *bufptr;
+	size_t i, pos = 0;
+
+	/* No room even for the terminator. */
+	if (len == 0)
+		return FALSE;
+
+	/* <ctype.h> functions take an unsigned char value (or EOF); passing a
+	 * negative plain char is undefined behaviour. */
+	while (buf[pos] != '\n' && isspace((unsigned char) buf[pos]))
+		pos++;
+
+	if (buf[pos] == '\n' || buf[pos] == 0)
+		return FALSE;
+
+	/* Stop on the terminating NUL as well as on whitespace: isspace(0)
+	 * is false, so without the explicit check a word that ends the string
+	 * would make us read, and advance *bufptr, beyond the terminator. */
+	for (i = 0; i < len - 1 && buf[pos+i] != 0 &&
+	     !isspace((unsigned char) buf[pos+i]); i++)
+		word[i] = buf[pos+i];
+	word[i] = 0;
+
+	*bufptr += i + pos;
+	return TRUE;
+}
+
+
+/* Format a printf-style message and send it to the admin client.
+ *
+ * ctx is the struct admin_remote of the connection. Messages longer than
+ * the internal 1024 byte buffer are truncated. Write errors are ignored on
+ * purpose: the reply is best effort and the connection is torn down by the
+ * caller anyway.
+ */
+static void admin_write(void *ctx, const char *format, ...)
+{
+	struct admin_remote *rmt = (struct admin_remote *) ctx;
+	char msg[1024];
+	va_list ap;
+	int len;
+
+	va_start(ap, format);
+	len = vsnprintf(msg, sizeof(msg), format, ap);
+	va_end(ap);
+
+	/* Encoding error: nothing sensible to send. */
+	if (len < 0)
+		return;
+
+	/* vsnprintf() returns the length the full output would have had;
+	 * clamp it to what was really stored so that write() never reads
+	 * past the end of msg. */
+	if ((size_t) len >= sizeof(msg))
+		len = sizeof(msg) - 1;
+
+	if (write(rmt->io.fd, msg, len) != len) {
+		/* best effort, see above */
+	}
+}
+
+/* Tear down one admin connection: stop both of its watchers, shut down and
+ * close the socket, and release the connection state. */
+static void admin_free_remote(struct admin_remote *rm)
+{
+	const int sock = rm->io.fd;
+
+	ev_io_stop(&rm->io);
+	ev_timer_stop(&rm->timeout);
+
+	shutdown(sock, SHUT_RDWR);
+	close(sock);
+
+	free(rm);
+}
+
+/* Format one peer entry as "Key: value" lines and send it, followed by an
+ * empty line, to the admin client given in ctx. Passed to
+ * nhrp_peer_foreach() by the show commands. Always returns 0.
+ *
+ * NOTE(review): the snprintf() return values are accumulated in i without a
+ * truncation check. snprintf() returns the length the output would have had,
+ * so if the text ever outgrew buf, "len - i" would wrap around - confirm that
+ * the field sizes keep the total below sizeof(buf).
+ */
+static int admin_show_peer(void *ctx, struct nhrp_peer *peer)
+{
+ char buf[512], tmp[32];
+ char *str;
+ size_t len = sizeof(buf);
+ int i = 0, rel;
+
+ if (peer->interface != NULL)
+ i += snprintf(&buf[i], len - i,
+ "Interface: %s\n",
+ peer->interface->name);
+
+ i += snprintf(&buf[i], len - i,
+ "Type: %s\n"
+ "Protocol-Address: %s/%d\n",
+ nhrp_peer_type[peer->type],
+ nhrp_address_format(&peer->protocol_address, sizeof(tmp), tmp),
+ peer->prefix_length);
+
+ /* The label of the next hop address depends on the peer type. */
+ if (peer->next_hop_address.type != PF_UNSPEC) {
+ switch (peer->type) {
+ case NHRP_PEER_TYPE_SHORTCUT_ROUTE:
+ case NHRP_PEER_TYPE_LOCAL_ROUTE:
+ str = "Next-hop-Address";
+ break;
+ case NHRP_PEER_TYPE_LOCAL_ADDR:
+ str = "Alias-Address";
+ break;
+ default:
+ str = "NBMA-Address";
+ break;
+ }
+ i += snprintf(&buf[i], len - i, "%s: %s\n",
+ str,
+ nhrp_address_format(&peer->next_hop_address,
+ sizeof(tmp), tmp));
+ }
+ if (peer->nbma_hostname) {
+ i += snprintf(&buf[i], len - i, "Hostname: %s\n",
+ peer->nbma_hostname);
+ }
+ if (peer->next_hop_nat_oa.type != PF_UNSPEC) {
+ i += snprintf(&buf[i], len - i, "NBMA-NAT-OA-Address: %s\n",
+ nhrp_address_format(&peer->next_hop_nat_oa,
+ sizeof(tmp), tmp));
+ }
+ /* The Flags line is emitted only when at least one listed flag is set. */
+ if (peer->flags & (NHRP_PEER_FLAG_USED | NHRP_PEER_FLAG_UNIQUE |
+ NHRP_PEER_FLAG_UP | NHRP_PEER_FLAG_LOWER_UP)) {
+ i += snprintf(&buf[i], len - i, "Flags:");
+ if (peer->flags & NHRP_PEER_FLAG_UNIQUE)
+ i += snprintf(&buf[i], len - i, " unique");
+
+ if (peer->flags & NHRP_PEER_FLAG_USED)
+ i += snprintf(&buf[i], len - i, " used");
+ /* "lower-up" is shown only when the peer is not fully "up". */
+ if (peer->flags & NHRP_PEER_FLAG_UP)
+ i += snprintf(&buf[i], len - i, " up");
+ else if (peer->flags & NHRP_PEER_FLAG_LOWER_UP)
+ i += snprintf(&buf[i], len - i, " lower-up");
+ i += snprintf(&buf[i], len - i, "\n");
+ }
+ if (peer->expire_time) {
+ /* Time left relative to ev_now(); entries already past their
+ * expire time get no Expires-In line. Printed as rel/60 and
+ * rel%60 - presumably minutes:seconds. */
+ rel = (int) (peer->expire_time - ev_now());
+ if (rel >= 0) {
+ i += snprintf(&buf[i], len - i, "Expires-In: %d:%02d\n",
+ rel / 60, rel % 60);
+ }
+ }
+
+ admin_write(ctx, "%s\n", buf);
+ return 0;
+}
+
+/* Release the heap-allocated hostname held by a selector, if there is one,
+ * and clear the pointer. */
+static void admin_free_selector(struct nhrp_peer_selector *sel)
+{
+	if (sel->hostname == NULL)
+		return;
+
+	free((void *) sel->hostname);
+	sel->hostname = NULL;
+}
+
+/* Parse a list of "keyword argument" pairs from cmd into the selector sel.
+ *
+ * Recognized keywords:
+ *   interface | iface | dev <name>   - restrict to the named interface
+ *   host | hostname <name>           - restrict by hostname (copied with
+ *                                      strdup(), released with
+ *                                      admin_free_selector())
+ *   protocol <address[/prefix]>      - protocol address and prefix length
+ *   nbma <address>                   - next hop address; also removes
+ *                                      shortcut routes from the type mask
+ *   local-protocol <address>         - interface looked up by protocol address
+ *   local-nbma <address>             - interface looked up by NBMA address
+ *
+ * The caller must have initialized sel (zeroed, type_mask set).
+ *
+ * Returns TRUE on success. On any failure an error report is written to the
+ * admin client ctx, the resources already stored in sel are released, and
+ * FALSE is returned - callers need not (and do not) clean up in that case.
+ */
+static int admin_parse_selector(void *ctx, const char *cmd,
+				struct nhrp_peer_selector *sel)
+{
+	char keyword[64], tmp[64];
+	struct nhrp_address address;
+	uint8_t prefix_length;
+
+	while (parse_word(&cmd, sizeof(keyword), keyword)) {
+		if (!parse_word(&cmd, sizeof(tmp), tmp)) {
+			admin_write(ctx,
+				    "Status: failed\n"
+				    "Reason: missing-argument\n"
+				    "Near-Keyword: '%s'\n",
+				    keyword);
+			goto err;
+		}
+
+		/* Keywords whose argument is a name, not an address. */
+		if (strcmp(keyword, "interface") == 0 ||
+		    strcmp(keyword, "iface") == 0 ||
+		    strcmp(keyword, "dev") == 0) {
+			if (sel->interface != NULL)
+				goto err_conflict;
+			sel->interface = nhrp_interface_get_by_name(tmp, FALSE);
+			if (sel->interface == NULL)
+				goto err_noiface;
+			continue;
+		} else if (strcmp(keyword, "host") == 0 ||
+			   strcmp(keyword, "hostname") == 0) {
+			if (sel->hostname != NULL)
+				goto err_conflict;
+			sel->hostname = strdup(tmp);
+			if (sel->hostname == NULL) {
+				/* Do not continue without the filter: the
+				 * command would then act on every host. */
+				admin_write(ctx,
+					    "Status: failed\n"
+					    "Reason: out-of-memory\n"
+					    "Near-Keyword: '%s'\n",
+					    keyword);
+				goto err;
+			}
+			continue;
+		}
+
+		/* All remaining keywords take an address argument. */
+		if (!nhrp_address_parse(tmp, &address, &prefix_length)) {
+			admin_write(ctx,
+				    "Status: failed\n"
+				    "Reason: invalid-address\n"
+				    "Near-Keyword: '%s'\n",
+				    keyword);
+			goto err;
+		}
+
+		if (strcmp(keyword, "protocol") == 0) {
+			if (sel->protocol_address.type != AF_UNSPEC)
+				goto err_conflict;
+			sel->protocol_address = address;
+			sel->prefix_length = prefix_length;
+		} else if (strcmp(keyword, "nbma") == 0) {
+			if (sel->next_hop_address.type != AF_UNSPEC)
+				goto err_conflict;
+			sel->type_mask &= ~BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE);
+			sel->next_hop_address = address;
+		} else if (strcmp(keyword, "local-protocol") == 0) {
+			if (sel->interface != NULL)
+				goto err_conflict;
+			sel->interface = nhrp_interface_get_by_protocol(&address);
+			if (sel->interface == NULL)
+				goto err_noiface;
+		} else if (strcmp(keyword, "local-nbma") == 0) {
+			if (sel->interface != NULL)
+				goto err_conflict;
+			sel->interface = nhrp_interface_get_by_nbma(&address);
+			if (sel->interface == NULL)
+				goto err_noiface;
+		} else {
+			admin_write(ctx,
+				    "Status: failed\n"
+				    "Reason: syntax-error\n"
+				    "Near-Keyword: '%s'\n",
+				    keyword);
+			goto err;
+		}
+	}
+	return TRUE;
+
+err_conflict:
+	admin_write(ctx,
+		    "Status: failed\n"
+		    "Reason: conflicting-keyword\n"
+		    "Near-Keyword: '%s'\n",
+		    keyword);
+	goto err;
+err_noiface:
+	admin_write(ctx,
+		    "Status: failed\n"
+		    "Reason: interface-not-found\n"
+		    "Near-Keyword: '%s'\n"
+		    "Argument: '%s'\n",
+		    keyword, tmp);
+err:
+	/* Every failure path ends here so that a hostname stored by an
+	 * earlier keyword is not leaked. */
+	admin_free_selector(sel);
+	return FALSE;
+}
+
+/* "route show": print every peer of type NHRP_PEER_TYPE_LOCAL_ROUTE that
+ * matches the selector given in cmd. */
+static void admin_route_show(void *ctx, const char *cmd)
+{
+	struct nhrp_peer_selector filter;
+
+	memset(&filter, 0, sizeof(filter));
+	filter.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ROUTE);
+
+	if (admin_parse_selector(ctx, cmd, &filter)) {
+		admin_write(ctx, "Status: ok\n\n");
+		nhrp_peer_foreach(admin_show_peer, ctx, &filter);
+		admin_free_selector(&filter);
+	}
+}
+
+/* "[cache] show": print every peer matching the selector given in cmd,
+ * considering all peer types except NHRP_PEER_TYPE_LOCAL_ROUTE. */
+static void admin_cache_show(void *ctx, const char *cmd)
+{
+	struct nhrp_peer_selector filter;
+
+	memset(&filter, 0, sizeof(filter));
+	filter.type_mask = NHRP_PEER_TYPEMASK_ALL;
+	filter.type_mask &= ~BIT(NHRP_PEER_TYPE_LOCAL_ROUTE);
+
+	if (admin_parse_selector(ctx, cmd, &filter)) {
+		admin_write(ctx, "Status: ok\n\n");
+		nhrp_peer_foreach(admin_show_peer, ctx, &filter);
+		admin_free_selector(&filter);
+	}
+}
+
+/* "[cache] purge": run nhrp_peer_purge_matching() on every purgeable peer
+ * matching the selector in cmd and report the resulting count. */
+static void admin_cache_purge(void *ctx, const char *cmd)
+{
+	struct nhrp_peer_selector filter;
+	int affected = 0;
+
+	memset(&filter, 0, sizeof(filter));
+	filter.type_mask = NHRP_PEER_TYPEMASK_PURGEABLE;
+
+	if (admin_parse_selector(ctx, cmd, &filter)) {
+		nhrp_peer_foreach(nhrp_peer_purge_matching, &affected, &filter);
+		admin_free_selector(&filter);
+
+		admin_write(ctx,
+			    "Status: ok\n"
+			    "Entries-Affected: %d\n",
+			    affected);
+	}
+}
+
+/* "cache lowerdown": run nhrp_peer_lowerdown_matching() on every purgeable
+ * peer matching the selector in cmd and report the resulting count. */
+static void admin_cache_lower_down(void *ctx, const char *cmd)
+{
+	struct nhrp_peer_selector filter;
+	int affected = 0;
+
+	memset(&filter, 0, sizeof(filter));
+	filter.type_mask = NHRP_PEER_TYPEMASK_PURGEABLE;
+
+	if (admin_parse_selector(ctx, cmd, &filter)) {
+		nhrp_peer_foreach(nhrp_peer_lowerdown_matching, &affected, &filter);
+		admin_free_selector(&filter);
+
+		admin_write(ctx,
+			    "Status: ok\n"
+			    "Entries-Affected: %d\n",
+			    affected);
+	}
+}
+
+/* "[cache] flush": run nhrp_peer_remove_matching() on every removable peer
+ * matching the selector in cmd and report the resulting count. */
+static void admin_cache_flush(void *ctx, const char *cmd)
+{
+	struct nhrp_peer_selector filter;
+	int affected = 0;
+
+	memset(&filter, 0, sizeof(filter));
+	filter.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE;
+
+	if (admin_parse_selector(ctx, cmd, &filter)) {
+		nhrp_peer_foreach(nhrp_peer_remove_matching, &affected, &filter);
+		admin_free_selector(&filter);
+
+		admin_write(ctx,
+			    "Status: ok\n"
+			    "Entries-Affected: %d\n",
+			    affected);
+	}
+}
+
+/* Format one interface as "Key: value" lines and send it, followed by an
+ * empty line, to the admin client given in ctx. Passed to
+ * nhrp_interface_foreach() by admin_interface_show(). Always returns 0.
+ *
+ * NOTE(review): as the snprintf() return values are summed without a
+ * truncation check, "len - i" would wrap if the text ever outgrew buf -
+ * confirm the fields cannot exceed sizeof(buf) in total.
+ */
+static int admin_show_interface(void *ctx, struct nhrp_interface *iface)
+{
+ char buf[512], tmp[32];
+ size_t len = sizeof(buf);
+ int i = 0;
+
+ i += snprintf(&buf[i], len - i,
+ "Interface: %s\n"
+ "Index: %d\n",
+ iface->name,
+ iface->index);
+
+ if (iface->protocol_address.addr_len != 0) {
+ i += snprintf(&buf[i], len - i,
+ "Protocol-Address: %s/%d\n",
+ nhrp_address_format(&iface->protocol_address, sizeof(tmp), tmp),
+ iface->protocol_address_prefix);
+ }
+
+ if (iface->flags) {
+ i += snprintf(&buf[i], len - i,
+ "Flags:%s%s%s%s%s\n",
+ (iface->flags & NHRP_INTERFACE_FLAG_NON_CACHING) ? " non-caching" : "",
+ (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT) ? " shortcut" : "",
+ (iface->flags & NHRP_INTERFACE_FLAG_REDIRECT) ? " redirect" : "",
+ (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) ? " shortcut-dest" : "",
+ (iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED) ? " configured" : "");
+ }
+
+ /* The remaining fields are printed for configured interfaces only. */
+ if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED))
+ goto done;
+
+ i += snprintf(&buf[i], len - i,
+ "Holding-Time: %u\n"
+ "Route-Table: %u\n"
+ "GRE-Key: %u\n"
+ "MTU: %u\n",
+ iface->holding_time,
+ iface->route_table,
+ iface->gre_key,
+ iface->mtu);
+
+ if (iface->link_index) {
+ struct nhrp_interface *link;
+
+ i += snprintf(&buf[i], len - i, "Link-Index: %d\n", iface->link_index);
+ /* Add the name too when the linked interface is known. */
+ link = nhrp_interface_get_by_index(iface->link_index, FALSE);
+ if (link != NULL)
+ i += snprintf(&buf[i], len - i, "Link-Name: %s\n", link->name);
+ }
+
+ if (iface->nbma_address.addr_len != 0) {
+ i += snprintf(&buf[i], len - i,
+ "NBMA-MTU: %u\n"
+ "NBMA-Address: %s\n",
+ iface->nbma_mtu,
+ nhrp_address_format(&iface->nbma_address, sizeof(tmp), tmp));
+ }
+ if (iface->nat_cie.nbma_address.addr_len != 0) {
+ i += snprintf(&buf[i], len - i,
+ "NBMA-NAT-OA: %s\n",
+ nhrp_address_format(&iface->nat_cie.nbma_address, sizeof(tmp), tmp));
+ }
+done:
+ admin_write(ctx, "%s\n", buf);
+ return 0;
+}
+
+/* "interface show": print the status header and then one record per
+ * interface. The remainder of the command line (cmd) is not used. */
+static void admin_interface_show(void *ctx, const char *cmd)
+{
+	admin_write(ctx, "Status: ok\n\n");
+
+	nhrp_interface_foreach(admin_show_interface, ctx);
+}
+
+/* "redirect purge [address[/prefix]]": clear rate-limit entries via
+ * nhrp_rate_limit_clear() and report how many were affected.
+ *
+ * Without an argument the address is left as PF_UNSPEC with prefix 0.
+ * An unparsable address is reported as "invalid-address".
+ */
+static void admin_redirect_purge(void *ctx, const char *cmd)
+{
+	char keyword[64];
+	struct nhrp_address addr;
+	/* Must be initialized: it is passed on even when no address argument
+	 * was given and nhrp_address_parse() never ran. */
+	uint8_t prefix = 0;
+	int count;
+
+	nhrp_address_set_type(&addr, PF_UNSPEC);
+
+	if (parse_word(&cmd, sizeof(keyword), keyword)) {
+		if (!nhrp_address_parse(keyword, &addr, &prefix)) {
+			admin_write(ctx,
+				    "Status: failed\n"
+				    "Reason: invalid-address\n"
+				    "Near-Keyword: '%s'\n",
+				    keyword);
+			return;
+		}
+	}
+
+	count = nhrp_rate_limit_clear(&addr, prefix);
+	admin_write(ctx,
+		    "Status: ok\n"
+		    "Entries-Affected: %d\n",
+		    count);
+}
+
+/* Context handed to the update_nbma() peer callback. */
+struct update_nbma {
+ /* Address passed on to nhrp_peer_discover_nhs() for each visited peer. */
+ struct nhrp_address addr;
+ /* Number of peers visited. */
+ int count;
+};
+
+/* Peer iteration callback: hand the address stored in the context to
+ * nhrp_peer_discover_nhs() for this peer and count the visit. Returns 0. */
+static int update_nbma(void *ctx, struct nhrp_peer *p)
+{
+	struct update_nbma *state = (struct update_nbma *) ctx;
+
+	nhrp_peer_discover_nhs(p, &state->addr);
+	state->count += 1;
+
+	return 0;
+}
+
+/* "update nbma <nbma-address> <protocol-address>": for every dynamic NHS
+ * peer whose next hop address equals nbma-address, pass protocol-address to
+ * nhrp_peer_discover_nhs(), then report the number of peers visited.
+ *
+ * A missing or unparsable argument is reported as a syntax error near the
+ * last word that was read (empty if no word could be read at all).
+ */
+static void admin_update_nbma(void *ctx, const char *cmd)
+{
+	/* Start out empty: the error path prints keyword, and parse_word()
+	 * leaves it untouched when there is no word to read. */
+	char keyword[64] = "";
+	struct nhrp_peer_selector sel;
+	struct update_nbma un;
+
+	memset(&sel, 0, sizeof(sel));
+	sel.type_mask = BIT(NHRP_PEER_TYPE_DYNAMIC_NHS);
+
+	if (!parse_word(&cmd, sizeof(keyword), keyword))
+		goto err;
+	if (!nhrp_address_parse(keyword, &sel.next_hop_address, NULL))
+		goto err;
+	if (!parse_word(&cmd, sizeof(keyword), keyword))
+		goto err;
+	if (!nhrp_address_parse(keyword, &un.addr, NULL))
+		goto err;
+
+	un.count = 0;
+	nhrp_peer_foreach(update_nbma, &un, &sel);
+
+	admin_write(ctx,
+		    "Status: ok\n"
+		    "Entries-Affected: %d\n",
+		    un.count);
+	return;
+err:
+	admin_write(ctx,
+		    "Status: failed\n"
+		    "Reason: syntax-error\n"
+		    "Near-Keyword: '%s'\n",
+		    keyword);
+	return;
+}
+
+/* Admin command dispatch table.
+ *
+ * Each entry maps a command prefix to its handler; the handler receives the
+ * connection context and the rest of the command line after the prefix.
+ * The cache commands are reachable with and without the leading "cache"
+ * word, as documented in opennhrpctl(8) - this includes "lowerdown".
+ */
+static struct {
+	const char *command;
+	void (*handler)(void *ctx, const char *cmd);
+} admin_handler[] = {
+	{ "route show",		admin_route_show },
+	{ "show",		admin_cache_show },
+	{ "cache show",		admin_cache_show },
+	{ "flush",		admin_cache_flush },
+	{ "cache flush",	admin_cache_flush },
+	{ "purge",		admin_cache_purge },
+	{ "cache purge",	admin_cache_purge },
+	{ "lowerdown",		admin_cache_lower_down },
+	{ "cache lowerdown",	admin_cache_lower_down },
+	{ "interface show",	admin_interface_show },
+	{ "redirect purge",	admin_redirect_purge },
+	{ "update nbma",	admin_update_nbma },
+};
+
+/* Read watcher callback of an admin connection.
+ *
+ * Collects received bytes in rm->cmd until the data ends in a newline, then
+ * strips the newline, runs the first handler in admin_handler[] whose
+ * command is a case-insensitive prefix of the line, and closes the
+ * connection. Unknown commands get an error reply. The connection is also
+ * closed on EOF, on a receive error other than EAGAIN, and when the buffer
+ * fills up before a newline arrives.
+ */
+static void admin_receive_cb(struct ev_io *w, int revents)
+{
+	struct admin_remote *rm = container_of(w, struct admin_remote, io);
+	int fd = rm->io.fd;
+	ssize_t len;
+	int i, cmdlen;
+
+	/* Append after the bytes already buffered; receiving at the start of
+	 * the buffer would overwrite the first part of a command that
+	 * arrives in more than one piece. */
+	len = recv(fd, &rm->cmd[rm->num_read],
+		   sizeof(rm->cmd) - rm->num_read, MSG_DONTWAIT);
+	if (len < 0 && errno == EAGAIN)
+		return;
+	if (len <= 0)
+		goto done;
+
+	rm->num_read += len;
+	if ((size_t) rm->num_read >= sizeof(rm->cmd))
+		goto done;
+
+	/* Command not complete yet: wait for more data. */
+	if (rm->cmd[rm->num_read-1] != '\n')
+		return;
+	rm->cmd[--rm->num_read] = 0;
+
+	for (i = 0; i < ARRAY_SIZE(admin_handler); i++) {
+		cmdlen = strlen(admin_handler[i].command);
+		if (rm->num_read >= cmdlen &&
+		    strncasecmp(rm->cmd, admin_handler[i].command, cmdlen) == 0) {
+			nhrp_debug("Admin: %s", rm->cmd);
+			admin_handler[i].handler(rm, &rm->cmd[cmdlen]);
+			break;
+		}
+	}
+	if (i >= ARRAY_SIZE(admin_handler)) {
+		admin_write(rm,
+			    "Status: error\n"
+			    "Reason: unrecognized command\n");
+	}
+
+done:
+	/* One command per connection: always tear down once handled. */
+	admin_free_remote(rm);
+}
+
+/* Timer callback of an admin connection: the timeout set up in
+ * admin_accept_cb() has fired, so drop the connection. */
+static void admin_timeout_cb(struct ev_timer *t, int revents)
+{
+	struct admin_remote *rm;
+
+	rm = container_of(t, struct admin_remote, timeout);
+	admin_free_remote(rm);
+}
+
+/* Accept watcher callback of the admin listening socket.
+ *
+ * Accepts one connection, marks it close-on-exec and allocates its state.
+ * A read watcher collects the command and a 10 second one-shot timer drops
+ * the connection if it is still open by then. If the state cannot be
+ * allocated the connection is closed right away.
+ */
+static void admin_accept_cb(ev_io *w, int revents)
+{
+	struct admin_remote *rm;
+	struct sockaddr_storage from;
+	socklen_t fromlen = sizeof(from);
+	int cnx;
+
+	cnx = accept(w->fd, (struct sockaddr *) &from, &fromlen);
+	if (cnx < 0)
+		return;
+	fcntl(cnx, F_SETFD, FD_CLOEXEC);
+
+	rm = calloc(1, sizeof(*rm));
+	if (rm == NULL) {
+		/* Out of memory: refuse the client instead of crashing on
+		 * a NULL pointer, and do not leak the descriptor. */
+		close(cnx);
+		return;
+	}
+
+	ev_io_init(&rm->io, admin_receive_cb, cnx, EV_READ);
+	ev_io_start(&rm->io);
+	ev_timer_init(&rm->timeout, admin_timeout_cb, 10.0, 0.);
+	ev_timer_start(&rm->timeout);
+}
+
+/* Create the admin UNIX stream socket at path opennhrp_socket and start
+ * accepting connections on it.
+ *
+ * Any existing file at that path is unlinked first. Returns 1 on success
+ * and 0 on failure. A path that does not fit into sun_path (including its
+ * terminating NUL) is rejected rather than silently truncated.
+ */
+int admin_init(const char *opennhrp_socket)
+{
+	struct sockaddr_un sun;
+	int fd;
+
+	/* Binding to a truncated name would create the socket somewhere the
+	 * control tool does not look for it. */
+	if (strlen(opennhrp_socket) >= sizeof(sun.sun_path)) {
+		nhrp_error("Failed initialize admin socket [%s]: %s",
+			   opennhrp_socket, strerror(ENAMETOOLONG));
+		return 0;
+	}
+
+	memset(&sun, 0, sizeof(sun));
+	sun.sun_family = AF_UNIX;
+	/* Length checked above; the memset() provides the terminator. */
+	strncpy(sun.sun_path, opennhrp_socket, sizeof(sun.sun_path) - 1);
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		return 0;
+
+	fcntl(fd, F_SETFD, FD_CLOEXEC);
+	unlink(opennhrp_socket);
+	if (bind(fd, (struct sockaddr *) &sun, sizeof(sun)) != 0)
+		goto err_close;
+
+	if (listen(fd, 5) != 0)
+		goto err_close;
+
+	ev_io_init(&accept_io, admin_accept_cb, fd, EV_READ);
+	ev_io_start(&accept_io);
+
+	return 1;
+
+err_close:
+	/* Log before close() so that errno still belongs to the failure. */
+	nhrp_error("Failed initialize admin socket [%s]: %s",
+		   opennhrp_socket, strerror(errno));
+	close(fd);
+	return 0;
+}
diff --git a/nhrp/afnum.h b/nhrp/afnum.h
new file mode 100644
index 0000000..2dc3d68
--- /dev/null
+++ b/nhrp/afnum.h
@@ -0,0 +1,29 @@
+/* afnum.h - RFC 1700 Address Family Number and
+ * ethernet protocol number definitions
+ *
+ * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#ifndef AFNUM_H
+#define AFNUM_H
+
+#include <linux/if_ether.h>
+#include "nhrp_defines.h"
+
+/* Address family numbers, wrapped in constant_htons() (from
+ * nhrp_defines.h - presumably a compile-time htons, i.e. the values are
+ * kept in network byte order; confirm there). */
+#define AFNUM_RESERVED constant_htons(0)
+#define AFNUM_INET constant_htons(1)
+#define AFNUM_INET6 constant_htons(2)
+
+/* Ethernet protocol number used for NHRP (host byte order here). */
+#define ETH_P_NHRP 0x2001
+
+/* Ethernet protocol numbers wrapped in constant_htons(), like the AFNUM_*
+ * values above. */
+#define ETHPROTO_IP constant_htons(ETH_P_IP)
+#define ETHPROTO_NHRP constant_htons(ETH_P_NHRP)
+
+#endif
diff --git a/nhrp/libev.c b/nhrp/libev.c
new file mode 100644
index 0000000..c4af3b9
--- /dev/null
+++ b/nhrp/libev.c
@@ -0,0 +1,3 @@
+#include <string.h>
+#include "libev.h"
+#include "../libev/ev.c"
diff --git a/nhrp/libev.h b/nhrp/libev.h
new file mode 100644
index 0000000..f9f5f23
--- /dev/null
+++ b/nhrp/libev.h
@@ -0,0 +1,22 @@
+#define EV_STANDALONE 1
+#define EV_MULTIPLICITY 0
+#define EV_VERIFY 0
+
+#define EV_USE_CLOCK_SYSCALL 1
+#define EV_USE_SELECT 0
+#define EV_USE_POLL 1
+
+#define EV_IDLE_ENABLE 1
+
+/* Unused stuff, disabled for size optimization */
+#define EV_USE_INOTIFY 0
+#define EV_PERIODIC_ENABLE 0
+#define EV_EMBED_ENABLE 0
+#define EV_STAT_ENABLE 0
+#define EV_FORK_ENABLE 0
+#define EV_ASYNC_ENABLE 0
+
+/* Disable the "void *data;" member of watchers to save memory */
+#define EV_COMMON /* empty */
+
+#include "../libev/ev.h"
diff --git a/nhrp/list.h b/nhrp/list.h
new file mode 100644
index 0000000..4387970
--- /dev/null
+++ b/nhrp/list.h
@@ -0,0 +1,184 @@
+/* list.h - Single and double linked list macros
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ *
+ * This is more or less based on the code in the linux kernel. There are
+ * minor differences and this is only a subset of the kernel version.
+ */
+
+#ifndef LIST_H
+#define LIST_H
+
+#ifndef NULL
+#define NULL 0L
+#endif
+
+/* container_of(ptr, type, member): given ptr, a pointer to the field
+ * `member` inside an object of type `type`, yield a pointer to that
+ * enclosing object. The temporary __mptr makes the compiler check that ptr
+ * has the member's type. Relies on the GNU C extensions typeof and
+ * statement expressions, and on offsetof(), which this header does not
+ * include <stddef.h> for itself. */
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+#endif
+
+/* Head of an hlist: a single pointer to the first node (NULL when empty). */
+struct hlist_head {
+ struct hlist_node *first;
+};
+
+/* Node of an hlist. */
+struct hlist_node {
+ /* Next node, or NULL at the end of the list. */
+ struct hlist_node *next;
+ /* Address of the pointer that points at this node: the previous node's
+ * `next` or the head's `first`. NULL while the node is not on a list. */
+ struct hlist_node **pprev;
+};
+
+/* Return non-zero if the list h has no nodes. */
+static inline int hlist_empty(const struct hlist_head *h)
+{
+	return h->first == NULL;
+}
+
+/* Return non-zero if node n is currently linked into a list, i.e. its
+ * pprev back pointer is set. */
+static inline int hlist_hashed(const struct hlist_node *n)
+{
+	return !(n->pprev == NULL);
+}
+
+/* Unlink node n from its list and reset both of its pointers to NULL.
+ * n must currently be on a list (pprev is dereferenced unconditionally). */
+static inline void hlist_del(struct hlist_node *n)
+{
+	struct hlist_node **link = n->pprev;
+	struct hlist_node *succ = n->next;
+
+	/* Make whatever pointed at n point at its successor instead. */
+	*link = succ;
+	if (succ != NULL)
+		succ->pprev = link;
+
+	n->pprev = NULL;
+	n->next = NULL;
+}
+
+/* Insert node n at the front of list h. */
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *old_first = h->first;
+
+	n->pprev = &h->first;
+	n->next = old_first;
+	/* The former first node is now reached through n->next. */
+	if (old_first != NULL)
+		old_first->pprev = &n->next;
+	h->first = n;
+}
+
+/* Insert node n into the list immediately after node prev.
+ * prev must already be on a list; n must not be. */
+static inline void hlist_add_after(struct hlist_node *n, struct hlist_node *prev)
+{
+	n->next = prev->next;
+	n->pprev = &prev->next;
+	prev->next = n;
+
+	/* The old successor is now reached through n->next. Without this
+	 * update its pprev would still refer to prev->next, and deleting it
+	 * later would overwrite prev->next and drop n from the list. */
+	if (n->next != NULL)
+		n->next->pprev = &n->next;
+}
+
+/* Return the address of the NULL pointer that terminates list h: the
+ * head's `first` for an empty list, otherwise the last node's `next`.
+ * Walks the whole list. */
+static inline struct hlist_node **hlist_tail_ptr(struct hlist_head *h)
+{
+	struct hlist_node **link = &h->first;
+
+	while (*link != NULL)
+		link = &(*link)->next;
+
+	return link;
+}
+
+/* Convert a pointer to an embedded hlist_node into its containing object. */
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+/* Iterate over the nodes of a list; pos is the struct hlist_node cursor.
+ * The current node must not be removed inside the loop body. */
+#define hlist_for_each(pos, head) \
+ for (pos = (head)->first; pos; pos = pos->next)
+
+/* Like hlist_for_each, but the successor is saved in n before the body
+ * runs, so the body may unlink pos. */
+#define hlist_for_each_safe(pos, n, head) \
+ for (pos = (head)->first; pos && ({ n = pos->next; 1; }); pos = n)
+
+/* Iterate over the containing objects: tpos is the object cursor, pos the
+ * struct hlist_node cursor, member the name of the node field in the object. */
+#define hlist_for_each_entry(tpos, pos, head, member) \
+ for (pos = (head)->first; pos && \
+ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/* Like hlist_for_each_entry, but with the successor saved in n so that the
+ * body may unlink the current entry. */
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \
+ for (pos = (head)->first; \
+ pos && ({ n = pos->next; 1; }) && \
+ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = n)
+
+
+/* Link of a circular doubly linked list; used both as the list head and as
+ * the node embedded in each element. */
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+/* Static initializer for a list head l: both links point back at l itself,
+ * which is the empty-list state. */
+#define LIST_INITIALIZER(l) { .next = &l, .prev = &l }
+
+/* Initialize list as an empty list: both links refer to itself. */
+static inline void list_init(struct list_head *list)
+{
+	list->prev = list->next = list;
+}
+
+/* Link the node `new` between the two adjacent nodes prev and next. */
+static inline void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next)
+{
+	/* Hook the new node up to its right-hand neighbour ... */
+	next->prev = new;
+	new->next = next;
+
+	/* ... and to its left-hand neighbour. */
+	new->prev = prev;
+	prev->next = new;
+}
+
+/* Insert `new` right after head, i.e. at the front of the list. */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	struct list_head *first = head->next;
+
+	__list_add(new, head, first);
+}
+
+/* Insert `new` right before head, i.e. at the end of the list. */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	struct list_head *last = head->prev;
+
+	__list_add(new, last, head);
+}
+
+/* Make prev and next direct neighbours, cutting out whatever was linked
+ * between them. */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/* Unlink entry from its list and reset both of its links to NULL. */
+static inline void list_del(struct list_head *entry)
+{
+	struct list_head *before = entry->prev;
+	struct list_head *after = entry->next;
+
+	__list_del(before, after);
+
+	entry->next = NULL;
+	entry->prev = NULL;
+}
+
+/* Return non-zero if n is linked to something other than itself: false for
+ * an empty list head (next == n) and for a node reset by list_del()
+ * (next == NULL). */
+static inline int list_hashed(const struct list_head *n)
+{
+	if (n->next == NULL)
+		return 0;
+
+	return n->next != n;
+}
+
+/* Return non-zero if n is not linked to any other node; the exact inverse
+ * of list_hashed(). */
+static inline int list_empty(const struct list_head *n)
+{
+	return list_hashed(n) == 0;
+}
+
+/* Containing object of the node following ptr, or NULL when ptr is not
+ * linked to another node (see list_hashed()). */
+#define list_next(ptr, type, member) \
+ (list_hashed(ptr) ? container_of((ptr)->next,type,member) : NULL)
+
+/* Convert a pointer to an embedded list_head into its containing object. */
+#define list_entry(ptr, type, member) container_of(ptr,type,member)
+
+/* Iterate over the nodes of a list; pos is the struct list_head cursor.
+ * The current node must not be removed inside the loop body. */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/* Like list_for_each, but the successor is kept in n, so the body may
+ * unlink pos. */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+/* Iterate over the containing objects; pos is the object cursor and member
+ * the name of the list_head field inside the object. */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/* Like list_for_each_entry, but the next object is kept in n, so the body
+ * may unlink (or free) pos. */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+#endif
diff --git a/nhrp/nhrp_address.c b/nhrp/nhrp_address.c
new file mode 100644
index 0000000..13164e1
--- /dev/null
+++ b/nhrp/nhrp_address.c
@@ -0,0 +1,454 @@
+/* nhrp_address.c - NHRP address conversion functions
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <netdb.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+#include <linux/ip.h>
+
+#include <ares.h>
+#include <ares_version.h>
+
+#include "afnum.h"
+#include "nhrp_address.h"
+#include "nhrp_packet.h"
+#include "nhrp_common.h"
+
+/*
+ * State of the asynchronous hostname resolver: one ares channel plus the
+ * libev watchers that drive it.
+ */
+struct nhrp_resolver {
+ /* Channel created by ares_init_options() in nhrp_address_init() */
+ ares_channel channel;
+ /* Runs ares_prepare_cb(), which re-arms or stops 'timeout' */
+ struct ev_prepare prepare;
+ /* Fires ares_timeout_cb(), which calls ares_process() */
+ struct ev_timer timeout;
+ /* I/O watchers managed by ares_socket_cb(); fd == 0 marks a free slot */
+ struct ev_io fds[4];
+};
+
+/* The single resolver instance used by this file */
+static struct nhrp_resolver resolver;
+
+/* Timer watcher callback: let the ares channel process its timeouts. */
+static void ares_timeout_cb(struct ev_timer *w, int revents)
+{
+	struct nhrp_resolver *owner;
+
+	owner = container_of(w, struct nhrp_resolver, timeout);
+	ares_process(owner->channel, NULL, NULL);
+}
+
+/*
+ * Prepare watcher callback: ask the ares channel how long it wants to wait
+ * and re-arm the timeout watcher accordingly, or stop it when ares reports
+ * no timeout.
+ */
+static void ares_prepare_cb(struct ev_prepare *w, int revents)
+{
+	struct nhrp_resolver *owner =
+		container_of(w, struct nhrp_resolver, prepare);
+	struct timeval storage;
+	struct timeval *wait;
+
+	wait = ares_timeout(owner->channel, NULL, &storage);
+	if (wait == NULL) {
+		ev_timer_stop(&owner->timeout);
+		return;
+	}
+
+	owner->timeout.repeat = wait->tv_sec + wait->tv_usec * 1e-6;
+	ev_timer_again(&owner->timeout);
+}
+
+/*
+ * I/O watcher callback: hand the descriptor to ares as readable and/or
+ * writable, depending on which events fired.
+ */
+static void ares_io_cb(struct ev_io *w, int revents)
+{
+	ares_socket_t rfd = (revents & EV_READ) ? w->fd : ARES_SOCKET_BAD;
+	ares_socket_t wfd = (revents & EV_WRITE) ? w->fd : ARES_SOCKET_BAD;
+
+	ares_process_fd(resolver.channel, rfd, wfd);
+}
+
+/*
+ * Socket state callback registered with ares (sock_state_cb).
+ *
+ * data     - the struct nhrp_resolver passed as sock_state_cb_data
+ * fd       - socket whose state changed
+ * readable - non-zero if ares wants read events for fd
+ * writable - non-zero if ares wants write events for fd
+ *
+ * Keeps one ev_io watcher per ares socket in r->fds[]. A slot whose fd is 0
+ * is considered free. When ares no longer wants any event, the watcher is
+ * stopped and its slot released.
+ */
+static void ares_socket_cb(void *data, ares_socket_t fd,
+			   int readable, int writable)
+{
+	struct nhrp_resolver *r = (struct nhrp_resolver *) data;
+	int i, fi = -1, events = 0;
+
+	if (readable)
+		events |= EV_READ;
+	if (writable)
+		events |= EV_WRITE;
+
+	/* Find the slot already watching fd; remember the first free one. */
+	for (i = 0; i < ARRAY_SIZE(r->fds); i++) {
+		if (r->fds[i].fd == fd)
+			break;
+		if (fi < 0 && r->fds[i].fd == 0)
+			fi = i;
+	}
+
+	if (events) {
+		if (i >= ARRAY_SIZE(r->fds)) {
+			/* New socket: take the free slot. */
+			NHRP_BUG_ON(fi == -1);
+			i = fi;
+		} else {
+			/*
+			 * Known socket: stop its own watcher before changing
+			 * the event mask. (This used to index with 'fi',
+			 * which is -1 or an unrelated free slot here.)
+			 */
+			ev_io_stop(&r->fds[i]);
+		}
+		ev_io_set(&r->fds[i], fd, events);
+		ev_io_start(&r->fds[i]);
+	} else if (i < ARRAY_SIZE(r->fds)) {
+		ev_io_stop(&r->fds[i]);
+		ev_io_set(&r->fds[i], 0, 0);
+	}
+}
+
+/*
+ * Compare the first 'len' bits of two byte strings.
+ *
+ * Whole bytes are compared with memcmp(); a trailing partial byte is
+ * compared on its most significant 'len % 8' bits only. Returns zero when
+ * the prefixes match, otherwise a negative or positive value.
+ */
+static int bitcmp(const uint8_t *a, const uint8_t *b, int len)
+{
+	const int whole = len / 8;
+	const int rest = len % 8;
+	int diff, keep;
+
+	if (whole != 0) {
+		diff = memcmp(a, b, whole);
+		if (diff != 0)
+			return diff;
+	}
+	if (rest == 0)
+		return 0;
+
+	/* Keep only the top 'rest' bits of the partial byte. */
+	keep = (0xff << (8 - rest)) & 0xff;
+	return ((int) (a[whole] & keep)) - ((int) (b[whole] & keep));
+}
+
+/* Map a PF_* family to its ETHPROTO_* value; 0 for unsupported families. */
+uint16_t nhrp_protocol_from_pf(uint16_t pf)
+{
+	if (pf == PF_INET)
+		return ETHPROTO_IP;
+
+	return 0;
+}
+
+/* Map an ETHPROTO_* value to its PF_* family; PF_UNSPEC when unknown. */
+uint16_t nhrp_pf_from_protocol(uint16_t protocol)
+{
+	if (protocol == ETHPROTO_IP)
+		return PF_INET;
+
+	return PF_UNSPEC;
+}
+
+/* Map a PF_* family to its AFNUM_* value; AFNUM_RESERVED when unsupported. */
+uint16_t nhrp_afnum_from_pf(uint16_t pf)
+{
+	if (pf == PF_INET)
+		return AFNUM_INET;
+
+	return AFNUM_RESERVED;
+}
+
+/* Map an AFNUM_* value to its PF_* family; PF_UNSPEC when unknown. */
+uint16_t nhrp_pf_from_afnum(uint16_t afnum)
+{
+	if (afnum == AFNUM_INET)
+		return PF_INET;
+
+	return PF_UNSPEC;
+}
+
+/*
+ * Parse a dotted-quad IPv4 address with an optional "/prefix" suffix.
+ *
+ * string     - text to parse
+ * addr       - filled in as a 4-byte PF_INET address on success
+ * prefix_len - if non-NULL, receives the prefix length (32 when the string
+ *              has none); if NULL, a string carrying a prefix is rejected
+ *
+ * Returns TRUE on success and FALSE otherwise. Octets above 255 and prefix
+ * lengths above 32 are rejected instead of being silently truncated to
+ * eight bits. Nothing is written to addr or prefix_len on failure.
+ */
+int nhrp_address_parse(const char *string,
+		       struct nhrp_address *addr,
+		       uint8_t *prefix_len)
+{
+	unsigned int octet[4], plen = 32;
+	int r, i;
+
+	/* Field widths keep every conversion well inside unsigned range. */
+	r = sscanf(string, "%3u.%3u.%3u.%3u/%3u",
+		   &octet[0], &octet[1], &octet[2], &octet[3], &plen);
+	if (r != 4 && !(r == 5 && prefix_len != NULL))
+		return FALSE;
+
+	for (i = 0; i < 4; i++) {
+		if (octet[i] > 255)
+			return FALSE;
+	}
+	if (plen > 32)
+		return FALSE;
+
+	addr->type = PF_INET;
+	addr->addr_len = 4;
+	addr->subaddr_len = 0;
+	for (i = 0; i < 4; i++)
+		addr->addr[i] = (uint8_t) octet[i];
+	if (prefix_len != NULL)
+		*prefix_len = (uint8_t) plen;
+
+	return TRUE;
+}
+
+/*
+ * Extract source and destination addresses from a raw protocol packet.
+ *
+ * Only ETHPROTO_IP is understood: the buffer must be at least as long as
+ * struct iphdr. Either of src/dst may be NULL when not wanted. Returns TRUE
+ * when the addresses were extracted, FALSE for short packets and for any
+ * other protocol.
+ */
+int nhrp_address_parse_packet(uint16_t protocol, size_t len, uint8_t *packet,
+			      struct nhrp_address *src, struct nhrp_address *dst)
+{
+	struct iphdr *iph;
+	int pf = nhrp_pf_from_protocol(protocol);
+
+	if (protocol != ETHPROTO_IP)
+		return FALSE;
+	if (len < sizeof(struct iphdr))
+		return FALSE;
+
+	iph = (struct iphdr *) packet;
+	if (src != NULL)
+		nhrp_address_set(src, pf, 4, (uint8_t *) &iph->saddr);
+	if (dst != NULL)
+		nhrp_address_set(dst, pf, 4, (uint8_t *) &iph->daddr);
+
+	return TRUE;
+}
+
+/*
+ * Completion callback for ares_gethostbyname().
+ *
+ * Converts up to 16 returned host addresses into struct nhrp_address and
+ * passes them to the query's callback; on any failure status the callback
+ * is invoked with a count of -1. The query is marked idle afterwards.
+ * The signature depends on the c-ares version being compiled against.
+ */
+#if ARES_VERSION_MAJOR > 1 || ARES_VERSION_MINOR > 4
+static void ares_address_cb(void *arg, int status, int timeouts,
+			    struct hostent *he)
+#else
+static void ares_address_cb(void *arg, int status, struct hostent *he)
+#endif
+{
+	struct nhrp_address_query *query = (struct nhrp_address_query *) arg;
+	struct nhrp_address addr[16];
+	int count = -1;
+
+	if (status == ARES_SUCCESS) {
+		count = 0;
+		while (he->h_addr_list[count] != NULL &&
+		       count < ARRAY_SIZE(addr)) {
+			nhrp_address_set(&addr[count], AF_INET, he->h_length,
+					 (uint8_t *) he->h_addr_list[count]);
+			count++;
+		}
+	}
+
+	NHRP_BUG_ON(query->callback == NULL);
+
+	query->callback(query, count, &addr[0]);
+	query->callback = NULL;
+}
+
+/*
+ * Start an asynchronous AF_INET lookup of 'hostname'.
+ *
+ * 'callback' is stored in the query and invoked from ares_address_cb() when
+ * the lookup completes. A query that still has a callback set is busy; in
+ * that case an error is logged and nothing is started.
+ */
+void nhrp_address_resolve(struct nhrp_address_query *query,
+			  const char *hostname,
+			  nhrp_address_query_callback callback)
+{
+	if (query->callback == NULL) {
+		query->callback = callback;
+		ares_gethostbyname(resolver.channel, hostname, AF_INET,
+				   ares_address_cb, query);
+		return;
+	}
+
+	nhrp_error("Trying to resolve '%s', but previous query "
+		   "was not finished yet", hostname);
+}
+
+/*
+ * Cancel a pending lookup.
+ *
+ * This kills all active queries on the channel, not just the one given as
+ * parameter. As those will be retried later anyway, it is not a problem
+ * for now. Nothing happens when the query is idle.
+ */
+void nhrp_address_resolve_cancel(struct nhrp_address_query *query)
+{
+	if (query->callback == NULL)
+		return;
+
+	ares_cancel(resolver.channel);
+}
+
+/* Reset 'addr' to an empty address (no address, no subaddress) of 'type'. */
+void nhrp_address_set_type(struct nhrp_address *addr, uint16_t type)
+{
+	addr->subaddr_len = 0;
+	addr->addr_len = 0;
+	addr->type = type;
+}
+
+/*
+ * Store a 'len'-byte address of the given type, with no subaddress.
+ * Returns FALSE (leaving 'addr' untouched) when 'len' exceeds
+ * NHRP_MAX_ADDRESS_LEN, TRUE otherwise.
+ */
+int nhrp_address_set(struct nhrp_address *addr, uint16_t type, uint8_t len, uint8_t *bytes)
+{
+	if (len <= NHRP_MAX_ADDRESS_LEN) {
+		addr->type = type;
+		addr->addr_len = len;
+		addr->subaddr_len = 0;
+		if (len != 0)
+			memcpy(addr->addr, bytes, len);
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Store an address together with its subaddress.
+ *
+ * The 'len' address bytes are placed first in addr->addr, immediately
+ * followed by the 'sublen' subaddress bytes. Returns FALSE (leaving 'addr'
+ * untouched) when the two together exceed NHRP_MAX_ADDRESS_LEN, TRUE
+ * otherwise.
+ */
+int nhrp_address_set_full(struct nhrp_address *addr, uint16_t type,
+			  uint8_t len, uint8_t *bytes,
+			  uint8_t sublen, uint8_t *subbytes)
+{
+	if (len + sublen > NHRP_MAX_ADDRESS_LEN)
+		return FALSE;
+
+	addr->type = type;
+	addr->addr_len = len;
+	/* Record the subaddress length so the copied bytes are not lost. */
+	addr->subaddr_len = sublen;
+	if (len != 0)
+		memcpy(addr->addr, bytes, len);
+	if (sublen != 0)
+		memcpy(&addr->addr[len], subbytes, sublen);
+	return TRUE;
+}
+
+/*
+ * Three-way comparison of two addresses.
+ *
+ * Orders by type, then address length, then subaddress length, and finally
+ * by the raw address + subaddress bytes. Returns 0 only when both are
+ * identical. The fields are compared one after another so the result is a
+ * consistent ordering: cmp(a, b) and cmp(b, a) always have opposite signs.
+ */
+int nhrp_address_cmp(const struct nhrp_address *a, const struct nhrp_address *b)
+{
+	if (a->type != b->type)
+		return a->type > b->type ? 1 : -1;
+	if (a->addr_len != b->addr_len)
+		return a->addr_len > b->addr_len ? 1 : -1;
+	if (a->subaddr_len != b->subaddr_len)
+		return a->subaddr_len > b->subaddr_len ? 1 : -1;
+	return memcmp(a->addr, b->addr, a->addr_len + a->subaddr_len);
+}
+
+/*
+ * Compare the first 'prefix' bits of two addresses.
+ *
+ * Addresses of different type compare by type. If either address is
+ * shorter than 'prefix' bits the result is 1 (no match). Otherwise the
+ * leading bits are compared with bitcmp(); 0 means the prefixes are equal.
+ */
+int nhrp_address_prefix_cmp(const struct nhrp_address *a,
+			    const struct nhrp_address *b, int prefix)
+{
+	if (a->type != b->type)
+		return (a->type > b->type) ? 1 : -1;
+
+	if (a->addr_len * 8 < prefix || b->addr_len * 8 < prefix)
+		return 1;
+
+	return bitcmp(a->addr, b->addr, prefix);
+}
+
+/*
+ * TRUE for a PF_INET address whose first byte has the top nibble 0xe
+ * (224.0.0.0/4); FALSE for everything else.
+ */
+int nhrp_address_is_multicast(const struct nhrp_address *addr)
+{
+	if (addr->type == PF_INET && (addr->addr[0] & 0xf0) == 0xe0)
+		return TRUE;
+
+	return FALSE;
+}
+
+/*
+ * TRUE when the address is unspecified (PF_UNSPEC) or is the all-zero
+ * PF_INET address; FALSE otherwise.
+ */
+int nhrp_address_is_any_addr(const struct nhrp_address *addr)
+{
+	if (addr->type == PF_UNSPEC)
+		return TRUE;
+	if (addr->type != PF_INET)
+		return FALSE;
+
+	if (memcmp(addr->addr, "\x00\x00\x00\x00", 4) == 0)
+		return TRUE;
+	return FALSE;
+}
+
+/*
+ * Hash the address bytes (the subaddress is not included): start from 5381
+ * and fold each byte in as hash * 33 + byte.
+ */
+unsigned int nhrp_address_hash(const struct nhrp_address *addr)
+{
+	const uint8_t *p = addr->addr;
+	const uint8_t *end = p + addr->addr_len;
+	unsigned int hash = 5381;
+
+	while (p < end)
+		hash = hash * 33 + *p++;
+
+	return hash;
+}
+
+/* Clear every bit after the first 'prefix' bits (host part becomes zero). */
+void nhrp_address_set_network(struct nhrp_address *addr, int prefix)
+{
+	const int total = 8 * addr->addr_len;
+	int bit;
+
+	for (bit = prefix; bit < total; bit++) {
+		int mask = 0x80 >> (bit % 8);
+
+		addr->addr[bit / 8] &= ~mask;
+	}
+}
+
+/* Set every bit after the first 'prefix' bits (host part becomes all ones). */
+void nhrp_address_set_broadcast(struct nhrp_address *addr, int prefix)
+{
+	const int total = 8 * addr->addr_len;
+	int bit;
+
+	for (bit = prefix; bit < total; bit++) {
+		int mask = 0x80 >> (bit % 8);
+
+		addr->addr[bit / 8] |= mask;
+	}
+}
+
+/* TRUE when all bits after the first 'prefix' bits are zero. */
+int nhrp_address_is_network(const struct nhrp_address *addr, int prefix)
+{
+	const int total = 8 * addr->addr_len;
+	int bit = prefix;
+
+	while (bit < total) {
+		if (addr->addr[bit / 8] & (0x80 >> (bit % 8)))
+			return FALSE;
+		bit++;
+	}
+	return TRUE;
+}
+
+/*
+ * Render 'addr' as text into 'buffer' (at most 'buflen' bytes) and return
+ * 'buffer'. PF_INET is printed as a dotted quad, PF_UNSPEC as
+ * "(unspecified)", and any other type as its hexadecimal type number.
+ */
+const char *nhrp_address_format(const struct nhrp_address *addr,
+				size_t buflen, char *buffer)
+{
+	if (addr->type == PF_UNSPEC) {
+		snprintf(buffer, buflen, "(unspecified)");
+	} else if (addr->type == PF_INET) {
+		snprintf(buffer, buflen, "%d.%d.%d.%d",
+			 addr->addr[0], addr->addr[1],
+			 addr->addr[2], addr->addr[3]);
+	} else {
+		snprintf(buffer, buflen, "(proto 0x%04x)",
+			 addr->type);
+	}
+
+	return buffer;
+}
+
+/*
+ * TRUE when some CIE on 'cie_list' carries exactly the given NBMA address
+ * and protocol address; FALSE when no entry matches both.
+ */
+int nhrp_address_match_cie_list(struct nhrp_address *nbma_address,
+				struct nhrp_address *protocol_address,
+				struct list_head *cie_list)
+{
+	struct nhrp_cie *entry;
+
+	list_for_each_entry(entry, cie_list, cie_list_entry) {
+		if (nhrp_address_cmp(&entry->nbma_address, nbma_address) != 0)
+			continue;
+		if (nhrp_address_cmp(&entry->protocol_address, protocol_address) != 0)
+			continue;
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Create the ares channel (socket state callback, timeout option 2 and
+ * 3 tries) and initialise the libev watchers that drive it. Returns FALSE
+ * when the channel cannot be created, TRUE otherwise.
+ */
+int nhrp_address_init(void)
+{
+	const int optmask = ARES_OPT_SOCK_STATE_CB | ARES_OPT_TIMEOUT |
+			    ARES_OPT_TRIES;
+	struct ares_options opts;
+	int slot, rc;
+
+	memset(&opts, 0, sizeof(opts));
+	opts.sock_state_cb = &ares_socket_cb;
+	opts.sock_state_cb_data = &resolver;
+	opts.timeout = 2;
+	opts.tries = 3;
+
+	rc = ares_init_options(&resolver.channel, &opts, optmask);
+	if (rc != ARES_SUCCESS)
+		return FALSE;
+
+	ev_timer_init(&resolver.timeout, ares_timeout_cb, 0.0, 0.0);
+	ev_prepare_init(&resolver.prepare, ares_prepare_cb);
+	ev_prepare_start(&resolver.prepare);
+	for (slot = 0; slot < ARRAY_SIZE(resolver.fds); slot++)
+		ev_init(&resolver.fds[slot], ares_io_cb);
+
+	return TRUE;
+}
+
+/* Stop every resolver watcher, then destroy the ares channel. */
+void nhrp_address_cleanup(void)
+{
+	int slot = 0;
+
+	ev_timer_stop(&resolver.timeout);
+	ev_prepare_stop(&resolver.prepare);
+	while (slot < ARRAY_SIZE(resolver.fds)) {
+		ev_io_stop(&resolver.fds[slot]);
+		slot++;
+	}
+	ares_destroy(resolver.channel);
+}
diff --git a/nhrp/nhrp_address.h b/nhrp/nhrp_address.h
new file mode 100644
index 0000000..e479631
--- /dev/null
+++ b/nhrp/nhrp_address.h
@@ -0,0 +1,80 @@
+/* nhrp_address.h - NHRP address structures and helpers
+ *
+ * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#ifndef NHRP_ADDRESS_H
+#define NHRP_ADDRESS_H
+
+#include <stdint.h>
+#include <sys/socket.h>
+#include "list.h"
+
+#define NHRP_MAX_ADDRESS_LEN 6
+
+struct nhrp_cie_list_head;
+struct nhrp_address;
+struct nhrp_address_query;
+
+/*
+ * Completion callback of nhrp_address_resolve(). 'num_addr' is the number
+ * of entries in 'addrs', or -1 when the lookup did not succeed.
+ */
+typedef void (*nhrp_address_query_callback)(struct nhrp_address_query *query,
+ int num_addr,
+ struct nhrp_address *addrs);
+
+/* An address of some family, optionally followed by a subaddress. */
+struct nhrp_address {
+ /* PF_* family, e.g. PF_INET; PF_UNSPEC is treated as "any" address */
+ uint16_t type;
+ /* Number of address bytes at the start of addr[] */
+ uint8_t addr_len;
+ /* Number of subaddress bytes following the address bytes in addr[] */
+ uint8_t subaddr_len;
+ /* Address bytes, then subaddress bytes */
+ uint8_t addr[NHRP_MAX_ADDRESS_LEN];
+};
+
+/*
+ * Handle for one hostname lookup. 'callback' is non-NULL while a lookup is
+ * in progress and is reset to NULL once the result has been delivered.
+ */
+struct nhrp_address_query {
+ nhrp_address_query_callback callback;
+};
+
+uint16_t nhrp_protocol_from_pf(uint16_t pf);
+uint16_t nhrp_pf_from_protocol(uint16_t protocol);
+uint16_t nhrp_afnum_from_pf(uint16_t pf);
+uint16_t nhrp_pf_from_afnum(uint16_t afnum);
+
+int nhrp_address_init(void);
+void nhrp_address_cleanup(void);
+int nhrp_address_parse_packet(uint16_t protocol, size_t len, uint8_t *packet,
+ struct nhrp_address *src,
+ struct nhrp_address *dst);
+int nhrp_address_parse(const char *string, struct nhrp_address *addr,
+ uint8_t *prefix_len);
+void nhrp_address_resolve(struct nhrp_address_query *query,
+ const char *hostname,
+ nhrp_address_query_callback callback);
+void nhrp_address_resolve_cancel(struct nhrp_address_query *query);
+void nhrp_address_set_type(struct nhrp_address *addr, uint16_t type);
+int nhrp_address_set(struct nhrp_address *addr, uint16_t type,
+ uint8_t len, uint8_t *bytes);
+int nhrp_address_set_full(struct nhrp_address *addr, uint16_t type,
+ uint8_t len, uint8_t *bytes,
+ uint8_t sublen, uint8_t *subbytes);
+int nhrp_address_cmp(const struct nhrp_address *a, const struct nhrp_address *b);
+int nhrp_address_prefix_cmp(const struct nhrp_address *a, const struct nhrp_address *b,
+ int prefix);
+unsigned int nhrp_address_hash(const struct nhrp_address *addr);
+void nhrp_address_set_network(struct nhrp_address *addr, int prefix);
+void nhrp_address_set_broadcast(struct nhrp_address *addr, int prefix);
+int nhrp_address_is_network(const struct nhrp_address *addr, int prefix);
+int nhrp_address_is_broadcast(const struct nhrp_address *addr, int prefix);
+int nhrp_address_is_multicast(const struct nhrp_address *addr);
+int nhrp_address_is_any_addr(const struct nhrp_address *addr);
+const char *nhrp_address_format(const struct nhrp_address *addr,
+ size_t buflen, char *buffer);
+
+int nhrp_address_match_cie_list(struct nhrp_address *nbma_address,
+ struct nhrp_address *protocol_address,
+ struct list_head *cie_list);
+
+#endif
diff --git a/nhrp/nhrp_common.h b/nhrp/nhrp_common.h
new file mode 100644
index 0000000..6730e74
--- /dev/null
+++ b/nhrp/nhrp_common.h
@@ -0,0 +1,78 @@
+/* nhrp_common.h - Generic helper functions
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#ifndef NHRP_COMMON_H
+#define NHRP_COMMON_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <linux/if_ether.h>
+
+struct nhrp_interface;
+struct nhrp_address;
+
+extern const char *nhrp_config_file, *nhrp_script_file;
+extern int nhrp_running, nhrp_verbose;
+
+/* Logging */
+void nhrp_log(int level, const char *format, ...);
+
+/* Levels accepted by nhrp_log() */
+#define NHRP_LOG_DEBUG 0
+#define NHRP_LOG_INFO 1
+#define NHRP_LOG_ERROR 2
+
+/* Log at debug level, but only when nhrp_verbose is non-zero */
+#define nhrp_debug(...) \
+ do { \
+ if (nhrp_verbose) \
+ nhrp_log(NHRP_LOG_DEBUG, __VA_ARGS__); \
+ } while(0)
+
+/* Log at info level (printf-style arguments) */
+#define nhrp_info(...) \
+ nhrp_log(NHRP_LOG_INFO, __VA_ARGS__)
+
+/* Log at error level (printf-style arguments) */
+#define nhrp_error(...) \
+ nhrp_log(NHRP_LOG_ERROR, __VA_ARGS__)
+
+void nhrp_perror(const char *message);
+void nhrp_hex_dump(const char *name, const uint8_t *buf, int bytes);
+
+/*
+ * Log a "BUG" message naming the failed condition and its location, then
+ * abort() the process, when 'cond' is true.
+ *
+ * Wrapped in do { } while (0) so the macro is a single statement: it can
+ * be used as the body of an if/else without capturing the else branch.
+ * Callers must terminate it with a semicolon.
+ */
+#define NHRP_BUG_ON(cond) do { \
+	if (cond) { \
+		nhrp_error("BUG: failure at %s:%d/%s(): %s!", \
+			   __FILE__, __LINE__, __func__, #cond); \
+		abort(); \
+	} \
+} while (0)
+
+/* Initializers for system-dependent stuff */
+int forward_init(void);
+void forward_cleanup(void);
+int forward_local_addresses_changed(void);
+
+int kernel_init(void);
+void kernel_stop_listening(void);
+void kernel_cleanup(void);
+int kernel_route(struct nhrp_interface *out_iface,
+ struct nhrp_address *dest,
+ struct nhrp_address *default_source,
+ struct nhrp_address *next_hop,
+ u_int16_t *mtu);
+int kernel_send(uint8_t *packet, size_t bytes, struct nhrp_interface *out,
+ struct nhrp_address *to);
+int kernel_inject_neighbor(struct nhrp_address *neighbor,
+ struct nhrp_address *hwaddr,
+ struct nhrp_interface *dev);
+
+int log_init(void);
+int admin_init(const char *socket);
+void server_init(void);
+
+#endif
diff --git a/nhrp/nhrp_defines.h b/nhrp/nhrp_defines.h
new file mode 100644
index 0000000..2812a13
--- /dev/null
+++ b/nhrp/nhrp_defines.h
@@ -0,0 +1,87 @@
+/* nhrp_defines.h - NHRP definitions
+ *
+ * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#ifndef NHRP_DEFINES_H
+#define NHRP_DEFINES_H
+
+#include <stdint.h>
+#include <byteswap.h>
+#include <sys/param.h>
+#include <linux/version.h>
+
+#ifndef NULL
+#define NULL 0L
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/*
+ * Fallback byte-swap macros, built from shifts and masks only; defined
+ * here only when the included headers did not already provide them.
+ */
+#ifndef __bswap_constant_16
+#define __bswap_constant_16(x) \
+ ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))
+#endif
+#ifndef __bswap_constant_32
+#define __bswap_constant_32(x) \
+ ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
+ (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
+#endif
+
+/*
+ * Macro versions of ntohl/ntohs/htonl/htons: the identity on big-endian
+ * hosts, a byte swap otherwise.
+ */
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define constant_ntohl(x) (x)
+#define constant_ntohs(x) (x)
+#define constant_htonl(x) (x)
+#define constant_htons(x) (x)
+#else
+#define constant_ntohl(x) __bswap_constant_32(x)
+#define constant_ntohs(x) __bswap_constant_16(x)
+#define constant_htonl(x) __bswap_constant_32(x)
+#define constant_htons(x) __bswap_constant_16(x)
+#endif
+
+/* Element count of a true array (not valid for pointers) */
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+#endif
+
+/* Fallback offsetof() for environments that do not define one */
+#ifndef offsetof
+#ifdef __compiler_offsetof
+#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER)
+#else
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+#endif
+
+/* Value with only bit 'x' set; the result has type int */
+#define BIT(x) (1 << (x))
+
+/*
+ * Given a pointer to 'member' embedded in a 'type', return a pointer to
+ * the enclosing 'type'. Uses the statement-expression and typeof
+ * extensions; the temporary also type-checks 'ptr' against the member.
+ */
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+#endif
+
+/*
+ * Expands to nothing on GCC 3 and later and to 0 otherwise.
+ * NOTE(review): presumably used as the size of a trailing array member;
+ * no user is visible in this header.
+ */
+#if __GNUC__ >= 3
+#define NHRP_EMPTY_ARRAY
+#else
+#define NHRP_EMPTY_ARRAY 0
+#endif
+
+/* Defined when building against kernel headers older than 2.6.0 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#define NHRP_NO_NBMA_GRE
+#endif
+
+/* Default holding time, 2 * 60 * 60 = 7200 (presumably seconds) */
+#define NHRP_DEFAULT_HOLDING_TIME (2 * 60 * 60)
+
+#endif
diff --git a/nhrp/nhrp_interface.c b/nhrp/nhrp_interface.c
new file mode 100644
index 0000000..32c2383
--- /dev/null
+++ b/nhrp/nhrp_interface.c
@@ -0,0 +1,188 @@
+/* nhrp_interface.c - NHRP configuration per interface
+ *
+ * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <malloc.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/rtnetlink.h>
+#include "nhrp_common.h"
+#include "nhrp_interface.h"
+#include "nhrp_address.h"
+
+/* Bucket count of index_hash; a power of two because lookups mask with it */
+#define NHRP_INDEX_HASH_SIZE (1 << 6)
+
+/* All known interfaces, linked through name_list_entry */
+static struct list_head name_list = LIST_INITIALIZER(name_list);
+/* Interfaces bucketed by (index & (NHRP_INDEX_HASH_SIZE - 1)) */
+static struct hlist_head index_hash[NHRP_INDEX_HASH_SIZE];
+
+/*
+ * Build a freshly allocated "key=value" string.
+ * Returns NULL when the allocation fails.
+ */
+static char *env(const char *key, const char *value)
+{
+	/* Room for both strings, the '=' and the terminating NUL. */
+	char *entry = malloc(strlen(key)+strlen(value)+2);
+
+	if (entry != NULL)
+		sprintf(entry, "%s=%s", key, value);
+	return entry;
+}
+
+/*
+ * Build a freshly allocated "key=<decimal value>" string.
+ * Returns NULL when the allocation fails.
+ */
+static char *envu32(const char *key, uint32_t value)
+{
+	/* 16 spare bytes cover '=', up to ten digits and the NUL. */
+	char *entry = malloc(strlen(key)+16);
+
+	if (entry != NULL)
+		sprintf(entry, "%s=%u", key, value);
+	return entry;
+}
+
+/* Unlink every interface from both registries and free it. */
+void nhrp_interface_cleanup(void)
+{
+	struct nhrp_interface *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &name_list, name_list_entry) {
+		list_del(&cur->name_list_entry);
+		hlist_del(&cur->index_list_entry);
+		free(cur);
+	}
+}
+
+/*
+ * Re-register an interface: move it to the front of the name list and into
+ * the index hash bucket selected by its current iface->index.
+ */
+void nhrp_interface_hash(struct nhrp_interface *iface)
+{
+	struct hlist_head *bucket;
+
+	bucket = &index_hash[iface->index & (NHRP_INDEX_HASH_SIZE - 1)];
+
+	list_del(&iface->name_list_entry);
+	list_add(&iface->name_list_entry, &name_list);
+
+	hlist_del(&iface->index_list_entry);
+	hlist_add_head(&iface->index_list_entry, bucket);
+}
+
+/*
+ * Call 'enumerator(ctx, iface)' for every known interface. Stops at the
+ * first non-zero return value and passes it back; returns 0 when every
+ * call returned 0.
+ */
+int nhrp_interface_foreach(nhrp_interface_enumerator enumerator, void *ctx)
+{
+	struct nhrp_interface *cur;
+
+	list_for_each_entry(cur, &name_list, name_list_entry) {
+		int status = enumerator(ctx, cur);
+
+		if (status != 0)
+			return status;
+	}
+	return 0;
+}
+
+/*
+ * Look up an interface by name.
+ *
+ * name   - interface name to search for
+ * create - when non-zero and no interface matches, allocate a new one with
+ *          the default holding time and the main routing table, and
+ *          register it (initially in index hash bucket 0)
+ *
+ * Returns the interface, or NULL when it does not exist and 'create' is
+ * zero, or when the allocation fails. Names that do not fit iface->name
+ * are truncated; the stored name is always NUL terminated.
+ */
+struct nhrp_interface *nhrp_interface_get_by_name(const char *name, int create)
+{
+	struct nhrp_interface *iface;
+
+	list_for_each_entry(iface, &name_list, name_list_entry) {
+		if (strcmp(iface->name, name) == 0)
+			return iface;
+	}
+
+	if (!create)
+		return NULL;
+
+	iface = calloc(1, sizeof(struct nhrp_interface));
+	if (iface == NULL)
+		return NULL;
+
+	iface->holding_time = NHRP_DEFAULT_HOLDING_TIME;
+	iface->route_table = RT_TABLE_MAIN;
+	/* calloc() zeroed the buffer, so the last byte stays a terminator. */
+	strncpy(iface->name, name, sizeof(iface->name) - 1);
+
+	list_init(&iface->peer_list);
+	list_init(&iface->mcast_list);
+	list_add(&iface->name_list_entry, &name_list);
+	hlist_add_head(&iface->index_list_entry, &index_hash[0]);
+
+	return iface;
+}
+
+/*
+ * Look up an interface by its index via the index hash. Returns NULL when
+ * no interface has that index; the 'create' argument is not used.
+ */
+struct nhrp_interface *nhrp_interface_get_by_index(unsigned int index, int create)
+{
+	struct hlist_head *bucket = &index_hash[index & (NHRP_INDEX_HASH_SIZE - 1)];
+	struct nhrp_interface *cur;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(cur, node, bucket, index_list_entry) {
+		if (cur->index == index)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Find the configured interface for an NBMA address: the first one whose
+ * NBMA address equals 'addr', or which has no NBMA address (PF_UNSPEC) and
+ * no link_index. Interfaces without the CONFIGURED flag are ignored.
+ * Returns NULL when nothing matches.
+ */
+struct nhrp_interface *nhrp_interface_get_by_nbma(struct nhrp_address *addr)
+{
+	struct nhrp_interface *cur;
+
+	list_for_each_entry(cur, &name_list, name_list_entry) {
+		if ((cur->flags & NHRP_INTERFACE_FLAG_CONFIGURED) == 0)
+			continue;
+
+		if (nhrp_address_cmp(addr, &cur->nbma_address) == 0 ||
+		    (cur->nbma_address.type == PF_UNSPEC && !cur->link_index))
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Find the interface whose protocol address equals 'addr'.
+ * Returns NULL when there is none.
+ */
+struct nhrp_interface *nhrp_interface_get_by_protocol(struct nhrp_address *addr)
+{
+	struct nhrp_interface *cur;
+
+	list_for_each_entry(cur, &name_list, name_list_entry) {
+		if (nhrp_address_cmp(addr, &cur->protocol_address) != 0)
+			continue;
+		return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Run the NHRP script for an interface event in a forked child.
+ *
+ * iface  - interface the event concerns
+ * action - passed to the script as its first argument
+ *
+ * The child's environment holds NHRP_TYPE=INTERFACE, NHRP_INTERFACE=<name>
+ * and NHRP_GRE_KEY=<key>. Returns TRUE in the parent once the child has
+ * been forked (without waiting for it) and FALSE when fork() fails.
+ */
+int nhrp_interface_run_script(struct nhrp_interface *iface, char *action)
+{
+	const char *argv[] = { nhrp_script_file, action, NULL };
+	char *envp[6];
+	pid_t pid;
+	int i = 0;
+
+	pid = fork();
+	if (pid == -1)
+		return FALSE;
+	if (pid > 0)
+		return TRUE;
+
+	/* Child process from here on. */
+	envp[i++] = "NHRP_TYPE=INTERFACE";
+	envp[i++] = env("NHRP_INTERFACE", iface->name);
+	envp[i++] = envu32("NHRP_GRE_KEY", iface->gre_key);
+	envp[i++] = NULL;
+
+	execve(nhrp_script_file, (char **) argv, envp);
+
+	/*
+	 * execve() only returns on failure. Leave with _exit() so the child
+	 * does not run the parent's atexit handlers or flush stdio buffers
+	 * inherited across fork().
+	 */
+	_exit(1);
+}
+
+/*
+ * Find a peer of 'iface' whose next hop address equals 'nbma', using the
+ * interface's NBMA hash. Returns NULL when there is no such peer.
+ */
+struct nhrp_peer *nhrp_interface_find_peer(struct nhrp_interface *iface,
+					   const struct nhrp_address *nbma)
+{
+	struct hlist_head *bucket;
+	struct nhrp_peer *cur;
+	struct hlist_node *node;
+
+	bucket = &iface->nbma_hash[nhrp_address_hash(nbma) % NHRP_INTERFACE_NBMA_HASH_SIZE];
+	hlist_for_each_entry(cur, node, bucket, nbma_hash_entry) {
+		if (nhrp_address_cmp(nbma, &cur->next_hop_address) == 0)
+			return cur;
+	}
+	return NULL;
+}
diff --git a/nhrp/nhrp_interface.h b/nhrp/nhrp_interface.h
new file mode 100644
index 0000000..8e3e8df
--- /dev/null
+++ b/nhrp/nhrp_interface.h
@@ -0,0 +1,78 @@
+/* nhrp_interface.h - NHRP configuration per interface definitions
+ *
+ * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#ifndef NHRP_INTERFACE_H
+#define NHRP_INTERFACE_H
+
+#include "nhrp_packet.h"
+#include "nhrp_peer.h"
+
+#define NHRP_INTERFACE_FLAG_NON_CACHING 0x0001 /* Do not cache entries */
+#define NHRP_INTERFACE_FLAG_SHORTCUT 0x0002 /* Create shortcut routes */
+#define NHRP_INTERFACE_FLAG_REDIRECT 0x0004 /* Send redirects */
+#define NHRP_INTERFACE_FLAG_SHORTCUT_DEST 0x0008 /* Advertise routes */
+#define NHRP_INTERFACE_FLAG_CONFIGURED 0x0010 /* Found in config file */
+
+#define NHRP_INTERFACE_NBMA_HASH_SIZE 256
+
+/*
+ * Per-interface NHRP state: configuration, values cached from the kernel
+ * interface, and the interface's own peer cache.
+ */
+struct nhrp_interface {
+ /* Links into the global interface list and the by-index hash */
+ struct list_head name_list_entry;
+ struct hlist_node index_list_entry;
+
+ /* Configured information */
+ char name[16];
+ unsigned int flags; /* NHRP_INTERFACE_FLAG_* bits */
+ unsigned int holding_time; /* new interfaces start at NHRP_DEFAULT_HOLDING_TIME */
+ struct nhrp_buffer *auth_token;
+ unsigned int route_table; /* new interfaces start at RT_TABLE_MAIN */
+
+ /* Cached from kernel interface */
+ unsigned int index, link_index;
+ uint32_t gre_key;
+ uint16_t afnum;
+ uint16_t mtu, nbma_mtu;
+ struct nhrp_address nbma_address;
+ struct nhrp_cie nat_cie;
+
+ /* Actually, we should have list of protocol addresses;
+ * we might have multiple address and multiple protocol types */
+ struct nhrp_address protocol_address;
+ int protocol_address_prefix;
+
+ /* Peer cache is interface specific */
+ struct list_head peer_list;
+ /* Peers bucketed by nhrp_address_hash() % NHRP_INTERFACE_NBMA_HASH_SIZE */
+ struct hlist_head nbma_hash[NHRP_INTERFACE_NBMA_HASH_SIZE];
+
+ /* Multicast related stuff */
+ struct list_head mcast_list;
+ int mcast_mask;
+ int mcast_numaddr;
+ struct nhrp_address *mcast_addr;
+};
+
+/*
+ * Callback for nhrp_interface_foreach(); a non-zero return value stops the
+ * enumeration and is passed back to the caller.
+ */
+typedef int (*nhrp_interface_enumerator)(void *ctx, struct nhrp_interface *iface);
+
+void nhrp_interface_cleanup(void);
+void nhrp_interface_hash(struct nhrp_interface *iface);
+int nhrp_interface_foreach(nhrp_interface_enumerator enumerator, void *ctx);
+struct nhrp_interface *nhrp_interface_get_by_name(const char *name, int create);
+struct nhrp_interface *nhrp_interface_get_by_index(unsigned int index, int create);
+struct nhrp_interface *nhrp_interface_get_by_nbma(struct nhrp_address *addr);
+struct nhrp_interface *nhrp_interface_get_by_protocol(struct nhrp_address *addr);
+int nhrp_interface_run_script(struct nhrp_interface *iface, char *action);
+struct nhrp_peer *nhrp_interface_find_peer(struct nhrp_interface *iface, const struct nhrp_address *nbma);
+
+void nhrp_interface_resolve_nbma(struct nhrp_interface *iface,
+ struct nhrp_address *nbmadest,
+ struct nhrp_address *nbma);
+
+#endif
diff --git a/nhrp/nhrp_packet.c b/nhrp/nhrp_packet.c
new file mode 100644
index 0000000..f46b481
--- /dev/null
+++ b/nhrp/nhrp_packet.c
@@ -0,0 +1,1331 @@
+/* nhrp_packet.c - NHRP packet marshalling and transceiving
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <malloc.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <netinet/in.h>
+
+#include "libev.h"
+#include "nhrp_common.h"
+#include "nhrp_packet.h"
+#include "nhrp_peer.h"
+#include "nhrp_interface.h"
+
+/* Retransmission: retry count and the timer interval set in nhrp_packet_alloc() */
+#define PACKET_RETRIES 6
+#define PACKET_RETRY_INTERVAL 5.0
+
+/*
+ * Rate limiting. The hash size is the bucket count of rate_limit_hash; the
+ * purge interval drives rate_limit_timer, whose callback drops entries
+ * idle for more than 2 * RATE_LIMIT_SILENCE.
+ * NOTE(review): intervals are presumably in seconds (libev timer values).
+ */
+#define RATE_LIMIT_HASH_SIZE 256
+#define RATE_LIMIT_MAX_TOKENS 4
+#define RATE_LIMIT_SEND_INTERVAL 5.0
+#define RATE_LIMIT_SILENCE 360.0
+#define RATE_LIMIT_PURGE_INTERVAL 600.0
+
+#define MAX_PDU_SIZE 1500
+
+/* Rate limit state for one (source, destination) address pair. */
+struct nhrp_rate_limit {
+ /* Link in the rate_limit_hash bucket */
+ struct hlist_node hash_entry;
+ /* The address pair this entry is keyed on */
+ struct nhrp_address src;
+ struct nhrp_address dst;
+ /* Timestamp compared against ev_now() when pruning idle entries */
+ ev_tstamp rate_last;
+ /* NOTE(review): presumably a token count bounded by RATE_LIMIT_MAX_TOKENS */
+ int rate_tokens;
+};
+
+/* NOTE(review): presumably the counter for outgoing request ids */
+static uint32_t request_id = 0;
+/* Packets are linked here through request_list_entry */
+static struct list_head pending_requests = LIST_INITIALIZER(pending_requests);
+/* Rate limit entries, bucketed by the xor of the src and dst address hashes */
+static struct hlist_head rate_limit_hash[RATE_LIMIT_HASH_SIZE];
+/* Periodic purge timer; runs only while rate limit entries exist */
+static ev_timer rate_limit_timer;
+/* Number of entries currently held in rate_limit_hash */
+static int num_rate_limit_entries = 0;
+
+/* Forward declarations */
+static void nhrp_packet_xmit_timeout_cb(struct ev_timer *w, int revents);
+static int unmarshall_packet_header(uint8_t **pdu, size_t *pdusize,
+ struct nhrp_packet *packet);
+
+/* Unhash and free one rate limit entry, keeping the entry count in step. */
+static void nhrp_rate_limit_delete(struct nhrp_rate_limit *rl)
+{
+	hlist_del(&rl->hash_entry);
+	num_rate_limit_entries--;
+	free(rl);
+}
+
+/*
+ * Drop rate limit entries whose source or destination matches the first
+ * 'pref' bits of 'a'; an AF_UNSPEC address drops every entry. The purge
+ * timer is stopped once no entries remain. Returns the number of entries
+ * removed.
+ */
+int nhrp_rate_limit_clear(struct nhrp_address *a, int pref)
+{
+	struct nhrp_rate_limit *entry;
+	struct hlist_node *cur, *next;
+	int bucket, removed = 0;
+
+	for (bucket = 0; bucket < RATE_LIMIT_HASH_SIZE; bucket++) {
+		hlist_for_each_entry_safe(entry, cur, next,
+					  &rate_limit_hash[bucket],
+					  hash_entry) {
+			int match = a->type == AF_UNSPEC ||
+				nhrp_address_prefix_cmp(a, &entry->src, pref) == 0 ||
+				nhrp_address_prefix_cmp(a, &entry->dst, pref) == 0;
+
+			if (match) {
+				nhrp_rate_limit_delete(entry);
+				removed++;
+			}
+		}
+	}
+
+	if (num_rate_limit_entries == 0)
+		ev_timer_stop(&rate_limit_timer);
+
+	return removed;
+}
+
+/*
+ * Purge timer callback: delete every rate limit entry whose rate_last lies
+ * more than 2 * RATE_LIMIT_SILENCE in the past, and stop the timer once
+ * the table is empty.
+ */
+static void prune_rate_limit_entries_cb(struct ev_timer *w, int revents)
+{
+	struct nhrp_rate_limit *entry;
+	struct hlist_node *cur, *next;
+	int bucket = 0;
+
+	while (bucket < RATE_LIMIT_HASH_SIZE) {
+		hlist_for_each_entry_safe(entry, cur, next,
+					  &rate_limit_hash[bucket],
+					  hash_entry) {
+			if (ev_now() > entry->rate_last + 2 * RATE_LIMIT_SILENCE)
+				nhrp_rate_limit_delete(entry);
+		}
+		bucket++;
+	}
+
+	if (num_rate_limit_entries == 0)
+		ev_timer_stop(&rate_limit_timer);
+}
+
+/*
+ * Return the rate limit entry for the (src, dst) pair, creating a zeroed
+ * one when none exists yet. Creating the first entry also starts the
+ * periodic purge timer.
+ *
+ * Returns NULL when a new entry is needed but cannot be allocated.
+ */
+static struct nhrp_rate_limit *get_rate_limit(struct nhrp_address *src,
+					      struct nhrp_address *dst)
+{
+	unsigned int key;
+	struct nhrp_rate_limit *e;
+	struct hlist_node *n;
+
+	key = nhrp_address_hash(src) ^ nhrp_address_hash(dst);
+	key %= RATE_LIMIT_HASH_SIZE;
+
+	hlist_for_each_entry(e, n, &rate_limit_hash[key], hash_entry) {
+		if (nhrp_address_cmp(&e->src, src) == 0 &&
+		    nhrp_address_cmp(&e->dst, dst) == 0)
+			return e;
+	}
+
+	e = calloc(1, sizeof(struct nhrp_rate_limit));
+	if (e == NULL)
+		return NULL;
+
+	e->src = *src;
+	e->dst = *dst;
+	hlist_add_head(&e->hash_entry, &rate_limit_hash[key]);
+
+	/* First entry: arm the timer that prunes idle entries. */
+	if (num_rate_limit_entries == 0) {
+		ev_timer_init(&rate_limit_timer, prune_rate_limit_entries_cb,
+			      RATE_LIMIT_PURGE_INTERVAL,
+			      RATE_LIMIT_PURGE_INTERVAL);
+		ev_timer_start(&rate_limit_timer);
+	}
+
+	num_rate_limit_entries++;
+
+	return e;
+}
+
+/*
+ * Compute the 16-bit one's complement checksum over 'len' bytes of 'pdu'.
+ *
+ * The data is summed as 16-bit words exactly as they lie in memory, so the
+ * result can be stored back into the packet without byte swapping. A PDU
+ * of odd length is summed as if a 0x00 byte were appended to it. Words are
+ * loaded with memcpy() so 'pdu' needs no particular alignment.
+ */
+static uint16_t nhrp_calculate_checksum(uint8_t *pdu, uint16_t len)
+{
+	uint32_t csum = 0;
+	uint16_t word;
+	int i;
+
+	for (i = 0; i + 1 < len; i += 2) {
+		memcpy(&word, &pdu[i], sizeof(word));
+		csum += word;
+	}
+	if (len & 1) {
+		/* Last byte comes first in the word, padding byte second. */
+		uint8_t tail[2] = { pdu[len - 1], 0x00 };
+
+		memcpy(&word, tail, sizeof(word));
+		csum += word;
+	}
+
+	/* Fold the carries back into the low 16 bits. */
+	while (csum & 0xffff0000)
+		csum = (csum & 0xffff) + (csum >> 16);
+
+	return (~csum) & 0xffff;
+}
+
+/*
+ * Allocate a buffer with room for 'size' data bytes and record that size
+ * in buf->length. The data itself is left uninitialised.
+ * Returns NULL when the allocation fails.
+ */
+struct nhrp_buffer *nhrp_buffer_alloc(uint32_t size)
+{
+	struct nhrp_buffer *buf;
+
+	buf = malloc(sizeof(struct nhrp_buffer) + size);
+	if (buf == NULL)
+		return NULL;
+	buf->length = size;
+
+	return buf;
+}
+
+/*
+ * Return a newly allocated duplicate of 'buffer' (same length, same data).
+ * Returns NULL when no buffer could be allocated for the copy.
+ */
+struct nhrp_buffer *nhrp_buffer_copy(struct nhrp_buffer *buffer)
+{
+	struct nhrp_buffer *copy;
+
+	copy = nhrp_buffer_alloc(buffer->length);
+	if (copy == NULL)
+		return NULL;
+	memcpy(copy->data, buffer->data, buffer->length);
+	return copy;
+}
+
+/*
+ * Three-way comparison of two buffers: the shorter buffer orders first,
+ * and buffers of equal length are ordered by memcmp() of their data.
+ */
+int nhrp_buffer_cmp(struct nhrp_buffer *a, struct nhrp_buffer *b)
+{
+	if (a->length != b->length)
+		return (a->length > b->length) ? 1 : -1;
+
+	return memcmp(a->data, b->data, a->length);
+}
+
+/* Release a buffer obtained from nhrp_buffer_alloc(); NULL is accepted. */
+void nhrp_buffer_free(struct nhrp_buffer *buffer)
+{
+	free(buffer);
+}
+
+/* Allocate a zero-filled CIE; returns NULL when out of memory. */
+struct nhrp_cie *nhrp_cie_alloc(void)
+{
+	struct nhrp_cie *cie = calloc(1, sizeof(struct nhrp_cie));
+
+	return cie;
+}
+
+/* Release a CIE obtained from nhrp_cie_alloc(); NULL is accepted. */
+void nhrp_cie_free(struct nhrp_cie *cie)
+{
+	free(cie);
+}
+
+/* Zero the CIE's list linkage; every other field is left as it is. */
+void nhrp_cie_reset(struct nhrp_cie *cie)
+{
+	memset(&cie->cie_list_entry, 0, sizeof cie->cie_list_entry);
+}
+
+/*
+ * Release whatever the payload currently owns (its raw buffer, or every
+ * CIE on its list) and mark the payload as NHRP_PAYLOAD_TYPE_NONE.
+ * The payload structure itself is not freed.
+ */
+void nhrp_payload_free(struct nhrp_payload *payload)
+{
+	struct nhrp_cie *cur, *next;
+
+	if (payload->payload_type == NHRP_PAYLOAD_TYPE_RAW) {
+		nhrp_buffer_free(payload->u.raw);
+	} else if (payload->payload_type == NHRP_PAYLOAD_TYPE_CIE_LIST) {
+		list_for_each_entry_safe(cur, next, &payload->u.cie_list, cie_list_entry) {
+			list_del(&cur->cie_list_entry);
+			nhrp_cie_free(cur);
+		}
+	}
+
+	payload->payload_type = NHRP_PAYLOAD_TYPE_NONE;
+}
+
+/*
+ * Switch the payload to 'type'. Nothing happens when it already has that
+ * type; otherwise the old contents are released and the payload is reset
+ * to an empty CIE list or a NULL raw buffer.
+ */
+void nhrp_payload_set_type(struct nhrp_payload *payload, int type)
+{
+	if (payload->payload_type == type)
+		return;
+
+	nhrp_payload_free(payload);
+	payload->payload_type = type;
+
+	if (type == NHRP_PAYLOAD_TYPE_CIE_LIST)
+		list_init(&payload->u.cie_list);
+	else
+		payload->u.raw = NULL;
+}
+
+/*
+ * Make the payload a raw payload holding 'raw'.
+ * NOTE(review): when the payload is already raw, the previous buffer
+ * pointer is overwritten without being freed - confirm callers never rely
+ * on this function to release it.
+ */
+void nhrp_payload_set_raw(struct nhrp_payload *payload, struct nhrp_buffer *raw)
+{
+	const int wanted = NHRP_PAYLOAD_TYPE_RAW;
+
+	nhrp_payload_set_type(payload, wanted);
+	payload->u.raw = raw;
+}
+
+/*
+ * Append 'cie' to a CIE-list payload. The payload takes over the CIE; if
+ * the payload is not a CIE list, the CIE is freed and the mistake logged.
+ */
+void nhrp_payload_add_cie(struct nhrp_payload *payload, struct nhrp_cie *cie)
+{
+	if (payload->payload_type == NHRP_PAYLOAD_TYPE_CIE_LIST) {
+		list_add_tail(&cie->cie_list_entry, &payload->u.cie_list);
+		return;
+	}
+
+	nhrp_cie_free(cie);
+	nhrp_info("Trying to add CIE payload to non-CIE payload %d\n",
+		  payload->payload_type);
+}
+
+/*
+ * Return the CIE at position 'index' of a CIE-list payload, counting from
+ * 1. Returns NULL when the payload is not a CIE list or has no entry at
+ * that position.
+ */
+struct nhrp_cie *nhrp_payload_get_cie(struct nhrp_payload *payload, int index)
+{
+	struct nhrp_cie *cur;
+
+	if (payload->payload_type != NHRP_PAYLOAD_TYPE_CIE_LIST)
+		return NULL;
+
+	list_for_each_entry(cur, &payload->u.cie_list, cie_list_entry) {
+		if (--index == 0)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/* Allocate a zero-initialised packet holding one reference.
+ *
+ * The hop count is preset to NHRP_PACKET_DEFAULT_HOP_COUNT, the request
+ * list linkage is initialised and the retransmit timer is prepared (but
+ * not started) with PACKET_RETRY_INTERVAL.
+ *
+ * Returns NULL when memory cannot be allocated; callers such as
+ * nhrp_packet_receive() already test for that.
+ */
+struct nhrp_packet *nhrp_packet_alloc(void)
+{
+	struct nhrp_packet *packet;
+
+	packet = calloc(1, sizeof(*packet));
+	if (packet == NULL)
+		return NULL;
+
+	packet->ref = 1;
+	packet->hdr.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT;
+	list_init(&packet->request_list_entry);
+	ev_timer_init(&packet->timeout, nhrp_packet_xmit_timeout_cb,
+		      PACKET_RETRY_INTERVAL, PACKET_RETRY_INTERVAL);
+	return packet;
+}
+
+struct nhrp_packet *nhrp_packet_get(struct nhrp_packet *packet)
+{ /* Take an additional reference on the packet and hand the same pointer
+   * back; each reference is dropped again with nhrp_packet_put(). */
+ packet->ref++;
+ return packet;
+}
+
+struct nhrp_payload *nhrp_packet_payload(struct nhrp_packet *packet, int payload_type)
+{ /* The packet's main payload is stored under the NHRP_EXTENSION_PAYLOAD
+   * pseudo-extension, so this is just that lookup; payload_type is
+   * applied exactly as nhrp_packet_extension() applies it. */
+ return nhrp_packet_extension(packet, NHRP_EXTENSION_PAYLOAD, payload_type);
+}
+
+/* Look up, and unless told otherwise create, an extension of a packet.
+ *
+ * `extension` is the extension type, optionally or'ed with flag bits:
+ * the low 15 bits select the slot, the low 16 bits are stored as the
+ * on-wire type, and NHRP_EXTENSION_FLAG_NOCREATE turns the call into a
+ * pure lookup.
+ *
+ * `payload_type` is NHRP_PAYLOAD_TYPE_ANY to accept the extension as it
+ * is, or a concrete type. An existing extension of a different type is
+ * converted (its old contents are freed) unless NOCREATE is set, in
+ * which case NULL is returned.
+ *
+ * Returns the payload slot, or NULL when nothing matches under NOCREATE,
+ * when the type does not fit the per-packet tables, or when all
+ * NHRP_MAX_EXTENSIONS slots are in use. The type may come straight from
+ * a received packet, so it must be range-checked before it is used as an
+ * array index.
+ */
+struct nhrp_payload *nhrp_packet_extension(struct nhrp_packet *packet,
+					   uint32_t extension, int payload_type)
+{
+	uint32_t type = extension & 0x7fff;
+	struct nhrp_payload *p;
+
+	/* extension_by_type[] only has NHRP_MAX_EXTENSIONS entries. */
+	if (type >= NHRP_MAX_EXTENSIONS)
+		return NULL;
+
+	p = packet->extension_by_type[type];
+	if (p != NULL) {
+		if (payload_type == NHRP_PAYLOAD_TYPE_ANY ||
+		    payload_type == p->payload_type)
+			return p;
+		if (extension & NHRP_EXTENSION_FLAG_NOCREATE)
+			return NULL;
+		nhrp_payload_set_type(p, payload_type);
+		return p;
+	}
+
+	if (extension & NHRP_EXTENSION_FLAG_NOCREATE)
+		return NULL;
+
+	/* Never hand out a slot beyond extension_by_order[]. */
+	if (packet->num_extensions >= NHRP_MAX_EXTENSIONS)
+		return NULL;
+
+	p = &packet->extension_by_order[packet->num_extensions++];
+	p->extension_type = extension & 0xffff;
+	packet->extension_by_type[type] = p;
+	if (payload_type != NHRP_PAYLOAD_TYPE_ANY)
+		nhrp_payload_set_type(p, payload_type);
+
+	return p;
+}
+
+/* Destroy a packet whose last reference is gone: drop the destination
+ * peer reference, release every extension payload in creation order and
+ * free the packet itself.
+ */
+static void nhrp_packet_release(struct nhrp_packet *packet)
+{
+	struct nhrp_payload *ext, *end;
+
+	if (packet->dst_peer != NULL)
+		nhrp_peer_put(packet->dst_peer);
+
+	ext = packet->extension_by_order;
+	end = ext + packet->num_extensions;
+	while (ext < end)
+		nhrp_payload_free(ext++);
+
+	free(packet);
+}
+
+/* Drop one reference; the packet is destroyed when the count reaches
+ * zero. Calling this on a packet that has no references left is a bug
+ * and is caught by NHRP_BUG_ON.
+ */
+void nhrp_packet_put(struct nhrp_packet *packet)
+{
+	NHRP_BUG_ON(packet->ref == 0);
+
+	if (--packet->ref != 0)
+		return;
+
+	nhrp_packet_release(packet);
+}
+
+int nhrp_packet_reroute(struct nhrp_packet *packet, struct nhrp_peer *dst_peer)
+{ /* Send the packet back out of the interface it arrived on, towards
+   * dst_peer, then run nhrp_packet_route() and return its result. */
+ packet->dst_iface = packet->src_iface;
+ if (packet->dst_peer != NULL)
+ nhrp_peer_put(packet->dst_peer); /* drop the reference to the previous destination */
+ packet->dst_peer = nhrp_peer_get(dst_peer); /* nhrp_packet_forward() passes NULL here; presumably nhrp_peer_get() tolerates that -- TODO confirm */
+ return nhrp_packet_route(packet);
+}
+
+static void nhrp_packet_dequeue(struct nhrp_packet *packet)
+{ /* Stop tracking a pending request: cancel its retry timer, unlink it
+   * from the request list if it is still linked, and drop one reference
+   * (nhrp_packet_send_request() takes one when it queues the packet). */
+ ev_timer_stop(&packet->timeout);
+ if (list_hashed(&packet->request_list_entry))
+ list_del(&packet->request_list_entry);
+ nhrp_packet_put(packet); /* may free the packet; it must not be touched afterwards */
+}
+
+static int nhrp_do_handle_error_indication(struct nhrp_packet *error_pkt,
+ struct nhrp_packet *orig_pkt)
+{
+ struct nhrp_packet *req;
+
+ list_for_each_entry(req, &pending_requests, request_list_entry) {
+ if (orig_pkt->hdr.u.request_id != req->hdr.u.request_id)
+ continue;
+
+ if (nhrp_address_cmp(&orig_pkt->src_nbma_address,
+ &req->src_nbma_address))
+ continue;
+ if (nhrp_address_cmp(&orig_pkt->src_protocol_address,
+ &req->src_protocol_address))
+ continue;
+
+ if (req->handler != NULL)
+ req->handler(req->handler_ctx, error_pkt);
+ nhrp_packet_dequeue(req);
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static int nhrp_handle_error_indication(struct nhrp_packet *error_packet)
+{
+ struct nhrp_packet *packet;
+ struct nhrp_payload *payload;
+ uint8_t *pdu;
+ size_t pduleft;
+ int r;
+
+ packet = nhrp_packet_alloc();
+ if (packet == NULL)
+ return FALSE;
+
+ payload = nhrp_packet_payload(error_packet, NHRP_PAYLOAD_TYPE_RAW);
+ pdu = payload->u.raw->data;
+ pduleft = payload->u.raw->length;
+
+ if (!unmarshall_packet_header(&pdu, &pduleft, packet)) {
+ nhrp_packet_put(packet);
+ return FALSE;
+ }
+
+ r = nhrp_do_handle_error_indication(error_packet, packet);
+ nhrp_packet_put(packet);
+
+ return r;
+}
+
+#define NHRP_TYPE_REQUEST 0 /* packet class: request */
+#define NHRP_TYPE_REPLY 1 /* packet class: reply; matched against pending_requests when received locally */
+#define NHRP_TYPE_INDICATION 2 /* packet class: error or traffic indication */
+
+static struct { /* per-packet-type properties, indexed by hdr.type */
+ int type; /* one of the NHRP_TYPE_* classes above */
+ uint16_t payload_type; /* NHRP_PAYLOAD_TYPE_* used when unmarshalling the payload */
+ int (*handler)(struct nhrp_packet *packet); /* local receive handler; NULL until installed through nhrp_packet_hook_request() */
+} packet_types[] = { /* indices not listed are zero-filled, i.e. class 0 with no payload and no handler */
+ [NHRP_PACKET_RESOLUTION_REQUEST] = {
+ .type = NHRP_TYPE_REQUEST,
+ .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST,
+ },
+ [NHRP_PACKET_RESOLUTION_REPLY] = {
+ .type = NHRP_TYPE_REPLY,
+ .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST,
+ },
+ [NHRP_PACKET_REGISTRATION_REQUEST] = {
+ .type = NHRP_TYPE_REQUEST,
+ .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST,
+ },
+ [NHRP_PACKET_REGISTRATION_REPLY] = {
+ .type = NHRP_TYPE_REPLY,
+ .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST,
+ },
+ [NHRP_PACKET_PURGE_REQUEST] = {
+ .type = NHRP_TYPE_REQUEST,
+ .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST,
+ },
+ [NHRP_PACKET_PURGE_REPLY] = {
+ .type = NHRP_TYPE_REPLY,
+ .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST,
+ },
+ [NHRP_PACKET_ERROR_INDICATION] = {
+ .type = NHRP_TYPE_INDICATION,
+ .payload_type = NHRP_PAYLOAD_TYPE_RAW, /* carries the offending packet as opaque bytes */
+ .handler = nhrp_handle_error_indication, /* the only handler wired up statically */
+ },
+ [NHRP_PACKET_TRAFFIC_INDICATION] = {
+ .type = NHRP_TYPE_INDICATION,
+ .payload_type = NHRP_PAYLOAD_TYPE_RAW,
+ }
+};
+static int extension_types[] = { /* payload representation per extension type; entries not listed are 0 (NHRP_PAYLOAD_TYPE_NONE), which unmarshall_packet() treats as raw */
+ [NHRP_EXTENSION_RESPONDER_ADDRESS] = NHRP_PAYLOAD_TYPE_CIE_LIST,
+ [NHRP_EXTENSION_FORWARD_TRANSIT_NHS] = NHRP_PAYLOAD_TYPE_CIE_LIST,
+ [NHRP_EXTENSION_REVERSE_TRANSIT_NHS] = NHRP_PAYLOAD_TYPE_CIE_LIST,
+ [NHRP_EXTENSION_NAT_ADDRESS] = NHRP_PAYLOAD_TYPE_CIE_LIST
+};
+
+/* Copy `size` bytes from the read cursor into `raw` and advance.
+ *
+ * pdu/pduleft are the cursor and the number of bytes still available;
+ * both are updated only on success. Returns FALSE, leaving everything
+ * untouched, when fewer than `size` bytes remain.
+ */
+static int unmarshall_binary(uint8_t **pdu, size_t *pduleft, size_t size, void *raw)
+{
+	if (size <= *pduleft) {
+		memcpy(raw, *pdu, size);
+		*pdu += size;
+		*pduleft -= size;
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/* Read a protocol address at the cursor.
+ *
+ * The caller has already stored the expected family in pa->type and the
+ * on-wire length in pa->addr_len. A zero length leaves the address
+ * unspecified (PF_UNSPEC). Returns FALSE when the PDU is too short or
+ * the address is rejected by nhrp_address_set().
+ */
+static inline int unmarshall_protocol_address(uint8_t **pdu, size_t *pduleft, struct nhrp_address *pa)
+{
+	if (*pduleft < pa->addr_len)
+		return FALSE;
+
+	if (pa->addr_len == 0)
+		nhrp_address_set_type(pa, PF_UNSPEC);
+	else if (!nhrp_address_set(pa, pa->type, pa->addr_len, *pdu))
+		return FALSE;
+
+	*pdu += pa->addr_len;
+	*pduleft -= pa->addr_len;
+	return TRUE;
+}
+
+/* Read an NBMA address and its subaddress at the cursor.
+ *
+ * The caller has already stored the family in na->type and the on-wire
+ * lengths in na->addr_len / na->subaddr_len; the subaddress bytes follow
+ * the address bytes directly. When both lengths are zero the address is
+ * left unspecified (PF_UNSPEC). Returns FALSE when the PDU is too short
+ * or nhrp_address_set_full() rejects the address.
+ */
+static inline int unmarshall_nbma_address(uint8_t **pdu, size_t *pduleft, struct nhrp_address *na)
+{
+	if (*pduleft < na->addr_len + na->subaddr_len)
+		return FALSE;
+
+	if (na->addr_len == 0 && na->subaddr_len == 0)
+		nhrp_address_set_type(na, PF_UNSPEC);
+	else if (!nhrp_address_set_full(na, na->type,
+					na->addr_len, *pdu,
+					na->subaddr_len, *pdu + na->addr_len))
+		return FALSE;
+
+	*pdu += na->addr_len + na->subaddr_len;
+	*pduleft -= na->addr_len + na->subaddr_len;
+	return TRUE;
+}
+
+/* Read one Client Information Entry at the cursor.
+ *
+ * The fixed CIE header is copied first; its length fields, together
+ * with the address families taken from the enclosing packet `p`, then
+ * drive the parsing of the NBMA and protocol addresses that follow.
+ * Returns FALSE as soon as any part does not fit or is rejected.
+ */
+static int unmarshall_cie(uint8_t **pdu, size_t *pduleft, struct nhrp_packet *p, struct nhrp_cie *cie)
+{
+	struct nhrp_address *nbma = &cie->nbma_address;
+	struct nhrp_address *proto = &cie->protocol_address;
+
+	if (!unmarshall_binary(pdu, pduleft, sizeof(struct nhrp_cie_header), &cie->hdr))
+		return FALSE;
+
+	nbma->type = nhrp_pf_from_afnum(p->hdr.afnum);
+	nbma->addr_len = cie->hdr.nbma_address_len;
+	nbma->subaddr_len = cie->hdr.nbma_subaddress_len;
+
+	proto->type = nhrp_pf_from_protocol(p->hdr.protocol_type);
+	proto->addr_len = cie->hdr.protocol_address_len;
+
+	if (!unmarshall_nbma_address(pdu, pduleft, nbma))
+		return FALSE;
+
+	return unmarshall_protocol_address(pdu, pduleft, proto);
+}
+
+/* Read `size` bytes at the cursor into payload `p` as payload type `type`.
+ *
+ *  - NHRP_PAYLOAD_TYPE_NONE: the bytes are skipped.
+ *  - NHRP_PAYLOAD_TYPE_RAW: the bytes are copied into a new buffer; a
+ *    buffer left over from an earlier occurrence of the same extension
+ *    is freed first instead of being leaked.
+ *  - NHRP_PAYLOAD_TYPE_CIE_LIST: the bytes are parsed as consecutive
+ *    CIEs and appended to the list. Each CIE is linked before it is
+ *    parsed, so on failure it is still released with the payload.
+ *
+ * Returns TRUE on success. Returns FALSE when `p` is NULL (no payload
+ * slot available), the PDU is shorter than `size`, an allocation fails,
+ * a CIE is malformed, or the type is unknown.
+ */
+static int unmarshall_payload(uint8_t **pdu, size_t *pduleft,
+			      struct nhrp_packet *packet,
+			      int type, size_t size,
+			      struct nhrp_payload *p)
+{
+	struct nhrp_cie *cie;
+	size_t cieleft;
+
+	if (p == NULL)
+		return FALSE;
+	if (*pduleft < size)
+		return FALSE;
+
+	nhrp_payload_set_type(p, type);
+	switch (p->payload_type) {
+	case NHRP_PAYLOAD_TYPE_NONE:
+		*pdu += size;
+		*pduleft -= size;
+		return TRUE;
+	case NHRP_PAYLOAD_TYPE_RAW:
+		/* u.raw is NULL for a fresh raw payload, or the old buffer
+		 * when the payload was raw already. */
+		nhrp_buffer_free(p->u.raw);
+		p->u.raw = nhrp_buffer_alloc(size);
+		if (p->u.raw == NULL)
+			return FALSE;
+		return unmarshall_binary(pdu, pduleft, size, p->u.raw->data);
+	case NHRP_PAYLOAD_TYPE_CIE_LIST:
+		/* CIEs are bounded by the payload size, not by the whole PDU. */
+		cieleft = size;
+		while (cieleft) {
+			cie = nhrp_cie_alloc();
+			if (cie == NULL)
+				return FALSE;
+			list_add_tail(&cie->cie_list_entry, &p->u.cie_list);
+			if (!unmarshall_cie(pdu, &cieleft, packet, cie))
+				return FALSE;
+		}
+		*pduleft -= size;
+		return TRUE;
+	default:
+		return FALSE;
+	}
+}
+
+/* Read the fixed NHRP header and the three mandatory addresses.
+ *
+ * After the fixed header has been copied into packet->hdr, its length
+ * fields and address family / protocol type describe the source NBMA
+ * address, the source protocol address and the destination protocol
+ * address that follow on the wire, in that order.
+ *
+ * Returns FALSE when the PDU is too short, the packet type is outside
+ * packet_types[], or an address is rejected.
+ */
+static int unmarshall_packet_header(uint8_t **pdu, size_t *pduleft, struct nhrp_packet *packet)
+{
+	struct nhrp_packet_header *hdr = &packet->hdr;
+	int nbma_pf, proto_pf;
+
+	if (!unmarshall_binary(pdu, pduleft, sizeof(packet->hdr), hdr))
+		return FALSE;
+
+	if (hdr->type >= ARRAY_SIZE(packet_types))
+		return FALSE;
+
+	/* hdr is a byte-for-byte copy of the wire header, so the length
+	 * fields can be taken from it directly. */
+	nbma_pf = nhrp_pf_from_afnum(hdr->afnum);
+	packet->src_nbma_address.type = nbma_pf;
+	packet->src_nbma_address.addr_len = hdr->src_nbma_address_len;
+	packet->src_nbma_address.subaddr_len = hdr->src_nbma_subaddress_len;
+
+	proto_pf = nhrp_pf_from_protocol(hdr->protocol_type);
+	packet->src_protocol_address.type = proto_pf;
+	packet->src_protocol_address.addr_len = hdr->src_protocol_address_len;
+
+	proto_pf = nhrp_pf_from_protocol(hdr->protocol_type);
+	packet->dst_protocol_address.type = proto_pf;
+	packet->dst_protocol_address.addr_len = hdr->dst_protocol_address_len;
+
+	if (unmarshall_nbma_address(pdu, pduleft, &packet->src_nbma_address) &&
+	    unmarshall_protocol_address(pdu, pduleft, &packet->src_protocol_address))
+		return unmarshall_protocol_address(pdu, pduleft, &packet->dst_protocol_address);
+
+	return FALSE;
+}
+
+static int unmarshall_packet(uint8_t *pdu, size_t pdusize, struct nhrp_packet *packet)
+{ /* Parse a complete received PDU into `packet`: fixed header and
+   * addresses, then the payload, then the extension list up to the END
+   * extension. Returns TRUE on success. On a malformed payload or
+   * extension area it calls nhrp_packet_send_error() with
+   * NHRP_ERROR_PROTOCOL_ERROR and returns FALSE; a header that fails to
+   * parse returns FALSE without that call. */
+ size_t pduleft = pdusize;
+ uint8_t *pos = pdu;
+ int size, extension_offset;
+
+ if (!unmarshall_packet_header(&pos, &pduleft, packet))
+ return FALSE;
+
+ extension_offset = ntohs(packet->hdr.extension_offset);
+ if (extension_offset == 0) {
+ /* No extensions; rest of data is payload */
+ size = pduleft;
+ } else {
+ /* Extensions present; exclude those from payload */
+ size = extension_offset - (pos - pdu); /* payload runs from the end of the header up to the extension offset */
+ if (size < 0 || size > pduleft) { /* offset points into the header or past the end of the PDU */
+ nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ERROR, pos - pdu);
+ return FALSE;
+ }
+ }
+
+ if (!unmarshall_payload(&pos, &pduleft, packet,
+ packet_types[packet->hdr.type].payload_type,
+ size, nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_ANY))) {
+ nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ERROR, pos - pdu);
+ return FALSE;
+ }
+
+ if (extension_offset == 0)
+ return TRUE;
+
+ pos = &pdu[extension_offset]; /* restart at the extension area; the size check above keeps this inside the PDU */
+ pduleft = pdusize - extension_offset;
+ do {
+ struct nhrp_extension_header eh;
+ int extension_type, payload_type;
+
+ if (!unmarshall_binary(&pos, &pduleft, sizeof(eh), &eh)) { /* also catches a list that lacks the END extension */
+ nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ERROR, pos - pdu);
+ return FALSE;
+ }
+
+ extension_type = ntohs(eh.type) & ~NHRP_EXTENSION_FLAG_COMPULSORY; /* classify by type without the compulsory flag */
+ if (extension_type == NHRP_EXTENSION_END)
+ break;
+
+ payload_type = NHRP_PAYLOAD_TYPE_NONE;
+ if (extension_type < ARRAY_SIZE(extension_types))
+ payload_type = extension_types[extension_type];
+ if (payload_type == NHRP_PAYLOAD_TYPE_NONE) /* not a known CIE-list extension: keep its bytes verbatim */
+ payload_type = NHRP_PAYLOAD_TYPE_RAW;
+ if (payload_type == NHRP_PAYLOAD_TYPE_RAW &&
+ ntohs(eh.length) == 0) /* empty raw extension: recorded without a buffer */
+ payload_type = NHRP_PAYLOAD_TYPE_NONE;
+
+ if (!unmarshall_payload(&pos, &pduleft, packet,
+ payload_type, ntohs(eh.length),
+ nhrp_packet_extension(packet, ntohs(eh.type), NHRP_PAYLOAD_TYPE_ANY))) { /* NOTE(review): the wire-supplied type is passed through unchecked here; any range validation is left to nhrp_packet_extension() */
+ nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ERROR, pos - pdu);
+ return FALSE;
+ }
+ } while (1);
+
+ return TRUE;
+}
+
+static int nhrp_packet_forward(struct nhrp_packet *packet)
+{ /* Forward a packet that is not addressed to this node: spend one hop,
+   * re-route it via the receiving interface, record this node in the
+   * forward/reverse transit NHS extension when the packet carries one,
+   * and send it on. Failures are reported to the sender through
+   * nhrp_packet_send_error(). Returns TRUE when the hop count ran out,
+   * FALSE when routing fails or a loop is detected, and otherwise the
+   * result of nhrp_packet_route_and_send(). */
+ char tmp[64], tmp2[64], tmp3[64];
+ struct nhrp_payload *p = NULL;
+
+ nhrp_info("Forwarding packet from nbma src %s, proto src %s to proto dst %s, hop count %d",
+ nhrp_address_format(&packet->src_nbma_address,
+ sizeof(tmp), tmp),
+ nhrp_address_format(&packet->src_protocol_address,
+ sizeof(tmp2), tmp2),
+ nhrp_address_format(&packet->dst_protocol_address,
+ sizeof(tmp3), tmp3),
+ packet->hdr.hop_count);
+
+ if (packet->hdr.hop_count == 0) {
+ nhrp_packet_send_error(packet, NHRP_ERROR_HOP_COUNT_EXCEEDED, 0);
+ return TRUE;
+ }
+ packet->hdr.hop_count--;
+
+ if (!nhrp_packet_reroute(packet, NULL)) { /* NULL peer: let nhrp_packet_route() pick the next hop */
+ nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE, 0);
+ return FALSE;
+ }
+
+ switch (packet_types[packet->hdr.type].type) { /* requests and indications use the forward transit list, replies the reverse one */
+ case NHRP_TYPE_REQUEST:
+ case NHRP_TYPE_INDICATION:
+ p = nhrp_packet_extension(packet,
+ NHRP_EXTENSION_FORWARD_TRANSIT_NHS |
+ NHRP_EXTENSION_FLAG_NOCREATE,
+ NHRP_PAYLOAD_TYPE_CIE_LIST);
+ break;
+ case NHRP_TYPE_REPLY:
+ p = nhrp_packet_extension(packet,
+ NHRP_EXTENSION_REVERSE_TRANSIT_NHS |
+ NHRP_EXTENSION_FLAG_NOCREATE,
+ NHRP_PAYLOAD_TYPE_CIE_LIST);
+ break;
+ }
+ if (p != NULL) { /* only packets that carry the transit extension are stamped */
+ struct nhrp_cie *cie;
+
+ if (nhrp_address_match_cie_list(&packet->dst_peer->my_nbma_address,
+ &packet->dst_iface->protocol_address,
+ &p->u.cie_list)) { /* our own addresses are already listed: the packet has been here before */
+ nhrp_packet_send_error(packet, NHRP_ERROR_LOOP_DETECTED, 0);
+ return FALSE;
+ }
+
+ cie = nhrp_cie_alloc();
+ if (cie != NULL) { /* on allocation failure the packet is forwarded without our transit entry */
+ cie->hdr = (struct nhrp_cie_header) {
+ .code = NHRP_CODE_SUCCESS,
+ .holding_time = htons(packet->dst_iface->holding_time),
+ };
+ cie->nbma_address = packet->dst_peer->my_nbma_address;
+ cie->protocol_address = packet->dst_iface->protocol_address;
+ nhrp_payload_add_cie(p, cie);
+ }
+ }
+
+ return nhrp_packet_route_and_send(packet);
+}
+
+static int nhrp_packet_receive_local(struct nhrp_packet *packet)
+{ /* Process a packet addressed to this node. A reply is matched against
+   * the pending requests (request id, source NBMA and source protocol
+   * address); the first match has its handler called and is dequeued.
+   * Any other packet goes to the handler registered for its type.
+   * Returns TRUE for a reply (matched, or unmatched and answered with an
+   * error), FALSE for a type without a handler, otherwise the handler's
+   * result. */
+ struct nhrp_packet *req;
+ char tmp[64], tmp2[64], tmp3[64];
+
+ if (packet_types[packet->hdr.type].type == NHRP_TYPE_REPLY) {
+ list_for_each_entry(req, &pending_requests, request_list_entry) {
+ if (packet->hdr.u.request_id != req->hdr.u.request_id)
+ continue;
+ if (nhrp_address_cmp(&packet->src_nbma_address,
+ &req->src_nbma_address))
+ continue;
+ if (nhrp_address_cmp(&packet->src_protocol_address,
+ &req->src_protocol_address))
+ continue;
+
+ if (req->handler != NULL)
+ req->handler(req->handler_ctx, packet);
+ nhrp_packet_dequeue(req); /* unlinking while iterating is fine because we return immediately */
+
+ return TRUE;
+ }
+
+ /* Reply to unsent request? */
+ nhrp_info("Packet type %d from nbma src %s, proto src %s, "
+ "proto dst %s dropped: no matching request",
+ packet->hdr.type,
+ nhrp_address_format(&packet->src_nbma_address,
+ sizeof(tmp), tmp),
+ nhrp_address_format(&packet->src_protocol_address,
+ sizeof(tmp2), tmp2),
+ nhrp_address_format(&packet->dst_protocol_address,
+ sizeof(tmp3), tmp3));
+
+ nhrp_packet_send_error(
+ packet, NHRP_ERROR_INVALID_RESOLUTION_REPLY, 0);
+ return TRUE;
+ }
+
+ if (packet_types[packet->hdr.type].handler == NULL) {
+ nhrp_info("Packet type %d from nbma src %s, proto src %s, "
+ "proto dst %s not supported",
+ packet->hdr.type,
+ nhrp_address_format(&packet->src_nbma_address,
+ sizeof(tmp), tmp),
+ nhrp_address_format(&packet->src_protocol_address,
+ sizeof(tmp2), tmp2),
+ nhrp_address_format(&packet->dst_protocol_address,
+ sizeof(tmp3), tmp3));
+ return FALSE;
+ }
+
+ if (packet->dst_peer->next_hop_address.type != PF_UNSPEC) {
+ /* Broadcast destinations get rewritten as if destined to
+ * our local address */
+ packet->dst_protocol_address =
+ packet->dst_peer->next_hop_address;
+ }
+
+ return packet_types[packet->hdr.type].handler(packet);
+}
+
+int nhrp_packet_receive(uint8_t *pdu, size_t pdulen,
+ struct nhrp_interface *iface,
+ struct nhrp_address *from)
+{ /* Entry point for a PDU received on `iface` from link-layer address
+   * `from`. The PDU is checksum-verified and parsed, the destination is
+   * looked up, the authentication extension is compared with the
+   * interface's auth_token when one is configured, and the packet is
+   * then handled locally or forwarded. Returns the result of that final
+   * step, or FALSE when the packet is dropped earlier. */
+ char tmp[64];
+ struct nhrp_packet *packet;
+ struct nhrp_address *dest;
+ struct nhrp_peer *peer;
+ int ret = FALSE;
+
+ if (nhrp_calculate_checksum(pdu, pdulen) != 0) {
+ nhrp_error("Bad checksum in packet from %s",
+ nhrp_address_format(from, sizeof(tmp), tmp));
+ return FALSE;
+ }
+
+ packet = nhrp_packet_alloc();
+ if (packet == NULL)
+ return FALSE;
+
+ if (!unmarshall_packet(pdu, pdulen, packet)) { /* NOTE(review): src_iface and req_pdu are still unset here, so the error indications raised inside unmarshall_packet() have no interface to go out on */
+ nhrp_error("Failed to unmarshall packet from %s",
+ nhrp_address_format(from, sizeof(tmp), tmp));
+ goto error;
+ }
+
+ packet->req_pdu = pdu; /* lets nhrp_packet_send_error() quote the original PDU */
+ packet->req_pdulen = pdulen;
+
+ if (packet_types[packet->hdr.type].type == NHRP_TYPE_REPLY) /* replies travel back towards the original source */
+ dest = &packet->src_protocol_address;
+ else
+ dest = &packet->dst_protocol_address;
+
+ peer = nhrp_peer_route(iface, dest, 0, BIT(NHRP_PEER_TYPE_LOCAL_ADDR)); /* non-NULL only if dest is one of our own addresses, judging by the type mask -- TODO confirm */
+ packet->src_linklayer_address = *from;
+ packet->src_iface = iface;
+ packet->dst_peer = nhrp_peer_get(peer);
+
+ /* RFC2332 5.3.4 - Authentication is always done pairwise on an NHRP
+ * hop-by-hop basis; i.e. regenerated at each hop. */
+ if (packet->src_iface->auth_token &&
+ (packet->hdr.type != NHRP_PACKET_ERROR_INDICATION ||
+ packet->hdr.u.error.code != NHRP_ERROR_AUTHENTICATION_FAILURE)) { /* authentication-failure errors are exempt from the check */
+ struct nhrp_payload *p;
+ p = nhrp_packet_extension(packet,
+ NHRP_EXTENSION_AUTHENTICATION |
+ NHRP_EXTENSION_FLAG_NOCREATE,
+ NHRP_PAYLOAD_TYPE_RAW);
+ if (p == NULL ||
+ nhrp_buffer_cmp(packet->src_iface->auth_token, p->u.raw) != 0) { /* missing, empty or different token */
+ nhrp_error("Dropping packet from %s with bad authentication",
+ nhrp_address_format(from, sizeof(tmp), tmp));
+ nhrp_packet_send_error(packet, NHRP_ERROR_AUTHENTICATION_FAILURE, 0);
+ goto error;
+ }
+ }
+
+ if (peer != NULL &&
+ peer->type == NHRP_PEER_TYPE_LOCAL_ADDR)
+ ret = nhrp_packet_receive_local(packet);
+ else
+ ret = nhrp_packet_forward(packet);
+
+ packet->req_pdu = NULL; /* pdu is the caller's buffer; the packet may outlive this call if someone took a reference */
+ packet->req_pdulen = 0;
+
+error:
+ nhrp_packet_put(packet);
+ return ret;
+}
+
+/* Append `size` bytes from `raw` at the write cursor and advance.
+ *
+ * pdu/pduleft are the cursor and the space still available; both are
+ * updated only on success. Returns FALSE, writing nothing, when fewer
+ * than `size` bytes of space remain.
+ */
+static int marshall_binary(uint8_t **pdu, size_t *pduleft, size_t size, void *raw)
+{
+	if (size <= *pduleft) {
+		memcpy(*pdu, raw, size);
+		*pdu += size;
+		*pduleft -= size;
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/* Write a protocol address at the cursor. Protocol addresses have no
+ * subaddress part, so one that carries a subaddress is refused.
+ */
+static inline int marshall_protocol_address(uint8_t **pdu, size_t *pduleft, struct nhrp_address *pa)
+{
+	if (pa->subaddr_len == 0)
+		return marshall_binary(pdu, pduleft, pa->addr_len, pa->addr);
+
+	return FALSE;
+}
+
+static inline int marshall_nbma_address(uint8_t **pdu, size_t *pduleft, struct nhrp_address *na)
+{ /* Write the NBMA address followed by its subaddress in one copy of
+   * addr_len + subaddr_len bytes starting at na->addr -- assumes the
+   * subaddress is stored right behind the address; TODO confirm. */
+ return marshall_binary(pdu, pduleft, na->addr_len + na->subaddr_len, na->addr);
+}
+
+/* Write one Client Information Entry at the cursor.
+ *
+ * The length fields of the CIE header are refreshed from the addresses
+ * first, so the header always describes what is written after it: the
+ * NBMA address (with subaddress), then the protocol address.
+ */
+static int marshall_cie(uint8_t **pdu, size_t *pduleft, struct nhrp_cie *cie)
+{
+	struct nhrp_cie_header *hdr = &cie->hdr;
+
+	hdr->nbma_address_len = cie->nbma_address.addr_len;
+	hdr->nbma_subaddress_len = cie->nbma_address.subaddr_len;
+	hdr->protocol_address_len = cie->protocol_address.addr_len;
+
+	if (marshall_binary(pdu, pduleft, sizeof(struct nhrp_cie_header), hdr) &&
+	    marshall_nbma_address(pdu, pduleft, &cie->nbma_address))
+		return marshall_protocol_address(pdu, pduleft, &cie->protocol_address);
+
+	return FALSE;
+}
+
+/* Write the contents of a payload at the cursor.
+ *
+ * An empty payload writes nothing. A raw payload writes its buffer; a
+ * raw payload without a buffer (the type was set but no buffer was ever
+ * attached) is treated as empty rather than dereferenced. A CIE list
+ * writes its entries in list order.
+ *
+ * Returns TRUE on success, FALSE when `p` is NULL, the output space runs
+ * out, a CIE cannot be written, or the payload type is unknown.
+ */
+static int marshall_payload(uint8_t **pdu, size_t *pduleft, struct nhrp_payload *p)
+{
+	struct nhrp_cie *cie;
+
+	if (p == NULL)
+		return FALSE;
+
+	switch (p->payload_type) {
+	case NHRP_PAYLOAD_TYPE_NONE:
+		return TRUE;
+	case NHRP_PAYLOAD_TYPE_RAW:
+		if (p->u.raw == NULL || p->u.raw->length == 0)
+			return TRUE;
+		return marshall_binary(pdu, pduleft, p->u.raw->length, p->u.raw->data);
+	case NHRP_PAYLOAD_TYPE_CIE_LIST:
+		list_for_each_entry(cie, &p->u.cie_list, cie_list_entry) {
+			if (!marshall_cie(pdu, pduleft, cie))
+				return FALSE;
+		}
+		return TRUE;
+	default:
+		return FALSE;
+	}
+}
+
+/* Write the fixed header followed by the source NBMA address, the
+ * source protocol address and the destination protocol address.
+ * Stops at, and reports, the first part that does not fit.
+ */
+static int marshall_packet_header(uint8_t **pdu, size_t *pduleft, struct nhrp_packet *packet)
+{
+	if (marshall_binary(pdu, pduleft, sizeof(packet->hdr), &packet->hdr) &&
+	    marshall_nbma_address(pdu, pduleft, &packet->src_nbma_address) &&
+	    marshall_protocol_address(pdu, pduleft, &packet->src_protocol_address))
+		return marshall_protocol_address(pdu, pduleft, &packet->dst_protocol_address);
+
+	return FALSE;
+}
+
+static int marshall_packet(uint8_t *pdu, size_t pduleft, struct nhrp_packet *packet)
+{ /* Serialize `packet` into the buffer pdu of pduleft bytes: header,
+   * payload, every non-empty extension, then the END extension. Returns
+   * the PDU size in bytes, or a negative code naming the step that ran
+   * out of space or failed (-1 header, -2 payload, -3 extension header,
+   * -4 extension body, -5 END extension). */
+ uint8_t *pos = pdu;
+ struct nhrp_packet_header *phdr = (struct nhrp_packet_header *) pdu; /* header as written into the buffer, patched in place further down */
+ struct nhrp_extension_header neh;
+ int i, size;
+
+ if (!marshall_packet_header(&pos, &pduleft, packet))
+ return -1;
+ if (!marshall_payload(&pos, &pduleft, nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_ANY)))
+ return -2;
+
+ phdr->extension_offset = htons((int)(pos - pdu)); /* extensions start right after the payload */
+ for (i = 1; i < packet->num_extensions; i++) { /* slot 0 is skipped -- assumes the payload written above always occupies it; TODO confirm */
+ struct nhrp_extension_header *eh = (struct nhrp_extension_header *) pos; /* remember where this header lands so its length can be filled in afterwards */
+
+ if (packet->extension_by_order[i].payload_type == NHRP_PAYLOAD_TYPE_NONE)
+ continue;
+
+ neh.type = htons(packet->extension_by_order[i].extension_type);
+ neh.length = 0;
+
+ if (!marshall_binary(&pos, &pduleft, sizeof(neh), &neh))
+ return -3;
+ if (!marshall_payload(&pos, &pduleft, &packet->extension_by_order[i]))
+ return -4;
+ eh->length = htons((pos - (uint8_t *) eh) - sizeof(neh)); /* body length is only known once the body has been written */
+ }
+ neh.type = htons(NHRP_EXTENSION_END | NHRP_EXTENSION_FLAG_COMPULSORY);
+ neh.length = 0;
+ if (!marshall_binary(&pos, &pduleft, sizeof(neh), &neh))
+ return -5;
+
+ /* Cisco is seriously brain damaged. It needs some extra garbage
+ * at the end of error indication or it'll barf out spurious errors. */
+ if (packet->hdr.type == NHRP_PACKET_ERROR_INDICATION &&
+ pduleft >= 0x10) { /* the padding is silently left out when it does not fit */
+ memset(pos, 0, 0x10);
+ pos += 0x10;
+ pduleft -= 0x10;
+ }
+
+ size = (int)(pos - pdu);
+ phdr->packet_size = htons(size);
+ phdr->checksum = 0; /* the checksum is computed over the PDU with this field zeroed */
+ phdr->src_nbma_address_len = packet->src_nbma_address.addr_len;
+ phdr->src_nbma_subaddress_len = packet->src_nbma_address.subaddr_len;
+ phdr->src_protocol_address_len = packet->src_protocol_address.addr_len;
+ phdr->dst_protocol_address_len = packet->dst_protocol_address.addr_len;
+ phdr->checksum = nhrp_calculate_checksum(pdu, size); /* must stay last: it covers every field patched above */
+
+ return size;
+}
+
+int nhrp_packet_route(struct nhrp_packet *packet)
+{ /* Resolve packet->dst_peer for a packet whose dst_iface is already
+   * set. Replies are routed back towards the source protocol address,
+   * everything else towards the destination. Returns TRUE when dst_peer
+   * is set afterwards (including when it already was), FALSE when there
+   * is no destination interface or no usable peer entry. */
+ struct nhrp_address proto_nexthop, *src, *dst;
+ struct list_head *cielist = NULL;
+ struct nhrp_payload *payload;
+ struct nhrp_peer *peer;
+ char tmp[64];
+ int r;
+
+ if (packet->dst_iface == NULL) {
+ nhrp_error("nhrp_packet_route called without destination interface");
+ return FALSE;
+ }
+
+ if (packet_types[packet->hdr.type].type == NHRP_TYPE_REPLY) {
+ dst = &packet->src_protocol_address;
+ src = &packet->dst_protocol_address;
+ r = NHRP_EXTENSION_REVERSE_TRANSIT_NHS;
+ } else {
+ dst = &packet->dst_protocol_address;
+ src = &packet->src_protocol_address;
+ r = NHRP_EXTENSION_FORWARD_TRANSIT_NHS;
+ }
+ payload = nhrp_packet_extension(packet,
+ r | NHRP_EXTENSION_FLAG_NOCREATE,
+ NHRP_PAYLOAD_TYPE_CIE_LIST);
+ if (payload != NULL)
+ cielist = &payload->u.cie_list; /* transit list handed to the route lookup; stays NULL when the packet has none */
+
+ if (packet->dst_peer != NULL) {
+ proto_nexthop = packet->dst_peer->next_hop_address; /* already routed; this copy is not used again in this function */
+ } else {
+ proto_nexthop = *dst;
+ do { /* follow local routes until a peer that is not a route (or a route without next hop) is found */
+ peer = nhrp_peer_route_full(
+ packet->dst_iface, &proto_nexthop, 0,
+ NHRP_PEER_TYPEMASK_ROUTE_VIA_NHS, src, cielist);
+ if (peer == NULL || peer->type == NHRP_PEER_TYPE_NEGATIVE) {
+ nhrp_error("No peer entry for protocol address %s",
+ nhrp_address_format(&proto_nexthop,
+ sizeof(tmp), tmp));
+ return FALSE;
+ }
+ if (peer->type != NHRP_PEER_TYPE_LOCAL_ROUTE)
+ break;
+ if (peer->next_hop_address.type == AF_UNSPEC)
+ break;
+ proto_nexthop = peer->next_hop_address; /* look up the route's next hop in the following iteration */
+ } while (1);
+
+ packet->dst_peer = nhrp_peer_get(peer);
+ }
+
+ return TRUE;
+}
+
+/* Serialize a fully routed packet into a stack buffer and pass it to
+ * kernel_send() for the destination peer's next hop address.
+ *
+ * packet->dst_peer and packet->dst_iface must be set. Returns TRUE when
+ * the PDU was handed to the kernel, FALSE when marshalling or sending
+ * failed.
+ */
+int nhrp_packet_marshall_and_send(struct nhrp_packet *packet)
+{
+	uint8_t pdu[MAX_PDU_SIZE];
+	char src_proto[64], src_nbma[64], dst_proto[64], dst_nbma[64];
+	int len;
+
+	nhrp_debug("Sending packet %d, from: %s (nbma %s), to: %s (nbma %s)",
+		   packet->hdr.type,
+		   nhrp_address_format(&packet->src_protocol_address,
+				       sizeof(src_proto), src_proto),
+		   nhrp_address_format(&packet->src_nbma_address,
+				       sizeof(src_nbma), src_nbma),
+		   nhrp_address_format(&packet->dst_protocol_address,
+				       sizeof(dst_proto), dst_proto),
+		   nhrp_address_format(&packet->dst_peer->next_hop_address,
+				       sizeof(dst_nbma), dst_nbma));
+
+	len = marshall_packet(pdu, sizeof(pdu), packet);
+	if (len < 0) {
+		nhrp_error("Packet marshalling failed (r=%d)", len);
+		return FALSE;
+	}
+
+	return kernel_send(pdu, len, packet->dst_iface,
+			   &packet->dst_peer->next_hop_address) ? TRUE : FALSE;
+}
+
+int nhrp_packet_route_and_send(struct nhrp_packet *packet)
+{ /* Finish a packet and get it on its way: route it if necessary, fill
+   * in source addresses and address families that are still unset, add
+   * the responder address CIE to replies that ask for it, regenerate
+   * the authentication extension for the outgoing interface, then
+   * either deliver locally, transmit, or park the packet on a peer that
+   * is not up yet. */
+ struct nhrp_payload *payload;
+
+ if (packet->dst_peer == NULL || packet->dst_iface == NULL) {
+ if (!nhrp_packet_route(packet)) { /* note: nhrp_packet_route() itself refuses a NULL dst_iface */
+ nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE, 0);
+ return TRUE;
+ }
+ }
+
+ if (packet->src_nbma_address.addr_len == 0)
+ packet->src_nbma_address = packet->dst_peer->my_nbma_address;
+ if (packet->src_protocol_address.addr_len == 0)
+ packet->src_protocol_address = packet->dst_iface->protocol_address;
+ if (packet->hdr.afnum == AFNUM_RESERVED)
+ packet->hdr.afnum = packet->dst_peer->afnum;
+ if (packet->hdr.protocol_type == 0)
+ packet->hdr.protocol_type = packet->dst_peer->protocol_type;
+
+ /* RFC2332 5.3.1 */
+ payload = nhrp_packet_extension(
+ packet, NHRP_EXTENSION_RESPONDER_ADDRESS |
+ NHRP_EXTENSION_FLAG_COMPULSORY | NHRP_EXTENSION_FLAG_NOCREATE,
+ NHRP_PAYLOAD_TYPE_CIE_LIST);
+ if (packet_types[packet->hdr.type].type == NHRP_TYPE_REPLY &&
+ (payload != NULL && list_empty(&payload->u.cie_list))) { /* reply carries an empty responder address extension: fill in our own addresses */
+ struct nhrp_cie *cie;
+
+ cie = nhrp_cie_alloc();
+ if (cie == NULL)
+ return FALSE;
+
+ cie->hdr.holding_time = htons(packet->dst_iface->holding_time);
+ cie->nbma_address = packet->dst_peer->my_nbma_address;
+ cie->protocol_address = packet->dst_iface->protocol_address;
+ nhrp_payload_set_type(payload, NHRP_PAYLOAD_TYPE_CIE_LIST);
+ nhrp_payload_add_cie(payload, cie);
+ }
+
+ /* RFC2332 5.3.4 - Authentication is always done pairwise on an NHRP
+ * hop-by-hop basis; i.e. regenerated at each hop. */
+ payload = nhrp_packet_extension(packet,
+ NHRP_EXTENSION_AUTHENTICATION |
+ NHRP_EXTENSION_FLAG_COMPULSORY,
+ NHRP_PAYLOAD_TYPE_RAW);
+ nhrp_payload_free(payload); /* discard any token that came in with the packet; leaves the extension empty */
+ if (packet->dst_iface->auth_token != NULL)
+ nhrp_payload_set_raw(payload,
+ nhrp_buffer_copy(packet->dst_iface->auth_token));
+
+ if (packet->dst_peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) { /* destination is this node: loop the packet back without touching the wire */
+ packet->src_iface = packet->dst_peer->interface;
+ return nhrp_packet_receive_local(packet);
+ }
+
+ if (packet->dst_peer->flags & (NHRP_PEER_FLAG_UP |
+ NHRP_PEER_FLAG_LOWER_UP))
+ return nhrp_packet_marshall_and_send(packet);
+
+ if (packet->dst_peer->queued_packet != NULL) /* only one packet is kept per peer; a newer one replaces it */
+ nhrp_packet_put(packet->dst_peer->queued_packet);
+ packet->dst_peer->queued_packet = nhrp_packet_get(packet);
+
+ return TRUE;
+}
+
+int nhrp_packet_send(struct nhrp_packet *packet)
+{ /* Send a packet: make sure it has an outgoing interface, add the NAT
+   * address extension CIE when the packet asks for it, then continue
+   * with nhrp_packet_route_and_send() and return its result. */
+ struct nhrp_payload *payload;
+ struct nhrp_cie *cie;
+
+ if (packet->dst_iface == NULL) {
+ if (!nhrp_packet_route(packet)) { /* note: nhrp_packet_route() refuses a NULL dst_iface, so as written this branch always ends in the error path */
+ nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE, 0);
+ return TRUE;
+ }
+ }
+
+ /* Cisco NAT extension CIE */
+ if (packet_types[packet->hdr.type].type != NHRP_TYPE_INDICATION &&
+ (packet->hdr.flags & NHRP_FLAG_REGISTRATION_NAT)) {
+ payload = nhrp_packet_extension(packet, NHRP_EXTENSION_NAT_ADDRESS,
+ NHRP_PAYLOAD_TYPE_CIE_LIST);
+
+ if (packet->dst_iface->nat_cie.nbma_address.addr_len &&
+ payload != NULL && list_empty(&payload->u.cie_list)) { /* only when the interface has a NAT CIE and none was added yet */
+ cie = nhrp_cie_alloc();
+ if (cie != NULL) {
+ *cie = packet->dst_iface->nat_cie;
+ nhrp_cie_reset(cie); /* the struct copy also copied the list linkage; clear it before linking */
+ nhrp_payload_add_cie(payload, cie);
+ }
+ }
+ }
+
+ return nhrp_packet_route_and_send(packet);
+}
+
+static void nhrp_packet_xmit_timeout_cb(struct ev_timer *w, int revents)
+{ /* Retry timer callback of a pending request. While retries remain
+   * (and the packet has a destination peer) the request is sent again;
+   * otherwise the handler is told the request failed and the request is
+   * dropped from the pending list. */
+ struct nhrp_packet *packet =
+ container_of(w, struct nhrp_packet, timeout);
+
+ list_del(&packet->request_list_entry); /* unlinked first; re-added below when the request is retried */
+
+ if (packet->dst_peer != NULL &&
+ ++packet->retry < PACKET_RETRIES) {
+ nhrp_packet_marshall_and_send(packet); /* result ignored; the timer was set up as repeating, so another attempt follows */
+
+ list_add(&packet->request_list_entry, &pending_requests);
+ } else {
+ ev_timer_stop(&packet->timeout);
+ if (packet->dst_peer == NULL)
+ nhrp_error("nhrp_packet_xmit_timeout: no destination peer!");
+ if (packet->handler != NULL)
+ packet->handler(packet->handler_ctx, NULL); /* NULL instead of a reply packet: no answer arrived */
+ nhrp_packet_dequeue(packet); /* drops the pending-list reference; the packet may be freed here */
+ }
+}
+
+int nhrp_packet_send_request(struct nhrp_packet *pkt,
+ void (*handler)(void *ctx, struct nhrp_packet *packet),
+ void *ctx)
+{ /* Send `pkt` as a request and track it until it is answered. The
+   * packet is put on the pending list with its retry timer running, and
+   * `handler` is later called with `ctx` and the reply or error packet,
+   * or with NULL once the retries are used up. Returns the result of
+   * nhrp_packet_send(). */
+ struct nhrp_packet *packet;
+
+ packet = nhrp_packet_get(pkt); /* reference held by the pending list; released in nhrp_packet_dequeue() */
+
+ packet->retry = 0;
+ if (packet->hdr.u.request_id == constant_htonl(0)) { /* no id assigned yet: take the next one from the file-level counter */
+ request_id++;
+ packet->hdr.u.request_id = htonl(request_id);
+ }
+
+ packet->handler = handler;
+ packet->handler_ctx = ctx;
+ list_add(&packet->request_list_entry, &pending_requests);
+ ev_timer_again(&packet->timeout);
+
+ return nhrp_packet_send(packet);
+}
+
+/* Report a problem with `error_packet` to its originator.
+ *
+ * An Error Indication is built from the offending packet's header, with
+ * `indication_code` as error code and `offset` as the position of the
+ * problem, and with the received PDU (req_pdu/req_pdulen, if any) as
+ * raw payload. It is sent back the way the packet came in. When the
+ * originator's protocol address is unspecified the error is delivered
+ * straight to the matching local pending request instead.
+ *
+ * Errors about Error Indications are never generated (RFC2332 5.2.7);
+ * TRUE is returned for those. Otherwise the result of the delivery is
+ * returned, or FALSE when the indication cannot be allocated.
+ */
+int nhrp_packet_send_error(struct nhrp_packet *error_packet,
+			   uint16_t indication_code, uint16_t offset)
+{
+	struct nhrp_packet *p;
+	struct nhrp_payload *pl;
+	int r;
+
+	/* RFC2332 5.2.7 Never generate errors about errors */
+	if (error_packet->hdr.type == NHRP_PACKET_ERROR_INDICATION)
+		return TRUE;
+
+	p = nhrp_packet_alloc();
+	if (p == NULL)
+		return FALSE;
+
+	p->hdr = error_packet->hdr;
+	p->hdr.type = NHRP_PACKET_ERROR_INDICATION;
+	p->hdr.hop_count = 0;
+	p->hdr.u.error.code = indication_code;
+	p->hdr.u.error.offset = htons(offset);
+	p->dst_iface = error_packet->src_iface;
+
+	/* For a reply the "source" is the original requester, so the party
+	 * that sent the reply is found in the destination address. */
+	if (packet_types[error_packet->hdr.type].type == NHRP_TYPE_REPLY)
+		p->dst_protocol_address = error_packet->dst_protocol_address;
+	else
+		p->dst_protocol_address = error_packet->src_protocol_address;
+
+	pl = nhrp_packet_payload(p, NHRP_PAYLOAD_TYPE_RAW);
+	if (pl != NULL)
+		pl->u.raw = nhrp_buffer_alloc(error_packet->req_pdulen);
+	if (pl == NULL || pl->u.raw == NULL) {
+		nhrp_packet_put(p);
+		return FALSE;
+	}
+	/* req_pdu is NULL (with zero length) for packets that were not
+	 * received from the wire; memcpy() must not be given NULL. */
+	if (error_packet->req_pdulen != 0)
+		memcpy(pl->u.raw->data, error_packet->req_pdu,
+		       error_packet->req_pdulen);
+
+	/* Standard extensions */
+	nhrp_packet_extension(p,
+			      NHRP_EXTENSION_FORWARD_TRANSIT_NHS |
+			      NHRP_EXTENSION_FLAG_COMPULSORY,
+			      NHRP_PAYLOAD_TYPE_CIE_LIST);
+
+	if (p->dst_protocol_address.type == PF_UNSPEC)
+		r = nhrp_do_handle_error_indication(p, error_packet);
+	else
+		r = nhrp_packet_send(p);
+
+	nhrp_packet_put(p);
+
+	return r;
+}
+
+/* Send a Traffic Indication (redirect) about a forwarded data packet.
+ *
+ * iface         interface the data packet was seen on; it must have
+ *               NHRP_INTERFACE_FLAG_REDIRECT set
+ * nbma_src      NBMA address the data packet came from; an indication
+ *               is only sent when it belongs to a dynamic peer
+ * protocol_src  protocol source of the data packet; the indication is
+ *               addressed to it
+ * protocol_dst  protocol destination of the data packet
+ * protocol_type value stored in the NHRP header's protocol_type field
+ * pdu, pdulen   bytes of the data packet quoted as raw payload
+ *
+ * Indications are rate limited per source/destination pair: after
+ * RATE_LIMIT_MAX_TOKENS indications nothing more is sent until the flow
+ * has been silent for RATE_LIMIT_SILENCE, and consecutive indications
+ * are at least RATE_LIMIT_SEND_INTERVAL apart.
+ *
+ * Returns the result of nhrp_packet_send(), or FALSE when no indication
+ * is sent (redirects disabled, unknown peer, rate limited, or out of
+ * memory).
+ */
+int nhrp_packet_send_traffic(struct nhrp_interface *iface,
+			     struct nhrp_address *nbma_src,
+			     struct nhrp_address *protocol_src,
+			     struct nhrp_address *protocol_dst,
+			     int protocol_type, uint8_t *pdu, size_t pdulen)
+{
+	struct nhrp_rate_limit *rl;
+	struct nhrp_packet *p;
+	struct nhrp_payload *pl;
+	struct nhrp_peer *peer;
+	char tmp1[64], tmp2[64], tmp3[64], tmp4[64];
+	int r;
+
+	if (!(iface->flags & NHRP_INTERFACE_FLAG_REDIRECT))
+		return FALSE;
+
+	/* Are we serving the NBMA source */
+	peer = nhrp_interface_find_peer(iface, nbma_src);
+	if (peer == NULL || peer->type != NHRP_PEER_TYPE_DYNAMIC)
+		return FALSE;
+
+	rl = get_rate_limit(protocol_src, protocol_dst);
+	if (rl == NULL)
+		return FALSE;
+
+	/* If silence period has elapsed, reset algorithm */
+	if (ev_now() > rl->rate_last + RATE_LIMIT_SILENCE)
+		rl->rate_tokens = 0;
+
+	/* Too many ignored redirects; just update time of last packet */
+	if (rl->rate_tokens >= RATE_LIMIT_MAX_TOKENS) {
+		rl->rate_last = ev_now();
+		return FALSE;
+	}
+
+	/* Check for load limit; set rate_last to last sent redirect */
+	if (rl->rate_tokens != 0 &&
+	    ev_now() < rl->rate_last + RATE_LIMIT_SEND_INTERVAL)
+		return FALSE;
+
+	rl->rate_tokens++;
+	rl->rate_last = ev_now();
+
+	p = nhrp_packet_alloc();
+	if (p == NULL)
+		return FALSE;
+
+	p->hdr = (struct nhrp_packet_header) {
+		.protocol_type = protocol_type,
+		.version = NHRP_VERSION_RFC2332,
+		.type = NHRP_PACKET_TRAFFIC_INDICATION,
+		.hop_count = 0,
+	};
+	p->dst_protocol_address = *protocol_src;
+
+	pl = nhrp_packet_payload(p, NHRP_PAYLOAD_TYPE_RAW);
+	if (pl != NULL)
+		pl->u.raw = nhrp_buffer_alloc(pdulen);
+	if (pl == NULL || pl->u.raw == NULL) {
+		nhrp_packet_put(p);
+		return FALSE;
+	}
+	if (pdulen != 0)
+		memcpy(pl->u.raw->data, pdu, pdulen);
+
+	/* Standard extensions */
+	nhrp_packet_extension(p,
+			      NHRP_EXTENSION_FORWARD_TRANSIT_NHS |
+			      NHRP_EXTENSION_FLAG_COMPULSORY,
+			      NHRP_PAYLOAD_TYPE_CIE_LIST);
+
+	nhrp_info("Sending Traffic Indication about packet from %s to %s (to %s/%s)",
+		  nhrp_address_format(protocol_src, sizeof(tmp1), tmp1),
+		  nhrp_address_format(protocol_dst, sizeof(tmp2), tmp2),
+		  nhrp_address_format(&peer->protocol_address, sizeof(tmp3), tmp3),
+		  nhrp_address_format(&peer->next_hop_address, sizeof(tmp4), tmp4));
+
+	p->dst_iface = iface;
+	p->dst_peer = nhrp_peer_get(peer);
+	r = nhrp_packet_send(p);
+	nhrp_packet_put(p);
+
+	return r;
+}
+
+void nhrp_packet_hook_request(int request,
+ int (*handler)(struct nhrp_packet *packet))
+{ /* Install the local receive handler for packet type `request`. The
+   * type must be a valid index into packet_types[] and must not have a
+   * handler yet; both conditions are enforced with NHRP_BUG_ON. */
+ NHRP_BUG_ON(request < 0 || request >= ARRAY_SIZE(packet_types));
+ NHRP_BUG_ON(packet_types[request].handler != NULL);
+
+ packet_types[request].handler = handler;
+}
diff --git a/nhrp/nhrp_packet.h b/nhrp/nhrp_packet.h
new file mode 100644
index 0000000..3f435c8
--- /dev/null
+++ b/nhrp/nhrp_packet.h
@@ -0,0 +1,128 @@
+/* nhrp_packet.h - In-memory NHRP packet definitions
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#ifndef NHRP_PACKET_H
+#define NHRP_PACKET_H
+
+#include "libev.h"
+#include "list.h"
+#include "nhrp_protocol.h"
+#include "nhrp_address.h"
+
+#define NHRP_MAX_EXTENSIONS 10
+
+#define NHRP_PACKET_DEFAULT_HOP_COUNT 16
+
+struct nhrp_interface;
+
+/* Byte buffer with an explicit length.  data is a trailing array whose
+ * declared size comes from NHRP_EMPTY_ARRAY (defined elsewhere); buffers
+ * are obtained from nhrp_buffer_alloc(). */
+struct nhrp_buffer {
+ uint32_t length;
+ uint8_t data[NHRP_EMPTY_ARRAY];
+};
+
+/* In-memory CIE: the wire header plus the NBMA and protocol addresses.
+ * cie_list_entry presumably links the entry into a payload's cie_list --
+ * confirm against nhrp_payload_add_cie(). */
+struct nhrp_cie {
+ struct list_head cie_list_entry;
+ struct nhrp_cie_header hdr;
+ struct nhrp_address nbma_address;
+ struct nhrp_address protocol_address;
+};
+
+/* Values for the payload_type argument of nhrp_packet_payload() and
+ * nhrp_packet_extension().  The negative value is parenthesized so it
+ * cannot combine with a neighbouring operator where the macro is
+ * expanded. */
+#define NHRP_PAYLOAD_TYPE_ANY		(-1)
+#define NHRP_PAYLOAD_TYPE_NONE		0
+#define NHRP_PAYLOAD_TYPE_RAW		1
+#define NHRP_PAYLOAD_TYPE_CIE_LIST	2
+
+/* One packet payload or extension.  payload_type takes the
+ * NHRP_PAYLOAD_TYPE_* values and presumably selects which member of u is
+ * valid (raw buffer or CIE list) -- confirm against nhrp_packet.c. */
+struct nhrp_payload {
+ uint16_t extension_type;
+ uint16_t payload_type;
+ union {
+ struct nhrp_buffer *raw;
+ struct list_head cie_list;
+ } u;
+};
+
+/* In-memory representation of an NHRP packet together with the bookkeeping
+ * that is used while it is being sent or while a reply is awaited. */
+struct nhrp_packet {
+ /* NOTE(review): looks like a reference count handled through
+ * nhrp_packet_get()/nhrp_packet_put() -- confirm in nhrp_packet.c */
+ int ref;
+
+ /* Protocol header and the addresses carried with it */
+ struct nhrp_packet_header hdr;
+ struct nhrp_address src_nbma_address;
+ struct nhrp_address src_protocol_address;
+ struct nhrp_address dst_protocol_address;
+
+ /* Storage for at most NHRP_MAX_EXTENSIONS payloads/extensions, plus a
+ * pointer table of the same size (presumably indexed by extension
+ * type -- TODO confirm) */
+ int num_extensions;
+ struct nhrp_payload extension_by_order[NHRP_MAX_EXTENSIONS];
+ struct nhrp_payload * extension_by_type[NHRP_MAX_EXTENSIONS];
+
+ /* Request state; handler has the same signature as the callback
+ * given to nhrp_packet_send_request() */
+ struct list_head request_list_entry;
+ struct ev_timer timeout;
+ void (*handler)(void *ctx, struct nhrp_packet *packet);
+ void * handler_ctx;
+ int retry;
+
+ uint8_t * req_pdu;
+ size_t req_pdulen;
+
+ /* Where the packet came from and where it is going; senders fill in
+ * dst_iface and dst_peer before calling nhrp_packet_send() */
+ struct nhrp_interface * src_iface;
+ struct nhrp_address src_linklayer_address;
+ struct nhrp_interface * dst_iface;
+ struct nhrp_peer * dst_peer;
+};
+
+#define NHRP_EXTENSION_FLAG_NOCREATE 0x00010000
+
+int nhrp_rate_limit_clear(struct nhrp_address *addr, int prefix_len);
+
+struct nhrp_buffer *nhrp_buffer_alloc(uint32_t size);
+struct nhrp_buffer *nhrp_buffer_copy(struct nhrp_buffer *buffer);
+int nhrp_buffer_cmp(struct nhrp_buffer *a, struct nhrp_buffer *b);
+void nhrp_buffer_free(struct nhrp_buffer *buffer);
+
+struct nhrp_cie *nhrp_cie_alloc(void);
+void nhrp_cie_free(struct nhrp_cie *cie);
+void nhrp_cie_reset(struct nhrp_cie *cie);
+
+void nhrp_payload_set_type(struct nhrp_payload *payload, int type);
+void nhrp_payload_set_raw(struct nhrp_payload *payload, struct nhrp_buffer *buf);
+void nhrp_payload_add_cie(struct nhrp_payload *payload, struct nhrp_cie *cie);
+struct nhrp_cie *nhrp_payload_get_cie(struct nhrp_payload *payload, int index);
+void nhrp_payload_free(struct nhrp_payload *payload);
+
+struct nhrp_packet *nhrp_packet_alloc(void);
+struct nhrp_packet *nhrp_packet_get(struct nhrp_packet *packet);
+void nhrp_packet_put(struct nhrp_packet *packet);
+
+struct nhrp_payload *nhrp_packet_payload(struct nhrp_packet *packet, int payload_type);
+struct nhrp_payload *nhrp_packet_extension(struct nhrp_packet *packet,
+ uint32_t extension, int payload_type);
+int nhrp_packet_receive(uint8_t *pdu, size_t pdulen,
+ struct nhrp_interface *iface,
+ struct nhrp_address *from);
+int nhrp_packet_route(struct nhrp_packet *packet);
+int nhrp_packet_reroute(struct nhrp_packet *packet, struct nhrp_peer *dst_peer);
+int nhrp_packet_marshall_and_send(struct nhrp_packet *packet);
+int nhrp_packet_route_and_send(struct nhrp_packet *packet);
+int nhrp_packet_send(struct nhrp_packet *packet);
+int nhrp_packet_send_request(struct nhrp_packet *packet,
+ void (*handler)(void *ctx, struct nhrp_packet *packet),
+ void *ctx);
+int nhrp_packet_send_error(struct nhrp_packet *error_packet,
+ uint16_t indication_code, uint16_t offset);
+int nhrp_packet_send_traffic(struct nhrp_interface *iface,
+ struct nhrp_address *nbma_src,
+ struct nhrp_address *protocol_src,
+ struct nhrp_address *protocol_dst,
+ int protocol_type, uint8_t *pdu, size_t pdulen);
+
+void nhrp_packet_hook_request(int request,
+ int (*handler)(struct nhrp_packet *packet));
+
+#endif
diff --git a/nhrp/nhrp_peer.c b/nhrp/nhrp_peer.c
new file mode 100644
index 0000000..c53d4c4
--- /dev/null
+++ b/nhrp/nhrp_peer.c
@@ -0,0 +1,2106 @@
+/* nhrp_peer.c - NHRP peer cache implementation
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include "nhrp_common.h"
+#include "nhrp_peer.h"
+#include "nhrp_interface.h"
+
+#define NHRP_PEER_FORMAT_LEN 128
+
+#define NHRP_SCRIPT_TIMEOUT (2*60)
+#define NHRP_NEGATIVE_CACHE_TIME (3*60)
+#define NHRP_EXPIRY_TIME (5*60)
+
+#define NHRP_HOLDING_TIME_DIVISOR 3 /* See RFC-2332 5.2.3 */
+
+#define NHRP_RETRY_REGISTER_TIME (30 + random()/(RAND_MAX/60))
+#define NHRP_RETRY_ERROR_TIME (60 + random()/(RAND_MAX/120))
+
+#define NHRP_PEER_FLAG_PRUNE_PENDING 0x00010000
+
+/* Printable name of each peer type, indexed by the NHRP_PEER_TYPE_* value.
+ * Note that NHRP_PEER_TYPE_STATIC_DNS is printed as "dynamic-map". */
+const char * const nhrp_peer_type[] = {
+ [NHRP_PEER_TYPE_INCOMPLETE] = "incomplete",
+ [NHRP_PEER_TYPE_NEGATIVE] = "negative",
+ [NHRP_PEER_TYPE_CACHED] = "cached",
+ [NHRP_PEER_TYPE_SHORTCUT_ROUTE] = "shortcut-route",
+ [NHRP_PEER_TYPE_DYNAMIC] = "dynamic",
+ [NHRP_PEER_TYPE_DYNAMIC_NHS] = "dynamic-nhs",
+ [NHRP_PEER_TYPE_STATIC] = "static",
+ [NHRP_PEER_TYPE_STATIC_DNS] = "dynamic-map",
+ [NHRP_PEER_TYPE_LOCAL_ROUTE] = "local-route",
+ [NHRP_PEER_TYPE_LOCAL_ADDR] = "local",
+};
+
+static int nhrp_peer_num_total = 0;
+static struct list_head local_peer_list = LIST_INITIALIZER(local_peer_list);
+
+/* The life cycle of a peer entry, its pending callbacks and their call
+ * order are listed here.
+ *
+ * Generally everything starts from nhrp_peer_insert() call which schedules
+ * (during startup) or directly invokes nhrp_peer_insert_cb().
+ *
+ * INCOMPLETE:
+ * 1. nhrp_peer_insert_cb: send resolution request
+ * 2. nhrp_peer_handle_resolution_reply: entry deleted or reinserted NEGATIVE
+ *
+ * NEGATIVE:
+ * 1. nhrp_peer_insert_cb: schedule task remove
+ *
+ * CACHED, STATIC, DYNAMIC, DYNAMIC_NHS:
+ * 1. nhrp_peer_insert_cb: calls nhrp_peer_restart_cb
+ * 2. nhrp_peer_restart_cb: resolves dns name, or calls nhrp_peer_run_up_script()
+ * 3. nhrp_peer_address_query_cb: calls nhrp_peer_run_up_script()
+ * 4. nhrp_peer_run_up_script: spawns script, or goes to nhrp_peer_lower_is_up()
+ * 5. nhrp_peer_script_peer_up_done: calls nhrp_peer_lower_is_up()
+ * 6. nhrp_peer_lower_is_up: sends registration, or goes to nhrp_peer_is_up()
+ * 7. nhrp_peer_handle_registration_reply:
+ * a. on success: calls nhrp_peer_is_up()
+ * b. on error reply: calls nhrp_peer_send_protocol_purge()
+ * nhrp_peer_handle_purge_protocol_reply: sends new registration
+ * 8. nhrp_peer_is_up: schedules re-register, expire or deletion
+ *
+ * ON EXPIRE:
+ * schedule remove
+ * nhrp_peer_renew is called if peer has USED flag set or becomes set,
+ * while the peer is expired
+ * ON RENEW: sends resolution request, schedule EXPIRE
+ *
+ * ON ERROR for CACHED: reinsert as NEGATIVE
+ * ON ERROR for STATIC: fork peer-down script (if was lower up)
+ * schedule task request link
+ * ON ERROR for DYNAMIC: fork peer-down script (if was lower up)
+ * delete peer
+ *
+ * SHORTCUT_ROUTE:
+ * 1. nhrp_peer_insert_cb: spawns route-up script, or schedules EXPIRE
+ *
+ * STATIC_DNS:
+ * 1. nhrp_peer_insert_cb: calls nhrp_peer_dnsmap_restart_cb
+ * 2. nhrp_peer_dnsmap_restart_cb: resolves dns name
+ * 3. nhrp_peer_dnsmap_query_cb: create new peer entries,
+ * renew existing and delete expired, schedule restart
+ *
+ * LOCAL:
+ * nothing, only netlink code modifies these
+ */
+
+static void nhrp_peer_reinsert(struct nhrp_peer *peer, int type);
+static void nhrp_peer_restart_cb(struct ev_timer *w, int revents);
+static void nhrp_peer_dnsmap_restart_cb(struct ev_timer *w, int revents);
+static void nhrp_peer_remove_cb(struct ev_timer *w, int revents);
+static void nhrp_peer_send_resolve(struct nhrp_peer *peer);
+static void nhrp_peer_send_register_cb(struct ev_timer *w, int revents);
+static void nhrp_peer_expire_cb(struct ev_timer *w, int revents);
+
+static const char *nhrp_error_indication_text(int ei)
+{
+ switch (ei) {
+ case -1:
+ return "timeout";
+ case NHRP_ERROR_UNRECOGNIZED_EXTENSION:
+ return "unrecognized extension";
+ case NHRP_ERROR_LOOP_DETECTED:
+ return "loop detected";
+ case NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE:
+ return "protocol address unreachable";
+ case NHRP_ERROR_PROTOCOL_ERROR:
+ return "protocol error";
+ case NHRP_ERROR_SDU_SIZE_EXCEEDED:
+ return "SDU size exceeded";
+ case NHRP_ERROR_INVALID_EXTENSION:
+ return "invalid extension";
+ case NHRP_ERROR_INVALID_RESOLUTION_REPLY:
+ return "unexpected resolution reply";
+ case NHRP_ERROR_AUTHENTICATION_FAILURE:
+ return "authentication failure";
+ case NHRP_ERROR_HOP_COUNT_EXCEEDED:
+ return "hop count exceeded";
+ }
+ return "unknown";
+}
+
+/* Translate a CIE result code into a short description; codes that are
+ * not listed are reported as "unknown". */
+static const char *nhrp_cie_code_text(int ct)
+{
+	const char *text = "unknown";
+
+	switch (ct) {
+	case NHRP_CODE_SUCCESS:
+		text = "success";
+		break;
+	case NHRP_CODE_ADMINISTRATIVELY_PROHIBITED:
+		text = "administratively prohibited";
+		break;
+	case NHRP_CODE_INSUFFICIENT_RESOURCES:
+		text = "insufficient resources";
+		break;
+	case NHRP_CODE_NO_BINDING_EXISTS:
+		text = "no binding exists";
+		break;
+	case NHRP_CODE_BINDING_NON_UNIQUE:
+		text = "binding non-unique";
+		break;
+	case NHRP_CODE_UNIQUE_ADDRESS_REGISTERED:
+		text = "unique address already registered";
+		break;
+	}
+
+	return text;
+}
+
+static char *nhrp_peer_format_full(struct nhrp_peer *peer, size_t len,
+ char *buf, int full)
+{
+ char tmp[NHRP_PEER_FORMAT_LEN], *str;
+ int i = 0;
+
+ if (peer == NULL) {
+ snprintf(buf, len, "(null)");
+ return buf;
+ }
+
+ i += snprintf(&buf[i], len - i, "%s/%d",
+ nhrp_address_format(&peer->protocol_address, sizeof(tmp), tmp),
+ peer->prefix_length);
+
+ if (peer->next_hop_address.type != PF_UNSPEC) {
+ switch (peer->type) {
+ case NHRP_PEER_TYPE_SHORTCUT_ROUTE:
+ case NHRP_PEER_TYPE_LOCAL_ROUTE:
+ str = "nexthop";
+ break;
+ case NHRP_PEER_TYPE_LOCAL_ADDR:
+ str = "alias";
+ break;
+ default:
+ str = "nbma";
+ break;
+ }
+ i += snprintf(&buf[i], len - i, " %s %s",
+ str,
+ nhrp_address_format(&peer->next_hop_address,
+ sizeof(tmp), tmp));
+ }
+ if (peer->nbma_hostname != NULL) {
+ i += snprintf(&buf[i], len - i, " hostname %s",
+ peer->nbma_hostname);
+ }
+ if (peer->next_hop_nat_oa.type != PF_UNSPEC) {
+ i += snprintf(&buf[i], len - i, " nbma-nat-oa %s",
+ nhrp_address_format(&peer->next_hop_nat_oa,
+ sizeof(tmp), tmp));
+ }
+ if (peer->interface != NULL)
+ i += snprintf(&buf[i], len - i, " dev %s",
+ peer->interface->name);
+ if (peer->mtu)
+ i += snprintf(&buf[i], len - i, " mtu %d", peer->mtu);
+
+ if (!full)
+ return buf;
+
+ if (peer->flags & NHRP_PEER_FLAG_USED)
+ i += snprintf(&buf[i], len - i, " used");
+ if (peer->flags & NHRP_PEER_FLAG_UNIQUE)
+ i += snprintf(&buf[i], len - i, " unique");
+ if (peer->flags & NHRP_PEER_FLAG_UP)
+ i += snprintf(&buf[i], len - i, " up");
+ else if (peer->flags & NHRP_PEER_FLAG_LOWER_UP)
+ i += snprintf(&buf[i], len - i, " lower-up");
+ if (peer->expire_time != 0.0) {
+ int rel;
+
+ rel = peer->expire_time - ev_now();
+ if (rel >= 0) {
+ i += snprintf(&buf[i], len - i, " expires_in %d:%02d",
+ rel / 60, rel % 60);
+ } else {
+ i += snprintf(&buf[i], len - i, " expired");
+ }
+ }
+ if (peer->flags & NHRP_PEER_FLAG_PRUNE_PENDING)
+ i += snprintf(&buf[i], len - i, " dying");
+
+ return buf;
+}
+
+/* Shorthand for nhrp_peer_format_full() with the "full" output enabled */
+static inline char *nhrp_peer_format(struct nhrp_peer *peer,
+				     size_t len, char *buf)
+{
+	const int full = TRUE;
+
+	return nhrp_peer_format_full(peer, len, buf, full);
+}
+
+/* Debugging aid for reference counting: would log the calling function,
+ * the peer and its reference count.  The body is compiled out with
+ * "#if 0", so the function currently does nothing. */
+static inline void nhrp_peer_debug_refcount(const char *func,
+ struct nhrp_peer *peer)
+{
+#if 0
+ char tmp[NHRP_PEER_FORMAT_LEN];
+ nhrp_debug("%s(%s %s) ref=%d",
+ func, nhrp_peer_type[peer->type],
+ nhrp_peer_format(peer, sizeof(tmp), tmp),
+ peer->ref);
+#endif
+}
+
+/* Fill in peer->my_nbma_address and peer->my_nbma_mtu.  The values of the
+ * interface are used when the interface has an NBMA address; otherwise
+ * they are looked up with kernel_route() towards the peer's next hop. */
+static void nhrp_peer_resolve_nbma(struct nhrp_peer *peer)
+{
+	char addr[64];
+
+	if (peer->interface->nbma_address.type != PF_UNSPEC) {
+		peer->my_nbma_address = peer->interface->nbma_address;
+		peer->my_nbma_mtu = peer->interface->nbma_mtu;
+		return;
+	}
+
+	if (!kernel_route(NULL, &peer->next_hop_address,
+			  &peer->my_nbma_address, NULL,
+			  &peer->my_nbma_mtu)) {
+		nhrp_error("No route to next hop address %s",
+			   nhrp_address_format(&peer->next_hop_address,
+					       sizeof(addr), addr));
+	}
+}
+
+/* Build a heap allocated "key=value" string for an environment block.
+ * Returns NULL if the allocation fails. */
+static char *env(const char *key, const char *value)
+{
+	/* one extra byte for '=' and one for the terminating NUL */
+	char *entry = malloc(strlen(key) + strlen(value) + 2);
+
+	if (entry != NULL)
+		sprintf(entry, "%s=%s", key, value);
+
+	return entry;
+}
+
+/* Build a heap allocated "key=<decimal value>" string for an environment
+ * block.  Returns NULL if the allocation fails. */
+static char *envu32(const char *key, uint32_t value)
+{
+	/* 16 bytes cover '=', at most ten digits and the terminating NUL */
+	char *entry = malloc(strlen(key) + 16);
+
+	if (entry != NULL)
+		sprintf(entry, "%s=%u", key, value);
+
+	return entry;
+}
+
+/* Tell whether the event delivered to a peer callback means success.
+ *
+ * revents == 0 is a direct call and counts as success.  Otherwise only a
+ * child event whose process exited with status 0 is a success.  Events
+ * carrying EV_ERROR are always failures: nhrp_peer_run_script() reports a
+ * failed fork() as EV_CHILD | EV_ERROR, and in that case rstatus still
+ * holds whatever an earlier script left behind.
+ */
+int nhrp_peer_event_ok(union nhrp_peer_event e, int revents)
+{
+	int status;
+
+	if (revents == 0)
+		return TRUE;
+	if (revents & EV_ERROR)
+		return FALSE;
+	if (!(revents & EV_CHILD))
+		return FALSE;
+	status = e.child->rstatus;
+	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
+		return TRUE;
+	return FALSE;
+}
+
+/* Describe the outcome of a peer event in buf (at most buflen bytes) and
+ * return buf.
+ *
+ * EV_ERROR is checked first: such an event (for example the
+ * EV_CHILD | EV_ERROR that nhrp_peer_run_script() delivers when fork()
+ * fails) has no valid exit status, so the stale rstatus must not be
+ * reported.
+ */
+char *nhrp_peer_event_reason(union nhrp_peer_event e, int revents,
+			     size_t buflen, char *buf)
+{
+	int status;
+
+	if (revents & EV_ERROR) {
+		snprintf(buf, buflen, "error, revents=%x",
+			 (unsigned int) revents);
+	} else if (revents & EV_CHILD) {
+		status = e.child->rstatus;
+		if (WIFEXITED(status))
+			snprintf(buf, buflen, "exitstatus %d",
+				 WEXITSTATUS(status));
+		else if (WIFSIGNALED(status))
+			snprintf(buf, buflen, "signal %d",
+				 WTERMSIG(status));
+		else
+			snprintf(buf, buflen, "rstatus %d", status);
+	} else if (revents & EV_TIMEOUT) {
+		snprintf(buf, buflen, "timeout");
+	} else if (revents == 0) {
+		snprintf(buf, buflen, "success");
+	} else {
+		snprintf(buf, buflen, "unknown, revents=%x", revents);
+	}
+	return buf;
+}
+
+/* Recover the peer that owns the watcher which fired.  EV_CHILD events
+ * carry the peer's child watcher and EV_TIMEOUT events its timer; any
+ * other call must be a direct one (revents == 0) that passes the child
+ * watcher.  Both watchers are stopped before the peer is returned. */
+struct nhrp_peer *nhrp_peer_from_event(union nhrp_peer_event e, int revents)
+{
+ struct nhrp_peer *peer;
+
+ if (revents & EV_CHILD) {
+ peer = container_of(e.child, struct nhrp_peer, child);
+ } else if (revents & EV_TIMEOUT) {
+ peer = container_of(e.timer, struct nhrp_peer, timer);
+ } else {
+ NHRP_BUG_ON(revents != 0);
+ peer = container_of(e.child, struct nhrp_peer, child);
+ }
+
+ /* Whichever watcher fired, the other one is no longer needed */
+ ev_child_stop(&peer->child);
+ ev_timer_stop(&peer->timer);
+
+ return peer;
+}
+
+/* Run nhrp_script_file with the given action for a peer.
+ *
+ * peer:   entry the script is run for; its addresses are exported to the
+ *         script through NHRP_* environment variables
+ * action: first argument given to the script
+ * cb:     optional completion callback.  When set, it is installed on the
+ *         peer's child watcher and on its timer (NHRP_SCRIPT_TIMEOUT
+ *         seconds), so it fires on script exit or on timeout.  If fork()
+ *         fails it is called right away with EV_CHILD | EV_ERROR.
+ *
+ * The parent returns immediately after fork(); only the child builds the
+ * environment and executes the script.
+ */
+void nhrp_peer_run_script(struct nhrp_peer *peer, char *action,
+			  void (*cb)(union nhrp_peer_event, int))
+{
+	struct nhrp_interface *iface = peer->interface;
+	const char *argv[] = { nhrp_script_file, action, NULL };
+	char *envp[32];
+	char tmp[64];
+	pid_t pid;
+	int i = 0;
+
+	/* Resolve own NBMA address before forking if required
+	 * since it requires traversing peer cache and can trigger
+	 * logging and other stuff. */
+	if (peer->my_nbma_address.type == PF_UNSPEC)
+		nhrp_peer_resolve_nbma(peer);
+
+	/* Fork and execute script */
+	pid = fork();
+	if (pid == -1) {
+		if (cb != NULL)
+			cb(&peer->child, EV_CHILD | EV_ERROR);
+		return;
+	} else if (pid > 0) {
+		if (cb != NULL) {
+			ev_child_stop(&peer->child);
+			ev_child_init(&peer->child, cb, pid, 0);
+			ev_child_start(&peer->child);
+
+			ev_set_cb(&peer->timer, cb);
+			peer->timer.repeat = NHRP_SCRIPT_TIMEOUT;
+			ev_timer_again(&peer->timer);
+		}
+		return;
+	}
+
+	/* Child process from here on: build the script environment */
+	envp[i++] = env("NHRP_TYPE", nhrp_peer_type[peer->type]);
+	if (iface->protocol_address.type != PF_UNSPEC)
+		envp[i++] = env("NHRP_SRCADDR",
+				nhrp_address_format(&iface->protocol_address,
+						    sizeof(tmp), tmp));
+	if (peer->my_nbma_address.type != PF_UNSPEC)
+		envp[i++] = env("NHRP_SRCNBMA",
+				nhrp_address_format(&peer->my_nbma_address,
+						    sizeof(tmp), tmp));
+	envp[i++] = env("NHRP_DESTADDR",
+			nhrp_address_format(&peer->protocol_address,
+					    sizeof(tmp), tmp));
+	envp[i++] = envu32("NHRP_DESTPREFIX", peer->prefix_length);
+
+	if (peer->purge_reason)
+		envp[i++] = env("NHRP_PEER_DOWN_REASON", peer->purge_reason);
+
+	switch (peer->type) {
+	case NHRP_PEER_TYPE_CACHED:
+	case NHRP_PEER_TYPE_LOCAL_ADDR:
+	case NHRP_PEER_TYPE_STATIC:
+	case NHRP_PEER_TYPE_DYNAMIC:
+	case NHRP_PEER_TYPE_DYNAMIC_NHS:
+		envp[i++] = env("NHRP_DESTNBMA",
+				nhrp_address_format(&peer->next_hop_address,
+						    sizeof(tmp), tmp));
+		if (peer->mtu)
+			envp[i++] = envu32("NHRP_DESTMTU", peer->mtu);
+		if (peer->next_hop_nat_oa.type != PF_UNSPEC)
+			envp[i++] = env("NHRP_DESTNBMA_NAT_OA",
+					nhrp_address_format(&peer->next_hop_nat_oa,
+							    sizeof(tmp), tmp));
+		break;
+	case NHRP_PEER_TYPE_SHORTCUT_ROUTE:
+	case NHRP_PEER_TYPE_LOCAL_ROUTE:
+		envp[i++] = env("NHRP_NEXTHOP",
+				nhrp_address_format(&peer->next_hop_address,
+						    sizeof(tmp), tmp));
+		break;
+	default:
+		NHRP_BUG_ON("invalid peer type");
+	}
+	envp[i++] = env("NHRP_INTERFACE", peer->interface->name);
+	envp[i++] = envu32("NHRP_GRE_KEY", peer->interface->gre_key);
+	envp[i++] = NULL;
+
+	execve(nhrp_script_file, (char **) argv, envp);
+
+	/* execve() only returns on failure.  Leave with _exit() so that the
+	 * child neither runs atexit handlers inherited from the daemon nor
+	 * flushes a copy of the daemon's stdio buffers. */
+	_exit(1);
+}
+
+/* Abort everything that is still pending for a peer: drop the queued
+ * packet, finish an outstanding server request, cancel the address query,
+ * stop the timer and interrupt a script that is still being watched. */
+void nhrp_peer_cancel_async(struct nhrp_peer *peer)
+{
+	if (peer->queued_packet) {
+		nhrp_packet_put(peer->queued_packet);
+		peer->queued_packet = NULL;
+	}
+	if (peer->request) {
+		nhrp_server_finish_request(peer->request);
+		peer->request = NULL;
+	}
+
+	nhrp_address_resolve_cancel(&peer->address_query);
+	ev_timer_stop(&peer->timer);
+	if (ev_is_active(&peer->child)) {
+		/* kill() takes the process id first and the signal second;
+		 * the arguments used to be swapped, which sent signal number
+		 * <pid> to process SIGINT instead of interrupting the
+		 * script. */
+		kill(peer->child.pid, SIGINT);
+		ev_child_stop(&peer->child);
+	}
+}
+
+/* Send the packet that was queued for this peer, if there is one, then
+ * drop the queue's reference and clear the slot. */
+void nhrp_peer_send_packet_queue(struct nhrp_peer *peer)
+{
+	if (peer->queued_packet != NULL) {
+		nhrp_packet_marshall_and_send(peer->queued_packet);
+		nhrp_packet_put(peer->queued_packet);
+		peer->queued_packet = NULL;
+	}
+}
+
+/* (Re)arm the peer's timer as a one-shot timer: any pending timer is
+ * stopped first, then cb is scheduled to run after timeout seconds
+ * (the repeat value is 0). */
+static void nhrp_peer_schedule(struct nhrp_peer *peer, ev_tstamp timeout,
+ void (*cb)(struct ev_timer *w, int revents))
+{
+ ev_timer_stop(&peer->timer);
+ ev_timer_init(&peer->timer, cb, timeout, 0.);
+ ev_timer_start(&peer->timer);
+}
+
+/* React to a failure while bringing a peer up.  STATIC and DYNAMIC_NHS
+ * peers are retried through nhrp_peer_restart_cb after
+ * NHRP_RETRY_ERROR_TIME; every other type is reinserted as a NEGATIVE
+ * entry. */
+static void nhrp_peer_restart_error(struct nhrp_peer *peer)
+{
+	if (peer->type == NHRP_PEER_TYPE_STATIC ||
+	    peer->type == NHRP_PEER_TYPE_DYNAMIC_NHS) {
+		nhrp_peer_schedule(peer, NHRP_RETRY_ERROR_TIME,
+				   nhrp_peer_restart_cb);
+		return;
+	}
+
+	nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_NEGATIVE);
+}
+
+/* Completion callback of the "route-up" script.  On success the route is
+ * flagged UP and nhrp_peer_expire_cb is scheduled NHRP_EXPIRY_TIME + 10
+ * seconds before expire_time; on failure the entry is reinserted as
+ * NEGATIVE. */
+static void nhrp_peer_script_route_up_done(union nhrp_peer_event e, int revents)
+{
+ struct nhrp_peer *peer = nhrp_peer_from_event(e, revents);
+ char tmp[64], reason[32];
+
+ if (nhrp_peer_event_ok(e, revents)) {
+ /* revents is 0 for a direct call; log only real script results */
+ if (revents)
+ nhrp_debug("[%s] Route up script: success",
+ nhrp_address_format(&peer->protocol_address,
+ sizeof(tmp), tmp));
+
+ peer->flags |= NHRP_PEER_FLAG_UP;
+ nhrp_peer_schedule(peer, peer->expire_time - NHRP_EXPIRY_TIME
+ - 10 - ev_now(), nhrp_peer_expire_cb);
+ } else {
+ nhrp_info("[%s] Route up script: %s; "
+ "adding negative cached entry",
+ nhrp_address_format(&peer->protocol_address,
+ sizeof(tmp), tmp),
+ nhrp_peer_event_reason(e, revents,
+ sizeof(reason), reason));
+
+ nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_NEGATIVE);
+ }
+}
+
+/* nhrp_peer_foreach() callback: start the "route-up" script for a route
+ * that is not flagged UP yet.  Always returns 0. */
+static int nhrp_peer_routes_up(void *ctx, struct nhrp_peer *peer)
+{
+	if (peer->flags & NHRP_PEER_FLAG_UP)
+		return 0;
+
+	nhrp_peer_run_script(peer, "route-up",
+			     nhrp_peer_script_route_up_done);
+	return 0;
+}
+
+/* nhrp_peer_foreach() callback: renew a route whose removal is pending.
+ * The pending flag is cleared, outstanding work is cancelled and a new
+ * resolution request is sent; the int that ctx points to is incremented
+ * for every renewed route.  Always returns 0. */
+static int nhrp_peer_routes_renew(void *ctx, struct nhrp_peer *peer)
+{
+	int *renewed = (int *) ctx;
+
+	if (!(peer->flags & NHRP_PEER_FLAG_PRUNE_PENDING))
+		return 0;
+
+	peer->flags &= ~NHRP_PEER_FLAG_PRUNE_PENDING;
+	nhrp_peer_cancel_async(peer);
+	nhrp_peer_send_resolve(peer);
+	(*renewed)++;
+
+	return 0;
+}
+
+/* Renew cached information of a peer.  For anything but a shortcut route
+ * the UP shortcut routes on the same interface that use this peer as next
+ * hop are renewed first; then the peer itself is renewed if its removal is
+ * pending. */
+static void nhrp_peer_renew(struct nhrp_peer *peer)
+{
+ struct nhrp_interface *iface = peer->interface;
+ struct nhrp_peer_selector sel;
+ /* Counted by nhrp_peer_routes_renew; not used further in this function */
+ int num_routes = 0;
+
+ /* Renew the cached information: all related routes
+ * or the peer itself */
+ if (peer->type != NHRP_PEER_TYPE_SHORTCUT_ROUTE) {
+ memset(&sel, 0, sizeof(sel));
+ sel.flags = NHRP_PEER_FIND_UP;
+ sel.type_mask = BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE);
+ sel.interface = iface;
+ sel.next_hop_address = peer->protocol_address;
+ nhrp_peer_foreach(nhrp_peer_routes_renew, &num_routes, &sel);
+ }
+
+ if (peer->flags & NHRP_PEER_FLAG_PRUNE_PENDING) {
+ peer->flags &= ~NHRP_PEER_FLAG_PRUNE_PENDING;
+ nhrp_peer_cancel_async(peer);
+ nhrp_peer_send_resolve(peer);
+ }
+}
+
+/* nhrp_peer_foreach() callback: returns 1 if the peer has the USED flag
+ * set and 0 otherwise. */
+static int is_used(void *ctx, struct nhrp_peer *peer)
+{
+	return (peer->flags & NHRP_PEER_FLAG_USED) ? 1 : 0;
+}
+
+/* Timer callback run when a peer enters its expiry period.  The peer is
+ * marked as pending removal and nhrp_peer_remove_cb is scheduled for
+ * expire_time; if the entry is still in use it is renewed right away. */
+static void nhrp_peer_expire_cb(struct ev_timer *w, int revents)
+{
+ struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer);
+ struct nhrp_peer_selector sel;
+ int used;
+
+ peer->flags |= NHRP_PEER_FLAG_PRUNE_PENDING;
+ nhrp_peer_schedule(peer, peer->expire_time - ev_now(),
+ nhrp_peer_remove_cb);
+
+ /* A shortcut route takes its "used" state from the peers matching its
+ * next hop address; other entries carry the flag themselves */
+ if (peer->type == NHRP_PEER_TYPE_SHORTCUT_ROUTE) {
+ memset(&sel, 0, sizeof(sel));
+ sel.interface = peer->interface;
+ sel.protocol_address = peer->next_hop_address;
+ used = nhrp_peer_foreach(is_used, NULL, &sel);
+ } else
+ used = peer->flags & NHRP_PEER_FLAG_USED;
+
+ if (used)
+ nhrp_peer_renew(peer);
+}
+
+/* Take a peer out of service: clear its UP flags (unless it is being
+ * removed for good), remove the shortcut routes that use it as next hop
+ * and unlink it from the multicast list and the NBMA hash. */
+static void nhrp_peer_is_down(struct nhrp_peer *peer)
+{
+ struct nhrp_peer_selector sel;
+
+ /* Remove UP flags if not being removed permanently, so further
+ * lookups are valid */
+ if (!(peer->flags & NHRP_PEER_FLAG_REMOVED))
+ peer->flags &= ~(NHRP_PEER_FLAG_LOWER_UP | NHRP_PEER_FLAG_UP);
+
+ /* Check if there are routes using this peer as next-hop */
+ if (peer->type != NHRP_PEER_TYPE_SHORTCUT_ROUTE) {
+ memset(&sel, 0, sizeof(sel));
+ sel.type_mask = BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE);
+ sel.interface = peer->interface;
+ sel.next_hop_address = peer->protocol_address;
+ nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel);
+ }
+
+ /* Remove from lists */
+ if (list_hashed(&peer->mcast_list_entry))
+ list_del(&peer->mcast_list_entry);
+ if (hlist_hashed(&peer->nbma_hash_entry))
+ hlist_del(&peer->nbma_hash_entry);
+}
+
+/* Put a peer into service.  In order: run the "nhs-up" script on the first
+ * successful registration, (re)insert the peer into the interface's
+ * multicast list and NBMA hash as applicable, flag it UP, bring up the
+ * shortcut routes that use it as next hop, flush the queued packet and
+ * schedule the next timer event according to the peer type. */
+static void nhrp_peer_is_up(struct nhrp_peer *peer)
+{
+ struct nhrp_interface *iface = peer->interface;
+ struct nhrp_peer_selector sel;
+ int mcast = 0, i;
+ char tmp[64];
+
+ /* REGISTER set but UP not yet set */
+ if ((peer->flags & (NHRP_PEER_FLAG_UP | NHRP_PEER_FLAG_REGISTER))
+ == NHRP_PEER_FLAG_REGISTER) {
+ /* First time registration reply received */
+ nhrp_peer_run_script(peer, "nhs-up", NULL);
+ }
+
+ /* Remove from mcast list if previously there */
+ if (list_hashed(&peer->mcast_list_entry))
+ list_del(&peer->mcast_list_entry);
+
+ /* Check if this one needs multicast traffic */
+ if (BIT(peer->type) & iface->mcast_mask) {
+ mcast = 1;
+ } else {
+ for (i = 0; i < iface->mcast_numaddr; i++) {
+ if (!nhrp_address_cmp(&peer->protocol_address,
+ &iface->mcast_addr[i])) {
+ mcast = 1;
+ break;
+ }
+ }
+ }
+
+ if (mcast) {
+ list_add(&peer->mcast_list_entry, &iface->mcast_list);
+ nhrp_info("[%s] Peer inserted to multicast list",
+ nhrp_address_format(&peer->protocol_address,
+ sizeof(tmp), tmp));
+ }
+
+ /* Searchable by NBMA */
+ if (hlist_hashed(&peer->nbma_hash_entry))
+ hlist_del(&peer->nbma_hash_entry);
+ if (BIT(peer->type) & (BIT(NHRP_PEER_TYPE_CACHED) |
+ BIT(NHRP_PEER_TYPE_DYNAMIC) |
+ BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) |
+ BIT(NHRP_PEER_TYPE_STATIC))) {
+ i = nhrp_address_hash(&peer->next_hop_address) % NHRP_INTERFACE_NBMA_HASH_SIZE;
+ hlist_add_head(&peer->nbma_hash_entry, &iface->nbma_hash[i]);
+ }
+
+ peer->flags |= NHRP_PEER_FLAG_UP | NHRP_PEER_FLAG_LOWER_UP;
+
+ /* Check if there are routes using this peer as next-hop*/
+ if (peer->type != NHRP_PEER_TYPE_SHORTCUT_ROUTE) {
+ memset(&sel, 0, sizeof(sel));
+ sel.type_mask = BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE);
+ sel.interface = iface;
+ sel.next_hop_address = peer->protocol_address;
+ nhrp_peer_foreach(nhrp_peer_routes_up, NULL, &sel);
+ }
+
+ nhrp_peer_send_packet_queue(peer);
+
+ /* Schedule expiry or renewal */
+ switch (peer->type) {
+ case NHRP_PEER_TYPE_DYNAMIC:
+ /* Removed when expire_time is reached */
+ nhrp_peer_schedule(peer, peer->expire_time - ev_now(),
+ nhrp_peer_remove_cb);
+ break;
+ case NHRP_PEER_TYPE_CACHED:
+ /* Expiry handling starts NHRP_EXPIRY_TIME before expire_time */
+ nhrp_peer_schedule(
+ peer,
+ peer->expire_time - NHRP_EXPIRY_TIME - ev_now(),
+ nhrp_peer_expire_cb);
+ break;
+ case NHRP_PEER_TYPE_STATIC:
+ case NHRP_PEER_TYPE_DYNAMIC_NHS:
+ /* Re-register after a fraction of the holding time */
+ if (peer->flags & NHRP_PEER_FLAG_REGISTER) {
+ nhrp_peer_schedule(
+ peer, iface->holding_time /
+ NHRP_HOLDING_TIME_DIVISOR + 1,
+ nhrp_peer_send_register_cb);
+ }
+ break;
+ default:
+ NHRP_BUG_ON("invalid peer type");
+ break;
+ }
+}
+
+/* The lower layer of the peer is usable: set LOWER_UP, then either mark
+ * the peer up directly or, for peers with the REGISTER flag, send a
+ * registration first. */
+static void nhrp_peer_lower_is_up(struct nhrp_peer *peer)
+{
+	peer->flags |= NHRP_PEER_FLAG_LOWER_UP;
+
+	if (!(peer->flags & NHRP_PEER_FLAG_REGISTER)) {
+		nhrp_peer_is_up(peer);
+		return;
+	}
+
+	nhrp_peer_send_register_cb(&peer->timer, 0);
+}
+
+/* Completion callback of the "peer-up" script (also called directly with
+ * revents == 0 by nhrp_peer_restart_cb).  On success the neighbor entry is
+ * injected into the kernel and the peer proceeds to
+ * nhrp_peer_lower_is_up(); on failure nhrp_peer_restart_error() decides
+ * how to continue. */
+static void nhrp_peer_script_peer_up_done(union nhrp_peer_event e, int revents)
+{
+ struct nhrp_peer *peer = nhrp_peer_from_event(e, revents);
+ char tmp[64], reason[32];
+
+ if (nhrp_peer_event_ok(e, revents)) {
+ nhrp_debug("[%s] Peer up script: success",
+ nhrp_address_format(&peer->protocol_address,
+ sizeof(tmp), tmp));
+
+ kernel_inject_neighbor(&peer->protocol_address,
+ &peer->next_hop_address,
+ peer->interface);
+ nhrp_peer_lower_is_up(peer);
+ } else {
+ nhrp_error("[%s] Peer up script failed: %s",
+ nhrp_address_format(&peer->protocol_address,
+ sizeof(tmp), tmp),
+ nhrp_peer_event_reason(e, revents,
+ sizeof(reason), reason));
+ nhrp_peer_restart_error(peer);
+ }
+}
+
+/* Start the "peer-up" script; nhrp_peer_script_peer_up_done handles the
+ * result. */
+static void nhrp_peer_run_up_script(struct nhrp_peer *peer)
+{
+ nhrp_peer_run_script(peer, "peer-up",
+ nhrp_peer_script_peer_up_done);
+}
+
+/* Result callback of the NBMA hostname lookup.  The first resolved address
+ * becomes the next hop address (afnum is derived from it) and the peer-up
+ * script is started; without any address the peer takes the error path. */
+static void nhrp_peer_address_query_cb(struct nhrp_address_query *query,
+				       int num_addr, struct nhrp_address *addrs)
+{
+	struct nhrp_peer *peer = container_of(query, struct nhrp_peer,
+					      address_query);
+	char addr[64];
+
+	if (num_addr <= 0) {
+		nhrp_error("Failed to resolve '%s'", peer->nbma_hostname);
+		nhrp_peer_restart_error(peer);
+		return;
+	}
+
+	nhrp_info("Resolved '%s' as %s",
+		  peer->nbma_hostname,
+		  nhrp_address_format(&addrs[0], sizeof(addr), addr));
+	peer->next_hop_address = addrs[0];
+	peer->afnum = nhrp_afnum_from_pf(peer->next_hop_address.type);
+	nhrp_peer_run_up_script(peer);
+}
+
+/* Timer callback that (re)starts bringing a peer up.  Peers configured
+ * with a hostname begin with an address lookup.  Otherwise the own NBMA
+ * address is resolved and the peer-up script is run, unless the lower
+ * layer is already up, in which case the script step is reported as done
+ * right away. */
+static void nhrp_peer_restart_cb(struct ev_timer *w, int revents)
+{
+	struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer);
+
+	if (peer->nbma_hostname != NULL) {
+		nhrp_address_resolve(&peer->address_query,
+				     peer->nbma_hostname,
+				     nhrp_peer_address_query_cb);
+		return;
+	}
+
+	nhrp_peer_resolve_nbma(peer);
+
+	if (peer->flags & NHRP_PEER_FLAG_LOWER_UP)
+		nhrp_peer_script_peer_up_done(&peer->child, 0);
+	else
+		nhrp_peer_run_up_script(peer);
+}
+
+/* Send a Purge Request for our own protocol address to the peer (used when
+ * a registration is refused because the address is already registered).
+ * If the packet was sent, a new registration is scheduled two seconds
+ * later; otherwise the peer takes the restart error path. */
+static void nhrp_peer_send_protocol_purge(struct nhrp_peer *peer)
+{
+ char tmp[64];
+ struct nhrp_packet *packet;
+ struct nhrp_cie *cie;
+ struct nhrp_payload *payload;
+ int sent = FALSE;
+
+ packet = nhrp_packet_alloc();
+ if (packet == NULL)
+ goto error;
+
+ packet->hdr = (struct nhrp_packet_header) {
+ .afnum = peer->afnum,
+ .protocol_type = peer->protocol_type,
+ .version = NHRP_VERSION_RFC2332,
+ .type = NHRP_PACKET_PURGE_REQUEST,
+ .hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT,
+ .flags = NHRP_FLAG_PURGE_NO_REPLY,
+ };
+ if (peer->flags & NHRP_PEER_FLAG_CISCO) {
+ /* Cisco IOS seems to require registration and purge
+ * request id to match, so we need to use a fixed
+ * value. This is in violation of RFC, though. */
+ packet->hdr.u.request_id =
+ nhrp_address_hash(&peer->interface->protocol_address);
+ }
+ packet->dst_protocol_address = peer->protocol_address;
+
+ /* Payload CIE */
+ cie = nhrp_cie_alloc();
+ if (cie == NULL)
+ goto error_free_packet;
+
+ *cie = (struct nhrp_cie) {
+ .hdr.code = NHRP_CODE_SUCCESS,
+ .hdr.mtu = 0,
+ .hdr.preference = 0,
+ .hdr.prefix_length = 0xff,
+ };
+ cie->protocol_address = peer->interface->protocol_address;
+
+ payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST);
+ nhrp_payload_add_cie(payload, cie);
+
+ nhrp_info("Sending Purge Request (of protocol address) to %s",
+ nhrp_address_format(&peer->protocol_address,
+ sizeof(tmp), tmp));
+
+ packet->dst_peer = nhrp_peer_get(peer);
+ packet->dst_iface = peer->interface;
+ sent = nhrp_packet_send(packet);
+error_free_packet:
+ nhrp_packet_put(packet);
+error:
+ /* sent is still FALSE when an allocation above failed */
+ if (sent)
+ nhrp_peer_schedule(peer, 2, nhrp_peer_send_register_cb);
+ else
+ nhrp_peer_restart_error(peer);
+}
+
+/* nhrp_peer_foreach() callback: append a CIE holding the entry's protocol
+ * address and prefix length to the packet passed in ctx.  Entries bound to
+ * an interface without NHRP_INTERFACE_FLAG_SHORTCUT_DEST are skipped, as
+ * are entries for which no CIE can be allocated.  Always returns 0. */
+static int nhrp_add_local_route_cie(void *ctx, struct nhrp_peer *route)
+{
+ struct nhrp_packet *packet = (struct nhrp_packet *) ctx;
+ struct nhrp_payload *payload;
+ struct nhrp_cie *cie;
+
+ if (route->interface != NULL &&
+ !(route->interface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST))
+ return 0;
+
+ cie = nhrp_cie_alloc();
+ if (cie == NULL)
+ return 0;
+
+ *cie = (struct nhrp_cie) {
+ .hdr.code = 0,
+ .hdr.prefix_length = route->prefix_length,
+ .protocol_address = route->protocol_address,
+ };
+
+ payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST);
+ nhrp_payload_add_cie(payload, cie);
+
+ return 0;
+}
+
+/* Validate, and if allowed adopt, the protocol address a next hop server
+ * reports for itself.
+ *
+ * Returns TRUE when newaddr equals the address already known, or when the
+ * peer is a DYNAMIC_NHS entry whose configured address is a network
+ * address and newaddr lies within that prefix; in the latter case
+ * removable entries for newaddr are dropped and the peer's protocol
+ * address is replaced.  Returns FALSE (after logging an error) for any
+ * other address change. */
+int nhrp_peer_discover_nhs(struct nhrp_peer *peer,
+ struct nhrp_address *newaddr)
+{
+ struct nhrp_peer_selector sel;
+ char tmp[32], tmp2[32];
+
+ if (nhrp_address_cmp(&peer->protocol_address, newaddr) == 0)
+ return TRUE;
+
+ if (peer->type != NHRP_PEER_TYPE_DYNAMIC_NHS ||
+ !nhrp_address_is_network(&peer->protocol_address,
+ peer->prefix_length)) {
+ nhrp_error("Unexpected NHS protocol address change %s -> %s",
+ nhrp_address_format(&peer->protocol_address,
+ sizeof(tmp2), tmp2),
+ nhrp_address_format(newaddr, sizeof(tmp), tmp));
+ return FALSE;
+ }
+
+ if (nhrp_address_prefix_cmp(&peer->protocol_address, newaddr,
+ peer->prefix_length) != 0) {
+ nhrp_error("Protocol address change to %s is not within %s/%d",
+ nhrp_address_format(newaddr, sizeof(tmp), tmp),
+ nhrp_address_format(&peer->protocol_address,
+ sizeof(tmp2), tmp2),
+ peer->prefix_length);
+ return FALSE;
+ }
+
+ /* Remove incomplete/cached entries */
+ memset(&sel, 0, sizeof(sel));
+ sel.flags = NHRP_PEER_FIND_EXACT;
+ sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE;
+ sel.interface = peer->interface;
+ sel.protocol_address = *newaddr;
+ nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel);
+
+ /* Update protocol address */
+ peer->protocol_address = *newaddr;
+
+ return TRUE;
+}
+
+/* Reply handler for a Registration Request.
+ *
+ * ctx:   the peer the registration was sent to; the reference held on it
+ *        is released before returning
+ * reply: the received packet, or NULL when no reply was received
+ *
+ * A missing reply takes the restart error path; a reply of any other
+ * packet type schedules a new registration after
+ * NHRP_RETRY_REGISTER_TIME.  For a Registration Reply the CIE code
+ * decides: success marks the peer up (after optionally recording NAT
+ * information and purging the server's view of our local routes),
+ * "unique address registered" triggers a protocol purge, anything else
+ * schedules a retry. */
+static void nhrp_peer_handle_registration_reply(void *ctx,
+ struct nhrp_packet *reply)
+{
+ struct nhrp_peer *peer = (struct nhrp_peer *) ctx;
+ struct nhrp_payload *payload;
+ struct nhrp_cie *cie;
+ struct nhrp_packet *packet;
+ char tmp[NHRP_PEER_FORMAT_LEN];
+ int ec = -1;
+
+ /* Peer was removed while the request was outstanding */
+ if (peer->flags & NHRP_PEER_FLAG_REMOVED)
+ goto ret;
+
+ if (reply == NULL ||
+ reply->hdr.type != NHRP_PACKET_REGISTRATION_REPLY) {
+ ec = reply ? reply->hdr.u.error.code : -1;
+ nhrp_info("Failed to register to %s: %s (%d)",
+ nhrp_address_format(&peer->protocol_address,
+ sizeof(tmp), tmp),
+ nhrp_error_indication_text(ec), ntohs(ec));
+
+ /* ec is -1 when reply is NULL, so reply is valid here */
+ if (ec == NHRP_ERROR_HOP_COUNT_EXCEEDED)
+ nhrp_peer_discover_nhs(peer,
+ &reply->src_protocol_address);
+
+ if (reply != NULL) {
+ nhrp_peer_schedule(peer, NHRP_RETRY_REGISTER_TIME,
+ nhrp_peer_send_register_cb);
+ } else {
+ nhrp_peer_restart_error(peer);
+ }
+ goto ret;
+ }
+
+ /* Check servers protocol address */
+ if (!nhrp_peer_discover_nhs(peer, &reply->dst_protocol_address)) {
+ nhrp_peer_restart_error(peer);
+ goto ret;
+ }
+
+ /* Check result */
+ payload = nhrp_packet_payload(reply, NHRP_PAYLOAD_TYPE_CIE_LIST);
+ if (payload != NULL) {
+ cie = nhrp_payload_get_cie(payload, 1);
+ if (cie != NULL)
+ ec = cie->hdr.code;
+ }
+
+ nhrp_info("Received Registration Reply from %s: %s",
+ nhrp_address_format(&peer->protocol_address,
+ sizeof(tmp), tmp),
+ nhrp_cie_code_text(ec));
+
+ switch (ec) {
+ case NHRP_CODE_SUCCESS:
+ break;
+ case NHRP_CODE_UNIQUE_ADDRESS_REGISTERED:
+ nhrp_peer_send_protocol_purge(peer);
+ goto ret;
+ default:
+ nhrp_peer_schedule(peer, NHRP_RETRY_REGISTER_TIME,
+ nhrp_peer_send_register_cb);
+ goto ret;
+ }
+
+ /* Check for NAT */
+ payload = nhrp_packet_extension(reply,
+ NHRP_EXTENSION_NAT_ADDRESS |
+ NHRP_EXTENSION_FLAG_NOCREATE,
+ NHRP_PAYLOAD_TYPE_CIE_LIST);
+ if (payload != NULL) {
+ cie = nhrp_payload_get_cie(payload, 2);
+ if (cie != NULL) {
+ nhrp_info("NAT detected: our real NBMA address is %s",
+ nhrp_address_format(&cie->nbma_address,
+ sizeof(tmp), tmp));
+ peer->interface->nat_cie = *cie;
+ }
+ }
+
+ /* If not re-registration, send a purge request for each subnet
+ * we accept shortcuts to, to clear server redirection cache. */
+ if (!(peer->flags & NHRP_PEER_FLAG_UP) &&
+ (packet = nhrp_packet_alloc()) != NULL) {
+ struct nhrp_peer_selector sel;
+
+ packet->hdr = (struct nhrp_packet_header) {
+ .afnum = peer->afnum,
+ .protocol_type = peer->protocol_type,
+ .version = NHRP_VERSION_RFC2332,
+ .type = NHRP_PACKET_PURGE_REQUEST,
+ .hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT,
+ };
+ packet->dst_protocol_address = peer->protocol_address;
+
+ /* One CIE per local address entry that qualifies */
+ memset(&sel, 0, sizeof(sel));
+ sel.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ADDR);
+ nhrp_peer_foreach(nhrp_add_local_route_cie, packet, &sel);
+
+ nhrp_packet_extension(packet,
+ NHRP_EXTENSION_FORWARD_TRANSIT_NHS |
+ NHRP_EXTENSION_FLAG_COMPULSORY,
+ NHRP_PAYLOAD_TYPE_CIE_LIST);
+ nhrp_packet_extension(packet,
+ NHRP_EXTENSION_REVERSE_TRANSIT_NHS |
+ NHRP_EXTENSION_FLAG_COMPULSORY,
+ NHRP_PAYLOAD_TYPE_CIE_LIST);
+ nhrp_packet_extension(packet,
+ NHRP_EXTENSION_RESPONDER_ADDRESS |
+ NHRP_EXTENSION_FLAG_COMPULSORY,
+ NHRP_PAYLOAD_TYPE_CIE_LIST);
+
+ nhrp_info("Sending Purge Request (of local routes) to %s",
+ nhrp_address_format(&peer->protocol_address,
+ sizeof(tmp), tmp));
+
+ packet->dst_peer = nhrp_peer_get(peer);
+ packet->dst_iface = peer->interface;
+ /* No reply handler: the result of the purge is not tracked */
+ nhrp_packet_send_request(packet, NULL, NULL);
+ nhrp_packet_put(packet);
+ }
+
+ /* Re-register after holding time expires */
+ nhrp_peer_is_up(peer);
+ret:
+ nhrp_peer_put(peer);
+}
+
+/* Timer callback that builds and sends an NHRP Registration Request to
+ * the peer embedding timer w.
+ *
+ * The request carries one payload CIE (our NBMA MTU and the interface
+ * holding time), the standard transit/responder extensions and a NAT
+ * address extension CIE describing the peer. The reply is delivered to
+ * nhrp_peer_handle_registration_reply() with a peer reference as context.
+ *
+ * If the request could not be sent, the packet (when one was allocated)
+ * is released and the peer is restarted via nhrp_peer_restart_error().
+ * revents is unused. */
+static void nhrp_peer_send_register_cb(struct ev_timer *w, int revents)
+{
+	struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer);
+	char dst[64];
+	struct nhrp_packet *packet;
+	struct nhrp_cie *cie;
+	struct nhrp_payload *payload;
+	int sent = FALSE;
+
+	packet = nhrp_packet_alloc();
+	if (packet == NULL)
+		goto error;
+
+	packet->hdr = (struct nhrp_packet_header) {
+		.afnum = peer->afnum,
+		.protocol_type = peer->protocol_type,
+		.version = NHRP_VERSION_RFC2332,
+		.type = NHRP_PACKET_REGISTRATION_REQUEST,
+		.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT,
+		.flags = NHRP_FLAG_REGISTRATION_UNIQUE |
+			 NHRP_FLAG_REGISTRATION_NAT
+	};
+	if (peer->flags & NHRP_PEER_FLAG_CISCO) {
+		/* Cisco IOS seems to require registration and purge
+		 * request id to match, so we need to use a fixed
+		 * value. This is in violation of RFC, though. */
+		packet->hdr.u.request_id =
+			nhrp_address_hash(&peer->interface->protocol_address);
+	}
+	packet->dst_protocol_address = peer->protocol_address;
+
+	if (peer->type == NHRP_PEER_TYPE_DYNAMIC_NHS &&
+	    nhrp_address_is_network(&peer->protocol_address,
+				    peer->prefix_length)) {
+		/* We are not yet sure of the protocol address of the NHS -
+		 * send registration to the broadcast address with one hop
+		 * limit. Expect the NHS to reply with its real protocol
+		 * address. */
+		nhrp_address_set_broadcast(&packet->dst_protocol_address,
+					   peer->prefix_length);
+		packet->hdr.hop_count = 0;
+	}
+
+	/* Payload CIE */
+	cie = nhrp_cie_alloc();
+	if (cie == NULL) {
+		/* The packet already exists here, so it must be released
+		 * (jumping to "error" would leak it). */
+		goto error_free_packet;
+	}
+
+	*cie = (struct nhrp_cie) {
+		.hdr.code = NHRP_CODE_SUCCESS,
+		.hdr.prefix_length = 0xff,
+		.hdr.mtu = htons(peer->my_nbma_mtu),
+		.hdr.holding_time = htons(peer->interface->holding_time),
+		.hdr.preference = 0,
+	};
+
+	payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST);
+	nhrp_payload_add_cie(payload, cie);
+
+	/* Standard extensions */
+	nhrp_packet_extension(packet,
+			      NHRP_EXTENSION_FORWARD_TRANSIT_NHS |
+			      NHRP_EXTENSION_FLAG_COMPULSORY,
+			      NHRP_PAYLOAD_TYPE_CIE_LIST);
+	nhrp_packet_extension(packet,
+			      NHRP_EXTENSION_REVERSE_TRANSIT_NHS |
+			      NHRP_EXTENSION_FLAG_COMPULSORY,
+			      NHRP_PAYLOAD_TYPE_CIE_LIST);
+	nhrp_packet_extension(packet,
+			      NHRP_EXTENSION_RESPONDER_ADDRESS |
+			      NHRP_EXTENSION_FLAG_COMPULSORY,
+			      NHRP_PAYLOAD_TYPE_CIE_LIST);
+
+	/* Cisco NAT extension CIE */
+	cie = nhrp_cie_alloc();
+	if (cie == NULL)
+		goto error_free_packet;
+
+	*cie = (struct nhrp_cie) {
+		.hdr.code = NHRP_CODE_SUCCESS,
+		.hdr.prefix_length = peer->protocol_address.addr_len * 8,
+		.hdr.preference = 0,
+		.nbma_address = peer->next_hop_address,
+		.protocol_address = peer->protocol_address,
+	};
+
+	payload = nhrp_packet_extension(packet, NHRP_EXTENSION_NAT_ADDRESS,
+					NHRP_PAYLOAD_TYPE_CIE_LIST);
+	nhrp_payload_add_cie(payload, cie);
+
+	nhrp_info("Sending Registration Request to %s (my mtu=%d)",
+		  nhrp_address_format(&peer->protocol_address,
+				      sizeof(dst), dst),
+		  peer->my_nbma_mtu);
+
+	packet->dst_peer = nhrp_peer_get(peer);
+	packet->dst_iface = peer->interface;
+	/* The extra peer reference is the reply handler's context; the
+	 * handler drops it with nhrp_peer_put(). */
+	sent = nhrp_packet_send_request(packet,
+					nhrp_peer_handle_registration_reply,
+					nhrp_peer_get(peer));
+
+error_free_packet:
+	nhrp_packet_put(packet);
+error:
+	if (!sent)
+		nhrp_peer_restart_error(peer);
+}
+
+/* Peer enumerator that reports a hit for whatever peer it is handed.
+ * Returning non-zero makes enumerate_peer_cache() stop at the first
+ * matching entry, so nhrp_peer_foreach() yields non-zero exactly when
+ * the selector matched something. Both arguments are ignored. */
+static int error_on_matching(void *ctx, struct nhrp_peer *peer)
+{
+	enum { MATCH_FOUND = 1 };
+
+	return MATCH_FOUND;
+}
+
+/* Reply handler for the Resolution Request sent by
+ * nhrp_peer_send_resolve().
+ *
+ * ctx is the (incomplete) struct nhrp_peer reference passed along with
+ * the request; it is dropped with nhrp_peer_put() on every exit path.
+ * reply may be NULL, which is handled as a time out.
+ *
+ * Outcomes:
+ *  - error reply: the entry is turned into a negative cache entry;
+ *  - no reply: the entry is removed;
+ *  - reply for the requested address itself: the entry becomes CACHED;
+ *  - reply pointing to another protocol address: a CACHED entry for that
+ *    next hop is created if missing, a SHORTCUT_ROUTE entry is inserted
+ *    and the incomplete entry is removed.
+ *
+ * A reply without a CIE list payload or without any CIE is ignored. */
+static void nhrp_peer_handle_resolution_reply(void *ctx,
+					      struct nhrp_packet *reply)
+{
+	struct nhrp_peer *peer = (struct nhrp_peer *) ctx, *np;
+	struct nhrp_payload *payload;
+	struct nhrp_cie *cie, *natcie = NULL, *natoacie = NULL;
+	struct nhrp_interface *iface;
+	struct nhrp_peer_selector sel;
+	char dst[64], tmp[64], nbma[64];
+	int ec;
+
+	if (peer->flags & NHRP_PEER_FLAG_REMOVED)
+		goto ret;
+
+	if (reply == NULL ||
+	    reply->hdr.type != NHRP_PACKET_RESOLUTION_REPLY) {
+		ec = reply ? reply->hdr.u.error.code : -1;
+
+		nhrp_info("Failed to resolve %s: %s (%d)",
+			  nhrp_address_format(&peer->protocol_address,
+					      sizeof(tmp), tmp),
+			  nhrp_error_indication_text(ec), ntohs(ec));
+
+		if (reply != NULL) {
+			/* We got reply that this address is not available -
+			 * negative cache it. */
+			peer->flags |= NHRP_PEER_FLAG_UP;
+			nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_NEGATIVE);
+		} else {
+			/* No reply arrived (time out). Drop the entry; it is
+			 * created again if resolution is still needed. */
+			nhrp_peer_remove(peer);
+		}
+		goto ret;
+	}
+
+	/* The payload lookup can fail (the registration reply handler
+	 * checks for this as well), so do not dereference it blindly. */
+	payload = nhrp_packet_payload(reply, NHRP_PAYLOAD_TYPE_CIE_LIST);
+	if (payload == NULL)
+		goto ret;
+
+	cie = list_next(&payload->u.cie_list, struct nhrp_cie, cie_list_entry);
+	if (cie == NULL)
+		goto ret;
+
+	nhrp_info("Received Resolution Reply %s/%d is at proto %s nbma %s",
+		  nhrp_address_format(&peer->protocol_address,
+				      sizeof(dst), dst),
+		  cie->hdr.prefix_length,
+		  nhrp_address_format(&cie->protocol_address,
+				      sizeof(tmp), tmp),
+		  nhrp_address_format(&cie->nbma_address,
+				      sizeof(nbma), nbma));
+
+	payload = nhrp_packet_extension(reply,
+					NHRP_EXTENSION_NAT_ADDRESS |
+					NHRP_EXTENSION_FLAG_NOCREATE,
+					NHRP_PAYLOAD_TYPE_CIE_LIST);
+	if ((reply->hdr.flags & NHRP_FLAG_RESOLUTION_NAT) &&
+	    (payload != NULL)) {
+		natcie = list_next(&payload->u.cie_list, struct nhrp_cie,
+				   cie_list_entry);
+		if (natcie != NULL) {
+			/* The NAT CIE supplies the next hop NBMA address;
+			 * the payload CIE is kept as the NAT-OA source. */
+			natoacie = cie;
+			nhrp_info("NAT detected: really at proto %s nbma %s",
+				  nhrp_address_format(&natcie->protocol_address,
+						      sizeof(tmp), tmp),
+				  nhrp_address_format(&natcie->nbma_address,
+						      sizeof(nbma), nbma));
+		}
+	}
+	if (natcie == NULL)
+		natcie = cie;
+
+	if (nhrp_address_cmp(&peer->protocol_address, &cie->protocol_address)
+	    == 0) {
+		/* Destination is within NBMA network; update cache */
+		peer->mtu = ntohs(cie->hdr.mtu);
+		peer->prefix_length = cie->hdr.prefix_length;
+		peer->next_hop_address = natcie->nbma_address;
+		if (natoacie != NULL)
+			peer->next_hop_nat_oa = natoacie->nbma_address;
+		peer->expire_time = ev_now() + ntohs(cie->hdr.holding_time);
+		nhrp_address_set_network(&peer->protocol_address,
+					 peer->prefix_length);
+		nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_CACHED);
+		goto ret;
+	}
+
+	/* Check that we won't replace a local address */
+	sel = (struct nhrp_peer_selector) {
+		.flags = NHRP_PEER_FIND_EXACT,
+		.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ADDR),
+		.protocol_address = peer->protocol_address,
+		.prefix_length = cie->hdr.prefix_length,
+	};
+	if (nhrp_peer_foreach(error_on_matching, NULL, &sel)) {
+		nhrp_error("Local route %s/%d exists: not replacing "
+			   "with shortcut",
+			   nhrp_address_format(&peer->protocol_address,
+					       sizeof(tmp), tmp),
+			   cie->hdr.prefix_length);
+		peer->flags |= NHRP_PEER_FLAG_UP;
+		nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_NEGATIVE);
+		goto ret;
+	}
+
+	/* Update the received NBMA address to nexthop */
+	iface = peer->interface;
+	np = nhrp_peer_route(iface, &cie->protocol_address,
+			     NHRP_PEER_FIND_EXACT, 0);
+	if (np == NULL) {
+		np = nhrp_peer_alloc(iface);
+		if (np == NULL)
+			goto drop_incomplete;
+		np->type = NHRP_PEER_TYPE_CACHED;
+		np->afnum = reply->hdr.afnum;
+		np->protocol_type = reply->hdr.protocol_type;
+		np->protocol_address = cie->protocol_address;
+		np->next_hop_address = natcie->nbma_address;
+		if (natoacie != NULL)
+			np->next_hop_nat_oa = natoacie->nbma_address;
+		np->mtu = ntohs(cie->hdr.mtu);
+		np->prefix_length = cie->protocol_address.addr_len * 8;
+		np->expire_time = ev_now() + ntohs(cie->hdr.holding_time);
+		nhrp_peer_insert(np);
+		/* The peer list holds its own reference after insert */
+		nhrp_peer_put(np);
+	}
+
+	/* Off NBMA destination; a shortcut route */
+	np = nhrp_peer_alloc(iface);
+	if (np == NULL)
+		goto drop_incomplete;
+	np->type = NHRP_PEER_TYPE_SHORTCUT_ROUTE;
+	np->afnum = reply->hdr.afnum;
+	np->protocol_type = reply->hdr.protocol_type;
+	np->protocol_address = peer->protocol_address;
+	np->prefix_length = cie->hdr.prefix_length;
+	np->next_hop_address = cie->protocol_address;
+	np->expire_time = ev_now() + ntohs(cie->hdr.holding_time);
+	nhrp_address_set_network(&np->protocol_address, np->prefix_length);
+	nhrp_peer_insert(np);
+	nhrp_peer_put(np);
+
+drop_incomplete:
+	/* Delete the incomplete entry */
+	nhrp_peer_remove(peer);
+ret:
+	nhrp_peer_put(peer);
+}
+
+/* Build and send an NHRP Resolution Request for peer->protocol_address.
+ *
+ * The request carries a single payload CIE with the interface holding
+ * time plus the standard transit/responder extensions and an empty NAT
+ * address extension. The reply is delivered to
+ * nhrp_peer_handle_resolution_reply() with a peer reference as context.
+ *
+ * When the packet cannot be allocated nothing is sent and there is no
+ * packet to release; when the CIE cannot be allocated the packet is
+ * released without being sent. */
+static void nhrp_peer_send_resolve(struct nhrp_peer *peer)
+{
+	char dst[64];
+	struct nhrp_packet *packet;
+	struct nhrp_cie *cie;
+	struct nhrp_payload *payload;
+
+	packet = nhrp_packet_alloc();
+	if (packet == NULL) {
+		/* Nothing was allocated, so there is nothing to put back;
+		 * never hand a NULL packet to nhrp_packet_put(). */
+		return;
+	}
+
+	packet->hdr = (struct nhrp_packet_header) {
+		.afnum = peer->afnum,
+		.protocol_type = peer->protocol_type,
+		.version = NHRP_VERSION_RFC2332,
+		.type = NHRP_PACKET_RESOLUTION_REQUEST,
+		.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT,
+		.flags = NHRP_FLAG_RESOLUTION_SOURCE_IS_ROUTER |
+			 NHRP_FLAG_RESOLUTION_AUTHORATIVE |
+			 NHRP_FLAG_RESOLUTION_NAT
+	};
+	packet->dst_protocol_address = peer->protocol_address;
+
+	/* Payload CIE */
+	cie = nhrp_cie_alloc();
+	if (cie == NULL)
+		goto error;
+
+	*cie = (struct nhrp_cie) {
+		.hdr.code = NHRP_CODE_SUCCESS,
+		.hdr.prefix_length = 0,
+		.hdr.mtu = 0,
+		.hdr.holding_time = htons(peer->interface->holding_time),
+	};
+
+	payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST);
+	nhrp_payload_add_cie(payload, cie);
+
+	nhrp_info("Sending Resolution Request to %s",
+		  nhrp_address_format(&peer->protocol_address,
+				      sizeof(dst), dst));
+
+	/* Standard extensions */
+	nhrp_packet_extension(packet,
+			      NHRP_EXTENSION_FORWARD_TRANSIT_NHS |
+			      NHRP_EXTENSION_FLAG_COMPULSORY,
+			      NHRP_PAYLOAD_TYPE_CIE_LIST);
+	nhrp_packet_extension(packet,
+			      NHRP_EXTENSION_REVERSE_TRANSIT_NHS |
+			      NHRP_EXTENSION_FLAG_COMPULSORY,
+			      NHRP_PAYLOAD_TYPE_CIE_LIST);
+	nhrp_packet_extension(packet,
+			      NHRP_EXTENSION_RESPONDER_ADDRESS |
+			      NHRP_EXTENSION_FLAG_COMPULSORY,
+			      NHRP_PAYLOAD_TYPE_CIE_LIST);
+	nhrp_packet_extension(packet,
+			      NHRP_EXTENSION_NAT_ADDRESS,
+			      NHRP_PAYLOAD_TYPE_CIE_LIST);
+
+	packet->dst_iface = peer->interface;
+	/* The extra peer reference is the reply handler's context; the
+	 * handler drops it with nhrp_peer_put(). */
+	nhrp_packet_send_request(packet,
+				 nhrp_peer_handle_resolution_reply,
+				 nhrp_peer_get(peer));
+
+error:
+	nhrp_packet_put(packet);
+}
+
+/* Allocate a zero-initialised peer bound to iface.
+ *
+ * The returned peer has a reference count of one, which the caller owns
+ * and releases with nhrp_peer_put(). Its list entries, timer and child
+ * watcher are initialised but not started.
+ *
+ * Returns NULL when memory cannot be allocated; in that case the global
+ * allocation counter nhrp_peer_num_total is left untouched. */
+struct nhrp_peer *nhrp_peer_alloc(struct nhrp_interface *iface)
+{
+	struct nhrp_peer *p;
+
+	p = calloc(1, sizeof(*p));
+	if (p == NULL)
+		return NULL;
+
+	/* Count only peers that really exist: nhrp_peer_release()
+	 * decrements this once per freed peer. */
+	nhrp_peer_num_total++;
+	p->ref = 1;
+	p->interface = iface;
+	list_init(&p->peer_list_entry);
+	list_init(&p->mcast_list_entry);
+	ev_timer_init(&p->timer, NULL, 0., 0.);
+	ev_child_init(&p->child, NULL, 0, 0);
+
+	return p;
+}
+
+/* Take an additional reference on peer and hand the same pointer back.
+ * A NULL peer is passed through unchanged. */
+struct nhrp_peer *nhrp_peer_get(struct nhrp_peer *peer)
+{
+	if (peer != NULL) {
+		peer->ref += 1;
+		nhrp_peer_debug_refcount(__FUNCTION__, peer);
+	}
+
+	return peer;
+}
+
+/* Run the "nhs-down" script for peer, but only when the peer is a
+ * registration target (REGISTER), is currently up (UP) and has not been
+ * superseded by another entry (REPLACED). */
+static void nhrp_peer_run_nhs_down(struct nhrp_peer *peer)
+{
+	if ((peer->flags & NHRP_PEER_FLAG_REGISTER) !=
+	    NHRP_PEER_FLAG_REGISTER)
+		return;
+
+	if ((peer->flags & NHRP_PEER_FLAG_UP) != NHRP_PEER_FLAG_UP)
+		return;
+
+	if (peer->flags & NHRP_PEER_FLAG_REPLACED)
+		return;
+
+	nhrp_peer_run_script(peer, "nhs-down", NULL);
+}
+
+/* Final teardown of a peer; called by nhrp_peer_put() once the reference
+ * count has dropped to zero.
+ *
+ * Cancels pending asynchronous work, unlinks the peer from the multicast
+ * list and the NBMA hash, drops the reference held on the parent, runs
+ * the type specific "down" handling and finally frees the memory.
+ * The peer_list_entry is not touched here: nhrp_peer_remove_cb() unlinks
+ * it before dropping the reference. */
+static void nhrp_peer_release(struct nhrp_peer *peer)
+{
+ struct nhrp_interface *iface = peer->interface;
+ struct nhrp_peer_selector sel;
+
+ nhrp_peer_cancel_async(peer);
+
+ /* Remove from lists */
+ if (list_hashed(&peer->mcast_list_entry))
+ list_del(&peer->mcast_list_entry);
+ if (hlist_hashed(&peer->nbma_hash_entry))
+ hlist_del(&peer->nbma_hash_entry);
+
+ if (peer->parent != NULL) {
+ nhrp_peer_put(peer->parent);
+ peer->parent = NULL;
+ }
+
+ switch (peer->type) {
+ case NHRP_PEER_TYPE_SHORTCUT_ROUTE:
+ /* Only routes that were brought up and are not being replaced
+ * by another entry get a "route-down" script run */
+ if ((peer->flags & NHRP_PEER_FLAG_UP) &&
+ !(peer->flags & NHRP_PEER_FLAG_REPLACED))
+ nhrp_peer_run_script(peer, "route-down", NULL);
+ break;
+ case NHRP_PEER_TYPE_CACHED:
+ case NHRP_PEER_TYPE_DYNAMIC:
+ case NHRP_PEER_TYPE_STATIC:
+ case NHRP_PEER_TYPE_DYNAMIC_NHS:
+ /* A replaced entry is skipped entirely: no route removal,
+ * no scripts, no neighbor update */
+ if (peer->flags & NHRP_PEER_FLAG_REPLACED)
+ break;
+
+ /* Remove cached routes using this entry as next-hop */
+ memset(&sel, 0, sizeof(sel));
+ sel.type_mask = BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE);
+ sel.interface = iface;
+ sel.next_hop_address = peer->protocol_address;
+ nhrp_peer_foreach(nhrp_peer_remove_matching, NULL,
+ &sel);
+
+ /* Execute peer-down */
+ nhrp_peer_run_nhs_down(peer);
+ if (peer->flags & NHRP_PEER_FLAG_UP) {
+ peer->purge_reason = "timeout";
+ nhrp_peer_run_script(peer, "peer-down", NULL);
+ }
+
+ /* Remove from arp cache */
+ if (peer->protocol_address.type != PF_UNSPEC)
+ kernel_inject_neighbor(&peer->protocol_address,
+ NULL, peer->interface);
+ break;
+ case NHRP_PEER_TYPE_INCOMPLETE:
+ case NHRP_PEER_TYPE_NEGATIVE:
+ case NHRP_PEER_TYPE_LOCAL_ADDR:
+ case NHRP_PEER_TYPE_LOCAL_ROUTE:
+ case NHRP_PEER_TYPE_STATIC_DNS:
+ /* Nothing type specific to undo for these */
+ break;
+ default:
+ NHRP_BUG_ON("invalid peer type");
+ break;
+ }
+
+ if (peer->nbma_hostname) {
+ free(peer->nbma_hostname);
+ peer->nbma_hostname = NULL;
+ }
+
+ free(peer);
+ /* Keeps the global count in step with nhrp_peer_alloc() */
+ nhrp_peer_num_total--;
+}
+
+/* Drop one reference on peer.
+ *
+ * Returns TRUE when this was the last reference and the peer has been
+ * released (the pointer must not be used afterwards), FALSE when other
+ * references remain. Dropping a reference on a peer whose count is
+ * already zero trips NHRP_BUG_ON. */
+int nhrp_peer_put(struct nhrp_peer *peer)
+{
+	NHRP_BUG_ON(peer->ref == 0);
+
+	peer->ref -= 1;
+	nhrp_peer_debug_refcount(__FUNCTION__, peer);
+
+	if (!(peer->ref > 0)) {
+		nhrp_peer_release(peer);
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/* Peer enumerator: set the MARK flag on peer. Always returns 0 so the
+ * enumeration carries on with the next peer. ctx is unused. */
+static int nhrp_peer_mark_matching(void *ctx, struct nhrp_peer *peer)
+{
+	peer->flags = peer->flags | NHRP_PEER_FLAG_MARK;
+
+	return 0;
+}
+
+/* Peer enumerator: clear the MARK flag on peer. Returns 1, which stops
+ * the enumeration at this peer and tells the caller that a matching
+ * entry exists. ctx is unused. */
+static int nhrp_peer_renew_nhs_matching(void *ctx, struct nhrp_peer *peer)
+{
+	peer->flags = peer->flags & ~NHRP_PEER_FLAG_MARK;
+
+	return 1;
+}
+
+/* Completion callback of the address query started by
+ * nhrp_peer_dnsmap_restart_cb() for a dns-map peer.
+ *
+ * query is the address_query member embedded in the peer. num_addr is
+ * the number of entries in addrs; a negative value is treated as a
+ * failed lookup, which is retried after 10 seconds.
+ *
+ * On success the DYNAMIC_NHS children of this peer are synchronised with
+ * the result: existing children are marked, children whose NBMA address
+ * is in the result are unmarked, missing ones are created, and whatever
+ * is still marked afterwards is removed. The lookup is then scheduled
+ * again after the interface holding time. */
+static void nhrp_peer_dnsmap_query_cb(struct nhrp_address_query *query,
+ int num_addr, struct nhrp_address *addrs)
+{
+ struct nhrp_peer *np, *peer =
+ container_of(query, struct nhrp_peer, address_query);
+ struct nhrp_peer_selector sel;
+ int i;
+
+ if (num_addr < 0) {
+ nhrp_error("Failed to resolve '%s'", peer->nbma_hostname);
+ nhrp_peer_schedule(peer, 10, nhrp_peer_dnsmap_restart_cb);
+ return;
+ }
+
+ if (num_addr > 0) {
+ /* Refresh protocol */
+ peer->afnum = nhrp_afnum_from_pf(addrs[0].type);
+ }
+
+ /* Mark existing dynamic nhs entries as expired */
+ memset(&sel, 0, sizeof(sel));
+ sel.type_mask = BIT(NHRP_PEER_TYPE_DYNAMIC_NHS);
+ sel.interface = peer->interface;
+ sel.parent = peer;
+ nhrp_peer_foreach(nhrp_peer_mark_matching, NULL, &sel);
+
+ for (i = 0; i < num_addr; i++) {
+ /* If this NBMA exists as dynamic NHS, mark it ok. */
+ /* The same selector is reused; only the next hop changes */
+ sel.next_hop_address = addrs[i];
+ if (nhrp_peer_foreach(nhrp_peer_renew_nhs_matching,
+ NULL, &sel) != 0)
+ continue;
+
+ /* New NHS, create a peer entry */
+ np = nhrp_peer_alloc(peer->interface);
+ np->type = NHRP_PEER_TYPE_DYNAMIC_NHS;
+ np->flags |= NHRP_PEER_FLAG_REGISTER;
+ np->afnum = peer->afnum;
+ np->protocol_type = peer->protocol_type;
+ np->protocol_address = peer->protocol_address;
+ np->prefix_length = peer->prefix_length;
+ np->next_hop_address = addrs[i];
+ /* The child keeps its parent alive; nhrp_peer_release()
+ * drops this reference */
+ np->parent = nhrp_peer_get(peer);
+ nhrp_address_set_network(&np->protocol_address,
+ np->prefix_length);
+ nhrp_peer_insert(np);
+ nhrp_peer_put(np);
+ }
+
+ /* Delete all dynamic nhs:s that were not in the DNS reply */
+ /* Reset the next hop criterion so that it no longer filters, and
+ * restrict the selection to entries that are still marked */
+ nhrp_address_set_type(&sel.next_hop_address, AF_UNSPEC);
+ sel.flags = NHRP_PEER_FIND_MARK;
+ nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel);
+
+ /* Refresh DNS info */
+ nhrp_peer_schedule(peer, peer->interface->holding_time,
+ nhrp_peer_dnsmap_restart_cb);
+}
+
+/* Timer callback: start (or restart) the address lookup of the host name
+ * configured for the dns-map peer embedding timer w. The result is
+ * reported to nhrp_peer_dnsmap_query_cb(). A peer without a host name is
+ * a programming error. revents is unused. */
+static void nhrp_peer_dnsmap_restart_cb(struct ev_timer *w, int revents)
+{
+	struct nhrp_peer *peer;
+
+	peer = container_of(w, struct nhrp_peer, timer);
+	NHRP_BUG_ON(peer->nbma_hostname == NULL);
+
+	nhrp_address_resolve(&peer->address_query,
+			     peer->nbma_hostname,
+			     nhrp_peer_dnsmap_query_cb);
+}
+
+/* Start the type specific life cycle of the peer embedding timer w.
+ *
+ * Called directly by nhrp_peer_insert() and nhrp_peer_reinsert(), or as
+ * a timer callback scheduled by nhrp_peer_insert(). Pending asynchronous
+ * work of the peer is cancelled first. revents is unused. */
+static void nhrp_peer_insert_cb(struct ev_timer *w, int revents)
+{
+ struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer);
+
+ nhrp_peer_cancel_async(peer);
+ switch (peer->type) {
+ case NHRP_PEER_TYPE_LOCAL_ADDR:
+ peer->flags |= NHRP_PEER_FLAG_UP;
+ forward_local_addresses_changed();
+ break;
+ case NHRP_PEER_TYPE_LOCAL_ROUTE:
+ peer->flags |= NHRP_PEER_FLAG_UP;
+ break;
+ case NHRP_PEER_TYPE_INCOMPLETE:
+ /* Unresolved entry: ask for its mapping */
+ nhrp_peer_send_resolve(peer);
+ break;
+ case NHRP_PEER_TYPE_CACHED:
+ case NHRP_PEER_TYPE_DYNAMIC:
+ case NHRP_PEER_TYPE_STATIC:
+ case NHRP_PEER_TYPE_DYNAMIC_NHS:
+ nhrp_peer_restart_cb(w, 0);
+ break;
+ case NHRP_PEER_TYPE_STATIC_DNS:
+ nhrp_peer_dnsmap_restart_cb(w, 0);
+ break;
+ case NHRP_PEER_TYPE_SHORTCUT_ROUTE:
+ /* Already up (e.g. a refreshed shortcut, see
+ * nhrp_peer_replace_shortcut()): skip the route-up script and
+ * call its completion handler directly. Otherwise run route-up
+ * only if an up, adjacent peer exists for the next hop; if not,
+ * just arm the expiry timer. */
+ if (peer->flags & NHRP_PEER_FLAG_UP)
+ nhrp_peer_script_route_up_done(&peer->child, 0);
+ else if (nhrp_peer_route(peer->interface,
+ &peer->next_hop_address,
+ NHRP_PEER_FIND_UP | NHRP_PEER_FIND_EXACT,
+ NHRP_PEER_TYPEMASK_ADJACENT) != NULL)
+ nhrp_peer_run_script(peer, "route-up",
+ nhrp_peer_script_route_up_done);
+ else
+ nhrp_peer_schedule(peer, peer->expire_time - NHRP_EXPIRY_TIME
+ - 10 - ev_now(), nhrp_peer_expire_cb);
+ break;
+ case NHRP_PEER_TYPE_NEGATIVE:
+ peer->expire_time = ev_now() + NHRP_NEGATIVE_CACHE_TIME;
+
+ /* Passes a NULL address for this destination to
+ * kernel_inject_neighbor(), as nhrp_peer_release() does when it
+ * removes an entry from the arp cache */
+ if (peer->flags & NHRP_PEER_FLAG_UP)
+ kernel_inject_neighbor(&peer->protocol_address,
+ NULL, peer->interface);
+ /* The negative entry removes itself when its time is up */
+ nhrp_peer_schedule(peer, NHRP_NEGATIVE_CACHE_TIME,
+ nhrp_peer_remove_cb);
+ break;
+ default:
+ NHRP_BUG_ON("invalid peer type");
+ break;
+ }
+}
+
+/* Change the type of an already listed peer and restart its life cycle
+ * under the new type. The REMOVED flag is cleared in the process.
+ *
+ * A peer may neither become nor stop being a LOCAL_ADDR or LOCAL_ROUTE
+ * entry this way; attempting that is a programming error. */
+static void nhrp_peer_reinsert(struct nhrp_peer *peer, int type)
+{
+	NHRP_BUG_ON((peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) !=
+		    (type == NHRP_PEER_TYPE_LOCAL_ADDR));
+	NHRP_BUG_ON((peer->type == NHRP_PEER_TYPE_LOCAL_ROUTE) !=
+		    (type == NHRP_PEER_TYPE_LOCAL_ROUTE));
+
+	peer->type = type;
+	peer->flags &= ~NHRP_PEER_FLAG_REMOVED;
+
+	nhrp_peer_insert_cb(&peer->timer, 0);
+}
+
+/* Peer enumerator used while inserting a new shortcut route (ctx): every
+ * existing shortcut handed to it is removed.
+ *
+ * An existing entry with exactly the same prefix counts as a replacement
+ * (renewal or new next hop) and is flagged REPLACED. If that entry is up
+ * and also has the same next hop, the new shortcut inherits the UP flag.
+ * Always returns 0 so that all matches are visited. */
+static int nhrp_peer_replace_shortcut(void *ctx, struct nhrp_peer *peer)
+{
+	struct nhrp_peer *incoming = (struct nhrp_peer *) ctx;
+	int same_prefix;
+
+	same_prefix = nhrp_address_cmp(&peer->protocol_address,
+				       &incoming->protocol_address) == 0 &&
+		      peer->prefix_length == incoming->prefix_length;
+
+	if (same_prefix) {
+		peer->flags |= NHRP_PEER_FLAG_REPLACED;
+
+		/* A refresh of an identical, working shortcut: the new
+		 * entry starts out up. */
+		if (peer->flags & NHRP_PEER_FLAG_UP) {
+			if (nhrp_address_cmp(&peer->next_hop_address,
+					     &incoming->next_hop_address) == 0)
+				incoming->flags |= NHRP_PEER_FLAG_UP;
+		}
+	}
+
+	/* The old entry goes away whether or not it was a replacement */
+	nhrp_peer_remove(peer);
+
+	return 0;
+}
+
+/* Add peer to the peer cache.
+ *
+ * Conflicting entries are pruned first, then the peer is linked either
+ * to the global local_peer_list (LOCAL_ADDR peers) or to the peer list
+ * of its interface. The list holds its own reference on the peer, so the
+ * caller keeps (and eventually drops) the reference it already owns.
+ * Finally the peer's life cycle is started through
+ * nhrp_peer_insert_cb(), immediately or from a zero-delay timer. */
+void nhrp_peer_insert(struct nhrp_peer *peer)
+{
+ struct nhrp_peer_selector sel;
+ char tmp[NHRP_PEER_FORMAT_LEN];
+
+ /* First, prune all duplicates */
+ memset(&sel, 0, sizeof(sel));
+ sel.interface = peer->interface;
+ sel.protocol_address = peer->protocol_address;
+ sel.prefix_length = peer->prefix_length;
+ switch (peer->type) {
+ case NHRP_PEER_TYPE_SHORTCUT_ROUTE:
+ /* remove existing shortcuts selected by subnet match on the
+ * new shortcut's prefix; nhrp_peer_replace_shortcut() decides
+ * whether each one counts as a replacement */
+ sel.flags = NHRP_PEER_FIND_SUBNET;
+ sel.type_mask |= BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE);
+ nhrp_peer_foreach(nhrp_peer_replace_shortcut, peer, &sel);
+ break;
+ case NHRP_PEER_TYPE_LOCAL_ROUTE:
+ /* a local route additionally displaces other local routes */
+ sel.type_mask |= BIT(NHRP_PEER_TYPE_LOCAL_ROUTE);
+ /* fall through */
+ default:
+ /* remove exact protocol address matches */
+ sel.flags = NHRP_PEER_FIND_EXACT;
+ sel.type_mask |= NHRP_PEER_TYPEMASK_REMOVABLE;
+ nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel);
+ break;
+ }
+
+ /* Keep a reference as long as we are on the list */
+ peer = nhrp_peer_get(peer);
+ nhrp_debug("Adding %s %s",
+ nhrp_peer_type[peer->type],
+ nhrp_peer_format(peer, sizeof(tmp), tmp));
+
+ if (peer->type == NHRP_PEER_TYPE_LOCAL_ADDR)
+ list_add(&peer->peer_list_entry, &local_peer_list);
+ else
+ list_add(&peer->peer_list_entry, &peer->interface->peer_list);
+
+ /* Start peers life */
+ /* LOCAL_ADDR peers are started at once even while nhrp_running is
+ * not set; everything else is deferred to a zero-delay timer then */
+ if (nhrp_running || peer->type == NHRP_PEER_TYPE_LOCAL_ADDR)
+ nhrp_peer_insert_cb(&peer->timer, 0);
+ else
+ nhrp_peer_schedule(peer, 0, &nhrp_peer_insert_cb);
+}
+
+/* Completion handler of the "peer-down" step in nhrp_peer_purge():
+ * schedules nhrp_peer_restart_cb() for the peer behind event e with a
+ * delay of 5. */
+static void nhrp_peer_script_peer_down_done(union nhrp_peer_event e,
+					    int revents)
+{
+	struct nhrp_peer *owner;
+
+	owner = nhrp_peer_from_event(e, revents);
+	nhrp_peer_schedule(owner, 5, nhrp_peer_restart_cb);
+}
+
+/* Purge peer for the given reason.
+ *
+ * STATIC and DYNAMIC_NHS peers are not deleted: they are taken down
+ * and then restarted through nhrp_peer_script_peer_down_done().
+ * STATIC_DNS peers get their DNS lookup rescheduled immediately.
+ * Every other type is removed from the cache.
+ *
+ * purge_reason is stored in the peer as a pointer (it is not copied);
+ * it is not stored for STATIC_DNS peers. */
+void nhrp_peer_purge(struct nhrp_peer *peer, const char *purge_reason)
+{
+ switch (peer->type) {
+ case NHRP_PEER_TYPE_STATIC:
+ case NHRP_PEER_TYPE_DYNAMIC_NHS:
+ peer->purge_reason = purge_reason;
+ nhrp_peer_run_nhs_down(peer);
+ nhrp_peer_is_down(peer);
+ nhrp_peer_cancel_async(peer);
+ /* Run the peer-down script only if the lower layer was up;
+ * otherwise go straight to the completion handler */
+ if (peer->flags & NHRP_PEER_FLAG_LOWER_UP) {
+ nhrp_peer_run_script(peer, "peer-down",
+ nhrp_peer_script_peer_down_done);
+ } else {
+ nhrp_peer_script_peer_down_done(&peer->child, 0);
+ }
+ /* Forget our NBMA address as recorded for this peer */
+ nhrp_address_set_type(&peer->my_nbma_address, PF_UNSPEC);
+ break;
+ case NHRP_PEER_TYPE_STATIC_DNS:
+ nhrp_peer_schedule(peer, 0, nhrp_peer_dnsmap_restart_cb);
+ break;
+ default:
+ peer->purge_reason = purge_reason;
+ nhrp_peer_remove(peer);
+ break;
+ }
+}
+
+/* Peer enumerator: purge peer with reason "user-request". When ctx is
+ * not NULL it points to an int that is incremented once per purged peer.
+ * Always returns 0 so the enumeration continues. */
+int nhrp_peer_purge_matching(void *ctx, struct nhrp_peer *peer)
+{
+	int *counter = ctx;
+
+	nhrp_peer_purge(peer, "user-request");
+
+	if (counter != NULL)
+		*counter += 1;
+
+	return 0;
+}
+
+/* Peer enumerator: purge peer with reason "lower-down". When ctx is not
+ * NULL it points to an int that is incremented once per purged peer.
+ * Always returns 0 so the enumeration continues. */
+int nhrp_peer_lowerdown_matching(void *ctx, struct nhrp_peer *peer)
+{
+	int *counter = ctx;
+
+	nhrp_peer_purge(peer, "lower-down");
+
+	if (counter != NULL)
+		*counter += 1;
+
+	return 0;
+}
+
+/* Timer callback that takes the peer embedding timer w off its peer list
+ * and drops the reference the list was holding. The purge reason is set
+ * to "expired". If the peer was a LOCAL_ADDR entry, the forwarding code
+ * is told that the local addresses changed. revents is unused. */
+static void nhrp_peer_remove_cb(struct ev_timer *w, int revents)
+{
+	struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer);
+	int was_local_addr;
+
+	peer->purge_reason = "expired";
+	peer->flags |= NHRP_PEER_FLAG_REMOVED;
+	nhrp_peer_is_down(peer);
+	list_del(&peer->peer_list_entry);
+
+	/* Read the type before the put: the put may free the peer */
+	was_local_addr = (peer->type == NHRP_PEER_TYPE_LOCAL_ADDR);
+	nhrp_peer_put(peer);
+
+	if (was_local_addr)
+		forward_local_addresses_changed();
+}
+
+/* Begin removal of peer from the cache.
+ *
+ * The peer is flagged REMOVED and taken down right away, its pending
+ * asynchronous work is cancelled, and the actual unlinking is deferred
+ * to nhrp_peer_remove_cb() via a zero-delay timer. Calling this on a
+ * peer that is already flagged REMOVED does nothing. */
+void nhrp_peer_remove(struct nhrp_peer *peer)
+{
+	char text[NHRP_PEER_FORMAT_LEN];
+
+	if (!(peer->flags & NHRP_PEER_FLAG_REMOVED)) {
+		nhrp_debug("Removing %s %s",
+			   nhrp_peer_type[peer->type],
+			   nhrp_peer_format(peer, sizeof(text), text));
+
+		peer->flags |= NHRP_PEER_FLAG_REMOVED;
+		nhrp_peer_is_down(peer);
+		nhrp_peer_cancel_async(peer);
+		nhrp_peer_schedule(peer, 0, nhrp_peer_remove_cb);
+	}
+}
+
+/* Peer enumerator: remove peer. When ctx is not NULL it points to an int
+ * that is incremented once per call. Always returns 0 so the
+ * enumeration continues. */
+int nhrp_peer_remove_matching(void *ctx, struct nhrp_peer *peer)
+{
+	int *counter = ctx;
+
+	nhrp_peer_remove(peer);
+
+	if (counter != NULL)
+		*counter += 1;
+
+	return 0;
+}
+
+/* Peer enumerator: set or clear the USED flag of peer.
+ *
+ * ctx is not a pointer but an integer smuggled through the context
+ * argument: non-zero sets the flag and renews the peer, zero clears the
+ * flag. Always returns 0 so the enumeration continues. */
+int nhrp_peer_set_used_matching(void *ctx, struct nhrp_peer *peer)
+{
+	if ((int) (intptr_t) ctx == 0) {
+		peer->flags &= ~NHRP_PEER_FLAG_USED;
+		return 0;
+	}
+
+	peer->flags |= NHRP_PEER_FLAG_USED;
+	nhrp_peer_renew(peer);
+
+	return 0;
+}
+
+/* Test whether peer p satisfies every criterion that is set in sel.
+ *
+ * Criteria left at zero / NULL / PF_UNSPEC are not checked. Returns TRUE
+ * on a match and FALSE otherwise.
+ *
+ * Side effect: when sel carries a protocol address but a prefix_length
+ * of 0, sel->prefix_length is filled in with the full address length in
+ * bits, so the selector is modified by the first call. */
+int nhrp_peer_match(struct nhrp_peer *p, struct nhrp_peer_selector *sel)
+{
+ /* type_mask is a bit set indexed by peer type; 0 accepts any type */
+ if (sel->type_mask && !(sel->type_mask & BIT(p->type)))
+ return FALSE;
+
+ if ((sel->flags & NHRP_PEER_FIND_UP) &&
+ !(p->flags & NHRP_PEER_FLAG_UP))
+ return FALSE;
+
+ if ((sel->flags & NHRP_PEER_FIND_MARK) &&
+ !(p->flags & NHRP_PEER_FLAG_MARK))
+ return FALSE;
+
+ /* Peers on another interface are rejected unless that interface
+ * has the SHORTCUT_DEST flag set */
+ if (sel->interface != NULL &&
+ p->interface != sel->interface &&
+ !(p->interface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST))
+ return FALSE;
+
+ if (sel->hostname != NULL &&
+ (p->nbma_hostname == NULL ||
+ strcmp(sel->hostname, p->nbma_hostname) != 0))
+ return FALSE;
+
+ if (sel->parent != NULL &&
+ p->parent != sel->parent)
+ return FALSE;
+
+ if (sel->protocol_address.type != PF_UNSPEC) {
+ /* Default to a host (full length) prefix */
+ if (sel->prefix_length == 0)
+ sel->prefix_length = sel->protocol_address.addr_len * 8;
+
+ if (sel->flags & NHRP_PEER_FIND_EXACT) {
+ /* Same address required; the prefix length must match
+ * too, except for STATIC and DYNAMIC_NHS peers */
+ if (nhrp_address_cmp(&p->protocol_address,
+ &sel->protocol_address) != 0)
+ return FALSE;
+
+ if (p->prefix_length != sel->prefix_length &&
+ p->type != NHRP_PEER_TYPE_STATIC &&
+ p->type != NHRP_PEER_TYPE_DYNAMIC_NHS)
+ return FALSE;
+ } else if (sel->flags & NHRP_PEER_FIND_ROUTE) {
+ /* Route lookup: the selector address must lie within
+ * the peer's own prefix */
+ if (nhrp_address_prefix_cmp(&p->protocol_address,
+ &sel->protocol_address,
+ p->prefix_length) != 0)
+ return FALSE;
+ } else {
+ /* Neither EXACT nor ROUTE: the peer must lie within the
+ * selector's prefix. A peer with a shorter prefix only
+ * matches a host selector with the identical address;
+ * note that this early TRUE skips the next hop check
+ * at the end of the function. */
+ if (p->prefix_length < sel->prefix_length) {
+ if (sel->prefix_length
+ == sel->protocol_address.addr_len * 8 &&
+ nhrp_address_cmp(&p->protocol_address,
+ &sel->protocol_address)
+ == 0)
+ return TRUE;
+
+ return FALSE;
+ }
+
+ if (nhrp_address_prefix_cmp(&p->protocol_address,
+ &sel->protocol_address,
+ sel->prefix_length) != 0)
+ return FALSE;
+ }
+ }
+
+ if (sel->next_hop_address.type != PF_UNSPEC) {
+ if (nhrp_address_cmp(&p->next_hop_address,
+ &sel->next_hop_address) != 0)
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/* Arguments of nhrp_peer_foreach() bundled up so that they can be passed
+ * through nhrp_interface_foreach() to enum_interface_peers().
+ * nhrp_peer_foreach() initialises this positionally, so the field order
+ * must not change. */
+struct enum_interface_peers_ctx {
+ nhrp_peer_enumerator enumerator; /* callback invoked per matching peer */
+ void *ctx; /* opaque argument handed to enumerator */
+ struct nhrp_peer_selector *sel; /* match criteria; NULL matches all */
+};
+
+/* Call e(ctx, peer) for each peer on the list peer_cache that is not
+ * flagged REMOVED and that matches sel (a NULL sel matches everything).
+ *
+ * The walk stops at the first callback that returns non-zero and that
+ * value is returned; otherwise the result is 0. */
+static int enumerate_peer_cache(struct list_head *peer_cache,
+				nhrp_peer_enumerator e, void *ctx,
+				struct nhrp_peer_selector *sel)
+{
+	struct nhrp_peer *entry;
+	int status = 0;
+
+	list_for_each_entry(entry, peer_cache, peer_list_entry) {
+		/* Entries awaiting removal are invisible to callers */
+		if (entry->flags & NHRP_PEER_FLAG_REMOVED)
+			continue;
+
+		if (sel != NULL && !nhrp_peer_match(entry, sel))
+			continue;
+
+		status = e(ctx, entry);
+		if (status != 0)
+			break;
+	}
+
+	return status;
+}
+
+/* Interface enumerator used by nhrp_peer_foreach(): runs the peer
+ * enumeration described by ctx (a struct enum_interface_peers_ctx) over
+ * the peer list of iface and returns its result. */
+static int enum_interface_peers(void *ctx, struct nhrp_interface *iface)
+{
+	struct enum_interface_peers_ctx *args = ctx;
+	struct list_head *peers = &iface->peer_list;
+
+	return enumerate_peer_cache(peers, args->enumerator, args->ctx,
+				    args->sel);
+}
+
+/* Call e(ctx, peer) for every cached peer that matches sel; a NULL sel
+ * matches all peers.
+ *
+ * The global list of local peers is visited first, then either the peer
+ * list of sel->interface or, when no interface is given, the peer lists
+ * of all interfaces. Enumeration stops at the first callback returning
+ * non-zero, and that value is returned; otherwise the result is whatever
+ * the last enumeration step returned. */
+int nhrp_peer_foreach(nhrp_peer_enumerator e, void *ctx,
+		      struct nhrp_peer_selector *sel)
+{
+	struct enum_interface_peers_ctx args = {
+		.enumerator = e,
+		.ctx = ctx,
+		.sel = sel,
+	};
+	/* Captured before any callback runs */
+	struct nhrp_interface *only_iface =
+		(sel != NULL) ? sel->interface : NULL;
+	int status;
+
+	status = enumerate_peer_cache(&local_peer_list, e, ctx, sel);
+	if (status != 0)
+		return status;
+
+	/* Speed optimization: TYPE_LOCAL peers cannot be found from
+	 * other places */
+	if (sel != NULL &&
+	    sel->type_mask == BIT(NHRP_PEER_TYPE_LOCAL_ADDR))
+		return 0;
+
+	if (only_iface != NULL)
+		return enumerate_peer_cache(&only_iface->peer_list,
+					    e, ctx, sel);
+
+	return nhrp_interface_foreach(enum_interface_peers, &args);
+}
+
+/* Working state of one routing lookup; filled in by
+ * nhrp_peer_route_full() and updated by decide_route() for each
+ * candidate peer. */
+struct route_decision {
+ struct nhrp_peer_selector sel; /* criteria the candidates were matched against */
+ struct list_head *exclude; /* CIE list of addresses to skip, or NULL */
+ struct nhrp_peer *best_found; /* best candidate so far, NULL if none */
+ struct nhrp_address *src; /* requester's address to avoid, or NULL */
+ int found_exact, found_up; /* "exact" and UP-flag state of best_found */
+};
+
+/* Peer enumerator behind nhrp_peer_route_full(): decides whether peer is
+ * a better route than the best candidate recorded in ctx (a struct
+ * route_decision) and, if so, records it.
+ *
+ * Preference, in order: "exact" candidates over others, candidates that
+ * are up over ones that are not, longer prefixes over shorter ones and,
+ * for equal prefixes, the candidate with the smaller last_used value
+ * (equal values let the later candidate win).
+ * Always returns 0 so that every matching peer is considered. */
+static int decide_route(void *ctx, struct nhrp_peer *peer)
+{
+ struct route_decision *rd = (struct route_decision *) ctx;
+ int exact;
+
+ if (peer->type != NHRP_PEER_TYPE_SHORTCUT_ROUTE) {
+ /* Exclude addresses from CIE from routing decision
+ * to avoid routing loops within NHS clusters. */
+ if (rd->exclude != NULL &&
+ nhrp_address_match_cie_list(&peer->next_hop_address,
+ &peer->protocol_address,
+ rd->exclude))
+ return 0;
+
+ /* Exclude also source address, we don't want to
+ * forward questions back to who's asking. */
+ if (rd->src != NULL &&
+ nhrp_address_cmp(rd->src, &peer->protocol_address) == 0)
+ return 0;
+ } else {
+ /* Exclude routes that point back to the sender
+ * of the packet */
+ if (rd->src != NULL &&
+ nhrp_address_cmp(rd->src, &peer->next_hop_address) == 0)
+ return 0;
+ }
+
+ /* "exact" relies on the numeric order of the peer type constants:
+ * only types at or above DYNAMIC_NHS qualify, and the peer address
+ * must equal the looked-up address */
+ exact = (peer->type >= NHRP_PEER_TYPE_DYNAMIC_NHS) &&
+ (nhrp_address_cmp(&peer->protocol_address,
+ &rd->sel.protocol_address) == 0);
+ /* Never trade an exact candidate for a non-exact one */
+ if (rd->found_exact > exact)
+ return 0;
+
+ /* Never trade a candidate that is up for one that is not */
+ if (rd->found_up && !(peer->flags & NHRP_PEER_FLAG_UP))
+ return 0;
+
+ /* Same exactness and up state: apply the tie breakers.
+ * found_up holds the masked flag value, not 0/1, hence the
+ * comparison against the masked flags. */
+ if (rd->best_found != NULL &&
+ rd->found_exact == exact &&
+ rd->found_up == (peer->flags & NHRP_PEER_FLAG_UP)) {
+ if (rd->best_found->prefix_length > peer->prefix_length)
+ return 0;
+
+ if (rd->best_found->prefix_length == peer->prefix_length &&
+ rd->best_found->last_used < peer->last_used)
+ return 0;
+ }
+
+ rd->best_found = peer;
+ rd->found_exact = exact;
+ rd->found_up = peer->flags & NHRP_PEER_FLAG_UP;
+ return 0;
+}
+
+/* Find the best cached peer to reach protocol address dst.
+ *
+ * interface and type_mask restrict the candidates. flags are
+ * NHRP_PEER_FIND_* bits; when none of ROUTE, EXACT or SUBNET is given,
+ * ROUTE is assumed. src (may be NULL) is the requester's address and
+ * exclude (may be NULL) a CIE list of addresses; both are used by
+ * decide_route() to skip candidates.
+ *
+ * NHRP_PEER_FIND_UP is not used to filter candidates: the best candidate
+ * is chosen first and NULL is returned if it turns out not to be up.
+ *
+ * Returns the chosen peer without taking a reference, or NULL. The
+ * chosen peer's last_used time stamp is updated. */
+struct nhrp_peer *nhrp_peer_route_full(struct nhrp_interface *interface,
+				       struct nhrp_address *dst,
+				       int flags, int type_mask,
+				       struct nhrp_address *src,
+				       struct list_head *exclude)
+{
+	const int mode_bits = NHRP_PEER_FIND_ROUTE | NHRP_PEER_FIND_EXACT |
+			      NHRP_PEER_FIND_SUBNET;
+	struct route_decision rd;
+	struct nhrp_peer *winner;
+
+	memset(&rd, 0, sizeof(rd));
+	rd.src = src;
+	rd.exclude = exclude;
+	rd.sel.interface = interface;
+	rd.sel.type_mask = type_mask;
+	rd.sel.protocol_address = *dst;
+	rd.sel.flags = flags & ~NHRP_PEER_FIND_UP;
+	if (!(flags & mode_bits))
+		rd.sel.flags |= NHRP_PEER_FIND_ROUTE;
+
+	nhrp_peer_foreach(decide_route, &rd, &rd.sel);
+
+	winner = rd.best_found;
+	if (winner == NULL)
+		return NULL;
+
+	if ((flags & NHRP_PEER_FIND_UP) &&
+	    !(winner->flags & NHRP_PEER_FLAG_UP))
+		return NULL;
+
+	winner->last_used = ev_now();
+	return winner;
+}
+
+/* React to traffic seen towards dst on iface: unless the peer cache
+ * already holds an entry covering dst, insert an INCOMPLETE entry for
+ * it, which starts address resolution.
+ *
+ * afnum is stored in the new entry; its protocol type is derived from
+ * the address family of dst. */
+void nhrp_peer_traffic_indication(struct nhrp_interface *iface,
+				  uint16_t afnum, struct nhrp_address *dst)
+{
+	struct nhrp_peer *entry;
+	int find_mode;
+
+	/* For off-NBMA destinations, we consider all shortcut routes,
+	 * but NBMA destinations should be exact because we want to drop
+	 * NHS from the path. */
+	find_mode = (nhrp_address_prefix_cmp(dst, &iface->protocol_address,
+					     iface->protocol_address_prefix)
+		     != 0)
+		? NHRP_PEER_FIND_ROUTE
+		: NHRP_PEER_FIND_EXACT;
+
+	/* Have we done something for this destination already? */
+	if (nhrp_peer_route(iface, dst, find_mode,
+			    ~BIT(NHRP_PEER_TYPE_LOCAL_ROUTE)) != NULL)
+		return;
+
+	/* Initiate resolution */
+	entry = nhrp_peer_alloc(iface);
+	entry->type = NHRP_PEER_TYPE_INCOMPLETE;
+	entry->afnum = afnum;
+	entry->protocol_type = nhrp_protocol_from_pf(dst->type);
+	entry->protocol_address = *dst;
+	entry->prefix_length = dst->addr_len * 8;
+	nhrp_peer_insert(entry);
+	/* The cache keeps its own reference; drop ours */
+	nhrp_peer_put(entry);
+}
+
+/* Peer enumerator: log one line describing peer and increment the int
+ * counter that ctx points to. Always returns 0 so the enumeration
+ * continues. */
+static int dump_peer(void *ctx, struct nhrp_peer *peer)
+{
+	char text[NHRP_PEER_FORMAT_LEN];
+	int *counter = (int *) ctx;
+
+	nhrp_info("%s %s",
+		  nhrp_peer_type[peer->type],
+		  nhrp_peer_format(peer, sizeof(text), text));
+
+	*counter += 1;
+
+	return 0;
+}
+
+/* Log every peer cache entry, followed by a summary line with the number
+ * of entries listed and the number of peer structures currently
+ * allocated. */
+void nhrp_peer_dump_cache(void)
+{
+	int listed = 0;
+
+	nhrp_info("Peer cache dump:");
+	nhrp_peer_foreach(dump_peer, &listed, NULL);
+	nhrp_info("Total %d peer cache entries, %d allocated entries",
+		  listed, nhrp_peer_num_total);
+}
+
+/* Remove every peer and run the event loop until all peer structures
+ * have been freed. While waiting, a progress message is logged whenever
+ * more than 5.0 (in ev_now() units) has passed since the previous one. */
+void nhrp_peer_cleanup(void)
+{
+	ev_tstamp last_report = ev_now();
+
+	nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, NULL);
+
+	for (;;) {
+		if (!(nhrp_peer_num_total > 0))
+			break;
+
+		if (ev_now() > last_report + 5.0) {
+			nhrp_info("Waiting for peers to die, %d left", nhrp_peer_num_total);
+			last_report = ev_now();
+		}
+
+		/* Removal is timer driven, so let the loop make progress */
+		ev_loop(EVLOOP_ONESHOT);
+	}
+}
diff --git a/nhrp/nhrp_peer.h b/nhrp/nhrp_peer.h
new file mode 100644
index 0000000..dea8d66
--- /dev/null
+++ b/nhrp/nhrp_peer.h
@@ -0,0 +1,194 @@
+/* nhrp_peer.h - NHRP peer cache definitions
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#ifndef NHRP_PEER_H
+#define NHRP_PEER_H
+
+#include <time.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include "nhrp_address.h"
+#include "libev.h"
+#include "list.h"
+
+#define NHRP_PEER_TYPE_INCOMPLETE 0x00 /* Resolution request sent */
+#define NHRP_PEER_TYPE_NEGATIVE 0x01 /* Negative cached */
+#define NHRP_PEER_TYPE_CACHED 0x02 /* Received/relayed resolution reply */
+#define NHRP_PEER_TYPE_SHORTCUT_ROUTE 0x03 /* Received/relayed resolution for route */
+#define NHRP_PEER_TYPE_DYNAMIC 0x04 /* NHC registration */
+#define NHRP_PEER_TYPE_DYNAMIC_NHS 0x05 /* Dynamic NHS from dns-map */
+#define NHRP_PEER_TYPE_STATIC 0x06 /* Static mapping from config file */
+#define NHRP_PEER_TYPE_STATIC_DNS 0x07 /* Static dns-map from config file */
+#define NHRP_PEER_TYPE_LOCAL_ROUTE 0x08 /* Non-local destination, with local route */
+#define NHRP_PEER_TYPE_LOCAL_ADDR 0x09 /* Local destination (IP or off-NBMA subnet) */
+#define NHRP_PEER_TYPE_MAX (NHRP_PEER_TYPE_LOCAL_ADDR+1)
+
+#define NHRP_PEER_TYPEMASK_ADJACENT \
+ (BIT(NHRP_PEER_TYPE_CACHED) | \
+ BIT(NHRP_PEER_TYPE_DYNAMIC) | \
+ BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) | \
+ BIT(NHRP_PEER_TYPE_STATIC) | \
+ BIT(NHRP_PEER_TYPE_LOCAL_ADDR))
+
+#define NHRP_PEER_TYPEMASK_REMOVABLE \
+ (BIT(NHRP_PEER_TYPE_INCOMPLETE) | \
+ BIT(NHRP_PEER_TYPE_NEGATIVE) | \
+ BIT(NHRP_PEER_TYPE_CACHED) | \
+ BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE) | \
+ BIT(NHRP_PEER_TYPE_DYNAMIC))
+
+#define NHRP_PEER_TYPEMASK_PURGEABLE \
+ (NHRP_PEER_TYPEMASK_REMOVABLE | \
+ BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) | \
+ BIT(NHRP_PEER_TYPE_STATIC) | \
+ BIT(NHRP_PEER_TYPE_STATIC_DNS))
+
+#define NHRP_PEER_TYPEMASK_ALL \
+ (NHRP_PEER_TYPEMASK_PURGEABLE | \
+ BIT(NHRP_PEER_TYPE_LOCAL_ROUTE) | \
+ BIT(NHRP_PEER_TYPE_LOCAL_ADDR))
+
+/* For routing via NHS */
+#define NHRP_PEER_TYPEMASK_ROUTE_VIA_NHS \
+ (BIT(NHRP_PEER_TYPE_DYNAMIC) | \
+ BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) | \
+ BIT(NHRP_PEER_TYPE_STATIC) | \
+ BIT(NHRP_PEER_TYPE_LOCAL_ROUTE) | \
+ BIT(NHRP_PEER_TYPE_LOCAL_ADDR))
+
+#define NHRP_PEER_FLAG_UNIQUE 0x01 /* Peer is unique; see RFC2332 */
+#define NHRP_PEER_FLAG_REGISTER 0x02 /* For TYPE_STATIC: send registration */
+#define NHRP_PEER_FLAG_CISCO 0x04 /* For TYPE_STATIC: peer is Cisco */
+#define NHRP_PEER_FLAG_USED 0x10 /* Peer is in kernel ARP table */
+#define NHRP_PEER_FLAG_LOWER_UP		0x20	/* Script executed successfully */
+#define NHRP_PEER_FLAG_UP 0x40 /* Can send all packets (registration ok) */
+#define NHRP_PEER_FLAG_REPLACED 0x80 /* Peer has been replaced */
+#define NHRP_PEER_FLAG_REMOVED 0x100 /* Deleted, but not removed from cache yet */
+#define NHRP_PEER_FLAG_MARK 0x200 /* Can be used to temporarily mark peers */
+
+#define NHRP_PEER_FIND_ROUTE 0x01
+#define NHRP_PEER_FIND_EXACT 0x02
+#define NHRP_PEER_FIND_SUBNET 0x04
+#define NHRP_PEER_FIND_UP 0x10
+#define NHRP_PEER_FIND_MARK 0x20
+
+struct nhrp_interface;
+struct nhrp_packet;
+struct nhrp_pending_request;
+
+union __attribute__ ((__transparent_union__)) nhrp_peer_event {
+ struct ev_timer *timer;
+ struct ev_child *child;
+};
+
+struct nhrp_peer {
+ unsigned int ref;
+ unsigned int flags;
+
+ struct list_head peer_list_entry;
+ struct list_head mcast_list_entry;
+ struct hlist_node nbma_hash_entry;
+
+ const char *purge_reason;
+ struct nhrp_interface *interface;
+ struct nhrp_peer *parent;
+ struct nhrp_packet *queued_packet;
+ struct nhrp_pending_request *request;
+
+ struct ev_timer timer;
+ struct ev_child child;
+ struct nhrp_address_query address_query;
+
+ uint8_t type;
+ uint8_t prefix_length;
+ uint16_t afnum;
+ uint16_t protocol_type;
+ uint16_t mtu, my_nbma_mtu;
+ ev_tstamp expire_time;
+ ev_tstamp last_used;
+ struct nhrp_address my_nbma_address;
+ struct nhrp_address protocol_address;
+ unsigned int holding_time;
+
+ char *nbma_hostname;
+ /* NHRP_PEER_TYPE_ROUTE: protocol addr., others: NBMA addr. */
+ struct nhrp_address next_hop_address;
+ struct nhrp_address next_hop_nat_oa;
+};
+
+struct nhrp_peer_selector {
+ int flags; /* NHRP_PEER_FIND_xxx */
+ int type_mask;
+
+ struct nhrp_interface *interface;
+ struct nhrp_peer *parent;
+ const char *hostname;
+
+ int prefix_length;
+ struct nhrp_address protocol_address;
+ struct nhrp_address next_hop_address;
+};
+
+extern const char * const nhrp_peer_type[NHRP_PEER_TYPE_MAX];
+typedef int (*nhrp_peer_enumerator)(void *ctx, struct nhrp_peer *peer);
+
+void nhrp_peer_cleanup(void);
+
+struct nhrp_peer *nhrp_peer_alloc(struct nhrp_interface *iface);
+struct nhrp_peer *nhrp_peer_get(struct nhrp_peer *peer);
+int nhrp_peer_put(struct nhrp_peer *peer);
+void nhrp_peer_cancel_async(struct nhrp_peer *peer);
+
+void nhrp_peer_insert(struct nhrp_peer *peer);
+void nhrp_peer_remove(struct nhrp_peer *peer);
+void nhrp_peer_purge(struct nhrp_peer *peer, const char *purge_reason);
+
+int nhrp_peer_match(struct nhrp_peer *peer, struct nhrp_peer_selector *sel);
+
+int nhrp_peer_foreach(nhrp_peer_enumerator e, void *ctx,
+ struct nhrp_peer_selector *sel);
+int nhrp_peer_remove_matching(void *count, struct nhrp_peer *peer);
+int nhrp_peer_purge_matching(void *count, struct nhrp_peer *peer);
+int nhrp_peer_lowerdown_matching(void *count, struct nhrp_peer *peer);
+int nhrp_peer_set_used_matching(void *ctx, struct nhrp_peer *peer);
+struct nhrp_peer *nhrp_peer_find_by_nbma(struct nhrp_interface *iface, struct nhrp_address *nbma);
+
+int nhrp_peer_event_ok(union nhrp_peer_event e, int revents);
+char *nhrp_peer_event_reason(union nhrp_peer_event e, int revents,
+ size_t buflen, char *buf);
+struct nhrp_peer *nhrp_peer_from_event(union nhrp_peer_event e, int revents);
+void nhrp_peer_run_script(struct nhrp_peer *peer, char *action,
+ void (*cb)(union nhrp_peer_event, int));
+void nhrp_peer_send_packet_queue(struct nhrp_peer *peer);
+int nhrp_peer_discover_nhs(struct nhrp_peer *peer,
+ struct nhrp_address *newaddr);
+
+struct nhrp_peer *nhrp_peer_route_full(struct nhrp_interface *iface,
+ struct nhrp_address *dest,
+ int flags, int type_mask,
+ struct nhrp_address *source,
+ struct list_head *exclude_cie_list);
+
+static inline struct nhrp_peer *nhrp_peer_route(struct nhrp_interface *iface,
+ struct nhrp_address *dest,
+ int flags, int type_mask)
+{
+ return nhrp_peer_route_full(iface, dest, flags, type_mask, NULL, NULL);
+}
+
+void nhrp_peer_traffic_indication(struct nhrp_interface *iface,
+ uint16_t afnum, struct nhrp_address *dst);
+void nhrp_peer_dump_cache(void);
+
+void nhrp_server_finish_request(struct nhrp_pending_request *pr);
+
+#endif
diff --git a/nhrp/nhrp_protocol.h b/nhrp/nhrp_protocol.h
new file mode 100644
index 0000000..8cf213b
--- /dev/null
+++ b/nhrp/nhrp_protocol.h
@@ -0,0 +1,130 @@
+/* nhrp_protocol.h - NHRP protocol definitions
+ *
+ * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#ifndef NHRP_PROTOCOL_H
+#define NHRP_PROTOCOL_H
+
+#include <stdint.h>
+#include "afnum.h"
+
+/* NHRP Version */
+#define NHRP_VERSION_RFC2332 1
+
+/* NHRP Packet Types */
+#define NHRP_PACKET_RESOLUTION_REQUEST 1
+#define NHRP_PACKET_RESOLUTION_REPLY 2
+#define NHRP_PACKET_REGISTRATION_REQUEST 3
+#define NHRP_PACKET_REGISTRATION_REPLY 4
+#define NHRP_PACKET_PURGE_REQUEST 5
+#define NHRP_PACKET_PURGE_REPLY 6
+#define NHRP_PACKET_ERROR_INDICATION 7
+#define NHRP_PACKET_TRAFFIC_INDICATION 8
+
+/* NHRP Extension Types */
+#define NHRP_EXTENSION_FLAG_COMPULSORY 0x8000
+#define NHRP_EXTENSION_END 0
+#define NHRP_EXTENSION_PAYLOAD 0
+#define NHRP_EXTENSION_RESPONDER_ADDRESS 3
+#define NHRP_EXTENSION_FORWARD_TRANSIT_NHS 4
+#define NHRP_EXTENSION_REVERSE_TRANSIT_NHS 5
+#define NHRP_EXTENSION_AUTHENTICATION 7
+#define NHRP_EXTENSION_VENDOR 8
+#define NHRP_EXTENSION_NAT_ADDRESS 9
+
+/* NHRP Error Indication Codes */
+#define NHRP_ERROR_UNRECOGNIZED_EXTENSION constant_htons(1)
+#define NHRP_ERROR_LOOP_DETECTED constant_htons(2)
+#define NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE constant_htons(6)
+#define NHRP_ERROR_PROTOCOL_ERROR constant_htons(7)
+#define NHRP_ERROR_SDU_SIZE_EXCEEDED constant_htons(8)
+#define NHRP_ERROR_INVALID_EXTENSION constant_htons(9)
+#define NHRP_ERROR_INVALID_RESOLUTION_REPLY constant_htons(10)
+#define NHRP_ERROR_AUTHENTICATION_FAILURE constant_htons(11)
+#define NHRP_ERROR_HOP_COUNT_EXCEEDED constant_htons(15)
+
+/* NHRP CIE Codes */
+#define NHRP_CODE_SUCCESS 0
+#define NHRP_CODE_ADMINISTRATIVELY_PROHIBITED 4
+#define NHRP_CODE_INSUFFICIENT_RESOURCES 5
+#define NHRP_CODE_NO_BINDING_EXISTS 11
+#define NHRP_CODE_BINDING_NON_UNIQUE 13
+#define NHRP_CODE_UNIQUE_ADDRESS_REGISTERED 14
+
+/* NHRP Flags for Resolution request/reply */
+#define NHRP_FLAG_RESOLUTION_SOURCE_IS_ROUTER constant_htons(0x8000)
+#define NHRP_FLAG_RESOLUTION_AUTHORATIVE constant_htons(0x4000)
+#define NHRP_FLAG_RESOLUTION_DESTINATION_STABLE constant_htons(0x2000)
+#define NHRP_FLAG_RESOLUTION_UNIQUE constant_htons(0x1000)
+#define NHRP_FLAG_RESOLUTION_SOURCE_STABLE constant_htons(0x0800)
+#define NHRP_FLAG_RESOLUTION_NAT constant_htons(0x0002)
+
+/* NHRP Flags for Registration request/reply */
+#define NHRP_FLAG_REGISTRATION_UNIQUE constant_htons(0x8000)
+#define NHRP_FLAG_REGISTRATION_NAT constant_htons(0x0002)
+
+/* NHRP Flags for Purge request/reply */
+#define NHRP_FLAG_PURGE_NO_REPLY constant_htons(0x8000)
+
+/* NHRP Authentication extension types (ala Cisco) */
+#define NHRP_AUTHENTICATION_PLAINTEXT constant_htonl(0x00000001)
+
+/* NHRP Packet Structures */
+struct nhrp_packet_header {
+ /* Fixed header */
+ uint16_t afnum;
+ uint16_t protocol_type;
+ uint8_t snap[5];
+ uint8_t hop_count;
+ uint16_t packet_size;
+ uint16_t checksum;
+ uint16_t extension_offset;
+ uint8_t version;
+ uint8_t type;
+ uint8_t src_nbma_address_len;
+ uint8_t src_nbma_subaddress_len;
+
+ /* Mandatory header */
+ uint8_t src_protocol_address_len;
+ uint8_t dst_protocol_address_len;
+ uint16_t flags;
+ union {
+ uint32_t request_id;
+ struct {
+ uint16_t code;
+ uint16_t offset;
+ } error;
+ } u;
+};
+
+struct nhrp_cie_header {
+ uint8_t code;
+ uint8_t prefix_length;
+ uint16_t unused;
+ uint16_t mtu;
+ uint16_t holding_time;
+ uint8_t nbma_address_len;
+ uint8_t nbma_subaddress_len;
+ uint8_t protocol_address_len;
+ uint8_t preference;
+};
+
+struct nhrp_extension_header {
+ uint16_t type;
+ uint16_t length;
+};
+
+struct nhrp_cisco_authentication_extension {
+ uint32_t type;
+ uint8_t secret[8];
+};
+
+#endif
diff --git a/nhrp/nhrp_server.c b/nhrp/nhrp_server.c
new file mode 100644
index 0000000..b41e4b8
--- /dev/null
+++ b/nhrp/nhrp_server.c
@@ -0,0 +1,566 @@
+/* nhrp_server.c - NHRP request handling
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <string.h>
+#include <netinet/in.h>
+
+#include "nhrp_common.h"
+#include "nhrp_packet.h"
+#include "nhrp_interface.h"
+#include "nhrp_peer.h"
+
+#define NHRP_MAX_PENDING_REQUESTS 16
+
+struct nhrp_pending_request {
+ struct list_head request_list_entry;
+ int natted;
+ int num_ok, num_error;
+ struct nhrp_packet *packet;
+ struct nhrp_cie *cie;
+ struct nhrp_payload *payload;
+ struct nhrp_peer *peer, *rpeer;
+ ev_tstamp now;
+};
+
+static struct list_head request_list = LIST_INITIALIZER(request_list);
+static int num_pending_requests = 0;
+
+static void nhrp_server_start_cie_reg(struct nhrp_pending_request *pr);
+
+/* Track packet as an in-progress request; returns NULL if out of memory. */
+static struct nhrp_pending_request *
+nhrp_server_record_request(struct nhrp_packet *packet)
+{
+	struct nhrp_pending_request *pr = calloc(1, sizeof(*pr));
+
+	if (pr != NULL) {
+		list_init(&pr->request_list_entry);
+		num_pending_requests++;
+		list_add(&pr->request_list_entry, &request_list);
+		pr->packet = nhrp_packet_get(packet);
+		pr->now = ev_now();
+	}
+	return pr;
+}
+
+void nhrp_server_finish_request(struct nhrp_pending_request *pr)
+{
+ list_del(&pr->request_list_entry);
+ if (pr->rpeer != NULL) {
+ struct nhrp_peer *peer = pr->rpeer;
+ if (peer->flags & NHRP_PEER_FLAG_REPLACED) {
+ /* The route peer entry was not accepted. We still
+ * send the replies here, and cancel anything pending
+ * so it'll get deleted cleanly on next put(). */
+ nhrp_peer_send_packet_queue(peer);
+ nhrp_peer_cancel_async(peer);
+ }
+ nhrp_peer_put(pr->rpeer);
+ }
+ if (pr->peer != NULL)
+ nhrp_peer_put(pr->peer);
+ if (pr->packet != NULL)
+ nhrp_packet_put(pr->packet);
+ free(pr);
+ num_pending_requests--;
+}
+
+static int nhrp_server_request_pending(struct nhrp_packet *packet)
+{
+ struct nhrp_pending_request *r;
+
+ list_for_each_entry(r, &request_list, request_list_entry) {
+ if (nhrp_address_cmp(&packet->src_nbma_address,
+ &r->packet->src_nbma_address) != 0)
+ continue;
+ if (nhrp_address_cmp(&packet->src_protocol_address,
+ &r->packet->src_protocol_address) != 0)
+ continue;
+ if (nhrp_address_cmp(&packet->dst_protocol_address,
+ &r->packet->dst_protocol_address) != 0)
+ continue;
+
+ /* Request from the same address being already processed */
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+/* Answer a Resolution Request by rewriting packet into a Resolution Reply. */
+static int nhrp_handle_resolution_request(struct nhrp_packet *packet)
+{
+	char tmp[64], tmp2[64];
+	struct nhrp_payload *payload;
+	struct nhrp_peer *peer = packet->dst_peer;
+	struct nhrp_peer_selector sel;
+	struct nhrp_cie *cie;
+
+	nhrp_info("Received Resolution Request from proto src %s to %s",
+		  nhrp_address_format(&packet->src_protocol_address,
+				      sizeof(tmp), tmp),
+		  nhrp_address_format(&packet->dst_protocol_address,
+				      sizeof(tmp2), tmp2));
+
+	/* First, flush all negative entries for the requestor */
+	memset(&sel, 0, sizeof(sel));
+	sel.flags = NHRP_PEER_FIND_EXACT;
+	sel.type_mask = BIT(NHRP_PEER_TYPE_NEGATIVE);
+	sel.interface = packet->src_iface;
+	sel.protocol_address = packet->src_protocol_address;
+	nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel);
+
+	/* Build reply in place (header and CIE fields are network order) */
+	packet->hdr.type = NHRP_PACKET_RESOLUTION_REPLY;
+	packet->hdr.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT;
+	packet->hdr.flags &= NHRP_FLAG_RESOLUTION_SOURCE_IS_ROUTER |
+			     NHRP_FLAG_RESOLUTION_SOURCE_STABLE |
+			     NHRP_FLAG_RESOLUTION_UNIQUE |
+			     NHRP_FLAG_RESOLUTION_NAT;
+	packet->hdr.flags |= NHRP_FLAG_RESOLUTION_DESTINATION_STABLE |
+			     NHRP_FLAG_RESOLUTION_AUTHORATIVE;
+
+	cie = nhrp_cie_alloc();
+	if (cie == NULL)
+		return FALSE;
+
+	cie->hdr = (struct nhrp_cie_header) {
+		.code = NHRP_CODE_SUCCESS,
+		.prefix_length = peer->prefix_length,
+	};
+	if (peer->holding_time)
+		cie->hdr.holding_time = htons(peer->holding_time);
+	else if (peer->interface != NULL)
+		cie->hdr.holding_time = htons(peer->interface->holding_time);
+	else
+		cie->hdr.holding_time = htons(NHRP_DEFAULT_HOLDING_TIME);
+
+	payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_ANY);
+	nhrp_payload_free(payload);
+	nhrp_payload_set_type(payload, NHRP_PAYLOAD_TYPE_CIE_LIST);
+	nhrp_payload_add_cie(payload, cie);
+
+	if (!nhrp_packet_reroute(packet, NULL))
+		return FALSE;
+
+	peer = packet->dst_peer;
+	cie->hdr.mtu = htons(peer->my_nbma_mtu);
+	cie->nbma_address = peer->my_nbma_address;
+	cie->protocol_address = packet->dst_iface->protocol_address;
+
+	nhrp_info("Sending Resolution Reply %s/%d is-at %s (holdtime %d)",
+		  nhrp_address_format(&packet->dst_protocol_address,
+				      sizeof(tmp), tmp),
+		  cie->hdr.prefix_length,
+		  nhrp_address_format(&cie->nbma_address,
+				      sizeof(tmp2), tmp2),
+		  ntohs(cie->hdr.holding_time));
+
+	/* Reset NAT header to regenerate it for reply */
+	payload = nhrp_packet_extension(packet,
+					NHRP_EXTENSION_NAT_ADDRESS |
+					NHRP_EXTENSION_FLAG_NOCREATE,
+					NHRP_PAYLOAD_TYPE_ANY);
+	if (payload != NULL) {
+		nhrp_payload_free(payload);
+		nhrp_payload_set_type(payload, NHRP_PAYLOAD_TYPE_CIE_LIST);
+	}
+
+	return nhrp_packet_send(packet);
+}
+
+static int find_one(void *ctx, struct nhrp_peer *p)
+{
+ return 1;
+}
+
+static int remove_old_registrations(void *ctx, struct nhrp_peer *p)
+{
+ struct nhrp_peer *peer = (struct nhrp_peer *) ctx;
+
+ /* If re-registration, mark the new connection up */
+ if (nhrp_address_cmp(&peer->protocol_address,
+ &p->protocol_address) == 0 &&
+ nhrp_address_cmp(&peer->next_hop_address,
+ &p->next_hop_address) == 0 &&
+ peer->prefix_length == p->prefix_length)
+ peer->flags |= p->flags & (NHRP_PEER_FLAG_UP |
+ NHRP_PEER_FLAG_LOWER_UP);
+
+ p->flags |= NHRP_PEER_FLAG_REPLACED;
+ nhrp_peer_remove(p);
+ return 0;
+}
+
+static void nhrp_server_finish_reg(struct nhrp_pending_request *pr)
+{
+ char tmp[64], tmp2[64];
+ struct nhrp_packet *packet = pr->packet;
+
+ if (pr->rpeer != NULL &&
+ nhrp_packet_reroute(packet, pr->rpeer)) {
+ nhrp_info("Sending Registration Reply from proto src %s to %s (%d bindings accepted, %d rejected)",
+ nhrp_address_format(&packet->dst_protocol_address,
+ sizeof(tmp), tmp),
+ nhrp_address_format(&packet->src_protocol_address,
+ sizeof(tmp2), tmp2),
+ pr->num_ok, pr->num_error);
+
+ nhrp_packet_send(packet);
+ } else {
+ /* We could not create route peer entry (likely out of memory),
+ * so we can't do much more here. */
+ nhrp_info("Dropping Registration Reply from proto src %s to %s",
+ nhrp_address_format(&packet->dst_protocol_address,
+ sizeof(tmp), tmp),
+ nhrp_address_format(&packet->src_protocol_address,
+ sizeof(tmp2), tmp2));
+ }
+
+ nhrp_server_finish_request(pr);
+}
+
+static void nhrp_server_finish_cie_reg_cb(union nhrp_peer_event e, int revents)
+{
+ struct nhrp_peer *peer;
+ struct nhrp_pending_request *pr;
+ struct nhrp_packet *packet;
+ struct nhrp_cie *cie;
+ struct nhrp_peer_selector sel;
+ char tmp[64], reason[32];
+
+ peer = nhrp_peer_from_event(e, revents);
+ pr = peer->request;
+ packet = pr->packet;
+ cie = pr->cie;
+
+ peer->request = NULL;
+ nhrp_address_format(&peer->protocol_address, sizeof(tmp), tmp);
+ if (revents != 0 && nhrp_peer_event_ok(e, revents)) {
+ nhrp_debug("[%s] Peer registration authorized", tmp);
+
+ /* Remove all old stuff and accept registration */
+ memset(&sel, 0, sizeof(sel));
+ sel.flags = NHRP_PEER_FIND_EXACT;
+ sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE;
+ sel.interface = packet->src_iface;
+ sel.protocol_address = peer->protocol_address;
+ sel.prefix_length = peer->prefix_length;
+ nhrp_peer_foreach(remove_old_registrations, peer, &sel);
+
+ pr->num_ok++;
+ cie->hdr.code = NHRP_CODE_SUCCESS;
+ nhrp_peer_insert(peer);
+ } else {
+ if (revents == 0)
+ nhrp_error("[%s] Peer registration failed: "
+ "static entry exists", tmp);
+ else
+ nhrp_error("[%s] Peer registration failed: %s",
+ tmp,
+ nhrp_peer_event_reason(e, revents,
+ sizeof(reason),
+ reason));
+ pr->num_error++;
+ cie->hdr.code = NHRP_CODE_ADMINISTRATIVELY_PROHIBITED;
+ peer->flags |= NHRP_PEER_FLAG_REPLACED;
+ }
+ if (pr->rpeer == NULL)
+ pr->rpeer = nhrp_peer_get(peer);
+
+ nhrp_peer_put(peer);
+ pr->peer = NULL;
+
+ /* Process next CIE or finish registration handling */
+ if (cie->cie_list_entry.next != &pr->payload->u.cie_list) {
+ pr->cie = list_next(&cie->cie_list_entry, struct nhrp_cie, cie_list_entry);
+ nhrp_server_start_cie_reg(pr);
+ } else {
+ nhrp_server_finish_reg(pr);
+ }
+
+}
+
+static void nhrp_server_start_cie_reg(struct nhrp_pending_request *pr)
+{
+ struct nhrp_cie *cie = pr->cie;
+ struct nhrp_packet *packet = pr->packet;
+ struct nhrp_peer *peer;
+ struct nhrp_peer_selector sel;
+
+ peer = nhrp_peer_alloc(packet->src_iface);
+ if (peer == NULL) {
+ /* Mark all remaining registration requests as failed
+ * due to lack of memory, and send reply */
+ for (; cie->cie_list_entry.next != &pr->payload->u.cie_list;
+ cie = list_next(&cie->cie_list_entry, struct nhrp_cie, cie_list_entry)) {
+ pr->num_error++;
+ cie->hdr.code = NHRP_CODE_INSUFFICIENT_RESOURCES;
+ }
+ pr->num_error++;
+ cie->hdr.code = NHRP_CODE_INSUFFICIENT_RESOURCES;
+ nhrp_server_finish_reg(pr);
+ return;
+ }
+
+ peer->type = NHRP_PEER_TYPE_DYNAMIC;
+ peer->afnum = packet->hdr.afnum;
+ peer->protocol_type = packet->hdr.protocol_type;
+ peer->expire_time = pr->now + ntohs(cie->hdr.holding_time);
+ peer->mtu = ntohs(cie->hdr.mtu);
+ if (cie->nbma_address.addr_len != 0)
+ peer->next_hop_address = cie->nbma_address;
+ else
+ peer->next_hop_address = packet->src_nbma_address;
+
+ if (pr->natted) {
+ peer->next_hop_nat_oa = peer->next_hop_address;
+ peer->next_hop_address = packet->src_linklayer_address;
+ }
+
+ if (cie->protocol_address.addr_len != 0)
+ peer->protocol_address = cie->protocol_address;
+ else
+ peer->protocol_address = packet->src_protocol_address;
+
+ peer->prefix_length = cie->hdr.prefix_length;
+ if (peer->prefix_length == 0xff)
+ peer->prefix_length = peer->protocol_address.addr_len * 8;
+
+ memset(&sel, 0, sizeof(sel));
+ sel.flags = NHRP_PEER_FIND_EXACT;
+ sel.type_mask = ~NHRP_PEER_TYPEMASK_REMOVABLE;
+ sel.interface = packet->src_iface;
+ sel.protocol_address = peer->protocol_address;
+ sel.prefix_length = peer->prefix_length;
+
+ /* Link the created peer and pending request structures */
+ pr->peer = peer;
+ peer->request = pr;
+
+	/* Check that there are no conflicting peers */
+ if (nhrp_peer_foreach(find_one, peer, &sel) != 0) {
+ cie->hdr.code = NHRP_CODE_ADMINISTRATIVELY_PROHIBITED;
+ peer->flags |= NHRP_PEER_FLAG_REPLACED;
+ nhrp_server_finish_cie_reg_cb(&peer->child, 0);
+ } else {
+ nhrp_peer_run_script(peer, "peer-register",
+ nhrp_server_finish_cie_reg_cb);
+ }
+}
+
+/* Handle a Registration Request: validate it, then process its CIEs. */
+static int nhrp_handle_registration_request(struct nhrp_packet *packet)
+{
+	char tmp[64], tmp2[64];
+	struct nhrp_payload *payload;
+	struct nhrp_cie *cie;
+	struct nhrp_pending_request *pr;
+	int natted = 0;
+
+	nhrp_info("Received Registration Request from proto src %s to %s",
+		  nhrp_address_format(&packet->src_protocol_address,
+				      sizeof(tmp), tmp),
+		  nhrp_address_format(&packet->dst_protocol_address,
+				      sizeof(tmp2), tmp2));
+
+	if (nhrp_server_request_pending(packet)) {
+		nhrp_info("Already processing: resent packet ignored.");
+		return TRUE;
+	}
+
+	if (num_pending_requests >= NHRP_MAX_PENDING_REQUESTS) {
+		/* We should probably send Registration Reply with CIE error
+		 * NHRP_CODE_INSUFFICIENT_RESOURCES, or an Error Indication.
+		 * However, we do not have a direct peer entry nor can we make
+		 * sure that the lower layer is up, so we just drop it for now. */
+		nhrp_info("Too many pending requests: dropping this one");
+		return TRUE;
+	}
+
+	/* Cisco NAT extension, CIE added IF all of the following is true:
+	 * 1. We are the first hop registration server
+	 *    (=no entries in forward transit CIE list)
+	 * 2. NAT is detected (link layer address != announced address)
+	 * 3. NAT extension is requested */
+	payload = nhrp_packet_extension(packet,
+					NHRP_EXTENSION_FORWARD_TRANSIT_NHS |
+					NHRP_EXTENSION_FLAG_NOCREATE,
+					NHRP_PAYLOAD_TYPE_CIE_LIST);
+	if (payload != NULL && list_empty(&payload->u.cie_list) &&
+	    packet->src_linklayer_address.type != PF_UNSPEC &&
+	    nhrp_address_cmp(&packet->src_nbma_address,
+			     &packet->src_linklayer_address) != 0) {
+		natted = 1;
+		payload = nhrp_packet_extension(packet,
+						NHRP_EXTENSION_NAT_ADDRESS |
+						NHRP_EXTENSION_FLAG_NOCREATE,
+						NHRP_PAYLOAD_TYPE_CIE_LIST);
+		if (payload != NULL) {
+			cie = nhrp_cie_alloc();
+			if (cie != NULL) {
+				cie->nbma_address = packet->src_linklayer_address;
+				cie->protocol_address = packet->src_protocol_address;
+				nhrp_payload_add_cie(payload, cie);
+			}
+		}
+	}
+
+	packet->hdr.type = NHRP_PACKET_REGISTRATION_REPLY;
+	packet->hdr.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT;
+	packet->hdr.flags &= NHRP_FLAG_REGISTRATION_UNIQUE |
+			     NHRP_FLAG_REGISTRATION_NAT;
+
+	payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST);
+	if (list_empty(&payload->u.cie_list)) {
+		nhrp_error("Received registration request has no CIEs");
+		return TRUE;
+	}
+
+	/* Start processing the CIEs; fail if the request can't be recorded */
+	pr = nhrp_server_record_request(packet);
+	if (pr == NULL)
+		return FALSE;
+	pr->natted = natted;
+	pr->payload = payload;
+	pr->cie = nhrp_payload_get_cie(payload, 1);
+	nhrp_server_start_cie_reg(pr);
+	return TRUE;
+}
+
+static int remove_peer_by_nbma(void *ctx, struct nhrp_peer *peer)
+{
+ struct nhrp_address *nbma = ctx;
+ struct nhrp_address *peer_nbma = NULL;
+
+ if (!nhrp_address_is_any_addr(nbma)) {
+ if (peer->type == NHRP_PEER_TYPE_SHORTCUT_ROUTE) {
+ struct nhrp_peer *nexthop;
+
+ nexthop = nhrp_peer_route(peer->interface,
+ &peer->next_hop_address,
+ NHRP_PEER_FIND_EXACT,
+ NHRP_PEER_TYPEMASK_ADJACENT);
+ if (nexthop != NULL)
+ peer_nbma = &nexthop->next_hop_address;
+ } else {
+ peer_nbma = &peer->next_hop_address;
+ }
+ } else {
+ peer_nbma = nbma;
+ }
+
+ if (peer_nbma != NULL &&
+ nhrp_address_cmp(peer_nbma, nbma) == 0)
+ nhrp_peer_remove(peer);
+
+ return 0;
+}
+
+static int nhrp_handle_purge_request(struct nhrp_packet *packet)
+{
+ char tmp[64], tmp2[64];
+ struct nhrp_peer_selector sel;
+ struct nhrp_payload *payload;
+ struct nhrp_cie *cie;
+ int flags, ret = TRUE;
+
+ nhrp_info("Received Purge Request from proto src %s to %s",
+ nhrp_address_format(&packet->src_protocol_address,
+ sizeof(tmp), tmp),
+ nhrp_address_format(&packet->dst_protocol_address,
+ sizeof(tmp2), tmp2));
+
+ flags = packet->hdr.flags;
+ packet->hdr.type = NHRP_PACKET_PURGE_REPLY;
+ packet->hdr.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT;
+ packet->hdr.flags = 0;
+
+ if (!(flags & NHRP_FLAG_PURGE_NO_REPLY)) {
+ if (nhrp_packet_reroute(packet, NULL))
+ ret = nhrp_packet_send(packet);
+ else
+ ret = FALSE;
+ }
+
+ payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST);
+ list_for_each_entry(cie, &payload->u.cie_list, cie_list_entry) {
+ nhrp_info("Purge proto %s/%d nbma %s",
+ nhrp_address_format(&cie->protocol_address,
+ sizeof(tmp), tmp),
+ cie->hdr.prefix_length,
+ nhrp_address_format(&cie->nbma_address,
+ sizeof(tmp2), tmp2));
+
+ memset(&sel, 0, sizeof(sel));
+ sel.flags = NHRP_PEER_FIND_EXACT;
+ sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE;
+ sel.interface = packet->src_iface;
+ sel.protocol_address = cie->protocol_address;
+ sel.prefix_length = cie->hdr.prefix_length;
+ nhrp_peer_foreach(remove_peer_by_nbma,
+ &cie->nbma_address, &sel);
+ nhrp_rate_limit_clear(&cie->protocol_address,
+ cie->hdr.prefix_length);
+ }
+
+ return ret;
+}
+
+static int nhrp_handle_traffic_indication(struct nhrp_packet *packet)
+{
+ char tmp[64], tmp2[64];
+ struct nhrp_address dst;
+ struct nhrp_payload *pl;
+
+ pl = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_RAW);
+ if (pl == NULL)
+ return FALSE;
+
+ if (!nhrp_address_parse_packet(packet->hdr.protocol_type,
+ pl->u.raw->length, pl->u.raw->data,
+ NULL, &dst))
+ return FALSE;
+
+ /* Shortcuts enabled? */
+ if (packet->src_iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT) {
+ nhrp_info("Traffic Indication from proto src %s; "
+ "about packet to %s",
+ nhrp_address_format(&packet->src_protocol_address,
+ sizeof(tmp), tmp),
+ nhrp_address_format(&dst, sizeof(tmp2), tmp2));
+
+ nhrp_peer_traffic_indication(packet->src_iface,
+ packet->hdr.afnum,
+ &dst);
+ } else {
+ nhrp_info("Traffic Indication ignored from proto src %s; "
+ "about packet to %s",
+ nhrp_address_format(&packet->src_protocol_address,
+ sizeof(tmp), tmp),
+ nhrp_address_format(&dst, sizeof(tmp2), tmp2));
+ }
+
+ return TRUE;
+}
+
+void server_init(void)
+{
+ nhrp_packet_hook_request(NHRP_PACKET_RESOLUTION_REQUEST,
+ nhrp_handle_resolution_request);
+ nhrp_packet_hook_request(NHRP_PACKET_REGISTRATION_REQUEST,
+ nhrp_handle_registration_request);
+ nhrp_packet_hook_request(NHRP_PACKET_PURGE_REQUEST,
+ nhrp_handle_purge_request);
+ nhrp_packet_hook_request(NHRP_PACKET_TRAFFIC_INDICATION,
+ nhrp_handle_traffic_indication);
+}
diff --git a/nhrp/opennhrp.c b/nhrp/opennhrp.c
new file mode 100644
index 0000000..8ba870d
--- /dev/null
+++ b/nhrp/opennhrp.c
@@ -0,0 +1,524 @@
+/* opennhrp.c - OpenNHRP main routines
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <errno.h>
+#include <malloc.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+
+#include "nhrp_common.h"
+#include "nhrp_peer.h"
+#include "nhrp_interface.h"
+
+/* Version banner: "OpenNHRP " followed by OPENNHRP_VERSION, with an extra
+ * note appended when the build defines NHRP_NO_NBMA_GRE. */
+const char *nhrp_version_string =
+ "OpenNHRP " OPENNHRP_VERSION
+#ifdef NHRP_NO_NBMA_GRE
+ " (no NBMA GRE support)"
+#endif
+ ;
+
+/* Run-time paths.  main() replaces them from the -a, -p, -c and -s
+ * command line options respectively. */
+const char *nhrp_admin_socket = OPENNHRP_ADMIN_SOCKET;
+const char *nhrp_pid_file = "/var/run/opennhrp.pid";
+const char *nhrp_config_file = "/etc/opennhrp/opennhrp.conf";
+const char *nhrp_script_file = "/etc/opennhrp/opennhrp-script";
+/* Set to 1 by the -v option. */
+int nhrp_verbose = 0;
+/* TRUE only while main() is inside ev_loop(). */
+int nhrp_running = FALSE;
+
+/* Descriptor of the opened and locked pid file.  remove_pid_file() and the
+ * error path of daemonize() treat the value 0 as "no pid file". */
+static int pid_file_fd;
+
+/* Dump a buffer to stderr as rows of up to 16 bytes: the hex values first,
+ * then the same bytes as characters with non-graphic ones shown as '.'.
+ * The dump is preceded by "name:" and followed by an empty line.
+ */
+void nhrp_hex_dump(const char *name, const uint8_t *buf, int bytes)
+{
+	int row, col, cols;
+
+	fprintf(stderr, "%s:\n", name);
+
+	for (row = 0; row < bytes; row += cols) {
+		cols = bytes - row;
+		if (cols > 0x10)
+			cols = 0x10;
+
+		for (col = 0; col < cols; col++)
+			fprintf(stderr, "%02X ", buf[row + col]);
+
+		/* Separator; short rows are padded up to the text column */
+		if (cols == 0x10)
+			fprintf(stderr, " ");
+		else
+			fprintf(stderr, "%*s ", 3 * (0x10 - cols), "");
+
+		for (col = 0; col < cols; col++) {
+			uint8_t c = buf[row + col];
+
+			fprintf(stderr, "%c", isgraph(c) ? c : '.');
+		}
+		fprintf(stderr, "\n");
+	}
+
+	fprintf(stderr, "\n");
+}
+
+/* libev signal watcher callback.
+ *
+ * SIGUSR1 dumps the peer cache, SIGINT/SIGTERM stop the event loop and
+ * SIGHUP removes every peer whose type is in NHRP_PEER_TYPEMASK_REMOVABLE.
+ * Other signals are ignored.
+ */
+static void handle_signal_cb(struct ev_signal *w, int revents)
+{
+	const int signo = w->signum;
+
+	if (signo == SIGUSR1) {
+		nhrp_peer_dump_cache();
+	} else if (signo == SIGINT || signo == SIGTERM) {
+		ev_unloop(EVUNLOOP_ALL);
+	} else if (signo == SIGHUP) {
+		struct nhrp_peer_selector sel;
+
+		memset(&sel, 0, sizeof(sel));
+		sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE;
+		nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel);
+	}
+}
+
+/* Signals handled by handle_signal_cb(), and one libev watcher for each */
+static int hook_signal[] = { SIGUSR1, SIGHUP, SIGINT, SIGTERM };
+static ev_signal signal_event[ARRAY_SIZE(hook_signal)];
+
+/* Initialise and start a signal watcher for every entry of hook_signal[] */
+static void signal_init(void)
+{
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(hook_signal)) {
+		ev_signal_init(&signal_event[idx], handle_signal_cb,
+			       hook_signal[idx]);
+		ev_signal_start(&signal_event[idx]);
+		idx++;
+	}
+}
+
+/* Read the next whitespace delimited word from a configuration stream.
+ *
+ * Leading whitespace and '#' comments (running to end of line) are skipped.
+ * At most len-1 characters are stored and the result is always NUL
+ * terminated.  *lineno is incremented for every newline consumed, including
+ * the one that terminates the word.
+ *
+ * Returns TRUE when a word was read, FALSE at end of input (word is left
+ * untouched in that case) or when len is zero.
+ */
+static int read_word(FILE *in, int *lineno, size_t len, char *word)
+{
+	int ch, comment = 0;
+	size_t i = 0;
+
+	/* len-1 below would wrap around for an empty buffer */
+	if (len == 0)
+		return FALSE;
+
+	/* Skip whitespace and comments */
+	for (;;) {
+		ch = fgetc(in);
+		if (ch == EOF)
+			return FALSE;
+		if (ch == '#')
+			comment = 1;
+		if (!comment && !isspace(ch))
+			break;
+		if (ch == '\n') {
+			(*lineno)++;
+			comment = 0;
+		}
+	}
+
+	/* Collect the word.  The index advances together with the store, so
+	 * hitting EOF right after the last character no longer erases it. */
+	while (i < len - 1 && ch != EOF && !isspace(ch)) {
+		word[i++] = ch;
+		ch = fgetc(in);
+	}
+	word[i] = 0;
+
+	if (ch == '\n')
+		(*lineno)++;
+	else if (ch != EOF && !isspace(ch))
+		/* Word did not fit: keep the unread character for the
+		 * next call instead of dropping it */
+		ungetc(ch, in);
+
+	return TRUE;
+}
+
+/* Parse the configuration file and create the interfaces and static peers
+ * it describes.
+ *
+ * The file is a stream of whitespace separated words (see read_word()).
+ * "interface <name>" selects the interface subsequent keywords apply to;
+ * "map", "dynamic-map" and "shortcut-target" create a peer on it, and
+ * "register", "cisco" and "holding-time" modify the most recent peer.
+ *
+ * Parsing stops at the first error, which is logged together with the file
+ * name, line number and last word read.  Returns TRUE on success, FALSE if
+ * the file cannot be opened or contains an error.
+ */
+static int load_config(const char *config_file)
+{
+#define NEED_INTERFACE() if (iface == NULL) { rc = 2; break; } peer = NULL;
+#define NEED_PEER() if (peer == NULL || peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) { rc = 3; break; }
+/* Read a mandatory keyword argument; a missing one is error 1 */
+#define NEED_ARGUMENT(buf) if (!read_word(in, &lineno, sizeof(buf), buf)) { rc = 1; break; }
+
+	/* Indexed by rc */
+	static const char *errors[] = {
+		"syntax error",
+		"missing keyword",
+		"keyword valid only for 'interface' definition",
+		"keyword valid only for 'map' definition",
+		"invalid address",
+		"dynamic-map requires a network address",
+		"bad multicast destination",
+		"keyword valid only for 'interface' and 'shortcut-target' definition",
+		"out of memory",
+	};
+	struct nhrp_interface *iface = NULL;
+	struct nhrp_peer *peer = NULL;
+	struct nhrp_address paddr;
+	char word[32], nbma[32], addr[32];
+	FILE *in;
+	int lineno = 1, rc = -1;
+
+	in = fopen(config_file, "r");
+	if (in == NULL) {
+		nhrp_error("Unable to open configuration file '%s'.",
+			   config_file);
+		return FALSE;
+	}
+
+	while (read_word(in, &lineno, sizeof(word), word)) {
+		if (strcmp(word, "interface") == 0) {
+			if (!read_word(in, &lineno, sizeof(word), word)) {
+				rc = 1;
+				break;
+			}
+			iface = nhrp_interface_get_by_name(word, TRUE);
+			if (iface != NULL)
+				iface->flags |= NHRP_INTERFACE_FLAG_CONFIGURED;
+			peer = NULL;
+		} else if (strcmp(word, "shortcut-target") == 0) {
+			NEED_INTERFACE();
+			NEED_ARGUMENT(addr);
+			peer = nhrp_peer_alloc(iface);
+			peer->type = NHRP_PEER_TYPE_LOCAL_ADDR;
+			peer->afnum = AFNUM_RESERVED;
+			if (!nhrp_address_parse(addr, &peer->protocol_address,
+						&peer->prefix_length)) {
+				/* drop our reference, peer was never inserted */
+				nhrp_peer_put(peer);
+				peer = NULL;
+				rc = 4;
+				break;
+			}
+			peer->protocol_type = nhrp_protocol_from_pf(peer->protocol_address.type);
+			nhrp_peer_insert(peer);
+			nhrp_peer_put(peer);
+		} else if (strcmp(word, "dynamic-map") == 0) {
+			NEED_INTERFACE();
+			NEED_ARGUMENT(addr);
+			NEED_ARGUMENT(nbma);
+
+			peer = nhrp_peer_alloc(iface);
+			peer->type = NHRP_PEER_TYPE_STATIC_DNS;
+			if (!nhrp_address_parse(addr, &peer->protocol_address,
+						&peer->prefix_length)) {
+				nhrp_peer_put(peer);
+				peer = NULL;
+				rc = 4;
+				break;
+			}
+			if (!nhrp_address_is_network(&peer->protocol_address,
+						     peer->prefix_length)) {
+				nhrp_peer_put(peer);
+				peer = NULL;
+				rc = 5;
+				break;
+			}
+			peer->protocol_type = nhrp_protocol_from_pf(
+				peer->protocol_address.type);
+			peer->nbma_hostname = strdup(nbma);
+			peer->afnum = nhrp_afnum_from_pf(
+				peer->next_hop_address.type);
+			nhrp_peer_insert(peer);
+			nhrp_peer_put(peer);
+		} else if (strcmp(word, "map") == 0) {
+			NEED_INTERFACE();
+			NEED_ARGUMENT(addr);
+			NEED_ARGUMENT(nbma);
+
+			peer = nhrp_peer_alloc(iface);
+			peer->type = NHRP_PEER_TYPE_STATIC;
+			if (!nhrp_address_parse(addr, &peer->protocol_address,
+						&peer->prefix_length)) {
+				nhrp_peer_put(peer);
+				peer = NULL;
+				rc = 4;
+				break;
+			}
+			peer->protocol_type = nhrp_protocol_from_pf(
+				peer->protocol_address.type);
+			/* Not a literal address: keep it as a host name */
+			if (!nhrp_address_parse(nbma, &peer->next_hop_address,
+						NULL))
+				peer->nbma_hostname = strdup(nbma);
+			peer->afnum = nhrp_afnum_from_pf(peer->next_hop_address.type);
+			nhrp_peer_insert(peer);
+			nhrp_peer_put(peer);
+		} else if (strcmp(word, "register") == 0) {
+			NEED_PEER();
+			peer->flags |= NHRP_PEER_FLAG_REGISTER;
+		} else if (strcmp(word, "cisco") == 0) {
+			NEED_PEER();
+			peer->flags |= NHRP_PEER_FLAG_CISCO;
+		} else if (strcmp(word, "holding-time") == 0) {
+			NEED_ARGUMENT(word);
+			if (peer != NULL &&
+			    peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) {
+				peer->holding_time = atoi(word);
+			} else if (iface != NULL) {
+				iface->holding_time = atoi(word);
+				peer = NULL;
+			} else {
+				/* stop here so the error is reported at
+				 * the offending line */
+				rc = 7;
+				break;
+			}
+		} else if (strcmp(word, "cisco-authentication") == 0) {
+			struct nhrp_buffer *buf;
+			struct nhrp_cisco_authentication_extension *auth;
+
+			NEED_INTERFACE();
+			NEED_ARGUMENT(word);
+
+			buf = nhrp_buffer_alloc(strlen(word) + sizeof(uint32_t));
+			auth = (struct nhrp_cisco_authentication_extension *) buf->data;
+			auth->type = NHRP_AUTHENTICATION_PLAINTEXT;
+			memcpy(auth->secret, word, strlen(word));
+
+			iface->auth_token = buf;
+		} else if (strcmp(word, "route-table") == 0) {
+			NEED_INTERFACE();
+			NEED_ARGUMENT(word);
+			iface->route_table = atoi(word);
+		} else if (strcmp(word, "shortcut") == 0) {
+			NEED_INTERFACE();
+			iface->flags |= NHRP_INTERFACE_FLAG_SHORTCUT;
+		} else if (strcmp(word, "redirect") == 0) {
+			NEED_INTERFACE();
+			iface->flags |= NHRP_INTERFACE_FLAG_REDIRECT;
+		} else if (strcmp(word, "non-caching") == 0) {
+			NEED_INTERFACE();
+			iface->flags |= NHRP_INTERFACE_FLAG_NON_CACHING;
+		} else if (strcmp(word, "shortcut-destination") == 0) {
+			NEED_INTERFACE();
+			iface->flags |= NHRP_INTERFACE_FLAG_SHORTCUT_DEST;
+		} else if (strcmp(word, "multicast") == 0) {
+			NEED_INTERFACE();
+			NEED_ARGUMENT(word);
+			if (strcmp(word, "dynamic") == 0) {
+				iface->mcast_mask = \
+					BIT(NHRP_PEER_TYPE_STATIC) |
+					BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) |
+					BIT(NHRP_PEER_TYPE_DYNAMIC);
+			} else if (strcmp(word, "nhs") == 0) {
+				iface->mcast_mask = \
+					BIT(NHRP_PEER_TYPE_STATIC) |
+					BIT(NHRP_PEER_TYPE_DYNAMIC_NHS);
+			} else if (nhrp_address_parse(word, &paddr, NULL)) {
+				struct nhrp_address *grown;
+
+				/* Grow through a temporary so the existing
+				 * list survives an allocation failure */
+				grown = realloc(iface->mcast_addr,
+						(iface->mcast_numaddr + 1) *
+						sizeof(struct nhrp_address));
+				if (grown == NULL) {
+					rc = 8;
+					break;
+				}
+				grown[iface->mcast_numaddr] = paddr;
+				iface->mcast_addr = grown;
+				iface->mcast_numaddr++;
+			} else {
+				rc = 6;
+				break;
+			}
+		} else {
+			rc = 0;
+			break;
+		}
+	}
+	fclose(in);
+
+	if (rc >= 0) {
+		nhrp_error("Configuration file %s in %s:%d, near word '%s'",
+			   errors[rc], config_file, lineno, word);
+		return FALSE;
+	}
+	return TRUE;
+}
+
+/* Close the pid file descriptor and delete the pid file.  Does nothing when
+ * no pid file is held (pid_file_fd is 0).  Registered with atexit() by
+ * write_pid(). */
+static void remove_pid_file(void)
+{
+	if (pid_file_fd == 0)
+		return;
+
+	close(pid_file_fd);
+	pid_file_fd = 0;
+	remove(nhrp_pid_file);
+}
+
+/* Create/open the pid file and take an exclusive, non-blocking lock on it.
+ *
+ * An empty nhrp_pid_file disables the pid file and counts as success.
+ * On failure the error is logged, pid_file_fd is reset to 0 ("no pid file")
+ * and FALSE is returned.
+ */
+static int open_pid_file(void)
+{
+	int saved_errno;
+
+	if (strlen(nhrp_pid_file) == 0)
+		return TRUE;
+
+	pid_file_fd = open(nhrp_pid_file, O_CREAT | O_WRONLY,
+			   S_IRUSR | S_IWUSR);
+	if (pid_file_fd < 0) {
+		saved_errno = errno;
+		goto err;
+	}
+
+	fcntl(pid_file_fd, F_SETFD, FD_CLOEXEC);
+	if (flock(pid_file_fd, LOCK_EX | LOCK_NB) < 0) {
+		/* close() may overwrite errno; keep the flock() reason */
+		saved_errno = errno;
+		close(pid_file_fd);
+		goto err;
+	}
+
+	return TRUE;
+
+err:
+	pid_file_fd = 0;
+	nhrp_error("Unable to open/lock pid file: %s.", strerror(saved_errno));
+	return FALSE;
+}
+
+/* Write the current process id into the pid file opened by open_pid_file()
+ * and arrange for the file to be removed at exit.
+ *
+ * Nothing is done when no pid file is held.  The file uses 0 as the "no pid
+ * file" value of pid_file_fd (see remove_pid_file()), so 0 must not be
+ * treated as a usable descriptor here.
+ *
+ * Returns FALSE if truncating or writing the file fails, TRUE otherwise.
+ */
+static int write_pid(void)
+{
+	char tmp[16];
+	int n;
+
+	if (pid_file_fd > 0) {
+		if (ftruncate(pid_file_fd, 0) < 0)
+			return FALSE;
+
+		n = snprintf(tmp, sizeof(tmp), "%d\n", (int) getpid());
+		if (n < 0 || n >= (int) sizeof(tmp))
+			return FALSE;
+		if (write(pid_file_fd, tmp, n) != n)
+			return FALSE;
+
+		atexit(remove_pid_file);
+	}
+
+	return TRUE;
+}
+
+/* Detach from the controlling terminal: fork, start a new session, fork
+ * again, change to "/", clear the umask and point stdin/stdout/stderr at
+ * /dev/null.  The libev default loop is told about the fork afterwards.
+ *
+ * The intermediate parents exit(0).  Returns TRUE in the surviving child,
+ * FALSE on failure.
+ */
+static int daemonize(void)
+{
+	pid_t pid;
+
+	pid = fork();
+	if (pid < 0)
+		return FALSE;
+	if (pid > 0)
+		exit(0);
+
+	if (setsid() < 0)
+		return FALSE;
+
+	pid = fork();
+	if (pid < 0)
+		return FALSE;
+	if (pid > 0)
+		exit(0);
+
+	if (chdir("/") < 0)
+		return FALSE;
+
+	umask(0);
+
+	if (freopen("/dev/null", "r", stdin) == NULL ||
+	    freopen("/dev/null", "w", stdout) == NULL ||
+	    freopen("/dev/null", "w", stderr) == NULL) {
+		nhrp_error("Unable reopen standard file descriptors");
+		goto err;
+	}
+
+	ev_default_fork();
+
+	return TRUE;
+
+err:
+	/* 0 means "no pid file"; closing it would close stdin instead */
+	if (pid_file_fd != 0) {
+		close(pid_file_fd);
+		pid_file_fd = 0;
+	}
+	return FALSE;
+}
+
+/* Print the command line synopsis to stderr.  The prog argument is not used;
+ * the program name in the text is fixed.  Always returns 1 so that callers
+ * can use it directly as the exit status. */
+int usage(const char *prog)
+{
+	static const char help_text[] =
+		"usage: opennhrp [-a admin-socket] [-c config-file] [-s script-file]\n"
+		" [-p pid-file] [-d] [-v]\n"
+		" opennhrp -V\n"
+		"\n"
+		"\t-a admin-socket\tspecify management interface socket\n"
+		"\t-c config-file\tread configuration from config-file\n"
+		"\t-s script-file\tuse specified script-file for event handling\n"
+		"\t-p pid-file\tspecify pid-file\n"
+		"\t-d\t\tfork to background after startup\n"
+		"\t-v\t\tverbose logging\n"
+		"\t-V\t\tshow version number and exit\n"
+		"\n";
+
+	fputs(help_text, stderr);
+	return 1;
+}
+
+/* opennhrp daemon entry point.
+ *
+ * Parses the command line, opens the pid file, initialises the subsystems
+ * in order (signals, server hooks, address handling, configuration, kernel
+ * interface, admin socket, packet forwarding), optionally daemonizes and
+ * runs the event loop until it is stopped.  Everything is torn down again
+ * before returning.
+ *
+ * Exit status: 0 on normal termination (and for -V), 1 for usage, logging
+ * or pid file errors, 3..8 identify the initialisation step that failed.
+ */
+int main(int argc, char **argv)
+{
+	struct nhrp_address any;
+	int i, daemonmode = 0;
+
+	nhrp_address_set_type(&any, AF_UNSPEC);
+
+	/* Every option is exactly "-x"; options taking a value consume
+	 * the following argument */
+	for (i = 1; i < argc; i++) {
+		if (strlen(argv[i]) != 2 || argv[i][0] != '-')
+			return usage(argv[0]);
+
+		switch (argv[i][1]) {
+		case 'c':
+			if (++i >= argc)
+				return usage(argv[0]);
+			nhrp_config_file = argv[i];
+			break;
+		case 's':
+			if (++i >= argc)
+				return usage(argv[0]);
+			nhrp_script_file = argv[i];
+			break;
+		case 'a':
+			if (++i >= argc)
+				return usage(argv[0]);
+			nhrp_admin_socket = argv[i];
+			break;
+		case 'p':
+			if (++i >= argc)
+				return usage(argv[0]);
+			nhrp_pid_file = argv[i];
+			break;
+		case 'd':
+			daemonmode = 1;
+			break;
+		case 'v':
+			nhrp_verbose = 1;
+			break;
+		case 'V':
+			puts(nhrp_version_string);
+			return 0;
+		default:
+			return usage(argv[0]);
+		}
+	}
+
+	srandom(time(NULL));
+	if (!log_init())
+		return 1;
+	if (!open_pid_file())
+		return 1;
+
+	nhrp_info("%s starting", nhrp_version_string);
+
+	ev_default_loop(0);
+	signal_init();
+	server_init();
+	if (!nhrp_address_init())
+		return 3;
+	if (!load_config(nhrp_config_file))
+		return 4;
+	if (!kernel_init())
+		return 5;
+	if (!admin_init(nhrp_admin_socket))
+		return 6;
+	if (!forward_init())
+		return 7;
+
+	if (daemonmode && !daemonize()) {
+		nhrp_error("Failed to daemonize. Exit.");
+		return 8;
+	}
+
+	/* The pid must be written after daemonize() so that it is the
+	 * pid of the surviving process.  A failure is reported but is
+	 * not fatal. */
+	if (!write_pid())
+		nhrp_error("Unable to write pid file '%s'.", nhrp_pid_file);
+
+	nhrp_running = TRUE;
+	ev_loop(0);
+	nhrp_running = FALSE;
+
+	forward_cleanup();
+	kernel_stop_listening();
+	nhrp_peer_cleanup();
+	kernel_cleanup();
+	nhrp_interface_cleanup();
+	nhrp_rate_limit_clear(&any, 0);
+	nhrp_address_cleanup();
+
+	ev_default_destroy();
+
+	return 0;
+}
+
diff --git a/nhrp/opennhrpctl.c b/nhrp/opennhrpctl.c
new file mode 100644
index 0000000..92fb5b5
--- /dev/null
+++ b/nhrp/opennhrpctl.c
@@ -0,0 +1,121 @@
+/* opennhrpctl.c - OpenNHRP command line control utility
+ *
+ * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <sys/socket.h>
+
+/* Connect to the opennhrp daemon's UNIX stream socket.
+ *
+ * Returns the connected descriptor, or -1 with errno set.  A path that does
+ * not fit into sockaddr_un (including its terminating NUL) is rejected with
+ * ENAMETOOLONG instead of being truncated.
+ */
+static int admin_init(const char *opennhrp_socket)
+{
+	struct sockaddr_un sun;
+	size_t pathlen = strlen(opennhrp_socket);
+	int fd;
+
+	if (pathlen >= sizeof(sun.sun_path)) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+
+	memset(&sun, 0, sizeof(sun));
+	sun.sun_family = AF_UNIX;
+	memcpy(sun.sun_path, opennhrp_socket, pathlen + 1);
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		return -1;
+
+	if (connect(fd, (struct sockaddr *) &sun, sizeof(sun)) < 0) {
+		int saved_errno = errno;
+
+		/* keep the connect() error for the caller's message */
+		close(fd);
+		errno = saved_errno;
+		return -1;
+	}
+
+	return fd;
+}
+
+/* Close the connection returned by admin_init().  The result of close() is
+ * deliberately not examined. */
+static void admin_close(int fd)
+{
+	(void) close(fd);
+}
+
+/* Send a command string to the daemon and shut down the sending side of the
+ * socket so the peer sees end-of-request.
+ *
+ * Short writes are continued and EINTR is retried.  Returns 0 on success,
+ * -1 if the string could not be written completely.
+ */
+static int admin_send(int fd, const char *str)
+{
+	size_t left = strlen(str);
+	ssize_t n;
+
+	while (left > 0) {
+		n = write(fd, str, left);
+		if (n > 0) {
+			str += n;
+			left -= (size_t) n;
+			continue;
+		}
+		if (n < 0 && errno == EINTR)
+			continue;
+		return -1;
+	}
+
+	shutdown(fd, SHUT_WR);
+	return 0;
+}
+
+/* Copy everything the daemon sends to standard output until it closes the
+ * connection.
+ *
+ * The byte counts are kept in a signed type: recv() reports errors as -1,
+ * which must not be mistaken for a (huge) length.  Short writes are
+ * continued and EINTR is retried.
+ *
+ * Returns 0 at end of stream, -1 on a receive or write error.
+ */
+static int admin_receive(int fd)
+{
+	char msg[512];
+	ssize_t len, done, n;
+
+	for (;;) {
+		len = recv(fd, msg, sizeof(msg), 0);
+		if (len < 0) {
+			if (errno == EINTR)
+				continue;
+			return -1;
+		}
+		if (len == 0)
+			return 0;
+
+		done = 0;
+		while (done < len) {
+			n = write(STDOUT_FILENO, msg + done,
+				  (size_t) (len - done));
+			if (n > 0) {
+				done += n;
+				continue;
+			}
+			if (n < 0 && errno == EINTR)
+				continue;
+			return -1;
+		}
+	}
+}
+
+/* Print a one line synopsis for prog to stderr; returns 1 for use as the
+ * process exit status. */
+static int usage(const char *prog)
+{
+	const int exit_status = 1;
+
+	fprintf(stderr, "usage: %s [-a admin-socket] <command>\n", prog);
+
+	return exit_status;
+}
+
+/* opennhrpctl entry point.
+ *
+ * Arguments of the form "-x" are options (only -a <socket> is known); all
+ * other arguments are joined with single spaces into one command line,
+ * terminated by a newline, which is sent to the daemon.  The daemon's reply
+ * is copied to standard output.
+ *
+ * Exit status: 0 on success, 1 for usage errors, an over-long command or a
+ * failed connection, 2 when sending or receiving fails.
+ */
+int main(int argc, char **argv)
+{
+	const char *socket = OPENNHRP_ADMIN_SOCKET;
+	char cmd[1024] = "", *pos = cmd;
+	int i, fd;
+
+	for (i = 1; i < argc; i++) {
+		if (strlen(argv[i]) != 2 || argv[i][0] != '-') {
+			size_t room = &cmd[sizeof(cmd)-1] - pos;
+			int n = snprintf(pos, room, " %s\n", argv[i]);
+
+			/* snprintf() returns the untruncated length;
+			 * advancing by it after a truncation would move
+			 * pos outside of cmd[] */
+			if (n < 0 || (size_t) n >= room) {
+				fprintf(stderr, "Command too long.\n");
+				return 1;
+			}
+			/* step back over the newline so that the next
+			 * word replaces it */
+			pos += n - 1;
+			continue;
+		}
+
+		switch (argv[i][1]) {
+		case 'a':
+			if (++i >= argc)
+				return usage(argv[0]);
+			socket = argv[i];
+			break;
+		default:
+			return usage(argv[0]);
+		}
+	}
+	if (cmd == pos)
+		return usage(argv[0]);
+
+	fd = admin_init(socket);
+	if (fd < 0) {
+		fprintf(stderr,
+			"Failed to connect to opennhrp daemon [%s]: %s.\n\n",
+			socket, strerror(errno));
+		return 1;
+	}
+
+	/* &cmd[1] skips the separator written before the first word */
+	if (admin_send(fd, &cmd[1]) < 0 ||
+	    admin_receive(fd) < 0) {
+		fprintf(stderr, "Failed to send request: %s.\n",
+			strerror(errno));
+		admin_close(fd);
+		return 2;
+	}
+
+	admin_close(fd);
+	return 0;
+}
diff --git a/nhrp/sysdep_netlink.c b/nhrp/sysdep_netlink.c
new file mode 100644
index 0000000..d058a98
--- /dev/null
+++ b/nhrp/sysdep_netlink.c
@@ -0,0 +1,1159 @@
+/* sysdep_netlink.c - Linux netlink glue
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <time.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <malloc.h>
+#include <string.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <asm/types.h>
+#include <arpa/inet.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/ip.h>
+#include <linux/if_arp.h>
+#include <linux/if_tunnel.h>
+
+#include "libev.h"
+#include "nhrp_common.h"
+#include "nhrp_interface.h"
+#include "nhrp_peer.h"
+
+/* Buffer sizes in bytes.  NETLINK_RECV_BUFFER is the size of the stack
+ * buffer netlink_receive() reads into; NETLINK_KERNEL_BUFFER is presumably
+ * the socket buffer size requested from the kernel (its use is outside this
+ * part of the file -- verify). */
+#define NETLINK_KERNEL_BUFFER (256 * 1024)
+#define NETLINK_RECV_BUFFER (8 * 1024)
+
+/* Address just past the (aligned) current end of netlink message nmsg, i.e.
+ * where the next attribute is appended.  Note: does arithmetic on a void
+ * pointer. */
+#define NLMSG_TAIL(nmsg) \
+ ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
+
+/* First attribute following a struct ndmsg r, and the number of attribute
+ * bytes in message n that carries a struct ndmsg. */
+#define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
+#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg))
+
+/* Handler invoked by netlink_receive() for one received netlink message */
+typedef void (*netlink_dispatch_f)(struct nlmsghdr *msg);
+
+/* State of one netlink socket */
+struct netlink_fd {
+ /* socket descriptor used for sendmsg()/recvmsg() */
+ int fd;
+ /* last sequence number used; incremented for every request sent */
+ __u32 seq;
+ /* libev watcher; netlink_read_cb() recovers this struct from it */
+ struct ev_io io;
+
+ /* Handler table indexed by nlmsg_type, and the bound netlink_receive()
+ * checks the type against before indexing it */
+ int dispatch_size;
+ const netlink_dispatch_f *dispatch;
+};
+
+/* One netlink_fds[] entry exists per element of this list.  Presumably each
+ * value is the multicast group the corresponding socket subscribes to (the
+ * code opening the sockets is outside this part of the file -- verify). */
+static const int netlink_groups[] = {
+ 0,
+ RTMGRP_NEIGH,
+ RTMGRP_LINK,
+ RTMGRP_IPV4_IFADDR,
+ RTMGRP_IPV4_ROUTE,
+};
+static struct netlink_fd netlink_fds[ARRAY_SIZE(netlink_groups)];
+/* The first socket (group 0) is the one requests are sent on */
+#define talk_fd netlink_fds[0]
+
+/* Watcher whose descriptor is also used for the interface ioctl() calls in
+ * this file */
+static struct ev_io packet_io;
+
+/* Map an interface MTU to the value to advertise: the ethernet standard
+ * 1500 is reported as 0 (not advertised, path MTU discovery is expected to
+ * work); any other value is passed through unchanged. */
+static u_int16_t translate_mtu(u_int16_t mtu)
+{
+	return (mtu != 1500) ? mtu : 0;
+}
+
+/* Index the attributes of a netlink message by type.
+ *
+ * tb must have room for max+1 pointers.  It is cleared first; then every
+ * attribute in the len bytes starting at rta whose type is <= max is stored
+ * at tb[type].  When a type occurs more than once the last one wins.
+ */
+static void netlink_parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
+{
+	struct rtattr *attr;
+
+	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+
+	for (attr = rta; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) {
+		if (attr->rta_type > max)
+			continue;
+		tb[attr->rta_type] = attr;
+	}
+}
+
+/* Append an attribute to a netlink message.
+ *
+ * n       message under construction; nlmsg_len is updated
+ * maxlen  size of the buffer holding n
+ * type    attribute type
+ * data    attribute payload
+ * alen    payload length in bytes
+ *
+ * Returns FALSE without changing the message when the aligned attribute
+ * does not fit into maxlen, TRUE otherwise.
+ */
+static int netlink_add_rtattr_l(struct nlmsghdr *n, int maxlen, int type,
+				const void *data, int alen)
+{
+	int len = RTA_LENGTH(alen);
+	struct rtattr *rta;
+
+	if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen)
+		return FALSE;
+
+	rta = NLMSG_TAIL(n);
+	rta->rta_type = type;
+	rta->rta_len = len;
+	memcpy(RTA_DATA(rta), data, alen);
+#ifdef VALGRIND
+	/* Clear the padding area to avoid spurious warnings.  The padding
+	 * is what alignment adds to len (header + payload); measuring it
+	 * from alen would also count the header and write past the end of
+	 * the attribute. */
+	memset((char *) RTA_DATA(rta) + alen, 0, RTA_ALIGN(len) - len);
+#endif
+	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+	return TRUE;
+}
+
+/* Drain pending messages from a netlink socket without blocking.
+ *
+ * fd     socket to read from
+ * reply  NULL to only dispatch messages; otherwise a buffer whose nlmsg_seq
+ *        identifies the awaited answer and whose nlmsg_len holds the buffer
+ *        size on entry
+ *
+ * Messages whose sequence number matches reply are copied into it (cut to
+ * the buffer size).  All other messages are passed to the handler registered
+ * for their type in fd->dispatch, if there is one.
+ *
+ * Returns TRUE once a reply was copied, or when the socket ran dry and no
+ * reply was expected.  Returns FALSE when the socket ran dry before the
+ * expected reply arrived, or on EOF.
+ */
+static int netlink_receive(struct netlink_fd *fd, struct nlmsghdr *reply)
+{
+ struct sockaddr_nl nladdr;
+ struct iovec iov;
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ int got_reply = FALSE, len;
+ char buf[NETLINK_RECV_BUFFER];
+
+ iov.iov_base = buf;
+ while (!got_reply) {
+ int status;
+ struct nlmsghdr *h;
+
+ iov.iov_len = sizeof(buf);
+ status = recvmsg(fd->fd, &msg, MSG_DONTWAIT);
+ if (status < 0) {
+ if (errno == EINTR)
+ continue;
+ /* nothing more queued: success only if no reply is awaited */
+ if (errno == EAGAIN)
+ return reply == NULL;
+ /* any other error is logged and the read is retried */
+ nhrp_perror("Netlink overrun");
+ continue;
+ }
+
+ if (status == 0) {
+ nhrp_error("Netlink returned EOF");
+ return FALSE;
+ }
+
+ /* walk all messages contained in this datagram */
+ h = (struct nlmsghdr *) buf;
+ while (NLMSG_OK(h, status)) {
+ if (reply != NULL &&
+ h->nlmsg_seq == reply->nlmsg_seq) {
+ len = h->nlmsg_len;
+ if (len > reply->nlmsg_len) {
+ nhrp_error("Netlink message truncated");
+ len = reply->nlmsg_len;
+ }
+ memcpy(reply, h, len);
+ got_reply = TRUE;
+ /* NOTE(review): "<=" indexes dispatch[dispatch_size]; that is only
+ * in bounds if the table has dispatch_size+1 entries -- confirm
+ * against the code that sets dispatch_size. */
+ } else if (h->nlmsg_type <= fd->dispatch_size &&
+ fd->dispatch[h->nlmsg_type] != NULL) {
+ fd->dispatch[h->nlmsg_type](h);
+ } else if (h->nlmsg_type != NLMSG_DONE) {
+ nhrp_info("Unknown NLmsg: 0x%08x, len %d",
+ h->nlmsg_type, h->nlmsg_len);
+ }
+ h = NLMSG_NEXT(h, status);
+ }
+ }
+
+ return TRUE;
+}
+
+/* Send one request to the kernel over a netlink socket.
+ *
+ * The request is stamped with the socket's next sequence number before it
+ * is sent.  Returns TRUE on success; on failure the error is logged and
+ * FALSE is returned.
+ */
+static int netlink_send(struct netlink_fd *fd, struct nlmsghdr *req)
+{
+	struct sockaddr_nl kernel;
+	struct iovec vec;
+	struct msghdr hdr;
+
+	memset(&kernel, 0, sizeof(kernel));
+	kernel.nl_family = AF_NETLINK;
+
+	req->nlmsg_seq = ++fd->seq;
+
+	vec.iov_base = (void*) req;
+	vec.iov_len = req->nlmsg_len;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.msg_name = &kernel;
+	hdr.msg_namelen = sizeof(kernel);
+	hdr.msg_iov = &vec;
+	hdr.msg_iovlen = 1;
+
+	if (sendmsg(fd->fd, &hdr, 0) < 0) {
+		nhrp_perror("Cannot talk to rtnetlink");
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/* Send a request and, if a reply buffer is supplied, wait for the answer.
+ *
+ * fd         socket to use
+ * req        request; its sequence number is assigned while sending
+ * replysize  size of the reply buffer in bytes
+ * reply      buffer for the answer, or NULL
+ *
+ * With reply == NULL the request is flagged NLM_F_ACK and the function
+ * returns as soon as it has been sent; the acknowledgement is not read
+ * here.  Otherwise the result of netlink_receive() is returned.
+ */
+static int netlink_talk(struct netlink_fd *fd, struct nlmsghdr *req,
+			size_t replysize, struct nlmsghdr *reply)
+{
+	if (reply == NULL)
+		req->nlmsg_flags |= NLM_F_ACK;
+
+	if (!netlink_send(fd, req))
+		return FALSE;
+
+	if (reply == NULL)
+		return TRUE;
+
+	/* netlink_receive() recognises the answer by comparing sequence
+	 * numbers with the reply buffer, so the buffer must carry the
+	 * number just assigned to the request (a no-op when the caller
+	 * passes the request buffer as the reply buffer). */
+	reply->nlmsg_seq = req->nlmsg_seq;
+	reply->nlmsg_len = replysize;
+	return netlink_receive(fd, reply);
+}
+
+/* Ask the kernel to dump all objects of the given request type (e.g.
+ * RTM_GETADDR) for an address family.  Only the request is sent here; the
+ * answers arrive as ordinary messages on the socket.
+ *
+ * Returns non-zero if the request was sent, 0 otherwise.
+ */
+static int netlink_enumerate(struct netlink_fd *fd, int family, int type)
+{
+	struct sockaddr_nl kernel;
+	struct {
+		struct nlmsghdr nlh;
+		struct rtgenmsg g;
+	} dump;
+	int sent;
+
+	memset(&dump, 0, sizeof(dump));
+	dump.nlh.nlmsg_len = sizeof(dump);
+	dump.nlh.nlmsg_type = type;
+	dump.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
+	dump.nlh.nlmsg_pid = 0;
+	dump.nlh.nlmsg_seq = ++fd->seq;
+	dump.g.rtgen_family = family;
+
+	memset(&kernel, 0, sizeof(kernel));
+	kernel.nl_family = AF_NETLINK;
+
+	sent = sendto(fd->fd, (void *) &dump, sizeof(dump), 0,
+		      (struct sockaddr *) &kernel, sizeof(kernel));
+
+	return sent >= 0;
+}
+
+/* libev I/O callback: when the socket is readable, dispatch everything that
+ * is pending on the netlink_fd the watcher is embedded in. */
+static void netlink_read_cb(struct ev_io *w, int revents)
+{
+	struct netlink_fd *owner;
+
+	if (!(revents & EV_READ))
+		return;
+
+	owner = container_of(w, struct netlink_fd, io);
+	netlink_receive(owner, NULL);
+}
+
+/* Fetch the tunnel parameters of interface basedev with SIOCGETTUNNEL.
+ *
+ * Both the request and *p are cleared first, so the interface name is
+ * always NUL terminated and *p holds zeroes instead of stack garbage when
+ * the ioctl fails.
+ *
+ * Returns TRUE on success; on failure the error is logged and FALSE is
+ * returned.
+ */
+static int do_get_ioctl(const char *basedev, struct ip_tunnel_parm *p)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+	memset(p, 0, sizeof(*p));
+
+	/* ifr is zeroed, copying one byte less keeps the terminator */
+	strncpy(ifr.ifr_name, basedev, IFNAMSIZ - 1);
+	ifr.ifr_ifru.ifru_data = (void *) p;
+	if (ioctl(packet_io.fd, SIOCGETTUNNEL, &ifr)) {
+		nhrp_perror("ioctl(SIOCGETTUNNEL)");
+		return FALSE;
+	}
+	return TRUE;
+}
+
+#ifndef NHRP_NO_NBMA_GRE
+
+/* Append a 32-bit attribute inside the container attribute rta.
+ *
+ * rta     container; its rta_len is increased
+ * maxlen  size of the buffer holding the container
+ * type    type of the nested attribute
+ * value   payload, stored in host byte order
+ *
+ * Returns FALSE without changes when the nested attribute does not fit.
+ */
+static int netlink_add_nested_rtattr_u32(struct rtattr *rta, int maxlen,
+					 int type, uint32_t value)
+{
+	const int sublen = RTA_LENGTH(4);
+	/* both alignment macros round up to 4 bytes */
+	const int offset = RTA_ALIGN(rta->rta_len);
+	struct rtattr *nested;
+
+	if (offset + sublen > maxlen)
+		return FALSE;
+
+	nested = (struct rtattr*)(((char*)rta) + offset);
+	nested->rta_type = type;
+	nested->rta_len = sublen;
+	memcpy(RTA_DATA(nested), &value, 4);
+
+	rta->rta_len = offset + sublen;
+	return TRUE;
+}
+
+/* Set the per-interface neighbour table parameters for iface in the
+ * "arp_cache" table of protocol family pf: one application probe, no
+ * multicast and no unicast probes.
+ *
+ * The request is only sent, no answer is awaited.  Returns the result of
+ * netlink_send().
+ */
+static int netlink_configure_arp(struct nhrp_interface *iface, int pf)
+{
+	static const struct {
+		int type;
+		uint32_t value;
+	} probes[] = {
+		{ NDTPA_APP_PROBES, 1 },
+		{ NDTPA_MCAST_PROBES, 0 },
+		{ NDTPA_UCAST_PROBES, 0 },
+	};
+	struct {
+		struct nlmsghdr n;
+		struct ndtmsg ndtm;
+		char buf[256];
+	} req;
+	struct {
+		struct rtattr rta;
+		char buf[256];
+	} parms;
+	size_t k;
+
+	memset(&req.n, 0, sizeof(req.n));
+	memset(&req.ndtm, 0, sizeof(req.ndtm));
+
+	req.n.nlmsg_type = RTM_SETNEIGHTBL;
+	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE;
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg));
+	req.ndtm.ndtm_family = pf;
+
+	netlink_add_rtattr_l(&req.n, sizeof(req), NDTA_NAME,
+			     "arp_cache", 10);
+
+	/* Collect the nested parameters in a scratch container first */
+	parms.rta.rta_type = NDTA_PARMS;
+	parms.rta.rta_len = RTA_LENGTH(0);
+	netlink_add_nested_rtattr_u32(&parms.rta, sizeof(parms),
+				      NDTPA_IFINDEX, iface->index);
+	for (k = 0; k < sizeof(probes) / sizeof(probes[0]); k++)
+		netlink_add_nested_rtattr_u32(&parms.rta, sizeof(parms),
+					      probes[k].type, probes[k].value);
+
+	/* ... then copy the container's payload into the request */
+	netlink_add_rtattr_l(&req.n, sizeof(req), NDTA_PARMS,
+			     parms.buf, parms.rta.rta_len - RTA_LENGTH(0));
+
+	return netlink_send(&talk_fd, &req.n);
+}
+
+/* Make sure ARP is enabled on the interface: clear IFF_NOARP if it is set.
+ *
+ * Returns TRUE when the flag is clear afterwards, FALSE (after logging) if
+ * reading or writing the interface flags fails.
+ */
+static int netlink_link_arp_on(struct nhrp_interface *iface)
+{
+	struct ifreq ifr;
+
+	/* Zero the request so no stack garbage is handed to the kernel and
+	 * the name stays NUL terminated */
+	memset(&ifr, 0, sizeof(ifr));
+	strncpy(ifr.ifr_name, iface->name, IFNAMSIZ - 1);
+	if (ioctl(packet_io.fd, SIOCGIFFLAGS, &ifr)) {
+		nhrp_perror("ioctl(SIOCGIFFLAGS)");
+		return FALSE;
+	}
+	if (ifr.ifr_flags & IFF_NOARP) {
+		ifr.ifr_flags &= ~IFF_NOARP;
+		if (ioctl(packet_io.fd, SIOCSIFFLAGS, &ifr)) {
+			nhrp_perror("ioctl(SIOCSIFFLAGS)");
+			return FALSE;
+		}
+	}
+	return TRUE;
+}
+
+#else
+
+/* Variant for builds with NHRP_NO_NBMA_GRE: nothing to configure, always
+ * reports success. */
+static int netlink_configure_arp(struct nhrp_interface *iface, int pf)
+{
+	(void) iface;
+	(void) pf;
+
+	return TRUE;
+}
+
+/* Variant for builds with NHRP_NO_NBMA_GRE: the interface flags are left
+ * alone, always reports success. */
+static int netlink_link_arp_on(struct nhrp_interface *iface)
+{
+	(void) iface;
+
+	return TRUE;
+}
+
+#endif
+
+/* Disable sending of ICMP redirects on an interface by writing "0" to its
+ * send_redirects entry under /proc/sys/net/ipv4/conf.
+ *
+ * Returns TRUE if the value was written, FALSE if the path does not fit
+ * the name buffer, the file cannot be opened or the write fails.
+ */
+static int proc_icmp_redirect_off(const char *interface)
+{
+	char fname[256];
+	int fd, n, ret = FALSE;
+
+	n = snprintf(fname, sizeof(fname),
+		     "/proc/sys/net/ipv4/conf/%s/send_redirects", interface);
+	if (n < 0 || (size_t) n >= sizeof(fname))
+		return FALSE;
+
+	fd = open(fname, O_WRONLY);
+	if (fd < 0)
+		return FALSE;
+	if (write(fd, "0\n", 2) == 2)
+		ret = TRUE;
+	close(fd);
+
+	return ret;
+}
+
+/* Handle a neighbour resolution request from the kernel.
+ *
+ * The destination is looked up through nhrp_peer_route() (with local route
+ * peers excluded).  If the peer found is up, its next hop is injected as
+ * the neighbour entry.  If the peer has a next hop and the destination is
+ * not the peer's own protocol address, a traffic indication is raised for
+ * the destination.
+ */
+static void netlink_neigh_request(struct nlmsghdr *msg)
+{
+	struct ndmsg *ndm = NLMSG_DATA(msg);
+	struct rtattr *attrs[NDA_MAX+1];
+	struct nhrp_interface *iface;
+	struct nhrp_address target;
+	struct nhrp_peer *route;
+	char text[64];
+	int off_link;
+
+	netlink_parse_rtattr(attrs, NDA_MAX, NDA_RTA(ndm), NDA_PAYLOAD(msg));
+	if (attrs[NDA_DST] == NULL)
+		return;
+
+	iface = nhrp_interface_get_by_index(ndm->ndm_ifindex, 0);
+	if (iface == NULL)
+		return;
+
+	nhrp_address_set(&target, ndm->ndm_family,
+			 RTA_PAYLOAD(attrs[NDA_DST]),
+			 RTA_DATA(attrs[NDA_DST]));
+
+	nhrp_debug("NL-ARP(%s) who-has %s",
+		   iface->name, nhrp_address_format(&target, sizeof(text), text));
+
+	route = nhrp_peer_route(iface, &target, 0, ~BIT(NHRP_PEER_TYPE_LOCAL_ROUTE));
+	if (route == NULL)
+		return;
+
+	if (route->flags & NHRP_PEER_FLAG_UP)
+		kernel_inject_neighbor(&target, &route->next_hop_address, iface);
+
+	off_link = route->next_hop_address.type != PF_UNSPEC &&
+		   nhrp_address_cmp(&target, &route->protocol_address) != 0;
+	if (off_link)
+		nhrp_peer_traffic_indication(iface, route->afnum, &target);
+}
+
+/* Handle a neighbour table change notification.
+ *
+ * Only entries in state STALE, FAILED or REACHABLE are considered.  Peers
+ * on the interface whose protocol address exactly matches the neighbour are
+ * marked as used when the message is RTM_NEWNEIGH with state REACHABLE, and
+ * as unused otherwise.
+ */
+static void netlink_neigh_update(struct nlmsghdr *msg)
+{
+	struct ndmsg *ndm = NLMSG_DATA(msg);
+	struct rtattr *attrs[NDA_MAX+1];
+	struct nhrp_peer_selector match;
+	struct nhrp_interface *iface;
+	int in_use;
+
+	netlink_parse_rtattr(attrs, NDA_MAX, NDA_RTA(ndm), NDA_PAYLOAD(msg));
+	if (attrs[NDA_DST] == NULL)
+		return;
+
+	if ((ndm->ndm_state & (NUD_STALE | NUD_FAILED | NUD_REACHABLE)) == 0)
+		return;
+
+	iface = nhrp_interface_get_by_index(ndm->ndm_ifindex, 0);
+	if (iface == NULL)
+		return;
+
+	memset(&match, 0, sizeof(match));
+	match.flags = NHRP_PEER_FIND_EXACT;
+	match.interface = iface;
+	nhrp_address_set(&match.protocol_address, ndm->ndm_family,
+			 RTA_PAYLOAD(attrs[NDA_DST]),
+			 RTA_DATA(attrs[NDA_DST]));
+
+	in_use = FALSE;
+	if (msg->nlmsg_type == RTM_NEWNEIGH && (ndm->ndm_state & NUD_REACHABLE))
+		in_use = TRUE;
+
+	/* the flag travels in the callback's context pointer */
+	nhrp_peer_foreach(nhrp_peer_set_used_matching,
+			  (void*) (intptr_t) in_use, &match);
+}
+
+/* Handle a new-link / link-changed notification.
+ *
+ * The interface is created on first sight, its MTU and index are recorded
+ * and the "interface-up" script is run when it is new or was just brought
+ * up.  For interfaces named in the configuration the GRE tunnel parameters
+ * (key, local address or underlying link) are re-read; if they changed, all
+ * purgeable peers of the interface are purged.  Unless the interface is a
+ * shortcut destination, its ARP parameters are set up and ICMP redirects
+ * are turned off.
+ */
+static void netlink_link_new(struct nlmsghdr *msg)
+{
+	struct nhrp_interface *iface;
+	struct ifinfomsg *ifi = NLMSG_DATA(msg);
+	struct rtattr *rta[IFLA_MAX+1];
+	const char *ifname;
+	struct ip_tunnel_parm cfg;
+	int configuration_changed = FALSE;
+
+	netlink_parse_rtattr(rta, IFLA_MAX, IFLA_RTA(ifi), IFLA_PAYLOAD(msg));
+	if (rta[IFLA_IFNAME] == NULL)
+		return;
+
+	ifname = RTA_DATA(rta[IFLA_IFNAME]);
+	iface = nhrp_interface_get_by_name(ifname, TRUE);
+	if (iface == NULL)
+		return;
+
+	if (rta[IFLA_MTU])
+		iface->mtu = *((unsigned*)RTA_DATA(rta[IFLA_MTU]));
+
+	if (iface->index == 0 || (ifi->ifi_flags & ifi->ifi_change & IFF_UP)) {
+		nhrp_info("Interface %s: new or configured up, mtu=%d",
+			  ifname, iface->mtu);
+		nhrp_interface_run_script(iface, "interface-up");
+	} else {
+		nhrp_info("Interface %s: config change, mtu=%d",
+			  ifname, iface->mtu);
+	}
+
+	iface->index = ifi->ifi_index;
+	nhrp_interface_hash(iface);
+
+	if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED))
+		return;
+
+	switch (ifi->ifi_type) {
+	case ARPHRD_IPGRE:
+		iface->afnum = AFNUM_INET;
+		/* try hard to get the interface nbma address */
+		memset(&cfg, 0, sizeof(cfg));
+		if (!do_get_ioctl(ifname, &cfg)) {
+			/* The tunnel parameters are unknown (the failure
+			 * has been logged); keep what was detected before
+			 * instead of acting on an unfilled structure. */
+			break;
+		}
+		if (iface->gre_key != ntohl(cfg.i_key)) {
+			configuration_changed = TRUE;
+			iface->gre_key = ntohl(cfg.i_key);
+		}
+		if (cfg.iph.saddr) {
+			struct nhrp_address saddr;
+			nhrp_address_set(&saddr, PF_INET, 4, (uint8_t *) &cfg.iph.saddr);
+			if (nhrp_address_cmp(&iface->nbma_address, &saddr) || iface->link_index) {
+				configuration_changed = TRUE;
+				iface->nbma_address = saddr;
+				iface->link_index = 0;
+			}
+		} else if (cfg.link) {
+			if (cfg.link != iface->link_index) {
+				configuration_changed = TRUE;
+				nhrp_address_set_type(&iface->nbma_address, PF_UNSPEC);
+				iface->link_index = cfg.link;
+			}
+		} else {
+			if (iface->link_index || iface->nbma_address.type != PF_UNSPEC) {
+				configuration_changed = TRUE;
+				/* Mark the interface as owning all NBMA addresses
+				 * this works when there's only one GRE interface */
+				iface->link_index = 0;
+				nhrp_address_set_type(&iface->nbma_address, PF_UNSPEC);
+				nhrp_info("WARNING: Cannot figure out NBMA address for "
+					  "interface '%s'. Using route hints.", ifname);
+			}
+		}
+		break;
+	}
+
+	if (!(iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST)) {
+		netlink_configure_arp(iface, PF_INET);
+		netlink_link_arp_on(iface);
+		proc_icmp_redirect_off(iface->name);
+	}
+
+	if (configuration_changed) {
+		struct nhrp_peer_selector sel;
+		int count = 0;
+
+		/* Reset the interface values we detect later */
+		memset(&iface->nat_cie, 0, sizeof(iface->nat_cie));
+		iface->nbma_mtu = 0;
+		if (iface->link_index) {
+			/* Reenumerate addresses if needed */
+			netlink_enumerate(&talk_fd, PF_UNSPEC, RTM_GETADDR);
+			netlink_read_cb(&talk_fd.io, EV_READ);
+		}
+
+		/* Purge all NHRP entries for this interface */
+		memset(&sel, 0, sizeof(sel));
+		sel.type_mask = NHRP_PEER_TYPEMASK_PURGEABLE;
+		sel.interface = iface;
+		nhrp_peer_foreach(nhrp_peer_purge_matching, &count, &sel);
+		nhrp_info("Interface %s: GRE configuration changed. Purged %d peers.",
+			  ifname, count);
+	}
+}
+
+/* Handle a link-deleted notification: the interface object is kept, but its
+ * index, link index and both addresses are reset.  Interfaces that are not
+ * known are ignored. */
+static void netlink_link_del(struct nlmsghdr *msg)
+{
+	struct ifinfomsg *ifi = NLMSG_DATA(msg);
+	struct rtattr *attrs[IFLA_MAX+1];
+	struct nhrp_interface *gone;
+	const char *name;
+
+	netlink_parse_rtattr(attrs, IFLA_MAX, IFLA_RTA(ifi), IFLA_PAYLOAD(msg));
+	if (attrs[IFLA_IFNAME] == NULL)
+		return;
+
+	name = RTA_DATA(attrs[IFLA_IFNAME]);
+	gone = nhrp_interface_get_by_name(name, FALSE);
+	if (gone == NULL)
+		return;
+
+	nhrp_info("Interface '%s' deleted", name);
+
+	/* re-hash after the index has been cleared */
+	gone->index = 0;
+	gone->link_index = 0;
+	nhrp_interface_hash(gone);
+
+	nhrp_address_set_type(&gone->nbma_address, PF_UNSPEC);
+	nhrp_address_set_type(&gone->protocol_address, PF_UNSPEC);
+}
+
+/* nhrp_interface_foreach() callback used for new-address notifications.
+ *
+ * ctx is the netlink message.  If iface is bound to the link that received
+ * the address, the address becomes the interface's NBMA address and the
+ * NBMA MTU is taken from the underlying interface when that one is known.
+ * Always returns 0.
+ */
+static int netlink_addr_new_nbma(void *ctx, struct nhrp_interface *iface)
+{
+	struct nlmsghdr *msg = (struct nlmsghdr *) ctx;
+	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
+	struct rtattr *attrs[IFA_MAX+1];
+	struct nhrp_interface *lower;
+
+	if (iface->link_index != ifa->ifa_index)
+		return 0;
+
+	netlink_parse_rtattr(attrs, IFA_MAX, IFA_RTA(ifa),
+			     IFA_PAYLOAD(msg));
+	if (attrs[IFA_LOCAL] == NULL)
+		return 0;
+
+	nhrp_address_set(&iface->nbma_address, ifa->ifa_family,
+			 RTA_PAYLOAD(attrs[IFA_LOCAL]),
+			 RTA_DATA(attrs[IFA_LOCAL]));
+
+	lower = nhrp_interface_get_by_index(ifa->ifa_index, FALSE);
+	if (lower != NULL)
+		iface->nbma_mtu = translate_mtu(lower->mtu);
+
+	return 0;
+}
+
+/* Handle a new-address notification.
+ *
+ * Non-secondary addresses are first offered to every interface as a
+ * possible NBMA address.  If the address belongs to a configured interface
+ * that is not a shortcut destination, it is stored as the interface's
+ * protocol address, and for IPv4 two local-address peers are inserted: one
+ * for the address itself and one for the broadcast address of its prefix.
+ */
+static void netlink_addr_new(struct nlmsghdr *msg)
+{
+	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
+	struct rtattr *attrs[IFA_MAX+1];
+	struct nhrp_interface *iface;
+	struct nhrp_peer *self, *bcast;
+
+	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
+		nhrp_interface_foreach(netlink_addr_new_nbma, msg);
+
+	netlink_parse_rtattr(attrs, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(msg));
+	iface = nhrp_interface_get_by_index(ifa->ifa_index, FALSE);
+	if (iface == NULL || attrs[IFA_LOCAL] == NULL)
+		return;
+
+	/* Shortcut destination stuff is extracted from routes;
+	 * not from local address information. */
+	if ((iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) ||
+	    !(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED))
+		return;
+
+	nhrp_address_set(&iface->protocol_address, ifa->ifa_family,
+			 RTA_PAYLOAD(attrs[IFA_LOCAL]),
+			 RTA_DATA(attrs[IFA_LOCAL]));
+	iface->protocol_address_prefix = ifa->ifa_prefixlen;
+
+	/* Peer entry for the local address itself */
+	self = nhrp_peer_alloc(iface);
+	self->type = NHRP_PEER_TYPE_LOCAL_ADDR;
+	self->afnum = AFNUM_RESERVED;
+	nhrp_address_set(&self->protocol_address, ifa->ifa_family,
+			 RTA_PAYLOAD(attrs[IFA_LOCAL]),
+			 RTA_DATA(attrs[IFA_LOCAL]));
+
+	/* Only IPv4 is handled */
+	if (ifa->ifa_family != PF_INET) {
+		nhrp_peer_put(self);
+		return;
+	}
+	self->protocol_type = ETHPROTO_IP;
+	self->prefix_length = self->protocol_address.addr_len * 8;
+	nhrp_peer_insert(self);
+
+	/* Peer entry for the prefix's broadcast address */
+	bcast = nhrp_peer_alloc(iface);
+	bcast->type = self->type;
+	bcast->afnum = self->afnum;
+	bcast->protocol_type = self->protocol_type;
+	bcast->prefix_length = self->prefix_length;
+	bcast->protocol_address = self->protocol_address;
+	nhrp_address_set_broadcast(&bcast->protocol_address,
+				   ifa->ifa_prefixlen);
+	bcast->next_hop_address = self->protocol_address;
+	nhrp_peer_insert(bcast);
+	nhrp_peer_put(bcast);
+
+	nhrp_peer_put(self);
+}
+
+/* Context handed to the foreach callbacks by netlink_addr_del(): the
+ * address that was removed and the index of the interface it was on. */
+struct netlink_del_addr_msg {
+ int interface_index;
+ struct nhrp_address address;
+};
+
+/* nhrp_interface_foreach() callback for deleted addresses: forget the NBMA
+ * address of iface if iface is bound to the link the address was removed
+ * from and the removed address is that NBMA address.  Always returns 0. */
+static int netlink_addr_del_nbma(void *ctx, struct nhrp_interface *iface)
+{
+	struct netlink_del_addr_msg *removed = (struct netlink_del_addr_msg *) ctx;
+
+	if (iface->link_index != removed->interface_index)
+		return 0;
+	if (nhrp_address_cmp(&removed->address, &iface->nbma_address) != 0)
+		return 0;
+
+	nhrp_address_set_type(&iface->nbma_address, PF_UNSPEC);
+	return 0;
+}
+
+/* nhrp_peer_foreach() callback: purge every peer whose my_nbma_address
+ * equals the removed address (ctx is a struct netlink_del_addr_msg).
+ * Always returns 0. */
+static int netlink_addr_purge_nbma(void *ctx, struct nhrp_peer *peer)
+{
+	struct netlink_del_addr_msg *removed = ctx;
+
+	if (nhrp_address_cmp(&peer->my_nbma_address, &removed->address) != 0)
+		return 0;
+
+	nhrp_peer_purge(peer, "address-removed");
+	return 0;
+}
+
+/* RTM_DELADDR handler.
+ *
+ * Clears matching NBMA addresses (primary addresses only), purges peers
+ * that were using the address as their local NBMA address, and removes
+ * the LOCAL_ADDR peer entries (address and prefix broadcast) of the
+ * interface the address was removed from. */
+static void netlink_addr_del(struct nlmsghdr *nlmsg)
+{
+	struct ifaddrmsg *ifa = NLMSG_DATA(nlmsg);
+	struct rtattr *attrs[IFA_MAX+1];
+	struct netlink_del_addr_msg removed;
+	struct nhrp_peer_selector match;
+	struct nhrp_interface *iface;
+
+	netlink_parse_rtattr(attrs, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(nlmsg));
+	if (attrs[IFA_LOCAL] == NULL)
+		return;
+
+	removed.interface_index = ifa->ifa_index;
+	nhrp_address_set(&removed.address, ifa->ifa_family,
+			 RTA_PAYLOAD(attrs[IFA_LOCAL]),
+			 RTA_DATA(attrs[IFA_LOCAL]));
+
+	if ((ifa->ifa_flags & IFA_F_SECONDARY) == 0)
+		nhrp_interface_foreach(netlink_addr_del_nbma, &removed);
+	nhrp_peer_foreach(netlink_addr_purge_nbma, &removed, NULL);
+
+	iface = nhrp_interface_get_by_index(ifa->ifa_index, FALSE);
+	if (iface == NULL)
+		return;
+
+	/* Exact match on the LOCAL_ADDR entry of the removed address. */
+	memset(&match, 0, sizeof(match));
+	match.flags = NHRP_PEER_FIND_EXACT;
+	match.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ADDR);
+	match.interface = iface;
+	match.protocol_address = removed.address;
+	match.prefix_length = match.protocol_address.addr_len * 8;
+
+	if (nhrp_address_cmp(&match.protocol_address,
+			     &iface->protocol_address) == 0)
+		nhrp_address_set_type(&iface->protocol_address, PF_UNSPEC);
+	nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &match);
+
+	/* Same again for the broadcast entry, whose next hop is the
+	 * removed address. */
+	nhrp_address_set_broadcast(&match.protocol_address, ifa->ifa_prefixlen);
+	match.next_hop_address = removed.address;
+	nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &match);
+}
+
+/* Shared front end of the RTM_NEWROUTE / RTM_DELROUTE handlers.
+ *
+ * Parses the route attributes of msg into rta (RTA_MAX+1 entries), looks
+ * up the output interface and decides which peer type the route maps to:
+ *  - on a shortcut-destination interface, routes in the main table are
+ *    local shortcut targets (NHRP_PEER_TYPE_LOCAL_ADDR);
+ *  - on any other configured interface, non-kernel routes in the
+ *    interface's route table may get additional outbound shortcuts
+ *    (NHRP_PEER_TYPE_LOCAL_ROUTE).
+ *
+ * Returns the peer type and stores the interface in *iface_out, or
+ * returns 0 when the message is to be ignored (missing RTA_OIF/RTA_DST,
+ * not IPv4, unknown interface, or none of the cases above). */
+static int netlink_route_classify(struct nlmsghdr *msg,
+				  struct rtattr **rta,
+				  struct nhrp_interface **iface_out)
+{
+	struct rtmsg *rtm = NLMSG_DATA(msg);
+	struct nhrp_interface *iface;
+
+	netlink_parse_rtattr(rta, RTA_MAX, RTM_RTA(rtm), RTM_PAYLOAD(msg));
+	if (rta[RTA_OIF] == NULL || rta[RTA_DST] == NULL)
+		return 0;
+
+	if (rtm->rtm_family != PF_INET)
+		return 0;
+
+	iface = nhrp_interface_get_by_index(*(int*)RTA_DATA(rta[RTA_OIF]),
+					    FALSE);
+	if (iface == NULL)
+		return 0;
+	*iface_out = iface;
+
+	if (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) {
+		/* Local shortcut target routes */
+		if (rtm->rtm_table != RT_TABLE_MAIN)
+			return 0;
+		return NHRP_PEER_TYPE_LOCAL_ADDR;
+	}
+	if (iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED) {
+		/* Routes which might get additional outbound
+		 * shortcuts */
+		if (rtm->rtm_table != iface->route_table ||
+		    rtm->rtm_protocol == RTPROT_KERNEL)
+			return 0;
+		return NHRP_PEER_TYPE_LOCAL_ROUTE;
+	}
+	return 0;
+}
+
+/* RTM_NEWROUTE handler: insert a peer entry describing the new route
+ * (destination, prefix length and, when present, gateway). */
+static void netlink_route_new(struct nlmsghdr *msg)
+{
+	struct nhrp_interface *iface;
+	struct nhrp_peer *peer;
+	struct rtmsg *rtm = NLMSG_DATA(msg);
+	struct rtattr *rta[RTA_MAX+1];
+	int type;
+
+	type = netlink_route_classify(msg, rta, &iface);
+	if (type == 0)
+		return;
+
+	peer = nhrp_peer_alloc(iface);
+	peer->type = type;
+	peer->afnum = AFNUM_RESERVED;
+	nhrp_address_set(&peer->protocol_address, rtm->rtm_family,
+			 RTA_PAYLOAD(rta[RTA_DST]),
+			 RTA_DATA(rta[RTA_DST]));
+	if (rta[RTA_GATEWAY] != NULL) {
+		nhrp_address_set(&peer->next_hop_address,
+				 rtm->rtm_family,
+				 RTA_PAYLOAD(rta[RTA_GATEWAY]),
+				 RTA_DATA(rta[RTA_GATEWAY]));
+	}
+	peer->protocol_type = nhrp_protocol_from_pf(rtm->rtm_family);
+	peer->prefix_length = rtm->rtm_dst_len;
+	nhrp_peer_insert(peer);
+	nhrp_peer_put(peer);
+}
+
+/* RTM_DELROUTE handler: remove the peer entries that exactly match the
+ * deleted route (same interface, type, destination, prefix length and,
+ * when present, gateway). */
+static void netlink_route_del(struct nlmsghdr *msg)
+{
+	struct nhrp_interface *iface;
+	struct rtmsg *rtm = NLMSG_DATA(msg);
+	struct rtattr *rta[RTA_MAX+1];
+	struct nhrp_peer_selector sel;
+	int type;
+
+	type = netlink_route_classify(msg, rta, &iface);
+	if (type == 0)
+		return;
+
+	memset(&sel, 0, sizeof(sel));
+	sel.flags = NHRP_PEER_FIND_EXACT;
+	sel.type_mask = BIT(type);
+	sel.interface = iface;
+	nhrp_address_set(&sel.protocol_address, rtm->rtm_family,
+			 RTA_PAYLOAD(rta[RTA_DST]),
+			 RTA_DATA(rta[RTA_DST]));
+	if (rta[RTA_GATEWAY] != NULL) {
+		nhrp_address_set(&sel.next_hop_address,
+				 rtm->rtm_family,
+				 RTA_PAYLOAD(rta[RTA_GATEWAY]),
+				 RTA_DATA(rta[RTA_GATEWAY]));
+	}
+	sel.prefix_length = rtm->rtm_dst_len;
+	nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel);
+}
+
+static const netlink_dispatch_f route_dispatch[RTM_MAX] = { /* Handlers indexed by
+ * rtnetlink message type (RTM_*); types not listed here stay NULL.
+ * kernel_init() installs this table on every netlink socket it opens. */
+ [RTM_GETNEIGH] = netlink_neigh_request,
+ [RTM_NEWNEIGH] = netlink_neigh_update,
+ [RTM_DELNEIGH] = netlink_neigh_update,
+ [RTM_NEWLINK] = netlink_link_new,
+ [RTM_DELLINK] = netlink_link_del,
+ [RTM_NEWADDR] = netlink_addr_new,
+ [RTM_DELADDR] = netlink_addr_del,
+ [RTM_NEWROUTE] = netlink_route_new,
+ [RTM_DELROUTE] = netlink_route_del,
+};
+
+static void netlink_stop_listening(struct netlink_fd *fd)
+{ /* Stop the event-loop read watcher of this netlink socket; the
+ * descriptor itself is left open. */
+ ev_io_stop(&fd->io);
+}
+
+/* Stop watching and close a netlink socket.
+ *
+ * The descriptor is marked closed with -1 afterwards.  The guard below
+ * treats every value >= 0 as open, so resetting it to 0 (as was done
+ * before) would make a second call close descriptor 0. */
+static void netlink_close(struct netlink_fd *fd)
+{
+	if (fd->fd < 0)
+		return;
+
+	netlink_stop_listening(fd);
+	close(fd->fd);
+	fd->fd = -1;
+}
+
+/* Create a netlink socket for protocol, size its kernel buffers, bind it
+ * to the given multicast groups and start watching it for input with
+ * netlink_read_cb().  Returns TRUE on success; on failure the error is
+ * logged, the socket is closed and FALSE is returned. */
+static int netlink_open(struct netlink_fd *fd, int protocol, int groups)
+{
+	static const struct {
+		int optname;
+		const char *label;
+	} bufopts[] = {
+		{ SO_SNDBUF, "SO_SNDBUF" },
+		{ SO_RCVBUF, "SO_RCVBUF" },
+	};
+	struct sockaddr_nl local;
+	int bufsize = NETLINK_KERNEL_BUFFER;
+	unsigned int i;
+
+	fd->fd = socket(AF_NETLINK, SOCK_RAW, protocol);
+	fd->seq = time(NULL);
+	if (fd->fd < 0) {
+		nhrp_perror("Cannot open netlink socket");
+		return FALSE;
+	}
+
+	fcntl(fd->fd, F_SETFD, FD_CLOEXEC);
+
+	/* Send buffer first, then receive buffer, both the same size. */
+	for (i = 0; i < ARRAY_SIZE(bufopts); i++) {
+		if (setsockopt(fd->fd, SOL_SOCKET, bufopts[i].optname,
+			       &bufsize, sizeof(bufsize)) < 0) {
+			nhrp_perror(bufopts[i].label);
+			goto error;
+		}
+	}
+
+	memset(&local, 0, sizeof(local));
+	local.nl_family = AF_NETLINK;
+	local.nl_groups = groups;
+	if (bind(fd->fd, (struct sockaddr *) &local, sizeof(local)) < 0) {
+		nhrp_perror("Cannot bind netlink socket");
+		goto error;
+	}
+
+	ev_io_init(&fd->io, netlink_read_cb, fd->fd, EV_READ);
+	ev_io_start(&fd->io);
+
+	return TRUE;
+
+error:
+	netlink_close(fd);
+	return FALSE;
+}
+
+/* Read watcher callback of the NHRP PF_PACKET socket.
+ *
+ * Receives at most two packets per invocation and passes each one,
+ * together with the interface it arrived on and the sender's link-level
+ * (NBMA) address, to nhrp_packet_receive().  An all-zero sender address
+ * is reported as PF_UNSPEC. */
+static void pfpacket_read_cb(struct ev_io *w, int revents)
+{
+	struct sockaddr_ll lladdr;
+	struct nhrp_interface *iface;
+	struct iovec iov;
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+	uint8_t buf[1500];
+	struct nhrp_address from;
+	int fd = w->fd;
+	int i;
+
+	iov.iov_base = buf;
+	for (i = 0; i < 2; i++) {
+		int status;
+
+		/* msg_namelen is value-result: recvmsg() overwrites it with
+		 * the size of the returned address, so it has to be reset
+		 * before every call. */
+		msg.msg_name = &lladdr;
+		msg.msg_namelen = sizeof(lladdr);
+		iov.iov_len = sizeof(buf);
+		status = recvmsg(fd, &msg, MSG_DONTWAIT);
+		if (status < 0) {
+			if (errno == EINTR)
+				continue;
+			if (errno == EAGAIN)
+				return;
+			nhrp_perror("PF_PACKET overrun");
+			continue;
+		}
+
+		if (status == 0) {
+			nhrp_error("PF_PACKET returned EOF");
+			return;
+		}
+
+		iface = nhrp_interface_get_by_index(lladdr.sll_ifindex, FALSE);
+		if (iface == NULL)
+			continue;
+
+		nhrp_address_set(&from, PF_INET, lladdr.sll_halen, lladdr.sll_addr);
+		if (memcmp(lladdr.sll_addr, "\x00\x00\x00\x00", 4) == 0)
+			nhrp_address_set_type(&from, PF_UNSPEC);
+		nhrp_packet_receive(buf, status, iface, &from);
+	}
+}
+
+/* Set up the kernel interface: turn ICMP redirects off for "all", open
+ * the NHRP PF_PACKET socket, open one netlink socket per entry of
+ * netlink_groups, and finally dump links, addresses and routes through
+ * talk_fd so the current kernel state is processed.  Returns TRUE on
+ * success, FALSE (after cleanup where applicable) on failure. */
+int kernel_init(void)
+{
+	static const int dumps[] = { RTM_GETLINK, RTM_GETADDR, RTM_GETROUTE };
+	unsigned int i;
+	int sock;
+
+	proc_icmp_redirect_off("all");
+
+	sock = socket(PF_PACKET, SOCK_DGRAM, ETHPROTO_NHRP);
+	if (sock < 0) {
+		nhrp_error("Unable to create PF_PACKET socket");
+		return FALSE;
+	}
+
+	fcntl(sock, F_SETFD, FD_CLOEXEC);
+	ev_io_init(&packet_io, pfpacket_read_cb, sock, EV_READ);
+	ev_io_start(&packet_io);
+
+	for (i = 0; i < ARRAY_SIZE(netlink_groups); i++) {
+		netlink_fds[i].dispatch_size = ARRAY_SIZE(route_dispatch);
+		netlink_fds[i].dispatch = route_dispatch;
+		if (!netlink_open(&netlink_fds[i], NETLINK_ROUTE,
+				  netlink_groups[i])) {
+			kernel_cleanup();
+			return FALSE;
+		}
+	}
+
+	/* Each dump request is followed by one synchronous read. */
+	for (i = 0; i < ARRAY_SIZE(dumps); i++) {
+		netlink_enumerate(&talk_fd, PF_UNSPEC, dumps[i]);
+		netlink_read_cb(&talk_fd.io, EV_READ);
+	}
+
+	return TRUE;
+}
+
+/* Stop the read watchers of all netlink sockets and of the NHRP packet
+ * socket; no descriptor is closed. */
+void kernel_stop_listening(void)
+{
+	unsigned int idx = 0;
+
+	while (idx < ARRAY_SIZE(netlink_groups)) {
+		netlink_stop_listening(&netlink_fds[idx]);
+		idx++;
+	}
+
+	ev_io_stop(&packet_io);
+}
+
+/* Close every netlink socket, then stop watching and close the NHRP
+ * packet socket. */
+void kernel_cleanup(void)
+{
+	unsigned int idx = 0;
+
+	while (idx < ARRAY_SIZE(netlink_groups)) {
+		netlink_close(&netlink_fds[idx]);
+		idx++;
+	}
+
+	ev_io_stop(&packet_io);
+	close(packet_io.fd);
+}
+
+/* Ask the kernel (RTM_GETROUTE) how dest would be routed.
+ *
+ * out_iface       optional: restrict the lookup to this output interface.
+ * dest            destination to look up.
+ * default_source  optional, in/out: if set (type != PF_UNSPEC) it is sent
+ *                 as RTA_SRC; if unset it receives RTA_PREFSRC when the
+ *                 reply carries one.
+ * next_hop        optional, out: the gateway of the route, or a copy of
+ *                 dest when the route has no gateway.
+ * mtu             optional, out: translate_mtu() of the output interface
+ *                 MTU, or 0 when the interface is not known.
+ *
+ * Returns TRUE when the kernel answered, FALSE otherwise. */
+int kernel_route(struct nhrp_interface *out_iface,
+		 struct nhrp_address *dest,
+		 struct nhrp_address *default_source,
+		 struct nhrp_address *next_hop,
+		 u_int16_t *mtu)
+{
+	struct {
+		struct nlmsghdr n;
+		struct rtmsg r;
+		char buf[1024];
+	} req;
+	struct rtmsg *reply = NLMSG_DATA(&req.n);
+	struct rtattr *attrs[RTA_MAX+1];
+	struct nhrp_interface *nbma_iface;
+
+	/* Build the request. */
+	memset(&req, 0, sizeof(req));
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+	req.n.nlmsg_flags = NLM_F_REQUEST;
+	req.n.nlmsg_type = RTM_GETROUTE;
+	req.r.rtm_family = dest->type;
+
+	netlink_add_rtattr_l(&req.n, sizeof(req), RTA_DST,
+			     dest->addr, dest->addr_len);
+	req.r.rtm_dst_len = dest->addr_len * 8;
+
+	if (default_source != NULL && default_source->type != PF_UNSPEC) {
+		netlink_add_rtattr_l(&req.n, sizeof(req), RTA_SRC,
+				     default_source->addr,
+				     default_source->addr_len);
+	}
+	if (out_iface != NULL) {
+		netlink_add_rtattr_l(&req.n, sizeof(req), RTA_OIF,
+				     &out_iface->index, sizeof(int));
+	}
+
+	/* The reply is written over the request buffer. */
+	if (!netlink_talk(&talk_fd, &req.n, sizeof(req), &req.n))
+		return FALSE;
+
+	netlink_parse_rtattr(attrs, RTA_MAX, RTM_RTA(reply),
+			     RTM_PAYLOAD(&req.n));
+
+	if (default_source != NULL && default_source->type == PF_UNSPEC &&
+	    attrs[RTA_PREFSRC] != NULL) {
+		nhrp_address_set(default_source, dest->type,
+				 RTA_PAYLOAD(attrs[RTA_PREFSRC]),
+				 RTA_DATA(attrs[RTA_PREFSRC]));
+	}
+
+	if (next_hop != NULL) {
+		if (attrs[RTA_GATEWAY] == NULL) {
+			*next_hop = *dest;
+		} else {
+			nhrp_address_set(next_hop, dest->type,
+					 RTA_PAYLOAD(attrs[RTA_GATEWAY]),
+					 RTA_DATA(attrs[RTA_GATEWAY]));
+		}
+	}
+
+	if (mtu == NULL)
+		return TRUE;
+
+	*mtu = 0;
+	if (attrs[RTA_OIF] == NULL)
+		return TRUE;
+
+	/* We use interface MTU here instead of the route
+	 * cache MTU from RTA_METRICS/RTAX_MTU since we
+	 * don't want to announce mtu if PMTU works */
+	nbma_iface = nhrp_interface_get_by_index(
+		*(int*)RTA_DATA(attrs[RTA_OIF]),
+		FALSE);
+	if (nbma_iface != NULL)
+		*mtu = translate_mtu(nbma_iface->mtu);
+
+	return TRUE;
+}
+
+/* Send an NHRP packet of the given size out of interface out to the
+ * link-level (NBMA) address to, using the PF_PACKET socket.  Returns
+ * TRUE on success; FALSE if the address does not fit into a sockaddr_ll
+ * or sendmsg() fails (both cases are logged). */
+int kernel_send(uint8_t *packet, size_t bytes, struct nhrp_interface *out,
+		struct nhrp_address *to)
+{
+	struct sockaddr_ll dst;
+	struct iovec payload;
+	struct msghdr hdr;
+
+	if (to->addr_len > sizeof(dst.sll_addr)) {
+		nhrp_error("Destination NBMA address too long");
+		return FALSE;
+	}
+
+	memset(&dst, 0, sizeof(dst));
+	dst.sll_family = AF_PACKET;
+	dst.sll_protocol = ETHPROTO_NHRP;
+	dst.sll_ifindex = out->index;
+	dst.sll_halen = to->addr_len;
+	memcpy(dst.sll_addr, to->addr, to->addr_len);
+
+	payload.iov_base = (void*) packet;
+	payload.iov_len = bytes;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.msg_name = &dst;
+	hdr.msg_namelen = sizeof(dst);
+	hdr.msg_iov = &payload;
+	hdr.msg_iovlen = 1;
+
+	if (sendmsg(packet_io.fd, &hdr, 0) < 0) {
+		nhrp_error("Cannot send packet to %s(%d): %s",
+			   out->name, out->index, strerror(errno));
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/* Create or replace the kernel neighbour entry for neighbor on dev.
+ *
+ * With a usable hwaddr (non-NULL and not PF_UNSPEC) the entry is marked
+ * NUD_REACHABLE and carries hwaddr as its link-layer address; otherwise
+ * it is marked NUD_FAILED.  Returns the result of netlink_send(). */
+int kernel_inject_neighbor(struct nhrp_address *neighbor,
+			   struct nhrp_address *hwaddr,
+			   struct nhrp_interface *dev)
+{
+	struct {
+		struct nlmsghdr n;
+		struct ndmsg ndm;
+		char buf[256];
+	} req;
+	char neigh[64], nbma[64];
+	int resolved = (hwaddr != NULL && hwaddr->type != PF_UNSPEC);
+
+	memset(&req.n, 0, sizeof(req.n));
+	memset(&req.ndm, 0, sizeof(req.ndm));
+
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
+	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE | NLM_F_CREATE;
+	req.n.nlmsg_type = RTM_NEWNEIGH;
+	req.ndm.ndm_family = neighbor->type;
+	req.ndm.ndm_ifindex = dev->index;
+	req.ndm.ndm_type = RTN_UNICAST;
+	req.ndm.ndm_state = resolved ? NUD_REACHABLE : NUD_FAILED;
+
+	netlink_add_rtattr_l(&req.n, sizeof(req), NDA_DST,
+			     neighbor->addr, neighbor->addr_len);
+
+	if (!resolved) {
+		nhrp_debug("NL-ARP(%s) %s not-reachable",
+			   dev->name,
+			   nhrp_address_format(neighbor, sizeof(neigh), neigh));
+		return netlink_send(&talk_fd, &req.n);
+	}
+
+	netlink_add_rtattr_l(&req.n, sizeof(req), NDA_LLADDR,
+			     hwaddr->addr, hwaddr->addr_len);
+
+	nhrp_debug("NL-ARP(%s) %s is-at %s",
+		   dev->name,
+		   nhrp_address_format(neighbor, sizeof(neigh), neigh),
+		   nhrp_address_format(hwaddr, sizeof(nbma), nbma));
+
+	return netlink_send(&talk_fd, &req.n);
+}
+
diff --git a/nhrp/sysdep_pfpacket.c b/nhrp/sysdep_pfpacket.c
new file mode 100644
index 0000000..514b848
--- /dev/null
+++ b/nhrp/sysdep_pfpacket.c
@@ -0,0 +1,388 @@
+/* sysdep_pfpacket.c - Tracing of forwarded packets using PF_PACKET
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <netinet/in.h>
+#include <linux/types.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+
+#include "libev.h"
+#include "nhrp_defines.h"
+#include "nhrp_common.h"
+#include "nhrp_interface.h"
+#include "nhrp_peer.h"
+
+#define MAX_OPCODES 100
+
+struct multicast_packet { /* one slot of the captured-multicast queue */
+ struct nhrp_interface *iface; /* interface the packet was captured on */
+ struct sockaddr_ll lladdr; /* address filled in by recvmsg(); send_multicast() rewrites the destination per peer */
+ unsigned int pdulen; /* length of the received packet in pdu */
+ unsigned char pdu[1500]; /* packet data as received */
+};
+
+static struct ev_io packet_io; /* read watcher on the PF_PACKET capture socket */
+static struct ev_timer install_filter_timer; /* one-shot timer that runs install_filter_cb() */
+static struct ev_idle mcast_route; /* idle watcher that runs send_multicast() */
+
+static struct multicast_packet mcast_queue[16]; /* ring buffer of captured multicast packets */
+static int mcast_head = 0, mcast_tail = 0; /* head: slot filled next; tail: slot sent next; equal means empty */
+
+
+enum { /* Symbolic jump targets stored in jt/jf while the filter is built.
+ * install_filter_cb() rewrites values above LABEL_SKIPN into relative
+ * offsets using the positions recorded by mark(); values up to
+ * LABEL_SKIPN are left as they are and act as literal offsets. */
+ LABEL_NEXT = 0, /* offset 0: continue with the next instruction */
+ LABEL_SKIP1, /* offset 1: skip one instruction */
+ LABEL_SKIPN, /* highest value the fixup passes through unchanged */
+ LABEL_DROP, /* final "return 0" */
+ LABEL_CHECK_MULTICAST, /* start of the outgoing-packet interface test */
+ LABEL_CHECK_MULTICAST_DESTINATION, /* start of the multicast destination range test */
+ LABEL_CHECK_TRAFFIC_INDICATION, /* start of the incoming-packet interface test */
+ LABEL_CHECK_NON_LOCAL_ADDRESS, /* start of the local-address exclusion list */
+ NUM_LABELS /* number of labels; size of filter.pos[] */
+};
+
+struct filter { /* socket filter program under construction */
+ int pos[NUM_LABELS]; /* instruction index recorded by mark() for each label */
+ int numops; /* instructions emitted so far; keeps counting past MAX_OPCODES */
+ struct sock_filter code[MAX_OPCODES]; /* the emitted instructions */
+};
+
+/* Append a non-jump instruction (jt = jf = 0) to the filter.  When the
+ * buffer is already full nothing is stored, but numops still advances so
+ * the caller can detect the overflow afterwards. */
+static void emit_stmt(struct filter *f, __u16 code, __u32 k)
+{
+	int slot = f->numops++;
+
+	if (slot >= MAX_OPCODES)
+		return;
+
+	f->code[slot] = (struct sock_filter) {
+		.code = code,
+		.jt = 0,
+		.jf = 0,
+		.k = k,
+	};
+}
+
+/* Append a conditional jump to the filter; jt and jf are the targets
+ * taken when the comparison is true or false.  When the buffer is
+ * already full nothing is stored, but numops still advances so the
+ * caller can detect the overflow afterwards. */
+static void emit_jump(struct filter *f, __u16 code, __u32 k, __u8 jt, __u8 jf)
+{
+	int slot = f->numops++;
+
+	if (slot >= MAX_OPCODES)
+		return;
+
+	f->code[slot] = (struct sock_filter) {
+		.code = code,
+		.jt = jt,
+		.jf = jf,
+		.k = k,
+	};
+}
+
+static void mark(struct filter *f, int label)
+{ /* Bind label to the index of the next instruction to be emitted. */
+ f->pos[label] = f->numops;
+}
+
+/* nhrp_interface_foreach() callback used while building the filter
+ * (ctx is the struct filter): for a configured, non-shortcut-destination
+ * interface that has a multicast mask or multicast addresses, emit a
+ * test that sends packets with this interface index to the multicast
+ * destination check.  Always returns 0. */
+static int check_interface_multicast(void *ctx, struct nhrp_interface *iface)
+{
+	struct filter *prog = ctx;
+	int configured = (iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED) != 0;
+	int shortcut_dest = (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) != 0;
+
+	if (!configured || shortcut_dest)
+		return 0;
+	if (!iface->mcast_mask && !iface->mcast_numaddr)
+		return 0;
+
+	emit_jump(prog, BPF_JMP|BPF_JEQ|BPF_K, iface->index,
+		  LABEL_CHECK_MULTICAST_DESTINATION, LABEL_NEXT);
+	return 0;
+}
+
+/* nhrp_peer_foreach() callback used while building the filter (ctx is
+ * the struct filter): emit instructions that drop packets whose
+ * destination, already loaded into the accumulator, falls inside this
+ * peer's IPv4 address/prefix.  A /32 becomes one equality test, any
+ * other prefix a two-instruction range test.  Always returns 0.
+ *
+ * The address is copied out with memcpy() into a 32-bit value: reading
+ * it through an "unsigned long *" fetches 8 bytes on LP64 targets, may
+ * be misaligned, and picks the wrong half on big-endian 64-bit
+ * machines. */
+static int drop_matching_address(void *ctx, struct nhrp_peer *peer)
+{
+	struct filter *f = (struct filter *) ctx;
+	__u32 addr, mask;
+
+	if (peer->protocol_type != ETHPROTO_IP)
+		return 0;
+
+	/* Not a well-formed IPv4 entry; shifting by more than 31 bits
+	 * below would be undefined. */
+	if (peer->protocol_address.addr_len < sizeof(addr) ||
+	    peer->prefix_length > 32)
+		return 0;
+
+	memcpy(&addr, peer->protocol_address.addr, sizeof(addr));
+	addr = ntohl(addr);
+	if (peer->prefix_length != 32) {
+		mask = 0xffffffffU >> peer->prefix_length;
+		emit_jump(f, BPF_JMP|BPF_JGE|BPF_K, addr & ~mask, LABEL_NEXT, LABEL_SKIP1);
+		emit_jump(f, BPF_JMP|BPF_JGT|BPF_K, addr | mask, LABEL_NEXT, LABEL_DROP);
+	} else {
+		emit_jump(f, BPF_JMP|BPF_JEQ|BPF_K, addr, LABEL_DROP, LABEL_NEXT);
+	}
+
+	return 0;
+}
+
+/* nhrp_interface_foreach() callback used while building the filter
+ * (ctx is the struct filter): for a configured, non-shortcut-destination
+ * interface with the redirect flag set, emit a test that sends packets
+ * with this interface index to the non-local address check.  Always
+ * returns 0. */
+static int check_interface_traffic_indication(void *ctx, struct nhrp_interface *iface)
+{
+	struct filter *prog = ctx;
+	int configured = (iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED) != 0;
+	int shortcut_dest = (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) != 0;
+	int redirect = (iface->flags & NHRP_INTERFACE_FLAG_REDIRECT) != 0;
+
+	if (configured && !shortcut_dest && redirect) {
+		emit_jump(prog, BPF_JMP|BPF_JEQ|BPF_K, iface->index,
+			  LABEL_CHECK_NON_LOCAL_ADDRESS, LABEL_NEXT);
+	}
+
+	return 0;
+}
+
+/* Build the socket filter for the capture socket from the current
+ * interface and local-address state and attach it.
+ *
+ * Runs as the install_filter_timer callback and is also called directly
+ * from forward_init().  The program is emitted with symbolic jump labels
+ * which are turned into relative offsets just before it is attached.
+ * Failures are logged; the previously attached filter (if any) is then
+ * left in place. */
+static void install_filter_cb(struct ev_timer *w, int revents)
+{
+	struct nhrp_peer_selector sel;
+	struct sock_fprog prog;
+	struct filter f;
+	int i;
+
+	memset(&prog, 0, sizeof(prog));
+	memset(&f, 0, sizeof(f));
+
+	/* Check for IPv4 */
+	emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, SKF_AD_OFF+SKF_AD_PROTOCOL);
+	emit_jump(&f, BPF_JMP|BPF_JEQ|BPF_K, ETH_P_IP, LABEL_NEXT, LABEL_DROP);
+
+	/* Traffic indication checking is for incoming packets
+	 * Multicast checking is for outgoing packets */
+	emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, SKF_AD_OFF+SKF_AD_PKTTYPE);
+	emit_jump(&f, BPF_JMP|BPF_JEQ|BPF_K, PACKET_OUTGOING, LABEL_CHECK_MULTICAST, LABEL_NEXT);
+	emit_jump(&f, BPF_JMP|BPF_JEQ|BPF_K, PACKET_HOST, LABEL_CHECK_TRAFFIC_INDICATION, LABEL_DROP);
+
+	/* MULTICAST check - for interfaces that have MC forwarding enabled */
+	mark(&f, LABEL_CHECK_MULTICAST);
+	emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, SKF_AD_OFF+SKF_AD_IFINDEX);
+	nhrp_interface_foreach(check_interface_multicast, &f);
+	emit_stmt(&f, BPF_RET|BPF_K, 0);
+
+	/* Check for multicast IPv4 destination - accept on match (all packet) */
+	mark(&f, LABEL_CHECK_MULTICAST_DESTINATION);
+	emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, offsetof(struct iphdr, daddr));
+	emit_jump(&f, BPF_JMP|BPF_JGE|BPF_K, 0xe0000000, LABEL_NEXT, LABEL_DROP);
+	emit_jump(&f, BPF_JMP|BPF_JGE|BPF_K, 0xf0000000, LABEL_DROP, LABEL_NEXT);
+	emit_stmt(&f, BPF_RET|BPF_K, 65535);
+
+	/* TRAFFIC INDICATION check - is destination non-local
+	 * if yes, capture headers for NHRP traffic indication */
+	mark(&f, LABEL_CHECK_TRAFFIC_INDICATION);
+	emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, SKF_AD_OFF+SKF_AD_IFINDEX);
+	nhrp_interface_foreach(check_interface_traffic_indication, &f);
+	emit_stmt(&f, BPF_RET|BPF_K, 0);
+
+	mark(&f, LABEL_CHECK_NON_LOCAL_ADDRESS);
+	memset(&sel, 0, sizeof(sel));
+	sel.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ADDR);
+	emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, offsetof(struct iphdr, daddr));
+	nhrp_peer_foreach(drop_matching_address, &f, &sel);
+	emit_stmt(&f, BPF_RET|BPF_K, 68);
+
+	mark(&f, LABEL_DROP);
+	emit_stmt(&f, BPF_RET|BPF_K, 0);
+
+	/* The emit helpers keep counting once the buffer is full but stop
+	 * storing, so only a count above MAX_OPCODES means the program was
+	 * truncated; exactly MAX_OPCODES instructions still fit. */
+	if (f.numops > MAX_OPCODES) {
+		nhrp_error("Filter code buffer too small (code actual length %d)",
+			   f.numops);
+		return;
+	}
+
+	/* Fixup jumps to be relative; targets up to LABEL_SKIPN already
+	 * are literal offsets. */
+	for (i = 0; i < f.numops; i++) {
+		if (BPF_CLASS(f.code[i].code) == BPF_JMP) {
+			if (f.code[i].jt > LABEL_SKIPN)
+				f.code[i].jt = f.pos[f.code[i].jt] - i - 1;
+			if (f.code[i].jf > LABEL_SKIPN)
+				f.code[i].jf = f.pos[f.code[i].jf] - i - 1;
+		}
+	}
+
+	/* Attach filter */
+	prog.len = f.numops;
+	prog.filter = f.code;
+	if (setsockopt(packet_io.fd, SOL_SOCKET, SO_ATTACH_FILTER,
+		       &prog, sizeof(prog))) {
+		/* Report the failure instead of dropping it silently. */
+		nhrp_perror("Cannot attach packet filter");
+		return;
+	}
+
+	nhrp_info("Filter code installed (%d opcodes)", f.numops);
+}
+
+/* Schedule a rebuild of the socket filter by starting the filter timer.
+ * Does nothing while the timer has no callback set, which is the case
+ * before forward_init() has initialised it.  Always returns TRUE. */
+int forward_local_addresses_changed(void)
+{
+	if (install_filter_timer.cb == NULL)
+		return TRUE;
+
+	ev_timer_start(&install_filter_timer);
+	return TRUE;
+}
+
+/* Idle watcher callback: take one packet off the multicast queue and
+ * resend it to every peer on the capturing interface's multicast list,
+ * addressed to that peer's next-hop (NBMA) address.  Stops the idle
+ * watcher once the queue is empty. */
+static void send_multicast(struct ev_idle *w, int revents)
+{
+	struct multicast_packet *pkt;
+	struct nhrp_peer *peer;
+	struct iovec iov;
+	struct msghdr msg;
+
+	if (mcast_head == mcast_tail) {
+		ev_idle_stop(&mcast_route);
+		return;
+	}
+
+	/* Pop a packet */
+	pkt = &mcast_queue[mcast_tail];
+	mcast_tail = (mcast_tail + 1) % ARRAY_SIZE(mcast_queue);
+
+	/* And softroute it forward */
+	iov.iov_base = pkt->pdu;
+	iov.iov_len = pkt->pdulen;
+	msg = (struct msghdr) {
+		.msg_name = &pkt->lladdr,
+		.msg_namelen = sizeof(pkt->lladdr),
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+
+	list_for_each_entry(peer, &pkt->iface->mcast_list, mcast_list_entry) {
+		/* An address that does not fit into sll_addr would overrun
+		 * the sockaddr; skip such a peer (kernel_send() applies the
+		 * same limit). */
+		if (peer->next_hop_address.addr_len >
+		    sizeof(pkt->lladdr.sll_addr))
+			continue;
+
+		/* Update NBMA destination */
+		pkt->lladdr.sll_halen = peer->next_hop_address.addr_len;
+		memcpy(pkt->lladdr.sll_addr, peer->next_hop_address.addr,
+		       pkt->lladdr.sll_halen);
+
+		/* Best effort attempt to emulate multicast */
+		(void) sendmsg(packet_io.fd, &msg, 0);
+	}
+}
+
+/* Read watcher callback of the capture socket.
+ *
+ * Drains the socket.  Outgoing packets with a multicast destination are
+ * queued for send_multicast(); packets addressed to this host are handed
+ * to nhrp_packet_send_traffic().  Packets on unknown or unconfigured
+ * interfaces are skipped.
+ *
+ * A receive error other than EINTR ends this invocation instead of
+ * retrying immediately, so a persistent error cannot spin inside the
+ * callback; the watcher fires again while the socket stays readable. */
+static void pfp_read_cb(struct ev_io *w, int revents)
+{
+	struct nhrp_address nbma_src, src, dst;
+	struct nhrp_interface *iface;
+	struct sockaddr_ll *lladdr;
+	struct iovec iov;
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+	char fr[32], to[32];
+	int r, fd = w->fd;
+
+	if (!(revents & EV_READ))
+		return;
+
+	while (TRUE) {
+		/* Get a scratch buffer directly from mcast queue, so we do
+		 * not need copy the data later. */
+		msg.msg_name = &mcast_queue[mcast_head].lladdr;
+		msg.msg_namelen = sizeof(mcast_queue[mcast_head].lladdr);
+		iov.iov_base = mcast_queue[mcast_head].pdu;
+		iov.iov_len = sizeof(mcast_queue[mcast_head].pdu);
+
+		/* Receive */
+		r = recvmsg(fd, &msg, MSG_DONTWAIT);
+
+		/* Process */
+		if (r < 0) {
+			if (errno == EINTR)
+				continue;
+			if (errno != EAGAIN)
+				nhrp_perror("PF_PACKET overrun");
+			return;
+		}
+
+		if (r == 0) {
+			nhrp_error("PF_PACKET returned EOF");
+			return;
+		}
+
+		/* Only a successful receive yields a usable length. */
+		mcast_queue[mcast_head].pdulen = r;
+
+		lladdr = &mcast_queue[mcast_head].lladdr;
+		if (lladdr->sll_pkttype != PACKET_OUTGOING &&
+		    lladdr->sll_pkttype != PACKET_HOST)
+			continue;
+
+		iface = nhrp_interface_get_by_index(lladdr->sll_ifindex, FALSE);
+		if (iface == NULL)
+			continue;
+		if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED))
+			continue;
+
+		if (!nhrp_address_parse_packet(lladdr->sll_protocol,
+					       r, iov.iov_base,
+					       &src, &dst))
+			return;
+
+		if (nhrp_address_is_multicast(&dst) &&
+		    lladdr->sll_pkttype == PACKET_OUTGOING) {
+			nhrp_debug("Multicast from %s to %s",
+				   nhrp_address_format(&src, sizeof(fr), fr),
+				   nhrp_address_format(&dst, sizeof(to), to));
+
+			/* Queue packet for processing later (handle important
+			 * stuff first) */
+			mcast_queue[mcast_head].iface = iface;
+			mcast_head = (mcast_head + 1) % ARRAY_SIZE(mcast_queue);
+
+			/* Drop packets from queue tail, if we haven't processed
+			 * them yet. */
+			if (mcast_head == mcast_tail)
+				mcast_tail = (mcast_tail + 1) %
+					ARRAY_SIZE(mcast_queue);
+
+			ev_idle_start(&mcast_route);
+		} else if (lladdr->sll_pkttype == PACKET_HOST) {
+			nhrp_address_set(&nbma_src, PF_INET,
+					 lladdr->sll_halen,
+					 lladdr->sll_addr);
+			nhrp_packet_send_traffic(iface,
+						 &nbma_src, &src, &dst,
+						 lladdr->sll_protocol,
+						 iov.iov_base, r);
+		}
+	}
+}
+
+/* Open the PF_PACKET capture socket for all protocols, start watching
+ * it with pfp_read_cb(), install the initial socket filter and prepare
+ * the filter timer and the low-priority multicast idle watcher.
+ * Returns TRUE on success, FALSE if the socket cannot be created. */
+int forward_init(void)
+{
+	int fd;
+
+	/* The protocol argument is in network byte order, so the
+	 * host-to-network conversion is the one that applies here (it
+	 * yields the same value as the ntohs() used before). */
+	fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL));
+	if (fd < 0) {
+		/* Include errno so the cause (e.g. missing privileges) is
+		 * visible in the log. */
+		nhrp_perror("Unable to create PF_PACKET socket");
+		return FALSE;
+	}
+
+	fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+	ev_io_init(&packet_io, pfp_read_cb, fd, EV_READ);
+	ev_io_start(&packet_io);
+
+	/* The timer is only initialised here; it is started by
+	 * forward_local_addresses_changed().  The first filter is
+	 * installed right away by calling the callback directly. */
+	ev_timer_init(&install_filter_timer, install_filter_cb, .01, .0);
+	install_filter_cb(&install_filter_timer, 0);
+
+	ev_idle_init(&mcast_route, send_multicast);
+	ev_set_priority(&mcast_route, -1);
+
+	return TRUE;
+}
+
+void forward_cleanup(void)
+{ /* Undo forward_init(): stop the packet watcher, close its socket, and
+ * stop the filter timer and the multicast idle watcher. */
+ ev_io_stop(&packet_io);
+ close(packet_io.fd);
+ ev_timer_stop(&install_filter_timer);
+ ev_idle_stop(&mcast_route);
+}
diff --git a/nhrp/sysdep_syslog.c b/nhrp/sysdep_syslog.c
new file mode 100644
index 0000000..c8f9f7e
--- /dev/null
+++ b/nhrp/sysdep_syslog.c
@@ -0,0 +1,55 @@
+/* sysdep_syslog.c - Logging via syslog
+ *
+ * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <syslog.h>
+#include <stdarg.h>
+
+#include "nhrp_defines.h"
+#include "nhrp_common.h"
+
+int log_init(void)
+{ /* Open the syslog connection as "opennhrp" in the daemon facility, with
+ * the PID in each message and a copy written to stderr (LOG_PERROR).
+ * Always returns TRUE. */
+ openlog("opennhrp", LOG_PERROR | LOG_PID, LOG_DAEMON);
+
+ return TRUE;
+}
+
+/* Write a printf-style message to syslog.  NHRP_LOG_ERROR maps to
+ * LOG_ERR and NHRP_LOG_INFO to LOG_INFO; NHRP_LOG_DEBUG and any other
+ * level map to LOG_DEBUG. */
+void nhrp_log(int level, const char *format, ...)
+{
+	int priority = LOG_DEBUG;
+	va_list args;
+
+	if (level == NHRP_LOG_ERROR)
+		priority = LOG_ERR;
+	else if (level == NHRP_LOG_INFO)
+		priority = LOG_INFO;
+
+	va_start(args, format);
+	vsyslog(priority, format, args);
+	va_end(args);
+}
+
+void nhrp_perror(const char *message)
+{ /* Log message followed by the strerror() text for the current errno,
+ * through nhrp_error(). */
+ nhrp_error("%s: %s", message, strerror(errno));
+}
diff --git a/patches/ipsec-tools-0.7.diff b/patches/ipsec-tools-0.7.diff
new file mode 100644
index 0000000..1efba6c
--- /dev/null
+++ b/patches/ipsec-tools-0.7.diff
@@ -0,0 +1,1832 @@
+Index: ipsec-tools-cvs/src/racoon/pfkey.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/pfkey.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/pfkey.c 2008-01-04 15:18:21.000000000 +0200
+@@ -92,6 +92,7 @@
+ #include "algorithm.h"
+ #include "sainfo.h"
+ #include "admin.h"
++#include "evt.h"
+ #include "privsep.h"
+ #include "strnames.h"
+ #include "backupsa.h"
+@@ -1263,9 +1264,10 @@
+
+ /* turn off the timer for calling pfkey_timeover() */
+ SCHED_KILL(iph2->sce);
+-
++
+ /* update status */
+ iph2->status = PHASE2ST_ESTABLISHED;
++ evt_phase2(iph2, EVTT_PHASE2_UP, NULL);
+
+ #ifdef ENABLE_STATS
+ gettimeofday(&iph2->end, NULL);
+@@ -1636,7 +1638,6 @@
+ struct ph2handle *iph2[MAXNESTEDSA];
+ struct sockaddr *src, *dst;
+ int n; /* # of phase 2 handler */
+- int remoteid=0;
+ #ifdef HAVE_SECCTX
+ struct sadb_x_sec_ctx *m_sec_ctx;
+ #endif /* HAVE_SECCTX */
+@@ -1825,63 +1826,12 @@
+ return -1;
+ }
+
+- plog(LLV_DEBUG, LOCATION, NULL,
+- "new acquire %s\n", spidx2str(&sp_out->spidx));
+-
+- /* get sainfo */
+- {
+- vchar_t *idsrc, *iddst;
+-
+- idsrc = ipsecdoi_sockaddr2id((struct sockaddr *)&sp_out->spidx.src,
+- sp_out->spidx.prefs, sp_out->spidx.ul_proto);
+- if (idsrc == NULL) {
+- plog(LLV_ERROR, LOCATION, NULL,
+- "failed to get ID for %s\n",
+- spidx2str(&sp_out->spidx));
+- delph2(iph2[n]);
+- return -1;
+- }
+- iddst = ipsecdoi_sockaddr2id((struct sockaddr *)&sp_out->spidx.dst,
+- sp_out->spidx.prefd, sp_out->spidx.ul_proto);
+- if (iddst == NULL) {
+- plog(LLV_ERROR, LOCATION, NULL,
+- "failed to get ID for %s\n",
+- spidx2str(&sp_out->spidx));
+- vfree(idsrc);
+- delph2(iph2[n]);
+- return -1;
+- }
+- {
+- struct remoteconf *conf;
+- conf = getrmconf(iph2[n]->dst);
+- if (conf != NULL)
+- remoteid=conf->ph1id;
+- else{
+- plog(LLV_DEBUG, LOCATION, NULL, "Warning: no valid rmconf !\n");
+- remoteid=0;
+- }
+- }
+- iph2[n]->sainfo = getsainfo(idsrc, iddst, NULL, remoteid);
+- vfree(idsrc);
+- vfree(iddst);
+- if (iph2[n]->sainfo == NULL) {
+- plog(LLV_ERROR, LOCATION, NULL,
+- "failed to get sainfo.\n");
++ if (isakmp_get_sainfo(iph2[n], sp_out, sp_in) < 0) {
+ delph2(iph2[n]);
+ return -1;
+- /* XXX should use the algorithm list from register message */
+ }
+
+- plog(LLV_DEBUG, LOCATION, NULL,
+- "selected sainfo: %s\n", sainfo2str(iph2[n]->sainfo));
+- }
+
+- if (set_proposal_from_policy(iph2[n], sp_out, sp_in) < 0) {
+- plog(LLV_ERROR, LOCATION, NULL,
+- "failed to create saprop.\n");
+- delph2(iph2[n]);
+- return -1;
+- }
+ #ifdef HAVE_SECCTX
+ if (m_sec_ctx) {
+ set_secctx_in_proposal(iph2[n], spidx);
+@@ -2814,7 +2764,7 @@
+ struct sadb_msg buf, *newmsg;
+ int reallen;
+ int retry = 0;
+-
++
+ *lenp = -1;
+ do
+ {
+@@ -2823,12 +2773,10 @@
+ retry++;
+ }
+ while (*lenp < 0 && errno == EAGAIN && retry < 3);
++
+ if (*lenp < 0)
+- {
+- if ( errno == EAGAIN ) *lenp = 0; /* non-fatal */
+- return NULL; /*fatal*/
+- }
+-
++ return NULL; /*fatal*/
++
+ else if (*lenp < sizeof(buf))
+ return NULL;
+
+Index: ipsec-tools-cvs/src/racoon/evt.h
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/evt.h 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/evt.h 2008-01-04 15:18:21.000000000 +0200
+@@ -4,6 +4,7 @@
+
+ /*
+ * Copyright (C) 2004 Emmanuel Dreyfus
++ * Copyright (C) 2007 Timo Teras
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+@@ -34,12 +35,27 @@
+ #ifndef _EVT_H
+ #define _EVT_H
+
+-struct evtdump {
+- size_t len;
+- struct sockaddr_storage src;
+- struct sockaddr_storage dst;
+- time_t timestamp;
+- int type;
++#ifdef ENABLE_ADMINPORT
++
++struct evt_listener {
++ LIST_ENTRY(evt_listener) ll_chain;
++ LIST_ENTRY(evt_listener) fd_chain;
++ int fd;
++};
++LIST_HEAD(evt_listener_list, evt_listener);
++#define EVT_LISTENER_LIST(x) struct evt_listener_list x;
++
++struct ph1handle;
++struct ph2handle;
++
++struct evt_common {
++ uint32_t ec_type;
++ time_t ec_timestamp;
++
++ struct sockaddr_storage ec_ph1src;
++ struct sockaddr_storage ec_ph1dst;
++ u_int32_t ec_ph2msgid;
++
+ /*
+ * Optionnal list of struct isakmp_data
+ * for type EVTT_ISAKMP_CFG_DONE
+@@ -47,42 +63,48 @@
+ };
+
+ /* type */
+-#define EVTT_UNSEPC 0
+-#define EVTT_PHASE1_UP 1
+-#define EVTT_PHASE1_DOWN 2
+-#define EVTT_XAUTH_SUCCESS 3
+-#define EVTT_ISAKMP_CFG_DONE 4
+-#define EVTT_PHASE2_UP 5
+-#define EVTT_PHASE2_DOWN 6
+-#define EVTT_DPD_TIMEOUT 7
+-#define EVTT_PEER_NO_RESPONSE 8
+-#define EVTT_PEER_DELETE 9
+-#define EVTT_RACOON_QUIT 10
+-#define EVTT_XAUTH_FAILED 11
+-#define EVTT_OVERFLOW 12 /* Event queue overflowed */
+-#define EVTT_PEERPH1AUTH_FAILED 13
+-#define EVTT_PEERPH1_NOPROP 14 /* NO_PROPOSAL_CHOSEN & friends */
+-#define EVTT_NO_ISAKMP_CFG 15 /* no need to wait for mode_cfg */
+-
+-struct evt {
+- struct evtdump *dump;
+- TAILQ_ENTRY(evt) next;
+-};
++#define EVTT_RACOON_QUIT 0x0001
+
+-TAILQ_HEAD(evtlist, evt);
++#define EVTT_PHASE1_UP 0x0100
++#define EVTT_PHASE1_DOWN 0x0101
++#define EVTT_PHASE1_NO_RESPONSE 0x0102
++#define EVTT_PHASE1_NO_PROPOSAL 0x0103
++#define EVTT_PHASE1_AUTH_FAILED 0x0104
++#define EVTT_PHASE1_DPD_TIMEOUT 0x0105
++#define EVTT_PHASE1_PEER_DELETED 0x0106
++#define EVTT_PHASE1_MODE_CFG 0x0107
++#define EVTT_PHASE1_XAUTH_SUCCESS 0x0108
++#define EVTT_PHASE1_XAUTH_FAILED 0x0109
++
++#define EVTT_PHASE2_NO_PHASE1 0x0200
++#define EVTT_PHASE2_UP 0x0201
++#define EVTT_PHASE2_DOWN 0x0202
++#define EVTT_PHASE2_NO_RESPONSE 0x0203
++
++void evt_generic __P((int type, vchar_t *optdata));
++void evt_phase1 __P((const struct ph1handle *ph1, int type, vchar_t *optdata));
++void evt_phase2 __P((const struct ph2handle *ph2, int type, vchar_t *optdata));
++
++int evt_subscribe __P((struct evt_listener_list *list, int fd));
++void evt_list_init __P((struct evt_listener_list *list));
++void evt_list_cleanup __P((struct evt_listener_list *list));
++int evt_get_fdmask __P((int nfds, fd_set *fdset));
++void evt_handle_fdmask __P((fd_set *fdset));
+
+-#define EVTLIST_MAX 32
++#else
+
+-#ifdef ENABLE_ADMINPORT
+-struct evtdump *evt_pop(void);
+-vchar_t *evt_dump(void);
+-void evt_push(struct sockaddr *, struct sockaddr *, int, vchar_t *);
+-#endif
++#define EVT_LISTENER_LIST(x)
+
+-#ifdef ENABLE_ADMINPORT
+-#define EVT_PUSH(src, dst, type, optdata) evt_push(src, dst, type, optdata);
+-#else
+-#define EVT_PUSH(src, dst, type, optdata) ;
+-#endif
++#define evt_generic(type, optdata) ;
++#define evt_phase1(ph1, type, optdata) ;
++#define evt_phase2(ph2, type, optdata) ;
++
++#define evt_subscribe(eventlist, fd) ;
++#define evt_list_init(eventlist) ;
++#define evt_list_cleanup(eventlist) ;
++#define evt_get_fdmask(nfds, fdset) nfds
++#define evt_handle_fdmask(fdset) ;
++
++#endif /* ENABLE_ADMINPORT */
+
+ #endif /* _EVT_H */
+Index: ipsec-tools-cvs/src/racoon/evt.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/evt.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/evt.c 2008-01-04 15:18:21.000000000 +0200
+@@ -4,6 +4,7 @@
+
+ /*
+ * Copyright (C) 2004 Emmanuel Dreyfus
++ * Copyright (C) 2007 Timo Teras
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+@@ -46,113 +47,219 @@
+ #include "plog.h"
+ #include "misc.h"
+ #include "admin.h"
++#include "handler.h"
+ #include "gcmalloc.h"
+ #include "evt.h"
+
+ #ifdef ENABLE_ADMINPORT
+-struct evtlist evtlist = TAILQ_HEAD_INITIALIZER(evtlist);
+-int evtlist_len = 0;
+
+-void
+-evt_push(src, dst, type, optdata)
+- struct sockaddr *src;
+- struct sockaddr *dst;
++static EVT_LISTENER_LIST(evt_listeners);
++static EVT_LISTENER_LIST(evt_fds);
++
++struct evtdump {
++ struct admin_com adm;
++ struct evt_common evt;
++};
++
++static struct evtdump *
++evtdump_create(type, optdata)
+ int type;
+ vchar_t *optdata;
+ {
+- struct evtdump *evtdump;
+- struct evt *evt;
++ struct evtdump *e;
+ size_t len;
+
+- /* If admin socket is disabled, silently discard anything */
+- if (adminsock_path == NULL)
+- return;
++ len = sizeof(struct admin_com) + sizeof(struct evt_common);
++ if (optdata != NULL)
++ len += optdata->l;
+
+- /* If we are above the limit, don't record anything */
+- if (evtlist_len > EVTLIST_MAX) {
+- plog(LLV_DEBUG, LOCATION, NULL,
+- "Cannot record event: event queue overflowed\n");
+- return;
++ if ((e = racoon_malloc(len)) == NULL) {
++ plog(LLV_ERROR, LOCATION, NULL, "Cannot allocate event: %s\n",
++ strerror(errno));
++ return NULL;
+ }
+
+- /* If we hit the limit, record an overflow event instead */
+- if (evtlist_len == EVTLIST_MAX) {
+- plog(LLV_ERROR, LOCATION, NULL,
+- "Cannot record event: event queue overflow\n");
+- src = NULL;
+- dst = NULL;
+- type = EVTT_OVERFLOW;
+- optdata = NULL;
++ memset(e, 0, sizeof(struct evtdump));
++ e->adm.ac_len = len;
++ e->adm.ac_cmd = ADMIN_SHOW_EVT;
++ e->adm.ac_errno = 0;
++ e->adm.ac_proto = 0;
++ e->evt.ec_type = type;
++ time(&e->evt.ec_timestamp);
++ if (optdata != NULL)
++ memcpy(e + 1, optdata->v, optdata->l);
++
++ return e;
++}
++
++static void
++evt_unsubscribe(l)
++ struct evt_listener *l;
++{
++ plog(LLV_DEBUG, LOCATION, NULL,
++ "[%d] admin connection released\n", l->fd);
++
++ LIST_REMOVE(l, ll_chain);
++ LIST_REMOVE(l, fd_chain);
++ close(l->fd);
++ racoon_free(l);
++}
++
++static void
++evtdump_broadcast(ll, e)
++ const struct evt_listener_list *ll;
++ struct evtdump *e;
++{
++ struct evt_listener *l, *nl;
++
++ for (l = LIST_FIRST(ll); l != NULL; l = nl) {
++ nl = LIST_NEXT(l, ll_chain);
++
++ if (send(l->fd, e, e->adm.ac_len,
++ MSG_NOSIGNAL | MSG_DONTWAIT) < 0) {
++ plog(LLV_DEBUG, LOCATION, NULL, "Cannot send event to fd: %s\n",
++ strerror(errno));
++ evt_unsubscribe(l);
++ }
+ }
++}
+
+- len = sizeof(*evtdump);
+- if (optdata)
+- len += optdata->l;
++void
++evt_generic(type, optdata)
++ int type;
++ vchar_t *optdata;
++{
++ struct evtdump *e;
+
+- if ((evtdump = racoon_malloc(len)) == NULL) {
+- plog(LLV_ERROR, LOCATION, NULL, "Cannot record event: %s\n",
+- strerror(errno));
++ if ((e = evtdump_create(type, optdata)) == NULL)
+ return;
+- }
+
+- if ((evt = racoon_malloc(sizeof(*evt))) == NULL) {
+- plog(LLV_ERROR, LOCATION, NULL, "Cannot record event: %s\n",
+- strerror(errno));
+- racoon_free(evtdump);
++ evtdump_broadcast(&evt_listeners, e);
++
++ racoon_free(e);
++}
++
++void
++evt_phase1(ph1, type, optdata)
++ const struct ph1handle *ph1;
++ int type;
++ vchar_t *optdata;
++{
++ struct evtdump *e;
++
++ if ((e = evtdump_create(type, optdata)) == NULL)
+ return;
++
++ if (ph1->local)
++ memcpy(&e->evt.ec_ph1src, ph1->local, sysdep_sa_len(ph1->local));
++ if (ph1->remote)
++ memcpy(&e->evt.ec_ph1dst, ph1->remote, sysdep_sa_len(ph1->remote));
++
++ evtdump_broadcast(&ph1->evt_listeners, e);
++ evtdump_broadcast(&evt_listeners, e);
++
++ racoon_free(e);
++}
++
++void
++evt_phase2(ph2, type, optdata)
++ const struct ph2handle *ph2;
++ int type;
++ vchar_t *optdata;
++{
++ struct evtdump *e;
++ struct ph1handle *ph1 = ph2->ph1;
++
++ if ((e = evtdump_create(type, optdata)) == NULL)
++ return;
++
++ if (ph1) {
++ if (ph1->local)
++ memcpy(&e->evt.ec_ph1src, ph1->local, sysdep_sa_len(ph1->local));
++ if (ph1->remote)
++ memcpy(&e->evt.ec_ph1dst, ph1->remote, sysdep_sa_len(ph1->remote));
++ }
++ e->evt.ec_ph2msgid = ph2->msgid;
++
++ evtdump_broadcast(&ph2->evt_listeners, e);
++ if (ph1)
++ evtdump_broadcast(&ph1->evt_listeners, e);
++ evtdump_broadcast(&evt_listeners, e);
++
++ racoon_free(e);
++}
++
++int
++evt_subscribe(list, fd)
++ struct evt_listener_list *list;
++ int fd;
++{
++ struct evt_listener *l;
++
++ if ((l = racoon_malloc(sizeof(*l))) == NULL) {
++ plog(LLV_ERROR, LOCATION, NULL,
++ "Cannot allocate event listener: %s\n",
++ strerror(errno));
++ return errno;
+ }
+
+- if (src)
+- memcpy(&evtdump->src, src, sysdep_sa_len(src));
+- if (dst)
+- memcpy(&evtdump->dst, dst, sysdep_sa_len(dst));
+- evtdump->len = len;
+- evtdump->type = type;
+- time(&evtdump->timestamp);
++ if (list == NULL)
++ list = &evt_listeners;
+
+- if (optdata)
+- memcpy(evtdump + 1, optdata->v, optdata->l);
++ LIST_INSERT_HEAD(list, l, ll_chain);
++ LIST_INSERT_HEAD(&evt_fds, l, fd_chain);
++ l->fd = fd;
+
+- evt->dump = evtdump;
+- TAILQ_INSERT_TAIL(&evtlist, evt, next);
++ plog(LLV_DEBUG, LOCATION, NULL,
++ "[%d] admin connection is polling events\n", fd);
+
+- evtlist_len++;
++ return -2;
++}
+
+- return;
++void
++evt_list_init(list)
++ struct evt_listener_list *list;
++{
++ LIST_INIT(list);
+ }
+
+-struct evtdump *
+-evt_pop(void) {
+- struct evtdump *evtdump;
+- struct evt *evt;
++void
++evt_list_cleanup(list)
++ struct evt_listener_list *list;
++{
++ while (!LIST_EMPTY(list))
++ evt_unsubscribe(LIST_FIRST(list));
++}
+
+- if ((evt = TAILQ_FIRST(&evtlist)) == NULL)
+- return NULL;
++int
++evt_get_fdmask(nfds, fdset)
++ int nfds;
++ fd_set *fdset;
++{
++ struct evt_listener *l;
+
+- evtdump = evt->dump;
+- TAILQ_REMOVE(&evtlist, evt, next);
+- racoon_free(evt);
+- evtlist_len--;
+-
+- return evtdump;
+-}
+-
+-vchar_t *
+-evt_dump(void) {
+- struct evtdump *evtdump;
+- vchar_t *buf = NULL;
+-
+- if ((evtdump = evt_pop()) != NULL) {
+- if ((buf = vmalloc(evtdump->len)) == NULL) {
+- plog(LLV_ERROR, LOCATION, NULL,
+- "evt_dump failed: %s\n", strerror(errno));
+- return NULL;
+- }
+- memcpy(buf->v, evtdump, evtdump->len);
+- racoon_free(evtdump);
++ LIST_FOREACH(l, &evt_fds, fd_chain) {
++ FD_SET(l->fd, fdset);
++ if (l->fd + 1 > nfds)
++ nfds = l->fd + 1;
+ }
+
+- return buf;
++ return nfds;
+ }
+
++void
++evt_handle_fdmask(fdset)
++ fd_set *fdset;
++{
++ struct evt_listener *l, *nl;
++ /* Drop every listener whose fd is set in fdset. */
++ for (l = LIST_FIRST(&evt_fds); l != NULL; l = nl) {
++ nl = LIST_NEXT(l, fd_chain);
++ /* evt_fds is linked via fd_chain; nl is saved first as l gets freed */
++ if (FD_ISSET(l->fd, fdset))
++ evt_unsubscribe(l);
++ }
++}
++
++
+ #endif /* ENABLE_ADMINPORT */
+Index: ipsec-tools-cvs/src/racoon/handler.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/handler.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/handler.c 2008-01-04 15:18:21.000000000 +0200
+@@ -267,6 +267,7 @@
+ iph1->dpd_fails = 0;
+ iph1->dpd_r_u = NULL;
+ #endif
++ evt_list_init(&iph1->evt_listeners);
+
+ return iph1;
+ }
+@@ -283,8 +284,7 @@
+
+ /* SA down shell script hook */
+ script_hook(iph1, SCRIPT_PHASE1_DOWN);
+-
+- EVT_PUSH(iph1->local, iph1->remote, EVTT_PHASE1_DOWN, NULL);
++ evt_list_cleanup(&iph1->evt_listeners);
+
+ #ifdef ENABLE_NATT
+ if (iph1->natt_flags & NAT_KA_QUEUED)
+@@ -489,8 +489,8 @@
+
+ LIST_FOREACH(p, &ph2tree, chain) {
+ if (spid == p->spid &&
+- CMPSADDR(src, p->src) == 0 &&
+- CMPSADDR(dst, p->dst) == 0){
++ cmpsaddrwild(src, p->src) == 0 &&
++ cmpsaddrwild(dst, p->dst) == 0){
+ /* Sanity check to detect zombie handlers
+ * XXX Sould be done "somewhere" more interesting,
+ * because we have lots of getph2byxxxx(), but this one
+@@ -576,6 +576,7 @@
+ return NULL;
+
+ iph2->status = PHASE1ST_SPAWN;
++ evt_list_init(&iph2->evt_listeners);
+
+ return iph2;
+ }
+@@ -589,6 +590,8 @@
+ initph2(iph2)
+ struct ph2handle *iph2;
+ {
++ evt_list_cleanup(&iph2->evt_listeners);
++
+ sched_scrub_param(iph2);
+ iph2->sce = NULL;
+ iph2->scr = NULL;
+Index: ipsec-tools-cvs/src/racoon/isakmp_agg.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/isakmp_agg.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/isakmp_agg.c 2008-01-04 15:18:21.000000000 +0200
+@@ -587,8 +587,7 @@
+ /* message printed inner oakley_validate_auth() */
+ goto end;
+ }
+- EVT_PUSH(iph1->local, iph1->remote,
+- EVTT_PEERPH1AUTH_FAILED, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL);
+ isakmp_info_send_n1(iph1, ptype, NULL);
+ goto end;
+ }
+@@ -1486,8 +1485,7 @@
+ /* message printed inner oakley_validate_auth() */
+ goto end;
+ }
+- EVT_PUSH(iph1->local, iph1->remote,
+- EVTT_PEERPH1AUTH_FAILED, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL);
+ isakmp_info_send_n1(iph1, ptype, NULL);
+ goto end;
+ }
+Index: ipsec-tools-cvs/src/racoon/isakmp_base.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/isakmp_base.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/isakmp_base.c 2008-01-04 15:18:21.000000000 +0200
+@@ -716,8 +716,7 @@
+ /* message printed inner oakley_validate_auth() */
+ goto end;
+ }
+- EVT_PUSH(iph1->local, iph1->remote,
+- EVTT_PEERPH1AUTH_FAILED, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL);
+ isakmp_info_send_n1(iph1, ptype, NULL);
+ goto end;
+ }
+@@ -1242,8 +1241,7 @@
+ /* message printed inner oakley_validate_auth() */
+ goto end;
+ }
+- EVT_PUSH(iph1->local, iph1->remote,
+- EVTT_PEERPH1AUTH_FAILED, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL);
+ isakmp_info_send_n1(iph1, ptype, NULL);
+ goto end;
+ }
+Index: ipsec-tools-cvs/src/racoon/isakmp.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/isakmp.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/isakmp.c 2008-01-04 15:18:21.000000000 +0200
+@@ -88,6 +88,9 @@
+ #include "pfkey.h"
+ #include "crypto_openssl.h"
+ #include "policy.h"
++#include "algorithm.h"
++#include "proposal.h"
++#include "sainfo.h"
+ #include "isakmp_ident.h"
+ #include "isakmp_agg.h"
+ #include "isakmp_base.h"
+@@ -1026,7 +1029,7 @@
+ }
+
+ /* new negotiation of phase 1 for initiator */
+-int
++struct ph1handle *
+ isakmp_ph1begin_i(rmconf, remote, local)
+ struct remoteconf *rmconf;
+ struct sockaddr *remote, *local;
+@@ -1039,7 +1042,7 @@
+ /* get new entry to isakmp status table. */
+ iph1 = newph1();
+ if (iph1 == NULL)
+- return -1;
++ return NULL;
+
+ iph1->status = PHASE1ST_START;
+ iph1->rmconf = rmconf;
+@@ -1055,7 +1058,7 @@
+ if ((iph1->mode_cfg = isakmp_cfg_mkstate()) == NULL) {
+ remph1(iph1);
+ delph1(iph1);
+- return -1;
++ return NULL;
+ }
+ #endif
+ #ifdef ENABLE_FRAG
+@@ -1072,7 +1075,7 @@
+ if (copy_ph1addresses(iph1, rmconf, remote, local) < 0) {
+ remph1(iph1);
+ delph1(iph1);
+- return -1;
++ return NULL;
+ }
+
+ (void)insph1(iph1);
+@@ -1108,7 +1111,7 @@
+ remph1(iph1);
+ delph1(iph1);
+
+- return -1;
++ return NULL;
+ }
+
+ #ifdef ENABLE_STATS
+@@ -1119,7 +1122,7 @@
+ timedelta(&start, &end));
+ #endif
+
+- return 0;
++ return iph1;
+ }
+
+ /* new negotiation of phase 1 for responder */
+@@ -1929,8 +1932,7 @@
+ plog(LLV_ERROR, LOCATION, NULL,
+ "phase1 negotiation failed due to time up. %s\n",
+ isakmp_pindex(&iph1->index, iph1->msgid));
+- EVT_PUSH(iph1->local, iph1->remote,
+- EVTT_PEER_NO_RESPONSE, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_NO_RESPONSE, NULL);
+
+ return -1;
+ }
+@@ -1939,8 +1941,7 @@
+ plog(LLV_ERROR, LOCATION, NULL,
+ "phase1 negotiation failed due to send error. %s\n",
+ isakmp_pindex(&iph1->index, iph1->msgid));
+- EVT_PUSH(iph1->local, iph1->remote,
+- EVTT_PEER_NO_RESPONSE, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_NO_RESPONSE, NULL);
+ return -1;
+ }
+
+@@ -1989,7 +1990,7 @@
+ plog(LLV_ERROR, LOCATION, NULL,
+ "phase2 negotiation failed due to time up. %s\n",
+ isakmp_pindex(&iph2->ph1->index, iph2->msgid));
+- EVT_PUSH(iph2->src, iph2->dst, EVTT_PEER_NO_RESPONSE, NULL);
++ evt_phase2(iph2, EVTT_PHASE2_NO_RESPONSE, NULL);
+ unbindph12(iph2);
+ return -1;
+ }
+@@ -1998,8 +1999,7 @@
+ plog(LLV_ERROR, LOCATION, NULL,
+ "phase2 negotiation failed due to send error. %s\n",
+ isakmp_pindex(&iph2->ph1->index, iph2->msgid));
+- EVT_PUSH(iph2->src, iph2->dst, EVTT_PEER_NO_RESPONSE, NULL);
+-
++ evt_phase2(iph2, EVTT_PHASE2_NO_RESPONSE, NULL);
+ return -1;
+ }
+
+@@ -2090,7 +2090,7 @@
+ plog(LLV_INFO, LOCATION, NULL,
+ "ISAKMP-SA deleted %s-%s spi:%s\n",
+ src, dst, isakmp_pindex(&iph1->index, 0));
+- EVT_PUSH(iph1->local, iph1->remote, EVTT_PHASE1_DOWN, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_DOWN, NULL);
+ racoon_free(src);
+ racoon_free(dst);
+
+@@ -2237,7 +2237,7 @@
+ saddrwop2str(iph2->dst));
+
+ /* start phase 1 negotiation as a initiator. */
+- if (isakmp_ph1begin_i(rmconf, iph2->dst, iph2->src) < 0) {
++ if (isakmp_ph1begin_i(rmconf, iph2->dst, iph2->src) == NULL) {
+ SCHED_KILL(sc);
+ return -1;
+ }
+@@ -2270,6 +2270,71 @@
+ return 0;
+ }
+
++int
++isakmp_get_sainfo(iph2, sp_out, sp_in)
++ struct ph2handle *iph2;
++ struct secpolicy *sp_out, *sp_in;
++{
++ int remoteid=0;
++
++ plog(LLV_DEBUG, LOCATION, NULL,
++ "new acquire %s\n", spidx2str(&sp_out->spidx));
++
++ /* get sainfo */
++ {
++ vchar_t *idsrc, *iddst;
++
++ idsrc = ipsecdoi_sockaddr2id((struct sockaddr *)&sp_out->spidx.src,
++ sp_out->spidx.prefs, sp_out->spidx.ul_proto);
++ if (idsrc == NULL) {
++ plog(LLV_ERROR, LOCATION, NULL,
++ "failed to get ID for %s\n",
++ spidx2str(&sp_out->spidx));
++ return -1;
++ }
++ iddst = ipsecdoi_sockaddr2id((struct sockaddr *)&sp_out->spidx.dst,
++ sp_out->spidx.prefd, sp_out->spidx.ul_proto);
++ if (iddst == NULL) {
++ plog(LLV_ERROR, LOCATION, NULL,
++ "failed to get ID for %s\n",
++ spidx2str(&sp_out->spidx));
++ vfree(idsrc);
++ return -1;
++ }
++ {
++ struct remoteconf *conf;
++ conf = getrmconf(iph2->dst);
++ if (conf != NULL)
++ remoteid=conf->ph1id;
++ else{
++ plog(LLV_DEBUG, LOCATION, NULL, "Warning: no valid rmconf !\n");
++ remoteid=0;
++ }
++ }
++ iph2->sainfo = getsainfo(idsrc, iddst, NULL, remoteid);
++ vfree(idsrc);
++ vfree(iddst);
++ if (iph2->sainfo == NULL) {
++ plog(LLV_ERROR, LOCATION, NULL,
++ "failed to get sainfo.\n");
++ return -1;
++ /* XXX should use the algorithm list from register message */
++ }
++
++ plog(LLV_DEBUG, LOCATION, NULL,
++ "selected sainfo: %s\n", sainfo2str(iph2->sainfo));
++ }
++
++ if (set_proposal_from_policy(iph2, sp_out, sp_in) < 0) {
++ plog(LLV_ERROR, LOCATION, NULL,
++ "failed to create saprop.\n");
++ return -1;
++ }
++
++ return 0;
++}
++
++
+ /*
+ * receive GETSPI from kernel.
+ */
+@@ -3021,9 +3086,9 @@
+ src, dst,
+ isakmp_pindex(&iph1->index, 0));
+
+- EVT_PUSH(iph1->local, iph1->remote, EVTT_PHASE1_UP, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_UP, NULL);
+ if(!iph1->rmconf->mode_cfg)
+- EVT_PUSH(iph1->local, iph1->remote, EVTT_NO_ISAKMP_CFG, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_MODE_CFG, NULL);
+
+ racoon_free(src);
+ racoon_free(dst);
+Index: ipsec-tools-cvs/src/racoon/isakmp_cfg.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/isakmp_cfg.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/isakmp_cfg.c 2008-01-04 15:18:21.000000000 +0200
+@@ -473,8 +473,7 @@
+ "Cannot allocate memory: %s\n", strerror(errno));
+ } else {
+ memcpy(buf->v, attrpl + 1, buf->l);
+- EVT_PUSH(iph1->local, iph1->remote,
+- EVTT_ISAKMP_CFG_DONE, buf);
++ evt_phase1(iph1, EVTT_PHASE1_MODE_CFG, buf);
+ vfree(buf);
+ }
+ }
+Index: ipsec-tools-cvs/src/racoon/isakmp_ident.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/isakmp_ident.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/isakmp_ident.c 2008-01-04 15:18:21.000000000 +0200
+@@ -788,8 +788,7 @@
+ /* msg printed inner oakley_validate_auth() */
+ goto end;
+ }
+- EVT_PUSH(iph1->local, iph1->remote,
+- EVTT_PEERPH1AUTH_FAILED, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL);
+ isakmp_info_send_n1(iph1, type, NULL);
+ goto end;
+ }
+@@ -1537,8 +1536,7 @@
+ /* msg printed inner oakley_validate_auth() */
+ goto end;
+ }
+- EVT_PUSH(iph1->local, iph1->remote,
+- EVTT_PEERPH1AUTH_FAILED, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL);
+ isakmp_info_send_n1(iph1, type, NULL);
+ goto end;
+ }
+Index: ipsec-tools-cvs/src/racoon/isakmp_inf.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/isakmp_inf.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/isakmp_inf.c 2008-01-04 15:18:21.000000000 +0200
+@@ -515,8 +515,7 @@
+ del_ph1=getph1byindex((isakmp_index *)(delete + 1));
+ if(del_ph1 != NULL){
+
+- EVT_PUSH(del_ph1->local, del_ph1->remote,
+- EVTT_PEERPH1_NOPROP, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_PEER_DELETED, NULL);
+ if (del_ph1->scr)
+ SCHED_KILL(del_ph1->scr);
+
+@@ -537,8 +536,6 @@
+ delete->spi_size, delete->proto_id);
+ return 0;
+ }
+- EVT_PUSH(iph1->local, iph1->remote,
+- EVTT_PEER_DELETE, NULL);
+ purge_ipsec_spi(iph1->remote, delete->proto_id,
+ (u_int32_t *)(delete + 1), num_spi);
+ break;
+@@ -1615,7 +1612,7 @@
+ plog(LLV_DEBUG, LOCATION, iph1->remote, "DPD monitoring....\n");
+
+ if (iph1->dpd_fails >= iph1->rmconf->dpd_maxfails) {
+- EVT_PUSH(iph1->local, iph1->remote, EVTT_DPD_TIMEOUT, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_DPD_TIMEOUT, NULL);
+ purge_remote(iph1);
+ plog(LLV_DEBUG, LOCATION, iph1->remote,
+ "DPD: remote seems to be dead\n");
+Index: ipsec-tools-cvs/src/racoon/isakmp_xauth.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/isakmp_xauth.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/isakmp_xauth.c 2008-01-04 15:18:21.000000000 +0200
+@@ -1570,13 +1570,11 @@
+ plog(LLV_ERROR, LOCATION, NULL,
+ "Xauth authentication failed\n");
+
+- EVT_PUSH(iph1->local, iph1->remote,
+- EVTT_XAUTH_FAILED, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_XAUTH_FAILED, NULL);
+
+ iph1->mode_cfg->flags |= ISAKMP_CFG_DELETE_PH1;
+ } else {
+- EVT_PUSH(iph1->local, iph1->remote,
+- EVTT_XAUTH_SUCCESS, NULL);
++ evt_phase1(iph1, EVTT_PHASE1_XAUTH_SUCCESS, NULL);
+ }
+
+
+Index: ipsec-tools-cvs/src/racoon/session.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/session.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/session.c 2008-01-04 15:18:21.000000000 +0200
+@@ -192,6 +192,7 @@
+ /* scheduling */
+ timeout = schedular();
+
++ nfds = evt_get_fdmask(nfds, &rfds);
+ error = select(nfds, &rfds, (fd_set *)0, (fd_set *)0, timeout);
+ if (error < 0) {
+ switch (errno) {
+@@ -211,6 +212,7 @@
+ (FD_ISSET(lcconf->sock_admin, &rfds)))
+ admin_handler();
+ #endif
++ evt_handle_fdmask(&rfds);
+
+ for (p = lcconf->myaddrs; p; p = p->next) {
+ if (!p->addr)
+@@ -451,7 +453,7 @@
+ case SIGTERM:
+ plog(LLV_INFO, LOCATION, NULL,
+ "caught signal %d\n", sig);
+- EVT_PUSH(NULL, NULL, EVTT_RACOON_QUIT, NULL);
++ evt_generic(EVTT_RACOON_QUIT, NULL);
+ pfkey_send_flush(lcconf->sock_pfkey,
+ SADB_SATYPE_UNSPEC);
+ #ifdef ENABLE_FASTQUIT
+Index: ipsec-tools-cvs/src/racoon/handler.h
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/handler.h 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/handler.h 2008-01-04 15:18:21.000000000 +0200
+@@ -41,6 +41,7 @@
+
+ #include "isakmp_var.h"
+ #include "oakley.h"
++#include "evt.h"
+
+ /* Phase 1 handler */
+ /*
+@@ -211,7 +212,7 @@
+ #ifdef ENABLE_HYBRID
+ struct isakmp_cfg_state *mode_cfg; /* ISAKMP mode config state */
+ #endif
+-
++ EVT_LISTENER_LIST(evt_listeners);
+ };
+
+ /* Phase 2 handler */
+@@ -320,6 +321,7 @@
+
+ LIST_ENTRY(ph2handle) chain;
+ LIST_ENTRY(ph2handle) ph1bind; /* chain to ph1handle */
++ EVT_LISTENER_LIST(evt_listeners);
+ };
+
+ /*
+Index: ipsec-tools-cvs/src/racoon/isakmp_var.h
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/isakmp_var.h 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/isakmp_var.h 2008-01-04 15:18:21.000000000 +0200
+@@ -35,6 +35,7 @@
+ #define _ISAKMP_VAR_H
+
+ #include "vmbuf.h"
++#include "policy.h"
+
+ #define PORT_ISAKMP 500
+ #define PORT_ISAKMP_NATT 4500
+@@ -62,8 +63,8 @@
+ struct isakmp_pl_nonce; /* XXX */
+
+ extern int isakmp_handler __P((int));
+-extern int isakmp_ph1begin_i __P((struct remoteconf *, struct sockaddr *,
+- struct sockaddr *));
++extern struct ph1handle *isakmp_ph1begin_i __P((struct remoteconf *,
++ struct sockaddr *, struct sockaddr *));
+
+ extern vchar_t *isakmp_parsewoh __P((int, struct isakmp_gen *, int));
+ extern vchar_t *isakmp_parse __P((vchar_t *));
+@@ -87,6 +88,7 @@
+ extern void isakmp_ph2delete_stub __P((void *));
+ extern void isakmp_ph2delete __P((struct ph2handle *));
+
++extern int isakmp_get_sainfo __P((struct ph2handle *, struct secpolicy *, struct secpolicy *));
+ extern int isakmp_post_acquire __P((struct ph2handle *));
+ extern int isakmp_post_getspi __P((struct ph2handle *));
+ extern void isakmp_chkph1there_stub __P((void *));
+Index: ipsec-tools-cvs/src/racoon/racoonctl.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/racoonctl.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/racoonctl.c 2008-01-04 15:18:21.000000000 +0200
+@@ -4,6 +4,7 @@
+
+ /*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
++ * Copyright (C) 2007 Timo Teras.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+@@ -135,26 +136,24 @@
+ struct evtmsg {
+ int type;
+ char *msg;
+- enum { UNSPEC, ERROR, INFO } level;
+ } evtmsg[] = {
+- { EVTT_PHASE1_UP, "Phase 1 established", INFO },
+- { EVTT_PHASE1_DOWN, "Phase 1 deleted", INFO },
+- { EVTT_XAUTH_SUCCESS, "Xauth exchange passed", INFO },
+- { EVTT_ISAKMP_CFG_DONE, "ISAKMP mode config done", INFO },
+- { EVTT_PHASE2_UP, "Phase 2 established", INFO },
+- { EVTT_PHASE2_DOWN, "Phase 2 deleted", INFO },
+- { EVTT_DPD_TIMEOUT, "Peer not reachable anymore", ERROR },
+- { EVTT_PEER_NO_RESPONSE, "Peer not responding", ERROR },
+- { EVTT_PEER_DELETE, "Peer terminated security association", ERROR },
+- { EVTT_RACOON_QUIT, "Raccon terminated", ERROR },
+- { EVTT_OVERFLOW, "Event queue overflow", ERROR },
+- { EVTT_XAUTH_FAILED, "Xauth exchange failed", ERROR },
+- { EVTT_PEERPH1AUTH_FAILED, "Peer failed phase 1 authentication "
+- "(certificate problem?)", ERROR },
+- { EVTT_PEERPH1_NOPROP, "Peer failed phase 1 initiation "
+- "(proposal problem?)", ERROR },
+- { 0, NULL, UNSPEC },
+- { EVTT_NO_ISAKMP_CFG, "No need for ISAKMP mode config ", INFO },
++ { EVTT_RACOON_QUIT, "Racoon terminated" },
++
++ { EVTT_PHASE1_UP, "Phase 1 established" },
++ { EVTT_PHASE1_DOWN, "Phase 1 deleted" },
++ { EVTT_PHASE1_NO_RESPONSE, "Phase 1 error: peer not responding" },
++ { EVTT_PHASE1_NO_PROPOSAL, "Phase 1 error: no proposal chosen" },
++ { EVTT_PHASE1_AUTH_FAILED,
++ "Phase 1 error: authentication failed (bad certificate?)" },
++ { EVTT_PHASE1_DPD_TIMEOUT, "Phase 1 error: dead peer detected" },
++ { EVTT_PHASE1_MODE_CFG, "Phase 1 mode configuration done" },
++ { EVTT_PHASE1_XAUTH_SUCCESS, "Phase 1 Xauth succeeded" },
++ { EVTT_PHASE1_XAUTH_FAILED, "Phase 1 Xauth failed" },
++
++ { EVTT_PHASE2_NO_PHASE1, "Phase 2 error: no suitable phase 1" },
++ { EVTT_PHASE2_UP, "Phase 2 established" },
++ { EVTT_PHASE2_DOWN, "Phase 2 deleted" },
++ { EVTT_PHASE2_NO_RESPONSE, "Phase 2 error: no response" },
+ };
+
+ static int get_proto __P((char *));
+@@ -184,6 +183,7 @@
+ { IPPROTO_ICMP, "icmp" },
+ { IPPROTO_TCP, "tcp" },
+ { IPPROTO_UDP, "udp" },
++ { IPPROTO_GRE, "gre" },
+ { 0, NULL },
+ };
+
+@@ -193,31 +193,13 @@
+
+ char *pname;
+ int long_format = 0;
+-
+-#define EVTF_NONE 0x0000 /* Ignore any events */
+-#define EVTF_LOOP 0x0001 /* Loop awaiting for new events */
+-#define EVTF_CFG_STOP 0x0002 /* Stop after ISAKMP mode config */
+-#define EVTF_CFG 0x0004 /* Print ISAKMP mode config info */
+-#define EVTF_ALL 0x0008 /* Print any events */
+-#define EVTF_PURGE 0x0010 /* Print all available events */
+-#define EVTF_PH1DOWN_STOP 0x0020 /* Stop when phase 1 SA gets down */
+-#define EVTF_PH1DOWN 0x0040 /* Print that phase 1 SA got down */
+-#define EVTF_ERR 0x0080 /* Print any error */
+-#define EVTF_ERR_STOP 0x0100 /* Stop on any error */
+-
+-int evt_filter = EVTF_NONE;
+-time_t evt_start;
++int evt_quit_event = 0;
+
+ void dump_isakmp_sa __P((char *, int));
+ void dump_internal __P((char *, int));
+ char *pindex_isakmp __P((isakmp_index *));
+ void print_schedule __P((caddr_t, int));
+-void print_evt __P((caddr_t, int));
+-void print_cfg __P((caddr_t, int));
+-void print_err __P((caddr_t, int));
+-void print_ph1down __P((caddr_t, int));
+-void print_ph1up __P((caddr_t, int));
+-int evt_poll __P((void));
++void print_evt __P((struct evt_common *));
+ char * fixed_addr __P((char *, char *, int));
+
+ static void
+@@ -226,12 +208,15 @@
+ printf(
+ "Usage:\n"
+ " %s reload-config\n"
++" %s show-schedule\n"
+ " %s [-l [-l]] show-sa [protocol]\n"
+ " %s flush-sa [protocol]\n"
+ " %s delete-sa <saopts>\n"
+-" %s establish-sa [-u identity] <saopts>\n"
++" %s establish-sa [-u identity] [-w] <saopts>\n"
+ " %s vpn-connect [-u identity] vpn_gateway\n"
+ " %s vpn-disconnect vpn_gateway\n"
++" %s show-event\n"
++" %s logout-user login\n"
+ "\n"
+ " <protocol>: \"isakmp\", \"esp\" or \"ah\".\n"
+ " In the case of \"show-sa\" or \"flush-sa\", you can use \"ipsec\".\n"
+@@ -240,8 +225,8 @@
+ " : {\"esp\",\"ah\"} <family> <src/prefixlen/port> <dst/prefixlen/port>\n"
+ " <ul_proto>\n"
+ " <family>: \"inet\" or \"inet6\"\n"
+-" <ul_proto>: \"icmp\", \"tcp\", \"udp\" or \"any\"\n",
+- pname, pname, pname, pname, pname, pname, pname);
++" <ul_proto>: \"icmp\", \"tcp\", \"udp\", \"gre\" or \"any\"\n",
++ pname, pname, pname, pname, pname, pname, pname, pname, pname, pname);
+ }
+
+ /*
+@@ -312,54 +297,24 @@
+
+ vfree(combuf);
+
+- if (com_recv(&combuf) != 0)
+- goto bad;
+- if (handle_recv(combuf) != 0)
+- goto bad;
+-
+- vfree(combuf);
++ do {
++ if (com_recv(&combuf) != 0)
++ goto bad;
++ if (handle_recv(combuf) != 0)
++ goto bad;
++ vfree(combuf);
++ } while (evt_quit_event != 0);
+
+- if (evt_filter != EVTF_NONE)
+- if (evt_poll() != 0)
+- goto bad;
+-
++ close(so);
+ exit(0);
+
+- bad:
++bad:
++ close(so);
++ if (errno == EEXIST)
++ exit(0);
+ exit(1);
+ }
+
+-int
+-evt_poll(void) {
+- struct timeval tv;
+- vchar_t *recvbuf;
+- vchar_t *sendbuf;
+-
+- if ((sendbuf = f_getevt(0, NULL)) == NULL)
+- errx(1, "Cannot make combuf");
+-
+-
+- while (evt_filter & (EVTF_LOOP|EVTF_PURGE)) {
+- /* handle_recv closes the socket time, so open it each time */
+- com_init();
+-
+- if (com_send(sendbuf) != 0)
+- errx(1, "Cannot send combuf");
+-
+- if (com_recv(&recvbuf) == 0) {
+- handle_recv(recvbuf);
+- vfree(recvbuf);
+- }
+-
+- tv.tv_sec = 0;
+- tv.tv_usec = 10;
+- (void)select(0, NULL, NULL, NULL, &tv);
+- }
+-
+- vfree(sendbuf);
+- return 0;
+-}
+-
+ /* %%% */
+ /*
+ * return command buffer.
+@@ -422,20 +377,8 @@
+ vchar_t *buf;
+ struct admin_com *head;
+
+- /*
+- * There are 3 ways of getting here
+- * 1) racoonctl vc => evt_filter = (EVTF_LOOP|EVTF_CFG| ... )
+- * 2) racoonctl es => evt_filter = EVTF_NONE
+- * 3) racoonctl es -l => evt_filter = EVTF_LOOP
+- * Catch the second case: show-event is here to purge all
+- */
+- if (evt_filter == EVTF_NONE)
+- evt_filter = (EVTF_ALL|EVTF_PURGE);
+-
+- if ((ac >= 1) && (strcmp(av[0], "-l") == 0))
+- evt_filter |= EVTF_LOOP;
+-
+- if (ac >= 2)
++ evt_quit_event = -1;
++ if (ac >= 1)
+ errx(1, "too many arguments");
+
+ buf = vmalloc(sizeof(*head));
+@@ -653,6 +596,7 @@
+ char *id = NULL;
+ char *key = NULL;
+ struct admin_com_psk *acp;
++ int wait = 0;
+
+ if (ac < 1)
+ errx(1, "insufficient arguments");
+@@ -673,6 +617,12 @@
+ ac -= 2;
+ }
+
++ if (ac >= 1 && strcmp(av[0], "-w") == 0) {
++ wait = 1;
++ av++;
++ ac--;
++ }
++
+ /* need protocol */
+ if (ac < 1)
+ errx(1, "insufficient arguments");
+@@ -687,12 +637,16 @@
+ index = get_index(ac, av);
+ if (index == NULL)
+ return NULL;
++ if (wait)
++ evt_quit_event = EVTT_PHASE1_MODE_CFG;
+ break;
+ case ADMIN_PROTO_AH:
+ case ADMIN_PROTO_ESP:
+ index = get_index(ac, av);
+ if (index == NULL)
+ return NULL;
++ if (wait)
++ evt_quit_event = EVTT_PHASE2_UP;
+ break;
+ default:
+ errno = EPROTONOSUPPORT;
+@@ -749,8 +703,7 @@
+ if (ac < 1)
+ errx(1, "insufficient arguments");
+
+- evt_filter = (EVTF_LOOP|EVTF_CFG|EVTF_CFG_STOP|EVTF_ERR|EVTF_ERR_STOP);
+- time(&evt_start);
++ evt_quit_event = EVTT_PHASE1_MODE_CFG;
+
+ /* Optional -u identity */
+ if (strcmp(av[0], "-u") == 0) {
+@@ -814,8 +767,7 @@
+ if (ac > 1)
+ warnx("Extra arguments");
+
+- evt_filter =
+- (EVTF_PH1DOWN|EVTF_PH1DOWN_STOP|EVTF_LOOP|EVTF_ERR|EVTF_ERR_STOP);
++ evt_quit_event = EVTT_PHASE1_DOWN;
+
+ nav[nac++] = isakmp;
+ nav[nac++] = inet;
+@@ -1335,84 +1287,32 @@
+
+
+ void
+-print_evt(buf, len)
+- caddr_t buf;
+- int len;
++print_evt(evtdump)
++ struct evt_common *evtdump;
+ {
+- struct evtdump *evtdump = (struct evtdump *)buf;
+ int i;
+ char *srcstr;
+ char *dststr;
+
+- for (i = 0; evtmsg[i].msg; i++)
+- if (evtmsg[i].type == evtdump->type)
+- break;
+-
+- if (evtmsg[i].msg == NULL)
+- printf("Event %d: ", evtdump->type);
++ for (i = 0; i < sizeof(evtmsg) / sizeof(evtmsg[0]); i++)
++ if (evtmsg[i].type == evtdump->ec_type)
++ break;
++ /* evtmsg[] has no NULL sentinel: i == entry count means unknown type */
++ if (i >= sizeof(evtmsg) / sizeof(evtmsg[0]))
++ printf("Event %d: ", evtdump->ec_type);
+ else
+ printf("%s : ", evtmsg[i].msg);
+
+- if ((srcstr = saddr2str((struct sockaddr *)&evtdump->src)) == NULL)
++ if ((srcstr = saddr2str((struct sockaddr *)&evtdump->ec_ph1src)) == NULL)
+ printf("unknown");
+- else
++ else
+ printf("%s", srcstr);
+ printf(" -> ");
+- if ((dststr = saddr2str((struct sockaddr *)&evtdump->dst)) == NULL)
++ if ((dststr = saddr2str((struct sockaddr *)&evtdump->ec_ph1dst)) == NULL)
+ printf("unknown");
+- else
++ else
+ printf("%s", dststr);
+ printf("\n");
+-
+- return;
+-}
+-
+-void
+-print_err(buf, len)
+- caddr_t buf;
+- int len;
+-{
+- struct evtdump *evtdump = (struct evtdump *)buf;
+- int i;
+-
+-
+- for (i = 0; evtmsg[i].msg; i++)
+- if (evtmsg[i].type == evtdump->type)
+- break;
+-
+- if (evtmsg[i].level != ERROR)
+- return;
+-
+- if (evtmsg[i].msg == NULL)
+- printf("Error: Event %d\n", evtdump->type);
+- else
+- printf("Error: %s\n", evtmsg[i].msg);
+-
+- if (evt_filter & EVTF_ERR_STOP)
+- evt_filter &= ~EVTF_LOOP;
+-
+- return;
+-}
+-
+-/*
+- * Print a message when phase 1 SA goes down
+- */
+-void
+-print_ph1down(buf, len)
+- caddr_t buf;
+- int len;
+-{
+- struct evtdump *evtdump = (struct evtdump *)buf;
+-
+- if (evtdump->type != EVTT_PHASE1_DOWN)
+- return;
+-
+- printf("VPN connexion terminated\n");
+-
+- if (evt_filter & EVTF_PH1DOWN_STOP)
+- evt_filter &= ~EVTF_LOOP;
+-
+- return;
+ }
+
+ /*
+@@ -1423,15 +1323,14 @@
+ caddr_t buf;
+ int len;
+ {
+- struct evtdump *evtdump = (struct evtdump *)buf;
++ struct evt_common *evtdump = (struct evt_common *)buf;
+ struct isakmp_data *attr;
+ char *banner = NULL;
+ struct in_addr addr4;
+
+ memset(&addr4, 0, sizeof(addr4));
+
+- if (evtdump->type != EVTT_ISAKMP_CFG_DONE &&
+- evtdump->type != EVTT_NO_ISAKMP_CFG)
++ if (evtdump->ec_type != EVTT_PHASE1_MODE_CFG)
+ return;
+
+ len -= sizeof(*evtdump);
+@@ -1484,12 +1383,12 @@
+ (n + sizeof(*attr) + ntohs(attr->lorv));
+ }
+ }
+-
+- if (evtdump->type == EVTT_ISAKMP_CFG_DONE)
++
++ if (len > 0)
+ printf("Bound to address %s\n", inet_ntoa(addr4));
+ else
+ printf("VPN connexion established\n");
+-
++
+ if (banner) {
+ struct winsize win;
+ int col = 0;
+@@ -1506,13 +1405,8 @@
+ printf("\n");
+ racoon_free(banner);
+ }
+-
+- if (evt_filter & EVTF_CFG_STOP)
+- evt_filter &= ~EVTF_LOOP;
+-
+- return;
+ }
+-
++
+
+ char *
+ fixed_addr(addr, port, len)
+@@ -1561,32 +1455,29 @@
+ break;
+
+ case ADMIN_SHOW_EVT: {
+- struct evtdump *evtdump;
++ struct evt_common *ec;
+
+- /* We got no event */
+- if (len == 0) {
+- /* If we were purging the queue, it is now done */
+- if (evt_filter & EVTF_PURGE)
+- evt_filter &= ~EVTF_PURGE;
++ /* We got no event? */
++ if (len == 0)
+ break;
+- }
+-
+- if (len < sizeof(struct evtdump))
+- errx(1, "Short buffer\n");
+
+- /* Toss outdated events */
+- evtdump = (struct evtdump *)buf;
+- if (evtdump->timestamp < evt_start)
+- break;
++ if (len < sizeof(struct evt_common))
++ errx(1, "Short buffer\n");
+
+- if (evt_filter & EVTF_ALL)
+- print_evt(buf, len);
+- if (evt_filter & EVTF_ERR)
+- print_err(buf, len);
+- if (evt_filter & EVTF_CFG)
+- print_cfg(buf, len);
+- if (evt_filter & EVTF_PH1DOWN)
+- print_ph1down(buf, len);
++ ec = (struct evt_common *) buf;
++ if (evt_quit_event <= 0)
++ print_evt(ec);
++ else if (evt_quit_event == ec->ec_type) {
++ switch (ec->ec_type) {
++ case EVTT_PHASE1_MODE_CFG:
++ print_cfg(ec, len);
++ break;
++ default:
++ print_evt(ec);
++ break;
++ };
++ evt_quit_event = 0;
++ }
+ break;
+ }
+
+@@ -1643,10 +1534,8 @@
+ break;
+ }
+
+- close(so);
+ return 0;
+
+- bad:
+- close(so);
++bad:
+ return -1;
+ }
+Index: ipsec-tools-cvs/src/racoon/admin.c
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/admin.c 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/admin.c 2008-01-04 15:18:21.000000000 +0200
+@@ -76,6 +76,7 @@
+ #include "evt.h"
+ #include "pfkey.h"
+ #include "ipsec_doi.h"
++#include "policy.h"
+ #include "admin.h"
+ #include "admin_var.h"
+ #include "isakmp_inf.h"
+@@ -147,16 +148,18 @@
+ goto end;
+ }
+
+- if (com.ac_cmd == ADMIN_RELOAD_CONF) {
+- /* reload does not work at all! */
+- signal_handler(SIGHUP);
+- goto end;
+- }
++ plog(LLV_DEBUG, LOCATION, NULL,
++ "[%d] admin connection established\n", so2);
+
+ error = admin_process(so2, combuf);
+
+- end:
+- (void)close(so2);
++end:
++ if (error != -2) {
++ plog(LLV_DEBUG, LOCATION, NULL,
++ "[%d] admin connection closed\n", so2);
++ (void)close(so2);
++ }
++
+ if (combuf)
+ racoon_free(combuf);
+
+@@ -177,13 +180,15 @@
+ vchar_t *key = NULL;
+ int idtype = 0;
+ int error = -1;
++ int send_events = 0;
++ struct evt_listener_list *event_list = NULL;
+
+ com->ac_errno = 0;
+
+ switch (com->ac_cmd) {
+ case ADMIN_RELOAD_CONF:
+- /* don't entered because of proccessing it in other place. */
+- plog(LLV_ERROR, LOCATION, NULL, "should never reach here\n");
++ signal_handler(SIGHUP);
++ error = 0;
+ goto out;
+
+ case ADMIN_SHOW_SCHED:
+@@ -208,9 +213,7 @@
+ }
+
+ case ADMIN_SHOW_EVT:
+- /* It's not really an error, don't force racoonctl to quit */
+- if ((buf = evt_dump()) == NULL)
+- com->ac_errno = 0;
++ send_events = 1;
+ break;
+
+ case ADMIN_SHOW_SA:
+@@ -391,17 +394,17 @@
+ /* FALLTHROUGH */
+ case ADMIN_ESTABLISH_SA:
+ {
++ struct admin_com_indexes *ndx;
+ struct sockaddr *dst;
+ struct sockaddr *src;
+- src = (struct sockaddr *)
+- &((struct admin_com_indexes *)
+- ((caddr_t)com + sizeof(*com)))->src;
+- dst = (struct sockaddr *)
+- &((struct admin_com_indexes *)
+- ((caddr_t)com + sizeof(*com)))->dst;
++
++ ndx = (struct admin_com_indexes *) ((caddr_t)com + sizeof(*com));
++ src = (struct sockaddr *) &ndx->src;
++ dst = (struct sockaddr *) &ndx->dst;
+
+ switch (com->ac_proto) {
+ case ADMIN_PROTO_ISAKMP: {
++ struct ph1handle *ph1;
+ struct remoteconf *rmconf;
+ struct sockaddr *remote = NULL;
+ struct sockaddr *local = NULL;
+@@ -409,6 +412,17 @@
+
+ com->ac_errno = -1;
+
++ /* connected already? */
++ ph1 = getph1byaddrwop(src, dst);
++ if (ph1 != NULL) {
++ event_list = &ph1->evt_listeners;
++ if (ph1->status == PHASE1ST_ESTABLISHED)
++ com->ac_errno = EEXIST;
++ else
++ com->ac_errno = 0;
++ break;
++ }
++
+ /* search appropreate configuration */
+ rmconf = getrmconf(dst);
+ if (rmconf == NULL) {
+@@ -459,9 +473,11 @@
+ "%s\n", saddrwop2str(remote));
+
+ /* begin ident mode */
+- if (isakmp_ph1begin_i(rmconf, remote, local) < 0)
++ ph1 = isakmp_ph1begin_i(rmconf, remote, local);
++ if (ph1 == NULL)
+ goto out1;
+
++ event_list = &ph1->evt_listeners;
+ com->ac_errno = 0;
+ out1:
+ if (local != NULL)
+@@ -471,8 +487,105 @@
+ break;
+ }
+ case ADMIN_PROTO_AH:
+- case ADMIN_PROTO_ESP:
++ case ADMIN_PROTO_ESP: {
++ struct ph2handle *iph2;
++ struct secpolicy *sp_out = NULL, *sp_in = NULL;
++ struct policyindex spidx;
++
++ com->ac_errno = -1;
++
++ /* get outbound policy */
++ memset(&spidx, 0, sizeof(spidx));
++ spidx.dir = IPSEC_DIR_OUTBOUND;
++ memcpy(&spidx.src, src, sizeof(spidx.src));
++ memcpy(&spidx.dst, dst, sizeof(spidx.dst));
++ spidx.prefs = ndx->prefs;
++ spidx.prefd = ndx->prefd;
++ spidx.ul_proto = ndx->ul_proto;
++
++ sp_out = getsp_r(&spidx);
++ if (sp_out) {
++ plog(LLV_DEBUG, LOCATION, NULL,
++ "suitable outbound SP found: %s.\n",
++ spidx2str(&sp_out->spidx));
++ } else {
++ com->ac_errno = ENOENT;
++ plog(LLV_NOTIFY, LOCATION, NULL,
++ "no outbound policy found: %s\n",
++ spidx2str(&spidx));
++ break;
++ }
++
++ iph2 = getph2byid(src, dst, sp_out->id);
++ if (iph2 != NULL) {
++ event_list = &iph2->evt_listeners;
++ if (iph2->status == PHASE2ST_ESTABLISHED)
++ com->ac_errno = EEXIST;
++ else
++ com->ac_errno = 0;
++ break;
++ }
++
++ /* get inbound policy */
++ memset(&spidx, 0, sizeof(spidx));
++ spidx.dir = IPSEC_DIR_INBOUND;
++ memcpy(&spidx.src, dst, sizeof(spidx.src));
++ memcpy(&spidx.dst, src, sizeof(spidx.dst));
++ spidx.prefs = ndx->prefd;
++ spidx.prefd = ndx->prefs;
++ spidx.ul_proto = ndx->ul_proto;
++
++ sp_in = getsp_r(&spidx);
++ if (sp_in) {
++ plog(LLV_DEBUG, LOCATION, NULL,
++ "suitable inbound SP found: %s.\n",
++ spidx2str(&sp_in->spidx));
++ } else {
++ com->ac_errno = ENOENT;
++ plog(LLV_NOTIFY, LOCATION, NULL,
++ "no inbound policy found: %s\n",
++ spidx2str(&spidx));
++ break;
++ }
++
++ /* allocate a phase 2 */
++ iph2 = newph2();
++ if (iph2 == NULL) {
++ plog(LLV_ERROR, LOCATION, NULL,
++ "failed to allocate phase2 entry.\n");
++ break;
++ }
++ iph2->side = INITIATOR;
++ iph2->satype = admin2pfkey_proto(com->ac_proto);
++ iph2->spid = sp_out->id;
++ iph2->seq = pk_getseq();
++ iph2->status = PHASE2ST_STATUS2;
++
++ /* set end addresses of SA */
++ iph2->dst = dupsaddr(dst);
++ iph2->src = dupsaddr(src);
++ if (iph2->dst == NULL || iph2->src == NULL) {
++ delph2(iph2);
++ break;
++ }
++
++ if (isakmp_get_sainfo(iph2, sp_out, sp_in) < 0) {
++ delph2(iph2);
++ break;
++ }
++
++ insph2(iph2);
++ if (isakmp_post_acquire(iph2) < 0) {
++ unbindph12(iph2);
++ remph2(iph2);
++ delph2(iph2);
++ break;
++ }
++
++ event_list = &iph2->evt_listeners;
++ com->ac_errno = 0;
+ break;
++ }
+ default:
+ /* ignore */
+ com->ac_errno = -1;
+@@ -489,7 +602,8 @@
+ if ((error = admin_reply(so2, com, buf)) != 0)
+ goto out;
+
+- error = 0;
++ if (send_events || event_list != NULL)
++ error = evt_subscribe(event_list, so2);
+ out:
+ if (buf != NULL)
+ vfree(buf);
+Index: ipsec-tools-cvs/src/racoon/racoonctl.8
+===================================================================
+--- ipsec-tools-cvs.orig/src/racoon/racoonctl.8 2008-01-04 15:17:50.000000000 +0200
++++ ipsec-tools-cvs/src/racoon/racoonctl.8 2008-01-04 15:18:21.000000000 +0200
+@@ -55,17 +55,17 @@
+ .Nm
+ establish-sa
+ .Op Fl u Ar identity
++.Op Fl w
+ .Ar saopts
+ .Nm
+ vpn-connect
+-.Op Fl u identity
++.Op Fl u Ar identity
+ .Ar vpn_gateway
+ .Nm
+ vpn-disconnect
+ .Ar vpn_gateway
+ .Nm
+ show-event
+-.Op Fl l
+ .Nm
+ logout-user
+ .Ar login
+@@ -104,6 +104,8 @@
+ either ISAKMP SAs, IPsec ESP SAs, IPsec AH SAs, or all IPsec SAs.
+ .It Xo establish-sa
+ .Oo Fl u Ar username
++.Oc
++.Oo Fl w
+ .Oc Ar saopts
+ .Xc
+ Establish an SA, either an ISAKMP SA, IPsec ESP SA, or IPsec AH SA.
+@@ -115,6 +117,11 @@
+ .Ar username
+ and these credentials will be used in the Xauth exchange.
+ .Pp
++Specifying
++.Fl w
++will make racoonctl wait until the SA is actually established or
++an error occurs.
++.Pp
+ .Ar saopts
+ has the following format:
+ .Bl -tag -width Bl
+@@ -135,16 +142,9 @@
+ This is a particular case of the previous command.
+ It will kill all SAs associated with
+ .Ar vpn_gateway .
+-.It show-event Op Fl l
+-Dump all events reported by
+-.Xr racoon 8 ,
+-then quit.
+-The
+-.Fl l
+-flag causes
+-.Nm
+-to not stop once all the events have been read, but rather to loop
+-awaiting and reporting new events.
++.It show-event
++Listen for all events reported by
++.Xr racoon 8 .
+ .It logout-user Ar login
+ Delete all SA established on behalf of the Xauth user
+ .Ar login .
diff --git a/patches/linux-2.6.19-ipgre.diff b/patches/linux-2.6.19-ipgre.diff
new file mode 100644
index 0000000..655b175
--- /dev/null
+++ b/patches/linux-2.6.19-ipgre.diff
@@ -0,0 +1,44 @@
+Index: linux-2.6.19/net/ipv4/ip_gre.c
+===================================================================
+--- linux-2.6.19.orig/net/ipv4/ip_gre.c 2006-11-29 23:57:37.000000000 +0200
++++ linux-2.6.19/net/ipv4/ip_gre.c 2008-01-31 08:50:21.000000000 +0200
+@@ -1033,7 +1033,13 @@
+ return 0;
+ }
+
+-#ifdef CONFIG_NET_IPGRE_BROADCAST
++static int ipgre_tunnel_parse_header(struct sk_buff *skb, unsigned char *haddr)
++{
++ struct iphdr *iph = (struct iphdr*) skb->mac.raw;
++ memcpy(haddr, &iph->saddr, 4);
++ return 4;
++}
++
+ /* Nice toy. Unfortunately, useless in real life :-)
+ It allows to construct virtual multiprotocol broadcast "LAN"
+ over the Internet, provided multicast routing is tuned.
+@@ -1091,6 +1097,7 @@
+ return -t->hlen;
+ }
+
++#ifdef CONFIG_NET_IPGRE_BROADCAST
+ static int ipgre_open(struct net_device *dev)
+ {
+ struct ip_tunnel *t = netdev_priv(dev);
+@@ -1139,6 +1146,7 @@
+ dev->get_stats = ipgre_tunnel_get_stats;
+ dev->do_ioctl = ipgre_tunnel_ioctl;
+ dev->change_mtu = ipgre_tunnel_change_mtu;
++ dev->hard_header_parse = ipgre_tunnel_parse_header;
+
+ dev->type = ARPHRD_IPGRE;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
+@@ -1193,6 +1201,8 @@
+ dev->stop = ipgre_close;
+ }
+ #endif
++ } else {
++ dev->hard_header = ipgre_header;
+ }
+
+ if (!tdev && tunnel->parms.link)
diff --git a/patches/linux-2.6.20-ipgre.diff b/patches/linux-2.6.20-ipgre.diff
new file mode 100644
index 0000000..a78ed17
--- /dev/null
+++ b/patches/linux-2.6.20-ipgre.diff
@@ -0,0 +1,44 @@
+Index: linux-2.6.20/net/ipv4/ip_gre.c
+===================================================================
+--- linux-2.6.20.orig/net/ipv4/ip_gre.c 2008-01-04 15:05:34.000000000 +0200
++++ linux-2.6.20/net/ipv4/ip_gre.c 2008-01-04 15:05:37.000000000 +0200
+@@ -1033,7 +1033,13 @@
+ return 0;
+ }
+
+-#ifdef CONFIG_NET_IPGRE_BROADCAST
++static int ipgre_tunnel_parse_header(struct sk_buff *skb, unsigned char *haddr)
++{
++ struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
++ memcpy(haddr, &iph->saddr, 4);
++ return 4;
++}
++
+ /* Nice toy. Unfortunately, useless in real life :-)
+ It allows to construct virtual multiprotocol broadcast "LAN"
+ over the Internet, provided multicast routing is tuned.
+@@ -1091,6 +1097,7 @@
+ return -t->hlen;
+ }
+
++#ifdef CONFIG_NET_IPGRE_BROADCAST
+ static int ipgre_open(struct net_device *dev)
+ {
+ struct ip_tunnel *t = netdev_priv(dev);
+@@ -1139,6 +1146,7 @@
+ dev->get_stats = ipgre_tunnel_get_stats;
+ dev->do_ioctl = ipgre_tunnel_ioctl;
+ dev->change_mtu = ipgre_tunnel_change_mtu;
++ dev->hard_header_parse = ipgre_tunnel_parse_header;
+
+ dev->type = ARPHRD_IPGRE;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
+@@ -1193,6 +1201,8 @@
+ dev->stop = ipgre_close;
+ }
+ #endif
++ } else {
++ dev->hard_header = ipgre_header;
+ }
+
+ if (!tdev && tunnel->parms.link)
diff --git a/patches/linux-2.6.22-ipgre.diff b/patches/linux-2.6.22-ipgre.diff
new file mode 100644
index 0000000..59d4292
--- /dev/null
+++ b/patches/linux-2.6.22-ipgre.diff
@@ -0,0 +1,53 @@
+Index: linux-2.6.20/net/ipv4/ip_gre.c
+===================================================================
+--- linux-2.6.20.orig/net/ipv4/ip_gre.c 2008-01-04 15:06:32.000000000 +0200
++++ linux-2.6.20/net/ipv4/ip_gre.c 2008-01-04 15:08:50.000000000 +0200
+@@ -613,7 +613,7 @@
+ offset += 4;
+ }
+
+- skb_reset_mac_header(skb);
++ skb->mac_header = skb->network_header;
+ __pskb_pull(skb, offset);
+ skb_reset_network_header(skb);
+ skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
+@@ -1032,7 +1032,13 @@
+ return 0;
+ }
+
+-#ifdef CONFIG_NET_IPGRE_BROADCAST
++static int ipgre_tunnel_parse_header(struct sk_buff *skb, unsigned char *haddr)
++{
++ struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
++ memcpy(haddr, &iph->saddr, 4);
++ return 4;
++}
++
+ /* Nice toy. Unfortunately, useless in real life :-)
+ It allows to construct virtual multiprotocol broadcast "LAN"
+ over the Internet, provided multicast routing is tuned.
+@@ -1090,6 +1096,7 @@
+ return -t->hlen;
+ }
+
++#ifdef CONFIG_NET_IPGRE_BROADCAST
+ static int ipgre_open(struct net_device *dev)
+ {
+ struct ip_tunnel *t = netdev_priv(dev);
+@@ -1138,6 +1145,7 @@
+ dev->get_stats = ipgre_tunnel_get_stats;
+ dev->do_ioctl = ipgre_tunnel_ioctl;
+ dev->change_mtu = ipgre_tunnel_change_mtu;
++ dev->hard_header_parse = ipgre_tunnel_parse_header;
+
+ dev->type = ARPHRD_IPGRE;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
+@@ -1192,6 +1200,8 @@
+ dev->stop = ipgre_close;
+ }
+ #endif
++ } else {
++ dev->hard_header = ipgre_header;
+ }
+
+ if (!tdev && tunnel->parms.link)