From e756c7948078bd5109c5b8a0f252851efc4532d6 Mon Sep 17 00:00:00 2001 From: Mark Bryars Date: Fri, 4 May 2012 22:19:13 +0100 Subject: Imported Upstream version 0.13 --- .gitignore | 6 + AUTHORS | 7 + Make.rules | 289 +++ Makefile | 34 + NEWS | 289 +++ README | 112 + TODO | 27 + contrib/init-scripts/debian/opennhrp.init | 160 ++ doc/draft-ietf-ion-r2r-nhrp-03.txt | 837 +++++++ doc/rfc2332.txt | 2915 +++++++++++++++++++++++ etc/Makefile | 5 + etc/opennhrp-script | 38 + etc/opennhrp-script.cert | 71 + etc/opennhrp.conf | 9 + etc/racoon-ph1dead.sh | 3 + etc/racoon-ph1down.sh | 6 + libev/LICENSE | 36 + libev/README | 58 + libev/VERSION | 1 + libev/ev.c | 3694 +++++++++++++++++++++++++++++ libev/ev.h | 705 ++++++ libev/ev_epoll.c | 228 ++ libev/ev_kqueue.c | 196 ++ libev/ev_poll.c | 144 ++ libev/ev_port.c | 165 ++ libev/ev_select.c | 308 +++ libev/ev_vars.h | 187 ++ libev/ev_wrap.h | 178 ++ man/Makefile | 7 + man/opennhrp-script.8 | 146 ++ man/opennhrp.8 | 119 + man/opennhrp.conf.5 | 227 ++ man/opennhrpctl.8 | 124 + nhrp/Makefile | 27 + nhrp/admin.c | 609 +++++ nhrp/afnum.h | 29 + nhrp/libev.c | 3 + nhrp/libev.h | 22 + nhrp/list.h | 184 ++ nhrp/nhrp_address.c | 454 ++++ nhrp/nhrp_address.h | 80 + nhrp/nhrp_common.h | 78 + nhrp/nhrp_defines.h | 87 + nhrp/nhrp_interface.c | 188 ++ nhrp/nhrp_interface.h | 78 + nhrp/nhrp_packet.c | 1331 +++++++++++ nhrp/nhrp_packet.h | 128 + nhrp/nhrp_peer.c | 2106 ++++++++++++++++ nhrp/nhrp_peer.h | 194 ++ nhrp/nhrp_protocol.h | 130 + nhrp/nhrp_server.c | 566 +++++ nhrp/opennhrp.c | 524 ++++ nhrp/opennhrpctl.c | 121 + nhrp/sysdep_netlink.c | 1159 +++++++++ nhrp/sysdep_pfpacket.c | 388 +++ nhrp/sysdep_syslog.c | 55 + patches/ipsec-tools-0.7.diff | 1832 ++++++++++++++ patches/linux-2.6.19-ipgre.diff | 44 + patches/linux-2.6.20-ipgre.diff | 44 + patches/linux-2.6.22-ipgre.diff | 53 + 60 files changed, 21845 insertions(+) create mode 100644 .gitignore create mode 100644 AUTHORS create mode 100644 Make.rules create mode 100644 Makefile create 
mode 100644 NEWS create mode 100644 README create mode 100644 TODO create mode 100644 contrib/init-scripts/debian/opennhrp.init create mode 100644 doc/draft-ietf-ion-r2r-nhrp-03.txt create mode 100644 doc/rfc2332.txt create mode 100644 etc/Makefile create mode 100755 etc/opennhrp-script create mode 100755 etc/opennhrp-script.cert create mode 100644 etc/opennhrp.conf create mode 100755 etc/racoon-ph1dead.sh create mode 100755 etc/racoon-ph1down.sh create mode 100644 libev/LICENSE create mode 100644 libev/README create mode 100644 libev/VERSION create mode 100644 libev/ev.c create mode 100644 libev/ev.h create mode 100644 libev/ev_epoll.c create mode 100644 libev/ev_kqueue.c create mode 100644 libev/ev_poll.c create mode 100644 libev/ev_port.c create mode 100644 libev/ev_select.c create mode 100644 libev/ev_vars.h create mode 100644 libev/ev_wrap.h create mode 100644 man/Makefile create mode 100644 man/opennhrp-script.8 create mode 100644 man/opennhrp.8 create mode 100644 man/opennhrp.conf.5 create mode 100644 man/opennhrpctl.8 create mode 100644 nhrp/Makefile create mode 100644 nhrp/admin.c create mode 100644 nhrp/afnum.h create mode 100644 nhrp/libev.c create mode 100644 nhrp/libev.h create mode 100644 nhrp/list.h create mode 100644 nhrp/nhrp_address.c create mode 100644 nhrp/nhrp_address.h create mode 100644 nhrp/nhrp_common.h create mode 100644 nhrp/nhrp_defines.h create mode 100644 nhrp/nhrp_interface.c create mode 100644 nhrp/nhrp_interface.h create mode 100644 nhrp/nhrp_packet.c create mode 100644 nhrp/nhrp_packet.h create mode 100644 nhrp/nhrp_peer.c create mode 100644 nhrp/nhrp_peer.h create mode 100644 nhrp/nhrp_protocol.h create mode 100644 nhrp/nhrp_server.c create mode 100644 nhrp/opennhrp.c create mode 100644 nhrp/opennhrpctl.c create mode 100644 nhrp/sysdep_netlink.c create mode 100644 nhrp/sysdep_pfpacket.c create mode 100644 nhrp/sysdep_syslog.c create mode 100644 patches/ipsec-tools-0.7.diff create mode 100644 patches/linux-2.6.19-ipgre.diff create 
mode 100644 patches/linux-2.6.20-ipgre.diff create mode 100644 patches/linux-2.6.22-ipgre.diff diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..74f6e20 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +opennhrp +opennhrpctl +*.o +*.d +*.cmd +*~ diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..b0122ba --- /dev/null +++ b/AUTHORS @@ -0,0 +1,7 @@ +Author: +Timo Teräs + +Patches from: +Steffen Schmidt +Natanael Copa +Halil Goektepe (Deutsche Telekom DTAG Laboratories) diff --git a/Make.rules b/Make.rules new file mode 100644 index 0000000..5c30966 --- /dev/null +++ b/Make.rules @@ -0,0 +1,289 @@ +## +# A set of makefile rules loosely based on kbuild. + +all: compile + +ifndef build + +toplevelrun:=yes + +## +# Disable default rules and make output pretty. + +MAKEFLAGS += -rR --no-print-directory + +Makefile: ; + +ifdef V + ifeq ("$(origin V)", "command line") + VERBOSE = $(V) + endif +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ($(VERBOSE),1) + quiet = + Q = +else + quiet=quiet_ + Q = @ +endif + +ifneq ($(findstring s,$(MAKEFLAGS)),) + quiet=silent_ +endif + +export quiet Q VERBOSE + +## +# Recursion helpers. +srctree := $(CURDIR) +objtree := $(CURDIR) + +export srctree objtree + +## +# Consult SCM for better version string. + +TAGPREFIX ?= v + +GIT_REV := $(shell test -d .git && git describe || echo exported) +ifneq ($(GIT_REV), exported) +FULL_VERSION := $(patsubst $(TAGPREFIX)%,%,$(GIT_REV)) +else +FULL_VERSION := $(VERSION) +endif + +RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS -o -name .pc -o -name .hg -o -name .git \) -prune -o + +export FULL_VERSION RCS_FIND_IGNORE + +## +# Utilities and default flags for them. 
+ +CROSS_COMPILE ?= +CC := $(CROSS_COMPILE)gcc +LD := $(CROSS_COMPILE)ld +INSTALL := install +INSTALLDIR := $(INSTALL) -d + +CFLAGS ?= -g -O2 +CFLAGS_ALL := -Wall -Wstrict-prototypes -D_GNU_SOURCE -std=gnu99 +CFLAGS_ALL += $(CFLAGS) + +LDFLAGS ?= -g +LDFLAGS_ALL += $(LDFLAGS) + +export CC LD INSTALL INSTALLDIR CFLAGS_ALL LDFLAGS_ALL + +build := + +endif + +## +# Reset all variables. +ifneq ($(origin targets),file) +targets := +endif + +src := +obj := + +src += $(build) +obj := $(build) + +## +# Include directory specific stuff + +ifneq ($(build),) +$(build)/Makefile: ; +include $(build)/Makefile +endif + +## +# Rules and helpers + +PHONY += all compile install clean FORCE + +# Convenient variables +comma := , +squote := ' +empty := +space := $(empty) $(empty) + +# The temporary file to save gcc -MD generated dependencies must not +# contain a comma +depfile = $(subst $(comma),_,$(@D)/.$(@F).d) + +build-dir = $(patsubst %/,%,$(dir $@)) +target-dir = $(dir $@) + +## +# Build rules + +ifneq ($(NOCMDDEP),1) +# Check whether the command for this rule and the command saved for the +# target have the same arguments. The result is an empty string if equal. +# The user may override this check using make NOCMDDEP=1 +# (this file tests NOCMDDEP, not KBUILD_NOCMDDEP). +arg-check = $(strip $(filter-out $(cmd_$(1)), $(cmd_$@)) \ + $(filter-out $(cmd_$@), $(cmd_$(1))) ) +endif + +# echo command. +# Short version is used, if $(quiet) equals `quiet_', otherwise full one. +echo-cmd = $(if $($(quiet)cmd_$(1)),\ + echo ' $(call escsq,$($(quiet)cmd_$(1)))$(echo-why)';) + +make-cmd = $(subst \#,\\\#,$(subst $$,$$$$,$(call escsq,$(cmd_$(1))))) + +# printing commands +cmd = @$(echo-cmd) $(cmd_$(1)) + +# Name of target with a '.' as filename prefix. 
foo/bar.o => foo/.bar.o +dot-target = $(dir $@).$(notdir $@) + +# The temporary file to save gcc -MD generated dependencies must not +# contain a comma +depfile = $(subst $(comma),_,$(dot-target).d) + +# Escape single quote for use in echo statements +escsq = $(subst $(squote),'\$(squote)',$1) + +# Find any prerequisites that are newer than the target or that do not exist. +# PHONY targets are skipped in both cases. +local-target-prereqs = % +any-prereq = $(filter $(local-target-prereqs), $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^), $^)) + +# Execute command if command has changed or prerequisite(s) are updated. +# +if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ + @set -e; \ + $(echo-cmd) $(cmd_$(1)); \ + echo 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd) + +# Usage: $(call if_changed_rule,foo) +# Will check if $(cmd_foo) or any of the prerequisites changed, +# and if so will execute $(rule_foo). +if_changed_rule = $(if $(strip $(any-prereq) $(arg-check) ), \ + @set -e; \ + $(rule_$(1))) + +##### +# Handle options to gcc. + +c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS_ALL) $(CFLAGS_EXTRA) \ + $(CFLAGS_$(notdir $@)) +ld_flags = $(LDFLAGS_ALL) $(LDFLAGS_EXTRA) $(LDFLAGS_$(notdir $@)) + +##### +# Compile c-files. 
+quiet_cmd_cc_o_c = CC $@ + +cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< + +define rule_cc_o_c + $(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \ + (echo 'cmd_$@ := $(call make-cmd,cc_o_c)'; echo; cat $(depfile)) \ + > $(dot-target).cmd ; \ + rm $(depfile) +endef + +$(obj)/%.o: override local-target-prereqs=% + +$(obj)/%.o: $(src)/%.c FORCE + $(call if_changed_rule,cc_o_c) + +##### +# Link programs + +# Link an executable based on list of .o files, all plain c +# host-cmulti -> executable +__progs := $(addprefix $(obj)/,$(sort $(progs-y))) +cobjs := $(addprefix $(obj)/,$(sort $(foreach m,$(progs-y),$($(m)-objs)))) + +quiet_cmd_ld = LD $@ + cmd_ld = $(CC) $(ld_flags) -o $@ \ + $(addprefix $(obj)/,$($(@F)-objs)) \ + $(LIBS) $(LIBS_$(@F)) + +$(__progs): override local-target-prereqs=$(addprefix $(obj)/,$($(*F)-objs)) + +$(__progs): $(obj)/%: $(cobjs) FORCE + $(call if_changed,ld) + +targets += $(__progs) $(cobjs) + +### +# why - tell why a target got built +ifeq ($(VERBOSE),2) +why = \ + $(if $(filter $@, $(PHONY)),- due to target is PHONY, \ + $(if $(wildcard $@), \ + $(if $(strip $(any-prereq)),- due to: $(any-prereq), \ + $(if $(arg-check), \ + $(if $(cmd_$@),- due to command line change: $(arg-check), \ + $(if $(filter $@, $(targets)), \ + - due to missing .cmd file, \ + - due to $(notdir $@) not in $$(targets) \ + ) \ + ) \ + ) \ + ), \ + - due to target missing \ + ) \ + ) + +echo-why = $(call escsq, $(strip $(why))) +endif + +## +# Top level rules. + +%/: FORCE + $(Q)$(MAKE) -f Make.rules build=$(build-dir) $(MAKECMDGOALS) + +compile: $(targets) + @: + +install: $(targets) FORCE + +clean: $(filter %/,$(targets)) +ifeq ($(toplevelrun),yes) + $(Q)find . 
$(RCS_FIND_IGNORE) \ + \( -name '*.[oas]' -o -name '.*.cmd' -o -name '.*.d' \) \ + -type f -print | xargs rm -f +endif + $(Q)rm -rf $(addprefix $(obj)/,$(sort $(progs-y) $(progs-n) $(progs-))) + +ifeq ($(origin VERSION),command line) +DIST_VERSION=$(VERSION) +else +DIST_VERSION=$(FULL_VERSION) +endif + +dist: + git archive --format tar --prefix=$(PACKAGE)-$(DIST_VERSION)/ \ + $(TAGPREFIX)$(DIST_VERSION) \ + | bzip2 -9 > $(PACKAGE)-$(DIST_VERSION).tar.bz2 + +FORCE: + +# Read all saved command lines and dependencies for the $(targets) we +# may be building above, using $(if_changed{,_dep}). As an +# optimization, we don't need to read them if the target does not +# exist, we will rebuild anyway in that case. + +targets := $(wildcard $(sort $(targets))) +cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) + +ifneq ($(cmd_files),) + include $(cmd_files) +endif + +# Declare the contents of the .PHONY variable as phony. We keep that +# information in a variable so we can use it in if_changed and friends. 
+ +.PHONY: $(PHONY) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0c85d98 --- /dev/null +++ b/Makefile @@ -0,0 +1,34 @@ +## +# Building opennhrp + +PACKAGE := opennhrp +VERSION := 0.13 + +## +# Default directories + +DESTDIR= +SBINDIR=/usr/sbin +CONFDIR=/etc/opennhrp +MANDIR=/usr/share/man +DOCDIR=/usr/share/doc/opennhrp +STATEDIR=/var/run + +export DESTDIR SBINDIR CONFDIR MANDIR DOCDIR STATEDIR + +## +# Top-level rules and targets + +targets := nhrp/ etc/ man/ + +## +# Include all rules and stuff + +include Make.rules + +## +# Top-level targets + +install: + $(INSTALLDIR) $(DESTDIR)$(DOCDIR) + $(INSTALL) README $(DESTDIR)$(DOCDIR) diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..f386771 --- /dev/null +++ b/NEWS @@ -0,0 +1,289 @@ +============================================================================= +OpenNHRP ChangeLog http://sourceforge.net/projects/opennhrp +============================================================================= + +Detailed changelog is available via Git history via web: +http://opennhrp.git.sf.net/git/gitweb.cgi?p=opennhrp;a=blob;f=NEWS;hb=HEAD + +----------------------------------------------------------------------------- + opennhrp 0.13 - released 25/Dec/2011 +----------------------------------------------------------------------------- + - feature: add admin "interfaces show" command to display information about + the interface cache + - feature: support GRE interface binding changes (update gre nbma address + properly, and purge peer cache) to support dual ISP setups with failover + - fix: send registration reply even when all bindings are rejected + - fix: fix really the holding-time to apply to shortcut-target + - fix: fix hop count handling + - fix: various memory leaks fixed + - fix: fix memory corruption in the hlist structure (would be visible when + opennhrp is acting as NHS with heavy traffic) + +----------------------------------------------------------------------------- + opennhrp 
0.12.3 - released 19/Aug/2011 +----------------------------------------------------------------------------- + - feature: export reason why peer-down trigger was executed; and implement + 'lowerdown' opennhrpctl command for racoon hook to indicate that the + IPsec SA has died (opennhrp-script can then avoid the unneccessary and + possibly harmful call to racoonctl) + - fix: route NHRP queries always via NHS (because ipsec initial-contact + mechanism for purging dead IPsec SAs triggers after NHRP rediscovery, + and if remote peer was rebooted, the direct link might be dead) + - fix: don't negative cache entries on timeout (timeout is indication of + temporary error: none of NHS' is reachable) + - fix: don't reply to kernel's ARP queries using local route entries. + this also prevents bad shortcut-routes if the local GRE prefix is + a sub-prefix of routed subnet over the GRE + +----------------------------------------------------------------------------- + opennhrp 0.12.2 - released 07/Jul/2011 +----------------------------------------------------------------------------- + - fix: regression introduced in 0.12's policy routing changes that + shortcuts for in-NBMA network would not work unless using dynamic-map + NHS configuration (from David Ward) + +----------------------------------------------------------------------------- + opennhrp 0.12.1 - released 24/Mar/2011 +----------------------------------------------------------------------------- + - feature: export tunnel GRE key to opennhrp-script + - fix: build error against certain kernel versions and architectures + - fix: update registrations when 1/3 of the holding-time has passed as + per rfc recommendation + - fix: fix holding-time to apply properly to shortcut-target blocks + +----------------------------------------------------------------------------- + opennhrp 0.12 - released 01/Nov/2010 +----------------------------------------------------------------------------- + - feature: preliminary support for 
policy routing. cache kernel routes for + each gre device and use them for routing lookups. nhrp shortcut routes + should be in separate routing table. this allows nhrp message routing to + always happen using bgp/ospf routes (for shortcut refreshes) and fixes + shortcuts to converge with the main routing information. + - feature: shortcut-target config option for subnet specific holding-time + overrides and aggregation of local subnet to "summary shortcut" + - fix: delete shortcut-routes if their gateway is removed to force renewal + of the route (shortcut gateway can change due to bgp/ospf routing change) + - fix: actually remove dynamic-nhs from peers if it's A entry is removed + - fix: disallow duplicate cached entries with dynamic-nhs entries + - randomize retry timer and increase script timeouts + - improve logging a bit + +----------------------------------------------------------------------------- + opennhrp 0.11.5 - released 16/Mar/2010 +----------------------------------------------------------------------------- + - clear negative cached entries for peers which sends resolution request + - use several netlink sockets to receive notifications so we don't lose + sync on all of them + - fix shortcut renewals + - libev updated to version 3.9 + - signal handling fixed + +----------------------------------------------------------------------------- + opennhrp 0.11.4 - released 04/Mar/2010 +----------------------------------------------------------------------------- + - multicast packet relay fix + - netlink buffer sizes increased + +----------------------------------------------------------------------------- + opennhrp 0.11.3 - released 30/Oct/2009 +----------------------------------------------------------------------------- + - handle dns lookup failures properly + - fix failover for shortcut routes + - detect forwarding loops for indications + - some code cleanups + +----------------------------------------------------------------------------- + opennhrp 
0.11.2 - released 25/Sep/2009 +----------------------------------------------------------------------------- + - fixed libev usage bug that could cause crash on script timeout + - make lock file closed on exec so opennhrp-script instances won't keep + opennhrp daemon lock + - fixes traffic indications to work again (captured packet length was + not right) + +----------------------------------------------------------------------------- + opennhrp 0.11.1 - released 31/Aug/2009 +----------------------------------------------------------------------------- + - update libev version to 3.8 + - more permssive build for warnings (libev generates some warnings) + - fix packet filter installation timer + - fix a false assert for peer deletion + - disable icmp redirect properly + - minor fixes to documentation and example script + +----------------------------------------------------------------------------- + opennhrp 0.11 - released 18/Jun/2009 +----------------------------------------------------------------------------- + - introduce 'dynamic-map' directive to autodetect all next hop servers + from a domain name with multiple A entries + - 'multicast' directive to better control softswitching of multicast + packets + - use libev instead of the self written event handling code + - enable Forward NHS extension for Traffic Indications to drop the message + after it has visited all NHS:es (otherwise it would loop between them + until ttl expires) + - performance optimizations to packet capturing, multicast packet process + switching, handling of registration requests and logging + - fix 64-bit compatibility issues + - some code documentation and clean ups + +----------------------------------------------------------------------------- + opennhrp 0.10.3 - released 04/May/2009 +----------------------------------------------------------------------------- + - fix handling of c-ares timeouts + - fix cancellation of asynchronous operations in peer cache + - fix control socket 
default location (broke on makefile rewrite) + - code clean up (rename reference counting functions) + +----------------------------------------------------------------------------- + opennhrp 0.10.2 - released 28/Apr/2009 +----------------------------------------------------------------------------- + - various safety measures in case of off-nbma routing loops + - fix a bug which caused static entries without 'register' to get deleted + - try to combine shortcut routes to get less nhrp cache entries + +----------------------------------------------------------------------------- + opennhrp 0.10.1 - released 22/Apr/2009 +----------------------------------------------------------------------------- + - fix the breakage in build system after the rewrite + - fix registration to servers when using domain names + +----------------------------------------------------------------------------- + opennhrp 0.10 - released 21/Apr/2009 +----------------------------------------------------------------------------- + - use c-ares library to make dns queries asynchronous + - fix mtu handling from registration requests + - avoid opennhrp-script zombie floods by reaping children between + processing registration request packets + - rewrite build system to something similar to kbuild + - migrate to git + +----------------------------------------------------------------------------- + opennhrp 0.9.3 - released 20/Feb/2009 +----------------------------------------------------------------------------- + - when public IP changes purge all related peer entries (opennhrp should + now survive and automatically re-register when dhcp enforces IP change) + - remove an assertation that was invalid (could cause opennhrp to abort + when acting as NHS in some situation) + - make monotonic clock work with old uclibc + +----------------------------------------------------------------------------- + opennhrp 0.9.2 - released 31/Dec/2008 
+----------------------------------------------------------------------------- + - pid file locking change in 0.9.1 broke daemonization, make it work again + +----------------------------------------------------------------------------- + opennhrp 0.9.1 - released 31/Dec/2008 +----------------------------------------------------------------------------- + - fix a crash in peer cache enumeration + - update opennhrp-script to show how to add host route with mtu + - lock pid file as first thing (to prevent accidental startup when opennhrp + is already running) + +----------------------------------------------------------------------------- + opennhrp 0.9 - released 26/Dec/2008 +----------------------------------------------------------------------------- + - use monotonic system clock if available + - allow startup even if dns names are unresolveable + - make nhrp holding time configurable + - Cisco NHS specific feature: send cisco compatible purge if unique NBMA + mapping already exists (to re-register when NBMA address changes) + - additional opennhrp-script example with ipsec certificate checking + - some effort to make opennhrp compile on old system (in limited mode) + - detect NBMA MTU from interface and transmit it over NHRP and pass it to + opennhrp-script (to insert manual NBMA routes if path MTU discovery + does not work) + +----------------------------------------------------------------------------- + opennhrp 0.8 - released 03/Oct/2008 +----------------------------------------------------------------------------- + - licensing terms changed to GPL version 2 or later + - send purge request to shortcut subnets after registration + - clear redirection rate limiting cache for purge request addresses + - new admin commands: "redirect purge" and "schedule" + - rename admin commands: "flush", "purge" and "show" to have "cache" prefix + (accepts still old style commands for a while) + - make logging a bit less verbose + - minor fixes to renewals of peers and 
shortcut routes + - fix a memory leak + +----------------------------------------------------------------------------- + opennhrp 0.7.1 - released 18/Jun/2008 +----------------------------------------------------------------------------- + - use only primary interface addresses as nbma source address + - fix a access to freed memory in certain special cases of peer cache + enumeration + - fix a memory leak + +----------------------------------------------------------------------------- + opennhrp 0.7 - released 30/Apr/2008 +----------------------------------------------------------------------------- + - catch multicast packets and send them as multiple unicast packets + to all known peers + - new script events: interface-up (to clear neighbor and route caches + on startup) and peer-register (to e.g. validate peer protocol ip address + from the ipsec certificate) + - parse nat-oa for cached entries + - routing regression fixes (don't try to resolve unreachable statically + mapped peers) + - fix deletion of multiple cache entries from enumeration code + (crashed in some rare circumstances) + - check for IFA_LOCAL attribute presence before using it (fixes a crash) + - fix bug which caused negative cache entries to prevent registration + of the protocol address + - code cleanups and some optimizations + +----------------------------------------------------------------------------- + opennhrp 0.6.2 - released 04/Apr/2008 +----------------------------------------------------------------------------- + - accept shortcuts when a route to shortcut-destination interface exists + (in addition to local addresses in that interface) + - handle netlink link, address and route deleted notifications properly + - print error if opennhrp-script fails for some reason + - change peer flags: 'lower-up' means opennhrps-script was ran succesfully, + 'up' means registration has been also done (if it was required) + - fix matching of local-nbma selector when gre interface has no remote + 
address and is not explicitely bound to other interface + - fix admin interface to give 'Affected-Entries' result correctly + - fix config file reading bug; handle last keyword even if there is no + final new line + - code cleanups and optimizations + +----------------------------------------------------------------------------- + opennhrp 0.6.1 - released 20/Mar/2008 +----------------------------------------------------------------------------- + - fix a crash in error path of packet forwarding + - fix routing of locally generated traffic indications + +----------------------------------------------------------------------------- + opennhrp 0.6 - released 19/Mar/2008 +----------------------------------------------------------------------------- + - accept hostname (domain name) as an NBMA address in config file + - sanitize admin interface: accept cache entry selectors on + flush, purge and show commands; slight changes to unix socket protocol + - multiple gre interfaces do not share nhrp cache anymore + - opennhrp-script: NHRP_SRCADDR and NHRP_SRCNBMA added + - do not let opennhrp-script inherit sockets file descriptors + - run peer-down script when peer was purged via admin interface + - add option -V to show version + - add option -v to show debug log messages (to see nl-arp messages) + - performance improvements + +----------------------------------------------------------------------------- + opennhrp 0.5 - released 05/Mar/2008 +----------------------------------------------------------------------------- + - opennhrpctl command line tool + - list nhrp cache + - purge entries by protocol or nbma address + - flush entries + - daemon mode + - allow comments in configuration file + - various bug fixes + - flush neighbor cache when interface is found + - do not create proxy arp entries when static mapping exists + +----------------------------------------------------------------------------- + opennhrp 0.4 - released 04/Jan/2008 
+----------------------------------------------------------------------------- + - first announced release + diff --git a/README b/README new file mode 100644 index 0000000..0c8673f --- /dev/null +++ b/README @@ -0,0 +1,112 @@ +OpenNHRP Release Notes +====================== + +OpenNHRP is an NHRP implementation for Linux. It has most of the RFC2332 +and Cisco IOS extensions. + +Project homepage: http://sourceforge.net/projects/opennhrp + +Git repository: git://opennhrp.git.sourceforge.net/gitroot/opennhrp + + KERNEL REQUIREMENTS + +You need a kernel with ip_gre patched to support sending and receiving +using NBMA address. + +The support was originally added to 2.6.24-rc2, but it contains a bug +that prevents NAT detection. The latest fix is present in 2.6.24-rc7. + +Gentoo kernels: gentoo-sources-2.6.23-r1 and gentoo-sources-2.6.22-r10 +have partial support too (no NAT there either). + +For the brave who compile their own kernels, there are patches against +vanilla 2.6.20 and 2.6.22 kernels in the patches directory. Or just +upgrade to 2.6.24 or later and no patching is required. Though, there +have been major performance fixes in newer kernels, so 2.6.35 or later +is strongly recommended. + +Also remember to turn on CONFIG_ARPD and CONFIG_NET_IPGRE in your kernel +configuration. + + SYSTEM REQUIREMENTS + +To compile OpenNHRP you need: +- GNU make (3.81 or later works) +- GCC +- pkg-config +- c-ares library (Ubuntu package: libc-ares-dev) + + COMPILING + +Just type 'make' and 'make install'. + + CONFIGURATION + +OpenNHRP currently supports only IPv4 over IPv4 using NBMA GRE tunnels. +To create an NBMA GRE tunnel you might use the following: + + ip tunnel add gre1 mode gre key 1234 ttl 64 + ip addr add 10.255.255.2/24 dev gre1 + ip link set gre1 up + +This should work with the configuration example in opennhrp.conf(5). + + IPSEC ENCRYPTION OF GRE PACKETS + +ipsec-tools 0.8.0 or later is recommended. Earlier versions need patching +for dmvpn to work properly. 
+ +The ipsec-tools configuration I prefer to use is: encrypt all GRE +traffic in transport mode. IPsec policy for that should be defined in +/etc/ipsec.conf: + spdflush; + spdadd 0.0.0.0/0 0.0.0.0/0 gre -P out ipsec esp/transport//require; + spdadd 0.0.0.0/0 0.0.0.0/0 gre -P in ipsec esp/transport//require; + +And ipsec-tools configuration with pre-shared key could look something +like this: + +/etc/racoon/racoon.conf: + path pre_shared_key "/etc/racoon/psk.txt"; + remote anonymous { + exchange_mode aggressive; + lifetime time 24 hour; + my_identifier user_fqdn "my-user-name@my-domain.example"; + nat_traversal on; + # For ipsec-tools snapshot 2010-10-10 or later + script "/etc/opennhrp/racoon-ph1dead.sh" phase1_dead; + # For earlier ipsec-tools + # script "/etc/opennhrp/racoon-ph1down.sh" phase1_down; + proposal { + encryption_algorithm 3des; + hash_algorithm sha1; + authentication_method pre_shared_key; + dh_group 2; + } + } + sainfo anonymous { + pfs_group 2; + lifetime time 12 hour; + encryption_algorithm 3des, blowfish 448, rijndael; + authentication_algorithm hmac_sha1, hmac_md5; + compression_algorithm deflate; + } + +And /etc/racoon/psk.txt: + my-user-name@my-domain.example "my-secret-pre-shared-key" + +It is of course more secure to use certificates for authentication. +And using aggressive main mode is not recommended either, but it is +required to make FQDN pre-shared authentication work. This setup is +fast to do and can get you started with testing OpenNHRP. + + DOCUMENTATION + +Most of the OpenNHRP documentation is in the manpages. Read them. + +Also some general NHRP documents can be found from Cisco website +(www.cisco.com). + + BUGS + +Use the SourceForge bug tracker or mailing list. diff --git a/TODO b/TODO new file mode 100644 index 0000000..8b2a0ac --- /dev/null +++ b/TODO @@ -0,0 +1,27 @@ +Open items that need work on OpenNHRP: + +- interface-up, nhs-up, nhs-down need to be serialized for quagga + management. 
alternatively, the script could return some special + value meaning "try again soon". + +- offload multicast packet forwarding to kernel + +- use mmapped pf_packet interface + +- nhrp_peer should be split to more files, it's relatively large now. + might split nhrp_peer to separate types. + +- Proper handling of unique bit. Currently registration of unique address + overwrites previous registration, but this is against the RFC. + +- Load balancing: return multiple CIE entries, when we have multiple + local IP addresses. When receiving multi CIE next-hop, balance traffic + or for shortcut routes, create a multi nexthop route. + +- Create some logic to detect if NBMA and public IPs are mixed up in + the "map" directive. Issue a warning about this. + +- Support reloading of configuration (via SIGHUP or "opennhrpctl reload") + +- Clean shutdown: send purge request to registration servers, dynamic + clients and possibly track resolution requests and purge those too. diff --git a/contrib/init-scripts/debian/opennhrp.init b/contrib/init-scripts/debian/opennhrp.init new file mode 100644 index 0000000..4a0fe94 --- /dev/null +++ b/contrib/init-scripts/debian/opennhrp.init @@ -0,0 +1,160 @@ +#! 
/bin/sh +### BEGIN INIT INFO +# Provides: opennhrp +# Required-Start: $remote_fs +# Required-Stop: $remote_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: RFC 2332 2333 daemon +# Description: This file suports one instance of opennhrp +### END INIT INFO + +# Author: Robin David Hammond +# +# Do NOT "set -e" + +# PATH should only include /usr/* if it runs after the mountnfs.sh script +PATH=/sbin:/usr/sbin:/bin:/usr/bin +DESC="OpenNextHopResolutionProtocol" +NAME=opennhrp +DAEMON=/usr/sbin/$NAME +PIDPATH=/var/run/$NAME +PIDFILE=$PIDPATH/pid +SCRIPTNAME=/etc/init.d/$NAME +CTRLPATH=/var/run/$NAME/ +CTRLPIPE=$CTRLPATH/ctrl + +CONFFILE=/etc/opennhrp/opennhrp.conf +SCRIPTFILE=/etc/opennhrp/opennhrp-script + +DAEMON_ARGS=" -d -a $CTRLPIPE -c $CONFFILE -s $SCRIPTFILE -p $PIDFILE" +# -a /var/run/opennhrp/ctrl -c /etc/opennhrp/opennhrp.conf -s /etc/opennhrp/opennhrp-script -d -p /var/run/opennhrp/pid +# Exit if the package is not installed +[ -x "$DAEMON" ] || exit 0 + +# Read configuration variable file if it is present +[ -r /etc/default/$NAME ] && . /etc/default/$NAME + +# Load the VERBOSE setting and other rcS variables +. /lib/init/vars.sh + +# Define LSB log_* functions. +# Depend on lsb-base (>= 3.0-6) to ensure that this file is present. +. /lib/lsb/init-functions + +# +# Function that starts the daemon/service +# +do_start() +{ + mkdir -p $PIDPATH + mkdir -p $CTRLPATH + # Return + # 0 if daemon has been started + # 1 if daemon was already running + # 2 if daemon could not be started + start-stop-daemon --start --quiet --pidfile $PIDFILE --exec $DAEMON --test > /dev/null \ + || return 1 + start-stop-daemon --start --quiet --pidfile $PIDFILE --exec $DAEMON -- \ + $DAEMON_ARGS \ + || return 2 + # Add code here, if necessary, that waits for the process to be ready + # to handle requests from services started subsequently which depend + # on this one. As a last resort, sleep for some time. 
+} + +# +# Function that stops the daemon/service +# +do_stop() +{ + # Return + # 0 if daemon has been stopped + # 1 if daemon was already stopped + # 2 if daemon could not be stopped + # other if a failure occurred + start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $PIDFILE --name $NAME + RETVAL="$?" + [ "$RETVAL" = 2 ] && return 2 + # Wait for children to finish too if this is a daemon that forks + # and if the daemon is only ever run from this initscript. + # If the above conditions are not satisfied then add some other code + # that waits for the process to drop all resources that could be + # needed by services started subsequently. A last resort is to + # sleep for some time. + start-stop-daemon --stop --quiet --oknodo --retry=0/30/KILL/5 --exec $DAEMON + [ "$?" = 2 ] && return 2 + # Many daemons don't delete their pidfiles when they exit. + rm -f $PIDFILE + return "$RETVAL" +} + +# +# Function that sends a SIGHUP to the daemon/service +# +do_reload() { + # + # If the daemon can reload its configuration without + # restarting (for example, when it is sent a SIGHUP), + # then implement that here. + # + start-stop-daemon --stop --signal 1 --quiet --pidfile $PIDFILE --name $NAME + return 0 +} + +case "$1" in + start) + [ "$VERBOSE" != no ] && log_daemon_msg "Starting $DESC" "$NAME" + do_start + case "$?" in + 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;; + 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;; + esac + ;; + stop) + [ "$VERBOSE" != no ] && log_daemon_msg "Stopping $DESC" "$NAME" + do_stop + case "$?" in + 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;; + 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;; + esac + ;; + #reload|force-reload) + # + # If do_reload() is not implemented then leave this commented out + # and leave 'force-reload' as an alias for 'restart'. + # + #log_daemon_msg "Reloading $DESC" "$NAME" + #do_reload + #log_end_msg $? 
+ #;; + restart|force-reload) + # + # If the "reload" option is implemented then remove the + # 'force-reload' alias + # + log_daemon_msg "Restarting $DESC" "$NAME" + do_stop + case "$?" in + 0|1) + do_start + case "$?" in + 0) log_end_msg 0 ;; + 1) log_end_msg 1 ;; # Old process is still running + *) log_end_msg 1 ;; # Failed to start + esac + ;; + *) + # Failed to stop + log_end_msg 1 + ;; + esac + ;; + *) + #echo "Usage: $SCRIPTNAME {start|stop|restart|reload|force-reload}" >&2 + echo "Usage: $SCRIPTNAME {start|stop|restart|force-reload}" >&2 + exit 3 + ;; +esac + +: diff --git a/doc/draft-ietf-ion-r2r-nhrp-03.txt b/doc/draft-ietf-ion-r2r-nhrp-03.txt new file mode 100644 index 0000000..8f80b36 --- /dev/null +++ b/doc/draft-ietf-ion-r2r-nhrp-03.txt @@ -0,0 +1,837 @@ +Internetworking Over NBMA Yakov Rekhter +INTERNET-DRAFT Cisco Systems + Joel Halpern +Expiration Date: November 1999 Institutional Venture Partners + May 1998 + + + NHRP for Destinations off the NBMA Subnetwork + + draft-ietf-ion-r2r-nhrp-03.txt + + +1. Status of this Memo + + This document is an Internet-Draft and is in full conformance with + all provisions of Section 10 of RFC2026. Internet-Drafts are working + documents of the Internet Engineering Task Force (IETF), its areas, + and its working groups. Note that other groups may also distribute + working documents as Internet-Drafts. + + Internet-Drafts are draft documents valid for a maximum of six months + and may be updated, replaced, or obsoleted by other documents at any + time. It is inappropriate to use Internet-Drafts as reference + material or to cite them other than as ``work in progress.'' + + The list of current Internet-Drafts can be accessed at + http://www.ietf.org/ietf/1id-abstracts.txt + + The list of Internet-Draft Shadow Directories can be accessed at + http://www.ietf.org/shadow.html. + + +2. 
Abstract + + The NBMA Next Hop Resolution Protocol (NHRP) [1] specifies a + mechanism that allows a source station (e.g., a host or a router) on + an NBMA subnetwork to find the NBMA subnetwork address of a + destination station when the destination station is connected to the + NBMA subnetwork. For the case where the destination station is off + the NBMA subnetwork the mechanism described in [1] allows a node to + determine the NBMA subnetwork address of an egress router from the + NBMA subnetwork that is ``nearest'' to the destination station. If + used to locate an egress router wherein the destination station is + directly behind the egress router, the currently documented NHRP + behaviors are sufficient. However, as documented elsewhere [2], + there are cases where if used between routers for generalized + transit, NHRP can produce loops. + + + + +Joel Halpern [Page 1] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + + This document describes extensions to the NBMA Next Hop Resolution + Protocol (NHRP) [1] that allow a node to acquire and maintain the + information about the egress router without constraining the + destination(s) to be directly connected to the egress router. + + +3. CONVENTIONS + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [3]. + + +4. NHRP Target Information + + The mechanism described in this document allows a node to find an + egress router for either a single destination, or a set of + destinations (where the set is expressed as a single address prefix). + Since a single destination is just a special case of a set of + destinations, for the rest of the document we will always talk about + a set of destinations, and will refer to this set as an ``NHRP + target''. 
+ + The NHRP target is carried in the NHRP Request, Reply, and Purge + messages as an address prefix (using the Prefix Length field of the + NHRP Client Information Extension). In order to ensure correctness, + a target may be replaced by an identical target with a longer prefix + length. This replacement may be done at an intermediate or + responding NHS. Other than this increase of prefix length, no NHS + shall modify the NHRP target information in an NHRP message. + + In general a router may maintain in its Forwarding Information Base + (FIB) routes whose Network Layer Reachability Information (NLRI) that + exhibits a subset relation. Such routes are called overlapping + routes. To expand upon this, entries in a FIB are often related, with + one entry being a prefix of another entry. The longer prefix + therefore covers a set of routes that are a subset of the shorter + prefix. To provide correct forwarding in the presence of such + overlapping (or nested) routes this document constrains an NHRP + target by requiring that all the destinations covered by the target + must form a subset of the NLRI of at least one route in the + Forwarding Information Base (FIB) of the router that either + originates, or propagates an NHRP Request. That is, there must be at + least one route in the FIB which is a prefix of (or equal to) the + target of the request. For the rest of the document we'll refer to + this as the ``first NHRP target constraint''. A station can + originate an NHRP Request, and a router can propagate an NHRP Request + only if the NHRP target of the Request does not violate the first + + + +Joel Halpern [Page 2] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + + NHRP target constraint. + + If a received NHRP request does not meet this ``first NHRP target + constraint'' when received, the receiving router has two choices. It + may answer the request, defining itself as the egress. 
This is + compatible with the base NHRP specification, and preserves the + ``first NHRP target constraint''. Alternatively, the router may + lengthen the received prefix until the first constraint is met. The + prefix is lengthened until the target falls within (or becomes equal + to) a FIB entry. + + A route (from a local FIB) whose NLRI forms a minimal superset of all + the destinations covered by the NHRP target is called an ``NHRP + forwarding route''. This is the longest FIB entry that covers the + entire target. Observe that by definition the set of destinations + covered by an NHRP target always exhibits a subset relation to the + set of destinations covered by the NHRP forwarding route associated + with the target. + + This document further constrains origination/propagation of NHRP + Requests by prohibiting the NHRP target (carried by a Request) to + form a superset of the destinations covered by any of the routes in + the local FIB. Remembering that there are nested FIB entries, this + constraint says that there must not be a FIB entry which is itself a + subset of the target of the NHRP request. If there were, there would + be some destinations within the request which would be forwarded + differently then others, preventing a single answer from being + correct. The constraint applies both to the station that originates + an NHRP Request and to the routers that propagate the Request. For + the rest of the document we'll refer to this constraint as the + ``second NHRP target constraint''. A station can originate an NHRP + Request, and a router can propagate an NHRP Request only if the NHRP + target of the Request does not violate the second NHRP target + constraint. The second NHRP target constraint guarantees that + forwarding to all the destinations covered by the NHRP target would + be accomplished via a single (common) route, and this route would be + the NHRP forwarding route for the target. 
+ + Again, if a received NHRP request does not meet the ``second NHRP + target constraint'', the router may either respond to the request, + providing its own NBMA address, or it may lengthen the prefix in the + request so as to meet the second constraint. + + + + + + + + + +Joel Halpern [Page 3] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +5. NHRP Requester and Terminator Processing + + The issue being addressed with the behaviors being mandated in this + document is to ensure that sufficient information is present and + processed to avoid NHRP shortcuts causing packet forwarding loops. + + In order to do this, the requester and responder of the request must + undertake certain work, and any "border routers" in the forwarding + path must also perform certain additional work beyond checking the + target consistency with the FIB during request processing. This + border work suffices to detect any changes that would cause the path + selection to have failed the target constraints. + + The work performed by the requester and responder consists of two + kinds of work. One set is requester only work, and is required in + order to determine where the protocol boundaries are. The other set + is the route monitoring work. + + +5.1. NHRP IGP information + + The primary cause of NHRP forwarding loops is the loss of information + at a routing protocol boundary. Normally, such boundaries are + detected by the router at the boundary. However, it is possible for + IGP boundaries to overlap. Therefore, NHRP requesting Routers MUST + include the NHRP IGP Information extension (as defined in section 9). + This extension indicates what IGP the originator of the request uses. + A requesting router must always include this extension, since it is + not possible to tell a priori whether the eventual resolution of the + request will be a host or a router. 
+ + Because the entire BGP domain is consider one routing domain, the + extension also contains an indication as to whether the originator + was a BGP speaker. + + +5.2. NHRP Requestor and Responder monitoring + + NHRP requestors and responders are required to monitor routing to + maintain correct shortcut information. + + Once a router that originates an NHRP Request acquires the shortcut + next hop information, it is essential for the router to be able to + detect any changes that would affect the correctness of this + information. The following measures are intended to provide the + correctness. + + Both ends of a shortcut have to monitor the status of the route that + + + +Joel Halpern [Page 4] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + + was associated with the shortcut (the NHRP forwarding route). If the + status changes at the router that generated the NHRP Reply, this + router should send a Purge message, so that the NHRP Requester would + issue another NHRP. If the status changes at the Requester, the + Requester must issue another NHRP. This ensures that when both ends + of a shortcut are up, any changes in routing that impact forwarding + to any of the destinations in the NHRP target would result in a + revalidation (via NHRP) of the shortcut. Note that in addition to + sending purges/reverifies in response to routing changes which + directly effect the NHRP target, there is one other case. + + A router MUST perform the appropriate purge/reverification process if + it receives routing updates that cause an issued NHRP request to + violate either of the target constraints defined earlier. This is + possible at an NHRP originator, and is more likely at border devices. + + Once a shortcut is established, the Requester needs to have some + mechanism(s) to ensure that the other end of the shortcut is alive. 
+ Among the possible mechanisms are: (a) indications from the Data Link + layer, (b) presence of traffic in the reverse direction that comes + with the Link Layer address of the other end, (c) keepalives sent by + the other end. This is intended to suppress black holes, when the + next hop router in the shortcut (the router that generated Reply) + goes down. + + A requester should establish a shortcut only after the requester + determines that the information provided by NHRP is fairly stable. + This is necessary in order to avoid initiating shortcuts that are + based on transients in the routing information, and thus would need + to be revalidated almost immediately anyway. Thus, a router may wait + to use NHRP information if the underlying routing information has + recently changed. If the routing protocol being used has a notion of + stability, it should be used. Information in a transient or + holddown state SHOULD NOT be used, and requests which need to be + processed based on such information SHOULD be discarded. + + + + + + + + + + + + + + + + +Joel Halpern [Page 5] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +6. Border Processing of NHRP Request + + Processing of an NHRP Request is covered by two sets of rules: the + first set for IGP related processing, and the second set for BGP + related processing. The rules for IGP processing relate to + determining where the IGP borders are (in particular in the case of + overlapping IGPs), and then for what must happen at said borders. + + +6.1. Border Determination + + When a router receives a request, and determines that it is not the + NBMA exit router, it must perform a series of checks before + forwarding the request. 
+ + When a router receives such a Request, the router uses the NHRP + target and the NHRP IGP information to check whether (a) the first + and the second NHRP target constraints are satisfied, (b) the router + it is in the same routing domain as the originator of the Request, + and if yes, then whether (c) it is a border router for that domain. + + When the NHRP target is checked against the forwarding database, a + determination must be made as to whether either of the target + constraints has been violated. If they are violated, then the router + MAY either + + o Extend the prefix so as to meet the constraints. + + o reply to the request indicating that it is the destination + + o return an error indicating which constraint was violated. + + If the NHRP forwarding route indicates a next hop that is not on the + same NBMA as the interface on which the Request was received, the + router sends back an NHRP Reply and terminates the query. + + If a router receives a request without IGP information, then it was + originated within this domain by a host. If the router is an AS + Border Router (i.e. running BGP), and if the forwarding path exits + the AS, then it must behave as a border router for this request. + Otherwise, for requests without IGP information, the router is not a + border router. + + For requests with IGP information, the router compares the forwarding + information against the IGP in the request. If the forwarding entry + indicates that the next hop is to exit the AS (an AS Border Router), + then check the BGP behaviors below. + + + + +Joel Halpern [Page 6] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + + When the IGP the next hop was learned from is the same IGP as + indicated in the request, then the NHS simply forwards the request. + [Of course, as per NHRP, it is free to respond indicating it is the + termination of the shortcut, for example when the Router/NHS is a + firewall.] 
+ + When the IGP the next hop was learned from is different from that + listed in the NHRP request, then this NHS is a border router for this + request. + + +6.2. Border Behavior + + In all cases, a border router has two choices. It MAY terminate and + respond to the request, responding with its IP and NBMA address. + + Alternatively, it MAY perform border propagation. + + +6.2.1. Reorigination + + Upon receiving an NHRP request for which the NHS is a border router, + if it chooses to propagate the request, it MUST originate a new NHRP + request. This request will have a locally generated request + identifier, and the same NHRP target information as in the received + request. The NHRP IGP Information will be the correct indication for + the outgoing interface, with BGP indication if the received request + had the BGP indication, or if this transition crosses the AS border. + All other extensions are copied from the incoming request to the new + request. + + +6.2.2. Response Propagation + + When an NHRP response is received for a propagated request, the + information is copies from the received request, and passed on in a + new NHRP response, responding to the originally received request. + The prefix length in the received response is copied to the new + response. All extensions except the NHRP IGP Information are copied + to the new response. + + In addition, the border router saves state about this information + exchange. The saved state includes the NHRP target from the + response, with the NHRP prefix length that resulted from the + exchange. It also includes the both the original requester, and the + identity of the responder. These are used to generate appropriate + reverification and purges whenever routing changes in a way that + could effect the resolution. + + + +Joel Halpern [Page 7] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +6.3. 
Border Information + + Sometimes the routing protocol will have provided the border router + with enough information to generate a response to an incoming NHRP + request. In particular, the border router may have information about + IP prefix to NBMA address bindings. If such information is present, + it may be used by a border router to produce an NHRP response without + actually propagating the request. In such a case, that information + must be monitored for stability to maintain the correctness of the + shortcut. + + +7. BGP Operation + + While the NHRP mechanism described above is mostly constrained to the + routers within a single routing domain, the same mechanisms can be + used for shortcuts that span multiple domains. In doing so, one + wants to produce as little additional overhead in the BGP space as + possible. + + Therefore, we will treat the space over which BGP runs as a single + routing domain. Care must be taken to propagate information across + the individual AS without error, and to indicate that one has + properly entered the BGP space. + + Additional complexity in handling multi-domain shortcuts arise if + routing information gets aggregated at the border routers (which + certainly happens in practice). Since BGP is the major protocol that + is used to exchange routing information across multiple routing + domains, we'll restrict our proposal to the case where the routing + information exchange across domains' boundaries is controlled by BGP. + + If both the source and the destination domains are on a common NBMA + network, and the path between these two domains is also fully within + the same NBMA network, then we have only three routing domains to + deal with: source routing domain, BGP routing domain, and destination + routing domain. If the destination domain is not on the same NBMA as + the source domain, then we need to deal only with two domains - the + source and the BGP. 
Note that we treat all routers that participate + in a single (common) instance of BGP as a single BGP routing domain, + even if these routers participate in different intra-domain routing + protocols, or in different instances of the same intra-domain routing + protocol. There are three aspects to consider. + + + (a) how a border router in the domain that the originator of + the Request is in handles the Request (crossing IGP/BGP + boundary), + + + +Joel Halpern [Page 8] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + + (b) how the Request is handled across the BGP domain, and + finally + + (c) how a border router in the domain where the NHRP target is + in handles the Request (crossing BGP/IGP boundary). + + + +7.1. Handling NHRP Request at the source domain border router + + When a border router receives an NHRP Request originated from within + its own (IGP) routing domain, the border router determines the NHRP + forwarding route for the NHRP target carried by the Request. If the + router already has the shortcut information for the forwarding route, + then the router uses this information to construct a Reply to the + source of the NHRP Request. Otherwise, the router originates its own + NHRP Request. The Request contains exactly the same NHRP target, as + was carried by the original Request; The NHRP IGP Information will + indicate that the request was generated by BGP, and will indicate the + IGP of the BGP AS being entered. While it is assumed that a BGP + transit AS will generally use only one IGP, the IGP information (and + border processing) is included to allow all cases. The newly + originated Request is sent to the next hop of the NHRP forwarding + route. Once the border router receives a Reply to its own Request, + the border router uses the next hop information from the Reply to + construct its own Reply to the source of the original NHRP Request. 
+ + If the border router later on receives a Purge message for the NHRP + forwarding route, the border router treats this event as if there was + a local change in the NHRP forwarding route (even if the there was no + changes in the route). + + This is exactly the same behavior as all other border cases, and is + described here for completeness. + + +7.2. Handling NHRP Request within the BGP domain + + Routers within an AS will check the IGP, and perform appropriate + processing based on the IGP match. In general, this will result in + normal forwarding of the NHRP request. + + Therefore, the significant cases occur at the BGP speaking routers. + There are two conditions to check for, early exit of the NBMA, and + reachability aggregation. Both of these conditions apply to + Autonomous systems that do not contain the NHRP target. + + + + + +Joel Halpern [Page 9] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +7.2.1. NBMA exit + + The BGP router in deciding where to send the NHRP request will + determine what the correct exit from the autonomous system is. It + will determine if that exit is within the NBMA. If it is not within + the NBMA, then the router MUST respond to the NHRP request, + indicating its own IP and NBMA addresses as the correct termination + of the shortcut. This is because the actual NBMA border device is + not in a position to monitor the topology properly. + + BGP routers within an NBMA which are supporting R2R NHRP SHOULD be + configured to know where the NBMA border is. In the absence of such + configuration, requests from other router SHOULD be terminated at the + BGP router, since it can not tell what will be crossing the border. + A BGP router supporting R2R NHRP may be configured to assume that all + of its neighbors are within the NBMA, and therefore not perform such + early termination. + + +7.2.2. Reachability Aggregation + + BGP routers aggregate reachability. 
If the router aggregates + reachability that includes the NHRP target, only this router has the + visibility to some of the topology changes that can affect the + correctness of the route. Therefore, this router is a border router + for this NHRP request. + + It must originate a new request, place the correct information in the + request, receive the response, and generate the correct response + towards the requester. This aggregating router must also monitor + routing in case of changes which affect the request. + + If the router later on receives a Purge message for the NHRP + forwarding route, the router treats this event as if there was a + change in the NHRP forwarding route (even if the there was no changes + in the route). + + It should be noted that this conditions applies if the router COULD + aggregate relevant routing information, even if it currently does + not. + + + + + + + + + + + +Joel Halpern [Page 10] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +7.3. Handling NHRP Request at the destination domain border router + + When a border router receives an NHRP Request from a BGP speaker, and + the border router determines that all the destinations covered by the + NHRP target of the Request are within the (IGP) domain of that border + router, the border router determines the NHRP forwarding route for + the NHRP target carried by the Request. The newly formed Request + contains exactly the same NHRP target as the received Request; the + NHRP IGP Information indicates the IGP this router is using to select + the route to the destination. The newly originated Request is sent + to the next hop of the NHRP forwarding route. Once the border router + receives a Reply to its own Request, the border router uses the next + hop information from the Reply to construct its own Reply to the + source of the original NHRP Request. 
+ + If the border router later on receives a Purge message for the NHRP + forwarding route, the border router treats this event as if there was + a change in the NHRP forwarding route (even if the there was no + changes in the route). + + +8. More state, less messages + + It should be possible to reduce the number of Purge messages and + subsequent NHRP messages (caused by the Purge messages) by + maintaining more state on the border routers at the source and + destination domains, and the BGP routers that perform aggregation + along the path from the source to the destination. + + Specifically, on these routers it would be necessary to keep the + information about all the NHRP targets for which the routers maintain + the shortcut information. This way when such a router determines + that the NHRP forwarding route (for which the router maintains the + shortcut information) changes due to some local routing changes, the + router could check whether these local changes impact forwarding to + the destinations covered by the NHRP targets. For the targets that + are impacted by the changes the router would send Purge messages. + + Note that this mechanism (maintaining NHRP targets) precludes the use + of Address Prefix Extension - the shortcut will be determined only + for the destinations covered by the NHRP target (so, if the target is + a single IP address, then the shortcut would be determined only for + this address). + + + + + + + + +Joel Halpern [Page 11] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +9. NHRP IGP Information Extension Format + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + 0-3 |C|u| Type = 9 | Length = 4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + 4-7 | flags |b| Reserved | IGP ID | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + C "Compulsory." 
If clear, and the NHS does not recognize the + type code, the extension maybe safely be ignored. For + the IGP Information extension, this bit is clear. + + u Unused and must be set to zero + + Type The extension type code. For the IGP Information + extension, this is 9. + + Length the length in octets of the value. For this extension, + this is 4. + + flags Other than the "b" flag, these are reserved, SHALL be set + to 0 on transmission, and SHALL be ignored on reception. + + b This flag indicates whether the request (or a predecessor + thereof) was originated by a BGP speaker. Set (to 1) to + indicate that the BGP speaker has operated on this. + Clear (to 0) if not. + + IGP ID This field indicates the IGP used by the request + originator. The currently defined values are: + + 1 = RIP + 2 = RIPv2 + 3 = OSPF + 4 = Dual IS-IS + + + + + + + + + + + + + +Joel Halpern [Page 12] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +10. IANA Considerations + + This document defines an enumerated field for identifying IGPs in + router-to-router NHRP requests. Since there may be additional IGPs + in use, a procedure is needed for allocating additional values. The + IANA shall allocate values for this field as needed. Specifically, + when requested a value shall be allocated for an IGP for any layer 3 + protocol for which there is a clear and stable definition of the + protocol. An RFC is the best example of such stability. Vendor + published specifications are also acceptable. The IANA should avoid + issuing two values for the same protocol. However, it is not + incumbent upon the IANA to determine if two similar protocols are + actually the same. + + +11. Open issues + + The mechanisms described in this document assume that certain routers + along a path taken by an NHRP Request would be required to maintain + state associated with the NHRP forwarding route associated with the + NHRP target carried by the Request. 
However, it is quite clear that + the router(s) may also lose this state. Further study of the impact + of losing the state is needed before advancing the use of NHRP for + establishing shortcuts among routers beyond Proposed Standard. + + The mechanisms described in this document may result in a situation + where a router would be required to maintain NHRP peering with + potentially a fairly large number of other routers. Further study is + needed to understand the implications of this on the scalability of + the approach where NHRP is used to establish shortcuts among routers. + + This document doesn't have a proof that the mechanisms described here + result in loop-free steady state forwarding when NHRP is used to + establish shortcuts among routers, however, a counterexample has not + yet been found. Further analysis should be done as part of advancing + beyond Proposed Standard. + + + + + + + + + + + + + + + +Joel Halpern [Page 13] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +12. Security Considerations + + Security is provided in the base NHRP protocol, using hop-by-hop + authentication. There is no change to the fundamental security + capabilities provided therein when these extensions are used. It + should be noted that the assumption of transitive trust that is the + basis of such security may well be significantly weaker in an inter- + domain environment, and administrators of border routers should take + this into consideration. The hop-by-hop security model is used by + NHRP originally because there is no end-to-end security association + between the requesting and responding NHRP entities. In this + environment there is the additional facet that intermediate NHS are + modifying the prefix length field of the CIE, thus changing the end- + to-end information. + + +13. References + + [1] J. Luciani, D. Katz, D. Piscitello, B. Cole, N. 
Doraswamy., + "NBMA Next Hop Resolution Protocol", RFC-2332, USC/Information + Sciences Institute, April 1998. + + [2] D. Cansever., "NHRP Protocol Applicability Statement", RFC-2333, + USC/Information Sciences Institute, April 1998 + + [3] S. Bradner., "Key words for use in RFCs to Indicate Requirement + Levels", RFC-2119, USC/Information Sciences Institute, March 1997. + + +14. Acknowledgements + + The authors wish to Thank Curtis Villamizer for his contributions + emphasizing both the importance of the looping cases, and some + examples of when loops can occur. + + +15. Author Information + + + + + + + + + + + + + + +Joel Halpern [Page 14] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + + Joel M. Halpern + Institutional Venture Partners + 3000 Sand Hill Road + Menlo Park, CA + Phone: (650) 926-5633 + email: joel@mcquillan.com + + Yakov Rekhter + cisco Systems, Inc. + 170 Tasman Dr. + San Jose, CA 95134 + Phone: (914) 528-0090 + email: yakov@cisco.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Joel Halpern [Page 15] + \ No newline at end of file diff --git a/doc/rfc2332.txt b/doc/rfc2332.txt new file mode 100644 index 0000000..eb79ee3 --- /dev/null +++ b/doc/rfc2332.txt @@ -0,0 +1,2915 @@ + + + + + + +Network Working Group J. Luciani +Request for Comments: 2332 Bay Networks +Category: Standards Track D. Katz + cisco Systems + D. Piscitello + Core Competence, Inc. + B. Cole + Juniper Networks + N. Doraswamy + Bay Networks + April 1998 + + + NBMA Next Hop Resolution Protocol (NHRP) + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (1998). 
All Rights Reserved. + +Abstract + + This document describes the NBMA Next Hop Resolution Protocol (NHRP). + NHRP can be used by a source station (host or router) connected to a + Non-Broadcast, Multi-Access (NBMA) subnetwork to determine the + internetworking layer address and NBMA subnetwork addresses of the + "NBMA next hop" towards a destination station. If the destination is + connected to the NBMA subnetwork, then the NBMA next hop is the + destination station itself. Otherwise, the NBMA next hop is the + egress router from the NBMA subnetwork that is "nearest" to the + destination station. NHRP is intended for use in a multiprotocol + internetworking layer environment over NBMA subnetworks. + + Note that while this protocol was developed for use with NBMA + subnetworks, it is possible, if not likely, that it will be applied + to BMA subnetworks as well. However, this usage of NHRP is for + further study. + + This document is intended to be a functional superset of the NBMA + Address Resolution Protocol (NARP) documented in [1]. + + + + +Luciani, et. al. Standards Track [Page 1] + +RFC 2332 NBMA NHRP April 1998 + + + Operation of NHRP as a means of establishing a transit path across an + NBMA subnetwork between two routers will be addressed in a separate + document (see [13]). + +1. Introduction + + The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD, + SHOULD NOT, RECOMMENDED, MAY, and OPTIONAL, when they appear in this + document, are to be interpreted as described in [15]. + + The NBMA Next Hop Resolution Protocol (NHRP) allows a source station + (a host or router), wishing to communicate over a Non-Broadcast, + Multi-Access (NBMA) subnetwork, to determine the internetworking + layer addresses and NBMA addresses of suitable "NBMA next hops" + toward a destination station. 
A subnetwork can be non-broadcast + either because it technically doesn't support broadcasting (e.g., an + X.25 subnetwork) or because broadcasting is not feasible for one + reason or another (e.g., an SMDS multicast group or an extended + Ethernet would be too large). If the destination is connected to the + NBMA subnetwork, then the NBMA next hop is the destination station + itself. Otherwise, the NBMA next hop is the egress router from the + NBMA subnetwork that is "nearest" to the destination station. + + One way to model an NBMA network is by using the notion of logically + independent IP subnets (LISs). LISs, as defined in [3] and [4], have + the following properties: + + 1) All members of a LIS have the same IP network/subnet number + and address mask. + + 2) All members of a LIS are directly connected to the same + NBMA subnetwork. + + 3) All hosts and routers outside of the LIS are accessed via + a router. + + 4) All members of a LIS access each other directly (without + routers). + + Address resolution as described in [3] and [4] only resolves the next + hop address if the destination station is a member of the same LIS as + the source station; otherwise, the source station must forward + packets to a router that is a member of multiple LIS's. In multi-LIS + + + + + + + + +Luciani, et. al. Standards Track [Page 2] + +RFC 2332 NBMA NHRP April 1998 + + + configurations, hop-by-hop address resolution may not be sufficient + to resolve the "NBMA next hop" toward the destination station, and IP + packets may have multiple IP hops through the NBMA subnetwork. + + Another way to model NBMA is by using the notion of Local Address + Groups (LAGs) [10]. 
The essential difference between the LIS and the + LAG models is that while with the LIS model the outcome of the + "local/remote" forwarding decision is driven purely by addressing + information, with the LAG model the outcome of this decision is + decoupled from the addressing information and is coupled with the + Quality of Service and/or traffic characteristics. With the LAG + model any two entities on a common NBMA network could establish a + direct communication with each other, irrespective of the entities' + addresses. + + Support for the LAG model assumes the existence of a mechanism that + allows any entity (i.e., host or router) connected to an NBMA network + to resolve an internetworking layer address to an NBMA address for + any other entity connected to the same NBMA network. This resolution + would take place regardless of the address assignments to these + entities. Within the parameters described in this document, NHRP + describes such a mechanism. For example, when the internetworking + layer address is of type IP, once the NBMA next hop has been + resolved, the source may either start sending IP packets to the + destination (in a connectionless NBMA subnetwork such as SMDS) or may + first establish a connection to the destination with the desired + bandwidth (in a connection-oriented NBMA subnetwork such as ATM). + + Use of NHRP may be sufficient for hosts doing address resolution when + those hosts are directly connected to an NBMA subnetwork, allowing + for straightforward implementations in NBMA stations. NHRP also has + the capability of determining the egress point from an NBMA + subnetwork when the destination is not directly connected to the NBMA + subnetwork and the identity of the egress router is not learned by + other methods (such as routing protocols). Optional extensions to + NHRP provide additional robustness and diagnosability. 
+ + Address resolution techniques such as those described in [3] and [4] + may be in use when NHRP is deployed. ARP servers and services over + NBMA subnetworks may be required to support hosts that are not + capable of dealing with any model for communication other than the + LIS model, and deployed hosts may not implement NHRP but may continue + to support ARP variants such as those described in [3] and [4]. NHRP + is intended to reduce or eliminate the extra router hops required by + the LIS model, and can be deployed in a non-interfering manner with + existing ARP services [14]. + + + + + +Luciani, et. al. Standards Track [Page 3] + +RFC 2332 NBMA NHRP April 1998 + + + The operation of NHRP to establish transit paths across NBMA + subnetworks between two routers requires additional mechanisms to + avoid stable routing loops, and will be described in a separate + document (see [13]). + +2. Overview + +2.1 Terminology + + The term "network" is highly overloaded, and is especially confusing + in the context of NHRP. We use the following terms: + + Internetwork layer--the media-independent layer (IP in the case of + TCP/IP networks). + + Subnetwork layer--the media-dependent layer underlying the + internetwork layer, including the NBMA technology (ATM, X.25, SMDS, + etc.) + + The term "server", unless explicitly stated to the contrary, refers + to a Next Hop Server (NHS). An NHS is an entity performing the + Next Hop Resolution Protocol service within the NBMA cloud. An NHS + is always tightly coupled with a routing entity (router, route + server or edge device) although the converse is not yet guaranteed + until ubiquitous deployment of this functionality occurs. Note + that the presence of intermediate routers that are not coupled with + an NHS entity may preclude the use of NHRP when source and + destination stations on different sides of such routers and thus + such routers may partition NHRP reachability within an NBMA + network. 
+ + The term "client", unless explicitly stated to the contrary, refers + to a Next Hop Resolution Protocol client (NHC). An NHC is an + entity which initiates NHRP requests of various types in order to + obtain access to the NHRP service. + + The term "station" generally refers to a host or router which + contains an NHRP entity. Occasionally, the term station will + describe a "user" of the NHRP client or service functionality; the + difference in usage is largely semantic. + +2.2 Protocol Overview + + In this section, we briefly describe how a source S (which + potentially can be either a router or a host) uses NHRP to determine + the "NBMA next hop" to destination D. + + + + + +Luciani, et. al. Standards Track [Page 4] + +RFC 2332 NBMA NHRP April 1998 + + + For administrative and policy reasons, a physical NBMA subnetwork may + be partitioned into several, disjoint "Logical NBMA subnetworks". A + Logical NBMA subnetwork is defined as a collection of hosts and + routers that share unfiltered subnetwork connectivity over an NBMA + subnetwork. "Unfiltered subnetwork connectivity" refers to the + absence of closed user groups, address screening or similar features + that may be used to prevent direct communication between stations + connected to the same NBMA subnetwork. (Hereafter, unless otherwise + specified, we use the term "NBMA subnetwork" to mean *logical* NBMA + subnetwork.) + + Placed within the NBMA subnetwork are one or more entities that + implement the NHRP protocol. Such stations which are capable of + answering NHRP Resolution Requests are known as "Next Hop Servers" + (NHSs). Each NHS serves a set of destination hosts, which may or may + not be directly connected to the NBMA subnetwork. NHSs cooperatively + resolve the NBMA next hop within their logical NBMA subnetwork. In + addition to NHRP, NHSs may support "classical" ARP service; however, + this will be the subject of a separate document [14]. 
+ + An NHS maintains a cache which contains protocol layer address to + NBMA subnetwork layer address resolution information. This cache can + be constructed from information obtained from NHRP Register packets + (see Section 5.2.3 and 5.2.4), from NHRP Resolution Request/Reply + packets, or through mechanisms outside the scope of this document + (examples of such mechanisms might include ARP[3] and pre-configured + tables). Section 6.2 further describes cache management issues. + + For a station within a given LIS to avoid providing NHS + functionality, there must be one or more NHSs within the NBMA + subnetwork which are providing authoritative address resolution + information on its behalf. Such an NHS is said to be "serving" the + station. A station on a LIS that lacks NHS functionality and is a + client of the NHRP service is known as an NHRP Client or just NHC. If + a serving NHS is to be able to supply the address resolution + information for an NHC then NHSs must exist at each hop along all + routed paths between the NHC making the resolution request and the + destination NHC. The last NHRP entity along the routed path is the + serving NHS; that is, NHRP Resolution Requests are not forwarded to + destination NHCs but rather are processed by the serving NHS. + + An NHC also maintains a cache of protocol address to NBMA address + resolution information. This cache is populated through information + obtained from NHRP Resolution Reply packets, from manual + configuration, or through mechanisms outside the scope of this + document. + + + + + +Luciani, et. al. Standards Track [Page 5] + +RFC 2332 NBMA NHRP April 1998 + + + The protocol proceeds as follows. An event occurs triggering station + S to want to resolve the NBMA address of a path to D.
This is most + likely to be when a data packet addressed to station D is to be + emitted from station S (either because station S is a host, or + station S is a transit router), but the address resolution could also + be triggered by other means (a routing protocol update packet, for + example). Station S first determines the next hop to station D + through normal routing processes (for a host, the next hop may simply + be the default router; for routers, this is the "next hop" to the + destination internetwork layer address). If the destination's + address resolution information is already available in S's cache then + that information is used to forward the packet. Otherwise, if the + next hop is reachable through one of its NBMA interfaces, S + constructs an NHRP Resolution Request packet (see Section 5.2.1) + containing station D's internetwork layer address as the (target) + destination address, S's own internetwork layer address as the source + address (Next Hop Resolution Request initiator), and station S's NBMA + addressing information. Station S may also indicate that it prefers + an authoritative NHRP Resolution Reply (i.e., station S only wishes + to receive an NHRP Resolution Reply from an NHS serving the + destination NHC). Station S emits the NHRP Resolution Request packet + towards the destination. + + If the NHRP Resolution Request is triggered by a data packet then S + may, while awaiting an NHRP Resolution Reply, choose to dispose of + the data packet in one of the following ways: + + (a) Drop the packet + (b) Retain the packet until the NHRP Resolution Reply arrives + and a more optimal path is available + (c) Forward the packet along the routed path toward D + + The choice of which of the above to perform is a local policy matter, + though option (c) is the recommended default, since it may allow data + to flow to the destination while the NBMA address is being resolved. 
+ Note that an NHRP Resolution Request for a given destination MUST NOT + be triggered on every packet. + + When the NHS receives an NHRP Resolution Request, a check is made to + see if it serves station D. If the NHS does not serve D, the NHS + forwards the NHRP Resolution Request to another NHS. Mechanisms for + determining how to forward the NHRP Resolution Request are discussed + in Section 3. + + If this NHS serves D, the NHS resolves station D's NBMA address + information, and generates a positive NHRP Resolution Reply on D's + behalf. NHRP Resolution Replies in this scenario are always marked + as "authoritative". The NHRP Resolution Reply packet contains the + + + +Luciani, et. al. Standards Track [Page 6] + +RFC 2332 NBMA NHRP April 1998 + + + address resolution information for station D which is to be sent back + to S. Note that if station D is not on the NBMA subnetwork, the next + hop internetwork layer address will be that of the egress router + through which packets for station D are forwarded. + + A transit NHS receiving an NHRP Resolution Reply may cache the + address resolution information contained therein. To a subsequent + NHRP Resolution Request, this NHS may respond with the cached, "non- + authoritative" address resolution information if the NHS is permitted + to do so (see Sections 5.2.2 and 6.2 for more information on non- + authoritative versus authoritative NHRP Resolution Replies). Non- + authoritative NHRP Resolution Replies are distinguished from + authoritative NHRP Resolution Replies so that if a communication + attempt based on non-authoritative information fails, a source + station can choose to send an authoritative NHRP Resolution Request. + NHSs MUST NOT respond to authoritative NHRP Resolution Requests with + cached information. + + If the determination is made that no NHS in the NBMA subnetwork can + reply to the NHRP Resolution Request for D then a negative NHRP + Resolution Reply (NAK) is returned. 
This occurs when (a) no next-hop + resolution information is available for station D from any NHS, or + (b) an NHS is unable to forward the NHRP Resolution Request (e.g., + connectivity is lost). + + NHRP Registration Requests, NHRP Purge Requests, NHRP Purge Replies, + and NHRP Error Indications follow a routed path in the same fashion + that NHRP Resolution Requests and NHRP Resolution Replies do. + Specifically, "requests" and "indications" follow the routed path + from Source Protocol Address (which is the address of the station + initiating the communication) to the Destination Protocol Address. + "Replies", on the other hand, follow the routed path from the + Destination Protocol Address back to the Source Protocol Address with + the following exceptions: in the case of a NHRP Registration Reply + and in the case of an NHC initiated NHRP Purge Request, the packet is + always returned via a direct VC (see Sections 5.2.4 and 5.2.5); if + one does not exist then one MUST be created. + + NHRP Requests and NHRP Replies do NOT cross the borders of a NBMA + subnetwork; however, further study is being done in this area (see + Section 7). Thus, the internetwork layer data traffic out of and + into an NBMA subnetwork always traverses an internetwork layer router + at its border. + + NHRP optionally provides a mechanism to send a NHRP Resolution Reply + which contains aggregated address resolution information. For + example, suppose that router X is the next hop from station S to + station D and that X is an egress router for all stations sharing an + + + +Luciani, et. al. Standards Track [Page 7] + +RFC 2332 NBMA NHRP April 1998 + + + internetwork layer address prefix with station D. When an NHRP + Resolution Reply is generated in response to a NHRP Resolution + Request, the responder may augment the internetwork layer address of + station D with a prefix length (see Section 5.2.0.1).
A subsequent + (non-authoritative) NHRP Resolution Request for some destination that + shares an internetwork layer address prefix (for the number of bits + specified in the prefix length) with D may be satisfied with this + cached information. See section 6.2 regarding caching issues. + + To dynamically detect subnetwork-layer filtering in NBMA subnetworks + (e.g., X.25 closed user group facility, or SMDS address screens), to + trace the routed path that an NHRP packet takes, or to provide loop + detection and diagnostic capabilities, a "Route Record" may be + included in NHRP packets (see Sections 5.3.2 and 5.3.3). The Route + Record extensions are the NHRP Forward Transit NHS Record Extension + and the NHRP Reverse Transit NHS Record Extension. They contain the + internetwork (and subnetwork layer) addresses of all intermediate + NHSs between source and destination and between destination and + source respectively. When a source station is unable to communicate + with the responder (e.g., an attempt to open an SVC fails), it may + attempt to do so successively with other subnetwork layer addresses + in the NHRP Forward Transit NHS Record Extension until it succeeds + (if authentication policy permits such action). This approach can + find a suitable egress point in the presence of subnetwork-layer + filtering (which may be source/destination sensitive, for instance, + without necessarily creating separate logical NBMA subnetworks) or + subnetwork-layer congestion (especially in connection-oriented + media). + +3. Deployment + + NHRP Resolution Requests traverse one or more hops within an NBMA + subnetwork before reaching the station that is expected to generate a + response. Each station, including the source station, chooses a + neighboring NHS to which it will forward the NHRP Resolution Request. 
+ The NHS selection procedure typically involves applying a destination + protocol layer address to the protocol layer routing table which + causes a routing decision to be returned. This routing decision is + then used to forward the NHRP Resolution Request to the downstream + NHS. The destination protocol layer address previously mentioned is + carried within the NHRP Resolution Request packet. Note that even + though a protocol layer address was used to acquire a routing + decision, NHRP packets are not encapsulated within a protocol layer + header but rather are carried at the NBMA layer using the + encapsulation described in Section 5. + + + + + + +Luciani, et. al. Standards Track [Page 8] + +RFC 2332 NBMA NHRP April 1998 + + + Each NHS/router examines the NHRP Resolution Request packet on its + way toward the destination. Each NHS which the NHRP packet traverses + on the way to the packet's destination might modify the packet (e.g., + updating the Forward Record extension). Ignoring error situations, + the NHRP Resolution Request eventually arrives at a station that is + to generate an NHRP Resolution Reply. This responding station + "serves" the destination. The responding station generates an NHRP + Resolution Reply using the source protocol address from within the + NHRP packet to determine where the NHRP Resolution Reply should be + sent. + + Rather than use routing to determine the next hop for an NHRP packet, + an NHS may use other applicable means (such as static configuration + information ) in order to determine to which neighboring NHSs to + forward the NHRP Resolution Request packet as long as such other + means would not cause the NHRP packet to arrive at an NHS which is + not along the routed path. The use of static configuration + information for this purpose is beyond the scope of this document. + + The NHS serving a particular destination must lie along the routed + path to that destination. 
In practice, this means that all egress + routers must double as NHSs serving the destinations beyond them, and + that hosts on the NBMA subnetwork are served by routers that double + as NHSs. Also, this implies that forwarding of NHRP packets within + an NBMA subnetwork requires a contiguous deployment of NHRP capable + routers. It is important that, in a given LIS/LAG which is using + NHRP, all NHSs within the LIS/LAG have at least some portion of their + resolution databases synchronized so that a packet arriving at one + router/NHS in a given LIS/LAG will be forwarded in the same fashion + as a packet arriving at a different router/NHS for the given LIS/LAG. + One method, among others, is to use the Server Cache Synchronization + Protocol (SCSP) [12]. It is RECOMMENDED that SCSP be the method used + when a LIS/LAG contains two or more router/NHSs. + + During migration to NHRP, it cannot be expected that all routers + within the NBMA subnetwork are NHRP capable. Thus, NHRP traffic + which would otherwise need to be forwarded through such routers can + be expected to be dropped due to the NHRP packet not being + recognized. In this case, NHRP will be unable to establish any + transit paths whose discovery requires the traversal of the non-NHRP + speaking routers. If the client has tried and failed to acquire a + cut through path then the client should use the network layer routed + path as a default. + + If an NBMA technology offers a group, an anycast, or a multicast + addressing feature then the NHC may be configured with such an + address (appropriate to the routing realm it participates in) which + would be assigned to all NHS serving that routing realm. This + + + +Luciani, et. al. Standards Track [Page 9] + +RFC 2332 NBMA NHRP April 1998 + + + address can then be used for establishing an initial connection to an + NHS to transmit a registration request. This address may not be used + for sending NHRP requests. 
The resulting VC may be used for NHRP + requests if and only if the registration response is received over + that VC, thereby indicating that one happens to have anycast + connected to an NHS serving the LIS/LAG. In the case of non- + connection oriented networks, or of multicast (rather than anycast) + addresses, the address MUST NOT be used for sending NHRP resolution + requests. + + When an NHS "serves" an NHC, the NHS MUST send NHRP messages destined + for the NHC directly to the NHC. That is, the NHRP message MUST NOT + transit through any NHS which is not serving the NHC when the NHRP + message is currently at an NHS which does serve the NHC (this, of + course, assumes the NHRP message is destined for the NHC). Further, + an NHS which serves an NHC SHOULD have a direct NBMA level connection + to that NHC (see Section 5.2.3 and 5.2.4 for examples). + + With the exception of NHRP Registration Requests (see Section 5.2.3 + and 5.2.4 for details of the NHRP Registration Request case), an NHC + MUST send NHRP messages over a direct NBMA level connection between + the serving NHS and the served NHC. + + It may not be desirable to maintain semi-permanent NBMA level + connectivity between the NHC and the NHS. In this case, when NBMA + level connectivity is initially setup between the NHS and the NHC (as + described in Section 5.2.4), the NBMA address of the NHS should be + obtained through the NBMA level signaling technology. This address + should be stored for future use in setting up subsequent NBMA level + connections. A somewhat more information rich technique to obtain + the address information (and more) of the serving NHS would be for + the NHC to include the Responder Address extension (see Section + 5.3.1) in the NHRP Registration Request and to store the information + returned to the NHC in the Responder Address extension which is + subsequently included in the NHRP Registration Reply.
Note also + that, in practice, a client's default router should also be its NHS; + thus a client may be able to know the NBMA address of its NHS from + the configuration which was already required for the client to be + able to communicate. Further, as mentioned in Section 4, NHCs may be + configured with the addressing information of one or more NHSs. + +4. Configuration + + Next Hop Clients + + An NHC connected to an NBMA subnetwork MAY be configured with the + Protocol address(es) and NBMA address(es) of its NHS(s). The + NHS(s) will likely also represent the NHC's default or peer + + + +Luciani, et. al. Standards Track [Page 10] + +RFC 2332 NBMA NHRP April 1998 + + + routers, so their NBMA addresses may be obtained from the NHC's + existing configuration. If the NHC is attached to several + subnetworks (including logical NBMA subnetworks), the NHC should + also be configured to receive routing information from its NHS(s) + and peer routers so that it can determine which internetwork layer + networks are reachable through which subnetworks. + + Next Hop Servers + + An NHS is configured with knowledge of its own internetwork layer + and NBMA addresses. An NHS MAY also be configured with a set of + internetwork layer address prefixes that correspond to the + internetwork layer addresses of the stations it serves. The NBMA + addresses of the stations served by the NHS may be learned via NHRP + Registration packets. + + If a served NHC is attached to several subnetworks, the + router/route-server coresident with the serving NHS may also need + to be configured to advertise routing information to such NHCs. + + If an NHS acts as an egress router for stations connected to other + subnetworks than the NBMA subnetwork, the NHS must, in addition to + the above, be configured to exchange routing information between + the NBMA subnetwork and these other subnetworks. 
+ + In all cases, routing information is exchanged using conventional + intra-domain and/or inter-domain routing protocols. + +5. NHRP Packet Formats + + This section describes the format of NHRP packets. In the following, + unless otherwise stated explicitly, the unqualified term "request" + refers generically to any of the NHRP packet types which are + "requests". Further, unless otherwise stated explicitly, the + unqualified term "reply" refers generically to any of the NHRP packet + types which are "replies". + + An NHRP packet consists of a Fixed Part, a Mandatory Part, and an + Extensions Part. The Fixed Part is common to all NHRP packet types. + The Mandatory Part MUST be present, but varies depending on packet + type. The Extensions Part also varies depending on packet type, and + need not be present. + + The length of the Fixed Part is fixed at 20 octets. The length of + the Mandatory Part is determined by the contents of the extensions + offset field (ar$extoff). If ar$extoff=0x0 then the mandatory part + length is equal to total packet length (ar$pktsz) minus 20 otherwise + the mandatory part length is equal to ar$extoff minus 20. The length + + + +Luciani, et. al. Standards Track [Page 11] + +RFC 2332 NBMA NHRP April 1998 + + + of the Extensions Part is implied by ar$pktsz minus ar$extoff. NHSs + may increase the size of an NHRP packet as a result of extension + processing, but not beyond the offered maximum packet size of the + NBMA network. + + NHRP packets are actually members of a wider class of address mapping + and management protocols being developed by the IETF. A specific + encapsulation, based on the native formats used on the particular + NBMA network over which NHRP is carried, indicates the generic IETF + mapping and management protocol. 
For example, SMDS networks always + use LLC/SNAP encapsulation at the NBMA layer [4], and an NHRP packet + is preceded by the following LLC/SNAP encapsulation: + + [0xAA-AA-03] [0x00-00-5E] [0x00-03] + + The first three octets are LLC, indicating that SNAP follows. The + SNAP OUI portion is the IANA's OUI, and the SNAP PID portion + identifies the mapping and management protocol. A field in the Fixed + Header following the encapsulation indicates that it is NHRP. + + ATM uses either LLC/SNAP encapsulation of each packet (including + NHRP), or uses no encapsulation on VCs dedicated to a single protocol + (see [7]). Frame Relay and X.25 both use NLPID/SNAP encapsulation or + identification of NHRP, using a NLPID of 0x0080 and the same SNAP + contents as above (see [8], [9]). + + Fields marked "unused" MUST be set to zero on transmission, and + ignored on receipt. + + Most packet types (ar$op.type) have both internetwork layer + protocol-independent fields and protocol-specific fields. The + protocol type/snap fields (ar$pro.type/snap) qualify the format of + the protocol-specific fields. + +5.1 NHRP Fixed Header + + The Fixed Part of the NHRP packet contains those elements of the NHRP + packet which are always present and do not vary in size with the type + of packet. + + + + + + + + + + + + +Luciani, et. al. 
Standards Track [Page 12] + +RFC 2332 NBMA NHRP April 1998 + + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ar$afn | ar$pro.type | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ar$pro.snap | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ar$pro.snap | ar$hopcnt | ar$pktsz | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ar$chksum | ar$extoff | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ar$op.version | ar$op.type | ar$shtl | ar$sstl | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + ar$afn + Defines the type of "link layer" addresses being carried. This + number is taken from the 'address family number' list specified in + [6]. This field has implications to the coding of ar$shtl and + ar$sstl as described below. + + ar$pro.type + field is a 16 bit unsigned integer representing the following + number space: + + 0x0000 to 0x00FF Protocols defined by the equivalent NLPIDs. + 0x0100 to 0x03FF Reserved for future use by the IETF. + 0x0400 to 0x04FF Allocated for use by the ATM Forum. + 0x0500 to 0x05FF Experimental/Local use. + 0x0600 to 0xFFFF Protocols defined by the equivalent Ethertypes. + + (based on the observations that valid Ethertypes are never smaller + than 0x600, and NLPIDs never larger than 0xFF.) + + ar$pro.snap + When ar$pro.type has a value of 0x0080, a SNAP encoded extension is + being used to encode the protocol type. This snap extension is + placed in the ar$pro.snap field. This is termed the 'long form' + protocol ID. If ar$pro != 0x0080 then the ar$pro.snap field MUST be + zero on transmit and ignored on receive. The ar$pro.type field + itself identifies the protocol being referred to. This is termed + the 'short form' protocol ID. 
+ + In all cases, where a protocol has an assigned number in the + ar$pro.type space (excluding 0x0080) the short form MUST be used + when transmitting NHRP messages; i.e., if Ethertype or NLPID + codings exist then they are used on transmit rather than the + + + +Luciani, et. al. Standards Track [Page 13] + +RFC 2332 NBMA NHRP April 1998 + + + ethertype. If both Ethertype and NLPID codings exist then when + transmitting NHRP messages, the Ethertype coding MUST be used (this + is consistent with RFC 1483 coding). So, for example, the + following codings exist for IP: + + SNAP: ar$pro.type = 0x00-80, ar$pro.snap = 0x00-00-00-08-00 + NLPID: ar$pro.type = 0x00-CC, ar$pro.snap = 0x00-00-00-00-00 + Ethertype: ar$pro.type = 0x08-00, ar$pro.snap = 0x00-00-00-00-00 + + and thus, since the Ethertype coding exists, it is used in + preference. + + ar$hopcnt + The Hop count indicates the maximum number of NHSs that an NHRP + packet is allowed to traverse before being discarded. This field + is used in a similar fashion to the way that a TTL is used in an IP + packet and should be set accordingly. Each NHS decrements the TTL + as the NHRP packet transits the NHS on the way to the next hop + along the routed path to the destination. If an NHS receives an + NHRP packet which it would normally forward to a next hop and that + packet contains an ar$hopcnt set to zero then the NHS sends an + error indication message back to the source protocol address + stating that the hop count has been exceeded (see Section 5.2.7) + and the NHS drops the packet in error; however, an error + indication is never sent as a result of receiving an error + indication. When a responding NHS replies to an NHRP request, that + NHS places a value in ar$hopcnt as if it were sending a request of + its own. + + ar$pktsz + The total length of the NHRP packet, in octets (excluding link + layer encapsulation). + + ar$chksum + The standard IP checksum over the entire NHRP packet starting at + the fixed header. 
If the packet is an odd number of bytes in + length then this calculation is performed as if a byte set to 0x00 + is appended to the end of the packet. + + ar$extoff + This field identifies the existence and location of NHRP + extensions. If this field is 0 then no extensions exist otherwise + this field represents the offset from the beginning of the NHRP + packet (i.e., starting from the ar$afn field) of the first + extension. + + + + + + +Luciani, et. al. Standards Track [Page 14] + +RFC 2332 NBMA NHRP April 1998 + + + ar$op.version + This field indicates what version of generic address mapping and + management protocol is represented by this message. + + 0 MARS protocol [11]. + 1 NHRP as defined in this document. + 0x02 - 0xEF Reserved for future use by the IETF. + 0xF0 - 0xFE Allocated for use by the ATM Forum. + 0xFF Experimental/Local use. + + ar$op.type + When ar$op.version == 1, this is the NHRP packet type: NHRP + Resolution Request(1), NHRP Resolution Reply(2), NHRP Registration + Request(3), NHRP Registration Reply(4), NHRP Purge Request(5), NHRP + Purge Reply(6), or NHRP Error Indication(7). Use of NHRP packet + Types in the range 128 to 255 are reserved for research or use in + other protocol development and will be administered by IANA as + described in Section 9. + + ar$shtl + Type & length of source NBMA address interpreted in the context of + the 'address family number'[6] indicated by ar$afn. See below for + more details. + + ar$sstl + Type & length of source NBMA subaddress interpreted in the context + of the 'address family number'[6] indicated by ar$afn. When an + NBMA technology has no concept of a subaddress, the subaddress + length is always coded ar$sstl = 0 and no storage is allocated for + the subaddress in the appropriate mandatory part. See below for + more details. 
+ + Subnetwork layer address type/length fields (e.g., ar$shtl, Cli Addr + T/L) and subnetwork layer subaddress type/length fields (e.g., + ar$sstl, Cli SAddr T/L) are coded as follows: + + 7 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+-+ + |0|x| length | + +-+-+-+-+-+-+-+-+ + + The most significant bit is reserved and MUST be set to zero. The + second most significant bit (x) is a flag indicating whether the + address being referred to is in: + + - NSAP format (x = 0). + - Native E.164 format (x = 1). + + + + +Luciani, et. al. Standards Track [Page 15] + +RFC 2332 NBMA NHRP April 1998 + + + For NBMA technologies that use neither NSAP nor E.164 format + addresses, x = 0 SHALL be used to indicate the native form for the + particular NBMA technology. + + If the NBMA network is ATM and a subaddress (e.g., Source NBMA + SubAddress, Client NBMA SubAddress) is to be included in any part of + the NHRP packet then ar$afn MUST be set to 0x000F; further, the + subnetwork layer address type/length fields (e.g., ar$shtl, Cli Addr + T/L) and subnetwork layer subaddress type/length fields (e.g., + ar$sstl, Cli SAddr T/L) MUST be coded as in [11]. If the NBMA + network is ATM and no subaddress field is to be included in any part + of the NHRP packet then ar$afn MAY be set to 0x0003 (NSAP) or 0x0008 + (E.164) accordingly. + + The bottom 6 bits are an unsigned integer value indicating the length + of the associated NBMA address in octets. If this value is zero the + flag x is ignored. + +5.2.0 Mandatory Part + + The Mandatory Part of the NHRP packet contains the operation specific + information (e.g., NHRP Resolution Request/Reply, etc.) and variable + length data which is pertinent to the packet type. + +5.2.0.1 Mandatory Part Format + + Sections 5.2.1 through 5.2.6 have a very similar mandatory part. + This mandatory part includes a common header and zero or more Client + Information Entries (CIEs). Section 5.2.7 has a different format + which is specified in that section.
+ + The common header looks like the following: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Src Proto Len | Dst Proto Len | Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Request ID | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source NBMA Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source NBMA Subaddress (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source Protocol Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Destination Protocol Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + +Luciani, et. al. Standards Track [Page 16] + +RFC 2332 NBMA NHRP April 1998 + + + And the CIEs have the following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Code | Prefix Length | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Maximum Transmission Unit | Holding Time | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cli Addr T/L | Cli SAddr T/L | Cli Proto Len | Preference | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Client NBMA Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Client NBMA Subaddress (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Client Protocol Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + ..................... 
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Code | Prefix Length | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Maximum Transmission Unit | Holding Time | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cli Addr T/L | Cli SAddr T/L | Cli Proto Len | Preference | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Client NBMA Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Client NBMA Subaddress (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Client Protocol Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The meanings of the fields are as follows: + + Src Proto Len + This field holds the length in octets of the Source Protocol + Address. + + Dst Proto Len + This field holds the length in octets of the Destination Protocol + Address. + + Flags + These flags are specific to the given message type and they are + explained in each section. + + + + + + +Luciani, et. al. Standards Track [Page 17] + +RFC 2332 NBMA NHRP April 1998 + + + Request ID + A value which, when coupled with the address of the source, + provides a unique identifier for the information contained in a + "request" packet. This value is copied directly from a "request" + packet into the associated "reply". When a sender of a "request" + receives a "reply", it will compare the Request ID and source address + information in the received "reply" against that found in its + outstanding "request" list. When a match is found then the + "request" is considered to be acknowledged. + + The value is taken from a 32 bit counter that is incremented each + time a new "request" is transmitted. The same value MUST be used + when resending a "request", i.e., when a "reply" has not been + received for a "request" and a retry is sent after an appropriate + interval.
+ + It is RECOMMENDED that the initial value for this number be 0. A + node MAY reuse a sequence number if and only if the reuse of the + sequence number is not precluded by use of a particular method of + synchronization (e.g., as described in Appendix A). + + The NBMA address/subaddress form specified below allows combined + E.164/NSAPA form of NBMA addressing. For NBMA technologies without a + subaddress concept, the subaddress field is always ZERO length and + ar$sstl = 0. + + Source NBMA Address + The Source NBMA address field is the address of the source station + which is sending the "request". If the field's length as specified + in ar$shtl is 0 then no storage is allocated for this address at + all. + + Source NBMA SubAddress + The Source NBMA subaddress field is the address of the source + station which is sending the "request". If the field's length as + specified in ar$sstl is 0 then no storage is allocated for this + address at all. + + For those NBMA technologies which have a notion of "Calling Party + Addresses", the Source NBMA Addresses above are the addresses used + when signaling for an SVC. + + "Requests" and "indications" follow the routed path from Source + Protocol Address to the Destination Protocol Address. "Replies", on + the other hand, follow the routed path from the Destination Protocol + Address back to the Source Protocol Address with the following + + + + + +Luciani, et. al. Standards Track [Page 18] + +RFC 2332 NBMA NHRP April 1998 + + + exceptions: in the case of a NHRP Registration Reply and in the case + of an NHC initiated NHRP Purge Request, the packet is always returned + via a direct VC (see Sections 5.2.4 and 5.2.5). + + Source Protocol Address + This is the protocol address of the station which is sending the + "request". This is also the protocol address of the station toward + which a "reply" packet is sent. 
+ + Destination Protocol Address + This is the protocol address of the station toward which a + "request" packet is sent. + + Code + This field is message specific. See the relevant message sections + below. In general, this field is a NAK code; i.e., when the field + is 0 in a reply then the packet is acknowledging a request and if + it contains any other value the packet contains a negative + acknowledgment. + + Prefix Length + This field is message specific. See the relevant message sections + below. In general, however, this field is used to indicate that + the information carried in an NHRP message pertains to an + equivalence class of internetwork layer addresses rather than just + a single internetwork layer address specified. All internetwork + layer addresses that match the first "Prefix Length" bit positions + for the specific internetwork layer address are included in the + equivalence class. If this field is set to 0x00 then this field + MUST be ignored and no equivalence information is assumed (note + that 0x00 is thus equivalent to 0xFF). + + Maximum Transmission Unit + This field gives the maximum transmission unit for the relevant + client station. If this value is 0 then either the default MTU is + used or the MTU negotiated via signaling is used if such + negotiation is possible for the given NBMA. + + Holding Time + The Holding Time field specifies the number of seconds for which + the Next Hop NBMA information specified in the CIE is considered to + be valid. Cached information SHALL be discarded when the holding + time expires. This field must be set to 0 on a NAK. + + + + + + + + +Luciani, et. al. Standards Track [Page 19] + +RFC 2332 NBMA NHRP April 1998 + + + Cli Addr T/L + Type & length of next hop NBMA address specified in the CIE. This + field is interpreted in the context of the 'address family + number'[6] indicated by ar$afn (e.g., ar$afn=0x0003 for ATM).
+ + Cli SAddr T/L + Type & length of next hop NBMA subaddress specified in the CIE. + This field is interpreted in the context of the 'address family + number'[6] indicated by ar$afn (e.g., ar$afn=0x0015 for ATM makes + the address an E.164 and the subaddress an ATM Forum NSAP address). + When an NBMA technology has no concept of a subaddress, the + subaddress is always null with a length of 0. When the address + length is specified as 0 no storage is allocated for the address. + + Cli Proto Len + This field holds the length in octets of the Client Protocol + Address specified in the CIE. + + Preference + This field specifies the preference for use of the specific CIE + relative to other CIEs. Higher values indicate higher preference. + Action taken when multiple CIEs have equal or highest preference + value is a local matter. + + Client NBMA Address + This is the client's NBMA address. + + Client NBMA SubAddress + This is the client's NBMA subaddress. + + Client Protocol Address + This is the client's internetworking layer address specified. + + Note that an NHS may cache source address binding information from an + NHRP Resolution Request if and only if the conditions described in + Section 6.2 are met for the NHS. In all other cases, source address + binding information appearing in an NHRP message MUST NOT be cached. + +5.2.1 NHRP Resolution Request + + The NHRP Resolution Request packet has a Type code of 1. Its + mandatory part is coded as described in Section 5.2.0.1 and the + message specific meanings of the fields are as follows: + + Flags - The flags field is coded as follows: + + + + + + +Luciani, et. al. Standards Track [Page 20] + +RFC 2332 NBMA NHRP April 1998 + + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |Q|A|D|U|S| unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Q + Set if the station sending the NHRP Resolution Request is a + router; clear if it is a host.
+ + A + This bit is set in a NHRP Resolution Request if only + authoritative next hop information is desired and is clear + otherwise. See the NHRP Resolution Reply section below for + further details on the "A" bit and its usage. + + D + Unused (clear on transmit) + + U + This is the Uniqueness bit. This bit aids in duplicate address + detection. When this bit is set in an NHRP Resolution Request + and one or more entries exist in the NHS cache which meet the + requirements of the NHRP Resolution Request then only the CIE in + the NHS's cache with this bit set will be returned. Note that + even if this bit was set at registration time, there may still be + multiple CIEs that might fulfill the NHRP Resolution Request + because an entire subnet can be registered through use of the + Prefix Length in the CIE and the address of interest might be + within such a subnet. If the "uniqueness" bit is set and the + responding NHS has one or more cache entries which match the + request but no such cache entry has the "uniqueness" bit set, + then the NHRP Resolution Reply returns with a NAK code of "13 - + Binding Exists But Is Not Unique" and no CIE is included. If a + client wishes to receive non-unique Next Hop Entries, then + the client must have the "uniqueness" bit set to zero in its NHRP + Resolution Request. Note that when this bit is set in an NHRP + Registration Request, only a single CIE may be specified in the + NHRP Registration Request and that CIE must have the Prefix + Length field set to 0xFF. + + S + Set if the binding between the Source Protocol Address and the + Source NBMA information in the NHRP Resolution Request is + guaranteed to be stable and accurate (e.g., these addresses are + those of an ingress router which is connected to an ethernet stub + network or the NHC is an NBMA attached host). + + + + +Luciani, et. al.
Standards Track [Page 21] + +RFC 2332 NBMA NHRP April 1998 + + + Zero or one CIEs (see Section 5.2.0.1) may be specified in an NHRP + Resolution Request. If one is specified then that entry carries the + pertinent information for the client sourcing the NHRP Resolution + Request. Usage of the CIE in the NHRP Resolution Request is + described below: + + Prefix Length + If a CIE is specified in the NHRP Resolution Request then the + Prefix Length field may be used to qualify the widest acceptable + prefix which may be used to satisfy the NHRP Resolution Request. + In the case of NHRP Resolution Request/Reply, the Prefix Length + specifies the equivalence class of addresses which match the + first "Prefix Length" bit positions of the Destination Protocol + Address. If the "U" bit is set in the common header then this + field MUST be set to 0xFF. + + Maximum Transmission Unit + This field gives the maximum transmission unit for the source + station. A possible use of this field in the NHRP Resolution + Request packet is for the NHRP Resolution Requester to ask for a + target MTU. + + Holding Time + The Holding Time specified in the one CIE permitted to be + included in an NHRP Resolution Request is the amount of time + which the source address binding information in the NHRP + Resolution Request is permitted to be cached by transit and + responding NHSs. Note that this field may only have a non-zero + value if the S bit is set. + + All other fields in the CIE MUST be ignored and SHOULD be set to 0. + + The Destination Protocol Address in the common header of the + Mandatory Part of this message contains the protocol address of the + station for which resolution is desired. An NHC MUST send the NHRP + Resolution Request directly to one of its serving NHSs (see Section 3 + for more information). + +5.2.2 NHRP Resolution Reply + + The NHRP Resolution Reply packet has a Type code of 2.
CIEs + correspond to Next Hop Entries in an NHS's cache which match the + criteria in the NHRP Resolution Request. Its mandatory part is coded + as described in Section 5.2.0.1. The message specific meanings of + the fields are as follows: + + Flags - The flags field is coded as follows: + + + + +Luciani, et. al. Standards Track [Page 22] + +RFC 2332 NBMA NHRP April 1998 + + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |Q|A|D|U|S| unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Q + Copied from the NHRP Resolution Request. Set if the NHRP + Resolution Requester is a router; clear if it is a host. + + A + Set if the next hop CIE in the NHRP Resolution Reply is + authoritative; clear if the NHRP Resolution Reply is non- + authoritative. + + When an NHS receives a NHRP Resolution Request for authoritative + information for which it is the authoritative source, it MUST + respond with a NHRP Resolution Reply containing all and only + those next hop CIEs which are contained in the NHS's cache which + both match the criteria of the NHRP Resolution Request and are + authoritative cache entries. An NHS is an authoritative source + for a NHRP Resolution Request if the information in the NHS's + cache matches the NHRP Resolution Request criteria and that + information was obtained through a NHRP Registration Request or + through synchronization with an NHS which obtained this + information through a NHRP Registration Request. An + authoritative cache entry is one which is obtained through a NHRP + Registration Request or through synchronization with an NHS which + obtained this information through a NHRP Registration Request. + + An NHS obtains non-authoritative CIEs through promiscuous + listening to NHRP packets other than NHRP Registrations which are + directed at it. 
A NHRP Resolution Request which indicates a + request for non-authoritative information should cause a NHRP + Resolution Reply which contains all entries in the replying NHS's + cache (i.e., both authoritative and non-authoritative) which + match the criteria specified in the request. + + D + Set if the association between destination and the associated next + hop information included in all CIEs of the NHRP Resolution Reply + is guaranteed to be stable for the lifetime of the information + (the holding time). This is the case if the Next Hop protocol + address in a CIE identifies the destination (though it may be + different in value than the Destination address if the + destination system has multiple addresses) or if the destination + is not connected directly to the NBMA subnetwork but the egress + router to that destination is guaranteed to be stable (such as + + + +Luciani, et. al. Standards Track [Page 23] + +RFC 2332 NBMA NHRP April 1998 + + + when the destination is immediately adjacent to the egress router + through a non-NBMA interface). + + U + This is the Uniqueness bit. See the NHRP Resolution Request + section above for details. When this bit is set, only one CIE is + included since only one unique binding should exist in an NHS's + cache. + + S + Copied from NHRP Resolution Request message. + + One or more CIEs are specified in the NHRP Resolution Reply. Each CIE + contains NHRP next hop information which the responding NHS has + cached and which matches the parameters specified in the NHRP + Resolution Request. If no match is found by the NHS issuing the NHRP + Resolution Reply then a single CIE is enclosed with a CIE Code + set appropriately (see below) and all other fields MUST be ignored + and SHOULD be set to 0. In order to facilitate the use of NHRP by + minimal client implementations, the first CIE MUST contain the next + hop with the highest preference value so that such an implementation + need parse only a single CIE.
+ + Code + If this field is set to zero then this packet contains a + positively acknowledged NHRP Resolution Reply. If this field + contains any other value then this message contains an NHRP + Resolution Reply NAK which means that an appropriate + internetworking layer to NBMA address binding was not available + in the responding NHS's cache. If NHRP Resolution Reply contains + a Client Information Entry with a NAK Code other than 0 then it + MUST NOT contain any other CIE. Currently defined NAK Codes are + as follows: + + 4 - Administratively Prohibited + + An NHS may refuse an NHRP Resolution Request attempt for + administrative reasons (due to policy constraints or routing + state). If so, the NHS MUST send an NHRP Resolution Reply + which contains a NAK code of 4. + + 5 - Insufficient Resources + + If an NHS cannot serve a station due to a lack of resources + (e.g., can't store sufficient information to send a purge if + routing changes), the NHS MUST reply with a NAKed NHRP + Resolution Reply which contains a NAK code of 5. + + + + +Luciani, et. al. Standards Track [Page 24] + +RFC 2332 NBMA NHRP April 1998 + + + 12 - No Internetworking Layer Address to NBMA Address Binding + Exists + + This code states that there were absolutely no internetworking + layer address to NBMA address bindings found in the responding + NHS's cache. + + 13 - Binding Exists But Is Not Unique + + This code states that there were one or more internetworking + layer address to NBMA address bindings found in the responding + NHS's cache, however none of them had the uniqueness bit set. + + Prefix Length + In the case of NHRP Resolution Reply, the Prefix Length specifies + the equivalence class of addresses which match the first "Prefix + Length" bit positions of the Destination Protocol Address. 
+ + Holding Time + The Holding Time specified in a CIE of an NHRP Resolution Reply + is the amount of time remaining before the expiration of the + client information which is cached at the replying NHS. It is + not the value which was registered by the client. + + The remainder of the fields for the CIE for each next hop are + filled out as they were defined when the next hop was registered + with the responding NHS (or one of the responding NHS's + synchronized servers) via the NHRP Registration Request. + + Load-splitting may be performed when more than one Client Information + Entry is returned to a requester when equal preference values are + specified. Also, the alternative addresses may be used in case of + connectivity failure in the NBMA subnetwork (such as a failed call + attempt in connection-oriented NBMA subnetworks). + + Any extensions present in the NHRP Resolution Request packet MUST be + present in the NHRP Resolution Reply even if the extension is non- + Compulsory. + + If an unsolicited NHRP Resolution Reply packet is received, an Error + Indication of type Invalid NHRP Resolution Reply Received SHOULD be + sent in response. + + When an NHS that serves a given NHC receives an NHRP Resolution Reply + destined for that NHC then the NHS MUST send the NHRP Resolution + Reply directly to the NHC (see Section 3). + + + + + +Luciani, et. al. Standards Track [Page 25] + +RFC 2332 NBMA NHRP April 1998 + + +5.2.3 NHRP Registration Request + + The NHRP Registration Request is sent from a station to an NHS to + notify the NHS of the station's NBMA information. It has a Type code + of 3. Each CIE corresponds to Next Hop information which is to be + cached at an NHS. The mandatory part of an NHRP Registration Request + is coded as described in Section 5.2.0.1.
The message specific + meanings of the fields are as follows: + + Flags - The flags field is coded as follows: + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |U| unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + U + This is the Uniqueness bit. When set in an NHRP Registration + Request, this bit indicates that the registration of the protocol + address is unique within the confines of the set of synchronized + NHSs. This "uniqueness" qualifier MUST be stored in the NHS/NHC + cache. Any attempt to register a binding between the protocol + address and an NBMA address when this bit is set MUST be rejected + with a Code of "14 - Unique Internetworking Layer Address Already + Registered" if the replying NHS already has a cache entry for the + protocol address and the cache entry has the "uniqueness" bit + set. A registration of a CIE's information is rejected when the + CIE is returned with the Code field set to anything other than + 0x00. See the description of the uniqueness bit in NHRP + Resolution Request section above for further details. When this + bit is set, only one CIE MAY be included in the NHRP + Registration Request. + + Request ID + The request ID has the same meaning as described in Section + 5.2.0.1. However, the request ID for NHRP Registrations which is + maintained at each client MUST be kept in non-volatile memory so + that when a client crashes and reregisters there will be no + inconsistency in the NHS's database. In order to reduce the + overhead associated with updating non-volatile memory, the actual + updating need not be done with every increment of the Request ID + but could be done, for example, every 50 or 100 increments. In + this scenario, when a client crashes and reregisters it knows to + add 100 to the value of the Request ID in the non-volatile memory + before using the Request ID for subsequent registrations. + + + + + +Luciani, et. al.
Standards Track [Page 26] + +RFC 2332 NBMA NHRP April 1998 + + + One or more CIEs are specified in the NHRP Registration Request. + Each CIE contains next hop information which a client is attempting + to register with its servers. Generally, all fields in CIEs enclosed + in NHRP Registration Requests are coded as described in Section + 5.2.0.1. However, if a station is only registering itself with the + NHRP Registration Request then it MAY code the Cli Addr T/L, Cli + SAddr T/L, and Cli Proto Len as zero which signifies that the client + address information is to be taken from the source information in the + common header (see Section 5.2.0.1). Below, further clarification is + given for some fields in a CIE in the context of a NHRP Registration + Request. + + Code + This field is set to 0x00 in NHRP Registration Requests. + + Prefix Length + + This field may be used in a NHRP Registration Request to register + equivalence information for the Client Protocol Address specified + in the CIE of an NHRP Registration Request. In the case of NHRP + Registration Request, the Prefix Length specifies the equivalence + class of addresses which match the first "Prefix Length" bit + positions of the Client Protocol Address. If the "U" bit is set + in the common header then this field MUST be set to 0xFF. + + The NHRP Registration Request is used to register an NHC's NHRP + information with its NHSs. If an NHC is configured with the protocol + address of a serving NHS then the NHC may place the NHS's protocol + address in the Destination Protocol Address field of the NHRP + Registration Request common header otherwise the NHC must place its + own protocol address in the Destination Protocol Address field.
+ + When an NHS receives an NHRP Registration Request which has the + Destination Protocol Address field set to an address which belongs to + a LIS/LAG for which the NHS is serving then if the Destination + Protocol Address field is equal to the Source Protocol Address field + (which would happen if the NHC put its protocol address in the + Destination Protocol Address) or the Destination Protocol Address + field is equal to the protocol address of the NHS then the NHS + processes the NHRP Registration Request after doing appropriate error + checking (including any applicable policy checking). + + When an NHS receives an NHRP Registration Request which has the + Destination Protocol Address field set to an address which does not + belong to a LIS/LAG for which the NHS is serving then the NHS + forwards the packet down the routed path toward the appropriate + LIS/LAG. + + + + +Luciani, et. al. Standards Track [Page 27] + +RFC 2332 NBMA NHRP April 1998 + + + When an NHS receives an NHRP Registration Request which has the + Destination Protocol Address field set to an address which belongs to + a LIS/LAG for which the NHS is serving then if the Destination + Protocol Address field does not equal the Source Protocol Address + field and the Destination Protocol Address field does not equal the + protocol address of the NHS then the NHS forwards the message to the + appropriate NHS within the LIS/LAG as specified by Destination + Protocol Address field. + + It is possible that a misconfigured station will attempt to register + with the wrong NHS (i.e., one that cannot serve it due to policy + constraints or routing state). If this is the case, the NHS MUST + reply with a NAK-ed Registration Reply of type Can't Serve This + Address. + + If an NHS cannot serve a station due to a lack of resources, the NHS + MUST reply with a NAK-ed Registration Reply of type Registration + Overflow. 
+ + In order to keep the registration entry from being discarded, the + station MUST re-send the NHRP Registration Request packet often + enough to refresh the registration, even in the face of occasional + packet loss. It is recommended that the NHRP Registration Request + packet be sent at an interval equal to one-third of the Holding Time + specified therein. + +5.2.4 NHRP Registration Reply + + The NHRP Registration Reply is sent by an NHS to a client in response + to that client's NHRP Registration Request. If the Code field of a + CIE in the NHRP Registration Reply has anything other than zero in it + then the NHRP Registration Reply is a NAK otherwise the reply is an + ACK. The NHRP Registration Reply has a Type code of 4. + + An NHRP Registration Reply is formed from an NHRP Registration + Request by changing the type code to 4, updating the CIE Code field, + and filling in the appropriate extensions if they exist. The message + specific meanings of the fields are as follows: + + Attempts to register the information in the CIEs of an NHRP + Registration Request may fail for various reasons. If this is the + case then each failed attempt to register the information in a CIE of + an NHRP Registration Request is logged in the associated NHRP + Registration Reply by setting the CIE Code field to the appropriate + error code as shown below: + + + + + + +Luciani, et. al. Standards Track [Page 28] + +RFC 2332 NBMA NHRP April 1998 + + + CIE Code + + 0 - Successful Registration + + The information in the CIE was successfully registered with the + NHS. + + 4 - Administratively Prohibited + + An NHS may refuse an NHRP Registration Request attempt for + administrative reasons (due to policy constraints or routing + state). If so, the NHS MUST send an NHRP Registration Reply + which contains a NAK code of 4. 
+ + 5 - Insufficient Resources + + If an NHS cannot serve a station due to a lack of resources, + the NHS MUST reply with a NAKed NHRP Registration Reply which + contains a NAK code of 5. + + 14 - Unique Internetworking Layer Address Already Registered + If a client tries to register a protocol address to NBMA + address binding with the uniqueness bit on and the protocol + address already exists in the NHS's cache then if that cache + entry also has the uniqueness bit on then this NAK Code is + returned in the CIE in the NHRP Registration Reply. + + Due to the possible existence of asymmetric routing, an NHRP + Registration Reply may not be able to merely follow the routed path + back to the source protocol address specified in the common header of + the NHRP Registration Reply. As a result, there MUST exist a direct + NBMA level connection between the NHC and its NHS on which to send + the NHRP Registration Reply before NHRP Registration Reply may be + returned to the NHC. If such a connection does not exist then the + NHS must setup such a connection to the NHC by using the source NBMA + information supplied in the common header of the NHRP Registration + Request. + +5.2.5 NHRP Purge Request + + The NHRP Purge Request packet is sent in order to invalidate cached + information in a station. The NHRP Purge Request packet has a type + code of 5. The mandatory part of an NHRP Purge Request is coded as + described in Section 5.2.0.1. The message specific meanings of the + fields are as follows: + + Flags - The flags field is coded as follows: + + + + +Luciani, et. al. Standards Track [Page 29] + +RFC 2332 NBMA NHRP April 1998 + + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |N| unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + N + When set, this bit tells the receiver of the NHRP Purge Request + that the requester does not expect to receive an NHRP Purge + Reply. 
If an unsolicited NHRP Purge Reply is received by a + station where that station is identified in the Source Protocol + Address of the packet then that packet must be ignored. + + One or more CIEs are specified in the NHRP Purge Request. Each CIE + contains next hop information which is to be purged from an NHS/NHC + cache. Generally, all fields in CIEs enclosed in NHRP Purge Requests + are coded as described in Section 5.2.0.1. Below, further + clarification is given for some fields in a CIE in the context of a + NHRP Purge Request. + + Code + This field is set to 0x00 in NHRP Purge Requests. + + Prefix Length + + In the case of NHRP Purge Requests, the Prefix Length specifies + the equivalence class of addresses which match the first "Prefix + Length" bit positions of the Client Protocol Address specified in + the CIE. All next hop information which contains a protocol + address which matches an element of this equivalence class is to + be purged from the receiver's cache. + + The Maximum Transmission Unit and Preference fields of the CIE are + coded as zero. The Holding Time should be coded as zero but there + may be some utility in supplying a "short" holding time to be + applied to the matching next hop information before that + information would be purged; this usage is for further study. The + Client Protocol Address field and the Cli Proto Len field MUST be + filled in. The Client Protocol Address is filled in with the + protocol address to be purged from the receiving station's cache + while the Cli Proto Len is set to the length of the purged client's + protocol address. All remaining fields in the CIE MAY be set to + zero although the client NBMA information (and associated length + fields) MAY be specified to narrow the scope of the NHRP Purge + Request if the requester desires. However, the receiver of an NHRP + Purge Request may choose to ignore the Client NBMA information if + it is supplied. + + + + +Luciani, et. al.
Standards Track [Page 30] + +RFC 2332 NBMA NHRP April 1998 + + + An NHRP Purge Request packet is sent from an NHS to a station to + cause it to delete previously cached information. This is done when + the information may be no longer valid (typically when the NHS has + previously provided next hop information for a station that is not + directly connected to the NBMA subnetwork, and the egress point to + that station may have changed). + + An NHRP Purge Request packet may also be sent from an NHC to an NHS + with which the NHC had previously registered. This allows for an NHC + to invalidate its registration with NHRP before it would otherwise + expire via the holding timer. If an NHC does not have knowledge of a + protocol address of a serving NHS then the NHC must place its own + protocol address in the Destination Protocol Address field and + forward the packet along the routed path. Otherwise, the NHC must + place the protocol address of a serving NHS in this field. + + Serving NHSs may need to send one or more new NHRP Purge Requests as + a result of receiving a purge from one of their served NHCs since the + NHS may have previously responded to NHRP Resolution Requests for + that NHC's NBMA information. These purges are "new" in that they are + sourced by the NHS and not the NHC; that is, for each NHC that + previously sent a NHRP Resolution Request for the purged NHC NBMA + information, an NHRP Purge Request is sent which contains the Source + Protocol/NBMA Addresses of the NHS and the Destination Protocol + Address of the NHC which previously sent an NHRP Resolution Request + prior to the purge. + + The station sending the NHRP Purge Request MAY periodically + retransmit the NHRP Purge Request until either NHRP Purge Request is + acknowledged or until the holding time of the information being + purged has expired. Retransmission strategies for NHRP Purge Requests + are a local matter. 
+ + When a station receives an NHRP Purge Request, it MUST discard any + previously cached information that matches the information in the + CIEs. + + An NHRP Purge Reply MUST be returned for the NHRP Purge Request even + if the station does not have a matching cache entry assuming that the + "N" bit is off in the NHRP Purge Request. + + If the station wishes to reestablish communication with the + destination shortly after receiving an NHRP Purge Request, it should + make an authoritative NHRP Resolution Request in order to avoid any + stale cache entries that might be present in intermediate NHSs (See + section 6.2.2.). It is recommended that authoritative NHRP + Resolution Requests be made for the duration of the holding time of + the old information. + + + +Luciani, et. al. Standards Track [Page 31] + +RFC 2332 NBMA NHRP April 1998 + + +5.2.6 NHRP Purge Reply + + The NHRP Purge Reply packet is sent in order to assure the sender of + an NHRP Purge Request that all cached information of the specified + type has been purged from the station sending the reply. The NHRP + Purge Reply has a type code of 6. + + An NHRP Purge Reply is formed from an NHRP Purge Request by merely + changing the type code in the request to 6. The packet is then + returned to the requester after filling in the appropriate extensions + if they exist. + +5.2.7 NHRP Error Indication + + The NHRP Error Indication is used to convey error indications to the + sender of an NHRP packet. It has a type code of 7. 
The Mandatory + Part has the following format: + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Src Proto Len | Dst Proto Len | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Error Code | Error Offset | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source NBMA Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source NBMA Subaddress (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source Protocol Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Destination Protocol Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Contents of NHRP Packet in error (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Src Proto Len + This field holds the length in octets of the Source Protocol + Address. + + Dst Proto Len + This field holds the length in octets of the Destination Protocol + Address. + + + + + + + +Luciani, et. al. Standards Track [Page 32] + +RFC 2332 NBMA NHRP April 1998 + + + Error Code + An error code indicating the type of error detected, chosen from + the following list: + + 1 - Unrecognized Extension + + When the Compulsory bit of an extension in NHRP packet is set, + the NHRP packet cannot be processed unless the extension has + been processed. The responder MUST return an NHRP Error + Indication of type Unrecognized Extension if it is incapable of + processing the extension. However, if a transit NHS (one which + is not going to generate a reply) detects an unrecognized + extension, it SHALL ignore the extension. + + 3 - NHRP Loop Detected + + A Loop Detected error is generated when it is determined that + an NHRP packet is being forwarded in a loop. 
+ + 6 - Protocol Address Unreachable + + This error occurs when a packet is moving along the routed path + and it reaches a point such that the protocol address of + interest is not reachable. + + 7 - Protocol Error + + A generic packet processing error has occurred (e.g., invalid + version number, invalid protocol type, failed checksum, etc.) + + 8 - NHRP SDU Size Exceeded + + If the SDU size of the NHRP packet exceeds the MTU size of the + NBMA network then this error is returned. + + 9 - Invalid Extension + + If an NHS finds an extension in a packet which is inappropriate + for the packet type, an error is sent back to the sender with + Invalid Extension as the code. + + 10 - Invalid NHRP Resolution Reply Received + + If a client receives a NHRP Resolution Reply for a Next Hop + Resolution Request which it believes it did not make then an + error packet is sent to the station making the reply with an + error code of Invalid Reply Received. + + + + +Luciani, et. al. Standards Track [Page 33] + +RFC 2332 NBMA NHRP April 1998 + + + 11 - Authentication Failure + + If a received packet fails an authentication test then this + error is returned. + + 15 - Hop Count Exceeded + + The hop count which was specified in the Fixed Header of an + NHRP message has been exceeded. + + Error Offset + The offset in octets into the original NHRP packet in which an + error was detected. This offset is calculated starting from the + NHRP Fixed Header. + + Source NBMA Address + The Source NBMA address field is the address of the station which + observed the error. + + Source NBMA SubAddress + The Source NBMA subaddress field is the address of the station + which observed the error. If the field's length as specified in + ar$sstl is 0 then no storage is allocated for this address at all. + + Source Protocol Address + This is the protocol address of the station which issued the Error + packet.
+ + Destination Protocol Address + This is the protocol address of the station which sent the packet + which was found to be in error. + + An NHRP Error Indication packet SHALL NEVER be generated in response + to another NHRP Error Indication packet. When an NHRP Error + Indication packet is generated, the offending NHRP packet SHALL be + discarded. In no case should more than one NHRP Error Indication + packet be generated for a single NHRP packet. + + If an NHS sees its own Protocol and NBMA Addresses in the Source NBMA + and Source Protocol address fields of a transiting NHRP Error + Indication packet then the NHS will quietly drop the packet and do + nothing (this scenario would occur when the NHRP Error Indication + packet was itself in a loop). + + Note that no extensions may be added to an NHRP Error Indication. + + + + + + +Luciani, et. al. Standards Track [Page 34] + +RFC 2332 NBMA NHRP April 1998 + + +5.3 Extensions Part + + The Extensions Part, if present, carries one or more extensions in + {Type, Length, Value} triplets. + + Extensions have the following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |C|u| Type | Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Value... | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + C + "Compulsory." If clear, and the NHS does not recognize the type + code, the extension may safely be ignored. If set, and the NHS + does not recognize the type code, the NHRP "request" is considered + to be in error. (See below for details.) + + u + Unused and must be set to zero. + + Type + The extension type code (see below). The extension type is not + qualified by the Compulsory bit, but is orthogonal to it. 
+ + Length + The length in octets of the value (not including the Type and + Length fields; a null extension will have only an extension header + and a length of zero). + + When extensions exist, the extensions list is terminated by the Null + TLV, having Type = 0 and Length = 0. + + Extensions may occur in any order, but any particular extension type + may occur only once in an NHRP packet unless explicitly stated to the + contrary in the extensions definition. For example, the vendor- + private extension may occur multiple times in a packet in order to + allow for extensions which do not share the same vendor ID to be + represented. It is RECOMMENDED that a given vendor include no more + than one Vendor Private Extension. + + An NHS MUST NOT change the order of extensions. That is, the order + of extensions placed in an NHRP packet by an NHC (or by an NHS when + an NHS sources a packet) MUST be preserved as the packet moves + between NHSs. Minimal NHC implementations MUST only recognize, but + + + +Luciani, et. al. Standards Track [Page 35] + +RFC 2332 NBMA NHRP April 1998 + + + not necessarily parse, the Vendor Private extension and the End Of + Extensions extension. Extensions are only present in a "reply" if + they were present in the corresponding "request" with the exception + of Vendor Private extensions. The previous statement is not intended + to preclude the creation of NHS-only extensions which might be added + to and removed from NHRP packets by the same NHS; such extensions + MUST not be propagated to NHCs. + + The Compulsory bit provides for a means to add to the extension set. + If the bit is set in an extension then the station responding to the + NHRP message which contains that extension MUST be able to understand + the extension (in this case, the station responding to the message is + the station that would issue an NHRP reply in response to a NHRP + request). 
As a result, the responder MUST return an NHRP Error + Indication of type Unrecognized Extension. If the Compulsory bit is + clear then the extension can be safely ignored; however, if an + ignored extension is in a "request" then it MUST be returned, + unchanged, in the corresponding "reply" packet type. + + If a transit NHS (one which is not going to generate a "reply") + detects an unrecognized extension, it SHALL ignore the extension. If + the Compulsory bit is set, the transit NHS MUST NOT cache the + information contained in the packet and MUST NOT identify itself as + an egress router (in the Forward Record or Reverse Record + extensions). Effectively, this means, if a transit NHS encounters an + extension which it cannot process and which has the Compulsory bit + set then that NHS MUST NOT participate in any way in the protocol + exchange other than acting as a forwarding agent. + + The NHRP extension Type space is subdivided to encourage use outside + the IETF. + + 0x0000 - 0x0FFF Reserved for NHRP. + 0x1000 - 0x11FF Allocated to the ATM Forum. + 0x1200 - 0x37FF Reserved for the IETF. + 0x3800 - 0x3FFF Experimental use. + + IANA will administer the ranges reserved for the IETF as described in + Section 9. Values in the 'Experimental use' range have only local + significance. + +5.3.0 The End Of Extensions + + Compulsory = 1 + Type = 0 + Length = 0 + + + + + +Luciani, et. al. Standards Track [Page 36] + +RFC 2332 NBMA NHRP April 1998 + + + When extensions exist, the extensions list is terminated by the End + Of Extensions/Null TLV. + +5.3.1 Responder Address Extension + + Compulsory = 1 + Type = 3 + Length = variable + + This extension is used to determine the address of the NHRP + responder; i.e., the entity that generates the appropriate "reply" + packet for a given "request" packet. 
In the case of an NHRP + Resolution Request, the station responding may be different (in the + case of cached replies) than the system identified in the Next Hop + field of the NHRP Resolution Reply. Further, this extension may aid + in detecting loops in the NHRP forwarding path. + + This extension uses a single CIE with the extension specific meanings + of the fields set as follows: + + The Prefix Length fields MUST be set to 0 and ignored. + + CIE Code + 5 - Insufficient Resources + If the responder to an NHRP Resolution Request is an egress point + for the target of the address resolution request (i.e., it is one + of the stations identified in the list of CIEs in an NHRP + Resolution Reply) and the Responder Address extension is included + in the NHRP Resolution Request and insufficient resources to + setup a cut-through VC exist at the responder then the Code field + of the Responder Address Extension is set to 5 in order to tell + the client that a VC setup attempt would in all likelihood be + rejected; otherwise this field MUST be coded as a zero. NHCs MAY + use this field to influence whether they attempt to setup a cut- + through to the egress router. + + Maximum Transmission Unit + This field gives the maximum transmission unit preferred by the + responder. If this value is 0 then either the default MTU is used + or the MTU negotiated via signaling is used if such negotiation is + possible for the given NBMA. + + Holding Time + The Holding Time field specifies the number of seconds for which + the NBMA information of the responder is considered to be valid. + Cached information SHALL be discarded when the holding time + expires. + + + + +Luciani, et. al. Standards Track [Page 37] + +RFC 2332 NBMA NHRP April 1998 + + + "Client Address" information is actually "Responder Address" + information for this extension. Thus, for example, Cli Addr T/L is + the responder NBMA address type and length field.
+ + If a "requester" desires this information, the "requester" SHALL + include this extension with a value of zero. Note that this implies + that no storage is allocated for the Holding Time and Type/Length + fields until the "Value" portion of the extension is filled out. + + If an NHS is generating a "reply" packet in response to a "request" + containing this extension, the NHS SHALL include this extension, + containing its protocol address in the "reply". If an NHS has more + than one protocol address, it SHALL use the same protocol address + consistently in all of the Responder Address, Forward Transit NHS + Record, and Reverse Transit NHS Record extensions. The choice of + which of several protocol address to include in this extension is a + local matter. + + If an NHRP Resolution Reply packet being forwarded by an NHS contains + a protocol address of that NHS in the Responder Address Extension + then that NHS SHALL generate an NHRP Error Indication of type "NHRP + Loop Detected" and discard the NHRP Resolution Reply. + + If an NHRP Resolution Reply packet is being returned by an + intermediate NHS based on cached data, it SHALL place its own address + in this extension (differentiating it from the address in the Next + Hop field). + +5.3.2 NHRP Forward Transit NHS Record Extension + + Compulsory = 1 + Type = 4 + Length = variable + + The NHRP Forward Transit NHS record contains a list of transit NHSs + through which a "request" has traversed. Each NHS SHALL append to + the extension a Forward Transit NHS element (as specified below) + containing its Protocol address. The extension length field and the + ar$chksum fields SHALL be adjusted appropriately. + + The responding NHS, as described in Section 5.3.1, SHALL NOT update + this extension. + + In addition, NHSs that are willing to act as egress routers for + packets from the source to the destination SHALL include information + about their NBMA Address. + + + + + +Luciani, et. al. 
Standards Track [Page 38] + +RFC 2332 NBMA NHRP April 1998 + + + This extension uses a single CIE per NHS Record element with the + extension specific meanings of the fields set as follows: + + The Prefix Length fields MUST be set to 0 and ignored. + + CIE Code + 5 - Insufficient Resources + If an NHRP Resolution Request contains an NHRP Forward Transit + NHS Record Extension and insufficient resources to setup a cut- + through VC exist at the current transit NHS then the CIE Code + field for NHRP Forward Transit NHS Record Extension is set to 5 + in order to tell the client that a VC setup attempt would in all + likelihood be rejected; otherwise this field MUST be coded as a + zero. NHCs MAY use this field to influence whether they attempt + to setup a cut-through as described in Section 2.2. Note that + the NHRP Reverse Transit NHS Record Extension MUST always have + this field set to zero. + + Maximum Transmission Unit + This field gives the maximum transmission unit preferred by the + transit NHS. If this value is 0 then either the default MTU is + used or the MTU negotiated via signaling is used if such + negotiation is possible for the given NBMA. + + Holding Time + The Holding Time field specifies the number of seconds for which + the NBMA information of the transit NHS is considered to be valid. + Cached information SHALL be discarded when the holding time + expires. + + "Client Address" information is actually "Forward Transit NHS + Address" information for this extension. Thus, for example, Cli Addr + T/L is the transit NHS NBMA address type and length field. + + If a "requester" wishes to obtain this information, it SHALL include + this extension with a length of zero. Note that this implies that no + storage is allocated for the Holding Time and Type/Length fields + until the "Value" portion of the extension is filled out. 
+ + If an NHS has more than one Protocol address, it SHALL use the same + Protocol address consistently in all of the Responder Address, + Forward NHS Record, and Reverse NHS Record extensions. The choice of + which of several Protocol addresses to include in this extension is a + local matter. + + + + + + + +Luciani, et. al. Standards Track [Page 39] + +RFC 2332 NBMA NHRP April 1998 + + + If a "request" that is being forwarded by an NHS contains the + Protocol Address of that NHS in one of the Forward Transit NHS + elements then the NHS SHALL generate an NHRP Error Indication of type + "NHRP Loop Detected" and discard the "request". + +5.3.3 NHRP Reverse Transit NHS Record Extension + + Compulsory = 1 + Type = 5 + Length = variable + + The NHRP Reverse Transit NHS record contains a list of transit NHSs + through which a "reply" has traversed. Each NHS SHALL append a + Reverse Transit NHS element (as specified below) containing its + Protocol address to this extension. The extension length field and + ar$chksum SHALL be adjusted appropriately. + + The responding NHS, as described in Section 5.3.1, SHALL NOT update + this extension. + + In addition, NHSs that are willing to act as egress routers for + packets from the source to the destination SHALL include information + about their NBMA Address. + + This extension uses a single CIE per NHS Record element with the + extension specific meanings of the fields set as follows: + + The CIE Code and Prefix Length fields MUST be set to 0 and ignored. + + Maximum Transmission Unit + This field gives the maximum transmission unit preferred by the + transit NHS. If this value is 0 then either the default MTU is + used or the MTU negotiated via signaling is used if such + negotiation is possible for the given NBMA. + + Holding Time + The Holding Time field specifies the number of seconds for which + the NBMA information of the transit NHS is considered to be valid. 
+ Cached information SHALL be discarded when the holding time + expires. + + "Client Address" information is actually "Reverse Transit NHS + Address" information for this extension. Thus, for example, Cli Addr + T/L is the transit NHS NBMA address type and length field. + + + + + + + +Luciani, et. al. Standards Track [Page 40] + +RFC 2332 NBMA NHRP April 1998 + + + If a "requester" wishes to obtain this information, it SHALL include + this extension with a length of zero. Note that this implies that no + storage is allocated for the Holding Time and Type/Length fields + until the "Value" portion of the extension is filled out. + + If an NHS has more than one Protocol address, it SHALL use the same + Protocol address consistently in all of the Responder Address, + Forward NHS Record, and Reverse NHS Record extensions. The choice of + which of several Protocol addresses to include in this extension is a + local matter. + + If a "reply" that is being forwarded by an NHS contains the Protocol + Address of that NHS in one of the Reverse Transit NHS elements then + the NHS SHALL generate an NHRP Error Indication of type "NHRP Loop + Detected" and discard the "reply". + + Note that this information may be cached at intermediate NHSs; if + so, the cached value SHALL be used when generating a reply. + +5.3.4 NHRP Authentication Extension + + Compulsory = 1 Type = 7 Length = variable + + The NHRP Authentication Extension is carried in NHRP packets to + convey authentication information between NHRP speakers. The + Authentication Extension may be included in any NHRP "request" or + "reply" only. + + The authentication is always done pairwise on an NHRP hop-by-hop + basis; i.e., the authentication extension is regenerated at each + hop. If a received packet fails the authentication test, the station + SHALL generate an Error Indication of type "Authentication Failure" + and discard the packet. 
Note that one possible authentication failure + is the lack of an Authentication Extension; the presence or absence + of the Authentication Extension is a local matter. + +5.3.4.1 Header Format + + The authentication header has the following format: + + + + + + + + + + + + +Luciani, et. al. Standards Track [Page 41] + +RFC 2332 NBMA NHRP April 1998 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Reserved | Security Parameter Index (SPI)| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Src Addr... | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + +-+-+-+-+-+-+-+-+-+-+ Authentication Data... -+-+-+-+-+-+-+-+-+-+ + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Security Parameter Index (SPI) can be thought of as an index into a + table that maintains the keys and other information such as hash + algorithm. Src and Dst communicate either offline using manual keying + or online using a key management protocol to populate this table. The + sending NHRP entity always allocates the SPI and the parameters + associated with it. + + Src Addr a variable length field is the address assigned to the + outgoing interface. The length of the addr is obtained from the + source protocol length field in the mandatory part of the NHRP + header. The tuple <spi, src addr> uniquely identifies the key and + other parameters that are used in authentication. + + The length of the authentication data field is dependent on the hash + algorithm used. The data field contains the keyed hash calculated + over the entire NHRP payload. The authentication data field is zeroed + out before the hash is calculated. + +5.3.4.2 SPI and Security Parameters Negotiation + + SPI's can be negotiated either manually or using an Internet Key + Management protocol. Manual keying MUST be supported.
The following + parameters are associated with the tuple - lifetime, + Algorithm, Key. Lifetime indicates the duration in seconds for which + the key is valid. In case of manual keying, this duration can be + infinite. Also, in order to better support manual keying, there may + be multiple tuples active at the same time (Dst being the same). + + Algorithm specifies the hash algorithm agreed upon by the two + entities. HMAC-MD5-128 [16] is the default algorithm. Other + algorithms MAY be supported by defining new values. IANA will assign + the numbers to identify the algorithm being used as described in + Section 9. + + Any Internet standard key management protocol MAY so be used to + negotiate the SPI and parameters. + + + +Luciani, et. al. Standards Track [Page 42] + +RFC 2332 NBMA NHRP April 1998 + + +5.3.4.3 Message Processing + + At the time of adding the authentication extension header, src looks + up in a table to fetch the SPI and the security parameters based on + the outgoing interface address. If there are no entries in the table + and if there is support for key management, the src initiates the key + management protocol to fetch the necessary parameters. The src + constructs the Authentication Extension payload and calculates the + hash by zeroing authentication data field. The result replaces in the + zeroed authentication data field. The src address field in the + payload is the IP address assigned to the outgoing interface. + + If key management is not supported and authentication is mandatory, + the packet is dropped and this information is logged. + + On the receiving end, dst fetches the parameters based on the SPI and + the ip address in the authentication extension payload. The + authentication data field is extracted before zeroing out to + calculate the hash. It computes the hash on the entire payload and if + the hash does not match, then an "abnormal event" has occurred. 
+ +5.3.4.4 Security Considerations + + It is important that the keys chosen are strong as the security of + the entire system depends on the keys being chosen properly and the + correct implementation of the algorithms. + + The security is performed on a hop by hop basis. The data received + can be trusted only so much as one trusts all the entities in the + path traversed. A chain of trust is established amongst NHRP entities + in the path of the NHRP Message . If the security in an NHRP entity + is compromised, then security in the entire NHRP domain is + compromised. + + Data integrity covers the entire NHRP payload. This guarantees that + the message was not modified and the source is authenticated as well. + If authentication extension is not used or if the security is + compromised, then NHRP entities are liable to both spoofing attacks, + active attacks and passive attacks. + + There is no mechanism to encrypt the messages. It is assumed that a + standard layer 3 confidentiality mechanism will be used to encrypt + and decrypt messages. It is recommended to use an Internet standard + key management protocol to negotiate the keys between the neighbors. + Transmitting the keys in clear text, if other methods of negotiation + is used, compromises the security completely. + + + + + +Luciani, et. al. Standards Track [Page 43] + +RFC 2332 NBMA NHRP April 1998 + + + Any NHS is susceptible to Denial of Service (DOS) attacks that cause + it to become overloaded, preventing legitimate packets from being + acted upon properly. A rogue host can send request and registration + packets to the first hop NHS. If the authentication option is not + used, the registration packet is forwarded along the routed path + requiring processing along each NHS. If the authentication option is + used, then only the first hop NHS is susceptible to DOS attacks + (i.e., unauthenticated packets will be dropped rather than forwarded + on). 
If security of any host is compromised (i.e., the keys it is + using to communicate with an NHS become known), then a rogue host can + send NHRP packets to the first hop NHS of the host whose keys were + compromised, which will then forward them along the routed path as in + the case of unauthenticated packets. However, this attack requires + that the rogue host to have the same first hop NHS as that of the + compromised host. Finally, it should be noted that denial of service + attacks that cause routers on the routed path to expend resources + processing NHRP packets are also susceptable to attacks that flood + packets at the same destination as contained in an NHRP packet's + Destination Protocol Address field. + +5.3.5 NHRP Vendor-Private Extension + + Compulsory = 0 + Type = 8 + Length = variable + + The NHRP Vendor-Private Extension is carried in NHRP packets to + convey vendor-private information or NHRP extensions between NHRP + speakers. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Vendor ID | Data.... | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Vendor ID + 802 Vendor ID as assigned by the IEEE [6] + + Data + The remaining octets after the Vendor ID in the payload are + vendor-dependent data. + + This extension may be added to any "request" or "reply" packet and it + is the only extension that may be included multiple times. If the + receiver does not handle this extension, or does not match the Vendor + + + + + +Luciani, et. al. Standards Track [Page 44] + +RFC 2332 NBMA NHRP April 1998 + + + ID in the extension then the extension may be completely ignored by + the receiver. If a Vendor Private Extension is included in a + "request" then it must be copied to the corresponding "reply". + +6. Protocol Operation + + In this section, we discuss certain operational considerations of + NHRP. 
+ +6.1 Router-to-Router Operation + + In practice, the initiating and responding stations may be either + hosts or routers. However, there is a possibility under certain + conditions that a stable routing loop may occur if NHRP is used + between two routers. In particular, attempting to establish an NHRP + path across a boundary where information used in route selection is + lost may result in a routing loop. Such situations include the loss + of BGP path vector information, the interworking of multiple routing + protocols with dissimilar metrics (e.g, RIP and OSPF), etc. In such + circumstances, NHRP should not be used. This situation can be + avoided if there are no "back door" paths between the entry and + egress router outside of the NBMA subnetwork. Protocol mechanisms to + relax these restrictions are under investigation. + + In general it is preferable to use mechanisms, if they exist, in + routing protocols to resolve the egress point when the destination + lies outside of the NBMA subnetwork, since such mechanisms will be + more tightly coupled to the state of the routing system and will + probably be less likely to create loops. + +6.2 Cache Management Issues + + The management of NHRP caches in the source station, the NHS serving + the destination, and any intermediate NHSs is dependent on a number + of factors. + +6.2.1 Caching Requirements + + Source Stations + + Source stations MUST cache all received NHRP Resolution Replies + that they are actively using. They also must cache "incomplete" + entries, i.e., those for which a NHRP Resolution Request has been + sent but those for which an NHRP Resolution Reply has not been + received. This is necessary in order to preserve the Request ID + + + + + + +Luciani, et. al. Standards Track [Page 45] + +RFC 2332 NBMA NHRP April 1998 + + + for retries, and provides the state necessary to avoid triggering + NHRP Resolution Requests for every data packet sent to the + destination. 
+ + Source stations MUST purge expired information from their caches. + Source stations MUST purge the appropriate cached information upon + receipt of an NHRP Purge Request packet. + + When a station has a co-resident NHC and NHS, the co-resident NHS + may reply to NHRP Resolution Requests from the co-resident NHC with + information which the station cached as a result of the co-resident + NHC making its own NHRP Resolution Requests as long as the co- + resident NHS follows the rules for Transit NHSs as seen below. + + Serving NHSs + + The NHS serving the destination (the one which responds + authoritatively to NHRP Resolution Requests) SHOULD cache protocol + address information from all NHRP Resolution Requests to which it + has responded if the information in the NHRP Resolution Reply has + the possibility of changing during its lifetime (so that an NHRP + Purge Request packet can be issued). The internetworking to NBMA + binding information provided by the source station in the NHRP + Resolution Request may also be cached if and only if the "S" bit is + set, the NHRP Resolution Request has included a CIE with the + Holding Time field set greater than zero (this is the valid Holding + Time for the source binding), and only for non-authoritative use + for a period not to exceed the Holding Time. + + Transit NHSs + + A Transit NHS (lying along the NHRP path between the source station + and the responding NHS) may cache source binding information + contained in NHRP Resolution Request packets that it forwards if + and only if the "S" bit is set, the NHRP Resolution Request has + included a CIE with the Holding Time field set greater than zero + (this is the valid Holding Time for the source binding), and only + for non-authoritative use for a period not to exceed the Holding + Time. 
+ + A Transit NHS may cache destination information contained in NHRP + Resolution Reply CIE if only if the D bit is set and then only for + non-authoritative use for a period not to exceed the Holding Time + value contained in the CIE. A Transit NHS MUST NOT cache source + binding information contained in an NHRP Resolution Reply. + + + + + + +Luciani, et. al. Standards Track [Page 46] + +RFC 2332 NBMA NHRP April 1998 + + + Further, a transit NHS MUST discard any cached information when the + prescribed time has expired. It may return cached information in + response to non-authoritative NHRP Resolution Requests only. + +6.2.2 Dynamics of Cached Information + + NBMA-Connected Destinations + + NHRP's most basic function is that of simple NBMA address + resolution of stations directly attached to the NBMA subnetwork. + These mappings are typically very static, and appropriately chosen + holding times will minimize problems in the event that the NBMA + address of a station must be changed. Stale information will cause + a loss of connectivity, which may be used to trigger an + authoritative NHRP Resolution Request and bypass the old data. In + the worst case, connectivity will fail until the cache entry times + out. + + This applies equally to information marked in NHRP Resolution + Replies as being "stable" (via the "D" bit). + + Destinations Off of the NBMA Subnetwork + + If the source of an NHRP Resolution Request is a host and the + destination is not directly attached to the NBMA subnetwork, and + the route to that destination is not considered to be "stable," the + destination mapping may be very dynamic (except in the case of a + subnetwork where each destination is only singly homed to the NBMA + subnetwork). As such the cached information may very likely become + stale. The consequence of stale information in this case will be a + suboptimal path (unless the internetwork has partitioned or some + other routing failure has occurred). 
+ +6.3 Use of the Prefix Length field of a CIE + + A certain amount of care needs to be taken when using the Prefix + Length field of a CIE, in particular with regard to the prefix length + advertised (and thus the size of the equivalence class specified by + it). Assuming that the routers on the NBMA subnetwork are exchanging + routing information, it should not be possible for an NHS to create a + black hole by advertising too large of a set of destinations, but + suboptimal routing (e.g., extra internetwork layer hops through the + NBMA) can result. To avoid this situation an NHS that wants to send + the Prefix Length MUST obey the following rule: + + The NHS examines the Network Layer Reachability Information (NLRI) + associated with the route that the NHS would use to forward towards + the destination (as specified by the Destination internetwork layer + + + +Luciani, et. al. Standards Track [Page 47] + +RFC 2332 NBMA NHRP April 1998 + + + address in the NHRP Resolution Request), and extracts from this + NLRI the shortest address prefix such that: (a) the Destination + internetwork layer address (from the NHRP Resolution Request) is + covered by the prefix, (b) the NHS does not have any routes with + NLRI which form a subset of what is covered by the prefix. The + prefix may then be used in the CIE. + + The Prefix Length field of the CIE should be used with restraint, in + order to avoid NHRP stations choosing suboptimal transit paths when + overlapping prefixes are available. This document specifies the use + of the prefix length only when all the destinations covered by the + prefix are "stable". That is, either: + + (a) All destinations covered by the prefix are on the NBMA network, + or + (b) All destinations covered by the prefix are directly attached to + the NHRP responding station. + + Use of the Prefix Length field of the CIE in other circumstances is + outside the scope of this document. 
+ +6.4 Domino Effect + + One could easily imagine a situation where a router, acting as an + ingress station to the NBMA subnetwork, receives a data packet, such + that this packet triggers an NHRP Resolution Request. If the router + forwards this data packet without waiting for an NHRP transit path to + be established, then when the next router along the path receives the + packet, the next router may do exactly the same - originate its own + NHRP Resolution Request (as well as forward the packet). In fact + such a data packet may trigger NHRP Resolution Request generation at + every router along the path through an NBMA subnetwork. We refer to + this phenomena as the NHRP "domino" effect. + + The NHRP domino effect is clearly undesirable. At best it may result + in excessive NHRP traffic. At worst it may result in an excessive + number of virtual circuits being established unnecessarily. + Therefore, it is important to take certain measures to avoid or + suppress this behavior. NHRP implementations for NHSs MUST provide a + mechanism to address this problem. One possible strategy to address + this problem would be to configure a router in such a way that NHRP + Resolution Request generation by the router would be driven only by + the traffic the router receives over its non-NBMA interfaces + (interfaces that are not attached to an NBMA subnetwork). Traffic + received by the router over its NBMA-attached interfaces would not + trigger NHRP Resolution Requests. Such a router avoids the NHRP + domino effect through administrative means. + + + + +Luciani, et. al. Standards Track [Page 48] + +RFC 2332 NBMA NHRP April 1998 + + +7. NHRP over Legacy BMA Networks + + There would appear to be no significant impediment to running NHRP + over legacy broadcast subnetworks. There may be issues around + running NHRP across multiple subnetworks. 
Running NHRP on broadcast + media has some interesting possibilities; especially when setting up + a cut-through for inter-ELAN inter-LIS/LAG traffic when one or both + end stations are legacy attached. This use for NHRP requires further + research. + +8. Discussion + + The result of an NHRP Resolution Request depends on how routing is + configured among the NHSs of an NBMA subnetwork. If the destination + station is directly connected to the NBMA subnetwork and the routed + path to it lies entirely within the NBMA subnetwork, the NHRP + Resolution Replies always return the NBMA address of the destination + station itself rather than the NBMA address of some egress router. + On the other hand, if the routed path exits the NBMA subnetwork, NHRP + will be unable to resolve the NBMA address of the destination, but + rather will return the address of the egress router. For + destinations outside the NBMA subnetwork, egress routers and routers + in the other subnetworks should exchange routing information so that + the optimal egress router may be found. + + In addition to NHSs, an NBMA station could also be associated with + one or more regular routers that could act as "connectionless + servers" for the station. The station could then choose to resolve + the NBMA next hop or just send the packets to one of its + connectionless servers. The latter option may be desirable if + communication with the destination is short-lived and/or doesn't + require much network resources. The connectionless servers could, of + course, be physically integrated in the NHSs by augmenting them with + internetwork layer switching functionality. + +9. IANA Considerations + + IANA will take advice from the Area Director appointed designated + subject matter expert, in order to assign numbers from the various + number spaces described herein. 
In the event that the Area Director + appointed designated subject matter expert is unavailable, the + relevant IESG Area Director will appoint another expert. Any and all + requests for value assignment within a given number space will be + accepted when the usage of the value assignment documented. Possible + forms of documentantion include, but is not limited to, RFCs or the + product of another cooperative standards body (e.g., the MPOA and + LANE subworking group of the ATM Forum). + + + + +Luciani, et. al. Standards Track [Page 49] + +RFC 2332 NBMA NHRP April 1998 + + +References + + [1] Heinanen, J., and R. Govindan, "NBMA Address Resolution Protocol + (NARP)", RFC 1735, December 1994. + + [2] Plummer, D., "Address Resolution Protocol", STD 37, RFC 826, + November 1982. + + [3] Laubach, M., and J. Halpern, "Classical IP and ARP over ATM", RFC + 2225, April 1998. + + [4] Piscitello,, D., and J. Lawrence, "Transmission of IP datagrams + over the SMDS service", RFC 1209, March 1991. + + [5] Protocol Identification in the Network Layer, ISO/IEC TR + 9577:1990. + + [6] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, RFC 1700, + October 1994. + + [7] Heinanen, J., "Multiprotocol Encapsulation over ATM Adaptation + Layer 5", RFC 1483, July 1993. + + [8] Malis, A., Robinson, D., and R. Ullmann, "Multiprotocol + Interconnect on X.25 and ISDN in the Packet Mode", RFC 1356, August + 1992. + + [9] Bradley, T., Brown, C., and A. Malis, "Multiprotocol Interconnect + over Frame Relay", RFC 1490, July 1993. + + [10] Rekhter, Y., and D. Kandlur, ""Local/Remote" Forwarding Decision + in Switched Data Link Subnetworks", RFC 1937, May 1996. + + [11] Armitage, G., "Support for Multicast over UNI 3.0/3.1 based ATM + Networks", RFC 2022, November 1996. + + [12] Luciani, J., Armitage, G., and J. Halpern, "Server Cache + Synchronization Protocol (SCSP) - NBMA", RFC 2334, April 1998. + + [13] Rekhter, Y., "NHRP for Destinations off the NBMA Subnetwork", + Work In Progress. 
+ + [14] Luciani, J., et. al., "Classical IP and ARP over ATM to NHRP + Transition", Work In Progress. + + [15] Bradner, S., "Key words for use in RFCs to Indicate Requirement + Levels", BCP 14, RFC 2119, March 1997. + + + + +Luciani, et. al. Standards Track [Page 50] + +RFC 2332 NBMA NHRP April 1998 + + + [16] Krawczyk, H., Bellare, M., and R. Canetti, "HMAC: Keyed Hashing + for Message Authentication", RFC 2104, February 1997. + +Acknowledgments + + We would like to thank (in no particular order) Thomas Narten of IBM + for his comments in the role of Internet AD, Juha Heinenan of Telecom + Finland and Ramesh Govidan of ISI for their work on NBMA ARP and the + original NHRP draft, which served as the basis for this work. + Russell Gardo of IBM, John Burnett of Adaptive, Dennis Ferguson of + ANS, Andre Fredette of Bay Networks, Joel Halpern of Newbridge, Paul + Francis of NTT, Tony Li, Bryan Gleeson, and Yakov Rekhter of cisco, + and Grenville Armitage of Bellcore should also be acknowledged for + comments and suggestions that improved this work substantially. We + would also like to thank the members of the ION working group of the + IETF, whose review and discussion of this document have been + invaluable. + +Authors' Addresses + + James V. Luciani Dave Katz + Bay Networks cisco Systems + 3 Federal Street 170 W. Tasman Dr. + Mail Stop: BL3-03 San Jose, CA 95134 USA + Billerica, MA 01821 Phone: +1 408 526 8284 + Phone: +1 978 916 4734 EMail: dkatz@cisco.com + EMail: luciani@baynetworks.com + + David Piscitello Bruce Cole + Core Competence Juniper Networks + 1620 Tuckerstown Road 3260 Jay St. + Dresher, PA 19025 USA Santa Clara, CA 95054 + Phone: +1 215 830 0692 Phone: +1 408 327 1900 + EMail: dave@corecom.com EMail: bcole@jnx.com + + Naganand Doraswamy + Bay Networks, Inc. + 3 Federal Street + Mail Stop: Bl3-03 + Billerica, MA 01801 + Phone: +1 978 916 1323 + EMail: naganand@baynetworks.com + + + + + + + + + +Luciani, et. al. 
Standards Track [Page 51] + +RFC 2332 NBMA NHRP April 1998 + + +Full Copyright Statement + + Copyright (C) The Internet Society (1998). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + + + + + + + + + + + + + + + + + + + + + + + + +Luciani, et. al. 
Standards Track [Page 52] + diff --git a/etc/Makefile b/etc/Makefile new file mode 100644 index 0000000..d9fdd8b --- /dev/null +++ b/etc/Makefile @@ -0,0 +1,5 @@ +ETCFILES := opennhrp.conf opennhrp-script racoon-ph1down.sh racoon-ph1dead.sh + +install: + $(INSTALLDIR) $(DESTDIR)$(CONFDIR) + $(INSTALL) $(addprefix $(src)/,$(ETCFILES)) $(DESTDIR)$(CONFDIR) diff --git a/etc/opennhrp-script b/etc/opennhrp-script new file mode 100755 index 0000000..8d5e2d3 --- /dev/null +++ b/etc/opennhrp-script @@ -0,0 +1,38 @@ +#!/bin/sh + +case $1 in +interface-up) + ip route flush proto 42 dev $NHRP_INTERFACE + ip neigh flush dev $NHRP_INTERFACE + ;; +peer-register) + ;; +peer-up) + if [ -n "$NHRP_DESTMTU" ]; then + ARGS=`ip route get $NHRP_DESTNBMA from $NHRP_SRCNBMA | head -1` + ip route add $ARGS proto 42 mtu $NHRP_DESTMTU + fi + echo "Create link from $NHRP_SRCADDR ($NHRP_SRCNBMA) to $NHRP_DESTADDR ($NHRP_DESTNBMA)" + racoonctl establish-sa -w isakmp inet $NHRP_SRCNBMA $NHRP_DESTNBMA || exit 1 + racoonctl establish-sa -w esp inet $NHRP_SRCNBMA $NHRP_DESTNBMA gre || exit 1 + ;; +peer-down) + echo "Delete link from $NHRP_SRCADDR ($NHRP_SRCNBMA) to $NHRP_DESTADDR ($NHRP_DESTNBMA)" + if [ "$NHRP_PEER_DOWN_REASON" != "lower-down" ]; then + racoonctl delete-sa isakmp inet $NHRP_SRCNBMA $NHRP_DESTNBMA + fi + ip route del $NHRP_DESTNBMA src $NHRP_SRCNBMA proto 42 + ;; +route-up) + echo "Route $NHRP_DESTADDR/$NHRP_DESTPREFIX is up" + ip route replace $NHRP_DESTADDR/$NHRP_DESTPREFIX proto 42 via $NHRP_NEXTHOP dev $NHRP_INTERFACE + ip route flush cache + ;; +route-down) + echo "Route $NHRP_DESTADDR/$NHRP_DESTPREFIX is down" + ip route del $NHRP_DESTADDR/$NHRP_DESTPREFIX proto 42 + ip route flush cache + ;; +esac + +exit 0 diff --git a/etc/opennhrp-script.cert b/etc/opennhrp-script.cert new file mode 100755 index 0000000..d013511 --- /dev/null +++ b/etc/opennhrp-script.cert @@ -0,0 +1,71 @@ +#!/bin/sh +# +# This version of the script check the X509 certificate used to authenticate +# the 
IPsec connection. It parses a special format subject field, and verifies +# the claimed GRE is bound to that certificate, before allowing NHRP +# registration or direct tunnel to succeed. +# +# It also reconfigure BGP filters according to certificate contents. This is +# only useful for hub nodes. +# +# Example of certificate: +# subjectAltName: DirName:/OU=GRE=192.168.1.1/NET=10.1.0.0/16 + +case $1 in +interface-up) + ip route flush proto 42 dev $NHRP_INTERFACE + ip neigh flush dev $NHRP_INTERFACE + ;; +peer-register) + ( + flock -x 200 + + CERT=`racoonctl get-cert inet $NHRP_SRCNBMA $NHRP_DESTNBMA | openssl x509 -inform der -text -noout | egrep -o "/OU=[^/]*(/[0-9]+)?" | cut -b 5-` + if [ -z "`echo "$CERT" | grep "^GRE=$NHRP_DESTADDR"`" ]; then + echo "ERROR: IP $NHRP_DESTADDR at $NHRP_DESTNBMA NOT verified" + exit 1 + fi + + AS=`echo "$CERT" | grep "^AS=" | cut -b 4-` + vtysh -d bgpd -c "configure terminal" \ + -c "router bgp $MY_AS" \ + -c "neighbor $NHRP_DESTADDR remote-as $AS" \ + -c "neighbor $NHRP_DESTADDR peer-group leaf" \ + -c "neighbor $NHRP_DESTADDR prefix-list net-$AS-in in" + + SEQ=5 + (echo "$CERT" | grep "^NET=" | cut -b 5-) | while read NET; do + vtysh -d bgpd -c "configure terminal" \ + -c "ip prefix-list net-$AS-in seq $SEQ permit $NET" + SEQ=$(($SEQ+5) + done + ) 200>/var/lock/opennhrp-script.lock + ;; +peer-up) + echo "Create link from $NHRP_SRCADDR ($NHRP_SRCNBMA) to $NHRP_DESTADDR ($NHRP_DESTNBMA)" + racoonctl establish-sa -w isakmp inet $NHRP_SRCNBMA $NHRP_DESTNBMA || exit 1 + racoonctl establish-sa -w esp inet $NHRP_SRCNBMA $NHRP_DESTNBMA gre || exit 1 + + CERT=`racoonctl get-cert inet $NHRP_SRCNBMA $NHRP_DESTNBMA | openssl x509 -inform der -text -noout | egrep -o "/OU=[^/]*(/[0-9]+)?" 
| cut -b 5-` + if [ -z "`echo "$CERT" | grep "^GRE=$NHRP_DESTADDR"`" ]; then + echo "ERROR: IP $NHRP_DESTADDR at $NHRP_DESTNBMA NOT verified" + exit 1 + fi + ;; +peer-down) + echo "Delete link from $NHRP_SRCADDR ($NHRP_SRCNBMA) to $NHRP_DESTADDR ($NHRP_DESTNBMA)" + racoonctl delete-sa isakmp inet $NHRP_SRCNBMA $NHRP_DESTNBMA + ;; +route-up) + echo "Route $NHRP_DESTADDR/$NHRP_DESTPREFIX is up" + ip route replace $NHRP_DESTADDR/$NHRP_DESTPREFIX proto 42 via $NHRP_NEXTHOP dev $NHRP_INTERFACE + ip route flush cache + ;; +route-down) + echo "Route $NHRP_DESTADDR/$NHRP_DESTPREFIX is down" + ip route del $NHRP_DESTADDR/$NHRP_DESTPREFIX proto 42 + ip route flush cache + ;; +esac + +exit 0 diff --git a/etc/opennhrp.conf b/etc/opennhrp.conf new file mode 100644 index 0000000..6451cb0 --- /dev/null +++ b/etc/opennhrp.conf @@ -0,0 +1,9 @@ +interface gre1 + map 10.255.255.1/24 192.168.200.1 register + cisco-authentication secret + shortcut + redirect + non-caching + +interface lo + shortcut-destination diff --git a/etc/racoon-ph1dead.sh b/etc/racoon-ph1dead.sh new file mode 100755 index 0000000..942e4c2 --- /dev/null +++ b/etc/racoon-ph1dead.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +opennhrpctl cache lowerdown nbma $REMOTE_ADDR local-nbma $LOCAL_ADDR diff --git a/etc/racoon-ph1down.sh b/etc/racoon-ph1down.sh new file mode 100755 index 0000000..c98d985 --- /dev/null +++ b/etc/racoon-ph1down.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +# Purge opennhrp entries only if this was the last ISAKMP phase1 +if [ -z "`racoonctl -ll show-sa isakmp | grep "$LOCAL_ADDR\.[0-9]* * $REMOTE_ADDR\.[0-9]* "`" ]; then + opennhrpctl cache purge nbma $REMOTE_ADDR local-nbma $LOCAL_ADDR +fi diff --git a/libev/LICENSE b/libev/LICENSE new file mode 100644 index 0000000..df62c4f --- /dev/null +++ b/libev/LICENSE @@ -0,0 +1,36 @@ +All files in libev are Copyright (C)2007,2008 Marc Alexander Lehmann. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Alternatively, the contents of this package may be used under the terms +of the GNU General Public License ("GPL") version 2 or any later version, +in which case the provisions of the GPL are applicable instead of the +above. If you wish to allow the use of your version of this package only +under the terms of the GPL and not to allow others to use your version of +this file under the BSD license, indicate your decision by deleting the +provisions above and replace them with the notice and other provisions +required by the GPL in this and the other files of this package. If you do +not delete the provisions above, a recipient may use your version of this +file under either the BSD or the GPL. 
diff --git a/libev/README b/libev/README new file mode 100644 index 0000000..ca403c6 --- /dev/null +++ b/libev/README @@ -0,0 +1,58 @@ +libev is a high-performance event loop/event model with lots of features. +(see benchmark at http://libev.schmorp.de/bench.html) + + +ABOUT + + Homepage: http://software.schmorp.de/pkg/libev + Mailinglist: libev@lists.schmorp.de + http://lists.schmorp.de/cgi-bin/mailman/listinfo/libev + Library Documentation: http://pod.tst.eu/http://cvs.schmorp.de/libev/ev.pod + + Libev is modelled (very losely) after libevent and the Event perl + module, but is faster, scales better and is more correct, and also more + featureful. And also smaller. Yay. + + Some of the specialties of libev not commonly found elsewhere are: + + - extensive and detailed, readable documentation (not doxygen garbage). + - fully supports fork, can detect fork in various ways and automatically + re-arms kernel mechanisms that do not support fork. + - highly optimised select, poll, epoll, kqueue and event ports backends. + - filesystem object (path) watching (with optional linux inotify support). + - wallclock-based times (using absolute time, cron-like). + - relative timers/timeouts (handle time jumps). + - fast intra-thread communication between multiple + event loops (with optional fast linux eventfd backend). + - extremely easy to embed. + - very small codebase, no bloated library. + - fully extensible by being able to plug into the event loop, + integrate other event loops, integrate other event loop users. + - very little memory use (small watchers, small event loop data). + - optional C++ interface allowing method and function callbacks + at no extra memory or runtime overhead. + - optional Perl interface with similar characteristics (capable + of running Glib/Gtk2 on libev, interfaces with Net::SNMP and + libadns). + - support for other languages (multiple C++ interfaces, D, Ruby, + Python) available from third-parties. 
+ + Examples of programs that embed libev: the EV perl module, + rxvt-unicode, gvpe (GNU Virtual Private Ethernet), the Deliantra MMORPG + server (http://www.deliantra.net/), Rubinius (a next-generation Ruby + VM), the Ebb web server, the Rev event toolkit. + + +CONTRIBUTORS + + libev was written and designed by Marc Lehmann and Emanuele Giaquinta. + + The following people sent in patches or made other noteworthy + contributions to the design (for minor patches, see the Changes + file. If I forgot to include you, please shout at me, it was an + accident): + + W.C.A. Wijngaards + Christopher Layne + Chris Brody + diff --git a/libev/VERSION b/libev/VERSION new file mode 100644 index 0000000..666ac08 --- /dev/null +++ b/libev/VERSION @@ -0,0 +1 @@ +libev 3.9 diff --git a/libev/ev.c b/libev/ev.c new file mode 100644 index 0000000..ccd202b --- /dev/null +++ b/libev/ev.c @@ -0,0 +1,3694 @@ +/* + * libev event processing core, watcher management + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* this big block deduces configuration from config.h */ +#ifndef EV_STANDALONE +# ifdef EV_CONFIG_H +# include EV_CONFIG_H +# else +# include "config.h" +# endif + +# if HAVE_CLOCK_SYSCALL +# ifndef EV_USE_CLOCK_SYSCALL +# define EV_USE_CLOCK_SYSCALL 1 +# ifndef EV_USE_REALTIME +# define EV_USE_REALTIME 0 +# endif +# ifndef EV_USE_MONOTONIC +# define EV_USE_MONOTONIC 1 +# endif +# endif +# elif !defined(EV_USE_CLOCK_SYSCALL) +# define EV_USE_CLOCK_SYSCALL 0 +# endif + +# if HAVE_CLOCK_GETTIME +# ifndef EV_USE_MONOTONIC +# define EV_USE_MONOTONIC 1 +# endif +# ifndef EV_USE_REALTIME +# define EV_USE_REALTIME 0 +# endif +# else +# ifndef EV_USE_MONOTONIC +# define EV_USE_MONOTONIC 0 +# endif +# ifndef EV_USE_REALTIME +# define EV_USE_REALTIME 0 +# endif +# endif + +# ifndef EV_USE_NANOSLEEP +# if HAVE_NANOSLEEP +# define EV_USE_NANOSLEEP 1 +# else +# define EV_USE_NANOSLEEP 0 +# endif +# endif + +# ifndef EV_USE_SELECT +# if HAVE_SELECT && HAVE_SYS_SELECT_H +# define EV_USE_SELECT 1 +# else +# define EV_USE_SELECT 0 +# endif +# endif + +# ifndef EV_USE_POLL +# if HAVE_POLL && HAVE_POLL_H +# define EV_USE_POLL 1 +# else +# define EV_USE_POLL 0 +# endif +# endif + +# ifndef EV_USE_EPOLL +# if HAVE_EPOLL_CTL && HAVE_SYS_EPOLL_H +# define EV_USE_EPOLL 1 +# else +# define EV_USE_EPOLL 0 +# endif +# endif + +# ifndef EV_USE_KQUEUE +# if HAVE_KQUEUE && HAVE_SYS_EVENT_H && HAVE_SYS_QUEUE_H +# define EV_USE_KQUEUE 1 +# else +# define EV_USE_KQUEUE 0 +# endif +# endif + +# ifndef EV_USE_PORT +# if HAVE_PORT_H && HAVE_PORT_CREATE +# define EV_USE_PORT 1 +# else +# define EV_USE_PORT 0 +# endif +# endif + +# ifndef EV_USE_INOTIFY +# if HAVE_INOTIFY_INIT && HAVE_SYS_INOTIFY_H +# define EV_USE_INOTIFY 1 +# else +# define EV_USE_INOTIFY 0 +# endif +# endif + +# ifndef EV_USE_SIGNALFD +# if HAVE_SIGNALFD && HAVE_SYS_SIGNALFD_H +# define EV_USE_SIGNALFD 1 +# else +# define EV_USE_SIGNALFD 0 +# endif +# endif + +# ifndef 
EV_USE_EVENTFD +# if HAVE_EVENTFD +# define EV_USE_EVENTFD 1 +# else +# define EV_USE_EVENTFD 0 +# endif +# endif + +#endif + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#ifdef EV_H +# include EV_H +#else +# include "ev.h" +#endif + +#ifndef _WIN32 +# include +# include +# include +#else +# include +# define WIN32_LEAN_AND_MEAN +# include +# ifndef EV_SELECT_IS_WINSOCKET +# define EV_SELECT_IS_WINSOCKET 1 +# endif +#endif + +/* this block tries to deduce configuration from header-defined symbols and defaults */ + +/* try to deduce the maximum number of signals on this platform */ +#if defined (EV_NSIG) +/* use what's provided */ +#elif defined (NSIG) +# define EV_NSIG (NSIG) +#elif defined(_NSIG) +# define EV_NSIG (_NSIG) +#elif defined (SIGMAX) +# define EV_NSIG (SIGMAX+1) +#elif defined (SIG_MAX) +# define EV_NSIG (SIG_MAX+1) +#elif defined (_SIG_MAX) +# define EV_NSIG (_SIG_MAX+1) +#elif defined (MAXSIG) +# define EV_NSIG (MAXSIG+1) +#elif defined (MAX_SIG) +# define EV_NSIG (MAX_SIG+1) +#elif defined (SIGARRAYSIZE) +# define EV_NSIG SIGARRAYSIZE /* Assume ary[SIGARRAYSIZE] */ +#elif defined (_sys_nsig) +# define EV_NSIG (_sys_nsig) /* Solaris 2.5 */ +#else +# error "unable to find value for NSIG, please report" +/* to make it compile regardless, just remove the above line */ +# define EV_NSIG 65 +#endif + +#ifndef EV_USE_CLOCK_SYSCALL +# if __linux && __GLIBC__ >= 2 +# define EV_USE_CLOCK_SYSCALL 1 +# else +# define EV_USE_CLOCK_SYSCALL 0 +# endif +#endif + +#ifndef EV_USE_MONOTONIC +# if defined (_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0 +# define EV_USE_MONOTONIC 1 +# else +# define EV_USE_MONOTONIC 0 +# endif +#endif + +#ifndef EV_USE_REALTIME +# define EV_USE_REALTIME !EV_USE_CLOCK_SYSCALL +#endif + +#ifndef EV_USE_NANOSLEEP +# if _POSIX_C_SOURCE >= 199309L +# define EV_USE_NANOSLEEP 1 +# else +# define EV_USE_NANOSLEEP 0 +# endif +#endif + +#ifndef EV_USE_SELECT +# 
define EV_USE_SELECT 1 +#endif + +#ifndef EV_USE_POLL +# ifdef _WIN32 +# define EV_USE_POLL 0 +# else +# define EV_USE_POLL 1 +# endif +#endif + +#ifndef EV_USE_EPOLL +# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 4)) +# define EV_USE_EPOLL 1 +# else +# define EV_USE_EPOLL 0 +# endif +#endif + +#ifndef EV_USE_KQUEUE +# define EV_USE_KQUEUE 0 +#endif + +#ifndef EV_USE_PORT +# define EV_USE_PORT 0 +#endif + +#ifndef EV_USE_INOTIFY +# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 4)) +# define EV_USE_INOTIFY 1 +# else +# define EV_USE_INOTIFY 0 +# endif +#endif + +#ifndef EV_PID_HASHSIZE +# if EV_MINIMAL +# define EV_PID_HASHSIZE 1 +# else +# define EV_PID_HASHSIZE 16 +# endif +#endif + +#ifndef EV_INOTIFY_HASHSIZE +# if EV_MINIMAL +# define EV_INOTIFY_HASHSIZE 1 +# else +# define EV_INOTIFY_HASHSIZE 16 +# endif +#endif + +#ifndef EV_USE_EVENTFD +# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 7)) +# define EV_USE_EVENTFD 1 +# else +# define EV_USE_EVENTFD 0 +# endif +#endif + +#ifndef EV_USE_SIGNALFD +# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 7)) +# define EV_USE_SIGNALFD 1 +# else +# define EV_USE_SIGNALFD 0 +# endif +#endif + +#if 0 /* debugging */ +# define EV_VERIFY 3 +# define EV_USE_4HEAP 1 +# define EV_HEAP_CACHE_AT 1 +#endif + +#ifndef EV_VERIFY +# define EV_VERIFY !EV_MINIMAL +#endif + +#ifndef EV_USE_4HEAP +# define EV_USE_4HEAP !EV_MINIMAL +#endif + +#ifndef EV_HEAP_CACHE_AT +# define EV_HEAP_CACHE_AT !EV_MINIMAL +#endif + +/* on linux, we can use a (slow) syscall to avoid a dependency on pthread, */ +/* which makes programs even slower. might work on other unices, too. 
*/ +#if EV_USE_CLOCK_SYSCALL +# include +# ifdef SYS_clock_gettime +# define clock_gettime(id, ts) syscall (SYS_clock_gettime, (id), (ts)) +# undef EV_USE_MONOTONIC +# define EV_USE_MONOTONIC 1 +# else +# undef EV_USE_CLOCK_SYSCALL +# define EV_USE_CLOCK_SYSCALL 0 +# endif +#endif + +/* this block fixes any misconfiguration where we know we run into trouble otherwise */ + +#ifndef CLOCK_MONOTONIC +# undef EV_USE_MONOTONIC +# define EV_USE_MONOTONIC 0 +#endif + +#ifndef CLOCK_REALTIME +# undef EV_USE_REALTIME +# define EV_USE_REALTIME 0 +#endif + +#if !EV_STAT_ENABLE +# undef EV_USE_INOTIFY +# define EV_USE_INOTIFY 0 +#endif + +#if !EV_USE_NANOSLEEP +# ifndef _WIN32 +# include +# endif +#endif + +#if EV_USE_INOTIFY +# include +# include +# include +/* some very old inotify.h headers don't have IN_DONT_FOLLOW */ +# ifndef IN_DONT_FOLLOW +# undef EV_USE_INOTIFY +# define EV_USE_INOTIFY 0 +# endif +#endif + +#if EV_SELECT_IS_WINSOCKET +# include +#endif + +#if EV_USE_EVENTFD +/* our minimum requirement is glibc 2.7 which has the stub, but not the header */ +# include +# ifndef EFD_NONBLOCK +# define EFD_NONBLOCK O_NONBLOCK +# endif +# ifndef EFD_CLOEXEC +# ifdef O_CLOEXEC +# define EFD_CLOEXEC O_CLOEXEC +# else +# define EFD_CLOEXEC 02000000 +# endif +# endif +# ifdef __cplusplus +extern "C" { +# endif +int eventfd (unsigned int initval, int flags); +# ifdef __cplusplus +} +# endif +#endif + +#if EV_USE_SIGNALFD +/* our minimum requirement is glibc 2.7 which has the stub, but not the header */ +# include +# ifndef SFD_NONBLOCK +# define SFD_NONBLOCK O_NONBLOCK +# endif +# ifndef SFD_CLOEXEC +# ifdef O_CLOEXEC +# define SFD_CLOEXEC O_CLOEXEC +# else +# define SFD_CLOEXEC 02000000 +# endif +# endif +# ifdef __cplusplus +extern "C" { +# endif +int signalfd (int fd, const sigset_t *mask, int flags); + +struct signalfd_siginfo +{ + uint32_t ssi_signo; + char pad[128 - sizeof (uint32_t)]; +}; +# ifdef __cplusplus +} +# endif +#endif + + +/**/ + +#if EV_VERIFY >= 3 +# define 
EV_FREQUENT_CHECK ev_loop_verify (EV_A) +#else +# define EV_FREQUENT_CHECK do { } while (0) +#endif + +/* + * This is used to avoid floating point rounding problems. + * It is added to ev_rt_now when scheduling periodics + * to ensure progress, time-wise, even when rounding + * errors are against us. + * This value is good at least till the year 4000. + * Better solutions welcome. + */ +#define TIME_EPSILON 0.0001220703125 /* 1/8192 */ + +#define MIN_TIMEJUMP 1. /* minimum timejump that gets detected (if monotonic clock available) */ +#define MAX_BLOCKTIME 59.743 /* never wait longer than this time (to detect time jumps) */ + +#if __GNUC__ >= 4 +# define expect(expr,value) __builtin_expect ((expr),(value)) +# define noinline __attribute__ ((noinline)) +#else +# define expect(expr,value) (expr) +# define noinline +# if __STDC_VERSION__ < 199901L && __GNUC__ < 2 +# define inline +# endif +#endif + +#define expect_false(expr) expect ((expr) != 0, 0) +#define expect_true(expr) expect ((expr) != 0, 1) +#define inline_size static inline + +#if EV_MINIMAL +# define inline_speed static noinline +#else +# define inline_speed static inline +#endif + +#define NUMPRI (EV_MAXPRI - EV_MINPRI + 1) + +#if EV_MINPRI == EV_MAXPRI +# define ABSPRI(w) (((W)w), 0) +#else +# define ABSPRI(w) (((W)w)->priority - EV_MINPRI) +#endif + +#define EMPTY /* required for microsofts broken pseudo-c compiler */ +#define EMPTY2(a,b) /* used to suppress some warnings */ + +typedef ev_watcher *W; +typedef ev_watcher_list *WL; +typedef ev_watcher_time *WT; + +#define ev_active(w) ((W)(w))->active +#define ev_at(w) ((WT)(w))->at + +#if EV_USE_REALTIME +/* sig_atomic_t is used to avoid per-thread variables or locking but still */ +/* giving it a reasonably high chance of working on typical architetcures */ +static EV_ATOMIC_T have_realtime; /* did clock_gettime (CLOCK_REALTIME) work? */ +#endif + +#if EV_USE_MONOTONIC +static EV_ATOMIC_T have_monotonic; /* did clock_gettime (CLOCK_MONOTONIC) work? 
*/ +#endif + +#ifndef EV_FD_TO_WIN32_HANDLE +# define EV_FD_TO_WIN32_HANDLE(fd) _get_osfhandle (fd) +#endif +#ifndef EV_WIN32_HANDLE_TO_FD +# define EV_WIN32_HANDLE_TO_FD(handle) _open_osfhandle (fd, 0) +#endif +#ifndef EV_WIN32_CLOSE_FD +# define EV_WIN32_CLOSE_FD(fd) close (fd) +#endif + +#ifdef _WIN32 +# include "ev_win32.c" +#endif + +/*****************************************************************************/ + +static void (*syserr_cb)(const char *msg); + +void +ev_set_syserr_cb (void (*cb)(const char *msg)) +{ + syserr_cb = cb; +} + +static void noinline +ev_syserr (const char *msg) +{ + if (!msg) + msg = "(libev) system error"; + + if (syserr_cb) + syserr_cb (msg); + else + { + perror (msg); + abort (); + } +} + +static void * +ev_realloc_emul (void *ptr, long size) +{ + /* some systems, notably openbsd and darwin, fail to properly + * implement realloc (x, 0) (as required by both ansi c-98 and + * the single unix specification, so work around them here. + */ + + if (size) + return realloc (ptr, size); + + free (ptr); + return 0; +} + +static void *(*alloc)(void *ptr, long size) = ev_realloc_emul; + +void +ev_set_allocator (void *(*cb)(void *ptr, long size)) +{ + alloc = cb; +} + +inline_speed void * +ev_realloc (void *ptr, long size) +{ + ptr = alloc (ptr, size); + + if (!ptr && size) + { + fprintf (stderr, "libev: cannot allocate %ld bytes, aborting.", size); + abort (); + } + + return ptr; +} + +#define ev_malloc(size) ev_realloc (0, (size)) +#define ev_free(ptr) ev_realloc ((ptr), 0) + +/*****************************************************************************/ + +/* set in reify when reification needed */ +#define EV_ANFD_REIFY 1 + +/* file descriptor info structure */ +typedef struct +{ + WL head; + unsigned char events; /* the events watched for */ + unsigned char reify; /* flag set when this ANFD needs reification (EV_ANFD_REIFY, EV__IOFDSET) */ + unsigned char emask; /* the epoll backend stores the actual kernel mask in here */ + unsigned 
char unused; +#if EV_USE_EPOLL + unsigned int egen; /* generation counter to counter epoll bugs */ +#endif +#if EV_SELECT_IS_WINSOCKET + SOCKET handle; +#endif +} ANFD; + +/* stores the pending event set for a given watcher */ +typedef struct +{ + W w; + int events; /* the pending event set for the given watcher */ +} ANPENDING; + +#if EV_USE_INOTIFY +/* hash table entry per inotify-id */ +typedef struct +{ + WL head; +} ANFS; +#endif + +/* Heap Entry */ +#if EV_HEAP_CACHE_AT + /* a heap element */ + typedef struct { + ev_tstamp at; + WT w; + } ANHE; + + #define ANHE_w(he) (he).w /* access watcher, read-write */ + #define ANHE_at(he) (he).at /* access cached at, read-only */ + #define ANHE_at_cache(he) (he).at = (he).w->at /* update at from watcher */ +#else + /* a heap element */ + typedef WT ANHE; + + #define ANHE_w(he) (he) + #define ANHE_at(he) (he)->at + #define ANHE_at_cache(he) +#endif + +#if EV_MULTIPLICITY + + struct ev_loop + { + ev_tstamp ev_rt_now; + #define ev_rt_now ((loop)->ev_rt_now) + #define VAR(name,decl) decl; + #include "ev_vars.h" + #undef VAR + }; + #include "ev_wrap.h" + + static struct ev_loop default_loop_struct; + struct ev_loop *ev_default_loop_ptr; + +#else + + ev_tstamp ev_rt_now; + #define VAR(name,decl) static decl; + #include "ev_vars.h" + #undef VAR + + static int ev_default_loop_ptr; + +#endif + +#if EV_MINIMAL < 2 +# define EV_RELEASE_CB if (expect_false (release_cb)) release_cb (EV_A) +# define EV_ACQUIRE_CB if (expect_false (acquire_cb)) acquire_cb (EV_A) +# define EV_INVOKE_PENDING invoke_cb (EV_A) +#else +# define EV_RELEASE_CB (void)0 +# define EV_ACQUIRE_CB (void)0 +# define EV_INVOKE_PENDING ev_invoke_pending (EV_A) +#endif + +#define EVUNLOOP_RECURSE 0x80 + +/*****************************************************************************/ + +#ifndef EV_HAVE_EV_TIME +ev_tstamp +ev_time (void) +{ +#if EV_USE_REALTIME + if (expect_true (have_realtime)) + { + struct timespec ts; + clock_gettime (CLOCK_REALTIME, &ts); + return 
ts.tv_sec + ts.tv_nsec * 1e-9; + } +#endif + + struct timeval tv; + gettimeofday (&tv, 0); + return tv.tv_sec + tv.tv_usec * 1e-6; +} +#endif + +inline_size ev_tstamp +get_clock (void) +{ +#if EV_USE_MONOTONIC + if (expect_true (have_monotonic)) + { + struct timespec ts; + clock_gettime (CLOCK_MONOTONIC, &ts); + return ts.tv_sec + ts.tv_nsec * 1e-9; + } +#endif + + return ev_time (); +} + +#if EV_MULTIPLICITY +ev_tstamp +ev_now (EV_P) +{ + return ev_rt_now; +} +#endif + +void +ev_sleep (ev_tstamp delay) +{ + if (delay > 0.) + { +#if EV_USE_NANOSLEEP + struct timespec ts; + + ts.tv_sec = (time_t)delay; + ts.tv_nsec = (long)((delay - (ev_tstamp)(ts.tv_sec)) * 1e9); + + nanosleep (&ts, 0); +#elif defined(_WIN32) + Sleep ((unsigned long)(delay * 1e3)); +#else + struct timeval tv; + + tv.tv_sec = (time_t)delay; + tv.tv_usec = (long)((delay - (ev_tstamp)(tv.tv_sec)) * 1e6); + + /* here we rely on sys/time.h + sys/types.h + unistd.h providing select */ + /* something not guaranteed by newer posix versions, but guaranteed */ + /* by older ones */ + select (0, 0, 0, 0, &tv); +#endif + } +} + +/*****************************************************************************/ + +#define MALLOC_ROUND 4096 /* prefer to allocate in chunks of this size, must be 2**n and >> 4 longs */ + +/* find a suitable new size for the given array, */ +/* hopefully by rounding to a ncie-to-malloc size */ +inline_size int +array_nextsize (int elem, int cur, int cnt) +{ + int ncur = cur + 1; + + do + ncur <<= 1; + while (cnt > ncur); + + /* if size is large, round to MALLOC_ROUND - 4 * longs to accomodate malloc overhead */ + if (elem * ncur > MALLOC_ROUND - sizeof (void *) * 4) + { + ncur *= elem; + ncur = (ncur + elem + (MALLOC_ROUND - 1) + sizeof (void *) * 4) & ~(MALLOC_ROUND - 1); + ncur = ncur - sizeof (void *) * 4; + ncur /= elem; + } + + return ncur; +} + +static noinline void * +array_realloc (int elem, void *base, int *cur, int cnt) +{ + *cur = array_nextsize (elem, *cur, cnt); + return 
ev_realloc (base, elem * *cur); +} + +#define array_init_zero(base,count) \ + memset ((void *)(base), 0, sizeof (*(base)) * (count)) + +#define array_needsize(type,base,cur,cnt,init) \ + if (expect_false ((cnt) > (cur))) \ + { \ + int ocur_ = (cur); \ + (base) = (type *)array_realloc \ + (sizeof (type), (base), &(cur), (cnt)); \ + init ((base) + (ocur_), (cur) - ocur_); \ + } + +#if 0 +#define array_slim(type,stem) \ + if (stem ## max < array_roundsize (stem ## cnt >> 2)) \ + { \ + stem ## max = array_roundsize (stem ## cnt >> 1); \ + base = (type *)ev_realloc (base, sizeof (type) * (stem ## max));\ + fprintf (stderr, "slimmed down " # stem " to %d\n", stem ## max);/*D*/\ + } +#endif + +#define array_free(stem, idx) \ + ev_free (stem ## s idx); stem ## cnt idx = stem ## max idx = 0; stem ## s idx = 0 + +/*****************************************************************************/ + +/* dummy callback for pending events */ +static void noinline +pendingcb (EV_P_ ev_prepare *w, int revents) +{ +} + +void noinline +ev_feed_event (EV_P_ void *w, int revents) +{ + W w_ = (W)w; + int pri = ABSPRI (w_); + + if (expect_false (w_->pending)) + pendings [pri][w_->pending - 1].events |= revents; + else + { + w_->pending = ++pendingcnt [pri]; + array_needsize (ANPENDING, pendings [pri], pendingmax [pri], w_->pending, EMPTY2); + pendings [pri][w_->pending - 1].w = w_; + pendings [pri][w_->pending - 1].events = revents; + } +} + +inline_speed void +feed_reverse (EV_P_ W w) +{ + array_needsize (W, rfeeds, rfeedmax, rfeedcnt + 1, EMPTY2); + rfeeds [rfeedcnt++] = w; +} + +inline_size void +feed_reverse_done (EV_P_ int revents) +{ + do + ev_feed_event (EV_A_ rfeeds [--rfeedcnt], revents); + while (rfeedcnt); +} + +inline_speed void +queue_events (EV_P_ W *events, int eventcnt, int type) +{ + int i; + + for (i = 0; i < eventcnt; ++i) + ev_feed_event (EV_A_ events [i], type); +} + +/*****************************************************************************/ + +inline_speed void 
+fd_event_nc (EV_P_ int fd, int revents) +{ + ANFD *anfd = anfds + fd; + ev_io *w; + + for (w = (ev_io *)anfd->head; w; w = (ev_io *)((WL)w)->next) + { + int ev = w->events & revents; + + if (ev) + ev_feed_event (EV_A_ (W)w, ev); + } +} + +/* do not submit kernel events for fds that have reify set */ +/* because that means they changed while we were polling for new events */ +inline_speed void +fd_event (EV_P_ int fd, int revents) +{ + ANFD *anfd = anfds + fd; + + if (expect_true (!anfd->reify)) + fd_event_nc (EV_A_ fd, revents); +} + +void +ev_feed_fd_event (EV_P_ int fd, int revents) +{ + if (fd >= 0 && fd < anfdmax) + fd_event_nc (EV_A_ fd, revents); +} + +/* make sure the external fd watch events are in-sync */ +/* with the kernel/libev internal state */ +inline_size void +fd_reify (EV_P) +{ + int i; + + for (i = 0; i < fdchangecnt; ++i) + { + int fd = fdchanges [i]; + ANFD *anfd = anfds + fd; + ev_io *w; + + unsigned char events = 0; + + for (w = (ev_io *)anfd->head; w; w = (ev_io *)((WL)w)->next) + events |= (unsigned char)w->events; + +#if EV_SELECT_IS_WINSOCKET + if (events) + { + unsigned long arg; + anfd->handle = EV_FD_TO_WIN32_HANDLE (fd); + assert (("libev: only socket fds supported in this configuration", ioctlsocket (anfd->handle, FIONREAD, &arg) == 0)); + } +#endif + + { + unsigned char o_events = anfd->events; + unsigned char o_reify = anfd->reify; + + anfd->reify = 0; + anfd->events = events; + + if (o_events != events || o_reify & EV__IOFDSET) + backend_modify (EV_A_ fd, o_events, events); + } + } + + fdchangecnt = 0; +} + +/* something about the given fd changed */ +inline_size void +fd_change (EV_P_ int fd, int flags) +{ + unsigned char reify = anfds [fd].reify; + anfds [fd].reify |= flags; + + if (expect_true (!reify)) + { + ++fdchangecnt; + array_needsize (int, fdchanges, fdchangemax, fdchangecnt, EMPTY2); + fdchanges [fdchangecnt - 1] = fd; + } +} + +/* the given fd is invalid/unusable, so make sure it doesn't hurt us anymore */ 
+inline_speed void +fd_kill (EV_P_ int fd) +{ + ev_io *w; + + while ((w = (ev_io *)anfds [fd].head)) + { + ev_io_stop (EV_A_ w); + ev_feed_event (EV_A_ (W)w, EV_ERROR | EV_READ | EV_WRITE); + } +} + +/* check whether the given fd is atcually valid, for error recovery */ +inline_size int +fd_valid (int fd) +{ +#ifdef _WIN32 + return _get_osfhandle (fd) != -1; +#else + return fcntl (fd, F_GETFD) != -1; +#endif +} + +/* called on EBADF to verify fds */ +static void noinline +fd_ebadf (EV_P) +{ + int fd; + + for (fd = 0; fd < anfdmax; ++fd) + if (anfds [fd].events) + if (!fd_valid (fd) && errno == EBADF) + fd_kill (EV_A_ fd); +} + +/* called on ENOMEM in select/poll to kill some fds and retry */ +static void noinline +fd_enomem (EV_P) +{ + int fd; + + for (fd = anfdmax; fd--; ) + if (anfds [fd].events) + { + fd_kill (EV_A_ fd); + break; + } +} + +/* usually called after fork if backend needs to re-arm all fds from scratch */ +static void noinline +fd_rearm_all (EV_P) +{ + int fd; + + for (fd = 0; fd < anfdmax; ++fd) + if (anfds [fd].events) + { + anfds [fd].events = 0; + anfds [fd].emask = 0; + fd_change (EV_A_ fd, EV__IOFDSET | EV_ANFD_REIFY); + } +} + +/*****************************************************************************/ + +/* + * the heap functions want a real array index. array index 0 uis guaranteed to not + * be in-use at any time. the first heap entry is at array [HEAP0]. DHEAP gives + * the branching factor of the d-tree. + */ + +/* + * at the moment we allow libev the luxury of two heaps, + * a small-code-size 2-heap one and a ~1.5kb larger 4-heap + * which is more cache-efficient. + * the difference is about 5% with 50000+ watchers. 
+ */ +#if EV_USE_4HEAP + +#define DHEAP 4 +#define HEAP0 (DHEAP - 1) /* index of first element in heap */ +#define HPARENT(k) ((((k) - HEAP0 - 1) / DHEAP) + HEAP0) +#define UPHEAP_DONE(p,k) ((p) == (k)) + +/* away from the root */ +inline_speed void +downheap (ANHE *heap, int N, int k) +{ + ANHE he = heap [k]; + ANHE *E = heap + N + HEAP0; + + for (;;) + { + ev_tstamp minat; + ANHE *minpos; + ANHE *pos = heap + DHEAP * (k - HEAP0) + HEAP0 + 1; + + /* find minimum child */ + if (expect_true (pos + DHEAP - 1 < E)) + { + /* fast path */ (minpos = pos + 0), (minat = ANHE_at (*minpos)); + if ( ANHE_at (pos [1]) < minat) (minpos = pos + 1), (minat = ANHE_at (*minpos)); + if ( ANHE_at (pos [2]) < minat) (minpos = pos + 2), (minat = ANHE_at (*minpos)); + if ( ANHE_at (pos [3]) < minat) (minpos = pos + 3), (minat = ANHE_at (*minpos)); + } + else if (pos < E) + { + /* slow path */ (minpos = pos + 0), (minat = ANHE_at (*minpos)); + if (pos + 1 < E && ANHE_at (pos [1]) < minat) (minpos = pos + 1), (minat = ANHE_at (*minpos)); + if (pos + 2 < E && ANHE_at (pos [2]) < minat) (minpos = pos + 2), (minat = ANHE_at (*minpos)); + if (pos + 3 < E && ANHE_at (pos [3]) < minat) (minpos = pos + 3), (minat = ANHE_at (*minpos)); + } + else + break; + + if (ANHE_at (he) <= minat) + break; + + heap [k] = *minpos; + ev_active (ANHE_w (*minpos)) = k; + + k = minpos - heap; + } + + heap [k] = he; + ev_active (ANHE_w (he)) = k; +} + +#else /* 4HEAP */ + +#define HEAP0 1 +#define HPARENT(k) ((k) >> 1) +#define UPHEAP_DONE(p,k) (!(p)) + +/* away from the root */ +inline_speed void +downheap (ANHE *heap, int N, int k) +{ + ANHE he = heap [k]; + + for (;;) + { + int c = k << 1; + + if (c >= N + HEAP0) + break; + + c += c + 1 < N + HEAP0 && ANHE_at (heap [c]) > ANHE_at (heap [c + 1]) + ? 
1 : 0; + + if (ANHE_at (he) <= ANHE_at (heap [c])) + break; + + heap [k] = heap [c]; + ev_active (ANHE_w (heap [k])) = k; + + k = c; + } + + heap [k] = he; + ev_active (ANHE_w (he)) = k; +} +#endif + +/* towards the root */ +inline_speed void +upheap (ANHE *heap, int k) +{ + ANHE he = heap [k]; + + for (;;) + { + int p = HPARENT (k); + + if (UPHEAP_DONE (p, k) || ANHE_at (heap [p]) <= ANHE_at (he)) + break; + + heap [k] = heap [p]; + ev_active (ANHE_w (heap [k])) = k; + k = p; + } + + heap [k] = he; + ev_active (ANHE_w (he)) = k; +} + +/* move an element suitably so it is in a correct place */ +inline_size void +adjustheap (ANHE *heap, int N, int k) +{ + if (k > HEAP0 && ANHE_at (heap [k]) <= ANHE_at (heap [HPARENT (k)])) + upheap (heap, k); + else + downheap (heap, N, k); +} + +/* rebuild the heap: this function is used only once and executed rarely */ +inline_size void +reheap (ANHE *heap, int N) +{ + int i; + + /* we don't use floyds algorithm, upheap is simpler and is more cache-efficient */ + /* also, this is easy to implement and correct for both 2-heaps and 4-heaps */ + for (i = 0; i < N; ++i) + upheap (heap, i + HEAP0); +} + +/*****************************************************************************/ + +/* associate signal watchers to a signal signal */ +typedef struct +{ + EV_ATOMIC_T pending; +#if EV_MULTIPLICITY + EV_P; +#endif + WL head; +} ANSIG; + +static ANSIG signals [EV_NSIG - 1]; + +/*****************************************************************************/ + +/* used to prepare libev internal fd's */ +/* this is not fork-safe */ +inline_speed void +fd_intern (int fd) +{ +#ifdef _WIN32 + unsigned long arg = 1; + ioctlsocket (_get_osfhandle (fd), FIONBIO, &arg); +#else + fcntl (fd, F_SETFD, FD_CLOEXEC); + fcntl (fd, F_SETFL, O_NONBLOCK); +#endif +} + +static void noinline +evpipe_init (EV_P) +{ + if (!ev_is_active (&pipe_w)) + { +#if EV_USE_EVENTFD + evfd = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC); + if (evfd < 0 && errno == EINVAL) + evfd = 
eventfd (0, 0); + + if (evfd >= 0) + { + evpipe [0] = -1; + fd_intern (evfd); /* doing it twice doesn't hurt */ + ev_io_set (&pipe_w, evfd, EV_READ); + } + else +#endif + { + while (pipe (evpipe)) + ev_syserr ("(libev) error creating signal/async pipe"); + + fd_intern (evpipe [0]); + fd_intern (evpipe [1]); + ev_io_set (&pipe_w, evpipe [0], EV_READ); + } + + ev_io_start (EV_A_ &pipe_w); + ev_unref (EV_A); /* watcher should not keep loop alive */ + } +} + +inline_size void +evpipe_write (EV_P_ EV_ATOMIC_T *flag) +{ + if (!*flag) + { + int old_errno = errno; /* save errno because write might clobber it */ + + *flag = 1; + +#if EV_USE_EVENTFD + if (evfd >= 0) + { + uint64_t counter = 1; + write (evfd, &counter, sizeof (uint64_t)); + } + else +#endif + write (evpipe [1], &old_errno, 1); + + errno = old_errno; + } +} + +/* called whenever the libev signal pipe */ +/* got some events (signal, async) */ +static void +pipecb (EV_P_ ev_io *iow, int revents) +{ + int i; + +#if EV_USE_EVENTFD + if (evfd >= 0) + { + uint64_t counter; + read (evfd, &counter, sizeof (uint64_t)); + } + else +#endif + { + char dummy; + read (evpipe [0], &dummy, 1); + } + + if (sig_pending) + { + sig_pending = 0; + + for (i = EV_NSIG - 1; i--; ) + if (expect_false (signals [i].pending)) + ev_feed_signal_event (EV_A_ i + 1); + } + +#if EV_ASYNC_ENABLE + if (async_pending) + { + async_pending = 0; + + for (i = asynccnt; i--; ) + if (asyncs [i]->sent) + { + asyncs [i]->sent = 0; + ev_feed_event (EV_A_ asyncs [i], EV_ASYNC); + } + } +#endif +} + +/*****************************************************************************/ + +static void +ev_sighandler (int signum) +{ +#if EV_MULTIPLICITY + EV_P = signals [signum - 1].loop; +#endif + +#if _WIN32 + signal (signum, ev_sighandler); +#endif + + signals [signum - 1].pending = 1; + evpipe_write (EV_A_ &sig_pending); +} + +void noinline +ev_feed_signal_event (EV_P_ int signum) +{ + WL w; + + if (expect_false (signum <= 0 || signum > EV_NSIG)) + return; + + 
--signum; + +#if EV_MULTIPLICITY + /* it is permissible to try to feed a signal to the wrong loop */ + /* or, likely more useful, feeding a signal nobody is waiting for */ + + if (expect_false (signals [signum].loop != EV_A)) + return; +#endif + + signals [signum].pending = 0; + + for (w = signals [signum].head; w; w = w->next) + ev_feed_event (EV_A_ (W)w, EV_SIGNAL); +} + +#if EV_USE_SIGNALFD +static void +sigfdcb (EV_P_ ev_io *iow, int revents) +{ + struct signalfd_siginfo si[2], *sip; /* these structs are big */ + + for (;;) + { + ssize_t res = read (sigfd, si, sizeof (si)); + + /* not ISO-C, as res might be -1, but works with SuS */ + for (sip = si; (char *)sip < (char *)si + res; ++sip) + ev_feed_signal_event (EV_A_ sip->ssi_signo); + + if (res < (ssize_t)sizeof (si)) + break; + } +} +#endif + +/*****************************************************************************/ + +static WL childs [EV_PID_HASHSIZE]; + +#ifndef _WIN32 + +static ev_signal childev; + +#ifndef WIFCONTINUED +# define WIFCONTINUED(status) 0 +#endif + +/* handle a single child status event */ +inline_speed void +child_reap (EV_P_ int chain, int pid, int status) +{ + ev_child *w; + int traced = WIFSTOPPED (status) || WIFCONTINUED (status); + + for (w = (ev_child *)childs [chain & (EV_PID_HASHSIZE - 1)]; w; w = (ev_child *)((WL)w)->next) + { + if ((w->pid == pid || !w->pid) + && (!traced || (w->flags & 1))) + { + ev_set_priority (w, EV_MAXPRI); /* need to do it *now*, this *must* be the same prio as the signal watcher itself */ + w->rpid = pid; + w->rstatus = status; + ev_feed_event (EV_A_ (W)w, EV_CHILD); + } + } +} + +#ifndef WCONTINUED +# define WCONTINUED 0 +#endif + +/* called on sigchld etc., calls waitpid */ +static void +childcb (EV_P_ ev_signal *sw, int revents) +{ + int pid, status; + + /* some systems define WCONTINUED but then fail to support it (linux 2.4) */ + if (0 >= (pid = waitpid (-1, &status, WNOHANG | WUNTRACED | WCONTINUED))) + if (!WCONTINUED + || errno != EINVAL + || 
0 >= (pid = waitpid (-1, &status, WNOHANG | WUNTRACED))) + return; + + /* make sure we are called again until all children have been reaped */ + /* we need to do it this way so that the callback gets called before we continue */ + ev_feed_event (EV_A_ (W)sw, EV_SIGNAL); + + child_reap (EV_A_ pid, pid, status); + if (EV_PID_HASHSIZE > 1) + child_reap (EV_A_ 0, pid, status); /* this might trigger a watcher twice, but feed_event catches that */ +} + +#endif + +/*****************************************************************************/ + +#if EV_USE_PORT +# include "ev_port.c" +#endif +#if EV_USE_KQUEUE +# include "ev_kqueue.c" +#endif +#if EV_USE_EPOLL +# include "ev_epoll.c" +#endif +#if EV_USE_POLL +# include "ev_poll.c" +#endif +#if EV_USE_SELECT +# include "ev_select.c" +#endif + +int +ev_version_major (void) +{ + return EV_VERSION_MAJOR; +} + +int +ev_version_minor (void) +{ + return EV_VERSION_MINOR; +} + +/* return true if we are running with elevated privileges and should ignore env variables */ +int inline_size +enable_secure (void) +{ +#ifdef _WIN32 + return 0; +#else + return getuid () != geteuid () + || getgid () != getegid (); +#endif +} + +unsigned int +ev_supported_backends (void) +{ + unsigned int flags = 0; + + if (EV_USE_PORT ) flags |= EVBACKEND_PORT; + if (EV_USE_KQUEUE) flags |= EVBACKEND_KQUEUE; + if (EV_USE_EPOLL ) flags |= EVBACKEND_EPOLL; + if (EV_USE_POLL ) flags |= EVBACKEND_POLL; + if (EV_USE_SELECT) flags |= EVBACKEND_SELECT; + + return flags; +} + +unsigned int +ev_recommended_backends (void) +{ + unsigned int flags = ev_supported_backends (); + +#ifndef __NetBSD__ + /* kqueue is borked on everything but netbsd apparently */ + /* it usually doesn't work correctly on anything but sockets and pipes */ + flags &= ~EVBACKEND_KQUEUE; +#endif +#ifdef __APPLE__ + /* only select works correctly on that "unix-certified" platform */ + flags &= ~EVBACKEND_KQUEUE; /* horribly broken, even for sockets */ + flags &= ~EVBACKEND_POLL; /* poll is 
based on kqueue from 10.5 onwards */ +#endif + + return flags; +} + +unsigned int +ev_embeddable_backends (void) +{ + int flags = EVBACKEND_EPOLL | EVBACKEND_KQUEUE | EVBACKEND_PORT; + + /* epoll embeddability broken on all linux versions up to at least 2.6.23 */ + /* please fix it and tell me how to detect the fix */ + flags &= ~EVBACKEND_EPOLL; + + return flags; +} + +unsigned int +ev_backend (EV_P) +{ + return backend; +} + +#if EV_MINIMAL < 2 +unsigned int +ev_loop_count (EV_P) +{ + return loop_count; +} + +unsigned int +ev_loop_depth (EV_P) +{ + return loop_depth; +} + +void +ev_set_io_collect_interval (EV_P_ ev_tstamp interval) +{ + io_blocktime = interval; +} + +void +ev_set_timeout_collect_interval (EV_P_ ev_tstamp interval) +{ + timeout_blocktime = interval; +} + +void +ev_set_userdata (EV_P_ void *data) +{ + userdata = data; +} + +void * +ev_userdata (EV_P) +{ + return userdata; +} + +void ev_set_invoke_pending_cb (EV_P_ void (*invoke_pending_cb)(EV_P)) +{ + invoke_cb = invoke_pending_cb; +} + +void ev_set_loop_release_cb (EV_P_ void (*release)(EV_P), void (*acquire)(EV_P)) +{ + release_cb = release; + acquire_cb = acquire; +} +#endif + +/* initialise a loop structure, must be zero-initialised */ +static void noinline +loop_init (EV_P_ unsigned int flags) +{ + if (!backend) + { +#if EV_USE_REALTIME + if (!have_realtime) + { + struct timespec ts; + + if (!clock_gettime (CLOCK_REALTIME, &ts)) + have_realtime = 1; + } +#endif + +#if EV_USE_MONOTONIC + if (!have_monotonic) + { + struct timespec ts; + + if (!clock_gettime (CLOCK_MONOTONIC, &ts)) + have_monotonic = 1; + } +#endif + + /* pid check not overridable via env */ +#ifndef _WIN32 + if (flags & EVFLAG_FORKCHECK) + curpid = getpid (); +#endif + + if (!(flags & EVFLAG_NOENV) + && !enable_secure () + && getenv ("LIBEV_FLAGS")) + flags = atoi (getenv ("LIBEV_FLAGS")); + + ev_rt_now = ev_time (); + mn_now = get_clock (); + now_floor = mn_now; + rtmn_diff = ev_rt_now - mn_now; +#if EV_MINIMAL < 2 + invoke_cb 
= ev_invoke_pending; +#endif + + io_blocktime = 0.; + timeout_blocktime = 0.; + backend = 0; + backend_fd = -1; + sig_pending = 0; +#if EV_ASYNC_ENABLE + async_pending = 0; +#endif +#if EV_USE_INOTIFY + fs_fd = flags & EVFLAG_NOINOTIFY ? -1 : -2; +#endif +#if EV_USE_SIGNALFD + sigfd = flags & EVFLAG_SIGNALFD ? -2 : -1; +#endif + + if (!(flags & 0x0000ffffU)) + flags |= ev_recommended_backends (); + +#if EV_USE_PORT + if (!backend && (flags & EVBACKEND_PORT )) backend = port_init (EV_A_ flags); +#endif +#if EV_USE_KQUEUE + if (!backend && (flags & EVBACKEND_KQUEUE)) backend = kqueue_init (EV_A_ flags); +#endif +#if EV_USE_EPOLL + if (!backend && (flags & EVBACKEND_EPOLL )) backend = epoll_init (EV_A_ flags); +#endif +#if EV_USE_POLL + if (!backend && (flags & EVBACKEND_POLL )) backend = poll_init (EV_A_ flags); +#endif +#if EV_USE_SELECT + if (!backend && (flags & EVBACKEND_SELECT)) backend = select_init (EV_A_ flags); +#endif + + ev_prepare_init (&pending_w, pendingcb); + + ev_init (&pipe_w, pipecb); + ev_set_priority (&pipe_w, EV_MAXPRI); + } +} + +/* free up a loop structure */ +static void noinline +loop_destroy (EV_P) +{ + int i; + + if (ev_is_active (&pipe_w)) + { + /*ev_ref (EV_A);*/ + /*ev_io_stop (EV_A_ &pipe_w);*/ + +#if EV_USE_EVENTFD + if (evfd >= 0) + close (evfd); +#endif + + if (evpipe [0] >= 0) + { + EV_WIN32_CLOSE_FD (evpipe [0]); + EV_WIN32_CLOSE_FD (evpipe [1]); + } + } + +#if EV_USE_SIGNALFD + if (ev_is_active (&sigfd_w)) + close (sigfd); +#endif + +#if EV_USE_INOTIFY + if (fs_fd >= 0) + close (fs_fd); +#endif + + if (backend_fd >= 0) + close (backend_fd); + +#if EV_USE_PORT + if (backend == EVBACKEND_PORT ) port_destroy (EV_A); +#endif +#if EV_USE_KQUEUE + if (backend == EVBACKEND_KQUEUE) kqueue_destroy (EV_A); +#endif +#if EV_USE_EPOLL + if (backend == EVBACKEND_EPOLL ) epoll_destroy (EV_A); +#endif +#if EV_USE_POLL + if (backend == EVBACKEND_POLL ) poll_destroy (EV_A); +#endif +#if EV_USE_SELECT + if (backend == EVBACKEND_SELECT) 
select_destroy (EV_A); +#endif + + for (i = NUMPRI; i--; ) + { + array_free (pending, [i]); +#if EV_IDLE_ENABLE + array_free (idle, [i]); +#endif + } + + ev_free (anfds); anfds = 0; anfdmax = 0; + + /* have to use the microsoft-never-gets-it-right macro */ + array_free (rfeed, EMPTY); + array_free (fdchange, EMPTY); + array_free (timer, EMPTY); +#if EV_PERIODIC_ENABLE + array_free (periodic, EMPTY); +#endif +#if EV_FORK_ENABLE + array_free (fork, EMPTY); +#endif + array_free (prepare, EMPTY); + array_free (check, EMPTY); +#if EV_ASYNC_ENABLE + array_free (async, EMPTY); +#endif + + backend = 0; +} + +#if EV_USE_INOTIFY +inline_size void infy_fork (EV_P); +#endif + +inline_size void +loop_fork (EV_P) +{ +#if EV_USE_PORT + if (backend == EVBACKEND_PORT ) port_fork (EV_A); +#endif +#if EV_USE_KQUEUE + if (backend == EVBACKEND_KQUEUE) kqueue_fork (EV_A); +#endif +#if EV_USE_EPOLL + if (backend == EVBACKEND_EPOLL ) epoll_fork (EV_A); +#endif +#if EV_USE_INOTIFY + infy_fork (EV_A); +#endif + + if (ev_is_active (&pipe_w)) + { + /* this "locks" the handlers against writing to the pipe */ + /* while we modify the fd vars */ + sig_pending = 1; +#if EV_ASYNC_ENABLE + async_pending = 1; +#endif + + ev_ref (EV_A); + ev_io_stop (EV_A_ &pipe_w); + +#if EV_USE_EVENTFD + if (evfd >= 0) + close (evfd); +#endif + + if (evpipe [0] >= 0) + { + EV_WIN32_CLOSE_FD (evpipe [0]); + EV_WIN32_CLOSE_FD (evpipe [1]); + } + + evpipe_init (EV_A); + /* now iterate over everything, in case we missed something */ + pipecb (EV_A_ &pipe_w, EV_READ); + } + + postfork = 0; +} + +#if EV_MULTIPLICITY + +struct ev_loop * +ev_loop_new (unsigned int flags) +{ + EV_P = (struct ev_loop *)ev_malloc (sizeof (struct ev_loop)); + + memset (EV_A, 0, sizeof (struct ev_loop)); + loop_init (EV_A_ flags); + + if (ev_backend (EV_A)) + return EV_A; + + return 0; +} + +void +ev_loop_destroy (EV_P) +{ + loop_destroy (EV_A); + ev_free (loop); +} + +void +ev_loop_fork (EV_P) +{ + postfork = 1; /* must be in line with 
ev_default_fork */ +} +#endif /* multiplicity */ + +#if EV_VERIFY +static void noinline +verify_watcher (EV_P_ W w) +{ + assert (("libev: watcher has invalid priority", ABSPRI (w) >= 0 && ABSPRI (w) < NUMPRI)); + + if (w->pending) + assert (("libev: pending watcher not on pending queue", pendings [ABSPRI (w)][w->pending - 1].w == w)); +} + +static void noinline +verify_heap (EV_P_ ANHE *heap, int N) +{ + int i; + + for (i = HEAP0; i < N + HEAP0; ++i) + { + assert (("libev: active index mismatch in heap", ev_active (ANHE_w (heap [i])) == i)); + assert (("libev: heap condition violated", i == HEAP0 || ANHE_at (heap [HPARENT (i)]) <= ANHE_at (heap [i]))); + assert (("libev: heap at cache mismatch", ANHE_at (heap [i]) == ev_at (ANHE_w (heap [i])))); + + verify_watcher (EV_A_ (W)ANHE_w (heap [i])); + } +} + +static void noinline +array_verify (EV_P_ W *ws, int cnt) +{ + while (cnt--) + { + assert (("libev: active index mismatch", ev_active (ws [cnt]) == cnt + 1)); + verify_watcher (EV_A_ ws [cnt]); + } +} +#endif + +#if EV_MINIMAL < 2 +void +ev_loop_verify (EV_P) +{ +#if EV_VERIFY + int i; + WL w; + + assert (activecnt >= -1); + + assert (fdchangemax >= fdchangecnt); + for (i = 0; i < fdchangecnt; ++i) + assert (("libev: negative fd in fdchanges", fdchanges [i] >= 0)); + + assert (anfdmax >= 0); + for (i = 0; i < anfdmax; ++i) + for (w = anfds [i].head; w; w = w->next) + { + verify_watcher (EV_A_ (W)w); + assert (("libev: inactive fd watcher on anfd list", ev_active (w) == 1)); + assert (("libev: fd mismatch between watcher and anfd", ((ev_io *)w)->fd == i)); + } + + assert (timermax >= timercnt); + verify_heap (EV_A_ timers, timercnt); + +#if EV_PERIODIC_ENABLE + assert (periodicmax >= periodiccnt); + verify_heap (EV_A_ periodics, periodiccnt); +#endif + + for (i = NUMPRI; i--; ) + { + assert (pendingmax [i] >= pendingcnt [i]); +#if EV_IDLE_ENABLE + assert (idleall >= 0); + assert (idlemax [i] >= idlecnt [i]); + array_verify (EV_A_ (W *)idles [i], idlecnt [i]); +#endif 
+ } + +#if EV_FORK_ENABLE + assert (forkmax >= forkcnt); + array_verify (EV_A_ (W *)forks, forkcnt); +#endif + +#if EV_ASYNC_ENABLE + assert (asyncmax >= asynccnt); + array_verify (EV_A_ (W *)asyncs, asynccnt); +#endif + + assert (preparemax >= preparecnt); + array_verify (EV_A_ (W *)prepares, preparecnt); + + assert (checkmax >= checkcnt); + array_verify (EV_A_ (W *)checks, checkcnt); + +# if 0 + for (w = (ev_child *)childs [chain & (EV_PID_HASHSIZE - 1)]; w; w = (ev_child *)((WL)w)->next) + for (signum = EV_NSIG; signum--; ) if (signals [signum].pending) +# endif +#endif +} +#endif + +#if EV_MULTIPLICITY +struct ev_loop * +ev_default_loop_init (unsigned int flags) +#else +int +ev_default_loop (unsigned int flags) +#endif +{ + if (!ev_default_loop_ptr) + { +#if EV_MULTIPLICITY + EV_P = ev_default_loop_ptr = &default_loop_struct; +#else + ev_default_loop_ptr = 1; +#endif + + loop_init (EV_A_ flags); + + if (ev_backend (EV_A)) + { +#ifndef _WIN32 + ev_signal_init (&childev, childcb, SIGCHLD); + ev_set_priority (&childev, EV_MAXPRI); + ev_signal_start (EV_A_ &childev); + ev_unref (EV_A); /* child watcher should not keep loop alive */ +#endif + } + else + ev_default_loop_ptr = 0; + } + + return ev_default_loop_ptr; +} + +void +ev_default_destroy (void) +{ +#if EV_MULTIPLICITY + EV_P = ev_default_loop_ptr; +#endif + + ev_default_loop_ptr = 0; + +#ifndef _WIN32 + ev_ref (EV_A); /* child watcher */ + ev_signal_stop (EV_A_ &childev); +#endif + + loop_destroy (EV_A); +} + +void +ev_default_fork (void) +{ +#if EV_MULTIPLICITY + EV_P = ev_default_loop_ptr; +#endif + + postfork = 1; /* must be in line with ev_loop_fork */ +} + +/*****************************************************************************/ + +void +ev_invoke (EV_P_ void *w, int revents) +{ + EV_CB_INVOKE ((W)w, revents); +} + +unsigned int +ev_pending_count (EV_P) +{ + int pri; + unsigned int count = 0; + + for (pri = NUMPRI; pri--; ) + count += pendingcnt [pri]; + + return count; +} + +void noinline 
+ev_invoke_pending (EV_P) +{ + int pri; + + for (pri = NUMPRI; pri--; ) + while (pendingcnt [pri]) + { + ANPENDING *p = pendings [pri] + --pendingcnt [pri]; + + /*assert (("libev: non-pending watcher on pending list", p->w->pending));*/ + /* ^ this is no longer true, as pending_w could be here */ + + p->w->pending = 0; + EV_CB_INVOKE (p->w, p->events); + EV_FREQUENT_CHECK; + } +} + +#if EV_IDLE_ENABLE +/* make idle watchers pending. this handles the "call-idle */ +/* only when higher priorities are idle" logic */ +inline_size void +idle_reify (EV_P) +{ + if (expect_false (idleall)) + { + int pri; + + for (pri = NUMPRI; pri--; ) + { + if (pendingcnt [pri]) + break; + + if (idlecnt [pri]) + { + queue_events (EV_A_ (W *)idles [pri], idlecnt [pri], EV_IDLE); + break; + } + } + } +} +#endif + +/* make timers pending */ +inline_size void +timers_reify (EV_P) +{ + EV_FREQUENT_CHECK; + + if (timercnt && ANHE_at (timers [HEAP0]) < mn_now) + { + do + { + ev_timer *w = (ev_timer *)ANHE_w (timers [HEAP0]); + + /*assert (("libev: inactive timer on timer heap detected", ev_is_active (w)));*/ + + /* first reschedule or stop timer */ + if (w->repeat) + { + ev_at (w) += w->repeat; + if (ev_at (w) < mn_now) + ev_at (w) = mn_now; + + assert (("libev: negative ev_timer repeat value found while processing timers", w->repeat > 0.)); + + ANHE_at_cache (timers [HEAP0]); + downheap (timers, timercnt, HEAP0); + } + else + ev_timer_stop (EV_A_ w); /* nonrepeating: stop timer */ + + EV_FREQUENT_CHECK; + feed_reverse (EV_A_ (W)w); + } + while (timercnt && ANHE_at (timers [HEAP0]) < mn_now); + + feed_reverse_done (EV_A_ EV_TIMEOUT); + } +} + +#if EV_PERIODIC_ENABLE +/* make periodics pending */ +inline_size void +periodics_reify (EV_P) +{ + EV_FREQUENT_CHECK; + + while (periodiccnt && ANHE_at (periodics [HEAP0]) < ev_rt_now) + { + int feed_count = 0; + + do + { + ev_periodic *w = (ev_periodic *)ANHE_w (periodics [HEAP0]); + + /*assert (("libev: inactive timer on periodic heap detected", 
ev_is_active (w)));*/ + + /* first reschedule or stop timer */ + if (w->reschedule_cb) + { + ev_at (w) = w->reschedule_cb (w, ev_rt_now); + + assert (("libev: ev_periodic reschedule callback returned time in the past", ev_at (w) >= ev_rt_now)); + + ANHE_at_cache (periodics [HEAP0]); + downheap (periodics, periodiccnt, HEAP0); + } + else if (w->interval) + { + ev_at (w) = w->offset + ceil ((ev_rt_now - w->offset) / w->interval) * w->interval; + /* if next trigger time is not sufficiently in the future, put it there */ + /* this might happen because of floating point inexactness */ + if (ev_at (w) - ev_rt_now < TIME_EPSILON) + { + ev_at (w) += w->interval; + + /* if interval is unreasonably low we might still have a time in the past */ + /* so correct this. this will make the periodic very inexact, but the user */ + /* has effectively asked to get triggered more often than possible */ + if (ev_at (w) < ev_rt_now) + ev_at (w) = ev_rt_now; + } + + ANHE_at_cache (periodics [HEAP0]); + downheap (periodics, periodiccnt, HEAP0); + } + else + ev_periodic_stop (EV_A_ w); /* nonrepeating: stop timer */ + + EV_FREQUENT_CHECK; + feed_reverse (EV_A_ (W)w); + } + while (periodiccnt && ANHE_at (periodics [HEAP0]) < ev_rt_now); + + feed_reverse_done (EV_A_ EV_PERIODIC); + } +} + +/* simply recalculate all periodics */ +/* TODO: maybe ensure that at leats one event happens when jumping forward? 
*/ +static void noinline +periodics_reschedule (EV_P) +{ + int i; + + /* adjust periodics after time jump */ + for (i = HEAP0; i < periodiccnt + HEAP0; ++i) + { + ev_periodic *w = (ev_periodic *)ANHE_w (periodics [i]); + + if (w->reschedule_cb) + ev_at (w) = w->reschedule_cb (w, ev_rt_now); + else if (w->interval) + ev_at (w) = w->offset + ceil ((ev_rt_now - w->offset) / w->interval) * w->interval; + + ANHE_at_cache (periodics [i]); + } + + reheap (periodics, periodiccnt); +} +#endif + +/* adjust all timers by a given offset */ +static void noinline +timers_reschedule (EV_P_ ev_tstamp adjust) +{ + int i; + + for (i = 0; i < timercnt; ++i) + { + ANHE *he = timers + i + HEAP0; + ANHE_w (*he)->at += adjust; + ANHE_at_cache (*he); + } +} + +/* fetch new monotonic and realtime times from the kernel */ +/* also detetc if there was a timejump, and act accordingly */ +inline_speed void +time_update (EV_P_ ev_tstamp max_block) +{ +#if EV_USE_MONOTONIC + if (expect_true (have_monotonic)) + { + int i; + ev_tstamp odiff = rtmn_diff; + + mn_now = get_clock (); + + /* only fetch the realtime clock every 0.5*MIN_TIMEJUMP seconds */ + /* interpolate in the meantime */ + if (expect_true (mn_now - now_floor < MIN_TIMEJUMP * .5)) + { + ev_rt_now = rtmn_diff + mn_now; + return; + } + + now_floor = mn_now; + ev_rt_now = ev_time (); + + /* loop a few times, before making important decisions. + * on the choice of "4": one iteration isn't enough, + * in case we get preempted during the calls to + * ev_time and get_clock. a second call is almost guaranteed + * to succeed in that case, though. and looping a few more times + * doesn't hurt either as we only do this on time-jumps or + * in the unlikely event of having been preempted here. 
+ */ + for (i = 4; --i; ) + { + rtmn_diff = ev_rt_now - mn_now; + + if (expect_true (fabs (odiff - rtmn_diff) < MIN_TIMEJUMP)) + return; /* all is well */ + + ev_rt_now = ev_time (); + mn_now = get_clock (); + now_floor = mn_now; + } + + /* no timer adjustment, as the monotonic clock doesn't jump */ + /* timers_reschedule (EV_A_ rtmn_diff - odiff) */ +# if EV_PERIODIC_ENABLE + periodics_reschedule (EV_A); +# endif + } + else +#endif + { + ev_rt_now = ev_time (); + + if (expect_false (mn_now > ev_rt_now || ev_rt_now > mn_now + max_block + MIN_TIMEJUMP)) + { + /* adjust timers. this is easy, as the offset is the same for all of them */ + timers_reschedule (EV_A_ ev_rt_now - mn_now); +#if EV_PERIODIC_ENABLE + periodics_reschedule (EV_A); +#endif + } + + mn_now = ev_rt_now; + } +} + +void +ev_loop (EV_P_ int flags) +{ +#if EV_MINIMAL < 2 + ++loop_depth; +#endif + + assert (("libev: ev_loop recursion during release detected", loop_done != EVUNLOOP_RECURSE)); + + loop_done = EVUNLOOP_CANCEL; + + EV_INVOKE_PENDING; /* in case we recurse, ensure ordering stays nice and clean */ + + do + { +#if EV_VERIFY >= 2 + ev_loop_verify (EV_A); +#endif + +#ifndef _WIN32 + if (expect_false (curpid)) /* penalise the forking check even more */ + if (expect_false (getpid () != curpid)) + { + curpid = getpid (); + postfork = 1; + } +#endif + +#if EV_FORK_ENABLE + /* we might have forked, so queue fork handlers */ + if (expect_false (postfork)) + if (forkcnt) + { + queue_events (EV_A_ (W *)forks, forkcnt, EV_FORK); + EV_INVOKE_PENDING; + } +#endif + + /* queue prepare watchers (and execute them) */ + if (expect_false (preparecnt)) + { + queue_events (EV_A_ (W *)prepares, preparecnt, EV_PREPARE); + EV_INVOKE_PENDING; + } + + if (expect_false (loop_done)) + break; + + /* we might have forked, so reify kernel state if necessary */ + if (expect_false (postfork)) + loop_fork (EV_A); + + /* update fd-related kernel structures */ + fd_reify (EV_A); + + /* calculate blocking time */ + { + ev_tstamp 
waittime = 0.; + ev_tstamp sleeptime = 0.; + + if (expect_true (!(flags & EVLOOP_NONBLOCK || idleall || !activecnt))) + { + /* remember old timestamp for io_blocktime calculation */ + ev_tstamp prev_mn_now = mn_now; + + /* update time to cancel out callback processing overhead */ + time_update (EV_A_ 1e100); + + waittime = MAX_BLOCKTIME; + + if (timercnt) + { + ev_tstamp to = ANHE_at (timers [HEAP0]) - mn_now + backend_fudge; + if (waittime > to) waittime = to; + } + +#if EV_PERIODIC_ENABLE + if (periodiccnt) + { + ev_tstamp to = ANHE_at (periodics [HEAP0]) - ev_rt_now + backend_fudge; + if (waittime > to) waittime = to; + } +#endif + + /* don't let timeouts decrease the waittime below timeout_blocktime */ + if (expect_false (waittime < timeout_blocktime)) + waittime = timeout_blocktime; + + /* extra check because io_blocktime is commonly 0 */ + if (expect_false (io_blocktime)) + { + sleeptime = io_blocktime - (mn_now - prev_mn_now); + + if (sleeptime > waittime - backend_fudge) + sleeptime = waittime - backend_fudge; + + if (expect_true (sleeptime > 0.)) + { + ev_sleep (sleeptime); + waittime -= sleeptime; + } + } + } + +#if EV_MINIMAL < 2 + ++loop_count; +#endif + assert ((loop_done = EVUNLOOP_RECURSE, 1)); /* assert for side effect */ + backend_poll (EV_A_ waittime); + assert ((loop_done = EVUNLOOP_CANCEL, 1)); /* assert for side effect */ + + /* update ev_rt_now, do magic */ + time_update (EV_A_ waittime + sleeptime); + } + + /* queue pending timers and reschedule them */ + timers_reify (EV_A); /* relative timers called last */ +#if EV_PERIODIC_ENABLE + periodics_reify (EV_A); /* absolute timers called first */ +#endif + +#if EV_IDLE_ENABLE + /* queue idle watchers unless other events are pending */ + idle_reify (EV_A); +#endif + + /* queue check watchers, to be executed first */ + if (expect_false (checkcnt)) + queue_events (EV_A_ (W *)checks, checkcnt, EV_CHECK); + + EV_INVOKE_PENDING; + } + while (expect_true ( + activecnt + && !loop_done + && !(flags & 
(EVLOOP_ONESHOT | EVLOOP_NONBLOCK)) + )); + + if (loop_done == EVUNLOOP_ONE) + loop_done = EVUNLOOP_CANCEL; + +#if EV_MINIMAL < 2 + --loop_depth; +#endif +} + +void +ev_unloop (EV_P_ int how) +{ + loop_done = how; +} + +void +ev_ref (EV_P) +{ + ++activecnt; +} + +void +ev_unref (EV_P) +{ + --activecnt; +} + +void +ev_now_update (EV_P) +{ + time_update (EV_A_ 1e100); +} + +void +ev_suspend (EV_P) +{ + ev_now_update (EV_A); +} + +void +ev_resume (EV_P) +{ + ev_tstamp mn_prev = mn_now; + + ev_now_update (EV_A); + timers_reschedule (EV_A_ mn_now - mn_prev); +#if EV_PERIODIC_ENABLE + /* TODO: really do this? */ + periodics_reschedule (EV_A); +#endif +} + +/*****************************************************************************/ +/* singly-linked list management, used when the expected list length is short */ + +inline_size void +wlist_add (WL *head, WL elem) +{ + elem->next = *head; + *head = elem; +} + +inline_size void +wlist_del (WL *head, WL elem) +{ + while (*head) + { + if (expect_true (*head == elem)) + { + *head = elem->next; + break; + } + + head = &(*head)->next; + } +} + +/* internal, faster, version of ev_clear_pending */ +inline_speed void +clear_pending (EV_P_ W w) +{ + if (w->pending) + { + pendings [ABSPRI (w)][w->pending - 1].w = (W)&pending_w; + w->pending = 0; + } +} + +int +ev_clear_pending (EV_P_ void *w) +{ + W w_ = (W)w; + int pending = w_->pending; + + if (expect_true (pending)) + { + ANPENDING *p = pendings [ABSPRI (w_)] + pending - 1; + p->w = (W)&pending_w; + w_->pending = 0; + return p->events; + } + else + return 0; +} + +inline_size void +pri_adjust (EV_P_ W w) +{ + int pri = ev_priority (w); + pri = pri < EV_MINPRI ? EV_MINPRI : pri; + pri = pri > EV_MAXPRI ? 
EV_MAXPRI : pri; + ev_set_priority (w, pri); +} + +inline_speed void +ev_start (EV_P_ W w, int active) +{ + pri_adjust (EV_A_ w); + w->active = active; + ev_ref (EV_A); +} + +inline_size void +ev_stop (EV_P_ W w) +{ + ev_unref (EV_A); + w->active = 0; +} + +/*****************************************************************************/ + +void noinline +ev_io_start (EV_P_ ev_io *w) +{ + int fd = w->fd; + + if (expect_false (ev_is_active (w))) + return; + + assert (("libev: ev_io_start called with negative fd", fd >= 0)); + assert (("libev: ev_io start called with illegal event mask", !(w->events & ~(EV__IOFDSET | EV_READ | EV_WRITE)))); + + EV_FREQUENT_CHECK; + + ev_start (EV_A_ (W)w, 1); + array_needsize (ANFD, anfds, anfdmax, fd + 1, array_init_zero); + wlist_add (&anfds[fd].head, (WL)w); + + fd_change (EV_A_ fd, w->events & EV__IOFDSET | EV_ANFD_REIFY); + w->events &= ~EV__IOFDSET; + + EV_FREQUENT_CHECK; +} + +void noinline +ev_io_stop (EV_P_ ev_io *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + assert (("libev: ev_io_stop called with illegal fd (must stay constant after start!)", w->fd >= 0 && w->fd < anfdmax)); + + EV_FREQUENT_CHECK; + + wlist_del (&anfds[w->fd].head, (WL)w); + ev_stop (EV_A_ (W)w); + + fd_change (EV_A_ w->fd, 1); + + EV_FREQUENT_CHECK; +} + +void noinline +ev_timer_start (EV_P_ ev_timer *w) +{ + if (expect_false (ev_is_active (w))) + return; + + ev_at (w) += mn_now; + + assert (("libev: ev_timer_start called with negative timer repeat value", w->repeat >= 0.)); + + EV_FREQUENT_CHECK; + + ++timercnt; + ev_start (EV_A_ (W)w, timercnt + HEAP0 - 1); + array_needsize (ANHE, timers, timermax, ev_active (w) + 1, EMPTY2); + ANHE_w (timers [ev_active (w)]) = (WT)w; + ANHE_at_cache (timers [ev_active (w)]); + upheap (timers, ev_active (w)); + + EV_FREQUENT_CHECK; + + /*assert (("libev: internal timer heap corruption", timers [ev_active (w)] == (WT)w));*/ +} + +void noinline +ev_timer_stop (EV_P_ ev_timer *w) 
+{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + { + int active = ev_active (w); + + assert (("libev: internal timer heap corruption", ANHE_w (timers [active]) == (WT)w)); + + --timercnt; + + if (expect_true (active < timercnt + HEAP0)) + { + timers [active] = timers [timercnt + HEAP0]; + adjustheap (timers, timercnt, active); + } + } + + EV_FREQUENT_CHECK; + + ev_at (w) -= mn_now; + + ev_stop (EV_A_ (W)w); +} + +void noinline +ev_timer_again (EV_P_ ev_timer *w) +{ + EV_FREQUENT_CHECK; + + if (ev_is_active (w)) + { + if (w->repeat) + { + ev_at (w) = mn_now + w->repeat; + ANHE_at_cache (timers [ev_active (w)]); + adjustheap (timers, timercnt, ev_active (w)); + } + else + ev_timer_stop (EV_A_ w); + } + else if (w->repeat) + { + ev_at (w) = w->repeat; + ev_timer_start (EV_A_ w); + } + + EV_FREQUENT_CHECK; +} + +ev_tstamp +ev_timer_remaining (EV_P_ ev_timer *w) +{ + return ev_at (w) - (ev_is_active (w) ? mn_now : 0.); +} + +#if EV_PERIODIC_ENABLE +void noinline +ev_periodic_start (EV_P_ ev_periodic *w) +{ + if (expect_false (ev_is_active (w))) + return; + + if (w->reschedule_cb) + ev_at (w) = w->reschedule_cb (w, ev_rt_now); + else if (w->interval) + { + assert (("libev: ev_periodic_start called with negative interval value", w->interval >= 0.)); + /* this formula differs from the one in periodic_reify because we do not always round up */ + ev_at (w) = w->offset + ceil ((ev_rt_now - w->offset) / w->interval) * w->interval; + } + else + ev_at (w) = w->offset; + + EV_FREQUENT_CHECK; + + ++periodiccnt; + ev_start (EV_A_ (W)w, periodiccnt + HEAP0 - 1); + array_needsize (ANHE, periodics, periodicmax, ev_active (w) + 1, EMPTY2); + ANHE_w (periodics [ev_active (w)]) = (WT)w; + ANHE_at_cache (periodics [ev_active (w)]); + upheap (periodics, ev_active (w)); + + EV_FREQUENT_CHECK; + + /*assert (("libev: internal periodic heap corruption", ANHE_w (periodics [ev_active (w)]) == (WT)w));*/ +} + +void noinline 
+ev_periodic_stop (EV_P_ ev_periodic *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + { + int active = ev_active (w); + + assert (("libev: internal periodic heap corruption", ANHE_w (periodics [active]) == (WT)w)); + + --periodiccnt; + + if (expect_true (active < periodiccnt + HEAP0)) + { + periodics [active] = periodics [periodiccnt + HEAP0]; + adjustheap (periodics, periodiccnt, active); + } + } + + EV_FREQUENT_CHECK; + + ev_stop (EV_A_ (W)w); +} + +void noinline +ev_periodic_again (EV_P_ ev_periodic *w) +{ + /* TODO: use adjustheap and recalculation */ + ev_periodic_stop (EV_A_ w); + ev_periodic_start (EV_A_ w); +} +#endif + +#ifndef SA_RESTART +# define SA_RESTART 0 +#endif + +void noinline +ev_signal_start (EV_P_ ev_signal *w) +{ + if (expect_false (ev_is_active (w))) + return; + + assert (("libev: ev_signal_start called with illegal signal number", w->signum > 0 && w->signum < EV_NSIG)); + +#if EV_MULTIPLICITY + assert (("libev: a signal must not be attached to two different loops", + !signals [w->signum - 1].loop || signals [w->signum - 1].loop == loop)); + + signals [w->signum - 1].loop = EV_A; +#endif + + EV_FREQUENT_CHECK; + +#if EV_USE_SIGNALFD + if (sigfd == -2) + { + sigfd = signalfd (-1, &sigfd_set, SFD_NONBLOCK | SFD_CLOEXEC); + if (sigfd < 0 && errno == EINVAL) + sigfd = signalfd (-1, &sigfd_set, 0); /* retry without flags */ + + if (sigfd >= 0) + { + fd_intern (sigfd); /* doing it twice will not hurt */ + + sigemptyset (&sigfd_set); + + ev_io_init (&sigfd_w, sigfdcb, sigfd, EV_READ); + ev_set_priority (&sigfd_w, EV_MAXPRI); + ev_io_start (EV_A_ &sigfd_w); + ev_unref (EV_A); /* signalfd watcher should not keep loop alive */ + } + } + + if (sigfd >= 0) + { + /* TODO: check .head */ + sigaddset (&sigfd_set, w->signum); + sigprocmask (SIG_BLOCK, &sigfd_set, 0); + + signalfd (sigfd, &sigfd_set, 0); + } +#endif + + ev_start (EV_A_ (W)w, 1); + wlist_add (&signals [w->signum - 1].head, 
(WL)w); + + if (!((WL)w)->next) +# if EV_USE_SIGNALFD + if (sigfd < 0) /*TODO*/ +# endif + { +# if _WIN32 + evpipe_init (EV_A); + + signal (w->signum, ev_sighandler); +# else + struct sigaction sa; + + evpipe_init (EV_A); + + sa.sa_handler = ev_sighandler; + sigfillset (&sa.sa_mask); + sa.sa_flags = SA_RESTART; /* if restarting works we save one iteration */ + sigaction (w->signum, &sa, 0); + + sigemptyset (&sa.sa_mask); + sigaddset (&sa.sa_mask, w->signum); + sigprocmask (SIG_UNBLOCK, &sa.sa_mask, 0); +#endif + } + + EV_FREQUENT_CHECK; +} + +void noinline +ev_signal_stop (EV_P_ ev_signal *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + wlist_del (&signals [w->signum - 1].head, (WL)w); + ev_stop (EV_A_ (W)w); + + if (!signals [w->signum - 1].head) + { +#if EV_MULTIPLICITY + signals [w->signum - 1].loop = 0; /* unattach from signal */ +#endif +#if EV_USE_SIGNALFD + if (sigfd >= 0) + { + sigset_t ss; + + sigemptyset (&ss); + sigaddset (&ss, w->signum); + sigdelset (&sigfd_set, w->signum); + + signalfd (sigfd, &sigfd_set, 0); + sigprocmask (SIG_UNBLOCK, &ss, 0); + } + else +#endif + signal (w->signum, SIG_DFL); + } + + EV_FREQUENT_CHECK; +} + +void +ev_child_start (EV_P_ ev_child *w) +{ +#if EV_MULTIPLICITY + assert (("libev: child watchers are only supported in the default loop", loop == ev_default_loop_ptr)); +#endif + if (expect_false (ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + ev_start (EV_A_ (W)w, 1); + wlist_add (&childs [w->pid & (EV_PID_HASHSIZE - 1)], (WL)w); + + EV_FREQUENT_CHECK; +} + +void +ev_child_stop (EV_P_ ev_child *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + wlist_del (&childs [w->pid & (EV_PID_HASHSIZE - 1)], (WL)w); + ev_stop (EV_A_ (W)w); + + EV_FREQUENT_CHECK; +} + +#if EV_STAT_ENABLE + +# ifdef _WIN32 +# undef lstat +# define lstat(a,b) _stati64 (a,b) +# endif + +#define DEF_STAT_INTERVAL 5.0074891 
+#define NFS_STAT_INTERVAL 30.1074891 /* for filesystems potentially failing inotify */ +#define MIN_STAT_INTERVAL 0.1074891 + +static void noinline stat_timer_cb (EV_P_ ev_timer *w_, int revents); + +#if EV_USE_INOTIFY +# define EV_INOTIFY_BUFSIZE 8192 + +static void noinline +infy_add (EV_P_ ev_stat *w) +{ + w->wd = inotify_add_watch (fs_fd, w->path, IN_ATTRIB | IN_DELETE_SELF | IN_MOVE_SELF | IN_MODIFY | IN_DONT_FOLLOW | IN_MASK_ADD); + + if (w->wd >= 0) + { + struct statfs sfs; + + /* now local changes will be tracked by inotify, but remote changes won't */ + /* unless the filesystem is known to be local, we therefore still poll */ + /* also do poll on <2.6.25, but with normal frequency */ + + if (!fs_2625) + w->timer.repeat = w->interval ? w->interval : DEF_STAT_INTERVAL; + else if (!statfs (w->path, &sfs) + && (sfs.f_type == 0x1373 /* devfs */ + || sfs.f_type == 0xEF53 /* ext2/3 */ + || sfs.f_type == 0x3153464a /* jfs */ + || sfs.f_type == 0x52654973 /* reiser3 */ + || sfs.f_type == 0x01021994 /* tempfs */ + || sfs.f_type == 0x58465342 /* xfs */)) + w->timer.repeat = 0.; /* filesystem is local, kernel new enough */ + else + w->timer.repeat = w->interval ? w->interval : NFS_STAT_INTERVAL; /* remote, use reduced frequency */ + } + else + { + /* can't use inotify, continue to stat */ + w->timer.repeat = w->interval ? w->interval : DEF_STAT_INTERVAL; + + /* if path is not there, monitor some parent directory for speedup hints */ + /* note that exceeding the hardcoded path limit is not a correctness issue, */ + /* but an efficiency issue only */ + if ((errno == ENOENT || errno == EACCES) && strlen (w->path) < 4096) + { + char path [4096]; + strcpy (path, w->path); + + do + { + int mask = IN_MASK_ADD | IN_DELETE_SELF | IN_MOVE_SELF + | (errno == EACCES ? 
IN_ATTRIB : IN_CREATE | IN_MOVED_TO);
+
+              char *pend = strrchr (path, '/');
+
+              if (!pend || pend == path)
+                break;
+
+              *pend = 0;
+              w->wd = inotify_add_watch (fs_fd, path, mask);
+            }
+          while (w->wd < 0 && (errno == ENOENT || errno == EACCES));
+        }
+    }
+
+  if (w->wd >= 0)
+    wlist_add (&fs_hash [w->wd & (EV_INOTIFY_HASHSIZE - 1)].head, (WL)w);
+
+  /* now re-arm timer, if required */
+  if (ev_is_active (&w->timer)) ev_ref (EV_A);
+  ev_timer_again (EV_A_ &w->timer);
+  if (ev_is_active (&w->timer)) ev_unref (EV_A);
+}
+
+/* drop the inotify watch of w (if it has one): marks the watcher with */
+/* wd == -2, unlinks it from its fs_hash slot and removes the kernel watch */
+static void noinline
+infy_del (EV_P_ ev_stat *w)
+{
+  int slot;
+  int wd = w->wd;
+
+  if (wd < 0)
+    return;
+
+  w->wd = -2;
+  slot = wd & (EV_INOTIFY_HASHSIZE - 1);
+  wlist_del (&fs_hash [slot].head, (WL)w);
+
+  /* remove this watcher, if others are watching it, they will rearm */
+  inotify_rm_watch (fs_fd, wd);
+}
+
+/* dispatch one inotify event to every stat watcher in hash slot "slot" */
+/* whose wd matches (wd == -1 matches all). a negative slot means queue */
+/* overflow, in which case all slots are scanned. watchers whose watch */
+/* went away (IN_IGNORED/IN_UNMOUNT/IN_DELETE_SELF) are re-added first. */
+static void noinline
+infy_wd (EV_P_ int slot, int wd, struct inotify_event *ev)
+{
+  if (slot < 0)
+    /* overflow, need to check for all hash slots */
+    for (slot = 0; slot < EV_INOTIFY_HASHSIZE; ++slot)
+      infy_wd (EV_A_ slot, wd, ev);
+  else
+    {
+      WL w_;
+
+      for (w_ = fs_hash [slot & (EV_INOTIFY_HASHSIZE - 1)].head; w_; )
+        {
+          ev_stat *w = (ev_stat *)w_;
+          w_ = w_->next; /* lets us remove this watcher and all before it */
+
+          if (w->wd == wd || wd == -1)
+            {
+              if (ev->mask & (IN_IGNORED | IN_UNMOUNT | IN_DELETE_SELF))
+                {
+                  wlist_del (&fs_hash [slot & (EV_INOTIFY_HASHSIZE - 1)].head, (WL)w);
+                  w->wd = -1;
+                  infy_add (EV_A_ w); /* re-add, no matter what */
+                }
+
+              stat_timer_cb (EV_A_ &w->timer, 0);
+            }
+        }
+    }
+}
+
+/* io callback for the inotify fd: reads a batch of events and hands each */
+/* one to infy_wd. a single read can return several variable-length */
+/* records, so the event pointer has to be recomputed from the running */
+/* offset for every record - otherwise the first record would be */
+/* dispatched over and over and all later ones would be lost. */
+static void
+infy_cb (EV_P_ ev_io *w, int revents)
+{
+  char buf [EV_INOTIFY_BUFSIZE];
+  int ofs;
+  int len = read (fs_fd, buf, sizeof (buf));
+
+  /* a failed read yields len < 0 and simply skips the loop */
+  for (ofs = 0; ofs < len; )
+    {
+      struct inotify_event *ev = (struct inotify_event *)(buf + ofs);
+
+      infy_wd (EV_A_ ev->wd, ev->wd, ev);
+      ofs += sizeof (struct inotify_event) + ev->len;
+    }
+}
+
+/* set fs_2625 when the running kernel reports a release >= 2.6.25; */
+/* leaves it untouched if uname fails or the release string cannot be */
+/* parsed as three dot-separated numbers */
+inline_size void
+check_2625 (EV_P)
+{
+  /* kernels < 2.6.25 are borked
+   * http://www.ussg.indiana.edu/hypermail/linux/kernel/0711.3/1208.html
+   */
+  struct utsname buf;
+  int major, minor, micro;
+
+  if (uname (&buf))
+    return;
+
+  if (sscanf (buf.release, "%d.%d.%d", &major, &minor, &micro) != 3)
+    return;
+
+  if (major < 2
+      || (major == 2 && minor < 6)
+      || (major == 2 && minor == 6 && micro < 25))
+    return;
+
+  fs_2625 = 1;
+}
+
+/* create an inotify fd, preferring inotify_init1 with close-on-exec and */
+/* non-blocking flags where available and falling back to inotify_init */
+inline_size int
+infy_newfd (void)
+{
+#if defined (IN_CLOEXEC) && defined (IN_NONBLOCK)
+  int fd = inotify_init1 (IN_CLOEXEC | IN_NONBLOCK);
+  if (fd >= 0)
+    return fd;
+#endif
+  return inotify_init ();
+}
+
+/* one-time inotify setup: runs only while fs_fd still has its initial */
+/* value -2. on success the internal io watcher fs_w is started at */
+/* maximum priority and unref'ed so it does not keep the loop alive. */
+inline_size void
+infy_init (EV_P)
+{
+  if (fs_fd != -2)
+    return;
+
+  fs_fd = -1;
+
+  check_2625 (EV_A);
+
+  fs_fd = infy_newfd ();
+
+  if (fs_fd >= 0)
+    {
+      fd_intern (fs_fd);
+      ev_io_init (&fs_w, infy_cb, fs_fd, EV_READ);
+      ev_set_priority (&fs_w, EV_MAXPRI);
+      ev_io_start (EV_A_ &fs_w);
+      ev_unref (EV_A);
+    }
+}
+
+/* after a fork: replace the inotify fd by a fresh one and re-register */
+/* every stat watcher found in fs_hash, falling back to plain timer */
+/* polling when no new fd could be created */
+inline_size void
+infy_fork (EV_P)
+{
+  int slot;
+
+  if (fs_fd < 0)
+    return;
+
+  ev_ref (EV_A);
+  ev_io_stop (EV_A_ &fs_w);
+  close (fs_fd);
+  fs_fd = infy_newfd ();
+
+  if (fs_fd >= 0)
+    {
+      fd_intern (fs_fd);
+      ev_io_set (&fs_w, fs_fd, EV_READ);
+      ev_io_start (EV_A_ &fs_w);
+      ev_unref (EV_A);
+    }
+
+  for (slot = 0; slot < EV_INOTIFY_HASHSIZE; ++slot)
+    {
+      WL w_ = fs_hash [slot].head;
+      fs_hash [slot].head = 0;
+
+      while (w_)
+        {
+          ev_stat *w = (ev_stat *)w_;
+          w_ = w_->next; /* lets us add this watcher */
+
+          w->wd = -1;
+
+          if (fs_fd >= 0)
+            infy_add (EV_A_ w); /* re-add, no matter what */
+          else
+            {
+              w->timer.repeat = w->interval ? 
w->interval : DEF_STAT_INTERVAL;
+              if (ev_is_active (&w->timer)) ev_ref (EV_A);
+              ev_timer_again (EV_A_ &w->timer);
+              if (ev_is_active (&w->timer)) ev_unref (EV_A);
+            }
+        }
+    }
+}
+
+#endif
+
+#ifdef _WIN32
+# define EV_LSTAT(p,b) _stati64 (p, b)
+#else
+# define EV_LSTAT(p,b) lstat (p, b)
+#endif
+
+/* refresh w->attr from the filesystem. st_nlink doubles as the */
+/* "exists" flag: it is forced to 0 when lstat fails and to at least 1 */
+/* when it succeeds. */
+void
+ev_stat_stat (EV_P_ ev_stat *w)
+{
+  if (lstat (w->path, &w->attr) < 0)
+    w->attr.st_nlink = 0;
+  else if (!w->attr.st_nlink)
+    w->attr.st_nlink = 1;
+}
+
+/* timer callback of a stat watcher (also called directly by infy_wd): */
+/* re-stats the path and, if any of the compared fields changed, stores */
+/* the old data in w->prev, renews the inotify watch and feeds EV_STAT. */
+static void noinline
+stat_timer_cb (EV_P_ ev_timer *w_, int revents)
+{
+  /* recover the enclosing ev_stat from its embedded timer member */
+  ev_stat *w = (ev_stat *)(((char *)w_) - offsetof (ev_stat, timer));
+
+  ev_statdata prev = w->attr;
+  ev_stat_stat (EV_A_ w);
+
+  /* memcmp doesn't work on netbsd, they.... do stuff to their struct stat */
+  if (
+    prev.st_dev      != w->attr.st_dev
+    || prev.st_ino   != w->attr.st_ino
+    || prev.st_mode  != w->attr.st_mode
+    || prev.st_nlink != w->attr.st_nlink
+    || prev.st_uid   != w->attr.st_uid
+    || prev.st_gid   != w->attr.st_gid
+    || prev.st_rdev  != w->attr.st_rdev
+    || prev.st_size  != w->attr.st_size
+    || prev.st_atime != w->attr.st_atime
+    || prev.st_mtime != w->attr.st_mtime
+    || prev.st_ctime != w->attr.st_ctime
+  ) {
+      /* we only update w->prev on actual differences */
+      /* in case we test more often than invoke the callback, */
+      /* to ensure that prev is always different to attr */
+      w->prev = prev;
+
+      #if EV_USE_INOTIFY
+        if (fs_fd >= 0)
+          {
+            infy_del (EV_A_ w);
+            infy_add (EV_A_ w);
+            ev_stat_stat (EV_A_ w); /* avoid race... */
+          }
+      #endif
+
+      ev_feed_event (EV_A_ w, EV_STAT);
+    }
+}
+
+/* start a stat watcher: takes an initial stat, clamps a non-zero */
+/* interval to MIN_STAT_INTERVAL, sets up the embedded timer and then */
+/* uses inotify if available, plain polling otherwise */
+void
+ev_stat_start (EV_P_ ev_stat *w)
+{
+  if (expect_false (ev_is_active (w)))
+    return;
+
+  ev_stat_stat (EV_A_ w);
+
+  if (w->interval < MIN_STAT_INTERVAL && w->interval)
+    w->interval = MIN_STAT_INTERVAL;
+
+  ev_timer_init (&w->timer, stat_timer_cb, 0., w->interval ? 
w->interval : DEF_STAT_INTERVAL);
+  ev_set_priority (&w->timer, ev_priority (w));
+
+#if EV_USE_INOTIFY
+  infy_init (EV_A);
+
+  if (fs_fd >= 0)
+    infy_add (EV_A_ w);
+  else
+#endif
+    {
+      ev_timer_again (EV_A_ &w->timer);
+      ev_unref (EV_A); /* balanced by the ev_ref in ev_stat_stop */
+    }
+
+  ev_start (EV_A_ (W)w, 1);
+
+  EV_FREQUENT_CHECK;
+}
+
+/* stop a stat watcher: clears pending state, removes the inotify watch */
+/* (if compiled in) and stops the embedded timer if it is running */
+void
+ev_stat_stop (EV_P_ ev_stat *w)
+{
+  clear_pending (EV_A_ (W)w);
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+#if EV_USE_INOTIFY
+  infy_del (EV_A_ w);
+#endif
+
+  if (ev_is_active (&w->timer))
+    {
+      ev_ref (EV_A); /* take a reference before the timer stop */
+      ev_timer_stop (EV_A_ &w->timer);
+    }
+
+  ev_stop (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+}
+#endif
+
+#if EV_IDLE_ENABLE
+/* start an idle watcher: appends it to the idles array of its priority */
+/* and bumps both the per-priority and the global idle counters */
+void
+ev_idle_start (EV_P_ ev_idle *w)
+{
+  if (expect_false (ev_is_active (w)))
+    return;
+
+  pri_adjust (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+
+  {
+    int active = ++idlecnt [ABSPRI (w)];
+
+    ++idleall;
+    ev_start (EV_A_ (W)w, active);
+
+    array_needsize (ev_idle *, idles [ABSPRI (w)], idlemax [ABSPRI (w)], active, EMPTY2);
+    idles [ABSPRI (w)][active - 1] = w;
+  }
+
+  EV_FREQUENT_CHECK;
+}
+
+/* stop an idle watcher: the last array element is moved into the freed */
+/* slot and takes over its active index */
+void
+ev_idle_stop (EV_P_ ev_idle *w)
+{
+  clear_pending (EV_A_ (W)w);
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    int active = ev_active (w);
+
+    idles [ABSPRI (w)][active - 1] = idles [ABSPRI (w)][--idlecnt [ABSPRI (w)]];
+    ev_active (idles [ABSPRI (w)][active - 1]) = active;
+
+    ev_stop (EV_A_ (W)w);
+    --idleall;
+  }
+
+  EV_FREQUENT_CHECK;
+}
+#endif
+
+/* start a prepare watcher: appends it to the prepares array, its active */
+/* value is the array index + 1 */
+void
+ev_prepare_start (EV_P_ ev_prepare *w)
+{
+  if (expect_false (ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  ev_start (EV_A_ (W)w, ++preparecnt);
+  array_needsize (ev_prepare *, prepares, preparemax, preparecnt, EMPTY2);
+  prepares [preparecnt - 1] = w;
+
+  EV_FREQUENT_CHECK;
+}
+
+/* stop a prepare watcher: the last array element is moved into the */
+/* freed slot */
+void
+ev_prepare_stop (EV_P_ ev_prepare *w)
+{
+  clear_pending (EV_A_ (W)w);
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    int active = ev_active (w);
+
+    prepares [active - 1] = prepares 
[--preparecnt];
+    ev_active (prepares [active - 1]) = active;
+  }
+
+  ev_stop (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+}
+
+/* start a check watcher: appends it to the checks array, its active */
+/* value is the array index + 1 */
+void
+ev_check_start (EV_P_ ev_check *w)
+{
+  if (expect_false (ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  ev_start (EV_A_ (W)w, ++checkcnt);
+  array_needsize (ev_check *, checks, checkmax, checkcnt, EMPTY2);
+  checks [checkcnt - 1] = w;
+
+  EV_FREQUENT_CHECK;
+}
+
+/* stop a check watcher: the last array element is moved into the freed */
+/* slot and takes over its active index */
+void
+ev_check_stop (EV_P_ ev_check *w)
+{
+  clear_pending (EV_A_ (W)w);
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    int active = ev_active (w);
+
+    checks [active - 1] = checks [--checkcnt];
+    ev_active (checks [active - 1]) = active;
+  }
+
+  ev_stop (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+}
+
+#if EV_EMBED_ENABLE
+/* run one non-blocking iteration of the embedded loop w->other */
+void noinline
+ev_embed_sweep (EV_P_ ev_embed *w)
+{
+  ev_loop (w->other, EVLOOP_NONBLOCK);
+}
+
+/* io callback on the embedded loop's backend fd: hands the event to the */
+/* user callback if one is set, otherwise sweeps the embedded loop itself */
+static void
+embed_io_cb (EV_P_ ev_io *io, int revents)
+{
+  /* recover the enclosing ev_embed from its embedded io member */
+  ev_embed *w = (ev_embed *)(((char *)io) - offsetof (ev_embed, io));
+
+  if (ev_cb (w))
+    ev_feed_event (EV_A_ (W)w, EV_EMBED);
+  else
+    ev_loop (w->other, EVLOOP_NONBLOCK);
+}
+
+/* prepare callback: as long as the embedded loop has outstanding fd */
+/* changes, apply them and run a non-blocking iteration of it */
+static void
+embed_prepare_cb (EV_P_ ev_prepare *prepare, int revents)
+{
+  ev_embed *w = (ev_embed *)(((char *)prepare) - offsetof (ev_embed, prepare));
+
+  {
+    /* from here on "loop" refers to the embedded loop */
+    EV_P = w->other;
+
+    while (fdchangecnt)
+      {
+        fd_reify (EV_A);
+        ev_loop (EV_A_ EVLOOP_NONBLOCK);
+      }
+  }
+}
+
+/* fork callback: stops the embed watcher, lets the embedded loop handle */
+/* the fork and run once without blocking, then starts the watcher again */
+static void
+embed_fork_cb (EV_P_ ev_fork *fork_w, int revents)
+{
+  ev_embed *w = (ev_embed *)(((char *)fork_w) - offsetof (ev_embed, fork));
+
+  ev_embed_stop (EV_A_ w);
+
+  {
+    EV_P = w->other;
+
+    ev_loop_fork (EV_A);
+    ev_loop (EV_A_ EVLOOP_NONBLOCK);
+  }
+
+  ev_embed_start (EV_A_ w);
+}
+
+#if 0
+static void
+embed_idle_cb (EV_P_ ev_idle *idle, int revents)
+{
+  ev_idle_stop (EV_A_ idle);
+}
+#endif
+
+/* start an embed watcher: sets up and starts the internal io, prepare */
+/* and fork watchers that drive the embedded loop w->other. the embedded */
+/* loop's backend must be one of ev_embeddable_backends (asserted). */
+void
+ev_embed_start (EV_P_ ev_embed *w)
+{
+  if (expect_false (ev_is_active (w)))
+    return;
+
+  {
+    /* inside this block "loop" is the embedded loop, so backend and */
+    /* backend_fd below are those of w->other */
+    EV_P = w->other;
+    assert (("libev: loop to be embedded is not embeddable", backend & ev_embeddable_backends ()));
+    
ev_io_init (&w->io, embed_io_cb, backend_fd, EV_READ);
+  }
+
+  EV_FREQUENT_CHECK;
+
+  ev_set_priority (&w->io, ev_priority (w));
+  ev_io_start (EV_A_ &w->io);
+
+  ev_prepare_init (&w->prepare, embed_prepare_cb);
+  ev_set_priority (&w->prepare, EV_MINPRI);
+  ev_prepare_start (EV_A_ &w->prepare);
+
+  ev_fork_init (&w->fork, embed_fork_cb);
+  ev_fork_start (EV_A_ &w->fork);
+
+  /*ev_idle_init (&w->idle, embed_idle_cb);*/
+
+  ev_start (EV_A_ (W)w, 1);
+
+  EV_FREQUENT_CHECK;
+}
+
+/* stop an embed watcher: stops the internal io, prepare and fork */
+/* watchers and then the embed watcher itself. the final ev_stop is */
+/* required to undo the ev_start in ev_embed_start - without it the */
+/* watcher stays marked active, its loop reference is never released */
+/* and a later ev_embed_start (e.g. from embed_fork_cb) does nothing. */
+void
+ev_embed_stop (EV_P_ ev_embed *w)
+{
+  clear_pending (EV_A_ (W)w);
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  ev_io_stop      (EV_A_ &w->io);
+  ev_prepare_stop (EV_A_ &w->prepare);
+  ev_fork_stop    (EV_A_ &w->fork);
+
+  ev_stop (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+}
+#endif
+
+#if EV_FORK_ENABLE
+/* start a fork watcher: appends it to the forks array, its active value */
+/* is the array index + 1 */
+void
+ev_fork_start (EV_P_ ev_fork *w)
+{
+  if (expect_false (ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  ev_start (EV_A_ (W)w, ++forkcnt);
+  array_needsize (ev_fork *, forks, forkmax, forkcnt, EMPTY2);
+  forks [forkcnt - 1] = w;
+
+  EV_FREQUENT_CHECK;
+}
+
+/* stop a fork watcher: the last array element is moved into the freed */
+/* slot and takes over its active index */
+void
+ev_fork_stop (EV_P_ ev_fork *w)
+{
+  clear_pending (EV_A_ (W)w);
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    int active = ev_active (w);
+
+    forks [active - 1] = forks [--forkcnt];
+    ev_active (forks [active - 1]) = active;
+  }
+
+  ev_stop (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+}
+#endif
+
+#if EV_ASYNC_ENABLE
+/* start an async watcher: makes sure the event pipe exists, then */
+/* appends the watcher to the asyncs array (active = array index + 1) */
+void
+ev_async_start (EV_P_ ev_async *w)
+{
+  if (expect_false (ev_is_active (w)))
+    return;
+
+  evpipe_init (EV_A);
+
+  EV_FREQUENT_CHECK;
+
+  ev_start (EV_A_ (W)w, ++asynccnt);
+  array_needsize (ev_async *, asyncs, asyncmax, asynccnt, EMPTY2);
+  asyncs [asynccnt - 1] = w;
+
+  EV_FREQUENT_CHECK;
+}
+
+/* stop an async watcher: the last array element is moved into the */
+/* freed slot and takes over its active index */
+void
+ev_async_stop (EV_P_ ev_async *w)
+{
+  clear_pending (EV_A_ (W)w);
+  if (expect_false (!ev_is_active (w)))
+    return;
+
+  EV_FREQUENT_CHECK;
+
+  {
+    int active = ev_active (w);
+
+    asyncs [active - 1] = asyncs [--asynccnt];
+    ev_active (asyncs [active - 1]) = active;
+  }
+
+  
ev_stop (EV_A_ (W)w);
+
+  EV_FREQUENT_CHECK;
+}
+
+/* mark the async watcher as sent and signal the loop through the event */
+/* pipe */
+void
+ev_async_send (EV_P_ ev_async *w)
+{
+  w->sent = 1;
+  evpipe_write (EV_A_ &async_pending);
+}
+#endif
+
+/*****************************************************************************/
+
+/* heap-allocated state of one ev_once request: an io and a timer */
+/* watcher plus the user callback and its argument */
+struct ev_once
+{
+  ev_io io;
+  ev_timer to;
+  void (*cb)(int revents, void *arg);
+  void *arg;
+};
+
+/* common completion path: stops both watchers, frees the state and only */
+/* then invokes the user callback (cb and arg are copied out first) */
+static void
+once_cb (EV_P_ struct ev_once *once, int revents)
+{
+  void (*cb)(int revents, void *arg) = once->cb;
+  void *arg = once->arg;
+
+  ev_io_stop    (EV_A_ &once->io);
+  ev_timer_stop (EV_A_ &once->to);
+  ev_free (once);
+
+  cb (revents, arg);
+}
+
+/* io watcher fired: merge in a possibly pending timer event */
+static void
+once_cb_io (EV_P_ ev_io *w, int revents)
+{
+  struct ev_once *once = (struct ev_once *)(((char *)w) - offsetof (struct ev_once, io));
+
+  once_cb (EV_A_ once, revents | ev_clear_pending (EV_A_ &once->to));
+}
+
+/* timer fired: merge in a possibly pending io event */
+static void
+once_cb_to (EV_P_ ev_timer *w, int revents)
+{
+  struct ev_once *once = (struct ev_once *)(((char *)w) - offsetof (struct ev_once, to));
+
+  once_cb (EV_A_ once, revents | ev_clear_pending (EV_A_ &once->io));
+}
+
+/* wait once for fd events and/or a timeout and call cb (revents, arg). */
+/* the io watcher is only started for fd >= 0, the timer only for */
+/* timeout >= 0. if the state cannot be allocated, cb is called */
+/* immediately with EV_ERROR | EV_READ | EV_WRITE | EV_TIMEOUT. */
+void
+ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, void *arg), void *arg)
+{
+  struct ev_once *once = (struct ev_once *)ev_malloc (sizeof (struct ev_once));
+
+  if (expect_false (!once))
+    {
+      cb (EV_ERROR | EV_READ | EV_WRITE | EV_TIMEOUT, arg);
+      return;
+    }
+
+  once->cb  = cb;
+  once->arg = arg;
+
+  ev_init (&once->io, once_cb_io);
+  if (fd >= 0)
+    {
+      ev_io_set (&once->io, fd, events);
+      ev_io_start (EV_A_ &once->io);
+    }
+
+  ev_init (&once->to, once_cb_to);
+  if (timeout >= 0.) 
+    {
+      ev_timer_set (&once->to, timeout, 0.);
+      ev_timer_start (EV_A_ &once->to);
+    }
+}
+
+/*****************************************************************************/
+
+#if EV_WALK_ENABLE
+/* call cb (type, watcher) for every watcher of the loop whose type bit */
+/* is set in types. watchers that libev starts internally (the pipe and */
+/* inotify io watchers, the io/prepare/fork/timer members of embed and */
+/* stat watchers) are not reported themselves; for embed and stat the */
+/* enclosing watcher is reported instead. the next list element is */
+/* always fetched before cb is invoked. */
+void
+ev_walk (EV_P_ int types, void (*cb)(EV_P_ int type, void *w))
+{
+  int i, j;
+  ev_watcher_list *wl, *wn;
+
+  if (types & (EV_IO | EV_EMBED))
+    for (i = 0; i < anfdmax; ++i)
+      for (wl = anfds [i].head; wl; )
+        {
+          wn = wl->next;
+
+#if EV_EMBED_ENABLE
+          if (ev_cb ((ev_io *)wl) == embed_io_cb)
+            {
+              if (types & EV_EMBED)
+                cb (EV_A_ EV_EMBED, ((char *)wl) - offsetof (struct ev_embed, io));
+            }
+          else
+#endif
+#if EV_USE_INOTIFY
+          if (ev_cb ((ev_io *)wl) == infy_cb)
+            ;
+          else
+#endif
+          if ((ev_io *)wl != &pipe_w)
+            if (types & EV_IO)
+              cb (EV_A_ EV_IO, wl);
+
+          wl = wn;
+        }
+
+  if (types & (EV_TIMER | EV_STAT))
+    for (i = timercnt + HEAP0; i-- > HEAP0; )
+#if EV_STAT_ENABLE
+      /*TODO: timer is not always active*/
+      if (ev_cb ((ev_timer *)ANHE_w (timers [i])) == stat_timer_cb)
+        {
+          if (types & EV_STAT)
+            cb (EV_A_ EV_STAT, ((char *)ANHE_w (timers [i])) - offsetof (struct ev_stat, timer));
+        }
+      else
+#endif
+      if (types & EV_TIMER)
+        cb (EV_A_ EV_TIMER, ANHE_w (timers [i]));
+
+#if EV_PERIODIC_ENABLE
+  if (types & EV_PERIODIC)
+    for (i = periodiccnt + HEAP0; i-- > HEAP0; )
+      cb (EV_A_ EV_PERIODIC, ANHE_w (periodics [i]));
+#endif
+
+#if EV_IDLE_ENABLE
+  /* the outer loop has to count j down over the priorities; testing i */
+  /* here would use a stale value and never terminate on j */
+  if (types & EV_IDLE)
+    for (j = NUMPRI; j--; )
+      for (i = idlecnt [j]; i--; )
+        cb (EV_A_ EV_IDLE, idles [j][i]);
+#endif
+
+#if EV_FORK_ENABLE
+  if (types & EV_FORK)
+    for (i = forkcnt; i--; )
+      if (ev_cb (forks [i]) != embed_fork_cb)
+        cb (EV_A_ EV_FORK, forks [i]);
+#endif
+
+#if EV_ASYNC_ENABLE
+  if (types & EV_ASYNC)
+    for (i = asynccnt; i--; )
+      cb (EV_A_ EV_ASYNC, asyncs [i]);
+#endif
+
+  if (types & EV_PREPARE)
+    for (i = preparecnt; i--; )
+#if EV_EMBED_ENABLE
+      if (ev_cb (prepares [i]) != embed_prepare_cb)
+#endif
+        cb (EV_A_ EV_PREPARE, prepares [i]);
+
+  if (types & EV_CHECK)
+    for (i = checkcnt; i--; )
+      cb 
(EV_A_ EV_CHECK, checks [i]); + + if (types & EV_SIGNAL) + for (i = 0; i < EV_NSIG - 1; ++i) + for (wl = signals [i].head; wl; ) + { + wn = wl->next; + cb (EV_A_ EV_SIGNAL, wl); + wl = wn; + } + + if (types & EV_CHILD) + for (i = EV_PID_HASHSIZE; i--; ) + for (wl = childs [i]; wl; ) + { + wn = wl->next; + cb (EV_A_ EV_CHILD, wl); + wl = wn; + } +/* EV_STAT 0x00001000 /* stat data changed */ +/* EV_EMBED 0x00010000 /* embedded event loop needs sweep */ +} +#endif + +#if EV_MULTIPLICITY + #include "ev_wrap.h" +#endif + +#ifdef __cplusplus +} +#endif + diff --git a/libev/ev.h b/libev/ev.h new file mode 100644 index 0000000..c7c44ff --- /dev/null +++ b/libev/ev.h @@ -0,0 +1,705 @@ +/* + * libev native API header + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. 
+ */
+
+#ifndef EV_H_
+#define EV_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef double ev_tstamp;
+
+/* these priorities are inclusive, higher priorities will be called earlier */
+#ifndef EV_MINPRI
+# define EV_MINPRI -2
+#endif
+#ifndef EV_MAXPRI
+# define EV_MAXPRI +2
+#endif
+
+/* feature switches: each one can be predefined by the embedding program */
+#ifndef EV_MULTIPLICITY
+# define EV_MULTIPLICITY 1
+#endif
+
+#ifndef EV_PERIODIC_ENABLE
+# define EV_PERIODIC_ENABLE 1
+#endif
+
+#ifndef EV_STAT_ENABLE
+# define EV_STAT_ENABLE 1
+#endif
+
+#ifndef EV_IDLE_ENABLE
+# define EV_IDLE_ENABLE 1
+#endif
+
+#ifndef EV_FORK_ENABLE
+# define EV_FORK_ENABLE 1
+#endif
+
+#ifndef EV_EMBED_ENABLE
+# define EV_EMBED_ENABLE 1
+#endif
+
+#ifndef EV_ASYNC_ENABLE
+# define EV_ASYNC_ENABLE 1
+#endif
+
+#ifndef EV_WALK_ENABLE
+# define EV_WALK_ENABLE 0 /* not yet */
+#endif
+
+/* sig_atomic_t comes from <signal.h> */
+#ifndef EV_ATOMIC_T
+# include <signal.h>
+# define EV_ATOMIC_T sig_atomic_t volatile
+#endif
+
+/*****************************************************************************/
+
+/* struct stat / struct _stati64 are needed for ev_statdata */
+#if EV_STAT_ENABLE
+# ifdef _WIN32
+#  include <time.h>
+#  include <sys/types.h>
+# endif
+# include <sys/stat.h>
+#endif
+
+/* support multiple event loops? */
+#if EV_MULTIPLICITY
+struct ev_loop;
+# define EV_P  struct ev_loop *loop
+# define EV_P_ EV_P,
+# define EV_A  loop
+# define EV_A_ EV_A,
+# define EV_DEFAULT_UC  ev_default_loop_uc ()
+# define EV_DEFAULT_UC_ EV_DEFAULT_UC,
+# define EV_DEFAULT  ev_default_loop (0)
+# define EV_DEFAULT_ EV_DEFAULT,
+#else
+# define EV_P void
+# define EV_P_
+# define EV_A
+# define EV_A_
+# define EV_DEFAULT
+# define EV_DEFAULT_
+# define EV_DEFAULT_UC
+# define EV_DEFAULT_UC_
+# undef EV_EMBED_ENABLE
+#endif
+
+#if __STDC_VERSION__ >= 199901L || __GNUC__ >= 3
+# define EV_INLINE static inline
+#else
+# define EV_INLINE static
+#endif
+
+/*****************************************************************************/
+
+/* eventmask, revents, events... 
*/ +#define EV_UNDEF -1 /* guaranteed to be invalid */ +#define EV_NONE 0x00 /* no events */ +#define EV_READ 0x01 /* ev_io detected read will not block */ +#define EV_WRITE 0x02 /* ev_io detected write will not block */ +#define EV__IOFDSET 0x80 /* internal use only */ +#define EV_IO EV_READ /* alias for type-detection */ +#define EV_TIMEOUT 0x00000100 /* timer timed out */ +#define EV_TIMER EV_TIMEOUT /* alias for type-detection */ +#define EV_PERIODIC 0x00000200 /* periodic timer timed out */ +#define EV_SIGNAL 0x00000400 /* signal was received */ +#define EV_CHILD 0x00000800 /* child/pid had status change */ +#define EV_STAT 0x00001000 /* stat data changed */ +#define EV_IDLE 0x00002000 /* event loop is idling */ +#define EV_PREPARE 0x00004000 /* event loop about to poll */ +#define EV_CHECK 0x00008000 /* event loop finished poll */ +#define EV_EMBED 0x00010000 /* embedded event loop needs sweep */ +#define EV_FORK 0x00020000 /* event loop resumed in child */ +#define EV_ASYNC 0x00040000 /* async intra-loop signal */ +#define EV_CUSTOM 0x01000000 /* for use by user code */ +#define EV_ERROR 0x80000000 /* sent when an error occurs */ + +/* can be used to add custom fields to all watchers, while losing binary compatibility */ +#ifndef EV_COMMON +# define EV_COMMON void *data; +#endif +#ifndef EV_PROTOTYPES +# define EV_PROTOTYPES 1 +#endif + +#define EV_VERSION_MAJOR 3 +#define EV_VERSION_MINOR 9 + +#ifndef EV_CB_DECLARE +# define EV_CB_DECLARE(type) void (*cb)(EV_P_ struct type *w, int revents); +#endif +#ifndef EV_CB_INVOKE +# define EV_CB_INVOKE(watcher,revents) (watcher)->cb (EV_A_ (watcher), (revents)) +#endif + +/* + * struct member types: + * private: you may look at them, but not change them, + * and they might not mean anything to you. + * ro: can be read anytime, but only changed when the watcher isn't active. + * rw: can be read and modified anytime, even when the watcher is active. 
+ * + * some internal details that might be helpful for debugging: + * + * active is either 0, which means the watcher is not active, + * or the array index of the watcher (periodics, timers) + * or the array index + 1 (most other watchers) + * or simply 1 for watchers that aren't in some array. + * pending is either 0, in which case the watcher isn't, + * or the array index + 1 in the pendings array. + */ + +#if EV_MINPRI == EV_MAXPRI +# define EV_DECL_PRIORITY +#else +# define EV_DECL_PRIORITY int priority; +#endif + +/* shared by all watchers */ +#define EV_WATCHER(type) \ + int active; /* private */ \ + int pending; /* private */ \ + EV_DECL_PRIORITY /* private */ \ + EV_COMMON /* rw */ \ + EV_CB_DECLARE (type) /* private */ + +#define EV_WATCHER_LIST(type) \ + EV_WATCHER (type) \ + struct ev_watcher_list *next; /* private */ + +#define EV_WATCHER_TIME(type) \ + EV_WATCHER (type) \ + ev_tstamp at; /* private */ + +/* base class, nothing to see here unless you subclass */ +typedef struct ev_watcher +{ + EV_WATCHER (ev_watcher) +} ev_watcher; + +/* base class, nothing to see here unless you subclass */ +typedef struct ev_watcher_list +{ + EV_WATCHER_LIST (ev_watcher_list) +} ev_watcher_list; + +/* base class, nothing to see here unless you subclass */ +typedef struct ev_watcher_time +{ + EV_WATCHER_TIME (ev_watcher_time) +} ev_watcher_time; + +/* invoked when fd is either EV_READable or EV_WRITEable */ +/* revent EV_READ, EV_WRITE */ +typedef struct ev_io +{ + EV_WATCHER_LIST (ev_io) + + int fd; /* ro */ + int events; /* ro */ +} ev_io; + +/* invoked after a specific time, repeatable (based on monotonic clock) */ +/* revent EV_TIMEOUT */ +typedef struct ev_timer +{ + EV_WATCHER_TIME (ev_timer) + + ev_tstamp repeat; /* rw */ +} ev_timer; + +/* invoked at some specific time, possibly repeating at regular intervals (based on UTC) */ +/* revent EV_PERIODIC */ +typedef struct ev_periodic +{ + EV_WATCHER_TIME (ev_periodic) + + ev_tstamp offset; /* rw */ + ev_tstamp 
interval; /* rw */ + ev_tstamp (*reschedule_cb)(struct ev_periodic *w, ev_tstamp now); /* rw */ +} ev_periodic; + +/* invoked when the given signal has been received */ +/* revent EV_SIGNAL */ +typedef struct ev_signal +{ + EV_WATCHER_LIST (ev_signal) + + int signum; /* ro */ +} ev_signal; + +/* invoked when sigchld is received and waitpid indicates the given pid */ +/* revent EV_CHILD */ +/* does not support priorities */ +typedef struct ev_child +{ + EV_WATCHER_LIST (ev_child) + + int flags; /* private */ + int pid; /* ro */ + int rpid; /* rw, holds the received pid */ + int rstatus; /* rw, holds the exit status, use the macros from sys/wait.h */ +} ev_child; + +#if EV_STAT_ENABLE +/* st_nlink = 0 means missing file or other error */ +# ifdef _WIN32 +typedef struct _stati64 ev_statdata; +# else +typedef struct stat ev_statdata; +# endif + +/* invoked each time the stat data changes for a given path */ +/* revent EV_STAT */ +typedef struct ev_stat +{ + EV_WATCHER_LIST (ev_stat) + + ev_timer timer; /* private */ + ev_tstamp interval; /* ro */ + const char *path; /* ro */ + ev_statdata prev; /* ro */ + ev_statdata attr; /* ro */ + + int wd; /* wd for inotify, fd for kqueue */ +} ev_stat; +#endif + +#if EV_IDLE_ENABLE +/* invoked when the nothing else needs to be done, keeps the process from blocking */ +/* revent EV_IDLE */ +typedef struct ev_idle +{ + EV_WATCHER (ev_idle) +} ev_idle; +#endif + +/* invoked for each run of the mainloop, just before the blocking call */ +/* you can still change events in any way you like */ +/* revent EV_PREPARE */ +typedef struct ev_prepare +{ + EV_WATCHER (ev_prepare) +} ev_prepare; + +/* invoked for each run of the mainloop, just after the blocking call */ +/* revent EV_CHECK */ +typedef struct ev_check +{ + EV_WATCHER (ev_check) +} ev_check; + +#if EV_FORK_ENABLE +/* the callback gets invoked before check in the child process when a fork was detected */ +typedef struct ev_fork +{ + EV_WATCHER (ev_fork) +} ev_fork; +#endif + +#if 
EV_EMBED_ENABLE +/* used to embed an event loop inside another */ +/* the callback gets invoked when the event loop has handled events, and can be 0 */ +typedef struct ev_embed +{ + EV_WATCHER (ev_embed) + + struct ev_loop *other; /* ro */ + ev_io io; /* private */ + ev_prepare prepare; /* private */ + ev_check check; /* unused */ + ev_timer timer; /* unused */ + ev_periodic periodic; /* unused */ + ev_idle idle; /* unused */ + ev_fork fork; /* private */ +} ev_embed; +#endif + +#if EV_ASYNC_ENABLE +/* invoked when somebody calls ev_async_send on the watcher */ +/* revent EV_ASYNC */ +typedef struct ev_async +{ + EV_WATCHER (ev_async) + + EV_ATOMIC_T sent; /* private */ +} ev_async; + +# define ev_async_pending(w) (+(w)->sent) +#endif + +/* the presence of this union forces similar struct layout */ +union ev_any_watcher +{ + struct ev_watcher w; + struct ev_watcher_list wl; + + struct ev_io io; + struct ev_timer timer; + struct ev_periodic periodic; + struct ev_signal signal; + struct ev_child child; +#if EV_STAT_ENABLE + struct ev_stat stat; +#endif +#if EV_IDLE_ENABLE + struct ev_idle idle; +#endif + struct ev_prepare prepare; + struct ev_check check; +#if EV_FORK_ENABLE + struct ev_fork fork; +#endif +#if EV_EMBED_ENABLE + struct ev_embed embed; +#endif +#if EV_ASYNC_ENABLE + struct ev_async async; +#endif +}; + +/* bits for ev_default_loop and ev_loop_new */ +/* the default */ +#define EVFLAG_AUTO 0x00000000U /* not quite a mask */ +/* flag bits */ +#define EVFLAG_NOENV 0x01000000U /* do NOT consult environment */ +#define EVFLAG_FORKCHECK 0x02000000U /* check for a fork in each iteration */ +/* debugging/feature disable */ +#define EVFLAG_NOINOTIFY 0x00100000U /* do not attempt to use inotify */ +#define EVFLAG_NOSIGFD 0 /* compatibility to pre-3.9 */ +#define EVFLAG_SIGNALFD 0x00200000U /* attempt to use signalfd */ +/* method bits to be ored together */ +#define EVBACKEND_SELECT 0x00000001U /* about anywhere */ +#define EVBACKEND_POLL 0x00000002U /* !win */ 
+#define EVBACKEND_EPOLL 0x00000004U /* linux */ +#define EVBACKEND_KQUEUE 0x00000008U /* bsd */ +#define EVBACKEND_DEVPOLL 0x00000010U /* solaris 8 */ /* NYI */ +#define EVBACKEND_PORT 0x00000020U /* solaris 10 */ +#define EVBACKEND_ALL 0x0000003FU + +#if EV_PROTOTYPES +int ev_version_major (void); +int ev_version_minor (void); + +unsigned int ev_supported_backends (void); +unsigned int ev_recommended_backends (void); +unsigned int ev_embeddable_backends (void); + +ev_tstamp ev_time (void); +void ev_sleep (ev_tstamp delay); /* sleep for a while */ + +/* Sets the allocation function to use, works like realloc. + * It is used to allocate and free memory. + * If it returns zero when memory needs to be allocated, the library might abort + * or take some potentially destructive action. + * The default is your system realloc function. + */ +void ev_set_allocator (void *(*cb)(void *ptr, long size)); + +/* set the callback function to call on a + * retryable syscall error + * (such as failed select, poll, epoll_wait) + */ +void ev_set_syserr_cb (void (*cb)(const char *msg)); + +#if EV_MULTIPLICITY +EV_INLINE struct ev_loop * +ev_default_loop_uc (void) +{ + extern struct ev_loop *ev_default_loop_ptr; + + return ev_default_loop_ptr; +} + +/* the default loop is the only one that handles signals and child watchers */ +/* you can call this as often as you like */ +EV_INLINE struct ev_loop * +ev_default_loop (unsigned int flags) +{ + struct ev_loop *loop = ev_default_loop_uc (); + + if (!loop) + { + extern struct ev_loop *ev_default_loop_init (unsigned int flags); + + loop = ev_default_loop_init (flags); + } + + return loop; +} + +/* create and destroy alternative loops that don't handle signals */ +struct ev_loop *ev_loop_new (unsigned int flags); +void ev_loop_destroy (EV_P); +void ev_loop_fork (EV_P); + +ev_tstamp ev_now (EV_P); /* time w.r.t. 
timers and the eventloop, updated after each poll */ + +#else + +int ev_default_loop (unsigned int flags); /* returns true when successful */ + +EV_INLINE ev_tstamp +ev_now (void) +{ + extern ev_tstamp ev_rt_now; + + return ev_rt_now; +} +#endif /* multiplicity */ + +EV_INLINE int +ev_is_default_loop (EV_P) +{ +#if EV_MULTIPLICITY + extern struct ev_loop *ev_default_loop_ptr; + + return !!(EV_A == ev_default_loop_ptr); +#else + return 1; +#endif +} + +void ev_default_destroy (void); /* destroy the default loop */ +/* this needs to be called after fork, to duplicate the default loop */ +/* if you create alternative loops you have to call ev_loop_fork on them */ +/* you can call it in either the parent or the child */ +/* you can actually call it at any time, anywhere :) */ +void ev_default_fork (void); + +unsigned int ev_backend (EV_P); /* backend in use by loop */ + +void ev_now_update (EV_P); /* update event loop time */ + +#if EV_WALK_ENABLE +/* walk (almost) all watchers in the loop of a given type, invoking the */ +/* callback on every such watcher. The callback might stop the watcher, */ +/* but do nothing else with the loop */ +void ev_walk (EV_P_ int types, void (*cb)(EV_P_ int type, void *w)); +#endif + +#endif /* prototypes */ + +#define EVLOOP_NONBLOCK 1 /* do not block/wait */ +#define EVLOOP_ONESHOT 2 /* block *once* only */ +#define EVUNLOOP_CANCEL 0 /* undo unloop */ +#define EVUNLOOP_ONE 1 /* unloop once */ +#define EVUNLOOP_ALL 2 /* unloop all loops */ + +#if EV_PROTOTYPES +void ev_loop (EV_P_ int flags); +void ev_unloop (EV_P_ int how); /* set to 1 to break out of event loop, set to 2 to break out of all event loops */ + +/* + * ref/unref can be used to add or remove a refcount on the mainloop. every watcher + * keeps one reference. if you have a long-running watcher you never unregister that + * should not keep ev_loop from running, unref() after starting, and ref() before stopping. 
+ */ +void ev_ref (EV_P); +void ev_unref (EV_P); + +/* + * convenience function, wait for a single event, without registering an event watcher + * if timeout is < 0, do wait indefinitely + */ +void ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, void *arg), void *arg); + +# if EV_MINIMAL < 2 +unsigned int ev_loop_count (EV_P); /* number of loop iterations */ +unsigned int ev_loop_depth (EV_P); /* #ev_loop enters - #ev_loop leaves */ +void ev_loop_verify (EV_P); /* abort if loop data corrupted */ + +void ev_set_io_collect_interval (EV_P_ ev_tstamp interval); /* sleep at least this time, default 0 */ +void ev_set_timeout_collect_interval (EV_P_ ev_tstamp interval); /* sleep at least this time, default 0 */ + +/* advanced stuff for threading etc. support, see docs */ +void ev_set_userdata (EV_P_ void *data); +void *ev_userdata (EV_P); +void ev_set_invoke_pending_cb (EV_P_ void (*invoke_pending_cb)(EV_P)); +void ev_set_loop_release_cb (EV_P_ void (*release)(EV_P), void (*acquire)(EV_P)); + +unsigned int ev_pending_count (EV_P); /* number of pending events, if any */ +void ev_invoke_pending (EV_P); /* invoke all pending watchers */ + +/* + * stop/start the timer handling. 
+ */ +void ev_suspend (EV_P); +void ev_resume (EV_P); +#endif + +#endif + +/* these may evaluate ev multiple times, and the other arguments at most once */ +/* either use ev_init + ev_TYPE_set, or the ev_TYPE_init macro, below, to first initialise a watcher */ +#define ev_init(ev,cb_) do { \ + ((ev_watcher *)(void *)(ev))->active = \ + ((ev_watcher *)(void *)(ev))->pending = 0; \ + ev_set_priority ((ev), 0); \ + ev_set_cb ((ev), cb_); \ +} while (0) + +#define ev_io_set(ev,fd_,events_) do { (ev)->fd = (fd_); (ev)->events = (events_) | EV__IOFDSET; } while (0) +#define ev_timer_set(ev,after_,repeat_) do { ((ev_watcher_time *)(ev))->at = (after_); (ev)->repeat = (repeat_); } while (0) +#define ev_periodic_set(ev,ofs_,ival_,rcb_) do { (ev)->offset = (ofs_); (ev)->interval = (ival_); (ev)->reschedule_cb = (rcb_); } while (0) +#define ev_signal_set(ev,signum_) do { (ev)->signum = (signum_); } while (0) +#define ev_child_set(ev,pid_,trace_) do { (ev)->pid = (pid_); (ev)->flags = !!(trace_); } while (0) +#define ev_stat_set(ev,path_,interval_) do { (ev)->path = (path_); (ev)->interval = (interval_); (ev)->wd = -2; } while (0) +#define ev_idle_set(ev) /* nop, yes, this is a serious in-joke */ +#define ev_prepare_set(ev) /* nop, yes, this is a serious in-joke */ +#define ev_check_set(ev) /* nop, yes, this is a serious in-joke */ +#define ev_embed_set(ev,other_) do { (ev)->other = (other_); } while (0) +#define ev_fork_set(ev) /* nop, yes, this is a serious in-joke */ +#define ev_async_set(ev) do { (ev)->sent = 0; } while (0) + +#define ev_io_init(ev,cb,fd,events) do { ev_init ((ev), (cb)); ev_io_set ((ev),(fd),(events)); } while (0) +#define ev_timer_init(ev,cb,after,repeat) do { ev_init ((ev), (cb)); ev_timer_set ((ev),(after),(repeat)); } while (0) +#define ev_periodic_init(ev,cb,ofs,ival,rcb) do { ev_init ((ev), (cb)); ev_periodic_set ((ev),(ofs),(ival),(rcb)); } while (0) +#define ev_signal_init(ev,cb,signum) do { ev_init ((ev), (cb)); ev_signal_set ((ev), (signum)); } 
while (0) +#define ev_child_init(ev,cb,pid,trace) do { ev_init ((ev), (cb)); ev_child_set ((ev),(pid),(trace)); } while (0) +#define ev_stat_init(ev,cb,path,interval) do { ev_init ((ev), (cb)); ev_stat_set ((ev),(path),(interval)); } while (0) +#define ev_idle_init(ev,cb) do { ev_init ((ev), (cb)); ev_idle_set ((ev)); } while (0) +#define ev_prepare_init(ev,cb) do { ev_init ((ev), (cb)); ev_prepare_set ((ev)); } while (0) +#define ev_check_init(ev,cb) do { ev_init ((ev), (cb)); ev_check_set ((ev)); } while (0) +#define ev_embed_init(ev,cb,other) do { ev_init ((ev), (cb)); ev_embed_set ((ev),(other)); } while (0) +#define ev_fork_init(ev,cb) do { ev_init ((ev), (cb)); ev_fork_set ((ev)); } while (0) +#define ev_async_init(ev,cb) do { ev_init ((ev), (cb)); ev_async_set ((ev)); } while (0) + +#define ev_is_pending(ev) (0 + ((ev_watcher *)(void *)(ev))->pending) /* ro, true when watcher is waiting for callback invocation */ +#define ev_is_active(ev) (0 + ((ev_watcher *)(void *)(ev))->active) /* ro, true when the watcher has been started */ + +#define ev_cb(ev) (ev)->cb /* rw */ + +#if EV_MINPRI == EV_MAXPRI +# define ev_priority(ev) ((ev), EV_MINPRI) +# define ev_set_priority(ev,pri) ((ev), (pri)) +#else +# define ev_priority(ev) (+(((ev_watcher *)(void *)(ev))->priority)) +# define ev_set_priority(ev,pri) ( (ev_watcher *)(void *)(ev))->priority = (pri) +#endif + +#define ev_periodic_at(ev) (+((ev_watcher_time *)(ev))->at) + +#ifndef ev_set_cb +# define ev_set_cb(ev,cb_) ev_cb (ev) = (cb_) +#endif + +/* starting (enabling, adding) a watcher does nothing if it is already running */ +/* stopping (disabling, deleting) a watcher does nothing unless it's already running */ +#if EV_PROTOTYPES + +/* feeds an event into a watcher as if the event actually occurred */ +/* accepts any ev_watcher type */ +void ev_feed_event (EV_P_ void *w, int revents); +void ev_feed_fd_event (EV_P_ int fd, int revents); +void ev_feed_signal_event (EV_P_ int signum); +void ev_invoke (EV_P_ void *w, 
int revents); +int ev_clear_pending (EV_P_ void *w); + +void ev_io_start (EV_P_ ev_io *w); +void ev_io_stop (EV_P_ ev_io *w); + +void ev_timer_start (EV_P_ ev_timer *w); +void ev_timer_stop (EV_P_ ev_timer *w); +/* stops if active and no repeat, restarts if active and repeating, starts if inactive and repeating */ +void ev_timer_again (EV_P_ ev_timer *w); +/* return remaining time */ +ev_tstamp ev_timer_remaining (EV_P_ ev_timer *w); + +#if EV_PERIODIC_ENABLE +void ev_periodic_start (EV_P_ ev_periodic *w); +void ev_periodic_stop (EV_P_ ev_periodic *w); +void ev_periodic_again (EV_P_ ev_periodic *w); +#endif + +/* only supported in the default loop */ +void ev_signal_start (EV_P_ ev_signal *w); +void ev_signal_stop (EV_P_ ev_signal *w); + +/* only supported in the default loop */ +void ev_child_start (EV_P_ ev_child *w); +void ev_child_stop (EV_P_ ev_child *w); + +# if EV_STAT_ENABLE +void ev_stat_start (EV_P_ ev_stat *w); +void ev_stat_stop (EV_P_ ev_stat *w); +void ev_stat_stat (EV_P_ ev_stat *w); +# endif + +# if EV_IDLE_ENABLE +void ev_idle_start (EV_P_ ev_idle *w); +void ev_idle_stop (EV_P_ ev_idle *w); +# endif + +void ev_prepare_start (EV_P_ ev_prepare *w); +void ev_prepare_stop (EV_P_ ev_prepare *w); + +void ev_check_start (EV_P_ ev_check *w); +void ev_check_stop (EV_P_ ev_check *w); + +# if EV_FORK_ENABLE +void ev_fork_start (EV_P_ ev_fork *w); +void ev_fork_stop (EV_P_ ev_fork *w); +# endif + +# if EV_EMBED_ENABLE +/* only supported when loop to be embedded is in fact embeddable */ +void ev_embed_start (EV_P_ ev_embed *w); +void ev_embed_stop (EV_P_ ev_embed *w); +void ev_embed_sweep (EV_P_ ev_embed *w); +# endif + +# if EV_ASYNC_ENABLE +void ev_async_start (EV_P_ ev_async *w); +void ev_async_stop (EV_P_ ev_async *w); +void ev_async_send (EV_P_ ev_async *w); +# endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/libev/ev_epoll.c b/libev/ev_epoll.c new file mode 100644 index 0000000..f7e3d60 --- /dev/null +++ b/libev/ev_epoll.c @@ -0,0 
+1,228 @@ +/* + * libev epoll fd activity backend + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. 
If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +/* + * general notes about epoll: + * + * a) epoll silently removes fds from the fd set. as nothing tells us + * that an fd has been removed otherwise, we have to continually + * "rearm" fds that we suspect *might* have changed (same + * problem with kqueue, but much less costly there). + * b) the fact that ADD != MOD creates a lot of extra syscalls due to a) + * and seems not to have any advantage. + * c) the inability to handle fork or file descriptors (think dup) + * limits the applicability over poll, so this is not a generic + * poll replacement. + * + * lots of "weird code" and complication handling in this file is due + * to these design problems with epoll, as we try very hard to avoid + * epoll_ctl syscalls for common usage patterns and handle the breakage + * ensuing from receiving events for closed and otherwise long gone + * file descriptors. + */ + +#include <sys/epoll.h> + +static void +epoll_modify (EV_P_ int fd, int oev, int nev) +{ + struct epoll_event ev; + unsigned char oldmask; + + /* + * we handle EPOLL_CTL_DEL by ignoring it here + * on the assumption that the fd is gone anyways + * if that is wrong, we have to handle the spurious + * event in epoll_poll. + * if the fd is added again, we try to ADD it, and, if that + * fails, we assume it still has the same eventmask. + */ + if (!nev) + return; + + oldmask = anfds [fd].emask; + anfds [fd].emask = nev; + + /* store the generation counter in the upper 32 bits, the fd in the lower 32 bits */ + ev.data.u64 = (uint64_t)(uint32_t)fd + | ((uint64_t)(uint32_t)++anfds [fd].egen << 32); + ev.events = (nev & EV_READ ? EPOLLIN : 0) + | (nev & EV_WRITE ? EPOLLOUT : 0); + + if (expect_true (!epoll_ctl (backend_fd, oev ? 
EPOLL_CTL_MOD : EPOLL_CTL_ADD, fd, &ev))) + return; + + if (expect_true (errno == ENOENT)) + { + /* if ENOENT then the fd went away, so try to do the right thing */ + if (!nev) + goto dec_egen; + + if (!epoll_ctl (backend_fd, EPOLL_CTL_ADD, fd, &ev)) + return; + } + else if (expect_true (errno == EEXIST)) + { + /* EEXIST means we ignored a previous DEL, but the fd is still active */ + /* if the kernel mask is the same as the new mask, we assume it hasn't changed */ + if (oldmask == nev) + goto dec_egen; + + if (!epoll_ctl (backend_fd, EPOLL_CTL_MOD, fd, &ev)) + return; + } + + fd_kill (EV_A_ fd); + +dec_egen: + /* we didn't successfully call epoll_ctl, so decrement the generation counter again */ + --anfds [fd].egen; +} + +static void +epoll_poll (EV_P_ ev_tstamp timeout) +{ + int i; + int eventcnt; + + /* epoll wait times cannot be larger than (LONG_MAX - 999UL) / HZ msecs, which is below */ + /* the default libev max wait time, however. */ + EV_RELEASE_CB; + eventcnt = epoll_wait (backend_fd, epoll_events, epoll_eventmax, (int)ceil (timeout * 1000.)); + EV_ACQUIRE_CB; + + if (expect_false (eventcnt < 0)) + { + if (errno != EINTR) + ev_syserr ("(libev) epoll_wait"); + + return; + } + + for (i = 0; i < eventcnt; ++i) + { + struct epoll_event *ev = epoll_events + i; + + int fd = (uint32_t)ev->data.u64; /* mask out the lower 32 bits */ + int want = anfds [fd].events; + int got = (ev->events & (EPOLLOUT | EPOLLERR | EPOLLHUP) ? EV_WRITE : 0) + | (ev->events & (EPOLLIN | EPOLLERR | EPOLLHUP) ? EV_READ : 0); + + /* check for spurious notification */ + if (expect_false ((uint32_t)anfds [fd].egen != (uint32_t)(ev->data.u64 >> 32))) + { + /* recreate kernel state */ + postfork = 1; + continue; + } + + if (expect_false (got & ~want)) + { + anfds [fd].emask = want; + + /* we received an event but are not interested in it, try mod or del */ + /* I don't think we ever need MOD, but let's handle it anyways */ + ev->events = (want & EV_READ ? EPOLLIN : 0) + | (want & EV_WRITE ? 
EPOLLOUT : 0); + + /* pre-2.6.9 kernels require a non-null pointer with EPOLL_CTL_DEL, */ + /* which is fortunately easy to do for us. */ + if (epoll_ctl (backend_fd, want ? EPOLL_CTL_MOD : EPOLL_CTL_DEL, fd, ev)) + { + postfork = 1; /* an error occured, recreate kernel state */ + continue; + } + } + + fd_event (EV_A_ fd, got); + } + + /* if the receive array was full, increase its size */ + if (expect_false (eventcnt == epoll_eventmax)) + { + ev_free (epoll_events); + epoll_eventmax = array_nextsize (sizeof (struct epoll_event), epoll_eventmax, epoll_eventmax + 1); + epoll_events = (struct epoll_event *)ev_malloc (sizeof (struct epoll_event) * epoll_eventmax); + } +} + +int inline_size +epoll_init (EV_P_ int flags) +{ +#ifdef EPOLL_CLOEXEC + backend_fd = epoll_create1 (EPOLL_CLOEXEC); + + if (backend_fd <= 0) +#endif + backend_fd = epoll_create (256); + + if (backend_fd < 0) + return 0; + + fcntl (backend_fd, F_SETFD, FD_CLOEXEC); + + backend_fudge = 0.; /* kernel sources seem to indicate this to be zero */ + backend_modify = epoll_modify; + backend_poll = epoll_poll; + + epoll_eventmax = 64; /* initial number of events receivable per poll */ + epoll_events = (struct epoll_event *)ev_malloc (sizeof (struct epoll_event) * epoll_eventmax); + + return EVBACKEND_EPOLL; +} + +void inline_size +epoll_destroy (EV_P) +{ + ev_free (epoll_events); +} + +void inline_size +epoll_fork (EV_P) +{ + close (backend_fd); + + while ((backend_fd = epoll_create (256)) < 0) + ev_syserr ("(libev) epoll_create"); + + fcntl (backend_fd, F_SETFD, FD_CLOEXEC); + + fd_rearm_all (EV_A); +} + diff --git a/libev/ev_kqueue.c b/libev/ev_kqueue.c new file mode 100644 index 0000000..0fe340b --- /dev/null +++ b/libev/ev_kqueue.c @@ -0,0 +1,196 @@ +/* + * libev kqueue backend + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. 
+ */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/queue.h> +#include <sys/event.h> +#include <string.h> +#include <errno.h> + +void inline_speed +kqueue_change (EV_P_ int fd, int filter, int flags, int fflags) +{ + ++kqueue_changecnt; + array_needsize (struct kevent, kqueue_changes, kqueue_changemax, kqueue_changecnt, EMPTY2); + + EV_SET (&kqueue_changes [kqueue_changecnt - 1], fd, filter, flags, fflags, 0, 0); +} + +#ifndef NOTE_EOF +# define NOTE_EOF 0 +#endif + +static void +kqueue_modify (EV_P_ int fd, int oev, int nev) +{ + if (oev != nev) + { + if (oev & EV_READ) + kqueue_change (EV_A_ fd, EVFILT_READ , EV_DELETE, 0); + + if (oev & EV_WRITE) + kqueue_change (EV_A_ fd, EVFILT_WRITE, EV_DELETE, 0); + } + + /* to detect close/reopen reliably, we have to re-add */ + /* event requests even when oev == nev */ + + if (nev & EV_READ) + kqueue_change (EV_A_ fd, EVFILT_READ , EV_ADD, NOTE_EOF); + + if (nev & EV_WRITE) + kqueue_change (EV_A_ fd, EVFILT_WRITE, EV_ADD, NOTE_EOF); +} + +static void +kqueue_poll (EV_P_ ev_tstamp timeout) +{ + int res, i; + struct timespec ts; + + /* need to resize so there is enough space for errors */ + if (kqueue_changecnt > kqueue_eventmax) + { + ev_free (kqueue_events); + kqueue_eventmax = array_nextsize (sizeof (struct kevent), kqueue_eventmax, kqueue_changecnt); + kqueue_events = (struct kevent *)ev_malloc (sizeof (struct kevent) * kqueue_eventmax); + } + + EV_RELEASE_CB; + ts.tv_sec = (time_t)timeout; + ts.tv_nsec = (long)((timeout - (ev_tstamp)ts.tv_sec) * 1e9); + res = kevent (backend_fd, kqueue_changes, kqueue_changecnt, kqueue_events, kqueue_eventmax, &ts); + EV_ACQUIRE_CB; + kqueue_changecnt = 0; + + if (expect_false (res < 0)) + { + if (errno != EINTR) + ev_syserr ("(libev) kevent"); + + return; + } + + for (i = 0; i < res; ++i) + { + int fd = kqueue_events [i].ident; + + if (expect_false (kqueue_events [i].flags & EV_ERROR)) + { + int err = kqueue_events [i].data; + + /* we are only interested in errors for fds that we are interested in :) */ + if (anfds [fd].events) + { + if (err 
== ENOENT) /* resubmit changes on ENOENT */ + kqueue_modify (EV_A_ fd, 0, anfds [fd].events); + else if (err == EBADF) /* on EBADF, we re-check the fd */ + { + if (fd_valid (fd)) + kqueue_modify (EV_A_ fd, 0, anfds [fd].events); + else + fd_kill (EV_A_ fd); + } + else /* on all other errors, we error out on the fd */ + fd_kill (EV_A_ fd); + } + } + else + fd_event ( + EV_A_ + fd, + kqueue_events [i].filter == EVFILT_READ ? EV_READ + : kqueue_events [i].filter == EVFILT_WRITE ? EV_WRITE + : 0 + ); + } + + if (expect_false (res == kqueue_eventmax)) + { + ev_free (kqueue_events); + kqueue_eventmax = array_nextsize (sizeof (struct kevent), kqueue_eventmax, kqueue_eventmax + 1); + kqueue_events = (struct kevent *)ev_malloc (sizeof (struct kevent) * kqueue_eventmax); + } +} + +int inline_size +kqueue_init (EV_P_ int flags) +{ + /* Initalize the kernel queue */ + if ((backend_fd = kqueue ()) < 0) + return 0; + + fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* not sure if necessary, hopefully doesn't hurt */ + + backend_fudge = 0.; + backend_modify = kqueue_modify; + backend_poll = kqueue_poll; + + kqueue_eventmax = 64; /* initial number of events receivable per poll */ + kqueue_events = (struct kevent *)ev_malloc (sizeof (struct kevent) * kqueue_eventmax); + + kqueue_changes = 0; + kqueue_changemax = 0; + kqueue_changecnt = 0; + + return EVBACKEND_KQUEUE; +} + +void inline_size +kqueue_destroy (EV_P) +{ + ev_free (kqueue_events); + ev_free (kqueue_changes); +} + +void inline_size +kqueue_fork (EV_P) +{ + close (backend_fd); + + while ((backend_fd = kqueue ()) < 0) + ev_syserr ("(libev) kqueue"); + + fcntl (backend_fd, F_SETFD, FD_CLOEXEC); + + /* re-register interest in fds */ + fd_rearm_all (EV_A); +} + diff --git a/libev/ev_poll.c b/libev/ev_poll.c new file mode 100644 index 0000000..178e458 --- /dev/null +++ b/libev/ev_poll.c @@ -0,0 +1,144 @@ +/* + * libev poll fd activity backend + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. 
+ */ + +#include <poll.h> + +void inline_size +pollidx_init (int *base, int count) +{ + /* consider using memset (.., -1, ...), which is practically guaranteed + * to work on all systems implementing poll */ + while (count--) + *base++ = -1; +} + +static void +poll_modify (EV_P_ int fd, int oev, int nev) +{ + int idx; + + if (oev == nev) + return; + + array_needsize (int, pollidxs, pollidxmax, fd + 1, pollidx_init); + + idx = pollidxs [fd]; + + if (idx < 0) /* need to allocate a new pollfd */ + { + pollidxs [fd] = idx = pollcnt++; + array_needsize (struct pollfd, polls, pollmax, pollcnt, EMPTY2); + polls [idx].fd = fd; + } + + assert (polls [idx].fd == fd); + + if (nev) + polls [idx].events = + (nev & EV_READ ? POLLIN : 0) + | (nev & EV_WRITE ? POLLOUT : 0); + else /* remove pollfd */ + { + pollidxs [fd] = -1; + + if (expect_true (idx < --pollcnt)) + { + polls [idx] = polls [pollcnt]; + pollidxs [polls [idx].fd] = idx; + } + } +} + +static void +poll_poll (EV_P_ ev_tstamp timeout) +{ + struct pollfd *p; + int res; + + EV_RELEASE_CB; + res = poll (polls, pollcnt, (int)ceil (timeout * 1000.)); + EV_ACQUIRE_CB; + + if (expect_false (res < 0)) + { + if (errno == EBADF) + fd_ebadf (EV_A); + else if (errno == ENOMEM && !syserr_cb) + fd_enomem (EV_A); + else if (errno != EINTR) + ev_syserr ("(libev) poll"); + } + else + for (p = polls; res; ++p) + if (expect_false (p->revents)) /* this expect is debatable */ + { + --res; + + if (expect_false (p->revents & POLLNVAL)) + fd_kill (EV_A_ p->fd); + else + fd_event ( + EV_A_ + p->fd, + (p->revents & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0) + | (p->revents & (POLLIN | POLLERR | POLLHUP) ? 
EV_READ : 0) + ); + } +} + +int inline_size +poll_init (EV_P_ int flags) +{ + backend_fudge = 0.; /* posix says this is zero */ + backend_modify = poll_modify; + backend_poll = poll_poll; + + pollidxs = 0; pollidxmax = 0; + polls = 0; pollmax = 0; pollcnt = 0; + + return EVBACKEND_POLL; +} + +void inline_size +poll_destroy (EV_P) +{ + ev_free (pollidxs); + ev_free (polls); +} + diff --git a/libev/ev_port.c b/libev/ev_port.c new file mode 100644 index 0000000..47da929 --- /dev/null +++ b/libev/ev_port.c @@ -0,0 +1,165 @@ +/* + * libev solaris event port backend + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#include <sys/types.h> +#include <sys/time.h> +#include <poll.h> +#include <port.h> +#include <string.h> +#include <errno.h> + +void inline_speed +port_associate_and_check (EV_P_ int fd, int ev) +{ + if (0 > + port_associate ( + backend_fd, PORT_SOURCE_FD, fd, + (ev & EV_READ ? POLLIN : 0) + | (ev & EV_WRITE ? POLLOUT : 0), + 0 + ) + ) + { + if (errno == EBADFD) + fd_kill (EV_A_ fd); + else + ev_syserr ("(libev) port_associate"); + } +} + +static void +port_modify (EV_P_ int fd, int oev, int nev) +{ + /* we need to reassociate no matter what, as closes are + * once more silently being discarded.
+ */ + if (!nev) + { + if (oev) + port_dissociate (backend_fd, PORT_SOURCE_FD, fd); + } + else + port_associate_and_check (EV_A_ fd, nev); +} + +static void +port_poll (EV_P_ ev_tstamp timeout) +{ + int res, i; + struct timespec ts; + uint_t nget = 1; + + EV_RELEASE_CB; + ts.tv_sec = (time_t)timeout; + ts.tv_nsec = (long)((timeout - (ev_tstamp)ts.tv_sec) * 1e9); /* scale the fractional second before truncating, else tv_nsec is always 0 */ + res = port_getn (backend_fd, port_events, port_eventmax, &nget, &ts); + EV_ACQUIRE_CB; + + if (res == -1) + { + if (errno != EINTR && errno != ETIME) + ev_syserr ("(libev) port_getn (see http://bugs.opensolaris.org/view_bug.do?bug_id=6268715, try LIBEV_FLAGS=3 env variable)"); + + return; + } + + for (i = 0; i < nget; ++i) + { + if (port_events [i].portev_source == PORT_SOURCE_FD) + { + int fd = port_events [i].portev_object; + + fd_event ( + EV_A_ + fd, + (port_events [i].portev_events & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0) + | (port_events [i].portev_events & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0) + ); + + port_associate_and_check (EV_A_ fd, anfds [fd].events); + } + } + + if (expect_false (nget == port_eventmax)) + { + ev_free (port_events); + port_eventmax = array_nextsize (sizeof (port_event_t), port_eventmax, port_eventmax + 1); + port_events = (port_event_t *)ev_malloc (sizeof (port_event_t) * port_eventmax); + } +} + +int inline_size +port_init (EV_P_ int flags) +{ + /* Initialize the kernel queue */ + if ((backend_fd = port_create ()) < 0) + return 0; + + fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* not sure if necessary, hopefully doesn't hurt */ + + backend_fudge = 1e-3; /* needed to compensate for port_getn returning early */ + backend_modify = port_modify; + backend_poll = port_poll; + + port_eventmax = 64; /* initial number of events receivable per poll */ + port_events = (port_event_t *)ev_malloc (sizeof (port_event_t) * port_eventmax); + + return EVBACKEND_PORT; +} + +void inline_size +port_destroy (EV_P) +{ + ev_free (port_events); +} + +void inline_size +port_fork (EV_P) +{ + 
close (backend_fd); + + while ((backend_fd = port_create ()) < 0) + ev_syserr ("(libev) port"); + + fcntl (backend_fd, F_SETFD, FD_CLOEXEC); + + /* re-register interest in fds */ + fd_rearm_all (EV_A); +} + diff --git a/libev/ev_select.c b/libev/ev_select.c new file mode 100644 index 0000000..818a63e --- /dev/null +++ b/libev/ev_select.c @@ -0,0 +1,308 @@ +/* + * libev select fd activity backend + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. 
If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#ifndef _WIN32 +/* for unix systems */ +# include <sys/select.h> +# include <inttypes.h> +#endif + +#ifndef EV_SELECT_USE_FD_SET +# ifdef NFDBITS +# define EV_SELECT_USE_FD_SET 0 +# else +# define EV_SELECT_USE_FD_SET 1 +# endif +#endif + +#if EV_SELECT_IS_WINSOCKET +# undef EV_SELECT_USE_FD_SET +# define EV_SELECT_USE_FD_SET 1 +# undef NFDBITS +# define NFDBITS 0 +#endif + +#if !EV_SELECT_USE_FD_SET +# define NFDBYTES (NFDBITS / 8) +#endif + +#include <string.h> + +static void +select_modify (EV_P_ int fd, int oev, int nev) +{ + if (oev == nev) + return; + + { +#if EV_SELECT_USE_FD_SET + + #if EV_SELECT_IS_WINSOCKET + SOCKET handle = anfds [fd].handle; + #else + int handle = fd; + #endif + + assert (("libev: fd >= FD_SETSIZE passed to fd_set-based select backend", fd < FD_SETSIZE)); + + /* FD_SET is broken on windows (it adds the fd to a set twice or more, + * which eventually leads to overflows). Need to call it only on changes.
+ */ + #if EV_SELECT_IS_WINSOCKET + if ((oev ^ nev) & EV_READ) + #endif + if (nev & EV_READ) + FD_SET (handle, (fd_set *)vec_ri); + else + FD_CLR (handle, (fd_set *)vec_ri); + + #if EV_SELECT_IS_WINSOCKET + if ((oev ^ nev) & EV_WRITE) + #endif + if (nev & EV_WRITE) + FD_SET (handle, (fd_set *)vec_wi); + else + FD_CLR (handle, (fd_set *)vec_wi); + +#else + + int word = fd / NFDBITS; + fd_mask mask = 1UL << (fd % NFDBITS); + + if (expect_false (vec_max <= word)) + { + int new_max = word + 1; + + vec_ri = ev_realloc (vec_ri, new_max * NFDBYTES); + vec_ro = ev_realloc (vec_ro, new_max * NFDBYTES); /* could free/malloc */ + vec_wi = ev_realloc (vec_wi, new_max * NFDBYTES); + vec_wo = ev_realloc (vec_wo, new_max * NFDBYTES); /* could free/malloc */ + #ifdef _WIN32 + vec_eo = ev_realloc (vec_eo, new_max * NFDBYTES); /* could free/malloc */ + #endif + + for (; vec_max < new_max; ++vec_max) + ((fd_mask *)vec_ri) [vec_max] = + ((fd_mask *)vec_wi) [vec_max] = 0; + } + + ((fd_mask *)vec_ri) [word] |= mask; + if (!(nev & EV_READ)) + ((fd_mask *)vec_ri) [word] &= ~mask; + + ((fd_mask *)vec_wi) [word] |= mask; + if (!(nev & EV_WRITE)) + ((fd_mask *)vec_wi) [word] &= ~mask; +#endif + } +} + +static void +select_poll (EV_P_ ev_tstamp timeout) +{ + struct timeval tv; + int res; + int fd_setsize; + + EV_RELEASE_CB; + tv.tv_sec = (long)timeout; + tv.tv_usec = (long)((timeout - (ev_tstamp)tv.tv_sec) * 1e6); + +#if EV_SELECT_USE_FD_SET + fd_setsize = sizeof (fd_set); +#else + fd_setsize = vec_max * NFDBYTES; +#endif + + memcpy (vec_ro, vec_ri, fd_setsize); + memcpy (vec_wo, vec_wi, fd_setsize); + +#ifdef _WIN32 + /* pass in the write set as except set. + * the idea behind this is to work around a windows bug that causes + * errors to be reported as an exception and not by setting + * the writable bit. this is so uncontrollably lame. 
+ */ + memcpy (vec_eo, vec_wi, fd_setsize); + res = select (vec_max * NFDBITS, (fd_set *)vec_ro, (fd_set *)vec_wo, (fd_set *)vec_eo, &tv); +#elif EV_SELECT_USE_FD_SET + fd_setsize = anfdmax < FD_SETSIZE ? anfdmax : FD_SETSIZE; + res = select (fd_setsize, (fd_set *)vec_ro, (fd_set *)vec_wo, 0, &tv); +#else + res = select (vec_max * NFDBITS, (fd_set *)vec_ro, (fd_set *)vec_wo, 0, &tv); +#endif + EV_ACQUIRE_CB; + + if (expect_false (res < 0)) + { + #if EV_SELECT_IS_WINSOCKET + errno = WSAGetLastError (); + #endif + #ifdef WSABASEERR + /* on windows, select returns incompatible error codes, fix this */ + if (errno >= WSABASEERR && errno < WSABASEERR + 1000) + if (errno == WSAENOTSOCK) + errno = EBADF; + else + errno -= WSABASEERR; + #endif + + #ifdef _WIN32 + /* select on windows errornously returns EINVAL when no fd sets have been + * provided (this is documented). what microsoft doesn't tell you that this bug + * exists even when the fd sets _are_ provided, so we have to check for this bug + * here and emulate by sleeping manually. + * we also get EINVAL when the timeout is invalid, but we ignore this case here + * and assume that EINVAL always means: you have to wait manually. 
+ */ + if (errno == EINVAL) + { + ev_sleep (timeout); + return; + } + #endif + + if (errno == EBADF) + fd_ebadf (EV_A); + else if (errno == ENOMEM && !syserr_cb) + fd_enomem (EV_A); + else if (errno != EINTR) + ev_syserr ("(libev) select"); + + return; + } + +#if EV_SELECT_USE_FD_SET + + { + int fd; + + for (fd = 0; fd < anfdmax; ++fd) + if (anfds [fd].events) + { + int events = 0; + #if EV_SELECT_IS_WINSOCKET + SOCKET handle = anfds [fd].handle; + #else + int handle = fd; + #endif + + if (FD_ISSET (handle, (fd_set *)vec_ro)) events |= EV_READ; + if (FD_ISSET (handle, (fd_set *)vec_wo)) events |= EV_WRITE; + #ifdef _WIN32 + if (FD_ISSET (handle, (fd_set *)vec_eo)) events |= EV_WRITE; + #endif + + if (expect_true (events)) + fd_event (EV_A_ fd, events); + } + } + +#else + + { + int word, bit; + for (word = vec_max; word--; ) + { + fd_mask word_r = ((fd_mask *)vec_ro) [word]; + fd_mask word_w = ((fd_mask *)vec_wo) [word]; + #ifdef _WIN32 + word_w |= ((fd_mask *)vec_eo) [word]; + #endif + + if (word_r || word_w) + for (bit = NFDBITS; bit--; ) + { + fd_mask mask = 1UL << bit; + int events = 0; + + events |= word_r & mask ? EV_READ : 0; + events |= word_w & mask ? 
EV_WRITE : 0; + + if (expect_true (events)) + fd_event (EV_A_ word * NFDBITS + bit, events); + } + } + } + +#endif +} + +int inline_size +select_init (EV_P_ int flags) +{ + backend_fudge = 0.; /* posix says this is zero */ + backend_modify = select_modify; + backend_poll = select_poll; + +#if EV_SELECT_USE_FD_SET + vec_ri = ev_malloc (sizeof (fd_set)); FD_ZERO ((fd_set *)vec_ri); + vec_ro = ev_malloc (sizeof (fd_set)); + vec_wi = ev_malloc (sizeof (fd_set)); FD_ZERO ((fd_set *)vec_wi); + vec_wo = ev_malloc (sizeof (fd_set)); + #ifdef _WIN32 + vec_eo = ev_malloc (sizeof (fd_set)); + #endif +#else + vec_max = 0; + vec_ri = 0; + vec_ro = 0; + vec_wi = 0; + vec_wo = 0; + #ifdef _WIN32 + vec_eo = 0; + #endif +#endif + + return EVBACKEND_SELECT; +} + +void inline_size +select_destroy (EV_P) +{ + ev_free (vec_ri); + ev_free (vec_ro); + ev_free (vec_wi); + ev_free (vec_wo); + #ifdef _WIN32 + ev_free (vec_eo); + #endif +} + + diff --git a/libev/ev_vars.h b/libev/ev_vars.h new file mode 100644 index 0000000..da53ee8 --- /dev/null +++ b/libev/ev_vars.h @@ -0,0 +1,187 @@ +/* + * loop member variable declarations + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. 
+ */ + +#define VARx(type,name) VAR(name, type name) + +VARx(ev_tstamp, now_floor) /* last time we refreshed rt_time */ +VARx(ev_tstamp, mn_now) /* monotonic clock "now" */ +VARx(ev_tstamp, rtmn_diff) /* difference realtime - monotonic time */ + +VARx(ev_tstamp, io_blocktime) +VARx(ev_tstamp, timeout_blocktime) + +VARx(int, backend) +VARx(int, activecnt) /* total number of active events ("refcount") */ +VARx(unsigned char, loop_done) /* signal by ev_unloop */ + +VARx(int, backend_fd) +VARx(ev_tstamp, backend_fudge) /* assumed typical timer resolution */ +VAR (backend_modify, void (*backend_modify)(EV_P_ int fd, int oev, int nev)) +VAR (backend_poll , void (*backend_poll)(EV_P_ ev_tstamp timeout)) + +VARx(ANFD *, anfds) +VARx(int, anfdmax) + +VAR (pendings, ANPENDING *pendings [NUMPRI]) +VAR (pendingmax, int pendingmax [NUMPRI]) +VAR (pendingcnt, int pendingcnt [NUMPRI]) +VARx(ev_prepare, pending_w) /* dummy pending watcher */ + +/* for reverse feeding of events */ +VARx(W *, rfeeds) +VARx(int, rfeedmax) +VARx(int, rfeedcnt) + +#if EV_USE_EVENTFD || EV_GENWRAP +VARx(int, evfd) +#endif +VAR (evpipe, int evpipe [2]) +VARx(ev_io, pipe_w) + +#if !defined(_WIN32) || EV_GENWRAP +VARx(pid_t, curpid) +#endif + +VARx(char, postfork) /* true if we need to recreate kernel state after fork */ + +#if EV_USE_SELECT || EV_GENWRAP +VARx(void *, vec_ri) +VARx(void *, vec_ro) +VARx(void *, vec_wi) +VARx(void *, vec_wo) +#if defined(_WIN32) || EV_GENWRAP +VARx(void *, vec_eo) +#endif +VARx(int, vec_max) +#endif + +#if EV_USE_POLL || EV_GENWRAP +VARx(struct pollfd *, polls) +VARx(int, pollmax) +VARx(int, pollcnt) +VARx(int *, pollidxs) /* maps fds into structure indices */ +VARx(int, pollidxmax) +#endif + +#if EV_USE_EPOLL || EV_GENWRAP +VARx(struct epoll_event *, epoll_events) +VARx(int, epoll_eventmax) +#endif + +#if EV_USE_KQUEUE || EV_GENWRAP +VARx(struct kevent *, kqueue_changes) +VARx(int, kqueue_changemax) +VARx(int, kqueue_changecnt) +VARx(struct kevent *, kqueue_events) 
+VARx(int, kqueue_eventmax) +#endif + +#if EV_USE_PORT || EV_GENWRAP +VARx(struct port_event *, port_events) +VARx(int, port_eventmax) +#endif + +VARx(int *, fdchanges) +VARx(int, fdchangemax) +VARx(int, fdchangecnt) + +VARx(ANHE *, timers) +VARx(int, timermax) +VARx(int, timercnt) + +#if EV_PERIODIC_ENABLE || EV_GENWRAP +VARx(ANHE *, periodics) +VARx(int, periodicmax) +VARx(int, periodiccnt) +#endif + +#if EV_IDLE_ENABLE || EV_GENWRAP +VAR (idles, ev_idle **idles [NUMPRI]) +VAR (idlemax, int idlemax [NUMPRI]) +VAR (idlecnt, int idlecnt [NUMPRI]) +#endif +VARx(int, idleall) /* total number */ + +VARx(struct ev_prepare **, prepares) +VARx(int, preparemax) +VARx(int, preparecnt) + +VARx(struct ev_check **, checks) +VARx(int, checkmax) +VARx(int, checkcnt) + +#if EV_FORK_ENABLE || EV_GENWRAP +VARx(struct ev_fork **, forks) +VARx(int, forkmax) +VARx(int, forkcnt) +#endif + +#if EV_ASYNC_ENABLE || EV_GENWRAP +VARx(EV_ATOMIC_T, async_pending) +VARx(struct ev_async **, asyncs) +VARx(int, asyncmax) +VARx(int, asynccnt) +#endif + +#if EV_USE_INOTIFY || EV_GENWRAP +VARx(int, fs_fd) +VARx(ev_io, fs_w) +VARx(char, fs_2625) /* whether we are running in linux 2.6.25 or newer */ +VAR (fs_hash, ANFS fs_hash [EV_INOTIFY_HASHSIZE]) +#endif + +VARx(EV_ATOMIC_T, sig_pending) +#if EV_USE_SIGNALFD || EV_GENWRAP +VARx(int, sigfd) +VARx(ev_io, sigfd_w) +VARx(sigset_t, sigfd_set) +#endif + +#if EV_MINIMAL < 2 || EV_GENWRAP +VARx(unsigned int, loop_count) /* total number of loop iterations/blocks */ +VARx(unsigned int, loop_depth) /* #ev_loop enters - #ev_loop leaves */ + +VARx(void *, userdata) +VAR (release_cb, void (*release_cb)(EV_P)) +VAR (acquire_cb, void (*acquire_cb)(EV_P)) +VAR (invoke_cb , void (*invoke_cb) (EV_P)) +#endif + +#undef VARx + diff --git a/libev/ev_wrap.h b/libev/ev_wrap.h new file mode 100644 index 0000000..03b6b87 --- /dev/null +++ b/libev/ev_wrap.h @@ -0,0 +1,178 @@ +/* DO NOT EDIT, automatically generated by update_ev_wrap */ +#ifndef EV_WRAP_H +#define EV_WRAP_H 
+#define now_floor ((loop)->now_floor) +#define mn_now ((loop)->mn_now) +#define rtmn_diff ((loop)->rtmn_diff) +#define io_blocktime ((loop)->io_blocktime) +#define timeout_blocktime ((loop)->timeout_blocktime) +#define backend ((loop)->backend) +#define activecnt ((loop)->activecnt) +#define loop_done ((loop)->loop_done) +#define backend_fd ((loop)->backend_fd) +#define backend_fudge ((loop)->backend_fudge) +#define backend_modify ((loop)->backend_modify) +#define backend_poll ((loop)->backend_poll) +#define anfds ((loop)->anfds) +#define anfdmax ((loop)->anfdmax) +#define pendings ((loop)->pendings) +#define pendingmax ((loop)->pendingmax) +#define pendingcnt ((loop)->pendingcnt) +#define pending_w ((loop)->pending_w) +#define rfeeds ((loop)->rfeeds) +#define rfeedmax ((loop)->rfeedmax) +#define rfeedcnt ((loop)->rfeedcnt) +#define evfd ((loop)->evfd) +#define evpipe ((loop)->evpipe) +#define pipe_w ((loop)->pipe_w) +#define curpid ((loop)->curpid) +#define postfork ((loop)->postfork) +#define vec_ri ((loop)->vec_ri) +#define vec_ro ((loop)->vec_ro) +#define vec_wi ((loop)->vec_wi) +#define vec_wo ((loop)->vec_wo) +#define vec_eo ((loop)->vec_eo) +#define vec_max ((loop)->vec_max) +#define polls ((loop)->polls) +#define pollmax ((loop)->pollmax) +#define pollcnt ((loop)->pollcnt) +#define pollidxs ((loop)->pollidxs) +#define pollidxmax ((loop)->pollidxmax) +#define epoll_events ((loop)->epoll_events) +#define epoll_eventmax ((loop)->epoll_eventmax) +#define kqueue_changes ((loop)->kqueue_changes) +#define kqueue_changemax ((loop)->kqueue_changemax) +#define kqueue_changecnt ((loop)->kqueue_changecnt) +#define kqueue_events ((loop)->kqueue_events) +#define kqueue_eventmax ((loop)->kqueue_eventmax) +#define port_events ((loop)->port_events) +#define port_eventmax ((loop)->port_eventmax) +#define fdchanges ((loop)->fdchanges) +#define fdchangemax ((loop)->fdchangemax) +#define fdchangecnt ((loop)->fdchangecnt) +#define timers ((loop)->timers) +#define timermax 
((loop)->timermax) +#define timercnt ((loop)->timercnt) +#define periodics ((loop)->periodics) +#define periodicmax ((loop)->periodicmax) +#define periodiccnt ((loop)->periodiccnt) +#define idles ((loop)->idles) +#define idlemax ((loop)->idlemax) +#define idlecnt ((loop)->idlecnt) +#define idleall ((loop)->idleall) +#define prepares ((loop)->prepares) +#define preparemax ((loop)->preparemax) +#define preparecnt ((loop)->preparecnt) +#define checks ((loop)->checks) +#define checkmax ((loop)->checkmax) +#define checkcnt ((loop)->checkcnt) +#define forks ((loop)->forks) +#define forkmax ((loop)->forkmax) +#define forkcnt ((loop)->forkcnt) +#define async_pending ((loop)->async_pending) +#define asyncs ((loop)->asyncs) +#define asyncmax ((loop)->asyncmax) +#define asynccnt ((loop)->asynccnt) +#define fs_fd ((loop)->fs_fd) +#define fs_w ((loop)->fs_w) +#define fs_2625 ((loop)->fs_2625) +#define fs_hash ((loop)->fs_hash) +#define sig_pending ((loop)->sig_pending) +#define sigfd ((loop)->sigfd) +#define sigfd_w ((loop)->sigfd_w) +#define sigfd_set ((loop)->sigfd_set) +#define loop_count ((loop)->loop_count) +#define loop_depth ((loop)->loop_depth) +#define userdata ((loop)->userdata) +#define release_cb ((loop)->release_cb) +#define acquire_cb ((loop)->acquire_cb) +#define invoke_cb ((loop)->invoke_cb) +#else +#undef EV_WRAP_H +#undef now_floor +#undef mn_now +#undef rtmn_diff +#undef io_blocktime +#undef timeout_blocktime +#undef backend +#undef activecnt +#undef loop_done +#undef backend_fd +#undef backend_fudge +#undef backend_modify +#undef backend_poll +#undef anfds +#undef anfdmax +#undef pendings +#undef pendingmax +#undef pendingcnt +#undef pending_w +#undef rfeeds +#undef rfeedmax +#undef rfeedcnt +#undef evfd +#undef evpipe +#undef pipe_w +#undef curpid +#undef postfork +#undef vec_ri +#undef vec_ro +#undef vec_wi +#undef vec_wo +#undef vec_eo +#undef vec_max +#undef polls +#undef pollmax +#undef pollcnt +#undef pollidxs +#undef pollidxmax +#undef epoll_events 
+#undef epoll_eventmax +#undef kqueue_changes +#undef kqueue_changemax +#undef kqueue_changecnt +#undef kqueue_events +#undef kqueue_eventmax +#undef port_events +#undef port_eventmax +#undef fdchanges +#undef fdchangemax +#undef fdchangecnt +#undef timers +#undef timermax +#undef timercnt +#undef periodics +#undef periodicmax +#undef periodiccnt +#undef idles +#undef idlemax +#undef idlecnt +#undef idleall +#undef prepares +#undef preparemax +#undef preparecnt +#undef checks +#undef checkmax +#undef checkcnt +#undef forks +#undef forkmax +#undef forkcnt +#undef async_pending +#undef asyncs +#undef asyncmax +#undef asynccnt +#undef fs_fd +#undef fs_w +#undef fs_2625 +#undef fs_hash +#undef sig_pending +#undef sigfd +#undef sigfd_w +#undef sigfd_set +#undef loop_count +#undef loop_depth +#undef userdata +#undef release_cb +#undef acquire_cb +#undef invoke_cb +#endif diff --git a/man/Makefile b/man/Makefile new file mode 100644 index 0000000..c3027f9 --- /dev/null +++ b/man/Makefile @@ -0,0 +1,7 @@ +MAN5PAGES = opennhrp.conf.5 +MAN8PAGES = opennhrp.8 opennhrpctl.8 opennhrp-script.8 + +install: + $(INSTALLDIR) $(DESTDIR)$(MANDIR)/man5 $(DESTDIR)$(MANDIR)/man8 + $(INSTALL) $(addprefix $(src)/,$(MAN5PAGES)) $(DESTDIR)$(MANDIR)/man5 + $(INSTALL) $(addprefix $(src)/,$(MAN8PAGES)) $(DESTDIR)$(MANDIR)/man8 diff --git a/man/opennhrp-script.8 b/man/opennhrp-script.8 new file mode 100644 index 0000000..0af32b1 --- /dev/null +++ b/man/opennhrp-script.8 @@ -0,0 +1,146 @@ +.TH OPENNHRP-SCRIPT 8 "20 May 2009" "" "OpenNHRP Documentation" + +.SH NAME +opennhrp-script \- NHRP peer configuration script + +.SH DESCRIPTION +NHRP peer configuration script is invoked by +.BR opennhrp (8). +.PP +This script can be used to establish a direct NBMA peer to peer connection +after NHRP Resolution Reply has been received, but prior to injecting the +peer address to kernel neighbor table.
This could be to insert firewall rules +allowing the traffic and/or establishing an IPsec connection (or some other +secure communication channel). The script is also called when the cached peer +information expires. + +.SH OPERATION +When +.B opennhrp +needs to invoke the peer configuration script, it defines a set of variables +in the environment and then executes the script with exactly one argument. +The argument is set to the name of the reason why the script has been invoked. +The following reasons are currently defined: +.BR "interface-up" , " peer-register" , " peer-up" , " peer-down" , +.BR " nhs-up" , " nhs-down" , " route-up" " and " route-down . + +.SH INTERFACE-UP +The interface has just been discovered, or it has changed state from down +to up. This is the place to clean up old routes if needed. + +.SH PEER-REGISTER +A peer registration request has been received. The script is run before the +internal peer cache is altered and this allows the script to reject +registration without it deleting old peers. This could be used to check that +IPsec connection is up or one might encode allowed protocol-addresses in the +certificate and it could be enforced here. This hook is executed synchronously +so it should be fast. + +.SH PEER-UP +A peer has been discovered (either by means of static configuration, dynamic +client registration or resolution reply arrival to initiate shortcut). +This hook is invoked right after the peer's NBMA address is available. For all +other than dynamic-map entries the protocol address is available too. +The information will not be injected to the kernel ARP cache until the script +has returned zero. If non-zero return value is returned, the peer entry is +marked as invalid and negative cached for a short period of time. + +.SH PEER-DOWN +A peer connection is about to be cleared. This can happen for dynamic client +registrations or cached information.
Dynamic client registrations are torn +down when registration holding time expires (and no re-registration has +occurred) or if it is explicitly removed using Purge Request. Cached entries are +removed when holding time expires (and there has been no traffic to trigger +renewal of the peer address information) or when it is explicitly removed +with Purge Request. + +.SH NHS-UP +This is called for NHS right after the first successful Registration Reply +is received. +This can be used to update application level configuration about which +servers to use. + +.SH NHS-DOWN +Informs that the specified NHS is no longer available. + +.SH ROUTE-UP +In reply to resolution request we have received a shortcut route with +destination off the NBMA subnetwork. The script should insert appropriate +entry to kernel routing table. + +.SH ROUTE-DOWN +The associated shortcut route information is no longer valid and should be +removed from kernel routing table. + +.SH ENVIRONMENT +.B NHRP_TYPE +.RS +For peer-up and peer-down reasons this can be: +\fBstatic\fR (configured information), +\fBdynamic-nhs\fR (configured NHS with only NBMA address known), +\fBdynamic\fR (client registered) or +\fBcached\fR (resolved since we had packets going there). + +The nhs-up and nhs-down reasons are called for \fBstatic\fR entries with +register keyword and \fBdynamic-nhs\fR entries. + +For peer-register this is always \fBdynamic\fR. + +For route-up and route-down reasons this is always defined as \fBroute\fR. + +For interface-up reason this is irrelevant, but always defined as +\fBinterface\fR. +.RE + +.B NHRP_INTERFACE +.RS +The network interface to which this event is related. +.RE + +.B NHRP_GRE_KEY +.RS +The GRE key assigned to the related network interface. +.RE + +.B NHRP_DESTADDR +.RS +Destination protocol address. E.g. for NBMA GRE tunnels this is the IP address +assigned to the tunnel interface being used.
+.RE + +.B NHRP_DESTPREFIX +.RS +Subnet prefix length for destination protocol address. +.RE + +.B NHRP_DESTNBMA +.RS +Defined only for \fBpeer-up\fR and \fBpeer-down\fR reasons. This contains the +NBMA address of the destination. E.g. for NBMA GRE this contains the public IP +of the peer. +.RE + +.B NHRP_DESTMTU +.RS +Defined only for \fBpeer-up\fR reasons. This contains the MTU for NBMA +address of the destination. +.RE + +.B NHRP_NEXTHOP +.RS +Defined only for \fBroute-up\fR and \fBroute-down\fR reasons. This is the +protocol address of the next hop to be used in routing. +.RE + +.B NHRP_PEER_DOWN_REASON +.RS +Defined only for \fBpeer-down\fR reason. This describes why the peer has +been deleted. Currently it is one of \fBexpired\fR, \fBuser-request\fR or +\fBlower-down\fR. +.RE + +.SH "SEE ALSO" +.BR opennhrp (8) + +.SH AUTHORS +Timo Teras diff --git a/man/opennhrp.8 b/man/opennhrp.8 new file mode 100644 index 0000000..b83b94b --- /dev/null +++ b/man/opennhrp.8 @@ -0,0 +1,119 @@ +.TH OPENNHRP 8 "16 November 2007" "" "OpenNHRP Documentation" + +.SH NAME +opennhrp \- daemon to resolve next hop address in NBMA network + +.SH SYNOPSIS +.BI "opennhrp [" "option" "]..." + +.SH DESCRIPTION +.B opennhrp +implements the Next Hop Resolution Protocol (NHRP) which is used to +improve the efficiency of routing computer network traffic over +Non-Broadcast, Multiple Access (NBMA) Networks. +.PP +NHRP provides an ARP-like solution that allows a system to dynamically +learn the NBMA address of the other systems that are part of that network, +allowing these systems to directly communicate without requiring traffic +to use an intermediate hop. +.PP +.B opennhrp +implementation is based on RFC2332, but contains some modifications and +extensions to be compatible with Cisco NHRP/DMVPN implementation. +Modifications have been made for authentication extension, Cisco NAT +address extension and shortcut switching enhancements support. 
+ +.SH OPTIONS +The following options are recognized: + +.IP "\fB\-a \fIadmin\-socket" +Specify management interface socket as +.IR admin\-socket . +The default is +.IR /var/run/opennhrp.socket . + +.IP "\fB\-c \fIconfig\-file" +Use +.I config\-file +instead of +.I /etc/opennhrp/opennhrp.conf +for configuration. + +.IP "\fB\-s \fIscript\-file" +Execute +.I script\-file +instead of +.I /etc/opennhrp/opennhrp\-script +on important events. + +.IP "\fB\-p \fIpid\-file" +Store process id in +.I pid\-file +instead of +.IR /var/run/opennhrp.pid . +This file is also used to detect if opennhrp daemon is already running. +Pid-file is not created unless +.B -d +is specified too. + +.IP "\fB-d" +Run in daemon mode, forking to background after initialization. + +.IP "\fB-v" +Verbose. Print more log messages. + +.IP "\fB-V" +Print version and exit. + +.SH SIGNALS +.IP \fBSIGHUP +Forget all cached information about other system addresses. +.IP \fBSIGUSR1 +Dump NHRP peer database to system log. + +.SH FILES +.I /etc/opennhrp/opennhrp.conf +.RS +The system wide configuration file. See +.BR opennhrp.conf (5) +for further details. +.RE + +.I /etc/opennhrp/opennhrp\-script +.RS +Script executed by +.B opennhrp +on important events. See +.BR opennhrp\-script (8) +for more information on how the script is executed. +.RE + +.I /var/run/opennhrp.socket +.RS +.BR opennhrp "(8) control socket" +.RE + +.SH BUGS +Currently only IPv4 over IPv4 networks using NBMA GRE tunnels is +supported (you need Linux kernel 2.6.24-rc2 or later). +.PP +Replying with cached information to non-authoritative resolution +requests is not implemented. +.PP +Please send bug reports to OpenNHRP issue tracker in SourceForge.
+ +.SH "SEE ALSO" +.BR opennhrp.conf (5), +.BR opennhrpctl (8), +.BR opennhrp\-script (8) +.br +http://sourceforge.net/projects/opennhrp +.PP +For more information about the protocol see: +.br +RFC2332 NBMA Next Hop Resolution Protocol (NHRP) +.br +RFC2333 NHRP Protocol Applicability Statement + +.SH AUTHORS +Timo Teras diff --git a/man/opennhrp.conf.5 b/man/opennhrp.conf.5 new file mode 100644 index 0000000..aacec80 --- /dev/null +++ b/man/opennhrp.conf.5 @@ -0,0 +1,227 @@ +.TH OPENNHRP.CONF 5 "27 Oct 2010" "" "OpenNHRP Documentation" + +.SH NAME +opennhrp.conf \- NHRP daemon configuration file + +.SH DESCRIPTION +The +.I opennhrp.conf +file contains information for the +.BR opennhrp . +.PP +This configuration file is a free-form ASCII text file. It is parsed by the +word-by-word parser built into +.BR opennhrp . +The file may contain extra whitespace, tabs and newline for formatting +purposes. Keywords and contents are case-sensitive. Comments can be marked +with a hash sign +.RB ( # ) +and everything following it until newline is ignored. + +.SH "DIRECTIVES" +Directives are keywords that can appear in any context of the configuration +file and they select a new context. + +.PP +.BI "interface " interface-name +.RS +Marks the start of configuration for network interface +.IR interface-name . +Even if no interface specific configuration is required, the +.B interface +directive must be present to enable NHRP on that interface. +.RE + +.SH "INTERFACE CONTEXT" +These configuration keywords can appear only in the interface context. + +.PP +.BI "map " protocol-address[/prefix] " " nbma-address " [register] [cisco]" +.RS +Creates static peer mapping of +.I protocol-address +to +.IR nbma-address . +.PP +If the +.I prefix +parameter is present, it directs +.B opennhrp +to use this peer as a next hop server when sending Resolution Requests +matching this subnet. 
+.PP +The optional parameter +.I register +specifies that Registration Request should be sent to this peer on +startup. +.PP +If the statically mapped peer is running Cisco IOS, specify the +.B cisco +keyword. It is used to fix statically the Registration Request ID +so that a matching Purge Request can be sent if NBMA address has changed. +This is to work around broken IOS which requires Purge Request ID to +match the original Registration Request ID. +.RE + +.BI "dynamic-map " protocol-address/prefix " " nbma-domain-name +.RS +Specifies that the NBMA addresses of the next hop servers are defined in the +domain name +.IR nbma-domain-name . +For each A record opennhrp creates a dynamic NHS entry. + +Each dynamic NHS will get a peer entry with the configured network address +and the discovered NBMA address. + +The first registration request is sent to the protocol broadcast address, +and the server's real protocol address is dynamically detected from the first +registration reply (requires opennhrp 0.11 or newer). + +Alternatively, if +.BR peer-up +script hook can determine the protocol address from the NBMA address (e.g. +by doing an additional DNS lookup or by parsing the IPsec certificate) it can +inform this mapping via +.BR opennhrpctl "(8) " "update nbma " command. +.RE + +.PP +.BI "shortcut-target " protocol-address/prefix " [holding-time " holdtime "]" +.RS +Defines an off-NBMA network prefix for which the GRE interface will act +as a gateway. This is an alternative to defining local interfaces with +shortcut-destination flag. +.RE + +.BR multicast " " dynamic "|" nhs +.br +.BI "multicast " protocol-address +.RS +Determines how opennhrp daemon should soft switch the multicast traffic. +Currently, multicast traffic is captured by opennhrp daemon using a packet +socket, and resent back to proper destinations. This means that multicast +packet sending is CPU intensive.
+ +Specifying +.B nhs +causes all multicast packets to be repeated to each statically configured +next hop. +.B dynamic +instructs to forward to all peers which we have a direct connection with. +Alternatively, you can specify the directive multiple times for each +.I protocol-address +the multicast traffic should be sent to. + +.B "WARNING:" +It is very easy to misconfigure multicast repeating if you have multiple +NHSes. +.RE + +.BI "holding-time " holdtime +.RS +Specifies the holding time for NHRP Registration Requests and +Resolution Replies sent from this interface or shortcut-target. +The +.I holdtime +is specified in seconds and defaults to two hours. +.RE + +.BI "route-table " routetable +.RS +Specifies the kernel routing table to be monitored for outgoing routes +to this interface. This is required to do routing lookups excluding +active shortcut routes (for existing shortcut route renewal). The +default is main table. + +If you use +.B table +directive in +.B zebra.conf +to put Quagga routes in alternate table, this should match with it. +.RE + +.BI "cisco-authentication " secret +.RS +Enables Cisco style authentication on NHRP packets. This embeds the +.I secret +plaintext password to the outgoing NHRP packets. Incoming NHRP packets +on this interface are discarded unless the +.I secret +password is present. Maximum length of the +.I secret +is 8 characters. +.RE + +.B redirect +.RS +Enable sending of Cisco style NHRP Traffic Indication packets. If +this is enabled and +.B opennhrp +detects a forwarded packet, it will send a message to the original sender +of the packet instructing it to create a direct connection with the +destination. This is basically a protocol independent equivalent of ICMP +redirect. +.RE + +.B shortcut +.RS +Enable creation of shortcut routes. A received NHRP Traffic Indication +will trigger the resolution and establishment of a shortcut route.
+.PP +.B IMPORTANT: +You still need to run some routing protocol or have static routes +to some hub node in your NBMA network. NHRP does not advertise routes; +it can create shortcut route only for an already routable subnet. +.RE + +.B non-caching +.RS +Disables caching of peer information from forwarded NHRP Resolution +Reply packets. This can be used to reduce memory consumption on big +NBMA subnets. +.PP +NOTE: currently does not do much as caching is not implemented. +.RE + +.B shortcut-destination +.RS +This instructs +.B opennhrp +to reply with authoritative answers on NHRP Resolution Requests destined +to addresses in this interface (instead of forwarding the packets). This +effectively allows the creation of shortcut routes to subnets located +on the interface. +.PP +When specified, this should be the only keyword for the interface. +.RE + +.SH EXAMPLE +The following configuration file was used for testing OpenNHRP on a machine +with two ethernet network interfaces. GRE tunnel was configured with tunnel +IP 10.255.255.2/24. Configuration enables registration to hub node at +10.255.255.1 and resolution of other nodes in the subnet using that hub. +.PP +It also enables creation of shortcut routes to networks behind other +hosts (with holding-time override for the defined shortcut-target) +in our NBMA network and allows incoming shortcut routes.
+.PP +.nf +interface gre1 + holding-time 3600 + map 10.255.255.1/24 192.168.200.1 register + shortcut-target 172.16.0.0/16 holding-time 1800 + cisco-authentication secret + shortcut + redirect + non-caching + +interface eth1 + shortcut-destination + +.fi + +.SH "SEE ALSO" +.BR opennhrp (8) + +.SH AUTHORS +Timo Teras diff --git a/man/opennhrpctl.8 b/man/opennhrpctl.8 new file mode 100644 index 0000000..611c6f7 --- /dev/null +++ b/man/opennhrpctl.8 @@ -0,0 +1,124 @@ +.TH OPENNHRPCTL 8 "20 May 2009" "" "OpenNHRP Documentation" + +.SH NAME +opennhrpctl \- opennhrp administrative control tool + +.SH SYNOPSIS +.B opennhrpctl +.BI "[\-a " admin\-socket "]" " command " "[" "arguments" "]..." + +.SH DESCRIPTION +.B opennhrpctl +is a utility to control +.BR opennhrp (8) +daemon operation. A UNIX socket is used for communication between +.B opennhrpctl +and +.BR opennhrp (8). +Administration privileges for a non-root user can be granted by modifying +the permissions and ownership of the socket. + +The following commands are available: + +.BI "[cache] show [" selector "]..." +.RS +Show contents of next hop cache (configured and resolved entries). +.RE + +.BI "[cache] flush [" selector "]..." +.RS +Clear all non-permanent entries which match the selector specifiers. +.RE + +.BI "[cache] purge [" selector "]..." +.RS +Purge entries from NHRP cache: cached entries are removed and permanent +entries are forced down, up and finally reregistered. +.RE + +.BI "[cache] lowerdown [" selector "]..." +.RS +Purge entries from NHRP cache with indication that lower layer failed: +e.g. IPsec daemon detected dead-peer or received INITIAL-CONTACT +notification. +.RE + +.BI "route show [" selector "]..." +.RS +Show the contents of locally cached kernel routing information +(outbound routing base to do route lookups excluding active shortcut +routes).
+.RE + +.B "interface show" +.RS +Show the contents of interface configuration table, and the cached information +from kernel (like protocol and NBMA IP addresses in use currently). +.RE + +.BI "redirect purge [" protocol-address "/" prefix-length "]" +.RS +Clear redirection cache from all entries matching the specified address. +.RE + +.BI "update nbma " nbma-address " " protocol-address +.RS +This command can be used from +.BR opennhrp-script "(8)" +to inform +.BR opennhrp +daemon of the real +.IR protocol-address +of dynamically discovered NHS. +.RE + +The following selectors can be used to limit which cache entries will +be affected: + +.BI nbma " nbma-address" +.RS +Matches entries where the remote has NBMA address +.IR nbma-address . +.RE + +.BI protocol " protocol-address" "[/" "prefix-length" "]" +.RS +Matches entries where the remote has protocol address +.IR protocol-address " with at least prefix length " prefix-length . +.RE + +.BI local-nbma " nbma-address" +.RS +Matches entries from local interface which owns the NBMA address +.IR nbma-address . +.RE + +.BI local-protocol " protocol-address" +.RS +Matches entries only from local interface which owns the protocol address +.IR protocol-address . +.RE + +.BI interface " interface-name" +.br +.BI iface " interface-name" +.br +.BI dev " interface-name" +.RS +Search entries only from local interface with name +.IR interface-name .
+.RE + +.RE + +.SH FILES +.I /var/run/opennhrp.socket +.RS +.BR opennhrp "(8) control socket" +.RE + +.SH "SEE ALSO" +.BR opennhrp (8) + +.SH AUTHORS +Timo Teras diff --git a/nhrp/Makefile b/nhrp/Makefile new file mode 100644 index 0000000..7c2560e --- /dev/null +++ b/nhrp/Makefile @@ -0,0 +1,27 @@ +progs-y += opennhrp +opennhrp-objs += libev.o opennhrp.o nhrp_address.o nhrp_packet.o \ + nhrp_peer.o nhrp_server.o nhrp_interface.o admin.o \ + sysdep_netlink.o sysdep_pfpacket.o \ + sysdep_syslog.o + +CFLAGS_libev.o += -Wno-unused -Wno-comment -Wno-parentheses +CFLAGS_opennhrp.o += -DOPENNHRP_VERSION=\"$(FULL_VERSION)\" \ + -DOPENNHRP_ADMIN_SOCKET=\"$(STATEDIR)/opennhrp.socket\" +LIBS_opennhrp += -lm +ifeq ($(shell pkg-config --exists libcares && echo "yes"),yes) +CFLAGS +=$(shell pkg-config --cflags libcares) +LIBS_opennhrp +=$(shell pkg-config --libs libcares) +else +LIBS_opennhrp += -lcares +endif + +progs-y += opennhrpctl +opennhrpctl-objs += opennhrpctl.o +CFLAGS_opennhrpctl.o += $(CFLAGS_opennhrp.o) + +CFLAGS_EXTRA += -I$(srctree)/include -Wno-strict-aliasing + +install: + $(INSTALLDIR) $(DESTDIR)$(SBINDIR) + $(INSTALL) $(addprefix $(obj)/,$(progs-y)) $(DESTDIR)$(SBINDIR) + diff --git a/nhrp/admin.c b/nhrp/admin.c new file mode 100644 index 0000000..68a3e9e --- /dev/null +++ b/nhrp/admin.c @@ -0,0 +1,609 @@ +/* admin.c - OpenNHRP administrative interface implementation + * + * Copyright (C) 2007-2009 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nhrp_common.h" +#include "nhrp_peer.h" +#include "nhrp_address.h" +#include "nhrp_interface.h" + +static struct ev_io accept_io; + +struct admin_remote { + struct ev_timer timeout; + struct ev_io io; + int num_read; + char cmd[512]; +}; + +static int parse_word(const char **bufptr, size_t len, char *word) +{ + const char *buf = *bufptr; + int i, pos = 0; + + while (isspace(buf[pos]) && buf[pos] != '\n' && buf[pos]) + pos++; + + if (buf[pos] == '\n' || buf[pos] == 0) + return FALSE; + + for (i = 0; i < len-1 && !isspace(buf[pos+i]); i++) + word[i] = buf[pos+i]; + word[i] = 0; + + *bufptr += i + pos; + return TRUE; +} + + +static void admin_write(void *ctx, const char *format, ...) +{ + struct admin_remote *rmt = (struct admin_remote *) ctx; + char msg[1024]; + va_list ap; + size_t len; + + va_start(ap, format); + len = vsnprintf(msg, sizeof(msg), format, ap); + va_end(ap); + + if (write(rmt->io.fd, msg, len) != len) { + } +} + +static void admin_free_remote(struct admin_remote *rm) +{ + int fd = rm->io.fd; + + ev_io_stop(&rm->io); + ev_timer_stop(&rm->timeout); + shutdown(fd, SHUT_RDWR); + close(fd); + free(rm); +} + +static int admin_show_peer(void *ctx, struct nhrp_peer *peer) +{ + char buf[512], tmp[32]; + char *str; + size_t len = sizeof(buf); + int i = 0, rel; + + if (peer->interface != NULL) + i += snprintf(&buf[i], len - i, + "Interface: %s\n", + peer->interface->name); + + i += snprintf(&buf[i], len - i, + "Type: %s\n" + "Protocol-Address: %s/%d\n", + nhrp_peer_type[peer->type], + nhrp_address_format(&peer->protocol_address, sizeof(tmp), tmp), + peer->prefix_length); + + if (peer->next_hop_address.type != PF_UNSPEC) { + switch (peer->type) { + case NHRP_PEER_TYPE_SHORTCUT_ROUTE: + case NHRP_PEER_TYPE_LOCAL_ROUTE: + str = "Next-hop-Address"; + break; + case NHRP_PEER_TYPE_LOCAL_ADDR: + str = "Alias-Address"; + break; + default: + str = 
"NBMA-Address"; + break; + } + i += snprintf(&buf[i], len - i, "%s: %s\n", + str, + nhrp_address_format(&peer->next_hop_address, + sizeof(tmp), tmp)); + } + if (peer->nbma_hostname) { + i += snprintf(&buf[i], len - i, "Hostname: %s\n", + peer->nbma_hostname); + } + if (peer->next_hop_nat_oa.type != PF_UNSPEC) { + i += snprintf(&buf[i], len - i, "NBMA-NAT-OA-Address: %s\n", + nhrp_address_format(&peer->next_hop_nat_oa, + sizeof(tmp), tmp)); + } + if (peer->flags & (NHRP_PEER_FLAG_USED | NHRP_PEER_FLAG_UNIQUE | + NHRP_PEER_FLAG_UP | NHRP_PEER_FLAG_LOWER_UP)) { + i += snprintf(&buf[i], len - i, "Flags:"); + if (peer->flags & NHRP_PEER_FLAG_UNIQUE) + i += snprintf(&buf[i], len - i, " unique"); + + if (peer->flags & NHRP_PEER_FLAG_USED) + i += snprintf(&buf[i], len - i, " used"); + if (peer->flags & NHRP_PEER_FLAG_UP) + i += snprintf(&buf[i], len - i, " up"); + else if (peer->flags & NHRP_PEER_FLAG_LOWER_UP) + i += snprintf(&buf[i], len - i, " lower-up"); + i += snprintf(&buf[i], len - i, "\n"); + } + if (peer->expire_time) { + rel = (int) (peer->expire_time - ev_now()); + if (rel >= 0) { + i += snprintf(&buf[i], len - i, "Expires-In: %d:%02d\n", + rel / 60, rel % 60); + } + } + + admin_write(ctx, "%s\n", buf); + return 0; +} + +static void admin_free_selector(struct nhrp_peer_selector *sel) +{ + if (sel->hostname != NULL) { + free((void *) sel->hostname); + sel->hostname = NULL; + } +} + +static int admin_parse_selector(void *ctx, const char *cmd, + struct nhrp_peer_selector *sel) +{ + char keyword[64], tmp[64]; + struct nhrp_address address; + uint8_t prefix_length; + + while (parse_word(&cmd, sizeof(keyword), keyword)) { + if (!parse_word(&cmd, sizeof(tmp), tmp)) { + admin_write(ctx, + "Status: failed\n" + "Reason: missing-argument\n" + "Near-Keyword: '%s'\n", + keyword); + return FALSE; + } + + if (strcmp(keyword, "interface") == 0 || + strcmp(keyword, "iface") == 0 || + strcmp(keyword, "dev") == 0) { + if (sel->interface != NULL) + goto err_conflict; + 
sel->interface = nhrp_interface_get_by_name(tmp, FALSE); + if (sel->interface == NULL) + goto err_noiface; + continue; + } else if (strcmp(keyword, "host") == 0 || + strcmp(keyword, "hostname") == 0) { + if (sel->hostname != NULL) + goto err_conflict; + sel->hostname = strdup(tmp); + continue; + } + + if (!nhrp_address_parse(tmp, &address, &prefix_length)) { + admin_write(ctx, + "Status: failed\n" + "Reason: invalid-address\n" + "Near-Keyword: '%s'\n", + keyword); + return FALSE; + } + + if (strcmp(keyword, "protocol") == 0) { + if (sel->protocol_address.type != AF_UNSPEC) + goto err_conflict; + sel->protocol_address = address; + sel->prefix_length = prefix_length; + } else if (strcmp(keyword, "nbma") == 0) { + if (sel->next_hop_address.type != AF_UNSPEC) + goto err_conflict; + sel->type_mask &= ~BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE); + sel->next_hop_address = address; + } else if (strcmp(keyword, "local-protocol") == 0) { + if (sel->interface != NULL) + goto err_conflict; + sel->interface = nhrp_interface_get_by_protocol(&address); + if (sel->interface == NULL) + goto err_noiface; + } else if (strcmp(keyword, "local-nbma") == 0) { + if (sel->interface != NULL) + goto err_conflict; + sel->interface = nhrp_interface_get_by_nbma(&address); + if (sel->interface == NULL) + goto err_noiface; + } else { + admin_write(ctx, + "Status: failed\n" + "Reason: syntax-error\n" + "Near-Keyword: '%s'\n", + keyword); + return FALSE; + } + } + return TRUE; + +err_conflict: + admin_write(ctx, + "Status: failed\n" + "Reason: conflicting-keyword\n" + "Near-Keyword: '%s'\n", + keyword); + goto err; +err_noiface: + admin_write(ctx, + "Status: failed\n" + "Reason: interface-not-found\n" + "Near-Keyword: '%s'\n" + "Argument: '%s'\n", + keyword, tmp); +err: + admin_free_selector(sel); + return FALSE; +} + +static void admin_route_show(void *ctx, const char *cmd) +{ + struct nhrp_peer_selector sel; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ROUTE); + if 
(!admin_parse_selector(ctx, cmd, &sel)) + return; + + admin_write(ctx, "Status: ok\n\n"); + nhrp_peer_foreach(admin_show_peer, ctx, &sel); + admin_free_selector(&sel); +} + +static void admin_cache_show(void *ctx, const char *cmd) +{ + struct nhrp_peer_selector sel; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = NHRP_PEER_TYPEMASK_ALL & + ~BIT(NHRP_PEER_TYPE_LOCAL_ROUTE); + if (!admin_parse_selector(ctx, cmd, &sel)) + return; + + admin_write(ctx, "Status: ok\n\n"); + nhrp_peer_foreach(admin_show_peer, ctx, &sel); + admin_free_selector(&sel); +} + +static void admin_cache_purge(void *ctx, const char *cmd) +{ + struct nhrp_peer_selector sel; + int count = 0; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = NHRP_PEER_TYPEMASK_PURGEABLE; + if (!admin_parse_selector(ctx, cmd, &sel)) + return; + + nhrp_peer_foreach(nhrp_peer_purge_matching, &count, &sel); + admin_free_selector(&sel); + + admin_write(ctx, + "Status: ok\n" + "Entries-Affected: %d\n", + count); +} + +static void admin_cache_lower_down(void *ctx, const char *cmd) +{ + struct nhrp_peer_selector sel; + int count = 0; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = NHRP_PEER_TYPEMASK_PURGEABLE; + if (!admin_parse_selector(ctx, cmd, &sel)) + return; + + nhrp_peer_foreach(nhrp_peer_lowerdown_matching, &count, &sel); + admin_free_selector(&sel); + + admin_write(ctx, + "Status: ok\n" + "Entries-Affected: %d\n", + count); +} + +static void admin_cache_flush(void *ctx, const char *cmd) +{ + struct nhrp_peer_selector sel; + int count = 0; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE; + if (!admin_parse_selector(ctx, cmd, &sel)) + return; + + nhrp_peer_foreach(nhrp_peer_remove_matching, &count, &sel); + admin_free_selector(&sel); + + admin_write(ctx, + "Status: ok\n" + "Entries-Affected: %d\n", + count); +} + +static int admin_show_interface(void *ctx, struct nhrp_interface *iface) +{ + char buf[512], tmp[32]; + size_t len = sizeof(buf); + int i = 0; + + i += 
snprintf(&buf[i], len - i, + "Interface: %s\n" + "Index: %d\n", + iface->name, + iface->index); + + if (iface->protocol_address.addr_len != 0) { + i += snprintf(&buf[i], len - i, + "Protocol-Address: %s/%d\n", + nhrp_address_format(&iface->protocol_address, sizeof(tmp), tmp), + iface->protocol_address_prefix); + } + + if (iface->flags) { + i += snprintf(&buf[i], len - i, + "Flags:%s%s%s%s%s\n", + (iface->flags & NHRP_INTERFACE_FLAG_NON_CACHING) ? " non-caching" : "", + (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT) ? " shortcut" : "", + (iface->flags & NHRP_INTERFACE_FLAG_REDIRECT) ? " redirect" : "", + (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) ? " shortcut-dest" : "", + (iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED) ? " configured" : ""); + } + + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + goto done; + + i += snprintf(&buf[i], len - i, + "Holding-Time: %u\n" + "Route-Table: %u\n" + "GRE-Key: %u\n" + "MTU: %u\n", + iface->holding_time, + iface->route_table, + iface->gre_key, + iface->mtu); + + if (iface->link_index) { + struct nhrp_interface *link; + + i += snprintf(&buf[i], len - i, "Link-Index: %d\n", iface->link_index); + link = nhrp_interface_get_by_index(iface->link_index, FALSE); + if (link != NULL) + i += snprintf(&buf[i], len - i, "Link-Name: %s\n", link->name); + } + + if (iface->nbma_address.addr_len != 0) { + i += snprintf(&buf[i], len - i, + "NBMA-MTU: %u\n" + "NBMA-Address: %s\n", + iface->nbma_mtu, + nhrp_address_format(&iface->nbma_address, sizeof(tmp), tmp)); + } + if (iface->nat_cie.nbma_address.addr_len != 0) { + i += snprintf(&buf[i], len - i, + "NBMA-NAT-OA: %s\n", + nhrp_address_format(&iface->nat_cie.nbma_address, sizeof(tmp), tmp)); + } +done: + admin_write(ctx, "%s\n", buf); + return 0; +} + +static void admin_interface_show(void *ctx, const char *cmd) +{ + admin_write(ctx, "Status: ok\n\n"); + nhrp_interface_foreach(admin_show_interface, ctx); +} + +static void admin_redirect_purge(void *ctx, const char *cmd) +{ + char 
keyword[64]; + struct nhrp_address addr; + uint8_t prefix; + int count; + + nhrp_address_set_type(&addr, PF_UNSPEC); + + if (parse_word(&cmd, sizeof(keyword), keyword)) { + if (!nhrp_address_parse(keyword, &addr, &prefix)) { + admin_write(ctx, + "Status: failed\n" + "Reason: invalid-address\n" + "Near-Keyword: '%s'\n", + keyword); + return; + } + } + + count = nhrp_rate_limit_clear(&addr, prefix); + admin_write(ctx, + "Status: ok\n" + "Entries-Affected: %d\n", + count); +} + +struct update_nbma { + struct nhrp_address addr; + int count; +}; + +static int update_nbma(void *ctx, struct nhrp_peer *p) +{ + struct update_nbma *un = (struct update_nbma *) ctx; + + nhrp_peer_discover_nhs(p, &un->addr); + un->count++; + + return 0; +} + +static void admin_update_nbma(void *ctx, const char *cmd) +{ + char keyword[64]; + struct nhrp_peer_selector sel; + struct update_nbma un; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_DYNAMIC_NHS); + + if (!parse_word(&cmd, sizeof(keyword), keyword)) + goto err; + if (!nhrp_address_parse(keyword, &sel.next_hop_address, NULL)) + goto err; + if (!parse_word(&cmd, sizeof(keyword), keyword)) + goto err; + if (!nhrp_address_parse(keyword, &un.addr, NULL)) + goto err; + + un.count = 0; + nhrp_peer_foreach(update_nbma, &un, &sel); + + admin_write(ctx, + "Status: ok\n" + "Entries-Affected: %d\n", + un.count); + return; +err: + admin_write(ctx, + "Status: failed\n" + "Reason: syntax-error\n" + "Near-Keyword: '%s'\n", + keyword); + return; +} + +static struct { + const char *command; + void (*handler)(void *ctx, const char *cmd); +} admin_handler[] = { + { "route show", admin_route_show }, + { "show", admin_cache_show }, + { "cache show", admin_cache_show }, + { "flush", admin_cache_flush }, + { "cache flush", admin_cache_flush }, + { "purge", admin_cache_purge }, + { "cache purge", admin_cache_purge }, + { "cache lowerdown", admin_cache_lower_down }, + { "interface show", admin_interface_show }, + { "redirect purge", 
admin_redirect_purge }, + { "update nbma", admin_update_nbma }, +}; + +static void admin_receive_cb(struct ev_io *w, int revents) +{ + struct admin_remote *rm = container_of(w, struct admin_remote, io); + int fd = rm->io.fd; + ssize_t len; + int i, cmdlen; + + len = recv(fd, rm->cmd, sizeof(rm->cmd) - rm->num_read, MSG_DONTWAIT); + if (len < 0 && errno == EAGAIN) + return; + if (len <= 0) + goto err; + + rm->num_read += len; + if (rm->num_read >= sizeof(rm->cmd)) + goto err; + + if (rm->cmd[rm->num_read-1] != '\n') + return; + rm->cmd[--rm->num_read] = 0; + + for (i = 0; i < ARRAY_SIZE(admin_handler); i++) { + cmdlen = strlen(admin_handler[i].command); + if (rm->num_read >= cmdlen && + strncasecmp(rm->cmd, admin_handler[i].command, cmdlen) == 0) { + nhrp_debug("Admin: %s", rm->cmd); + admin_handler[i].handler(rm, &rm->cmd[cmdlen]); + break; + } + } + if (i >= ARRAY_SIZE(admin_handler)) { + admin_write(rm, + "Status: error\n" + "Reason: unrecognized command\n"); + } + +err: + admin_free_remote(rm); +} + +static void admin_timeout_cb(struct ev_timer *t, int revents) +{ + admin_free_remote(container_of(t, struct admin_remote, timeout)); +} + +static void admin_accept_cb(ev_io *w, int revents) +{ + struct admin_remote *rm; + struct sockaddr_storage from; + socklen_t fromlen = sizeof(from); + int cnx; + + cnx = accept(w->fd, (struct sockaddr *) &from, &fromlen); + if (cnx < 0) + return; + fcntl(cnx, F_SETFD, FD_CLOEXEC); + + rm = calloc(1, sizeof(struct admin_remote)); + + ev_io_init(&rm->io, admin_receive_cb, cnx, EV_READ); + ev_io_start(&rm->io); + ev_timer_init(&rm->timeout, admin_timeout_cb, 10.0, 0.); + ev_timer_start(&rm->timeout); +} + +int admin_init(const char *opennhrp_socket) +{ + struct sockaddr_un sun; + int fd; + + memset(&sun, 0, sizeof(sun)); + sun.sun_family = AF_UNIX; + strncpy(sun.sun_path, opennhrp_socket, sizeof(sun.sun_path)); + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) + return 0; + + fcntl(fd, F_SETFD, FD_CLOEXEC); + 
unlink(opennhrp_socket); + if (bind(fd, (struct sockaddr *) &sun, sizeof(sun)) != 0) + goto err_close; + + if (listen(fd, 5) != 0) + goto err_close; + + ev_io_init(&accept_io, admin_accept_cb, fd, EV_READ); + ev_io_start(&accept_io); + + return 1; + +err_close: + nhrp_error("Failed initialize admin socket [%s]: %s", + opennhrp_socket, strerror(errno)); + close(fd); + return 0; +} diff --git a/nhrp/afnum.h b/nhrp/afnum.h new file mode 100644 index 0000000..2dc3d68 --- /dev/null +++ b/nhrp/afnum.h @@ -0,0 +1,29 @@ +/* afnum.h - RFC 1700 Address Family Number and + * ethernet protocol number definitions + * + * Copyright (C) 2007 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#ifndef AFNUM_H +#define AFNUM_H + +#include +#include "nhrp_defines.h" + +#define AFNUM_RESERVED constant_htons(0) +#define AFNUM_INET constant_htons(1) +#define AFNUM_INET6 constant_htons(2) + +#define ETH_P_NHRP 0x2001 + +#define ETHPROTO_IP constant_htons(ETH_P_IP) +#define ETHPROTO_NHRP constant_htons(ETH_P_NHRP) + +#endif diff --git a/nhrp/libev.c b/nhrp/libev.c new file mode 100644 index 0000000..c4af3b9 --- /dev/null +++ b/nhrp/libev.c @@ -0,0 +1,3 @@ +#include +#include "libev.h" +#include "../libev/ev.c" diff --git a/nhrp/libev.h b/nhrp/libev.h new file mode 100644 index 0000000..f9f5f23 --- /dev/null +++ b/nhrp/libev.h @@ -0,0 +1,22 @@ +#define EV_STANDALONE 1 +#define EV_MULTIPLICITY 0 +#define EV_VERIFY 0 + +#define EV_USE_CLOCK_SYSCALL 1 +#define EV_USE_SELECT 0 +#define EV_USE_POLL 1 + +#define EV_IDLE_ENABLE 1 + +/* Unused stuff, disabled for size optimization */ +#define EV_USE_INOTIFY 0 +#define EV_PERIODIC_ENABLE 0 +#define EV_EMBED_ENABLE 0 +#define EV_STAT_ENABLE 0 +#define EV_FORK_ENABLE 0 +#define EV_ASYNC_ENABLE 0 + +/* Disable the 
"void *data;" member of watchers to save memory */ +#define EV_COMMON /* empty */ + +#include "../libev/ev.h" diff --git a/nhrp/list.h b/nhrp/list.h new file mode 100644 index 0000000..4387970 --- /dev/null +++ b/nhrp/list.h @@ -0,0 +1,184 @@ +/* list.h - Single and double linked list macros + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + * + * This is more or less based on the code in the linux kernel. There are + * minor differences and this is only a subset of the kernel version. + */ + +#ifndef LIST_H +#define LIST_H + +#ifndef NULL +#define NULL 0L +#endif + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) +#endif + +struct hlist_head { + struct hlist_node *first; +}; + +struct hlist_node { + struct hlist_node *next; + struct hlist_node **pprev; +}; + +static inline int hlist_empty(const struct hlist_head *h) +{ + return !h->first; +} + +static inline int hlist_hashed(const struct hlist_node *n) +{ + return n->pprev != NULL; +} + +static inline void hlist_del(struct hlist_node *n) +{ + struct hlist_node *next = n->next; + struct hlist_node **pprev = n->pprev; + + *pprev = next; + if (next) + next->pprev = pprev; + + n->next = NULL; + n->pprev = NULL; +} + +static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) +{ + struct hlist_node *first = h->first; + + n->next = first; + if (first) + first->pprev = &n->next; + n->pprev = &h->first; + h->first = n; +} + +static inline void hlist_add_after(struct hlist_node *n, struct hlist_node *prev) +{ + n->next = prev->next; + n->pprev = &prev->next; + prev->next = n; +} + +static inline struct hlist_node **hlist_tail_ptr(struct hlist_head *h) +{ + struct hlist_node *n = 
h->first; + if (n == NULL) + return &h->first; + while (n->next != NULL) + n = n->next; + return &n->next; +} + +#define hlist_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_for_each(pos, head) \ + for (pos = (head)->first; pos; pos = pos->next) + +#define hlist_for_each_safe(pos, n, head) \ + for (pos = (head)->first; pos && ({ n = pos->next; 1; }); pos = n) + +#define hlist_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; pos && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = (head)->first; \ + pos && ({ n = pos->next; 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = n) + + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_INITIALIZER(l) { .next = &l, .prev = &l } + +static inline void list_init(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = NULL; + entry->prev = NULL; +} + +static inline int list_hashed(const struct list_head *n) +{ + return n->next != n && n->next != NULL; +} + +static inline int list_empty(const struct list_head *n) +{ + return !list_hashed(n); +} + +#define list_next(ptr, type, member) \ + (list_hashed(ptr) ? 
container_of((ptr)->next,type,member) : NULL) + +#define list_entry(ptr, type, member) container_of(ptr,type,member) + +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + +#endif diff --git a/nhrp/nhrp_address.c b/nhrp/nhrp_address.c new file mode 100644 index 0000000..13164e1 --- /dev/null +++ b/nhrp/nhrp_address.c @@ -0,0 +1,454 @@ +/* nhrp_address.c - NHRP address conversion functions + * + * Copyright (C) 2007-2009 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include "afnum.h" +#include "nhrp_address.h" +#include "nhrp_packet.h" +#include "nhrp_common.h" + +struct nhrp_resolver { + ares_channel channel; + struct ev_prepare prepare; + struct ev_timer timeout; + struct ev_io fds[4]; +}; + +static struct nhrp_resolver resolver; + +static void ares_timeout_cb(struct ev_timer *w, int revents) +{ + struct nhrp_resolver *r = + container_of(w, struct nhrp_resolver, timeout); + + ares_process(r->channel, NULL, NULL); +} + +static void ares_prepare_cb(struct ev_prepare *w, int revents) +{ + struct nhrp_resolver *r = + container_of(w, struct nhrp_resolver, prepare); + struct timeval *tv, tvbuf; + + tv = ares_timeout(r->channel, NULL, &tvbuf); + if (tv != NULL) { + r->timeout.repeat = tv->tv_sec + tv->tv_usec * 1e-6; + ev_timer_again(&r->timeout); + } else { + ev_timer_stop(&r->timeout); + } +} + +static void ares_io_cb(struct ev_io *w, int revents) +{ + ares_socket_t rfd = ARES_SOCKET_BAD, wfd = ARES_SOCKET_BAD; + + if (revents & EV_READ) + rfd = w->fd; + if (revents & EV_WRITE) + wfd = w->fd; + + ares_process_fd(resolver.channel, rfd, wfd); +} + +static void ares_socket_cb(void *data, ares_socket_t fd, + int readable, int writable) +{ + struct nhrp_resolver *r = (struct nhrp_resolver *) data; + int i, fi = -1, events = 0; + + if (readable) + events |= EV_READ; + if (writable) + events |= EV_WRITE; + + for (i = 0; i < ARRAY_SIZE(r->fds); i++) { + if (r->fds[i].fd == fd) + break; + if (fi < 0 && r->fds[i].fd == 0) + fi = i; + } + + if (events) { + if (i >= ARRAY_SIZE(r->fds)) { + NHRP_BUG_ON(fi == -1); + i = fi; + } else { + ev_io_stop(&r->fds[fi]); + } + ev_io_set(&r->fds[i], fd, events); + ev_io_start(&r->fds[i]); + } else if (i < ARRAY_SIZE(r->fds)) { + ev_io_stop(&r->fds[i]); + ev_io_set(&r->fds[i], 0, 0); + } +} + +static int bitcmp(const uint8_t *a, const uint8_t *b, int len) +{ + int bytes, bits, mask, r; + + bytes = len / 8; + 
bits = len % 8; + + if (bytes != 0) { + r = memcmp(a, b, bytes); + if (r != 0) + return r; + } + if (bits != 0) { + mask = (0xff << (8 - bits)) & 0xff; + return ((int) (a[bytes] & mask)) - ((int) (b[bytes] & mask)); + } + return 0; +} + +uint16_t nhrp_protocol_from_pf(uint16_t pf) +{ + switch (pf) { + case PF_INET: + return ETHPROTO_IP; + } + return 0; +} + +uint16_t nhrp_pf_from_protocol(uint16_t protocol) +{ + switch (protocol) { + case ETHPROTO_IP: + return PF_INET; + } + return PF_UNSPEC; +} + +uint16_t nhrp_afnum_from_pf(uint16_t pf) +{ + switch (pf) { + case PF_INET: + return AFNUM_INET; + } + return AFNUM_RESERVED; +} + +uint16_t nhrp_pf_from_afnum(uint16_t afnum) +{ + switch (afnum) { + case AFNUM_INET: + return PF_INET; + } + return PF_UNSPEC; +} + +int nhrp_address_parse(const char *string, + struct nhrp_address *addr, + uint8_t *prefix_len) +{ + uint8_t tmp; + int r; + + /* Try IP address format */ + r = sscanf(string, "%hhd.%hhd.%hhd.%hhd/%hhd", + &addr->addr[0], &addr->addr[1], + &addr->addr[2], &addr->addr[3], + prefix_len ? 
prefix_len : &tmp); + if ((r == 4) || (r == 5 && prefix_len != NULL)) { + addr->type = PF_INET; + addr->addr_len = 4; + addr->subaddr_len = 0; + if (r == 4 && prefix_len != NULL) + *prefix_len = 32; + return TRUE; + } + + return FALSE; +} + +int nhrp_address_parse_packet(uint16_t protocol, size_t len, uint8_t *packet, + struct nhrp_address *src, struct nhrp_address *dst) +{ + int pf; + struct iphdr *iph; + + pf = nhrp_pf_from_protocol(protocol); + switch (protocol) { + case ETHPROTO_IP: + if (len < sizeof(struct iphdr)) + return FALSE; + + iph = (struct iphdr *) packet; + if (src != NULL) + nhrp_address_set(src, pf, 4, (uint8_t *) &iph->saddr); + if (dst != NULL) + nhrp_address_set(dst, pf, 4, (uint8_t *) &iph->daddr); + break; + default: + return FALSE; + } + + return TRUE; +} + +#if ARES_VERSION_MAJOR > 1 || ARES_VERSION_MINOR > 4 +static void ares_address_cb(void *arg, int status, int timeouts, + struct hostent *he) +#else +static void ares_address_cb(void *arg, int status, struct hostent *he) +#endif +{ + struct nhrp_address_query *query = + (struct nhrp_address_query *) arg; + struct nhrp_address addr[16]; + int i; + + if (status == ARES_SUCCESS) { + for (i = 0; he->h_addr_list[i] != NULL && + i < ARRAY_SIZE(addr); i++) + nhrp_address_set(&addr[i], AF_INET, he->h_length, + (uint8_t *) he->h_addr_list[i]); + } else + i = -1; + + NHRP_BUG_ON(query->callback == NULL); + + query->callback(query, i, &addr[0]); + query->callback = NULL; +} + +void nhrp_address_resolve(struct nhrp_address_query *query, + const char *hostname, + nhrp_address_query_callback callback) +{ + if (query->callback != NULL) { + nhrp_error("Trying to resolve '%s', but previous query " + "was not finished yet", hostname); + return; + } + + query->callback = callback; + ares_gethostbyname(resolver.channel, hostname, AF_INET, + ares_address_cb, query); +} + +void nhrp_address_resolve_cancel(struct nhrp_address_query *query) +{ + /* The kills all active queries; not just the one + * given as 
parameter. But as those will be retried later + * anyway, it is not a problem for now. */ + + if (query->callback != NULL) + ares_cancel(resolver.channel); +} + +void nhrp_address_set_type(struct nhrp_address *addr, uint16_t type) +{ + addr->type = type; + addr->addr_len = addr->subaddr_len = 0; +} + +int nhrp_address_set(struct nhrp_address *addr, uint16_t type, uint8_t len, uint8_t *bytes) +{ + if (len > NHRP_MAX_ADDRESS_LEN) + return FALSE; + + addr->type = type; + addr->addr_len = len; + addr->subaddr_len = 0; + if (len != 0) + memcpy(addr->addr, bytes, len); + return TRUE; +} + +int nhrp_address_set_full(struct nhrp_address *addr, uint16_t type, + uint8_t len, uint8_t *bytes, + uint8_t sublen, uint8_t *subbytes) +{ + if (len + sublen > NHRP_MAX_ADDRESS_LEN) + return FALSE; + + addr->type = type; + addr->addr_len = len; + addr->subaddr_len = 0; + if (len != 0) + memcpy(addr->addr, bytes, len); + if (sublen != 0) + memcpy(&addr->addr[len], subbytes, sublen); + return TRUE; +} + +int nhrp_address_cmp(const struct nhrp_address *a, const struct nhrp_address *b) +{ + if (a->type > b->type) + return 1; + if (a->type < b->type) + return -1; + if (a->addr_len > b->addr_len || a->subaddr_len > b->subaddr_len) + return 1; + if (a->addr_len < b->addr_len || a->subaddr_len < b->subaddr_len) + return -1; + return memcmp(a->addr, b->addr, a->addr_len + a->subaddr_len); +} + +int nhrp_address_prefix_cmp(const struct nhrp_address *a, + const struct nhrp_address *b, int prefix) +{ + if (a->type > b->type) + return 1; + if (a->type < b->type) + return -1; + if (a->addr_len * 8 < prefix) + return 1; + if (b->addr_len * 8 < prefix) + return 1; + return bitcmp(a->addr, b->addr, prefix); +} + +int nhrp_address_is_multicast(const struct nhrp_address *addr) +{ + switch (addr->type) { + case PF_INET: + if ((addr->addr[0] & 0xf0) == 0xe0) + return TRUE; + break; + } + return FALSE; +} + +int nhrp_address_is_any_addr(const struct nhrp_address *addr) +{ + switch (addr->type) { + case 
PF_UNSPEC: + return TRUE; + case PF_INET: + if (memcmp(addr->addr, "\x00\x00\x00\x00", 4) == 0) + return TRUE; + break; + } + return FALSE; +} + +unsigned int nhrp_address_hash(const struct nhrp_address *addr) +{ + unsigned int hash = 5381; + int i; + + for (i = 0; i < addr->addr_len; i++) + hash = hash * 33 + addr->addr[i]; + + return hash; +} + +void nhrp_address_set_network(struct nhrp_address *addr, int prefix) +{ + int i, bits = 8 * addr->addr_len; + + for (i = prefix; i < bits; i++) + addr->addr[i / 8] &= ~(0x80 >> (i % 8)); +} + +void nhrp_address_set_broadcast(struct nhrp_address *addr, int prefix) +{ + int i, bits = 8 * addr->addr_len; + + for (i = prefix; i < bits; i++) + addr->addr[i / 8] |= 0x80 >> (i % 8); +} + +int nhrp_address_is_network(const struct nhrp_address *addr, int prefix) +{ + int i, bits = 8 * addr->addr_len; + + for (i = prefix; i < bits; i++) + if (addr->addr[i / 8] & (0x80 >> (i % 8))) + return FALSE; + return TRUE; +} + +const char *nhrp_address_format(const struct nhrp_address *addr, + size_t buflen, char *buffer) +{ + switch (addr->type) { + case PF_UNSPEC: + snprintf(buffer, buflen, "(unspecified)"); + break; + case PF_INET: + snprintf(buffer, buflen, "%d.%d.%d.%d", + addr->addr[0], addr->addr[1], + addr->addr[2], addr->addr[3]); + break; + default: + snprintf(buffer, buflen, "(proto 0x%04x)", + addr->type); + break; + } + + return buffer; +} + +int nhrp_address_match_cie_list(struct nhrp_address *nbma_address, + struct nhrp_address *protocol_address, + struct list_head *cie_list) +{ + struct nhrp_cie *cie; + + list_for_each_entry(cie, cie_list, cie_list_entry) { + if (nhrp_address_cmp(&cie->nbma_address, nbma_address) == 0 && + nhrp_address_cmp(&cie->protocol_address, protocol_address) == 0) + return TRUE; + } + + return FALSE; +} + +int nhrp_address_init(void) +{ + struct ares_options ares_opts; + int i; + + memset(&ares_opts, 0, sizeof(ares_opts)); + ares_opts.sock_state_cb = &ares_socket_cb; + ares_opts.sock_state_cb_data = 
&resolver; + ares_opts.timeout = 2; + ares_opts.tries = 3; + if (ares_init_options(&resolver.channel, &ares_opts, + ARES_OPT_SOCK_STATE_CB | ARES_OPT_TIMEOUT | + ARES_OPT_TRIES) != ARES_SUCCESS) + return FALSE; + + ev_timer_init(&resolver.timeout, ares_timeout_cb, 0.0, 0.0); + ev_prepare_init(&resolver.prepare, ares_prepare_cb); + ev_prepare_start(&resolver.prepare); + for (i = 0; i < ARRAY_SIZE(resolver.fds); i++) + ev_init(&resolver.fds[i], ares_io_cb); + + return TRUE; +} + +void nhrp_address_cleanup(void) +{ + int i; + + ev_timer_stop(&resolver.timeout); + ev_prepare_stop(&resolver.prepare); + for (i = 0; i < ARRAY_SIZE(resolver.fds); i++) + ev_io_stop(&resolver.fds[i]); + ares_destroy(resolver.channel); +} diff --git a/nhrp/nhrp_address.h b/nhrp/nhrp_address.h new file mode 100644 index 0000000..e479631 --- /dev/null +++ b/nhrp/nhrp_address.h @@ -0,0 +1,80 @@ +/* nhrp_address.h - NHRP address structures and helpers + * + * Copyright (C) 2007 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#ifndef NHRP_ADDRESS_H +#define NHRP_ADDRESS_H + +#include +#include +#include "list.h" + +#define NHRP_MAX_ADDRESS_LEN 6 + +struct nhrp_cie_list_head; +struct nhrp_address; +struct nhrp_address_query; + +typedef void (*nhrp_address_query_callback)(struct nhrp_address_query *query, + int num_addr, + struct nhrp_address *addrs); + +struct nhrp_address { + uint16_t type; + uint8_t addr_len; + uint8_t subaddr_len; + uint8_t addr[NHRP_MAX_ADDRESS_LEN]; +}; + +struct nhrp_address_query { + nhrp_address_query_callback callback; +}; + +uint16_t nhrp_protocol_from_pf(uint16_t pf); +uint16_t nhrp_pf_from_protocol(uint16_t protocol); +uint16_t nhrp_afnum_from_pf(uint16_t pf); +uint16_t nhrp_pf_from_afnum(uint16_t afnum); + +int nhrp_address_init(void); +void nhrp_address_cleanup(void); +int nhrp_address_parse_packet(uint16_t protocol, size_t len, uint8_t *packet, + struct nhrp_address *src, + struct nhrp_address *dst); +int nhrp_address_parse(const char *string, struct nhrp_address *addr, + uint8_t *prefix_len); +void nhrp_address_resolve(struct nhrp_address_query *query, + const char *hostname, + nhrp_address_query_callback callback); +void nhrp_address_resolve_cancel(struct nhrp_address_query *query); +void nhrp_address_set_type(struct nhrp_address *addr, uint16_t type); +int nhrp_address_set(struct nhrp_address *addr, uint16_t type, + uint8_t len, uint8_t *bytes); +int nhrp_address_set_full(struct nhrp_address *addr, uint16_t type, + uint8_t len, uint8_t *bytes, + uint8_t sublen, uint8_t *subbytes); +int nhrp_address_cmp(const struct nhrp_address *a, const struct nhrp_address *b); +int nhrp_address_prefix_cmp(const struct nhrp_address *a, const struct nhrp_address *b, + int prefix); +unsigned int nhrp_address_hash(const struct nhrp_address *addr); +void nhrp_address_set_network(struct nhrp_address *addr, int prefix); +void nhrp_address_set_broadcast(struct nhrp_address *addr, int prefix); +int nhrp_address_is_network(const struct nhrp_address *addr, int prefix); 
+int nhrp_address_is_broadcast(const struct nhrp_address *addr, int prefix); +int nhrp_address_is_multicast(const struct nhrp_address *addr); +int nhrp_address_is_any_addr(const struct nhrp_address *addr); +const char *nhrp_address_format(const struct nhrp_address *addr, + size_t buflen, char *buffer); + +int nhrp_address_match_cie_list(struct nhrp_address *nbma_address, + struct nhrp_address *protocol_address, + struct list_head *cie_list); + +#endif diff --git a/nhrp/nhrp_common.h b/nhrp/nhrp_common.h new file mode 100644 index 0000000..6730e74 --- /dev/null +++ b/nhrp/nhrp_common.h @@ -0,0 +1,78 @@ +/* nhrp_common.h - Generic helper functions + * + * Copyright (C) 2007-2009 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#ifndef NHRP_COMMON_H +#define NHRP_COMMON_H + +#include +#include +#include +#include + +struct nhrp_interface; +struct nhrp_address; + +extern const char *nhrp_config_file, *nhrp_script_file; +extern int nhrp_running, nhrp_verbose; + +/* Logging */ +void nhrp_log(int level, const char *format, ...); + +#define NHRP_LOG_DEBUG 0 +#define NHRP_LOG_INFO 1 +#define NHRP_LOG_ERROR 2 + +#define nhrp_debug(...) \ + do { \ + if (nhrp_verbose) \ + nhrp_log(NHRP_LOG_DEBUG, __VA_ARGS__); \ + } while(0) + +#define nhrp_info(...) \ + nhrp_log(NHRP_LOG_INFO, __VA_ARGS__) + +#define nhrp_error(...) 
\ + nhrp_log(NHRP_LOG_ERROR, __VA_ARGS__) + +void nhrp_perror(const char *message); +void nhrp_hex_dump(const char *name, const uint8_t *buf, int bytes); + +#define NHRP_BUG_ON(cond) if (cond) { \ + nhrp_error("BUG: failure at %s:%d/%s(): %s!", \ + __FILE__, __LINE__, __func__, #cond); \ + abort(); \ +} + +/* Initializers for system dependant stuff */ +int forward_init(void); +void forward_cleanup(void); +int forward_local_addresses_changed(void); + +int kernel_init(void); +void kernel_stop_listening(void); +void kernel_cleanup(void); +int kernel_route(struct nhrp_interface *out_iface, + struct nhrp_address *dest, + struct nhrp_address *default_source, + struct nhrp_address *next_hop, + u_int16_t *mtu); +int kernel_send(uint8_t *packet, size_t bytes, struct nhrp_interface *out, + struct nhrp_address *to); +int kernel_inject_neighbor(struct nhrp_address *neighbor, + struct nhrp_address *hwaddr, + struct nhrp_interface *dev); + +int log_init(void); +int admin_init(const char *socket); +void server_init(void); + +#endif diff --git a/nhrp/nhrp_defines.h b/nhrp/nhrp_defines.h new file mode 100644 index 0000000..2812a13 --- /dev/null +++ b/nhrp/nhrp_defines.h @@ -0,0 +1,87 @@ +/* nhrp_defines.h - NHRP definitions + * + * Copyright (C) 2007 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#ifndef NHRP_DEFINES_H +#define NHRP_DEFINES_H + +#include +#include +#include +#include + +#ifndef NULL +#define NULL 0L +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef __bswap_constant_16 +#define __bswap_constant_16(x) \ + ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8)) +#endif +#ifndef __bswap_constant_32 +#define __bswap_constant_32(x) \ + ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \ + (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)) +#endif + +#if __BYTE_ORDER == __BIG_ENDIAN +#define constant_ntohl(x) (x) +#define constant_ntohs(x) (x) +#define constant_htonl(x) (x) +#define constant_htons(x) (x) +#else +#define constant_ntohl(x) __bswap_constant_32(x) +#define constant_ntohs(x) __bswap_constant_16(x) +#define constant_htonl(x) __bswap_constant_32(x) +#define constant_htons(x) __bswap_constant_16(x) +#endif + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) +#endif + +#ifndef offsetof +#ifdef __compiler_offsetof +#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER) +#else +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif +#endif + +#define BIT(x) (1 << (x)) + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) +#endif + +#if __GNUC__ >= 3 +#define NHRP_EMPTY_ARRAY +#else +#define NHRP_EMPTY_ARRAY 0 +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) +#define NHRP_NO_NBMA_GRE +#endif + +#define NHRP_DEFAULT_HOLDING_TIME (2 * 60 * 60) + +#endif diff --git a/nhrp/nhrp_interface.c b/nhrp/nhrp_interface.c new file mode 100644 index 0000000..32c2383 --- /dev/null +++ b/nhrp/nhrp_interface.c @@ -0,0 +1,188 @@ +/* nhrp_interface.c - NHRP configuration per interface + * + * Copyright (C) 2007 Timo Teräs + * All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "nhrp_common.h" +#include "nhrp_interface.h" +#include "nhrp_address.h" + +#define NHRP_INDEX_HASH_SIZE (1 << 6) + +static struct list_head name_list = LIST_INITIALIZER(name_list); +static struct hlist_head index_hash[NHRP_INDEX_HASH_SIZE]; + +static char *env(const char *key, const char *value) +{ + char *buf; + buf = malloc(strlen(key)+strlen(value)+2); + if (buf == NULL) + return NULL; + sprintf(buf, "%s=%s", key, value); + return buf; +} + +static char *envu32(const char *key, uint32_t value) +{ + char *buf; + buf = malloc(strlen(key)+16); + if (buf == NULL) + return NULL; + sprintf(buf, "%s=%u", key, value); + return buf; +} + +void nhrp_interface_cleanup(void) +{ + struct nhrp_interface *iface, *n; + + list_for_each_entry_safe(iface, n, &name_list, name_list_entry) { + list_del(&iface->name_list_entry); + hlist_del(&iface->index_list_entry); + free(iface); + } +} + +void nhrp_interface_hash(struct nhrp_interface *iface) +{ + int iidx = iface->index & (NHRP_INDEX_HASH_SIZE - 1); + + list_del(&iface->name_list_entry); + list_add(&iface->name_list_entry, &name_list); + + hlist_del(&iface->index_list_entry); + hlist_add_head(&iface->index_list_entry, &index_hash[iidx]); +} + +int nhrp_interface_foreach(nhrp_interface_enumerator enumerator, void *ctx) +{ + struct nhrp_interface *iface; + int rc; + + list_for_each_entry(iface, &name_list, name_list_entry) { + rc = enumerator(ctx, iface); + if (rc != 0) + return rc; + } + return 0; +} + +struct nhrp_interface *nhrp_interface_get_by_name(const char *name, int create) +{ + struct nhrp_interface *iface; + + list_for_each_entry(iface, &name_list, name_list_entry) { + if 
(strcmp(iface->name, name) == 0) + return iface; + } + + if (!create) + return NULL; + + iface = calloc(1, sizeof(struct nhrp_interface)); + iface->holding_time = NHRP_DEFAULT_HOLDING_TIME; + iface->route_table = RT_TABLE_MAIN; + strncpy(iface->name, name, sizeof(iface->name) - 1); /* calloc() zeroed the last byte, so the name stays NUL-terminated */ + + list_init(&iface->peer_list); + list_init(&iface->mcast_list); + list_add(&iface->name_list_entry, &name_list); + hlist_add_head(&iface->index_list_entry, &index_hash[0]); + + return iface; +} + +struct nhrp_interface *nhrp_interface_get_by_index(unsigned int index, int create) +{ + struct nhrp_interface *iface; + struct hlist_node *n; + int iidx = index & (NHRP_INDEX_HASH_SIZE - 1); + + hlist_for_each_entry(iface, n, &index_hash[iidx], index_list_entry) { + if (iface->index == index) + return iface; + } + + return NULL; +} + +struct nhrp_interface *nhrp_interface_get_by_nbma(struct nhrp_address *addr) +{ + struct nhrp_interface *iface; + + list_for_each_entry(iface, &name_list, name_list_entry) { + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + continue; + + if (nhrp_address_cmp(addr, &iface->nbma_address) == 0) + return iface; + + if (iface->nbma_address.type == PF_UNSPEC && !iface->link_index) + return iface; + } + + return NULL; +} + +struct nhrp_interface *nhrp_interface_get_by_protocol(struct nhrp_address *addr) +{ + struct nhrp_interface *iface; + + list_for_each_entry(iface, &name_list, name_list_entry) { + if (nhrp_address_cmp(addr, &iface->protocol_address) == 0) + return iface; + } + + return NULL; +} + +int nhrp_interface_run_script(struct nhrp_interface *iface, char *action) +{ + const char *argv[] = { nhrp_script_file, action, NULL }; + char *envp[6]; + pid_t pid; + int i = 0; + + pid = fork(); + if (pid == -1) + return FALSE; + if (pid > 0) + return TRUE; + + envp[i++] = "NHRP_TYPE=INTERFACE"; + envp[i++] = env("NHRP_INTERFACE", iface->name); + envp[i++] = envu32("NHRP_GRE_KEY", iface->gre_key); + envp[i++] = NULL; + + execve(nhrp_script_file, (char **) argv, 
envp); + exit(1); +} + +struct nhrp_peer *nhrp_interface_find_peer(struct nhrp_interface *iface, + const struct nhrp_address *nbma) +{ + unsigned int key = nhrp_address_hash(nbma) % NHRP_INTERFACE_NBMA_HASH_SIZE; + struct nhrp_peer *peer; + struct hlist_node *n; + + hlist_for_each_entry(peer, n, &iface->nbma_hash[key], nbma_hash_entry) { + if (nhrp_address_cmp(nbma, &peer->next_hop_address) == 0) + return peer; + } + return NULL; +} diff --git a/nhrp/nhrp_interface.h b/nhrp/nhrp_interface.h new file mode 100644 index 0000000..8e3e8df --- /dev/null +++ b/nhrp/nhrp_interface.h @@ -0,0 +1,78 @@ +/* nhrp_interface.h - NHRP configuration per interface definitions + * + * Copyright (C) 2007 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#ifndef NHRP_INTERFACE_H +#define NHRP_INTERFACE_H + +#include "nhrp_packet.h" +#include "nhrp_peer.h" + +#define NHRP_INTERFACE_FLAG_NON_CACHING 0x0001 /* Do not cache entries */ +#define NHRP_INTERFACE_FLAG_SHORTCUT 0x0002 /* Create shortcut routes */ +#define NHRP_INTERFACE_FLAG_REDIRECT 0x0004 /* Send redirects */ +#define NHRP_INTERFACE_FLAG_SHORTCUT_DEST 0x0008 /* Advertise routes */ +#define NHRP_INTERFACE_FLAG_CONFIGURED 0x0010 /* Found in config file */ + +#define NHRP_INTERFACE_NBMA_HASH_SIZE 256 + +struct nhrp_interface { + struct list_head name_list_entry; + struct hlist_node index_list_entry; + + /* Configured information */ + char name[16]; + unsigned int flags; + unsigned int holding_time; + struct nhrp_buffer *auth_token; + unsigned int route_table; + + /* Cached from kernel interface */ + unsigned int index, link_index; + uint32_t gre_key; + uint16_t afnum; + uint16_t mtu, nbma_mtu; + struct nhrp_address nbma_address; + struct nhrp_cie nat_cie; + + /* Actually, we should have list 
of protocol addresses; + * we might have multiple address and multiple protocol types */ + struct nhrp_address protocol_address; + int protocol_address_prefix; + + /* Peer cache is interface specific */ + struct list_head peer_list; + struct hlist_head nbma_hash[NHRP_INTERFACE_NBMA_HASH_SIZE]; + + /* Multicast related stuff */ + struct list_head mcast_list; + int mcast_mask; + int mcast_numaddr; + struct nhrp_address *mcast_addr; +}; + +typedef int (*nhrp_interface_enumerator)(void *ctx, struct nhrp_interface *iface); + +void nhrp_interface_cleanup(void); +void nhrp_interface_hash(struct nhrp_interface *iface); +int nhrp_interface_foreach(nhrp_interface_enumerator enumerator, void *ctx); +struct nhrp_interface *nhrp_interface_get_by_name(const char *name, int create); +struct nhrp_interface *nhrp_interface_get_by_index(unsigned int index, int create); +struct nhrp_interface *nhrp_interface_get_by_nbma(struct nhrp_address *addr); +struct nhrp_interface *nhrp_interface_get_by_protocol(struct nhrp_address *addr); +int nhrp_interface_run_script(struct nhrp_interface *iface, char *action); +struct nhrp_peer *nhrp_interface_find_peer(struct nhrp_interface *iface, const struct nhrp_address *nbma); + +void nhrp_interface_resolve_nbma(struct nhrp_interface *iface, + struct nhrp_address *nbmadest, + struct nhrp_address *nbma); + +#endif diff --git a/nhrp/nhrp_packet.c b/nhrp/nhrp_packet.c new file mode 100644 index 0000000..f46b481 --- /dev/null +++ b/nhrp/nhrp_packet.c @@ -0,0 +1,1331 @@ +/* nhrp_packet.c - NHRP packet marshalling and tranceiving + * + * Copyright (C) 2007-2009 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#include +#include +#include +#include +#include + +#include "libev.h" +#include "nhrp_common.h" +#include "nhrp_packet.h" +#include "nhrp_peer.h" +#include "nhrp_interface.h" + +#define PACKET_RETRIES 6 +#define PACKET_RETRY_INTERVAL 5.0 + +#define RATE_LIMIT_HASH_SIZE 256 +#define RATE_LIMIT_MAX_TOKENS 4 +#define RATE_LIMIT_SEND_INTERVAL 5.0 +#define RATE_LIMIT_SILENCE 360.0 +#define RATE_LIMIT_PURGE_INTERVAL 600.0 + +#define MAX_PDU_SIZE 1500 + +struct nhrp_rate_limit { + struct hlist_node hash_entry; + struct nhrp_address src; + struct nhrp_address dst; + ev_tstamp rate_last; + int rate_tokens; +}; + +static uint32_t request_id = 0; +static struct list_head pending_requests = LIST_INITIALIZER(pending_requests); +static struct hlist_head rate_limit_hash[RATE_LIMIT_HASH_SIZE]; +static ev_timer rate_limit_timer; +static int num_rate_limit_entries = 0; + +static void nhrp_packet_xmit_timeout_cb(struct ev_timer *w, int revents); +static int unmarshall_packet_header(uint8_t **pdu, size_t *pdusize, + struct nhrp_packet *packet); + +static void nhrp_rate_limit_delete(struct nhrp_rate_limit *rl) +{ + hlist_del(&rl->hash_entry); + free(rl); + num_rate_limit_entries--; +} + +int nhrp_rate_limit_clear(struct nhrp_address *a, int pref) +{ + struct nhrp_rate_limit *rl; + struct hlist_node *n, *c; + int i, ret = 0; + + for (i = 0; i < RATE_LIMIT_HASH_SIZE; i++) { + hlist_for_each_entry_safe(rl, c, n, &rate_limit_hash[i], + hash_entry) { + if (a->type == AF_UNSPEC || + nhrp_address_prefix_cmp(a, &rl->src, pref) == 0 || + nhrp_address_prefix_cmp(a, &rl->dst, pref) == 0) { + nhrp_rate_limit_delete(rl); + ret++; + } + } + } + + if (num_rate_limit_entries == 0) + ev_timer_stop(&rate_limit_timer); + + return ret; +} + +static void prune_rate_limit_entries_cb(struct ev_timer *w, int revents) +{ + struct nhrp_rate_limit *rl; + struct hlist_node *c, *n; + int i; + + for (i = 0; i < RATE_LIMIT_HASH_SIZE; i++) { + hlist_for_each_entry_safe(rl, c, n, &rate_limit_hash[i], + 
hash_entry) { + + if (ev_now() > rl->rate_last + 2 * RATE_LIMIT_SILENCE) + nhrp_rate_limit_delete(rl); + } + } + + if (num_rate_limit_entries == 0) + ev_timer_stop(&rate_limit_timer); +} + +static struct nhrp_rate_limit *get_rate_limit(struct nhrp_address *src, + struct nhrp_address *dst) +{ + unsigned int key; + struct nhrp_rate_limit *e; + struct hlist_node *n; + + key = nhrp_address_hash(src) ^ nhrp_address_hash(dst); + key %= RATE_LIMIT_HASH_SIZE; + + hlist_for_each_entry(e, n, &rate_limit_hash[key], hash_entry) { + if (nhrp_address_cmp(&e->src, src) == 0 && + nhrp_address_cmp(&e->dst, dst) == 0) + return e; + } + + e = calloc(1, sizeof(struct nhrp_rate_limit)); + e->src = *src; + e->dst = *dst; + hlist_add_head(&e->hash_entry, &rate_limit_hash[key]); + + if (num_rate_limit_entries == 0) { + ev_timer_init(&rate_limit_timer, prune_rate_limit_entries_cb, + RATE_LIMIT_PURGE_INTERVAL, + RATE_LIMIT_PURGE_INTERVAL); + ev_timer_start(&rate_limit_timer); + } + + num_rate_limit_entries++; + + return e; +} + +static uint16_t nhrp_calculate_checksum(uint8_t *pdu, uint16_t len) +{ + uint16_t *pdu16 = (uint16_t *) pdu; + uint32_t csum = 0; + int i; + + for (i = 0; i < len / 2; i++) + csum += pdu16[i]; + if (len & 1) + csum += htons(pdu[len - 1] << 8); /* odd trailing byte is the high-order byte of a word padded with 0x00 */ + + while (csum & 0xffff0000) + csum = (csum & 0xffff) + (csum >> 16); + + return (~csum) & 0xffff; +} + +struct nhrp_buffer *nhrp_buffer_alloc(uint32_t size) +{ + struct nhrp_buffer *buf; + + buf = malloc(sizeof(struct nhrp_buffer) + size); + buf->length = size; + + return buf; +} + +struct nhrp_buffer *nhrp_buffer_copy(struct nhrp_buffer *buffer) +{ + struct nhrp_buffer *copy; + + copy = nhrp_buffer_alloc(buffer->length); + memcpy(copy->data, buffer->data, buffer->length); + return copy; +} + +int nhrp_buffer_cmp(struct nhrp_buffer *a, struct nhrp_buffer *b) +{ + if (a->length > b->length) + return 1; + if (a->length < b->length) + return -1; + return memcmp(a->data, b->data, a->length); +} + +void nhrp_buffer_free(struct 
nhrp_buffer *buffer) +{ + free(buffer); +} + +struct nhrp_cie *nhrp_cie_alloc(void) +{ + return calloc(1, sizeof(struct nhrp_cie)); +} + +void nhrp_cie_free(struct nhrp_cie *cie) +{ + free(cie); +} + +void nhrp_cie_reset(struct nhrp_cie *cie) +{ + memset(&cie->cie_list_entry, 0, sizeof(cie->cie_list_entry)); +} + +void nhrp_payload_free(struct nhrp_payload *payload) +{ + struct nhrp_cie *cie, *n; + + switch (payload->payload_type) { + case NHRP_PAYLOAD_TYPE_RAW: + nhrp_buffer_free(payload->u.raw); + break; + case NHRP_PAYLOAD_TYPE_CIE_LIST: + list_for_each_entry_safe(cie, n, &payload->u.cie_list, cie_list_entry) { + list_del(&cie->cie_list_entry); + nhrp_cie_free(cie); + } + break; + } + payload->payload_type = NHRP_PAYLOAD_TYPE_NONE; +} + +void nhrp_payload_set_type(struct nhrp_payload *payload, int type) +{ + if (payload->payload_type == type) + return; + + nhrp_payload_free(payload); + payload->payload_type = type; + switch (type) { + case NHRP_PAYLOAD_TYPE_CIE_LIST: + list_init(&payload->u.cie_list); + break; + default: + payload->u.raw = NULL; + break; + } +} + +void nhrp_payload_set_raw(struct nhrp_payload *payload, struct nhrp_buffer *raw) +{ + nhrp_payload_set_type(payload, NHRP_PAYLOAD_TYPE_RAW); + payload->u.raw = raw; +} + +void nhrp_payload_add_cie(struct nhrp_payload *payload, struct nhrp_cie *cie) +{ + if (payload->payload_type != NHRP_PAYLOAD_TYPE_CIE_LIST) { + nhrp_cie_free(cie); + nhrp_info("Trying to add CIE payload to non-CIE payload %d\n", + payload->payload_type); + return; + } + + list_add_tail(&cie->cie_list_entry, &payload->u.cie_list); +} + +struct nhrp_cie *nhrp_payload_get_cie(struct nhrp_payload *payload, int index) +{ + struct nhrp_cie *cie; + + if (payload->payload_type != NHRP_PAYLOAD_TYPE_CIE_LIST) + return NULL; + + list_for_each_entry(cie, &payload->u.cie_list, cie_list_entry) { + index--; + if (index == 0) + return cie; + } + + return NULL; +} + +struct nhrp_packet *nhrp_packet_alloc(void) +{ + struct nhrp_packet *packet; + packet 
= calloc(1, sizeof(struct nhrp_packet)); + packet->ref = 1; + packet->hdr.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT; + list_init(&packet->request_list_entry); + ev_timer_init(&packet->timeout, nhrp_packet_xmit_timeout_cb, + PACKET_RETRY_INTERVAL, PACKET_RETRY_INTERVAL); + return packet; +} + +struct nhrp_packet *nhrp_packet_get(struct nhrp_packet *packet) +{ + packet->ref++; + return packet; +} + +struct nhrp_payload *nhrp_packet_payload(struct nhrp_packet *packet, int payload_type) +{ + return nhrp_packet_extension(packet, NHRP_EXTENSION_PAYLOAD, payload_type); +} + +struct nhrp_payload *nhrp_packet_extension(struct nhrp_packet *packet, + uint32_t extension, int payload_type) +{ + struct nhrp_payload *p; + + p = packet->extension_by_type[extension & 0x7fff]; + if (p != NULL) { + if (payload_type == NHRP_PAYLOAD_TYPE_ANY || + payload_type == p->payload_type) + return p; + if (extension & NHRP_EXTENSION_FLAG_NOCREATE) + return NULL; + nhrp_payload_set_type(p, payload_type); + return p; + } + + if (extension & NHRP_EXTENSION_FLAG_NOCREATE) + return NULL; + + p = &packet->extension_by_order[packet->num_extensions++]; + p->extension_type = extension & 0xffff; + packet->extension_by_type[extension & 0x7fff] = p; + if (payload_type != NHRP_PAYLOAD_TYPE_ANY) + nhrp_payload_set_type(p, payload_type); + + return p; +} + +static void nhrp_packet_release(struct nhrp_packet *packet) +{ + int i; + + if (packet->dst_peer != NULL) + nhrp_peer_put(packet->dst_peer); + for (i = 0; i < packet->num_extensions; i++) + nhrp_payload_free(&packet->extension_by_order[i]); + free(packet); +} + +void nhrp_packet_put(struct nhrp_packet *packet) +{ + NHRP_BUG_ON(packet->ref == 0); + + packet->ref--; + if (packet->ref == 0) + nhrp_packet_release(packet); +} + +int nhrp_packet_reroute(struct nhrp_packet *packet, struct nhrp_peer *dst_peer) +{ + packet->dst_iface = packet->src_iface; + if (packet->dst_peer != NULL) + nhrp_peer_put(packet->dst_peer); + packet->dst_peer = nhrp_peer_get(dst_peer); 
+ return nhrp_packet_route(packet); +} + +static void nhrp_packet_dequeue(struct nhrp_packet *packet) +{ + ev_timer_stop(&packet->timeout); + if (list_hashed(&packet->request_list_entry)) + list_del(&packet->request_list_entry); + nhrp_packet_put(packet); +} + +static int nhrp_do_handle_error_indication(struct nhrp_packet *error_pkt, + struct nhrp_packet *orig_pkt) +{ + struct nhrp_packet *req; + + list_for_each_entry(req, &pending_requests, request_list_entry) { + if (orig_pkt->hdr.u.request_id != req->hdr.u.request_id) + continue; + + if (nhrp_address_cmp(&orig_pkt->src_nbma_address, + &req->src_nbma_address)) + continue; + if (nhrp_address_cmp(&orig_pkt->src_protocol_address, + &req->src_protocol_address)) + continue; + + if (req->handler != NULL) + req->handler(req->handler_ctx, error_pkt); + nhrp_packet_dequeue(req); + + return TRUE; + } + + return FALSE; +} + +static int nhrp_handle_error_indication(struct nhrp_packet *error_packet) +{ + struct nhrp_packet *packet; + struct nhrp_payload *payload; + uint8_t *pdu; + size_t pduleft; + int r; + + packet = nhrp_packet_alloc(); + if (packet == NULL) + return FALSE; + + payload = nhrp_packet_payload(error_packet, NHRP_PAYLOAD_TYPE_RAW); + pdu = payload->u.raw->data; + pduleft = payload->u.raw->length; + + if (!unmarshall_packet_header(&pdu, &pduleft, packet)) { + nhrp_packet_put(packet); + return FALSE; + } + + r = nhrp_do_handle_error_indication(error_packet, packet); + nhrp_packet_put(packet); + + return r; +} + +#define NHRP_TYPE_REQUEST 0 +#define NHRP_TYPE_REPLY 1 +#define NHRP_TYPE_INDICATION 2 + +static struct { + int type; + uint16_t payload_type; + int (*handler)(struct nhrp_packet *packet); +} packet_types[] = { + [NHRP_PACKET_RESOLUTION_REQUEST] = { + .type = NHRP_TYPE_REQUEST, + .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST, + }, + [NHRP_PACKET_RESOLUTION_REPLY] = { + .type = NHRP_TYPE_REPLY, + .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST, + }, + [NHRP_PACKET_REGISTRATION_REQUEST] = { + .type = 
NHRP_TYPE_REQUEST, + .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST, + }, + [NHRP_PACKET_REGISTRATION_REPLY] = { + .type = NHRP_TYPE_REPLY, + .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST, + }, + [NHRP_PACKET_PURGE_REQUEST] = { + .type = NHRP_TYPE_REQUEST, + .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST, + }, + [NHRP_PACKET_PURGE_REPLY] = { + .type = NHRP_TYPE_REPLY, + .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST, + }, + [NHRP_PACKET_ERROR_INDICATION] = { + .type = NHRP_TYPE_INDICATION, + .payload_type = NHRP_PAYLOAD_TYPE_RAW, + .handler = nhrp_handle_error_indication, + }, + [NHRP_PACKET_TRAFFIC_INDICATION] = { + .type = NHRP_TYPE_INDICATION, + .payload_type = NHRP_PAYLOAD_TYPE_RAW, + } +}; +static int extension_types[] = { + [NHRP_EXTENSION_RESPONDER_ADDRESS] = NHRP_PAYLOAD_TYPE_CIE_LIST, + [NHRP_EXTENSION_FORWARD_TRANSIT_NHS] = NHRP_PAYLOAD_TYPE_CIE_LIST, + [NHRP_EXTENSION_REVERSE_TRANSIT_NHS] = NHRP_PAYLOAD_TYPE_CIE_LIST, + [NHRP_EXTENSION_NAT_ADDRESS] = NHRP_PAYLOAD_TYPE_CIE_LIST +}; + +static int unmarshall_binary(uint8_t **pdu, size_t *pduleft, size_t size, void *raw) +{ + if (*pduleft < size) + return FALSE; + + memcpy(raw, *pdu, size); + *pdu += size; + *pduleft -= size; + return TRUE; +} + +static inline int unmarshall_protocol_address(uint8_t **pdu, size_t *pduleft, struct nhrp_address *pa) +{ + if (*pduleft < pa->addr_len) + return FALSE; + + if (pa->addr_len) { + if (!nhrp_address_set(pa, pa->type, pa->addr_len, *pdu)) + return FALSE; + } else { + nhrp_address_set_type(pa, PF_UNSPEC); + } + + *pdu += pa->addr_len; + *pduleft -= pa->addr_len; + return TRUE; +} + +static inline int unmarshall_nbma_address(uint8_t **pdu, size_t *pduleft, struct nhrp_address *na) +{ + if (*pduleft < na->addr_len + na->subaddr_len) + return FALSE; + + if (na->addr_len || na->subaddr_len) { + if (!nhrp_address_set_full(na, na->type, + na->addr_len, *pdu, + na->subaddr_len, *pdu + na->addr_len)) + return FALSE; + } else { + nhrp_address_set_type(na, PF_UNSPEC); + } + + *pdu += 
na->addr_len + na->subaddr_len; + *pduleft -= na->addr_len + na->subaddr_len; + return TRUE; +} + +static int unmarshall_cie(uint8_t **pdu, size_t *pduleft, struct nhrp_packet *p, struct nhrp_cie *cie) +{ + if (!unmarshall_binary(pdu, pduleft, sizeof(struct nhrp_cie_header), &cie->hdr)) + return FALSE; + + cie->nbma_address.type = nhrp_pf_from_afnum(p->hdr.afnum); + cie->nbma_address.addr_len = cie->hdr.nbma_address_len; + cie->nbma_address.subaddr_len = cie->hdr.nbma_subaddress_len; + cie->protocol_address.type = nhrp_pf_from_protocol(p->hdr.protocol_type); + cie->protocol_address.addr_len = cie->hdr.protocol_address_len; + + if (!unmarshall_nbma_address(pdu, pduleft, &cie->nbma_address)) + return FALSE; + return unmarshall_protocol_address(pdu, pduleft, &cie->protocol_address); +} + +static int unmarshall_payload(uint8_t **pdu, size_t *pduleft, + struct nhrp_packet *packet, + int type, size_t size, + struct nhrp_payload *p) +{ + struct nhrp_cie *cie; + size_t cieleft; + + if (*pduleft < size) + return FALSE; + + nhrp_payload_set_type(p, type); + switch (p->payload_type) { + case NHRP_PAYLOAD_TYPE_NONE: + *pdu += size; + *pduleft -= size; + return TRUE; + case NHRP_PAYLOAD_TYPE_RAW: + p->u.raw = nhrp_buffer_alloc(size); + return unmarshall_binary(pdu, pduleft, size, p->u.raw->data); + case NHRP_PAYLOAD_TYPE_CIE_LIST: + cieleft = size; + while (cieleft) { + cie = nhrp_cie_alloc(); + list_add_tail(&cie->cie_list_entry, &p->u.cie_list); + if (!unmarshall_cie(pdu, &cieleft, packet, cie)) + return FALSE; + } + *pduleft -= size; + return TRUE; + default: + return FALSE; + } +} + +static int unmarshall_packet_header(uint8_t **pdu, size_t *pduleft, struct nhrp_packet *packet) +{ + struct nhrp_packet_header *phdr = (struct nhrp_packet_header *) *pdu; + + if (!unmarshall_binary(pdu, pduleft, sizeof(packet->hdr), &packet->hdr)) + return FALSE; + + if (packet->hdr.type >= ARRAY_SIZE(packet_types)) + return FALSE; + + packet->src_nbma_address.type = 
nhrp_pf_from_afnum(packet->hdr.afnum); + packet->src_nbma_address.addr_len = phdr->src_nbma_address_len; + packet->src_nbma_address.subaddr_len = phdr->src_nbma_subaddress_len; + packet->src_protocol_address.type = nhrp_pf_from_protocol(packet->hdr.protocol_type); + packet->src_protocol_address.addr_len = phdr->src_protocol_address_len; + packet->dst_protocol_address.type = nhrp_pf_from_protocol(packet->hdr.protocol_type); + packet->dst_protocol_address.addr_len = phdr->dst_protocol_address_len; + + if (!unmarshall_nbma_address(pdu, pduleft, &packet->src_nbma_address)) + return FALSE; + if (!unmarshall_protocol_address(pdu, pduleft, &packet->src_protocol_address)) + return FALSE; + return unmarshall_protocol_address(pdu, pduleft, &packet->dst_protocol_address); +} + +static int unmarshall_packet(uint8_t *pdu, size_t pdusize, struct nhrp_packet *packet) +{ + size_t pduleft = pdusize; + uint8_t *pos = pdu; + int size, extension_offset; + + if (!unmarshall_packet_header(&pos, &pduleft, packet)) + return FALSE; + + extension_offset = ntohs(packet->hdr.extension_offset); + if (extension_offset == 0) { + /* No extensions; rest of data is payload */ + size = pduleft; + } else { + /* Extensions present; exclude those from payload */ + size = extension_offset - (pos - pdu); + if (size < 0 || size > pduleft) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ERROR, pos - pdu); + return FALSE; + } + } + + if (!unmarshall_payload(&pos, &pduleft, packet, + packet_types[packet->hdr.type].payload_type, + size, nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_ANY))) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ERROR, pos - pdu); + return FALSE; + } + + if (extension_offset == 0) + return TRUE; + + pos = &pdu[extension_offset]; + pduleft = pdusize - extension_offset; + do { + struct nhrp_extension_header eh; + int extension_type, payload_type; + + if (!unmarshall_binary(&pos, &pduleft, sizeof(eh), &eh)) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ERROR, pos - 
pdu); + return FALSE; + } + + extension_type = ntohs(eh.type) & ~NHRP_EXTENSION_FLAG_COMPULSORY; + if (extension_type == NHRP_EXTENSION_END) + break; + + payload_type = NHRP_PAYLOAD_TYPE_NONE; + if (extension_type < ARRAY_SIZE(extension_types)) + payload_type = extension_types[extension_type]; + if (payload_type == NHRP_PAYLOAD_TYPE_NONE) + payload_type = NHRP_PAYLOAD_TYPE_RAW; + if (payload_type == NHRP_PAYLOAD_TYPE_RAW && + ntohs(eh.length) == 0) + payload_type = NHRP_PAYLOAD_TYPE_NONE; + + if (!unmarshall_payload(&pos, &pduleft, packet, + payload_type, ntohs(eh.length), + nhrp_packet_extension(packet, ntohs(eh.type), NHRP_PAYLOAD_TYPE_ANY))) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ERROR, pos - pdu); + return FALSE; + } + } while (1); + + return TRUE; +} + +static int nhrp_packet_forward(struct nhrp_packet *packet) +{ + char tmp[64], tmp2[64], tmp3[64]; + struct nhrp_payload *p = NULL; + + nhrp_info("Forwarding packet from nbma src %s, proto src %s to proto dst %s, hop count %d", + nhrp_address_format(&packet->src_nbma_address, + sizeof(tmp), tmp), + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp2), tmp2), + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp3), tmp3), + packet->hdr.hop_count); + + if (packet->hdr.hop_count == 0) { + nhrp_packet_send_error(packet, NHRP_ERROR_HOP_COUNT_EXCEEDED, 0); + return TRUE; + } + packet->hdr.hop_count--; + + if (!nhrp_packet_reroute(packet, NULL)) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE, 0); + return FALSE; + } + + switch (packet_types[packet->hdr.type].type) { + case NHRP_TYPE_REQUEST: + case NHRP_TYPE_INDICATION: + p = nhrp_packet_extension(packet, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + break; + case NHRP_TYPE_REPLY: + p = nhrp_packet_extension(packet, + NHRP_EXTENSION_REVERSE_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + break; + } + if (p != 
NULL) { + struct nhrp_cie *cie; + + if (nhrp_address_match_cie_list(&packet->dst_peer->my_nbma_address, + &packet->dst_iface->protocol_address, + &p->u.cie_list)) { + nhrp_packet_send_error(packet, NHRP_ERROR_LOOP_DETECTED, 0); + return FALSE; + } + + cie = nhrp_cie_alloc(); + if (cie != NULL) { + cie->hdr = (struct nhrp_cie_header) { + .code = NHRP_CODE_SUCCESS, + .holding_time = htons(packet->dst_iface->holding_time), + }; + cie->nbma_address = packet->dst_peer->my_nbma_address; + cie->protocol_address = packet->dst_iface->protocol_address; + nhrp_payload_add_cie(p, cie); + } + } + + return nhrp_packet_route_and_send(packet); +} + +static int nhrp_packet_receive_local(struct nhrp_packet *packet) +{ + struct nhrp_packet *req; + char tmp[64], tmp2[64], tmp3[64]; + + if (packet_types[packet->hdr.type].type == NHRP_TYPE_REPLY) { + list_for_each_entry(req, &pending_requests, request_list_entry) { + if (packet->hdr.u.request_id != req->hdr.u.request_id) + continue; + if (nhrp_address_cmp(&packet->src_nbma_address, + &req->src_nbma_address)) + continue; + if (nhrp_address_cmp(&packet->src_protocol_address, + &req->src_protocol_address)) + continue; + + if (req->handler != NULL) + req->handler(req->handler_ctx, packet); + nhrp_packet_dequeue(req); + + return TRUE; + } + + /* Reply to unsent request? 
*/ + nhrp_info("Packet type %d from nbma src %s, proto src %s, " + "proto dst %s dropped: no matching request", + packet->hdr.type, + nhrp_address_format(&packet->src_nbma_address, + sizeof(tmp), tmp), + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp2), tmp2), + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp3), tmp3)); + + nhrp_packet_send_error( + packet, NHRP_ERROR_INVALID_RESOLUTION_REPLY, 0); + return TRUE; + } + + if (packet_types[packet->hdr.type].handler == NULL) { + nhrp_info("Packet type %d from nbma src %s, proto src %s, " + "proto dst %s not supported", + packet->hdr.type, + nhrp_address_format(&packet->src_nbma_address, + sizeof(tmp), tmp), + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp2), tmp2), + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp3), tmp3)); + return FALSE; + } + + if (packet->dst_peer->next_hop_address.type != PF_UNSPEC) { + /* Broadcast destinations get rewritten as if destined to + * our local address */ + packet->dst_protocol_address = + packet->dst_peer->next_hop_address; + } + + return packet_types[packet->hdr.type].handler(packet); +} + +int nhrp_packet_receive(uint8_t *pdu, size_t pdulen, + struct nhrp_interface *iface, + struct nhrp_address *from) +{ + char tmp[64]; + struct nhrp_packet *packet; + struct nhrp_address *dest; + struct nhrp_peer *peer; + int ret = FALSE; + + if (nhrp_calculate_checksum(pdu, pdulen) != 0) { + nhrp_error("Bad checksum in packet from %s", + nhrp_address_format(from, sizeof(tmp), tmp)); + return FALSE; + } + + packet = nhrp_packet_alloc(); + if (packet == NULL) + return FALSE; + + if (!unmarshall_packet(pdu, pdulen, packet)) { + nhrp_error("Failed to unmarshall packet from %s", + nhrp_address_format(from, sizeof(tmp), tmp)); + goto error; + } + + packet->req_pdu = pdu; + packet->req_pdulen = pdulen; + + if (packet_types[packet->hdr.type].type == NHRP_TYPE_REPLY) + dest = &packet->src_protocol_address; + else + dest = 
&packet->dst_protocol_address; + + peer = nhrp_peer_route(iface, dest, 0, BIT(NHRP_PEER_TYPE_LOCAL_ADDR)); + packet->src_linklayer_address = *from; + packet->src_iface = iface; + packet->dst_peer = nhrp_peer_get(peer); + + /* RFC2332 5.3.4 - Authentication is always done pairwise on an NHRP + * hop-by-hop basis; i.e. regenerated at each hop. */ + if (packet->src_iface->auth_token && + (packet->hdr.type != NHRP_PACKET_ERROR_INDICATION || + packet->hdr.u.error.code != NHRP_ERROR_AUTHENTICATION_FAILURE)) { + struct nhrp_payload *p; + p = nhrp_packet_extension(packet, + NHRP_EXTENSION_AUTHENTICATION | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_RAW); + if (p == NULL || + nhrp_buffer_cmp(packet->src_iface->auth_token, p->u.raw) != 0) { + nhrp_error("Dropping packet from %s with bad authentication", + nhrp_address_format(from, sizeof(tmp), tmp)); + nhrp_packet_send_error(packet, NHRP_ERROR_AUTHENTICATION_FAILURE, 0); + goto error; + } + } + + if (peer != NULL && + peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) + ret = nhrp_packet_receive_local(packet); + else + ret = nhrp_packet_forward(packet); + + packet->req_pdu = NULL; + packet->req_pdulen = 0; + +error: + nhrp_packet_put(packet); + return ret; +} + +static int marshall_binary(uint8_t **pdu, size_t *pduleft, size_t size, void *raw) +{ + if (*pduleft < size) + return FALSE; + + memcpy(*pdu, raw, size); + *pdu += size; + *pduleft -= size; + + return TRUE; +} + +static inline int marshall_protocol_address(uint8_t **pdu, size_t *pduleft, struct nhrp_address *pa) +{ + if (pa->subaddr_len != 0) + return FALSE; + return marshall_binary(pdu, pduleft, pa->addr_len, pa->addr); +} + +static inline int marshall_nbma_address(uint8_t **pdu, size_t *pduleft, struct nhrp_address *na) +{ + return marshall_binary(pdu, pduleft, na->addr_len + na->subaddr_len, na->addr); +} + +static int marshall_cie(uint8_t **pdu, size_t *pduleft, struct nhrp_cie *cie) +{ + cie->hdr.nbma_address_len = cie->nbma_address.addr_len; + 
cie->hdr.nbma_subaddress_len = cie->nbma_address.subaddr_len; + cie->hdr.protocol_address_len = cie->protocol_address.addr_len; + + if (!marshall_binary(pdu, pduleft, sizeof(struct nhrp_cie_header), &cie->hdr)) + return FALSE; + if (!marshall_nbma_address(pdu, pduleft, &cie->nbma_address)) + return FALSE; + return marshall_protocol_address(pdu, pduleft, &cie->protocol_address); +} + +static int marshall_payload(uint8_t **pdu, size_t *pduleft, struct nhrp_payload *p) +{ + struct nhrp_cie *cie; + + switch (p->payload_type) { + case NHRP_PAYLOAD_TYPE_NONE: + return TRUE; + case NHRP_PAYLOAD_TYPE_RAW: + if (p->u.raw->length == 0) + return TRUE; + return marshall_binary(pdu, pduleft, p->u.raw->length, p->u.raw->data); + case NHRP_PAYLOAD_TYPE_CIE_LIST: + list_for_each_entry(cie, &p->u.cie_list, cie_list_entry) { + if (!marshall_cie(pdu, pduleft, cie)) + return FALSE; + } + return TRUE; + default: + return FALSE; + } +} + +static int marshall_packet_header(uint8_t **pdu, size_t *pduleft, struct nhrp_packet *packet) +{ + if (!marshall_binary(pdu, pduleft, sizeof(packet->hdr), &packet->hdr)) + return FALSE; + if (!marshall_nbma_address(pdu, pduleft, &packet->src_nbma_address)) + return FALSE; + if (!marshall_protocol_address(pdu, pduleft, &packet->src_protocol_address)) + return FALSE; + return marshall_protocol_address(pdu, pduleft, &packet->dst_protocol_address); +} + +static int marshall_packet(uint8_t *pdu, size_t pduleft, struct nhrp_packet *packet) +{ + uint8_t *pos = pdu; + struct nhrp_packet_header *phdr = (struct nhrp_packet_header *) pdu; + struct nhrp_extension_header neh; + int i, size; + + if (!marshall_packet_header(&pos, &pduleft, packet)) + return -1; + if (!marshall_payload(&pos, &pduleft, nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_ANY))) + return -2; + + phdr->extension_offset = htons((int)(pos - pdu)); + for (i = 1; i < packet->num_extensions; i++) { + struct nhrp_extension_header *eh = (struct nhrp_extension_header *) pos; + + if 
(packet->extension_by_order[i].payload_type == NHRP_PAYLOAD_TYPE_NONE) + continue; + + neh.type = htons(packet->extension_by_order[i].extension_type); + neh.length = 0; + + if (!marshall_binary(&pos, &pduleft, sizeof(neh), &neh)) + return -3; + if (!marshall_payload(&pos, &pduleft, &packet->extension_by_order[i])) + return -4; + eh->length = htons((pos - (uint8_t *) eh) - sizeof(neh)); + } + neh.type = htons(NHRP_EXTENSION_END | NHRP_EXTENSION_FLAG_COMPULSORY); + neh.length = 0; + if (!marshall_binary(&pos, &pduleft, sizeof(neh), &neh)) + return -5; + + /* Cisco is seriously brain damaged. It needs some extra garbage + * at the end of error indication or it'll barf out spurious errors. */ + if (packet->hdr.type == NHRP_PACKET_ERROR_INDICATION && + pduleft >= 0x10) { + memset(pos, 0, 0x10); + pos += 0x10; + pduleft -= 0x10; + } + + size = (int)(pos - pdu); + phdr->packet_size = htons(size); + phdr->checksum = 0; + phdr->src_nbma_address_len = packet->src_nbma_address.addr_len; + phdr->src_nbma_subaddress_len = packet->src_nbma_address.subaddr_len; + phdr->src_protocol_address_len = packet->src_protocol_address.addr_len; + phdr->dst_protocol_address_len = packet->dst_protocol_address.addr_len; + phdr->checksum = nhrp_calculate_checksum(pdu, size); + + return size; +} + +int nhrp_packet_route(struct nhrp_packet *packet) +{ + struct nhrp_address proto_nexthop, *src, *dst; + struct list_head *cielist = NULL; + struct nhrp_payload *payload; + struct nhrp_peer *peer; + char tmp[64]; + int r; + + if (packet->dst_iface == NULL) { + nhrp_error("nhrp_packet_route called without destination interface"); + return FALSE; + } + + if (packet_types[packet->hdr.type].type == NHRP_TYPE_REPLY) { + dst = &packet->src_protocol_address; + src = &packet->dst_protocol_address; + r = NHRP_EXTENSION_REVERSE_TRANSIT_NHS; + } else { + dst = &packet->dst_protocol_address; + src = &packet->src_protocol_address; + r = NHRP_EXTENSION_FORWARD_TRANSIT_NHS; + } + payload = 
nhrp_packet_extension(packet, + r | NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + if (payload != NULL) + cielist = &payload->u.cie_list; + + if (packet->dst_peer != NULL) { + proto_nexthop = packet->dst_peer->next_hop_address; + } else { + proto_nexthop = *dst; + do { + peer = nhrp_peer_route_full( + packet->dst_iface, &proto_nexthop, 0, + NHRP_PEER_TYPEMASK_ROUTE_VIA_NHS, src, cielist); + if (peer == NULL || peer->type == NHRP_PEER_TYPE_NEGATIVE) { + nhrp_error("No peer entry for protocol address %s", + nhrp_address_format(&proto_nexthop, + sizeof(tmp), tmp)); + return FALSE; + } + if (peer->type != NHRP_PEER_TYPE_LOCAL_ROUTE) + break; + if (peer->next_hop_address.type == AF_UNSPEC) + break; + proto_nexthop = peer->next_hop_address; + } while (1); + + packet->dst_peer = nhrp_peer_get(peer); + } + + return TRUE; +} + +int nhrp_packet_marshall_and_send(struct nhrp_packet *packet) +{ + uint8_t pdu[MAX_PDU_SIZE]; + char tmp[4][64]; + int size; + + nhrp_debug("Sending packet %d, from: %s (nbma %s), to: %s (nbma %s)", + packet->hdr.type, + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp[0]), tmp[0]), + nhrp_address_format(&packet->src_nbma_address, + sizeof(tmp[1]), tmp[1]), + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp[2]), tmp[2]), + nhrp_address_format(&packet->dst_peer->next_hop_address, + sizeof(tmp[3]), tmp[3])); + + size = marshall_packet(pdu, sizeof(pdu), packet); + if (size < 0) { + nhrp_error("Packet marshalling failed (r=%d)", size); + return FALSE; + } + + if (!kernel_send(pdu, size, packet->dst_iface, + &packet->dst_peer->next_hop_address)) + return FALSE; + + return TRUE; +} + +int nhrp_packet_route_and_send(struct nhrp_packet *packet) +{ + struct nhrp_payload *payload; + + if (packet->dst_peer == NULL || packet->dst_iface == NULL) { + if (!nhrp_packet_route(packet)) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE, 0); + return TRUE; + } + } + + if 
(packet->src_nbma_address.addr_len == 0) + packet->src_nbma_address = packet->dst_peer->my_nbma_address; + if (packet->src_protocol_address.addr_len == 0) + packet->src_protocol_address = packet->dst_iface->protocol_address; + if (packet->hdr.afnum == AFNUM_RESERVED) + packet->hdr.afnum = packet->dst_peer->afnum; + if (packet->hdr.protocol_type == 0) + packet->hdr.protocol_type = packet->dst_peer->protocol_type; + + /* RFC2332 5.3.1 */ + payload = nhrp_packet_extension( + packet, NHRP_EXTENSION_RESPONDER_ADDRESS | + NHRP_EXTENSION_FLAG_COMPULSORY | NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + if (packet_types[packet->hdr.type].type == NHRP_TYPE_REPLY && + (payload != NULL && list_empty(&payload->u.cie_list))) { + struct nhrp_cie *cie; + + cie = nhrp_cie_alloc(); + if (cie == NULL) + return FALSE; + + cie->hdr.holding_time = htons(packet->dst_iface->holding_time); + cie->nbma_address = packet->dst_peer->my_nbma_address; + cie->protocol_address = packet->dst_iface->protocol_address; + nhrp_payload_set_type(payload, NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + } + + /* RFC2332 5.3.4 - Authentication is always done pairwise on an NHRP + * hop-by-hop basis; i.e. regenerated at each hop. 
*/ + payload = nhrp_packet_extension(packet, + NHRP_EXTENSION_AUTHENTICATION | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_RAW); + nhrp_payload_free(payload); + if (packet->dst_iface->auth_token != NULL) + nhrp_payload_set_raw(payload, + nhrp_buffer_copy(packet->dst_iface->auth_token)); + + if (packet->dst_peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) { + packet->src_iface = packet->dst_peer->interface; + return nhrp_packet_receive_local(packet); + } + + if (packet->dst_peer->flags & (NHRP_PEER_FLAG_UP | + NHRP_PEER_FLAG_LOWER_UP)) + return nhrp_packet_marshall_and_send(packet); + + if (packet->dst_peer->queued_packet != NULL) + nhrp_packet_put(packet->dst_peer->queued_packet); + packet->dst_peer->queued_packet = nhrp_packet_get(packet); + + return TRUE; +} + +int nhrp_packet_send(struct nhrp_packet *packet) +{ + struct nhrp_payload *payload; + struct nhrp_cie *cie; + + if (packet->dst_iface == NULL) { + if (!nhrp_packet_route(packet)) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE, 0); + return TRUE; + } + } + + /* Cisco NAT extension CIE */ + if (packet_types[packet->hdr.type].type != NHRP_TYPE_INDICATION && + (packet->hdr.flags & NHRP_FLAG_REGISTRATION_NAT)) { + payload = nhrp_packet_extension(packet, NHRP_EXTENSION_NAT_ADDRESS, + NHRP_PAYLOAD_TYPE_CIE_LIST); + + if (packet->dst_iface->nat_cie.nbma_address.addr_len && + payload != NULL && list_empty(&payload->u.cie_list)) { + cie = nhrp_cie_alloc(); + if (cie != NULL) { + *cie = packet->dst_iface->nat_cie; + nhrp_cie_reset(cie); + nhrp_payload_add_cie(payload, cie); + } + } + } + + return nhrp_packet_route_and_send(packet); +} + +static void nhrp_packet_xmit_timeout_cb(struct ev_timer *w, int revents) +{ + struct nhrp_packet *packet = + container_of(w, struct nhrp_packet, timeout); + + list_del(&packet->request_list_entry); + + if (packet->dst_peer != NULL && + ++packet->retry < PACKET_RETRIES) { + nhrp_packet_marshall_and_send(packet); + + list_add(&packet->request_list_entry, 
&pending_requests); + } else { + ev_timer_stop(&packet->timeout); + if (packet->dst_peer == NULL) + nhrp_error("nhrp_packet_xmit_timeout: no destination peer!"); + if (packet->handler != NULL) + packet->handler(packet->handler_ctx, NULL); + nhrp_packet_dequeue(packet); + } +} + +int nhrp_packet_send_request(struct nhrp_packet *pkt, + void (*handler)(void *ctx, struct nhrp_packet *packet), + void *ctx) +{ + struct nhrp_packet *packet; + + packet = nhrp_packet_get(pkt); + + packet->retry = 0; + if (packet->hdr.u.request_id == constant_htonl(0)) { + request_id++; + packet->hdr.u.request_id = htonl(request_id); + } + + packet->handler = handler; + packet->handler_ctx = ctx; + list_add(&packet->request_list_entry, &pending_requests); + ev_timer_again(&packet->timeout); + + return nhrp_packet_send(packet); +} + +int nhrp_packet_send_error(struct nhrp_packet *error_packet, + uint16_t indication_code, uint16_t offset) +{ + struct nhrp_packet *p; + struct nhrp_payload *pl; + int r; + + /* RFC2332 5.2.7 Never generate errors about errors */ + if (error_packet->hdr.type == NHRP_PACKET_ERROR_INDICATION) + return TRUE; + + p = nhrp_packet_alloc(); + p->hdr = error_packet->hdr; + p->hdr.type = NHRP_PACKET_ERROR_INDICATION; + p->hdr.hop_count = 0; + p->hdr.u.error.code = indication_code; + p->hdr.u.error.offset = htons(offset); + p->dst_iface = error_packet->src_iface; + + if (packet_types[error_packet->hdr.type].type == NHRP_TYPE_REPLY) + p->dst_protocol_address = error_packet->dst_protocol_address; + else + p->dst_protocol_address = error_packet->src_protocol_address; + + pl = nhrp_packet_payload(p, NHRP_PAYLOAD_TYPE_RAW); + pl->u.raw = nhrp_buffer_alloc(error_packet->req_pdulen); + memcpy(pl->u.raw->data, error_packet->req_pdu, error_packet->req_pdulen); + + /* Standard extensions */ + nhrp_packet_extension(p, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + + if (p->dst_protocol_address.type == PF_UNSPEC) + r = 
nhrp_do_handle_error_indication(p, error_packet); + else + r = nhrp_packet_send(p); + + nhrp_packet_put(p); + + return r; +} + +/* Send a Traffic Indication about a packet travelling from protocol_src + * to protocol_dst. The indication carries a copy of pdu (pdulen bytes) + * and is addressed to protocol_src via the peer found for nbma_src. + * + * Returns FALSE without sending when iface does not have + * NHRP_INTERFACE_FLAG_REDIRECT set, when nbma_src does not map to a + * peer of type NHRP_PEER_TYPE_DYNAMIC, when the (protocol_src, + * protocol_dst) rate limiter refuses the indication, or when the packet + * cannot be allocated. Otherwise returns the result of + * nhrp_packet_send(). */ +int nhrp_packet_send_traffic(struct nhrp_interface *iface, + struct nhrp_address *nbma_src, + struct nhrp_address *protocol_src, + struct nhrp_address *protocol_dst, + int protocol_type, uint8_t *pdu, size_t pdulen) +{ + struct nhrp_rate_limit *rl; + struct nhrp_packet *p; + struct nhrp_payload *pl; + struct nhrp_peer *peer; + char tmp1[64], tmp2[64], tmp3[64], tmp4[64]; + int r; + + if (!(iface->flags & NHRP_INTERFACE_FLAG_REDIRECT)) + return FALSE; + + /* Are we serving the NBMA source */ + peer = nhrp_interface_find_peer(iface, nbma_src); + if (peer == NULL || peer->type != NHRP_PEER_TYPE_DYNAMIC) + return FALSE; + + rl = get_rate_limit(protocol_src, protocol_dst); + if (rl == NULL) + return FALSE; + + /* If silence period has elapsed, reset algorithm */ + if (ev_now() > rl->rate_last + RATE_LIMIT_SILENCE) + rl->rate_tokens = 0; + + /* Too many ignored redirects; just update time of last packet */ + if (rl->rate_tokens >= RATE_LIMIT_MAX_TOKENS) { + rl->rate_last = ev_now(); + return FALSE; + } + + /* Check for load limit; set rate_last to last sent redirect */ + if (rl->rate_tokens != 0 && + ev_now() < rl->rate_last + RATE_LIMIT_SEND_INTERVAL) + return FALSE; + + rl->rate_tokens++; + rl->rate_last = ev_now(); + + p = nhrp_packet_alloc(); + if (p == NULL) + return FALSE; + + p->hdr = (struct nhrp_packet_header) { + .protocol_type = protocol_type, + .version = NHRP_VERSION_RFC2332, + .type = NHRP_PACKET_TRAFFIC_INDICATION, + .hop_count = 0, + }; + p->dst_protocol_address = *protocol_src; + + pl = nhrp_packet_payload(p, NHRP_PAYLOAD_TYPE_RAW); + pl->u.raw = nhrp_buffer_alloc(pdulen); + if (pl->u.raw == NULL) { + nhrp_packet_put(p); + return FALSE; + } + if (pdulen != 0) + memcpy(pl->u.raw->data, pdu, pdulen); + + /* Standard extensions */ + nhrp_packet_extension(p, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + + nhrp_info("Sending Traffic Indication about packet from %s to %s (to %s/%s)", + 
nhrp_address_format(protocol_src, sizeof(tmp1), tmp1), + nhrp_address_format(protocol_dst, sizeof(tmp2), tmp2), + nhrp_address_format(&peer->protocol_address, sizeof(tmp3), tmp3), + nhrp_address_format(&peer->next_hop_address, sizeof(tmp4), tmp4)); + + p->dst_iface = iface; + p->dst_peer = nhrp_peer_get(peer); + r = nhrp_packet_send(p); + nhrp_packet_put(p); + + return r; +} + +void nhrp_packet_hook_request(int request, + int (*handler)(struct nhrp_packet *packet)) +{ + NHRP_BUG_ON(request < 0 || request >= ARRAY_SIZE(packet_types)); + NHRP_BUG_ON(packet_types[request].handler != NULL); + + packet_types[request].handler = handler; +} diff --git a/nhrp/nhrp_packet.h b/nhrp/nhrp_packet.h new file mode 100644 index 0000000..3f435c8 --- /dev/null +++ b/nhrp/nhrp_packet.h @@ -0,0 +1,128 @@ +/* nhrp_packet.h - In-memory NHRP packet definitions + * + * Copyright (C) 2007-2009 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#ifndef NHRP_PACKET_H +#define NHRP_PACKET_H + +#include "libev.h" +#include "list.h" +#include "nhrp_protocol.h" +#include "nhrp_address.h" + +#define NHRP_MAX_EXTENSIONS 10 + +#define NHRP_PACKET_DEFAULT_HOP_COUNT 16 + +struct nhrp_interface; + +struct nhrp_buffer { + uint32_t length; + uint8_t data[NHRP_EMPTY_ARRAY]; +}; + +struct nhrp_cie { + struct list_head cie_list_entry; + struct nhrp_cie_header hdr; + struct nhrp_address nbma_address; + struct nhrp_address protocol_address; +}; + +#define NHRP_PAYLOAD_TYPE_ANY -1 +#define NHRP_PAYLOAD_TYPE_NONE 0 +#define NHRP_PAYLOAD_TYPE_RAW 1 +#define NHRP_PAYLOAD_TYPE_CIE_LIST 2 + +struct nhrp_payload { + uint16_t extension_type; + uint16_t payload_type; + union { + struct nhrp_buffer *raw; + struct list_head cie_list; + } u; +}; + +struct nhrp_packet { + int ref; + + struct nhrp_packet_header hdr; + struct nhrp_address src_nbma_address; + struct nhrp_address src_protocol_address; + struct nhrp_address dst_protocol_address; + + int num_extensions; + struct nhrp_payload extension_by_order[NHRP_MAX_EXTENSIONS]; + struct nhrp_payload * extension_by_type[NHRP_MAX_EXTENSIONS]; + + struct list_head request_list_entry; + struct ev_timer timeout; + void (*handler)(void *ctx, struct nhrp_packet *packet); + void * handler_ctx; + int retry; + + uint8_t * req_pdu; + size_t req_pdulen; + + struct nhrp_interface * src_iface; + struct nhrp_address src_linklayer_address; + struct nhrp_interface * dst_iface; + struct nhrp_peer * dst_peer; +}; + +#define NHRP_EXTENSION_FLAG_NOCREATE 0x00010000 + +int nhrp_rate_limit_clear(struct nhrp_address *addr, int prefix_len); + +struct nhrp_buffer *nhrp_buffer_alloc(uint32_t size); +struct nhrp_buffer *nhrp_buffer_copy(struct nhrp_buffer *buffer); +int nhrp_buffer_cmp(struct nhrp_buffer *a, struct nhrp_buffer *b); +void nhrp_buffer_free(struct nhrp_buffer *buffer); + +struct nhrp_cie *nhrp_cie_alloc(void); +void nhrp_cie_free(struct nhrp_cie *cie); +void nhrp_cie_reset(struct nhrp_cie 
*cie); + +void nhrp_payload_set_type(struct nhrp_payload *payload, int type); +void nhrp_payload_set_raw(struct nhrp_payload *payload, struct nhrp_buffer *buf); +void nhrp_payload_add_cie(struct nhrp_payload *payload, struct nhrp_cie *cie); +struct nhrp_cie *nhrp_payload_get_cie(struct nhrp_payload *payload, int index); +void nhrp_payload_free(struct nhrp_payload *payload); + +struct nhrp_packet *nhrp_packet_alloc(void); +struct nhrp_packet *nhrp_packet_get(struct nhrp_packet *packet); +void nhrp_packet_put(struct nhrp_packet *packet); + +struct nhrp_payload *nhrp_packet_payload(struct nhrp_packet *packet, int payload_type); +struct nhrp_payload *nhrp_packet_extension(struct nhrp_packet *packet, + uint32_t extension, int payload_type); +int nhrp_packet_receive(uint8_t *pdu, size_t pdulen, + struct nhrp_interface *iface, + struct nhrp_address *from); +int nhrp_packet_route(struct nhrp_packet *packet); +int nhrp_packet_reroute(struct nhrp_packet *packet, struct nhrp_peer *dst_peer); +int nhrp_packet_marshall_and_send(struct nhrp_packet *packet); +int nhrp_packet_route_and_send(struct nhrp_packet *packet); +int nhrp_packet_send(struct nhrp_packet *packet); +int nhrp_packet_send_request(struct nhrp_packet *packet, + void (*handler)(void *ctx, struct nhrp_packet *packet), + void *ctx); +int nhrp_packet_send_error(struct nhrp_packet *error_packet, + uint16_t indication_code, uint16_t offset); +int nhrp_packet_send_traffic(struct nhrp_interface *iface, + struct nhrp_address *nbma_src, + struct nhrp_address *protocol_src, + struct nhrp_address *protocol_dst, + int protocol_type, uint8_t *pdu, size_t pdulen); + +void nhrp_packet_hook_request(int request, + int (*handler)(struct nhrp_packet *packet)); + +#endif diff --git a/nhrp/nhrp_peer.c b/nhrp/nhrp_peer.c new file mode 100644 index 0000000..c53d4c4 --- /dev/null +++ b/nhrp/nhrp_peer.c @@ -0,0 +1,2106 @@ +/* nhrp_peer.c - NHRP peer cache implementation + * + * Copyright (C) 2007-2009 Timo Teräs + * All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "nhrp_common.h" +#include "nhrp_peer.h" +#include "nhrp_interface.h" + +#define NHRP_PEER_FORMAT_LEN 128 + +#define NHRP_SCRIPT_TIMEOUT (2*60) +#define NHRP_NEGATIVE_CACHE_TIME (3*60) +#define NHRP_EXPIRY_TIME (5*60) + +#define NHRP_HOLDING_TIME_DIVISOR 3 /* See RFC-2332 5.2.3 */ + +#define NHRP_RETRY_REGISTER_TIME (30 + random()/(RAND_MAX/60)) +#define NHRP_RETRY_ERROR_TIME (60 + random()/(RAND_MAX/120)) + +#define NHRP_PEER_FLAG_PRUNE_PENDING 0x00010000 + +const char * const nhrp_peer_type[] = { + [NHRP_PEER_TYPE_INCOMPLETE] = "incomplete", + [NHRP_PEER_TYPE_NEGATIVE] = "negative", + [NHRP_PEER_TYPE_CACHED] = "cached", + [NHRP_PEER_TYPE_SHORTCUT_ROUTE] = "shortcut-route", + [NHRP_PEER_TYPE_DYNAMIC] = "dynamic", + [NHRP_PEER_TYPE_DYNAMIC_NHS] = "dynamic-nhs", + [NHRP_PEER_TYPE_STATIC] = "static", + [NHRP_PEER_TYPE_STATIC_DNS] = "dynamic-map", + [NHRP_PEER_TYPE_LOCAL_ROUTE] = "local-route", + [NHRP_PEER_TYPE_LOCAL_ADDR] = "local", +}; + +static int nhrp_peer_num_total = 0; +static struct list_head local_peer_list = LIST_INITIALIZER(local_peer_list); + +/* A peer entry's life cycle, its pending callbacks and their call order + * are listed here. + * + * Generally everything starts from a call to nhrp_peer_insert(), which + * schedules (during startup) or directly invokes nhrp_peer_insert_cb(). + * + * INCOMPLETE: + * 1. nhrp_peer_insert_cb: send resolution request + * 2. nhrp_peer_handle_resolution_reply: entry deleted or reinserted as NEGATIVE + * + * NEGATIVE: + * 1. nhrp_peer_insert_cb: schedule task remove + * + * CACHED, STATIC, DYNAMIC, DYNAMIC_NHS: + * 1. nhrp_peer_insert_cb: calls nhrp_peer_restart_cb + * 2. 
nhrp_peer_restart_cb: resolves DNS name, or calls nhrp_peer_run_up_script() + * 3. nhrp_peer_address_query_cb: calls nhrp_peer_run_up_script() + * 4. nhrp_peer_run_up_script: spawns script, or goes to nhrp_peer_lower_is_up() + * 5. nhrp_peer_script_peer_up_done: calls nhrp_peer_lower_is_up() + * 6. nhrp_peer_lower_is_up: sends registration, or goes to nhrp_peer_is_up() + * 7. nhrp_peer_handle_registration_reply: + * a. on success: calls nhrp_peer_is_up() + * b. on error reply: calls nhrp_peer_send_purge_protocol() + * nhrp_peer_handle_purge_protocol_reply: sends new registration + * 8. nhrp_peer_is_up: schedules re-register, expire or deletion + * + * ON EXPIRE: + * schedule remove + * nhrp_peer_renew is called if the peer has the USED flag set, or the + * flag becomes set, while the peer is expired + * ON RENEW: sends resolution request, schedule EXPIRE + * + * ON ERROR for CACHED: reinsert as NEGATIVE + * ON ERROR for STATIC: fork peer-down script (if was lower up) + * schedule task request link + * ON ERROR for DYNAMIC: fork peer-down script (if was lower up) + * delete peer + * + * SHORTCUT_ROUTE: + * 1. nhrp_peer_insert_cb: spawns route-up script, or schedules EXPIRE + * + * STATIC_DNS: + * 1. nhrp_peer_insert_cb: calls nhrp_peer_dnsmap_restart_cb + * 2. nhrp_peer_dnsmap_restart_cb: resolves DNS name + * 3. 
nhrp_peer_dnsmap_query_cb: create new peer entries, + * renew existing and delete expired, schedule restart + * + * LOCAL: + * nothing, only netlink code modifies these + */ + +static void nhrp_peer_reinsert(struct nhrp_peer *peer, int type); +static void nhrp_peer_restart_cb(struct ev_timer *w, int revents); +static void nhrp_peer_dnsmap_restart_cb(struct ev_timer *w, int revents); +static void nhrp_peer_remove_cb(struct ev_timer *w, int revents); +static void nhrp_peer_send_resolve(struct nhrp_peer *peer); +static void nhrp_peer_send_register_cb(struct ev_timer *w, int revents); +static void nhrp_peer_expire_cb(struct ev_timer *w, int revents); + +static const char *nhrp_error_indication_text(int ei) +{ + switch (ei) { + case -1: + return "timeout"; + case NHRP_ERROR_UNRECOGNIZED_EXTENSION: + return "unrecognized extension"; + case NHRP_ERROR_LOOP_DETECTED: + return "loop detected"; + case NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE: + return "protocol address unreachable"; + case NHRP_ERROR_PROTOCOL_ERROR: + return "protocol error"; + case NHRP_ERROR_SDU_SIZE_EXCEEDED: + return "SDU size exceeded"; + case NHRP_ERROR_INVALID_EXTENSION: + return "invalid extension"; + case NHRP_ERROR_INVALID_RESOLUTION_REPLY: + return "unexpected resolution reply"; + case NHRP_ERROR_AUTHENTICATION_FAILURE: + return "authentication failure"; + case NHRP_ERROR_HOP_COUNT_EXCEEDED: + return "hop count exceeded"; + } + return "unknown"; +} + +static const char *nhrp_cie_code_text(int ct) +{ + switch (ct) { + case NHRP_CODE_SUCCESS: + return "success"; + case NHRP_CODE_ADMINISTRATIVELY_PROHIBITED: + return "administratively prohibited"; + case NHRP_CODE_INSUFFICIENT_RESOURCES: + return "insufficient resources"; + case NHRP_CODE_NO_BINDING_EXISTS: + return "no binding exists"; + case NHRP_CODE_BINDING_NON_UNIQUE: + return "binding non-unique"; + case NHRP_CODE_UNIQUE_ADDRESS_REGISTERED: + return "unique address already registered"; + } + return "unknown"; +} + +static char 
*nhrp_peer_format_full(struct nhrp_peer *peer, size_t len, + char *buf, int full) +{ + char tmp[NHRP_PEER_FORMAT_LEN], *str; + int i = 0; + + if (peer == NULL) { + snprintf(buf, len, "(null)"); + return buf; + } + + i += snprintf(&buf[i], len - i, "%s/%d", + nhrp_address_format(&peer->protocol_address, sizeof(tmp), tmp), + peer->prefix_length); + + if (peer->next_hop_address.type != PF_UNSPEC) { + switch (peer->type) { + case NHRP_PEER_TYPE_SHORTCUT_ROUTE: + case NHRP_PEER_TYPE_LOCAL_ROUTE: + str = "nexthop"; + break; + case NHRP_PEER_TYPE_LOCAL_ADDR: + str = "alias"; + break; + default: + str = "nbma"; + break; + } + i += snprintf(&buf[i], len - i, " %s %s", + str, + nhrp_address_format(&peer->next_hop_address, + sizeof(tmp), tmp)); + } + if (peer->nbma_hostname != NULL) { + i += snprintf(&buf[i], len - i, " hostname %s", + peer->nbma_hostname); + } + if (peer->next_hop_nat_oa.type != PF_UNSPEC) { + i += snprintf(&buf[i], len - i, " nbma-nat-oa %s", + nhrp_address_format(&peer->next_hop_nat_oa, + sizeof(tmp), tmp)); + } + if (peer->interface != NULL) + i += snprintf(&buf[i], len - i, " dev %s", + peer->interface->name); + if (peer->mtu) + i += snprintf(&buf[i], len - i, " mtu %d", peer->mtu); + + if (!full) + return buf; + + if (peer->flags & NHRP_PEER_FLAG_USED) + i += snprintf(&buf[i], len - i, " used"); + if (peer->flags & NHRP_PEER_FLAG_UNIQUE) + i += snprintf(&buf[i], len - i, " unique"); + if (peer->flags & NHRP_PEER_FLAG_UP) + i += snprintf(&buf[i], len - i, " up"); + else if (peer->flags & NHRP_PEER_FLAG_LOWER_UP) + i += snprintf(&buf[i], len - i, " lower-up"); + if (peer->expire_time != 0.0) { + int rel; + + rel = peer->expire_time - ev_now(); + if (rel >= 0) { + i += snprintf(&buf[i], len - i, " expires_in %d:%02d", + rel / 60, rel % 60); + } else { + i += snprintf(&buf[i], len - i, " expired"); + } + } + if (peer->flags & NHRP_PEER_FLAG_PRUNE_PENDING) + i += snprintf(&buf[i], len - i, " dying"); + + return buf; +} + +static inline char 
*nhrp_peer_format(struct nhrp_peer *peer, + size_t len, char *buf) +{ + return nhrp_peer_format_full(peer, len, buf, TRUE); +} + +static inline void nhrp_peer_debug_refcount(const char *func, + struct nhrp_peer *peer) +{ +#if 0 + char tmp[NHRP_PEER_FORMAT_LEN]; + nhrp_debug("%s(%s %s) ref=%d", + func, nhrp_peer_type[peer->type], + nhrp_peer_format(peer, sizeof(tmp), tmp), + peer->ref); +#endif +} + +static void nhrp_peer_resolve_nbma(struct nhrp_peer *peer) +{ + char tmp[64]; + int r; + + if (peer->interface->nbma_address.type == PF_UNSPEC) { + r = kernel_route(NULL, &peer->next_hop_address, + &peer->my_nbma_address, NULL, + &peer->my_nbma_mtu); + if (!r) { + nhrp_error("No route to next hop address %s", + nhrp_address_format(&peer->next_hop_address, + sizeof(tmp), tmp)); + } + } else { + peer->my_nbma_address = peer->interface->nbma_address; + peer->my_nbma_mtu = peer->interface->nbma_mtu; + } +} + +static char *env(const char *key, const char *value) +{ + char *buf; + buf = malloc(strlen(key)+strlen(value)+2); + if (buf == NULL) + return NULL; + sprintf(buf, "%s=%s", key, value); + return buf; +} + +static char *envu32(const char *key, uint32_t value) +{ + char *buf; + buf = malloc(strlen(key)+16); + if (buf == NULL) + return NULL; + sprintf(buf, "%s=%u", key, value); + return buf; +} + +int nhrp_peer_event_ok(union nhrp_peer_event e, int revents) +{ + int status; + + if (revents == 0) + return TRUE; + if (!(revents & EV_CHILD)) + return FALSE; + status = e.child->rstatus; + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) + return TRUE; + return FALSE; +} + +char *nhrp_peer_event_reason(union nhrp_peer_event e, int revents, + size_t buflen, char *buf) +{ + int status; + + if (revents & EV_CHILD) { + status = e.child->rstatus; + if (WIFEXITED(status)) + snprintf(buf, buflen, "exitstatus %d", + WEXITSTATUS(status)); + else if (WIFSIGNALED(status)) + snprintf(buf, buflen, "signal %d", + WTERMSIG(status)); + else + snprintf(buf, buflen, "rstatus %d", status); + } 
else if (revents & EV_TIMEOUT) { + snprintf(buf, buflen, "timeout"); + } else if (revents == 0) { + snprintf(buf, buflen, "success"); + } else { + snprintf(buf, buflen, "unknown, revents=%x", revents); + } + return buf; +} + +struct nhrp_peer *nhrp_peer_from_event(union nhrp_peer_event e, int revents) +{ + struct nhrp_peer *peer; + + if (revents & EV_CHILD) { + peer = container_of(e.child, struct nhrp_peer, child); + } else if (revents & EV_TIMEOUT) { + peer = container_of(e.timer, struct nhrp_peer, timer); + } else { + NHRP_BUG_ON(revents != 0); + peer = container_of(e.child, struct nhrp_peer, child); + } + + ev_child_stop(&peer->child); + ev_timer_stop(&peer->timer); + + return peer; +} + +void nhrp_peer_run_script(struct nhrp_peer *peer, char *action, + void (*cb)(union nhrp_peer_event, int)) +{ + struct nhrp_interface *iface = peer->interface; + const char *argv[] = { nhrp_script_file, action, NULL }; + char *envp[32]; + char tmp[64]; + pid_t pid; + int i = 0; + + /* Resolve own NBMA address before forking if required + * since it requires traversing peer cache and can trigger + * logging and other stuff. 
*/ + if (peer->my_nbma_address.type == PF_UNSPEC) + nhrp_peer_resolve_nbma(peer); + + /* Fork and execute script */ + pid = fork(); + if (pid == -1) { + if (cb != NULL) + cb(&peer->child, EV_CHILD | EV_ERROR); + return; + } else if (pid > 0) { + if (cb != NULL) { + ev_child_stop(&peer->child); + ev_child_init(&peer->child, cb, pid, 0); + ev_child_start(&peer->child); + + ev_set_cb(&peer->timer, cb); + peer->timer.repeat = NHRP_SCRIPT_TIMEOUT; + ev_timer_again(&peer->timer); + } + return; + } + + envp[i++] = env("NHRP_TYPE", nhrp_peer_type[peer->type]); + if (iface->protocol_address.type != PF_UNSPEC) + envp[i++] = env("NHRP_SRCADDR", + nhrp_address_format(&iface->protocol_address, + sizeof(tmp), tmp)); + if (peer->my_nbma_address.type != PF_UNSPEC) + envp[i++] = env("NHRP_SRCNBMA", + nhrp_address_format(&peer->my_nbma_address, + sizeof(tmp), tmp)); + envp[i++] = env("NHRP_DESTADDR", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp)); + envp[i++] = envu32("NHRP_DESTPREFIX", peer->prefix_length); + + if (peer->purge_reason) + envp[i++] = env("NHRP_PEER_DOWN_REASON", peer->purge_reason); + + switch (peer->type) { + case NHRP_PEER_TYPE_CACHED: + case NHRP_PEER_TYPE_LOCAL_ADDR: + case NHRP_PEER_TYPE_STATIC: + case NHRP_PEER_TYPE_DYNAMIC: + case NHRP_PEER_TYPE_DYNAMIC_NHS: + envp[i++] = env("NHRP_DESTNBMA", + nhrp_address_format(&peer->next_hop_address, + sizeof(tmp), tmp)); + if (peer->mtu) + envp[i++] = envu32("NHRP_DESTMTU", peer->mtu); + if (peer->next_hop_nat_oa.type != PF_UNSPEC) + envp[i++] = env("NHRP_DESTNBMA_NAT_OA", + nhrp_address_format(&peer->next_hop_nat_oa, + sizeof(tmp), tmp)); + break; + case NHRP_PEER_TYPE_SHORTCUT_ROUTE: + case NHRP_PEER_TYPE_LOCAL_ROUTE: + envp[i++] = env("NHRP_NEXTHOP", + nhrp_address_format(&peer->next_hop_address, + sizeof(tmp), tmp)); + break; + default: + NHRP_BUG_ON("invalid peer type"); + } + envp[i++] = env("NHRP_INTERFACE", peer->interface->name); + envp[i++] = envu32("NHRP_GRE_KEY", 
peer->interface->gre_key); + envp[i++] = NULL; + + execve(nhrp_script_file, (char **) argv, envp); + exit(1); +} + +void nhrp_peer_cancel_async(struct nhrp_peer *peer) +{ + if (peer->queued_packet) { + nhrp_packet_put(peer->queued_packet); + peer->queued_packet = NULL; + } + if (peer->request) { + nhrp_server_finish_request(peer->request); + peer->request = NULL; + } + + nhrp_address_resolve_cancel(&peer->address_query); + ev_timer_stop(&peer->timer); + if (ev_is_active(&peer->child)) { + kill(SIGINT, peer->child.pid); + ev_child_stop(&peer->child); + } +} + +void nhrp_peer_send_packet_queue(struct nhrp_peer *peer) +{ + if (peer->queued_packet == NULL) + return; + + nhrp_packet_marshall_and_send(peer->queued_packet); + nhrp_packet_put(peer->queued_packet); + peer->queued_packet = NULL; +} + +static void nhrp_peer_schedule(struct nhrp_peer *peer, ev_tstamp timeout, + void (*cb)(struct ev_timer *w, int revents)) +{ + ev_timer_stop(&peer->timer); + ev_timer_init(&peer->timer, cb, timeout, 0.); + ev_timer_start(&peer->timer); +} + +static void nhrp_peer_restart_error(struct nhrp_peer *peer) +{ + switch (peer->type) { + case NHRP_PEER_TYPE_STATIC: + case NHRP_PEER_TYPE_DYNAMIC_NHS: + nhrp_peer_schedule(peer, NHRP_RETRY_ERROR_TIME, + nhrp_peer_restart_cb); + break; + default: + nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_NEGATIVE); + break; + } +} + +static void nhrp_peer_script_route_up_done(union nhrp_peer_event e, int revents) +{ + struct nhrp_peer *peer = nhrp_peer_from_event(e, revents); + char tmp[64], reason[32]; + + if (nhrp_peer_event_ok(e, revents)) { + if (revents) + nhrp_debug("[%s] Route up script: success", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp)); + + peer->flags |= NHRP_PEER_FLAG_UP; + nhrp_peer_schedule(peer, peer->expire_time - NHRP_EXPIRY_TIME + - 10 - ev_now(), nhrp_peer_expire_cb); + } else { + nhrp_info("[%s] Route up script: %s; " + "adding negative cached entry", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), 
tmp), + nhrp_peer_event_reason(e, revents, + sizeof(reason), reason)); + + nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_NEGATIVE); + } +} + +static int nhrp_peer_routes_up(void *ctx, struct nhrp_peer *peer) +{ + if (!(peer->flags & NHRP_PEER_FLAG_UP)) + nhrp_peer_run_script(peer, "route-up", + nhrp_peer_script_route_up_done); + + return 0; +} + +static int nhrp_peer_routes_renew(void *ctx, struct nhrp_peer *peer) +{ + int *num_routes = (int *) ctx; + + if (peer->flags & NHRP_PEER_FLAG_PRUNE_PENDING) { + peer->flags &= ~NHRP_PEER_FLAG_PRUNE_PENDING; + nhrp_peer_cancel_async(peer); + nhrp_peer_send_resolve(peer); + (*num_routes)++; + } + + return 0; +} + +static void nhrp_peer_renew(struct nhrp_peer *peer) +{ + struct nhrp_interface *iface = peer->interface; + struct nhrp_peer_selector sel; + int num_routes = 0; + + /* Renew the cached information: all related routes + * or the peer itself */ + if (peer->type != NHRP_PEER_TYPE_SHORTCUT_ROUTE) { + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_UP; + sel.type_mask = BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE); + sel.interface = iface; + sel.next_hop_address = peer->protocol_address; + nhrp_peer_foreach(nhrp_peer_routes_renew, &num_routes, &sel); + } + + if (peer->flags & NHRP_PEER_FLAG_PRUNE_PENDING) { + peer->flags &= ~NHRP_PEER_FLAG_PRUNE_PENDING; + nhrp_peer_cancel_async(peer); + nhrp_peer_send_resolve(peer); + } +} + +static int is_used(void *ctx, struct nhrp_peer *peer) +{ + if (peer->flags & NHRP_PEER_FLAG_USED) + return 1; + + return 0; +} + +static void nhrp_peer_expire_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer); + struct nhrp_peer_selector sel; + int used; + + peer->flags |= NHRP_PEER_FLAG_PRUNE_PENDING; + nhrp_peer_schedule(peer, peer->expire_time - ev_now(), + nhrp_peer_remove_cb); + + if (peer->type == NHRP_PEER_TYPE_SHORTCUT_ROUTE) { + memset(&sel, 0, sizeof(sel)); + sel.interface = peer->interface; + sel.protocol_address = 
peer->next_hop_address; + used = nhrp_peer_foreach(is_used, NULL, &sel); + } else + used = peer->flags & NHRP_PEER_FLAG_USED; + + if (used) + nhrp_peer_renew(peer); +} + +static void nhrp_peer_is_down(struct nhrp_peer *peer) +{ + struct nhrp_peer_selector sel; + + /* Remove UP flags if not being removed permanently, so futher + * lookups are valid */ + if (!(peer->flags & NHRP_PEER_FLAG_REMOVED)) + peer->flags &= ~(NHRP_PEER_FLAG_LOWER_UP | NHRP_PEER_FLAG_UP); + + /* Check if there are routes using this peer as next-hop */ + if (peer->type != NHRP_PEER_TYPE_SHORTCUT_ROUTE) { + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE); + sel.interface = peer->interface; + sel.next_hop_address = peer->protocol_address; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); + } + + /* Remove from lists */ + if (list_hashed(&peer->mcast_list_entry)) + list_del(&peer->mcast_list_entry); + if (hlist_hashed(&peer->nbma_hash_entry)) + hlist_del(&peer->nbma_hash_entry); +} + +static void nhrp_peer_is_up(struct nhrp_peer *peer) +{ + struct nhrp_interface *iface = peer->interface; + struct nhrp_peer_selector sel; + int mcast = 0, i; + char tmp[64]; + + if ((peer->flags & (NHRP_PEER_FLAG_UP | NHRP_PEER_FLAG_REGISTER)) + == NHRP_PEER_FLAG_REGISTER) { + /* First time registration reply received */ + nhrp_peer_run_script(peer, "nhs-up", NULL); + } + + /* Remove from mcast list if previously there */ + if (list_hashed(&peer->mcast_list_entry)) + list_del(&peer->mcast_list_entry); + + /* Check if this one needs multicast traffic */ + if (BIT(peer->type) & iface->mcast_mask) { + mcast = 1; + } else { + for (i = 0; i < iface->mcast_numaddr; i++) { + if (!nhrp_address_cmp(&peer->protocol_address, + &iface->mcast_addr[i])) { + mcast = 1; + break; + } + } + } + + if (mcast) { + list_add(&peer->mcast_list_entry, &iface->mcast_list); + nhrp_info("[%s] Peer inserted to multicast list", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp)); + } + 
+ /* Searchable by NBMA */ + if (hlist_hashed(&peer->nbma_hash_entry)) + hlist_del(&peer->nbma_hash_entry); + if (BIT(peer->type) & (BIT(NHRP_PEER_TYPE_CACHED) | + BIT(NHRP_PEER_TYPE_DYNAMIC) | + BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) | + BIT(NHRP_PEER_TYPE_STATIC))) { + i = nhrp_address_hash(&peer->next_hop_address) % NHRP_INTERFACE_NBMA_HASH_SIZE; + hlist_add_head(&peer->nbma_hash_entry, &iface->nbma_hash[i]); + } + + peer->flags |= NHRP_PEER_FLAG_UP | NHRP_PEER_FLAG_LOWER_UP; + + /* Check if there are routes using this peer as next-hop*/ + if (peer->type != NHRP_PEER_TYPE_SHORTCUT_ROUTE) { + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE); + sel.interface = iface; + sel.next_hop_address = peer->protocol_address; + nhrp_peer_foreach(nhrp_peer_routes_up, NULL, &sel); + } + + nhrp_peer_send_packet_queue(peer); + + /* Schedule expiry or renewal */ + switch (peer->type) { + case NHRP_PEER_TYPE_DYNAMIC: + nhrp_peer_schedule(peer, peer->expire_time - ev_now(), + nhrp_peer_remove_cb); + break; + case NHRP_PEER_TYPE_CACHED: + nhrp_peer_schedule( + peer, + peer->expire_time - NHRP_EXPIRY_TIME - ev_now(), + nhrp_peer_expire_cb); + break; + case NHRP_PEER_TYPE_STATIC: + case NHRP_PEER_TYPE_DYNAMIC_NHS: + if (peer->flags & NHRP_PEER_FLAG_REGISTER) { + nhrp_peer_schedule( + peer, iface->holding_time / + NHRP_HOLDING_TIME_DIVISOR + 1, + nhrp_peer_send_register_cb); + } + break; + default: + NHRP_BUG_ON("invalid peer type"); + break; + } +} + +static void nhrp_peer_lower_is_up(struct nhrp_peer *peer) +{ + peer->flags |= NHRP_PEER_FLAG_LOWER_UP; + + if (peer->flags & NHRP_PEER_FLAG_REGISTER) + nhrp_peer_send_register_cb(&peer->timer, 0); + else + nhrp_peer_is_up(peer); +} + +static void nhrp_peer_script_peer_up_done(union nhrp_peer_event e, int revents) +{ + struct nhrp_peer *peer = nhrp_peer_from_event(e, revents); + char tmp[64], reason[32]; + + if (nhrp_peer_event_ok(e, revents)) { + nhrp_debug("[%s] Peer up script: success", + 
nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp)); + + kernel_inject_neighbor(&peer->protocol_address, + &peer->next_hop_address, + peer->interface); + nhrp_peer_lower_is_up(peer); + } else { + nhrp_error("[%s] Peer up script failed: %s", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp), + nhrp_peer_event_reason(e, revents, + sizeof(reason), reason)); + nhrp_peer_restart_error(peer); + } +} + +static void nhrp_peer_run_up_script(struct nhrp_peer *peer) +{ + nhrp_peer_run_script(peer, "peer-up", + nhrp_peer_script_peer_up_done); +} + +static void nhrp_peer_address_query_cb(struct nhrp_address_query *query, + int num_addr, struct nhrp_address *addrs) +{ + struct nhrp_peer *peer = container_of(query, struct nhrp_peer, + address_query); + char host[64]; + + if (num_addr > 0) { + nhrp_info("Resolved '%s' as %s", + peer->nbma_hostname, + nhrp_address_format(&addrs[0], sizeof(host), host)); + peer->next_hop_address = addrs[0]; + peer->afnum = nhrp_afnum_from_pf(peer->next_hop_address.type); + nhrp_peer_run_up_script(peer); + } else { + nhrp_error("Failed to resolve '%s'", peer->nbma_hostname); + nhrp_peer_restart_error(peer); + } +} + +static void nhrp_peer_restart_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer); + + if (peer->nbma_hostname != NULL) { + nhrp_address_resolve(&peer->address_query, + peer->nbma_hostname, + nhrp_peer_address_query_cb); + } else { + nhrp_peer_resolve_nbma(peer); + + if (!(peer->flags & NHRP_PEER_FLAG_LOWER_UP)) + nhrp_peer_run_up_script(peer); + else + nhrp_peer_script_peer_up_done(&peer->child, 0); + } +} + +static void nhrp_peer_send_protocol_purge(struct nhrp_peer *peer) +{ + char tmp[64]; + struct nhrp_packet *packet; + struct nhrp_cie *cie; + struct nhrp_payload *payload; + int sent = FALSE; + + packet = nhrp_packet_alloc(); + if (packet == NULL) + goto error; + + packet->hdr = (struct nhrp_packet_header) { + .afnum = peer->afnum, + .protocol_type 
= peer->protocol_type, + .version = NHRP_VERSION_RFC2332, + .type = NHRP_PACKET_PURGE_REQUEST, + .hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT, + .flags = NHRP_FLAG_PURGE_NO_REPLY, + }; + if (peer->flags & NHRP_PEER_FLAG_CISCO) { + /* Cisco IOS seems to require reqistration and purge + * request id to match, so we need to used a fixed + * value. This is in violation of RFC, though. */ + packet->hdr.u.request_id = + nhrp_address_hash(&peer->interface->protocol_address); + } + packet->dst_protocol_address = peer->protocol_address; + + /* Payload CIE */ + cie = nhrp_cie_alloc(); + if (cie == NULL) + goto error_free_packet; + + *cie = (struct nhrp_cie) { + .hdr.code = NHRP_CODE_SUCCESS, + .hdr.mtu = 0, + .hdr.preference = 0, + .hdr.prefix_length = 0xff, + }; + cie->protocol_address = peer->interface->protocol_address; + + payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + + nhrp_info("Sending Purge Request (of protocol address) to %s", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp)); + + packet->dst_peer = nhrp_peer_get(peer); + packet->dst_iface = peer->interface; + sent = nhrp_packet_send(packet); +error_free_packet: + nhrp_packet_put(packet); +error: + if (sent) + nhrp_peer_schedule(peer, 2, nhrp_peer_send_register_cb); + else + nhrp_peer_restart_error(peer); +} + +static int nhrp_add_local_route_cie(void *ctx, struct nhrp_peer *route) +{ + struct nhrp_packet *packet = (struct nhrp_packet *) ctx; + struct nhrp_payload *payload; + struct nhrp_cie *cie; + + if (route->interface != NULL && + !(route->interface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST)) + return 0; + + cie = nhrp_cie_alloc(); + if (cie == NULL) + return 0; + + *cie = (struct nhrp_cie) { + .hdr.code = 0, + .hdr.prefix_length = route->prefix_length, + .protocol_address = route->protocol_address, + }; + + payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + + return 0; +} + +int 
nhrp_peer_discover_nhs(struct nhrp_peer *peer, + struct nhrp_address *newaddr) +{ + struct nhrp_peer_selector sel; + char tmp[32], tmp2[32]; + + if (nhrp_address_cmp(&peer->protocol_address, newaddr) == 0) + return TRUE; + + if (peer->type != NHRP_PEER_TYPE_DYNAMIC_NHS || + !nhrp_address_is_network(&peer->protocol_address, + peer->prefix_length)) { + nhrp_error("Unexpected NHS protocol address change %s -> %s", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp2), tmp2), + nhrp_address_format(newaddr, sizeof(tmp), tmp)); + return FALSE; + } + + if (nhrp_address_prefix_cmp(&peer->protocol_address, newaddr, + peer->prefix_length) != 0) { + nhrp_error("Protocol address change to %s is not within %s/%d", + nhrp_address_format(newaddr, sizeof(tmp), tmp), + nhrp_address_format(&peer->protocol_address, + sizeof(tmp2), tmp2), + peer->prefix_length); + return FALSE; + } + + /* Remove incomplete/cached entries */ + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_EXACT; + sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE; + sel.interface = peer->interface; + sel.protocol_address = *newaddr; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); + + /* Update protocol address */ + peer->protocol_address = *newaddr; + + return TRUE; +} + +static void nhrp_peer_handle_registration_reply(void *ctx, + struct nhrp_packet *reply) +{ + struct nhrp_peer *peer = (struct nhrp_peer *) ctx; + struct nhrp_payload *payload; + struct nhrp_cie *cie; + struct nhrp_packet *packet; + char tmp[NHRP_PEER_FORMAT_LEN]; + int ec = -1; + + if (peer->flags & NHRP_PEER_FLAG_REMOVED) + goto ret; + + if (reply == NULL || + reply->hdr.type != NHRP_PACKET_REGISTRATION_REPLY) { + ec = reply ? 
reply->hdr.u.error.code : -1; + nhrp_info("Failed to register to %s: %s (%d)", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp), + nhrp_error_indication_text(ec), ntohs(ec)); + + if (ec == NHRP_ERROR_HOP_COUNT_EXCEEDED) + nhrp_peer_discover_nhs(peer, + &reply->src_protocol_address); + + if (reply != NULL) { + nhrp_peer_schedule(peer, NHRP_RETRY_REGISTER_TIME, + nhrp_peer_send_register_cb); + } else { + nhrp_peer_restart_error(peer); + } + goto ret; + } + + /* Check servers protocol address */ + if (!nhrp_peer_discover_nhs(peer, &reply->dst_protocol_address)) { + nhrp_peer_restart_error(peer); + goto ret; + } + + /* Check result */ + payload = nhrp_packet_payload(reply, NHRP_PAYLOAD_TYPE_CIE_LIST); + if (payload != NULL) { + cie = nhrp_payload_get_cie(payload, 1); + if (cie != NULL) + ec = cie->hdr.code; + } + + nhrp_info("Received Registration Reply from %s: %s", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp), + nhrp_cie_code_text(ec)); + + switch (ec) { + case NHRP_CODE_SUCCESS: + break; + case NHRP_CODE_UNIQUE_ADDRESS_REGISTERED: + nhrp_peer_send_protocol_purge(peer); + goto ret; + default: + nhrp_peer_schedule(peer, NHRP_RETRY_REGISTER_TIME, + nhrp_peer_send_register_cb); + goto ret; + } + + /* Check for NAT */ + payload = nhrp_packet_extension(reply, + NHRP_EXTENSION_NAT_ADDRESS | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + if (payload != NULL) { + cie = nhrp_payload_get_cie(payload, 2); + if (cie != NULL) { + nhrp_info("NAT detected: our real NBMA address is %s", + nhrp_address_format(&cie->nbma_address, + sizeof(tmp), tmp)); + peer->interface->nat_cie = *cie; + } + } + + /* If not re-registration, send a purge request for each subnet + * we accept shortcuts to, to clear server redirection cache. 
*/ + if (!(peer->flags & NHRP_PEER_FLAG_UP) && + (packet = nhrp_packet_alloc()) != NULL) { + struct nhrp_peer_selector sel; + + packet->hdr = (struct nhrp_packet_header) { + .afnum = peer->afnum, + .protocol_type = peer->protocol_type, + .version = NHRP_VERSION_RFC2332, + .type = NHRP_PACKET_PURGE_REQUEST, + .hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT, + }; + packet->dst_protocol_address = peer->protocol_address; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ADDR); + nhrp_peer_foreach(nhrp_add_local_route_cie, packet, &sel); + + nhrp_packet_extension(packet, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_REVERSE_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_RESPONDER_ADDRESS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + + nhrp_info("Sending Purge Request (of local routes) to %s", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp)); + + packet->dst_peer = nhrp_peer_get(peer); + packet->dst_iface = peer->interface; + nhrp_packet_send_request(packet, NULL, NULL); + nhrp_packet_put(packet); + } + + /* Re-register after holding time expires */ + nhrp_peer_is_up(peer); +ret: + nhrp_peer_put(peer); +} + +static void nhrp_peer_send_register_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer); + char dst[64]; + struct nhrp_packet *packet; + struct nhrp_cie *cie; + struct nhrp_payload *payload; + int sent = FALSE; + + packet = nhrp_packet_alloc(); + if (packet == NULL) + goto error; + + packet->hdr = (struct nhrp_packet_header) { + .afnum = peer->afnum, + .protocol_type = peer->protocol_type, + .version = NHRP_VERSION_RFC2332, + .type = NHRP_PACKET_REGISTRATION_REQUEST, + .hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT, + .flags = NHRP_FLAG_REGISTRATION_UNIQUE | + 
NHRP_FLAG_REGISTRATION_NAT + }; + if (peer->flags & NHRP_PEER_FLAG_CISCO) { + /* Cisco IOS seems to require reqistration and purge + * request id to match, so we need to used a fixed + * value. This is in violation of RFC, though. */ + packet->hdr.u.request_id = + nhrp_address_hash(&peer->interface->protocol_address); + } + packet->dst_protocol_address = peer->protocol_address; + + if (peer->type == NHRP_PEER_TYPE_DYNAMIC_NHS && + nhrp_address_is_network(&peer->protocol_address, + peer->prefix_length)) { + /* We are not yet sure of the protocol address of the NHS - + * send registration to the broadcast address with one hop + * limit. Except the NHS to reply with it's real protocol + * address. */ + nhrp_address_set_broadcast(&packet->dst_protocol_address, + peer->prefix_length); + packet->hdr.hop_count = 0; + } + + + /* Payload CIE */ + cie = nhrp_cie_alloc(); + if (cie == NULL) + goto error; + + *cie = (struct nhrp_cie) { + .hdr.code = NHRP_CODE_SUCCESS, + .hdr.prefix_length = 0xff, + .hdr.mtu = htons(peer->my_nbma_mtu), + .hdr.holding_time = htons(peer->interface->holding_time), + .hdr.preference = 0, + }; + + payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + + /* Standard extensions */ + nhrp_packet_extension(packet, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_REVERSE_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_RESPONDER_ADDRESS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + + /* Cisco NAT extension CIE */ + cie = nhrp_cie_alloc(); + if (cie == NULL) + goto error_free_packet; + + *cie = (struct nhrp_cie) { + .hdr.code = NHRP_CODE_SUCCESS, + .hdr.prefix_length = peer->protocol_address.addr_len * 8, + .hdr.preference = 0, + .nbma_address = peer->next_hop_address, + .protocol_address = 
peer->protocol_address, + }; + + payload = nhrp_packet_extension(packet, NHRP_EXTENSION_NAT_ADDRESS, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + + nhrp_info("Sending Registration Request to %s (my mtu=%d)", + nhrp_address_format(&peer->protocol_address, + sizeof(dst), dst), + peer->my_nbma_mtu); + + packet->dst_peer = nhrp_peer_get(peer); + packet->dst_iface = peer->interface; + sent = nhrp_packet_send_request(packet, + nhrp_peer_handle_registration_reply, + nhrp_peer_get(peer)); + +error_free_packet: + nhrp_packet_put(packet); +error: + if (!sent) + nhrp_peer_restart_error(peer); +} + +static int error_on_matching(void *ctx, struct nhrp_peer *peer) +{ + return 1; +} + +static void nhrp_peer_handle_resolution_reply(void *ctx, + struct nhrp_packet *reply) +{ + struct nhrp_peer *peer = (struct nhrp_peer *) ctx, *np; + struct nhrp_payload *payload; + struct nhrp_cie *cie, *natcie = NULL, *natoacie = NULL; + struct nhrp_interface *iface; + struct nhrp_peer_selector sel; + char dst[64], tmp[64], nbma[64]; + int ec; + + if (peer->flags & NHRP_PEER_FLAG_REMOVED) + goto ret; + + if (reply == NULL || + reply->hdr.type != NHRP_PACKET_RESOLUTION_REPLY) { + ec = reply ? reply->hdr.u.error.code : -1; + + nhrp_info("Failed to resolve %s: %s (%d)", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp), + nhrp_error_indication_text(ec), ntohs(ec)); + + if (reply != NULL) { + /* We got reply that this address is not available - + * negative cache it. */ + peer->flags |= NHRP_PEER_FLAG_UP; + nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_NEGATIVE); + } else { + /* Time out - NHS reachable, or packet lost multiple + * times. Keep trying if still needed. 
*/ + nhrp_peer_remove(peer); + } + goto ret; + } + + payload = nhrp_packet_payload(reply, NHRP_PAYLOAD_TYPE_CIE_LIST); + cie = list_next(&payload->u.cie_list, struct nhrp_cie, cie_list_entry); + if (cie == NULL) + goto ret; + + nhrp_info("Received Resolution Reply %s/%d is at proto %s nbma %s", + nhrp_address_format(&peer->protocol_address, + sizeof(dst), dst), + cie->hdr.prefix_length, + nhrp_address_format(&cie->protocol_address, + sizeof(tmp), tmp), + nhrp_address_format(&cie->nbma_address, + sizeof(nbma), nbma)); + + payload = nhrp_packet_extension(reply, + NHRP_EXTENSION_NAT_ADDRESS | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + if ((reply->hdr.flags & NHRP_FLAG_RESOLUTION_NAT) && + (payload != NULL)) { + natcie = list_next(&payload->u.cie_list, struct nhrp_cie, cie_list_entry); + if (natcie != NULL) { + natoacie = cie; + nhrp_info("NAT detected: really at proto %s nbma %s", + nhrp_address_format(&natcie->protocol_address, + sizeof(tmp), tmp), + nhrp_address_format(&natcie->nbma_address, + sizeof(nbma), nbma)); + } + } + if (natcie == NULL) + natcie = cie; + + if (nhrp_address_cmp(&peer->protocol_address, &cie->protocol_address) + == 0) { + /* Destination is within NBMA network; update cache */ + peer->mtu = ntohs(cie->hdr.mtu); + peer->prefix_length = cie->hdr.prefix_length; + peer->next_hop_address = natcie->nbma_address; + if (natoacie != NULL) + peer->next_hop_nat_oa = natoacie->nbma_address; + peer->expire_time = ev_now() + ntohs(cie->hdr.holding_time); + nhrp_address_set_network(&peer->protocol_address, + peer->prefix_length); + nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_CACHED); + goto ret; + } + + /* Check that we won't replace a local address */ + sel = (struct nhrp_peer_selector) { + .flags = NHRP_PEER_FIND_EXACT, + .type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ADDR), + .protocol_address = peer->protocol_address, + .prefix_length = cie->hdr.prefix_length, + }; + if (nhrp_peer_foreach(error_on_matching, NULL, &sel)) { + nhrp_error("Local 
route %s/%d exists: not replacing " + "with shortcut", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp), + cie->hdr.prefix_length); + peer->flags |= NHRP_PEER_FLAG_UP; + nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_NEGATIVE); + goto ret; + } + + /* Update the received NBMA address to nexthop */ + iface = peer->interface; + np = nhrp_peer_route(iface, &cie->protocol_address, + NHRP_PEER_FIND_EXACT, 0); + if (np == NULL) { + np = nhrp_peer_alloc(iface); + np->type = NHRP_PEER_TYPE_CACHED; + np->afnum = reply->hdr.afnum; + np->protocol_type = reply->hdr.protocol_type; + np->protocol_address = cie->protocol_address; + np->next_hop_address = natcie->nbma_address; + if (natoacie != NULL) + np->next_hop_nat_oa = natoacie->nbma_address; + np->mtu = ntohs(cie->hdr.mtu); + np->prefix_length = cie->protocol_address.addr_len * 8; + np->expire_time = ev_now() + ntohs(cie->hdr.holding_time); + nhrp_peer_insert(np); + nhrp_peer_put(np); + } + + /* Off NBMA destination; a shortcut route */ + np = nhrp_peer_alloc(iface); + np->type = NHRP_PEER_TYPE_SHORTCUT_ROUTE; + np->afnum = reply->hdr.afnum; + np->protocol_type = reply->hdr.protocol_type; + np->protocol_address = peer->protocol_address; + np->prefix_length = cie->hdr.prefix_length; + np->next_hop_address = cie->protocol_address; + np->expire_time = ev_now() + ntohs(cie->hdr.holding_time); + nhrp_address_set_network(&np->protocol_address, np->prefix_length); + nhrp_peer_insert(np); + nhrp_peer_put(np); + + /* Delete the incomplete entry */ + nhrp_peer_remove(peer); +ret: + nhrp_peer_put(peer); +} + +static void nhrp_peer_send_resolve(struct nhrp_peer *peer) +{ + char dst[64]; + struct nhrp_packet *packet; + struct nhrp_cie *cie; + struct nhrp_payload *payload; + + packet = nhrp_packet_alloc(); + if (packet == NULL) + goto error; + + packet->hdr = (struct nhrp_packet_header) { + .afnum = peer->afnum, + .protocol_type = peer->protocol_type, + .version = NHRP_VERSION_RFC2332, + .type = NHRP_PACKET_RESOLUTION_REQUEST, + 
.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT, + .flags = NHRP_FLAG_RESOLUTION_SOURCE_IS_ROUTER | + NHRP_FLAG_RESOLUTION_AUTHORATIVE | + NHRP_FLAG_RESOLUTION_NAT + }; + packet->dst_protocol_address = peer->protocol_address; + + /* Payload CIE */ + cie = nhrp_cie_alloc(); + if (cie == NULL) + goto error; + + *cie = (struct nhrp_cie) { + .hdr.code = NHRP_CODE_SUCCESS, + .hdr.prefix_length = 0, + .hdr.mtu = 0, + .hdr.holding_time = htons(peer->interface->holding_time), + }; + + payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + + nhrp_info("Sending Resolution Request to %s", + nhrp_address_format(&peer->protocol_address, + sizeof(dst), dst)); + + /* Standard extensions */ + nhrp_packet_extension(packet, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_REVERSE_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_RESPONDER_ADDRESS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_NAT_ADDRESS, + NHRP_PAYLOAD_TYPE_CIE_LIST); + + packet->dst_iface = peer->interface; + nhrp_packet_send_request(packet, + nhrp_peer_handle_resolution_reply, + nhrp_peer_get(peer)); + +error: + nhrp_packet_put(packet); +} + +struct nhrp_peer *nhrp_peer_alloc(struct nhrp_interface *iface) +{ + struct nhrp_peer *p; + + nhrp_peer_num_total++; + p = calloc(1, sizeof(struct nhrp_peer)); + p->ref = 1; + p->interface = iface; + list_init(&p->peer_list_entry); + list_init(&p->mcast_list_entry); + ev_timer_init(&p->timer, NULL, 0., 0.); + ev_child_init(&p->child, NULL, 0, 0); + + return p; +} + +struct nhrp_peer *nhrp_peer_get(struct nhrp_peer *peer) +{ + if (peer == NULL) + return NULL; + + peer->ref++; + nhrp_peer_debug_refcount(__FUNCTION__, peer); + + return peer; +} + +static void 
nhrp_peer_run_nhs_down(struct nhrp_peer *peer) +{ + if ((peer->flags & (NHRP_PEER_FLAG_REGISTER | + NHRP_PEER_FLAG_UP | + NHRP_PEER_FLAG_REPLACED)) + == (NHRP_PEER_FLAG_REGISTER | NHRP_PEER_FLAG_UP)) + nhrp_peer_run_script(peer, "nhs-down", NULL); +} + +static void nhrp_peer_release(struct nhrp_peer *peer) +{ + struct nhrp_interface *iface = peer->interface; + struct nhrp_peer_selector sel; + + nhrp_peer_cancel_async(peer); + + /* Remove from lists */ + if (list_hashed(&peer->mcast_list_entry)) + list_del(&peer->mcast_list_entry); + if (hlist_hashed(&peer->nbma_hash_entry)) + hlist_del(&peer->nbma_hash_entry); + + if (peer->parent != NULL) { + nhrp_peer_put(peer->parent); + peer->parent = NULL; + } + + switch (peer->type) { + case NHRP_PEER_TYPE_SHORTCUT_ROUTE: + if ((peer->flags & NHRP_PEER_FLAG_UP) && + !(peer->flags & NHRP_PEER_FLAG_REPLACED)) + nhrp_peer_run_script(peer, "route-down", NULL); + break; + case NHRP_PEER_TYPE_CACHED: + case NHRP_PEER_TYPE_DYNAMIC: + case NHRP_PEER_TYPE_STATIC: + case NHRP_PEER_TYPE_DYNAMIC_NHS: + if (peer->flags & NHRP_PEER_FLAG_REPLACED) + break; + + /* Remove cached routes using this entry as next-hop */ + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE); + sel.interface = iface; + sel.next_hop_address = peer->protocol_address; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, + &sel); + + /* Execute peer-down */ + nhrp_peer_run_nhs_down(peer); + if (peer->flags & NHRP_PEER_FLAG_UP) { + peer->purge_reason = "timeout"; + nhrp_peer_run_script(peer, "peer-down", NULL); + } + + /* Remove from arp cache */ + if (peer->protocol_address.type != PF_UNSPEC) + kernel_inject_neighbor(&peer->protocol_address, + NULL, peer->interface); + break; + case NHRP_PEER_TYPE_INCOMPLETE: + case NHRP_PEER_TYPE_NEGATIVE: + case NHRP_PEER_TYPE_LOCAL_ADDR: + case NHRP_PEER_TYPE_LOCAL_ROUTE: + case NHRP_PEER_TYPE_STATIC_DNS: + break; + default: + NHRP_BUG_ON("invalid peer type"); + break; + } + + if (peer->nbma_hostname) 
{ + free(peer->nbma_hostname); + peer->nbma_hostname = NULL; + } + + free(peer); + nhrp_peer_num_total--; +} + +int nhrp_peer_put(struct nhrp_peer *peer) +{ + NHRP_BUG_ON(peer->ref == 0); + + peer->ref--; + nhrp_peer_debug_refcount(__FUNCTION__, peer); + + if (peer->ref > 0) + return FALSE; + + nhrp_peer_release(peer); + + return TRUE; +} + +static int nhrp_peer_mark_matching(void *ctx, struct nhrp_peer *peer) +{ + peer->flags |= NHRP_PEER_FLAG_MARK; + return 0; +} + +static int nhrp_peer_renew_nhs_matching(void *ctx, struct nhrp_peer *peer) +{ + peer->flags &= ~NHRP_PEER_FLAG_MARK; + return 1; +} + +static void nhrp_peer_dnsmap_query_cb(struct nhrp_address_query *query, + int num_addr, struct nhrp_address *addrs) +{ + struct nhrp_peer *np, *peer = + container_of(query, struct nhrp_peer, address_query); + struct nhrp_peer_selector sel; + int i; + + if (num_addr < 0) { + nhrp_error("Failed to resolve '%s'", peer->nbma_hostname); + nhrp_peer_schedule(peer, 10, nhrp_peer_dnsmap_restart_cb); + return; + } + + if (num_addr > 0) { + /* Refresh protocol */ + peer->afnum = nhrp_afnum_from_pf(addrs[0].type); + } + + /* Mark existing dynamic nhs entries as expired */ + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_DYNAMIC_NHS); + sel.interface = peer->interface; + sel.parent = peer; + nhrp_peer_foreach(nhrp_peer_mark_matching, NULL, &sel); + + for (i = 0; i < num_addr; i++) { + /* If this NBMA exists as dynamic NHS, mark it ok. 
*/ + sel.next_hop_address = addrs[i]; + if (nhrp_peer_foreach(nhrp_peer_renew_nhs_matching, + NULL, &sel) != 0) + continue; + + /* New NHS, create a peer entry */ + np = nhrp_peer_alloc(peer->interface); + np->type = NHRP_PEER_TYPE_DYNAMIC_NHS; + np->flags |= NHRP_PEER_FLAG_REGISTER; + np->afnum = peer->afnum; + np->protocol_type = peer->protocol_type; + np->protocol_address = peer->protocol_address; + np->prefix_length = peer->prefix_length; + np->next_hop_address = addrs[i]; + np->parent = nhrp_peer_get(peer); + nhrp_address_set_network(&np->protocol_address, + np->prefix_length); + nhrp_peer_insert(np); + nhrp_peer_put(np); + } + + /* Delete all dynamic nhs:s that were not in the DNS reply */ + nhrp_address_set_type(&sel.next_hop_address, AF_UNSPEC); + sel.flags = NHRP_PEER_FIND_MARK; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); + + /* Refresh DNS info */ + nhrp_peer_schedule(peer, peer->interface->holding_time, + nhrp_peer_dnsmap_restart_cb); +} + +static void nhrp_peer_dnsmap_restart_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer); + + NHRP_BUG_ON(peer->nbma_hostname == NULL); + nhrp_address_resolve(&peer->address_query, peer->nbma_hostname, + nhrp_peer_dnsmap_query_cb); +} + +static void nhrp_peer_insert_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer); + + nhrp_peer_cancel_async(peer); + switch (peer->type) { + case NHRP_PEER_TYPE_LOCAL_ADDR: + peer->flags |= NHRP_PEER_FLAG_UP; + forward_local_addresses_changed(); + break; + case NHRP_PEER_TYPE_LOCAL_ROUTE: + peer->flags |= NHRP_PEER_FLAG_UP; + break; + case NHRP_PEER_TYPE_INCOMPLETE: + nhrp_peer_send_resolve(peer); + break; + case NHRP_PEER_TYPE_CACHED: + case NHRP_PEER_TYPE_DYNAMIC: + case NHRP_PEER_TYPE_STATIC: + case NHRP_PEER_TYPE_DYNAMIC_NHS: + nhrp_peer_restart_cb(w, 0); + break; + case NHRP_PEER_TYPE_STATIC_DNS: + nhrp_peer_dnsmap_restart_cb(w, 0); + break; + case 
NHRP_PEER_TYPE_SHORTCUT_ROUTE: + if (peer->flags & NHRP_PEER_FLAG_UP) + nhrp_peer_script_route_up_done(&peer->child, 0); + else if (nhrp_peer_route(peer->interface, + &peer->next_hop_address, + NHRP_PEER_FIND_UP | NHRP_PEER_FIND_EXACT, + NHRP_PEER_TYPEMASK_ADJACENT) != NULL) + nhrp_peer_run_script(peer, "route-up", + nhrp_peer_script_route_up_done); + else + nhrp_peer_schedule(peer, peer->expire_time - NHRP_EXPIRY_TIME + - 10 - ev_now(), nhrp_peer_expire_cb); + break; + case NHRP_PEER_TYPE_NEGATIVE: + peer->expire_time = ev_now() + NHRP_NEGATIVE_CACHE_TIME; + + if (peer->flags & NHRP_PEER_FLAG_UP) + kernel_inject_neighbor(&peer->protocol_address, + NULL, peer->interface); + nhrp_peer_schedule(peer, NHRP_NEGATIVE_CACHE_TIME, + nhrp_peer_remove_cb); + break; + default: + NHRP_BUG_ON("invalid peer type"); + break; + } +} + +static void nhrp_peer_reinsert(struct nhrp_peer *peer, int type) +{ + NHRP_BUG_ON((peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) != + (type == NHRP_PEER_TYPE_LOCAL_ADDR)); + NHRP_BUG_ON((peer->type == NHRP_PEER_TYPE_LOCAL_ROUTE) != + (type == NHRP_PEER_TYPE_LOCAL_ROUTE)); + + peer->flags &= ~NHRP_PEER_FLAG_REMOVED; + peer->type = type; + nhrp_peer_insert_cb(&peer->timer, 0); +} + +static int nhrp_peer_replace_shortcut(void *ctx, struct nhrp_peer *peer) +{ + struct nhrp_peer *shortcut = (struct nhrp_peer *) ctx; + + /* Shortcut of identical prefix is replacement, either + * due to renewal, or new shortcut next-hop. */ + if (nhrp_address_cmp(&peer->protocol_address, + &shortcut->protocol_address) == 0 && + peer->prefix_length == shortcut->prefix_length) { + peer->flags |= NHRP_PEER_FLAG_REPLACED; + + /* If identical shortcut is being refreshed, + * mark the refresher peer entry up. 
*/ + if ((peer->flags & NHRP_PEER_FLAG_UP) && + nhrp_address_cmp(&peer->next_hop_address, + &shortcut->next_hop_address) == 0) + shortcut->flags |= NHRP_PEER_FLAG_UP; + } + + /* Delete the old peer unconditionally */ + nhrp_peer_remove(peer); + + return 0; +} + +void nhrp_peer_insert(struct nhrp_peer *peer) +{ + struct nhrp_peer_selector sel; + char tmp[NHRP_PEER_FORMAT_LEN]; + + /* First, prune all duplicates */ + memset(&sel, 0, sizeof(sel)); + sel.interface = peer->interface; + sel.protocol_address = peer->protocol_address; + sel.prefix_length = peer->prefix_length; + switch (peer->type) { + case NHRP_PEER_TYPE_SHORTCUT_ROUTE: + /* remove all existing shortcuts with same nexthop */ + sel.flags = NHRP_PEER_FIND_SUBNET; + sel.type_mask |= BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE); + nhrp_peer_foreach(nhrp_peer_replace_shortcut, peer, &sel); + break; + case NHRP_PEER_TYPE_LOCAL_ROUTE: + sel.type_mask |= BIT(NHRP_PEER_TYPE_LOCAL_ROUTE); + default: + /* remove exact protocol address matches */ + sel.flags = NHRP_PEER_FIND_EXACT; + sel.type_mask |= NHRP_PEER_TYPEMASK_REMOVABLE; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); + break; + } + + /* Keep a reference as long as we are on the list */ + peer = nhrp_peer_get(peer); + nhrp_debug("Adding %s %s", + nhrp_peer_type[peer->type], + nhrp_peer_format(peer, sizeof(tmp), tmp)); + + if (peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) + list_add(&peer->peer_list_entry, &local_peer_list); + else + list_add(&peer->peer_list_entry, &peer->interface->peer_list); + + /* Start peers life */ + if (nhrp_running || peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) + nhrp_peer_insert_cb(&peer->timer, 0); + else + nhrp_peer_schedule(peer, 0, &nhrp_peer_insert_cb); +} + +static void nhrp_peer_script_peer_down_done(union nhrp_peer_event e, + int revents) +{ + struct nhrp_peer *peer = nhrp_peer_from_event(e, revents); + + nhrp_peer_schedule(peer, 5, nhrp_peer_restart_cb); +} + +void nhrp_peer_purge(struct nhrp_peer *peer, const char *purge_reason) 
+{ + switch (peer->type) { + case NHRP_PEER_TYPE_STATIC: + case NHRP_PEER_TYPE_DYNAMIC_NHS: + peer->purge_reason = purge_reason; + nhrp_peer_run_nhs_down(peer); + nhrp_peer_is_down(peer); + nhrp_peer_cancel_async(peer); + if (peer->flags & NHRP_PEER_FLAG_LOWER_UP) { + nhrp_peer_run_script(peer, "peer-down", + nhrp_peer_script_peer_down_done); + } else { + nhrp_peer_script_peer_down_done(&peer->child, 0); + } + nhrp_address_set_type(&peer->my_nbma_address, PF_UNSPEC); + break; + case NHRP_PEER_TYPE_STATIC_DNS: + nhrp_peer_schedule(peer, 0, nhrp_peer_dnsmap_restart_cb); + break; + default: + peer->purge_reason = purge_reason; + nhrp_peer_remove(peer); + break; + } +} + +int nhrp_peer_purge_matching(void *ctx, struct nhrp_peer *peer) +{ + int *count = (int *) ctx; + nhrp_peer_purge(peer, "user-request"); + if (count != NULL) + (*count)++; + return 0; +} + +int nhrp_peer_lowerdown_matching(void *ctx, struct nhrp_peer *peer) +{ + int *count = (int *) ctx; + nhrp_peer_purge(peer, "lower-down"); + if (count != NULL) + (*count)++; + return 0; +} + +static void nhrp_peer_remove_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer); + int type; + + peer->flags |= NHRP_PEER_FLAG_REMOVED; + peer->purge_reason = "expired"; + nhrp_peer_is_down(peer); + list_del(&peer->peer_list_entry); + + type = peer->type; + nhrp_peer_put(peer); + + if (type == NHRP_PEER_TYPE_LOCAL_ADDR) + forward_local_addresses_changed(); +} + +void nhrp_peer_remove(struct nhrp_peer *peer) +{ + char tmp[NHRP_PEER_FORMAT_LEN]; + + if (peer->flags & NHRP_PEER_FLAG_REMOVED) + return; + + nhrp_debug("Removing %s %s", + nhrp_peer_type[peer->type], + nhrp_peer_format(peer, sizeof(tmp), tmp)); + + peer->flags |= NHRP_PEER_FLAG_REMOVED; + nhrp_peer_is_down(peer); + nhrp_peer_cancel_async(peer); + nhrp_peer_schedule(peer, 0, nhrp_peer_remove_cb); +} + +int nhrp_peer_remove_matching(void *ctx, struct nhrp_peer *peer) +{ + int *count = (int *) ctx; + + 
nhrp_peer_remove(peer); + if (count != NULL) + (*count)++; + + return 0; +} + +int nhrp_peer_set_used_matching(void *ctx, struct nhrp_peer *peer) +{ + int used = (int) (intptr_t) ctx; + + if (used) { + peer->flags |= NHRP_PEER_FLAG_USED; + nhrp_peer_renew(peer); + } else { + peer->flags &= ~NHRP_PEER_FLAG_USED; + } + return 0; +} + +int nhrp_peer_match(struct nhrp_peer *p, struct nhrp_peer_selector *sel) +{ + if (sel->type_mask && !(sel->type_mask & BIT(p->type))) + return FALSE; + + if ((sel->flags & NHRP_PEER_FIND_UP) && + !(p->flags & NHRP_PEER_FLAG_UP)) + return FALSE; + + if ((sel->flags & NHRP_PEER_FIND_MARK) && + !(p->flags & NHRP_PEER_FLAG_MARK)) + return FALSE; + + if (sel->interface != NULL && + p->interface != sel->interface && + !(p->interface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST)) + return FALSE; + + if (sel->hostname != NULL && + (p->nbma_hostname == NULL || + strcmp(sel->hostname, p->nbma_hostname) != 0)) + return FALSE; + + if (sel->parent != NULL && + p->parent != sel->parent) + return FALSE; + + if (sel->protocol_address.type != PF_UNSPEC) { + if (sel->prefix_length == 0) + sel->prefix_length = sel->protocol_address.addr_len * 8; + + if (sel->flags & NHRP_PEER_FIND_EXACT) { + if (nhrp_address_cmp(&p->protocol_address, + &sel->protocol_address) != 0) + return FALSE; + + if (p->prefix_length != sel->prefix_length && + p->type != NHRP_PEER_TYPE_STATIC && + p->type != NHRP_PEER_TYPE_DYNAMIC_NHS) + return FALSE; + } else if (sel->flags & NHRP_PEER_FIND_ROUTE) { + if (nhrp_address_prefix_cmp(&p->protocol_address, + &sel->protocol_address, + p->prefix_length) != 0) + return FALSE; + } else { + if (p->prefix_length < sel->prefix_length) { + if (sel->prefix_length + == sel->protocol_address.addr_len * 8 && + nhrp_address_cmp(&p->protocol_address, + &sel->protocol_address) + == 0) + return TRUE; + + return FALSE; + } + + if (nhrp_address_prefix_cmp(&p->protocol_address, + &sel->protocol_address, + sel->prefix_length) != 0) + return FALSE; + } + } + + 
if (sel->next_hop_address.type != PF_UNSPEC) { + if (nhrp_address_cmp(&p->next_hop_address, + &sel->next_hop_address) != 0) + return FALSE; + } + + return TRUE; +} + +struct enum_interface_peers_ctx { + nhrp_peer_enumerator enumerator; + void *ctx; + struct nhrp_peer_selector *sel; +}; + +static int enumerate_peer_cache(struct list_head *peer_cache, + nhrp_peer_enumerator e, void *ctx, + struct nhrp_peer_selector *sel) +{ + struct nhrp_peer *p; + int rc = 0; + + list_for_each_entry(p, peer_cache, peer_list_entry) { + if (p->flags & NHRP_PEER_FLAG_REMOVED) + continue; + + if (sel == NULL || nhrp_peer_match(p, sel)) { + rc = e(ctx, p); + if (rc != 0) + break; + } + } + + return rc; +} + +static int enum_interface_peers(void *ctx, struct nhrp_interface *iface) +{ + struct enum_interface_peers_ctx *ectx = + (struct enum_interface_peers_ctx *) ctx; + + return enumerate_peer_cache(&iface->peer_list, + ectx->enumerator, ectx->ctx, + ectx->sel); +} + +int nhrp_peer_foreach(nhrp_peer_enumerator e, void *ctx, + struct nhrp_peer_selector *sel) +{ + struct nhrp_interface *iface = NULL; + struct enum_interface_peers_ctx ectx = { e, ctx, sel }; + int rc; + + if (sel != NULL) + iface = sel->interface; + + rc = enumerate_peer_cache(&local_peer_list, e, ctx, sel); + if (rc != 0) + return rc; + + /* Speed optimization: TYPE_LOCAL peers cannot be found from + * other places */ + if (sel != NULL && + sel->type_mask == BIT(NHRP_PEER_TYPE_LOCAL_ADDR)) + return 0; + + if (iface == NULL) + rc = nhrp_interface_foreach(enum_interface_peers, &ectx); + else + rc = enumerate_peer_cache(&iface->peer_list, e, ctx, sel); + + return rc; +} + +struct route_decision { + struct nhrp_peer_selector sel; + struct list_head *exclude; + struct nhrp_peer *best_found; + struct nhrp_address *src; + int found_exact, found_up; +}; + +static int decide_route(void *ctx, struct nhrp_peer *peer) +{ + struct route_decision *rd = (struct route_decision *) ctx; + int exact; + + if (peer->type != 
NHRP_PEER_TYPE_SHORTCUT_ROUTE) { + /* Exclude addresses from CIE from routing decision + * to avoid routing loops within NHS clusters. */ + if (rd->exclude != NULL && + nhrp_address_match_cie_list(&peer->next_hop_address, + &peer->protocol_address, + rd->exclude)) + return 0; + + /* Exclude also source address, we don't want to + * forward questions back to who's asking. */ + if (rd->src != NULL && + nhrp_address_cmp(rd->src, &peer->protocol_address) == 0) + return 0; + } else { + /* Exclude routes that point back to the sender + * of the packet */ + if (rd->src != NULL && + nhrp_address_cmp(rd->src, &peer->next_hop_address) == 0) + return 0; + } + + exact = (peer->type >= NHRP_PEER_TYPE_DYNAMIC_NHS) && + (nhrp_address_cmp(&peer->protocol_address, + &rd->sel.protocol_address) == 0); + if (rd->found_exact > exact) + return 0; + + if (rd->found_up && !(peer->flags & NHRP_PEER_FLAG_UP)) + return 0; + + if (rd->best_found != NULL && + rd->found_exact == exact && + rd->found_up == (peer->flags & NHRP_PEER_FLAG_UP)) { + if (rd->best_found->prefix_length > peer->prefix_length) + return 0; + + if (rd->best_found->prefix_length == peer->prefix_length && + rd->best_found->last_used < peer->last_used) + return 0; + } + + rd->best_found = peer; + rd->found_exact = exact; + rd->found_up = peer->flags & NHRP_PEER_FLAG_UP; + return 0; +} + +struct nhrp_peer *nhrp_peer_route_full(struct nhrp_interface *interface, + struct nhrp_address *dst, + int flags, int type_mask, + struct nhrp_address *src, + struct list_head *exclude) +{ + struct route_decision rd; + + memset(&rd, 0, sizeof(rd)); + rd.sel.flags = flags & ~NHRP_PEER_FIND_UP; + if ((flags & (NHRP_PEER_FIND_ROUTE | NHRP_PEER_FIND_EXACT | + NHRP_PEER_FIND_SUBNET)) == 0) + rd.sel.flags |= NHRP_PEER_FIND_ROUTE; + rd.sel.type_mask = type_mask; + rd.sel.interface = interface; + rd.sel.protocol_address = *dst; + rd.exclude = exclude; + rd.src = src; + nhrp_peer_foreach(decide_route, &rd, &rd.sel); + + if (rd.best_found == NULL) + 
return NULL; + + if ((flags & NHRP_PEER_FIND_UP) && + !(rd.best_found->flags & NHRP_PEER_FLAG_UP)) + return NULL; + + rd.best_found->last_used = ev_now(); + return rd.best_found; +} + +void nhrp_peer_traffic_indication(struct nhrp_interface *iface, + uint16_t afnum, struct nhrp_address *dst) +{ + struct nhrp_peer *peer; + int type; + + /* For off-NBMA destinations, we consider all shortcut routes, + * but NBMA destinations should be exact because we want to drop + * NHS from the path. */ + if (nhrp_address_prefix_cmp(dst, &iface->protocol_address, + iface->protocol_address_prefix) != 0) + type = NHRP_PEER_FIND_ROUTE; + else + type = NHRP_PEER_FIND_EXACT; + + /* Have we done something for this destination already? */ + peer = nhrp_peer_route(iface, dst, type, + ~BIT(NHRP_PEER_TYPE_LOCAL_ROUTE)); + if (peer != NULL) + return; + + /* Initiate resolution */ + peer = nhrp_peer_alloc(iface); + peer->type = NHRP_PEER_TYPE_INCOMPLETE; + peer->afnum = afnum; + peer->protocol_type = nhrp_protocol_from_pf(dst->type); + peer->protocol_address = *dst; + peer->prefix_length = dst->addr_len * 8; + nhrp_peer_insert(peer); + nhrp_peer_put(peer); +} + +static int dump_peer(void *ctx, struct nhrp_peer *peer) +{ + int *num_total = (int *) ctx; + char tmp[NHRP_PEER_FORMAT_LEN]; + + nhrp_info("%s %s", + nhrp_peer_type[peer->type], + nhrp_peer_format(peer, sizeof(tmp), tmp)); + (*num_total)++; + return 0; +} + +void nhrp_peer_dump_cache(void) +{ + int num_total = 0; + + nhrp_info("Peer cache dump:"); + nhrp_peer_foreach(dump_peer, &num_total, NULL); + nhrp_info("Total %d peer cache entries, %d allocated entries", + num_total, nhrp_peer_num_total); +} + +void nhrp_peer_cleanup(void) +{ + ev_tstamp prev = ev_now(); + + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, NULL); + + while (nhrp_peer_num_total > 0) { + if (ev_now() > prev + 5.0) { + nhrp_info("Waiting for peers to die, %d left", nhrp_peer_num_total); + prev = ev_now(); + } + ev_loop(EVLOOP_ONESHOT); + } +} diff --git 
a/nhrp/nhrp_peer.h b/nhrp/nhrp_peer.h new file mode 100644 index 0000000..dea8d66 --- /dev/null +++ b/nhrp/nhrp_peer.h @@ -0,0 +1,194 @@ +/* nhrp_peer.h - NHRP peer cache definitions + * + * Copyright (C) 2007-2009 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#ifndef NHRP_PEER_H +#define NHRP_PEER_H + +#include +#include +#include +#include "nhrp_address.h" +#include "libev.h" +#include "list.h" + +#define NHRP_PEER_TYPE_INCOMPLETE 0x00 /* Resolution request sent */ +#define NHRP_PEER_TYPE_NEGATIVE 0x01 /* Negative cached */ +#define NHRP_PEER_TYPE_CACHED 0x02 /* Received/relayed resolution reply */ +#define NHRP_PEER_TYPE_SHORTCUT_ROUTE 0x03 /* Received/relayed resolution for route */ +#define NHRP_PEER_TYPE_DYNAMIC 0x04 /* NHC registration */ +#define NHRP_PEER_TYPE_DYNAMIC_NHS 0x05 /* Dynamic NHS from dns-map */ +#define NHRP_PEER_TYPE_STATIC 0x06 /* Static mapping from config file */ +#define NHRP_PEER_TYPE_STATIC_DNS 0x07 /* Static dns-map from config file */ +#define NHRP_PEER_TYPE_LOCAL_ROUTE 0x08 /* Non-local destination, with local route */ +#define NHRP_PEER_TYPE_LOCAL_ADDR 0x09 /* Local destination (IP or off-NBMA subnet) */ +#define NHRP_PEER_TYPE_MAX (NHRP_PEER_TYPE_LOCAL_ADDR+1) + +#define NHRP_PEER_TYPEMASK_ADJACENT \ + (BIT(NHRP_PEER_TYPE_CACHED) | \ + BIT(NHRP_PEER_TYPE_DYNAMIC) | \ + BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) | \ + BIT(NHRP_PEER_TYPE_STATIC) | \ + BIT(NHRP_PEER_TYPE_LOCAL_ADDR)) + +#define NHRP_PEER_TYPEMASK_REMOVABLE \ + (BIT(NHRP_PEER_TYPE_INCOMPLETE) | \ + BIT(NHRP_PEER_TYPE_NEGATIVE) | \ + BIT(NHRP_PEER_TYPE_CACHED) | \ + BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE) | \ + BIT(NHRP_PEER_TYPE_DYNAMIC)) + +#define NHRP_PEER_TYPEMASK_PURGEABLE \ + (NHRP_PEER_TYPEMASK_REMOVABLE | \ + 
BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) | \ + BIT(NHRP_PEER_TYPE_STATIC) | \ + BIT(NHRP_PEER_TYPE_STATIC_DNS)) + +#define NHRP_PEER_TYPEMASK_ALL \ + (NHRP_PEER_TYPEMASK_PURGEABLE | \ + BIT(NHRP_PEER_TYPE_LOCAL_ROUTE) | \ + BIT(NHRP_PEER_TYPE_LOCAL_ADDR)) + +/* For routing via NHS */ +#define NHRP_PEER_TYPEMASK_ROUTE_VIA_NHS \ + (BIT(NHRP_PEER_TYPE_DYNAMIC) | \ + BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) | \ + BIT(NHRP_PEER_TYPE_STATIC) | \ + BIT(NHRP_PEER_TYPE_LOCAL_ROUTE) | \ + BIT(NHRP_PEER_TYPE_LOCAL_ADDR)) + +#define NHRP_PEER_FLAG_UNIQUE 0x01 /* Peer is unique; see RFC2332 */ +#define NHRP_PEER_FLAG_REGISTER 0x02 /* For TYPE_STATIC: send registration */ +#define NHRP_PEER_FLAG_CISCO 0x04 /* For TYPE_STATIC: peer is Cisco */ +#define NHRP_PEER_FLAG_USED 0x10 /* Peer is in kernel ARP table */ +#define NHRP_PEER_FLAG_LOWER_UP 0x20 /* Script executed succesfully */ +#define NHRP_PEER_FLAG_UP 0x40 /* Can send all packets (registration ok) */ +#define NHRP_PEER_FLAG_REPLACED 0x80 /* Peer has been replaced */ +#define NHRP_PEER_FLAG_REMOVED 0x100 /* Deleted, but not removed from cache yet */ +#define NHRP_PEER_FLAG_MARK 0x200 /* Can be used to temporarily mark peers */ + +#define NHRP_PEER_FIND_ROUTE 0x01 +#define NHRP_PEER_FIND_EXACT 0x02 +#define NHRP_PEER_FIND_SUBNET 0x04 +#define NHRP_PEER_FIND_UP 0x10 +#define NHRP_PEER_FIND_MARK 0x20 + +struct nhrp_interface; +struct nhrp_packet; +struct nhrp_pending_request; + +union __attribute__ ((__transparent_union__)) nhrp_peer_event { + struct ev_timer *timer; + struct ev_child *child; +}; + +struct nhrp_peer { + unsigned int ref; + unsigned int flags; + + struct list_head peer_list_entry; + struct list_head mcast_list_entry; + struct hlist_node nbma_hash_entry; + + const char *purge_reason; + struct nhrp_interface *interface; + struct nhrp_peer *parent; + struct nhrp_packet *queued_packet; + struct nhrp_pending_request *request; + + struct ev_timer timer; + struct ev_child child; + struct nhrp_address_query address_query; + + uint8_t 
type; + uint8_t prefix_length; + uint16_t afnum; + uint16_t protocol_type; + uint16_t mtu, my_nbma_mtu; + ev_tstamp expire_time; + ev_tstamp last_used; + struct nhrp_address my_nbma_address; + struct nhrp_address protocol_address; + unsigned int holding_time; + + char *nbma_hostname; + /* NHRP_PEER_TYPE_ROUTE: protocol addr., others: NBMA addr. */ + struct nhrp_address next_hop_address; + struct nhrp_address next_hop_nat_oa; +}; + +struct nhrp_peer_selector { + int flags; /* NHRP_PEER_FIND_xxx */ + int type_mask; + + struct nhrp_interface *interface; + struct nhrp_peer *parent; + const char *hostname; + + int prefix_length; + struct nhrp_address protocol_address; + struct nhrp_address next_hop_address; +}; + +const char * const nhrp_peer_type[NHRP_PEER_TYPE_MAX]; +typedef int (*nhrp_peer_enumerator)(void *ctx, struct nhrp_peer *peer); + +void nhrp_peer_cleanup(void); + +struct nhrp_peer *nhrp_peer_alloc(struct nhrp_interface *iface); +struct nhrp_peer *nhrp_peer_get(struct nhrp_peer *peer); +int nhrp_peer_put(struct nhrp_peer *peer); +void nhrp_peer_cancel_async(struct nhrp_peer *peer); + +void nhrp_peer_insert(struct nhrp_peer *peer); +void nhrp_peer_remove(struct nhrp_peer *peer); +void nhrp_peer_purge(struct nhrp_peer *peer, const char *purge_reason); + +int nhrp_peer_match(struct nhrp_peer *peer, struct nhrp_peer_selector *sel); + +int nhrp_peer_foreach(nhrp_peer_enumerator e, void *ctx, + struct nhrp_peer_selector *sel); +int nhrp_peer_remove_matching(void *count, struct nhrp_peer *peer); +int nhrp_peer_purge_matching(void *count, struct nhrp_peer *peer); +int nhrp_peer_lowerdown_matching(void *count, struct nhrp_peer *peer); +int nhrp_peer_set_used_matching(void *ctx, struct nhrp_peer *peer); +struct nhrp_peer *nhrp_peer_find_by_nbma(struct nhrp_interface *iface, struct nhrp_address *nbma); + +int nhrp_peer_event_ok(union nhrp_peer_event e, int revents); +char *nhrp_peer_event_reason(union nhrp_peer_event e, int revents, + size_t buflen, char *buf); +struct 
nhrp_peer *nhrp_peer_from_event(union nhrp_peer_event e, int revents); +void nhrp_peer_run_script(struct nhrp_peer *peer, char *action, + void (*cb)(union nhrp_peer_event, int)); +void nhrp_peer_send_packet_queue(struct nhrp_peer *peer); +int nhrp_peer_discover_nhs(struct nhrp_peer *peer, + struct nhrp_address *newaddr); + +struct nhrp_peer *nhrp_peer_route_full(struct nhrp_interface *iface, + struct nhrp_address *dest, + int flags, int type_mask, + struct nhrp_address *source, + struct list_head *exclude_cie_list); + +static inline struct nhrp_peer *nhrp_peer_route(struct nhrp_interface *iface, + struct nhrp_address *dest, + int flags, int type_mask) +{ + return nhrp_peer_route_full(iface, dest, flags, type_mask, NULL, NULL); +} + +void nhrp_peer_traffic_indication(struct nhrp_interface *iface, + uint16_t afnum, struct nhrp_address *dst); +void nhrp_peer_dump_cache(void); + +void nhrp_server_finish_request(struct nhrp_pending_request *pr); + +#endif diff --git a/nhrp/nhrp_protocol.h b/nhrp/nhrp_protocol.h new file mode 100644 index 0000000..8cf213b --- /dev/null +++ b/nhrp/nhrp_protocol.h @@ -0,0 +1,130 @@ +/* nhrp_protocol.h - NHRP protocol definitions + * + * Copyright (C) 2007 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#ifndef NHRP_PROTOCOL_H +#define NHRP_PROTOCOL_H + +#include +#include "afnum.h" + +/* NHRP Version */ +#define NHRP_VERSION_RFC2332 1 + +/* NHRP Packet Types */ +#define NHRP_PACKET_RESOLUTION_REQUEST 1 +#define NHRP_PACKET_RESOLUTION_REPLY 2 +#define NHRP_PACKET_REGISTRATION_REQUEST 3 +#define NHRP_PACKET_REGISTRATION_REPLY 4 +#define NHRP_PACKET_PURGE_REQUEST 5 +#define NHRP_PACKET_PURGE_REPLY 6 +#define NHRP_PACKET_ERROR_INDICATION 7 +#define NHRP_PACKET_TRAFFIC_INDICATION 8 + +/* NHRP Extension Types */ +#define NHRP_EXTENSION_FLAG_COMPULSORY 0x8000 +#define NHRP_EXTENSION_END 0 +#define NHRP_EXTENSION_PAYLOAD 0 +#define NHRP_EXTENSION_RESPONDER_ADDRESS 3 +#define NHRP_EXTENSION_FORWARD_TRANSIT_NHS 4 +#define NHRP_EXTENSION_REVERSE_TRANSIT_NHS 5 +#define NHRP_EXTENSION_AUTHENTICATION 7 +#define NHRP_EXTENSION_VENDOR 8 +#define NHRP_EXTENSION_NAT_ADDRESS 9 + +/* NHRP Error Indication Codes */ +#define NHRP_ERROR_UNRECOGNIZED_EXTENSION constant_htons(1) +#define NHRP_ERROR_LOOP_DETECTED constant_htons(2) +#define NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE constant_htons(6) +#define NHRP_ERROR_PROTOCOL_ERROR constant_htons(7) +#define NHRP_ERROR_SDU_SIZE_EXCEEDED constant_htons(8) +#define NHRP_ERROR_INVALID_EXTENSION constant_htons(9) +#define NHRP_ERROR_INVALID_RESOLUTION_REPLY constant_htons(10) +#define NHRP_ERROR_AUTHENTICATION_FAILURE constant_htons(11) +#define NHRP_ERROR_HOP_COUNT_EXCEEDED constant_htons(15) + +/* NHRP CIE Codes */ +#define NHRP_CODE_SUCCESS 0 +#define NHRP_CODE_ADMINISTRATIVELY_PROHIBITED 4 +#define NHRP_CODE_INSUFFICIENT_RESOURCES 5 +#define NHRP_CODE_NO_BINDING_EXISTS 11 +#define NHRP_CODE_BINDING_NON_UNIQUE 13 +#define NHRP_CODE_UNIQUE_ADDRESS_REGISTERED 14 + +/* NHRP Flags for Resolution request/reply */ +#define NHRP_FLAG_RESOLUTION_SOURCE_IS_ROUTER constant_htons(0x8000) +#define NHRP_FLAG_RESOLUTION_AUTHORATIVE constant_htons(0x4000) +#define NHRP_FLAG_RESOLUTION_DESTINATION_STABLE constant_htons(0x2000) +#define 
NHRP_FLAG_RESOLUTION_UNIQUE constant_htons(0x1000) +#define NHRP_FLAG_RESOLUTION_SOURCE_STABLE constant_htons(0x0800) +#define NHRP_FLAG_RESOLUTION_NAT constant_htons(0x0002) + +/* NHRP Flags for Registration request/reply */ +#define NHRP_FLAG_REGISTRATION_UNIQUE constant_htons(0x8000) +#define NHRP_FLAG_REGISTRATION_NAT constant_htons(0x0002) + +/* NHRP Flags for Purge request/reply */ +#define NHRP_FLAG_PURGE_NO_REPLY constant_htons(0x8000) + +/* NHRP Authentication extension types (ala Cisco) */ +#define NHRP_AUTHENTICATION_PLAINTEXT constant_htonl(0x00000001) + +/* NHRP Packet Structures */ +struct nhrp_packet_header { + /* Fixed header */ + uint16_t afnum; + uint16_t protocol_type; + uint8_t snap[5]; + uint8_t hop_count; + uint16_t packet_size; + uint16_t checksum; + uint16_t extension_offset; + uint8_t version; + uint8_t type; + uint8_t src_nbma_address_len; + uint8_t src_nbma_subaddress_len; + + /* Mandatory header */ + uint8_t src_protocol_address_len; + uint8_t dst_protocol_address_len; + uint16_t flags; + union { + uint32_t request_id; + struct { + uint16_t code; + uint16_t offset; + } error; + } u; +}; + +struct nhrp_cie_header { + uint8_t code; + uint8_t prefix_length; + uint16_t unused; + uint16_t mtu; + uint16_t holding_time; + uint8_t nbma_address_len; + uint8_t nbma_subaddress_len; + uint8_t protocol_address_len; + uint8_t preference; +}; + +struct nhrp_extension_header { + uint16_t type; + uint16_t length; +}; + +struct nhrp_cisco_authentication_extension { + uint32_t type; + uint8_t secret[8]; +}; + +#endif diff --git a/nhrp/nhrp_server.c b/nhrp/nhrp_server.c new file mode 100644 index 0000000..b41e4b8 --- /dev/null +++ b/nhrp/nhrp_server.c @@ -0,0 +1,566 @@ +/* nhrp_server.c - NHRP request handling + * + * Copyright (C) 2007-2009 Timo Teräs + * All rights reserved. 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+/* NOTE(review): the names of the two system headers were lost in this copy
+ * of the patch; they are reconstructed from the identifiers used below
+ * (memset, htons/ntohs) - confirm against upstream. */
+#include <string.h>
+#include <netinet/in.h>
+
+#include "nhrp_common.h"
+#include "nhrp_packet.h"
+#include "nhrp_interface.h"
+#include "nhrp_peer.h"
+
+/* Upper bound on registration requests being processed at the same time;
+ * further requests are dropped (see nhrp_handle_registration_request). */
+#define NHRP_MAX_PENDING_REQUESTS 16
+
+/* State of one Registration Request while its CIEs are processed one by one
+ * (each CIE is authorized asynchronously through the "peer-register" script).
+ */
+struct nhrp_pending_request {
+	struct list_head request_list_entry;	/* link in request_list */
+	int natted;				/* NAT detected for requester */
+	int num_ok, num_error;			/* per-CIE result counters */
+	struct nhrp_packet *packet;		/* referenced request packet */
+	struct nhrp_cie *cie;			/* CIE currently processed */
+	struct nhrp_payload *payload;		/* CIE list being walked */
+	struct nhrp_peer *peer, *rpeer;		/* peer in progress / first
+						 * peer, used for the reply */
+	ev_tstamp now;				/* time the request was seen */
+};
+
+static struct list_head request_list = LIST_INITIALIZER(request_list);
+static int num_pending_requests = 0;
+
+static void nhrp_server_start_cie_reg(struct nhrp_pending_request *pr);
+
+/* Allocate a pending-request record for 'packet', link it to request_list
+ * and take a reference on the packet.
+ *
+ * Returns the new record, or NULL if memory allocation fails (in which case
+ * nothing is linked or counted). */
+static struct nhrp_pending_request *
+nhrp_server_record_request(struct nhrp_packet *packet)
+{
+	struct nhrp_pending_request *pr;
+
+	pr = calloc(1, sizeof(*pr));
+	if (pr == NULL)
+		return NULL;
+
+	/* Only touch the record once the allocation is known to be good. */
+	list_init(&pr->request_list_entry);
+	num_pending_requests++;
+	list_add(&pr->request_list_entry, &request_list);
+	pr->packet = nhrp_packet_get(packet);
+	pr->now = ev_now();
+
+	return pr;
+}
+
+/* Unlink and free a pending request, dropping the references it holds on
+ * the route peer, the in-progress peer and the packet. */
+void nhrp_server_finish_request(struct nhrp_pending_request *pr)
+{
+	list_del(&pr->request_list_entry);
+	if (pr->rpeer != NULL) {
+		struct nhrp_peer *peer = pr->rpeer;
+		if (peer->flags & NHRP_PEER_FLAG_REPLACED) {
+			/* The route peer entry was not accepted. We still
+			 * send the replies here, and cancel anything pending
+			 * so it'll get deleted cleanly on next put(). */
+			nhrp_peer_send_packet_queue(peer);
+			nhrp_peer_cancel_async(peer);
+		}
+		nhrp_peer_put(pr->rpeer);
+	}
+	if (pr->peer != NULL)
+		nhrp_peer_put(pr->peer);
+	if (pr->packet != NULL)
+		nhrp_packet_put(pr->packet);
+	free(pr);
+	num_pending_requests--;
+}
+
+/* Returns TRUE if a request with the same source NBMA address, source
+ * protocol address and destination protocol address as 'packet' is already
+ * on request_list, FALSE otherwise. */
+static int nhrp_server_request_pending(struct nhrp_packet *packet)
+{
+	struct nhrp_pending_request *r;
+
+	list_for_each_entry(r, &request_list, request_list_entry) {
+		if (nhrp_address_cmp(&packet->src_nbma_address,
+				     &r->packet->src_nbma_address) != 0)
+			continue;
+		if (nhrp_address_cmp(&packet->src_protocol_address,
+				     &r->packet->src_protocol_address) != 0)
+			continue;
+		if (nhrp_address_cmp(&packet->dst_protocol_address,
+				     &r->packet->dst_protocol_address) != 0)
+			continue;
+
+		/* Request from the same address being already processed */
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/* Handle a Resolution Request: flush negative cache entries for the
+ * requester, turn the packet into a Resolution Reply carrying a single
+ * success CIE, reroute it and send it.
+ *
+ * Returns FALSE if the CIE cannot be allocated or the packet cannot be
+ * rerouted; otherwise the result of nhrp_packet_send(). */
+static int nhrp_handle_resolution_request(struct nhrp_packet *packet)
+{
+	char tmp[64], tmp2[64];
+	struct nhrp_payload *payload;
+	struct nhrp_peer *peer = packet->dst_peer;
+	struct nhrp_peer_selector sel;
+	struct nhrp_cie *cie;
+
+	nhrp_info("Received Resolution Request from proto src %s to %s",
+		  nhrp_address_format(&packet->src_protocol_address,
+				      sizeof(tmp), tmp),
+		  nhrp_address_format(&packet->dst_protocol_address,
+				      sizeof(tmp2), tmp2));
+
+	/* As first thing, flush all negative entries for the
+	 * requestor */
+	memset(&sel, 0, sizeof(sel));
+	sel.flags = NHRP_PEER_FIND_EXACT;
+	sel.type_mask = BIT(NHRP_PEER_TYPE_NEGATIVE);
+	sel.interface = packet->src_iface;
+	sel.protocol_address = packet->src_protocol_address;
+	nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel);
+
+	/* Send reply */
+	packet->hdr.type = NHRP_PACKET_RESOLUTION_REPLY;
+	packet->hdr.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT;
+	packet->hdr.flags &= NHRP_FLAG_RESOLUTION_SOURCE_IS_ROUTER |
+			     NHRP_FLAG_RESOLUTION_SOURCE_STABLE |
+			     NHRP_FLAG_RESOLUTION_UNIQUE |
+			     NHRP_FLAG_RESOLUTION_NAT;
+	packet->hdr.flags |= NHRP_FLAG_RESOLUTION_DESTINATION_STABLE |
+			     NHRP_FLAG_RESOLUTION_AUTHORATIVE;
+
+	cie = nhrp_cie_alloc();
+	if (cie == NULL)
+		return FALSE;
+
+	cie->hdr = (struct nhrp_cie_header) {
+		.code = NHRP_CODE_SUCCESS,
+		.prefix_length = peer->prefix_length,
+	};
+	/* Holding time: peer specific value first, then the interface one,
+	 * finally the compile time default. */
+	if (peer->holding_time)
+		cie->hdr.holding_time = htons(peer->holding_time);
+	else if (peer->interface != NULL)
+		cie->hdr.holding_time = htons(peer->interface->holding_time);
+	else
+		cie->hdr.holding_time = NHRP_DEFAULT_HOLDING_TIME;
+
+	/* Replace whatever payload the request had with our CIE list */
+	payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_ANY);
+	nhrp_payload_free(payload);
+	nhrp_payload_set_type(payload, NHRP_PAYLOAD_TYPE_CIE_LIST);
+	nhrp_payload_add_cie(payload, cie);
+
+	if (!nhrp_packet_reroute(packet, NULL))
+		return FALSE;
+
+	/* Fill in the addresses only after rerouting, from the peer and
+	 * interface the reply will actually leave through. */
+	peer = packet->dst_peer;
+	cie->hdr.mtu = htons(peer->my_nbma_mtu);
+	cie->nbma_address = peer->my_nbma_address;
+	cie->protocol_address = packet->dst_iface->protocol_address;
+
+	nhrp_info("Sending Resolution Reply %s/%d is-at %s (holdtime %d)",
+		  nhrp_address_format(&packet->dst_protocol_address,
+				      sizeof(tmp), tmp),
+		  cie->hdr.prefix_length,
+		  nhrp_address_format(&cie->nbma_address,
+				      sizeof(tmp2), tmp2),
+		  ntohs(cie->hdr.holding_time));
+
+	/* Reset NAT header to regenerate it for reply */
+	payload = nhrp_packet_extension(packet,
+					NHRP_EXTENSION_NAT_ADDRESS |
+					NHRP_EXTENSION_FLAG_NOCREATE,
+					NHRP_PAYLOAD_TYPE_ANY);
+	if (payload != NULL) {
+		nhrp_payload_free(payload);
+		nhrp_payload_set_type(payload, NHRP_PAYLOAD_TYPE_CIE_LIST);
+	}
+
+	return nhrp_packet_send(packet);
+}
+
+/* nhrp_peer_foreach() callback that returns 1 for any peer it is given;
+ * used to test whether a selector matches at least one peer. */
+static int find_one(void *ctx, struct nhrp_peer *p)
+{
+	return 1;
+}
+
+/* nhrp_peer_foreach() callback: 'ctx' is the newly registered peer, 'p' an
+ * existing entry it replaces. Carries the UP/LOWER_UP flags over when 'p'
+ * describes the same binding, then marks 'p' replaced and removes it. */
+static int remove_old_registrations(void *ctx, struct nhrp_peer *p)
+{
+	struct nhrp_peer *peer = (struct nhrp_peer *) ctx;
+
+	/* If re-registration, mark the new connection up */
+	if (nhrp_address_cmp(&peer->protocol_address,
+			     &p->protocol_address) == 0 &&
+	    nhrp_address_cmp(&peer->next_hop_address,
+			     &p->next_hop_address) == 0 &&
+	    peer->prefix_length == p->prefix_length)
+		peer->flags |= p->flags & (NHRP_PEER_FLAG_UP |
+					   NHRP_PEER_FLAG_LOWER_UP);
+
+	p->flags |= NHRP_PEER_FLAG_REPLACED;
+	nhrp_peer_remove(p);
+	return 0;
+}
+
+/* All CIEs of a registration have been processed: send the Registration
+ * Reply through the route peer (or log that it is dropped) and release the
+ * pending request. */
+static void nhrp_server_finish_reg(struct nhrp_pending_request *pr)
+{
+	char tmp[64], tmp2[64];
+	struct nhrp_packet *packet = pr->packet;
+
+	if (pr->rpeer != NULL &&
+	    nhrp_packet_reroute(packet, pr->rpeer)) {
+		nhrp_info("Sending Registration Reply from proto src %s to %s (%d bindings accepted, %d rejected)",
+			  nhrp_address_format(&packet->dst_protocol_address,
+					      sizeof(tmp), tmp),
+			  nhrp_address_format(&packet->src_protocol_address,
+					      sizeof(tmp2), tmp2),
+			  pr->num_ok, pr->num_error);
+
+		nhrp_packet_send(packet);
+	} else {
+		/* We could not create route peer entry (likely out of memory),
+		 * so we can't do much more here. */
+		nhrp_info("Dropping Registration Reply from proto src %s to %s",
+			  nhrp_address_format(&packet->dst_protocol_address,
+					      sizeof(tmp), tmp),
+			  nhrp_address_format(&packet->src_protocol_address,
+					      sizeof(tmp2), tmp2));
+	}
+
+	nhrp_server_finish_request(pr);
+}
+
+/* Completion callback for one CIE of a registration. It is invoked by the
+ * "peer-register" script machinery, or directly with revents == 0 when a
+ * conflicting entry was found. Records the result in the CIE, inserts the
+ * peer on success, then moves on to the next CIE or finishes the request. */
+static void nhrp_server_finish_cie_reg_cb(union nhrp_peer_event e, int revents)
+{
+	struct nhrp_peer *peer;
+	struct nhrp_pending_request *pr;
+	struct nhrp_packet *packet;
+	struct nhrp_cie *cie;
+	struct nhrp_peer_selector sel;
+	char tmp[64], reason[32];
+
+	peer = nhrp_peer_from_event(e, revents);
+	pr = peer->request;
+	packet = pr->packet;
+	cie = pr->cie;
+
+	peer->request = NULL;
+	nhrp_address_format(&peer->protocol_address, sizeof(tmp), tmp);
+	if (revents != 0 && nhrp_peer_event_ok(e, revents)) {
+		nhrp_debug("[%s] Peer registration authorized", tmp);
+
+		/* Remove all old stuff and accept registration */
+		memset(&sel, 0, sizeof(sel));
+		sel.flags = NHRP_PEER_FIND_EXACT;
+		sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE;
+		sel.interface = packet->src_iface;
+		sel.protocol_address = peer->protocol_address;
+		sel.prefix_length = peer->prefix_length;
+		nhrp_peer_foreach(remove_old_registrations, peer, &sel);
+
+		pr->num_ok++;
+		cie->hdr.code = NHRP_CODE_SUCCESS;
+		nhrp_peer_insert(peer);
+	} else {
+		if (revents == 0)
+			nhrp_error("[%s] Peer registration failed: "
+				   "static entry exists", tmp);
+		else
+			nhrp_error("[%s] Peer registration failed: %s",
+				   tmp,
+				   nhrp_peer_event_reason(e, revents,
+							  sizeof(reason),
+							  reason));
+		pr->num_error++;
+		cie->hdr.code = NHRP_CODE_ADMINISTRATIVELY_PROHIBITED;
+		peer->flags |= NHRP_PEER_FLAG_REPLACED;
+	}
+	/* The first processed peer (accepted or not) is kept as the route
+	 * peer through which the reply is sent. */
+	if (pr->rpeer == NULL)
+		pr->rpeer = nhrp_peer_get(peer);
+
+	nhrp_peer_put(peer);
+	pr->peer = NULL;
+
+	/* Process next CIE or finish registration handling */
+	if (cie->cie_list_entry.next != &pr->payload->u.cie_list) {
+		pr->cie = list_next(&cie->cie_list_entry, struct nhrp_cie, cie_list_entry);
+		nhrp_server_start_cie_reg(pr);
+	} else {
+		nhrp_server_finish_reg(pr);
+	}
+
+}
+
+/* Start processing pr->cie: build a dynamic peer entry from the CIE and the
+ * request packet, reject it at once if a non-removable entry already covers
+ * the same prefix, otherwise run the "peer-register" script. The result is
+ * delivered to nhrp_server_finish_cie_reg_cb(). */
+static void nhrp_server_start_cie_reg(struct nhrp_pending_request *pr)
+{
+	struct nhrp_cie *cie = pr->cie;
+	struct nhrp_packet *packet = pr->packet;
+	struct nhrp_peer *peer;
+	struct nhrp_peer_selector sel;
+
+	peer = nhrp_peer_alloc(packet->src_iface);
+	if (peer == NULL) {
+		/* Mark all remaining registration requests as failed
+		 * due to lack of memory, and send reply */
+		for (; cie->cie_list_entry.next != &pr->payload->u.cie_list;
+		     cie = list_next(&cie->cie_list_entry, struct nhrp_cie, cie_list_entry)) {
+			pr->num_error++;
+			cie->hdr.code = NHRP_CODE_INSUFFICIENT_RESOURCES;
+		}
+		/* The loop stops on the last CIE without marking it */
+		pr->num_error++;
+		cie->hdr.code = NHRP_CODE_INSUFFICIENT_RESOURCES;
+		nhrp_server_finish_reg(pr);
+		return;
+	}
+
+	peer->type = NHRP_PEER_TYPE_DYNAMIC;
+	peer->afnum = packet->hdr.afnum;
+	peer->protocol_type = packet->hdr.protocol_type;
+	peer->expire_time = pr->now + ntohs(cie->hdr.holding_time);
+	peer->mtu = ntohs(cie->hdr.mtu);
+	/* An empty address in the CIE means "use the packet's source" */
+	if (cie->nbma_address.addr_len != 0)
+		peer->next_hop_address = cie->nbma_address;
+	else
+		peer->next_hop_address = packet->src_nbma_address;
+
+	if (pr->natted) {
+		/* Keep the announced address as NAT-OA and talk to the
+		 * address the packet really came from. */
+		peer->next_hop_nat_oa = peer->next_hop_address;
+		peer->next_hop_address = packet->src_linklayer_address;
+	}
+
+	if (cie->protocol_address.addr_len != 0)
+		peer->protocol_address = cie->protocol_address;
+	else
+		peer->protocol_address = packet->src_protocol_address;
+
+	/* Prefix length 0xff is replaced by the full address length */
+	peer->prefix_length = cie->hdr.prefix_length;
+	if (peer->prefix_length == 0xff)
+		peer->prefix_length = peer->protocol_address.addr_len * 8;
+
+	memset(&sel, 0, sizeof(sel));
+	sel.flags = NHRP_PEER_FIND_EXACT;
+	sel.type_mask = ~NHRP_PEER_TYPEMASK_REMOVABLE;
+	sel.interface = packet->src_iface;
+	sel.protocol_address = peer->protocol_address;
+	sel.prefix_length = peer->prefix_length;
+
+	/* Link the created peer and pending request structures */
+	pr->peer = peer;
+	peer->request = pr;
+
+	/* Check that there is no conflicting peers */
+	if (nhrp_peer_foreach(find_one, peer, &sel) != 0) {
+		cie->hdr.code = NHRP_CODE_ADMINISTRATIVELY_PROHIBITED;
+		peer->flags |= NHRP_PEER_FLAG_REPLACED;
+		nhrp_server_finish_cie_reg_cb(&peer->child, 0);
+	} else {
+		nhrp_peer_run_script(peer, "peer-register",
+				     nhrp_server_finish_cie_reg_cb);
+	}
+}
+
+/* Handle a Registration Request: ignore resends of a request already in
+ * progress, enforce NHRP_MAX_PENDING_REQUESTS, add the Cisco NAT CIE when
+ * applicable, turn the packet into a reply and start processing its CIEs.
+ *
+ * Returns FALSE only if the pending-request record cannot be allocated;
+ * every other path, including the drop paths, returns TRUE. */
+static int nhrp_handle_registration_request(struct nhrp_packet *packet)
+{
+	char tmp[64], tmp2[64];
+	struct nhrp_payload *payload;
+	struct nhrp_cie *cie;
+	struct nhrp_pending_request *pr;
+	int natted = 0;
+
+	nhrp_info("Received Registration Request from proto src %s to %s",
+		  nhrp_address_format(&packet->src_protocol_address,
+				      sizeof(tmp), tmp),
+		  nhrp_address_format(&packet->dst_protocol_address,
+				      sizeof(tmp2), tmp2));
+
+	if (nhrp_server_request_pending(packet)) {
+		nhrp_info("Already processing: resent packet ignored.");
+		return TRUE;
+	}
+
+	if (num_pending_requests >= NHRP_MAX_PENDING_REQUESTS) {
+		/* We should probably send Registration Reply with CIE
+		 * error NHRP_CODE_INSUFFICIENT_RESOURCES, or an Error
+		 * Indication. However, we do not have a direct peer entry
+		 * nor can we make sure that the lower layer is up, so
+		 * we just lamely drop the packet for now. */
+		nhrp_info("Too many pending requests: dropping this one");
+		return TRUE;
+	}
+
+	/* Cisco NAT extension, CIE added IF all of the following is true:
+	 * 1. We are the first hop registration server
+	 *    (=no entries in forward transit CIE list)
+	 * 2. NAT is detected (link layer address != announced address)
+	 * 3. NAT extension is requested */
+	payload = nhrp_packet_extension(packet,
+					NHRP_EXTENSION_FORWARD_TRANSIT_NHS |
+					NHRP_EXTENSION_FLAG_NOCREATE,
+					NHRP_PAYLOAD_TYPE_CIE_LIST);
+	if (payload != NULL && list_empty(&payload->u.cie_list) &&
+	    packet->src_linklayer_address.type != PF_UNSPEC &&
+	    nhrp_address_cmp(&packet->src_nbma_address,
+			     &packet->src_linklayer_address) != 0) {
+		natted = 1;
+		payload = nhrp_packet_extension(packet,
+						NHRP_EXTENSION_NAT_ADDRESS |
+						NHRP_EXTENSION_FLAG_NOCREATE,
+						NHRP_PAYLOAD_TYPE_CIE_LIST);
+		if (payload != NULL) {
+			cie = nhrp_cie_alloc();
+			if (cie != NULL) {
+				cie->nbma_address = packet->src_linklayer_address;
+				cie->protocol_address = packet->src_protocol_address;
+				nhrp_payload_add_cie(payload, cie);
+			}
+		}
+	}
+
+	packet->hdr.type = NHRP_PACKET_REGISTRATION_REPLY;
+	packet->hdr.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT;
+	packet->hdr.flags &= NHRP_FLAG_REGISTRATION_UNIQUE |
+			     NHRP_FLAG_REGISTRATION_NAT;
+
+	/* nhrp_handle_traffic_indication() treats a NULL payload as
+	 * possible, so do not dereference it blindly here either. */
+	payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST);
+	if (payload == NULL || list_empty(&payload->u.cie_list)) {
+		nhrp_error("Received registration request has no CIEs");
+		return TRUE;
+	}
+
+	/* Start processing the CIEs */
+	pr = nhrp_server_record_request(packet);
+	if (pr == NULL) {
+		/* Same result as the allocation failure path of the
+		 * resolution request handler. */
+		nhrp_error("Out of memory: dropping registration request");
+		return FALSE;
+	}
+	pr->natted = natted;
+	pr->payload = payload;
+
+	pr->cie = nhrp_payload_get_cie(payload, 1);
+	nhrp_server_start_cie_reg(pr);
+
+	return TRUE;
+}
+
+/* nhrp_peer_foreach() callback for purge handling; 'ctx' is the NBMA
+ * address from the purge CIE. Removes 'peer' when that address is the "any"
+ * address, or when it equals the peer's NBMA address (for shortcut routes:
+ * the NBMA address of the adjacent next hop, if one is found). */
+static int remove_peer_by_nbma(void *ctx, struct nhrp_peer *peer)
+{
+	struct nhrp_address *nbma = ctx;
+	struct nhrp_address *peer_nbma = NULL;
+
+	if (!nhrp_address_is_any_addr(nbma)) {
+		if (peer->type == NHRP_PEER_TYPE_SHORTCUT_ROUTE) {
+			struct nhrp_peer *nexthop;
+
+			nexthop = nhrp_peer_route(peer->interface,
+						  &peer->next_hop_address,
+						  NHRP_PEER_FIND_EXACT,
+						  NHRP_PEER_TYPEMASK_ADJACENT);
+			if (nexthop != NULL)
+				peer_nbma = &nexthop->next_hop_address;
+		} else {
+			peer_nbma = &peer->next_hop_address;
+		}
+	} else {
+		/* Wildcard: compare the address with itself => match */
+		peer_nbma = nbma;
+	}
+
+	if (peer_nbma != NULL &&
+	    nhrp_address_cmp(peer_nbma, nbma) == 0)
+		nhrp_peer_remove(peer);
+
+	return 0;
+}
+
+/* Handle a Purge Request: send the Purge Reply unless the requester set the
+ * no-reply flag, then remove every removable peer matching each CIE and
+ * clear the rate limit state for the purged prefix.
+ *
+ * Returns the result of sending the reply (FALSE if it could not be
+ * rerouted), or TRUE when no reply was requested. */
+static int nhrp_handle_purge_request(struct nhrp_packet *packet)
+{
+	char tmp[64], tmp2[64];
+	struct nhrp_peer_selector sel;
+	struct nhrp_payload *payload;
+	struct nhrp_cie *cie;
+	int flags, ret = TRUE;
+
+	nhrp_info("Received Purge Request from proto src %s to %s",
+		  nhrp_address_format(&packet->src_protocol_address,
+				      sizeof(tmp), tmp),
+		  nhrp_address_format(&packet->dst_protocol_address,
+				      sizeof(tmp2), tmp2));
+
+	/* Remember the request flags; the reply goes out with none set */
+	flags = packet->hdr.flags;
+	packet->hdr.type = NHRP_PACKET_PURGE_REPLY;
+	packet->hdr.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT;
+	packet->hdr.flags = 0;
+
+	if (!(flags & NHRP_FLAG_PURGE_NO_REPLY)) {
+		if (nhrp_packet_reroute(packet, NULL))
+			ret = nhrp_packet_send(packet);
+		else
+			ret = FALSE;
+	}
+
+	/* nhrp_handle_traffic_indication() treats a NULL payload as
+	 * possible, so do not dereference it blindly here either. */
+	payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST);
+	if (payload == NULL)
+		return ret;
+
+	list_for_each_entry(cie, &payload->u.cie_list, cie_list_entry) {
+		nhrp_info("Purge proto %s/%d nbma %s",
+			  nhrp_address_format(&cie->protocol_address,
+					      sizeof(tmp), tmp),
+			  cie->hdr.prefix_length,
+			  nhrp_address_format(&cie->nbma_address,
+					      sizeof(tmp2), tmp2));
+
+		memset(&sel, 0, sizeof(sel));
+		sel.flags = NHRP_PEER_FIND_EXACT;
+		sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE;
+		sel.interface = packet->src_iface;
+		sel.protocol_address = cie->protocol_address;
+		sel.prefix_length = cie->hdr.prefix_length;
+		nhrp_peer_foreach(remove_peer_by_nbma,
+				  &cie->nbma_address, &sel);
+		nhrp_rate_limit_clear(&cie->protocol_address,
+				      cie->hdr.prefix_length);
+	}
+
+	return ret;
+}
+
+/* Handle a Traffic Indication: parse the destination address out of the
+ * raw payload and, if shortcuts are enabled on the receiving interface,
+ * pass it to nhrp_peer_traffic_indication().
+ *
+ * Returns FALSE if there is no raw payload or it cannot be parsed. */
+static int nhrp_handle_traffic_indication(struct nhrp_packet *packet)
+{
+	char tmp[64], tmp2[64];
+	struct nhrp_address dst;
+	struct nhrp_payload *pl;
+
+	pl = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_RAW);
+	if (pl == NULL)
+		return FALSE;
+
+	if (!nhrp_address_parse_packet(packet->hdr.protocol_type,
+				       pl->u.raw->length, pl->u.raw->data,
+				       NULL, &dst))
+		return FALSE;
+
+	/* Shortcuts enabled? */
+	if (packet->src_iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT) {
+		nhrp_info("Traffic Indication from proto src %s; "
+			  "about packet to %s",
+			  nhrp_address_format(&packet->src_protocol_address,
+					      sizeof(tmp), tmp),
+			  nhrp_address_format(&dst, sizeof(tmp2), tmp2));
+
+		nhrp_peer_traffic_indication(packet->src_iface,
+					     packet->hdr.afnum,
+					     &dst);
+	} else {
+		nhrp_info("Traffic Indication ignored from proto src %s; "
+			  "about packet to %s",
+			  nhrp_address_format(&packet->src_protocol_address,
+					      sizeof(tmp), tmp),
+			  nhrp_address_format(&dst, sizeof(tmp2), tmp2));
+	}
+
+	return TRUE;
+}
+
+/* Register the request handlers of this file with the packet layer. */
+void server_init(void)
+{
+	nhrp_packet_hook_request(NHRP_PACKET_RESOLUTION_REQUEST,
+				 nhrp_handle_resolution_request);
+	nhrp_packet_hook_request(NHRP_PACKET_REGISTRATION_REQUEST,
+				 nhrp_handle_registration_request);
+	nhrp_packet_hook_request(NHRP_PACKET_PURGE_REQUEST,
+				 nhrp_handle_purge_request);
+	nhrp_packet_hook_request(NHRP_PACKET_TRAFFIC_INDICATION,
+				 nhrp_handle_traffic_indication);
+}
diff --git a/nhrp/opennhrp.c b/nhrp/opennhrp.c
new file mode 100644
index 0000000..8ba870d
--- /dev/null
+++ b/nhrp/opennhrp.c
@@ -0,0 +1,524 @@
+/* opennhrp.c - OpenNHRP main routines
+ *
+ * Copyright (C) 2007-2009 Timo Teräs
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+/* NOTE(review): the names of the system headers were lost in this copy of
+ * the patch; the list below is reconstructed from the identifiers this file
+ * uses - confirm against upstream. */
+#include <stdio.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+
+#include "nhrp_common.h"
+#include "nhrp_peer.h"
+#include "nhrp_interface.h"
+
+const char *nhrp_version_string =
+	"OpenNHRP " OPENNHRP_VERSION
+#ifdef NHRP_NO_NBMA_GRE
+	" (no NBMA GRE support)"
+#endif
+	;
+
+const char *nhrp_admin_socket = OPENNHRP_ADMIN_SOCKET;
+const char *nhrp_pid_file = "/var/run/opennhrp.pid";
+const char *nhrp_config_file = "/etc/opennhrp/opennhrp.conf";
+const char *nhrp_script_file = "/etc/opennhrp/opennhrp-script";
+int nhrp_verbose = 0;
+int nhrp_running = FALSE;
+
+/* Descriptor of the locked pid file, or -1 when there is none. (0 is a
+ * valid descriptor and must not be used as the "none" marker.) */
+static int pid_file_fd = -1;
+
+/* Dump 'bytes' bytes of 'buf' to stderr as hex, sixteen per row, each row
+ * followed by the printable form of the same bytes ('.' for characters that
+ * are not isgraph()). 'name' is printed as a heading. */
+void nhrp_hex_dump(const char *name, const uint8_t *buf, int bytes)
+{
+	int i, j;
+	int left;
+
+	fprintf(stderr, "%s:\n", name);
+	for (i = 0; i < bytes; i++) {
+		fprintf(stderr, "%02X ", buf[i]);
+		if (i % 0x10 == 0x0f) {
+			/* Row complete: append its character column */
+			fprintf(stderr, " ");
+			for (j = 0; j < 0x10; j++)
+				fprintf(stderr, "%c", isgraph(buf[i+j-0xf]) ?
+					buf[i+j-0xf]: '.');
+			fprintf(stderr, "\n");
+		}
+	}
+
+	/* Partial last row: pad the hex column, then print the characters */
+	left = i % 0x10;
+	if (left != 0) {
+		fprintf(stderr, "%*s ", 3 * (0x10 - left), "");
+
+		for (j = 0; j < left; j++)
+			fprintf(stderr, "%c", isgraph(buf[i+j-left]) ?
+				buf[i+j-left]: '.');
+		fprintf(stderr, "\n");
+	}
+	fprintf(stderr, "\n");
+}
+
+/* libev signal callback: SIGUSR1 dumps the peer cache, SIGINT/SIGTERM stop
+ * the event loop, SIGHUP removes all removable peer entries. */
+static void handle_signal_cb(struct ev_signal *w, int revents)
+{
+	struct nhrp_peer_selector sel;
+
+	switch (w->signum) {
+	case SIGUSR1:
+		nhrp_peer_dump_cache();
+		break;
+	case SIGINT:
+	case SIGTERM:
+		ev_unloop(EVUNLOOP_ALL);
+		break;
+	case SIGHUP:
+		memset(&sel, 0, sizeof(sel));
+		sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE;
+		nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel);
+		break;
+	}
+}
+
+static int hook_signal[] = { SIGUSR1, SIGHUP, SIGINT, SIGTERM };
+static ev_signal signal_event[ARRAY_SIZE(hook_signal)];
+
+/* Start one libev signal watcher per entry of hook_signal[]. */
+static void signal_init(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hook_signal); i++) {
+		ev_signal_init(&signal_event[i], handle_signal_cb,
+			       hook_signal[i]);
+		ev_signal_start(&signal_event[i]);
+	}
+}
+
+/* Read the next whitespace delimited word from 'in' into 'word' (at most
+ * len-1 characters, always NUL terminated). Leading whitespace and '#'
+ * comments running to the end of a line are skipped; '*lineno' is advanced
+ * for every newline consumed.
+ *
+ * Returns TRUE if a word was read, FALSE at end of file (in which case
+ * 'word' is left untouched). */
+static int read_word(FILE *in, int *lineno, size_t len, char *word)
+{
+	int ch, comment = 0;
+	size_t i;
+
+	if (len == 0)
+		return FALSE;
+
+	ch = fgetc(in);
+	while (1) {
+		if (ch == EOF)
+			return FALSE;
+		if (ch == '#')
+			comment = 1;
+		if (!comment && !isspace(ch))
+			break;
+		if (ch == '\n') {
+			(*lineno)++;
+			comment = 0;
+		}
+		ch = fgetc(in);
+	}
+
+	/* "i + 1 < len" rather than "i < len-1": no signed/unsigned mix */
+	for (i = 0; i + 1 < len && !isspace(ch); i++) {
+		word[i] = ch;
+		ch = fgetc(in);
+		if (ch == EOF)
+			break;
+		if (ch == '\n')
+			(*lineno)++;
+	}
+	word[i] = 0;
+
+	return TRUE;
+}
+
+/* Parse the configuration file and create the interfaces and peers it
+ * describes. Parsing stops at the first error, which is logged together
+ * with the file name, line number and the keyword being processed.
+ *
+ * Returns TRUE on success, FALSE if the file cannot be opened or contains
+ * an error. */
+static int load_config(const char *config_file)
+{
+#define NEED_INTERFACE() if (iface == NULL) { rc = 2; break; } peer = NULL;
+#define NEED_PEER() if (peer == NULL || peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) { rc = 3; break; }
+/* Read a mandatory argument; a missing one is an error instead of leaving
+ * the buffer uninitialized or holding the keyword itself. */
+#define NEED_WORD(buf) if (!read_word(in, &lineno, sizeof(buf), buf)) { rc = 1; break; }
+/* Bail out on a failed allocation instead of dereferencing NULL. */
+#define NEED_ALLOC(ptr) if ((ptr) == NULL) { rc = 8; break; }
+
+	/* Indexed by 'rc' */
+	static const char *errors[] = {
+		"syntax error",
+		"missing keyword",
+		"keyword valid only for 'interface' definition",
+		"keyword valid only for 'map' definition",
+		"invalid address",
+		"dynamic-map requires a network address",
+		"bad multicast destination",
+		"keyword valid only for 'interace' and 'shortcut-target' definition",
+		"out of memory",
+	};
+	struct nhrp_interface *iface = NULL;
+	struct nhrp_peer *peer = NULL;
+	struct nhrp_address paddr;
+	char word[32], nbma[32], addr[32];
+	FILE *in;
+	int lineno = 1, rc = -1;
+
+	in = fopen(config_file, "r");
+	if (in == NULL) {
+		nhrp_error("Unable to open configuration file '%s'.",
+			   config_file);
+		return FALSE;
+	}
+
+	while (read_word(in, &lineno, sizeof(word), word)) {
+		if (strcmp(word, "interface") == 0) {
+			if (!read_word(in, &lineno, sizeof(word), word)) {
+				rc = 1;
+				break;
+			}
+			iface = nhrp_interface_get_by_name(word, TRUE);
+			if (iface != NULL)
+				iface->flags |= NHRP_INTERFACE_FLAG_CONFIGURED;
+			peer = NULL;
+		} else if (strcmp(word, "shortcut-target") == 0) {
+			NEED_INTERFACE();
+			if (!read_word(in, &lineno, sizeof(addr), addr)) {
+				rc = 1;
+				break;
+			}
+			peer = nhrp_peer_alloc(iface);
+			NEED_ALLOC(peer);
+			peer->type = NHRP_PEER_TYPE_LOCAL_ADDR;
+			peer->afnum = AFNUM_RESERVED;
+			if (!nhrp_address_parse(addr, &peer->protocol_address,
+						&peer->prefix_length)) {
+				rc = 4;
+				break;
+			}
+			peer->protocol_type = nhrp_protocol_from_pf(peer->protocol_address.type);
+			nhrp_peer_insert(peer);
+			nhrp_peer_put(peer);
+		} else if (strcmp(word, "dynamic-map") == 0) {
+			NEED_INTERFACE();
+			NEED_WORD(addr);
+			NEED_WORD(nbma);
+
+			peer = nhrp_peer_alloc(iface);
+			NEED_ALLOC(peer);
+			peer->type = NHRP_PEER_TYPE_STATIC_DNS;
+			if (!nhrp_address_parse(addr, &peer->protocol_address,
+						&peer->prefix_length)) {
+				rc = 4;
+				break;
+			}
+			if (!nhrp_address_is_network(&peer->protocol_address,
+						     peer->prefix_length)) {
+				rc = 5;
+				break;
+			}
+			peer->protocol_type = nhrp_protocol_from_pf(
+				peer->protocol_address.type);
+			peer->nbma_hostname = strdup(nbma);
+			peer->afnum = nhrp_afnum_from_pf(
+				peer->next_hop_address.type);
+			nhrp_peer_insert(peer);
+			nhrp_peer_put(peer);
+		} else if (strcmp(word, "map") == 0) {
+			NEED_INTERFACE();
+			NEED_WORD(addr);
+			NEED_WORD(nbma);
+
+			peer = nhrp_peer_alloc(iface);
+			NEED_ALLOC(peer);
+			peer->type = NHRP_PEER_TYPE_STATIC;
+			if (!nhrp_address_parse(addr, &peer->protocol_address,
+						&peer->prefix_length)) {
+				rc = 4;
+				break;
+			}
+			peer->protocol_type = nhrp_protocol_from_pf(
+				peer->protocol_address.type);
+			/* Not a literal address: keep it as a host name */
+			if (!nhrp_address_parse(nbma, &peer->next_hop_address,
+						NULL))
+				peer->nbma_hostname = strdup(nbma);
+			peer->afnum = nhrp_afnum_from_pf(peer->next_hop_address.type);
+			nhrp_peer_insert(peer);
+			nhrp_peer_put(peer);
+		} else if (strcmp(word, "register") == 0) {
+			NEED_PEER();
+			peer->flags |= NHRP_PEER_FLAG_REGISTER;
+		} else if (strcmp(word, "cisco") == 0) {
+			NEED_PEER();
+			peer->flags |= NHRP_PEER_FLAG_CISCO;
+		} else if (strcmp(word, "holding-time") == 0) {
+			NEED_WORD(word);
+			if (peer != NULL &&
+			    peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) {
+				peer->holding_time = atoi(word);
+			} else if (iface != NULL) {
+				iface->holding_time = atoi(word);
+				peer = NULL;
+			} else {
+				/* Stop here so the error is reported at
+				 * this position and not overwritten. */
+				rc = 7;
+				break;
+			}
+		} else if (strcmp(word, "cisco-authentication") == 0) {
+			struct nhrp_buffer *buf;
+			struct nhrp_cisco_authentication_extension *auth;
+
+			NEED_INTERFACE();
+			NEED_WORD(word);
+
+			/* The buffer holds the 32-bit type followed by the
+			 * secret, without NUL terminator. */
+			buf = nhrp_buffer_alloc(strlen(word) + sizeof(uint32_t));
+			NEED_ALLOC(buf);
+			auth = (struct nhrp_cisco_authentication_extension *) buf->data;
+			auth->type = NHRP_AUTHENTICATION_PLAINTEXT;
+			memcpy(auth->secret, word, strlen(word));
+
+			iface->auth_token = buf;
+		} else if (strcmp(word, "route-table") == 0) {
+			NEED_INTERFACE();
+			NEED_WORD(word);
+			iface->route_table = atoi(word);
+		} else if (strcmp(word, "shortcut") == 0) {
+			NEED_INTERFACE();
+			iface->flags |= NHRP_INTERFACE_FLAG_SHORTCUT;
+		} else if (strcmp(word, "redirect") == 0) {
+			NEED_INTERFACE();
+			iface->flags |= NHRP_INTERFACE_FLAG_REDIRECT;
+		} else if (strcmp(word, "non-caching") == 0) {
+			NEED_INTERFACE();
+			iface->flags |= NHRP_INTERFACE_FLAG_NON_CACHING;
+		} else if (strcmp(word, "shortcut-destination") == 0) {
+			NEED_INTERFACE();
+			iface->flags |= NHRP_INTERFACE_FLAG_SHORTCUT_DEST;
+		} else if (strcmp(word, "multicast") == 0) {
+			NEED_INTERFACE();
+			NEED_WORD(word);
+			if (strcmp(word, "dynamic") == 0) {
+				iface->mcast_mask =
+					BIT(NHRP_PEER_TYPE_STATIC) |
+					BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) |
+					BIT(NHRP_PEER_TYPE_DYNAMIC);
+			} else if (strcmp(word, "nhs") == 0) {
+				iface->mcast_mask =
+					BIT(NHRP_PEER_TYPE_STATIC) |
+					BIT(NHRP_PEER_TYPE_DYNAMIC_NHS);
+			} else if (nhrp_address_parse(word, &paddr, NULL)) {
+				struct nhrp_address *grown;
+
+				/* Grow through a temporary so the old array
+				 * is not lost if realloc() fails. */
+				grown = realloc(iface->mcast_addr,
+						(iface->mcast_numaddr + 1) *
+						sizeof(struct nhrp_address));
+				NEED_ALLOC(grown);
+				iface->mcast_addr = grown;
+				iface->mcast_addr[iface->mcast_numaddr] = paddr;
+				iface->mcast_numaddr++;
+			} else {
+				rc = 6;
+				break;
+			}
+		} else {
+			rc = 0;
+			break;
+		}
+	}
+	fclose(in);
+
+	if (rc >= 0) {
+		nhrp_error("Configuration file %s in %s:%d, near word '%s'",
+			   errors[rc], config_file, lineno, word);
+		return FALSE;
+	}
+	return TRUE;
+}
+
+/* atexit() handler: close and delete the pid file, if one is open. */
+static void remove_pid_file(void)
+{
+	if (pid_file_fd >= 0) {
+		close(pid_file_fd);
+		pid_file_fd = -1;
+		remove(nhrp_pid_file);
+	}
+}
+
+/* Create the pid file and take an exclusive, non-blocking lock on it. An
+ * empty pid file name disables the pid file.
+ *
+ * Returns TRUE on success (or when disabled), FALSE if the file cannot be
+ * opened or is locked by someone else. */
+static int open_pid_file(void)
+{
+	if (strlen(nhrp_pid_file) == 0)
+		return TRUE;
+
+	pid_file_fd = open(nhrp_pid_file, O_CREAT | O_WRONLY,
+			   S_IRUSR | S_IWUSR);
+	if (pid_file_fd < 0)
+		goto err;
+
+	fcntl(pid_file_fd, F_SETFD, FD_CLOEXEC);
+	if (flock(pid_file_fd, LOCK_EX | LOCK_NB) < 0)
+		goto err_close;
+
+	return TRUE;
+
+err_close:
+	close(pid_file_fd);
+	pid_file_fd = -1;
+err:
+	nhrp_error("Unable to open/lock pid file: %s.", strerror(errno));
+	return FALSE;
+}
+
+/* Write our pid to the pid file opened by open_pid_file() and arrange for
+ * the file to be removed at exit. Does nothing when no pid file is open.
+ *
+ * Returns FALSE if truncating or writing the file fails. */
+static int write_pid(void)
+{
+	char tmp[16];
+	int n;
+
+	if (pid_file_fd >= 0) {
+		if (ftruncate(pid_file_fd, 0) < 0)
+			return FALSE;
+
+		n = snprintf(tmp, sizeof(tmp), "%d\n", getpid());
+		if (n < 0 || (size_t) n >= sizeof(tmp))
+			return FALSE;
+		if (write(pid_file_fd, tmp, n) != n)
+			return FALSE;
+
+		atexit(remove_pid_file);
+	}
+
+	return TRUE;
+}
+
+/* Detach from the controlling terminal: double fork with setsid() in
+ * between, chdir to "/", reset the umask and point the standard streams at
+ * /dev/null. The parent processes exit(0); only the final child returns.
+ *
+ * Returns TRUE in the daemonized child, FALSE on failure. */
+static int daemonize(void)
+{
+	pid_t pid;
+
+	pid = fork();
+	if (pid < 0)
+		return FALSE;
+	if (pid > 0)
+		exit(0);
+
+	if (setsid() < 0)
+		return FALSE;
+
+	pid = fork();
+	if (pid < 0)
+		return FALSE;
+	if (pid > 0)
+		exit(0);
+
+	if (chdir("/") < 0)
+		return FALSE;
+
+	umask(0);
+
+	if (freopen("/dev/null", "r", stdin) == NULL ||
+	    freopen("/dev/null", "w", stdout) == NULL ||
+	    freopen("/dev/null", "w", stderr) == NULL) {
+		nhrp_error("Unable reopen standard file descriptors");
+		goto err;
+	}
+
+	ev_default_fork();
+
+	return TRUE;
+
+err:
+	if (pid_file_fd >= 0)
+		close(pid_file_fd);
+	pid_file_fd = -1;
+	return FALSE;
+}
+
+/* Print the command line help to stderr. Always returns 1, so that main()
+ * can "return usage(...)". */
+int usage(const char *prog)
+{
+	fprintf(stderr,
+		"usage: opennhrp [-a admin-socket] [-c config-file] [-s script-file]\n"
+		" [-p pid-file] [-d] [-v]\n"
+		" opennhrp -V\n"
+		"\n"
+		"\t-a admin-socket\tspecify management interface socket\n"
+		"\t-c config-file\tread configuration from config-file\n"
+		"\t-s script-file\tuse specified script-file for event handling\n"
+		"\t-p pid-file\tspecify pid-file\n"
+		"\t-d\t\tfork to background after startup\n"
+		"\t-v\t\tverbose logging\n"
+		"\t-V\t\tshow version number and exit\n"
+		"\n");
+	return 1;
+}
+
+/* Daemon entry point: parse options, initialize the subsystems in order,
+ * optionally daemonize, run the event loop and clean up afterwards. Each
+ * failing initialization step has its own non-zero exit code. */
+int main(int argc, char **argv)
+{
+	struct nhrp_address any;
+	int i, daemonmode = 0;
+
+	nhrp_address_set_type(&any, AF_UNSPEC);
+
+	/* Every option is exactly "-x", arguments are separate words */
+	for (i = 1; i < argc; i++) {
+		if (strlen(argv[i]) != 2 || argv[i][0] != '-')
+			return usage(argv[0]);
+
+		switch (argv[i][1]) {
+		case 'c':
+			if (++i >= argc)
+				return usage(argv[0]);
+			nhrp_config_file = argv[i];
+			break;
+		case 's':
+			if (++i >= argc)
+				return usage(argv[0]);
+			nhrp_script_file = argv[i];
+			break;
+		case 'a':
+			if (++i >= argc)
+				return usage(argv[0]);
+			nhrp_admin_socket = argv[i];
+			break;
+		case 'p':
+			if (++i >= argc)
+				return usage(argv[0]);
+			nhrp_pid_file = argv[i];
+			break;
+		case 'd':
+			daemonmode = 1;
+			break;
+		case 'v':
+			nhrp_verbose = 1;
+			break;
+		case 'V':
+			puts(nhrp_version_string);
+			return 0;
+		default:
+			return usage(argv[0]);
+		}
+	}
+
+	srandom(time(NULL));
+	if (!log_init())
+		return 1;
+	if (!open_pid_file())
+		return 1;
+
+	nhrp_info("%s starting", nhrp_version_string);
+
+	ev_default_loop(0);
+	signal_init();
+	server_init();
+	if (!nhrp_address_init())
+		return 3;
+	if (!load_config(nhrp_config_file))
+		return 4;
+	if (!kernel_init())
+		return 5;
+	if (!admin_init(nhrp_admin_socket))
+		return 6;
+	if (!forward_init())
+		return 7;
+
+	if (daemonmode && !daemonize()) {
+		nhrp_error("Failed to daemonize. Exit.");
+		return 8;
+	}
+
+	/* The pid is written only after a possible fork so that it is the
+	 * daemon's. A failure is reported but is not fatal. */
+	if (!write_pid())
+		nhrp_error("Unable to write pid file: %s.", strerror(errno));
+
+	nhrp_running = TRUE;
+	ev_loop(0);
+	nhrp_running = FALSE;
+
+	forward_cleanup();
+	kernel_stop_listening();
+	nhrp_peer_cleanup();
+	kernel_cleanup();
+	nhrp_interface_cleanup();
+	nhrp_rate_limit_clear(&any, 0);
+	nhrp_address_cleanup();
+
+	ev_default_destroy();
+
+	return 0;
+}
+
diff --git a/nhrp/opennhrpctl.c b/nhrp/opennhrpctl.c
new file mode 100644
index 0000000..92fb5b5
--- /dev/null
+++ b/nhrp/opennhrpctl.c
@@ -0,0 +1,121 @@
+/* opennhrpctl.c - OpenNHRP command line control utility
+ *
+ * Copyright (C) 2007 Timo Teräs
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+/* NOTE(review): the names of the system headers were lost in this copy of
+ * the patch; the list below is reconstructed from the identifiers this file
+ * uses - confirm against upstream. */
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+/* Connect to the daemon's administrative UNIX stream socket.
+ *
+ * Returns the connected descriptor, or -1 with errno set if the path does
+ * not fit into sockaddr_un, or if socket() or connect() fails. */
+static int admin_init(const char *opennhrp_socket)
+{
+	struct sockaddr_un sun;
+	size_t len = strlen(opennhrp_socket);
+	int fd;
+
+	/* Refuse over-long paths instead of connecting to a silently
+	 * truncated (and possibly unterminated) name. */
+	if (len >= sizeof(sun.sun_path)) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+
+	memset(&sun, 0, sizeof(sun));
+	sun.sun_family = AF_UNIX;
+	memcpy(sun.sun_path, opennhrp_socket, len + 1);
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		return -1;
+
+	if (connect(fd, (struct sockaddr *) &sun, sizeof(sun)) < 0) {
+		/* close() may change errno; the caller reports it */
+		int saved_errno = errno;
+
+		close(fd);
+		errno = saved_errno;
+		return -1;
+	}
+
+	return fd;
+}
+
+/* Close the administrative connection. */
+static void admin_close(int fd)
+{
+	close(fd);
+}
+
+/* Send the command string to the daemon and shut down the write side of
+ * the connection.
+ *
+ * Returns 0 on success, -1 if the string could not be written in full. */
+static int admin_send(int fd, const char *str)
+{
+	int len = strlen(str);
+
+	if (write(fd, str, len) != len)
+		return -1;
+	shutdown(fd, SHUT_WR);
+	return 0;
+}
+
+/* Copy everything the daemon sends to standard output until it closes the
+ * connection.
+ *
+ * Returns 0 at end of stream, -1 if receiving or writing fails. */
+static int admin_receive(int fd)
+{
+	char msg[512];
+	ssize_t len;	/* signed: recv() reports errors as -1 */
+
+	while ((len = recv(fd, msg, sizeof(msg), 0)) > 0) {
+		if (write(fileno(stdout), msg, len) != len)
+			return -1;
+	}
+
+	if (len < 0)
+		return -1;
+
+	return 0;
+}
+
+/* Print the command line help to stderr. Always returns 1, so that main()
+ * can "return usage(...)". */
+static int usage(const char *prog)
+{
+	fprintf(stderr, "usage: %s [-a admin-socket] \n", prog);
+	return 1;
+}
+
+/* Join all non-option arguments into one space separated, newline
+ * terminated command, send it to the daemon and print the answer.
+ *
+ * Exit status: 0 on success, 1 on usage or connection errors, 2 if the
+ * request could not be sent or the answer could not be read. */
+int main(int argc, char **argv)
+{
+	const char *socket = OPENNHRP_ADMIN_SOCKET;
+	char cmd[1024] = "", *pos = cmd;
+	int i, fd;
+
+	for (i = 1; i < argc; i++) {
+		if (strlen(argv[i]) != 2 || argv[i][0] != '-') {
+			size_t room = &cmd[sizeof(cmd)-1] - pos;
+			int n;
+
+			/* Each word is appended as " word\n"; 'pos' is left
+			 * on the '\n' so that the next word overwrites it
+			 * and only the last one keeps the newline. */
+			n = snprintf(pos, room, " %s\n", argv[i]);
+			if (n < 0 || (size_t) n >= room) {
+				/* Advancing by the untruncated length would
+				 * move 'pos' outside of 'cmd'. */
+				fprintf(stderr, "Command too long.\n");
+				return 1;
+			}
+			pos += n - 1;
+			continue;
+		}
+
+		switch (argv[i][1]) {
+		case 'a':
+			if (++i >= argc)
+				return usage(argv[0]);
+			socket = argv[i];
+			break;
+		default:
+			return usage(argv[0]);
+		}
+	}
+	/* No command words given */
+	if (cmd == pos)
+		return usage(argv[0]);
+
+	fd = admin_init(socket);
+	if (fd < 0) {
+		fprintf(stderr,
+			"Failed to connect to opennhrp daemon [%s]: %s.\n\n",
+			socket, strerror(errno));
+		return 1;
+	}
+
+	/* &cmd[1] skips the space in front of the first word */
+	if (admin_send(fd, &cmd[1]) < 0 ||
+	    admin_receive(fd) < 0) {
+		fprintf(stderr, "Failed to send request: %s.\n",
+			strerror(errno));
+		return 2;
+	}
+
+	admin_close(fd);
+	return 0;
+}
diff --git a/nhrp/sysdep_netlink.c b/nhrp/sysdep_netlink.c
new file mode 100644
index 0000000..d058a98
--- /dev/null +++ b/nhrp/sysdep_netlink.c @@ -0,0 +1,1159 @@ +/* sysdep_netlink.c - Linux netlink glue + * + * Copyright (C) 2007-2009 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libev.h" +#include "nhrp_common.h" +#include "nhrp_interface.h" +#include "nhrp_peer.h" + +#define NETLINK_KERNEL_BUFFER (256 * 1024) +#define NETLINK_RECV_BUFFER (8 * 1024) + +#define NLMSG_TAIL(nmsg) \ + ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) + +#define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) +#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg)) + +typedef void (*netlink_dispatch_f)(struct nlmsghdr *msg); + +struct netlink_fd { + int fd; + __u32 seq; + struct ev_io io; + + int dispatch_size; + const netlink_dispatch_f *dispatch; +}; + +static const int netlink_groups[] = { + 0, + RTMGRP_NEIGH, + RTMGRP_LINK, + RTMGRP_IPV4_IFADDR, + RTMGRP_IPV4_ROUTE, +}; +static struct netlink_fd netlink_fds[ARRAY_SIZE(netlink_groups)]; +#define talk_fd netlink_fds[0] + +static struct ev_io packet_io; + +static u_int16_t translate_mtu(u_int16_t mtu) +{ + /* if mtu is ethernet standard, do not advertise it + * pmtu should be working */ + if (mtu == 1500) + return 0; + return mtu; +} + +static void netlink_parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len) +{ + memset(tb, 0, sizeof(struct rtattr *) * (max + 1)); + while (RTA_OK(rta, len)) { + if (rta->rta_type <= max) + tb[rta->rta_type] = rta; + rta = RTA_NEXT(rta,len); + } +} + +static int 
netlink_add_rtattr_l(struct nlmsghdr *n, int maxlen, int type, + const void *data, int alen) +{ + int len = RTA_LENGTH(alen); + struct rtattr *rta; + + if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) + return FALSE; + + rta = NLMSG_TAIL(n); + rta->rta_type = type; + rta->rta_len = len; + memcpy(RTA_DATA(rta), data, alen); +#ifdef VALGRIND + /* Clear the padding area to avoid spurious warnings */ + memset(RTA_DATA(rta) + alen, 0, RTA_ALIGN(len) - alen); +#endif + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); + return TRUE; +} + +static int netlink_receive(struct netlink_fd *fd, struct nlmsghdr *reply) +{ + struct sockaddr_nl nladdr; + struct iovec iov; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + int got_reply = FALSE, len; + char buf[NETLINK_RECV_BUFFER]; + + iov.iov_base = buf; + while (!got_reply) { + int status; + struct nlmsghdr *h; + + iov.iov_len = sizeof(buf); + status = recvmsg(fd->fd, &msg, MSG_DONTWAIT); + if (status < 0) { + if (errno == EINTR) + continue; + if (errno == EAGAIN) + return reply == NULL; + nhrp_perror("Netlink overrun"); + continue; + } + + if (status == 0) { + nhrp_error("Netlink returned EOF"); + return FALSE; + } + + h = (struct nlmsghdr *) buf; + while (NLMSG_OK(h, status)) { + if (reply != NULL && + h->nlmsg_seq == reply->nlmsg_seq) { + len = h->nlmsg_len; + if (len > reply->nlmsg_len) { + nhrp_error("Netlink message truncated"); + len = reply->nlmsg_len; + } + memcpy(reply, h, len); + got_reply = TRUE; + } else if (h->nlmsg_type <= fd->dispatch_size && + fd->dispatch[h->nlmsg_type] != NULL) { + fd->dispatch[h->nlmsg_type](h); + } else if (h->nlmsg_type != NLMSG_DONE) { + nhrp_info("Unknown NLmsg: 0x%08x, len %d", + h->nlmsg_type, h->nlmsg_len); + } + h = NLMSG_NEXT(h, status); + } + } + + return TRUE; +} + +static int netlink_send(struct netlink_fd *fd, struct nlmsghdr *req) +{ + struct sockaddr_nl nladdr; + struct iovec iov = { + 
.iov_base = (void*) req, + .iov_len = req->nlmsg_len + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + int status; + + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + + req->nlmsg_seq = ++fd->seq; + + status = sendmsg(fd->fd, &msg, 0); + if (status < 0) { + nhrp_perror("Cannot talk to rtnetlink"); + return FALSE; + } + return TRUE; +} + +static int netlink_talk(struct netlink_fd *fd, struct nlmsghdr *req, + size_t replysize, struct nlmsghdr *reply) +{ + if (reply == NULL) + req->nlmsg_flags |= NLM_F_ACK; + + if (!netlink_send(fd, req)) + return FALSE; + + if (reply == NULL) + return TRUE; + + reply->nlmsg_len = replysize; + return netlink_receive(fd, reply); +} + +static int netlink_enumerate(struct netlink_fd *fd, int family, int type) +{ + struct { + struct nlmsghdr nlh; + struct rtgenmsg g; + } req; + struct sockaddr_nl addr; + + memset(&addr, 0, sizeof(addr)); + addr.nl_family = AF_NETLINK; + + memset(&req, 0, sizeof(req)); + req.nlh.nlmsg_len = sizeof(req); + req.nlh.nlmsg_type = type; + req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.nlh.nlmsg_pid = 0; + req.nlh.nlmsg_seq = ++fd->seq; + req.g.rtgen_family = family; + + return sendto(fd->fd, (void *) &req, sizeof(req), 0, + (struct sockaddr *) &addr, sizeof(addr)) >= 0; +} + +static void netlink_read_cb(struct ev_io *w, int revents) +{ + struct netlink_fd *nfd = container_of(w, struct netlink_fd, io); + + if (revents & EV_READ) + netlink_receive(nfd, NULL); +} + +static int do_get_ioctl(const char *basedev, struct ip_tunnel_parm *p) +{ + struct ifreq ifr; + +#ifdef VALGRIND + /* Valgrind does not have SIOCGETTUNNEL description, so clear + * the memory structs to avoid spurious warnings */ + memset(&ifr, 0, sizeof(ifr)); + memset(p, 0, sizeof(*p)); +#endif + + strncpy(ifr.ifr_name, basedev, IFNAMSIZ); + ifr.ifr_ifru.ifru_data = (void *) p; + if (ioctl(packet_io.fd, SIOCGETTUNNEL, &ifr)) { 
+ nhrp_perror("ioctl(SIOCGETTUNNEL)"); + return FALSE; + } + return TRUE; +} + +#ifndef NHRP_NO_NBMA_GRE + +static int netlink_add_nested_rtattr_u32(struct rtattr *rta, int maxlen, + int type, uint32_t value) +{ + int len = RTA_LENGTH(4); + struct rtattr *subrta; + + if (RTA_ALIGN(rta->rta_len) + len > maxlen) + return FALSE; + + subrta = (struct rtattr*)(((char*)rta) + RTA_ALIGN(rta->rta_len)); + subrta->rta_type = type; + subrta->rta_len = len; + memcpy(RTA_DATA(subrta), &value, 4); + rta->rta_len = NLMSG_ALIGN(rta->rta_len) + len; + return TRUE; +} + +static int netlink_configure_arp(struct nhrp_interface *iface, int pf) +{ + struct { + struct nlmsghdr n; + struct ndtmsg ndtm; + char buf[256]; + } req; + struct { + struct rtattr rta; + char buf[256]; + } parms; + + memset(&req.n, 0, sizeof(req.n)); + memset(&req.ndtm, 0, sizeof(req.ndtm)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE; + req.n.nlmsg_type = RTM_SETNEIGHTBL; + + req.ndtm.ndtm_family = pf; + + netlink_add_rtattr_l(&req.n, sizeof(req), NDTA_NAME, + "arp_cache", 10); + + parms.rta.rta_type = NDTA_PARMS; + parms.rta.rta_len = RTA_LENGTH(0); + netlink_add_nested_rtattr_u32(&parms.rta, sizeof(parms), + NDTPA_IFINDEX, iface->index); + netlink_add_nested_rtattr_u32(&parms.rta, sizeof(parms), + NDTPA_APP_PROBES, 1); + netlink_add_nested_rtattr_u32(&parms.rta, sizeof(parms), + NDTPA_MCAST_PROBES, 0); + netlink_add_nested_rtattr_u32(&parms.rta, sizeof(parms), + NDTPA_UCAST_PROBES, 0); + + netlink_add_rtattr_l(&req.n, sizeof(req), NDTA_PARMS, + parms.buf, parms.rta.rta_len - RTA_LENGTH(0)); + + return netlink_send(&talk_fd, &req.n); +} + +static int netlink_link_arp_on(struct nhrp_interface *iface) +{ + struct ifreq ifr; + + strncpy(ifr.ifr_name, iface->name, IFNAMSIZ); + if (ioctl(packet_io.fd, SIOCGIFFLAGS, &ifr)) { + nhrp_perror("ioctl(SIOCGIFFLAGS)"); + return FALSE; + } + if (ifr.ifr_flags & IFF_NOARP) { + ifr.ifr_flags &= ~IFF_NOARP; + if 
(ioctl(packet_io.fd, SIOCSIFFLAGS, &ifr)) { + nhrp_perror("ioctl(SIOCSIFFLAGS)"); + return FALSE; + } + } + return TRUE; +} + +#else + +static int netlink_configure_arp(struct nhrp_interface *iface, int pf) +{ + return TRUE; +} + +static int netlink_link_arp_on(struct nhrp_interface *iface) +{ + return TRUE; +} + +#endif + +static int proc_icmp_redirect_off(const char *interface) +{ + char fname[256]; + int fd, ret = FALSE; + + sprintf(fname, "/proc/sys/net/ipv4/conf/%s/send_redirects", interface); + fd = open(fname, O_WRONLY); + if (fd < 0) + return FALSE; + if (write(fd, "0\n", 2) == 2) + ret = TRUE; + close(fd); + + return ret; +} + +static void netlink_neigh_request(struct nlmsghdr *msg) +{ + struct ndmsg *ndm = NLMSG_DATA(msg); + struct rtattr *rta[NDA_MAX+1]; + struct nhrp_peer *peer; + struct nhrp_address addr; + struct nhrp_interface *iface; + char tmp[64]; + + netlink_parse_rtattr(rta, NDA_MAX, NDA_RTA(ndm), NDA_PAYLOAD(msg)); + if (rta[NDA_DST] == NULL) + return; + + iface = nhrp_interface_get_by_index(ndm->ndm_ifindex, 0); + if (iface == NULL) + return; + + nhrp_address_set(&addr, ndm->ndm_family, + RTA_PAYLOAD(rta[NDA_DST]), + RTA_DATA(rta[NDA_DST])); + + nhrp_debug("NL-ARP(%s) who-has %s", + iface->name, nhrp_address_format(&addr, sizeof(tmp), tmp)); + + peer = nhrp_peer_route(iface, &addr, 0, ~BIT(NHRP_PEER_TYPE_LOCAL_ROUTE)); + if (peer == NULL) + return; + + if (peer->flags & NHRP_PEER_FLAG_UP) + kernel_inject_neighbor(&addr, &peer->next_hop_address, iface); + + if (peer->next_hop_address.type != PF_UNSPEC && + nhrp_address_cmp(&addr, &peer->protocol_address) != 0) + nhrp_peer_traffic_indication(iface, peer->afnum, &addr); +} + +static void netlink_neigh_update(struct nlmsghdr *msg) +{ + struct ndmsg *ndm = NLMSG_DATA(msg); + struct rtattr *rta[NDA_MAX+1]; + struct nhrp_interface *iface; + struct nhrp_peer_selector sel; + int used = FALSE; + + netlink_parse_rtattr(rta, NDA_MAX, NDA_RTA(ndm), NDA_PAYLOAD(msg)); + if (rta[NDA_DST] == NULL) + return; + 
+ if (!(ndm->ndm_state & (NUD_STALE | NUD_FAILED | NUD_REACHABLE))) + return; + + iface = nhrp_interface_get_by_index(ndm->ndm_ifindex, 0); + if (iface == NULL) + return; + + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_EXACT; + sel.interface = iface; + nhrp_address_set(&sel.protocol_address, ndm->ndm_family, + RTA_PAYLOAD(rta[NDA_DST]), + RTA_DATA(rta[NDA_DST])); + + if (msg->nlmsg_type == RTM_NEWNEIGH && (ndm->ndm_state & NUD_REACHABLE)) + used = TRUE; + + nhrp_peer_foreach(nhrp_peer_set_used_matching, + (void*) (intptr_t) used, &sel); +} + +static void netlink_link_new(struct nlmsghdr *msg) +{ + struct nhrp_interface *iface; + struct ifinfomsg *ifi = NLMSG_DATA(msg); + struct rtattr *rta[IFLA_MAX+1]; + const char *ifname; + struct ip_tunnel_parm cfg; + int configuration_changed = FALSE; + + netlink_parse_rtattr(rta, IFLA_MAX, IFLA_RTA(ifi), IFLA_PAYLOAD(msg)); + if (rta[IFLA_IFNAME] == NULL) + return; + + ifname = RTA_DATA(rta[IFLA_IFNAME]); + iface = nhrp_interface_get_by_name(ifname, TRUE); + if (iface == NULL) + return; + + if (rta[IFLA_MTU]) + iface->mtu = *((unsigned*)RTA_DATA(rta[IFLA_MTU])); + + if (iface->index == 0 || (ifi->ifi_flags & ifi->ifi_change & IFF_UP)) { + nhrp_info("Interface %s: new or configured up, mtu=%d", + ifname, iface->mtu); + nhrp_interface_run_script(iface, "interface-up"); + } else { + nhrp_info("Interface %s: config change, mtu=%d", + ifname, iface->mtu); + } + + iface->index = ifi->ifi_index; + nhrp_interface_hash(iface); + + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + return; + + switch (ifi->ifi_type) { + case ARPHRD_IPGRE: + iface->afnum = AFNUM_INET; + /* try hard to get the interface nbma address */ + do_get_ioctl(ifname, &cfg); + if (iface->gre_key != ntohl(cfg.i_key)) { + configuration_changed = TRUE; + iface->gre_key = ntohl(cfg.i_key); + } + if (cfg.iph.saddr) { + struct nhrp_address saddr; + nhrp_address_set(&saddr, PF_INET, 4, (uint8_t *) &cfg.iph.saddr); + if 
(nhrp_address_cmp(&iface->nbma_address, &saddr) || iface->link_index) { + configuration_changed = TRUE; + iface->nbma_address = saddr; + iface->link_index = 0; + } + } else if (cfg.link) { + if (cfg.link != iface->link_index) { + configuration_changed = TRUE; + nhrp_address_set_type(&iface->nbma_address, PF_UNSPEC); + iface->link_index = cfg.link; + } + } else { + if (iface->link_index || iface->nbma_address.type != PF_UNSPEC) { + configuration_changed = TRUE; + /* Mark the interface as owning all NBMA addresses + * this works when there's only one GRE interface */ + iface->link_index = 0; + nhrp_address_set_type(&iface->nbma_address, PF_UNSPEC); + nhrp_info("WARNING: Cannot figure out NBMA address for " + "interface '%s'. Using route hints.", ifname); + } + } + break; + } + + if (!(iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST)) { + netlink_configure_arp(iface, PF_INET); + netlink_link_arp_on(iface); + proc_icmp_redirect_off(iface->name); + } + + if (configuration_changed) { + struct nhrp_peer_selector sel; + int count = 0; + + /* Reset the interface values we detect later */ + memset(&iface->nat_cie, 0, sizeof(iface->nat_cie)); + iface->nbma_mtu = 0; + if (iface->link_index) { + /* Reenumerate addresses if needed */ + netlink_enumerate(&talk_fd, PF_UNSPEC, RTM_GETADDR); + netlink_read_cb(&talk_fd.io, EV_READ); + } + + /* Purge all NHRP entries for this interface */ + memset(&sel, 0, sizeof(sel)); + sel.type_mask = NHRP_PEER_TYPEMASK_PURGEABLE; + sel.interface = iface; + nhrp_peer_foreach(nhrp_peer_purge_matching, &count, &sel); + nhrp_info("Interface %s: GRE configuration changed. 
Purged %d peers.", + ifname, count); + } +} + +static void netlink_link_del(struct nlmsghdr *msg) +{ + struct nhrp_interface *iface; + struct ifinfomsg *ifi = NLMSG_DATA(msg); + struct rtattr *rta[IFLA_MAX+1]; + const char *ifname; + + netlink_parse_rtattr(rta, IFLA_MAX, IFLA_RTA(ifi), IFLA_PAYLOAD(msg)); + if (rta[IFLA_IFNAME] == NULL) + return; + + ifname = RTA_DATA(rta[IFLA_IFNAME]); + iface = nhrp_interface_get_by_name(ifname, FALSE); + if (iface == NULL) + return; + + nhrp_info("Interface '%s' deleted", ifname); + iface->index = 0; + iface->link_index = 0; + nhrp_interface_hash(iface); + + nhrp_address_set_type(&iface->nbma_address, PF_UNSPEC); + nhrp_address_set_type(&iface->protocol_address, PF_UNSPEC); +} + +static int netlink_addr_new_nbma(void *ctx, struct nhrp_interface *iface) +{ + struct nlmsghdr *msg = (struct nlmsghdr *) ctx; + struct ifaddrmsg *ifa = NLMSG_DATA(msg); + struct rtattr *rta[IFA_MAX+1]; + struct nhrp_interface *nbma_iface; + + if (iface->link_index == ifa->ifa_index) { + netlink_parse_rtattr(rta, IFA_MAX, IFA_RTA(ifa), + IFA_PAYLOAD(msg)); + + if (rta[IFA_LOCAL] == NULL) + return 0; + + nhrp_address_set(&iface->nbma_address, ifa->ifa_family, + RTA_PAYLOAD(rta[IFA_LOCAL]), + RTA_DATA(rta[IFA_LOCAL])); + + nbma_iface = nhrp_interface_get_by_index(ifa->ifa_index, FALSE); + if (nbma_iface != NULL) { + iface->nbma_mtu = translate_mtu(nbma_iface->mtu); + } + } + + return 0; +} + +static void netlink_addr_new(struct nlmsghdr *msg) +{ + struct nhrp_interface *iface; + struct nhrp_peer *peer, *bcast; + struct ifaddrmsg *ifa = NLMSG_DATA(msg); + struct rtattr *rta[IFA_MAX+1]; + + if (!(ifa->ifa_flags & IFA_F_SECONDARY)) + nhrp_interface_foreach(netlink_addr_new_nbma, msg); + + netlink_parse_rtattr(rta, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(msg)); + iface = nhrp_interface_get_by_index(ifa->ifa_index, FALSE); + if (iface == NULL || rta[IFA_LOCAL] == NULL) + return; + + /* Shortcut destination stuff is extracted from routes; + * not from local address 
information. */ + if (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) + return; + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + return; + + nhrp_address_set(&iface->protocol_address, ifa->ifa_family, + RTA_PAYLOAD(rta[IFA_LOCAL]), + RTA_DATA(rta[IFA_LOCAL])); + iface->protocol_address_prefix = ifa->ifa_prefixlen; + + peer = nhrp_peer_alloc(iface); + peer->type = NHRP_PEER_TYPE_LOCAL_ADDR; + peer->afnum = AFNUM_RESERVED; + nhrp_address_set(&peer->protocol_address, ifa->ifa_family, + RTA_PAYLOAD(rta[IFA_LOCAL]), + RTA_DATA(rta[IFA_LOCAL])); + switch (ifa->ifa_family) { + case PF_INET: + peer->protocol_type = ETHPROTO_IP; + peer->prefix_length = peer->protocol_address.addr_len * 8; + nhrp_peer_insert(peer); + break; + default: + nhrp_peer_put(peer); + return; + } + + bcast = nhrp_peer_alloc(iface); + bcast->type = peer->type; + bcast->afnum = peer->afnum; + bcast->protocol_type = peer->protocol_type; + bcast->prefix_length = peer->prefix_length; + bcast->protocol_address = peer->protocol_address; + nhrp_address_set_broadcast(&bcast->protocol_address, + ifa->ifa_prefixlen); + bcast->next_hop_address = peer->protocol_address; + nhrp_peer_insert(bcast); + nhrp_peer_put(bcast); + + nhrp_peer_put(peer); +} + +struct netlink_del_addr_msg { + int interface_index; + struct nhrp_address address; +}; + +static int netlink_addr_del_nbma(void *ctx, struct nhrp_interface *iface) +{ + struct netlink_del_addr_msg *msg = (struct netlink_del_addr_msg *) ctx; + + if (iface->link_index == msg->interface_index && + nhrp_address_cmp(&msg->address, &iface->nbma_address) == 0) + nhrp_address_set_type(&iface->nbma_address, PF_UNSPEC); + + return 0; +} + +static int netlink_addr_purge_nbma(void *ctx, struct nhrp_peer *peer) +{ + struct netlink_del_addr_msg *msg = (struct netlink_del_addr_msg *) ctx; + + if (nhrp_address_cmp(&peer->my_nbma_address, &msg->address) == 0) + nhrp_peer_purge(peer, "address-removed"); + + return 0; +} + +static void netlink_addr_del(struct nlmsghdr 
*nlmsg) +{ + struct netlink_del_addr_msg msg; + struct nhrp_interface *iface; + struct ifaddrmsg *ifa = NLMSG_DATA(nlmsg); + struct rtattr *rta[IFA_MAX+1]; + struct nhrp_peer_selector sel; + + netlink_parse_rtattr(rta, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(nlmsg)); + if (rta[IFA_LOCAL] == NULL) + return; + + msg.interface_index = ifa->ifa_index; + nhrp_address_set(&msg.address, ifa->ifa_family, + RTA_PAYLOAD(rta[IFA_LOCAL]), + RTA_DATA(rta[IFA_LOCAL])); + + if (!(ifa->ifa_flags & IFA_F_SECONDARY)) + nhrp_interface_foreach(netlink_addr_del_nbma, &msg); + nhrp_peer_foreach(netlink_addr_purge_nbma, &msg, NULL); + + iface = nhrp_interface_get_by_index(ifa->ifa_index, FALSE); + if (iface == NULL) + return; + + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_EXACT; + sel.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ADDR); + sel.interface = iface; + sel.protocol_address = msg.address; + sel.prefix_length = sel.protocol_address.addr_len * 8; + + if (nhrp_address_cmp(&sel.protocol_address, &iface->protocol_address) == 0) + nhrp_address_set_type(&iface->protocol_address, PF_UNSPEC); + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); + + nhrp_address_set_broadcast(&sel.protocol_address, ifa->ifa_prefixlen); + sel.next_hop_address = msg.address; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); +} + +static void netlink_route_new(struct nlmsghdr *msg) +{ + struct nhrp_interface *iface; + struct nhrp_peer *peer; + struct rtmsg *rtm = NLMSG_DATA(msg); + struct rtattr *rta[RTA_MAX+1]; + int type = 0; + + netlink_parse_rtattr(rta, RTA_MAX, RTM_RTA(rtm), RTM_PAYLOAD(msg)); + if (rta[RTA_OIF] == NULL || rta[RTA_DST] == NULL) + return; + + if (rtm->rtm_family != PF_INET) + return; + + iface = nhrp_interface_get_by_index(*(int*)RTA_DATA(rta[RTA_OIF]), + FALSE); + if (iface == NULL) + return; + + if (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) { + /* Local shortcut target routes */ + if (rtm->rtm_table != RT_TABLE_MAIN) + return; + type = 
NHRP_PEER_TYPE_LOCAL_ADDR; + } else if (iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED) { + /* Routes which might get additional outbound + * shortcuts */ + if (rtm->rtm_table != iface->route_table || + rtm->rtm_protocol == RTPROT_KERNEL) + return; + type = NHRP_PEER_TYPE_LOCAL_ROUTE; + } + if (type == 0) + return; + + peer = nhrp_peer_alloc(iface); + peer->type = type; + peer->afnum = AFNUM_RESERVED; + nhrp_address_set(&peer->protocol_address, rtm->rtm_family, + RTA_PAYLOAD(rta[RTA_DST]), + RTA_DATA(rta[RTA_DST])); + if (rta[RTA_GATEWAY] != NULL) { + nhrp_address_set(&peer->next_hop_address, + rtm->rtm_family, + RTA_PAYLOAD(rta[RTA_GATEWAY]), + RTA_DATA(rta[RTA_GATEWAY])); + } + peer->protocol_type = nhrp_protocol_from_pf(rtm->rtm_family); + peer->prefix_length = rtm->rtm_dst_len; + nhrp_peer_insert(peer); + nhrp_peer_put(peer); +} + +static void netlink_route_del(struct nlmsghdr *msg) +{ + struct nhrp_interface *iface; + struct rtmsg *rtm = NLMSG_DATA(msg); + struct rtattr *rta[RTA_MAX+1]; + struct nhrp_peer_selector sel; + int type = 0; + + netlink_parse_rtattr(rta, RTA_MAX, RTM_RTA(rtm), RTM_PAYLOAD(msg)); + if (rta[RTA_OIF] == NULL || rta[RTA_DST] == NULL) + return; + + if (rtm->rtm_family != PF_INET) + return; + + iface = nhrp_interface_get_by_index(*(int*)RTA_DATA(rta[RTA_OIF]), + FALSE); + if (iface == NULL) + return; + + if (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) { + /* Local shortcut target routes */ + if (rtm->rtm_table != RT_TABLE_MAIN) + return; + type = NHRP_PEER_TYPE_LOCAL_ADDR; + } else if (iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED) { + /* Routes which might get additional outbound + * shortcuts */ + if (rtm->rtm_table != iface->route_table || + rtm->rtm_protocol == RTPROT_KERNEL) + return; + type = NHRP_PEER_TYPE_LOCAL_ROUTE; + } + if (type == 0) + return; + + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_EXACT; + sel.type_mask = BIT(type); + sel.interface = iface; + nhrp_address_set(&sel.protocol_address, 
rtm->rtm_family, + RTA_PAYLOAD(rta[RTA_DST]), + RTA_DATA(rta[RTA_DST])); + if (rta[RTA_GATEWAY] != NULL) { + nhrp_address_set(&sel.next_hop_address, + rtm->rtm_family, + RTA_PAYLOAD(rta[RTA_GATEWAY]), + RTA_DATA(rta[RTA_GATEWAY])); + } + sel.prefix_length = rtm->rtm_dst_len; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); +} + +static const netlink_dispatch_f route_dispatch[RTM_MAX] = { + [RTM_GETNEIGH] = netlink_neigh_request, + [RTM_NEWNEIGH] = netlink_neigh_update, + [RTM_DELNEIGH] = netlink_neigh_update, + [RTM_NEWLINK] = netlink_link_new, + [RTM_DELLINK] = netlink_link_del, + [RTM_NEWADDR] = netlink_addr_new, + [RTM_DELADDR] = netlink_addr_del, + [RTM_NEWROUTE] = netlink_route_new, + [RTM_DELROUTE] = netlink_route_del, +}; + +static void netlink_stop_listening(struct netlink_fd *fd) +{ + ev_io_stop(&fd->io); +} + +static void netlink_close(struct netlink_fd *fd) +{ + if (fd->fd >= 0) { + netlink_stop_listening(fd); + close(fd->fd); + fd->fd = 0; + } +} + +static int netlink_open(struct netlink_fd *fd, int protocol, int groups) +{ + struct sockaddr_nl addr; + int buf = NETLINK_KERNEL_BUFFER; + + fd->fd = socket(AF_NETLINK, SOCK_RAW, protocol); + fd->seq = time(NULL); + if (fd->fd < 0) { + nhrp_perror("Cannot open netlink socket"); + return FALSE; + } + + fcntl(fd->fd, F_SETFD, FD_CLOEXEC); + if (setsockopt(fd->fd, SOL_SOCKET, SO_SNDBUF, &buf, sizeof(buf)) < 0) { + nhrp_perror("SO_SNDBUF"); + goto error; + } + + if (setsockopt(fd->fd, SOL_SOCKET, SO_RCVBUF, &buf, sizeof(buf)) < 0) { + nhrp_perror("SO_RCVBUF"); + goto error; + } + + memset(&addr, 0, sizeof(addr)); + addr.nl_family = AF_NETLINK; + addr.nl_groups = groups; + if (bind(fd->fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { + nhrp_perror("Cannot bind netlink socket"); + goto error; + } + + ev_io_init(&fd->io, netlink_read_cb, fd->fd, EV_READ); + ev_io_start(&fd->io); + + return TRUE; + +error: + netlink_close(fd); + return FALSE; +} + +static void pfpacket_read_cb(struct ev_io *w, int 
revents) +{ + struct sockaddr_ll lladdr; + struct nhrp_interface *iface; + struct iovec iov; + struct msghdr msg = { + .msg_name = &lladdr, + .msg_namelen = sizeof(lladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + uint8_t buf[1500]; + struct nhrp_address from; + int fd = w->fd; + int i; + + iov.iov_base = buf; + for (i = 0; i < 2; i++) { + int status; + + iov.iov_len = sizeof(buf); + status = recvmsg(fd, &msg, MSG_DONTWAIT); + if (status < 0) { + if (errno == EINTR) + continue; + if (errno == EAGAIN) + return; + nhrp_perror("PF_PACKET overrun"); + continue; + } + + if (status == 0) { + nhrp_error("PF_PACKET returned EOF"); + return; + } + + iface = nhrp_interface_get_by_index(lladdr.sll_ifindex, FALSE); + if (iface == NULL) + continue; + + nhrp_address_set(&from, PF_INET, lladdr.sll_halen, lladdr.sll_addr); + if (memcmp(lladdr.sll_addr, "\x00\x00\x00\x00", 4) == 0) + nhrp_address_set_type(&from, PF_UNSPEC); + nhrp_packet_receive(buf, status, iface, &from); + } +} + +int kernel_init(void) +{ + int fd, i; + + proc_icmp_redirect_off("all"); + + fd = socket(PF_PACKET, SOCK_DGRAM, ETHPROTO_NHRP); + if (fd < 0) { + nhrp_error("Unable to create PF_PACKET socket"); + return FALSE; + } + + fcntl(fd, F_SETFD, FD_CLOEXEC); + ev_io_init(&packet_io, pfpacket_read_cb, fd, EV_READ); + ev_io_start(&packet_io); + + for (i = 0; i < ARRAY_SIZE(netlink_groups); i++) { + netlink_fds[i].dispatch_size = sizeof(route_dispatch) / sizeof(route_dispatch[0]); + netlink_fds[i].dispatch = route_dispatch; + if (!netlink_open(&netlink_fds[i], NETLINK_ROUTE, + netlink_groups[i])) + goto err_close_all; + } + + netlink_enumerate(&talk_fd, PF_UNSPEC, RTM_GETLINK); + netlink_read_cb(&talk_fd.io, EV_READ); + + netlink_enumerate(&talk_fd, PF_UNSPEC, RTM_GETADDR); + netlink_read_cb(&talk_fd.io, EV_READ); + + netlink_enumerate(&talk_fd, PF_UNSPEC, RTM_GETROUTE); + netlink_read_cb(&talk_fd.io, EV_READ); + + return TRUE; + +err_close_all: + kernel_cleanup(); + return FALSE; +} + +void 
kernel_stop_listening(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(netlink_groups); i++) + netlink_stop_listening(&netlink_fds[i]); + ev_io_stop(&packet_io); +} + +void kernel_cleanup(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(netlink_groups); i++) + netlink_close(&netlink_fds[i]); + ev_io_stop(&packet_io); + close(packet_io.fd); +} + +int kernel_route(struct nhrp_interface *out_iface, + struct nhrp_address *dest, + struct nhrp_address *default_source, + struct nhrp_address *next_hop, + u_int16_t *mtu) +{ + struct { + struct nlmsghdr n; + struct rtmsg r; + char buf[1024]; + } req; + struct rtmsg *r = NLMSG_DATA(&req.n); + struct rtattr *rta[RTA_MAX+1]; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_GETROUTE; + req.r.rtm_family = dest->type; + + netlink_add_rtattr_l(&req.n, sizeof(req), RTA_DST, + dest->addr, dest->addr_len); + req.r.rtm_dst_len = dest->addr_len * 8; + + if (default_source != NULL && default_source->type != PF_UNSPEC) + netlink_add_rtattr_l(&req.n, sizeof(req), RTA_SRC, + default_source->addr, + default_source->addr_len); + if (out_iface != NULL) + netlink_add_rtattr_l(&req.n, sizeof(req), RTA_OIF, + &out_iface->index, sizeof(int)); + + if (!netlink_talk(&talk_fd, &req.n, sizeof(req), &req.n)) + return FALSE; + + netlink_parse_rtattr(rta, RTA_MAX, RTM_RTA(r), RTM_PAYLOAD(&req.n)); + + if (default_source != NULL && default_source->type == PF_UNSPEC && + rta[RTA_PREFSRC] != NULL) { + nhrp_address_set(default_source, dest->type, + RTA_PAYLOAD(rta[RTA_PREFSRC]), + RTA_DATA(rta[RTA_PREFSRC])); + } + + if (next_hop != NULL) { + if (rta[RTA_GATEWAY] != NULL) { + nhrp_address_set(next_hop, dest->type, + RTA_PAYLOAD(rta[RTA_GATEWAY]), + RTA_DATA(rta[RTA_GATEWAY])); + } else { + *next_hop = *dest; + } + } + + if (mtu != NULL) { + *mtu = 0; + + if (rta[RTA_OIF] != NULL) { + struct nhrp_interface *nbma_iface; + + /* We use interface MTU here 
instead of the route + * cache MTU from RTA_METRICS/RTAX_MTU since we + * don't want to announce mtu if PMTU works */ + nbma_iface = nhrp_interface_get_by_index( + *(int*)RTA_DATA(rta[RTA_OIF]), + FALSE); + if (nbma_iface != NULL) + *mtu = translate_mtu(nbma_iface->mtu); + } + } + + return TRUE; +} + +int kernel_send(uint8_t *packet, size_t bytes, struct nhrp_interface *out, + struct nhrp_address *to) +{ + struct sockaddr_ll lladdr; + struct iovec iov = { + .iov_base = (void*) packet, + .iov_len = bytes + }; + struct msghdr msg = { + .msg_name = &lladdr, + .msg_namelen = sizeof(lladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + int status; + + if (to->addr_len > sizeof(lladdr.sll_addr)) { + nhrp_error("Destination NBMA address too long"); + return FALSE; + } + + memset(&lladdr, 0, sizeof(lladdr)); + lladdr.sll_family = AF_PACKET; + lladdr.sll_protocol = ETHPROTO_NHRP; + lladdr.sll_ifindex = out->index; + lladdr.sll_halen = to->addr_len; + memcpy(lladdr.sll_addr, to->addr, to->addr_len); + + status = sendmsg(packet_io.fd, &msg, 0); + if (status < 0) { + nhrp_error("Cannot send packet to %s(%d): %s", + out->name, out->index, strerror(errno)); + return FALSE; + } + + return TRUE; +} + +int kernel_inject_neighbor(struct nhrp_address *neighbor, + struct nhrp_address *hwaddr, + struct nhrp_interface *dev) +{ + struct { + struct nlmsghdr n; + struct ndmsg ndm; + char buf[256]; + } req; + char neigh[64], nbma[64]; + + memset(&req.n, 0, sizeof(req.n)); + memset(&req.ndm, 0, sizeof(req.ndm)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE | NLM_F_CREATE; + req.n.nlmsg_type = RTM_NEWNEIGH; + req.ndm.ndm_family = neighbor->type; + req.ndm.ndm_ifindex = dev->index; + req.ndm.ndm_type = RTN_UNICAST; + + netlink_add_rtattr_l(&req.n, sizeof(req), NDA_DST, + neighbor->addr, neighbor->addr_len); + + if (hwaddr != NULL && hwaddr->type != PF_UNSPEC) { + req.ndm.ndm_state = NUD_REACHABLE; + + netlink_add_rtattr_l(&req.n, 
sizeof(req), NDA_LLADDR, + hwaddr->addr, hwaddr->addr_len); + + nhrp_debug("NL-ARP(%s) %s is-at %s", + dev->name, + nhrp_address_format(neighbor, sizeof(neigh), neigh), + nhrp_address_format(hwaddr, sizeof(nbma), nbma)); + } else { + req.ndm.ndm_state = NUD_FAILED; + + nhrp_debug("NL-ARP(%s) %s not-reachable", + dev->name, + nhrp_address_format(neighbor, sizeof(neigh), neigh)); + } + + return netlink_send(&talk_fd, &req.n); +} + diff --git a/nhrp/sysdep_pfpacket.c b/nhrp/sysdep_pfpacket.c new file mode 100644 index 0000000..514b848 --- /dev/null +++ b/nhrp/sysdep_pfpacket.c @@ -0,0 +1,388 @@ +/* sysdep_pfpacket.c - Tracing of forwarded packets using PF_PACKET + * + * Copyright (C) 2007-2009 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libev.h" +#include "nhrp_defines.h" +#include "nhrp_common.h" +#include "nhrp_interface.h" +#include "nhrp_peer.h" + +#define MAX_OPCODES 100 + +struct multicast_packet { + struct nhrp_interface *iface; + struct sockaddr_ll lladdr; + unsigned int pdulen; + unsigned char pdu[1500]; +}; + +static struct ev_io packet_io; +static struct ev_timer install_filter_timer; +static struct ev_idle mcast_route; + +static struct multicast_packet mcast_queue[16]; +static int mcast_head = 0, mcast_tail = 0; + + +enum { + LABEL_NEXT = 0, + LABEL_SKIP1, + LABEL_SKIPN, + LABEL_DROP, + LABEL_CHECK_MULTICAST, + LABEL_CHECK_MULTICAST_DESTINATION, + LABEL_CHECK_TRAFFIC_INDICATION, + LABEL_CHECK_NON_LOCAL_ADDRESS, + NUM_LABELS +}; + +struct filter { + int pos[NUM_LABELS]; + int numops; + struct sock_filter code[MAX_OPCODES]; +}; + +static void emit_stmt(struct filter *f, __u16 
code, __u32 k) +{ + if (f->numops < MAX_OPCODES) { + f->code[f->numops].code = code; + f->code[f->numops].jt = 0; + f->code[f->numops].jf = 0; + f->code[f->numops].k = k; + } + f->numops++; +} + +static void emit_jump(struct filter *f, __u16 code, __u32 k, __u8 jt, __u8 jf) +{ + if (f->numops < MAX_OPCODES) { + f->code[f->numops].code = code; + f->code[f->numops].jt = jt; + f->code[f->numops].jf = jf; + f->code[f->numops].k = k; + } + f->numops++; +} + +static void mark(struct filter *f, int label) +{ + f->pos[label] = f->numops; +} + +static int check_interface_multicast(void *ctx, struct nhrp_interface *iface) +{ + struct filter *f = (struct filter *) ctx; + + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + return 0; + if (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) + return 0; + + if (iface->mcast_mask || iface->mcast_numaddr) + emit_jump(f, BPF_JMP|BPF_JEQ|BPF_K, iface->index, + LABEL_CHECK_MULTICAST_DESTINATION, LABEL_NEXT); + + return 0; +} + +static int drop_matching_address(void *ctx, struct nhrp_peer *peer) +{ + struct filter *f = (struct filter *) ctx; + unsigned long addr, mask; + + if (peer->protocol_type != ETHPROTO_IP) + return 0; + + addr = htonl(*((unsigned long *) peer->protocol_address.addr)); + if (peer->prefix_length != 32) { + mask = 0xffffffff >> peer->prefix_length; + emit_jump(f, BPF_JMP|BPF_JGE|BPF_K, addr & ~mask, LABEL_NEXT, LABEL_SKIP1); + emit_jump(f, BPF_JMP|BPF_JGT|BPF_K, addr | mask, LABEL_NEXT, LABEL_DROP); + } else { + emit_jump(f, BPF_JMP|BPF_JEQ|BPF_K, addr, LABEL_DROP, LABEL_NEXT); + } + + return 0; +} + +static int check_interface_traffic_indication(void *ctx, struct nhrp_interface *iface) +{ + struct filter *f = (struct filter *) ctx; + + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + return 0; + if (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) + return 0; + if (!(iface->flags & NHRP_INTERFACE_FLAG_REDIRECT)) + return 0; + + emit_jump(f, BPF_JMP|BPF_JEQ|BPF_K, iface->index, + 
LABEL_CHECK_NON_LOCAL_ADDRESS, LABEL_NEXT); + + return 0; +} + +static void install_filter_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer_selector sel; + struct sock_fprog prog; + struct filter f; + int i; + + memset(&prog, 0, sizeof(prog)); + memset(&f, 0, sizeof(f)); + + /* Check for IPv4 */ + emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, SKF_AD_OFF+SKF_AD_PROTOCOL); + emit_jump(&f, BPF_JMP|BPF_JEQ|BPF_K, ETH_P_IP, LABEL_NEXT, LABEL_DROP); + + /* Traffic indication checking is for incoming packets + * Multicast checking is for outgoing packets */ + emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, SKF_AD_OFF+SKF_AD_PKTTYPE); + emit_jump(&f, BPF_JMP|BPF_JEQ|BPF_K, PACKET_OUTGOING, LABEL_CHECK_MULTICAST, LABEL_NEXT); + emit_jump(&f, BPF_JMP|BPF_JEQ|BPF_K, PACKET_HOST, LABEL_CHECK_TRAFFIC_INDICATION, LABEL_DROP); + + /* MULTICAST check - for interfaces that have MC forwarding enabled */ + mark(&f, LABEL_CHECK_MULTICAST); + emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, SKF_AD_OFF+SKF_AD_IFINDEX); + nhrp_interface_foreach(check_interface_multicast, &f); + emit_stmt(&f, BPF_RET|BPF_K, 0); + + /* Check for multicast IPv4 destination - accept on match (all packet) */ + mark(&f, LABEL_CHECK_MULTICAST_DESTINATION); + emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, offsetof(struct iphdr, daddr)); + emit_jump(&f, BPF_JMP|BPF_JGE|BPF_K, 0xe0000000, LABEL_NEXT, LABEL_DROP); + emit_jump(&f, BPF_JMP|BPF_JGE|BPF_K, 0xf0000000, LABEL_DROP, LABEL_NEXT); + emit_stmt(&f, BPF_RET|BPF_K, 65535); + + /* TRAFFIC INDICATION check - is destination non-local + * if yes, capture headers for NHRP traffic indication */ + mark(&f, LABEL_CHECK_TRAFFIC_INDICATION); + emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, SKF_AD_OFF+SKF_AD_IFINDEX); + nhrp_interface_foreach(check_interface_traffic_indication, &f); + emit_stmt(&f, BPF_RET|BPF_K, 0); + + mark(&f, LABEL_CHECK_NON_LOCAL_ADDRESS); + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ADDR); + emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, offsetof(struct iphdr, 
daddr)); + nhrp_peer_foreach(drop_matching_address, &f, &sel); + emit_stmt(&f, BPF_RET|BPF_K, 68); + + mark(&f, LABEL_DROP); + emit_stmt(&f, BPF_RET|BPF_K, 0); + + /* All ok so far? */ + if (f.numops >= MAX_OPCODES) { + nhrp_error("Filter code buffer too small (code actual length %d)", + f.numops); + return; + } + + /* Fixup jumps to be relative */ + for (i = 0; i < f.numops; i++) { + if (BPF_CLASS(f.code[i].code) == BPF_JMP) { + if (f.code[i].jt > LABEL_SKIPN) + f.code[i].jt = f.pos[f.code[i].jt] - i - 1; + if (f.code[i].jf > LABEL_SKIPN) + f.code[i].jf = f.pos[f.code[i].jf] - i - 1; + } + } + + /* Attach filter */ + prog.len = f.numops; + prog.filter = f.code; + if (setsockopt(packet_io.fd, SOL_SOCKET, SO_ATTACH_FILTER, + &prog, sizeof(prog))) + return; + + nhrp_info("Filter code installed (%d opcodes)", f.numops); +} + +int forward_local_addresses_changed(void) +{ + if (install_filter_timer.cb != NULL) + ev_timer_start(&install_filter_timer); + return TRUE; +} + +static void send_multicast(struct ev_idle *w, int revents) +{ + struct multicast_packet *pkt; + struct nhrp_peer *peer; + struct iovec iov; + struct msghdr msg; + + if (mcast_head == mcast_tail) { + ev_idle_stop(&mcast_route); + return; + } + + /* Pop a packet */ + pkt = &mcast_queue[mcast_tail]; + mcast_tail = (mcast_tail + 1) % ARRAY_SIZE(mcast_queue); + + /* And softroute it forward */ + iov.iov_base = pkt->pdu; + iov.iov_len = pkt->pdulen; + msg = (struct msghdr) { + .msg_name = &pkt->lladdr, + .msg_namelen = sizeof(pkt->lladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + list_for_each_entry(peer, &pkt->iface->mcast_list, mcast_list_entry) { + /* Update NBMA destination */ + pkt->lladdr.sll_halen = peer->next_hop_address.addr_len; + memcpy(pkt->lladdr.sll_addr, peer->next_hop_address.addr, + pkt->lladdr.sll_halen); + + /* Best effort attempt to emulate multicast */ + (void) sendmsg(packet_io.fd, &msg, 0); + } +} + +static void pfp_read_cb(struct ev_io *w, int revents) +{ + struct nhrp_address 
nbma_src, src, dst; + struct nhrp_interface *iface; + struct sockaddr_ll *lladdr; + struct iovec iov; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; + char fr[32], to[32]; + int r, fd = w->fd; + + if (!(revents & EV_READ)) + return; + + while (TRUE) { + /* Get a scratch buffer directly from mcast queue, so we do + * not need to copy the data later. */ + msg.msg_name = &mcast_queue[mcast_head].lladdr; + msg.msg_namelen = sizeof(mcast_queue[mcast_head].lladdr); + iov.iov_base = mcast_queue[mcast_head].pdu; + iov.iov_len = sizeof(mcast_queue[mcast_head].pdu); + + /* Receive */ + r = recvmsg(fd, &msg, MSG_DONTWAIT); + mcast_queue[mcast_head].pdulen = r; + + /* Process */ + if (r < 0) { + if (errno == EINTR) + continue; + if (errno == EAGAIN) + return; + nhrp_perror("PF_PACKET overrun"); + continue; + } + + if (r == 0) { + nhrp_error("PF_PACKET returned EOF"); + return; + } + + lladdr = &mcast_queue[mcast_head].lladdr; + if (lladdr->sll_pkttype != PACKET_OUTGOING && + lladdr->sll_pkttype != PACKET_HOST) + continue; + + iface = nhrp_interface_get_by_index(lladdr->sll_ifindex, FALSE); + if (iface == NULL) + continue; + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + continue; + + if (!nhrp_address_parse_packet(lladdr->sll_protocol, + r, iov.iov_base, + &src, &dst)) + return; + + if (nhrp_address_is_multicast(&dst) && + lladdr->sll_pkttype == PACKET_OUTGOING) { + nhrp_debug("Multicast from %s to %s", + nhrp_address_format(&src, sizeof(fr), fr), + nhrp_address_format(&dst, sizeof(to), to)); + + /* Queue packet for processing later (handle important + * stuff first) */ + mcast_queue[mcast_head].iface = iface; + mcast_head = (mcast_head + 1) % ARRAY_SIZE(mcast_queue); + + /* Drop packets from queue tail, if we haven't processed + * them yet.
*/ + if (mcast_head == mcast_tail) + mcast_tail = (mcast_tail + 1) % + ARRAY_SIZE(mcast_queue); + + ev_idle_start(&mcast_route); + } else if (lladdr->sll_pkttype == PACKET_HOST) { + nhrp_address_set(&nbma_src, PF_INET, + lladdr->sll_halen, + lladdr->sll_addr); + nhrp_packet_send_traffic(iface, + &nbma_src, &src, &dst, + lladdr->sll_protocol, + iov.iov_base, r); + } + } +} + +int forward_init(void) +{ + int fd; + + fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL)); + if (fd < 0) { + nhrp_error("Unable to create PF_PACKET socket"); + return FALSE; + } + + fcntl(fd, F_SETFD, FD_CLOEXEC); + + ev_io_init(&packet_io, pfp_read_cb, fd, EV_READ); + ev_io_start(&packet_io); + + ev_timer_init(&install_filter_timer, install_filter_cb, .01, .0); + install_filter_cb(&install_filter_timer, 0); + + ev_idle_init(&mcast_route, send_multicast); + ev_set_priority(&mcast_route, -1); + + return TRUE; +} + +void forward_cleanup(void) +{ + ev_io_stop(&packet_io); + close(packet_io.fd); + ev_timer_stop(&install_filter_timer); + ev_idle_stop(&mcast_route); +} diff --git a/nhrp/sysdep_syslog.c b/nhrp/sysdep_syslog.c new file mode 100644 index 0000000..c8f9f7e --- /dev/null +++ b/nhrp/sysdep_syslog.c @@ -0,0 +1,55 @@ +/* sysdep_syslog.c - Logging via syslog + * + * Copyright (C) 2007 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#include +#include +#include +#include +#include + +#include "nhrp_defines.h" +#include "nhrp_common.h" + +int log_init(void) +{ + openlog("opennhrp", LOG_PERROR | LOG_PID, LOG_DAEMON); + + return TRUE; +} + +void nhrp_log(int level, const char *format, ...)
+{ + va_list va; + int l; + + switch (level) { + case NHRP_LOG_ERROR: + l = LOG_ERR; + break; + case NHRP_LOG_INFO: + l = LOG_INFO; + break; + case NHRP_LOG_DEBUG: + default: + l = LOG_DEBUG; + break; + } + + va_start(va, format); + vsyslog(l, format, va); + va_end(va); +} + +void nhrp_perror(const char *message) +{ + nhrp_error("%s: %s", message, strerror(errno)); +} diff --git a/patches/ipsec-tools-0.7.diff b/patches/ipsec-tools-0.7.diff new file mode 100644 index 0000000..1efba6c --- /dev/null +++ b/patches/ipsec-tools-0.7.diff @@ -0,0 +1,1832 @@ +Index: ipsec-tools-cvs/src/racoon/pfkey.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/pfkey.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/pfkey.c 2008-01-04 15:18:21.000000000 +0200 +@@ -92,6 +92,7 @@ + #include "algorithm.h" + #include "sainfo.h" + #include "admin.h" ++#include "evt.h" + #include "privsep.h" + #include "strnames.h" + #include "backupsa.h" +@@ -1263,9 +1264,10 @@ + + /* turn off the timer for calling pfkey_timeover() */ + SCHED_KILL(iph2->sce); +- ++ + /* update status */ + iph2->status = PHASE2ST_ESTABLISHED; ++ evt_phase2(iph2, EVTT_PHASE2_UP, NULL); + + #ifdef ENABLE_STATS + gettimeofday(&iph2->end, NULL); +@@ -1636,7 +1638,6 @@ + struct ph2handle *iph2[MAXNESTEDSA]; + struct sockaddr *src, *dst; + int n; /* # of phase 2 handler */ +- int remoteid=0; + #ifdef HAVE_SECCTX + struct sadb_x_sec_ctx *m_sec_ctx; + #endif /* HAVE_SECCTX */ +@@ -1825,63 +1826,12 @@ + return -1; + } + +- plog(LLV_DEBUG, LOCATION, NULL, +- "new acquire %s\n", spidx2str(&sp_out->spidx)); +- +- /* get sainfo */ +- { +- vchar_t *idsrc, *iddst; +- +- idsrc = ipsecdoi_sockaddr2id((struct sockaddr *)&sp_out->spidx.src, +- sp_out->spidx.prefs, sp_out->spidx.ul_proto); +- if (idsrc == NULL) { +- plog(LLV_ERROR, LOCATION, NULL, +- "failed to get ID for %s\n", +- spidx2str(&sp_out->spidx)); +- delph2(iph2[n]); +- return -1; +- } +- iddst = 
ipsecdoi_sockaddr2id((struct sockaddr *)&sp_out->spidx.dst, +- sp_out->spidx.prefd, sp_out->spidx.ul_proto); +- if (iddst == NULL) { +- plog(LLV_ERROR, LOCATION, NULL, +- "failed to get ID for %s\n", +- spidx2str(&sp_out->spidx)); +- vfree(idsrc); +- delph2(iph2[n]); +- return -1; +- } +- { +- struct remoteconf *conf; +- conf = getrmconf(iph2[n]->dst); +- if (conf != NULL) +- remoteid=conf->ph1id; +- else{ +- plog(LLV_DEBUG, LOCATION, NULL, "Warning: no valid rmconf !\n"); +- remoteid=0; +- } +- } +- iph2[n]->sainfo = getsainfo(idsrc, iddst, NULL, remoteid); +- vfree(idsrc); +- vfree(iddst); +- if (iph2[n]->sainfo == NULL) { +- plog(LLV_ERROR, LOCATION, NULL, +- "failed to get sainfo.\n"); ++ if (isakmp_get_sainfo(iph2[n], sp_out, sp_in) < 0) { + delph2(iph2[n]); + return -1; +- /* XXX should use the algorithm list from register message */ + } + +- plog(LLV_DEBUG, LOCATION, NULL, +- "selected sainfo: %s\n", sainfo2str(iph2[n]->sainfo)); +- } + +- if (set_proposal_from_policy(iph2[n], sp_out, sp_in) < 0) { +- plog(LLV_ERROR, LOCATION, NULL, +- "failed to create saprop.\n"); +- delph2(iph2[n]); +- return -1; +- } + #ifdef HAVE_SECCTX + if (m_sec_ctx) { + set_secctx_in_proposal(iph2[n], spidx); +@@ -2814,7 +2764,7 @@ + struct sadb_msg buf, *newmsg; + int reallen; + int retry = 0; +- ++ + *lenp = -1; + do + { +@@ -2823,12 +2773,10 @@ + retry++; + } + while (*lenp < 0 && errno == EAGAIN && retry < 3); ++ + if (*lenp < 0) +- { +- if ( errno == EAGAIN ) *lenp = 0; /* non-fatal */ +- return NULL; /*fatal*/ +- } +- ++ return NULL; /*fatal*/ ++ + else if (*lenp < sizeof(buf)) + return NULL; + +Index: ipsec-tools-cvs/src/racoon/evt.h +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/evt.h 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/evt.h 2008-01-04 15:18:21.000000000 +0200 +@@ -4,6 +4,7 @@ + + /* + * Copyright (C) 2004 Emmanuel Dreyfus ++ * Copyright (C) 2007 Timo Teras + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without +@@ -34,12 +35,27 @@ + #ifndef _EVT_H + #define _EVT_H + +-struct evtdump { +- size_t len; +- struct sockaddr_storage src; +- struct sockaddr_storage dst; +- time_t timestamp; +- int type; ++#ifdef ENABLE_ADMINPORT ++ ++struct evt_listener { ++ LIST_ENTRY(evt_listener) ll_chain; ++ LIST_ENTRY(evt_listener) fd_chain; ++ int fd; ++}; ++LIST_HEAD(evt_listener_list, evt_listener); ++#define EVT_LISTENER_LIST(x) struct evt_listener_list x; ++ ++struct ph1handle; ++struct ph2handle; ++ ++struct evt_common { ++ uint32_t ec_type; ++ time_t ec_timestamp; ++ ++ struct sockaddr_storage ec_ph1src; ++ struct sockaddr_storage ec_ph1dst; ++ u_int32_t ec_ph2msgid; ++ + /* + * Optionnal list of struct isakmp_data + * for type EVTT_ISAKMP_CFG_DONE +@@ -47,42 +63,48 @@ + }; + + /* type */ +-#define EVTT_UNSEPC 0 +-#define EVTT_PHASE1_UP 1 +-#define EVTT_PHASE1_DOWN 2 +-#define EVTT_XAUTH_SUCCESS 3 +-#define EVTT_ISAKMP_CFG_DONE 4 +-#define EVTT_PHASE2_UP 5 +-#define EVTT_PHASE2_DOWN 6 +-#define EVTT_DPD_TIMEOUT 7 +-#define EVTT_PEER_NO_RESPONSE 8 +-#define EVTT_PEER_DELETE 9 +-#define EVTT_RACOON_QUIT 10 +-#define EVTT_XAUTH_FAILED 11 +-#define EVTT_OVERFLOW 12 /* Event queue overflowed */ +-#define EVTT_PEERPH1AUTH_FAILED 13 +-#define EVTT_PEERPH1_NOPROP 14 /* NO_PROPOSAL_CHOSEN & friends */ +-#define EVTT_NO_ISAKMP_CFG 15 /* no need to wait for mode_cfg */ +- +-struct evt { +- struct evtdump *dump; +- TAILQ_ENTRY(evt) next; +-}; ++#define EVTT_RACOON_QUIT 0x0001 + +-TAILQ_HEAD(evtlist, evt); ++#define EVTT_PHASE1_UP 0x0100 ++#define EVTT_PHASE1_DOWN 0x0101 ++#define EVTT_PHASE1_NO_RESPONSE 0x0102 ++#define EVTT_PHASE1_NO_PROPOSAL 0x0103 ++#define EVTT_PHASE1_AUTH_FAILED 0x0104 ++#define EVTT_PHASE1_DPD_TIMEOUT 0x0105 ++#define EVTT_PHASE1_PEER_DELETED 0x0106 ++#define EVTT_PHASE1_MODE_CFG 0x0107 ++#define EVTT_PHASE1_XAUTH_SUCCESS 0x0108 ++#define EVTT_PHASE1_XAUTH_FAILED 0x0109 ++ ++#define 
EVTT_PHASE2_NO_PHASE1 0x0200 ++#define EVTT_PHASE2_UP 0x0201 ++#define EVTT_PHASE2_DOWN 0x0202 ++#define EVTT_PHASE2_NO_RESPONSE 0x0203 ++ ++void evt_generic __P((int type, vchar_t *optdata)); ++void evt_phase1 __P((const struct ph1handle *ph1, int type, vchar_t *optdata)); ++void evt_phase2 __P((const struct ph2handle *ph2, int type, vchar_t *optdata)); ++ ++int evt_subscribe __P((struct evt_listener_list *list, int fd)); ++void evt_list_init __P((struct evt_listener_list *list)); ++void evt_list_cleanup __P((struct evt_listener_list *list)); ++int evt_get_fdmask __P((int nfds, fd_set *fdset)); ++void evt_handle_fdmask __P((fd_set *fdset)); + +-#define EVTLIST_MAX 32 ++#else + +-#ifdef ENABLE_ADMINPORT +-struct evtdump *evt_pop(void); +-vchar_t *evt_dump(void); +-void evt_push(struct sockaddr *, struct sockaddr *, int, vchar_t *); +-#endif ++#define EVT_LISTENER_LIST(x) + +-#ifdef ENABLE_ADMINPORT +-#define EVT_PUSH(src, dst, type, optdata) evt_push(src, dst, type, optdata); +-#else +-#define EVT_PUSH(src, dst, type, optdata) ; +-#endif ++#define evt_generic(type, optdata) ; ++#define evt_phase1(ph1, type, optdata) ; ++#define evt_phase2(ph2, type, optdata) ; ++ ++#define evt_subscribe(eventlist, fd) ; ++#define evt_list_init(eventlist) ; ++#define evt_list_cleanup(eventlist) ; ++#define evt_get_fdmask(nfds, fdset) nfds ++#define evt_handle_fdmask(fdset) ; ++ ++#endif /* ENABLE_ADMINPORT */ + + #endif /* _EVT_H */ +Index: ipsec-tools-cvs/src/racoon/evt.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/evt.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/evt.c 2008-01-04 15:18:21.000000000 +0200 +@@ -4,6 +4,7 @@ + + /* + * Copyright (C) 2004 Emmanuel Dreyfus ++ * Copyright (C) 2007 Timo Teras + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without +@@ -46,113 +47,219 @@ + #include "plog.h" + #include "misc.h" + #include "admin.h" ++#include "handler.h" + #include "gcmalloc.h" + #include "evt.h" + + #ifdef ENABLE_ADMINPORT +-struct evtlist evtlist = TAILQ_HEAD_INITIALIZER(evtlist); +-int evtlist_len = 0; + +-void +-evt_push(src, dst, type, optdata) +- struct sockaddr *src; +- struct sockaddr *dst; ++static EVT_LISTENER_LIST(evt_listeners); ++static EVT_LISTENER_LIST(evt_fds); ++ ++struct evtdump { ++ struct admin_com adm; ++ struct evt_common evt; ++}; ++ ++static struct evtdump * ++evtdump_create(type, optdata) + int type; + vchar_t *optdata; + { +- struct evtdump *evtdump; +- struct evt *evt; ++ struct evtdump *e; + size_t len; + +- /* If admin socket is disabled, silently discard anything */ +- if (adminsock_path == NULL) +- return; ++ len = sizeof(struct admin_com) + sizeof(struct evt_common); ++ if (optdata != NULL) ++ len += optdata->l; + +- /* If we are above the limit, don't record anything */ +- if (evtlist_len > EVTLIST_MAX) { +- plog(LLV_DEBUG, LOCATION, NULL, +- "Cannot record event: event queue overflowed\n"); +- return; ++ if ((e = racoon_malloc(len)) == NULL) { ++ plog(LLV_ERROR, LOCATION, NULL, "Cannot allocate event: %s\n", ++ strerror(errno)); ++ return NULL; + } + +- /* If we hit the limit, record an overflow event instead */ +- if (evtlist_len == EVTLIST_MAX) { +- plog(LLV_ERROR, LOCATION, NULL, +- "Cannot record event: event queue overflow\n"); +- src = NULL; +- dst = NULL; +- type = EVTT_OVERFLOW; +- optdata = NULL; ++ memset(e, 0, sizeof(struct evtdump)); ++ e->adm.ac_len = len; ++ e->adm.ac_cmd = ADMIN_SHOW_EVT; ++ e->adm.ac_errno = 0; ++ e->adm.ac_proto = 0; ++ e->evt.ec_type = type; ++ time(&e->evt.ec_timestamp); ++ if (optdata != NULL) ++ memcpy(e + 1, optdata->v, optdata->l); ++ ++ return e; ++} ++ ++static void ++evt_unsubscribe(l) ++ struct evt_listener *l; ++{ ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "[%d] 
admin connection released\n", l->fd); ++ ++ LIST_REMOVE(l, ll_chain); ++ LIST_REMOVE(l, fd_chain); ++ close(l->fd); ++ racoon_free(l); ++} ++ ++static void ++evtdump_broadcast(ll, e) ++ const struct evt_listener_list *ll; ++ struct evtdump *e; ++{ ++ struct evt_listener *l, *nl; ++ ++ for (l = LIST_FIRST(ll); l != NULL; l = nl) { ++ nl = LIST_NEXT(l, ll_chain); ++ ++ if (send(l->fd, e, e->adm.ac_len, ++ MSG_NOSIGNAL | MSG_DONTWAIT) < 0) { ++ plog(LLV_DEBUG, LOCATION, NULL, "Cannot send event to fd: %s\n", ++ strerror(errno)); ++ evt_unsubscribe(l); ++ } + } ++} + +- len = sizeof(*evtdump); +- if (optdata) +- len += optdata->l; ++void ++evt_generic(type, optdata) ++ int type; ++ vchar_t *optdata; ++{ ++ struct evtdump *e; + +- if ((evtdump = racoon_malloc(len)) == NULL) { +- plog(LLV_ERROR, LOCATION, NULL, "Cannot record event: %s\n", +- strerror(errno)); ++ if ((e = evtdump_create(type, optdata)) == NULL) + return; +- } + +- if ((evt = racoon_malloc(sizeof(*evt))) == NULL) { +- plog(LLV_ERROR, LOCATION, NULL, "Cannot record event: %s\n", +- strerror(errno)); +- racoon_free(evtdump); ++ evtdump_broadcast(&evt_listeners, e); ++ ++ racoon_free(e); ++} ++ ++void ++evt_phase1(ph1, type, optdata) ++ const struct ph1handle *ph1; ++ int type; ++ vchar_t *optdata; ++{ ++ struct evtdump *e; ++ ++ if ((e = evtdump_create(type, optdata)) == NULL) + return; ++ ++ if (ph1->local) ++ memcpy(&e->evt.ec_ph1src, ph1->local, sysdep_sa_len(ph1->local)); ++ if (ph1->remote) ++ memcpy(&e->evt.ec_ph1dst, ph1->remote, sysdep_sa_len(ph1->remote)); ++ ++ evtdump_broadcast(&ph1->evt_listeners, e); ++ evtdump_broadcast(&evt_listeners, e); ++ ++ racoon_free(e); ++} ++ ++void ++evt_phase2(ph2, type, optdata) ++ const struct ph2handle *ph2; ++ int type; ++ vchar_t *optdata; ++{ ++ struct evtdump *e; ++ struct ph1handle *ph1 = ph2->ph1; ++ ++ if ((e = evtdump_create(type, optdata)) == NULL) ++ return; ++ ++ if (ph1) { ++ if (ph1->local) ++ memcpy(&e->evt.ec_ph1src, ph1->local, 
sysdep_sa_len(ph1->local)); ++ if (ph1->remote) ++ memcpy(&e->evt.ec_ph1dst, ph1->remote, sysdep_sa_len(ph1->remote)); ++ } ++ e->evt.ec_ph2msgid = ph2->msgid; ++ ++ evtdump_broadcast(&ph2->evt_listeners, e); ++ if (ph1) ++ evtdump_broadcast(&ph1->evt_listeners, e); ++ evtdump_broadcast(&evt_listeners, e); ++ ++ racoon_free(e); ++} ++ ++int ++evt_subscribe(list, fd) ++ struct evt_listener_list *list; ++ int fd; ++{ ++ struct evt_listener *l; ++ ++ if ((l = racoon_malloc(sizeof(*l))) == NULL) { ++ plog(LLV_ERROR, LOCATION, NULL, ++ "Cannot allocate event listener: %s\n", ++ strerror(errno)); ++ return errno; + } + +- if (src) +- memcpy(&evtdump->src, src, sysdep_sa_len(src)); +- if (dst) +- memcpy(&evtdump->dst, dst, sysdep_sa_len(dst)); +- evtdump->len = len; +- evtdump->type = type; +- time(&evtdump->timestamp); ++ if (list == NULL) ++ list = &evt_listeners; + +- if (optdata) +- memcpy(evtdump + 1, optdata->v, optdata->l); ++ LIST_INSERT_HEAD(list, l, ll_chain); ++ LIST_INSERT_HEAD(&evt_fds, l, fd_chain); ++ l->fd = fd; + +- evt->dump = evtdump; +- TAILQ_INSERT_TAIL(&evtlist, evt, next); ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "[%d] admin connection is polling events\n", fd); + +- evtlist_len++; ++ return -2; ++} + +- return; ++void ++evt_list_init(list) ++ struct evt_listener_list *list; ++{ ++ LIST_INIT(list); + } + +-struct evtdump * +-evt_pop(void) { +- struct evtdump *evtdump; +- struct evt *evt; ++void ++evt_list_cleanup(list) ++ struct evt_listener_list *list; ++{ ++ while (!LIST_EMPTY(list)) ++ evt_unsubscribe(LIST_FIRST(list)); ++} + +- if ((evt = TAILQ_FIRST(&evtlist)) == NULL) +- return NULL; ++int ++evt_get_fdmask(nfds, fdset) ++ int nfds; ++ fd_set *fdset; ++{ ++ struct evt_listener *l; + +- evtdump = evt->dump; +- TAILQ_REMOVE(&evtlist, evt, next); +- racoon_free(evt); +- evtlist_len--; +- +- return evtdump; +-} +- +-vchar_t * +-evt_dump(void) { +- struct evtdump *evtdump; +- vchar_t *buf = NULL; +- +- if ((evtdump = evt_pop()) != NULL) { +- if ((buf = 
vmalloc(evtdump->len)) == NULL) { +- plog(LLV_ERROR, LOCATION, NULL, +- "evt_dump failed: %s\n", strerror(errno)); +- return NULL; +- } +- memcpy(buf->v, evtdump, evtdump->len); +- racoon_free(evtdump); ++ LIST_FOREACH(l, &evt_fds, fd_chain) { ++ FD_SET(l->fd, fdset); ++ if (l->fd + 1 > nfds) ++ nfds = l->fd + 1; + } + +- return buf; ++ return nfds; + } + ++void ++evt_handle_fdmask(fdset) ++ fd_set *fdset; ++{ ++ struct evt_listener *l, *nl; ++ ++ for (l = LIST_FIRST(&evt_fds); l != NULL; l = nl) { ++ nl = LIST_NEXT(l, fd_chain); /* evt_fds is linked through fd_chain; fetch next before l may be freed */ ++ ++ if (FD_ISSET(l->fd, fdset)) ++ evt_unsubscribe(l); ++ } ++} ++ ++ + #endif /* ENABLE_ADMINPORT */ +Index: ipsec-tools-cvs/src/racoon/handler.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/handler.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/handler.c 2008-01-04 15:18:21.000000000 +0200 +@@ -267,6 +267,7 @@ + iph1->dpd_fails = 0; + iph1->dpd_r_u = NULL; + #endif ++ evt_list_init(&iph1->evt_listeners); + + return iph1; + } +@@ -283,8 +284,7 @@ + + /* SA down shell script hook */ + script_hook(iph1, SCRIPT_PHASE1_DOWN); +- +- EVT_PUSH(iph1->local, iph1->remote, EVTT_PHASE1_DOWN, NULL); ++ evt_list_cleanup(&iph1->evt_listeners); + + #ifdef ENABLE_NATT + if (iph1->natt_flags & NAT_KA_QUEUED) +@@ -489,8 +489,8 @@ + + LIST_FOREACH(p, &ph2tree, chain) { + if (spid == p->spid && +- CMPSADDR(src, p->src) == 0 && +- CMPSADDR(dst, p->dst) == 0){ ++ cmpsaddrwild(src, p->src) == 0 && ++ cmpsaddrwild(dst, p->dst) == 0){ + /* Sanity check to detect zombie handlers + * XXX Sould be done "somewhere" more interesting, + * because we have lots of getph2byxxxx(), but this one +@@ -576,6 +576,7 @@ + return NULL; + + iph2->status = PHASE1ST_SPAWN; ++ evt_list_init(&iph2->evt_listeners); + + return iph2; + } +@@ -589,6 +590,8 @@ + initph2(iph2) + struct ph2handle *iph2; + { ++ evt_list_cleanup(&iph2->evt_listeners); ++ + sched_scrub_param(iph2); + iph2->sce = NULL; +
iph2->scr = NULL; +Index: ipsec-tools-cvs/src/racoon/isakmp_agg.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_agg.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_agg.c 2008-01-04 15:18:21.000000000 +0200 +@@ -587,8 +587,7 @@ + /* message printed inner oakley_validate_auth() */ + goto end; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEERPH1AUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL); + isakmp_info_send_n1(iph1, ptype, NULL); + goto end; + } +@@ -1486,8 +1485,7 @@ + /* message printed inner oakley_validate_auth() */ + goto end; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEERPH1AUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL); + isakmp_info_send_n1(iph1, ptype, NULL); + goto end; + } +Index: ipsec-tools-cvs/src/racoon/isakmp_base.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_base.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_base.c 2008-01-04 15:18:21.000000000 +0200 +@@ -716,8 +716,7 @@ + /* message printed inner oakley_validate_auth() */ + goto end; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEERPH1AUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL); + isakmp_info_send_n1(iph1, ptype, NULL); + goto end; + } +@@ -1242,8 +1241,7 @@ + /* message printed inner oakley_validate_auth() */ + goto end; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEERPH1AUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL); + isakmp_info_send_n1(iph1, ptype, NULL); + goto end; + } +Index: ipsec-tools-cvs/src/racoon/isakmp.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp.c 2008-01-04 15:18:21.000000000 +0200 +@@ -88,6 +88,9 @@ + #include 
"pfkey.h" + #include "crypto_openssl.h" + #include "policy.h" ++#include "algorithm.h" ++#include "proposal.h" ++#include "sainfo.h" + #include "isakmp_ident.h" + #include "isakmp_agg.h" + #include "isakmp_base.h" +@@ -1026,7 +1029,7 @@ + } + + /* new negotiation of phase 1 for initiator */ +-int ++struct ph1handle * + isakmp_ph1begin_i(rmconf, remote, local) + struct remoteconf *rmconf; + struct sockaddr *remote, *local; +@@ -1039,7 +1042,7 @@ + /* get new entry to isakmp status table. */ + iph1 = newph1(); + if (iph1 == NULL) +- return -1; ++ return NULL; + + iph1->status = PHASE1ST_START; + iph1->rmconf = rmconf; +@@ -1055,7 +1058,7 @@ + if ((iph1->mode_cfg = isakmp_cfg_mkstate()) == NULL) { + remph1(iph1); + delph1(iph1); +- return -1; ++ return NULL; + } + #endif + #ifdef ENABLE_FRAG +@@ -1072,7 +1075,7 @@ + if (copy_ph1addresses(iph1, rmconf, remote, local) < 0) { + remph1(iph1); + delph1(iph1); +- return -1; ++ return NULL; + } + + (void)insph1(iph1); +@@ -1108,7 +1111,7 @@ + remph1(iph1); + delph1(iph1); + +- return -1; ++ return NULL; + } + + #ifdef ENABLE_STATS +@@ -1119,7 +1122,7 @@ + timedelta(&start, &end)); + #endif + +- return 0; ++ return iph1; + } + + /* new negotiation of phase 1 for responder */ +@@ -1929,8 +1932,7 @@ + plog(LLV_ERROR, LOCATION, NULL, + "phase1 negotiation failed due to time up. %s\n", + isakmp_pindex(&iph1->index, iph1->msgid)); +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEER_NO_RESPONSE, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_NO_RESPONSE, NULL); + + return -1; + } +@@ -1939,8 +1941,7 @@ + plog(LLV_ERROR, LOCATION, NULL, + "phase1 negotiation failed due to send error. %s\n", + isakmp_pindex(&iph1->index, iph1->msgid)); +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEER_NO_RESPONSE, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_NO_RESPONSE, NULL); + return -1; + } + +@@ -1989,7 +1990,7 @@ + plog(LLV_ERROR, LOCATION, NULL, + "phase2 negotiation failed due to time up. 
%s\n", + isakmp_pindex(&iph2->ph1->index, iph2->msgid)); +- EVT_PUSH(iph2->src, iph2->dst, EVTT_PEER_NO_RESPONSE, NULL); ++ evt_phase2(iph2, EVTT_PHASE2_NO_RESPONSE, NULL); + unbindph12(iph2); + return -1; + } +@@ -1998,8 +1999,7 @@ + plog(LLV_ERROR, LOCATION, NULL, + "phase2 negotiation failed due to send error. %s\n", + isakmp_pindex(&iph2->ph1->index, iph2->msgid)); +- EVT_PUSH(iph2->src, iph2->dst, EVTT_PEER_NO_RESPONSE, NULL); +- ++ evt_phase2(iph2, EVTT_PHASE2_NO_RESPONSE, NULL); + return -1; + } + +@@ -2090,7 +2090,7 @@ + plog(LLV_INFO, LOCATION, NULL, + "ISAKMP-SA deleted %s-%s spi:%s\n", + src, dst, isakmp_pindex(&iph1->index, 0)); +- EVT_PUSH(iph1->local, iph1->remote, EVTT_PHASE1_DOWN, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_DOWN, NULL); + racoon_free(src); + racoon_free(dst); + +@@ -2237,7 +2237,7 @@ + saddrwop2str(iph2->dst)); + + /* start phase 1 negotiation as a initiator. */ +- if (isakmp_ph1begin_i(rmconf, iph2->dst, iph2->src) < 0) { ++ if (isakmp_ph1begin_i(rmconf, iph2->dst, iph2->src) == NULL) { + SCHED_KILL(sc); + return -1; + } +@@ -2270,6 +2270,71 @@ + return 0; + } + ++int ++isakmp_get_sainfo(iph2, sp_out, sp_in) ++ struct ph2handle *iph2; ++ struct secpolicy *sp_out, *sp_in; ++{ ++ int remoteid=0; ++ ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "new acquire %s\n", spidx2str(&sp_out->spidx)); ++ ++ /* get sainfo */ ++ { ++ vchar_t *idsrc, *iddst; ++ ++ idsrc = ipsecdoi_sockaddr2id((struct sockaddr *)&sp_out->spidx.src, ++ sp_out->spidx.prefs, sp_out->spidx.ul_proto); ++ if (idsrc == NULL) { ++ plog(LLV_ERROR, LOCATION, NULL, ++ "failed to get ID for %s\n", ++ spidx2str(&sp_out->spidx)); ++ return -1; ++ } ++ iddst = ipsecdoi_sockaddr2id((struct sockaddr *)&sp_out->spidx.dst, ++ sp_out->spidx.prefd, sp_out->spidx.ul_proto); ++ if (iddst == NULL) { ++ plog(LLV_ERROR, LOCATION, NULL, ++ "failed to get ID for %s\n", ++ spidx2str(&sp_out->spidx)); ++ vfree(idsrc); ++ return -1; ++ } ++ { ++ struct remoteconf *conf; ++ conf = getrmconf(iph2->dst); ++ if 
(conf != NULL) ++ remoteid=conf->ph1id; ++ else{ ++ plog(LLV_DEBUG, LOCATION, NULL, "Warning: no valid rmconf !\n"); ++ remoteid=0; ++ } ++ } ++ iph2->sainfo = getsainfo(idsrc, iddst, NULL, remoteid); ++ vfree(idsrc); ++ vfree(iddst); ++ if (iph2->sainfo == NULL) { ++ plog(LLV_ERROR, LOCATION, NULL, ++ "failed to get sainfo.\n"); ++ return -1; ++ /* XXX should use the algorithm list from register message */ ++ } ++ ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "selected sainfo: %s\n", sainfo2str(iph2->sainfo)); ++ } ++ ++ if (set_proposal_from_policy(iph2, sp_out, sp_in) < 0) { ++ plog(LLV_ERROR, LOCATION, NULL, ++ "failed to create saprop.\n"); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++ + /* + * receive GETSPI from kernel. + */ +@@ -3021,9 +3086,9 @@ + src, dst, + isakmp_pindex(&iph1->index, 0)); + +- EVT_PUSH(iph1->local, iph1->remote, EVTT_PHASE1_UP, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_UP, NULL); + if(!iph1->rmconf->mode_cfg) +- EVT_PUSH(iph1->local, iph1->remote, EVTT_NO_ISAKMP_CFG, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_MODE_CFG, NULL); + + racoon_free(src); + racoon_free(dst); +Index: ipsec-tools-cvs/src/racoon/isakmp_cfg.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_cfg.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_cfg.c 2008-01-04 15:18:21.000000000 +0200 +@@ -473,8 +473,7 @@ + "Cannot allocate memory: %s\n", strerror(errno)); + } else { + memcpy(buf->v, attrpl + 1, buf->l); +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_ISAKMP_CFG_DONE, buf); ++ evt_phase1(iph1, EVTT_PHASE1_MODE_CFG, buf); + vfree(buf); + } + } +Index: ipsec-tools-cvs/src/racoon/isakmp_ident.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_ident.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_ident.c 2008-01-04 15:18:21.000000000 +0200 +@@ -788,8 +788,7 @@ + /* msg printed inner oakley_validate_auth() 
*/ + goto end; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEERPH1AUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL); + isakmp_info_send_n1(iph1, type, NULL); + goto end; + } +@@ -1537,8 +1536,7 @@ + /* msg printed inner oakley_validate_auth() */ + goto end; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEERPH1AUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL); + isakmp_info_send_n1(iph1, type, NULL); + goto end; + } +Index: ipsec-tools-cvs/src/racoon/isakmp_inf.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_inf.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_inf.c 2008-01-04 15:18:21.000000000 +0200 +@@ -515,8 +515,7 @@ + del_ph1=getph1byindex((isakmp_index *)(delete + 1)); + if(del_ph1 != NULL){ + +- EVT_PUSH(del_ph1->local, del_ph1->remote, +- EVTT_PEERPH1_NOPROP, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_PEER_DELETED, NULL); + if (del_ph1->scr) + SCHED_KILL(del_ph1->scr); + +@@ -537,8 +536,6 @@ + delete->spi_size, delete->proto_id); + return 0; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEER_DELETE, NULL); + purge_ipsec_spi(iph1->remote, delete->proto_id, + (u_int32_t *)(delete + 1), num_spi); + break; +@@ -1615,7 +1612,7 @@ + plog(LLV_DEBUG, LOCATION, iph1->remote, "DPD monitoring....\n"); + + if (iph1->dpd_fails >= iph1->rmconf->dpd_maxfails) { +- EVT_PUSH(iph1->local, iph1->remote, EVTT_DPD_TIMEOUT, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_DPD_TIMEOUT, NULL); + purge_remote(iph1); + plog(LLV_DEBUG, LOCATION, iph1->remote, + "DPD: remote seems to be dead\n"); +Index: ipsec-tools-cvs/src/racoon/isakmp_xauth.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_xauth.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_xauth.c 2008-01-04 15:18:21.000000000 +0200 +@@ -1570,13 +1570,11 @@ + plog(LLV_ERROR, LOCATION, NULL, + "Xauth 
authentication failed\n"); + +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_XAUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_XAUTH_FAILED, NULL); + + iph1->mode_cfg->flags |= ISAKMP_CFG_DELETE_PH1; + } else { +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_XAUTH_SUCCESS, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_XAUTH_SUCCESS, NULL); + } + + +Index: ipsec-tools-cvs/src/racoon/session.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/session.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/session.c 2008-01-04 15:18:21.000000000 +0200 +@@ -192,6 +192,7 @@ + /* scheduling */ + timeout = schedular(); + ++ nfds = evt_get_fdmask(nfds, &rfds); + error = select(nfds, &rfds, (fd_set *)0, (fd_set *)0, timeout); + if (error < 0) { + switch (errno) { +@@ -211,6 +212,7 @@ + (FD_ISSET(lcconf->sock_admin, &rfds))) + admin_handler(); + #endif ++ evt_handle_fdmask(&rfds); + + for (p = lcconf->myaddrs; p; p = p->next) { + if (!p->addr) +@@ -451,7 +453,7 @@ + case SIGTERM: + plog(LLV_INFO, LOCATION, NULL, + "caught signal %d\n", sig); +- EVT_PUSH(NULL, NULL, EVTT_RACOON_QUIT, NULL); ++ evt_generic(EVTT_RACOON_QUIT, NULL); + pfkey_send_flush(lcconf->sock_pfkey, + SADB_SATYPE_UNSPEC); + #ifdef ENABLE_FASTQUIT +Index: ipsec-tools-cvs/src/racoon/handler.h +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/handler.h 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/handler.h 2008-01-04 15:18:21.000000000 +0200 +@@ -41,6 +41,7 @@ + + #include "isakmp_var.h" + #include "oakley.h" ++#include "evt.h" + + /* Phase 1 handler */ + /* +@@ -211,7 +212,7 @@ + #ifdef ENABLE_HYBRID + struct isakmp_cfg_state *mode_cfg; /* ISAKMP mode config state */ + #endif +- ++ EVT_LISTENER_LIST(evt_listeners); + }; + + /* Phase 2 handler */ +@@ -320,6 +321,7 @@ + + LIST_ENTRY(ph2handle) chain; + LIST_ENTRY(ph2handle) ph1bind; /* chain to ph1handle */ ++ 
EVT_LISTENER_LIST(evt_listeners); + }; + + /* +Index: ipsec-tools-cvs/src/racoon/isakmp_var.h +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_var.h 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_var.h 2008-01-04 15:18:21.000000000 +0200 +@@ -35,6 +35,7 @@ + #define _ISAKMP_VAR_H + + #include "vmbuf.h" ++#include "policy.h" + + #define PORT_ISAKMP 500 + #define PORT_ISAKMP_NATT 4500 +@@ -62,8 +63,8 @@ + struct isakmp_pl_nonce; /* XXX */ + + extern int isakmp_handler __P((int)); +-extern int isakmp_ph1begin_i __P((struct remoteconf *, struct sockaddr *, +- struct sockaddr *)); ++extern struct ph1handle *isakmp_ph1begin_i __P((struct remoteconf *, ++ struct sockaddr *, struct sockaddr *)); + + extern vchar_t *isakmp_parsewoh __P((int, struct isakmp_gen *, int)); + extern vchar_t *isakmp_parse __P((vchar_t *)); +@@ -87,6 +88,7 @@ + extern void isakmp_ph2delete_stub __P((void *)); + extern void isakmp_ph2delete __P((struct ph2handle *)); + ++extern int isakmp_get_sainfo __P((struct ph2handle *, struct secpolicy *, struct secpolicy *)); + extern int isakmp_post_acquire __P((struct ph2handle *)); + extern int isakmp_post_getspi __P((struct ph2handle *)); + extern void isakmp_chkph1there_stub __P((void *)); +Index: ipsec-tools-cvs/src/racoon/racoonctl.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/racoonctl.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/racoonctl.c 2008-01-04 15:18:21.000000000 +0200 +@@ -4,6 +4,7 @@ + + /* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. ++ * Copyright (C) 2007 Timo Teras. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without +@@ -135,26 +136,24 @@ + struct evtmsg { + int type; + char *msg; +- enum { UNSPEC, ERROR, INFO } level; + } evtmsg[] = { +- { EVTT_PHASE1_UP, "Phase 1 established", INFO }, +- { EVTT_PHASE1_DOWN, "Phase 1 deleted", INFO }, +- { EVTT_XAUTH_SUCCESS, "Xauth exchange passed", INFO }, +- { EVTT_ISAKMP_CFG_DONE, "ISAKMP mode config done", INFO }, +- { EVTT_PHASE2_UP, "Phase 2 established", INFO }, +- { EVTT_PHASE2_DOWN, "Phase 2 deleted", INFO }, +- { EVTT_DPD_TIMEOUT, "Peer not reachable anymore", ERROR }, +- { EVTT_PEER_NO_RESPONSE, "Peer not responding", ERROR }, +- { EVTT_PEER_DELETE, "Peer terminated security association", ERROR }, +- { EVTT_RACOON_QUIT, "Raccon terminated", ERROR }, +- { EVTT_OVERFLOW, "Event queue overflow", ERROR }, +- { EVTT_XAUTH_FAILED, "Xauth exchange failed", ERROR }, +- { EVTT_PEERPH1AUTH_FAILED, "Peer failed phase 1 authentication " +- "(certificate problem?)", ERROR }, +- { EVTT_PEERPH1_NOPROP, "Peer failed phase 1 initiation " +- "(proposal problem?)", ERROR }, +- { 0, NULL, UNSPEC }, +- { EVTT_NO_ISAKMP_CFG, "No need for ISAKMP mode config ", INFO }, ++ { EVTT_RACOON_QUIT, "Racoon terminated" }, ++ ++ { EVTT_PHASE1_UP, "Phase 1 established" }, ++ { EVTT_PHASE1_DOWN, "Phase 1 deleted" }, ++ { EVTT_PHASE1_NO_RESPONSE, "Phase 1 error: peer not responding" }, ++ { EVTT_PHASE1_NO_PROPOSAL, "Phase 1 error: no proposal chosen" }, ++ { EVTT_PHASE1_AUTH_FAILED, ++ "Phase 1 error: authentication failed (bad certificate?)" }, ++ { EVTT_PHASE1_DPD_TIMEOUT, "Phase 1 error: dead peer detected" }, ++ { EVTT_PHASE1_MODE_CFG, "Phase 1 mode configuration done" }, ++ { EVTT_PHASE1_XAUTH_SUCCESS, "Phase 1 Xauth succeeded" }, ++ { EVTT_PHASE1_XAUTH_FAILED, "Phase 1 Xauth failed" }, ++ ++ { EVTT_PHASE2_NO_PHASE1, "Phase 2 error: no suitable phase 1" }, ++ { EVTT_PHASE2_UP, "Phase 2 established" }, ++ { EVTT_PHASE2_DOWN, "Phase 2 deleted" }, ++ { EVTT_PHASE2_NO_RESPONSE, "Phase 2 
error: no response" }, + }; + + static int get_proto __P((char *)); +@@ -184,6 +183,7 @@ + { IPPROTO_ICMP, "icmp" }, + { IPPROTO_TCP, "tcp" }, + { IPPROTO_UDP, "udp" }, ++ { IPPROTO_GRE, "gre" }, + { 0, NULL }, + }; + +@@ -193,31 +193,13 @@ + + char *pname; + int long_format = 0; +- +-#define EVTF_NONE 0x0000 /* Ignore any events */ +-#define EVTF_LOOP 0x0001 /* Loop awaiting for new events */ +-#define EVTF_CFG_STOP 0x0002 /* Stop after ISAKMP mode config */ +-#define EVTF_CFG 0x0004 /* Print ISAKMP mode config info */ +-#define EVTF_ALL 0x0008 /* Print any events */ +-#define EVTF_PURGE 0x0010 /* Print all available events */ +-#define EVTF_PH1DOWN_STOP 0x0020 /* Stop when phase 1 SA gets down */ +-#define EVTF_PH1DOWN 0x0040 /* Print that phase 1 SA got down */ +-#define EVTF_ERR 0x0080 /* Print any error */ +-#define EVTF_ERR_STOP 0x0100 /* Stop on any error */ +- +-int evt_filter = EVTF_NONE; +-time_t evt_start; ++int evt_quit_event = 0; + + void dump_isakmp_sa __P((char *, int)); + void dump_internal __P((char *, int)); + char *pindex_isakmp __P((isakmp_index *)); + void print_schedule __P((caddr_t, int)); +-void print_evt __P((caddr_t, int)); +-void print_cfg __P((caddr_t, int)); +-void print_err __P((caddr_t, int)); +-void print_ph1down __P((caddr_t, int)); +-void print_ph1up __P((caddr_t, int)); +-int evt_poll __P((void)); ++void print_evt __P((struct evt_common *)); + char * fixed_addr __P((char *, char *, int)); + + static void +@@ -226,12 +208,15 @@ + printf( + "Usage:\n" + " %s reload-config\n" ++" %s show-schedule\n" + " %s [-l [-l]] show-sa [protocol]\n" + " %s flush-sa [protocol]\n" + " %s delete-sa \n" +-" %s establish-sa [-u identity] \n" ++" %s establish-sa [-u identity] [-w] \n" + " %s vpn-connect [-u identity] vpn_gateway\n" + " %s vpn-disconnect vpn_gateway\n" ++" %s show-event\n" ++" %s logout-user login\n" + "\n" + " : \"isakmp\", \"esp\" or \"ah\".\n" + " In the case of \"show-sa\" or \"flush-sa\", you can use \"ipsec\".\n" +@@ -240,8 
+225,8 @@ + " : {\"esp\",\"ah\"} \n" + " \n" + " : \"inet\" or \"inet6\"\n" +-" : \"icmp\", \"tcp\", \"udp\" or \"any\"\n", +- pname, pname, pname, pname, pname, pname, pname); ++" : \"icmp\", \"tcp\", \"udp\", \"gre\" or \"any\"\n", ++ pname, pname, pname, pname, pname, pname, pname, pname, pname, pname); + } + + /* +@@ -312,54 +297,24 @@ + + vfree(combuf); + +- if (com_recv(&combuf) != 0) +- goto bad; +- if (handle_recv(combuf) != 0) +- goto bad; +- +- vfree(combuf); ++ do { ++ if (com_recv(&combuf) != 0) ++ goto bad; ++ if (handle_recv(combuf) != 0) ++ goto bad; ++ vfree(combuf); ++ } while (evt_quit_event != 0); + +- if (evt_filter != EVTF_NONE) +- if (evt_poll() != 0) +- goto bad; +- ++ close(so); + exit(0); + +- bad: ++bad: ++ close(so); ++ if (errno == EEXIST) ++ exit(0); + exit(1); + } + +-int +-evt_poll(void) { +- struct timeval tv; +- vchar_t *recvbuf; +- vchar_t *sendbuf; +- +- if ((sendbuf = f_getevt(0, NULL)) == NULL) +- errx(1, "Cannot make combuf"); +- +- +- while (evt_filter & (EVTF_LOOP|EVTF_PURGE)) { +- /* handle_recv closes the socket time, so open it each time */ +- com_init(); +- +- if (com_send(sendbuf) != 0) +- errx(1, "Cannot send combuf"); +- +- if (com_recv(&recvbuf) == 0) { +- handle_recv(recvbuf); +- vfree(recvbuf); +- } +- +- tv.tv_sec = 0; +- tv.tv_usec = 10; +- (void)select(0, NULL, NULL, NULL, &tv); +- } +- +- vfree(sendbuf); +- return 0; +-} +- + /* %%% */ + /* + * return command buffer. +@@ -422,20 +377,8 @@ + vchar_t *buf; + struct admin_com *head; + +- /* +- * There are 3 ways of getting here +- * 1) racoonctl vc => evt_filter = (EVTF_LOOP|EVTF_CFG| ... 
) +- * 2) racoonctl es => evt_filter = EVTF_NONE +- * 3) racoonctl es -l => evt_filter = EVTF_LOOP +- * Catch the second case: show-event is here to purge all +- */ +- if (evt_filter == EVTF_NONE) +- evt_filter = (EVTF_ALL|EVTF_PURGE); +- +- if ((ac >= 1) && (strcmp(av[0], "-l") == 0)) +- evt_filter |= EVTF_LOOP; +- +- if (ac >= 2) ++ evt_quit_event = -1; ++ if (ac >= 1) + errx(1, "too many arguments"); + + buf = vmalloc(sizeof(*head)); +@@ -653,6 +596,7 @@ + char *id = NULL; + char *key = NULL; + struct admin_com_psk *acp; ++ int wait = 0; + + if (ac < 1) + errx(1, "insufficient arguments"); +@@ -673,6 +617,12 @@ + ac -= 2; + } + ++ if (ac >= 1 && strcmp(av[0], "-w") == 0) { ++ wait = 1; ++ av++; ++ ac--; ++ } ++ + /* need protocol */ + if (ac < 1) + errx(1, "insufficient arguments"); +@@ -687,12 +637,16 @@ + index = get_index(ac, av); + if (index == NULL) + return NULL; ++ if (wait) ++ evt_quit_event = EVTT_PHASE1_MODE_CFG; + break; + case ADMIN_PROTO_AH: + case ADMIN_PROTO_ESP: + index = get_index(ac, av); + if (index == NULL) + return NULL; ++ if (wait) ++ evt_quit_event = EVTT_PHASE2_UP; + break; + default: + errno = EPROTONOSUPPORT; +@@ -749,8 +703,7 @@ + if (ac < 1) + errx(1, "insufficient arguments"); + +- evt_filter = (EVTF_LOOP|EVTF_CFG|EVTF_CFG_STOP|EVTF_ERR|EVTF_ERR_STOP); +- time(&evt_start); ++ evt_quit_event = EVTT_PHASE1_MODE_CFG; + + /* Optional -u identity */ + if (strcmp(av[0], "-u") == 0) { +@@ -814,8 +767,7 @@ + if (ac > 1) + warnx("Extra arguments"); + +- evt_filter = +- (EVTF_PH1DOWN|EVTF_PH1DOWN_STOP|EVTF_LOOP|EVTF_ERR|EVTF_ERR_STOP); ++ evt_quit_event = EVTT_PHASE1_DOWN; + + nav[nac++] = isakmp; + nav[nac++] = inet; +@@ -1335,84 +1287,32 @@ + + + void +-print_evt(buf, len) +- caddr_t buf; +- int len; ++print_evt(evtdump) ++ struct evt_common *evtdump; + { +- struct evtdump *evtdump = (struct evtdump *)buf; + int i; + char *srcstr; + char *dststr; + +- for (i = 0; evtmsg[i].msg; i++) +- if (evtmsg[i].type == evtdump->type) +- break; +- +- if 
(evtmsg[i].msg == NULL) +- printf("Event %d: ", evtdump->type); ++ for (i = 0; i < sizeof(evtmsg) / sizeof(evtmsg[0]); i++) ++ if (evtmsg[i].type == evtdump->ec_type) ++ break; ++ ++ if (evtmsg[i].msg == NULL) ++ printf("Event %d: ", evtdump->ec_type); + else + printf("%s : ", evtmsg[i].msg); + +- if ((srcstr = saddr2str((struct sockaddr *)&evtdump->src)) == NULL) ++ if ((srcstr = saddr2str((struct sockaddr *)&evtdump->ec_ph1src)) == NULL) + printf("unknown"); +- else ++ else + printf("%s", srcstr); + printf(" -> "); +- if ((dststr = saddr2str((struct sockaddr *)&evtdump->dst)) == NULL) ++ if ((dststr = saddr2str((struct sockaddr *)&evtdump->ec_ph1dst)) == NULL) + printf("unknown"); +- else ++ else + printf("%s", dststr); + printf("\n"); +- +- return; +-} +- +-void +-print_err(buf, len) +- caddr_t buf; +- int len; +-{ +- struct evtdump *evtdump = (struct evtdump *)buf; +- int i; +- +- +- for (i = 0; evtmsg[i].msg; i++) +- if (evtmsg[i].type == evtdump->type) +- break; +- +- if (evtmsg[i].level != ERROR) +- return; +- +- if (evtmsg[i].msg == NULL) +- printf("Error: Event %d\n", evtdump->type); +- else +- printf("Error: %s\n", evtmsg[i].msg); +- +- if (evt_filter & EVTF_ERR_STOP) +- evt_filter &= ~EVTF_LOOP; +- +- return; +-} +- +-/* +- * Print a message when phase 1 SA goes down +- */ +-void +-print_ph1down(buf, len) +- caddr_t buf; +- int len; +-{ +- struct evtdump *evtdump = (struct evtdump *)buf; +- +- if (evtdump->type != EVTT_PHASE1_DOWN) +- return; +- +- printf("VPN connexion terminated\n"); +- +- if (evt_filter & EVTF_PH1DOWN_STOP) +- evt_filter &= ~EVTF_LOOP; +- +- return; + } + + /* +@@ -1423,15 +1323,14 @@ + caddr_t buf; + int len; + { +- struct evtdump *evtdump = (struct evtdump *)buf; ++ struct evt_common *evtdump = (struct evt_common *)buf; + struct isakmp_data *attr; + char *banner = NULL; + struct in_addr addr4; + + memset(&addr4, 0, sizeof(addr4)); + +- if (evtdump->type != EVTT_ISAKMP_CFG_DONE && +- evtdump->type != EVTT_NO_ISAKMP_CFG) ++ if 
(evtdump->ec_type != EVTT_PHASE1_MODE_CFG) + return; + + len -= sizeof(*evtdump); +@@ -1484,12 +1383,12 @@ + (n + sizeof(*attr) + ntohs(attr->lorv)); + } + } +- +- if (evtdump->type == EVTT_ISAKMP_CFG_DONE) ++ ++ if (len > 0) + printf("Bound to address %s\n", inet_ntoa(addr4)); + else + printf("VPN connexion established\n"); +- ++ + if (banner) { + struct winsize win; + int col = 0; +@@ -1506,13 +1405,8 @@ + printf("\n"); + racoon_free(banner); + } +- +- if (evt_filter & EVTF_CFG_STOP) +- evt_filter &= ~EVTF_LOOP; +- +- return; + } +- ++ + + char * + fixed_addr(addr, port, len) +@@ -1561,32 +1455,29 @@ + break; + + case ADMIN_SHOW_EVT: { +- struct evtdump *evtdump; ++ struct evt_common *ec; + +- /* We got no event */ +- if (len == 0) { +- /* If we were purging the queue, it is now done */ +- if (evt_filter & EVTF_PURGE) +- evt_filter &= ~EVTF_PURGE; ++ /* We got no event? */ ++ if (len == 0) + break; +- } +- +- if (len < sizeof(struct evtdump)) +- errx(1, "Short buffer\n"); + +- /* Toss outdated events */ +- evtdump = (struct evtdump *)buf; +- if (evtdump->timestamp < evt_start) +- break; ++ if (len < sizeof(struct evt_common)) ++ errx(1, "Short buffer\n"); + +- if (evt_filter & EVTF_ALL) +- print_evt(buf, len); +- if (evt_filter & EVTF_ERR) +- print_err(buf, len); +- if (evt_filter & EVTF_CFG) +- print_cfg(buf, len); +- if (evt_filter & EVTF_PH1DOWN) +- print_ph1down(buf, len); ++ ec = (struct evt_common *) buf; ++ if (evt_quit_event <= 0) ++ print_evt(ec); ++ else if (evt_quit_event == ec->ec_type) { ++ switch (ec->ec_type) { ++ case EVTT_PHASE1_MODE_CFG: ++ print_cfg(ec, len); ++ break; ++ default: ++ print_evt(ec); ++ break; ++ }; ++ evt_quit_event = 0; ++ } + break; + } + +@@ -1643,10 +1534,8 @@ + break; + } + +- close(so); + return 0; + +- bad: +- close(so); ++bad: + return -1; + } +Index: ipsec-tools-cvs/src/racoon/admin.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/admin.c 2008-01-04 
15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/admin.c 2008-01-04 15:18:21.000000000 +0200 +@@ -76,6 +76,7 @@ + #include "evt.h" + #include "pfkey.h" + #include "ipsec_doi.h" ++#include "policy.h" + #include "admin.h" + #include "admin_var.h" + #include "isakmp_inf.h" +@@ -147,16 +148,18 @@ + goto end; + } + +- if (com.ac_cmd == ADMIN_RELOAD_CONF) { +- /* reload does not work at all! */ +- signal_handler(SIGHUP); +- goto end; +- } ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "[%d] admin connection established\n", so2); + + error = admin_process(so2, combuf); + +- end: +- (void)close(so2); ++end: ++ if (error != -2) { ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "[%d] admin connection closed\n", so2); ++ (void)close(so2); ++ } ++ + if (combuf) + racoon_free(combuf); + +@@ -177,13 +180,15 @@ + vchar_t *key = NULL; + int idtype = 0; + int error = -1; ++ int send_events = 0; ++ struct evt_listener_list *event_list = NULL; + + com->ac_errno = 0; + + switch (com->ac_cmd) { + case ADMIN_RELOAD_CONF: +- /* don't entered because of proccessing it in other place. 
*/ +- plog(LLV_ERROR, LOCATION, NULL, "should never reach here\n"); ++ signal_handler(SIGHUP); ++ error = 0; + goto out; + + case ADMIN_SHOW_SCHED: +@@ -208,9 +213,7 @@ + } + + case ADMIN_SHOW_EVT: +- /* It's not really an error, don't force racoonctl to quit */ +- if ((buf = evt_dump()) == NULL) +- com->ac_errno = 0; ++ send_events = 1; + break; + + case ADMIN_SHOW_SA: +@@ -391,17 +394,17 @@ + /* FALLTHROUGH */ + case ADMIN_ESTABLISH_SA: + { ++ struct admin_com_indexes *ndx; + struct sockaddr *dst; + struct sockaddr *src; +- src = (struct sockaddr *) +- &((struct admin_com_indexes *) +- ((caddr_t)com + sizeof(*com)))->src; +- dst = (struct sockaddr *) +- &((struct admin_com_indexes *) +- ((caddr_t)com + sizeof(*com)))->dst; ++ ++ ndx = (struct admin_com_indexes *) ((caddr_t)com + sizeof(*com)); ++ src = (struct sockaddr *) &ndx->src; ++ dst = (struct sockaddr *) &ndx->dst; + + switch (com->ac_proto) { + case ADMIN_PROTO_ISAKMP: { ++ struct ph1handle *ph1; + struct remoteconf *rmconf; + struct sockaddr *remote = NULL; + struct sockaddr *local = NULL; +@@ -409,6 +412,17 @@ + + com->ac_errno = -1; + ++ /* connected already? 
*/ ++ ph1 = getph1byaddrwop(src, dst); ++ if (ph1 != NULL) { ++ event_list = &ph1->evt_listeners; ++ if (ph1->status == PHASE1ST_ESTABLISHED) ++ com->ac_errno = EEXIST; ++ else ++ com->ac_errno = 0; ++ break; ++ } ++ + /* search appropreate configuration */ + rmconf = getrmconf(dst); + if (rmconf == NULL) { +@@ -459,9 +473,11 @@ + "%s\n", saddrwop2str(remote)); + + /* begin ident mode */ +- if (isakmp_ph1begin_i(rmconf, remote, local) < 0) ++ ph1 = isakmp_ph1begin_i(rmconf, remote, local); ++ if (ph1 == NULL) + goto out1; + ++ event_list = &ph1->evt_listeners; + com->ac_errno = 0; + out1: + if (local != NULL) +@@ -471,8 +487,105 @@ + break; + } + case ADMIN_PROTO_AH: +- case ADMIN_PROTO_ESP: ++ case ADMIN_PROTO_ESP: { ++ struct ph2handle *iph2; ++ struct secpolicy *sp_out = NULL, *sp_in = NULL; ++ struct policyindex spidx; ++ ++ com->ac_errno = -1; ++ ++ /* got outbound policy */ ++ memset(&spidx, 0, sizeof(spidx)); ++ spidx.dir = IPSEC_DIR_OUTBOUND; ++ memcpy(&spidx.src, src, sizeof(spidx.src)); ++ memcpy(&spidx.dst, dst, sizeof(spidx.dst)); ++ spidx.prefs = ndx->prefs; ++ spidx.prefd = ndx->prefd; ++ spidx.ul_proto = ndx->ul_proto; ++ ++ sp_out = getsp_r(&spidx); ++ if (sp_out) { ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "suitable outbound SP found: %s.\n", ++ spidx2str(&sp_out->spidx)); ++ } else { ++ com->ac_errno = ENOENT; ++ plog(LLV_NOTIFY, LOCATION, NULL, ++ "no outbound policy found: %s\n", ++ spidx2str(&spidx)); ++ break; ++ } ++ ++ iph2 = getph2byid(src, dst, sp_out->id); ++ if (iph2 != NULL) { ++ event_list = &iph2->evt_listeners; ++ if (iph2->status == PHASE2ST_ESTABLISHED) ++ com->ac_errno = EEXIST; ++ else ++ com->ac_errno = 0; ++ break; ++ } ++ ++ /* get inbound policy */ ++ memset(&spidx, 0, sizeof(spidx)); ++ spidx.dir = IPSEC_DIR_INBOUND; ++ memcpy(&spidx.src, dst, sizeof(spidx.src)); ++ memcpy(&spidx.dst, src, sizeof(spidx.dst)); ++ spidx.prefs = ndx->prefd; ++ spidx.prefd = ndx->prefs; ++ spidx.ul_proto = ndx->ul_proto; ++ ++ sp_in = 
getsp_r(&spidx); ++ if (sp_in) { ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "suitable inbound SP found: %s.\n", ++ spidx2str(&sp_in->spidx)); ++ } else { ++ com->ac_errno = ENOENT; ++ plog(LLV_NOTIFY, LOCATION, NULL, ++ "no inbound policy found: %s\n", ++ spidx2str(&spidx)); ++ break; ++ } ++ ++ /* allocate a phase 2 */ ++ iph2 = newph2(); ++ if (iph2 == NULL) { ++ plog(LLV_ERROR, LOCATION, NULL, ++ "failed to allocate phase2 entry.\n"); ++ break; ++ } ++ iph2->side = INITIATOR; ++ iph2->satype = admin2pfkey_proto(com->ac_proto); ++ iph2->spid = sp_out->id; ++ iph2->seq = pk_getseq(); ++ iph2->status = PHASE2ST_STATUS2; ++ ++ /* set end addresses of SA */ ++ iph2->dst = dupsaddr(dst); ++ iph2->src = dupsaddr(src); ++ if (iph2->dst == NULL || iph2->src == NULL) { ++ delph2(iph2); ++ break; ++ } ++ ++ if (isakmp_get_sainfo(iph2, sp_out, sp_in) < 0) { ++ delph2(iph2); ++ break; ++ } ++ ++ insph2(iph2); ++ if (isakmp_post_acquire(iph2) < 0) { ++ unbindph12(iph2); ++ remph2(iph2); ++ delph2(iph2); ++ break; ++ } ++ ++ event_list = &iph2->evt_listeners; ++ com->ac_errno = 0; + break; ++ } + default: + /* ignore */ + com->ac_errno = -1; +@@ -489,7 +602,8 @@ + if ((error = admin_reply(so2, com, buf)) != 0) + goto out; + +- error = 0; ++ if (send_events || event_list != NULL) ++ error = evt_subscribe(event_list, so2); + out: + if (buf != NULL) + vfree(buf); +Index: ipsec-tools-cvs/src/racoon/racoonctl.8 +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/racoonctl.8 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/racoonctl.8 2008-01-04 15:18:21.000000000 +0200 +@@ -55,17 +55,17 @@ + .Nm + establish-sa + .Op Fl u Ar identity ++.Op Fl w + .Ar saopts + .Nm + vpn-connect +-.Op Fl u identity ++.Op Fl u Ar identity + .Ar vpn_gateway + .Nm + vpn-disconnect + .Ar vpn_gateway + .Nm + show-event +-.Op Fl l + .Nm + logout-user + .Ar login +@@ -104,6 +104,8 @@ + either ISAKMP SAs, IPsec ESP SAs, IPsec AH SAs, or all 
IPsec SAs. + .It Xo establish-sa + .Oo Fl u Ar username ++.Oc ++.Oo Fl w + .Oc Ar saopts + .Xc + Establish an SA, either an ISAKMP SA, IPsec ESP SA, or IPsec AH SA. +@@ -115,6 +117,11 @@ + .Ar username + and these credentials will be used in the Xauth exchange. + .Pp ++Specifying ++.Fl w ++will make racoonctl wait until the SA is actually established or ++an error occurs. ++.Pp + .Ar saopts + has the following format: + .Bl -tag -width Bl +@@ -135,16 +142,9 @@ + This is a particular case of the previous command. + It will kill all SAs associated with + .Ar vpn_gateway . +-.It show-event Op Fl l +-Dump all events reported by +-.Xr racoon 8 , +-then quit. +-The +-.Fl l +-flag causes +-.Nm +-to not stop once all the events have been read, but rather to loop +-awaiting and reporting new events. ++.It show-event ++Listen for all events reported by ++.Xr racoon 8 . + .It logout-user Ar login + Delete all SA established on behalf of the Xauth user + .Ar login . diff --git a/patches/linux-2.6.19-ipgre.diff b/patches/linux-2.6.19-ipgre.diff new file mode 100644 index 0000000..655b175 --- /dev/null +++ b/patches/linux-2.6.19-ipgre.diff @@ -0,0 +1,44 @@ +Index: linux-2.6.19/net/ipv4/ip_gre.c +=================================================================== +--- linux-2.6.19.orig/net/ipv4/ip_gre.c 2006-11-29 23:57:37.000000000 +0200 ++++ linux-2.6.19/net/ipv4/ip_gre.c 2008-01-31 08:50:21.000000000 +0200 +@@ -1033,7 +1033,13 @@ + return 0; + } + +-#ifdef CONFIG_NET_IPGRE_BROADCAST ++static int ipgre_tunnel_parse_header(struct sk_buff *skb, unsigned char *haddr) ++{ ++ struct iphdr *iph = (struct iphdr*) skb->mac.raw; ++ memcpy(haddr, &iph->saddr, 4); ++ return 4; ++} ++ + /* Nice toy. Unfortunately, useless in real life :-) + It allows to construct virtual multiprotocol broadcast "LAN" + over the Internet, provided multicast routing is tuned. 
+@@ -1091,6 +1097,7 @@ + return -t->hlen; + } + ++#ifdef CONFIG_NET_IPGRE_BROADCAST + static int ipgre_open(struct net_device *dev) + { + struct ip_tunnel *t = netdev_priv(dev); +@@ -1139,6 +1146,7 @@ + dev->get_stats = ipgre_tunnel_get_stats; + dev->do_ioctl = ipgre_tunnel_ioctl; + dev->change_mtu = ipgre_tunnel_change_mtu; ++ dev->hard_header_parse = ipgre_tunnel_parse_header; + + dev->type = ARPHRD_IPGRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; +@@ -1193,6 +1201,8 @@ + dev->stop = ipgre_close; + } + #endif ++ } else { ++ dev->hard_header = ipgre_header; + } + + if (!tdev && tunnel->parms.link) diff --git a/patches/linux-2.6.20-ipgre.diff b/patches/linux-2.6.20-ipgre.diff new file mode 100644 index 0000000..a78ed17 --- /dev/null +++ b/patches/linux-2.6.20-ipgre.diff @@ -0,0 +1,44 @@ +Index: linux-2.6.20/net/ipv4/ip_gre.c +=================================================================== +--- linux-2.6.20.orig/net/ipv4/ip_gre.c 2008-01-04 15:05:34.000000000 +0200 ++++ linux-2.6.20/net/ipv4/ip_gre.c 2008-01-04 15:05:37.000000000 +0200 +@@ -1033,7 +1033,13 @@ + return 0; + } + +-#ifdef CONFIG_NET_IPGRE_BROADCAST ++static int ipgre_tunnel_parse_header(struct sk_buff *skb, unsigned char *haddr) ++{ ++ struct iphdr *iph = (struct iphdr*) skb_mac_header(skb); ++ memcpy(haddr, &iph->saddr, 4); ++ return 4; ++} ++ + /* Nice toy. Unfortunately, useless in real life :-) + It allows to construct virtual multiprotocol broadcast "LAN" + over the Internet, provided multicast routing is tuned. 
+@@ -1091,6 +1097,7 @@ + return -t->hlen; + } + ++#ifdef CONFIG_NET_IPGRE_BROADCAST + static int ipgre_open(struct net_device *dev) + { + struct ip_tunnel *t = netdev_priv(dev); +@@ -1139,6 +1146,7 @@ + dev->get_stats = ipgre_tunnel_get_stats; + dev->do_ioctl = ipgre_tunnel_ioctl; + dev->change_mtu = ipgre_tunnel_change_mtu; ++ dev->hard_header_parse = ipgre_tunnel_parse_header; + + dev->type = ARPHRD_IPGRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; +@@ -1193,6 +1201,8 @@ + dev->stop = ipgre_close; + } + #endif ++ } else { ++ dev->hard_header = ipgre_header; + } + + if (!tdev && tunnel->parms.link) diff --git a/patches/linux-2.6.22-ipgre.diff b/patches/linux-2.6.22-ipgre.diff new file mode 100644 index 0000000..59d4292 --- /dev/null +++ b/patches/linux-2.6.22-ipgre.diff @@ -0,0 +1,53 @@ +Index: linux-2.6.20/net/ipv4/ip_gre.c +=================================================================== +--- linux-2.6.20.orig/net/ipv4/ip_gre.c 2008-01-04 15:06:32.000000000 +0200 ++++ linux-2.6.20/net/ipv4/ip_gre.c 2008-01-04 15:08:50.000000000 +0200 +@@ -613,7 +613,7 @@ + offset += 4; + } + +- skb_reset_mac_header(skb); ++ skb->mac_header = skb->network_header; + __pskb_pull(skb, offset); + skb_reset_network_header(skb); + skb_postpull_rcsum(skb, skb_transport_header(skb), offset); +@@ -1032,7 +1032,13 @@ + return 0; + } + +-#ifdef CONFIG_NET_IPGRE_BROADCAST ++static int ipgre_tunnel_parse_header(struct sk_buff *skb, unsigned char *haddr) ++{ ++ struct iphdr *iph = (struct iphdr*) skb_mac_header(skb); ++ memcpy(haddr, &iph->saddr, 4); ++ return 4; ++} ++ + /* Nice toy. Unfortunately, useless in real life :-) + It allows to construct virtual multiprotocol broadcast "LAN" + over the Internet, provided multicast routing is tuned. 
+@@ -1090,6 +1096,7 @@ + return -t->hlen; + } + ++#ifdef CONFIG_NET_IPGRE_BROADCAST + static int ipgre_open(struct net_device *dev) + { + struct ip_tunnel *t = netdev_priv(dev); +@@ -1138,6 +1145,7 @@ + dev->get_stats = ipgre_tunnel_get_stats; + dev->do_ioctl = ipgre_tunnel_ioctl; + dev->change_mtu = ipgre_tunnel_change_mtu; ++ dev->hard_header_parse = ipgre_tunnel_parse_header; + + dev->type = ARPHRD_IPGRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; +@@ -1192,6 +1200,8 @@ + dev->stop = ipgre_close; + } + #endif ++ } else { ++ dev->hard_header = ipgre_header; + } + + if (!tdev && tunnel->parms.link) -- cgit v1.2.3