diff options
author | Mark Bryars <mark@darkskiez.co.uk> | 2012-05-04 22:19:13 +0100 |
---|---|---|
committer | Mark Bryars <mark@darkskiez.co.uk> | 2012-05-04 22:19:13 +0100 |
commit | e756c7948078bd5109c5b8a0f252851efc4532d6 (patch) | |
tree | 39c4c6d660d7c377989e1adc1492ec198cdaa084 | |
download | vyos-opennhrp-e756c7948078bd5109c5b8a0f252851efc4532d6.tar.gz vyos-opennhrp-e756c7948078bd5109c5b8a0f252851efc4532d6.zip |
Imported Upstream version 0.13
60 files changed, 21845 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..74f6e20 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +opennhrp +opennhrpctl +*.o +*.d +*.cmd +*~ @@ -0,0 +1,7 @@ +Author: +Timo Teräs <timo.teras@iki.fi> + +Patches from: +Steffen Schmidt +Natanael Copa +Halil Goektepe (Deutsche Telekom DTAG Laboratories) diff --git a/Make.rules b/Make.rules new file mode 100644 index 0000000..5c30966 --- /dev/null +++ b/Make.rules @@ -0,0 +1,289 @@ +## +# A set of makefile rules loosely based on kbuild. + +all: compile + +ifndef build + +toplevelrun:=yes + +## +# Disable default rules and make output pretty. + +MAKEFLAGS += -rR --no-print-directory + +Makefile: ; + +ifdef V + ifeq ("$(origin V)", "command line") + VERBOSE = $(V) + endif +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ($(VERBOSE),1) + quiet = + Q = +else + quiet=quiet_ + Q = @ +endif + +ifneq ($(findstring s,$(MAKEFLAGS)),) + quiet=silent_ +endif + +export quiet Q VERBOSE + +## +# Recursion helpers. +srctree := $(CURDIR) +objtree := $(CURDIR) + +export srctree objtree + +## +# Consult SCM for better version string. + +TAGPREFIX ?= v + +GIT_REV := $(shell test -d .git && git describe || echo exported) +ifneq ($(GIT_REV), exported) +FULL_VERSION := $(patsubst $(TAGPREFIX)%,%,$(GIT_REV)) +else +FULL_VERSION := $(VERSION) +endif + +RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS -o -name .pc -o -name .hg -o -name .git \) -prune -o + +export FULL_VERSION RCS_FIND_IGNORE + +## +# Utilities and default flags for them. + +CROSS_COMPILE ?= +CC := $(CROSS_COMPILE)gcc +LD := $(CROSS_COMPILE)ld +INSTALL := install +INSTALLDIR := $(INSTALL) -d + +CFLAGS ?= -g -O2 +CFLAGS_ALL := -Wall -Wstrict-prototypes -D_GNU_SOURCE -std=gnu99 +CFLAGS_ALL += $(CFLAGS) + +LDFLAGS ?= -g +LDFLAGS_ALL += $(LDFLAGS) + +export CC LD INSTALL INSTALLDIR CFLAGS_ALL LDFLAGS_ALL + +build := + +endif + +## +# Reset all variables. 
+ifneq ($(origin targets),file) +targets := +endif + +src := +obj := + +src += $(build) +obj := $(build) + +## +# Include directory specific stuff + +ifneq ($(build),) +$(build)/Makefile: ; +include $(build)/Makefile +endif + +## +# Rules and helpers + +PHONY += all compile install clean FORCE + +# Convinient variables +comma := , +squote := ' +empty := +space := $(empty) $(empty) + +# The temporary file to save gcc -MD generated dependencies must not +# contain a comma +depfile = $(subst $(comma),_,$(@D)/.$(@F).d) + +build-dir = $(patsubst %/,%,$(dir $@)) +target-dir = $(dir $@) + +## +# Build rules + +ifneq ($(NOCMDDEP),1) +# Check if both arguments has same arguments. Result in empty string if equal +# User may override this check using make NOCMDDEP=1 +# Check if both arguments has same arguments. Result is empty string if equal. +# User may override this check using make KBUILD_NOCMDDEP=1 +arg-check = $(strip $(filter-out $(cmd_$(1)), $(cmd_$@)) \ + $(filter-out $(cmd_$@), $(cmd_$(1))) ) +endif + +# echo command. +# Short version is used, if $(quiet) equals `quiet_', otherwise full one. +echo-cmd = $(if $($(quiet)cmd_$(1)),\ + echo ' $(call escsq,$($(quiet)cmd_$(1)))$(echo-why)';) + +make-cmd = $(subst \#,\\\#,$(subst $$,$$$$,$(call escsq,$(cmd_$(1))))) + +# printing commands +cmd = @$(echo-cmd) $(cmd_$(1)) + +# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o +dot-target = $(dir $@).$(notdir $@) + +# The temporary file to save gcc -MD generated dependencies must not +# contain a comma +depfile = $(subst $(comma),_,$(dot-target).d) + +# Escape single quote for use in echo statements +escsq = $(subst $(squote),'\$(squote)',$1) + +# Find any prerequisites that is newer than target or that does not exist. +# PHONY targets skipped in both cases. +local-target-prereqs = % +any-prereq = $(filter $(local-target-prereqs), $(filter-out $(PHONY),$?) 
$(filter-out $(PHONY) $(wildcard $^), $^)) + +# Execute command if command has changed or prerequisite(s) are updated. +# +if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ + @set -e; \ + $(echo-cmd) $(cmd_$(1)); \ + echo 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd) + +# Usage: $(call if_changed_rule,foo) +# Will check if $(cmd_foo) or any of the prerequisites changed, +# and if so will execute $(rule_foo). +if_changed_rule = $(if $(strip $(any-prereq) $(arg-check) ), \ + @set -e; \ + $(rule_$(1))) + +##### +# Handle options to gcc. + +c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS_ALL) $(CFLAGS_EXTRA) \ + $(CFLAGS_$(notdir $@)) +ld_flags = $(LDFLAGS_ALL) $(LDFLAGS_EXTRA) $(LDFLAGS_$(notdir $@)) + +##### +# Compile c-files. +quiet_cmd_cc_o_c = CC $@ + +cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< + +define rule_cc_o_c + $(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \ + (echo 'cmd_$@ := $(call make-cmd,cc_o_c)'; echo; cat $(depfile)) \ + > $(dot-target).cmd ; \ + rm $(depfile) +endef + +$(obj)/%.o: override local-target-prereqs=% + +$(obj)/%.o: $(src)/%.c FORCE + $(call if_changed_rule,cc_o_c) + +##### +# Link programs + +# Link an executable based on list of .o files, all plain c +# host-cmulti -> executable +__progs := $(addprefix $(obj)/,$(sort $(progs-y))) +cobjs := $(addprefix $(obj)/,$(sort $(foreach m,$(progs-y),$($(m)-objs)))) + +quiet_cmd_ld = LD $@ + cmd_ld = $(CC) $(ld_flags) -o $@ \ + $(addprefix $(obj)/,$($(@F)-objs)) \ + $(LIBS) $(LIBS_$(@F)) + +$(__progs): override local-target-prereqs=$(addprefix $(obj)/,$($(*F)-objs)) + +$(__progs): $(obj)/%: $(cobjs) FORCE + $(call if_changed,ld) + +targets += $(__progs) $(cobjs) + +### +# why - tell why a a target got build +ifeq ($(VERBOSE),2) +why = \ + $(if $(filter $@, $(PHONY)),- due to target is PHONY, \ + $(if $(wildcard $@), \ + $(if $(strip $(any-prereq)),- due to: $(any-prereq), \ + $(if $(arg-check), \ + $(if $(cmd_$@),- due to command line change: $(arg-check), \ + $(if $(filter $@, $(targets)), \ + - due 
to missing .cmd file, \ + - due to $(notdir $@) not in $$(targets) \ + ) \ + ) \ + ) \ + ), \ + - due to target missing \ + ) \ + ) + +echo-why = $(call escsq, $(strip $(why))) +endif + +## +# Top level rules. + +%/: FORCE + $(Q)$(MAKE) -f Make.rules build=$(build-dir) $(MAKECMDGOALS) + +compile: $(targets) + @: + +install: $(targets) FORCE + +clean: $(filter %/,$(targets)) +ifeq ($(toplevelrun),yes) + $(Q)find . $(RCS_FIND_IGNORE) \ + \( -name '*.[oas]' -o -name '.*.cmd' -o -name '.*.d' \) \ + -type f -print | xargs rm -f +endif + $(Q)rm -rf $(addprefix $(obj)/,$(sort $(progs-y) $(progs-n) $(progs-))) + +ifeq ($(origin VERSION),command line) +DIST_VERSION=$(VERSION) +else +DIST_VERSION=$(FULL_VERSION) +endif + +dist: + git archive --format tar --prefix=$(PACKAGE)-$(DIST_VERSION)/ \ + $(TAGPREFIX)$(DIST_VERSION) \ + | bzip2 -9 > $(PACKAGE)-$(DIST_VERSION).tar.bz2 + +FORCE: + +# Read all saved command lines and dependencies for the $(targets) we +# may be building above, using $(if_changed{,_dep}). As an +# optimization, we don't need to read them if the target does not +# exist, we will rebuild anyway in that case. + +targets := $(wildcard $(sort $(targets))) +cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) + +ifneq ($(cmd_files),) + include $(cmd_files) +endif + +# Declare the contents of the .PHONY variable as phony. We keep that +# information in a variable se we can use it in if_changed and friends. 
+ +.PHONY: $(PHONY) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0c85d98 --- /dev/null +++ b/Makefile @@ -0,0 +1,34 @@ +## +# Building opennhrp + +PACKAGE := opennhrp +VERSION := 0.13 + +## +# Default directories + +DESTDIR= +SBINDIR=/usr/sbin +CONFDIR=/etc/opennhrp +MANDIR=/usr/share/man +DOCDIR=/usr/share/doc/opennhrp +STATEDIR=/var/run + +export DESTDIR SBINDIR CONFDIR MANDIR DOCDIR STATEDIR + +## +# Top-level rules and targets + +targets := nhrp/ etc/ man/ + +## +# Include all rules and stuff + +include Make.rules + +## +# Top-level targets + +install: + $(INSTALLDIR) $(DESTDIR)$(DOCDIR) + $(INSTALL) README $(DESTDIR)$(DOCDIR) @@ -0,0 +1,289 @@ +============================================================================= +OpenNHRP ChangeLog http://sourceforge.net/projects/opennhrp +============================================================================= + +Detailed changelog is available via Git history via web: +http://opennhrp.git.sf.net/git/gitweb.cgi?p=opennhrp;a=blob;f=NEWS;hb=HEAD + +----------------------------------------------------------------------------- + opennhrp 0.13 - released 25/Dec/2011 +----------------------------------------------------------------------------- + - feature: add admin "interfaces show" command to display information about + the interface cache + - feature: support GRE interface binding changes (update gre nbma address + properly, and purge peer cache) to support dual ISP setups with failover + - fix: send registration reply even when all bindings are rejected + - fix: fix really the holding-time to apply to shortcut-target + - fix: fix hop count handling + - fix: various memory leaks fixed + - fix: fix memory corruption in the hlist structure (would be visible when + opennhrp is acting as NHS with heavy traffic) + +----------------------------------------------------------------------------- + opennhrp 0.12.3 - released 19/Aug/2011 
+----------------------------------------------------------------------------- + - feature: export reason why peer-down trigger was executed; and implement + 'lowerdown' opennhrpctl command for racoon hook to indicate that the + IPsec SA has died (opennhrp-script can then avoid the unnecessary and + possibly harmful call to racoonctl) + - fix: route NHRP queries always via NHS (because ipsec initial-contact + mechanism for purging dead IPsec SAs triggers after NHRP rediscovery, + and if remote peer was rebooted, the direct link might be dead) + - fix: don't negative cache entries on timeout (timeout is indication of + temporary error: none of NHS' is reachable) + - fix: don't reply to kernel's ARP queries using local route entries. + this also prevents bad shortcut-routes if the local GRE prefix is + a sub-prefix of routed subnet over the GRE + +----------------------------------------------------------------------------- + opennhrp 0.12.2 - released 07/Jul/2011 +----------------------------------------------------------------------------- + - fix: regression introduced in 0.12's policy routing changes that + shortcuts for in-NBMA network would not work unless using dynamic-map + NHS configuration (from David Ward) + +----------------------------------------------------------------------------- + opennhrp 0.12.1 - released 24/Mar/2011 +----------------------------------------------------------------------------- + - feature: export tunnel GRE key to opennhrp-script + - fix: build error against certain kernel versions and architectures + - fix: update registrations when 1/3 of the holding-time has passed as + per rfc recommendation + - fix: fix holding-time to apply properly to shortcut-target blocks + +----------------------------------------------------------------------------- + opennhrp 0.12 - released 01/Nov/2010 +----------------------------------------------------------------------------- + - feature: preliminary support for policy routing.
cache kernel routes for + each gre device and use them for routing lookups. nhrp shortcut routes + should be in separate routing table. this allows nhrp message routing to + always happen using bgp/ospf routes (for shortcut refreshes) and fixes + shortcuts to converge with the main routing information. + - feature: shortcut-target config option for subnet specific holding-time + overrides and aggregation of local subnet to "summary shortcut" + - fix: delete shortcut-routes if their gateway is removed to force renewal + of the route (shortcut gateway can change due to bgp/ospf routing change) + - fix: actually remove dynamic-nhs from peers if it's A entry is removed + - fix: disallow duplicate cached entries with dynamic-nhs entries + - randomize retry timer and increase script timeouts + - improve logging a bit + +----------------------------------------------------------------------------- + opennhrp 0.11.5 - released 16/Mar/2010 +----------------------------------------------------------------------------- + - clear negative cached entries for peers which sends resolution request + - use several netlink sockets to receive notifications so we don't lose + sync on all of them + - fix shortcut renewals + - libev updated to version 3.9 + - signal handling fixed + +----------------------------------------------------------------------------- + opennhrp 0.11.4 - released 04/Mar/2010 +----------------------------------------------------------------------------- + - multicast packet relay fix + - netlink buffer sizes increased + +----------------------------------------------------------------------------- + opennhrp 0.11.3 - released 30/Oct/2009 +----------------------------------------------------------------------------- + - handle dns lookup failures properly + - fix failover for shortcut routes + - detect forwarding loops for indications + - some code cleanups + +----------------------------------------------------------------------------- + opennhrp 0.11.2 - 
released 25/Sep/2009 +----------------------------------------------------------------------------- + - fixed libev usage bug that could cause crash on script timeout + - make lock file closed on exec so opennhrp-script instances won't keep + opennhrp daemon lock + - fixes traffic indications to work again (captured packet length was + not right) + +----------------------------------------------------------------------------- + opennhrp 0.11.1 - released 31/Aug/2009 +----------------------------------------------------------------------------- + - update libev version to 3.8 + - more permissive build for warnings (libev generates some warnings) + - fix packet filter installation timer + - fix a false assert for peer deletion + - disable icmp redirect properly + - minor fixes to documentation and example script + +----------------------------------------------------------------------------- + opennhrp 0.11 - released 18/Jun/2009 +----------------------------------------------------------------------------- + - introduce 'dynamic-map' directive to autodetect all next hop servers + from a domain name with multiple A entries + - 'multicast' directive to better control softswitching of multicast + packets + - use libev instead of the self written event handling code + - enable Forward NHS extension for Traffic Indications to drop the message + after it has visited all NHS:es (otherwise it would loop between them + until ttl expires) + - performance optimizations to packet capturing, multicast packet process + switching, handling of registration requests and logging + - fix 64-bit compatibility issues + - some code documentation and clean ups + +----------------------------------------------------------------------------- + opennhrp 0.10.3 - released 04/May/2009 +----------------------------------------------------------------------------- + - fix handling of c-ares timeouts + - fix cancellation of asynchronous operations in peer cache + - fix control socket default
location (broke on makefile rewrite) + - code clean up (rename reference counting functions) + +----------------------------------------------------------------------------- + opennhrp 0.10.2 - released 28/Apr/2009 +----------------------------------------------------------------------------- + - various safety measures in case of off-nbma routing loops + - fix a bug which caused static entries without 'register' to get deleted + - try to combine shortcut routes to get less nhrp cache entries + +----------------------------------------------------------------------------- + opennhrp 0.10.1 - released 22/Apr/2009 +----------------------------------------------------------------------------- + - fix the breakage in build system after the rewrite + - fix registration to servers when using domain names + +----------------------------------------------------------------------------- + opennhrp 0.10 - released 21/Apr/2009 +----------------------------------------------------------------------------- + - use c-ares library to make dns queries asynchronous + - fix mtu handling from registration requests + - avoid opennhrp-script zombie floods by reaping children between + processing registration request packets + - rewrite build system to something similar to kbuild + - migrate to git + +----------------------------------------------------------------------------- + opennhrp 0.9.3 - released 20/Feb/2009 +----------------------------------------------------------------------------- + - when public IP changes purge all related peer entries (opennhrp should + now survive and automatically re-register when dhcp enforces IP change) + - remove an assertion that was invalid (could cause opennhrp to abort + when acting as NHS in some situations) + - make monotonic clock work with old uclibc + +----------------------------------------------------------------------------- + opennhrp 0.9.2 - released 31/Dec/2008
+----------------------------------------------------------------------------- + - pid file locking change in 0.9.1 broke daemonization, make it work again + +----------------------------------------------------------------------------- + opennhrp 0.9.1 - released 31/Dec/2008 +----------------------------------------------------------------------------- + - fix a crash in peer cache enumeration + - update opennhrp-script to show how to add host route with mtu + - lock pid file as first thing (to prevent accidental startup when opennhrp + is already running) + +----------------------------------------------------------------------------- + opennhrp 0.9 - released 26/Dec/2008 +----------------------------------------------------------------------------- + - use monotonic system clock if available + - allow startup even if dns names are unresolveable + - make nhrp holding time configurable + - Cisco NHS specific feature: send cisco compatible purge if unique NBMA + mapping already exists (to re-register when NBMA address changes) + - additional opennhrp-script example with ipsec certificate checking + - some effort to make opennhrp compile on old system (in limited mode) + - detect NBMA MTU from interface and transmit it over NHRP and pass it to + opennhrp-script (to insert manual NBMA routes if path MTU discovery + does not work) + +----------------------------------------------------------------------------- + opennhrp 0.8 - released 03/Oct/2008 +----------------------------------------------------------------------------- + - licensing terms changed to GPL version 2 or later + - send purge request to shortcut subnets after registration + - clear redirection rate limiting cache for purge request addresses + - new admin commands: "redirect purge" and "schedule" + - rename admin commands: "flush", "purge" and "show" to have "cache" prefix + (accepts still old style commands for a while) + - make logging a bit less verbose + - minor fixes to renewals of peers and 
shortcut routes + - fix a memory leak + +----------------------------------------------------------------------------- + opennhrp 0.7.1 - released 18/Jun/2008 +----------------------------------------------------------------------------- + - use only primary interface addresses as nbma source address + - fix an access to freed memory in certain special cases of peer cache + enumeration + - fix a memory leak + +----------------------------------------------------------------------------- + opennhrp 0.7 - released 30/Apr/2008 +----------------------------------------------------------------------------- + - catch multicast packets and send them as multiple unicast packets + to all known peers + - new script events: interface-up (to clear neighbor and route caches + on startup) and peer-register (to e.g. validate peer protocol ip address + from the ipsec certificate) + - parse nat-oa for cached entries + - routing regression fixes (don't try to resolve unreachable statically + mapped peers) + - fix deletion of multiple cache entries from enumeration code + (crashed in some rare circumstances) + - check for IFA_LOCAL attribute presence before using it (fixes a crash) + - fix bug which caused negative cache entries to prevent registration + of the protocol address + - code cleanups and some optimizations + +----------------------------------------------------------------------------- + opennhrp 0.6.2 - released 04/Apr/2008 +----------------------------------------------------------------------------- + - accept shortcuts when a route to shortcut-destination interface exists + (in addition to local addresses in that interface) + - handle netlink link, address and route deleted notifications properly + - print error if opennhrp-script fails for some reason + - change peer flags: 'lower-up' means opennhrp-script was run successfully, + 'up' means registration has been also done (if it was required) + - fix matching of local-nbma selector when gre interface has no remote +
address and is not explicitly bound to other interface + - fix admin interface to give 'Affected-Entries' result correctly + - fix config file reading bug; handle last keyword even if there is no + final new line + - code cleanups and optimizations + +----------------------------------------------------------------------------- + opennhrp 0.6.1 - released 20/Mar/2008 +----------------------------------------------------------------------------- + - fix a crash in error path of packet forwarding + - fix routing of locally generated traffic indications + +----------------------------------------------------------------------------- + opennhrp 0.6 - released 19/Mar/2008 +----------------------------------------------------------------------------- + - accept hostname (domain name) as an NBMA address in config file + - sanitize admin interface: accept cache entry selectors on + flush, purge and show commands; slight changes to unix socket protocol + - multiple gre interfaces do not share nhrp cache anymore + - opennhrp-script: NHRP_SRCADDR and NHRP_SRCNBMA added + - do not let opennhrp-script inherit sockets file descriptors + - run peer-down script when peer was purged via admin interface + - add option -V to show version + - add option -v to show debug log messages (to see nl-arp messages) + - performance improvements + +----------------------------------------------------------------------------- + opennhrp 0.5 - released 05/Mar/2008 +----------------------------------------------------------------------------- + - opennhrpctl command line tool + - list nhrp cache + - purge entries by protocol or nbma address + - flush entries + - daemon mode + - allow comments in configuration file + - various bug fixes + - flush neighbor cache when interface is found + - do not create proxy arp entries when static mapping exists + +----------------------------------------------------------------------------- + opennhrp 0.4 - released 04/Jan/2008
+----------------------------------------------------------------------------- + - first announced release + @@ -0,0 +1,112 @@ +OpenNHRP Release Notes +====================== + +OpenNHRP is an NHRP implementation for Linux. It has most of the RFC2332 +and Cisco IOS extensions. + +Project homepage: http://sourceforge.net/projects/opennhrp + +Git repository: git://opennhrp.git.sourceforge.net/gitroot/opennhrp + + KERNEL REQUIREMENTS + +You need a kernel with ip_gre patched to support sending and receiving +using NBMA address. + +The support was originally added to 2.6.24-rc2, but it contains a bug +that prevents NAT detection. The latest fix is present in 2.6.24-rc7. + +Gentoo kernels: gentoo-sources-2.6.23-r1 and gentoo-sources-2.6.22-r10 +have the partitial support too (no NAT there either). + +For the brave who compile their own kernels, there are patches against +vanilla 2.6.20 and 2.6.22 kernels in the patches directory. Or just +upgrade to 2.6.24 or later and no patching is required. Though, there +has been a major performance fixes in newer kernels, so 2.6.35 or later +is strongly recommended. + +Also remember to turn on CONFIG_ARPD and CONFIG_NET_IPGRE in your kernel +configuration. + + SYSTEM REQUIREMENTS + +To compile OpenNHRP you need: +- GNU make (3.81 or later works) +- GCC +- pkg-config +- c-ares library (Ubuntu package: libc-ares-dev) + + COMPILING + +Just type 'make' and 'make install'. + + CONFIGURATION + +OpenNHRP currently supports only IPv4 over IPv4 using NBMA GRE tunnels. +To create NBMA GRE tunnel you might use following: + + ip tunnel add gre1 mode gre key 1234 ttl 64 + ip addr add 10.255.255.2/24 dev gre1 + ip link set gre1 up + +This should work with the configuration example in opennhrp.conf(5). + + IPSEC ENCRYPTION OF GRE PACKETS + +ipsec-tools 0.8.0 or later is recommended. Earlier versions need patching +for dmvpn to work properly. + +The ipsec-tools configuration I prefer to use is: encrypt all GRE +traffic in transport mode. 
IPsec policy for that should be defined in +/etc/ipsec.conf: + spdflush; + spdadd 0.0.0.0/0 0.0.0.0/0 gre -P out ipsec esp/transport//require; + spdadd 0.0.0.0/0 0.0.0.0/0 gre -P in ipsec esp/transport//require; + +And ipsec-tools configuration with pre-shared key could look something +like this: + +/etc/racoon/racoon.conf: + path pre_shared_key "/etc/racoon/psk.txt"; + remote anonymous { + exchange_mode aggressive; + lifetime time 24 hour; + my_identifier user_fqdn "my-user-name@my-domain.example"; + nat_traversal on; + # For ipsec-tools snapshot 2010-10-10 or later + script "/etc/opennhrp/racoon-ph1dead.sh" phase1_dead; + # For earlier ipsec-tools + # script "/etc/opennhrp/racoon-ph1down.sh" phase1_down; + proposal { + encryption_algorithm 3des; + hash_algorithm sha1; + authentication_method pre_shared_key; + dh_group 2; + } + } + sainfo anonymous { + pfs_group 2; + lifetime time 12 hour; + encryption_algorithm 3des, blowfish 448, rijndael; + authentication_algorithm hmac_sha1, hmac_md5; + compression_algorithm deflate; + } + +And /etc/racoon/psk.txt: + my-user-name@my-domain.example "my-secret-pre-shared-key" + +It is of course more secure to use certificates for authentication. +And using aggressive main mode is not recommended either, but it is +required to make FQDN pre-shared authentication work. This setup is +fast to do and can get you started with testing OpenNHRP. + + DOCUMENTATION + +Most of the OpenNHRP documentation is in the manpages. Read them. + +Also some general NHRP documents can be found from Cisco website +(www.cisco.com). + + BUGS + +Use the SourceForge bug tracker or mailing list. @@ -0,0 +1,27 @@ +Open items that need work on OpenNHRP: + +- interface-up, nhs-up, nhs-down need to be serialized for quagga + management. alternatively, the script could return some special + value meaning "try again soon". 
+ +- offload multicast packet forwarding to kernel + +- use mmapped pf_packet interface + +- nhrp_peer should be split to more files, it's relatively large now. + might split nhrp_peer to separate types. + +- Proper handling of unique bit. Currently registration of unique address + overwrites previous registration, but this is against the RFC. + +- Load balancing: return multiple CIE entries, when we have multiple + local IP addresses. When receiving multi CIE next-hop, balance traffic + or for shortcut routes, create a multi nexthop route. + +- Create some logic to detect if NBMA and public IPs are mixed up in + the "map" directive. Issue a warning about this. + +- Support reloading of configuration (via SIGHUP or "opennhrpctl reload") + +- Clean shutdown: send purge request to registration servers, dynamic + clients and possibly track resolution requests and purge those too. diff --git a/contrib/init-scripts/debian/opennhrp.init b/contrib/init-scripts/debian/opennhrp.init new file mode 100644 index 0000000..4a0fe94 --- /dev/null +++ b/contrib/init-scripts/debian/opennhrp.init @@ -0,0 +1,160 @@ +#!
/bin/sh +### BEGIN INIT INFO +# Provides: opennhrp +# Required-Start: $remote_fs +# Required-Stop: $remote_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: RFC 2332 2333 daemon +# Description: This file suports one instance of opennhrp +### END INIT INFO + +# Author: Robin David Hammond <rhammond+nhrp@databit7.com> +# +# Do NOT "set -e" + +# PATH should only include /usr/* if it runs after the mountnfs.sh script +PATH=/sbin:/usr/sbin:/bin:/usr/bin +DESC="OpenNextHopResolutionProtocol" +NAME=opennhrp +DAEMON=/usr/sbin/$NAME +PIDPATH=/var/run/$NAME +PIDFILE=$PIDPATH/pid +SCRIPTNAME=/etc/init.d/$NAME +CTRLPATH=/var/run/$NAME/ +CTRLPIPE=$CTRLPATH/ctrl + +CONFFILE=/etc/opennhrp/opennhrp.conf +SCRIPTFILE=/etc/opennhrp/opennhrp-script + +DAEMON_ARGS=" -d -a $CTRLPIPE -c $CONFFILE -s $SCRIPTFILE -p $PIDFILE" +# -a /var/run/opennhrp/ctrl -c /etc/opennhrp/opennhrp.conf -s /etc/opennhrp/opennhrp-script -d -p /var/run/opennhrp/pid +# Exit if the package is not installed +[ -x "$DAEMON" ] || exit 0 + +# Read configuration variable file if it is present +[ -r /etc/default/$NAME ] && . /etc/default/$NAME + +# Load the VERBOSE setting and other rcS variables +. /lib/init/vars.sh + +# Define LSB log_* functions. +# Depend on lsb-base (>= 3.0-6) to ensure that this file is present. +. /lib/lsb/init-functions + +# +# Function that starts the daemon/service +# +do_start() +{ + mkdir -p $PIDPATH + mkdir -p $CTRLPATH + # Return + # 0 if daemon has been started + # 1 if daemon was already running + # 2 if daemon could not be started + start-stop-daemon --start --quiet --pidfile $PIDFILE --exec $DAEMON --test > /dev/null \ + || return 1 + start-stop-daemon --start --quiet --pidfile $PIDFILE --exec $DAEMON -- \ + $DAEMON_ARGS \ + || return 2 + # Add code here, if necessary, that waits for the process to be ready + # to handle requests from services started subsequently which depend + # on this one. As a last resort, sleep for some time. 
+} + +# +# Function that stops the daemon/service +# +do_stop() +{ + # Return + # 0 if daemon has been stopped + # 1 if daemon was already stopped + # 2 if daemon could not be stopped + # other if a failure occurred + start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $PIDFILE --name $NAME + RETVAL="$?" + [ "$RETVAL" = 2 ] && return 2 + # Wait for children to finish too if this is a daemon that forks + # and if the daemon is only ever run from this initscript. + # If the above conditions are not satisfied then add some other code + # that waits for the process to drop all resources that could be + # needed by services started subsequently. A last resort is to + # sleep for some time. + start-stop-daemon --stop --quiet --oknodo --retry=0/30/KILL/5 --exec $DAEMON + [ "$?" = 2 ] && return 2 + # Many daemons don't delete their pidfiles when they exit. + rm -f $PIDFILE + return "$RETVAL" +} + +# +# Function that sends a SIGHUP to the daemon/service +# +do_reload() { + # + # If the daemon can reload its configuration without + # restarting (for example, when it is sent a SIGHUP), + # then implement that here. + # + start-stop-daemon --stop --signal 1 --quiet --pidfile $PIDFILE --name $NAME + return 0 +} + +case "$1" in + start) + [ "$VERBOSE" != no ] && log_daemon_msg "Starting $DESC" "$NAME" + do_start + case "$?" in + 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;; + 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;; + esac + ;; + stop) + [ "$VERBOSE" != no ] && log_daemon_msg "Stopping $DESC" "$NAME" + do_stop + case "$?" in + 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;; + 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;; + esac + ;; + #reload|force-reload) + # + # If do_reload() is not implemented then leave this commented out + # and leave 'force-reload' as an alias for 'restart'. + # + #log_daemon_msg "Reloading $DESC" "$NAME" + #do_reload + #log_end_msg $? 
+ #;; + restart|force-reload) + # + # If the "reload" option is implemented then remove the + # 'force-reload' alias + # + log_daemon_msg "Restarting $DESC" "$NAME" + do_stop + case "$?" in + 0|1) + do_start + case "$?" in + 0) log_end_msg 0 ;; + 1) log_end_msg 1 ;; # Old process is still running + *) log_end_msg 1 ;; # Failed to start + esac + ;; + *) + # Failed to stop + log_end_msg 1 + ;; + esac + ;; + *) + #echo "Usage: $SCRIPTNAME {start|stop|restart|reload|force-reload}" >&2 + echo "Usage: $SCRIPTNAME {start|stop|restart|force-reload}" >&2 + exit 3 + ;; +esac + +: diff --git a/doc/draft-ietf-ion-r2r-nhrp-03.txt b/doc/draft-ietf-ion-r2r-nhrp-03.txt new file mode 100644 index 0000000..8f80b36 --- /dev/null +++ b/doc/draft-ietf-ion-r2r-nhrp-03.txt @@ -0,0 +1,837 @@ +Internetworking Over NBMA Yakov Rekhter +INTERNET-DRAFT Cisco Systems +<draft-ietf-ion-r2r-nhrp-03.txt> Joel Halpern +Expiration Date: November 1999 Institutional Venture Partners + May 1998 + + + NHRP for Destinations off the NBMA Subnetwork + + draft-ietf-ion-r2r-nhrp-03.txt + + +1. Status of this Memo + + This document is an Internet-Draft and is in full conformance with + all provisions of Section 10 of RFC2026. Internet-Drafts are working + documents of the Internet Engineering Task Force (IETF), its areas, + and its working groups. Note that other groups may also distribute + working documents as Internet-Drafts. + + Internet-Drafts are draft documents valid for a maximum of six months + and may be updated, replaced, or obsoleted by other documents at any + time. It is inappropriate to use Internet-Drafts as reference + material or to cite them other than as ``work in progress.'' + + The list of current Internet-Drafts can be accessed at + http://www.ietf.org/ietf/1id-abstracts.txt + + The list of Internet-Draft Shadow Directories can be accessed at + http://www.ietf.org/shadow.html. + + +2. 
Abstract + + The NBMA Next Hop Resolution Protocol (NHRP) [1] specifies a + mechanism that allows a source station (e.g., a host or a router) on + an NBMA subnetwork to find the NBMA subnetwork address of a + destination station when the destination station is connected to the + NBMA subnetwork. For the case where the destination station is off + the NBMA subnetwork the mechanism described in [1] allows a node to + determine the NBMA subnetwork address of an egress router from the + NBMA subnetwork that is ``nearest'' to the destination station. If + used to locate an egress router wherein the destination station is + directly behind the egress router, the currently documented NHRP + behaviors are sufficient. However, as documented elsewhere [2], + there are cases where if used between routers for generalized + transit, NHRP can produce loops. + + + + +Joel Halpern [Page 1] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + + This document describes extensions to the NBMA Next Hop Resolution + Protocol (NHRP) [1] that allow a node to acquire and maintain the + information about the egress router without constraining the + destination(s) to be directly connected to the egress router. + + +3. CONVENTIONS + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [3]. + + +4. NHRP Target Information + + The mechanism described in this document allows a node to find an + egress router for either a single destination, or a set of + destinations (where the set is expressed as a single address prefix). + Since a single destination is just a special case of a set of + destinations, for the rest of the document we will always talk about + a set of destinations, and will refer to this set as an ``NHRP + target''. 
+ + The NHRP target is carried in the NHRP Request, Reply, and Purge + messages as an address prefix (using the Prefix Length field of the + NHRP Client Information Extension). In order to ensure correctness, + a target may be replaced by an identical target with a longer prefix + length. This replacement may be done at an intermediate or + responding NHS. Other than this increase of prefix length, no NHS + shall modify the NHRP target information in an NHRP message. + + In general a router may maintain in its Forwarding Information Base + (FIB) routes whose Network Layer Reachability Information (NLRI) that + exhibits a subset relation. Such routes are called overlapping + routes. To expand upon this, entries in a FIB are often related, with + one entry being a prefix of another entry. The longer prefix + therefore covers a set of routes that are a subset of the shorter + prefix. To provide correct forwarding in the presence of such + overlapping (or nested) routes this document constrains an NHRP + target by requiring that all the destinations covered by the target + must form a subset of the NLRI of at least one route in the + Forwarding Information Base (FIB) of the router that either + originates, or propagates an NHRP Request. That is, there must be at + least one route in the FIB which is a prefix of (or equal to) the + target of the request. For the rest of the document we'll refer to + this as the ``first NHRP target constraint''. A station can + originate an NHRP Request, and a router can propagate an NHRP Request + only if the NHRP target of the Request does not violate the first + + + +Joel Halpern [Page 2] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + + NHRP target constraint. + + If a received NHRP request does not meet this ``first NHRP target + constraint'' when received, the receiving router has two choices. It + may answer the request, defining itself as the egress. 
This is + compatible with the base NHRP specification, and preserves the + ``first NHRP target constraint''. Alternatively, the router may + lengthen the received prefix until the first constraint is met. The + prefix is lengthened until the target falls within (or becomes equal + to) a FIB entry. + + A route (from a local FIB) whose NLRI forms a minimal superset of all + the destinations covered by the NHRP target is called an ``NHRP + forwarding route''. This is the longest FIB entry that covers the + entire target. Observe that by definition the set of destinations + covered by an NHRP target always exhibits a subset relation to the + set of destinations covered by the NHRP forwarding route associated + with the target. + + This document further constrains origination/propagation of NHRP + Requests by prohibiting the NHRP target (carried by a Request) to + form a superset of the destinations covered by any of the routes in + the local FIB. Remembering that there are nested FIB entries, this + constraint says that there must not be a FIB entry which is itself a + subset of the target of the NHRP request. If there were, there would + be some destinations within the request which would be forwarded + differently than others, preventing a single answer from being + correct. The constraint applies both to the station that originates + an NHRP Request and to the routers that propagate the Request. For + the rest of the document we'll refer to this constraint as the + ``second NHRP target constraint''. A station can originate an NHRP + Request, and a router can propagate an NHRP Request only if the NHRP + target of the Request does not violate the second NHRP target + constraint. The second NHRP target constraint guarantees that + forwarding to all the destinations covered by the NHRP target would + be accomplished via a single (common) route, and this route would be + the NHRP forwarding route for the target.
+ + Again, if a received NHRP request does not meet the ``second NHRP + target constraint'', the router may either respond to the request, + providing its own NBMA address, or it may lengthen the prefix in the + request so as to meet the second constraint. + + + + + + + + + +Joel Halpern [Page 3] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +5. NHRP Requester and Terminator Processing + + The issue being addressed with the behaviors being mandated in this + document is to ensure that sufficient information is present and + processed to avoid NHRP shortcuts causing packet forwarding loops. + + In order to do this, the requester and responder of the request must + undertake certain work, and any "border routers" in the forwarding + path must also perform certain additional work beyond checking the + target consistency with the FIB during request processing. This + border work suffices to detect any changes that would cause the path + selection to have failed the target constraints. + + The work performed by the requester and responder consists of two + kinds of work. One set is requester only work, and is required in + order to determine where the protocol boundaries are. The other set + is the route monitoring work. + + +5.1. NHRP IGP information + + The primary cause of NHRP forwarding loops is the loss of information + at a routing protocol boundary. Normally, such boundaries are + detected by the router at the boundary. However, it is possible for + IGP boundaries to overlap. Therefore, NHRP requesting Routers MUST + include the NHRP IGP Information extension (as defined in section 9). + This extension indicates what IGP the originator of the request uses. + A requesting router must always include this extension, since it is + not possible to tell a priori whether the eventual resolution of the + request will be a host or a router. 
+ + Because the entire BGP domain is considered one routing domain, the + extension also contains an indication as to whether the originator + was a BGP speaker. + + +5.2. NHRP Requestor and Responder monitoring + + NHRP requestors and responders are required to monitor routing to + maintain correct shortcut information. + + Once a router that originates an NHRP Request acquires the shortcut + next hop information, it is essential for the router to be able to + detect any changes that would affect the correctness of this + information. The following measures are intended to provide the + correctness. + + Both ends of a shortcut have to monitor the status of the route that + + + +Joel Halpern [Page 4] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + + was associated with the shortcut (the NHRP forwarding route). If the + status changes at the router that generated the NHRP Reply, this + router should send a Purge message, so that the NHRP Requester would + issue another NHRP. If the status changes at the Requester, the + Requester must issue another NHRP. This ensures that when both ends + of a shortcut are up, any changes in routing that impact forwarding + to any of the destinations in the NHRP target would result in a + revalidation (via NHRP) of the shortcut. Note that in addition to + sending purges/reverifies in response to routing changes which + directly affect the NHRP target, there is one other case. + + A router MUST perform the appropriate purge/reverification process if + it receives routing updates that cause an issued NHRP request to + violate either of the target constraints defined earlier. This is + possible at an NHRP originator, and is more likely at border devices. + + Once a shortcut is established, the Requester needs to have some + mechanism(s) to ensure that the other end of the shortcut is alive.
+ Among the possible mechanisms are: (a) indications from the Data Link + layer, (b) presence of traffic in the reverse direction that comes + with the Link Layer address of the other end, (c) keepalives sent by + the other end. This is intended to suppress black holes, when the + next hop router in the shortcut (the router that generated Reply) + goes down. + + A requester should establish a shortcut only after the requester + determines that the information provided by NHRP is fairly stable. + This is necessary in order to avoid initiating shortcuts that are + based on transients in the routing information, and thus would need + to be revalidated almost immediately anyway. Thus, a router may wait + to use NHRP information if the underlying routing information has + recently changed. If the routing protocol being used has a notion of + stability, it should be used. Information in a transient or + holddown state SHOULD NOT be used, and requests which need to be + processed based on such information SHOULD be discarded. + + + + + + + + + + + + + + + + +Joel Halpern [Page 5] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +6. Border Processing of NHRP Request + + Processing of an NHRP Request is covered by two sets of rules: the + first set for IGP related processing, and the second set for BGP + related processing. The rules for IGP processing relate to + determining where the IGP borders are (in particular in the case of + overlapping IGPs), and then for what must happen at said borders. + + +6.1. Border Determination + + When a router receives a request, and determines that it is not the + NBMA exit router, it must perform a series of checks before + forwarding the request. 
+ + When a router receives such a Request, the router uses the NHRP + target and the NHRP IGP information to check whether (a) the first + and the second NHRP target constraints are satisfied, (b) the router + is in the same routing domain as the originator of the Request, + and if yes, then whether (c) it is a border router for that domain. + + When the NHRP target is checked against the forwarding database, a + determination must be made as to whether either of the target + constraints has been violated. If they are violated, then the router + MAY either + + o Extend the prefix so as to meet the constraints. + + o reply to the request indicating that it is the destination + + o return an error indicating which constraint was violated. + + If the NHRP forwarding route indicates a next hop that is not on the + same NBMA as the interface on which the Request was received, the + router sends back an NHRP Reply and terminates the query. + + If a router receives a request without IGP information, then it was + originated within this domain by a host. If the router is an AS + Border Router (i.e. running BGP), and if the forwarding path exits + the AS, then it must behave as a border router for this request. + Otherwise, for requests without IGP information, the router is not a + border router. + + For requests with IGP information, the router compares the forwarding + information against the IGP in the request. If the forwarding entry + indicates that the next hop is to exit the AS (an AS Border Router), + then check the BGP behaviors below. + + + + +Joel Halpern [Page 6] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + + When the IGP the next hop was learned from is the same IGP as + indicated in the request, then the NHS simply forwards the request. + [Of course, as per NHRP, it is free to respond indicating it is the + termination of the shortcut, for example when the Router/NHS is a + firewall.]
+ + When the IGP the next hop was learned from is different from that + listed in the NHRP request, then this NHS is a border router for this + request. + + +6.2. Border Behavior + + In all cases, a border router has two choices. It MAY terminate and + respond to the request, responding with its IP and NBMA address. + + Alternatively, it MAY perform border propagation. + + +6.2.1. Reorigination + + Upon receiving an NHRP request for which the NHS is a border router, + if it chooses to propagate the request, it MUST originate a new NHRP + request. This request will have a locally generated request + identifier, and the same NHRP target information as in the received + request. The NHRP IGP Information will be the correct indication for + the outgoing interface, with BGP indication if the received request + had the BGP indication, or if this transition crosses the AS border. + All other extensions are copied from the incoming request to the new + request. + + +6.2.2. Response Propagation + + When an NHRP response is received for a propagated request, the + information is copied from the received request, and passed on in a + new NHRP response, responding to the originally received request. + The prefix length in the received response is copied to the new + response. All extensions except the NHRP IGP Information are copied + to the new response. + + In addition, the border router saves state about this information + exchange. The saved state includes the NHRP target from the + response, with the NHRP prefix length that resulted from the + exchange. It also includes both the original requester, and the + identity of the responder. These are used to generate appropriate + reverification and purges whenever routing changes in a way that + could affect the resolution. + + + +Joel Halpern [Page 7] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +6.3.
Border Information + + Sometimes the routing protocol will have provided the border router + with enough information to generate a response to an incoming NHRP + request. In particular, the border router may have information about + IP prefix to NBMA address bindings. If such information is present, + it may be used by a border router to produce an NHRP response without + actually propagating the request. In such a case, that information + must be monitored for stability to maintain the correctness of the + shortcut. + + +7. BGP Operation + + While the NHRP mechanism described above is mostly constrained to the + routers within a single routing domain, the same mechanisms can be + used for shortcuts that span multiple domains. In doing so, one + wants to produce as little additional overhead in the BGP space as + possible. + + Therefore, we will treat the space over which BGP runs as a single + routing domain. Care must be taken to propagate information across + the individual AS without error, and to indicate that one has + properly entered the BGP space. + + Additional complexity in handling multi-domain shortcuts arise if + routing information gets aggregated at the border routers (which + certainly happens in practice). Since BGP is the major protocol that + is used to exchange routing information across multiple routing + domains, we'll restrict our proposal to the case where the routing + information exchange across domains' boundaries is controlled by BGP. + + If both the source and the destination domains are on a common NBMA + network, and the path between these two domains is also fully within + the same NBMA network, then we have only three routing domains to + deal with: source routing domain, BGP routing domain, and destination + routing domain. If the destination domain is not on the same NBMA as + the source domain, then we need to deal only with two domains - the + source and the BGP. 
Note that we treat all routers that participate + in a single (common) instance of BGP as a single BGP routing domain, + even if these routers participate in different intra-domain routing + protocols, or in different instances of the same intra-domain routing + protocol. There are three aspects to consider. + + + (a) how a border router in the domain that the originator of + the Request is in handles the Request (crossing IGP/BGP + boundary), + + + +Joel Halpern [Page 8] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + + (b) how the Request is handled across the BGP domain, and + finally + + (c) how a border router in the domain where the NHRP target is + in handles the Request (crossing BGP/IGP boundary). + + + +7.1. Handling NHRP Request at the source domain border router + + When a border router receives an NHRP Request originated from within + its own (IGP) routing domain, the border router determines the NHRP + forwarding route for the NHRP target carried by the Request. If the + router already has the shortcut information for the forwarding route, + then the router uses this information to construct a Reply to the + source of the NHRP Request. Otherwise, the router originates its own + NHRP Request. The Request contains exactly the same NHRP target, as + was carried by the original Request; The NHRP IGP Information will + indicate that the request was generated by BGP, and will indicate the + IGP of the BGP AS being entered. While it is assumed that a BGP + transit AS will generally use only one IGP, the IGP information (and + border processing) is included to allow all cases. The newly + originated Request is sent to the next hop of the NHRP forwarding + route. Once the border router receives a Reply to its own Request, + the border router uses the next hop information from the Reply to + construct its own Reply to the source of the original NHRP Request. 
+ + If the border router later on receives a Purge message for the NHRP + forwarding route, the border router treats this event as if there was + a local change in the NHRP forwarding route (even if there were no + changes in the route). + + This is exactly the same behavior as all other border cases, and is + described here for completeness. + + +7.2. Handling NHRP Request within the BGP domain + + Routers within an AS will check the IGP, and perform appropriate + processing based on the IGP match. In general, this will result in + normal forwarding of the NHRP request. + + Therefore, the significant cases occur at the BGP speaking routers. + There are two conditions to check for, early exit of the NBMA, and + reachability aggregation. Both of these conditions apply to + Autonomous systems that do not contain the NHRP target. + + + + + +Joel Halpern [Page 9] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +7.2.1. NBMA exit + + The BGP router in deciding where to send the NHRP request will + determine what the correct exit from the autonomous system is. It + will determine if that exit is within the NBMA. If it is not within + the NBMA, then the router MUST respond to the NHRP request, + indicating its own IP and NBMA addresses as the correct termination + of the shortcut. This is because the actual NBMA border device is + not in a position to monitor the topology properly. + + BGP routers within an NBMA which are supporting R2R NHRP SHOULD be + configured to know where the NBMA border is. In the absence of such + configuration, requests from other routers SHOULD be terminated at the + BGP router, since it can not tell what will be crossing the border. + A BGP router supporting R2R NHRP may be configured to assume that all + of its neighbors are within the NBMA, and therefore not perform such + early termination. + + +7.2.2. Reachability Aggregation + + BGP routers aggregate reachability.
If the router aggregates + reachability that includes the NHRP target, only this router has the + visibility to some of the topology changes that can affect the + correctness of the route. Therefore, this router is a border router + for this NHRP request. + + It must originate a new request, place the correct information in the + request, receive the response, and generate the correct response + towards the requester. This aggregating router must also monitor + routing in case of changes which affect the request. + + If the router later on receives a Purge message for the NHRP + forwarding route, the router treats this event as if there was a + change in the NHRP forwarding route (even if there were no changes + in the route). + + It should be noted that this condition applies if the router COULD + aggregate relevant routing information, even if it currently does + not. + + + + + + + + + + + +Joel Halpern [Page 10] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +7.3. Handling NHRP Request at the destination domain border router + + When a border router receives an NHRP Request from a BGP speaker, and + the border router determines that all the destinations covered by the + NHRP target of the Request are within the (IGP) domain of that border + router, the border router determines the NHRP forwarding route for + the NHRP target carried by the Request. The newly formed Request + contains exactly the same NHRP target as the received Request; the + NHRP IGP Information indicates the IGP this router is using to select + the route to the destination. The newly originated Request is sent + to the next hop of the NHRP forwarding route. Once the border router + receives a Reply to its own Request, the border router uses the next + hop information from the Reply to construct its own Reply to the + source of the original NHRP Request.
+ + If the border router later on receives a Purge message for the NHRP + forwarding route, the border router treats this event as if there was + a change in the NHRP forwarding route (even if there were no + changes in the route). + + +8. More state, less messages + + It should be possible to reduce the number of Purge messages and + subsequent NHRP messages (caused by the Purge messages) by + maintaining more state on the border routers at the source and + destination domains, and the BGP routers that perform aggregation + along the path from the source to the destination. + + Specifically, on these routers it would be necessary to keep the + information about all the NHRP targets for which the routers maintain + the shortcut information. This way when such a router determines + that the NHRP forwarding route (for which the router maintains the + shortcut information) changes due to some local routing changes, the + router could check whether these local changes impact forwarding to + the destinations covered by the NHRP targets. For the targets that + are impacted by the changes the router would send Purge messages. + + Note that this mechanism (maintaining NHRP targets) precludes the use + of Address Prefix Extension - the shortcut will be determined only + for the destinations covered by the NHRP target (so, if the target is + a single IP address, then the shortcut would be determined only for + this address). + + + + + + + + +Joel Halpern [Page 11] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +9. NHRP IGP Information Extension Format + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + 0-3 |C|u| Type = 9 | Length = 4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + 4-7 | flags |b| Reserved | IGP ID | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + C "Compulsory."
If clear, and the NHS does not recognize the + type code, the extension may safely be ignored. For + the IGP Information extension, this bit is clear. + + u Unused and must be set to zero + + Type The extension type code. For the IGP Information + extension, this is 9. + + Length the length in octets of the value. For this extension, + this is 4. + + flags Other than the "b" flag, these are reserved, SHALL be set + to 0 on transmission, and SHALL be ignored on reception. + + b This flag indicates whether the request (or a predecessor + thereof) was originated by a BGP speaker. Set (to 1) to + indicate that the BGP speaker has operated on this. + Clear (to 0) if not. + + IGP ID This field indicates the IGP used by the request + originator. The currently defined values are: + + 1 = RIP + 2 = RIPv2 + 3 = OSPF + 4 = Dual IS-IS + + + + + + + + + + + + + +Joel Halpern [Page 12] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +10. IANA Considerations + + This document defines an enumerated field for identifying IGPs in + router-to-router NHRP requests. Since there may be additional IGPs + in use, a procedure is needed for allocating additional values. The + IANA shall allocate values for this field as needed. Specifically, + when requested a value shall be allocated for an IGP for any layer 3 + protocol for which there is a clear and stable definition of the + protocol. An RFC is the best example of such stability. Vendor + published specifications are also acceptable. The IANA should avoid + issuing two values for the same protocol. However, it is not + incumbent upon the IANA to determine if two similar protocols are + actually the same. + + +11. Open issues + + The mechanisms described in this document assume that certain routers + along a path taken by an NHRP Request would be required to maintain + state associated with the NHRP forwarding route associated with the + NHRP target carried by the Request.
However, it is quite clear that + the router(s) may also lose this state. Further study of the impact + of losing the state is needed before advancing the use of NHRP for + establishing shortcuts among routers beyond Proposed Standard. + + The mechanisms described in this document may result in a situation + where a router would be required to maintain NHRP peering with + potentially a fairly large number of other routers. Further study is + needed to understand the implications of this on the scalability of + the approach where NHRP is used to establish shortcuts among routers. + + This document doesn't have a proof that the mechanisms described here + result in loop-free steady state forwarding when NHRP is used to + establish shortcuts among routers, however, a counterexample has not + yet been found. Further analysis should be done as part of advancing + beyond Proposed Standard. + + + + + + + + + + + + + + + +Joel Halpern [Page 13] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + +12. Security Considerations + + Security is provided in the base NHRP protocol, using hop-by-hop + authentication. There is no change to the fundamental security + capabilities provided therein when these extensions are used. It + should be noted that the assumption of transitive trust that is the + basis of such security may well be significantly weaker in an inter- + domain environment, and administrators of border routers should take + this into consideration. The hop-by-hop security model is used by + NHRP originally because there is no end-to-end security association + between the requesting and responding NHRP entities. In this + environment there is the additional facet that intermediate NHS are + modifying the prefix length field of the CIE, thus changing the end- + to-end information. + + +13. References + + [1] J. Luciani, D. Katz, D. Piscitello, B. Cole, N. 
Doraswamy., + "NBMA Next Hop Resolution Protocol", RFC-2332, USC/Information + Sciences Institute, April 1998. + + [2] D. Cansever., "NHRP Protocol Applicability Statement", RFC-2333, + USC/Information Sciences Institute, April 1998 + + [3] S. Bradner., "Key words for use in RFCs to Indicate Requirement + Levels", RFC-2119, USC/Information Sciences Institute, March 1997. + + +14. Acknowledgements + + The authors wish to Thank Curtis Villamizer for his contributions + emphasizing both the importance of the looping cases, and some + examples of when loops can occur. + + +15. Author Information + + + + + + + + + + + + + + +Joel Halpern [Page 14] + +Internet Draft draft-ietf-ion-r2r-nhrp-03.txt May 1998 + + + Joel M. Halpern + Institutional Venture Partners + 3000 Sand Hill Road + Menlo Park, CA + Phone: (650) 926-5633 + email: joel@mcquillan.com + + Yakov Rekhter + cisco Systems, Inc. + 170 Tasman Dr. + San Jose, CA 95134 + Phone: (914) 528-0090 + email: yakov@cisco.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Joel Halpern [Page 15] +
\ No newline at end of file diff --git a/doc/rfc2332.txt b/doc/rfc2332.txt new file mode 100644 index 0000000..eb79ee3 --- /dev/null +++ b/doc/rfc2332.txt @@ -0,0 +1,2915 @@ + + + + + + +Network Working Group J. Luciani +Request for Comments: 2332 Bay Networks +Category: Standards Track D. Katz + cisco Systems + D. Piscitello + Core Competence, Inc. + B. Cole + Juniper Networks + N. Doraswamy + Bay Networks + April 1998 + + + NBMA Next Hop Resolution Protocol (NHRP) + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (1998). All Rights Reserved. + +Abstract + + This document describes the NBMA Next Hop Resolution Protocol (NHRP). + NHRP can be used by a source station (host or router) connected to a + Non-Broadcast, Multi-Access (NBMA) subnetwork to determine the + internetworking layer address and NBMA subnetwork addresses of the + "NBMA next hop" towards a destination station. If the destination is + connected to the NBMA subnetwork, then the NBMA next hop is the + destination station itself. Otherwise, the NBMA next hop is the + egress router from the NBMA subnetwork that is "nearest" to the + destination station. NHRP is intended for use in a multiprotocol + internetworking layer environment over NBMA subnetworks. + + Note that while this protocol was developed for use with NBMA + subnetworks, it is possible, if not likely, that it will be applied + to BMA subnetworks as well. However, this usage of NHRP is for + further study. + + This document is intended to be a functional superset of the NBMA + Address Resolution Protocol (NARP) documented in [1]. + + + + +Luciani, et. al. 
Standards Track [Page 1] + +RFC 2332 NBMA NHRP April 1998 + + + Operation of NHRP as a means of establishing a transit path across an + NBMA subnetwork between two routers will be addressed in a separate + document (see [13]). + +1. Introduction + + The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD, + SHOULD NOT, RECOMMENDED, MAY, and OPTIONAL, when they appear in this + document, are to be interpreted as described in [15]. + + The NBMA Next Hop Resolution Protocol (NHRP) allows a source station + (a host or router), wishing to communicate over a Non-Broadcast, + Multi-Access (NBMA) subnetwork, to determine the internetworking + layer addresses and NBMA addresses of suitable "NBMA next hops" + toward a destination station. A subnetwork can be non-broadcast + either because it technically doesn't support broadcasting (e.g., an + X.25 subnetwork) or because broadcasting is not feasible for one + reason or another (e.g., an SMDS multicast group or an extended + Ethernet would be too large). If the destination is connected to the + NBMA subnetwork, then the NBMA next hop is the destination station + itself. Otherwise, the NBMA next hop is the egress router from the + NBMA subnetwork that is "nearest" to the destination station. + + One way to model an NBMA network is by using the notion of logically + independent IP subnets (LISs). LISs, as defined in [3] and [4], have + the following properties: + + 1) All members of a LIS have the same IP network/subnet number + and address mask. + + 2) All members of a LIS are directly connected to the same + NBMA subnetwork. + + 3) All hosts and routers outside of the LIS are accessed via + a router. + + 4) All members of a LIS access each other directly (without + routers). 
+ + Address resolution as described in [3] and [4] only resolves the next + hop address if the destination station is a member of the same LIS as + the source station; otherwise, the source station must forward + packets to a router that is a member of multiple LIS's. In multi-LIS + + + + + + + + +Luciani, et. al. Standards Track [Page 2] + +RFC 2332 NBMA NHRP April 1998 + + + configurations, hop-by-hop address resolution may not be sufficient + to resolve the "NBMA next hop" toward the destination station, and IP + packets may have multiple IP hops through the NBMA subnetwork. + + Another way to model NBMA is by using the notion of Local Address + Groups (LAGs) [10]. The essential difference between the LIS and the + LAG models is that while with the LIS model the outcome of the + "local/remote" forwarding decision is driven purely by addressing + information, with the LAG model the outcome of this decision is + decoupled from the addressing information and is coupled with the + Quality of Service and/or traffic characteristics. With the LAG + model any two entities on a common NBMA network could establish a + direct communication with each other, irrespective of the entities' + addresses. + + Support for the LAG model assumes the existence of a mechanism that + allows any entity (i.e., host or router) connected to an NBMA network + to resolve an internetworking layer address to an NBMA address for + any other entity connected to the same NBMA network. This resolution + would take place regardless of the address assignments to these + entities. Within the parameters described in this document, NHRP + describes such a mechanism. 
For example, when the internetworking + layer address is of type IP, once the NBMA next hop has been + resolved, the source may either start sending IP packets to the + destination (in a connectionless NBMA subnetwork such as SMDS) or may + first establish a connection to the destination with the desired + bandwidth (in a connection-oriented NBMA subnetwork such as ATM). + + Use of NHRP may be sufficient for hosts doing address resolution when + those hosts are directly connected to an NBMA subnetwork, allowing + for straightforward implementations in NBMA stations. NHRP also has + the capability of determining the egress point from an NBMA + subnetwork when the destination is not directly connected to the NBMA + subnetwork and the identity of the egress router is not learned by + other methods (such as routing protocols). Optional extensions to + NHRP provide additional robustness and diagnosability. + + Address resolution techniques such as those described in [3] and [4] + may be in use when NHRP is deployed. ARP servers and services over + NBMA subnetworks may be required to support hosts that are not + capable of dealing with any model for communication other than the + LIS model, and deployed hosts may not implement NHRP but may continue + to support ARP variants such as those described in [3] and [4]. NHRP + is intended to reduce or eliminate the extra router hops required by + the LIS model, and can be deployed in a non-interfering manner with + existing ARP services [14]. + + + + + +Luciani, et. al. Standards Track [Page 3] + +RFC 2332 NBMA NHRP April 1998 + + + The operation of NHRP to establish transit paths across NBMA + subnetworks between two routers requires additional mechanisms to + avoid stable routing loops, and will be described in a separate + document (see [13]). + +2. Overview + +2.1 Terminology + + The term "network" is highly overloaded, and is especially confusing + in the context of NHRP. 
We use the following terms: + + Internetwork layer--the media-independent layer (IP in the case of + TCP/IP networks). + + Subnetwork layer--the media-dependent layer underlying the + internetwork layer, including the NBMA technology (ATM, X.25, SMDS, + etc.) + + The term "server", unless explicitly stated to the contrary, refers + to a Next Hop Server (NHS). An NHS is an entity performing the + Next Hop Resolution Protocol service within the NBMA cloud. An NHS + is always tightly coupled with a routing entity (router, route + server or edge device) although the converse is not yet guaranteed + until ubiquitous deployment of this functionality occurs. Note + that the presence of intermediate routers that are not coupled with + an NHS entity may preclude the use of NHRP when source and + destination stations are on different sides of such routers and thus + such routers may partition NHRP reachability within an NBMA + network. + + The term "client", unless explicitly stated to the contrary, refers + to a Next Hop Resolution Protocol client (NHC). An NHC is an + entity which initiates NHRP requests of various types in order to + obtain access to the NHRP service. + + The term "station" generally refers to a host or router which + contains an NHRP entity. Occasionally, the term station will + describe a "user" of the NHRP client or service functionality; the + difference in usage is largely semantic. + +2.2 Protocol Overview + + In this section, we briefly describe how a source S (which + potentially can be either a router or a host) uses NHRP to determine + the "NBMA next hop" to destination D. + + + + + +Luciani, et. al. Standards Track [Page 4] + +RFC 2332 NBMA NHRP April 1998 + + + For administrative and policy reasons, a physical NBMA subnetwork may + be partitioned into several, disjoint "Logical NBMA subnetworks". A + Logical NBMA subnetwork is defined as a collection of hosts and + routers that share unfiltered subnetwork connectivity over an NBMA + subnetwork.
"Unfiltered subnetwork connectivity" refers to the + absence of closed user groups, address screening or similar features + that may be used to prevent direct communication between stations + connected to the same NBMA subnetwork. (Hereafter, unless otherwise + specified, we use the term "NBMA subnetwork" to mean *logical* NBMA + subnetwork.) + + Placed within the NBMA subnetwork are one or more entities that + implement the NHRP protocol. Such stations which are capable of + answering NHRP Resolution Requests are known as "Next Hop Servers" + (NHSs). Each NHS serves a set of destination hosts, which may or may + not be directly connected to the NBMA subnetwork. NHSs cooperatively + resolve the NBMA next hop within their logical NBMA subnetwork. In + addition to NHRP, NHSs may support "classical" ARP service; however, + this will be the subject of a separate document [14]. + + An NHS maintains a cache which contains protocol layer address to + NBMA subnetwork layer address resolution information. This cache can + be constructed from information obtained from NHRP Register packets + (see Section 5.2.3 and 5.2.4), from NHRP Resolution Request/Reply + packets, or through mechanisms outside the scope of this document + (examples of such mechanisms might include ARP[3] and pre-configured + tables). Section 6.2 further describes cache management issues. + + For a station within a given LIS to avoid providing NHS + functionality, there must be one or more NHSs within the NBMA + subnetwork which are providing authoritative address resolution + information on its behalf. Such an NHS is said to be "serving" the + station. A station on a LIS that lacks NHS functionality and is a + client of the NHRP service is known as NHRP Client or just NHCs. If + a serving NHS is to be able to supply the address resolution + information for an NHC then NHSs must exist at each hop along all + routed paths between the NHC making the resolution request and the + destination NHC. 
The last NHRP entity along the routed path is the + serving NHS; that is, NHRP Resolution Requests are not forwarded to + destination NHCs but rather are processed by the serving NHS. + + An NHC also maintains a cache of protocol address to NBMA address + resolution information. This cache is populated through information + obtained from NHRP Resolution Reply packets, from manual + configuration, or through mechanisms outside the scope of this + document. + + + + + +Luciani, et. al. Standards Track [Page 5] + +RFC 2332 NBMA NHRP April 1998 + + + The protocol proceeds as follows. An event occurs triggering station + S to want to resolve the NBMA address of a path to D. This is most + likely to be when a data packet addressed to station D is to be + emitted from station S (either because station S is a host, or + station S is a transit router), but the address resolution could also + be triggered by other means (a routing protocol update packet, for + example). Station S first determines the next hop to station D + through normal routing processes (for a host, the next hop may simply + be the default router; for routers, this is the "next hop" to the + destination internetwork layer address). If the destination's + address resolution information is already available in S's cache then + that information is used to forward the packet. Otherwise, if the + next hop is reachable through one of its NBMA interfaces, S + constructs an NHRP Resolution Request packet (see Section 5.2.1) + containing station D's internetwork layer address as the (target) + destination address, S's own internetwork layer address as the source + address (Next Hop Resolution Request initiator), and station S's NBMA + addressing information. Station S may also indicate that it prefers + an authoritative NHRP Resolution Reply (i.e., station S only wishes + to receive an NHRP Resolution Reply from an NHS serving the + destination NHC). 
Station S emits the NHRP Resolution Request packet + towards the destination. + + If the NHRP Resolution Request is triggered by a data packet then S + may, while awaiting an NHRP Resolution Reply, choose to dispose of + the data packet in one of the following ways: + + (a) Drop the packet + (b) Retain the packet until the NHRP Resolution Reply arrives + and a more optimal path is available + (c) Forward the packet along the routed path toward D + + The choice of which of the above to perform is a local policy matter, + though option (c) is the recommended default, since it may allow data + to flow to the destination while the NBMA address is being resolved. + Note that an NHRP Resolution Request for a given destination MUST NOT + be triggered on every packet. + + When the NHS receives an NHRP Resolution Request, a check is made to + see if it serves station D. If the NHS does not serve D, the NHS + forwards the NHRP Resolution Request to another NHS. Mechanisms for + determining how to forward the NHRP Resolution Request are discussed + in Section 3. + + If this NHS serves D, the NHS resolves station D's NBMA address + information, and generates a positive NHRP Resolution Reply on D's + behalf. NHRP Resolution Replies in this scenario are always marked + as "authoritative". The NHRP Resolution Reply packet contains the + + + +Luciani, et. al. Standards Track [Page 6] + +RFC 2332 NBMA NHRP April 1998 + + + address resolution information for station D which is to be sent back + to S. Note that if station D is not on the NBMA subnetwork, the next + hop internetwork layer address will be that of the egress router + through which packets for station D are forwarded. + + A transit NHS receiving an NHRP Resolution Reply may cache the + address resolution information contained therein. 
To a subsequent + NHRP Resolution Request, this NHS may respond with the cached, "non- + authoritative" address resolution information if the NHS is permitted + to do so (see Sections 5.2.2 and 6.2 for more information on non- + authoritative versus authoritative NHRP Resolution Replies). Non- + authoritative NHRP Resolution Replies are distinguished from + authoritative NHRP Resolution Replies so that if a communication + attempt based on non-authoritative information fails, a source + station can choose to send an authoritative NHRP Resolution Request. + NHSs MUST NOT respond to authoritative NHRP Resolution Requests with + cached information. + + If the determination is made that no NHS in the NBMA subnetwork can + reply to the NHRP Resolution Request for D then a negative NHRP + Resolution Reply (NAK) is returned. This occurs when (a) no next-hop + resolution information is available for station D from any NHS, or + (b) an NHS is unable to forward the NHRP Resolution Request (e.g., + connectivity is lost). + + NHRP Registration Requests, NHRP Purge Requests, NHRP Purge Replies, + and NHRP Error Indications follow a routed path in the same fashion + that NHRP Resolution Requests and NHRP Resolution Replies do. + Specifically, "requests" and "indications" follow the routed path + from Source Protocol Address (which is the address of the station + initiating the communication) to the Destination Protocol Address. + "Replies", on the other hand, follow the routed path from the + Destination Protocol Address back to the Source Protocol Address with + the following exceptions: in the case of a NHRP Registration Reply + and in the case of an NHC initiated NHRP Purge Request, the packet is + always returned via a direct VC (see Sections 5.2.4 and 5.2.5); if + one does not exist then one MUST be created. + + NHRP Requests and NHRP Replies do NOT cross the borders of a NBMA + subnetwork however further study is being done in this area (see + Section 7).
Thus, the internetwork layer data traffic out of and + into an NBMA subnetwork always traverses an internetwork layer router + at its border. + + NHRP optionally provides a mechanism to send a NHRP Resolution Reply + which contains aggregated address resolution information. For + example, suppose that router X is the next hop from station S to + station D and that X is an egress router for all stations sharing an + + + +Luciani, et. al. Standards Track [Page 7] + +RFC 2332 NBMA NHRP April 1998 + + + internetwork layer address prefix with station D. When an NHRP + Resolution Reply is generated in response to a NHRP Resolution + Request, the responder may augment the internetwork layer address of + station D with a prefix length (see Section 5.2.0.1). A subsequent + (non-authoritative) NHRP Resolution Request for some destination that + shares an internetwork layer address prefix (for the number of bits + specified in the prefix length) with D may be satisfied with this + cached information. See section 6.2 regarding caching issues. + + To dynamically detect subnetwork-layer filtering in NBMA subnetworks + (e.g., X.25 closed user group facility, or SMDS address screens), to + trace the routed path that an NHRP packet takes, or to provide loop + detection and diagnostic capabilities, a "Route Record" may be + included in NHRP packets (see Sections 5.3.2 and 5.3.3). The Route + Record extensions are the NHRP Forward Transit NHS Record Extension + and the NHRP Reverse Transit NHS Record Extension. They contain the + internetwork (and subnetwork layer) addresses of all intermediate + NHSs between source and destination and between destination and + source respectively. When a source station is unable to communicate + with the responder (e.g., an attempt to open an SVC fails), it may + attempt to do so successively with other subnetwork layer addresses + in the NHRP Forward Transit NHS Record Extension until it succeeds + (if authentication policy permits such action). 
This approach can + find a suitable egress point in the presence of subnetwork-layer + filtering (which may be source/destination sensitive, for instance, + without necessarily creating separate logical NBMA subnetworks) or + subnetwork-layer congestion (especially in connection-oriented + media). + +3. Deployment + + NHRP Resolution Requests traverse one or more hops within an NBMA + subnetwork before reaching the station that is expected to generate a + response. Each station, including the source station, chooses a + neighboring NHS to which it will forward the NHRP Resolution Request. + The NHS selection procedure typically involves applying a destination + protocol layer address to the protocol layer routing table which + causes a routing decision to be returned. This routing decision is + then used to forward the NHRP Resolution Request to the downstream + NHS. The destination protocol layer address previously mentioned is + carried within the NHRP Resolution Request packet. Note that even + though a protocol layer address was used to acquire a routing + decision, NHRP packets are not encapsulated within a protocol layer + header but rather are carried at the NBMA layer using the + encapsulation described in Section 5. + + + + + + +Luciani, et. al. Standards Track [Page 8] + +RFC 2332 NBMA NHRP April 1998 + + + Each NHS/router examines the NHRP Resolution Request packet on its + way toward the destination. Each NHS which the NHRP packet traverses + on the way to the packet's destination might modify the packet (e.g., + updating the Forward Record extension). Ignoring error situations, + the NHRP Resolution Request eventually arrives at a station that is + to generate an NHRP Resolution Reply. This responding station + "serves" the destination. The responding station generates an NHRP + Resolution Reply using the source protocol address from within the + NHRP packet to determine where the NHRP Resolution Reply should be + sent. 
+ + Rather than use routing to determine the next hop for an NHRP packet, + an NHS may use other applicable means (such as static configuration + information) in order to determine to which neighboring NHSs to + forward the NHRP Resolution Request packet as long as such other + means would not cause the NHRP packet to arrive at an NHS which is + not along the routed path. The use of static configuration + information for this purpose is beyond the scope of this document. + + The NHS serving a particular destination must lie along the routed + path to that destination. In practice, this means that all egress + routers must double as NHSs serving the destinations beyond them, and + that hosts on the NBMA subnetwork are served by routers that double + as NHSs. Also, this implies that forwarding of NHRP packets within + an NBMA subnetwork requires a contiguous deployment of NHRP capable + routers. It is important that, in a given LIS/LAG which is using + NHRP, all NHSs within the LIS/LAG have at least some portion of their + resolution databases synchronized so that a packet arriving at one + router/NHS in a given LIS/LAG will be forwarded in the same fashion + as a packet arriving at a different router/NHS for the given LIS/LAG. + One method, among others, is to use the Server Cache Synchronization + Protocol (SCSP) [12]. It is RECOMMENDED that SCSP be the method used + when a LIS/LAG contains two or more router/NHSs. + + During migration to NHRP, it cannot be expected that all routers + within the NBMA subnetwork are NHRP capable. Thus, NHRP traffic + which would otherwise need to be forwarded through such routers can + be expected to be dropped due to the NHRP packet not being + recognized. In this case, NHRP will be unable to establish any + transit paths whose discovery requires the traversal of the non-NHRP + speaking routers.
If the client has tried and failed to acquire a + cut through path then the client should use the network layer routed + path as a default. + + If an NBMA technology offers a group, an anycast, or a multicast + addressing feature then the NHC may be configured with such an + address (appropriate to the routing realm it participates in) which + would be assigned to all NHS serving that routing realm. This + + + +Luciani, et. al. Standards Track [Page 9] + +RFC 2332 NBMA NHRP April 1998 + + + address can then be used for establishing an initial connection to an + NHS to transmit a registration request. This address may not be used + for sending NHRP requests. The resulting VC may be used for NHRP + requests if and only if the registration response is received over + that VC, thereby indicating that one happens to have anycast + connected to an NHS serving the LIS/LAG. In the case of non- + connection oriented networks, or of multicast (rather than anycast) + addresses, the address MUST NOT be used for sending NHRP resolution + requests. + + When an NHS "serves" an NHC, the NHS MUST send NHRP messages destined + for the NHC directly to the NHC. That is, the NHRP message MUST NOT + transit through any NHS which is not serving the NHC when the NHRP + message is currently at an NHS which does serve the NHC (this, of + course, assumes the NHRP message is destined for the NHC). Further, + an NHS which serves an NHC SHOULD have a direct NBMA level connection + to that NHC (see Section 5.2.3 and 5.2.4 for examples). + + With the exception of NHRP Registration Requests (see Section 5.2.3 + and 5.2.4 for details of the NHRP Registration Request case), an NHC + MUST send NHRP messages over a direct NBMA level connection between + the serving NHS and the served NHC. + + It may not be desirable to maintain semi-permanent NBMA level + connectivity between the NHC and the NHS.
In this case, when NBMA + level connectivity is initially setup between the NHS and the NHC (as + described in Section 5.2.4), the NBMA address of the NHS should be + obtained through the NBMA level signaling technology. This address + should be stored for future use in setting up subsequent NBMA level + connections. A somewhat more information rich technique to obtain + the address information (and more) of the serving NHS would be for + the NHC to include the Responder Address extension (see Section + 5.3.1) in the NHRP Registration Request and to store the information + returned to the NHC in the Responder Address extension which is + subsequently included in the NHRP Registration Reply. Note also + that, in practice, a client's default router should also be its NHS; + thus a client may be able to know the NBMA address of its NHS from + the configuration which was already required for the client to be + able to communicate. Further, as mentioned in Section 4, NHCs may be + configured with the addressing information of one or more NHSs. + +4. Configuration + + Next Hop Clients + + An NHC connected to an NBMA subnetwork MAY be configured with the + Protocol address(es) and NBMA address(es) of its NHS(s). The + NHS(s) will likely also represent the NHC's default or peer + + + +Luciani, et. al. Standards Track [Page 10] + +RFC 2332 NBMA NHRP April 1998 + + + routers, so their NBMA addresses may be obtained from the NHC's + existing configuration. If the NHC is attached to several + subnetworks (including logical NBMA subnetworks), the NHC should + also be configured to receive routing information from its NHS(s) + and peer routers so that it can determine which internetwork layer + networks are reachable through which subnetworks. + + Next Hop Servers + + An NHS is configured with knowledge of its own internetwork layer + and NBMA addresses. 
An NHS MAY also be configured with a set of + internetwork layer address prefixes that correspond to the + internetwork layer addresses of the stations it serves. The NBMA + addresses of the stations served by the NHS may be learned via NHRP + Registration packets. + + If a served NHC is attached to several subnetworks, the + router/route-server coresident with the serving NHS may also need + to be configured to advertise routing information to such NHCs. + + If an NHS acts as an egress router for stations connected to other + subnetworks than the NBMA subnetwork, the NHS must, in addition to + the above, be configured to exchange routing information between + the NBMA subnetwork and these other subnetworks. + + In all cases, routing information is exchanged using conventional + intra-domain and/or inter-domain routing protocols. + +5. NHRP Packet Formats + + This section describes the format of NHRP packets. In the following, + unless otherwise stated explicitly, the unqualified term "request" + refers generically to any of the NHRP packet types which are + "requests". Further, unless otherwise stated explicitly, the + unqualified term "reply" refers generically to any of the NHRP packet + types which are "replies". + + An NHRP packet consists of a Fixed Part, a Mandatory Part, and an + Extensions Part. The Fixed Part is common to all NHRP packet types. + The Mandatory Part MUST be present, but varies depending on packet + type. The Extensions Part also varies depending on packet type, and + need not be present. + + The length of the Fixed Part is fixed at 20 octets. The length of + the Mandatory Part is determined by the contents of the extensions + offset field (ar$extoff). If ar$extoff=0x0 then the mandatory part + length is equal to total packet length (ar$pktsz) minus 20 otherwise + the mandatory part length is equal to ar$extoff minus 20. The length + + + +Luciani, et. al. 
Standards Track [Page 11] + +RFC 2332 NBMA NHRP April 1998 + + + of the Extensions Part is implied by ar$pktsz minus ar$extoff. NHSs + may increase the size of an NHRP packet as a result of extension + processing, but not beyond the offered maximum packet size of the + NBMA network. + + NHRP packets are actually members of a wider class of address mapping + and management protocols being developed by the IETF. A specific + encapsulation, based on the native formats used on the particular + NBMA network over which NHRP is carried, indicates the generic IETF + mapping and management protocol. For example, SMDS networks always + use LLC/SNAP encapsulation at the NBMA layer [4], and an NHRP packet + is preceded by the following LLC/SNAP encapsulation: + + [0xAA-AA-03] [0x00-00-5E] [0x00-03] + + The first three octets are LLC, indicating that SNAP follows. The + SNAP OUI portion is the IANA's OUI, and the SNAP PID portion + identifies the mapping and management protocol. A field in the Fixed + Header following the encapsulation indicates that it is NHRP. + + ATM uses either LLC/SNAP encapsulation of each packet (including + NHRP), or uses no encapsulation on VCs dedicated to a single protocol + (see [7]). Frame Relay and X.25 both use NLPID/SNAP encapsulation or + identification of NHRP, using a NLPID of 0x0080 and the same SNAP + contents as above (see [8], [9]). + + Fields marked "unused" MUST be set to zero on transmission, and + ignored on receipt. + + Most packet types (ar$op.type) have both internetwork layer + protocol-independent fields and protocol-specific fields. The + protocol type/snap fields (ar$pro.type/snap) qualify the format of + the protocol-specific fields. + +5.1 NHRP Fixed Header + + The Fixed Part of the NHRP packet contains those elements of the NHRP + packet which are always present and do not vary in size with the type + of packet. + + + + + + + + + + + + +Luciani, et. al. 
Standards Track [Page 12] + +RFC 2332 NBMA NHRP April 1998 + + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ar$afn | ar$pro.type | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ar$pro.snap | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ar$pro.snap | ar$hopcnt | ar$pktsz | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ar$chksum | ar$extoff | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ar$op.version | ar$op.type | ar$shtl | ar$sstl | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + ar$afn + Defines the type of "link layer" addresses being carried. This + number is taken from the 'address family number' list specified in + [6]. This field has implications to the coding of ar$shtl and + ar$sstl as described below. + + ar$pro.type + field is a 16 bit unsigned integer representing the following + number space: + + 0x0000 to 0x00FF Protocols defined by the equivalent NLPIDs. + 0x0100 to 0x03FF Reserved for future use by the IETF. + 0x0400 to 0x04FF Allocated for use by the ATM Forum. + 0x0500 to 0x05FF Experimental/Local use. + 0x0600 to 0xFFFF Protocols defined by the equivalent Ethertypes. + + (based on the observations that valid Ethertypes are never smaller + than 0x600, and NLPIDs never larger than 0xFF.) + + ar$pro.snap + When ar$pro.type has a value of 0x0080, a SNAP encoded extension is + being used to encode the protocol type. This snap extension is + placed in the ar$pro.snap field. This is termed the 'long form' + protocol ID. If ar$pro != 0x0080 then the ar$pro.snap field MUST be + zero on transmit and ignored on receive. The ar$pro.type field + itself identifies the protocol being referred to. This is termed + the 'short form' protocol ID. 
+ + In all cases, where a protocol has an assigned number in the + ar$pro.type space (excluding 0x0080) the short form MUST be used + when transmitting NHRP messages; i.e., if Ethertype or NLPID + codings exist then they are used on transmit rather than the + + + +Luciani, et. al. Standards Track [Page 13] + +RFC 2332 NBMA NHRP April 1998 + + + ethertype. If both Ethertype and NLPID codings exist then when + transmitting NHRP messages, the Ethertype coding MUST be used (this + is consistent with RFC 1483 coding). So, for example, the + following codings exist for IP: + + SNAP: ar$pro.type = 0x00-80, ar$pro.snap = 0x00-00-00-08-00 + NLPID: ar$pro.type = 0x00-CC, ar$pro.snap = 0x00-00-00-00-00 + Ethertype: ar$pro.type = 0x08-00, ar$pro.snap = 0x00-00-00-00-00 + + and thus, since the Ethertype coding exists, it is used in + preference. + + ar$hopcnt + The Hop count indicates the maximum number of NHSs that an NHRP + packet is allowed to traverse before being discarded. This field + is used in a similar fashion to the way that a TTL is used in an IP + packet and should be set accordingly. Each NHS decrements the TTL + as the NHRP packet transits the NHS on the way to the next hop + along the routed path to the destination. If an NHS receives an + NHRP packet which it would normally forward to a next hop and that + packet contains an ar$hopcnt set to zero then the NHS sends an + error indication message back to the source protocol address + stating that the hop count has been exceeded (see Section 5.2.7) + and the NHS drops the packet in error; however, an error + indication is never sent as a result of receiving an error + indication. When a responding NHS replies to an NHRP request, that + NHS places a value in ar$hopcnt as if it were sending a request of + its own. + + ar$pktsz + The total length of the NHRP packet, in octets (excluding link + layer encapsulation). + + ar$chksum + The standard IP checksum over the entire NHRP packet starting at + the fixed header. 
If the packet is an odd number of bytes in + length then this calculation is performed as if a byte set to 0x00 + is appended to the end of the packet. + + ar$extoff + This field identifies the existence and location of NHRP + extensions. If this field is 0 then no extensions exist otherwise + this field represents the offset from the beginning of the NHRP + packet (i.e., starting from the ar$afn field) of the first + extension. + + + + + + +Luciani, et. al. Standards Track [Page 14] + +RFC 2332 NBMA NHRP April 1998 + + + ar$op.version + This field indicates what version of generic address mapping and + management protocol is represented by this message. + + 0 MARS protocol [11]. + 1 NHRP as defined in this document. + 0x02 - 0xEF Reserved for future use by the IETF. + 0xF0 - 0xFE Allocated for use by the ATM Forum. + 0xFF Experimental/Local use. + + ar$op.type + When ar$op.version == 1, this is the NHRP packet type: NHRP + Resolution Request(1), NHRP Resolution Reply(2), NHRP Registration + Request(3), NHRP Registration Reply(4), NHRP Purge Request(5), NHRP + Purge Reply(6), or NHRP Error Indication(7). Use of NHRP packet + Types in the range 128 to 255 are reserved for research or use in + other protocol development and will be administered by IANA as + described in Section 9. + + ar$shtl + Type & length of source NBMA address interpreted in the context of + the 'address family number'[6] indicated by ar$afn. See below for + more details. + + ar$sstl + Type & length of source NBMA subaddress interpreted in the context + of the 'address family number'[6] indicated by ar$afn. When an + NBMA technology has no concept of a subaddress, the subaddress + length is always coded ar$sstl = 0 and no storage is allocated for + the subaddress in the appropriate mandatory part. See below for + more details. 
+ + Subnetwork layer address type/length fields (e.g., ar$shtl, Cli Addr + T/L) and subnetwork layer subaddresses type/length fields (e.g., + ar$sstl, Cli SAddr T/L) are coded as follows: + + 7 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+-+ + |0|x| length | + +-+-+-+-+-+-+-+-+ + + The most significant bit is reserved and MUST be set to zero. The + second most significant bit (x) is a flag indicating whether the + address being referred to is in: + + - NSAP format (x = 0). + - Native E.164 format (x = 1). + + + + +Luciani, et. al. Standards Track [Page 15] + +RFC 2332 NBMA NHRP April 1998 + + + For NBMA technologies that use neither NSAP nor E.164 format + addresses, x = 0 SHALL be used to indicate the native form for the + particular NBMA technology. + + If the NBMA network is ATM and a subaddress (e.g., Source NBMA + SubAddress, Client NBMA SubAddress) is to be included in any part of + the NHRP packet then ar$afn MUST be set to 0x000F; further, the + subnetwork layer address type/length fields (e.g., ar$shtl, Cli Addr + T/L) and subnetwork layer subaddress type/length fields (e.g., + ar$sstl, Cli SAddr T/L) MUST be coded as in [11]. If the NBMA + network is ATM and no subaddress field is to be included in any part + of the NHRP packet then ar$afn MAY be set to 0x0003 (NSAP) or 0x0008 + (E.164) accordingly. + + The bottom 6 bits is an unsigned integer value indicating the length + of the associated NBMA address in octets. If this value is zero the + flag x is ignored. + +5.2.0 Mandatory Part + + The Mandatory Part of the NHRP packet contains the operation specific + information (e.g., NHRP Resolution Request/Reply, etc.) and variable + length data which is pertinent to the packet type. + +5.2.0.1 Mandatory Part Format + + Sections 5.2.1 through 5.2.6 have a very similar mandatory part. + This mandatory part includes a common header and zero or more Client + Information Entries (CIEs). Section 5.2.7 has a different format + which is specified in that section. 
+ + The common header looks like the following: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Src Proto Len | Dst Proto Len | Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Request ID | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source NBMA Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source NBMA Subaddress (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source Protocol Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Destination Protocol Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + +Luciani, et. al. Standards Track [Page 16] + +RFC 2332 NBMA NHRP April 1998 + + + And the CIEs have the following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Code | Prefix Length | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Maximum Transmission Unit | Holding Time | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cli Addr T/L | Cli SAddr T/L | Cli Proto Len | Preference | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Client NBMA Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Client NBMA Subaddress (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Client Protocol Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + ..................... 
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Code | Prefix Length | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Maximum Transmission Unit | Holding Time | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cli Addr T/L | Cli SAddr T/L | Cli Proto Len | Preference | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Client NBMA Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Client NBMA Subaddress (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Client Protocol Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The meanings of the fields are as follows: + + Src Proto Len + This field holds the length in octets of the Source Protocol + Address. + + Dst Proto Len + This field holds the length in octets of the Destination Protocol + Address. + + Flags + These flags are specific to the given message type and they are + explained in each section. + + + + + + +Luciani, et. al. Standards Track [Page 17] + +RFC 2332 NBMA NHRP April 1998 + + + Request ID + A value which, when coupled with the address of the source, + provides a unique identifier for the information contained in a + "request" packet. This value is copied directly from an "request" + packet into the associated "reply". When a sender of a "request" + receives "reply", it will compare the Request ID and source address + information in the received "reply" against that found in its + outstanding "request" list. When a match is found then the + "request" is considered to be acknowledged. + + The value is taken from a 32 bit counter that is incremented each + time a new "request" is transmitted. The same value MUST be used + when resending a "request", i.e., when a "reply" has not been + received for a "request" and a retry is sent after an appropriate + interval. 
+ + It is RECOMMENDED that the initial value for this number be 0. A + node MAY reuse a sequence number if and only if the reuse of the + sequence number is not precluded by use of a particular method of + synchronization (e.g., as described in Appendix A). + + The NBMA address/subaddress form specified below allows combined + E.164/NSAPA form of NBMA addressing. For NBMA technologies without a + subaddress concept, the subaddress field is always ZERO length and + ar$sstl = 0. + + Source NBMA Address + The Source NBMA address field is the address of the source station + which is sending the "request". If the field's length as specified + in ar$shtl is 0 then no storage is allocated for this address at + all. + + Source NBMA SubAddress + The Source NBMA subaddress field is the address of the source + station which is sending the "request". If the field's length as + specified in ar$sstl is 0 then no storage is allocated for this + address at all. + + For those NBMA technologies which have a notion of "Calling Party + Addresses", the Source NBMA Addresses above are the addresses used + when signaling for an SVC. + + "Requests" and "indications" follow the routed path from Source + Protocol Address to the Destination Protocol Address. "Replies", on + the other hand, follow the routed path from the Destination Protocol + Address back to the Source Protocol Address with the following + + + + + +Luciani, et. al. Standards Track [Page 18] + +RFC 2332 NBMA NHRP April 1998 + + + exceptions: in the case of a NHRP Registration Reply and in the case + of an NHC initiated NHRP Purge Request, the packet is always returned + via a direct VC (see Sections 5.2.4 and 5.2.5). + + Source Protocol Address + This is the protocol address of the station which is sending the + "request". This is also the protocol address of the station toward + which a "reply" packet is sent. 
+ + Destination Protocol Address + This is the protocol address of the station toward which a + "request" packet is sent. + + Code + This field is message specific. See the relevant message sections + below. In general, this field is a NAK code; i.e., when the field + is 0 in a reply then the packet is acknowledging a request and if + it contains any other value the packet contains a negative + acknowledgment. + + Prefix Length + This field is message specific. See the relevant message sections + below. In general, however, this field is used to indicate that + the information carried in an NHRP message pertains to an + equivalence class of internetwork layer addresses rather than just + a single internetwork layer address specified. All internetwork + layer addresses that match the first "Prefix Length" bit positions + for the specific internetwork layer address are included in the + equivalence class. If this field is set to 0x00 then this field + MUST be ignored and no equivalence information is assumed (note + that 0x00 is thus equivalent to 0xFF). + + Maximum Transmission Unit + This field gives the maximum transmission unit for the relevant + client station. If this value is 0 then either the default MTU is + used or the MTU negotiated via signaling is used if such + negotiation is possible for the given NBMA. + + Holding Time + The Holding Time field specifies the number of seconds for which + the Next Hop NBMA information specified in the CIE is considered to + be valid. Cached information SHALL be discarded when the holding + time expires. This field must be set to 0 on a NAK. + + + + + + + + +Luciani, et. al. Standards Track [Page 19] + +RFC 2332 NBMA NHRP April 1998 + + + Cli Addr T/L + Type & length of next hop NBMA address specified in the CIE. This + field is interpreted in the context of the 'address family + number'[6] indicated by ar$afn (e.g., ar$afn=0x0003 for ATM). 
+ + Cli SAddr T/L + Type & length of next hop NBMA subaddress specified in the CIE. + This field is interpreted in the context of the 'address family + number'[6] indicated by ar$afn (e.g., ar$afn=0x0015 for ATM makes + the address an E.164 and the subaddress an ATM Forum NSAP address). + When an NBMA technology has no concept of a subaddress, the + subaddress is always null with a length of 0. When the address + length is specified as 0 no storage is allocated for the address. + + Cli Proto Len + This field holds the length in octets of the Client Protocol + Address specified in the CIE. + + Preference + This field specifies the preference for use of the specific CIE + relative to other CIEs. Higher values indicate higher preference. + Action taken when multiple CIEs have equal or highest preference + value is a local matter. + + Client NBMA Address + This is the client's NBMA address. + + Client NBMA SubAddress + This is the client's NBMA subaddress. + + Client Protocol Address + This is the client's internetworking layer address specified. + + Note that an NHS may cache source address binding information from an + NHRP Resolution Request if and only if the conditions described in + Section 6.2 are met for the NHS. In all other cases, source address + binding information appearing in an NHRP message MUST NOT be cached. + +5.2.1 NHRP Resolution Request + + The NHRP Resolution Request packet has a Type code of 1. Its + mandatory part is coded as described in Section 5.2.0.1 and the + message specific meanings of the fields are as follows: + + Flags - The flags field is coded as follows: + + + + + + +Luciani, et. al. Standards Track [Page 20] + +RFC 2332 NBMA NHRP April 1998 + + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |Q|A|D|U|S| unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Q + Set if the station sending the NHRP Resolution Request is a + router; clear if it is a host. 
+ + A + This bit is set in a NHRP Resolution Request if only + authoritative next hop information is desired and is clear + otherwise. See the NHRP Resolution Reply section below for + further details on the "A" bit and its usage. + + D + Unused (clear on transmit) + + U + This is the Uniqueness bit. This bit aids in duplicate address + detection. When this bit is set in an NHRP Resolution Request + and one or more entries exist in the NHS cache which meet the + requirements of the NHRP Resolution Request then only the CIE in + the NHS's cache with this bit set will be returned. Note that + even if this bit was set at registration time, there may still be + multiple CIEs that might fulfill the NHRP Resolution Request + because an entire subnet can be registered through use of the + Prefix Length in the CIE and the address of interest might be + within such a subnet. If the "uniqueness" bit is set and the + responding NHS has one or more cache entries which match the + request but no such cache entry has the "uniqueness" bit set, + then the NHRP Resolution Reply returns with a NAK code of "13 - + Binding Exists But Is Not Unique" and no CIE is included. If a + client wishes to receive non- unique Next Hop Entries, then + the client must have the "uniqueness" bit set to zero in its NHRP + Resolution Request. Note that when this bit is set in an NHRP + Registration Request, only a single CIE may be specified in the + NHRP Registration Request and that CIE must have the Prefix + Length field set to 0xFF. + + S + Set if the binding between the Source Protocol Address and the + Source NBMA information in the NHRP Resolution Request is + guaranteed to be stable and accurate (e.g., these addresses are + those of an ingress router which is connected to an ethernet stub + network or the NHC is an NBMA attached host). + + + + +Luciani, et. al. 
Standards Track [Page 21] + +RFC 2332 NBMA NHRP April 1998 + + + Zero or one CIEs (see Section 5.2.0.1) may be specified in an NHRP + Resolution Request. If one is specified then that entry carries the + pertinent information for the client sourcing the NHRP Resolution + Request. Usage of the CIE in the NHRP Resolution Request is + described below: + + Prefix Length + If a CIE is specified in the NHRP Resolution Request then the + Prefix Length field may be used to qualify the widest acceptable + prefix which may be used to satisfy the NHRP Resolution Request. + In the case of NHRP Resolution Request/Reply, the Prefix Length + specifies the equivalence class of addresses which match the + first "Prefix Length" bit positions of the Destination Protocol + Address. If the "U" bit is set in the common header then this + field MUST be set to 0xFF. + + Maximum Transmission Unit + This field gives the maximum transmission unit for the source + station. A possible use of this field in the NHRP Resolution + Request packet is for the NHRP Resolution Requester to ask for a + target MTU. + + Holding Time + The Holding Time specified in the one CIE permitted to be + included in an NHRP Resolution Request is the amount of time + which the source address binding information in the NHRP + Resolution Request is permitted to be cached by transit and + responding NHSs. Note that this field may only have a non-zero + value if the S bit is set. + + All other fields in the CIE MUST be ignored and SHOULD be set to 0. + + The Destination Protocol Address in the common header of the + Mandatory Part of this message contains the protocol address of the + station for which resolution is desired. An NHC MUST send the NHRP + Resolution Request directly to one of its serving NHSs (see Section 3 + for more information). + +5.2.2 NHRP Resolution Reply + + The NHRP Resolution Reply packet has a Type code of 2. 
CIEs + correspond to Next Hop Entries in an NHS's cache which match the + criteria in the NHRP Resolution Request. Its mandatory part is coded + as described in Section 5.2.0.1. The message specific meanings of + the fields are as follows: + + Flags - The flags field is coded as follows: + + + + +Luciani, et. al. Standards Track [Page 22] + +RFC 2332 NBMA NHRP April 1998 + + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |Q|A|D|U|S| unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Q + Copied from the NHRP Resolution Request. Set if the NHRP + Resolution Requester is a router; clear if it is a host. + + A + Set if the next hop CIE in the NHRP Resolution Reply is + authoritative; clear if the NHRP Resolution Reply is non- + authoritative. + + When an NHS receives a NHRP Resolution Request for authoritative + information for which it is the authoritative source, it MUST + respond with a NHRP Resolution Reply containing all and only + those next hop CIEs which are contained in the NHS's cache which + both match the criteria of the NHRP Resolution Request and are + authoritative cache entries. An NHS is an authoritative source + for a NHRP Resolution Request if the information in the NHS's + cache matches the NHRP Resolution Request criteria and that + information was obtained through a NHRP Registration Request or + through synchronization with an NHS which obtained this + information through a NHRP Registration Request. An + authoritative cache entry is one which is obtained through a NHRP + Registration Request or through synchronization with an NHS which + obtained this information through a NHRP Registration Request. + + An NHS obtains non-authoritative CIEs through promiscuous + listening to NHRP packets other than NHRP Registrations which are + directed at it. 
A NHRP Resolution Request which indicates a + request for non-authoritative information should cause a NHRP + Resolution Reply which contains all entries in the replying NHS's + cache (i.e., both authoritative and non-authoritative) which + match the criteria specified in the request. + + D + Set if the association between destination and the associated next + hop information included in all CIEs of the NHRP Resolution Reply + is guaranteed to be stable for the lifetime of the information + (the holding time). This is the case if the Next Hop protocol + address in a CIE identifies the destination (though it may be + different in value than the Destination address if the + destination system has multiple addresses) or if the destination + is not connected directly to the NBMA subnetwork but the egress + router to that destination is guaranteed to be stable (such as + + + +Luciani, et. al. Standards Track [Page 23] + +RFC 2332 NBMA NHRP April 1998 + + + when the destination is immediately adjacent to the egress router + through a non-NBMA interface). + + U + This is the Uniqueness bit. See the NHRP Resolution Request + section above for details. When this bit is set, only one CIE is + included since only one unique binding should exist in an NHS's + cache. + + S + Copied from NHRP Resolution Request message. + + One or more CIEs are specified in the NHRP Resolution Reply. Each CIE + contains NHRP next hop information which the responding NHS has + cached and which matches the parameters specified in the NHRP + Resolution Request. If no match is found by the NHS issuing the NHRP + Resolution Reply then a single CIE is enclosed with a CIE Code + set appropriately (see below) and all other fields MUST be ignored + and SHOULD be set to 0. In order to facilitate the use of NHRP by + minimal client implementations, the first CIE MUST contain the next + hop with the highest preference value so that such an implementation + need parse only a single CIE. 
+ + Code + If this field is set to zero then this packet contains a + positively acknowledged NHRP Resolution Reply. If this field + contains any other value then this message contains an NHRP + Resolution Reply NAK which means that an appropriate + internetworking layer to NBMA address binding was not available + in the responding NHS's cache. If NHRP Resolution Reply contains + a Client Information Entry with a NAK Code other than 0 then it + MUST NOT contain any other CIE. Currently defined NAK Codes are + as follows: + + 4 - Administratively Prohibited + + An NHS may refuse an NHRP Resolution Request attempt for + administrative reasons (due to policy constraints or routing + state). If so, the NHS MUST send an NHRP Resolution Reply + which contains a NAK code of 4. + + 5 - Insufficient Resources + + If an NHS cannot serve a station due to a lack of resources + (e.g., can't store sufficient information to send a purge if + routing changes), the NHS MUST reply with a NAKed NHRP + Resolution Reply which contains a NAK code of 5. + + + + +Luciani, et. al. Standards Track [Page 24] + +RFC 2332 NBMA NHRP April 1998 + + + 12 - No Internetworking Layer Address to NBMA Address Binding + Exists + + This code states that there were absolutely no internetworking + layer address to NBMA address bindings found in the responding + NHS's cache. + + 13 - Binding Exists But Is Not Unique + + This code states that there were one or more internetworking + layer address to NBMA address bindings found in the responding + NHS's cache, however none of them had the uniqueness bit set. + + Prefix Length + In the case of NHRP Resolution Reply, the Prefix Length specifies + the equivalence class of addresses which match the first "Prefix + Length" bit positions of the Destination Protocol Address. 
+ + Holding Time + The Holding Time specified in a CIE of an NHRP Resolution Reply + is the amount of time remaining before the expiration of the + client information which is cached at the replying NHS. It is + not the value which was registered by the client. + + The remainder of the fields for the CIE for each next hop are + filled out as they were defined when the next hop was registered + with the responding NHS (or one of the responding NHS's + synchronized servers) via the NHRP Registration Request. + + Load-splitting may be performed when more than one Client Information + Entry is returned to a requester when equal preference values are + specified. Also, the alternative addresses may be used in case of + connectivity failure in the NBMA subnetwork (such as a failed call + attempt in connection-oriented NBMA subnetworks). + + Any extensions present in the NHRP Resolution Request packet MUST be + present in the NHRP Resolution Reply even if the extension is non- + Compulsory. + + If an unsolicited NHRP Resolution Reply packet is received, an Error + Indication of type Invalid NHRP Resolution Reply Received SHOULD be + sent in response. + + When an NHS that serves a given NHC receives an NHRP Resolution Reply + destined for that NHC then the NHS MUST send the NHRP Resolution + Reply directly to the NHC (see Section 3). + + + + + +Luciani, et. al. Standards Track [Page 25] + +RFC 2332 NBMA NHRP April 1998 + + +5.2.3 NHRP Registration Request + + The NHRP Registration Request is sent from a station to an NHS to + notify the NHS of the station's NBMA information. It has a Type code + of 3. Each CIE corresponds to Next Hop information which is to be + cached at an NHS. The mandatory part of an NHRP Registration Request + is coded as described in Section 5.2.0.1. 
The message specific + meanings of the fields are as follows: + + Flags - The flags field is coded as follows: + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |U| unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + U + This is the Uniqueness bit. When set in an NHRP Registration + Request, this bit indicates that the registration of the protocol + address is unique within the confines of the set of synchronized + NHSs. This "uniqueness" qualifier MUST be stored in the NHS/NHC + cache. Any attempt to register a binding between the protocol + address and an NBMA address when this bit is set MUST be rejected + with a Code of "14 - Unique Internetworking Layer Address Already + Registered" if the replying NHS already has a cache entry for the + protocol address and the cache entry has the "uniqueness" bit + set. A registration of a CIE's information is rejected when the + CIE is returned with the Code field set to anything other than + 0x00. See the description of the uniqueness bit in NHRP + Resolution Request section above for further details. When this + bit is set, only one CIE MAY be included in the NHRP + Registration Request. + + Request ID + The request ID has the same meaning as described in Section + 5.2.0.1. However, the request ID for NHRP Registrations which is + maintained at each client MUST be kept in non-volatile memory so + that when a client crashes and reregisters there will be no + inconsistency in the NHS's database. In order to reduce the + overhead associated with updating non-volatile memory, the actual + updating need not be done with every increment of the Request ID + but could be done, for example, every 50 or 100 increments. In + this scenario, when a client crashes and reregisters it knows to + add 100 to the value of the Request ID in the non-volatile memory + before using the Request ID for subsequent registrations. + + + + + +Luciani, et. al. 
Standards Track [Page 26] + +RFC 2332 NBMA NHRP April 1998 + + + One or more CIEs are specified in the NHRP Registration Request. + Each CIE contains next hop information which a client is attempting + to register with its servers. Generally, all fields in CIEs enclosed + in NHRP Registration Requests are coded as described in Section + 5.2.0.1. However, if a station is only registering itself with the + NHRP Registration Request then it MAY code the Cli Addr T/L, Cli + SAddr T/L, and Cli Proto Len as zero which signifies that the client + address information is to be taken from the source information in the + common header (see Section 5.2.0.1). Below, further clarification is + given for some fields in a CIE in the context of a NHRP Registration + Request. + + Code + This field is set to 0x00 in NHRP Registration Requests. + + Prefix Length + + This field may be used in a NHRP Registration Request to register + equivalence information for the Client Protocol Address specified + in the CIE of an NHRP Registration Request. In the case of NHRP + Registration Request, the Prefix Length specifies the equivalence + class of addresses which match the first "Prefix Length" bit + positions of the Client Protocol Address. If the "U" bit is set + in the common header then this field MUST be set to 0xFF. + + The NHRP Registration Request is used to register an NHC's NHRP + information with its NHSs. If an NHC is configured with the protocol + address of a serving NHS then the NHC may place the NHS's protocol + address in the Destination Protocol Address field of the NHRP + Registration Request common header otherwise the NHC must place its + own protocol address in the Destination Protocol Address field. 
+ + When an NHS receives an NHRP Registration Request which has the + Destination Protocol Address field set to an address which belongs to + a LIS/LAG for which the NHS is serving then if the Destination + Protocol Address field is equal to the Source Protocol Address field + (which would happen if the NHC put its protocol address in the + Destination Protocol Address) or the Destination Protocol Address + field is equal to the protocol address of the NHS then the NHS + processes the NHRP Registration Request after doing appropriate error + checking (including any applicable policy checking). + + When an NHS receives an NHRP Registration Request which has the + Destination Protocol Address field set to an address which does not + belong to a LIS/LAG for which the NHS is serving then the NHS + forwards the packet down the routed path toward the appropriate + LIS/LAG. + + + + +Luciani, et. al. Standards Track [Page 27] + +RFC 2332 NBMA NHRP April 1998 + + + When an NHS receives an NHRP Registration Request which has the + Destination Protocol Address field set to an address which belongs to + a LIS/LAG for which the NHS is serving then if the Destination + Protocol Address field does not equal the Source Protocol Address + field and the Destination Protocol Address field does not equal the + protocol address of the NHS then the NHS forwards the message to the + appropriate NHS within the LIS/LAG as specified by Destination + Protocol Address field. + + It is possible that a misconfigured station will attempt to register + with the wrong NHS (i.e., one that cannot serve it due to policy + constraints or routing state). If this is the case, the NHS MUST + reply with a NAK-ed Registration Reply of type Can't Serve This + Address. + + If an NHS cannot serve a station due to a lack of resources, the NHS + MUST reply with a NAK-ed Registration Reply of type Registration + Overflow. 
+ + In order to keep the registration entry from being discarded, the + station MUST re-send the NHRP Registration Request packet often + enough to refresh the registration, even in the face of occasional + packet loss. It is recommended that the NHRP Registration Request + packet be sent at an interval equal to one-third of the Holding Time + specified therein. + +5.2.4 NHRP Registration Reply + + The NHRP Registration Reply is sent by an NHS to a client in response + to that client's NHRP Registration Request. If the Code field of a + CIE in the NHRP Registration Reply has anything other than zero in it + then the NHRP Registration Reply is a NAK otherwise the reply is an + ACK. The NHRP Registration Reply has a Type code of 4. + + An NHRP Registration Reply is formed from an NHRP Registration + Request by changing the type code to 4, updating the CIE Code field, + and filling in the appropriate extensions if they exist. The message + specific meanings of the fields are as follows: + + Attempts to register the information in the CIEs of an NHRP + Registration Request may fail for various reasons. If this is the + case then each failed attempt to register the information in a CIE of + an NHRP Registration Request is logged in the associated NHRP + Registration Reply by setting the CIE Code field to the appropriate + error code as shown below: + + + + + + +Luciani, et. al. Standards Track [Page 28] + +RFC 2332 NBMA NHRP April 1998 + + + CIE Code + + 0 - Successful Registration + + The information in the CIE was successfully registered with the + NHS. + + 4 - Administratively Prohibited + + An NHS may refuse an NHRP Registration Request attempt for + administrative reasons (due to policy constraints or routing + state). If so, the NHS MUST send an NHRP Registration Reply + which contains a NAK code of 4. 
+ + 5 - Insufficient Resources + + If an NHS cannot serve a station due to a lack of resources, + the NHS MUST reply with a NAKed NHRP Registration Reply which + contains a NAK code of 5. + + 14 - Unique Internetworking Layer Address Already Registered + If a client tries to register a protocol address to NBMA + address binding with the uniqueness bit on and the protocol + address already exists in the NHS's cache then if that cache + entry also has the uniqueness bit on then this NAK Code is + returned in the CIE in the NHRP Registration Reply. + + Due to the possible existence of asymmetric routing, an NHRP + Registration Reply may not be able to merely follow the routed path + back to the source protocol address specified in the common header of + the NHRP Registration Reply. As a result, there MUST exist a direct + NBMA level connection between the NHC and its NHS on which to send + the NHRP Registration Reply before NHRP Registration Reply may be + returned to the NHC. If such a connection does not exist then the + NHS must setup such a connection to the NHC by using the source NBMA + information supplied in the common header of the NHRP Registration + Request. + +5.2.5 NHRP Purge Request + + The NHRP Purge Request packet is sent in order to invalidate cached + information in a station. The NHRP Purge Request packet has a type + code of 5. The mandatory part of an NHRP Purge Request is coded as + described in Section 5.2.0.1. The message specific meanings of the + fields are as follows: + + Flags - The flags field is coded as follows: + + + + +Luciani, et. al. Standards Track [Page 29] + +RFC 2332 NBMA NHRP April 1998 + + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |N| unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + N + When set, this bit tells the receiver of the NHRP Purge Request + that the requester does not expect to receive an NHRP Purge + Reply. 
If an unsolicited NHRP Purge Reply is received by a + station where that station is identified in the Source Protocol + Address of the packet then that packet must be ignored. + + One or more CIEs are specified in the NHRP Purge Request. Each CIE + contains next hop information which is to be purged from an NHS/NHC + cache. Generally, all fields in CIEs enclosed in NHRP Purge Requests + are coded as described in Section 5.2.0.1. Below, further + clarification is given for some fields in a CIE in the context of a + NHRP Purge Request. + + Code + This field is set to 0x00 in NHRP Purge Requests. + + Prefix Length + + In the case of NHRP Purge Requests, the Prefix Length specifies + the equivalence class of addresses which match the first "Prefix + Length" bit positions of the Client Protocol Address specified in + the CIE. All next hop information which contains a protocol + address which matches an element of this equivalence class is to + be purged from the receiver's cache. + + The Maximum Transmission Unit and Preference fields of the CIE are + coded as zero. The Holding Time should be coded as zero but there + may be some utility in supplying a "short" holding time to be + applied to the matching next hop information before that + information would be purged; this usage is for further study. The + Client Protocol Address field and the Cli Proto Len field MUST be + filled in. The Client Protocol Address is filled in with the + protocol address to be purged from the receiving station's cache + while the Cli Proto Len is set to the length of the purged client's + protocol address. All remaining fields in the CIE MAY be set to + zero although the client NBMA information (and associated length + fields) MAY be specified to narrow the scope of the NHRP Purge + Request if requester desires. However, the receiver of an NHRP + Purge Request may choose to ignore the Client NBMA information if + it is supplied. + + + + +Luciani, et. al.
Standards Track [Page 30] + +RFC 2332 NBMA NHRP April 1998 + + + An NHRP Purge Request packet is sent from an NHS to a station to + cause it to delete previously cached information. This is done when + the information may be no longer valid (typically when the NHS has + previously provided next hop information for a station that is not + directly connected to the NBMA subnetwork, and the egress point to + that station may have changed). + + An NHRP Purge Request packet may also be sent from an NHC to an NHS + with which the NHC had previously registered. This allows for an NHC + to invalidate its registration with NHRP before it would otherwise + expire via the holding timer. If an NHC does not have knowledge of a + protocol address of a serving NHS then the NHC must place its own + protocol address in the Destination Protocol Address field and + forward the packet along the routed path. Otherwise, the NHC must + place the protocol address of a serving NHS in this field. + + Serving NHSs may need to send one or more new NHRP Purge Requests as + a result of receiving a purge from one of their served NHCs since the + NHS may have previously responded to NHRP Resolution Requests for + that NHC's NBMA information. These purges are "new" in that they are + sourced by the NHS and not the NHC; that is, for each NHC that + previously sent a NHRP Resolution Request for the purged NHC NBMA + information, an NHRP Purge Request is sent which contains the Source + Protocol/NBMA Addresses of the NHS and the Destination Protocol + Address of the NHC which previously sent an NHRP Resolution Request + prior to the purge. + + The station sending the NHRP Purge Request MAY periodically + retransmit the NHRP Purge Request until either NHRP Purge Request is + acknowledged or until the holding time of the information being + purged has expired. Retransmission strategies for NHRP Purge Requests + are a local matter. 
+ + When a station receives an NHRP Purge Request, it MUST discard any + previously cached information that matches the information in the + CIEs. + + An NHRP Purge Reply MUST be returned for the NHRP Purge Request even + if the station does not have a matching cache entry assuming that the + "N" bit is off in the NHRP Purge Request. + + If the station wishes to reestablish communication with the + destination shortly after receiving an NHRP Purge Request, it should + make an authoritative NHRP Resolution Request in order to avoid any + stale cache entries that might be present in intermediate NHSs (See + section 6.2.2.). It is recommended that authoritative NHRP + Resolution Requests be made for the duration of the holding time of + the old information. + + + +Luciani, et. al. Standards Track [Page 31] + +RFC 2332 NBMA NHRP April 1998 + + +5.2.6 NHRP Purge Reply + + The NHRP Purge Reply packet is sent in order to assure the sender of + an NHRP Purge Request that all cached information of the specified + type has been purged from the station sending the reply. The NHRP + Purge Reply has a type code of 6. + + An NHRP Purge Reply is formed from an NHRP Purge Request by merely + changing the type code in the request to 6. The packet is then + returned to the requester after filling in the appropriate extensions + if they exist. + +5.2.7 NHRP Error Indication + + The NHRP Error Indication is used to convey error indications to the + sender of an NHRP packet. It has a type code of 7. 
The Mandatory + Part has the following format: + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Src Proto Len | Dst Proto Len | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Error Code | Error Offset | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source NBMA Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source NBMA Subaddress (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source Protocol Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Destination Protocol Address (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Contents of NHRP Packet in error (variable length) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Src Proto Len + This field holds the length in octets of the Source Protocol + Address. + + Dst Proto Len + This field holds the length in octets of the Destination Protocol + Address. + + + + + + + +Luciani, et. al. Standards Track [Page 32] + +RFC 2332 NBMA NHRP April 1998 + + + Error Code + An error code indicating the type of error detected, chosen from + the following list: + + 1 - Unrecognized Extension + + When the Compulsory bit of an extension in NHRP packet is set, + the NHRP packet cannot be processed unless the extension has + been processed. The responder MUST return an NHRP Error + Indication of type Unrecognized Extension if it is incapable of + processing the extension. However, if a transit NHS (one which + is not going to generate a reply) detects an unrecognized + extension, it SHALL ignore the extension. + + 3 - NHRP Loop Detected + + A Loop Detected error is generated when it is determined that + an NHRP packet is being forwarded in a loop. 
+ + 6 - Protocol Address Unreachable + + This error occurs when a packet is moving along the routed path + and it reaches a point such that the protocol address of + interest is not reachable. + + 7 - Protocol Error + + A generic packet processing error has occurred (e.g., invalid + version number, invalid protocol type, failed checksum, etc.) + + 8 - NHRP SDU Size Exceeded + + If the SDU size of the NHRP packet exceeds the MTU size of the + NBMA network then this error is returned. + + 9 - Invalid Extension + + If an NHS finds an extension in a packet which is inappropriate + for the packet type, an error is sent back to the sender with + Invalid Extension as the code. + + 10 - Invalid NHRP Resolution Reply Received + + If a client receives a NHRP Resolution Reply for a Next Hop + Resolution Request which it believes it did not make then an + error packet is sent to the station making the reply with an + error code of Invalid Reply Received. + + + + +Luciani, et. al. Standards Track [Page 33] + +RFC 2332 NBMA NHRP April 1998 + + + 11 - Authentication Failure + + If a received packet fails an authentication test then this + error is returned. + + 15 - Hop Count Exceeded + + The hop count which was specified in the Fixed Header of an + NHRP message has been exceeded. + + Error Offset + The offset in octets into the original NHRP packet in which an + error was detected. This offset is calculated starting from the + NHRP Fixed Header. + + Source NBMA Address + The Source NBMA address field is the address of the station which + observed the error. + + Source NBMA SubAddress + The Source NBMA subaddress field is the address of the station + which observed the error. If the field's length as specified in + ar$sstl is 0 then no storage is allocated for this address at all. + + Source Protocol Address + This is the protocol address of the station which issued the Error + packet.
+ + Destination Protocol Address + This is the protocol address of the station which sent the packet + which was found to be in error. + + An NHRP Error Indication packet SHALL NEVER be generated in response + to another NHRP Error Indication packet. When an NHRP Error + Indication packet is generated, the offending NHRP packet SHALL be + discarded. In no case should more than one NHRP Error Indication + packet be generated for a single NHRP packet. + + If an NHS sees its own Protocol and NBMA Addresses in the Source NBMA + and Source Protocol address fields of a transiting NHRP Error + Indication packet then the NHS will quietly drop the packet and do + nothing (this scenario would occur when the NHRP Error Indication + packet was itself in a loop). + + Note that no extensions may be added to an NHRP Error Indication. + + + + + + +Luciani, et. al. Standards Track [Page 34] + +RFC 2332 NBMA NHRP April 1998 + + +5.3 Extensions Part + + The Extensions Part, if present, carries one or more extensions in + {Type, Length, Value} triplets. + + Extensions have the following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |C|u| Type | Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Value... | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + C + "Compulsory." If clear, and the NHS does not recognize the type + code, the extension may safely be ignored. If set, and the NHS + does not recognize the type code, the NHRP "request" is considered + to be in error. (See below for details.) + + u + Unused and must be set to zero. + + Type + The extension type code (see below). The extension type is not + qualified by the Compulsory bit, but is orthogonal to it. 
+ + Length + The length in octets of the value (not including the Type and + Length fields; a null extension will have only an extension header + and a length of zero). + + When extensions exist, the extensions list is terminated by the Null + TLV, having Type = 0 and Length = 0. + + Extensions may occur in any order, but any particular extension type + may occur only once in an NHRP packet unless explicitly stated to the + contrary in the extensions definition. For example, the vendor- + private extension may occur multiple times in a packet in order to + allow for extensions which do not share the same vendor ID to be + represented. It is RECOMMENDED that a given vendor include no more + than one Vendor Private Extension. + + An NHS MUST NOT change the order of extensions. That is, the order + of extensions placed in an NHRP packet by an NHC (or by an NHS when + an NHS sources a packet) MUST be preserved as the packet moves + between NHSs. Minimal NHC implementations MUST only recognize, but + + + +Luciani, et. al. Standards Track [Page 35] + +RFC 2332 NBMA NHRP April 1998 + + + not necessarily parse, the Vendor Private extension and the End Of + Extensions extension. Extensions are only present in a "reply" if + they were present in the corresponding "request" with the exception + of Vendor Private extensions. The previous statement is not intended + to preclude the creation of NHS-only extensions which might be added + to and removed from NHRP packets by the same NHS; such extensions + MUST NOT be propagated to NHCs. + + The Compulsory bit provides for a means to add to the extension set. + If the bit is set in an extension then the station responding to the + NHRP message which contains that extension MUST be able to understand + the extension (in this case, the station responding to the message is + the station that would issue an NHRP reply in response to a NHRP + request).
As a result, the responder MUST return an NHRP Error + Indication of type Unrecognized Extension. If the Compulsory bit is + clear then the extension can be safely ignored; however, if an + ignored extension is in a "request" then it MUST be returned, + unchanged, in the corresponding "reply" packet type. + + If a transit NHS (one which is not going to generate a "reply") + detects an unrecognized extension, it SHALL ignore the extension. If + the Compulsory bit is set, the transit NHS MUST NOT cache the + information contained in the packet and MUST NOT identify itself as + an egress router (in the Forward Record or Reverse Record + extensions). Effectively, this means, if a transit NHS encounters an + extension which it cannot process and which has the Compulsory bit + set then that NHS MUST NOT participate in any way in the protocol + exchange other than acting as a forwarding agent. + + The NHRP extension Type space is subdivided to encourage use outside + the IETF. + + 0x0000 - 0x0FFF Reserved for NHRP. + 0x1000 - 0x11FF Allocated to the ATM Forum. + 0x1200 - 0x37FF Reserved for the IETF. + 0x3800 - 0x3FFF Experimental use. + + IANA will administer the ranges reserved for the IETF as described in + Section 9. Values in the 'Experimental use' range have only local + significance. + +5.3.0 The End Of Extensions + + Compulsory = 1 + Type = 0 + Length = 0 + + + + + +Luciani, et. al. Standards Track [Page 36] + +RFC 2332 NBMA NHRP April 1998 + + + When extensions exist, the extensions list is terminated by the End + Of Extensions/Null TLV. + +5.3.1 Responder Address Extension + + Compulsory = 1 + Type = 3 + Length = variable + + This extension is used to determine the address of the NHRP + responder; i.e., the entity that generates the appropriate "reply" + packet for a given "request" packet. 
In the case of an NHRP + Resolution Request, the station responding may be different (in the + case of cached replies) than the system identified in the Next Hop + field of the NHRP Resolution Reply. Further, this extension may aid + in detecting loops in the NHRP forwarding path. + + This extension uses a single CIE with the extension specific meanings + of the fields set as follows: + + The Prefix Length fields MUST be set to 0 and ignored. + + CIE Code + 5 - Insufficient Resources + If the responder to an NHRP Resolution Request is an egress point + for the target of the address resolution request (i.e., it is one + of the stations identified in the list of CIEs in an NHRP + Resolution Reply) and the Responder Address extension is included + in the NHRP Resolution Request and insufficient resources to + setup a cut-through VC exist at the responder then the Code field + of the Responder Address Extension is set to 5 in order to tell + the client that a VC setup attempt would in all likelihood be + rejected; otherwise this field MUST be coded as a zero. NHCs MAY + use this field to influence whether they attempt to setup a cut- + through to the egress router. + + Maximum Transmission Unit + This field gives the maximum transmission unit preferred by the + responder. If this value is 0 then either the default MTU is used + or the MTU negotiated via signaling is used if such negotiation is + possible for the given NBMA. + + Holding Time + The Holding Time field specifies the number of seconds for which + the NBMA information of the responder is considered to be valid. + Cached information SHALL be discarded when the holding time + expires. + + + + +Luciani, et. al. Standards Track [Page 37] + +RFC 2332 NBMA NHRP April 1998 + + + "Client Address" information is actually "Responder Address" + information for this extension. Thus, for example, Cli Addr T/L is + the responder NBMA address type and length field.
+ + If a "requester" desires this information, the "requester" SHALL + include this extension with a value of zero. Note that this implies + that no storage is allocated for the Holding Time and Type/Length + fields until the "Value" portion of the extension is filled out. + + If an NHS is generating a "reply" packet in response to a "request" + containing this extension, the NHS SHALL include this extension, + containing its protocol address in the "reply". If an NHS has more + than one protocol address, it SHALL use the same protocol address + consistently in all of the Responder Address, Forward Transit NHS + Record, and Reverse Transit NHS Record extensions. The choice of + which of several protocol address to include in this extension is a + local matter. + + If an NHRP Resolution Reply packet being forwarded by an NHS contains + a protocol address of that NHS in the Responder Address Extension + then that NHS SHALL generate an NHRP Error Indication of type "NHRP + Loop Detected" and discard the NHRP Resolution Reply. + + If an NHRP Resolution Reply packet is being returned by an + intermediate NHS based on cached data, it SHALL place its own address + in this extension (differentiating it from the address in the Next + Hop field). + +5.3.2 NHRP Forward Transit NHS Record Extension + + Compulsory = 1 + Type = 4 + Length = variable + + The NHRP Forward Transit NHS record contains a list of transit NHSs + through which a "request" has traversed. Each NHS SHALL append to + the extension a Forward Transit NHS element (as specified below) + containing its Protocol address. The extension length field and the + ar$chksum fields SHALL be adjusted appropriately. + + The responding NHS, as described in Section 5.3.1, SHALL NOT update + this extension. + + In addition, NHSs that are willing to act as egress routers for + packets from the source to the destination SHALL include information + about their NBMA Address. + + + + + +Luciani, et. al. 
Standards Track [Page 38] + +RFC 2332 NBMA NHRP April 1998 + + + This extension uses a single CIE per NHS Record element with the + extension specific meanings of the fields set as follows: + + The Prefix Length fields MUST be set to 0 and ignored. + + CIE Code + 5 - Insufficient Resources + If an NHRP Resolution Request contains an NHRP Forward Transit + NHS Record Extension and insufficient resources to setup a cut- + through VC exist at the current transit NHS then the CIE Code + field for NHRP Forward Transit NHS Record Extension is set to 5 + in order to tell the client that a VC setup attempt would in all + likelihood be rejected; otherwise this field MUST be coded as a + zero. NHCs MAY use this field to influence whether they attempt + to setup a cut-through as described in Section 2.2. Note that + the NHRP Reverse Transit NHS Record Extension MUST always have + this field set to zero. + + Maximum Transmission Unit + This field gives the maximum transmission unit preferred by the + transit NHS. If this value is 0 then either the default MTU is + used or the MTU negotiated via signaling is used if such + negotiation is possible for the given NBMA. + + Holding Time + The Holding Time field specifies the number of seconds for which + the NBMA information of the transit NHS is considered to be valid. + Cached information SHALL be discarded when the holding time + expires. + + "Client Address" information is actually "Forward Transit NHS + Address" information for this extension. Thus, for example, Cli Addr + T/L is the transit NHS NBMA address type and length field. + + If a "requester" wishes to obtain this information, it SHALL include + this extension with a length of zero. Note that this implies that no + storage is allocated for the Holding Time and Type/Length fields + until the "Value" portion of the extension is filled out. 
+ + If an NHS has more than one Protocol address, it SHALL use the same + Protocol address consistently in all of the Responder Address, + Forward NHS Record, and Reverse NHS Record extensions. The choice of + which of several Protocol addresses to include in this extension is a + local matter. + + + + + + + +Luciani, et. al. Standards Track [Page 39] + +RFC 2332 NBMA NHRP April 1998 + + + If a "request" that is being forwarded by an NHS contains the + Protocol Address of that NHS in one of the Forward Transit NHS + elements then the NHS SHALL generate an NHRP Error Indication of type + "NHRP Loop Detected" and discard the "request". + +5.3.3 NHRP Reverse Transit NHS Record Extension + + Compulsory = 1 + Type = 5 + Length = variable + + The NHRP Reverse Transit NHS record contains a list of transit NHSs + through which a "reply" has traversed. Each NHS SHALL append a + Reverse Transit NHS element (as specified below) containing its + Protocol address to this extension. The extension length field and + ar$chksum SHALL be adjusted appropriately. + + The responding NHS, as described in Section 5.3.1, SHALL NOT update + this extension. + + In addition, NHSs that are willing to act as egress routers for + packets from the source to the destination SHALL include information + about their NBMA Address. + + This extension uses a single CIE per NHS Record element with the + extension specific meanings of the fields set as follows: + + The CIE Code and Prefix Length fields MUST be set to 0 and ignored. + + Maximum Transmission Unit + This field gives the maximum transmission unit preferred by the + transit NHS. If this value is 0 then either the default MTU is + used or the MTU negotiated via signaling is used if such + negotiation is possible for the given NBMA. + + Holding Time + The Holding Time field specifies the number of seconds for which + the NBMA information of the transit NHS is considered to be valid. 
+ Cached information SHALL be discarded when the holding time + expires. + + "Client Address" information is actually "Reverse Transit NHS + Address" information for this extension. Thus, for example, Cli Addr + T/L is the transit NHS NBMA address type and length field. + + + + + + + +Luciani, et. al. Standards Track [Page 40] + +RFC 2332 NBMA NHRP April 1998 + + + If a "requester" wishes to obtain this information, it SHALL include + this extension with a length of zero. Note that this implies that no + storage is allocated for the Holding Time and Type/Length fields + until the "Value" portion of the extension is filled out. + + If an NHS has more than one Protocol address, it SHALL use the same + Protocol address consistently in all of the Responder Address, + Forward NHS Record, and Reverse NHS Record extensions. The choice of + which of several Protocol addresses to include in this extension is a + local matter. + + If a "reply" that is being forwarded by an NHS contains the Protocol + Address of that NHS in one of the Reverse Transit NHS elements then + the NHS SHALL generate an NHRP Error Indication of type "NHRP Loop + Detected" and discard the "reply". + + Note that this information may be cached at intermediate NHSs; if + so, the cached value SHALL be used when generating a reply. + +5.3.4 NHRP Authentication Extension + + Compulsory = 1 Type = 7 Length = variable + + The NHRP Authentication Extension is carried in NHRP packets to + convey authentication information between NHRP speakers. The + Authentication Extension may be included in any NHRP "request" or + "reply" only. + + The authentication is always done pairwise on an NHRP hop-by-hop + basis; i.e., the authentication extension is regenerated at each + hop. If a received packet fails the authentication test, the station + SHALL generate an Error Indication of type "Authentication Failure" + and discard the packet. 
Note that one possible authentication failure + is the lack of an Authentication Extension; the presence or absence + of the Authentication Extension is a local matter. + +5.3.4.1 Header Format + + The authentication header has the following format: + + + + + + + + + + + + +Luciani, et. al. Standards Track [Page 41] + +RFC 2332 NBMA NHRP April 1998 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Reserved | Security Parameter Index (SPI)| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Src Addr... | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + +-+-+-+-+-+-+-+-+-+-+ Authentication Data... -+-+-+-+-+-+-+-+-+-+ + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Security Parameter Index (SPI) can be thought of as an index into a + table that maintains the keys and other information such as hash + algorithm. Src and Dst communicate either offline using manual keying + or online using a key management protocol to populate this table. The + sending NHRP entity always allocates the SPI and the parameters + associated with it. + + Src Addr a variable length field is the address assigned to the + outgoing interface. The length of the addr is obtained from the + source protocol length field in the mandatory part of the NHRP + header. The tuple <spi, src addr> uniquely identifies the key and + other parameters that are used in authentication. + + The length of the authentication data field is dependent on the hash + algorithm used. The data field contains the keyed hash calculated + over the entire NHRP payload. The authentication data field is zeroed + out before the hash is calculated. + +5.3.4.2 SPI and Security Parameters Negotiation + + SPI's can be negotiated either manually or using an Internet Key + Management protocol. Manual keying MUST be supported. 
The following + parameters are associated with the tuple <SPI, src>- lifetime, + Algorithm, Key. Lifetime indicates the duration in seconds for which + the key is valid. In case of manual keying, this duration can be + infinite. Also, in order to better support manual keying, there may + be multiple tuples active at the same time (Dst being the same). + + Algorithm specifies the hash algorithm agreed upon by the two + entities. HMAC-MD5-128 [16] is the default algorithm. Other + algorithms MAY be supported by defining new values. IANA will assign + the numbers to identify the algorithm being used as described in + Section 9. + + Any Internet standard key management protocol MAY also be used to + negotiate the SPI and parameters. + + + +Luciani, et. al. Standards Track [Page 42] + +RFC 2332 NBMA NHRP April 1998 + + +5.3.4.3 Message Processing + + At the time of adding the authentication extension header, src looks + up in a table to fetch the SPI and the security parameters based on + the outgoing interface address. If there are no entries in the table + and if there is support for key management, the src initiates the key + management protocol to fetch the necessary parameters. The src + constructs the Authentication Extension payload and calculates the + hash by zeroing the authentication data field. The result replaces the + zeroed authentication data field. The src address field in the + payload is the IP address assigned to the outgoing interface. + + If key management is not supported and authentication is mandatory, + the packet is dropped and this information is logged. + + On the receiving end, dst fetches the parameters based on the SPI and + the ip address in the authentication extension payload. The + authentication data field is extracted before zeroing out to + calculate the hash. It computes the hash on the entire payload and if + the hash does not match, then an "abnormal event" has occurred.
+ +5.3.4.4 Security Considerations + + It is important that the keys chosen are strong as the security of + the entire system depends on the keys being chosen properly and the + correct implementation of the algorithms. + + The security is performed on a hop by hop basis. The data received + can be trusted only so much as one trusts all the entities in the + path traversed. A chain of trust is established amongst NHRP entities + in the path of the NHRP Message . If the security in an NHRP entity + is compromised, then security in the entire NHRP domain is + compromised. + + Data integrity covers the entire NHRP payload. This guarantees that + the message was not modified and the source is authenticated as well. + If authentication extension is not used or if the security is + compromised, then NHRP entities are liable to both spoofing attacks, + active attacks and passive attacks. + + There is no mechanism to encrypt the messages. It is assumed that a + standard layer 3 confidentiality mechanism will be used to encrypt + and decrypt messages. It is recommended to use an Internet standard + key management protocol to negotiate the keys between the neighbors. + Transmitting the keys in clear text, if other methods of negotiation + is used, compromises the security completely. + + + + + +Luciani, et. al. Standards Track [Page 43] + +RFC 2332 NBMA NHRP April 1998 + + + Any NHS is susceptible to Denial of Service (DOS) attacks that cause + it to become overloaded, preventing legitimate packets from being + acted upon properly. A rogue host can send request and registration + packets to the first hop NHS. If the authentication option is not + used, the registration packet is forwarded along the routed path + requiring processing along each NHS. If the authentication option is + used, then only the first hop NHS is susceptible to DOS attacks + (i.e., unauthenticated packets will be dropped rather than forwarded + on). 
If security of any host is compromised (i.e., the keys it is + using to communicate with an NHS become known), then a rogue host can + send NHRP packets to the first hop NHS of the host whose keys were + compromised, which will then forward them along the routed path as in + the case of unauthenticated packets. However, this attack requires + that the rogue host to have the same first hop NHS as that of the + compromised host. Finally, it should be noted that denial of service + attacks that cause routers on the routed path to expend resources + processing NHRP packets are also susceptable to attacks that flood + packets at the same destination as contained in an NHRP packet's + Destination Protocol Address field. + +5.3.5 NHRP Vendor-Private Extension + + Compulsory = 0 + Type = 8 + Length = variable + + The NHRP Vendor-Private Extension is carried in NHRP packets to + convey vendor-private information or NHRP extensions between NHRP + speakers. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Vendor ID | Data.... | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Vendor ID + 802 Vendor ID as assigned by the IEEE [6] + + Data + The remaining octets after the Vendor ID in the payload are + vendor-dependent data. + + This extension may be added to any "request" or "reply" packet and it + is the only extension that may be included multiple times. If the + receiver does not handle this extension, or does not match the Vendor + + + + + +Luciani, et. al. Standards Track [Page 44] + +RFC 2332 NBMA NHRP April 1998 + + + ID in the extension then the extension may be completely ignored by + the receiver. If a Vendor Private Extension is included in a + "request" then it must be copied to the corresponding "reply". + +6. Protocol Operation + + In this section, we discuss certain operational considerations of + NHRP. 
+ +6.1 Router-to-Router Operation + + In practice, the initiating and responding stations may be either + hosts or routers. However, there is a possibility under certain + conditions that a stable routing loop may occur if NHRP is used + between two routers. In particular, attempting to establish an NHRP + path across a boundary where information used in route selection is + lost may result in a routing loop. Such situations include the loss + of BGP path vector information, the interworking of multiple routing + protocols with dissimilar metrics (e.g, RIP and OSPF), etc. In such + circumstances, NHRP should not be used. This situation can be + avoided if there are no "back door" paths between the entry and + egress router outside of the NBMA subnetwork. Protocol mechanisms to + relax these restrictions are under investigation. + + In general it is preferable to use mechanisms, if they exist, in + routing protocols to resolve the egress point when the destination + lies outside of the NBMA subnetwork, since such mechanisms will be + more tightly coupled to the state of the routing system and will + probably be less likely to create loops. + +6.2 Cache Management Issues + + The management of NHRP caches in the source station, the NHS serving + the destination, and any intermediate NHSs is dependent on a number + of factors. + +6.2.1 Caching Requirements + + Source Stations + + Source stations MUST cache all received NHRP Resolution Replies + that they are actively using. They also must cache "incomplete" + entries, i.e., those for which a NHRP Resolution Request has been + sent but those for which an NHRP Resolution Reply has not been + received. This is necessary in order to preserve the Request ID + + + + + + +Luciani, et. al. Standards Track [Page 45] + +RFC 2332 NBMA NHRP April 1998 + + + for retries, and provides the state necessary to avoid triggering + NHRP Resolution Requests for every data packet sent to the + destination. 
+ + Source stations MUST purge expired information from their caches. + Source stations MUST purge the appropriate cached information upon + receipt of an NHRP Purge Request packet. + + When a station has a co-resident NHC and NHS, the co-resident NHS + may reply to NHRP Resolution Requests from the co-resident NHC with + information which the station cached as a result of the co-resident + NHC making its own NHRP Resolution Requests as long as the co- + resident NHS follows the rules for Transit NHSs as seen below. + + Serving NHSs + + The NHS serving the destination (the one which responds + authoritatively to NHRP Resolution Requests) SHOULD cache protocol + address information from all NHRP Resolution Requests to which it + has responded if the information in the NHRP Resolution Reply has + the possibility of changing during its lifetime (so that an NHRP + Purge Request packet can be issued). The internetworking to NBMA + binding information provided by the source station in the NHRP + Resolution Request may also be cached if and only if the "S" bit is + set, the NHRP Resolution Request has included a CIE with the + Holding Time field set greater than zero (this is the valid Holding + Time for the source binding), and only for non-authoritative use + for a period not to exceed the Holding Time. + + Transit NHSs + + A Transit NHS (lying along the NHRP path between the source station + and the responding NHS) may cache source binding information + contained in NHRP Resolution Request packets that it forwards if + and only if the "S" bit is set, the NHRP Resolution Request has + included a CIE with the Holding Time field set greater than zero + (this is the valid Holding Time for the source binding), and only + for non-authoritative use for a period not to exceed the Holding + Time. 
+ + A Transit NHS may cache destination information contained in NHRP + Resolution Reply CIE if only if the D bit is set and then only for + non-authoritative use for a period not to exceed the Holding Time + value contained in the CIE. A Transit NHS MUST NOT cache source + binding information contained in an NHRP Resolution Reply. + + + + + + +Luciani, et. al. Standards Track [Page 46] + +RFC 2332 NBMA NHRP April 1998 + + + Further, a transit NHS MUST discard any cached information when the + prescribed time has expired. It may return cached information in + response to non-authoritative NHRP Resolution Requests only. + +6.2.2 Dynamics of Cached Information + + NBMA-Connected Destinations + + NHRP's most basic function is that of simple NBMA address + resolution of stations directly attached to the NBMA subnetwork. + These mappings are typically very static, and appropriately chosen + holding times will minimize problems in the event that the NBMA + address of a station must be changed. Stale information will cause + a loss of connectivity, which may be used to trigger an + authoritative NHRP Resolution Request and bypass the old data. In + the worst case, connectivity will fail until the cache entry times + out. + + This applies equally to information marked in NHRP Resolution + Replies as being "stable" (via the "D" bit). + + Destinations Off of the NBMA Subnetwork + + If the source of an NHRP Resolution Request is a host and the + destination is not directly attached to the NBMA subnetwork, and + the route to that destination is not considered to be "stable," the + destination mapping may be very dynamic (except in the case of a + subnetwork where each destination is only singly homed to the NBMA + subnetwork). As such the cached information may very likely become + stale. The consequence of stale information in this case will be a + suboptimal path (unless the internetwork has partitioned or some + other routing failure has occurred). 
+ +6.3 Use of the Prefix Length field of a CIE + + A certain amount of care needs to be taken when using the Prefix + Length field of a CIE, in particular with regard to the prefix length + advertised (and thus the size of the equivalence class specified by + it). Assuming that the routers on the NBMA subnetwork are exchanging + routing information, it should not be possible for an NHS to create a + black hole by advertising too large of a set of destinations, but + suboptimal routing (e.g., extra internetwork layer hops through the + NBMA) can result. To avoid this situation an NHS that wants to send + the Prefix Length MUST obey the following rule: + + The NHS examines the Network Layer Reachability Information (NLRI) + associated with the route that the NHS would use to forward towards + the destination (as specified by the Destination internetwork layer + + + +Luciani, et. al. Standards Track [Page 47] + +RFC 2332 NBMA NHRP April 1998 + + + address in the NHRP Resolution Request), and extracts from this + NLRI the shortest address prefix such that: (a) the Destination + internetwork layer address (from the NHRP Resolution Request) is + covered by the prefix, (b) the NHS does not have any routes with + NLRI which form a subset of what is covered by the prefix. The + prefix may then be used in the CIE. + + The Prefix Length field of the CIE should be used with restraint, in + order to avoid NHRP stations choosing suboptimal transit paths when + overlapping prefixes are available. This document specifies the use + of the prefix length only when all the destinations covered by the + prefix are "stable". That is, either: + + (a) All destinations covered by the prefix are on the NBMA network, + or + (b) All destinations covered by the prefix are directly attached to + the NHRP responding station. + + Use of the Prefix Length field of the CIE in other circumstances is + outside the scope of this document. 
+ +6.4 Domino Effect + + One could easily imagine a situation where a router, acting as an + ingress station to the NBMA subnetwork, receives a data packet, such + that this packet triggers an NHRP Resolution Request. If the router + forwards this data packet without waiting for an NHRP transit path to + be established, then when the next router along the path receives the + packet, the next router may do exactly the same - originate its own + NHRP Resolution Request (as well as forward the packet). In fact + such a data packet may trigger NHRP Resolution Request generation at + every router along the path through an NBMA subnetwork. We refer to + this phenomena as the NHRP "domino" effect. + + The NHRP domino effect is clearly undesirable. At best it may result + in excessive NHRP traffic. At worst it may result in an excessive + number of virtual circuits being established unnecessarily. + Therefore, it is important to take certain measures to avoid or + suppress this behavior. NHRP implementations for NHSs MUST provide a + mechanism to address this problem. One possible strategy to address + this problem would be to configure a router in such a way that NHRP + Resolution Request generation by the router would be driven only by + the traffic the router receives over its non-NBMA interfaces + (interfaces that are not attached to an NBMA subnetwork). Traffic + received by the router over its NBMA-attached interfaces would not + trigger NHRP Resolution Requests. Such a router avoids the NHRP + domino effect through administrative means. + + + + +Luciani, et. al. Standards Track [Page 48] + +RFC 2332 NBMA NHRP April 1998 + + +7. NHRP over Legacy BMA Networks + + There would appear to be no significant impediment to running NHRP + over legacy broadcast subnetworks. There may be issues around + running NHRP across multiple subnetworks. 
Running NHRP on broadcast + media has some interesting possibilities; especially when setting up + a cut-through for inter-ELAN inter-LIS/LAG traffic when one or both + end stations are legacy attached. This use for NHRP requires further + research. + +8. Discussion + + The result of an NHRP Resolution Request depends on how routing is + configured among the NHSs of an NBMA subnetwork. If the destination + station is directly connected to the NBMA subnetwork and the routed + path to it lies entirely within the NBMA subnetwork, the NHRP + Resolution Replies always return the NBMA address of the destination + station itself rather than the NBMA address of some egress router. + On the other hand, if the routed path exits the NBMA subnetwork, NHRP + will be unable to resolve the NBMA address of the destination, but + rather will return the address of the egress router. For + destinations outside the NBMA subnetwork, egress routers and routers + in the other subnetworks should exchange routing information so that + the optimal egress router may be found. + + In addition to NHSs, an NBMA station could also be associated with + one or more regular routers that could act as "connectionless + servers" for the station. The station could then choose to resolve + the NBMA next hop or just send the packets to one of its + connectionless servers. The latter option may be desirable if + communication with the destination is short-lived and/or doesn't + require much network resources. The connectionless servers could, of + course, be physically integrated in the NHSs by augmenting them with + internetwork layer switching functionality. + +9. IANA Considerations + + IANA will take advice from the Area Director appointed designated + subject matter expert, in order to assign numbers from the various + number spaces described herein. 
In the event that the Area Director + appointed designated subject matter expert is unavailable, the + relevant IESG Area Director will appoint another expert. Any and all + requests for value assignment within a given number space will be + accepted when the usage of the value assignment documented. Possible + forms of documentantion include, but is not limited to, RFCs or the + product of another cooperative standards body (e.g., the MPOA and + LANE subworking group of the ATM Forum). + + + + +Luciani, et. al. Standards Track [Page 49] + +RFC 2332 NBMA NHRP April 1998 + + +References + + [1] Heinanen, J., and R. Govindan, "NBMA Address Resolution Protocol + (NARP)", RFC 1735, December 1994. + + [2] Plummer, D., "Address Resolution Protocol", STD 37, RFC 826, + November 1982. + + [3] Laubach, M., and J. Halpern, "Classical IP and ARP over ATM", RFC + 2225, April 1998. + + [4] Piscitello,, D., and J. Lawrence, "Transmission of IP datagrams + over the SMDS service", RFC 1209, March 1991. + + [5] Protocol Identification in the Network Layer, ISO/IEC TR + 9577:1990. + + [6] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, RFC 1700, + October 1994. + + [7] Heinanen, J., "Multiprotocol Encapsulation over ATM Adaptation + Layer 5", RFC 1483, July 1993. + + [8] Malis, A., Robinson, D., and R. Ullmann, "Multiprotocol + Interconnect on X.25 and ISDN in the Packet Mode", RFC 1356, August + 1992. + + [9] Bradley, T., Brown, C., and A. Malis, "Multiprotocol Interconnect + over Frame Relay", RFC 1490, July 1993. + + [10] Rekhter, Y., and D. Kandlur, ""Local/Remote" Forwarding Decision + in Switched Data Link Subnetworks", RFC 1937, May 1996. + + [11] Armitage, G., "Support for Multicast over UNI 3.0/3.1 based ATM + Networks", RFC 2022, November 1996. + + [12] Luciani, J., Armitage, G., and J. Halpern, "Server Cache + Synchronization Protocol (SCSP) - NBMA", RFC 2334, April 1998. + + [13] Rekhter, Y., "NHRP for Destinations off the NBMA Subnetwork", + Work In Progress. 
+ + [14] Luciani, J., et. al., "Classical IP and ARP over ATM to NHRP + Transition", Work In Progress. + + [15] Bradner, S., "Key words for use in RFCs to Indicate Requirement + Levels", BCP 14, RFC 2119, March 1997. + + + + +Luciani, et. al. Standards Track [Page 50] + +RFC 2332 NBMA NHRP April 1998 + + + [16] Krawczyk, H., Bellare, M., and R. Canetti, "HMAC: Keyed Hashing + for Message Authentication", RFC 2104, February 1997. + +Acknowledgments + + We would like to thank (in no particular order) Thomas Narten of IBM + for his comments in the role of Internet AD, Juha Heinenan of Telecom + Finland and Ramesh Govidan of ISI for their work on NBMA ARP and the + original NHRP draft, which served as the basis for this work. + Russell Gardo of IBM, John Burnett of Adaptive, Dennis Ferguson of + ANS, Andre Fredette of Bay Networks, Joel Halpern of Newbridge, Paul + Francis of NTT, Tony Li, Bryan Gleeson, and Yakov Rekhter of cisco, + and Grenville Armitage of Bellcore should also be acknowledged for + comments and suggestions that improved this work substantially. We + would also like to thank the members of the ION working group of the + IETF, whose review and discussion of this document have been + invaluable. + +Authors' Addresses + + James V. Luciani Dave Katz + Bay Networks cisco Systems + 3 Federal Street 170 W. Tasman Dr. + Mail Stop: BL3-03 San Jose, CA 95134 USA + Billerica, MA 01821 Phone: +1 408 526 8284 + Phone: +1 978 916 4734 EMail: dkatz@cisco.com + EMail: luciani@baynetworks.com + + David Piscitello Bruce Cole + Core Competence Juniper Networks + 1620 Tuckerstown Road 3260 Jay St. + Dresher, PA 19025 USA Santa Clara, CA 95054 + Phone: +1 215 830 0692 Phone: +1 408 327 1900 + EMail: dave@corecom.com EMail: bcole@jnx.com + + Naganand Doraswamy + Bay Networks, Inc. + 3 Federal Street + Mail Stop: Bl3-03 + Billerica, MA 01801 + Phone: +1 978 916 1323 + EMail: naganand@baynetworks.com + + + + + + + + + +Luciani, et. al. 
Standards Track [Page 51] + +RFC 2332 NBMA NHRP April 1998 + + +Full Copyright Statement + + Copyright (C) The Internet Society (1998). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + + + + + + + + + + + + + + + + + + + + + + + + +Luciani, et. al. 
Standards Track [Page 52] + diff --git a/etc/Makefile b/etc/Makefile new file mode 100644 index 0000000..d9fdd8b --- /dev/null +++ b/etc/Makefile @@ -0,0 +1,5 @@ +ETCFILES := opennhrp.conf opennhrp-script racoon-ph1down.sh racoon-ph1dead.sh + +install: + $(INSTALLDIR) $(DESTDIR)$(CONFDIR) + $(INSTALL) $(addprefix $(src)/,$(ETCFILES)) $(DESTDIR)$(CONFDIR) diff --git a/etc/opennhrp-script b/etc/opennhrp-script new file mode 100755 index 0000000..8d5e2d3 --- /dev/null +++ b/etc/opennhrp-script @@ -0,0 +1,38 @@ +#!/bin/sh + +case $1 in +interface-up) + ip route flush proto 42 dev $NHRP_INTERFACE + ip neigh flush dev $NHRP_INTERFACE + ;; +peer-register) + ;; +peer-up) + if [ -n "$NHRP_DESTMTU" ]; then + ARGS=`ip route get $NHRP_DESTNBMA from $NHRP_SRCNBMA | head -1` + ip route add $ARGS proto 42 mtu $NHRP_DESTMTU + fi + echo "Create link from $NHRP_SRCADDR ($NHRP_SRCNBMA) to $NHRP_DESTADDR ($NHRP_DESTNBMA)" + racoonctl establish-sa -w isakmp inet $NHRP_SRCNBMA $NHRP_DESTNBMA || exit 1 + racoonctl establish-sa -w esp inet $NHRP_SRCNBMA $NHRP_DESTNBMA gre || exit 1 + ;; +peer-down) + echo "Delete link from $NHRP_SRCADDR ($NHRP_SRCNBMA) to $NHRP_DESTADDR ($NHRP_DESTNBMA)" + if [ "$NHRP_PEER_DOWN_REASON" != "lower-down" ]; then + racoonctl delete-sa isakmp inet $NHRP_SRCNBMA $NHRP_DESTNBMA + fi + ip route del $NHRP_DESTNBMA src $NHRP_SRCNBMA proto 42 + ;; +route-up) + echo "Route $NHRP_DESTADDR/$NHRP_DESTPREFIX is up" + ip route replace $NHRP_DESTADDR/$NHRP_DESTPREFIX proto 42 via $NHRP_NEXTHOP dev $NHRP_INTERFACE + ip route flush cache + ;; +route-down) + echo "Route $NHRP_DESTADDR/$NHRP_DESTPREFIX is down" + ip route del $NHRP_DESTADDR/$NHRP_DESTPREFIX proto 42 + ip route flush cache + ;; +esac + +exit 0 diff --git a/etc/opennhrp-script.cert b/etc/opennhrp-script.cert new file mode 100755 index 0000000..d013511 --- /dev/null +++ b/etc/opennhrp-script.cert @@ -0,0 +1,71 @@ +#!/bin/sh +# +# This version of the script check the X509 certificate used to authenticate +# the 
IPsec connection. It parses a special format subject field, and verifies +# the claimed GRE is bound to that certificate, before allowing NHRP +# registration or direct tunnel to succeed. +# +# It also reconfigure BGP filters according to certificate contents. This is +# only useful for hub nodes. +# +# Example of certificate: +# subjectAltName: DirName:/OU=GRE=192.168.1.1/NET=10.1.0.0/16 + +case $1 in +interface-up) + ip route flush proto 42 dev $NHRP_INTERFACE + ip neigh flush dev $NHRP_INTERFACE + ;; +peer-register) + ( + flock -x 200 + + CERT=`racoonctl get-cert inet $NHRP_SRCNBMA $NHRP_DESTNBMA | openssl x509 -inform der -text -noout | egrep -o "/OU=[^/]*(/[0-9]+)?" | cut -b 5-` + if [ -z "`echo "$CERT" | grep "^GRE=$NHRP_DESTADDR"`" ]; then + echo "ERROR: IP $NHRP_DESTADDR at $NHRP_DESTNBMA NOT verified" + exit 1 + fi + + AS=`echo "$CERT" | grep "^AS=" | cut -b 4-` + vtysh -d bgpd -c "configure terminal" \ + -c "router bgp $MY_AS" \ + -c "neighbor $NHRP_DESTADDR remote-as $AS" \ + -c "neighbor $NHRP_DESTADDR peer-group leaf" \ + -c "neighbor $NHRP_DESTADDR prefix-list net-$AS-in in" + + SEQ=5 + (echo "$CERT" | grep "^NET=" | cut -b 5-) | while read NET; do + vtysh -d bgpd -c "configure terminal" \ + -c "ip prefix-list net-$AS-in seq $SEQ permit $NET" + SEQ=$(($SEQ+5) + done + ) 200>/var/lock/opennhrp-script.lock + ;; +peer-up) + echo "Create link from $NHRP_SRCADDR ($NHRP_SRCNBMA) to $NHRP_DESTADDR ($NHRP_DESTNBMA)" + racoonctl establish-sa -w isakmp inet $NHRP_SRCNBMA $NHRP_DESTNBMA || exit 1 + racoonctl establish-sa -w esp inet $NHRP_SRCNBMA $NHRP_DESTNBMA gre || exit 1 + + CERT=`racoonctl get-cert inet $NHRP_SRCNBMA $NHRP_DESTNBMA | openssl x509 -inform der -text -noout | egrep -o "/OU=[^/]*(/[0-9]+)?" 
| cut -b 5-` + if [ -z "`echo "$CERT" | grep "^GRE=$NHRP_DESTADDR"`" ]; then + echo "ERROR: IP $NHRP_DESTADDR at $NHRP_DESTNBMA NOT verified" + exit 1 + fi + ;; +peer-down) + echo "Delete link from $NHRP_SRCADDR ($NHRP_SRCNBMA) to $NHRP_DESTADDR ($NHRP_DESTNBMA)" + racoonctl delete-sa isakmp inet $NHRP_SRCNBMA $NHRP_DESTNBMA + ;; +route-up) + echo "Route $NHRP_DESTADDR/$NHRP_DESTPREFIX is up" + ip route replace $NHRP_DESTADDR/$NHRP_DESTPREFIX proto 42 via $NHRP_NEXTHOP dev $NHRP_INTERFACE + ip route flush cache + ;; +route-down) + echo "Route $NHRP_DESTADDR/$NHRP_DESTPREFIX is down" + ip route del $NHRP_DESTADDR/$NHRP_DESTPREFIX proto 42 + ip route flush cache + ;; +esac + +exit 0 diff --git a/etc/opennhrp.conf b/etc/opennhrp.conf new file mode 100644 index 0000000..6451cb0 --- /dev/null +++ b/etc/opennhrp.conf @@ -0,0 +1,9 @@ +interface gre1 + map 10.255.255.1/24 192.168.200.1 register + cisco-authentication secret + shortcut + redirect + non-caching + +interface lo + shortcut-destination diff --git a/etc/racoon-ph1dead.sh b/etc/racoon-ph1dead.sh new file mode 100755 index 0000000..942e4c2 --- /dev/null +++ b/etc/racoon-ph1dead.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +opennhrpctl cache lowerdown nbma $REMOTE_ADDR local-nbma $LOCAL_ADDR diff --git a/etc/racoon-ph1down.sh b/etc/racoon-ph1down.sh new file mode 100755 index 0000000..c98d985 --- /dev/null +++ b/etc/racoon-ph1down.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +# Purge opennhrp entries only if this was the last ISAKMP phase1 +if [ -z "`racoonctl -ll show-sa isakmp | grep "$LOCAL_ADDR\.[0-9]* * $REMOTE_ADDR\.[0-9]* "`" ]; then + opennhrpctl cache purge nbma $REMOTE_ADDR local-nbma $LOCAL_ADDR +fi diff --git a/libev/LICENSE b/libev/LICENSE new file mode 100644 index 0000000..df62c4f --- /dev/null +++ b/libev/LICENSE @@ -0,0 +1,36 @@ +All files in libev are Copyright (C)2007,2008 Marc Alexander Lehmann. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Alternatively, the contents of this package may be used under the terms +of the GNU General Public License ("GPL") version 2 or any later version, +in which case the provisions of the GPL are applicable instead of the +above. If you wish to allow the use of your version of this package only +under the terms of the GPL and not to allow others to use your version of +this file under the BSD license, indicate your decision by deleting the +provisions above and replace them with the notice and other provisions +required by the GPL in this and the other files of this package. If you do +not delete the provisions above, a recipient may use your version of this +file under either the BSD or the GPL. 
diff --git a/libev/README b/libev/README new file mode 100644 index 0000000..ca403c6 --- /dev/null +++ b/libev/README @@ -0,0 +1,58 @@ +libev is a high-performance event loop/event model with lots of features. +(see benchmark at http://libev.schmorp.de/bench.html) + + +ABOUT + + Homepage: http://software.schmorp.de/pkg/libev + Mailinglist: libev@lists.schmorp.de + http://lists.schmorp.de/cgi-bin/mailman/listinfo/libev + Library Documentation: http://pod.tst.eu/http://cvs.schmorp.de/libev/ev.pod + + Libev is modelled (very losely) after libevent and the Event perl + module, but is faster, scales better and is more correct, and also more + featureful. And also smaller. Yay. + + Some of the specialties of libev not commonly found elsewhere are: + + - extensive and detailed, readable documentation (not doxygen garbage). + - fully supports fork, can detect fork in various ways and automatically + re-arms kernel mechanisms that do not support fork. + - highly optimised select, poll, epoll, kqueue and event ports backends. + - filesystem object (path) watching (with optional linux inotify support). + - wallclock-based times (using absolute time, cron-like). + - relative timers/timeouts (handle time jumps). + - fast intra-thread communication between multiple + event loops (with optional fast linux eventfd backend). + - extremely easy to embed. + - very small codebase, no bloated library. + - fully extensible by being able to plug into the event loop, + integrate other event loops, integrate other event loop users. + - very little memory use (small watchers, small event loop data). + - optional C++ interface allowing method and function callbacks + at no extra memory or runtime overhead. + - optional Perl interface with similar characteristics (capable + of running Glib/Gtk2 on libev, interfaces with Net::SNMP and + libadns). + - support for other languages (multiple C++ interfaces, D, Ruby, + Python) available from third-parties. 
+ + Examples of programs that embed libev: the EV perl module, + rxvt-unicode, gvpe (GNU Virtual Private Ethernet), the Deliantra MMORPG + server (http://www.deliantra.net/), Rubinius (a next-generation Ruby + VM), the Ebb web server, the Rev event toolkit. + + +CONTRIBUTORS + + libev was written and designed by Marc Lehmann and Emanuele Giaquinta. + + The following people sent in patches or made other noteworthy + contributions to the design (for minor patches, see the Changes + file. If I forgot to include you, please shout at me, it was an + accident): + + W.C.A. Wijngaards + Christopher Layne + Chris Brody + diff --git a/libev/VERSION b/libev/VERSION new file mode 100644 index 0000000..666ac08 --- /dev/null +++ b/libev/VERSION @@ -0,0 +1 @@ +libev 3.9 diff --git a/libev/ev.c b/libev/ev.c new file mode 100644 index 0000000..ccd202b --- /dev/null +++ b/libev/ev.c @@ -0,0 +1,3694 @@ +/* + * libev event processing core, watcher management + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* this big block deduces configuration from config.h */ +#ifndef EV_STANDALONE +# ifdef EV_CONFIG_H +# include EV_CONFIG_H +# else +# include "config.h" +# endif + +# if HAVE_CLOCK_SYSCALL +# ifndef EV_USE_CLOCK_SYSCALL +# define EV_USE_CLOCK_SYSCALL 1 +# ifndef EV_USE_REALTIME +# define EV_USE_REALTIME 0 +# endif +# ifndef EV_USE_MONOTONIC +# define EV_USE_MONOTONIC 1 +# endif +# endif +# elif !defined(EV_USE_CLOCK_SYSCALL) +# define EV_USE_CLOCK_SYSCALL 0 +# endif + +# if HAVE_CLOCK_GETTIME +# ifndef EV_USE_MONOTONIC +# define EV_USE_MONOTONIC 1 +# endif +# ifndef EV_USE_REALTIME +# define EV_USE_REALTIME 0 +# endif +# else +# ifndef EV_USE_MONOTONIC +# define EV_USE_MONOTONIC 0 +# endif +# ifndef EV_USE_REALTIME +# define EV_USE_REALTIME 0 +# endif +# endif + +# ifndef EV_USE_NANOSLEEP +# if HAVE_NANOSLEEP +# define EV_USE_NANOSLEEP 1 +# else +# define EV_USE_NANOSLEEP 0 +# endif +# endif + +# ifndef EV_USE_SELECT +# if HAVE_SELECT && HAVE_SYS_SELECT_H +# define EV_USE_SELECT 1 +# else +# define EV_USE_SELECT 0 +# endif +# endif + +# ifndef EV_USE_POLL +# if HAVE_POLL && HAVE_POLL_H +# define EV_USE_POLL 1 +# else +# define EV_USE_POLL 0 +# endif +# endif + +# ifndef EV_USE_EPOLL +# if HAVE_EPOLL_CTL && HAVE_SYS_EPOLL_H +# define EV_USE_EPOLL 1 +# else +# define EV_USE_EPOLL 0 +# endif +# endif + +# ifndef EV_USE_KQUEUE +# if HAVE_KQUEUE && HAVE_SYS_EVENT_H && HAVE_SYS_QUEUE_H +# define EV_USE_KQUEUE 1 +# else +# define EV_USE_KQUEUE 0 +# endif +# endif + +# ifndef EV_USE_PORT +# if HAVE_PORT_H && HAVE_PORT_CREATE +# define EV_USE_PORT 1 +# else +# define EV_USE_PORT 0 +# endif +# endif + +# ifndef EV_USE_INOTIFY +# if HAVE_INOTIFY_INIT && HAVE_SYS_INOTIFY_H +# define EV_USE_INOTIFY 1 +# else +# define EV_USE_INOTIFY 0 +# endif +# endif + +# ifndef EV_USE_SIGNALFD +# if HAVE_SIGNALFD && HAVE_SYS_SIGNALFD_H +# define EV_USE_SIGNALFD 1 +# else +# define EV_USE_SIGNALFD 0 +# endif +# endif + +# ifndef 
EV_USE_EVENTFD +# if HAVE_EVENTFD +# define EV_USE_EVENTFD 1 +# else +# define EV_USE_EVENTFD 0 +# endif +# endif + +#endif + +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <stddef.h> + +#include <stdio.h> + +#include <assert.h> +#include <errno.h> +#include <sys/types.h> +#include <time.h> + +#include <signal.h> + +#ifdef EV_H +# include EV_H +#else +# include "ev.h" +#endif + +#ifndef _WIN32 +# include <sys/time.h> +# include <sys/wait.h> +# include <unistd.h> +#else +# include <io.h> +# define WIN32_LEAN_AND_MEAN +# include <windows.h> +# ifndef EV_SELECT_IS_WINSOCKET +# define EV_SELECT_IS_WINSOCKET 1 +# endif +#endif + +/* this block tries to deduce configuration from header-defined symbols and defaults */ + +/* try to deduce the maximum number of signals on this platform */ +#if defined (EV_NSIG) +/* use what's provided */ +#elif defined (NSIG) +# define EV_NSIG (NSIG) +#elif defined(_NSIG) +# define EV_NSIG (_NSIG) +#elif defined (SIGMAX) +# define EV_NSIG (SIGMAX+1) +#elif defined (SIG_MAX) +# define EV_NSIG (SIG_MAX+1) +#elif defined (_SIG_MAX) +# define EV_NSIG (_SIG_MAX+1) +#elif defined (MAXSIG) +# define EV_NSIG (MAXSIG+1) +#elif defined (MAX_SIG) +# define EV_NSIG (MAX_SIG+1) +#elif defined (SIGARRAYSIZE) +# define EV_NSIG SIGARRAYSIZE /* Assume ary[SIGARRAYSIZE] */ +#elif defined (_sys_nsig) +# define EV_NSIG (_sys_nsig) /* Solaris 2.5 */ +#else +# error "unable to find value for NSIG, please report" +/* to make it compile regardless, just remove the above line */ +# define EV_NSIG 65 +#endif + +#ifndef EV_USE_CLOCK_SYSCALL +# if __linux && __GLIBC__ >= 2 +# define EV_USE_CLOCK_SYSCALL 1 +# else +# define EV_USE_CLOCK_SYSCALL 0 +# endif +#endif + +#ifndef EV_USE_MONOTONIC +# if defined (_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0 +# define EV_USE_MONOTONIC 1 +# else +# define EV_USE_MONOTONIC 0 +# endif +#endif + +#ifndef EV_USE_REALTIME +# define EV_USE_REALTIME !EV_USE_CLOCK_SYSCALL +#endif + 
+#ifndef EV_USE_NANOSLEEP +# if _POSIX_C_SOURCE >= 199309L +# define EV_USE_NANOSLEEP 1 +# else +# define EV_USE_NANOSLEEP 0 +# endif +#endif + +#ifndef EV_USE_SELECT +# define EV_USE_SELECT 1 +#endif + +#ifndef EV_USE_POLL +# ifdef _WIN32 +# define EV_USE_POLL 0 +# else +# define EV_USE_POLL 1 +# endif +#endif + +#ifndef EV_USE_EPOLL +# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 4)) +# define EV_USE_EPOLL 1 +# else +# define EV_USE_EPOLL 0 +# endif +#endif + +#ifndef EV_USE_KQUEUE +# define EV_USE_KQUEUE 0 +#endif + +#ifndef EV_USE_PORT +# define EV_USE_PORT 0 +#endif + +#ifndef EV_USE_INOTIFY +# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 4)) +# define EV_USE_INOTIFY 1 +# else +# define EV_USE_INOTIFY 0 +# endif +#endif + +#ifndef EV_PID_HASHSIZE +# if EV_MINIMAL +# define EV_PID_HASHSIZE 1 +# else +# define EV_PID_HASHSIZE 16 +# endif +#endif + +#ifndef EV_INOTIFY_HASHSIZE +# if EV_MINIMAL +# define EV_INOTIFY_HASHSIZE 1 +# else +# define EV_INOTIFY_HASHSIZE 16 +# endif +#endif + +#ifndef EV_USE_EVENTFD +# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 7)) +# define EV_USE_EVENTFD 1 +# else +# define EV_USE_EVENTFD 0 +# endif +#endif + +#ifndef EV_USE_SIGNALFD +# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 7)) +# define EV_USE_SIGNALFD 1 +# else +# define EV_USE_SIGNALFD 0 +# endif +#endif + +#if 0 /* debugging */ +# define EV_VERIFY 3 +# define EV_USE_4HEAP 1 +# define EV_HEAP_CACHE_AT 1 +#endif + +#ifndef EV_VERIFY +# define EV_VERIFY !EV_MINIMAL +#endif + +#ifndef EV_USE_4HEAP +# define EV_USE_4HEAP !EV_MINIMAL +#endif + +#ifndef EV_HEAP_CACHE_AT +# define EV_HEAP_CACHE_AT !EV_MINIMAL +#endif + +/* on linux, we can use a (slow) syscall to avoid a dependency on pthread, */ +/* which makes programs even slower. might work on other unices, too. 
*/ +#if EV_USE_CLOCK_SYSCALL +# include <syscall.h> +# ifdef SYS_clock_gettime +# define clock_gettime(id, ts) syscall (SYS_clock_gettime, (id), (ts)) +# undef EV_USE_MONOTONIC +# define EV_USE_MONOTONIC 1 +# else +# undef EV_USE_CLOCK_SYSCALL +# define EV_USE_CLOCK_SYSCALL 0 +# endif +#endif + +/* this block fixes any misconfiguration where we know we run into trouble otherwise */ + +#ifndef CLOCK_MONOTONIC +# undef EV_USE_MONOTONIC +# define EV_USE_MONOTONIC 0 +#endif + +#ifndef CLOCK_REALTIME +# undef EV_USE_REALTIME +# define EV_USE_REALTIME 0 +#endif + +#if !EV_STAT_ENABLE +# undef EV_USE_INOTIFY +# define EV_USE_INOTIFY 0 +#endif + +#if !EV_USE_NANOSLEEP +# ifndef _WIN32 +# include <sys/select.h> +# endif +#endif + +#if EV_USE_INOTIFY +# include <sys/utsname.h> +# include <sys/statfs.h> +# include <sys/inotify.h> +/* some very old inotify.h headers don't have IN_DONT_FOLLOW */ +# ifndef IN_DONT_FOLLOW +# undef EV_USE_INOTIFY +# define EV_USE_INOTIFY 0 +# endif +#endif + +#if EV_SELECT_IS_WINSOCKET +# include <winsock.h> +#endif + +#if EV_USE_EVENTFD +/* our minimum requirement is glibc 2.7 which has the stub, but not the header */ +# include <stdint.h> +# ifndef EFD_NONBLOCK +# define EFD_NONBLOCK O_NONBLOCK +# endif +# ifndef EFD_CLOEXEC +# ifdef O_CLOEXEC +# define EFD_CLOEXEC O_CLOEXEC +# else +# define EFD_CLOEXEC 02000000 +# endif +# endif +# ifdef __cplusplus +extern "C" { +# endif +int eventfd (unsigned int initval, int flags); +# ifdef __cplusplus +} +# endif +#endif + +#if EV_USE_SIGNALFD +/* our minimum requirement is glibc 2.7 which has the stub, but not the header */ +# include <stdint.h> +# ifndef SFD_NONBLOCK +# define SFD_NONBLOCK O_NONBLOCK +# endif +# ifndef SFD_CLOEXEC +# ifdef O_CLOEXEC +# define SFD_CLOEXEC O_CLOEXEC +# else +# define SFD_CLOEXEC 02000000 +# endif +# endif +# ifdef __cplusplus +extern "C" { +# endif +int signalfd (int fd, const sigset_t *mask, int flags); + +struct signalfd_siginfo +{ + uint32_t ssi_signo; + char pad[128 - 
sizeof (uint32_t)]; +}; +# ifdef __cplusplus +} +# endif +#endif + + +/**/ + +#if EV_VERIFY >= 3 +# define EV_FREQUENT_CHECK ev_loop_verify (EV_A) +#else +# define EV_FREQUENT_CHECK do { } while (0) +#endif + +/* + * This is used to avoid floating point rounding problems. + * It is added to ev_rt_now when scheduling periodics + * to ensure progress, time-wise, even when rounding + * errors are against us. + * This value is good at least till the year 4000. + * Better solutions welcome. + */ +#define TIME_EPSILON 0.0001220703125 /* 1/8192 */ + +#define MIN_TIMEJUMP 1. /* minimum timejump that gets detected (if monotonic clock available) */ +#define MAX_BLOCKTIME 59.743 /* never wait longer than this time (to detect time jumps) */ + +#if __GNUC__ >= 4 +# define expect(expr,value) __builtin_expect ((expr),(value)) +# define noinline __attribute__ ((noinline)) +#else +# define expect(expr,value) (expr) +# define noinline +# if __STDC_VERSION__ < 199901L && __GNUC__ < 2 +# define inline +# endif +#endif + +#define expect_false(expr) expect ((expr) != 0, 0) +#define expect_true(expr) expect ((expr) != 0, 1) +#define inline_size static inline + +#if EV_MINIMAL +# define inline_speed static noinline +#else +# define inline_speed static inline +#endif + +#define NUMPRI (EV_MAXPRI - EV_MINPRI + 1) + +#if EV_MINPRI == EV_MAXPRI +# define ABSPRI(w) (((W)w), 0) +#else +# define ABSPRI(w) (((W)w)->priority - EV_MINPRI) +#endif + +#define EMPTY /* required for microsofts broken pseudo-c compiler */ +#define EMPTY2(a,b) /* used to suppress some warnings */ + +typedef ev_watcher *W; +typedef ev_watcher_list *WL; +typedef ev_watcher_time *WT; + +#define ev_active(w) ((W)(w))->active +#define ev_at(w) ((WT)(w))->at + +#if EV_USE_REALTIME +/* sig_atomic_t is used to avoid per-thread variables or locking but still */ +/* giving it a reasonably high chance of working on typical architetcures */ +static EV_ATOMIC_T have_realtime; /* did clock_gettime (CLOCK_REALTIME) work? 
*/ +#endif + +#if EV_USE_MONOTONIC +static EV_ATOMIC_T have_monotonic; /* did clock_gettime (CLOCK_MONOTONIC) work? */ +#endif + +#ifndef EV_FD_TO_WIN32_HANDLE +# define EV_FD_TO_WIN32_HANDLE(fd) _get_osfhandle (fd) +#endif +#ifndef EV_WIN32_HANDLE_TO_FD +# define EV_WIN32_HANDLE_TO_FD(handle) _open_osfhandle (fd, 0) +#endif +#ifndef EV_WIN32_CLOSE_FD +# define EV_WIN32_CLOSE_FD(fd) close (fd) +#endif + +#ifdef _WIN32 +# include "ev_win32.c" +#endif + +/*****************************************************************************/ + +static void (*syserr_cb)(const char *msg); + +void +ev_set_syserr_cb (void (*cb)(const char *msg)) +{ + syserr_cb = cb; +} + +static void noinline +ev_syserr (const char *msg) +{ + if (!msg) + msg = "(libev) system error"; + + if (syserr_cb) + syserr_cb (msg); + else + { + perror (msg); + abort (); + } +} + +static void * +ev_realloc_emul (void *ptr, long size) +{ + /* some systems, notably openbsd and darwin, fail to properly + * implement realloc (x, 0) (as required by both ansi c-98 and + * the single unix specification, so work around them here. 
+ */ + + if (size) + return realloc (ptr, size); + + free (ptr); + return 0; +} + +static void *(*alloc)(void *ptr, long size) = ev_realloc_emul; + +void +ev_set_allocator (void *(*cb)(void *ptr, long size)) +{ + alloc = cb; +} + +inline_speed void * +ev_realloc (void *ptr, long size) +{ + ptr = alloc (ptr, size); + + if (!ptr && size) + { + fprintf (stderr, "libev: cannot allocate %ld bytes, aborting.", size); + abort (); + } + + return ptr; +} + +#define ev_malloc(size) ev_realloc (0, (size)) +#define ev_free(ptr) ev_realloc ((ptr), 0) + +/*****************************************************************************/ + +/* set in reify when reification needed */ +#define EV_ANFD_REIFY 1 + +/* file descriptor info structure */ +typedef struct +{ + WL head; + unsigned char events; /* the events watched for */ + unsigned char reify; /* flag set when this ANFD needs reification (EV_ANFD_REIFY, EV__IOFDSET) */ + unsigned char emask; /* the epoll backend stores the actual kernel mask in here */ + unsigned char unused; +#if EV_USE_EPOLL + unsigned int egen; /* generation counter to counter epoll bugs */ +#endif +#if EV_SELECT_IS_WINSOCKET + SOCKET handle; +#endif +} ANFD; + +/* stores the pending event set for a given watcher */ +typedef struct +{ + W w; + int events; /* the pending event set for the given watcher */ +} ANPENDING; + +#if EV_USE_INOTIFY +/* hash table entry per inotify-id */ +typedef struct +{ + WL head; +} ANFS; +#endif + +/* Heap Entry */ +#if EV_HEAP_CACHE_AT + /* a heap element */ + typedef struct { + ev_tstamp at; + WT w; + } ANHE; + + #define ANHE_w(he) (he).w /* access watcher, read-write */ + #define ANHE_at(he) (he).at /* access cached at, read-only */ + #define ANHE_at_cache(he) (he).at = (he).w->at /* update at from watcher */ +#else + /* a heap element */ + typedef WT ANHE; + + #define ANHE_w(he) (he) + #define ANHE_at(he) (he)->at + #define ANHE_at_cache(he) +#endif + +#if EV_MULTIPLICITY + + struct ev_loop + { + ev_tstamp ev_rt_now; + 
#define ev_rt_now ((loop)->ev_rt_now) + #define VAR(name,decl) decl; + #include "ev_vars.h" + #undef VAR + }; + #include "ev_wrap.h" + + static struct ev_loop default_loop_struct; + struct ev_loop *ev_default_loop_ptr; + +#else + + ev_tstamp ev_rt_now; + #define VAR(name,decl) static decl; + #include "ev_vars.h" + #undef VAR + + static int ev_default_loop_ptr; + +#endif + +#if EV_MINIMAL < 2 +# define EV_RELEASE_CB if (expect_false (release_cb)) release_cb (EV_A) +# define EV_ACQUIRE_CB if (expect_false (acquire_cb)) acquire_cb (EV_A) +# define EV_INVOKE_PENDING invoke_cb (EV_A) +#else +# define EV_RELEASE_CB (void)0 +# define EV_ACQUIRE_CB (void)0 +# define EV_INVOKE_PENDING ev_invoke_pending (EV_A) +#endif + +#define EVUNLOOP_RECURSE 0x80 + +/*****************************************************************************/ + +#ifndef EV_HAVE_EV_TIME +ev_tstamp +ev_time (void) +{ +#if EV_USE_REALTIME + if (expect_true (have_realtime)) + { + struct timespec ts; + clock_gettime (CLOCK_REALTIME, &ts); + return ts.tv_sec + ts.tv_nsec * 1e-9; + } +#endif + + struct timeval tv; + gettimeofday (&tv, 0); + return tv.tv_sec + tv.tv_usec * 1e-6; +} +#endif + +inline_size ev_tstamp +get_clock (void) +{ +#if EV_USE_MONOTONIC + if (expect_true (have_monotonic)) + { + struct timespec ts; + clock_gettime (CLOCK_MONOTONIC, &ts); + return ts.tv_sec + ts.tv_nsec * 1e-9; + } +#endif + + return ev_time (); +} + +#if EV_MULTIPLICITY +ev_tstamp +ev_now (EV_P) +{ + return ev_rt_now; +} +#endif + +void +ev_sleep (ev_tstamp delay) +{ + if (delay > 0.) 
+ { +#if EV_USE_NANOSLEEP + struct timespec ts; + + ts.tv_sec = (time_t)delay; + ts.tv_nsec = (long)((delay - (ev_tstamp)(ts.tv_sec)) * 1e9); + + nanosleep (&ts, 0); +#elif defined(_WIN32) + Sleep ((unsigned long)(delay * 1e3)); +#else + struct timeval tv; + + tv.tv_sec = (time_t)delay; + tv.tv_usec = (long)((delay - (ev_tstamp)(tv.tv_sec)) * 1e6); + + /* here we rely on sys/time.h + sys/types.h + unistd.h providing select */ + /* something not guaranteed by newer posix versions, but guaranteed */ + /* by older ones */ + select (0, 0, 0, 0, &tv); +#endif + } +} + +/*****************************************************************************/ + +#define MALLOC_ROUND 4096 /* prefer to allocate in chunks of this size, must be 2**n and >> 4 longs */ + +/* find a suitable new size for the given array, */ +/* hopefully by rounding to a ncie-to-malloc size */ +inline_size int +array_nextsize (int elem, int cur, int cnt) +{ + int ncur = cur + 1; + + do + ncur <<= 1; + while (cnt > ncur); + + /* if size is large, round to MALLOC_ROUND - 4 * longs to accomodate malloc overhead */ + if (elem * ncur > MALLOC_ROUND - sizeof (void *) * 4) + { + ncur *= elem; + ncur = (ncur + elem + (MALLOC_ROUND - 1) + sizeof (void *) * 4) & ~(MALLOC_ROUND - 1); + ncur = ncur - sizeof (void *) * 4; + ncur /= elem; + } + + return ncur; +} + +static noinline void * +array_realloc (int elem, void *base, int *cur, int cnt) +{ + *cur = array_nextsize (elem, *cur, cnt); + return ev_realloc (base, elem * *cur); +} + +#define array_init_zero(base,count) \ + memset ((void *)(base), 0, sizeof (*(base)) * (count)) + +#define array_needsize(type,base,cur,cnt,init) \ + if (expect_false ((cnt) > (cur))) \ + { \ + int ocur_ = (cur); \ + (base) = (type *)array_realloc \ + (sizeof (type), (base), &(cur), (cnt)); \ + init ((base) + (ocur_), (cur) - ocur_); \ + } + +#if 0 +#define array_slim(type,stem) \ + if (stem ## max < array_roundsize (stem ## cnt >> 2)) \ + { \ + stem ## max = array_roundsize (stem ## cnt 
>> 1); \ + base = (type *)ev_realloc (base, sizeof (type) * (stem ## max));\ + fprintf (stderr, "slimmed down " # stem " to %d\n", stem ## max);/*D*/\ + } +#endif + +#define array_free(stem, idx) \ + ev_free (stem ## s idx); stem ## cnt idx = stem ## max idx = 0; stem ## s idx = 0 + +/*****************************************************************************/ + +/* dummy callback for pending events */ +static void noinline +pendingcb (EV_P_ ev_prepare *w, int revents) +{ +} + +void noinline +ev_feed_event (EV_P_ void *w, int revents) +{ + W w_ = (W)w; + int pri = ABSPRI (w_); + + if (expect_false (w_->pending)) + pendings [pri][w_->pending - 1].events |= revents; + else + { + w_->pending = ++pendingcnt [pri]; + array_needsize (ANPENDING, pendings [pri], pendingmax [pri], w_->pending, EMPTY2); + pendings [pri][w_->pending - 1].w = w_; + pendings [pri][w_->pending - 1].events = revents; + } +} + +inline_speed void +feed_reverse (EV_P_ W w) +{ + array_needsize (W, rfeeds, rfeedmax, rfeedcnt + 1, EMPTY2); + rfeeds [rfeedcnt++] = w; +} + +inline_size void +feed_reverse_done (EV_P_ int revents) +{ + do + ev_feed_event (EV_A_ rfeeds [--rfeedcnt], revents); + while (rfeedcnt); +} + +inline_speed void +queue_events (EV_P_ W *events, int eventcnt, int type) +{ + int i; + + for (i = 0; i < eventcnt; ++i) + ev_feed_event (EV_A_ events [i], type); +} + +/*****************************************************************************/ + +inline_speed void +fd_event_nc (EV_P_ int fd, int revents) +{ + ANFD *anfd = anfds + fd; + ev_io *w; + + for (w = (ev_io *)anfd->head; w; w = (ev_io *)((WL)w)->next) + { + int ev = w->events & revents; + + if (ev) + ev_feed_event (EV_A_ (W)w, ev); + } +} + +/* do not submit kernel events for fds that have reify set */ +/* because that means they changed while we were polling for new events */ +inline_speed void +fd_event (EV_P_ int fd, int revents) +{ + ANFD *anfd = anfds + fd; + + if (expect_true (!anfd->reify)) + fd_event_nc (EV_A_ fd, 
revents); +} + +void +ev_feed_fd_event (EV_P_ int fd, int revents) +{ + if (fd >= 0 && fd < anfdmax) + fd_event_nc (EV_A_ fd, revents); +} + +/* make sure the external fd watch events are in-sync */ +/* with the kernel/libev internal state */ +inline_size void +fd_reify (EV_P) +{ + int i; + + for (i = 0; i < fdchangecnt; ++i) + { + int fd = fdchanges [i]; + ANFD *anfd = anfds + fd; + ev_io *w; + + unsigned char events = 0; + + for (w = (ev_io *)anfd->head; w; w = (ev_io *)((WL)w)->next) + events |= (unsigned char)w->events; + +#if EV_SELECT_IS_WINSOCKET + if (events) + { + unsigned long arg; + anfd->handle = EV_FD_TO_WIN32_HANDLE (fd); + assert (("libev: only socket fds supported in this configuration", ioctlsocket (anfd->handle, FIONREAD, &arg) == 0)); + } +#endif + + { + unsigned char o_events = anfd->events; + unsigned char o_reify = anfd->reify; + + anfd->reify = 0; + anfd->events = events; + + if (o_events != events || o_reify & EV__IOFDSET) + backend_modify (EV_A_ fd, o_events, events); + } + } + + fdchangecnt = 0; +} + +/* something about the given fd changed */ +inline_size void +fd_change (EV_P_ int fd, int flags) +{ + unsigned char reify = anfds [fd].reify; + anfds [fd].reify |= flags; + + if (expect_true (!reify)) + { + ++fdchangecnt; + array_needsize (int, fdchanges, fdchangemax, fdchangecnt, EMPTY2); + fdchanges [fdchangecnt - 1] = fd; + } +} + +/* the given fd is invalid/unusable, so make sure it doesn't hurt us anymore */ +inline_speed void +fd_kill (EV_P_ int fd) +{ + ev_io *w; + + while ((w = (ev_io *)anfds [fd].head)) + { + ev_io_stop (EV_A_ w); + ev_feed_event (EV_A_ (W)w, EV_ERROR | EV_READ | EV_WRITE); + } +} + +/* check whether the given fd is atcually valid, for error recovery */ +inline_size int +fd_valid (int fd) +{ +#ifdef _WIN32 + return _get_osfhandle (fd) != -1; +#else + return fcntl (fd, F_GETFD) != -1; +#endif +} + +/* called on EBADF to verify fds */ +static void noinline +fd_ebadf (EV_P) +{ + int fd; + + for (fd = 0; fd < anfdmax; 
++fd) + if (anfds [fd].events) + if (!fd_valid (fd) && errno == EBADF) + fd_kill (EV_A_ fd); +} + +/* called on ENOMEM in select/poll to kill some fds and retry */ +static void noinline +fd_enomem (EV_P) +{ + int fd; + + for (fd = anfdmax; fd--; ) + if (anfds [fd].events) + { + fd_kill (EV_A_ fd); + break; + } +} + +/* usually called after fork if backend needs to re-arm all fds from scratch */ +static void noinline +fd_rearm_all (EV_P) +{ + int fd; + + for (fd = 0; fd < anfdmax; ++fd) + if (anfds [fd].events) + { + anfds [fd].events = 0; + anfds [fd].emask = 0; + fd_change (EV_A_ fd, EV__IOFDSET | EV_ANFD_REIFY); + } +} + +/*****************************************************************************/ + +/* + * the heap functions want a real array index. array index 0 uis guaranteed to not + * be in-use at any time. the first heap entry is at array [HEAP0]. DHEAP gives + * the branching factor of the d-tree. + */ + +/* + * at the moment we allow libev the luxury of two heaps, + * a small-code-size 2-heap one and a ~1.5kb larger 4-heap + * which is more cache-efficient. + * the difference is about 5% with 50000+ watchers. 
+ */ +#if EV_USE_4HEAP + +#define DHEAP 4 +#define HEAP0 (DHEAP - 1) /* index of first element in heap */ +#define HPARENT(k) ((((k) - HEAP0 - 1) / DHEAP) + HEAP0) +#define UPHEAP_DONE(p,k) ((p) == (k)) + +/* away from the root */ +inline_speed void +downheap (ANHE *heap, int N, int k) +{ + ANHE he = heap [k]; + ANHE *E = heap + N + HEAP0; + + for (;;) + { + ev_tstamp minat; + ANHE *minpos; + ANHE *pos = heap + DHEAP * (k - HEAP0) + HEAP0 + 1; + + /* find minimum child */ + if (expect_true (pos + DHEAP - 1 < E)) + { + /* fast path */ (minpos = pos + 0), (minat = ANHE_at (*minpos)); + if ( ANHE_at (pos [1]) < minat) (minpos = pos + 1), (minat = ANHE_at (*minpos)); + if ( ANHE_at (pos [2]) < minat) (minpos = pos + 2), (minat = ANHE_at (*minpos)); + if ( ANHE_at (pos [3]) < minat) (minpos = pos + 3), (minat = ANHE_at (*minpos)); + } + else if (pos < E) + { + /* slow path */ (minpos = pos + 0), (minat = ANHE_at (*minpos)); + if (pos + 1 < E && ANHE_at (pos [1]) < minat) (minpos = pos + 1), (minat = ANHE_at (*minpos)); + if (pos + 2 < E && ANHE_at (pos [2]) < minat) (minpos = pos + 2), (minat = ANHE_at (*minpos)); + if (pos + 3 < E && ANHE_at (pos [3]) < minat) (minpos = pos + 3), (minat = ANHE_at (*minpos)); + } + else + break; + + if (ANHE_at (he) <= minat) + break; + + heap [k] = *minpos; + ev_active (ANHE_w (*minpos)) = k; + + k = minpos - heap; + } + + heap [k] = he; + ev_active (ANHE_w (he)) = k; +} + +#else /* 4HEAP */ + +#define HEAP0 1 +#define HPARENT(k) ((k) >> 1) +#define UPHEAP_DONE(p,k) (!(p)) + +/* away from the root */ +inline_speed void +downheap (ANHE *heap, int N, int k) +{ + ANHE he = heap [k]; + + for (;;) + { + int c = k << 1; + + if (c >= N + HEAP0) + break; + + c += c + 1 < N + HEAP0 && ANHE_at (heap [c]) > ANHE_at (heap [c + 1]) + ? 
1 : 0; + + if (ANHE_at (he) <= ANHE_at (heap [c])) + break; + + heap [k] = heap [c]; + ev_active (ANHE_w (heap [k])) = k; + + k = c; + } + + heap [k] = he; + ev_active (ANHE_w (he)) = k; +} +#endif + +/* towards the root */ +inline_speed void +upheap (ANHE *heap, int k) +{ + ANHE he = heap [k]; + + for (;;) + { + int p = HPARENT (k); + + if (UPHEAP_DONE (p, k) || ANHE_at (heap [p]) <= ANHE_at (he)) + break; + + heap [k] = heap [p]; + ev_active (ANHE_w (heap [k])) = k; + k = p; + } + + heap [k] = he; + ev_active (ANHE_w (he)) = k; +} + +/* move an element suitably so it is in a correct place */ +inline_size void +adjustheap (ANHE *heap, int N, int k) +{ + if (k > HEAP0 && ANHE_at (heap [k]) <= ANHE_at (heap [HPARENT (k)])) + upheap (heap, k); + else + downheap (heap, N, k); +} + +/* rebuild the heap: this function is used only once and executed rarely */ +inline_size void +reheap (ANHE *heap, int N) +{ + int i; + + /* we don't use floyds algorithm, upheap is simpler and is more cache-efficient */ + /* also, this is easy to implement and correct for both 2-heaps and 4-heaps */ + for (i = 0; i < N; ++i) + upheap (heap, i + HEAP0); +} + +/*****************************************************************************/ + +/* associate signal watchers to a signal signal */ +typedef struct +{ + EV_ATOMIC_T pending; +#if EV_MULTIPLICITY + EV_P; +#endif + WL head; +} ANSIG; + +static ANSIG signals [EV_NSIG - 1]; + +/*****************************************************************************/ + +/* used to prepare libev internal fd's */ +/* this is not fork-safe */ +inline_speed void +fd_intern (int fd) +{ +#ifdef _WIN32 + unsigned long arg = 1; + ioctlsocket (_get_osfhandle (fd), FIONBIO, &arg); +#else + fcntl (fd, F_SETFD, FD_CLOEXEC); + fcntl (fd, F_SETFL, O_NONBLOCK); +#endif +} + +static void noinline +evpipe_init (EV_P) +{ + if (!ev_is_active (&pipe_w)) + { +#if EV_USE_EVENTFD + evfd = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC); + if (evfd < 0 && errno == EINVAL) + evfd = 
eventfd (0, 0); + + if (evfd >= 0) + { + evpipe [0] = -1; + fd_intern (evfd); /* doing it twice doesn't hurt */ + ev_io_set (&pipe_w, evfd, EV_READ); + } + else +#endif + { + while (pipe (evpipe)) + ev_syserr ("(libev) error creating signal/async pipe"); + + fd_intern (evpipe [0]); + fd_intern (evpipe [1]); + ev_io_set (&pipe_w, evpipe [0], EV_READ); + } + + ev_io_start (EV_A_ &pipe_w); + ev_unref (EV_A); /* watcher should not keep loop alive */ + } +} + +inline_size void +evpipe_write (EV_P_ EV_ATOMIC_T *flag) +{ + if (!*flag) + { + int old_errno = errno; /* save errno because write might clobber it */ + + *flag = 1; + +#if EV_USE_EVENTFD + if (evfd >= 0) + { + uint64_t counter = 1; + write (evfd, &counter, sizeof (uint64_t)); + } + else +#endif + write (evpipe [1], &old_errno, 1); + + errno = old_errno; + } +} + +/* called whenever the libev signal pipe */ +/* got some events (signal, async) */ +static void +pipecb (EV_P_ ev_io *iow, int revents) +{ + int i; + +#if EV_USE_EVENTFD + if (evfd >= 0) + { + uint64_t counter; + read (evfd, &counter, sizeof (uint64_t)); + } + else +#endif + { + char dummy; + read (evpipe [0], &dummy, 1); + } + + if (sig_pending) + { + sig_pending = 0; + + for (i = EV_NSIG - 1; i--; ) + if (expect_false (signals [i].pending)) + ev_feed_signal_event (EV_A_ i + 1); + } + +#if EV_ASYNC_ENABLE + if (async_pending) + { + async_pending = 0; + + for (i = asynccnt; i--; ) + if (asyncs [i]->sent) + { + asyncs [i]->sent = 0; + ev_feed_event (EV_A_ asyncs [i], EV_ASYNC); + } + } +#endif +} + +/*****************************************************************************/ + +static void +ev_sighandler (int signum) +{ +#if EV_MULTIPLICITY + EV_P = signals [signum - 1].loop; +#endif + +#if _WIN32 + signal (signum, ev_sighandler); +#endif + + signals [signum - 1].pending = 1; + evpipe_write (EV_A_ &sig_pending); +} + +void noinline +ev_feed_signal_event (EV_P_ int signum) +{ + WL w; + + if (expect_false (signum <= 0 || signum > EV_NSIG)) + return; + + 
--signum; + +#if EV_MULTIPLICITY + /* it is permissible to try to feed a signal to the wrong loop */ + /* or, likely more useful, feeding a signal nobody is waiting for */ + + if (expect_false (signals [signum].loop != EV_A)) + return; +#endif + + signals [signum].pending = 0; + + for (w = signals [signum].head; w; w = w->next) + ev_feed_event (EV_A_ (W)w, EV_SIGNAL); +} + +#if EV_USE_SIGNALFD +static void +sigfdcb (EV_P_ ev_io *iow, int revents) +{ + struct signalfd_siginfo si[2], *sip; /* these structs are big */ + + for (;;) + { + ssize_t res = read (sigfd, si, sizeof (si)); + + /* not ISO-C, as res might be -1, but works with SuS */ + for (sip = si; (char *)sip < (char *)si + res; ++sip) + ev_feed_signal_event (EV_A_ sip->ssi_signo); + + if (res < (ssize_t)sizeof (si)) + break; + } +} +#endif + +/*****************************************************************************/ + +static WL childs [EV_PID_HASHSIZE]; + +#ifndef _WIN32 + +static ev_signal childev; + +#ifndef WIFCONTINUED +# define WIFCONTINUED(status) 0 +#endif + +/* handle a single child status event */ +inline_speed void +child_reap (EV_P_ int chain, int pid, int status) +{ + ev_child *w; + int traced = WIFSTOPPED (status) || WIFCONTINUED (status); + + for (w = (ev_child *)childs [chain & (EV_PID_HASHSIZE - 1)]; w; w = (ev_child *)((WL)w)->next) + { + if ((w->pid == pid || !w->pid) + && (!traced || (w->flags & 1))) + { + ev_set_priority (w, EV_MAXPRI); /* need to do it *now*, this *must* be the same prio as the signal watcher itself */ + w->rpid = pid; + w->rstatus = status; + ev_feed_event (EV_A_ (W)w, EV_CHILD); + } + } +} + +#ifndef WCONTINUED +# define WCONTINUED 0 +#endif + +/* called on sigchld etc., calls waitpid */ +static void +childcb (EV_P_ ev_signal *sw, int revents) +{ + int pid, status; + + /* some systems define WCONTINUED but then fail to support it (linux 2.4) */ + if (0 >= (pid = waitpid (-1, &status, WNOHANG | WUNTRACED | WCONTINUED))) + if (!WCONTINUED + || errno != EINVAL + || 
0 >= (pid = waitpid (-1, &status, WNOHANG | WUNTRACED))) + return; + + /* make sure we are called again until all children have been reaped */ + /* we need to do it this way so that the callback gets called before we continue */ + ev_feed_event (EV_A_ (W)sw, EV_SIGNAL); + + child_reap (EV_A_ pid, pid, status); + if (EV_PID_HASHSIZE > 1) + child_reap (EV_A_ 0, pid, status); /* this might trigger a watcher twice, but feed_event catches that */ +} + +#endif + +/*****************************************************************************/ + +#if EV_USE_PORT +# include "ev_port.c" +#endif +#if EV_USE_KQUEUE +# include "ev_kqueue.c" +#endif +#if EV_USE_EPOLL +# include "ev_epoll.c" +#endif +#if EV_USE_POLL +# include "ev_poll.c" +#endif +#if EV_USE_SELECT +# include "ev_select.c" +#endif + +int +ev_version_major (void) +{ + return EV_VERSION_MAJOR; +} + +int +ev_version_minor (void) +{ + return EV_VERSION_MINOR; +} + +/* return true if we are running with elevated privileges and should ignore env variables */ +int inline_size +enable_secure (void) +{ +#ifdef _WIN32 + return 0; +#else + return getuid () != geteuid () + || getgid () != getegid (); +#endif +} + +unsigned int +ev_supported_backends (void) +{ + unsigned int flags = 0; + + if (EV_USE_PORT ) flags |= EVBACKEND_PORT; + if (EV_USE_KQUEUE) flags |= EVBACKEND_KQUEUE; + if (EV_USE_EPOLL ) flags |= EVBACKEND_EPOLL; + if (EV_USE_POLL ) flags |= EVBACKEND_POLL; + if (EV_USE_SELECT) flags |= EVBACKEND_SELECT; + + return flags; +} + +unsigned int +ev_recommended_backends (void) +{ + unsigned int flags = ev_supported_backends (); + +#ifndef __NetBSD__ + /* kqueue is borked on everything but netbsd apparently */ + /* it usually doesn't work correctly on anything but sockets and pipes */ + flags &= ~EVBACKEND_KQUEUE; +#endif +#ifdef __APPLE__ + /* only select works correctly on that "unix-certified" platform */ + flags &= ~EVBACKEND_KQUEUE; /* horribly broken, even for sockets */ + flags &= ~EVBACKEND_POLL; /* poll is 
based on kqueue from 10.5 onwards */ +#endif + + return flags; +} + +unsigned int +ev_embeddable_backends (void) +{ + int flags = EVBACKEND_EPOLL | EVBACKEND_KQUEUE | EVBACKEND_PORT; + + /* epoll embeddability broken on all linux versions up to at least 2.6.23 */ + /* please fix it and tell me how to detect the fix */ + flags &= ~EVBACKEND_EPOLL; + + return flags; +} + +unsigned int +ev_backend (EV_P) +{ + return backend; +} + +#if EV_MINIMAL < 2 +unsigned int +ev_loop_count (EV_P) +{ + return loop_count; +} + +unsigned int +ev_loop_depth (EV_P) +{ + return loop_depth; +} + +void +ev_set_io_collect_interval (EV_P_ ev_tstamp interval) +{ + io_blocktime = interval; +} + +void +ev_set_timeout_collect_interval (EV_P_ ev_tstamp interval) +{ + timeout_blocktime = interval; +} + +void +ev_set_userdata (EV_P_ void *data) +{ + userdata = data; +} + +void * +ev_userdata (EV_P) +{ + return userdata; +} + +void ev_set_invoke_pending_cb (EV_P_ void (*invoke_pending_cb)(EV_P)) +{ + invoke_cb = invoke_pending_cb; +} + +void ev_set_loop_release_cb (EV_P_ void (*release)(EV_P), void (*acquire)(EV_P)) +{ + release_cb = release; + acquire_cb = acquire; +} +#endif + +/* initialise a loop structure, must be zero-initialised */ +static void noinline +loop_init (EV_P_ unsigned int flags) +{ + if (!backend) + { +#if EV_USE_REALTIME + if (!have_realtime) + { + struct timespec ts; + + if (!clock_gettime (CLOCK_REALTIME, &ts)) + have_realtime = 1; + } +#endif + +#if EV_USE_MONOTONIC + if (!have_monotonic) + { + struct timespec ts; + + if (!clock_gettime (CLOCK_MONOTONIC, &ts)) + have_monotonic = 1; + } +#endif + + /* pid check not overridable via env */ +#ifndef _WIN32 + if (flags & EVFLAG_FORKCHECK) + curpid = getpid (); +#endif + + if (!(flags & EVFLAG_NOENV) + && !enable_secure () + && getenv ("LIBEV_FLAGS")) + flags = atoi (getenv ("LIBEV_FLAGS")); + + ev_rt_now = ev_time (); + mn_now = get_clock (); + now_floor = mn_now; + rtmn_diff = ev_rt_now - mn_now; +#if EV_MINIMAL < 2 + invoke_cb 
= ev_invoke_pending; +#endif + + io_blocktime = 0.; + timeout_blocktime = 0.; + backend = 0; + backend_fd = -1; + sig_pending = 0; +#if EV_ASYNC_ENABLE + async_pending = 0; +#endif +#if EV_USE_INOTIFY + fs_fd = flags & EVFLAG_NOINOTIFY ? -1 : -2; +#endif +#if EV_USE_SIGNALFD + sigfd = flags & EVFLAG_SIGNALFD ? -2 : -1; +#endif + + if (!(flags & 0x0000ffffU)) + flags |= ev_recommended_backends (); + +#if EV_USE_PORT + if (!backend && (flags & EVBACKEND_PORT )) backend = port_init (EV_A_ flags); +#endif +#if EV_USE_KQUEUE + if (!backend && (flags & EVBACKEND_KQUEUE)) backend = kqueue_init (EV_A_ flags); +#endif +#if EV_USE_EPOLL + if (!backend && (flags & EVBACKEND_EPOLL )) backend = epoll_init (EV_A_ flags); +#endif +#if EV_USE_POLL + if (!backend && (flags & EVBACKEND_POLL )) backend = poll_init (EV_A_ flags); +#endif +#if EV_USE_SELECT + if (!backend && (flags & EVBACKEND_SELECT)) backend = select_init (EV_A_ flags); +#endif + + ev_prepare_init (&pending_w, pendingcb); + + ev_init (&pipe_w, pipecb); + ev_set_priority (&pipe_w, EV_MAXPRI); + } +} + +/* free up a loop structure */ +static void noinline +loop_destroy (EV_P) +{ + int i; + + if (ev_is_active (&pipe_w)) + { + /*ev_ref (EV_A);*/ + /*ev_io_stop (EV_A_ &pipe_w);*/ + +#if EV_USE_EVENTFD + if (evfd >= 0) + close (evfd); +#endif + + if (evpipe [0] >= 0) + { + EV_WIN32_CLOSE_FD (evpipe [0]); + EV_WIN32_CLOSE_FD (evpipe [1]); + } + } + +#if EV_USE_SIGNALFD + if (ev_is_active (&sigfd_w)) + close (sigfd); +#endif + +#if EV_USE_INOTIFY + if (fs_fd >= 0) + close (fs_fd); +#endif + + if (backend_fd >= 0) + close (backend_fd); + +#if EV_USE_PORT + if (backend == EVBACKEND_PORT ) port_destroy (EV_A); +#endif +#if EV_USE_KQUEUE + if (backend == EVBACKEND_KQUEUE) kqueue_destroy (EV_A); +#endif +#if EV_USE_EPOLL + if (backend == EVBACKEND_EPOLL ) epoll_destroy (EV_A); +#endif +#if EV_USE_POLL + if (backend == EVBACKEND_POLL ) poll_destroy (EV_A); +#endif +#if EV_USE_SELECT + if (backend == EVBACKEND_SELECT) 
select_destroy (EV_A); +#endif + + for (i = NUMPRI; i--; ) + { + array_free (pending, [i]); +#if EV_IDLE_ENABLE + array_free (idle, [i]); +#endif + } + + ev_free (anfds); anfds = 0; anfdmax = 0; + + /* have to use the microsoft-never-gets-it-right macro */ + array_free (rfeed, EMPTY); + array_free (fdchange, EMPTY); + array_free (timer, EMPTY); +#if EV_PERIODIC_ENABLE + array_free (periodic, EMPTY); +#endif +#if EV_FORK_ENABLE + array_free (fork, EMPTY); +#endif + array_free (prepare, EMPTY); + array_free (check, EMPTY); +#if EV_ASYNC_ENABLE + array_free (async, EMPTY); +#endif + + backend = 0; +} + +#if EV_USE_INOTIFY +inline_size void infy_fork (EV_P); +#endif + +inline_size void +loop_fork (EV_P) +{ +#if EV_USE_PORT + if (backend == EVBACKEND_PORT ) port_fork (EV_A); +#endif +#if EV_USE_KQUEUE + if (backend == EVBACKEND_KQUEUE) kqueue_fork (EV_A); +#endif +#if EV_USE_EPOLL + if (backend == EVBACKEND_EPOLL ) epoll_fork (EV_A); +#endif +#if EV_USE_INOTIFY + infy_fork (EV_A); +#endif + + if (ev_is_active (&pipe_w)) + { + /* this "locks" the handlers against writing to the pipe */ + /* while we modify the fd vars */ + sig_pending = 1; +#if EV_ASYNC_ENABLE + async_pending = 1; +#endif + + ev_ref (EV_A); + ev_io_stop (EV_A_ &pipe_w); + +#if EV_USE_EVENTFD + if (evfd >= 0) + close (evfd); +#endif + + if (evpipe [0] >= 0) + { + EV_WIN32_CLOSE_FD (evpipe [0]); + EV_WIN32_CLOSE_FD (evpipe [1]); + } + + evpipe_init (EV_A); + /* now iterate over everything, in case we missed something */ + pipecb (EV_A_ &pipe_w, EV_READ); + } + + postfork = 0; +} + +#if EV_MULTIPLICITY + +/* allocate and initialise a new, zeroed loop structure */ +/* returns the loop, or 0 when loop_init could not set up any backend */ +struct ev_loop * +ev_loop_new (unsigned int flags) +{ + EV_P = (struct ev_loop *)ev_malloc (sizeof (struct ev_loop)); + + memset (EV_A, 0, sizeof (struct ev_loop)); + loop_init (EV_A_ flags); + + if (ev_backend (EV_A)) + return EV_A; + + /* no backend: the caller only sees 0, so release the structure here instead of leaking it */ + ev_free (EV_A); + return 0; +} + +void +ev_loop_destroy (EV_P) +{ + loop_destroy (EV_A); + ev_free (loop); +} + +void +ev_loop_fork (EV_P) +{ + postfork = 1; /* must be in line with 
ev_default_fork */ +} +#endif /* multiplicity */ + +#if EV_VERIFY +static void noinline +verify_watcher (EV_P_ W w) +{ + assert (("libev: watcher has invalid priority", ABSPRI (w) >= 0 && ABSPRI (w) < NUMPRI)); + + if (w->pending) + assert (("libev: pending watcher not on pending queue", pendings [ABSPRI (w)][w->pending - 1].w == w)); +} + +static void noinline +verify_heap (EV_P_ ANHE *heap, int N) +{ + int i; + + for (i = HEAP0; i < N + HEAP0; ++i) + { + assert (("libev: active index mismatch in heap", ev_active (ANHE_w (heap [i])) == i)); + assert (("libev: heap condition violated", i == HEAP0 || ANHE_at (heap [HPARENT (i)]) <= ANHE_at (heap [i]))); + assert (("libev: heap at cache mismatch", ANHE_at (heap [i]) == ev_at (ANHE_w (heap [i])))); + + verify_watcher (EV_A_ (W)ANHE_w (heap [i])); + } +} + +static void noinline +array_verify (EV_P_ W *ws, int cnt) +{ + while (cnt--) + { + assert (("libev: active index mismatch", ev_active (ws [cnt]) == cnt + 1)); + verify_watcher (EV_A_ ws [cnt]); + } +} +#endif + +#if EV_MINIMAL < 2 +void +ev_loop_verify (EV_P) +{ +#if EV_VERIFY + int i; + WL w; + + assert (activecnt >= -1); + + assert (fdchangemax >= fdchangecnt); + for (i = 0; i < fdchangecnt; ++i) + assert (("libev: negative fd in fdchanges", fdchanges [i] >= 0)); + + assert (anfdmax >= 0); + for (i = 0; i < anfdmax; ++i) + for (w = anfds [i].head; w; w = w->next) + { + verify_watcher (EV_A_ (W)w); + assert (("libev: inactive fd watcher on anfd list", ev_active (w) == 1)); + assert (("libev: fd mismatch between watcher and anfd", ((ev_io *)w)->fd == i)); + } + + assert (timermax >= timercnt); + verify_heap (EV_A_ timers, timercnt); + +#if EV_PERIODIC_ENABLE + assert (periodicmax >= periodiccnt); + verify_heap (EV_A_ periodics, periodiccnt); +#endif + + for (i = NUMPRI; i--; ) + { + assert (pendingmax [i] >= pendingcnt [i]); +#if EV_IDLE_ENABLE + assert (idleall >= 0); + assert (idlemax [i] >= idlecnt [i]); + array_verify (EV_A_ (W *)idles [i], idlecnt [i]); +#endif 
+ } + +#if EV_FORK_ENABLE + assert (forkmax >= forkcnt); + array_verify (EV_A_ (W *)forks, forkcnt); +#endif + +#if EV_ASYNC_ENABLE + assert (asyncmax >= asynccnt); + array_verify (EV_A_ (W *)asyncs, asynccnt); +#endif + + assert (preparemax >= preparecnt); + array_verify (EV_A_ (W *)prepares, preparecnt); + + assert (checkmax >= checkcnt); + array_verify (EV_A_ (W *)checks, checkcnt); + +# if 0 + for (w = (ev_child *)childs [chain & (EV_PID_HASHSIZE - 1)]; w; w = (ev_child *)((WL)w)->next) + for (signum = EV_NSIG; signum--; ) if (signals [signum].pending) +# endif +#endif +} +#endif + +#if EV_MULTIPLICITY +struct ev_loop * +ev_default_loop_init (unsigned int flags) +#else +int +ev_default_loop (unsigned int flags) +#endif +{ + if (!ev_default_loop_ptr) + { +#if EV_MULTIPLICITY + EV_P = ev_default_loop_ptr = &default_loop_struct; +#else + ev_default_loop_ptr = 1; +#endif + + loop_init (EV_A_ flags); + + if (ev_backend (EV_A)) + { +#ifndef _WIN32 + ev_signal_init (&childev, childcb, SIGCHLD); + ev_set_priority (&childev, EV_MAXPRI); + ev_signal_start (EV_A_ &childev); + ev_unref (EV_A); /* child watcher should not keep loop alive */ +#endif + } + else + ev_default_loop_ptr = 0; + } + + return ev_default_loop_ptr; +} + +void +ev_default_destroy (void) +{ +#if EV_MULTIPLICITY + EV_P = ev_default_loop_ptr; +#endif + + ev_default_loop_ptr = 0; + +#ifndef _WIN32 + ev_ref (EV_A); /* child watcher */ + ev_signal_stop (EV_A_ &childev); +#endif + + loop_destroy (EV_A); +} + +void +ev_default_fork (void) +{ +#if EV_MULTIPLICITY + EV_P = ev_default_loop_ptr; +#endif + + postfork = 1; /* must be in line with ev_loop_fork */ +} + +/*****************************************************************************/ + +void +ev_invoke (EV_P_ void *w, int revents) +{ + EV_CB_INVOKE ((W)w, revents); +} + +unsigned int +ev_pending_count (EV_P) +{ + int pri; + unsigned int count = 0; + + for (pri = NUMPRI; pri--; ) + count += pendingcnt [pri]; + + return count; +} + +void noinline 
+ev_invoke_pending (EV_P) +{ + int pri; + + for (pri = NUMPRI; pri--; ) + while (pendingcnt [pri]) + { + ANPENDING *p = pendings [pri] + --pendingcnt [pri]; + + /*assert (("libev: non-pending watcher on pending list", p->w->pending));*/ + /* ^ this is no longer true, as pending_w could be here */ + + p->w->pending = 0; + EV_CB_INVOKE (p->w, p->events); + EV_FREQUENT_CHECK; + } +} + +#if EV_IDLE_ENABLE +/* make idle watchers pending. this handles the "call-idle */ +/* only when higher priorities are idle" logic */ +inline_size void +idle_reify (EV_P) +{ + if (expect_false (idleall)) + { + int pri; + + for (pri = NUMPRI; pri--; ) + { + if (pendingcnt [pri]) + break; + + if (idlecnt [pri]) + { + queue_events (EV_A_ (W *)idles [pri], idlecnt [pri], EV_IDLE); + break; + } + } + } +} +#endif + +/* make timers pending */ +inline_size void +timers_reify (EV_P) +{ + EV_FREQUENT_CHECK; + + if (timercnt && ANHE_at (timers [HEAP0]) < mn_now) + { + do + { + ev_timer *w = (ev_timer *)ANHE_w (timers [HEAP0]); + + /*assert (("libev: inactive timer on timer heap detected", ev_is_active (w)));*/ + + /* first reschedule or stop timer */ + if (w->repeat) + { + ev_at (w) += w->repeat; + if (ev_at (w) < mn_now) + ev_at (w) = mn_now; + + assert (("libev: negative ev_timer repeat value found while processing timers", w->repeat > 0.)); + + ANHE_at_cache (timers [HEAP0]); + downheap (timers, timercnt, HEAP0); + } + else + ev_timer_stop (EV_A_ w); /* nonrepeating: stop timer */ + + EV_FREQUENT_CHECK; + feed_reverse (EV_A_ (W)w); + } + while (timercnt && ANHE_at (timers [HEAP0]) < mn_now); + + feed_reverse_done (EV_A_ EV_TIMEOUT); + } +} + +#if EV_PERIODIC_ENABLE +/* make periodics pending */ +inline_size void +periodics_reify (EV_P) +{ + EV_FREQUENT_CHECK; + + while (periodiccnt && ANHE_at (periodics [HEAP0]) < ev_rt_now) + { + int feed_count = 0; + + do + { + ev_periodic *w = (ev_periodic *)ANHE_w (periodics [HEAP0]); + + /*assert (("libev: inactive timer on periodic heap detected", 
ev_is_active (w)));*/ + + /* first reschedule or stop timer */ + if (w->reschedule_cb) + { + ev_at (w) = w->reschedule_cb (w, ev_rt_now); + + assert (("libev: ev_periodic reschedule callback returned time in the past", ev_at (w) >= ev_rt_now)); + + ANHE_at_cache (periodics [HEAP0]); + downheap (periodics, periodiccnt, HEAP0); + } + else if (w->interval) + { + ev_at (w) = w->offset + ceil ((ev_rt_now - w->offset) / w->interval) * w->interval; + /* if next trigger time is not sufficiently in the future, put it there */ + /* this might happen because of floating point inexactness */ + if (ev_at (w) - ev_rt_now < TIME_EPSILON) + { + ev_at (w) += w->interval; + + /* if interval is unreasonably low we might still have a time in the past */ + /* so correct this. this will make the periodic very inexact, but the user */ + /* has effectively asked to get triggered more often than possible */ + if (ev_at (w) < ev_rt_now) + ev_at (w) = ev_rt_now; + } + + ANHE_at_cache (periodics [HEAP0]); + downheap (periodics, periodiccnt, HEAP0); + } + else + ev_periodic_stop (EV_A_ w); /* nonrepeating: stop timer */ + + EV_FREQUENT_CHECK; + feed_reverse (EV_A_ (W)w); + } + while (periodiccnt && ANHE_at (periodics [HEAP0]) < ev_rt_now); + + feed_reverse_done (EV_A_ EV_PERIODIC); + } +} + +/* simply recalculate all periodics */ +/* TODO: maybe ensure that at least one event happens when jumping forward? 
*/ +static void noinline +periodics_reschedule (EV_P) +{ + int i; + + /* adjust periodics after time jump */ + for (i = HEAP0; i < periodiccnt + HEAP0; ++i) + { + ev_periodic *w = (ev_periodic *)ANHE_w (periodics [i]); + + if (w->reschedule_cb) + ev_at (w) = w->reschedule_cb (w, ev_rt_now); + else if (w->interval) + ev_at (w) = w->offset + ceil ((ev_rt_now - w->offset) / w->interval) * w->interval; + + ANHE_at_cache (periodics [i]); + } + + reheap (periodics, periodiccnt); +} +#endif + +/* adjust all timers by a given offset */ +static void noinline +timers_reschedule (EV_P_ ev_tstamp adjust) +{ + int i; + + for (i = 0; i < timercnt; ++i) + { + ANHE *he = timers + i + HEAP0; + ANHE_w (*he)->at += adjust; + ANHE_at_cache (*he); + } +} + +/* fetch new monotonic and realtime times from the kernel */ +/* also detect if there was a timejump, and act accordingly */ +inline_speed void +time_update (EV_P_ ev_tstamp max_block) +{ +#if EV_USE_MONOTONIC + if (expect_true (have_monotonic)) + { + int i; + ev_tstamp odiff = rtmn_diff; + + mn_now = get_clock (); + + /* only fetch the realtime clock every 0.5*MIN_TIMEJUMP seconds */ + /* interpolate in the meantime */ + if (expect_true (mn_now - now_floor < MIN_TIMEJUMP * .5)) + { + ev_rt_now = rtmn_diff + mn_now; + return; + } + + now_floor = mn_now; + ev_rt_now = ev_time (); + + /* loop a few times, before making important decisions. + * on the choice of "4": one iteration isn't enough, + * in case we get preempted during the calls to + * ev_time and get_clock. a second call is almost guaranteed + * to succeed in that case, though. and looping a few more times + * doesn't hurt either as we only do this on time-jumps or + * in the unlikely event of having been preempted here. 
+ */ + for (i = 4; --i; ) + { + rtmn_diff = ev_rt_now - mn_now; + + if (expect_true (fabs (odiff - rtmn_diff) < MIN_TIMEJUMP)) + return; /* all is well */ + + ev_rt_now = ev_time (); + mn_now = get_clock (); + now_floor = mn_now; + } + + /* no timer adjustment, as the monotonic clock doesn't jump */ + /* timers_reschedule (EV_A_ rtmn_diff - odiff) */ +# if EV_PERIODIC_ENABLE + periodics_reschedule (EV_A); +# endif + } + else +#endif + { + ev_rt_now = ev_time (); + + if (expect_false (mn_now > ev_rt_now || ev_rt_now > mn_now + max_block + MIN_TIMEJUMP)) + { + /* adjust timers. this is easy, as the offset is the same for all of them */ + timers_reschedule (EV_A_ ev_rt_now - mn_now); +#if EV_PERIODIC_ENABLE + periodics_reschedule (EV_A); +#endif + } + + mn_now = ev_rt_now; + } +} + +void +ev_loop (EV_P_ int flags) +{ +#if EV_MINIMAL < 2 + ++loop_depth; +#endif + + assert (("libev: ev_loop recursion during release detected", loop_done != EVUNLOOP_RECURSE)); + + loop_done = EVUNLOOP_CANCEL; + + EV_INVOKE_PENDING; /* in case we recurse, ensure ordering stays nice and clean */ + + do + { +#if EV_VERIFY >= 2 + ev_loop_verify (EV_A); +#endif + +#ifndef _WIN32 + if (expect_false (curpid)) /* penalise the forking check even more */ + if (expect_false (getpid () != curpid)) + { + curpid = getpid (); + postfork = 1; + } +#endif + +#if EV_FORK_ENABLE + /* we might have forked, so queue fork handlers */ + if (expect_false (postfork)) + if (forkcnt) + { + queue_events (EV_A_ (W *)forks, forkcnt, EV_FORK); + EV_INVOKE_PENDING; + } +#endif + + /* queue prepare watchers (and execute them) */ + if (expect_false (preparecnt)) + { + queue_events (EV_A_ (W *)prepares, preparecnt, EV_PREPARE); + EV_INVOKE_PENDING; + } + + if (expect_false (loop_done)) + break; + + /* we might have forked, so reify kernel state if necessary */ + if (expect_false (postfork)) + loop_fork (EV_A); + + /* update fd-related kernel structures */ + fd_reify (EV_A); + + /* calculate blocking time */ + { + ev_tstamp 
waittime = 0.; + ev_tstamp sleeptime = 0.; + + if (expect_true (!(flags & EVLOOP_NONBLOCK || idleall || !activecnt))) + { + /* remember old timestamp for io_blocktime calculation */ + ev_tstamp prev_mn_now = mn_now; + + /* update time to cancel out callback processing overhead */ + time_update (EV_A_ 1e100); + + waittime = MAX_BLOCKTIME; + + if (timercnt) + { + ev_tstamp to = ANHE_at (timers [HEAP0]) - mn_now + backend_fudge; + if (waittime > to) waittime = to; + } + +#if EV_PERIODIC_ENABLE + if (periodiccnt) + { + ev_tstamp to = ANHE_at (periodics [HEAP0]) - ev_rt_now + backend_fudge; + if (waittime > to) waittime = to; + } +#endif + + /* don't let timeouts decrease the waittime below timeout_blocktime */ + if (expect_false (waittime < timeout_blocktime)) + waittime = timeout_blocktime; + + /* extra check because io_blocktime is commonly 0 */ + if (expect_false (io_blocktime)) + { + sleeptime = io_blocktime - (mn_now - prev_mn_now); + + if (sleeptime > waittime - backend_fudge) + sleeptime = waittime - backend_fudge; + + if (expect_true (sleeptime > 0.)) + { + ev_sleep (sleeptime); + waittime -= sleeptime; + } + } + } + +#if EV_MINIMAL < 2 + ++loop_count; +#endif + assert ((loop_done = EVUNLOOP_RECURSE, 1)); /* assert for side effect */ + backend_poll (EV_A_ waittime); + assert ((loop_done = EVUNLOOP_CANCEL, 1)); /* assert for side effect */ + + /* update ev_rt_now, do magic */ + time_update (EV_A_ waittime + sleeptime); + } + + /* queue pending timers and reschedule them */ + timers_reify (EV_A); /* relative timers called last */ +#if EV_PERIODIC_ENABLE + periodics_reify (EV_A); /* absolute timers called first */ +#endif + +#if EV_IDLE_ENABLE + /* queue idle watchers unless other events are pending */ + idle_reify (EV_A); +#endif + + /* queue check watchers, to be executed first */ + if (expect_false (checkcnt)) + queue_events (EV_A_ (W *)checks, checkcnt, EV_CHECK); + + EV_INVOKE_PENDING; + } + while (expect_true ( + activecnt + && !loop_done + && !(flags & 
(EVLOOP_ONESHOT | EVLOOP_NONBLOCK)) + )); + + if (loop_done == EVUNLOOP_ONE) + loop_done = EVUNLOOP_CANCEL; + +#if EV_MINIMAL < 2 + --loop_depth; +#endif +} + +void +ev_unloop (EV_P_ int how) +{ + loop_done = how; +} + +void +ev_ref (EV_P) +{ + ++activecnt; +} + +void +ev_unref (EV_P) +{ + --activecnt; +} + +void +ev_now_update (EV_P) +{ + time_update (EV_A_ 1e100); +} + +void +ev_suspend (EV_P) +{ + ev_now_update (EV_A); +} + +void +ev_resume (EV_P) +{ + ev_tstamp mn_prev = mn_now; + + ev_now_update (EV_A); + timers_reschedule (EV_A_ mn_now - mn_prev); +#if EV_PERIODIC_ENABLE + /* TODO: really do this? */ + periodics_reschedule (EV_A); +#endif +} + +/*****************************************************************************/ +/* singly-linked list management, used when the expected list length is short */ + +inline_size void +wlist_add (WL *head, WL elem) +{ + elem->next = *head; + *head = elem; +} + +inline_size void +wlist_del (WL *head, WL elem) +{ + while (*head) + { + if (expect_true (*head == elem)) + { + *head = elem->next; + break; + } + + head = &(*head)->next; + } +} + +/* internal, faster, version of ev_clear_pending */ +inline_speed void +clear_pending (EV_P_ W w) +{ + if (w->pending) + { + pendings [ABSPRI (w)][w->pending - 1].w = (W)&pending_w; + w->pending = 0; + } +} + +int +ev_clear_pending (EV_P_ void *w) +{ + W w_ = (W)w; + int pending = w_->pending; + + if (expect_true (pending)) + { + ANPENDING *p = pendings [ABSPRI (w_)] + pending - 1; + p->w = (W)&pending_w; + w_->pending = 0; + return p->events; + } + else + return 0; +} + +inline_size void +pri_adjust (EV_P_ W w) +{ + int pri = ev_priority (w); + pri = pri < EV_MINPRI ? EV_MINPRI : pri; + pri = pri > EV_MAXPRI ? 
EV_MAXPRI : pri; + ev_set_priority (w, pri); +} + +inline_speed void +ev_start (EV_P_ W w, int active) +{ + pri_adjust (EV_A_ w); + w->active = active; + ev_ref (EV_A); +} + +inline_size void +ev_stop (EV_P_ W w) +{ + ev_unref (EV_A); + w->active = 0; +} + +/*****************************************************************************/ + +void noinline +ev_io_start (EV_P_ ev_io *w) +{ + int fd = w->fd; + + if (expect_false (ev_is_active (w))) + return; + + assert (("libev: ev_io_start called with negative fd", fd >= 0)); + assert (("libev: ev_io start called with illegal event mask", !(w->events & ~(EV__IOFDSET | EV_READ | EV_WRITE)))); + + EV_FREQUENT_CHECK; + + ev_start (EV_A_ (W)w, 1); + array_needsize (ANFD, anfds, anfdmax, fd + 1, array_init_zero); + wlist_add (&anfds[fd].head, (WL)w); + + fd_change (EV_A_ fd, w->events & EV__IOFDSET | EV_ANFD_REIFY); + w->events &= ~EV__IOFDSET; + + EV_FREQUENT_CHECK; +} + +void noinline +ev_io_stop (EV_P_ ev_io *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + assert (("libev: ev_io_stop called with illegal fd (must stay constant after start!)", w->fd >= 0 && w->fd < anfdmax)); + + EV_FREQUENT_CHECK; + + wlist_del (&anfds[w->fd].head, (WL)w); + ev_stop (EV_A_ (W)w); + + fd_change (EV_A_ w->fd, 1); + + EV_FREQUENT_CHECK; +} + +void noinline +ev_timer_start (EV_P_ ev_timer *w) +{ + if (expect_false (ev_is_active (w))) + return; + + ev_at (w) += mn_now; + + assert (("libev: ev_timer_start called with negative timer repeat value", w->repeat >= 0.)); + + EV_FREQUENT_CHECK; + + ++timercnt; + ev_start (EV_A_ (W)w, timercnt + HEAP0 - 1); + array_needsize (ANHE, timers, timermax, ev_active (w) + 1, EMPTY2); + ANHE_w (timers [ev_active (w)]) = (WT)w; + ANHE_at_cache (timers [ev_active (w)]); + upheap (timers, ev_active (w)); + + EV_FREQUENT_CHECK; + + /*assert (("libev: internal timer heap corruption", timers [ev_active (w)] == (WT)w));*/ +} + +void noinline +ev_timer_stop (EV_P_ ev_timer *w) 
+{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + { + int active = ev_active (w); + + assert (("libev: internal timer heap corruption", ANHE_w (timers [active]) == (WT)w)); + + --timercnt; + + if (expect_true (active < timercnt + HEAP0)) + { + timers [active] = timers [timercnt + HEAP0]; + adjustheap (timers, timercnt, active); + } + } + + EV_FREQUENT_CHECK; + + ev_at (w) -= mn_now; + + ev_stop (EV_A_ (W)w); +} + +void noinline +ev_timer_again (EV_P_ ev_timer *w) +{ + EV_FREQUENT_CHECK; + + if (ev_is_active (w)) + { + if (w->repeat) + { + ev_at (w) = mn_now + w->repeat; + ANHE_at_cache (timers [ev_active (w)]); + adjustheap (timers, timercnt, ev_active (w)); + } + else + ev_timer_stop (EV_A_ w); + } + else if (w->repeat) + { + ev_at (w) = w->repeat; + ev_timer_start (EV_A_ w); + } + + EV_FREQUENT_CHECK; +} + +ev_tstamp +ev_timer_remaining (EV_P_ ev_timer *w) +{ + return ev_at (w) - (ev_is_active (w) ? mn_now : 0.); +} + +#if EV_PERIODIC_ENABLE +void noinline +ev_periodic_start (EV_P_ ev_periodic *w) +{ + if (expect_false (ev_is_active (w))) + return; + + if (w->reschedule_cb) + ev_at (w) = w->reschedule_cb (w, ev_rt_now); + else if (w->interval) + { + assert (("libev: ev_periodic_start called with negative interval value", w->interval >= 0.)); + /* this formula differs from the one in periodic_reify because we do not always round up */ + ev_at (w) = w->offset + ceil ((ev_rt_now - w->offset) / w->interval) * w->interval; + } + else + ev_at (w) = w->offset; + + EV_FREQUENT_CHECK; + + ++periodiccnt; + ev_start (EV_A_ (W)w, periodiccnt + HEAP0 - 1); + array_needsize (ANHE, periodics, periodicmax, ev_active (w) + 1, EMPTY2); + ANHE_w (periodics [ev_active (w)]) = (WT)w; + ANHE_at_cache (periodics [ev_active (w)]); + upheap (periodics, ev_active (w)); + + EV_FREQUENT_CHECK; + + /*assert (("libev: internal periodic heap corruption", ANHE_w (periodics [ev_active (w)]) == (WT)w));*/ +} + +void noinline 
+ev_periodic_stop (EV_P_ ev_periodic *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + { + int active = ev_active (w); + + assert (("libev: internal periodic heap corruption", ANHE_w (periodics [active]) == (WT)w)); + + --periodiccnt; + + if (expect_true (active < periodiccnt + HEAP0)) + { + periodics [active] = periodics [periodiccnt + HEAP0]; + adjustheap (periodics, periodiccnt, active); + } + } + + EV_FREQUENT_CHECK; + + ev_stop (EV_A_ (W)w); +} + +void noinline +ev_periodic_again (EV_P_ ev_periodic *w) +{ + /* TODO: use adjustheap and recalculation */ + ev_periodic_stop (EV_A_ w); + ev_periodic_start (EV_A_ w); +} +#endif + +#ifndef SA_RESTART +# define SA_RESTART 0 +#endif + +void noinline +ev_signal_start (EV_P_ ev_signal *w) +{ + if (expect_false (ev_is_active (w))) + return; + + assert (("libev: ev_signal_start called with illegal signal number", w->signum > 0 && w->signum < EV_NSIG)); + +#if EV_MULTIPLICITY + assert (("libev: a signal must not be attached to two different loops", + !signals [w->signum - 1].loop || signals [w->signum - 1].loop == loop)); + + signals [w->signum - 1].loop = EV_A; +#endif + + EV_FREQUENT_CHECK; + +#if EV_USE_SIGNALFD + if (sigfd == -2) + { + sigfd = signalfd (-1, &sigfd_set, SFD_NONBLOCK | SFD_CLOEXEC); + if (sigfd < 0 && errno == EINVAL) + sigfd = signalfd (-1, &sigfd_set, 0); /* retry without flags */ + + if (sigfd >= 0) + { + fd_intern (sigfd); /* doing it twice will not hurt */ + + sigemptyset (&sigfd_set); + + ev_io_init (&sigfd_w, sigfdcb, sigfd, EV_READ); + ev_set_priority (&sigfd_w, EV_MAXPRI); + ev_io_start (EV_A_ &sigfd_w); + ev_unref (EV_A); /* signalfd watcher should not keep loop alive */ + } + } + + if (sigfd >= 0) + { + /* TODO: check .head */ + sigaddset (&sigfd_set, w->signum); + sigprocmask (SIG_BLOCK, &sigfd_set, 0); + + signalfd (sigfd, &sigfd_set, 0); + } +#endif + + ev_start (EV_A_ (W)w, 1); + wlist_add (&signals [w->signum - 1].head, 
(WL)w); + + if (!((WL)w)->next) +# if EV_USE_SIGNALFD + if (sigfd < 0) /*TODO*/ +# endif + { +# if _WIN32 + evpipe_init (EV_A); + + signal (w->signum, ev_sighandler); +# else + struct sigaction sa; + + evpipe_init (EV_A); + + sa.sa_handler = ev_sighandler; + sigfillset (&sa.sa_mask); + sa.sa_flags = SA_RESTART; /* if restarting works we save one iteration */ + sigaction (w->signum, &sa, 0); + + sigemptyset (&sa.sa_mask); + sigaddset (&sa.sa_mask, w->signum); + sigprocmask (SIG_UNBLOCK, &sa.sa_mask, 0); +#endif + } + + EV_FREQUENT_CHECK; +} + +void noinline +ev_signal_stop (EV_P_ ev_signal *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + wlist_del (&signals [w->signum - 1].head, (WL)w); + ev_stop (EV_A_ (W)w); + + if (!signals [w->signum - 1].head) + { +#if EV_MULTIPLICITY + signals [w->signum - 1].loop = 0; /* unattach from signal */ +#endif +#if EV_USE_SIGNALFD + if (sigfd >= 0) + { + sigset_t ss; + + sigemptyset (&ss); + sigaddset (&ss, w->signum); + sigdelset (&sigfd_set, w->signum); + + signalfd (sigfd, &sigfd_set, 0); + sigprocmask (SIG_UNBLOCK, &ss, 0); + } + else +#endif + signal (w->signum, SIG_DFL); + } + + EV_FREQUENT_CHECK; +} + +void +ev_child_start (EV_P_ ev_child *w) +{ +#if EV_MULTIPLICITY + assert (("libev: child watchers are only supported in the default loop", loop == ev_default_loop_ptr)); +#endif + if (expect_false (ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + ev_start (EV_A_ (W)w, 1); + wlist_add (&childs [w->pid & (EV_PID_HASHSIZE - 1)], (WL)w); + + EV_FREQUENT_CHECK; +} + +void +ev_child_stop (EV_P_ ev_child *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + wlist_del (&childs [w->pid & (EV_PID_HASHSIZE - 1)], (WL)w); + ev_stop (EV_A_ (W)w); + + EV_FREQUENT_CHECK; +} + +#if EV_STAT_ENABLE + +# ifdef _WIN32 +# undef lstat +# define lstat(a,b) _stati64 (a,b) +# endif + +#define DEF_STAT_INTERVAL 5.0074891 
+#define NFS_STAT_INTERVAL 30.1074891 /* for filesystems potentially failing inotify */ +#define MIN_STAT_INTERVAL 0.1074891 + +static void noinline stat_timer_cb (EV_P_ ev_timer *w_, int revents); + +#if EV_USE_INOTIFY +# define EV_INOTIFY_BUFSIZE 8192 + +static void noinline +infy_add (EV_P_ ev_stat *w) +{ + w->wd = inotify_add_watch (fs_fd, w->path, IN_ATTRIB | IN_DELETE_SELF | IN_MOVE_SELF | IN_MODIFY | IN_DONT_FOLLOW | IN_MASK_ADD); + + if (w->wd >= 0) + { + struct statfs sfs; + + /* now local changes will be tracked by inotify, but remote changes won't */ + /* unless the filesystem is known to be local, we therefore still poll */ + /* also do poll on <2.6.25, but with normal frequency */ + + if (!fs_2625) + w->timer.repeat = w->interval ? w->interval : DEF_STAT_INTERVAL; + else if (!statfs (w->path, &sfs) + && (sfs.f_type == 0x1373 /* devfs */ + || sfs.f_type == 0xEF53 /* ext2/3 */ + || sfs.f_type == 0x3153464a /* jfs */ + || sfs.f_type == 0x52654973 /* reiser3 */ + || sfs.f_type == 0x01021994 /* tempfs */ + || sfs.f_type == 0x58465342 /* xfs */)) + w->timer.repeat = 0.; /* filesystem is local, kernel new enough */ + else + w->timer.repeat = w->interval ? w->interval : NFS_STAT_INTERVAL; /* remote, use reduced frequency */ + } + else + { + /* can't use inotify, continue to stat */ + w->timer.repeat = w->interval ? w->interval : DEF_STAT_INTERVAL; + + /* if path is not there, monitor some parent directory for speedup hints */ + /* note that exceeding the hardcoded path limit is not a correctness issue, */ + /* but an efficiency issue only */ + if ((errno == ENOENT || errno == EACCES) && strlen (w->path) < 4096) + { + char path [4096]; + strcpy (path, w->path); + + do + { + int mask = IN_MASK_ADD | IN_DELETE_SELF | IN_MOVE_SELF + | (errno == EACCES ? 
IN_ATTRIB : IN_CREATE | IN_MOVED_TO); + + char *pend = strrchr (path, '/'); + + if (!pend || pend == path) + break; + + *pend = 0; + w->wd = inotify_add_watch (fs_fd, path, mask); + } + while (w->wd < 0 && (errno == ENOENT || errno == EACCES)); + } + } + + if (w->wd >= 0) + wlist_add (&fs_hash [w->wd & (EV_INOTIFY_HASHSIZE - 1)].head, (WL)w); + + /* now re-arm timer, if required */ + if (ev_is_active (&w->timer)) ev_ref (EV_A); + ev_timer_again (EV_A_ &w->timer); + if (ev_is_active (&w->timer)) ev_unref (EV_A); +} + +static void noinline +infy_del (EV_P_ ev_stat *w) +{ + int slot; + int wd = w->wd; + + if (wd < 0) + return; + + w->wd = -2; + slot = wd & (EV_INOTIFY_HASHSIZE - 1); + wlist_del (&fs_hash [slot].head, (WL)w); + + /* remove this watcher, if others are watching it, they will rearm */ + inotify_rm_watch (fs_fd, wd); +} + +static void noinline +infy_wd (EV_P_ int slot, int wd, struct inotify_event *ev) +{ + if (slot < 0) + /* overflow, need to check for all hash slots */ + for (slot = 0; slot < EV_INOTIFY_HASHSIZE; ++slot) + infy_wd (EV_A_ slot, wd, ev); + else + { + WL w_; + + for (w_ = fs_hash [slot & (EV_INOTIFY_HASHSIZE - 1)].head; w_; ) + { + ev_stat *w = (ev_stat *)w_; + w_ = w_->next; /* lets us remove this watcher and all before it */ + + if (w->wd == wd || wd == -1) + { + if (ev->mask & (IN_IGNORED | IN_UNMOUNT | IN_DELETE_SELF)) + { + wlist_del (&fs_hash [slot & (EV_INOTIFY_HASHSIZE - 1)].head, (WL)w); + w->wd = -1; + infy_add (EV_A_ w); /* re-add, no matter what */ + } + + stat_timer_cb (EV_A_ &w->timer, 0); + } + } + } +} + +/* io callback for the inotify fd: read one batch of events and hand each one to infy_wd */ +/* a single read can return several events, each a struct inotify_event header */ +/* followed by ev->len extra bytes, so the event pointer must be re-derived */ +/* from buf + ofs on every iteration instead of staying on the first event */ +static void +infy_cb (EV_P_ ev_io *w, int revents) +{ + char buf [EV_INOTIFY_BUFSIZE]; + int ofs; + int len = read (fs_fd, buf, sizeof (buf)); + + /* a failed read returns -1, in which case the loop body never runs */ + for (ofs = 0; ofs < len; ) + { + struct inotify_event *ev = (struct inotify_event *)(buf + ofs); + + infy_wd (EV_A_ ev->wd, ev->wd, ev); + ofs += sizeof (struct inotify_event) + ev->len; + } +} + +inline_size void +check_2625 (EV_P) +{ + /* kernels < 2.6.25 are borked + * 
http://www.ussg.indiana.edu/hypermail/linux/kernel/0711.3/1208.html + */ + struct utsname buf; + int major, minor, micro; + + if (uname (&buf)) + return; + + if (sscanf (buf.release, "%d.%d.%d", &major, &minor, &micro) != 3) + return; + + if (major < 2 + || (major == 2 && minor < 6) + || (major == 2 && minor == 6 && micro < 25)) + return; + + fs_2625 = 1; +} + +inline_size int +infy_newfd (void) +{ +#if defined (IN_CLOEXEC) && defined (IN_NONBLOCK) + int fd = inotify_init1 (IN_CLOEXEC | IN_NONBLOCK); + if (fd >= 0) + return fd; +#endif + return inotify_init (); +} + +inline_size void +infy_init (EV_P) +{ + if (fs_fd != -2) + return; + + fs_fd = -1; + + check_2625 (EV_A); + + fs_fd = infy_newfd (); + + if (fs_fd >= 0) + { + fd_intern (fs_fd); + ev_io_init (&fs_w, infy_cb, fs_fd, EV_READ); + ev_set_priority (&fs_w, EV_MAXPRI); + ev_io_start (EV_A_ &fs_w); + ev_unref (EV_A); + } +} + +inline_size void +infy_fork (EV_P) +{ + int slot; + + if (fs_fd < 0) + return; + + ev_ref (EV_A); + ev_io_stop (EV_A_ &fs_w); + close (fs_fd); + fs_fd = infy_newfd (); + + if (fs_fd >= 0) + { + fd_intern (fs_fd); + ev_io_set (&fs_w, fs_fd, EV_READ); + ev_io_start (EV_A_ &fs_w); + ev_unref (EV_A); + } + + for (slot = 0; slot < EV_INOTIFY_HASHSIZE; ++slot) + { + WL w_ = fs_hash [slot].head; + fs_hash [slot].head = 0; + + while (w_) + { + ev_stat *w = (ev_stat *)w_; + w_ = w_->next; /* lets us add this watcher */ + + w->wd = -1; + + if (fs_fd >= 0) + infy_add (EV_A_ w); /* re-add, no matter what */ + else + { + w->timer.repeat = w->interval ? 
w->interval : DEF_STAT_INTERVAL; + if (ev_is_active (&w->timer)) ev_ref (EV_A); + ev_timer_again (EV_A_ &w->timer); + if (ev_is_active (&w->timer)) ev_unref (EV_A); + } + } + } +} + +#endif + +#ifdef _WIN32 +# define EV_LSTAT(p,b) _stati64 (p, b) +#else +# define EV_LSTAT(p,b) lstat (p, b) +#endif + +void +ev_stat_stat (EV_P_ ev_stat *w) +{ + if (lstat (w->path, &w->attr) < 0) + w->attr.st_nlink = 0; + else if (!w->attr.st_nlink) + w->attr.st_nlink = 1; +} + +static void noinline +stat_timer_cb (EV_P_ ev_timer *w_, int revents) +{ + ev_stat *w = (ev_stat *)(((char *)w_) - offsetof (ev_stat, timer)); + + ev_statdata prev = w->attr; + ev_stat_stat (EV_A_ w); + + /* memcmp doesn't work on netbsd, they.... do stuff to their struct stat */ + if ( + prev.st_dev != w->attr.st_dev + || prev.st_ino != w->attr.st_ino + || prev.st_mode != w->attr.st_mode + || prev.st_nlink != w->attr.st_nlink + || prev.st_uid != w->attr.st_uid + || prev.st_gid != w->attr.st_gid + || prev.st_rdev != w->attr.st_rdev + || prev.st_size != w->attr.st_size + || prev.st_atime != w->attr.st_atime + || prev.st_mtime != w->attr.st_mtime + || prev.st_ctime != w->attr.st_ctime + ) { + /* we only update w->prev on actual differences */ + /* in case we test more often than invoke the callback, */ + /* to ensure that prev is always different to attr */ + w->prev = prev; + + #if EV_USE_INOTIFY + if (fs_fd >= 0) + { + infy_del (EV_A_ w); + infy_add (EV_A_ w); + ev_stat_stat (EV_A_ w); /* avoid race... */ + } + #endif + + ev_feed_event (EV_A_ w, EV_STAT); + } +} + +void +ev_stat_start (EV_P_ ev_stat *w) +{ + if (expect_false (ev_is_active (w))) + return; + + ev_stat_stat (EV_A_ w); + + if (w->interval < MIN_STAT_INTERVAL && w->interval) + w->interval = MIN_STAT_INTERVAL; + + ev_timer_init (&w->timer, stat_timer_cb, 0., w->interval ? 
w->interval : DEF_STAT_INTERVAL); + ev_set_priority (&w->timer, ev_priority (w)); + +#if EV_USE_INOTIFY + infy_init (EV_A); + + if (fs_fd >= 0) + infy_add (EV_A_ w); + else +#endif + { + ev_timer_again (EV_A_ &w->timer); + ev_unref (EV_A); + } + + ev_start (EV_A_ (W)w, 1); + + EV_FREQUENT_CHECK; +} + +void +ev_stat_stop (EV_P_ ev_stat *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + +#if EV_USE_INOTIFY + infy_del (EV_A_ w); +#endif + + if (ev_is_active (&w->timer)) + { + ev_ref (EV_A); + ev_timer_stop (EV_A_ &w->timer); + } + + ev_stop (EV_A_ (W)w); + + EV_FREQUENT_CHECK; +} +#endif + +#if EV_IDLE_ENABLE +void +ev_idle_start (EV_P_ ev_idle *w) +{ + if (expect_false (ev_is_active (w))) + return; + + pri_adjust (EV_A_ (W)w); + + EV_FREQUENT_CHECK; + + { + int active = ++idlecnt [ABSPRI (w)]; + + ++idleall; + ev_start (EV_A_ (W)w, active); + + array_needsize (ev_idle *, idles [ABSPRI (w)], idlemax [ABSPRI (w)], active, EMPTY2); + idles [ABSPRI (w)][active - 1] = w; + } + + EV_FREQUENT_CHECK; +} + +void +ev_idle_stop (EV_P_ ev_idle *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + { + int active = ev_active (w); + + idles [ABSPRI (w)][active - 1] = idles [ABSPRI (w)][--idlecnt [ABSPRI (w)]]; + ev_active (idles [ABSPRI (w)][active - 1]) = active; + + ev_stop (EV_A_ (W)w); + --idleall; + } + + EV_FREQUENT_CHECK; +} +#endif + +void +ev_prepare_start (EV_P_ ev_prepare *w) +{ + if (expect_false (ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + ev_start (EV_A_ (W)w, ++preparecnt); + array_needsize (ev_prepare *, prepares, preparemax, preparecnt, EMPTY2); + prepares [preparecnt - 1] = w; + + EV_FREQUENT_CHECK; +} + +void +ev_prepare_stop (EV_P_ ev_prepare *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + { + int active = ev_active (w); + + prepares [active - 1] = prepares 
[--preparecnt]; + ev_active (prepares [active - 1]) = active; + } + + ev_stop (EV_A_ (W)w); + + EV_FREQUENT_CHECK; +} + +void +ev_check_start (EV_P_ ev_check *w) +{ + if (expect_false (ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + ev_start (EV_A_ (W)w, ++checkcnt); + array_needsize (ev_check *, checks, checkmax, checkcnt, EMPTY2); + checks [checkcnt - 1] = w; + + EV_FREQUENT_CHECK; +} + +void +ev_check_stop (EV_P_ ev_check *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + { + int active = ev_active (w); + + checks [active - 1] = checks [--checkcnt]; + ev_active (checks [active - 1]) = active; + } + + ev_stop (EV_A_ (W)w); + + EV_FREQUENT_CHECK; +} + +#if EV_EMBED_ENABLE +void noinline +ev_embed_sweep (EV_P_ ev_embed *w) +{ + ev_loop (w->other, EVLOOP_NONBLOCK); +} + +static void +embed_io_cb (EV_P_ ev_io *io, int revents) +{ + ev_embed *w = (ev_embed *)(((char *)io) - offsetof (ev_embed, io)); + + if (ev_cb (w)) + ev_feed_event (EV_A_ (W)w, EV_EMBED); + else + ev_loop (w->other, EVLOOP_NONBLOCK); +} + +static void +embed_prepare_cb (EV_P_ ev_prepare *prepare, int revents) +{ + ev_embed *w = (ev_embed *)(((char *)prepare) - offsetof (ev_embed, prepare)); + + { + EV_P = w->other; + + while (fdchangecnt) + { + fd_reify (EV_A); + ev_loop (EV_A_ EVLOOP_NONBLOCK); + } + } +} + +static void +embed_fork_cb (EV_P_ ev_fork *fork_w, int revents) +{ + ev_embed *w = (ev_embed *)(((char *)fork_w) - offsetof (ev_embed, fork)); + + ev_embed_stop (EV_A_ w); + + { + EV_P = w->other; + + ev_loop_fork (EV_A); + ev_loop (EV_A_ EVLOOP_NONBLOCK); + } + + ev_embed_start (EV_A_ w); +} + +#if 0 +static void +embed_idle_cb (EV_P_ ev_idle *idle, int revents) +{ + ev_idle_stop (EV_A_ idle); +} +#endif + +void +ev_embed_start (EV_P_ ev_embed *w) +{ + if (expect_false (ev_is_active (w))) + return; + + { + EV_P = w->other; + assert (("libev: loop to be embedded is not embeddable", backend & ev_embeddable_backends ())); + 
ev_io_init (&w->io, embed_io_cb, backend_fd, EV_READ); + } + + EV_FREQUENT_CHECK; + + ev_set_priority (&w->io, ev_priority (w)); + ev_io_start (EV_A_ &w->io); + + ev_prepare_init (&w->prepare, embed_prepare_cb); + ev_set_priority (&w->prepare, EV_MINPRI); + ev_prepare_start (EV_A_ &w->prepare); + + ev_fork_init (&w->fork, embed_fork_cb); + ev_fork_start (EV_A_ &w->fork); + + /*ev_idle_init (&w->idle, e,bed_idle_cb);*/ + + ev_start (EV_A_ (W)w, 1); + + EV_FREQUENT_CHECK; +} + +void +ev_embed_stop (EV_P_ ev_embed *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + ev_io_stop (EV_A_ &w->io); + ev_prepare_stop (EV_A_ &w->prepare); + ev_fork_stop (EV_A_ &w->fork); + + EV_FREQUENT_CHECK; +} +#endif + +#if EV_FORK_ENABLE +void +ev_fork_start (EV_P_ ev_fork *w) +{ + if (expect_false (ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + ev_start (EV_A_ (W)w, ++forkcnt); + array_needsize (ev_fork *, forks, forkmax, forkcnt, EMPTY2); + forks [forkcnt - 1] = w; + + EV_FREQUENT_CHECK; +} + +void +ev_fork_stop (EV_P_ ev_fork *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + { + int active = ev_active (w); + + forks [active - 1] = forks [--forkcnt]; + ev_active (forks [active - 1]) = active; + } + + ev_stop (EV_A_ (W)w); + + EV_FREQUENT_CHECK; +} +#endif + +#if EV_ASYNC_ENABLE +void +ev_async_start (EV_P_ ev_async *w) +{ + if (expect_false (ev_is_active (w))) + return; + + evpipe_init (EV_A); + + EV_FREQUENT_CHECK; + + ev_start (EV_A_ (W)w, ++asynccnt); + array_needsize (ev_async *, asyncs, asyncmax, asynccnt, EMPTY2); + asyncs [asynccnt - 1] = w; + + EV_FREQUENT_CHECK; +} + +void +ev_async_stop (EV_P_ ev_async *w) +{ + clear_pending (EV_A_ (W)w); + if (expect_false (!ev_is_active (w))) + return; + + EV_FREQUENT_CHECK; + + { + int active = ev_active (w); + + asyncs [active - 1] = asyncs [--asynccnt]; + ev_active (asyncs [active - 1]) = active; + } + + 
ev_stop (EV_A_ (W)w); + + EV_FREQUENT_CHECK; +} + +void +ev_async_send (EV_P_ ev_async *w) +{ + w->sent = 1; + evpipe_write (EV_A_ &async_pending); +} +#endif + +/*****************************************************************************/ + +struct ev_once +{ + ev_io io; + ev_timer to; + void (*cb)(int revents, void *arg); + void *arg; +}; + +static void +once_cb (EV_P_ struct ev_once *once, int revents) +{ + void (*cb)(int revents, void *arg) = once->cb; + void *arg = once->arg; + + ev_io_stop (EV_A_ &once->io); + ev_timer_stop (EV_A_ &once->to); + ev_free (once); + + cb (revents, arg); +} + +static void +once_cb_io (EV_P_ ev_io *w, int revents) +{ + struct ev_once *once = (struct ev_once *)(((char *)w) - offsetof (struct ev_once, io)); + + once_cb (EV_A_ once, revents | ev_clear_pending (EV_A_ &once->to)); +} + +static void +once_cb_to (EV_P_ ev_timer *w, int revents) +{ + struct ev_once *once = (struct ev_once *)(((char *)w) - offsetof (struct ev_once, to)); + + once_cb (EV_A_ once, revents | ev_clear_pending (EV_A_ &once->io)); +} + +void +ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, void *arg), void *arg) +{ + struct ev_once *once = (struct ev_once *)ev_malloc (sizeof (struct ev_once)); + + if (expect_false (!once)) + { + cb (EV_ERROR | EV_READ | EV_WRITE | EV_TIMEOUT, arg); + return; + } + + once->cb = cb; + once->arg = arg; + + ev_init (&once->io, once_cb_io); + if (fd >= 0) + { + ev_io_set (&once->io, fd, events); + ev_io_start (EV_A_ &once->io); + } + + ev_init (&once->to, once_cb_to); + if (timeout >= 0.) 
+ { + ev_timer_set (&once->to, timeout, 0.); + ev_timer_start (EV_A_ &once->to); + } +} + +/*****************************************************************************/ + +#if EV_WALK_ENABLE +void +ev_walk (EV_P_ int types, void (*cb)(EV_P_ int type, void *w)) +{ + int i, j; + ev_watcher_list *wl, *wn; + + if (types & (EV_IO | EV_EMBED)) + for (i = 0; i < anfdmax; ++i) + for (wl = anfds [i].head; wl; ) + { + wn = wl->next; + +#if EV_EMBED_ENABLE + if (ev_cb ((ev_io *)wl) == embed_io_cb) + { + if (types & EV_EMBED) + cb (EV_A_ EV_EMBED, ((char *)wl) - offsetof (struct ev_embed, io)); + } + else +#endif +#if EV_USE_INOTIFY + if (ev_cb ((ev_io *)wl) == infy_cb) + ; + else +#endif + if ((ev_io *)wl != &pipe_w) + if (types & EV_IO) + cb (EV_A_ EV_IO, wl); + + wl = wn; + } + + if (types & (EV_TIMER | EV_STAT)) + for (i = timercnt + HEAP0; i-- > HEAP0; ) +#if EV_STAT_ENABLE + /*TODO: timer is not always active*/ + if (ev_cb ((ev_timer *)ANHE_w (timers [i])) == stat_timer_cb) + { + if (types & EV_STAT) + cb (EV_A_ EV_STAT, ((char *)ANHE_w (timers [i])) - offsetof (struct ev_stat, timer)); + } + else +#endif + if (types & EV_TIMER) + cb (EV_A_ EV_TIMER, ANHE_w (timers [i])); + +#if EV_PERIODIC_ENABLE + if (types & EV_PERIODIC) + for (i = periodiccnt + HEAP0; i-- > HEAP0; ) + cb (EV_A_ EV_PERIODIC, ANHE_w (periodics [i])); +#endif + +#if EV_IDLE_ENABLE + if (types & EV_IDLE) + for (j = NUMPRI; i--; ) + for (i = idlecnt [j]; i--; ) + cb (EV_A_ EV_IDLE, idles [j][i]); +#endif + +#if EV_FORK_ENABLE + if (types & EV_FORK) + for (i = forkcnt; i--; ) + if (ev_cb (forks [i]) != embed_fork_cb) + cb (EV_A_ EV_FORK, forks [i]); +#endif + +#if EV_ASYNC_ENABLE + if (types & EV_ASYNC) + for (i = asynccnt; i--; ) + cb (EV_A_ EV_ASYNC, asyncs [i]); +#endif + + if (types & EV_PREPARE) + for (i = preparecnt; i--; ) +#if EV_EMBED_ENABLE + if (ev_cb (prepares [i]) != embed_prepare_cb) +#endif + cb (EV_A_ EV_PREPARE, prepares [i]); + + if (types & EV_CHECK) + for (i = checkcnt; i--; ) + cb 
(EV_A_ EV_CHECK, checks [i]); + + if (types & EV_SIGNAL) + for (i = 0; i < EV_NSIG - 1; ++i) + for (wl = signals [i].head; wl; ) + { + wn = wl->next; + cb (EV_A_ EV_SIGNAL, wl); + wl = wn; + } + + if (types & EV_CHILD) + for (i = EV_PID_HASHSIZE; i--; ) + for (wl = childs [i]; wl; ) + { + wn = wl->next; + cb (EV_A_ EV_CHILD, wl); + wl = wn; + } +/* EV_STAT 0x00001000 /* stat data changed */ +/* EV_EMBED 0x00010000 /* embedded event loop needs sweep */ +} +#endif + +#if EV_MULTIPLICITY + #include "ev_wrap.h" +#endif + +#ifdef __cplusplus +} +#endif + diff --git a/libev/ev.h b/libev/ev.h new file mode 100644 index 0000000..c7c44ff --- /dev/null +++ b/libev/ev.h @@ -0,0 +1,705 @@ +/* + * libev native API header + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. 
+ */ + +#ifndef EV_H_ +#define EV_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef double ev_tstamp; + +/* these priorities are inclusive, higher priorities will be called earlier */ +#ifndef EV_MINPRI +# define EV_MINPRI -2 +#endif +#ifndef EV_MAXPRI +# define EV_MAXPRI +2 +#endif + +#ifndef EV_MULTIPLICITY +# define EV_MULTIPLICITY 1 +#endif + +#ifndef EV_PERIODIC_ENABLE +# define EV_PERIODIC_ENABLE 1 +#endif + +#ifndef EV_STAT_ENABLE +# define EV_STAT_ENABLE 1 +#endif + +#ifndef EV_IDLE_ENABLE +# define EV_IDLE_ENABLE 1 +#endif + +#ifndef EV_FORK_ENABLE +# define EV_FORK_ENABLE 1 +#endif + +#ifndef EV_EMBED_ENABLE +# define EV_EMBED_ENABLE 1 +#endif + +#ifndef EV_ASYNC_ENABLE +# define EV_ASYNC_ENABLE 1 +#endif + +#ifndef EV_WALK_ENABLE +# define EV_WALK_ENABLE 0 /* not yet */ +#endif + +#ifndef EV_ATOMIC_T +# include <signal.h> +# define EV_ATOMIC_T sig_atomic_t volatile +#endif + +/*****************************************************************************/ + +#if EV_STAT_ENABLE +# ifdef _WIN32 +# include <time.h> +# include <sys/types.h> +# endif +# include <sys/stat.h> +#endif + +/* support multiple event loops? */ +#if EV_MULTIPLICITY +struct ev_loop; +# define EV_P struct ev_loop *loop +# define EV_P_ EV_P, +# define EV_A loop +# define EV_A_ EV_A, +# define EV_DEFAULT_UC ev_default_loop_uc () +# define EV_DEFAULT_UC_ EV_DEFAULT_UC, +# define EV_DEFAULT ev_default_loop (0) +# define EV_DEFAULT_ EV_DEFAULT, +#else +# define EV_P void +# define EV_P_ +# define EV_A +# define EV_A_ +# define EV_DEFAULT +# define EV_DEFAULT_ +# define EV_DEFAULT_UC +# define EV_DEFAULT_UC_ +# undef EV_EMBED_ENABLE +#endif + +#if __STDC_VERSION__ >= 199901L || __GNUC__ >= 3 +# define EV_INLINE static inline +#else +# define EV_INLINE static +#endif + +/*****************************************************************************/ + +/* eventmask, revents, events... 
*/ +#define EV_UNDEF -1 /* guaranteed to be invalid */ +#define EV_NONE 0x00 /* no events */ +#define EV_READ 0x01 /* ev_io detected read will not block */ +#define EV_WRITE 0x02 /* ev_io detected write will not block */ +#define EV__IOFDSET 0x80 /* internal use only */ +#define EV_IO EV_READ /* alias for type-detection */ +#define EV_TIMEOUT 0x00000100 /* timer timed out */ +#define EV_TIMER EV_TIMEOUT /* alias for type-detection */ +#define EV_PERIODIC 0x00000200 /* periodic timer timed out */ +#define EV_SIGNAL 0x00000400 /* signal was received */ +#define EV_CHILD 0x00000800 /* child/pid had status change */ +#define EV_STAT 0x00001000 /* stat data changed */ +#define EV_IDLE 0x00002000 /* event loop is idling */ +#define EV_PREPARE 0x00004000 /* event loop about to poll */ +#define EV_CHECK 0x00008000 /* event loop finished poll */ +#define EV_EMBED 0x00010000 /* embedded event loop needs sweep */ +#define EV_FORK 0x00020000 /* event loop resumed in child */ +#define EV_ASYNC 0x00040000 /* async intra-loop signal */ +#define EV_CUSTOM 0x01000000 /* for use by user code */ +#define EV_ERROR 0x80000000 /* sent when an error occurs */ + +/* can be used to add custom fields to all watchers, while losing binary compatibility */ +#ifndef EV_COMMON +# define EV_COMMON void *data; +#endif +#ifndef EV_PROTOTYPES +# define EV_PROTOTYPES 1 +#endif + +#define EV_VERSION_MAJOR 3 +#define EV_VERSION_MINOR 9 + +#ifndef EV_CB_DECLARE +# define EV_CB_DECLARE(type) void (*cb)(EV_P_ struct type *w, int revents); +#endif +#ifndef EV_CB_INVOKE +# define EV_CB_INVOKE(watcher,revents) (watcher)->cb (EV_A_ (watcher), (revents)) +#endif + +/* + * struct member types: + * private: you may look at them, but not change them, + * and they might not mean anything to you. + * ro: can be read anytime, but only changed when the watcher isn't active. + * rw: can be read and modified anytime, even when the watcher is active. 
+ * + * some internal details that might be helpful for debugging: + * + * active is either 0, which means the watcher is not active, + * or the array index of the watcher (periodics, timers) + * or the array index + 1 (most other watchers) + * or simply 1 for watchers that aren't in some array. + * pending is either 0, in which case the watcher isn't, + * or the array index + 1 in the pendings array. + */ + +#if EV_MINPRI == EV_MAXPRI +# define EV_DECL_PRIORITY +#else +# define EV_DECL_PRIORITY int priority; +#endif + +/* shared by all watchers */ +#define EV_WATCHER(type) \ + int active; /* private */ \ + int pending; /* private */ \ + EV_DECL_PRIORITY /* private */ \ + EV_COMMON /* rw */ \ + EV_CB_DECLARE (type) /* private */ + +#define EV_WATCHER_LIST(type) \ + EV_WATCHER (type) \ + struct ev_watcher_list *next; /* private */ + +#define EV_WATCHER_TIME(type) \ + EV_WATCHER (type) \ + ev_tstamp at; /* private */ + +/* base class, nothing to see here unless you subclass */ +typedef struct ev_watcher +{ + EV_WATCHER (ev_watcher) +} ev_watcher; + +/* base class, nothing to see here unless you subclass */ +typedef struct ev_watcher_list +{ + EV_WATCHER_LIST (ev_watcher_list) +} ev_watcher_list; + +/* base class, nothing to see here unless you subclass */ +typedef struct ev_watcher_time +{ + EV_WATCHER_TIME (ev_watcher_time) +} ev_watcher_time; + +/* invoked when fd is either EV_READable or EV_WRITEable */ +/* revent EV_READ, EV_WRITE */ +typedef struct ev_io +{ + EV_WATCHER_LIST (ev_io) + + int fd; /* ro */ + int events; /* ro */ +} ev_io; + +/* invoked after a specific time, repeatable (based on monotonic clock) */ +/* revent EV_TIMEOUT */ +typedef struct ev_timer +{ + EV_WATCHER_TIME (ev_timer) + + ev_tstamp repeat; /* rw */ +} ev_timer; + +/* invoked at some specific time, possibly repeating at regular intervals (based on UTC) */ +/* revent EV_PERIODIC */ +typedef struct ev_periodic +{ + EV_WATCHER_TIME (ev_periodic) + + ev_tstamp offset; /* rw */ + ev_tstamp 
interval; /* rw */ + ev_tstamp (*reschedule_cb)(struct ev_periodic *w, ev_tstamp now); /* rw */ +} ev_periodic; + +/* invoked when the given signal has been received */ +/* revent EV_SIGNAL */ +typedef struct ev_signal +{ + EV_WATCHER_LIST (ev_signal) + + int signum; /* ro */ +} ev_signal; + +/* invoked when sigchld is received and waitpid indicates the given pid */ +/* revent EV_CHILD */ +/* does not support priorities */ +typedef struct ev_child +{ + EV_WATCHER_LIST (ev_child) + + int flags; /* private */ + int pid; /* ro */ + int rpid; /* rw, holds the received pid */ + int rstatus; /* rw, holds the exit status, use the macros from sys/wait.h */ +} ev_child; + +#if EV_STAT_ENABLE +/* st_nlink = 0 means missing file or other error */ +# ifdef _WIN32 +typedef struct _stati64 ev_statdata; +# else +typedef struct stat ev_statdata; +# endif + +/* invoked each time the stat data changes for a given path */ +/* revent EV_STAT */ +typedef struct ev_stat +{ + EV_WATCHER_LIST (ev_stat) + + ev_timer timer; /* private */ + ev_tstamp interval; /* ro */ + const char *path; /* ro */ + ev_statdata prev; /* ro */ + ev_statdata attr; /* ro */ + + int wd; /* wd for inotify, fd for kqueue */ +} ev_stat; +#endif + +#if EV_IDLE_ENABLE +/* invoked when nothing else needs to be done, keeps the process from blocking */ +/* revent EV_IDLE */ +typedef struct ev_idle +{ + EV_WATCHER (ev_idle) +} ev_idle; +#endif + +/* invoked for each run of the mainloop, just before the blocking call */ +/* you can still change events in any way you like */ +/* revent EV_PREPARE */ +typedef struct ev_prepare +{ + EV_WATCHER (ev_prepare) +} ev_prepare; + +/* invoked for each run of the mainloop, just after the blocking call */ +/* revent EV_CHECK */ +typedef struct ev_check +{ + EV_WATCHER (ev_check) +} ev_check; + +#if EV_FORK_ENABLE +/* the callback gets invoked before check in the child process when a fork was detected */ +typedef struct ev_fork +{ + EV_WATCHER (ev_fork) +} ev_fork; +#endif + +#if 
EV_EMBED_ENABLE +/* used to embed an event loop inside another */ +/* the callback gets invoked when the event loop has handled events, and can be 0 */ +typedef struct ev_embed +{ + EV_WATCHER (ev_embed) + + struct ev_loop *other; /* ro */ + ev_io io; /* private */ + ev_prepare prepare; /* private */ + ev_check check; /* unused */ + ev_timer timer; /* unused */ + ev_periodic periodic; /* unused */ + ev_idle idle; /* unused */ + ev_fork fork; /* private */ +} ev_embed; +#endif + +#if EV_ASYNC_ENABLE +/* invoked when somebody calls ev_async_send on the watcher */ +/* revent EV_ASYNC */ +typedef struct ev_async +{ + EV_WATCHER (ev_async) + + EV_ATOMIC_T sent; /* private */ +} ev_async; + +# define ev_async_pending(w) (+(w)->sent) +#endif + +/* the presence of this union forces similar struct layout */ +union ev_any_watcher +{ + struct ev_watcher w; + struct ev_watcher_list wl; + + struct ev_io io; + struct ev_timer timer; + struct ev_periodic periodic; + struct ev_signal signal; + struct ev_child child; +#if EV_STAT_ENABLE + struct ev_stat stat; +#endif +#if EV_IDLE_ENABLE + struct ev_idle idle; +#endif + struct ev_prepare prepare; + struct ev_check check; +#if EV_FORK_ENABLE + struct ev_fork fork; +#endif +#if EV_EMBED_ENABLE + struct ev_embed embed; +#endif +#if EV_ASYNC_ENABLE + struct ev_async async; +#endif +}; + +/* bits for ev_default_loop and ev_loop_new */ +/* the default */ +#define EVFLAG_AUTO 0x00000000U /* not quite a mask */ +/* flag bits */ +#define EVFLAG_NOENV 0x01000000U /* do NOT consult environment */ +#define EVFLAG_FORKCHECK 0x02000000U /* check for a fork in each iteration */ +/* debugging/feature disable */ +#define EVFLAG_NOINOTIFY 0x00100000U /* do not attempt to use inotify */ +#define EVFLAG_NOSIGFD 0 /* compatibility to pre-3.9 */ +#define EVFLAG_SIGNALFD 0x00200000U /* attempt to use signalfd */ +/* method bits to be ored together */ +#define EVBACKEND_SELECT 0x00000001U /* about anywhere */ +#define EVBACKEND_POLL 0x00000002U /* !win */ 
+#define EVBACKEND_EPOLL 0x00000004U /* linux */ +#define EVBACKEND_KQUEUE 0x00000008U /* bsd */ +#define EVBACKEND_DEVPOLL 0x00000010U /* solaris 8 */ /* NYI */ +#define EVBACKEND_PORT 0x00000020U /* solaris 10 */ +#define EVBACKEND_ALL 0x0000003FU + +#if EV_PROTOTYPES +int ev_version_major (void); +int ev_version_minor (void); + +unsigned int ev_supported_backends (void); +unsigned int ev_recommended_backends (void); +unsigned int ev_embeddable_backends (void); + +ev_tstamp ev_time (void); +void ev_sleep (ev_tstamp delay); /* sleep for a while */ + +/* Sets the allocation function to use, works like realloc. + * It is used to allocate and free memory. + * If it returns zero when memory needs to be allocated, the library might abort + * or take some potentially destructive action. + * The default is your system realloc function. + */ +void ev_set_allocator (void *(*cb)(void *ptr, long size)); + +/* set the callback function to call on a + * retryable syscall error + * (such as failed select, poll, epoll_wait) + */ +void ev_set_syserr_cb (void (*cb)(const char *msg)); + +#if EV_MULTIPLICITY +EV_INLINE struct ev_loop * +ev_default_loop_uc (void) +{ + extern struct ev_loop *ev_default_loop_ptr; + + return ev_default_loop_ptr; +} + +/* the default loop is the only one that handles signals and child watchers */ +/* you can call this as often as you like */ +EV_INLINE struct ev_loop * +ev_default_loop (unsigned int flags) +{ + struct ev_loop *loop = ev_default_loop_uc (); + + if (!loop) + { + extern struct ev_loop *ev_default_loop_init (unsigned int flags); + + loop = ev_default_loop_init (flags); + } + + return loop; +} + +/* create and destroy alternative loops that don't handle signals */ +struct ev_loop *ev_loop_new (unsigned int flags); +void ev_loop_destroy (EV_P); +void ev_loop_fork (EV_P); + +ev_tstamp ev_now (EV_P); /* time w.r.t. 
timers and the eventloop, updated after each poll */ + +#else + +int ev_default_loop (unsigned int flags); /* returns true when successful */ + +EV_INLINE ev_tstamp +ev_now (void) +{ + extern ev_tstamp ev_rt_now; + + return ev_rt_now; +} +#endif /* multiplicity */ + +EV_INLINE int +ev_is_default_loop (EV_P) +{ +#if EV_MULTIPLICITY + extern struct ev_loop *ev_default_loop_ptr; + + return !!(EV_A == ev_default_loop_ptr); +#else + return 1; +#endif +} + +void ev_default_destroy (void); /* destroy the default loop */ +/* this needs to be called after fork, to duplicate the default loop */ +/* if you create alternative loops you have to call ev_loop_fork on them */ +/* you can call it in either the parent or the child */ +/* you can actually call it at any time, anywhere :) */ +void ev_default_fork (void); + +unsigned int ev_backend (EV_P); /* backend in use by loop */ + +void ev_now_update (EV_P); /* update event loop time */ + +#if EV_WALK_ENABLE +/* walk (almost) all watchers in the loop of a given type, invoking the */ +/* callback on every such watcher. The callback might stop the watcher, */ +/* but do nothing else with the loop */ +void ev_walk (EV_P_ int types, void (*cb)(EV_P_ int type, void *w)); +#endif + +#endif /* prototypes */ + +#define EVLOOP_NONBLOCK 1 /* do not block/wait */ +#define EVLOOP_ONESHOT 2 /* block *once* only */ +#define EVUNLOOP_CANCEL 0 /* undo unloop */ +#define EVUNLOOP_ONE 1 /* unloop once */ +#define EVUNLOOP_ALL 2 /* unloop all loops */ + +#if EV_PROTOTYPES +void ev_loop (EV_P_ int flags); +void ev_unloop (EV_P_ int how); /* set to 1 to break out of event loop, set to 2 to break out of all event loops */ + +/* + * ref/unref can be used to add or remove a refcount on the mainloop. every watcher + * keeps one reference. if you have a long-running watcher you never unregister that + * should not keep ev_loop from running, unref() after starting, and ref() before stopping. 
+ */ +void ev_ref (EV_P); +void ev_unref (EV_P); + +/* + * convenience function, wait for a single event, without registering an event watcher + * if timeout is < 0, do wait indefinitely + */ +void ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, void *arg), void *arg); + +# if EV_MINIMAL < 2 +unsigned int ev_loop_count (EV_P); /* number of loop iterations */ +unsigned int ev_loop_depth (EV_P); /* #ev_loop enters - #ev_loop leaves */ +void ev_loop_verify (EV_P); /* abort if loop data corrupted */ + +void ev_set_io_collect_interval (EV_P_ ev_tstamp interval); /* sleep at least this time, default 0 */ +void ev_set_timeout_collect_interval (EV_P_ ev_tstamp interval); /* sleep at least this time, default 0 */ + +/* advanced stuff for threading etc. support, see docs */ +void ev_set_userdata (EV_P_ void *data); +void *ev_userdata (EV_P); +void ev_set_invoke_pending_cb (EV_P_ void (*invoke_pending_cb)(EV_P)); +void ev_set_loop_release_cb (EV_P_ void (*release)(EV_P), void (*acquire)(EV_P)); + +unsigned int ev_pending_count (EV_P); /* number of pending events, if any */ +void ev_invoke_pending (EV_P); /* invoke all pending watchers */ + +/* + * stop/start the timer handling. 
+ */ +void ev_suspend (EV_P); +void ev_resume (EV_P); +#endif + +#endif + +/* these may evaluate ev multiple times, and the other arguments at most once */ +/* either use ev_init + ev_TYPE_set, or the ev_TYPE_init macro, below, to first initialise a watcher */ +#define ev_init(ev,cb_) do { \ + ((ev_watcher *)(void *)(ev))->active = \ + ((ev_watcher *)(void *)(ev))->pending = 0; \ + ev_set_priority ((ev), 0); \ + ev_set_cb ((ev), cb_); \ +} while (0) + +#define ev_io_set(ev,fd_,events_) do { (ev)->fd = (fd_); (ev)->events = (events_) | EV__IOFDSET; } while (0) +#define ev_timer_set(ev,after_,repeat_) do { ((ev_watcher_time *)(ev))->at = (after_); (ev)->repeat = (repeat_); } while (0) +#define ev_periodic_set(ev,ofs_,ival_,rcb_) do { (ev)->offset = (ofs_); (ev)->interval = (ival_); (ev)->reschedule_cb = (rcb_); } while (0) +#define ev_signal_set(ev,signum_) do { (ev)->signum = (signum_); } while (0) +#define ev_child_set(ev,pid_,trace_) do { (ev)->pid = (pid_); (ev)->flags = !!(trace_); } while (0) +#define ev_stat_set(ev,path_,interval_) do { (ev)->path = (path_); (ev)->interval = (interval_); (ev)->wd = -2; } while (0) +#define ev_idle_set(ev) /* nop, yes, this is a serious in-joke */ +#define ev_prepare_set(ev) /* nop, yes, this is a serious in-joke */ +#define ev_check_set(ev) /* nop, yes, this is a serious in-joke */ +#define ev_embed_set(ev,other_) do { (ev)->other = (other_); } while (0) +#define ev_fork_set(ev) /* nop, yes, this is a serious in-joke */ +#define ev_async_set(ev) do { (ev)->sent = 0; } while (0) + +#define ev_io_init(ev,cb,fd,events) do { ev_init ((ev), (cb)); ev_io_set ((ev),(fd),(events)); } while (0) +#define ev_timer_init(ev,cb,after,repeat) do { ev_init ((ev), (cb)); ev_timer_set ((ev),(after),(repeat)); } while (0) +#define ev_periodic_init(ev,cb,ofs,ival,rcb) do { ev_init ((ev), (cb)); ev_periodic_set ((ev),(ofs),(ival),(rcb)); } while (0) +#define ev_signal_init(ev,cb,signum) do { ev_init ((ev), (cb)); ev_signal_set ((ev), (signum)); } 
while (0) +#define ev_child_init(ev,cb,pid,trace) do { ev_init ((ev), (cb)); ev_child_set ((ev),(pid),(trace)); } while (0) +#define ev_stat_init(ev,cb,path,interval) do { ev_init ((ev), (cb)); ev_stat_set ((ev),(path),(interval)); } while (0) +#define ev_idle_init(ev,cb) do { ev_init ((ev), (cb)); ev_idle_set ((ev)); } while (0) +#define ev_prepare_init(ev,cb) do { ev_init ((ev), (cb)); ev_prepare_set ((ev)); } while (0) +#define ev_check_init(ev,cb) do { ev_init ((ev), (cb)); ev_check_set ((ev)); } while (0) +#define ev_embed_init(ev,cb,other) do { ev_init ((ev), (cb)); ev_embed_set ((ev),(other)); } while (0) +#define ev_fork_init(ev,cb) do { ev_init ((ev), (cb)); ev_fork_set ((ev)); } while (0) +#define ev_async_init(ev,cb) do { ev_init ((ev), (cb)); ev_async_set ((ev)); } while (0) + +#define ev_is_pending(ev) (0 + ((ev_watcher *)(void *)(ev))->pending) /* ro, true when watcher is waiting for callback invocation */ +#define ev_is_active(ev) (0 + ((ev_watcher *)(void *)(ev))->active) /* ro, true when the watcher has been started */ + +#define ev_cb(ev) (ev)->cb /* rw */ + +#if EV_MINPRI == EV_MAXPRI +# define ev_priority(ev) ((ev), EV_MINPRI) +# define ev_set_priority(ev,pri) ((ev), (pri)) +#else +# define ev_priority(ev) (+(((ev_watcher *)(void *)(ev))->priority)) +# define ev_set_priority(ev,pri) ( (ev_watcher *)(void *)(ev))->priority = (pri) +#endif + +#define ev_periodic_at(ev) (+((ev_watcher_time *)(ev))->at) + +#ifndef ev_set_cb +# define ev_set_cb(ev,cb_) ev_cb (ev) = (cb_) +#endif + +/* starting (enabling, adding) a watcher does nothing if it is already running */ +/* stopping (disabling, deleting) a watcher does nothing unless it's already running */ +#if EV_PROTOTYPES + +/* feeds an event into a watcher as if the event actually occurred */ +/* accepts any ev_watcher type */ +void ev_feed_event (EV_P_ void *w, int revents); +void ev_feed_fd_event (EV_P_ int fd, int revents); +void ev_feed_signal_event (EV_P_ int signum); +void ev_invoke (EV_P_ void *w, 
int revents); +int ev_clear_pending (EV_P_ void *w); + +void ev_io_start (EV_P_ ev_io *w); +void ev_io_stop (EV_P_ ev_io *w); + +void ev_timer_start (EV_P_ ev_timer *w); +void ev_timer_stop (EV_P_ ev_timer *w); +/* stops if active and no repeat, restarts if active and repeating, starts if inactive and repeating */ +void ev_timer_again (EV_P_ ev_timer *w); +/* return remaining time */ +ev_tstamp ev_timer_remaining (EV_P_ ev_timer *w); + +#if EV_PERIODIC_ENABLE +void ev_periodic_start (EV_P_ ev_periodic *w); +void ev_periodic_stop (EV_P_ ev_periodic *w); +void ev_periodic_again (EV_P_ ev_periodic *w); +#endif + +/* only supported in the default loop */ +void ev_signal_start (EV_P_ ev_signal *w); +void ev_signal_stop (EV_P_ ev_signal *w); + +/* only supported in the default loop */ +void ev_child_start (EV_P_ ev_child *w); +void ev_child_stop (EV_P_ ev_child *w); + +# if EV_STAT_ENABLE +void ev_stat_start (EV_P_ ev_stat *w); +void ev_stat_stop (EV_P_ ev_stat *w); +void ev_stat_stat (EV_P_ ev_stat *w); +# endif + +# if EV_IDLE_ENABLE +void ev_idle_start (EV_P_ ev_idle *w); +void ev_idle_stop (EV_P_ ev_idle *w); +# endif + +void ev_prepare_start (EV_P_ ev_prepare *w); +void ev_prepare_stop (EV_P_ ev_prepare *w); + +void ev_check_start (EV_P_ ev_check *w); +void ev_check_stop (EV_P_ ev_check *w); + +# if EV_FORK_ENABLE +void ev_fork_start (EV_P_ ev_fork *w); +void ev_fork_stop (EV_P_ ev_fork *w); +# endif + +# if EV_EMBED_ENABLE +/* only supported when loop to be embedded is in fact embeddable */ +void ev_embed_start (EV_P_ ev_embed *w); +void ev_embed_stop (EV_P_ ev_embed *w); +void ev_embed_sweep (EV_P_ ev_embed *w); +# endif + +# if EV_ASYNC_ENABLE +void ev_async_start (EV_P_ ev_async *w); +void ev_async_stop (EV_P_ ev_async *w); +void ev_async_send (EV_P_ ev_async *w); +# endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/libev/ev_epoll.c b/libev/ev_epoll.c new file mode 100644 index 0000000..f7e3d60 --- /dev/null +++ b/libev/ev_epoll.c @@ -0,0 
+1,228 @@ +/* + * libev epoll fd activity backend + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. 
If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +/* + * general notes about epoll: + * + * a) epoll silently removes fds from the fd set. as nothing tells us + * that an fd has been removed otherwise, we have to continually + * "rearm" fds that we suspect *might* have changed (same + * problem with kqueue, but much less costly there). + * b) the fact that ADD != MOD creates a lot of extra syscalls due to a) + * and seems not to have any advantage. + * c) the inability to handle fork or file descriptors (think dup) + * limits the applicability over poll, so this is not a generic + * poll replacement. + * + * lots of "weird code" and complication handling in this file is due + * to these design problems with epoll, as we try very hard to avoid + * epoll_ctl syscalls for common usage patterns and handle the breakage + * ensuing from receiving events for closed and otherwise long gone + * file descriptors. + */ + +#include <sys/epoll.h> + +static void +epoll_modify (EV_P_ int fd, int oev, int nev) +{ + struct epoll_event ev; + unsigned char oldmask; + + /* + * we handle EPOLL_CTL_DEL by ignoring it here + * on the assumption that the fd is gone anyways + * if that is wrong, we have to handle the spurious + * event in epoll_poll. + * if the fd is added again, we try to ADD it, and, if that + * fails, we assume it still has the same eventmask. + */ + if (!nev) + return; + + oldmask = anfds [fd].emask; + anfds [fd].emask = nev; + + /* store the generation counter in the upper 32 bits, the fd in the lower 32 bits */ + ev.data.u64 = (uint64_t)(uint32_t)fd + | ((uint64_t)(uint32_t)++anfds [fd].egen << 32); + ev.events = (nev & EV_READ ? EPOLLIN : 0) + | (nev & EV_WRITE ? EPOLLOUT : 0); + + if (expect_true (!epoll_ctl (backend_fd, oev ? 
EPOLL_CTL_MOD : EPOLL_CTL_ADD, fd, &ev))) + return; + + if (expect_true (errno == ENOENT)) + { + /* if ENOENT then the fd went away, so try to do the right thing */ + if (!nev) + goto dec_egen; + + if (!epoll_ctl (backend_fd, EPOLL_CTL_ADD, fd, &ev)) + return; + } + else if (expect_true (errno == EEXIST)) + { + /* EEXIST means we ignored a previous DEL, but the fd is still active */ + /* if the kernel mask is the same as the new mask, we assume it hasn't changed */ + if (oldmask == nev) + goto dec_egen; + + if (!epoll_ctl (backend_fd, EPOLL_CTL_MOD, fd, &ev)) + return; + } + + fd_kill (EV_A_ fd); + +dec_egen: + /* we didn't successfully call epoll_ctl, so decrement the generation counter again */ + --anfds [fd].egen; +} + +static void +epoll_poll (EV_P_ ev_tstamp timeout) +{ + int i; + int eventcnt; + + /* epoll wait times cannot be larger than (LONG_MAX - 999UL) / HZ msecs, which is below */ + /* the default libev max wait time, however. */ + EV_RELEASE_CB; + eventcnt = epoll_wait (backend_fd, epoll_events, epoll_eventmax, (int)ceil (timeout * 1000.)); + EV_ACQUIRE_CB; + + if (expect_false (eventcnt < 0)) + { + if (errno != EINTR) + ev_syserr ("(libev) epoll_wait"); + + return; + } + + for (i = 0; i < eventcnt; ++i) + { + struct epoll_event *ev = epoll_events + i; + + int fd = (uint32_t)ev->data.u64; /* mask out the lower 32 bits */ + int want = anfds [fd].events; + int got = (ev->events & (EPOLLOUT | EPOLLERR | EPOLLHUP) ? EV_WRITE : 0) + | (ev->events & (EPOLLIN | EPOLLERR | EPOLLHUP) ? EV_READ : 0); + + /* check for spurious notification */ + if (expect_false ((uint32_t)anfds [fd].egen != (uint32_t)(ev->data.u64 >> 32))) + { + /* recreate kernel state */ + postfork = 1; + continue; + } + + if (expect_false (got & ~want)) + { + anfds [fd].emask = want; + + /* we received an event but are not interested in it, try mod or del */ + /* I don't think we ever need MOD, but let's handle it anyways */ + ev->events = (want & EV_READ ? EPOLLIN : 0) + | (want & EV_WRITE ? 
EPOLLOUT : 0); + + /* pre-2.6.9 kernels require a non-null pointer with EPOLL_CTL_DEL, */ + /* which is fortunately easy to do for us. */ + if (epoll_ctl (backend_fd, want ? EPOLL_CTL_MOD : EPOLL_CTL_DEL, fd, ev)) + { + postfork = 1; /* an error occured, recreate kernel state */ + continue; + } + } + + fd_event (EV_A_ fd, got); + } + + /* if the receive array was full, increase its size */ + if (expect_false (eventcnt == epoll_eventmax)) + { + ev_free (epoll_events); + epoll_eventmax = array_nextsize (sizeof (struct epoll_event), epoll_eventmax, epoll_eventmax + 1); + epoll_events = (struct epoll_event *)ev_malloc (sizeof (struct epoll_event) * epoll_eventmax); + } +} + +int inline_size +epoll_init (EV_P_ int flags) +{ +#ifdef EPOLL_CLOEXEC + backend_fd = epoll_create1 (EPOLL_CLOEXEC); + + if (backend_fd <= 0) +#endif + backend_fd = epoll_create (256); + + if (backend_fd < 0) + return 0; + + fcntl (backend_fd, F_SETFD, FD_CLOEXEC); + + backend_fudge = 0.; /* kernel sources seem to indicate this to be zero */ + backend_modify = epoll_modify; + backend_poll = epoll_poll; + + epoll_eventmax = 64; /* initial number of events receivable per poll */ + epoll_events = (struct epoll_event *)ev_malloc (sizeof (struct epoll_event) * epoll_eventmax); + + return EVBACKEND_EPOLL; +} + +void inline_size +epoll_destroy (EV_P) +{ + ev_free (epoll_events); +} + +void inline_size +epoll_fork (EV_P) +{ + close (backend_fd); + + while ((backend_fd = epoll_create (256)) < 0) + ev_syserr ("(libev) epoll_create"); + + fcntl (backend_fd, F_SETFD, FD_CLOEXEC); + + fd_rearm_all (EV_A); +} + diff --git a/libev/ev_kqueue.c b/libev/ev_kqueue.c new file mode 100644 index 0000000..0fe340b --- /dev/null +++ b/libev/ev_kqueue.c @@ -0,0 +1,196 @@ +/* + * libev kqueue backend + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. 
+ */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/queue.h> +#include <sys/event.h> +#include <string.h> +#include <errno.h> + +void inline_speed +kqueue_change (EV_P_ int fd, int filter, int flags, int fflags) +{ + ++kqueue_changecnt; + array_needsize (struct kevent, kqueue_changes, kqueue_changemax, kqueue_changecnt, EMPTY2); + + EV_SET (&kqueue_changes [kqueue_changecnt - 1], fd, filter, flags, fflags, 0, 0); +} + +#ifndef NOTE_EOF +# define NOTE_EOF 0 +#endif + +static void +kqueue_modify (EV_P_ int fd, int oev, int nev) +{ + if (oev != nev) + { + if (oev & EV_READ) + kqueue_change (EV_A_ fd, EVFILT_READ , EV_DELETE, 0); + + if (oev & EV_WRITE) + kqueue_change (EV_A_ fd, EVFILT_WRITE, EV_DELETE, 0); + } + + /* to detect close/reopen reliably, we have to re-add */ + /* event requests even when oev == nev */ + + if (nev & EV_READ) + kqueue_change (EV_A_ fd, EVFILT_READ , EV_ADD, NOTE_EOF); + + if (nev & EV_WRITE) + kqueue_change (EV_A_ fd, EVFILT_WRITE, EV_ADD, NOTE_EOF); +} + +static void +kqueue_poll (EV_P_ ev_tstamp timeout) +{ + int res, i; + struct timespec ts; + + /* need to resize so there is enough space for errors */ + if (kqueue_changecnt > kqueue_eventmax) + { + ev_free (kqueue_events); + kqueue_eventmax = array_nextsize (sizeof (struct kevent), kqueue_eventmax, kqueue_changecnt); + kqueue_events = (struct kevent *)ev_malloc (sizeof (struct kevent) * kqueue_eventmax); + } + + EV_RELEASE_CB; + ts.tv_sec = (time_t)timeout; + ts.tv_nsec = (long)((timeout - (ev_tstamp)ts.tv_sec) * 1e9); + res = kevent (backend_fd, kqueue_changes, kqueue_changecnt, kqueue_events, kqueue_eventmax, &ts); + EV_ACQUIRE_CB; + kqueue_changecnt = 0; + + if (expect_false (res < 0)) + { + if (errno != EINTR) + ev_syserr ("(libev) kevent"); + + return; + } + + for (i = 0; i < res; ++i) + { + int fd = kqueue_events [i].ident; + + if (expect_false (kqueue_events [i].flags & EV_ERROR)) + { + int err = kqueue_events [i].data; + + /* we are only interested in errors for 
fds that we are interested in :) */ + if (anfds [fd].events) + { + if (err == ENOENT) /* resubmit changes on ENOENT */ + kqueue_modify (EV_A_ fd, 0, anfds [fd].events); + else if (err == EBADF) /* on EBADF, we re-check the fd */ + { + if (fd_valid (fd)) + kqueue_modify (EV_A_ fd, 0, anfds [fd].events); + else + fd_kill (EV_A_ fd); + } + else /* on all other errors, we error out on the fd */ + fd_kill (EV_A_ fd); + } + } + else + fd_event ( + EV_A_ + fd, + kqueue_events [i].filter == EVFILT_READ ? EV_READ + : kqueue_events [i].filter == EVFILT_WRITE ? EV_WRITE + : 0 + ); + } + + if (expect_false (res == kqueue_eventmax)) + { + ev_free (kqueue_events); + kqueue_eventmax = array_nextsize (sizeof (struct kevent), kqueue_eventmax, kqueue_eventmax + 1); + kqueue_events = (struct kevent *)ev_malloc (sizeof (struct kevent) * kqueue_eventmax); + } +} + +int inline_size +kqueue_init (EV_P_ int flags) +{ + /* Initialize the kernel queue */ + if ((backend_fd = kqueue ()) < 0) + return 0; + + fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* not sure if necessary, hopefully doesn't hurt */ + + backend_fudge = 0.; + backend_modify = kqueue_modify; + backend_poll = kqueue_poll; + + kqueue_eventmax = 64; /* initial number of events receivable per poll */ + kqueue_events = (struct kevent *)ev_malloc (sizeof (struct kevent) * kqueue_eventmax); + + kqueue_changes = 0; + kqueue_changemax = 0; + kqueue_changecnt = 0; + + return EVBACKEND_KQUEUE; +} + +void inline_size +kqueue_destroy (EV_P) +{ + ev_free (kqueue_events); + ev_free (kqueue_changes); +} + +void inline_size +kqueue_fork (EV_P) +{ + close (backend_fd); + + while ((backend_fd = kqueue ()) < 0) + ev_syserr ("(libev) kqueue"); + + fcntl (backend_fd, F_SETFD, FD_CLOEXEC); + + /* re-register interest in fds */ + fd_rearm_all (EV_A); +} + diff --git a/libev/ev_poll.c b/libev/ev_poll.c new file mode 100644 index 0000000..178e458 --- /dev/null +++ b/libev/ev_poll.c @@ -0,0 +1,144 @@ +/* + * libev poll fd activity backend + * + * Copyright 
(c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. 
If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#include <poll.h> + +void inline_size +pollidx_init (int *base, int count) +{ + /* consider using memset (.., -1, ...), which is practically guaranteed + * to work on all systems implementing poll */ + while (count--) + *base++ = -1; +} + +static void +poll_modify (EV_P_ int fd, int oev, int nev) +{ + int idx; + + if (oev == nev) + return; + + array_needsize (int, pollidxs, pollidxmax, fd + 1, pollidx_init); + + idx = pollidxs [fd]; + + if (idx < 0) /* need to allocate a new pollfd */ + { + pollidxs [fd] = idx = pollcnt++; + array_needsize (struct pollfd, polls, pollmax, pollcnt, EMPTY2); + polls [idx].fd = fd; + } + + assert (polls [idx].fd == fd); + + if (nev) + polls [idx].events = + (nev & EV_READ ? POLLIN : 0) + | (nev & EV_WRITE ? POLLOUT : 0); + else /* remove pollfd */ + { + pollidxs [fd] = -1; + + if (expect_true (idx < --pollcnt)) + { + polls [idx] = polls [pollcnt]; /* move the last pollfd into the freed slot */ + pollidxs [polls [idx].fd] = idx; /* and point the moved fd's index entry at its new slot */ + } + } +} + +static void +poll_poll (EV_P_ ev_tstamp timeout) +{ + struct pollfd *p; + int res; + + EV_RELEASE_CB; + res = poll (polls, pollcnt, (int)ceil (timeout * 1000.)); + EV_ACQUIRE_CB; + + if (expect_false (res < 0)) + { + if (errno == EBADF) + fd_ebadf (EV_A); + else if (errno == ENOMEM && !syserr_cb) + fd_enomem (EV_A); + else if (errno != EINTR) + ev_syserr ("(libev) poll"); + } + else + for (p = polls; res; ++p) + if (expect_false (p->revents)) /* this expect is debatable */ + { + --res; + + if (expect_false (p->revents & POLLNVAL)) + fd_kill (EV_A_ p->fd); + else + fd_event ( + EV_A_ + p->fd, + (p->revents & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0) + | (p->revents & (POLLIN | POLLERR | POLLHUP) ? 
EV_READ : 0) + ); + } +} + +int inline_size +poll_init (EV_P_ int flags) +{ + backend_fudge = 0.; /* posix says this is zero */ + backend_modify = poll_modify; + backend_poll = poll_poll; + + pollidxs = 0; pollidxmax = 0; + polls = 0; pollmax = 0; pollcnt = 0; + + return EVBACKEND_POLL; +} + +void inline_size +poll_destroy (EV_P) +{ + ev_free (pollidxs); + ev_free (polls); +} + diff --git a/libev/ev_port.c b/libev/ev_port.c new file mode 100644 index 0000000..47da929 --- /dev/null +++ b/libev/ev_port.c @@ -0,0 +1,165 @@ +/* + * libev solaris event port backend + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#include <sys/types.h> +#include <sys/time.h> +#include <poll.h> +#include <port.h> +#include <string.h> +#include <errno.h> + +void inline_speed +port_associate_and_check (EV_P_ int fd, int ev) +{ + if (0 > + port_associate ( + backend_fd, PORT_SOURCE_FD, fd, + (ev & EV_READ ? POLLIN : 0) + | (ev & EV_WRITE ? POLLOUT : 0), + 0 + ) + ) + { + if (errno == EBADFD) + fd_kill (EV_A_ fd); + else + ev_syserr ("(libev) port_associate"); + } +} + +static void +port_modify (EV_P_ int fd, int oev, int nev) +{ + /* we need to reassociate no matter what, as closes are + * once more silently being discarded. 
+ */ + if (!nev) + { + if (oev) + port_dissociate (backend_fd, PORT_SOURCE_FD, fd); + } + else + port_associate_and_check (EV_A_ fd, nev); +} + +static void +port_poll (EV_P_ ev_tstamp timeout) +{ + int res, i; + struct timespec ts; + uint_t nget = 1; /* in: wait for at least one event; out: number of events returned (loop bound below) */ + + EV_RELEASE_CB; + ts.tv_sec = (time_t)timeout; + ts.tv_nsec = (long)((timeout - (ev_tstamp)ts.tv_sec) * 1e9); /* scale to ns before truncating; casting first always gave 0 */ + res = port_getn (backend_fd, port_events, port_eventmax, &nget, &ts); + EV_ACQUIRE_CB; + + if (res == -1) + { + if (errno != EINTR && errno != ETIME) + ev_syserr ("(libev) port_getn (see http://bugs.opensolaris.org/view_bug.do?bug_id=6268715, try LIBEV_FLAGS=3 env variable)"); + + return; + } + + for (i = 0; i < nget; ++i) + { + if (port_events [i].portev_source == PORT_SOURCE_FD) + { + int fd = port_events [i].portev_object; + + fd_event ( + EV_A_ + fd, + (port_events [i].portev_events & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0) + | (port_events [i].portev_events & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0) + ); + + port_associate_and_check (EV_A_ fd, anfds [fd].events); + } + } + + if (expect_false (nget == port_eventmax)) + { + ev_free (port_events); + port_eventmax = array_nextsize (sizeof (port_event_t), port_eventmax, port_eventmax + 1); + port_events = (port_event_t *)ev_malloc (sizeof (port_event_t) * port_eventmax); + } +} + +int inline_size +port_init (EV_P_ int flags) +{ + /* Initialize the kernel queue */ + if ((backend_fd = port_create ()) < 0) + return 0; + + fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* not sure if necessary, hopefully doesn't hurt */ + + backend_fudge = 1e-3; /* needed to compensate for port_getn returning early */ + backend_modify = port_modify; + backend_poll = port_poll; + + port_eventmax = 64; /* initial number of events receivable per poll */ + port_events = (port_event_t *)ev_malloc (sizeof (port_event_t) * port_eventmax); + + return EVBACKEND_PORT; +} + +void inline_size +port_destroy (EV_P) +{ + ev_free (port_events); +} + +void inline_size +port_fork (EV_P) +{ + 
close (backend_fd); + + while ((backend_fd = port_create ()) < 0) + ev_syserr ("(libev) port"); + + fcntl (backend_fd, F_SETFD, FD_CLOEXEC); + + /* re-register interest in fds */ + fd_rearm_all (EV_A); +} + diff --git a/libev/ev_select.c b/libev/ev_select.c new file mode 100644 index 0000000..818a63e --- /dev/null +++ b/libev/ev_select.c @@ -0,0 +1,308 @@ +/* + * libev select fd activity backend + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#ifndef _WIN32 +/* for unix systems */ +# include <sys/select.h> +# include <inttypes.h> +#endif + +#ifndef EV_SELECT_USE_FD_SET +# ifdef NFDBITS +# define EV_SELECT_USE_FD_SET 0 +# else +# define EV_SELECT_USE_FD_SET 1 +# endif +#endif + +#if EV_SELECT_IS_WINSOCKET +# undef EV_SELECT_USE_FD_SET +# define EV_SELECT_USE_FD_SET 1 +# undef NFDBITS +# define NFDBITS 0 +#endif + +#if !EV_SELECT_USE_FD_SET +# define NFDBYTES (NFDBITS / 8) +#endif + +#include <string.h> + +static void +select_modify (EV_P_ int fd, int oev, int nev) +{ + if (oev == nev) + return; + + { +#if EV_SELECT_USE_FD_SET + + #if EV_SELECT_IS_WINSOCKET + SOCKET handle = anfds [fd].handle; + #else + int handle = fd; + #endif + + assert (("libev: fd >= FD_SETSIZE passed to fd_set-based select backend", fd < FD_SETSIZE)); + + /* FD_SET is broken on windows (it adds the fd to a set twice or more, + * which eventually leads to overflows). Need to call it only on changes. 
+ */ + #if EV_SELECT_IS_WINSOCKET + if ((oev ^ nev) & EV_READ) + #endif + if (nev & EV_READ) + FD_SET (handle, (fd_set *)vec_ri); + else + FD_CLR (handle, (fd_set *)vec_ri); + + #if EV_SELECT_IS_WINSOCKET + if ((oev ^ nev) & EV_WRITE) + #endif + if (nev & EV_WRITE) + FD_SET (handle, (fd_set *)vec_wi); + else + FD_CLR (handle, (fd_set *)vec_wi); + +#else + + int word = fd / NFDBITS; + fd_mask mask = 1UL << (fd % NFDBITS); + + if (expect_false (vec_max <= word)) + { + int new_max = word + 1; + + vec_ri = ev_realloc (vec_ri, new_max * NFDBYTES); + vec_ro = ev_realloc (vec_ro, new_max * NFDBYTES); /* could free/malloc */ + vec_wi = ev_realloc (vec_wi, new_max * NFDBYTES); + vec_wo = ev_realloc (vec_wo, new_max * NFDBYTES); /* could free/malloc */ + #ifdef _WIN32 + vec_eo = ev_realloc (vec_eo, new_max * NFDBYTES); /* could free/malloc */ + #endif + + for (; vec_max < new_max; ++vec_max) + ((fd_mask *)vec_ri) [vec_max] = + ((fd_mask *)vec_wi) [vec_max] = 0; + } + + ((fd_mask *)vec_ri) [word] |= mask; + if (!(nev & EV_READ)) + ((fd_mask *)vec_ri) [word] &= ~mask; + + ((fd_mask *)vec_wi) [word] |= mask; + if (!(nev & EV_WRITE)) + ((fd_mask *)vec_wi) [word] &= ~mask; +#endif + } +} + +static void +select_poll (EV_P_ ev_tstamp timeout) +{ + struct timeval tv; + int res; + int fd_setsize; + + EV_RELEASE_CB; + tv.tv_sec = (long)timeout; + tv.tv_usec = (long)((timeout - (ev_tstamp)tv.tv_sec) * 1e6); + +#if EV_SELECT_USE_FD_SET + fd_setsize = sizeof (fd_set); +#else + fd_setsize = vec_max * NFDBYTES; +#endif + + memcpy (vec_ro, vec_ri, fd_setsize); + memcpy (vec_wo, vec_wi, fd_setsize); + +#ifdef _WIN32 + /* pass in the write set as except set. + * the idea behind this is to work around a windows bug that causes + * errors to be reported as an exception and not by setting + * the writable bit. this is so uncontrollably lame. 
+ */ + memcpy (vec_eo, vec_wi, fd_setsize); + res = select (vec_max * NFDBITS, (fd_set *)vec_ro, (fd_set *)vec_wo, (fd_set *)vec_eo, &tv); +#elif EV_SELECT_USE_FD_SET + fd_setsize = anfdmax < FD_SETSIZE ? anfdmax : FD_SETSIZE; + res = select (fd_setsize, (fd_set *)vec_ro, (fd_set *)vec_wo, 0, &tv); +#else + res = select (vec_max * NFDBITS, (fd_set *)vec_ro, (fd_set *)vec_wo, 0, &tv); +#endif + EV_ACQUIRE_CB; + + if (expect_false (res < 0)) + { + #if EV_SELECT_IS_WINSOCKET + errno = WSAGetLastError (); + #endif + #ifdef WSABASEERR + /* on windows, select returns incompatible error codes, fix this */ + if (errno >= WSABASEERR && errno < WSABASEERR + 1000) + if (errno == WSAENOTSOCK) + errno = EBADF; + else + errno -= WSABASEERR; + #endif + + #ifdef _WIN32 + /* select on windows errornously returns EINVAL when no fd sets have been + * provided (this is documented). what microsoft doesn't tell you that this bug + * exists even when the fd sets _are_ provided, so we have to check for this bug + * here and emulate by sleeping manually. + * we also get EINVAL when the timeout is invalid, but we ignore this case here + * and assume that EINVAL always means: you have to wait manually. 
+ */ + if (errno == EINVAL) + { + ev_sleep (timeout); + return; + } + #endif + + if (errno == EBADF) + fd_ebadf (EV_A); + else if (errno == ENOMEM && !syserr_cb) + fd_enomem (EV_A); + else if (errno != EINTR) + ev_syserr ("(libev) select"); + + return; + } + +#if EV_SELECT_USE_FD_SET + + { + int fd; + + for (fd = 0; fd < anfdmax; ++fd) + if (anfds [fd].events) + { + int events = 0; + #if EV_SELECT_IS_WINSOCKET + SOCKET handle = anfds [fd].handle; + #else + int handle = fd; + #endif + + if (FD_ISSET (handle, (fd_set *)vec_ro)) events |= EV_READ; + if (FD_ISSET (handle, (fd_set *)vec_wo)) events |= EV_WRITE; + #ifdef _WIN32 + if (FD_ISSET (handle, (fd_set *)vec_eo)) events |= EV_WRITE; + #endif + + if (expect_true (events)) + fd_event (EV_A_ fd, events); + } + } + +#else + + { + int word, bit; + for (word = vec_max; word--; ) + { + fd_mask word_r = ((fd_mask *)vec_ro) [word]; + fd_mask word_w = ((fd_mask *)vec_wo) [word]; + #ifdef _WIN32 + word_w |= ((fd_mask *)vec_eo) [word]; + #endif + + if (word_r || word_w) + for (bit = NFDBITS; bit--; ) + { + fd_mask mask = 1UL << bit; + int events = 0; + + events |= word_r & mask ? EV_READ : 0; + events |= word_w & mask ? 
EV_WRITE : 0; + + if (expect_true (events)) + fd_event (EV_A_ word * NFDBITS + bit, events); + } + } + } + +#endif +} + +int inline_size +select_init (EV_P_ int flags) +{ + backend_fudge = 0.; /* posix says this is zero */ + backend_modify = select_modify; + backend_poll = select_poll; + +#if EV_SELECT_USE_FD_SET + vec_ri = ev_malloc (sizeof (fd_set)); FD_ZERO ((fd_set *)vec_ri); + vec_ro = ev_malloc (sizeof (fd_set)); + vec_wi = ev_malloc (sizeof (fd_set)); FD_ZERO ((fd_set *)vec_wi); + vec_wo = ev_malloc (sizeof (fd_set)); + #ifdef _WIN32 + vec_eo = ev_malloc (sizeof (fd_set)); + #endif +#else + vec_max = 0; + vec_ri = 0; + vec_ro = 0; + vec_wi = 0; + vec_wo = 0; + #ifdef _WIN32 + vec_eo = 0; + #endif +#endif + + return EVBACKEND_SELECT; +} + +void inline_size +select_destroy (EV_P) +{ + ev_free (vec_ri); + ev_free (vec_ro); + ev_free (vec_wi); + ev_free (vec_wo); + #ifdef _WIN32 + ev_free (vec_eo); + #endif +} + + diff --git a/libev/ev_vars.h b/libev/ev_vars.h new file mode 100644 index 0000000..da53ee8 --- /dev/null +++ b/libev/ev_vars.h @@ -0,0 +1,187 @@ +/* + * loop member variable declarations + * + * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. 
+ */ + +#define VARx(type,name) VAR(name, type name) + +VARx(ev_tstamp, now_floor) /* last time we refreshed rt_time */ +VARx(ev_tstamp, mn_now) /* monotonic clock "now" */ +VARx(ev_tstamp, rtmn_diff) /* difference realtime - monotonic time */ + +VARx(ev_tstamp, io_blocktime) +VARx(ev_tstamp, timeout_blocktime) + +VARx(int, backend) +VARx(int, activecnt) /* total number of active events ("refcount") */ +VARx(unsigned char, loop_done) /* signal by ev_unloop */ + +VARx(int, backend_fd) +VARx(ev_tstamp, backend_fudge) /* assumed typical timer resolution */ +VAR (backend_modify, void (*backend_modify)(EV_P_ int fd, int oev, int nev)) +VAR (backend_poll , void (*backend_poll)(EV_P_ ev_tstamp timeout)) + +VARx(ANFD *, anfds) +VARx(int, anfdmax) + +VAR (pendings, ANPENDING *pendings [NUMPRI]) +VAR (pendingmax, int pendingmax [NUMPRI]) +VAR (pendingcnt, int pendingcnt [NUMPRI]) +VARx(ev_prepare, pending_w) /* dummy pending watcher */ + +/* for reverse feeding of events */ +VARx(W *, rfeeds) +VARx(int, rfeedmax) +VARx(int, rfeedcnt) + +#if EV_USE_EVENTFD || EV_GENWRAP +VARx(int, evfd) +#endif +VAR (evpipe, int evpipe [2]) +VARx(ev_io, pipe_w) + +#if !defined(_WIN32) || EV_GENWRAP +VARx(pid_t, curpid) +#endif + +VARx(char, postfork) /* true if we need to recreate kernel state after fork */ + +#if EV_USE_SELECT || EV_GENWRAP +VARx(void *, vec_ri) +VARx(void *, vec_ro) +VARx(void *, vec_wi) +VARx(void *, vec_wo) +#if defined(_WIN32) || EV_GENWRAP +VARx(void *, vec_eo) +#endif +VARx(int, vec_max) +#endif + +#if EV_USE_POLL || EV_GENWRAP +VARx(struct pollfd *, polls) +VARx(int, pollmax) +VARx(int, pollcnt) +VARx(int *, pollidxs) /* maps fds into structure indices */ +VARx(int, pollidxmax) +#endif + +#if EV_USE_EPOLL || EV_GENWRAP +VARx(struct epoll_event *, epoll_events) +VARx(int, epoll_eventmax) +#endif + +#if EV_USE_KQUEUE || EV_GENWRAP +VARx(struct kevent *, kqueue_changes) +VARx(int, kqueue_changemax) +VARx(int, kqueue_changecnt) +VARx(struct kevent *, kqueue_events) 
+VARx(int, kqueue_eventmax) +#endif + +#if EV_USE_PORT || EV_GENWRAP +VARx(struct port_event *, port_events) +VARx(int, port_eventmax) +#endif + +VARx(int *, fdchanges) +VARx(int, fdchangemax) +VARx(int, fdchangecnt) + +VARx(ANHE *, timers) +VARx(int, timermax) +VARx(int, timercnt) + +#if EV_PERIODIC_ENABLE || EV_GENWRAP +VARx(ANHE *, periodics) +VARx(int, periodicmax) +VARx(int, periodiccnt) +#endif + +#if EV_IDLE_ENABLE || EV_GENWRAP +VAR (idles, ev_idle **idles [NUMPRI]) +VAR (idlemax, int idlemax [NUMPRI]) +VAR (idlecnt, int idlecnt [NUMPRI]) +#endif +VARx(int, idleall) /* total number */ + +VARx(struct ev_prepare **, prepares) +VARx(int, preparemax) +VARx(int, preparecnt) + +VARx(struct ev_check **, checks) +VARx(int, checkmax) +VARx(int, checkcnt) + +#if EV_FORK_ENABLE || EV_GENWRAP +VARx(struct ev_fork **, forks) +VARx(int, forkmax) +VARx(int, forkcnt) +#endif + +#if EV_ASYNC_ENABLE || EV_GENWRAP +VARx(EV_ATOMIC_T, async_pending) +VARx(struct ev_async **, asyncs) +VARx(int, asyncmax) +VARx(int, asynccnt) +#endif + +#if EV_USE_INOTIFY || EV_GENWRAP +VARx(int, fs_fd) +VARx(ev_io, fs_w) +VARx(char, fs_2625) /* whether we are running in linux 2.6.25 or newer */ +VAR (fs_hash, ANFS fs_hash [EV_INOTIFY_HASHSIZE]) +#endif + +VARx(EV_ATOMIC_T, sig_pending) +#if EV_USE_SIGNALFD || EV_GENWRAP +VARx(int, sigfd) +VARx(ev_io, sigfd_w) +VARx(sigset_t, sigfd_set) +#endif + +#if EV_MINIMAL < 2 || EV_GENWRAP +VARx(unsigned int, loop_count) /* total number of loop iterations/blocks */ +VARx(unsigned int, loop_depth) /* #ev_loop enters - #ev_loop leaves */ + +VARx(void *, userdata) +VAR (release_cb, void (*release_cb)(EV_P)) +VAR (acquire_cb, void (*acquire_cb)(EV_P)) +VAR (invoke_cb , void (*invoke_cb) (EV_P)) +#endif + +#undef VARx + diff --git a/libev/ev_wrap.h b/libev/ev_wrap.h new file mode 100644 index 0000000..03b6b87 --- /dev/null +++ b/libev/ev_wrap.h @@ -0,0 +1,178 @@ +/* DO NOT EDIT, automatically generated by update_ev_wrap */ +#ifndef EV_WRAP_H +#define EV_WRAP_H 
+#define now_floor ((loop)->now_floor) +#define mn_now ((loop)->mn_now) +#define rtmn_diff ((loop)->rtmn_diff) +#define io_blocktime ((loop)->io_blocktime) +#define timeout_blocktime ((loop)->timeout_blocktime) +#define backend ((loop)->backend) +#define activecnt ((loop)->activecnt) +#define loop_done ((loop)->loop_done) +#define backend_fd ((loop)->backend_fd) +#define backend_fudge ((loop)->backend_fudge) +#define backend_modify ((loop)->backend_modify) +#define backend_poll ((loop)->backend_poll) +#define anfds ((loop)->anfds) +#define anfdmax ((loop)->anfdmax) +#define pendings ((loop)->pendings) +#define pendingmax ((loop)->pendingmax) +#define pendingcnt ((loop)->pendingcnt) +#define pending_w ((loop)->pending_w) +#define rfeeds ((loop)->rfeeds) +#define rfeedmax ((loop)->rfeedmax) +#define rfeedcnt ((loop)->rfeedcnt) +#define evfd ((loop)->evfd) +#define evpipe ((loop)->evpipe) +#define pipe_w ((loop)->pipe_w) +#define curpid ((loop)->curpid) +#define postfork ((loop)->postfork) +#define vec_ri ((loop)->vec_ri) +#define vec_ro ((loop)->vec_ro) +#define vec_wi ((loop)->vec_wi) +#define vec_wo ((loop)->vec_wo) +#define vec_eo ((loop)->vec_eo) +#define vec_max ((loop)->vec_max) +#define polls ((loop)->polls) +#define pollmax ((loop)->pollmax) +#define pollcnt ((loop)->pollcnt) +#define pollidxs ((loop)->pollidxs) +#define pollidxmax ((loop)->pollidxmax) +#define epoll_events ((loop)->epoll_events) +#define epoll_eventmax ((loop)->epoll_eventmax) +#define kqueue_changes ((loop)->kqueue_changes) +#define kqueue_changemax ((loop)->kqueue_changemax) +#define kqueue_changecnt ((loop)->kqueue_changecnt) +#define kqueue_events ((loop)->kqueue_events) +#define kqueue_eventmax ((loop)->kqueue_eventmax) +#define port_events ((loop)->port_events) +#define port_eventmax ((loop)->port_eventmax) +#define fdchanges ((loop)->fdchanges) +#define fdchangemax ((loop)->fdchangemax) +#define fdchangecnt ((loop)->fdchangecnt) +#define timers ((loop)->timers) +#define timermax 
((loop)->timermax) +#define timercnt ((loop)->timercnt) +#define periodics ((loop)->periodics) +#define periodicmax ((loop)->periodicmax) +#define periodiccnt ((loop)->periodiccnt) +#define idles ((loop)->idles) +#define idlemax ((loop)->idlemax) +#define idlecnt ((loop)->idlecnt) +#define idleall ((loop)->idleall) +#define prepares ((loop)->prepares) +#define preparemax ((loop)->preparemax) +#define preparecnt ((loop)->preparecnt) +#define checks ((loop)->checks) +#define checkmax ((loop)->checkmax) +#define checkcnt ((loop)->checkcnt) +#define forks ((loop)->forks) +#define forkmax ((loop)->forkmax) +#define forkcnt ((loop)->forkcnt) +#define async_pending ((loop)->async_pending) +#define asyncs ((loop)->asyncs) +#define asyncmax ((loop)->asyncmax) +#define asynccnt ((loop)->asynccnt) +#define fs_fd ((loop)->fs_fd) +#define fs_w ((loop)->fs_w) +#define fs_2625 ((loop)->fs_2625) +#define fs_hash ((loop)->fs_hash) +#define sig_pending ((loop)->sig_pending) +#define sigfd ((loop)->sigfd) +#define sigfd_w ((loop)->sigfd_w) +#define sigfd_set ((loop)->sigfd_set) +#define loop_count ((loop)->loop_count) +#define loop_depth ((loop)->loop_depth) +#define userdata ((loop)->userdata) +#define release_cb ((loop)->release_cb) +#define acquire_cb ((loop)->acquire_cb) +#define invoke_cb ((loop)->invoke_cb) +#else +#undef EV_WRAP_H +#undef now_floor +#undef mn_now +#undef rtmn_diff +#undef io_blocktime +#undef timeout_blocktime +#undef backend +#undef activecnt +#undef loop_done +#undef backend_fd +#undef backend_fudge +#undef backend_modify +#undef backend_poll +#undef anfds +#undef anfdmax +#undef pendings +#undef pendingmax +#undef pendingcnt +#undef pending_w +#undef rfeeds +#undef rfeedmax +#undef rfeedcnt +#undef evfd +#undef evpipe +#undef pipe_w +#undef curpid +#undef postfork +#undef vec_ri +#undef vec_ro +#undef vec_wi +#undef vec_wo +#undef vec_eo +#undef vec_max +#undef polls +#undef pollmax +#undef pollcnt +#undef pollidxs +#undef pollidxmax +#undef epoll_events 
+#undef epoll_eventmax +#undef kqueue_changes +#undef kqueue_changemax +#undef kqueue_changecnt +#undef kqueue_events +#undef kqueue_eventmax +#undef port_events +#undef port_eventmax +#undef fdchanges +#undef fdchangemax +#undef fdchangecnt +#undef timers +#undef timermax +#undef timercnt +#undef periodics +#undef periodicmax +#undef periodiccnt +#undef idles +#undef idlemax +#undef idlecnt +#undef idleall +#undef prepares +#undef preparemax +#undef preparecnt +#undef checks +#undef checkmax +#undef checkcnt +#undef forks +#undef forkmax +#undef forkcnt +#undef async_pending +#undef asyncs +#undef asyncmax +#undef asynccnt +#undef fs_fd +#undef fs_w +#undef fs_2625 +#undef fs_hash +#undef sig_pending +#undef sigfd +#undef sigfd_w +#undef sigfd_set +#undef loop_count +#undef loop_depth +#undef userdata +#undef release_cb +#undef acquire_cb +#undef invoke_cb +#endif diff --git a/man/Makefile b/man/Makefile new file mode 100644 index 0000000..c3027f9 --- /dev/null +++ b/man/Makefile @@ -0,0 +1,7 @@ +MAN5PAGES = opennhrp.conf.5 +MAN8PAGES = opennhrp.8 opennhrpctl.8 opennhrp-script.8 + +install: + $(INSTALLDIR) $(DESTDIR)$(MANDIR)/man5 $(DESTDIR)$(MANDIR)/man8 + $(INSTALL) $(addprefix $(src)/,$(MAN5PAGES)) $(DESTDIR)$(MANDIR)/man5 + $(INSTALL) $(addprefix $(src)/,$(MAN8PAGES)) $(DESTDIR)$(MANDIR)/man8 diff --git a/man/opennhrp-script.8 b/man/opennhrp-script.8 new file mode 100644 index 0000000..0af32b1 --- /dev/null +++ b/man/opennhrp-script.8 @@ -0,0 +1,146 @@ +.TH OPENNHRP-SCRIPT 8 "20 May 2009" "" "OpenNHRP Documentation" + +.SH NAME +opennhrp-script \- NHRP peer configuration script + +.SH DESCRIPTION +NHRP peer configuration script is used invoked by +.BR opennhrp (8). +.PP +This script can be used to establish a direct NBMA peer to peer connection +after NHRP Resolution Reply has been received, but prior to injecting the +peer address to kernel neighbor table. 
This could be used to insert firewall rules +allowing the traffic and/or establishing an IPsec connection (or some other +secure communication channel). The script is also called when the cached peer +information expires. + +.SH OPERATION +When +.B opennhrp +needs to invoke the peer configuration script, it defines a set of variables +in the environment and then executes the script with exactly one argument. +The argument is set to the name of the reason why the script has been invoked. +The following reasons are currently defined: +.BR "interface-up" , " peer-register" , " peer-up" , " peer-down" , +.BR " nhs-up" , " nhs-down" , " route-up" " and " route-down . + +.SH INTERFACE-UP +Interface has just been discovered, or it has changed state from down +to up. This is the place to clean up old routes if needed. + +.SH PEER-REGISTER +A peer registration request has been received. The script is run before the +internal peer cache is altered and this allows the script to reject +registration without it deleting old peers. This could be used to check that +IPsec connection is up or one might encode allowed protocol-addresses in the +certificate and it could be enforced here. This hook is executed synchronously +so it should be fast. + +.SH PEER-UP +A peer has been discovered (either by means of static configuration, dynamic +client registration or resolution reply arrival to initiate shortcut). +This hook is invoked right after the peer's NBMA address is available. For all +other than dynamic-map entries the protocol address is available too. +The information will not be injected to the kernel ARP cache until the script +has returned zero. If non-zero return value is returned, the peer entry is +marked as invalid and negative cached for a short period of time. + +.SH PEER-DOWN +A peer connection is about to be cleared. This can happen for dynamic client +registrations or cached information. 
Dynamic client registrations are torn +down when registration holding time expires (and no re-registration has +occurred) or if it is explicitly removed using Purge Request. Cached entries are +removed when holding time expires (and there has been no traffic to trigger +renewal of the peer address information) or when it is explicitly removed +with Purge Request. + +.SH NHS-UP +This is called for NHS right after the first successful Registration Reply +is received. +This can be used to update application level configuration about which +servers to use. + +.SH NHS-DOWN +Informs that the specified NHS is no longer available. + +.SH ROUTE-UP +In reply to resolution request we have received a shortcut route with +destination off the NBMA subnetwork. The script should insert appropriate +entry to kernel routing table. + +.SH ROUTE-DOWN +The associated shortcut route information is no longer valid and should be +removed from kernel routing table. + +.SH ENVIRONMENT +.B NHRP_TYPE +.RS +For peer-up and peer-down reasons this can be: +\fBstatic\fR (configured information), +\fBdynamic-nhs\fR (configured NHS with only NBMA address known), +\fBdynamic\fR (client registered) or +\fBcached\fR (resolved since we had packets going there). + +The nhs-up and nhs-down reasons are called for \fBstatic\fR entries with +register keyword and \fBdynamic-nhs\fR entries. + +For peer-register this is always \fBdynamic\fR. + +For route-up and route-down reasons this is always defined as \fBroute\fR. + +For interface-up reason this is irrelevant, but always defined as +\fBinterface\fR. +.RE + +.B NHRP_INTERFACE +.RS +The network interface to which this event is related. +.RE + +.B NHRP_GRE_KEY +.RS +The GRE key assigned to the related network interface. +.RE + +.B NHRP_DESTADDR +.RS +Destination protocol address. E.g. for NBMA GRE tunnels this is the IP address +assigned to the tunnel interface being used. 
+.RE + +.B NHRP_DESTPREFIX +.RS +Subnet prefix length for destination protocol address. +.RE + +.B NHRP_DESTNBMA +.RS +Defined only for \fBpeer-up\fR and \fBpeer-down\fR reasons. This contains the +NBMA address of the destination. E.g. for NBMA GRE this contains the public IP +of the peer. +.RE + +.B NHRP_DESTMTU +.RS +Defined only for \fBpeer-up\fR reasons. This contains the MTU for NBMA +address of the destination. +.RE + +.B NHRP_NEXTHOP +.RS +Defined only for \fBroute-up\fR and \fBroute-down\fR reasons. This is the +protocol address of the next hop to be used in routing. +.RE + +.B NHRP_PEER_DOWN_REASON +.RS +Defined only for \fBpeer-down\fR reason. This describes why the peer has +been deleted. Currently it is one of \fBexpired\fR, \fBuser-request\fR or +\fBlower-down\fR. +.RE + +.SH "SEE ALSO" +.BR opennhrp (8) + +.SH AUTHORS +Timo Teras <timo.teras@iki.fi> diff --git a/man/opennhrp.8 b/man/opennhrp.8 new file mode 100644 index 0000000..b83b94b --- /dev/null +++ b/man/opennhrp.8 @@ -0,0 +1,119 @@ +.TH OPENNHRP 8 "16 November 2007" "" "OpenNHRP Documentation" + +.SH NAME +opennhrp \- daemon to resolve next hop address in NBMA network + +.SH SYNOPSIS +.BI "opennhrp [" "option" "]..." + +.SH DESCRIPTION +.B opennhrp +implements the Next Hop Resolution Protocol (NHRP) which is used to +improve the efficiency of routing computer network traffic over +Non-Broadcast, Multiple Access (NBMA) Networks. +.PP +NHRP provides an ARP-like solution that allows a system to dynamically +learn the NBMA address of the other systems that are part of that network, +allowing these systems to directly communicate without requiring traffic +to use an intermediate hop. +.PP +.B opennhrp +implementation is based on RFC2332, but contains some modifications and +extensions to be compatible with Cisco NHRP/DMVPN implementation. +Modifications have been made for authentication extension, Cisco NAT +address extension and shortcut switching enhancements support. 
+ +.SH OPTIONS +The following options are recognized: + +.IP "\fB\-a \fIadmin\-socket" +Specify management interface socket as +.IR admin\-socket . +The default is +.IR /var/run/opennhrp.socket . + +.IP "\fB\-c \fIconfig\-file" +Use +.I config\-file +instead of +.I /etc/opennhrp/opennhrp.conf +for configuration. + +.IP "\fB\-s \fIscript\-file" +Execute +.I script\-file +instead of +.I /etc/opennhrp/opennhrp\-script +on important events. + +.IP "\fB\-p \fIpid\-file" +Store process id in +.I pid\-file +instead of +.IR /var/run/opennhrp.pid . +This file is also used to detect if opennhrp daemon is already running. +Pid-file is not created unless +.B -d +is specified too. + +.IP "\fB-d" +Run in daemon mode, forking to background after initialization. + +.IP "\fB-v" +Verbose. Print more log messages. + +.IP "\fB-V" +Print version and exit. + +.SH SIGNALS +.IP \fBSIGHUP +Forget all cached information about other system addresses. +.IP \fBSIGUSR1 +Dump NHRP peer database to system log. + +.SH FILES +.I /etc/opennhrp/opennhrp.conf +.RS +The system wide configuration file. See +.BR opennhrp.conf (5) +for further details. +.RE + +.I /etc/opennhrp/opennhrp\-script +.RS +Script executed by +.B opennhrp +on important events. See +.BR opennhrp\-script (8) +for more information on how the script is executed. +.RE + +.I /var/run/opennhrp.socket +.RS +.BR opennhrp "(8) control socket" +.RE + +.SH BUGS +Currently only IPv4 over IPv4 networks using NBMA GRE tunnels is +supported (you need Linux kernel 2.6.24-rc2 or later). +.PP +Replying with cached information to non-authoritative resolution +requests is not implemented. +.PP +Please send bug reports to OpenNHRP issue tracker in SourceForge. 
+ +.SH "SEE ALSO" +.BR opennhrp.conf (5), +.BR opennhrpctl (8), +.BR opennhrp\-script (8) +.br +http://sourceforge.net/projects/opennhrp +.PP +For more information about the protocol see: +.br +RFC2332 NBMA Next Hop Resolution Protocol (NHRP) +.br +RFC2333 NHRP Protocol Applicability Statement + +.SH AUTHORS +Timo Teras <timo.teras@iki.fi> diff --git a/man/opennhrp.conf.5 b/man/opennhrp.conf.5 new file mode 100644 index 0000000..aacec80 --- /dev/null +++ b/man/opennhrp.conf.5 @@ -0,0 +1,227 @@ +.TH OPENNHRP.CONF 5 "27 Oct 2010" "" "OpenNHRP Documentation" + +.SH NAME +opennhrp.conf \- NHRP daemon configuration file + +.SH DESCRIPTION +The +.I opennhrp.conf +file contains information for the +.BR opennhrp . +.PP +This configuration file is a free-form ASCII text file. It is parsed by the +word-by-word parser built into +.BR opennhrp . +The file may contain extra whitespace, tabs and newline for formatting +purposes. Keywords and contents are case-sensitive. Comments can be marked +with a hash sign +.RB ( # ) +and everything following it until newline is ignored. + +.SH "DIRECTIVES" +Directives are keywords that can appear in any context of the configuration +file and they select a new context. + +.PP +.BI "interface " interface-name +.RS +Marks the start of configuration for network interface +.IR interface-name . +Even if no interface specific configuration is required, the +.B interface +directive must be present to enable NHRP on that interface. +.RE + +.SH "INTERFACE CONTEXT" +These configuration keywords can appear only in the interface context. + +.PP +.BI "map " protocol-address[/prefix] " " nbma-address " [register] [cisco]" +.RS +Creates static peer mapping of +.I protocol-address +to +.IR nbma-address . +.PP +If the +.I prefix +parameter is present, it directs +.B opennhrp +to use this peer as a next hop server when sending Resolution Requests +matching this subnet. 
+.PP +The optional parameter +.I register +specifies that Registration Request should be sent to this peer on +startup. +.PP +If the statically mapped peer is running Cisco IOS, specify the +.B cisco +keyword. It is used to fix statically the Registration Request ID +so that a matching Purge Request can be sent if NBMA address has changed. +This is to work around broken IOS which requires Purge Request ID to +match the original Registration Request ID. +.RE + +.BI "dynamic-map " protocol-address/prefix " " nbma-domain-name +.RS +Specifies that the NBMA addresses of the next hop servers are defined in the +domain name +.IR nbma-domain-name . +For each A record opennhrp creates a dynamic NHS entry. + +Each dynamic NHS will get a peer entry with the configured network address +and the discovered NBMA address. + +The first registration request is sent to the protocol broadcast address, +and the server's real protocol address is dynamically detected from the first +registration reply (requires opennhrp 0.11 or newer). + +Alternatively, if +.BR peer-up +script hook can determine the protocol address from the NBMA address (e.g. +by doing an additional DNS lookup or by parsing the IPsec certificate) it can +inform this mapping via +.BR opennhrpctl "(8) " "update nbma " command. +.RE + +.PP +.BI "shortcut-target " protocol-address/prefix " [holding-time " holdtime "]" +.RS +Defines an off-NBMA network prefix for which the GRE interface will act +as a gateway. This is an alternative to defining local interfaces with +shortcut-destination flag. +.RE + +.BR multicast " " dynamic "|" nhs +.br +.BI "multicast " protocol-address +.RS +Determines how opennhrp daemon should soft switch the multicast traffic. +Currently, multicast traffic is captured by opennhrp daemon using a packet +socket, and resent back to proper destinations. This means that multicast +packet sending is CPU intensive. 
+ +Specifying +.B nhs +causes all multicast packets to be repeated to each statically configured +next hop. +.B dynamic +instructs to forward to all peers which we have a direct connection with. +Alternatively, you can specify the directive multiple times for each +.I protocol-address +the multicast traffic should be sent to. + +.B "WARNING:" +It is very easy to misconfigure multicast repeating if you have multiple +NHS:es. +.RE + +.BI "holding-time " holdtime +.RS +Specifies the holding time for NHRP Registration Requests and +Resolution Replies sent from this interface or shortcut-target. +The +.I holdtime +is specified in seconds and defaults to two hours. +.RE + +.BI "route-table " routetable +.RS +Specifies the kernel routing table to be monitored for outgoing routes +to this interface. This is required to do routing lookups excluding +active shortcut routes (for existing shortcut route renewal). The +default is main table. + +If you use +.B table +directive in +.B zebra.conf +to put Quagga routes in alternate table, this should match with it. +.RE + +.BI "cisco-authentication " secret +.RS +Enables Cisco style authentication on NHRP packets. This embeds the +.I secret +plaintext password to the outgoing NHRP packets. Incoming NHRP packets +on this interface are discarded unless the +.I secret +password is present. Maximum length of the +.I secret +is 8 characters. +.RE + +.B redirect +.RS +Enable sending of Cisco style NHRP Traffic Indication packets. If +this is enabled and +.B opennhrp +detects a forwarded packet, it will send a message to the original sender +of the packet instructing it to create a direct connection with the +destination. This is basically a protocol independent equivalent of ICMP +redirect. +.RE + +.B shortcut +.RS +Enable creation of shortcut routes. A received NHRP Traffic Indication +will trigger the resolution and establishment of a shortcut route. 
+.PP +.B IMPORTANT: +You still need to run some routing protocol or have static routes +to some hub node in your NBMA network. NHRP does not advertise routes; +it can create shortcut route only for an already routable subnet. +.RE + +.B non-caching +.RS +Disables caching of peer information from forwarded NHRP Resolution +Reply packets. This can be used to reduce memory consumption on big +NBMA subnets. +.PP +NOTE: currently does not do much as caching is not implemented. +.RE + +.B shortcut-destination +.RS +This instructs +.B opennhrp +to reply with authoritative answers on NHRP Resolution Requests destined +to addresses in this interface (instead of forwarding the packets). This +effectively allows the creation of shortcut routes to subnets located +on the interface. +.PP +When specified, this should be the only keyword for the interface. +.RE + +.SH EXAMPLE +The following configuration file was used for testing OpenNHRP on a machine +with two ethernet network interfaces. GRE tunnel was configured with tunnel +IP 10.255.255.2/24. Configuration enables registration to hub node at +10.255.255.1 and resolution of other nodes in the subnet using that hub. +.PP +It also enables creation of shortcut routes to networks behind other +hosts (with holding-time override for the defined shortcut-target) +in our NBMA network and allows incoming shortcut routes. 
+.PP +.nf +interface gre1 + holding-time 3600 + map 10.255.255.1/24 192.168.200.1 register + shortcut-target 172.16.0.0/16 holding-time 1800 + cisco-authentication secret + shortcut + redirect + non-caching + +interface eth1 + shortcut-destination + +.fi + +.SH "SEE ALSO" +.BR opennhrp (8) + +.SH AUTHORS +Timo Teras <timo.teras@iki.fi> diff --git a/man/opennhrpctl.8 b/man/opennhrpctl.8 new file mode 100644 index 0000000..611c6f7 --- /dev/null +++ b/man/opennhrpctl.8 @@ -0,0 +1,124 @@ +.TH OPENNHRPCTL 8 "20 May 2009" "" "OpenNHRP Documentation" + +.SH NAME +opennhrpctl \- opennhrp administrative control tool + +.SH SYNOPSIS +.B opennhrpctl +.BI "[\-a " admin\-socket "]" " command " "[" "arguments" "]..." + +.SH DESCRIPTION +.B opennhrpctl +is a utility to control +.BR opennhrp (8) +daemon operation. A UNIX socket is used for communication between +.B opennhrpctl +and +.BR opennhrp (8). +Administration privileges for a non-root user can be granted by modifying +the permissions and ownership of the socket. + +The following commands are available: + +.BI "[cache] show [" selector "]..." +.RS +Show contents of next hop cache (configured and resolved entries). +.RE + +.BI "[cache] flush [" selector "]..." +.RS +Clear all non-permanent entries which match the selector specifiers. +.RE + +.BI "[cache] purge [" selector "]..." +.RS +Purge entries from NHRP cache: cached entries are removed and permanent +entries are forced down, up and finally reregistered. +.RE + +.BI "[cache] lowerdown [" selector "]..." +.RS +Purge entries from NHRP cache with indication that lower layer failed: +e.g. IPsec daemon detected dead-peer or received INITIAL-CONTACT +notification. +.RE + +.BI "route show [" selector "]..." +.RS +Show the contents of locally cached kernel routing information +(outbound routing base to do route lookups excluding active shortcut +routes). 
+.RE + +.B "interface show" +.RS +Show the contents of interface configuration table, and the cached information +from kernel (like protocol and NBMA IP addresses in use currently). +.RE + +.BI "redirect purge [" protocol-address "/" prefix-length "]" +.RS +Clear redirection cache from all entries matching the specified address. +.RE + +.BI "update nbma " nbma-address " " protocol-address +.RS +This command can be used from +.BR opennhrp-script "(8)" +to inform +.BR opennhrp +daemon of the real +.IR protocol-address +of dynamically discovered NHS. +.RE + +The following selectors can be used to limit which cache entries will +be affected: + +.BI nbma " nbma-address" +.RS +Matches entries where the remote has NBMA address +.IR nbma-address . +.RE + +.BI protocol " protocol-address" "[/" "prefix-length" "]" +.RS +Matches entries where the remote has protocol address +.IR protocol-address " with at least prefix length " prefix-length . +.RE + +.BI local-nbma " nbma-address" +.RS +Matches entries from local interface which owns the NBMA address +.IR nbma-address . +.RE + +.BI local-protocol " protocol-address" +.RS +Matches entries only from local interface which owns the protocol address +.IR protocol-address . +.RE + +.BI interface " interface-name" +.br +.BI iface " interface-name" +.br +.BI dev " interface-name" +.RS +Search entries only from local interface with name +.IR interface-name . 
+.RE + +.RE + +.SH FILES +.I /var/run/opennhrp.socket +.RS +.BR opennhrp "(8) control socket" +.RE + +.SH "SEE ALSO" +.BR opennhrp (8) + +.SH AUTHORS +Timo Teras <timo.teras@iki.fi> diff --git a/nhrp/Makefile b/nhrp/Makefile new file mode 100644 index 0000000..7c2560e --- /dev/null +++ b/nhrp/Makefile @@ -0,0 +1,27 @@ +progs-y += opennhrp +opennhrp-objs += libev.o opennhrp.o nhrp_address.o nhrp_packet.o \ + nhrp_peer.o nhrp_server.o nhrp_interface.o admin.o \ + sysdep_netlink.o sysdep_pfpacket.o \ + sysdep_syslog.o + +CFLAGS_libev.o += -Wno-unused -Wno-comment -Wno-parentheses +CFLAGS_opennhrp.o += -DOPENNHRP_VERSION=\"$(FULL_VERSION)\" \ + -DOPENNHRP_ADMIN_SOCKET=\"$(STATEDIR)/opennhrp.socket\" +LIBS_opennhrp += -lm +ifeq ($(shell pkg-config --exists libcares && echo "yes"),yes) +CFLAGS +=$(shell pkg-config --cflags libcares) +LIBS_opennhrp +=$(shell pkg-config --libs libcares) +else +LIBS_opennhrp += -lcares +endif + +progs-y += opennhrpctl +opennhrpctl-objs += opennhrpctl.o +CFLAGS_opennhrpctl.o += $(CFLAGS_opennhrp.o) + +CFLAGS_EXTRA += -I$(srctree)/include -Wno-strict-aliasing + +install: + $(INSTALLDIR) $(DESTDIR)$(SBINDIR) + $(INSTALL) $(addprefix $(obj)/,$(progs-y)) $(DESTDIR)$(SBINDIR) + diff --git a/nhrp/admin.c b/nhrp/admin.c new file mode 100644 index 0000000..68a3e9e --- /dev/null +++ b/nhrp/admin.c @@ -0,0 +1,609 @@ +/* admin.c - OpenNHRP administrative interface implementation + * + * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#include <ctype.h> +#include <errno.h> +#include <stdio.h> +#include <fcntl.h> +#include <malloc.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <sys/un.h> +#include <sys/socket.h> + +#include "nhrp_common.h" +#include "nhrp_peer.h" +#include "nhrp_address.h" +#include "nhrp_interface.h" + +static struct ev_io accept_io; + +struct admin_remote { + struct ev_timer timeout; + struct ev_io io; + int num_read; + char cmd[512]; +}; + +static int parse_word(const char **bufptr, size_t len, char *word) +{ + const char *buf = *bufptr; + int i, pos = 0; + + while (isspace(buf[pos]) && buf[pos] != '\n' && buf[pos]) + pos++; + + if (buf[pos] == '\n' || buf[pos] == 0) + return FALSE; + + for (i = 0; i < len-1 && !isspace(buf[pos+i]); i++) + word[i] = buf[pos+i]; + word[i] = 0; + + *bufptr += i + pos; + return TRUE; +} + + +static void admin_write(void *ctx, const char *format, ...) +{ + struct admin_remote *rmt = (struct admin_remote *) ctx; + char msg[1024]; + va_list ap; + size_t len; + + va_start(ap, format); + len = vsnprintf(msg, sizeof(msg), format, ap); + va_end(ap); + + if (write(rmt->io.fd, msg, len) != len) { + } +} + +static void admin_free_remote(struct admin_remote *rm) +{ + int fd = rm->io.fd; + + ev_io_stop(&rm->io); + ev_timer_stop(&rm->timeout); + shutdown(fd, SHUT_RDWR); + close(fd); + free(rm); +} + +static int admin_show_peer(void *ctx, struct nhrp_peer *peer) +{ + char buf[512], tmp[32]; + char *str; + size_t len = sizeof(buf); + int i = 0, rel; + + if (peer->interface != NULL) + i += snprintf(&buf[i], len - i, + "Interface: %s\n", + peer->interface->name); + + i += snprintf(&buf[i], len - i, + "Type: %s\n" + "Protocol-Address: %s/%d\n", + nhrp_peer_type[peer->type], + nhrp_address_format(&peer->protocol_address, sizeof(tmp), tmp), + peer->prefix_length); + + if (peer->next_hop_address.type != PF_UNSPEC) { + switch (peer->type) { + case NHRP_PEER_TYPE_SHORTCUT_ROUTE: + case NHRP_PEER_TYPE_LOCAL_ROUTE: + str = 
"Next-hop-Address"; + break; + case NHRP_PEER_TYPE_LOCAL_ADDR: + str = "Alias-Address"; + break; + default: + str = "NBMA-Address"; + break; + } + i += snprintf(&buf[i], len - i, "%s: %s\n", + str, + nhrp_address_format(&peer->next_hop_address, + sizeof(tmp), tmp)); + } + if (peer->nbma_hostname) { + i += snprintf(&buf[i], len - i, "Hostname: %s\n", + peer->nbma_hostname); + } + if (peer->next_hop_nat_oa.type != PF_UNSPEC) { + i += snprintf(&buf[i], len - i, "NBMA-NAT-OA-Address: %s\n", + nhrp_address_format(&peer->next_hop_nat_oa, + sizeof(tmp), tmp)); + } + if (peer->flags & (NHRP_PEER_FLAG_USED | NHRP_PEER_FLAG_UNIQUE | + NHRP_PEER_FLAG_UP | NHRP_PEER_FLAG_LOWER_UP)) { + i += snprintf(&buf[i], len - i, "Flags:"); + if (peer->flags & NHRP_PEER_FLAG_UNIQUE) + i += snprintf(&buf[i], len - i, " unique"); + + if (peer->flags & NHRP_PEER_FLAG_USED) + i += snprintf(&buf[i], len - i, " used"); + if (peer->flags & NHRP_PEER_FLAG_UP) + i += snprintf(&buf[i], len - i, " up"); + else if (peer->flags & NHRP_PEER_FLAG_LOWER_UP) + i += snprintf(&buf[i], len - i, " lower-up"); + i += snprintf(&buf[i], len - i, "\n"); + } + if (peer->expire_time) { + rel = (int) (peer->expire_time - ev_now()); + if (rel >= 0) { + i += snprintf(&buf[i], len - i, "Expires-In: %d:%02d\n", + rel / 60, rel % 60); + } + } + + admin_write(ctx, "%s\n", buf); + return 0; +} + +static void admin_free_selector(struct nhrp_peer_selector *sel) +{ + if (sel->hostname != NULL) { + free((void *) sel->hostname); + sel->hostname = NULL; + } +} + +static int admin_parse_selector(void *ctx, const char *cmd, + struct nhrp_peer_selector *sel) +{ + char keyword[64], tmp[64]; + struct nhrp_address address; + uint8_t prefix_length; + + while (parse_word(&cmd, sizeof(keyword), keyword)) { + if (!parse_word(&cmd, sizeof(tmp), tmp)) { + admin_write(ctx, + "Status: failed\n" + "Reason: missing-argument\n" + "Near-Keyword: '%s'\n", + keyword); + return FALSE; + } + + if (strcmp(keyword, "interface") == 0 || + strcmp(keyword, 
"iface") == 0 || + strcmp(keyword, "dev") == 0) { + if (sel->interface != NULL) + goto err_conflict; + sel->interface = nhrp_interface_get_by_name(tmp, FALSE); + if (sel->interface == NULL) + goto err_noiface; + continue; + } else if (strcmp(keyword, "host") == 0 || + strcmp(keyword, "hostname") == 0) { + if (sel->hostname != NULL) + goto err_conflict; + sel->hostname = strdup(tmp); + continue; + } + + if (!nhrp_address_parse(tmp, &address, &prefix_length)) { + admin_write(ctx, + "Status: failed\n" + "Reason: invalid-address\n" + "Near-Keyword: '%s'\n", + keyword); + return FALSE; + } + + if (strcmp(keyword, "protocol") == 0) { + if (sel->protocol_address.type != AF_UNSPEC) + goto err_conflict; + sel->protocol_address = address; + sel->prefix_length = prefix_length; + } else if (strcmp(keyword, "nbma") == 0) { + if (sel->next_hop_address.type != AF_UNSPEC) + goto err_conflict; + sel->type_mask &= ~BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE); + sel->next_hop_address = address; + } else if (strcmp(keyword, "local-protocol") == 0) { + if (sel->interface != NULL) + goto err_conflict; + sel->interface = nhrp_interface_get_by_protocol(&address); + if (sel->interface == NULL) + goto err_noiface; + } else if (strcmp(keyword, "local-nbma") == 0) { + if (sel->interface != NULL) + goto err_conflict; + sel->interface = nhrp_interface_get_by_nbma(&address); + if (sel->interface == NULL) + goto err_noiface; + } else { + admin_write(ctx, + "Status: failed\n" + "Reason: syntax-error\n" + "Near-Keyword: '%s'\n", + keyword); + return FALSE; + } + } + return TRUE; + +err_conflict: + admin_write(ctx, + "Status: failed\n" + "Reason: conflicting-keyword\n" + "Near-Keyword: '%s'\n", + keyword); + goto err; +err_noiface: + admin_write(ctx, + "Status: failed\n" + "Reason: interface-not-found\n" + "Near-Keyword: '%s'\n" + "Argument: '%s'\n", + keyword, tmp); +err: + admin_free_selector(sel); + return FALSE; +} + +static void admin_route_show(void *ctx, const char *cmd) +{ + struct nhrp_peer_selector 
sel; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ROUTE); + if (!admin_parse_selector(ctx, cmd, &sel)) + return; + + admin_write(ctx, "Status: ok\n\n"); + nhrp_peer_foreach(admin_show_peer, ctx, &sel); + admin_free_selector(&sel); +} + +static void admin_cache_show(void *ctx, const char *cmd) +{ + struct nhrp_peer_selector sel; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = NHRP_PEER_TYPEMASK_ALL & + ~BIT(NHRP_PEER_TYPE_LOCAL_ROUTE); + if (!admin_parse_selector(ctx, cmd, &sel)) + return; + + admin_write(ctx, "Status: ok\n\n"); + nhrp_peer_foreach(admin_show_peer, ctx, &sel); + admin_free_selector(&sel); +} + +static void admin_cache_purge(void *ctx, const char *cmd) +{ + struct nhrp_peer_selector sel; + int count = 0; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = NHRP_PEER_TYPEMASK_PURGEABLE; + if (!admin_parse_selector(ctx, cmd, &sel)) + return; + + nhrp_peer_foreach(nhrp_peer_purge_matching, &count, &sel); + admin_free_selector(&sel); + + admin_write(ctx, + "Status: ok\n" + "Entries-Affected: %d\n", + count); +} + +static void admin_cache_lower_down(void *ctx, const char *cmd) +{ + struct nhrp_peer_selector sel; + int count = 0; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = NHRP_PEER_TYPEMASK_PURGEABLE; + if (!admin_parse_selector(ctx, cmd, &sel)) + return; + + nhrp_peer_foreach(nhrp_peer_lowerdown_matching, &count, &sel); + admin_free_selector(&sel); + + admin_write(ctx, + "Status: ok\n" + "Entries-Affected: %d\n", + count); +} + +static void admin_cache_flush(void *ctx, const char *cmd) +{ + struct nhrp_peer_selector sel; + int count = 0; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE; + if (!admin_parse_selector(ctx, cmd, &sel)) + return; + + nhrp_peer_foreach(nhrp_peer_remove_matching, &count, &sel); + admin_free_selector(&sel); + + admin_write(ctx, + "Status: ok\n" + "Entries-Affected: %d\n", + count); +} + +static int admin_show_interface(void *ctx, struct nhrp_interface 
*iface) +{ + char buf[512], tmp[32]; + size_t len = sizeof(buf); + int i = 0; + + i += snprintf(&buf[i], len - i, + "Interface: %s\n" + "Index: %d\n", + iface->name, + iface->index); + + if (iface->protocol_address.addr_len != 0) { + i += snprintf(&buf[i], len - i, + "Protocol-Address: %s/%d\n", + nhrp_address_format(&iface->protocol_address, sizeof(tmp), tmp), + iface->protocol_address_prefix); + } + + if (iface->flags) { + i += snprintf(&buf[i], len - i, + "Flags:%s%s%s%s%s\n", + (iface->flags & NHRP_INTERFACE_FLAG_NON_CACHING) ? " non-caching" : "", + (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT) ? " shortcut" : "", + (iface->flags & NHRP_INTERFACE_FLAG_REDIRECT) ? " redirect" : "", + (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) ? " shortcut-dest" : "", + (iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED) ? " configured" : ""); + } + + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + goto done; + + i += snprintf(&buf[i], len - i, + "Holding-Time: %u\n" + "Route-Table: %u\n" + "GRE-Key: %u\n" + "MTU: %u\n", + iface->holding_time, + iface->route_table, + iface->gre_key, + iface->mtu); + + if (iface->link_index) { + struct nhrp_interface *link; + + i += snprintf(&buf[i], len - i, "Link-Index: %d\n", iface->link_index); + link = nhrp_interface_get_by_index(iface->link_index, FALSE); + if (link != NULL) + i += snprintf(&buf[i], len - i, "Link-Name: %s\n", link->name); + } + + if (iface->nbma_address.addr_len != 0) { + i += snprintf(&buf[i], len - i, + "NBMA-MTU: %u\n" + "NBMA-Address: %s\n", + iface->nbma_mtu, + nhrp_address_format(&iface->nbma_address, sizeof(tmp), tmp)); + } + if (iface->nat_cie.nbma_address.addr_len != 0) { + i += snprintf(&buf[i], len - i, + "NBMA-NAT-OA: %s\n", + nhrp_address_format(&iface->nat_cie.nbma_address, sizeof(tmp), tmp)); + } +done: + admin_write(ctx, "%s\n", buf); + return 0; +} + +static void admin_interface_show(void *ctx, const char *cmd) +{ + admin_write(ctx, "Status: ok\n\n"); + 
nhrp_interface_foreach(admin_show_interface, ctx); +} + +static void admin_redirect_purge(void *ctx, const char *cmd) +{ + char keyword[64]; + struct nhrp_address addr; + uint8_t prefix; + int count; + + nhrp_address_set_type(&addr, PF_UNSPEC); + + if (parse_word(&cmd, sizeof(keyword), keyword)) { + if (!nhrp_address_parse(keyword, &addr, &prefix)) { + admin_write(ctx, + "Status: failed\n" + "Reason: invalid-address\n" + "Near-Keyword: '%s'\n", + keyword); + return; + } + } + + count = nhrp_rate_limit_clear(&addr, prefix); + admin_write(ctx, + "Status: ok\n" + "Entries-Affected: %d\n", + count); +} + +struct update_nbma { + struct nhrp_address addr; + int count; +}; + +static int update_nbma(void *ctx, struct nhrp_peer *p) +{ + struct update_nbma *un = (struct update_nbma *) ctx; + + nhrp_peer_discover_nhs(p, &un->addr); + un->count++; + + return 0; +} + +static void admin_update_nbma(void *ctx, const char *cmd) +{ + char keyword[64]; + struct nhrp_peer_selector sel; + struct update_nbma un; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_DYNAMIC_NHS); + + if (!parse_word(&cmd, sizeof(keyword), keyword)) + goto err; + if (!nhrp_address_parse(keyword, &sel.next_hop_address, NULL)) + goto err; + if (!parse_word(&cmd, sizeof(keyword), keyword)) + goto err; + if (!nhrp_address_parse(keyword, &un.addr, NULL)) + goto err; + + un.count = 0; + nhrp_peer_foreach(update_nbma, &un, &sel); + + admin_write(ctx, + "Status: ok\n" + "Entries-Affected: %d\n", + un.count); + return; +err: + admin_write(ctx, + "Status: failed\n" + "Reason: syntax-error\n" + "Near-Keyword: '%s'\n", + keyword); + return; +} + +static struct { + const char *command; + void (*handler)(void *ctx, const char *cmd); +} admin_handler[] = { + { "route show", admin_route_show }, + { "show", admin_cache_show }, + { "cache show", admin_cache_show }, + { "flush", admin_cache_flush }, + { "cache flush", admin_cache_flush }, + { "purge", admin_cache_purge }, + { "cache purge", 
admin_cache_purge }, + { "cache lowerdown", admin_cache_lower_down }, + { "interface show", admin_interface_show }, + { "redirect purge", admin_redirect_purge }, + { "update nbma", admin_update_nbma }, +}; + +static void admin_receive_cb(struct ev_io *w, int revents) +{ + struct admin_remote *rm = container_of(w, struct admin_remote, io); + int fd = rm->io.fd; + ssize_t len; + int i, cmdlen; + + len = recv(fd, rm->cmd, sizeof(rm->cmd) - rm->num_read, MSG_DONTWAIT); + if (len < 0 && errno == EAGAIN) + return; + if (len <= 0) + goto err; + + rm->num_read += len; + if (rm->num_read >= sizeof(rm->cmd)) + goto err; + + if (rm->cmd[rm->num_read-1] != '\n') + return; + rm->cmd[--rm->num_read] = 0; + + for (i = 0; i < ARRAY_SIZE(admin_handler); i++) { + cmdlen = strlen(admin_handler[i].command); + if (rm->num_read >= cmdlen && + strncasecmp(rm->cmd, admin_handler[i].command, cmdlen) == 0) { + nhrp_debug("Admin: %s", rm->cmd); + admin_handler[i].handler(rm, &rm->cmd[cmdlen]); + break; + } + } + if (i >= ARRAY_SIZE(admin_handler)) { + admin_write(rm, + "Status: error\n" + "Reason: unrecognized command\n"); + } + +err: + admin_free_remote(rm); +} + +static void admin_timeout_cb(struct ev_timer *t, int revents) +{ + admin_free_remote(container_of(t, struct admin_remote, timeout)); +} + +static void admin_accept_cb(ev_io *w, int revents) +{ + struct admin_remote *rm; + struct sockaddr_storage from; + socklen_t fromlen = sizeof(from); + int cnx; + + cnx = accept(w->fd, (struct sockaddr *) &from, &fromlen); + if (cnx < 0) + return; + fcntl(cnx, F_SETFD, FD_CLOEXEC); + + rm = calloc(1, sizeof(struct admin_remote)); + + ev_io_init(&rm->io, admin_receive_cb, cnx, EV_READ); + ev_io_start(&rm->io); + ev_timer_init(&rm->timeout, admin_timeout_cb, 10.0, 0.); + ev_timer_start(&rm->timeout); +} + +int admin_init(const char *opennhrp_socket) +{ + struct sockaddr_un sun; + int fd; + + memset(&sun, 0, sizeof(sun)); + sun.sun_family = AF_UNIX; + strncpy(sun.sun_path, opennhrp_socket, 
sizeof(sun.sun_path)); + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) + return 0; + + fcntl(fd, F_SETFD, FD_CLOEXEC); + unlink(opennhrp_socket); + if (bind(fd, (struct sockaddr *) &sun, sizeof(sun)) != 0) + goto err_close; + + if (listen(fd, 5) != 0) + goto err_close; + + ev_io_init(&accept_io, admin_accept_cb, fd, EV_READ); + ev_io_start(&accept_io); + + return 1; + +err_close: + nhrp_error("Failed initialize admin socket [%s]: %s", + opennhrp_socket, strerror(errno)); + close(fd); + return 0; +} diff --git a/nhrp/afnum.h b/nhrp/afnum.h new file mode 100644 index 0000000..2dc3d68 --- /dev/null +++ b/nhrp/afnum.h @@ -0,0 +1,29 @@ +/* afnum.h - RFC 1700 Address Family Number and + * ethernet protocol number definitions + * + * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#ifndef AFNUM_H +#define AFNUM_H + +#include <linux/if_ether.h> +#include "nhrp_defines.h" + +#define AFNUM_RESERVED constant_htons(0) +#define AFNUM_INET constant_htons(1) +#define AFNUM_INET6 constant_htons(2) + +#define ETH_P_NHRP 0x2001 + +#define ETHPROTO_IP constant_htons(ETH_P_IP) +#define ETHPROTO_NHRP constant_htons(ETH_P_NHRP) + +#endif diff --git a/nhrp/libev.c b/nhrp/libev.c new file mode 100644 index 0000000..c4af3b9 --- /dev/null +++ b/nhrp/libev.c @@ -0,0 +1,3 @@ +#include <string.h> +#include "libev.h" +#include "../libev/ev.c" diff --git a/nhrp/libev.h b/nhrp/libev.h new file mode 100644 index 0000000..f9f5f23 --- /dev/null +++ b/nhrp/libev.h @@ -0,0 +1,22 @@ +#define EV_STANDALONE 1 +#define EV_MULTIPLICITY 0 +#define EV_VERIFY 0 + +#define EV_USE_CLOCK_SYSCALL 1 +#define EV_USE_SELECT 0 +#define EV_USE_POLL 1 + +#define EV_IDLE_ENABLE 1 + +/* Unused stuff, disabled for size optimization */ +#define EV_USE_INOTIFY 0 +#define EV_PERIODIC_ENABLE 0 +#define EV_EMBED_ENABLE 0 +#define EV_STAT_ENABLE 0 +#define EV_FORK_ENABLE 0 +#define EV_ASYNC_ENABLE 0 + +/* Disable the "void *data;" member of watchers to save memory */ +#define EV_COMMON /* empty */ + +#include "../libev/ev.h" diff --git a/nhrp/list.h b/nhrp/list.h new file mode 100644 index 0000000..4387970 --- /dev/null +++ b/nhrp/list.h @@ -0,0 +1,184 @@ +/* list.h - Single and double linked list macros + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + * + * This is more or less based on the code in the linux kernel. There are + * minor differences and this is only a subset of the kernel version. 
+ */ + +#ifndef LIST_H +#define LIST_H + +#ifndef NULL +#define NULL 0L +#endif + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) +#endif + +struct hlist_head { + struct hlist_node *first; +}; + +struct hlist_node { + struct hlist_node *next; + struct hlist_node **pprev; +}; + +static inline int hlist_empty(const struct hlist_head *h) +{ + return !h->first; +} + +static inline int hlist_hashed(const struct hlist_node *n) +{ + return n->pprev != NULL; +} + +static inline void hlist_del(struct hlist_node *n) +{ + struct hlist_node *next = n->next; + struct hlist_node **pprev = n->pprev; + + *pprev = next; + if (next) + next->pprev = pprev; + + n->next = NULL; + n->pprev = NULL; +} + +static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) +{ + struct hlist_node *first = h->first; + + n->next = first; + if (first) + first->pprev = &n->next; + n->pprev = &h->first; + h->first = n; +} + +static inline void hlist_add_after(struct hlist_node *n, struct hlist_node *prev) +{ + n->next = prev->next; + n->pprev = &prev->next; + prev->next = n; +} + +static inline struct hlist_node **hlist_tail_ptr(struct hlist_head *h) +{ + struct hlist_node *n = h->first; + if (n == NULL) + return &h->first; + while (n->next != NULL) + n = n->next; + return &n->next; +} + +#define hlist_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_for_each(pos, head) \ + for (pos = (head)->first; pos; pos = pos->next) + +#define hlist_for_each_safe(pos, n, head) \ + for (pos = (head)->first; pos && ({ n = pos->next; 1; }); pos = n) + +#define hlist_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; pos && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = (head)->first; \ + pos && ({ n = pos->next; 1; 
}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = n) + + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_INITIALIZER(l) { .next = &l, .prev = &l } + +static inline void list_init(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = NULL; + entry->prev = NULL; +} + +static inline int list_hashed(const struct list_head *n) +{ + return n->next != n && n->next != NULL; +} + +static inline int list_empty(const struct list_head *n) +{ + return !list_hashed(n); +} + +#define list_next(ptr, type, member) \ + (list_hashed(ptr) ? 
container_of((ptr)->next,type,member) : NULL) + +#define list_entry(ptr, type, member) container_of(ptr,type,member) + +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + +#endif diff --git a/nhrp/nhrp_address.c b/nhrp/nhrp_address.c new file mode 100644 index 0000000..13164e1 --- /dev/null +++ b/nhrp/nhrp_address.c @@ -0,0 +1,454 @@ +/* nhrp_address.c - NHRP address conversion functions + * + * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#include <stdio.h> +#include <string.h> + +#include <netdb.h> +#include <arpa/inet.h> +#include <arpa/nameser.h> +#include <linux/ip.h> + +#include <ares.h> +#include <ares_version.h> + +#include "afnum.h" +#include "nhrp_address.h" +#include "nhrp_packet.h" +#include "nhrp_common.h" + +struct nhrp_resolver { + ares_channel channel; + struct ev_prepare prepare; + struct ev_timer timeout; + struct ev_io fds[4]; +}; + +static struct nhrp_resolver resolver; + +static void ares_timeout_cb(struct ev_timer *w, int revents) +{ + struct nhrp_resolver *r = + container_of(w, struct nhrp_resolver, timeout); + + ares_process(r->channel, NULL, NULL); +} + +static void ares_prepare_cb(struct ev_prepare *w, int revents) +{ + struct nhrp_resolver *r = + container_of(w, struct nhrp_resolver, prepare); + struct timeval *tv, tvbuf; + + tv = ares_timeout(r->channel, NULL, &tvbuf); + if (tv != NULL) { + r->timeout.repeat = tv->tv_sec + tv->tv_usec * 1e-6; + ev_timer_again(&r->timeout); + } else { + ev_timer_stop(&r->timeout); + } +} + +static void ares_io_cb(struct ev_io *w, int revents) +{ + ares_socket_t rfd = ARES_SOCKET_BAD, wfd = ARES_SOCKET_BAD; + + if (revents & EV_READ) + rfd = w->fd; + if (revents & EV_WRITE) + wfd = w->fd; + + ares_process_fd(resolver.channel, rfd, wfd); +} + +static void ares_socket_cb(void *data, ares_socket_t fd, + int readable, int writable) +{ + struct nhrp_resolver *r = (struct nhrp_resolver *) data; + int i, fi = -1, events = 0; + + if (readable) + events |= EV_READ; + if (writable) + events |= EV_WRITE; + + for (i = 0; i < ARRAY_SIZE(r->fds); i++) { + if (r->fds[i].fd == fd) + break; + if (fi < 0 && r->fds[i].fd == 0) + fi = i; + } + + if (events) { + if (i >= ARRAY_SIZE(r->fds)) { + NHRP_BUG_ON(fi == -1); + i = fi; + } else { + ev_io_stop(&r->fds[fi]); + } + ev_io_set(&r->fds[i], fd, events); + ev_io_start(&r->fds[i]); + } else if (i < ARRAY_SIZE(r->fds)) { + ev_io_stop(&r->fds[i]); + ev_io_set(&r->fds[i], 0, 0); + } +} + +static int 
bitcmp(const uint8_t *a, const uint8_t *b, int len) +{ + int bytes, bits, mask, r; + + bytes = len / 8; + bits = len % 8; + + if (bytes != 0) { + r = memcmp(a, b, bytes); + if (r != 0) + return r; + } + if (bits != 0) { + mask = (0xff << (8 - bits)) & 0xff; + return ((int) (a[bytes] & mask)) - ((int) (b[bytes] & mask)); + } + return 0; +} + +uint16_t nhrp_protocol_from_pf(uint16_t pf) +{ + switch (pf) { + case PF_INET: + return ETHPROTO_IP; + } + return 0; +} + +uint16_t nhrp_pf_from_protocol(uint16_t protocol) +{ + switch (protocol) { + case ETHPROTO_IP: + return PF_INET; + } + return PF_UNSPEC; +} + +uint16_t nhrp_afnum_from_pf(uint16_t pf) +{ + switch (pf) { + case PF_INET: + return AFNUM_INET; + } + return AFNUM_RESERVED; +} + +uint16_t nhrp_pf_from_afnum(uint16_t afnum) +{ + switch (afnum) { + case AFNUM_INET: + return PF_INET; + } + return PF_UNSPEC; +} + +int nhrp_address_parse(const char *string, + struct nhrp_address *addr, + uint8_t *prefix_len) +{ + uint8_t tmp; + int r; + + /* Try IP address format */ + r = sscanf(string, "%hhd.%hhd.%hhd.%hhd/%hhd", + &addr->addr[0], &addr->addr[1], + &addr->addr[2], &addr->addr[3], + prefix_len ? 
prefix_len : &tmp); + if ((r == 4) || (r == 5 && prefix_len != NULL)) { + addr->type = PF_INET; + addr->addr_len = 4; + addr->subaddr_len = 0; + if (r == 4 && prefix_len != NULL) + *prefix_len = 32; + return TRUE; + } + + return FALSE; +} + +int nhrp_address_parse_packet(uint16_t protocol, size_t len, uint8_t *packet, + struct nhrp_address *src, struct nhrp_address *dst) +{ + int pf; + struct iphdr *iph; + + pf = nhrp_pf_from_protocol(protocol); + switch (protocol) { + case ETHPROTO_IP: + if (len < sizeof(struct iphdr)) + return FALSE; + + iph = (struct iphdr *) packet; + if (src != NULL) + nhrp_address_set(src, pf, 4, (uint8_t *) &iph->saddr); + if (dst != NULL) + nhrp_address_set(dst, pf, 4, (uint8_t *) &iph->daddr); + break; + default: + return FALSE; + } + + return TRUE; +} + +#if ARES_VERSION_MAJOR > 1 || ARES_VERSION_MINOR > 4 +static void ares_address_cb(void *arg, int status, int timeouts, + struct hostent *he) +#else +static void ares_address_cb(void *arg, int status, struct hostent *he) +#endif +{ + struct nhrp_address_query *query = + (struct nhrp_address_query *) arg; + struct nhrp_address addr[16]; + int i; + + if (status == ARES_SUCCESS) { + for (i = 0; he->h_addr_list[i] != NULL && + i < ARRAY_SIZE(addr); i++) + nhrp_address_set(&addr[i], AF_INET, he->h_length, + (uint8_t *) he->h_addr_list[i]); + } else + i = -1; + + NHRP_BUG_ON(query->callback == NULL); + + query->callback(query, i, &addr[0]); + query->callback = NULL; +} + +void nhrp_address_resolve(struct nhrp_address_query *query, + const char *hostname, + nhrp_address_query_callback callback) +{ + if (query->callback != NULL) { + nhrp_error("Trying to resolve '%s', but previous query " + "was not finished yet", hostname); + return; + } + + query->callback = callback; + ares_gethostbyname(resolver.channel, hostname, AF_INET, + ares_address_cb, query); +} + +void nhrp_address_resolve_cancel(struct nhrp_address_query *query) +{ + /* The kills all active queries; not just the one + * given as 
parameter. But as those will be retried later + * anyway, it is not a problem for now. */ + + if (query->callback != NULL) + ares_cancel(resolver.channel); +} + +void nhrp_address_set_type(struct nhrp_address *addr, uint16_t type) +{ + addr->type = type; + addr->addr_len = addr->subaddr_len = 0; +} + +int nhrp_address_set(struct nhrp_address *addr, uint16_t type, uint8_t len, uint8_t *bytes) +{ + if (len > NHRP_MAX_ADDRESS_LEN) + return FALSE; + + addr->type = type; + addr->addr_len = len; + addr->subaddr_len = 0; + if (len != 0) + memcpy(addr->addr, bytes, len); + return TRUE; +} + +int nhrp_address_set_full(struct nhrp_address *addr, uint16_t type, + uint8_t len, uint8_t *bytes, + uint8_t sublen, uint8_t *subbytes) +{ + if (len + sublen > NHRP_MAX_ADDRESS_LEN) + return FALSE; + + addr->type = type; + addr->addr_len = len; + addr->subaddr_len = 0; + if (len != 0) + memcpy(addr->addr, bytes, len); + if (sublen != 0) + memcpy(&addr->addr[len], subbytes, sublen); + return TRUE; +} + +int nhrp_address_cmp(const struct nhrp_address *a, const struct nhrp_address *b) +{ + if (a->type > b->type) + return 1; + if (a->type < b->type) + return -1; + if (a->addr_len > b->addr_len || a->subaddr_len > b->subaddr_len) + return 1; + if (a->addr_len < b->addr_len || a->subaddr_len < b->subaddr_len) + return -1; + return memcmp(a->addr, b->addr, a->addr_len + a->subaddr_len); +} + +int nhrp_address_prefix_cmp(const struct nhrp_address *a, + const struct nhrp_address *b, int prefix) +{ + if (a->type > b->type) + return 1; + if (a->type < b->type) + return -1; + if (a->addr_len * 8 < prefix) + return 1; + if (b->addr_len * 8 < prefix) + return 1; + return bitcmp(a->addr, b->addr, prefix); +} + +int nhrp_address_is_multicast(const struct nhrp_address *addr) +{ + switch (addr->type) { + case PF_INET: + if ((addr->addr[0] & 0xf0) == 0xe0) + return TRUE; + break; + } + return FALSE; +} + +int nhrp_address_is_any_addr(const struct nhrp_address *addr) +{ + switch (addr->type) { + case 
PF_UNSPEC: + return TRUE; + case PF_INET: + if (memcmp(addr->addr, "\x00\x00\x00\x00", 4) == 0) + return TRUE; + break; + } + return FALSE; +} + +unsigned int nhrp_address_hash(const struct nhrp_address *addr) +{ + unsigned int hash = 5381; + int i; + + for (i = 0; i < addr->addr_len; i++) + hash = hash * 33 + addr->addr[i]; + + return hash; +} + +void nhrp_address_set_network(struct nhrp_address *addr, int prefix) +{ + int i, bits = 8 * addr->addr_len; + + for (i = prefix; i < bits; i++) + addr->addr[i / 8] &= ~(0x80 >> (i % 8)); +} + +void nhrp_address_set_broadcast(struct nhrp_address *addr, int prefix) +{ + int i, bits = 8 * addr->addr_len; + + for (i = prefix; i < bits; i++) + addr->addr[i / 8] |= 0x80 >> (i % 8); +} + +int nhrp_address_is_network(const struct nhrp_address *addr, int prefix) +{ + int i, bits = 8 * addr->addr_len; + + for (i = prefix; i < bits; i++) + if (addr->addr[i / 8] & (0x80 >> (i % 8))) + return FALSE; + return TRUE; +} + +const char *nhrp_address_format(const struct nhrp_address *addr, + size_t buflen, char *buffer) +{ + switch (addr->type) { + case PF_UNSPEC: + snprintf(buffer, buflen, "(unspecified)"); + break; + case PF_INET: + snprintf(buffer, buflen, "%d.%d.%d.%d", + addr->addr[0], addr->addr[1], + addr->addr[2], addr->addr[3]); + break; + default: + snprintf(buffer, buflen, "(proto 0x%04x)", + addr->type); + break; + } + + return buffer; +} + +int nhrp_address_match_cie_list(struct nhrp_address *nbma_address, + struct nhrp_address *protocol_address, + struct list_head *cie_list) +{ + struct nhrp_cie *cie; + + list_for_each_entry(cie, cie_list, cie_list_entry) { + if (nhrp_address_cmp(&cie->nbma_address, nbma_address) == 0 && + nhrp_address_cmp(&cie->protocol_address, protocol_address) == 0) + return TRUE; + } + + return FALSE; +} + +int nhrp_address_init(void) +{ + struct ares_options ares_opts; + int i; + + memset(&ares_opts, 0, sizeof(ares_opts)); + ares_opts.sock_state_cb = &ares_socket_cb; + ares_opts.sock_state_cb_data = 
&resolver; + ares_opts.timeout = 2; + ares_opts.tries = 3; + if (ares_init_options(&resolver.channel, &ares_opts, + ARES_OPT_SOCK_STATE_CB | ARES_OPT_TIMEOUT | + ARES_OPT_TRIES) != ARES_SUCCESS) + return FALSE; + + ev_timer_init(&resolver.timeout, ares_timeout_cb, 0.0, 0.0); + ev_prepare_init(&resolver.prepare, ares_prepare_cb); + ev_prepare_start(&resolver.prepare); + for (i = 0; i < ARRAY_SIZE(resolver.fds); i++) + ev_init(&resolver.fds[i], ares_io_cb); + + return TRUE; +} + +void nhrp_address_cleanup(void) +{ + int i; + + ev_timer_stop(&resolver.timeout); + ev_prepare_stop(&resolver.prepare); + for (i = 0; i < ARRAY_SIZE(resolver.fds); i++) + ev_io_stop(&resolver.fds[i]); + ares_destroy(resolver.channel); +} diff --git a/nhrp/nhrp_address.h b/nhrp/nhrp_address.h new file mode 100644 index 0000000..e479631 --- /dev/null +++ b/nhrp/nhrp_address.h @@ -0,0 +1,80 @@ +/* nhrp_address.h - NHRP address structures and helpers + * + * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */
+
+#ifndef NHRP_ADDRESS_H
+#define NHRP_ADDRESS_H
+
+#include <stdint.h>
+#include <sys/socket.h>
+#include "list.h"
+
+#define NHRP_MAX_ADDRESS_LEN 6
+
+struct nhrp_cie_list_head;
+struct nhrp_address;
+struct nhrp_address_query;
+
+/* Completion callback type stored in struct nhrp_address_query. */
+typedef void (*nhrp_address_query_callback)(struct nhrp_address_query *query,
+					    int num_addr,
+					    struct nhrp_address *addrs);
+
+/* A typed address: addr[] is a fixed NHRP_MAX_ADDRESS_LEN-byte buffer and
+ * addr_len / subaddr_len are the lengths carried alongside it.
+ * NOTE(review): how address and subaddress bytes share addr[] is decided in
+ * nhrp_address.c, which is not visible here -- confirm before relying on it. */
+struct nhrp_address {
+	uint16_t type;
+	uint8_t addr_len;
+	uint8_t subaddr_len;
+	uint8_t addr[NHRP_MAX_ADDRESS_LEN];
+};
+
+struct nhrp_address_query {
+	nhrp_address_query_callback callback;
+};
+
+uint16_t nhrp_protocol_from_pf(uint16_t pf);
+uint16_t nhrp_pf_from_protocol(uint16_t protocol);
+uint16_t nhrp_afnum_from_pf(uint16_t pf);
+uint16_t nhrp_pf_from_afnum(uint16_t afnum);
+
+int nhrp_address_init(void);
+void nhrp_address_cleanup(void);
+int nhrp_address_parse_packet(uint16_t protocol, size_t len, uint8_t *packet,
+			      struct nhrp_address *src,
+			      struct nhrp_address *dst);
+int nhrp_address_parse(const char *string, struct nhrp_address *addr,
+		       uint8_t *prefix_len);
+void nhrp_address_resolve(struct nhrp_address_query *query,
+			  const char *hostname,
+			  nhrp_address_query_callback callback);
+void nhrp_address_resolve_cancel(struct nhrp_address_query *query);
+void nhrp_address_set_type(struct nhrp_address *addr, uint16_t type);
+int nhrp_address_set(struct nhrp_address *addr, uint16_t type,
+		     uint8_t len, uint8_t *bytes);
+int nhrp_address_set_full(struct nhrp_address *addr, uint16_t type,
+			  uint8_t len, uint8_t *bytes,
+			  uint8_t sublen, uint8_t *subbytes);
+int nhrp_address_cmp(const struct nhrp_address *a, const struct nhrp_address *b);
+int nhrp_address_prefix_cmp(const struct nhrp_address *a, const struct nhrp_address *b,
+			    int prefix);
+unsigned int nhrp_address_hash(const struct nhrp_address *addr);
+void nhrp_address_set_network(struct nhrp_address *addr, int prefix);
+void nhrp_address_set_broadcast(struct nhrp_address *addr, int prefix);
+int nhrp_address_is_network(const struct nhrp_address *addr, int prefix);
+int nhrp_address_is_broadcast(const struct nhrp_address *addr, int prefix);
+int nhrp_address_is_multicast(const struct nhrp_address *addr);
+int nhrp_address_is_any_addr(const struct nhrp_address *addr);
+const char *nhrp_address_format(const struct nhrp_address *addr,
+				size_t buflen, char *buffer);
+
+int nhrp_address_match_cie_list(struct nhrp_address *nbma_address,
+				struct nhrp_address *protocol_address,
+				struct list_head *cie_list);
+
+#endif
diff --git a/nhrp/nhrp_common.h b/nhrp/nhrp_common.h
new file mode 100644
index 0000000..6730e74
--- /dev/null
+++ b/nhrp/nhrp_common.h
@@ -0,0 +1,78 @@
+/* nhrp_common.h - Generic helper functions
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#ifndef NHRP_COMMON_H
+#define NHRP_COMMON_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <linux/if_ether.h>
+
+struct nhrp_interface;
+struct nhrp_address;
+
+extern const char *nhrp_config_file, *nhrp_script_file;
+extern int nhrp_running, nhrp_verbose;
+
+/* Logging */
+void nhrp_log(int level, const char *format, ...);
+
+#define NHRP_LOG_DEBUG 0
+#define NHRP_LOG_INFO 1
+#define NHRP_LOG_ERROR 2
+
+/* Debug messages are forwarded to nhrp_log() only when nhrp_verbose is
+ * non-zero; info and error messages are always forwarded. */
+#define nhrp_debug(...) \
+	do { \
+		if (nhrp_verbose) \
+			nhrp_log(NHRP_LOG_DEBUG, __VA_ARGS__); \
+	} while(0)
+
+#define nhrp_info(...) \
+	nhrp_log(NHRP_LOG_INFO, __VA_ARGS__)
+
+#define nhrp_error(...) \
+	nhrp_log(NHRP_LOG_ERROR, __VA_ARGS__)
+
+void nhrp_perror(const char *message);
+void nhrp_hex_dump(const char *name, const uint8_t *buf, int bytes);
+
+/* Log the failed condition with its source location and abort() when cond
+ * is true.  Wrapped in do { } while (0) so the macro is a single statement:
+ * "if (x) NHRP_BUG_ON(y); else ..." binds the else to the outer if instead
+ * of to the if hidden inside the macro.  Use it with a trailing semicolon. */
+#define NHRP_BUG_ON(cond) do { \
+	if (cond) { \
+		nhrp_error("BUG: failure at %s:%d/%s(): %s!", \
+			   __FILE__, __LINE__, __func__, #cond); \
+		abort(); \
+	} \
+} while (0)
+
+/* Initializers for system dependent stuff */
+int forward_init(void);
+void forward_cleanup(void);
+int forward_local_addresses_changed(void);
+
+int kernel_init(void);
+void kernel_stop_listening(void);
+void kernel_cleanup(void);
+/* mtu uses the <stdint.h> spelling like every other prototype in this
+ * header, rather than the BSD-only u_int16_t alias. */
+int kernel_route(struct nhrp_interface *out_iface,
+		 struct nhrp_address *dest,
+		 struct nhrp_address *default_source,
+		 struct nhrp_address *next_hop,
+		 uint16_t *mtu);
+int kernel_send(uint8_t *packet, size_t bytes, struct nhrp_interface *out,
+		struct nhrp_address *to);
+int kernel_inject_neighbor(struct nhrp_address *neighbor,
+			   struct nhrp_address *hwaddr,
+			   struct nhrp_interface *dev);
+
+int log_init(void);
+int admin_init(const char *socket);
+void server_init(void);
+
+#endif
diff --git a/nhrp/nhrp_defines.h b/nhrp/nhrp_defines.h
new file mode 100644
index 0000000..2812a13
--- /dev/null
+++ b/nhrp/nhrp_defines.h
@@ -0,0 +1,87 @@
+/* nhrp_defines.h - NHRP definitions
+ *
+ * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#ifndef NHRP_DEFINES_H
+#define NHRP_DEFINES_H
+
+#include <stdint.h>
+#include <byteswap.h>
+#include <sys/param.h>
+#include <linux/version.h>
+/* Fallbacks for NULL, TRUE and FALSE; each one applies only when the name
+ * has not already been defined by an earlier header. */
+#ifndef NULL
+#define NULL 0L
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+/* Byte-swap expressions written with shifts and masks only; defined here
+ * only when the names are not already provided. */
+#ifndef __bswap_constant_16
+#define __bswap_constant_16(x) \
+	((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))
+#endif
+#ifndef __bswap_constant_32
+#define __bswap_constant_32(x) \
+	((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
+	 (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
+#endif
+/* constant_{ntoh,hton}{s,l}: the argument itself on big-endian hosts, the
+ * matching __bswap_constant_* expression otherwise. */
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define constant_ntohl(x) (x)
+#define constant_ntohs(x) (x)
+#define constant_htonl(x) (x)
+#define constant_htons(x) (x)
+#else
+#define constant_ntohl(x) __bswap_constant_32(x)
+#define constant_ntohs(x) __bswap_constant_16(x)
+#define constant_htonl(x) __bswap_constant_32(x)
+#define constant_htons(x) __bswap_constant_16(x)
+#endif
+/* Number of elements in a true array (sizeof-based, so not valid for
+ * pointers). */
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+#endif
+/* offsetof fallback: the compiler builtin when available, otherwise the
+ * classic null-pointer member-address form. */
+#ifndef offsetof
+#ifdef __compiler_offsetof
+#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER)
+#else
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+#endif
+/* Single-bit mask; the shifted constant is a signed int. */
+#define BIT(x) (1 << (x))
+/* Pointer to the enclosing 'type' given a pointer to its 'member'; relies on
+ * the GNU statement-expression and typeof extensions. */
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+	const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+	(type *)( (char *)__mptr - offsetof(type,member) );}) 
+#endif
+/* Expands to nothing for GCC >= 3 and to 0 otherwise.
+ * NOTE(review): presumably the bound of a trailing array member -- confirm
+ * against the users of this macro. */
+#if __GNUC__ >= 3
+#define NHRP_EMPTY_ARRAY
+#else
+#define NHRP_EMPTY_ARRAY 0
+#endif
+/* NHRP_NO_NBMA_GRE is defined when building against kernel headers older
+ * than 2.6.0. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#define NHRP_NO_NBMA_GRE
+#endif
+/* 2 * 60 * 60 = 7200; presumably seconds (two hours) -- TODO confirm unit. */
+#define NHRP_DEFAULT_HOLDING_TIME (2 * 60 * 60)
+
+#endif
diff --git a/nhrp/nhrp_interface.c b/nhrp/nhrp_interface.c
new file mode 100644
index 0000000..32c2383
--- /dev/null
+++ b/nhrp/nhrp_interface.c
@@ -0,0 +1,188 @@
+/* nhrp_interface.c - NHRP configuration per interface
+ *
+ * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi>
+ * All
rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <malloc.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/rtnetlink.h>
+#include "nhrp_common.h"
+#include "nhrp_interface.h"
+#include "nhrp_address.h"
+
+#define NHRP_INDEX_HASH_SIZE (1 << 6)
+
+/* All known interfaces, plus a hash of the same objects keyed by the low
+ * bits of their kernel interface index. */
+static struct list_head name_list = LIST_INITIALIZER(name_list);
+static struct hlist_head index_hash[NHRP_INDEX_HASH_SIZE];
+
+/* Build a malloc()ed "key=value" string.
+ * Returns NULL when the allocation fails; the caller owns the result. */
+static char *env(const char *key, const char *value)
+{
+	char *buf;
+	buf = malloc(strlen(key)+strlen(value)+2);
+	if (buf == NULL)
+		return NULL;
+	sprintf(buf, "%s=%s", key, value);
+	return buf;
+}
+
+/* Build a malloc()ed "key=<decimal value>" string.
+ * The 16 spare bytes cover '=', at most 10 decimal digits and the NUL.
+ * Returns NULL when the allocation fails; the caller owns the result. */
+static char *envu32(const char *key, uint32_t value)
+{
+	char *buf;
+	buf = malloc(strlen(key)+16);
+	if (buf == NULL)
+		return NULL;
+	sprintf(buf, "%s=%u", key, value);
+	return buf;
+}
+
+/* Unlink every interface from both containers and free it.
+ * NOTE(review): auth_token and mcast_addr are not released here; whether
+ * they are owned by the interface is not visible in this file. */
+void nhrp_interface_cleanup(void)
+{
+	struct nhrp_interface *iface, *n;
+
+	list_for_each_entry_safe(iface, n, &name_list, name_list_entry) {
+		list_del(&iface->name_list_entry);
+		hlist_del(&iface->index_list_entry);
+		free(iface);
+	}
+}
+
+/* Re-link iface into the name list and into the index_hash bucket selected
+ * by the low bits of iface->index (call after iface->index has changed). */
+void nhrp_interface_hash(struct nhrp_interface *iface)
+{
+	int iidx = iface->index & (NHRP_INDEX_HASH_SIZE - 1);
+
+	list_del(&iface->name_list_entry);
+	list_add(&iface->name_list_entry, &name_list);
+
+	hlist_del(&iface->index_list_entry);
+	hlist_add_head(&iface->index_list_entry, &index_hash[iidx]);
+}
+
+/* Call enumerator(ctx, iface) for each interface.
+ * Stops at, and returns, the first non-zero result; returns 0 otherwise. */
+int nhrp_interface_foreach(nhrp_interface_enumerator enumerator, void *ctx)
+{
+	struct nhrp_interface *iface;
+	int rc;
+
+	list_for_each_entry(iface, &name_list, name_list_entry) {
+		rc = enumerator(ctx, iface);
+		if (rc != 0)
+			return rc;
+	}
+	return 0;
+}
+
+/* Look an interface up by name, optionally creating it.
+ *
+ * A created interface is zero-initialised, gets the default holding time and
+ * the main routing table, and is linked into the name list and into bucket 0
+ * of the index hash (its index is still 0).
+ *
+ * Returns the interface, or NULL when it does not exist and create is zero,
+ * or when memory for a new interface cannot be allocated. */
+struct nhrp_interface *nhrp_interface_get_by_name(const char *name, int create)
+{
+	struct nhrp_interface *iface;
+
+	list_for_each_entry(iface, &name_list, name_list_entry) {
+		if (strcmp(iface->name, name) == 0)
+			return iface;
+	}
+
+	if (!create)
+		return NULL;
+
+	iface = calloc(1, sizeof(struct nhrp_interface));
+	if (iface == NULL)
+		return NULL;
+	iface->holding_time = NHRP_DEFAULT_HOLDING_TIME;
+	iface->route_table = RT_TABLE_MAIN;
+	/* Copy at most size-1 bytes: the last byte stays the NUL written by
+	 * calloc(), so an over-long name is truncated, never unterminated. */
+	strncpy(iface->name, name, sizeof(iface->name) - 1);
+
+	list_init(&iface->peer_list);
+	list_init(&iface->mcast_list);
+	list_add(&iface->name_list_entry, &name_list);
+	hlist_add_head(&iface->index_list_entry, &index_hash[0]);
+
+	return iface;
+}
+
+/* Look an interface up by kernel interface index.
+ * The create argument is accepted but not used: nothing is ever created
+ * here.  Returns NULL when no interface has that index. */
+struct nhrp_interface *nhrp_interface_get_by_index(unsigned int index, int create)
+{
+	struct nhrp_interface *iface;
+	struct hlist_node *n;
+	int iidx = index & (NHRP_INDEX_HASH_SIZE - 1);
+
+	hlist_for_each_entry(iface, n, &index_hash[iidx], index_list_entry) {
+		if (iface->index == index)
+			return iface;
+	}
+
+	return NULL;
+}
+
+/* Return the first configured interface whose NBMA address equals addr, or
+ * which has no NBMA address type (PF_UNSPEC) and no link_index.
+ * Returns NULL when none qualifies. */
+struct nhrp_interface *nhrp_interface_get_by_nbma(struct nhrp_address *addr)
+{
+	struct nhrp_interface *iface;
+
+	list_for_each_entry(iface, &name_list, name_list_entry) {
+		if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED))
+			continue;
+
+		if (nhrp_address_cmp(addr, &iface->nbma_address) == 0)
+			return iface;
+
+		if (iface->nbma_address.type == PF_UNSPEC && !iface->link_index)
+			return iface;
+	}
+
+	return NULL;
+}
+
+/* Return the first interface whose protocol address equals addr, or NULL. */
+struct nhrp_interface *nhrp_interface_get_by_protocol(struct nhrp_address *addr)
+{
+	struct nhrp_interface *iface;
+
+	list_for_each_entry(iface, &name_list, name_list_entry) {
+		if (nhrp_address_cmp(addr, &iface->protocol_address) == 0)
+			return iface;
+	}
+
+	return NULL;
+}
+
+/* Fork and run nhrp_script_file with 'action' as its only argument and an
+ * environment of NHRP_TYPE, NHRP_INTERFACE and NHRP_GRE_KEY.
+ *
+ * Returns FALSE when fork() fails and TRUE in the parent otherwise; the
+ * script's own exit status is not collected here.  The child never returns. */
+int nhrp_interface_run_script(struct nhrp_interface *iface, char *action)
+{
+	const char *argv[] = { nhrp_script_file, action, NULL };
+	char *envp[6];
+	pid_t pid;
+	int i = 0;
+
+	pid = fork();
+	if (pid == -1)
+		return FALSE;
+	if (pid > 0)
+		return TRUE;
+
+	/* Child.  If env()/envu32() fail, the NULL they return simply ends
+	 * the environment list early. */
+	envp[i++] = "NHRP_TYPE=INTERFACE";
+	envp[i++] = env("NHRP_INTERFACE", iface->name);
+	envp[i++] = envu32("NHRP_GRE_KEY", iface->gre_key);
+	envp[i++] = NULL;
+
+	execve(nhrp_script_file, (char **) argv, envp);
+	/* Only reached when execve() failed.  Leave with _exit() so the child
+	 * neither runs the daemon's atexit handlers nor flushes stdio buffers
+	 * it inherited from the parent. */
+	_exit(1);
+}
+
+/* Find the peer on iface whose next_hop_address equals nbma, searching the
+ * nbma_hash bucket chosen by nhrp_address_hash(nbma).  Returns NULL when
+ * there is none. */
+struct nhrp_peer *nhrp_interface_find_peer(struct nhrp_interface *iface,
+					   const struct nhrp_address *nbma)
+{
+	unsigned int key = nhrp_address_hash(nbma) % NHRP_INTERFACE_NBMA_HASH_SIZE;
+	struct nhrp_peer *peer;
+	struct hlist_node *n;
+
+	hlist_for_each_entry(peer, n, &iface->nbma_hash[key], nbma_hash_entry) {
+		if (nhrp_address_cmp(nbma, &peer->next_hop_address) == 0)
+			return peer;
+	}
+	return NULL;
+}
diff --git a/nhrp/nhrp_interface.h b/nhrp/nhrp_interface.h
new file mode 100644
index 0000000..8e3e8df
--- /dev/null
+++ b/nhrp/nhrp_interface.h
@@ -0,0 +1,78 @@
+/* nhrp_interface.h - NHRP configuration per interface definitions
+ *
+ * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#ifndef NHRP_INTERFACE_H
+#define NHRP_INTERFACE_H
+
+#include "nhrp_packet.h"
+#include "nhrp_peer.h"
+
+#define NHRP_INTERFACE_FLAG_NON_CACHING 0x0001 /* Do not cache entries */
+#define NHRP_INTERFACE_FLAG_SHORTCUT 0x0002 /* Create shortcut routes */
+#define NHRP_INTERFACE_FLAG_REDIRECT 0x0004 /* Send redirects */
+#define NHRP_INTERFACE_FLAG_SHORTCUT_DEST 0x0008 /* Advertise routes */
+#define NHRP_INTERFACE_FLAG_CONFIGURED 0x0010 /* Found in config file */
+
+#define NHRP_INTERFACE_NBMA_HASH_SIZE 256
+
+struct nhrp_interface {
+	struct list_head name_list_entry;
+	struct hlist_node index_list_entry;
+
+	/* Configured information */
+	char name[16];
+	unsigned int flags;
+	unsigned int holding_time;
+	struct nhrp_buffer *auth_token;
+	unsigned int route_table;
+
+	/* Cached from kernel interface */
+	unsigned int index, link_index;
+	uint32_t gre_key;
+	uint16_t afnum;
+	uint16_t mtu, nbma_mtu;
+	struct nhrp_address nbma_address;
+	struct nhrp_cie nat_cie;
+
+	/* Actually, we should have list of protocol addresses;
+	 * we might have multiple address and multiple protocol types */
+	struct nhrp_address protocol_address;
+	int protocol_address_prefix;
+
+	/* Peer cache is interface specific */
+	struct list_head peer_list;
+	struct hlist_head nbma_hash[NHRP_INTERFACE_NBMA_HASH_SIZE];
+
+	/* Multicast related stuff */
+	struct list_head mcast_list;
+	int mcast_mask;
+	int mcast_numaddr;
+	struct nhrp_address *mcast_addr;
+};
+
+typedef int (*nhrp_interface_enumerator)(void *ctx, struct nhrp_interface *iface);
+
+void nhrp_interface_cleanup(void);
+void nhrp_interface_hash(struct nhrp_interface *iface);
+int nhrp_interface_foreach(nhrp_interface_enumerator enumerator, void *ctx);
+struct nhrp_interface *nhrp_interface_get_by_name(const char *name, int create);
+struct nhrp_interface *nhrp_interface_get_by_index(unsigned int index, int create);
+struct nhrp_interface *nhrp_interface_get_by_nbma(struct nhrp_address *addr);
+struct nhrp_interface *nhrp_interface_get_by_protocol(struct nhrp_address *addr);
+int nhrp_interface_run_script(struct nhrp_interface *iface, char *action);
+struct nhrp_peer *nhrp_interface_find_peer(struct nhrp_interface *iface, const struct nhrp_address *nbma);
+
+void nhrp_interface_resolve_nbma(struct nhrp_interface *iface,
+				 struct nhrp_address *nbmadest,
+				 struct nhrp_address *nbma);
+
+#endif
diff --git a/nhrp/nhrp_packet.c b/nhrp/nhrp_packet.c
new file mode 100644
index 0000000..f46b481
--- /dev/null
+++ b/nhrp/nhrp_packet.c
@@ -0,0 +1,1331 @@
+/* nhrp_packet.c - NHRP packet marshalling and transceiving
+ *
+ * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ *
+ * See http://www.gnu.org/ for details.
+ */
+
+#include <malloc.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <netinet/in.h>
+
+#include "libev.h"
+#include "nhrp_common.h"
+#include "nhrp_packet.h"
+#include "nhrp_peer.h"
+#include "nhrp_interface.h"
+
+#define PACKET_RETRIES 6
+#define PACKET_RETRY_INTERVAL 5.0
+
+#define RATE_LIMIT_HASH_SIZE 256
+#define RATE_LIMIT_MAX_TOKENS 4
+#define RATE_LIMIT_SEND_INTERVAL 5.0
+#define RATE_LIMIT_SILENCE 360.0
+#define RATE_LIMIT_PURGE_INTERVAL 600.0
+
+#define MAX_PDU_SIZE 1500
+
+/* One rate-limit record per (source, destination) address pair. */
+struct nhrp_rate_limit {
+	struct hlist_node hash_entry;
+	struct nhrp_address src;
+	struct nhrp_address dst;
+	ev_tstamp rate_last;
+	int rate_tokens;
+};
+
+static uint32_t request_id = 0;
+static struct list_head pending_requests = LIST_INITIALIZER(pending_requests);
+static struct hlist_head rate_limit_hash[RATE_LIMIT_HASH_SIZE];
+static ev_timer rate_limit_timer;
+static int num_rate_limit_entries = 0;
+
+static void nhrp_packet_xmit_timeout_cb(struct ev_timer *w, int revents);
+static int unmarshall_packet_header(uint8_t **pdu, size_t *pdusize,
+				    struct nhrp_packet *packet);
+
+/* Drop one record: account for it, unhash it, release it. */
+static void nhrp_rate_limit_delete(struct nhrp_rate_limit *rl)
+{
+	num_rate_limit_entries--;
+	hlist_del(&rl->hash_entry);
+	free(rl);
+}
+
+/* Delete every record when a->type is AF_UNSPEC, otherwise every record
+ * whose source or destination matches a under a pref-bit prefix compare.
+ * The purge timer is stopped once no records remain.
+ * Returns the number of records deleted. */
+int nhrp_rate_limit_clear(struct nhrp_address *a, int pref)
+{
+	struct nhrp_rate_limit *entry;
+	struct hlist_node *cur, *tmp;
+	int bucket, removed = 0;
+
+	for (bucket = 0; bucket < RATE_LIMIT_HASH_SIZE; bucket++) {
+		hlist_for_each_entry_safe(entry, cur, tmp,
+					  &rate_limit_hash[bucket],
+					  hash_entry) {
+			/* Keep the record unless it is selected. */
+			if (a->type != AF_UNSPEC &&
+			    nhrp_address_prefix_cmp(a, &entry->src, pref) != 0 &&
+			    nhrp_address_prefix_cmp(a, &entry->dst, pref) != 0)
+				continue;
+
+			nhrp_rate_limit_delete(entry);
+			removed++;
+		}
+	}
+
+	if (!num_rate_limit_entries)
+		ev_timer_stop(&rate_limit_timer);
+
+	return removed;
+}
+
+/* Timer callback: delete records whose rate_last is more than
+ * 2 * RATE_LIMIT_SILENCE older than the current event-loop time, and stop
+ * the timer once the table is empty. */
+static void prune_rate_limit_entries_cb(struct ev_timer *w, int revents)
+{
+	struct nhrp_rate_limit *entry;
+	struct hlist_node *cur, *tmp;
+	int bucket;
+
+	for (bucket = 0; bucket < RATE_LIMIT_HASH_SIZE; bucket++) {
+		hlist_for_each_entry_safe(entry, cur, tmp,
+					  &rate_limit_hash[bucket],
+					  hash_entry) {
+			ev_tstamp expiry;
+
+			expiry = entry->rate_last + 2 * RATE_LIMIT_SILENCE;
+			if (ev_now() > expiry)
+				nhrp_rate_limit_delete(entry);
+		}
+	}
+
+	if (!num_rate_limit_entries)
+		ev_timer_stop(&rate_limit_timer);
+}
+
+/* Return the record for the (src, dst) pair, creating a zeroed one when
+ * none exists.  Creating the first record also arms the purge timer with
+ * RATE_LIMIT_PURGE_INTERVAL as both initial delay and repeat.
+ * NOTE(review): the calloc() result is used unchecked, as in the original;
+ * the callers are outside this part of the file, so whether they could
+ * cope with a NULL return needs confirming before adding a check. */
+static struct nhrp_rate_limit *get_rate_limit(struct nhrp_address *src,
+					      struct nhrp_address *dst)
+{
+	struct nhrp_rate_limit *rl;
+	struct hlist_node *cur;
+	struct hlist_head *bucket;
+	unsigned int slot;
+
+	slot = (nhrp_address_hash(src) ^ nhrp_address_hash(dst)) %
+		RATE_LIMIT_HASH_SIZE;
+	bucket = &rate_limit_hash[slot];
+
+	hlist_for_each_entry(rl, cur, bucket, hash_entry) {
+		if (nhrp_address_cmp(&rl->src, src) != 0)
+			continue;
+		if (nhrp_address_cmp(&rl->dst, dst) != 0)
+			continue;
+		return rl;
+	}
+
+	rl = calloc(1, sizeof(struct nhrp_rate_limit));
+	rl->src = *src;
+	rl->dst = *dst;
+	hlist_add_head(&rl->hash_entry, bucket);
+
+	if (!num_rate_limit_entries) {
+		ev_timer_init(&rate_limit_timer, prune_rate_limit_entries_cb,
+			      RATE_LIMIT_PURGE_INTERVAL,
+			      RATE_LIMIT_PURGE_INTERVAL);
+		ev_timer_start(&rate_limit_timer);
+	}
+
+	num_rate_limit_entries++;
+
+	return rl;
+}
+ +static uint16_t nhrp_calculate_checksum(uint8_t *pdu, uint16_t len) +{ + uint16_t *pdu16 = (uint16_t *) pdu; + uint32_t csum = 0; + int i; + + for (i = 0; i < len / 2; i++) + csum += pdu16[i]; + if (len & 1) + csum += htons(pdu[len - 1]); + + while (csum & 0xffff0000) + csum = (csum & 0xffff) + (csum >> 16); + + return (~csum) & 0xffff; +} + +struct nhrp_buffer *nhrp_buffer_alloc(uint32_t size) +{ + struct nhrp_buffer *buf; + + buf = malloc(sizeof(struct nhrp_buffer) + size); + buf->length = size; + + return buf; +} + +struct nhrp_buffer *nhrp_buffer_copy(struct nhrp_buffer *buffer) +{ + struct nhrp_buffer *copy; + + copy = nhrp_buffer_alloc(buffer->length); + memcpy(copy->data, buffer->data, buffer->length); + return copy; +} + +int nhrp_buffer_cmp(struct nhrp_buffer *a, struct nhrp_buffer *b) +{ + if (a->length > b->length) + return 1; + if (a->length < b->length) + return -1; + return memcmp(a->data, b->data, a->length); +} + +void nhrp_buffer_free(struct nhrp_buffer *buffer) +{ + free(buffer); +} + +struct nhrp_cie *nhrp_cie_alloc(void) +{ + return calloc(1, sizeof(struct nhrp_cie)); +} + +void nhrp_cie_free(struct nhrp_cie *cie) +{ + free(cie); +} + +void nhrp_cie_reset(struct nhrp_cie *cie) +{ + memset(&cie->cie_list_entry, 0, sizeof(cie->cie_list_entry)); +} + +void nhrp_payload_free(struct nhrp_payload *payload) +{ + struct nhrp_cie *cie, *n; + + switch (payload->payload_type) { + case NHRP_PAYLOAD_TYPE_RAW: + nhrp_buffer_free(payload->u.raw); + break; + case NHRP_PAYLOAD_TYPE_CIE_LIST: + list_for_each_entry_safe(cie, n, &payload->u.cie_list, cie_list_entry) { + list_del(&cie->cie_list_entry); + nhrp_cie_free(cie); + } + break; + } + payload->payload_type = NHRP_PAYLOAD_TYPE_NONE; +} + +void nhrp_payload_set_type(struct nhrp_payload *payload, int type) +{ + if (payload->payload_type == type) + return; + + nhrp_payload_free(payload); + payload->payload_type = type; + switch (type) { + case NHRP_PAYLOAD_TYPE_CIE_LIST: + list_init(&payload->u.cie_list); + 
break; + default: + payload->u.raw = NULL; + break; + } +} + +void nhrp_payload_set_raw(struct nhrp_payload *payload, struct nhrp_buffer *raw) +{ + nhrp_payload_set_type(payload, NHRP_PAYLOAD_TYPE_RAW); + payload->u.raw = raw; +} + +void nhrp_payload_add_cie(struct nhrp_payload *payload, struct nhrp_cie *cie) +{ + if (payload->payload_type != NHRP_PAYLOAD_TYPE_CIE_LIST) { + nhrp_cie_free(cie); + nhrp_info("Trying to add CIE payload to non-CIE payload %d\n", + payload->payload_type); + return; + } + + list_add_tail(&cie->cie_list_entry, &payload->u.cie_list); +} + +struct nhrp_cie *nhrp_payload_get_cie(struct nhrp_payload *payload, int index) +{ + struct nhrp_cie *cie; + + if (payload->payload_type != NHRP_PAYLOAD_TYPE_CIE_LIST) + return NULL; + + list_for_each_entry(cie, &payload->u.cie_list, cie_list_entry) { + index--; + if (index == 0) + return cie; + } + + return NULL; +} + +struct nhrp_packet *nhrp_packet_alloc(void) +{ + struct nhrp_packet *packet; + packet = calloc(1, sizeof(struct nhrp_packet)); + packet->ref = 1; + packet->hdr.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT; + list_init(&packet->request_list_entry); + ev_timer_init(&packet->timeout, nhrp_packet_xmit_timeout_cb, + PACKET_RETRY_INTERVAL, PACKET_RETRY_INTERVAL); + return packet; +} + +struct nhrp_packet *nhrp_packet_get(struct nhrp_packet *packet) +{ + packet->ref++; + return packet; +} + +struct nhrp_payload *nhrp_packet_payload(struct nhrp_packet *packet, int payload_type) +{ + return nhrp_packet_extension(packet, NHRP_EXTENSION_PAYLOAD, payload_type); +} + +struct nhrp_payload *nhrp_packet_extension(struct nhrp_packet *packet, + uint32_t extension, int payload_type) +{ + struct nhrp_payload *p; + + p = packet->extension_by_type[extension & 0x7fff]; + if (p != NULL) { + if (payload_type == NHRP_PAYLOAD_TYPE_ANY || + payload_type == p->payload_type) + return p; + if (extension & NHRP_EXTENSION_FLAG_NOCREATE) + return NULL; + nhrp_payload_set_type(p, payload_type); + return p; + } + + if (extension 
& NHRP_EXTENSION_FLAG_NOCREATE) + return NULL; + + p = &packet->extension_by_order[packet->num_extensions++]; + p->extension_type = extension & 0xffff; + packet->extension_by_type[extension & 0x7fff] = p; + if (payload_type != NHRP_PAYLOAD_TYPE_ANY) + nhrp_payload_set_type(p, payload_type); + + return p; +} + +static void nhrp_packet_release(struct nhrp_packet *packet) +{ + int i; + + if (packet->dst_peer != NULL) + nhrp_peer_put(packet->dst_peer); + for (i = 0; i < packet->num_extensions; i++) + nhrp_payload_free(&packet->extension_by_order[i]); + free(packet); +} + +void nhrp_packet_put(struct nhrp_packet *packet) +{ + NHRP_BUG_ON(packet->ref == 0); + + packet->ref--; + if (packet->ref == 0) + nhrp_packet_release(packet); +} + +int nhrp_packet_reroute(struct nhrp_packet *packet, struct nhrp_peer *dst_peer) +{ + packet->dst_iface = packet->src_iface; + if (packet->dst_peer != NULL) + nhrp_peer_put(packet->dst_peer); + packet->dst_peer = nhrp_peer_get(dst_peer); + return nhrp_packet_route(packet); +} + +static void nhrp_packet_dequeue(struct nhrp_packet *packet) +{ + ev_timer_stop(&packet->timeout); + if (list_hashed(&packet->request_list_entry)) + list_del(&packet->request_list_entry); + nhrp_packet_put(packet); +} + +static int nhrp_do_handle_error_indication(struct nhrp_packet *error_pkt, + struct nhrp_packet *orig_pkt) +{ + struct nhrp_packet *req; + + list_for_each_entry(req, &pending_requests, request_list_entry) { + if (orig_pkt->hdr.u.request_id != req->hdr.u.request_id) + continue; + + if (nhrp_address_cmp(&orig_pkt->src_nbma_address, + &req->src_nbma_address)) + continue; + if (nhrp_address_cmp(&orig_pkt->src_protocol_address, + &req->src_protocol_address)) + continue; + + if (req->handler != NULL) + req->handler(req->handler_ctx, error_pkt); + nhrp_packet_dequeue(req); + + return TRUE; + } + + return FALSE; +} + +static int nhrp_handle_error_indication(struct nhrp_packet *error_packet) +{ + struct nhrp_packet *packet; + struct nhrp_payload *payload; + 
uint8_t *pdu; + size_t pduleft; + int r; + + packet = nhrp_packet_alloc(); + if (packet == NULL) + return FALSE; + + payload = nhrp_packet_payload(error_packet, NHRP_PAYLOAD_TYPE_RAW); + pdu = payload->u.raw->data; + pduleft = payload->u.raw->length; + + if (!unmarshall_packet_header(&pdu, &pduleft, packet)) { + nhrp_packet_put(packet); + return FALSE; + } + + r = nhrp_do_handle_error_indication(error_packet, packet); + nhrp_packet_put(packet); + + return r; +} + +#define NHRP_TYPE_REQUEST 0 +#define NHRP_TYPE_REPLY 1 +#define NHRP_TYPE_INDICATION 2 + +static struct { + int type; + uint16_t payload_type; + int (*handler)(struct nhrp_packet *packet); +} packet_types[] = { + [NHRP_PACKET_RESOLUTION_REQUEST] = { + .type = NHRP_TYPE_REQUEST, + .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST, + }, + [NHRP_PACKET_RESOLUTION_REPLY] = { + .type = NHRP_TYPE_REPLY, + .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST, + }, + [NHRP_PACKET_REGISTRATION_REQUEST] = { + .type = NHRP_TYPE_REQUEST, + .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST, + }, + [NHRP_PACKET_REGISTRATION_REPLY] = { + .type = NHRP_TYPE_REPLY, + .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST, + }, + [NHRP_PACKET_PURGE_REQUEST] = { + .type = NHRP_TYPE_REQUEST, + .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST, + }, + [NHRP_PACKET_PURGE_REPLY] = { + .type = NHRP_TYPE_REPLY, + .payload_type = NHRP_PAYLOAD_TYPE_CIE_LIST, + }, + [NHRP_PACKET_ERROR_INDICATION] = { + .type = NHRP_TYPE_INDICATION, + .payload_type = NHRP_PAYLOAD_TYPE_RAW, + .handler = nhrp_handle_error_indication, + }, + [NHRP_PACKET_TRAFFIC_INDICATION] = { + .type = NHRP_TYPE_INDICATION, + .payload_type = NHRP_PAYLOAD_TYPE_RAW, + } +}; +static int extension_types[] = { + [NHRP_EXTENSION_RESPONDER_ADDRESS] = NHRP_PAYLOAD_TYPE_CIE_LIST, + [NHRP_EXTENSION_FORWARD_TRANSIT_NHS] = NHRP_PAYLOAD_TYPE_CIE_LIST, + [NHRP_EXTENSION_REVERSE_TRANSIT_NHS] = NHRP_PAYLOAD_TYPE_CIE_LIST, + [NHRP_EXTENSION_NAT_ADDRESS] = NHRP_PAYLOAD_TYPE_CIE_LIST +}; + +static int unmarshall_binary(uint8_t 
**pdu, size_t *pduleft, size_t size, void *raw) +{ + if (*pduleft < size) + return FALSE; + + memcpy(raw, *pdu, size); + *pdu += size; + *pduleft -= size; + return TRUE; +} + +static inline int unmarshall_protocol_address(uint8_t **pdu, size_t *pduleft, struct nhrp_address *pa) +{ + if (*pduleft < pa->addr_len) + return FALSE; + + if (pa->addr_len) { + if (!nhrp_address_set(pa, pa->type, pa->addr_len, *pdu)) + return FALSE; + } else { + nhrp_address_set_type(pa, PF_UNSPEC); + } + + *pdu += pa->addr_len; + *pduleft -= pa->addr_len; + return TRUE; +} + +static inline int unmarshall_nbma_address(uint8_t **pdu, size_t *pduleft, struct nhrp_address *na) +{ + if (*pduleft < na->addr_len + na->subaddr_len) + return FALSE; + + if (na->addr_len || na->subaddr_len) { + if (!nhrp_address_set_full(na, na->type, + na->addr_len, *pdu, + na->subaddr_len, *pdu + na->addr_len)) + return FALSE; + } else { + nhrp_address_set_type(na, PF_UNSPEC); + } + + *pdu += na->addr_len + na->subaddr_len; + *pduleft -= na->addr_len + na->subaddr_len; + return TRUE; +} + +static int unmarshall_cie(uint8_t **pdu, size_t *pduleft, struct nhrp_packet *p, struct nhrp_cie *cie) +{ + if (!unmarshall_binary(pdu, pduleft, sizeof(struct nhrp_cie_header), &cie->hdr)) + return FALSE; + + cie->nbma_address.type = nhrp_pf_from_afnum(p->hdr.afnum); + cie->nbma_address.addr_len = cie->hdr.nbma_address_len; + cie->nbma_address.subaddr_len = cie->hdr.nbma_subaddress_len; + cie->protocol_address.type = nhrp_pf_from_protocol(p->hdr.protocol_type); + cie->protocol_address.addr_len = cie->hdr.protocol_address_len; + + if (!unmarshall_nbma_address(pdu, pduleft, &cie->nbma_address)) + return FALSE; + return unmarshall_protocol_address(pdu, pduleft, &cie->protocol_address); +} + +static int unmarshall_payload(uint8_t **pdu, size_t *pduleft, + struct nhrp_packet *packet, + int type, size_t size, + struct nhrp_payload *p) +{ + struct nhrp_cie *cie; + size_t cieleft; + + if (*pduleft < size) + return FALSE; + + 
nhrp_payload_set_type(p, type); + switch (p->payload_type) { + case NHRP_PAYLOAD_TYPE_NONE: + *pdu += size; + *pduleft -= size; + return TRUE; + case NHRP_PAYLOAD_TYPE_RAW: + p->u.raw = nhrp_buffer_alloc(size); + return unmarshall_binary(pdu, pduleft, size, p->u.raw->data); + case NHRP_PAYLOAD_TYPE_CIE_LIST: + cieleft = size; + while (cieleft) { + cie = nhrp_cie_alloc(); + list_add_tail(&cie->cie_list_entry, &p->u.cie_list); + if (!unmarshall_cie(pdu, &cieleft, packet, cie)) + return FALSE; + } + *pduleft -= size; + return TRUE; + default: + return FALSE; + } +} + +static int unmarshall_packet_header(uint8_t **pdu, size_t *pduleft, struct nhrp_packet *packet) +{ + struct nhrp_packet_header *phdr = (struct nhrp_packet_header *) *pdu; + + if (!unmarshall_binary(pdu, pduleft, sizeof(packet->hdr), &packet->hdr)) + return FALSE; + + if (packet->hdr.type >= ARRAY_SIZE(packet_types)) + return FALSE; + + packet->src_nbma_address.type = nhrp_pf_from_afnum(packet->hdr.afnum); + packet->src_nbma_address.addr_len = phdr->src_nbma_address_len; + packet->src_nbma_address.subaddr_len = phdr->src_nbma_subaddress_len; + packet->src_protocol_address.type = nhrp_pf_from_protocol(packet->hdr.protocol_type); + packet->src_protocol_address.addr_len = phdr->src_protocol_address_len; + packet->dst_protocol_address.type = nhrp_pf_from_protocol(packet->hdr.protocol_type); + packet->dst_protocol_address.addr_len = phdr->dst_protocol_address_len; + + if (!unmarshall_nbma_address(pdu, pduleft, &packet->src_nbma_address)) + return FALSE; + if (!unmarshall_protocol_address(pdu, pduleft, &packet->src_protocol_address)) + return FALSE; + return unmarshall_protocol_address(pdu, pduleft, &packet->dst_protocol_address); +} + +static int unmarshall_packet(uint8_t *pdu, size_t pdusize, struct nhrp_packet *packet) +{ + size_t pduleft = pdusize; + uint8_t *pos = pdu; + int size, extension_offset; + + if (!unmarshall_packet_header(&pos, &pduleft, packet)) + return FALSE; + + extension_offset = 
ntohs(packet->hdr.extension_offset); + if (extension_offset == 0) { + /* No extensions; rest of data is payload */ + size = pduleft; + } else { + /* Extensions present; exclude those from payload */ + size = extension_offset - (pos - pdu); + if (size < 0 || size > pduleft) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ERROR, pos - pdu); + return FALSE; + } + } + + if (!unmarshall_payload(&pos, &pduleft, packet, + packet_types[packet->hdr.type].payload_type, + size, nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_ANY))) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ERROR, pos - pdu); + return FALSE; + } + + if (extension_offset == 0) + return TRUE; + + pos = &pdu[extension_offset]; + pduleft = pdusize - extension_offset; + do { + struct nhrp_extension_header eh; + int extension_type, payload_type; + + if (!unmarshall_binary(&pos, &pduleft, sizeof(eh), &eh)) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ERROR, pos - pdu); + return FALSE; + } + + extension_type = ntohs(eh.type) & ~NHRP_EXTENSION_FLAG_COMPULSORY; + if (extension_type == NHRP_EXTENSION_END) + break; + + payload_type = NHRP_PAYLOAD_TYPE_NONE; + if (extension_type < ARRAY_SIZE(extension_types)) + payload_type = extension_types[extension_type]; + if (payload_type == NHRP_PAYLOAD_TYPE_NONE) + payload_type = NHRP_PAYLOAD_TYPE_RAW; + if (payload_type == NHRP_PAYLOAD_TYPE_RAW && + ntohs(eh.length) == 0) + payload_type = NHRP_PAYLOAD_TYPE_NONE; + + if (!unmarshall_payload(&pos, &pduleft, packet, + payload_type, ntohs(eh.length), + nhrp_packet_extension(packet, ntohs(eh.type), NHRP_PAYLOAD_TYPE_ANY))) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ERROR, pos - pdu); + return FALSE; + } + } while (1); + + return TRUE; +} + +static int nhrp_packet_forward(struct nhrp_packet *packet) +{ + char tmp[64], tmp2[64], tmp3[64]; + struct nhrp_payload *p = NULL; + + nhrp_info("Forwarding packet from nbma src %s, proto src %s to proto dst %s, hop count %d", + 
nhrp_address_format(&packet->src_nbma_address, + sizeof(tmp), tmp), + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp2), tmp2), + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp3), tmp3), + packet->hdr.hop_count); + + if (packet->hdr.hop_count == 0) { + nhrp_packet_send_error(packet, NHRP_ERROR_HOP_COUNT_EXCEEDED, 0); + return TRUE; + } + packet->hdr.hop_count--; + + if (!nhrp_packet_reroute(packet, NULL)) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE, 0); + return FALSE; + } + + switch (packet_types[packet->hdr.type].type) { + case NHRP_TYPE_REQUEST: + case NHRP_TYPE_INDICATION: + p = nhrp_packet_extension(packet, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + break; + case NHRP_TYPE_REPLY: + p = nhrp_packet_extension(packet, + NHRP_EXTENSION_REVERSE_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + break; + } + if (p != NULL) { + struct nhrp_cie *cie; + + if (nhrp_address_match_cie_list(&packet->dst_peer->my_nbma_address, + &packet->dst_iface->protocol_address, + &p->u.cie_list)) { + nhrp_packet_send_error(packet, NHRP_ERROR_LOOP_DETECTED, 0); + return FALSE; + } + + cie = nhrp_cie_alloc(); + if (cie != NULL) { + cie->hdr = (struct nhrp_cie_header) { + .code = NHRP_CODE_SUCCESS, + .holding_time = htons(packet->dst_iface->holding_time), + }; + cie->nbma_address = packet->dst_peer->my_nbma_address; + cie->protocol_address = packet->dst_iface->protocol_address; + nhrp_payload_add_cie(p, cie); + } + } + + return nhrp_packet_route_and_send(packet); +} + +static int nhrp_packet_receive_local(struct nhrp_packet *packet) +{ + struct nhrp_packet *req; + char tmp[64], tmp2[64], tmp3[64]; + + if (packet_types[packet->hdr.type].type == NHRP_TYPE_REPLY) { + list_for_each_entry(req, &pending_requests, request_list_entry) { + if (packet->hdr.u.request_id != req->hdr.u.request_id) + continue; + if 
(nhrp_address_cmp(&packet->src_nbma_address, + &req->src_nbma_address)) + continue; + if (nhrp_address_cmp(&packet->src_protocol_address, + &req->src_protocol_address)) + continue; + + if (req->handler != NULL) + req->handler(req->handler_ctx, packet); + nhrp_packet_dequeue(req); + + return TRUE; + } + + /* Reply to unsent request? */ + nhrp_info("Packet type %d from nbma src %s, proto src %s, " + "proto dst %s dropped: no matching request", + packet->hdr.type, + nhrp_address_format(&packet->src_nbma_address, + sizeof(tmp), tmp), + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp2), tmp2), + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp3), tmp3)); + + nhrp_packet_send_error( + packet, NHRP_ERROR_INVALID_RESOLUTION_REPLY, 0); + return TRUE; + } + + if (packet_types[packet->hdr.type].handler == NULL) { + nhrp_info("Packet type %d from nbma src %s, proto src %s, " + "proto dst %s not supported", + packet->hdr.type, + nhrp_address_format(&packet->src_nbma_address, + sizeof(tmp), tmp), + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp2), tmp2), + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp3), tmp3)); + return FALSE; + } + + if (packet->dst_peer->next_hop_address.type != PF_UNSPEC) { + /* Broadcast destinations gets rewritten as if destinied to + * our local address */ + packet->dst_protocol_address = + packet->dst_peer->next_hop_address; + } + + return packet_types[packet->hdr.type].handler(packet); +} + +int nhrp_packet_receive(uint8_t *pdu, size_t pdulen, + struct nhrp_interface *iface, + struct nhrp_address *from) +{ + char tmp[64]; + struct nhrp_packet *packet; + struct nhrp_address *dest; + struct nhrp_peer *peer; + int ret = FALSE; + + if (nhrp_calculate_checksum(pdu, pdulen) != 0) { + nhrp_error("Bad checksum in packet from %s", + nhrp_address_format(from, sizeof(tmp), tmp)); + return FALSE; + } + + packet = nhrp_packet_alloc(); + if (packet == NULL) + return FALSE; + + if 
(!unmarshall_packet(pdu, pdulen, packet)) { + nhrp_error("Failed to unmarshall packet from %s", + nhrp_address_format(from, sizeof(tmp), tmp)); + goto error; + } + + packet->req_pdu = pdu; + packet->req_pdulen = pdulen; + + if (packet_types[packet->hdr.type].type == NHRP_TYPE_REPLY) + dest = &packet->src_protocol_address; + else + dest = &packet->dst_protocol_address; + + peer = nhrp_peer_route(iface, dest, 0, BIT(NHRP_PEER_TYPE_LOCAL_ADDR)); + packet->src_linklayer_address = *from; + packet->src_iface = iface; + packet->dst_peer = nhrp_peer_get(peer); + + /* RFC2332 5.3.4 - Authentication is always done pairwise on an NHRP + * hop-by-hop basis; i.e. regenerated at each hop. */ + if (packet->src_iface->auth_token && + (packet->hdr.type != NHRP_PACKET_ERROR_INDICATION || + packet->hdr.u.error.code != NHRP_ERROR_AUTHENTICATION_FAILURE)) { + struct nhrp_payload *p; + p = nhrp_packet_extension(packet, + NHRP_EXTENSION_AUTHENTICATION | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_RAW); + if (p == NULL || + nhrp_buffer_cmp(packet->src_iface->auth_token, p->u.raw) != 0) { + nhrp_error("Dropping packet from %s with bad authentication", + nhrp_address_format(from, sizeof(tmp), tmp)); + nhrp_packet_send_error(packet, NHRP_ERROR_AUTHENTICATION_FAILURE, 0); + goto error; + } + } + + if (peer != NULL && + peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) + ret = nhrp_packet_receive_local(packet); + else + ret = nhrp_packet_forward(packet); + + packet->req_pdu = NULL; + packet->req_pdulen = 0; + +error: + nhrp_packet_put(packet); + return ret; +} + +static int marshall_binary(uint8_t **pdu, size_t *pduleft, size_t size, void *raw) +{ + if (*pduleft < size) + return FALSE; + + memcpy(*pdu, raw, size); + *pdu += size; + *pduleft -= size; + + return TRUE; +} + +static inline int marshall_protocol_address(uint8_t **pdu, size_t *pduleft, struct nhrp_address *pa) +{ + if (pa->subaddr_len != 0) + return FALSE; + return marshall_binary(pdu, pduleft, pa->addr_len, pa->addr); +} + +static 
inline int marshall_nbma_address(uint8_t **pdu, size_t *pduleft, struct nhrp_address *na) +{ + return marshall_binary(pdu, pduleft, na->addr_len + na->subaddr_len, na->addr); +} + +static int marshall_cie(uint8_t **pdu, size_t *pduleft, struct nhrp_cie *cie) +{ + cie->hdr.nbma_address_len = cie->nbma_address.addr_len; + cie->hdr.nbma_subaddress_len = cie->nbma_address.subaddr_len; + cie->hdr.protocol_address_len = cie->protocol_address.addr_len; + + if (!marshall_binary(pdu, pduleft, sizeof(struct nhrp_cie_header), &cie->hdr)) + return FALSE; + if (!marshall_nbma_address(pdu, pduleft, &cie->nbma_address)) + return FALSE; + return marshall_protocol_address(pdu, pduleft, &cie->protocol_address); +} + +static int marshall_payload(uint8_t **pdu, size_t *pduleft, struct nhrp_payload *p) +{ + struct nhrp_cie *cie; + + switch (p->payload_type) { + case NHRP_PAYLOAD_TYPE_NONE: + return TRUE; + case NHRP_PAYLOAD_TYPE_RAW: + if (p->u.raw->length == 0) + return TRUE; + return marshall_binary(pdu, pduleft, p->u.raw->length, p->u.raw->data); + case NHRP_PAYLOAD_TYPE_CIE_LIST: + list_for_each_entry(cie, &p->u.cie_list, cie_list_entry) { + if (!marshall_cie(pdu, pduleft, cie)) + return FALSE; + } + return TRUE; + default: + return FALSE; + } +} + +static int marshall_packet_header(uint8_t **pdu, size_t *pduleft, struct nhrp_packet *packet) +{ + if (!marshall_binary(pdu, pduleft, sizeof(packet->hdr), &packet->hdr)) + return FALSE; + if (!marshall_nbma_address(pdu, pduleft, &packet->src_nbma_address)) + return FALSE; + if (!marshall_protocol_address(pdu, pduleft, &packet->src_protocol_address)) + return FALSE; + return marshall_protocol_address(pdu, pduleft, &packet->dst_protocol_address); +} + +static int marshall_packet(uint8_t *pdu, size_t pduleft, struct nhrp_packet *packet) +{ + uint8_t *pos = pdu; + struct nhrp_packet_header *phdr = (struct nhrp_packet_header *) pdu; + struct nhrp_extension_header neh; + int i, size; + + if (!marshall_packet_header(&pos, &pduleft, packet)) + 
return -1; + if (!marshall_payload(&pos, &pduleft, nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_ANY))) + return -2; + + phdr->extension_offset = htons((int)(pos - pdu)); + for (i = 1; i < packet->num_extensions; i++) { + struct nhrp_extension_header *eh = (struct nhrp_extension_header *) pos; + + if (packet->extension_by_order[i].payload_type == NHRP_PAYLOAD_TYPE_NONE) + continue; + + neh.type = htons(packet->extension_by_order[i].extension_type); + neh.length = 0; + + if (!marshall_binary(&pos, &pduleft, sizeof(neh), &neh)) + return -3; + if (!marshall_payload(&pos, &pduleft, &packet->extension_by_order[i])) + return -4; + eh->length = htons((pos - (uint8_t *) eh) - sizeof(neh)); + } + neh.type = htons(NHRP_EXTENSION_END | NHRP_EXTENSION_FLAG_COMPULSORY); + neh.length = 0; + if (!marshall_binary(&pos, &pduleft, sizeof(neh), &neh)) + return -5; + + /* Cisco is seriously brain damaged. It needs some extra garbage + * at the end of error indication or it'll barf out spurious errors. */ + if (packet->hdr.type == NHRP_PACKET_ERROR_INDICATION && + pduleft >= 0x10) { + memset(pos, 0, 0x10); + pos += 0x10; + pduleft -= 0x10; + } + + size = (int)(pos - pdu); + phdr->packet_size = htons(size); + phdr->checksum = 0; + phdr->src_nbma_address_len = packet->src_nbma_address.addr_len; + phdr->src_nbma_subaddress_len = packet->src_nbma_address.subaddr_len; + phdr->src_protocol_address_len = packet->src_protocol_address.addr_len; + phdr->dst_protocol_address_len = packet->dst_protocol_address.addr_len; + phdr->checksum = nhrp_calculate_checksum(pdu, size); + + return size; +} + +int nhrp_packet_route(struct nhrp_packet *packet) +{ + struct nhrp_address proto_nexthop, *src, *dst; + struct list_head *cielist = NULL; + struct nhrp_payload *payload; + struct nhrp_peer *peer; + char tmp[64]; + int r; + + if (packet->dst_iface == NULL) { + nhrp_error("nhrp_packet_route called without destination interface"); + return FALSE; + } + + if (packet_types[packet->hdr.type].type == 
NHRP_TYPE_REPLY) { + dst = &packet->src_protocol_address; + src = &packet->dst_protocol_address; + r = NHRP_EXTENSION_REVERSE_TRANSIT_NHS; + } else { + dst = &packet->dst_protocol_address; + src = &packet->src_protocol_address; + r = NHRP_EXTENSION_FORWARD_TRANSIT_NHS; + } + payload = nhrp_packet_extension(packet, + r | NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + if (payload != NULL) + cielist = &payload->u.cie_list; + + if (packet->dst_peer != NULL) { + proto_nexthop = packet->dst_peer->next_hop_address; + } else { + proto_nexthop = *dst; + do { + peer = nhrp_peer_route_full( + packet->dst_iface, &proto_nexthop, 0, + NHRP_PEER_TYPEMASK_ROUTE_VIA_NHS, src, cielist); + if (peer == NULL || peer->type == NHRP_PEER_TYPE_NEGATIVE) { + nhrp_error("No peer entry for protocol address %s", + nhrp_address_format(&proto_nexthop, + sizeof(tmp), tmp)); + return FALSE; + } + if (peer->type != NHRP_PEER_TYPE_LOCAL_ROUTE) + break; + if (peer->next_hop_address.type == AF_UNSPEC) + break; + proto_nexthop = peer->next_hop_address; + } while (1); + + packet->dst_peer = nhrp_peer_get(peer); + } + + return TRUE; +} + +int nhrp_packet_marshall_and_send(struct nhrp_packet *packet) +{ + uint8_t pdu[MAX_PDU_SIZE]; + char tmp[4][64]; + int size; + + nhrp_debug("Sending packet %d, from: %s (nbma %s), to: %s (nbma %s)", + packet->hdr.type, + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp[0]), tmp[0]), + nhrp_address_format(&packet->src_nbma_address, + sizeof(tmp[1]), tmp[1]), + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp[2]), tmp[2]), + nhrp_address_format(&packet->dst_peer->next_hop_address, + sizeof(tmp[3]), tmp[3])); + + size = marshall_packet(pdu, sizeof(pdu), packet); + if (size < 0) { + nhrp_error("Packet marshalling failed (r=%d)", size); + return FALSE; + } + + if (!kernel_send(pdu, size, packet->dst_iface, + &packet->dst_peer->next_hop_address)) + return FALSE; + + return TRUE; +} + +int nhrp_packet_route_and_send(struct 
nhrp_packet *packet) +{ + struct nhrp_payload *payload; + + if (packet->dst_peer == NULL || packet->dst_iface == NULL) { + if (!nhrp_packet_route(packet)) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE, 0); + return TRUE; + } + } + + if (packet->src_nbma_address.addr_len == 0) + packet->src_nbma_address = packet->dst_peer->my_nbma_address; + if (packet->src_protocol_address.addr_len == 0) + packet->src_protocol_address = packet->dst_iface->protocol_address; + if (packet->hdr.afnum == AFNUM_RESERVED) + packet->hdr.afnum = packet->dst_peer->afnum; + if (packet->hdr.protocol_type == 0) + packet->hdr.protocol_type = packet->dst_peer->protocol_type; + + /* RFC2332 5.3.1 */ + payload = nhrp_packet_extension( + packet, NHRP_EXTENSION_RESPONDER_ADDRESS | + NHRP_EXTENSION_FLAG_COMPULSORY | NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + if (packet_types[packet->hdr.type].type == NHRP_TYPE_REPLY && + (payload != NULL && list_empty(&payload->u.cie_list))) { + struct nhrp_cie *cie; + + cie = nhrp_cie_alloc(); + if (cie == NULL) + return FALSE; + + cie->hdr.holding_time = htons(packet->dst_iface->holding_time); + cie->nbma_address = packet->dst_peer->my_nbma_address; + cie->protocol_address = packet->dst_iface->protocol_address; + nhrp_payload_set_type(payload, NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + } + + /* RFC2332 5.3.4 - Authentication is always done pairwise on an NHRP + * hop-by-hop basis; i.e. regenerated at each hop. 
*/ + payload = nhrp_packet_extension(packet, + NHRP_EXTENSION_AUTHENTICATION | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_RAW); + nhrp_payload_free(payload); + if (packet->dst_iface->auth_token != NULL) + nhrp_payload_set_raw(payload, + nhrp_buffer_copy(packet->dst_iface->auth_token)); + + if (packet->dst_peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) { + packet->src_iface = packet->dst_peer->interface; + return nhrp_packet_receive_local(packet); + } + + if (packet->dst_peer->flags & (NHRP_PEER_FLAG_UP | + NHRP_PEER_FLAG_LOWER_UP)) + return nhrp_packet_marshall_and_send(packet); + + if (packet->dst_peer->queued_packet != NULL) + nhrp_packet_put(packet->dst_peer->queued_packet); + packet->dst_peer->queued_packet = nhrp_packet_get(packet); + + return TRUE; +} + +int nhrp_packet_send(struct nhrp_packet *packet) +{ + struct nhrp_payload *payload; + struct nhrp_cie *cie; + + if (packet->dst_iface == NULL) { + if (!nhrp_packet_route(packet)) { + nhrp_packet_send_error(packet, NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE, 0); + return TRUE; + } + } + + /* Cisco NAT extension CIE */ + if (packet_types[packet->hdr.type].type != NHRP_TYPE_INDICATION && + (packet->hdr.flags & NHRP_FLAG_REGISTRATION_NAT)) { + payload = nhrp_packet_extension(packet, NHRP_EXTENSION_NAT_ADDRESS, + NHRP_PAYLOAD_TYPE_CIE_LIST); + + if (packet->dst_iface->nat_cie.nbma_address.addr_len && + payload != NULL && list_empty(&payload->u.cie_list)) { + cie = nhrp_cie_alloc(); + if (cie != NULL) { + *cie = packet->dst_iface->nat_cie; + nhrp_cie_reset(cie); + nhrp_payload_add_cie(payload, cie); + } + } + } + + return nhrp_packet_route_and_send(packet); +} + +static void nhrp_packet_xmit_timeout_cb(struct ev_timer *w, int revents) +{ + struct nhrp_packet *packet = + container_of(w, struct nhrp_packet, timeout); + + list_del(&packet->request_list_entry); + + if (packet->dst_peer != NULL && + ++packet->retry < PACKET_RETRIES) { + nhrp_packet_marshall_and_send(packet); + + list_add(&packet->request_list_entry, 
&pending_requests); + } else { + ev_timer_stop(&packet->timeout); + if (packet->dst_peer == NULL) + nhrp_error("nhrp_packet_xmit_timeout: no destination peer!"); + if (packet->handler != NULL) + packet->handler(packet->handler_ctx, NULL); + nhrp_packet_dequeue(packet); + } +} + +int nhrp_packet_send_request(struct nhrp_packet *pkt, + void (*handler)(void *ctx, struct nhrp_packet *packet), + void *ctx) +{ + struct nhrp_packet *packet; + + packet = nhrp_packet_get(pkt); + + packet->retry = 0; + if (packet->hdr.u.request_id == constant_htonl(0)) { + request_id++; + packet->hdr.u.request_id = htonl(request_id); + } + + packet->handler = handler; + packet->handler_ctx = ctx; + list_add(&packet->request_list_entry, &pending_requests); + ev_timer_again(&packet->timeout); + + return nhrp_packet_send(packet); +} + +int nhrp_packet_send_error(struct nhrp_packet *error_packet, + uint16_t indication_code, uint16_t offset) +{ + struct nhrp_packet *p; + struct nhrp_payload *pl; + int r; + + /* RFC2332 5.2.7 Never generate errors about errors */ + if (error_packet->hdr.type == NHRP_PACKET_ERROR_INDICATION) + return TRUE; + + p = nhrp_packet_alloc(); + p->hdr = error_packet->hdr; + p->hdr.type = NHRP_PACKET_ERROR_INDICATION; + p->hdr.hop_count = 0; + p->hdr.u.error.code = indication_code; + p->hdr.u.error.offset = htons(offset); + p->dst_iface = error_packet->src_iface; + + if (packet_types[error_packet->hdr.type].type == NHRP_TYPE_REPLY) + p->dst_protocol_address = error_packet->dst_protocol_address; + else + p->dst_protocol_address = error_packet->src_protocol_address; + + pl = nhrp_packet_payload(p, NHRP_PAYLOAD_TYPE_RAW); + pl->u.raw = nhrp_buffer_alloc(error_packet->req_pdulen); + memcpy(pl->u.raw->data, error_packet->req_pdu, error_packet->req_pdulen); + + /* Standard extensions */ + nhrp_packet_extension(p, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + + if (p->dst_protocol_address.type == PF_UNSPEC) + r = 
nhrp_do_handle_error_indication(p, error_packet); + else + r = nhrp_packet_send(p); + + nhrp_packet_put(p); + + return r; +} + +/* Send a rate-limited Traffic Indication to protocol_src about the given + * PDU. Returns FALSE without sending when redirects are not enabled on + * iface, when nbma_src is not a dynamic peer of iface, when the rate + * limiter suppresses the indication, or when no packet can be allocated. */ +int nhrp_packet_send_traffic(struct nhrp_interface *iface, + struct nhrp_address *nbma_src, + struct nhrp_address *protocol_src, + struct nhrp_address *protocol_dst, + int protocol_type, uint8_t *pdu, size_t pdulen) +{ + struct nhrp_rate_limit *rl; + struct nhrp_packet *p; + struct nhrp_payload *pl; + struct nhrp_peer *peer; + char tmp1[64], tmp2[64], tmp3[64], tmp4[64]; + int r; + + if (!(iface->flags & NHRP_INTERFACE_FLAG_REDIRECT)) + return FALSE; + + /* Are we serving the NBMA source */ + peer = nhrp_interface_find_peer(iface, nbma_src); + if (peer == NULL || peer->type != NHRP_PEER_TYPE_DYNAMIC) + return FALSE; + + rl = get_rate_limit(protocol_src, protocol_dst); + if (rl == NULL) + return FALSE; + + /* If silence period has elapsed, reset algorithm */ + if (ev_now() > rl->rate_last + RATE_LIMIT_SILENCE) + rl->rate_tokens = 0; + + /* Too many ignored redirects; just update time of last packet */ + if (rl->rate_tokens >= RATE_LIMIT_MAX_TOKENS) { + rl->rate_last = ev_now(); + return FALSE; + } + + /* Check for load limit; set rate_last to last sent redirect */ + if (rl->rate_tokens != 0 && + ev_now() < rl->rate_last + RATE_LIMIT_SEND_INTERVAL) + return FALSE; + + rl->rate_tokens++; + rl->rate_last = ev_now(); + + p = nhrp_packet_alloc(); + /* Allocation may fail; nothing can be sent without a packet */ + if (p == NULL) + return FALSE; + + p->hdr = (struct nhrp_packet_header) { + .protocol_type = protocol_type, + .version = NHRP_VERSION_RFC2332, + .type = NHRP_PACKET_TRAFFIC_INDICATION, + .hop_count = 0, + }; + p->dst_protocol_address = *protocol_src; + + pl = nhrp_packet_payload(p, NHRP_PAYLOAD_TYPE_RAW); + pl->u.raw = nhrp_buffer_alloc(pdulen); + memcpy(pl->u.raw->data, pdu, pdulen); + + /* Standard extensions */ + nhrp_packet_extension(p, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + + nhrp_info("Sending Traffic Indication about packet from %s to %s (to %s/%s)", + 
nhrp_address_format(protocol_src, sizeof(tmp1), tmp1), + nhrp_address_format(protocol_dst, sizeof(tmp2), tmp2), + nhrp_address_format(&peer->protocol_address, sizeof(tmp3), tmp3), + nhrp_address_format(&peer->next_hop_address, sizeof(tmp4), tmp4)); + + p->dst_iface = iface; + p->dst_peer = nhrp_peer_get(peer); + r = nhrp_packet_send(p); + nhrp_packet_put(p); + + return r; +} + +void nhrp_packet_hook_request(int request, + int (*handler)(struct nhrp_packet *packet)) +{ + NHRP_BUG_ON(request < 0 || request >= ARRAY_SIZE(packet_types)); + NHRP_BUG_ON(packet_types[request].handler != NULL); + + packet_types[request].handler = handler; +} diff --git a/nhrp/nhrp_packet.h b/nhrp/nhrp_packet.h new file mode 100644 index 0000000..3f435c8 --- /dev/null +++ b/nhrp/nhrp_packet.h @@ -0,0 +1,128 @@ +/* nhrp_packet.h - In-memory NHRP packet definitions + * + * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#ifndef NHRP_PACKET_H +#define NHRP_PACKET_H + +#include "libev.h" +#include "list.h" +#include "nhrp_protocol.h" +#include "nhrp_address.h" + +#define NHRP_MAX_EXTENSIONS 10 + +#define NHRP_PACKET_DEFAULT_HOP_COUNT 16 + +struct nhrp_interface; + +struct nhrp_buffer { + uint32_t length; + uint8_t data[NHRP_EMPTY_ARRAY]; +}; + +struct nhrp_cie { + struct list_head cie_list_entry; + struct nhrp_cie_header hdr; + struct nhrp_address nbma_address; + struct nhrp_address protocol_address; +}; + +#define NHRP_PAYLOAD_TYPE_ANY -1 +#define NHRP_PAYLOAD_TYPE_NONE 0 +#define NHRP_PAYLOAD_TYPE_RAW 1 +#define NHRP_PAYLOAD_TYPE_CIE_LIST 2 + +struct nhrp_payload { + uint16_t extension_type; + uint16_t payload_type; + union { + struct nhrp_buffer *raw; + struct list_head cie_list; + } u; +}; + +struct nhrp_packet { + int ref; + + struct nhrp_packet_header hdr; + struct nhrp_address src_nbma_address; + struct nhrp_address src_protocol_address; + struct nhrp_address dst_protocol_address; + + int num_extensions; + struct nhrp_payload extension_by_order[NHRP_MAX_EXTENSIONS]; + struct nhrp_payload * extension_by_type[NHRP_MAX_EXTENSIONS]; + + struct list_head request_list_entry; + struct ev_timer timeout; + void (*handler)(void *ctx, struct nhrp_packet *packet); + void * handler_ctx; + int retry; + + uint8_t * req_pdu; + size_t req_pdulen; + + struct nhrp_interface * src_iface; + struct nhrp_address src_linklayer_address; + struct nhrp_interface * dst_iface; + struct nhrp_peer * dst_peer; +}; + +#define NHRP_EXTENSION_FLAG_NOCREATE 0x00010000 + +int nhrp_rate_limit_clear(struct nhrp_address *addr, int prefix_len); + +struct nhrp_buffer *nhrp_buffer_alloc(uint32_t size); +struct nhrp_buffer *nhrp_buffer_copy(struct nhrp_buffer *buffer); +int nhrp_buffer_cmp(struct nhrp_buffer *a, struct nhrp_buffer *b); +void nhrp_buffer_free(struct nhrp_buffer *buffer); + +struct nhrp_cie *nhrp_cie_alloc(void); +void nhrp_cie_free(struct nhrp_cie *cie); +void nhrp_cie_reset(struct nhrp_cie 
*cie); + +void nhrp_payload_set_type(struct nhrp_payload *payload, int type); +void nhrp_payload_set_raw(struct nhrp_payload *payload, struct nhrp_buffer *buf); +void nhrp_payload_add_cie(struct nhrp_payload *payload, struct nhrp_cie *cie); +struct nhrp_cie *nhrp_payload_get_cie(struct nhrp_payload *payload, int index); +void nhrp_payload_free(struct nhrp_payload *payload); + +struct nhrp_packet *nhrp_packet_alloc(void); +struct nhrp_packet *nhrp_packet_get(struct nhrp_packet *packet); +void nhrp_packet_put(struct nhrp_packet *packet); + +struct nhrp_payload *nhrp_packet_payload(struct nhrp_packet *packet, int payload_type); +struct nhrp_payload *nhrp_packet_extension(struct nhrp_packet *packet, + uint32_t extension, int payload_type); +int nhrp_packet_receive(uint8_t *pdu, size_t pdulen, + struct nhrp_interface *iface, + struct nhrp_address *from); +int nhrp_packet_route(struct nhrp_packet *packet); +int nhrp_packet_reroute(struct nhrp_packet *packet, struct nhrp_peer *dst_peer); +int nhrp_packet_marshall_and_send(struct nhrp_packet *packet); +int nhrp_packet_route_and_send(struct nhrp_packet *packet); +int nhrp_packet_send(struct nhrp_packet *packet); +int nhrp_packet_send_request(struct nhrp_packet *packet, + void (*handler)(void *ctx, struct nhrp_packet *packet), + void *ctx); +int nhrp_packet_send_error(struct nhrp_packet *error_packet, + uint16_t indication_code, uint16_t offset); +int nhrp_packet_send_traffic(struct nhrp_interface *iface, + struct nhrp_address *nbma_src, + struct nhrp_address *protocol_src, + struct nhrp_address *protocol_dst, + int protocol_type, uint8_t *pdu, size_t pdulen); + +void nhrp_packet_hook_request(int request, + int (*handler)(struct nhrp_packet *packet)); + +#endif diff --git a/nhrp/nhrp_peer.c b/nhrp/nhrp_peer.c new file mode 100644 index 0000000..c53d4c4 --- /dev/null +++ b/nhrp/nhrp_peer.c @@ -0,0 +1,2106 @@ +/* nhrp_peer.c - NHRP peer cache implementation + * + * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi> + * All 
rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <sys/wait.h> +#include <sys/time.h> +#include <netinet/in.h> +#include "nhrp_common.h" +#include "nhrp_peer.h" +#include "nhrp_interface.h" + +#define NHRP_PEER_FORMAT_LEN 128 + +#define NHRP_SCRIPT_TIMEOUT (2*60) +#define NHRP_NEGATIVE_CACHE_TIME (3*60) +#define NHRP_EXPIRY_TIME (5*60) + +#define NHRP_HOLDING_TIME_DIVISOR 3 /* See RFC-2332 5.2.3 */ + +#define NHRP_RETRY_REGISTER_TIME (30 + random()/(RAND_MAX/60)) +#define NHRP_RETRY_ERROR_TIME (60 + random()/(RAND_MAX/120)) + +#define NHRP_PEER_FLAG_PRUNE_PENDING 0x00010000 + +const char * const nhrp_peer_type[] = { + [NHRP_PEER_TYPE_INCOMPLETE] = "incomplete", + [NHRP_PEER_TYPE_NEGATIVE] = "negative", + [NHRP_PEER_TYPE_CACHED] = "cached", + [NHRP_PEER_TYPE_SHORTCUT_ROUTE] = "shortcut-route", + [NHRP_PEER_TYPE_DYNAMIC] = "dynamic", + [NHRP_PEER_TYPE_DYNAMIC_NHS] = "dynamic-nhs", + [NHRP_PEER_TYPE_STATIC] = "static", + [NHRP_PEER_TYPE_STATIC_DNS] = "dynamic-map", + [NHRP_PEER_TYPE_LOCAL_ROUTE] = "local-route", + [NHRP_PEER_TYPE_LOCAL_ADDR] = "local", +}; + +static int nhrp_peer_num_total = 0; +static struct list_head local_peer_list = LIST_INITIALIZER(local_peer_list); + +/* Peer entrys life, pending callbacks and their call order are listed + * here. + * + * Generally everything starts from nhrp_peer_insert() call which schedules + * (during startup) or directly invokes nhrp_peer_insert_cb(). + * + * INCOMPLETE: + * 1. nhrp_peer_insert_cb: send resolution request + * 2. nhrp_peer_handle_resolution_reply: entry deleted or reinserted NEGATIVE + * + * NEGATIVE: + * 1. 
nhrp_peer_insert_cb: schedule task remove + * + * CACHED, STATIC, DYNAMIC, DYNAMIC_NHS: + * 1. nhrp_peer_insert_cb: calls nhrp_peer_restart_cb + * 2. nhrp_peer_restart_cb: resolves dns name, or calls nhrp_peer_run_up_script() + * 3. nhrp_peer_address_query_cb: calls nhrp_peer_run_up_script() + * 4. nhrp_peer_run_up_script: spawns script, or goes to nhrp_peer_lower_is_up() + * 5. nhrp_peer_script_peer_up_done: calls nhrp_peer_lower_is_up() + * 6. nhrp_peer_lower_is_up: sends registration, or goes to nhrp_peer_is_up() + * 7. nhrp_peer_handle_registration_reply: + * a. on success: calls nhrp_peer_is_up() + * b. on error reply: calls nhrp_peer_send_purge_protocol() + * nhrp_peer_handle_purge_protocol_reply: sends new registration + * 8. nhrp_peer_is_up: schedules re-register, expire or deletion + * + * ON EXPIRE: + * schedule remove + * nhrp_peer_renew is called if peer has USED flag set or becomes set, + * while the peer is expired + * ON RENEW: sends resolution request, schedules EXPIRE + * + * ON ERROR for CACHED: reinsert as NEGATIVE + * ON ERROR for STATIC: fork peer-down script (if was lower up) + * schedule task request link + * ON ERROR for DYNAMIC: fork peer-down script (if was lower up) + * delete peer + * + * SHORTCUT_ROUTE: + * 1. nhrp_peer_insert_cb: spawns route-up script, or schedules EXPIRE + * + * STATIC_DNS: + * 1. nhrp_peer_insert_cb: calls nhrp_peer_dnsmap_restart_cb + * 2. nhrp_peer_dnsmap_restart_cb: resolves dns name + * 3. 
nhrp_peer_dnsmap_query_cb: create new peer entries, + * renew existing and delete expired, schedule restart + * + * LOCAL: + * nothing, only netlink code modifies these + */ + +static void nhrp_peer_reinsert(struct nhrp_peer *peer, int type); +static void nhrp_peer_restart_cb(struct ev_timer *w, int revents); +static void nhrp_peer_dnsmap_restart_cb(struct ev_timer *w, int revents); +static void nhrp_peer_remove_cb(struct ev_timer *w, int revents); +static void nhrp_peer_send_resolve(struct nhrp_peer *peer); +static void nhrp_peer_send_register_cb(struct ev_timer *w, int revents); +static void nhrp_peer_expire_cb(struct ev_timer *w, int revents); + +static const char *nhrp_error_indication_text(int ei) +{ + switch (ei) { + case -1: + return "timeout"; + case NHRP_ERROR_UNRECOGNIZED_EXTENSION: + return "unrecognized extension"; + case NHRP_ERROR_LOOP_DETECTED: + return "loop detected"; + case NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE: + return "protocol address unreachable"; + case NHRP_ERROR_PROTOCOL_ERROR: + return "protocol error"; + case NHRP_ERROR_SDU_SIZE_EXCEEDED: + return "SDU size exceeded"; + case NHRP_ERROR_INVALID_EXTENSION: + return "invalid extension"; + case NHRP_ERROR_INVALID_RESOLUTION_REPLY: + return "unexpected resolution reply"; + case NHRP_ERROR_AUTHENTICATION_FAILURE: + return "authentication failure"; + case NHRP_ERROR_HOP_COUNT_EXCEEDED: + return "hop count exceeded"; + } + return "unknown"; +} + +static const char *nhrp_cie_code_text(int ct) +{ + switch (ct) { + case NHRP_CODE_SUCCESS: + return "success"; + case NHRP_CODE_ADMINISTRATIVELY_PROHIBITED: + return "administratively prohibited"; + case NHRP_CODE_INSUFFICIENT_RESOURCES: + return "insufficient resources"; + case NHRP_CODE_NO_BINDING_EXISTS: + return "no binding exists"; + case NHRP_CODE_BINDING_NON_UNIQUE: + return "binding non-unique"; + case NHRP_CODE_UNIQUE_ADDRESS_REGISTERED: + return "unique address already registered"; + } + return "unknown"; +} + +static char 
*nhrp_peer_format_full(struct nhrp_peer *peer, size_t len, + char *buf, int full) +{ + char tmp[NHRP_PEER_FORMAT_LEN], *str; + int i = 0; + + if (peer == NULL) { + snprintf(buf, len, "(null)"); + return buf; + } + + i += snprintf(&buf[i], len - i, "%s/%d", + nhrp_address_format(&peer->protocol_address, sizeof(tmp), tmp), + peer->prefix_length); + + if (peer->next_hop_address.type != PF_UNSPEC) { + switch (peer->type) { + case NHRP_PEER_TYPE_SHORTCUT_ROUTE: + case NHRP_PEER_TYPE_LOCAL_ROUTE: + str = "nexthop"; + break; + case NHRP_PEER_TYPE_LOCAL_ADDR: + str = "alias"; + break; + default: + str = "nbma"; + break; + } + i += snprintf(&buf[i], len - i, " %s %s", + str, + nhrp_address_format(&peer->next_hop_address, + sizeof(tmp), tmp)); + } + if (peer->nbma_hostname != NULL) { + i += snprintf(&buf[i], len - i, " hostname %s", + peer->nbma_hostname); + } + if (peer->next_hop_nat_oa.type != PF_UNSPEC) { + i += snprintf(&buf[i], len - i, " nbma-nat-oa %s", + nhrp_address_format(&peer->next_hop_nat_oa, + sizeof(tmp), tmp)); + } + if (peer->interface != NULL) + i += snprintf(&buf[i], len - i, " dev %s", + peer->interface->name); + if (peer->mtu) + i += snprintf(&buf[i], len - i, " mtu %d", peer->mtu); + + if (!full) + return buf; + + if (peer->flags & NHRP_PEER_FLAG_USED) + i += snprintf(&buf[i], len - i, " used"); + if (peer->flags & NHRP_PEER_FLAG_UNIQUE) + i += snprintf(&buf[i], len - i, " unique"); + if (peer->flags & NHRP_PEER_FLAG_UP) + i += snprintf(&buf[i], len - i, " up"); + else if (peer->flags & NHRP_PEER_FLAG_LOWER_UP) + i += snprintf(&buf[i], len - i, " lower-up"); + if (peer->expire_time != 0.0) { + int rel; + + rel = peer->expire_time - ev_now(); + if (rel >= 0) { + i += snprintf(&buf[i], len - i, " expires_in %d:%02d", + rel / 60, rel % 60); + } else { + i += snprintf(&buf[i], len - i, " expired"); + } + } + if (peer->flags & NHRP_PEER_FLAG_PRUNE_PENDING) + i += snprintf(&buf[i], len - i, " dying"); + + return buf; +} + +static inline char 
*nhrp_peer_format(struct nhrp_peer *peer, + size_t len, char *buf) +{ + return nhrp_peer_format_full(peer, len, buf, TRUE); +} + +static inline void nhrp_peer_debug_refcount(const char *func, + struct nhrp_peer *peer) +{ +#if 0 + char tmp[NHRP_PEER_FORMAT_LEN]; + nhrp_debug("%s(%s %s) ref=%d", + func, nhrp_peer_type[peer->type], + nhrp_peer_format(peer, sizeof(tmp), tmp), + peer->ref); +#endif +} + +static void nhrp_peer_resolve_nbma(struct nhrp_peer *peer) +{ + char tmp[64]; + int r; + + if (peer->interface->nbma_address.type == PF_UNSPEC) { + r = kernel_route(NULL, &peer->next_hop_address, + &peer->my_nbma_address, NULL, + &peer->my_nbma_mtu); + if (!r) { + nhrp_error("No route to next hop address %s", + nhrp_address_format(&peer->next_hop_address, + sizeof(tmp), tmp)); + } + } else { + peer->my_nbma_address = peer->interface->nbma_address; + peer->my_nbma_mtu = peer->interface->nbma_mtu; + } +} + +static char *env(const char *key, const char *value) +{ + char *buf; + buf = malloc(strlen(key)+strlen(value)+2); + if (buf == NULL) + return NULL; + sprintf(buf, "%s=%s", key, value); + return buf; +} + +static char *envu32(const char *key, uint32_t value) +{ + char *buf; + buf = malloc(strlen(key)+16); + if (buf == NULL) + return NULL; + sprintf(buf, "%s=%u", key, value); + return buf; +} + +int nhrp_peer_event_ok(union nhrp_peer_event e, int revents) +{ + int status; + + if (revents == 0) + return TRUE; + if (!(revents & EV_CHILD)) + return FALSE; + status = e.child->rstatus; + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) + return TRUE; + return FALSE; +} + +char *nhrp_peer_event_reason(union nhrp_peer_event e, int revents, + size_t buflen, char *buf) +{ + int status; + + if (revents & EV_CHILD) { + status = e.child->rstatus; + if (WIFEXITED(status)) + snprintf(buf, buflen, "exitstatus %d", + WEXITSTATUS(status)); + else if (WIFSIGNALED(status)) + snprintf(buf, buflen, "signal %d", + WTERMSIG(status)); + else + snprintf(buf, buflen, "rstatus %d", status); + } 
else if (revents & EV_TIMEOUT) { + snprintf(buf, buflen, "timeout"); + } else if (revents == 0) { + snprintf(buf, buflen, "success"); + } else { + snprintf(buf, buflen, "unknown, revents=%x", revents); + } + return buf; +} + +struct nhrp_peer *nhrp_peer_from_event(union nhrp_peer_event e, int revents) +{ + struct nhrp_peer *peer; + + if (revents & EV_CHILD) { + peer = container_of(e.child, struct nhrp_peer, child); + } else if (revents & EV_TIMEOUT) { + peer = container_of(e.timer, struct nhrp_peer, timer); + } else { + NHRP_BUG_ON(revents != 0); + peer = container_of(e.child, struct nhrp_peer, child); + } + + ev_child_stop(&peer->child); + ev_timer_stop(&peer->timer); + + return peer; +} + +void nhrp_peer_run_script(struct nhrp_peer *peer, char *action, + void (*cb)(union nhrp_peer_event, int)) +{ + struct nhrp_interface *iface = peer->interface; + const char *argv[] = { nhrp_script_file, action, NULL }; + char *envp[32]; + char tmp[64]; + pid_t pid; + int i = 0; + + /* Resolve own NBMA address before forking if required + * since it requires traversing peer cache and can trigger + * logging and other stuff. 
*/ + if (peer->my_nbma_address.type == PF_UNSPEC) + nhrp_peer_resolve_nbma(peer); + + /* Fork and execute script */ + pid = fork(); + if (pid == -1) { + if (cb != NULL) + cb(&peer->child, EV_CHILD | EV_ERROR); + return; + } else if (pid > 0) { + if (cb != NULL) { + ev_child_stop(&peer->child); + ev_child_init(&peer->child, cb, pid, 0); + ev_child_start(&peer->child); + + ev_set_cb(&peer->timer, cb); + peer->timer.repeat = NHRP_SCRIPT_TIMEOUT; + ev_timer_again(&peer->timer); + } + return; + } + + envp[i++] = env("NHRP_TYPE", nhrp_peer_type[peer->type]); + if (iface->protocol_address.type != PF_UNSPEC) + envp[i++] = env("NHRP_SRCADDR", + nhrp_address_format(&iface->protocol_address, + sizeof(tmp), tmp)); + if (peer->my_nbma_address.type != PF_UNSPEC) + envp[i++] = env("NHRP_SRCNBMA", + nhrp_address_format(&peer->my_nbma_address, + sizeof(tmp), tmp)); + envp[i++] = env("NHRP_DESTADDR", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp)); + envp[i++] = envu32("NHRP_DESTPREFIX", peer->prefix_length); + + if (peer->purge_reason) + envp[i++] = env("NHRP_PEER_DOWN_REASON", peer->purge_reason); + + switch (peer->type) { + case NHRP_PEER_TYPE_CACHED: + case NHRP_PEER_TYPE_LOCAL_ADDR: + case NHRP_PEER_TYPE_STATIC: + case NHRP_PEER_TYPE_DYNAMIC: + case NHRP_PEER_TYPE_DYNAMIC_NHS: + envp[i++] = env("NHRP_DESTNBMA", + nhrp_address_format(&peer->next_hop_address, + sizeof(tmp), tmp)); + if (peer->mtu) + envp[i++] = envu32("NHRP_DESTMTU", peer->mtu); + if (peer->next_hop_nat_oa.type != PF_UNSPEC) + envp[i++] = env("NHRP_DESTNBMA_NAT_OA", + nhrp_address_format(&peer->next_hop_nat_oa, + sizeof(tmp), tmp)); + break; + case NHRP_PEER_TYPE_SHORTCUT_ROUTE: + case NHRP_PEER_TYPE_LOCAL_ROUTE: + envp[i++] = env("NHRP_NEXTHOP", + nhrp_address_format(&peer->next_hop_address, + sizeof(tmp), tmp)); + break; + default: + NHRP_BUG_ON("invalid peer type"); + } + envp[i++] = env("NHRP_INTERFACE", peer->interface->name); + envp[i++] = envu32("NHRP_GRE_KEY", 
peer->interface->gre_key); + envp[i++] = NULL; + + execve(nhrp_script_file, (char **) argv, envp); + exit(1); +} + +void nhrp_peer_cancel_async(struct nhrp_peer *peer) +{ + if (peer->queued_packet) { + nhrp_packet_put(peer->queued_packet); + peer->queued_packet = NULL; + } + if (peer->request) { + nhrp_server_finish_request(peer->request); + peer->request = NULL; + } + + nhrp_address_resolve_cancel(&peer->address_query); + ev_timer_stop(&peer->timer); + if (ev_is_active(&peer->child)) { + kill(SIGINT, peer->child.pid); + ev_child_stop(&peer->child); + } +} + +void nhrp_peer_send_packet_queue(struct nhrp_peer *peer) +{ + if (peer->queued_packet == NULL) + return; + + nhrp_packet_marshall_and_send(peer->queued_packet); + nhrp_packet_put(peer->queued_packet); + peer->queued_packet = NULL; +} + +static void nhrp_peer_schedule(struct nhrp_peer *peer, ev_tstamp timeout, + void (*cb)(struct ev_timer *w, int revents)) +{ + ev_timer_stop(&peer->timer); + ev_timer_init(&peer->timer, cb, timeout, 0.); + ev_timer_start(&peer->timer); +} + +static void nhrp_peer_restart_error(struct nhrp_peer *peer) +{ + switch (peer->type) { + case NHRP_PEER_TYPE_STATIC: + case NHRP_PEER_TYPE_DYNAMIC_NHS: + nhrp_peer_schedule(peer, NHRP_RETRY_ERROR_TIME, + nhrp_peer_restart_cb); + break; + default: + nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_NEGATIVE); + break; + } +} + +static void nhrp_peer_script_route_up_done(union nhrp_peer_event e, int revents) +{ + struct nhrp_peer *peer = nhrp_peer_from_event(e, revents); + char tmp[64], reason[32]; + + if (nhrp_peer_event_ok(e, revents)) { + if (revents) + nhrp_debug("[%s] Route up script: success", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp)); + + peer->flags |= NHRP_PEER_FLAG_UP; + nhrp_peer_schedule(peer, peer->expire_time - NHRP_EXPIRY_TIME + - 10 - ev_now(), nhrp_peer_expire_cb); + } else { + nhrp_info("[%s] Route up script: %s; " + "adding negative cached entry", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), 
tmp), + nhrp_peer_event_reason(e, revents, + sizeof(reason), reason)); + + nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_NEGATIVE); + } +} + +static int nhrp_peer_routes_up(void *ctx, struct nhrp_peer *peer) +{ + if (!(peer->flags & NHRP_PEER_FLAG_UP)) + nhrp_peer_run_script(peer, "route-up", + nhrp_peer_script_route_up_done); + + return 0; +} + +static int nhrp_peer_routes_renew(void *ctx, struct nhrp_peer *peer) +{ + int *num_routes = (int *) ctx; + + if (peer->flags & NHRP_PEER_FLAG_PRUNE_PENDING) { + peer->flags &= ~NHRP_PEER_FLAG_PRUNE_PENDING; + nhrp_peer_cancel_async(peer); + nhrp_peer_send_resolve(peer); + (*num_routes)++; + } + + return 0; +} + +static void nhrp_peer_renew(struct nhrp_peer *peer) +{ + struct nhrp_interface *iface = peer->interface; + struct nhrp_peer_selector sel; + int num_routes = 0; + + /* Renew the cached information: all related routes + * or the peer itself */ + if (peer->type != NHRP_PEER_TYPE_SHORTCUT_ROUTE) { + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_UP; + sel.type_mask = BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE); + sel.interface = iface; + sel.next_hop_address = peer->protocol_address; + nhrp_peer_foreach(nhrp_peer_routes_renew, &num_routes, &sel); + } + + if (peer->flags & NHRP_PEER_FLAG_PRUNE_PENDING) { + peer->flags &= ~NHRP_PEER_FLAG_PRUNE_PENDING; + nhrp_peer_cancel_async(peer); + nhrp_peer_send_resolve(peer); + } +} + +static int is_used(void *ctx, struct nhrp_peer *peer) +{ + if (peer->flags & NHRP_PEER_FLAG_USED) + return 1; + + return 0; +} + +static void nhrp_peer_expire_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer); + struct nhrp_peer_selector sel; + int used; + + peer->flags |= NHRP_PEER_FLAG_PRUNE_PENDING; + nhrp_peer_schedule(peer, peer->expire_time - ev_now(), + nhrp_peer_remove_cb); + + if (peer->type == NHRP_PEER_TYPE_SHORTCUT_ROUTE) { + memset(&sel, 0, sizeof(sel)); + sel.interface = peer->interface; + sel.protocol_address = 
peer->next_hop_address; + used = nhrp_peer_foreach(is_used, NULL, &sel); + } else + used = peer->flags & NHRP_PEER_FLAG_USED; + + if (used) + nhrp_peer_renew(peer); +} + +static void nhrp_peer_is_down(struct nhrp_peer *peer) +{ + struct nhrp_peer_selector sel; + + /* Remove UP flags if not being removed permanently, so futher + * lookups are valid */ + if (!(peer->flags & NHRP_PEER_FLAG_REMOVED)) + peer->flags &= ~(NHRP_PEER_FLAG_LOWER_UP | NHRP_PEER_FLAG_UP); + + /* Check if there are routes using this peer as next-hop */ + if (peer->type != NHRP_PEER_TYPE_SHORTCUT_ROUTE) { + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE); + sel.interface = peer->interface; + sel.next_hop_address = peer->protocol_address; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); + } + + /* Remove from lists */ + if (list_hashed(&peer->mcast_list_entry)) + list_del(&peer->mcast_list_entry); + if (hlist_hashed(&peer->nbma_hash_entry)) + hlist_del(&peer->nbma_hash_entry); +} + +static void nhrp_peer_is_up(struct nhrp_peer *peer) +{ + struct nhrp_interface *iface = peer->interface; + struct nhrp_peer_selector sel; + int mcast = 0, i; + char tmp[64]; + + if ((peer->flags & (NHRP_PEER_FLAG_UP | NHRP_PEER_FLAG_REGISTER)) + == NHRP_PEER_FLAG_REGISTER) { + /* First time registration reply received */ + nhrp_peer_run_script(peer, "nhs-up", NULL); + } + + /* Remove from mcast list if previously there */ + if (list_hashed(&peer->mcast_list_entry)) + list_del(&peer->mcast_list_entry); + + /* Check if this one needs multicast traffic */ + if (BIT(peer->type) & iface->mcast_mask) { + mcast = 1; + } else { + for (i = 0; i < iface->mcast_numaddr; i++) { + if (!nhrp_address_cmp(&peer->protocol_address, + &iface->mcast_addr[i])) { + mcast = 1; + break; + } + } + } + + if (mcast) { + list_add(&peer->mcast_list_entry, &iface->mcast_list); + nhrp_info("[%s] Peer inserted to multicast list", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp)); + } + 
+ /* Searchable by NBMA */ + if (hlist_hashed(&peer->nbma_hash_entry)) + hlist_del(&peer->nbma_hash_entry); + if (BIT(peer->type) & (BIT(NHRP_PEER_TYPE_CACHED) | + BIT(NHRP_PEER_TYPE_DYNAMIC) | + BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) | + BIT(NHRP_PEER_TYPE_STATIC))) { + i = nhrp_address_hash(&peer->next_hop_address) % NHRP_INTERFACE_NBMA_HASH_SIZE; + hlist_add_head(&peer->nbma_hash_entry, &iface->nbma_hash[i]); + } + + peer->flags |= NHRP_PEER_FLAG_UP | NHRP_PEER_FLAG_LOWER_UP; + + /* Check if there are routes using this peer as next-hop*/ + if (peer->type != NHRP_PEER_TYPE_SHORTCUT_ROUTE) { + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE); + sel.interface = iface; + sel.next_hop_address = peer->protocol_address; + nhrp_peer_foreach(nhrp_peer_routes_up, NULL, &sel); + } + + nhrp_peer_send_packet_queue(peer); + + /* Schedule expiry or renewal */ + switch (peer->type) { + case NHRP_PEER_TYPE_DYNAMIC: + nhrp_peer_schedule(peer, peer->expire_time - ev_now(), + nhrp_peer_remove_cb); + break; + case NHRP_PEER_TYPE_CACHED: + nhrp_peer_schedule( + peer, + peer->expire_time - NHRP_EXPIRY_TIME - ev_now(), + nhrp_peer_expire_cb); + break; + case NHRP_PEER_TYPE_STATIC: + case NHRP_PEER_TYPE_DYNAMIC_NHS: + if (peer->flags & NHRP_PEER_FLAG_REGISTER) { + nhrp_peer_schedule( + peer, iface->holding_time / + NHRP_HOLDING_TIME_DIVISOR + 1, + nhrp_peer_send_register_cb); + } + break; + default: + NHRP_BUG_ON("invalid peer type"); + break; + } +} + +static void nhrp_peer_lower_is_up(struct nhrp_peer *peer) +{ + peer->flags |= NHRP_PEER_FLAG_LOWER_UP; + + if (peer->flags & NHRP_PEER_FLAG_REGISTER) + nhrp_peer_send_register_cb(&peer->timer, 0); + else + nhrp_peer_is_up(peer); +} + +static void nhrp_peer_script_peer_up_done(union nhrp_peer_event e, int revents) +{ + struct nhrp_peer *peer = nhrp_peer_from_event(e, revents); + char tmp[64], reason[32]; + + if (nhrp_peer_event_ok(e, revents)) { + nhrp_debug("[%s] Peer up script: success", + 
nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp)); + + kernel_inject_neighbor(&peer->protocol_address, + &peer->next_hop_address, + peer->interface); + nhrp_peer_lower_is_up(peer); + } else { + nhrp_error("[%s] Peer up script failed: %s", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp), + nhrp_peer_event_reason(e, revents, + sizeof(reason), reason)); + nhrp_peer_restart_error(peer); + } +} + +static void nhrp_peer_run_up_script(struct nhrp_peer *peer) +{ + nhrp_peer_run_script(peer, "peer-up", + nhrp_peer_script_peer_up_done); +} + +static void nhrp_peer_address_query_cb(struct nhrp_address_query *query, + int num_addr, struct nhrp_address *addrs) +{ + struct nhrp_peer *peer = container_of(query, struct nhrp_peer, + address_query); + char host[64]; + + if (num_addr > 0) { + nhrp_info("Resolved '%s' as %s", + peer->nbma_hostname, + nhrp_address_format(&addrs[0], sizeof(host), host)); + peer->next_hop_address = addrs[0]; + peer->afnum = nhrp_afnum_from_pf(peer->next_hop_address.type); + nhrp_peer_run_up_script(peer); + } else { + nhrp_error("Failed to resolve '%s'", peer->nbma_hostname); + nhrp_peer_restart_error(peer); + } +} + +static void nhrp_peer_restart_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer); + + if (peer->nbma_hostname != NULL) { + nhrp_address_resolve(&peer->address_query, + peer->nbma_hostname, + nhrp_peer_address_query_cb); + } else { + nhrp_peer_resolve_nbma(peer); + + if (!(peer->flags & NHRP_PEER_FLAG_LOWER_UP)) + nhrp_peer_run_up_script(peer); + else + nhrp_peer_script_peer_up_done(&peer->child, 0); + } +} + +static void nhrp_peer_send_protocol_purge(struct nhrp_peer *peer) +{ + char tmp[64]; + struct nhrp_packet *packet; + struct nhrp_cie *cie; + struct nhrp_payload *payload; + int sent = FALSE; + + packet = nhrp_packet_alloc(); + if (packet == NULL) + goto error; + + packet->hdr = (struct nhrp_packet_header) { + .afnum = peer->afnum, + .protocol_type 
= peer->protocol_type, + .version = NHRP_VERSION_RFC2332, + .type = NHRP_PACKET_PURGE_REQUEST, + .hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT, + .flags = NHRP_FLAG_PURGE_NO_REPLY, + }; + if (peer->flags & NHRP_PEER_FLAG_CISCO) { + /* Cisco IOS seems to require reqistration and purge + * request id to match, so we need to used a fixed + * value. This is in violation of RFC, though. */ + packet->hdr.u.request_id = + nhrp_address_hash(&peer->interface->protocol_address); + } + packet->dst_protocol_address = peer->protocol_address; + + /* Payload CIE */ + cie = nhrp_cie_alloc(); + if (cie == NULL) + goto error_free_packet; + + *cie = (struct nhrp_cie) { + .hdr.code = NHRP_CODE_SUCCESS, + .hdr.mtu = 0, + .hdr.preference = 0, + .hdr.prefix_length = 0xff, + }; + cie->protocol_address = peer->interface->protocol_address; + + payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + + nhrp_info("Sending Purge Request (of protocol address) to %s", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp)); + + packet->dst_peer = nhrp_peer_get(peer); + packet->dst_iface = peer->interface; + sent = nhrp_packet_send(packet); +error_free_packet: + nhrp_packet_put(packet); +error: + if (sent) + nhrp_peer_schedule(peer, 2, nhrp_peer_send_register_cb); + else + nhrp_peer_restart_error(peer); +} + +static int nhrp_add_local_route_cie(void *ctx, struct nhrp_peer *route) +{ + struct nhrp_packet *packet = (struct nhrp_packet *) ctx; + struct nhrp_payload *payload; + struct nhrp_cie *cie; + + if (route->interface != NULL && + !(route->interface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST)) + return 0; + + cie = nhrp_cie_alloc(); + if (cie == NULL) + return 0; + + *cie = (struct nhrp_cie) { + .hdr.code = 0, + .hdr.prefix_length = route->prefix_length, + .protocol_address = route->protocol_address, + }; + + payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + + return 0; +} + +int 
nhrp_peer_discover_nhs(struct nhrp_peer *peer, + struct nhrp_address *newaddr) +{ + struct nhrp_peer_selector sel; + char tmp[32], tmp2[32]; + + if (nhrp_address_cmp(&peer->protocol_address, newaddr) == 0) + return TRUE; + + if (peer->type != NHRP_PEER_TYPE_DYNAMIC_NHS || + !nhrp_address_is_network(&peer->protocol_address, + peer->prefix_length)) { + nhrp_error("Unexpected NHS protocol address change %s -> %s", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp2), tmp2), + nhrp_address_format(newaddr, sizeof(tmp), tmp)); + return FALSE; + } + + if (nhrp_address_prefix_cmp(&peer->protocol_address, newaddr, + peer->prefix_length) != 0) { + nhrp_error("Protocol address change to %s is not within %s/%d", + nhrp_address_format(newaddr, sizeof(tmp), tmp), + nhrp_address_format(&peer->protocol_address, + sizeof(tmp2), tmp2), + peer->prefix_length); + return FALSE; + } + + /* Remove incomplete/cached entries */ + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_EXACT; + sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE; + sel.interface = peer->interface; + sel.protocol_address = *newaddr; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); + + /* Update protocol address */ + peer->protocol_address = *newaddr; + + return TRUE; +} + +static void nhrp_peer_handle_registration_reply(void *ctx, + struct nhrp_packet *reply) +{ + struct nhrp_peer *peer = (struct nhrp_peer *) ctx; + struct nhrp_payload *payload; + struct nhrp_cie *cie; + struct nhrp_packet *packet; + char tmp[NHRP_PEER_FORMAT_LEN]; + int ec = -1; + + if (peer->flags & NHRP_PEER_FLAG_REMOVED) + goto ret; + + if (reply == NULL || + reply->hdr.type != NHRP_PACKET_REGISTRATION_REPLY) { + ec = reply ? 
reply->hdr.u.error.code : -1; + nhrp_info("Failed to register to %s: %s (%d)", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp), + nhrp_error_indication_text(ec), ntohs(ec)); + + if (ec == NHRP_ERROR_HOP_COUNT_EXCEEDED) + nhrp_peer_discover_nhs(peer, + &reply->src_protocol_address); + + if (reply != NULL) { + nhrp_peer_schedule(peer, NHRP_RETRY_REGISTER_TIME, + nhrp_peer_send_register_cb); + } else { + nhrp_peer_restart_error(peer); + } + goto ret; + } + + /* Check servers protocol address */ + if (!nhrp_peer_discover_nhs(peer, &reply->dst_protocol_address)) { + nhrp_peer_restart_error(peer); + goto ret; + } + + /* Check result */ + payload = nhrp_packet_payload(reply, NHRP_PAYLOAD_TYPE_CIE_LIST); + if (payload != NULL) { + cie = nhrp_payload_get_cie(payload, 1); + if (cie != NULL) + ec = cie->hdr.code; + } + + nhrp_info("Received Registration Reply from %s: %s", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp), + nhrp_cie_code_text(ec)); + + switch (ec) { + case NHRP_CODE_SUCCESS: + break; + case NHRP_CODE_UNIQUE_ADDRESS_REGISTERED: + nhrp_peer_send_protocol_purge(peer); + goto ret; + default: + nhrp_peer_schedule(peer, NHRP_RETRY_REGISTER_TIME, + nhrp_peer_send_register_cb); + goto ret; + } + + /* Check for NAT */ + payload = nhrp_packet_extension(reply, + NHRP_EXTENSION_NAT_ADDRESS | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + if (payload != NULL) { + cie = nhrp_payload_get_cie(payload, 2); + if (cie != NULL) { + nhrp_info("NAT detected: our real NBMA address is %s", + nhrp_address_format(&cie->nbma_address, + sizeof(tmp), tmp)); + peer->interface->nat_cie = *cie; + } + } + + /* If not re-registration, send a purge request for each subnet + * we accept shortcuts to, to clear server redirection cache. 
*/ + if (!(peer->flags & NHRP_PEER_FLAG_UP) && + (packet = nhrp_packet_alloc()) != NULL) { + struct nhrp_peer_selector sel; + + packet->hdr = (struct nhrp_packet_header) { + .afnum = peer->afnum, + .protocol_type = peer->protocol_type, + .version = NHRP_VERSION_RFC2332, + .type = NHRP_PACKET_PURGE_REQUEST, + .hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT, + }; + packet->dst_protocol_address = peer->protocol_address; + + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ADDR); + nhrp_peer_foreach(nhrp_add_local_route_cie, packet, &sel); + + nhrp_packet_extension(packet, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_REVERSE_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_RESPONDER_ADDRESS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + + nhrp_info("Sending Purge Request (of local routes) to %s", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp)); + + packet->dst_peer = nhrp_peer_get(peer); + packet->dst_iface = peer->interface; + nhrp_packet_send_request(packet, NULL, NULL); + nhrp_packet_put(packet); + } + + /* Re-register after holding time expires */ + nhrp_peer_is_up(peer); +ret: + nhrp_peer_put(peer); +} + +static void nhrp_peer_send_register_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer); + char dst[64]; + struct nhrp_packet *packet; + struct nhrp_cie *cie; + struct nhrp_payload *payload; + int sent = FALSE; + + packet = nhrp_packet_alloc(); + if (packet == NULL) + goto error; + + packet->hdr = (struct nhrp_packet_header) { + .afnum = peer->afnum, + .protocol_type = peer->protocol_type, + .version = NHRP_VERSION_RFC2332, + .type = NHRP_PACKET_REGISTRATION_REQUEST, + .hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT, + .flags = NHRP_FLAG_REGISTRATION_UNIQUE | + 
NHRP_FLAG_REGISTRATION_NAT + }; + if (peer->flags & NHRP_PEER_FLAG_CISCO) { + /* Cisco IOS seems to require reqistration and purge + * request id to match, so we need to used a fixed + * value. This is in violation of RFC, though. */ + packet->hdr.u.request_id = + nhrp_address_hash(&peer->interface->protocol_address); + } + packet->dst_protocol_address = peer->protocol_address; + + if (peer->type == NHRP_PEER_TYPE_DYNAMIC_NHS && + nhrp_address_is_network(&peer->protocol_address, + peer->prefix_length)) { + /* We are not yet sure of the protocol address of the NHS - + * send registration to the broadcast address with one hop + * limit. Except the NHS to reply with it's real protocol + * address. */ + nhrp_address_set_broadcast(&packet->dst_protocol_address, + peer->prefix_length); + packet->hdr.hop_count = 0; + } + + + /* Payload CIE */ + cie = nhrp_cie_alloc(); + if (cie == NULL) + goto error; + + *cie = (struct nhrp_cie) { + .hdr.code = NHRP_CODE_SUCCESS, + .hdr.prefix_length = 0xff, + .hdr.mtu = htons(peer->my_nbma_mtu), + .hdr.holding_time = htons(peer->interface->holding_time), + .hdr.preference = 0, + }; + + payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + + /* Standard extensions */ + nhrp_packet_extension(packet, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_REVERSE_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_RESPONDER_ADDRESS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + + /* Cisco NAT extension CIE */ + cie = nhrp_cie_alloc(); + if (cie == NULL) + goto error_free_packet; + + *cie = (struct nhrp_cie) { + .hdr.code = NHRP_CODE_SUCCESS, + .hdr.prefix_length = peer->protocol_address.addr_len * 8, + .hdr.preference = 0, + .nbma_address = peer->next_hop_address, + .protocol_address = 
peer->protocol_address, + }; + + payload = nhrp_packet_extension(packet, NHRP_EXTENSION_NAT_ADDRESS, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + + nhrp_info("Sending Registration Request to %s (my mtu=%d)", + nhrp_address_format(&peer->protocol_address, + sizeof(dst), dst), + peer->my_nbma_mtu); + + packet->dst_peer = nhrp_peer_get(peer); + packet->dst_iface = peer->interface; + sent = nhrp_packet_send_request(packet, + nhrp_peer_handle_registration_reply, + nhrp_peer_get(peer)); + +error_free_packet: + nhrp_packet_put(packet); +error: + if (!sent) + nhrp_peer_restart_error(peer); +} + +static int error_on_matching(void *ctx, struct nhrp_peer *peer) +{ + return 1; +} + +static void nhrp_peer_handle_resolution_reply(void *ctx, + struct nhrp_packet *reply) +{ + struct nhrp_peer *peer = (struct nhrp_peer *) ctx, *np; + struct nhrp_payload *payload; + struct nhrp_cie *cie, *natcie = NULL, *natoacie = NULL; + struct nhrp_interface *iface; + struct nhrp_peer_selector sel; + char dst[64], tmp[64], nbma[64]; + int ec; + + if (peer->flags & NHRP_PEER_FLAG_REMOVED) + goto ret; + + if (reply == NULL || + reply->hdr.type != NHRP_PACKET_RESOLUTION_REPLY) { + ec = reply ? reply->hdr.u.error.code : -1; + + nhrp_info("Failed to resolve %s: %s (%d)", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp), + nhrp_error_indication_text(ec), ntohs(ec)); + + if (reply != NULL) { + /* We got reply that this address is not available - + * negative cache it. */ + peer->flags |= NHRP_PEER_FLAG_UP; + nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_NEGATIVE); + } else { + /* Time out - NHS reachable, or packet lost multiple + * times. Keep trying if still needed. 
*/ + nhrp_peer_remove(peer); + } + goto ret; + } + + payload = nhrp_packet_payload(reply, NHRP_PAYLOAD_TYPE_CIE_LIST); + cie = list_next(&payload->u.cie_list, struct nhrp_cie, cie_list_entry); + if (cie == NULL) + goto ret; + + nhrp_info("Received Resolution Reply %s/%d is at proto %s nbma %s", + nhrp_address_format(&peer->protocol_address, + sizeof(dst), dst), + cie->hdr.prefix_length, + nhrp_address_format(&cie->protocol_address, + sizeof(tmp), tmp), + nhrp_address_format(&cie->nbma_address, + sizeof(nbma), nbma)); + + payload = nhrp_packet_extension(reply, + NHRP_EXTENSION_NAT_ADDRESS | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + if ((reply->hdr.flags & NHRP_FLAG_RESOLUTION_NAT) && + (payload != NULL)) { + natcie = list_next(&payload->u.cie_list, struct nhrp_cie, cie_list_entry); + if (natcie != NULL) { + natoacie = cie; + nhrp_info("NAT detected: really at proto %s nbma %s", + nhrp_address_format(&natcie->protocol_address, + sizeof(tmp), tmp), + nhrp_address_format(&natcie->nbma_address, + sizeof(nbma), nbma)); + } + } + if (natcie == NULL) + natcie = cie; + + if (nhrp_address_cmp(&peer->protocol_address, &cie->protocol_address) + == 0) { + /* Destination is within NBMA network; update cache */ + peer->mtu = ntohs(cie->hdr.mtu); + peer->prefix_length = cie->hdr.prefix_length; + peer->next_hop_address = natcie->nbma_address; + if (natoacie != NULL) + peer->next_hop_nat_oa = natoacie->nbma_address; + peer->expire_time = ev_now() + ntohs(cie->hdr.holding_time); + nhrp_address_set_network(&peer->protocol_address, + peer->prefix_length); + nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_CACHED); + goto ret; + } + + /* Check that we won't replace a local address */ + sel = (struct nhrp_peer_selector) { + .flags = NHRP_PEER_FIND_EXACT, + .type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ADDR), + .protocol_address = peer->protocol_address, + .prefix_length = cie->hdr.prefix_length, + }; + if (nhrp_peer_foreach(error_on_matching, NULL, &sel)) { + nhrp_error("Local 
route %s/%d exists: not replacing " + "with shortcut", + nhrp_address_format(&peer->protocol_address, + sizeof(tmp), tmp), + cie->hdr.prefix_length); + peer->flags |= NHRP_PEER_FLAG_UP; + nhrp_peer_reinsert(peer, NHRP_PEER_TYPE_NEGATIVE); + goto ret; + } + + /* Update the received NBMA address to nexthop */ + iface = peer->interface; + np = nhrp_peer_route(iface, &cie->protocol_address, + NHRP_PEER_FIND_EXACT, 0); + if (np == NULL) { + np = nhrp_peer_alloc(iface); + np->type = NHRP_PEER_TYPE_CACHED; + np->afnum = reply->hdr.afnum; + np->protocol_type = reply->hdr.protocol_type; + np->protocol_address = cie->protocol_address; + np->next_hop_address = natcie->nbma_address; + if (natoacie != NULL) + np->next_hop_nat_oa = natoacie->nbma_address; + np->mtu = ntohs(cie->hdr.mtu); + np->prefix_length = cie->protocol_address.addr_len * 8; + np->expire_time = ev_now() + ntohs(cie->hdr.holding_time); + nhrp_peer_insert(np); + nhrp_peer_put(np); + } + + /* Off NBMA destination; a shortcut route */ + np = nhrp_peer_alloc(iface); + np->type = NHRP_PEER_TYPE_SHORTCUT_ROUTE; + np->afnum = reply->hdr.afnum; + np->protocol_type = reply->hdr.protocol_type; + np->protocol_address = peer->protocol_address; + np->prefix_length = cie->hdr.prefix_length; + np->next_hop_address = cie->protocol_address; + np->expire_time = ev_now() + ntohs(cie->hdr.holding_time); + nhrp_address_set_network(&np->protocol_address, np->prefix_length); + nhrp_peer_insert(np); + nhrp_peer_put(np); + + /* Delete the incomplete entry */ + nhrp_peer_remove(peer); +ret: + nhrp_peer_put(peer); +} + +static void nhrp_peer_send_resolve(struct nhrp_peer *peer) +{ + char dst[64]; + struct nhrp_packet *packet; + struct nhrp_cie *cie; + struct nhrp_payload *payload; + + packet = nhrp_packet_alloc(); + if (packet == NULL) + goto error; + + packet->hdr = (struct nhrp_packet_header) { + .afnum = peer->afnum, + .protocol_type = peer->protocol_type, + .version = NHRP_VERSION_RFC2332, + .type = NHRP_PACKET_RESOLUTION_REQUEST, + 
.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT, + .flags = NHRP_FLAG_RESOLUTION_SOURCE_IS_ROUTER | + NHRP_FLAG_RESOLUTION_AUTHORATIVE | + NHRP_FLAG_RESOLUTION_NAT + }; + packet->dst_protocol_address = peer->protocol_address; + + /* Payload CIE */ + cie = nhrp_cie_alloc(); + if (cie == NULL) + goto error; + + *cie = (struct nhrp_cie) { + .hdr.code = NHRP_CODE_SUCCESS, + .hdr.prefix_length = 0, + .hdr.mtu = 0, + .hdr.holding_time = htons(peer->interface->holding_time), + }; + + payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + + nhrp_info("Sending Resolution Request to %s", + nhrp_address_format(&peer->protocol_address, + sizeof(dst), dst)); + + /* Standard extensions */ + nhrp_packet_extension(packet, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_REVERSE_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_RESPONDER_ADDRESS | + NHRP_EXTENSION_FLAG_COMPULSORY, + NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_packet_extension(packet, + NHRP_EXTENSION_NAT_ADDRESS, + NHRP_PAYLOAD_TYPE_CIE_LIST); + + packet->dst_iface = peer->interface; + nhrp_packet_send_request(packet, + nhrp_peer_handle_resolution_reply, + nhrp_peer_get(peer)); + +error: + nhrp_packet_put(packet); +} + +struct nhrp_peer *nhrp_peer_alloc(struct nhrp_interface *iface) +{ + struct nhrp_peer *p; + + nhrp_peer_num_total++; + p = calloc(1, sizeof(struct nhrp_peer)); + p->ref = 1; + p->interface = iface; + list_init(&p->peer_list_entry); + list_init(&p->mcast_list_entry); + ev_timer_init(&p->timer, NULL, 0., 0.); + ev_child_init(&p->child, NULL, 0, 0); + + return p; +} + +struct nhrp_peer *nhrp_peer_get(struct nhrp_peer *peer) +{ + if (peer == NULL) + return NULL; + + peer->ref++; + nhrp_peer_debug_refcount(__FUNCTION__, peer); + + return peer; +} + +static void 
nhrp_peer_run_nhs_down(struct nhrp_peer *peer) +{ + if ((peer->flags & (NHRP_PEER_FLAG_REGISTER | + NHRP_PEER_FLAG_UP | + NHRP_PEER_FLAG_REPLACED)) + == (NHRP_PEER_FLAG_REGISTER | NHRP_PEER_FLAG_UP)) + nhrp_peer_run_script(peer, "nhs-down", NULL); +} + +static void nhrp_peer_release(struct nhrp_peer *peer) +{ + struct nhrp_interface *iface = peer->interface; + struct nhrp_peer_selector sel; + + nhrp_peer_cancel_async(peer); + + /* Remove from lists */ + if (list_hashed(&peer->mcast_list_entry)) + list_del(&peer->mcast_list_entry); + if (hlist_hashed(&peer->nbma_hash_entry)) + hlist_del(&peer->nbma_hash_entry); + + if (peer->parent != NULL) { + nhrp_peer_put(peer->parent); + peer->parent = NULL; + } + + switch (peer->type) { + case NHRP_PEER_TYPE_SHORTCUT_ROUTE: + if ((peer->flags & NHRP_PEER_FLAG_UP) && + !(peer->flags & NHRP_PEER_FLAG_REPLACED)) + nhrp_peer_run_script(peer, "route-down", NULL); + break; + case NHRP_PEER_TYPE_CACHED: + case NHRP_PEER_TYPE_DYNAMIC: + case NHRP_PEER_TYPE_STATIC: + case NHRP_PEER_TYPE_DYNAMIC_NHS: + if (peer->flags & NHRP_PEER_FLAG_REPLACED) + break; + + /* Remove cached routes using this entry as next-hop */ + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE); + sel.interface = iface; + sel.next_hop_address = peer->protocol_address; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, + &sel); + + /* Execute peer-down */ + nhrp_peer_run_nhs_down(peer); + if (peer->flags & NHRP_PEER_FLAG_UP) { + peer->purge_reason = "timeout"; + nhrp_peer_run_script(peer, "peer-down", NULL); + } + + /* Remove from arp cache */ + if (peer->protocol_address.type != PF_UNSPEC) + kernel_inject_neighbor(&peer->protocol_address, + NULL, peer->interface); + break; + case NHRP_PEER_TYPE_INCOMPLETE: + case NHRP_PEER_TYPE_NEGATIVE: + case NHRP_PEER_TYPE_LOCAL_ADDR: + case NHRP_PEER_TYPE_LOCAL_ROUTE: + case NHRP_PEER_TYPE_STATIC_DNS: + break; + default: + NHRP_BUG_ON("invalid peer type"); + break; + } + + if (peer->nbma_hostname) 
{ + free(peer->nbma_hostname); + peer->nbma_hostname = NULL; + } + + free(peer); + nhrp_peer_num_total--; +} + +int nhrp_peer_put(struct nhrp_peer *peer) +{ + NHRP_BUG_ON(peer->ref == 0); + + peer->ref--; + nhrp_peer_debug_refcount(__FUNCTION__, peer); + + if (peer->ref > 0) + return FALSE; + + nhrp_peer_release(peer); + + return TRUE; +} + +static int nhrp_peer_mark_matching(void *ctx, struct nhrp_peer *peer) +{ + peer->flags |= NHRP_PEER_FLAG_MARK; + return 0; +} + +static int nhrp_peer_renew_nhs_matching(void *ctx, struct nhrp_peer *peer) +{ + peer->flags &= ~NHRP_PEER_FLAG_MARK; + return 1; +} + +static void nhrp_peer_dnsmap_query_cb(struct nhrp_address_query *query, + int num_addr, struct nhrp_address *addrs) +{ + struct nhrp_peer *np, *peer = + container_of(query, struct nhrp_peer, address_query); + struct nhrp_peer_selector sel; + int i; + + if (num_addr < 0) { + nhrp_error("Failed to resolve '%s'", peer->nbma_hostname); + nhrp_peer_schedule(peer, 10, nhrp_peer_dnsmap_restart_cb); + return; + } + + if (num_addr > 0) { + /* Refresh protocol */ + peer->afnum = nhrp_afnum_from_pf(addrs[0].type); + } + + /* Mark existing dynamic nhs entries as expired */ + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_DYNAMIC_NHS); + sel.interface = peer->interface; + sel.parent = peer; + nhrp_peer_foreach(nhrp_peer_mark_matching, NULL, &sel); + + for (i = 0; i < num_addr; i++) { + /* If this NBMA exists as dynamic NHS, mark it ok. 
*/ + sel.next_hop_address = addrs[i]; + if (nhrp_peer_foreach(nhrp_peer_renew_nhs_matching, + NULL, &sel) != 0) + continue; + + /* New NHS, create a peer entry */ + np = nhrp_peer_alloc(peer->interface); + if (np == NULL) { + /* Allocation failed: skip this address instead of + * dereferencing NULL. The DNS refresh scheduled at the + * end of this function will try to create it again. */ + continue; + } + np->type = NHRP_PEER_TYPE_DYNAMIC_NHS; + np->flags |= NHRP_PEER_FLAG_REGISTER; + np->afnum = peer->afnum; + np->protocol_type = peer->protocol_type; + np->protocol_address = peer->protocol_address; + np->prefix_length = peer->prefix_length; + np->next_hop_address = addrs[i]; + np->parent = nhrp_peer_get(peer); + nhrp_address_set_network(&np->protocol_address, + np->prefix_length); + nhrp_peer_insert(np); + nhrp_peer_put(np); + } + + /* Delete all dynamic nhs:s that were not in the DNS reply */ + nhrp_address_set_type(&sel.next_hop_address, AF_UNSPEC); + sel.flags = NHRP_PEER_FIND_MARK; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); + + /* Refresh DNS info */ + nhrp_peer_schedule(peer, peer->interface->holding_time, + nhrp_peer_dnsmap_restart_cb); +} + +static void nhrp_peer_dnsmap_restart_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer); + + NHRP_BUG_ON(peer->nbma_hostname == NULL); + nhrp_address_resolve(&peer->address_query, peer->nbma_hostname, + nhrp_peer_dnsmap_query_cb); +} + +static void nhrp_peer_insert_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer); + + nhrp_peer_cancel_async(peer); + switch (peer->type) { + case NHRP_PEER_TYPE_LOCAL_ADDR: + peer->flags |= NHRP_PEER_FLAG_UP; + forward_local_addresses_changed(); + break; + case NHRP_PEER_TYPE_LOCAL_ROUTE: + peer->flags |= NHRP_PEER_FLAG_UP; + break; + case NHRP_PEER_TYPE_INCOMPLETE: + nhrp_peer_send_resolve(peer); + break; + case NHRP_PEER_TYPE_CACHED: + case NHRP_PEER_TYPE_DYNAMIC: + case NHRP_PEER_TYPE_STATIC: + case NHRP_PEER_TYPE_DYNAMIC_NHS: + nhrp_peer_restart_cb(w, 0); + break; + case NHRP_PEER_TYPE_STATIC_DNS: + nhrp_peer_dnsmap_restart_cb(w, 0); + break; + case 
NHRP_PEER_TYPE_SHORTCUT_ROUTE: + if (peer->flags & NHRP_PEER_FLAG_UP) + nhrp_peer_script_route_up_done(&peer->child, 0); + else if (nhrp_peer_route(peer->interface, + &peer->next_hop_address, + NHRP_PEER_FIND_UP | NHRP_PEER_FIND_EXACT, + NHRP_PEER_TYPEMASK_ADJACENT) != NULL) + nhrp_peer_run_script(peer, "route-up", + nhrp_peer_script_route_up_done); + else + nhrp_peer_schedule(peer, peer->expire_time - NHRP_EXPIRY_TIME + - 10 - ev_now(), nhrp_peer_expire_cb); + break; + case NHRP_PEER_TYPE_NEGATIVE: + peer->expire_time = ev_now() + NHRP_NEGATIVE_CACHE_TIME; + + if (peer->flags & NHRP_PEER_FLAG_UP) + kernel_inject_neighbor(&peer->protocol_address, + NULL, peer->interface); + nhrp_peer_schedule(peer, NHRP_NEGATIVE_CACHE_TIME, + nhrp_peer_remove_cb); + break; + default: + NHRP_BUG_ON("invalid peer type"); + break; + } +} + +static void nhrp_peer_reinsert(struct nhrp_peer *peer, int type) +{ + NHRP_BUG_ON((peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) != + (type == NHRP_PEER_TYPE_LOCAL_ADDR)); + NHRP_BUG_ON((peer->type == NHRP_PEER_TYPE_LOCAL_ROUTE) != + (type == NHRP_PEER_TYPE_LOCAL_ROUTE)); + + peer->flags &= ~NHRP_PEER_FLAG_REMOVED; + peer->type = type; + nhrp_peer_insert_cb(&peer->timer, 0); +} + +static int nhrp_peer_replace_shortcut(void *ctx, struct nhrp_peer *peer) +{ + struct nhrp_peer *shortcut = (struct nhrp_peer *) ctx; + + /* Shortcut of identical prefix is replacement, either + * due to renewal, or new shortcut next-hop. */ + if (nhrp_address_cmp(&peer->protocol_address, + &shortcut->protocol_address) == 0 && + peer->prefix_length == shortcut->prefix_length) { + peer->flags |= NHRP_PEER_FLAG_REPLACED; + + /* If identical shortcut is being refreshed, + * mark the refresher peer entry up. 
*/ + if ((peer->flags & NHRP_PEER_FLAG_UP) && + nhrp_address_cmp(&peer->next_hop_address, + &shortcut->next_hop_address) == 0) + shortcut->flags |= NHRP_PEER_FLAG_UP; + } + + /* Delete the old peer unconditionally */ + nhrp_peer_remove(peer); + + return 0; +} + +void nhrp_peer_insert(struct nhrp_peer *peer) +{ + struct nhrp_peer_selector sel; + char tmp[NHRP_PEER_FORMAT_LEN]; + + /* First, prune all duplicates */ + memset(&sel, 0, sizeof(sel)); + sel.interface = peer->interface; + sel.protocol_address = peer->protocol_address; + sel.prefix_length = peer->prefix_length; + switch (peer->type) { + case NHRP_PEER_TYPE_SHORTCUT_ROUTE: + /* remove all existing shortcuts with same nexthop */ + sel.flags = NHRP_PEER_FIND_SUBNET; + sel.type_mask |= BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE); + nhrp_peer_foreach(nhrp_peer_replace_shortcut, peer, &sel); + break; + case NHRP_PEER_TYPE_LOCAL_ROUTE: + sel.type_mask |= BIT(NHRP_PEER_TYPE_LOCAL_ROUTE); + default: + /* remove exact protocol address matches */ + sel.flags = NHRP_PEER_FIND_EXACT; + sel.type_mask |= NHRP_PEER_TYPEMASK_REMOVABLE; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); + break; + } + + /* Keep a reference as long as we are on the list */ + peer = nhrp_peer_get(peer); + nhrp_debug("Adding %s %s", + nhrp_peer_type[peer->type], + nhrp_peer_format(peer, sizeof(tmp), tmp)); + + if (peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) + list_add(&peer->peer_list_entry, &local_peer_list); + else + list_add(&peer->peer_list_entry, &peer->interface->peer_list); + + /* Start peers life */ + if (nhrp_running || peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) + nhrp_peer_insert_cb(&peer->timer, 0); + else + nhrp_peer_schedule(peer, 0, &nhrp_peer_insert_cb); +} + +static void nhrp_peer_script_peer_down_done(union nhrp_peer_event e, + int revents) +{ + struct nhrp_peer *peer = nhrp_peer_from_event(e, revents); + + nhrp_peer_schedule(peer, 5, nhrp_peer_restart_cb); +} + +void nhrp_peer_purge(struct nhrp_peer *peer, const char *purge_reason) 
+{ + switch (peer->type) { + case NHRP_PEER_TYPE_STATIC: + case NHRP_PEER_TYPE_DYNAMIC_NHS: + peer->purge_reason = purge_reason; + nhrp_peer_run_nhs_down(peer); + nhrp_peer_is_down(peer); + nhrp_peer_cancel_async(peer); + if (peer->flags & NHRP_PEER_FLAG_LOWER_UP) { + nhrp_peer_run_script(peer, "peer-down", + nhrp_peer_script_peer_down_done); + } else { + nhrp_peer_script_peer_down_done(&peer->child, 0); + } + nhrp_address_set_type(&peer->my_nbma_address, PF_UNSPEC); + break; + case NHRP_PEER_TYPE_STATIC_DNS: + nhrp_peer_schedule(peer, 0, nhrp_peer_dnsmap_restart_cb); + break; + default: + peer->purge_reason = purge_reason; + nhrp_peer_remove(peer); + break; + } +} + +int nhrp_peer_purge_matching(void *ctx, struct nhrp_peer *peer) +{ + int *count = (int *) ctx; + nhrp_peer_purge(peer, "user-request"); + if (count != NULL) + (*count)++; + return 0; +} + +int nhrp_peer_lowerdown_matching(void *ctx, struct nhrp_peer *peer) +{ + int *count = (int *) ctx; + nhrp_peer_purge(peer, "lower-down"); + if (count != NULL) + (*count)++; + return 0; +} + +static void nhrp_peer_remove_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer *peer = container_of(w, struct nhrp_peer, timer); + int type; + + peer->flags |= NHRP_PEER_FLAG_REMOVED; + peer->purge_reason = "expired"; + nhrp_peer_is_down(peer); + list_del(&peer->peer_list_entry); + + type = peer->type; + nhrp_peer_put(peer); + + if (type == NHRP_PEER_TYPE_LOCAL_ADDR) + forward_local_addresses_changed(); +} + +void nhrp_peer_remove(struct nhrp_peer *peer) +{ + char tmp[NHRP_PEER_FORMAT_LEN]; + + if (peer->flags & NHRP_PEER_FLAG_REMOVED) + return; + + nhrp_debug("Removing %s %s", + nhrp_peer_type[peer->type], + nhrp_peer_format(peer, sizeof(tmp), tmp)); + + peer->flags |= NHRP_PEER_FLAG_REMOVED; + nhrp_peer_is_down(peer); + nhrp_peer_cancel_async(peer); + nhrp_peer_schedule(peer, 0, nhrp_peer_remove_cb); +} + +int nhrp_peer_remove_matching(void *ctx, struct nhrp_peer *peer) +{ + int *count = (int *) ctx; + + 
nhrp_peer_remove(peer); + if (count != NULL) + (*count)++; + + return 0; +} + +int nhrp_peer_set_used_matching(void *ctx, struct nhrp_peer *peer) +{ + int used = (int) (intptr_t) ctx; + + if (used) { + peer->flags |= NHRP_PEER_FLAG_USED; + nhrp_peer_renew(peer); + } else { + peer->flags &= ~NHRP_PEER_FLAG_USED; + } + return 0; +} + +int nhrp_peer_match(struct nhrp_peer *p, struct nhrp_peer_selector *sel) +{ + if (sel->type_mask && !(sel->type_mask & BIT(p->type))) + return FALSE; + + if ((sel->flags & NHRP_PEER_FIND_UP) && + !(p->flags & NHRP_PEER_FLAG_UP)) + return FALSE; + + if ((sel->flags & NHRP_PEER_FIND_MARK) && + !(p->flags & NHRP_PEER_FLAG_MARK)) + return FALSE; + + if (sel->interface != NULL && + p->interface != sel->interface && + !(p->interface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST)) + return FALSE; + + if (sel->hostname != NULL && + (p->nbma_hostname == NULL || + strcmp(sel->hostname, p->nbma_hostname) != 0)) + return FALSE; + + if (sel->parent != NULL && + p->parent != sel->parent) + return FALSE; + + if (sel->protocol_address.type != PF_UNSPEC) { + if (sel->prefix_length == 0) + sel->prefix_length = sel->protocol_address.addr_len * 8; + + if (sel->flags & NHRP_PEER_FIND_EXACT) { + if (nhrp_address_cmp(&p->protocol_address, + &sel->protocol_address) != 0) + return FALSE; + + if (p->prefix_length != sel->prefix_length && + p->type != NHRP_PEER_TYPE_STATIC && + p->type != NHRP_PEER_TYPE_DYNAMIC_NHS) + return FALSE; + } else if (sel->flags & NHRP_PEER_FIND_ROUTE) { + if (nhrp_address_prefix_cmp(&p->protocol_address, + &sel->protocol_address, + p->prefix_length) != 0) + return FALSE; + } else { + if (p->prefix_length < sel->prefix_length) { + if (sel->prefix_length + == sel->protocol_address.addr_len * 8 && + nhrp_address_cmp(&p->protocol_address, + &sel->protocol_address) + == 0) + return TRUE; + + return FALSE; + } + + if (nhrp_address_prefix_cmp(&p->protocol_address, + &sel->protocol_address, + sel->prefix_length) != 0) + return FALSE; + } + } + + 
if (sel->next_hop_address.type != PF_UNSPEC) { + if (nhrp_address_cmp(&p->next_hop_address, + &sel->next_hop_address) != 0) + return FALSE; + } + + return TRUE; +} + +struct enum_interface_peers_ctx { + nhrp_peer_enumerator enumerator; + void *ctx; + struct nhrp_peer_selector *sel; +}; + +static int enumerate_peer_cache(struct list_head *peer_cache, + nhrp_peer_enumerator e, void *ctx, + struct nhrp_peer_selector *sel) +{ + struct nhrp_peer *p; + int rc = 0; + + list_for_each_entry(p, peer_cache, peer_list_entry) { + if (p->flags & NHRP_PEER_FLAG_REMOVED) + continue; + + if (sel == NULL || nhrp_peer_match(p, sel)) { + rc = e(ctx, p); + if (rc != 0) + break; + } + } + + return rc; +} + +static int enum_interface_peers(void *ctx, struct nhrp_interface *iface) +{ + struct enum_interface_peers_ctx *ectx = + (struct enum_interface_peers_ctx *) ctx; + + return enumerate_peer_cache(&iface->peer_list, + ectx->enumerator, ectx->ctx, + ectx->sel); +} + +int nhrp_peer_foreach(nhrp_peer_enumerator e, void *ctx, + struct nhrp_peer_selector *sel) +{ + struct nhrp_interface *iface = NULL; + struct enum_interface_peers_ctx ectx = { e, ctx, sel }; + int rc; + + if (sel != NULL) + iface = sel->interface; + + rc = enumerate_peer_cache(&local_peer_list, e, ctx, sel); + if (rc != 0) + return rc; + + /* Speed optimization: TYPE_LOCAL peers cannot be found from + * other places */ + if (sel != NULL && + sel->type_mask == BIT(NHRP_PEER_TYPE_LOCAL_ADDR)) + return 0; + + if (iface == NULL) + rc = nhrp_interface_foreach(enum_interface_peers, &ectx); + else + rc = enumerate_peer_cache(&iface->peer_list, e, ctx, sel); + + return rc; +} + +struct route_decision { + struct nhrp_peer_selector sel; + struct list_head *exclude; + struct nhrp_peer *best_found; + struct nhrp_address *src; + int found_exact, found_up; +}; + +static int decide_route(void *ctx, struct nhrp_peer *peer) +{ + struct route_decision *rd = (struct route_decision *) ctx; + int exact; + + if (peer->type != 
NHRP_PEER_TYPE_SHORTCUT_ROUTE) { + /* Exclude addresses from CIE from routing decision + * to avoid routing loops within NHS clusters. */ + if (rd->exclude != NULL && + nhrp_address_match_cie_list(&peer->next_hop_address, + &peer->protocol_address, + rd->exclude)) + return 0; + + /* Exclude also source address, we don't want to + * forward questions back to who's asking. */ + if (rd->src != NULL && + nhrp_address_cmp(rd->src, &peer->protocol_address) == 0) + return 0; + } else { + /* Exclude routes that point back to the sender + * of the packet */ + if (rd->src != NULL && + nhrp_address_cmp(rd->src, &peer->next_hop_address) == 0) + return 0; + } + + exact = (peer->type >= NHRP_PEER_TYPE_DYNAMIC_NHS) && + (nhrp_address_cmp(&peer->protocol_address, + &rd->sel.protocol_address) == 0); + if (rd->found_exact > exact) + return 0; + + if (rd->found_up && !(peer->flags & NHRP_PEER_FLAG_UP)) + return 0; + + if (rd->best_found != NULL && + rd->found_exact == exact && + rd->found_up == (peer->flags & NHRP_PEER_FLAG_UP)) { + if (rd->best_found->prefix_length > peer->prefix_length) + return 0; + + if (rd->best_found->prefix_length == peer->prefix_length && + rd->best_found->last_used < peer->last_used) + return 0; + } + + rd->best_found = peer; + rd->found_exact = exact; + rd->found_up = peer->flags & NHRP_PEER_FLAG_UP; + return 0; +} + +struct nhrp_peer *nhrp_peer_route_full(struct nhrp_interface *interface, + struct nhrp_address *dst, + int flags, int type_mask, + struct nhrp_address *src, + struct list_head *exclude) +{ + struct route_decision rd; + + memset(&rd, 0, sizeof(rd)); + rd.sel.flags = flags & ~NHRP_PEER_FIND_UP; + if ((flags & (NHRP_PEER_FIND_ROUTE | NHRP_PEER_FIND_EXACT | + NHRP_PEER_FIND_SUBNET)) == 0) + rd.sel.flags |= NHRP_PEER_FIND_ROUTE; + rd.sel.type_mask = type_mask; + rd.sel.interface = interface; + rd.sel.protocol_address = *dst; + rd.exclude = exclude; + rd.src = src; + nhrp_peer_foreach(decide_route, &rd, &rd.sel); + + if (rd.best_found == NULL) + 
return NULL; + + if ((flags & NHRP_PEER_FIND_UP) && + !(rd.best_found->flags & NHRP_PEER_FLAG_UP)) + return NULL; + + rd.best_found->last_used = ev_now(); + return rd.best_found; +} + +/* React to traffic seen towards dst on iface: if no peer cache entry + * already covers the destination, insert an INCOMPLETE entry for it. + * Does nothing when an entry exists or when the new entry cannot be + * allocated. */ +void nhrp_peer_traffic_indication(struct nhrp_interface *iface, + uint16_t afnum, struct nhrp_address *dst) +{ + struct nhrp_peer *peer; + int type; + + /* For off-NBMA destinations, we consider all shortcut routes, + * but NBMA destinations should be exact because we want to drop + * NHS from the path. */ + if (nhrp_address_prefix_cmp(dst, &iface->protocol_address, + iface->protocol_address_prefix) != 0) + type = NHRP_PEER_FIND_ROUTE; + else + type = NHRP_PEER_FIND_EXACT; + + /* Have we done something for this destination already? */ + peer = nhrp_peer_route(iface, dst, type, + ~BIT(NHRP_PEER_TYPE_LOCAL_ROUTE)); + if (peer != NULL) + return; + + /* Initiate resolution */ + peer = nhrp_peer_alloc(iface); + if (peer == NULL) + return; /* allocation failed; nothing to insert */ + peer->type = NHRP_PEER_TYPE_INCOMPLETE; + peer->afnum = afnum; + peer->protocol_type = nhrp_protocol_from_pf(dst->type); + peer->protocol_address = *dst; + peer->prefix_length = dst->addr_len * 8; + nhrp_peer_insert(peer); + nhrp_peer_put(peer); +} + +static int dump_peer(void *ctx, struct nhrp_peer *peer) +{ + int *num_total = (int *) ctx; + char tmp[NHRP_PEER_FORMAT_LEN]; + + nhrp_info("%s %s", + nhrp_peer_type[peer->type], + nhrp_peer_format(peer, sizeof(tmp), tmp)); + (*num_total)++; + return 0; +} + +void nhrp_peer_dump_cache(void) +{ + int num_total = 0; + + nhrp_info("Peer cache dump:"); + nhrp_peer_foreach(dump_peer, &num_total, NULL); + nhrp_info("Total %d peer cache entries, %d allocated entries", + num_total, nhrp_peer_num_total); +} + +void nhrp_peer_cleanup(void) +{ + ev_tstamp prev = ev_now(); + + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, NULL); + + while (nhrp_peer_num_total > 0) { + if (ev_now() > prev + 5.0) { + nhrp_info("Waiting for peers to die, %d left", nhrp_peer_num_total); + prev = ev_now(); + } + ev_loop(EVLOOP_ONESHOT); + } +} diff --git 
a/nhrp/nhrp_peer.h b/nhrp/nhrp_peer.h new file mode 100644 index 0000000..dea8d66 --- /dev/null +++ b/nhrp/nhrp_peer.h @@ -0,0 +1,194 @@ +/* nhrp_peer.h - NHRP peer cache definitions + * + * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#ifndef NHRP_PEER_H +#define NHRP_PEER_H + +#include <time.h> +#include <stdint.h> +#include <sys/types.h> +#include "nhrp_address.h" +#include "libev.h" +#include "list.h" + +#define NHRP_PEER_TYPE_INCOMPLETE 0x00 /* Resolution request sent */ +#define NHRP_PEER_TYPE_NEGATIVE 0x01 /* Negative cached */ +#define NHRP_PEER_TYPE_CACHED 0x02 /* Received/relayed resolution reply */ +#define NHRP_PEER_TYPE_SHORTCUT_ROUTE 0x03 /* Received/relayed resolution for route */ +#define NHRP_PEER_TYPE_DYNAMIC 0x04 /* NHC registration */ +#define NHRP_PEER_TYPE_DYNAMIC_NHS 0x05 /* Dynamic NHS from dns-map */ +#define NHRP_PEER_TYPE_STATIC 0x06 /* Static mapping from config file */ +#define NHRP_PEER_TYPE_STATIC_DNS 0x07 /* Static dns-map from config file */ +#define NHRP_PEER_TYPE_LOCAL_ROUTE 0x08 /* Non-local destination, with local route */ +#define NHRP_PEER_TYPE_LOCAL_ADDR 0x09 /* Local destination (IP or off-NBMA subnet) */ +#define NHRP_PEER_TYPE_MAX (NHRP_PEER_TYPE_LOCAL_ADDR+1) + +#define NHRP_PEER_TYPEMASK_ADJACENT \ + (BIT(NHRP_PEER_TYPE_CACHED) | \ + BIT(NHRP_PEER_TYPE_DYNAMIC) | \ + BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) | \ + BIT(NHRP_PEER_TYPE_STATIC) | \ + BIT(NHRP_PEER_TYPE_LOCAL_ADDR)) + +#define NHRP_PEER_TYPEMASK_REMOVABLE \ + (BIT(NHRP_PEER_TYPE_INCOMPLETE) | \ + BIT(NHRP_PEER_TYPE_NEGATIVE) | \ + BIT(NHRP_PEER_TYPE_CACHED) | \ + BIT(NHRP_PEER_TYPE_SHORTCUT_ROUTE) | \ + BIT(NHRP_PEER_TYPE_DYNAMIC)) + +#define NHRP_PEER_TYPEMASK_PURGEABLE \ + 
(NHRP_PEER_TYPEMASK_REMOVABLE | \ + BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) | \ + BIT(NHRP_PEER_TYPE_STATIC) | \ + BIT(NHRP_PEER_TYPE_STATIC_DNS)) + +#define NHRP_PEER_TYPEMASK_ALL \ + (NHRP_PEER_TYPEMASK_PURGEABLE | \ + BIT(NHRP_PEER_TYPE_LOCAL_ROUTE) | \ + BIT(NHRP_PEER_TYPE_LOCAL_ADDR)) + +/* For routing via NHS */ +#define NHRP_PEER_TYPEMASK_ROUTE_VIA_NHS \ + (BIT(NHRP_PEER_TYPE_DYNAMIC) | \ + BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) | \ + BIT(NHRP_PEER_TYPE_STATIC) | \ + BIT(NHRP_PEER_TYPE_LOCAL_ROUTE) | \ + BIT(NHRP_PEER_TYPE_LOCAL_ADDR)) + +#define NHRP_PEER_FLAG_UNIQUE 0x01 /* Peer is unique; see RFC2332 */ +#define NHRP_PEER_FLAG_REGISTER 0x02 /* For TYPE_STATIC: send registration */ +#define NHRP_PEER_FLAG_CISCO 0x04 /* For TYPE_STATIC: peer is Cisco */ +#define NHRP_PEER_FLAG_USED 0x10 /* Peer is in kernel ARP table */ +#define NHRP_PEER_FLAG_LOWER_UP 0x20 /* Script executed succesfully */ +#define NHRP_PEER_FLAG_UP 0x40 /* Can send all packets (registration ok) */ +#define NHRP_PEER_FLAG_REPLACED 0x80 /* Peer has been replaced */ +#define NHRP_PEER_FLAG_REMOVED 0x100 /* Deleted, but not removed from cache yet */ +#define NHRP_PEER_FLAG_MARK 0x200 /* Can be used to temporarily mark peers */ + +#define NHRP_PEER_FIND_ROUTE 0x01 +#define NHRP_PEER_FIND_EXACT 0x02 +#define NHRP_PEER_FIND_SUBNET 0x04 +#define NHRP_PEER_FIND_UP 0x10 +#define NHRP_PEER_FIND_MARK 0x20 + +struct nhrp_interface; +struct nhrp_packet; +struct nhrp_pending_request; + +union __attribute__ ((__transparent_union__)) nhrp_peer_event { + struct ev_timer *timer; + struct ev_child *child; +}; + +struct nhrp_peer { + unsigned int ref; + unsigned int flags; + + struct list_head peer_list_entry; + struct list_head mcast_list_entry; + struct hlist_node nbma_hash_entry; + + const char *purge_reason; + struct nhrp_interface *interface; + struct nhrp_peer *parent; + struct nhrp_packet *queued_packet; + struct nhrp_pending_request *request; + + struct ev_timer timer; + struct ev_child child; + struct 
nhrp_address_query address_query; + + uint8_t type; + uint8_t prefix_length; + uint16_t afnum; + uint16_t protocol_type; + uint16_t mtu, my_nbma_mtu; + ev_tstamp expire_time; + ev_tstamp last_used; + struct nhrp_address my_nbma_address; + struct nhrp_address protocol_address; + unsigned int holding_time; + + char *nbma_hostname; + /* NHRP_PEER_TYPE_ROUTE: protocol addr., others: NBMA addr. */ + struct nhrp_address next_hop_address; + struct nhrp_address next_hop_nat_oa; +}; + +struct nhrp_peer_selector { + int flags; /* NHRP_PEER_FIND_xxx */ + int type_mask; + + struct nhrp_interface *interface; + struct nhrp_peer *parent; + const char *hostname; + + int prefix_length; + struct nhrp_address protocol_address; + struct nhrp_address next_hop_address; +}; + +const char * const nhrp_peer_type[NHRP_PEER_TYPE_MAX]; +typedef int (*nhrp_peer_enumerator)(void *ctx, struct nhrp_peer *peer); + +void nhrp_peer_cleanup(void); + +struct nhrp_peer *nhrp_peer_alloc(struct nhrp_interface *iface); +struct nhrp_peer *nhrp_peer_get(struct nhrp_peer *peer); +int nhrp_peer_put(struct nhrp_peer *peer); +void nhrp_peer_cancel_async(struct nhrp_peer *peer); + +void nhrp_peer_insert(struct nhrp_peer *peer); +void nhrp_peer_remove(struct nhrp_peer *peer); +void nhrp_peer_purge(struct nhrp_peer *peer, const char *purge_reason); + +int nhrp_peer_match(struct nhrp_peer *peer, struct nhrp_peer_selector *sel); + +int nhrp_peer_foreach(nhrp_peer_enumerator e, void *ctx, + struct nhrp_peer_selector *sel); +int nhrp_peer_remove_matching(void *count, struct nhrp_peer *peer); +int nhrp_peer_purge_matching(void *count, struct nhrp_peer *peer); +int nhrp_peer_lowerdown_matching(void *count, struct nhrp_peer *peer); +int nhrp_peer_set_used_matching(void *ctx, struct nhrp_peer *peer); +struct nhrp_peer *nhrp_peer_find_by_nbma(struct nhrp_interface *iface, struct nhrp_address *nbma); + +int nhrp_peer_event_ok(union nhrp_peer_event e, int revents); +char *nhrp_peer_event_reason(union nhrp_peer_event e, int 
revents, + size_t buflen, char *buf); +struct nhrp_peer *nhrp_peer_from_event(union nhrp_peer_event e, int revents); +void nhrp_peer_run_script(struct nhrp_peer *peer, char *action, + void (*cb)(union nhrp_peer_event, int)); +void nhrp_peer_send_packet_queue(struct nhrp_peer *peer); +int nhrp_peer_discover_nhs(struct nhrp_peer *peer, + struct nhrp_address *newaddr); + +struct nhrp_peer *nhrp_peer_route_full(struct nhrp_interface *iface, + struct nhrp_address *dest, + int flags, int type_mask, + struct nhrp_address *source, + struct list_head *exclude_cie_list); + +static inline struct nhrp_peer *nhrp_peer_route(struct nhrp_interface *iface, + struct nhrp_address *dest, + int flags, int type_mask) +{ + return nhrp_peer_route_full(iface, dest, flags, type_mask, NULL, NULL); +} + +void nhrp_peer_traffic_indication(struct nhrp_interface *iface, + uint16_t afnum, struct nhrp_address *dst); +void nhrp_peer_dump_cache(void); + +void nhrp_server_finish_request(struct nhrp_pending_request *pr); + +#endif diff --git a/nhrp/nhrp_protocol.h b/nhrp/nhrp_protocol.h new file mode 100644 index 0000000..8cf213b --- /dev/null +++ b/nhrp/nhrp_protocol.h @@ -0,0 +1,130 @@ +/* nhrp_protocol.h - NHRP protocol definitions + * + * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#ifndef NHRP_PROTOCOL_H +#define NHRP_PROTOCOL_H + +#include <stdint.h> +#include "afnum.h" + +/* NHRP Version */ +#define NHRP_VERSION_RFC2332 1 + +/* NHRP Packet Types */ +#define NHRP_PACKET_RESOLUTION_REQUEST 1 +#define NHRP_PACKET_RESOLUTION_REPLY 2 +#define NHRP_PACKET_REGISTRATION_REQUEST 3 +#define NHRP_PACKET_REGISTRATION_REPLY 4 +#define NHRP_PACKET_PURGE_REQUEST 5 +#define NHRP_PACKET_PURGE_REPLY 6 +#define NHRP_PACKET_ERROR_INDICATION 7 +#define NHRP_PACKET_TRAFFIC_INDICATION 8 + +/* NHRP Extension Types */ +#define NHRP_EXTENSION_FLAG_COMPULSORY 0x8000 +#define NHRP_EXTENSION_END 0 +#define NHRP_EXTENSION_PAYLOAD 0 +#define NHRP_EXTENSION_RESPONDER_ADDRESS 3 +#define NHRP_EXTENSION_FORWARD_TRANSIT_NHS 4 +#define NHRP_EXTENSION_REVERSE_TRANSIT_NHS 5 +#define NHRP_EXTENSION_AUTHENTICATION 7 +#define NHRP_EXTENSION_VENDOR 8 +#define NHRP_EXTENSION_NAT_ADDRESS 9 + +/* NHRP Error Indication Codes */ +#define NHRP_ERROR_UNRECOGNIZED_EXTENSION constant_htons(1) +#define NHRP_ERROR_LOOP_DETECTED constant_htons(2) +#define NHRP_ERROR_PROTOCOL_ADDRESS_UNREACHABLE constant_htons(6) +#define NHRP_ERROR_PROTOCOL_ERROR constant_htons(7) +#define NHRP_ERROR_SDU_SIZE_EXCEEDED constant_htons(8) +#define NHRP_ERROR_INVALID_EXTENSION constant_htons(9) +#define NHRP_ERROR_INVALID_RESOLUTION_REPLY constant_htons(10) +#define NHRP_ERROR_AUTHENTICATION_FAILURE constant_htons(11) +#define NHRP_ERROR_HOP_COUNT_EXCEEDED constant_htons(15) + +/* NHRP CIE Codes */ +#define NHRP_CODE_SUCCESS 0 +#define NHRP_CODE_ADMINISTRATIVELY_PROHIBITED 4 +#define NHRP_CODE_INSUFFICIENT_RESOURCES 5 +#define NHRP_CODE_NO_BINDING_EXISTS 11 +#define NHRP_CODE_BINDING_NON_UNIQUE 13 +#define NHRP_CODE_UNIQUE_ADDRESS_REGISTERED 14 + +/* NHRP Flags for Resolution request/reply */ +#define NHRP_FLAG_RESOLUTION_SOURCE_IS_ROUTER constant_htons(0x8000) +#define NHRP_FLAG_RESOLUTION_AUTHORATIVE constant_htons(0x4000) +#define NHRP_FLAG_RESOLUTION_DESTINATION_STABLE constant_htons(0x2000) +#define 
NHRP_FLAG_RESOLUTION_UNIQUE constant_htons(0x1000) +#define NHRP_FLAG_RESOLUTION_SOURCE_STABLE constant_htons(0x0800) +#define NHRP_FLAG_RESOLUTION_NAT constant_htons(0x0002) + +/* NHRP Flags for Registration request/reply */ +#define NHRP_FLAG_REGISTRATION_UNIQUE constant_htons(0x8000) +#define NHRP_FLAG_REGISTRATION_NAT constant_htons(0x0002) + +/* NHRP Flags for Purge request/reply */ +#define NHRP_FLAG_PURGE_NO_REPLY constant_htons(0x8000) + +/* NHRP Authentication extension types (ala Cisco) */ +#define NHRP_AUTHENTICATION_PLAINTEXT constant_htonl(0x00000001) + +/* NHRP Packet Structures */ +struct nhrp_packet_header { + /* Fixed header */ + uint16_t afnum; + uint16_t protocol_type; + uint8_t snap[5]; + uint8_t hop_count; + uint16_t packet_size; + uint16_t checksum; + uint16_t extension_offset; + uint8_t version; + uint8_t type; + uint8_t src_nbma_address_len; + uint8_t src_nbma_subaddress_len; + + /* Mandatory header */ + uint8_t src_protocol_address_len; + uint8_t dst_protocol_address_len; + uint16_t flags; + union { + uint32_t request_id; + struct { + uint16_t code; + uint16_t offset; + } error; + } u; +}; + +struct nhrp_cie_header { + uint8_t code; + uint8_t prefix_length; + uint16_t unused; + uint16_t mtu; + uint16_t holding_time; + uint8_t nbma_address_len; + uint8_t nbma_subaddress_len; + uint8_t protocol_address_len; + uint8_t preference; +}; + +struct nhrp_extension_header { + uint16_t type; + uint16_t length; +}; + +struct nhrp_cisco_authentication_extension { + uint32_t type; + uint8_t secret[8]; +}; + +#endif diff --git a/nhrp/nhrp_server.c b/nhrp/nhrp_server.c new file mode 100644 index 0000000..b41e4b8 --- /dev/null +++ b/nhrp/nhrp_server.c @@ -0,0 +1,566 @@ +/* nhrp_server.c - NHRP request handling + * + * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#include <string.h> +#include <netinet/in.h> + +#include "nhrp_common.h" +#include "nhrp_packet.h" +#include "nhrp_interface.h" +#include "nhrp_peer.h" + +#define NHRP_MAX_PENDING_REQUESTS 16 + +struct nhrp_pending_request { + struct list_head request_list_entry; + int natted; + int num_ok, num_error; + struct nhrp_packet *packet; + struct nhrp_cie *cie; + struct nhrp_payload *payload; + struct nhrp_peer *peer, *rpeer; + ev_tstamp now; +}; + +static struct list_head request_list = LIST_INITIALIZER(request_list); +static int num_pending_requests = 0; + +static void nhrp_server_start_cie_reg(struct nhrp_pending_request *pr); + +/* Allocate a pending-request record for packet, link it into request_list, + * take a reference on the packet and stamp it with the current event-loop + * time. Returns the record, or NULL if the allocation fails (in which case + * no global state is modified). */ +static struct nhrp_pending_request * +nhrp_server_record_request(struct nhrp_packet *packet) +{ + struct nhrp_pending_request *pr; + + pr = calloc(1, sizeof(struct nhrp_pending_request)); + if (pr != NULL) { + /* Initialise the list node only after the NULL check; + * doing it earlier dereferences NULL when calloc fails. */ + list_init(&pr->request_list_entry); + num_pending_requests++; + list_add(&pr->request_list_entry, &request_list); + pr->packet = nhrp_packet_get(packet); + pr->now = ev_now(); + } + return pr; +} + +void nhrp_server_finish_request(struct nhrp_pending_request *pr) +{ + list_del(&pr->request_list_entry); + if (pr->rpeer != NULL) { + struct nhrp_peer *peer = pr->rpeer; + if (peer->flags & NHRP_PEER_FLAG_REPLACED) { + /* The route peer entry was not accepted. We still + * send the replies here, and cancel anything pending + * so it'll get deleted cleanly on next put(). 
*/ + nhrp_peer_send_packet_queue(peer); + nhrp_peer_cancel_async(peer); + } + nhrp_peer_put(pr->rpeer); + } + if (pr->peer != NULL) + nhrp_peer_put(pr->peer); + if (pr->packet != NULL) + nhrp_packet_put(pr->packet); + free(pr); + num_pending_requests--; +} + +static int nhrp_server_request_pending(struct nhrp_packet *packet) +{ + struct nhrp_pending_request *r; + + list_for_each_entry(r, &request_list, request_list_entry) { + if (nhrp_address_cmp(&packet->src_nbma_address, + &r->packet->src_nbma_address) != 0) + continue; + if (nhrp_address_cmp(&packet->src_protocol_address, + &r->packet->src_protocol_address) != 0) + continue; + if (nhrp_address_cmp(&packet->dst_protocol_address, + &r->packet->dst_protocol_address) != 0) + continue; + + /* Request from the same address being already processed */ + return TRUE; + } + + return FALSE; +} + +static int nhrp_handle_resolution_request(struct nhrp_packet *packet) +{ + char tmp[64], tmp2[64]; + struct nhrp_payload *payload; + struct nhrp_peer *peer = packet->dst_peer; + struct nhrp_peer_selector sel; + struct nhrp_cie *cie; + + nhrp_info("Received Resolution Request from proto src %s to %s", + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp), tmp), + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp2), tmp2)); + + /* As first thing, flush all negative entries for the + * requestor */ + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_EXACT; + sel.type_mask = BIT(NHRP_PEER_TYPE_NEGATIVE); + sel.interface = packet->src_iface; + sel.protocol_address = packet->src_protocol_address; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); + + /* Send reply */ + packet->hdr.type = NHRP_PACKET_RESOLUTION_REPLY; + packet->hdr.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT; + packet->hdr.flags &= NHRP_FLAG_RESOLUTION_SOURCE_IS_ROUTER | + NHRP_FLAG_RESOLUTION_SOURCE_STABLE | + NHRP_FLAG_RESOLUTION_UNIQUE | + NHRP_FLAG_RESOLUTION_NAT; + packet->hdr.flags |= 
NHRP_FLAG_RESOLUTION_DESTINATION_STABLE | + NHRP_FLAG_RESOLUTION_AUTHORATIVE; + + cie = nhrp_cie_alloc(); + if (cie == NULL) + return FALSE; + + cie->hdr = (struct nhrp_cie_header) { + .code = NHRP_CODE_SUCCESS, + .prefix_length = peer->prefix_length, + }; + /* holding_time is kept in network byte order in the CIE header + * (it is read back with ntohs() for the log line below), so every + * source must be converted. + * NOTE(review): assumes NHRP_DEFAULT_HOLDING_TIME is a host-order + * value like the other two sources -- confirm against its definition. */ + if (peer->holding_time) + cie->hdr.holding_time = htons(peer->holding_time); + else if (peer->interface != NULL) + cie->hdr.holding_time = htons(peer->interface->holding_time); + else + cie->hdr.holding_time = htons(NHRP_DEFAULT_HOLDING_TIME); + + payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_ANY); + nhrp_payload_free(payload); + nhrp_payload_set_type(payload, NHRP_PAYLOAD_TYPE_CIE_LIST); + nhrp_payload_add_cie(payload, cie); + + if (!nhrp_packet_reroute(packet, NULL)) + return FALSE; + + peer = packet->dst_peer; + cie->hdr.mtu = htons(peer->my_nbma_mtu); + cie->nbma_address = peer->my_nbma_address; + cie->protocol_address = packet->dst_iface->protocol_address; + + nhrp_info("Sending Resolution Reply %s/%d is-at %s (holdtime %d)", + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp), tmp), + cie->hdr.prefix_length, + nhrp_address_format(&cie->nbma_address, + sizeof(tmp2), tmp2), + ntohs(cie->hdr.holding_time)); + + /* Reset NAT header to regenerate it for reply */ + payload = nhrp_packet_extension(packet, + NHRP_EXTENSION_NAT_ADDRESS | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_ANY); + if (payload != NULL) { + nhrp_payload_free(payload); + nhrp_payload_set_type(payload, NHRP_PAYLOAD_TYPE_CIE_LIST); + } + + return nhrp_packet_send(packet); +} + +static int find_one(void *ctx, struct nhrp_peer *p) +{ + return 1; +} + +static int remove_old_registrations(void *ctx, struct nhrp_peer *p) +{ + struct nhrp_peer *peer = (struct nhrp_peer *) ctx; + + /* If re-registration, mark the new connection up */ + if (nhrp_address_cmp(&peer->protocol_address, + &p->protocol_address) == 0 && + nhrp_address_cmp(&peer->next_hop_address, + &p->next_hop_address) == 0 && + peer->prefix_length == 
p->prefix_length) + peer->flags |= p->flags & (NHRP_PEER_FLAG_UP | + NHRP_PEER_FLAG_LOWER_UP); + + p->flags |= NHRP_PEER_FLAG_REPLACED; + nhrp_peer_remove(p); + return 0; +} + +static void nhrp_server_finish_reg(struct nhrp_pending_request *pr) +{ + char tmp[64], tmp2[64]; + struct nhrp_packet *packet = pr->packet; + + if (pr->rpeer != NULL && + nhrp_packet_reroute(packet, pr->rpeer)) { + nhrp_info("Sending Registration Reply from proto src %s to %s (%d bindings accepted, %d rejected)", + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp), tmp), + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp2), tmp2), + pr->num_ok, pr->num_error); + + nhrp_packet_send(packet); + } else { + /* We could not create route peer entry (likely out of memory), + * so we can't do much more here. */ + nhrp_info("Dropping Registration Reply from proto src %s to %s", + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp), tmp), + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp2), tmp2)); + } + + nhrp_server_finish_request(pr); +} + +static void nhrp_server_finish_cie_reg_cb(union nhrp_peer_event e, int revents) +{ + struct nhrp_peer *peer; + struct nhrp_pending_request *pr; + struct nhrp_packet *packet; + struct nhrp_cie *cie; + struct nhrp_peer_selector sel; + char tmp[64], reason[32]; + + peer = nhrp_peer_from_event(e, revents); + pr = peer->request; + packet = pr->packet; + cie = pr->cie; + + peer->request = NULL; + nhrp_address_format(&peer->protocol_address, sizeof(tmp), tmp); + if (revents != 0 && nhrp_peer_event_ok(e, revents)) { + nhrp_debug("[%s] Peer registration authorized", tmp); + + /* Remove all old stuff and accept registration */ + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_EXACT; + sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE; + sel.interface = packet->src_iface; + sel.protocol_address = peer->protocol_address; + sel.prefix_length = peer->prefix_length; + nhrp_peer_foreach(remove_old_registrations, 
peer, &sel); + + pr->num_ok++; + cie->hdr.code = NHRP_CODE_SUCCESS; + nhrp_peer_insert(peer); + } else { + if (revents == 0) + nhrp_error("[%s] Peer registration failed: " + "static entry exists", tmp); + else + nhrp_error("[%s] Peer registration failed: %s", + tmp, + nhrp_peer_event_reason(e, revents, + sizeof(reason), + reason)); + pr->num_error++; + cie->hdr.code = NHRP_CODE_ADMINISTRATIVELY_PROHIBITED; + peer->flags |= NHRP_PEER_FLAG_REPLACED; + } + if (pr->rpeer == NULL) + pr->rpeer = nhrp_peer_get(peer); + + nhrp_peer_put(peer); + pr->peer = NULL; + + /* Process next CIE or finish registration handling */ + if (cie->cie_list_entry.next != &pr->payload->u.cie_list) { + pr->cie = list_next(&cie->cie_list_entry, struct nhrp_cie, cie_list_entry); + nhrp_server_start_cie_reg(pr); + } else { + nhrp_server_finish_reg(pr); + } + +} + +static void nhrp_server_start_cie_reg(struct nhrp_pending_request *pr) +{ + struct nhrp_cie *cie = pr->cie; + struct nhrp_packet *packet = pr->packet; + struct nhrp_peer *peer; + struct nhrp_peer_selector sel; + + peer = nhrp_peer_alloc(packet->src_iface); + if (peer == NULL) { + /* Mark all remaining registration requests as failed + * due to lack of memory, and send reply */ + for (; cie->cie_list_entry.next != &pr->payload->u.cie_list; + cie = list_next(&cie->cie_list_entry, struct nhrp_cie, cie_list_entry)) { + pr->num_error++; + cie->hdr.code = NHRP_CODE_INSUFFICIENT_RESOURCES; + } + pr->num_error++; + cie->hdr.code = NHRP_CODE_INSUFFICIENT_RESOURCES; + nhrp_server_finish_reg(pr); + return; + } + + peer->type = NHRP_PEER_TYPE_DYNAMIC; + peer->afnum = packet->hdr.afnum; + peer->protocol_type = packet->hdr.protocol_type; + peer->expire_time = pr->now + ntohs(cie->hdr.holding_time); + peer->mtu = ntohs(cie->hdr.mtu); + if (cie->nbma_address.addr_len != 0) + peer->next_hop_address = cie->nbma_address; + else + peer->next_hop_address = packet->src_nbma_address; + + if (pr->natted) { + peer->next_hop_nat_oa = peer->next_hop_address; + 
peer->next_hop_address = packet->src_linklayer_address; + } + + if (cie->protocol_address.addr_len != 0) + peer->protocol_address = cie->protocol_address; + else + peer->protocol_address = packet->src_protocol_address; + + peer->prefix_length = cie->hdr.prefix_length; + if (peer->prefix_length == 0xff) + peer->prefix_length = peer->protocol_address.addr_len * 8; + + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_EXACT; + sel.type_mask = ~NHRP_PEER_TYPEMASK_REMOVABLE; + sel.interface = packet->src_iface; + sel.protocol_address = peer->protocol_address; + sel.prefix_length = peer->prefix_length; + + /* Link the created peer and pending request structures */ + pr->peer = peer; + peer->request = pr; + + /* Check that there is no conflicting peers */ + if (nhrp_peer_foreach(find_one, peer, &sel) != 0) { + cie->hdr.code = NHRP_CODE_ADMINISTRATIVELY_PROHIBITED; + peer->flags |= NHRP_PEER_FLAG_REPLACED; + nhrp_server_finish_cie_reg_cb(&peer->child, 0); + } else { + nhrp_peer_run_script(peer, "peer-register", + nhrp_server_finish_cie_reg_cb); + } +} + +static int nhrp_handle_registration_request(struct nhrp_packet *packet) +{ + char tmp[64], tmp2[64]; + struct nhrp_payload *payload; + struct nhrp_cie *cie; + struct nhrp_pending_request *pr; + int natted = 0; + + nhrp_info("Received Registration Request from proto src %s to %s", + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp), tmp), + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp2), tmp2)); + + if (nhrp_server_request_pending(packet)) { + nhrp_info("Already processing: resent packet ignored."); + return TRUE; + } + + if (num_pending_requests >= NHRP_MAX_PENDING_REQUESTS) { + /* We should probably send Registration Reply with CIE + * error NHRP_CODE_INSUFFICIENT_RESOURCES, or an Error + * Indication. However, we do not have a direct peer entry + * nor can we make sure that the lower layer is up, so + * we just lamely drop the packet for now. 
*/ + nhrp_info("Too many pending requests: dropping this one"); + return TRUE; + } + + /* Cisco NAT extension, CIE added IF all of the following is true: + * 1. We are the first hop registration server + * (=no entries in forward transit CIE list) + * 2. NAT is detected (link layer address != announced address) + * 3. NAT extension is requested */ + payload = nhrp_packet_extension(packet, + NHRP_EXTENSION_FORWARD_TRANSIT_NHS | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + if (payload != NULL && list_empty(&payload->u.cie_list) && + packet->src_linklayer_address.type != PF_UNSPEC && + nhrp_address_cmp(&packet->src_nbma_address, + &packet->src_linklayer_address) != 0) { + natted = 1; + payload = nhrp_packet_extension(packet, + NHRP_EXTENSION_NAT_ADDRESS | + NHRP_EXTENSION_FLAG_NOCREATE, + NHRP_PAYLOAD_TYPE_CIE_LIST); + if (payload != NULL) { + cie = nhrp_cie_alloc(); + if (cie != NULL) { + cie->nbma_address = packet->src_linklayer_address; + cie->protocol_address = packet->src_protocol_address; + nhrp_payload_add_cie(payload, cie); + } + } + } + + packet->hdr.type = NHRP_PACKET_REGISTRATION_REPLY; + packet->hdr.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT; + packet->hdr.flags &= NHRP_FLAG_REGISTRATION_UNIQUE | + NHRP_FLAG_REGISTRATION_NAT; + + payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST); + if (list_empty(&payload->u.cie_list)) { + nhrp_error("Received registration request has no CIEs"); + return TRUE; + } + + /* Start processing the CIEs */ + pr = nhrp_server_record_request(packet); + pr->natted = natted; + pr->payload = payload; + + pr->cie = nhrp_payload_get_cie(payload, 1); + nhrp_server_start_cie_reg(pr); + + return TRUE; +} + +static int remove_peer_by_nbma(void *ctx, struct nhrp_peer *peer) +{ + struct nhrp_address *nbma = ctx; + struct nhrp_address *peer_nbma = NULL; + + if (!nhrp_address_is_any_addr(nbma)) { + if (peer->type == NHRP_PEER_TYPE_SHORTCUT_ROUTE) { + struct nhrp_peer *nexthop; + + nexthop = 
nhrp_peer_route(peer->interface, + &peer->next_hop_address, + NHRP_PEER_FIND_EXACT, + NHRP_PEER_TYPEMASK_ADJACENT); + if (nexthop != NULL) + peer_nbma = &nexthop->next_hop_address; + } else { + peer_nbma = &peer->next_hop_address; + } + } else { + peer_nbma = nbma; + } + + if (peer_nbma != NULL && + nhrp_address_cmp(peer_nbma, nbma) == 0) + nhrp_peer_remove(peer); + + return 0; +} + +static int nhrp_handle_purge_request(struct nhrp_packet *packet) +{ + char tmp[64], tmp2[64]; + struct nhrp_peer_selector sel; + struct nhrp_payload *payload; + struct nhrp_cie *cie; + int flags, ret = TRUE; + + nhrp_info("Received Purge Request from proto src %s to %s", + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp), tmp), + nhrp_address_format(&packet->dst_protocol_address, + sizeof(tmp2), tmp2)); + + flags = packet->hdr.flags; + packet->hdr.type = NHRP_PACKET_PURGE_REPLY; + packet->hdr.hop_count = NHRP_PACKET_DEFAULT_HOP_COUNT; + packet->hdr.flags = 0; + + if (!(flags & NHRP_FLAG_PURGE_NO_REPLY)) { + if (nhrp_packet_reroute(packet, NULL)) + ret = nhrp_packet_send(packet); + else + ret = FALSE; + } + + payload = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_CIE_LIST); + list_for_each_entry(cie, &payload->u.cie_list, cie_list_entry) { + nhrp_info("Purge proto %s/%d nbma %s", + nhrp_address_format(&cie->protocol_address, + sizeof(tmp), tmp), + cie->hdr.prefix_length, + nhrp_address_format(&cie->nbma_address, + sizeof(tmp2), tmp2)); + + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_EXACT; + sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE; + sel.interface = packet->src_iface; + sel.protocol_address = cie->protocol_address; + sel.prefix_length = cie->hdr.prefix_length; + nhrp_peer_foreach(remove_peer_by_nbma, + &cie->nbma_address, &sel); + nhrp_rate_limit_clear(&cie->protocol_address, + cie->hdr.prefix_length); + } + + return ret; +} + +static int nhrp_handle_traffic_indication(struct nhrp_packet *packet) +{ + char tmp[64], tmp2[64]; + struct nhrp_address 
dst; + struct nhrp_payload *pl; + + pl = nhrp_packet_payload(packet, NHRP_PAYLOAD_TYPE_RAW); + if (pl == NULL) + return FALSE; + + if (!nhrp_address_parse_packet(packet->hdr.protocol_type, + pl->u.raw->length, pl->u.raw->data, + NULL, &dst)) + return FALSE; + + /* Shortcuts enabled? */ + if (packet->src_iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT) { + nhrp_info("Traffic Indication from proto src %s; " + "about packet to %s", + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp), tmp), + nhrp_address_format(&dst, sizeof(tmp2), tmp2)); + + nhrp_peer_traffic_indication(packet->src_iface, + packet->hdr.afnum, + &dst); + } else { + nhrp_info("Traffic Indication ignored from proto src %s; " + "about packet to %s", + nhrp_address_format(&packet->src_protocol_address, + sizeof(tmp), tmp), + nhrp_address_format(&dst, sizeof(tmp2), tmp2)); + } + + return TRUE; +} + +void server_init(void) +{ + nhrp_packet_hook_request(NHRP_PACKET_RESOLUTION_REQUEST, + nhrp_handle_resolution_request); + nhrp_packet_hook_request(NHRP_PACKET_REGISTRATION_REQUEST, + nhrp_handle_registration_request); + nhrp_packet_hook_request(NHRP_PACKET_PURGE_REQUEST, + nhrp_handle_purge_request); + nhrp_packet_hook_request(NHRP_PACKET_TRAFFIC_INDICATION, + nhrp_handle_traffic_indication); +} diff --git a/nhrp/opennhrp.c b/nhrp/opennhrp.c new file mode 100644 index 0000000..8ba870d --- /dev/null +++ b/nhrp/opennhrp.c @@ -0,0 +1,524 @@ +/* opennhrp.c - OpenNHRP main routines + * + * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#include <ctype.h> +#include <stdio.h> +#include <errno.h> +#include <malloc.h> +#include <stddef.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/file.h> +#include <sys/stat.h> + +#include "nhrp_common.h" +#include "nhrp_peer.h" +#include "nhrp_interface.h" + +const char *nhrp_version_string = + "OpenNHRP " OPENNHRP_VERSION +#ifdef NHRP_NO_NBMA_GRE + " (no NBMA GRE support)" +#endif + ; + +const char *nhrp_admin_socket = OPENNHRP_ADMIN_SOCKET; +const char *nhrp_pid_file = "/var/run/opennhrp.pid"; +const char *nhrp_config_file = "/etc/opennhrp/opennhrp.conf"; +const char *nhrp_script_file = "/etc/opennhrp/opennhrp-script"; +int nhrp_verbose = 0; +int nhrp_running = FALSE; + +static int pid_file_fd; + +void nhrp_hex_dump(const char *name, const uint8_t *buf, int bytes) +{ + int i, j; + int left; + + fprintf(stderr, "%s:\n", name); + for (i = 0; i < bytes; i++) { + fprintf(stderr, "%02X ", buf[i]); + if (i % 0x10 == 0x0f) { + fprintf(stderr, " "); + for (j = 0; j < 0x10; j++) + fprintf(stderr, "%c", isgraph(buf[i+j-0xf]) ? + buf[i+j-0xf]: '.'); + fprintf(stderr, "\n"); + } + } + + left = i % 0x10; + if (left != 0) { + fprintf(stderr, "%*s ", 3 * (0x10 - left), ""); + + for (j = 0; j < left; j++) + fprintf(stderr, "%c", isgraph(buf[i+j-left]) ? 
+ buf[i+j-left]: '.'); + fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); +} + +static void handle_signal_cb(struct ev_signal *w, int revents) +{ + struct nhrp_peer_selector sel; + + switch (w->signum) { + case SIGUSR1: + nhrp_peer_dump_cache(); + break; + case SIGINT: + case SIGTERM: + ev_unloop(EVUNLOOP_ALL); + break; + case SIGHUP: + memset(&sel, 0, sizeof(sel)); + sel.type_mask = NHRP_PEER_TYPEMASK_REMOVABLE; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); + break; + } +} + +static int hook_signal[] = { SIGUSR1, SIGHUP, SIGINT, SIGTERM }; +static ev_signal signal_event[ARRAY_SIZE(hook_signal)]; + +static void signal_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(hook_signal); i++) { + ev_signal_init(&signal_event[i], handle_signal_cb, + hook_signal[i]); + ev_signal_start(&signal_event[i]); + } +} + +static int read_word(FILE *in, int *lineno, size_t len, char *word) +{ + int ch, i, comment = 0; + + ch = fgetc(in); + while (1) { + if (ch == EOF) + return FALSE; + if (ch == '#') + comment = 1; + if (!comment && !isspace(ch)) + break; + if (ch == '\n') { + (*lineno)++; + comment = 0; + } + ch = fgetc(in); + } + + for (i = 0; i < len-1 && !isspace(ch); i++) { + word[i] = ch; + ch = fgetc(in); + if (ch == EOF) + break; + if (ch == '\n') + (*lineno)++; + } + word[i] = 0; + + return TRUE; +} + +static int load_config(const char *config_file) +{ +#define NEED_INTERFACE() if (iface == NULL) { rc = 2; break; } peer = NULL; +#define NEED_PEER() if (peer == NULL || peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) { rc = 3; break; } + + static const char *errors[] = { + "syntax error", + "missing keyword", + "keyword valid only for 'interface' definition", + "keyword valid only for 'map' definition", + "invalid address", + "dynamic-map requires a network address", + "bad multicast destination", + "keyword valid only for 'interace' and 'shortcut-target' definition", + }; + struct nhrp_interface *iface = NULL; + struct nhrp_peer *peer = NULL; + struct 
nhrp_address paddr; + char word[32], nbma[32], addr[32]; + FILE *in; + int lineno = 1, rc = -1; + + in = fopen(config_file, "r"); + if (in == NULL) { + nhrp_error("Unable to open configuration file '%s'.", + config_file); + return FALSE; + } + + while (read_word(in, &lineno, sizeof(word), word)) { + if (strcmp(word, "interface") == 0) { + if (!read_word(in, &lineno, sizeof(word), word)) { + rc = 1; + break; + } + iface = nhrp_interface_get_by_name(word, TRUE); + if (iface != NULL) + iface->flags |= NHRP_INTERFACE_FLAG_CONFIGURED; + peer = NULL; + } else if (strcmp(word, "shortcut-target") == 0) { + NEED_INTERFACE(); + if (!read_word(in, &lineno, sizeof(addr), addr)) { + rc = 1; + break; + } + peer = nhrp_peer_alloc(iface); + peer->type = NHRP_PEER_TYPE_LOCAL_ADDR; + peer->afnum = AFNUM_RESERVED; + if (!nhrp_address_parse(addr, &peer->protocol_address, + &peer->prefix_length)) { + rc = 4; + break; + } + peer->protocol_type = nhrp_protocol_from_pf(peer->protocol_address.type); + nhrp_peer_insert(peer); + nhrp_peer_put(peer); + } else if (strcmp(word, "dynamic-map") == 0) { + NEED_INTERFACE(); + read_word(in, &lineno, sizeof(addr), addr); + read_word(in, &lineno, sizeof(nbma), nbma); + + peer = nhrp_peer_alloc(iface); + peer->type = NHRP_PEER_TYPE_STATIC_DNS; + if (!nhrp_address_parse(addr, &peer->protocol_address, + &peer->prefix_length)) { + rc = 4; + break; + } + if (!nhrp_address_is_network(&peer->protocol_address, + peer->prefix_length)) { + rc = 5; + break; + } + peer->protocol_type = nhrp_protocol_from_pf( + peer->protocol_address.type); + peer->nbma_hostname = strdup(nbma); + peer->afnum = nhrp_afnum_from_pf( + peer->next_hop_address.type); + nhrp_peer_insert(peer); + nhrp_peer_put(peer); + } else if (strcmp(word, "map") == 0) { + NEED_INTERFACE(); + read_word(in, &lineno, sizeof(addr), addr); + read_word(in, &lineno, sizeof(nbma), nbma); + + peer = nhrp_peer_alloc(iface); + peer->type = NHRP_PEER_TYPE_STATIC; + if (!nhrp_address_parse(addr, 
&peer->protocol_address, + &peer->prefix_length)) { + rc = 4; + break; + } + peer->protocol_type = nhrp_protocol_from_pf( + peer->protocol_address.type); + if (!nhrp_address_parse(nbma, &peer->next_hop_address, + NULL)) + peer->nbma_hostname = strdup(nbma); + peer->afnum = nhrp_afnum_from_pf(peer->next_hop_address.type); + nhrp_peer_insert(peer); + nhrp_peer_put(peer); + } else if (strcmp(word, "register") == 0) { + NEED_PEER(); + peer->flags |= NHRP_PEER_FLAG_REGISTER; + } else if (strcmp(word, "cisco") == 0) { + NEED_PEER(); + peer->flags |= NHRP_PEER_FLAG_CISCO; + } else if (strcmp(word, "holding-time") == 0) { + read_word(in, &lineno, sizeof(word), word); + if (peer != NULL && + peer->type == NHRP_PEER_TYPE_LOCAL_ADDR) { + peer->holding_time = atoi(word); + } else if (iface != NULL) { + iface->holding_time = atoi(word); + peer = NULL; + } else { + rc = 7; + } + } else if (strcmp(word, "cisco-authentication") == 0) { + struct nhrp_buffer *buf; + struct nhrp_cisco_authentication_extension *auth; + + NEED_INTERFACE(); + read_word(in, &lineno, sizeof(word), word); + + buf = nhrp_buffer_alloc(strlen(word) + sizeof(uint32_t)); + auth = (struct nhrp_cisco_authentication_extension *) buf->data; + auth->type = NHRP_AUTHENTICATION_PLAINTEXT; + memcpy(auth->secret, word, strlen(word)); + + iface->auth_token = buf; + } else if (strcmp(word, "route-table") == 0) { + NEED_INTERFACE(); + read_word(in, &lineno, sizeof(word), word); + iface->route_table = atoi(word); + } else if (strcmp(word, "shortcut") == 0) { + NEED_INTERFACE(); + iface->flags |= NHRP_INTERFACE_FLAG_SHORTCUT; + } else if (strcmp(word, "redirect") == 0) { + NEED_INTERFACE(); + iface->flags |= NHRP_INTERFACE_FLAG_REDIRECT; + } else if (strcmp(word, "non-caching") == 0) { + NEED_INTERFACE(); + iface->flags |= NHRP_INTERFACE_FLAG_NON_CACHING; + } else if (strcmp(word, "shortcut-destination") == 0) { + NEED_INTERFACE(); + iface->flags |= NHRP_INTERFACE_FLAG_SHORTCUT_DEST; + } else if (strcmp(word, "multicast") == 
0) { + NEED_INTERFACE(); + read_word(in, &lineno, sizeof(word), word); + if (strcmp(word, "dynamic") == 0) { + iface->mcast_mask = \ + BIT(NHRP_PEER_TYPE_STATIC) | + BIT(NHRP_PEER_TYPE_DYNAMIC_NHS) | + BIT(NHRP_PEER_TYPE_DYNAMIC); + } else if (strcmp(word, "nhs") == 0) { + iface->mcast_mask = \ + BIT(NHRP_PEER_TYPE_STATIC) | + BIT(NHRP_PEER_TYPE_DYNAMIC_NHS); + } else if (nhrp_address_parse(word, &paddr, NULL)) { + iface->mcast_numaddr++; + iface->mcast_addr = realloc(iface->mcast_addr, + iface->mcast_numaddr * + sizeof(struct nhrp_address)); + iface->mcast_addr[iface->mcast_numaddr-1] = + paddr; + } else { + rc = 6; + break; + } + } else { + rc = 0; + break; + } + } + fclose(in); + + if (rc >= 0) { + nhrp_error("Configuration file %s in %s:%d, near word '%s'", + errors[rc], config_file, lineno, word); + return FALSE; + } + return TRUE; +} + +static void remove_pid_file(void) +{ + if (pid_file_fd != 0) { + close(pid_file_fd); + pid_file_fd = 0; + remove(nhrp_pid_file); + } +} + +static int open_pid_file(void) +{ + if (strlen(nhrp_pid_file) == 0) + return TRUE; + + pid_file_fd = open(nhrp_pid_file, O_CREAT | O_WRONLY, + S_IRUSR | S_IWUSR); + if (pid_file_fd < 0) + goto err; + + fcntl(pid_file_fd, F_SETFD, FD_CLOEXEC); + if (flock(pid_file_fd, LOCK_EX | LOCK_NB) < 0) + goto err_close; + + return TRUE; + +err_close: + close(pid_file_fd); +err: + nhrp_error("Unable to open/lock pid file: %s.", strerror(errno)); + return FALSE; +} + +static int write_pid(void) +{ + char tmp[16]; + int n; + + if (pid_file_fd >= 0) { + if (ftruncate(pid_file_fd, 0) < 0) + return FALSE; + + n = sprintf(tmp, "%d\n", getpid()); + if (write(pid_file_fd, tmp, n) != n) + return FALSE; + + atexit(remove_pid_file); + } + + return TRUE; +} + +static int daemonize(void) +{ + pid_t pid; + + pid = fork(); + if (pid < 0) + return FALSE; + if (pid > 0) + exit(0); + + if (setsid() < 0) + return FALSE; + + pid = fork(); + if (pid < 0) + return FALSE; + if (pid > 0) + exit(0); + + if (chdir("/") < 0) + 
return FALSE; + + umask(0); + + if (freopen("/dev/null", "r", stdin) == NULL || + freopen("/dev/null", "w", stdout) == NULL || + freopen("/dev/null", "w", stderr) == NULL) { + nhrp_error("Unable reopen standard file descriptors"); + goto err; + } + + ev_default_fork(); + + return TRUE; + +err: + close(pid_file_fd); + pid_file_fd = 0; + return FALSE; +} + +int usage(const char *prog) +{ + fprintf(stderr, + "usage: opennhrp [-a admin-socket] [-c config-file] [-s script-file]\n" + " [-p pid-file] [-d] [-v]\n" + " opennhrp -V\n" + "\n" + "\t-a admin-socket\tspecify management interface socket\n" + "\t-c config-file\tread configuration from config-file\n" + "\t-s script-file\tuse specified script-file for event handling\n" + "\t-p pid-file\tspecify pid-file\n" + "\t-d\t\tfork to background after startup\n" + "\t-v\t\tverbose logging\n" + "\t-V\t\tshow version number and exit\n" + "\n"); + return 1; +} + +int main(int argc, char **argv) +{ + struct nhrp_address any; + int i, daemonmode = 0; + + nhrp_address_set_type(&any, AF_UNSPEC); + + for (i = 1; i < argc; i++) { + if (strlen(argv[i]) != 2 || argv[i][0] != '-') + return usage(argv[0]); + + switch (argv[i][1]) { + case 'c': + if (++i >= argc) + return usage(argv[0]); + nhrp_config_file = argv[i]; + break; + case 's': + if (++i >= argc) + return usage(argv[0]); + nhrp_script_file = argv[i]; + break; + case 'a': + if (++i >= argc) + return usage(argv[0]); + nhrp_admin_socket = argv[i]; + break; + case 'p': + if (++i >= argc) + return usage(argv[0]); + nhrp_pid_file = argv[i]; + break; + case 'd': + daemonmode = 1; + break; + case 'v': + nhrp_verbose = 1; + break; + case 'V': + puts(nhrp_version_string); + return 0; + default: + return usage(argv[0]); + } + } + + srandom(time(NULL)); + if (!log_init()) + return 1; + if (!open_pid_file()) + return 1; + + nhrp_info("%s starting", nhrp_version_string); + + ev_default_loop(0); + signal_init(); + server_init(); + if (!nhrp_address_init()) + return 3; + if 
(!load_config(nhrp_config_file)) + return 4; + if (!kernel_init()) + return 5; + if (!admin_init(nhrp_admin_socket)) + return 6; + if (!forward_init()) + return 7; + + if (daemonmode && !daemonize()) { + nhrp_error("Failed to daemonize. Exit."); + return 8; + } + + write_pid(); + + nhrp_running = TRUE; + ev_loop(0); + nhrp_running = FALSE; + + forward_cleanup(); + kernel_stop_listening(); + nhrp_peer_cleanup(); + kernel_cleanup(); + nhrp_interface_cleanup(); + nhrp_rate_limit_clear(&any, 0); + nhrp_address_cleanup(); + + ev_default_destroy(); + + return 0; +} + diff --git a/nhrp/opennhrpctl.c b/nhrp/opennhrpctl.c new file mode 100644 index 0000000..92fb5b5 --- /dev/null +++ b/nhrp/opennhrpctl.c @@ -0,0 +1,121 @@ +/* opennhrpctl.c - OpenNHRP command line control utility + * + * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sys/un.h> +#include <sys/socket.h> + +static int admin_init(const char *opennhrp_socket) +{ + struct sockaddr_un sun; + int fd; + + memset(&sun, 0, sizeof(sun)); + sun.sun_family = AF_UNIX; + strncpy(sun.sun_path, opennhrp_socket, sizeof(sun.sun_path)); + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) + return -1; + + if (connect(fd, (struct sockaddr *) &sun, sizeof(sun)) < 0) { + close(fd); + return -1; + } + + return fd; +} + +static void admin_close(int fd) +{ + close(fd); +} + +static int admin_send(int fd, const char *str) +{ + int len = strlen(str); + + if (write(fd, str, len) != len) + return -1; + shutdown(fd, SHUT_WR); + return 0; +} + +static int admin_receive(int fd) +{ + char msg[512]; + size_t len; + + while ((len = recv(fd, msg, sizeof(msg), 0)) > 0) { + if (write(fileno(stdout), msg, len) != len) + return -1; + } + + if (len < 0) + return -1; + + return 0; +} + +static int usage(const char *prog) +{ + fprintf(stderr, "usage: %s [-a admin-socket] <command>\n", prog); + return 1; +} + +int main(int argc, char **argv) +{ + const char *socket = OPENNHRP_ADMIN_SOCKET; + char cmd[1024] = "", *pos = cmd; + int i, fd; + + for (i = 1; i < argc; i++) { + if (strlen(argv[i]) != 2 || argv[i][0] != '-') { + pos += snprintf(pos, &cmd[sizeof(cmd)-1]-pos, + " %s\n", argv[i]) - 1; + continue; + } + + switch (argv[i][1]) { + case 'a': + if (++i >= argc) + return usage(argv[0]); + socket = argv[i]; + break; + default: + return usage(argv[0]); + } + } + if (cmd == pos) + return usage(argv[0]); + + fd = admin_init(socket); + if (fd < 0) { + fprintf(stderr, + "Failed to connect to opennhrp daemon [%s]: %s.\n\n", + socket, strerror(errno)); + return 1; + } + + if (admin_send(fd, &cmd[1]) < 0 || + admin_receive(fd) < 0) { + fprintf(stderr, "Failed to send request: %s.\n", + strerror(errno)); + return 2; + } + + admin_close(fd); + return 0; +} diff --git 
a/nhrp/sysdep_netlink.c b/nhrp/sysdep_netlink.c new file mode 100644 index 0000000..d058a98 --- /dev/null +++ b/nhrp/sysdep_netlink.c @@ -0,0 +1,1159 @@ +/* sysdep_netlink.c - Linux netlink glue + * + * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#include <time.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <malloc.h> +#include <string.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <asm/types.h> +#include <arpa/inet.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/ip.h> +#include <linux/if_arp.h> +#include <linux/if_tunnel.h> + +#include "libev.h" +#include "nhrp_common.h" +#include "nhrp_interface.h" +#include "nhrp_peer.h" + +#define NETLINK_KERNEL_BUFFER (256 * 1024) +#define NETLINK_RECV_BUFFER (8 * 1024) + +#define NLMSG_TAIL(nmsg) \ + ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) + +#define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) +#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg)) + +typedef void (*netlink_dispatch_f)(struct nlmsghdr *msg); + +struct netlink_fd { + int fd; + __u32 seq; + struct ev_io io; + + int dispatch_size; + const netlink_dispatch_f *dispatch; +}; + +static const int netlink_groups[] = { + 0, + RTMGRP_NEIGH, + RTMGRP_LINK, + RTMGRP_IPV4_IFADDR, + RTMGRP_IPV4_ROUTE, +}; +static struct netlink_fd netlink_fds[ARRAY_SIZE(netlink_groups)]; +#define talk_fd netlink_fds[0] + +static struct ev_io packet_io; + +static u_int16_t translate_mtu(u_int16_t mtu) +{ + /* if mtu is ethernet standard, do not 
advertise it + * pmtu should be working */ + if (mtu == 1500) + return 0; + return mtu; +} + +static void netlink_parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len) +{ + memset(tb, 0, sizeof(struct rtattr *) * (max + 1)); + while (RTA_OK(rta, len)) { + if (rta->rta_type <= max) + tb[rta->rta_type] = rta; + rta = RTA_NEXT(rta,len); + } +} + +static int netlink_add_rtattr_l(struct nlmsghdr *n, int maxlen, int type, + const void *data, int alen) +{ + int len = RTA_LENGTH(alen); + struct rtattr *rta; + + if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) + return FALSE; + + rta = NLMSG_TAIL(n); + rta->rta_type = type; + rta->rta_len = len; + memcpy(RTA_DATA(rta), data, alen); +#ifdef VALGRIND + /* Clear the padding area to avoid spurious warnings */ + memset(RTA_DATA(rta) + alen, 0, RTA_ALIGN(len) - alen); +#endif + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); + return TRUE; +} + +static int netlink_receive(struct netlink_fd *fd, struct nlmsghdr *reply) +{ + struct sockaddr_nl nladdr; + struct iovec iov; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + int got_reply = FALSE, len; + char buf[NETLINK_RECV_BUFFER]; + + iov.iov_base = buf; + while (!got_reply) { + int status; + struct nlmsghdr *h; + + iov.iov_len = sizeof(buf); + status = recvmsg(fd->fd, &msg, MSG_DONTWAIT); + if (status < 0) { + if (errno == EINTR) + continue; + if (errno == EAGAIN) + return reply == NULL; + nhrp_perror("Netlink overrun"); + continue; + } + + if (status == 0) { + nhrp_error("Netlink returned EOF"); + return FALSE; + } + + h = (struct nlmsghdr *) buf; + while (NLMSG_OK(h, status)) { + if (reply != NULL && + h->nlmsg_seq == reply->nlmsg_seq) { + len = h->nlmsg_len; + if (len > reply->nlmsg_len) { + nhrp_error("Netlink message truncated"); + len = reply->nlmsg_len; + } + memcpy(reply, h, len); + got_reply = TRUE; + } else if (h->nlmsg_type <= fd->dispatch_size && + 
fd->dispatch[h->nlmsg_type] != NULL) { + fd->dispatch[h->nlmsg_type](h); + } else if (h->nlmsg_type != NLMSG_DONE) { + nhrp_info("Unknown NLmsg: 0x%08x, len %d", + h->nlmsg_type, h->nlmsg_len); + } + h = NLMSG_NEXT(h, status); + } + } + + return TRUE; +} + +static int netlink_send(struct netlink_fd *fd, struct nlmsghdr *req) +{ + struct sockaddr_nl nladdr; + struct iovec iov = { + .iov_base = (void*) req, + .iov_len = req->nlmsg_len + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + int status; + + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + + req->nlmsg_seq = ++fd->seq; + + status = sendmsg(fd->fd, &msg, 0); + if (status < 0) { + nhrp_perror("Cannot talk to rtnetlink"); + return FALSE; + } + return TRUE; +} + +static int netlink_talk(struct netlink_fd *fd, struct nlmsghdr *req, + size_t replysize, struct nlmsghdr *reply) +{ + if (reply == NULL) + req->nlmsg_flags |= NLM_F_ACK; + + if (!netlink_send(fd, req)) + return FALSE; + + if (reply == NULL) + return TRUE; + + reply->nlmsg_len = replysize; + return netlink_receive(fd, reply); +} + +static int netlink_enumerate(struct netlink_fd *fd, int family, int type) +{ + struct { + struct nlmsghdr nlh; + struct rtgenmsg g; + } req; + struct sockaddr_nl addr; + + memset(&addr, 0, sizeof(addr)); + addr.nl_family = AF_NETLINK; + + memset(&req, 0, sizeof(req)); + req.nlh.nlmsg_len = sizeof(req); + req.nlh.nlmsg_type = type; + req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.nlh.nlmsg_pid = 0; + req.nlh.nlmsg_seq = ++fd->seq; + req.g.rtgen_family = family; + + return sendto(fd->fd, (void *) &req, sizeof(req), 0, + (struct sockaddr *) &addr, sizeof(addr)) >= 0; +} + +static void netlink_read_cb(struct ev_io *w, int revents) +{ + struct netlink_fd *nfd = container_of(w, struct netlink_fd, io); + + if (revents & EV_READ) + netlink_receive(nfd, NULL); +} + +static int do_get_ioctl(const char *basedev, 
struct ip_tunnel_parm *p) +{ + struct ifreq ifr; + +#ifdef VALGRIND + /* Valgrind does not have SIOCGETTUNNEL description, so clear + * the memory structs to avoid spurious warnings */ + memset(&ifr, 0, sizeof(ifr)); + memset(p, 0, sizeof(*p)); +#endif + + strncpy(ifr.ifr_name, basedev, IFNAMSIZ); + ifr.ifr_ifru.ifru_data = (void *) p; + if (ioctl(packet_io.fd, SIOCGETTUNNEL, &ifr)) { + nhrp_perror("ioctl(SIOCGETTUNNEL)"); + return FALSE; + } + return TRUE; +} + +#ifndef NHRP_NO_NBMA_GRE + +static int netlink_add_nested_rtattr_u32(struct rtattr *rta, int maxlen, + int type, uint32_t value) +{ + int len = RTA_LENGTH(4); + struct rtattr *subrta; + + if (RTA_ALIGN(rta->rta_len) + len > maxlen) + return FALSE; + + subrta = (struct rtattr*)(((char*)rta) + RTA_ALIGN(rta->rta_len)); + subrta->rta_type = type; + subrta->rta_len = len; + memcpy(RTA_DATA(subrta), &value, 4); + rta->rta_len = NLMSG_ALIGN(rta->rta_len) + len; + return TRUE; +} + +static int netlink_configure_arp(struct nhrp_interface *iface, int pf) +{ + struct { + struct nlmsghdr n; + struct ndtmsg ndtm; + char buf[256]; + } req; + struct { + struct rtattr rta; + char buf[256]; + } parms; + + memset(&req.n, 0, sizeof(req.n)); + memset(&req.ndtm, 0, sizeof(req.ndtm)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE; + req.n.nlmsg_type = RTM_SETNEIGHTBL; + + req.ndtm.ndtm_family = pf; + + netlink_add_rtattr_l(&req.n, sizeof(req), NDTA_NAME, + "arp_cache", 10); + + parms.rta.rta_type = NDTA_PARMS; + parms.rta.rta_len = RTA_LENGTH(0); + netlink_add_nested_rtattr_u32(&parms.rta, sizeof(parms), + NDTPA_IFINDEX, iface->index); + netlink_add_nested_rtattr_u32(&parms.rta, sizeof(parms), + NDTPA_APP_PROBES, 1); + netlink_add_nested_rtattr_u32(&parms.rta, sizeof(parms), + NDTPA_MCAST_PROBES, 0); + netlink_add_nested_rtattr_u32(&parms.rta, sizeof(parms), + NDTPA_UCAST_PROBES, 0); + + netlink_add_rtattr_l(&req.n, sizeof(req), NDTA_PARMS, + parms.buf, 
parms.rta.rta_len - RTA_LENGTH(0)); + + return netlink_send(&talk_fd, &req.n); +} + +static int netlink_link_arp_on(struct nhrp_interface *iface) +{ + struct ifreq ifr; + + strncpy(ifr.ifr_name, iface->name, IFNAMSIZ); + if (ioctl(packet_io.fd, SIOCGIFFLAGS, &ifr)) { + nhrp_perror("ioctl(SIOCGIFFLAGS)"); + return FALSE; + } + if (ifr.ifr_flags & IFF_NOARP) { + ifr.ifr_flags &= ~IFF_NOARP; + if (ioctl(packet_io.fd, SIOCSIFFLAGS, &ifr)) { + nhrp_perror("ioctl(SIOCSIFFLAGS)"); + return FALSE; + } + } + return TRUE; +} + +#else + +static int netlink_configure_arp(struct nhrp_interface *iface, int pf) +{ + return TRUE; +} + +static int netlink_link_arp_on(struct nhrp_interface *iface) +{ + return TRUE; +} + +#endif + +static int proc_icmp_redirect_off(const char *interface) +{ + char fname[256]; + int fd, ret = FALSE; + + sprintf(fname, "/proc/sys/net/ipv4/conf/%s/send_redirects", interface); + fd = open(fname, O_WRONLY); + if (fd < 0) + return FALSE; + if (write(fd, "0\n", 2) == 2) + ret = TRUE; + close(fd); + + return ret; +} + +static void netlink_neigh_request(struct nlmsghdr *msg) +{ + struct ndmsg *ndm = NLMSG_DATA(msg); + struct rtattr *rta[NDA_MAX+1]; + struct nhrp_peer *peer; + struct nhrp_address addr; + struct nhrp_interface *iface; + char tmp[64]; + + netlink_parse_rtattr(rta, NDA_MAX, NDA_RTA(ndm), NDA_PAYLOAD(msg)); + if (rta[NDA_DST] == NULL) + return; + + iface = nhrp_interface_get_by_index(ndm->ndm_ifindex, 0); + if (iface == NULL) + return; + + nhrp_address_set(&addr, ndm->ndm_family, + RTA_PAYLOAD(rta[NDA_DST]), + RTA_DATA(rta[NDA_DST])); + + nhrp_debug("NL-ARP(%s) who-has %s", + iface->name, nhrp_address_format(&addr, sizeof(tmp), tmp)); + + peer = nhrp_peer_route(iface, &addr, 0, ~BIT(NHRP_PEER_TYPE_LOCAL_ROUTE)); + if (peer == NULL) + return; + + if (peer->flags & NHRP_PEER_FLAG_UP) + kernel_inject_neighbor(&addr, &peer->next_hop_address, iface); + + if (peer->next_hop_address.type != PF_UNSPEC && + nhrp_address_cmp(&addr, &peer->protocol_address) 
!= 0) + nhrp_peer_traffic_indication(iface, peer->afnum, &addr); +} + +static void netlink_neigh_update(struct nlmsghdr *msg) +{ + struct ndmsg *ndm = NLMSG_DATA(msg); + struct rtattr *rta[NDA_MAX+1]; + struct nhrp_interface *iface; + struct nhrp_peer_selector sel; + int used = FALSE; + + netlink_parse_rtattr(rta, NDA_MAX, NDA_RTA(ndm), NDA_PAYLOAD(msg)); + if (rta[NDA_DST] == NULL) + return; + + if (!(ndm->ndm_state & (NUD_STALE | NUD_FAILED | NUD_REACHABLE))) + return; + + iface = nhrp_interface_get_by_index(ndm->ndm_ifindex, 0); + if (iface == NULL) + return; + + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_EXACT; + sel.interface = iface; + nhrp_address_set(&sel.protocol_address, ndm->ndm_family, + RTA_PAYLOAD(rta[NDA_DST]), + RTA_DATA(rta[NDA_DST])); + + if (msg->nlmsg_type == RTM_NEWNEIGH && (ndm->ndm_state & NUD_REACHABLE)) + used = TRUE; + + nhrp_peer_foreach(nhrp_peer_set_used_matching, + (void*) (intptr_t) used, &sel); +} + +static void netlink_link_new(struct nlmsghdr *msg) +{ + struct nhrp_interface *iface; + struct ifinfomsg *ifi = NLMSG_DATA(msg); + struct rtattr *rta[IFLA_MAX+1]; + const char *ifname; + struct ip_tunnel_parm cfg; + int configuration_changed = FALSE; + + netlink_parse_rtattr(rta, IFLA_MAX, IFLA_RTA(ifi), IFLA_PAYLOAD(msg)); + if (rta[IFLA_IFNAME] == NULL) + return; + + ifname = RTA_DATA(rta[IFLA_IFNAME]); + iface = nhrp_interface_get_by_name(ifname, TRUE); + if (iface == NULL) + return; + + if (rta[IFLA_MTU]) + iface->mtu = *((unsigned*)RTA_DATA(rta[IFLA_MTU])); + + if (iface->index == 0 || (ifi->ifi_flags & ifi->ifi_change & IFF_UP)) { + nhrp_info("Interface %s: new or configured up, mtu=%d", + ifname, iface->mtu); + nhrp_interface_run_script(iface, "interface-up"); + } else { + nhrp_info("Interface %s: config change, mtu=%d", + ifname, iface->mtu); + } + + iface->index = ifi->ifi_index; + nhrp_interface_hash(iface); + + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + return; + + switch (ifi->ifi_type) { + case 
ARPHRD_IPGRE: + iface->afnum = AFNUM_INET; + /* try hard to get the interface nbma address */ + do_get_ioctl(ifname, &cfg); + if (iface->gre_key != ntohl(cfg.i_key)) { + configuration_changed = TRUE; + iface->gre_key = ntohl(cfg.i_key); + } + if (cfg.iph.saddr) { + struct nhrp_address saddr; + nhrp_address_set(&saddr, PF_INET, 4, (uint8_t *) &cfg.iph.saddr); + if (nhrp_address_cmp(&iface->nbma_address, &saddr) || iface->link_index) { + configuration_changed = TRUE; + iface->nbma_address = saddr; + iface->link_index = 0; + } + } else if (cfg.link) { + if (cfg.link != iface->link_index) { + configuration_changed = TRUE; + nhrp_address_set_type(&iface->nbma_address, PF_UNSPEC); + iface->link_index = cfg.link; + } + } else { + if (iface->link_index || iface->nbma_address.type != PF_UNSPEC) { + configuration_changed = TRUE; + /* Mark the interface as owning all NBMA addresses + * this works when there's only one GRE interface */ + iface->link_index = 0; + nhrp_address_set_type(&iface->nbma_address, PF_UNSPEC); + nhrp_info("WARNING: Cannot figure out NBMA address for " + "interface '%s'. Using route hints.", ifname); + } + } + break; + } + + if (!(iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST)) { + netlink_configure_arp(iface, PF_INET); + netlink_link_arp_on(iface); + proc_icmp_redirect_off(iface->name); + } + + if (configuration_changed) { + struct nhrp_peer_selector sel; + int count = 0; + + /* Reset the interface values we detect later */ + memset(&iface->nat_cie, 0, sizeof(iface->nat_cie)); + iface->nbma_mtu = 0; + if (iface->link_index) { + /* Reenumerate addresses if needed */ + netlink_enumerate(&talk_fd, PF_UNSPEC, RTM_GETADDR); + netlink_read_cb(&talk_fd.io, EV_READ); + } + + /* Purge all NHRP entries for this interface */ + memset(&sel, 0, sizeof(sel)); + sel.type_mask = NHRP_PEER_TYPEMASK_PURGEABLE; + sel.interface = iface; + nhrp_peer_foreach(nhrp_peer_purge_matching, &count, &sel); + nhrp_info("Interface %s: GRE configuration changed. 
Purged %d peers.", + ifname, count); + } +} + +static void netlink_link_del(struct nlmsghdr *msg) +{ + struct nhrp_interface *iface; + struct ifinfomsg *ifi = NLMSG_DATA(msg); + struct rtattr *rta[IFLA_MAX+1]; + const char *ifname; + + netlink_parse_rtattr(rta, IFLA_MAX, IFLA_RTA(ifi), IFLA_PAYLOAD(msg)); + if (rta[IFLA_IFNAME] == NULL) + return; + + ifname = RTA_DATA(rta[IFLA_IFNAME]); + iface = nhrp_interface_get_by_name(ifname, FALSE); + if (iface == NULL) + return; + + nhrp_info("Interface '%s' deleted", ifname); + iface->index = 0; + iface->link_index = 0; + nhrp_interface_hash(iface); + + nhrp_address_set_type(&iface->nbma_address, PF_UNSPEC); + nhrp_address_set_type(&iface->protocol_address, PF_UNSPEC); +} + +static int netlink_addr_new_nbma(void *ctx, struct nhrp_interface *iface) +{ + struct nlmsghdr *msg = (struct nlmsghdr *) ctx; + struct ifaddrmsg *ifa = NLMSG_DATA(msg); + struct rtattr *rta[IFA_MAX+1]; + struct nhrp_interface *nbma_iface; + + if (iface->link_index == ifa->ifa_index) { + netlink_parse_rtattr(rta, IFA_MAX, IFA_RTA(ifa), + IFA_PAYLOAD(msg)); + + if (rta[IFA_LOCAL] == NULL) + return 0; + + nhrp_address_set(&iface->nbma_address, ifa->ifa_family, + RTA_PAYLOAD(rta[IFA_LOCAL]), + RTA_DATA(rta[IFA_LOCAL])); + + nbma_iface = nhrp_interface_get_by_index(ifa->ifa_index, FALSE); + if (nbma_iface != NULL) { + iface->nbma_mtu = translate_mtu(nbma_iface->mtu); + } + } + + return 0; +} + +static void netlink_addr_new(struct nlmsghdr *msg) +{ + struct nhrp_interface *iface; + struct nhrp_peer *peer, *bcast; + struct ifaddrmsg *ifa = NLMSG_DATA(msg); + struct rtattr *rta[IFA_MAX+1]; + + if (!(ifa->ifa_flags & IFA_F_SECONDARY)) + nhrp_interface_foreach(netlink_addr_new_nbma, msg); + + netlink_parse_rtattr(rta, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(msg)); + iface = nhrp_interface_get_by_index(ifa->ifa_index, FALSE); + if (iface == NULL || rta[IFA_LOCAL] == NULL) + return; + + /* Shortcut destination stuff is extracted from routes; + * not from local address 
information. */ + if (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) + return; + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + return; + + nhrp_address_set(&iface->protocol_address, ifa->ifa_family, + RTA_PAYLOAD(rta[IFA_LOCAL]), + RTA_DATA(rta[IFA_LOCAL])); + iface->protocol_address_prefix = ifa->ifa_prefixlen; + + peer = nhrp_peer_alloc(iface); + peer->type = NHRP_PEER_TYPE_LOCAL_ADDR; + peer->afnum = AFNUM_RESERVED; + nhrp_address_set(&peer->protocol_address, ifa->ifa_family, + RTA_PAYLOAD(rta[IFA_LOCAL]), + RTA_DATA(rta[IFA_LOCAL])); + switch (ifa->ifa_family) { + case PF_INET: + peer->protocol_type = ETHPROTO_IP; + peer->prefix_length = peer->protocol_address.addr_len * 8; + nhrp_peer_insert(peer); + break; + default: + nhrp_peer_put(peer); + return; + } + + bcast = nhrp_peer_alloc(iface); + bcast->type = peer->type; + bcast->afnum = peer->afnum; + bcast->protocol_type = peer->protocol_type; + bcast->prefix_length = peer->prefix_length; + bcast->protocol_address = peer->protocol_address; + nhrp_address_set_broadcast(&bcast->protocol_address, + ifa->ifa_prefixlen); + bcast->next_hop_address = peer->protocol_address; + nhrp_peer_insert(bcast); + nhrp_peer_put(bcast); + + nhrp_peer_put(peer); +} + +struct netlink_del_addr_msg { + int interface_index; + struct nhrp_address address; +}; + +static int netlink_addr_del_nbma(void *ctx, struct nhrp_interface *iface) +{ + struct netlink_del_addr_msg *msg = (struct netlink_del_addr_msg *) ctx; + + if (iface->link_index == msg->interface_index && + nhrp_address_cmp(&msg->address, &iface->nbma_address) == 0) + nhrp_address_set_type(&iface->nbma_address, PF_UNSPEC); + + return 0; +} + +static int netlink_addr_purge_nbma(void *ctx, struct nhrp_peer *peer) +{ + struct netlink_del_addr_msg *msg = (struct netlink_del_addr_msg *) ctx; + + if (nhrp_address_cmp(&peer->my_nbma_address, &msg->address) == 0) + nhrp_peer_purge(peer, "address-removed"); + + return 0; +} + +static void netlink_addr_del(struct nlmsghdr 
*nlmsg) +{ + struct netlink_del_addr_msg msg; + struct nhrp_interface *iface; + struct ifaddrmsg *ifa = NLMSG_DATA(nlmsg); + struct rtattr *rta[IFA_MAX+1]; + struct nhrp_peer_selector sel; + + netlink_parse_rtattr(rta, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(nlmsg)); + if (rta[IFA_LOCAL] == NULL) + return; + + msg.interface_index = ifa->ifa_index; + nhrp_address_set(&msg.address, ifa->ifa_family, + RTA_PAYLOAD(rta[IFA_LOCAL]), + RTA_DATA(rta[IFA_LOCAL])); + + if (!(ifa->ifa_flags & IFA_F_SECONDARY)) + nhrp_interface_foreach(netlink_addr_del_nbma, &msg); + nhrp_peer_foreach(netlink_addr_purge_nbma, &msg, NULL); + + iface = nhrp_interface_get_by_index(ifa->ifa_index, FALSE); + if (iface == NULL) + return; + + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_EXACT; + sel.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ADDR); + sel.interface = iface; + sel.protocol_address = msg.address; + sel.prefix_length = sel.protocol_address.addr_len * 8; + + if (nhrp_address_cmp(&sel.protocol_address, &iface->protocol_address) == 0) + nhrp_address_set_type(&iface->protocol_address, PF_UNSPEC); + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); + + nhrp_address_set_broadcast(&sel.protocol_address, ifa->ifa_prefixlen); + sel.next_hop_address = msg.address; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); +} + +static void netlink_route_new(struct nlmsghdr *msg) +{ + struct nhrp_interface *iface; + struct nhrp_peer *peer; + struct rtmsg *rtm = NLMSG_DATA(msg); + struct rtattr *rta[RTA_MAX+1]; + int type = 0; + + netlink_parse_rtattr(rta, RTA_MAX, RTM_RTA(rtm), RTM_PAYLOAD(msg)); + if (rta[RTA_OIF] == NULL || rta[RTA_DST] == NULL) + return; + + if (rtm->rtm_family != PF_INET) + return; + + iface = nhrp_interface_get_by_index(*(int*)RTA_DATA(rta[RTA_OIF]), + FALSE); + if (iface == NULL) + return; + + if (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) { + /* Local shortcut target routes */ + if (rtm->rtm_table != RT_TABLE_MAIN) + return; + type = 
NHRP_PEER_TYPE_LOCAL_ADDR; + } else if (iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED) { + /* Routes which might get additional outbound + * shortcuts */ + if (rtm->rtm_table != iface->route_table || + rtm->rtm_protocol == RTPROT_KERNEL) + return; + type = NHRP_PEER_TYPE_LOCAL_ROUTE; + } + if (type == 0) + return; + + peer = nhrp_peer_alloc(iface); + peer->type = type; + peer->afnum = AFNUM_RESERVED; + nhrp_address_set(&peer->protocol_address, rtm->rtm_family, + RTA_PAYLOAD(rta[RTA_DST]), + RTA_DATA(rta[RTA_DST])); + if (rta[RTA_GATEWAY] != NULL) { + nhrp_address_set(&peer->next_hop_address, + rtm->rtm_family, + RTA_PAYLOAD(rta[RTA_GATEWAY]), + RTA_DATA(rta[RTA_GATEWAY])); + } + peer->protocol_type = nhrp_protocol_from_pf(rtm->rtm_family); + peer->prefix_length = rtm->rtm_dst_len; + nhrp_peer_insert(peer); + nhrp_peer_put(peer); +} + +static void netlink_route_del(struct nlmsghdr *msg) +{ + struct nhrp_interface *iface; + struct rtmsg *rtm = NLMSG_DATA(msg); + struct rtattr *rta[RTA_MAX+1]; + struct nhrp_peer_selector sel; + int type = 0; + + netlink_parse_rtattr(rta, RTA_MAX, RTM_RTA(rtm), RTM_PAYLOAD(msg)); + if (rta[RTA_OIF] == NULL || rta[RTA_DST] == NULL) + return; + + if (rtm->rtm_family != PF_INET) + return; + + iface = nhrp_interface_get_by_index(*(int*)RTA_DATA(rta[RTA_OIF]), + FALSE); + if (iface == NULL) + return; + + if (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) { + /* Local shortcut target routes */ + if (rtm->rtm_table != RT_TABLE_MAIN) + return; + type = NHRP_PEER_TYPE_LOCAL_ADDR; + } else if (iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED) { + /* Routes which might get additional outbound + * shortcuts */ + if (rtm->rtm_table != iface->route_table || + rtm->rtm_protocol == RTPROT_KERNEL) + return; + type = NHRP_PEER_TYPE_LOCAL_ROUTE; + } + if (type == 0) + return; + + memset(&sel, 0, sizeof(sel)); + sel.flags = NHRP_PEER_FIND_EXACT; + sel.type_mask = BIT(type); + sel.interface = iface; + nhrp_address_set(&sel.protocol_address, 
rtm->rtm_family, + RTA_PAYLOAD(rta[RTA_DST]), + RTA_DATA(rta[RTA_DST])); + if (rta[RTA_GATEWAY] != NULL) { + nhrp_address_set(&sel.next_hop_address, + rtm->rtm_family, + RTA_PAYLOAD(rta[RTA_GATEWAY]), + RTA_DATA(rta[RTA_GATEWAY])); + } + sel.prefix_length = rtm->rtm_dst_len; + nhrp_peer_foreach(nhrp_peer_remove_matching, NULL, &sel); +} + +static const netlink_dispatch_f route_dispatch[RTM_MAX] = { + [RTM_GETNEIGH] = netlink_neigh_request, + [RTM_NEWNEIGH] = netlink_neigh_update, + [RTM_DELNEIGH] = netlink_neigh_update, + [RTM_NEWLINK] = netlink_link_new, + [RTM_DELLINK] = netlink_link_del, + [RTM_NEWADDR] = netlink_addr_new, + [RTM_DELADDR] = netlink_addr_del, + [RTM_NEWROUTE] = netlink_route_new, + [RTM_DELROUTE] = netlink_route_del, +}; + +static void netlink_stop_listening(struct netlink_fd *fd) +{ + ev_io_stop(&fd->io); +} + +static void netlink_close(struct netlink_fd *fd) +{ + if (fd->fd >= 0) { + netlink_stop_listening(fd); + close(fd->fd); + fd->fd = 0; + } +} + +static int netlink_open(struct netlink_fd *fd, int protocol, int groups) +{ + struct sockaddr_nl addr; + int buf = NETLINK_KERNEL_BUFFER; + + fd->fd = socket(AF_NETLINK, SOCK_RAW, protocol); + fd->seq = time(NULL); + if (fd->fd < 0) { + nhrp_perror("Cannot open netlink socket"); + return FALSE; + } + + fcntl(fd->fd, F_SETFD, FD_CLOEXEC); + if (setsockopt(fd->fd, SOL_SOCKET, SO_SNDBUF, &buf, sizeof(buf)) < 0) { + nhrp_perror("SO_SNDBUF"); + goto error; + } + + if (setsockopt(fd->fd, SOL_SOCKET, SO_RCVBUF, &buf, sizeof(buf)) < 0) { + nhrp_perror("SO_RCVBUF"); + goto error; + } + + memset(&addr, 0, sizeof(addr)); + addr.nl_family = AF_NETLINK; + addr.nl_groups = groups; + if (bind(fd->fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { + nhrp_perror("Cannot bind netlink socket"); + goto error; + } + + ev_io_init(&fd->io, netlink_read_cb, fd->fd, EV_READ); + ev_io_start(&fd->io); + + return TRUE; + +error: + netlink_close(fd); + return FALSE; +} + +static void pfpacket_read_cb(struct ev_io *w, int 
revents) +{ + struct sockaddr_ll lladdr; + struct nhrp_interface *iface; + struct iovec iov; + struct msghdr msg = { + .msg_name = &lladdr, + .msg_namelen = sizeof(lladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + uint8_t buf[1500]; + struct nhrp_address from; + int fd = w->fd; + int i; + + iov.iov_base = buf; + for (i = 0; i < 2; i++) { + int status; + + iov.iov_len = sizeof(buf); + status = recvmsg(fd, &msg, MSG_DONTWAIT); + if (status < 0) { + if (errno == EINTR) + continue; + if (errno == EAGAIN) + return; + nhrp_perror("PF_PACKET overrun"); + continue; + } + + if (status == 0) { + nhrp_error("PF_PACKET returned EOF"); + return; + } + + iface = nhrp_interface_get_by_index(lladdr.sll_ifindex, FALSE); + if (iface == NULL) + continue; + + nhrp_address_set(&from, PF_INET, lladdr.sll_halen, lladdr.sll_addr); + if (memcmp(lladdr.sll_addr, "\x00\x00\x00\x00", 4) == 0) + nhrp_address_set_type(&from, PF_UNSPEC); + nhrp_packet_receive(buf, status, iface, &from); + } +} + +int kernel_init(void) +{ + int fd, i; + + proc_icmp_redirect_off("all"); + + fd = socket(PF_PACKET, SOCK_DGRAM, ETHPROTO_NHRP); + if (fd < 0) { + nhrp_error("Unable to create PF_PACKET socket"); + return FALSE; + } + + fcntl(fd, F_SETFD, FD_CLOEXEC); + ev_io_init(&packet_io, pfpacket_read_cb, fd, EV_READ); + ev_io_start(&packet_io); + + for (i = 0; i < ARRAY_SIZE(netlink_groups); i++) { + netlink_fds[i].dispatch_size = sizeof(route_dispatch) / sizeof(route_dispatch[0]); + netlink_fds[i].dispatch = route_dispatch; + if (!netlink_open(&netlink_fds[i], NETLINK_ROUTE, + netlink_groups[i])) + goto err_close_all; + } + + netlink_enumerate(&talk_fd, PF_UNSPEC, RTM_GETLINK); + netlink_read_cb(&talk_fd.io, EV_READ); + + netlink_enumerate(&talk_fd, PF_UNSPEC, RTM_GETADDR); + netlink_read_cb(&talk_fd.io, EV_READ); + + netlink_enumerate(&talk_fd, PF_UNSPEC, RTM_GETROUTE); + netlink_read_cb(&talk_fd.io, EV_READ); + + return TRUE; + +err_close_all: + kernel_cleanup(); + return FALSE; +} + +void 
kernel_stop_listening(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(netlink_groups); i++) + netlink_stop_listening(&netlink_fds[i]); + ev_io_stop(&packet_io); +} + +void kernel_cleanup(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(netlink_groups); i++) + netlink_close(&netlink_fds[i]); + ev_io_stop(&packet_io); + close(packet_io.fd); +} + +int kernel_route(struct nhrp_interface *out_iface, + struct nhrp_address *dest, + struct nhrp_address *default_source, + struct nhrp_address *next_hop, + u_int16_t *mtu) +{ + struct { + struct nlmsghdr n; + struct rtmsg r; + char buf[1024]; + } req; + struct rtmsg *r = NLMSG_DATA(&req.n); + struct rtattr *rta[RTA_MAX+1]; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_GETROUTE; + req.r.rtm_family = dest->type; + + netlink_add_rtattr_l(&req.n, sizeof(req), RTA_DST, + dest->addr, dest->addr_len); + req.r.rtm_dst_len = dest->addr_len * 8; + + if (default_source != NULL && default_source->type != PF_UNSPEC) + netlink_add_rtattr_l(&req.n, sizeof(req), RTA_SRC, + default_source->addr, + default_source->addr_len); + if (out_iface != NULL) + netlink_add_rtattr_l(&req.n, sizeof(req), RTA_OIF, + &out_iface->index, sizeof(int)); + + if (!netlink_talk(&talk_fd, &req.n, sizeof(req), &req.n)) + return FALSE; + + netlink_parse_rtattr(rta, RTA_MAX, RTM_RTA(r), RTM_PAYLOAD(&req.n)); + + if (default_source != NULL && default_source->type == PF_UNSPEC && + rta[RTA_PREFSRC] != NULL) { + nhrp_address_set(default_source, dest->type, + RTA_PAYLOAD(rta[RTA_PREFSRC]), + RTA_DATA(rta[RTA_PREFSRC])); + } + + if (next_hop != NULL) { + if (rta[RTA_GATEWAY] != NULL) { + nhrp_address_set(next_hop, dest->type, + RTA_PAYLOAD(rta[RTA_GATEWAY]), + RTA_DATA(rta[RTA_GATEWAY])); + } else { + *next_hop = *dest; + } + } + + if (mtu != NULL) { + *mtu = 0; + + if (rta[RTA_OIF] != NULL) { + struct nhrp_interface *nbma_iface; + + /* We use interface MTU here 
instead of the route + * cache MTU from RTA_METRICS/RTAX_MTU since we + * don't want to announce mtu if PMTU works */ + nbma_iface = nhrp_interface_get_by_index( + *(int*)RTA_DATA(rta[RTA_OIF]), + FALSE); + if (nbma_iface != NULL) + *mtu = translate_mtu(nbma_iface->mtu); + } + } + + return TRUE; +} + +int kernel_send(uint8_t *packet, size_t bytes, struct nhrp_interface *out, + struct nhrp_address *to) +{ + struct sockaddr_ll lladdr; + struct iovec iov = { + .iov_base = (void*) packet, + .iov_len = bytes + }; + struct msghdr msg = { + .msg_name = &lladdr, + .msg_namelen = sizeof(lladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + int status; + + if (to->addr_len > sizeof(lladdr.sll_addr)) { + nhrp_error("Destination NBMA address too long"); + return FALSE; + } + + memset(&lladdr, 0, sizeof(lladdr)); + lladdr.sll_family = AF_PACKET; + lladdr.sll_protocol = ETHPROTO_NHRP; + lladdr.sll_ifindex = out->index; + lladdr.sll_halen = to->addr_len; + memcpy(lladdr.sll_addr, to->addr, to->addr_len); + + status = sendmsg(packet_io.fd, &msg, 0); + if (status < 0) { + nhrp_error("Cannot send packet to %s(%d): %s", + out->name, out->index, strerror(errno)); + return FALSE; + } + + return TRUE; +} + +int kernel_inject_neighbor(struct nhrp_address *neighbor, + struct nhrp_address *hwaddr, + struct nhrp_interface *dev) +{ + struct { + struct nlmsghdr n; + struct ndmsg ndm; + char buf[256]; + } req; + char neigh[64], nbma[64]; + + memset(&req.n, 0, sizeof(req.n)); + memset(&req.ndm, 0, sizeof(req.ndm)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE | NLM_F_CREATE; + req.n.nlmsg_type = RTM_NEWNEIGH; + req.ndm.ndm_family = neighbor->type; + req.ndm.ndm_ifindex = dev->index; + req.ndm.ndm_type = RTN_UNICAST; + + netlink_add_rtattr_l(&req.n, sizeof(req), NDA_DST, + neighbor->addr, neighbor->addr_len); + + if (hwaddr != NULL && hwaddr->type != PF_UNSPEC) { + req.ndm.ndm_state = NUD_REACHABLE; + + netlink_add_rtattr_l(&req.n, 
sizeof(req), NDA_LLADDR, + hwaddr->addr, hwaddr->addr_len); + + nhrp_debug("NL-ARP(%s) %s is-at %s", + dev->name, + nhrp_address_format(neighbor, sizeof(neigh), neigh), + nhrp_address_format(hwaddr, sizeof(nbma), nbma)); + } else { + req.ndm.ndm_state = NUD_FAILED; + + nhrp_debug("NL-ARP(%s) %s not-reachable", + dev->name, + nhrp_address_format(neighbor, sizeof(neigh), neigh)); + } + + return netlink_send(&talk_fd, &req.n); +} + diff --git a/nhrp/sysdep_pfpacket.c b/nhrp/sysdep_pfpacket.c new file mode 100644 index 0000000..514b848 --- /dev/null +++ b/nhrp/sysdep_pfpacket.c @@ -0,0 +1,388 @@ +/* sysdep_pfpacket.c - Tracing of forwarded packets using PF_PACKET + * + * Copyright (C) 2007-2009 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. 
+ */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <netinet/in.h> +#include <linux/types.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> + +#include "libev.h" +#include "nhrp_defines.h" +#include "nhrp_common.h" +#include "nhrp_interface.h" +#include "nhrp_peer.h" + +#define MAX_OPCODES 100 + +struct multicast_packet { + struct nhrp_interface *iface; + struct sockaddr_ll lladdr; + unsigned int pdulen; + unsigned char pdu[1500]; +}; + +static struct ev_io packet_io; +static struct ev_timer install_filter_timer; +static struct ev_idle mcast_route; + +static struct multicast_packet mcast_queue[16]; +static int mcast_head = 0, mcast_tail = 0; + + +enum { + LABEL_NEXT = 0, + LABEL_SKIP1, + LABEL_SKIPN, + LABEL_DROP, + LABEL_CHECK_MULTICAST, + LABEL_CHECK_MULTICAST_DESTINATION, + LABEL_CHECK_TRAFFIC_INDICATION, + LABEL_CHECK_NON_LOCAL_ADDRESS, + NUM_LABELS +}; + +struct filter { + int pos[NUM_LABELS]; + int numops; + struct sock_filter code[MAX_OPCODES]; +}; + +static void emit_stmt(struct filter *f, __u16 code, __u32 k) +{ + if (f->numops < MAX_OPCODES) { + f->code[f->numops].code = code; + f->code[f->numops].jt = 0; + f->code[f->numops].jf = 0; + f->code[f->numops].k = k; + } + f->numops++; +} + +static void emit_jump(struct filter *f, __u16 code, __u32 k, __u8 jt, __u8 jf) +{ + if (f->numops < MAX_OPCODES) { + f->code[f->numops].code = code; + f->code[f->numops].jt = jt; + f->code[f->numops].jf = jf; + f->code[f->numops].k = k; + } + f->numops++; +} + +static void mark(struct filter *f, int label) +{ + f->pos[label] = f->numops; +} + +static int check_interface_multicast(void *ctx, struct nhrp_interface *iface) +{ + struct filter *f = (struct filter *) ctx; + + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + return 0; + if (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) + return 
0; + + if (iface->mcast_mask || iface->mcast_numaddr) + emit_jump(f, BPF_JMP|BPF_JEQ|BPF_K, iface->index, + LABEL_CHECK_MULTICAST_DESTINATION, LABEL_NEXT); + + return 0; +} + +static int drop_matching_address(void *ctx, struct nhrp_peer *peer) +{ + struct filter *f = (struct filter *) ctx; + unsigned long addr, mask; + + if (peer->protocol_type != ETHPROTO_IP) + return 0; + + addr = htonl(*((unsigned long *) peer->protocol_address.addr)); + if (peer->prefix_length != 32) { + mask = 0xffffffff >> peer->prefix_length; + emit_jump(f, BPF_JMP|BPF_JGE|BPF_K, addr & ~mask, LABEL_NEXT, LABEL_SKIP1); + emit_jump(f, BPF_JMP|BPF_JGT|BPF_K, addr | mask, LABEL_NEXT, LABEL_DROP); + } else { + emit_jump(f, BPF_JMP|BPF_JEQ|BPF_K, addr, LABEL_DROP, LABEL_NEXT); + } + + return 0; +} + +static int check_interface_traffic_indication(void *ctx, struct nhrp_interface *iface) +{ + struct filter *f = (struct filter *) ctx; + + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + return 0; + if (iface->flags & NHRP_INTERFACE_FLAG_SHORTCUT_DEST) + return 0; + if (!(iface->flags & NHRP_INTERFACE_FLAG_REDIRECT)) + return 0; + + emit_jump(f, BPF_JMP|BPF_JEQ|BPF_K, iface->index, + LABEL_CHECK_NON_LOCAL_ADDRESS, LABEL_NEXT); + + return 0; +} + +static void install_filter_cb(struct ev_timer *w, int revents) +{ + struct nhrp_peer_selector sel; + struct sock_fprog prog; + struct filter f; + int i; + + memset(&prog, 0, sizeof(prog)); + memset(&f, 0, sizeof(f)); + + /* Check for IPv4 */ + emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, SKF_AD_OFF+SKF_AD_PROTOCOL); + emit_jump(&f, BPF_JMP|BPF_JEQ|BPF_K, ETH_P_IP, LABEL_NEXT, LABEL_DROP); + + /* Traffic indication checking is for incoming packets + * Multicast checking is for outgoing packets */ + emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, SKF_AD_OFF+SKF_AD_PKTTYPE); + emit_jump(&f, BPF_JMP|BPF_JEQ|BPF_K, PACKET_OUTGOING, LABEL_CHECK_MULTICAST, LABEL_NEXT); + emit_jump(&f, BPF_JMP|BPF_JEQ|BPF_K, PACKET_HOST, LABEL_CHECK_TRAFFIC_INDICATION, LABEL_DROP); + + /* 
MULTICAST check - for interfaces that have MC forwarding enabled */ + mark(&f, LABEL_CHECK_MULTICAST); + emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, SKF_AD_OFF+SKF_AD_IFINDEX); + nhrp_interface_foreach(check_interface_multicast, &f); + emit_stmt(&f, BPF_RET|BPF_K, 0); + + /* Check for multicast IPv4 destination - accept on match (all packet) */ + mark(&f, LABEL_CHECK_MULTICAST_DESTINATION); + emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, offsetof(struct iphdr, daddr)); + emit_jump(&f, BPF_JMP|BPF_JGE|BPF_K, 0xe0000000, LABEL_NEXT, LABEL_DROP); + emit_jump(&f, BPF_JMP|BPF_JGE|BPF_K, 0xf0000000, LABEL_DROP, LABEL_NEXT); + emit_stmt(&f, BPF_RET|BPF_K, 65535); + + /* TRAFFIC INDICATION check - is destination non-local + * if yes, capture headers for NHRP traffic indication */ + mark(&f, LABEL_CHECK_TRAFFIC_INDICATION); + emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, SKF_AD_OFF+SKF_AD_IFINDEX); + nhrp_interface_foreach(check_interface_traffic_indication, &f); + emit_stmt(&f, BPF_RET|BPF_K, 0); + + mark(&f, LABEL_CHECK_NON_LOCAL_ADDRESS); + memset(&sel, 0, sizeof(sel)); + sel.type_mask = BIT(NHRP_PEER_TYPE_LOCAL_ADDR); + emit_stmt(&f, BPF_LD |BPF_W |BPF_ABS, offsetof(struct iphdr, daddr)); + nhrp_peer_foreach(drop_matching_address, &f, &sel); + emit_stmt(&f, BPF_RET|BPF_K, 68); + + mark(&f, LABEL_DROP); + emit_stmt(&f, BPF_RET|BPF_K, 0); + + /* All ok so far? 
*/ + if (f.numops >= MAX_OPCODES) { + nhrp_error("Filter code buffer too small (code actual length %d)", + f.numops); + return; + } + + /* Fixup jumps to be relative */ + for (i = 0; i < f.numops; i++) { + if (BPF_CLASS(f.code[i].code) == BPF_JMP) { + if (f.code[i].jt > LABEL_SKIPN) + f.code[i].jt = f.pos[f.code[i].jt] - i - 1; + if (f.code[i].jf > LABEL_SKIPN) + f.code[i].jf = f.pos[f.code[i].jf] - i - 1; + } + } + + /* Attach filter */ + prog.len = f.numops; + prog.filter = f.code; + if (setsockopt(packet_io.fd, SOL_SOCKET, SO_ATTACH_FILTER, + &prog, sizeof(prog))) + return; + + nhrp_info("Filter code installed (%d opcodes)", f.numops); +} + +int forward_local_addresses_changed(void) +{ + if (install_filter_timer.cb != NULL) + ev_timer_start(&install_filter_timer); + return TRUE; +} + +static void send_multicast(struct ev_idle *w, int revents) +{ + struct multicast_packet *pkt; + struct nhrp_peer *peer; + struct iovec iov; + struct msghdr msg; + + if (mcast_head == mcast_tail) { + ev_idle_stop(&mcast_route); + return; + } + + /* Pop a packet */ + pkt = &mcast_queue[mcast_tail]; + mcast_tail = (mcast_tail + 1) % ARRAY_SIZE(mcast_queue); + + /* And softroute it forward */ + iov.iov_base = pkt->pdu; + iov.iov_len = pkt->pdulen; + msg = (struct msghdr) { + .msg_name = &pkt->lladdr, + .msg_namelen = sizeof(pkt->lladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + list_for_each_entry(peer, &pkt->iface->mcast_list, mcast_list_entry) { + /* Update NBMA destination */ + pkt->lladdr.sll_halen = peer->next_hop_address.addr_len; + memcpy(pkt->lladdr.sll_addr, peer->next_hop_address.addr, + pkt->lladdr.sll_halen); + + /* Best effort attempt to emulate multicast */ + (void) sendmsg(packet_io.fd, &msg, 0); + } +} + +static void pfp_read_cb(struct ev_io *w, int revents) +{ + struct nhrp_address nbma_src, src, dst; + struct nhrp_interface *iface; + struct sockaddr_ll *lladdr; + struct iovec iov; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; + char fr[32], 
to[32]; + int r, fd = w->fd; + + if (!(revents & EV_READ)) + return; + + while (TRUE) { + /* Get a scracth buffer directly from mcast queue, so we do + * not need copy the data later. */ + msg.msg_name = &mcast_queue[mcast_head].lladdr; + msg.msg_namelen = sizeof(mcast_queue[mcast_head].lladdr); + iov.iov_base = mcast_queue[mcast_head].pdu; + iov.iov_len = sizeof(mcast_queue[mcast_head].pdu); + + /* Receive */ + r = recvmsg(fd, &msg, MSG_DONTWAIT); + mcast_queue[mcast_head].pdulen = r; + + /* Process */ + if (r < 0) { + if (errno == EINTR) + continue; + if (errno == EAGAIN) + return; + nhrp_perror("PF_PACKET overrun"); + continue; + } + + if (r == 0) { + nhrp_error("PF_PACKET returned EOF"); + return; + } + + lladdr = &mcast_queue[mcast_head].lladdr; + if (lladdr->sll_pkttype != PACKET_OUTGOING && + lladdr->sll_pkttype != PACKET_HOST) + continue; + + iface = nhrp_interface_get_by_index(lladdr->sll_ifindex, FALSE); + if (iface == NULL) + continue; + if (!(iface->flags & NHRP_INTERFACE_FLAG_CONFIGURED)) + continue; + + if (!nhrp_address_parse_packet(lladdr->sll_protocol, + r, iov.iov_base, + &src, &dst)) + return; + + if (nhrp_address_is_multicast(&dst) && + lladdr->sll_pkttype == PACKET_OUTGOING) { + nhrp_debug("Multicast from %s to %s", + nhrp_address_format(&src, sizeof(fr), fr), + nhrp_address_format(&dst, sizeof(to), to)); + + /* Queue packet for processing later (handle important + * stuff first) */ + mcast_queue[mcast_head].iface = iface; + mcast_head = (mcast_head + 1) % ARRAY_SIZE(mcast_queue); + + /* Drop packets from queue tail, if we haven't processed + * them yet. 
*/ + if (mcast_head == mcast_tail) + mcast_tail = (mcast_tail + 1) % + ARRAY_SIZE(mcast_queue); + + ev_idle_start(&mcast_route); + } else if (lladdr->sll_pkttype == PACKET_HOST) { + nhrp_address_set(&nbma_src, PF_INET, + lladdr->sll_halen, + lladdr->sll_addr); + nhrp_packet_send_traffic(iface, + &nbma_src, &src, &dst, + lladdr->sll_protocol, + iov.iov_base, r); + } + } +} + +int forward_init(void) +{ + int fd; + + fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL)); /* packet(7): protocol is given in network byte order */ + if (fd < 0) { + nhrp_error("Unable to create PF_PACKET socket"); + return FALSE; + } + + fcntl(fd, F_SETFD, FD_CLOEXEC); + + ev_io_init(&packet_io, pfp_read_cb, fd, EV_READ); + ev_io_start(&packet_io); + + ev_timer_init(&install_filter_timer, install_filter_cb, .01, .0); + install_filter_cb(&install_filter_timer, 0); + + ev_idle_init(&mcast_route, send_multicast); + ev_set_priority(&mcast_route, -1); + + return TRUE; +} + +void forward_cleanup(void) +{ + ev_io_stop(&packet_io); + close(packet_io.fd); + ev_timer_stop(&install_filter_timer); + ev_idle_stop(&mcast_route); +} diff --git a/nhrp/sysdep_syslog.c b/nhrp/sysdep_syslog.c new file mode 100644 index 0000000..c8f9f7e --- /dev/null +++ b/nhrp/sysdep_syslog.c @@ -0,0 +1,55 @@ +/* sysdep_syslog.c - Logging via syslog + * + * Copyright (C) 2007 Timo Teräs <timo.teras@iki.fi> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. + * + * See http://www.gnu.org/ for details. + */ + +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <syslog.h> +#include <stdarg.h> + +#include "nhrp_defines.h" +#include "nhrp_common.h" + +int log_init(void) +{ + openlog("opennhrp", LOG_PERROR | LOG_PID, LOG_DAEMON); + + return TRUE; +} + +void nhrp_log(int level, const char *format, ...) 
+{ + va_list va; + int l; + + switch (level) { + case NHRP_LOG_ERROR: + l = LOG_ERR; + break; + case NHRP_LOG_INFO: + l = LOG_INFO; + break; + case NHRP_LOG_DEBUG: + default: + l = LOG_DEBUG; + break; + } + + va_start(va, format); + vsyslog(l, format, va); + va_end(va); +} + +void nhrp_perror(const char *message) +{ + nhrp_error("%s: %s", message, strerror(errno)); +} diff --git a/patches/ipsec-tools-0.7.diff b/patches/ipsec-tools-0.7.diff new file mode 100644 index 0000000..1efba6c --- /dev/null +++ b/patches/ipsec-tools-0.7.diff @@ -0,0 +1,1832 @@ +Index: ipsec-tools-cvs/src/racoon/pfkey.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/pfkey.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/pfkey.c 2008-01-04 15:18:21.000000000 +0200 +@@ -92,6 +92,7 @@ + #include "algorithm.h" + #include "sainfo.h" + #include "admin.h" ++#include "evt.h" + #include "privsep.h" + #include "strnames.h" + #include "backupsa.h" +@@ -1263,9 +1264,10 @@ + + /* turn off the timer for calling pfkey_timeover() */ + SCHED_KILL(iph2->sce); +- ++ + /* update status */ + iph2->status = PHASE2ST_ESTABLISHED; ++ evt_phase2(iph2, EVTT_PHASE2_UP, NULL); + + #ifdef ENABLE_STATS + gettimeofday(&iph2->end, NULL); +@@ -1636,7 +1638,6 @@ + struct ph2handle *iph2[MAXNESTEDSA]; + struct sockaddr *src, *dst; + int n; /* # of phase 2 handler */ +- int remoteid=0; + #ifdef HAVE_SECCTX + struct sadb_x_sec_ctx *m_sec_ctx; + #endif /* HAVE_SECCTX */ +@@ -1825,63 +1826,12 @@ + return -1; + } + +- plog(LLV_DEBUG, LOCATION, NULL, +- "new acquire %s\n", spidx2str(&sp_out->spidx)); +- +- /* get sainfo */ +- { +- vchar_t *idsrc, *iddst; +- +- idsrc = ipsecdoi_sockaddr2id((struct sockaddr *)&sp_out->spidx.src, +- sp_out->spidx.prefs, sp_out->spidx.ul_proto); +- if (idsrc == NULL) { +- plog(LLV_ERROR, LOCATION, NULL, +- "failed to get ID for %s\n", +- spidx2str(&sp_out->spidx)); +- delph2(iph2[n]); +- return -1; +- } +- iddst = 
ipsecdoi_sockaddr2id((struct sockaddr *)&sp_out->spidx.dst, +- sp_out->spidx.prefd, sp_out->spidx.ul_proto); +- if (iddst == NULL) { +- plog(LLV_ERROR, LOCATION, NULL, +- "failed to get ID for %s\n", +- spidx2str(&sp_out->spidx)); +- vfree(idsrc); +- delph2(iph2[n]); +- return -1; +- } +- { +- struct remoteconf *conf; +- conf = getrmconf(iph2[n]->dst); +- if (conf != NULL) +- remoteid=conf->ph1id; +- else{ +- plog(LLV_DEBUG, LOCATION, NULL, "Warning: no valid rmconf !\n"); +- remoteid=0; +- } +- } +- iph2[n]->sainfo = getsainfo(idsrc, iddst, NULL, remoteid); +- vfree(idsrc); +- vfree(iddst); +- if (iph2[n]->sainfo == NULL) { +- plog(LLV_ERROR, LOCATION, NULL, +- "failed to get sainfo.\n"); ++ if (isakmp_get_sainfo(iph2[n], sp_out, sp_in) < 0) { + delph2(iph2[n]); + return -1; +- /* XXX should use the algorithm list from register message */ + } + +- plog(LLV_DEBUG, LOCATION, NULL, +- "selected sainfo: %s\n", sainfo2str(iph2[n]->sainfo)); +- } + +- if (set_proposal_from_policy(iph2[n], sp_out, sp_in) < 0) { +- plog(LLV_ERROR, LOCATION, NULL, +- "failed to create saprop.\n"); +- delph2(iph2[n]); +- return -1; +- } + #ifdef HAVE_SECCTX + if (m_sec_ctx) { + set_secctx_in_proposal(iph2[n], spidx); +@@ -2814,7 +2764,7 @@ + struct sadb_msg buf, *newmsg; + int reallen; + int retry = 0; +- ++ + *lenp = -1; + do + { +@@ -2823,12 +2773,10 @@ + retry++; + } + while (*lenp < 0 && errno == EAGAIN && retry < 3); ++ + if (*lenp < 0) +- { +- if ( errno == EAGAIN ) *lenp = 0; /* non-fatal */ +- return NULL; /*fatal*/ +- } +- ++ return NULL; /*fatal*/ ++ + else if (*lenp < sizeof(buf)) + return NULL; + +Index: ipsec-tools-cvs/src/racoon/evt.h +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/evt.h 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/evt.h 2008-01-04 15:18:21.000000000 +0200 +@@ -4,6 +4,7 @@ + + /* + * Copyright (C) 2004 Emmanuel Dreyfus ++ * Copyright (C) 2007 Timo Teras + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without +@@ -34,12 +35,27 @@ + #ifndef _EVT_H + #define _EVT_H + +-struct evtdump { +- size_t len; +- struct sockaddr_storage src; +- struct sockaddr_storage dst; +- time_t timestamp; +- int type; ++#ifdef ENABLE_ADMINPORT ++ ++struct evt_listener { ++ LIST_ENTRY(evt_listener) ll_chain; ++ LIST_ENTRY(evt_listener) fd_chain; ++ int fd; ++}; ++LIST_HEAD(evt_listener_list, evt_listener); ++#define EVT_LISTENER_LIST(x) struct evt_listener_list x; ++ ++struct ph1handle; ++struct ph2handle; ++ ++struct evt_common { ++ uint32_t ec_type; ++ time_t ec_timestamp; ++ ++ struct sockaddr_storage ec_ph1src; ++ struct sockaddr_storage ec_ph1dst; ++ u_int32_t ec_ph2msgid; ++ + /* + * Optionnal list of struct isakmp_data + * for type EVTT_ISAKMP_CFG_DONE +@@ -47,42 +63,48 @@ + }; + + /* type */ +-#define EVTT_UNSEPC 0 +-#define EVTT_PHASE1_UP 1 +-#define EVTT_PHASE1_DOWN 2 +-#define EVTT_XAUTH_SUCCESS 3 +-#define EVTT_ISAKMP_CFG_DONE 4 +-#define EVTT_PHASE2_UP 5 +-#define EVTT_PHASE2_DOWN 6 +-#define EVTT_DPD_TIMEOUT 7 +-#define EVTT_PEER_NO_RESPONSE 8 +-#define EVTT_PEER_DELETE 9 +-#define EVTT_RACOON_QUIT 10 +-#define EVTT_XAUTH_FAILED 11 +-#define EVTT_OVERFLOW 12 /* Event queue overflowed */ +-#define EVTT_PEERPH1AUTH_FAILED 13 +-#define EVTT_PEERPH1_NOPROP 14 /* NO_PROPOSAL_CHOSEN & friends */ +-#define EVTT_NO_ISAKMP_CFG 15 /* no need to wait for mode_cfg */ +- +-struct evt { +- struct evtdump *dump; +- TAILQ_ENTRY(evt) next; +-}; ++#define EVTT_RACOON_QUIT 0x0001 + +-TAILQ_HEAD(evtlist, evt); ++#define EVTT_PHASE1_UP 0x0100 ++#define EVTT_PHASE1_DOWN 0x0101 ++#define EVTT_PHASE1_NO_RESPONSE 0x0102 ++#define EVTT_PHASE1_NO_PROPOSAL 0x0103 ++#define EVTT_PHASE1_AUTH_FAILED 0x0104 ++#define EVTT_PHASE1_DPD_TIMEOUT 0x0105 ++#define EVTT_PHASE1_PEER_DELETED 0x0106 ++#define EVTT_PHASE1_MODE_CFG 0x0107 ++#define EVTT_PHASE1_XAUTH_SUCCESS 0x0108 ++#define EVTT_PHASE1_XAUTH_FAILED 0x0109 ++ ++#define 
EVTT_PHASE2_NO_PHASE1 0x0200 ++#define EVTT_PHASE2_UP 0x0201 ++#define EVTT_PHASE2_DOWN 0x0202 ++#define EVTT_PHASE2_NO_RESPONSE 0x0203 ++ ++void evt_generic __P((int type, vchar_t *optdata)); ++void evt_phase1 __P((const struct ph1handle *ph1, int type, vchar_t *optdata)); ++void evt_phase2 __P((const struct ph2handle *ph2, int type, vchar_t *optdata)); ++ ++int evt_subscribe __P((struct evt_listener_list *list, int fd)); ++void evt_list_init __P((struct evt_listener_list *list)); ++void evt_list_cleanup __P((struct evt_listener_list *list)); ++int evt_get_fdmask __P((int nfds, fd_set *fdset)); ++void evt_handle_fdmask __P((fd_set *fdset)); + +-#define EVTLIST_MAX 32 ++#else + +-#ifdef ENABLE_ADMINPORT +-struct evtdump *evt_pop(void); +-vchar_t *evt_dump(void); +-void evt_push(struct sockaddr *, struct sockaddr *, int, vchar_t *); +-#endif ++#define EVT_LISTENER_LIST(x) + +-#ifdef ENABLE_ADMINPORT +-#define EVT_PUSH(src, dst, type, optdata) evt_push(src, dst, type, optdata); +-#else +-#define EVT_PUSH(src, dst, type, optdata) ; +-#endif ++#define evt_generic(type, optdata) ; ++#define evt_phase1(ph1, type, optdata) ; ++#define evt_phase2(ph2, type, optdata) ; ++ ++#define evt_subscribe(eventlist, fd) ; ++#define evt_list_init(eventlist) ; ++#define evt_list_cleanup(eventlist) ; ++#define evt_get_fdmask(nfds, fdset) nfds ++#define evt_handle_fdmask(fdset) ; ++ ++#endif /* ENABLE_ADMINPORT */ + + #endif /* _EVT_H */ +Index: ipsec-tools-cvs/src/racoon/evt.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/evt.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/evt.c 2008-01-04 15:18:21.000000000 +0200 +@@ -4,6 +4,7 @@ + + /* + * Copyright (C) 2004 Emmanuel Dreyfus ++ * Copyright (C) 2007 Timo Teras + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without +@@ -46,113 +47,219 @@ + #include "plog.h" + #include "misc.h" + #include "admin.h" ++#include "handler.h" + #include "gcmalloc.h" + #include "evt.h" + + #ifdef ENABLE_ADMINPORT +-struct evtlist evtlist = TAILQ_HEAD_INITIALIZER(evtlist); +-int evtlist_len = 0; + +-void +-evt_push(src, dst, type, optdata) +- struct sockaddr *src; +- struct sockaddr *dst; ++static EVT_LISTENER_LIST(evt_listeners); ++static EVT_LISTENER_LIST(evt_fds); ++ ++struct evtdump { ++ struct admin_com adm; ++ struct evt_common evt; ++}; ++ ++static struct evtdump * ++evtdump_create(type, optdata) + int type; + vchar_t *optdata; + { +- struct evtdump *evtdump; +- struct evt *evt; ++ struct evtdump *e; + size_t len; + +- /* If admin socket is disabled, silently discard anything */ +- if (adminsock_path == NULL) +- return; ++ len = sizeof(struct admin_com) + sizeof(struct evt_common); ++ if (optdata != NULL) ++ len += optdata->l; + +- /* If we are above the limit, don't record anything */ +- if (evtlist_len > EVTLIST_MAX) { +- plog(LLV_DEBUG, LOCATION, NULL, +- "Cannot record event: event queue overflowed\n"); +- return; ++ if ((e = racoon_malloc(len)) == NULL) { ++ plog(LLV_ERROR, LOCATION, NULL, "Cannot allocate event: %s\n", ++ strerror(errno)); ++ return NULL; + } + +- /* If we hit the limit, record an overflow event instead */ +- if (evtlist_len == EVTLIST_MAX) { +- plog(LLV_ERROR, LOCATION, NULL, +- "Cannot record event: event queue overflow\n"); +- src = NULL; +- dst = NULL; +- type = EVTT_OVERFLOW; +- optdata = NULL; ++ memset(e, 0, sizeof(struct evtdump)); ++ e->adm.ac_len = len; ++ e->adm.ac_cmd = ADMIN_SHOW_EVT; ++ e->adm.ac_errno = 0; ++ e->adm.ac_proto = 0; ++ e->evt.ec_type = type; ++ time(&e->evt.ec_timestamp); ++ if (optdata != NULL) ++ memcpy(e + 1, optdata->v, optdata->l); ++ ++ return e; ++} ++ ++static void ++evt_unsubscribe(l) ++ struct evt_listener *l; ++{ ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "[%d] 
admin connection released\n", l->fd); ++ ++ LIST_REMOVE(l, ll_chain); ++ LIST_REMOVE(l, fd_chain); ++ close(l->fd); ++ racoon_free(l); ++} ++ ++static void ++evtdump_broadcast(ll, e) ++ const struct evt_listener_list *ll; ++ struct evtdump *e; ++{ ++ struct evt_listener *l, *nl; ++ ++ for (l = LIST_FIRST(ll); l != NULL; l = nl) { ++ nl = LIST_NEXT(l, ll_chain); ++ ++ if (send(l->fd, e, e->adm.ac_len, ++ MSG_NOSIGNAL | MSG_DONTWAIT) < 0) { ++ plog(LLV_DEBUG, LOCATION, NULL, "Cannot send event to fd: %s\n", ++ strerror(errno)); ++ evt_unsubscribe(l); ++ } + } ++} + +- len = sizeof(*evtdump); +- if (optdata) +- len += optdata->l; ++void ++evt_generic(type, optdata) ++ int type; ++ vchar_t *optdata; ++{ ++ struct evtdump *e; + +- if ((evtdump = racoon_malloc(len)) == NULL) { +- plog(LLV_ERROR, LOCATION, NULL, "Cannot record event: %s\n", +- strerror(errno)); ++ if ((e = evtdump_create(type, optdata)) == NULL) + return; +- } + +- if ((evt = racoon_malloc(sizeof(*evt))) == NULL) { +- plog(LLV_ERROR, LOCATION, NULL, "Cannot record event: %s\n", +- strerror(errno)); +- racoon_free(evtdump); ++ evtdump_broadcast(&evt_listeners, e); ++ ++ racoon_free(e); ++} ++ ++void ++evt_phase1(ph1, type, optdata) ++ const struct ph1handle *ph1; ++ int type; ++ vchar_t *optdata; ++{ ++ struct evtdump *e; ++ ++ if ((e = evtdump_create(type, optdata)) == NULL) + return; ++ ++ if (ph1->local) ++ memcpy(&e->evt.ec_ph1src, ph1->local, sysdep_sa_len(ph1->local)); ++ if (ph1->remote) ++ memcpy(&e->evt.ec_ph1dst, ph1->remote, sysdep_sa_len(ph1->remote)); ++ ++ evtdump_broadcast(&ph1->evt_listeners, e); ++ evtdump_broadcast(&evt_listeners, e); ++ ++ racoon_free(e); ++} ++ ++void ++evt_phase2(ph2, type, optdata) ++ const struct ph2handle *ph2; ++ int type; ++ vchar_t *optdata; ++{ ++ struct evtdump *e; ++ struct ph1handle *ph1 = ph2->ph1; ++ ++ if ((e = evtdump_create(type, optdata)) == NULL) ++ return; ++ ++ if (ph1) { ++ if (ph1->local) ++ memcpy(&e->evt.ec_ph1src, ph1->local, 
sysdep_sa_len(ph1->local)); ++ if (ph1->remote) ++ memcpy(&e->evt.ec_ph1dst, ph1->remote, sysdep_sa_len(ph1->remote)); ++ } ++ e->evt.ec_ph2msgid = ph2->msgid; ++ ++ evtdump_broadcast(&ph2->evt_listeners, e); ++ if (ph1) ++ evtdump_broadcast(&ph1->evt_listeners, e); ++ evtdump_broadcast(&evt_listeners, e); ++ ++ racoon_free(e); ++} ++ ++int ++evt_subscribe(list, fd) ++ struct evt_listener_list *list; ++ int fd; ++{ ++ struct evt_listener *l; ++ ++ if ((l = racoon_malloc(sizeof(*l))) == NULL) { ++ plog(LLV_ERROR, LOCATION, NULL, ++ "Cannot allocate event listener: %s\n", ++ strerror(errno)); ++ return errno; + } + +- if (src) +- memcpy(&evtdump->src, src, sysdep_sa_len(src)); +- if (dst) +- memcpy(&evtdump->dst, dst, sysdep_sa_len(dst)); +- evtdump->len = len; +- evtdump->type = type; +- time(&evtdump->timestamp); ++ if (list == NULL) ++ list = &evt_listeners; + +- if (optdata) +- memcpy(evtdump + 1, optdata->v, optdata->l); ++ LIST_INSERT_HEAD(list, l, ll_chain); ++ LIST_INSERT_HEAD(&evt_fds, l, fd_chain); ++ l->fd = fd; + +- evt->dump = evtdump; +- TAILQ_INSERT_TAIL(&evtlist, evt, next); ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "[%d] admin connection is polling events\n", fd); + +- evtlist_len++; ++ return -2; ++} + +- return; ++void ++evt_list_init(list) ++ struct evt_listener_list *list; ++{ ++ LIST_INIT(list); + } + +-struct evtdump * +-evt_pop(void) { +- struct evtdump *evtdump; +- struct evt *evt; ++void ++evt_list_cleanup(list) ++ struct evt_listener_list *list; ++{ ++ while (!LIST_EMPTY(list)) ++ evt_unsubscribe(LIST_FIRST(list)); ++} + +- if ((evt = TAILQ_FIRST(&evtlist)) == NULL) +- return NULL; ++int ++evt_get_fdmask(nfds, fdset) ++ int nfds; ++ fd_set *fdset; ++{ ++ struct evt_listener *l; + +- evtdump = evt->dump; +- TAILQ_REMOVE(&evtlist, evt, next); +- racoon_free(evt); +- evtlist_len--; +- +- return evtdump; +-} +- +-vchar_t * +-evt_dump(void) { +- struct evtdump *evtdump; +- vchar_t *buf = NULL; +- +- if ((evtdump = evt_pop()) != NULL) { +- if ((buf = 
vmalloc(evtdump->len)) == NULL) { +- plog(LLV_ERROR, LOCATION, NULL, +- "evt_dump failed: %s\n", strerror(errno)); +- return NULL; +- } +- memcpy(buf->v, evtdump, evtdump->len); +- racoon_free(evtdump); ++ LIST_FOREACH(l, &evt_fds, fd_chain) { ++ FD_SET(l->fd, fdset); ++ if (l->fd + 1 > nfds) ++ nfds = l->fd + 1; + } + +- return buf; ++ return nfds; + } + ++void ++evt_handle_fdmask(fdset) ++ fd_set *fdset; ++{ ++ struct evt_listener *l, *nl; ++ ++ for (l = LIST_FIRST(&evt_fds); l != NULL; l = nl) { ++ nl = LIST_NEXT(l, fd_chain); /* evt_fds is linked via fd_chain; fetch next before l may be freed */ ++ ++ if (FD_ISSET(l->fd, fdset)) ++ evt_unsubscribe(l); ++ } ++} ++ ++ + #endif /* ENABLE_ADMINPORT */ +Index: ipsec-tools-cvs/src/racoon/handler.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/handler.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/handler.c 2008-01-04 15:18:21.000000000 +0200 +@@ -267,6 +267,7 @@ + iph1->dpd_fails = 0; + iph1->dpd_r_u = NULL; + #endif ++ evt_list_init(&iph1->evt_listeners); + + return iph1; + } +@@ -283,8 +284,7 @@ + + /* SA down shell script hook */ + script_hook(iph1, SCRIPT_PHASE1_DOWN); +- +- EVT_PUSH(iph1->local, iph1->remote, EVTT_PHASE1_DOWN, NULL); ++ evt_list_cleanup(&iph1->evt_listeners); + + #ifdef ENABLE_NATT + if (iph1->natt_flags & NAT_KA_QUEUED) +@@ -489,8 +489,8 @@ + + LIST_FOREACH(p, &ph2tree, chain) { + if (spid == p->spid && +- CMPSADDR(src, p->src) == 0 && +- CMPSADDR(dst, p->dst) == 0){ ++ cmpsaddrwild(src, p->src) == 0 && ++ cmpsaddrwild(dst, p->dst) == 0){ + /* Sanity check to detect zombie handlers + * XXX Sould be done "somewhere" more interesting, + * because we have lots of getph2byxxxx(), but this one +@@ -576,6 +576,7 @@ + return NULL; + + iph2->status = PHASE1ST_SPAWN; ++ evt_list_init(&iph2->evt_listeners); + + return iph2; + } +@@ -589,6 +590,8 @@ + initph2(iph2) + struct ph2handle *iph2; + { ++ evt_list_cleanup(&iph2->evt_listeners); ++ + sched_scrub_param(iph2); + iph2->sce = NULL; + 
iph2->scr = NULL; +Index: ipsec-tools-cvs/src/racoon/isakmp_agg.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_agg.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_agg.c 2008-01-04 15:18:21.000000000 +0200 +@@ -587,8 +587,7 @@ + /* message printed inner oakley_validate_auth() */ + goto end; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEERPH1AUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL); + isakmp_info_send_n1(iph1, ptype, NULL); + goto end; + } +@@ -1486,8 +1485,7 @@ + /* message printed inner oakley_validate_auth() */ + goto end; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEERPH1AUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL); + isakmp_info_send_n1(iph1, ptype, NULL); + goto end; + } +Index: ipsec-tools-cvs/src/racoon/isakmp_base.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_base.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_base.c 2008-01-04 15:18:21.000000000 +0200 +@@ -716,8 +716,7 @@ + /* message printed inner oakley_validate_auth() */ + goto end; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEERPH1AUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL); + isakmp_info_send_n1(iph1, ptype, NULL); + goto end; + } +@@ -1242,8 +1241,7 @@ + /* message printed inner oakley_validate_auth() */ + goto end; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEERPH1AUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL); + isakmp_info_send_n1(iph1, ptype, NULL); + goto end; + } +Index: ipsec-tools-cvs/src/racoon/isakmp.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp.c 2008-01-04 15:18:21.000000000 +0200 +@@ -88,6 +88,9 @@ + #include 
"pfkey.h" + #include "crypto_openssl.h" + #include "policy.h" ++#include "algorithm.h" ++#include "proposal.h" ++#include "sainfo.h" + #include "isakmp_ident.h" + #include "isakmp_agg.h" + #include "isakmp_base.h" +@@ -1026,7 +1029,7 @@ + } + + /* new negotiation of phase 1 for initiator */ +-int ++struct ph1handle * + isakmp_ph1begin_i(rmconf, remote, local) + struct remoteconf *rmconf; + struct sockaddr *remote, *local; +@@ -1039,7 +1042,7 @@ + /* get new entry to isakmp status table. */ + iph1 = newph1(); + if (iph1 == NULL) +- return -1; ++ return NULL; + + iph1->status = PHASE1ST_START; + iph1->rmconf = rmconf; +@@ -1055,7 +1058,7 @@ + if ((iph1->mode_cfg = isakmp_cfg_mkstate()) == NULL) { + remph1(iph1); + delph1(iph1); +- return -1; ++ return NULL; + } + #endif + #ifdef ENABLE_FRAG +@@ -1072,7 +1075,7 @@ + if (copy_ph1addresses(iph1, rmconf, remote, local) < 0) { + remph1(iph1); + delph1(iph1); +- return -1; ++ return NULL; + } + + (void)insph1(iph1); +@@ -1108,7 +1111,7 @@ + remph1(iph1); + delph1(iph1); + +- return -1; ++ return NULL; + } + + #ifdef ENABLE_STATS +@@ -1119,7 +1122,7 @@ + timedelta(&start, &end)); + #endif + +- return 0; ++ return iph1; + } + + /* new negotiation of phase 1 for responder */ +@@ -1929,8 +1932,7 @@ + plog(LLV_ERROR, LOCATION, NULL, + "phase1 negotiation failed due to time up. %s\n", + isakmp_pindex(&iph1->index, iph1->msgid)); +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEER_NO_RESPONSE, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_NO_RESPONSE, NULL); + + return -1; + } +@@ -1939,8 +1941,7 @@ + plog(LLV_ERROR, LOCATION, NULL, + "phase1 negotiation failed due to send error. %s\n", + isakmp_pindex(&iph1->index, iph1->msgid)); +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEER_NO_RESPONSE, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_NO_RESPONSE, NULL); + return -1; + } + +@@ -1989,7 +1990,7 @@ + plog(LLV_ERROR, LOCATION, NULL, + "phase2 negotiation failed due to time up. 
%s\n", + isakmp_pindex(&iph2->ph1->index, iph2->msgid)); +- EVT_PUSH(iph2->src, iph2->dst, EVTT_PEER_NO_RESPONSE, NULL); ++ evt_phase2(iph2, EVTT_PHASE2_NO_RESPONSE, NULL); + unbindph12(iph2); + return -1; + } +@@ -1998,8 +1999,7 @@ + plog(LLV_ERROR, LOCATION, NULL, + "phase2 negotiation failed due to send error. %s\n", + isakmp_pindex(&iph2->ph1->index, iph2->msgid)); +- EVT_PUSH(iph2->src, iph2->dst, EVTT_PEER_NO_RESPONSE, NULL); +- ++ evt_phase2(iph2, EVTT_PHASE2_NO_RESPONSE, NULL); + return -1; + } + +@@ -2090,7 +2090,7 @@ + plog(LLV_INFO, LOCATION, NULL, + "ISAKMP-SA deleted %s-%s spi:%s\n", + src, dst, isakmp_pindex(&iph1->index, 0)); +- EVT_PUSH(iph1->local, iph1->remote, EVTT_PHASE1_DOWN, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_DOWN, NULL); + racoon_free(src); + racoon_free(dst); + +@@ -2237,7 +2237,7 @@ + saddrwop2str(iph2->dst)); + + /* start phase 1 negotiation as a initiator. */ +- if (isakmp_ph1begin_i(rmconf, iph2->dst, iph2->src) < 0) { ++ if (isakmp_ph1begin_i(rmconf, iph2->dst, iph2->src) == NULL) { + SCHED_KILL(sc); + return -1; + } +@@ -2270,6 +2270,71 @@ + return 0; + } + ++int ++isakmp_get_sainfo(iph2, sp_out, sp_in) ++ struct ph2handle *iph2; ++ struct secpolicy *sp_out, *sp_in; ++{ ++ int remoteid=0; ++ ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "new acquire %s\n", spidx2str(&sp_out->spidx)); ++ ++ /* get sainfo */ ++ { ++ vchar_t *idsrc, *iddst; ++ ++ idsrc = ipsecdoi_sockaddr2id((struct sockaddr *)&sp_out->spidx.src, ++ sp_out->spidx.prefs, sp_out->spidx.ul_proto); ++ if (idsrc == NULL) { ++ plog(LLV_ERROR, LOCATION, NULL, ++ "failed to get ID for %s\n", ++ spidx2str(&sp_out->spidx)); ++ return -1; ++ } ++ iddst = ipsecdoi_sockaddr2id((struct sockaddr *)&sp_out->spidx.dst, ++ sp_out->spidx.prefd, sp_out->spidx.ul_proto); ++ if (iddst == NULL) { ++ plog(LLV_ERROR, LOCATION, NULL, ++ "failed to get ID for %s\n", ++ spidx2str(&sp_out->spidx)); ++ vfree(idsrc); ++ return -1; ++ } ++ { ++ struct remoteconf *conf; ++ conf = getrmconf(iph2->dst); ++ if 
(conf != NULL) ++ remoteid=conf->ph1id; ++ else{ ++ plog(LLV_DEBUG, LOCATION, NULL, "Warning: no valid rmconf !\n"); ++ remoteid=0; ++ } ++ } ++ iph2->sainfo = getsainfo(idsrc, iddst, NULL, remoteid); ++ vfree(idsrc); ++ vfree(iddst); ++ if (iph2->sainfo == NULL) { ++ plog(LLV_ERROR, LOCATION, NULL, ++ "failed to get sainfo.\n"); ++ return -1; ++ /* XXX should use the algorithm list from register message */ ++ } ++ ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "selected sainfo: %s\n", sainfo2str(iph2->sainfo)); ++ } ++ ++ if (set_proposal_from_policy(iph2, sp_out, sp_in) < 0) { ++ plog(LLV_ERROR, LOCATION, NULL, ++ "failed to create saprop.\n"); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++ + /* + * receive GETSPI from kernel. + */ +@@ -3021,9 +3086,9 @@ + src, dst, + isakmp_pindex(&iph1->index, 0)); + +- EVT_PUSH(iph1->local, iph1->remote, EVTT_PHASE1_UP, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_UP, NULL); + if(!iph1->rmconf->mode_cfg) +- EVT_PUSH(iph1->local, iph1->remote, EVTT_NO_ISAKMP_CFG, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_MODE_CFG, NULL); + + racoon_free(src); + racoon_free(dst); +Index: ipsec-tools-cvs/src/racoon/isakmp_cfg.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_cfg.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_cfg.c 2008-01-04 15:18:21.000000000 +0200 +@@ -473,8 +473,7 @@ + "Cannot allocate memory: %s\n", strerror(errno)); + } else { + memcpy(buf->v, attrpl + 1, buf->l); +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_ISAKMP_CFG_DONE, buf); ++ evt_phase1(iph1, EVTT_PHASE1_MODE_CFG, buf); + vfree(buf); + } + } +Index: ipsec-tools-cvs/src/racoon/isakmp_ident.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_ident.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_ident.c 2008-01-04 15:18:21.000000000 +0200 +@@ -788,8 +788,7 @@ + /* msg printed inner oakley_validate_auth() 
*/ + goto end; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEERPH1AUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL); + isakmp_info_send_n1(iph1, type, NULL); + goto end; + } +@@ -1537,8 +1536,7 @@ + /* msg printed inner oakley_validate_auth() */ + goto end; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEERPH1AUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_AUTH_FAILED, NULL); + isakmp_info_send_n1(iph1, type, NULL); + goto end; + } +Index: ipsec-tools-cvs/src/racoon/isakmp_inf.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_inf.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_inf.c 2008-01-04 15:18:21.000000000 +0200 +@@ -515,8 +515,7 @@ + del_ph1=getph1byindex((isakmp_index *)(delete + 1)); + if(del_ph1 != NULL){ + +- EVT_PUSH(del_ph1->local, del_ph1->remote, +- EVTT_PEERPH1_NOPROP, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_PEER_DELETED, NULL); + if (del_ph1->scr) + SCHED_KILL(del_ph1->scr); + +@@ -537,8 +536,6 @@ + delete->spi_size, delete->proto_id); + return 0; + } +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_PEER_DELETE, NULL); + purge_ipsec_spi(iph1->remote, delete->proto_id, + (u_int32_t *)(delete + 1), num_spi); + break; +@@ -1615,7 +1612,7 @@ + plog(LLV_DEBUG, LOCATION, iph1->remote, "DPD monitoring....\n"); + + if (iph1->dpd_fails >= iph1->rmconf->dpd_maxfails) { +- EVT_PUSH(iph1->local, iph1->remote, EVTT_DPD_TIMEOUT, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_DPD_TIMEOUT, NULL); + purge_remote(iph1); + plog(LLV_DEBUG, LOCATION, iph1->remote, + "DPD: remote seems to be dead\n"); +Index: ipsec-tools-cvs/src/racoon/isakmp_xauth.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_xauth.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_xauth.c 2008-01-04 15:18:21.000000000 +0200 +@@ -1570,13 +1570,11 @@ + plog(LLV_ERROR, LOCATION, NULL, + "Xauth 
authentication failed\n"); + +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_XAUTH_FAILED, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_XAUTH_FAILED, NULL); + + iph1->mode_cfg->flags |= ISAKMP_CFG_DELETE_PH1; + } else { +- EVT_PUSH(iph1->local, iph1->remote, +- EVTT_XAUTH_SUCCESS, NULL); ++ evt_phase1(iph1, EVTT_PHASE1_XAUTH_SUCCESS, NULL); + } + + +Index: ipsec-tools-cvs/src/racoon/session.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/session.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/session.c 2008-01-04 15:18:21.000000000 +0200 +@@ -192,6 +192,7 @@ + /* scheduling */ + timeout = schedular(); + ++ nfds = evt_get_fdmask(nfds, &rfds); + error = select(nfds, &rfds, (fd_set *)0, (fd_set *)0, timeout); + if (error < 0) { + switch (errno) { +@@ -211,6 +212,7 @@ + (FD_ISSET(lcconf->sock_admin, &rfds))) + admin_handler(); + #endif ++ evt_handle_fdmask(&rfds); + + for (p = lcconf->myaddrs; p; p = p->next) { + if (!p->addr) +@@ -451,7 +453,7 @@ + case SIGTERM: + plog(LLV_INFO, LOCATION, NULL, + "caught signal %d\n", sig); +- EVT_PUSH(NULL, NULL, EVTT_RACOON_QUIT, NULL); ++ evt_generic(EVTT_RACOON_QUIT, NULL); + pfkey_send_flush(lcconf->sock_pfkey, + SADB_SATYPE_UNSPEC); + #ifdef ENABLE_FASTQUIT +Index: ipsec-tools-cvs/src/racoon/handler.h +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/handler.h 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/handler.h 2008-01-04 15:18:21.000000000 +0200 +@@ -41,6 +41,7 @@ + + #include "isakmp_var.h" + #include "oakley.h" ++#include "evt.h" + + /* Phase 1 handler */ + /* +@@ -211,7 +212,7 @@ + #ifdef ENABLE_HYBRID + struct isakmp_cfg_state *mode_cfg; /* ISAKMP mode config state */ + #endif +- ++ EVT_LISTENER_LIST(evt_listeners); + }; + + /* Phase 2 handler */ +@@ -320,6 +321,7 @@ + + LIST_ENTRY(ph2handle) chain; + LIST_ENTRY(ph2handle) ph1bind; /* chain to ph1handle */ ++ 
EVT_LISTENER_LIST(evt_listeners); + }; + + /* +Index: ipsec-tools-cvs/src/racoon/isakmp_var.h +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/isakmp_var.h 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/isakmp_var.h 2008-01-04 15:18:21.000000000 +0200 +@@ -35,6 +35,7 @@ + #define _ISAKMP_VAR_H + + #include "vmbuf.h" ++#include "policy.h" + + #define PORT_ISAKMP 500 + #define PORT_ISAKMP_NATT 4500 +@@ -62,8 +63,8 @@ + struct isakmp_pl_nonce; /* XXX */ + + extern int isakmp_handler __P((int)); +-extern int isakmp_ph1begin_i __P((struct remoteconf *, struct sockaddr *, +- struct sockaddr *)); ++extern struct ph1handle *isakmp_ph1begin_i __P((struct remoteconf *, ++ struct sockaddr *, struct sockaddr *)); + + extern vchar_t *isakmp_parsewoh __P((int, struct isakmp_gen *, int)); + extern vchar_t *isakmp_parse __P((vchar_t *)); +@@ -87,6 +88,7 @@ + extern void isakmp_ph2delete_stub __P((void *)); + extern void isakmp_ph2delete __P((struct ph2handle *)); + ++extern int isakmp_get_sainfo __P((struct ph2handle *, struct secpolicy *, struct secpolicy *)); + extern int isakmp_post_acquire __P((struct ph2handle *)); + extern int isakmp_post_getspi __P((struct ph2handle *)); + extern void isakmp_chkph1there_stub __P((void *)); +Index: ipsec-tools-cvs/src/racoon/racoonctl.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/racoonctl.c 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/racoonctl.c 2008-01-04 15:18:21.000000000 +0200 +@@ -4,6 +4,7 @@ + + /* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. ++ * Copyright (C) 2007 Timo Teras. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without +@@ -135,26 +136,24 @@ + struct evtmsg { + int type; + char *msg; +- enum { UNSPEC, ERROR, INFO } level; + } evtmsg[] = { +- { EVTT_PHASE1_UP, "Phase 1 established", INFO }, +- { EVTT_PHASE1_DOWN, "Phase 1 deleted", INFO }, +- { EVTT_XAUTH_SUCCESS, "Xauth exchange passed", INFO }, +- { EVTT_ISAKMP_CFG_DONE, "ISAKMP mode config done", INFO }, +- { EVTT_PHASE2_UP, "Phase 2 established", INFO }, +- { EVTT_PHASE2_DOWN, "Phase 2 deleted", INFO }, +- { EVTT_DPD_TIMEOUT, "Peer not reachable anymore", ERROR }, +- { EVTT_PEER_NO_RESPONSE, "Peer not responding", ERROR }, +- { EVTT_PEER_DELETE, "Peer terminated security association", ERROR }, +- { EVTT_RACOON_QUIT, "Raccon terminated", ERROR }, +- { EVTT_OVERFLOW, "Event queue overflow", ERROR }, +- { EVTT_XAUTH_FAILED, "Xauth exchange failed", ERROR }, +- { EVTT_PEERPH1AUTH_FAILED, "Peer failed phase 1 authentication " +- "(certificate problem?)", ERROR }, +- { EVTT_PEERPH1_NOPROP, "Peer failed phase 1 initiation " +- "(proposal problem?)", ERROR }, +- { 0, NULL, UNSPEC }, +- { EVTT_NO_ISAKMP_CFG, "No need for ISAKMP mode config ", INFO }, ++ { EVTT_RACOON_QUIT, "Racoon terminated" }, ++ ++ { EVTT_PHASE1_UP, "Phase 1 established" }, ++ { EVTT_PHASE1_DOWN, "Phase 1 deleted" }, ++ { EVTT_PHASE1_NO_RESPONSE, "Phase 1 error: peer not responding" }, ++ { EVTT_PHASE1_NO_PROPOSAL, "Phase 1 error: no proposal chosen" }, ++ { EVTT_PHASE1_AUTH_FAILED, ++ "Phase 1 error: authentication failed (bad certificate?)" }, ++ { EVTT_PHASE1_DPD_TIMEOUT, "Phase 1 error: dead peer detected" }, ++ { EVTT_PHASE1_MODE_CFG, "Phase 1 mode configuration done" }, ++ { EVTT_PHASE1_XAUTH_SUCCESS, "Phase 1 Xauth succeeded" }, ++ { EVTT_PHASE1_XAUTH_FAILED, "Phase 1 Xauth failed" }, ++ ++ { EVTT_PHASE2_NO_PHASE1, "Phase 2 error: no suitable phase 1" }, ++ { EVTT_PHASE2_UP, "Phase 2 established" }, ++ { EVTT_PHASE2_DOWN, "Phase 2 deleted" }, ++ { EVTT_PHASE2_NO_RESPONSE, "Phase 2 
error: no response" }, + }; + + static int get_proto __P((char *)); +@@ -184,6 +183,7 @@ + { IPPROTO_ICMP, "icmp" }, + { IPPROTO_TCP, "tcp" }, + { IPPROTO_UDP, "udp" }, ++ { IPPROTO_GRE, "gre" }, + { 0, NULL }, + }; + +@@ -193,31 +193,13 @@ + + char *pname; + int long_format = 0; +- +-#define EVTF_NONE 0x0000 /* Ignore any events */ +-#define EVTF_LOOP 0x0001 /* Loop awaiting for new events */ +-#define EVTF_CFG_STOP 0x0002 /* Stop after ISAKMP mode config */ +-#define EVTF_CFG 0x0004 /* Print ISAKMP mode config info */ +-#define EVTF_ALL 0x0008 /* Print any events */ +-#define EVTF_PURGE 0x0010 /* Print all available events */ +-#define EVTF_PH1DOWN_STOP 0x0020 /* Stop when phase 1 SA gets down */ +-#define EVTF_PH1DOWN 0x0040 /* Print that phase 1 SA got down */ +-#define EVTF_ERR 0x0080 /* Print any error */ +-#define EVTF_ERR_STOP 0x0100 /* Stop on any error */ +- +-int evt_filter = EVTF_NONE; +-time_t evt_start; ++int evt_quit_event = 0; + + void dump_isakmp_sa __P((char *, int)); + void dump_internal __P((char *, int)); + char *pindex_isakmp __P((isakmp_index *)); + void print_schedule __P((caddr_t, int)); +-void print_evt __P((caddr_t, int)); +-void print_cfg __P((caddr_t, int)); +-void print_err __P((caddr_t, int)); +-void print_ph1down __P((caddr_t, int)); +-void print_ph1up __P((caddr_t, int)); +-int evt_poll __P((void)); ++void print_evt __P((struct evt_common *)); + char * fixed_addr __P((char *, char *, int)); + + static void +@@ -226,12 +208,15 @@ + printf( + "Usage:\n" + " %s reload-config\n" ++" %s show-schedule\n" + " %s [-l [-l]] show-sa [protocol]\n" + " %s flush-sa [protocol]\n" + " %s delete-sa <saopts>\n" +-" %s establish-sa [-u identity] <saopts>\n" ++" %s establish-sa [-u identity] [-w] <saopts>\n" + " %s vpn-connect [-u identity] vpn_gateway\n" + " %s vpn-disconnect vpn_gateway\n" ++" %s show-event\n" ++" %s logout-user login\n" + "\n" + " <protocol>: \"isakmp\", \"esp\" or \"ah\".\n" + " In the case of \"show-sa\" or \"flush-sa\", you can 
use \"ipsec\".\n" +@@ -240,8 +225,8 @@ + " : {\"esp\",\"ah\"} <family> <src/prefixlen/port> <dst/prefixlen/port>\n" + " <ul_proto>\n" + " <family>: \"inet\" or \"inet6\"\n" +-" <ul_proto>: \"icmp\", \"tcp\", \"udp\" or \"any\"\n", +- pname, pname, pname, pname, pname, pname, pname); ++" <ul_proto>: \"icmp\", \"tcp\", \"udp\", \"gre\" or \"any\"\n", ++ pname, pname, pname, pname, pname, pname, pname, pname, pname, pname); + } + + /* +@@ -312,54 +297,24 @@ + + vfree(combuf); + +- if (com_recv(&combuf) != 0) +- goto bad; +- if (handle_recv(combuf) != 0) +- goto bad; +- +- vfree(combuf); ++ do { ++ if (com_recv(&combuf) != 0) ++ goto bad; ++ if (handle_recv(combuf) != 0) ++ goto bad; ++ vfree(combuf); ++ } while (evt_quit_event != 0); + +- if (evt_filter != EVTF_NONE) +- if (evt_poll() != 0) +- goto bad; +- ++ close(so); + exit(0); + +- bad: ++bad: ++ close(so); ++ if (errno == EEXIST) ++ exit(0); + exit(1); + } + +-int +-evt_poll(void) { +- struct timeval tv; +- vchar_t *recvbuf; +- vchar_t *sendbuf; +- +- if ((sendbuf = f_getevt(0, NULL)) == NULL) +- errx(1, "Cannot make combuf"); +- +- +- while (evt_filter & (EVTF_LOOP|EVTF_PURGE)) { +- /* handle_recv closes the socket time, so open it each time */ +- com_init(); +- +- if (com_send(sendbuf) != 0) +- errx(1, "Cannot send combuf"); +- +- if (com_recv(&recvbuf) == 0) { +- handle_recv(recvbuf); +- vfree(recvbuf); +- } +- +- tv.tv_sec = 0; +- tv.tv_usec = 10; +- (void)select(0, NULL, NULL, NULL, &tv); +- } +- +- vfree(sendbuf); +- return 0; +-} +- + /* %%% */ + /* + * return command buffer. +@@ -422,20 +377,8 @@ + vchar_t *buf; + struct admin_com *head; + +- /* +- * There are 3 ways of getting here +- * 1) racoonctl vc => evt_filter = (EVTF_LOOP|EVTF_CFG| ... 
) +- * 2) racoonctl es => evt_filter = EVTF_NONE +- * 3) racoonctl es -l => evt_filter = EVTF_LOOP +- * Catch the second case: show-event is here to purge all +- */ +- if (evt_filter == EVTF_NONE) +- evt_filter = (EVTF_ALL|EVTF_PURGE); +- +- if ((ac >= 1) && (strcmp(av[0], "-l") == 0)) +- evt_filter |= EVTF_LOOP; +- +- if (ac >= 2) ++ evt_quit_event = -1; ++ if (ac >= 1) + errx(1, "too many arguments"); + + buf = vmalloc(sizeof(*head)); +@@ -653,6 +596,7 @@ + char *id = NULL; + char *key = NULL; + struct admin_com_psk *acp; ++ int wait = 0; + + if (ac < 1) + errx(1, "insufficient arguments"); +@@ -673,6 +617,12 @@ + ac -= 2; + } + ++ if (ac >= 1 && strcmp(av[0], "-w") == 0) { ++ wait = 1; ++ av++; ++ ac--; ++ } ++ + /* need protocol */ + if (ac < 1) + errx(1, "insufficient arguments"); +@@ -687,12 +637,16 @@ + index = get_index(ac, av); + if (index == NULL) + return NULL; ++ if (wait) ++ evt_quit_event = EVTT_PHASE1_MODE_CFG; + break; + case ADMIN_PROTO_AH: + case ADMIN_PROTO_ESP: + index = get_index(ac, av); + if (index == NULL) + return NULL; ++ if (wait) ++ evt_quit_event = EVTT_PHASE2_UP; + break; + default: + errno = EPROTONOSUPPORT; +@@ -749,8 +703,7 @@ + if (ac < 1) + errx(1, "insufficient arguments"); + +- evt_filter = (EVTF_LOOP|EVTF_CFG|EVTF_CFG_STOP|EVTF_ERR|EVTF_ERR_STOP); +- time(&evt_start); ++ evt_quit_event = EVTT_PHASE1_MODE_CFG; + + /* Optional -u identity */ + if (strcmp(av[0], "-u") == 0) { +@@ -814,8 +767,7 @@ + if (ac > 1) + warnx("Extra arguments"); + +- evt_filter = +- (EVTF_PH1DOWN|EVTF_PH1DOWN_STOP|EVTF_LOOP|EVTF_ERR|EVTF_ERR_STOP); ++ evt_quit_event = EVTT_PHASE1_DOWN; + + nav[nac++] = isakmp; + nav[nac++] = inet; +@@ -1335,84 +1287,32 @@ + + + void +-print_evt(buf, len) +- caddr_t buf; +- int len; ++print_evt(evtdump) ++ struct evt_common *evtdump; + { +- struct evtdump *evtdump = (struct evtdump *)buf; + int i; + char *srcstr; + char *dststr; + +- for (i = 0; evtmsg[i].msg; i++) +- if (evtmsg[i].type == evtdump->type) +- break; +- +- if 
(evtmsg[i].msg == NULL) +- printf("Event %d: ", evtdump->type); ++ for (i = 0; i < sizeof(evtmsg) / sizeof(evtmsg[0]); i++) ++ if (evtmsg[i].type == evtdump->ec_type) ++ break; ++ ++ if (evtmsg[i].msg == NULL) ++ printf("Event %d: ", evtdump->ec_type); + else + printf("%s : ", evtmsg[i].msg); + +- if ((srcstr = saddr2str((struct sockaddr *)&evtdump->src)) == NULL) ++ if ((srcstr = saddr2str((struct sockaddr *)&evtdump->ec_ph1src)) == NULL) + printf("unknown"); +- else ++ else + printf("%s", srcstr); + printf(" -> "); +- if ((dststr = saddr2str((struct sockaddr *)&evtdump->dst)) == NULL) ++ if ((dststr = saddr2str((struct sockaddr *)&evtdump->ec_ph1dst)) == NULL) + printf("unknown"); +- else ++ else + printf("%s", dststr); + printf("\n"); +- +- return; +-} +- +-void +-print_err(buf, len) +- caddr_t buf; +- int len; +-{ +- struct evtdump *evtdump = (struct evtdump *)buf; +- int i; +- +- +- for (i = 0; evtmsg[i].msg; i++) +- if (evtmsg[i].type == evtdump->type) +- break; +- +- if (evtmsg[i].level != ERROR) +- return; +- +- if (evtmsg[i].msg == NULL) +- printf("Error: Event %d\n", evtdump->type); +- else +- printf("Error: %s\n", evtmsg[i].msg); +- +- if (evt_filter & EVTF_ERR_STOP) +- evt_filter &= ~EVTF_LOOP; +- +- return; +-} +- +-/* +- * Print a message when phase 1 SA goes down +- */ +-void +-print_ph1down(buf, len) +- caddr_t buf; +- int len; +-{ +- struct evtdump *evtdump = (struct evtdump *)buf; +- +- if (evtdump->type != EVTT_PHASE1_DOWN) +- return; +- +- printf("VPN connexion terminated\n"); +- +- if (evt_filter & EVTF_PH1DOWN_STOP) +- evt_filter &= ~EVTF_LOOP; +- +- return; + } + + /* +@@ -1423,15 +1323,14 @@ + caddr_t buf; + int len; + { +- struct evtdump *evtdump = (struct evtdump *)buf; ++ struct evt_common *evtdump = (struct evt_common *)buf; + struct isakmp_data *attr; + char *banner = NULL; + struct in_addr addr4; + + memset(&addr4, 0, sizeof(addr4)); + +- if (evtdump->type != EVTT_ISAKMP_CFG_DONE && +- evtdump->type != EVTT_NO_ISAKMP_CFG) ++ if 
(evtdump->ec_type != EVTT_PHASE1_MODE_CFG) + return; + + len -= sizeof(*evtdump); +@@ -1484,12 +1383,12 @@ + (n + sizeof(*attr) + ntohs(attr->lorv)); + } + } +- +- if (evtdump->type == EVTT_ISAKMP_CFG_DONE) ++ ++ if (len > 0) + printf("Bound to address %s\n", inet_ntoa(addr4)); + else + printf("VPN connexion established\n"); +- ++ + if (banner) { + struct winsize win; + int col = 0; +@@ -1506,13 +1405,8 @@ + printf("\n"); + racoon_free(banner); + } +- +- if (evt_filter & EVTF_CFG_STOP) +- evt_filter &= ~EVTF_LOOP; +- +- return; + } +- ++ + + char * + fixed_addr(addr, port, len) +@@ -1561,32 +1455,29 @@ + break; + + case ADMIN_SHOW_EVT: { +- struct evtdump *evtdump; ++ struct evt_common *ec; + +- /* We got no event */ +- if (len == 0) { +- /* If we were purging the queue, it is now done */ +- if (evt_filter & EVTF_PURGE) +- evt_filter &= ~EVTF_PURGE; ++ /* We got no event? */ ++ if (len == 0) + break; +- } +- +- if (len < sizeof(struct evtdump)) +- errx(1, "Short buffer\n"); + +- /* Toss outdated events */ +- evtdump = (struct evtdump *)buf; +- if (evtdump->timestamp < evt_start) +- break; ++ if (len < sizeof(struct evt_common)) ++ errx(1, "Short buffer\n"); + +- if (evt_filter & EVTF_ALL) +- print_evt(buf, len); +- if (evt_filter & EVTF_ERR) +- print_err(buf, len); +- if (evt_filter & EVTF_CFG) +- print_cfg(buf, len); +- if (evt_filter & EVTF_PH1DOWN) +- print_ph1down(buf, len); ++ ec = (struct evt_common *) buf; ++ if (evt_quit_event <= 0) ++ print_evt(ec); ++ else if (evt_quit_event == ec->ec_type) { ++ switch (ec->ec_type) { ++ case EVTT_PHASE1_MODE_CFG: ++ print_cfg(ec, len); ++ break; ++ default: ++ print_evt(ec); ++ break; ++ }; ++ evt_quit_event = 0; ++ } + break; + } + +@@ -1643,10 +1534,8 @@ + break; + } + +- close(so); + return 0; + +- bad: +- close(so); ++bad: + return -1; + } +Index: ipsec-tools-cvs/src/racoon/admin.c +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/admin.c 2008-01-04 
15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/admin.c 2008-01-04 15:18:21.000000000 +0200 +@@ -76,6 +76,7 @@ + #include "evt.h" + #include "pfkey.h" + #include "ipsec_doi.h" ++#include "policy.h" + #include "admin.h" + #include "admin_var.h" + #include "isakmp_inf.h" +@@ -147,16 +148,18 @@ + goto end; + } + +- if (com.ac_cmd == ADMIN_RELOAD_CONF) { +- /* reload does not work at all! */ +- signal_handler(SIGHUP); +- goto end; +- } ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "[%d] admin connection established\n", so2); + + error = admin_process(so2, combuf); + +- end: +- (void)close(so2); ++end: ++ if (error != -2) { ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "[%d] admin connection closed\n", so2); ++ (void)close(so2); ++ } ++ + if (combuf) + racoon_free(combuf); + +@@ -177,13 +180,15 @@ + vchar_t *key = NULL; + int idtype = 0; + int error = -1; ++ int send_events = 0; ++ struct evt_listener_list *event_list = NULL; + + com->ac_errno = 0; + + switch (com->ac_cmd) { + case ADMIN_RELOAD_CONF: +- /* don't entered because of proccessing it in other place. 
*/ +- plog(LLV_ERROR, LOCATION, NULL, "should never reach here\n"); ++ signal_handler(SIGHUP); ++ error = 0; + goto out; + + case ADMIN_SHOW_SCHED: +@@ -208,9 +213,7 @@ + } + + case ADMIN_SHOW_EVT: +- /* It's not really an error, don't force racoonctl to quit */ +- if ((buf = evt_dump()) == NULL) +- com->ac_errno = 0; ++ send_events = 1; + break; + + case ADMIN_SHOW_SA: +@@ -391,17 +394,17 @@ + /* FALLTHROUGH */ + case ADMIN_ESTABLISH_SA: + { ++ struct admin_com_indexes *ndx; + struct sockaddr *dst; + struct sockaddr *src; +- src = (struct sockaddr *) +- &((struct admin_com_indexes *) +- ((caddr_t)com + sizeof(*com)))->src; +- dst = (struct sockaddr *) +- &((struct admin_com_indexes *) +- ((caddr_t)com + sizeof(*com)))->dst; ++ ++ ndx = (struct admin_com_indexes *) ((caddr_t)com + sizeof(*com)); ++ src = (struct sockaddr *) &ndx->src; ++ dst = (struct sockaddr *) &ndx->dst; + + switch (com->ac_proto) { + case ADMIN_PROTO_ISAKMP: { ++ struct ph1handle *ph1; + struct remoteconf *rmconf; + struct sockaddr *remote = NULL; + struct sockaddr *local = NULL; +@@ -409,6 +412,17 @@ + + com->ac_errno = -1; + ++ /* connected already? 
*/ ++ ph1 = getph1byaddrwop(src, dst); ++ if (ph1 != NULL) { ++ event_list = &ph1->evt_listeners; ++ if (ph1->status == PHASE1ST_ESTABLISHED) ++ com->ac_errno = EEXIST; ++ else ++ com->ac_errno = 0; ++ break; ++ } ++ + /* search appropreate configuration */ + rmconf = getrmconf(dst); + if (rmconf == NULL) { +@@ -459,9 +473,11 @@ + "%s\n", saddrwop2str(remote)); + + /* begin ident mode */ +- if (isakmp_ph1begin_i(rmconf, remote, local) < 0) ++ ph1 = isakmp_ph1begin_i(rmconf, remote, local); ++ if (ph1 == NULL) + goto out1; + ++ event_list = &ph1->evt_listeners; + com->ac_errno = 0; + out1: + if (local != NULL) +@@ -471,8 +487,105 @@ + break; + } + case ADMIN_PROTO_AH: +- case ADMIN_PROTO_ESP: ++ case ADMIN_PROTO_ESP: { ++ struct ph2handle *iph2; ++ struct secpolicy *sp_out = NULL, *sp_in = NULL; ++ struct policyindex spidx; ++ ++ com->ac_errno = -1; ++ ++ /* got outbound policy */ ++ memset(&spidx, 0, sizeof(spidx)); ++ spidx.dir = IPSEC_DIR_OUTBOUND; ++ memcpy(&spidx.src, src, sizeof(spidx.src)); ++ memcpy(&spidx.dst, dst, sizeof(spidx.dst)); ++ spidx.prefs = ndx->prefs; ++ spidx.prefd = ndx->prefd; ++ spidx.ul_proto = ndx->ul_proto; ++ ++ sp_out = getsp_r(&spidx); ++ if (sp_out) { ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "suitable outbound SP found: %s.\n", ++ spidx2str(&sp_out->spidx)); ++ } else { ++ com->ac_errno = ENOENT; ++ plog(LLV_NOTIFY, LOCATION, NULL, ++ "no outbound policy found: %s\n", ++ spidx2str(&spidx)); ++ break; ++ } ++ ++ iph2 = getph2byid(src, dst, sp_out->id); ++ if (iph2 != NULL) { ++ event_list = &iph2->evt_listeners; ++ if (iph2->status == PHASE2ST_ESTABLISHED) ++ com->ac_errno = EEXIST; ++ else ++ com->ac_errno = 0; ++ break; ++ } ++ ++ /* get inbound policy */ ++ memset(&spidx, 0, sizeof(spidx)); ++ spidx.dir = IPSEC_DIR_INBOUND; ++ memcpy(&spidx.src, dst, sizeof(spidx.src)); ++ memcpy(&spidx.dst, src, sizeof(spidx.dst)); ++ spidx.prefs = ndx->prefd; ++ spidx.prefd = ndx->prefs; ++ spidx.ul_proto = ndx->ul_proto; ++ ++ sp_in = 
getsp_r(&spidx); ++ if (sp_in) { ++ plog(LLV_DEBUG, LOCATION, NULL, ++ "suitable inbound SP found: %s.\n", ++ spidx2str(&sp_in->spidx)); ++ } else { ++ com->ac_errno = ENOENT; ++ plog(LLV_NOTIFY, LOCATION, NULL, ++ "no inbound policy found: %s\n", ++ spidx2str(&spidx)); ++ break; ++ } ++ ++ /* allocate a phase 2 */ ++ iph2 = newph2(); ++ if (iph2 == NULL) { ++ plog(LLV_ERROR, LOCATION, NULL, ++ "failed to allocate phase2 entry.\n"); ++ break; ++ } ++ iph2->side = INITIATOR; ++ iph2->satype = admin2pfkey_proto(com->ac_proto); ++ iph2->spid = sp_out->id; ++ iph2->seq = pk_getseq(); ++ iph2->status = PHASE2ST_STATUS2; ++ ++ /* set end addresses of SA */ ++ iph2->dst = dupsaddr(dst); ++ iph2->src = dupsaddr(src); ++ if (iph2->dst == NULL || iph2->src == NULL) { ++ delph2(iph2); ++ break; ++ } ++ ++ if (isakmp_get_sainfo(iph2, sp_out, sp_in) < 0) { ++ delph2(iph2); ++ break; ++ } ++ ++ insph2(iph2); ++ if (isakmp_post_acquire(iph2) < 0) { ++ unbindph12(iph2); ++ remph2(iph2); ++ delph2(iph2); ++ break; ++ } ++ ++ event_list = &iph2->evt_listeners; ++ com->ac_errno = 0; + break; ++ } + default: + /* ignore */ + com->ac_errno = -1; +@@ -489,7 +602,8 @@ + if ((error = admin_reply(so2, com, buf)) != 0) + goto out; + +- error = 0; ++ if (send_events || event_list != NULL) ++ error = evt_subscribe(event_list, so2); + out: + if (buf != NULL) + vfree(buf); +Index: ipsec-tools-cvs/src/racoon/racoonctl.8 +=================================================================== +--- ipsec-tools-cvs.orig/src/racoon/racoonctl.8 2008-01-04 15:17:50.000000000 +0200 ++++ ipsec-tools-cvs/src/racoon/racoonctl.8 2008-01-04 15:18:21.000000000 +0200 +@@ -55,17 +55,17 @@ + .Nm + establish-sa + .Op Fl u Ar identity ++.Op Fl w + .Ar saopts + .Nm + vpn-connect +-.Op Fl u identity ++.Op Fl u Ar identity + .Ar vpn_gateway + .Nm + vpn-disconnect + .Ar vpn_gateway + .Nm + show-event +-.Op Fl l + .Nm + logout-user + .Ar login +@@ -104,6 +104,8 @@ + either ISAKMP SAs, IPsec ESP SAs, IPsec AH SAs, or all 
IPsec SAs. + .It Xo establish-sa + .Oo Fl u Ar username ++.Oc ++.Oo Fl w + .Oc Ar saopts + .Xc + Establish an SA, either an ISAKMP SA, IPsec ESP SA, or IPsec AH SA. +@@ -115,6 +117,11 @@ + .Ar username + and these credentials will be used in the Xauth exchange. + .Pp ++Specifying ++.Fl w ++will make racoonctl wait until the SA is actually established or ++an error occurs. ++.Pp + .Ar saopts + has the following format: + .Bl -tag -width Bl +@@ -135,16 +142,9 @@ + This is a particular case of the previous command. + It will kill all SAs associated with + .Ar vpn_gateway . +-.It show-event Op Fl l +-Dump all events reported by +-.Xr racoon 8 , +-then quit. +-The +-.Fl l +-flag causes +-.Nm +-to not stop once all the events have been read, but rather to loop +-awaiting and reporting new events. ++.It show-event ++Listen for all events reported by ++.Xr racoon 8 . + .It logout-user Ar login + Delete all SA established on behalf of the Xauth user + .Ar login . diff --git a/patches/linux-2.6.19-ipgre.diff b/patches/linux-2.6.19-ipgre.diff new file mode 100644 index 0000000..655b175 --- /dev/null +++ b/patches/linux-2.6.19-ipgre.diff @@ -0,0 +1,44 @@ +Index: linux-2.6.19/net/ipv4/ip_gre.c +=================================================================== +--- linux-2.6.19.orig/net/ipv4/ip_gre.c 2006-11-29 23:57:37.000000000 +0200 ++++ linux-2.6.19/net/ipv4/ip_gre.c 2008-01-31 08:50:21.000000000 +0200 +@@ -1033,7 +1033,13 @@ + return 0; + } + +-#ifdef CONFIG_NET_IPGRE_BROADCAST ++static int ipgre_tunnel_parse_header(struct sk_buff *skb, unsigned char *haddr) ++{ ++ struct iphdr *iph = (struct iphdr*) skb->mac.raw; ++ memcpy(haddr, &iph->saddr, 4); ++ return 4; ++} ++ + /* Nice toy. Unfortunately, useless in real life :-) + It allows to construct virtual multiprotocol broadcast "LAN" + over the Internet, provided multicast routing is tuned. 
+@@ -1091,6 +1097,7 @@ + return -t->hlen; + } + ++#ifdef CONFIG_NET_IPGRE_BROADCAST + static int ipgre_open(struct net_device *dev) + { + struct ip_tunnel *t = netdev_priv(dev); +@@ -1139,6 +1146,7 @@ + dev->get_stats = ipgre_tunnel_get_stats; + dev->do_ioctl = ipgre_tunnel_ioctl; + dev->change_mtu = ipgre_tunnel_change_mtu; ++ dev->hard_header_parse = ipgre_tunnel_parse_header; + + dev->type = ARPHRD_IPGRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; +@@ -1193,6 +1201,8 @@ + dev->stop = ipgre_close; + } + #endif ++ } else { ++ dev->hard_header = ipgre_header; + } + + if (!tdev && tunnel->parms.link) diff --git a/patches/linux-2.6.20-ipgre.diff b/patches/linux-2.6.20-ipgre.diff new file mode 100644 index 0000000..a78ed17 --- /dev/null +++ b/patches/linux-2.6.20-ipgre.diff @@ -0,0 +1,44 @@ +Index: linux-2.6.20/net/ipv4/ip_gre.c +=================================================================== +--- linux-2.6.20.orig/net/ipv4/ip_gre.c 2008-01-04 15:05:34.000000000 +0200 ++++ linux-2.6.20/net/ipv4/ip_gre.c 2008-01-04 15:05:37.000000000 +0200 +@@ -1033,7 +1033,13 @@ + return 0; + } + +-#ifdef CONFIG_NET_IPGRE_BROADCAST ++static int ipgre_tunnel_parse_header(struct sk_buff *skb, unsigned char *haddr) ++{ ++ struct iphdr *iph = (struct iphdr*) skb_mac_header(skb); ++ memcpy(haddr, &iph->saddr, 4); ++ return 4; ++} ++ + /* Nice toy. Unfortunately, useless in real life :-) + It allows to construct virtual multiprotocol broadcast "LAN" + over the Internet, provided multicast routing is tuned. 
+@@ -1091,6 +1097,7 @@ + return -t->hlen; + } + ++#ifdef CONFIG_NET_IPGRE_BROADCAST + static int ipgre_open(struct net_device *dev) + { + struct ip_tunnel *t = netdev_priv(dev); +@@ -1139,6 +1146,7 @@ + dev->get_stats = ipgre_tunnel_get_stats; + dev->do_ioctl = ipgre_tunnel_ioctl; + dev->change_mtu = ipgre_tunnel_change_mtu; ++ dev->hard_header_parse = ipgre_tunnel_parse_header; + + dev->type = ARPHRD_IPGRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; +@@ -1193,6 +1201,8 @@ + dev->stop = ipgre_close; + } + #endif ++ } else { ++ dev->hard_header = ipgre_header; + } + + if (!tdev && tunnel->parms.link) diff --git a/patches/linux-2.6.22-ipgre.diff b/patches/linux-2.6.22-ipgre.diff new file mode 100644 index 0000000..59d4292 --- /dev/null +++ b/patches/linux-2.6.22-ipgre.diff @@ -0,0 +1,53 @@ +Index: linux-2.6.20/net/ipv4/ip_gre.c +=================================================================== +--- linux-2.6.20.orig/net/ipv4/ip_gre.c 2008-01-04 15:06:32.000000000 +0200 ++++ linux-2.6.20/net/ipv4/ip_gre.c 2008-01-04 15:08:50.000000000 +0200 +@@ -613,7 +613,7 @@ + offset += 4; + } + +- skb_reset_mac_header(skb); ++ skb->mac_header = skb->network_header; + __pskb_pull(skb, offset); + skb_reset_network_header(skb); + skb_postpull_rcsum(skb, skb_transport_header(skb), offset); +@@ -1032,7 +1032,13 @@ + return 0; + } + +-#ifdef CONFIG_NET_IPGRE_BROADCAST ++static int ipgre_tunnel_parse_header(struct sk_buff *skb, unsigned char *haddr) ++{ ++ struct iphdr *iph = (struct iphdr*) skb_mac_header(skb); ++ memcpy(haddr, &iph->saddr, 4); ++ return 4; ++} ++ + /* Nice toy. Unfortunately, useless in real life :-) + It allows to construct virtual multiprotocol broadcast "LAN" + over the Internet, provided multicast routing is tuned. 
+@@ -1090,6 +1096,7 @@ + return -t->hlen; + } + ++#ifdef CONFIG_NET_IPGRE_BROADCAST + static int ipgre_open(struct net_device *dev) + { + struct ip_tunnel *t = netdev_priv(dev); +@@ -1138,6 +1145,7 @@ + dev->get_stats = ipgre_tunnel_get_stats; + dev->do_ioctl = ipgre_tunnel_ioctl; + dev->change_mtu = ipgre_tunnel_change_mtu; ++ dev->hard_header_parse = ipgre_tunnel_parse_header; + + dev->type = ARPHRD_IPGRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; +@@ -1192,6 +1200,8 @@ + dev->stop = ipgre_close; + } + #endif ++ } else { ++ dev->hard_header = ipgre_header; + } + + if (!tdev && tunnel->parms.link) |