diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Makefile.am | 2 | ||||
| -rw-r--r-- | src/build.c | 99 | ||||
| -rw-r--r-- | src/cache-ct.c | 6 | ||||
| -rw-r--r-- | src/cache-exp.c | 308 | ||||
| -rw-r--r-- | src/external_cache.c | 85 | ||||
| -rw-r--r-- | src/external_inject.c | 95 | ||||
| -rw-r--r-- | src/filter.c | 76 | ||||
| -rw-r--r-- | src/internal_bypass.c | 146 | ||||
| -rw-r--r-- | src/internal_cache.c | 173 | ||||
| -rw-r--r-- | src/log.c | 37 | ||||
| -rw-r--r-- | src/main.c | 69 | ||||
| -rw-r--r-- | src/netlink.c | 63 | ||||
| -rw-r--r-- | src/network.c | 5 | ||||
| -rw-r--r-- | src/parse.c | 192 | ||||
| -rw-r--r-- | src/read_config_lex.l | 1 | ||||
| -rw-r--r-- | src/read_config_yy.y | 50 | ||||
| -rw-r--r-- | src/run.c | 206 | ||||
| -rw-r--r-- | src/sync-ftfw.c | 7 | ||||
| -rw-r--r-- | src/sync-mode.c | 165 | ||||
| -rw-r--r-- | src/sync-notrack.c | 6 | 
20 files changed, 1728 insertions, 63 deletions
| diff --git a/src/Makefile.am b/src/Makefile.am index a0abeee..7d7b2ac 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -12,7 +12,7 @@ conntrack_LDADD = ../extensions/libct_proto_tcp.la ../extensions/libct_proto_udp  conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \  		    local.c log.c mcast.c udp.c netlink.c vector.c \  		    filter.c fds.c event.c process.c origin.c date.c \ -		    cache.c cache-ct.c \ +		    cache.c cache-ct.c cache-exp.c \  		    cache_timer.c \  		    sync-mode.c sync-alarm.c sync-ftfw.c sync-notrack.c \  		    traffic_stats.c stats-mode.c \ diff --git a/src/build.c b/src/build.c index 9c3687c..3193884 100644 --- a/src/build.c +++ b/src/build.c @@ -224,3 +224,102 @@ void ct2msg(const struct nf_conntrack *ct, struct nethdr *n)  	if (nfct_attr_is_set_array(ct, nat_type, 6))  		ct_build_natseqadj(ct, n);  } + +static void +exp_build_l4proto_tcp(const struct nf_conntrack *ct, struct nethdr *n, int a) +{ +	ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a, +			sizeof(struct nfct_attr_grp_port)); +} + +static void +exp_build_l4proto_sctp(const struct nf_conntrack *ct, struct nethdr *n, int a) +{ +	ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a, +			sizeof(struct nfct_attr_grp_port)); +} + +static void +exp_build_l4proto_dccp(const struct nf_conntrack *ct, struct nethdr *n, int a) +{ +	ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a, +		      sizeof(struct nfct_attr_grp_port)); +} + +static void +exp_build_l4proto_udp(const struct nf_conntrack *ct, struct nethdr *n, int a) +{ +	ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a, +		      sizeof(struct nfct_attr_grp_port)); +} + +static struct exp_build_l4proto { +	void (*build)(const struct nf_conntrack *, struct nethdr *n, int a); +} exp_l4proto_fcn[IPPROTO_MAX] = { +	[IPPROTO_TCP]		= { .build = exp_build_l4proto_tcp }, +	[IPPROTO_SCTP]		= { .build = exp_build_l4proto_sctp }, +	[IPPROTO_DCCP]		= { .build = exp_build_l4proto_dccp }, +	[IPPROTO_UDP]		= { .build = exp_build_l4proto_udp }, +}; + +static inline void +exp_build_u32(const struct nf_expect *exp, int a, struct nethdr *n, int b) +{ +	uint32_t data = nfexp_get_attr_u32(exp, a); +	data = htonl(data); +	addattr(n, b, &data, sizeof(uint32_t)); +} + +void exp2msg(const struct nf_expect *exp, struct nethdr *n) +{ +	const struct nf_conntrack *ct = nfexp_get_attr(exp, ATTR_EXP_MASTER); +	uint8_t l4proto = nfct_get_attr_u8(ct, ATTR_L4PROTO); + +	/* master conntrack for this expectation. */ +	if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV4)) { +		ct_build_group(ct, ATTR_GRP_ORIG_IPV4, n, NTA_EXP_MASTER_IPV4, +			      sizeof(struct nfct_attr_grp_ipv4)); +	} else if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV6)) { +		ct_build_group(ct, ATTR_GRP_ORIG_IPV6, n, NTA_EXP_MASTER_IPV6, +			      sizeof(struct nfct_attr_grp_ipv6)); +	} +	ct_build_u8(ct, ATTR_L4PROTO, n, NTA_EXP_MASTER_L4PROTO); + +	if (exp_l4proto_fcn[l4proto].build) +		exp_l4proto_fcn[l4proto].build(ct, n, NTA_EXP_MASTER_PORT); + +	/* the expectation itself. */ +	ct = nfexp_get_attr(exp, ATTR_EXP_EXPECTED); + +	if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV4)) { +		ct_build_group(ct, ATTR_GRP_ORIG_IPV4, n, NTA_EXP_EXPECT_IPV4, +			      sizeof(struct nfct_attr_grp_ipv4)); +	} else if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV6)) { +		ct_build_group(ct, ATTR_GRP_ORIG_IPV6, n, NTA_EXP_EXPECT_IPV6, +			      sizeof(struct nfct_attr_grp_ipv6)); +	} +	ct_build_u8(ct, ATTR_L4PROTO, n, NTA_EXP_EXPECT_L4PROTO); + +	if (exp_l4proto_fcn[l4proto].build) +		exp_l4proto_fcn[l4proto].build(ct, n, NTA_EXP_EXPECT_PORT); + +	/* mask for the expectation. */ +	ct = nfexp_get_attr(exp, ATTR_EXP_MASK); + +	if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV4)) { +		ct_build_group(ct, ATTR_GRP_ORIG_IPV4, n, NTA_EXP_MASK_IPV4, +			      sizeof(struct nfct_attr_grp_ipv4)); +	} else if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV6)) { +		ct_build_group(ct, ATTR_GRP_ORIG_IPV6, n, NTA_EXP_MASK_IPV6, +			      sizeof(struct nfct_attr_grp_ipv6)); +	} +	ct_build_u8(ct, ATTR_L4PROTO, n, NTA_EXP_MASK_L4PROTO); + +	if (exp_l4proto_fcn[l4proto].build) +		exp_l4proto_fcn[l4proto].build(ct, n, NTA_EXP_MASK_PORT); + +	if (!CONFIG(commit_timeout) && nfexp_attr_is_set(exp, ATTR_EXP_TIMEOUT)) +		exp_build_u32(exp, ATTR_EXP_TIMEOUT, n, NTA_EXP_TIMEOUT); + +	exp_build_u32(exp, ATTR_EXP_FLAGS, n, NTA_EXP_FLAGS); +} diff --git a/src/cache-ct.c b/src/cache-ct.c index 2c6fd4e..0ad8d2a 100644 --- a/src/cache-ct.c +++ b/src/cache-ct.c @@ -251,7 +251,7 @@ static int cache_ct_commit(struct cache *c, struct nfct_handle *h, int clientfd)  	/* we already have one commit in progress, skip this. The clientfd  	 * descriptor has to be closed by the caller. */  	if (clientfd && STATE_SYNC(commit).clientfd != -1) -		return 0; +		return -1;  	switch(STATE_SYNC(commit).state) {  	case COMMIT_STATE_INACTIVE: @@ -308,9 +308,7 @@ static int cache_ct_commit(struct cache *c, struct nfct_handle *h, int clientfd)  		STATE_SYNC(commit).current = 0;  		STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE; -		/* Close the client socket now that we're done. */ -		close(STATE_SYNC(commit).clientfd); -		STATE_SYNC(commit).clientfd = -1; +		return 0;  	}  	return 1;  } diff --git a/src/cache-exp.c b/src/cache-exp.c new file mode 100644 index 0000000..e88877a --- /dev/null +++ b/src/cache-exp.c @@ -0,0 +1,308 @@ +/* + * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2011 by Vyatta Inc. <http://www.vyatta.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "cache.h" +#include "hash.h" +#include "log.h" +#include "conntrackd.h" +#include "netlink.h" +#include "event.h" +#include "jhash.h" +#include "network.h" + +#include <errno.h> +#include <string.h> +#include <time.h> +#include <libnetfilter_conntrack/libnetfilter_conntrack.h> + +static uint32_t +cache_hash4_exp(const struct nf_conntrack *ct, const struct hashtable *table) +{ +	uint32_t a[4] = { +		[0]	= nfct_get_attr_u32(ct, ATTR_IPV4_SRC), +		[1]	= nfct_get_attr_u32(ct, ATTR_IPV4_DST), +		[2]	= nfct_get_attr_u8(ct, ATTR_L3PROTO) << 16 | +			  nfct_get_attr_u8(ct, ATTR_L4PROTO), +		[3]	= nfct_get_attr_u16(ct, ATTR_PORT_SRC) << 16 | +			  nfct_get_attr_u16(ct, ATTR_PORT_DST), +	}; + +	/* +	 * Instead of returning hash % table->hashsize (implying a divide) +	 * we return the high 32 bits of the (hash * table->hashsize) that will +	 * give results between [0 and hashsize-1] and same hash distribution, +	 * but using a multiply, less expensive than a divide. See: +	 * http://www.mail-archive.com/netdev@vger.kernel.org/msg56623.html +	 */ +	return ((uint64_t)jhash2(a, 4, 0) * table->hashsize) >> 32; +} + +static uint32_t +cache_hash6_exp(const struct nf_conntrack *ct, const struct hashtable *table) +{ +	uint32_t a[10]; + +	memcpy(&a[0], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); +	memcpy(&a[4], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); +	a[8] = nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16 | +	       nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO); +	a[9] = nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16 | +	       nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST); + +	return ((uint64_t)jhash2(a, 10, 0) * table->hashsize) >> 32; +} + +static uint32_t +cache_exp_hash(const void *data, const struct hashtable *table) +{ +	int ret = 0; +	const struct nf_expect *exp = data; +	const struct nf_conntrack *ct = nfexp_get_attr(exp, ATTR_EXP_MASTER); + +	switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) { +		case AF_INET: +			ret = cache_hash4_exp(ct, table); +			break; +		case AF_INET6: +			ret = cache_hash6_exp(ct, table); +			break; +		default: +			dlog(LOG_ERR, "unknown layer 3 proto in hash"); +			break; +	} +	return ret; +} + +static int cache_exp_cmp(const void *data1, const void *data2) +{ +	const struct cache_object *obj = data1; +	const struct nf_expect *exp = data2; + +	return nfexp_cmp(obj->ptr, exp, 0); +} + +static void *cache_exp_alloc(void) +{ +	return nfexp_new(); +} + +static void cache_exp_free(void *ptr) +{ +	nfexp_destroy(ptr); +} + +static void cache_exp_copy(void *dst, void *src, unsigned int flags) +{ +	/* XXX: add nfexp_copy(...) to libnetfilter_conntrack. */ +	memcpy(dst, src, nfexp_maxsize()); +} + +static int cache_exp_dump_step(void *data1, void *n) +{ +	char buf[1024]; +	int size; +	struct __dump_container *container = data1; +	struct cache_object *obj = n; +	char *data = obj->data; +	unsigned i; + +	/* +	 * XXX: Do not dump the entries that are scheduled to expire. +	 *	These entries talk about already destroyed connections +	 *	that we keep for some time just in case that we have to +	 *	resent some lost messages. We do not show them to the +	 *	user as he may think that the firewall replicas are not +	 *	in sync. The branch below is a hack as it is quite +	 *	specific and it breaks conntrackd modularity. Probably +	 *	there's a nicer way to do this but until I come up with it... +	 */ +	if (CONFIG(flags) & CTD_SYNC_FTFW && obj->status == C_OBJ_DEAD) +		return 0; + +	/* do not show cached timeout, this may confuse users */ +	if (nfexp_attr_is_set(obj->ptr, ATTR_EXP_TIMEOUT)) +		nfexp_attr_unset(obj->ptr, ATTR_EXP_TIMEOUT); + +	memset(buf, 0, sizeof(buf)); +	size = nfexp_snprintf(buf, sizeof(buf),obj->ptr, +			      NFCT_T_UNKNOWN, container->type, 0); + +	for (i = 0; i < obj->cache->num_features; i++) { +		if (obj->cache->features[i]->dump) { +			size += obj->cache->features[i]->dump(obj, data, +							      buf+size, +							      container->type); +			data += obj->cache->features[i]->size; +		} +	} +	if (container->type != NFCT_O_XML) { +		long tm = time(NULL); +		size += sprintf(buf+size, " [active since %lds]", +				tm - obj->lifetime); +	} +	size += sprintf(buf+size, "\n"); +	if (send(container->fd, buf, size, 0) == -1) { +		if (errno != EPIPE) +			return -1; +	} + +	return 0; +} + +static int cache_exp_commit_step(void *data, void *n) +{ +	struct cache_object *obj = n; +	struct __commit_container *tmp = data; +	int ret, retry = 1, timeout; +	struct nf_expect *exp = obj->ptr; + +	if (CONFIG(commit_timeout)) { +		timeout = CONFIG(commit_timeout); +	} else { +		timeout = time(NULL) - obj->lastupdate; +		if (timeout < 0) { +			/* XXX: Arbitrarily set the timer to one minute, how +			 * can this happen? For example, an adjustment due to +			 * daylight-saving. Probably other situations can +			 * trigger this. */ +			timeout = 60; +		} +		/* calculate an estimation of the current timeout */ +		timeout = nfexp_get_attr_u32(exp, ATTR_EXP_TIMEOUT) - timeout; +		if (timeout < 0) { +			timeout = 60; +		} +	} + +retry: +	if (nl_create_expect(tmp->h, exp, timeout) == -1) { +		if (errno == EEXIST && retry == 1) { +			ret = nl_destroy_expect(tmp->h, exp); +			if (ret == 0 || (ret == -1 && errno == ENOENT)) { +				if (retry) { +					retry = 0; +					goto retry; +				} +			} +			dlog(LOG_ERR, "commit-destroy: %s", strerror(errno)); +			dlog_exp(STATE(log), exp, NFCT_O_PLAIN); +			tmp->c->stats.commit_fail++; +		} else { +			dlog(LOG_ERR, "commit-create: %s", strerror(errno)); +			dlog_exp(STATE(log), exp, NFCT_O_PLAIN); +			tmp->c->stats.commit_fail++; +		} +	} else { +		tmp->c->stats.commit_ok++; +	} +	/* keep iterating even if we have found errors */ +	return 0; +} + +static int +cache_exp_commit(struct cache *c, struct nfct_handle *h, int clientfd) +{ +	unsigned int commit_ok, commit_fail; +	struct timeval commit_stop, res; +	struct __commit_container tmp = { +		.h = h, +		.c = c, +	}; + +	/* we already have one commit in progress, skip this. The clientfd +	 * descriptor has to be closed by the caller. */ +	if (clientfd && STATE_SYNC(commit).clientfd != -1) +		return -1; + +	switch(STATE_SYNC(commit).state) { +	case COMMIT_STATE_INACTIVE: +		gettimeofday(&STATE_SYNC(commit).stats.start, NULL); +		STATE_SYNC(commit).stats.ok = c->stats.commit_ok; +		STATE_SYNC(commit).stats.fail = c->stats.commit_fail; +		STATE_SYNC(commit).clientfd = clientfd; +	case COMMIT_STATE_MASTER: +		STATE_SYNC(commit).current = +			hashtable_iterate_limit(c->h, &tmp, +						STATE_SYNC(commit).current, +						CONFIG(general).commit_steps, +						cache_exp_commit_step); +		if (STATE_SYNC(commit).current < CONFIG(hashsize)) { +			STATE_SYNC(commit).state = COMMIT_STATE_MASTER; +			/* give it another step as soon as possible */ +			write_evfd(STATE_SYNC(commit).evfd); +			return 1; +		} + +		/* calculate the time that commit has taken */ +		gettimeofday(&commit_stop, NULL); +		timersub(&commit_stop, &STATE_SYNC(commit).stats.start, &res); + +		/* calculate new entries committed */ +		commit_ok = c->stats.commit_ok - STATE_SYNC(commit).stats.ok; +		commit_fail = +			c->stats.commit_fail - STATE_SYNC(commit).stats.fail; + +		/* log results */ +		dlog(LOG_NOTICE, "Committed %u new expectations", commit_ok); + +		if (commit_fail) +			dlog(LOG_NOTICE, "%u expectations can't be " +					 "committed", commit_fail); + +		dlog(LOG_NOTICE, "commit has taken %lu.%06lu seconds", +			res.tv_sec, res.tv_usec); + +		/* prepare the state machine for new commits */ +		STATE_SYNC(commit).current = 0; +		STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE; + +		return 0; +	} +	return 1; +} + +static struct nethdr * +cache_exp_build_msg(const struct cache_object *obj, int type) +{ +	return BUILD_NETMSG_FROM_EXP(obj->ptr, type); +} + +/* template to cache expectations coming from the kernel. */ +struct cache_ops cache_sync_internal_exp_ops = { +	.hash		= cache_exp_hash, +	.cmp		= cache_exp_cmp, +	.alloc		= cache_exp_alloc, +	.free		= cache_exp_free, +	.copy		= cache_exp_copy, +	.dump_step	= cache_exp_dump_step, +	.commit		= NULL, +	.build_msg	= cache_exp_build_msg, +}; + +/* template to cache expectations coming from the network. */ +struct cache_ops cache_sync_external_exp_ops = { +	.hash		= cache_exp_hash, +	.cmp		= cache_exp_cmp, +	.alloc		= cache_exp_alloc, +	.free		= cache_exp_free, +	.copy		= cache_exp_copy, +	.dump_step	= cache_exp_dump_step, +	.commit		= cache_exp_commit, +	.build_msg	= NULL, +}; diff --git a/src/external_cache.c b/src/external_cache.c index 3f896a0..e290249 100644 --- a/src/external_cache.c +++ b/src/external_cache.c @@ -26,6 +26,7 @@  #include <stdlib.h>  static struct cache *external; +static struct cache *external_exp;  static int external_cache_init(void)  { @@ -36,12 +37,21 @@ static int external_cache_init(void)  		dlog(LOG_ERR, "can't allocate memory for the external cache");  		return -1;  	} +	external_exp = cache_create("external", CACHE_T_EXP, +				STATE_SYNC(sync)->external_cache_flags, +				NULL, &cache_sync_external_exp_ops); +	if (external_exp == NULL) { +		dlog(LOG_ERR, "can't allocate memory for the external cache"); +		return -1; +	} +  	return 0;  }  static void external_cache_close(void)  {  	cache_destroy(external); +	cache_destroy(external_exp);  }  static void external_cache_ct_new(struct nf_conntrack *ct) @@ -109,6 +119,71 @@ static void external_cache_ct_stats_ext(int fd)  	cache_stats_extended(external, fd);  } +static void external_cache_exp_new(struct nf_expect *exp) +{ +	struct cache_object *obj; +	int id; + +	obj = cache_find(external_exp, exp, &id); +	if (obj == NULL) { +retry: +		obj = cache_object_new(external_exp, exp); +		if (obj == NULL) +			return; + +		if (cache_add(external_exp, obj, id) == -1) { +			cache_object_free(obj); +			return; +		} +	} else { +		cache_del(external_exp, obj); +		cache_object_free(obj); +		goto retry; +	} +} + +static void external_cache_exp_upd(struct nf_expect *exp) +{ +	cache_update_force(external_exp, exp); +} + +static void external_cache_exp_del(struct nf_expect *exp) +{ +	struct cache_object *obj; +	int id; + +	obj = cache_find(external_exp, exp, &id); +	if (obj) { +		cache_del(external_exp, obj); +		cache_object_free(obj); +	} +} + +static void external_cache_exp_dump(int fd, int type) +{ +	cache_dump(external_exp, fd, type); +} + +static int external_cache_exp_commit(struct nfct_handle *h, int fd) +{ +	return cache_commit(external_exp, h, fd); +} + +static void external_cache_exp_flush(void) +{ +	cache_flush(external_exp); +} + +static void external_cache_exp_stats(int fd) +{ +	cache_stats(external_exp, fd); +} + +static void external_cache_exp_stats_ext(int fd) +{ +	cache_stats_extended(external_exp, fd); +} +  struct external_handler external_cache = {  	.init		= external_cache_init,  	.close		= external_cache_close, @@ -122,4 +197,14 @@ struct external_handler external_cache = {  		.stats		= external_cache_ct_stats,  		.stats_ext	= external_cache_ct_stats_ext,  	}, +	.exp = { +		.new		= external_cache_exp_new, +		.upd		= external_cache_exp_upd, +		.del		= external_cache_exp_del, +		.dump		= external_cache_exp_dump, +		.commit		= external_cache_exp_commit, +		.flush		= external_cache_exp_flush, +		.stats		= external_cache_exp_stats, +		.stats_ext	= external_cache_exp_stats_ext, +	},  }; diff --git a/src/external_inject.c b/src/external_inject.c index ba5f3d1..0ad3478 100644 --- a/src/external_inject.c +++ b/src/external_inject.c @@ -42,7 +42,7 @@ struct {  static int external_inject_init(void)  {  	/* handler to directly inject conntracks into kernel-space */ -	inject = nfct_open(CONNTRACK, 0); +	inject = nfct_open(CONFIG(netlink).subsys_id, 0);  	if (inject == NULL) {  		dlog(LOG_ERR, "can't open netlink handler: %s",  		     strerror(errno)); @@ -175,6 +175,89 @@ static void external_inject_ct_stats(int fd)  	send(fd, buf, size, 0);  } +struct { +	uint32_t	add_ok; +	uint32_t	add_fail; +	uint32_t	upd_ok; +	uint32_t	upd_fail; +	uint32_t	del_ok; +	uint32_t	del_fail; +} exp_external_inject_stat; + +static void external_inject_exp_new(struct nf_expect *exp) +{ +	int ret, retry = 1; + +retry: +	if (nl_create_expect(inject, exp, 0) == -1) { +		/* if the state entry exists, we delete and try again */ +		if (errno == EEXIST && retry == 1) { +			ret = nl_destroy_expect(inject, exp); +			if (ret == 0 || (ret == -1 && errno == ENOENT)) { +				if (retry) { +					retry = 0; +					goto retry; +				} +			} +			exp_external_inject_stat.add_fail++; +			dlog(LOG_ERR, "inject-add1: %s", strerror(errno)); +			dlog_exp(STATE(log), exp, NFCT_O_PLAIN); +			return; +		} +		exp_external_inject_stat.add_fail++; +		dlog(LOG_ERR, "inject-add2: %s", strerror(errno)); +		dlog_exp(STATE(log), exp, NFCT_O_PLAIN); +	} else { +		exp_external_inject_stat.add_ok++; +	} +} + +static void external_inject_exp_del(struct nf_expect *exp) +{ +	if (nl_destroy_expect(inject, exp) == -1) { +		if (errno != ENOENT) { +			exp_external_inject_stat.del_fail++; +			dlog(LOG_ERR, "inject-del: %s", strerror(errno)); +			dlog_exp(STATE(log), exp, NFCT_O_PLAIN); +		} +	} else { +		exp_external_inject_stat.del_ok++; +	} +} + +static void external_inject_exp_dump(int fd, int type) +{ +} + +static int external_inject_exp_commit(struct nfct_handle *h, int fd) +{ +	/* close the commit socket. */ +	return LOCAL_RET_OK; +} + +static void external_inject_exp_flush(void) +{ +} + +static void external_inject_exp_stats(int fd) +{ +	char buf[512]; +	int size; + +	size = sprintf(buf, "external inject:\n" +			    "connections created:\t\t%12u\tfailed:\t%12u\n" +			    "connections updated:\t\t%12u\tfailed:\t%12u\n" +			    "connections destroyed:\t\t%12u\tfailed:\t%12u\n\n", +			    exp_external_inject_stat.add_ok, +			    exp_external_inject_stat.add_fail, +			    exp_external_inject_stat.upd_ok, +			    exp_external_inject_stat.upd_fail, +			    exp_external_inject_stat.del_ok, +			    exp_external_inject_stat.del_fail); + +	send(fd, buf, size, 0); +} +  struct external_handler external_inject = {  	.init		= external_inject_init,  	.close		= external_inject_close, @@ -188,4 +271,14 @@ struct external_handler external_inject = {  		.stats		= external_inject_ct_stats,  		.stats_ext	= external_inject_ct_stats,  	}, +	.exp = { +		.new		= external_inject_exp_new, +		.upd		= external_inject_exp_new, +		.del		= external_inject_exp_del, +		.dump		= external_inject_exp_dump, +		.commit		= external_inject_exp_commit, +		.flush		= external_inject_exp_flush, +		.stats		= external_inject_exp_stats, +		.stats_ext	= external_inject_exp_stats, +	},  }; diff --git a/src/filter.c b/src/filter.c index 746a9bb..e8515d6 100644 --- a/src/filter.c +++ b/src/filter.c @@ -405,3 +405,79 @@ int ct_filter_conntrack(const struct nf_conntrack *ct, int userspace)  	return 0;  } + +struct exp_filter { +	struct list_head 	list; +}; + +struct exp_filter *exp_filter_create(void) +{ +	struct exp_filter *f; + +	f = calloc(1, sizeof(struct exp_filter)); +	if (f == NULL) +		return NULL; + +	INIT_LIST_HEAD(&f->list); +	return f; +} + +struct exp_filter_item { +	struct list_head	head; +	char			helper_name[NFCT_HELPER_NAME_MAX]; +}; + +/* this is ugly, but it simplifies read_config_yy.y */ +static struct exp_filter *exp_filter_alloc(void) +{ +	if (STATE(exp_filter) == NULL) { +		STATE(exp_filter) = exp_filter_create(); +		if (STATE(exp_filter) == NULL) { +			fprintf(stderr, "Can't init expectation filtering!\n"); +			return NULL; +		} +	} +	return STATE(exp_filter);; +} + +int exp_filter_add(struct exp_filter *f, const char *helper_name) +{ +	struct exp_filter_item *item; + +	f = exp_filter_alloc(); +	if (f == NULL) +		return -1; + +	list_for_each_entry(item, &f->list, head) { +		if (strncmp(item->helper_name, helper_name, +				NFCT_HELPER_NAME_MAX) == 0) { +			return -1; +		} +	} +	item = calloc(1, sizeof(struct exp_filter_item)); +	if (item == NULL) +		return -1; + +	strncpy(item->helper_name, helper_name, NFCT_HELPER_NAME_MAX); +	list_add(&item->head, &f->list); +	return 0; +} + +int exp_filter_find(struct exp_filter *f, const struct nf_expect *exp) +{ +	struct exp_filter_item *item; + +	if (f == NULL) +		return 0; + +	list_for_each_entry(item, &f->list, head) { +		const char *name = nfexp_get_attr(exp, ATTR_EXP_HELPER_NAME); + +		/* we allow partial matching to support things like sip-PORT. */ +		if (strncmp(item->helper_name, name, +				strlen(item->helper_name)) == 0) { +			return 1; +		} +	} +	return 0; +} diff --git a/src/internal_bypass.c b/src/internal_bypass.c index 98717f3..5c83c21 100644 --- a/src/internal_bypass.c +++ b/src/internal_bypass.c @@ -52,7 +52,7 @@ static void internal_bypass_ct_dump(int fd, int type)  	u_int32_t family = AF_UNSPEC;  	int ret; -	h = nfct_open(CONNTRACK, 0); +	h = nfct_open(CONFIG(netlink).subsys_id, 0);  	if (h == NULL) {  		dlog(LOG_ERR, "can't allocate memory for the internal cache");  		return; @@ -151,6 +151,138 @@ static int internal_bypass_ct_event_del(struct nf_conntrack *ct, int origin)  	return 1;  } +static int +internal_bypass_exp_dump_cb(enum nf_conntrack_msg_type type, +			    struct nf_expect *exp, void *data) +{ +	char buf[1024]; +	int size, *fd = data; +	const struct nf_conntrack *master = +		nfexp_get_attr(exp, ATTR_EXP_MASTER); + +	if (!exp_filter_find(STATE(exp_filter), exp)) +		return NFCT_CB_CONTINUE; + +	if (ct_filter_conntrack(master, 1)) +		return NFCT_CB_CONTINUE; + +	size = nfexp_snprintf(buf, 1024, exp, +			      NFCT_T_UNKNOWN, NFCT_O_DEFAULT, 0); +	if (size < 1024) { +		buf[size] = '\n'; +		size++; +	} +	send(*fd, buf, size, 0); + +	return NFCT_CB_CONTINUE; +} + +static void internal_bypass_exp_dump(int fd, int type) +{ +	struct nfct_handle *h; +	u_int32_t family = AF_UNSPEC; +	int ret; + +	h = nfct_open(CONFIG(netlink).subsys_id, 0); +	if (h == NULL) { +		dlog(LOG_ERR, "can't allocate memory for the internal cache"); +		return; +	} +	nfexp_callback_register(h, NFCT_T_ALL, +				internal_bypass_exp_dump_cb, &fd); +	ret = nfexp_query(h, NFCT_Q_DUMP, &family); +	if (ret == -1) { +		dlog(LOG_ERR, "can't dump kernel table"); +	} +	nfct_close(h); +} + +static void internal_bypass_exp_flush(void) +{ +	nl_flush_expect_table(STATE(flush)); +} + +struct { +	uint32_t	new; +	uint32_t	upd; +	uint32_t	del; +} exp_internal_bypass_stats; + +static void internal_bypass_exp_stats(int fd) +{ +	char buf[512]; +	int size; + +	size = sprintf(buf, "internal bypass:\n" +			    "connections new:\t\t%12u\n" +			    "connections updated:\t\t%12u\n" +			    "connections destroyed:\t\t%12u\n\n", +			    exp_internal_bypass_stats.new, +			    exp_internal_bypass_stats.upd, +			    exp_internal_bypass_stats.del); + +	send(fd, buf, size, 0); +} + +/* unused, INTERNAL_F_POPULATE is unset. No cache, nothing to populate. */ +static void internal_bypass_exp_populate(struct nf_expect *exp) +{ +} + +/* unused, INTERNAL_F_RESYNC is unset. */ +static void internal_bypass_exp_purge(void) +{ +} + +/* unused, INTERNAL_F_RESYNC is unset. Nothing to resync, we have no cache. */ +static int +internal_bypass_exp_resync(enum nf_conntrack_msg_type type, +			   struct nf_expect *exp, void *data) +{ +	return NFCT_CB_CONTINUE; +} + +static void internal_bypass_exp_event_new(struct nf_expect *exp, int origin) +{ +	struct nethdr *net; + +	/* this event has been triggered by me, skip */ +	if (origin != CTD_ORIGIN_NOT_ME) +		return; + +	net = BUILD_NETMSG_FROM_EXP(exp, NET_T_STATE_EXP_NEW); +	multichannel_send(STATE_SYNC(channel), net); +	exp_internal_bypass_stats.new++; +} + +static void internal_bypass_exp_event_upd(struct nf_expect *exp, int origin) +{ +	struct nethdr *net; + +	/* this event has been triggered by me, skip */ +	if (origin != CTD_ORIGIN_NOT_ME) +		return; + +	net = BUILD_NETMSG_FROM_EXP(exp, NET_T_STATE_EXP_UPD); +	multichannel_send(STATE_SYNC(channel), net); +	exp_internal_bypass_stats.upd++; +} + +static int internal_bypass_exp_event_del(struct nf_expect *exp, int origin) +{ +	struct nethdr *net; + +	/* this event has been triggered by me, skip */ +	if (origin != CTD_ORIGIN_NOT_ME) +		return 1; + +	net = BUILD_NETMSG_FROM_EXP(exp, NET_T_STATE_EXP_DEL); +	multichannel_send(STATE_SYNC(channel), net); +	exp_internal_bypass_stats.del++; + +	return 1; +} +  struct internal_handler internal_bypass = {  	.init			= internal_bypass_init,  	.close			= internal_bypass_close, @@ -166,4 +298,16 @@ struct internal_handler internal_bypass = {  		.upd			= internal_bypass_ct_event_upd,  		.del			= internal_bypass_ct_event_del,  	}, +	.exp = { +		.dump			= internal_bypass_exp_dump, +		.flush			= internal_bypass_exp_flush, +		.stats			= internal_bypass_exp_stats, +		.stats_ext		= internal_bypass_exp_stats, +		.populate		= internal_bypass_exp_populate, +		.purge			= internal_bypass_exp_purge, +		.resync			= internal_bypass_exp_resync, +		.new			= internal_bypass_exp_event_new, +		.upd			= internal_bypass_exp_event_upd, +		.del			= internal_bypass_exp_event_del, +	},  }; diff --git a/src/internal_cache.c b/src/internal_cache.c index 952327d..ba2d74b 100644 --- a/src/internal_cache.c +++ b/src/internal_cache.c @@ -32,12 +32,25 @@ static int internal_cache_init(void)  		dlog(LOG_ERR, "can't allocate memory for the internal cache");  		return -1;  	} + +	STATE(mode)->internal->exp.data = +		cache_create("internal", CACHE_T_EXP, +			STATE_SYNC(sync)->internal_cache_flags, +			STATE_SYNC(sync)->internal_cache_extra, +			&cache_sync_internal_exp_ops); + +	if (!STATE(mode)->internal->exp.data) { +		dlog(LOG_ERR, "can't allocate memory for the internal cache"); +		return -1; +	} +  	return 0;  }  static void internal_cache_close(void)  {  	cache_destroy(STATE(mode)->internal->ct.data); +	cache_destroy(STATE(mode)->internal->exp.data);  }  static void internal_cache_ct_dump(int fd, int type) @@ -203,6 +216,154 @@ static int internal_cache_ct_event_del(struct nf_conntrack *ct, int origin)  	return 1;  } +static void internal_cache_exp_dump(int fd, int type) +{ +	cache_dump(STATE(mode)->internal->exp.data, fd, type); +} + +static void internal_cache_exp_flush(void) +{ +	cache_flush(STATE(mode)->internal->exp.data); +} + +static void internal_cache_exp_stats(int fd) +{ +	cache_stats(STATE(mode)->internal->exp.data, fd); +} + +static void internal_cache_exp_stats_ext(int fd) +{ +	cache_stats_extended(STATE(mode)->internal->exp.data, fd); +} + +static void internal_cache_exp_populate(struct nf_expect *exp) +{ +	cache_update_force(STATE(mode)->internal->exp.data, exp); +} + +static int internal_cache_exp_purge_step(void *data1, void *data2) +{ +	struct cache_object *obj = data2; + +	STATE(get_retval) = 0; +	nl_get_expect(STATE(get), obj->ptr);	/* modifies STATE(get_reval) */ +	if (!STATE(get_retval)) { +		if (obj->status != C_OBJ_DEAD) { +			cache_object_set_status(obj, C_OBJ_DEAD); +			sync_send(obj, NET_T_STATE_EXP_DEL); +			cache_object_put(obj); +		} +	} + +	return 0; +} + +static void internal_cache_exp_purge(void) +{ +	cache_iterate(STATE(mode)->internal->exp.data, NULL, +			internal_cache_exp_purge_step); +} + +static int +internal_cache_exp_resync(enum nf_conntrack_msg_type type, +			  struct nf_expect *exp, void *data) +{ +	struct cache_object *obj; +	const struct nf_conntrack *master = +		nfexp_get_attr(exp, ATTR_EXP_MASTER); + +	if (!exp_filter_find(STATE(exp_filter), exp)) +		return NFCT_CB_CONTINUE; + +	if (ct_filter_conntrack(master, 1)) +		return NFCT_CB_CONTINUE; + +	obj = cache_update_force(STATE(mode)->internal->exp.data, exp); +	if (obj == NULL) +		return NFCT_CB_CONTINUE; + +	switch (obj->status) { +	case C_OBJ_NEW: +		sync_send(obj, NET_T_STATE_EXP_NEW); +		break; +	case C_OBJ_ALIVE: +		sync_send(obj, NET_T_STATE_EXP_UPD); +		break; +	} +	return NFCT_CB_CONTINUE; +} + +static void internal_cache_exp_event_new(struct nf_expect *exp, int origin) +{ +	struct cache_object *obj; +	int id; + +	/* this event has been triggered by a direct inject, skip */ +	if (origin == CTD_ORIGIN_INJECT) +		return; + +	obj = cache_find(STATE(mode)->internal->exp.data, exp, &id); +	if (obj == NULL) { +retry: +		obj = cache_object_new(STATE(mode)->internal->exp.data, exp); +		if (obj == NULL) +			return; +		if (cache_add(STATE(mode)->internal->exp.data, obj, id) == -1) { +			cache_object_free(obj); +			return; +		} +		/* only synchronize events that have been triggered by other +		 * processes or the kernel, but don't propagate events that +		 * have been triggered by conntrackd itself, eg. commits. */ +		if (origin == CTD_ORIGIN_NOT_ME) +			sync_send(obj, NET_T_STATE_EXP_NEW); +	} else { +		cache_del(STATE(mode)->internal->exp.data, obj); +		cache_object_free(obj); +		goto retry; +	} +} + +static void internal_cache_exp_event_upd(struct nf_expect *exp, int origin) +{ +	struct cache_object *obj; + +	/* this event has been triggered by a direct inject, skip */ +	if (origin == CTD_ORIGIN_INJECT) +		return; + +	obj = cache_update_force(STATE(mode)->internal->exp.data, exp); +	if (obj == NULL) +		return; + +	if (origin == CTD_ORIGIN_NOT_ME) +		sync_send(obj, NET_T_STATE_EXP_UPD); +} + +static int internal_cache_exp_event_del(struct nf_expect *exp, int origin) +{ +	struct cache_object *obj; +	int id; + +	/* this event has been triggered by a direct inject, skip */ +	if (origin == CTD_ORIGIN_INJECT) +		return 0; + +	/* we don't synchronize events for objects that are not in the cache */ +	obj = cache_find(STATE(mode)->internal->exp.data, exp, &id); +	if (obj == NULL) +		return 0; + +	if (obj->status != C_OBJ_DEAD) { +		cache_object_set_status(obj, C_OBJ_DEAD); +		if (origin == CTD_ORIGIN_NOT_ME) { +			sync_send(obj, NET_T_STATE_EXP_DEL); +		} +		cache_object_put(obj); +	} +	return 1; +} +  struct internal_handler internal_cache = {  	.flags			= INTERNAL_F_POPULATE | INTERNAL_F_RESYNC,  	.init			= internal_cache_init, @@ -219,4 +380,16 @@ struct internal_handler internal_cache = {  		.upd			= internal_cache_ct_event_upd,  		.del			= internal_cache_ct_event_del,  	}, +	.exp = { +		.dump			= internal_cache_exp_dump, +		.flush			= internal_cache_exp_flush, +		.stats			= internal_cache_exp_stats, +		.stats_ext		= internal_cache_exp_stats_ext, +		.populate		= internal_cache_exp_populate, +		.purge			= internal_cache_exp_purge, +		.resync			= internal_cache_exp_resync, +		.new			= internal_cache_exp_event_new, +		.upd			= internal_cache_exp_event_upd, +		.del			= internal_cache_exp_event_del, +	},  }; @@ -145,6 +145,43 @@ void dlog_ct(FILE *fd, struct nf_conntrack *ct, unsigned int type)  	}  } +void dlog_exp(FILE *fd, struct nf_expect *exp, unsigned int type) +{ +	time_t t; +	char buf[1024]; +	char *tmp; +	unsigned int flags = 0; + +	buf[0]='\0'; + +	switch(type) { +	case NFCT_O_PLAIN: +		t = time(NULL); +		ctime_r(&t, buf); +		tmp = buf + strlen(buf); +		buf[strlen(buf)-1]='\t'; +		break; +	default: +		return; +	} +	nfexp_snprintf(buf+strlen(buf), 1024-strlen(buf), exp, 0, type, flags); + +	if (fd) { +		snprintf(buf+strlen(buf), 1024-strlen(buf), "\n"); +		fputs(buf, fd); +	} + +	if (fd == STATE(log)) { +		/* error reporting */ +		if (CONFIG(syslog_facility) != -1) +			syslog(LOG_ERR, "%s", tmp); +	} else if (fd == STATE(stats_log)) { +		/* connection logging */ +		if (CONFIG(stats).syslog_facility != -1) +			syslog(LOG_INFO, "%s", tmp); +	} +} +  void close_log(void)  {  	if (STATE(log) != NULL) @@ -1,5 +1,6 @@  /* - * (C) 2006-2007 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2011 by Vyatta Inc. <http://www.vyatta.com>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License as published by @@ -38,14 +39,15 @@ static const char usage_daemon_commands[] =  static const char usage_client_commands[] =  	"Client mode commands:\n" -	"  -c, commit external cache to conntrack table\n" +	"  -c [ct|expect], commit external cache to conntrack table\n"  	"  -f [|internal|external], flush internal and external cache\n" -	"  -F, flush kernel conntrack table\n" -	"  -i, display content of the internal cache\n" -	"  -e, display the content of the external cache\n" +	"  -F [ct|expect], flush kernel conntrack table\n" +	"  -i [ct|expect], display content of the internal cache\n" +	"  -e [ct|expect], display the content of the external cache\n"  	"  -k, kill conntrack daemon\n" -	"  -s  [|network|cache|runtime|link|rsqueue|queue], dump statistics\n" -	"  -R, resync with kernel conntrack table\n" +	"  -s  [|network|cache|runtime|link|rsqueue|queue|ct|expect], " +		"dump statistics\n" +	"  -R [ct|expect], resync with kernel conntrack table\n"  	"  -n, request resync with other node (only FT-FW and NOTRACK modes)\n"  	"  -x, dump cache in XML format (requires -i or -e)\n"  	"  -t, reset the kernel timeout (see PurgeTimeout clause)\n" @@ -89,6 +91,25 @@ set_operation_mode(int *current, int want, char *argv[])  	}  } +static int +set_action_by_table(int i, int argc, char *argv[], +		    int ct_action, int exp_action, int dfl_action, int *action) +{ +	if (i+1 < argc && argv[i+1][0] != '-') { +		if (strncmp(argv[i+1], "ct", strlen(argv[i+1])) == 0) { +			*action = ct_action; +			i++; +		} else if (strncmp(argv[i+1], "expect", +						strlen(argv[i+1])) == 0) { +			*action = exp_action; +			i++; +		} +	} else +		*action = dfl_action; + +	return i; +} +  int main(int argc, char *argv[])  {  	int ret, i, action = -1; @@ -115,15 +136,23 @@ int main(int argc, char *argv[])  			break;  		case 'c':  			set_operation_mode(&type, REQUEST, argv); -			action = CT_COMMIT; +			i = set_action_by_table(i, argc, argv, +						CT_COMMIT, EXP_COMMIT, +						ALL_COMMIT, &action);  			break;  		case 'i':  			set_operation_mode(&type, REQUEST, argv); -			action = CT_DUMP_INTERNAL; +			i = set_action_by_table(i, argc, argv, +						CT_DUMP_INTERNAL, +						EXP_DUMP_INTERNAL, +						CT_DUMP_INTERNAL, &action);  			break;  		case 'e':  			set_operation_mode(&type, REQUEST, argv); -			action = CT_DUMP_EXTERNAL; +			i = set_action_by_table(i, argc, argv, +						CT_DUMP_EXTERNAL, +						EXP_DUMP_EXTERNAL, +						CT_DUMP_EXTERNAL, &action);  			break;  		case 'C':  			if (++i < argc) { @@ -142,7 +171,10 @@ int main(int argc, char *argv[])  			break;  		case 'F':  			set_operation_mode(&type, REQUEST, argv); -			action = CT_FLUSH_MASTER; +			i = set_action_by_table(i, argc, argv, +						CT_FLUSH_MASTER, +						EXP_FLUSH_MASTER, +						ALL_FLUSH_MASTER, &action);  			break;  		case 'f':  			set_operation_mode(&type, REQUEST, argv); @@ -164,12 +196,15 @@ int main(int argc, char *argv[])  				}  			} else {  				/* default to general flushing */ -				action = CT_FLUSH_CACHE; +				action = ALL_FLUSH_CACHE;  			}  			break;  		case 'R':  			set_operation_mode(&type, REQUEST, argv); -			action = CT_RESYNC_MASTER; +			i = set_action_by_table(i, argc, argv, +						CT_RESYNC_MASTER, +						EXP_RESYNC_MASTER, +						ALL_RESYNC_MASTER, &action);  			break;  		case 'B':  			set_operation_mode(&type, REQUEST, argv); @@ -222,6 +257,14 @@ int main(int argc, char *argv[])  						strlen(argv[i+1])) == 0) {  					action = STATS_QUEUE;  					i++; +				} else if (strncmp(argv[i+1], "ct", +						strlen(argv[i+1])) == 0) { +					action = STATS; +					i++; +				} else if (strncmp(argv[i+1], "expect", +						strlen(argv[i+1])) == 0) { +					action = EXP_STATS; +					i++;  				} else {  					fprintf(stderr, "ERROR: unknown "  							"parameter `%s' for " diff --git a/src/netlink.c b/src/netlink.c index 60274f3..fe979e3 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -1,5 +1,6 @@  /* - * (C) 2006 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2011 by Vyatta Inc. <http://www.vyatta.com>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License as published by @@ -32,7 +33,7 @@ struct nfct_handle *nl_init_event_handler(void)  {  	struct nfct_handle *h; -	h = nfct_open(CONNTRACK, NFCT_ALL_CT_GROUPS); +	h = nfct_open(CONFIG(netlink).subsys_id, CONFIG(netlink).groups);  	if (h == NULL)  		return NULL; @@ -301,3 +302,61 @@ int nl_destroy_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct)  {  	return nfct_query(h, NFCT_Q_DESTROY, ct);  } + +int nl_create_expect(struct nfct_handle *h, const struct nf_expect *orig, +		     int timeout) +{ +	int ret; +	struct nf_expect *exp; + +	exp = nfexp_clone(orig); +	if (exp == NULL) +		return -1; + +	if (timeout > 0) +		nfexp_set_attr_u32(exp, ATTR_EXP_TIMEOUT, timeout); + +	ret = nfexp_query(h, NFCT_Q_CREATE, exp); +	nfexp_destroy(exp); + +	return ret; +} + +int nl_destroy_expect(struct nfct_handle *h, const struct nf_expect *exp) +{ +	return nfexp_query(h, NFCT_Q_DESTROY, exp); +} + +/* if the handle has no callback, check for existence, otherwise, update */ +int nl_get_expect(struct nfct_handle *h, const struct nf_expect *exp) +{ +	int ret = 1; +	struct nf_expect *tmp; + +	/* XXX: we only need the expectation, not the mask and the master. */ +	tmp = nfexp_clone(exp); +	if (tmp == NULL) +		return -1; + +	if (nfexp_query(h, NFCT_Q_GET, tmp) == -1) +		ret = (errno == ENOENT) ? 0 : -1; + +	nfexp_destroy(tmp); +	return ret; +} + +int nl_dump_expect_table(struct nfct_handle *h) +{ +	return nfexp_query(h, NFCT_Q_DUMP, &CONFIG(family)); +} + +int nl_flush_expect_table(struct nfct_handle *h) +{ +	return nfexp_query(h, NFCT_Q_FLUSH, &CONFIG(family)); +} + +int nl_send_expect_resync(struct nfct_handle *h) +{ +	int family = CONFIG(family); +	return nfexp_send(h, NFCT_Q_DUMP, &family); +} diff --git a/src/network.c b/src/network.c index cadc466..13db37c 100644 --- a/src/network.c +++ b/src/network.c @@ -126,6 +126,11 @@ static int status2type[CACHE_T_MAX][C_OBJ_MAX] = {  		[C_OBJ_ALIVE]	= NET_T_STATE_CT_UPD,  		[C_OBJ_DEAD]	= NET_T_STATE_CT_DEL,  	}, +	[CACHE_T_EXP] = { +		[C_OBJ_NEW]	= NET_T_STATE_EXP_NEW, +		[C_OBJ_ALIVE]	= NET_T_STATE_EXP_UPD, +		[C_OBJ_DEAD]	= NET_T_STATE_EXP_DEL, +	},  };  int object_status_to_network_type(struct cache_object *obj) diff --git a/src/parse.c b/src/parse.c index 0718128..81e9c6b 100644 --- a/src/parse.c +++ b/src/parse.c @@ -248,3 +248,195 @@ int msg2ct(struct nf_conntrack *ct, struct nethdr *net, size_t remain)  	return 0;  } + +static void exp_parse_ct_group(void *ct, int attr, void *data); +static void exp_parse_ct_u8(void *ct, int attr, void *data); +static void exp_parse_u32(void *exp, int attr, void *data); + +static struct exp_parser { +	void 	(*parse)(void *obj, int attr, void *data); +	int 	exp_attr; +	int 	ct_attr; +	int	size; +} exp_h[NTA_EXP_MAX] = { +	[NTA_EXP_MASTER_IPV4] = { +		.parse		= exp_parse_ct_group, +		.exp_attr	= ATTR_EXP_MASTER, +		.ct_attr	= ATTR_GRP_ORIG_IPV4, +		.size		= NTA_SIZE(sizeof(struct nfct_attr_grp_ipv4)), +	}, +	[NTA_EXP_MASTER_IPV6] = { +		.parse		= exp_parse_ct_group, +		.exp_attr	= ATTR_EXP_MASTER, +		.ct_attr	= ATTR_GRP_ORIG_IPV6, +		.size		= NTA_SIZE(sizeof(struct nfct_attr_grp_ipv6)), +	}, +	[NTA_EXP_MASTER_L4PROTO] = { +		.parse		= exp_parse_ct_u8, +		.exp_attr	= ATTR_EXP_MASTER, +		.ct_attr	= ATTR_L4PROTO, +		.size		= NTA_SIZE(sizeof(uint8_t)), +	}, +	[NTA_EXP_MASTER_PORT] = { +		.parse		= exp_parse_ct_group, +		.exp_attr	= ATTR_EXP_MASTER, +		.ct_attr	= ATTR_GRP_ORIG_PORT, +		.size		= NTA_SIZE(sizeof(struct nfct_attr_grp_port)), +	}, +	[NTA_EXP_EXPECT_IPV4] = { +		.parse		= exp_parse_ct_group, +		.exp_attr	= ATTR_EXP_EXPECTED, +		.ct_attr	= ATTR_GRP_ORIG_IPV4, +		.size		= NTA_SIZE(sizeof(struct nfct_attr_grp_ipv4)), +	}, +	[NTA_EXP_EXPECT_IPV6] = { +		.parse		= exp_parse_ct_group, +		.exp_attr	= ATTR_EXP_EXPECTED, +		.ct_attr	= ATTR_GRP_ORIG_IPV6, +		.size		= NTA_SIZE(sizeof(struct nfct_attr_grp_ipv6)), +	}, +	[NTA_EXP_EXPECT_L4PROTO] = { +		.parse		= exp_parse_ct_u8, +		.exp_attr	= ATTR_EXP_EXPECTED, +		.ct_attr	= ATTR_L4PROTO, +		.size		= NTA_SIZE(sizeof(uint8_t)), +	}, +	[NTA_EXP_EXPECT_PORT] = { +		.parse		= exp_parse_ct_group, +		.exp_attr	= ATTR_EXP_EXPECTED, +		.ct_attr	= ATTR_GRP_ORIG_PORT, +		.size		= NTA_SIZE(sizeof(struct nfct_attr_grp_port)), +	}, +	[NTA_EXP_MASK_IPV4] = { +		.parse		= exp_parse_ct_group, +		.exp_attr	= ATTR_EXP_MASK, +		.ct_attr	= ATTR_GRP_ORIG_IPV4, +		.size		= NTA_SIZE(sizeof(struct nfct_attr_grp_ipv4)), +	}, +	[NTA_EXP_MASK_IPV6] = { +		.parse		= exp_parse_ct_group, +		.exp_attr	= ATTR_EXP_MASK, +		.ct_attr	= ATTR_GRP_ORIG_IPV6, +		.size		= NTA_SIZE(sizeof(struct nfct_attr_grp_ipv6)), +	}, +	[NTA_EXP_MASK_L4PROTO] = { +		.parse		= exp_parse_ct_u8, +		.exp_attr	= ATTR_EXP_MASK, +		.ct_attr	= ATTR_L4PROTO, +		.size		= NTA_SIZE(sizeof(uint8_t)), +	}, +	[NTA_EXP_MASK_PORT] = { +		.parse		= exp_parse_ct_group, +		.exp_attr	= ATTR_EXP_MASK, +		.ct_attr	= ATTR_GRP_ORIG_PORT, +		.size		= NTA_SIZE(sizeof(struct nfct_attr_grp_port)), +	}, +	[NTA_EXP_TIMEOUT] = { +		.parse		= exp_parse_u32, +		.exp_attr	= ATTR_EXP_TIMEOUT, +		.size		= NTA_SIZE(sizeof(uint32_t)), +	}, +	[NTA_EXP_FLAGS] = { +		.parse		= exp_parse_u32, +		.exp_attr	= ATTR_EXP_FLAGS, +		.size		= NTA_SIZE(sizeof(uint32_t)), +	}, +}; + +static void exp_parse_ct_group(void *ct, int attr, void *data) +{ +	nfct_set_attr_grp(ct, exp_h[attr].ct_attr, data); +} + +static void exp_parse_ct_u8(void *ct, int attr, void *data) +{ +	uint8_t *value = (uint8_t *) data; +	nfct_set_attr_u8(ct, exp_h[attr].ct_attr, *value); +} + +static void exp_parse_u32(void *exp, int attr, void *data) +{ +	uint32_t *value = (uint32_t *) data; +	nfexp_set_attr_u32(exp, exp_h[attr].exp_attr, ntohl(*value)); +} + +int msg2exp(struct nf_expect *exp, struct nethdr *net, size_t remain) +{ +	int len; +	struct netattr *attr; +	struct nf_conntrack *master, *expected, *mask; + +	if (remain < net->len) +		return -1; + +	len = net->len - NETHDR_SIZ; +	attr = NETHDR_DATA(net); + +	master = nfct_new(); +	if (master == NULL) +		goto err_master; + +	expected = nfct_new(); +	if (expected == NULL) +		goto err_expected; + +	mask = nfct_new(); +	if (mask == NULL) +		goto err_mask; + +	while (len > ssizeof(struct netattr)) { +		ATTR_NETWORK2HOST(attr); +		if (attr->nta_len > len) +			goto err; +		if (attr->nta_attr > NTA_MAX) +			goto err; +		if (attr->nta_len != exp_h[attr->nta_attr].size) +			goto err; +		if (exp_h[attr->nta_attr].parse == NULL) { +			attr = NTA_NEXT(attr, len); +			continue; +		} +		switch(exp_h[attr->nta_attr].exp_attr) { +		case ATTR_EXP_MASTER: +			exp_h[attr->nta_attr].parse(master, attr->nta_attr, +						    NTA_DATA(attr)); +		case ATTR_EXP_EXPECTED: +			exp_h[attr->nta_attr].parse(expected, attr->nta_attr, +						    NTA_DATA(attr)); +		case ATTR_EXP_MASK: +			exp_h[attr->nta_attr].parse(mask, attr->nta_attr, +						    NTA_DATA(attr)); +			break; +		case ATTR_EXP_TIMEOUT: +		case ATTR_EXP_FLAGS: +			exp_h[attr->nta_attr].parse(exp, attr->nta_attr, +						    NTA_DATA(attr)); +			break; +		} +		attr = NTA_NEXT(attr, len); +	} + +	nfexp_set_attr(exp, ATTR_EXP_MASTER, master); +	nfexp_set_attr(exp, ATTR_EXP_EXPECTED, expected); +	nfexp_set_attr(exp, ATTR_EXP_MASK, mask); + +	/* We can release the conntrack objects at this point because the +	 * setter makes a copy of them. This is not efficient, it would be +	 * better to save that extra copy but this is how the library works. +	 * I'm sorry, I cannot change it without breaking backward +	 * compatibility. Probably it is a good idea to think of adding new +	 * interfaces in the near future to get it better. */ +	nfct_destroy(mask); +	nfct_destroy(expected); +	nfct_destroy(master); + +	return 0; +err: +	nfct_destroy(mask); +err_mask: +	nfct_destroy(expected); +err_expected: +	nfct_destroy(master); +err_master: +	return -1; +} diff --git a/src/read_config_lex.l b/src/read_config_lex.l index be6bf8b..01fe4fc 100644 --- a/src/read_config_lex.l +++ b/src/read_config_lex.l @@ -140,6 +140,7 @@ notrack		[N|n][O|o][T|t][R|r][A|a][C|c][K|k]  "DisableExternalCache"		{ return T_DISABLE_EXTERNAL_CACHE; }  "Options"			{ return T_OPTIONS; }  "TCPWindowTracking"		{ return T_TCP_WINDOW_TRACKING; } +"ExpectationSync"		{ return T_EXPECT_SYNC; }  "ErrorQueueLength"		{ return T_ERROR_QUEUE_LENGTH; }  {is_on}			{ return T_ON; } diff --git a/src/read_config_yy.y b/src/read_config_yy.y index 68a83f7..b22784c 100644 --- a/src/read_config_yy.y +++ b/src/read_config_yy.y @@ -73,7 +73,7 @@ static void __max_dedicated_links_reached(void);  %token T_NETLINK_OVERRUN_RESYNC T_NICE T_IPV4_DEST_ADDR T_IPV6_DEST_ADDR  %token T_SCHEDULER T_TYPE T_PRIO T_NETLINK_EVENTS_RELIABLE  %token T_DISABLE_INTERNAL_CACHE T_DISABLE_EXTERNAL_CACHE T_ERROR_QUEUE_LENGTH -%token T_OPTIONS T_TCP_WINDOW_TRACKING +%token T_OPTIONS T_TCP_WINDOW_TRACKING T_EXPECT_SYNC  %token <string> T_IP T_PATH_VAL  %token <val> T_NUMBER @@ -828,6 +828,46 @@ option: T_TCP_WINDOW_TRACKING T_OFF  	CONFIG(sync).tcp_window_tracking = 0;  }; +option: T_EXPECT_SYNC T_ON +{ +	CONFIG(flags) |= CTD_EXPECT; +	CONFIG(netlink).subsys_id = NFNL_SUBSYS_NONE; +	CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW | +				 NF_NETLINK_CONNTRACK_UPDATE | +				 NF_NETLINK_CONNTRACK_DESTROY | +				 NF_NETLINK_CONNTRACK_EXP_NEW | +				 NF_NETLINK_CONNTRACK_EXP_UPDATE | +				 NF_NETLINK_CONNTRACK_EXP_DESTROY; +}; + +option: T_EXPECT_SYNC T_OFF +{ +	CONFIG(netlink).subsys_id = NFNL_SUBSYS_CTNETLINK; +	CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW | +				 NF_NETLINK_CONNTRACK_UPDATE | +				 NF_NETLINK_CONNTRACK_DESTROY; +}; + +option: T_EXPECT_SYNC '{' expect_list '}' +{ +	CONFIG(flags) |= CTD_EXPECT; +	CONFIG(netlink).subsys_id = NFNL_SUBSYS_NONE; +	CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW | +				 NF_NETLINK_CONNTRACK_UPDATE | +				 NF_NETLINK_CONNTRACK_DESTROY | +				 NF_NETLINK_CONNTRACK_EXP_NEW | +				 NF_NETLINK_CONNTRACK_EXP_UPDATE | +				 NF_NETLINK_CONNTRACK_EXP_DESTROY; +}; + +expect_list: +            | expect_list expect_item ; + +expect_item: T_STRING +{ +	exp_filter_add(STATE(exp_filter), $1); +} +  sync_mode_alarm: T_SYNC_MODE T_ALARM '{' sync_mode_alarm_list '}'  {  	conf.flags |= CTD_SYNC_ALARM; @@ -1598,6 +1638,7 @@ init_config(char *filename)  	/* Zero may be a valid facility */  	CONFIG(syslog_facility) = -1;  	CONFIG(stats).syslog_facility = -1; +	CONFIG(netlink).subsys_id = -1;  	yyrestart(fp);  	yyparse(); @@ -1646,5 +1687,12 @@ init_config(char *filename)  	if (CONFIG(channelc).error_queue_length == 0)  		CONFIG(channelc).error_queue_length = 128; +	if (CONFIG(netlink).subsys_id == -1) { +		CONFIG(netlink).subsys_id = NFNL_SUBSYS_CTNETLINK; +		CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW | +					 NF_NETLINK_CONNTRACK_UPDATE | +					 NF_NETLINK_CONNTRACK_DESTROY; +	} +  	return 0;  } @@ -187,6 +187,62 @@ static void dump_stats_runtime(int fd)  	send(fd, buf, size, 0);  } +static void local_flush_master(void) +{ +	STATE(stats).nl_kernel_table_flush++; +	dlog(LOG_NOTICE, "flushing kernel conntrack table"); + +	/* fork a child process that performs the flush operation, +	 * meanwhile the parent process handles events. */ +	if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL, +			     NULL, NULL) == 0) { +		nl_flush_conntrack_table(STATE(flush)); +		exit(EXIT_SUCCESS); +	} +} + +static void local_resync_master(void) +{ +	if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { +		STATE(stats).nl_kernel_table_resync++; +		dlog(LOG_NOTICE, "resync with master conntrack table"); +		nl_dump_conntrack_table(STATE(dump)); +	} else { +		dlog(LOG_NOTICE, "resync is unsupported in this mode"); +	} +} + +static void local_exp_flush_master(void) +{ +	if (!(CONFIG(flags) & CTD_EXPECT)) +		return; + +	STATE(stats).nl_kernel_table_flush++; +	dlog(LOG_NOTICE, "flushing kernel expect table"); + +	/* fork a child process that performs the flush operation, +	 * meanwhile the parent process handles events. */ +	if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL, +			     NULL, NULL) == 0) { +		nl_flush_expect_table(STATE(flush)); +		exit(EXIT_SUCCESS); +	} +} + +static void local_exp_resync_master(void) +{ +	if (!(CONFIG(flags) & CTD_EXPECT)) +		return; + +	if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { +		STATE(stats).nl_kernel_table_resync++; +		dlog(LOG_NOTICE, "resync with master expect table"); +		nl_dump_expect_table(STATE(dump)); +	} else { +		dlog(LOG_NOTICE, "resync is unsupported in this mode"); +	} +} +  static int local_handler(int fd, void *data)  {  	int ret = LOCAL_RET_OK; @@ -198,25 +254,24 @@ static int local_handler(int fd, void *data)  	}  	switch(type) {  	case CT_FLUSH_MASTER: -		STATE(stats).nl_kernel_table_flush++; -		dlog(LOG_NOTICE, "flushing kernel conntrack table"); - -		/* fork a child process that performs the flush operation, -		 * meanwhile the parent process handles events. */ -		if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL, -				     NULL, NULL) == 0) { -			nl_flush_conntrack_table(STATE(flush)); -			exit(EXIT_SUCCESS); -		} +		local_flush_master();  		break;  	case CT_RESYNC_MASTER: -		if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { -			STATE(stats).nl_kernel_table_resync++; -			dlog(LOG_NOTICE, "resync with master table"); -			nl_dump_conntrack_table(STATE(dump)); -		} else { -			dlog(LOG_NOTICE, "resync is unsupported in this mode"); -		} +		local_resync_master(); +		break; +	case EXP_FLUSH_MASTER: +		local_exp_flush_master(); +		break; +	case EXP_RESYNC_MASTER: +		local_exp_resync_master(); +		break; +	case ALL_FLUSH_MASTER: +		local_flush_master(); +		local_exp_flush_master(); +		break; +	case ALL_RESYNC_MASTER: +		local_resync_master(); +		local_exp_resync_master();  		break;  	case STATS_RUNTIME:  		dump_stats_runtime(fd); @@ -245,7 +300,11 @@ static void do_polling_alarm(struct alarm_block *a, void *data)  	if (STATE(mode)->internal->ct.purge)  		STATE(mode)->internal->ct.purge(); +	if (STATE(mode)->internal->exp.purge) +		STATE(mode)->internal->exp.purge(); +  	nl_send_resync(STATE(resync)); +	nl_send_expect_resync(STATE(resync));  	add_alarm(&STATE(polling_alarm), CONFIG(poll_kernel_secs), 0);  } @@ -290,6 +349,49 @@ out:  		return NFCT_CB_CONTINUE;  } +static int exp_event_handler(const struct nlmsghdr *nlh, +			     enum nf_conntrack_msg_type type, +			     struct nf_expect *exp, +			     void *data) +{ +	int origin_type; +	const struct nf_conntrack *master = +		nfexp_get_attr(exp, ATTR_EXP_MASTER); + +	STATE(stats).nl_events_received++; + +	if (!exp_filter_find(STATE(exp_filter), exp)) { +		STATE(stats).nl_events_filtered++; +		goto out; +	} +	if (ct_filter_conntrack(master, 1)) +		return NFCT_CB_CONTINUE; + +	origin_type = origin_find(nlh); + +	switch(type) { +	case NFCT_T_NEW: +		STATE(mode)->internal->exp.new(exp, origin_type); +		break; +	case NFCT_T_UPDATE: +		STATE(mode)->internal->exp.upd(exp, origin_type); +		break; +	case NFCT_T_DESTROY: +		STATE(mode)->internal->exp.del(exp, origin_type); +		break; +	default: +		STATE(stats).nl_events_unknown_type++; +		break; +	} + +out: +	/* we reset the iteration limiter in the main select loop. */ +	if (STATE(event_iterations_limit)-- <= 0) +		return NFCT_CB_STOP; +	else +		return NFCT_CB_CONTINUE; +} +  static int dump_handler(enum nf_conntrack_msg_type type,  			struct nf_conntrack *ct,  			void *data) @@ -308,6 +410,29 @@ static int dump_handler(enum nf_conntrack_msg_type type,  	return NFCT_CB_CONTINUE;  } +static int exp_dump_handler(enum nf_conntrack_msg_type type, +			    struct nf_expect *exp, void *data) +{ +	const struct nf_conntrack *master = +		nfexp_get_attr(exp, ATTR_EXP_MASTER); + +	if (!exp_filter_find(STATE(exp_filter), exp)) +		return NFCT_CB_CONTINUE; + +	if (ct_filter_conntrack(master, 1)) +		return NFCT_CB_CONTINUE; + +	switch(type) { +	case NFCT_T_UPDATE: +		STATE(mode)->internal->exp.populate(exp); +		break; +	default: +		STATE(stats).nl_dump_unknown_type++; +		break; +	} +	return NFCT_CB_CONTINUE; +} +  static int get_handler(enum nf_conntrack_msg_type type,  		       struct nf_conntrack *ct,  		       void *data) @@ -319,6 +444,22 @@ static int get_handler(enum nf_conntrack_msg_type type,  	return NFCT_CB_CONTINUE;  } +static int exp_get_handler(enum nf_conntrack_msg_type type, +			   struct nf_expect *exp, void *data) +{ +	const struct nf_conntrack *master = +		nfexp_get_attr(exp, ATTR_EXP_MASTER); + +	if (!exp_filter_find(STATE(exp_filter), exp)) +		return NFCT_CB_CONTINUE; + +	if (ct_filter_conntrack(master, 1)) +		return NFCT_CB_CONTINUE; + +	STATE(get_retval) = 1; +	return NFCT_CB_CONTINUE; +} +  int  init(void)  { @@ -355,7 +496,7 @@ init(void)  	register_fd(STATE(local).fd, STATE(fds));  	/* resynchronize (like 'dump' socket) but it also purges old entries */ -	STATE(resync) = nfct_open(CONNTRACK, 0); +	STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0);  	if (STATE(resync)== NULL) {  		dlog(LOG_ERR, "can't open netlink handler: %s",  		     strerror(errno)); @@ -370,7 +511,7 @@ init(void)  	fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK);  	if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { -		STATE(dump) = nfct_open(CONNTRACK, 0); +		STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0);  		if (STATE(dump) == NULL) {  			dlog(LOG_ERR, "can't open netlink handler: %s",  			     strerror(errno)); @@ -380,13 +521,26 @@ init(void)  		nfct_callback_register(STATE(dump), NFCT_T_ALL,  				       dump_handler, NULL); +		if (CONFIG(flags) & CTD_EXPECT) { +			nfexp_callback_register(STATE(dump), NFCT_T_ALL, +						exp_dump_handler, NULL); +		} +  		if (nl_dump_conntrack_table(STATE(dump)) == -1) {  			dlog(LOG_ERR, "can't get kernel conntrack table");  			return -1;  		} + +		if (CONFIG(flags) & CTD_EXPECT) { +			if (nl_dump_expect_table(STATE(dump)) == -1) { +				dlog(LOG_ERR, "can't get kernel " +					      "expect table"); +				return -1; +			} +		}  	} -	STATE(get) = nfct_open(CONNTRACK, 0); +	STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0);  	if (STATE(get) == NULL) {  		dlog(LOG_ERR, "can't open netlink handler: %s",  		     strerror(errno)); @@ -395,7 +549,12 @@ init(void)  	}  	nfct_callback_register(STATE(get), NFCT_T_ALL, get_handler, NULL); -	STATE(flush) = nfct_open(CONNTRACK, 0); +	if (CONFIG(flags) & CTD_EXPECT) { +		nfexp_callback_register(STATE(get), NFCT_T_ALL, +					exp_get_handler, NULL); +	} + +	STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0);  	if (STATE(flush) == NULL) {  		dlog(LOG_ERR, "cannot open flusher handler");  		return -1; @@ -426,6 +585,11 @@ init(void)  		}  		nfct_callback_register2(STATE(event), NFCT_T_ALL,  				        event_handler, NULL); + +		if (CONFIG(flags) & CTD_EXPECT) { +			nfexp_callback_register2(STATE(event), NFCT_T_ALL, +						 exp_event_handler, NULL); +		}  		register_fd(nfct_fd(STATE(event)), STATE(fds));  	} diff --git a/src/sync-ftfw.c b/src/sync-ftfw.c index fa76c0c..1bc2d9f 100644 --- a/src/sync-ftfw.c +++ b/src/sync-ftfw.c @@ -231,6 +231,8 @@ static int ftfw_local(int fd, int type, void *data)  		dlog(LOG_NOTICE, "sending bulk update");  		cache_iterate(STATE(mode)->internal->ct.data,  			      NULL, do_cache_to_tx); +		cache_iterate(STATE(mode)->internal->exp.data, +			      NULL, do_cache_to_tx);  		break;  	case STATS_RSQUEUE:  		ftfw_local_queue(fd); @@ -350,7 +352,10 @@ static int digest_msg(const struct nethdr *net)  	} else if (IS_RESYNC(net)) {  		dp("RESYNC ALL\n"); -		cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); +		cache_iterate(STATE(mode)->internal->ct.data, NULL, +			      do_cache_to_tx); +		cache_iterate(STATE(mode)->internal->exp.data, NULL, +			      do_cache_to_tx);  		return MSG_CTL;  	} else if (IS_ALIVE(net)) diff --git a/src/sync-mode.c b/src/sync-mode.c index fa522c7..2505631 100644 --- a/src/sync-mode.c +++ b/src/sync-mode.c @@ -59,10 +59,29 @@ static struct nf_conntrack *msg2ct_alloc(struct nethdr *net, size_t remain)  	return ct;  } +static struct nf_expect *msg2exp_alloc(struct nethdr *net, size_t remain) +{ +	struct nf_expect *exp; + +	/* TODO: add stats on ENOMEM errors in the future. */ +	exp = nfexp_new(); +	if (exp == NULL) +		return NULL; + +	if (msg2exp(exp, net, remain) == -1) { +		STATE_SYNC(error).msg_rcv_malformed++; +		STATE_SYNC(error).msg_rcv_bad_payload++; +		nfexp_destroy(exp); +		return NULL; +	} +	return exp; +} +  static void  do_channel_handler_step(int i, struct nethdr *net, size_t remain)  { -	struct nf_conntrack *ct; +	struct nf_conntrack *ct = NULL; +	struct nf_expect *exp = NULL;  	if (net->version != CONNTRACKD_PROTOCOL_VERSION) {  		STATE_SYNC(error).msg_rcv_malformed++; @@ -112,12 +131,33 @@ do_channel_handler_step(int i, struct nethdr *net, size_t remain)  			return;  		STATE_SYNC(external)->ct.del(ct);  		break; +	case NET_T_STATE_EXP_NEW: +		exp = msg2exp_alloc(net, remain); +		if (exp == NULL) +			return; +		STATE_SYNC(external)->exp.new(exp); +		break; +	case NET_T_STATE_EXP_UPD: +		exp = msg2exp_alloc(net, remain); +		if (exp == NULL) +			return; +		STATE_SYNC(external)->exp.upd(exp); +		break; +	case NET_T_STATE_EXP_DEL: +		exp = msg2exp_alloc(net, remain); +		if (exp == NULL) +			return; +		STATE_SYNC(external)->exp.del(exp); +		break;  	default:  		STATE_SYNC(error).msg_rcv_malformed++;  		STATE_SYNC(error).msg_rcv_bad_type++;  		break;  	} -	nfct_destroy(ct); +	if (ct != NULL) +		nfct_destroy(ct); +	if (exp != NULL) +		nfexp_destroy(exp);  }  static char __net[65536];		/* XXX: maximum MTU for IPv4 */ @@ -351,7 +391,7 @@ static int init_sync(void)  							STATE(fds)) == -1)  		return -1; -	STATE_SYNC(commit).h = nfct_open(CONNTRACK, 0); +	STATE_SYNC(commit).h = nfct_open(CONFIG(netlink).subsys_id, 0);  	if (STATE_SYNC(commit).h == NULL) {  		dlog(LOG_ERR, "can't create handler to commit");  		return -1; @@ -402,8 +442,30 @@ static void run_sync(fd_set *readfds)  		interface_handler();  	if (FD_ISSET(get_read_evfd(STATE_SYNC(commit).evfd), readfds)) { +		int ret; +  		read_evfd(STATE_SYNC(commit).evfd); -		STATE_SYNC(external)->ct.commit(STATE_SYNC(commit).h, 0); + +		ret = STATE_SYNC(commit).rq[0].cb(STATE_SYNC(commit).h, 0); +		if (ret == 0) { +			/* we still have things in the callback queue. */ +			if (STATE_SYNC(commit).rq[1].cb) { +				int fd = STATE_SYNC(commit).clientfd; + +				STATE_SYNC(commit).rq[0].cb = +					STATE_SYNC(commit).rq[1].cb; + +				STATE_SYNC(commit).rq[1].cb = NULL; + +				STATE_SYNC(commit).clientfd = -1; +				STATE_SYNC(commit).rq[0].cb( +					STATE_SYNC(commit).h, fd); +			} else { +				/* Close the client socket now, we're done. */ +				close(STATE_SYNC(commit).clientfd); +				STATE_SYNC(commit).clientfd = -1; +			} +		}  	}  	/* flush pending messages */ @@ -480,6 +542,27 @@ static void dump_stats_sync_extended(int fd)  	send(fd, buf, size, 0);  } +static int local_commit(int fd) +{ +	int ret; + +	/* delete the reset alarm if any before committing */ +	del_alarm(&STATE_SYNC(reset_cache_alarm)); + +	ret = STATE_SYNC(commit).rq[0].cb(STATE_SYNC(commit).h, fd); +	if (ret == -1) { +		dlog(LOG_NOTICE, "commit already in progress, skipping"); +		ret = LOCAL_RET_OK; +	} else if (ret == 0) { +		/* we've finished the commit. */ +		ret = LOCAL_RET_OK; +	} else { +		/* Keep open the client, we want synchronous commit. */ +		ret = LOCAL_RET_STOLEN; +	} +	return ret; +} +  /* handler for requests coming via UNIX socket */  static int local_handler_sync(int fd, int type, void *data)  { @@ -511,19 +594,10 @@ static int local_handler_sync(int fd, int type, void *data)  		}  		break;  	case CT_COMMIT: -		/* delete the reset alarm if any before committing */ -		del_alarm(&STATE_SYNC(reset_cache_alarm)); - -		dlog(LOG_NOTICE, "committing external cache"); -		ret = STATE_SYNC(external)->ct.commit(STATE_SYNC(commit).h, fd); -		if (ret == 0) { -			dlog(LOG_NOTICE, "commit already in progress, " -					 "skipping"); -			ret = LOCAL_RET_OK; -		} else { -			/* Keep open the client, we want synchronous commit. */ -			ret = LOCAL_RET_STOLEN; -		} +		dlog(LOG_NOTICE, "committing conntrack cache"); +		STATE_SYNC(commit).rq[0].cb = STATE_SYNC(external)->ct.commit; +		STATE_SYNC(commit).rq[1].cb = NULL; +		ret = local_commit(fd);  		break;  	case RESET_TIMERS:  		if (!alarm_pending(&STATE_SYNC(reset_cache_alarm))) { @@ -575,6 +649,63 @@ static int local_handler_sync(int fd, int type, void *data)  	case STATS_QUEUE:  		queue_stats_show(fd);  		break; +	case EXP_STATS: +		if (!(CONFIG(flags) & CTD_EXPECT)) +			break; + +		STATE(mode)->internal->exp.stats(fd); +		STATE_SYNC(external)->exp.stats(fd); +		dump_traffic_stats(fd); +		multichannel_stats(STATE_SYNC(channel), fd); +		dump_stats_sync(fd); +		break; +	case EXP_DUMP_INTERNAL: +		if (!(CONFIG(flags) & CTD_EXPECT)) +			break; + +		if (fork_process_new(CTD_PROC_ANY, 0, NULL, NULL) == 0) { +			STATE(mode)->internal->exp.dump(fd, NFCT_O_PLAIN); +			exit(EXIT_SUCCESS); +		} +		break; +	case EXP_DUMP_EXTERNAL: +		if (!(CONFIG(flags) & CTD_EXPECT)) +			break; + +		if (fork_process_new(CTD_PROC_ANY, 0, NULL, NULL) == 0) { +			STATE_SYNC(external)->exp.dump(fd, NFCT_O_PLAIN); +			exit(EXIT_SUCCESS); +		} +		break; +	case EXP_COMMIT: +		if (!(CONFIG(flags) & CTD_EXPECT)) +			break; + +		dlog(LOG_NOTICE, "committing expectation cache"); +		STATE_SYNC(commit).rq[0].cb = STATE_SYNC(external)->exp.commit; +		STATE_SYNC(commit).rq[1].cb = NULL; +		local_commit(fd); +		break; +	case ALL_FLUSH_CACHE: +		dlog(LOG_NOTICE, "flushing caches"); +		STATE(mode)->internal->ct.flush(); +		STATE_SYNC(external)->ct.flush(); +		if (CONFIG(flags) & CTD_EXPECT) { +			STATE(mode)->internal->exp.flush(); +			STATE_SYNC(external)->exp.flush(); +		} +		break; +	case ALL_COMMIT: +		dlog(LOG_NOTICE, "committing all external caches"); +		STATE_SYNC(commit).rq[0].cb = STATE_SYNC(external)->ct.commit; +		if (CONFIG(flags) & CTD_EXPECT) { +			STATE_SYNC(commit).rq[1].cb = +				STATE_SYNC(external)->exp.commit; +		} else { +			STATE_SYNC(commit).rq[1].cb = NULL; +		} +		local_commit(fd); +		break;  	default:  		if (STATE_SYNC(sync)->local)  			ret = STATE_SYNC(sync)->local(fd, type, data); diff --git a/src/sync-notrack.c b/src/sync-notrack.c index 06ad1f0..a7df4e7 100644 --- a/src/sync-notrack.c +++ b/src/sync-notrack.c @@ -102,7 +102,7 @@ static void kernel_resync(void)  	u_int32_t family = AF_UNSPEC;  	int ret; -	h = nfct_open(CONNTRACK, 0); +	h = nfct_open(CONFIG(netlink).subsys_id, 0);  	if (h == NULL) {  		dlog(LOG_ERR, "can't allocate memory for the internal cache");  		return; @@ -131,6 +131,8 @@ static int notrack_local(int fd, int type, void *data)  		} else {  			cache_iterate(STATE(mode)->internal->ct.data,  				      NULL, do_cache_to_tx); +			cache_iterate(STATE(mode)->internal->exp.data, +				      NULL, do_cache_to_tx);  		}  		break;  	default: @@ -152,6 +154,8 @@ static int digest_msg(const struct nethdr *net)  		} else {  			cache_iterate(STATE(mode)->internal->ct.data,  				      NULL, do_cache_to_tx); +			cache_iterate(STATE(mode)->internal->exp.data, +				      NULL, do_cache_to_tx);  		}  		return MSG_CTL;  	} | 
