From 8ad5df6121c46753a6d12fafa5ab9da309ddb721 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 21 Oct 2009 01:43:07 +0200 Subject: conntrackd: add `DisableInternalCache' clause This patch adds the clause `DisableInternalCache' that allows you to bypass the internal cache. This clause can only be used with the notrack synchronization mode. Signed-off-by: Pablo Neira Ayuso --- include/internal.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 include/internal.h (limited to 'include/internal.h') diff --git a/include/internal.h b/include/internal.h new file mode 100644 index 0000000..1f11340 --- /dev/null +++ b/include/internal.h @@ -0,0 +1,39 @@ +#ifndef _INTERNAL_H_ +#define _INTERNAL_H_ + +#include + +struct nf_conntrack; + +enum { + INTERNAL_F_POPULATE = (1 << 0), + INTERNAL_F_RESYNC = (1 << 1), + INTERNAL_F_MAX = (1 << 2) +}; + +struct internal_handler { + void *data; + unsigned int flags; + + int (*init)(void); + void (*close)(void); + + void (*new)(struct nf_conntrack *ct, int origin_type); + void (*update)(struct nf_conntrack *ct, int origin_type); + int (*destroy)(struct nf_conntrack *ct, int origin_type); + + void (*dump)(int fd, int type); + void (*populate)(struct nf_conntrack *ct); + void (*purge)(void); + int (*resync)(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, void *data); + void (*flush)(void); + + void (*stats)(int fd); + void (*stats_ext)(int fd); +}; + +extern struct internal_handler internal_cache; +extern struct internal_handler internal_bypass; + +#endif -- cgit v1.2.3 From 65be3d49b0f4350a227dedd70ac17c7c9cf6274e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 4 Jan 2012 14:28:50 +0100 Subject: conntrackd: generalize caching infrastructure This patch generalizes the caching infrastructure to store different object types. This patch is the first in the series to prepare support for the synchronization of expectations. Signed-off-by: Pablo Neira Ayuso --- include/cache.h | 57 +++++++- include/internal.h | 27 ++-- src/Makefile.am | 2 +- src/cache-ct.c | 358 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/cache.c | 154 +++++++++------------- src/cache_iterators.c | 268 ------------------------------------- src/external_cache.c | 4 +- src/internal_bypass.c | 67 +++++----- src/internal_cache.c | 104 ++++++++------- src/run.c | 23 ++-- src/stats-mode.c | 42 +++--- src/sync-alarm.c | 11 +- src/sync-ftfw.c | 23 ++-- src/sync-mode.c | 19 +-- src/sync-notrack.c | 12 +- 15 files changed, 647 insertions(+), 524 deletions(-) create mode 100644 src/cache-ct.c delete mode 100644 src/cache_iterators.c (limited to 'include/internal.h') diff --git a/include/cache.h b/include/cache.h index ddf2049..a42e395 100644 --- a/include/cache.h +++ b/include/cache.h @@ -27,7 +27,7 @@ enum { struct cache; struct cache_object { struct hashtable_node hashnode; - struct nf_conntrack *ct; + void *ptr; struct cache *cache; int status; int refcnt; @@ -48,14 +48,22 @@ extern struct cache_feature timer_feature; #define CACHE_MAX_NAMELEN 32 +enum cache_type { + CACHE_T_NONE = 0, + CACHE_T_CT, + CACHE_T_MAX +}; + struct cache { char name[CACHE_MAX_NAMELEN]; + enum cache_type type; struct hashtable *h; unsigned int num_features; struct cache_feature **features; unsigned int feature_type[CACHE_MAX_FEATURE]; unsigned int *feature_offset; + struct cache_ops *ops; struct cache_extra *extra; unsigned int extra_offset; size_t object_size; @@ -94,22 +102,48 @@ struct cache_extra { void (*destroy)(struct cache_object *obj, void *data); }; +struct nfct_handle; + +/* cache options depends on the object type: conntrack or expectation. */ +struct cache_ops { + /* hashing and comparison of objects. */ + uint32_t (*hash)(const void *data, const struct hashtable *table); + int (*cmp)(const void *data1, const void *data2); + + /* object allocation, copy and release. */ + void *(*alloc)(void); + void (*copy)(void *dst, void *src, unsigned int flags); + void (*free)(void *ptr); + + /* dump and commit. */ + int (*dump_step)(void *data1, void *n); + int (*commit)(struct cache *c, struct nfct_handle *h, int clientfd); + + /* build network message from object. */ + struct nethdr *(*build_msg)(const struct cache_object *obj, int type); +}; + +/* templates to configure conntrack caching. */ +extern struct cache_ops cache_sync_internal_ct_ops; +extern struct cache_ops cache_sync_external_ct_ops; +extern struct cache_ops cache_stats_ct_ops; + struct nf_conntrack; -struct cache *cache_create(const char *name, unsigned int features, struct cache_extra *extra); +struct cache *cache_create(const char *name, enum cache_type type, unsigned int features, struct cache_extra *extra, struct cache_ops *ops); void cache_destroy(struct cache *e); -struct cache_object *cache_object_new(struct cache *c, struct nf_conntrack *ct); +struct cache_object *cache_object_new(struct cache *c, void *ptr); void cache_object_free(struct cache_object *obj); void cache_object_get(struct cache_object *obj); int cache_object_put(struct cache_object *obj); void cache_object_set_status(struct cache_object *obj, int status); int cache_add(struct cache *c, struct cache_object *obj, int id); -void cache_update(struct cache *c, struct cache_object *obj, int id, struct nf_conntrack *ct); -struct cache_object *cache_update_force(struct cache *c, struct nf_conntrack *ct); +void cache_update(struct cache *c, struct cache_object *obj, int id, void *ptr); +struct cache_object *cache_update_force(struct cache *c, void *ptr); void cache_del(struct cache *c, struct cache_object *obj); -struct cache_object *cache_find(struct cache *c, struct nf_conntrack *ct, int *pos); +struct cache_object *cache_find(struct cache *c, void *ptr, int *pos); void cache_stats(const struct cache *c, int fd); void cache_stats_extended(const struct cache *c, int fd); struct cache_object *cache_data_get_object(struct cache *c, void *data); @@ -120,7 +154,18 @@ void cache_iterate_limit(struct cache *c, void *data, uint32_t from, uint32_t st /* iterators */ struct nfct_handle; +struct __dump_container { + int fd; + int type; +}; + void cache_dump(struct cache *c, int fd, int type); + +struct __commit_container { + struct nfct_handle *h; + struct cache *c; +}; + int cache_commit(struct cache *c, struct nfct_handle *h, int clientfd); void cache_flush(struct cache *c); void cache_bulk(struct cache *c); diff --git a/include/internal.h b/include/internal.h index 1f11340..f50eb79 100644 --- a/include/internal.h +++ b/include/internal.h @@ -12,25 +12,28 @@ enum { }; struct internal_handler { - void *data; unsigned int flags; int (*init)(void); void (*close)(void); - void (*new)(struct nf_conntrack *ct, int origin_type); - void (*update)(struct nf_conntrack *ct, int origin_type); - int (*destroy)(struct nf_conntrack *ct, int origin_type); + struct { + void *data; - void (*dump)(int fd, int type); - void (*populate)(struct nf_conntrack *ct); - void (*purge)(void); - int (*resync)(enum nf_conntrack_msg_type type, - struct nf_conntrack *ct, void *data); - void (*flush)(void); + void (*new)(struct nf_conntrack *ct, int origin_type); + void (*upd)(struct nf_conntrack *ct, int origin_type); + int (*del)(struct nf_conntrack *ct, int origin_type); - void (*stats)(int fd); - void (*stats_ext)(int fd); + void (*dump)(int fd, int type); + void (*populate)(struct nf_conntrack *ct); + void (*purge)(void); + int (*resync)(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, void *data); + void (*flush)(void); + + void (*stats)(int fd); + void (*stats_ext)(int fd); + } ct; }; extern struct internal_handler internal_cache; diff --git a/src/Makefile.am b/src/Makefile.am index 70e496d..a0abeee 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -12,7 +12,7 @@ conntrack_LDADD = ../extensions/libct_proto_tcp.la ../extensions/libct_proto_udp conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \ local.c log.c mcast.c udp.c netlink.c vector.c \ filter.c fds.c event.c process.c origin.c date.c \ - cache.c cache_iterators.c \ + cache.c cache-ct.c \ cache_timer.c \ sync-mode.c sync-alarm.c sync-ftfw.c sync-notrack.c \ traffic_stats.c stats-mode.c \ diff --git a/src/cache-ct.c b/src/cache-ct.c new file mode 100644 index 0000000..2c6fd4e --- /dev/null +++ b/src/cache-ct.c @@ -0,0 +1,358 @@ +/* + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "cache.h" +#include "hash.h" +#include "log.h" +#include "conntrackd.h" +#include "netlink.h" +#include "event.h" +#include "jhash.h" +#include "network.h" + +#include +#include +#include +#include + +static uint32_t +cache_hash4_ct(const struct nf_conntrack *ct, const struct hashtable *table) +{ + uint32_t a[4] = { + [0] = nfct_get_attr_u32(ct, ATTR_IPV4_SRC), + [1] = nfct_get_attr_u32(ct, ATTR_IPV4_DST), + [2] = nfct_get_attr_u8(ct, ATTR_L3PROTO) << 16 | + nfct_get_attr_u8(ct, ATTR_L4PROTO), + [3] = nfct_get_attr_u16(ct, ATTR_PORT_SRC) << 16 | + nfct_get_attr_u16(ct, ATTR_PORT_DST), + }; + + /* + * Instead of returning hash % table->hashsize (implying a divide) + * we return the high 32 bits of the (hash * table->hashsize) that will + * give results between [0 and hashsize-1] and same hash distribution, + * but using a multiply, less expensive than a divide. See: + * http://www.mail-archive.com/netdev@vger.kernel.org/msg56623.html + */ + return ((uint64_t)jhash2(a, 4, 0) * table->hashsize) >> 32; +} + +static uint32_t +cache_hash6_ct(const struct nf_conntrack *ct, const struct hashtable *table) +{ + uint32_t a[10]; + + memcpy(&a[0], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); + memcpy(&a[4], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); + a[8] = nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16 | + nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO); + a[9] = nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16 | + nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST); + + return ((uint64_t)jhash2(a, 10, 0) * table->hashsize) >> 32; +} + +static uint32_t +cache_ct_hash(const void *data, const struct hashtable *table) +{ + int ret = 0; + const struct nf_conntrack *ct = data; + + switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) { + case AF_INET: + ret = cache_hash4_ct(ct, table); + break; + case AF_INET6: + ret = cache_hash6_ct(ct, table); + break; + default: + dlog(LOG_ERR, "unknown layer 3 proto in hash"); + break; + } + return ret; +} + +static int cache_ct_cmp(const void *data1, const void *data2) +{ + const struct cache_object *obj = data1; + const struct nf_conntrack *ct = data2; + + return nfct_cmp(obj->ptr, ct, NFCT_CMP_ORIG) && + nfct_get_attr_u32(obj->ptr, ATTR_ID) == + nfct_get_attr_u32(ct, ATTR_ID); +} + +static void *cache_ct_alloc(void) +{ + return nfct_new(); +} + +static void cache_ct_free(void *ptr) +{ + nfct_destroy(ptr); +} + +static void cache_ct_copy(void *dst, void *src, unsigned int flags) +{ + nfct_copy(dst, src, flags); +} + +static int cache_ct_dump_step(void *data1, void *n) +{ + char buf[1024]; + int size; + struct __dump_container *container = data1; + struct cache_object *obj = n; + char *data = obj->data; + unsigned i; + + /* + * XXX: Do not dump the entries that are scheduled to expire. + * These entries talk about already destroyed connections + * that we keep for some time just in case that we have to + * resent some lost messages. We do not show them to the + * user as he may think that the firewall replicas are not + * in sync. The branch below is a hack as it is quite + * specific and it breaks conntrackd modularity. Probably + * there's a nicer way to do this but until I come up with it... + */ + if (CONFIG(flags) & CTD_SYNC_FTFW && obj->status == C_OBJ_DEAD) + return 0; + + /* do not show cached timeout, this may confuse users */ + if (nfct_attr_is_set(obj->ptr, ATTR_TIMEOUT)) + nfct_attr_unset(obj->ptr, ATTR_TIMEOUT); + + memset(buf, 0, sizeof(buf)); + size = nfct_snprintf(buf, + sizeof(buf), + obj->ptr, + NFCT_T_UNKNOWN, + container->type, + 0); + + for (i = 0; i < obj->cache->num_features; i++) { + if (obj->cache->features[i]->dump) { + size += obj->cache->features[i]->dump(obj, + data, + buf+size, + container->type); + data += obj->cache->features[i]->size; + } + } + if (container->type != NFCT_O_XML) { + long tm = time(NULL); + size += sprintf(buf+size, " [active since %lds]", + tm - obj->lifetime); + } + size += sprintf(buf+size, "\n"); + if (send(container->fd, buf, size, 0) == -1) { + if (errno != EPIPE) + return -1; + } + + return 0; +} + +static void +cache_ct_commit_step(struct __commit_container *tmp, struct cache_object *obj) +{ + int ret, retry = 1, timeout; + struct nf_conntrack *ct = obj->ptr; + + if (CONFIG(commit_timeout)) { + timeout = CONFIG(commit_timeout); + } else { + timeout = time(NULL) - obj->lastupdate; + if (timeout < 0) { + /* XXX: Arbitrarily set the timer to one minute, how + * can this happen? For example, an adjustment due to + * daylight-saving. Probably other situations can + * trigger this. */ + timeout = 60; + } + /* calculate an estimation of the current timeout */ + timeout = nfct_get_attr_u32(ct, ATTR_TIMEOUT) - timeout; + if (timeout < 0) { + timeout = 60; + } + } + +retry: + if (nl_create_conntrack(tmp->h, ct, timeout) == -1) { + if (errno == EEXIST && retry == 1) { + ret = nl_destroy_conntrack(tmp->h, ct); + if (ret == 0 || (ret == -1 && errno == ENOENT)) { + if (retry) { + retry = 0; + goto retry; + } + } + dlog(LOG_ERR, "commit-destroy: %s", strerror(errno)); + dlog_ct(STATE(log), ct, NFCT_O_PLAIN); + tmp->c->stats.commit_fail++; + } else { + dlog(LOG_ERR, "commit-create: %s", strerror(errno)); + dlog_ct(STATE(log), ct, NFCT_O_PLAIN); + tmp->c->stats.commit_fail++; + } + } else { + tmp->c->stats.commit_ok++; + } +} + +static int cache_ct_commit_related(void *data, void *n) +{ + struct cache_object *obj = n; + + if (ct_is_related(obj->ptr)) + cache_ct_commit_step(data, obj); + + /* keep iterating even if we have found errors */ + return 0; +} + +static int cache_ct_commit_master(void *data, void *n) +{ + struct cache_object *obj = n; + + if (ct_is_related(obj->ptr)) + return 0; + + cache_ct_commit_step(data, obj); + return 0; +} + +static int cache_ct_commit(struct cache *c, struct nfct_handle *h, int clientfd) +{ + unsigned int commit_ok, commit_fail; + struct __commit_container tmp = { + .h = h, + .c = c, + }; + struct timeval commit_stop, res; + + /* we already have one commit in progress, skip this. The clientfd + * descriptor has to be closed by the caller. */ + if (clientfd && STATE_SYNC(commit).clientfd != -1) + return 0; + + switch(STATE_SYNC(commit).state) { + case COMMIT_STATE_INACTIVE: + gettimeofday(&STATE_SYNC(commit).stats.start, NULL); + STATE_SYNC(commit).stats.ok = c->stats.commit_ok; + STATE_SYNC(commit).stats.fail = c->stats.commit_fail; + STATE_SYNC(commit).clientfd = clientfd; + case COMMIT_STATE_MASTER: + STATE_SYNC(commit).current = + hashtable_iterate_limit(c->h, &tmp, + STATE_SYNC(commit).current, + CONFIG(general).commit_steps, + cache_ct_commit_master); + if (STATE_SYNC(commit).current < CONFIG(hashsize)) { + STATE_SYNC(commit).state = COMMIT_STATE_MASTER; + /* give it another step as soon as possible */ + write_evfd(STATE_SYNC(commit).evfd); + return 1; + } + STATE_SYNC(commit).current = 0; + STATE_SYNC(commit).state = COMMIT_STATE_RELATED; + case COMMIT_STATE_RELATED: + STATE_SYNC(commit).current = + hashtable_iterate_limit(c->h, &tmp, + STATE_SYNC(commit).current, + CONFIG(general).commit_steps, + cache_ct_commit_related); + if (STATE_SYNC(commit).current < CONFIG(hashsize)) { + STATE_SYNC(commit).state = COMMIT_STATE_RELATED; + /* give it another step as soon as possible */ + write_evfd(STATE_SYNC(commit).evfd); + return 1; + } + /* calculate the time that commit has taken */ + gettimeofday(&commit_stop, NULL); + timersub(&commit_stop, &STATE_SYNC(commit).stats.start, &res); + + /* calculate new entries committed */ + commit_ok = c->stats.commit_ok - STATE_SYNC(commit).stats.ok; + commit_fail = + c->stats.commit_fail - STATE_SYNC(commit).stats.fail; + + /* log results */ + dlog(LOG_NOTICE, "Committed %u new entries", commit_ok); + + if (commit_fail) + dlog(LOG_NOTICE, "%u entries can't be " + "committed", commit_fail); + + dlog(LOG_NOTICE, "commit has taken %lu.%06lu seconds", + res.tv_sec, res.tv_usec); + + /* prepare the state machine for new commits */ + STATE_SYNC(commit).current = 0; + STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE; + + /* Close the client socket now that we're done. */ + close(STATE_SYNC(commit).clientfd); + STATE_SYNC(commit).clientfd = -1; + } + return 1; +} + +static struct nethdr * +cache_ct_build_msg(const struct cache_object *obj, int type) +{ + return BUILD_NETMSG_FROM_CT(obj->ptr, type); +} + +/* template to cache conntracks coming from the kernel. */ +struct cache_ops cache_sync_internal_ct_ops = { + .hash = cache_ct_hash, + .cmp = cache_ct_cmp, + .alloc = cache_ct_alloc, + .free = cache_ct_free, + .copy = cache_ct_copy, + .dump_step = cache_ct_dump_step, + .commit = NULL, + .build_msg = cache_ct_build_msg, +}; + +/* template to cache conntracks coming from the network. */ +struct cache_ops cache_sync_external_ct_ops = { + .hash = cache_ct_hash, + .cmp = cache_ct_cmp, + .alloc = cache_ct_alloc, + .free = cache_ct_free, + .copy = cache_ct_copy, + .dump_step = cache_ct_dump_step, + .commit = cache_ct_commit, + .build_msg = NULL, +}; + +/* template to cache conntracks for the statistics mode. */ +struct cache_ops cache_stats_ct_ops = { + .hash = cache_ct_hash, + .cmp = cache_ct_cmp, + .alloc = cache_ct_alloc, + .free = cache_ct_free, + .copy = cache_ct_copy, + .dump_step = cache_ct_dump_step, + .commit = NULL, + .build_msg = NULL, +}; diff --git a/src/cache.c b/src/cache.c index f411121..efdab0e 100644 --- a/src/cache.c +++ b/src/cache.c @@ -1,5 +1,6 @@ /* - * (C) 2006-2009 by Pablo Neira Ayuso + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,80 +29,14 @@ #include #include -static uint32_t -__hash4(const struct nf_conntrack *ct, const struct hashtable *table) -{ - uint32_t a[4] = { - [0] = nfct_get_attr_u32(ct, ATTR_IPV4_SRC), - [1] = nfct_get_attr_u32(ct, ATTR_IPV4_DST), - [2] = nfct_get_attr_u8(ct, ATTR_L3PROTO) << 16 | - nfct_get_attr_u8(ct, ATTR_L4PROTO), - [3] = nfct_get_attr_u16(ct, ATTR_PORT_SRC) << 16 | - nfct_get_attr_u16(ct, ATTR_PORT_DST), - }; - - /* - * Instead of returning hash % table->hashsize (implying a divide) - * we return the high 32 bits of the (hash * table->hashsize) that will - * give results between [0 and hashsize-1] and same hash distribution, - * but using a multiply, less expensive than a divide. See: - * http://www.mail-archive.com/netdev@vger.kernel.org/msg56623.html - */ - return ((uint64_t)jhash2(a, 4, 0) * table->hashsize) >> 32; -} - -static uint32_t -__hash6(const struct nf_conntrack *ct, const struct hashtable *table) -{ - uint32_t a[10]; - - memcpy(&a[0], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); - memcpy(&a[4], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); - a[8] = nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16 | - nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO); - a[9] = nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16 | - nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST); - - return ((uint64_t)jhash2(a, 10, 0) * table->hashsize) >> 32; -} - -static uint32_t hash(const void *data, const struct hashtable *table) -{ - int ret = 0; - const struct nf_conntrack *ct = data; - - switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) { - case AF_INET: - ret = __hash4(ct, table); - break; - case AF_INET6: - ret = __hash6(ct, table); - break; - default: - dlog(LOG_ERR, "unknown layer 3 proto in hash"); - break; - } - - return ret; -} - -static int compare(const void *data1, const void *data2) -{ - const struct cache_object *obj = data1; - const struct nf_conntrack *ct = data2; - - return nfct_cmp(obj->ct, ct, NFCT_CMP_ORIG) && - nfct_get_attr_u32(obj->ct, ATTR_ID) == - nfct_get_attr_u32(ct, ATTR_ID); -} - struct cache_feature *cache_feature[CACHE_MAX_FEATURE] = { [TIMER_FEATURE] = &timer_feature, }; -struct cache *cache_create(const char *name, +struct cache *cache_create(const char *name, enum cache_type type, unsigned int features, - struct cache_extra *extra) + struct cache_extra *extra, + struct cache_ops *ops) { size_t size = sizeof(struct cache_object); int i, j = 0; @@ -110,12 +45,16 @@ struct cache *cache_create(const char *name, unsigned int feature_offset[CACHE_MAX_FEATURE] = {}; unsigned int feature_type[CACHE_MAX_FEATURE] = {}; + if (type == CACHE_T_NONE || type >= CACHE_T_MAX) + return NULL; + c = malloc(sizeof(struct cache)); if (!c) return NULL; memset(c, 0, sizeof(struct cache)); strcpy(c->name, name); + c->type = type; for (i = 0; i < CACHE_MAX_FEATURE; i++) { if ((1 << i) & features) { @@ -150,11 +89,19 @@ struct cache *cache_create(const char *name, } memcpy(c->feature_offset, feature_offset, sizeof(unsigned int) * j); + if (!ops || !ops->hash || !ops->cmp || + !ops->alloc || !ops->copy || !ops->free) { + free(c->feature_offset); + free(c->features); + free(c); + return NULL; + } + c->ops = ops; + c->h = hashtable_create(CONFIG(hashsize), CONFIG(limit), - hash, - compare); - + c->ops->hash, + c->ops->cmp); if (!c->h) { free(c->features); free(c->feature_offset); @@ -175,7 +122,7 @@ void cache_destroy(struct cache *c) free(c); } -struct cache_object *cache_object_new(struct cache *c, struct nf_conntrack *ct) +struct cache_object *cache_object_new(struct cache *c, void *ptr) { struct cache_object *obj; @@ -187,13 +134,14 @@ struct cache_object *cache_object_new(struct cache *c, struct nf_conntrack *ct) } obj->cache = c; - if ((obj->ct = nfct_new()) == NULL) { + obj->ptr = c->ops->alloc(); + if (obj->ptr == NULL) { free(obj); errno = ENOMEM; c->stats.add_fail_enomem++; return NULL; } - nfct_copy(obj->ct, ct, NFCT_CP_OVERRIDE); + c->ops->copy(obj->ptr, ptr, NFCT_CP_OVERRIDE); obj->status = C_OBJ_NONE; c->stats.objects++; @@ -203,7 +151,8 @@ struct cache_object *cache_object_new(struct cache *c, struct nf_conntrack *ct) void cache_object_free(struct cache_object *obj) { obj->cache->stats.objects--; - nfct_destroy(obj->ct); + obj->cache->ops->free(obj->ptr); + free(obj); } @@ -271,13 +220,12 @@ int cache_add(struct cache *c, struct cache_object *obj, int id) return 0; } -void cache_update(struct cache *c, struct cache_object *obj, int id, - struct nf_conntrack *ct) +void cache_update(struct cache *c, struct cache_object *obj, int id, void *ptr) { char *data = obj->data; unsigned int i; - nfct_copy(obj->ct, ct, NFCT_CP_META); + c->ops->copy(obj->ptr, ptr, NFCT_CP_META); for (i = 0; i < c->num_features; i++) { c->features[i]->update(obj, data); @@ -322,23 +270,22 @@ void cache_del(struct cache *c, struct cache_object *obj) __del(c, obj); } -struct cache_object * -cache_update_force(struct cache *c, struct nf_conntrack *ct) +struct cache_object *cache_update_force(struct cache *c, void *ptr) { struct cache_object *obj; int id; - obj = cache_find(c, ct, &id); + obj = cache_find(c, ptr, &id); if (obj) { if (obj->status != C_OBJ_DEAD) { - cache_update(c, obj, id, ct); + cache_update(c, obj, id, ptr); return obj; } else { cache_del(c, obj); cache_object_free(obj); } } - obj = cache_object_new(c, ct); + obj = cache_object_new(c, ptr); if (obj == NULL) return NULL; @@ -350,11 +297,10 @@ cache_update_force(struct cache *c, struct nf_conntrack *ct) return obj; } -struct cache_object * -cache_find(struct cache *c, struct nf_conntrack *ct, int *id) +struct cache_object *cache_find(struct cache *c, void *ptr, int *id) { - *id = hashtable_hash(c->h, ct); - return ((struct cache_object *) hashtable_find(c->h, ct, *id)); + *id = hashtable_hash(c->h, ptr); + return ((struct cache_object *) hashtable_find(c->h, ptr, *id)); } struct cache_object *cache_data_get_object(struct cache *c, void *data) @@ -432,3 +378,33 @@ void cache_iterate_limit(struct cache *c, void *data, { hashtable_iterate_limit(c->h, data, from, steps, iterate); } + +void cache_dump(struct cache *c, int fd, int type) +{ + struct __dump_container tmp = { + .fd = fd, + .type = type + }; + hashtable_iterate(c->h, (void *) &tmp, c->ops->dump_step); +} + +int cache_commit(struct cache *c, struct nfct_handle *h, int clientfd) +{ + return c->ops->commit(c, h, clientfd); +} + +static int do_flush(void *data, void *n) +{ + struct cache *c = data; + struct cache_object *obj = n; + + cache_del(c, obj); + cache_object_free(obj); + return 0; +} + +void cache_flush(struct cache *c) +{ + hashtable_iterate(c->h, c, do_flush); + c->stats.flush++; +} diff --git a/src/cache_iterators.c b/src/cache_iterators.c deleted file mode 100644 index 3248c70..0000000 --- a/src/cache_iterators.c +++ /dev/null @@ -1,268 +0,0 @@ -/* - * (C) 2006-2007 by Pablo Neira Ayuso - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "cache.h" -#include "hash.h" -#include "log.h" -#include "conntrackd.h" -#include "netlink.h" -#include "event.h" - -#include -#include -#include -#include -#include - -struct __dump_container { - int fd; - int type; -}; - -static int do_dump(void *data1, void *n) -{ - char buf[1024]; - int size; - struct __dump_container *container = data1; - struct cache_object *obj = n; - char *data = obj->data; - unsigned i; - - /* - * XXX: Do not dump the entries that are scheduled to expire. - * These entries talk about already destroyed connections - * that we keep for some time just in case that we have to - * resent some lost messages. We do not show them to the - * user as he may think that the firewall replicas are not - * in sync. The branch below is a hack as it is quite - * specific and it breaks conntrackd modularity. Probably - * there's a nicer way to do this but until I come up with it... - */ - if (CONFIG(flags) & CTD_SYNC_FTFW && obj->status == C_OBJ_DEAD) - return 0; - - /* do not show cached timeout, this may confuse users */ - if (nfct_attr_is_set(obj->ct, ATTR_TIMEOUT)) - nfct_attr_unset(obj->ct, ATTR_TIMEOUT); - - memset(buf, 0, sizeof(buf)); - size = nfct_snprintf(buf, - sizeof(buf), - obj->ct, - NFCT_T_UNKNOWN, - container->type, - 0); - - for (i = 0; i < obj->cache->num_features; i++) { - if (obj->cache->features[i]->dump) { - size += obj->cache->features[i]->dump(obj, - data, - buf+size, - container->type); - data += obj->cache->features[i]->size; - } - } - if (container->type != NFCT_O_XML) { - long tm = time(NULL); - size += sprintf(buf+size, " [active since %lds]", - tm - obj->lifetime); - } - size += sprintf(buf+size, "\n"); - if (send(container->fd, buf, size, 0) == -1) { - if (errno != EPIPE) - return -1; - } - - return 0; -} - -void cache_dump(struct cache *c, int fd, int type) -{ - struct __dump_container tmp = { - .fd = fd, - .type = type - }; - - hashtable_iterate(c->h, (void *) &tmp, do_dump); -} - -struct __commit_container { - struct nfct_handle *h; - struct cache *c; -}; - -static void -__do_commit_step(struct __commit_container *tmp, struct cache_object *obj) -{ - int ret, retry = 1, timeout; - struct nf_conntrack *ct = obj->ct; - - if (CONFIG(commit_timeout)) { - timeout = CONFIG(commit_timeout); - } else { - timeout = time(NULL) - obj->lastupdate; - if (timeout < 0) { - /* XXX: Arbitrarily set the timer to one minute, how - * can this happen? For example, an adjustment due to - * daylight-saving. Probably other situations can - * trigger this. */ - timeout = 60; - } - /* calculate an estimation of the current timeout */ - timeout = nfct_get_attr_u32(ct, ATTR_TIMEOUT) - timeout; - if (timeout < 0) { - timeout = 60; - } - } - -retry: - if (nl_create_conntrack(tmp->h, ct, timeout) == -1) { - if (errno == EEXIST && retry == 1) { - ret = nl_destroy_conntrack(tmp->h, ct); - if (ret == 0 || (ret == -1 && errno == ENOENT)) { - if (retry) { - retry = 0; - goto retry; - } - } - dlog(LOG_ERR, "commit-destroy: %s", strerror(errno)); - dlog_ct(STATE(log), ct, NFCT_O_PLAIN); - tmp->c->stats.commit_fail++; - } else { - dlog(LOG_ERR, "commit-create: %s", strerror(errno)); - dlog_ct(STATE(log), ct, NFCT_O_PLAIN); - tmp->c->stats.commit_fail++; - } - } else { - tmp->c->stats.commit_ok++; - } -} - -static int do_commit_related(void *data, void *n) -{ - struct cache_object *obj = n; - - if (ct_is_related(obj->ct)) - __do_commit_step(data, obj); - - /* keep iterating even if we have found errors */ - return 0; -} - -static int do_commit_master(void *data, void *n) -{ - struct cache_object *obj = n; - - if (ct_is_related(obj->ct)) - return 0; - - __do_commit_step(data, obj); - return 0; -} - -int cache_commit(struct cache *c, struct nfct_handle *h, int clientfd) -{ - unsigned int commit_ok, commit_fail; - struct __commit_container tmp = { - .h = h, - .c = c, - }; - struct timeval commit_stop, res; - - /* we already have one commit in progress, skip this. The clientfd - * descriptor has to be closed by the caller. */ - if (clientfd && STATE_SYNC(commit).clientfd != -1) - return 0; - - switch(STATE_SYNC(commit).state) { - case COMMIT_STATE_INACTIVE: - gettimeofday(&STATE_SYNC(commit).stats.start, NULL); - STATE_SYNC(commit).stats.ok = c->stats.commit_ok; - STATE_SYNC(commit).stats.fail = c->stats.commit_fail; - STATE_SYNC(commit).clientfd = clientfd; - case COMMIT_STATE_MASTER: - STATE_SYNC(commit).current = - hashtable_iterate_limit(c->h, &tmp, - STATE_SYNC(commit).current, - CONFIG(general).commit_steps, - do_commit_master); - if (STATE_SYNC(commit).current < CONFIG(hashsize)) { - STATE_SYNC(commit).state = COMMIT_STATE_MASTER; - /* give it another step as soon as possible */ - write_evfd(STATE_SYNC(commit).evfd); - return 1; - } - STATE_SYNC(commit).current = 0; - STATE_SYNC(commit).state = COMMIT_STATE_RELATED; - case COMMIT_STATE_RELATED: - STATE_SYNC(commit).current = - hashtable_iterate_limit(c->h, &tmp, - STATE_SYNC(commit).current, - CONFIG(general).commit_steps, - do_commit_related); - if (STATE_SYNC(commit).current < CONFIG(hashsize)) { - STATE_SYNC(commit).state = COMMIT_STATE_RELATED; - /* give it another step as soon as possible */ - write_evfd(STATE_SYNC(commit).evfd); - return 1; - } - /* calculate the time that commit has taken */ - gettimeofday(&commit_stop, NULL); - timersub(&commit_stop, &STATE_SYNC(commit).stats.start, &res); - - /* calculate new entries committed */ - commit_ok = c->stats.commit_ok - STATE_SYNC(commit).stats.ok; - commit_fail = - c->stats.commit_fail - STATE_SYNC(commit).stats.fail; - - /* log results */ - dlog(LOG_NOTICE, "Committed %u new entries", commit_ok); - - if (commit_fail) - dlog(LOG_NOTICE, "%u entries can't be " - "committed", commit_fail); - - dlog(LOG_NOTICE, "commit has taken %lu.%06lu seconds", - res.tv_sec, res.tv_usec); - - /* prepare the state machine for new commits */ - STATE_SYNC(commit).current = 0; - STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE; - - /* Close the client socket now that we're done. */ - close(STATE_SYNC(commit).clientfd); - STATE_SYNC(commit).clientfd = -1; - } - return 1; -} - -static int do_flush(void *data, void *n) -{ - struct cache *c = data; - struct cache_object *obj = n; - - cache_del(c, obj); - cache_object_free(obj); - return 0; -} - -void cache_flush(struct cache *c) -{ - hashtable_iterate(c->h, c, do_flush); - c->stats.flush++; -} diff --git a/src/external_cache.c b/src/external_cache.c index 59c706a..073f309 100644 --- a/src/external_cache.c +++ b/src/external_cache.c @@ -28,9 +28,9 @@ static struct cache *external; static int external_cache_init(void) { - external = cache_create("external", + external = cache_create("external", CACHE_T_CT, STATE_SYNC(sync)->external_cache_flags, - NULL); + NULL, &cache_sync_external_ct_ops); if (external == NULL) { dlog(LOG_ERR, "can't allocate memory for the external cache"); return -1; diff --git a/src/internal_bypass.c b/src/internal_bypass.c index 1e1478f..8ecec34 100644 --- a/src/internal_bypass.c +++ b/src/internal_bypass.c @@ -1,6 +1,7 @@ /* - * (C) 2009 by Pablo Neira Ayuso - * + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -16,17 +17,18 @@ #include "network.h" #include "origin.h" -static int _init(void) +static int internal_bypass_init(void) { return 0; } -static void _close(void) +static void internal_bypass_close(void) { } -static int dump_cb(enum nf_conntrack_msg_type type, - struct nf_conntrack *ct, void *data) +static int +internal_bypass_ct_dump_cb(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, void *data) { char buf[1024]; int size, *fd = data; @@ -44,7 +46,7 @@ static int dump_cb(enum nf_conntrack_msg_type type, return NFCT_CB_CONTINUE; } -static void dump(int fd, int type) +static void internal_bypass_ct_dump(int fd, int type) { struct nfct_handle *h; u_int32_t family = AF_UNSPEC; @@ -55,7 +57,7 @@ static void dump(int fd, int type) dlog(LOG_ERR, "can't allocate memory for the internal cache"); return; } - nfct_callback_register(h, NFCT_T_ALL, dump_cb, &fd); + nfct_callback_register(h, NFCT_T_ALL, internal_bypass_ct_dump_cb, &fd); ret = nfct_query(h, NFCT_Q_DUMP, &family); if (ret == -1) { dlog(LOG_ERR, "can't dump kernel table"); @@ -63,7 +65,7 @@ static void dump(int fd, int type) nfct_close(h); } -static void flush(void) +static void internal_bypass_ct_flush(void) { nl_flush_conntrack_table(STATE(flush)); } @@ -74,7 +76,7 @@ struct { uint32_t del; } internal_bypass_stats; -static void stats(int fd) +static void internal_bypass_ct_stats(int fd) { char buf[512]; int size; @@ -91,25 +93,24 @@ static void stats(int fd) } /* unused, INTERNAL_F_POPULATE is unset. No cache, nothing to populate. */ -static void populate(struct nf_conntrack *ct) +static void internal_bypass_ct_populate(struct nf_conntrack *ct) { } /* unused, INTERNAL_F_RESYNC is unset. */ -static void purge(void) +static void internal_bypass_ct_purge(void) { } /* unused, INTERNAL_F_RESYNC is unset. Nothing to resync, we have no cache. */ -static int resync(enum nf_conntrack_msg_type type, - struct nf_conntrack *ct, - void *data) +static int +internal_bypass_ct_resync(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, void *data) { return NFCT_CB_CONTINUE; } -static void -event_new_sync(struct nf_conntrack *ct, int origin) +static void internal_bypass_ct_event_new(struct nf_conntrack *ct, int origin) { struct nethdr *net; @@ -122,8 +123,7 @@ event_new_sync(struct nf_conntrack *ct, int origin) internal_bypass_stats.new++; } -static void -event_update_sync(struct nf_conntrack *ct, int origin) +static void internal_bypass_ct_event_upd(struct nf_conntrack *ct, int origin) { struct nethdr *net; @@ -136,8 +136,7 @@ event_update_sync(struct nf_conntrack *ct, int origin) internal_bypass_stats.upd++; } -static int -event_destroy_sync(struct nf_conntrack *ct, int origin) +static int internal_bypass_ct_event_del(struct nf_conntrack *ct, int origin) { struct nethdr *net; @@ -153,16 +152,18 @@ event_destroy_sync(struct nf_conntrack *ct, int origin) } struct internal_handler internal_bypass = { - .init = _init, - .close = _close, - .dump = dump, - .flush = flush, - .stats = stats, - .stats_ext = stats, - .populate = populate, - .purge = purge, - .resync = resync, - .new = event_new_sync, - .update = event_update_sync, - .destroy = event_destroy_sync, + .init = internal_bypass_init, + .close = internal_bypass_close, + .ct = { + .dump = internal_bypass_ct_dump, + .flush = internal_bypass_ct_flush, + .stats = internal_bypass_ct_stats, + .stats_ext = internal_bypass_ct_stats, + .populate = internal_bypass_ct_populate, + .purge = internal_bypass_ct_purge, + .resync = internal_bypass_ct_resync, + .new = internal_bypass_ct_event_new, + .upd = internal_bypass_ct_event_upd, + .del = internal_bypass_ct_event_del, + }, }; diff --git a/src/internal_cache.c b/src/internal_cache.c index e50e1db..7a698e6 100644 --- a/src/internal_cache.c +++ b/src/internal_cache.c @@ -1,6 +1,7 @@ /* - * (C) 2009 by Pablo Neira Ayuso - * + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -19,46 +20,47 @@ static inline void sync_send(struct cache_object *obj, int query) STATE_SYNC(sync)->enqueue(obj, query); } -static int _init(void) +static int internal_cache_init(void) { - STATE(mode)->internal->data = - cache_create("internal", + STATE(mode)->internal->ct.data = + cache_create("internal", CACHE_T_CT, STATE_SYNC(sync)->internal_cache_flags, - STATE_SYNC(sync)->internal_cache_extra); + STATE_SYNC(sync)->internal_cache_extra, + &cache_sync_internal_ct_ops); - if (!STATE(mode)->internal->data) { + if (!STATE(mode)->internal->ct.data) { dlog(LOG_ERR, "can't allocate memory for the internal cache"); return -1; } return 0; } -static void _close(void) +static void internal_cache_close(void) { - cache_destroy(STATE(mode)->internal->data); + cache_destroy(STATE(mode)->internal->ct.data); } -static void dump(int fd, int type) +static void internal_cache_ct_dump(int fd, int type) { - cache_dump(STATE(mode)->internal->data, fd, type); + cache_dump(STATE(mode)->internal->ct.data, fd, type); } -static void flush(void) +static void internal_cache_ct_flush(void) { - cache_flush(STATE(mode)->internal->data); + cache_flush(STATE(mode)->internal->ct.data); } -static void stats(int fd) +static void internal_cache_ct_stats(int fd) { - cache_stats(STATE(mode)->internal->data, fd); + cache_stats(STATE(mode)->internal->ct.data, fd); } -static void stats_ext(int fd) +static void internal_cache_ct_stats_ext(int fd) { - cache_stats_extended(STATE(mode)->internal->data, fd); + cache_stats_extended(STATE(mode)->internal->ct.data, fd); } -static void populate(struct nf_conntrack *ct) +static void internal_cache_ct_populate(struct nf_conntrack *ct) { /* This is required by kernels < 2.6.20 */ nfct_attr_unset(ct, ATTR_ORIG_COUNTER_BYTES); @@ -67,15 +69,15 @@ static void populate(struct nf_conntrack *ct) nfct_attr_unset(ct, ATTR_REPL_COUNTER_PACKETS); nfct_attr_unset(ct, ATTR_USE); - cache_update_force(STATE(mode)->internal->data, ct); + cache_update_force(STATE(mode)->internal->ct.data, ct); } -static int purge_step(void *data1, void *data2) +static int internal_cache_ct_purge_step(void *data1, void *data2) { struct cache_object *obj = data2; STATE(get_retval) = 0; - nl_get_conntrack(STATE(get), obj->ct); /* modifies STATE(get_reval) */ + nl_get_conntrack(STATE(get), obj->ptr); /* modifies STATE(get_reval) */ if (!STATE(get_retval)) { if (obj->status != C_OBJ_DEAD) { cache_object_set_status(obj, C_OBJ_DEAD); @@ -87,14 +89,15 @@ static int purge_step(void *data1, void *data2) return 0; } -static void purge(void) +static void internal_cache_ct_purge(void) { - cache_iterate(STATE(mode)->internal->data, NULL, purge_step); + cache_iterate(STATE(mode)->internal->ct.data, NULL, + internal_cache_ct_purge_step); } -static int resync(enum nf_conntrack_msg_type type, - struct nf_conntrack *ct, - void *data) +static int +internal_cache_ct_resync(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, void *data) { struct cache_object *obj; @@ -108,7 +111,7 @@ static int resync(enum nf_conntrack_msg_type type, nfct_attr_unset(ct, ATTR_REPL_COUNTER_PACKETS); nfct_attr_unset(ct, ATTR_USE); - obj = cache_update_force(STATE(mode)->internal->data, ct); + obj = cache_update_force(STATE(mode)->internal->ct.data, ct); if (obj == NULL) return NFCT_CB_CONTINUE; @@ -123,8 +126,7 @@ static int resync(enum nf_conntrack_msg_type type, return NFCT_CB_CONTINUE; } -static void -event_new_sync(struct nf_conntrack *ct, int origin) +static void internal_cache_ct_event_new(struct nf_conntrack *ct, int origin) { struct cache_object *obj; int id; @@ -139,13 +141,13 @@ event_new_sync(struct nf_conntrack *ct, int origin) nfct_attr_unset(ct, ATTR_REPL_COUNTER_BYTES); nfct_attr_unset(ct, ATTR_REPL_COUNTER_PACKETS); - obj = cache_find(STATE(mode)->internal->data, ct, &id); + obj = cache_find(STATE(mode)->internal->ct.data, ct, &id); if (obj == NULL) { retry: - obj = cache_object_new(STATE(mode)->internal->data, ct); + obj = cache_object_new(STATE(mode)->internal->ct.data, ct); if (obj == NULL) return; - if (cache_add(STATE(mode)->internal->data, obj, id) == -1) { + if (cache_add(STATE(mode)->internal->ct.data, obj, id) == -1) { cache_object_free(obj); return; } @@ -155,14 +157,13 @@ retry: if (origin == CTD_ORIGIN_NOT_ME) sync_send(obj, NET_T_STATE_NEW); } else { - cache_del(STATE(mode)->internal->data, obj); + cache_del(STATE(mode)->internal->ct.data, obj); cache_object_free(obj); goto retry; } } -static void -event_update_sync(struct nf_conntrack *ct, int origin) +static void internal_cache_ct_event_upd(struct nf_conntrack *ct, int origin) { struct cache_object *obj; @@ -170,7 +171,7 @@ event_update_sync(struct nf_conntrack *ct, int origin) if (origin == CTD_ORIGIN_INJECT) return; - obj = cache_update_force(STATE(mode)->internal->data, ct); + obj = cache_update_force(STATE(mode)->internal->ct.data, ct); if (obj == NULL) return; @@ -178,8 +179,7 @@ event_update_sync(struct nf_conntrack *ct, int origin) sync_send(obj, NET_T_STATE_UPD); } -static int -event_destroy_sync(struct nf_conntrack *ct, int origin) +static int internal_cache_ct_event_del(struct nf_conntrack *ct, int origin) { struct cache_object *obj; int id; @@ -189,7 +189,7 @@ event_destroy_sync(struct nf_conntrack *ct, int origin) return 0; /* we don't synchronize events for objects that are not in the cache */ - obj = cache_find(STATE(mode)->internal->data, ct, &id); + obj = cache_find(STATE(mode)->internal->ct.data, ct, &id); if (obj == NULL) return 0; @@ -205,16 +205,18 @@ event_destroy_sync(struct nf_conntrack *ct, int origin) struct internal_handler internal_cache = { .flags = INTERNAL_F_POPULATE | INTERNAL_F_RESYNC, - .init = _init, - .close = _close, - .dump = dump, - .flush = flush, - .stats = stats, - .stats_ext = stats_ext, - .populate = populate, - .purge = purge, - .resync = resync, - .new = event_new_sync, - .update = event_update_sync, - .destroy = event_destroy_sync, + .init = internal_cache_init, + .close = internal_cache_close, + .ct = { + .dump = internal_cache_ct_dump, + .flush = internal_cache_ct_flush, + .stats = internal_cache_ct_stats, + .stats_ext = internal_cache_ct_stats_ext, + .populate = internal_cache_ct_populate, + .purge = internal_cache_ct_purge, + .resync = internal_cache_ct_resync, + .new = internal_cache_ct_event_new, + .upd = internal_cache_ct_event_upd, + .del = internal_cache_ct_event_del, + }, }; diff --git a/src/run.c b/src/run.c index 265a949..f8d3fad 100644 --- a/src/run.c +++ b/src/run.c @@ -1,6 +1,7 @@ /* - * (C) 2006-2009 by Pablo Neira Ayuso - * + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -241,8 +242,8 @@ static void do_overrun_resync_alarm(struct alarm_block *a, void *data) static void do_polling_alarm(struct alarm_block *a, void *data) { - if (STATE(mode)->internal->purge) - STATE(mode)->internal->purge(); + if (STATE(mode)->internal->ct.purge) + STATE(mode)->internal->ct.purge(); nl_send_resync(STATE(resync)); add_alarm(&STATE(polling_alarm), CONFIG(poll_kernel_secs), 0); @@ -267,13 +268,13 @@ static int event_handler(const struct nlmsghdr *nlh, switch(type) { case NFCT_T_NEW: - STATE(mode)->internal->new(ct, origin_type); + STATE(mode)->internal->ct.new(ct, origin_type); break; case NFCT_T_UPDATE: - STATE(mode)->internal->update(ct, origin_type); + STATE(mode)->internal->ct.upd(ct, origin_type); break; case NFCT_T_DESTROY: - if (STATE(mode)->internal->destroy(ct, origin_type)) + if (STATE(mode)->internal->ct.del(ct, origin_type)) update_traffic_stats(ct); break; default: @@ -298,7 +299,7 @@ static int dump_handler(enum nf_conntrack_msg_type type, switch(type) { case NFCT_T_UPDATE: - STATE(mode)->internal->populate(ct); + STATE(mode)->internal->ct.populate(ct); break; default: STATE(stats).nl_dump_unknown_type++; @@ -363,7 +364,7 @@ init(void) } nfct_callback_register(STATE(resync), NFCT_T_ALL, - STATE(mode)->internal->resync, + STATE(mode)->internal->ct.resync, NULL); register_fd(nfct_fd(STATE(resync)), STATE(fds)); fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK); @@ -537,8 +538,8 @@ static void run_events(struct timeval *next_alarm) /* we previously requested a resync due to buffer overrun. */ if (FD_ISSET(nfct_fd(STATE(resync)), &readfds)) { nfct_catch(STATE(resync)); - if (STATE(mode)->internal->purge) - STATE(mode)->internal->purge(); + if (STATE(mode)->internal->ct.purge) + STATE(mode)->internal->ct.purge(); } if (STATE(mode)->run) diff --git a/src/stats-mode.c b/src/stats-mode.c index 0403ce2..c7a81e3 100644 --- a/src/stats-mode.c +++ b/src/stats-mode.c @@ -1,6 +1,7 @@ /* - * (C) 2006-2007 by Pablo Neira Ayuso - * + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -37,7 +38,9 @@ static int init_stats(void) } memset(state.stats, 0, sizeof(struct ct_stats_state)); - STATE_STATS(cache) = cache_create("stats", NO_FEATURES, NULL); + STATE_STATS(cache) = cache_create("stats", CACHE_T_CT, + NO_FEATURES, NULL, + &cache_stats_ct_ops); if (!STATE_STATS(cache)) { dlog(LOG_ERR, "can't allocate memory for the " "external cache"); @@ -88,7 +91,7 @@ static int local_handler_stats(int fd, int type, void *data) return ret; } -static void populate_stats(struct nf_conntrack *ct) +static void stats_populate(struct nf_conntrack *ct) { nfct_attr_unset(ct, ATTR_ORIG_COUNTER_BYTES); nfct_attr_unset(ct, ATTR_ORIG_COUNTER_PACKETS); @@ -100,7 +103,7 @@ static void populate_stats(struct nf_conntrack *ct) cache_update_force(STATE_STATS(cache), ct); } -static int resync_stats(enum nf_conntrack_msg_type type, +static int stats_resync(enum nf_conntrack_msg_type type, struct nf_conntrack *ct, void *data) { @@ -125,23 +128,22 @@ static int purge_step(void *data1, void *data2) struct cache_object *obj = data2; STATE(get_retval) = 0; - nl_get_conntrack(STATE(get), obj->ct); /* modifies STATE(get_retval) */ + nl_get_conntrack(STATE(get), obj->ptr); /* modifies STATE(get_retval) */ if (!STATE(get_retval)) { cache_del(STATE_STATS(cache), obj); - dlog_ct(STATE(stats_log), obj->ct, NFCT_O_PLAIN); + dlog_ct(STATE(stats_log), obj->ptr, NFCT_O_PLAIN); cache_object_free(obj); } return 0; } -static void purge_stats(void) +static void stats_purge(void) { cache_iterate(STATE_STATS(cache), NULL, purge_step); } -static void -event_new_stats(struct nf_conntrack *ct, int origin) +static void stats_event_new(struct nf_conntrack *ct, int origin) { int id; struct cache_object *obj; @@ -162,15 +164,13 @@ event_new_stats(struct nf_conntrack *ct, int origin) return; } -static void -event_update_stats(struct nf_conntrack *ct, int origin) +static void stats_event_upd(struct nf_conntrack *ct, int origin) { nfct_attr_unset(ct, ATTR_TIMEOUT); cache_update_force(STATE_STATS(cache), ct); } -static int -event_destroy_stats(struct nf_conntrack *ct, int origin) +static int stats_event_del(struct nf_conntrack *ct, int origin) { int id; struct cache_object *obj; @@ -189,12 +189,14 @@ event_destroy_stats(struct nf_conntrack *ct, int origin) static struct internal_handler internal_cache_stats = { .flags = INTERNAL_F_POPULATE | INTERNAL_F_RESYNC, - .populate = populate_stats, - .resync = resync_stats, - .purge = purge_stats, - .new = event_new_stats, - .update = event_update_stats, - .destroy = event_destroy_stats + .ct = { + .populate = stats_populate, + .resync = stats_resync, + .purge = stats_purge, + .new = stats_event_new, + .upd = stats_event_upd, + .del = stats_event_del, + }, }; struct ct_mode stats_mode = { diff --git a/src/sync-alarm.c b/src/sync-alarm.c index b555dd5..8d6b34d 100644 --- a/src/sync-alarm.c +++ b/src/sync-alarm.c @@ -1,6 +1,7 @@ /* - * (C) 2006-2007 by Pablo Neira Ayuso - * + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -110,7 +111,7 @@ static int alarm_recv(const struct nethdr *net) static void alarm_enqueue(struct cache_object *obj, int query) { struct cache_alarm *ca = - cache_get_extra(STATE(mode)->internal->data, obj); + cache_get_extra(STATE(mode)->internal->ct.data, obj); if (queue_add(STATE_SYNC(tx_queue), &ca->qnode) > 0) cache_object_get(obj); } @@ -135,9 +136,9 @@ static int tx_queue_xmit(struct queue_node *n, const void *data) int type; ca = (struct cache_alarm *)n; - obj = cache_data_get_object(STATE(mode)->internal->data, ca); + obj = cache_data_get_object(STATE(mode)->internal->ct.data, ca); type = object_status_to_network_type(obj->status); - net = BUILD_NETMSG(obj->ct, type); + net = obj->cache->ops->build_msg(obj, type); multichannel_send(STATE_SYNC(channel), net); cache_object_put(obj); break; diff --git a/src/sync-ftfw.c b/src/sync-ftfw.c index 581b5ca..55eda0b 100644 --- a/src/sync-ftfw.c +++ b/src/sync-ftfw.c @@ -1,6 +1,7 @@ /* - * (C) 2006-2008 by Pablo Neira Ayuso - * + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -169,7 +170,7 @@ static int do_cache_to_tx(void *data1, void *data2) { struct cache_object *obj = data2; struct cache_ftfw *cn = - cache_get_extra(STATE(mode)->internal->data, obj); + cache_get_extra(STATE(mode)->internal->ct.data, obj); if (queue_in(rs_queue, &cn->qnode)) { queue_del(&cn->qnode); @@ -227,7 +228,7 @@ static int ftfw_local(int fd, int type, void *data) break; case SEND_BULK: dlog(LOG_NOTICE, "sending bulk update"); - cache_iterate(STATE(mode)->internal->data, + cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); break; case STATS_RSQUEUE: @@ -307,7 +308,7 @@ static int rs_queue_empty(struct queue_node *n, const void *data) cn = (struct cache_ftfw *) n; if (h == NULL) { queue_del(n); - obj = cache_data_get_object(STATE(mode)->internal->data, cn); + obj = cache_data_get_object(STATE(mode)->internal->ct.data, cn); cache_object_put(obj); return 0; } @@ -318,7 +319,7 @@ static int rs_queue_empty(struct queue_node *n, const void *data) dp("queue: deleting from queue (seq=%u)\n", cn->seq); queue_del(n); - obj = cache_data_get_object(STATE(mode)->internal->data, cn); + obj = cache_data_get_object(STATE(mode)->internal->ct.data, cn); cache_object_put(obj); break; } @@ -351,7 +352,7 @@ static int digest_msg(const struct nethdr *net) } else if (IS_RESYNC(net)) { dp("RESYNC ALL\n"); - cache_iterate(STATE(mode)->internal->data, NULL, do_cache_to_tx); + cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); return MSG_CTL; } else if (IS_ALIVE(net)) @@ -468,7 +469,7 @@ static void rs_queue_purge_full(void) struct cache_object *obj; cn = (struct cache_ftfw *)n; - obj = cache_data_get_object(STATE(mode)->internal->data, cn); + obj = cache_data_get_object(STATE(mode)->internal->ct.data, cn); cache_object_put(obj); break; } @@ -516,9 +517,9 @@ static int tx_queue_xmit(struct queue_node *n, const void *data) struct nethdr *net; cn = (struct cache_ftfw *)n; - obj = cache_data_get_object(STATE(mode)->internal->data, cn); + obj = cache_data_get_object(STATE(mode)->internal->ct.data, cn); type = object_status_to_network_type(obj->status); - net = BUILD_NETMSG(obj->ct, type); + net = obj->cache->ops->build_msg(obj, type); nethdr_set_hello(net); dp("tx_list sq: %u fl:%u len:%u\n", @@ -551,7 +552,7 @@ static void ftfw_xmit(void) static void ftfw_enqueue(struct cache_object *obj, int type) { struct cache_ftfw *cn = - cache_get_extra(STATE(mode)->internal->data, obj); + cache_get_extra(STATE(mode)->internal->ct.data, obj); if (queue_in(rs_queue, &cn->qnode)) { queue_del(&cn->qnode); queue_add(STATE_SYNC(tx_queue), &cn->qnode); diff --git a/src/sync-mode.c b/src/sync-mode.c index 5351110..34d9706 100644 --- a/src/sync-mode.c +++ b/src/sync-mode.c @@ -1,6 +1,7 @@ /* - * (C) 2006-2007 by Pablo Neira Ayuso - * + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -251,7 +252,7 @@ static void do_reset_cache_alarm(struct alarm_block *a, void *data) exit(EXIT_SUCCESS); } /* this is not required if events don't get lost */ - STATE(mode)->internal->flush(); + STATE(mode)->internal->ct.flush(); } static int init_sync(void) @@ -471,7 +472,7 @@ static int local_handler_sync(int fd, int type, void *data) switch(type) { case DUMP_INTERNAL: if (fork_process_new(CTD_PROC_ANY, 0, NULL, NULL) == 0) { - STATE(mode)->internal->dump(fd, NFCT_O_PLAIN); + STATE(mode)->internal->ct.dump(fd, NFCT_O_PLAIN); exit(EXIT_SUCCESS); } break; @@ -483,7 +484,7 @@ static int local_handler_sync(int fd, int type, void *data) break; case DUMP_INT_XML: if (fork_process_new(CTD_PROC_ANY, 0, NULL, NULL) == 0) { - STATE(mode)->internal->dump(fd, NFCT_O_XML); + STATE(mode)->internal->ct.dump(fd, NFCT_O_XML); exit(EXIT_SUCCESS); } break; @@ -512,14 +513,14 @@ static int local_handler_sync(int fd, int type, void *data) /* inmediate flush, remove pending flush scheduled if any */ del_alarm(&STATE_SYNC(reset_cache_alarm)); dlog(LOG_NOTICE, "flushing caches"); - STATE(mode)->internal->flush(); + STATE(mode)->internal->ct.flush(); STATE_SYNC(external)->flush(); break; case FLUSH_INT_CACHE: /* inmediate flush, remove pending flush scheduled if any */ del_alarm(&STATE_SYNC(reset_cache_alarm)); dlog(LOG_NOTICE, "flushing internal cache"); - STATE(mode)->internal->flush(); + STATE(mode)->internal->ct.flush(); break; case FLUSH_EXT_CACHE: dlog(LOG_NOTICE, "flushing external cache"); @@ -529,7 +530,7 @@ static int local_handler_sync(int fd, int type, void *data) killer(0); break; case STATS: - STATE(mode)->internal->stats(fd); + STATE(mode)->internal->ct.stats(fd); STATE_SYNC(external)->stats(fd); dump_traffic_stats(fd); multichannel_stats(STATE_SYNC(channel), fd); @@ -540,7 +541,7 @@ static int local_handler_sync(int fd, int type, void *data) multichannel_stats(STATE_SYNC(channel), fd); break; case STATS_CACHE: - STATE(mode)->internal->stats_ext(fd); + STATE(mode)->internal->ct.stats_ext(fd); STATE_SYNC(external)->stats_ext(fd); break; case STATS_LINK: diff --git a/src/sync-notrack.c b/src/sync-notrack.c index 06af58b..e25cfd8 100644 --- a/src/sync-notrack.c +++ b/src/sync-notrack.c @@ -76,7 +76,7 @@ static int do_cache_to_tx(void *data1, void *data2) { struct cache_object *obj = data2; struct cache_notrack *cn = - cache_get_extra(STATE(mode)->internal->data, obj); + cache_get_extra(STATE(mode)->internal->ct.data, obj); if (queue_add(STATE_SYNC(tx_queue), &cn->qnode) > 0) cache_object_get(obj); return 0; @@ -127,7 +127,7 @@ static int notrack_local(int fd, int type, void *data) if (CONFIG(sync).internal_cache_disable) { kernel_resync(); } else { - cache_iterate(STATE(mode)->internal->data, + cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); } break; @@ -148,7 +148,7 @@ static int digest_msg(const struct nethdr *net) if (CONFIG(sync).internal_cache_disable) { kernel_resync(); } else { - cache_iterate(STATE(mode)->internal->data, + cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); } return MSG_CTL; @@ -197,9 +197,9 @@ static int tx_queue_xmit(struct queue_node *n, const void *data2) struct nethdr *net; cn = (struct cache_ftfw *)n; - obj = cache_data_get_object(STATE(mode)->internal->data, cn); + obj = cache_data_get_object(STATE(mode)->internal->ct.data, cn); type = object_status_to_network_type(obj->status);; - net = BUILD_NETMSG(obj->ct, type); + net = obj->cache->ops->build_msg(obj, type); multichannel_send(STATE_SYNC(channel), net); queue_del(n); @@ -219,7 +219,7 @@ static void notrack_xmit(void) static void notrack_enqueue(struct cache_object *obj, int query) { struct cache_notrack *cn = - cache_get_extra(STATE(mode)->internal->data, obj); + cache_get_extra(STATE(mode)->internal->ct.data, obj); if (queue_add(STATE_SYNC(tx_queue), &cn->qnode) > 0) cache_object_get(obj); } -- cgit v1.2.3 From 79a777c60cfe02197c135adcc4edb2f63ae9a695 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Dec 2011 17:13:25 +0100 Subject: conntrackd: support for expectation synchronization This patch adds support to synchronize expectations between firewalls. This addition aims to re-use as much as possible of the existing infrastructure for stability reasons. The expectation support has been tested with the FTP helper. This extension requires libnetfilter_conntrack 1.0.0. If this is the first time you're playing with conntrackd, I *strongly* recommend you to get working setup of conntrackd without expectation support before as described in the documentation. Then, enabling expectation support is rather easy. To know more about expectations, if you're not familiar with them, I suggest you to read: "Netfilter's Connection Tracking System" http://people.netfilter.org/pablo/docs/login.pdf Reprinted from ;login: The Magazine of USENIX, vol. 31, no. 3 (Berkeley, CA: USENIX Association, 2006, pp40-45.) In short, expectations allow one Linux firewall to filter multi-flow traffic like FTP, SIP and H.323. In my testbed, there are two firewalls in a primary-backup configuration running keepalived. The use a couple of floating cluster IP address (192.168.0.100 and 192.168.1.100) that are used by the client. These firewalls protect one FTP server (192.168.1.2) that will be accessed by one client. In ASCII art, it looks like this: 192.168.0.100 192.168.1.100 eth1 eth2 fw-1 / \ FTP -- client ------ ------ server -- 192.168.0.2 \ / 192.168.1.2 fw-2 This is the rule-set for the firewalls: -A POSTROUTING -t nat -s 192.168.0.2/32 -d 192.168.1.2/32 -j SNAT --to-source 192.168.1.100 -A INPUT -p tcp -m tcp --dport 22 -j ACCEPT -A INPUT -m state --state INVALID -j DROP -A FORWARD -m state --state RELATED -j ACCEPT -A FORWARD -i eth2 -m state --state ESTABLISHED -j ACCEPT -A FORWARD -i eth1 -p tcp -m tcp --dport 21 --tcp-flags FIN,SYN,RST,ACK SYN -m state --state NEW -j ACCEPT -A FORWARD -i eth1 -p tcp -m state --state ESTABLISHED -j ACCEPT -A FORWARD -m state --state INVALID -j LOG --log-prefix "invalid: " The following steps detail how to check that the expectation support works fine for conntrackd: 1) You have to enable the expectation support in the configuration file with the following option: Sync { ... Options { ExpectationSync { ftp sip h323 } } } This enables expectation synchronization for the FTP, SIP and H.323 helpers. You can alternatively use: Sync { ... Options { ExpectationSync On } } To enable expectation synchronization for all helpers. 2) Make sure you have loaded the FTP helper in both firewalls. root@fw1# modprobe nf_conntrack_ftp root@fw2# modprobe nf_conntrack_ftp 3) Switch to the client. Start one FTP control connection to one server that is protected by the firewalls, enter passive mode: (term-1) user@client$ nc 192.168.1.2 21 220 dummy FTP server USER anonymous 331 Please specify the password. PASS nothing 230 Login successful. PASV 227 Entering Passive Mode (192,168,1,2,163,11). This means that port 163*256+11=41739 will be used for the data traffic. Read this if you are not familiar with the FTP protocol: http://www.freefire.org/articles/ftpexample.php 3) Switch to fw-1 (primary) to check that the expectation is in the internal cache. root@fw1# conntrackd -i exp proto=6 src=192.168.0.2 dst=192.168.1.2 sport=0 dport=41739 mask-src=255.255.255.255 mask-dst=255.255.255.255 sport=0 dport=65535 master-src=192.168.0.2 master-dst=192.168.1.2 sport=36390 dport=21 [active since 5s] 4) Switch to fw-2 (backup) to check that the expectation has been successfully replicated. root@fw2# conntrackd -e exp proto=6 src=192.168.0.2 dst=192.168.1.2 sport=0 dport=41739 mask-src=255.255.255.255 mask-dst=255.255.255.255 sport=0 dport=65535 master-src=192.168.0.2 master-dst=192.168.1.2 sport=36390 dport=21 [active since 8s] 5) Make the primary firewall fw-1 fail. Now fw-2 becomes primary. 6) Switch to fw-2 (primary) to commit the external cache into the kernel. root@fw2# conntrackd -c exp The logs should display that the commit was successful: root@fw2# tail -100f /var/log/conntrackd.log [Wed Dec 7 22:16:31 2011] (pid=19195) [notice] committing external cache: expectations [Wed Dec 7 22:16:31 2011] (pid=19195) [notice] Committed 1 new entries [Wed Dec 7 22:16:31 2011] (pid=19195) [notice] commit has taken 0.000366 seconds 7) Switch to the client. Open a new terminal and connect to the port that has been announced by the server: (term-2) user@client$ nc -vvv 192.168.1.2 41739 (UNKNOWN) [192.168.1.2] 41739 (?) open 8) Switch to term-1 and ask for the file listing: [...] 227 Entering Passive Mode (192,168,1,2,163,11). LIST 9) Switch to term-2, it should display the listing. That means everything has worked fine. You may want to try disabling the expectation support and repeating the steps to check that *it does not work* without the state-synchronization. You can also display expectation statistics by means of: root@fwX# conntrackd -s exp This update requires no changes in the primary-backup.sh script that is used by the HA manager to interact with conntrackd. Thus, we provide a backward compatible command line interface. Regarding the Filter clause and expectations, we use the master conntrack to filter expectation events. The filtering is performed in user-space. No kernel-space filtering support for expectations yet (this support should go in libnetfilter_conntrack at some point). This patch also includes support to disable caching and to allow direct injection of expectations. Signed-off-by: Pablo Neira Ayuso --- configure.ac | 2 +- conntrackd.8 | 12 +- doc/sync/alarm/conntrackd.conf | 16 ++ doc/sync/ftfw/conntrackd.conf | 16 ++ doc/sync/notrack/conntrackd.conf | 16 ++ include/cache.h | 4 + include/conntrackd.h | 19 +++ include/external.h | 11 ++ include/filter.h | 7 + include/internal.h | 17 +++ include/log.h | 2 + include/netlink.h | 7 + include/network.h | 40 ++++- src/Makefile.am | 2 +- src/build.c | 99 +++++++++++++ src/cache-ct.c | 6 +- src/cache-exp.c | 308 +++++++++++++++++++++++++++++++++++++++ src/external_cache.c | 85 +++++++++++ src/external_inject.c | 95 +++++++++++- src/filter.c | 76 ++++++++++ src/internal_bypass.c | 146 ++++++++++++++++++- src/internal_cache.c | 173 ++++++++++++++++++++++ src/log.c | 37 +++++ src/main.c | 69 +++++++-- src/netlink.c | 63 +++++++- src/network.c | 5 + src/parse.c | 192 ++++++++++++++++++++++++ src/read_config_lex.l | 1 + src/read_config_yy.y | 50 ++++++- src/run.c | 206 +++++++++++++++++++++++--- src/sync-ftfw.c | 7 +- src/sync-mode.c | 165 ++++++++++++++++++--- src/sync-notrack.c | 6 +- 33 files changed, 1889 insertions(+), 71 deletions(-) create mode 100644 src/cache-exp.c (limited to 'include/internal.h') diff --git a/configure.ac b/configure.ac index 0481e23..26a7e02 100644 --- a/configure.ac +++ b/configure.ac @@ -52,7 +52,7 @@ else fi PKG_CHECK_MODULES([LIBNFNETLINK], [libnfnetlink >= 1.0.0]) -PKG_CHECK_MODULES([LIBNETFILTER_CONNTRACK], [libnetfilter_conntrack >= 0.9.1]) +PKG_CHECK_MODULES([LIBNETFILTER_CONNTRACK], [libnetfilter_conntrack >= 1.0.0]) AC_CHECK_HEADERS([linux/capability.h],, [AC_MSG_ERROR([Cannot find linux/capabibility.h])]) diff --git a/conntrackd.8 b/conntrackd.8 index 0c9054e..f07ad7a 100644 --- a/conntrackd.8 +++ b/conntrackd.8 @@ -24,10 +24,10 @@ Run conntrackd in daemon mode. .B conntrackd can be used in client mode to request several information and operations to a running daemon .TP -.BI "-i " +.BI "-i "[ct|expect]" Dump the internal cache, i.e. show local states .TP -.BI "-e " +.BI "-e "[ct|expect]" Dump the external cache, i.e. show foreign states .TP .BI "-x " @@ -37,7 +37,7 @@ with "-i" and "-e" parameters. .BI "-f " "[|internal|external]" Flush the internal and/or external cache .TP -.BI "-F " +.BI "-F [ct|expect]" Flush the kernel conntrack table (if you use a Linux kernel >= 2.6.29, this option will not flush your internal and external cache). .TP @@ -48,15 +48,17 @@ ask conntrackd to send the state-entries that it owns to others. .BI "-k " Kill the daemon .TP -.BI "-s " "[|network|cache|runtime|link|rsqueue|process|queue]" +.BI "-s " "[|network|cache|runtime|link|rsqueue|process|queue|ct|expect]" Dump statistics. If no parameter is passed, it displays the general statistics. If "network" is passed as parameter it displays the networking statistics. If "cache" is passed as parameter, it shows the extended cache statistics. If "runtime" is passed as parameter, it shows the run-time statistics. If "process" is passed as parameter, it shows existing child processes (if any). If "queue" is passed as parameter, it shows queue statistics. +If "ct" is passed, it displays the general statistics. +If "expect" is passed as parameter, it shows expectation statistics. .TP -.BI "-R " +.BI "-R " "[ct|expect]" Force a resync against the kernel connection tracking table .TP .BI "-t " diff --git a/doc/sync/alarm/conntrackd.conf b/doc/sync/alarm/conntrackd.conf index d05b499..deed291 100644 --- a/doc/sync/alarm/conntrackd.conf +++ b/doc/sync/alarm/conntrackd.conf @@ -191,6 +191,22 @@ Sync { # This feature requires a Linux kernel >= 2.6.36. # # TCPWindowTracking Off + + # Set this option on if you want to enable the synchronization + # of expectations. You have to specify the list of helpers that + # you want to enable. Default is off. + # + # ExpectationSync { + # ftp + # h323 + # sip + # } + # + # You can use this alternatively: + # + # ExpectationSync On + # + # If you want to synchronize expectations of all helpers. # } } diff --git a/doc/sync/ftfw/conntrackd.conf b/doc/sync/ftfw/conntrackd.conf index c52f214..0304f0f 100644 --- a/doc/sync/ftfw/conntrackd.conf +++ b/doc/sync/ftfw/conntrackd.conf @@ -214,6 +214,22 @@ Sync { # This feature requires a Linux kernel >= 2.6.36. # # TCPWindowTracking Off + + # Set this option on if you want to enable the synchronization + # of expectations. You have to specify the list of helpers that + # you want to enable. Default is off. + # + # ExpectationSync { + # ftp + # h323 + # sip + # } + # + # You can use this alternatively: + # + # ExpectationSync On + # + # If you want to synchronize expectations of all helpers. # } } diff --git a/doc/sync/notrack/conntrackd.conf b/doc/sync/notrack/conntrackd.conf index 4d77266..34e7b32 100644 --- a/doc/sync/notrack/conntrackd.conf +++ b/doc/sync/notrack/conntrackd.conf @@ -253,6 +253,22 @@ Sync { # This feature requires a Linux kernel >= 2.6.36. # # TCPWindowTracking Off + + # Set this option on if you want to enable the synchronization + # of expectations. You have to specify the list of helpers that + # you want to enable. Default is off. + # + # ExpectationSync { + # ftp + # h323 + # sip + # } + # + # You can use this alternatively: + # + # ExpectationSync On + # + # If you want to synchronize expectations of all helpers. # } } diff --git a/include/cache.h b/include/cache.h index abebb97..3af2741 100644 --- a/include/cache.h +++ b/include/cache.h @@ -52,6 +52,7 @@ extern struct cache_feature timer_feature; enum cache_type { CACHE_T_NONE = 0, CACHE_T_CT, + CACHE_T_EXP, CACHE_T_MAX }; @@ -128,6 +129,9 @@ struct cache_ops { extern struct cache_ops cache_sync_internal_ct_ops; extern struct cache_ops cache_sync_external_ct_ops; extern struct cache_ops cache_stats_ct_ops; +/* templates to configure expectation caching. */ +extern struct cache_ops cache_sync_internal_exp_ops; +extern struct cache_ops cache_sync_external_exp_ops; struct nf_conntrack; diff --git a/include/conntrackd.h b/include/conntrackd.h index 697d3d7..8baa088 100644 --- a/include/conntrackd.h +++ b/include/conntrackd.h @@ -37,6 +37,16 @@ #define CT_FLUSH_EXT_CACHE 34 /* flush external cache */ #define STATS_PROCESS 35 /* child process stats */ #define STATS_QUEUE 36 /* queue stats */ +#define EXP_STATS 37 /* dump statistics */ +#define EXP_FLUSH_MASTER 38 /* flush kernel expect table */ +#define EXP_RESYNC_MASTER 39 /* resync with kernel exp table */ +#define EXP_DUMP_INTERNAL 40 /* dump internal expect cache */ +#define EXP_DUMP_EXTERNAL 41 /* dump external expect cache */ +#define EXP_COMMIT 42 /* commit expectations */ +#define ALL_FLUSH_MASTER 43 /* flush all kernel tables */ +#define ALL_RESYNC_MASTER 44 /* resync w/all kernel tables */ +#define ALL_FLUSH_CACHE 45 /* flush all caches */ +#define ALL_COMMIT 46 /* commit all tables */ #define DEFAULT_CONFIGFILE "/etc/conntrackd/conntrackd.conf" #define DEFAULT_LOCKFILE "/var/lock/conntrackd.lock" @@ -56,6 +66,7 @@ #define CTD_SYNC_ALARM (1UL << 3) #define CTD_SYNC_NOTRACK (1UL << 4) #define CTD_POLL (1UL << 5) +#define CTD_EXPECT (1UL << 6) /* FILENAME_MAX is 4096 on my system, perhaps too much? */ #ifndef FILENAME_MAXLEN @@ -105,6 +116,8 @@ struct ct_conf { int tcp_window_tracking; } sync; struct { + int subsys_id; + int groups; int events_reliable; } netlink; struct { @@ -130,6 +143,7 @@ struct ct_general_state { struct local_server local; struct ct_mode *mode; struct ct_filter *us_filter; + struct exp_filter *exp_filter; struct nfct_handle *event; /* event handler */ struct nfct_filter *filter; /* event filter */ @@ -177,6 +191,10 @@ struct ct_general_state { } stats; }; +struct commit_runqueue { + int (*cb)(struct nfct_handle *h, int step); +}; + #define STATE_SYNC(x) state.sync->x struct ct_sync_state { @@ -196,6 +214,7 @@ struct ct_sync_state { struct nfct_handle *h; struct evfd *evfd; int current; + struct commit_runqueue rq[2]; struct { int ok; int fail; diff --git a/include/external.h b/include/external.h index eef0e42..70f0c5c 100644 --- a/include/external.h +++ b/include/external.h @@ -18,6 +18,17 @@ struct external_handler { void (*stats)(int fd); void (*stats_ext)(int fd); } ct; + struct { + void (*new)(struct nf_expect *exp); + void (*upd)(struct nf_expect *exp); + void (*del)(struct nf_expect *exp); + + void (*dump)(int fd, int type); + void (*flush)(void); + int (*commit)(struct nfct_handle *h, int fd); + void (*stats)(int fd); + void (*stats_ext)(int fd); + } exp; }; extern struct external_handler external_cache; diff --git a/include/filter.h b/include/filter.h index f19b18b..3c7c8cc 100644 --- a/include/filter.h +++ b/include/filter.h @@ -52,4 +52,11 @@ void ct_filter_set_logic(struct ct_filter *f, enum ct_filter_logic logic); int ct_filter_conntrack(const struct nf_conntrack *ct, int userspace); +struct exp_filter; +struct nf_expect; + +struct exp_filter *exp_filter_create(void); +int exp_filter_add(struct exp_filter *f, const char *helper_name); +int exp_filter_find(struct exp_filter *f, const struct nf_expect *exp); + #endif diff --git a/include/internal.h b/include/internal.h index f50eb79..2ba9714 100644 --- a/include/internal.h +++ b/include/internal.h @@ -34,6 +34,23 @@ struct internal_handler { void (*stats)(int fd); void (*stats_ext)(int fd); } ct; + struct { + void *data; + + void (*new)(struct nf_expect *exp, int origin_type); + void (*upd)(struct nf_expect *exp, int origin_type); + int (*del)(struct nf_expect *exp, int origin_type); + + void (*dump)(int fd, int type); + void (*populate)(struct nf_expect *exp); + void (*purge)(void); + int (*resync)(enum nf_conntrack_msg_type type, + struct nf_expect *exp, void *data); + void (*flush)(void); + + void (*stats)(int fd); + void (*stats_ext)(int fd); + } exp; }; extern struct internal_handler internal_cache; diff --git a/include/log.h b/include/log.h index f5c5b4f..ae58e79 100644 --- a/include/log.h +++ b/include/log.h @@ -4,10 +4,12 @@ #include struct nf_conntrack; +struct nf_expect; int init_log(void); void dlog(int priority, const char *format, ...); void dlog_ct(FILE *fd, struct nf_conntrack *ct, unsigned int type); +void dlog_exp(FILE *fd, struct nf_expect *exp, unsigned int type); void close_log(void); #endif diff --git a/include/netlink.h b/include/netlink.h index 0df0cbb..3bde30c 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -30,4 +30,11 @@ static inline int ct_is_related(const struct nf_conntrack *ct) nfct_attr_is_set(ct, ATTR_MASTER_PORT_DST)); } +int nl_create_expect(struct nfct_handle *h, const struct nf_expect *orig, int timeout); +int nl_destroy_expect(struct nfct_handle *h, const struct nf_expect *exp); +int nl_get_expect(struct nfct_handle *h, const struct nf_expect *exp); +int nl_dump_expect_table(struct nfct_handle *h); +int nl_flush_expect_table(struct nfct_handle *h); +int nl_send_expect_resync(struct nfct_handle *h); + #endif diff --git a/include/network.h b/include/network.h index d0531b9..ab95499 100644 --- a/include/network.h +++ b/include/network.h @@ -4,9 +4,10 @@ #include #include -#define CONNTRACKD_PROTOCOL_VERSION 0 +#define CONNTRACKD_PROTOCOL_VERSION 1 struct nf_conntrack; +struct nf_expect; struct nethdr { #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -28,7 +29,10 @@ enum nethdr_type { NET_T_STATE_CT_NEW = 0, NET_T_STATE_CT_UPD, NET_T_STATE_CT_DEL, - NET_T_STATE_MAX = NET_T_STATE_CT_DEL, + NET_T_STATE_EXP_NEW = 3, + NET_T_STATE_EXP_UPD, + NET_T_STATE_EXP_DEL, + NET_T_STATE_MAX = NET_T_STATE_EXP_DEL, NET_T_CTL = 10, }; @@ -92,6 +96,17 @@ enum { __hdr; \ }) +#define BUILD_NETMSG_FROM_EXP(exp, query) \ +({ \ + static char __net[4096]; \ + struct nethdr *__hdr = (struct nethdr *) __net; \ + memset(__hdr, 0, NETHDR_SIZ); \ + nethdr_set(__hdr, query); \ + exp2msg(exp, __hdr); \ + HDR_HOST2NETWORK(__hdr); \ + __hdr; \ +}) + struct mcast_sock_multi; enum { @@ -239,4 +254,25 @@ struct nta_attr_natseqadj { void ct2msg(const struct nf_conntrack *ct, struct nethdr *n); int msg2ct(struct nf_conntrack *ct, struct nethdr *n, size_t remain); +enum nta_exp_attr { + NTA_EXP_MASTER_IPV4 = 0, /* struct nfct_attr_grp_ipv4 */ + NTA_EXP_MASTER_IPV6, /* struct nfct_attr_grp_ipv6 */ + NTA_EXP_MASTER_L4PROTO, /* uint8_t */ + NTA_EXP_MASTER_PORT, /* struct nfct_attr_grp_port */ + NTA_EXP_EXPECT_IPV4 = 4, /* struct nfct_attr_grp_ipv4 */ + NTA_EXP_EXPECT_IPV6, /* struct nfct_attr_grp_ipv6 */ + NTA_EXP_EXPECT_L4PROTO, /* uint8_t */ + NTA_EXP_EXPECT_PORT, /* struct nfct_attr_grp_port */ + NTA_EXP_MASK_IPV4 = 8, /* struct nfct_attr_grp_ipv4 */ + NTA_EXP_MASK_IPV6, /* struct nfct_attr_grp_ipv6 */ + NTA_EXP_MASK_L4PROTO, /* uint8_t */ + NTA_EXP_MASK_PORT, /* struct nfct_attr_grp_port */ + NTA_EXP_TIMEOUT, /* uint32_t */ + NTA_EXP_FLAGS, /* uint32_t */ + NTA_EXP_MAX +}; + +void exp2msg(const struct nf_expect *exp, struct nethdr *n); +int msg2exp(struct nf_expect *exp, struct nethdr *n, size_t remain); + #endif diff --git a/src/Makefile.am b/src/Makefile.am index a0abeee..7d7b2ac 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -12,7 +12,7 @@ conntrack_LDADD = ../extensions/libct_proto_tcp.la ../extensions/libct_proto_udp conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \ local.c log.c mcast.c udp.c netlink.c vector.c \ filter.c fds.c event.c process.c origin.c date.c \ - cache.c cache-ct.c \ + cache.c cache-ct.c cache-exp.c \ cache_timer.c \ sync-mode.c sync-alarm.c sync-ftfw.c sync-notrack.c \ traffic_stats.c stats-mode.c \ diff --git a/src/build.c b/src/build.c index 9c3687c..3193884 100644 --- a/src/build.c +++ b/src/build.c @@ -224,3 +224,102 @@ void ct2msg(const struct nf_conntrack *ct, struct nethdr *n) if (nfct_attr_is_set_array(ct, nat_type, 6)) ct_build_natseqadj(ct, n); } + +static void +exp_build_l4proto_tcp(const struct nf_conntrack *ct, struct nethdr *n, int a) +{ + ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a, + sizeof(struct nfct_attr_grp_port)); +} + +static void +exp_build_l4proto_sctp(const struct nf_conntrack *ct, struct nethdr *n, int a) +{ + ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a, + sizeof(struct nfct_attr_grp_port)); +} + +static void +exp_build_l4proto_dccp(const struct nf_conntrack *ct, struct nethdr *n, int a) +{ + ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a, + sizeof(struct nfct_attr_grp_port)); +} + +static void +exp_build_l4proto_udp(const struct nf_conntrack *ct, struct nethdr *n, int a) +{ + ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a, + sizeof(struct nfct_attr_grp_port)); +} + +static struct exp_build_l4proto { + void (*build)(const struct nf_conntrack *, struct nethdr *n, int a); +} exp_l4proto_fcn[IPPROTO_MAX] = { + [IPPROTO_TCP] = { .build = exp_build_l4proto_tcp }, + [IPPROTO_SCTP] = { .build = exp_build_l4proto_sctp }, + [IPPROTO_DCCP] = { .build = exp_build_l4proto_dccp }, + [IPPROTO_UDP] = { .build = exp_build_l4proto_udp }, +}; + +static inline void +exp_build_u32(const struct nf_expect *exp, int a, struct nethdr *n, int b) +{ + uint32_t data = nfexp_get_attr_u32(exp, a); + data = htonl(data); + addattr(n, b, &data, sizeof(uint32_t)); +} + +void exp2msg(const struct nf_expect *exp, struct nethdr *n) +{ + const struct nf_conntrack *ct = nfexp_get_attr(exp, ATTR_EXP_MASTER); + uint8_t l4proto = nfct_get_attr_u8(ct, ATTR_L4PROTO); + + /* master conntrack for this expectation. */ + if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV4)) { + ct_build_group(ct, ATTR_GRP_ORIG_IPV4, n, NTA_EXP_MASTER_IPV4, + sizeof(struct nfct_attr_grp_ipv4)); + } else if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV6)) { + ct_build_group(ct, ATTR_GRP_ORIG_IPV6, n, NTA_EXP_MASTER_IPV6, + sizeof(struct nfct_attr_grp_ipv6)); + } + ct_build_u8(ct, ATTR_L4PROTO, n, NTA_EXP_MASTER_L4PROTO); + + if (exp_l4proto_fcn[l4proto].build) + exp_l4proto_fcn[l4proto].build(ct, n, NTA_EXP_MASTER_PORT); + + /* the expectation itself. */ + ct = nfexp_get_attr(exp, ATTR_EXP_EXPECTED); + + if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV4)) { + ct_build_group(ct, ATTR_GRP_ORIG_IPV4, n, NTA_EXP_EXPECT_IPV4, + sizeof(struct nfct_attr_grp_ipv4)); + } else if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV6)) { + ct_build_group(ct, ATTR_GRP_ORIG_IPV6, n, NTA_EXP_EXPECT_IPV6, + sizeof(struct nfct_attr_grp_ipv6)); + } + ct_build_u8(ct, ATTR_L4PROTO, n, NTA_EXP_EXPECT_L4PROTO); + + if (exp_l4proto_fcn[l4proto].build) + exp_l4proto_fcn[l4proto].build(ct, n, NTA_EXP_EXPECT_PORT); + + /* mask for the expectation. */ + ct = nfexp_get_attr(exp, ATTR_EXP_MASK); + + if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV4)) { + ct_build_group(ct, ATTR_GRP_ORIG_IPV4, n, NTA_EXP_MASK_IPV4, + sizeof(struct nfct_attr_grp_ipv4)); + } else if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV6)) { + ct_build_group(ct, ATTR_GRP_ORIG_IPV6, n, NTA_EXP_MASK_IPV6, + sizeof(struct nfct_attr_grp_ipv6)); + } + ct_build_u8(ct, ATTR_L4PROTO, n, NTA_EXP_MASK_L4PROTO); + + if (exp_l4proto_fcn[l4proto].build) + exp_l4proto_fcn[l4proto].build(ct, n, NTA_EXP_MASK_PORT); + + if (!CONFIG(commit_timeout) && nfexp_attr_is_set(exp, ATTR_EXP_TIMEOUT)) + exp_build_u32(exp, ATTR_EXP_TIMEOUT, n, NTA_EXP_TIMEOUT); + + exp_build_u32(exp, ATTR_EXP_FLAGS, n, NTA_EXP_FLAGS); +} diff --git a/src/cache-ct.c b/src/cache-ct.c index 2c6fd4e..0ad8d2a 100644 --- a/src/cache-ct.c +++ b/src/cache-ct.c @@ -251,7 +251,7 @@ static int cache_ct_commit(struct cache *c, struct nfct_handle *h, int clientfd) /* we already have one commit in progress, skip this. The clientfd * descriptor has to be closed by the caller. */ if (clientfd && STATE_SYNC(commit).clientfd != -1) - return 0; + return -1; switch(STATE_SYNC(commit).state) { case COMMIT_STATE_INACTIVE: @@ -308,9 +308,7 @@ static int cache_ct_commit(struct cache *c, struct nfct_handle *h, int clientfd) STATE_SYNC(commit).current = 0; STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE; - /* Close the client socket now that we're done. */ - close(STATE_SYNC(commit).clientfd); - STATE_SYNC(commit).clientfd = -1; + return 0; } return 1; } diff --git a/src/cache-exp.c b/src/cache-exp.c new file mode 100644 index 0000000..e88877a --- /dev/null +++ b/src/cache-exp.c @@ -0,0 +1,308 @@ +/* + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "cache.h" +#include "hash.h" +#include "log.h" +#include "conntrackd.h" +#include "netlink.h" +#include "event.h" +#include "jhash.h" +#include "network.h" + +#include +#include +#include +#include + +static uint32_t +cache_hash4_exp(const struct nf_conntrack *ct, const struct hashtable *table) +{ + uint32_t a[4] = { + [0] = nfct_get_attr_u32(ct, ATTR_IPV4_SRC), + [1] = nfct_get_attr_u32(ct, ATTR_IPV4_DST), + [2] = nfct_get_attr_u8(ct, ATTR_L3PROTO) << 16 | + nfct_get_attr_u8(ct, ATTR_L4PROTO), + [3] = nfct_get_attr_u16(ct, ATTR_PORT_SRC) << 16 | + nfct_get_attr_u16(ct, ATTR_PORT_DST), + }; + + /* + * Instead of returning hash % table->hashsize (implying a divide) + * we return the high 32 bits of the (hash * table->hashsize) that will + * give results between [0 and hashsize-1] and same hash distribution, + * but using a multiply, less expensive than a divide. See: + * http://www.mail-archive.com/netdev@vger.kernel.org/msg56623.html + */ + return ((uint64_t)jhash2(a, 4, 0) * table->hashsize) >> 32; +} + +static uint32_t +cache_hash6_exp(const struct nf_conntrack *ct, const struct hashtable *table) +{ + uint32_t a[10]; + + memcpy(&a[0], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); + memcpy(&a[4], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); + a[8] = nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16 | + nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO); + a[9] = nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16 | + nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST); + + return ((uint64_t)jhash2(a, 10, 0) * table->hashsize) >> 32; +} + +static uint32_t +cache_exp_hash(const void *data, const struct hashtable *table) +{ + int ret = 0; + const struct nf_expect *exp = data; + const struct nf_conntrack *ct = nfexp_get_attr(exp, ATTR_EXP_MASTER); + + switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) { + case AF_INET: + ret = cache_hash4_exp(ct, table); + break; + case AF_INET6: + ret = cache_hash6_exp(ct, table); + break; + default: + dlog(LOG_ERR, "unknown layer 3 proto in hash"); + break; + } + return ret; +} + +static int cache_exp_cmp(const void *data1, const void *data2) +{ + const struct cache_object *obj = data1; + const struct nf_expect *exp = data2; + + return nfexp_cmp(obj->ptr, exp, 0); +} + +static void *cache_exp_alloc(void) +{ + return nfexp_new(); +} + +static void cache_exp_free(void *ptr) +{ + nfexp_destroy(ptr); +} + +static void cache_exp_copy(void *dst, void *src, unsigned int flags) +{ + /* XXX: add nfexp_copy(...) to libnetfilter_conntrack. */ + memcpy(dst, src, nfexp_maxsize()); +} + +static int cache_exp_dump_step(void *data1, void *n) +{ + char buf[1024]; + int size; + struct __dump_container *container = data1; + struct cache_object *obj = n; + char *data = obj->data; + unsigned i; + + /* + * XXX: Do not dump the entries that are scheduled to expire. + * These entries talk about already destroyed connections + * that we keep for some time just in case that we have to + * resent some lost messages. We do not show them to the + * user as he may think that the firewall replicas are not + * in sync. The branch below is a hack as it is quite + * specific and it breaks conntrackd modularity. Probably + * there's a nicer way to do this but until I come up with it... + */ + if (CONFIG(flags) & CTD_SYNC_FTFW && obj->status == C_OBJ_DEAD) + return 0; + + /* do not show cached timeout, this may confuse users */ + if (nfexp_attr_is_set(obj->ptr, ATTR_EXP_TIMEOUT)) + nfexp_attr_unset(obj->ptr, ATTR_EXP_TIMEOUT); + + memset(buf, 0, sizeof(buf)); + size = nfexp_snprintf(buf, sizeof(buf),obj->ptr, + NFCT_T_UNKNOWN, container->type, 0); + + for (i = 0; i < obj->cache->num_features; i++) { + if (obj->cache->features[i]->dump) { + size += obj->cache->features[i]->dump(obj, data, + buf+size, + container->type); + data += obj->cache->features[i]->size; + } + } + if (container->type != NFCT_O_XML) { + long tm = time(NULL); + size += sprintf(buf+size, " [active since %lds]", + tm - obj->lifetime); + } + size += sprintf(buf+size, "\n"); + if (send(container->fd, buf, size, 0) == -1) { + if (errno != EPIPE) + return -1; + } + + return 0; +} + +static int cache_exp_commit_step(void *data, void *n) +{ + struct cache_object *obj = n; + struct __commit_container *tmp = data; + int ret, retry = 1, timeout; + struct nf_expect *exp = obj->ptr; + + if (CONFIG(commit_timeout)) { + timeout = CONFIG(commit_timeout); + } else { + timeout = time(NULL) - obj->lastupdate; + if (timeout < 0) { + /* XXX: Arbitrarily set the timer to one minute, how + * can this happen? For example, an adjustment due to + * daylight-saving. Probably other situations can + * trigger this. */ + timeout = 60; + } + /* calculate an estimation of the current timeout */ + timeout = nfexp_get_attr_u32(exp, ATTR_EXP_TIMEOUT) - timeout; + if (timeout < 0) { + timeout = 60; + } + } + +retry: + if (nl_create_expect(tmp->h, exp, timeout) == -1) { + if (errno == EEXIST && retry == 1) { + ret = nl_destroy_expect(tmp->h, exp); + if (ret == 0 || (ret == -1 && errno == ENOENT)) { + if (retry) { + retry = 0; + goto retry; + } + } + dlog(LOG_ERR, "commit-destroy: %s", strerror(errno)); + dlog_exp(STATE(log), exp, NFCT_O_PLAIN); + tmp->c->stats.commit_fail++; + } else { + dlog(LOG_ERR, "commit-create: %s", strerror(errno)); + dlog_exp(STATE(log), exp, NFCT_O_PLAIN); + tmp->c->stats.commit_fail++; + } + } else { + tmp->c->stats.commit_ok++; + } + /* keep iterating even if we have found errors */ + return 0; +} + +static int +cache_exp_commit(struct cache *c, struct nfct_handle *h, int clientfd) +{ + unsigned int commit_ok, commit_fail; + struct timeval commit_stop, res; + struct __commit_container tmp = { + .h = h, + .c = c, + }; + + /* we already have one commit in progress, skip this. The clientfd + * descriptor has to be closed by the caller. */ + if (clientfd && STATE_SYNC(commit).clientfd != -1) + return -1; + + switch(STATE_SYNC(commit).state) { + case COMMIT_STATE_INACTIVE: + gettimeofday(&STATE_SYNC(commit).stats.start, NULL); + STATE_SYNC(commit).stats.ok = c->stats.commit_ok; + STATE_SYNC(commit).stats.fail = c->stats.commit_fail; + STATE_SYNC(commit).clientfd = clientfd; + case COMMIT_STATE_MASTER: + STATE_SYNC(commit).current = + hashtable_iterate_limit(c->h, &tmp, + STATE_SYNC(commit).current, + CONFIG(general).commit_steps, + cache_exp_commit_step); + if (STATE_SYNC(commit).current < CONFIG(hashsize)) { + STATE_SYNC(commit).state = COMMIT_STATE_MASTER; + /* give it another step as soon as possible */ + write_evfd(STATE_SYNC(commit).evfd); + return 1; + } + + /* calculate the time that commit has taken */ + gettimeofday(&commit_stop, NULL); + timersub(&commit_stop, &STATE_SYNC(commit).stats.start, &res); + + /* calculate new entries committed */ + commit_ok = c->stats.commit_ok - STATE_SYNC(commit).stats.ok; + commit_fail = + c->stats.commit_fail - STATE_SYNC(commit).stats.fail; + + /* log results */ + dlog(LOG_NOTICE, "Committed %u new expectations", commit_ok); + + if (commit_fail) + dlog(LOG_NOTICE, "%u expectations can't be " + "committed", commit_fail); + + dlog(LOG_NOTICE, "commit has taken %lu.%06lu seconds", + res.tv_sec, res.tv_usec); + + /* prepare the state machine for new commits */ + STATE_SYNC(commit).current = 0; + STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE; + + return 0; + } + return 1; +} + +static struct nethdr * +cache_exp_build_msg(const struct cache_object *obj, int type) +{ + return BUILD_NETMSG_FROM_EXP(obj->ptr, type); +} + +/* template to cache expectations coming from the kernel. */ +struct cache_ops cache_sync_internal_exp_ops = { + .hash = cache_exp_hash, + .cmp = cache_exp_cmp, + .alloc = cache_exp_alloc, + .free = cache_exp_free, + .copy = cache_exp_copy, + .dump_step = cache_exp_dump_step, + .commit = NULL, + .build_msg = cache_exp_build_msg, +}; + +/* template to cache expectations coming from the network. */ +struct cache_ops cache_sync_external_exp_ops = { + .hash = cache_exp_hash, + .cmp = cache_exp_cmp, + .alloc = cache_exp_alloc, + .free = cache_exp_free, + .copy = cache_exp_copy, + .dump_step = cache_exp_dump_step, + .commit = cache_exp_commit, + .build_msg = NULL, +}; diff --git a/src/external_cache.c b/src/external_cache.c index 3f896a0..e290249 100644 --- a/src/external_cache.c +++ b/src/external_cache.c @@ -26,6 +26,7 @@ #include static struct cache *external; +static struct cache *external_exp; static int external_cache_init(void) { @@ -36,12 +37,21 @@ static int external_cache_init(void) dlog(LOG_ERR, "can't allocate memory for the external cache"); return -1; } + external_exp = cache_create("external", CACHE_T_EXP, + STATE_SYNC(sync)->external_cache_flags, + NULL, &cache_sync_external_exp_ops); + if (external_exp == NULL) { + dlog(LOG_ERR, "can't allocate memory for the external cache"); + return -1; + } + return 0; } static void external_cache_close(void) { cache_destroy(external); + cache_destroy(external_exp); } static void external_cache_ct_new(struct nf_conntrack *ct) @@ -109,6 +119,71 @@ static void external_cache_ct_stats_ext(int fd) cache_stats_extended(external, fd); } +static void external_cache_exp_new(struct nf_expect *exp) +{ + struct cache_object *obj; + int id; + + obj = cache_find(external_exp, exp, &id); + if (obj == NULL) { +retry: + obj = cache_object_new(external_exp, exp); + if (obj == NULL) + return; + + if (cache_add(external_exp, obj, id) == -1) { + cache_object_free(obj); + return; + } + } else { + cache_del(external_exp, obj); + cache_object_free(obj); + goto retry; + } +} + +static void external_cache_exp_upd(struct nf_expect *exp) +{ + cache_update_force(external_exp, exp); +} + +static void external_cache_exp_del(struct nf_expect *exp) +{ + struct cache_object *obj; + int id; + + obj = cache_find(external_exp, exp, &id); + if (obj) { + cache_del(external_exp, obj); + cache_object_free(obj); + } +} + +static void external_cache_exp_dump(int fd, int type) +{ + cache_dump(external_exp, fd, type); +} + +static int external_cache_exp_commit(struct nfct_handle *h, int fd) +{ + return cache_commit(external_exp, h, fd); +} + +static void external_cache_exp_flush(void) +{ + cache_flush(external_exp); +} + +static void external_cache_exp_stats(int fd) +{ + cache_stats(external_exp, fd); +} + +static void external_cache_exp_stats_ext(int fd) +{ + cache_stats_extended(external_exp, fd); +} + struct external_handler external_cache = { .init = external_cache_init, .close = external_cache_close, @@ -122,4 +197,14 @@ struct external_handler external_cache = { .stats = external_cache_ct_stats, .stats_ext = external_cache_ct_stats_ext, }, + .exp = { + .new = external_cache_exp_new, + .upd = external_cache_exp_upd, + .del = external_cache_exp_del, + .dump = external_cache_exp_dump, + .commit = external_cache_exp_commit, + .flush = external_cache_exp_flush, + .stats = external_cache_exp_stats, + .stats_ext = external_cache_exp_stats_ext, + }, }; diff --git a/src/external_inject.c b/src/external_inject.c index ba5f3d1..0ad3478 100644 --- a/src/external_inject.c +++ b/src/external_inject.c @@ -42,7 +42,7 @@ struct { static int external_inject_init(void) { /* handler to directly inject conntracks into kernel-space */ - inject = nfct_open(CONNTRACK, 0); + inject = nfct_open(CONFIG(netlink).subsys_id, 0); if (inject == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -175,6 +175,89 @@ static void external_inject_ct_stats(int fd) send(fd, buf, size, 0); } +struct { + uint32_t add_ok; + uint32_t add_fail; + uint32_t upd_ok; + uint32_t upd_fail; + uint32_t del_ok; + uint32_t del_fail; +} exp_external_inject_stat; + +static void external_inject_exp_new(struct nf_expect *exp) +{ + int ret, retry = 1; + +retry: + if (nl_create_expect(inject, exp, 0) == -1) { + /* if the state entry exists, we delete and try again */ + if (errno == EEXIST && retry == 1) { + ret = nl_destroy_expect(inject, exp); + if (ret == 0 || (ret == -1 && errno == ENOENT)) { + if (retry) { + retry = 0; + goto retry; + } + } + exp_external_inject_stat.add_fail++; + dlog(LOG_ERR, "inject-add1: %s", strerror(errno)); + dlog_exp(STATE(log), exp, NFCT_O_PLAIN); + return; + } + exp_external_inject_stat.add_fail++; + dlog(LOG_ERR, "inject-add2: %s", strerror(errno)); + dlog_exp(STATE(log), exp, NFCT_O_PLAIN); + } else { + exp_external_inject_stat.add_ok++; + } +} + +static void external_inject_exp_del(struct nf_expect *exp) +{ + if (nl_destroy_expect(inject, exp) == -1) { + if (errno != ENOENT) { + exp_external_inject_stat.del_fail++; + dlog(LOG_ERR, "inject-del: %s", strerror(errno)); + dlog_exp(STATE(log), exp, NFCT_O_PLAIN); + } + } else { + exp_external_inject_stat.del_ok++; + } +} + +static void external_inject_exp_dump(int fd, int type) +{ +} + +static int external_inject_exp_commit(struct nfct_handle *h, int fd) +{ + /* close the commit socket. */ + return LOCAL_RET_OK; +} + +static void external_inject_exp_flush(void) +{ +} + +static void external_inject_exp_stats(int fd) +{ + char buf[512]; + int size; + + size = sprintf(buf, "external inject:\n" + "connections created:\t\t%12u\tfailed:\t%12u\n" + "connections updated:\t\t%12u\tfailed:\t%12u\n" + "connections destroyed:\t\t%12u\tfailed:\t%12u\n\n", + exp_external_inject_stat.add_ok, + exp_external_inject_stat.add_fail, + exp_external_inject_stat.upd_ok, + exp_external_inject_stat.upd_fail, + exp_external_inject_stat.del_ok, + exp_external_inject_stat.del_fail); + + send(fd, buf, size, 0); +} + struct external_handler external_inject = { .init = external_inject_init, .close = external_inject_close, @@ -188,4 +271,14 @@ struct external_handler external_inject = { .stats = external_inject_ct_stats, .stats_ext = external_inject_ct_stats, }, + .exp = { + .new = external_inject_exp_new, + .upd = external_inject_exp_new, + .del = external_inject_exp_del, + .dump = external_inject_exp_dump, + .commit = external_inject_exp_commit, + .flush = external_inject_exp_flush, + .stats = external_inject_exp_stats, + .stats_ext = external_inject_exp_stats, + }, }; diff --git a/src/filter.c b/src/filter.c index 746a9bb..e8515d6 100644 --- a/src/filter.c +++ b/src/filter.c @@ -405,3 +405,79 @@ int ct_filter_conntrack(const struct nf_conntrack *ct, int userspace) return 0; } + +struct exp_filter { + struct list_head list; +}; + +struct exp_filter *exp_filter_create(void) +{ + struct exp_filter *f; + + f = calloc(1, sizeof(struct exp_filter)); + if (f == NULL) + return NULL; + + INIT_LIST_HEAD(&f->list); + return f; +} + +struct exp_filter_item { + struct list_head head; + char helper_name[NFCT_HELPER_NAME_MAX]; +}; + +/* this is ugly, but it simplifies read_config_yy.y */ +static struct exp_filter *exp_filter_alloc(void) +{ + if (STATE(exp_filter) == NULL) { + STATE(exp_filter) = exp_filter_create(); + if (STATE(exp_filter) == NULL) { + fprintf(stderr, "Can't init expectation filtering!\n"); + return NULL; + } + } + return STATE(exp_filter);; +} + +int exp_filter_add(struct exp_filter *f, const char *helper_name) +{ + struct exp_filter_item *item; + + f = exp_filter_alloc(); + if (f == NULL) + return -1; + + list_for_each_entry(item, &f->list, head) { + if (strncmp(item->helper_name, helper_name, + NFCT_HELPER_NAME_MAX) == 0) { + return -1; + } + } + item = calloc(1, sizeof(struct exp_filter_item)); + if (item == NULL) + return -1; + + strncpy(item->helper_name, helper_name, NFCT_HELPER_NAME_MAX); + list_add(&item->head, &f->list); + return 0; +} + +int exp_filter_find(struct exp_filter *f, const struct nf_expect *exp) +{ + struct exp_filter_item *item; + + if (f == NULL) + return 0; + + list_for_each_entry(item, &f->list, head) { + const char *name = nfexp_get_attr(exp, ATTR_EXP_HELPER_NAME); + + /* we allow partial matching to support things like sip-PORT. */ + if (strncmp(item->helper_name, name, + strlen(item->helper_name)) == 0) { + return 1; + } + } + return 0; +} diff --git a/src/internal_bypass.c b/src/internal_bypass.c index 98717f3..5c83c21 100644 --- a/src/internal_bypass.c +++ b/src/internal_bypass.c @@ -52,7 +52,7 @@ static void internal_bypass_ct_dump(int fd, int type) u_int32_t family = AF_UNSPEC; int ret; - h = nfct_open(CONNTRACK, 0); + h = nfct_open(CONFIG(netlink).subsys_id, 0); if (h == NULL) { dlog(LOG_ERR, "can't allocate memory for the internal cache"); return; @@ -151,6 +151,138 @@ static int internal_bypass_ct_event_del(struct nf_conntrack *ct, int origin) return 1; } +static int +internal_bypass_exp_dump_cb(enum nf_conntrack_msg_type type, + struct nf_expect *exp, void *data) +{ + char buf[1024]; + int size, *fd = data; + const struct nf_conntrack *master = + nfexp_get_attr(exp, ATTR_EXP_MASTER); + + if (!exp_filter_find(STATE(exp_filter), exp)) + return NFCT_CB_CONTINUE; + + if (ct_filter_conntrack(master, 1)) + return NFCT_CB_CONTINUE; + + size = nfexp_snprintf(buf, 1024, exp, + NFCT_T_UNKNOWN, NFCT_O_DEFAULT, 0); + if (size < 1024) { + buf[size] = '\n'; + size++; + } + send(*fd, buf, size, 0); + + return NFCT_CB_CONTINUE; +} + +static void internal_bypass_exp_dump(int fd, int type) +{ + struct nfct_handle *h; + u_int32_t family = AF_UNSPEC; + int ret; + + h = nfct_open(CONFIG(netlink).subsys_id, 0); + if (h == NULL) { + dlog(LOG_ERR, "can't allocate memory for the internal cache"); + return; + } + nfexp_callback_register(h, NFCT_T_ALL, + internal_bypass_exp_dump_cb, &fd); + ret = nfexp_query(h, NFCT_Q_DUMP, &family); + if (ret == -1) { + dlog(LOG_ERR, "can't dump kernel table"); + } + nfct_close(h); +} + +static void internal_bypass_exp_flush(void) +{ + nl_flush_expect_table(STATE(flush)); +} + +struct { + uint32_t new; + uint32_t upd; + uint32_t del; +} exp_internal_bypass_stats; + +static void internal_bypass_exp_stats(int fd) +{ + char buf[512]; + int size; + + size = sprintf(buf, "internal bypass:\n" + "connections new:\t\t%12u\n" + "connections updated:\t\t%12u\n" + "connections destroyed:\t\t%12u\n\n", + exp_internal_bypass_stats.new, + exp_internal_bypass_stats.upd, + exp_internal_bypass_stats.del); + + send(fd, buf, size, 0); +} + +/* unused, INTERNAL_F_POPULATE is unset. No cache, nothing to populate. */ +static void internal_bypass_exp_populate(struct nf_expect *exp) +{ +} + +/* unused, INTERNAL_F_RESYNC is unset. */ +static void internal_bypass_exp_purge(void) +{ +} + +/* unused, INTERNAL_F_RESYNC is unset. Nothing to resync, we have no cache. */ +static int +internal_bypass_exp_resync(enum nf_conntrack_msg_type type, + struct nf_expect *exp, void *data) +{ + return NFCT_CB_CONTINUE; +} + +static void internal_bypass_exp_event_new(struct nf_expect *exp, int origin) +{ + struct nethdr *net; + + /* this event has been triggered by me, skip */ + if (origin != CTD_ORIGIN_NOT_ME) + return; + + net = BUILD_NETMSG_FROM_EXP(exp, NET_T_STATE_EXP_NEW); + multichannel_send(STATE_SYNC(channel), net); + exp_internal_bypass_stats.new++; +} + +static void internal_bypass_exp_event_upd(struct nf_expect *exp, int origin) +{ + struct nethdr *net; + + /* this event has been triggered by me, skip */ + if (origin != CTD_ORIGIN_NOT_ME) + return; + + net = BUILD_NETMSG_FROM_EXP(exp, NET_T_STATE_EXP_UPD); + multichannel_send(STATE_SYNC(channel), net); + exp_internal_bypass_stats.upd++; +} + +static int internal_bypass_exp_event_del(struct nf_expect *exp, int origin) +{ + struct nethdr *net; + + /* this event has been triggered by me, skip */ + if (origin != CTD_ORIGIN_NOT_ME) + return 1; + + net = BUILD_NETMSG_FROM_EXP(exp, NET_T_STATE_EXP_DEL); + multichannel_send(STATE_SYNC(channel), net); + exp_internal_bypass_stats.del++; + + return 1; +} + struct internal_handler internal_bypass = { .init = internal_bypass_init, .close = internal_bypass_close, @@ -166,4 +298,16 @@ struct internal_handler internal_bypass = { .upd = internal_bypass_ct_event_upd, .del = internal_bypass_ct_event_del, }, + .exp = { + .dump = internal_bypass_exp_dump, + .flush = internal_bypass_exp_flush, + .stats = internal_bypass_exp_stats, + .stats_ext = internal_bypass_exp_stats, + .populate = internal_bypass_exp_populate, + .purge = internal_bypass_exp_purge, + .resync = internal_bypass_exp_resync, + .new = internal_bypass_exp_event_new, + .upd = internal_bypass_exp_event_upd, + .del = internal_bypass_exp_event_del, + }, }; diff --git a/src/internal_cache.c b/src/internal_cache.c index 952327d..ba2d74b 100644 --- a/src/internal_cache.c +++ b/src/internal_cache.c @@ -32,12 +32,25 @@ static int internal_cache_init(void) dlog(LOG_ERR, "can't allocate memory for the internal cache"); return -1; } + + STATE(mode)->internal->exp.data = + cache_create("internal", CACHE_T_EXP, + STATE_SYNC(sync)->internal_cache_flags, + STATE_SYNC(sync)->internal_cache_extra, + &cache_sync_internal_exp_ops); + + if (!STATE(mode)->internal->exp.data) { + dlog(LOG_ERR, "can't allocate memory for the internal cache"); + return -1; + } + return 0; } static void internal_cache_close(void) { cache_destroy(STATE(mode)->internal->ct.data); + cache_destroy(STATE(mode)->internal->exp.data); } static void internal_cache_ct_dump(int fd, int type) @@ -203,6 +216,154 @@ static int internal_cache_ct_event_del(struct nf_conntrack *ct, int origin) return 1; } +static void internal_cache_exp_dump(int fd, int type) +{ + cache_dump(STATE(mode)->internal->exp.data, fd, type); +} + +static void internal_cache_exp_flush(void) +{ + cache_flush(STATE(mode)->internal->exp.data); +} + +static void internal_cache_exp_stats(int fd) +{ + cache_stats(STATE(mode)->internal->exp.data, fd); +} + +static void internal_cache_exp_stats_ext(int fd) +{ + cache_stats_extended(STATE(mode)->internal->exp.data, fd); +} + +static void internal_cache_exp_populate(struct nf_expect *exp) +{ + cache_update_force(STATE(mode)->internal->exp.data, exp); +} + +static int internal_cache_exp_purge_step(void *data1, void *data2) +{ + struct cache_object *obj = data2; + + STATE(get_retval) = 0; + nl_get_expect(STATE(get), obj->ptr); /* modifies STATE(get_reval) */ + if (!STATE(get_retval)) { + if (obj->status != C_OBJ_DEAD) { + cache_object_set_status(obj, C_OBJ_DEAD); + sync_send(obj, NET_T_STATE_EXP_DEL); + cache_object_put(obj); + } + } + + return 0; +} + +static void internal_cache_exp_purge(void) +{ + cache_iterate(STATE(mode)->internal->exp.data, NULL, + internal_cache_exp_purge_step); +} + +static int +internal_cache_exp_resync(enum nf_conntrack_msg_type type, + struct nf_expect *exp, void *data) +{ + struct cache_object *obj; + const struct nf_conntrack *master = + nfexp_get_attr(exp, ATTR_EXP_MASTER); + + if (!exp_filter_find(STATE(exp_filter), exp)) + return NFCT_CB_CONTINUE; + + if (ct_filter_conntrack(master, 1)) + return NFCT_CB_CONTINUE; + + obj = cache_update_force(STATE(mode)->internal->exp.data, exp); + if (obj == NULL) + return NFCT_CB_CONTINUE; + + switch (obj->status) { + case C_OBJ_NEW: + sync_send(obj, NET_T_STATE_EXP_NEW); + break; + case C_OBJ_ALIVE: + sync_send(obj, NET_T_STATE_EXP_UPD); + break; + } + return NFCT_CB_CONTINUE; +} + +static void internal_cache_exp_event_new(struct nf_expect *exp, int origin) +{ + struct cache_object *obj; + int id; + + /* this event has been triggered by a direct inject, skip */ + if (origin == CTD_ORIGIN_INJECT) + return; + + obj = cache_find(STATE(mode)->internal->exp.data, exp, &id); + if (obj == NULL) { +retry: + obj = cache_object_new(STATE(mode)->internal->exp.data, exp); + if (obj == NULL) + return; + if (cache_add(STATE(mode)->internal->exp.data, obj, id) == -1) { + cache_object_free(obj); + return; + } + /* only synchronize events that have been triggered by other + * processes or the kernel, but don't propagate events that + * have been triggered by conntrackd itself, eg. commits. */ + if (origin == CTD_ORIGIN_NOT_ME) + sync_send(obj, NET_T_STATE_EXP_NEW); + } else { + cache_del(STATE(mode)->internal->exp.data, obj); + cache_object_free(obj); + goto retry; + } +} + +static void internal_cache_exp_event_upd(struct nf_expect *exp, int origin) +{ + struct cache_object *obj; + + /* this event has been triggered by a direct inject, skip */ + if (origin == CTD_ORIGIN_INJECT) + return; + + obj = cache_update_force(STATE(mode)->internal->exp.data, exp); + if (obj == NULL) + return; + + if (origin == CTD_ORIGIN_NOT_ME) + sync_send(obj, NET_T_STATE_EXP_UPD); +} + +static int internal_cache_exp_event_del(struct nf_expect *exp, int origin) +{ + struct cache_object *obj; + int id; + + /* this event has been triggered by a direct inject, skip */ + if (origin == CTD_ORIGIN_INJECT) + return 0; + + /* we don't synchronize events for objects that are not in the cache */ + obj = cache_find(STATE(mode)->internal->exp.data, exp, &id); + if (obj == NULL) + return 0; + + if (obj->status != C_OBJ_DEAD) { + cache_object_set_status(obj, C_OBJ_DEAD); + if (origin == CTD_ORIGIN_NOT_ME) { + sync_send(obj, NET_T_STATE_EXP_DEL); + } + cache_object_put(obj); + } + return 1; +} + struct internal_handler internal_cache = { .flags = INTERNAL_F_POPULATE | INTERNAL_F_RESYNC, .init = internal_cache_init, @@ -219,4 +380,16 @@ struct internal_handler internal_cache = { .upd = internal_cache_ct_event_upd, .del = internal_cache_ct_event_del, }, + .exp = { + .dump = internal_cache_exp_dump, + .flush = internal_cache_exp_flush, + .stats = internal_cache_exp_stats, + .stats_ext = internal_cache_exp_stats_ext, + .populate = internal_cache_exp_populate, + .purge = internal_cache_exp_purge, + .resync = internal_cache_exp_resync, + .new = internal_cache_exp_event_new, + .upd = internal_cache_exp_event_upd, + .del = internal_cache_exp_event_del, + }, }; diff --git a/src/log.c b/src/log.c index 9fe5119..d4de111 100644 --- a/src/log.c +++ b/src/log.c @@ -145,6 +145,43 @@ void dlog_ct(FILE *fd, struct nf_conntrack *ct, unsigned int type) } } +void dlog_exp(FILE *fd, struct nf_expect *exp, unsigned int type) +{ + time_t t; + char buf[1024]; + char *tmp; + unsigned int flags = 0; + + buf[0]='\0'; + + switch(type) { + case NFCT_O_PLAIN: + t = time(NULL); + ctime_r(&t, buf); + tmp = buf + strlen(buf); + buf[strlen(buf)-1]='\t'; + break; + default: + return; + } + nfexp_snprintf(buf+strlen(buf), 1024-strlen(buf), exp, 0, type, flags); + + if (fd) { + snprintf(buf+strlen(buf), 1024-strlen(buf), "\n"); + fputs(buf, fd); + } + + if (fd == STATE(log)) { + /* error reporting */ + if (CONFIG(syslog_facility) != -1) + syslog(LOG_ERR, "%s", tmp); + } else if (fd == STATE(stats_log)) { + /* connection logging */ + if (CONFIG(stats).syslog_facility != -1) + syslog(LOG_INFO, "%s", tmp); + } +} + void close_log(void) { if (STATE(log) != NULL) diff --git a/src/main.c b/src/main.c index ebfc8b9..342ed45 100644 --- a/src/main.c +++ b/src/main.c @@ -1,5 +1,6 @@ /* - * (C) 2006-2007 by Pablo Neira Ayuso + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -38,14 +39,15 @@ static const char usage_daemon_commands[] = static const char usage_client_commands[] = "Client mode commands:\n" - " -c, commit external cache to conntrack table\n" + " -c [ct|expect], commit external cache to conntrack table\n" " -f [|internal|external], flush internal and external cache\n" - " -F, flush kernel conntrack table\n" - " -i, display content of the internal cache\n" - " -e, display the content of the external cache\n" + " -F [ct|expect], flush kernel conntrack table\n" + " -i [ct|expect], display content of the internal cache\n" + " -e [ct|expect], display the content of the external cache\n" " -k, kill conntrack daemon\n" - " -s [|network|cache|runtime|link|rsqueue|queue], dump statistics\n" - " -R, resync with kernel conntrack table\n" + " -s [|network|cache|runtime|link|rsqueue|queue|ct|expect], " + "dump statistics\n" + " -R [ct|expect], resync with kernel conntrack table\n" " -n, request resync with other node (only FT-FW and NOTRACK modes)\n" " -x, dump cache in XML format (requires -i or -e)\n" " -t, reset the kernel timeout (see PurgeTimeout clause)\n" @@ -89,6 +91,25 @@ set_operation_mode(int *current, int want, char *argv[]) } } +static int +set_action_by_table(int i, int argc, char *argv[], + int ct_action, int exp_action, int dfl_action, int *action) +{ + if (i+1 < argc && argv[i+1][0] != '-') { + if (strncmp(argv[i+1], "ct", strlen(argv[i+1])) == 0) { + *action = ct_action; + i++; + } else if (strncmp(argv[i+1], "expect", + strlen(argv[i+1])) == 0) { + *action = exp_action; + i++; + } + } else + *action = dfl_action; + + return i; +} + int main(int argc, char *argv[]) { int ret, i, action = -1; @@ -115,15 +136,23 @@ int main(int argc, char *argv[]) break; case 'c': set_operation_mode(&type, REQUEST, argv); - action = CT_COMMIT; + i = set_action_by_table(i, argc, argv, + CT_COMMIT, EXP_COMMIT, + ALL_COMMIT, &action); break; case 'i': set_operation_mode(&type, REQUEST, argv); - action = CT_DUMP_INTERNAL; + i = set_action_by_table(i, argc, argv, + CT_DUMP_INTERNAL, + EXP_DUMP_INTERNAL, + CT_DUMP_INTERNAL, &action); break; case 'e': set_operation_mode(&type, REQUEST, argv); - action = CT_DUMP_EXTERNAL; + i = set_action_by_table(i, argc, argv, + CT_DUMP_EXTERNAL, + EXP_DUMP_EXTERNAL, + CT_DUMP_EXTERNAL, &action); break; case 'C': if (++i < argc) { @@ -142,7 +171,10 @@ int main(int argc, char *argv[]) break; case 'F': set_operation_mode(&type, REQUEST, argv); - action = CT_FLUSH_MASTER; + i = set_action_by_table(i, argc, argv, + CT_FLUSH_MASTER, + EXP_FLUSH_MASTER, + ALL_FLUSH_MASTER, &action); break; case 'f': set_operation_mode(&type, REQUEST, argv); @@ -164,12 +196,15 @@ int main(int argc, char *argv[]) } } else { /* default to general flushing */ - action = CT_FLUSH_CACHE; + action = ALL_FLUSH_CACHE; } break; case 'R': set_operation_mode(&type, REQUEST, argv); - action = CT_RESYNC_MASTER; + i = set_action_by_table(i, argc, argv, + CT_RESYNC_MASTER, + EXP_RESYNC_MASTER, + ALL_RESYNC_MASTER, &action); break; case 'B': set_operation_mode(&type, REQUEST, argv); @@ -222,6 +257,14 @@ int main(int argc, char *argv[]) strlen(argv[i+1])) == 0) { action = STATS_QUEUE; i++; + } else if (strncmp(argv[i+1], "ct", + strlen(argv[i+1])) == 0) { + action = STATS; + i++; + } else if (strncmp(argv[i+1], "expect", + strlen(argv[i+1])) == 0) { + action = EXP_STATS; + i++; } else { fprintf(stderr, "ERROR: unknown " "parameter `%s' for " diff --git a/src/netlink.c b/src/netlink.c index 60274f3..fe979e3 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -1,5 +1,6 @@ /* - * (C) 2006 by Pablo Neira Ayuso + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -32,7 +33,7 @@ struct nfct_handle *nl_init_event_handler(void) { struct nfct_handle *h; - h = nfct_open(CONNTRACK, NFCT_ALL_CT_GROUPS); + h = nfct_open(CONFIG(netlink).subsys_id, CONFIG(netlink).groups); if (h == NULL) return NULL; @@ -301,3 +302,61 @@ int nl_destroy_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) { return nfct_query(h, NFCT_Q_DESTROY, ct); } + +int nl_create_expect(struct nfct_handle *h, const struct nf_expect *orig, + int timeout) +{ + int ret; + struct nf_expect *exp; + + exp = nfexp_clone(orig); + if (exp == NULL) + return -1; + + if (timeout > 0) + nfexp_set_attr_u32(exp, ATTR_EXP_TIMEOUT, timeout); + + ret = nfexp_query(h, NFCT_Q_CREATE, exp); + nfexp_destroy(exp); + + return ret; +} + +int nl_destroy_expect(struct nfct_handle *h, const struct nf_expect *exp) +{ + return nfexp_query(h, NFCT_Q_DESTROY, exp); +} + +/* if the handle has no callback, check for existence, otherwise, update */ +int nl_get_expect(struct nfct_handle *h, const struct nf_expect *exp) +{ + int ret = 1; + struct nf_expect *tmp; + + /* XXX: we only need the expectation, not the mask and the master. */ + tmp = nfexp_clone(exp); + if (tmp == NULL) + return -1; + + if (nfexp_query(h, NFCT_Q_GET, tmp) == -1) + ret = (errno == ENOENT) ? 0 : -1; + + nfexp_destroy(tmp); + return ret; +} + +int nl_dump_expect_table(struct nfct_handle *h) +{ + return nfexp_query(h, NFCT_Q_DUMP, &CONFIG(family)); +} + +int nl_flush_expect_table(struct nfct_handle *h) +{ + return nfexp_query(h, NFCT_Q_FLUSH, &CONFIG(family)); +} + +int nl_send_expect_resync(struct nfct_handle *h) +{ + int family = CONFIG(family); + return nfexp_send(h, NFCT_Q_DUMP, &family); +} diff --git a/src/network.c b/src/network.c index cadc466..13db37c 100644 --- a/src/network.c +++ b/src/network.c @@ -126,6 +126,11 @@ static int status2type[CACHE_T_MAX][C_OBJ_MAX] = { [C_OBJ_ALIVE] = NET_T_STATE_CT_UPD, [C_OBJ_DEAD] = NET_T_STATE_CT_DEL, }, + [CACHE_T_EXP] = { + [C_OBJ_NEW] = NET_T_STATE_EXP_NEW, + [C_OBJ_ALIVE] = NET_T_STATE_EXP_UPD, + [C_OBJ_DEAD] = NET_T_STATE_EXP_DEL, + }, }; int object_status_to_network_type(struct cache_object *obj) diff --git a/src/parse.c b/src/parse.c index 0718128..81e9c6b 100644 --- a/src/parse.c +++ b/src/parse.c @@ -248,3 +248,195 @@ int msg2ct(struct nf_conntrack *ct, struct nethdr *net, size_t remain) return 0; } + +static void exp_parse_ct_group(void *ct, int attr, void *data); +static void exp_parse_ct_u8(void *ct, int attr, void *data); +static void exp_parse_u32(void *exp, int attr, void *data); + +static struct exp_parser { + void (*parse)(void *obj, int attr, void *data); + int exp_attr; + int ct_attr; + int size; +} exp_h[NTA_EXP_MAX] = { + [NTA_EXP_MASTER_IPV4] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_MASTER, + .ct_attr = ATTR_GRP_ORIG_IPV4, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv4)), + }, + [NTA_EXP_MASTER_IPV6] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_MASTER, + .ct_attr = ATTR_GRP_ORIG_IPV6, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv6)), + }, + [NTA_EXP_MASTER_L4PROTO] = { + .parse = exp_parse_ct_u8, + .exp_attr = ATTR_EXP_MASTER, + .ct_attr = ATTR_L4PROTO, + .size = NTA_SIZE(sizeof(uint8_t)), + }, + [NTA_EXP_MASTER_PORT] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_MASTER, + .ct_attr = ATTR_GRP_ORIG_PORT, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_port)), + }, + [NTA_EXP_EXPECT_IPV4] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_EXPECTED, + .ct_attr = ATTR_GRP_ORIG_IPV4, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv4)), + }, + [NTA_EXP_EXPECT_IPV6] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_EXPECTED, + .ct_attr = ATTR_GRP_ORIG_IPV6, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv6)), + }, + [NTA_EXP_EXPECT_L4PROTO] = { + .parse = exp_parse_ct_u8, + .exp_attr = ATTR_EXP_EXPECTED, + .ct_attr = ATTR_L4PROTO, + .size = NTA_SIZE(sizeof(uint8_t)), + }, + [NTA_EXP_EXPECT_PORT] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_EXPECTED, + .ct_attr = ATTR_GRP_ORIG_PORT, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_port)), + }, + [NTA_EXP_MASK_IPV4] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_MASK, + .ct_attr = ATTR_GRP_ORIG_IPV4, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv4)), + }, + [NTA_EXP_MASK_IPV6] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_MASK, + .ct_attr = ATTR_GRP_ORIG_IPV6, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv6)), + }, + [NTA_EXP_MASK_L4PROTO] = { + .parse = exp_parse_ct_u8, + .exp_attr = ATTR_EXP_MASK, + .ct_attr = ATTR_L4PROTO, + .size = NTA_SIZE(sizeof(uint8_t)), + }, + [NTA_EXP_MASK_PORT] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_MASK, + .ct_attr = ATTR_GRP_ORIG_PORT, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_port)), + }, + [NTA_EXP_TIMEOUT] = { + .parse = exp_parse_u32, + .exp_attr = ATTR_EXP_TIMEOUT, + .size = NTA_SIZE(sizeof(uint32_t)), + }, + [NTA_EXP_FLAGS] = { + .parse = exp_parse_u32, + .exp_attr = ATTR_EXP_FLAGS, + .size = NTA_SIZE(sizeof(uint32_t)), + }, +}; + +static void exp_parse_ct_group(void *ct, int attr, void *data) +{ + nfct_set_attr_grp(ct, exp_h[attr].ct_attr, data); +} + +static void exp_parse_ct_u8(void *ct, int attr, void *data) +{ + uint8_t *value = (uint8_t *) data; + nfct_set_attr_u8(ct, exp_h[attr].ct_attr, *value); +} + +static void exp_parse_u32(void *exp, int attr, void *data) +{ + uint32_t *value = (uint32_t *) data; + nfexp_set_attr_u32(exp, exp_h[attr].exp_attr, ntohl(*value)); +} + +int msg2exp(struct nf_expect *exp, struct nethdr *net, size_t remain) +{ + int len; + struct netattr *attr; + struct nf_conntrack *master, *expected, *mask; + + if (remain < net->len) + return -1; + + len = net->len - NETHDR_SIZ; + attr = NETHDR_DATA(net); + + master = nfct_new(); + if (master == NULL) + goto err_master; + + expected = nfct_new(); + if (expected == NULL) + goto err_expected; + + mask = nfct_new(); + if (mask == NULL) + goto err_mask; + + while (len > ssizeof(struct netattr)) { + ATTR_NETWORK2HOST(attr); + if (attr->nta_len > len) + goto err; + if (attr->nta_attr > NTA_MAX) + goto err; + if (attr->nta_len != exp_h[attr->nta_attr].size) + goto err; + if (exp_h[attr->nta_attr].parse == NULL) { + attr = NTA_NEXT(attr, len); + continue; + } + switch(exp_h[attr->nta_attr].exp_attr) { + case ATTR_EXP_MASTER: + exp_h[attr->nta_attr].parse(master, attr->nta_attr, + NTA_DATA(attr)); + case ATTR_EXP_EXPECTED: + exp_h[attr->nta_attr].parse(expected, attr->nta_attr, + NTA_DATA(attr)); + case ATTR_EXP_MASK: + exp_h[attr->nta_attr].parse(mask, attr->nta_attr, + NTA_DATA(attr)); + break; + case ATTR_EXP_TIMEOUT: + case ATTR_EXP_FLAGS: + exp_h[attr->nta_attr].parse(exp, attr->nta_attr, + NTA_DATA(attr)); + break; + } + attr = NTA_NEXT(attr, len); + } + + nfexp_set_attr(exp, ATTR_EXP_MASTER, master); + nfexp_set_attr(exp, ATTR_EXP_EXPECTED, expected); + nfexp_set_attr(exp, ATTR_EXP_MASK, mask); + + /* We can release the conntrack objects at this point because the + * setter makes a copy of them. This is not efficient, it would be + * better to save that extra copy but this is how the library works. + * I'm sorry, I cannot change it without breaking backward + * compatibility. Probably it is a good idea to think of adding new + * interfaces in the near future to get it better. */ + nfct_destroy(mask); + nfct_destroy(expected); + nfct_destroy(master); + + return 0; +err: + nfct_destroy(mask); +err_mask: + nfct_destroy(expected); +err_expected: + nfct_destroy(master); +err_master: + return -1; +} diff --git a/src/read_config_lex.l b/src/read_config_lex.l index be6bf8b..01fe4fc 100644 --- a/src/read_config_lex.l +++ b/src/read_config_lex.l @@ -140,6 +140,7 @@ notrack [N|n][O|o][T|t][R|r][A|a][C|c][K|k] "DisableExternalCache" { return T_DISABLE_EXTERNAL_CACHE; } "Options" { return T_OPTIONS; } "TCPWindowTracking" { return T_TCP_WINDOW_TRACKING; } +"ExpectationSync" { return T_EXPECT_SYNC; } "ErrorQueueLength" { return T_ERROR_QUEUE_LENGTH; } {is_on} { return T_ON; } diff --git a/src/read_config_yy.y b/src/read_config_yy.y index 68a83f7..b22784c 100644 --- a/src/read_config_yy.y +++ b/src/read_config_yy.y @@ -73,7 +73,7 @@ static void __max_dedicated_links_reached(void); %token T_NETLINK_OVERRUN_RESYNC T_NICE T_IPV4_DEST_ADDR T_IPV6_DEST_ADDR %token T_SCHEDULER T_TYPE T_PRIO T_NETLINK_EVENTS_RELIABLE %token T_DISABLE_INTERNAL_CACHE T_DISABLE_EXTERNAL_CACHE T_ERROR_QUEUE_LENGTH -%token T_OPTIONS T_TCP_WINDOW_TRACKING +%token T_OPTIONS T_TCP_WINDOW_TRACKING T_EXPECT_SYNC %token T_IP T_PATH_VAL %token T_NUMBER @@ -828,6 +828,46 @@ option: T_TCP_WINDOW_TRACKING T_OFF CONFIG(sync).tcp_window_tracking = 0; }; +option: T_EXPECT_SYNC T_ON +{ + CONFIG(flags) |= CTD_EXPECT; + CONFIG(netlink).subsys_id = NFNL_SUBSYS_NONE; + CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW | + NF_NETLINK_CONNTRACK_UPDATE | + NF_NETLINK_CONNTRACK_DESTROY | + NF_NETLINK_CONNTRACK_EXP_NEW | + NF_NETLINK_CONNTRACK_EXP_UPDATE | + NF_NETLINK_CONNTRACK_EXP_DESTROY; +}; + +option: T_EXPECT_SYNC T_OFF +{ + CONFIG(netlink).subsys_id = NFNL_SUBSYS_CTNETLINK; + CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW | + NF_NETLINK_CONNTRACK_UPDATE | + NF_NETLINK_CONNTRACK_DESTROY; +}; + +option: T_EXPECT_SYNC '{' expect_list '}' +{ + CONFIG(flags) |= CTD_EXPECT; + CONFIG(netlink).subsys_id = NFNL_SUBSYS_NONE; + CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW | + NF_NETLINK_CONNTRACK_UPDATE | + NF_NETLINK_CONNTRACK_DESTROY | + NF_NETLINK_CONNTRACK_EXP_NEW | + NF_NETLINK_CONNTRACK_EXP_UPDATE | + NF_NETLINK_CONNTRACK_EXP_DESTROY; +}; + +expect_list: + | expect_list expect_item ; + +expect_item: T_STRING +{ + exp_filter_add(STATE(exp_filter), $1); +} + sync_mode_alarm: T_SYNC_MODE T_ALARM '{' sync_mode_alarm_list '}' { conf.flags |= CTD_SYNC_ALARM; @@ -1598,6 +1638,7 @@ init_config(char *filename) /* Zero may be a valid facility */ CONFIG(syslog_facility) = -1; CONFIG(stats).syslog_facility = -1; + CONFIG(netlink).subsys_id = -1; yyrestart(fp); yyparse(); @@ -1646,5 +1687,12 @@ init_config(char *filename) if (CONFIG(channelc).error_queue_length == 0) CONFIG(channelc).error_queue_length = 128; + if (CONFIG(netlink).subsys_id == -1) { + CONFIG(netlink).subsys_id = NFNL_SUBSYS_CTNETLINK; + CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW | + NF_NETLINK_CONNTRACK_UPDATE | + NF_NETLINK_CONNTRACK_DESTROY; + } + return 0; } diff --git a/src/run.c b/src/run.c index c21db2e..26c1783 100644 --- a/src/run.c +++ b/src/run.c @@ -187,6 +187,62 @@ static void dump_stats_runtime(int fd) send(fd, buf, size, 0); } +static void local_flush_master(void) +{ + STATE(stats).nl_kernel_table_flush++; + dlog(LOG_NOTICE, "flushing kernel conntrack table"); + + /* fork a child process that performs the flush operation, + * meanwhile the parent process handles events. */ + if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL, + NULL, NULL) == 0) { + nl_flush_conntrack_table(STATE(flush)); + exit(EXIT_SUCCESS); + } +} + +static void local_resync_master(void) +{ + if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { + STATE(stats).nl_kernel_table_resync++; + dlog(LOG_NOTICE, "resync with master conntrack table"); + nl_dump_conntrack_table(STATE(dump)); + } else { + dlog(LOG_NOTICE, "resync is unsupported in this mode"); + } +} + +static void local_exp_flush_master(void) +{ + if (!(CONFIG(flags) & CTD_EXPECT)) + return; + + STATE(stats).nl_kernel_table_flush++; + dlog(LOG_NOTICE, "flushing kernel expect table"); + + /* fork a child process that performs the flush operation, + * meanwhile the parent process handles events. */ + if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL, + NULL, NULL) == 0) { + nl_flush_expect_table(STATE(flush)); + exit(EXIT_SUCCESS); + } +} + +static void local_exp_resync_master(void) +{ + if (!(CONFIG(flags) & CTD_EXPECT)) + return; + + if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { + STATE(stats).nl_kernel_table_resync++; + dlog(LOG_NOTICE, "resync with master expect table"); + nl_dump_expect_table(STATE(dump)); + } else { + dlog(LOG_NOTICE, "resync is unsupported in this mode"); + } +} + static int local_handler(int fd, void *data) { int ret = LOCAL_RET_OK; @@ -198,25 +254,24 @@ static int local_handler(int fd, void *data) } switch(type) { case CT_FLUSH_MASTER: - STATE(stats).nl_kernel_table_flush++; - dlog(LOG_NOTICE, "flushing kernel conntrack table"); - - /* fork a child process that performs the flush operation, - * meanwhile the parent process handles events. */ - if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL, - NULL, NULL) == 0) { - nl_flush_conntrack_table(STATE(flush)); - exit(EXIT_SUCCESS); - } + local_flush_master(); break; case CT_RESYNC_MASTER: - if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { - STATE(stats).nl_kernel_table_resync++; - dlog(LOG_NOTICE, "resync with master table"); - nl_dump_conntrack_table(STATE(dump)); - } else { - dlog(LOG_NOTICE, "resync is unsupported in this mode"); - } + local_resync_master(); + break; + case EXP_FLUSH_MASTER: + local_exp_flush_master(); + break; + case EXP_RESYNC_MASTER: + local_exp_resync_master(); + break; + case ALL_FLUSH_MASTER: + local_flush_master(); + local_exp_flush_master(); + break; + case ALL_RESYNC_MASTER: + local_resync_master(); + local_exp_resync_master(); break; case STATS_RUNTIME: dump_stats_runtime(fd); @@ -245,7 +300,11 @@ static void do_polling_alarm(struct alarm_block *a, void *data) if (STATE(mode)->internal->ct.purge) STATE(mode)->internal->ct.purge(); + if (STATE(mode)->internal->exp.purge) + STATE(mode)->internal->exp.purge(); + nl_send_resync(STATE(resync)); + nl_send_expect_resync(STATE(resync)); add_alarm(&STATE(polling_alarm), CONFIG(poll_kernel_secs), 0); } @@ -290,6 +349,49 @@ out: return NFCT_CB_CONTINUE; } +static int exp_event_handler(const struct nlmsghdr *nlh, + enum nf_conntrack_msg_type type, + struct nf_expect *exp, + void *data) +{ + int origin_type; + const struct nf_conntrack *master = + nfexp_get_attr(exp, ATTR_EXP_MASTER); + + STATE(stats).nl_events_received++; + + if (!exp_filter_find(STATE(exp_filter), exp)) { + STATE(stats).nl_events_filtered++; + goto out; + } + if (ct_filter_conntrack(master, 1)) + return NFCT_CB_CONTINUE; + + origin_type = origin_find(nlh); + + switch(type) { + case NFCT_T_NEW: + STATE(mode)->internal->exp.new(exp, origin_type); + break; + case NFCT_T_UPDATE: + STATE(mode)->internal->exp.upd(exp, origin_type); + break; + case NFCT_T_DESTROY: + STATE(mode)->internal->exp.del(exp, origin_type); + break; + default: + STATE(stats).nl_events_unknown_type++; + break; + } + +out: + /* we reset the iteration limiter in the main select loop. */ + if (STATE(event_iterations_limit)-- <= 0) + return NFCT_CB_STOP; + else + return NFCT_CB_CONTINUE; +} + static int dump_handler(enum nf_conntrack_msg_type type, struct nf_conntrack *ct, void *data) @@ -308,6 +410,29 @@ static int dump_handler(enum nf_conntrack_msg_type type, return NFCT_CB_CONTINUE; } +static int exp_dump_handler(enum nf_conntrack_msg_type type, + struct nf_expect *exp, void *data) +{ + const struct nf_conntrack *master = + nfexp_get_attr(exp, ATTR_EXP_MASTER); + + if (!exp_filter_find(STATE(exp_filter), exp)) + return NFCT_CB_CONTINUE; + + if (ct_filter_conntrack(master, 1)) + return NFCT_CB_CONTINUE; + + switch(type) { + case NFCT_T_UPDATE: + STATE(mode)->internal->exp.populate(exp); + break; + default: + STATE(stats).nl_dump_unknown_type++; + break; + } + return NFCT_CB_CONTINUE; +} + static int get_handler(enum nf_conntrack_msg_type type, struct nf_conntrack *ct, void *data) @@ -319,6 +444,22 @@ static int get_handler(enum nf_conntrack_msg_type type, return NFCT_CB_CONTINUE; } +static int exp_get_handler(enum nf_conntrack_msg_type type, + struct nf_expect *exp, void *data) +{ + const struct nf_conntrack *master = + nfexp_get_attr(exp, ATTR_EXP_MASTER); + + if (!exp_filter_find(STATE(exp_filter), exp)) + return NFCT_CB_CONTINUE; + + if (ct_filter_conntrack(master, 1)) + return NFCT_CB_CONTINUE; + + STATE(get_retval) = 1; + return NFCT_CB_CONTINUE; +} + int init(void) { @@ -355,7 +496,7 @@ init(void) register_fd(STATE(local).fd, STATE(fds)); /* resynchronize (like 'dump' socket) but it also purges old entries */ - STATE(resync) = nfct_open(CONNTRACK, 0); + STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0); if (STATE(resync)== NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -370,7 +511,7 @@ init(void) fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK); if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { - STATE(dump) = nfct_open(CONNTRACK, 0); + STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0); if (STATE(dump) == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -380,13 +521,26 @@ init(void) nfct_callback_register(STATE(dump), NFCT_T_ALL, dump_handler, NULL); + if (CONFIG(flags) & CTD_EXPECT) { + nfexp_callback_register(STATE(dump), NFCT_T_ALL, + exp_dump_handler, NULL); + } + if (nl_dump_conntrack_table(STATE(dump)) == -1) { dlog(LOG_ERR, "can't get kernel conntrack table"); return -1; } + + if (CONFIG(flags) & CTD_EXPECT) { + if (nl_dump_expect_table(STATE(dump)) == -1) { + dlog(LOG_ERR, "can't get kernel " + "expect table"); + return -1; + } + } } - STATE(get) = nfct_open(CONNTRACK, 0); + STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0); if (STATE(get) == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -395,7 +549,12 @@ init(void) } nfct_callback_register(STATE(get), NFCT_T_ALL, get_handler, NULL); - STATE(flush) = nfct_open(CONNTRACK, 0); + if (CONFIG(flags) & CTD_EXPECT) { + nfexp_callback_register(STATE(get), NFCT_T_ALL, + exp_get_handler, NULL); + } + + STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0); if (STATE(flush) == NULL) { dlog(LOG_ERR, "cannot open flusher handler"); return -1; @@ -426,6 +585,11 @@ init(void) } nfct_callback_register2(STATE(event), NFCT_T_ALL, event_handler, NULL); + + if (CONFIG(flags) & CTD_EXPECT) { + nfexp_callback_register2(STATE(event), NFCT_T_ALL, + exp_event_handler, NULL); + } register_fd(nfct_fd(STATE(event)), STATE(fds)); } diff --git a/src/sync-ftfw.c b/src/sync-ftfw.c index fa76c0c..1bc2d9f 100644 --- a/src/sync-ftfw.c +++ b/src/sync-ftfw.c @@ -231,6 +231,8 @@ static int ftfw_local(int fd, int type, void *data) dlog(LOG_NOTICE, "sending bulk update"); cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); + cache_iterate(STATE(mode)->internal->exp.data, + NULL, do_cache_to_tx); break; case STATS_RSQUEUE: ftfw_local_queue(fd); @@ -350,7 +352,10 @@ static int digest_msg(const struct nethdr *net) } else if (IS_RESYNC(net)) { dp("RESYNC ALL\n"); - cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); + cache_iterate(STATE(mode)->internal->ct.data, NULL, + do_cache_to_tx); + cache_iterate(STATE(mode)->internal->exp.data, NULL, + do_cache_to_tx); return MSG_CTL; } else if (IS_ALIVE(net)) diff --git a/src/sync-mode.c b/src/sync-mode.c index fa522c7..2505631 100644 --- a/src/sync-mode.c +++ b/src/sync-mode.c @@ -59,10 +59,29 @@ static struct nf_conntrack *msg2ct_alloc(struct nethdr *net, size_t remain) return ct; } +static struct nf_expect *msg2exp_alloc(struct nethdr *net, size_t remain) +{ + struct nf_expect *exp; + + /* TODO: add stats on ENOMEM errors in the future. */ + exp = nfexp_new(); + if (exp == NULL) + return NULL; + + if (msg2exp(exp, net, remain) == -1) { + STATE_SYNC(error).msg_rcv_malformed++; + STATE_SYNC(error).msg_rcv_bad_payload++; + nfexp_destroy(exp); + return NULL; + } + return exp; +} + static void do_channel_handler_step(int i, struct nethdr *net, size_t remain) { - struct nf_conntrack *ct; + struct nf_conntrack *ct = NULL; + struct nf_expect *exp = NULL; if (net->version != CONNTRACKD_PROTOCOL_VERSION) { STATE_SYNC(error).msg_rcv_malformed++; @@ -112,12 +131,33 @@ do_channel_handler_step(int i, struct nethdr *net, size_t remain) return; STATE_SYNC(external)->ct.del(ct); break; + case NET_T_STATE_EXP_NEW: + exp = msg2exp_alloc(net, remain); + if (exp == NULL) + return; + STATE_SYNC(external)->exp.new(exp); + break; + case NET_T_STATE_EXP_UPD: + exp = msg2exp_alloc(net, remain); + if (exp == NULL) + return; + STATE_SYNC(external)->exp.upd(exp); + break; + case NET_T_STATE_EXP_DEL: + exp = msg2exp_alloc(net, remain); + if (exp == NULL) + return; + STATE_SYNC(external)->exp.del(exp); + break; default: STATE_SYNC(error).msg_rcv_malformed++; STATE_SYNC(error).msg_rcv_bad_type++; break; } - nfct_destroy(ct); + if (ct != NULL) + nfct_destroy(ct); + if (exp != NULL) + nfexp_destroy(exp); } static char __net[65536]; /* XXX: maximum MTU for IPv4 */ @@ -351,7 +391,7 @@ static int init_sync(void) STATE(fds)) == -1) return -1; - STATE_SYNC(commit).h = nfct_open(CONNTRACK, 0); + STATE_SYNC(commit).h = nfct_open(CONFIG(netlink).subsys_id, 0); if (STATE_SYNC(commit).h == NULL) { dlog(LOG_ERR, "can't create handler to commit"); return -1; @@ -402,8 +442,30 @@ static void run_sync(fd_set *readfds) interface_handler(); if (FD_ISSET(get_read_evfd(STATE_SYNC(commit).evfd), readfds)) { + int ret; + read_evfd(STATE_SYNC(commit).evfd); - STATE_SYNC(external)->ct.commit(STATE_SYNC(commit).h, 0); + + ret = STATE_SYNC(commit).rq[0].cb(STATE_SYNC(commit).h, 0); + if (ret == 0) { + /* we still have things in the callback queue. */ + if (STATE_SYNC(commit).rq[1].cb) { + int fd = STATE_SYNC(commit).clientfd; + + STATE_SYNC(commit).rq[0].cb = + STATE_SYNC(commit).rq[1].cb; + + STATE_SYNC(commit).rq[1].cb = NULL; + + STATE_SYNC(commit).clientfd = -1; + STATE_SYNC(commit).rq[0].cb( + STATE_SYNC(commit).h, fd); + } else { + /* Close the client socket now, we're done. */ + close(STATE_SYNC(commit).clientfd); + STATE_SYNC(commit).clientfd = -1; + } + } } /* flush pending messages */ @@ -480,6 +542,27 @@ static void dump_stats_sync_extended(int fd) send(fd, buf, size, 0); } +static int local_commit(int fd) +{ + int ret; + + /* delete the reset alarm if any before committing */ + del_alarm(&STATE_SYNC(reset_cache_alarm)); + + ret = STATE_SYNC(commit).rq[0].cb(STATE_SYNC(commit).h, fd); + if (ret == -1) { + dlog(LOG_NOTICE, "commit already in progress, skipping"); + ret = LOCAL_RET_OK; + } else if (ret == 0) { + /* we've finished the commit. */ + ret = LOCAL_RET_OK; + } else { + /* Keep open the client, we want synchronous commit. */ + ret = LOCAL_RET_STOLEN; + } + return ret; +} + /* handler for requests coming via UNIX socket */ static int local_handler_sync(int fd, int type, void *data) { @@ -511,19 +594,10 @@ static int local_handler_sync(int fd, int type, void *data) } break; case CT_COMMIT: - /* delete the reset alarm if any before committing */ - del_alarm(&STATE_SYNC(reset_cache_alarm)); - - dlog(LOG_NOTICE, "committing external cache"); - ret = STATE_SYNC(external)->ct.commit(STATE_SYNC(commit).h, fd); - if (ret == 0) { - dlog(LOG_NOTICE, "commit already in progress, " - "skipping"); - ret = LOCAL_RET_OK; - } else { - /* Keep open the client, we want synchronous commit. */ - ret = LOCAL_RET_STOLEN; - } + dlog(LOG_NOTICE, "committing conntrack cache"); + STATE_SYNC(commit).rq[0].cb = STATE_SYNC(external)->ct.commit; + STATE_SYNC(commit).rq[1].cb = NULL; + ret = local_commit(fd); break; case RESET_TIMERS: if (!alarm_pending(&STATE_SYNC(reset_cache_alarm))) { @@ -575,6 +649,63 @@ static int local_handler_sync(int fd, int type, void *data) case STATS_QUEUE: queue_stats_show(fd); break; + case EXP_STATS: + if (!(CONFIG(flags) & CTD_EXPECT)) + break; + + STATE(mode)->internal->exp.stats(fd); + STATE_SYNC(external)->exp.stats(fd); + dump_traffic_stats(fd); + multichannel_stats(STATE_SYNC(channel), fd); + dump_stats_sync(fd); + break; + case EXP_DUMP_INTERNAL: + if (!(CONFIG(flags) & CTD_EXPECT)) + break; + + if (fork_process_new(CTD_PROC_ANY, 0, NULL, NULL) == 0) { + STATE(mode)->internal->exp.dump(fd, NFCT_O_PLAIN); + exit(EXIT_SUCCESS); + } + break; + case EXP_DUMP_EXTERNAL: + if (!(CONFIG(flags) & CTD_EXPECT)) + break; + + if (fork_process_new(CTD_PROC_ANY, 0, NULL, NULL) == 0) { + STATE_SYNC(external)->exp.dump(fd, NFCT_O_PLAIN); + exit(EXIT_SUCCESS); + } + break; + case EXP_COMMIT: + if (!(CONFIG(flags) & CTD_EXPECT)) + break; + + dlog(LOG_NOTICE, "committing expectation cache"); + STATE_SYNC(commit).rq[0].cb = STATE_SYNC(external)->exp.commit; + STATE_SYNC(commit).rq[1].cb = NULL; + local_commit(fd); + break; + case ALL_FLUSH_CACHE: + dlog(LOG_NOTICE, "flushing caches"); + STATE(mode)->internal->ct.flush(); + STATE_SYNC(external)->ct.flush(); + if (CONFIG(flags) & CTD_EXPECT) { + STATE(mode)->internal->exp.flush(); + STATE_SYNC(external)->exp.flush(); + } + break; + case ALL_COMMIT: + dlog(LOG_NOTICE, "committing all external caches"); + STATE_SYNC(commit).rq[0].cb = STATE_SYNC(external)->ct.commit; + if (CONFIG(flags) & CTD_EXPECT) { + STATE_SYNC(commit).rq[1].cb = + STATE_SYNC(external)->exp.commit; + } else { + STATE_SYNC(commit).rq[1].cb = NULL; + } + local_commit(fd); + break; default: if (STATE_SYNC(sync)->local) ret = STATE_SYNC(sync)->local(fd, type, data); diff --git a/src/sync-notrack.c b/src/sync-notrack.c index 06ad1f0..a7df4e7 100644 --- a/src/sync-notrack.c +++ b/src/sync-notrack.c @@ -102,7 +102,7 @@ static void kernel_resync(void) u_int32_t family = AF_UNSPEC; int ret; - h = nfct_open(CONNTRACK, 0); + h = nfct_open(CONFIG(netlink).subsys_id, 0); if (h == NULL) { dlog(LOG_ERR, "can't allocate memory for the internal cache"); return; @@ -131,6 +131,8 @@ static int notrack_local(int fd, int type, void *data) } else { cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); + cache_iterate(STATE(mode)->internal->exp.data, + NULL, do_cache_to_tx); } break; default: @@ -152,6 +154,8 @@ static int digest_msg(const struct nethdr *net) } else { cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); + cache_iterate(STATE(mode)->internal->exp.data, + NULL, do_cache_to_tx); } return MSG_CTL; } -- cgit v1.2.3