summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am2
-rw-r--r--src/build.c99
-rw-r--r--src/cache-ct.c6
-rw-r--r--src/cache-exp.c308
-rw-r--r--src/external_cache.c85
-rw-r--r--src/external_inject.c95
-rw-r--r--src/filter.c76
-rw-r--r--src/internal_bypass.c146
-rw-r--r--src/internal_cache.c173
-rw-r--r--src/log.c37
-rw-r--r--src/main.c69
-rw-r--r--src/netlink.c63
-rw-r--r--src/network.c5
-rw-r--r--src/parse.c192
-rw-r--r--src/read_config_lex.l1
-rw-r--r--src/read_config_yy.y50
-rw-r--r--src/run.c206
-rw-r--r--src/sync-ftfw.c7
-rw-r--r--src/sync-mode.c165
-rw-r--r--src/sync-notrack.c6
20 files changed, 1728 insertions, 63 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index a0abeee..7d7b2ac 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -12,7 +12,7 @@ conntrack_LDADD = ../extensions/libct_proto_tcp.la ../extensions/libct_proto_udp
conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \
local.c log.c mcast.c udp.c netlink.c vector.c \
filter.c fds.c event.c process.c origin.c date.c \
- cache.c cache-ct.c \
+ cache.c cache-ct.c cache-exp.c \
cache_timer.c \
sync-mode.c sync-alarm.c sync-ftfw.c sync-notrack.c \
traffic_stats.c stats-mode.c \
diff --git a/src/build.c b/src/build.c
index 9c3687c..3193884 100644
--- a/src/build.c
+++ b/src/build.c
@@ -224,3 +224,102 @@ void ct2msg(const struct nf_conntrack *ct, struct nethdr *n)
if (nfct_attr_is_set_array(ct, nat_type, 6))
ct_build_natseqadj(ct, n);
}
+
+static void
+exp_build_l4proto_tcp(const struct nf_conntrack *ct, struct nethdr *n, int a)
+{
+ ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a,
+ sizeof(struct nfct_attr_grp_port));
+}
+
+static void
+exp_build_l4proto_sctp(const struct nf_conntrack *ct, struct nethdr *n, int a)
+{
+ ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a,
+ sizeof(struct nfct_attr_grp_port));
+}
+
+static void
+exp_build_l4proto_dccp(const struct nf_conntrack *ct, struct nethdr *n, int a)
+{
+ ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a,
+ sizeof(struct nfct_attr_grp_port));
+}
+
+static void
+exp_build_l4proto_udp(const struct nf_conntrack *ct, struct nethdr *n, int a)
+{
+ ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a,
+ sizeof(struct nfct_attr_grp_port));
+}
+
+static struct exp_build_l4proto {
+ void (*build)(const struct nf_conntrack *, struct nethdr *n, int a);
+} exp_l4proto_fcn[IPPROTO_MAX] = {
+ [IPPROTO_TCP] = { .build = exp_build_l4proto_tcp },
+ [IPPROTO_SCTP] = { .build = exp_build_l4proto_sctp },
+ [IPPROTO_DCCP] = { .build = exp_build_l4proto_dccp },
+ [IPPROTO_UDP] = { .build = exp_build_l4proto_udp },
+};
+
+static inline void
+exp_build_u32(const struct nf_expect *exp, int a, struct nethdr *n, int b)
+{
+ uint32_t data = nfexp_get_attr_u32(exp, a);
+ data = htonl(data);
+ addattr(n, b, &data, sizeof(uint32_t));
+}
+
+void exp2msg(const struct nf_expect *exp, struct nethdr *n)
+{
+ const struct nf_conntrack *ct = nfexp_get_attr(exp, ATTR_EXP_MASTER);
+ uint8_t l4proto = nfct_get_attr_u8(ct, ATTR_L4PROTO);
+
+ /* master conntrack for this expectation. */
+ if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV4)) {
+ ct_build_group(ct, ATTR_GRP_ORIG_IPV4, n, NTA_EXP_MASTER_IPV4,
+ sizeof(struct nfct_attr_grp_ipv4));
+ } else if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV6)) {
+ ct_build_group(ct, ATTR_GRP_ORIG_IPV6, n, NTA_EXP_MASTER_IPV6,
+ sizeof(struct nfct_attr_grp_ipv6));
+ }
+ ct_build_u8(ct, ATTR_L4PROTO, n, NTA_EXP_MASTER_L4PROTO);
+
+ if (exp_l4proto_fcn[l4proto].build)
+ exp_l4proto_fcn[l4proto].build(ct, n, NTA_EXP_MASTER_PORT);
+
+ /* the expectation itself. */
+ ct = nfexp_get_attr(exp, ATTR_EXP_EXPECTED);
+
+ if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV4)) {
+ ct_build_group(ct, ATTR_GRP_ORIG_IPV4, n, NTA_EXP_EXPECT_IPV4,
+ sizeof(struct nfct_attr_grp_ipv4));
+ } else if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV6)) {
+ ct_build_group(ct, ATTR_GRP_ORIG_IPV6, n, NTA_EXP_EXPECT_IPV6,
+ sizeof(struct nfct_attr_grp_ipv6));
+ }
+ ct_build_u8(ct, ATTR_L4PROTO, n, NTA_EXP_EXPECT_L4PROTO);
+
+ if (exp_l4proto_fcn[l4proto].build)
+ exp_l4proto_fcn[l4proto].build(ct, n, NTA_EXP_EXPECT_PORT);
+
+ /* mask for the expectation. */
+ ct = nfexp_get_attr(exp, ATTR_EXP_MASK);
+
+ if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV4)) {
+ ct_build_group(ct, ATTR_GRP_ORIG_IPV4, n, NTA_EXP_MASK_IPV4,
+ sizeof(struct nfct_attr_grp_ipv4));
+ } else if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV6)) {
+ ct_build_group(ct, ATTR_GRP_ORIG_IPV6, n, NTA_EXP_MASK_IPV6,
+ sizeof(struct nfct_attr_grp_ipv6));
+ }
+ ct_build_u8(ct, ATTR_L4PROTO, n, NTA_EXP_MASK_L4PROTO);
+
+ if (exp_l4proto_fcn[l4proto].build)
+ exp_l4proto_fcn[l4proto].build(ct, n, NTA_EXP_MASK_PORT);
+
+ if (!CONFIG(commit_timeout) && nfexp_attr_is_set(exp, ATTR_EXP_TIMEOUT))
+ exp_build_u32(exp, ATTR_EXP_TIMEOUT, n, NTA_EXP_TIMEOUT);
+
+ exp_build_u32(exp, ATTR_EXP_FLAGS, n, NTA_EXP_FLAGS);
+}
diff --git a/src/cache-ct.c b/src/cache-ct.c
index 2c6fd4e..0ad8d2a 100644
--- a/src/cache-ct.c
+++ b/src/cache-ct.c
@@ -251,7 +251,7 @@ static int cache_ct_commit(struct cache *c, struct nfct_handle *h, int clientfd)
/* we already have one commit in progress, skip this. The clientfd
* descriptor has to be closed by the caller. */
if (clientfd && STATE_SYNC(commit).clientfd != -1)
- return 0;
+ return -1;
switch(STATE_SYNC(commit).state) {
case COMMIT_STATE_INACTIVE:
@@ -308,9 +308,7 @@ static int cache_ct_commit(struct cache *c, struct nfct_handle *h, int clientfd)
STATE_SYNC(commit).current = 0;
STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE;
- /* Close the client socket now that we're done. */
- close(STATE_SYNC(commit).clientfd);
- STATE_SYNC(commit).clientfd = -1;
+ return 0;
}
return 1;
}
diff --git a/src/cache-exp.c b/src/cache-exp.c
new file mode 100644
index 0000000..e88877a
--- /dev/null
+++ b/src/cache-exp.c
@@ -0,0 +1,308 @@
+/*
+ * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 by Vyatta Inc. <http://www.vyatta.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "cache.h"
+#include "hash.h"
+#include "log.h"
+#include "conntrackd.h"
+#include "netlink.h"
+#include "event.h"
+#include "jhash.h"
+#include "network.h"
+
+#include <errno.h>
+#include <string.h>
+#include <time.h>
+#include <libnetfilter_conntrack/libnetfilter_conntrack.h>
+
+static uint32_t
+cache_hash4_exp(const struct nf_conntrack *ct, const struct hashtable *table)
+{
+ uint32_t a[4] = {
+ [0] = nfct_get_attr_u32(ct, ATTR_IPV4_SRC),
+ [1] = nfct_get_attr_u32(ct, ATTR_IPV4_DST),
+ [2] = nfct_get_attr_u8(ct, ATTR_L3PROTO) << 16 |
+ nfct_get_attr_u8(ct, ATTR_L4PROTO),
+ [3] = nfct_get_attr_u16(ct, ATTR_PORT_SRC) << 16 |
+ nfct_get_attr_u16(ct, ATTR_PORT_DST),
+ };
+
+ /*
+ * Instead of returning hash % table->hashsize (implying a divide)
+ * we return the high 32 bits of the (hash * table->hashsize) that will
+ * give results between [0 and hashsize-1] and same hash distribution,
+ * but using a multiply, less expensive than a divide. See:
+ * http://www.mail-archive.com/netdev@vger.kernel.org/msg56623.html
+ */
+ return ((uint64_t)jhash2(a, 4, 0) * table->hashsize) >> 32;
+}
+
+static uint32_t
+cache_hash6_exp(const struct nf_conntrack *ct, const struct hashtable *table)
+{
+ uint32_t a[10];
+
+ memcpy(&a[0], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4);
+ memcpy(&a[4], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4);
+ a[8] = nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16 |
+ nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO);
+ a[9] = nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16 |
+ nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST);
+
+ return ((uint64_t)jhash2(a, 10, 0) * table->hashsize) >> 32;
+}
+
+static uint32_t
+cache_exp_hash(const void *data, const struct hashtable *table)
+{
+ int ret = 0;
+ const struct nf_expect *exp = data;
+ const struct nf_conntrack *ct = nfexp_get_attr(exp, ATTR_EXP_MASTER);
+
+ switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) {
+ case AF_INET:
+ ret = cache_hash4_exp(ct, table);
+ break;
+ case AF_INET6:
+ ret = cache_hash6_exp(ct, table);
+ break;
+ default:
+ dlog(LOG_ERR, "unknown layer 3 proto in hash");
+ break;
+ }
+ return ret;
+}
+
+static int cache_exp_cmp(const void *data1, const void *data2)
+{
+ const struct cache_object *obj = data1;
+ const struct nf_expect *exp = data2;
+
+ return nfexp_cmp(obj->ptr, exp, 0);
+}
+
+static void *cache_exp_alloc(void)
+{
+ return nfexp_new();
+}
+
+static void cache_exp_free(void *ptr)
+{
+ nfexp_destroy(ptr);
+}
+
+static void cache_exp_copy(void *dst, void *src, unsigned int flags)
+{
+ /* XXX: add nfexp_copy(...) to libnetfilter_conntrack. */
+ memcpy(dst, src, nfexp_maxsize());
+}
+
+static int cache_exp_dump_step(void *data1, void *n)
+{
+ char buf[1024];
+ int size;
+ struct __dump_container *container = data1;
+ struct cache_object *obj = n;
+ char *data = obj->data;
+ unsigned i;
+
+ /*
+ * XXX: Do not dump the entries that are scheduled to expire.
+ * These entries talk about already destroyed connections
+ * that we keep for some time just in case that we have to
+ * resent some lost messages. We do not show them to the
+ * user as he may think that the firewall replicas are not
+ * in sync. The branch below is a hack as it is quite
+ * specific and it breaks conntrackd modularity. Probably
+ * there's a nicer way to do this but until I come up with it...
+ */
+ if (CONFIG(flags) & CTD_SYNC_FTFW && obj->status == C_OBJ_DEAD)
+ return 0;
+
+ /* do not show cached timeout, this may confuse users */
+ if (nfexp_attr_is_set(obj->ptr, ATTR_EXP_TIMEOUT))
+ nfexp_attr_unset(obj->ptr, ATTR_EXP_TIMEOUT);
+
+ memset(buf, 0, sizeof(buf));
+ size = nfexp_snprintf(buf, sizeof(buf),obj->ptr,
+ NFCT_T_UNKNOWN, container->type, 0);
+
+ for (i = 0; i < obj->cache->num_features; i++) {
+ if (obj->cache->features[i]->dump) {
+ size += obj->cache->features[i]->dump(obj, data,
+ buf+size,
+ container->type);
+ data += obj->cache->features[i]->size;
+ }
+ }
+ if (container->type != NFCT_O_XML) {
+ long tm = time(NULL);
+ size += sprintf(buf+size, " [active since %lds]",
+ tm - obj->lifetime);
+ }
+ size += sprintf(buf+size, "\n");
+ if (send(container->fd, buf, size, 0) == -1) {
+ if (errno != EPIPE)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int cache_exp_commit_step(void *data, void *n)
+{
+ struct cache_object *obj = n;
+ struct __commit_container *tmp = data;
+ int ret, retry = 1, timeout;
+ struct nf_expect *exp = obj->ptr;
+
+ if (CONFIG(commit_timeout)) {
+ timeout = CONFIG(commit_timeout);
+ } else {
+ timeout = time(NULL) - obj->lastupdate;
+ if (timeout < 0) {
+ /* XXX: Arbitrarily set the timer to one minute, how
+ * can this happen? For example, an adjustment due to
+ * daylight-saving. Probably other situations can
+ * trigger this. */
+ timeout = 60;
+ }
+ /* calculate an estimation of the current timeout */
+ timeout = nfexp_get_attr_u32(exp, ATTR_EXP_TIMEOUT) - timeout;
+ if (timeout < 0) {
+ timeout = 60;
+ }
+ }
+
+retry:
+ if (nl_create_expect(tmp->h, exp, timeout) == -1) {
+ if (errno == EEXIST && retry == 1) {
+ ret = nl_destroy_expect(tmp->h, exp);
+ if (ret == 0 || (ret == -1 && errno == ENOENT)) {
+ if (retry) {
+ retry = 0;
+ goto retry;
+ }
+ }
+ dlog(LOG_ERR, "commit-destroy: %s", strerror(errno));
+ dlog_exp(STATE(log), exp, NFCT_O_PLAIN);
+ tmp->c->stats.commit_fail++;
+ } else {
+ dlog(LOG_ERR, "commit-create: %s", strerror(errno));
+ dlog_exp(STATE(log), exp, NFCT_O_PLAIN);
+ tmp->c->stats.commit_fail++;
+ }
+ } else {
+ tmp->c->stats.commit_ok++;
+ }
+ /* keep iterating even if we have found errors */
+ return 0;
+}
+
+static int
+cache_exp_commit(struct cache *c, struct nfct_handle *h, int clientfd)
+{
+ unsigned int commit_ok, commit_fail;
+ struct timeval commit_stop, res;
+ struct __commit_container tmp = {
+ .h = h,
+ .c = c,
+ };
+
+ /* we already have one commit in progress, skip this. The clientfd
+ * descriptor has to be closed by the caller. */
+ if (clientfd && STATE_SYNC(commit).clientfd != -1)
+ return -1;
+
+ switch(STATE_SYNC(commit).state) {
+ case COMMIT_STATE_INACTIVE:
+ gettimeofday(&STATE_SYNC(commit).stats.start, NULL);
+ STATE_SYNC(commit).stats.ok = c->stats.commit_ok;
+ STATE_SYNC(commit).stats.fail = c->stats.commit_fail;
+ STATE_SYNC(commit).clientfd = clientfd;
+ case COMMIT_STATE_MASTER:
+ STATE_SYNC(commit).current =
+ hashtable_iterate_limit(c->h, &tmp,
+ STATE_SYNC(commit).current,
+ CONFIG(general).commit_steps,
+ cache_exp_commit_step);
+ if (STATE_SYNC(commit).current < CONFIG(hashsize)) {
+ STATE_SYNC(commit).state = COMMIT_STATE_MASTER;
+ /* give it another step as soon as possible */
+ write_evfd(STATE_SYNC(commit).evfd);
+ return 1;
+ }
+
+ /* calculate the time that commit has taken */
+ gettimeofday(&commit_stop, NULL);
+ timersub(&commit_stop, &STATE_SYNC(commit).stats.start, &res);
+
+ /* calculate new entries committed */
+ commit_ok = c->stats.commit_ok - STATE_SYNC(commit).stats.ok;
+ commit_fail =
+ c->stats.commit_fail - STATE_SYNC(commit).stats.fail;
+
+ /* log results */
+ dlog(LOG_NOTICE, "Committed %u new expectations", commit_ok);
+
+ if (commit_fail)
+ dlog(LOG_NOTICE, "%u expectations can't be "
+ "committed", commit_fail);
+
+ dlog(LOG_NOTICE, "commit has taken %lu.%06lu seconds",
+ res.tv_sec, res.tv_usec);
+
+ /* prepare the state machine for new commits */
+ STATE_SYNC(commit).current = 0;
+ STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE;
+
+ return 0;
+ }
+ return 1;
+}
+
+static struct nethdr *
+cache_exp_build_msg(const struct cache_object *obj, int type)
+{
+ return BUILD_NETMSG_FROM_EXP(obj->ptr, type);
+}
+
+/* template to cache expectations coming from the kernel. */
+struct cache_ops cache_sync_internal_exp_ops = {
+ .hash = cache_exp_hash,
+ .cmp = cache_exp_cmp,
+ .alloc = cache_exp_alloc,
+ .free = cache_exp_free,
+ .copy = cache_exp_copy,
+ .dump_step = cache_exp_dump_step,
+ .commit = NULL,
+ .build_msg = cache_exp_build_msg,
+};
+
+/* template to cache expectations coming from the network. */
+struct cache_ops cache_sync_external_exp_ops = {
+ .hash = cache_exp_hash,
+ .cmp = cache_exp_cmp,
+ .alloc = cache_exp_alloc,
+ .free = cache_exp_free,
+ .copy = cache_exp_copy,
+ .dump_step = cache_exp_dump_step,
+ .commit = cache_exp_commit,
+ .build_msg = NULL,
+};
diff --git a/src/external_cache.c b/src/external_cache.c
index 3f896a0..e290249 100644
--- a/src/external_cache.c
+++ b/src/external_cache.c
@@ -26,6 +26,7 @@
#include <stdlib.h>
static struct cache *external;
+static struct cache *external_exp;
static int external_cache_init(void)
{
@@ -36,12 +37,21 @@ static int external_cache_init(void)
dlog(LOG_ERR, "can't allocate memory for the external cache");
return -1;
}
+ external_exp = cache_create("external", CACHE_T_EXP,
+ STATE_SYNC(sync)->external_cache_flags,
+ NULL, &cache_sync_external_exp_ops);
+ if (external_exp == NULL) {
+ dlog(LOG_ERR, "can't allocate memory for the external cache");
+ return -1;
+ }
+
return 0;
}
static void external_cache_close(void)
{
cache_destroy(external);
+ cache_destroy(external_exp);
}
static void external_cache_ct_new(struct nf_conntrack *ct)
@@ -109,6 +119,71 @@ static void external_cache_ct_stats_ext(int fd)
cache_stats_extended(external, fd);
}
+static void external_cache_exp_new(struct nf_expect *exp)
+{
+ struct cache_object *obj;
+ int id;
+
+ obj = cache_find(external_exp, exp, &id);
+ if (obj == NULL) {
+retry:
+ obj = cache_object_new(external_exp, exp);
+ if (obj == NULL)
+ return;
+
+ if (cache_add(external_exp, obj, id) == -1) {
+ cache_object_free(obj);
+ return;
+ }
+ } else {
+ cache_del(external_exp, obj);
+ cache_object_free(obj);
+ goto retry;
+ }
+}
+
+static void external_cache_exp_upd(struct nf_expect *exp)
+{
+ cache_update_force(external_exp, exp);
+}
+
+static void external_cache_exp_del(struct nf_expect *exp)
+{
+ struct cache_object *obj;
+ int id;
+
+ obj = cache_find(external_exp, exp, &id);
+ if (obj) {
+ cache_del(external_exp, obj);
+ cache_object_free(obj);
+ }
+}
+
+static void external_cache_exp_dump(int fd, int type)
+{
+ cache_dump(external_exp, fd, type);
+}
+
+static int external_cache_exp_commit(struct nfct_handle *h, int fd)
+{
+ return cache_commit(external_exp, h, fd);
+}
+
+static void external_cache_exp_flush(void)
+{
+ cache_flush(external_exp);
+}
+
+static void external_cache_exp_stats(int fd)
+{
+ cache_stats(external_exp, fd);
+}
+
+static void external_cache_exp_stats_ext(int fd)
+{
+ cache_stats_extended(external_exp, fd);
+}
+
struct external_handler external_cache = {
.init = external_cache_init,
.close = external_cache_close,
@@ -122,4 +197,14 @@ struct external_handler external_cache = {
.stats = external_cache_ct_stats,
.stats_ext = external_cache_ct_stats_ext,
},
+ .exp = {
+ .new = external_cache_exp_new,
+ .upd = external_cache_exp_upd,
+ .del = external_cache_exp_del,
+ .dump = external_cache_exp_dump,
+ .commit = external_cache_exp_commit,
+ .flush = external_cache_exp_flush,
+ .stats = external_cache_exp_stats,
+ .stats_ext = external_cache_exp_stats_ext,
+ },
};
diff --git a/src/external_inject.c b/src/external_inject.c
index ba5f3d1..0ad3478 100644
--- a/src/external_inject.c
+++ b/src/external_inject.c
@@ -42,7 +42,7 @@ struct {
static int external_inject_init(void)
{
/* handler to directly inject conntracks into kernel-space */
- inject = nfct_open(CONNTRACK, 0);
+ inject = nfct_open(CONFIG(netlink).subsys_id, 0);
if (inject == NULL) {
dlog(LOG_ERR, "can't open netlink handler: %s",
strerror(errno));
@@ -175,6 +175,89 @@ static void external_inject_ct_stats(int fd)
send(fd, buf, size, 0);
}
+struct {
+ uint32_t add_ok;
+ uint32_t add_fail;
+ uint32_t upd_ok;
+ uint32_t upd_fail;
+ uint32_t del_ok;
+ uint32_t del_fail;
+} exp_external_inject_stat;
+
+static void external_inject_exp_new(struct nf_expect *exp)
+{
+ int ret, retry = 1;
+
+retry:
+ if (nl_create_expect(inject, exp, 0) == -1) {
+ /* if the state entry exists, we delete and try again */
+ if (errno == EEXIST && retry == 1) {
+ ret = nl_destroy_expect(inject, exp);
+ if (ret == 0 || (ret == -1 && errno == ENOENT)) {
+ if (retry) {
+ retry = 0;
+ goto retry;
+ }
+ }
+ exp_external_inject_stat.add_fail++;
+ dlog(LOG_ERR, "inject-add1: %s", strerror(errno));
+ dlog_exp(STATE(log), exp, NFCT_O_PLAIN);
+ return;
+ }
+ exp_external_inject_stat.add_fail++;
+ dlog(LOG_ERR, "inject-add2: %s", strerror(errno));
+ dlog_exp(STATE(log), exp, NFCT_O_PLAIN);
+ } else {
+ exp_external_inject_stat.add_ok++;
+ }
+}
+
+static void external_inject_exp_del(struct nf_expect *exp)
+{
+ if (nl_destroy_expect(inject, exp) == -1) {
+ if (errno != ENOENT) {
+ exp_external_inject_stat.del_fail++;
+ dlog(LOG_ERR, "inject-del: %s", strerror(errno));
+ dlog_exp(STATE(log), exp, NFCT_O_PLAIN);
+ }
+ } else {
+ exp_external_inject_stat.del_ok++;
+ }
+}
+
+static void external_inject_exp_dump(int fd, int type)
+{
+}
+
+static int external_inject_exp_commit(struct nfct_handle *h, int fd)
+{
+ /* close the commit socket. */
+ return LOCAL_RET_OK;
+}
+
+static void external_inject_exp_flush(void)
+{
+}
+
+static void external_inject_exp_stats(int fd)
+{
+ char buf[512];
+ int size;
+
+ size = sprintf(buf, "external inject:\n"
+ "connections created:\t\t%12u\tfailed:\t%12u\n"
+ "connections updated:\t\t%12u\tfailed:\t%12u\n"
+ "connections destroyed:\t\t%12u\tfailed:\t%12u\n\n",
+ exp_external_inject_stat.add_ok,
+ exp_external_inject_stat.add_fail,
+ exp_external_inject_stat.upd_ok,
+ exp_external_inject_stat.upd_fail,
+ exp_external_inject_stat.del_ok,
+ exp_external_inject_stat.del_fail);
+
+ send(fd, buf, size, 0);
+}
+
struct external_handler external_inject = {
.init = external_inject_init,
.close = external_inject_close,
@@ -188,4 +271,14 @@ struct external_handler external_inject = {
.stats = external_inject_ct_stats,
.stats_ext = external_inject_ct_stats,
},
+ .exp = {
+ .new = external_inject_exp_new,
+ .upd = external_inject_exp_new,
+ .del = external_inject_exp_del,
+ .dump = external_inject_exp_dump,
+ .commit = external_inject_exp_commit,
+ .flush = external_inject_exp_flush,
+ .stats = external_inject_exp_stats,
+ .stats_ext = external_inject_exp_stats,
+ },
};
diff --git a/src/filter.c b/src/filter.c
index 746a9bb..e8515d6 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -405,3 +405,79 @@ int ct_filter_conntrack(const struct nf_conntrack *ct, int userspace)
return 0;
}
+
+struct exp_filter {
+ struct list_head list;
+};
+
+struct exp_filter *exp_filter_create(void)
+{
+ struct exp_filter *f;
+
+ f = calloc(1, sizeof(struct exp_filter));
+ if (f == NULL)
+ return NULL;
+
+ INIT_LIST_HEAD(&f->list);
+ return f;
+}
+
+struct exp_filter_item {
+ struct list_head head;
+ char helper_name[NFCT_HELPER_NAME_MAX];
+};
+
+/* this is ugly, but it simplifies read_config_yy.y */
+static struct exp_filter *exp_filter_alloc(void)
+{
+ if (STATE(exp_filter) == NULL) {
+ STATE(exp_filter) = exp_filter_create();
+ if (STATE(exp_filter) == NULL) {
+ fprintf(stderr, "Can't init expectation filtering!\n");
+ return NULL;
+ }
+ }
+ return STATE(exp_filter);;
+}
+
+int exp_filter_add(struct exp_filter *f, const char *helper_name)
+{
+ struct exp_filter_item *item;
+
+ f = exp_filter_alloc();
+ if (f == NULL)
+ return -1;
+
+ list_for_each_entry(item, &f->list, head) {
+ if (strncmp(item->helper_name, helper_name,
+ NFCT_HELPER_NAME_MAX) == 0) {
+ return -1;
+ }
+ }
+ item = calloc(1, sizeof(struct exp_filter_item));
+ if (item == NULL)
+ return -1;
+
+ strncpy(item->helper_name, helper_name, NFCT_HELPER_NAME_MAX);
+ list_add(&item->head, &f->list);
+ return 0;
+}
+
+int exp_filter_find(struct exp_filter *f, const struct nf_expect *exp)
+{
+ struct exp_filter_item *item;
+
+ if (f == NULL)
+ return 0;
+
+ list_for_each_entry(item, &f->list, head) {
+ const char *name = nfexp_get_attr(exp, ATTR_EXP_HELPER_NAME);
+
+ /* we allow partial matching to support things like sip-PORT. */
+ if (strncmp(item->helper_name, name,
+ strlen(item->helper_name)) == 0) {
+ return 1;
+ }
+ }
+ return 0;
+}
diff --git a/src/internal_bypass.c b/src/internal_bypass.c
index 98717f3..5c83c21 100644
--- a/src/internal_bypass.c
+++ b/src/internal_bypass.c
@@ -52,7 +52,7 @@ static void internal_bypass_ct_dump(int fd, int type)
u_int32_t family = AF_UNSPEC;
int ret;
- h = nfct_open(CONNTRACK, 0);
+ h = nfct_open(CONFIG(netlink).subsys_id, 0);
if (h == NULL) {
dlog(LOG_ERR, "can't allocate memory for the internal cache");
return;
@@ -151,6 +151,138 @@ static int internal_bypass_ct_event_del(struct nf_conntrack *ct, int origin)
return 1;
}
+static int
+internal_bypass_exp_dump_cb(enum nf_conntrack_msg_type type,
+ struct nf_expect *exp, void *data)
+{
+ char buf[1024];
+ int size, *fd = data;
+ const struct nf_conntrack *master =
+ nfexp_get_attr(exp, ATTR_EXP_MASTER);
+
+ if (!exp_filter_find(STATE(exp_filter), exp))
+ return NFCT_CB_CONTINUE;
+
+ if (ct_filter_conntrack(master, 1))
+ return NFCT_CB_CONTINUE;
+
+ size = nfexp_snprintf(buf, 1024, exp,
+ NFCT_T_UNKNOWN, NFCT_O_DEFAULT, 0);
+ if (size < 1024) {
+ buf[size] = '\n';
+ size++;
+ }
+ send(*fd, buf, size, 0);
+
+ return NFCT_CB_CONTINUE;
+}
+
+static void internal_bypass_exp_dump(int fd, int type)
+{
+ struct nfct_handle *h;
+ u_int32_t family = AF_UNSPEC;
+ int ret;
+
+ h = nfct_open(CONFIG(netlink).subsys_id, 0);
+ if (h == NULL) {
+ dlog(LOG_ERR, "can't allocate memory for the internal cache");
+ return;
+ }
+ nfexp_callback_register(h, NFCT_T_ALL,
+ internal_bypass_exp_dump_cb, &fd);
+ ret = nfexp_query(h, NFCT_Q_DUMP, &family);
+ if (ret == -1) {
+ dlog(LOG_ERR, "can't dump kernel table");
+ }
+ nfct_close(h);
+}
+
+static void internal_bypass_exp_flush(void)
+{
+ nl_flush_expect_table(STATE(flush));
+}
+
+struct {
+ uint32_t new;
+ uint32_t upd;
+ uint32_t del;
+} exp_internal_bypass_stats;
+
+static void internal_bypass_exp_stats(int fd)
+{
+ char buf[512];
+ int size;
+
+ size = sprintf(buf, "internal bypass:\n"
+ "connections new:\t\t%12u\n"
+ "connections updated:\t\t%12u\n"
+ "connections destroyed:\t\t%12u\n\n",
+ exp_internal_bypass_stats.new,
+ exp_internal_bypass_stats.upd,
+ exp_internal_bypass_stats.del);
+
+ send(fd, buf, size, 0);
+}
+
+/* unused, INTERNAL_F_POPULATE is unset. No cache, nothing to populate. */
+static void internal_bypass_exp_populate(struct nf_expect *exp)
+{
+}
+
+/* unused, INTERNAL_F_RESYNC is unset. */
+static void internal_bypass_exp_purge(void)
+{
+}
+
+/* unused, INTERNAL_F_RESYNC is unset. Nothing to resync, we have no cache. */
+static int
+internal_bypass_exp_resync(enum nf_conntrack_msg_type type,
+ struct nf_expect *exp, void *data)
+{
+ return NFCT_CB_CONTINUE;
+}
+
+static void internal_bypass_exp_event_new(struct nf_expect *exp, int origin)
+{
+ struct nethdr *net;
+
+ /* this event has been triggered by me, skip */
+ if (origin != CTD_ORIGIN_NOT_ME)
+ return;
+
+ net = BUILD_NETMSG_FROM_EXP(exp, NET_T_STATE_EXP_NEW);
+ multichannel_send(STATE_SYNC(channel), net);
+ exp_internal_bypass_stats.new++;
+}
+
+static void internal_bypass_exp_event_upd(struct nf_expect *exp, int origin)
+{
+ struct nethdr *net;
+
+ /* this event has been triggered by me, skip */
+ if (origin != CTD_ORIGIN_NOT_ME)
+ return;
+
+ net = BUILD_NETMSG_FROM_EXP(exp, NET_T_STATE_EXP_UPD);
+ multichannel_send(STATE_SYNC(channel), net);
+ exp_internal_bypass_stats.upd++;
+}
+
+static int internal_bypass_exp_event_del(struct nf_expect *exp, int origin)
+{
+ struct nethdr *net;
+
+ /* this event has been triggered by me, skip */
+ if (origin != CTD_ORIGIN_NOT_ME)
+ return 1;
+
+ net = BUILD_NETMSG_FROM_EXP(exp, NET_T_STATE_EXP_DEL);
+ multichannel_send(STATE_SYNC(channel), net);
+ exp_internal_bypass_stats.del++;
+
+ return 1;
+}
+
struct internal_handler internal_bypass = {
.init = internal_bypass_init,
.close = internal_bypass_close,
@@ -166,4 +298,16 @@ struct internal_handler internal_bypass = {
.upd = internal_bypass_ct_event_upd,
.del = internal_bypass_ct_event_del,
},
+ .exp = {
+ .dump = internal_bypass_exp_dump,
+ .flush = internal_bypass_exp_flush,
+ .stats = internal_bypass_exp_stats,
+ .stats_ext = internal_bypass_exp_stats,
+ .populate = internal_bypass_exp_populate,
+ .purge = internal_bypass_exp_purge,
+ .resync = internal_bypass_exp_resync,
+ .new = internal_bypass_exp_event_new,
+ .upd = internal_bypass_exp_event_upd,
+ .del = internal_bypass_exp_event_del,
+ },
};
diff --git a/src/internal_cache.c b/src/internal_cache.c
index 952327d..ba2d74b 100644
--- a/src/internal_cache.c
+++ b/src/internal_cache.c
@@ -32,12 +32,25 @@ static int internal_cache_init(void)
dlog(LOG_ERR, "can't allocate memory for the internal cache");
return -1;
}
+
+ STATE(mode)->internal->exp.data =
+ cache_create("internal", CACHE_T_EXP,
+ STATE_SYNC(sync)->internal_cache_flags,
+ STATE_SYNC(sync)->internal_cache_extra,
+ &cache_sync_internal_exp_ops);
+
+ if (!STATE(mode)->internal->exp.data) {
+ dlog(LOG_ERR, "can't allocate memory for the internal cache");
+ return -1;
+ }
+
return 0;
}
static void internal_cache_close(void)
{
cache_destroy(STATE(mode)->internal->ct.data);
+ cache_destroy(STATE(mode)->internal->exp.data);
}
static void internal_cache_ct_dump(int fd, int type)
@@ -203,6 +216,154 @@ static int internal_cache_ct_event_del(struct nf_conntrack *ct, int origin)
return 1;
}
+static void internal_cache_exp_dump(int fd, int type)
+{
+ cache_dump(STATE(mode)->internal->exp.data, fd, type);
+}
+
+static void internal_cache_exp_flush(void)
+{
+ cache_flush(STATE(mode)->internal->exp.data);
+}
+
+static void internal_cache_exp_stats(int fd)
+{
+ cache_stats(STATE(mode)->internal->exp.data, fd);
+}
+
+static void internal_cache_exp_stats_ext(int fd)
+{
+ cache_stats_extended(STATE(mode)->internal->exp.data, fd);
+}
+
+static void internal_cache_exp_populate(struct nf_expect *exp)
+{
+ cache_update_force(STATE(mode)->internal->exp.data, exp);
+}
+
+static int internal_cache_exp_purge_step(void *data1, void *data2)
+{
+ struct cache_object *obj = data2;
+
+ STATE(get_retval) = 0;
+ nl_get_expect(STATE(get), obj->ptr); /* modifies STATE(get_reval) */
+ if (!STATE(get_retval)) {
+ if (obj->status != C_OBJ_DEAD) {
+ cache_object_set_status(obj, C_OBJ_DEAD);
+ sync_send(obj, NET_T_STATE_EXP_DEL);
+ cache_object_put(obj);
+ }
+ }
+
+ return 0;
+}
+
+static void internal_cache_exp_purge(void)
+{
+ cache_iterate(STATE(mode)->internal->exp.data, NULL,
+ internal_cache_exp_purge_step);
+}
+
+static int
+internal_cache_exp_resync(enum nf_conntrack_msg_type type,
+ struct nf_expect *exp, void *data)
+{
+ struct cache_object *obj;
+ const struct nf_conntrack *master =
+ nfexp_get_attr(exp, ATTR_EXP_MASTER);
+
+ if (!exp_filter_find(STATE(exp_filter), exp))
+ return NFCT_CB_CONTINUE;
+
+ if (ct_filter_conntrack(master, 1))
+ return NFCT_CB_CONTINUE;
+
+ obj = cache_update_force(STATE(mode)->internal->exp.data, exp);
+ if (obj == NULL)
+ return NFCT_CB_CONTINUE;
+
+ switch (obj->status) {
+ case C_OBJ_NEW:
+ sync_send(obj, NET_T_STATE_EXP_NEW);
+ break;
+ case C_OBJ_ALIVE:
+ sync_send(obj, NET_T_STATE_EXP_UPD);
+ break;
+ }
+ return NFCT_CB_CONTINUE;
+}
+
+static void internal_cache_exp_event_new(struct nf_expect *exp, int origin)
+{
+ struct cache_object *obj;
+ int id;
+
+ /* this event has been triggered by a direct inject, skip */
+ if (origin == CTD_ORIGIN_INJECT)
+ return;
+
+ obj = cache_find(STATE(mode)->internal->exp.data, exp, &id);
+ if (obj == NULL) {
+retry:
+ obj = cache_object_new(STATE(mode)->internal->exp.data, exp);
+ if (obj == NULL)
+ return;
+ if (cache_add(STATE(mode)->internal->exp.data, obj, id) == -1) {
+ cache_object_free(obj);
+ return;
+ }
+ /* only synchronize events that have been triggered by other
+ * processes or the kernel, but don't propagate events that
+ * have been triggered by conntrackd itself, eg. commits. */
+ if (origin == CTD_ORIGIN_NOT_ME)
+ sync_send(obj, NET_T_STATE_EXP_NEW);
+ } else {
+ cache_del(STATE(mode)->internal->exp.data, obj);
+ cache_object_free(obj);
+ goto retry;
+ }
+}
+
+static void internal_cache_exp_event_upd(struct nf_expect *exp, int origin)
+{
+ struct cache_object *obj;
+
+ /* this event has been triggered by a direct inject, skip */
+ if (origin == CTD_ORIGIN_INJECT)
+ return;
+
+ obj = cache_update_force(STATE(mode)->internal->exp.data, exp);
+ if (obj == NULL)
+ return;
+
+ if (origin == CTD_ORIGIN_NOT_ME)
+ sync_send(obj, NET_T_STATE_EXP_UPD);
+}
+
+static int internal_cache_exp_event_del(struct nf_expect *exp, int origin)
+{
+ struct cache_object *obj;
+ int id;
+
+ /* this event has been triggered by a direct inject, skip */
+ if (origin == CTD_ORIGIN_INJECT)
+ return 0;
+
+ /* we don't synchronize events for objects that are not in the cache */
+ obj = cache_find(STATE(mode)->internal->exp.data, exp, &id);
+ if (obj == NULL)
+ return 0;
+
+ if (obj->status != C_OBJ_DEAD) {
+ cache_object_set_status(obj, C_OBJ_DEAD);
+ if (origin == CTD_ORIGIN_NOT_ME) {
+ sync_send(obj, NET_T_STATE_EXP_DEL);
+ }
+ cache_object_put(obj);
+ }
+ return 1;
+}
+
struct internal_handler internal_cache = {
.flags = INTERNAL_F_POPULATE | INTERNAL_F_RESYNC,
.init = internal_cache_init,
@@ -219,4 +380,16 @@ struct internal_handler internal_cache = {
.upd = internal_cache_ct_event_upd,
.del = internal_cache_ct_event_del,
},
+ .exp = {
+ .dump = internal_cache_exp_dump,
+ .flush = internal_cache_exp_flush,
+ .stats = internal_cache_exp_stats,
+ .stats_ext = internal_cache_exp_stats_ext,
+ .populate = internal_cache_exp_populate,
+ .purge = internal_cache_exp_purge,
+ .resync = internal_cache_exp_resync,
+ .new = internal_cache_exp_event_new,
+ .upd = internal_cache_exp_event_upd,
+ .del = internal_cache_exp_event_del,
+ },
};
diff --git a/src/log.c b/src/log.c
index 9fe5119..d4de111 100644
--- a/src/log.c
+++ b/src/log.c
@@ -145,6 +145,43 @@ void dlog_ct(FILE *fd, struct nf_conntrack *ct, unsigned int type)
}
}
+void dlog_exp(FILE *fd, struct nf_expect *exp, unsigned int type)
+{
+ time_t t;
+ char buf[1024];
+ char *tmp;
+ unsigned int flags = 0;
+
+ buf[0]='\0';
+
+ switch(type) {
+ case NFCT_O_PLAIN:
+ t = time(NULL);
+ ctime_r(&t, buf);
+ tmp = buf + strlen(buf);
+ buf[strlen(buf)-1]='\t';
+ break;
+ default:
+ return;
+ }
+ nfexp_snprintf(buf+strlen(buf), 1024-strlen(buf), exp, 0, type, flags);
+
+ if (fd) {
+ snprintf(buf+strlen(buf), 1024-strlen(buf), "\n");
+ fputs(buf, fd);
+ }
+
+ if (fd == STATE(log)) {
+ /* error reporting */
+ if (CONFIG(syslog_facility) != -1)
+ syslog(LOG_ERR, "%s", tmp);
+ } else if (fd == STATE(stats_log)) {
+ /* connection logging */
+ if (CONFIG(stats).syslog_facility != -1)
+ syslog(LOG_INFO, "%s", tmp);
+ }
+}
+
void close_log(void)
{
if (STATE(log) != NULL)
diff --git a/src/main.c b/src/main.c
index ebfc8b9..342ed45 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,5 +1,6 @@
/*
- * (C) 2006-2007 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 by Vyatta Inc. <http://www.vyatta.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -38,14 +39,15 @@ static const char usage_daemon_commands[] =
static const char usage_client_commands[] =
"Client mode commands:\n"
- " -c, commit external cache to conntrack table\n"
+ " -c [ct|expect], commit external cache to conntrack table\n"
" -f [|internal|external], flush internal and external cache\n"
- " -F, flush kernel conntrack table\n"
- " -i, display content of the internal cache\n"
- " -e, display the content of the external cache\n"
+ " -F [ct|expect], flush kernel conntrack table\n"
+ " -i [ct|expect], display content of the internal cache\n"
+ " -e [ct|expect], display the content of the external cache\n"
" -k, kill conntrack daemon\n"
- " -s [|network|cache|runtime|link|rsqueue|queue], dump statistics\n"
- " -R, resync with kernel conntrack table\n"
+ " -s [|network|cache|runtime|link|rsqueue|queue|ct|expect], "
+ "dump statistics\n"
+ " -R [ct|expect], resync with kernel conntrack table\n"
" -n, request resync with other node (only FT-FW and NOTRACK modes)\n"
" -x, dump cache in XML format (requires -i or -e)\n"
" -t, reset the kernel timeout (see PurgeTimeout clause)\n"
@@ -89,6 +91,25 @@ set_operation_mode(int *current, int want, char *argv[])
}
}
+static int
+set_action_by_table(int i, int argc, char *argv[],
+ int ct_action, int exp_action, int dfl_action, int *action)
+{
+ if (i+1 < argc && argv[i+1][0] != '-') {
+ if (strncmp(argv[i+1], "ct", strlen(argv[i+1])) == 0) {
+ *action = ct_action;
+ i++;
+ } else if (strncmp(argv[i+1], "expect",
+ strlen(argv[i+1])) == 0) {
+ *action = exp_action;
+ i++;
+ }
+ } else
+ *action = dfl_action;
+
+ return i;
+}
+
int main(int argc, char *argv[])
{
int ret, i, action = -1;
@@ -115,15 +136,23 @@ int main(int argc, char *argv[])
break;
case 'c':
set_operation_mode(&type, REQUEST, argv);
- action = CT_COMMIT;
+ i = set_action_by_table(i, argc, argv,
+ CT_COMMIT, EXP_COMMIT,
+ ALL_COMMIT, &action);
break;
case 'i':
set_operation_mode(&type, REQUEST, argv);
- action = CT_DUMP_INTERNAL;
+ i = set_action_by_table(i, argc, argv,
+ CT_DUMP_INTERNAL,
+ EXP_DUMP_INTERNAL,
+ CT_DUMP_INTERNAL, &action);
break;
case 'e':
set_operation_mode(&type, REQUEST, argv);
- action = CT_DUMP_EXTERNAL;
+ i = set_action_by_table(i, argc, argv,
+ CT_DUMP_EXTERNAL,
+ EXP_DUMP_EXTERNAL,
+ CT_DUMP_EXTERNAL, &action);
break;
case 'C':
if (++i < argc) {
@@ -142,7 +171,10 @@ int main(int argc, char *argv[])
break;
case 'F':
set_operation_mode(&type, REQUEST, argv);
- action = CT_FLUSH_MASTER;
+ i = set_action_by_table(i, argc, argv,
+ CT_FLUSH_MASTER,
+ EXP_FLUSH_MASTER,
+ ALL_FLUSH_MASTER, &action);
break;
case 'f':
set_operation_mode(&type, REQUEST, argv);
@@ -164,12 +196,15 @@ int main(int argc, char *argv[])
}
} else {
/* default to general flushing */
- action = CT_FLUSH_CACHE;
+ action = ALL_FLUSH_CACHE;
}
break;
case 'R':
set_operation_mode(&type, REQUEST, argv);
- action = CT_RESYNC_MASTER;
+ i = set_action_by_table(i, argc, argv,
+ CT_RESYNC_MASTER,
+ EXP_RESYNC_MASTER,
+ ALL_RESYNC_MASTER, &action);
break;
case 'B':
set_operation_mode(&type, REQUEST, argv);
@@ -222,6 +257,14 @@ int main(int argc, char *argv[])
strlen(argv[i+1])) == 0) {
action = STATS_QUEUE;
i++;
+ } else if (strncmp(argv[i+1], "ct",
+ strlen(argv[i+1])) == 0) {
+ action = STATS;
+ i++;
+ } else if (strncmp(argv[i+1], "expect",
+ strlen(argv[i+1])) == 0) {
+ action = EXP_STATS;
+ i++;
} else {
fprintf(stderr, "ERROR: unknown "
"parameter `%s' for "
diff --git a/src/netlink.c b/src/netlink.c
index 60274f3..fe979e3 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -1,5 +1,6 @@
/*
- * (C) 2006 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 by Vyatta Inc. <http://www.vyatta.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -32,7 +33,7 @@ struct nfct_handle *nl_init_event_handler(void)
{
struct nfct_handle *h;
- h = nfct_open(CONNTRACK, NFCT_ALL_CT_GROUPS);
+ h = nfct_open(CONFIG(netlink).subsys_id, CONFIG(netlink).groups);
if (h == NULL)
return NULL;
@@ -301,3 +302,61 @@ int nl_destroy_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct)
{
return nfct_query(h, NFCT_Q_DESTROY, ct);
}
+
+int nl_create_expect(struct nfct_handle *h, const struct nf_expect *orig,
+ int timeout)
+{
+ int ret;
+ struct nf_expect *exp;
+
+ exp = nfexp_clone(orig);
+ if (exp == NULL)
+ return -1;
+
+ if (timeout > 0)
+ nfexp_set_attr_u32(exp, ATTR_EXP_TIMEOUT, timeout);
+
+ ret = nfexp_query(h, NFCT_Q_CREATE, exp);
+ nfexp_destroy(exp);
+
+ return ret;
+}
+
+int nl_destroy_expect(struct nfct_handle *h, const struct nf_expect *exp)
+{
+ return nfexp_query(h, NFCT_Q_DESTROY, exp);
+}
+
+/* if the handle has no callback, check for existence, otherwise, update */
+int nl_get_expect(struct nfct_handle *h, const struct nf_expect *exp)
+{
+ int ret = 1;
+ struct nf_expect *tmp;
+
+ /* XXX: we only need the expectation, not the mask and the master. */
+ tmp = nfexp_clone(exp);
+ if (tmp == NULL)
+ return -1;
+
+ if (nfexp_query(h, NFCT_Q_GET, tmp) == -1)
+ ret = (errno == ENOENT) ? 0 : -1;
+
+ nfexp_destroy(tmp);
+ return ret;
+}
+
+int nl_dump_expect_table(struct nfct_handle *h)
+{
+ return nfexp_query(h, NFCT_Q_DUMP, &CONFIG(family));
+}
+
+int nl_flush_expect_table(struct nfct_handle *h)
+{
+ return nfexp_query(h, NFCT_Q_FLUSH, &CONFIG(family));
+}
+
+int nl_send_expect_resync(struct nfct_handle *h)
+{
+ int family = CONFIG(family);
+ return nfexp_send(h, NFCT_Q_DUMP, &family);
+}
diff --git a/src/network.c b/src/network.c
index cadc466..13db37c 100644
--- a/src/network.c
+++ b/src/network.c
@@ -126,6 +126,11 @@ static int status2type[CACHE_T_MAX][C_OBJ_MAX] = {
[C_OBJ_ALIVE] = NET_T_STATE_CT_UPD,
[C_OBJ_DEAD] = NET_T_STATE_CT_DEL,
},
+ [CACHE_T_EXP] = {
+ [C_OBJ_NEW] = NET_T_STATE_EXP_NEW,
+ [C_OBJ_ALIVE] = NET_T_STATE_EXP_UPD,
+ [C_OBJ_DEAD] = NET_T_STATE_EXP_DEL,
+ },
};
int object_status_to_network_type(struct cache_object *obj)
diff --git a/src/parse.c b/src/parse.c
index 0718128..81e9c6b 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -248,3 +248,195 @@ int msg2ct(struct nf_conntrack *ct, struct nethdr *net, size_t remain)
return 0;
}
+
+static void exp_parse_ct_group(void *ct, int attr, void *data);
+static void exp_parse_ct_u8(void *ct, int attr, void *data);
+static void exp_parse_u32(void *exp, int attr, void *data);
+
+static struct exp_parser {
+ void (*parse)(void *obj, int attr, void *data);
+ int exp_attr;
+ int ct_attr;
+ int size;
+} exp_h[NTA_EXP_MAX] = {
+ [NTA_EXP_MASTER_IPV4] = {
+ .parse = exp_parse_ct_group,
+ .exp_attr = ATTR_EXP_MASTER,
+ .ct_attr = ATTR_GRP_ORIG_IPV4,
+ .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv4)),
+ },
+ [NTA_EXP_MASTER_IPV6] = {
+ .parse = exp_parse_ct_group,
+ .exp_attr = ATTR_EXP_MASTER,
+ .ct_attr = ATTR_GRP_ORIG_IPV6,
+ .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv6)),
+ },
+ [NTA_EXP_MASTER_L4PROTO] = {
+ .parse = exp_parse_ct_u8,
+ .exp_attr = ATTR_EXP_MASTER,
+ .ct_attr = ATTR_L4PROTO,
+ .size = NTA_SIZE(sizeof(uint8_t)),
+ },
+ [NTA_EXP_MASTER_PORT] = {
+ .parse = exp_parse_ct_group,
+ .exp_attr = ATTR_EXP_MASTER,
+ .ct_attr = ATTR_GRP_ORIG_PORT,
+ .size = NTA_SIZE(sizeof(struct nfct_attr_grp_port)),
+ },
+ [NTA_EXP_EXPECT_IPV4] = {
+ .parse = exp_parse_ct_group,
+ .exp_attr = ATTR_EXP_EXPECTED,
+ .ct_attr = ATTR_GRP_ORIG_IPV4,
+ .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv4)),
+ },
+ [NTA_EXP_EXPECT_IPV6] = {
+ .parse = exp_parse_ct_group,
+ .exp_attr = ATTR_EXP_EXPECTED,
+ .ct_attr = ATTR_GRP_ORIG_IPV6,
+ .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv6)),
+ },
+ [NTA_EXP_EXPECT_L4PROTO] = {
+ .parse = exp_parse_ct_u8,
+ .exp_attr = ATTR_EXP_EXPECTED,
+ .ct_attr = ATTR_L4PROTO,
+ .size = NTA_SIZE(sizeof(uint8_t)),
+ },
+ [NTA_EXP_EXPECT_PORT] = {
+ .parse = exp_parse_ct_group,
+ .exp_attr = ATTR_EXP_EXPECTED,
+ .ct_attr = ATTR_GRP_ORIG_PORT,
+ .size = NTA_SIZE(sizeof(struct nfct_attr_grp_port)),
+ },
+ [NTA_EXP_MASK_IPV4] = {
+ .parse = exp_parse_ct_group,
+ .exp_attr = ATTR_EXP_MASK,
+ .ct_attr = ATTR_GRP_ORIG_IPV4,
+ .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv4)),
+ },
+ [NTA_EXP_MASK_IPV6] = {
+ .parse = exp_parse_ct_group,
+ .exp_attr = ATTR_EXP_MASK,
+ .ct_attr = ATTR_GRP_ORIG_IPV6,
+ .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv6)),
+ },
+ [NTA_EXP_MASK_L4PROTO] = {
+ .parse = exp_parse_ct_u8,
+ .exp_attr = ATTR_EXP_MASK,
+ .ct_attr = ATTR_L4PROTO,
+ .size = NTA_SIZE(sizeof(uint8_t)),
+ },
+ [NTA_EXP_MASK_PORT] = {
+ .parse = exp_parse_ct_group,
+ .exp_attr = ATTR_EXP_MASK,
+ .ct_attr = ATTR_GRP_ORIG_PORT,
+ .size = NTA_SIZE(sizeof(struct nfct_attr_grp_port)),
+ },
+ [NTA_EXP_TIMEOUT] = {
+ .parse = exp_parse_u32,
+ .exp_attr = ATTR_EXP_TIMEOUT,
+ .size = NTA_SIZE(sizeof(uint32_t)),
+ },
+ [NTA_EXP_FLAGS] = {
+ .parse = exp_parse_u32,
+ .exp_attr = ATTR_EXP_FLAGS,
+ .size = NTA_SIZE(sizeof(uint32_t)),
+ },
+};
+
+static void exp_parse_ct_group(void *ct, int attr, void *data)
+{
+ nfct_set_attr_grp(ct, exp_h[attr].ct_attr, data);
+}
+
+static void exp_parse_ct_u8(void *ct, int attr, void *data)
+{
+ uint8_t *value = (uint8_t *) data;
+ nfct_set_attr_u8(ct, exp_h[attr].ct_attr, *value);
+}
+
+static void exp_parse_u32(void *exp, int attr, void *data)
+{
+ uint32_t *value = (uint32_t *) data;
+ nfexp_set_attr_u32(exp, exp_h[attr].exp_attr, ntohl(*value));
+}
+
+int msg2exp(struct nf_expect *exp, struct nethdr *net, size_t remain)
+{
+ int len;
+ struct netattr *attr;
+ struct nf_conntrack *master, *expected, *mask;
+
+ if (remain < net->len)
+ return -1;
+
+ len = net->len - NETHDR_SIZ;
+ attr = NETHDR_DATA(net);
+
+ master = nfct_new();
+ if (master == NULL)
+ goto err_master;
+
+ expected = nfct_new();
+ if (expected == NULL)
+ goto err_expected;
+
+ mask = nfct_new();
+ if (mask == NULL)
+ goto err_mask;
+
+ while (len > ssizeof(struct netattr)) {
+ ATTR_NETWORK2HOST(attr);
+ if (attr->nta_len > len)
+ goto err;
+ if (attr->nta_attr > NTA_MAX)
+ goto err;
+ if (attr->nta_len != exp_h[attr->nta_attr].size)
+ goto err;
+ if (exp_h[attr->nta_attr].parse == NULL) {
+ attr = NTA_NEXT(attr, len);
+ continue;
+ }
+ switch(exp_h[attr->nta_attr].exp_attr) {
+ case ATTR_EXP_MASTER:
+ exp_h[attr->nta_attr].parse(master, attr->nta_attr,
+ NTA_DATA(attr));
+ case ATTR_EXP_EXPECTED:
+ exp_h[attr->nta_attr].parse(expected, attr->nta_attr,
+ NTA_DATA(attr));
+ case ATTR_EXP_MASK:
+ exp_h[attr->nta_attr].parse(mask, attr->nta_attr,
+ NTA_DATA(attr));
+ break;
+ case ATTR_EXP_TIMEOUT:
+ case ATTR_EXP_FLAGS:
+ exp_h[attr->nta_attr].parse(exp, attr->nta_attr,
+ NTA_DATA(attr));
+ break;
+ }
+ attr = NTA_NEXT(attr, len);
+ }
+
+ nfexp_set_attr(exp, ATTR_EXP_MASTER, master);
+ nfexp_set_attr(exp, ATTR_EXP_EXPECTED, expected);
+ nfexp_set_attr(exp, ATTR_EXP_MASK, mask);
+
+ /* We can release the conntrack objects at this point because the
+ * setter makes a copy of them. This is not efficient, it would be
+ * better to save that extra copy but this is how the library works.
+ * I'm sorry, I cannot change it without breaking backward
+ * compatibility. Probably it is a good idea to think of adding new
+ * interfaces in the near future to get it better. */
+ nfct_destroy(mask);
+ nfct_destroy(expected);
+ nfct_destroy(master);
+
+ return 0;
+err:
+ nfct_destroy(mask);
+err_mask:
+ nfct_destroy(expected);
+err_expected:
+ nfct_destroy(master);
+err_master:
+ return -1;
+}
diff --git a/src/read_config_lex.l b/src/read_config_lex.l
index be6bf8b..01fe4fc 100644
--- a/src/read_config_lex.l
+++ b/src/read_config_lex.l
@@ -140,6 +140,7 @@ notrack [N|n][O|o][T|t][R|r][A|a][C|c][K|k]
"DisableExternalCache" { return T_DISABLE_EXTERNAL_CACHE; }
"Options" { return T_OPTIONS; }
"TCPWindowTracking" { return T_TCP_WINDOW_TRACKING; }
+"ExpectationSync" { return T_EXPECT_SYNC; }
"ErrorQueueLength" { return T_ERROR_QUEUE_LENGTH; }
{is_on} { return T_ON; }
diff --git a/src/read_config_yy.y b/src/read_config_yy.y
index 68a83f7..b22784c 100644
--- a/src/read_config_yy.y
+++ b/src/read_config_yy.y
@@ -73,7 +73,7 @@ static void __max_dedicated_links_reached(void);
%token T_NETLINK_OVERRUN_RESYNC T_NICE T_IPV4_DEST_ADDR T_IPV6_DEST_ADDR
%token T_SCHEDULER T_TYPE T_PRIO T_NETLINK_EVENTS_RELIABLE
%token T_DISABLE_INTERNAL_CACHE T_DISABLE_EXTERNAL_CACHE T_ERROR_QUEUE_LENGTH
-%token T_OPTIONS T_TCP_WINDOW_TRACKING
+%token T_OPTIONS T_TCP_WINDOW_TRACKING T_EXPECT_SYNC
%token <string> T_IP T_PATH_VAL
%token <val> T_NUMBER
@@ -828,6 +828,46 @@ option: T_TCP_WINDOW_TRACKING T_OFF
CONFIG(sync).tcp_window_tracking = 0;
};
+option: T_EXPECT_SYNC T_ON
+{
+ CONFIG(flags) |= CTD_EXPECT;
+ CONFIG(netlink).subsys_id = NFNL_SUBSYS_NONE;
+ CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW |
+ NF_NETLINK_CONNTRACK_UPDATE |
+ NF_NETLINK_CONNTRACK_DESTROY |
+ NF_NETLINK_CONNTRACK_EXP_NEW |
+ NF_NETLINK_CONNTRACK_EXP_UPDATE |
+ NF_NETLINK_CONNTRACK_EXP_DESTROY;
+};
+
+option: T_EXPECT_SYNC T_OFF
+{
+ CONFIG(netlink).subsys_id = NFNL_SUBSYS_CTNETLINK;
+ CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW |
+ NF_NETLINK_CONNTRACK_UPDATE |
+ NF_NETLINK_CONNTRACK_DESTROY;
+};
+
+option: T_EXPECT_SYNC '{' expect_list '}'
+{
+ CONFIG(flags) |= CTD_EXPECT;
+ CONFIG(netlink).subsys_id = NFNL_SUBSYS_NONE;
+ CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW |
+ NF_NETLINK_CONNTRACK_UPDATE |
+ NF_NETLINK_CONNTRACK_DESTROY |
+ NF_NETLINK_CONNTRACK_EXP_NEW |
+ NF_NETLINK_CONNTRACK_EXP_UPDATE |
+ NF_NETLINK_CONNTRACK_EXP_DESTROY;
+};
+
+expect_list:
+ | expect_list expect_item ;
+
+expect_item: T_STRING
+{
+ exp_filter_add(STATE(exp_filter), $1);
+}
+
sync_mode_alarm: T_SYNC_MODE T_ALARM '{' sync_mode_alarm_list '}'
{
conf.flags |= CTD_SYNC_ALARM;
@@ -1598,6 +1638,7 @@ init_config(char *filename)
/* Zero may be a valid facility */
CONFIG(syslog_facility) = -1;
CONFIG(stats).syslog_facility = -1;
+ CONFIG(netlink).subsys_id = -1;
yyrestart(fp);
yyparse();
@@ -1646,5 +1687,12 @@ init_config(char *filename)
if (CONFIG(channelc).error_queue_length == 0)
CONFIG(channelc).error_queue_length = 128;
+ if (CONFIG(netlink).subsys_id == -1) {
+ CONFIG(netlink).subsys_id = NFNL_SUBSYS_CTNETLINK;
+ CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW |
+ NF_NETLINK_CONNTRACK_UPDATE |
+ NF_NETLINK_CONNTRACK_DESTROY;
+ }
+
return 0;
}
diff --git a/src/run.c b/src/run.c
index c21db2e..26c1783 100644
--- a/src/run.c
+++ b/src/run.c
@@ -187,6 +187,62 @@ static void dump_stats_runtime(int fd)
send(fd, buf, size, 0);
}
+static void local_flush_master(void)
+{
+ STATE(stats).nl_kernel_table_flush++;
+ dlog(LOG_NOTICE, "flushing kernel conntrack table");
+
+ /* fork a child process that performs the flush operation,
+ * meanwhile the parent process handles events. */
+ if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL,
+ NULL, NULL) == 0) {
+ nl_flush_conntrack_table(STATE(flush));
+ exit(EXIT_SUCCESS);
+ }
+}
+
+static void local_resync_master(void)
+{
+ if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
+ STATE(stats).nl_kernel_table_resync++;
+ dlog(LOG_NOTICE, "resync with master conntrack table");
+ nl_dump_conntrack_table(STATE(dump));
+ } else {
+ dlog(LOG_NOTICE, "resync is unsupported in this mode");
+ }
+}
+
+static void local_exp_flush_master(void)
+{
+ if (!(CONFIG(flags) & CTD_EXPECT))
+ return;
+
+ STATE(stats).nl_kernel_table_flush++;
+ dlog(LOG_NOTICE, "flushing kernel expect table");
+
+ /* fork a child process that performs the flush operation,
+ * meanwhile the parent process handles events. */
+ if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL,
+ NULL, NULL) == 0) {
+ nl_flush_expect_table(STATE(flush));
+ exit(EXIT_SUCCESS);
+ }
+}
+
+static void local_exp_resync_master(void)
+{
+ if (!(CONFIG(flags) & CTD_EXPECT))
+ return;
+
+ if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
+ STATE(stats).nl_kernel_table_resync++;
+ dlog(LOG_NOTICE, "resync with master expect table");
+ nl_dump_expect_table(STATE(dump));
+ } else {
+ dlog(LOG_NOTICE, "resync is unsupported in this mode");
+ }
+}
+
static int local_handler(int fd, void *data)
{
int ret = LOCAL_RET_OK;
@@ -198,25 +254,24 @@ static int local_handler(int fd, void *data)
}
switch(type) {
case CT_FLUSH_MASTER:
- STATE(stats).nl_kernel_table_flush++;
- dlog(LOG_NOTICE, "flushing kernel conntrack table");
-
- /* fork a child process that performs the flush operation,
- * meanwhile the parent process handles events. */
- if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL,
- NULL, NULL) == 0) {
- nl_flush_conntrack_table(STATE(flush));
- exit(EXIT_SUCCESS);
- }
+ local_flush_master();
break;
case CT_RESYNC_MASTER:
- if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
- STATE(stats).nl_kernel_table_resync++;
- dlog(LOG_NOTICE, "resync with master table");
- nl_dump_conntrack_table(STATE(dump));
- } else {
- dlog(LOG_NOTICE, "resync is unsupported in this mode");
- }
+ local_resync_master();
+ break;
+ case EXP_FLUSH_MASTER:
+ local_exp_flush_master();
+ break;
+ case EXP_RESYNC_MASTER:
+ local_exp_resync_master();
+ break;
+ case ALL_FLUSH_MASTER:
+ local_flush_master();
+ local_exp_flush_master();
+ break;
+ case ALL_RESYNC_MASTER:
+ local_resync_master();
+ local_exp_resync_master();
break;
case STATS_RUNTIME:
dump_stats_runtime(fd);
@@ -245,7 +300,11 @@ static void do_polling_alarm(struct alarm_block *a, void *data)
if (STATE(mode)->internal->ct.purge)
STATE(mode)->internal->ct.purge();
+ if (STATE(mode)->internal->exp.purge)
+ STATE(mode)->internal->exp.purge();
+
nl_send_resync(STATE(resync));
+ nl_send_expect_resync(STATE(resync));
add_alarm(&STATE(polling_alarm), CONFIG(poll_kernel_secs), 0);
}
@@ -290,6 +349,49 @@ out:
return NFCT_CB_CONTINUE;
}
+static int exp_event_handler(const struct nlmsghdr *nlh,
+ enum nf_conntrack_msg_type type,
+ struct nf_expect *exp,
+ void *data)
+{
+ int origin_type;
+ const struct nf_conntrack *master =
+ nfexp_get_attr(exp, ATTR_EXP_MASTER);
+
+ STATE(stats).nl_events_received++;
+
+ if (!exp_filter_find(STATE(exp_filter), exp)) {
+ STATE(stats).nl_events_filtered++;
+ goto out;
+ }
+ if (ct_filter_conntrack(master, 1))
+ return NFCT_CB_CONTINUE;
+
+ origin_type = origin_find(nlh);
+
+ switch(type) {
+ case NFCT_T_NEW:
+ STATE(mode)->internal->exp.new(exp, origin_type);
+ break;
+ case NFCT_T_UPDATE:
+ STATE(mode)->internal->exp.upd(exp, origin_type);
+ break;
+ case NFCT_T_DESTROY:
+ STATE(mode)->internal->exp.del(exp, origin_type);
+ break;
+ default:
+ STATE(stats).nl_events_unknown_type++;
+ break;
+ }
+
+out:
+ /* we reset the iteration limiter in the main select loop. */
+ if (STATE(event_iterations_limit)-- <= 0)
+ return NFCT_CB_STOP;
+ else
+ return NFCT_CB_CONTINUE;
+}
+
static int dump_handler(enum nf_conntrack_msg_type type,
struct nf_conntrack *ct,
void *data)
@@ -308,6 +410,29 @@ static int dump_handler(enum nf_conntrack_msg_type type,
return NFCT_CB_CONTINUE;
}
+static int exp_dump_handler(enum nf_conntrack_msg_type type,
+ struct nf_expect *exp, void *data)
+{
+ const struct nf_conntrack *master =
+ nfexp_get_attr(exp, ATTR_EXP_MASTER);
+
+ if (!exp_filter_find(STATE(exp_filter), exp))
+ return NFCT_CB_CONTINUE;
+
+ if (ct_filter_conntrack(master, 1))
+ return NFCT_CB_CONTINUE;
+
+ switch(type) {
+ case NFCT_T_UPDATE:
+ STATE(mode)->internal->exp.populate(exp);
+ break;
+ default:
+ STATE(stats).nl_dump_unknown_type++;
+ break;
+ }
+ return NFCT_CB_CONTINUE;
+}
+
static int get_handler(enum nf_conntrack_msg_type type,
struct nf_conntrack *ct,
void *data)
@@ -319,6 +444,22 @@ static int get_handler(enum nf_conntrack_msg_type type,
return NFCT_CB_CONTINUE;
}
+static int exp_get_handler(enum nf_conntrack_msg_type type,
+ struct nf_expect *exp, void *data)
+{
+ const struct nf_conntrack *master =
+ nfexp_get_attr(exp, ATTR_EXP_MASTER);
+
+ if (!exp_filter_find(STATE(exp_filter), exp))
+ return NFCT_CB_CONTINUE;
+
+ if (ct_filter_conntrack(master, 1))
+ return NFCT_CB_CONTINUE;
+
+ STATE(get_retval) = 1;
+ return NFCT_CB_CONTINUE;
+}
+
int
init(void)
{
@@ -355,7 +496,7 @@ init(void)
register_fd(STATE(local).fd, STATE(fds));
/* resynchronize (like 'dump' socket) but it also purges old entries */
- STATE(resync) = nfct_open(CONNTRACK, 0);
+ STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0);
if (STATE(resync)== NULL) {
dlog(LOG_ERR, "can't open netlink handler: %s",
strerror(errno));
@@ -370,7 +511,7 @@ init(void)
fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK);
if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
- STATE(dump) = nfct_open(CONNTRACK, 0);
+ STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0);
if (STATE(dump) == NULL) {
dlog(LOG_ERR, "can't open netlink handler: %s",
strerror(errno));
@@ -380,13 +521,26 @@ init(void)
nfct_callback_register(STATE(dump), NFCT_T_ALL,
dump_handler, NULL);
+ if (CONFIG(flags) & CTD_EXPECT) {
+ nfexp_callback_register(STATE(dump), NFCT_T_ALL,
+ exp_dump_handler, NULL);
+ }
+
if (nl_dump_conntrack_table(STATE(dump)) == -1) {
dlog(LOG_ERR, "can't get kernel conntrack table");
return -1;
}
+
+ if (CONFIG(flags) & CTD_EXPECT) {
+ if (nl_dump_expect_table(STATE(dump)) == -1) {
+ dlog(LOG_ERR, "can't get kernel "
+ "expect table");
+ return -1;
+ }
+ }
}
- STATE(get) = nfct_open(CONNTRACK, 0);
+ STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0);
if (STATE(get) == NULL) {
dlog(LOG_ERR, "can't open netlink handler: %s",
strerror(errno));
@@ -395,7 +549,12 @@ init(void)
}
nfct_callback_register(STATE(get), NFCT_T_ALL, get_handler, NULL);
- STATE(flush) = nfct_open(CONNTRACK, 0);
+ if (CONFIG(flags) & CTD_EXPECT) {
+ nfexp_callback_register(STATE(get), NFCT_T_ALL,
+ exp_get_handler, NULL);
+ }
+
+ STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0);
if (STATE(flush) == NULL) {
dlog(LOG_ERR, "cannot open flusher handler");
return -1;
@@ -426,6 +585,11 @@ init(void)
}
nfct_callback_register2(STATE(event), NFCT_T_ALL,
event_handler, NULL);
+
+ if (CONFIG(flags) & CTD_EXPECT) {
+ nfexp_callback_register2(STATE(event), NFCT_T_ALL,
+ exp_event_handler, NULL);
+ }
register_fd(nfct_fd(STATE(event)), STATE(fds));
}
diff --git a/src/sync-ftfw.c b/src/sync-ftfw.c
index fa76c0c..1bc2d9f 100644
--- a/src/sync-ftfw.c
+++ b/src/sync-ftfw.c
@@ -231,6 +231,8 @@ static int ftfw_local(int fd, int type, void *data)
dlog(LOG_NOTICE, "sending bulk update");
cache_iterate(STATE(mode)->internal->ct.data,
NULL, do_cache_to_tx);
+ cache_iterate(STATE(mode)->internal->exp.data,
+ NULL, do_cache_to_tx);
break;
case STATS_RSQUEUE:
ftfw_local_queue(fd);
@@ -350,7 +352,10 @@ static int digest_msg(const struct nethdr *net)
} else if (IS_RESYNC(net)) {
dp("RESYNC ALL\n");
- cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx);
+ cache_iterate(STATE(mode)->internal->ct.data, NULL,
+ do_cache_to_tx);
+ cache_iterate(STATE(mode)->internal->exp.data, NULL,
+ do_cache_to_tx);
return MSG_CTL;
} else if (IS_ALIVE(net))
diff --git a/src/sync-mode.c b/src/sync-mode.c
index fa522c7..2505631 100644
--- a/src/sync-mode.c
+++ b/src/sync-mode.c
@@ -59,10 +59,29 @@ static struct nf_conntrack *msg2ct_alloc(struct nethdr *net, size_t remain)
return ct;
}
+static struct nf_expect *msg2exp_alloc(struct nethdr *net, size_t remain)
+{
+ struct nf_expect *exp;
+
+ /* TODO: add stats on ENOMEM errors in the future. */
+ exp = nfexp_new();
+ if (exp == NULL)
+ return NULL;
+
+ if (msg2exp(exp, net, remain) == -1) {
+ STATE_SYNC(error).msg_rcv_malformed++;
+ STATE_SYNC(error).msg_rcv_bad_payload++;
+ nfexp_destroy(exp);
+ return NULL;
+ }
+ return exp;
+}
+
static void
do_channel_handler_step(int i, struct nethdr *net, size_t remain)
{
- struct nf_conntrack *ct;
+ struct nf_conntrack *ct = NULL;
+ struct nf_expect *exp = NULL;
if (net->version != CONNTRACKD_PROTOCOL_VERSION) {
STATE_SYNC(error).msg_rcv_malformed++;
@@ -112,12 +131,33 @@ do_channel_handler_step(int i, struct nethdr *net, size_t remain)
return;
STATE_SYNC(external)->ct.del(ct);
break;
+ case NET_T_STATE_EXP_NEW:
+ exp = msg2exp_alloc(net, remain);
+ if (exp == NULL)
+ return;
+ STATE_SYNC(external)->exp.new(exp);
+ break;
+ case NET_T_STATE_EXP_UPD:
+ exp = msg2exp_alloc(net, remain);
+ if (exp == NULL)
+ return;
+ STATE_SYNC(external)->exp.upd(exp);
+ break;
+ case NET_T_STATE_EXP_DEL:
+ exp = msg2exp_alloc(net, remain);
+ if (exp == NULL)
+ return;
+ STATE_SYNC(external)->exp.del(exp);
+ break;
default:
STATE_SYNC(error).msg_rcv_malformed++;
STATE_SYNC(error).msg_rcv_bad_type++;
break;
}
- nfct_destroy(ct);
+ if (ct != NULL)
+ nfct_destroy(ct);
+ if (exp != NULL)
+ nfexp_destroy(exp);
}
static char __net[65536]; /* XXX: maximum MTU for IPv4 */
@@ -351,7 +391,7 @@ static int init_sync(void)
STATE(fds)) == -1)
return -1;
- STATE_SYNC(commit).h = nfct_open(CONNTRACK, 0);
+ STATE_SYNC(commit).h = nfct_open(CONFIG(netlink).subsys_id, 0);
if (STATE_SYNC(commit).h == NULL) {
dlog(LOG_ERR, "can't create handler to commit");
return -1;
@@ -402,8 +442,30 @@ static void run_sync(fd_set *readfds)
interface_handler();
if (FD_ISSET(get_read_evfd(STATE_SYNC(commit).evfd), readfds)) {
+ int ret;
+
read_evfd(STATE_SYNC(commit).evfd);
- STATE_SYNC(external)->ct.commit(STATE_SYNC(commit).h, 0);
+
+ ret = STATE_SYNC(commit).rq[0].cb(STATE_SYNC(commit).h, 0);
+ if (ret == 0) {
+ /* we still have things in the callback queue. */
+ if (STATE_SYNC(commit).rq[1].cb) {
+ int fd = STATE_SYNC(commit).clientfd;
+
+ STATE_SYNC(commit).rq[0].cb =
+ STATE_SYNC(commit).rq[1].cb;
+
+ STATE_SYNC(commit).rq[1].cb = NULL;
+
+ STATE_SYNC(commit).clientfd = -1;
+ STATE_SYNC(commit).rq[0].cb(
+ STATE_SYNC(commit).h, fd);
+ } else {
+ /* Close the client socket now, we're done. */
+ close(STATE_SYNC(commit).clientfd);
+ STATE_SYNC(commit).clientfd = -1;
+ }
+ }
}
/* flush pending messages */
@@ -480,6 +542,27 @@ static void dump_stats_sync_extended(int fd)
send(fd, buf, size, 0);
}
+static int local_commit(int fd)
+{
+ int ret;
+
+ /* delete the reset alarm if any before committing */
+ del_alarm(&STATE_SYNC(reset_cache_alarm));
+
+ ret = STATE_SYNC(commit).rq[0].cb(STATE_SYNC(commit).h, fd);
+ if (ret == -1) {
+ dlog(LOG_NOTICE, "commit already in progress, skipping");
+ ret = LOCAL_RET_OK;
+ } else if (ret == 0) {
+ /* we've finished the commit. */
+ ret = LOCAL_RET_OK;
+ } else {
+ /* Keep open the client, we want synchronous commit. */
+ ret = LOCAL_RET_STOLEN;
+ }
+ return ret;
+}
+
/* handler for requests coming via UNIX socket */
static int local_handler_sync(int fd, int type, void *data)
{
@@ -511,19 +594,10 @@ static int local_handler_sync(int fd, int type, void *data)
}
break;
case CT_COMMIT:
- /* delete the reset alarm if any before committing */
- del_alarm(&STATE_SYNC(reset_cache_alarm));
-
- dlog(LOG_NOTICE, "committing external cache");
- ret = STATE_SYNC(external)->ct.commit(STATE_SYNC(commit).h, fd);
- if (ret == 0) {
- dlog(LOG_NOTICE, "commit already in progress, "
- "skipping");
- ret = LOCAL_RET_OK;
- } else {
- /* Keep open the client, we want synchronous commit. */
- ret = LOCAL_RET_STOLEN;
- }
+ dlog(LOG_NOTICE, "committing conntrack cache");
+ STATE_SYNC(commit).rq[0].cb = STATE_SYNC(external)->ct.commit;
+ STATE_SYNC(commit).rq[1].cb = NULL;
+ ret = local_commit(fd);
break;
case RESET_TIMERS:
if (!alarm_pending(&STATE_SYNC(reset_cache_alarm))) {
@@ -575,6 +649,63 @@ static int local_handler_sync(int fd, int type, void *data)
case STATS_QUEUE:
queue_stats_show(fd);
break;
+ case EXP_STATS:
+ if (!(CONFIG(flags) & CTD_EXPECT))
+ break;
+
+ STATE(mode)->internal->exp.stats(fd);
+ STATE_SYNC(external)->exp.stats(fd);
+ dump_traffic_stats(fd);
+ multichannel_stats(STATE_SYNC(channel), fd);
+ dump_stats_sync(fd);
+ break;
+ case EXP_DUMP_INTERNAL:
+ if (!(CONFIG(flags) & CTD_EXPECT))
+ break;
+
+ if (fork_process_new(CTD_PROC_ANY, 0, NULL, NULL) == 0) {
+ STATE(mode)->internal->exp.dump(fd, NFCT_O_PLAIN);
+ exit(EXIT_SUCCESS);
+ }
+ break;
+ case EXP_DUMP_EXTERNAL:
+ if (!(CONFIG(flags) & CTD_EXPECT))
+ break;
+
+ if (fork_process_new(CTD_PROC_ANY, 0, NULL, NULL) == 0) {
+ STATE_SYNC(external)->exp.dump(fd, NFCT_O_PLAIN);
+ exit(EXIT_SUCCESS);
+ }
+ break;
+ case EXP_COMMIT:
+ if (!(CONFIG(flags) & CTD_EXPECT))
+ break;
+
+ dlog(LOG_NOTICE, "committing expectation cache");
+ STATE_SYNC(commit).rq[0].cb = STATE_SYNC(external)->exp.commit;
+ STATE_SYNC(commit).rq[1].cb = NULL;
+ local_commit(fd);
+ break;
+ case ALL_FLUSH_CACHE:
+ dlog(LOG_NOTICE, "flushing caches");
+ STATE(mode)->internal->ct.flush();
+ STATE_SYNC(external)->ct.flush();
+ if (CONFIG(flags) & CTD_EXPECT) {
+ STATE(mode)->internal->exp.flush();
+ STATE_SYNC(external)->exp.flush();
+ }
+ break;
+ case ALL_COMMIT:
+ dlog(LOG_NOTICE, "committing all external caches");
+ STATE_SYNC(commit).rq[0].cb = STATE_SYNC(external)->ct.commit;
+ if (CONFIG(flags) & CTD_EXPECT) {
+ STATE_SYNC(commit).rq[1].cb =
+ STATE_SYNC(external)->exp.commit;
+ } else {
+ STATE_SYNC(commit).rq[1].cb = NULL;
+ }
+ local_commit(fd);
+ break;
default:
if (STATE_SYNC(sync)->local)
ret = STATE_SYNC(sync)->local(fd, type, data);
diff --git a/src/sync-notrack.c b/src/sync-notrack.c
index 06ad1f0..a7df4e7 100644
--- a/src/sync-notrack.c
+++ b/src/sync-notrack.c
@@ -102,7 +102,7 @@ static void kernel_resync(void)
u_int32_t family = AF_UNSPEC;
int ret;
- h = nfct_open(CONNTRACK, 0);
+ h = nfct_open(CONFIG(netlink).subsys_id, 0);
if (h == NULL) {
dlog(LOG_ERR, "can't allocate memory for the internal cache");
return;
@@ -131,6 +131,8 @@ static int notrack_local(int fd, int type, void *data)
} else {
cache_iterate(STATE(mode)->internal->ct.data,
NULL, do_cache_to_tx);
+ cache_iterate(STATE(mode)->internal->exp.data,
+ NULL, do_cache_to_tx);
}
break;
default:
@@ -152,6 +154,8 @@ static int digest_msg(const struct nethdr *net)
} else {
cache_iterate(STATE(mode)->internal->ct.data,
NULL, do_cache_to_tx);
+ cache_iterate(STATE(mode)->internal->exp.data,
+ NULL, do_cache_to_tx);
}
return MSG_CTL;
}