From 6dd62d2a2b8c06507c3e2eb5026a43e2deb3da6e Mon Sep 17 00:00:00 2001 From: "/C=EU/ST=EU/CN=Pablo Neira Ayuso/emailAddress=pablo@netfilter.org" Date: Tue, 15 Jan 2008 13:40:35 +0000 Subject: Add include/netlink.h and include/traffic_stats.h --- include/netlink.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/netlink.h (limited to 'include/netlink.h') diff --git a/include/netlink.h b/include/netlink.h new file mode 100644 index 0000000..543eeda --- /dev/null +++ b/include/netlink.h @@ -0,0 +1,21 @@ +#ifndef _NETLINK_H_ +#define _NETLINK_H_ + +struct nf_conntrack; +struct nfct_handle; + +int ignore_conntrack(struct nf_conntrack *ct); + +int nl_init_event_handler(void); + +int nl_init_dump_handler(void); + +void nl_resize_socket_buffer(struct nfct_handle *h); + +int nl_dump_conntrack_table(void); + +int nl_create_conntrack(struct nf_conntrack *ct); + +int nl_destroy_conntrack(struct nf_conntrack *ct); + +#endif -- cgit v1.2.3 From 92701a6b224c533346f233061226bee5bb29a5dd Mon Sep 17 00:00:00 2001 From: "/C=EU/ST=EU/CN=Pablo Neira Ayuso/emailAddress=pablo@netfilter.org" Date: Tue, 8 Apr 2008 15:50:42 +0000 Subject: fix asymmetric path support (still some open concerns) --- ChangeLog | 1 + include/netlink.h | 4 ++++ src/cache_wt.c | 36 ++++++++++++++++++++++++++++++++---- src/netlink.c | 31 +++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 4 deletions(-) (limited to 'include/netlink.h') diff --git a/ChangeLog b/ChangeLog index 6d94c6a..4bd878b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ version 0.9.7 (yet unreleased) Pablo Neira Ayuso : o remove .svn directory from make distcheck tarballs (reported by B.Benjamini) o fix minor compilation issue in amd64 with gcc4.3 (reported by Daniel Schepler) +o fix asymmetric path support (reported by Gary Richards) Krzysztof Oledzki : o fix minor compilation warning diff --git a/include/netlink.h b/include/netlink.h index 543eeda..d345656 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -14,8 +14,12 @@ void nl_resize_socket_buffer(struct nfct_handle *h); int nl_dump_conntrack_table(void); +int nl_exist_conntrack(struct nf_conntrack *ct); + int nl_create_conntrack(struct nf_conntrack *ct); +int nl_update_conntrack(struct nf_conntrack *ct); + int nl_destroy_conntrack(struct nf_conntrack *ct); #endif diff --git a/src/cache_wt.c b/src/cache_wt.c index 8ff8fae..65eb3fe 100644 --- a/src/cache_wt.c +++ b/src/cache_wt.c @@ -16,30 +16,58 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include "conntrackd.h" #include "cache.h" #include "netlink.h" #include "us-conntrack.h" +#include "log.h" #include +#include -static void add_update(struct us_conntrack *u) +static void add_wt(struct us_conntrack *u) +{ + int ret; + char __ct[nfct_maxsize()]; + struct nf_conntrack *ct = (struct nf_conntrack *)(void*) __ct; + + ret = nl_exist_conntrack(u->ct); + switch (ret) { + case -1: + dlog(LOG_ERR, "cache_wt problem: %s", strerror(errno)); + break; + case 0: + memcpy(ct, u->ct, nfct_maxsize()); + if (nl_create_conntrack(ct) == -1) + dlog(LOG_ERR, "cache_wt create: %s", strerror(errno)); + break; + case 1: + memcpy(ct, u->ct, nfct_maxsize()); + if (nl_update_conntrack(ct) == -1) + dlog(LOG_ERR, "cache_wt crt-upd: %s", strerror(errno)); + break; + } +} + +static void upd_wt(struct us_conntrack *u) { char __ct[nfct_maxsize()]; struct nf_conntrack *ct = (struct nf_conntrack *)(void*) __ct; memcpy(ct, u->ct, nfct_maxsize()); - nl_create_conntrack(ct); + if (nl_update_conntrack(ct) == -1) + dlog(LOG_ERR, "cache_wt update:%s", strerror(errno)); } static void writethrough_add(struct us_conntrack *u, void *data) { - add_update(u); + add_wt(u); } static void writethrough_update(struct us_conntrack *u, void *data) { - add_update(u); + upd_wt(u); } static void writethrough_destroy(struct us_conntrack *u, void *data) diff --git a/src/netlink.c b/src/netlink.c index f6a2378..1ab75e4 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -23,6 +23,8 @@ #include "log.h" #include "debug.h" +#include + int ignore_conntrack(struct nf_conntrack *ct) { /* ignore a certain protocol */ @@ -193,6 +195,17 @@ int nl_dump_conntrack_table(void) return nfct_query(STATE(dump), NFCT_Q_DUMP, &CONFIG(family)); } +int nl_exist_conntrack(struct nf_conntrack *ct) +{ + int ret; + + ret = nfct_query(STATE(dump), NFCT_Q_GET, ct); + if (ret == -1) + return errno == ENOENT ? 0 : -1; + + return 1; +} + /* This function modifies the conntrack passed as argument! */ int nl_create_conntrack(struct nf_conntrack *ct) { @@ -219,6 +232,24 @@ int nl_create_conntrack(struct nf_conntrack *ct) return nfct_query(STATE(dump), NFCT_Q_CREATE_UPDATE, ct); } +/* This function modifies the conntrack passed as argument! */ +int nl_update_conntrack(struct nf_conntrack *ct) +{ + /* unset NAT info, otherwise we hit error */ + nfct_attr_unset(ct, ATTR_SNAT_IPV4); + nfct_attr_unset(ct, ATTR_DNAT_IPV4); + nfct_attr_unset(ct, ATTR_SNAT_PORT); + nfct_attr_unset(ct, ATTR_DNAT_PORT); + + if (nfct_attr_is_set(ct, ATTR_STATUS)) { + uint32_t status = nfct_get_attr_u32(ct, ATTR_STATUS); + status &= ~IPS_NAT_MASK; + nfct_set_attr_u32(ct, ATTR_STATUS, status); + } + + return nl_create_conntrack(ct); +} + int nl_destroy_conntrack(struct nf_conntrack *ct) { return nfct_query(STATE(dump), NFCT_Q_DESTROY, ct); -- cgit v1.2.3 From 5e5d8cdb3cfed98f1af3f3e265220c90df684674 Mon Sep 17 00:00:00 2001 From: "/C=EU/ST=EU/CN=Pablo Neira Ayuso/emailAddress=pablo@netfilter.org" Date: Wed, 9 Apr 2008 15:25:59 +0000 Subject: improve netlink overrun handling --- ChangeLog | 1 + include/conntrackd.h | 8 ++++- include/netlink.h | 4 +++ src/netlink.c | 21 +++++++++++++ src/run.c | 27 ++++++++++++++-- src/stats-mode.c | 36 ++++++++++----------- src/sync-mode.c | 89 +++++++++++++++++++++------------------------------- 7 files changed, 111 insertions(+), 75 deletions(-) (limited to 'include/netlink.h') diff --git a/ChangeLog b/ChangeLog index 4bd878b..4ca2af1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,7 @@ Pablo Neira Ayuso : o remove .svn directory from make distcheck tarballs (reported by B.Benjamini) o fix minor compilation issue in amd64 with gcc4.3 (reported by Daniel Schepler) o fix asymmetric path support (reported by Gary Richards) +o improve netlink overrun handling Krzysztof Oledzki : o fix minor compilation warning diff --git a/include/conntrackd.h b/include/conntrackd.h index 69c1303..57ac7e4 100644 --- a/include/conntrackd.h +++ b/include/conntrackd.h @@ -3,6 +3,7 @@ #include "mcast.h" #include "local.h" +#include "alarm.h" #include #include @@ -104,6 +105,8 @@ struct ct_general_state { struct nfct_handle *event; /* event handler */ struct nfct_handle *dump; /* dump handler */ + struct nfct_handle *overrun; /* overrun handler */ + struct alarm_block overrun_alarm; struct fds *fds; @@ -158,7 +161,10 @@ struct ct_mode { int (*local)(int fd, int type, void *data); void (*kill)(void); void (*dump)(struct nf_conntrack *ct); - void (*overrun)(void); + int (*overrun)(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, + void *data); + int (*purge)(void); void (*event_new)(struct nf_conntrack *ct); void (*event_upd)(struct nf_conntrack *ct); int (*event_dst)(struct nf_conntrack *ct); diff --git a/include/netlink.h b/include/netlink.h index d345656..a46fe11 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -10,6 +10,10 @@ int nl_init_event_handler(void); int nl_init_dump_handler(void); +int nl_init_overrun_handler(void); + +int nl_overrun_request_resync(void); + void nl_resize_socket_buffer(struct nfct_handle *h); int nl_dump_conntrack_table(void); diff --git a/src/netlink.c b/src/netlink.c index 1ab75e4..10c4643 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -158,6 +158,21 @@ int nl_init_dump_handler(void) return 0; } +int nl_init_overrun_handler(void) +{ + STATE(overrun) = nfct_open(CONNTRACK, 0); + if (!STATE(overrun)) + return -1; + + fcntl(nfct_fd(STATE(overrun)), F_SETFL, O_NONBLOCK); + + nfct_callback_register(STATE(overrun), + NFCT_T_ALL, + STATE(mode)->overrun, + NULL); + return 0; +} + static int warned = 0; void nl_resize_socket_buffer(struct nfct_handle *h) @@ -195,6 +210,12 @@ int nl_dump_conntrack_table(void) return nfct_query(STATE(dump), NFCT_Q_DUMP, &CONFIG(family)); } +int nl_overrun_request_resync(void) +{ + int family = CONFIG(family); + return nfct_send(STATE(overrun), NFCT_Q_DUMP, &family); +} + int nl_exist_conntrack(struct nf_conntrack *ct) { int ret; diff --git a/src/run.c b/src/run.c index b259f2e..63761b4 100644 --- a/src/run.c +++ b/src/run.c @@ -89,6 +89,12 @@ void local_handler(int fd, void *data) dlog(LOG_WARNING, "unknown local request %d", type); } +static void do_overrun_alarm(struct alarm_block *a, void *data) +{ + nl_overrun_request_resync(); + add_alarm(&STATE(overrun_alarm), 2, 0); +} + int init(void) { @@ -129,6 +135,15 @@ init(void) return -1; } + if (nl_init_overrun_handler() == -1) { + dlog(LOG_ERR, "can't open netlink handler: %s", + strerror(errno)); + dlog(LOG_ERR, "no ctnetlink kernel support?"); + return -1; + } + + init_alarm(&STATE(overrun_alarm), NULL, do_overrun_alarm); + STATE(fds) = create_fds(); if (STATE(fds) == NULL) { dlog(LOG_ERR, "can't create file descriptor pool"); @@ -137,6 +152,7 @@ init(void) register_fd(STATE(local).fd, STATE(fds)); register_fd(nfct_fd(STATE(event)), STATE(fds)); + register_fd(nfct_fd(STATE(overrun)), STATE(fds)); if (STATE(mode)->register_fds && STATE(mode)->register_fds(STATE(fds)) == -1) { @@ -203,8 +219,8 @@ static void __run(struct timeval *next_alarm) * size and resync with master conntrack table. */ nl_resize_socket_buffer(STATE(event)); - /* XXX: schedule overrun call via alarm */ - STATE(mode)->overrun(); + nl_overrun_request_resync(); + add_alarm(&STATE(overrun_alarm), 2, 0); break; case ENOENT: /* @@ -223,6 +239,13 @@ static void __run(struct timeval *next_alarm) } } + if (FD_ISSET(nfct_fd(STATE(overrun)), &readfds)) { + del_alarm(&STATE(overrun_alarm)); + nfct_catch(STATE(overrun)); + if (STATE(mode)->purge) + STATE(mode)->purge(); + } + if (STATE(mode)->run) STATE(mode)->run(&readfds); diff --git a/src/stats-mode.c b/src/stats-mode.c index 42fa35a..3773feb 100644 --- a/src/stats-mode.c +++ b/src/stats-mode.c @@ -22,10 +22,12 @@ #include "cache.h" #include "log.h" #include "conntrackd.h" +#include "us-conntrack.h" #include #include #include +#include static int init_stats(void) { @@ -93,9 +95,9 @@ static void dump_stats(struct nf_conntrack *ct) debug_ct(ct, "resync entry"); } -static int overrun_cb(enum nf_conntrack_msg_type type, - struct nf_conntrack *ct, - void *data) +static int overrun_stats(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, + void *data) { if (ignore_conntrack(ct)) return NFCT_CB_CONTINUE; @@ -115,28 +117,25 @@ static int overrun_cb(enum nf_conntrack_msg_type type, return NFCT_CB_CONTINUE; } -static void overrun_stats(void) +static int purge_step(void *data1, void *data2) { int ret; - struct nfct_handle *h; - int family = CONFIG(family); + struct us_conntrack *u = data2; - h = nfct_open(CONNTRACK, 0); - if (!h) { - dlog(LOG_ERR, "can't open overrun handler"); - return; + ret = nfct_query(STATE(dump), NFCT_Q_GET, u->ct); + if (ret == -1 && errno == ENOENT) { + debug_ct(u->ct, "overrun purge stats"); + cache_del(STATE_STATS(cache), u->ct); } - nfct_callback_register(h, NFCT_T_ALL, overrun_cb, NULL); - - cache_flush(STATE_STATS(cache)); + return 0; +} - ret = nfct_query(h, NFCT_Q_DUMP, &family); - if (ret == -1) - dlog(LOG_ERR, - "overrun query error %s", strerror(errno)); +static int purge_stats(void) +{ + cache_iterate(STATE_STATS(cache), NULL, purge_step); - nfct_close(h); + return 0; } static void event_new_stats(struct nf_conntrack *ct) @@ -187,6 +186,7 @@ struct ct_mode stats_mode = { .kill = kill_stats, .dump = dump_stats, .overrun = overrun_stats, + .purge = purge_stats, .event_new = event_new_stats, .event_upd = event_update_stats, .event_dst = event_destroy_stats diff --git a/src/sync-mode.c b/src/sync-mode.c index 79afcdf..3851e4a 100644 --- a/src/sync-mode.c +++ b/src/sync-mode.c @@ -350,9 +350,40 @@ static void mcast_send_sync(struct us_conntrack *u, STATE_SYNC(sync)->send(net, u); } -static int overrun_cb(enum nf_conntrack_msg_type type, - struct nf_conntrack *ct, - void *data) +static int purge_step(void *data1, void *data2) +{ + int ret; + struct nfct_handle *h = STATE(dump); + struct us_conntrack *u = data2; + + ret = nfct_query(h, NFCT_Q_GET, u->ct); + if (ret == -1 && errno == ENOENT) { + size_t len; + struct nethdr *net = BUILD_NETMSG(u->ct, NFCT_Q_DESTROY); + + debug_ct(u->ct, "overrun purge resync"); + + len = prepare_send_netmsg(STATE_SYNC(mcast_client), net); + mcast_buffered_send_netmsg(STATE_SYNC(mcast_client), net, len); + if (STATE_SYNC(sync)->send) + STATE_SYNC(sync)->send(net, u); + + cache_del(STATE_SYNC(internal), u->ct); + } + + return 0; +} + +static int purge_sync(void) +{ + cache_iterate(STATE_SYNC(internal), NULL, purge_step); + + return 0; +} + +static int overrun_sync(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, + void *data) { struct us_conntrack *u; @@ -387,57 +418,6 @@ static int overrun_cb(enum nf_conntrack_msg_type type, return NFCT_CB_CONTINUE; } -static int overrun_purge_step(void *data1, void *data2) -{ - int ret; - struct nfct_handle *h = data1; - struct us_conntrack *u = data2; - - ret = nfct_query(h, NFCT_Q_GET, u->ct); - if (ret == -1 && errno == ENOENT) { - size_t len; - struct nethdr *net = BUILD_NETMSG(u->ct, NFCT_Q_DESTROY); - - debug_ct(u->ct, "overrun purge resync"); - - len = prepare_send_netmsg(STATE_SYNC(mcast_client), net); - mcast_buffered_send_netmsg(STATE_SYNC(mcast_client), net, len); - if (STATE_SYNC(sync)->send) - STATE_SYNC(sync)->send(net, u); - - cache_del(STATE_SYNC(internal), u->ct); - } - - return 0; -} - -/* it's likely that we're losing events, just try to do our best here */ -static void overrun_sync(void) -{ - int ret; - struct nfct_handle *h; - int family = CONFIG(family); - - h = nfct_open(CONNTRACK, 0); - if (!h) { - dlog(LOG_ERR, "can't open overrun handler"); - return; - } - - nfct_callback_register(h, NFCT_T_ALL, overrun_cb, NULL); - - ret = nfct_query(h, NFCT_Q_DUMP, &family); - if (ret == -1) - dlog(LOG_ERR, - "overrun query error %s", strerror(errno)); - - nfct_callback_unregister(h); - - cache_iterate(STATE_SYNC(internal), h, overrun_purge_step); - - nfct_close(h); -} - static void event_new_sync(struct nf_conntrack *ct) { struct us_conntrack *u; @@ -505,6 +485,7 @@ struct ct_mode sync_mode = { .kill = kill_sync, .dump = dump_sync, .overrun = overrun_sync, + .purge = purge_sync, .event_new = event_new_sync, .event_upd = event_update_sync, .event_dst = event_destroy_sync -- cgit v1.2.3 From a4f4647b4b7f32f2d1caab98544802c8cdd7b4d6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Aug 2008 14:52:41 +0200 Subject: netlink: add getter and check existence functions This patch adds nl_get_conntrack and it changes the behaviour of nl_exist_conntrack. Now, nl_get_conntrack requests the kernel for a conntrack and updates the cached entry. On the other hand, nl_exist_conntrack only inquiries for the existence of the entry. Signed-off-by: Pablo Neira Ayuso --- include/conntrackd.h | 1 + include/netlink.h | 4 ++++ src/netlink.c | 25 +++++++++++++++++++++++-- src/run.c | 8 ++++++++ 4 files changed, 36 insertions(+), 2 deletions(-) (limited to 'include/netlink.h') diff --git a/include/conntrackd.h b/include/conntrackd.h index 2f0d7e5..60bb2de 100644 --- a/include/conntrackd.h +++ b/include/conntrackd.h @@ -110,6 +110,7 @@ struct ct_general_state { struct nfct_filter *filter; /* event filter */ struct nfct_handle *dump; /* dump handler */ + struct nfct_handle *request; /* request handler */ struct nfct_handle *overrun; /* overrun handler */ struct alarm_block overrun_alarm; diff --git a/include/netlink.h b/include/netlink.h index a46fe11..a7b7dda 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -10,6 +10,8 @@ int nl_init_event_handler(void); int nl_init_dump_handler(void); +int nl_init_request_handler(void); + int nl_init_overrun_handler(void); int nl_overrun_request_resync(void); @@ -20,6 +22,8 @@ int nl_dump_conntrack_table(void); int nl_exist_conntrack(struct nf_conntrack *ct); +int nl_get_conntrack(struct nf_conntrack *ct); + int nl_create_conntrack(struct nf_conntrack *ct); int nl_update_conntrack(struct nf_conntrack *ct); diff --git a/src/netlink.c b/src/netlink.c index a8a5503..0d9b7db 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -214,6 +214,16 @@ int nl_init_overrun_handler(void) return 0; } +/* no callback, it does not do anything with the output */ +int nl_init_request_handler(void) +{ + STATE(request) = nfct_open(CONNTRACK, 0); + if (!STATE(request)) + return -1; + + return 0; +} + static int warned = 0; void nl_resize_socket_buffer(struct nfct_handle *h) @@ -257,7 +267,7 @@ int nl_overrun_request_resync(void) return nfct_send(STATE(overrun), NFCT_Q_DUMP, &family); } -int nl_exist_conntrack(struct nf_conntrack *ct) +static int __nl_get_conntrack(struct nfct_handle *h, struct nf_conntrack *ct) { int ret; char __tmp[nfct_maxsize()]; @@ -268,13 +278,24 @@ int nl_exist_conntrack(struct nf_conntrack *ct) /* use the original tuple to check if it is there */ nfct_copy(tmp, ct, NFCT_CP_ORIG); - ret = nfct_query(STATE(dump), NFCT_Q_GET, tmp); + ret = nfct_query(h, NFCT_Q_GET, tmp); if (ret == -1) return errno == ENOENT ? 0 : -1; return 1; } +int nl_exist_conntrack(struct nf_conntrack *ct) +{ + return __nl_get_conntrack(STATE(request), ct); +} + +/* get the conntrack and update the cache */ +int nl_get_conntrack(struct nf_conntrack *ct) +{ + return __nl_get_conntrack(STATE(dump), ct); +} + /* This function modifies the conntrack passed as argument! */ int nl_create_conntrack(struct nf_conntrack *ct) { diff --git a/src/run.c b/src/run.c index cf570d8..b7da18c 100644 --- a/src/run.c +++ b/src/run.c @@ -38,6 +38,7 @@ void killer(int foo) sigprocmask(SIG_BLOCK, &STATE(block), NULL); nfct_close(STATE(event)); + nfct_close(STATE(request)); ct_filter_destroy(STATE(us_filter)); local_server_destroy(&STATE(local)); @@ -144,6 +145,13 @@ init(void) return -1; } + if (nl_init_request_handler() == -1) { + dlog(LOG_ERR, "can't open netlink handler: %s", + strerror(errno)); + dlog(LOG_ERR, "no ctnetlink kernel support?"); + return -1; + } + init_alarm(&STATE(overrun_alarm), NULL, do_overrun_alarm); STATE(fds) = create_fds(); -- cgit v1.2.3 From 8a78dda3e6676286f09f5c78cca60a8178186930 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Aug 2008 14:53:29 +0200 Subject: cache iterators: commit master entries before related ones Commit master entries before related ones to avoid ENOENT errors. Signed-off-by: Pablo Neira Ayuso --- include/netlink.h | 14 ++++++++++++++ src/cache_iterators.c | 29 ++++++++++++++++++++++++----- src/netlink.c | 12 ++++++++++++ 3 files changed, 50 insertions(+), 5 deletions(-) (limited to 'include/netlink.h') diff --git a/include/netlink.h b/include/netlink.h index a7b7dda..6d28ac6 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -1,6 +1,8 @@ #ifndef _NETLINK_H_ #define _NETLINK_H_ +#include + struct nf_conntrack; struct nfct_handle; @@ -30,4 +32,16 @@ int nl_update_conntrack(struct nf_conntrack *ct); int nl_destroy_conntrack(struct nf_conntrack *ct); +static inline int ct_is_related(const struct nf_conntrack *ct) +{ + return (nfct_attr_is_set(ct, ATTR_MASTER_L3PROTO) && + nfct_attr_is_set(ct, ATTR_MASTER_L4PROTO) && + ((nfct_attr_is_set(ct, ATTR_MASTER_IPV4_SRC) && + nfct_attr_is_set(ct, ATTR_MASTER_IPV4_DST)) || + (nfct_attr_is_set(ct, ATTR_MASTER_IPV6_SRC) && + nfct_attr_is_set(ct, ATTR_MASTER_IPV6_DST))) && + nfct_attr_is_set(ct, ATTR_MASTER_PORT_SRC) && + nfct_attr_is_set(ct, ATTR_MASTER_PORT_DST)); +} + #endif diff --git a/src/cache_iterators.c b/src/cache_iterators.c index 8898930..8811fc4 100644 --- a/src/cache_iterators.c +++ b/src/cache_iterators.c @@ -95,12 +95,9 @@ void cache_dump(struct cache *c, int fd, int type) hashtable_iterate(c->h, (void *) &tmp, do_dump); } -/* no need to clone, called from child process */ -static int do_commit(void *data1, void *data2) +static void __do_commit_step(struct cache *c, struct us_conntrack *u) { int ret, retry = 1; - struct cache *c = data1; - struct us_conntrack *u = data2; struct nf_conntrack *ct = u->ct; /* @@ -149,18 +146,40 @@ try_again_update: c->commit_ok++; break; } +} + +static int do_commit_related(void *data1, void *data2) +{ + struct us_conntrack *u = data2; + + if (ct_is_related(u->ct)) + __do_commit_step(data1, u); /* keep iterating even if we have found errors */ return 0; } +static int do_commit_master(void *data1, void *data2) +{ + struct us_conntrack *u = data2; + + if (ct_is_related(u->ct)) + return 0; + + __do_commit_step(data1, u); + return 0; +} + +/* no need to clone, called from child process */ void cache_commit(struct cache *c) { unsigned int commit_ok = c->commit_ok; unsigned int commit_exist = c->commit_exist; unsigned int commit_fail = c->commit_fail; - hashtable_iterate(c->h, c, do_commit); + /* commit master conntrack first, then related ones */ + hashtable_iterate(c->h, c, do_commit_master); + hashtable_iterate(c->h, c, do_commit_related); /* calculate new entries committed */ commit_ok = c->commit_ok - commit_ok; diff --git a/src/netlink.c b/src/netlink.c index 0d9b7db..e9b1cfd 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -337,6 +337,18 @@ int nl_update_conntrack(struct nf_conntrack *ct) nfct_set_attr_u32(ct, ATTR_STATUS, status); } + /* we hit error if we try to update the master conntrack */ + if (ct_is_related(ct)) { + nfct_attr_unset(ct, ATTR_MASTER_L3PROTO); + nfct_attr_unset(ct, ATTR_MASTER_L4PROTO); + nfct_attr_unset(ct, ATTR_MASTER_IPV4_SRC); + nfct_attr_unset(ct, ATTR_MASTER_IPV4_DST); + nfct_attr_unset(ct, ATTR_MASTER_IPV6_SRC); + nfct_attr_unset(ct, ATTR_MASTER_IPV6_DST); + nfct_attr_unset(ct, ATTR_MASTER_PORT_SRC); + nfct_attr_unset(ct, ATTR_MASTER_PORT_DST); + } + return nl_create_conntrack(ct); } -- cgit v1.2.3 From 50162d3c19e38a491d95ec26767438ec25bab0dc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Oct 2008 19:11:42 +0200 Subject: filter: do not filter in user-space if kernel supports BSF This patch avoids a double filtering in user-space and kernel-space if the kernel support BSF. Since we do not use BSF for dumps and resyncs, we add a new parameter to ignore_conntrack to indicate if we have to perform the filtering in user-space or not. Signed-off-by: Pablo Neira Ayuso --- include/netlink.h | 2 +- src/netlink.c | 11 ++++++----- src/stats-mode.c | 2 +- src/sync-mode.c | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/netlink.h') diff --git a/include/netlink.h b/include/netlink.h index 6d28ac6..d13d33d 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -6,7 +6,7 @@ struct nf_conntrack; struct nfct_handle; -int ignore_conntrack(struct nf_conntrack *ct); +int ignore_conntrack(struct nf_conntrack *ct, int userspace); int nl_init_event_handler(void); diff --git a/src/netlink.c b/src/netlink.c index c0a0805..89a4ebc 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -60,14 +60,14 @@ static int sanity_check(struct nf_conntrack *ct) return 1; } -int ignore_conntrack(struct nf_conntrack *ct) +/* we do user-space filtering for dump and resyncs */ +int ignore_conntrack(struct nf_conntrack *ct, int userspace) { /* missing mandatory attributes in object */ if (!sanity_check(ct)) return 1; - /* Ignore traffic */ - if (!ct_filter_check(STATE(us_filter), ct)) { + if (userspace && !ct_filter_check(STATE(us_filter), ct)) { debug_ct(ct, "ignore traffic"); return 1; } @@ -79,7 +79,8 @@ static int event_handler(enum nf_conntrack_msg_type type, struct nf_conntrack *ct, void *data) { - if (ignore_conntrack(ct)) + /* skip user-space filtering if already do it in the kernel */ + if (ignore_conntrack(ct, !CONFIG(kernel_support_netlink_bsf))) return NFCT_CB_STOP; switch(type) { @@ -155,7 +156,7 @@ static int dump_handler(enum nf_conntrack_msg_type type, struct nf_conntrack *ct, void *data) { - if (ignore_conntrack(ct)) + if (ignore_conntrack(ct, 1)) return NFCT_CB_CONTINUE; switch(type) { diff --git a/src/stats-mode.c b/src/stats-mode.c index 1650d5d..763afe0 100644 --- a/src/stats-mode.c +++ b/src/stats-mode.c @@ -104,7 +104,7 @@ static int overrun_stats(enum nf_conntrack_msg_type type, struct nf_conntrack *ct, void *data) { - if (ignore_conntrack(ct)) + if (ignore_conntrack(ct, 1)) return NFCT_CB_CONTINUE; /* This is required by kernels < 2.6.20 */ diff --git a/src/sync-mode.c b/src/sync-mode.c index db199bc..4c22745 100644 --- a/src/sync-mode.c +++ b/src/sync-mode.c @@ -432,7 +432,7 @@ static int overrun_sync(enum nf_conntrack_msg_type type, { struct us_conntrack *u; - if (ignore_conntrack(ct)) + if (ignore_conntrack(ct, 1)) return NFCT_CB_CONTINUE; /* This is required by kernels < 2.6.20 */ -- cgit v1.2.3 From 9aba3974d60bfbc773ac366ad6b8859a5c000377 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 27 Nov 2008 23:40:13 +0100 Subject: src: move callbacks to run.c for better readability This patch is a cleanup. It moves the callbacks from netlink.c to run.c where they are actually invoked. This is better for code readability as I usually have to switch from run.c to netlink.c to remember what the callbacks actually do. Signed-off-by: Pablo Neira Ayuso --- include/filter.h | 2 +- include/netlink.h | 10 ++-- src/filter.c | 49 +++++++++++++++- src/netlink.c | 166 +++++++++++------------------------------------------- src/run.c | 72 +++++++++++++++++++++-- src/stats-mode.c | 2 +- src/sync-mode.c | 2 +- 7 files changed, 154 insertions(+), 149 deletions(-) (limited to 'include/netlink.h') diff --git a/include/filter.h b/include/filter.h index 567be34..9c2cf66 100644 --- a/include/filter.h +++ b/include/filter.h @@ -39,6 +39,6 @@ void ct_filter_add_state(struct ct_filter *f, int protonum, int state); void ct_filter_set_logic(struct ct_filter *f, enum ct_filter_type type, enum ct_filter_logic logic); -int ct_filter_check(struct ct_filter *filter, struct nf_conntrack *ct); +int ct_filter_conntrack(struct nf_conntrack *ct, int userspace); #endif diff --git a/include/netlink.h b/include/netlink.h index d13d33d..52482c1 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -6,15 +6,13 @@ struct nf_conntrack; struct nfct_handle; -int ignore_conntrack(struct nf_conntrack *ct, int userspace); +struct nfct_handle *nl_init_event_handler(void); -int nl_init_event_handler(void); +struct nfct_handle *nl_init_dump_handler(void); -int nl_init_dump_handler(void); +struct nfct_handle *nl_init_request_handler(void); -int nl_init_request_handler(void); - -int nl_init_overrun_handler(void); +struct nfct_handle *nl_init_overrun_handler(void); int nl_overrun_request_resync(void); diff --git a/src/filter.c b/src/filter.c index 905d10f..5a8b5d8 100644 --- a/src/filter.c +++ b/src/filter.c @@ -279,7 +279,7 @@ static int __ct_filter_test_state(struct ct_filter *f, struct nf_conntrack *ct) return test_bit_u16(val, &f->statemap[protonum]); } -int ct_filter_check(struct ct_filter *f, struct nf_conntrack *ct) +static int ct_filter_check(struct ct_filter *f, struct nf_conntrack *ct) { int ret, protonum = nfct_get_attr_u8(ct, ATTR_L4PROTO); @@ -324,3 +324,50 @@ int ct_filter_check(struct ct_filter *f, struct nf_conntrack *ct) return 1; } + +static inline int ct_filter_sanity_check(struct nf_conntrack *ct) +{ + if (!nfct_attr_is_set(ct, ATTR_L3PROTO)) { + dlog(LOG_ERR, "missing layer 3 protocol"); + return 0; + } + + switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) { + case AF_INET: + if (!nfct_attr_is_set(ct, ATTR_IPV4_SRC) || + !nfct_attr_is_set(ct, ATTR_IPV4_DST) || + !nfct_attr_is_set(ct, ATTR_REPL_IPV4_SRC) || + !nfct_attr_is_set(ct, ATTR_REPL_IPV4_DST)) { + dlog(LOG_ERR, "missing IPv4 address. " + "You forgot to load " + "nf_conntrack_ipv4?"); + return 0; + } + break; + case AF_INET6: + if (!nfct_attr_is_set(ct, ATTR_IPV6_SRC) || + !nfct_attr_is_set(ct, ATTR_IPV6_DST) || + !nfct_attr_is_set(ct, ATTR_REPL_IPV6_SRC) || + !nfct_attr_is_set(ct, ATTR_REPL_IPV6_DST)) { + dlog(LOG_ERR, "missing IPv6 address. " + "You forgot to load " + "nf_conntrack_ipv6?"); + return 0; + } + break; + } + return 1; +} + +/* we do user-space filtering for dump and resyncs */ +int ct_filter_conntrack(struct nf_conntrack *ct, int userspace) +{ + /* missing mandatory attributes in object */ + if (!ct_filter_sanity_check(ct)) + return 1; + + if (userspace && !ct_filter_check(STATE(us_filter), ct)) + return 1; + + return 0; +} diff --git a/src/netlink.c b/src/netlink.c index b8a2a02..81ac7a1 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -18,103 +18,27 @@ #include "netlink.h" #include "conntrackd.h" -#include "traffic_stats.h" #include "filter.h" #include "log.h" #include "debug.h" #include #include - -static int sanity_check(struct nf_conntrack *ct) -{ - if (!nfct_attr_is_set(ct, ATTR_L3PROTO)) { - dlog(LOG_ERR, "missing layer 3 protocol"); - return 0; - } - - switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) { - case AF_INET: - if (!nfct_attr_is_set(ct, ATTR_IPV4_SRC) || - !nfct_attr_is_set(ct, ATTR_IPV4_DST) || - !nfct_attr_is_set(ct, ATTR_REPL_IPV4_SRC) || - !nfct_attr_is_set(ct, ATTR_REPL_IPV4_DST)) { - dlog(LOG_ERR, "missing IPv4 address. " - "You forgot to load " - "nf_conntrack_ipv4?"); - return 0; - } - break; - case AF_INET6: - if (!nfct_attr_is_set(ct, ATTR_IPV6_SRC) || - !nfct_attr_is_set(ct, ATTR_IPV6_DST) || - !nfct_attr_is_set(ct, ATTR_REPL_IPV6_SRC) || - !nfct_attr_is_set(ct, ATTR_REPL_IPV6_DST)) { - dlog(LOG_ERR, "missing IPv6 address. " - "You forgot to load " - "nf_conntrack_ipv6?"); - return 0; - } - break; - } - return 1; -} - -/* we do user-space filtering for dump and resyncs */ -int ignore_conntrack(struct nf_conntrack *ct, int userspace) -{ - /* missing mandatory attributes in object */ - if (!sanity_check(ct)) - return 1; - - if (userspace && !ct_filter_check(STATE(us_filter), ct)) { - debug_ct(ct, "ignore traffic"); - return 1; - } - - return 0; -} - -static int event_handler(enum nf_conntrack_msg_type type, - struct nf_conntrack *ct, - void *data) -{ - /* skip user-space filtering if already do it in the kernel */ - if (ignore_conntrack(ct, !CONFIG(filter_from_kernelspace))) - return NFCT_CB_STOP; - - switch(type) { - case NFCT_T_NEW: - STATE(mode)->event_new(ct); - break; - case NFCT_T_UPDATE: - STATE(mode)->event_upd(ct); - break; - case NFCT_T_DESTROY: - if (STATE(mode)->event_dst(ct)) - update_traffic_stats(ct); - break; - default: - dlog(LOG_WARNING, "unknown msg from ctnetlink\n"); - break; - } - - return NFCT_CB_CONTINUE; -} - #include #include #include -int nl_init_event_handler(void) +struct nfct_handle *nl_init_event_handler(void) { - STATE(event) = nfct_open(CONNTRACK, NFCT_ALL_CT_GROUPS); - if (!STATE(event)) - return -1; + struct nfct_handle *h; + + h = nfct_open(CONNTRACK, NFCT_ALL_CT_GROUPS); + if (h == NULL) + return NULL; if (STATE(filter)) { if (CONFIG(filter_from_kernelspace)) { - if (nfct_filter_attach(nfct_fd(STATE(event)), + if (nfct_filter_attach(nfct_fd(h), STATE(filter)) == -1) { dlog(LOG_ERR, "cannot set event filtering: %s", strerror(errno)); @@ -126,18 +50,18 @@ int nl_init_event_handler(void) nfct_filter_destroy(STATE(filter)); } - fcntl(nfct_fd(STATE(event)), F_SETFL, O_NONBLOCK); + fcntl(nfct_fd(h), F_SETFL, O_NONBLOCK); /* set up socket buffer size */ if (CONFIG(netlink_buffer_size)) - nfnl_rcvbufsiz(nfct_nfnlh(STATE(event)), + nfnl_rcvbufsiz(nfct_nfnlh(h), CONFIG(netlink_buffer_size)); else { socklen_t socklen = sizeof(unsigned int); unsigned int read_size; /* get current buffer size */ - getsockopt(nfct_fd(STATE(event)), SOL_SOCKET, + getsockopt(nfct_fd(h), SOL_SOCKET, SO_RCVBUF, &read_size, &socklen); CONFIG(netlink_buffer_size) = read_size; @@ -148,69 +72,43 @@ int nl_init_event_handler(void) CONFIG(netlink_buffer_size_max_grown) = CONFIG(netlink_buffer_size); - /* register callback for events */ - nfct_callback_register(STATE(event), NFCT_T_ALL, event_handler, NULL); - - return 0; + return h; } -static int dump_handler(enum nf_conntrack_msg_type type, - struct nf_conntrack *ct, - void *data) +struct nfct_handle *nl_init_dump_handler(void) { - if (ignore_conntrack(ct, 1)) - return NFCT_CB_CONTINUE; - - switch(type) { - case NFCT_T_UPDATE: - STATE(mode)->dump(ct); - break; - default: - dlog(LOG_WARNING, "unknown msg from ctnetlink"); - break; - } - return NFCT_CB_CONTINUE; -} + struct nfct_handle *h; -int nl_init_dump_handler(void) -{ /* open dump netlink socket */ - STATE(dump) = nfct_open(CONNTRACK, 0); - if (!STATE(dump)) - return -1; - - /* register callback for dumped entries */ - nfct_callback_register(STATE(dump), NFCT_T_ALL, dump_handler, NULL); + h = nfct_open(CONNTRACK, 0); + if (h == NULL) + return NULL; - if (nl_dump_conntrack_table() == -1) - return -1; - - return 0; + return h; } -int nl_init_overrun_handler(void) +struct nfct_handle *nl_init_overrun_handler(void) { - STATE(overrun) = nfct_open(CONNTRACK, 0); - if (!STATE(overrun)) - return -1; + struct nfct_handle *h; + + h = nfct_open(CONNTRACK, 0); + if (h == NULL) + return NULL; - fcntl(nfct_fd(STATE(overrun)), F_SETFL, O_NONBLOCK); + fcntl(nfct_fd(h), F_SETFL, O_NONBLOCK); - nfct_callback_register(STATE(overrun), - NFCT_T_ALL, - STATE(mode)->overrun, - NULL); - return 0; + return h; } -/* no callback, it does not do anything with the output */ -int nl_init_request_handler(void) +struct nfct_handle *nl_init_request_handler(void) { - STATE(request) = nfct_open(CONNTRACK, 0); - if (!STATE(request)) - return -1; + struct nfct_handle *h; + + h = nfct_open(CONNTRACK, 0); + if (h == NULL) + return NULL; - return 0; + return h; } static int warned = 0; diff --git a/src/run.c b/src/run.c index ec110d7..6515e62 100644 --- a/src/run.c +++ b/src/run.c @@ -24,6 +24,7 @@ #include "log.h" #include "alarm.h" #include "fds.h" +#include "traffic_stats.h" #include #include @@ -100,6 +101,51 @@ static void do_overrun_alarm(struct alarm_block *a, void *data) add_alarm(&STATE(overrun_alarm), 2, 0); } +static int event_handler(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, + void *data) +{ + /* skip user-space filtering if already do it in the kernel */ + if (ct_filter_conntrack(ct, !CONFIG(filter_from_kernelspace))) + return NFCT_CB_STOP; + + switch(type) { + case NFCT_T_NEW: + STATE(mode)->event_new(ct); + break; + case NFCT_T_UPDATE: + STATE(mode)->event_upd(ct); + break; + case NFCT_T_DESTROY: + if (STATE(mode)->event_dst(ct)) + update_traffic_stats(ct); + break; + default: + dlog(LOG_WARNING, "unknown msg from ctnetlink\n"); + break; + } + + return NFCT_CB_CONTINUE; +} + +static int dump_handler(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, + void *data) +{ + if (ct_filter_conntrack(ct, 1)) + return NFCT_CB_CONTINUE; + + switch(type) { + case NFCT_T_UPDATE: + STATE(mode)->dump(ct); + break; + default: + dlog(LOG_WARNING, "unknown msg from ctnetlink"); + break; + } + return NFCT_CB_CONTINUE; +} + int init(void) { @@ -126,28 +172,44 @@ init(void) return -1; } - if (nl_init_event_handler() == -1) { + STATE(event) = nl_init_event_handler(); + if (STATE(event) == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); dlog(LOG_ERR, "no ctnetlink kernel support?"); return -1; } + nfct_callback_register(STATE(event), NFCT_T_ALL, event_handler, NULL); - if (nl_init_dump_handler() == -1) { + STATE(dump) = nl_init_dump_handler(); + if (STATE(dump) == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); dlog(LOG_ERR, "no ctnetlink kernel support?"); return -1; } + nfct_callback_register(STATE(dump), NFCT_T_ALL, dump_handler, NULL); - if (nl_init_overrun_handler() == -1) { + if (nl_dump_conntrack_table() == -1) { + dlog(LOG_ERR, "can't get kernel conntrack table"); + return -1; + } + + STATE(overrun) = nl_init_overrun_handler(); + if (STATE(overrun)== NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); dlog(LOG_ERR, "no ctnetlink kernel support?"); return -1; } - - if (nl_init_request_handler() == -1) { + nfct_callback_register(STATE(overrun), + NFCT_T_ALL, + STATE(mode)->overrun, + NULL); + + /* no callback, it does not do anything with the output */ + STATE(request) = nl_init_request_handler(); + if (STATE(request) == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); dlog(LOG_ERR, "no ctnetlink kernel support?"); diff --git a/src/stats-mode.c b/src/stats-mode.c index 763afe0..ad28008 100644 --- a/src/stats-mode.c +++ b/src/stats-mode.c @@ -104,7 +104,7 @@ static int overrun_stats(enum nf_conntrack_msg_type type, struct nf_conntrack *ct, void *data) { - if (ignore_conntrack(ct, 1)) + if (ct_filter_conntrack(ct, 1)) return NFCT_CB_CONTINUE; /* This is required by kernels < 2.6.20 */ diff --git a/src/sync-mode.c b/src/sync-mode.c index 152a8e2..e613111 100644 --- a/src/sync-mode.c +++ b/src/sync-mode.c @@ -438,7 +438,7 @@ static int overrun_sync(enum nf_conntrack_msg_type type, { struct us_conntrack *u; - if (ignore_conntrack(ct, 1)) + if (ct_filter_conntrack(ct, 1)) return NFCT_CB_CONTINUE; /* This is required by kernels < 2.6.20 */ -- cgit v1.2.3 From 65ad316d921930c9d5c1c8640fbf2f05ecd0ca49 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 6 Dec 2008 21:54:43 +0100 Subject: netlink: clone conntrack object while creation/update This patch changes the behaviour of nl_create_conntrack() and nl_update_conntrack() which now clone the conntrack object received as parameter. This was not required as these functions were called inside fork(), thus, they modified a copy of the real conntrack objects in the child process. However, this behaviour is broken following the try-again logic in __do_commit_step. For example, if we try to update an expected conntrack object that has vanished for whatever reason, since nl_update_conntrack() modifies the object (unset the master conntrack information), nl_create_conntrak() will create an entry without the master conntrack information. Signed-off-by: Pablo Neira Ayuso --- include/netlink.h | 4 ++-- src/netlink.c | 28 ++++++++++++++++++++++------ 2 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include/netlink.h') diff --git a/include/netlink.h b/include/netlink.h index 52482c1..7e2b94c 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -24,9 +24,9 @@ int nl_exist_conntrack(struct nf_conntrack *ct); int nl_get_conntrack(struct nf_conntrack *ct); -int nl_create_conntrack(struct nf_conntrack *ct); +int nl_create_conntrack(const struct nf_conntrack *ct); -int nl_update_conntrack(struct nf_conntrack *ct); +int nl_update_conntrack(const struct nf_conntrack *ct); int nl_destroy_conntrack(struct nf_conntrack *ct); diff --git a/src/netlink.c b/src/netlink.c index 81ac7a1..1a86a21 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -183,10 +183,15 @@ int nl_get_conntrack(struct nf_conntrack *ct) return __nl_get_conntrack(STATE(dump), ct); } -/* This function modifies the conntrack passed as argument! */ -int nl_create_conntrack(struct nf_conntrack *ct) +int nl_create_conntrack(const struct nf_conntrack *orig) { + int ret; uint8_t flags; + struct nf_conntrack *ct; + + ct = nfct_clone(orig); + if (ct == NULL) + return -1; /* we hit error if we try to change the expected bit */ if (nfct_attr_is_set(ct, ATTR_STATUS)) { @@ -206,13 +211,21 @@ int nl_create_conntrack(struct nf_conntrack *ct) nfct_set_attr_u8(ct, ATTR_TCP_FLAGS_REPL, flags); nfct_set_attr_u8(ct, ATTR_TCP_MASK_REPL, flags); - return nfct_query(STATE(dump), NFCT_Q_CREATE_UPDATE, ct); + ret = nfct_query(STATE(dump), NFCT_Q_CREATE_UPDATE, ct); + nfct_destroy(ct); + + return ret; } -/* This function modifies the conntrack passed as argument! */ -int nl_update_conntrack(struct nf_conntrack *ct) +int nl_update_conntrack(const struct nf_conntrack *orig) { + int ret; uint8_t flags; + struct nf_conntrack *ct; + + ct = nfct_clone(orig); + if (ct == NULL) + return -1; /* unset NAT info, otherwise we hit error */ nfct_attr_unset(ct, ATTR_SNAT_IPV4); @@ -249,7 +262,10 @@ int nl_update_conntrack(struct nf_conntrack *ct) nfct_set_attr_u8(ct, ATTR_TCP_FLAGS_REPL, flags); nfct_set_attr_u8(ct, ATTR_TCP_MASK_REPL, flags); - return nfct_query(STATE(dump), NFCT_Q_CREATE_UPDATE, ct); + ret = nfct_query(STATE(dump), NFCT_Q_CREATE_UPDATE, ct); + nfct_destroy(ct); + + return ret; } int nl_destroy_conntrack(struct nf_conntrack *ct) -- cgit v1.2.3 From 27ee6a0f1255cb6c7dadc55caf3928fd62354314 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 7 Dec 2008 12:03:42 +0100 Subject: netlink: constify conntrack object parameter of nl_*_conntrack() This patch constifies the first parameter, which is a conntrack object, in all nl_*_conntrack() functions. Signed-off-by: Pablo Neira Ayuso --- include/netlink.h | 6 +++--- src/netlink.c | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/netlink.h') diff --git a/include/netlink.h b/include/netlink.h index 7e2b94c..af98c5e 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -20,15 +20,15 @@ void nl_resize_socket_buffer(struct nfct_handle *h); int nl_dump_conntrack_table(void); -int nl_exist_conntrack(struct nf_conntrack *ct); +int nl_exist_conntrack(const struct nf_conntrack *ct); -int nl_get_conntrack(struct nf_conntrack *ct); +int nl_get_conntrack(const struct nf_conntrack *ct); int nl_create_conntrack(const struct nf_conntrack *ct); int nl_update_conntrack(const struct nf_conntrack *ct); -int nl_destroy_conntrack(struct nf_conntrack *ct); +int nl_destroy_conntrack(const struct nf_conntrack *ct); static inline int ct_is_related(const struct nf_conntrack *ct) { diff --git a/src/netlink.c b/src/netlink.c index 5929232..89c85d7 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -154,7 +154,8 @@ int nl_overrun_request_resync(void) return nfct_send(STATE(overrun), NFCT_Q_DUMP, &family); } -static int __nl_get_conntrack(struct nfct_handle *h, struct nf_conntrack *ct) +static int +__nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) { int ret; char __tmp[nfct_maxsize()]; @@ -172,13 +173,13 @@ static int __nl_get_conntrack(struct nfct_handle *h, struct nf_conntrack *ct) return 1; } -int nl_exist_conntrack(struct nf_conntrack *ct) +int nl_exist_conntrack(const struct nf_conntrack *ct) { return __nl_get_conntrack(STATE(request), ct); } /* get the conntrack and update the cache */ -int nl_get_conntrack(struct nf_conntrack *ct) +int nl_get_conntrack(const struct nf_conntrack *ct) { return __nl_get_conntrack(STATE(dump), ct); } @@ -268,7 +269,7 @@ int nl_update_conntrack(const struct nf_conntrack *orig) return ret; } -int nl_destroy_conntrack(struct nf_conntrack *ct) +int nl_destroy_conntrack(const struct nf_conntrack *ct) { return nfct_query(STATE(dump), NFCT_Q_DESTROY, ct); } -- cgit v1.2.3 From 528b304b587dc5ad5b147d53eeca60cb9df8c087 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 7 Dec 2008 12:03:54 +0100 Subject: netlink: remove unnecessary whitespace lines in netlink.h Signed-off-by: Pablo Neira Ayuso --- include/netlink.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/netlink.h') diff --git a/include/netlink.h b/include/netlink.h index af98c5e..52d2480 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -7,27 +7,17 @@ struct nf_conntrack; struct nfct_handle; struct nfct_handle *nl_init_event_handler(void); - struct nfct_handle *nl_init_dump_handler(void); - struct nfct_handle *nl_init_request_handler(void); - struct nfct_handle *nl_init_overrun_handler(void); int nl_overrun_request_resync(void); - void nl_resize_socket_buffer(struct nfct_handle *h); - int nl_dump_conntrack_table(void); - int nl_exist_conntrack(const struct nf_conntrack *ct); - int nl_get_conntrack(const struct nf_conntrack *ct); - int nl_create_conntrack(const struct nf_conntrack *ct); - int nl_update_conntrack(const struct nf_conntrack *ct); - int nl_destroy_conntrack(const struct nf_conntrack *ct); static inline int ct_is_related(const struct nf_conntrack *ct) -- cgit v1.2.3 From dd93edbbd09af4523dfe0f0c3c92f510daf223e8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Dec 2008 00:02:44 +0100 Subject: src: recover conntrackd -F operation This patch recovers the option -F for conntrackd. This will be particularly useful to flush the kernel conntrack table without getting the event notification of the conntrack deletions (that will happen with Linux kernel >= 2.6.29). Signed-off-by: Pablo Neira Ayuso --- conntrackd.8 | 4 ++++ include/netlink.h | 1 + src/netlink.c | 5 +++++ src/run.c | 8 ++------ 4 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include/netlink.h') diff --git a/conntrackd.8 b/conntrackd.8 index 769a0f1..2d7b228 100644 --- a/conntrackd.8 +++ b/conntrackd.8 @@ -37,6 +37,10 @@ with "-i" and "-e" parameters. .BI "-f " Flush the internal and the external cache .TP +.BI "-F " +Flush the kernel conntrack table (if you use a Linux kernel >= 2.6.29, this +option will not flush your internal and external cache). +.TP .BI "-k " Kill the daemon .TP diff --git a/include/netlink.h b/include/netlink.h index 52d2480..b44ef21 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -14,6 +14,7 @@ struct nfct_handle *nl_init_overrun_handler(void); int nl_overrun_request_resync(void); void nl_resize_socket_buffer(struct nfct_handle *h); int nl_dump_conntrack_table(void); +int nl_flush_conntrack_table(void); int nl_exist_conntrack(const struct nf_conntrack *ct); int nl_get_conntrack(const struct nf_conntrack *ct); int nl_create_conntrack(const struct nf_conntrack *ct); diff --git a/src/netlink.c b/src/netlink.c index 31cee61..9d155aa 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -148,6 +148,11 @@ int nl_dump_conntrack_table(void) return nfct_query(STATE(dump), NFCT_Q_DUMP, &CONFIG(family)); } +int nl_flush_conntrack_table(void) +{ + return nfct_query(STATE(request), NFCT_Q_FLUSH, &CONFIG(family)); +} + int nl_overrun_request_resync(void) { int family = CONFIG(family); diff --git a/src/run.c b/src/run.c index 6515e62..4bd0e5b 100644 --- a/src/run.c +++ b/src/run.c @@ -78,12 +78,8 @@ void local_handler(int fd, void *data) switch(type) { case FLUSH_MASTER: - dlog(LOG_WARNING, "`conntrackd -F' is deprecated. " - "Use conntrack -F instead."); - if (fork() == 0) { - execlp("conntrack", "conntrack", "-F", NULL); - exit(EXIT_SUCCESS); - } + dlog(LOG_NOTICE, "flushing kernel conntrack table"); + nl_flush_conntrack_table(); return; case RESYNC_MASTER: dlog(LOG_NOTICE, "resync with master table"); -- cgit v1.2.3 From 98154b7d83d1493ba9c2d1b0a8e4b39b635e3082 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 11 Dec 2008 18:35:03 +0100 Subject: netlink: fix EILSEQ error messages due to process race condition This patch fixes a race condition that triggers EILSEQ errors (wrong sequence message). The problems is triggered when the child process resets the timers at the same time that the parent process requests a resync. Since both the child and the parent process use the same descriptors, the sequence tracking code in libnfnetlink gets confused as it considers that it is receiving out of sequence netlink messages. This patch introduces internal handlers to commit and reset timers so that the parent and the child do not use the same descriptors to operate with the kernel. This patch changes the prototype of all nf_*_conntrack() functions. Now, the nfct handler is passed as first parameter, this change is required to fix this problem. The rest of the changes on the API is done for consistency. Signed-off-by: Pablo Neira Ayuso --- include/netlink.h | 16 +++++++-------- src/cache_iterators.c | 56 ++++++++++++++++++++++++++++++++++++--------------- src/cache_wt.c | 10 ++++----- src/netlink.c | 32 ++++++++++++++--------------- src/run.c | 10 ++++----- 5 files changed, 74 insertions(+), 50 deletions(-) (limited to 'include/netlink.h') diff --git a/include/netlink.h b/include/netlink.h index b44ef21..5feb3e9 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -11,15 +11,15 @@ struct nfct_handle *nl_init_dump_handler(void); struct nfct_handle *nl_init_request_handler(void); struct nfct_handle *nl_init_overrun_handler(void); -int nl_overrun_request_resync(void); +int nl_overrun_request_resync(struct nfct_handle *h); void nl_resize_socket_buffer(struct nfct_handle *h); -int nl_dump_conntrack_table(void); -int nl_flush_conntrack_table(void); -int nl_exist_conntrack(const struct nf_conntrack *ct); -int nl_get_conntrack(const struct nf_conntrack *ct); -int nl_create_conntrack(const struct nf_conntrack *ct); -int nl_update_conntrack(const struct nf_conntrack *ct); -int nl_destroy_conntrack(const struct nf_conntrack *ct); +int nl_dump_conntrack_table(struct nfct_handle *h); +int nl_flush_conntrack_table(struct nfct_handle *h); +int nl_exist_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); +int nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); +int nl_create_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); +int nl_update_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); +int nl_destroy_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); static inline int ct_is_related(const struct nf_conntrack *ct) { diff --git a/src/cache_iterators.c b/src/cache_iterators.c index fd7aed6..661528f 100644 --- a/src/cache_iterators.c +++ b/src/cache_iterators.c @@ -95,7 +95,13 @@ void cache_dump(struct cache *c, int fd, int type) hashtable_iterate(c->h, (void *) &tmp, do_dump); } -static void __do_commit_step(struct cache *c, struct us_conntrack *u) +struct __commit_container { + struct nfct_handle *h; + struct cache *c; +}; + +static void +__do_commit_step(struct __commit_container *tmp, struct us_conntrack *u) { int ret, retry = 1; struct nf_conntrack *ct = u->ct; @@ -107,14 +113,14 @@ static void __do_commit_step(struct cache *c, struct us_conntrack *u) nfct_set_attr_u32(ct, ATTR_TIMEOUT, CONFIG(commit_timeout)); try_again: - ret = nl_exist_conntrack(ct); + ret = nl_exist_conntrack(tmp->h, ct); switch (ret) { case -1: dlog(LOG_ERR, "commit-exist: %s", strerror(errno)); dlog_ct(STATE(log), ct, NFCT_O_PLAIN); break; case 0: - if (nl_create_conntrack(ct) == -1) { + if (nl_create_conntrack(tmp->h, ct) == -1) { if (errno == ENOMEM) { if (retry) { retry = 0; @@ -124,13 +130,13 @@ try_again: } dlog(LOG_ERR, "commit-create: %s", strerror(errno)); dlog_ct(STATE(log), ct, NFCT_O_PLAIN); - c->commit_fail++; + tmp->c->commit_fail++; } else - c->commit_ok++; + tmp->c->commit_ok++; break; case 1: - c->commit_exist++; - if (nl_update_conntrack(ct) == -1) { + tmp->c->commit_exist++; + if (nl_update_conntrack(tmp->h, ct) == -1) { if (errno == ENOMEM || errno == ETIME) { if (retry) { retry = 0; @@ -140,7 +146,7 @@ try_again: } /* try harder, delete the entry and retry */ if (retry) { - ret = nl_destroy_conntrack(ct); + ret = nl_destroy_conntrack(tmp->h, ct); if (ret == 0 || (ret == -1 && errno == ENOENT)) { retry = 0; @@ -148,14 +154,14 @@ try_again: } dlog(LOG_ERR, "commit-rm: %s", strerror(errno)); dlog_ct(STATE(log), ct, NFCT_O_PLAIN); - c->commit_fail++; + tmp->c->commit_fail++; break; } dlog(LOG_ERR, "commit-update: %s", strerror(errno)); dlog_ct(STATE(log), ct, NFCT_O_PLAIN); - c->commit_fail++; + tmp->c->commit_fail++; } else - c->commit_ok++; + tmp->c->commit_ok++; break; } } @@ -188,10 +194,18 @@ void cache_commit(struct cache *c) unsigned int commit_ok = c->commit_ok; unsigned int commit_exist = c->commit_exist; unsigned int commit_fail = c->commit_fail; + struct __commit_container tmp; + + tmp.h = nfct_open(CONNTRACK, 0); + if (tmp.h == NULL) { + dlog(LOG_ERR, "can't create handler to commit entries"); + return; + } + tmp.c = c; /* commit master conntrack first, then related ones */ - hashtable_iterate(c->h, c, do_commit_master); - hashtable_iterate(c->h, c, do_commit_related); + hashtable_iterate(c->h, &tmp, do_commit_master); + hashtable_iterate(c->h, &tmp, do_commit_related); /* calculate new entries committed */ commit_ok = c->commit_ok - commit_ok; @@ -207,16 +221,18 @@ void cache_commit(struct cache *c) if (commit_fail) dlog(LOG_NOTICE, "%u entries can't be " "committed", commit_fail); + nfct_close(tmp.h); } static int do_reset_timers(void *data1, void *data2) { int ret; u_int32_t current_timeout; + struct nfct_handle *h = data1; struct us_conntrack *u = data2; struct nf_conntrack *ct = u->ct; - ret = nl_get_conntrack(ct); + ret = nl_get_conntrack(h, ct); switch (ret) { case -1: /* the kernel table is not in sync with internal cache */ @@ -231,7 +247,7 @@ static int do_reset_timers(void *data1, void *data2) nfct_set_attr_u32(ct, ATTR_TIMEOUT, CONFIG(purge_timeout)); - if (nl_update_conntrack(ct) == -1) { + if (nl_update_conntrack(h, ct) == -1) { if (errno == ETIME || errno == ENOENT) break; dlog(LOG_ERR, "reset-timers-upd: %s", strerror(errno)); @@ -244,7 +260,15 @@ static int do_reset_timers(void *data1, void *data2) void cache_reset_timers(struct cache *c) { - hashtable_iterate(c->h, NULL, do_reset_timers); + struct nfct_handle *h; + + h = nfct_open(CONNTRACK, 0); + if (h == NULL) { + dlog(LOG_ERR, "can't create handler to reset timers"); + return; + } + hashtable_iterate(c->h, h, do_reset_timers); + nfct_close(h); } static int do_flush(void *data1, void *data2) diff --git a/src/cache_wt.c b/src/cache_wt.c index 65a1fc4..d0ae8bb 100644 --- a/src/cache_wt.c +++ b/src/cache_wt.c @@ -31,7 +31,7 @@ static void add_wt(struct us_conntrack *u) char __ct[nfct_maxsize()]; struct nf_conntrack *ct = (struct nf_conntrack *)(void*) __ct; - ret = nl_exist_conntrack(u->ct); + ret = nl_exist_conntrack(STATE(request), u->ct); switch (ret) { case -1: dlog(LOG_ERR, "cache_wt problem: %s", strerror(errno)); @@ -39,14 +39,14 @@ static void add_wt(struct us_conntrack *u) break; case 0: memcpy(ct, u->ct, nfct_maxsize()); - if (nl_create_conntrack(ct) == -1) { + if (nl_create_conntrack(STATE(dump), ct) == -1) { dlog(LOG_ERR, "cache_wt create: %s", strerror(errno)); dlog_ct(STATE(log), u->ct, NFCT_O_PLAIN); } break; case 1: memcpy(ct, u->ct, nfct_maxsize()); - if (nl_update_conntrack(ct) == -1) { + if (nl_update_conntrack(STATE(dump), ct) == -1) { dlog(LOG_ERR, "cache_wt crt-upd: %s", strerror(errno)); dlog_ct(STATE(log), u->ct, NFCT_O_PLAIN); } @@ -61,7 +61,7 @@ static void upd_wt(struct us_conntrack *u) memcpy(ct, u->ct, nfct_maxsize()); - if (nl_update_conntrack(ct) == -1) { + if (nl_update_conntrack(STATE(dump), ct) == -1) { dlog(LOG_ERR, "cache_wt update:%s", strerror(errno)); dlog_ct(STATE(log), u->ct, NFCT_O_PLAIN); } @@ -79,7 +79,7 @@ static void writethrough_update(struct us_conntrack *u, void *data) static void writethrough_destroy(struct us_conntrack *u, void *data) { - nl_destroy_conntrack(u->ct); + nl_destroy_conntrack(STATE(dump), u->ct); } struct cache_feature writethrough_feature = { diff --git a/src/netlink.c b/src/netlink.c index 9d155aa..29281f4 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -143,20 +143,20 @@ void nl_resize_socket_buffer(struct nfct_handle *h) CONFIG(netlink_buffer_size)); } -int nl_dump_conntrack_table(void) +int nl_dump_conntrack_table(struct nfct_handle *h) { - return nfct_query(STATE(dump), NFCT_Q_DUMP, &CONFIG(family)); + return nfct_query(h, NFCT_Q_DUMP, &CONFIG(family)); } -int nl_flush_conntrack_table(void) +int nl_flush_conntrack_table(struct nfct_handle *h) { - return nfct_query(STATE(request), NFCT_Q_FLUSH, &CONFIG(family)); + return nfct_query(h, NFCT_Q_FLUSH, &CONFIG(family)); } -int nl_overrun_request_resync(void) +int nl_overrun_request_resync(struct nfct_handle *h) { int family = CONFIG(family); - return nfct_send(STATE(overrun), NFCT_Q_DUMP, &family); + return nfct_send(h, NFCT_Q_DUMP, &family); } static int @@ -178,18 +178,18 @@ __nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) return 1; } -int nl_exist_conntrack(const struct nf_conntrack *ct) +int nl_exist_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) { - return __nl_get_conntrack(STATE(request), ct); + return __nl_get_conntrack(h, ct); } /* get the conntrack and update the cache */ -int nl_get_conntrack(const struct nf_conntrack *ct) +int nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) { - return __nl_get_conntrack(STATE(dump), ct); + return __nl_get_conntrack(h, ct); } -int nl_create_conntrack(const struct nf_conntrack *orig) +int nl_create_conntrack(struct nfct_handle *h, const struct nf_conntrack *orig) { int ret; uint8_t flags; @@ -217,13 +217,13 @@ int nl_create_conntrack(const struct nf_conntrack *orig) nfct_set_attr_u8(ct, ATTR_TCP_FLAGS_REPL, flags); nfct_set_attr_u8(ct, ATTR_TCP_MASK_REPL, flags); - ret = nfct_query(STATE(dump), NFCT_Q_CREATE, ct); + ret = nfct_query(h, NFCT_Q_CREATE, ct); nfct_destroy(ct); return ret; } -int nl_update_conntrack(const struct nf_conntrack *orig) +int nl_update_conntrack(struct nfct_handle *h, const struct nf_conntrack *orig) { int ret; uint8_t flags; @@ -271,13 +271,13 @@ int nl_update_conntrack(const struct nf_conntrack *orig) nfct_set_attr_u8(ct, ATTR_TCP_FLAGS_REPL, flags); nfct_set_attr_u8(ct, ATTR_TCP_MASK_REPL, flags); - ret = nfct_query(STATE(dump), NFCT_Q_UPDATE, ct); + ret = nfct_query(h, NFCT_Q_UPDATE, ct); nfct_destroy(ct); return ret; } -int nl_destroy_conntrack(const struct nf_conntrack *ct) +int nl_destroy_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) { - return nfct_query(STATE(dump), NFCT_Q_DESTROY, ct); + return nfct_query(h, NFCT_Q_DESTROY, ct); } diff --git a/src/run.c b/src/run.c index 8158f10..ee985f4 100644 --- a/src/run.c +++ b/src/run.c @@ -111,11 +111,11 @@ void local_handler(int fd, void *data) switch(type) { case FLUSH_MASTER: dlog(LOG_NOTICE, "flushing kernel conntrack table"); - nl_flush_conntrack_table(); + nl_flush_conntrack_table(STATE(request)); return; case RESYNC_MASTER: dlog(LOG_NOTICE, "resync with master table"); - nl_dump_conntrack_table(); + nl_dump_conntrack_table(STATE(dump)); return; } @@ -125,7 +125,7 @@ void local_handler(int fd, void *data) static void do_overrun_alarm(struct alarm_block *a, void *data) { - nl_overrun_request_resync(); + nl_overrun_request_resync(STATE(overrun)); add_alarm(&STATE(overrun_alarm), 2, 0); } @@ -218,7 +218,7 @@ init(void) } nfct_callback_register(STATE(dump), NFCT_T_ALL, dump_handler, NULL); - if (nl_dump_conntrack_table() == -1) { + if (nl_dump_conntrack_table(STATE(dump)) == -1) { dlog(LOG_ERR, "can't get kernel conntrack table"); return -1; } @@ -321,7 +321,7 @@ static void __run(struct timeval *next_alarm) * size and resync with master conntrack table. */ nl_resize_socket_buffer(STATE(event)); - nl_overrun_request_resync(); + nl_overrun_request_resync(STATE(overrun)); add_alarm(&STATE(overrun_alarm), 2, 0); break; case ENOENT: -- cgit v1.2.3 From 7ae054f8aae252ee9c57e26327675e466fc1d15d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 17 Jan 2009 18:03:52 +0100 Subject: src: support for redundant dedicated links This patch adds support for redundant dedicated links. You can add a pool of dedicated links that can be used if the current active fails. Signed-off-by: Pablo Neira Ayuso --- doc/sync/alarm/conntrackd.conf | 19 ++++ doc/sync/ftfw/conntrackd.conf | 19 ++++ doc/sync/notrack/conntrackd.conf | 19 ++++ include/conntrackd.h | 10 +- include/mcast.h | 28 ++++- include/netlink.h | 1 + include/network.h | 6 +- src/main.c | 6 +- src/mcast.c | 231 ++++++++++++++++++++++++++++++++++++--- src/netlink.c | 16 +++ src/network.c | 9 +- src/read_config_lex.l | 1 + src/read_config_yy.y | 105 ++++++++++++------ src/sync-mode.c | 109 ++++++++++++++---- 14 files changed, 495 insertions(+), 84 deletions(-) (limited to 'include/netlink.h') diff --git a/doc/sync/alarm/conntrackd.conf b/doc/sync/alarm/conntrackd.conf index f16f439..528ff8f 100644 --- a/doc/sync/alarm/conntrackd.conf +++ b/doc/sync/alarm/conntrackd.conf @@ -104,6 +104,25 @@ Sync { # Checksum on } + # + # You can specify more than one dedicated link. Thus, if one dedicated + # link fails, conntrackd can fail-over to another. Note that adding + # more than one dedicated link does not mean that state-updates will + # be sent to all of them. There is only one active dedicated link at + # a given moment. The `Default' keyword indicates that this interface + # will be selected as the initial dedicated link. You can have + # up to 4 redundant dedicated links. Note: Use different multicast + # groups for every redundant link. + # + # Multicast Default { + # IPv4_address 225.0.0.51 + # Group 3781 + # IPv4_interface 192.168.100.101 + # Interface eth3 + # # McastSndSocketBuffer 1249280 + # # McastRcvSocketBuffer 1249280 + # Checksum on + # } } # diff --git a/doc/sync/ftfw/conntrackd.conf b/doc/sync/ftfw/conntrackd.conf index d85fc28..2e60f2c 100644 --- a/doc/sync/ftfw/conntrackd.conf +++ b/doc/sync/ftfw/conntrackd.conf @@ -112,6 +112,25 @@ Sync { # Checksum on } + # + # You can specify more than one dedicated link. Thus, if one dedicated + # link fails, conntrackd can fail-over to another. Note that adding + # more than one dedicated link does not mean that state-updates will + # be sent to all of them. There is only one active dedicated link at + # a given moment. The `Default' keyword indicates that this interface + # will be selected as the initial dedicated link. You can have + # up to 4 redundant dedicated links. Note: Use different multicast + # groups for every redundant link. + # + # Multicast Default { + # IPv4_address 225.0.0.51 + # Group 3781 + # IPv4_interface 192.168.100.101 + # Interface eth3 + # # McastSndSocketBuffer 1249280 + # # McastRcvSocketBuffer 1249280 + # Checksum on + # } } # diff --git a/doc/sync/notrack/conntrackd.conf b/doc/sync/notrack/conntrackd.conf index 4d03234..7f8c8a3 100644 --- a/doc/sync/notrack/conntrackd.conf +++ b/doc/sync/notrack/conntrackd.conf @@ -94,6 +94,25 @@ Sync { # Checksum on } + # + # You can specify more than one dedicated link. Thus, if one dedicated + # link fails, conntrackd can fail-over to another. Note that adding + # more than one dedicated link does not mean that state-updates will + # be sent to all of them. There is only one active dedicated link at + # a given moment. The `Default' keyword indicates that this interface + # will be selected as the initial dedicated link. You can have + # up to 4 redundant dedicated links. Note: Use different multicast + # groups for every redundant link. + # + # Multicast Default { + # IPv4_address 225.0.0.51 + # Group 3781 + # IPv4_interface 192.168.100.101 + # Interface eth3 + # # McastSndSocketBuffer 1249280 + # # McastRcvSocketBuffer 1249280 + # Checksum on + # } } # diff --git a/include/conntrackd.h b/include/conntrackd.h index 3637e2c..ab5d825 100644 --- a/include/conntrackd.h +++ b/include/conntrackd.h @@ -29,6 +29,7 @@ #define STATS_NETWORK 28 /* extended network stats */ #define STATS_CACHE 29 /* extended cache stats */ #define STATS_RUNTIME 30 /* extended runtime stats */ +#define STATS_MULTICAST 31 /* multicast network stats */ #define DEFAULT_CONFIGFILE "/etc/conntrackd/conntrackd.conf" #define DEFAULT_LOCKFILE "/var/lock/conntrackd.lock" @@ -66,7 +67,9 @@ struct ct_conf { int syslog_facility; char lockfile[FILENAME_MAXLEN]; int hashsize; /* hashtable size */ - struct mcast_conf mcast; /* multicast settings */ + int mcast_links; + int mcast_default_link; + struct mcast_conf mcast[MCAST_LINKS_MAX]; struct local_conf local; /* unix socket facilities */ int limit; int refresh; @@ -148,8 +151,9 @@ struct ct_sync_state { struct cache *internal; /* internal events cache (netlink) */ struct cache *external; /* external events cache (mcast) */ - struct mcast_sock *mcast_server; /* multicast socket: incoming */ - struct mcast_sock *mcast_client; /* multicast socket: outgoing */ + struct mcast_sock_multi *mcast_server; /* multicast incoming */ + struct mcast_sock_multi *mcast_client; /* multicast outgoing */ + struct nlif_handle *mcast_iface; struct queue *tx_queue; struct sync_mode *sync; /* sync mode */ diff --git a/include/mcast.h b/include/mcast.h index 7c4b1d6..623f390 100644 --- a/include/mcast.h +++ b/include/mcast.h @@ -19,6 +19,7 @@ struct mcast_conf { unsigned int interface_index6; } ifa; int mtu; + int interface_idx; int sndbuf; int rcvbuf; char iface[IFNAMSIZ]; @@ -37,19 +38,42 @@ struct mcast_sock { struct sockaddr_in6 ipv6; } addr; socklen_t sockaddr_len; + int interface_idx; struct mcast_stats stats; }; +#define MCAST_LINKS_MAX 4 + +struct mcast_sock_multi { + int num_links; + int max_mtu; + struct mcast_sock *current_link; + struct mcast_sock *multi[MCAST_LINKS_MAX]; +}; + struct mcast_sock *mcast_server_create(struct mcast_conf *conf); void mcast_server_destroy(struct mcast_sock *m); +struct mcast_sock_multi *mcast_server_create_multi(struct mcast_conf *conf, int conf_len); +void mcast_server_destroy_multi(struct mcast_sock_multi *m); struct mcast_sock *mcast_client_create(struct mcast_conf *conf); void mcast_client_destroy(struct mcast_sock *m); +struct mcast_sock_multi *mcast_client_create_multi(struct mcast_conf *conf, int conf_len); +void mcast_client_destroy_multi(struct mcast_sock_multi*m); ssize_t mcast_send(struct mcast_sock *m, void *data, int size); ssize_t mcast_recv(struct mcast_sock *m, void *data, int size); -struct mcast_stats *mcast_get_stats(struct mcast_sock *m); -void mcast_dump_stats(int fd, struct mcast_sock *s, struct mcast_sock *r); +int mcast_get_fd(struct mcast_sock *m); +int mcast_get_ifidx(struct mcast_sock_multi *m, int i); +int mcast_get_current_ifidx(struct mcast_sock_multi *m); + +struct mcast_sock *mcast_get_current_link(struct mcast_sock_multi *m); +void mcast_set_current_link(struct mcast_sock_multi *m, int i); + +void mcast_dump_stats(int fd, const struct mcast_sock_multi *s, const struct mcast_sock_multi *r); + +struct nlif_handle; +void mcast_dump_stats_extended(int fd, const struct mcast_sock_multi *s, const struct mcast_sock_multi *r, const struct nlif_handle *h); #endif diff --git a/include/netlink.h b/include/netlink.h index 5feb3e9..4bc5ee4 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -10,6 +10,7 @@ struct nfct_handle *nl_init_event_handler(void); struct nfct_handle *nl_init_dump_handler(void); struct nfct_handle *nl_init_request_handler(void); struct nfct_handle *nl_init_overrun_handler(void); +struct nlif_handle *nl_init_interface_handler(void); int nl_overrun_request_resync(struct nfct_handle *h); void nl_resize_socket_buffer(struct nfct_handle *h); diff --git a/include/network.h b/include/network.h index f02d920..740e762 100644 --- a/include/network.h +++ b/include/network.h @@ -76,7 +76,7 @@ enum { __hdr; \ }) -struct mcast_sock; +struct mcast_sock_multi; enum { SEQ_UNKNOWN, @@ -94,8 +94,8 @@ struct mcast_conf; int mcast_buffered_init(int mtu); void mcast_buffered_destroy(void); -int mcast_buffered_send_netmsg(struct mcast_sock *m, const struct nethdr *net); -ssize_t mcast_buffered_pending_netmsg(struct mcast_sock *m); +int mcast_buffered_send_netmsg(struct mcast_sock_multi *m, const struct nethdr *net); +ssize_t mcast_buffered_pending_netmsg(struct mcast_sock_multi *m); #define IS_DATA(x) (x->type <= NET_T_STATE_MAX && \ (x->flags & ~(NET_F_HELLO | NET_F_HELLO_BACK)) == 0) diff --git a/src/main.c b/src/main.c index 929b5c9..061a73e 100644 --- a/src/main.c +++ b/src/main.c @@ -43,7 +43,7 @@ static const char usage_client_commands[] = " -i, display content of the internal cache\n" " -e, display the content of the external cache\n" " -k, kill conntrack daemon\n" - " -s [|network|cache|runtime], dump statistics\n" + " -s [|network|cache|runtime|multicast], dump statistics\n" " -R, resync with kernel conntrack table\n" " -n, request resync with other node (only FT-FW and NOTRACK modes)\n" " -x, dump cache in XML format (requires -i or -e)\n" @@ -169,6 +169,10 @@ int main(int argc, char *argv[]) strlen(argv[i+1])) == 0) { action = STATS_RUNTIME; i++; + } else if (strncmp(argv[i+1], "multicast", + strlen(argv[i+1])) == 0) { + action = STATS_MULTICAST; + i++; } else { fprintf(stderr, "ERROR: unknown " "parameter `%s' for " diff --git a/src/mcast.c b/src/mcast.c index 2bb8743..70205d8 100644 --- a/src/mcast.c +++ b/src/mcast.c @@ -1,5 +1,5 @@ /* - * (C) 2006 by Pablo Neira Ayuso + * (C) 2006-2009 by Pablo Neira Ayuso * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,6 +45,8 @@ struct mcast_sock *mcast_server_create(struct mcast_conf *conf) return NULL; memset(m, 0, sizeof(struct mcast_sock)); + m->interface_idx = conf->interface_idx; + switch(conf->ipproto) { case AF_INET: mreq.ipv4.imr_multiaddr.s_addr = conf->in.inet_addr.s_addr; @@ -147,12 +149,52 @@ struct mcast_sock *mcast_server_create(struct mcast_conf *conf) return m; } +struct mcast_sock_multi * +mcast_server_create_multi(struct mcast_conf *conf, int conf_len) +{ + struct mcast_sock_multi *m; + int i, j; + + if (conf_len <= 0 || conf_len > MCAST_LINKS_MAX) + return NULL; + + m = calloc(sizeof(struct mcast_sock_multi), 1); + if (m == NULL) + return NULL; + + m->max_mtu = INT_MAX; + for (i=0; imulti[i] = mcast_server_create(&conf[i]); + if (m->multi[i] == NULL) { + for (j=0; jmulti[j]); + } + free(m); + return NULL; + } + if (m->max_mtu > conf[i].mtu) + m->max_mtu = conf[i].mtu; + } + m->num_links = conf_len; + + return m; +} + void mcast_server_destroy(struct mcast_sock *m) { close(m->fd); free(m); } +void mcast_server_destroy_multi(struct mcast_sock_multi *m) +{ + int i; + + for (i=0; inum_links; i++) + mcast_server_destroy(m->multi[i]); + free(m); +} + static int __mcast_client_create_ipv4(struct mcast_sock *m, struct mcast_conf *conf) { @@ -222,6 +264,8 @@ struct mcast_sock *mcast_client_create(struct mcast_conf *conf) return NULL; memset(m, 0, sizeof(struct mcast_sock)); + m->interface_idx = conf->interface_idx; + if ((m->fd = socket(conf->ipproto, SOCK_DGRAM, 0)) == -1) { debug("mcast_sock_client_create:socket"); free(m); @@ -275,12 +319,52 @@ struct mcast_sock *mcast_client_create(struct mcast_conf *conf) return m; } +struct mcast_sock_multi * +mcast_client_create_multi(struct mcast_conf *conf, int conf_len) +{ + struct mcast_sock_multi *m; + int i, j; + + if (conf_len <= 0 || conf_len > MCAST_LINKS_MAX) + return NULL; + + m = calloc(sizeof(struct mcast_sock_multi), 1); + if (m == NULL) + return NULL; + + m->max_mtu = INT_MAX; + for (i=0; imulti[i] = mcast_client_create(&conf[i]); + if (m->multi[i] == NULL) { + for (j=0; jmulti[j]); + } + free(m); + return NULL; + } + if (m->max_mtu > conf[i].mtu) + m->max_mtu = conf[i].mtu; + } + m->num_links = conf_len; + + return m; +} + void mcast_client_destroy(struct mcast_sock *m) { close(m->fd); free(m); } +void mcast_client_destroy_multi(struct mcast_sock_multi *m) +{ + int i; + + for (i=0; inum_links; i++) + mcast_client_destroy(m->multi[i]); + free(m); +} + ssize_t mcast_send(struct mcast_sock *m, void *data, int size) { ssize_t ret; @@ -326,29 +410,140 @@ ssize_t mcast_recv(struct mcast_sock *m, void *data, int size) return ret; } -struct mcast_stats *mcast_get_stats(struct mcast_sock *m) +void mcast_set_current_link(struct mcast_sock_multi *m, int i) +{ + m->current_link = m->multi[i]; +} + +struct mcast_sock *mcast_get_current_link(struct mcast_sock_multi *m) +{ + return m->current_link; +} + +int mcast_get_fd(struct mcast_sock *m) +{ + return m->fd; +} + +int mcast_get_current_ifidx(struct mcast_sock_multi *m) { - return &m->stats; + return m->current_link->interface_idx; } -void mcast_dump_stats(int fd, struct mcast_sock *s, struct mcast_sock *r) +int mcast_get_ifidx(struct mcast_sock_multi *m, int i) { - char buf[512]; + return m->multi[i]->interface_idx; +} + +static int +mcast_snprintf_stats(char *buf, size_t buflen, char *ifname, + struct mcast_stats *s, struct mcast_stats *r) +{ + size_t size; + + size = snprintf(buf, buflen, "multicast traffic (active device=%s):\n" + "%20llu Bytes sent " + "%20llu Bytes recv\n" + "%20llu Pckts sent " + "%20llu Pckts recv\n" + "%20llu Error send " + "%20llu Error recv\n\n", + ifname, + (unsigned long long)s->bytes, + (unsigned long long)r->bytes, + (unsigned long long)s->messages, + (unsigned long long)r->messages, + (unsigned long long)s->error, + (unsigned long long)r->error); + return size; +} + +static int +mcast_snprintf_stats2(char *buf, size_t buflen, const char *ifname, + const char *status, int active, + struct mcast_stats *s, struct mcast_stats *r) +{ + size_t size; + + size = snprintf(buf, buflen, + "multicast traffic device=%s status=%s role=%s:\n" + "%20llu Bytes sent " + "%20llu Bytes recv\n" + "%20llu Pckts sent " + "%20llu Pckts recv\n" + "%20llu Error send " + "%20llu Error recv\n\n", + ifname, status, active ? "ACTIVE" : "BACKUP", + (unsigned long long)s->bytes, + (unsigned long long)r->bytes, + (unsigned long long)s->messages, + (unsigned long long)r->messages, + (unsigned long long)s->error, + (unsigned long long)r->error); + return size; +} + +void +mcast_dump_stats(int fd, + const struct mcast_sock_multi *s, + const struct mcast_sock_multi *r) +{ + int i; + struct mcast_stats snd = { 0, 0, 0}; + struct mcast_stats rcv = { 0, 0, 0}; + char ifname[IFNAMSIZ], buf[512]; int size; - size = sprintf(buf, "multicast traffic:\n" - "%20llu Bytes sent " - "%20llu Bytes recv\n" - "%20llu Pckts sent " - "%20llu Pckts recv\n" - "%20llu Error send " - "%20llu Error recv\n\n", - (unsigned long long)s->stats.bytes, - (unsigned long long)r->stats.bytes, - (unsigned long long)s->stats.messages, - (unsigned long long)r->stats.messages, - (unsigned long long)s->stats.error, - (unsigned long long)r->stats.error); + /* it is the same for the receiver, no need to do it twice */ + if_indextoname(s->current_link->interface_idx, ifname); + + for (i=0; inum_links && inum_links; i++) { + snd.bytes += s->multi[i]->stats.bytes; + snd.messages += s->multi[i]->stats.messages; + snd.error += s->multi[i]->stats.error; + rcv.bytes += r->multi[i]->stats.bytes; + rcv.messages += r->multi[i]->stats.messages; + rcv.error += r->multi[i]->stats.error; + } + size = mcast_snprintf_stats(buf, sizeof(buf), ifname, &snd, &rcv); + send(fd, buf, size, 0); +} +void +mcast_dump_stats_extended(int fd, + const struct mcast_sock_multi *s, + const struct mcast_sock_multi *r, + const struct nlif_handle *h) +{ + int i; + char buf[4096]; + int size = 0; + + for (i=0; inum_links && inum_links; i++) { + int idx = s->multi[i]->interface_idx, active; + unsigned int flags; + char ifname[IFNAMSIZ]; + const char *status; + + if_indextoname(idx, ifname); + nlif_get_ifflags(h, idx, &flags); + active = (s->multi[i] == s->current_link); + /* + * IFF_UP shows administrative status + * IFF_RUNNING shows carrier status + */ + if (flags & IFF_UP) { + if (!(flags & IFF_RUNNING)) + status = "NO-CARRIER"; + else + status = "RUNNING"; + } else { + status = "DOWN"; + } + size += mcast_snprintf_stats2(buf+size, sizeof(buf), + ifname, status, active, + &s->multi[i]->stats, + &r->multi[i]->stats); + } send(fd, buf, size, 0); } diff --git a/src/netlink.c b/src/netlink.c index 92fbf00..2266201 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -114,6 +114,22 @@ struct nfct_handle *nl_init_request_handler(void) return h; } +struct nlif_handle *nl_init_interface_handler(void) +{ + struct nlif_handle *h; + h = nlif_open(); + if (h == NULL) + return NULL; + + if (nlif_query(h) == -1) { + free(h); + return NULL; + } + fcntl(nlif_fd(h), F_SETFL, O_NONBLOCK); + + return h; +} + static int warned = 0; void nl_resize_socket_buffer(struct nfct_handle *h) diff --git a/src/network.c b/src/network.c index 7a106b1..f71aef0 100644 --- a/src/network.c +++ b/src/network.c @@ -95,7 +95,8 @@ void mcast_buffered_destroy(void) } /* return 0 if it is not sent, otherwise return 1 */ -int mcast_buffered_send_netmsg(struct mcast_sock *m, const struct nethdr *net) +int +mcast_buffered_send_netmsg(struct mcast_sock_multi *m, const struct nethdr *net) { int ret = 0, len = ntohs(net->len); @@ -104,7 +105,7 @@ retry: memcpy(tx_buf + tx_buflen, net, len); tx_buflen += len; } else { - mcast_send(m, tx_buf, tx_buflen); + mcast_send(mcast_get_current_link(m), tx_buf, tx_buflen); ret = 1; tx_buflen = 0; goto retry; @@ -113,14 +114,14 @@ retry: return ret; } -ssize_t mcast_buffered_pending_netmsg(struct mcast_sock *m) +ssize_t mcast_buffered_pending_netmsg(struct mcast_sock_multi *m) { ssize_t ret; if (tx_buflen == 0) return 0; - ret = mcast_send(m, tx_buf, tx_buflen); + ret = mcast_send(mcast_get_current_link(m), tx_buf, tx_buflen); tx_buflen = 0; return ret; diff --git a/src/read_config_lex.l b/src/read_config_lex.l index f8b0ba1..e9e5d43 100644 --- a/src/read_config_lex.l +++ b/src/read_config_lex.l @@ -118,6 +118,7 @@ notrack [N|n][O|o][T|t][R|r][A|a][C|c][K|k] "Userspace" { return T_USERSPACE; } "Kernelspace" { return T_KERNELSPACE; } "EventIterationLimit" { return T_EVENT_ITER_LIMIT; } +"Default" { return T_DEFAULT; } {is_on} { return T_ON; } {is_off} { return T_OFF; } diff --git a/src/read_config_yy.y b/src/read_config_yy.y index 274bfc3..de6cef3 100644 --- a/src/read_config_yy.y +++ b/src/read_config_yy.y @@ -38,6 +38,7 @@ struct ct_conf conf; static void __kernel_filter_start(void); static void __kernel_filter_add_state(int value); +static void __max_mcast_dedicated_links_reached(void); %} %union { @@ -59,7 +60,7 @@ static void __kernel_filter_add_state(int value); %token T_SYSLOG T_WRITE_THROUGH T_STAT_BUFFER_SIZE T_DESTROY_TIMEOUT %token T_MCAST_RCVBUFF T_MCAST_SNDBUFF T_NOTRACK %token T_FILTER T_ADDRESS T_PROTOCOL T_STATE T_ACCEPT T_IGNORE -%token T_FROM T_USERSPACE T_KERNELSPACE T_EVENT_ITER_LIMIT +%token T_FROM T_USERSPACE T_KERNELSPACE T_EVENT_ITER_LIMIT T_DEFAULT %token T_IP T_PATH_VAL %token T_NUMBER @@ -174,14 +175,22 @@ checksum: T_CHECKSUM T_ON { fprintf(stderr, "WARNING: The use of `Checksum' outside the " "`Multicast' clause is ambiguous.\n"); - conf.mcast.checksum = 0; + /* + * XXX: The use of Checksum outside of the Multicast clause is broken + * if we have more than one dedicated links. + */ + conf.mcast[0].checksum = 0; }; checksum: T_CHECKSUM T_OFF { fprintf(stderr, "WARNING: The use of `Checksum' outside the " "`Multicast' clause is ambiguous.\n"); - conf.mcast.checksum = 1; + /* + * XXX: The use of Checksum outside of the Multicast clause is broken + * if we have more than one dedicated links. + */ + conf.mcast[0].checksum = 1; }; ignore_traffic : T_IGNORE_TRAFFIC '{' ignore_traffic_options '}' @@ -243,32 +252,45 @@ ignore_traffic_option : T_IPV6_ADDR T_IP }; -multicast_line : T_MULTICAST '{' multicast_options '}'; +multicast_line : T_MULTICAST '{' multicast_options '}' +{ + conf.mcast_links++; +}; + +multicast_line : T_MULTICAST T_DEFAULT '{' multicast_options '}' +{ + conf.mcast_default_link = conf.mcast_links; + conf.mcast_links++; +}; multicast_options : | multicast_options multicast_option; multicast_option : T_IPV4_ADDR T_IP { - if (!inet_aton($2, &conf.mcast.in)) { + __max_mcast_dedicated_links_reached(); + + if (!inet_aton($2, &conf.mcast[conf.mcast_links].in)) { fprintf(stderr, "%s is not a valid IPv4 address\n", $2); break; } - if (conf.mcast.ipproto == AF_INET6) { + if (conf.mcast[conf.mcast_links].ipproto == AF_INET6) { fprintf(stderr, "Your multicast address is IPv4 but " "is binded to an IPv6 interface? Surely " "this is not what you want\n"); break; } - conf.mcast.ipproto = AF_INET; + conf.mcast[conf.mcast_links].ipproto = AF_INET; }; multicast_option : T_IPV6_ADDR T_IP { + __max_mcast_dedicated_links_reached(); + #ifdef HAVE_INET_PTON_IPV6 - if (inet_pton(AF_INET6, $2, &conf.mcast.in) <= 0) { + if (inet_pton(AF_INET6, $2, &conf.mcast[conf.mcast_links].in) <= 0) { fprintf(stderr, "%s is not a valid IPv6 address\n", $2); break; } @@ -277,16 +299,17 @@ multicast_option : T_IPV6_ADDR T_IP break; #endif - if (conf.mcast.ipproto == AF_INET) { + if (conf.mcast[conf.mcast_links].ipproto == AF_INET) { fprintf(stderr, "Your multicast address is IPv6 but " "is binded to an IPv4 interface? Surely " "this is not what you want\n"); break; } - conf.mcast.ipproto = AF_INET6; + conf.mcast[conf.mcast_links].ipproto = AF_INET6; - if (conf.mcast.iface[0] && !conf.mcast.ifa.interface_index6) { + if (conf.mcast[conf.mcast_links].iface[0] && + !conf.mcast[conf.mcast_links].ifa.interface_index6) { unsigned int idx; idx = if_nametoindex($2); @@ -295,26 +318,28 @@ multicast_option : T_IPV6_ADDR T_IP break; } - conf.mcast.ifa.interface_index6 = idx; - conf.mcast.ipproto = AF_INET6; + conf.mcast[conf.mcast_links].ifa.interface_index6 = idx; + conf.mcast[conf.mcast_links].ipproto = AF_INET6; } }; multicast_option : T_IPV4_IFACE T_IP { - if (!inet_aton($2, &conf.mcast.ifa)) { + __max_mcast_dedicated_links_reached(); + + if (!inet_aton($2, &conf.mcast[conf.mcast_links].ifa)) { fprintf(stderr, "%s is not a valid IPv4 address\n", $2); break; } - if (conf.mcast.ipproto == AF_INET6) { + if (conf.mcast[conf.mcast_links].ipproto == AF_INET6) { fprintf(stderr, "Your multicast interface is IPv4 but " "is binded to an IPv6 interface? Surely " "this is not what you want\n"); break; } - conf.mcast.ipproto = AF_INET; + conf.mcast[conf.mcast_links].ipproto = AF_INET; }; multicast_option : T_IPV6_IFACE T_IP @@ -324,19 +349,22 @@ multicast_option : T_IPV6_IFACE T_IP multicast_option : T_IFACE T_STRING { - strncpy(conf.mcast.iface, $2, IFNAMSIZ); + unsigned int idx; - if (conf.mcast.ipproto == AF_INET6) { - unsigned int idx; + __max_mcast_dedicated_links_reached(); - idx = if_nametoindex($2); - if (!idx) { - fprintf(stderr, "%s is an invalid interface.\n", $2); - break; - } + strncpy(conf.mcast[conf.mcast_links].iface, $2, IFNAMSIZ); + + idx = if_nametoindex($2); + if (!idx) { + fprintf(stderr, "%s is an invalid interface.\n", $2); + break; + } + conf.mcast[conf.mcast_links].interface_idx = idx; - conf.mcast.ifa.interface_index6 = idx; - conf.mcast.ipproto = AF_INET6; + if (conf.mcast[conf.mcast_links].ipproto == AF_INET6) { + conf.mcast[conf.mcast_links].ifa.interface_index6 = idx; + conf.mcast[conf.mcast_links].ipproto = AF_INET6; } }; @@ -348,27 +376,32 @@ multicast_option : T_BACKLOG T_NUMBER multicast_option : T_GROUP T_NUMBER { - conf.mcast.port = $2; + __max_mcast_dedicated_links_reached(); + conf.mcast[conf.mcast_links].port = $2; }; multicast_option: T_MCAST_SNDBUFF T_NUMBER { - conf.mcast.sndbuf = $2; + __max_mcast_dedicated_links_reached(); + conf.mcast[conf.mcast_links].sndbuf = $2; }; multicast_option: T_MCAST_RCVBUFF T_NUMBER { - conf.mcast.rcvbuf = $2; + __max_mcast_dedicated_links_reached(); + conf.mcast[conf.mcast_links].rcvbuf = $2; }; multicast_option: T_CHECKSUM T_ON { - conf.mcast.checksum = 0; + __max_mcast_dedicated_links_reached(); + conf.mcast[conf.mcast_links].checksum = 0; }; multicast_option: T_CHECKSUM T_OFF { - conf.mcast.checksum = 1; + __max_mcast_dedicated_links_reached(); + conf.mcast[conf.mcast_links].checksum = 1; }; hashsize : T_HASHSIZE T_NUMBER @@ -1050,6 +1083,16 @@ static void __kernel_filter_add_state(int value) &filter_proto); } +static void __max_mcast_dedicated_links_reached(void) +{ + if (conf.mcast_links >= MCAST_LINKS_MAX) { + fprintf(stderr, "ERROR: too many dedicated links in " + "the configuration file (Maximum: %d).\n", + MCAST_LINKS_MAX); + exit(EXIT_FAILURE); + } +} + int init_config(char *filename) { diff --git a/src/sync-mode.c b/src/sync-mode.c index 00e2f7b..0dbd12d 100644 --- a/src/sync-mode.c +++ b/src/sync-mode.c @@ -33,8 +33,10 @@ #include #include #include +#include -static void do_mcast_handler_step(struct nethdr *net, size_t remain) +static void +do_mcast_handler_step(int if_idx, struct nethdr *net, size_t remain) { char __ct[nfct_maxsize()]; struct nf_conntrack *ct = (struct nf_conntrack *)(void*) __ct; @@ -47,6 +49,9 @@ static void do_mcast_handler_step(struct nethdr *net, size_t remain) return; } + if (if_idx != mcast_get_current_ifidx(STATE_SYNC(mcast_client))) + mcast_set_current_link(STATE_SYNC(mcast_client), if_idx); + switch (STATE_SYNC(sync)->recv(net)) { case MSG_DATA: break; @@ -111,13 +116,13 @@ retry: } /* handler for multicast messages received */ -static void mcast_handler(void) +static void mcast_handler(struct mcast_sock *m, int if_idx) { ssize_t numbytes; ssize_t remain; char __net[65536], *ptr = __net; /* XXX: maximum MTU for IPv4 */ - numbytes = mcast_recv(STATE_SYNC(mcast_server), __net, sizeof(__net)); + numbytes = mcast_recv(m, __net, sizeof(__net)); if (numbytes <= 0) return; @@ -160,12 +165,46 @@ static void mcast_handler(void) HDR_NETWORK2HOST(net); - do_mcast_handler_step(net, remain); + do_mcast_handler_step(if_idx, net, remain); ptr += net->len; remain -= net->len; } } +/* select a new interface candidate in a round robin basis */ +static void mcast_iface_candidate(void) +{ + int i, idx; + unsigned int flags; + char buf[IFNAMSIZ]; + + for (i=0; inum_links; i++) { + idx = mcast_get_ifidx(STATE_SYNC(mcast_client), i); + if (idx == mcast_get_current_ifidx(STATE_SYNC(mcast_client))) + continue; + nlif_get_ifflags(STATE_SYNC(mcast_iface), idx, &flags); + if (flags & (IFF_RUNNING | IFF_UP)) { + mcast_set_current_link(STATE_SYNC(mcast_client), i); + dlog(LOG_NOTICE, "device `%s' becomes multicast " + "dedicated link", + if_indextoname(idx, buf)); + return; + } + } + dlog(LOG_ERR, "no dedicated links available!"); +} + +static void mcast_iface_handler(void) +{ + int idx = mcast_get_current_ifidx(STATE_SYNC(mcast_client)); + unsigned int flags; + + nlif_catch(STATE_SYNC(mcast_iface)); + nlif_get_ifflags(STATE_SYNC(mcast_iface), idx, &flags); + if (!(flags & IFF_RUNNING) || !(flags & IFF_UP)) + mcast_iface_candidate(); +} + static int init_sync(void) { state.sync = malloc(sizeof(struct ct_sync_state)); @@ -216,30 +255,35 @@ static int init_sync(void) } /* multicast server to receive events from the wire */ - STATE_SYNC(mcast_server) = mcast_server_create(&CONFIG(mcast)); + STATE_SYNC(mcast_server) = + mcast_server_create_multi(CONFIG(mcast), CONFIG(mcast_links)); if (STATE_SYNC(mcast_server) == NULL) { dlog(LOG_ERR, "can't open multicast server!"); return -1; } - dlog(LOG_NOTICE, "multicast server socket receiver queue " - "has been set to %d bytes", CONFIG(mcast).rcvbuf); - /* multicast client to send events on the wire */ - STATE_SYNC(mcast_client) = mcast_client_create(&CONFIG(mcast)); + STATE_SYNC(mcast_client) = + mcast_client_create_multi(CONFIG(mcast), CONFIG(mcast_links)); if (STATE_SYNC(mcast_client) == NULL) { dlog(LOG_ERR, "can't open client multicast socket"); - mcast_server_destroy(STATE_SYNC(mcast_server)); + mcast_server_destroy_multi(STATE_SYNC(mcast_server)); return -1; } + /* we only use one link to send events, but all to receive them */ + mcast_set_current_link(STATE_SYNC(mcast_client), + CONFIG(mcast_default_link)); - dlog(LOG_NOTICE, "multicast client socket sender queue " - "has been set to %d bytes", CONFIG(mcast).sndbuf); - - if (mcast_buffered_init(CONFIG(mcast).mtu) == -1) { + if (mcast_buffered_init(STATE_SYNC(mcast_client)->max_mtu) == -1) { dlog(LOG_ERR, "can't init tx buffer!"); - mcast_server_destroy(STATE_SYNC(mcast_server)); - mcast_client_destroy(STATE_SYNC(mcast_client)); + mcast_server_destroy_multi(STATE_SYNC(mcast_server)); + mcast_client_destroy_multi(STATE_SYNC(mcast_client)); + return -1; + } + + STATE_SYNC(mcast_iface) = nl_init_interface_handler(); + if (!STATE_SYNC(mcast_iface)) { + dlog(LOG_ERR, "can't open interface watcher"); return -1; } @@ -257,7 +301,14 @@ static int init_sync(void) static int register_fds_sync(struct fds *fds) { - if (register_fd(STATE_SYNC(mcast_server->fd), fds) == -1) + int i; + + for (i=0; inum_links; i++) { + int fd = mcast_get_fd(STATE_SYNC(mcast_server)->multi[i]); + if (register_fd(fd, fds) == -1) + return -1; + } + if (register_fd(nlif_fd(STATE_SYNC(mcast_iface)), fds) == -1) return -1; if (register_fd(queue_get_eventfd(STATE_SYNC(tx_queue)), fds) == -1) @@ -268,13 +319,20 @@ static int register_fds_sync(struct fds *fds) static void run_sync(fd_set *readfds) { - /* multicast packet has been received */ - if (FD_ISSET(STATE_SYNC(mcast_server->fd), readfds)) - mcast_handler(); + int i; + + for (i=0; inum_links; i++) { + int fd = mcast_get_fd(STATE_SYNC(mcast_server)->multi[i]); + if (FD_ISSET(fd, readfds)) + mcast_handler(STATE_SYNC(mcast_server)->multi[i], i); + } if (FD_ISSET(queue_get_eventfd(STATE_SYNC(tx_queue)), readfds)) STATE_SYNC(sync)->xmit(); + if (FD_ISSET(nlif_fd(STATE_SYNC(mcast_iface)), readfds)) + mcast_iface_handler(); + /* flush pending messages */ mcast_buffered_pending_netmsg(STATE_SYNC(mcast_client)); } @@ -284,8 +342,10 @@ static void kill_sync(void) cache_destroy(STATE_SYNC(internal)); cache_destroy(STATE_SYNC(external)); - mcast_server_destroy(STATE_SYNC(mcast_server)); - mcast_client_destroy(STATE_SYNC(mcast_client)); + mcast_server_destroy_multi(STATE_SYNC(mcast_server)); + mcast_client_destroy_multi(STATE_SYNC(mcast_client)); + + nlif_close(STATE_SYNC(mcast_iface)); mcast_buffered_destroy(); queue_destroy(STATE_SYNC(tx_queue)); @@ -418,6 +478,11 @@ static int local_handler_sync(int fd, int type, void *data) cache_stats_extended(STATE_SYNC(internal), fd); cache_stats_extended(STATE_SYNC(external), fd); break; + case STATS_MULTICAST: + mcast_dump_stats_extended(fd, STATE_SYNC(mcast_client), + STATE_SYNC(mcast_server), + STATE_SYNC(mcast_iface)); + break; default: if (STATE_SYNC(sync)->local) ret = STATE_SYNC(sync)->local(fd, type, data); -- cgit v1.2.3 From c54c8c9287fc87177daf9b51933f92c7e6402904 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 17 Jan 2009 18:03:52 +0100 Subject: src: rename overrun handler to resync handler This patch is a cleanup. The overrun handler is actually a way to resynchronize against the conntrack kernel table. The name overrun was used because it was initially its purpose. The new naming shows its genericity. Signed-off-by: Pablo Neira Ayuso --- include/conntrackd.h | 10 +++++----- include/netlink.h | 4 ++-- src/netlink.c | 4 ++-- src/run.c | 22 +++++++++++----------- src/stats-mode.c | 12 ++++++------ src/sync-mode.c | 12 ++++++------ 6 files changed, 32 insertions(+), 32 deletions(-) (limited to 'include/netlink.h') diff --git a/include/conntrackd.h b/include/conntrackd.h index ab5d825..d5b61c6 100644 --- a/include/conntrackd.h +++ b/include/conntrackd.h @@ -111,8 +111,8 @@ struct ct_general_state { struct nfct_handle *dump; /* dump handler */ struct nfct_handle *request; /* request handler */ - struct nfct_handle *overrun; /* overrun handler */ - struct alarm_block overrun_alarm; + struct nfct_handle *resync; /* resync handler */ + struct alarm_block resync_alarm; struct fds *fds; @@ -204,9 +204,9 @@ struct ct_mode { int (*local)(int fd, int type, void *data); void (*kill)(void); void (*dump)(struct nf_conntrack *ct); - int (*overrun)(enum nf_conntrack_msg_type type, - struct nf_conntrack *ct, - void *data); + int (*resync)(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, + void *data); int (*purge)(void); void (*event_new)(struct nf_conntrack *ct); void (*event_upd)(struct nf_conntrack *ct); diff --git a/include/netlink.h b/include/netlink.h index 4bc5ee4..a8eb919 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -9,10 +9,10 @@ struct nfct_handle; struct nfct_handle *nl_init_event_handler(void); struct nfct_handle *nl_init_dump_handler(void); struct nfct_handle *nl_init_request_handler(void); -struct nfct_handle *nl_init_overrun_handler(void); +struct nfct_handle *nl_init_resync_handler(void); struct nlif_handle *nl_init_interface_handler(void); -int nl_overrun_request_resync(struct nfct_handle *h); +int nl_send_resync(struct nfct_handle *h); void nl_resize_socket_buffer(struct nfct_handle *h); int nl_dump_conntrack_table(struct nfct_handle *h); int nl_flush_conntrack_table(struct nfct_handle *h); diff --git a/src/netlink.c b/src/netlink.c index 2266201..15a30f6 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -90,7 +90,7 @@ struct nfct_handle *nl_init_dump_handler(void) return h; } -struct nfct_handle *nl_init_overrun_handler(void) +struct nfct_handle *nl_init_resync_handler(void) { struct nfct_handle *h; @@ -172,7 +172,7 @@ int nl_flush_conntrack_table(struct nfct_handle *h) return nfct_query(h, NFCT_Q_FLUSH, &CONFIG(family)); } -int nl_overrun_request_resync(struct nfct_handle *h) +int nl_send_resync(struct nfct_handle *h) { int family = CONFIG(family); return nfct_send(h, NFCT_Q_DUMP, &family); diff --git a/src/run.c b/src/run.c index 2e373ce..b436113 100644 --- a/src/run.c +++ b/src/run.c @@ -204,9 +204,9 @@ void local_handler(int fd, void *data) STATE(stats).local_unknown_request++; } -static void do_overrun_alarm(struct alarm_block *a, void *data) +static void do_resync_alarm(struct alarm_block *a, void *data) { - nl_overrun_request_resync(STATE(overrun)); + nl_send_resync(STATE(resync)); STATE(stats).nl_kernel_table_resync++; } @@ -313,16 +313,16 @@ init(void) return -1; } - STATE(overrun) = nl_init_overrun_handler(); - if (STATE(overrun)== NULL) { + STATE(resync) = nl_init_resync_handler(); + if (STATE(resync)== NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); dlog(LOG_ERR, "no ctnetlink kernel support?"); return -1; } - nfct_callback_register(STATE(overrun), + nfct_callback_register(STATE(resync), NFCT_T_ALL, - STATE(mode)->overrun, + STATE(mode)->resync, NULL); /* no callback, it does not do anything with the output */ @@ -334,7 +334,7 @@ init(void) return -1; } - init_alarm(&STATE(overrun_alarm), NULL, do_overrun_alarm); + init_alarm(&STATE(resync_alarm), NULL, do_resync_alarm); STATE(fds) = create_fds(); if (STATE(fds) == NULL) { @@ -344,7 +344,7 @@ init(void) register_fd(STATE(local).fd, STATE(fds)); register_fd(nfct_fd(STATE(event)), STATE(fds)); - register_fd(nfct_fd(STATE(overrun)), STATE(fds)); + register_fd(nfct_fd(STATE(resync)), STATE(fds)); if (STATE(mode)->register_fds && STATE(mode)->register_fds(STATE(fds)) == -1) { @@ -435,7 +435,7 @@ static void __run(struct timeval *next_alarm) * we resync ourselves. */ nl_resize_socket_buffer(STATE(event)); - add_alarm(&STATE(overrun_alarm), OVRUN_INT, 0); + add_alarm(&STATE(resync_alarm), OVRUN_INT, 0); STATE(stats).nl_catch_event_failed++; STATE(stats).nl_overrun++; break; @@ -455,8 +455,8 @@ static void __run(struct timeval *next_alarm) } } - if (FD_ISSET(nfct_fd(STATE(overrun)), &readfds)) { - nfct_catch(STATE(overrun)); + if (FD_ISSET(nfct_fd(STATE(resync)), &readfds)) { + nfct_catch(STATE(resync)); if (STATE(mode)->purge) STATE(mode)->purge(); } diff --git a/src/stats-mode.c b/src/stats-mode.c index 679a50c..159bbef 100644 --- a/src/stats-mode.c +++ b/src/stats-mode.c @@ -102,9 +102,9 @@ static void dump_stats(struct nf_conntrack *ct) debug_ct(ct, "resync entry"); } -static int overrun_stats(enum nf_conntrack_msg_type type, - struct nf_conntrack *ct, - void *data) +static int resync_stats(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, + void *data) { if (ct_filter_conntrack(ct, 1)) return NFCT_CB_CONTINUE; @@ -118,7 +118,7 @@ static int overrun_stats(enum nf_conntrack_msg_type type, nfct_attr_unset(ct, ATTR_USE); if (!cache_update_force(STATE_STATS(cache), ct)) - debug_ct(ct, "overrun stats resync"); + debug_ct(ct, "stats resync"); return NFCT_CB_CONTINUE; } @@ -130,7 +130,7 @@ static int purge_step(void *data1, void *data2) ret = nfct_query(STATE(dump), NFCT_Q_GET, obj->ct); if (ret == -1 && errno == ENOENT) { - debug_ct(obj->ct, "overrun purge stats"); + debug_ct(obj->ct, "purge stats"); cache_del(STATE_STATS(cache), obj); cache_object_free(obj); } @@ -196,7 +196,7 @@ struct ct_mode stats_mode = { .local = local_handler_stats, .kill = kill_stats, .dump = dump_stats, - .overrun = overrun_stats, + .resync = resync_stats, .purge = purge_stats, .event_new = event_new_stats, .event_upd = event_update_stats, diff --git a/src/sync-mode.c b/src/sync-mode.c index 0dbd12d..2e189cb 100644 --- a/src/sync-mode.c +++ b/src/sync-mode.c @@ -518,7 +518,7 @@ static int purge_step(void *data1, void *data2) ret = nfct_query(h, NFCT_Q_GET, obj->ct); if (ret == -1 && errno == ENOENT) { - debug_ct(obj->ct, "overrun purge resync"); + debug_ct(obj->ct, "purge resync"); if (obj->status != C_OBJ_DEAD) { cache_object_set_status(obj, C_OBJ_DEAD); mcast_send_sync(obj, NET_T_STATE_DEL); @@ -536,9 +536,9 @@ static int purge_sync(void) return 0; } -static int overrun_sync(enum nf_conntrack_msg_type type, - struct nf_conntrack *ct, - void *data) +static int resync_sync(enum nf_conntrack_msg_type type, + struct nf_conntrack *ct, + void *data) { struct cache_object *obj; @@ -553,7 +553,7 @@ static int overrun_sync(enum nf_conntrack_msg_type type, nfct_attr_unset(ct, ATTR_USE); if ((obj = cache_update_force(STATE_SYNC(internal), ct))) { - debug_ct(obj->ct, "overrun resync"); + debug_ct(obj->ct, "resync"); mcast_send_sync(obj, NET_T_STATE_UPD); } @@ -629,7 +629,7 @@ struct ct_mode sync_mode = { .local = local_handler_sync, .kill = kill_sync, .dump = dump_sync, - .overrun = overrun_sync, + .resync = resync_sync, .purge = purge_sync, .event_new = event_new_sync, .event_upd = event_update_sync, -- cgit v1.2.3 From e6732c96ffd9baaaa84dab763ff6e600bf6abc95 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 25 Jan 2009 17:51:09 +0100 Subject: cache: remove nl_exist_conntrack() function This function is a synonimous of nl_get_conntrack(), use the get function instead. Signed-off-by: Pablo Neira Ayuso --- include/netlink.h | 1 - src/cache_wt.c | 2 +- src/netlink.c | 15 ++------------- 3 files changed, 3 insertions(+), 15 deletions(-) (limited to 'include/netlink.h') diff --git a/include/netlink.h b/include/netlink.h index a8eb919..d8a4fef 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -16,7 +16,6 @@ int nl_send_resync(struct nfct_handle *h); void nl_resize_socket_buffer(struct nfct_handle *h); int nl_dump_conntrack_table(struct nfct_handle *h); int nl_flush_conntrack_table(struct nfct_handle *h); -int nl_exist_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); int nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); int nl_create_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); int nl_update_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); diff --git a/src/cache_wt.c b/src/cache_wt.c index 84a816f..4b67e8e 100644 --- a/src/cache_wt.c +++ b/src/cache_wt.c @@ -30,7 +30,7 @@ static void add_wt(struct cache_object *obj) char __ct[nfct_maxsize()]; struct nf_conntrack *ct = (struct nf_conntrack *)(void*) __ct; - ret = nl_exist_conntrack(STATE(request), obj->ct); + ret = nl_get_conntrack(STATE(request), obj->ct); switch (ret) { case -1: dlog(LOG_ERR, "cache_wt problem: %s", strerror(errno)); diff --git a/src/netlink.c b/src/netlink.c index 15a30f6..e538aa0 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -178,8 +178,8 @@ int nl_send_resync(struct nfct_handle *h) return nfct_send(h, NFCT_Q_DUMP, &family); } -static int -__nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) +/* if the handle has no callback, check for existence, otherwise, update */ +int nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) { int ret; char __tmp[nfct_maxsize()]; @@ -197,17 +197,6 @@ __nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) return 1; } -int nl_exist_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) -{ - return __nl_get_conntrack(h, ct); -} - -/* get the conntrack and update the cache */ -int nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) -{ - return __nl_get_conntrack(h, ct); -} - int nl_create_conntrack(struct nfct_handle *h, const struct nf_conntrack *orig) { int ret; -- cgit v1.2.3 From 8d689ebb67c511f5c03acdfc2226156d5f87c319 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 25 Jan 2009 17:51:18 +0100 Subject: cache: mangle timeout inside nl_*_conntrack() functions This patch moves the timeout mangling inside nl_*_conntrack(). Signed-off-by: Pablo Neira Ayuso --- include/netlink.h | 4 ++-- src/cache_iterators.c | 12 ++---------- src/cache_wt.c | 6 +++--- src/netlink.c | 14 ++++++++++++-- 4 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include/netlink.h') diff --git a/include/netlink.h b/include/netlink.h index d8a4fef..9d67165 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -17,8 +17,8 @@ void nl_resize_socket_buffer(struct nfct_handle *h); int nl_dump_conntrack_table(struct nfct_handle *h); int nl_flush_conntrack_table(struct nfct_handle *h); int nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); -int nl_create_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); -int nl_update_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); +int nl_create_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct, int timeout); +int nl_update_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct, int timeout); int nl_destroy_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct); static inline int ct_is_related(const struct nf_conntrack *ct) diff --git a/src/cache_iterators.c b/src/cache_iterators.c index 379deed..9b54ea1 100644 --- a/src/cache_iterators.c +++ b/src/cache_iterators.c @@ -105,14 +105,8 @@ __do_commit_step(struct __commit_container *tmp, struct cache_object *obj) int ret, retry = 1; struct nf_conntrack *ct = obj->ct; - /* - * Set a reduced timeout for candidate-to-be-committed - * conntracks that live in the external cache - */ - nfct_set_attr_u32(ct, ATTR_TIMEOUT, CONFIG(commit_timeout)); - retry: - if (nl_create_conntrack(tmp->h, ct) == -1) { + if (nl_create_conntrack(tmp->h, ct, CONFIG(commit_timeout)) == -1) { if (errno == EEXIST && retry == 1) { ret = nl_destroy_conntrack(tmp->h, ct); if (ret == 0 || (ret == -1 && errno == ENOENT)) { @@ -223,9 +217,7 @@ static int do_reset_timers(void *data1, struct hashtable_node *n) if (current_timeout < CONFIG(purge_timeout)) break; - nfct_set_attr_u32(tmp, ATTR_TIMEOUT, CONFIG(purge_timeout)); - - if (nl_update_conntrack(h, tmp) == -1) { + if (nl_update_conntrack(h, tmp, CONFIG(purge_timeout)) == -1) { if (errno == ETIME || errno == ENOENT) break; dlog(LOG_ERR, "reset-timers-upd: %s", strerror(errno)); diff --git a/src/cache_wt.c b/src/cache_wt.c index 4b67e8e..6f9ccc7 100644 --- a/src/cache_wt.c +++ b/src/cache_wt.c @@ -38,14 +38,14 @@ static void add_wt(struct cache_object *obj) break; case 0: memcpy(ct, obj->ct, nfct_maxsize()); - if (nl_create_conntrack(STATE(dump), ct) == -1) { + if (nl_create_conntrack(STATE(dump), ct, 0) == -1) { dlog(LOG_ERR, "cache_wt create: %s", strerror(errno)); dlog_ct(STATE(log), obj->ct, NFCT_O_PLAIN); } break; case 1: memcpy(ct, obj->ct, nfct_maxsize()); - if (nl_update_conntrack(STATE(dump), ct) == -1) { + if (nl_update_conntrack(STATE(dump), ct, 0) == -1) { dlog(LOG_ERR, "cache_wt crt-upd: %s", strerror(errno)); dlog_ct(STATE(log), obj->ct, NFCT_O_PLAIN); } @@ -60,7 +60,7 @@ static void upd_wt(struct cache_object *obj) memcpy(ct, obj->ct, nfct_maxsize()); - if (nl_update_conntrack(STATE(dump), ct) == -1) { + if (nl_update_conntrack(STATE(dump), ct, 0) == -1) { dlog(LOG_ERR, "cache_wt update:%s", strerror(errno)); dlog_ct(STATE(log), obj->ct, NFCT_O_PLAIN); } diff --git a/src/netlink.c b/src/netlink.c index e538aa0..24d61a0 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -197,7 +197,9 @@ int nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) return 1; } -int nl_create_conntrack(struct nfct_handle *h, const struct nf_conntrack *orig) +int nl_create_conntrack(struct nfct_handle *h, + const struct nf_conntrack *orig, + int timeout) { int ret; struct nf_conntrack *ct; @@ -206,6 +208,9 @@ int nl_create_conntrack(struct nfct_handle *h, const struct nf_conntrack *orig) if (ct == NULL) return -1; + if (timeout > 0) + nfct_set_attr_u32(ct, ATTR_TIMEOUT, timeout); + /* we hit error if we try to change the expected bit */ if (nfct_attr_is_set(ct, ATTR_STATUS)) { uint32_t status = nfct_get_attr_u32(ct, ATTR_STATUS); @@ -233,7 +238,9 @@ int nl_create_conntrack(struct nfct_handle *h, const struct nf_conntrack *orig) return ret; } -int nl_update_conntrack(struct nfct_handle *h, const struct nf_conntrack *orig) +int nl_update_conntrack(struct nfct_handle *h, + const struct nf_conntrack *orig, + int timeout) { int ret; struct nf_conntrack *ct; @@ -242,6 +249,9 @@ int nl_update_conntrack(struct nfct_handle *h, const struct nf_conntrack *orig) if (ct == NULL) return -1; + if (timeout > 0) + nfct_set_attr_u32(ct, ATTR_TIMEOUT, timeout); + /* unset NAT info, otherwise we hit error */ nfct_attr_unset(ct, ATTR_SNAT_IPV4); nfct_attr_unset(ct, ATTR_DNAT_IPV4); -- cgit v1.2.3 From c3ef4d9b32ca653571f0976f73aaa99218a36db0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Feb 2009 21:28:02 +0100 Subject: netlink: refactorize several nl_init_*_handler() functions This patch removes: * nl_init_dump_handler() * nl_init_request_handler() * nl_init_resync_handler() since they all look very similar. Signed-off-by: Pablo Neira Ayuso --- include/netlink.h | 3 --- src/netlink.c | 36 ------------------------------------ src/run.c | 8 +++++--- 3 files changed, 5 insertions(+), 42 deletions(-) (limited to 'include/netlink.h') diff --git a/include/netlink.h b/include/netlink.h index 9d67165..0df0cbb 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -7,9 +7,6 @@ struct nf_conntrack; struct nfct_handle; struct nfct_handle *nl_init_event_handler(void); -struct nfct_handle *nl_init_dump_handler(void); -struct nfct_handle *nl_init_request_handler(void); -struct nfct_handle *nl_init_resync_handler(void); struct nlif_handle *nl_init_interface_handler(void); int nl_send_resync(struct nfct_handle *h); diff --git a/src/netlink.c b/src/netlink.c index a9e3d2d..78cc466 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -79,42 +79,6 @@ struct nfct_handle *nl_init_event_handler(void) return h; } -struct nfct_handle *nl_init_dump_handler(void) -{ - struct nfct_handle *h; - - /* open dump netlink socket */ - h = nfct_open(CONNTRACK, 0); - if (h == NULL) - return NULL; - - return h; -} - -struct nfct_handle *nl_init_resync_handler(void) -{ - struct nfct_handle *h; - - h = nfct_open(CONNTRACK, 0); - if (h == NULL) - return NULL; - - fcntl(nfct_fd(h), F_SETFL, O_NONBLOCK); - - return h; -} - -struct nfct_handle *nl_init_request_handler(void) -{ - struct nfct_handle *h; - - h = nfct_open(CONNTRACK, 0); - if (h == NULL) - return NULL; - - return h; -} - struct nlif_handle *nl_init_interface_handler(void) { struct nlif_handle *h; diff --git a/src/run.c b/src/run.c index a483ab3..7d48865 100644 --- a/src/run.c +++ b/src/run.c @@ -33,6 +33,7 @@ #include #include #include +#include void killer(int foo) { @@ -317,7 +318,7 @@ init(void) register_fd(nfct_fd(STATE(event)), STATE(fds)); } - STATE(dump) = nl_init_dump_handler(); + STATE(dump) = nfct_open(CONNTRACK, 0); if (STATE(dump) == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -331,7 +332,7 @@ init(void) return -1; } - STATE(resync) = nl_init_resync_handler(); + STATE(resync) = nfct_open(CONNTRACK, 0); if (STATE(resync)== NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -343,9 +344,10 @@ init(void) STATE(mode)->resync, NULL); register_fd(nfct_fd(STATE(resync)), STATE(fds)); + fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK); /* no callback, it does not do anything with the output */ - STATE(request) = nl_init_request_handler(); + STATE(request) = nfct_open(CONNTRACK, 0); if (STATE(request) == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); -- cgit v1.2.3 From 79a777c60cfe02197c135adcc4edb2f63ae9a695 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Dec 2011 17:13:25 +0100 Subject: conntrackd: support for expectation synchronization This patch adds support to synchronize expectations between firewalls. This addition aims to re-use as much as possible of the existing infrastructure for stability reasons. The expectation support has been tested with the FTP helper. This extension requires libnetfilter_conntrack 1.0.0. If this is the first time you're playing with conntrackd, I *strongly* recommend you to get working setup of conntrackd without expectation support before as described in the documentation. Then, enabling expectation support is rather easy. To know more about expectations, if you're not familiar with them, I suggest you to read: "Netfilter's Connection Tracking System" http://people.netfilter.org/pablo/docs/login.pdf Reprinted from ;login: The Magazine of USENIX, vol. 31, no. 3 (Berkeley, CA: USENIX Association, 2006, pp40-45.) In short, expectations allow one Linux firewall to filter multi-flow traffic like FTP, SIP and H.323. In my testbed, there are two firewalls in a primary-backup configuration running keepalived. The use a couple of floating cluster IP address (192.168.0.100 and 192.168.1.100) that are used by the client. These firewalls protect one FTP server (192.168.1.2) that will be accessed by one client. In ASCII art, it looks like this: 192.168.0.100 192.168.1.100 eth1 eth2 fw-1 / \ FTP -- client ------ ------ server -- 192.168.0.2 \ / 192.168.1.2 fw-2 This is the rule-set for the firewalls: -A POSTROUTING -t nat -s 192.168.0.2/32 -d 192.168.1.2/32 -j SNAT --to-source 192.168.1.100 -A INPUT -p tcp -m tcp --dport 22 -j ACCEPT -A INPUT -m state --state INVALID -j DROP -A FORWARD -m state --state RELATED -j ACCEPT -A FORWARD -i eth2 -m state --state ESTABLISHED -j ACCEPT -A FORWARD -i eth1 -p tcp -m tcp --dport 21 --tcp-flags FIN,SYN,RST,ACK SYN -m state --state NEW -j ACCEPT -A FORWARD -i eth1 -p tcp -m state --state ESTABLISHED -j ACCEPT -A FORWARD -m state --state INVALID -j LOG --log-prefix "invalid: " The following steps detail how to check that the expectation support works fine for conntrackd: 1) You have to enable the expectation support in the configuration file with the following option: Sync { ... Options { ExpectationSync { ftp sip h323 } } } This enables expectation synchronization for the FTP, SIP and H.323 helpers. You can alternatively use: Sync { ... Options { ExpectationSync On } } To enable expectation synchronization for all helpers. 2) Make sure you have loaded the FTP helper in both firewalls. root@fw1# modprobe nf_conntrack_ftp root@fw2# modprobe nf_conntrack_ftp 3) Switch to the client. Start one FTP control connection to one server that is protected by the firewalls, enter passive mode: (term-1) user@client$ nc 192.168.1.2 21 220 dummy FTP server USER anonymous 331 Please specify the password. PASS nothing 230 Login successful. PASV 227 Entering Passive Mode (192,168,1,2,163,11). This means that port 163*256+11=41739 will be used for the data traffic. Read this if you are not familiar with the FTP protocol: http://www.freefire.org/articles/ftpexample.php 3) Switch to fw-1 (primary) to check that the expectation is in the internal cache. root@fw1# conntrackd -i exp proto=6 src=192.168.0.2 dst=192.168.1.2 sport=0 dport=41739 mask-src=255.255.255.255 mask-dst=255.255.255.255 sport=0 dport=65535 master-src=192.168.0.2 master-dst=192.168.1.2 sport=36390 dport=21 [active since 5s] 4) Switch to fw-2 (backup) to check that the expectation has been successfully replicated. root@fw2# conntrackd -e exp proto=6 src=192.168.0.2 dst=192.168.1.2 sport=0 dport=41739 mask-src=255.255.255.255 mask-dst=255.255.255.255 sport=0 dport=65535 master-src=192.168.0.2 master-dst=192.168.1.2 sport=36390 dport=21 [active since 8s] 5) Make the primary firewall fw-1 fail. Now fw-2 becomes primary. 6) Switch to fw-2 (primary) to commit the external cache into the kernel. root@fw2# conntrackd -c exp The logs should display that the commit was successful: root@fw2# tail -100f /var/log/conntrackd.log [Wed Dec 7 22:16:31 2011] (pid=19195) [notice] committing external cache: expectations [Wed Dec 7 22:16:31 2011] (pid=19195) [notice] Committed 1 new entries [Wed Dec 7 22:16:31 2011] (pid=19195) [notice] commit has taken 0.000366 seconds 7) Switch to the client. Open a new terminal and connect to the port that has been announced by the server: (term-2) user@client$ nc -vvv 192.168.1.2 41739 (UNKNOWN) [192.168.1.2] 41739 (?) open 8) Switch to term-1 and ask for the file listing: [...] 227 Entering Passive Mode (192,168,1,2,163,11). LIST 9) Switch to term-2, it should display the listing. That means everything has worked fine. You may want to try disabling the expectation support and repeating the steps to check that *it does not work* without the state-synchronization. You can also display expectation statistics by means of: root@fwX# conntrackd -s exp This update requires no changes in the primary-backup.sh script that is used by the HA manager to interact with conntrackd. Thus, we provide a backward compatible command line interface. Regarding the Filter clause and expectations, we use the master conntrack to filter expectation events. The filtering is performed in user-space. No kernel-space filtering support for expectations yet (this support should go in libnetfilter_conntrack at some point). This patch also includes support to disable caching and to allow direct injection of expectations. Signed-off-by: Pablo Neira Ayuso --- configure.ac | 2 +- conntrackd.8 | 12 +- doc/sync/alarm/conntrackd.conf | 16 ++ doc/sync/ftfw/conntrackd.conf | 16 ++ doc/sync/notrack/conntrackd.conf | 16 ++ include/cache.h | 4 + include/conntrackd.h | 19 +++ include/external.h | 11 ++ include/filter.h | 7 + include/internal.h | 17 +++ include/log.h | 2 + include/netlink.h | 7 + include/network.h | 40 ++++- src/Makefile.am | 2 +- src/build.c | 99 +++++++++++++ src/cache-ct.c | 6 +- src/cache-exp.c | 308 +++++++++++++++++++++++++++++++++++++++ src/external_cache.c | 85 +++++++++++ src/external_inject.c | 95 +++++++++++- src/filter.c | 76 ++++++++++ src/internal_bypass.c | 146 ++++++++++++++++++- src/internal_cache.c | 173 ++++++++++++++++++++++ src/log.c | 37 +++++ src/main.c | 69 +++++++-- src/netlink.c | 63 +++++++- src/network.c | 5 + src/parse.c | 192 ++++++++++++++++++++++++ src/read_config_lex.l | 1 + src/read_config_yy.y | 50 ++++++- src/run.c | 206 +++++++++++++++++++++++--- src/sync-ftfw.c | 7 +- src/sync-mode.c | 165 ++++++++++++++++++--- src/sync-notrack.c | 6 +- 33 files changed, 1889 insertions(+), 71 deletions(-) create mode 100644 src/cache-exp.c (limited to 'include/netlink.h') diff --git a/configure.ac b/configure.ac index 0481e23..26a7e02 100644 --- a/configure.ac +++ b/configure.ac @@ -52,7 +52,7 @@ else fi PKG_CHECK_MODULES([LIBNFNETLINK], [libnfnetlink >= 1.0.0]) -PKG_CHECK_MODULES([LIBNETFILTER_CONNTRACK], [libnetfilter_conntrack >= 0.9.1]) +PKG_CHECK_MODULES([LIBNETFILTER_CONNTRACK], [libnetfilter_conntrack >= 1.0.0]) AC_CHECK_HEADERS([linux/capability.h],, [AC_MSG_ERROR([Cannot find linux/capabibility.h])]) diff --git a/conntrackd.8 b/conntrackd.8 index 0c9054e..f07ad7a 100644 --- a/conntrackd.8 +++ b/conntrackd.8 @@ -24,10 +24,10 @@ Run conntrackd in daemon mode. .B conntrackd can be used in client mode to request several information and operations to a running daemon .TP -.BI "-i " +.BI "-i "[ct|expect]" Dump the internal cache, i.e. show local states .TP -.BI "-e " +.BI "-e "[ct|expect]" Dump the external cache, i.e. show foreign states .TP .BI "-x " @@ -37,7 +37,7 @@ with "-i" and "-e" parameters. .BI "-f " "[|internal|external]" Flush the internal and/or external cache .TP -.BI "-F " +.BI "-F [ct|expect]" Flush the kernel conntrack table (if you use a Linux kernel >= 2.6.29, this option will not flush your internal and external cache). .TP @@ -48,15 +48,17 @@ ask conntrackd to send the state-entries that it owns to others. .BI "-k " Kill the daemon .TP -.BI "-s " "[|network|cache|runtime|link|rsqueue|process|queue]" +.BI "-s " "[|network|cache|runtime|link|rsqueue|process|queue|ct|expect]" Dump statistics. If no parameter is passed, it displays the general statistics. If "network" is passed as parameter it displays the networking statistics. If "cache" is passed as parameter, it shows the extended cache statistics. If "runtime" is passed as parameter, it shows the run-time statistics. If "process" is passed as parameter, it shows existing child processes (if any). If "queue" is passed as parameter, it shows queue statistics. +If "ct" is passed, it displays the general statistics. +If "expect" is passed as parameter, it shows expectation statistics. .TP -.BI "-R " +.BI "-R " "[ct|expect]" Force a resync against the kernel connection tracking table .TP .BI "-t " diff --git a/doc/sync/alarm/conntrackd.conf b/doc/sync/alarm/conntrackd.conf index d05b499..deed291 100644 --- a/doc/sync/alarm/conntrackd.conf +++ b/doc/sync/alarm/conntrackd.conf @@ -191,6 +191,22 @@ Sync { # This feature requires a Linux kernel >= 2.6.36. # # TCPWindowTracking Off + + # Set this option on if you want to enable the synchronization + # of expectations. You have to specify the list of helpers that + # you want to enable. Default is off. + # + # ExpectationSync { + # ftp + # h323 + # sip + # } + # + # You can use this alternatively: + # + # ExpectationSync On + # + # If you want to synchronize expectations of all helpers. # } } diff --git a/doc/sync/ftfw/conntrackd.conf b/doc/sync/ftfw/conntrackd.conf index c52f214..0304f0f 100644 --- a/doc/sync/ftfw/conntrackd.conf +++ b/doc/sync/ftfw/conntrackd.conf @@ -214,6 +214,22 @@ Sync { # This feature requires a Linux kernel >= 2.6.36. # # TCPWindowTracking Off + + # Set this option on if you want to enable the synchronization + # of expectations. You have to specify the list of helpers that + # you want to enable. Default is off. + # + # ExpectationSync { + # ftp + # h323 + # sip + # } + # + # You can use this alternatively: + # + # ExpectationSync On + # + # If you want to synchronize expectations of all helpers. # } } diff --git a/doc/sync/notrack/conntrackd.conf b/doc/sync/notrack/conntrackd.conf index 4d77266..34e7b32 100644 --- a/doc/sync/notrack/conntrackd.conf +++ b/doc/sync/notrack/conntrackd.conf @@ -253,6 +253,22 @@ Sync { # This feature requires a Linux kernel >= 2.6.36. # # TCPWindowTracking Off + + # Set this option on if you want to enable the synchronization + # of expectations. You have to specify the list of helpers that + # you want to enable. Default is off. + # + # ExpectationSync { + # ftp + # h323 + # sip + # } + # + # You can use this alternatively: + # + # ExpectationSync On + # + # If you want to synchronize expectations of all helpers. # } } diff --git a/include/cache.h b/include/cache.h index abebb97..3af2741 100644 --- a/include/cache.h +++ b/include/cache.h @@ -52,6 +52,7 @@ extern struct cache_feature timer_feature; enum cache_type { CACHE_T_NONE = 0, CACHE_T_CT, + CACHE_T_EXP, CACHE_T_MAX }; @@ -128,6 +129,9 @@ struct cache_ops { extern struct cache_ops cache_sync_internal_ct_ops; extern struct cache_ops cache_sync_external_ct_ops; extern struct cache_ops cache_stats_ct_ops; +/* templates to configure expectation caching. */ +extern struct cache_ops cache_sync_internal_exp_ops; +extern struct cache_ops cache_sync_external_exp_ops; struct nf_conntrack; diff --git a/include/conntrackd.h b/include/conntrackd.h index 697d3d7..8baa088 100644 --- a/include/conntrackd.h +++ b/include/conntrackd.h @@ -37,6 +37,16 @@ #define CT_FLUSH_EXT_CACHE 34 /* flush external cache */ #define STATS_PROCESS 35 /* child process stats */ #define STATS_QUEUE 36 /* queue stats */ +#define EXP_STATS 37 /* dump statistics */ +#define EXP_FLUSH_MASTER 38 /* flush kernel expect table */ +#define EXP_RESYNC_MASTER 39 /* resync with kernel exp table */ +#define EXP_DUMP_INTERNAL 40 /* dump internal expect cache */ +#define EXP_DUMP_EXTERNAL 41 /* dump external expect cache */ +#define EXP_COMMIT 42 /* commit expectations */ +#define ALL_FLUSH_MASTER 43 /* flush all kernel tables */ +#define ALL_RESYNC_MASTER 44 /* resync w/all kernel tables */ +#define ALL_FLUSH_CACHE 45 /* flush all caches */ +#define ALL_COMMIT 46 /* commit all tables */ #define DEFAULT_CONFIGFILE "/etc/conntrackd/conntrackd.conf" #define DEFAULT_LOCKFILE "/var/lock/conntrackd.lock" @@ -56,6 +66,7 @@ #define CTD_SYNC_ALARM (1UL << 3) #define CTD_SYNC_NOTRACK (1UL << 4) #define CTD_POLL (1UL << 5) +#define CTD_EXPECT (1UL << 6) /* FILENAME_MAX is 4096 on my system, perhaps too much? */ #ifndef FILENAME_MAXLEN @@ -105,6 +116,8 @@ struct ct_conf { int tcp_window_tracking; } sync; struct { + int subsys_id; + int groups; int events_reliable; } netlink; struct { @@ -130,6 +143,7 @@ struct ct_general_state { struct local_server local; struct ct_mode *mode; struct ct_filter *us_filter; + struct exp_filter *exp_filter; struct nfct_handle *event; /* event handler */ struct nfct_filter *filter; /* event filter */ @@ -177,6 +191,10 @@ struct ct_general_state { } stats; }; +struct commit_runqueue { + int (*cb)(struct nfct_handle *h, int step); +}; + #define STATE_SYNC(x) state.sync->x struct ct_sync_state { @@ -196,6 +214,7 @@ struct ct_sync_state { struct nfct_handle *h; struct evfd *evfd; int current; + struct commit_runqueue rq[2]; struct { int ok; int fail; diff --git a/include/external.h b/include/external.h index eef0e42..70f0c5c 100644 --- a/include/external.h +++ b/include/external.h @@ -18,6 +18,17 @@ struct external_handler { void (*stats)(int fd); void (*stats_ext)(int fd); } ct; + struct { + void (*new)(struct nf_expect *exp); + void (*upd)(struct nf_expect *exp); + void (*del)(struct nf_expect *exp); + + void (*dump)(int fd, int type); + void (*flush)(void); + int (*commit)(struct nfct_handle *h, int fd); + void (*stats)(int fd); + void (*stats_ext)(int fd); + } exp; }; extern struct external_handler external_cache; diff --git a/include/filter.h b/include/filter.h index f19b18b..3c7c8cc 100644 --- a/include/filter.h +++ b/include/filter.h @@ -52,4 +52,11 @@ void ct_filter_set_logic(struct ct_filter *f, enum ct_filter_logic logic); int ct_filter_conntrack(const struct nf_conntrack *ct, int userspace); +struct exp_filter; +struct nf_expect; + +struct exp_filter *exp_filter_create(void); +int exp_filter_add(struct exp_filter *f, const char *helper_name); +int exp_filter_find(struct exp_filter *f, const struct nf_expect *exp); + #endif diff --git a/include/internal.h b/include/internal.h index f50eb79..2ba9714 100644 --- a/include/internal.h +++ b/include/internal.h @@ -34,6 +34,23 @@ struct internal_handler { void (*stats)(int fd); void (*stats_ext)(int fd); } ct; + struct { + void *data; + + void (*new)(struct nf_expect *exp, int origin_type); + void (*upd)(struct nf_expect *exp, int origin_type); + int (*del)(struct nf_expect *exp, int origin_type); + + void (*dump)(int fd, int type); + void (*populate)(struct nf_expect *exp); + void (*purge)(void); + int (*resync)(enum nf_conntrack_msg_type type, + struct nf_expect *exp, void *data); + void (*flush)(void); + + void (*stats)(int fd); + void (*stats_ext)(int fd); + } exp; }; extern struct internal_handler internal_cache; diff --git a/include/log.h b/include/log.h index f5c5b4f..ae58e79 100644 --- a/include/log.h +++ b/include/log.h @@ -4,10 +4,12 @@ #include struct nf_conntrack; +struct nf_expect; int init_log(void); void dlog(int priority, const char *format, ...); void dlog_ct(FILE *fd, struct nf_conntrack *ct, unsigned int type); +void dlog_exp(FILE *fd, struct nf_expect *exp, unsigned int type); void close_log(void); #endif diff --git a/include/netlink.h b/include/netlink.h index 0df0cbb..3bde30c 100644 --- a/include/netlink.h +++ b/include/netlink.h @@ -30,4 +30,11 @@ static inline int ct_is_related(const struct nf_conntrack *ct) nfct_attr_is_set(ct, ATTR_MASTER_PORT_DST)); } +int nl_create_expect(struct nfct_handle *h, const struct nf_expect *orig, int timeout); +int nl_destroy_expect(struct nfct_handle *h, const struct nf_expect *exp); +int nl_get_expect(struct nfct_handle *h, const struct nf_expect *exp); +int nl_dump_expect_table(struct nfct_handle *h); +int nl_flush_expect_table(struct nfct_handle *h); +int nl_send_expect_resync(struct nfct_handle *h); + #endif diff --git a/include/network.h b/include/network.h index d0531b9..ab95499 100644 --- a/include/network.h +++ b/include/network.h @@ -4,9 +4,10 @@ #include #include -#define CONNTRACKD_PROTOCOL_VERSION 0 +#define CONNTRACKD_PROTOCOL_VERSION 1 struct nf_conntrack; +struct nf_expect; struct nethdr { #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -28,7 +29,10 @@ enum nethdr_type { NET_T_STATE_CT_NEW = 0, NET_T_STATE_CT_UPD, NET_T_STATE_CT_DEL, - NET_T_STATE_MAX = NET_T_STATE_CT_DEL, + NET_T_STATE_EXP_NEW = 3, + NET_T_STATE_EXP_UPD, + NET_T_STATE_EXP_DEL, + NET_T_STATE_MAX = NET_T_STATE_EXP_DEL, NET_T_CTL = 10, }; @@ -92,6 +96,17 @@ enum { __hdr; \ }) +#define BUILD_NETMSG_FROM_EXP(exp, query) \ +({ \ + static char __net[4096]; \ + struct nethdr *__hdr = (struct nethdr *) __net; \ + memset(__hdr, 0, NETHDR_SIZ); \ + nethdr_set(__hdr, query); \ + exp2msg(exp, __hdr); \ + HDR_HOST2NETWORK(__hdr); \ + __hdr; \ +}) + struct mcast_sock_multi; enum { @@ -239,4 +254,25 @@ struct nta_attr_natseqadj { void ct2msg(const struct nf_conntrack *ct, struct nethdr *n); int msg2ct(struct nf_conntrack *ct, struct nethdr *n, size_t remain); +enum nta_exp_attr { + NTA_EXP_MASTER_IPV4 = 0, /* struct nfct_attr_grp_ipv4 */ + NTA_EXP_MASTER_IPV6, /* struct nfct_attr_grp_ipv6 */ + NTA_EXP_MASTER_L4PROTO, /* uint8_t */ + NTA_EXP_MASTER_PORT, /* struct nfct_attr_grp_port */ + NTA_EXP_EXPECT_IPV4 = 4, /* struct nfct_attr_grp_ipv4 */ + NTA_EXP_EXPECT_IPV6, /* struct nfct_attr_grp_ipv6 */ + NTA_EXP_EXPECT_L4PROTO, /* uint8_t */ + NTA_EXP_EXPECT_PORT, /* struct nfct_attr_grp_port */ + NTA_EXP_MASK_IPV4 = 8, /* struct nfct_attr_grp_ipv4 */ + NTA_EXP_MASK_IPV6, /* struct nfct_attr_grp_ipv6 */ + NTA_EXP_MASK_L4PROTO, /* uint8_t */ + NTA_EXP_MASK_PORT, /* struct nfct_attr_grp_port */ + NTA_EXP_TIMEOUT, /* uint32_t */ + NTA_EXP_FLAGS, /* uint32_t */ + NTA_EXP_MAX +}; + +void exp2msg(const struct nf_expect *exp, struct nethdr *n); +int msg2exp(struct nf_expect *exp, struct nethdr *n, size_t remain); + #endif diff --git a/src/Makefile.am b/src/Makefile.am index a0abeee..7d7b2ac 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -12,7 +12,7 @@ conntrack_LDADD = ../extensions/libct_proto_tcp.la ../extensions/libct_proto_udp conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \ local.c log.c mcast.c udp.c netlink.c vector.c \ filter.c fds.c event.c process.c origin.c date.c \ - cache.c cache-ct.c \ + cache.c cache-ct.c cache-exp.c \ cache_timer.c \ sync-mode.c sync-alarm.c sync-ftfw.c sync-notrack.c \ traffic_stats.c stats-mode.c \ diff --git a/src/build.c b/src/build.c index 9c3687c..3193884 100644 --- a/src/build.c +++ b/src/build.c @@ -224,3 +224,102 @@ void ct2msg(const struct nf_conntrack *ct, struct nethdr *n) if (nfct_attr_is_set_array(ct, nat_type, 6)) ct_build_natseqadj(ct, n); } + +static void +exp_build_l4proto_tcp(const struct nf_conntrack *ct, struct nethdr *n, int a) +{ + ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a, + sizeof(struct nfct_attr_grp_port)); +} + +static void +exp_build_l4proto_sctp(const struct nf_conntrack *ct, struct nethdr *n, int a) +{ + ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a, + sizeof(struct nfct_attr_grp_port)); +} + +static void +exp_build_l4proto_dccp(const struct nf_conntrack *ct, struct nethdr *n, int a) +{ + ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a, + sizeof(struct nfct_attr_grp_port)); +} + +static void +exp_build_l4proto_udp(const struct nf_conntrack *ct, struct nethdr *n, int a) +{ + ct_build_group(ct, ATTR_GRP_ORIG_PORT, n, a, + sizeof(struct nfct_attr_grp_port)); +} + +static struct exp_build_l4proto { + void (*build)(const struct nf_conntrack *, struct nethdr *n, int a); +} exp_l4proto_fcn[IPPROTO_MAX] = { + [IPPROTO_TCP] = { .build = exp_build_l4proto_tcp }, + [IPPROTO_SCTP] = { .build = exp_build_l4proto_sctp }, + [IPPROTO_DCCP] = { .build = exp_build_l4proto_dccp }, + [IPPROTO_UDP] = { .build = exp_build_l4proto_udp }, +}; + +static inline void +exp_build_u32(const struct nf_expect *exp, int a, struct nethdr *n, int b) +{ + uint32_t data = nfexp_get_attr_u32(exp, a); + data = htonl(data); + addattr(n, b, &data, sizeof(uint32_t)); +} + +void exp2msg(const struct nf_expect *exp, struct nethdr *n) +{ + const struct nf_conntrack *ct = nfexp_get_attr(exp, ATTR_EXP_MASTER); + uint8_t l4proto = nfct_get_attr_u8(ct, ATTR_L4PROTO); + + /* master conntrack for this expectation. */ + if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV4)) { + ct_build_group(ct, ATTR_GRP_ORIG_IPV4, n, NTA_EXP_MASTER_IPV4, + sizeof(struct nfct_attr_grp_ipv4)); + } else if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV6)) { + ct_build_group(ct, ATTR_GRP_ORIG_IPV6, n, NTA_EXP_MASTER_IPV6, + sizeof(struct nfct_attr_grp_ipv6)); + } + ct_build_u8(ct, ATTR_L4PROTO, n, NTA_EXP_MASTER_L4PROTO); + + if (exp_l4proto_fcn[l4proto].build) + exp_l4proto_fcn[l4proto].build(ct, n, NTA_EXP_MASTER_PORT); + + /* the expectation itself. */ + ct = nfexp_get_attr(exp, ATTR_EXP_EXPECTED); + + if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV4)) { + ct_build_group(ct, ATTR_GRP_ORIG_IPV4, n, NTA_EXP_EXPECT_IPV4, + sizeof(struct nfct_attr_grp_ipv4)); + } else if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV6)) { + ct_build_group(ct, ATTR_GRP_ORIG_IPV6, n, NTA_EXP_EXPECT_IPV6, + sizeof(struct nfct_attr_grp_ipv6)); + } + ct_build_u8(ct, ATTR_L4PROTO, n, NTA_EXP_EXPECT_L4PROTO); + + if (exp_l4proto_fcn[l4proto].build) + exp_l4proto_fcn[l4proto].build(ct, n, NTA_EXP_EXPECT_PORT); + + /* mask for the expectation. */ + ct = nfexp_get_attr(exp, ATTR_EXP_MASK); + + if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV4)) { + ct_build_group(ct, ATTR_GRP_ORIG_IPV4, n, NTA_EXP_MASK_IPV4, + sizeof(struct nfct_attr_grp_ipv4)); + } else if (nfct_attr_grp_is_set(ct, ATTR_GRP_ORIG_IPV6)) { + ct_build_group(ct, ATTR_GRP_ORIG_IPV6, n, NTA_EXP_MASK_IPV6, + sizeof(struct nfct_attr_grp_ipv6)); + } + ct_build_u8(ct, ATTR_L4PROTO, n, NTA_EXP_MASK_L4PROTO); + + if (exp_l4proto_fcn[l4proto].build) + exp_l4proto_fcn[l4proto].build(ct, n, NTA_EXP_MASK_PORT); + + if (!CONFIG(commit_timeout) && nfexp_attr_is_set(exp, ATTR_EXP_TIMEOUT)) + exp_build_u32(exp, ATTR_EXP_TIMEOUT, n, NTA_EXP_TIMEOUT); + + exp_build_u32(exp, ATTR_EXP_FLAGS, n, NTA_EXP_FLAGS); +} diff --git a/src/cache-ct.c b/src/cache-ct.c index 2c6fd4e..0ad8d2a 100644 --- a/src/cache-ct.c +++ b/src/cache-ct.c @@ -251,7 +251,7 @@ static int cache_ct_commit(struct cache *c, struct nfct_handle *h, int clientfd) /* we already have one commit in progress, skip this. The clientfd * descriptor has to be closed by the caller. */ if (clientfd && STATE_SYNC(commit).clientfd != -1) - return 0; + return -1; switch(STATE_SYNC(commit).state) { case COMMIT_STATE_INACTIVE: @@ -308,9 +308,7 @@ static int cache_ct_commit(struct cache *c, struct nfct_handle *h, int clientfd) STATE_SYNC(commit).current = 0; STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE; - /* Close the client socket now that we're done. */ - close(STATE_SYNC(commit).clientfd); - STATE_SYNC(commit).clientfd = -1; + return 0; } return 1; } diff --git a/src/cache-exp.c b/src/cache-exp.c new file mode 100644 index 0000000..e88877a --- /dev/null +++ b/src/cache-exp.c @@ -0,0 +1,308 @@ +/* + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "cache.h" +#include "hash.h" +#include "log.h" +#include "conntrackd.h" +#include "netlink.h" +#include "event.h" +#include "jhash.h" +#include "network.h" + +#include +#include +#include +#include + +static uint32_t +cache_hash4_exp(const struct nf_conntrack *ct, const struct hashtable *table) +{ + uint32_t a[4] = { + [0] = nfct_get_attr_u32(ct, ATTR_IPV4_SRC), + [1] = nfct_get_attr_u32(ct, ATTR_IPV4_DST), + [2] = nfct_get_attr_u8(ct, ATTR_L3PROTO) << 16 | + nfct_get_attr_u8(ct, ATTR_L4PROTO), + [3] = nfct_get_attr_u16(ct, ATTR_PORT_SRC) << 16 | + nfct_get_attr_u16(ct, ATTR_PORT_DST), + }; + + /* + * Instead of returning hash % table->hashsize (implying a divide) + * we return the high 32 bits of the (hash * table->hashsize) that will + * give results between [0 and hashsize-1] and same hash distribution, + * but using a multiply, less expensive than a divide. See: + * http://www.mail-archive.com/netdev@vger.kernel.org/msg56623.html + */ + return ((uint64_t)jhash2(a, 4, 0) * table->hashsize) >> 32; +} + +static uint32_t +cache_hash6_exp(const struct nf_conntrack *ct, const struct hashtable *table) +{ + uint32_t a[10]; + + memcpy(&a[0], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); + memcpy(&a[4], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); + a[8] = nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16 | + nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO); + a[9] = nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16 | + nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST); + + return ((uint64_t)jhash2(a, 10, 0) * table->hashsize) >> 32; +} + +static uint32_t +cache_exp_hash(const void *data, const struct hashtable *table) +{ + int ret = 0; + const struct nf_expect *exp = data; + const struct nf_conntrack *ct = nfexp_get_attr(exp, ATTR_EXP_MASTER); + + switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) { + case AF_INET: + ret = cache_hash4_exp(ct, table); + break; + case AF_INET6: + ret = cache_hash6_exp(ct, table); + break; + default: + dlog(LOG_ERR, "unknown layer 3 proto in hash"); + break; + } + return ret; +} + +static int cache_exp_cmp(const void *data1, const void *data2) +{ + const struct cache_object *obj = data1; + const struct nf_expect *exp = data2; + + return nfexp_cmp(obj->ptr, exp, 0); +} + +static void *cache_exp_alloc(void) +{ + return nfexp_new(); +} + +static void cache_exp_free(void *ptr) +{ + nfexp_destroy(ptr); +} + +static void cache_exp_copy(void *dst, void *src, unsigned int flags) +{ + /* XXX: add nfexp_copy(...) to libnetfilter_conntrack. */ + memcpy(dst, src, nfexp_maxsize()); +} + +static int cache_exp_dump_step(void *data1, void *n) +{ + char buf[1024]; + int size; + struct __dump_container *container = data1; + struct cache_object *obj = n; + char *data = obj->data; + unsigned i; + + /* + * XXX: Do not dump the entries that are scheduled to expire. + * These entries talk about already destroyed connections + * that we keep for some time just in case that we have to + * resent some lost messages. We do not show them to the + * user as he may think that the firewall replicas are not + * in sync. The branch below is a hack as it is quite + * specific and it breaks conntrackd modularity. Probably + * there's a nicer way to do this but until I come up with it... + */ + if (CONFIG(flags) & CTD_SYNC_FTFW && obj->status == C_OBJ_DEAD) + return 0; + + /* do not show cached timeout, this may confuse users */ + if (nfexp_attr_is_set(obj->ptr, ATTR_EXP_TIMEOUT)) + nfexp_attr_unset(obj->ptr, ATTR_EXP_TIMEOUT); + + memset(buf, 0, sizeof(buf)); + size = nfexp_snprintf(buf, sizeof(buf),obj->ptr, + NFCT_T_UNKNOWN, container->type, 0); + + for (i = 0; i < obj->cache->num_features; i++) { + if (obj->cache->features[i]->dump) { + size += obj->cache->features[i]->dump(obj, data, + buf+size, + container->type); + data += obj->cache->features[i]->size; + } + } + if (container->type != NFCT_O_XML) { + long tm = time(NULL); + size += sprintf(buf+size, " [active since %lds]", + tm - obj->lifetime); + } + size += sprintf(buf+size, "\n"); + if (send(container->fd, buf, size, 0) == -1) { + if (errno != EPIPE) + return -1; + } + + return 0; +} + +static int cache_exp_commit_step(void *data, void *n) +{ + struct cache_object *obj = n; + struct __commit_container *tmp = data; + int ret, retry = 1, timeout; + struct nf_expect *exp = obj->ptr; + + if (CONFIG(commit_timeout)) { + timeout = CONFIG(commit_timeout); + } else { + timeout = time(NULL) - obj->lastupdate; + if (timeout < 0) { + /* XXX: Arbitrarily set the timer to one minute, how + * can this happen? For example, an adjustment due to + * daylight-saving. Probably other situations can + * trigger this. */ + timeout = 60; + } + /* calculate an estimation of the current timeout */ + timeout = nfexp_get_attr_u32(exp, ATTR_EXP_TIMEOUT) - timeout; + if (timeout < 0) { + timeout = 60; + } + } + +retry: + if (nl_create_expect(tmp->h, exp, timeout) == -1) { + if (errno == EEXIST && retry == 1) { + ret = nl_destroy_expect(tmp->h, exp); + if (ret == 0 || (ret == -1 && errno == ENOENT)) { + if (retry) { + retry = 0; + goto retry; + } + } + dlog(LOG_ERR, "commit-destroy: %s", strerror(errno)); + dlog_exp(STATE(log), exp, NFCT_O_PLAIN); + tmp->c->stats.commit_fail++; + } else { + dlog(LOG_ERR, "commit-create: %s", strerror(errno)); + dlog_exp(STATE(log), exp, NFCT_O_PLAIN); + tmp->c->stats.commit_fail++; + } + } else { + tmp->c->stats.commit_ok++; + } + /* keep iterating even if we have found errors */ + return 0; +} + +static int +cache_exp_commit(struct cache *c, struct nfct_handle *h, int clientfd) +{ + unsigned int commit_ok, commit_fail; + struct timeval commit_stop, res; + struct __commit_container tmp = { + .h = h, + .c = c, + }; + + /* we already have one commit in progress, skip this. The clientfd + * descriptor has to be closed by the caller. */ + if (clientfd && STATE_SYNC(commit).clientfd != -1) + return -1; + + switch(STATE_SYNC(commit).state) { + case COMMIT_STATE_INACTIVE: + gettimeofday(&STATE_SYNC(commit).stats.start, NULL); + STATE_SYNC(commit).stats.ok = c->stats.commit_ok; + STATE_SYNC(commit).stats.fail = c->stats.commit_fail; + STATE_SYNC(commit).clientfd = clientfd; + case COMMIT_STATE_MASTER: + STATE_SYNC(commit).current = + hashtable_iterate_limit(c->h, &tmp, + STATE_SYNC(commit).current, + CONFIG(general).commit_steps, + cache_exp_commit_step); + if (STATE_SYNC(commit).current < CONFIG(hashsize)) { + STATE_SYNC(commit).state = COMMIT_STATE_MASTER; + /* give it another step as soon as possible */ + write_evfd(STATE_SYNC(commit).evfd); + return 1; + } + + /* calculate the time that commit has taken */ + gettimeofday(&commit_stop, NULL); + timersub(&commit_stop, &STATE_SYNC(commit).stats.start, &res); + + /* calculate new entries committed */ + commit_ok = c->stats.commit_ok - STATE_SYNC(commit).stats.ok; + commit_fail = + c->stats.commit_fail - STATE_SYNC(commit).stats.fail; + + /* log results */ + dlog(LOG_NOTICE, "Committed %u new expectations", commit_ok); + + if (commit_fail) + dlog(LOG_NOTICE, "%u expectations can't be " + "committed", commit_fail); + + dlog(LOG_NOTICE, "commit has taken %lu.%06lu seconds", + res.tv_sec, res.tv_usec); + + /* prepare the state machine for new commits */ + STATE_SYNC(commit).current = 0; + STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE; + + return 0; + } + return 1; +} + +static struct nethdr * +cache_exp_build_msg(const struct cache_object *obj, int type) +{ + return BUILD_NETMSG_FROM_EXP(obj->ptr, type); +} + +/* template to cache expectations coming from the kernel. */ +struct cache_ops cache_sync_internal_exp_ops = { + .hash = cache_exp_hash, + .cmp = cache_exp_cmp, + .alloc = cache_exp_alloc, + .free = cache_exp_free, + .copy = cache_exp_copy, + .dump_step = cache_exp_dump_step, + .commit = NULL, + .build_msg = cache_exp_build_msg, +}; + +/* template to cache expectations coming from the network. */ +struct cache_ops cache_sync_external_exp_ops = { + .hash = cache_exp_hash, + .cmp = cache_exp_cmp, + .alloc = cache_exp_alloc, + .free = cache_exp_free, + .copy = cache_exp_copy, + .dump_step = cache_exp_dump_step, + .commit = cache_exp_commit, + .build_msg = NULL, +}; diff --git a/src/external_cache.c b/src/external_cache.c index 3f896a0..e290249 100644 --- a/src/external_cache.c +++ b/src/external_cache.c @@ -26,6 +26,7 @@ #include static struct cache *external; +static struct cache *external_exp; static int external_cache_init(void) { @@ -36,12 +37,21 @@ static int external_cache_init(void) dlog(LOG_ERR, "can't allocate memory for the external cache"); return -1; } + external_exp = cache_create("external", CACHE_T_EXP, + STATE_SYNC(sync)->external_cache_flags, + NULL, &cache_sync_external_exp_ops); + if (external_exp == NULL) { + dlog(LOG_ERR, "can't allocate memory for the external cache"); + return -1; + } + return 0; } static void external_cache_close(void) { cache_destroy(external); + cache_destroy(external_exp); } static void external_cache_ct_new(struct nf_conntrack *ct) @@ -109,6 +119,71 @@ static void external_cache_ct_stats_ext(int fd) cache_stats_extended(external, fd); } +static void external_cache_exp_new(struct nf_expect *exp) +{ + struct cache_object *obj; + int id; + + obj = cache_find(external_exp, exp, &id); + if (obj == NULL) { +retry: + obj = cache_object_new(external_exp, exp); + if (obj == NULL) + return; + + if (cache_add(external_exp, obj, id) == -1) { + cache_object_free(obj); + return; + } + } else { + cache_del(external_exp, obj); + cache_object_free(obj); + goto retry; + } +} + +static void external_cache_exp_upd(struct nf_expect *exp) +{ + cache_update_force(external_exp, exp); +} + +static void external_cache_exp_del(struct nf_expect *exp) +{ + struct cache_object *obj; + int id; + + obj = cache_find(external_exp, exp, &id); + if (obj) { + cache_del(external_exp, obj); + cache_object_free(obj); + } +} + +static void external_cache_exp_dump(int fd, int type) +{ + cache_dump(external_exp, fd, type); +} + +static int external_cache_exp_commit(struct nfct_handle *h, int fd) +{ + return cache_commit(external_exp, h, fd); +} + +static void external_cache_exp_flush(void) +{ + cache_flush(external_exp); +} + +static void external_cache_exp_stats(int fd) +{ + cache_stats(external_exp, fd); +} + +static void external_cache_exp_stats_ext(int fd) +{ + cache_stats_extended(external_exp, fd); +} + struct external_handler external_cache = { .init = external_cache_init, .close = external_cache_close, @@ -122,4 +197,14 @@ struct external_handler external_cache = { .stats = external_cache_ct_stats, .stats_ext = external_cache_ct_stats_ext, }, + .exp = { + .new = external_cache_exp_new, + .upd = external_cache_exp_upd, + .del = external_cache_exp_del, + .dump = external_cache_exp_dump, + .commit = external_cache_exp_commit, + .flush = external_cache_exp_flush, + .stats = external_cache_exp_stats, + .stats_ext = external_cache_exp_stats_ext, + }, }; diff --git a/src/external_inject.c b/src/external_inject.c index ba5f3d1..0ad3478 100644 --- a/src/external_inject.c +++ b/src/external_inject.c @@ -42,7 +42,7 @@ struct { static int external_inject_init(void) { /* handler to directly inject conntracks into kernel-space */ - inject = nfct_open(CONNTRACK, 0); + inject = nfct_open(CONFIG(netlink).subsys_id, 0); if (inject == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -175,6 +175,89 @@ static void external_inject_ct_stats(int fd) send(fd, buf, size, 0); } +struct { + uint32_t add_ok; + uint32_t add_fail; + uint32_t upd_ok; + uint32_t upd_fail; + uint32_t del_ok; + uint32_t del_fail; +} exp_external_inject_stat; + +static void external_inject_exp_new(struct nf_expect *exp) +{ + int ret, retry = 1; + +retry: + if (nl_create_expect(inject, exp, 0) == -1) { + /* if the state entry exists, we delete and try again */ + if (errno == EEXIST && retry == 1) { + ret = nl_destroy_expect(inject, exp); + if (ret == 0 || (ret == -1 && errno == ENOENT)) { + if (retry) { + retry = 0; + goto retry; + } + } + exp_external_inject_stat.add_fail++; + dlog(LOG_ERR, "inject-add1: %s", strerror(errno)); + dlog_exp(STATE(log), exp, NFCT_O_PLAIN); + return; + } + exp_external_inject_stat.add_fail++; + dlog(LOG_ERR, "inject-add2: %s", strerror(errno)); + dlog_exp(STATE(log), exp, NFCT_O_PLAIN); + } else { + exp_external_inject_stat.add_ok++; + } +} + +static void external_inject_exp_del(struct nf_expect *exp) +{ + if (nl_destroy_expect(inject, exp) == -1) { + if (errno != ENOENT) { + exp_external_inject_stat.del_fail++; + dlog(LOG_ERR, "inject-del: %s", strerror(errno)); + dlog_exp(STATE(log), exp, NFCT_O_PLAIN); + } + } else { + exp_external_inject_stat.del_ok++; + } +} + +static void external_inject_exp_dump(int fd, int type) +{ +} + +static int external_inject_exp_commit(struct nfct_handle *h, int fd) +{ + /* close the commit socket. */ + return LOCAL_RET_OK; +} + +static void external_inject_exp_flush(void) +{ +} + +static void external_inject_exp_stats(int fd) +{ + char buf[512]; + int size; + + size = sprintf(buf, "external inject:\n" + "connections created:\t\t%12u\tfailed:\t%12u\n" + "connections updated:\t\t%12u\tfailed:\t%12u\n" + "connections destroyed:\t\t%12u\tfailed:\t%12u\n\n", + exp_external_inject_stat.add_ok, + exp_external_inject_stat.add_fail, + exp_external_inject_stat.upd_ok, + exp_external_inject_stat.upd_fail, + exp_external_inject_stat.del_ok, + exp_external_inject_stat.del_fail); + + send(fd, buf, size, 0); +} + struct external_handler external_inject = { .init = external_inject_init, .close = external_inject_close, @@ -188,4 +271,14 @@ struct external_handler external_inject = { .stats = external_inject_ct_stats, .stats_ext = external_inject_ct_stats, }, + .exp = { + .new = external_inject_exp_new, + .upd = external_inject_exp_new, + .del = external_inject_exp_del, + .dump = external_inject_exp_dump, + .commit = external_inject_exp_commit, + .flush = external_inject_exp_flush, + .stats = external_inject_exp_stats, + .stats_ext = external_inject_exp_stats, + }, }; diff --git a/src/filter.c b/src/filter.c index 746a9bb..e8515d6 100644 --- a/src/filter.c +++ b/src/filter.c @@ -405,3 +405,79 @@ int ct_filter_conntrack(const struct nf_conntrack *ct, int userspace) return 0; } + +struct exp_filter { + struct list_head list; +}; + +struct exp_filter *exp_filter_create(void) +{ + struct exp_filter *f; + + f = calloc(1, sizeof(struct exp_filter)); + if (f == NULL) + return NULL; + + INIT_LIST_HEAD(&f->list); + return f; +} + +struct exp_filter_item { + struct list_head head; + char helper_name[NFCT_HELPER_NAME_MAX]; +}; + +/* this is ugly, but it simplifies read_config_yy.y */ +static struct exp_filter *exp_filter_alloc(void) +{ + if (STATE(exp_filter) == NULL) { + STATE(exp_filter) = exp_filter_create(); + if (STATE(exp_filter) == NULL) { + fprintf(stderr, "Can't init expectation filtering!\n"); + return NULL; + } + } + return STATE(exp_filter);; +} + +int exp_filter_add(struct exp_filter *f, const char *helper_name) +{ + struct exp_filter_item *item; + + f = exp_filter_alloc(); + if (f == NULL) + return -1; + + list_for_each_entry(item, &f->list, head) { + if (strncmp(item->helper_name, helper_name, + NFCT_HELPER_NAME_MAX) == 0) { + return -1; + } + } + item = calloc(1, sizeof(struct exp_filter_item)); + if (item == NULL) + return -1; + + strncpy(item->helper_name, helper_name, NFCT_HELPER_NAME_MAX); + list_add(&item->head, &f->list); + return 0; +} + +int exp_filter_find(struct exp_filter *f, const struct nf_expect *exp) +{ + struct exp_filter_item *item; + + if (f == NULL) + return 0; + + list_for_each_entry(item, &f->list, head) { + const char *name = nfexp_get_attr(exp, ATTR_EXP_HELPER_NAME); + + /* we allow partial matching to support things like sip-PORT. */ + if (strncmp(item->helper_name, name, + strlen(item->helper_name)) == 0) { + return 1; + } + } + return 0; +} diff --git a/src/internal_bypass.c b/src/internal_bypass.c index 98717f3..5c83c21 100644 --- a/src/internal_bypass.c +++ b/src/internal_bypass.c @@ -52,7 +52,7 @@ static void internal_bypass_ct_dump(int fd, int type) u_int32_t family = AF_UNSPEC; int ret; - h = nfct_open(CONNTRACK, 0); + h = nfct_open(CONFIG(netlink).subsys_id, 0); if (h == NULL) { dlog(LOG_ERR, "can't allocate memory for the internal cache"); return; @@ -151,6 +151,138 @@ static int internal_bypass_ct_event_del(struct nf_conntrack *ct, int origin) return 1; } +static int +internal_bypass_exp_dump_cb(enum nf_conntrack_msg_type type, + struct nf_expect *exp, void *data) +{ + char buf[1024]; + int size, *fd = data; + const struct nf_conntrack *master = + nfexp_get_attr(exp, ATTR_EXP_MASTER); + + if (!exp_filter_find(STATE(exp_filter), exp)) + return NFCT_CB_CONTINUE; + + if (ct_filter_conntrack(master, 1)) + return NFCT_CB_CONTINUE; + + size = nfexp_snprintf(buf, 1024, exp, + NFCT_T_UNKNOWN, NFCT_O_DEFAULT, 0); + if (size < 1024) { + buf[size] = '\n'; + size++; + } + send(*fd, buf, size, 0); + + return NFCT_CB_CONTINUE; +} + +static void internal_bypass_exp_dump(int fd, int type) +{ + struct nfct_handle *h; + u_int32_t family = AF_UNSPEC; + int ret; + + h = nfct_open(CONFIG(netlink).subsys_id, 0); + if (h == NULL) { + dlog(LOG_ERR, "can't allocate memory for the internal cache"); + return; + } + nfexp_callback_register(h, NFCT_T_ALL, + internal_bypass_exp_dump_cb, &fd); + ret = nfexp_query(h, NFCT_Q_DUMP, &family); + if (ret == -1) { + dlog(LOG_ERR, "can't dump kernel table"); + } + nfct_close(h); +} + +static void internal_bypass_exp_flush(void) +{ + nl_flush_expect_table(STATE(flush)); +} + +struct { + uint32_t new; + uint32_t upd; + uint32_t del; +} exp_internal_bypass_stats; + +static void internal_bypass_exp_stats(int fd) +{ + char buf[512]; + int size; + + size = sprintf(buf, "internal bypass:\n" + "connections new:\t\t%12u\n" + "connections updated:\t\t%12u\n" + "connections destroyed:\t\t%12u\n\n", + exp_internal_bypass_stats.new, + exp_internal_bypass_stats.upd, + exp_internal_bypass_stats.del); + + send(fd, buf, size, 0); +} + +/* unused, INTERNAL_F_POPULATE is unset. No cache, nothing to populate. */ +static void internal_bypass_exp_populate(struct nf_expect *exp) +{ +} + +/* unused, INTERNAL_F_RESYNC is unset. */ +static void internal_bypass_exp_purge(void) +{ +} + +/* unused, INTERNAL_F_RESYNC is unset. Nothing to resync, we have no cache. */ +static int +internal_bypass_exp_resync(enum nf_conntrack_msg_type type, + struct nf_expect *exp, void *data) +{ + return NFCT_CB_CONTINUE; +} + +static void internal_bypass_exp_event_new(struct nf_expect *exp, int origin) +{ + struct nethdr *net; + + /* this event has been triggered by me, skip */ + if (origin != CTD_ORIGIN_NOT_ME) + return; + + net = BUILD_NETMSG_FROM_EXP(exp, NET_T_STATE_EXP_NEW); + multichannel_send(STATE_SYNC(channel), net); + exp_internal_bypass_stats.new++; +} + +static void internal_bypass_exp_event_upd(struct nf_expect *exp, int origin) +{ + struct nethdr *net; + + /* this event has been triggered by me, skip */ + if (origin != CTD_ORIGIN_NOT_ME) + return; + + net = BUILD_NETMSG_FROM_EXP(exp, NET_T_STATE_EXP_UPD); + multichannel_send(STATE_SYNC(channel), net); + exp_internal_bypass_stats.upd++; +} + +static int internal_bypass_exp_event_del(struct nf_expect *exp, int origin) +{ + struct nethdr *net; + + /* this event has been triggered by me, skip */ + if (origin != CTD_ORIGIN_NOT_ME) + return 1; + + net = BUILD_NETMSG_FROM_EXP(exp, NET_T_STATE_EXP_DEL); + multichannel_send(STATE_SYNC(channel), net); + exp_internal_bypass_stats.del++; + + return 1; +} + struct internal_handler internal_bypass = { .init = internal_bypass_init, .close = internal_bypass_close, @@ -166,4 +298,16 @@ struct internal_handler internal_bypass = { .upd = internal_bypass_ct_event_upd, .del = internal_bypass_ct_event_del, }, + .exp = { + .dump = internal_bypass_exp_dump, + .flush = internal_bypass_exp_flush, + .stats = internal_bypass_exp_stats, + .stats_ext = internal_bypass_exp_stats, + .populate = internal_bypass_exp_populate, + .purge = internal_bypass_exp_purge, + .resync = internal_bypass_exp_resync, + .new = internal_bypass_exp_event_new, + .upd = internal_bypass_exp_event_upd, + .del = internal_bypass_exp_event_del, + }, }; diff --git a/src/internal_cache.c b/src/internal_cache.c index 952327d..ba2d74b 100644 --- a/src/internal_cache.c +++ b/src/internal_cache.c @@ -32,12 +32,25 @@ static int internal_cache_init(void) dlog(LOG_ERR, "can't allocate memory for the internal cache"); return -1; } + + STATE(mode)->internal->exp.data = + cache_create("internal", CACHE_T_EXP, + STATE_SYNC(sync)->internal_cache_flags, + STATE_SYNC(sync)->internal_cache_extra, + &cache_sync_internal_exp_ops); + + if (!STATE(mode)->internal->exp.data) { + dlog(LOG_ERR, "can't allocate memory for the internal cache"); + return -1; + } + return 0; } static void internal_cache_close(void) { cache_destroy(STATE(mode)->internal->ct.data); + cache_destroy(STATE(mode)->internal->exp.data); } static void internal_cache_ct_dump(int fd, int type) @@ -203,6 +216,154 @@ static int internal_cache_ct_event_del(struct nf_conntrack *ct, int origin) return 1; } +static void internal_cache_exp_dump(int fd, int type) +{ + cache_dump(STATE(mode)->internal->exp.data, fd, type); +} + +static void internal_cache_exp_flush(void) +{ + cache_flush(STATE(mode)->internal->exp.data); +} + +static void internal_cache_exp_stats(int fd) +{ + cache_stats(STATE(mode)->internal->exp.data, fd); +} + +static void internal_cache_exp_stats_ext(int fd) +{ + cache_stats_extended(STATE(mode)->internal->exp.data, fd); +} + +static void internal_cache_exp_populate(struct nf_expect *exp) +{ + cache_update_force(STATE(mode)->internal->exp.data, exp); +} + +static int internal_cache_exp_purge_step(void *data1, void *data2) +{ + struct cache_object *obj = data2; + + STATE(get_retval) = 0; + nl_get_expect(STATE(get), obj->ptr); /* modifies STATE(get_reval) */ + if (!STATE(get_retval)) { + if (obj->status != C_OBJ_DEAD) { + cache_object_set_status(obj, C_OBJ_DEAD); + sync_send(obj, NET_T_STATE_EXP_DEL); + cache_object_put(obj); + } + } + + return 0; +} + +static void internal_cache_exp_purge(void) +{ + cache_iterate(STATE(mode)->internal->exp.data, NULL, + internal_cache_exp_purge_step); +} + +static int +internal_cache_exp_resync(enum nf_conntrack_msg_type type, + struct nf_expect *exp, void *data) +{ + struct cache_object *obj; + const struct nf_conntrack *master = + nfexp_get_attr(exp, ATTR_EXP_MASTER); + + if (!exp_filter_find(STATE(exp_filter), exp)) + return NFCT_CB_CONTINUE; + + if (ct_filter_conntrack(master, 1)) + return NFCT_CB_CONTINUE; + + obj = cache_update_force(STATE(mode)->internal->exp.data, exp); + if (obj == NULL) + return NFCT_CB_CONTINUE; + + switch (obj->status) { + case C_OBJ_NEW: + sync_send(obj, NET_T_STATE_EXP_NEW); + break; + case C_OBJ_ALIVE: + sync_send(obj, NET_T_STATE_EXP_UPD); + break; + } + return NFCT_CB_CONTINUE; +} + +static void internal_cache_exp_event_new(struct nf_expect *exp, int origin) +{ + struct cache_object *obj; + int id; + + /* this event has been triggered by a direct inject, skip */ + if (origin == CTD_ORIGIN_INJECT) + return; + + obj = cache_find(STATE(mode)->internal->exp.data, exp, &id); + if (obj == NULL) { +retry: + obj = cache_object_new(STATE(mode)->internal->exp.data, exp); + if (obj == NULL) + return; + if (cache_add(STATE(mode)->internal->exp.data, obj, id) == -1) { + cache_object_free(obj); + return; + } + /* only synchronize events that have been triggered by other + * processes or the kernel, but don't propagate events that + * have been triggered by conntrackd itself, eg. commits. */ + if (origin == CTD_ORIGIN_NOT_ME) + sync_send(obj, NET_T_STATE_EXP_NEW); + } else { + cache_del(STATE(mode)->internal->exp.data, obj); + cache_object_free(obj); + goto retry; + } +} + +static void internal_cache_exp_event_upd(struct nf_expect *exp, int origin) +{ + struct cache_object *obj; + + /* this event has been triggered by a direct inject, skip */ + if (origin == CTD_ORIGIN_INJECT) + return; + + obj = cache_update_force(STATE(mode)->internal->exp.data, exp); + if (obj == NULL) + return; + + if (origin == CTD_ORIGIN_NOT_ME) + sync_send(obj, NET_T_STATE_EXP_UPD); +} + +static int internal_cache_exp_event_del(struct nf_expect *exp, int origin) +{ + struct cache_object *obj; + int id; + + /* this event has been triggered by a direct inject, skip */ + if (origin == CTD_ORIGIN_INJECT) + return 0; + + /* we don't synchronize events for objects that are not in the cache */ + obj = cache_find(STATE(mode)->internal->exp.data, exp, &id); + if (obj == NULL) + return 0; + + if (obj->status != C_OBJ_DEAD) { + cache_object_set_status(obj, C_OBJ_DEAD); + if (origin == CTD_ORIGIN_NOT_ME) { + sync_send(obj, NET_T_STATE_EXP_DEL); + } + cache_object_put(obj); + } + return 1; +} + struct internal_handler internal_cache = { .flags = INTERNAL_F_POPULATE | INTERNAL_F_RESYNC, .init = internal_cache_init, @@ -219,4 +380,16 @@ struct internal_handler internal_cache = { .upd = internal_cache_ct_event_upd, .del = internal_cache_ct_event_del, }, + .exp = { + .dump = internal_cache_exp_dump, + .flush = internal_cache_exp_flush, + .stats = internal_cache_exp_stats, + .stats_ext = internal_cache_exp_stats_ext, + .populate = internal_cache_exp_populate, + .purge = internal_cache_exp_purge, + .resync = internal_cache_exp_resync, + .new = internal_cache_exp_event_new, + .upd = internal_cache_exp_event_upd, + .del = internal_cache_exp_event_del, + }, }; diff --git a/src/log.c b/src/log.c index 9fe5119..d4de111 100644 --- a/src/log.c +++ b/src/log.c @@ -145,6 +145,43 @@ void dlog_ct(FILE *fd, struct nf_conntrack *ct, unsigned int type) } } +void dlog_exp(FILE *fd, struct nf_expect *exp, unsigned int type) +{ + time_t t; + char buf[1024]; + char *tmp; + unsigned int flags = 0; + + buf[0]='\0'; + + switch(type) { + case NFCT_O_PLAIN: + t = time(NULL); + ctime_r(&t, buf); + tmp = buf + strlen(buf); + buf[strlen(buf)-1]='\t'; + break; + default: + return; + } + nfexp_snprintf(buf+strlen(buf), 1024-strlen(buf), exp, 0, type, flags); + + if (fd) { + snprintf(buf+strlen(buf), 1024-strlen(buf), "\n"); + fputs(buf, fd); + } + + if (fd == STATE(log)) { + /* error reporting */ + if (CONFIG(syslog_facility) != -1) + syslog(LOG_ERR, "%s", tmp); + } else if (fd == STATE(stats_log)) { + /* connection logging */ + if (CONFIG(stats).syslog_facility != -1) + syslog(LOG_INFO, "%s", tmp); + } +} + void close_log(void) { if (STATE(log) != NULL) diff --git a/src/main.c b/src/main.c index ebfc8b9..342ed45 100644 --- a/src/main.c +++ b/src/main.c @@ -1,5 +1,6 @@ /* - * (C) 2006-2007 by Pablo Neira Ayuso + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -38,14 +39,15 @@ static const char usage_daemon_commands[] = static const char usage_client_commands[] = "Client mode commands:\n" - " -c, commit external cache to conntrack table\n" + " -c [ct|expect], commit external cache to conntrack table\n" " -f [|internal|external], flush internal and external cache\n" - " -F, flush kernel conntrack table\n" - " -i, display content of the internal cache\n" - " -e, display the content of the external cache\n" + " -F [ct|expect], flush kernel conntrack table\n" + " -i [ct|expect], display content of the internal cache\n" + " -e [ct|expect], display the content of the external cache\n" " -k, kill conntrack daemon\n" - " -s [|network|cache|runtime|link|rsqueue|queue], dump statistics\n" - " -R, resync with kernel conntrack table\n" + " -s [|network|cache|runtime|link|rsqueue|queue|ct|expect], " + "dump statistics\n" + " -R [ct|expect], resync with kernel conntrack table\n" " -n, request resync with other node (only FT-FW and NOTRACK modes)\n" " -x, dump cache in XML format (requires -i or -e)\n" " -t, reset the kernel timeout (see PurgeTimeout clause)\n" @@ -89,6 +91,25 @@ set_operation_mode(int *current, int want, char *argv[]) } } +static int +set_action_by_table(int i, int argc, char *argv[], + int ct_action, int exp_action, int dfl_action, int *action) +{ + if (i+1 < argc && argv[i+1][0] != '-') { + if (strncmp(argv[i+1], "ct", strlen(argv[i+1])) == 0) { + *action = ct_action; + i++; + } else if (strncmp(argv[i+1], "expect", + strlen(argv[i+1])) == 0) { + *action = exp_action; + i++; + } + } else + *action = dfl_action; + + return i; +} + int main(int argc, char *argv[]) { int ret, i, action = -1; @@ -115,15 +136,23 @@ int main(int argc, char *argv[]) break; case 'c': set_operation_mode(&type, REQUEST, argv); - action = CT_COMMIT; + i = set_action_by_table(i, argc, argv, + CT_COMMIT, EXP_COMMIT, + ALL_COMMIT, &action); break; case 'i': set_operation_mode(&type, REQUEST, argv); - action = CT_DUMP_INTERNAL; + i = set_action_by_table(i, argc, argv, + CT_DUMP_INTERNAL, + EXP_DUMP_INTERNAL, + CT_DUMP_INTERNAL, &action); break; case 'e': set_operation_mode(&type, REQUEST, argv); - action = CT_DUMP_EXTERNAL; + i = set_action_by_table(i, argc, argv, + CT_DUMP_EXTERNAL, + EXP_DUMP_EXTERNAL, + CT_DUMP_EXTERNAL, &action); break; case 'C': if (++i < argc) { @@ -142,7 +171,10 @@ int main(int argc, char *argv[]) break; case 'F': set_operation_mode(&type, REQUEST, argv); - action = CT_FLUSH_MASTER; + i = set_action_by_table(i, argc, argv, + CT_FLUSH_MASTER, + EXP_FLUSH_MASTER, + ALL_FLUSH_MASTER, &action); break; case 'f': set_operation_mode(&type, REQUEST, argv); @@ -164,12 +196,15 @@ int main(int argc, char *argv[]) } } else { /* default to general flushing */ - action = CT_FLUSH_CACHE; + action = ALL_FLUSH_CACHE; } break; case 'R': set_operation_mode(&type, REQUEST, argv); - action = CT_RESYNC_MASTER; + i = set_action_by_table(i, argc, argv, + CT_RESYNC_MASTER, + EXP_RESYNC_MASTER, + ALL_RESYNC_MASTER, &action); break; case 'B': set_operation_mode(&type, REQUEST, argv); @@ -222,6 +257,14 @@ int main(int argc, char *argv[]) strlen(argv[i+1])) == 0) { action = STATS_QUEUE; i++; + } else if (strncmp(argv[i+1], "ct", + strlen(argv[i+1])) == 0) { + action = STATS; + i++; + } else if (strncmp(argv[i+1], "expect", + strlen(argv[i+1])) == 0) { + action = EXP_STATS; + i++; } else { fprintf(stderr, "ERROR: unknown " "parameter `%s' for " diff --git a/src/netlink.c b/src/netlink.c index 60274f3..fe979e3 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -1,5 +1,6 @@ /* - * (C) 2006 by Pablo Neira Ayuso + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -32,7 +33,7 @@ struct nfct_handle *nl_init_event_handler(void) { struct nfct_handle *h; - h = nfct_open(CONNTRACK, NFCT_ALL_CT_GROUPS); + h = nfct_open(CONFIG(netlink).subsys_id, CONFIG(netlink).groups); if (h == NULL) return NULL; @@ -301,3 +302,61 @@ int nl_destroy_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) { return nfct_query(h, NFCT_Q_DESTROY, ct); } + +int nl_create_expect(struct nfct_handle *h, const struct nf_expect *orig, + int timeout) +{ + int ret; + struct nf_expect *exp; + + exp = nfexp_clone(orig); + if (exp == NULL) + return -1; + + if (timeout > 0) + nfexp_set_attr_u32(exp, ATTR_EXP_TIMEOUT, timeout); + + ret = nfexp_query(h, NFCT_Q_CREATE, exp); + nfexp_destroy(exp); + + return ret; +} + +int nl_destroy_expect(struct nfct_handle *h, const struct nf_expect *exp) +{ + return nfexp_query(h, NFCT_Q_DESTROY, exp); +} + +/* if the handle has no callback, check for existence, otherwise, update */ +int nl_get_expect(struct nfct_handle *h, const struct nf_expect *exp) +{ + int ret = 1; + struct nf_expect *tmp; + + /* XXX: we only need the expectation, not the mask and the master. */ + tmp = nfexp_clone(exp); + if (tmp == NULL) + return -1; + + if (nfexp_query(h, NFCT_Q_GET, tmp) == -1) + ret = (errno == ENOENT) ? 0 : -1; + + nfexp_destroy(tmp); + return ret; +} + +int nl_dump_expect_table(struct nfct_handle *h) +{ + return nfexp_query(h, NFCT_Q_DUMP, &CONFIG(family)); +} + +int nl_flush_expect_table(struct nfct_handle *h) +{ + return nfexp_query(h, NFCT_Q_FLUSH, &CONFIG(family)); +} + +int nl_send_expect_resync(struct nfct_handle *h) +{ + int family = CONFIG(family); + return nfexp_send(h, NFCT_Q_DUMP, &family); +} diff --git a/src/network.c b/src/network.c index cadc466..13db37c 100644 --- a/src/network.c +++ b/src/network.c @@ -126,6 +126,11 @@ static int status2type[CACHE_T_MAX][C_OBJ_MAX] = { [C_OBJ_ALIVE] = NET_T_STATE_CT_UPD, [C_OBJ_DEAD] = NET_T_STATE_CT_DEL, }, + [CACHE_T_EXP] = { + [C_OBJ_NEW] = NET_T_STATE_EXP_NEW, + [C_OBJ_ALIVE] = NET_T_STATE_EXP_UPD, + [C_OBJ_DEAD] = NET_T_STATE_EXP_DEL, + }, }; int object_status_to_network_type(struct cache_object *obj) diff --git a/src/parse.c b/src/parse.c index 0718128..81e9c6b 100644 --- a/src/parse.c +++ b/src/parse.c @@ -248,3 +248,195 @@ int msg2ct(struct nf_conntrack *ct, struct nethdr *net, size_t remain) return 0; } + +static void exp_parse_ct_group(void *ct, int attr, void *data); +static void exp_parse_ct_u8(void *ct, int attr, void *data); +static void exp_parse_u32(void *exp, int attr, void *data); + +static struct exp_parser { + void (*parse)(void *obj, int attr, void *data); + int exp_attr; + int ct_attr; + int size; +} exp_h[NTA_EXP_MAX] = { + [NTA_EXP_MASTER_IPV4] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_MASTER, + .ct_attr = ATTR_GRP_ORIG_IPV4, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv4)), + }, + [NTA_EXP_MASTER_IPV6] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_MASTER, + .ct_attr = ATTR_GRP_ORIG_IPV6, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv6)), + }, + [NTA_EXP_MASTER_L4PROTO] = { + .parse = exp_parse_ct_u8, + .exp_attr = ATTR_EXP_MASTER, + .ct_attr = ATTR_L4PROTO, + .size = NTA_SIZE(sizeof(uint8_t)), + }, + [NTA_EXP_MASTER_PORT] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_MASTER, + .ct_attr = ATTR_GRP_ORIG_PORT, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_port)), + }, + [NTA_EXP_EXPECT_IPV4] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_EXPECTED, + .ct_attr = ATTR_GRP_ORIG_IPV4, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv4)), + }, + [NTA_EXP_EXPECT_IPV6] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_EXPECTED, + .ct_attr = ATTR_GRP_ORIG_IPV6, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv6)), + }, + [NTA_EXP_EXPECT_L4PROTO] = { + .parse = exp_parse_ct_u8, + .exp_attr = ATTR_EXP_EXPECTED, + .ct_attr = ATTR_L4PROTO, + .size = NTA_SIZE(sizeof(uint8_t)), + }, + [NTA_EXP_EXPECT_PORT] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_EXPECTED, + .ct_attr = ATTR_GRP_ORIG_PORT, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_port)), + }, + [NTA_EXP_MASK_IPV4] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_MASK, + .ct_attr = ATTR_GRP_ORIG_IPV4, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv4)), + }, + [NTA_EXP_MASK_IPV6] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_MASK, + .ct_attr = ATTR_GRP_ORIG_IPV6, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_ipv6)), + }, + [NTA_EXP_MASK_L4PROTO] = { + .parse = exp_parse_ct_u8, + .exp_attr = ATTR_EXP_MASK, + .ct_attr = ATTR_L4PROTO, + .size = NTA_SIZE(sizeof(uint8_t)), + }, + [NTA_EXP_MASK_PORT] = { + .parse = exp_parse_ct_group, + .exp_attr = ATTR_EXP_MASK, + .ct_attr = ATTR_GRP_ORIG_PORT, + .size = NTA_SIZE(sizeof(struct nfct_attr_grp_port)), + }, + [NTA_EXP_TIMEOUT] = { + .parse = exp_parse_u32, + .exp_attr = ATTR_EXP_TIMEOUT, + .size = NTA_SIZE(sizeof(uint32_t)), + }, + [NTA_EXP_FLAGS] = { + .parse = exp_parse_u32, + .exp_attr = ATTR_EXP_FLAGS, + .size = NTA_SIZE(sizeof(uint32_t)), + }, +}; + +static void exp_parse_ct_group(void *ct, int attr, void *data) +{ + nfct_set_attr_grp(ct, exp_h[attr].ct_attr, data); +} + +static void exp_parse_ct_u8(void *ct, int attr, void *data) +{ + uint8_t *value = (uint8_t *) data; + nfct_set_attr_u8(ct, exp_h[attr].ct_attr, *value); +} + +static void exp_parse_u32(void *exp, int attr, void *data) +{ + uint32_t *value = (uint32_t *) data; + nfexp_set_attr_u32(exp, exp_h[attr].exp_attr, ntohl(*value)); +} + +int msg2exp(struct nf_expect *exp, struct nethdr *net, size_t remain) +{ + int len; + struct netattr *attr; + struct nf_conntrack *master, *expected, *mask; + + if (remain < net->len) + return -1; + + len = net->len - NETHDR_SIZ; + attr = NETHDR_DATA(net); + + master = nfct_new(); + if (master == NULL) + goto err_master; + + expected = nfct_new(); + if (expected == NULL) + goto err_expected; + + mask = nfct_new(); + if (mask == NULL) + goto err_mask; + + while (len > ssizeof(struct netattr)) { + ATTR_NETWORK2HOST(attr); + if (attr->nta_len > len) + goto err; + if (attr->nta_attr > NTA_MAX) + goto err; + if (attr->nta_len != exp_h[attr->nta_attr].size) + goto err; + if (exp_h[attr->nta_attr].parse == NULL) { + attr = NTA_NEXT(attr, len); + continue; + } + switch(exp_h[attr->nta_attr].exp_attr) { + case ATTR_EXP_MASTER: + exp_h[attr->nta_attr].parse(master, attr->nta_attr, + NTA_DATA(attr)); + case ATTR_EXP_EXPECTED: + exp_h[attr->nta_attr].parse(expected, attr->nta_attr, + NTA_DATA(attr)); + case ATTR_EXP_MASK: + exp_h[attr->nta_attr].parse(mask, attr->nta_attr, + NTA_DATA(attr)); + break; + case ATTR_EXP_TIMEOUT: + case ATTR_EXP_FLAGS: + exp_h[attr->nta_attr].parse(exp, attr->nta_attr, + NTA_DATA(attr)); + break; + } + attr = NTA_NEXT(attr, len); + } + + nfexp_set_attr(exp, ATTR_EXP_MASTER, master); + nfexp_set_attr(exp, ATTR_EXP_EXPECTED, expected); + nfexp_set_attr(exp, ATTR_EXP_MASK, mask); + + /* We can release the conntrack objects at this point because the + * setter makes a copy of them. This is not efficient, it would be + * better to save that extra copy but this is how the library works. + * I'm sorry, I cannot change it without breaking backward + * compatibility. Probably it is a good idea to think of adding new + * interfaces in the near future to get it better. */ + nfct_destroy(mask); + nfct_destroy(expected); + nfct_destroy(master); + + return 0; +err: + nfct_destroy(mask); +err_mask: + nfct_destroy(expected); +err_expected: + nfct_destroy(master); +err_master: + return -1; +} diff --git a/src/read_config_lex.l b/src/read_config_lex.l index be6bf8b..01fe4fc 100644 --- a/src/read_config_lex.l +++ b/src/read_config_lex.l @@ -140,6 +140,7 @@ notrack [N|n][O|o][T|t][R|r][A|a][C|c][K|k] "DisableExternalCache" { return T_DISABLE_EXTERNAL_CACHE; } "Options" { return T_OPTIONS; } "TCPWindowTracking" { return T_TCP_WINDOW_TRACKING; } +"ExpectationSync" { return T_EXPECT_SYNC; } "ErrorQueueLength" { return T_ERROR_QUEUE_LENGTH; } {is_on} { return T_ON; } diff --git a/src/read_config_yy.y b/src/read_config_yy.y index 68a83f7..b22784c 100644 --- a/src/read_config_yy.y +++ b/src/read_config_yy.y @@ -73,7 +73,7 @@ static void __max_dedicated_links_reached(void); %token T_NETLINK_OVERRUN_RESYNC T_NICE T_IPV4_DEST_ADDR T_IPV6_DEST_ADDR %token T_SCHEDULER T_TYPE T_PRIO T_NETLINK_EVENTS_RELIABLE %token T_DISABLE_INTERNAL_CACHE T_DISABLE_EXTERNAL_CACHE T_ERROR_QUEUE_LENGTH -%token T_OPTIONS T_TCP_WINDOW_TRACKING +%token T_OPTIONS T_TCP_WINDOW_TRACKING T_EXPECT_SYNC %token T_IP T_PATH_VAL %token T_NUMBER @@ -828,6 +828,46 @@ option: T_TCP_WINDOW_TRACKING T_OFF CONFIG(sync).tcp_window_tracking = 0; }; +option: T_EXPECT_SYNC T_ON +{ + CONFIG(flags) |= CTD_EXPECT; + CONFIG(netlink).subsys_id = NFNL_SUBSYS_NONE; + CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW | + NF_NETLINK_CONNTRACK_UPDATE | + NF_NETLINK_CONNTRACK_DESTROY | + NF_NETLINK_CONNTRACK_EXP_NEW | + NF_NETLINK_CONNTRACK_EXP_UPDATE | + NF_NETLINK_CONNTRACK_EXP_DESTROY; +}; + +option: T_EXPECT_SYNC T_OFF +{ + CONFIG(netlink).subsys_id = NFNL_SUBSYS_CTNETLINK; + CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW | + NF_NETLINK_CONNTRACK_UPDATE | + NF_NETLINK_CONNTRACK_DESTROY; +}; + +option: T_EXPECT_SYNC '{' expect_list '}' +{ + CONFIG(flags) |= CTD_EXPECT; + CONFIG(netlink).subsys_id = NFNL_SUBSYS_NONE; + CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW | + NF_NETLINK_CONNTRACK_UPDATE | + NF_NETLINK_CONNTRACK_DESTROY | + NF_NETLINK_CONNTRACK_EXP_NEW | + NF_NETLINK_CONNTRACK_EXP_UPDATE | + NF_NETLINK_CONNTRACK_EXP_DESTROY; +}; + +expect_list: + | expect_list expect_item ; + +expect_item: T_STRING +{ + exp_filter_add(STATE(exp_filter), $1); +} + sync_mode_alarm: T_SYNC_MODE T_ALARM '{' sync_mode_alarm_list '}' { conf.flags |= CTD_SYNC_ALARM; @@ -1598,6 +1638,7 @@ init_config(char *filename) /* Zero may be a valid facility */ CONFIG(syslog_facility) = -1; CONFIG(stats).syslog_facility = -1; + CONFIG(netlink).subsys_id = -1; yyrestart(fp); yyparse(); @@ -1646,5 +1687,12 @@ init_config(char *filename) if (CONFIG(channelc).error_queue_length == 0) CONFIG(channelc).error_queue_length = 128; + if (CONFIG(netlink).subsys_id == -1) { + CONFIG(netlink).subsys_id = NFNL_SUBSYS_CTNETLINK; + CONFIG(netlink).groups = NF_NETLINK_CONNTRACK_NEW | + NF_NETLINK_CONNTRACK_UPDATE | + NF_NETLINK_CONNTRACK_DESTROY; + } + return 0; } diff --git a/src/run.c b/src/run.c index c21db2e..26c1783 100644 --- a/src/run.c +++ b/src/run.c @@ -187,6 +187,62 @@ static void dump_stats_runtime(int fd) send(fd, buf, size, 0); } +static void local_flush_master(void) +{ + STATE(stats).nl_kernel_table_flush++; + dlog(LOG_NOTICE, "flushing kernel conntrack table"); + + /* fork a child process that performs the flush operation, + * meanwhile the parent process handles events. */ + if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL, + NULL, NULL) == 0) { + nl_flush_conntrack_table(STATE(flush)); + exit(EXIT_SUCCESS); + } +} + +static void local_resync_master(void) +{ + if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { + STATE(stats).nl_kernel_table_resync++; + dlog(LOG_NOTICE, "resync with master conntrack table"); + nl_dump_conntrack_table(STATE(dump)); + } else { + dlog(LOG_NOTICE, "resync is unsupported in this mode"); + } +} + +static void local_exp_flush_master(void) +{ + if (!(CONFIG(flags) & CTD_EXPECT)) + return; + + STATE(stats).nl_kernel_table_flush++; + dlog(LOG_NOTICE, "flushing kernel expect table"); + + /* fork a child process that performs the flush operation, + * meanwhile the parent process handles events. */ + if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL, + NULL, NULL) == 0) { + nl_flush_expect_table(STATE(flush)); + exit(EXIT_SUCCESS); + } +} + +static void local_exp_resync_master(void) +{ + if (!(CONFIG(flags) & CTD_EXPECT)) + return; + + if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { + STATE(stats).nl_kernel_table_resync++; + dlog(LOG_NOTICE, "resync with master expect table"); + nl_dump_expect_table(STATE(dump)); + } else { + dlog(LOG_NOTICE, "resync is unsupported in this mode"); + } +} + static int local_handler(int fd, void *data) { int ret = LOCAL_RET_OK; @@ -198,25 +254,24 @@ static int local_handler(int fd, void *data) } switch(type) { case CT_FLUSH_MASTER: - STATE(stats).nl_kernel_table_flush++; - dlog(LOG_NOTICE, "flushing kernel conntrack table"); - - /* fork a child process that performs the flush operation, - * meanwhile the parent process handles events. */ - if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL, - NULL, NULL) == 0) { - nl_flush_conntrack_table(STATE(flush)); - exit(EXIT_SUCCESS); - } + local_flush_master(); break; case CT_RESYNC_MASTER: - if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { - STATE(stats).nl_kernel_table_resync++; - dlog(LOG_NOTICE, "resync with master table"); - nl_dump_conntrack_table(STATE(dump)); - } else { - dlog(LOG_NOTICE, "resync is unsupported in this mode"); - } + local_resync_master(); + break; + case EXP_FLUSH_MASTER: + local_exp_flush_master(); + break; + case EXP_RESYNC_MASTER: + local_exp_resync_master(); + break; + case ALL_FLUSH_MASTER: + local_flush_master(); + local_exp_flush_master(); + break; + case ALL_RESYNC_MASTER: + local_resync_master(); + local_exp_resync_master(); break; case STATS_RUNTIME: dump_stats_runtime(fd); @@ -245,7 +300,11 @@ static void do_polling_alarm(struct alarm_block *a, void *data) if (STATE(mode)->internal->ct.purge) STATE(mode)->internal->ct.purge(); + if (STATE(mode)->internal->exp.purge) + STATE(mode)->internal->exp.purge(); + nl_send_resync(STATE(resync)); + nl_send_expect_resync(STATE(resync)); add_alarm(&STATE(polling_alarm), CONFIG(poll_kernel_secs), 0); } @@ -290,6 +349,49 @@ out: return NFCT_CB_CONTINUE; } +static int exp_event_handler(const struct nlmsghdr *nlh, + enum nf_conntrack_msg_type type, + struct nf_expect *exp, + void *data) +{ + int origin_type; + const struct nf_conntrack *master = + nfexp_get_attr(exp, ATTR_EXP_MASTER); + + STATE(stats).nl_events_received++; + + if (!exp_filter_find(STATE(exp_filter), exp)) { + STATE(stats).nl_events_filtered++; + goto out; + } + if (ct_filter_conntrack(master, 1)) + return NFCT_CB_CONTINUE; + + origin_type = origin_find(nlh); + + switch(type) { + case NFCT_T_NEW: + STATE(mode)->internal->exp.new(exp, origin_type); + break; + case NFCT_T_UPDATE: + STATE(mode)->internal->exp.upd(exp, origin_type); + break; + case NFCT_T_DESTROY: + STATE(mode)->internal->exp.del(exp, origin_type); + break; + default: + STATE(stats).nl_events_unknown_type++; + break; + } + +out: + /* we reset the iteration limiter in the main select loop. */ + if (STATE(event_iterations_limit)-- <= 0) + return NFCT_CB_STOP; + else + return NFCT_CB_CONTINUE; +} + static int dump_handler(enum nf_conntrack_msg_type type, struct nf_conntrack *ct, void *data) @@ -308,6 +410,29 @@ static int dump_handler(enum nf_conntrack_msg_type type, return NFCT_CB_CONTINUE; } +static int exp_dump_handler(enum nf_conntrack_msg_type type, + struct nf_expect *exp, void *data) +{ + const struct nf_conntrack *master = + nfexp_get_attr(exp, ATTR_EXP_MASTER); + + if (!exp_filter_find(STATE(exp_filter), exp)) + return NFCT_CB_CONTINUE; + + if (ct_filter_conntrack(master, 1)) + return NFCT_CB_CONTINUE; + + switch(type) { + case NFCT_T_UPDATE: + STATE(mode)->internal->exp.populate(exp); + break; + default: + STATE(stats).nl_dump_unknown_type++; + break; + } + return NFCT_CB_CONTINUE; +} + static int get_handler(enum nf_conntrack_msg_type type, struct nf_conntrack *ct, void *data) @@ -319,6 +444,22 @@ static int get_handler(enum nf_conntrack_msg_type type, return NFCT_CB_CONTINUE; } +static int exp_get_handler(enum nf_conntrack_msg_type type, + struct nf_expect *exp, void *data) +{ + const struct nf_conntrack *master = + nfexp_get_attr(exp, ATTR_EXP_MASTER); + + if (!exp_filter_find(STATE(exp_filter), exp)) + return NFCT_CB_CONTINUE; + + if (ct_filter_conntrack(master, 1)) + return NFCT_CB_CONTINUE; + + STATE(get_retval) = 1; + return NFCT_CB_CONTINUE; +} + int init(void) { @@ -355,7 +496,7 @@ init(void) register_fd(STATE(local).fd, STATE(fds)); /* resynchronize (like 'dump' socket) but it also purges old entries */ - STATE(resync) = nfct_open(CONNTRACK, 0); + STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0); if (STATE(resync)== NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -370,7 +511,7 @@ init(void) fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK); if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { - STATE(dump) = nfct_open(CONNTRACK, 0); + STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0); if (STATE(dump) == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -380,13 +521,26 @@ init(void) nfct_callback_register(STATE(dump), NFCT_T_ALL, dump_handler, NULL); + if (CONFIG(flags) & CTD_EXPECT) { + nfexp_callback_register(STATE(dump), NFCT_T_ALL, + exp_dump_handler, NULL); + } + if (nl_dump_conntrack_table(STATE(dump)) == -1) { dlog(LOG_ERR, "can't get kernel conntrack table"); return -1; } + + if (CONFIG(flags) & CTD_EXPECT) { + if (nl_dump_expect_table(STATE(dump)) == -1) { + dlog(LOG_ERR, "can't get kernel " + "expect table"); + return -1; + } + } } - STATE(get) = nfct_open(CONNTRACK, 0); + STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0); if (STATE(get) == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -395,7 +549,12 @@ init(void) } nfct_callback_register(STATE(get), NFCT_T_ALL, get_handler, NULL); - STATE(flush) = nfct_open(CONNTRACK, 0); + if (CONFIG(flags) & CTD_EXPECT) { + nfexp_callback_register(STATE(get), NFCT_T_ALL, + exp_get_handler, NULL); + } + + STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0); if (STATE(flush) == NULL) { dlog(LOG_ERR, "cannot open flusher handler"); return -1; @@ -426,6 +585,11 @@ init(void) } nfct_callback_register2(STATE(event), NFCT_T_ALL, event_handler, NULL); + + if (CONFIG(flags) & CTD_EXPECT) { + nfexp_callback_register2(STATE(event), NFCT_T_ALL, + exp_event_handler, NULL); + } register_fd(nfct_fd(STATE(event)), STATE(fds)); } diff --git a/src/sync-ftfw.c b/src/sync-ftfw.c index fa76c0c..1bc2d9f 100644 --- a/src/sync-ftfw.c +++ b/src/sync-ftfw.c @@ -231,6 +231,8 @@ static int ftfw_local(int fd, int type, void *data) dlog(LOG_NOTICE, "sending bulk update"); cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); + cache_iterate(STATE(mode)->internal->exp.data, + NULL, do_cache_to_tx); break; case STATS_RSQUEUE: ftfw_local_queue(fd); @@ -350,7 +352,10 @@ static int digest_msg(const struct nethdr *net) } else if (IS_RESYNC(net)) { dp("RESYNC ALL\n"); - cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); + cache_iterate(STATE(mode)->internal->ct.data, NULL, + do_cache_to_tx); + cache_iterate(STATE(mode)->internal->exp.data, NULL, + do_cache_to_tx); return MSG_CTL; } else if (IS_ALIVE(net)) diff --git a/src/sync-mode.c b/src/sync-mode.c index fa522c7..2505631 100644 --- a/src/sync-mode.c +++ b/src/sync-mode.c @@ -59,10 +59,29 @@ static struct nf_conntrack *msg2ct_alloc(struct nethdr *net, size_t remain) return ct; } +static struct nf_expect *msg2exp_alloc(struct nethdr *net, size_t remain) +{ + struct nf_expect *exp; + + /* TODO: add stats on ENOMEM errors in the future. */ + exp = nfexp_new(); + if (exp == NULL) + return NULL; + + if (msg2exp(exp, net, remain) == -1) { + STATE_SYNC(error).msg_rcv_malformed++; + STATE_SYNC(error).msg_rcv_bad_payload++; + nfexp_destroy(exp); + return NULL; + } + return exp; +} + static void do_channel_handler_step(int i, struct nethdr *net, size_t remain) { - struct nf_conntrack *ct; + struct nf_conntrack *ct = NULL; + struct nf_expect *exp = NULL; if (net->version != CONNTRACKD_PROTOCOL_VERSION) { STATE_SYNC(error).msg_rcv_malformed++; @@ -112,12 +131,33 @@ do_channel_handler_step(int i, struct nethdr *net, size_t remain) return; STATE_SYNC(external)->ct.del(ct); break; + case NET_T_STATE_EXP_NEW: + exp = msg2exp_alloc(net, remain); + if (exp == NULL) + return; + STATE_SYNC(external)->exp.new(exp); + break; + case NET_T_STATE_EXP_UPD: + exp = msg2exp_alloc(net, remain); + if (exp == NULL) + return; + STATE_SYNC(external)->exp.upd(exp); + break; + case NET_T_STATE_EXP_DEL: + exp = msg2exp_alloc(net, remain); + if (exp == NULL) + return; + STATE_SYNC(external)->exp.del(exp); + break; default: STATE_SYNC(error).msg_rcv_malformed++; STATE_SYNC(error).msg_rcv_bad_type++; break; } - nfct_destroy(ct); + if (ct != NULL) + nfct_destroy(ct); + if (exp != NULL) + nfexp_destroy(exp); } static char __net[65536]; /* XXX: maximum MTU for IPv4 */ @@ -351,7 +391,7 @@ static int init_sync(void) STATE(fds)) == -1) return -1; - STATE_SYNC(commit).h = nfct_open(CONNTRACK, 0); + STATE_SYNC(commit).h = nfct_open(CONFIG(netlink).subsys_id, 0); if (STATE_SYNC(commit).h == NULL) { dlog(LOG_ERR, "can't create handler to commit"); return -1; @@ -402,8 +442,30 @@ static void run_sync(fd_set *readfds) interface_handler(); if (FD_ISSET(get_read_evfd(STATE_SYNC(commit).evfd), readfds)) { + int ret; + read_evfd(STATE_SYNC(commit).evfd); - STATE_SYNC(external)->ct.commit(STATE_SYNC(commit).h, 0); + + ret = STATE_SYNC(commit).rq[0].cb(STATE_SYNC(commit).h, 0); + if (ret == 0) { + /* we still have things in the callback queue. */ + if (STATE_SYNC(commit).rq[1].cb) { + int fd = STATE_SYNC(commit).clientfd; + + STATE_SYNC(commit).rq[0].cb = + STATE_SYNC(commit).rq[1].cb; + + STATE_SYNC(commit).rq[1].cb = NULL; + + STATE_SYNC(commit).clientfd = -1; + STATE_SYNC(commit).rq[0].cb( + STATE_SYNC(commit).h, fd); + } else { + /* Close the client socket now, we're done. */ + close(STATE_SYNC(commit).clientfd); + STATE_SYNC(commit).clientfd = -1; + } + } } /* flush pending messages */ @@ -480,6 +542,27 @@ static void dump_stats_sync_extended(int fd) send(fd, buf, size, 0); } +static int local_commit(int fd) +{ + int ret; + + /* delete the reset alarm if any before committing */ + del_alarm(&STATE_SYNC(reset_cache_alarm)); + + ret = STATE_SYNC(commit).rq[0].cb(STATE_SYNC(commit).h, fd); + if (ret == -1) { + dlog(LOG_NOTICE, "commit already in progress, skipping"); + ret = LOCAL_RET_OK; + } else if (ret == 0) { + /* we've finished the commit. */ + ret = LOCAL_RET_OK; + } else { + /* Keep open the client, we want synchronous commit. */ + ret = LOCAL_RET_STOLEN; + } + return ret; +} + /* handler for requests coming via UNIX socket */ static int local_handler_sync(int fd, int type, void *data) { @@ -511,19 +594,10 @@ static int local_handler_sync(int fd, int type, void *data) } break; case CT_COMMIT: - /* delete the reset alarm if any before committing */ - del_alarm(&STATE_SYNC(reset_cache_alarm)); - - dlog(LOG_NOTICE, "committing external cache"); - ret = STATE_SYNC(external)->ct.commit(STATE_SYNC(commit).h, fd); - if (ret == 0) { - dlog(LOG_NOTICE, "commit already in progress, " - "skipping"); - ret = LOCAL_RET_OK; - } else { - /* Keep open the client, we want synchronous commit. */ - ret = LOCAL_RET_STOLEN; - } + dlog(LOG_NOTICE, "committing conntrack cache"); + STATE_SYNC(commit).rq[0].cb = STATE_SYNC(external)->ct.commit; + STATE_SYNC(commit).rq[1].cb = NULL; + ret = local_commit(fd); break; case RESET_TIMERS: if (!alarm_pending(&STATE_SYNC(reset_cache_alarm))) { @@ -575,6 +649,63 @@ static int local_handler_sync(int fd, int type, void *data) case STATS_QUEUE: queue_stats_show(fd); break; + case EXP_STATS: + if (!(CONFIG(flags) & CTD_EXPECT)) + break; + + STATE(mode)->internal->exp.stats(fd); + STATE_SYNC(external)->exp.stats(fd); + dump_traffic_stats(fd); + multichannel_stats(STATE_SYNC(channel), fd); + dump_stats_sync(fd); + break; + case EXP_DUMP_INTERNAL: + if (!(CONFIG(flags) & CTD_EXPECT)) + break; + + if (fork_process_new(CTD_PROC_ANY, 0, NULL, NULL) == 0) { + STATE(mode)->internal->exp.dump(fd, NFCT_O_PLAIN); + exit(EXIT_SUCCESS); + } + break; + case EXP_DUMP_EXTERNAL: + if (!(CONFIG(flags) & CTD_EXPECT)) + break; + + if (fork_process_new(CTD_PROC_ANY, 0, NULL, NULL) == 0) { + STATE_SYNC(external)->exp.dump(fd, NFCT_O_PLAIN); + exit(EXIT_SUCCESS); + } + break; + case EXP_COMMIT: + if (!(CONFIG(flags) & CTD_EXPECT)) + break; + + dlog(LOG_NOTICE, "committing expectation cache"); + STATE_SYNC(commit).rq[0].cb = STATE_SYNC(external)->exp.commit; + STATE_SYNC(commit).rq[1].cb = NULL; + local_commit(fd); + break; + case ALL_FLUSH_CACHE: + dlog(LOG_NOTICE, "flushing caches"); + STATE(mode)->internal->ct.flush(); + STATE_SYNC(external)->ct.flush(); + if (CONFIG(flags) & CTD_EXPECT) { + STATE(mode)->internal->exp.flush(); + STATE_SYNC(external)->exp.flush(); + } + break; + case ALL_COMMIT: + dlog(LOG_NOTICE, "committing all external caches"); + STATE_SYNC(commit).rq[0].cb = STATE_SYNC(external)->ct.commit; + if (CONFIG(flags) & CTD_EXPECT) { + STATE_SYNC(commit).rq[1].cb = + STATE_SYNC(external)->exp.commit; + } else { + STATE_SYNC(commit).rq[1].cb = NULL; + } + local_commit(fd); + break; default: if (STATE_SYNC(sync)->local) ret = STATE_SYNC(sync)->local(fd, type, data); diff --git a/src/sync-notrack.c b/src/sync-notrack.c index 06ad1f0..a7df4e7 100644 --- a/src/sync-notrack.c +++ b/src/sync-notrack.c @@ -102,7 +102,7 @@ static void kernel_resync(void) u_int32_t family = AF_UNSPEC; int ret; - h = nfct_open(CONNTRACK, 0); + h = nfct_open(CONFIG(netlink).subsys_id, 0); if (h == NULL) { dlog(LOG_ERR, "can't allocate memory for the internal cache"); return; @@ -131,6 +131,8 @@ static int notrack_local(int fd, int type, void *data) } else { cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); + cache_iterate(STATE(mode)->internal->exp.data, + NULL, do_cache_to_tx); } break; default: @@ -152,6 +154,8 @@ static int digest_msg(const struct nethdr *net) } else { cache_iterate(STATE(mode)->internal->ct.data, NULL, do_cache_to_tx); + cache_iterate(STATE(mode)->internal->exp.data, + NULL, do_cache_to_tx); } return MSG_CTL; } -- cgit v1.2.3