diff options
author | Alexander Wirt <formorer@debian.org> | 2012-06-03 08:49:55 +0200 |
---|---|---|
committer | Alexander Wirt <formorer@debian.org> | 2012-06-03 08:49:55 +0200 |
commit | ea27bb406e3d8fe9466ba274af38e6f540ff5bfc (patch) | |
tree | 9f0c78416f8b617d6af715800ce22815645ee8ec /src/netlink.c | |
parent | ed902b39d4f4aa2fc8130441d25b849a69b75c15 (diff) | |
download | conntrack-tools-ea27bb406e3d8fe9466ba274af38e6f540ff5bfc.tar.gz conntrack-tools-ea27bb406e3d8fe9466ba274af38e6f540ff5bfc.zip |
Imported Upstream version 1.2.1
Diffstat (limited to 'src/netlink.c')
-rw-r--r-- | src/netlink.c | 362 |
1 files changed, 362 insertions, 0 deletions
diff --git a/src/netlink.c b/src/netlink.c new file mode 100644 index 0000000..fe979e3 --- /dev/null +++ b/src/netlink.c @@ -0,0 +1,362 @@ +/* + * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2011 by Vyatta Inc. <http://www.vyatta.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "netlink.h" +#include "conntrackd.h" +#include "filter.h" +#include "log.h" + +#include <string.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/fcntl.h> +#include <libnetfilter_conntrack/libnetfilter_conntrack_tcp.h> + +struct nfct_handle *nl_init_event_handler(void) +{ + struct nfct_handle *h; + + h = nfct_open(CONFIG(netlink).subsys_id, CONFIG(netlink).groups); + if (h == NULL) + return NULL; + + if (CONFIG(netlink).events_reliable) { + int on = 1; + + setsockopt(nfct_fd(h), SOL_NETLINK, + NETLINK_BROADCAST_SEND_ERROR, &on, sizeof(int)); + + setsockopt(nfct_fd(h), SOL_NETLINK, + NETLINK_NO_ENOBUFS, &on, sizeof(int)); + + dlog(LOG_NOTICE, "reliable ctnetlink event delivery " + "is ENABLED."); + } + + if (STATE(filter)) { + if (CONFIG(filter_from_kernelspace)) { + if (nfct_filter_attach(nfct_fd(h), + STATE(filter)) == -1) { + dlog(LOG_ERR, "cannot set event filtering: %s", + strerror(errno)); + } + dlog(LOG_NOTICE, "using kernel-space event filtering"); + } else + dlog(LOG_NOTICE, "using user-space event filtering"); + + nfct_filter_destroy(STATE(filter)); + } + + fcntl(nfct_fd(h), F_SETFL, O_NONBLOCK); + + /* set up socket buffer size */ + if (CONFIG(netlink_buffer_size) && + CONFIG(netlink_buffer_size) <= + CONFIG(netlink_buffer_size_max_grown)) { + /* we divide netlink_buffer_size by 2 here since value passed + to kernel gets doubled in SO_RCVBUF; see net/core/sock.c */ + CONFIG(netlink_buffer_size) = + nfnl_rcvbufsiz(nfct_nfnlh(h), CONFIG(netlink_buffer_size)/2); + } else { + dlog(LOG_NOTICE, "NetlinkBufferSize is either not set or " + "is greater than NetlinkBufferSizeMaxGrowth. " + "Using current system buffer size"); + + socklen_t socklen = sizeof(unsigned int); + unsigned int read_size; + + /* get current buffer size */ + getsockopt(nfct_fd(h), SOL_SOCKET, + SO_RCVBUF, &read_size, &socklen); + + CONFIG(netlink_buffer_size) = read_size; + } + + dlog(LOG_NOTICE, "netlink event socket buffer size has been set " + "to %u bytes", CONFIG(netlink_buffer_size)); + + return h; +} + +struct nlif_handle *nl_init_interface_handler(void) +{ + struct nlif_handle *h; + h = nlif_open(); + if (h == NULL) + return NULL; + + if (nlif_query(h) == -1) { + free(h); + return NULL; + } + fcntl(nlif_fd(h), F_SETFL, O_NONBLOCK); + + return h; +} + +static int warned = 0; + +void nl_resize_socket_buffer(struct nfct_handle *h) +{ + unsigned int s = CONFIG(netlink_buffer_size); + + /* already warned that we have reached the maximum buffer size */ + if (warned) + return; + + /* since sock_setsockopt in net/core/sock.c doubles the size of socket + buffer passed to it using nfnl_rcvbufsiz, only call nfnl_rcvbufsiz + if new value is not greater than netlink_buffer_size_max_grown */ + if (s*2 > CONFIG(netlink_buffer_size_max_grown)) { + dlog(LOG_WARNING, + "netlink event socket buffer size cannot " + "be doubled further since it will exceed " + "NetlinkBufferSizeMaxGrowth. We are likely to " + "be losing events, this may lead to " + "unsynchronized replicas. Please, consider " + "increasing netlink socket buffer size via " + "NetlinkBufferSize and " + "NetlinkBufferSizeMaxGrowth clauses in " + "conntrackd.conf"); + warned = 1; + return; + } + + CONFIG(netlink_buffer_size) = nfnl_rcvbufsiz(nfct_nfnlh(h), s); + + /* notify the sysadmin */ + dlog(LOG_NOTICE, "netlink event socket buffer size has been doubled " + "to %u bytes", CONFIG(netlink_buffer_size)); +} + +int nl_dump_conntrack_table(struct nfct_handle *h) +{ + return nfct_query(h, NFCT_Q_DUMP, &CONFIG(family)); +} + +int nl_flush_conntrack_table(struct nfct_handle *h) +{ + return nfct_query(h, NFCT_Q_FLUSH, &CONFIG(family)); +} + +int nl_send_resync(struct nfct_handle *h) +{ + int family = CONFIG(family); + return nfct_send(h, NFCT_Q_DUMP, &family); +} + +/* if the handle has no callback, check for existence, otherwise, update */ +int nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) +{ + int ret = 1; + struct nf_conntrack *tmp; + + tmp = nfct_new(); + if (tmp == NULL) + return -1; + + /* use the original tuple to check if it is there */ + nfct_copy(tmp, ct, NFCT_CP_ORIG); + + if (nfct_query(h, NFCT_Q_GET, tmp) == -1) + ret = (errno == ENOENT) ? 0 : -1; + + nfct_destroy(tmp); + return ret; +} + +int nl_create_conntrack(struct nfct_handle *h, + const struct nf_conntrack *orig, + int timeout) +{ + int ret; + struct nf_conntrack *ct; + + ct = nfct_clone(orig); + if (ct == NULL) + return -1; + + if (timeout > 0) + nfct_set_attr_u32(ct, ATTR_TIMEOUT, timeout); + + /* we hit error if we try to change the expected bit */ + if (nfct_attr_is_set(ct, ATTR_STATUS)) { + uint32_t status = nfct_get_attr_u32(ct, ATTR_STATUS); + status &= ~IPS_EXPECTED; + nfct_set_attr_u32(ct, ATTR_STATUS, status); + } + + nfct_setobjopt(ct, NFCT_SOPT_SETUP_REPLY); + + /* disable TCP window tracking for recovered connections if required */ + if (nfct_attr_is_set(ct, ATTR_TCP_STATE)) { + uint8_t flags = IP_CT_TCP_FLAG_SACK_PERM; + + if (!CONFIG(sync).tcp_window_tracking) + flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + else + flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; + + /* FIXME: workaround, we should send TCP flags in updates */ + if (nfct_get_attr_u8(ct, ATTR_TCP_STATE) >= + TCP_CONNTRACK_TIME_WAIT) { + flags |= IP_CT_TCP_FLAG_CLOSE_INIT; + } + nfct_set_attr_u8(ct, ATTR_TCP_FLAGS_ORIG, flags); + nfct_set_attr_u8(ct, ATTR_TCP_MASK_ORIG, flags); + nfct_set_attr_u8(ct, ATTR_TCP_FLAGS_REPL, flags); + nfct_set_attr_u8(ct, ATTR_TCP_MASK_REPL, flags); + } + + ret = nfct_query(h, NFCT_Q_CREATE, ct); + nfct_destroy(ct); + + return ret; +} + +int nl_update_conntrack(struct nfct_handle *h, + const struct nf_conntrack *orig, + int timeout) +{ + int ret; + struct nf_conntrack *ct; + + ct = nfct_clone(orig); + if (ct == NULL) + return -1; + + if (timeout > 0) + nfct_set_attr_u32(ct, ATTR_TIMEOUT, timeout); + + /* unset NAT info, otherwise we hit error */ + nfct_attr_unset(ct, ATTR_SNAT_IPV4); + nfct_attr_unset(ct, ATTR_DNAT_IPV4); + nfct_attr_unset(ct, ATTR_SNAT_PORT); + nfct_attr_unset(ct, ATTR_DNAT_PORT); + + if (nfct_attr_is_set(ct, ATTR_STATUS)) { + uint32_t status = nfct_get_attr_u32(ct, ATTR_STATUS); + status &= ~IPS_NAT_MASK; + nfct_set_attr_u32(ct, ATTR_STATUS, status); + } + /* we have to unset the helper to avoid EBUSY in reset timers */ + if (nfct_attr_is_set(ct, ATTR_HELPER_NAME)) + nfct_attr_unset(ct, ATTR_HELPER_NAME); + + /* we hit error if we try to update the master conntrack */ + if (ct_is_related(ct)) { + nfct_attr_unset(ct, ATTR_MASTER_L3PROTO); + nfct_attr_unset(ct, ATTR_MASTER_L4PROTO); + nfct_attr_unset(ct, ATTR_MASTER_IPV4_SRC); + nfct_attr_unset(ct, ATTR_MASTER_IPV4_DST); + nfct_attr_unset(ct, ATTR_MASTER_IPV6_SRC); + nfct_attr_unset(ct, ATTR_MASTER_IPV6_DST); + nfct_attr_unset(ct, ATTR_MASTER_PORT_SRC); + nfct_attr_unset(ct, ATTR_MASTER_PORT_DST); + } + + /* disable TCP window tracking for recovered connections if required */ + if (nfct_attr_is_set(ct, ATTR_TCP_STATE)) { + uint8_t flags = IP_CT_TCP_FLAG_SACK_PERM; + + if (!CONFIG(sync).tcp_window_tracking) + flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + else + flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; + + /* FIXME: workaround, we should send TCP flags in updates */ + if (nfct_get_attr_u8(ct, ATTR_TCP_STATE) >= + TCP_CONNTRACK_TIME_WAIT) { + flags |= IP_CT_TCP_FLAG_CLOSE_INIT; + } + nfct_set_attr_u8(ct, ATTR_TCP_FLAGS_ORIG, flags); + nfct_set_attr_u8(ct, ATTR_TCP_MASK_ORIG, flags); + nfct_set_attr_u8(ct, ATTR_TCP_FLAGS_REPL, flags); + nfct_set_attr_u8(ct, ATTR_TCP_MASK_REPL, flags); + } + + ret = nfct_query(h, NFCT_Q_UPDATE, ct); + nfct_destroy(ct); + + return ret; +} + +int nl_destroy_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) +{ + return nfct_query(h, NFCT_Q_DESTROY, ct); +} + +int nl_create_expect(struct nfct_handle *h, const struct nf_expect *orig, + int timeout) +{ + int ret; + struct nf_expect *exp; + + exp = nfexp_clone(orig); + if (exp == NULL) + return -1; + + if (timeout > 0) + nfexp_set_attr_u32(exp, ATTR_EXP_TIMEOUT, timeout); + + ret = nfexp_query(h, NFCT_Q_CREATE, exp); + nfexp_destroy(exp); + + return ret; +} + +int nl_destroy_expect(struct nfct_handle *h, const struct nf_expect *exp) +{ + return nfexp_query(h, NFCT_Q_DESTROY, exp); +} + +/* if the handle has no callback, check for existence, otherwise, update */ +int nl_get_expect(struct nfct_handle *h, const struct nf_expect *exp) +{ + int ret = 1; + struct nf_expect *tmp; + + /* XXX: we only need the expectation, not the mask and the master. */ + tmp = nfexp_clone(exp); + if (tmp == NULL) + return -1; + + if (nfexp_query(h, NFCT_Q_GET, tmp) == -1) + ret = (errno == ENOENT) ? 0 : -1; + + nfexp_destroy(tmp); + return ret; +} + +int nl_dump_expect_table(struct nfct_handle *h) +{ + return nfexp_query(h, NFCT_Q_DUMP, &CONFIG(family)); +} + +int nl_flush_expect_table(struct nfct_handle *h) +{ + return nfexp_query(h, NFCT_Q_FLUSH, &CONFIG(family)); +} + +int nl_send_expect_resync(struct nfct_handle *h) +{ + int family = CONFIG(family); + return nfexp_send(h, NFCT_Q_DUMP, &family); +} |