From bdb2cc7c859b3c27a38f23f00f00f62eba30e577 Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Fri, 16 Feb 2018 15:08:04 -0800 Subject: Linux: link system libcurl when compiling with ZT_VAULT_SUPPORT=1 Requires libcurl and development headers to be installed --- make-linux.mk | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'make-linux.mk') diff --git a/make-linux.mk b/make-linux.mk index 064a4608..8fa70e2f 100644 --- a/make-linux.mk +++ b/make-linux.mk @@ -88,6 +88,11 @@ ifeq ($(ZT_USE_TEST_TAP),1) override DEFS+=-DZT_USE_TEST_TAP endif +ifeq ($(ZT_VAULT_SUPPORT),1) + override DEFS+=-DZT_VAULT_SUPPORT=1 + override LDLIBS+=-lcurl +endif + # Uncomment for gprof profile build #CFLAGS=-Wall -g -pg -pthread $(INCLUDES) $(DEFS) #CXXFLAGS=-Wall -g -pg -pthread $(INCLUDES) $(DEFS) -- cgit v1.2.3 From c24d16e62eb6db70ff8aa8777156a938d1b7b2c0 Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Thu, 24 May 2018 15:22:11 -0700 Subject: ManagedRoute uses ioctl to add/remove routes on Linux Added LinuxNetLink to talk to the rtnetlink socket for adding interfaces, addresses routes. Not yet complete. Can currently monitor changes on the system. --- make-linux.mk | 11 +- nltest | Bin 0 -> 24872 bytes nltest.cpp | 13 ++ osdep/LinuxNetLink.cpp | 476 +++++++++++++++++++++++++++++++++++++++++++++++++ osdep/LinuxNetLink.hpp | 133 ++++++++++++++ osdep/ManagedRoute.cpp | 163 +++++++++++++++-- 6 files changed, 780 insertions(+), 16 deletions(-) create mode 100755 nltest create mode 100644 nltest.cpp create mode 100644 osdep/LinuxNetLink.cpp create mode 100644 osdep/LinuxNetLink.hpp (limited to 'make-linux.mk') diff --git a/make-linux.mk b/make-linux.mk index 56096da8..69dc5619 100644 --- a/make-linux.mk +++ b/make-linux.mk @@ -17,6 +17,10 @@ DESTDIR?= include objects.mk ONE_OBJS+=osdep/LinuxEthernetTap.o +ONE_OBJS+=osdep/LinuxNetLink.o + +NLTEST_OBJS+=osdep/LinuxNetLink.o +NLTEST_OBJS+=nltest.o # Auto-detect miniupnpc and nat-pmp as well and use system libs if present, # otherwise build into binary as done on Mac and Windows. @@ -55,8 +59,8 @@ ifeq ($(ZT_SANITIZE),1) SANFLAGS+=-fsanitize=address -DASAN_OPTIONS=symbolize=1 endif ifeq ($(ZT_DEBUG),1) - override CFLAGS+=-Wall -Wno-deprecated -Werror -g -pthread $(INCLUDES) $(DEFS) - override CXXFLAGS+=-Wall -Wno-deprecated -Werror -g -std=c++11 -pthread $(INCLUDES) $(DEFS) + override CFLAGS+=-Wall -Wno-deprecated -g -pthread $(INCLUDES) $(DEFS) + override CXXFLAGS+=-Wall -Wno-deprecated -g -std=c++11 -pthread $(INCLUDES) $(DEFS) ZT_TRACE=1 STRIP?=echo # The following line enables optimization for the crypto code, since @@ -307,6 +311,9 @@ debug: FORCE make ZT_DEBUG=1 one make ZT_DEBUG=1 selftest +nltest: $(NLTEST_OBJS) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o nltest $(NLTEST_OBJS) $(LDLIBS) + # Note: keep the symlinks in /var/lib/zerotier-one to the binaries since these # provide backward compatibility with old releases where the binaries actually # lived here. Folks got scripts. diff --git a/nltest b/nltest new file mode 100755 index 00000000..8d782c18 Binary files /dev/null and b/nltest differ diff --git a/nltest.cpp b/nltest.cpp new file mode 100644 index 00000000..9659a794 --- /dev/null +++ b/nltest.cpp @@ -0,0 +1,13 @@ +#include "osdep/LinuxNetLink.hpp" + +using namespace ZeroTier; + +int main(int argc, char **argv) +{ + LinuxNetLink &nl = LinuxNetLink::getInstance(); + + + while(true) { + Thread::sleep(1000); + } +} \ No newline at end of file diff --git a/osdep/LinuxNetLink.cpp b/osdep/LinuxNetLink.cpp new file mode 100644 index 00000000..308f0f8a --- /dev/null +++ b/osdep/LinuxNetLink.cpp @@ -0,0 +1,476 @@ +/* + * ZeroTier One - Network Virtualization Everywhere + * Copyright (C) 2011-2018 ZeroTier, Inc. https://www.zerotier.com/ + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * -- + * + * You can be released from the requirements of the license by purchasing + * a commercial license. Buying such a license is mandatory as soon as you + * develop commercial closed-source software that incorporates or links + * directly against ZeroTier software without disclosing the source code + * of your own application. + */ + +#include "LinuxNetLink.hpp" + +#include + +namespace ZeroTier { + +LinuxNetLink::LinuxNetLink() + : _t() + , _running(false) + , _routes_ipv4() + , _routes_ipv6() + , _fd(socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) + , _la({0}) + , _pa({0}) + , _msg({0}) + , _iov({0}) + , _rtn(0) + , _nlp(NULL) + , _nll(0) + , _rtp(NULL) + , _rtl(0) + , _rtap(NULL) + , _ifip(NULL) + , _ifil(0) + , _ifap(NULL) + , _ifal(0) +{ + memset(_buf, 0, sizeof(_buf)); + + // set socket timeout to 1 sec so we're not permablocking recv() calls + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + if(setsockopt(_fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof(tv)) != 0) { + fprintf(stderr, "setsockopt failed: %s\n", strerror(errno)); + } + + _la.nl_family = AF_NETLINK; + _la.nl_pid = getpid(); + _la.nl_groups = RTMGRP_LINK|RTMGRP_IPV4_IFADDR|RTMGRP_IPV6_IFADDR|RTMGRP_IPV4_ROUTE|RTMGRP_IPV6_ROUTE; + if (bind(_fd, (struct sockaddr*)&_la, sizeof(_la))) { + fprintf(stderr, "Error connecting to RTNETLINK: %s\n", strerror(errno)); + ::exit(1); + } + + _running = true; + _t = Thread::start(this); + + fprintf(stderr, "Requesting IPV4 Routes\n"); + _requestIPv4Routes(); + Thread::sleep(10); + fprintf(stderr, "Requesting IPV6 Routes\n"); + _requestIPv6Routes(); +} + +LinuxNetLink::~LinuxNetLink() +{ + _running = false; + Thread::join(_t); + + ::close(_fd); +} + +void LinuxNetLink::threadMain() throw() +{ + char *p; + p = _buf; + _nll = 0; + + while(_running) { + _rtn = recv(_fd, p, sizeof(_buf) - _nll, 0); + + if (_rtn > 0) { + _nlp = (struct nlmsghdr *) p; + + if(_nlp->nlmsg_type == NLMSG_ERROR && (_nlp->nlmsg_flags & NLM_F_ACK) != NLM_F_ACK) { + fprintf(stderr, "NLMSG_ERROR\n"); + struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(_nlp); + if (err->error != 0) { + fprintf(stderr, "rtnetlink error: %s\n", strerror(-(err->error))); + } + p = _buf; + _nll = 0; + continue; + } + + if (_nlp->nlmsg_type == NLMSG_NOOP) { + fprintf(stderr, "noop\n"); + continue; + } + + if( (_nlp->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI || (_nlp->nlmsg_type == NLMSG_DONE)) + { + if (_nlp->nlmsg_type == NLMSG_DONE) { + _processMessage(); + p = _buf; + _nll = 0; + continue; + } + p += _rtn; + _nll += _rtn; + } + + if (_nlp->nlmsg_type == NLMSG_OVERRUN) { + fprintf(stderr, "NLMSG_OVERRUN: Data lost\n"); + p = _buf; + _nll = 0; + continue; + } + + _nll += _rtn; + + _processMessage(); + p = _buf; + _nll = 0; + } + else { + Thread::sleep(100); + continue; + } + } +} + +void LinuxNetLink::_processMessage() +{ + for(_nlp = (struct nlmsghdr *)_buf; NLMSG_OK(_nlp, _nll); _nlp=NLMSG_NEXT(_nlp, _nll)) + { + switch(_nlp->nlmsg_type) + { + case RTM_NEWLINK: + _linkAdded(); + break; + case RTM_DELLINK: + _linkDeleted(); + break; + case RTM_GETLINK: + fprintf(stderr, "Get Link\n"); + break; + case RTM_SETLINK: + fprintf(stderr, "Set Link\n"); + break; + case RTM_NEWADDR: + _ipAddressAdded(); + break; + case RTM_DELADDR: + _ipAddressDeleted(); + break; + case RTM_GETADDR: + fprintf(stderr, "Get IP Address\n"); + break; + case RTM_NEWROUTE: + _routeAdded(); + break; + case RTM_DELROUTE: + _routeDeleted(); + break; + case RTM_GETROUTE: + break; + default: + fprintf(stderr, "ignore msgtype %d...\n", _nlp->nlmsg_type); + } + } + _nlp = NULL; + _nll = 0; + _rtp = NULL; + _rtl = 0; + _ifip = NULL; + _ifil = 0; + _ifap = NULL; + _ifal = 0; +} + +void LinuxNetLink::_ipAddressAdded() +{ + _ifap = (struct ifaddrmsg *)NLMSG_DATA(_nlp); + _rtap = (struct rtattr *)IFA_RTA(_ifap); + _ifal = IFA_PAYLOAD(_nlp); + + char addr[40] = {0}; + char local[40] = {0}; + char label[40] = {0}; + char bcast[40] = {0}; + + for(;RTA_OK(_rtap, _ifal); _rtap=RTA_NEXT(_rtap,_ifal)) + { + switch(_rtap->rta_type) { + case IFA_ADDRESS: + inet_ntop(_ifap->ifa_family, RTA_DATA(_rtap), addr, 40); + break; + case IFA_LOCAL: + inet_ntop(_ifap->ifa_family, RTA_DATA(_rtap), local, 40); + break; + case IFA_LABEL: + memcpy(label, RTA_DATA(_rtap), 40); + break; + case IFA_BROADCAST: + inet_ntop(_ifap->ifa_family, RTA_DATA(_rtap), bcast, 40); + break; + } + } + + fprintf(stderr, "Added IP Address %s local: %s label: %s broadcast: %s\n", addr, local, label, bcast); +} + +void LinuxNetLink::_ipAddressDeleted() +{ + _ifap = (struct ifaddrmsg *)NLMSG_DATA(_nlp); + _rtap = (struct rtattr *)IFA_RTA(_ifap); + _ifal = IFA_PAYLOAD(_nlp); + + char addr[40] = {0}; + char local[40] = {0}; + char label[40] = {0}; + char bcast[40] = {0}; + + for(;RTA_OK(_rtap, _ifal); _rtap=RTA_NEXT(_rtap,_ifal)) + { + switch(_rtap->rta_type) { + case IFA_ADDRESS: + inet_ntop(_ifap->ifa_family, RTA_DATA(_rtap), addr, 40); + break; + case IFA_LOCAL: + inet_ntop(_ifap->ifa_family, RTA_DATA(_rtap), local, 40); + break; + case IFA_LABEL: + memcpy(label, RTA_DATA(_rtap), 40); + break; + case IFA_BROADCAST: + inet_ntop(_ifap->ifa_family, RTA_DATA(_rtap), bcast, 40); + break; + } + } + + fprintf(stderr, "Removed IP Address %s local: %s label: %s broadcast: %s\n", addr, local, label, bcast); +} + +void LinuxNetLink::_routeAdded() +{ + char dsts[40] = {0}; + char gws[40] = {0}; + char ifs[16] = {0}; + char ms[24] = {0}; + + _rtp = (struct rtmsg *) NLMSG_DATA(_nlp); + + _rtap = (struct rtattr *)RTM_RTA(_rtp); + _rtl = RTM_PAYLOAD(_nlp); + for(;RTA_OK(_rtap, _rtl); _rtap=RTA_NEXT(_rtap, _rtl)) + { + switch(_rtap->rta_type) + { + case RTA_DST: + inet_ntop(_rtp->rtm_family, RTA_DATA(_rtap), dsts, _rtp->rtm_family == AF_INET ? 24 : 40); + break; + case RTA_GATEWAY: + inet_ntop(_rtp->rtm_family, RTA_DATA(_rtap), gws, _rtp->rtm_family == AF_INET ? 24 : 40); + break; + case RTA_OIF: + sprintf(ifs, "%d", *((int*)RTA_DATA(_rtap))); + break; + } + } + sprintf(ms, "%d", _rtp->rtm_dst_len); + + fprintf(stderr, "Route Added: dst %s/%s gw %s if %s\n", dsts, ms, gws, ifs); +} + +void LinuxNetLink::_routeDeleted() +{ + char dsts[40] = {0}; + char gws[40] = {0}; + char ifs[16] = {0}; + char ms[24] = {0}; + + _rtp = (struct rtmsg *) NLMSG_DATA(_nlp); + + _rtap = (struct rtattr *)RTM_RTA(_rtp); + _rtl = RTM_PAYLOAD(_nlp); + for(;RTA_OK(_rtap, _rtl); _rtap=RTA_NEXT(_rtap, _rtl)) + { + switch(_rtap->rta_type) + { + case RTA_DST: + inet_ntop(_rtp->rtm_family, RTA_DATA(_rtap), dsts, _rtp->rtm_family == AF_INET ? 24 : 40); + break; + case RTA_GATEWAY: + inet_ntop(_rtp->rtm_family, RTA_DATA(_rtap), gws, _rtp->rtm_family == AF_INET ? 24 : 40); + break; + case RTA_OIF: + sprintf(ifs, "%d", *((int*)RTA_DATA(_rtap))); + break; + } + } + sprintf(ms, "%d", _rtp->rtm_dst_len); + + fprintf(stderr, "Route Deleted: dst %s/%s gw %s if %s\n", dsts, ms, gws, ifs); +} + +void LinuxNetLink::_linkAdded() +{ + char mac[20] = {0}; + unsigned int mtu = 0; + char ifname[40] = {0}; + + _ifip = (struct ifinfomsg *)NLMSG_DATA(_nlp); + _rtap = (struct rtattr *)IFLA_RTA(_ifip); + _ifil = RTM_PAYLOAD(_nlp); + + const char *ptr; + unsigned char *ptr2; + for(;RTA_OK(_rtap, _ifil);_rtap=RTA_NEXT(_rtap, _ifil)) + { + switch(_rtap->rta_type) { + case IFLA_ADDRESS: + ptr2 = (unsigned char*)RTA_DATA(_rtap); + snprintf(mac, 20, "%02x:%02x:%02x:%02x:%02x:%02x", + ptr2[0], ptr2[1], ptr2[2], ptr2[3], ptr2[4], ptr2[5]); + break; + case IFLA_IFNAME: + ptr = (const char*)RTA_DATA(_rtap); + memcpy(ifname, ptr, strlen(ptr)); + break; + case IFLA_MTU: + memcpy(&mtu, RTA_DATA(_rtap), sizeof(unsigned int)); + break; + } + } + + fprintf(stderr, "Link Added: %s mac: %s, mtu: %d\n", ifname, mac, mtu); +} + +void LinuxNetLink::_linkDeleted() +{ + char mac[20] = {0}; + unsigned int mtu = 0; + char ifname[40] = {0}; + + _ifip = (struct ifinfomsg *)NLMSG_DATA(_nlp); + _rtap = (struct rtattr *)IFLA_RTA(_ifip); + _ifil = RTM_PAYLOAD(_nlp); + + const char *ptr; + unsigned char *ptr2; + for(;RTA_OK(_rtap, _ifil);_rtap=RTA_NEXT(_rtap, _ifil)) + { + switch(_rtap->rta_type) { + case IFLA_ADDRESS: + ptr2 = (unsigned char*)RTA_DATA(_rtap); + snprintf(mac, 20, "%02x:%02x:%02x:%02x:%02x:%02x", + ptr2[0], ptr2[1], ptr2[2], ptr2[3], ptr2[4], ptr2[5]); + break; + case IFLA_IFNAME: + ptr = (const char*)RTA_DATA(_rtap); + memcpy(ifname, ptr, strlen(ptr)); + break; + case IFLA_MTU: + memcpy(&mtu, RTA_DATA(_rtap), sizeof(unsigned int)); + break; + } + } + + fprintf(stderr, "Link Deleted: %s mac: %s, mtu: %d\n", ifname, mac, mtu); +} + +void LinuxNetLink::_requestIPv4Routes() +{ + struct nl_req req; + bzero(&req, sizeof(req)); + req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nl.nlmsg_type = RTM_GETROUTE; + req.rt.rtm_family = AF_INET; + req.rt.rtm_table = RT_TABLE_MAIN; + + + bzero(&_pa, sizeof(_pa)); + _pa.nl_family = AF_NETLINK; + + bzero(&_msg, sizeof(_msg)); + _msg.msg_name = (void*)&_pa; + _msg.msg_namelen = sizeof(_pa); + + _iov.iov_base = (void*)&req.nl; + _iov.iov_len = req.nl.nlmsg_len; + _msg.msg_iov = &_iov; + _msg.msg_iovlen = 1; + + _rtn = sendmsg(_fd, &_msg, 0); +} + +void LinuxNetLink::_requestIPv6Routes() +{ + struct nl_req req; + bzero(&req, sizeof(req)); + req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nl.nlmsg_type = RTM_GETROUTE; + req.rt.rtm_family = AF_INET6; + req.rt.rtm_table = RT_TABLE_MAIN; + + + bzero(&_pa, sizeof(_pa)); + _pa.nl_family = AF_NETLINK; + + bzero(&_msg, sizeof(_msg)); + _msg.msg_name = (void*)&_pa; + _msg.msg_namelen = sizeof(_pa); + + _iov.iov_base = (void*)&req.nl; + _iov.iov_len = req.nl.nlmsg_len; + _msg.msg_iov = &_iov; + _msg.msg_iovlen = 1; + + while((_rtn = sendmsg(_fd, &_msg, 0)) == -1) { + fprintf(stderr, "ipv6 waiting..."); + Thread::sleep(100); + } +} + +void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName) +{ + +} + +void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName) +{ + +} + +void LinuxNetLink::addInterface(const char *iface) +{ + +} + +void LinuxNetLink::addAddress(const InetAddress &addr, const char *iface) +{ + +} + +RouteList LinuxNetLink::getIPV4Routes() const +{ + return _routes_ipv4; +} + +RouteList LinuxNetLink::getIPV6Routes() const +{ + return _routes_ipv6; +} + +} // namespace ZeroTier \ No newline at end of file diff --git a/osdep/LinuxNetLink.hpp b/osdep/LinuxNetLink.hpp new file mode 100644 index 00000000..519bb435 --- /dev/null +++ b/osdep/LinuxNetLink.hpp @@ -0,0 +1,133 @@ +/* + * ZeroTier One - Network Virtualization Everywhere + * Copyright (C) 2011-2018 ZeroTier, Inc. https://www.zerotier.com/ + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * -- + * + * You can be released from the requirements of the license by purchasing + * a commercial license. Buying such a license is mandatory as soon as you + * develop commercial closed-source software that incorporates or links + * directly against ZeroTier software without disclosing the source code + * of your own application. + */ + +#ifndef ZT_LINUX_NETLINK_HPP +#define ZT_LINUX_NETLINK_HPP + +#include + +#include +#include +#include +#include + + +#include "../node/InetAddress.hpp" +#include "Thread.hpp" + + +namespace ZeroTier { + +struct route_entry { + InetAddress target; + InetAddress via; + const char *iface; +}; + +typedef std::vector RouteList; + +struct nl_req { + struct nlmsghdr nl; + struct rtmsg rt; + char buf[8192]; +}; + +/** + * Interface with Linux's RTNETLINK + */ +class LinuxNetLink +{ +private: + LinuxNetLink(); + ~LinuxNetLink(); + +public: + static LinuxNetLink& getInstance() + { + static LinuxNetLink instance; + return instance; + } + + LinuxNetLink(LinuxNetLink const&) = delete; + void operator=(LinuxNetLink const&) = delete; + + void addRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName); + void delRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName); + RouteList getIPV4Routes() const; + RouteList getIPV6Routes() const; + + void addInterface(const char *iface); + + void addAddress(const InetAddress &addr, const char *iface); + + void threadMain() throw(); +private: + void _processMessage(); + void _routeAdded(); + void _routeDeleted(); + void _linkAdded(); + void _linkDeleted(); + void _ipAddressAdded(); + void _ipAddressDeleted(); + + + void _requestIPv4Routes(); + void _requestIPv6Routes(); + + + Thread _t; + bool _running; + RouteList _routes_ipv4; + RouteList _routes_ipv6; + + // socket communication vars; + int _fd; + struct sockaddr_nl _la; + struct sockaddr_nl _pa; + struct msghdr _msg; + struct iovec _iov; + int _rtn; + char _buf[8192]; + + // RTNETLINK message pointers & lengths + // used for processing messages + struct nlmsghdr *_nlp; + int _nll; + + struct rtmsg *_rtp; + int _rtl; + struct rtattr *_rtap; + + struct ifinfomsg *_ifip; + int _ifil; + + struct ifaddrmsg *_ifap; + int _ifal; +}; + +} + +#endif // ZT_LINUX_NETLINK_HPPS \ No newline at end of file diff --git a/osdep/ManagedRoute.cpp b/osdep/ManagedRoute.cpp index d7c80704..8d64fde3 100644 --- a/osdep/ManagedRoute.cpp +++ b/osdep/ManagedRoute.cpp @@ -48,6 +48,13 @@ #include #include #include +#ifdef __LINUX__ +#include +#include +#include +#include +#include +#endif #ifdef __BSD__ #include #include @@ -277,27 +284,155 @@ static void _routeCmd(const char *op,const InetAddress &target,const InetAddress #ifdef __LINUX__ // ---------------------------------------------------------- #define ZT_ROUTING_SUPPORT_FOUND 1 -static void _routeCmd(const char *op,const InetAddress &target,const InetAddress &via,const char *localInterface) +static bool _hasRoute(const InetAddress &target, const InetAddress &via, const char *localInterface) { - long p = (long)fork(); - if (p > 0) { - int exitcode = -1; - ::waitpid(p,&exitcode,0); - } else if (p == 0) { - ::close(STDOUT_FILENO); - ::close(STDERR_FILENO); - char ipbuf[64],ipbuf2[64]; + if (target.ss_family == AF_INET) { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + + char *buf; + int nll; + struct rtmsg *rtp; + int rtl; + struct rtattr *rtap; + + struct sockaddr_nl la; + bzero(&la, sizeof(la)); + la.nl_family = AF_NETLINK; + la.nl_pad = 0; + la.nl_pid = (uint32_t)((ptrdiff_t)&target % getpid()); + la.nl_groups = 0; + int rtn = bind(fd, (struct sockaddr*)&la, sizeof(la)); + + + + close(fd); + return false; + } else { + + return false; + } +} + + +static void _routeCmd(const char *op, const InetAddress &target, const InetAddress &via, const char *localInterface) +{ + bool hasRoute = _hasRoute(target, via, localInterface); + if (hasRoute && (strcmp(op, "add") == 0 || strcmp(op, "replace") == 0)) { + return; + } else if (!hasRoute && (strcmp(op, "remove") == 0 || strcmp(op, "del") == 0)) { + return; + } + + char targetStr[64] = {0}; + char viaStr[64] = {0}; + InetAddress nmsk = target.netmask(); + char nmskStr[64] = {0}; + fprintf(stderr, "Received Route Cmd: %s target: %s via: %s netmask: %s localInterface: %s\n", op, target.toString(targetStr), via.toString(viaStr), nmsk.toString(nmskStr), localInterface); + + int fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);; + struct rtentry route = {0}; + + if (target.ss_family == AF_INET) { + struct sockaddr_in *target_in = (struct sockaddr_in*)⌖ + struct sockaddr_in *via_in = (struct sockaddr_in*)&via; + InetAddress netmask = target.netmask(); + struct sockaddr_in *netmask_in = (struct sockaddr_in*)&netmask; + + struct sockaddr_in *addr = NULL; + + // set target + addr = (struct sockaddr_in *)&route.rt_dst; + addr->sin_family = AF_INET; + addr->sin_addr = target_in->sin_addr; + + // set netmask + addr = (struct sockaddr_in *)&route.rt_genmask; + addr->sin_family = AF_INET; + addr->sin_addr = netmask_in->sin_addr; + + route.rt_dev = const_cast(localInterface); + if (via) { - ::execl(ZT_LINUX_IP_COMMAND,ZT_LINUX_IP_COMMAND,(target.ss_family == AF_INET6) ? "-6" : "-4","route",op,target.toString(ipbuf),"via",via.toIpString(ipbuf2),(const char *)0); - ::execl(ZT_LINUX_IP_COMMAND_2,ZT_LINUX_IP_COMMAND_2,(target.ss_family == AF_INET6) ? "-6" : "-4","route",op,target.toString(ipbuf),"via",via.toIpString(ipbuf2),(const char *)0); + // set the gateway + addr = (struct sockaddr_in *)&route.rt_gateway; + addr->sin_family = AF_INET; + addr->sin_addr = via_in->sin_addr; + + route.rt_flags = RTF_UP | RTF_GATEWAY; } else if ((localInterface)&&(localInterface[0])) { - ::execl(ZT_LINUX_IP_COMMAND,ZT_LINUX_IP_COMMAND,(target.ss_family == AF_INET6) ? "-6" : "-4","route",op,target.toString(ipbuf),"dev",localInterface,(const char *)0); - ::execl(ZT_LINUX_IP_COMMAND_2,ZT_LINUX_IP_COMMAND_2,(target.ss_family == AF_INET6) ? "-6" : "-4","route",op,target.toString(ipbuf),"dev",localInterface,(const char *)0); + route.rt_flags = RTF_UP;//| RTF_HOST; + } + } + else if (target.ss_family == AF_INET6) + { + struct sockaddr_in6 *addr = NULL; + + // set target + addr = (struct sockaddr_in6 *)&route.rt_dst; + addr->sin6_family = AF_INET6; + memcpy(&addr->sin6_addr, &((struct sockaddr_in6*)&target)->sin6_addr, sizeof(struct in6_addr)); + + //set netmask + addr = (struct sockaddr_in6 *)&route.rt_genmask; + addr->sin6_family = AF_INET6; + InetAddress netmask = target.netmask(); + memcpy(&addr->sin6_addr, &((struct sockaddr_in6*)&netmask)->sin6_addr, sizeof(struct in6_addr)); + + if (via) { + // set the gateway + addr = (struct sockaddr_in6*)&route.rt_gateway; + addr->sin6_family = AF_INET; + memcpy(&addr->sin6_addr, &((struct sockaddr_in6*)&via)->sin6_addr, sizeof(struct in6_addr)); + + route.rt_flags = RTF_UP | RTF_GATEWAY; + } else if ((localInterface)&&(localInterface[0])) { + route.rt_dev = const_cast(localInterface); + route.rt_flags = RTF_UP; } - ::_exit(-1); } + + unsigned long ctl = -1; + if (strcmp(op, "add") == 0 || strcmp(op, "replace") == 0) { + ctl = SIOCADDRT; + } else if (strcmp(op, "remove") == 0 || strcmp(op, "del") == 0) { + ctl = SIOCDELRT; + } else { + close(fd); + return; + } + + if ( ioctl(fd, ctl, &route)) { + fprintf(stderr, "Error adding route: %s\n", strerror(errno)); + close(fd); + ::exit(1); + } + close(fd); } +// static void _routeCmd(const char *op,const InetAddress &target,const InetAddress &via,const char *localInterface) +// { +// // long p = (long)fork(); +// // if (p > 0) { +// // int exitcode = -1; +// // ::waitpid(p,&exitcode,0); +// // } else if (p == 0) { +// // ::close(STDOUT_FILENO); +// // ::close(STDERR_FILENO); +// char ipbuf[64],ipbuf2[64]; + + + +// if (via) { +// ::execl(ZT_LINUX_IP_COMMAND,ZT_LINUX_IP_COMMAND,(target.ss_family == AF_INET6) ? "-6" : "-4","route",op,target.toString(ipbuf),"via",via.toIpString(ipbuf2),(const char *)0); +// ::execl(ZT_LINUX_IP_COMMAND_2,ZT_LINUX_IP_COMMAND_2,(target.ss_family == AF_INET6) ? "-6" : "-4","route",op,target.toString(ipbuf),"via",via.toIpString(ipbuf2),(const char *)0); +// } else if ((localInterface)&&(localInterface[0])) { +// ::execl(ZT_LINUX_IP_COMMAND,ZT_LINUX_IP_COMMAND,(target.ss_family == AF_INET6) ? "-6" : "-4","route",op,target.toString(ipbuf),"dev",localInterface,(const char *)0); +// ::execl(ZT_LINUX_IP_COMMAND_2,ZT_LINUX_IP_COMMAND_2,(target.ss_family == AF_INET6) ? "-6" : "-4","route",op,target.toString(ipbuf),"dev",localInterface,(const char *)0); +// } +// // ::_exit(-1); +// // } +// } + #endif // __LINUX__ ---------------------------------------------------------- #ifdef __WINDOWS__ // -------------------------------------------------------- -- cgit v1.2.3 From 46a7a2be2e1c4d09d1da2bf26b110a31ec3d0661 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Wed, 30 May 2018 17:45:29 -0700 Subject: Added VERB_ACK and VERB_QOS_MEASUREMENT, refined notion of path quality --- include/ZeroTierDebug.h | 2 +- make-linux.mk | 3 + make-mac.mk | 5 +- node/Constants.hpp | 92 +++++----- node/IncomingPacket.cpp | 119 +++++++++---- node/IncomingPacket.hpp | 2 + node/Packet.hpp | 52 +++++- node/Path.hpp | 458 +++++++++++++++++++++++++++++------------------- node/Peer.cpp | 428 ++++++++++++++++++++++++++------------------ node/Peer.hpp | 85 ++++++++- node/RingBuffer.hpp | 81 ++++++--- node/Switch.cpp | 2 + node/Trace.cpp | 22 +-- node/Trace.hpp | 8 +- node/Utils.hpp | 8 + osdep/Binder.hpp | 14 -- osdep/Phy.hpp | 150 ++-------------- service/OneService.cpp | 33 ---- 18 files changed, 899 insertions(+), 665 deletions(-) (limited to 'make-linux.mk') diff --git a/include/ZeroTierDebug.h b/include/ZeroTierDebug.h index 8e5366f0..a60179b7 100644 --- a/include/ZeroTierDebug.h +++ b/include/ZeroTierDebug.h @@ -86,7 +86,7 @@ #include #define ZT_LOG_TAG "ZTSDK" #endif -#if defined(ZT_TRACE) +#if defined(ZT_DEBUG_TRACE) #if ZT_MSG_INFO == true #if defined(__ANDROID__) #define DEBUG_INFO(fmt, args...) ((void)__android_log_print(ANDROID_LOG_VERBOSE, ZT_LOG_TAG, \ diff --git a/make-linux.mk b/make-linux.mk index 0f5ef384..749c24b8 100644 --- a/make-linux.mk +++ b/make-linux.mk @@ -45,6 +45,9 @@ ONE_OBJS+=ext/http-parser/http_parser.o ifeq ($(ZT_TRACE),1) override DEFS+=-DZT_TRACE endif +ifeq ($(ZT_DEBUG_TRACE),1) + DEFS+=-DZT_DEBUG_TRACE +endif ifeq ($(ZT_RULES_ENGINE_DEBUGGING),1) override DEFS+=-DZT_RULES_ENGINE_DEBUGGING diff --git a/make-mac.mk b/make-mac.mk index 1178437a..12357f68 100644 --- a/make-mac.mk +++ b/make-mac.mk @@ -43,7 +43,10 @@ ONE_OBJS+=ext/libnatpmp/natpmp.o ext/libnatpmp/getgateway.o ext/miniupnpc/connec # Build with address sanitization library for advanced debugging (clang) ifeq ($(ZT_SANITIZE),1) - SANFLAGS+=-fsanitize=address -DASAN_OPTIONS=symbolize=1 + DEFS+=-fsanitize=address -DASAN_OPTIONS=symbolize=1 +endif +ifeq ($(ZT_DEBUG_TRACE),1) + DEFS+=-DZT_DEBUG_TRACE endif # Debug mode -- dump trace output, build binary with -g ifeq ($(ZT_DEBUG),1) diff --git a/node/Constants.hpp b/node/Constants.hpp index ee2ff0a6..227497de 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -267,11 +267,6 @@ */ #define ZT_PING_CHECK_INVERVAL 5000 -/** - * Length of interface name - */ -#define ZT_PATH_INTERFACE_NAME_SZ 16 - /** * How frequently to check for changes to the system's network interfaces. When * the service decides to use this constant it's because we want to react more @@ -286,78 +281,95 @@ #define ZT_MULTIPATH_PROPORTION_WIN_SZ 128 /** - * Threshold for flow to be considered balanced. + * How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL + * since we will record a 0 bit/s measurement if no valid latency measurement was made within this + * window of time. */ -#define ZT_MULTIPATH_FLOW_BALANCE_THESHOLD 0.80 +#define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_MULTIPATH_PEER_PING_PERIOD * 2 /** - * Number of samples to consider when computing path statistics + * Interval used for rate-limiting the computation of path quality estimates. Set at 0 + * to compute as new packets arrive with no delay. */ -#define ZT_PATH_QUALITY_METRIC_WIN_SZ 64 +#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 1000 /** - * How often important path metrics are sampled (in ms). These metrics are later used - * for path quality estimates + * Number of samples to consider when computing real-time path statistics */ -#define ZT_PATH_QUALITY_SAMPLE_INTERVAL 100 +#define ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ 128 /** - * How often new path quality estimates are computed + * Number of samples to consider when computing performing long-term path quality analysis. + * By default this value is set to ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ but can + * be set to any value greater than that to observe longer-term path quality behavior. */ -#define ZT_PATH_QUALITY_ESTIMATE_INTERVAL 100 +#define ZT_PATH_QUALITY_METRIC_WIN_SZ ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ /** - * How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL - * since we will record a 0 bit/s measurement if no valid latency measurement was made within this - * window of time. + * Maximum acceptable Packet Delay Variance (PDV) over a path */ -#define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_MULTIPATH_PEER_PING_PERIOD * 2 +#define ZT_PATH_MAX_PDV 1000 /** - * Interval used for rate-limiting the computation of path quality estimates. Set at 0 - * to compute as new packets arrive with no delay. + * Maximum acceptable time interval between expectation and receipt of at least one ACK over a path + */ +#define ZT_PATH_MAX_AGE 30000 + +/** + * Maximum acceptable mean latency over a path + */ +#define ZT_PATH_MAX_MEAN_LATENCY 1000 + +/** + * How much each factor contributes to the "stability" score of a path + */ +#define ZT_PATH_CONTRIB_PDV 1.0 / 3.0 +#define ZT_PATH_CONTRIB_LATENCY 1.0 / 3.0 +#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE 1.0 / 3.0 + +/** + * How much each factor contributes to the "quality" score of a path */ -#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 0 +#define ZT_PATH_CONTRIB_STABILITY 0.75 / 3.0 +#define ZT_PATH_CONTRIB_THROUGHPUT 1.50 / 3.0 +#define ZT_PATH_CONTRIB_SCOPE 0.75 / 3.0 /** - * Path error rate history window size. This is used to keep track of packet error - * measurements over a path's medium-term history. + * Min and max acceptable sizes for a VERB_QOS_MEASUREMENT packet */ -#define ZT_PATH_ERROR_HIST_WIN_SZ 10 +#define ZT_PATH_MIN_QOS_PACKET_SZ 8 + 1 +#define ZT_PATH_MAX_QOS_PACKET_SZ 1400 /** - * The number of packet error measurements in each sample + * How many ID:sojourn time pairs in a single QoS packet */ -#define ZT_PATH_ERROR_SAMPLE_WIN_SZ 1024 +#define ZT_PATH_QOS_TABLE_SIZE (ZT_PATH_MAX_QOS_PACKET_SZ * 8) / (64 + 8) /** - * How often a peer will prune its own paths. Pruning is important when multipath is - * enabled because we want to prevent the allocation algorithms from sending anything - * out on known dead paths. Additionally, quickly marking paths as dead helps when - * a new path is learned and needs to replace an older path. + * How often the service tests the path throughput */ -#define ZT_CLOSED_PATH_PRUNING_INTERVAL 1000 +#define ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL ZT_PATH_ACK_INTERVAL * 8 /** - * Datagram used to test link throughput. Contents are random. + * Minimum amount of time between each ACK packet */ -#define ZT_LINK_TEST_DATAGRAM_SZ 1024 +#define ZT_PATH_ACK_INTERVAL 250 /** - * Size of datagram expected as a reply to a link speed test + * How often a QoS packet is sent */ -#define ZT_LINK_TEST_DATAGRAM_RESPONSE_SZ 8 +#define ZT_PATH_QOS_INTERVAL 1000 /** - * Time before a link test datagram is considered lost. Any corresponding - * timing records that would have been used to compute a RTT are purged. + * How often an aggregate link statistics report is emitted into this tracing system */ -#define ZT_LINK_TEST_TIMEOUT 10000 +#define ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL 60000 /** - * How often the service tests the link throughput. + * How much an aggregate link's component paths can vary from their target allocation + * before the link is considered to be in a state of imbalance. */ -#define ZT_LINK_SPEED_TEST_INTERVAL 1000 +#define ZT_PATH_IMBALANCE_THRESHOLD 0.20 /** * How frequently to send heartbeats over in-use paths diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index 8f6dda63..e7227412 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -80,7 +80,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) if (!trusted) { if (!dearmor(peer->key())) { RR->t->incomingPacketMessageAuthenticationFailure(tPtr,_path,packetId(),sourceAddress,hops(),"invalid MAC"); - _path->recordPacket(false); + _path->recordInvalidPacket(); return true; } } @@ -90,15 +90,15 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) return true; } - _path->recordPacket(true); - const Packet::Verb v = verb(); switch(v) { //case Packet::VERB_NOP: default: // ignore unknown verbs, but if they pass auth check they are "received" - peer->received(tPtr,_path,hops(),packetId(),v,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0); return true; case Packet::VERB_HELLO: return _doHELLO(RR,tPtr,true); + case Packet::VERB_ACK: return _doACK(RR,tPtr,peer); + case Packet::VERB_QOS_MEASUREMENT: return _doQOS_MEASUREMENT(RR,tPtr,peer); case Packet::VERB_ERROR: return _doERROR(RR,tPtr,peer); case Packet::VERB_OK: return _doOK(RR,tPtr,peer); case Packet::VERB_WHOIS: return _doWHOIS(RR,tPtr,peer); @@ -197,11 +197,55 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,void *tPtr,const Shar default: break; } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId); + + return true; +} + +bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) +{ + /* Dissect incoming ACK packet. From this we can estimate current throughput of the path, establish known + * maximums and detect packet loss. */ + + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + int32_t ackedBytes; + memcpy(&ackedBytes, payload(), sizeof(int32_t)); + _path->receivedAck(RR->node->now(), Utils::ntoh(ackedBytes)); + } return true; } +bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) +{ + + /* Dissect incoming QoS packet. From this we can compute latency values and their variance. + * The latency variance is used as a measure of "jitter". */ + + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if (payloadLength() < ZT_PATH_MAX_QOS_PACKET_SZ && payloadLength() > ZT_PATH_MIN_QOS_PACKET_SZ) { + const int64_t now = RR->node->now(); + uint64_t rx_id[ZT_PATH_QOS_TABLE_SIZE]; + uint8_t rx_ts[ZT_PATH_QOS_TABLE_SIZE]; + char *begin = (char *)payload(); + char *ptr = begin; + int count = 0; + int len = payloadLength(); + // Read packet IDs and latency compensation intervals for each packet tracked by thie QoS packet + while (ptr < (begin + len)) { + memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t)); + rx_id[count] = Utils::ntoh(rx_id[count]); + ptr+=sizeof(uint64_t); + memcpy((void*)&rx_ts[count], ptr, sizeof(uint8_t)); + ptr+=sizeof(uint8_t); + count++; + } + _path->receivedQoS(now, count, rx_id, rx_ts); + } + } + return true; +} + bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool alreadyAuthenticated) { const int64_t now = RR->node->now(); @@ -398,7 +442,7 @@ bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool _path->send(RR,tPtr,outp.data(),outp.size(),now); peer->setRemoteVersion(protoVersion,vMajor,vMinor,vRevision); // important for this to go first so received() knows the version - peer->received(tPtr,_path,hops(),pid,Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0); return true; } @@ -448,8 +492,9 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP } } - if (!hops()) + if (!hops() && (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE)) { _path->updateLatency((unsigned int)latency, RR->node->now()); + } peer->setRemoteVersion(vProto,vMajor,vMinor,vRevision); @@ -510,7 +555,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP default: break; } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId); return true; } @@ -545,7 +590,7 @@ bool IncomingPacket::_doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const Shar _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0); return true; } @@ -569,7 +614,7 @@ bool IncomingPacket::_doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0); return true; } @@ -598,7 +643,7 @@ bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const Shar _sendErrorNeedCredentials(RR,tPtr,peer,nwid); } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid); return true; } @@ -621,7 +666,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const if (!network->gate(tPtr,peer)) { RR->t->incomingNetworkAccessDenied(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,true); _sendErrorNeedCredentials(RR,tPtr,peer,nwid); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); return true; } @@ -633,7 +678,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const const uint8_t *const frameData = (const uint8_t *)field(comLen + ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD,frameLen); if ((!from)||(from == network->mac())) { - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } @@ -644,19 +689,19 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const network->learnBridgeRoute(from,peer->address()); } else { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (remote)"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } } else if (to != network->mac()) { if (to.isMulticast()) { if (network->config().multicastLimit == 0) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"multicast disabled"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } } else if (!network->config().permitsBridging(RR->identity.address())) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (local)"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } } @@ -676,9 +721,9 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); } else { - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); } return true; @@ -698,7 +743,7 @@ bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const Share outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); - peer->received(tPtr,_path,hops(),pid,Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0); return true; } @@ -743,7 +788,7 @@ bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,c } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); return true; } @@ -866,7 +911,7 @@ bool IncomingPacket::_doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *t } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); return true; } @@ -892,7 +937,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,void _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hopCount,requestPacketId,Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid); return true; } @@ -913,7 +958,7 @@ bool IncomingPacket::_doNETWORK_CONFIG(const RuntimeEnvironment *RR,void *tPtr,c } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0); return true; } @@ -956,7 +1001,7 @@ bool IncomingPacket::_doMULTICAST_GATHER(const RuntimeEnvironment *RR,void *tPtr } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid); return true; } @@ -982,7 +1027,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, if (!network->gate(tPtr,peer)) { RR->t->incomingNetworkAccessDenied(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,true); _sendErrorNeedCredentials(RR,tPtr,peer,nwid); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); return true; } @@ -1006,19 +1051,19 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, if (network->config().multicastLimit == 0) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"multicast disabled"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); return true; } if ((frameLen > 0)&&(frameLen <= ZT_MAX_MTU)) { if (!to.mac().isMulticast()) { RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"destination not multicast"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } if ((!from)||(from.isMulticast())||(from == network->mac())) { RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"invalid source MAC"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } @@ -1032,7 +1077,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, network->learnBridgeRoute(from,peer->address()); } else { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"bridging not allowed (remote)"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } } @@ -1055,10 +1100,10 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); } else { _sendErrorNeedCredentials(RR,tPtr,peer,nwid); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); } return true; @@ -1070,7 +1115,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt // First, subject this to a rate limit if (!peer->rateGatePushDirectPaths(now)) { - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); return true; } @@ -1094,7 +1139,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt case 4: { const InetAddress a(field(ptr,4),4,at(ptr + 4)); if ( - ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget + ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget (!( ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_CLUSTER_REDIRECT) == 0) && (peer->hasActivePathTo(now,a)) )) && // not already known (RR->node->shouldUsePathForZeroTierTraffic(tPtr,peer->address(),_path->localSocket(),a)) ) // should use path { @@ -1108,7 +1153,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt case 6: { const InetAddress a(field(ptr,16),16,at(ptr + 16)); if ( - ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget + ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget (!( ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_CLUSTER_REDIRECT) == 0) && (peer->hasActivePathTo(now,a)) )) && // not already known (RR->node->shouldUsePathForZeroTierTraffic(tPtr,peer->address(),_path->localSocket(),a)) ) // should use path { @@ -1123,7 +1168,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt ptr += addrLen; } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); return true; } @@ -1139,7 +1184,7 @@ bool IncomingPacket::_doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,con RR->node->postEvent(tPtr,ZT_EVENT_USER_MESSAGE,reinterpret_cast(&um)); } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0); return true; } @@ -1163,7 +1208,7 @@ bool IncomingPacket::_doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,con } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0); return true; } diff --git a/node/IncomingPacket.hpp b/node/IncomingPacket.hpp index 88f4f066..9144277c 100644 --- a/node/IncomingPacket.hpp +++ b/node/IncomingPacket.hpp @@ -125,6 +125,8 @@ private: // been authenticated, decrypted, decompressed, and classified. bool _doERROR(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool alreadyAuthenticated); + bool _doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); + bool _doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); diff --git a/node/Packet.hpp b/node/Packet.hpp index 27da6fb5..6869691e 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -419,7 +419,7 @@ public: template Fragment(const Buffer &b) : - Buffer(b) + Buffer(b) { } @@ -930,7 +930,53 @@ public: */ VERB_PUSH_DIRECT_PATHS = 0x10, - // 0x11, 0x12 -- deprecated + // 0x11 -- deprecated + + /** + * An acknowledgement of receipt of a series of recent packets from another + * peer. This is used to calculate relative throughput values and to detect + * packet loss. Only VERB_FRAME and VERB_EXT_FRAME packets are counted. + * + * ACK response format: + * <[4] 32-bit number of bytes received since last ACK> + * + * Upon receipt of this packet, the local peer will verify that the correct + * number of bytes were received by the remote peer. If these values do + * not agree that could be an indicator of packet loss. + * + * Additionally, the local peer knows the interval of time that has + * elapsed since the last received ACK. With this information it can compute + * a rough estimate of the current throughput. + * + * This is sent at a maximum rate of once per every ZT_PATH_ACK_INTERVAL + */ + VERB_ACK = 0x12, + + /** + * A packet containing timing measurements useful for estimating path quality. + * Composed of a list of pairs for an + * arbitrary set of recent packets. This is used to sample for latency and + * packet delay variance (PDV, "jitter"). + * + * QoS record format: + * + * <[8] 64-bit packet ID of previously-received packet> + * <[1] 8-bit packet sojourn time> + * <...repeat until end of max 1400 byte packet...> + * + * The number of possible records per QoS packet is: (1400 * 8) / 72 = 155 + * This packet should be sent very rarely (every few seconds) as it can be + * somewhat large if the connection is saturated. Future versions might use + * a bloom table to probablistically determine these values in a vastly + * more space-efficient manner. + * + * Note: The 'internal packet sojourn time' is a slight misnomer as it is a + * measure of the amount of time between when a packet was received and the + * egress time of its tracking QoS packet. + * + * This is sent at a maximum rate of once per every ZT_PATH_QOS_INTERVAL + */ + VERB_QOS_MEASUREMENT = 0x13, /** * A message with arbitrary user-definable content: @@ -999,7 +1045,7 @@ public: template Packet(const Buffer &b) : - Buffer(b) + Buffer(b) { } diff --git a/node/Path.hpp b/node/Path.hpp index e6bcecf0..0278d919 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -102,24 +102,23 @@ public: _latency(0xffff), _addr(), _ipScope(InetAddress::IP_SCOPE_NONE), - _currentPacketSampleCounter(0), - _meanPacketErrorRatio(0.0), - _meanLatency(0.0), - _lastLatencyUpdate(0), - _jitter(0.0), - _lastPathQualitySampleTime(0), - _lastComputedQuality(0.0), - _lastPathQualityEstimate(0), - _meanAge(0.0), + _lastAck(0), + _lastThroughputEstimation(0), + _lastQoSMeasurement(0), + _unackedBytes(0), + _expectingAckAsOf(0), + _packetsReceivedSinceLastAck(0), _meanThroughput(0.0), - _packetLossRatio(0) + _maxLifetimeThroughput(0), + _bytesAckedSinceLastThroughputEstimation(0), + _meanLatency(0.0), + _packetDelayVariance(0.0), + _packetErrorRatio(0.0), + _packetLossRatio(0), + _lastComputedStability(0.0), + _lastComputedRelativeQuality(0) { - memset(_ifname, 0, sizeof(_ifname)); - memset(_addrString, 0, sizeof(_addrString)); - _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _ageSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _errSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + prepareBuffers(); } Path(const int64_t localSocket,const InetAddress &addr) : @@ -131,37 +130,35 @@ public: _latency(0xffff), _addr(addr), _ipScope(addr.ipScope()), - _currentPacketSampleCounter(0), - _meanPacketErrorRatio(0.0), - _meanLatency(0.0), - _lastLatencyUpdate(0), - _jitter(0.0), - _lastPathQualitySampleTime(0), - _lastComputedQuality(0.0), - _lastPathQualityEstimate(0), - _meanAge(0.0), + _lastAck(0), + _lastThroughputEstimation(0), + _lastQoSMeasurement(0), + _unackedBytes(0), + _expectingAckAsOf(0), + _packetsReceivedSinceLastAck(0), _meanThroughput(0.0), - _packetLossRatio(0) + _maxLifetimeThroughput(0), + _bytesAckedSinceLastThroughputEstimation(0), + _meanLatency(0.0), + _packetDelayVariance(0.0), + _packetErrorRatio(0.0), + _packetLossRatio(0), + _lastComputedStability(0.0), + _lastComputedRelativeQuality(0) { - memset(_ifname, 0, sizeof(_ifname)); - memset(_addrString, 0, sizeof(_addrString)); - _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _ageSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _errSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + prepareBuffers(); } ~Path() { delete _throughputSamples; - delete _ageSamples; delete _latencySamples; - delete _errSamples; - + delete _qualitySamples; + delete _packetValiditySamples; _throughputSamples = NULL; - _ageSamples = NULL; _latencySamples = NULL; - _errSamples = NULL; + _qualitySamples = NULL; + _packetValiditySamples = NULL; } /** @@ -209,7 +206,6 @@ public: else { _latency = l; } - _lastLatencyUpdate = now; _latencySamples->push(l); } @@ -299,194 +295,273 @@ public: } /** - * @return An estimate of path quality -- higher is better. + * Take note that we're expecting a VERB_ACK on this path as of a specific time + * + * @param now Current time + * @param packetId ID of the packet + * @param payloadLength Number of bytes we're is expecting a reply to */ - inline float computeQuality(const int64_t now) + inline void expectingAck(int64_t now, int64_t packetId, uint16_t payloadLength) { - float latency_contrib = _meanLatency ? (float)1.0 / _meanLatency : 0; - float jitter_contrib = _jitter ? (float)1.0 / _jitter : 0; - float throughput_contrib = _meanThroughput ? _meanThroughput / 1000000 : 0; // in Mbps - float age_contrib = _meanAge > 0 ? (float)sqrt(_meanAge) : 1; - float error_contrib = (float)1.0 - _meanPacketErrorRatio; - float sum = (latency_contrib + jitter_contrib + throughput_contrib + error_contrib) / age_contrib; - _lastComputedQuality = sum * (long)((_ipScope) + 1); - return _lastComputedQuality; + _expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? _expectingAckAsOf : now; + _unackedBytes += payloadLength; + _outgoingPacketRecords[packetId] = now; } /** - * Since quality estimates can become expensive we should cache the most recent result for traffic allocation - * algorithms which may need to reference this value multiple times through the course of their execution. + * Record that we've received a VERB_ACK on this path, also compute throughput if required. + * + * @param now Current time + * @param ackedBytes Number of bytes awknowledged by other peer */ - inline float lastComputedQuality() { - return _lastComputedQuality; + inline void receivedAck(int64_t now, int32_t ackedBytes) + { + _expectingAckAsOf = 0; + _unackedBytes = (ackedBytes > _unackedBytes) ? 0 : _unackedBytes - ackedBytes; + int64_t timeSinceThroughputEstimate = (now - _lastThroughputEstimation); + if (timeSinceThroughputEstimate >= ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL) { + uint64_t throughput = (float)(_bytesAckedSinceLastThroughputEstimation) / ((float)timeSinceThroughputEstimate / (float)1000); + _throughputSamples->push(throughput); + _maxLifetimeThroughput = throughput > _maxLifetimeThroughput ? throughput : _maxLifetimeThroughput; + _lastThroughputEstimation = now; + _bytesAckedSinceLastThroughputEstimation = 0; + } else { + _bytesAckedSinceLastThroughputEstimation += ackedBytes; + } } /** - * @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to + * @return Number of bytes this peer is responsible for ACKing since last ACK */ - inline char *getName() { return _ifname; } + inline int32_t bytesToAck() + { + int32_t bytesToAck = 0; + for (int i=0; i<_packetsReceivedSinceLastAck; i++) { + bytesToAck += _recorded_len[i]; + } + return bytesToAck; + } /** - * @return Estimated throughput in bps of this link + * @return Number of bytes thusfar sent that have not been awknowledged by the remote peer */ - inline uint64_t getThroughput() { return _phy->getThroughput((PhySocket *)((uintptr_t)_localSocket)); } + inline int64_t unackedSentBytes() + { + return _unackedBytes; + } /** - * @return Packet delay varience + * Account for the fact that an ACK was just sent. Reset counters, timers, and clear statistics buffers + * + * @param Current time */ - inline float jitter() { return _jitter; } + inline void sentAck(int64_t now) + { + memset(_recorded_id, 0, sizeof(_recorded_id)); + memset(_recorded_ts, 0, sizeof(_recorded_ts)); + memset(_recorded_len, 0, sizeof(_recorded_len)); + _packetsReceivedSinceLastAck = 0; + _lastAck = now; + } /** - * @return Previously-computed mean latency + * Receive QoS data, match with recorded egress times from this peer, compute latency + * estimates. + * + * @param now Current time + * @param count Number of records + * @param rx_id table of packet IDs + * @param rx_ts table of holding times */ - inline float meanLatency() { return _meanLatency; } + inline void receivedQoS(int64_t now, int count, uint64_t *rx_id, uint8_t *rx_ts) + { + // Look up egress times and compute latency values for each record + for (int j=0; j::iterator it = _outgoingPacketRecords.find(rx_id[j]); + if (it != _outgoingPacketRecords.end()) { + uint16_t rtt = (uint16_t)(now - it->second); + uint16_t rtt_compensated = rtt - rx_ts[j]; + float latency = rtt_compensated / 2.0; + updateLatency(latency, now); + _outgoingPacketRecords.erase(it); + } + } + } /** - * @return Packet loss rate + * Generate the contents of a VERB_QOS_MEASUREMENT packet. + * + * @param now Current time + * @param qosBuffer destination buffer + * @return Size of payload */ - inline float packetLossRatio() { return _packetLossRatio; } + inline int32_t generateQoSPacket(int64_t now, char *qosBuffer) + { + int32_t len = 0; + for (int i=0; i<_packetsReceivedSinceLastAck; i++) { + uint64_t id = _recorded_id[i]; + memcpy(qosBuffer, &id, sizeof(uint64_t)); + qosBuffer+=sizeof(uint64_t); + uint8_t holdingTime = (uint8_t)(now - _recorded_ts[i]); + memcpy(qosBuffer, &holdingTime, sizeof(uint8_t)); + qosBuffer+=sizeof(uint8_t); + len+=sizeof(uint64_t)+sizeof(uint8_t); + } + return len; + } /** - * @return Mean packet error ratio + * Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers. + * + * @param Current time */ - inline float meanPacketErrorRatio() { return _meanPacketErrorRatio; } + inline void sentQoS(int64_t now) { _lastQoSMeasurement = now; } /** - * @return Current packet error ratio (possibly incomplete sample set) + * Record statistics on incoming packets. Used later to estimate QoS. + * + * @param now Current time + * @param packetId + * @param payloadLength */ - inline float currentPacketErrorRatio() { - int errorsPerSample = 0; - for (int i=0; i<_currentPacketSampleCounter; i++) { - if (_packetValidity[i] == false) { - errorsPerSample++; - } - } - return (float)errorsPerSample / (float)ZT_PATH_ERROR_SAMPLE_WIN_SZ; + inline void recordIncomingPacket(int64_t now, int64_t packetId, int32_t payloadLength) + { + _recorded_ts[_packetsReceivedSinceLastAck] = now; + _recorded_id[_packetsReceivedSinceLastAck] = packetId; + _recorded_len[_packetsReceivedSinceLastAck] = payloadLength; + _packetsReceivedSinceLastAck++; + _packetValiditySamples->push(true); } /** - * @return Whether the Path's local socket is in a CLOSED state + * @param now Current time + * @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time */ - inline bool isClosed() { return _phy->isClosed((PhySocket *)((uintptr_t)_localSocket)); } + inline bool needsToSendAck(int64_t now) { + return ((now - _lastAck) >= ZT_PATH_ACK_INTERVAL || + (_packetsReceivedSinceLastAck == ZT_PATH_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck; + } /** - * @return The state of a Path's local socket + * @param now Current time + * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time */ - inline int getState() { return _phy->getState((PhySocket *)((uintptr_t)_localSocket)); } + inline bool needsToSendQoS(int64_t now) { + return ((_packetsReceivedSinceLastAck >= ZT_PATH_QOS_TABLE_SIZE) || + ((now - _lastQoSMeasurement) > ZT_PATH_QOS_INTERVAL)) && _packetsReceivedSinceLastAck; + } /** - * @return Whether this socket may have been erased by the virtual physical link layer + * How much time has elapsed since we've been expecting a VERB_ACK on this path. This value + * is used to determine a more relevant path "age". This lets us penalize paths which are no + * longer ACKing, but not those that simple aren't being used to carry traffic at the + * current time. */ - inline bool isValidState() { return _phy->isValidState((PhySocket *)((uintptr_t)_localSocket)); } + inline int64_t ackAge(int64_t now) { return _expectingAckAsOf ? now - _expectingAckAsOf : 0; } /** - * @return Whether the path quality monitors have collected enough data to provide a quality value - * TODO: expand this + * The maximum observed throughput for this path */ - inline bool monitorsReady() { - return _latencySamples->count() && _ageSamples->count() && _throughputSamples->count(); - } + inline uint64_t maxLifetimeThroughput() { return _maxLifetimeThroughput; } /** - * @return A pointer to a cached copy of the address string for this Path (For debugging only) + * @return The mean throughput (in bits/s) of this link */ - inline char *getAddressString() { return _addrString; } + inline float meanThroughput() { return _meanThroughput; } /** - * Handle path sampling, computation of quality estimates, and other periodic tasks - * @param now Current time + * Assign a new relative quality value for this path in the aggregate link + * + * @param rq Quality of this path in comparison to other paths available to this peer */ - inline void measureLink(int64_t now) { - // Sample path properties and store them in a continuously-revolving buffer - if (now - _lastPathQualitySampleTime > ZT_PATH_QUALITY_SAMPLE_INTERVAL) { - _lastPathQualitySampleTime = now; - _throughputSamples->push(getThroughput()); // Thoughtput in bits/s - _ageSamples->push(now - _lastIn); // Age (time since last received packet) - if (now - _lastLatencyUpdate > ZT_PATH_LATENCY_SAMPLE_INTERVAL) { - _lastLatencyUpdate = now; - // Record 0 bp/s. Since we're using this to detect possible packet loss - updateLatency(0, now); - } - } - // Compute statistical values for use in link quality estimates - if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { - _lastPathQualityComputeTime = now; - // Cache Path address string - address().toString(_addrString); - _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, ZT_PATH_INTERFACE_NAME_SZ); // Cache Interface name - // Derived values - if (_throughputSamples->count()) { - _packetLossRatio = (float)_throughputSamples->zeroCount() / (float)_throughputSamples->count(); - } - _meanThroughput = _throughputSamples->mean(); - _meanAge = _ageSamples->mean(); - _meanLatency = _latencySamples->mean(); - // Jitter - // SEE: RFC 3393, RFC 4689 - _jitter = _latencySamples->stddev(); - _meanPacketErrorRatio = _errSamples->mean(); // Packet Error Ratio (PER) - } - // Periodically compute a path quality estimate - if (now - _lastPathQualityEstimate > ZT_PATH_QUALITY_ESTIMATE_INTERVAL) { - computeQuality(now); - } - } + inline void updateRelativeQuality(float rq) { _lastComputedRelativeQuality = rq; } /** - * @param buf Buffer to store resultant string - * @return Description of path, in ASCII string format + * @return Quality of this path compared to others in the aggregate link */ - inline char *toString(char *buf) { - sprintf(buf,"%6s, q=%8.3f, %5.3f Mb/s, j=%8.2f, ml=%8.2f, meanAge=%8.2f, addr=%45s", - getName(), - lastComputedQuality(), - (float)meanThroughput() / (float)1000000, - jitter(), - meanLatency(), - meanAge(), - getAddressString()); - return buf; - } + inline float relativeQuality() { return _lastComputedRelativeQuality; } /** - * Record whether a packet is considered invalid by MAC/compression/cipher checks. This - * could be an indication of a bit error. This function will keep a running counter of - * up to a given window size and with each counter overflow it will compute a mean error rate - * and store that in a continuously shifting sample window. - * - * @param isValid Whether the packet in question is considered invalid + * @return Stability estimates can become expensive to compute, we cache the most recent result. */ - inline void recordPacket(bool isValid) { - if (_currentPacketSampleCounter < ZT_PATH_ERROR_SAMPLE_WIN_SZ) { - _packetValidity[_currentPacketSampleCounter] = isValid; - _currentPacketSampleCounter++; - } - else { - // Sample array is full, compute an mean and stick it in the ring buffer for trend analysis - _errSamples->push(currentPacketErrorRatio()); - _currentPacketSampleCounter=0; - } - } + inline float lastComputedStability() { return _lastComputedStability; } /** - * @return The mean age (in ms) of this link + * @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to */ - inline float meanAge() { return _meanAge; } + inline char *getName() { return _ifname; } /** - * @return The mean throughput (in bits/s) of this link + * @return Packet delay varience */ - inline float meanThroughput() { return _meanThroughput; } + inline float packetDelayVariance() { return _packetDelayVariance; } /** - * @return True if this path is alive (receiving heartbeats) + * @return Previously-computed mean latency */ - inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); } + inline float meanLatency() { return _meanLatency; } + + /** + * @return Packet loss rate (PLR) + */ + inline float packetLossRatio() { return _packetLossRatio; } + + /** + * @return Packet error ratio (PER) + */ + inline float packetErrorRatio() { return _packetErrorRatio; } + + /** + * Record an invalid incoming packet. This packet failed MAC/compression/cipher checks and will now + * contribute to a Packet Error Ratio (PER). + */ + inline void recordInvalidPacket() { _packetValiditySamples->push(false); } /** - * @return True if this path hasn't received a packet in a "significant" amount of time + * @return A pointer to a cached copy of the address string for this Path (For debugging only) */ - inline bool stale(const int64_t now) const { return ((now - _lastIn) > ZT_LINK_SPEED_TEST_INTERVAL * 10); } + inline char *getAddressString() { return _addrString; } + + /** + * Compute and cache stability and performance metrics. The resultant stability coefficint is a measure of how "well behaved" + * this path is. This figure is substantially different from (but required for the estimation of the path's overall "quality". + * + * @param now Current time + */ + inline void processBackgroundPathMeasurements(int64_t now, const int64_t peerId) { + if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { + _lastPathQualityComputeTime = now; + _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, 16); + address().toString(_addrString); + _meanThroughput = _throughputSamples->mean(); + _meanLatency = _latencySamples->mean(); + _packetDelayVariance = _latencySamples->stddev(); // Similar to "jitter" (SEE: RFC 3393, RFC 4689) + // If no packet validity samples, assume PER==0 + _packetErrorRatio = 1 - (_packetValiditySamples->count() ? _packetValiditySamples->mean() : 1); + // Compute path stability + // Normalize measurements with wildly different ranges into a reasonable range + float normalized_pdv = Utils::normalize(_packetDelayVariance, 0, ZT_PATH_MAX_PDV, 0, 10); + float normalized_la = Utils::normalize(_meanLatency, 0, ZT_PATH_MAX_MEAN_LATENCY, 0, 10); + float throughput_cv = _throughputSamples->mean() > 0 ? _throughputSamples->stddev() / _throughputSamples->mean() : 1; + // Form an exponential cutoff and apply contribution weights + float pdv_contrib = exp((-1)*normalized_pdv) * ZT_PATH_CONTRIB_PDV; + float latency_contrib = exp((-1)*normalized_la) * ZT_PATH_CONTRIB_LATENCY; + float throughput_disturbance_contrib = exp((-1)*throughput_cv) * ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE; + // Obey user-defined ignored contributions + pdv_contrib = ZT_PATH_CONTRIB_PDV > 0.0 ? pdv_contrib : 1; + latency_contrib = ZT_PATH_CONTRIB_LATENCY > 0.0 ? latency_contrib : 1; + throughput_disturbance_contrib = ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE > 0.0 ? throughput_disturbance_contrib : 1; + // Compute the quality product + _lastComputedStability = pdv_contrib + latency_contrib + throughput_disturbance_contrib; + _lastComputedStability *= 1 - _packetErrorRatio; + _qualitySamples->push(_lastComputedStability); + } + } + + /** + * @return True if this path is alive (receiving heartbeats) + */ + inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); } /** * @return True if this path needs a heartbeat @@ -508,6 +583,21 @@ public: */ inline int64_t lastTrustEstablishedPacketReceived() const { return _lastTrustEstablishedPacketReceived; } + /** + * Initialize statistical buffers + */ + inline void prepareBuffers() { + _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _qualitySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _packetValiditySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + memset(_ifname, 0, 16); + memset(_recorded_id, 0, sizeof(_recorded_id)); + memset(_recorded_ts, 0, sizeof(_recorded_ts)); + memset(_recorded_len, 0, sizeof(_recorded_len)); + memset(_addrString, 0, sizeof(_addrString)); + } + private: volatile int64_t _lastOut; volatile int64_t _lastIn; @@ -519,32 +609,42 @@ private: InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often AtomicCounter __refCount; - // Packet Error Ratio (PER) - int _packetValidity[ZT_PATH_ERROR_SAMPLE_WIN_SZ]; - int _currentPacketSampleCounter; - volatile float _meanPacketErrorRatio; + uint64_t _recorded_id[ZT_PATH_QOS_TABLE_SIZE]; + uint64_t _recorded_ts[ZT_PATH_QOS_TABLE_SIZE]; + uint16_t _recorded_len[ZT_PATH_QOS_TABLE_SIZE]; - // Latency and Jitter - volatile float _meanLatency; - int64_t _lastLatencyUpdate; - volatile float _jitter; + std::map _outgoingPacketRecords; + + int64_t _lastAck; + int64_t _lastThroughputEstimation; + int64_t _lastQoSMeasurement; + + int64_t _unackedBytes; + int64_t _expectingAckAsOf; + int16_t _packetsReceivedSinceLastAck; - int64_t _lastPathQualitySampleTime; - float _lastComputedQuality; - int64_t _lastPathQualityEstimate; - float _meanAge; float _meanThroughput; + uint64_t _maxLifetimeThroughput; + uint64_t _bytesAckedSinceLastThroughputEstimation; - // Circular buffers used to efficiently store large time series - RingBuffer *_throughputSamples; - RingBuffer *_latencySamples; - RingBuffer *_ageSamples; - RingBuffer *_errSamples; + volatile float _meanLatency; + float _packetDelayVariance; + float _packetErrorRatio; float _packetLossRatio; - char _ifname[ZT_PATH_INTERFACE_NAME_SZ]; + // cached estimates + float _lastComputedStability; + float _lastComputedRelativeQuality; + + // cached human-readable strings for tracing purposes + char _ifname[16]; char _addrString[256]; + + RingBuffer *_throughputSamples; + RingBuffer *_latencySamples; + RingBuffer *_qualitySamples; + RingBuffer *_packetValiditySamples; }; } // namespace ZeroTier diff --git a/node/Peer.cpp b/node/Peer.cpp index c46ed751..8deaa362 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -24,8 +24,8 @@ * of your own application. */ -#include "../version.h" +#include "../version.h" #include "Constants.hpp" #include "Peer.hpp" #include "Node.hpp" @@ -36,6 +36,7 @@ #include "Trace.hpp" #include "InetAddress.hpp" #include "RingBuffer.hpp" +#include "Utils.hpp" namespace ZeroTier { @@ -61,13 +62,13 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _id(peerIdentity), _directPathPushCutoffCount(0), _credentialsCutoffCount(0), - _linkBalanceStatus(false), - _linkRedundancyStatus(false) + _linkIsBalanced(false), + _linkIsRedundant(false), + _lastAggregateStatsReport(0) { if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) throw ZT_EXCEPTION_INVALID_ARGUMENT; _pathChoiceHist = new RingBuffer(ZT_MULTIPATH_PROPORTION_WIN_SZ); - _flowBalanceHist = new RingBuffer(ZT_MULTIPATH_PROPORTION_WIN_SZ); } void Peer::received( @@ -75,6 +76,7 @@ void Peer::received( const SharedPtr &path, const unsigned int hops, const uint64_t packetId, + const unsigned int payloadLength, const Packet::Verb verb, const uint64_t inRePacketId, const Packet::Verb inReVerb, @@ -103,13 +105,13 @@ void Peer::received( { Mutex::Lock _l(_paths_m); if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { - if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { - _lastPathPrune = now; - prunePaths(); + recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, now); + if (path->needsToSendQoS(now)) { + sendQOS_MEASUREMENT(tPtr, path, path->localSocket(), path->address(), now); } for(unsigned int i=0;imeasureLink(now); + _paths[i].p->processBackgroundPathMeasurements(now, _id.address().toInt()); } } } @@ -117,7 +119,6 @@ void Peer::received( if (hops == 0) { // If this is a direct packet (no hops), update existing paths or learn new ones - bool havePath = false; { Mutex::Lock _l(_paths_m); @@ -164,6 +165,19 @@ void Peer::received( } } + // If we find a pre-existing path with the same address, just replace it. + // If we don't find anything we can replace, just use the replacePath that we previously decided on. + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + for(unsigned int i=0;iaddress().ss_family == path->address().ss_family && _paths[i].p->address().ipsEqual2(path->address())) { + replacePath = i; + break; + } + } + } + } + if (replacePath != ZT_MAX_PEER_NETWORK_PATHS) { if (verb == Packet::VERB_OK) { RR->t->peerLearnedNewPath(tPtr,networkId,*this,path,packetId); @@ -252,6 +266,117 @@ void Peer::received( } } +void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, int64_t now) +{ + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + path->expectingAck(now, packetId, payloadLength); + } + } +} + +void Peer::recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, int64_t now) +{ + if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + if (path->needsToSendAck(now)) { + sendACK(tPtr, path, path->localSocket(), path->address(), now); + } + path->recordIncomingPacket(now, packetId, payloadLength); + } +} + +float Peer::computeAggregateLinkRelativeQuality(int64_t now) +{ + float maxStability = 0; + float totalRelativeQuality = 0; + float maxThroughput = 1; + float maxScope = 0; + float relStability[ZT_MAX_PEER_NETWORK_PATHS]; + float relThroughput[ZT_MAX_PEER_NETWORK_PATHS]; + memset(&relStability, 0, sizeof(relStability)); + memset(&relThroughput, 0, sizeof(relThroughput)); + // Survey all paths + for(unsigned int i=0;ilastComputedStability(); + relThroughput[i] = _paths[i].p->maxLifetimeThroughput(); + maxStability = relStability[i] > maxStability ? relStability[i] : maxStability; + maxThroughput = relThroughput[i] > maxThroughput ? relThroughput[i] : maxThroughput; + maxScope = _paths[i].p->ipScope() > maxScope ? _paths[i].p->ipScope() : maxScope; + } + } + // Convert to relative values + for(unsigned int i=0;iackAge(now), 0, ZT_PATH_MAX_AGE, 0, 10); + float age_contrib = exp((-1)*normalized_ma); + float relScope = ((float)(_paths[i].p->ipScope()+1) / (maxScope + 1)); + float relQuality = + (relStability[i] * ZT_PATH_CONTRIB_STABILITY) + + (fmax(1, relThroughput[i]) * ZT_PATH_CONTRIB_THROUGHPUT) + + relScope * ZT_PATH_CONTRIB_SCOPE; + relQuality *= age_contrib; + totalRelativeQuality += relQuality; + _paths[i].p->updateRelativeQuality(relQuality); + } + } + return (float)1.0 / totalRelativeQuality; // Used later to convert relative quantities into flow allocations +} + +float Peer::computeAggregateLinkPacketDelayVariance() +{ + float pdv = 0.0; + for(unsigned int i=0;irelativeQuality() * _paths[i].p->packetDelayVariance(); + } + } + return pdv; +} + +float Peer::computeAggregateLinkMeanLatency() +{ + float ml = 0.0; + for(unsigned int i=0;irelativeQuality() * _paths[i].p->meanLatency(); + } + } + return ml; +} + +int Peer::aggregateLinkPhysicalPathCount() +{ + std::map ifnamemap; + int pathCount = 0; + int64_t now = RR->node->now(); + for(unsigned int i=0;ialive(now)) { + if (!ifnamemap[_paths[i].p->getName()]) { + ifnamemap[_paths[i].p->getName()] = true; + pathCount++; + } + } + } + return pathCount; +} + +int Peer::aggregateLinkLogicalPathCount() +{ + int pathCount = 0; + int64_t now = RR->node->now(); + for(unsigned int i=0;ialive(now)) { + pathCount++; + } + } + return pathCount; +} + SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) { Mutex::Lock _l(_paths_m); @@ -264,7 +389,7 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) if (RR->node->getMultipathMode() == ZT_MULTIPATH_NONE) { long bestPathQuality = 2147483647; for(unsigned int i=0;iisValidState()) { + if (_paths[i].p) { if ((includeExpired)||((now - _paths[i].lr) < ZT_PEER_PATH_EXPIRATION)) { const long q = _paths[i].p->quality(now) / _paths[i].priority; if (q <= bestPathQuality) { @@ -280,23 +405,14 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) return SharedPtr(); } - if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { - _lastPathPrune = now; - prunePaths(); - } for(unsigned int i=0;imeasureLink(now); + _paths[i].p->processBackgroundPathMeasurements(now, _id.address().toInt()); } } /** * Randomly distribute traffic across all paths - * - * Behavior: - * - If path DOWN: Stop randomly choosing that path - * - If path UP: Start randomly choosing that path - * - If all paths are unresponsive: randomly choose from all paths */ int numAlivePaths = 0; int numStalePaths = 0; @@ -307,15 +423,13 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) memset(&stalePaths, -1, sizeof(stalePaths)); for(unsigned int i=0;iisValidState()) { - if (_paths[i].p->alive(now)) { - alivePaths[numAlivePaths] = i; - numAlivePaths++; - } - else { - stalePaths[numStalePaths] = i; - numStalePaths++; - } + if (_paths[i].p->alive(now)) { + alivePaths[numAlivePaths] = i; + numAlivePaths++; + } + else { + stalePaths[numStalePaths] = i; + numStalePaths++; } } } @@ -337,160 +451,104 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) * Proportionally allocate traffic according to dynamic path quality measurements */ if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) { - float relq[ZT_MAX_PEER_NETWORK_PATHS]; - memset(&relq, 0, sizeof(relq)); float alloc[ZT_MAX_PEER_NETWORK_PATHS]; memset(&alloc, 0, sizeof(alloc)); - - // Survey - // - // Take a survey of all available link qualities. We use this to determine if we - // can skip this algorithm altogether and if not, to establish baseline for physical - // link quality used in later calculations. - // - // We find the min/max quality of our currently-active links so - // that we can form a relative scale to rank each link proportionally - // to each other link. - uint16_t alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; - uint16_t stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; + int numAlivePaths = 0; + int numStalePaths = 0; + int alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; + int stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; memset(&alivePaths, -1, sizeof(alivePaths)); memset(&stalePaths, -1, sizeof(stalePaths)); - uint16_t numAlivePaths = 0; - uint16_t numStalePaths = 0; - float minQuality = 10000; - float maxQuality = -1; - float currQuality; - for(uint16_t i=0;iisValidState()) { - if (!_paths[i].p->monitorsReady()) { - // TODO: This should fix itself anyway but we should test whether forcing the use of a new path will - // aid in establishing flow balance more quickly. - } - // Compute quality here, going forward we will use lastComputedQuality() - currQuality = _paths[i].p->computeQuality(now); - if (!_paths[i].p->stale(now)) { + // Attempt to find an excuse not to use the rest of this algorithm + // Alive or Stale? + for(unsigned int i=0;ialive(now)) { + alivePaths[numAlivePaths] = i; numAlivePaths++; - } - else { + } else { + stalePaths[numStalePaths] = i; numStalePaths++; } - if (currQuality > maxQuality) { - maxQuality = currQuality; - bestPath = i; - } - if (currQuality < minQuality) { - minQuality = currQuality; - } - relq[i] = currQuality; + // Record a default path to use as a short-circuit for the rest of the algorithm + bestPath = i; } } - - // Attempt to find an excuse not to use the rest of this algorithm - if (bestPath == ZT_MAX_PEER_NETWORK_PATHS || (numAlivePaths == 0 && numStalePaths == 0)) { + if (numAlivePaths == 0 && numStalePaths == 0) { return SharedPtr(); - } if (numAlivePaths == 1) { - //return _paths[bestPath].p; - } if (numStalePaths == 1) { - //return _paths[bestPath].p; - } - - // Relative quality - // - // The strongest link will have a value of 1.0 whereas every other - // link will have a value which represents some fraction of the strongest link. - float totalRelativeQuality = 0; - for(unsigned int i=0;iisValidState()) { - relq[i] /= maxQuality ? maxQuality : 1; - totalRelativeQuality += relq[i]; - } + } if (numAlivePaths == 1 || numStalePaths == 1) { + return _paths[bestPath].p; } - - // Convert the relative quality values into flow allocations. - // Additionally, determine whether each path in the flow is - // contributing more or less than its target allocation. If - // it is contributing more than required, don't allow it to be - // randomly selected for the next packet. If however the path - // needs to contribute more to the flow, we should record - float imbalance = 0; - float qualityScalingFactor = (float)1.0 / totalRelativeQuality; + // Compare paths to each-other + float qualityScalingFactor = computeAggregateLinkRelativeQuality(now); + // Convert set of relative performances into an allocation set for(uint16_t i=0;icountValue(i); - // Compute traffic allocation for each path in the flow - if (_paths[i].p && _paths[i].p->isValidState()) { - // Allocation - // This is the percentage of traffic we want to send over a given path - alloc[i] = relq[i] * qualityScalingFactor; - float currProportion = numPktSentWithinWin / (float)ZT_MULTIPATH_PROPORTION_WIN_SZ; - float targetProportion = alloc[i]; - float diffProportion = currProportion - targetProportion; - // Imbalance - // - // This is the sum of the distances of each path's currently observed flow contributions - // from its most recent target allocation. In other words, this is a measure of how closely we - // are adhering to our desired allocations. It is worth noting that this value can be greater - // than 1.0 if a significant change to allocations is made by the algorithm, this will - // eventually correct itself. - imbalance += fabs(diffProportion); - if (diffProportion < 0) { - alloc[i] = targetProportion; - } - else { - alloc[i] = targetProportion; - } - } - } - - // Compute and record current flow balance - float balance = (float)1.0 - imbalance; - if (balance >= ZT_MULTIPATH_FLOW_BALANCE_THESHOLD) { - if (!_linkBalanceStatus) { - _linkBalanceStatus = true; - RR->t->peerLinkBalanced(NULL,0,*this); - } - } - else { - if (_linkBalanceStatus) { - _linkBalanceStatus = false; - RR->t->peerLinkImbalanced(NULL,0,*this); + if (_paths[i].p) { + alloc[i] = _paths[i].p->relativeQuality() * qualityScalingFactor; } } - - // Record the current flow balance. Later used for computing a mean flow balance value. - _flowBalanceHist->push(balance); - - // Randomly choose path from allocated candidates + // Randomly choose path according to their allocations unsigned int r; Utils::getSecureRandom(&r, 1); float rf = (float)(r %= 100) / 100; for(int i=0;iisValidState() && _paths[i].p->address().isV4()) { - if (alloc[i] > 0 && rf < alloc[i]) { + if (_paths[i].p) { + if (rf < alloc[i]) { bestPath = i; _pathChoiceHist->push(bestPath); // Record which path we chose break; } - if (alloc[i] > 0) { - rf -= alloc[i]; - } - else { - rf -= alloc[i]*-1; - } + rf -= alloc[i]; } } if (bestPath < ZT_MAX_PEER_NETWORK_PATHS) { return _paths[bestPath].p; } - return SharedPtr(); } + return SharedPtr(); +} - // Adhere to a user-defined interface/allocation scheme - if (RR->node->getMultipathMode() == ZT_MULTIPATH_MANUALLY_BALANCED) { - // TODO +char *Peer::interfaceListStr() +{ + std::map ifnamemap; + char tmp[32]; + const int64_t now = RR->node->now(); + char *ptr = _interfaceListStr; + bool imbalanced = false; + memset(_interfaceListStr, 0, sizeof(_interfaceListStr)); + int alivePathCount = aggregateLinkLogicalPathCount(); + for(unsigned int i=0;ialive(now)) { + int ipv = _paths[i].p->address().isV4(); + // If this is acting as an aggregate link, check allocations + float targetAllocation = 1.0 / alivePathCount; + float currentAllocation = 1.0; + if (alivePathCount > 1) { + currentAllocation = (float)_pathChoiceHist->countValue(i) / (float)_pathChoiceHist->count(); + if (fabs(targetAllocation - currentAllocation) > ZT_PATH_IMBALANCE_THRESHOLD) { + imbalanced = true; + } + } + char *ipvStr = ipv ? (char*)"ipv4" : (char*)"ipv6"; + sprintf(tmp, "(%s, %s, %5.4f)", _paths[i].p->getName(), ipvStr, currentAllocation); + // Prevent duplicates + if(ifnamemap[_paths[i].p->getName()] != ipv) { + memcpy(ptr, tmp, strlen(tmp)); + ptr += strlen(tmp); + *ptr = ' '; + ptr++; + ifnamemap[_paths[i].p->getName()] = ipv; + } + } } - - return SharedPtr(); + ptr--; // Overwrite trailing space + if (imbalanced) { + sprintf(tmp, ", is IMBALANCED"); + memcpy(ptr, tmp, sizeof(tmp)); + } else { + *ptr = '\0'; + } + return _interfaceListStr; } void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &other) const @@ -614,6 +672,35 @@ void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &o } } +void Peer::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) +{ + Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ACK); + uint32_t bytesToAck = path->bytesToAck(); + outp.append(bytesToAck); + if (atAddress) { + outp.armor(_key,false); + RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); + } else { + RR->sw->send(tPtr,outp,false); + } + path->sentAck(now); +} + +void Peer::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) +{ + Packet outp(_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT); + char qosData[ZT_PATH_MAX_QOS_PACKET_SZ]; + path->generateQoSPacket(now,qosData); + outp.append(qosData,sizeof(qosData)); + if (atAddress) { + outp.armor(_key,false); + RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); + } else { + RR->sw->send(tPtr,outp,false); + } + path->sentQoS(now); +} + void Peer::sendHELLO(void *tPtr,const int64_t localSocket,const InetAddress &atAddress,int64_t now) { Packet outp(_id.address(),RR->identity.address(),Packet::VERB_HELLO); @@ -688,6 +775,25 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD); _lastSentFullHello = now; + // Emit traces regarding the status of aggregate links + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + int alivePathCount = aggregateLinkPhysicalPathCount(); + if ((now - _lastAggregateStatsReport) > ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL) { + _lastAggregateStatsReport = now; + if (alivePathCount) { + RR->t->peerLinkAggregateStatistics(NULL,*this); + } + } + // Report link redundancy + if (alivePathCount < 2 && _linkIsRedundant) { + _linkIsRedundant = !_linkIsRedundant; + RR->t->peerLinkNoLongerRedundant(NULL,*this); + } if (alivePathCount > 1 && !_linkIsRedundant) { + _linkIsRedundant = !_linkIsRedundant; + RR->t->peerLinkNowRedundant(NULL,*this); + } + } + // Right now we only keep pinging links that have the maximum priority. The // priority is used to track cluster redirections, meaning that when a cluster // redirects us its redirect target links override all other links and we @@ -726,22 +832,6 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) return sent; } -unsigned int Peer::prunePaths() -{ - unsigned int pruned = 0; - for(unsigned int i=0;iisClosed() || !_paths[i].p->isValidState()) { - _paths[i].lr = 0; - _paths[i].p.zero(); - _paths[i].priority = 1; - pruned++; - } - } - } - return pruned; -} - void Peer::clusterRedirect(void *tPtr,const SharedPtr &originatingPath,const InetAddress &remoteAddress,const int64_t now) { SharedPtr np(RR->topology->getPath(originatingPath->localSocket(),remoteAddress)); diff --git a/node/Peer.hpp b/node/Peer.hpp index 9873729b..2f723b07 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -68,9 +68,7 @@ public: ~Peer() { Utils::burn(_key,sizeof(_key)); delete _pathChoiceHist; - delete _flowBalanceHist; _pathChoiceHist = NULL; - _flowBalanceHist = NULL; } /** @@ -114,6 +112,7 @@ public: const SharedPtr &path, const unsigned int hops, const uint64_t packetId, + const unsigned int payloadLength, const Packet::Verb verb, const uint64_t inRePacketId, const Packet::Verb inReVerb, @@ -158,7 +157,74 @@ public: } /** - * Get the most appropriate direct path based on current multipath configuration + * Record statistics on outgoing packets + * + * @param path Path over which packet was sent + * @param id Packet ID + * @param len Length of packet payload + * @param verb Packet verb + * @param now Current time + */ + void recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now); + + /** + * Record statistics on incoming packets + * + * @param path Path over which packet was sent + * @param id Packet ID + * @param len Length of packet payload + * @param verb Packet verb + * @param now Current time + */ + void recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now); + + /** + * Send an ACK to peer for the most recent packets received + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param localSocket Raw socket the ACK packet will be sent over + * @param atAddress Destination for the ACK packet + * @param now Current time + */ + void sendACK(void *tPtr, const SharedPtr &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now); + + /** + * Send a QoS packet to peer so that it can evaluate the quality of this link + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param localSocket Raw socket the QoS packet will be sent over + * @param atAddress Destination for the QoS packet + * @param now Current time + */ + void sendQOS_MEASUREMENT(void *tPtr, const SharedPtr &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now); + + /** + * @return The relative quality values for each path + */ + float computeAggregateLinkRelativeQuality(int64_t now); + + /** + * @return The aggregate link Packet Delay Variance (PDV) + */ + float computeAggregateLinkPacketDelayVariance(); + + /** + * @return The aggregate link mean latenct + */ + float computeAggregateLinkMeanLatency(); + + /** + * @return The number of currently alive "physical" paths in the aggregate link + */ + int aggregateLinkPhysicalPathCount(); + + /** + * @return The number of currently alive "logical" paths in the aggregate link + */ + int aggregateLinkLogicalPathCount(); + + /** + * Get the most appropriate direct path based on current multipath and QoS configuration * * @param now Current time * @param includeExpired If true, include even expired paths @@ -166,6 +232,12 @@ public: */ SharedPtr getAppropriatePath(int64_t now, bool includeExpired); + /** + * Generate a human-readable string of interface names making up the aggregate link, also include + * moving allocation and IP version number for each (for tracing) + */ + char *interfaceListStr(); + /** * Send VERB_RENDEZVOUS to this and another peer via the best common IP scope and path */ @@ -549,11 +621,12 @@ private: AtomicCounter __refCount; RingBuffer *_pathChoiceHist; - RingBuffer *_flowBalanceHist; - bool _linkBalanceStatus; - bool _linkRedundancyStatus; + bool _linkIsBalanced; + bool _linkIsRedundant; + uint64_t _lastAggregateStatsReport; + char _interfaceListStr[256]; // 16 characters * 16 paths in a link }; } // namespace ZeroTier diff --git a/node/RingBuffer.hpp b/node/RingBuffer.hpp index cd384749..32ae037c 100644 --- a/node/RingBuffer.hpp +++ b/node/RingBuffer.hpp @@ -172,6 +172,11 @@ public: write(&value, 1); } + /** + * @return The most recently pushed element on the buffer + */ + T get_most_recent() { return *(buf + end); } + /** * @param dest Destination buffer * @param n Size (in terms of number of elements) of the destination buffer @@ -218,10 +223,7 @@ public: /** * @return The number of slots that are unused in the buffer */ - size_t getFree() - { - return size - count(); - } + size_t getFree() { return size - count(); } /** * @return The arithmetic mean of the contents of the buffer @@ -229,45 +231,67 @@ public: float mean() { size_t iterator = begin; - float mean = 0; - for (size_t i=0; irecordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), now); + if (trustedPathId) { packet.setTrusted(trustedPathId); } else { diff --git a/node/Trace.cpp b/node/Trace.cpp index 01a8da55..f47a029b 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -106,24 +106,24 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId, } } -void Trace::peerLinkNowRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath) +void Trace::peerLinkNowRedundant(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is fully redundant",peer.address().toInt(),networkId); + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is fully redundant",peer.address().toInt()); } -void Trace::peerLinkNoLongerRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath) +void Trace::peerLinkNoLongerRedundant(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is no longer redundant",peer.address().toInt(),networkId); + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is no longer redundant",peer.address().toInt()); } -void Trace::peerLinkBalanced(void *const tPtr,const uint64_t networkId,Peer &peer) +void Trace::peerLinkAggregateStatistics(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is balanced",peer.address().toInt(),networkId); -} - -void Trace::peerLinkImbalanced(void *const tPtr,const uint64_t networkId,Peer &peer) -{ - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is unbalanced",peer.address().toInt(),networkId); + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has packet delay variance (%.0f ms), mean latency (%.0f ms)", + peer.address().toInt(), + peer.aggregateLinkPhysicalPathCount(), + peer.interfaceListStr(), + peer.computeAggregateLinkPacketDelayVariance(), + peer.computeAggregateLinkMeanLatency()); } void Trace::peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath,const uint64_t packetId) diff --git a/node/Trace.hpp b/node/Trace.hpp index b01163d6..734e84a5 100644 --- a/node/Trace.hpp +++ b/node/Trace.hpp @@ -122,10 +122,10 @@ public: void peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &path,const uint64_t packetId,const Packet::Verb verb); - void peerLinkNowRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); - void peerLinkNoLongerRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); - void peerLinkBalanced(void *const tPtr,const uint64_t networkId,Peer &peer); - void peerLinkImbalanced(void *const tPtr,const uint64_t networkId,Peer &peer); + void peerLinkNowRedundant(void *const tPtr,Peer &peer); + void peerLinkNoLongerRedundant(void *const tPtr,Peer &peer); + + void peerLinkAggregateStatistics(void *const tPtr,Peer &peer); void peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath,const uint64_t packetId); void peerRedirected(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); diff --git a/node/Utils.hpp b/node/Utils.hpp index a24f2c9a..6ce67328 100644 --- a/node/Utils.hpp +++ b/node/Utils.hpp @@ -261,6 +261,14 @@ public: return l; } + static inline float normalize(float value, int64_t bigMin, int64_t bigMax, int32_t targetMin, int32_t targetMax) + { + int64_t bigSpan = bigMax - bigMin; + int64_t smallSpan = targetMax - targetMin; + float valueScaled = (value - (float)bigMin) / (float)bigSpan; + return (float)targetMin + valueScaled * (float)smallSpan; + } + /** * Generate secure random bytes * diff --git a/osdep/Binder.hpp b/osdep/Binder.hpp index 6e13836c..1f06021b 100644 --- a/osdep/Binder.hpp +++ b/osdep/Binder.hpp @@ -456,20 +456,6 @@ public: return false; } - /** - * Get a list of socket pointers for all bindings. - * - * @return A list of socket pointers for current bindings - */ - inline std::vector getBoundSockets() - { - std::vector sockets; - for (int i=0; i _bindingCount; diff --git a/osdep/Phy.hpp b/osdep/Phy.hpp index 2e276a2a..5e659767 100644 --- a/osdep/Phy.hpp +++ b/osdep/Phy.hpp @@ -27,8 +27,6 @@ #ifndef ZT_PHY_HPP #define ZT_PHY_HPP -#include "../osdep/OSUtils.hpp" - #include #include #include @@ -88,22 +86,6 @@ namespace ZeroTier { */ typedef void PhySocket; -struct link_test_record -{ - link_test_record(PhySocket *_s, uint64_t _id, uint64_t _egress_time, uint32_t _length) : - s(_s), - id(_id), - egress_time(_egress_time), - length(_length) - { - // - } - PhySocket *s; - uint64_t id; - uint64_t egress_time; - uint32_t length; -}; - /** * Simple templated non-blocking sockets implementation * @@ -170,19 +152,13 @@ private: ZT_PHY_SOCKET_UNIX_LISTEN = 0x08 }; - struct PhySocketImpl - { - PhySocketImpl() : - throughput(0) - { - memset(ifname, 0, sizeof(ifname)); - } + struct PhySocketImpl { + PhySocketImpl() { memset(ifname, 0, sizeof(ifname)); } PhySocketType type; ZT_PHY_SOCKFD_TYPE sock; void *uptr; // user-settable pointer ZT_PHY_SOCKADDR_STORAGE_TYPE saddr; // remote for TCP_OUT and TCP_IN, local for TCP_LISTEN, RAW, and UDP char ifname[16]; - uint64_t throughput; }; std::list _socks; @@ -198,7 +174,6 @@ private: bool _noDelay; bool _noCheck; - std::vector link_test_records; public: /** @@ -282,7 +257,9 @@ public: */ static inline void getIfName(PhySocket *s, char *nameBuf, int buflen) { - memcpy(nameBuf, reinterpret_cast(s)->ifname, buflen); + if (s) { + memcpy(nameBuf, reinterpret_cast(s)->ifname, buflen); + } } /** @@ -292,18 +269,9 @@ public: */ static inline void setIfName(PhySocket *s, char *ifname, int len) { - memcpy(&(reinterpret_cast(s)->ifname), ifname, len); - } - - /** - * Get result of most recent throughput test - * - * @param s Socket object - */ - inline uint64_t getThroughput(PhySocket *s) - { - PhySocketImpl *sws = (reinterpret_cast(s)); - return sws ? sws->throughput : 0; + if (s) { + memcpy(&(reinterpret_cast(s)->ifname), ifname, len); + } } /** @@ -339,105 +307,9 @@ public: */ inline bool isValidState(PhySocket *s) { - PhySocketImpl *sws = (reinterpret_cast(s)); - return sws->type >= ZT_PHY_SOCKET_CLOSED && sws->type <= ZT_PHY_SOCKET_UNIX_LISTEN; - } - - /** - * Send a datagram of a known size to a selected peer and record egress time. The peer - * shall eventually respond by echoing back a smaller datagram. - * - * @param s Socket object - * @param remoteAddress Address of remote peer to receive link test packet - * @param data Buffer containing random packet data - * @param len Length of packet data buffer - * @return Number of bytes successfully written to socket - */ - inline int test_link_speed(PhySocket *s, const struct sockaddr *to, void *data, uint32_t len) { - if (!reinterpret_cast(s)) { - return 0; - } - uint64_t *buf = (uint64_t*)data; - uint64_t id = buf[0]; - if (to->sa_family != AF_INET && to->sa_family != AF_INET6) { - return 0; - } - uint64_t egress_time = OSUtils::now(); - PhySocketImpl *sws = (reinterpret_cast(s)); -#if defined(_WIN32) || defined(_WIN64) - int w = ::sendto(sws->sock,reinterpret_cast(data),len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) -#else - int w = ::sendto(sws->sock,data,len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); -#endif - if (w > 0) { - link_test_records.push_back(new link_test_record(s, id, egress_time, len)); - } - return w; - } - - /** - * Remove link speed test records which have timed-out and record a 0 bits/s measurement - */ - inline void refresh_link_speed_records() - { - for(size_t i=0;iegress_time > ZT_LINK_TEST_TIMEOUT) { - PhySocketImpl *sws = (reinterpret_cast(link_test_records[i]->s)); - if (sws) { - sws->throughput = 0; - } - link_test_records.erase(link_test_records.begin() + i); - } - } - } - - /** - * Upon receipt of a link speed test datagram we echo back only the identification portion - * - * @param s Socket object - * @param from Address of remote peer that sent this datagram - * @param data Buffer containing datagram's contents - * @param len Length of datagram - * @return Number of bytes successfully written to socket in response - */ - inline int respond_to_link_test(PhySocket *s,const struct sockaddr *from,void *data,unsigned long len) { - PhySocketImpl *sws = (reinterpret_cast(s)); - uint64_t *id = (uint64_t*)data; -#if defined(_WIN32) || defined(_WIN64) - int w = ::sendto(sws->sock,reinterpret_cast(id),sizeof(id[0]),0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); -#else - int w = ::sendto(sws->sock,id,sizeof(id[0]),0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); -#endif - return w; - } - - /** - * Upon receipt of a response to our original link test datagram, correlate this new datagram with the record - * of the one we sent. Compute the transit time and update the throughput field of the relevant socket. This - * value will later be read by the path quality estimation logic located in Path.hpp. - * - * @param s Socket object - * @param from Address of remote peer that sent this datagram - * @param data Buffer containing datagram contents (ID of original link test datagram) - * @param len Length of datagram - * @return true if datagram correponded to previous record, false if otherwise - */ - inline bool handle_link_test_response(PhySocket *s,const struct sockaddr *from,void *data,unsigned long len) { - uint64_t *id = (uint64_t*)data; - for(size_t i=0;iid == id[0]) { - float rtt = (OSUtils::now()-link_test_records[i]->egress_time) / (float)1000; // s - uint32_t sz = (link_test_records[i]->length) * 8; // bits - float transit_time = rtt / (float)2.0; - uint64_t raw = (uint64_t)(sz / transit_time); - PhySocketImpl *sws = (reinterpret_cast(s)); - if (sws) { - sws->throughput = raw; - } - delete link_test_records[i]; - link_test_records.erase(link_test_records.begin() + i); - return true; - } + if (s) { + PhySocketImpl *sws = (reinterpret_cast(s)); + return sws->type >= ZT_PHY_SOCKET_CLOSED && sws->type <= ZT_PHY_SOCKET_UNIX_LISTEN; } return false; } diff --git a/service/OneService.cpp b/service/OneService.cpp index 051629bc..cf2a6eda 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -37,7 +37,6 @@ #include "../version.h" #include "../include/ZeroTierOne.h" -#include "../include/ZeroTierDebug.h" #include "../node/Constants.hpp" #include "../node/Mutex.hpp" @@ -458,9 +457,6 @@ public: // Last potential sleep/wake event uint64_t _lastRestart; - // Last time link throughput was tested - uint64_t _lastLinkSpeedTest; - // Deadline for the next background task service function volatile int64_t _nextBackgroundTaskDeadline; @@ -881,26 +877,6 @@ public: lastMultipathModeUpdate = now; _node->setMultipathMode(_multipathMode); } - // Test link speeds - // TODO: This logic should eventually find its way into the core or as part of a passive - // measure within the protocol. - if (_multipathMode && ((now - _lastLinkSpeedTest) >= ZT_LINK_SPEED_TEST_INTERVAL)) { - _phy.refresh_link_speed_records(); - _lastLinkSpeedTest = now; - // Generate random data to fill UDP packet - uint64_t pktBuf[ZT_LINK_TEST_DATAGRAM_SZ / sizeof(uint64_t)]; - Utils::getSecureRandom(pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); - ZT_PeerList *pl = _node->peers(); - std::vector sockets = _binder.getBoundSockets(); - for (int i=0; ipeerCount;++j) { - for (int k=0; k<(ZT_MAX_PEER_NETWORK_PATHS/4); k++) { - Utils::getSecureRandom(pktBuf, 8); // generate one random integer for unique id - _phy.test_link_speed(sockets[i], (struct sockaddr*)&(pl->peers[j].paths[k].address), pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); - } - } - } - } // Run background task processor in core if it's time to do so int64_t dl = _nextBackgroundTaskDeadline; @@ -1799,15 +1775,6 @@ public: inline void phyOnDatagram(PhySocket *sock,void **uptr,const struct sockaddr *localAddr,const struct sockaddr *from,void *data,unsigned long len) { - if (_multipathMode) { - // Handle link test packets (should eventually be moved into the protocol itself) - if (len == ZT_LINK_TEST_DATAGRAM_SZ) { - _phy.respond_to_link_test(sock, from, data, len); - } - if (len == ZT_LINK_TEST_DATAGRAM_RESPONSE_SZ) { - _phy.handle_link_test_response(sock, from, data, len); - } - } if ((len >= 16)&&(reinterpret_cast(from)->ipScope() == InetAddress::IP_SCOPE_GLOBAL)) _lastDirectReceiveFromGlobal = OSUtils::now(); const ZT_ResultCode rc = _node->processWirePacket( -- cgit v1.2.3 From b22405b64b0b26fa1d5d8932cf212e4ae7760632 Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Mon, 4 Jun 2018 12:24:12 -0700 Subject: rtnetlink integrated. no more callouts to iproute2 --- make-linux.mk | 2 +- osdep/LinuxEthernetTap.cpp | 104 +++----- osdep/LinuxNetLink.cpp | 640 ++++++++++++++++++++++++++++++++++----------- osdep/LinuxNetLink.hpp | 16 +- osdep/ManagedRoute.cpp | 24 +- osdep/ManagedRoute.hpp | 10 +- service/OneService.cpp | 13 +- 7 files changed, 566 insertions(+), 243 deletions(-) (limited to 'make-linux.mk') diff --git a/make-linux.mk b/make-linux.mk index 69dc5619..1776fa35 100644 --- a/make-linux.mk +++ b/make-linux.mk @@ -19,7 +19,7 @@ include objects.mk ONE_OBJS+=osdep/LinuxEthernetTap.o ONE_OBJS+=osdep/LinuxNetLink.o -NLTEST_OBJS+=osdep/LinuxNetLink.o +NLTEST_OBJS+=osdep/LinuxNetLink.o node/InetAddress.o node/Utils.o node/Salsa20.o NLTEST_OBJS+=nltest.o # Auto-detect miniupnpc and nat-pmp as well and use system libs if present, diff --git a/osdep/LinuxEthernetTap.cpp b/osdep/LinuxEthernetTap.cpp index 06bbbada..5dc21391 100644 --- a/osdep/LinuxEthernetTap.cpp +++ b/osdep/LinuxEthernetTap.cpp @@ -56,6 +56,7 @@ #include "../node/Dictionary.hpp" #include "OSUtils.hpp" #include "LinuxEthernetTap.hpp" +#include "LinuxNetLink.hpp" // ff:ff:ff:ff:ff:ff with no ADI static const ZeroTier::MulticastGroup _blindWildcardMulticastGroup(ZeroTier::MAC(0xff),0); @@ -97,6 +98,9 @@ LinuxEthernetTap::LinuxEthernetTap( char procpath[128],nwids[32]; struct stat sbuf; + // ensure netlink connection is started + (void)LinuxNetLink::getInstance(); + OSUtils::ztsnprintf(nwids,sizeof(nwids),"%.16llx",nwid); Mutex::Lock _l(__tapCreateLock); // create only one tap at a time, globally @@ -263,18 +267,8 @@ bool LinuxEthernetTap::enabled() const static bool ___removeIp(const std::string &_dev,const InetAddress &ip) { - long cpid = (long)vfork(); - if (cpid == 0) { - OSUtils::redirectUnixOutputs("/dev/null",(const char *)0); - setenv("PATH", "/sbin:/bin:/usr/sbin:/usr/bin", 1); - char iptmp[128]; - ::execlp("ip","ip","addr","del",ip.toString(iptmp),"dev",_dev.c_str(),(const char *)0); - ::_exit(-1); - } else { - int exitcode = -1; - ::waitpid(cpid,&exitcode,0); - return (exitcode == 0); - } + LinuxNetLink::getInstance().removeAddress(ip, _dev.c_str()); + return true; } #ifdef __SYNOLOGY__ @@ -285,49 +279,32 @@ bool LinuxEthernetTap::addIpSyn(std::vector ips) std::string cfg_contents = "DEVICE="+_dev+"\nBOOTPROTO=static"; int ip4=0,ip6=0,ip4_tot=0,ip6_tot=0; - long cpid = (long)vfork(); - if (cpid == 0) { - OSUtils::redirectUnixOutputs("/dev/null",(const char *)0); - setenv("PATH", "/sbin:/bin:/usr/sbin:/usr/bin", 1); - // We must know if there is at least (one) of each protocol version so we - // can properly enumerate address/netmask combinations in the ifcfg-dev file - for(int i=0; i<(int)ips.size(); i++) { - if (ips[i].isV4()) - ip4_tot++; - else - ip6_tot++; - } - // Assemble and write contents of ifcfg-dev file - for(int i=0; i<(int)ips.size(); i++) { - if (ips[i].isV4()) { - char iptmp[64],iptmp2[64]; - std::string numstr4 = ip4_tot > 1 ? std::to_string(ip4) : ""; - cfg_contents += "\nIPADDR"+numstr4+"="+ips[i].toIpString(iptmp) - + "\nNETMASK"+numstr4+"="+ips[i].netmask().toIpString(iptmp2)+"\n"; - ip4++; - } - else { - char iptmp[64],iptmp2[64]; - std::string numstr6 = ip6_tot > 1 ? std::to_string(ip6) : ""; - cfg_contents += "\nIPV6ADDR"+numstr6+"="+ips[i].toIpString(iptmp) - + "\nNETMASK"+numstr6+"="+ips[i].netmask().toIpString(iptmp2)+"\n"; - ip6++; - } - } - OSUtils::writeFile(filepath.c_str(), cfg_contents.c_str(), cfg_contents.length()); - // Finaly, add IPs - for(int i=0; i<(int)ips.size(); i++){ - char iptmp[128],iptmp2[128]; - if (ips[i].isV4()) - ::execlp("ip","ip","addr","add",ips[i].toString(iptmp),"broadcast",ips[i].broadcast().toIpString(iptmp2),"dev",_dev.c_str(),(const char *)0); - else - ::execlp("ip","ip","addr","add",ips[i].toString(iptmp),"dev",_dev.c_str(),(const char *)0); + for(int i=0; i<(int)ips.size(); i++) { + if (ips[i].isV4()) + ip4_tot++; + else + ip6_tot++; + } + // Assemble and write contents of ifcfg-dev file + for(int i=0; i<(int)ips.size(); i++) { + if (ips[i].isV4()) { + char iptmp[64],iptmp2[64]; + std::string numstr4 = ip4_tot > 1 ? std::to_string(ip4) : ""; + cfg_contents += "\nIPADDR"+numstr4+"="+ips[i].toIpString(iptmp) + + "\nNETMASK"+numstr4+"="+ips[i].netmask().toIpString(iptmp2)+"\n"; + ip4++; + } else { + char iptmp[64],iptmp2[64]; + std::string numstr6 = ip6_tot > 1 ? std::to_string(ip6) : ""; + cfg_contents += "\nIPV6ADDR"+numstr6+"="+ips[i].toIpString(iptmp) + + "\nNETMASK"+numstr6+"="+ips[i].netmask().toIpString(iptmp2)+"\n"; + ip6++; } - ::_exit(-1); - } else if (cpid > 0) { - int exitcode = -1; - ::waitpid(cpid,&exitcode,0); - return (exitcode == 0); + } + OSUtils::writeFile(filepath.c_str(), cfg_contents.c_str(), cfg_contents.length()); + // Finaly, add IPs + for(int i=0; i<(int)ips.size(); i++){ + LinuxNetLink::getInstance().addAddress(ips[i], _dev.c_str()); } return true; } @@ -348,24 +325,9 @@ bool LinuxEthernetTap::addIp(const InetAddress &ip) ___removeIp(_dev,*i); } - long cpid = (long)vfork(); - if (cpid == 0) { - OSUtils::redirectUnixOutputs("/dev/null",(const char *)0); - setenv("PATH", "/sbin:/bin:/usr/sbin:/usr/bin", 1); - char iptmp[128],iptmp2[128]; - if (ip.isV4()) { - ::execlp("ip","ip","addr","add",ip.toString(iptmp),"broadcast",ip.broadcast().toIpString(iptmp2),"dev",_dev.c_str(),(const char *)0); - } else { - ::execlp("ip","ip","addr","add",ip.toString(iptmp),"dev",_dev.c_str(),(const char *)0); - } - ::_exit(-1); - } else if (cpid > 0) { - int exitcode = -1; - ::waitpid(cpid,&exitcode,0); - return (exitcode == 0); - } + LinuxNetLink::getInstance().addAddress(ip, _dev.c_str()); - return false; + return true; } bool LinuxEthernetTap::removeIp(const InetAddress &ip) diff --git a/osdep/LinuxNetLink.cpp b/osdep/LinuxNetLink.cpp index 79483b96..7ad687fb 100644 --- a/osdep/LinuxNetLink.cpp +++ b/osdep/LinuxNetLink.cpp @@ -54,39 +54,36 @@ LinuxNetLink::LinuxNetLink() : _t() , _running(false) , _routes_ipv4() + , _rv4_m() , _routes_ipv6() + , _rv6_m() , _seq(0) + , _interfaces() + , _if_m() , _fd(socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) , _la({0}) { // set socket timeout to 1 sec so we're not permablocking recv() calls - struct timeval tv; - tv.tv_sec = 1; - tv.tv_usec = 0; - if(setsockopt(_fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof(tv)) != 0) { - fprintf(stderr, "setsockopt failed: %s\n", strerror(errno)); - } + _setSocketTimeout(_fd, 1); _la.nl_family = AF_NETLINK; - _la.nl_pid = getpid(); + _la.nl_pid = getpid()+1; _la.nl_groups = RTMGRP_LINK|RTMGRP_IPV4_IFADDR|RTMGRP_IPV6_IFADDR|RTMGRP_IPV4_ROUTE|RTMGRP_IPV6_ROUTE|RTMGRP_NOTIFY; if (bind(_fd, (struct sockaddr*)&_la, sizeof(_la))) { fprintf(stderr, "Error connecting to RTNETLINK: %s\n", strerror(errno)); ::exit(1); } - _running = true; - _t = Thread::start(this); - fprintf(stderr, "Requesting IPV4 Routes\n"); _requestIPv4Routes(); - Thread::sleep(10); fprintf(stderr, "Requesting IPV6 Routes\n"); _requestIPv6Routes(); - Thread::sleep(10); fprintf(stderr, "Requesting Interface List\n"); _requestInterfaceList(); + + _running = true; + _t = Thread::start(this); } LinuxNetLink::~LinuxNetLink() @@ -97,7 +94,17 @@ LinuxNetLink::~LinuxNetLink() ::close(_fd); } -void LinuxNetLink::threadMain() throw() +void LinuxNetLink::_setSocketTimeout(int fd, int seconds) +{ + struct timeval tv; + tv.tv_sec = seconds; + tv.tv_usec = 0; + if(setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof(tv)) != 0) { + fprintf(stderr, "setsockopt failed: %s\n", strerror(errno)); + } +} + +int LinuxNetLink::_doRecv(int fd) { char buf[8192]; char *p = NULL; @@ -106,26 +113,24 @@ void LinuxNetLink::threadMain() throw() int rtn = 0; p = buf; - while(_running) { - rtn = recv(_fd, p, sizeof(buf) - nll, 0); + while(true) { + rtn = recv(fd, p, sizeof(buf) - nll, 0); if (rtn > 0) { nlp = (struct nlmsghdr *)p; if(nlp->nlmsg_type == NLMSG_ERROR && (nlp->nlmsg_flags & NLM_F_ACK) != NLM_F_ACK) { - fprintf(stderr, "NLMSG_ERROR\n"); struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(nlp); if (err->error != 0) { fprintf(stderr, "rtnetlink error: %s\n", strerror(-(err->error))); } p = buf; nll = 0; - continue; + break; } if (nlp->nlmsg_type == NLMSG_NOOP) { - fprintf(stderr, "noop\n"); - continue; + break; } if( (nlp->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI || (nlp->nlmsg_type == NLMSG_DONE)) @@ -134,7 +139,7 @@ void LinuxNetLink::threadMain() throw() _processMessage(nlp, nll); p = buf; nll = 0; - continue; + break; } p += rtn; nll += rtn; @@ -144,7 +149,7 @@ void LinuxNetLink::threadMain() throw() fprintf(stderr, "NLMSG_OVERRUN: Data lost\n"); p = buf; nll = 0; - continue; + break; } nll += rtn; @@ -152,8 +157,21 @@ void LinuxNetLink::threadMain() throw() _processMessage(nlp, nll); p = buf; nll = 0; + break; + } else { + break; } - else { + } + return rtn; +} + +void LinuxNetLink::threadMain() throw() +{ + int rtn = 0; + + while(_running) { + rtn = _doRecv(_fd); + if (rtn <= 0) { Thread::sleep(100); continue; } @@ -258,6 +276,7 @@ void LinuxNetLink::_routeAdded(struct nlmsghdr *nlp) { char dsts[40] = {0}; char gws[40] = {0}; + char srcs[40] = {0}; char ifs[16] = {0}; char ms[24] = {0}; @@ -272,6 +291,9 @@ void LinuxNetLink::_routeAdded(struct nlmsghdr *nlp) case RTA_DST: inet_ntop(rtp->rtm_family, RTA_DATA(rtap), dsts, rtp->rtm_family == AF_INET ? 24 : 40); break; + case RTA_SRC: + inet_ntop(rtp->rtm_family, RTA_DATA(rtap), srcs, rtp->rtm_family == AF_INET ? 24: 40); + break; case RTA_GATEWAY: inet_ntop(rtp->rtm_family, RTA_DATA(rtap), gws, rtp->rtm_family == AF_INET ? 24 : 40); break; @@ -282,13 +304,14 @@ void LinuxNetLink::_routeAdded(struct nlmsghdr *nlp) } sprintf(ms, "%d", rtp->rtm_dst_len); - fprintf(stderr, "Route Added: dst %s/%s gw %s if %s\n", dsts, ms, gws, ifs); + fprintf(stderr, "Route Added: dst %s/%s gw %s src %s if %s\n", dsts, ms, gws, srcs, ifs); } void LinuxNetLink::_routeDeleted(struct nlmsghdr *nlp) { char dsts[40] = {0}; char gws[40] = {0}; + char srcs[40] = {0}; char ifs[16] = {0}; char ms[24] = {0}; @@ -303,6 +326,9 @@ void LinuxNetLink::_routeDeleted(struct nlmsghdr *nlp) case RTA_DST: inet_ntop(rtp->rtm_family, RTA_DATA(rtap), dsts, rtp->rtm_family == AF_INET ? 24 : 40); break; + case RTA_SRC: + inet_ntop(rtp->rtm_family, RTA_DATA(rtap), srcs, rtp->rtm_family == AF_INET ? 24 : 40); + break; case RTA_GATEWAY: inet_ntop(rtp->rtm_family, RTA_DATA(rtap), gws, rtp->rtm_family == AF_INET ? 24 : 40); break; @@ -313,7 +339,7 @@ void LinuxNetLink::_routeDeleted(struct nlmsghdr *nlp) } sprintf(ms, "%d", rtp->rtm_dst_len); - fprintf(stderr, "Route Deleted: dst %s/%s gw %s if %s\n", dsts, ms, gws, ifs); + fprintf(stderr, "Route Deleted: dst %s/%s gw %s src %s if %s\n", dsts, ms, gws, srcs, ifs); } void LinuxNetLink::_linkAdded(struct nlmsghdr *nlp) @@ -348,12 +374,15 @@ void LinuxNetLink::_linkAdded(struct nlmsghdr *nlp) } } - struct iface_entry &entry = _interfaces[ifip->ifi_index]; - entry.index = ifip->ifi_index; - memcpy(entry.ifacename, ifname, sizeof(ifname)); - memcpy(entry.mac, mac, sizeof(mac)); - memcpy(entry.mac_bin, mac_bin, 6); - entry.mtu = mtu; + { + Mutex::Lock l(_if_m); + struct iface_entry &entry = _interfaces[ifip->ifi_index]; + entry.index = ifip->ifi_index; + memcpy(entry.ifacename, ifname, sizeof(ifname)); + memcpy(entry.mac, mac, sizeof(mac)); + memcpy(entry.mac_bin, mac_bin, 6); + entry.mtu = mtu; + } fprintf(stderr, "Link Added: %s mac: %s, mtu: %d\n", ifname, mac, mtu); } @@ -389,13 +418,33 @@ void LinuxNetLink::_linkDeleted(struct nlmsghdr *nlp) } fprintf(stderr, "Link Deleted: %s mac: %s, mtu: %d\n", ifname, mac, mtu); - if(_interfaces.contains(ifip->ifi_index)) { - _interfaces.erase(ifip->ifi_index); + { + Mutex::Lock l(_if_m); + if(_interfaces.contains(ifip->ifi_index)) { + _interfaces.erase(ifip->ifi_index); + } } } void LinuxNetLink::_requestIPv4Routes() { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + + _setSocketTimeout(fd); + + struct sockaddr_nl la; + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + la.nl_groups = RTMGRP_IPV4_ROUTE; + if(bind(fd, (struct sockaddr*)&la, sizeof(la))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + struct nl_route_req req; bzero(&req, sizeof(req)); req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); @@ -422,11 +471,32 @@ void LinuxNetLink::_requestIPv4Routes() msg.msg_iov = &iov; msg.msg_iovlen = 1; - sendmsg(_fd, &msg, 0); + sendmsg(fd, &msg, 0); + + _doRecv(fd); + + close(fd); } void LinuxNetLink::_requestIPv6Routes() { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + + _setSocketTimeout(fd); + + struct sockaddr_nl la; + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + la.nl_groups = RTMGRP_IPV6_ROUTE; + if(bind(fd, (struct sockaddr*)&la, sizeof(struct sockaddr_nl))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + struct nl_route_req req; bzero(&req, sizeof(req)); req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); @@ -453,11 +523,32 @@ void LinuxNetLink::_requestIPv6Routes() msg.msg_iov = &iov; msg.msg_iovlen = 1; - sendmsg(_fd, &msg, 0); + sendmsg(fd, &msg, 0); + + _doRecv(fd); + + close(fd); } void LinuxNetLink::_requestInterfaceList() { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + + _setSocketTimeout(fd); + + struct sockaddr_nl la; + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + la.nl_groups = RTMGRP_LINK; + if(bind(fd, (struct sockaddr*)&la, sizeof(struct sockaddr_nl))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + struct nl_if_req req; bzero(&req, sizeof(req)); req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); @@ -482,11 +573,43 @@ void LinuxNetLink::_requestInterfaceList() iov.iov_len = req.nl.nlmsg_len; msg.msg_iov = &iov; msg.msg_iovlen = 1; - sendmsg(_fd, &msg, 0); + sendmsg(fd, &msg, 0); + + _doRecv(fd); + + close(fd); } -void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName) +void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, const InetAddress &src, const char *ifaceName) { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + + _setSocketTimeout(fd); + + struct sockaddr_nl la; + bzero(&la, sizeof(la)); + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + + if(bind(fd, (struct sockaddr*)&la, sizeof(struct sockaddr_nl))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + + char tmp[64]; + char tmp2[64]; + char tmp3[64]; + fprintf(stderr, "Adding Route. target: %s via: %s src: %s iface: %s\n", target.toString(tmp), via.toString(tmp2), src.toString(tmp3), ifaceName); + + if(!target) { + fprintf(stderr, "Uhhhh adding an empty route?!?!?"); + return; + } + int rtl = sizeof(struct rtmsg); struct nl_route_req req; bzero(&req, sizeof(req)); @@ -494,49 +617,54 @@ void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, c struct rtattr *rtap = (struct rtattr *)req.buf; rtap->rta_type = RTA_DST; if (target.isV4()) { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in_addr); - memcpy((void*)((char*)rtap+sizeof(struct rtattr)), &((struct sockaddr_in*)&target)->sin_addr, sizeof(struct in_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in*)&target)->sin_addr, sizeof(struct in_addr)); } else { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in6_addr); - memcpy((void*)((char*)rtap+sizeof(struct rtattr)), &((struct sockaddr_in6*)&target)->sin6_addr, sizeof(struct in6_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in6*)&target)->sin6_addr, sizeof(struct in6_addr)); } rtl += rtap->rta_len; - int interface_index = -1; - if (ifaceName != NULL) { - Hashtable::Iterator iter(_interfaces); - int *k = NULL; - iface_entry *v = NULL; - while(iter.next(k, v)) { - if(strcmp(ifaceName, v->ifacename) == 0) { - interface_index = v->index; - break; - } - } - if (interface_index != -1) { - rtap = (struct rtattr *) (((char*)rtap) + rtap->rta_len); - rtap->rta_type = RTA_OIF; - rtap->rta_len = sizeof(struct rtattr)+sizeof(int); - memcpy(((char*)rtap)+sizeof(rtattr), &interface_index, sizeof(int)); - rtl += rtap->rta_len; - } - } - if(via) { rtap = (struct rtattr *)(((char*)rtap)+rtap->rta_len); rtap->rta_type = RTA_GATEWAY; if(via.isV4()) { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in_addr); - memcpy((char*)rtap+sizeof(struct rtattr), &((struct sockaddr_in*)&via)->sin_addr, sizeof(struct in_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in*)&via)->sin_addr, sizeof(struct in_addr)); } else { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in6_addr); - memcpy((char*)rtap+sizeof(struct rtattr), &((struct sockaddr_in6*)&via)->sin6_addr, sizeof(struct in6_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in6*)&via)->sin6_addr, sizeof(struct in6_addr)); } rtl += rtap->rta_len; + } else if (src) { + rtap = (struct rtattr *)(((char*)rtap)+rtap->rta_len); + rtap->rta_type = RTA_SRC; + if(src.isV4()) { + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in*)&src)->sin_addr, sizeof(struct in_addr)); + + } else { + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in6*)&src)->sin6_addr, sizeof(struct in6_addr)); + } + req.rt.rtm_src_len = src.netmaskBits(); } + if (ifaceName != NULL) { + int interface_index = _indexForInterface(ifaceName); + if (interface_index != -1) { + rtap = (struct rtattr *) (((char*)rtap) + rtap->rta_len); + rtap->rta_type = RTA_OIF; + rtap->rta_len = RTA_LENGTH(sizeof(int)); + memcpy(RTA_DATA(rtap), &interface_index, sizeof(int)); + rtl += rtap->rta_len; + } + } + + + req.nl.nlmsg_len = NLMSG_LENGTH(rtl); - req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE | NLM_F_ACK; req.nl.nlmsg_type = RTM_NEWROUTE; req.nl.nlmsg_pid = 0; req.nl.nlmsg_seq = ++_seq; @@ -546,6 +674,7 @@ void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, c req.rt.rtm_scope = RT_SCOPE_UNIVERSE; req.rt.rtm_type = RTN_UNICAST; req.rt.rtm_dst_len = target.netmaskBits(); + req.rt.rtm_flags = 0; struct sockaddr_nl pa; bzero(&pa, sizeof(pa)); @@ -562,11 +691,41 @@ void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, c iov.iov_len = req.nl.nlmsg_len; msg.msg_iov = &iov; msg.msg_iovlen = 1; - sendmsg(_fd, &msg, 0); + sendmsg(fd, &msg, 0); + + _doRecv(fd); + + close(fd); } -void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName) +void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, const InetAddress &src, const char *ifaceName) { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + + _setSocketTimeout(fd); + + struct sockaddr_nl la; + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + + if(bind(fd, (struct sockaddr*)&la, sizeof(struct sockaddr_nl))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + char tmp[64]; + char tmp2[64]; + char tmp3[64]; + fprintf(stderr, "Removing Route. target: %s via: %s src: %s iface: %s\n", target.toString(tmp), via.toString(tmp2), src.toString(tmp3), ifaceName); + + if(!target) { + fprintf(stderr, "Uhhhh deleting an empty route?!?!?"); + return; + } + int rtl = sizeof(struct rtmsg); struct nl_route_req req; bzero(&req, sizeof(req)); @@ -574,47 +733,52 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c struct rtattr *rtap = (struct rtattr *)req.buf; rtap->rta_type = RTA_DST; if (target.isV4()) { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in_addr); - memcpy((void*)((char*)rtap+sizeof(struct rtattr)), &((struct sockaddr_in*)&target)->sin_addr, sizeof(struct in_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in*)&target)->sin_addr, sizeof(struct in_addr)); } else { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in6_addr); - memcpy((void*)((char*)rtap+sizeof(struct rtattr)), &((struct sockaddr_in6*)&target)->sin6_addr, sizeof(struct in6_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in6*)&target)->sin6_addr, sizeof(struct in6_addr)); } rtl += rtap->rta_len; - int interface_index = -1; - if (ifaceName != NULL) { - Hashtable::Iterator iter(_interfaces); - int *k = NULL; - iface_entry *v = NULL; - while(iter.next(k, v)) { - if(strcmp(ifaceName, v->ifacename) == 0) { - interface_index = v->index; - break; - } - } - if (interface_index != -1) { - rtap = (struct rtattr *) (((char*)rtap) + rtap->rta_len); - rtap->rta_type = RTA_OIF; - rtap->rta_len = sizeof(struct rtattr)+sizeof(int); - memcpy(((char*)rtap)+sizeof(rtattr), &interface_index, sizeof(int)); - rtl += rtap->rta_len; - } - } - if(via) { rtap = (struct rtattr *)(((char*)rtap)+rtap->rta_len); rtap->rta_type = RTA_GATEWAY; if(via.isV4()) { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in_addr); - memcpy((char*)rtap+sizeof(struct rtattr), &((struct sockaddr_in*)&via)->sin_addr, sizeof(struct in_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in*)&via)->sin_addr, sizeof(struct in_addr)); } else { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in6_addr); - memcpy((char*)rtap+sizeof(struct rtattr), &((struct sockaddr_in6*)&via)->sin6_addr, sizeof(struct in6_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in6*)&via)->sin6_addr, sizeof(struct in6_addr)); } rtl += rtap->rta_len; + } else if (src) { + rtap = (struct rtattr *)(((char*)rtap)+rtap->rta_len); + rtap->rta_type = RTA_SRC; + if(src.isV4()) { + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in*)&src)->sin_addr, sizeof(struct in_addr)); + + } else { + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in6*)&src)->sin6_addr, sizeof(struct in6_addr)); + } + req.rt.rtm_src_len = src.netmaskBits(); } + if (ifaceName != NULL) { + int interface_index = _indexForInterface(ifaceName); + if (interface_index != -1) { + rtap = (struct rtattr *) (((char*)rtap) + rtap->rta_len); + rtap->rta_type = RTA_OIF; + rtap->rta_len = RTA_LENGTH(sizeof(int)); + memcpy(RTA_DATA(rtap), &interface_index, sizeof(int)); + rtl += rtap->rta_len; + } + } + + + req.nl.nlmsg_len = NLMSG_LENGTH(rtl); req.nl.nlmsg_flags = NLM_F_REQUEST; req.nl.nlmsg_type = RTM_DELROUTE; @@ -626,6 +790,7 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c req.rt.rtm_scope = RT_SCOPE_UNIVERSE; req.rt.rtm_type = RTN_UNICAST; req.rt.rtm_dst_len = target.netmaskBits(); + req.rt.rtm_flags = 0; struct sockaddr_nl pa; bzero(&pa, sizeof(pa)); @@ -642,73 +807,226 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c iov.iov_len = req.nl.nlmsg_len; msg.msg_iov = &iov; msg.msg_iovlen = 1; - sendmsg(_fd, &msg, 0); -} + sendmsg(fd, &msg, 0); -// void LinuxNetLink::addInterface(const char *iface, unsigned int mtu, const MAC &mac) -// { -// int rtl = sizeof(struct ifinfomsg); -// struct nl_if_req req; -// bzero(&req, sizeof(nl_if_req)); - -// struct rtattr *rtap = (struct rtattr *)req.buf; -// rtap->rta_type = IFLA_IFNAME; -// rtap->rta_len = sizeof(struct rtattr)+strlen(iface)+1; -// rtl += rtap->rta_len; - -// rtap = (struct rtattr*)(((char*)rtap)+rtap->rta_len); -// rtap->rta_type = IFLA_MTU; -// rtap->rta_len = sizeof(struct rtattr)+sizeof(unsigned int); -// rtl += rtap->rta_len; - -// rtap = (struct rtattr*)(((char*)rtap)+rtap->rta_len); -// rtap->rta_type = IFLA_ADDRESS; -// rtap->rta_len = sizeof(struct rtattr)+6; -// mac.copyTo(((char*)rtap)+sizeof(struct rtattr), 6); -// rtl += rtap->rta_len; - -// IFLA_LINKINFO; -// req.nl.nlmsg_len = NLMSG_LENGTH(rtl); -// req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; -// req.nl.nlmsg_type = RTM_NEWLINK; -// req.nl.nlmsg_pid = 0; -// req.nl.nlmsg_seq = ++_seq; - -// req.ifa.ifi_family = AF_UNSPEC; -// req.ifa.ifi_type = 0; // TODO figure this one out -// req.ifa.ifi_index = 0; -// req.ifa.ifi_flags = IFF_UP; - -// struct sockaddr_nl pa; -// bzero(&pa, sizeof(pa)); -// pa.nl_family = AF_NETLINK; - -// struct msghdr msg; -// bzero(&msg, sizeof(msg)); -// msg.msg_name = (void*)&pa; -// msg.msg_namelen = sizeof(pa); - -// struct iovec iov; -// iov.iov_base = (void*)&req.nl; -// iov.iov_len = req.nl.nlmsg_len; -// msg.msg_iov = &iov; -// msg.msg_iovlen = 1; -// sendmsg(_fd, &msg, 0); -// } - -// void LinuxNetLink::removeInterface(const char *iface) -// { - -// } + _doRecv(fd); + + close(fd); +} void LinuxNetLink::addAddress(const InetAddress &addr, const char *iface) { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + _setSocketTimeout(fd); + + struct sockaddr_nl la; + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + if (addr.isV4()) { + la.nl_groups = RTMGRP_IPV4_IFADDR; + } else { + la.nl_groups = RTMGRP_IPV6_IFADDR; + } + if(bind(fd, (struct sockaddr*)&la, sizeof(struct sockaddr_nl))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + + char tmp[128]; + fprintf(stderr, "Adding IP address %s to interface %s", addr.toString(tmp), iface); + int interface_index = _indexForInterface(iface); + + if (interface_index == -1) { + fprintf(stderr, "Unable to find index for interface %s\n", iface); + return; + } + + int rtl = sizeof(struct ifaddrmsg); + struct nl_adr_req req; + bzero(&req, sizeof(struct nl_adr_req)); + + struct rtattr *rtap = (struct rtattr *)req.buf;; + if(addr.isV4()) { + struct sockaddr_in *addr_v4 = (struct sockaddr_in*)&addr; + rtap->rta_type = IFA_ADDRESS; + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &addr_v4->sin_addr, sizeof(struct in_addr)); + rtl += rtap->rta_len; + + rtap = (struct rtattr*)(((char*)rtap) + rtap->rta_len); + rtap->rta_type = IFA_LOCAL; + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &addr_v4->sin_addr, sizeof(struct in_addr)); + rtl += rtap->rta_len; + + InetAddress broadcast = addr.broadcast(); + if(broadcast) { + rtap = (struct rtattr*)(((char*)rtap)+rtap->rta_len); + struct sockaddr_in *bcast = (struct sockaddr_in*)&broadcast; + rtap->rta_type = IFA_BROADCAST; + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &bcast->sin_addr, sizeof(struct in_addr)); + rtl += rtap->rta_len; + } + } else { //V6 + rtap->rta_type = IFA_ADDRESS; + struct sockaddr_in6 *addr_v6 = (struct sockaddr_in6*)&addr; + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &addr_v6->sin6_addr, sizeof(struct in6_addr)); + rtl += rtap->rta_len; + } + + if (iface) { + rtap = (struct rtattr*)(((char*)rtap)+rtap->rta_len); + rtap->rta_type = IFA_LABEL; + rtap->rta_len = RTA_LENGTH(strlen(iface)); + memcpy(RTA_DATA(rtap), iface, strlen(iface)); + rtl += rtap->rta_len; + } + + req.nl.nlmsg_len = NLMSG_LENGTH(rtl); + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.nl.nlmsg_type = RTM_NEWADDR; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++_seq; + req.ifa.ifa_family = addr.ss_family; + req.ifa.ifa_prefixlen = addr.port(); + req.ifa.ifa_flags = IFA_F_PERMANENT; + req.ifa.ifa_scope = 0; + req.ifa.ifa_index = interface_index; + + struct sockaddr_nl pa; + bzero(&pa, sizeof(sockaddr_nl)); + pa.nl_family = AF_NETLINK; + + struct msghdr msg; + bzero(&msg, sizeof(msg)); + msg.msg_name = (void*)&pa; + msg.msg_namelen = sizeof(pa); + + struct iovec iov; + iov.iov_base = (void*)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + sendmsg(fd, &msg, 0); + + _doRecv(fd); + + close(fd); } void LinuxNetLink::removeAddress(const InetAddress &addr, const char *iface) { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + + _setSocketTimeout(fd); + + struct sockaddr_nl la; + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + if (addr.isV4()) { + la.nl_groups = RTMGRP_IPV4_IFADDR; + } else { + la.nl_groups = RTMGRP_IPV6_IFADDR; + } + if(bind(fd, (struct sockaddr*)&la, sizeof(struct sockaddr_nl))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + + char tmp[128]; + fprintf(stderr, "Removing IP address %s from interface %s", addr.toString(tmp), iface); + + int interface_index = _indexForInterface(iface); + + if (interface_index == -1) { + fprintf(stderr, "Unable to find index for interface %s\n", iface); + return; + } + + int rtl = sizeof(struct ifaddrmsg); + struct nl_adr_req req; + bzero(&req, sizeof(struct nl_adr_req)); + + struct rtattr *rtap = (struct rtattr *)req.buf; + if(addr.isV4()) { + struct sockaddr_in *addr_v4 = (struct sockaddr_in*)&addr; + rtap->rta_type = IFA_ADDRESS; + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &addr_v4->sin_addr, sizeof(struct in_addr)); + rtl += rtap->rta_len; + + rtap = (struct rtattr*)(((char*)rtap) + rtap->rta_len); + rtap->rta_type = IFA_LOCAL; + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &addr_v4->sin_addr, sizeof(struct in_addr)); + rtl += rtap->rta_len; + + InetAddress broadcast = addr.broadcast(); + if(broadcast) { + rtap = (struct rtattr*)(((char*)rtap)+rtap->rta_len); + struct sockaddr_in *bcast = (struct sockaddr_in*)&broadcast; + rtap->rta_type = IFA_BROADCAST; + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &bcast->sin_addr, sizeof(struct in_addr)); + rtl += rtap->rta_len; + } + } else { //V6 + rtap->rta_type = IFA_ADDRESS; + struct sockaddr_in6 *addr_v6 = (struct sockaddr_in6*)&addr; + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &addr_v6->sin6_addr, sizeof(struct in6_addr)); + rtl += rtap->rta_len; + } + + if (iface) { + rtap = (struct rtattr*)(((char*)rtap)+rtap->rta_len); + rtap->rta_type = IFA_LABEL; + rtap->rta_len = RTA_LENGTH(strlen(iface)); + memcpy(RTA_DATA(rtap), iface, strlen(iface)); + rtl += rtap->rta_len; + } + + req.nl.nlmsg_len = NLMSG_LENGTH(rtl); + req.nl.nlmsg_flags = NLM_F_REQUEST; + req.nl.nlmsg_type = RTM_DELADDR; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++_seq; + req.ifa.ifa_family = addr.ss_family; + req.ifa.ifa_prefixlen = addr.port(); + req.ifa.ifa_flags = IFA_F_PERMANENT; + req.ifa.ifa_scope = 0; + req.ifa.ifa_index = interface_index; + + struct sockaddr_nl pa; + bzero(&pa, sizeof(sockaddr_nl)); + pa.nl_family = AF_NETLINK; + + struct msghdr msg; + bzero(&msg, sizeof(msg)); + msg.msg_name = (void*)&pa; + msg.msg_namelen = sizeof(pa); + struct iovec iov; + iov.iov_base = (void*)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + sendmsg(fd, &msg, 0); + + _doRecv(fd); + + close(fd); } RouteList LinuxNetLink::getIPV4Routes() const @@ -721,4 +1039,20 @@ RouteList LinuxNetLink::getIPV6Routes() const return _routes_ipv6; } -} // namespace ZeroTier \ No newline at end of file +int LinuxNetLink::_indexForInterface(const char *iface) +{ + Mutex::Lock l(_if_m); + int interface_index = -1; + Hashtable::Iterator iter(_interfaces); + int *k = NULL; + iface_entry *v = NULL; + while(iter.next(k,v)) { + if(strcmp(iface, v->ifacename) == 0) { + interface_index = v->index; + break; + } + } + return interface_index; +} + +} // namespace ZeroTier diff --git a/osdep/LinuxNetLink.hpp b/osdep/LinuxNetLink.hpp index ad457772..681aa46f 100644 --- a/osdep/LinuxNetLink.hpp +++ b/osdep/LinuxNetLink.hpp @@ -39,6 +39,7 @@ #include "../node/MAC.hpp" #include "Thread.hpp" #include "../node/Hashtable.hpp" +#include "../node/Mutex.hpp" namespace ZeroTier { @@ -70,19 +71,18 @@ public: LinuxNetLink(LinuxNetLink const&) = delete; void operator=(LinuxNetLink const&) = delete; - void addRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName); - void delRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName); + void addRoute(const InetAddress &target, const InetAddress &via, const InetAddress &src, const char *ifaceName); + void delRoute(const InetAddress &target, const InetAddress &via, const InetAddress &src, const char *ifaceName); RouteList getIPV4Routes() const; RouteList getIPV6Routes() const; - // void addInterface(const char *iface, unsigned int mtu, const MAC &mac); - // void removeInterface(const char *iface); - void addAddress(const InetAddress &addr, const char *iface); void removeAddress(const InetAddress &addr, const char *iface); void threadMain() throw(); private: + int _doRecv(int fd); + void _processMessage(struct nlmsghdr *nlp, int nll); void _routeAdded(struct nlmsghdr *nlp); void _routeDeleted(struct nlmsghdr *nlp); @@ -95,12 +95,17 @@ private: void _requestIPv4Routes(); void _requestIPv6Routes(); + int _indexForInterface(const char *iface); + + void _setSocketTimeout(int fd, int seconds = 1); Thread _t; bool _running; RouteList _routes_ipv4; + Mutex _rv4_m; RouteList _routes_ipv6; + Mutex _rv6_m; uint32_t _seq; @@ -112,6 +117,7 @@ private: unsigned int mtu; }; Hashtable _interfaces; + Mutex _if_m; // socket communication vars; int _fd; diff --git a/osdep/ManagedRoute.cpp b/osdep/ManagedRoute.cpp index fe7c6267..99277c9f 100644 --- a/osdep/ManagedRoute.cpp +++ b/osdep/ManagedRoute.cpp @@ -285,21 +285,23 @@ static void _routeCmd(const char *op,const InetAddress &target,const InetAddress #ifdef __LINUX__ // ---------------------------------------------------------- #define ZT_ROUTING_SUPPORT_FOUND 1 -static void _routeCmd(const char *op, const InetAddress &target, const InetAddress &via, const char *localInterface) +static void _routeCmd(const char *op, const InetAddress &target, const InetAddress &via, const InetAddress &src, const char *localInterface) { - if ((strcmp(op, "add") == 0 || strcmp(op, "replace") == 0)) { - LinuxNetLink::getInstance().addRoute(target, via, localInterface); - } else if ((strcmp(op, "remove") == 0 || strcmp(op, "del") == 0)) { - LinuxNetLink::getInstance().delRoute(target, via, localInterface); - } - return; - char targetStr[64] = {0}; char viaStr[64] = {0}; InetAddress nmsk = target.netmask(); char nmskStr[64] = {0}; fprintf(stderr, "Received Route Cmd: %s target: %s via: %s netmask: %s localInterface: %s\n", op, target.toString(targetStr), via.toString(viaStr), nmsk.toString(nmskStr), localInterface); + + if ((strcmp(op, "add") == 0 || strcmp(op, "replace") == 0)) { + LinuxNetLink::getInstance().addRoute(target, via, src, localInterface); + } else if ((strcmp(op, "remove") == 0 || strcmp(op, "del") == 0)) { + LinuxNetLink::getInstance().delRoute(target, via, src, localInterface); + } + return; + + int fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);; struct rtentry route = {0}; @@ -600,11 +602,11 @@ bool ManagedRoute::sync() if (!_applied.count(leftt)) { _applied[leftt] = false; // boolean unused - _routeCmd("replace",leftt,_via,(_via) ? (const char *)0 : _device); + _routeCmd("replace",leftt,_via,_src,_device); } if ((rightt)&&(!_applied.count(rightt))) { _applied[rightt] = false; // boolean unused - _routeCmd("replace",rightt,_via,(_via) ? (const char *)0 : _device); + _routeCmd("replace",rightt,_via,_src,_device); } #endif // __LINUX__ ---------------------------------------------------------- @@ -651,7 +653,7 @@ void ManagedRoute::remove() #endif // __BSD__ ------------------------------------------------------------ #ifdef __LINUX__ // ---------------------------------------------------------- - _routeCmd("del",r->first,_via,(_via) ? (const char *)0 : _device); + _routeCmd("del",r->first,_via,_src,_device); #endif // __LINUX__ ---------------------------------------------------------- #ifdef __WINDOWS__ // -------------------------------------------------------- diff --git a/osdep/ManagedRoute.hpp b/osdep/ManagedRoute.hpp index 779ad6a1..301c54a8 100644 --- a/osdep/ManagedRoute.hpp +++ b/osdep/ManagedRoute.hpp @@ -49,14 +49,20 @@ class ManagedRoute friend class SharedPtr; public: - ManagedRoute(const InetAddress &target,const InetAddress &via,const char *device) + ManagedRoute(const InetAddress &target,const InetAddress &via,const InetAddress &src,const char *device) { _target = target; _via = via; + _src = src; if (via.ss_family == AF_INET) _via.setPort(32); else if (via.ss_family == AF_INET6) _via.setPort(128); + if (src.ss_family == AF_INET) { + _src.setPort(32); + } else if (src.ss_family == AF_INET6) { + _src.setPort(128); + } Utils::scopy(_device,sizeof(_device),device); _systemDevice[0] = (char)0; } @@ -87,6 +93,7 @@ public: inline const InetAddress &target() const { return _target; } inline const InetAddress &via() const { return _via; } + inline const InetAddress &src() const { return _src; } inline const char *device() const { return _device; } private: @@ -95,6 +102,7 @@ private: InetAddress _target; InetAddress _via; + InetAddress _src; InetAddress _systemVia; // for route overrides std::map _applied; // routes currently applied char _device[128]; diff --git a/service/OneService.cpp b/service/OneService.cpp index 91cf49ee..1851c88c 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1631,6 +1631,17 @@ public: for(unsigned int i=0;i(&(n.config.routes[i].target)); const InetAddress *const via = reinterpret_cast(&(n.config.routes[i].via)); + InetAddress *src = NULL; + for (unsigned int j=0; j(&(n.config.assignedAddresses[j])); + if (target->isV4() && tmp->isV4()) { + src = reinterpret_cast(&(n.config.assignedAddresses[j])); + break; + } else if (target->isV6() && tmp->isV6()) { + src = reinterpret_cast(&(n.config.assignedAddresses[j])); + break; + } + } if ( (!checkIfManagedIsAllowed(n,*target)) || ((via->ss_family == target->ss_family)&&(matchIpOnly(myIps,*via))) ) continue; @@ -1662,7 +1673,7 @@ public: continue; // Add and apply new routes - n.managedRoutes.push_back(SharedPtr(new ManagedRoute(*target,*via,tapdev))); + n.managedRoutes.push_back(SharedPtr(new ManagedRoute(*target,*via,*src,tapdev))); if (!n.managedRoutes.back()->sync()) n.managedRoutes.pop_back(); } -- cgit v1.2.3 From f302fac423db6cc0e5e8c1758a74a7d7b7d8bf04 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Wed, 11 Jul 2018 16:32:49 -0700 Subject: Minor tweak to linux makefile --- make-linux.mk | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'make-linux.mk') diff --git a/make-linux.mk b/make-linux.mk index b3a63f54..47d66118 100644 --- a/make-linux.mk +++ b/make-linux.mk @@ -46,12 +46,16 @@ endif # Trying to use dynamically linked libhttp-parser causes tons of compatibility problems. ONE_OBJS+=ext/http-parser/http_parser.o -ifeq ($(ZT_TRACE),1) - override DEFS+=-DZT_TRACE +# Build with address sanitization library for advanced debugging (clang) +ifeq ($(ZT_SANITIZE),1) + DEFS+=-fsanitize=address -DASAN_OPTIONS=symbolize=1 endif ifeq ($(ZT_DEBUG_TRACE),1) DEFS+=-DZT_DEBUG_TRACE endif +ifeq ($(ZT_TRACE),1) + DEFS+=-DZT_TRACE +endif ifeq ($(ZT_RULES_ENGINE_DEBUGGING),1) override DEFS+=-DZT_RULES_ENGINE_DEBUGGING -- cgit v1.2.3