From 3ba54c7e3559359abd8d4734aa969829309a9dab Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Thu, 23 Jul 2015 09:50:10 -0700 Subject: Eliminate some poorly thought out optimizations from the netconf/controller interaction, and go ahead and bump version to 1.0.4. For a while in 1.0.3 -dev I was trying to optimize out repeated network controller requests by using a ratcheting mechanism. If the client received a network config that was indeed different from the one it had, it would respond by instantlly requesting it again. Not sure what I was thinking. It's fundamentally unsafe to respond to a message with another message of the same type -- it risks a race condition. In this case that's exactly what could happen. It just isn't worth the added complexity to avoid a tiny, tiny amount of network overhead, so I've taken this whole path out. A few extra bytes every two minutes isn't worth fretting about, but as I recall the reason for this optimization was to save CPU on the controller. This can be achieved by just caching responses in memory *there* and serving those same responses back out if they haven't changed. I think I developed that 'ratcheting' stuff before I went full time on this. It's hard to develop stuff like this without hours of sustained focus. --- node/IncomingPacket.cpp | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) (limited to 'node/IncomingPacket.cpp') diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index ae99352e..c3d8cc6d 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -392,28 +392,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,const SharedPtr &p const unsigned int dictlen = at(ZT_PROTO_VERB_NETWORK_CONFIG_REQUEST__OK__IDX_DICT_LEN); const std::string dict((const char *)field(ZT_PROTO_VERB_NETWORK_CONFIG_REQUEST__OK__IDX_DICT,dictlen),dictlen); if (dict.length()) { - if (nw->setConfiguration(Dictionary(dict)) == 2) { // 2 == accepted and actually new - /* If this configuration was indeed new, we do another - * controller request with its revision. We do this in - * order to (a) tell the network controller we got it (it - * won't send a duplicate if ts == current), and (b) - * get another one if the controller is changing rapidly - * until we finally have the final version. - * - * Note that we don't do this for network controllers with - * versions <= 1.0.3, since those regenerate a new controller - * with a new revision every time. In that case this double - * confirmation would create a race condition. */ - const SharedPtr nc(nw->config2()); - if ((peer->atLeastVersion(1,0,3))&&(nc)&&(nc->revision() > 0)) { - Packet outp(peer->address(),RR->identity.address(),Packet::VERB_NETWORK_CONFIG_REQUEST); - outp.append((uint64_t)nw->id()); - outp.append((uint16_t)0); // no meta-data - outp.append((uint64_t)nc->revision()); - outp.armor(peer->key(),true); - RR->node->putPacket(_remoteAddress,outp.data(),outp.size()); - } - } + nw->setConfiguration(Dictionary(dict)); TRACE("got network configuration for network %.16llx from %s",(unsigned long long)nw->id(),source().toString().c_str()); } } @@ -692,6 +671,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,cons if (RR->localNetworkController) { Dictionary netconf; switch(RR->localNetworkController->doNetworkConfigRequest((h > 0) ? InetAddress() : _remoteAddress,RR->identity,peer->identity(),nwid,metaData,haveRevision,netconf)) { + case NetworkController::NETCONF_QUERY_OK: { const std::string netconfStr(netconf.toString()); if (netconfStr.length() > 0xffff) { // sanity check since field ix 16-bit @@ -712,8 +692,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,cons } } } break; - case NetworkController::NETCONF_QUERY_OK_BUT_NOT_NEWER: // nothing to do -- netconf has not changed - break; + case NetworkController::NETCONF_QUERY_OBJECT_NOT_FOUND: { Packet outp(peer->address(),RR->identity.address(),Packet::VERB_ERROR); outp.append((unsigned char)Packet::VERB_NETWORK_CONFIG_REQUEST); @@ -723,6 +702,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,cons outp.armor(peer->key(),true); RR->node->putPacket(_remoteAddress,outp.data(),outp.size()); } break; + case NetworkController::NETCONF_QUERY_ACCESS_DENIED: { Packet outp(peer->address(),RR->identity.address(),Packet::VERB_ERROR); outp.append((unsigned char)Packet::VERB_NETWORK_CONFIG_REQUEST); @@ -732,12 +712,15 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,cons outp.armor(peer->key(),true); RR->node->putPacket(_remoteAddress,outp.data(),outp.size()); } break; + case NetworkController::NETCONF_QUERY_INTERNAL_SERVER_ERROR: TRACE("NETWORK_CONFIG_REQUEST failed: internal error: %s",netconf.get("error","(unknown)").c_str()); break; + default: TRACE("NETWORK_CONFIG_REQUEST failed: invalid return value from NetworkController::doNetworkConfigRequest()"); break; + } } else { Packet outp(peer->address(),RR->identity.address(),Packet::VERB_ERROR); -- cgit v1.2.3 From b3516c599bb0beb4b4827f28da472972344379c6 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Thu, 23 Jul 2015 10:10:17 -0700 Subject: Add a rate limiting circuit breaker to the network controller to prevent flooding attacks and race conditions. --- controller/SqliteNetworkController.cpp | 13 +++++++++++++ controller/SqliteNetworkController.hpp | 2 ++ node/IncomingPacket.cpp | 3 +++ node/NetworkController.hpp | 3 ++- 4 files changed, 20 insertions(+), 1 deletion(-) (limited to 'node/IncomingPacket.cpp') diff --git a/controller/SqliteNetworkController.cpp b/controller/SqliteNetworkController.cpp index f6489640..bdf337ec 100644 --- a/controller/SqliteNetworkController.cpp +++ b/controller/SqliteNetworkController.cpp @@ -64,6 +64,10 @@ // API version reported via JSON control plane #define ZT_NETCONF_CONTROLLER_API_VERSION 1 +// Drop requests for a given peer and network ID that occur more frequently +// than this (ms). +#define ZT_NETCONF_MIN_REQUEST_PERIOD 5000 + namespace ZeroTier { namespace { @@ -316,6 +320,15 @@ NetworkController::ResultCode SqliteNetworkController::doNetworkConfigRequest(co return NetworkController::NETCONF_QUERY_INTERNAL_SERVER_ERROR; } + // Check rate limit + + { + uint64_t &lrt = _lastRequestTime[std::pair(identity.address(),nwid)]; + uint64_t lrt2 = lrt; + if (((lrt = OSUtils::now()) - lrt2) <= ZT_NETCONF_MIN_REQUEST_PERIOD) + return NetworkController::NETCONF_QUERY_IGNORE; + } + NetworkRecord network; memset(&network,0,sizeof(network)); Utils::snprintf(network.id,sizeof(network.id),"%.16llx",(unsigned long long)nwid); diff --git a/controller/SqliteNetworkController.hpp b/controller/SqliteNetworkController.hpp index bae11519..002493ec 100644 --- a/controller/SqliteNetworkController.hpp +++ b/controller/SqliteNetworkController.hpp @@ -98,6 +98,8 @@ private: std::string _dbPath; std::string _instanceId; + std::map< std::pair,uint64_t > _lastRequestTime; + sqlite3 *_db; sqlite3_stmt *_sGetNetworkById; diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index c3d8cc6d..76c47933 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -717,6 +717,9 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,cons TRACE("NETWORK_CONFIG_REQUEST failed: internal error: %s",netconf.get("error","(unknown)").c_str()); break; + case NetworkController::NETCONF_QUERY_IGNORE: + break; + default: TRACE("NETWORK_CONFIG_REQUEST failed: invalid return value from NetworkController::doNetworkConfigRequest()"); break; diff --git a/node/NetworkController.hpp b/node/NetworkController.hpp index bef884de..ee481a62 100644 --- a/node/NetworkController.hpp +++ b/node/NetworkController.hpp @@ -54,7 +54,8 @@ public: NETCONF_QUERY_OK = 0, NETCONF_QUERY_OBJECT_NOT_FOUND = 1, NETCONF_QUERY_ACCESS_DENIED = 2, - NETCONF_QUERY_INTERNAL_SERVER_ERROR = 3 + NETCONF_QUERY_INTERNAL_SERVER_ERROR = 3, + NETCONF_QUERY_IGNORE = 4 }; NetworkController() {} -- cgit v1.2.3 From b31071463cafda54afbf6f01d37aa7451b217b12 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Tue, 28 Jul 2015 11:28:47 -0700 Subject: Try another NAT traversal improvement. --- node/Constants.hpp | 3 +++ node/IncomingPacket.cpp | 2 +- node/SelfAwareness.cpp | 15 +++++++++++++++ node/SelfAwareness.hpp | 5 +++++ node/Switch.cpp | 19 ++++++++++++++----- node/Switch.hpp | 4 ++-- 6 files changed, 40 insertions(+), 8 deletions(-) (limited to 'node/IncomingPacket.cpp') diff --git a/node/Constants.hpp b/node/Constants.hpp index c192381c..31a4c313 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -296,6 +296,9 @@ /** * Delay between initial direct NAT-t packet and more aggressive techniques + * + * This may also be a delay before sending the first packet if we determine + * that we should wait for the remote to initiate rendezvous first. */ #define ZT_NAT_T_TACTICAL_ESCALATION_DELAY 1000 diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index 76c47933..b1fda8ef 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -488,7 +488,7 @@ bool IncomingPacket::_doRENDEZVOUS(const RuntimeEnvironment *RR,const SharedPtr< InetAddress atAddr(field(ZT_PROTO_VERB_RENDEZVOUS_IDX_ADDRESS,addrlen),addrlen,port); TRACE("RENDEZVOUS from %s says %s might be at %s, starting NAT-t",peer->address().toString().c_str(),with.toString().c_str(),atAddr.toString().c_str()); peer->received(RR,_remoteAddress,hops(),packetId(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP); - RR->sw->contact(withPeer,atAddr); + RR->sw->rendezvous(withPeer,atAddr); } else { TRACE("dropped corrupt RENDEZVOUS from %s(%s) (bad address or port)",peer->address().toString().c_str(),_remoteAddress.toString().c_str()); } diff --git a/node/SelfAwareness.cpp b/node/SelfAwareness.cpp index 00015788..716cf7f3 100644 --- a/node/SelfAwareness.cpp +++ b/node/SelfAwareness.cpp @@ -147,4 +147,19 @@ void SelfAwareness::clean(uint64_t now) } } +bool SelfAwareness::areGlobalIPv4PortsRandomized() const +{ + int port = 0; + Mutex::Lock _l(_phy_m); + for(std::map< PhySurfaceKey,PhySurfaceEntry >::const_iterator p(_phy.begin());p!=_phy.end();++p) { + if ((p->first.scope == InetAddress::IP_SCOPE_GLOBAL)&&(p->second.mySurface.ss_family == AF_INET)) { + const int tmp = (int)p->second.mySurface.port(); + if ((port)&&(tmp != port)) + return true; + else port = tmp; + } + } + return false; +} + } // namespace ZeroTier diff --git a/node/SelfAwareness.hpp b/node/SelfAwareness.hpp index 4eede592..d3b79d18 100644 --- a/node/SelfAwareness.hpp +++ b/node/SelfAwareness.hpp @@ -66,6 +66,11 @@ public: */ void clean(uint64_t now); + /** + * @return True if our external (global scope) IPv4 ports appear to be randomized by a NAT device + */ + bool areGlobalIPv4PortsRandomized() const; + private: struct PhySurfaceKey { diff --git a/node/Switch.cpp b/node/Switch.cpp index 18935ce5..a580078e 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -43,6 +43,7 @@ #include "Topology.hpp" #include "Peer.hpp" #include "AntiRecursion.hpp" +#include "SelfAwareness.hpp" #include "Packet.hpp" namespace ZeroTier { @@ -385,15 +386,23 @@ bool Switch::unite(const Address &p1,const Address &p2,bool force) return true; } -void Switch::contact(const SharedPtr &peer,const InetAddress &atAddr) +void Switch::rendezvous(const SharedPtr &peer,const InetAddress &atAddr) { TRACE("sending NAT-t message to %s(%s)",peer->address().toString().c_str(),atAddr.toString().c_str()); const uint64_t now = RR->node->now(); - // Attempt to contact directly - peer->attemptToContactAt(RR,atAddr,now); - - // If we have not punched through after this timeout, open refreshing can of whupass + /* Attempt direct contact now unless we are IPv4 and our external ports + * appear to be randomized by a NAT device. In that case, we should let + * the other side send a message first. Why? If the other side is also + * randomized and symmetric, we are probably going to fail. But if the + * other side is "port restricted" but otherwise sane, us sending a + * packet first may actually close the remote's outgoing port to us! + * This assists with NAT-t in cases where one side is symmetric and the + * other is full cone but port restricted. */ + if ((atAddr.ss_family != AF_INET)||(!RR->sa->areGlobalIPv4PortsRandomized())) + peer->attemptToContactAt(RR,atAddr,now); + + // After 1s, try again and perhaps try more NAT-t strategies { Mutex::Lock _l(_contactQueue_m); _contactQueue.push_back(ContactQueueEntry(peer,now + ZT_NAT_T_TACTICAL_ESCALATION_DELAY,atAddr)); diff --git a/node/Switch.hpp b/node/Switch.hpp index e7f1523a..ac85606e 100644 --- a/node/Switch.hpp +++ b/node/Switch.hpp @@ -136,12 +136,12 @@ public: bool unite(const Address &p1,const Address &p2,bool force); /** - * Send NAT traversal messages to peer at the given candidate address + * Attempt NAT traversal to peer at a given physical address * * @param peer Peer to contact * @param atAddr Address of peer */ - void contact(const SharedPtr &peer,const InetAddress &atAddr); + void rendezvous(const SharedPtr &peer,const InetAddress &atAddr); /** * Request WHOIS on a given address -- cgit v1.2.3