diff options
author | Adam Ierymenko <adam.ierymenko@gmail.com> | 2014-04-01 18:39:10 -0700 |
---|---|---|
committer | Adam Ierymenko <adam.ierymenko@gmail.com> | 2014-04-01 18:39:10 -0700 |
commit | 700a450806263f035f7b1266d5f63c5934461e97 (patch) | |
tree | 07d7d69299f3f14a4129532e2b126999b89bb653 | |
parent | 0e1fc06a6f3b5c7aaa06ba370f85337176ae8a85 (diff) | |
download | infinitytier-700a450806263f035f7b1266d5f63c5934461e97.tar.gz infinitytier-700a450806263f035f7b1266d5f63c5934461e97.zip |
More tweaks to algorithm for determining when to fail over to TCP, and stop supernodes from resynchronizing unless explicitly ordered.
-rw-r--r-- | node/Constants.hpp | 9 | ||||
-rw-r--r-- | node/Node.cpp | 27 | ||||
-rw-r--r-- | node/PacketDecoder.cpp | 2 | ||||
-rw-r--r-- | node/Peer.cpp | 37 | ||||
-rw-r--r-- | node/Peer.hpp | 30 | ||||
-rw-r--r-- | node/Topology.hpp | 8 |
6 files changed, 70 insertions, 43 deletions
diff --git a/node/Constants.hpp b/node/Constants.hpp index a003c88c..f0db9d3f 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -305,9 +305,14 @@ error_no_byte_order_defined; #define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT ZT_PEER_PATH_ACTIVITY_TIMEOUT /** - * Try TCP tunnels if no response to UDP PINGs in this many milliseconds + * Try TCP tunnels if nothing received for this long */ -#define ZT_PING_UNANSWERED_AFTER 5000 +#define ZT_TCP_TUNNEL_FAILOVER_TIMEOUT 5000 + +/** + * Try to ping supernodes this often until we get something from somewhere + */ +#define ZT_STARTUP_AGGRO 5000 /** * Stop relaying via peers that have not responded to direct sends in this long diff --git a/node/Node.cpp b/node/Node.cpp index a5978936..a0dd14c1 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -540,7 +540,7 @@ Node::ReasonForTermination Node::run() long lastDelayDelta = 0; uint64_t networkConfigurationFingerprint = 0; - _r->timeOfLastResynchronize = 0; + _r->timeOfLastResynchronize = Utils::now(); while (impl->reasonForTermination == NODE_RUNNING) { if (Utils::fileExists(shutdownIfUnreadablePath.c_str(),false)) { @@ -551,13 +551,7 @@ Node::ReasonForTermination Node::run() } uint64_t now = Utils::now(); - - // Did the user send SIGHUP or otherwise order network resync? (mostly for debugging) - bool resynchronize = impl->resynchronize; - impl->resynchronize = false; - if (resynchronize) { - LOG("manual resynchronize ordered, resyncing with network"); - } + bool resynchronize = false; // If it looks like the computer slept and woke, resynchronize. if (lastDelayDelta >= ZT_SLEEP_WAKE_DETECTION_THRESHOLD) { @@ -577,18 +571,29 @@ Node::ReasonForTermination Node::run() } } + // Supernodes do not resynchronize unless explicitly ordered via SIGHUP. + if ((resynchronize)&&(_r->topology->amSupernode())) + resynchronize = false; + + // Check for SIGHUP / force resync. + if (impl->resynchronize) { + impl->resynchronize = false; + resynchronize = true; + LOG("resynchronize forced by user, syncing with network"); + } + if (resynchronize) _r->timeOfLastResynchronize = now; /* Ping supernodes separately, and do so more aggressively if we haven't * heard anything from anyone since our last resynchronize / startup. */ if ( ((now - lastSupernodePing) >= ZT_PEER_DIRECT_PING_DELAY) || - ((_r->timeOfLastResynchronize > _r->timeOfLastPacketReceived) && ((now - lastSupernodePing) >= ZT_PING_UNANSWERED_AFTER)) ) { + ((_r->timeOfLastResynchronize > _r->timeOfLastPacketReceived) && ((now - lastSupernodePing) >= ZT_STARTUP_AGGRO)) ) { lastSupernodePing = now; std::vector< SharedPtr<Peer> > sns(_r->topology->supernodePeers()); TRACE("pinging %d supernodes",(int)sns.size()); for(std::vector< SharedPtr<Peer> >::const_iterator p(sns.begin());p!=sns.end();++p) - (*p)->sendPing(_r,now,resynchronize); + (*p)->sendPing(_r,now); } if (resynchronize) { @@ -625,7 +630,7 @@ Node::ReasonForTermination Node::run() if ((now - lastPingCheck) >= ZT_PING_CHECK_DELAY) { lastPingCheck = now; try { - _r->topology->eachPeer(Topology::PingPeersThatNeedPing(_r,now,resynchronize)); + _r->topology->eachPeer(Topology::PingPeersThatNeedPing(_r,now)); _r->topology->eachPeer(Topology::OpenPeersThatNeedFirewallOpener(_r,now)); } catch (std::exception &exc) { LOG("unexpected exception running ping check cycle: %s",exc.what()); diff --git a/node/PacketDecoder.cpp b/node/PacketDecoder.cpp index 2b8cba51..c7d3ffda 100644 --- a/node/PacketDecoder.cpp +++ b/node/PacketDecoder.cpp @@ -490,6 +490,7 @@ bool PacketDecoder::_doMULTICAST_FRAME(const RuntimeEnvironment *_r,const Shared const unsigned int signatureLen = at<uint16_t>(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME + frameLen); const unsigned char *const signature = field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME + frameLen + 2,signatureLen); + /* TRACE("MULTICAST_FRAME @%.16llx #%.16llx from %s<%s> via %s(%s) to %s [ %s, %d bytes, depth %d ]", (unsigned long long)nwid, (unsigned long long)guid, @@ -499,6 +500,7 @@ bool PacketDecoder::_doMULTICAST_FRAME(const RuntimeEnvironment *_r,const Shared Switch::etherTypeName(etherType), (int)frameLen, (int)depth); + */ SharedPtr<Network> network(_r->nc->network(nwid)); diff --git a/node/Peer.cpp b/node/Peer.cpp index 28926f97..3fad7c05 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -117,8 +117,13 @@ void Peer::receive( _lastMulticastFrame = now; } + bool Peer::send(const RuntimeEnvironment *_r,const void *data,unsigned int len,uint64_t now) { + // Note: we'll still use TCP here if that's all we have, but if this + // is false we will prefer UDP. + bool useTcp = isTcpFailoverTime(_r,now); + Mutex::Lock _l(_lock); std::vector<Path>::iterator p(_paths.begin()); @@ -127,11 +132,13 @@ bool Peer::send(const RuntimeEnvironment *_r,const void *data,unsigned int len,u uint64_t bestPathLastReceived = p->lastReceived(); std::vector<Path>::iterator bestPath = p; + bool bestPathIsTcp = p->tcp(); while (++p != _paths.end()) { uint64_t lr = p->lastReceived(); - if (lr > bestPathLastReceived) { + if ( (lr > bestPathLastReceived) || ((bestPathIsTcp)&&(!useTcp)) ) { bestPathLastReceived = lr; bestPath = p; + bestPathIsTcp = p->tcp(); } } @@ -156,35 +163,19 @@ bool Peer::sendFirewallOpener(const RuntimeEnvironment *_r,uint64_t now) return sent; } -bool Peer::sendPing(const RuntimeEnvironment *_r,uint64_t now,bool firstSinceReset) +bool Peer::sendPing(const RuntimeEnvironment *_r,uint64_t now) { bool sent = false; SharedPtr<Peer> self(this); - Mutex::Lock _l(_lock); - - // NOTE: this will never ping a peer that has *only* TCP paths. Right - // now there's never such a thing as TCP is only for failover. - - bool pingTcp; - if (!firstSinceReset) { - uint64_t lastUdp = 0; - uint64_t lastTcp = 0; - uint64_t lastPing = 0; - for(std::vector<Path>::iterator p(_paths.begin());p!=_paths.end();++p) { - if (p->tcp()) - lastTcp = std::max(p->lastReceived(),lastTcp); - else lastUdp = std::max(p->lastReceived(),lastUdp); - lastPing = std::max(p->lastPing(),lastPing); - } - uint64_t lastAny = std::max(lastUdp,lastTcp); - pingTcp = ( ( (lastAny < lastPing) && ((lastPing - lastAny) >= ZT_PING_UNANSWERED_AFTER) ) || (lastTcp > lastUdp) ); - } else pingTcp = false; + // In the ping case we will never send TCP unless this returns true. + bool useTcp = isTcpFailoverTime(_r,now); - TRACE("PING %s (pingTcp==%d)",_id.address().toString().c_str(),(int)pingTcp); + TRACE("PING %s (useTcp==%d)",_id.address().toString().c_str(),(int)useTcp); + Mutex::Lock _l(_lock); for(std::vector<Path>::iterator p(_paths.begin());p!=_paths.end();++p) { - if ((pingTcp)||(!p->tcp())) { + if ((useTcp)||(!p->tcp())) { if (_r->sw->sendHELLO(self,*p)) { p->sent(now); p->pinged(now); diff --git a/node/Peer.hpp b/node/Peer.hpp index d5d0a291..8b8bf578 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -160,10 +160,9 @@ public: * * @param _r Runtime environment * @param now Current time - * @param firstSinceReset If true, this is the first ping sent since a network reset * @return True if send appears successful for at least one address type */ - bool sendPing(const RuntimeEnvironment *_r,uint64_t now,bool firstSinceReset); + bool sendPing(const RuntimeEnvironment *_r,uint64_t now); /** * Called periodically by Topology::clean() to remove stale paths and do other cleanup @@ -264,6 +263,33 @@ public: } /** + * @param _r Runtime environment + * @param now Current time + * @return True if it's time to attempt TCP failover (if we have TCP_OUT paths) + */ + inline bool isTcpFailoverTime(const RuntimeEnvironment *_r,uint64_t now) const + throw() + { + if ((now - _r->timeOfLastResynchronize) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) { + uint64_t lastUdpPingSent = 0; + uint64_t lastUdpReceive = 0; + + { + Mutex::Lock _l(_lock); + for(std::vector<Path>::const_iterator p(_paths.begin());p!=_paths.end();++p) { + if (p->type() == Path::PATH_TYPE_UDP) { + lastUdpPingSent = std::max(lastUdpPingSent,p->lastPing()); + lastUdpReceive = std::max(lastUdpReceive,p->lastReceived()); + } + } + } + + return ( (lastUdpPingSent > lastUdpReceive) && ((now - lastUdpPingSent) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) ); + } + return false; + } + + /** * @return Current latency or 0 if unknown (max: 65535) */ inline unsigned int latency() const diff --git a/node/Topology.hpp b/node/Topology.hpp index bd39d0ec..35914cc9 100644 --- a/node/Topology.hpp +++ b/node/Topology.hpp @@ -207,11 +207,10 @@ public: class PingPeersThatNeedPing { public: - PingPeersThatNeedPing(const RuntimeEnvironment *renv,uint64_t now,bool firstSinceReset) throw() : + PingPeersThatNeedPing(const RuntimeEnvironment *renv,uint64_t now) throw() : _now(now), _supernodeAddresses(renv->topology->supernodeAddresses()), - _r(renv), - _firstSinceReset(firstSinceReset) {} + _r(renv) {} inline void operator()(Topology &t,const SharedPtr<Peer> &p) { @@ -228,14 +227,13 @@ public: /* 2b: peer is not a supernode */ (!_supernodeAddresses.count(p->address())) ) - ) { p->sendPing(_r,_now,_firstSinceReset); } + ) { p->sendPing(_r,_now); } } private: uint64_t _now; std::set<Address> _supernodeAddresses; const RuntimeEnvironment *_r; - bool _firstSinceReset; }; /** |