diff options
-rw-r--r-- | node/Constants.hpp | 13 | ||||
-rw-r--r-- | node/Node.cpp | 12 | ||||
-rw-r--r-- | node/Peer.cpp | 2 | ||||
-rw-r--r-- | node/Peer.hpp | 20 | ||||
-rw-r--r-- | node/Topology.hpp | 62 |
5 files changed, 82 insertions, 27 deletions
diff --git a/node/Constants.hpp b/node/Constants.hpp index f0db9d3f..9e80a3e3 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -263,11 +263,11 @@ error_no_byte_order_defined; #define ZT_NETWORK_FINGERPRINT_CHECK_DELAY 5000 /** - * Delay between pings (actually HELLOs) to direct links + * Delay between ordinary case pings of direct links */ #define ZT_PEER_DIRECT_PING_DELAY 120000 -/** + /** * Delay in ms between firewall opener packets to direct links * * This should be lower than the UDP conversation entry timeout in most @@ -297,12 +297,12 @@ error_no_byte_order_defined; * * A link that hasn't spoken in this long is simply considered inactive. */ -#define ZT_PEER_PATH_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 2) + 1000) +#define ZT_PEER_PATH_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 2) + 10000) /** * Close TCP tunnels if unused for this long */ -#define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT ZT_PEER_PATH_ACTIVITY_TIMEOUT +#define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT (ZT_PEER_PATH_ACTIVITY_TIMEOUT + 10000) /** * Try TCP tunnels if nothing received for this long @@ -315,6 +315,11 @@ error_no_byte_order_defined; #define ZT_STARTUP_AGGRO 5000 /** + * Time since a ping was sent to be considered unanswered + */ +#define ZT_PING_UNANSWERED_AFTER 2500 + +/** * Stop relaying via peers that have not responded to direct sends in this long */ #define ZT_PEER_RELAY_CONVERSATION_LATENCY_THRESHOLD 10000 diff --git a/node/Node.cpp b/node/Node.cpp index f9a0a614..bc01378d 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -536,10 +536,10 @@ Node::ReasonForTermination Node::run() uint64_t lastNetworkAutoconfCheck = Utils::now() - 5000ULL; // check autoconf again after 5s for startup uint64_t lastPingCheck = 0; - uint64_t lastSupernodePing = 0; uint64_t lastClean = Utils::now(); // don't need to do this immediately uint64_t lastNetworkFingerprintCheck = 0; uint64_t lastMulticastCheck = 0; + uint64_t lastSupernodePingCheck = 0; long lastDelayDelta = 0; uint64_t networkConfigurationFingerprint = 0; @@ -592,13 +592,9 @@ Node::ReasonForTermination Node::run() /* Ping supernodes separately, and do so more aggressively if we haven't * heard anything from anyone since our last resynchronize / startup. */ - if ( ((now - lastSupernodePing) >= ZT_PEER_DIRECT_PING_DELAY) || - ((_r->timeOfLastResynchronize > _r->timeOfLastPacketReceived) && ((now - lastSupernodePing) >= ZT_STARTUP_AGGRO)) ) { - lastSupernodePing = now; - std::vector< SharedPtr<Peer> > sns(_r->topology->supernodePeers()); - TRACE("pinging %d supernodes",(int)sns.size()); - for(std::vector< SharedPtr<Peer> >::const_iterator p(sns.begin());p!=sns.end();++p) - (*p)->sendPing(_r,now); + if ((now - lastSupernodePingCheck) >= ZT_STARTUP_AGGRO) { + lastSupernodePingCheck = now; + _r->topology->eachSupernodePeer(Topology::PingSupernodesThatNeedPing(_r,now)); } if (resynchronize) { diff --git a/node/Peer.cpp b/node/Peer.cpp index f8ff4082..db6ad261 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -216,7 +216,7 @@ bool Peer::isTcpFailoverTime(const RuntimeEnvironment *_r,uint64_t now) const void Peer::clean(uint64_t now) { Mutex::Lock _l(_lock); - unsigned long i = 0,o = 0,l = _paths.size(); + unsigned long i = 0,o = 0,l = (unsigned long)_paths.size(); while (i != l) { if (_paths[i].active(now)) _paths[o++] = _paths[i]; diff --git a/node/Peer.hpp b/node/Peer.hpp index 6a57e14a..864fea70 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -227,6 +227,26 @@ public: } /** + * @param _r Runtime environment + * @param now Current time + * @return True if the last ping is unanswered + */ + inline bool pingUnanswered(const RuntimeEnvironment *_r,uint64_t now) + throw() + { + uint64_t lp = 0; + uint64_t lr = 0; + { + Mutex::Lock _l(_lock); + for(std::vector<Path>::const_iterator p(_paths.begin());p!=_paths.end();++p) { + lp = std::max(p->lastPing(),lp); + lr = std::max(p->lastReceived(),lr); + } + } + return ( (lp > _r->timeOfLastResynchronize) && ((lr < lp)&&((lp - lr) >= ZT_PING_UNANSWERED_AFTER)) ); + } + + /** * @return Time of most recent unicast frame received */ inline uint64_t lastUnicastFrame() const diff --git a/node/Topology.hpp b/node/Topology.hpp index 35914cc9..34b3f1bf 100644 --- a/node/Topology.hpp +++ b/node/Topology.hpp @@ -181,6 +181,20 @@ public: } /** + * Apply a function or function object to all supernode peers + * + * @param f Function to apply + * @tparam F Function or function object type + */ + template<typename F> + inline void eachSupernodePeer(F f) + { + Mutex::Lock _l(_supernodes_m); + for(std::vector< SharedPtr<Peer> >::const_iterator p(_supernodePeers.begin());p!=_supernodePeers.end();++p) + f(*this,*p); + } + + /** * Function object to collect peers that need a firewall opener sent */ class OpenPeersThatNeedFirewallOpener @@ -214,20 +228,16 @@ public: inline void operator()(Topology &t,const SharedPtr<Peer> &p) { - if ( - /* 1: we have not heard anything directly in ZT_PEER_DIRECT_PING_DELAY ms */ - ((_now - p->lastDirectReceive()) >= ZT_PEER_DIRECT_PING_DELAY) && - /* 2: */ - ( - /* 2a: peer has direct path, and has sent us something recently */ - ( - (p->hasDirectPath())&& - ((_now - p->lastFrame()) < ZT_PEER_PATH_ACTIVITY_TIMEOUT) - ) && - /* 2b: peer is not a supernode */ - (!_supernodeAddresses.count(p->address())) - ) - ) { p->sendPing(_r,_now); } + /* For ordinary nodes we ping if they've sent us a frame recently, + * otherwise they are stale and we let the link die. + * + * Note that we measure ping time from time of last receive rather + * than time of last send in order to only count full round trips. */ + if ( (!_supernodeAddresses.count(p->address())) && + ((_now - p->lastFrame()) < ZT_PEER_PATH_ACTIVITY_TIMEOUT) && + ((_now - p->lastDirectReceive()) > ZT_PEER_DIRECT_PING_DELAY) ) { + p->sendPing(_r,_now); + } } private: @@ -237,6 +247,30 @@ public: }; /** + * Ping peers that need ping according to supernode rules (slightly more aggressive) + */ + class PingSupernodesThatNeedPing + { + public: + PingSupernodesThatNeedPing(const RuntimeEnvironment *renv,uint64_t now) throw() : + _now(now), + _r(renv) {} + + inline void operator()(Topology &t,const SharedPtr<Peer> &p) + { + /* For supernodes we always ping even if no frames have been seen, and + * we ping aggressively if pings are unanswered. The limit to this + * frequency is set in the main loop to no more than ZT_STARTUP_AGGRO. */ + if ( (p->pingUnanswered(_r,_now)) || ((_now - p->lastDirectReceive()) > ZT_PEER_DIRECT_PING_DELAY) ) + p->sendPing(_r,_now); + } + + private: + uint64_t _now; + const RuntimeEnvironment *_r; + }; + + /** * Function object to forget direct links to active peers and then ping them indirectly * * Note that this excludes supernodes. |