summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Adam Ierymenko <adam.ierymenko@gmail.com> 2014-04-01 18:39:10 -0700
committer: Adam Ierymenko <adam.ierymenko@gmail.com> 2014-04-01 18:39:10 -0700
commit: 700a450806263f035f7b1266d5f63c5934461e97 (patch)
tree: 07d7d69299f3f14a4129532e2b126999b89bb653
parent: 0e1fc06a6f3b5c7aaa06ba370f85337176ae8a85 (diff)
download: infinitytier-700a450806263f035f7b1266d5f63c5934461e97.tar.gz
download: infinitytier-700a450806263f035f7b1266d5f63c5934461e97.zip
More tweaks to algorithm for determining when to fail over to TCP, and stop supernodes from resynchronizing unless explicitly ordered.
-rw-r--r--  node/Constants.hpp      9
-rw-r--r--  node/Node.cpp          27
-rw-r--r--  node/PacketDecoder.cpp  2
-rw-r--r--  node/Peer.cpp          37
-rw-r--r--  node/Peer.hpp          30
-rw-r--r--  node/Topology.hpp       8
6 files changed, 70 insertions, 43 deletions
diff --git a/node/Constants.hpp b/node/Constants.hpp
index a003c88c..f0db9d3f 100644
--- a/node/Constants.hpp
+++ b/node/Constants.hpp
@@ -305,9 +305,14 @@ error_no_byte_order_defined;
#define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT ZT_PEER_PATH_ACTIVITY_TIMEOUT
/**
- * Try TCP tunnels if no response to UDP PINGs in this many milliseconds
+ * Try TCP tunnels if nothing received for this long
*/
-#define ZT_PING_UNANSWERED_AFTER 5000
+#define ZT_TCP_TUNNEL_FAILOVER_TIMEOUT 5000
+
+/**
+ * Try to ping supernodes this often until we get something from somewhere
+ */
+#define ZT_STARTUP_AGGRO 5000
/**
* Stop relaying via peers that have not responded to direct sends in this long
diff --git a/node/Node.cpp b/node/Node.cpp
index a5978936..a0dd14c1 100644
--- a/node/Node.cpp
+++ b/node/Node.cpp
@@ -540,7 +540,7 @@ Node::ReasonForTermination Node::run()
long lastDelayDelta = 0;
uint64_t networkConfigurationFingerprint = 0;
- _r->timeOfLastResynchronize = 0;
+ _r->timeOfLastResynchronize = Utils::now();
while (impl->reasonForTermination == NODE_RUNNING) {
if (Utils::fileExists(shutdownIfUnreadablePath.c_str(),false)) {
@@ -551,13 +551,7 @@ Node::ReasonForTermination Node::run()
}
uint64_t now = Utils::now();
-
- // Did the user send SIGHUP or otherwise order network resync? (mostly for debugging)
- bool resynchronize = impl->resynchronize;
- impl->resynchronize = false;
- if (resynchronize) {
- LOG("manual resynchronize ordered, resyncing with network");
- }
+ bool resynchronize = false;
// If it looks like the computer slept and woke, resynchronize.
if (lastDelayDelta >= ZT_SLEEP_WAKE_DETECTION_THRESHOLD) {
@@ -577,18 +571,29 @@ Node::ReasonForTermination Node::run()
}
}
+ // Supernodes do not resynchronize unless explicitly ordered via SIGHUP.
+ if ((resynchronize)&&(_r->topology->amSupernode()))
+ resynchronize = false;
+
+ // Check for SIGHUP / force resync.
+ if (impl->resynchronize) {
+ impl->resynchronize = false;
+ resynchronize = true;
+ LOG("resynchronize forced by user, syncing with network");
+ }
+
if (resynchronize)
_r->timeOfLastResynchronize = now;
/* Ping supernodes separately, and do so more aggressively if we haven't
* heard anything from anyone since our last resynchronize / startup. */
if ( ((now - lastSupernodePing) >= ZT_PEER_DIRECT_PING_DELAY) ||
- ((_r->timeOfLastResynchronize > _r->timeOfLastPacketReceived) && ((now - lastSupernodePing) >= ZT_PING_UNANSWERED_AFTER)) ) {
+ ((_r->timeOfLastResynchronize > _r->timeOfLastPacketReceived) && ((now - lastSupernodePing) >= ZT_STARTUP_AGGRO)) ) {
lastSupernodePing = now;
std::vector< SharedPtr<Peer> > sns(_r->topology->supernodePeers());
TRACE("pinging %d supernodes",(int)sns.size());
for(std::vector< SharedPtr<Peer> >::const_iterator p(sns.begin());p!=sns.end();++p)
- (*p)->sendPing(_r,now,resynchronize);
+ (*p)->sendPing(_r,now);
}
if (resynchronize) {
@@ -625,7 +630,7 @@ Node::ReasonForTermination Node::run()
if ((now - lastPingCheck) >= ZT_PING_CHECK_DELAY) {
lastPingCheck = now;
try {
- _r->topology->eachPeer(Topology::PingPeersThatNeedPing(_r,now,resynchronize));
+ _r->topology->eachPeer(Topology::PingPeersThatNeedPing(_r,now));
_r->topology->eachPeer(Topology::OpenPeersThatNeedFirewallOpener(_r,now));
} catch (std::exception &exc) {
LOG("unexpected exception running ping check cycle: %s",exc.what());
diff --git a/node/PacketDecoder.cpp b/node/PacketDecoder.cpp
index 2b8cba51..c7d3ffda 100644
--- a/node/PacketDecoder.cpp
+++ b/node/PacketDecoder.cpp
@@ -490,6 +490,7 @@ bool PacketDecoder::_doMULTICAST_FRAME(const RuntimeEnvironment *_r,const Shared
const unsigned int signatureLen = at<uint16_t>(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME + frameLen);
const unsigned char *const signature = field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME + frameLen + 2,signatureLen);
+ /*
TRACE("MULTICAST_FRAME @%.16llx #%.16llx from %s<%s> via %s(%s) to %s [ %s, %d bytes, depth %d ]",
(unsigned long long)nwid,
(unsigned long long)guid,
@@ -499,6 +500,7 @@ bool PacketDecoder::_doMULTICAST_FRAME(const RuntimeEnvironment *_r,const Shared
Switch::etherTypeName(etherType),
(int)frameLen,
(int)depth);
+ */
SharedPtr<Network> network(_r->nc->network(nwid));
diff --git a/node/Peer.cpp b/node/Peer.cpp
index 28926f97..3fad7c05 100644
--- a/node/Peer.cpp
+++ b/node/Peer.cpp
@@ -117,8 +117,13 @@ void Peer::receive(
_lastMulticastFrame = now;
}
+
bool Peer::send(const RuntimeEnvironment *_r,const void *data,unsigned int len,uint64_t now)
{
+ // Note: we'll still use TCP here if that's all we have, but if this
+ // is false we will prefer UDP.
+ bool useTcp = isTcpFailoverTime(_r,now);
+
Mutex::Lock _l(_lock);
std::vector<Path>::iterator p(_paths.begin());
@@ -127,11 +132,13 @@ bool Peer::send(const RuntimeEnvironment *_r,const void *data,unsigned int len,u
uint64_t bestPathLastReceived = p->lastReceived();
std::vector<Path>::iterator bestPath = p;
+ bool bestPathIsTcp = p->tcp();
while (++p != _paths.end()) {
uint64_t lr = p->lastReceived();
- if (lr > bestPathLastReceived) {
+ if ( (lr > bestPathLastReceived) || ((bestPathIsTcp)&&(!useTcp)) ) {
bestPathLastReceived = lr;
bestPath = p;
+ bestPathIsTcp = p->tcp();
}
}
@@ -156,35 +163,19 @@ bool Peer::sendFirewallOpener(const RuntimeEnvironment *_r,uint64_t now)
return sent;
}
-bool Peer::sendPing(const RuntimeEnvironment *_r,uint64_t now,bool firstSinceReset)
+bool Peer::sendPing(const RuntimeEnvironment *_r,uint64_t now)
{
bool sent = false;
SharedPtr<Peer> self(this);
- Mutex::Lock _l(_lock);
-
- // NOTE: this will never ping a peer that has *only* TCP paths. Right
- // now there's never such a thing as TCP is only for failover.
-
- bool pingTcp;
- if (!firstSinceReset) {
- uint64_t lastUdp = 0;
- uint64_t lastTcp = 0;
- uint64_t lastPing = 0;
- for(std::vector<Path>::iterator p(_paths.begin());p!=_paths.end();++p) {
- if (p->tcp())
- lastTcp = std::max(p->lastReceived(),lastTcp);
- else lastUdp = std::max(p->lastReceived(),lastUdp);
- lastPing = std::max(p->lastPing(),lastPing);
- }
- uint64_t lastAny = std::max(lastUdp,lastTcp);
- pingTcp = ( ( (lastAny < lastPing) && ((lastPing - lastAny) >= ZT_PING_UNANSWERED_AFTER) ) || (lastTcp > lastUdp) );
- } else pingTcp = false;
+ // In the ping case we will never send TCP unless this returns true.
+ bool useTcp = isTcpFailoverTime(_r,now);
- TRACE("PING %s (pingTcp==%d)",_id.address().toString().c_str(),(int)pingTcp);
+ TRACE("PING %s (useTcp==%d)",_id.address().toString().c_str(),(int)useTcp);
+ Mutex::Lock _l(_lock);
for(std::vector<Path>::iterator p(_paths.begin());p!=_paths.end();++p) {
- if ((pingTcp)||(!p->tcp())) {
+ if ((useTcp)||(!p->tcp())) {
if (_r->sw->sendHELLO(self,*p)) {
p->sent(now);
p->pinged(now);
diff --git a/node/Peer.hpp b/node/Peer.hpp
index d5d0a291..8b8bf578 100644
--- a/node/Peer.hpp
+++ b/node/Peer.hpp
@@ -160,10 +160,9 @@ public:
*
* @param _r Runtime environment
* @param now Current time
- * @param firstSinceReset If true, this is the first ping sent since a network reset
* @return True if send appears successful for at least one address type
*/
- bool sendPing(const RuntimeEnvironment *_r,uint64_t now,bool firstSinceReset);
+ bool sendPing(const RuntimeEnvironment *_r,uint64_t now);
/**
* Called periodically by Topology::clean() to remove stale paths and do other cleanup
@@ -264,6 +263,33 @@ public:
}
/**
+ * @param _r Runtime environment
+ * @param now Current time
+ * @return True if it's time to attempt TCP failover (if we have TCP_OUT paths)
+ */
+ inline bool isTcpFailoverTime(const RuntimeEnvironment *_r,uint64_t now) const
+ throw()
+ {
+ if ((now - _r->timeOfLastResynchronize) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) {
+ uint64_t lastUdpPingSent = 0;
+ uint64_t lastUdpReceive = 0;
+
+ {
+ Mutex::Lock _l(_lock);
+ for(std::vector<Path>::const_iterator p(_paths.begin());p!=_paths.end();++p) {
+ if (p->type() == Path::PATH_TYPE_UDP) {
+ lastUdpPingSent = std::max(lastUdpPingSent,p->lastPing());
+ lastUdpReceive = std::max(lastUdpReceive,p->lastReceived());
+ }
+ }
+ }
+
+ return ( (lastUdpPingSent > lastUdpReceive) && ((now - lastUdpPingSent) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) );
+ }
+ return false;
+ }
+
+ /**
* @return Current latency or 0 if unknown (max: 65535)
*/
inline unsigned int latency() const
diff --git a/node/Topology.hpp b/node/Topology.hpp
index bd39d0ec..35914cc9 100644
--- a/node/Topology.hpp
+++ b/node/Topology.hpp
@@ -207,11 +207,10 @@ public:
class PingPeersThatNeedPing
{
public:
- PingPeersThatNeedPing(const RuntimeEnvironment *renv,uint64_t now,bool firstSinceReset) throw() :
+ PingPeersThatNeedPing(const RuntimeEnvironment *renv,uint64_t now) throw() :
_now(now),
_supernodeAddresses(renv->topology->supernodeAddresses()),
- _r(renv),
- _firstSinceReset(firstSinceReset) {}
+ _r(renv) {}
inline void operator()(Topology &t,const SharedPtr<Peer> &p)
{
@@ -228,14 +227,13 @@ public:
/* 2b: peer is not a supernode */
(!_supernodeAddresses.count(p->address()))
)
- ) { p->sendPing(_r,_now,_firstSinceReset); }
+ ) { p->sendPing(_r,_now); }
}
private:
uint64_t _now;
std::set<Address> _supernodeAddresses;
const RuntimeEnvironment *_r;
- bool _firstSinceReset;
};
/**