summaryrefslogtreecommitdiff
path: root/node
diff options
context:
space:
mode:
Diffstat (limited to 'node')
-rw-r--r--node/Constants.hpp2
-rw-r--r--node/Path.hpp43
-rw-r--r--node/Peer.cpp31
3 files changed, 65 insertions, 11 deletions
diff --git a/node/Constants.hpp b/node/Constants.hpp
index 1a47112a..a2ba1c1a 100644
--- a/node/Constants.hpp
+++ b/node/Constants.hpp
@@ -274,7 +274,7 @@
/**
* No answer timeout to trigger dead path detection
*/
-#define ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT 3000
+#define ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT 2500
/**
* Probation threshold after which a path becomes dead
diff --git a/node/Path.hpp b/node/Path.hpp
index 23e47408..7e0a8ed1 100644
--- a/node/Path.hpp
+++ b/node/Path.hpp
@@ -66,6 +66,8 @@ class Path
public:
Path() :
_lastSend(0),
+ _lastPing(0),
+ _lastKeepalive(0),
_lastReceived(0),
_addr(),
_localAddress(),
@@ -76,6 +78,8 @@ public:
Path(const InetAddress &localAddress,const InetAddress &addr) :
_lastSend(0),
+ _lastPing(0),
+ _lastKeepalive(0),
_lastReceived(0),
_addr(addr),
_localAddress(localAddress),
@@ -98,10 +102,21 @@ public:
*
* @param t Time of send
*/
- inline void sent(uint64_t t)
- {
- _lastSend = t;
- }
+ inline void sent(uint64_t t) { _lastSend = t; }
+
+ /**
+ * Called when we've sent a ping or echo
+ *
+ * @param t Time of send
+ */
+ inline void pinged(uint64_t t) { _lastPing = t; }
+
+ /**
+ * Called when we send a NAT keepalive
+ *
+ * @param t Time of send
+ */
+ inline void sentKeepalive(uint64_t t) { _lastKeepalive = t; }
/**
* Called when a packet is received from this remote path
@@ -146,6 +161,16 @@ public:
inline uint64_t lastSend() const throw() { return _lastSend; }
/**
+ * @return Time we last pinged or dead path checked this link
+ */
+ inline uint64_t lastPing() const throw() { return _lastPing; }
+
+ /**
+ * @return Time of last keepalive
+ */
+ inline uint64_t lastKeepalive() const throw() { return _lastKeepalive; }
+
+ /**
* @return Time of last receive from this path
*/
inline uint64_t lastReceived() const throw() { return _lastReceived; }
@@ -260,8 +285,10 @@ public:
template<unsigned int C>
inline void serialize(Buffer<C> &b) const
{
- b.append((uint8_t)1); // version
+ b.append((uint8_t)2); // version
b.append((uint64_t)_lastSend);
+ b.append((uint64_t)_lastPing);
+ b.append((uint64_t)_lastKeepalive);
b.append((uint64_t)_lastReceived);
_addr.serialize(b);
_localAddress.serialize(b);
@@ -273,9 +300,11 @@ public:
inline unsigned int deserialize(const Buffer<C> &b,unsigned int startAt = 0)
{
unsigned int p = startAt;
- if (b[p++] != 1)
+ if (b[p++] != 2)
throw std::invalid_argument("invalid serialized Path");
_lastSend = b.template at<uint64_t>(p); p += 8;
+ _lastPing = b.template at<uint64_t>(p); p += 8;
+ _lastKeepalive = b.template at<uint64_t>(p); p += 8;
_lastReceived = b.template at<uint64_t>(p); p += 8;
p += _addr.deserialize(b,p);
p += _localAddress.deserialize(b,p);
@@ -290,6 +319,8 @@ public:
private:
uint64_t _lastSend;
+ uint64_t _lastPing;
+ uint64_t _lastKeepalive;
uint64_t _lastReceived;
InetAddress _addr;
InetAddress _localAddress;
diff --git a/node/Peer.cpp b/node/Peer.cpp
index a98d94c4..aff610d5 100644
--- a/node/Peer.cpp
+++ b/node/Peer.cpp
@@ -168,7 +168,10 @@ void Peer::received(
} else {
uint64_t slotLRmin = 0xffffffffffffffffULL;
for(unsigned int p=0;p<ZT_MAX_PEER_NETWORK_PATHS;++p) {
- if (_paths[p].lastReceived() <= slotLRmin) {
+ if (!_paths[p].active(now)) {
+ slot = &(_paths[p]);
+ break;
+ } else if (_paths[p].lastReceived() <= slotLRmin) {
slotLRmin = _paths[p].lastReceived();
slot = &(_paths[p]);
}
@@ -249,11 +252,12 @@ bool Peer::doPingAndKeepalive(uint64_t now,int inetAddressFamily)
//TRACE("PING %s(%s) after %llums/%llums send/receive inactivity",_id.address().toString().c_str(),p->address().toString().c_str(),now - p->lastSend(),now - p->lastReceived());
sendHELLO(p->localAddress(),p->address(),now);
p->sent(now);
- } else if (((now - p->lastSend()) >= ZT_NAT_KEEPALIVE_DELAY)&&(!p->reliable())) {
+ p->pinged(now);
+ } else if (((now - std::max(p->lastSend(),p->lastKeepalive())) >= ZT_NAT_KEEPALIVE_DELAY)&&(!p->reliable())) {
//TRACE("NAT keepalive %s(%s) after %llums/%llums send/receive inactivity",_id.address().toString().c_str(),p->address().toString().c_str(),now - p->lastSend(),now - p->lastReceived());
_natKeepaliveBuf += (uint32_t)((now * 0x9e3779b1) >> 1); // tumble this around to send constantly varying (meaningless) payloads
RR->node->putPacket(p->localAddress(),p->address(),&_natKeepaliveBuf,sizeof(_natKeepaliveBuf));
- p->sent(now);
+ p->sentKeepalive(now);
} else {
//TRACE("no PING or NAT keepalive: addr==%s reliable==%d %llums/%llums send/receive inactivity",p->address().toString().c_str(),(int)p->reliable(),now - p->lastSend(),now - p->lastReceived());
}
@@ -339,6 +343,8 @@ bool Peer::resetWithinScope(InetAddress::IpScope scope,uint64_t now)
unsigned int y = 0;
while (x < np) {
if (_paths[x].address().ipScope() == scope) {
+ // Resetting a path means sending a HELLO and then forgetting it. If we
+ // get OK(HELLO) then it will be re-learned.
sendHELLO(_paths[x].localAddress(),_paths[x].address(),now);
} else {
_paths[y++] = _paths[x];
@@ -491,7 +497,22 @@ bool Peer::_checkPath(Path &p,const uint64_t now)
if (!p.active(now))
return false;
- if ( (p.lastSend() > p.lastReceived()) && ((p.lastSend() - p.lastReceived()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) ) {
+ /* Dead path detection: if we have sent something to this peer and have not
+ * yet received a reply, double check this path. The majority of outbound
+ * packets including Ethernet frames do generate some kind of reply either
+ * immediately or at some point in the near future. This will occasionally
+ * (every NO_ANSWER_TIMEOUT ms) check paths unnecessarily if traffic that
+ * does not generate a response is being sent such as multicast announcements
+ * or frames belonging to unidirectional UDP protocols, but the cost is very
+ * tiny and the benefit in reliability is very large. This takes care of many
+ * failure modes including crap NATs that forget links and spurious changes
+ * to physical network topology that cannot be otherwise detected.
+ *
+ * Each time we do this we increment a probation counter in the path. This
+ * counter is reset on any packet receive over this path. If it reaches the
+ * MAX_PROBATION threshold the path is considred dead. */
+
+ if ( (p.lastSend() > p.lastReceived()) && ((p.lastSend() - p.lastReceived()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) && ((now - p.lastPing()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) ) {
TRACE("%s(%s) has not answered, checking if dead (probation: %u)",_id.address().toString().c_str(),p.address().toString().c_str(),p.probation());
if ( (_vProto >= 5) && ( !((_vMajor == 1)&&(_vMinor == 1)&&(_vRevision == 0)) ) ) {
@@ -499,9 +520,11 @@ bool Peer::_checkPath(Path &p,const uint64_t now)
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ECHO);
outp.armor(_key,true);
p.send(RR,outp.data(),outp.size(),now);
+ p.pinged(now);
} else {
sendHELLO(p.localAddress(),p.address(),now);
p.sent(now);
+ p.pinged(now);
}
p.increaseProbation();