From 62db18b6dd0002520d4828b0091995a40b4eb2e9 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko <adam.ierymenko@gmail.com>
Date: Tue, 27 Oct 2015 11:01:58 -0700
Subject: Lessen this limit just a bit to make cluster settle faster.

---
 node/Constants.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'node/Constants.hpp')

diff --git a/node/Constants.hpp b/node/Constants.hpp
index e45602f7..3ad07e4c 100644
--- a/node/Constants.hpp
+++ b/node/Constants.hpp
@@ -327,7 +327,7 @@
 /**
  * Minimum interval between direct path pushes from a given peer or we will ignore them
  */
-#define ZT_DIRECT_PATH_PUSH_MIN_RECEIVE_INTERVAL 2500
+#define ZT_DIRECT_PATH_PUSH_MIN_RECEIVE_INTERVAL 2000
 
 /**
  * How long (max) to remember network certificates of membership?
-- 
cgit v1.2.3


From a1a0ee4edb0933c9b82abad8715def6a63049658 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko <adam.ierymenko@gmail.com>
Date: Tue, 27 Oct 2015 12:01:00 -0700
Subject: Fix infinite loop in Cluster, clean up some stuff elsewhere, and back
 out rate limiting in PUSH_DIRECT_PATHS for now (but we will do something else
 to mitigate amplification attacks)

---
 node/Cluster.cpp        |  5 ++--
 node/Constants.hpp      |  7 +-----
 node/IncomingPacket.cpp | 12 +++-------
 node/Peer.cpp           |  1 -
 node/Peer.hpp           | 17 ++------------
 node/SelfAwareness.cpp  | 12 +++++-----
 node/Topology.cpp       | 62 +------------------------------------------------
 7 files changed, 16 insertions(+), 100 deletions(-)

(limited to 'node/Constants.hpp')

diff --git a/node/Cluster.cpp b/node/Cluster.cpp
index bd455933..eef02bc7 100644
--- a/node/Cluster.cpp
+++ b/node/Cluster.cpp
@@ -239,7 +239,7 @@ void Cluster::handleIncomingStateMessage(const void *msg,unsigned int len)
 							const MAC mac(dmsg.field(ptr,6),6); ptr += 6;
 							const uint32_t adi = dmsg.at<uint32_t>(ptr); ptr += 4;
 							RR->mc->add(RR->node->now(),nwid,MulticastGroup(mac,adi),address);
-							TRACE("[%u] %s likes %s/%.8x on %.16llu",(unsigned int)fromMemberId,address.toString().c_str(),mac.toString().c_str(),(unsigned int)adi,nwid);
+							TRACE("[%u] %s likes %s/%.8x on %.16llx",(unsigned int)fromMemberId,address.toString().c_str(),mac.toString().c_str(),(unsigned int)adi,nwid);
 						}	break;
 
 						case STATE_MESSAGE_COM: {
@@ -376,11 +376,12 @@ bool Cluster::sendViaCluster(const Address &fromPeerAddress,const Address &toPee
 		Mutex::Lock _l2(_peerAffinities_m);
 		std::vector<_PeerAffinity>::iterator i(std::lower_bound(_peerAffinities.begin(),_peerAffinities.end(),_PeerAffinity(toPeerAddress,0,0))); // O(log(n))
 		while ((i != _peerAffinities.end())&&(i->address() == toPeerAddress)) {
-			uint16_t mid = i->clusterMemberId();
+			const uint16_t mid = i->clusterMemberId();
 			if ((mid != _id)&&(i->timestamp > mostRecentTimestamp)) {
 				mostRecentTimestamp = i->timestamp;
 				canHasPeer = mid;
 			}
+			++i;
 		}
 	}
 
diff --git a/node/Constants.hpp b/node/Constants.hpp
index 3ad07e4c..bef1183a 100644
--- a/node/Constants.hpp
+++ b/node/Constants.hpp
@@ -324,11 +324,6 @@
  */
 #define ZT_DIRECT_PATH_PUSH_INTERVAL 120000
 
-/**
- * Minimum interval between direct path pushes from a given peer or we will ignore them
- */
-#define ZT_DIRECT_PATH_PUSH_MIN_RECEIVE_INTERVAL 2000
-
 /**
  * How long (max) to remember network certificates of membership?
  *
@@ -355,7 +350,7 @@
 /**
  * Maximum number of endpoints to contact per address type (to limit pushes like GitHub issue #235)
  */
-#define ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE 8
+#define ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE 2
 
 /**
  * A test pseudo-network-ID that can be joined
diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp
index 51e88364..2514cd64 100644
--- a/node/IncomingPacket.cpp
+++ b/node/IncomingPacket.cpp
@@ -622,7 +622,7 @@ bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,const Shared
 
 		// Iterate through 18-byte network,MAC,ADI tuples
 		for(unsigned int ptr=ZT_PACKET_IDX_PAYLOAD;ptr<size();ptr+=18) {
-			const uint64_t nwid(at<uint64_t>(ptr));
+			const uint64_t nwid = at<uint64_t>(ptr);
 			const MulticastGroup group(MAC(field(ptr + 8,6),6),at<uint32_t>(ptr + 14));
 			RR->mc->add(now,nwid,group,peer->address());
 #ifdef ZT_ENABLE_CLUSTER
@@ -888,12 +888,6 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha
 {
 	try {
 		const uint64_t now = RR->node->now();
-		if ((now - peer->lastDirectPathPushReceived()) < ZT_DIRECT_PATH_PUSH_MIN_RECEIVE_INTERVAL) {
-			TRACE("dropped PUSH_DIRECT_PATHS from %s(%s): too frequent!",source().toString().c_str(),_remoteAddress.toString().c_str());
-			return true;
-		}
-		peer->setLastDirectPathPushReceived(now);
-
 		const RemotePath *currentBest = peer->getBestPath(now);
 
 		unsigned int count = at<uint16_t>(ZT_PACKET_IDX_PAYLOAD);
@@ -916,7 +910,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha
 						TRACE("attempting to contact %s at pushed direct path %s",peer->address().toString().c_str(),a.toString().c_str());
 						if (v4Count++ < ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE) {
 							if ((!currentBest)||(currentBest->address() != a))
-								peer->attemptToContactAt(RR,_localAddress,a,RR->node->now());
+								peer->attemptToContactAt(RR,_localAddress,a,now);
 						}
 					}
 				}	break;
@@ -926,7 +920,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha
 						TRACE("attempting to contact %s at pushed direct path %s",peer->address().toString().c_str(),a.toString().c_str());
 						if (v6Count++ < ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE) {
 							if ((!currentBest)||(currentBest->address() != a))
-								peer->attemptToContactAt(RR,_localAddress,a,RR->node->now());
+								peer->attemptToContactAt(RR,_localAddress,a,now);
 						}
 					}
 				}	break;
diff --git a/node/Peer.cpp b/node/Peer.cpp
index f16f1421..d5367b17 100644
--- a/node/Peer.cpp
+++ b/node/Peer.cpp
@@ -55,7 +55,6 @@ Peer::Peer(const Identity &myIdentity,const Identity &peerIdentity)
 	_lastAnnouncedTo(0),
 	_lastPathConfirmationSent(0),
 	_lastDirectPathPushSent(0),
-	_lastDirectPathPushReceived(0),
 	_lastPathSort(0),
 	_vProto(0),
 	_vMajor(0),
diff --git a/node/Peer.hpp b/node/Peer.hpp
index 0aca70b4..04b541af 100644
--- a/node/Peer.hpp
+++ b/node/Peer.hpp
@@ -407,16 +407,6 @@ public:
 	 */
 	bool needsOurNetworkMembershipCertificate(uint64_t nwid,uint64_t now,bool updateLastPushedTime);
 
-	/**
-	 * @return Time last direct path push was received
-	 */
-	inline uint64_t lastDirectPathPushReceived() const throw() { return _lastDirectPathPushReceived; }
-
-	/**
-	 * @param t New time of last direct path push received
-	 */
-	inline void setLastDirectPathPushReceived(uint64_t t) throw() { _lastDirectPathPushReceived = t; }
-
 	/**
 	 * Perform periodic cleaning operations
 	 */
@@ -450,7 +440,7 @@ public:
 		const unsigned int recSizePos = b.size();
 		b.addSize(4); // space for uint32_t field length
 
-		b.append((uint16_t)1); // version of serialized Peer data
+		b.append((uint16_t)0); // version of serialized Peer data
 
 		_id.serialize(b,false);
 
@@ -461,7 +451,6 @@ public:
 		b.append((uint64_t)_lastAnnouncedTo);
 		b.append((uint64_t)_lastPathConfirmationSent);
 		b.append((uint64_t)_lastDirectPathPushSent);
-		b.append((uint64_t)_lastDirectPathPushReceived);
 		b.append((uint64_t)_lastPathSort);
 		b.append((uint16_t)_vProto);
 		b.append((uint16_t)_vMajor);
@@ -513,7 +502,7 @@ public:
 		const unsigned int recSize = b.template at<uint32_t>(p); p += 4;
 		if ((p + recSize) > b.size())
 			return SharedPtr<Peer>(); // size invalid
-		if (b.template at<uint16_t>(p) != 1)
+		if (b.template at<uint16_t>(p) != 0)
 			return SharedPtr<Peer>(); // version mismatch
 		p += 2;
 
@@ -531,7 +520,6 @@ public:
 		np->_lastAnnouncedTo = b.template at<uint64_t>(p); p += 8;
 		np->_lastPathConfirmationSent = b.template at<uint64_t>(p); p += 8;
 		np->_lastDirectPathPushSent = b.template at<uint64_t>(p); p += 8;
-		np->_lastDirectPathPushReceived = b.template at<uint64_t>(p); p += 8;
 		np->_lastPathSort = b.template at<uint64_t>(p); p += 8;
 		np->_vProto = b.template at<uint16_t>(p); p += 2;
 		np->_vMajor = b.template at<uint16_t>(p); p += 2;
@@ -581,7 +569,6 @@ private:
 	uint64_t _lastAnnouncedTo;
 	uint64_t _lastPathConfirmationSent;
 	uint64_t _lastDirectPathPushSent;
-	uint64_t _lastDirectPathPushReceived;
 	uint64_t _lastPathSort;
 	uint16_t _vProto;
 	uint16_t _vMajor;
diff --git a/node/SelfAwareness.cpp b/node/SelfAwareness.cpp
index 1b70f17c..81d19369 100644
--- a/node/SelfAwareness.cpp
+++ b/node/SelfAwareness.cpp
@@ -123,16 +123,16 @@ void SelfAwareness::iam(const Address &reporter,const InetAddress &reporterPhysi
 
 		// For all peers for whom we forgot an address, send a packet indirectly if
 		// they are still considered alive so that we will re-establish direct links.
-		SharedPtr<Peer> sn(RR->topology->getBestRoot());
-		if (sn) {
-			RemotePath *snp = sn->getBestPath(now);
-			if (snp) {
+		SharedPtr<Peer> r(RR->topology->getBestRoot());
+		if (r) {
+			RemotePath *rp = r->getBestPath(now);
+			if (rp) {
 				for(std::vector< SharedPtr<Peer> >::const_iterator p(rset.peersReset.begin());p!=rset.peersReset.end();++p) {
 					if ((*p)->alive(now)) {
-						TRACE("sending indirect NOP to %s via %s(%s) to re-establish link",(*p)->address().toString().c_str(),sn->address().toString().c_str(),snp->address().toString().c_str());
+						TRACE("sending indirect NOP to %s via %s to re-establish link",(*p)->address().toString().c_str(),r->address().toString().c_str());
 						Packet outp((*p)->address(),RR->identity.address(),Packet::VERB_NOP);
 						outp.armor((*p)->key(),true);
-						snp->send(RR,outp.data(),outp.size(),now);
+						rp->send(RR,outp.data(),outp.size(),now);
 					}
 				}
 			}
diff --git a/node/Topology.cpp b/node/Topology.cpp
index 7bb4b449..09668ef5 100644
--- a/node/Topology.cpp
+++ b/node/Topology.cpp
@@ -254,68 +254,8 @@ SharedPtr<Peer> Topology::getBestRoot(const Address *avoid,unsigned int avoidCou
 			return *bestOverall;
 		}
 
-		/*
-		unsigned int l,bestLatency = 65536;
-		uint64_t lds,ldr;
-
-		// First look for a best root by comparing latencies, but exclude
-		// root servers that have not responded to direct messages in order to
-		// try to exclude any that are dead or unreachable.
-		for(std::vector< SharedPtr<Peer> >::const_iterator sn(_rootPeers.begin());sn!=_rootPeers.end();) {
-			// Skip explicitly avoided relays
-			for(unsigned int i=0;i<avoidCount;++i) {
-				if (avoid[i] == (*sn)->address())
-					goto keep_searching_for_roots;
-			}
-
-			// Skip possibly comatose or unreachable relays
-			lds = (*sn)->lastDirectSend();
-			ldr = (*sn)->lastDirectReceive();
-			if ((lds)&&(lds > ldr)&&((lds - ldr) > ZT_PEER_RELAY_CONVERSATION_LATENCY_THRESHOLD))
-				goto keep_searching_for_roots;
-
-			if ((*sn)->hasActiveDirectPath(now)) {
-				l = (*sn)->latency();
-				if (bestRoot) {
-					if ((l)&&(l < bestLatency)) {
-						bestLatency = l;
-						bestRoot = *sn;
-					}
-				} else {
-					if (l)
-						bestLatency = l;
-					bestRoot = *sn;
-				}
-			}
-
-keep_searching_for_roots:
-			++sn;
-		}
-
-		if (bestRoot) {
-			bestRoot->use(now);
-			return bestRoot;
-		} else if (strictAvoid)
-			return SharedPtr<Peer>();
-
-		// If we have nothing from above, just pick one without avoidance criteria.
-		for(std::vector< SharedPtr<Peer> >::const_iterator sn=_rootPeers.begin();sn!=_rootPeers.end();++sn) {
-			if ((*sn)->hasActiveDirectPath(now)) {
-				unsigned int l = (*sn)->latency();
-				if (bestRoot) {
-					if ((l)&&(l < bestLatency)) {
-						bestLatency = l;
-						bestRoot = *sn;
-					}
-				} else {
-					if (l)
-						bestLatency = l;
-					bestRoot = *sn;
-				}
-			}
-		}
-		*/
 	}
+
 	return SharedPtr<Peer>();
 }
 
-- 
cgit v1.2.3


From cc1b275ad97bf186f21b487aa57d7893bee3c956 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko <adam.ierymenko@gmail.com>
Date: Tue, 27 Oct 2015 16:47:13 -0700
Subject: Replicate peer endpoints and forget paths if we have them -- this
 allows two clusters to talk to each other, whereas forgetting all paths does
 not.

---
 node/Cluster.cpp   | 45 +++++++++++++++++++++++++++------------------
 node/Cluster.hpp   |  7 ++++++-
 node/Constants.hpp |  4 ++--
 node/Peer.cpp      | 11 ++++-------
 node/Peer.hpp      | 19 +++++++++++++++++++
 5 files changed, 58 insertions(+), 28 deletions(-)

(limited to 'node/Constants.hpp')

diff --git a/node/Cluster.cpp b/node/Cluster.cpp
index b2f3d585..0797d83d 100644
--- a/node/Cluster.cpp
+++ b/node/Cluster.cpp
@@ -210,22 +210,30 @@ void Cluster::handleIncomingStateMessage(const void *msg,unsigned int len)
 						}	break;
 
 						case STATE_MESSAGE_HAVE_PEER: {
-							try {
-								Identity id;
-								ptr += id.deserialize(dmsg,ptr);
-								if (id) {
-									RR->topology->saveIdentity(id);
-									{
-										Mutex::Lock _l2(_peerAffinities_m);
-										_PA &pa = _peerAffinities[id.address()];
-										pa.ts = RR->node->now();
-										pa.mid = fromMemberId;
-			 						}
-			 						TRACE("[%u] has %s",(unsigned int)fromMemberId,id.address().toString().c_str());
-			 					}
-							} catch ( ... ) {
-								// ignore invalid identities
-							}
+							Identity id;
+							InetAddress physicalAddress;
+							ptr += id.deserialize(dmsg,ptr);
+							ptr += physicalAddress.deserialize(dmsg,ptr);
+							if (id) {
+								// Forget any paths that we have to this peer at its address
+								if (physicalAddress) {
+									SharedPtr<Peer> myPeerRecord(RR->topology->getPeer(id.address()));
+									if (myPeerRecord)
+										myPeerRecord->removePathByAddress(physicalAddress);
+								}
+
+								// Always save identity to update file time
+								RR->topology->saveIdentity(id);
+
+								// Set peer affinity to its new home
+								{
+									Mutex::Lock _l2(_peerAffinities_m);
+									_PA &pa = _peerAffinities[id.address()];
+									pa.ts = RR->node->now();
+									pa.mid = fromMemberId;
+		 						}
+		 						TRACE("[%u] has %s @ %s",(unsigned int)fromMemberId,id.address().toString().c_str(),physicalAddress.toString().c_str());
+		 					}
 						}	break;
 
 						case STATE_MESSAGE_MULTICAST_LIKE: {
@@ -396,7 +404,7 @@ bool Cluster::sendViaCluster(const Address &fromPeerAddress,const Address &toPee
 	return true;
 }
 
-void Cluster::replicateHavePeer(const Identity &peerId)
+void Cluster::replicateHavePeer(const Identity &peerId,const InetAddress &physicalAddress)
 {
 	const uint64_t now = RR->node->now();
 	{	// Use peer affinity table to track our own last announce time for peers
@@ -405,7 +413,7 @@ void Cluster::replicateHavePeer(const Identity &peerId)
 		if (pa.mid != _id) {
 			pa.ts = now;
 			pa.mid = _id;
-		} else if ((now - pa.ts) >= ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD) {
+		} else if ((now - pa.ts) < ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD) {
 			return;
 		} else {
 			pa.ts = now;
@@ -415,6 +423,7 @@ void Cluster::replicateHavePeer(const Identity &peerId)
 	// announcement
 	Buffer<4096> buf;
 	peerId.serialize(buf,false);
+	physicalAddress.serialize(buf);
 	{
 		Mutex::Lock _l(_memberIds_m);
 		for(std::vector<uint16_t>::const_iterator mid(_memberIds.begin());mid!=_memberIds.end();++mid) {
diff --git a/node/Cluster.hpp b/node/Cluster.hpp
index 42a26c7f..c3367d57 100644
--- a/node/Cluster.hpp
+++ b/node/Cluster.hpp
@@ -117,6 +117,10 @@ public:
 		/**
 		 * Cluster member has this peer:
 		 *   <[...] binary serialized peer identity>
+		 *   <[...] binary serialized peer remote physical address>
+		 *
+		 * Clusters send this message when they learn a path to a peer. The
+		 * replicated physical address is the one learned.
 		 */
 		STATE_MESSAGE_HAVE_PEER = 2,
 
@@ -225,8 +229,9 @@ public:
 	 * Advertise to the cluster that we have this peer
 	 *
 	 * @param peerId Identity of peer that we have
+	 * @param physicalAddress Physical address of peer (from our POV)
 	 */
-	void replicateHavePeer(const Identity &peerId);
+	void replicateHavePeer(const Identity &peerId,const InetAddress &physicalAddress);
 
 	/**
 	 * Advertise a multicast LIKE to the cluster
diff --git a/node/Constants.hpp b/node/Constants.hpp
index bef1183a..4b06db44 100644
--- a/node/Constants.hpp
+++ b/node/Constants.hpp
@@ -317,7 +317,7 @@
 /**
  * Minimum delay between attempts to confirm new paths to peers (to avoid HELLO flooding)
  */
-#define ZT_MIN_PATH_CONFIRMATION_INTERVAL 5000
+#define ZT_MIN_PATH_CONFIRMATION_INTERVAL 1000
 
 /**
  * Interval between direct path pushes in milliseconds
@@ -350,7 +350,7 @@
 /**
  * Maximum number of endpoints to contact per address type (to limit pushes like GitHub issue #235)
  */
-#define ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE 2
+#define ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE 4
 
 /**
  * A test pseudo-network-ID that can be joined
diff --git a/node/Peer.cpp b/node/Peer.cpp
index 99e2156e..99eb32c7 100644
--- a/node/Peer.cpp
+++ b/node/Peer.cpp
@@ -140,13 +140,10 @@ void Peer::received(
 			_lastMulticastFrame = now;
 
 #ifdef ZT_ENABLE_CLUSTER
-		// If we're in cluster mode and there's a better endpoint, stop here and don't
-		// learn or confirm paths. Also reset any existing paths, since they should
-		// go there and no longer talk to us here.
-		if (redirectTo) {
-			_numPaths = 0;
+		// If we think this peer belongs elsewhere, don't learn this path or
+		// do other connection init stuff.
+		if (redirectTo)
 			return;
-		}
 #endif
 
 		if ((now - _lastAnnouncedTo) >= ((ZT_MULTICAST_LIKE_EXPIRE / 2) - 1000)) {
@@ -206,7 +203,7 @@ void Peer::received(
 
 #ifdef ZT_ENABLE_CLUSTER
 	if ((RR->cluster)&&(pathIsConfirmed))
-		RR->cluster->replicateHavePeer(_id);
+		RR->cluster->replicateHavePeer(_id,remoteAddr);
 #endif
 
 	if (needMulticastGroupAnnounce) {
diff --git a/node/Peer.hpp b/node/Peer.hpp
index aa75b3f4..69343f20 100644
--- a/node/Peer.hpp
+++ b/node/Peer.hpp
@@ -412,6 +412,25 @@ public:
 	 */
 	void clean(const RuntimeEnvironment *RR,uint64_t now);
 
+	/**
+	 * Forget every path whose remote address matches the one given
+	 *
+	 * @param addr Remote address whose paths are to be dropped
+	 */
+	inline void removePathByAddress(const InetAddress &addr)
+	{
+		Mutex::Lock _l(_lock);
+		const unsigned int total = _numPaths;
+		unsigned int kept = 0; // next free slot for a surviving path
+		for(unsigned int src=0;src<total;++src) {
+			if (_paths[src].address() != addr) {
+				_paths[kept] = _paths[src]; // compact survivors toward the front
+				++kept;
+			}
+		}
+		_numPaths = kept;
+	}
+
 	/**
 	 * Find a common set of addresses by which two peers can link, if any
 	 *
-- 
cgit v1.2.3


From cdc99bfee10ac58a8dab1aabcb85e69f3862b7ad Mon Sep 17 00:00:00 2001
From: Adam Ierymenko <adam.ierymenko@gmail.com>
Date: Tue, 27 Oct 2015 18:18:26 -0700
Subject: Add a circuit breaker for VERB_PUSH_DIRECT_PATHS.

---
 node/Constants.hpp      | 26 ++++++++++++++++++++------
 node/IncomingPacket.cpp |  5 +++++
 node/Peer.cpp           |  2 ++
 node/Peer.hpp           | 31 +++++++++++++++++++++++++++++--
 4 files changed, 56 insertions(+), 8 deletions(-)

(limited to 'node/Constants.hpp')

diff --git a/node/Constants.hpp b/node/Constants.hpp
index 4b06db44..53cc64c7 100644
--- a/node/Constants.hpp
+++ b/node/Constants.hpp
@@ -319,11 +319,6 @@
  */
 #define ZT_MIN_PATH_CONFIRMATION_INTERVAL 1000
 
-/**
- * Interval between direct path pushes in milliseconds
- */
-#define ZT_DIRECT_PATH_PUSH_INTERVAL 120000
-
 /**
  * How long (max) to remember network certificates of membership?
  *
@@ -347,10 +342,29 @@
  */
 #define ZT_MAX_BRIDGE_SPAM 16
 
+/**
+ * Interval between direct path pushes in milliseconds
+ */
+#define ZT_DIRECT_PATH_PUSH_INTERVAL 120000
+
 /**
  * Maximum number of endpoints to contact per address type (to limit pushes like GitHub issue #235)
  */
-#define ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE 4
+#define ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE 5
+
+/**
+ * Time horizon for push direct paths cutoff
+ */
+#define ZT_PUSH_DIRECT_PATHS_CUTOFF_TIME 60000
+
+/**
+ * Maximum number of direct path pushes within cutoff time
+ *
+ * This limits response to PUSH_DIRECT_PATHS to CUTOFF_LIMIT responses
+ * per CUTOFF_TIME milliseconds per peer to prevent this from being
+ * useful for DOS amplification attacks.
+ */
+#define ZT_PUSH_DIRECT_PATHS_CUTOFF_LIMIT 5
 
 /**
  * A test pseudo-network-ID that can be joined
diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp
index b1a9af3b..e985b34c 100644
--- a/node/IncomingPacket.cpp
+++ b/node/IncomingPacket.cpp
@@ -901,6 +901,11 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha
 {
 	try {
 		const uint64_t now = RR->node->now();
+		if (!peer->shouldRespondToDirectPathPush(now)) {
+			TRACE("dropped PUSH_DIRECT_PATHS from %s(%s): circuit breaker tripped",source().toString().c_str(),_remoteAddress.toString().c_str());
+			return true;
+		}
+
 		const Path *currentBest = peer->getBestPath(now);
 
 		unsigned int count = at<uint16_t>(ZT_PACKET_IDX_PAYLOAD);
diff --git a/node/Peer.cpp b/node/Peer.cpp
index 99eb32c7..976c7c44 100644
--- a/node/Peer.cpp
+++ b/node/Peer.cpp
@@ -55,6 +55,7 @@ Peer::Peer(const Identity &myIdentity,const Identity &peerIdentity)
 	_lastAnnouncedTo(0),
 	_lastPathConfirmationSent(0),
 	_lastDirectPathPushSent(0),
+	_lastDirectPathPushReceive(0),
 	_lastPathSort(0),
 	_vProto(0),
 	_vMajor(0),
@@ -63,6 +64,7 @@ Peer::Peer(const Identity &myIdentity,const Identity &peerIdentity)
 	_id(peerIdentity),
 	_numPaths(0),
 	_latency(0),
+	_directPathPushCutoffCount(0),
 	_networkComs(4),
 	_lastPushedComs(4)
 {
diff --git a/node/Peer.hpp b/node/Peer.hpp
index 69343f20..d9ef5fcb 100644
--- a/node/Peer.hpp
+++ b/node/Peer.hpp
@@ -431,6 +431,27 @@ public:
 		_numPaths = y;
 	}
 
+	/**
+	 * Update direct path push stats and return true if we should respond
+	 *
+	 * This is a circuit breaker to make VERB_PUSH_DIRECT_PATHS not particularly
+	 * useful as a DDOS amplification attack vector. Otherwise a malicious peer
+	 * could send loads of these and cause others to bombard arbitrary IPs with
+	 * traffic.
+	 *
+	 * @param now Current time
+	 * @return True if we should respond (count is still below ZT_PUSH_DIRECT_PATHS_CUTOFF_LIMIT)
+	 */
+	inline bool shouldRespondToDirectPathPush(const uint64_t now)
+	{
+		Mutex::Lock _l(_lock);
+		if ((now - _lastDirectPathPushReceive) <= ZT_PUSH_DIRECT_PATHS_CUTOFF_TIME)
+			++_directPathPushCutoffCount; // another push within the cutoff window of the previous one
+		else _directPathPushCutoffCount = 0; // gap exceeded the window: start counting afresh
+		_lastDirectPathPushReceive = now;
+		return (_directPathPushCutoffCount < ZT_PUSH_DIRECT_PATHS_CUTOFF_LIMIT); // respond only while under the limit; at or above it the breaker is tripped
+	}
+
 	/**
 	 * Find a common set of addresses by which two peers can link, if any
 	 *
@@ -459,7 +480,7 @@ public:
 		const unsigned int recSizePos = b.size();
 		b.addSize(4); // space for uint32_t field length
 
-		b.append((uint16_t)0); // version of serialized Peer data
+		b.append((uint16_t)1); // version of serialized Peer data
 
 		_id.serialize(b,false);
 
@@ -470,12 +491,14 @@ public:
 		b.append((uint64_t)_lastAnnouncedTo);
 		b.append((uint64_t)_lastPathConfirmationSent);
 		b.append((uint64_t)_lastDirectPathPushSent);
+		b.append((uint64_t)_lastDirectPathPushReceive);
 		b.append((uint64_t)_lastPathSort);
 		b.append((uint16_t)_vProto);
 		b.append((uint16_t)_vMajor);
 		b.append((uint16_t)_vMinor);
 		b.append((uint16_t)_vRevision);
 		b.append((uint32_t)_latency);
+		b.append((uint16_t)_directPathPushCutoffCount);
 
 		b.append((uint16_t)_numPaths);
 		for(unsigned int i=0;i<_numPaths;++i)
@@ -521,7 +544,7 @@ public:
 		const unsigned int recSize = b.template at<uint32_t>(p); p += 4;
 		if ((p + recSize) > b.size())
 			return SharedPtr<Peer>(); // size invalid
-		if (b.template at<uint16_t>(p) != 0)
+		if (b.template at<uint16_t>(p) != 1)
 			return SharedPtr<Peer>(); // version mismatch
 		p += 2;
 
@@ -539,12 +562,14 @@ public:
 		np->_lastAnnouncedTo = b.template at<uint64_t>(p); p += 8;
 		np->_lastPathConfirmationSent = b.template at<uint64_t>(p); p += 8;
 		np->_lastDirectPathPushSent = b.template at<uint64_t>(p); p += 8;
+		np->_lastDirectPathPushReceive = b.template at<uint64_t>(p); p += 8;
 		np->_lastPathSort = b.template at<uint64_t>(p); p += 8;
 		np->_vProto = b.template at<uint16_t>(p); p += 2;
 		np->_vMajor = b.template at<uint16_t>(p); p += 2;
 		np->_vMinor = b.template at<uint16_t>(p); p += 2;
 		np->_vRevision = b.template at<uint16_t>(p); p += 2;
 		np->_latency = b.template at<uint32_t>(p); p += 4;
+		np->_directPathPushCutoffCount = b.template at<uint16_t>(p); p += 2;
 
 		const unsigned int numPaths = b.template at<uint16_t>(p); p += 2;
 		for(unsigned int i=0;i<numPaths;++i) {
@@ -588,6 +613,7 @@ private:
 	uint64_t _lastAnnouncedTo;
 	uint64_t _lastPathConfirmationSent;
 	uint64_t _lastDirectPathPushSent;
+	uint64_t _lastDirectPathPushReceive;
 	uint64_t _lastPathSort;
 	uint16_t _vProto;
 	uint16_t _vMajor;
@@ -597,6 +623,7 @@ private:
 	Path _paths[ZT_MAX_PEER_NETWORK_PATHS];
 	unsigned int _numPaths;
 	unsigned int _latency;
+	unsigned int _directPathPushCutoffCount;
 
 	struct _NetworkCom
 	{
-- 
cgit v1.2.3


From da9371284625d2481ed496921505c8afd2dae1f0 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko <adam.ierymenko@gmail.com>
Date: Wed, 28 Oct 2015 09:11:30 -0700
Subject: Clean up PUSH_DIRECT_PATH limits a bit more and make them a bit
 smarter.

---
 node/Constants.hpp      | 10 +++++-----
 node/IncomingPacket.cpp | 25 +++++++++++++++----------
 node/InetAddress.hpp    |  8 +++++++-
 3 files changed, 27 insertions(+), 16 deletions(-)

(limited to 'node/Constants.hpp')

diff --git a/node/Constants.hpp b/node/Constants.hpp
index 53cc64c7..5a4d4a4d 100644
--- a/node/Constants.hpp
+++ b/node/Constants.hpp
@@ -347,11 +347,6 @@
  */
 #define ZT_DIRECT_PATH_PUSH_INTERVAL 120000
 
-/**
- * Maximum number of endpoints to contact per address type (to limit pushes like GitHub issue #235)
- */
-#define ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE 5
-
 /**
  * Time horizon for push direct paths cutoff
  */
@@ -366,6 +361,11 @@
  */
 #define ZT_PUSH_DIRECT_PATHS_CUTOFF_LIMIT 5
 
+/**
+ * Maximum number of paths per IP scope (e.g. global, link-local) and family (e.g. v4/v6)
+ */
+#define ZT_PUSH_DIRECT_PATHS_MAX_PER_SCOPE_AND_FAMILY 1
+
 /**
  * A test pseudo-network-ID that can be joined
  *
diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp
index e985b34c..f06eb30c 100644
--- a/node/IncomingPacket.cpp
+++ b/node/IncomingPacket.cpp
@@ -901,16 +901,19 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha
 {
 	try {
 		const uint64_t now = RR->node->now();
+
+		// First, subject this to a rate limit
 		if (!peer->shouldRespondToDirectPathPush(now)) {
 			TRACE("dropped PUSH_DIRECT_PATHS from %s(%s): circuit breaker tripped",source().toString().c_str(),_remoteAddress.toString().c_str());
 			return true;
 		}
 
-		const Path *currentBest = peer->getBestPath(now);
+		// Second, limit addresses by scope and type
+		uint8_t countPerScope[ZT_INETADDRESS_MAX_SCOPE+1][2]; // [][0] is v4, [][1] is v6
+		memset(countPerScope,0,sizeof(countPerScope));
 
 		unsigned int count = at<uint16_t>(ZT_PACKET_IDX_PAYLOAD);
 		unsigned int ptr = ZT_PACKET_IDX_PAYLOAD + 2;
-		unsigned int v4Count = 0,v6Count = 0;
 
 		while (count--) { // if ptr overflows Buffer will throw
 			// TODO: some flags are not yet implemented
@@ -925,20 +928,22 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha
 				case 4: {
 					InetAddress a(field(ptr,4),4,at<uint16_t>(ptr + 4));
 					if ( ((flags & 0x01) == 0) && (Path::isAddressValidForPath(a)) ) {
-						TRACE("attempting to contact %s at pushed direct path %s",peer->address().toString().c_str(),a.toString().c_str());
-						if (v4Count++ < ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE) {
-							if ((!currentBest)||(currentBest->address() != a))
-								peer->attemptToContactAt(RR,_localAddress,a,now);
+						if (++countPerScope[(int)a.ipScope()][0] <= ZT_PUSH_DIRECT_PATHS_MAX_PER_SCOPE_AND_FAMILY) {
+							TRACE("attempting to contact %s at pushed direct path %s",peer->address().toString().c_str(),a.toString().c_str());
+							peer->attemptToContactAt(RR,_localAddress,a,now);
+						} else {
+							TRACE("ignoring contact for %s at %s -- too many per scope",peer->address().toString().c_str(),a.toString().c_str());
 						}
 					}
 				}	break;
 				case 6: {
 					InetAddress a(field(ptr,16),16,at<uint16_t>(ptr + 16));
 					if ( ((flags & 0x01) == 0) && (Path::isAddressValidForPath(a)) ) {
-						TRACE("attempting to contact %s at pushed direct path %s",peer->address().toString().c_str(),a.toString().c_str());
-						if (v6Count++ < ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE) {
-							if ((!currentBest)||(currentBest->address() != a))
-								peer->attemptToContactAt(RR,_localAddress,a,now);
+						if (++countPerScope[(int)a.ipScope()][1] <= ZT_PUSH_DIRECT_PATHS_MAX_PER_SCOPE_AND_FAMILY) {
+							TRACE("attempting to contact %s at pushed direct path %s",peer->address().toString().c_str(),a.toString().c_str());
+							peer->attemptToContactAt(RR,_localAddress,a,now);
+						} else {
+							TRACE("ignoring contact for %s at %s -- too many per scope",peer->address().toString().c_str(),a.toString().c_str());
 						}
 					}
 				}	break;
diff --git a/node/InetAddress.hpp b/node/InetAddress.hpp
index fcbed4b1..5e5eb06e 100644
--- a/node/InetAddress.hpp
+++ b/node/InetAddress.hpp
@@ -42,6 +42,11 @@
 
 namespace ZeroTier {
 
+/**
+ * Maximum integer value of enum IpScope
+ */
+#define ZT_INETADDRESS_MAX_SCOPE 7
+
 /**
  * Extends sockaddr_storage with friendly C++ methods
  *
@@ -66,7 +71,8 @@ struct InetAddress : public sockaddr_storage
 	 * IP address scope
 	 *
 	 * Note that these values are in ascending order of path preference and
-	 * MUST remain that way or Path must be changed to reflect.
+	 * MUST remain that way or Path must be changed to reflect. Also be sure
+	 * to change ZT_INETADDRESS_MAX_SCOPE if the max changes.
 	 */
 	enum IpScope
 	{
-- 
cgit v1.2.3


From b6725c44150af306130d38a2875ab31a640607c8 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko <adam.ierymenko@gmail.com>
Date: Fri, 30 Oct 2015 11:48:33 -0700
Subject: Optimize AntiRecursion.

---
 node/AntiRecursion.hpp | 66 ++++++++++++++++++++++++++++++++------------------
 node/Constants.hpp     |  5 ----
 2 files changed, 42 insertions(+), 29 deletions(-)

(limited to 'node/Constants.hpp')

diff --git a/node/AntiRecursion.hpp b/node/AntiRecursion.hpp
index 4bb24bf3..8629d19a 100644
--- a/node/AntiRecursion.hpp
+++ b/node/AntiRecursion.hpp
@@ -35,28 +35,28 @@
 
 namespace ZeroTier {
 
-#define ZT_ANTIRECURSION_TAIL_LEN 128
+/**
+ * Size of anti-recursion history
+ */
+#define ZT_ANTIRECURSION_HISTORY_SIZE 16
 
 /**
  * Filter to prevent recursion (ZeroTier-over-ZeroTier)
  *
  * This works by logging ZeroTier packets that we send. It's then invoked
- * again against packets read from local Ethernet taps. If the last N
+ * again against packets read from local Ethernet taps. If the last 32
  * bytes representing the ZeroTier packet match in the tap frame, then
  * the frame is a re-injection of a frame that we sent and is rejected.
  *
  * This means that ZeroTier packets simply will not traverse ZeroTier
  * networks, which would cause all sorts of weird problems.
  *
- * NOTE: this is applied to low-level packets before they are sent to
- * SocketManager and/or sockets, not to fully assembled packets before
- * (possible) fragmentation.
+ * This is highly optimized code since it's checked for every packet.
  */
 class AntiRecursion
 {
 public:
 	AntiRecursion()
-		throw()
 	{
 		memset(_history,0,sizeof(_history));
 		_ptr = 0;
@@ -68,13 +68,20 @@ public:
 	 * @param data ZT packet data
 	 * @param len Length of packet
 	 */
-	inline void logOutgoingZT(const void *data,unsigned int len)
-		throw()
+	inline void logOutgoingZT(const void *const data,const unsigned int len)
 	{
-		ArItem *i = &(_history[_ptr++ % ZT_ANTIRECURSION_HISTORY_SIZE]);
-		const unsigned int tl = (len > ZT_ANTIRECURSION_TAIL_LEN) ? ZT_ANTIRECURSION_TAIL_LEN : len;
-		memcpy(i->tail,((const unsigned char *)data) + (len - tl),tl);
-		i->len = tl;
+		if (len < 32)
+			return;
+#ifdef ZT_NO_TYPE_PUNNING
+		memcpy(_history[++_ptr % ZT_ANTIRECURSION_HISTORY_SIZE].tail,reinterpret_cast<const uint8_t *>(data) + (len - 32),32);
+#else
+		uint64_t *t = _history[++_ptr % ZT_ANTIRECURSION_HISTORY_SIZE].tail;
+		const uint64_t *p = reinterpret_cast<const uint64_t *>(reinterpret_cast<const uint8_t *>(data) + (len - 32));
+		*(t++) = *(p++);
+		*(t++) = *(p++);
+		*(t++) = *(p++);
+		*t = *p;
+#endif
 	}
 
 	/**
@@ -84,25 +91,36 @@ public:
 	 * @param len Length of frame
 	 * @return True if frame is OK to be passed, false if it's a ZT frame that we sent
 	 */
-	inline bool checkEthernetFrame(const void *data,unsigned int len)
-		throw()
+	inline bool checkEthernetFrame(const void *const data,const unsigned int len) const
 	{
-		for(unsigned int h=0;h<ZT_ANTIRECURSION_HISTORY_SIZE;++h) {
-			ArItem *i = &(_history[h]);
-			if ((i->len > 0)&&(len >= i->len)&&(!memcmp(((const unsigned char *)data) + (len - i->len),i->tail,i->len)))
+		if (len < 32)
+			return true;
+		const uint8_t *const pp = reinterpret_cast<const uint8_t *>(data) + (len - 32);
+		const _ArItem *i = _history;
+		const _ArItem *const end = i + ZT_ANTIRECURSION_HISTORY_SIZE;
+		while (i != end) {
+#ifdef ZT_NO_TYPE_PUNNING
+			if (!memcmp(pp,i->tail,32))
 				return false;
+#else
+			const uint64_t *t = i->tail;
+			const uint64_t *p = reinterpret_cast<const uint64_t *>(pp);
+			uint64_t bits = *(t++) ^ *(p++);
+			bits |= *(t++) ^ *(p++);
+			bits |= *(t++) ^ *(p++);
+			bits |= *t ^ *p;
+			if (!bits)
+				return false;
+#endif
+			++i;
 		}
 		return true;
 	}
 
 private:
-	struct ArItem
-	{
-		unsigned char tail[ZT_ANTIRECURSION_TAIL_LEN];
-		unsigned int len;
-	};
-	ArItem _history[ZT_ANTIRECURSION_HISTORY_SIZE];
-	volatile unsigned int _ptr;
+	struct _ArItem { uint64_t tail[4]; };
+	_ArItem _history[ZT_ANTIRECURSION_HISTORY_SIZE];
+	volatile unsigned long _ptr;
 };
 
 } // namespace ZeroTier
diff --git a/node/Constants.hpp b/node/Constants.hpp
index 5a4d4a4d..1d5fa6f4 100644
--- a/node/Constants.hpp
+++ b/node/Constants.hpp
@@ -309,11 +309,6 @@
  */
 #define ZT_NAT_T_TACTICAL_ESCALATION_DELAY 1000
 
-/**
- * Size of anti-recursion history (see AntiRecursion.hpp)
- */
-#define ZT_ANTIRECURSION_HISTORY_SIZE 16
-
 /**
  * Minimum delay between attempts to confirm new paths to peers (to avoid HELLO flooding)
  */
-- 
cgit v1.2.3


From 60ce886605c0298fc22dbce48beb106a96bd35e2 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko <adam.ierymenko@gmail.com>
Date: Mon, 2 Nov 2015 15:15:20 -0800
Subject: Tweak some timings for better reliability.

---
 node/Cluster.cpp             |   6 +-
 node/Cluster.hpp             |   8 +-
 node/Constants.hpp           |  24 +----
 node/Multicaster.cpp         | 220 ++++++++++++++++++++++---------------------
 node/Node.cpp                |  13 +--
 tests/http/big-test-kill.sh  |   2 +-
 tests/http/big-test-ready.sh |   2 +-
 tests/http/big-test-start.sh |   4 +-
 8 files changed, 131 insertions(+), 148 deletions(-)

(limited to 'node/Constants.hpp')

diff --git a/node/Cluster.cpp b/node/Cluster.cpp
index d0daae43..e9e31ede 100644
--- a/node/Cluster.cpp
+++ b/node/Cluster.cpp
@@ -85,7 +85,8 @@ Cluster::Cluster(
 	_members(new _Member[ZT_CLUSTER_MAX_MEMBERS]),
 	_peerAffinities(65536),
 	_lastCleanedPeerAffinities(0),
-	_lastCheckedPeersForAnnounce(0)
+	_lastCheckedPeersForAnnounce(0),
+	_lastFlushed(0)
 {
 	uint16_t stmp[ZT_SHA512_DIGEST_LEN / sizeof(uint16_t)];
 
@@ -510,7 +511,8 @@ void Cluster::doPeriodicTasks()
 	}
 
 	// Flush outgoing packet send queue every doPeriodicTasks()
-	{
+	if ((now - _lastFlushed) >= ZT_CLUSTER_FLUSH_PERIOD) {
+		_lastFlushed = now;
 		Mutex::Lock _l(_memberIds_m);
 		for(std::vector<uint16_t>::const_iterator mid(_memberIds.begin());mid!=_memberIds.end();++mid) {
 			Mutex::Lock _l2(_members[*mid].lock);
diff --git a/node/Cluster.hpp b/node/Cluster.hpp
index 7d7a1ced..f1caa436 100644
--- a/node/Cluster.hpp
+++ b/node/Cluster.hpp
@@ -55,13 +55,18 @@
 /**
  * How often should we announce that we have a peer?
  */
-#define ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD ((ZT_PEER_ACTIVITY_TIMEOUT / 2) - 1000)
+#define ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD (ZT_PEER_DIRECT_PING_DELAY / 2)
 
 /**
  * Desired period between doPeriodicTasks() in milliseconds
  */
 #define ZT_CLUSTER_PERIODIC_TASK_PERIOD 250
 
+/**
+ * How often to flush outgoing message queues (maximum interval)
+ */
+#define ZT_CLUSTER_FLUSH_PERIOD 500
+
 namespace ZeroTier {
 
 class RuntimeEnvironment;
@@ -355,6 +360,7 @@ private:
 
 	uint64_t _lastCleanedPeerAffinities;
 	uint64_t _lastCheckedPeersForAnnounce;
+	uint64_t _lastFlushed;
 };
 
 } // namespace ZeroTier
diff --git a/node/Constants.hpp b/node/Constants.hpp
index 1d5fa6f4..bb62484d 100644
--- a/node/Constants.hpp
+++ b/node/Constants.hpp
@@ -173,13 +173,8 @@
 
 /**
  * Timeout for receipt of fragmented packets in ms
- *
- * Since there's no retransmits, this is just a really bad case scenario for
- * transit time. It's short enough that a DOS attack from exhausing buffers is
- * very unlikely, as the transfer rate would have to be fast enough to fill
- * system memory in this time.
  */
-#define ZT_FRAGMENTED_PACKET_RECEIVE_TIMEOUT 1000
+#define ZT_FRAGMENTED_PACKET_RECEIVE_TIMEOUT 500
 
 /**
  * Length of secret key in bytes -- 256-bit -- do not change
@@ -194,7 +189,7 @@
 /**
  * Overriding granularity for timer tasks to prevent CPU-intensive thrashing on every packet
  */
-#define ZT_CORE_TIMER_TASK_GRANULARITY 1000
+#define ZT_CORE_TIMER_TASK_GRANULARITY 500
 
 /**
  * How long to remember peer records in RAM if they haven't been used
@@ -269,7 +264,7 @@
 /**
  * Delay between ordinary case pings of direct links
  */
-#define ZT_PEER_DIRECT_PING_DELAY 120000
+#define ZT_PEER_DIRECT_PING_DELAY 60000
 
 /**
  * Delay between requests for updated network autoconf information
@@ -279,18 +274,7 @@
 /**
  * Timeout for overall peer activity (measured from last receive)
  */
-#define ZT_PEER_ACTIVITY_TIMEOUT (ZT_PEER_DIRECT_PING_DELAY + (ZT_PING_CHECK_INVERVAL * 3))
-
-/**
- * Stop relaying via peers that have not responded to direct sends
- *
- * When we send something (including frames), we generally expect a response.
- * Switching relays if no response in a short period of time causes more
- * rapid failover if a root server goes down or becomes unreachable. In the
- * mistaken case, little harm is done as it'll pick the next-fastest
- * root server and will switch back eventually.
- */
-#define ZT_PEER_RELAY_CONVERSATION_LATENCY_THRESHOLD 10000
+#define ZT_PEER_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 3) + (ZT_PING_CHECK_INVERVAL * 2))
 
 /**
  * Minimum interval between attempts by relays to unite peers
diff --git a/node/Multicaster.cpp b/node/Multicaster.cpp
index e43d7d88..01e6b799 100644
--- a/node/Multicaster.cpp
+++ b/node/Multicaster.cpp
@@ -175,128 +175,130 @@ void Multicaster::send(
 	unsigned long idxbuf[8194];
 	unsigned long *indexes = idxbuf;
 
-	Mutex::Lock _l(_groups_m);
-	MulticastGroupStatus &gs = _groups[Multicaster::Key(nwid,mg)];
-
-	if (!gs.members.empty()) {
-		// Allocate a memory buffer if group is monstrous
-		if (gs.members.size() > (sizeof(idxbuf) / sizeof(unsigned long)))
-			indexes = new unsigned long[gs.members.size()];
-
-		// Generate a random permutation of member indexes
-		for(unsigned long i=0;i<gs.members.size();++i)
-			indexes[i] = i;
-		for(unsigned long i=(unsigned long)gs.members.size()-1;i>0;--i) {
-			unsigned long j = (unsigned long)RR->node->prng() % (i + 1);
-			unsigned long tmp = indexes[j];
-			indexes[j] = indexes[i];
-			indexes[i] = tmp;
+	try {
+		Mutex::Lock _l(_groups_m);
+		MulticastGroupStatus &gs = _groups[Multicaster::Key(nwid,mg)];
+
+		if (!gs.members.empty()) {
+			// Allocate a memory buffer if group is monstrous
+			if (gs.members.size() > (sizeof(idxbuf) / sizeof(unsigned long)))
+				indexes = new unsigned long[gs.members.size()];
+
+			// Generate a random permutation of member indexes
+			for(unsigned long i=0;i<gs.members.size();++i)
+				indexes[i] = i;
+			for(unsigned long i=(unsigned long)gs.members.size()-1;i>0;--i) {
+				unsigned long j = (unsigned long)RR->node->prng() % (i + 1);
+				unsigned long tmp = indexes[j];
+				indexes[j] = indexes[i];
+				indexes[i] = tmp;
+			}
 		}
-	}
 
-	if (gs.members.size() >= limit) {
-		// Skip queue if we already have enough members to complete the send operation
-		OutboundMulticast out;
-
-		out.init(
-			RR,
-			now,
-			nwid,
-			com,
-			limit,
-			1, // we'll still gather a little from peers to keep multicast list fresh
-			src,
-			mg,
-			etherType,
-			data,
-			len);
-
-		unsigned int count = 0;
-
-		for(std::vector<Address>::const_iterator ast(alwaysSendTo.begin());ast!=alwaysSendTo.end();++ast) {
-			if (*ast != RR->identity.address()) {
-				out.sendOnly(RR,*ast);
-				if (++count >= limit)
-					break;
+		if (gs.members.size() >= limit) {
+			// Skip queue if we already have enough members to complete the send operation
+			OutboundMulticast out;
+
+			out.init(
+				RR,
+				now,
+				nwid,
+				com,
+				limit,
+				1, // we'll still gather a little from peers to keep multicast list fresh
+				src,
+				mg,
+				etherType,
+				data,
+				len);
+
+			unsigned int count = 0;
+
+			for(std::vector<Address>::const_iterator ast(alwaysSendTo.begin());ast!=alwaysSendTo.end();++ast) {
+				if (*ast != RR->identity.address()) {
+					out.sendOnly(RR,*ast); // optimization: don't use dedup log if it's a one-pass send
+					if (++count >= limit)
+						break;
+				}
 			}
-		}
 
-		unsigned long idx = 0;
-		while ((count < limit)&&(idx < gs.members.size())) {
-			Address ma(gs.members[indexes[idx++]].address);
-			if (std::find(alwaysSendTo.begin(),alwaysSendTo.end(),ma) == alwaysSendTo.end()) {
-				out.sendOnly(RR,ma);
-				++count;
+			unsigned long idx = 0;
+			while ((count < limit)&&(idx < gs.members.size())) {
+				Address ma(gs.members[indexes[idx++]].address);
+				if (std::find(alwaysSendTo.begin(),alwaysSendTo.end(),ma) == alwaysSendTo.end()) {
+					out.sendOnly(RR,ma); // optimization: don't use dedup log if it's a one-pass send
+					++count;
+				}
 			}
-		}
-	} else {
-		unsigned int gatherLimit = (limit - (unsigned int)gs.members.size()) + 1;
-
-		if ((now - gs.lastExplicitGather) >= ZT_MULTICAST_EXPLICIT_GATHER_DELAY) {
-			gs.lastExplicitGather = now;
-			SharedPtr<Peer> r(RR->topology->getBestRoot());
-			if (r) {
-				TRACE(">>MC upstream GATHER up to %u for group %.16llx/%s",gatherLimit,nwid,mg.toString().c_str());
-
-				const CertificateOfMembership *com = (CertificateOfMembership *)0;
-				{
-					SharedPtr<Network> nw(RR->node->network(nwid));
-					if (nw) {
-						SharedPtr<NetworkConfig> nconf(nw->config2());
-						if ((nconf)&&(nconf->com())&&(nconf->isPrivate())&&(r->needsOurNetworkMembershipCertificate(nwid,now,true)))
-							com = &(nconf->com());
+		} else {
+			unsigned int gatherLimit = (limit - (unsigned int)gs.members.size()) + 1;
+
+			if ((gs.members.empty())||((now - gs.lastExplicitGather) >= ZT_MULTICAST_EXPLICIT_GATHER_DELAY)) {
+				gs.lastExplicitGather = now;
+				SharedPtr<Peer> r(RR->topology->getBestRoot());
+				if (r) {
+					TRACE(">>MC upstream GATHER up to %u for group %.16llx/%s",gatherLimit,nwid,mg.toString().c_str());
+
+					const CertificateOfMembership *com = (CertificateOfMembership *)0;
+					{
+						SharedPtr<Network> nw(RR->node->network(nwid));
+						if (nw) {
+							SharedPtr<NetworkConfig> nconf(nw->config2());
+							if ((nconf)&&(nconf->com())&&(nconf->isPrivate())&&(r->needsOurNetworkMembershipCertificate(nwid,now,true)))
+								com = &(nconf->com());
+						}
 					}
-				}
 
-				Packet outp(r->address(),RR->identity.address(),Packet::VERB_MULTICAST_GATHER);
-				outp.append(nwid);
-				outp.append((uint8_t)(com ? 0x01 : 0x00));
-				mg.mac().appendTo(outp);
-				outp.append((uint32_t)mg.adi());
-				outp.append((uint32_t)gatherLimit);
-				if (com)
-					com->serialize(outp);
-				outp.armor(r->key(),true);
-				r->send(RR,outp.data(),outp.size(),now);
+					Packet outp(r->address(),RR->identity.address(),Packet::VERB_MULTICAST_GATHER);
+					outp.append(nwid);
+					outp.append((uint8_t)(com ? 0x01 : 0x00));
+					mg.mac().appendTo(outp);
+					outp.append((uint32_t)mg.adi());
+					outp.append((uint32_t)gatherLimit);
+					if (com)
+						com->serialize(outp);
+					outp.armor(r->key(),true);
+					r->send(RR,outp.data(),outp.size(),now);
+				}
+				gatherLimit = 0;
 			}
-			gatherLimit = 0;
-		}
 
-		gs.txQueue.push_back(OutboundMulticast());
-		OutboundMulticast &out = gs.txQueue.back();
-
-		out.init(
-			RR,
-			now,
-			nwid,
-			com,
-			limit,
-			gatherLimit,
-			src,
-			mg,
-			etherType,
-			data,
-			len);
-
-		unsigned int count = 0;
-
-		for(std::vector<Address>::const_iterator ast(alwaysSendTo.begin());ast!=alwaysSendTo.end();++ast) {
-			if (*ast != RR->identity.address()) {
-				out.sendAndLog(RR,*ast);
-				if (++count >= limit)
-					break;
+			gs.txQueue.push_back(OutboundMulticast());
+			OutboundMulticast &out = gs.txQueue.back();
+
+			out.init(
+				RR,
+				now,
+				nwid,
+				com,
+				limit,
+				gatherLimit,
+				src,
+				mg,
+				etherType,
+				data,
+				len);
+
+			unsigned int count = 0;
+
+			for(std::vector<Address>::const_iterator ast(alwaysSendTo.begin());ast!=alwaysSendTo.end();++ast) {
+				if (*ast != RR->identity.address()) {
+					out.sendAndLog(RR,*ast);
+					if (++count >= limit)
+						break;
+				}
 			}
-		}
 
-		unsigned long idx = 0;
-		while ((count < limit)&&(idx < gs.members.size())) {
-			Address ma(gs.members[indexes[idx++]].address);
-			if (std::find(alwaysSendTo.begin(),alwaysSendTo.end(),ma) == alwaysSendTo.end()) {
-				out.sendAndLog(RR,ma);
-				++count;
+			unsigned long idx = 0;
+			while ((count < limit)&&(idx < gs.members.size())) {
+				Address ma(gs.members[indexes[idx++]].address);
+				if (std::find(alwaysSendTo.begin(),alwaysSendTo.end(),ma) == alwaysSendTo.end()) {
+					out.sendAndLog(RR,ma);
+					++count;
+				}
 			}
 		}
-	}
+	} catch ( ... ) {} // this is a sanity check to catch any failures and make sure indexes[] still gets deleted
 
 	// Free allocated memory buffer if any
 	if (indexes != idxbuf)
diff --git a/node/Node.cpp b/node/Node.cpp
index 42180e99..74acc869 100644
--- a/node/Node.cpp
+++ b/node/Node.cpp
@@ -305,18 +305,7 @@ ZT_ResultCode Node::processBackgroundTasks(uint64_t now,volatile uint64_t *nextB
 			for(std::vector< SharedPtr<Network> >::const_iterator n(needConfig.begin());n!=needConfig.end();++n)
 				(*n)->requestConfiguration();
 
-			// Attempt to contact network preferred relays that we don't have direct links to
-			std::sort(networkRelays.begin(),networkRelays.end());
-			networkRelays.erase(std::unique(networkRelays.begin(),networkRelays.end()),networkRelays.end());
-			for(std::vector< std::pair<Address,InetAddress> >::const_iterator nr(networkRelays.begin());nr!=networkRelays.end();++nr) {
-				if (nr->second) {
-					SharedPtr<Peer> rp(RR->topology->getPeer(nr->first));
-					if ((rp)&&(!rp->hasActiveDirectPath(now)))
-						rp->attemptToContactAt(RR,InetAddress(),nr->second,now);
-				}
-			}
-
-			// Ping living or root server/relay peers
+			// Do pings and keepalives
 			_PingPeersThatNeedPing pfunc(RR,now,networkRelays);
 			RR->topology->eachPeer<_PingPeersThatNeedPing &>(pfunc);
 
diff --git a/tests/http/big-test-kill.sh b/tests/http/big-test-kill.sh
index 4a764d1f..59f36788 100755
--- a/tests/http/big-test-kill.sh
+++ b/tests/http/big-test-kill.sh
@@ -13,6 +13,6 @@ CONTAINER_IMAGE=zerotier/http-test
 
 export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin
 
-pssh -h big-test-hosts -i -t 128 -p 256 "docker ps -aq | xargs -r docker rm -f"
+pssh -h big-test-hosts -i -t 0 -p 256 "docker ps -aq | xargs -r docker rm -f"
 
 exit 0
diff --git a/tests/http/big-test-ready.sh b/tests/http/big-test-ready.sh
index 391ca2a1..aa540bba 100755
--- a/tests/http/big-test-ready.sh
+++ b/tests/http/big-test-ready.sh
@@ -25,6 +25,6 @@ export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin
 #	docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE
 #done
 
-pssh -h big-test-hosts -i -t 128 -p 256 "docker pull $CONTAINER_IMAGE"
+pssh -h big-test-hosts -i -t 0 -p 256 "docker pull $CONTAINER_IMAGE"
 
 exit 0
diff --git a/tests/http/big-test-start.sh b/tests/http/big-test-start.sh
index a5e71ef1..43166c6e 100755
--- a/tests/http/big-test-start.sh
+++ b/tests/http/big-test-start.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 # Edit as needed -- note that >1000 per host is likely problematic due to Linux kernel limits
-NUM_CONTAINERS=100
+NUM_CONTAINERS=25
 CONTAINER_IMAGE=zerotier/http-test
 
 #
@@ -25,6 +25,6 @@ export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin
 #	docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE
 #done
 
-pssh -h big-test-hosts -i -t 128 -p 256 "for ((n=0;n<$NUM_CONTAINERS;n++)); do docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 0.25; done"
+pssh -h big-test-hosts -i -t 0 -p 256 "for ((n=0;n<$NUM_CONTAINERS;n++)); do docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 0.25; done"
 
 exit 0
-- 
cgit v1.2.3


From 7fbe2f7adf3575f3a21fc1ab3a5a2a036e18e6e2 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko <adam.ierymenko@gmail.com>
Date: Mon, 2 Nov 2015 15:38:53 -0800
Subject: Tweak some more timings for better reliability.

---
 node/Cluster.hpp             |  2 +-
 node/Constants.hpp           | 10 +++++-----
 node/Node.cpp                |  2 +-
 node/Peer.hpp                |  4 ++--
 node/SelfAwareness.cpp       |  2 +-
 node/Switch.cpp              |  6 +++---
 node/Topology.hpp            |  9 ++++++---
 tests/http/big-test-start.sh |  4 ++--
 8 files changed, 21 insertions(+), 18 deletions(-)

(limited to 'node/Constants.hpp')

diff --git a/node/Cluster.hpp b/node/Cluster.hpp
index f1caa436..ee220999 100644
--- a/node/Cluster.hpp
+++ b/node/Cluster.hpp
@@ -55,7 +55,7 @@
 /**
  * How often should we announce that we have a peer?
  */
-#define ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD (ZT_PEER_DIRECT_PING_DELAY / 2)
+#define ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD ZT_PEER_DIRECT_PING_DELAY
 
 /**
  * Desired period between doPeriodicTasks() in milliseconds
diff --git a/node/Constants.hpp b/node/Constants.hpp
index bb62484d..552688a6 100644
--- a/node/Constants.hpp
+++ b/node/Constants.hpp
@@ -267,14 +267,14 @@
 #define ZT_PEER_DIRECT_PING_DELAY 60000
 
 /**
- * Delay between requests for updated network autoconf information
+ * Timeout for overall peer activity (measured from last receive)
  */
-#define ZT_NETWORK_AUTOCONF_DELAY 60000
+#define ZT_PEER_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 4) + ZT_PING_CHECK_INVERVAL)
 
 /**
- * Timeout for overall peer activity (measured from last receive)
+ * Delay between requests for updated network autoconf information
  */
-#define ZT_PEER_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 3) + (ZT_PING_CHECK_INVERVAL * 2))
+#define ZT_NETWORK_AUTOCONF_DELAY 60000
 
 /**
  * Minimum interval between attempts by relays to unite peers
@@ -283,7 +283,7 @@
  * a RENDEZVOUS message no more than this often. This instructs the peers
  * to attempt NAT-t and gives each the other's corresponding IP:port pair.
  */
-#define ZT_MIN_UNITE_INTERVAL 60000
+#define ZT_MIN_UNITE_INTERVAL 30000
 
 /**
  * Delay between initial direct NAT-t packet and more aggressive techniques
diff --git a/node/Node.cpp b/node/Node.cpp
index 74acc869..82cb7ddb 100644
--- a/node/Node.cpp
+++ b/node/Node.cpp
@@ -263,7 +263,7 @@ public:
 			}
 
 			lastReceiveFromUpstream = std::max(p->lastReceive(),lastReceiveFromUpstream);
-		} else if (p->alive(_now)) {
+		} else if (p->activelyTransferringFrames(_now)) {
 			// Normal nodes get their preferred link kept alive if the node has generated frame traffic recently
 			p->doPingAndKeepalive(RR,_now,0);
 		}
diff --git a/node/Peer.hpp b/node/Peer.hpp
index e5db3bde..ad4c6746 100644
--- a/node/Peer.hpp
+++ b/node/Peer.hpp
@@ -231,9 +231,9 @@ public:
 	inline uint64_t lastAnnouncedTo() const throw() { return _lastAnnouncedTo; }
 
 	/**
-	 * @return True if peer has received an actual data frame within ZT_PEER_ACTIVITY_TIMEOUT milliseconds
+	 * @return True if this peer is actively sending real network frames
 	 */
-	inline uint64_t alive(uint64_t now) const throw() { return ((now - lastFrame()) < ZT_PEER_ACTIVITY_TIMEOUT); }
+	inline uint64_t activelyTransferringFrames(uint64_t now) const throw() { return ((now - lastFrame()) < ZT_PEER_ACTIVITY_TIMEOUT); }
 
 	/**
 	 * @return Current latency or 0 if unknown (max: 65535)
diff --git a/node/SelfAwareness.cpp b/node/SelfAwareness.cpp
index d8eca071..ce75eb03 100644
--- a/node/SelfAwareness.cpp
+++ b/node/SelfAwareness.cpp
@@ -128,7 +128,7 @@ void SelfAwareness::iam(const Address &reporter,const InetAddress &reporterPhysi
 		// links to be re-established if possible, possibly using a root server or some
 		// other relay.
 		for(std::vector< SharedPtr<Peer> >::const_iterator p(rset.peersReset.begin());p!=rset.peersReset.end();++p) {
-			if ((*p)->alive(now)) {
+			if ((*p)->activelyTransferringFrames(now)) {
 				Packet outp((*p)->address(),RR->identity.address(),Packet::VERB_NOP);
 				RR->sw->send(outp,true,0);
 			}
diff --git a/node/Switch.cpp b/node/Switch.cpp
index 2f72f57a..120ce7a4 100644
--- a/node/Switch.cpp
+++ b/node/Switch.cpp
@@ -442,8 +442,8 @@ unsigned long Switch::doTimerTasks(uint64_t now)
 		Mutex::Lock _l(_contactQueue_m);
 		for(std::list<ContactQueueEntry>::iterator qi(_contactQueue.begin());qi!=_contactQueue.end();) {
 			if (now >= qi->fireAtTime) {
-				if ((!qi->peer->alive(now))||(qi->peer->hasActiveDirectPath(now))) {
-					// Cancel attempt if we've already connected or peer is no longer "alive"
+				if (qi->peer->hasActiveDirectPath(now)) {
+					// Cancel if connection has succeeded
 					_contactQueue.erase(qi++);
 					continue;
 				} else {
@@ -539,7 +539,7 @@ unsigned long Switch::doTimerTasks(uint64_t now)
 		_LastUniteKey *k = (_LastUniteKey *)0;
 		uint64_t *v = (uint64_t *)0;
 		while (i.next(k,v)) {
-			if ((now - *v) >= (ZT_MIN_UNITE_INTERVAL * 16))
+			if ((now - *v) >= (ZT_MIN_UNITE_INTERVAL * 8))
 				_lastUniteAttempt.erase(*k);
 		}
 	}
diff --git a/node/Topology.hpp b/node/Topology.hpp
index 4c1a2ab3..a0c28b0f 100644
--- a/node/Topology.hpp
+++ b/node/Topology.hpp
@@ -81,6 +81,11 @@ public:
 	/**
 	 * Get a peer only if it is presently in memory (no disk cache)
 	 *
+	 * This also does not update the lastUsed() time for peers, which means
+	 * that it won't prevent them from falling out of RAM. This is currently
+	 * used in the Cluster code to update peer info without forcing all peers
+	 * across the entire cluster to remain in memory cache.
+	 *
 	 * @param zta ZeroTier address
 	 * @param now Current time
 	 */
@@ -88,10 +93,8 @@ public:
 	{
 		Mutex::Lock _l(_lock);
 		const SharedPtr<Peer> *const ap = _peers.get(zta);
-		if (ap) {
-			(*ap)->use(now);
+		if (ap)
 			return *ap;
-		}
 		return SharedPtr<Peer>();
 	}
 
diff --git a/tests/http/big-test-start.sh b/tests/http/big-test-start.sh
index 43166c6e..f300ac61 100755
--- a/tests/http/big-test-start.sh
+++ b/tests/http/big-test-start.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 # Edit as needed -- note that >1000 per host is likely problematic due to Linux kernel limits
-NUM_CONTAINERS=25
+NUM_CONTAINERS=50
 CONTAINER_IMAGE=zerotier/http-test
 
 #
@@ -25,6 +25,6 @@ export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin
 #	docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE
 #done
 
-pssh -h big-test-hosts -i -t 0 -p 256 "for ((n=0;n<$NUM_CONTAINERS;n++)); do docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 0.25; done"
+pssh -h big-test-hosts -o big-test-out -t 0 -p 256 "for ((n=0;n<$NUM_CONTAINERS;n++)); do docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 0.25; done"
 
 exit 0
-- 
cgit v1.2.3