From 258f95b2cd228f1b76817c5839dd2f523cb16fcc Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Tue, 5 Jan 2016 14:19:16 -0800 Subject: dead code removal --- node/Packet.hpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/node/Packet.hpp b/node/Packet.hpp index 6c1b2984..0c07fa24 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -235,17 +235,6 @@ */ #define ZT_PROTO_MIN_FRAGMENT_LENGTH ZT_PACKET_FRAGMENT_IDX_PAYLOAD -// Ephemeral key record flags -#define ZT_PROTO_EPHEMERAL_KEY_FLAG_FIPS 0x01 // future use - -// Ephemeral key record symmetric cipher types -#define ZT_PROTO_EPHEMERAL_KEY_SYMMETRIC_CIPHER_SALSA2012_POLY1305 0x01 -#define ZT_PROTO_EPHEMERAL_KEY_SYMMETRIC_CIPHER_AES256_GCM 0x02 - -// Ephemeral key record public key types -#define ZT_PROTO_EPHEMERAL_KEY_PK_C25519 0x01 -#define ZT_PROTO_EPHEMERAL_KEY_PK_NISTP256 0x02 - // Field incides for parsing verbs ------------------------------------------- // Some verbs have variable-length fields. Those aren't fully defined here -- cgit v1.2.3 From fb5237d5b68bbe33ac06f8cd48a4caa5f3d60a4d Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Tue, 5 Jan 2016 14:42:56 -0800 Subject: Outline dead path detection mechanism. --- node/Packet.hpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/node/Packet.hpp b/node/Packet.hpp index 0c07fa24..e1d38de5 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -664,20 +664,32 @@ public: /** * ECHO request (a.k.a. ping): - * <[...] arbitrary payload to be echoed back> + * <[1] 8-bit purpose of echo request> + * <[...] additional arbitrary payload> * * This generates OK with a copy of the transmitted payload. No ERROR * is generated. Response to ECHO requests is optional and ECHO may be * ignored if a node detects a possible flood. * - * There is a de-facto standard for ECHO payload. No payload indicates an - * ECHO used for path confirmation. Otherwise the first byte contains - * flags, in which currently the only flag is 0x01 for a user-requested - * echo. For user-requested echoes the result may be reported back through - * the API. Otherwise the payload is for internal use. + * An empty payload is permitted. This is used in some versions for + * path checking and validation. If a payload is present it must + * follow the above format, though the recipient does not have to check + * this. It can simply echo it back. + * + * Echo purpose codes: + * 0x00 - User ECHO request + * 0x01 - Dead path detection * * Support for fragmented echo packets is optional and their use is not * recommended. + * + * Dead path detection is performed by sending ECHOs with the same random + * payload to the best (or every) direct path and then once indirectly + * (such as via a root server). When an OK is received echoing back this + * test payload, all paths that have not yet received this OK are cancelled + * or re-tested. This can be done after a short period of inactivity to + * detect and automatically cancel dead paths without requiring any + * special logic (other than support for ECHO) at the remote end. */ VERB_ECHO = 8, -- cgit v1.2.3 From cba739fd6bf43807956aa38a91449ec913e2eda3 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Tue, 5 Jan 2016 14:46:26 -0800 Subject: more dead code --- node/Packet.hpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/node/Packet.hpp b/node/Packet.hpp index e1d38de5..f04e8742 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -101,15 +101,6 @@ */ #define ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012 1 -/** - * Cipher suite: PFS negotiated ephemeral cipher suite and authentication - * - * This message is encrypted with the latest negotiated ephemeral (PFS) - * key pair and cipher suite. If authentication fails, VERB_SET_EPHEMERAL_KEY - * may be sent to renegotiate ephemeral keys. - */ -#define ZT_PROTO_CIPHER_SUITE__EPHEMERAL 7 - /** * DEPRECATED payload encrypted flag, will be removed for re-use soon. * -- cgit v1.2.3 From d8143a5e186faf722d2cae703f0a618c37e588ea Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Tue, 5 Jan 2016 16:41:54 -0800 Subject: Implement first pass on rapid dead path detection, and increment version to 1.1.3 (dev) --- node/Constants.hpp | 12 ++++++++++- node/IncomingPacket.cpp | 52 +++++++++++++++++++++++----------------------- node/Node.cpp | 10 ++++----- node/Packet.hpp | 23 +-------------------- node/Path.hpp | 30 ++++++++++++++++++++++----- node/Peer.cpp | 55 ++++++++++++++++++++++++++++++++++++------------- node/Peer.hpp | 34 ++++++++++++++---------------- node/SelfAwareness.cpp | 8 +++---- node/Switch.cpp | 22 ++++++++++---------- node/Topology.cpp | 8 +++---- version.h | 2 +- 11 files changed, 144 insertions(+), 112 deletions(-) diff --git a/node/Constants.hpp b/node/Constants.hpp index 7368a634..1a47112a 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -264,13 +264,23 @@ /** * Delay between ordinary case pings of direct links */ -#define ZT_PEER_DIRECT_PING_DELAY 60000 +#define ZT_PEER_DIRECT_PING_DELAY 90000 /** * Timeout for overall peer activity (measured from last receive) */ #define ZT_PEER_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 4) + ZT_PING_CHECK_INVERVAL) +/** + * No answer timeout to trigger dead path detection + */ +#define ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT 3000 + +/** + * Probation threshold after which a path becomes dead + */ +#define ZT_PEER_DEAD_PATH_DETECTION_MAX_PROBATION 3 + /** * Delay between requests for updated network autoconf information */ diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index 781ba202..2a51a387 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -86,7 +86,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,bool deferred) switch(v) { //case Packet::VERB_NOP: default: // ignore unknown verbs, but if they pass auth check they are "received" - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),v,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),v,0,Packet::VERB_NOP); return true; case Packet::VERB_HELLO: return _doHELLO(RR,peer); @@ -185,7 +185,7 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,const SharedPtr default: break; } - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_ERROR,inRePacketId,inReVerb); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_ERROR,inRePacketId,inReVerb); } catch ( ... ) { TRACE("dropped ERROR from %s(%s): unexpected exception",peer->address().toString().c_str(),_remoteAddress.toString().c_str()); } @@ -279,7 +279,7 @@ bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,SharedPtr &peer } // Check packet integrity and authentication - SharedPtr newPeer(new Peer(RR->identity,id)); + SharedPtr newPeer(new Peer(RR,RR->identity,id)); if (!dearmor(newPeer->key())) { TRACE("rejected HELLO from %s(%s): packet failed authentication",id.address().toString().c_str(),_remoteAddress.toString().c_str()); return true; @@ -349,7 +349,7 @@ bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,SharedPtr &peer RR->node->putPacket(_localAddress,_remoteAddress,outp.data(),outp.size()); peer->setRemoteVersion(protoVersion,vMajor,vMinor,vRevision); // important for this to go first so received() knows the version - peer->received(RR,_localAddress,_remoteAddress,hops(),pid,Packet::VERB_HELLO,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),pid,Packet::VERB_HELLO,0,Packet::VERB_NOP); } catch ( ... ) { TRACE("dropped HELLO from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str()); } @@ -410,7 +410,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,const SharedPtr &p // roots. In the future it should be done if we query less trusted // sources. //if (id.locallyValidate()) - RR->sw->doAnythingWaitingForPeer(RR->topology->addPeer(SharedPtr(new Peer(RR->identity,id)))); + RR->sw->doAnythingWaitingForPeer(RR->topology->addPeer(SharedPtr(new Peer(RR,RR->identity,id)))); } } break; @@ -450,7 +450,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,const SharedPtr &p // OK(MULTICAST_FRAME) includes certificate of membership update CertificateOfMembership com; offset += com.deserialize(*this,ZT_PROTO_VERB_MULTICAST_FRAME__OK__IDX_COM_AND_GATHER_RESULTS); - peer->validateAndSetNetworkMembershipCertificate(RR,nwid,com); + peer->validateAndSetNetworkMembershipCertificate(nwid,com); } if ((flags & 0x02) != 0) { @@ -465,7 +465,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,const SharedPtr &p default: break; } - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_OK,inRePacketId,inReVerb); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_OK,inRePacketId,inReVerb); } catch ( ... ) { TRACE("dropped OK from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str()); } @@ -494,7 +494,7 @@ bool IncomingPacket::_doWHOIS(const RuntimeEnvironment *RR,const SharedPtr } else { TRACE("dropped WHOIS from %s(%s): missing or invalid address",source().toString().c_str(),_remoteAddress.toString().c_str()); } - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_WHOIS,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_WHOIS,0,Packet::VERB_NOP); } catch ( ... ) { TRACE("dropped WHOIS from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str()); } @@ -513,7 +513,7 @@ bool IncomingPacket::_doRENDEZVOUS(const RuntimeEnvironment *RR,const SharedPtr< if ((port > 0)&&((addrlen == 4)||(addrlen == 16))) { InetAddress atAddr(field(ZT_PROTO_VERB_RENDEZVOUS_IDX_ADDRESS,addrlen),addrlen,port); TRACE("RENDEZVOUS from %s says %s might be at %s, starting NAT-t",peer->address().toString().c_str(),with.toString().c_str(),atAddr.toString().c_str()); - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP); RR->sw->rendezvous(withPeer,_localAddress,atAddr); } else { TRACE("dropped corrupt RENDEZVOUS from %s(%s) (bad address or port)",peer->address().toString().c_str(),_remoteAddress.toString().c_str()); @@ -553,7 +553,7 @@ bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,const SharedPtr RR->node->putFrame(network->id(),MAC(peer->address(),network->id()),network->mac(),etherType,0,field(ZT_PROTO_VERB_FRAME_IDX_PAYLOAD,payloadLen),payloadLen); } - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_FRAME,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_FRAME,0,Packet::VERB_NOP); } else { TRACE("dropped FRAME from %s(%s): we are not connected to network %.16llx",source().toString().c_str(),_remoteAddress.toString().c_str(),at(ZT_PROTO_VERB_FRAME_IDX_NETWORK_ID)); } @@ -575,7 +575,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,const SharedPtr

validateAndSetNetworkMembershipCertificate(RR,network->id(),com); + peer->validateAndSetNetworkMembershipCertificate(network->id(),com); } if (!network->isAllowed(peer)) { @@ -624,7 +624,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,const SharedPtr

node->putFrame(network->id(),from,to,etherType,0,field(comLen + ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD,payloadLen),payloadLen); } - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP); } else { TRACE("dropped EXT_FRAME from %s(%s): we are not connected to network %.16llx",source().toString().c_str(),_remoteAddress.toString().c_str(),at(ZT_PROTO_VERB_FRAME_IDX_NETWORK_ID)); } @@ -646,7 +646,7 @@ bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,const SharedPtr outp.armor(peer->key(),true); RR->antiRec->logOutgoingZT(outp.data(),outp.size()); RR->node->putPacket(_localAddress,_remoteAddress,outp.data(),outp.size()); - peer->received(RR,_localAddress,_remoteAddress,hops(),pid,Packet::VERB_ECHO,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),pid,Packet::VERB_ECHO,0,Packet::VERB_NOP); } catch ( ... ) { TRACE("dropped ECHO from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str()); } @@ -665,7 +665,7 @@ bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,const Shared RR->mc->add(now,nwid,group,peer->address()); } - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP); } catch ( ... ) { TRACE("dropped MULTICAST_LIKE from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str()); } @@ -680,10 +680,10 @@ bool IncomingPacket::_doNETWORK_MEMBERSHIP_CERTIFICATE(const RuntimeEnvironment unsigned int ptr = ZT_PACKET_IDX_PAYLOAD; while (ptr < size()) { ptr += com.deserialize(*this,ptr); - peer->validateAndSetNetworkMembershipCertificate(RR,com.networkId(),com); + peer->validateAndSetNetworkMembershipCertificate(com.networkId(),com); } - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_NETWORK_MEMBERSHIP_CERTIFICATE,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_NETWORK_MEMBERSHIP_CERTIFICATE,0,Packet::VERB_NOP); } catch ( ... ) { TRACE("dropped NETWORK_MEMBERSHIP_CERTIFICATE from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str()); } @@ -700,7 +700,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,cons const unsigned int h = hops(); const uint64_t pid = packetId(); - peer->received(RR,_localAddress,_remoteAddress,h,pid,Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,h,pid,Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP); if (RR->localNetworkController) { Dictionary netconf; @@ -789,7 +789,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REFRESH(const RuntimeEnvironment *RR,cons nw->requestConfiguration(); ptr += 8; } - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_NETWORK_CONFIG_REFRESH,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_NETWORK_CONFIG_REFRESH,0,Packet::VERB_NOP); } catch ( ... ) { TRACE("dropped NETWORK_CONFIG_REFRESH from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str()); } @@ -825,7 +825,7 @@ bool IncomingPacket::_doMULTICAST_GATHER(const RuntimeEnvironment *RR,const Shar #endif } - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP); } catch ( ... ) { TRACE("dropped MULTICAST_GATHER from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str()); } @@ -846,7 +846,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,const Share if ((flags & 0x01) != 0) { CertificateOfMembership com; offset += com.deserialize(*this,ZT_PROTO_VERB_MULTICAST_FRAME_IDX_COM); - peer->validateAndSetNetworkMembershipCertificate(RR,nwid,com); + peer->validateAndSetNetworkMembershipCertificate(nwid,com); } // Check membership after we've read any included COM, since @@ -915,7 +915,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,const Share } } // else ignore -- not a member of this network - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP); } catch ( ... ) { TRACE("dropped MULTICAST_FRAME from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str()); } @@ -955,7 +955,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha if ( ((flags & 0x01) == 0) && (Path::isAddressValidForPath(a)) && (!peer->hasActivePathTo(now,a)) ) { if (++countPerScope[(int)a.ipScope()][0] <= ZT_PUSH_DIRECT_PATHS_MAX_PER_SCOPE_AND_FAMILY) { TRACE("attempting to contact %s at pushed direct path %s",peer->address().toString().c_str(),a.toString().c_str()); - peer->sendHELLO(RR,_localAddress,a,now); + peer->sendHELLO(_localAddress,a,now); } else { TRACE("ignoring contact for %s at %s -- too many per scope",peer->address().toString().c_str(),a.toString().c_str()); } @@ -966,7 +966,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha if ( ((flags & 0x01) == 0) && (Path::isAddressValidForPath(a)) && (!peer->hasActivePathTo(now,a)) ) { if (++countPerScope[(int)a.ipScope()][1] <= ZT_PUSH_DIRECT_PATHS_MAX_PER_SCOPE_AND_FAMILY) { TRACE("attempting to contact %s at pushed direct path %s",peer->address().toString().c_str(),a.toString().c_str()); - peer->sendHELLO(RR,_localAddress,a,now); + peer->sendHELLO(_localAddress,a,now); } else { TRACE("ignoring contact for %s at %s -- too many per scope",peer->address().toString().c_str(),a.toString().c_str()); } @@ -976,7 +976,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha ptr += addrLen; } - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP); } catch ( ... ) { TRACE("dropped PUSH_DIRECT_PATHS from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str()); } @@ -1143,7 +1143,7 @@ bool IncomingPacket::_doCIRCUIT_TEST(const RuntimeEnvironment *RR,const SharedPt } } - peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_CIRCUIT_TEST,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_CIRCUIT_TEST,0,Packet::VERB_NOP); } catch ( ... ) { TRACE("dropped CIRCUIT_TEST from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str()); } @@ -1238,7 +1238,7 @@ bool IncomingPacket::_doREQUEST_PROOF_OF_WORK(const RuntimeEnvironment *RR,const break; } - peer->received(RR,_localAddress,_remoteAddress,hops(),pid,Packet::VERB_REQUEST_PROOF_OF_WORK,0,Packet::VERB_NOP); + peer->received(_localAddress,_remoteAddress,hops(),pid,Packet::VERB_REQUEST_PROOF_OF_WORK,0,Packet::VERB_NOP); } else { TRACE("dropped REQUEST_PROOF_OF_WORK from %s(%s): not trusted enough",peer->address().toString().c_str(),_remoteAddress.toString().c_str()); } diff --git a/node/Node.cpp b/node/Node.cpp index f077424b..7e4ad642 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -244,20 +244,20 @@ public: // "Upstream" devices are roots and relays and get special treatment -- they stay alive // forever and we try to keep (if available) both IPv4 and IPv6 channels open to them. bool needToContactIndirect = true; - if (p->doPingAndKeepalive(RR,_now,AF_INET)) { + if (p->doPingAndKeepalive(_now,AF_INET)) { needToContactIndirect = false; } else { if (stableEndpoint4) { needToContactIndirect = false; - p->sendHELLO(RR,InetAddress(),stableEndpoint4,_now); + p->sendHELLO(InetAddress(),stableEndpoint4,_now); } } - if (p->doPingAndKeepalive(RR,_now,AF_INET6)) { + if (p->doPingAndKeepalive(_now,AF_INET6)) { needToContactIndirect = false; } else { if (stableEndpoint6) { needToContactIndirect = false; - p->sendHELLO(RR,InetAddress(),stableEndpoint6,_now); + p->sendHELLO(InetAddress(),stableEndpoint6,_now); } } @@ -273,7 +273,7 @@ public: lastReceiveFromUpstream = std::max(p->lastReceive(),lastReceiveFromUpstream); } else if (p->activelyTransferringFrames(_now)) { // Normal nodes get their preferred link kept alive if the node has generated frame traffic recently - p->doPingAndKeepalive(RR,_now,0); + p->doPingAndKeepalive(_now,0); } } diff --git a/node/Packet.hpp b/node/Packet.hpp index f04e8742..5a30841a 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -655,32 +655,11 @@ public: /** * ECHO request (a.k.a. ping): - * <[1] 8-bit purpose of echo request> - * <[...] additional arbitrary payload> + * <[...] arbitrary payload> * * This generates OK with a copy of the transmitted payload. No ERROR * is generated. Response to ECHO requests is optional and ECHO may be * ignored if a node detects a possible flood. - * - * An empty payload is permitted. This is used in some versions for - * path checking and validation. If a payload is present it must - * follow the above format, though the recipient does not have to check - * this. It can simply echo it back. - * - * Echo purpose codes: - * 0x00 - User ECHO request - * 0x01 - Dead path detection - * - * Support for fragmented echo packets is optional and their use is not - * recommended. - * - * Dead path detection is performed by sending ECHOs with the same random - * payload to the best (or every) direct path and then once indirectly - * (such as via a root server). When an OK is received echoing back this - * test payload, all paths that have not yet received this OK are cancelled - * or re-tested. This can be done after a short period of inactivity to - * detect and automatically cancel dead paths without requiring any - * special logic (other than support for ECHO) at the remote end. */ VERB_ECHO = 8, diff --git a/node/Path.hpp b/node/Path.hpp index c6de6612..23e47408 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -98,14 +98,21 @@ public: * * @param t Time of send */ - inline void sent(uint64_t t) { _lastSend = t; } + inline void sent(uint64_t t) + { + _lastSend = t; + } /** * Called when a packet is received from this remote path * * @param t Time of receive */ - inline void received(uint64_t t) { _lastReceived = t; } + inline void received(uint64_t t) + { + _lastReceived = t; + _probation = 0; + } /** * @param now Current time @@ -114,7 +121,7 @@ public: inline bool active(uint64_t now) const throw() { - return ((now - _lastReceived) < ZT_PEER_ACTIVITY_TIMEOUT); + return (((now - _lastReceived) < ZT_PEER_ACTIVITY_TIMEOUT)&&(_probation < ZT_PEER_DEAD_PATH_DETECTION_MAX_PROBATION)); } /** @@ -240,28 +247,40 @@ public: inline bool isClusterSuboptimal() const { return ((_flags & ZT_PATH_FLAG_CLUSTER_SUBOPTIMAL) != 0); } #endif + /** + * @return Current path probation count (for dead path detect) + */ + inline unsigned int probation() const { return _probation; } + + /** + * Increase this path's probation violation count (for dead path detect) + */ + inline void increaseProbation() { ++_probation; } + template inline void serialize(Buffer &b) const { - b.append((uint8_t)0); // version + b.append((uint8_t)1); // version b.append((uint64_t)_lastSend); b.append((uint64_t)_lastReceived); _addr.serialize(b); _localAddress.serialize(b); b.append((uint16_t)_flags); + b.append((uint16_t)_probation); } template inline unsigned int deserialize(const Buffer &b,unsigned int startAt = 0) { unsigned int p = startAt; - if (b[p++] != 0) + if (b[p++] != 1) throw std::invalid_argument("invalid serialized Path"); _lastSend = b.template at(p); p += 8; _lastReceived = b.template at(p); p += 8; p += _addr.deserialize(b,p); p += _localAddress.deserialize(b,p); _flags = b.template at(p); p += 2; + _probation = b.template at(p); p += 2; _ipScope = _addr.ipScope(); return (p - startAt); } @@ -275,6 +294,7 @@ private: InetAddress _addr; InetAddress _localAddress; unsigned int _flags; + unsigned int _probation; InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often }; diff --git a/node/Peer.cpp b/node/Peer.cpp index 340f0c10..0f72be9f 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -46,8 +46,8 @@ namespace ZeroTier { // Used to send varying values for NAT keepalive static uint32_t _natKeepaliveBuf = 0; -Peer::Peer(const Identity &myIdentity,const Identity &peerIdentity) - throw(std::runtime_error) : +Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Identity &peerIdentity) : + RR(renv), _lastUsed(0), _lastReceive(0), _lastUnicastFrame(0), @@ -72,7 +72,6 @@ Peer::Peer(const Identity &myIdentity,const Identity &peerIdentity) } void Peer::received( - const RuntimeEnvironment *RR, const InetAddress &localAddr, const InetAddress &remoteAddr, unsigned int hops, @@ -199,7 +198,7 @@ void Peer::received( outp.armor(_key,true); RR->node->putPacket(localAddr,remoteAddr,outp.data(),outp.size()); } else { - sendHELLO(RR,localAddr,remoteAddr,now); + sendHELLO(localAddr,remoteAddr,now); } } @@ -214,7 +213,7 @@ void Peer::received( } } -void Peer::sendHELLO(const RuntimeEnvironment *RR,const InetAddress &localAddr,const InetAddress &atAddress,uint64_t now,unsigned int ttl) +void Peer::sendHELLO(const InetAddress &localAddr,const InetAddress &atAddress,uint64_t now,unsigned int ttl) { // _lock not required here since _id is immutable and nothing else is accessed @@ -234,7 +233,7 @@ void Peer::sendHELLO(const RuntimeEnvironment *RR,const InetAddress &localAddr,c RR->node->putPacket(localAddr,atAddress,outp.data(),outp.size(),ttl); } -bool Peer::doPingAndKeepalive(const RuntimeEnvironment *RR,uint64_t now,int inetAddressFamily) +bool Peer::doPingAndKeepalive(uint64_t now,int inetAddressFamily) { Path *p = (Path *)0; @@ -248,7 +247,7 @@ bool Peer::doPingAndKeepalive(const RuntimeEnvironment *RR,uint64_t now,int inet if (p) { if ((now - p->lastReceived()) >= ZT_PEER_DIRECT_PING_DELAY) { //TRACE("PING %s(%s) after %llums/%llums send/receive inactivity",_id.address().toString().c_str(),p->address().toString().c_str(),now - p->lastSend(),now - p->lastReceived()); - sendHELLO(RR,p->localAddress(),p->address(),now); + sendHELLO(p->localAddress(),p->address(),now); p->sent(now); } else if (((now - p->lastSend()) >= ZT_NAT_KEEPALIVE_DELAY)&&(!p->reliable())) { //TRACE("NAT keepalive %s(%s) after %llums/%llums send/receive inactivity",_id.address().toString().c_str(),p->address().toString().c_str(),now - p->lastSend(),now - p->lastReceived()); @@ -264,7 +263,7 @@ bool Peer::doPingAndKeepalive(const RuntimeEnvironment *RR,uint64_t now,int inet return false; } -void Peer::pushDirectPaths(const RuntimeEnvironment *RR,Path *path,uint64_t now,bool force) +void Peer::pushDirectPaths(Path *path,uint64_t now,bool force) { #ifdef ZT_ENABLE_CLUSTER // Cluster mode disables normal PUSH_DIRECT_PATHS in favor of cluster-based peer redirection @@ -332,7 +331,7 @@ void Peer::pushDirectPaths(const RuntimeEnvironment *RR,Path *path,uint64_t now, } } -bool Peer::resetWithinScope(const RuntimeEnvironment *RR,InetAddress::IpScope scope,uint64_t now) +bool Peer::resetWithinScope(InetAddress::IpScope scope,uint64_t now) { Mutex::Lock _l(_lock); unsigned int np = _numPaths; @@ -340,7 +339,7 @@ bool Peer::resetWithinScope(const RuntimeEnvironment *RR,InetAddress::IpScope sc unsigned int y = 0; while (x < np) { if (_paths[x].address().ipScope() == scope) { - sendHELLO(RR,_paths[x].localAddress(),_paths[x].address(),now); + sendHELLO(_paths[x].localAddress(),_paths[x].address(),now); } else { _paths[y++] = _paths[x]; } @@ -383,7 +382,7 @@ bool Peer::networkMembershipCertificatesAgree(uint64_t nwid,const CertificateOfM return false; } -bool Peer::validateAndSetNetworkMembershipCertificate(const RuntimeEnvironment *RR,uint64_t nwid,const CertificateOfMembership &com) +bool Peer::validateAndSetNetworkMembershipCertificate(uint64_t nwid,const CertificateOfMembership &com) { // Sanity checks if ((!com)||(com.issuedTo() != _id.address())) @@ -448,7 +447,7 @@ bool Peer::needsOurNetworkMembershipCertificate(uint64_t nwid,uint64_t now,bool return ((now - tmp) >= (ZT_NETWORK_AUTOCONF_DELAY / 2)); } -void Peer::clean(const RuntimeEnvironment *RR,uint64_t now) +void Peer::clean(uint64_t now) { Mutex::Lock _l(_lock); @@ -485,6 +484,34 @@ void Peer::clean(const RuntimeEnvironment *RR,uint64_t now) } } +bool Peer::_checkPath(Path &p,const uint64_t now) +{ + // assumes _lock is locked + + if (!p.active(now)) + return false; + + if (p.lastSend() > p.lastReceived()) { + if ((p.lastSend() - p.lastReceived()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) { + TRACE("%s(%s) has not answered, checking if dead (probation: %u)",_id.address().toString().c_str(),p.address().toString().c_str(),p.probation()); + + if ( (_vProto >= 5) && ( !((_vMajor == 1)&&(_vMinor == 1)&&(_vRevision == 0)) ) ) { + // 1.1.1 and newer nodes support ECHO, which is smaller -- but 1.1.0 has a bug so use HELLO there too + Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ECHO); + outp.armor(_key,true); + p.send(RR,outp.data(),outp.size(),now); + } else { + sendHELLO(p.localAddress(),p.address(),now); + p.sent(now); + } + + p.increaseProbation(); + } + } + + return true; +} + Path *Peer::_getBestPath(const uint64_t now) { // assumes _lock is locked @@ -492,7 +519,7 @@ Path *Peer::_getBestPath(const uint64_t now) uint64_t bestPathScore = 0; for(unsigned int i=0;i<_numPaths;++i) { const uint64_t score = _paths[i].score(); - if ((score >= bestPathScore)&&(_paths[i].active(now))) { + if ((score >= bestPathScore)&&(_checkPath(_paths[i],now))) { bestPathScore = score; bestPath = &(_paths[i]); } @@ -507,7 +534,7 @@ Path *Peer::_getBestPath(const uint64_t now,int inetAddressFamily) uint64_t bestPathScore = 0; for(unsigned int i=0;i<_numPaths;++i) { const uint64_t score = _paths[i].score(); - if (((int)_paths[i].address().ss_family == inetAddressFamily)&&(score >= bestPathScore)&&(_paths[i].active(now))) { + if (((int)_paths[i].address().ss_family == inetAddressFamily)&&(score >= bestPathScore)&&(_checkPath(_paths[i],now))) { bestPathScore = score; bestPath = &(_paths[i]); } diff --git a/node/Peer.hpp b/node/Peer.hpp index 86635d77..079640db 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -75,12 +75,12 @@ public: /** * Construct a new peer * + * @param renv Runtime environment * @param myIdentity Identity of THIS node (for key agreement) * @param peerIdentity Identity of peer * @throws std::runtime_error Key agreement with peer's identity failed */ - Peer(const Identity &myIdentity,const Identity &peerIdentity) - throw(std::runtime_error); + Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Identity &peerIdentity); /** * @return Time peer record was last used in any way @@ -120,7 +120,6 @@ public: * @param inReVerb Verb in reply to (for OK/ERROR, default: VERB_NOP) */ void received( - const RuntimeEnvironment *RR, const InetAddress &localAddr, const InetAddress &remoteAddr, unsigned int hops, @@ -144,13 +143,12 @@ public: /** * Send via best path * - * @param RR Runtime environment * @param data Packet data * @param len Packet length * @param now Current time * @return Path used on success or NULL on failure */ - inline Path *send(const RuntimeEnvironment *RR,const void *data,unsigned int len,uint64_t now) + inline Path *send(const void *data,unsigned int len,uint64_t now) { Path *const bestPath = getBestPath(now); if (bestPath) { @@ -166,33 +164,30 @@ public: * This does not update any statistics. It's used to send initial HELLOs * for NAT traversal and path verification. * - * @param RR Runtime environment * @param localAddr Local address * @param atAddress Destination address * @param now Current time * @param ttl Desired IP TTL (default: 0 to leave alone) */ - void sendHELLO(const RuntimeEnvironment *RR,const InetAddress &localAddr,const InetAddress &atAddress,uint64_t now,unsigned int ttl = 0); + void sendHELLO(const InetAddress &localAddr,const InetAddress &atAddress,uint64_t now,unsigned int ttl = 0); /** * Send pings or keepalives depending on configured timeouts * - * @param RR Runtime environment * @param now Current time * @param inetAddressFamily Keep this address family alive, or 0 to simply pick current best ignoring family * @return True if at least one direct path seems alive */ - bool doPingAndKeepalive(const RuntimeEnvironment *RR,uint64_t now,int inetAddressFamily); + bool doPingAndKeepalive(uint64_t now,int inetAddressFamily); /** * Push direct paths back to self if we haven't done so in the configured timeout * - * @param RR Runtime environment * @param path Remote path to use to send the push * @param now Current time * @param force If true, push regardless of rate limit */ - void pushDirectPaths(const RuntimeEnvironment *RR,Path *path,uint64_t now,bool force); + void pushDirectPaths(Path *path,uint64_t now,bool force); /** * @return All known direct paths to this peer @@ -324,12 +319,11 @@ public: /** * Reset paths within a given scope * - * @param RR Runtime environment * @param scope IP scope of paths to reset * @param now Current time * @return True if at least one path was forgotten */ - bool resetWithinScope(const RuntimeEnvironment *RR,InetAddress::IpScope scope,uint64_t now); + bool resetWithinScope(InetAddress::IpScope scope,uint64_t now); /** * @return 256-bit secret symmetric encryption key @@ -383,11 +377,10 @@ public: /** * Check the validity of the COM and add/update if valid and new * - * @param RR Runtime Environment * @param nwid Network ID * @param com Externally supplied COM */ - bool validateAndSetNetworkMembershipCertificate(const RuntimeEnvironment *RR,uint64_t nwid,const CertificateOfMembership &com); + bool validateAndSetNetworkMembershipCertificate(uint64_t nwid,const CertificateOfMembership &com); /** * @param nwid Network ID @@ -399,8 +392,10 @@ public: /** * Perform periodic cleaning operations + * + * @param now Current time */ - void clean(const RuntimeEnvironment *RR,uint64_t now); + void clean(uint64_t now); /** * Update direct path push stats and return true if we should respond @@ -503,13 +498,14 @@ public: /** * Create a new Peer from a serialized instance * + * @param renv Runtime environment * @param myIdentity This node's identity * @param b Buffer containing serialized Peer data * @param p Pointer to current position in buffer, will be updated in place as buffer is read (value/result) * @return New instance of Peer or NULL if serialized data was corrupt or otherwise invalid (may also throw an exception via Buffer) */ template - static inline SharedPtr deserializeNew(const Identity &myIdentity,const Buffer &b,unsigned int &p) + static inline SharedPtr deserializeNew(const RuntimeEnvironment *renv,const Identity &myIdentity,const Buffer &b,unsigned int &p) { const unsigned int recSize = b.template at(p); p += 4; if ((p + recSize) > b.size()) @@ -523,7 +519,7 @@ public: if (!npid) return SharedPtr(); - SharedPtr np(new Peer(myIdentity,npid)); + SharedPtr np(new Peer(renv,myIdentity,npid)); np->_lastUsed = b.template at(p); p += 8; np->_lastReceive = b.template at(p); p += 8; @@ -569,11 +565,13 @@ public: } private: + bool _checkPath(Path &p,const uint64_t now); Path *_getBestPath(const uint64_t now); Path *_getBestPath(const uint64_t now,int inetAddressFamily); unsigned char _key[ZT_PEER_SECRET_KEY_LENGTH]; // computed with key agreement, not serialized + const RuntimeEnvironment *RR; uint64_t _lastUsed; uint64_t _lastReceive; // direct or indirect uint64_t _lastUnicastFrame; diff --git a/node/SelfAwareness.cpp b/node/SelfAwareness.cpp index ce75eb03..424a3cad 100644 --- a/node/SelfAwareness.cpp +++ b/node/SelfAwareness.cpp @@ -46,21 +46,19 @@ namespace ZeroTier { class _ResetWithinScope { public: - _ResetWithinScope(const RuntimeEnvironment *renv,uint64_t now,InetAddress::IpScope scope) : - RR(renv), + _ResetWithinScope(uint64_t now,InetAddress::IpScope scope) : _now(now), _scope(scope) {} inline void operator()(Topology &t,const SharedPtr &p) { - if (p->resetWithinScope(RR,_scope,_now)) + if (p->resetWithinScope(_scope,_now)) peersReset.push_back(p); } std::vector< SharedPtr > peersReset; private: - const RuntimeEnvironment *RR; uint64_t _now; InetAddress::IpScope _scope; }; @@ -121,7 +119,7 @@ void SelfAwareness::iam(const Address &reporter,const InetAddress &reporterPhysi } // Reset all paths within this scope - _ResetWithinScope rset(RR,now,(InetAddress::IpScope)scope); + _ResetWithinScope rset(now,(InetAddress::IpScope)scope); RR->topology->eachPeer<_ResetWithinScope &>(rset); // Send a NOP to all peers for whom we forgot a path. This will cause direct diff --git a/node/Switch.cpp b/node/Switch.cpp index a06de17e..a6575836 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -408,7 +408,7 @@ bool Switch::unite(const Address &p1,const Address &p2) outp.append(cg.first.rawIpData(),4); } outp.armor(p1p->key(),true); - p1p->send(RR,outp.data(),outp.size(),now); + p1p->send(outp.data(),outp.size(),now); } else { // Tell p2 where to find p1. Packet outp(p2,RR->identity.address(),Packet::VERB_RENDEZVOUS); @@ -423,7 +423,7 @@ bool Switch::unite(const Address &p1,const Address &p2) outp.append(cg.second.rawIpData(),4); } outp.armor(p2p->key(),true); - p2p->send(RR,outp.data(),outp.size(),now); + p2p->send(outp.data(),outp.size(),now); } ++alt; // counts up and also flips LSB } @@ -435,7 +435,7 @@ void Switch::rendezvous(const SharedPtr &peer,const InetAddress &localAddr { TRACE("sending NAT-t message to %s(%s)",peer->address().toString().c_str(),atAddr.toString().c_str()); const uint64_t now = RR->node->now(); - peer->sendHELLO(RR,localAddr,atAddr,now,2); // first attempt: send low-TTL packet to 'open' local NAT + peer->sendHELLO(localAddr,atAddr,now,2); // first attempt: send low-TTL packet to 'open' local NAT { Mutex::Lock _l(_contactQueue_m); _contactQueue.push_back(ContactQueueEntry(peer,now + ZT_NAT_T_TACTICAL_ESCALATION_DELAY,localAddr,atAddr)); @@ -508,14 +508,14 @@ unsigned long Switch::doTimerTasks(uint64_t now) } else { if (qi->strategyIteration == 0) { // First strategy: send packet directly to destination - qi->peer->sendHELLO(RR,qi->localAddr,qi->inaddr,now); + qi->peer->sendHELLO(qi->localAddr,qi->inaddr,now); } else if (qi->strategyIteration <= 3) { // Strategies 1-3: try escalating ports for symmetric NATs that remap sequentially InetAddress tmpaddr(qi->inaddr); int p = (int)qi->inaddr.port() + qi->strategyIteration; if (p < 0xffff) { tmpaddr.setPort((unsigned int)p); - qi->peer->sendHELLO(RR,qi->localAddr,tmpaddr,now); + qi->peer->sendHELLO(qi->localAddr,tmpaddr,now); } else qi->strategyIteration = 5; } else { // All strategies tried, expire entry @@ -619,7 +619,7 @@ void Switch::_handleRemotePacketFragment(const InetAddress &localAddr,const Inet // Note: we don't bother initiating NAT-t for fragments, since heads will set that off. // It wouldn't hurt anything, just redundant and unnecessary. SharedPtr relayTo = RR->topology->getPeer(destination); - if ((!relayTo)||(!relayTo->send(RR,fragment.data(),fragment.size(),RR->node->now()))) { + if ((!relayTo)||(!relayTo->send(fragment.data(),fragment.size(),RR->node->now()))) { #ifdef ZT_ENABLE_CLUSTER if (RR->cluster) { RR->cluster->sendViaCluster(Address(),destination,fragment.data(),fragment.size(),false); @@ -630,7 +630,7 @@ void Switch::_handleRemotePacketFragment(const InetAddress &localAddr,const Inet // Don't know peer or no direct path -- so relay via root server relayTo = RR->topology->getBestRoot(); if (relayTo) - relayTo->send(RR,fragment.data(),fragment.size(),RR->node->now()); + relayTo->send(fragment.data(),fragment.size(),RR->node->now()); } } else { TRACE("dropped relay [fragment](%s) -> %s, max hops exceeded",fromAddr.toString().c_str(),destination.toString().c_str()); @@ -705,7 +705,7 @@ void Switch::_handleRemotePacketHead(const InetAddress &localAddr,const InetAddr packet->incrementHops(); SharedPtr relayTo = RR->topology->getPeer(destination); - if ((relayTo)&&((relayTo->send(RR,packet->data(),packet->size(),now)))) { + if ((relayTo)&&((relayTo->send(packet->data(),packet->size(),now)))) { Mutex::Lock _l(_lastUniteAttempt_m); uint64_t &luts = _lastUniteAttempt[_LastUniteKey(source,destination)]; if ((now - luts) >= ZT_MIN_UNITE_INTERVAL) { @@ -730,7 +730,7 @@ void Switch::_handleRemotePacketHead(const InetAddress &localAddr,const InetAddr relayTo = RR->topology->getBestRoot(&source,1,true); if (relayTo) - relayTo->send(RR,packet->data(),packet->size(),now); + relayTo->send(packet->data(),packet->size(),now); } } else { TRACE("dropped relay %s(%s) -> %s, max hops exceeded",packet->source().toString().c_str(),fromAddr.toString().c_str(),destination.toString().c_str()); @@ -787,7 +787,7 @@ Address Switch::_sendWhoisRequest(const Address &addr,const Address *peersAlread Packet outp(root->address(),RR->identity.address(),Packet::VERB_WHOIS); addr.appendTo(outp); outp.armor(root->key(),true); - if (root->send(RR,outp.data(),outp.size(),RR->node->now())) + if (root->send(outp.data(),outp.size(),RR->node->now())) return root->address(); } return Address(); @@ -841,7 +841,7 @@ bool Switch::_trySend(const Packet &packet,bool encrypt,uint64_t nwid) if ((network)&&(relay)&&(network->isAllowed(peer))) { // Push hints for direct connectivity to this peer if we are relaying - peer->pushDirectPaths(RR,viaPath,now,false); + peer->pushDirectPaths(viaPath,now,false); } Packet tmp(packet); diff --git a/node/Topology.cpp b/node/Topology.cpp index cc18708a..e0592ea5 100644 --- a/node/Topology.cpp +++ b/node/Topology.cpp @@ -67,7 +67,7 @@ Topology::Topology(const RuntimeEnvironment *renv) : ); unsigned int pos = 0; deserializeBuf->copyFrom(all + ptr,reclen + 4); - SharedPtr p(Peer::deserializeNew(RR->identity,*deserializeBuf,pos)); + SharedPtr p(Peer::deserializeNew(RR,RR->identity,*deserializeBuf,pos)); ptr += pos; if (!p) break; // stop if invalid records @@ -180,7 +180,7 @@ SharedPtr Topology::getPeer(const Address &zta) try { Identity id(_getIdentity(zta)); if (id) { - SharedPtr np(new Peer(RR->identity,id)); + SharedPtr np(new Peer(RR,RR->identity,id)); { Mutex::Lock _l(_lock); SharedPtr &ap = _peers[zta]; @@ -327,7 +327,7 @@ void Topology::clean(uint64_t now) if (((now - (*p)->lastUsed()) >= ZT_PEER_IN_MEMORY_EXPIRATION)&&(std::find(_rootAddresses.begin(),_rootAddresses.end(),*a) == _rootAddresses.end())) { _peers.erase(*a); } else { - (*p)->clean(RR,now); + (*p)->clean(now); } } } @@ -361,7 +361,7 @@ void Topology::_setWorld(const World &newWorld) if (rp) { _rootPeers.push_back(*rp); } else { - SharedPtr newrp(new Peer(RR->identity,r->identity)); + SharedPtr newrp(new Peer(RR,RR->identity,r->identity)); _peers.set(r->identity.address(),newrp); _rootPeers.push_back(newrp); } diff --git a/version.h b/version.h index 71d9ca61..24555f51 100644 --- a/version.h +++ b/version.h @@ -41,6 +41,6 @@ /** * Revision */ -#define ZEROTIER_ONE_VERSION_REVISION 2 +#define ZEROTIER_ONE_VERSION_REVISION 3 #endif -- cgit v1.2.3 From 4d94ae77b45e16272fe0c5c685cc20ece5057c32 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Tue, 5 Jan 2016 16:48:35 -0800 Subject: simplify if --- node/Peer.cpp | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/node/Peer.cpp b/node/Peer.cpp index 0f72be9f..a98d94c4 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -491,22 +491,20 @@ bool Peer::_checkPath(Path &p,const uint64_t now) if (!p.active(now)) return false; - if (p.lastSend() > p.lastReceived()) { - if ((p.lastSend() - p.lastReceived()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) { - TRACE("%s(%s) has not answered, checking if dead (probation: %u)",_id.address().toString().c_str(),p.address().toString().c_str(),p.probation()); - - if ( (_vProto >= 5) && ( !((_vMajor == 1)&&(_vMinor == 1)&&(_vRevision == 0)) ) ) { - // 1.1.1 and newer nodes support ECHO, which is smaller -- but 1.1.0 has a bug so use HELLO there too - Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ECHO); - outp.armor(_key,true); - p.send(RR,outp.data(),outp.size(),now); - } else { - sendHELLO(p.localAddress(),p.address(),now); - p.sent(now); - } - - p.increaseProbation(); + if ( (p.lastSend() > p.lastReceived()) && ((p.lastSend() - p.lastReceived()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) ) { + TRACE("%s(%s) has not answered, checking if dead (probation: %u)",_id.address().toString().c_str(),p.address().toString().c_str(),p.probation()); + + if ( (_vProto >= 5) && ( !((_vMajor == 1)&&(_vMinor == 1)&&(_vRevision == 0)) ) ) { + // 1.1.1 and newer nodes support ECHO, which is smaller -- but 1.1.0 has a bug so use HELLO there too + Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ECHO); + outp.armor(_key,true); + p.send(RR,outp.data(),outp.size(),now); + } else { + sendHELLO(p.localAddress(),p.address(),now); + p.sent(now); } + + p.increaseProbation(); } return true; -- cgit v1.2.3 From 05b2c0743f1733d2725266ad3249496ed09383a5 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Wed, 6 Jan 2016 10:00:03 -0800 Subject: Tighten up dead path detection. Should now auto-detect dead paths in less than 10 seconds at a very small cost in ECHO requests (or HELLOs for older peers). GitHib issue #272 --- node/Constants.hpp | 2 +- node/Path.hpp | 43 +++++++++++++++++++++++++++++++++++++------ node/Peer.cpp | 31 +++++++++++++++++++++++++++---- 3 files changed, 65 insertions(+), 11 deletions(-) diff --git a/node/Constants.hpp b/node/Constants.hpp index 1a47112a..a2ba1c1a 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -274,7 +274,7 @@ /** * No answer timeout to trigger dead path detection */ -#define ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT 3000 +#define ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT 2500 /** * Probation threshold after which a path becomes dead diff --git a/node/Path.hpp b/node/Path.hpp index 23e47408..7e0a8ed1 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -66,6 +66,8 @@ class Path public: Path() : _lastSend(0), + _lastPing(0), + _lastKeepalive(0), _lastReceived(0), _addr(), _localAddress(), @@ -76,6 +78,8 @@ public: Path(const InetAddress &localAddress,const InetAddress &addr) : _lastSend(0), + _lastPing(0), + _lastKeepalive(0), _lastReceived(0), _addr(addr), _localAddress(localAddress), @@ -98,10 +102,21 @@ public: * * @param t Time of send */ - inline void sent(uint64_t t) - { - _lastSend = t; - } + inline void sent(uint64_t t) { _lastSend = t; } + + /** + * Called when we've sent a ping or echo + * + * @param t Time of send + */ + inline void pinged(uint64_t t) { _lastPing = t; } + + /** + * Called when we send a NAT keepalive + * + * @param t Time of send + */ + inline void sentKeepalive(uint64_t t) { _lastKeepalive = t; } /** * Called when a packet is received from this remote path @@ -145,6 +160,16 @@ public: */ inline uint64_t lastSend() const throw() { return _lastSend; } + /** + * @return Time we last pinged or dead path checked this link + */ + inline uint64_t lastPing() const throw() { return _lastPing; } + + /** + * @return Time of last keepalive + */ + inline uint64_t lastKeepalive() const throw() { return _lastKeepalive; } + /** * @return Time of last receive from this path */ @@ -260,8 +285,10 @@ public: template inline void serialize(Buffer &b) const { - b.append((uint8_t)1); // version + b.append((uint8_t)2); // version b.append((uint64_t)_lastSend); + b.append((uint64_t)_lastPing); + b.append((uint64_t)_lastKeepalive); b.append((uint64_t)_lastReceived); _addr.serialize(b); _localAddress.serialize(b); @@ -273,9 +300,11 @@ public: inline unsigned int deserialize(const Buffer &b,unsigned int startAt = 0) { unsigned int p = startAt; - if (b[p++] != 1) + if (b[p++] != 2) throw std::invalid_argument("invalid serialized Path"); _lastSend = b.template at(p); p += 8; + _lastPing = b.template at(p); p += 8; + _lastKeepalive = b.template at(p); p += 8; _lastReceived = b.template at(p); p += 8; p += _addr.deserialize(b,p); p += _localAddress.deserialize(b,p); @@ -290,6 +319,8 @@ public: private: uint64_t _lastSend; + uint64_t _lastPing; + uint64_t _lastKeepalive; uint64_t _lastReceived; InetAddress _addr; InetAddress _localAddress; diff --git a/node/Peer.cpp b/node/Peer.cpp index a98d94c4..aff610d5 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -168,7 +168,10 @@ void Peer::received( } else { uint64_t slotLRmin = 0xffffffffffffffffULL; for(unsigned int p=0;paddress().toString().c_str(),now - p->lastSend(),now - p->lastReceived()); sendHELLO(p->localAddress(),p->address(),now); p->sent(now); - } else if (((now - p->lastSend()) >= ZT_NAT_KEEPALIVE_DELAY)&&(!p->reliable())) { + p->pinged(now); + } else if (((now - std::max(p->lastSend(),p->lastKeepalive())) >= ZT_NAT_KEEPALIVE_DELAY)&&(!p->reliable())) { //TRACE("NAT keepalive %s(%s) after %llums/%llums send/receive inactivity",_id.address().toString().c_str(),p->address().toString().c_str(),now - p->lastSend(),now - p->lastReceived()); _natKeepaliveBuf += (uint32_t)((now * 0x9e3779b1) >> 1); // tumble this around to send constantly varying (meaningless) payloads RR->node->putPacket(p->localAddress(),p->address(),&_natKeepaliveBuf,sizeof(_natKeepaliveBuf)); - p->sent(now); + p->sentKeepalive(now); } else { //TRACE("no PING or NAT keepalive: addr==%s reliable==%d %llums/%llums send/receive inactivity",p->address().toString().c_str(),(int)p->reliable(),now - p->lastSend(),now - p->lastReceived()); } @@ -339,6 +343,8 @@ bool Peer::resetWithinScope(InetAddress::IpScope scope,uint64_t now) unsigned int y = 0; while (x < np) { if (_paths[x].address().ipScope() == scope) { + // Resetting a path means sending a HELLO and then forgetting it. If we + // get OK(HELLO) then it will be re-learned. sendHELLO(_paths[x].localAddress(),_paths[x].address(),now); } else { _paths[y++] = _paths[x]; @@ -491,7 +497,22 @@ bool Peer::_checkPath(Path &p,const uint64_t now) if (!p.active(now)) return false; - if ( (p.lastSend() > p.lastReceived()) && ((p.lastSend() - p.lastReceived()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) ) { + /* Dead path detection: if we have sent something to this peer and have not + * yet received a reply, double check this path. The majority of outbound + * packets including Ethernet frames do generate some kind of reply either + * immediately or at some point in the near future. This will occasionally + * (every NO_ANSWER_TIMEOUT ms) check paths unnecessarily if traffic that + * does not generate a response is being sent such as multicast announcements + * or frames belonging to unidirectional UDP protocols, but the cost is very + * tiny and the benefit in reliability is very large. This takes care of many + * failure modes including crap NATs that forget links and spurious changes + * to physical network topology that cannot be otherwise detected. + * + * Each time we do this we increment a probation counter in the path. This + * counter is reset on any packet receive over this path. If it reaches the + * MAX_PROBATION threshold the path is considred dead. */ + + if ( (p.lastSend() > p.lastReceived()) && ((p.lastSend() - p.lastReceived()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) && ((now - p.lastPing()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) ) { TRACE("%s(%s) has not answered, checking if dead (probation: %u)",_id.address().toString().c_str(),p.address().toString().c_str(),p.probation()); if ( (_vProto >= 5) && ( !((_vMajor == 1)&&(_vMinor == 1)&&(_vRevision == 0)) ) ) { @@ -499,9 +520,11 @@ bool Peer::_checkPath(Path &p,const uint64_t now) Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ECHO); outp.armor(_key,true); p.send(RR,outp.data(),outp.size(),now); + p.pinged(now); } else { sendHELLO(p.localAddress(),p.address(),now); p.sent(now); + p.pinged(now); } p.increaseProbation(); -- cgit v1.2.3 From 9aee72099e518636acb243237042049c50dcf483 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Wed, 6 Jan 2016 10:59:39 -0800 Subject: AntiRecursion cleanup and some other minor things. --- node/AntiRecursion.hpp | 6 ++++-- node/Path.cpp | 2 +- node/Peer.cpp | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/node/AntiRecursion.hpp b/node/AntiRecursion.hpp index 4d9df465..78ee95c2 100644 --- a/node/AntiRecursion.hpp +++ b/node/AntiRecursion.hpp @@ -105,8 +105,9 @@ public: const _ArItem *const end = i + ZT_ANTIRECURSION_HISTORY_SIZE; while (i != end) { #ifdef ZT_NO_TYPE_PUNNING - if (!memcmp(pp,i->tail,32)) + if (!memcmp(pp,i->tail,32)) { return false; + } #else const uint64_t *t = i->tail; const uint64_t *p = reinterpret_cast(pp); @@ -114,8 +115,9 @@ public: bits |= *(t++) ^ *(p++); bits |= *(t++) ^ *(p++); bits |= *t ^ *p; - if (!bits) + if (!bits) { return false; + } #endif ++i; } diff --git a/node/Path.cpp b/node/Path.cpp index e2475751..c67352de 100644 --- a/node/Path.cpp +++ b/node/Path.cpp @@ -34,9 +34,9 @@ namespace ZeroTier { bool Path::send(const RuntimeEnvironment *RR,const void *data,unsigned int len,uint64_t now) { + RR->antiRec->logOutgoingZT(data,len); if (RR->node->putPacket(_localAddress,address(),data,len)) { sent(now); - RR->antiRec->logOutgoingZT(data,len); return true; } return false; diff --git a/node/Peer.cpp b/node/Peer.cpp index aff610d5..1914da97 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -253,7 +253,7 @@ bool Peer::doPingAndKeepalive(uint64_t now,int inetAddressFamily) sendHELLO(p->localAddress(),p->address(),now); p->sent(now); p->pinged(now); - } else if (((now - std::max(p->lastSend(),p->lastKeepalive())) >= ZT_NAT_KEEPALIVE_DELAY)&&(!p->reliable())) { + } else if ( ((now - std::max(p->lastSend(),p->lastKeepalive())) >= ZT_NAT_KEEPALIVE_DELAY) && (!p->reliable()) ) { //TRACE("NAT keepalive %s(%s) after %llums/%llums send/receive inactivity",_id.address().toString().c_str(),p->address().toString().c_str(),now - p->lastSend(),now - p->lastReceived()); _natKeepaliveBuf += (uint32_t)((now * 0x9e3779b1) >> 1); // tumble this around to send constantly varying (meaningless) payloads RR->node->putPacket(p->localAddress(),p->address(),&_natKeepaliveBuf,sizeof(_natKeepaliveBuf)); @@ -513,7 +513,7 @@ bool Peer::_checkPath(Path &p,const uint64_t now) * MAX_PROBATION threshold the path is considred dead. */ if ( (p.lastSend() > p.lastReceived()) && ((p.lastSend() - p.lastReceived()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) && ((now - p.lastPing()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) ) { - TRACE("%s(%s) has not answered, checking if dead (probation: %u)",_id.address().toString().c_str(),p.address().toString().c_str(),p.probation()); + TRACE("%s(%s) does not seem to be answering in a timely manner, checking if dead (probation == %u)",_id.address().toString().c_str(),p.address().toString().c_str(),p.probation()); if ( (_vProto >= 5) && ( !((_vMajor == 1)&&(_vMinor == 1)&&(_vRevision == 0)) ) ) { // 1.1.1 and newer nodes support ECHO, which is smaller -- but 1.1.0 has a bug so use HELLO there too -- cgit v1.2.3 From 0a3ef38cade03c9b6a4f94611bd3df620ce1f4e6 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Wed, 6 Jan 2016 11:06:47 -0800 Subject: Put old test code in attic. --- attic/big-http-test/2015-11-10_01_50000.out.xz | Bin 0 -> 730360 bytes attic/big-http-test/2015-11-10_02_50000.out.xz | Bin 0 -> 2373664 bytes .../2015-11-10_03_12500_ec2-east-only.out.xz | Bin 0 -> 802932 bytes attic/big-http-test/Dockerfile | 24 +++ attic/big-http-test/README.md | 12 ++ attic/big-http-test/agent.js | 196 +++++++++++++++++++++ attic/big-http-test/big-test-kill.sh | 9 + attic/big-http-test/big-test-start.sh | 13 ++ attic/big-http-test/crunch-results.js | 65 +++++++ attic/big-http-test/docker-main.sh | 16 ++ attic/big-http-test/nodesource-el.repo | 6 + attic/big-http-test/package.json | 16 ++ attic/big-http-test/server.js | 53 ++++++ tests/http/2015-11-10_01_50000.out.xz | Bin 730360 -> 0 bytes tests/http/2015-11-10_02_50000.out.xz | Bin 2373664 -> 0 bytes .../http/2015-11-10_03_12500_ec2-east-only.out.xz | Bin 802932 -> 0 bytes tests/http/Dockerfile | 24 --- tests/http/README.md | 12 -- tests/http/agent.js | 196 --------------------- tests/http/big-test-kill.sh | 9 - tests/http/big-test-start.sh | 13 -- tests/http/crunch-results.js | 65 ------- tests/http/docker-main.sh | 16 -- tests/http/nodesource-el.repo | 6 - tests/http/package.json | 16 -- tests/http/server.js | 53 ------ 26 files changed, 410 insertions(+), 410 deletions(-) create mode 100644 attic/big-http-test/2015-11-10_01_50000.out.xz create mode 100644 attic/big-http-test/2015-11-10_02_50000.out.xz create mode 100644 attic/big-http-test/2015-11-10_03_12500_ec2-east-only.out.xz create mode 100644 attic/big-http-test/Dockerfile create mode 100644 attic/big-http-test/README.md create mode 100644 attic/big-http-test/agent.js create mode 100755 attic/big-http-test/big-test-kill.sh create mode 100755 attic/big-http-test/big-test-start.sh create mode 100644 attic/big-http-test/crunch-results.js create mode 100755 attic/big-http-test/docker-main.sh create mode 100644 attic/big-http-test/nodesource-el.repo create mode 100644 attic/big-http-test/package.json create mode 100644 attic/big-http-test/server.js delete mode 100644 tests/http/2015-11-10_01_50000.out.xz delete mode 100644 tests/http/2015-11-10_02_50000.out.xz delete mode 100644 tests/http/2015-11-10_03_12500_ec2-east-only.out.xz delete mode 100644 tests/http/Dockerfile delete mode 100644 tests/http/README.md delete mode 100644 tests/http/agent.js delete mode 100755 tests/http/big-test-kill.sh delete mode 100755 tests/http/big-test-start.sh delete mode 100644 tests/http/crunch-results.js delete mode 100755 tests/http/docker-main.sh delete mode 100644 tests/http/nodesource-el.repo delete mode 100644 tests/http/package.json delete mode 100644 tests/http/server.js diff --git a/attic/big-http-test/2015-11-10_01_50000.out.xz b/attic/big-http-test/2015-11-10_01_50000.out.xz new file mode 100644 index 00000000..d3e2a666 Binary files /dev/null and b/attic/big-http-test/2015-11-10_01_50000.out.xz differ diff --git a/attic/big-http-test/2015-11-10_02_50000.out.xz b/attic/big-http-test/2015-11-10_02_50000.out.xz new file mode 100644 index 00000000..0154da79 Binary files /dev/null and b/attic/big-http-test/2015-11-10_02_50000.out.xz differ diff --git a/attic/big-http-test/2015-11-10_03_12500_ec2-east-only.out.xz b/attic/big-http-test/2015-11-10_03_12500_ec2-east-only.out.xz new file mode 100644 index 00000000..3ae3555e Binary files /dev/null and b/attic/big-http-test/2015-11-10_03_12500_ec2-east-only.out.xz differ diff --git a/attic/big-http-test/Dockerfile b/attic/big-http-test/Dockerfile new file mode 100644 index 00000000..e19b3fee --- /dev/null +++ b/attic/big-http-test/Dockerfile @@ -0,0 +1,24 @@ +FROM centos:latest + +MAINTAINER https://www.zerotier.com/ + +EXPOSE 9993/udp + +ADD nodesource-el.repo /etc/yum.repos.d/nodesource-el.repo +RUN yum -y update && yum install -y nodejs && yum clean all + +RUN mkdir -p /var/lib/zerotier-one +RUN mkdir -p /var/lib/zerotier-one/networks.d +RUN touch /var/lib/zerotier-one/networks.d/ffffffffffffffff.conf + +ADD package.json / +RUN npm install + +ADD zerotier-one / +RUN chmod a+x /zerotier-one + +ADD agent.js / +ADD docker-main.sh / +RUN chmod a+x /docker-main.sh + +CMD ["./docker-main.sh"] diff --git a/attic/big-http-test/README.md b/attic/big-http-test/README.md new file mode 100644 index 00000000..23a95605 --- /dev/null +++ b/attic/big-http-test/README.md @@ -0,0 +1,12 @@ +HTTP one-to-all test +====== + +*This is really internal use code. You're free to test it out but expect to do some editing/tweaking to make it work. We used this to run some massive scale tests of our new geo-cluster-based root server infrastructure prior to taking it live.* + +Before using this code you will want to edit agent.js to change SERVER_HOST to the IP address of where you will run server.js. This should typically be an open Internet IP, since this makes reporting not dependent upon the thing being tested. Also note that this thing does no security of any kind. It's designed for one-off tests run over a short period of time, not to be anything that runs permanently. You will also want to edit the Dockerfile if you want to build containers and change the network ID to the network you want to run tests over. + +This code can be deployed across a large number of VMs or containers to test and benchmark HTTP traffic within a virtual network at scale. The agent acts as a server and can query other agents, while the server collects agent data and tells agents about each other. It's designed to use RFC4193-based ZeroTier IPv6 addresses within the cluster, which allows the easy provisioning of a large cluster without IP conflicts. + +The Dockerfile builds an image that launches the agent. The image must be "docker run" with "--device=/dev/net/tun --privileged" to permit it to open a tun/tap device within the container. (Unfortunately CAP_NET_ADMIN may not work due to a bug in Docker and/or Linux.) You can run a bunch with a command like: + + for ((n=0;n<10;n++)); do docker run --device=/dev/net/tun --privileged -d zerotier/http-test; done diff --git a/attic/big-http-test/agent.js b/attic/big-http-test/agent.js new file mode 100644 index 00000000..9ab2e019 --- /dev/null +++ b/attic/big-http-test/agent.js @@ -0,0 +1,196 @@ +// ZeroTier distributed HTTP test agent + +// --------------------------------------------------------------------------- +// Customizable parameters: + +// Time between startup and first test attempt +var TEST_STARTUP_LAG = 10000; + +// Maximum interval between test attempts (actual timing is random % this) +var TEST_INTERVAL_MAX = (60000 * 10); + +// Test timeout in ms +var TEST_TIMEOUT = 30000; + +// Where should I get other agents' IDs and POST results? +var SERVER_HOST = '52.26.196.147'; +var SERVER_PORT = 18080; + +// Which port do agents use to serve up test data to each other? +var AGENT_PORT = 18888; + +// Payload size in bytes +var PAYLOAD_SIZE = 5000; + +// --------------------------------------------------------------------------- + +var ipaddr = require('ipaddr.js'); +var os = require('os'); +var http = require('http'); +var async = require('async'); + +var express = require('express'); +var app = express(); + +// Find our ZeroTier-assigned RFC4193 IPv6 address +var thisAgentId = null; +var interfaces = os.networkInterfaces(); +if (!interfaces) { + console.error('FATAL: os.networkInterfaces() failed.'); + process.exit(1); +} +for(var ifname in interfaces) { + var ifaddrs = interfaces[ifname]; + if (Array.isArray(ifaddrs)) { + for(var i=0;i 1) { + + var target = agents[Math.floor(Math.random() * agents.length)]; + while (target === thisAgentId) + target = agents[Math.floor(Math.random() * agents.length)]; + + var testRequest = null; + var timeoutId = null; + timeoutId = setTimeout(function() { + if (testRequest !== null) + testRequest.abort(); + timeoutId = null; + },TEST_TIMEOUT); + var startTime = Date.now(); + + testRequest = http.get({ + host: agentIdToIp(target), + port: AGENT_PORT, + path: '/' + },function(res) { + var bytes = 0; + res.on('data',function(chunk) { bytes += chunk.length; }); + res.on('end',function() { + lastTestResult = { + source: thisAgentId, + target: target, + time: (Date.now() - startTime), + bytes: bytes, + timedOut: (timeoutId === null), + error: null + }; + if (timeoutId !== null) + clearTimeout(timeoutId); + return setTimeout(doTest,Math.round(Math.random() * TEST_INTERVAL_MAX) + 1); + }); + }).on('error',function(e) { + lastTestResult = { + source: thisAgentId, + target: target, + time: (Date.now() - startTime), + bytes: 0, + timedOut: (timeoutId === null), + error: e.toString() + }; + if (timeoutId !== null) + clearTimeout(timeoutId); + return setTimeout(doTest,Math.round(Math.random() * TEST_INTERVAL_MAX) + 1); + }); + + } else { + return setTimeout(doTest,1000); + } + + }); + }).on('error',function(e) { + console.log('POST failed: '+e.toString()); + return setTimeout(doTest,1000); + }); + if (lastTestResult !== null) { + submit.write(JSON.stringify(lastTestResult)); + lastTestResult = null; + } + submit.end(); +}; + +// Agents just serve up a test payload +app.get('/',function(req,res) { return res.status(200).send(payload); }); + +var expressServer = app.listen(AGENT_PORT,function () { + // Start timeout-based loop + setTimeout(doTest(),TEST_STARTUP_LAG); +}); diff --git a/attic/big-http-test/big-test-kill.sh b/attic/big-http-test/big-test-kill.sh new file mode 100755 index 00000000..fa7f3cc4 --- /dev/null +++ b/attic/big-http-test/big-test-kill.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Kills all running Docker containers on all big-test-hosts + +export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin + +pssh -h big-test-hosts -x '-t -t' -i -OUserKnownHostsFile=/dev/null -OStrictHostKeyChecking=no -t 0 -p 256 "sudo docker ps -aq | xargs -r sudo docker rm -f" + +exit 0 diff --git a/attic/big-http-test/big-test-start.sh b/attic/big-http-test/big-test-start.sh new file mode 100755 index 00000000..2411eeda --- /dev/null +++ b/attic/big-http-test/big-test-start.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# More than 500 container seems to result in a lot of sporadic failures, probably due to Linux kernel scaling issues with virtual network ports +# 250 with a 16GB RAM VM like Amazon m4.xlarge seems good +NUM_CONTAINERS=250 +CONTAINER_IMAGE=zerotier/http-test +SCALE_UP_DELAY=10 + +export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin + +pssh -h big-test-hosts -x '-t -t' -i -OUserKnownHostsFile=/dev/null -OStrictHostKeyChecking=no -t 0 -p 256 "sudo sysctl -w net.netfilter.nf_conntrack_max=262144 ; for ((n=0;n<$NUM_CONTAINERS;n++)); do sudo docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep $SCALE_UP_DELAY; done" + +exit 0 diff --git a/attic/big-http-test/crunch-results.js b/attic/big-http-test/crunch-results.js new file mode 100644 index 00000000..50e5c49a --- /dev/null +++ b/attic/big-http-test/crunch-results.js @@ -0,0 +1,65 @@ +// +// Pipe the output of server.js into this to convert raw test results into bracketed statistics +// suitable for graphing. +// + +// Time duration per statistical bracket +var BRACKET_SIZE = 10000; + +// Number of bytes expected from each test +var EXPECTED_BYTES = 5000; + +var readline = require('readline'); +var rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + terminal: false +}); + +var count = 0.0; +var overallCount = 0.0; +var totalFailures = 0.0; +var totalOverallFailures = 0.0; +var totalMs = 0; +var totalData = 0; +var devices = {}; +var lastBracketTs = 0; + +rl.on('line',function(line) { + line = line.trim(); + var ls = line.split(','); + if (ls.length == 7) { + var ts = parseInt(ls[0]); + var fromId = ls[1]; + var toId = ls[2]; + var ms = parseFloat(ls[3]); + var bytes = parseInt(ls[4]); + var timedOut = (ls[5] == 'true') ? true : false; + var errMsg = ls[6]; + + count += 1.0; + overallCount += 1.0; + if ((bytes !== EXPECTED_BYTES)||(timedOut)) { + totalFailures += 1.0; + totalOverallFailures += 1.0; + } + totalMs += ms; + totalData += bytes; + + devices[fromId] = true; + devices[toId] = true; + + if (lastBracketTs === 0) + lastBracketTs = ts; + + if (((ts - lastBracketTs) >= BRACKET_SIZE)&&(count > 0.0)) { + console.log(count.toString()+','+overallCount.toString()+','+(totalMs / count)+','+(totalFailures / count)+','+(totalOverallFailures / overallCount)+','+totalData+','+Object.keys(devices).length); + + count = 0.0; + totalFailures = 0.0; + totalMs = 0; + totalData = 0; + lastBracketTs = ts; + } + } // else ignore junk +}); diff --git a/attic/big-http-test/docker-main.sh b/attic/big-http-test/docker-main.sh new file mode 100755 index 00000000..29cdced9 --- /dev/null +++ b/attic/big-http-test/docker-main.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +export PATH=/bin:/usr/bin:/usr/local/bin:/sbin:/usr/sbin + +/zerotier-one -d >>zerotier-one.out 2>&1 + +# Wait for ZeroTier to start and join the network +while [ ! -d "/proc/sys/net/ipv6/conf/zt0" ]; do + sleep 0.25 +done + +# Wait just a bit longer for stuff to settle +sleep 5 + +exec node --harmony /agent.js >>agent.out 2>&1 +#exec node --harmony /agent.js diff --git a/attic/big-http-test/nodesource-el.repo b/attic/big-http-test/nodesource-el.repo new file mode 100644 index 00000000..b785d3d0 --- /dev/null +++ b/attic/big-http-test/nodesource-el.repo @@ -0,0 +1,6 @@ +[nodesource] +name=Node.js Packages for Enterprise Linux 7 - $basearch +baseurl=https://rpm.nodesource.com/pub_4.x/el/7/$basearch +failovermethod=priority +enabled=1 +gpgcheck=0 diff --git a/attic/big-http-test/package.json b/attic/big-http-test/package.json new file mode 100644 index 00000000..173a6f99 --- /dev/null +++ b/attic/big-http-test/package.json @@ -0,0 +1,16 @@ +{ + "name": "zerotier-test-http", + "version": "1.0.0", + "description": "ZeroTier in-network HTTP test", + "main": "agent.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "author": "ZeroTier, Inc.", + "license": "GPL-3.0", + "dependencies": { + "async": "^1.5.0", + "express": "^4.13.3", + "ipaddr.js": "^1.0.3" + } +} diff --git a/attic/big-http-test/server.js b/attic/big-http-test/server.js new file mode 100644 index 00000000..629784da --- /dev/null +++ b/attic/big-http-test/server.js @@ -0,0 +1,53 @@ +// ZeroTier distributed HTTP test coordinator and result-reporting server + +// --------------------------------------------------------------------------- +// Customizable parameters: + +var SERVER_PORT = 18080; + +// --------------------------------------------------------------------------- + +var fs = require('fs'); + +var express = require('express'); +var app = express(); + +app.use(function(req,res,next) { + req.rawBody = ''; + req.on('data', function(chunk) { req.rawBody += chunk.toString(); }); + req.on('end', function() { return next(); }); +}); + +var knownAgents = {}; + +app.post('/:agentId',function(req,res) { + var agentId = req.params.agentId; + if ((!agentId)||(agentId.length !== 32)) + return res.status(404).send(''); + + if (req.rawBody) { + var receiveTime = Date.now(); + var resultData = null; + try { + resultData = JSON.parse(req.rawBody); + console.log(Date.now().toString()+','+resultData.source+','+resultData.target+','+resultData.time+','+resultData.bytes+','+resultData.timedOut+',"'+((resultData.error) ? resultData.error : '')+'"'); + } catch (e) {} + } + + knownAgents[agentId] = true; + var thisUpdate = []; + var agents = Object.keys(knownAgents); + if (agents.length < 100) + thisUpdate = agents; + else { + for(var xx=0;xx<100;++xx) + thisUpdate.push(agents[Math.floor(Math.random() * agents.length)]); + } + + return res.status(200).send(JSON.stringify(thisUpdate)); +}); + +var expressServer = app.listen(SERVER_PORT,function () { + console.log('LISTENING ON '+SERVER_PORT); + console.log(''); +}); diff --git a/tests/http/2015-11-10_01_50000.out.xz b/tests/http/2015-11-10_01_50000.out.xz deleted file mode 100644 index d3e2a666..00000000 Binary files a/tests/http/2015-11-10_01_50000.out.xz and /dev/null differ diff --git a/tests/http/2015-11-10_02_50000.out.xz b/tests/http/2015-11-10_02_50000.out.xz deleted file mode 100644 index 0154da79..00000000 Binary files a/tests/http/2015-11-10_02_50000.out.xz and /dev/null differ diff --git a/tests/http/2015-11-10_03_12500_ec2-east-only.out.xz b/tests/http/2015-11-10_03_12500_ec2-east-only.out.xz deleted file mode 100644 index 3ae3555e..00000000 Binary files a/tests/http/2015-11-10_03_12500_ec2-east-only.out.xz and /dev/null differ diff --git a/tests/http/Dockerfile b/tests/http/Dockerfile deleted file mode 100644 index e19b3fee..00000000 --- a/tests/http/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM centos:latest - -MAINTAINER https://www.zerotier.com/ - -EXPOSE 9993/udp - -ADD nodesource-el.repo /etc/yum.repos.d/nodesource-el.repo -RUN yum -y update && yum install -y nodejs && yum clean all - -RUN mkdir -p /var/lib/zerotier-one -RUN mkdir -p /var/lib/zerotier-one/networks.d -RUN touch /var/lib/zerotier-one/networks.d/ffffffffffffffff.conf - -ADD package.json / -RUN npm install - -ADD zerotier-one / -RUN chmod a+x /zerotier-one - -ADD agent.js / -ADD docker-main.sh / -RUN chmod a+x /docker-main.sh - -CMD ["./docker-main.sh"] diff --git a/tests/http/README.md b/tests/http/README.md deleted file mode 100644 index 23a95605..00000000 --- a/tests/http/README.md +++ /dev/null @@ -1,12 +0,0 @@ -HTTP one-to-all test -====== - -*This is really internal use code. You're free to test it out but expect to do some editing/tweaking to make it work. We used this to run some massive scale tests of our new geo-cluster-based root server infrastructure prior to taking it live.* - -Before using this code you will want to edit agent.js to change SERVER_HOST to the IP address of where you will run server.js. This should typically be an open Internet IP, since this makes reporting not dependent upon the thing being tested. Also note that this thing does no security of any kind. It's designed for one-off tests run over a short period of time, not to be anything that runs permanently. You will also want to edit the Dockerfile if you want to build containers and change the network ID to the network you want to run tests over. - -This code can be deployed across a large number of VMs or containers to test and benchmark HTTP traffic within a virtual network at scale. The agent acts as a server and can query other agents, while the server collects agent data and tells agents about each other. It's designed to use RFC4193-based ZeroTier IPv6 addresses within the cluster, which allows the easy provisioning of a large cluster without IP conflicts. - -The Dockerfile builds an image that launches the agent. The image must be "docker run" with "--device=/dev/net/tun --privileged" to permit it to open a tun/tap device within the container. (Unfortunately CAP_NET_ADMIN may not work due to a bug in Docker and/or Linux.) You can run a bunch with a command like: - - for ((n=0;n<10;n++)); do docker run --device=/dev/net/tun --privileged -d zerotier/http-test; done diff --git a/tests/http/agent.js b/tests/http/agent.js deleted file mode 100644 index 9ab2e019..00000000 --- a/tests/http/agent.js +++ /dev/null @@ -1,196 +0,0 @@ -// ZeroTier distributed HTTP test agent - -// --------------------------------------------------------------------------- -// Customizable parameters: - -// Time between startup and first test attempt -var TEST_STARTUP_LAG = 10000; - -// Maximum interval between test attempts (actual timing is random % this) -var TEST_INTERVAL_MAX = (60000 * 10); - -// Test timeout in ms -var TEST_TIMEOUT = 30000; - -// Where should I get other agents' IDs and POST results? -var SERVER_HOST = '52.26.196.147'; -var SERVER_PORT = 18080; - -// Which port do agents use to serve up test data to each other? -var AGENT_PORT = 18888; - -// Payload size in bytes -var PAYLOAD_SIZE = 5000; - -// --------------------------------------------------------------------------- - -var ipaddr = require('ipaddr.js'); -var os = require('os'); -var http = require('http'); -var async = require('async'); - -var express = require('express'); -var app = express(); - -// Find our ZeroTier-assigned RFC4193 IPv6 address -var thisAgentId = null; -var interfaces = os.networkInterfaces(); -if (!interfaces) { - console.error('FATAL: os.networkInterfaces() failed.'); - process.exit(1); -} -for(var ifname in interfaces) { - var ifaddrs = interfaces[ifname]; - if (Array.isArray(ifaddrs)) { - for(var i=0;i 1) { - - var target = agents[Math.floor(Math.random() * agents.length)]; - while (target === thisAgentId) - target = agents[Math.floor(Math.random() * agents.length)]; - - var testRequest = null; - var timeoutId = null; - timeoutId = setTimeout(function() { - if (testRequest !== null) - testRequest.abort(); - timeoutId = null; - },TEST_TIMEOUT); - var startTime = Date.now(); - - testRequest = http.get({ - host: agentIdToIp(target), - port: AGENT_PORT, - path: '/' - },function(res) { - var bytes = 0; - res.on('data',function(chunk) { bytes += chunk.length; }); - res.on('end',function() { - lastTestResult = { - source: thisAgentId, - target: target, - time: (Date.now() - startTime), - bytes: bytes, - timedOut: (timeoutId === null), - error: null - }; - if (timeoutId !== null) - clearTimeout(timeoutId); - return setTimeout(doTest,Math.round(Math.random() * TEST_INTERVAL_MAX) + 1); - }); - }).on('error',function(e) { - lastTestResult = { - source: thisAgentId, - target: target, - time: (Date.now() - startTime), - bytes: 0, - timedOut: (timeoutId === null), - error: e.toString() - }; - if (timeoutId !== null) - clearTimeout(timeoutId); - return setTimeout(doTest,Math.round(Math.random() * TEST_INTERVAL_MAX) + 1); - }); - - } else { - return setTimeout(doTest,1000); - } - - }); - }).on('error',function(e) { - console.log('POST failed: '+e.toString()); - return setTimeout(doTest,1000); - }); - if (lastTestResult !== null) { - submit.write(JSON.stringify(lastTestResult)); - lastTestResult = null; - } - submit.end(); -}; - -// Agents just serve up a test payload -app.get('/',function(req,res) { return res.status(200).send(payload); }); - -var expressServer = app.listen(AGENT_PORT,function () { - // Start timeout-based loop - setTimeout(doTest(),TEST_STARTUP_LAG); -}); diff --git a/tests/http/big-test-kill.sh b/tests/http/big-test-kill.sh deleted file mode 100755 index fa7f3cc4..00000000 --- a/tests/http/big-test-kill.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -# Kills all running Docker containers on all big-test-hosts - -export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin - -pssh -h big-test-hosts -x '-t -t' -i -OUserKnownHostsFile=/dev/null -OStrictHostKeyChecking=no -t 0 -p 256 "sudo docker ps -aq | xargs -r sudo docker rm -f" - -exit 0 diff --git a/tests/http/big-test-start.sh b/tests/http/big-test-start.sh deleted file mode 100755 index 2411eeda..00000000 --- a/tests/http/big-test-start.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -# More than 500 container seems to result in a lot of sporadic failures, probably due to Linux kernel scaling issues with virtual network ports -# 250 with a 16GB RAM VM like Amazon m4.xlarge seems good -NUM_CONTAINERS=250 -CONTAINER_IMAGE=zerotier/http-test -SCALE_UP_DELAY=10 - -export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin - -pssh -h big-test-hosts -x '-t -t' -i -OUserKnownHostsFile=/dev/null -OStrictHostKeyChecking=no -t 0 -p 256 "sudo sysctl -w net.netfilter.nf_conntrack_max=262144 ; for ((n=0;n<$NUM_CONTAINERS;n++)); do sudo docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep $SCALE_UP_DELAY; done" - -exit 0 diff --git a/tests/http/crunch-results.js b/tests/http/crunch-results.js deleted file mode 100644 index 50e5c49a..00000000 --- a/tests/http/crunch-results.js +++ /dev/null @@ -1,65 +0,0 @@ -// -// Pipe the output of server.js into this to convert raw test results into bracketed statistics -// suitable for graphing. -// - -// Time duration per statistical bracket -var BRACKET_SIZE = 10000; - -// Number of bytes expected from each test -var EXPECTED_BYTES = 5000; - -var readline = require('readline'); -var rl = readline.createInterface({ - input: process.stdin, - output: process.stdout, - terminal: false -}); - -var count = 0.0; -var overallCount = 0.0; -var totalFailures = 0.0; -var totalOverallFailures = 0.0; -var totalMs = 0; -var totalData = 0; -var devices = {}; -var lastBracketTs = 0; - -rl.on('line',function(line) { - line = line.trim(); - var ls = line.split(','); - if (ls.length == 7) { - var ts = parseInt(ls[0]); - var fromId = ls[1]; - var toId = ls[2]; - var ms = parseFloat(ls[3]); - var bytes = parseInt(ls[4]); - var timedOut = (ls[5] == 'true') ? true : false; - var errMsg = ls[6]; - - count += 1.0; - overallCount += 1.0; - if ((bytes !== EXPECTED_BYTES)||(timedOut)) { - totalFailures += 1.0; - totalOverallFailures += 1.0; - } - totalMs += ms; - totalData += bytes; - - devices[fromId] = true; - devices[toId] = true; - - if (lastBracketTs === 0) - lastBracketTs = ts; - - if (((ts - lastBracketTs) >= BRACKET_SIZE)&&(count > 0.0)) { - console.log(count.toString()+','+overallCount.toString()+','+(totalMs / count)+','+(totalFailures / count)+','+(totalOverallFailures / overallCount)+','+totalData+','+Object.keys(devices).length); - - count = 0.0; - totalFailures = 0.0; - totalMs = 0; - totalData = 0; - lastBracketTs = ts; - } - } // else ignore junk -}); diff --git a/tests/http/docker-main.sh b/tests/http/docker-main.sh deleted file mode 100755 index 29cdced9..00000000 --- a/tests/http/docker-main.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -export PATH=/bin:/usr/bin:/usr/local/bin:/sbin:/usr/sbin - -/zerotier-one -d >>zerotier-one.out 2>&1 - -# Wait for ZeroTier to start and join the network -while [ ! -d "/proc/sys/net/ipv6/conf/zt0" ]; do - sleep 0.25 -done - -# Wait just a bit longer for stuff to settle -sleep 5 - -exec node --harmony /agent.js >>agent.out 2>&1 -#exec node --harmony /agent.js diff --git a/tests/http/nodesource-el.repo b/tests/http/nodesource-el.repo deleted file mode 100644 index b785d3d0..00000000 --- a/tests/http/nodesource-el.repo +++ /dev/null @@ -1,6 +0,0 @@ -[nodesource] -name=Node.js Packages for Enterprise Linux 7 - $basearch -baseurl=https://rpm.nodesource.com/pub_4.x/el/7/$basearch -failovermethod=priority -enabled=1 -gpgcheck=0 diff --git a/tests/http/package.json b/tests/http/package.json deleted file mode 100644 index 173a6f99..00000000 --- a/tests/http/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "name": "zerotier-test-http", - "version": "1.0.0", - "description": "ZeroTier in-network HTTP test", - "main": "agent.js", - "scripts": { - "test": "echo \"Error: no test specified\" && exit 1" - }, - "author": "ZeroTier, Inc.", - "license": "GPL-3.0", - "dependencies": { - "async": "^1.5.0", - "express": "^4.13.3", - "ipaddr.js": "^1.0.3" - } -} diff --git a/tests/http/server.js b/tests/http/server.js deleted file mode 100644 index 629784da..00000000 --- a/tests/http/server.js +++ /dev/null @@ -1,53 +0,0 @@ -// ZeroTier distributed HTTP test coordinator and result-reporting server - -// --------------------------------------------------------------------------- -// Customizable parameters: - -var SERVER_PORT = 18080; - -// --------------------------------------------------------------------------- - -var fs = require('fs'); - -var express = require('express'); -var app = express(); - -app.use(function(req,res,next) { - req.rawBody = ''; - req.on('data', function(chunk) { req.rawBody += chunk.toString(); }); - req.on('end', function() { return next(); }); -}); - -var knownAgents = {}; - -app.post('/:agentId',function(req,res) { - var agentId = req.params.agentId; - if ((!agentId)||(agentId.length !== 32)) - return res.status(404).send(''); - - if (req.rawBody) { - var receiveTime = Date.now(); - var resultData = null; - try { - resultData = JSON.parse(req.rawBody); - console.log(Date.now().toString()+','+resultData.source+','+resultData.target+','+resultData.time+','+resultData.bytes+','+resultData.timedOut+',"'+((resultData.error) ? resultData.error : '')+'"'); - } catch (e) {} - } - - knownAgents[agentId] = true; - var thisUpdate = []; - var agents = Object.keys(knownAgents); - if (agents.length < 100) - thisUpdate = agents; - else { - for(var xx=0;xx<100;++xx) - thisUpdate.push(agents[Math.floor(Math.random() * agents.length)]); - } - - return res.status(200).send(JSON.stringify(thisUpdate)); -}); - -var expressServer = app.listen(SERVER_PORT,function () { - console.log('LISTENING ON '+SERVER_PORT); - console.log(''); -}); -- cgit v1.2.3