diff options
-rwxr-xr-x | .gitignore | 14 | ||||
-rwxr-xr-x | cluster-geo/cluster-geo.exe | 13 | ||||
-rw-r--r-- | cluster-geo/cluster-geo/config.js.sample | 7 | ||||
-rw-r--r-- | cluster-geo/cluster-geo/index.js | 94 | ||||
-rw-r--r-- | cluster-geo/cluster-geo/package.json | 16 | ||||
-rw-r--r-- | include/ZeroTierOne.h | 121 | ||||
-rw-r--r-- | make-mac.mk | 2 | ||||
-rw-r--r-- | node/Cluster.cpp | 366 | ||||
-rw-r--r-- | node/Cluster.hpp | 133 | ||||
-rw-r--r-- | node/IncomingPacket.cpp | 43 | ||||
-rw-r--r-- | node/Node.cpp | 155 | ||||
-rw-r--r-- | node/Node.hpp | 14 | ||||
-rw-r--r-- | node/Peer.cpp | 19 | ||||
-rw-r--r-- | node/RuntimeEnvironment.hpp | 24 | ||||
-rw-r--r-- | node/Switch.cpp | 12 | ||||
-rw-r--r-- | node/Topology.cpp | 27 |
16 files changed, 897 insertions, 163 deletions
@@ -49,23 +49,17 @@ Thumbs.db *.rpm *.autosave *.tmp +node_modules -# Root topology build files, temporaries, and never check in secrets -/root-topology/bin2c -/root-topology/mktopology -/root-topology/*.secret -/root-topology/test/supernodes -/root-topology/test/test-root-topology +# cluster-geo stuff +cluster-geo/cluster-geo/config.js +cluster-geo/cluster-geo/cache.* # MacGap wrapper build files /ext/mac-ui-macgap1-wrapper/src/MacGap.xcodeproj/project.xcworkspace/xcuserdata/* /ext/mac-ui-macgap1-wrapper/src/MacGap.xcodeproj/xcuserdata/* /ext/mac-ui-macgap1-wrapper/src/build -# Web UI dev temporaries -/ui/.module-cache -node_modules - # Java/Android/JNI build droppings java/obj/ java/libs/ diff --git a/cluster-geo/cluster-geo.exe b/cluster-geo/cluster-geo.exe new file mode 100755 index 00000000..ae720610 --- /dev/null +++ b/cluster-geo/cluster-geo.exe @@ -0,0 +1,13 @@ +#!/bin/bash + +export PATH=/bin:/usr/bin:/usr/local/bin:/sbin:/usr/sbin + +cd `dirname $0` +if [ ! -d cluster-geo -o ! -f cluster-geo/index.js ]; then + echo 'Cannot find ./cluster-geo containing NodeJS script files.' 
+ exit 1 +fi + +cd cluster-geo + +exec node index.js diff --git a/cluster-geo/cluster-geo/config.js.sample b/cluster-geo/cluster-geo/config.js.sample new file mode 100644 index 00000000..ec1ebfea --- /dev/null +++ b/cluster-geo/cluster-geo/config.js.sample @@ -0,0 +1,7 @@ +// MaxMind GeoIP2 config +module.exports.maxmind = { + userId: 1234, + licenseKey: 'asdf', + service: 'city', + requestTimeout: 1000 +}; diff --git a/cluster-geo/cluster-geo/index.js b/cluster-geo/cluster-geo/index.js new file mode 100644 index 00000000..0e903ade --- /dev/null +++ b/cluster-geo/cluster-geo/index.js @@ -0,0 +1,94 @@ +// +// GeoIP lookup service +// + +// GeoIP cache TTL in ms +var CACHE_TTL = (60 * 60 * 24 * 60 * 1000); // 60 days + +var config = require(__dirname + '/config.js'); + +if (!config.maxmind) { + console.error('FATAL: only MaxMind GeoIP2 is currently supported and is not configured in config.js'); + process.exit(1); +} +var geo = require('geoip2ws')(config.maxmind); + +var cache = require('levelup')(__dirname + '/cache.leveldb'); + +function lookup(ip,callback) +{ + cache.get(ip,function(err,cachedEntryJson) { + if ((!err)&&(cachedEntryJson)) { + try { + var cachedEntry = JSON.parse(cachedEntryJson.toString()); + if (cachedEntry) { + var ts = cachedEntry.ts; + var r = cachedEntry.r; + if ((ts)&&(r)) { + if ((Date.now() - ts) < CACHE_TTL) { + r._cached = true; + return callback(null,r); + } + } + } + } catch (e) {} + } + + geo(ip,function(err,result) { + if (err) + return callback(err,null); + if ((!result)||(!result.location)) + return callback(new Error('null result'),null); + + cache.put(ip,JSON.stringify({ + ts: Date.now(), + r: result + }),function(err) { + if (err) + console.error('Error saving to cache: '+err); + return callback(null,result); + }); + }); + }); +}; + +var linebuf = ''; +process.stdin.on('readable',function() { + var chunk; + while (null !== (chunk = process.stdin.read())) { + for(var i=0;i<chunk.length;++i) { + var c = chunk[i]; + if ((c == 
0x0d)||(c == 0x0a)) { + if (linebuf.length > 0) { + var ip = linebuf; + lookup(ip,function(err,result) { + if ((err)||(!result)||(!result.location)) { + return process.stdout.write(ip+',0,0,0,0,0,0\n'); + } else { + var lat = parseFloat(result.location.latitude); + var lon = parseFloat(result.location.longitude); + + // Convert to X,Y,Z coordinates from Earth's origin, Earth-as-sphere approximation. + var latRadians = lat * 0.01745329251994; // PI / 180 + var lonRadians = lon * 0.01745329251994; // PI / 180 + var cosLat = Math.cos(latRadians); + var x = Math.round((-6371.0) * cosLat * Math.cos(lonRadians)); // 6371 == Earth's approximate radius in kilometers + var y = Math.round(6371.0 * Math.sin(latRadians)); + var z = Math.round(6371.0 * cosLat * Math.sin(lonRadians)); + + return process.stdout.write(ip+',1,'+lat+','+lon+','+x+','+y+','+z+'\n'); + } + }); + } + linebuf = ''; + } else { + linebuf += String.fromCharCode(c); + } + } + } +}); + +process.stdin.on('end',function() { + cache.close(); + process.exit(0); +}); diff --git a/cluster-geo/cluster-geo/package.json b/cluster-geo/cluster-geo/package.json new file mode 100644 index 00000000..1927197e --- /dev/null +++ b/cluster-geo/cluster-geo/package.json @@ -0,0 +1,16 @@ +{ + "name": "cluster-geo", + "version": "1.0.0", + "description": "Cluster GEO-IP Query Service", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "author": "ZeroTier, Inc.", + "license": "GPL-3.0", + "dependencies": { + "geoip2ws": "^1.7.1", + "leveldown": "^1.4.2", + "levelup": "^1.2.1" + } +} diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index 42c904eb..135c8e11 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -129,6 +129,16 @@ extern "C" { #define ZT_CIRCUIT_TEST_MAX_HOP_BREADTH 256 /** + * Maximum number of cluster members (and max member ID plus one) + */ +#define ZT_CLUSTER_MAX_MEMBERS 256 + +/** + * Maximum allowed cluster message length in bytes + */ 
+#define ZT_CLUSTER_MAX_MESSAGE_LENGTH 65535 + +/** * A null/empty sockaddr (all zero) to signify an unspecified socket address */ extern const struct sockaddr_storage ZT_SOCKADDR_NULL; @@ -174,7 +184,17 @@ enum ZT_ResultCode /** * Network ID not valid */ - ZT_RESULT_ERROR_NETWORK_NOT_FOUND = 1000 + ZT_RESULT_ERROR_NETWORK_NOT_FOUND = 1000, + + /** + * The requested operation is not supported on this version or build + */ + ZT_RESULT_ERROR_UNSUPPORTED_OPERATION = 1001, + + /** + * The requested operation was given a bad parameter or was called in an invalid state + */ + ZT_RESULT_ERROR_BAD_PARAMETER = 1002 }; /** @@ -1321,6 +1341,105 @@ enum ZT_ResultCode ZT_Node_circuitTestBegin(ZT_Node *node,ZT_CircuitTest *test,v void ZT_Node_circuitTestEnd(ZT_Node *node,ZT_CircuitTest *test); /** + * Initialize cluster operation + * + * This initializes the internal structures and state for cluster operation. + * It takes two function pointers. The first is to a function that can be + * used to send data to cluster peers (mechanism is not defined by Node), + * and the second is to a function that can be used to get the location of + * a physical address in X,Y,Z coordinate space (e.g. as cartesian coordinates + * projected from the center of the Earth). + * + * Send function takes an arbitrary pointer followed by the cluster member ID + * to send data to, a pointer to the data, and the length of the data. The + * maximum message length is ZT_CLUSTER_MAX_MESSAGE_LENGTH (65535). Messages + * must be delivered whole and may be dropped or transposed, though high + * failure rates are undesirable and can cause problems. Validity checking or + * CRC is also not required since the Node validates the authenticity of + * cluster messages using cryptographic methods and will silently drop invalid + * messages. + * + * Address to location function is optional and if NULL geo-handoff is not + * enabled (in this case x, y, and z in clusterInit are also unused). 
It + * takes an arbitrary pointer followed by a physical address and three result + * parameters for x, y, and z. It returns zero on failure or nonzero if these + * three coordinates have been set. Coordinate space is arbitrary and can be + * e.g. coordinates on Earth relative to Earth's center. These can be obtained + * from latitude and longitude with versions of the Haversine formula. + * + * See: http://stackoverflow.com/questions/1185408/converting-from-longitude-latitude-to-cartesian-coordinates + * + * Neither the send nor the address to location function should block. If the + * address to location function does not have a location for an address, it + * should return zero and then look up the address for future use since it + * will be called again in (typically) 1-3 minutes. + * + * Note that both functions can be called from any thread from which the + * various Node functions are called, and so must be thread safe if multiple + * threads are being used. + * + * @param node Node instance + * @param myId My cluster member ID (must be less than ZT_CLUSTER_MAX_MEMBERS) + * @param zeroTierPhysicalEndpoints Preferred physical address(es) for ZeroTier clients to contact this cluster member (for peer redirect) + * @param numZeroTierPhysicalEndpoints Number of physical endpoints in zeroTierPhysicalEndpoints[] (max allowed: 255) + * @param x My cluster member's X location + * @param y My cluster member's Y location + * @param z My cluster member's Z location + * @param sendFunction Function to be called to send data to other cluster members + * @param sendFunctionArg First argument to sendFunction() + * @param addressToLocationFunction Function to be called to get the location of a physical address or NULL to disable geo-handoff + * @param addressToLocationFunctionArg First argument to addressToLocationFunction() + * @return OK or UNSUPPORTED_OPERATION if this Node was not built with cluster support + */ +enum ZT_ResultCode ZT_Node_clusterInit( + ZT_Node 
*node, + unsigned int myId, + const struct sockaddr_storage *zeroTierPhysicalEndpoints, + unsigned int numZeroTierPhysicalEndpoints, + int x, + int y, + int z, + void (*sendFunction)(void *,unsigned int,const void *,unsigned int), + void *sendFunctionArg, + int (*addressToLocationFunction)(void *,const struct sockaddr_storage *,int *,int *,int *), + void *addressToLocationFunctionArg); + +/** + * Add a member to this cluster + * + * Calling this without having called clusterInit() will do nothing. + * + * @param node Node instance + * @param memberId Member ID (must be less than ZT_CLUSTER_MAX_MEMBERS) + * @return OK or error if clustering is disabled, ID invalid, etc. + */ +enum ZT_ResultCode ZT_Node_clusterAddMember(ZT_Node *node,unsigned int memberId); + +/** + * Remove a member from this cluster + * + * Calling this without having called clusterInit() will do nothing. + * + * @param node Node instance + * @param memberId Member ID to remove (nothing happens if not present) + */ +void ZT_Node_clusterRemoveMember(ZT_Node *node,unsigned int memberId); + +/** + * Handle an incoming cluster state message + * + * The message itself contains cluster member IDs, and invalid or badly + * addressed messages will be silently discarded. + * + * Calling this without having called clusterInit() will do nothing. 
+ * + * @param node Node instance + * @param msg Cluster message + * @param len Length of cluster message + */ +void ZT_Node_clusterHandleIncomingMessage(ZT_Node *node,const void *msg,unsigned int len); + +/** * Get ZeroTier One version * * @param major Result: major version diff --git a/make-mac.mk b/make-mac.mk index 174216fb..e53212c0 100644 --- a/make-mac.mk +++ b/make-mac.mk @@ -6,7 +6,7 @@ ifeq ($(origin CXX),default) endif INCLUDES= -DEFS= +DEFS=-DZT_ENABLE_CLUSTER LIBS= ARCH_FLAGS=-arch x86_64 diff --git a/node/Cluster.cpp b/node/Cluster.cpp index bfa39d22..d9514db5 100644 --- a/node/Cluster.cpp +++ b/node/Cluster.cpp @@ -31,10 +31,13 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <math.h> #include <algorithm> #include <utility> +#include "../version.h" + #include "Cluster.hpp" #include "RuntimeEnvironment.hpp" #include "MulticastGroup.hpp" @@ -42,22 +45,44 @@ #include "Salsa20.hpp" #include "Poly1305.hpp" #include "Packet.hpp" +#include "Identity.hpp" #include "Peer.hpp" #include "Switch.hpp" #include "Node.hpp" namespace ZeroTier { -Cluster::Cluster(const RuntimeEnvironment *renv,uint16_t id,DistanceAlgorithm da,int32_t x,int32_t y,int32_t z,void (*sendFunction)(void *,uint16_t,const void *,unsigned int),void *arg) : +static inline double _dist3d(int x1,int y1,int z1,int x2,int y2,int z2) + throw() +{ + double dx = ((double)x2 - (double)x1); + double dy = ((double)y2 - (double)y1); + double dz = ((double)z2 - (double)z1); + return sqrt((dx * dx) + (dy * dy) + (dz * dz)); +} + +Cluster::Cluster( + const RuntimeEnvironment *renv, + uint16_t id, + const std::vector<InetAddress> &zeroTierPhysicalEndpoints, + int32_t x, + int32_t y, + int32_t z, + void (*sendFunction)(void *,unsigned int,const void *,unsigned int), + void *sendFunctionArg, + int (*addressToLocationFunction)(void *,const struct sockaddr_storage *,int *,int *,int *), + void *addressToLocationFunctionArg) : RR(renv), _sendFunction(sendFunction), - _arg(arg), + 
_sendFunctionArg(sendFunctionArg), + _addressToLocationFunction(addressToLocationFunction), + _addressToLocationFunctionArg(addressToLocationFunctionArg), _x(x), _y(y), _z(z), - _da(da), _id(id), - _members(new _Member[65536]) + _zeroTierPhysicalEndpoints(zeroTierPhysicalEndpoints), + _members(new _Member[ZT_CLUSTER_MAX_MEMBERS]) { uint16_t stmp[ZT_SHA512_DIGEST_LEN / sizeof(uint16_t)]; @@ -114,16 +139,20 @@ void Cluster::handleIncomingStateMessage(const void *msg,unsigned int len) s20.decrypt12(reinterpret_cast<const char *>(msg) + 24,const_cast<void *>(dmsg.data()),dmsg.size()); } - if (dmsg.size() < 2) + if (dmsg.size() < 4) return; const uint16_t fromMemberId = dmsg.at<uint16_t>(0); unsigned int ptr = 2; + if (fromMemberId == _id) + return; + const uint16_t toMemberId = dmsg.at<uint16_t>(ptr); + ptr += 2; + if (toMemberId != _id) + return; _Member &m = _members[fromMemberId]; Mutex::Lock mlck(m.lock); - m.lastReceivedFrom = RR->node->now(); - try { while (ptr < dmsg.size()) { const unsigned int mlen = dmsg.at<uint16_t>(ptr); ptr += 2; @@ -143,31 +172,51 @@ void Cluster::handleIncomingStateMessage(const void *msg,unsigned int len) ptr += 8; // skip local clock, not used m.load = dmsg.at<uint64_t>(ptr); ptr += 8; ptr += 8; // skip flags, unused - m.physicalAddressCount = dmsg[ptr++]; - if (m.physicalAddressCount > ZT_CLUSTER_MEMBER_MAX_PHYSICAL_ADDRS) - m.physicalAddressCount = ZT_CLUSTER_MEMBER_MAX_PHYSICAL_ADDRS; - for(unsigned int i=0;i<m.physicalAddressCount;++i) - ptr += m.physicalAddresses[i].deserialize(dmsg,ptr); +#ifdef ZT_TRACE + std::string addrs; +#endif + unsigned int physicalAddressCount = dmsg[ptr++]; + for(unsigned int i=0;i<physicalAddressCount;++i) { + m.zeroTierPhysicalEndpoints.push_back(InetAddress()); + ptr += m.zeroTierPhysicalEndpoints.back().deserialize(dmsg,ptr); + if (!(m.zeroTierPhysicalEndpoints.back())) { + m.zeroTierPhysicalEndpoints.pop_back(); + } +#ifdef ZT_TRACE + else { + if (addrs.length() > 0) + addrs.push_back(','); + 
addrs.append(m.zeroTierPhysicalEndpoints.back().toString()); + } +#endif + } m.lastReceivedAliveAnnouncement = RR->node->now(); +#ifdef ZT_TRACE + TRACE("[%u] I'm alive! send me peers at %s",(unsigned int)fromMemberId,addrs.c_str()); +#endif } break; case STATE_MESSAGE_HAVE_PEER: { try { Identity id; ptr += id.deserialize(dmsg,ptr); - RR->topology->saveIdentity(id); - - { // Add or update peer affinity entry - _PeerAffinity pa(id.address(),fromMemberId,RR->node->now()); - Mutex::Lock _l2(_peerAffinities_m); - std::vector<_PeerAffinity>::iterator i(std::lower_bound(_peerAffinities.begin(),_peerAffinities.end(),pa)); // O(log(n)) - if ((i != _peerAffinities.end())&&(i->key == pa.key)) { - i->timestamp = pa.timestamp; - } else { - _peerAffinities.push_back(pa); - std::sort(_peerAffinities.begin(),_peerAffinities.end()); // probably a more efficient way to insert but okay for now - } - } + if (id) { + RR->topology->saveIdentity(id); + + { // Add or update peer affinity entry + _PeerAffinity pa(id.address(),fromMemberId,RR->node->now()); + Mutex::Lock _l2(_peerAffinities_m); + std::vector<_PeerAffinity>::iterator i(std::lower_bound(_peerAffinities.begin(),_peerAffinities.end(),pa)); // O(log(n)) + if ((i != _peerAffinities.end())&&(i->key == pa.key)) { + i->timestamp = pa.timestamp; + } else { + _peerAffinities.push_back(pa); + std::sort(_peerAffinities.begin(),_peerAffinities.end()); // probably a more efficient way to insert but okay for now + } + } + + TRACE("[%u] has %s",(unsigned int)fromMemberId,id.address().toString().c_str()); + } } catch ( ... 
) { // ignore invalid identities } @@ -179,10 +228,15 @@ void Cluster::handleIncomingStateMessage(const void *msg,unsigned int len) const MAC mac(dmsg.field(ptr,6),6); ptr += 6; const uint32_t adi = dmsg.at<uint32_t>(ptr); ptr += 4; RR->mc->add(RR->node->now(),nwid,MulticastGroup(mac,adi),address); + TRACE("[%u] %s likes %s/%u on %.16llu",(unsigned int)fromMemberId,address.toString().c_str(),mac.toString().c_str(),(unsigned int)adi,nwid); } break; case STATE_MESSAGE_COM: { - // TODO: not used yet + CertificateOfMembership com; + ptr += com.deserialize(dmsg,ptr); + if (com) { + TRACE("[%u] COM for %s on %.16llu rev %llu",(unsigned int)fromMemberId,com.issuedTo().toString().c_str(),com.networkId(),com.revision()); + } } break; case STATE_MESSAGE_RELAY: { @@ -195,6 +249,8 @@ void Cluster::handleIncomingStateMessage(const void *msg,unsigned int len) if (packetLen >= ZT_PROTO_MIN_FRAGMENT_LENGTH) { // ignore anything too short to contain a dest address const Address destinationAddress(reinterpret_cast<const char *>(packet) + 8,ZT_ADDRESS_LENGTH); + TRACE("[%u] relay %u bytes to %s (%u remote paths included)",(unsigned int)fromMemberId,packetLen,destinationAddress.toString().c_str(),numRemotePeerPaths); + SharedPtr<Peer> destinationPeer(RR->topology->getPeer(destinationAddress)); if (destinationPeer) { if ( @@ -232,8 +288,6 @@ void Cluster::handleIncomingStateMessage(const void *msg,unsigned int len) remotePeerAddress.appendTo(rendezvousForDest); Buffer<2048> rendezvousForOtherEnd; - rendezvousForOtherEnd.addSize(2); // leave room for payload size - rendezvousForOtherEnd.append((uint8_t)STATE_MESSAGE_PROXY_SEND); remotePeerAddress.appendTo(rendezvousForOtherEnd); rendezvousForOtherEnd.append((uint8_t)Packet::VERB_RENDEZVOUS); const unsigned int rendezvousForOtherEndPayloadSizePtr = rendezvousForOtherEnd.size(); @@ -267,9 +321,8 @@ void Cluster::handleIncomingStateMessage(const void *msg,unsigned int len) } if (haveMatch) { + 
_send(fromMemberId,STATE_MESSAGE_PROXY_SEND,rendezvousForOtherEnd.data(),rendezvousForOtherEnd.size()); RR->sw->send(rendezvousForDest,true,0); - rendezvousForOtherEnd.setAt<uint16_t>(0,(uint16_t)(rendezvousForOtherEnd.size() - 2)); - _send(fromMemberId,rendezvousForOtherEnd.data(),rendezvousForOtherEnd.size()); } } } @@ -283,6 +336,7 @@ void Cluster::handleIncomingStateMessage(const void *msg,unsigned int len) Packet outp(rcpt,RR->identity.address(),verb); outp.append(dmsg.field(ptr,len),len); RR->sw->send(outp,true,0); + TRACE("[%u] proxy send %s to %s length %u",(unsigned int)fromMemberId,Packet::verbString(verb),rcpt.toString().c_str(),len); } break; } } catch ( ... ) { @@ -298,37 +352,172 @@ void Cluster::handleIncomingStateMessage(const void *msg,unsigned int len) } } -void Cluster::replicateHavePeer(const Address &peerAddress) +bool Cluster::sendViaCluster(const Address &fromPeerAddress,const Address &toPeerAddress,const void *data,unsigned int len) { + if (len > 16384) // sanity check + return false; + + uint64_t mostRecentTimestamp = 0; + uint16_t canHasPeer = 0; + + { // Anyone got this peer? + Mutex::Lock _l2(_peerAffinities_m); + std::vector<_PeerAffinity>::iterator i(std::lower_bound(_peerAffinities.begin(),_peerAffinities.end(),_PeerAffinity(toPeerAddress,0,0))); // O(log(n)) + while ((i != _peerAffinities.end())&&(i->address() == toPeerAddress)) { + uint16_t mid = i->clusterMemberId(); + if ((mid != _id)&&(i->timestamp > mostRecentTimestamp)) { + mostRecentTimestamp = i->timestamp; + canHasPeer = mid; + } + } + } + + const uint64_t now = RR->node->now(); + if ((now - mostRecentTimestamp) < ZT_PEER_ACTIVITY_TIMEOUT) { + Buffer<16384> buf; + + InetAddress v4,v6; + if (fromPeerAddress) { + SharedPtr<Peer> fromPeer(RR->topology->getPeer(fromPeerAddress)); + if (fromPeer) + fromPeer->getBestActiveAddresses(now,v4,v6); + } + buf.append((uint8_t)( (v4) ? ((v6) ? 2 : 1) : ((v6) ? 
1 : 0) )); + if (v4) + v4.serialize(buf); + if (v6) + v6.serialize(buf); + buf.append((uint16_t)len); + buf.append(data,len); + + { + Mutex::Lock _l2(_members[canHasPeer].lock); + _send(canHasPeer,STATE_MESSAGE_RELAY,buf.data(),buf.size()); + } + + return true; + } + + return false; +} + +void Cluster::replicateHavePeer(const Identity &peerId) +{ + { // Use peer affinity table to track our own last announce time for peers + _PeerAffinity pa(peerId.address(),_id,RR->node->now()); + Mutex::Lock _l2(_peerAffinities_m); + std::vector<_PeerAffinity>::iterator i(std::lower_bound(_peerAffinities.begin(),_peerAffinities.end(),pa)); // O(log(n)) + if ((i != _peerAffinities.end())&&(i->key == pa.key)) { + if ((pa.timestamp - i->timestamp) >= ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD) { + i->timestamp = pa.timestamp; + // continue to announcement + } else { + // we've already announced this peer recently, so skip + return; + } + } else { + _peerAffinities.push_back(pa); + std::sort(_peerAffinities.begin(),_peerAffinities.end()); // probably a more efficient way to insert but okay for now + // continue to announcement + } + } + + // announcement + Buffer<4096> buf; + peerId.serialize(buf,false); + { + Mutex::Lock _l(_memberIds_m); + for(std::vector<uint16_t>::const_iterator mid(_memberIds.begin());mid!=_memberIds.end();++mid) { + Mutex::Lock _l2(_members[*mid].lock); + _send(*mid,STATE_MESSAGE_HAVE_PEER,buf.data(),buf.size()); + } + } } void Cluster::replicateMulticastLike(uint64_t nwid,const Address &peerAddress,const MulticastGroup &group) { + Buffer<4096> buf; + buf.append((uint64_t)nwid); + peerAddress.appendTo(buf); + group.mac().appendTo(buf); + buf.append((uint32_t)group.adi()); + { + Mutex::Lock _l(_memberIds_m); + for(std::vector<uint16_t>::const_iterator mid(_memberIds.begin());mid!=_memberIds.end();++mid) { + Mutex::Lock _l2(_members[*mid].lock); + _send(*mid,STATE_MESSAGE_MULTICAST_LIKE,buf.data(),buf.size()); + } + } } void 
Cluster::replicateCertificateOfNetworkMembership(const CertificateOfMembership &com) { + Buffer<4096> buf; + com.serialize(buf); + { + Mutex::Lock _l(_memberIds_m); + for(std::vector<uint16_t>::const_iterator mid(_memberIds.begin());mid!=_memberIds.end();++mid) { + Mutex::Lock _l2(_members[*mid].lock); + _send(*mid,STATE_MESSAGE_COM,buf.data(),buf.size()); + } + } } void Cluster::doPeriodicTasks() { - // Go ahead and flush whenever possible right now + const uint64_t now = RR->node->now(); + { Mutex::Lock _l(_memberIds_m); for(std::vector<uint16_t>::const_iterator mid(_memberIds.begin());mid!=_memberIds.end();++mid) { Mutex::Lock _l2(_members[*mid].lock); - _flush(*mid); + + if ((now - _members[*mid].lastAnnouncedAliveTo) >= ((ZT_CLUSTER_TIMEOUT / 2) - 1000)) { + Buffer<2048> alive; + alive.append((uint16_t)ZEROTIER_ONE_VERSION_MAJOR); + alive.append((uint16_t)ZEROTIER_ONE_VERSION_MINOR); + alive.append((uint16_t)ZEROTIER_ONE_VERSION_REVISION); + alive.append((uint8_t)ZT_PROTO_VERSION); + if (_addressToLocationFunction) { + alive.append((int32_t)_x); + alive.append((int32_t)_y); + alive.append((int32_t)_z); + } else { + alive.append((int32_t)0); + alive.append((int32_t)0); + alive.append((int32_t)0); + } + alive.append((uint64_t)now); + alive.append((uint64_t)0); // TODO: compute and send load average + alive.append((uint64_t)0); // unused/reserved flags + alive.append((uint8_t)_zeroTierPhysicalEndpoints.size()); + for(std::vector<InetAddress>::const_iterator pe(_zeroTierPhysicalEndpoints.begin());pe!=_zeroTierPhysicalEndpoints.end();++pe) + pe->serialize(alive); + _send(*mid,STATE_MESSAGE_ALIVE,alive.data(),alive.size()); + _members[*mid].lastAnnouncedAliveTo = now; + } + + _flush(*mid); // does nothing if nothing to flush } } } void Cluster::addMember(uint16_t memberId) { + if (memberId >= ZT_CLUSTER_MAX_MEMBERS) + return; + Mutex::Lock _l2(_members[memberId].lock); - Mutex::Lock _l(_memberIds_m); - _memberIds.push_back(memberId); - 
std::sort(_memberIds.begin(),_memberIds.end()); + { + Mutex::Lock _l(_memberIds_m); + if (std::find(_memberIds.begin(),_memberIds.end(),memberId) != _memberIds.end()) + return; + _memberIds.push_back(memberId); + std::sort(_memberIds.begin(),_memberIds.end()); + } + + _members[memberId].clear(); // Generate this member's message key from the master and its ID uint16_t stmp[ZT_SHA512_DIGEST_LEN / sizeof(uint16_t)]; @@ -346,27 +535,107 @@ void Cluster::addMember(uint16_t memberId) _members[memberId].q.append(iv,16); _members[memberId].q.addSize(8); // room for MAC _members[memberId].q.append((uint16_t)_id); + _members[memberId].q.append((uint16_t)memberId); } -void Cluster::_send(uint16_t memberId,const void *msg,unsigned int len) +void Cluster::removeMember(uint16_t memberId) { - _Member &m = _members[memberId]; - // assumes m.lock is locked! - for(;;) { - if ((m.q.size() + len) > ZT_CLUSTER_MAX_MESSAGE_LENGTH) - _flush(memberId); - else { - m.q.append(msg,len); - break; + Mutex::Lock _l(_memberIds_m); + std::vector<uint16_t> newMemberIds; + for(std::vector<uint16_t>::const_iterator mid(_memberIds.begin());mid!=_memberIds.end();++mid) { + if (*mid != memberId) + newMemberIds.push_back(*mid); + } + _memberIds = newMemberIds; +} + +bool Cluster::redirectPeer(const Address &peerAddress,const InetAddress &peerPhysicalAddress,bool offload) +{ + if (!peerPhysicalAddress) // sanity check + return false; + if (_addressToLocationFunction) { + // Pick based on location if it can be determined + int px = 0,py = 0,pz = 0; + if (_addressToLocationFunction(_addressToLocationFunctionArg,reinterpret_cast<const struct sockaddr_storage *>(&peerPhysicalAddress),&px,&py,&pz) == 0) { + // No geo-info so no change + return false; } + + // Find member closest to this peer + const uint64_t now = RR->node->now(); + std::vector<InetAddress> best; // initial "best" is for peer to stay put + const double currentDistance = _dist3d(_x,_y,_z,px,py,pz); + double bestDistance = (offload ? 
2147483648.0 : currentDistance); + unsigned int bestMember = _id; + { + Mutex::Lock _l(_memberIds_m); + for(std::vector<uint16_t>::const_iterator mid(_memberIds.begin());mid!=_memberIds.end();++mid) { + _Member &m = _members[*mid]; + Mutex::Lock _ml(m.lock); + + // Consider member if it's alive and has sent us a location and one or more physical endpoints to send peers to + if ( ((now - m.lastReceivedAliveAnnouncement) < ZT_CLUSTER_TIMEOUT) && ((m.x != 0)||(m.y != 0)||(m.z != 0)) && (m.zeroTierPhysicalEndpoints.size() > 0) ) { + double mdist = _dist3d(m.x,m.y,m.z,px,py,pz); + if (mdist < bestDistance) { + bestMember = *mid; + best = m.zeroTierPhysicalEndpoints; + } + } + } + } + + if (best.size() > 0) { + TRACE("peer %s is at [%d,%d,%d], distance to us is %f, sending to %u instead for better distance %f",peerAddress.toString().c_str(),px,py,pz,currentDistance,bestMember,bestDistance); + + /* if (peer->remoteVersionProtocol() >= 5) { + // If it's a newer peer send VERB_PUSH_DIRECT_PATHS which is more idiomatic + } else { */ + // Otherwise send VERB_RENDEZVOUS for ourselves, which will trick peers into trying other endpoints for us even if they're too old for PUSH_DIRECT_PATHS + for(std::vector<InetAddress>::const_iterator a(best.begin());a!=best.end();++a) { + if ((a->ss_family == AF_INET)||(a->ss_family == AF_INET6)) { + Packet outp(peerAddress,RR->identity.address(),Packet::VERB_RENDEZVOUS); + outp.append((uint8_t)0); // no flags + RR->identity.address().appendTo(outp); // HACK: rendezvous with ourselves! with really old peers this will only work if I'm a root server! 
+ outp.append((uint16_t)a->port()); + if (a->ss_family == AF_INET) { + outp.append((uint8_t)4); + outp.append(a->rawIpData(),4); + } else { + outp.append((uint8_t)16); + outp.append(a->rawIpData(),16); + } + RR->sw->send(outp,true,0); + } + } + //} + + return true; + } else { + TRACE("peer %s is at [%d,%d,%d], distance to us is %f and this seems to be the best",peerAddress.toString().c_str(),px,py,pz,currentDistance); + return false; + } + } else { + // TODO: pick based on load if no location info? + return false; } } +void Cluster::_send(uint16_t memberId,StateMessageType type,const void *msg,unsigned int len) +{ + _Member &m = _members[memberId]; + // assumes m.lock is locked! + if ((m.q.size() + len + 3) > ZT_CLUSTER_MAX_MESSAGE_LENGTH) + _flush(memberId); + m.q.append((uint16_t)(len + 1)); + m.q.append((uint8_t)type); + m.q.append(msg,len); +} + void Cluster::_flush(uint16_t memberId) { _Member &m = _members[memberId]; // assumes m.lock is locked! - if (m.q.size() > 26) { // 16-byte IV + 8-byte MAC + 2-byte cluster member ID (latter two bytes are inside crypto envelope) + if (m.q.size() > (24 + 2 + 2)) { // 16-byte IV + 8-byte MAC + 2 byte from-member-ID + 2 byte to-member-ID // Create key from member's key and IV char keytmp[32]; memcpy(keytmp,m.key,32); @@ -389,7 +658,7 @@ void Cluster::_flush(uint16_t memberId) memcpy(m.q.field(16,8),mac,8); // Send! 
- _sendFunction(_arg,memberId,m.q.data(),m.q.size()); + _sendFunction(_sendFunctionArg,memberId,m.q.data(),m.q.size()); // Prepare for more m.q.clear(); @@ -397,7 +666,8 @@ void Cluster::_flush(uint16_t memberId) Utils::getSecureRandom(iv,16); m.q.append(iv,16); m.q.addSize(8); // room for MAC - m.q.append((uint16_t)_id); + m.q.append((uint16_t)_id); // from member ID + m.q.append((uint16_t)memberId); // to member ID } } diff --git a/node/Cluster.hpp b/node/Cluster.hpp index 016730e3..2e60fd6b 100644 --- a/node/Cluster.hpp +++ b/node/Cluster.hpp @@ -34,43 +34,38 @@ #include <algorithm> #include "Constants.hpp" +#include "../include/ZeroTierOne.h" #include "Address.hpp" #include "InetAddress.hpp" #include "SHA512.hpp" #include "Utils.hpp" #include "Buffer.hpp" #include "Mutex.hpp" +#include "SharedPtr.hpp" +#include "Hashtable.hpp" /** * Timeout for cluster members being considered "alive" */ -#define ZT_CLUSTER_TIMEOUT ZT_PEER_ACTIVITY_TIMEOUT +#define ZT_CLUSTER_TIMEOUT 30000 /** - * Maximum cluster message length in bytes - * - * Cluster nodes speak via TCP, with data encapsulated into individually - * encrypted and authenticated messages. The maximum message size is - * 65535 (0xffff) since the TCP stream uses 16-bit message size headers - * (and this is a reasonable chunk size anyway). - */ -#define ZT_CLUSTER_MAX_MESSAGE_LENGTH 65535 - -/** - * Maximum number of physical addresses we will cache for a cluster member + * How often should we announce that we have a peer? */ -#define ZT_CLUSTER_MEMBER_MAX_PHYSICAL_ADDRS 8 +#define ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD 60000 /** - * How frequently should doPeriodicTasks() be ideally called? 
(ms) + * Desired period between doPeriodicTasks() in milliseconds */ -#define ZT_CLUSTER_PERIODIC_TASK_DEADLINE 10 +#define ZT_CLUSTER_PERIODIC_TASK_PERIOD 50 namespace ZeroTier { class RuntimeEnvironment; class CertificateOfMembership; class MulticastGroup; +class Peer; +class Identity; /** * Multi-homing cluster state replication and packet relaying @@ -96,22 +91,6 @@ class Cluster { public: /** - * Which distance algorithm is this cluster using? - */ - enum DistanceAlgorithm - { - /** - * Simple linear distance in three dimensions - */ - DISTANCE_SIMPLE = 0, - - /** - * Haversine formula using X,Y as lat,long and ignoring Z - */ - DISTANCE_HAVERSINE = 1 - }; - - /** * State message types */ enum StateMessageType @@ -184,25 +163,18 @@ public: /** * Construct a new cluster - * - * @param renv Runtime environment - * @param id This member's ID in the cluster - * @param da Distance algorithm this cluster uses to compute distance and hand off peers - * @param x My X - * @param y My Y - * @param z My Z - * @param sendFunction Function to call to send messages to other cluster members - * @param arg First argument to sendFunction */ Cluster( const RuntimeEnvironment *renv, uint16_t id, - DistanceAlgorithm da, + const std::vector<InetAddress> &zeroTierPhysicalEndpoints, int32_t x, int32_t y, int32_t z, - void (*sendFunction)(void *,uint16_t,const void *,unsigned int), - void *arg); + void (*sendFunction)(void *,unsigned int,const void *,unsigned int), + void *sendFunctionArg, + int (*addressToLocationFunction)(void *,const struct sockaddr_storage *,int *,int *,int *), + void *addressToLocationFunctionArg); ~Cluster(); @@ -220,11 +192,22 @@ public: void handleIncomingStateMessage(const void *msg,unsigned int len); /** + * Send this packet via another node in this cluster if another node has this peer + * + * @param fromPeerAddress Source peer address (if known, should be NULL for fragments) + * @param toPeerAddress Destination peer address + * @param data Packet or 
packet fragment data + * @param len Length of packet or fragment + * @return True if this data was sent via another cluster member, false if none have this peer + */ + bool sendViaCluster(const Address &fromPeerAddress,const Address &toPeerAddress,const void *data,unsigned int len); + + /** * Advertise to the cluster that we have this peer * - * @param peerAddress Peer address that we have + * @param peerId Identity of peer that we have */ - void replicateHavePeer(const Address &peerAddress); + void replicateHavePeer(const Identity &peerId); /** * Advertise a multicast LIKE to the cluster @@ -243,7 +226,7 @@ public: void replicateCertificateOfNetworkMembership(const CertificateOfMembership &com); /** - * Call every ~ZT_CLUSTER_PERIODIC_TASK_DEADLINE milliseconds. + * Call every ~ZT_CLUSTER_PERIODIC_TASK_PERIOD milliseconds. */ void doPeriodicTasks(); @@ -254,52 +237,70 @@ public: */ void addMember(uint16_t memberId); + /** + * Remove a member ID from this cluster + * + * @param memberId Member ID to remove + */ + void removeMember(uint16_t memberId); + + /** + * Redirect this peer to a better cluster member if needed + * + * @param peerAddress Peer to (possibly) redirect + * @param peerPhysicalAddress Physical address of peer's current best path (where packet was most recently received or getBestPath()->address()) + * @param offload Always redirect if possible -- can be used to offload peers during shutdown + * @return True if peer was redirected + */ + bool redirectPeer(const Address &peerAddress,const InetAddress &peerPhysicalAddress,bool offload); + private: - void _send(uint16_t memberId,const void *msg,unsigned int len); + void _send(uint16_t memberId,StateMessageType type,const void *msg,unsigned int len); void _flush(uint16_t memberId); // These are initialized in the constructor and remain static uint16_t _masterSecret[ZT_SHA512_DIGEST_LEN / sizeof(uint16_t)]; unsigned char _key[ZT_PEER_SECRET_KEY_LENGTH]; const RuntimeEnvironment *RR; - void 
(*_sendFunction)(void *,uint16_t,const void *,unsigned int); - void *_arg; + void (*_sendFunction)(void *,unsigned int,const void *,unsigned int); + void *_sendFunctionArg; + int (*_addressToLocationFunction)(void *,const struct sockaddr_storage *,int *,int *,int *); + void *_addressToLocationFunctionArg; const int32_t _x; const int32_t _y; const int32_t _z; - const DistanceAlgorithm _da; const uint16_t _id; + const std::vector<InetAddress> _zeroTierPhysicalEndpoints; struct _Member { unsigned char key[ZT_PEER_SECRET_KEY_LENGTH]; - uint64_t lastReceivedFrom; uint64_t lastReceivedAliveAnnouncement; - uint64_t lastSentTo; uint64_t lastAnnouncedAliveTo; uint64_t load; int32_t x,y,z; - InetAddress physicalAddresses[ZT_CLUSTER_MEMBER_MAX_PHYSICAL_ADDRS]; - unsigned int physicalAddressCount; + std::vector<InetAddress> zeroTierPhysicalEndpoints; Buffer<ZT_CLUSTER_MAX_MESSAGE_LENGTH> q; Mutex lock; - _Member() : - lastReceivedFrom(0), - lastReceivedAliveAnnouncement(0), - lastSentTo(0), - lastAnnouncedAliveTo(0), - load(0), - x(0), - y(0), - z(0), - physicalAddressCount(0) {} - + inline void clear() + { + lastReceivedAliveAnnouncement = 0; + lastAnnouncedAliveTo = 0; + load = 0; + x = 0; + y = 0; + z = 0; + zeroTierPhysicalEndpoints.clear(); + q.clear(); + } + + _Member() { this->clear(); } ~_Member() { Utils::burn(key,sizeof(key)); } }; @@ -308,7 +309,7 @@ private: std::vector<uint16_t> _memberIds; Mutex _memberIds_m; - // Record tracking which members have which peers and how recently they claimed this + // Record tracking which members have which peers and how recently they claimed this -- also used to track our last claimed time struct _PeerAffinity { _PeerAffinity(const Address &a,uint16_t mid,uint64_t ts) : diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index 19747bbd..c8526dfb 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -43,6 +43,7 @@ #include "Salsa20.hpp" #include "SHA512.hpp" #include "World.hpp" +#include "Cluster.hpp" 
namespace ZeroTier { @@ -272,7 +273,6 @@ bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR) TRACE("rejected HELLO from %s(%s): packet failed authentication",id.address().toString().c_str(),_remoteAddress.toString().c_str()); return true; } - peer = RR->topology->addPeer(newPeer); // Continue at // VALID @@ -406,6 +406,10 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,const SharedPtr<Peer> &p CertificateOfMembership com; offset += com.deserialize(*this,ZT_PROTO_VERB_MULTICAST_FRAME__OK__IDX_COM_AND_GATHER_RESULTS); peer->validateAndSetNetworkMembershipCertificate(RR,nwid,com); +#ifdef ZT_ENABLE_CLUSTER + if (RR->cluster) + RR->cluster->replicateCertificateOfNetworkMembership(com); +#endif } if ((flags & 0x02) != 0) { @@ -533,6 +537,10 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,const SharedPtr<P CertificateOfMembership com; comLen = com.deserialize(*this,ZT_PROTO_VERB_EXT_FRAME_IDX_COM); peer->validateAndSetNetworkMembershipCertificate(RR,network->id(),com); +#ifdef ZT_ENABLE_CLUSTER + if (RR->cluster) + RR->cluster->replicateCertificateOfNetworkMembership(com); +#endif } if (!network->isAllowed(peer)) { @@ -613,8 +621,15 @@ bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,const Shared const uint64_t now = RR->node->now(); // Iterate through 18-byte network,MAC,ADI tuples - for(unsigned int ptr=ZT_PACKET_IDX_PAYLOAD;ptr<size();ptr+=18) - RR->mc->add(now,at<uint64_t>(ptr),MulticastGroup(MAC(field(ptr + 8,6),6),at<uint32_t>(ptr + 14)),peer->address()); + for(unsigned int ptr=ZT_PACKET_IDX_PAYLOAD;ptr<size();ptr+=18) { + const uint32_t nwid(at<uint64_t>(ptr)); + const MulticastGroup group(MAC(field(ptr + 8,6),6),at<uint32_t>(ptr + 14)); + RR->mc->add(now,nwid,group,peer->address()); +#ifdef ZT_ENABLE_CLUSTER + if (RR->cluster) + RR->cluster->replicateMulticastLike(nwid,peer->address(),group); +#endif + } 
peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP); } catch ( ... ) { @@ -632,6 +647,10 @@ bool IncomingPacket::_doNETWORK_MEMBERSHIP_CERTIFICATE(const RuntimeEnvironment while (ptr < size()) { ptr += com.deserialize(*this,ptr); peer->validateAndSetNetworkMembershipCertificate(RR,com.networkId(),com); +#ifdef ZT_ENABLE_CLUSTER + if (RR->cluster) + RR->cluster->replicateCertificateOfNetworkMembership(com); +#endif } peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_NETWORK_MEMBERSHIP_CERTIFICATE,0,Packet::VERB_NOP); @@ -787,6 +806,10 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,const Share CertificateOfMembership com; offset += com.deserialize(*this,ZT_PROTO_VERB_MULTICAST_FRAME_IDX_COM); peer->validateAndSetNetworkMembershipCertificate(RR,nwid,com); +#ifdef ZT_ENABLE_CLUSTER + if (RR->cluster) + RR->cluster->replicateCertificateOfNetworkMembership(com); +#endif } // Check membership after we've read any included COM, since @@ -871,6 +894,8 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha } peer->setLastDirectPathPushReceived(now); + const RemotePath *currentBest = peer->getBestPath(now); + unsigned int count = at<uint16_t>(ZT_PACKET_IDX_PAYLOAD); unsigned int ptr = ZT_PACKET_IDX_PAYLOAD + 2; unsigned int v4Count = 0,v6Count = 0; @@ -889,16 +914,20 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha InetAddress a(field(ptr,4),4,at<uint16_t>(ptr + 4)); if ( ((flags & 0x01) == 0) && (Path::isAddressValidForPath(a)) ) { TRACE("attempting to contact %s at pushed direct path %s",peer->address().toString().c_str(),a.toString().c_str()); - if (v4Count++ < ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE) - peer->attemptToContactAt(RR,_localAddress,a,RR->node->now()); + if (v4Count++ < ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE) { + if ((!currentBest)||(currentBest->address() != a)) + 
peer->attemptToContactAt(RR,_localAddress,a,RR->node->now()); + } } } break; case 6: { InetAddress a(field(ptr,16),16,at<uint16_t>(ptr + 16)); if ( ((flags & 0x01) == 0) && (Path::isAddressValidForPath(a)) ) { TRACE("attempting to contact %s at pushed direct path %s",peer->address().toString().c_str(),a.toString().c_str()); - if (v6Count++ < ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE) - peer->attemptToContactAt(RR,_localAddress,a,RR->node->now()); + if (v6Count++ < ZT_PUSH_DIRECT_PATHS_MAX_ENDPOINTS_PER_TYPE) { + if ((!currentBest)||(currentBest->address() != a)) + peer->attemptToContactAt(RR,_localAddress,a,RR->node->now()); + } } } break; } diff --git a/node/Node.cpp b/node/Node.cpp index 26d5513e..6eea3d3d 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -46,6 +46,7 @@ #include "Address.hpp" #include "Identity.hpp" #include "SelfAwareness.hpp" +#include "Cluster.hpp" const struct sockaddr_storage ZT_SOCKADDR_NULL = {0}; @@ -135,6 +136,9 @@ Node::~Node() delete RR->antiRec; delete RR->mc; delete RR->sw; +#ifdef ZT_ENABLE_CLUSTER + delete RR->cluster; +#endif } ZT_ResultCode Node::processWirePacket( @@ -329,7 +333,18 @@ ZT_ResultCode Node::processBackgroundTasks(uint64_t now,volatile uint64_t *nextB } try { - *nextBackgroundTaskDeadline = now + (uint64_t)std::max(std::min(timeUntilNextPingCheck,RR->sw->doTimerTasks(now)),(unsigned long)ZT_CORE_TIMER_TASK_GRANULARITY); +#ifdef ZT_ENABLE_CLUSTER + // If clustering is enabled we have to call cluster->doPeriodicTasks() very often, so we override normal timer deadline behavior + if (RR->cluster) { + RR->sw->doTimerTasks(now); + RR->cluster->doPeriodicTasks(); + *nextBackgroundTaskDeadline = now + ZT_CLUSTER_PERIODIC_TASK_PERIOD; // this is really short so just tick at this rate + } else { +#endif + *nextBackgroundTaskDeadline = now + (uint64_t)std::max(std::min(timeUntilNextPingCheck,RR->sw->doTimerTasks(now)),(unsigned long)ZT_CORE_TIMER_TASK_GRANULARITY); +#ifdef ZT_ENABLE_CLUSTER + } +#endif } catch ( ... 
) { return ZT_RESULT_FATAL_ERROR_INTERNAL; } @@ -554,6 +569,62 @@ void Node::circuitTestEnd(ZT_CircuitTest *test) } } +ZT_ResultCode Node::clusterInit( + unsigned int myId, + const struct sockaddr_storage *zeroTierPhysicalEndpoints, + unsigned int numZeroTierPhysicalEndpoints, + int x, + int y, + int z, + void (*sendFunction)(void *,unsigned int,const void *,unsigned int), + void *sendFunctionArg, + int (*addressToLocationFunction)(void *,const struct sockaddr_storage *,int *,int *,int *), + void *addressToLocationFunctionArg) +{ +#ifdef ZT_ENABLE_CLUSTER + if (RR->cluster) + return ZT_RESULT_ERROR_BAD_PARAMETER; + + std::vector<InetAddress> eps; + for(unsigned int i=0;i<numZeroTierPhysicalEndpoints;++i) + eps.push_back(InetAddress(zeroTierPhysicalEndpoints[i])); + std::sort(eps.begin(),eps.end()); + RR->cluster = new Cluster(RR,myId,eps,x,y,z,sendFunction,sendFunctionArg,addressToLocationFunction,addressToLocationFunctionArg); + + return ZT_RESULT_OK; +#else + return ZT_RESULT_ERROR_UNSUPPORTED_OPERATION; +#endif +} + +ZT_ResultCode Node::clusterAddMember(unsigned int memberId) +{ +#ifdef ZT_ENABLE_CLUSTER + if (!RR->cluster) + return ZT_RESULT_ERROR_BAD_PARAMETER; + RR->cluster->addMember((uint16_t)memberId); + return ZT_RESULT_OK; +#else + return ZT_RESULT_ERROR_UNSUPPORTED_OPERATION; +#endif +} + +void Node::clusterRemoveMember(unsigned int memberId) +{ +#ifdef ZT_ENABLE_CLUSTER + if (RR->cluster) + RR->cluster->removeMember((uint16_t)memberId); +#endif +} + +void Node::clusterHandleIncomingMessage(const void *msg,unsigned int len) +{ +#ifdef ZT_ENABLE_CLUSTER + if (RR->cluster) + RR->cluster->handleIncomingStateMessage(msg,len); +#endif +} + /****************************************************************************/ /* Node methods used only within node/ */ /****************************************************************************/ @@ -806,6 +877,22 @@ void ZT_Node_freeQueryResult(ZT_Node *node,void *qr) } catch ( ... 
) {} } +int ZT_Node_addLocalInterfaceAddress(ZT_Node *node,const struct sockaddr_storage *addr,int metric, enum ZT_LocalInterfaceAddressTrust trust) +{ + try { + return reinterpret_cast<ZeroTier::Node *>(node)->addLocalInterfaceAddress(addr,metric,trust); + } catch ( ... ) { + return 0; + } +} + +void ZT_Node_clearLocalInterfaceAddresses(ZT_Node *node) +{ + try { + reinterpret_cast<ZeroTier::Node *>(node)->clearLocalInterfaceAddresses(); + } catch ( ... ) {} +} + void ZT_Node_setNetconfMaster(ZT_Node *node,void *networkControllerInstance) { try { @@ -829,19 +916,75 @@ void ZT_Node_circuitTestEnd(ZT_Node *node,ZT_CircuitTest *test) } catch ( ... ) {} } -int ZT_Node_addLocalInterfaceAddress(ZT_Node *node,const struct sockaddr_storage *addr,int metric, enum ZT_LocalInterfaceAddressTrust trust) +enum ZT_ResultCode ZT_Node_clusterInit( + ZT_Node *node, + unsigned int myId, + const struct sockaddr_storage *zeroTierPhysicalEndpoints, + unsigned int numZeroTierPhysicalEndpoints, + int x, + int y, + int z, + void (*sendFunction)(void *,unsigned int,const void *,unsigned int), + void *sendFunctionArg, + int (*addressToLocationFunction)(void *,const struct sockaddr_storage *,int *,int *,int *), + void *addressToLocationFunctionArg) { try { - return reinterpret_cast<ZeroTier::Node *>(node)->addLocalInterfaceAddress(addr,metric,trust); + return reinterpret_cast<ZeroTier::Node *>(node)->clusterInit(myId,zeroTierPhysicalEndpoints,numZeroTierPhysicalEndpoints,x,y,z,sendFunction,sendFunctionArg,addressToLocationFunction,addressToLocationFunctionArg); } catch ( ... ) { - return 0; + return ZT_RESULT_FATAL_ERROR_INTERNAL; } } -void ZT_Node_clearLocalInterfaceAddresses(ZT_Node *node) +/** + * Add a member to this cluster + * + * Calling this without having called clusterInit() will do nothing. + * + * @param node Node instance + * @param memberId Member ID (must be less than or equal to ZT_CLUSTER_MAX_MEMBERS) + * @return OK or error if clustering is disabled, ID invalid, etc. 
+ */ +enum ZT_ResultCode ZT_Node_clusterAddMember(ZT_Node *node,unsigned int memberId) { try { - reinterpret_cast<ZeroTier::Node *>(node)->clearLocalInterfaceAddresses(); + return reinterpret_cast<ZeroTier::Node *>(node)->clusterAddMember(memberId); + } catch ( ... ) { + return ZT_RESULT_FATAL_ERROR_INTERNAL; + } +} + +/** + * Remove a member from this cluster + * + * Calling this without having called clusterInit() will do nothing. + * + * @param node Node instance + * @param memberId Member ID to remove (nothing happens if not present) + */ +void ZT_Node_clusterRemoveMember(ZT_Node *node,unsigned int memberId) +{ + try { + reinterpret_cast<ZeroTier::Node *>(node)->clusterRemoveMember(memberId); + } catch ( ... ) {} +} + +/** + * Handle an incoming cluster state message + * + * The message itself contains cluster member IDs, and invalid or badly + * addressed messages will be silently discarded. + * + * Calling this without having called clusterInit() will do nothing. + * + * @param node Node instance + * @param msg Cluster message + * @param len Length of cluster message + */ +void ZT_Node_clusterHandleIncomingMessage(ZT_Node *node,const void *msg,unsigned int len) +{ + try { + reinterpret_cast<ZeroTier::Node *>(node)->clusterHandleIncomingMessage(msg,len); } catch ( ... 
) {} } diff --git a/node/Node.hpp b/node/Node.hpp index c7038ed4..b8bd4dc5 100644 --- a/node/Node.hpp +++ b/node/Node.hpp @@ -110,6 +110,20 @@ public: void setNetconfMaster(void *networkControllerInstance); ZT_ResultCode circuitTestBegin(ZT_CircuitTest *test,void (*reportCallback)(ZT_Node *,ZT_CircuitTest *,const ZT_CircuitTestReport *)); void circuitTestEnd(ZT_CircuitTest *test); + ZT_ResultCode clusterInit( + unsigned int myId, + const struct sockaddr_storage *zeroTierPhysicalEndpoints, + unsigned int numZeroTierPhysicalEndpoints, + int x, + int y, + int z, + void (*sendFunction)(void *,unsigned int,const void *,unsigned int), + void *sendFunctionArg, + int (*addressToLocationFunction)(void *,const struct sockaddr_storage *,int *,int *,int *), + void *addressToLocationFunctionArg); + ZT_ResultCode clusterAddMember(unsigned int memberId); + void clusterRemoveMember(unsigned int memberId); + void clusterHandleIncomingMessage(const void *msg,unsigned int len); // Internal functions ------------------------------------------------------ diff --git a/node/Peer.cpp b/node/Peer.cpp index 6f566be4..fa7b3aa4 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -34,6 +34,7 @@ #include "Network.hpp" #include "AntiRecursion.hpp" #include "SelfAwareness.hpp" +#include "Cluster.hpp" #include <algorithm> @@ -81,6 +82,7 @@ void Peer::received( { const uint64_t now = RR->node->now(); bool needMulticastGroupAnnounce = false; + bool pathIsConfirmed = false; { Mutex::Lock _l(_lock); @@ -88,8 +90,6 @@ void Peer::received( _lastReceive = now; if (!hops) { - bool pathIsConfirmed = false; - /* Learn new paths from direct (hops == 0) packets */ { unsigned int np = _numPaths; @@ -107,7 +107,6 @@ void Peer::received( // Learn paths if they've been confirmed via a HELLO or an ECHO RemotePath *slot = (RemotePath *)0; if (np < ZT_MAX_PEER_NETWORK_PATHS) { - // Add new path slot = &(_paths[np++]); } else { uint64_t slotLRmin = 0xffffffffffffffffULL; @@ -154,6 +153,14 @@ void Peer::received( 
_lastMulticastFrame = now; } +#ifdef ZT_ENABLE_CLUSTER + if ((pathIsConfirmed)&&(RR->cluster)) { + // Either shuttle this peer off somewhere else or report to other members that we have it + if (!RR->cluster->redirectPeer(_id.address(),remoteAddr,false)) + RR->cluster->replicateHavePeer(_id); + } +#endif + if (needMulticastGroupAnnounce) { const std::vector< SharedPtr<Network> > networks(RR->node->allNetworks()); for(std::vector< SharedPtr<Network> >::const_iterator n(networks.begin());n!=networks.end();++n) @@ -213,6 +220,12 @@ bool Peer::doPingAndKeepalive(const RuntimeEnvironment *RR,uint64_t now,int inet void Peer::pushDirectPaths(const RuntimeEnvironment *RR,RemotePath *path,uint64_t now,bool force) { +#ifdef ZT_ENABLE_CLUSTER + // Cluster mode disables normal PUSH_DIRECT_PATHS in favor of cluster-based peer redirection + if (RR->cluster) + return; +#endif + Mutex::Lock _l(_lock); if (((now - _lastDirectPathPushSent) >= ZT_DIRECT_PATH_PUSH_INTERVAL)||(force)) { diff --git a/node/RuntimeEnvironment.hpp b/node/RuntimeEnvironment.hpp index e5d1f446..2ec88f72 100644 --- a/node/RuntimeEnvironment.hpp +++ b/node/RuntimeEnvironment.hpp @@ -43,6 +43,7 @@ class Multicaster; class AntiRecursion; class NetworkController; class SelfAwareness; +class Cluster; /** * Holds global state for an instance of ZeroTier::Node @@ -51,14 +52,17 @@ class RuntimeEnvironment { public: RuntimeEnvironment(Node *n) : - node(n), - identity(), - localNetworkController((NetworkController *)0), - sw((Switch *)0), - mc((Multicaster *)0), - antiRec((AntiRecursion *)0), - topology((Topology *)0), - sa((SelfAwareness *)0) + node(n) + ,identity() + ,localNetworkController((NetworkController *)0) + ,sw((Switch *)0) + ,mc((Multicaster *)0) + ,antiRec((AntiRecursion *)0) + ,topology((Topology *)0) + ,sa((SelfAwareness *)0) +#ifdef ZT_ENABLE_CLUSTER + ,cluster((Cluster *)0) +#endif { } @@ -86,6 +90,10 @@ public: AntiRecursion *antiRec; Topology *topology; SelfAwareness *sa; + +#ifdef ZT_ENABLE_CLUSTER 
+ Cluster *cluster; +#endif }; } // namespace ZeroTier diff --git a/node/Switch.cpp b/node/Switch.cpp index 249a21d5..0edaa96d 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -45,6 +45,7 @@ #include "AntiRecursion.hpp" #include "SelfAwareness.hpp" #include "Packet.hpp" +#include "Cluster.hpp" namespace ZeroTier { @@ -567,6 +568,11 @@ void Switch::_handleRemotePacketFragment(const InetAddress &localAddr,const Inet // It wouldn't hurt anything, just redundant and unnecessary. SharedPtr<Peer> relayTo = RR->topology->getPeer(destination); if ((!relayTo)||(!relayTo->send(RR,fragment.data(),fragment.size(),RR->node->now()))) { +#ifdef ZT_ENABLE_CLUSTER + if ((RR->cluster)&&(RR->cluster->sendViaCluster(Address(),destination,fragment.data(),fragment.size()))) + return; // sent by way of another member of this cluster +#endif + // Don't know peer or no direct path -- so relay via root server relayTo = RR->topology->getBestRoot(); if (relayTo) @@ -642,7 +648,11 @@ void Switch::_handleRemotePacketHead(const InetAddress &localAddr,const InetAddr if ((relayTo)&&((relayTo->send(RR,packet->data(),packet->size(),RR->node->now())))) { unite(source,destination,false); } else { - // Don't know peer or no direct path -- so relay via root server +#ifdef ZT_ENABLE_CLUSTER + if ((RR->cluster)&&(RR->cluster->sendViaCluster(source,destination,packet->data(),packet->size()))) + return; // sent by way of another member of this cluster +#endif + relayTo = RR->topology->getBestRoot(&source,1,true); if (relayTo) relayTo->send(RR,packet->data(),packet->size(),RR->node->now()); diff --git a/node/Topology.cpp b/node/Topology.cpp index 56ca47c8..88c8856c 100644 --- a/node/Topology.cpp +++ b/node/Topology.cpp @@ -122,18 +122,22 @@ Topology::~Topology() SharedPtr<Peer> Topology::addPeer(const SharedPtr<Peer> &peer) { if (peer->address() == RR->identity.address()) { - TRACE("BUG: addNewPeer() caught and ignored attempt to add peer for self"); + TRACE("BUG: addPeer() caught and ignored attempt to 
add peer for self"); throw std::logic_error("cannot add peer for self"); } - const uint64_t now = RR->node->now(); - Mutex::Lock _l(_lock); - - SharedPtr<Peer> &p = _peers.set(peer->address(),peer); - p->use(now); - saveIdentity(p->identity()); + SharedPtr<Peer> np; + { + Mutex::Lock _l(_lock); + SharedPtr<Peer> &hp = _peers[peer->address()]; + if (!hp) + hp = peer; + np = hp; + } + np->use(RR->node->now()); + saveIdentity(np->identity()); - return p; + return np; } SharedPtr<Peer> Topology::getPeer(const Address &zta) @@ -143,13 +147,12 @@ SharedPtr<Peer> Topology::getPeer(const Address &zta) return SharedPtr<Peer>(); } - const uint64_t now = RR->node->now(); Mutex::Lock _l(_lock); SharedPtr<Peer> &ap = _peers[zta]; if (ap) { - ap->use(now); + ap->use(RR->node->now()); return ap; } @@ -157,13 +160,13 @@ SharedPtr<Peer> Topology::getPeer(const Address &zta) if (id) { try { ap = SharedPtr<Peer>(new Peer(RR->identity,id)); - ap->use(now); + ap->use(RR->node->now()); return ap; } catch ( ... ) {} // invalid identity? } + // If we get here it means we read an invalid cache identity or had some other error _peers.erase(zta); - return SharedPtr<Peer>(); } |