From c908b9f67af7020c81ebd78c8feaad0950c570eb Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Wed, 21 Mar 2018 15:27:26 -0700 Subject: Add anchor and multicast hub address field to new multicast enabled ad-hoc address format. --- node/Network.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'node') diff --git a/node/Network.cpp b/node/Network.cpp index 0015f2bf..a75d9fd1 100644 --- a/node/Network.cpp +++ b/node/Network.cpp @@ -1144,8 +1144,10 @@ void Network::requestConfiguration(void *tPtr) this->setNotFound(); } } else if ((_id & 0xff) == 0x01) { - // ffAA__________01 + // ffAAaaaaaaaaaa01 -- where AA is the IPv4 /8 to use and aaaaaaaaaa is the anchor node for multicast gather and replication const uint64_t myAddress = RR->identity.address().toInt(); + const uint64_t networkHub = (_id >> 8) & 0xffffffffffULL; + uint8_t ipv4[4]; ipv4[0] = (uint8_t)((_id >> 48) & 0xff); ipv4[1] = (uint8_t)((myAddress >> 16) & 0xff); @@ -1165,8 +1167,13 @@ void Network::requestConfiguration(void *tPtr) nconf->flags = ZT_NETWORKCONFIG_FLAG_ENABLE_IPV6_NDP_EMULATION; nconf->mtu = ZT_DEFAULT_MTU; nconf->multicastLimit = 1024; + nconf->specialistCount = (networkHub == 0) ? 0 : 1; nconf->staticIpCount = 2; nconf->ruleCount = 1; + + if (networkHub != 0) + nconf->specialists[0] = networkHub; + nconf->staticIps[0] = InetAddress::makeIpv66plane(_id,myAddress); nconf->staticIps[1].set(ipv4,4,8); -- cgit v1.2.3 From 6a2ba4baca326272c45930208b70cfedf8cb1638 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 1 May 2018 16:32:15 -0700 Subject: Introduced basic multipath support --- include/ZeroTierOne.h | 36 ++++++ node/Constants.hpp | 92 ++++++++++++++ node/IncomingPacket.cpp | 5 +- node/Multicaster.cpp | 2 +- node/Node.cpp | 4 +- node/Node.hpp | 5 + node/Path.hpp | 265 +++++++++++++++++++++++++++++++++++++++- node/Peer.cpp | 289 +++++++++++++++++++++++++++++++++++++++++--- node/Peer.hpp | 38 ++++-- node/RingBuffer.hpp | 315 ++++++++++++++++++++++++++++++++++++++++++++++++ node/Switch.cpp | 6 +- node/Topology.hpp | 2 +- node/Trace.cpp | 20 +++ node/Trace.hpp | 6 + osdep/Binder.hpp | 15 +++ osdep/Phy.hpp | 195 +++++++++++++++++++++++++++++- service/OneService.cpp | 56 ++++++++- 17 files changed, 1310 insertions(+), 41 deletions(-) create mode 100644 node/RingBuffer.hpp (limited to 'node') diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index 6da53a73..15e15bd1 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -422,6 +422,42 @@ enum ZT_ResultCode */ #define ZT_ResultCode_isFatal(x) ((((int)(x)) >= 100)&&(((int)(x)) < 1000)) +/** + * The multipath algorithm in use by this node. + */ +enum ZT_MultipathMode +{ + /** + * No active multipath. + * + * Traffic is merely sent over the strongest path. That being + * said, this mode will automatically failover in the event that a link goes down. + */ + ZT_MULTIPATH_NONE = 0, + + /** + * Traffic is randomly distributed among all active paths. + * + * Will cease sending traffic over links that appear to be stale. + */ + ZT_MULTIPATH_RANDOM = 1, + + /** + * Traffic is allocated across all active paths in proportion to their strength and + * reliability. + * + * Will cease sending traffic over links that appear to be stale. + */ + ZT_MULTIPATH_PROPORTIONALLY_BALANCED = 2, + + /** + * Traffic is allocated across a user-defined interface/allocation + * + * Will cease sending traffic over links that appear to be stale. + */ + ZT_MULTIPATH_MANUALLY_BALANCED = 3 +}; + /** * Status codes sent to status update callback when things happen */ diff --git a/node/Constants.hpp b/node/Constants.hpp index e2a35dce..80083abe 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -267,6 +267,98 @@ */ #define ZT_PING_CHECK_INVERVAL 5000 +/** + * Length of interface name + */ +#define ZT_PATH_INTERFACE_NAME_SZ 16 + +/** + * How frequently to check for changes to the system's network interfaces. When + * the service decides to use this constant it's because we want to react more + * quickly to new interfaces that pop up or go down. + */ +#define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000 + +/** + * Path choice history window size. This is used to keep track of which paths were + * previously selected so that we can maintain a target allocation over time. + */ +#define ZT_MULTIPATH_PROPORTION_WIN_SZ 128 + +/** + * Threshold for flow to be considered balanced. + */ +#define ZT_MULTIPATH_FLOW_BALANCE_THESHOLD 0.80 + +/** + * Number of samples to consider when computing path statistics + */ +#define ZT_PATH_QUALITY_METRIC_WIN_SZ 128 + +/** + * How often important path metrics are sampled (in ms). These metrics are later used + * for path quality estimates + */ +#define ZT_PATH_QUALITY_SAMPLE_INTERVAL 100 + +/** + * How often new path quality estimates are computed + */ +#define ZT_PATH_QUALITY_ESTIMATE_INTERVAL 100 + +/** + * How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL + * since we will record a 0 bit/s measurement if no valid latency measurement was made within this + * window of time. + */ +#define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_PING_CHECK_INVERVAL * 2 + +/** + * Interval used for rate-limiting the computation of path quality estimates. Set at 0 + * to compute as new packets arrive with no delay. + */ +#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 0 + +/** + * Path error rate history window size. This is used to keep track of packet error + * measurements over a path's medium-term history. + */ +#define ZT_PATH_ERROR_HIST_WIN_SZ 10 + +/** + * The number of packet error measurements in each sample + */ +#define ZT_PATH_ERROR_SAMPLE_WIN_SZ 1024 + +/** + * How often a peer will prune its own paths. Pruning is important when multipath is + * enabled because we want to prevent the allocation algorithms from sending anything + * out on known dead paths. Additionally, quickly marking paths as dead helps when + * a new path is learned and needs to replace an older path. + */ +#define ZT_CLOSED_PATH_PRUNING_INTERVAL 1000 + +/** + * Datagram used to test link throughput. Contents are random. + */ +#define ZT_LINK_TEST_DATAGRAM_SZ 1024 + +/** + * Size of datagram expected as a reply to a link speed test + */ +#define ZT_LINK_TEST_DATAGRAM_RESPONSE_SZ 8 + +/** + * Time before a link test datagram is considered lost. Any corresponding + * timing records that would have been used to compute a RTT are purged. + */ +#define ZT_LINK_TEST_TIMEOUT 10000 + +/** + * How often the service tests the link throughput. + */ +#define ZT_LINK_SPEED_TEST_INTERVAL 1000 + /** * How frequently to send heartbeats over in-use paths */ diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index ff4fc94b..8f6dda63 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -80,6 +80,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) if (!trusted) { if (!dearmor(peer->key())) { RR->t->incomingPacketMessageAuthenticationFailure(tPtr,_path,packetId(),sourceAddress,hops(),"invalid MAC"); + _path->recordPacket(false); return true; } } @@ -89,6 +90,8 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) return true; } + _path->recordPacket(true); + const Packet::Verb v = verb(); switch(v) { //case Packet::VERB_NOP: @@ -446,7 +449,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP } if (!hops()) - _path->updateLatency((unsigned int)latency); + _path->updateLatency((unsigned int)latency, RR->node->now()); peer->setRemoteVersion(vProto,vMajor,vMinor,vRevision); diff --git a/node/Multicaster.cpp b/node/Multicaster.cpp index 753e4ee0..b2a5a205 100644 --- a/node/Multicaster.cpp +++ b/node/Multicaster.cpp @@ -190,7 +190,7 @@ void Multicaster::send( for(unsigned int i=0;i p(RR->topology->getPeerNoCache(multicastReplicators[i])); if ((p)&&(p->isAlive(now))) { - const SharedPtr pp(p->getBestPath(now,false)); + const SharedPtr pp(p->getAppropriatePath(now,false)); if ((pp)&&(pp->latency() < bestMulticastReplicatorLatency)) { bestMulticastReplicatorLatency = pp->latency(); bestMulticastReplicatorPath = pp; diff --git a/node/Node.cpp b/node/Node.cpp index db511430..71e5b6a7 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -234,7 +234,7 @@ public: } if ((!contacted)&&(_bestCurrentUpstream)) { - const SharedPtr up(_bestCurrentUpstream->getBestPath(_now,true)); + const SharedPtr up(_bestCurrentUpstream->getAppropriatePath(_now,true)); if (up) p->sendHELLO(_tPtr,up->localSocket(),up->address(),_now); } @@ -465,7 +465,7 @@ ZT_PeerList *Node::peers() const p->role = RR->topology->role(pi->second->identity().address()); std::vector< SharedPtr > paths(pi->second->paths(_now)); - SharedPtr bestp(pi->second->getBestPath(_now,false)); + SharedPtr bestp(pi->second->getAppropriatePath(_now,false)); p->pathCount = 0; for(std::vector< SharedPtr >::iterator path(paths.begin());path!=paths.end();++path) { ZT_FAST_MEMCPY(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage)); diff --git a/node/Node.hpp b/node/Node.hpp index 79284b63..4ad0e3cd 100644 --- a/node/Node.hpp +++ b/node/Node.hpp @@ -260,6 +260,9 @@ public: inline const Address &remoteTraceTarget() const { return _remoteTraceTarget; } inline Trace::Level remoteTraceLevel() const { return _remoteTraceLevel; } + inline void setMultipathMode(uint8_t mode) { _multipathMode = mode; } + inline uint8_t getMultipathMode() { return _multipathMode; } + private: RuntimeEnvironment _RR; RuntimeEnvironment *RR; @@ -284,6 +287,8 @@ private: Address _remoteTraceTarget; enum Trace::Level _remoteTraceLevel; + uint8_t _multipathMode; + volatile int64_t _now; int64_t _lastPingCheck; int64_t _lastHousekeepingRun; diff --git a/node/Path.hpp b/node/Path.hpp index e12328ff..5751d326 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -39,6 +39,9 @@ #include "SharedPtr.hpp" #include "AtomicCounter.hpp" #include "Utils.hpp" +#include "RingBuffer.hpp" + +#include "../osdep/Phy.hpp" /** * Maximum return value of preferenceRank() @@ -55,6 +58,7 @@ class RuntimeEnvironment; class Path { friend class SharedPtr; + Phy *_phy; public: /** @@ -93,22 +97,71 @@ public: _lastOut(0), _lastIn(0), _lastTrustEstablishedPacketReceived(0), + _lastPathQualityComputeTime(0), _localSocket(-1), _latency(0xffff), _addr(), - _ipScope(InetAddress::IP_SCOPE_NONE) + _ipScope(InetAddress::IP_SCOPE_NONE), + _currentPacketSampleCounter(0), + _meanPacketErrorRatio(0.0), + _meanLatency(0.0), + _lastLatencyUpdate(0), + _jitter(0.0), + _lastPathQualitySampleTime(0), + _lastComputedQuality(0.0), + _lastPathQualityEstimate(0), + _meanAge(0.0), + _meanThroughput(0.0), + _packetLossRatio(0) { + memset(_ifname, 0, sizeof(_ifname)); + memset(_addrString, 0, sizeof(_addrString)); + _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _ageSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _errSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); } Path(const int64_t localSocket,const InetAddress &addr) : _lastOut(0), _lastIn(0), _lastTrustEstablishedPacketReceived(0), + _lastPathQualityComputeTime(0), _localSocket(localSocket), _latency(0xffff), _addr(addr), - _ipScope(addr.ipScope()) + _ipScope(addr.ipScope()), + _currentPacketSampleCounter(0), + _meanPacketErrorRatio(0.0), + _meanLatency(0.0), + _lastLatencyUpdate(0), + _jitter(0.0), + _lastPathQualitySampleTime(0), + _lastComputedQuality(0.0), + _lastPathQualityEstimate(0), + _meanAge(0.0), + _meanThroughput(0.0), + _packetLossRatio(0) + { + memset(_ifname, 0, sizeof(_ifname)); + memset(_addrString, 0, sizeof(_addrString)); + _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _ageSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _errSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + } + + ~Path() { + delete _throughputSamples; + delete _ageSamples; + delete _latencySamples; + delete _errSamples; + + _throughputSamples = NULL; + _ageSamples = NULL; + _latencySamples = NULL; + _errSamples = NULL; } /** @@ -147,12 +200,17 @@ public: * * @param l Measured latency */ - inline void updateLatency(const unsigned int l) + inline void updateLatency(const unsigned int l, int64_t now) { unsigned int pl = _latency; - if (pl < 0xffff) + if (pl < 0xffff) { _latency = (pl + l) / 2; - else _latency = l; + } + else { + _latency = l; + } + _lastLatencyUpdate = now; + _latencySamples->push(l); } /** @@ -240,11 +298,180 @@ public: return (((age < (ZT_PATH_HEARTBEAT_PERIOD + 5000)) ? l : (l + 0xffff + age)) * (long)((ZT_INETADDRESS_MAX_SCOPE - _ipScope) + 1)); } + /** + * @return An estimate of path quality -- higher is better. + */ + inline float computeQuality(const int64_t now) + { + float latency_contrib = _meanLatency ? 1.0 / _meanLatency : 0; + float jitter_contrib = _jitter ? 1.0 / _jitter : 0; + float throughput_contrib = _meanThroughput ? _meanThroughput / 1000000 : 0; // in Mbps + float age_contrib = _meanAge > 0 ? (float)sqrt(_meanAge) : 1; + float error_contrib = 1.0 - _meanPacketErrorRatio; + float sum = (latency_contrib + jitter_contrib + throughput_contrib + error_contrib) / age_contrib; + _lastComputedQuality = sum * (long)((_ipScope) + 1); + return _lastComputedQuality; + } + + /** + * Since quality estimates can become expensive we should cache the most recent result for traffic allocation + * algorithms which may need to reference this value multiple times through the course of their execution. + */ + inline float lastComputedQuality() { + return _lastComputedQuality; + } + + /** + * @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to + */ + inline char *getName() { return _ifname; } + + /** + * @return Estimated throughput in bps of this link + */ + inline uint64_t getThroughput() { return _phy->getThroughput((PhySocket *)((uintptr_t)_localSocket)); } + + /** + * @return Packet delay varience + */ + inline float jitter() { return _jitter; } + + /** + * @return Previously-computed mean latency + */ + inline float meanLatency() { return _meanLatency; } + + /** + * @return Packet loss rate + */ + inline float packetLossRatio() { return _packetLossRatio; } + + /** + * @return Mean packet error ratio + */ + inline float meanPacketErrorRatio() { return _meanPacketErrorRatio; } + + /** + * @return Current packet error ratio (possibly incomplete sample set) + */ + inline float currentPacketErrorRatio() { + int errorsPerSample = 0; + for (int i=0; i<_currentPacketSampleCounter; i++) { + if (_packetValidity[i] == false) { + errorsPerSample++; + } + } + return (float)errorsPerSample / (float)ZT_PATH_ERROR_SAMPLE_WIN_SZ; + } + + /** + * @return Whether the Path's local socket is in a CLOSED state + */ + inline bool isClosed() { return _phy->isClosed((PhySocket *)((uintptr_t)_localSocket)); } + + /** + * @return The state of a Path's local socket + */ + inline int getState() { return _phy->getState((PhySocket *)((uintptr_t)_localSocket)); } + + /** + * @return Whether this socket may have been erased by the virtual physical link layer + */ + inline bool isValidState() { return _phy->isValidState((PhySocket *)((uintptr_t)_localSocket)); } + + /** + * @return Whether the path quality monitors have collected enough data to provide a quality value + * TODO: expand this + */ + inline bool monitorsReady() { + return _latencySamples->count() && _ageSamples->count() && _throughputSamples->count(); + } + + /** + * @return A pointer to a cached copy of the address string for this Path (For debugging only) + */ + inline char *getAddressString() { return _addrString; } + + /** + * Handle path sampling, computation of quality estimates, and other periodic tasks + * @param now Current time + */ + inline void measureLink(int64_t now) { + // Sample path properties and store them in a continuously-revolving buffer + if (now - _lastPathQualitySampleTime > ZT_PATH_QUALITY_SAMPLE_INTERVAL) { + _lastPathQualitySampleTime = now; + _throughputSamples->push(getThroughput()); // Thoughtput in bits/s + _ageSamples->push(now - _lastIn); // Age (time since last received packet) + if (now - _lastLatencyUpdate > ZT_PATH_LATENCY_SAMPLE_INTERVAL) { + _lastLatencyUpdate = now; + // Record 0 bp/s. Since we're using this to detect possible packet loss + updateLatency(0, now); + } + } + // Compute statistical values for use in link quality estimates + if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { + _lastPathQualityComputeTime = now; + // Cache Path address string + address().toString(_addrString); + _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, ZT_PATH_INTERFACE_NAME_SZ); // Cache Interface name + // Derived values + if (_throughputSamples->count()) { + _packetLossRatio = (float)_throughputSamples->zeroCount() / (float)_throughputSamples->count(); + } + _meanThroughput = _throughputSamples->mean(); + _meanAge = _ageSamples->mean(); + _meanLatency = _latencySamples->mean(); + // Jitter + // SEE: RFC 3393, RFC 4689 + _jitter = _latencySamples->stddev(); + _meanPacketErrorRatio = _errSamples->mean(); // Packet Error Ratio (PER) + } + // Periodically compute a path quality estimate + if (now - _lastPathQualityEstimate > ZT_PATH_QUALITY_ESTIMATE_INTERVAL) { + computeQuality(now); + } + } + + /** + * Record whether a packet is considered invalid by MAC/compression/cipher checks. This + * could be an indication of a bit error. This function will keep a running counter of + * up to a given window size and with each counter overflow it will compute a mean error rate + * and store that in a continuously shifting sample window. + * + * @param isValid Whether the packet in question is considered invalid + */ + inline void recordPacket(bool isValid) { + if (_currentPacketSampleCounter < ZT_PATH_ERROR_SAMPLE_WIN_SZ) { + _packetValidity[_currentPacketSampleCounter] = isValid; + _currentPacketSampleCounter++; + } + else { + // Sample array is full, compute an mean and stick it in the ring buffer for trend analysis + _errSamples->push(currentPacketErrorRatio()); + _currentPacketSampleCounter=0; + } + } + + /** + * @return The mean age (in ms) of this link + */ + inline float meanAge() { return _meanAge; } + + /** + * @return The mean throughput (in bits/s) of this link + */ + inline float meanThroughput() { return _meanThroughput; } + /** * @return True if this path is alive (receiving heartbeats) */ inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); } + /** + * @return True if this path hasn't received a packet in a "significant" amount of time + */ + inline bool stale(const int64_t now) const { return ((now - _lastIn) > ZT_LINK_SPEED_TEST_INTERVAL * 10); } + /** * @return True if this path needs a heartbeat */ @@ -269,11 +496,39 @@ private: volatile int64_t _lastOut; volatile int64_t _lastIn; volatile int64_t _lastTrustEstablishedPacketReceived; + volatile int64_t _lastPathQualityComputeTime; int64_t _localSocket; volatile unsigned int _latency; InetAddress _addr; InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often AtomicCounter __refCount; + + // Packet Error Ratio (PER) + int _packetValidity[ZT_PATH_ERROR_SAMPLE_WIN_SZ]; + int _currentPacketSampleCounter; + volatile float _meanPacketErrorRatio; + + // Latency and Jitter + volatile float _meanLatency; + int64_t _lastLatencyUpdate; + volatile float _jitter; + + int64_t _lastPathQualitySampleTime; + float _lastComputedQuality; + int64_t _lastPathQualityEstimate; + float _meanAge; + float _meanThroughput; + + // Circular buffers used to efficiently store large time series + RingBuffer *_throughputSamples; + RingBuffer *_latencySamples; + RingBuffer *_ageSamples; + RingBuffer *_errSamples; + + float _packetLossRatio; + + char _ifname[ZT_PATH_INTERFACE_NAME_SZ]; + char _addrString[256]; }; } // namespace ZeroTier diff --git a/node/Peer.cpp b/node/Peer.cpp index 71afd852..862a4529 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -35,6 +35,7 @@ #include "Packet.hpp" #include "Trace.hpp" #include "InetAddress.hpp" +#include "RingBuffer.hpp" namespace ZeroTier { @@ -59,10 +60,14 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _vRevision(0), _id(peerIdentity), _directPathPushCutoffCount(0), - _credentialsCutoffCount(0) + _credentialsCutoffCount(0), + _linkBalanceStatus(false), + _linkRedundancyStatus(false) { if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) throw ZT_EXCEPTION_INVALID_ARGUMENT; + _pathChoiceHist = new RingBuffer(ZT_MULTIPATH_PROPORTION_WIN_SZ); + _flowBalanceHist = new RingBuffer(ZT_MULTIPATH_PROPORTION_WIN_SZ); } void Peer::received( @@ -95,6 +100,18 @@ void Peer::received( path->trustedPacketReceived(now); } + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { + _lastPathPrune = now; + prunePaths(); + } + for(unsigned int i=0;imeasureLink(now); + } + } + } + if (hops == 0) { // If this is a direct packet (no hops), update existing paths or learn new ones @@ -232,26 +249,246 @@ void Peer::received( } } -SharedPtr Peer::getBestPath(int64_t now,bool includeExpired) const +SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) { Mutex::Lock _l(_paths_m); - unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS; - long bestPathQuality = 2147483647; + + /** + * Send traffic across the highest quality path only. This algorithm will still + * use the old path quality metric. + */ + if (RR->node->getMultipathMode() == ZT_MULTIPATH_NONE) { + long bestPathQuality = 2147483647; + for(unsigned int i=0;iisValidState()) { + if ((includeExpired)||((now - _paths[i].lr) < ZT_PEER_PATH_EXPIRATION)) { + const long q = _paths[i].p->quality(now) / _paths[i].priority; + if (q <= bestPathQuality) { + bestPathQuality = q; + bestPath = i; + } + } + } else break; + } + if (bestPath != ZT_MAX_PEER_NETWORK_PATHS) { + return _paths[bestPath].p; + } + return SharedPtr(); + } + + if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { + _lastPathPrune = now; + prunePaths(); + } for(unsigned int i=0;iquality(now) / _paths[i].priority; - if (q <= bestPathQuality) { - bestPathQuality = q; + _paths[i].p->measureLink(now); + } + } + + /** + * Randomly distribute traffic across all paths + * + * Behavior: + * - If path DOWN: Stop randomly choosing that path + * - If path UP: Start randomly choosing that path + * - If all paths are unresponsive: randomly choose from all paths + */ + int numAlivePaths = 0; + int numStalePaths = 0; + if (RR->node->getMultipathMode() == ZT_MULTIPATH_RANDOM) { + int alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; + int stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; + memset(&alivePaths, -1, sizeof(alivePaths)); + memset(&stalePaths, -1, sizeof(stalePaths)); + for(unsigned int i=0;iisValidState()) { + if (_paths[i].p->alive(now)) { + alivePaths[numAlivePaths] = i; + numAlivePaths++; + } + else { + stalePaths[numStalePaths] = i; + numStalePaths++; + } + } + } + } + unsigned int r; + Utils::getSecureRandom(&r, 1); + if (numAlivePaths > 0) { + // pick a random out of the set deemed "alive" + int rf = (float)(r %= numAlivePaths); + return _paths[alivePaths[rf]].p; + } + else if(numStalePaths > 0) { + // resort to trying any non-expired path + int rf = (float)(r %= numStalePaths); + return _paths[stalePaths[rf]].p; + } + } + + /** + * Proportionally allocate traffic according to dynamic path quality measurements + */ + if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) { + float relq[ZT_MAX_PEER_NETWORK_PATHS]; + memset(&relq, 0, sizeof(relq)); + float alloc[ZT_MAX_PEER_NETWORK_PATHS]; + memset(&alloc, 0, sizeof(alloc)); + + // Survey + // + // Take a survey of all available link qualities. We use this to determine if we + // can skip this algorithm altogether and if not, to establish baseline for physical + // link quality used in later calculations. + // + // We find the min/max quality of our currently-active links so + // that we can form a relative scale to rank each link proportionally + // to each other link. + uint16_t alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; + uint16_t stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; + memset(&alivePaths, -1, sizeof(alivePaths)); + memset(&stalePaths, -1, sizeof(stalePaths)); + uint16_t numAlivePaths = 0; + uint16_t numStalePaths = 0; + float minQuality = 10000; + float maxQuality = -1; + float currQuality; + for(uint16_t i=0;iisValidState()) { + if (!_paths[i].p->monitorsReady()) { + // TODO: This should fix itself anyway but we should test whether forcing the use of a new path will + // aid in establishing flow balance more quickly. + } + // Compute quality here, going forward we will use lastComputedQuality() + currQuality = _paths[i].p->computeQuality(now); + if (!_paths[i].p->stale(now)) { + alivePaths[i] = currQuality; + numAlivePaths++; + } + else { + stalePaths[i] = currQuality; + numStalePaths++; + } + if (currQuality > maxQuality) { + maxQuality = currQuality; bestPath = i; } + if (currQuality < minQuality) { + minQuality = currQuality; + } + relq[i] = currQuality; } - } else break; + } + + // Attempt to find an excuse not to use the rest of this algorithm + if (bestPath == ZT_MAX_PEER_NETWORK_PATHS || (numAlivePaths == 0 && numStalePaths == 0)) { + return SharedPtr(); + } if (numAlivePaths == 1) { + return _paths[bestPath].p; + } if (numStalePaths == 1) { + return _paths[bestPath].p; + } + + // Relative quality + // + // The strongest link will have a value of 1.0 whereas every other + // link will have a value which represents some fraction of the strongest link. + float totalRelativeQuality = 0; + for(unsigned int i=0;iisValidState()) { + relq[i] /= maxQuality ? maxQuality : 1; + totalRelativeQuality += relq[i]; + } + } + + // Convert the relative quality values into flow allocations. + // Additionally, determine whether each path in the flow is + // contributing more or less than its target allocation. If + // it is contributing more than required, don't allow it to be + // randomly selected for the next packet. If however the path + // needs to contribute more to the flow, we should record + float imbalance = 0; + float qualityScalingFactor = 1.0 / totalRelativeQuality; + for(uint16_t i=0;icountValue((float)i); + // Compute traffic allocation for each path in the flow + if (_paths[i].p && _paths[i].p->isValidState()) { + // Allocation + // This is the percentage of traffic we want to send over a given path + alloc[i] = relq[i] * qualityScalingFactor; + float currProportion = numPktSentWithinWin / (float)ZT_MULTIPATH_PROPORTION_WIN_SZ; + float targetProportion = alloc[i]; + float diffProportion = currProportion - targetProportion; + // Imbalance + // + // This is the sum of the distances of each path's currently observed flow contributions + // from its most recent target allocation. In other words, this is a measure of how closely we + // are adhering to our desired allocations. It is worth noting that this value can be greater + // than 1.0 if a significant change to allocations is made by the algorithm, this will + // eventually correct itself. + imbalance += fabs(diffProportion); + if (diffProportion < 0) { + alloc[i] = targetProportion; + } + else { + alloc[i] = targetProportion; + } + } + } + + // Compute and record current flow balance + float balance = 1.0 - imbalance; + if (balance >= ZT_MULTIPATH_FLOW_BALANCE_THESHOLD) { + if (!_linkBalanceStatus) { + _linkBalanceStatus = true; + RR->t->peerLinkBalanced(NULL,0,*this); + } + } + else { + if (_linkBalanceStatus) { + _linkBalanceStatus = false; + RR->t->peerLinkImbalanced(NULL,0,*this); + } + } + + // Record the current flow balance. Later used for computing a mean flow balance value. + _flowBalanceHist->push(balance); + + // Randomly choose path from allocated candidates + unsigned int r; + Utils::getSecureRandom(&r, 1); + float rf = (float)(r %= 100) / 100; + for(int i=0;iisValidState() && _paths[i].p->address().isV4()) { + if (alloc[i] > 0 && rf < alloc[i]) { + bestPath = i; + _pathChoiceHist->push(bestPath); // Record which path we chose + break; + } + if (alloc[i] > 0) { + rf -= alloc[i]; + } + else { + rf -= alloc[i]*-1; + } + } + } + if (bestPath < ZT_MAX_PEER_NETWORK_PATHS) { + return _paths[bestPath].p; + } + return SharedPtr(); + } + + // Adhere to a user-defined interface/allocation scheme + if (RR->node->getMultipathMode() == ZT_MULTIPATH_MANUALLY_BALANCED) { + // TODO } - if (bestPath != ZT_MAX_PEER_NETWORK_PATHS) - return _paths[bestPath].p; return SharedPtr(); } @@ -477,16 +714,34 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) } } else break; } - while(j < ZT_MAX_PEER_NETWORK_PATHS) { - _paths[j].lr = 0; - _paths[j].p.zero(); - _paths[j].priority = 1; - ++j; + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + while(j < ZT_MAX_PEER_NETWORK_PATHS) { + _paths[j].lr = 0; + _paths[j].p.zero(); + _paths[j].priority = 1; + ++j; + } } - return sent; } +unsigned int Peer::prunePaths() +{ + Mutex::Lock _l(_paths_m); + unsigned int pruned = 0; + for(unsigned int i=0;iisClosed() || !_paths[i].p->isValidState()) { + _paths[i].lr = 0; + _paths[i].p.zero(); + _paths[i].priority = 1; + pruned++; + } + } + } + return pruned; +} + void Peer::clusterRedirect(void *tPtr,const SharedPtr &originatingPath,const InetAddress &remoteAddress,const int64_t now) { SharedPtr np(RR->topology->getPath(originatingPath->localSocket(),remoteAddress)); diff --git a/node/Peer.hpp b/node/Peer.hpp index b6f3c695..9873729b 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -65,7 +65,13 @@ private: Peer() {} // disabled to prevent bugs -- should not be constructed uninitialized public: - ~Peer() { Utils::burn(_key,sizeof(_key)); } + ~Peer() { + Utils::burn(_key,sizeof(_key)); + delete _pathChoiceHist; + delete _flowBalanceHist; + _pathChoiceHist = NULL; + _flowBalanceHist = NULL; + } /** * Construct a new peer @@ -145,20 +151,20 @@ public: */ inline bool sendDirect(void *tPtr,const void *data,unsigned int len,int64_t now,bool force) { - SharedPtr bp(getBestPath(now,force)); + SharedPtr bp(getAppropriatePath(now,force)); if (bp) return bp->send(RR,tPtr,data,len,now); return false; } /** - * Get the best current direct path + * Get the most appropriate direct path based on current multipath configuration * * @param now Current time * @param includeExpired If true, include even expired paths * @return Best current path or NULL if none */ - SharedPtr getBestPath(int64_t now,bool includeExpired) const; + SharedPtr getAppropriatePath(int64_t now, bool includeExpired); /** * Send VERB_RENDEZVOUS to this and another peer via the best common IP scope and path @@ -212,6 +218,16 @@ public: */ unsigned int doPingAndKeepalive(void *tPtr,int64_t now); + /** + * Clear paths whose localSocket(s) are in a CLOSED state or have an otherwise INVALID state. + * This should be called frequently so that we can detect and remove unproductive or invalid paths. + * + * Under the hood this is done periodically based on ZT_CLOSED_PATH_PRUNING_INTERVAL. + * + * @return Number of paths that were pruned this round + */ + unsigned int prunePaths(); + /** * Process a cluster redirect sent by this peer * @@ -270,9 +286,9 @@ public: /** * @return Latency in milliseconds of best path or 0xffff if unknown / no paths */ - inline unsigned int latency(const int64_t now) const + inline unsigned int latency(const int64_t now) { - SharedPtr bp(getBestPath(now,false)); + SharedPtr bp(getAppropriatePath(now,false)); if (bp) return bp->latency(); return 0xffff; @@ -289,7 +305,7 @@ public: * * @return Relay quality score computed from latency and other factors, lower is better */ - inline unsigned int relayQuality(const int64_t now) const + inline unsigned int relayQuality(const int64_t now) { const uint64_t tsr = now - _lastReceive; if (tsr >= ZT_PEER_ACTIVITY_TIMEOUT) @@ -515,6 +531,7 @@ private: int64_t _lastCredentialsReceived; int64_t _lastTrustEstablishedPacketReceived; int64_t _lastSentFullHello; + int64_t _lastPathPrune; uint16_t _vProto; uint16_t _vMajor; @@ -530,6 +547,13 @@ private: unsigned int _credentialsCutoffCount; AtomicCounter __refCount; + + RingBuffer *_pathChoiceHist; + RingBuffer *_flowBalanceHist; + + bool _linkBalanceStatus; + bool _linkRedundancyStatus; + }; } // namespace ZeroTier diff --git a/node/RingBuffer.hpp b/node/RingBuffer.hpp new file mode 100644 index 00000000..62f1cf47 --- /dev/null +++ b/node/RingBuffer.hpp @@ -0,0 +1,315 @@ +/* + * ZeroTier One - Network Virtualization Everywhere + * Copyright (C) 2011-2018 ZeroTier, Inc. https://www.zerotier.com/ + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * -- + * + * You can be released from the requirements of the license by purchasing + * a commercial license. Buying such a license is mandatory as soon as you + * develop commercial closed-source software that incorporates or links + * directly against ZeroTier software without disclosing the source code + * of your own application. + */ + +#ifndef ZT_RINGBUFFER_H +#define ZT_RINGBUFFER_H + +#include +#include +#include +#include +#include +#include + +namespace ZeroTier { + +/** + * A revolving (ring) buffer. + * + * For fast handling of continuously-evolving variables (such as path quality metrics). + * Using this, we can maintain longer sliding historical windows for important path + * metrics without the need for potentially expensive calls to memcpy/memmove. + * + * Some basic statistical functionality is implemented here in an attempt + * to reduce the complexity of code needed to interact with this type of buffer. + */ + +template +class RingBuffer +{ +private: + T * buf; + size_t size; + size_t begin; + size_t end; + bool wrap; + +public: + + /** + * create a RingBuffer with space for up to size elements. + */ + explicit RingBuffer(size_t size) + : size(size), + begin(0), + end(0), + wrap(false) + { + buf = new T[size]; + memset(buf, 0, sizeof(T) * size); + } + + /** + * @return A pointer to the underlying buffer + */ + T* get_buf() + { + return buf + begin; + } + + /** + * Adjust buffer index pointer as if we copied data in + * @param n Number of elements to copy in + * @return Number of elements we copied in + */ + size_t produce(size_t n) + { + n = std::min(n, getFree()); + if (n == 0) { + return n; + } + const size_t first_chunk = std::min(n, size - end); + end = (end + first_chunk) % size; + if (first_chunk < n) { + const size_t second_chunk = n - first_chunk; + end = (end + second_chunk) % size; + } + if (begin == end) { + wrap = true; + } + return n; + } + + /** + * Fast erase, O(1). + * Merely reset the buffer pointer, doesn't erase contents + */ + void reset() + { + consume(count()); + } + + /** + * adjust buffer index pointer as if we copied data out + * @param n Number of elements we copied from the buffer + * @return Number of elements actually available from the buffer + */ + size_t consume(size_t n) + { + n = std::min(n, count()); + if (n == 0) { + return n; + } + if (wrap) { + wrap = false; + } + const size_t first_chunk = std::min(n, size - begin); + begin = (begin + first_chunk) % size; + if (first_chunk < n) { + const size_t second_chunk = n - first_chunk; + begin = (begin + second_chunk) % size; + } + return n; + } + + /** + * @param data Buffer that is to be written to the ring + * @param n Number of elements to write to the buffer + */ + size_t write(const T * data, size_t n) + { + n = std::min(n, getFree()); + if (n == 0) { + return n; + } + const size_t first_chunk = std::min(n, size - end); + memcpy(buf + end, data, first_chunk * sizeof(T)); + end = (end + first_chunk) % size; + if (first_chunk < n) { + const size_t second_chunk = n - first_chunk; + memcpy(buf + end, data + first_chunk, second_chunk * sizeof(T)); + end = (end + second_chunk) % size; + } + if (begin == end) { + wrap = true; + } + return n; + } + + /** + * Place a single value on the buffer. If the buffer is full, consume a value first. + * + * @param value A single value to be placed in the buffer + */ + void push(const T value) + { + if (count() == size) { + consume(1); + } + write(&value, 1); + } + + /** + * @param dest Destination buffer + * @param n Size (in terms of number of elements) of the destination buffer + * @return Number of elements read from the buffer + */ + size_t read(T * dest, size_t n) + { + n = std::min(n, count()); + if (n == 0) { + return n; + } + if (wrap) { + wrap = false; + } + const size_t first_chunk = std::min(n, size - begin); + memcpy(dest, buf + begin, first_chunk * sizeof(T)); + begin = (begin + first_chunk) % size; + if (first_chunk < n) { + const size_t second_chunk = n - first_chunk; + memcpy(dest + first_chunk, buf + begin, second_chunk * sizeof(T)); + begin = (begin + second_chunk) % size; + } + return n; + } + + /** + * Return how many elements are in the buffer, O(1). + * + * @return The number of elements in the buffer + */ + size_t count() + { + if (end == begin) { + return wrap ? size : 0; + } + else if (end > begin) { + return end - begin; + } + else { + return size + end - begin; + } + } + + /** + * @return The number of slots that are unused in the buffer + */ + size_t getFree() + { + return size - count(); + } + + /** + * @return The arithmetic mean of the contents of the buffer + */ + T mean() + { + size_t iterator = begin; + T mean = 0; + for (int i=0; i peer(RR->topology->getPeer(tPtr,destination)); if (peer) { - viaPath = peer->getBestPath(now,false); + viaPath = peer->getAppropriatePath(now,false); if (!viaPath) { peer->tryMemorizedPath(tPtr,now); // periodically attempt memorized or statically defined paths, if any are known const SharedPtr relay(RR->topology->getUpstreamPeer()); - if ( (!relay) || (!(viaPath = relay->getBestPath(now,false))) ) { - if (!(viaPath = peer->getBestPath(now,true))) + if ( (!relay) || (!(viaPath = relay->getAppropriatePath(now,false))) ) { + if (!(viaPath = peer->getAppropriatePath(now,true))) return false; } } diff --git a/node/Topology.hpp b/node/Topology.hpp index 63946a32..5d726007 100644 --- a/node/Topology.hpp +++ b/node/Topology.hpp @@ -299,7 +299,7 @@ public: Address *a = (Address *)0; SharedPtr *p = (SharedPtr *)0; while (i.next(a,p)) { - const SharedPtr pp((*p)->getBestPath(now,false)); + const SharedPtr pp((*p)->getAppropriatePath(now,false)); if (pp) ++cnt; } diff --git a/node/Trace.cpp b/node/Trace.cpp index 386edaac..01a8da55 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -106,6 +106,26 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId, } } +void Trace::peerLinkNowRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath) +{ + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is fully redundant",peer.address().toInt(),networkId); +} + +void Trace::peerLinkNoLongerRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath) +{ + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is no longer redundant",peer.address().toInt(),networkId); +} + +void Trace::peerLinkBalanced(void *const tPtr,const uint64_t networkId,Peer &peer) +{ + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is balanced",peer.address().toInt(),networkId); +} + +void Trace::peerLinkImbalanced(void *const tPtr,const uint64_t networkId,Peer &peer) +{ + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is unbalanced",peer.address().toInt(),networkId); +} + void Trace::peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath,const uint64_t packetId) { char tmp[128]; diff --git a/node/Trace.hpp b/node/Trace.hpp index 2a2fca6c..b01163d6 100644 --- a/node/Trace.hpp +++ b/node/Trace.hpp @@ -121,6 +121,12 @@ public: void resettingPathsInScope(void *const tPtr,const Address &reporter,const InetAddress &reporterPhysicalAddress,const InetAddress &myPhysicalAddress,const InetAddress::IpScope scope); void peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &path,const uint64_t packetId,const Packet::Verb verb); + + void peerLinkNowRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); + void peerLinkNoLongerRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); + void peerLinkBalanced(void *const tPtr,const uint64_t networkId,Peer &peer); + void peerLinkImbalanced(void *const tPtr,const uint64_t networkId,Peer &peer); + void peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath,const uint64_t packetId); void peerRedirected(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); diff --git a/osdep/Binder.hpp b/osdep/Binder.hpp index 93fad9f1..6e13836c 100644 --- a/osdep/Binder.hpp +++ b/osdep/Binder.hpp @@ -388,6 +388,7 @@ public: _bindings[_bindingCount].udpSock = udps; _bindings[_bindingCount].tcpListenSock = tcps; _bindings[_bindingCount].address = ii->first; + phy.setIfName(udps, (char*)ii->second.c_str(), ii->second.length()); ++_bindingCount; } } else { @@ -455,6 +456,20 @@ public: return false; } + /** + * Get a list of socket pointers for all bindings. + * + * @return A list of socket pointers for current bindings + */ + inline std::vector getBoundSockets() + { + std::vector sockets; + for (int i=0; i _bindingCount; diff --git a/osdep/Phy.hpp b/osdep/Phy.hpp index e359ccdd..d4f68681 100644 --- a/osdep/Phy.hpp +++ b/osdep/Phy.hpp @@ -27,6 +27,8 @@ #ifndef ZT_PHY_HPP #define ZT_PHY_HPP +#include "../osdep/OSUtils.hpp" + #include #include #include @@ -86,6 +88,22 @@ namespace ZeroTier { */ typedef void PhySocket; +struct link_test_record +{ + link_test_record(PhySocket *_s, uint64_t _id, uint64_t _egress_time, uint32_t _length) : + s(_s), + id(_id), + egress_time(_egress_time), + length(_length) + { + // + } + PhySocket *s; + uint64_t id; + uint64_t egress_time; + uint32_t length; +}; + /** * Simple templated non-blocking sockets implementation * @@ -154,10 +172,17 @@ private: struct PhySocketImpl { + PhySocketImpl() : + throughput(0) + { + memset(ifname, 0, sizeof(ifname)); + } PhySocketType type; ZT_PHY_SOCKFD_TYPE sock; void *uptr; // user-settable pointer ZT_PHY_SOCKADDR_STORAGE_TYPE saddr; // remote for TCP_OUT and TCP_IN, local for TCP_LISTEN, RAW, and UDP + char ifname[16]; + uint64_t throughput; }; std::list _socks; @@ -173,6 +198,7 @@ private: bool _noDelay; bool _noCheck; + std::vector link_test_records; public: /** @@ -249,6 +275,173 @@ public: */ static inline void** getuptr(PhySocket *s) throw() { return &(reinterpret_cast(s)->uptr); } + /** + * @param s Socket object + * @param nameBuf Buffer to store name of interface which this Socket object is bound to + * @param buflen Length of buffer to copy name into + */ + static inline void getIfName(PhySocket *s, char *nameBuf, int buflen) + { + memcpy(nameBuf, reinterpret_cast(s)->ifname, buflen); + } + + /** + * @param s Socket object + * @param ifname Buffer containing name of interface that this Socket object is bound to + * @param len Length of name of interface + */ + static inline void setIfName(PhySocket *s, char *ifname, int len) + { + memcpy(&(reinterpret_cast(s)->ifname), ifname, len); + } + + /** + * Get result of most recent throughput test + * + * @param s Socket object + */ + inline uint64_t getThroughput(PhySocket *s) + { + PhySocketImpl *sws = (reinterpret_cast(s)); + return sws ? sws->throughput : 0; + } + + /** + * Whether or not the socket object is in a closed state + * + * @param s Socket object + * @return true if socket is closed, false if otherwise + */ + inline bool isClosed(PhySocket *s) + { + PhySocketImpl *sws = (reinterpret_cast(s)); + return sws->type == ZT_PHY_SOCKET_CLOSED; + } + + /** + * Get state of socket object + * + * @param s Socket object + * @return State of socket + */ + inline int getState(PhySocket *s) + { + PhySocketImpl *sws = (reinterpret_cast(s)); + return sws->type; + } + + /** + * In the event that this socket is erased, we need a way to convey to the multipath logic + * that this path is no longer valid. + * + * @param s Socket object + * @return Whether the state of this socket is within an acceptable range of values + */ + inline bool isValidState(PhySocket *s) + { + PhySocketImpl *sws = (reinterpret_cast(s)); + return sws->type >= ZT_PHY_SOCKET_CLOSED && sws->type <= ZT_PHY_SOCKET_UNIX_LISTEN; + } + + /** + * Send a datagram of a known size to a selected peer and record egress time. The peer + * shall eventually respond by echoing back a smaller datagram. + * + * @param s Socket object + * @param remoteAddress Address of remote peer to receive link test packet + * @param data Buffer containing random packet data + * @param len Length of packet data buffer + * @return Number of bytes successfully written to socket + */ + inline int test_link_speed(PhySocket *s, const struct sockaddr *to, void *data, uint32_t len) { + if (!reinterpret_cast(s)) { + return 0; + } + uint64_t *buf = (uint64_t*)data; + uint64_t id = buf[0]; + if (to->sa_family != AF_INET && to->sa_family != AF_INET6) { + return 0; + } + uint64_t egress_time = OSUtils::now(); + PhySocketImpl *sws = (reinterpret_cast(s)); +#if defined(_WIN32) || defined(_WIN64) + return ((long)::sendto(sws->sock,reinterpret_cast(data),len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) == (long)len); +#else + int w = ::sendto(sws->sock,data,len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); +#endif + if (w > 0) { + link_test_records.push_back(new link_test_record(s, id, egress_time, len)); + } + return w; + } + + /** + * Remove link speed test records which have timed-out and record a 0 bits/s measurement + */ + inline void refresh_link_speed_records() + { + for(int i=0;iegress_time > ZT_LINK_TEST_TIMEOUT) { + PhySocketImpl *sws = (reinterpret_cast(link_test_records[i]->s)); + if (sws) { + sws->throughput = 0; + } + link_test_records.erase(link_test_records.begin() + i); + } + } + } + + /** + * Upon receipt of a link speed test datagram we echo back only the identification portion + * + * @param s Socket object + * @param from Address of remote peer that sent this datagram + * @param data Buffer containing datagram's contents + * @param len Length of datagram + * @return Number of bytes successfully written to socket in response + */ + inline int respond_to_link_test(PhySocket *s,const struct sockaddr *from,void *data,unsigned long len) { + PhySocketImpl *sws = (reinterpret_cast(s)); + uint64_t *id = (uint64_t*)data; +#if defined(_WIN32) || defined(_WIN64) + return ((long)::sendto(sws->sock,reinterpret_cast(data),len,0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) == (long)len); +#else + int w = ::sendto(sws->sock,id,sizeof(id[0]),0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); +#endif + return w; + } + + /** + * Upon receipt of a response to our original link test datagram, correlate this new datagram with the record + * of the one we sent. Compute the transit time and update the throughput field of the relevant socket. This + * value will later be read by the path quality estimation logic located in Path.hpp. + * + * @param s Socket object + * @param from Address of remote peer that sent this datagram + * @param data Buffer containing datagram contents (ID of original link test datagram) + * @param len Length of datagram + * @return true if datagram correponded to previous record, false if otherwise + */ + inline bool handle_link_test_response(PhySocket *s,const struct sockaddr *from,void *data,unsigned long len) { + uint64_t *id = (uint64_t*)data; + for(int i=0;iid == id[0]) { + float rtt = (OSUtils::now()-link_test_records[i]->egress_time) / (float)1000; // s + uint32_t sz = (link_test_records[i]->length) * 8; // bits + float transit_time = rtt / 2.0; + int64_t raw = sz / transit_time; + PhySocketImpl *sws = (reinterpret_cast(s)); + if (sws) { + sws->throughput = raw; + } + delete link_test_records[i]; + link_test_records.erase(link_test_records.begin() + i); + return true; + } + } + return false; + } + /** * Cause poll() to stop waiting immediately * @@ -985,7 +1178,7 @@ public: ZT_PHY_SOCKFD_TYPE sock = s->sock; // if closed, s->sock becomes invalid as s is no longer dereferencable if ((FD_ISSET(sock,&wfds))&&(FD_ISSET(sock,&_writefds))) { try { - _handler->phyOnUnixWritable((PhySocket *)&(*s),&(s->uptr),false); + _handler->phyOnUnixWritable((PhySocket *)&(*s),&(s->uptr)); } catch ( ... ) {} } if (FD_ISSET(sock,&rfds)) { diff --git a/service/OneService.cpp b/service/OneService.cpp index 80b42818..24456f71 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -37,6 +37,7 @@ #include "../version.h" #include "../include/ZeroTierOne.h" +#include "../include/ZeroTierDebug.h" #include "../node/Constants.hpp" #include "../node/Mutex.hpp" @@ -417,6 +418,7 @@ public: PhySocket *_localControlSocket6; bool _updateAutoApply; bool _allowTcpFallbackRelay; + unsigned int _multipathMode; unsigned int _primaryPort; volatile unsigned int _udpPortPickerCounter; @@ -455,6 +457,9 @@ public: // Last potential sleep/wake event uint64_t _lastRestart; + // Last time link throughput was tested + uint64_t _lastLinkSpeedTest; + // Deadline for the next background task service function volatile int64_t _nextBackgroundTaskDeadline; @@ -818,6 +823,7 @@ public: _lastRestart = clockShouldBe; int64_t lastTapMulticastGroupCheck = 0; int64_t lastBindRefresh = 0; + int64_t lastMultipathModeUpdate = 0; int64_t lastUpdateCheck = clockShouldBe; int64_t lastCleanedPeersDb = 0; int64_t lastLocalInterfaceAddressCheck = (clockShouldBe - ZT_LOCAL_INTERFACE_CHECK_INTERVAL) + 15000; // do this in 15s to give portmapper time to configure and other things time to settle @@ -849,8 +855,9 @@ public: _updater->apply(); } - // Refresh bindings in case device's interfaces have changed, and also sync routes to update any shadow routes (e.g. shadow default) - if (((now - lastBindRefresh) >= ZT_BINDER_REFRESH_PERIOD)||(restarted)) { + // Refresh bindings + int interfaceRefreshPeriod = _multipathMode ? ZT_MULTIPATH_BINDER_REFRESH_PERIOD : ZT_BINDER_REFRESH_PERIOD; + if (((now - lastBindRefresh) >= interfaceRefreshPeriod)||(restarted)) { lastBindRefresh = now; unsigned int p[3]; unsigned int pc = 0; @@ -867,6 +874,34 @@ public: } } } + // Update multipath mode (if needed) + if (((now - lastMultipathModeUpdate) >= interfaceRefreshPeriod)||(restarted)) { + lastMultipathModeUpdate = now; + _node->setMultipathMode(_multipathMode); + } + + // Test link speeds + // TODO: This logic should eventually find its way into the core or as part of a passive + // measure within the protocol. + if (_multipathMode && ((now - _lastLinkSpeedTest) >= ZT_LINK_SPEED_TEST_INTERVAL)) { + _phy.refresh_link_speed_records(); + _lastLinkSpeedTest = now; + // Generate random data to fill UDP packet + uint64_t pktBuf[ZT_LINK_TEST_DATAGRAM_SZ / sizeof(uint64_t)]; + Utils::getSecureRandom(pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); + ZT_PeerList *pl = _node->peers(); + // get bindings (specifically just the sockets) + std::vector sockets = _binder.getBoundSockets(); + // interfaces + for (int i=0; ipeerCount;++j) { + for (int k=0; k<(ZT_MAX_PEER_NETWORK_PATHS/4); k++) { + Utils::getSecureRandom(pktBuf, 8); // generate one random integer for unique id + _phy.test_link_speed(sockets[i], (struct sockaddr*)&(pl->peers[j].paths[k].address), pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); + } + } + } + } // Run background task processor in core if it's time to do so int64_t dl = _nextBackgroundTaskDeadline; @@ -1190,6 +1225,7 @@ public: json &settings = res["config"]["settings"]; settings["primaryPort"] = OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; settings["allowTcpFallbackRelay"] = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],_allowTcpFallbackRelay); + settings["multipathMode"] = OSUtils::jsonInt(settings["multipathMode"],_multipathMode); #ifdef ZT_USE_MINIUPNPC settings["portMappingEnabled"] = OSUtils::jsonBool(settings["portMappingEnabled"],true); #else @@ -1518,6 +1554,11 @@ public: _primaryPort = (unsigned int)OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; _allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true); + _multipathMode = OSUtils::jsonInt(settings["multipathMode"],0); + if (_multipathMode != 0 && _allowTcpFallbackRelay) { + fprintf(stderr,"WARNING: multipathMode cannot be used with allowTcpFallbackRelay. Disabling allowTcpFallbackRelay"); + _allowTcpFallbackRelay = false; + } _portMappingEnabled = OSUtils::jsonBool(settings["portMappingEnabled"],true); json &ignoreIfs = settings["interfacePrefixBlacklist"]; @@ -1758,6 +1799,15 @@ public: inline void phyOnDatagram(PhySocket *sock,void **uptr,const struct sockaddr *localAddr,const struct sockaddr *from,void *data,unsigned long len) { + if (_multipathMode) { + // Handle link test packets (should eventually be moved into the protocol itself) + if (len == ZT_LINK_TEST_DATAGRAM_SZ) { + _phy.respond_to_link_test(sock, from, data, len); + } + if (len == ZT_LINK_TEST_DATAGRAM_RESPONSE_SZ) { + _phy.handle_link_test_response(sock, from, data, len); + } + } if ((len >= 16)&&(reinterpret_cast(from)->ipScope() == InetAddress::IP_SCOPE_GLOBAL)) _lastDirectReceiveFromGlobal = OSUtils::now(); const ZT_ResultCode rc = _node->processWirePacket( @@ -2007,7 +2057,7 @@ public: inline void phyOnUnixAccept(PhySocket *sockL,PhySocket *sockN,void **uptrL,void **uptrN) {} inline void phyOnUnixClose(PhySocket *sock,void **uptr) {} inline void phyOnUnixData(PhySocket *sock,void **uptr,void *data,unsigned long len) {} - inline void phyOnUnixWritable(PhySocket *sock,void **uptr,bool lwip_invoked) {} + inline void phyOnUnixWritable(PhySocket *sock,void **uptr) {} inline int nodeVirtualNetworkConfigFunction(uint64_t nwid,void **nuptr,enum ZT_VirtualNetworkConfigOperation op,const ZT_VirtualNetworkConfig *nwc) { -- cgit v1.2.3 From 1debe2292d85e2d377064f74246244ac607046bf Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Wed, 2 May 2018 11:22:07 -0700 Subject: Cleanup. Misc type conversion and signedness fixes --- node/Path.hpp | 6 +++--- node/Peer.cpp | 12 +++++------- node/RingBuffer.hpp | 28 ++++++++++++++-------------- osdep/Phy.hpp | 12 ++++++------ service/OneService.cpp | 5 ++--- 5 files changed, 30 insertions(+), 33 deletions(-) (limited to 'node') diff --git a/node/Path.hpp b/node/Path.hpp index 5751d326..7ce6e0f1 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -303,11 +303,11 @@ public: */ inline float computeQuality(const int64_t now) { - float latency_contrib = _meanLatency ? 1.0 / _meanLatency : 0; - float jitter_contrib = _jitter ? 1.0 / _jitter : 0; + float latency_contrib = _meanLatency ? (float)1.0 / _meanLatency : 0; + float jitter_contrib = _jitter ? (float)1.0 / _jitter : 0; float throughput_contrib = _meanThroughput ? _meanThroughput / 1000000 : 0; // in Mbps float age_contrib = _meanAge > 0 ? (float)sqrt(_meanAge) : 1; - float error_contrib = 1.0 - _meanPacketErrorRatio; + float error_contrib = (float)1.0 - _meanPacketErrorRatio; float sum = (latency_contrib + jitter_contrib + throughput_contrib + error_contrib) / age_contrib; _lastComputedQuality = sum * (long)((_ipScope) + 1); return _lastComputedQuality; diff --git a/node/Peer.cpp b/node/Peer.cpp index 862a4529..bbc6d6d2 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -320,12 +320,12 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) Utils::getSecureRandom(&r, 1); if (numAlivePaths > 0) { // pick a random out of the set deemed "alive" - int rf = (float)(r %= numAlivePaths); + int rf = r % numAlivePaths; return _paths[alivePaths[rf]].p; } else if(numStalePaths > 0) { // resort to trying any non-expired path - int rf = (float)(r %= numStalePaths); + int rf = r % numStalePaths; return _paths[stalePaths[rf]].p; } } @@ -366,11 +366,9 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) // Compute quality here, going forward we will use lastComputedQuality() currQuality = _paths[i].p->computeQuality(now); if (!_paths[i].p->stale(now)) { - alivePaths[i] = currQuality; numAlivePaths++; } else { - stalePaths[i] = currQuality; numStalePaths++; } if (currQuality > maxQuality) { @@ -412,10 +410,10 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) // randomly selected for the next packet. If however the path // needs to contribute more to the flow, we should record float imbalance = 0; - float qualityScalingFactor = 1.0 / totalRelativeQuality; + float qualityScalingFactor = (float)1.0 / totalRelativeQuality; for(uint16_t i=0;icountValue((float)i); + int numPktSentWithinWin = (int)_pathChoiceHist->countValue(i); // Compute traffic allocation for each path in the flow if (_paths[i].p && _paths[i].p->isValidState()) { // Allocation @@ -442,7 +440,7 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) } // Compute and record current flow balance - float balance = 1.0 - imbalance; + float balance = (float)1.0 - imbalance; if (balance >= ZT_MULTIPATH_FLOW_BALANCE_THESHOLD) { if (!_linkBalanceStatus) { _linkBalanceStatus = true; diff --git a/node/RingBuffer.hpp b/node/RingBuffer.hpp index 62f1cf47..cd384749 100644 --- a/node/RingBuffer.hpp +++ b/node/RingBuffer.hpp @@ -226,34 +226,34 @@ public: /** * @return The arithmetic mean of the contents of the buffer */ - T mean() + float mean() { size_t iterator = begin; - T mean = 0; - for (int i=0; i(s)); #if defined(_WIN32) || defined(_WIN64) - return ((long)::sendto(sws->sock,reinterpret_cast(data),len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) == (long)len); + int w = ::sendto(sws->sock,reinterpret_cast(data),len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) #else int w = ::sendto(sws->sock,data,len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); #endif @@ -380,7 +380,7 @@ public: */ inline void refresh_link_speed_records() { - for(int i=0;iegress_time > ZT_LINK_TEST_TIMEOUT) { PhySocketImpl *sws = (reinterpret_cast(link_test_records[i]->s)); if (sws) { @@ -404,7 +404,7 @@ public: PhySocketImpl *sws = (reinterpret_cast(s)); uint64_t *id = (uint64_t*)data; #if defined(_WIN32) || defined(_WIN64) - return ((long)::sendto(sws->sock,reinterpret_cast(data),len,0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) == (long)len); + int w = ::sendto(sws->sock,reinterpret_cast(id),sizeof(id[0]),0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); #else int w = ::sendto(sws->sock,id,sizeof(id[0]),0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); #endif @@ -424,12 +424,12 @@ public: */ inline bool handle_link_test_response(PhySocket *s,const struct sockaddr *from,void *data,unsigned long len) { uint64_t *id = (uint64_t*)data; - for(int i=0;iid == id[0]) { float rtt = (OSUtils::now()-link_test_records[i]->egress_time) / (float)1000; // s uint32_t sz = (link_test_records[i]->length) * 8; // bits - float transit_time = rtt / 2.0; - int64_t raw = sz / transit_time; + float transit_time = rtt / (float)2.0; + uint64_t raw = (uint64_t)(sz / transit_time); PhySocketImpl *sws = (reinterpret_cast(s)); if (sws) { sws->throughput = raw; diff --git a/service/OneService.cpp b/service/OneService.cpp index 24456f71..27930a52 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -879,7 +879,6 @@ public: lastMultipathModeUpdate = now; _node->setMultipathMode(_multipathMode); } - // Test link speeds // TODO: This logic should eventually find its way into the core or as part of a passive // measure within the protocol. @@ -894,7 +893,7 @@ public: std::vector sockets = _binder.getBoundSockets(); // interfaces for (int i=0; ipeerCount;++j) { + for(size_t j=0;jpeerCount;++j) { for (int k=0; k<(ZT_MAX_PEER_NETWORK_PATHS/4); k++) { Utils::getSecureRandom(pktBuf, 8); // generate one random integer for unique id _phy.test_link_speed(sockets[i], (struct sockaddr*)&(pl->peers[j].paths[k].address), pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); @@ -1554,7 +1553,7 @@ public: _primaryPort = (unsigned int)OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; _allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true); - _multipathMode = OSUtils::jsonInt(settings["multipathMode"],0); + _multipathMode = (unsigned int)OSUtils::jsonInt(settings["multipathMode"],0); if (_multipathMode != 0 && _allowTcpFallbackRelay) { fprintf(stderr,"WARNING: multipathMode cannot be used with allowTcpFallbackRelay. Disabling allowTcpFallbackRelay"); _allowTcpFallbackRelay = false; -- cgit v1.2.3 From 91c8e82c428cea9d9e0a6911fbd1820212871ad8 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Wed, 2 May 2018 15:24:14 -0700 Subject: Adjusted locking order of _paths_m for path pruning. Other minor multipath changes --- node/Constants.hpp | 11 +++++++++-- node/Path.hpp | 16 ++++++++++++++++ node/Peer.cpp | 24 +++++++++++++----------- service/OneService.cpp | 7 ++++--- 4 files changed, 42 insertions(+), 16 deletions(-) (limited to 'node') diff --git a/node/Constants.hpp b/node/Constants.hpp index 80083abe..ee2ff0a6 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -293,7 +293,7 @@ /** * Number of samples to consider when computing path statistics */ -#define ZT_PATH_QUALITY_METRIC_WIN_SZ 128 +#define ZT_PATH_QUALITY_METRIC_WIN_SZ 64 /** * How often important path metrics are sampled (in ms). These metrics are later used @@ -311,7 +311,7 @@ * since we will record a 0 bit/s measurement if no valid latency measurement was made within this * window of time. */ -#define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_PING_CHECK_INVERVAL * 2 +#define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_MULTIPATH_PEER_PING_PERIOD * 2 /** * Interval used for rate-limiting the computation of path quality estimates. Set at 0 @@ -374,6 +374,13 @@ */ #define ZT_PEER_PING_PERIOD 60000 +/** + * Delay between full-fledge pings of directly connected peers. + * With multipath bonding enabled ping peers more often to measure + * packet loss and latency. + */ +#define ZT_MULTIPATH_PEER_PING_PERIOD 5000 + /** * Paths are considered expired if they have not sent us a real packet in this long */ diff --git a/node/Path.hpp b/node/Path.hpp index 7ce6e0f1..e6bcecf0 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -432,6 +432,22 @@ public: } } + /** + * @param buf Buffer to store resultant string + * @return Description of path, in ASCII string format + */ + inline char *toString(char *buf) { + sprintf(buf,"%6s, q=%8.3f, %5.3f Mb/s, j=%8.2f, ml=%8.2f, meanAge=%8.2f, addr=%45s", + getName(), + lastComputedQuality(), + (float)meanThroughput() / (float)1000000, + jitter(), + meanLatency(), + meanAge(), + getAddressString()); + return buf; + } + /** * Record whether a packet is considered invalid by MAC/compression/cipher checks. This * could be an indication of a bit error. This function will keep a running counter of diff --git a/node/Peer.cpp b/node/Peer.cpp index bbc6d6d2..c46ed751 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -100,14 +100,17 @@ void Peer::received( path->trustedPacketReceived(now); } - if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { - if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { - _lastPathPrune = now; - prunePaths(); - } - for(unsigned int i=0;imeasureLink(now); + { + Mutex::Lock _l(_paths_m); + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { + _lastPathPrune = now; + prunePaths(); + } + for(unsigned int i=0;imeasureLink(now); + } } } } @@ -386,9 +389,9 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) if (bestPath == ZT_MAX_PEER_NETWORK_PATHS || (numAlivePaths == 0 && numStalePaths == 0)) { return SharedPtr(); } if (numAlivePaths == 1) { - return _paths[bestPath].p; + //return _paths[bestPath].p; } if (numStalePaths == 1) { - return _paths[bestPath].p; + //return _paths[bestPath].p; } // Relative quality @@ -725,7 +728,6 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) unsigned int Peer::prunePaths() { - Mutex::Lock _l(_paths_m); unsigned int pruned = 0; for(unsigned int i=0;i= interfaceRefreshPeriod)||(restarted)) { lastBindRefresh = now; @@ -889,9 +891,7 @@ public: uint64_t pktBuf[ZT_LINK_TEST_DATAGRAM_SZ / sizeof(uint64_t)]; Utils::getSecureRandom(pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); ZT_PeerList *pl = _node->peers(); - // get bindings (specifically just the sockets) std::vector sockets = _binder.getBoundSockets(); - // interfaces for (int i=0; ipeerCount;++j) { for (int k=0; k<(ZT_MAX_PEER_NETWORK_PATHS/4); k++) { @@ -936,7 +936,8 @@ public: } // Sync information about physical network interfaces - if ((now - lastLocalInterfaceAddressCheck) >= ZT_LOCAL_INTERFACE_CHECK_INTERVAL) { + int interfaceAddressCheckInterval = _multipathMode ? ZT_MULTIPATH_LOCAL_INTERFACE_CHECK_INTERVAL : ZT_LOCAL_INTERFACE_CHECK_INTERVAL; + if ((now - lastLocalInterfaceAddressCheck) >= interfaceAddressCheckInterval) { lastLocalInterfaceAddressCheck = now; _node->clearLocalInterfaceAddresses(); -- cgit v1.2.3 From be469f4dd034050ca95a953903b3170ace275b27 Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Fri, 25 May 2018 14:18:06 -0700 Subject: add/remove routes via rtnetlink --- node/Hashtable.hpp | 4 + osdep/LinuxNetLink.cpp | 896 ++++++++++++++++++++++++++++++------------------- osdep/LinuxNetLink.hpp | 28 +- osdep/ManagedRoute.cpp | 41 +-- 4 files changed, 574 insertions(+), 395 deletions(-) (limited to 'node') diff --git a/node/Hashtable.hpp b/node/Hashtable.hpp index 777e88dc..46d5c007 100644 --- a/node/Hashtable.hpp +++ b/node/Hashtable.hpp @@ -399,6 +399,10 @@ private: { return ((unsigned long)i * (unsigned long)0x9e3779b1); } + static inline unsigned long _hc(const int i) + { + return ((unsigned long)i * (unsigned long)0x9e3379b1); + } inline void _grow() { diff --git a/osdep/LinuxNetLink.cpp b/osdep/LinuxNetLink.cpp index 062144dc..6251cb10 100644 --- a/osdep/LinuxNetLink.cpp +++ b/osdep/LinuxNetLink.cpp @@ -30,408 +30,608 @@ namespace ZeroTier { +struct nl_route_req { + struct nlmsghdr nl; + struct rtmsg rt; + char buf[8192]; +}; + +struct nl_if_req { + struct nlmsghdr nl; + struct ifinfomsg ifa; + char buf[8192]; +}; + LinuxNetLink::LinuxNetLink() - : _t() - , _running(false) - , _routes_ipv4() - , _routes_ipv6() - , _seq(0) - , _fd(socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) - , _la({0}) + : _t() + , _running(false) + , _routes_ipv4() + , _routes_ipv6() + , _seq(0) + , _fd(socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) + , _la({0}) { - // set socket timeout to 1 sec so we're not permablocking recv() calls - struct timeval tv; - tv.tv_sec = 1; - tv.tv_usec = 0; - if(setsockopt(_fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof(tv)) != 0) { - fprintf(stderr, "setsockopt failed: %s\n", strerror(errno)); - } - - _la.nl_family = AF_NETLINK; - _la.nl_pid = getpid(); - _la.nl_groups = RTMGRP_LINK|RTMGRP_IPV4_IFADDR|RTMGRP_IPV6_IFADDR|RTMGRP_IPV4_ROUTE|RTMGRP_IPV6_ROUTE|RTMGRP_NOTIFY; - if (bind(_fd, (struct sockaddr*)&_la, sizeof(_la))) { - fprintf(stderr, "Error connecting to RTNETLINK: %s\n", strerror(errno)); - ::exit(1); - } - - _running = true; - _t = Thread::start(this); - - fprintf(stderr, "Requesting IPV4 Routes\n"); - _requestIPv4Routes(); - Thread::sleep(10); - fprintf(stderr, "Requesting IPV6 Routes\n"); - _requestIPv6Routes(); + // set socket timeout to 1 sec so we're not permablocking recv() calls + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + if(setsockopt(_fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof(tv)) != 0) { + fprintf(stderr, "setsockopt failed: %s\n", strerror(errno)); + } + + _la.nl_family = AF_NETLINK; + _la.nl_pid = getpid(); + _la.nl_groups = RTMGRP_LINK|RTMGRP_IPV4_IFADDR|RTMGRP_IPV6_IFADDR|RTMGRP_IPV4_ROUTE|RTMGRP_IPV6_ROUTE|RTMGRP_NOTIFY; + if (bind(_fd, (struct sockaddr*)&_la, sizeof(_la))) { + fprintf(stderr, "Error connecting to RTNETLINK: %s\n", strerror(errno)); + ::exit(1); + } + + _running = true; + _t = Thread::start(this); + + fprintf(stderr, "Requesting IPV4 Routes\n"); + _requestIPv4Routes(); + Thread::sleep(10); + fprintf(stderr, "Requesting IPV6 Routes\n"); + _requestIPv6Routes(); + Thread::sleep(10); + fprintf(stderr, "Requesting Interface List\n"); + _requestInterfaceList(); } LinuxNetLink::~LinuxNetLink() { - _running = false; - Thread::join(_t); + _running = false; + Thread::join(_t); - ::close(_fd); + ::close(_fd); } void LinuxNetLink::threadMain() throw() { - char buf[8192]; - char *p = NULL; - struct nlmsghdr *nlp; - int nll = 0; - int rtn = 0; - p = buf; - - while(_running) { - rtn = recv(_fd, p, sizeof(buf) - nll, 0); - - if (rtn > 0) { - nlp = (struct nlmsghdr *)p; - - if(nlp->nlmsg_type == NLMSG_ERROR && (nlp->nlmsg_flags & NLM_F_ACK) != NLM_F_ACK) { - fprintf(stderr, "NLMSG_ERROR\n"); - struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(nlp); - if (err->error != 0) { - fprintf(stderr, "rtnetlink error: %s\n", strerror(-(err->error))); - } - p = buf; - nll = 0; - continue; - } - - if (nlp->nlmsg_type == NLMSG_NOOP) { - fprintf(stderr, "noop\n"); - continue; - } - - if( (nlp->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI || (nlp->nlmsg_type == NLMSG_DONE)) - { - if (nlp->nlmsg_type == NLMSG_DONE) { - _processMessage(nlp, nll); - p = buf; - nll = 0; - continue; - } - p += rtn; - nll += rtn; - } - - if (nlp->nlmsg_type == NLMSG_OVERRUN) { - fprintf(stderr, "NLMSG_OVERRUN: Data lost\n"); - p = buf; - nll = 0; - continue; - } - - nll += rtn; - - _processMessage(nlp, nll); - p = buf; - nll = 0; - } - else { - Thread::sleep(100); - continue; - } - } + char buf[8192]; + char *p = NULL; + struct nlmsghdr *nlp; + int nll = 0; + int rtn = 0; + p = buf; + + while(_running) { + rtn = recv(_fd, p, sizeof(buf) - nll, 0); + + if (rtn > 0) { + nlp = (struct nlmsghdr *)p; + + if(nlp->nlmsg_type == NLMSG_ERROR && (nlp->nlmsg_flags & NLM_F_ACK) != NLM_F_ACK) { + fprintf(stderr, "NLMSG_ERROR\n"); + struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(nlp); + if (err->error != 0) { + fprintf(stderr, "rtnetlink error: %s\n", strerror(-(err->error))); + } + p = buf; + nll = 0; + continue; + } + + if (nlp->nlmsg_type == NLMSG_NOOP) { + fprintf(stderr, "noop\n"); + continue; + } + + if( (nlp->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI || (nlp->nlmsg_type == NLMSG_DONE)) + { + if (nlp->nlmsg_type == NLMSG_DONE) { + _processMessage(nlp, nll); + p = buf; + nll = 0; + continue; + } + p += rtn; + nll += rtn; + } + + if (nlp->nlmsg_type == NLMSG_OVERRUN) { + fprintf(stderr, "NLMSG_OVERRUN: Data lost\n"); + p = buf; + nll = 0; + continue; + } + + nll += rtn; + + _processMessage(nlp, nll); + p = buf; + nll = 0; + } + else { + Thread::sleep(100); + continue; + } + } } void LinuxNetLink::_processMessage(struct nlmsghdr *nlp, int nll) { - for(; NLMSG_OK(nlp, nll); nlp=NLMSG_NEXT(nlp, nll)) - { - switch(nlp->nlmsg_type) - { - case RTM_NEWLINK: - _linkAdded(nlp); - break; - case RTM_DELLINK: - _linkDeleted(nlp); - break; - case RTM_NEWADDR: - _ipAddressAdded(nlp); - break; - case RTM_DELADDR: - _ipAddressDeleted(nlp); - break; - case RTM_NEWROUTE: - _routeAdded(nlp); - break; - case RTM_DELROUTE: - _routeDeleted(nlp); - break; - default: - fprintf(stderr, "ignore msgtype %d...\n", nlp->nlmsg_type); - } - } + for(; NLMSG_OK(nlp, nll); nlp=NLMSG_NEXT(nlp, nll)) + { + switch(nlp->nlmsg_type) + { + case RTM_NEWLINK: + _linkAdded(nlp); + break; + case RTM_DELLINK: + _linkDeleted(nlp); + break; + case RTM_NEWADDR: + _ipAddressAdded(nlp); + break; + case RTM_DELADDR: + _ipAddressDeleted(nlp); + break; + case RTM_NEWROUTE: + _routeAdded(nlp); + break; + case RTM_DELROUTE: + _routeDeleted(nlp); + break; + default: + fprintf(stderr, "ignore msgtype %d...\n", nlp->nlmsg_type); + } + } } void LinuxNetLink::_ipAddressAdded(struct nlmsghdr *nlp) { - struct ifaddrmsg *ifap = (struct ifaddrmsg *)NLMSG_DATA(nlp); - struct rtattr *rtap = (struct rtattr *)IFA_RTA(ifap); - int ifal = IFA_PAYLOAD(nlp); - - char addr[40] = {0}; - char local[40] = {0}; - char label[40] = {0}; - char bcast[40] = {0}; - - for(;RTA_OK(rtap, ifal); rtap=RTA_NEXT(rtap,ifal)) - { - switch(rtap->rta_type) { - case IFA_ADDRESS: - inet_ntop(ifap->ifa_family, RTA_DATA(rtap), addr, 40); - break; - case IFA_LOCAL: - inet_ntop(ifap->ifa_family, RTA_DATA(rtap), local, 40); - break; - case IFA_LABEL: - memcpy(label, RTA_DATA(rtap), 40); - break; - case IFA_BROADCAST: - inet_ntop(ifap->ifa_family, RTA_DATA(rtap), bcast, 40); - break; - } - } - - fprintf(stderr, "Added IP Address %s local: %s label: %s broadcast: %s\n", addr, local, label, bcast); + struct ifaddrmsg *ifap = (struct ifaddrmsg *)NLMSG_DATA(nlp); + struct rtattr *rtap = (struct rtattr *)IFA_RTA(ifap); + int ifal = IFA_PAYLOAD(nlp); + + char addr[40] = {0}; + char local[40] = {0}; + char label[40] = {0}; + char bcast[40] = {0}; + + for(;RTA_OK(rtap, ifal); rtap=RTA_NEXT(rtap,ifal)) + { + switch(rtap->rta_type) { + case IFA_ADDRESS: + inet_ntop(ifap->ifa_family, RTA_DATA(rtap), addr, 40); + break; + case IFA_LOCAL: + inet_ntop(ifap->ifa_family, RTA_DATA(rtap), local, 40); + break; + case IFA_LABEL: + memcpy(label, RTA_DATA(rtap), 40); + break; + case IFA_BROADCAST: + inet_ntop(ifap->ifa_family, RTA_DATA(rtap), bcast, 40); + break; + } + } + + fprintf(stderr, "Added IP Address %s local: %s label: %s broadcast: %s\n", addr, local, label, bcast); } void LinuxNetLink::_ipAddressDeleted(struct nlmsghdr *nlp) { - struct ifaddrmsg *ifap = (struct ifaddrmsg *)NLMSG_DATA(nlp); - struct rtattr *rtap = (struct rtattr *)IFA_RTA(ifap); - int ifal = IFA_PAYLOAD(nlp); - - char addr[40] = {0}; - char local[40] = {0}; - char label[40] = {0}; - char bcast[40] = {0}; - - for(;RTA_OK(rtap, ifal); rtap=RTA_NEXT(rtap,ifal)) - { - switch(rtap->rta_type) { - case IFA_ADDRESS: - inet_ntop(ifap->ifa_family, RTA_DATA(rtap), addr, 40); - break; - case IFA_LOCAL: - inet_ntop(ifap->ifa_family, RTA_DATA(rtap), local, 40); - break; - case IFA_LABEL: - memcpy(label, RTA_DATA(rtap), 40); - break; - case IFA_BROADCAST: - inet_ntop(ifap->ifa_family, RTA_DATA(rtap), bcast, 40); - break; - } - } - - fprintf(stderr, "Removed IP Address %s local: %s label: %s broadcast: %s\n", addr, local, label, bcast); + struct ifaddrmsg *ifap = (struct ifaddrmsg *)NLMSG_DATA(nlp); + struct rtattr *rtap = (struct rtattr *)IFA_RTA(ifap); + int ifal = IFA_PAYLOAD(nlp); + + char addr[40] = {0}; + char local[40] = {0}; + char label[40] = {0}; + char bcast[40] = {0}; + + for(;RTA_OK(rtap, ifal); rtap=RTA_NEXT(rtap,ifal)) + { + switch(rtap->rta_type) { + case IFA_ADDRESS: + inet_ntop(ifap->ifa_family, RTA_DATA(rtap), addr, 40); + break; + case IFA_LOCAL: + inet_ntop(ifap->ifa_family, RTA_DATA(rtap), local, 40); + break; + case IFA_LABEL: + memcpy(label, RTA_DATA(rtap), 40); + break; + case IFA_BROADCAST: + inet_ntop(ifap->ifa_family, RTA_DATA(rtap), bcast, 40); + break; + } + } + + fprintf(stderr, "Removed IP Address %s local: %s label: %s broadcast: %s\n", addr, local, label, bcast); } void LinuxNetLink::_routeAdded(struct nlmsghdr *nlp) { - char dsts[40] = {0}; - char gws[40] = {0}; - char ifs[16] = {0}; - char ms[24] = {0}; - - struct rtmsg *rtp = (struct rtmsg *) NLMSG_DATA(nlp); - struct rtattr *rtap = (struct rtattr *)RTM_RTA(rtp); - int rtl = RTM_PAYLOAD(nlp); - - for(;RTA_OK(rtap, rtl); rtap=RTA_NEXT(rtap, rtl)) - { - switch(rtap->rta_type) - { - case RTA_DST: - inet_ntop(rtp->rtm_family, RTA_DATA(rtap), dsts, rtp->rtm_family == AF_INET ? 24 : 40); - break; - case RTA_GATEWAY: - inet_ntop(rtp->rtm_family, RTA_DATA(rtap), gws, rtp->rtm_family == AF_INET ? 24 : 40); - break; - case RTA_OIF: - sprintf(ifs, "%d", *((int*)RTA_DATA(rtap))); - break; - } - } - sprintf(ms, "%d", rtp->rtm_dst_len); - - fprintf(stderr, "Route Added: dst %s/%s gw %s if %s\n", dsts, ms, gws, ifs); + char dsts[40] = {0}; + char gws[40] = {0}; + char ifs[16] = {0}; + char ms[24] = {0}; + + struct rtmsg *rtp = (struct rtmsg *) NLMSG_DATA(nlp); + struct rtattr *rtap = (struct rtattr *)RTM_RTA(rtp); + int rtl = RTM_PAYLOAD(nlp); + + for(;RTA_OK(rtap, rtl); rtap=RTA_NEXT(rtap, rtl)) + { + switch(rtap->rta_type) + { + case RTA_DST: + inet_ntop(rtp->rtm_family, RTA_DATA(rtap), dsts, rtp->rtm_family == AF_INET ? 24 : 40); + break; + case RTA_GATEWAY: + inet_ntop(rtp->rtm_family, RTA_DATA(rtap), gws, rtp->rtm_family == AF_INET ? 24 : 40); + break; + case RTA_OIF: + sprintf(ifs, "%d", *((int*)RTA_DATA(rtap))); + break; + } + } + sprintf(ms, "%d", rtp->rtm_dst_len); + + fprintf(stderr, "Route Added: dst %s/%s gw %s if %s\n", dsts, ms, gws, ifs); } void LinuxNetLink::_routeDeleted(struct nlmsghdr *nlp) { - char dsts[40] = {0}; - char gws[40] = {0}; - char ifs[16] = {0}; - char ms[24] = {0}; - - struct rtmsg *rtp = (struct rtmsg *) NLMSG_DATA(nlp); - struct rtattr *rtap = (struct rtattr *)RTM_RTA(rtp); - int rtl = RTM_PAYLOAD(nlp); - - for(;RTA_OK(rtap, rtl); rtap=RTA_NEXT(rtap, rtl)) - { - switch(rtap->rta_type) - { - case RTA_DST: - inet_ntop(rtp->rtm_family, RTA_DATA(rtap), dsts, rtp->rtm_family == AF_INET ? 24 : 40); - break; - case RTA_GATEWAY: - inet_ntop(rtp->rtm_family, RTA_DATA(rtap), gws, rtp->rtm_family == AF_INET ? 24 : 40); - break; - case RTA_OIF: - sprintf(ifs, "%d", *((int*)RTA_DATA(rtap))); - break; - } - } - sprintf(ms, "%d", rtp->rtm_dst_len); - - fprintf(stderr, "Route Deleted: dst %s/%s gw %s if %s\n", dsts, ms, gws, ifs); + char dsts[40] = {0}; + char gws[40] = {0}; + char ifs[16] = {0}; + char ms[24] = {0}; + + struct rtmsg *rtp = (struct rtmsg *) NLMSG_DATA(nlp); + struct rtattr *rtap = (struct rtattr *)RTM_RTA(rtp); + int rtl = RTM_PAYLOAD(nlp); + + for(;RTA_OK(rtap, rtl); rtap=RTA_NEXT(rtap, rtl)) + { + switch(rtap->rta_type) + { + case RTA_DST: + inet_ntop(rtp->rtm_family, RTA_DATA(rtap), dsts, rtp->rtm_family == AF_INET ? 24 : 40); + break; + case RTA_GATEWAY: + inet_ntop(rtp->rtm_family, RTA_DATA(rtap), gws, rtp->rtm_family == AF_INET ? 24 : 40); + break; + case RTA_OIF: + sprintf(ifs, "%d", *((int*)RTA_DATA(rtap))); + break; + } + } + sprintf(ms, "%d", rtp->rtm_dst_len); + + fprintf(stderr, "Route Deleted: dst %s/%s gw %s if %s\n", dsts, ms, gws, ifs); } void LinuxNetLink::_linkAdded(struct nlmsghdr *nlp) { - char mac[20] = {0}; - unsigned int mtu = 0; - char ifname[40] = {0}; - - struct ifinfomsg *ifip = (struct ifinfomsg *)NLMSG_DATA(nlp); - struct rtattr *rtap = (struct rtattr *)IFLA_RTA(ifip); - int ifil = RTM_PAYLOAD(nlp); - - const char *ptr; - unsigned char *ptr2; - for(;RTA_OK(rtap, ifil);rtap=RTA_NEXT(rtap, ifil)) - { - switch(rtap->rta_type) { - case IFLA_ADDRESS: - ptr2 = (unsigned char*)RTA_DATA(rtap); - snprintf(mac, 20, "%02x:%02x:%02x:%02x:%02x:%02x", - ptr2[0], ptr2[1], ptr2[2], ptr2[3], ptr2[4], ptr2[5]); - break; - case IFLA_IFNAME: - ptr = (const char*)RTA_DATA(rtap); - memcpy(ifname, ptr, strlen(ptr)); - break; - case IFLA_MTU: - memcpy(&mtu, RTA_DATA(rtap), sizeof(unsigned int)); - break; - } - } - - fprintf(stderr, "Link Added: %s mac: %s, mtu: %d\n", ifname, mac, mtu); + char mac[18] = {0}; + unsigned int mtu = 0; + char ifname[IFNAMSIZ] = {0}; + + struct ifinfomsg *ifip = (struct ifinfomsg *)NLMSG_DATA(nlp); + struct rtattr *rtap = (struct rtattr *)IFLA_RTA(ifip); + int ifil = RTM_PAYLOAD(nlp); + + const char *ptr; + unsigned char *ptr2; + for(;RTA_OK(rtap, ifil);rtap=RTA_NEXT(rtap, ifil)) + { + switch(rtap->rta_type) { + case IFLA_ADDRESS: + ptr2 = (unsigned char*)RTA_DATA(rtap); + snprintf(mac, 20, "%02x:%02x:%02x:%02x:%02x:%02x", + ptr2[0], ptr2[1], ptr2[2], ptr2[3], ptr2[4], ptr2[5]); + break; + case IFLA_IFNAME: + ptr = (const char*)RTA_DATA(rtap); + memcpy(ifname, ptr, strlen(ptr)); + break; + case IFLA_MTU: + memcpy(&mtu, RTA_DATA(rtap), sizeof(unsigned int)); + break; + } + } + + struct iface_entry &entry = _interfaces[ifip->ifi_index]; + entry.index = ifip->ifi_index; + memcpy(entry.ifacename, ifname, sizeof(ifname)); + memcpy(entry.mac, mac, sizeof(mac)); + entry.mtu = mtu; + + fprintf(stderr, "Link Added: %s mac: %s, mtu: %d\n", ifname, mac, mtu); } void LinuxNetLink::_linkDeleted(struct nlmsghdr *nlp) { - char mac[20] = {0}; - unsigned int mtu = 0; - char ifname[40] = {0}; - - struct ifinfomsg *ifip = (struct ifinfomsg *)NLMSG_DATA(nlp); - struct rtattr *rtap = (struct rtattr *)IFLA_RTA(ifip); - int ifil = RTM_PAYLOAD(nlp); - - const char *ptr; - unsigned char *ptr2; - for(;RTA_OK(rtap, ifil);rtap=RTA_NEXT(rtap, ifil)) - { - switch(rtap->rta_type) { - case IFLA_ADDRESS: - ptr2 = (unsigned char*)RTA_DATA(rtap); - snprintf(mac, 20, "%02x:%02x:%02x:%02x:%02x:%02x", - ptr2[0], ptr2[1], ptr2[2], ptr2[3], ptr2[4], ptr2[5]); - break; - case IFLA_IFNAME: - ptr = (const char*)RTA_DATA(rtap); - memcpy(ifname, ptr, strlen(ptr)); - break; - case IFLA_MTU: - memcpy(&mtu, RTA_DATA(rtap), sizeof(unsigned int)); - break; - } - } - - fprintf(stderr, "Link Deleted: %s mac: %s, mtu: %d\n", ifname, mac, mtu); + char mac[18] = {0}; + unsigned int mtu = 0; + char ifname[40] = {0}; + + struct ifinfomsg *ifip = (struct ifinfomsg *)NLMSG_DATA(nlp); + struct rtattr *rtap = (struct rtattr *)IFLA_RTA(ifip); + int ifil = RTM_PAYLOAD(nlp); + + const char *ptr; + unsigned char *ptr2; + for(;RTA_OK(rtap, ifil);rtap=RTA_NEXT(rtap, ifil)) + { + switch(rtap->rta_type) { + case IFLA_ADDRESS: + ptr2 = (unsigned char*)RTA_DATA(rtap); + snprintf(mac, 20, "%02x:%02x:%02x:%02x:%02x:%02x", + ptr2[0], ptr2[1], ptr2[2], ptr2[3], ptr2[4], ptr2[5]); + break; + case IFLA_IFNAME: + ptr = (const char*)RTA_DATA(rtap); + memcpy(ifname, ptr, strlen(ptr)); + break; + case IFLA_MTU: + memcpy(&mtu, RTA_DATA(rtap), sizeof(unsigned int)); + break; + } + } + + fprintf(stderr, "Link Deleted: %s mac: %s, mtu: %d\n", ifname, mac, mtu); + if(_interfaces.contains(ifip->ifi_index)) { + _interfaces.erase(ifip->ifi_index); + } } void LinuxNetLink::_requestIPv4Routes() { - struct nl_req req; - bzero(&req, sizeof(req)); - req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); - req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; - req.nl.nlmsg_type = RTM_GETROUTE; - req.nl.nlmsg_pid = 0; - req.nl.nlmsg_seq = ++_seq; - req.rt.rtm_family = AF_INET; - req.rt.rtm_table = RT_TABLE_MAIN; - - struct sockaddr_nl pa; - bzero(&pa, sizeof(pa)); - pa.nl_family = AF_NETLINK; - - struct msghdr msg; - bzero(&msg, sizeof(msg)); - msg.msg_name = (void*)&pa; - msg.msg_namelen = sizeof(pa); - - struct iovec iov; - bzero(&iov, sizeof(iov)); - iov.iov_base = (void*)&req.nl; - iov.iov_len = req.nl.nlmsg_len; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - - sendmsg(_fd, &msg, 0); + struct nl_route_req req; + bzero(&req, sizeof(req)); + req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nl.nlmsg_type = RTM_GETROUTE; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++_seq; + req.rt.rtm_family = AF_INET; + req.rt.rtm_table = RT_TABLE_MAIN; + + struct sockaddr_nl pa; + bzero(&pa, sizeof(pa)); + pa.nl_family = AF_NETLINK; + + struct msghdr msg; + bzero(&msg, sizeof(msg)); + msg.msg_name = (void*)&pa; + msg.msg_namelen = sizeof(pa); + + struct iovec iov; + bzero(&iov, sizeof(iov)); + iov.iov_base = (void*)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + sendmsg(_fd, &msg, 0); } void LinuxNetLink::_requestIPv6Routes() { - struct nl_req req; - bzero(&req, sizeof(req)); - req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); - req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; - req.nl.nlmsg_type = RTM_GETROUTE; - req.nl.nlmsg_pid = 0; - req.nl.nlmsg_seq = ++_seq; - req.rt.rtm_family = AF_INET6; - req.rt.rtm_table = RT_TABLE_MAIN; - - struct sockaddr_nl pa; - bzero(&pa, sizeof(pa)); - pa.nl_family = AF_NETLINK; - - struct msghdr msg; - bzero(&msg, sizeof(msg)); - msg.msg_name = (void*)&pa; - msg.msg_namelen = sizeof(pa); - - struct iovec iov; - bzero(&iov, sizeof(iov)); - iov.iov_base = (void*)&req.nl; - iov.iov_len = req.nl.nlmsg_len; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - - while((sendmsg(_fd, &msg, 0)) == -1) { - fprintf(stderr, "ipv6 waiting..."); - Thread::sleep(100); - } + struct nl_route_req req; + bzero(&req, sizeof(req)); + req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nl.nlmsg_type = RTM_GETROUTE; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++_seq; + req.rt.rtm_family = AF_INET6; + req.rt.rtm_table = RT_TABLE_MAIN; + + struct sockaddr_nl pa; + bzero(&pa, sizeof(pa)); + pa.nl_family = AF_NETLINK; + + struct msghdr msg; + bzero(&msg, sizeof(msg)); + msg.msg_name = (void*)&pa; + msg.msg_namelen = sizeof(pa); + + struct iovec iov; + bzero(&iov, sizeof(iov)); + iov.iov_base = (void*)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + sendmsg(_fd, &msg, 0); } -void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName) +void LinuxNetLink::_requestInterfaceList() { + struct nl_if_req req; + bzero(&req, sizeof(req)); + req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nl.nlmsg_type = RTM_GETLINK; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++_seq; + req.ifa.ifi_family = AF_UNSPEC; + + struct sockaddr_nl pa; + bzero(&pa, sizeof(pa)); + pa.nl_family = AF_NETLINK; + + struct msghdr msg; + bzero(&msg, sizeof(msg)); + msg.msg_name = (void*)&pa; + msg.msg_namelen = sizeof(pa); + + struct iovec iov; + bzero(&iov, sizeof(iov)); + iov.iov_base = (void*)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + sendmsg(_fd, &msg, 0); +} +void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName) +{ + int rtl = sizeof(struct rtmsg); + struct nl_route_req req; + bzero(&req, sizeof(req)); + + struct rtattr *rtap = (struct rtattr *)req.buf; + rtap->rta_type = RTA_DST; + if (target.isV4()) { + rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in_addr); + memcpy((void*)((char*)rtap+sizeof(struct rtattr)), &((struct sockaddr_in*)&target)->sin_addr, sizeof(struct in_addr)); + } else { + rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in6_addr); + memcpy((void*)((char*)rtap+sizeof(struct rtattr)), &((struct sockaddr_in6*)&target)->sin6_addr, sizeof(struct in6_addr)); + } + rtl += rtap->rta_len; + + int interface_index = -1; + if (ifaceName != NULL) { + Hashtable::Iterator iter(_interfaces); + int *k = NULL; + iface_entry *v = NULL; + while(iter.next(k, v)) { + if(strcmp(ifaceName, v->ifacename) == 0) { + interface_index = v->index; + break; + } + } + if (interface_index != -1) { + rtap = (struct rtattr *) (((char*)rtap) + rtap->rta_len); + rtap->rta_type = RTA_OIF; + rtap->rta_len = sizeof(struct rtattr)+sizeof(int); + memcpy(((char*)rtap)+sizeof(rtattr), &interface_index, sizeof(int)); + rtl += rtap->rta_len; + } + } + + if(via) { + rtap = (struct rtattr *)(((char*)rtap)+rtap->rta_len); + rtap->rta_type = RTA_GATEWAY; + if(via.isV4()) { + rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in_addr); + memcpy((char*)rtap+sizeof(struct rtattr), &((struct sockaddr_in*)&via)->sin_addr, sizeof(struct in_addr)); + } else { + rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in6_addr); + memcpy((char*)rtap+sizeof(struct rtattr), &((struct sockaddr_in6*)&via)->sin6_addr, sizeof(struct in6_addr)); + } + rtl += rtap->rta_len; + } + + req.nl.nlmsg_len = NLMSG_LENGTH(rtl); + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; + req.nl.nlmsg_type = RTM_NEWROUTE; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++_seq; + req.rt.rtm_family = target.ss_family; + req.rt.rtm_table = RT_TABLE_MAIN; + req.rt.rtm_protocol = RTPROT_STATIC; + req.rt.rtm_scope = RT_SCOPE_UNIVERSE; + req.rt.rtm_type = RTN_UNICAST; + req.rt.rtm_dst_len = target.netmaskBits(); + + struct sockaddr_nl pa; + bzero(&pa, sizeof(pa)); + pa.nl_family = AF_NETLINK; + + struct msghdr msg; + bzero(&msg, sizeof(msg)); + msg.msg_name = (void*)&pa; + msg.msg_namelen = sizeof(pa); + + struct iovec iov; + bzero(&iov, sizeof(iov)); + iov.iov_base = (void*)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + sendmsg(_fd, &msg, 0); } void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName) { - + int rtl = sizeof(struct rtmsg); + struct nl_route_req req; + bzero(&req, sizeof(req)); + + struct rtattr *rtap = (struct rtattr *)req.buf; + rtap->rta_type = RTA_DST; + if (target.isV4()) { + rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in_addr); + memcpy((void*)((char*)rtap+sizeof(struct rtattr)), &((struct sockaddr_in*)&target)->sin_addr, sizeof(struct in_addr)); + } else { + rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in6_addr); + memcpy((void*)((char*)rtap+sizeof(struct rtattr)), &((struct sockaddr_in6*)&target)->sin6_addr, sizeof(struct in6_addr)); + } + rtl += rtap->rta_len; + + int interface_index = -1; + if (ifaceName != NULL) { + Hashtable::Iterator iter(_interfaces); + int *k = NULL; + iface_entry *v = NULL; + while(iter.next(k, v)) { + if(strcmp(ifaceName, v->ifacename) == 0) { + interface_index = v->index; + break; + } + } + if (interface_index != -1) { + rtap = (struct rtattr *) (((char*)rtap) + rtap->rta_len); + rtap->rta_type = RTA_OIF; + rtap->rta_len = sizeof(struct rtattr)+sizeof(int); + memcpy(((char*)rtap)+sizeof(rtattr), &interface_index, sizeof(int)); + rtl += rtap->rta_len; + } + } + + if(via) { + rtap = (struct rtattr *)(((char*)rtap)+rtap->rta_len); + rtap->rta_type = RTA_GATEWAY; + if(via.isV4()) { + rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in_addr); + memcpy((char*)rtap+sizeof(struct rtattr), &((struct sockaddr_in*)&via)->sin_addr, sizeof(struct in_addr)); + } else { + rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in6_addr); + memcpy((char*)rtap+sizeof(struct rtattr), &((struct sockaddr_in6*)&via)->sin6_addr, sizeof(struct in6_addr)); + } + rtl += rtap->rta_len; + } + + req.nl.nlmsg_len = NLMSG_LENGTH(rtl); + req.nl.nlmsg_flags = NLM_F_REQUEST; + req.nl.nlmsg_type = RTM_DELROUTE; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++_seq; + req.rt.rtm_family = target.ss_family; + req.rt.rtm_table = RT_TABLE_MAIN; + req.rt.rtm_protocol = RTPROT_STATIC; + req.rt.rtm_scope = RT_SCOPE_UNIVERSE; + req.rt.rtm_type = RTN_UNICAST; + req.rt.rtm_dst_len = target.netmaskBits(); + + struct sockaddr_nl pa; + bzero(&pa, sizeof(pa)); + pa.nl_family = AF_NETLINK; + + struct msghdr msg; + bzero(&msg, sizeof(msg)); + msg.msg_name = (void*)&pa; + msg.msg_namelen = sizeof(pa); + + struct iovec iov; + bzero(&iov, sizeof(iov)); + iov.iov_base = (void*)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + sendmsg(_fd, &msg, 0); } void LinuxNetLink::addInterface(const char *iface, unsigned int mtu) @@ -446,12 +646,12 @@ void LinuxNetLink::addAddress(const InetAddress &addr, const char *iface) RouteList LinuxNetLink::getIPV4Routes() const { - return _routes_ipv4; + return _routes_ipv4; } RouteList LinuxNetLink::getIPV6Routes() const { - return _routes_ipv6; + return _routes_ipv6; } } // namespace ZeroTier \ No newline at end of file diff --git a/osdep/LinuxNetLink.hpp b/osdep/LinuxNetLink.hpp index 8ee42ee4..d0e83189 100644 --- a/osdep/LinuxNetLink.hpp +++ b/osdep/LinuxNetLink.hpp @@ -33,28 +33,23 @@ #include #include #include - +#include #include "../node/InetAddress.hpp" #include "Thread.hpp" +#include "../node/Hashtable.hpp" namespace ZeroTier { struct route_entry { - InetAddress target; - InetAddress via; - const char *iface; + InetAddress target; + InetAddress via; + int if_index; + char iface[IFNAMSIZ]; }; - typedef std::vector RouteList; -struct nl_req { - struct nlmsghdr nl; - struct rtmsg rt; - char buf[8192]; -}; - /** * Interface with Linux's RTNETLINK */ @@ -93,18 +88,27 @@ private: void _ipAddressAdded(struct nlmsghdr *nlp); void _ipAddressDeleted(struct nlmsghdr *nlp); - + void _requestInterfaceList(); void _requestIPv4Routes(); void _requestIPv6Routes(); Thread _t; bool _running; + RouteList _routes_ipv4; RouteList _routes_ipv6; uint32_t _seq; + struct iface_entry { + int index; + char ifacename[IFNAMSIZ]; + char mac[18]; + unsigned int mtu; + }; + Hashtable _interfaces; + // socket communication vars; int _fd; struct sockaddr_nl _la; diff --git a/osdep/ManagedRoute.cpp b/osdep/ManagedRoute.cpp index 8d64fde3..fe7c6267 100644 --- a/osdep/ManagedRoute.cpp +++ b/osdep/ManagedRoute.cpp @@ -54,6 +54,7 @@ #include #include #include +#include "../osdep/LinuxNetLink.hpp" #endif #ifdef __BSD__ #include @@ -284,44 +285,14 @@ static void _routeCmd(const char *op,const InetAddress &target,const InetAddress #ifdef __LINUX__ // ---------------------------------------------------------- #define ZT_ROUTING_SUPPORT_FOUND 1 -static bool _hasRoute(const InetAddress &target, const InetAddress &via, const char *localInterface) -{ - if (target.ss_family == AF_INET) { - int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - - char *buf; - int nll; - struct rtmsg *rtp; - int rtl; - struct rtattr *rtap; - - struct sockaddr_nl la; - bzero(&la, sizeof(la)); - la.nl_family = AF_NETLINK; - la.nl_pad = 0; - la.nl_pid = (uint32_t)((ptrdiff_t)&target % getpid()); - la.nl_groups = 0; - int rtn = bind(fd, (struct sockaddr*)&la, sizeof(la)); - - - - close(fd); - return false; - } else { - - return false; - } -} - - static void _routeCmd(const char *op, const InetAddress &target, const InetAddress &via, const char *localInterface) { - bool hasRoute = _hasRoute(target, via, localInterface); - if (hasRoute && (strcmp(op, "add") == 0 || strcmp(op, "replace") == 0)) { - return; - } else if (!hasRoute && (strcmp(op, "remove") == 0 || strcmp(op, "del") == 0)) { - return; + if ((strcmp(op, "add") == 0 || strcmp(op, "replace") == 0)) { + LinuxNetLink::getInstance().addRoute(target, via, localInterface); + } else if ((strcmp(op, "remove") == 0 || strcmp(op, "del") == 0)) { + LinuxNetLink::getInstance().delRoute(target, via, localInterface); } + return; char targetStr[64] = {0}; char viaStr[64] = {0}; -- cgit v1.2.3 From 46a7a2be2e1c4d09d1da2bf26b110a31ec3d0661 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Wed, 30 May 2018 17:45:29 -0700 Subject: Added VERB_ACK and VERB_QOS_MEASUREMENT, refined notion of path quality --- include/ZeroTierDebug.h | 2 +- make-linux.mk | 3 + make-mac.mk | 5 +- node/Constants.hpp | 92 +++++----- node/IncomingPacket.cpp | 119 +++++++++---- node/IncomingPacket.hpp | 2 + node/Packet.hpp | 52 +++++- node/Path.hpp | 458 +++++++++++++++++++++++++++++------------------- node/Peer.cpp | 428 ++++++++++++++++++++++++++------------------ node/Peer.hpp | 85 ++++++++- node/RingBuffer.hpp | 81 ++++++--- node/Switch.cpp | 2 + node/Trace.cpp | 22 +-- node/Trace.hpp | 8 +- node/Utils.hpp | 8 + osdep/Binder.hpp | 14 -- osdep/Phy.hpp | 150 ++-------------- service/OneService.cpp | 33 ---- 18 files changed, 899 insertions(+), 665 deletions(-) (limited to 'node') diff --git a/include/ZeroTierDebug.h b/include/ZeroTierDebug.h index 8e5366f0..a60179b7 100644 --- a/include/ZeroTierDebug.h +++ b/include/ZeroTierDebug.h @@ -86,7 +86,7 @@ #include #define ZT_LOG_TAG "ZTSDK" #endif -#if defined(ZT_TRACE) +#if defined(ZT_DEBUG_TRACE) #if ZT_MSG_INFO == true #if defined(__ANDROID__) #define DEBUG_INFO(fmt, args...) ((void)__android_log_print(ANDROID_LOG_VERBOSE, ZT_LOG_TAG, \ diff --git a/make-linux.mk b/make-linux.mk index 0f5ef384..749c24b8 100644 --- a/make-linux.mk +++ b/make-linux.mk @@ -45,6 +45,9 @@ ONE_OBJS+=ext/http-parser/http_parser.o ifeq ($(ZT_TRACE),1) override DEFS+=-DZT_TRACE endif +ifeq ($(ZT_DEBUG_TRACE),1) + DEFS+=-DZT_DEBUG_TRACE +endif ifeq ($(ZT_RULES_ENGINE_DEBUGGING),1) override DEFS+=-DZT_RULES_ENGINE_DEBUGGING diff --git a/make-mac.mk b/make-mac.mk index 1178437a..12357f68 100644 --- a/make-mac.mk +++ b/make-mac.mk @@ -43,7 +43,10 @@ ONE_OBJS+=ext/libnatpmp/natpmp.o ext/libnatpmp/getgateway.o ext/miniupnpc/connec # Build with address sanitization library for advanced debugging (clang) ifeq ($(ZT_SANITIZE),1) - SANFLAGS+=-fsanitize=address -DASAN_OPTIONS=symbolize=1 + DEFS+=-fsanitize=address -DASAN_OPTIONS=symbolize=1 +endif +ifeq ($(ZT_DEBUG_TRACE),1) + DEFS+=-DZT_DEBUG_TRACE endif # Debug mode -- dump trace output, build binary with -g ifeq ($(ZT_DEBUG),1) diff --git a/node/Constants.hpp b/node/Constants.hpp index ee2ff0a6..227497de 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -267,11 +267,6 @@ */ #define ZT_PING_CHECK_INVERVAL 5000 -/** - * Length of interface name - */ -#define ZT_PATH_INTERFACE_NAME_SZ 16 - /** * How frequently to check for changes to the system's network interfaces. When * the service decides to use this constant it's because we want to react more @@ -286,78 +281,95 @@ #define ZT_MULTIPATH_PROPORTION_WIN_SZ 128 /** - * Threshold for flow to be considered balanced. + * How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL + * since we will record a 0 bit/s measurement if no valid latency measurement was made within this + * window of time. */ -#define ZT_MULTIPATH_FLOW_BALANCE_THESHOLD 0.80 +#define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_MULTIPATH_PEER_PING_PERIOD * 2 /** - * Number of samples to consider when computing path statistics + * Interval used for rate-limiting the computation of path quality estimates. Set at 0 + * to compute as new packets arrive with no delay. */ -#define ZT_PATH_QUALITY_METRIC_WIN_SZ 64 +#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 1000 /** - * How often important path metrics are sampled (in ms). These metrics are later used - * for path quality estimates + * Number of samples to consider when computing real-time path statistics */ -#define ZT_PATH_QUALITY_SAMPLE_INTERVAL 100 +#define ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ 128 /** - * How often new path quality estimates are computed + * Number of samples to consider when computing performing long-term path quality analysis. + * By default this value is set to ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ but can + * be set to any value greater than that to observe longer-term path quality behavior. */ -#define ZT_PATH_QUALITY_ESTIMATE_INTERVAL 100 +#define ZT_PATH_QUALITY_METRIC_WIN_SZ ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ /** - * How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL - * since we will record a 0 bit/s measurement if no valid latency measurement was made within this - * window of time. + * Maximum acceptable Packet Delay Variance (PDV) over a path */ -#define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_MULTIPATH_PEER_PING_PERIOD * 2 +#define ZT_PATH_MAX_PDV 1000 /** - * Interval used for rate-limiting the computation of path quality estimates. Set at 0 - * to compute as new packets arrive with no delay. + * Maximum acceptable time interval between expectation and receipt of at least one ACK over a path + */ +#define ZT_PATH_MAX_AGE 30000 + +/** + * Maximum acceptable mean latency over a path + */ +#define ZT_PATH_MAX_MEAN_LATENCY 1000 + +/** + * How much each factor contributes to the "stability" score of a path + */ +#define ZT_PATH_CONTRIB_PDV 1.0 / 3.0 +#define ZT_PATH_CONTRIB_LATENCY 1.0 / 3.0 +#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE 1.0 / 3.0 + +/** + * How much each factor contributes to the "quality" score of a path */ -#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 0 +#define ZT_PATH_CONTRIB_STABILITY 0.75 / 3.0 +#define ZT_PATH_CONTRIB_THROUGHPUT 1.50 / 3.0 +#define ZT_PATH_CONTRIB_SCOPE 0.75 / 3.0 /** - * Path error rate history window size. This is used to keep track of packet error - * measurements over a path's medium-term history. + * Min and max acceptable sizes for a VERB_QOS_MEASUREMENT packet */ -#define ZT_PATH_ERROR_HIST_WIN_SZ 10 +#define ZT_PATH_MIN_QOS_PACKET_SZ 8 + 1 +#define ZT_PATH_MAX_QOS_PACKET_SZ 1400 /** - * The number of packet error measurements in each sample + * How many ID:sojourn time pairs in a single QoS packet */ -#define ZT_PATH_ERROR_SAMPLE_WIN_SZ 1024 +#define ZT_PATH_QOS_TABLE_SIZE (ZT_PATH_MAX_QOS_PACKET_SZ * 8) / (64 + 8) /** - * How often a peer will prune its own paths. Pruning is important when multipath is - * enabled because we want to prevent the allocation algorithms from sending anything - * out on known dead paths. Additionally, quickly marking paths as dead helps when - * a new path is learned and needs to replace an older path. + * How often the service tests the path throughput */ -#define ZT_CLOSED_PATH_PRUNING_INTERVAL 1000 +#define ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL ZT_PATH_ACK_INTERVAL * 8 /** - * Datagram used to test link throughput. Contents are random. + * Minimum amount of time between each ACK packet */ -#define ZT_LINK_TEST_DATAGRAM_SZ 1024 +#define ZT_PATH_ACK_INTERVAL 250 /** - * Size of datagram expected as a reply to a link speed test + * How often a QoS packet is sent */ -#define ZT_LINK_TEST_DATAGRAM_RESPONSE_SZ 8 +#define ZT_PATH_QOS_INTERVAL 1000 /** - * Time before a link test datagram is considered lost. Any corresponding - * timing records that would have been used to compute a RTT are purged. + * How often an aggregate link statistics report is emitted into this tracing system */ -#define ZT_LINK_TEST_TIMEOUT 10000 +#define ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL 60000 /** - * How often the service tests the link throughput. + * How much an aggregate link's component paths can vary from their target allocation + * before the link is considered to be in a state of imbalance. */ -#define ZT_LINK_SPEED_TEST_INTERVAL 1000 +#define ZT_PATH_IMBALANCE_THRESHOLD 0.20 /** * How frequently to send heartbeats over in-use paths diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index 8f6dda63..e7227412 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -80,7 +80,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) if (!trusted) { if (!dearmor(peer->key())) { RR->t->incomingPacketMessageAuthenticationFailure(tPtr,_path,packetId(),sourceAddress,hops(),"invalid MAC"); - _path->recordPacket(false); + _path->recordInvalidPacket(); return true; } } @@ -90,15 +90,15 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) return true; } - _path->recordPacket(true); - const Packet::Verb v = verb(); switch(v) { //case Packet::VERB_NOP: default: // ignore unknown verbs, but if they pass auth check they are "received" - peer->received(tPtr,_path,hops(),packetId(),v,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0); return true; case Packet::VERB_HELLO: return _doHELLO(RR,tPtr,true); + case Packet::VERB_ACK: return _doACK(RR,tPtr,peer); + case Packet::VERB_QOS_MEASUREMENT: return _doQOS_MEASUREMENT(RR,tPtr,peer); case Packet::VERB_ERROR: return _doERROR(RR,tPtr,peer); case Packet::VERB_OK: return _doOK(RR,tPtr,peer); case Packet::VERB_WHOIS: return _doWHOIS(RR,tPtr,peer); @@ -197,11 +197,55 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,void *tPtr,const Shar default: break; } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId); + + return true; +} + +bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) +{ + /* Dissect incoming ACK packet. From this we can estimate current throughput of the path, establish known + * maximums and detect packet loss. */ + + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + int32_t ackedBytes; + memcpy(&ackedBytes, payload(), sizeof(int32_t)); + _path->receivedAck(RR->node->now(), Utils::ntoh(ackedBytes)); + } return true; } +bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) +{ + + /* Dissect incoming QoS packet. From this we can compute latency values and their variance. + * The latency variance is used as a measure of "jitter". */ + + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if (payloadLength() < ZT_PATH_MAX_QOS_PACKET_SZ && payloadLength() > ZT_PATH_MIN_QOS_PACKET_SZ) { + const int64_t now = RR->node->now(); + uint64_t rx_id[ZT_PATH_QOS_TABLE_SIZE]; + uint8_t rx_ts[ZT_PATH_QOS_TABLE_SIZE]; + char *begin = (char *)payload(); + char *ptr = begin; + int count = 0; + int len = payloadLength(); + // Read packet IDs and latency compensation intervals for each packet tracked by thie QoS packet + while (ptr < (begin + len)) { + memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t)); + rx_id[count] = Utils::ntoh(rx_id[count]); + ptr+=sizeof(uint64_t); + memcpy((void*)&rx_ts[count], ptr, sizeof(uint8_t)); + ptr+=sizeof(uint8_t); + count++; + } + _path->receivedQoS(now, count, rx_id, rx_ts); + } + } + return true; +} + bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool alreadyAuthenticated) { const int64_t now = RR->node->now(); @@ -398,7 +442,7 @@ bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool _path->send(RR,tPtr,outp.data(),outp.size(),now); peer->setRemoteVersion(protoVersion,vMajor,vMinor,vRevision); // important for this to go first so received() knows the version - peer->received(tPtr,_path,hops(),pid,Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0); return true; } @@ -448,8 +492,9 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP } } - if (!hops()) + if (!hops() && (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE)) { _path->updateLatency((unsigned int)latency, RR->node->now()); + } peer->setRemoteVersion(vProto,vMajor,vMinor,vRevision); @@ -510,7 +555,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP default: break; } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId); return true; } @@ -545,7 +590,7 @@ bool IncomingPacket::_doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const Shar _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0); return true; } @@ -569,7 +614,7 @@ bool IncomingPacket::_doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0); return true; } @@ -598,7 +643,7 @@ bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const Shar _sendErrorNeedCredentials(RR,tPtr,peer,nwid); } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid); return true; } @@ -621,7 +666,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const if (!network->gate(tPtr,peer)) { RR->t->incomingNetworkAccessDenied(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,true); _sendErrorNeedCredentials(RR,tPtr,peer,nwid); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); return true; } @@ -633,7 +678,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const const uint8_t *const frameData = (const uint8_t *)field(comLen + ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD,frameLen); if ((!from)||(from == network->mac())) { - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } @@ -644,19 +689,19 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const network->learnBridgeRoute(from,peer->address()); } else { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (remote)"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } } else if (to != network->mac()) { if (to.isMulticast()) { if (network->config().multicastLimit == 0) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"multicast disabled"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } } else if (!network->config().permitsBridging(RR->identity.address())) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (local)"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } } @@ -676,9 +721,9 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); } else { - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); } return true; @@ -698,7 +743,7 @@ bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const Share outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); - peer->received(tPtr,_path,hops(),pid,Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0); return true; } @@ -743,7 +788,7 @@ bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,c } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); return true; } @@ -866,7 +911,7 @@ bool IncomingPacket::_doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *t } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); return true; } @@ -892,7 +937,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,void _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hopCount,requestPacketId,Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid); return true; } @@ -913,7 +958,7 @@ bool IncomingPacket::_doNETWORK_CONFIG(const RuntimeEnvironment *RR,void *tPtr,c } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0); return true; } @@ -956,7 +1001,7 @@ bool IncomingPacket::_doMULTICAST_GATHER(const RuntimeEnvironment *RR,void *tPtr } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid); return true; } @@ -982,7 +1027,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, if (!network->gate(tPtr,peer)) { RR->t->incomingNetworkAccessDenied(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,true); _sendErrorNeedCredentials(RR,tPtr,peer,nwid); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); return true; } @@ -1006,19 +1051,19 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, if (network->config().multicastLimit == 0) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"multicast disabled"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); return true; } if ((frameLen > 0)&&(frameLen <= ZT_MAX_MTU)) { if (!to.mac().isMulticast()) { RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"destination not multicast"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } if ((!from)||(from.isMulticast())||(from == network->mac())) { RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"invalid source MAC"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } @@ -1032,7 +1077,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, network->learnBridgeRoute(from,peer->address()); } else { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"bridging not allowed (remote)"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } } @@ -1055,10 +1100,10 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); } else { _sendErrorNeedCredentials(RR,tPtr,peer,nwid); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); } return true; @@ -1070,7 +1115,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt // First, subject this to a rate limit if (!peer->rateGatePushDirectPaths(now)) { - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); return true; } @@ -1094,7 +1139,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt case 4: { const InetAddress a(field(ptr,4),4,at(ptr + 4)); if ( - ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget + ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget (!( ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_CLUSTER_REDIRECT) == 0) && (peer->hasActivePathTo(now,a)) )) && // not already known (RR->node->shouldUsePathForZeroTierTraffic(tPtr,peer->address(),_path->localSocket(),a)) ) // should use path { @@ -1108,7 +1153,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt case 6: { const InetAddress a(field(ptr,16),16,at(ptr + 16)); if ( - ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget + ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget (!( ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_CLUSTER_REDIRECT) == 0) && (peer->hasActivePathTo(now,a)) )) && // not already known (RR->node->shouldUsePathForZeroTierTraffic(tPtr,peer->address(),_path->localSocket(),a)) ) // should use path { @@ -1123,7 +1168,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt ptr += addrLen; } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); return true; } @@ -1139,7 +1184,7 @@ bool IncomingPacket::_doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,con RR->node->postEvent(tPtr,ZT_EVENT_USER_MESSAGE,reinterpret_cast(&um)); } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0); return true; } @@ -1163,7 +1208,7 @@ bool IncomingPacket::_doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,con } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0); return true; } diff --git a/node/IncomingPacket.hpp b/node/IncomingPacket.hpp index 88f4f066..9144277c 100644 --- a/node/IncomingPacket.hpp +++ b/node/IncomingPacket.hpp @@ -125,6 +125,8 @@ private: // been authenticated, decrypted, decompressed, and classified. bool _doERROR(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool alreadyAuthenticated); + bool _doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); + bool _doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); diff --git a/node/Packet.hpp b/node/Packet.hpp index 27da6fb5..6869691e 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -419,7 +419,7 @@ public: template Fragment(const Buffer &b) : - Buffer(b) + Buffer(b) { } @@ -930,7 +930,53 @@ public: */ VERB_PUSH_DIRECT_PATHS = 0x10, - // 0x11, 0x12 -- deprecated + // 0x11 -- deprecated + + /** + * An acknowledgement of receipt of a series of recent packets from another + * peer. This is used to calculate relative throughput values and to detect + * packet loss. Only VERB_FRAME and VERB_EXT_FRAME packets are counted. + * + * ACK response format: + * <[4] 32-bit number of bytes received since last ACK> + * + * Upon receipt of this packet, the local peer will verify that the correct + * number of bytes were received by the remote peer. If these values do + * not agree that could be an indicator of packet loss. + * + * Additionally, the local peer knows the interval of time that has + * elapsed since the last received ACK. With this information it can compute + * a rough estimate of the current throughput. + * + * This is sent at a maximum rate of once per every ZT_PATH_ACK_INTERVAL + */ + VERB_ACK = 0x12, + + /** + * A packet containing timing measurements useful for estimating path quality. + * Composed of a list of pairs for an + * arbitrary set of recent packets. This is used to sample for latency and + * packet delay variance (PDV, "jitter"). + * + * QoS record format: + * + * <[8] 64-bit packet ID of previously-received packet> + * <[1] 8-bit packet sojourn time> + * <...repeat until end of max 1400 byte packet...> + * + * The number of possible records per QoS packet is: (1400 * 8) / 72 = 155 + * This packet should be sent very rarely (every few seconds) as it can be + * somewhat large if the connection is saturated. Future versions might use + * a bloom table to probablistically determine these values in a vastly + * more space-efficient manner. + * + * Note: The 'internal packet sojourn time' is a slight misnomer as it is a + * measure of the amount of time between when a packet was received and the + * egress time of its tracking QoS packet. + * + * This is sent at a maximum rate of once per every ZT_PATH_QOS_INTERVAL + */ + VERB_QOS_MEASUREMENT = 0x13, /** * A message with arbitrary user-definable content: @@ -999,7 +1045,7 @@ public: template Packet(const Buffer &b) : - Buffer(b) + Buffer(b) { } diff --git a/node/Path.hpp b/node/Path.hpp index e6bcecf0..0278d919 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -102,24 +102,23 @@ public: _latency(0xffff), _addr(), _ipScope(InetAddress::IP_SCOPE_NONE), - _currentPacketSampleCounter(0), - _meanPacketErrorRatio(0.0), - _meanLatency(0.0), - _lastLatencyUpdate(0), - _jitter(0.0), - _lastPathQualitySampleTime(0), - _lastComputedQuality(0.0), - _lastPathQualityEstimate(0), - _meanAge(0.0), + _lastAck(0), + _lastThroughputEstimation(0), + _lastQoSMeasurement(0), + _unackedBytes(0), + _expectingAckAsOf(0), + _packetsReceivedSinceLastAck(0), _meanThroughput(0.0), - _packetLossRatio(0) + _maxLifetimeThroughput(0), + _bytesAckedSinceLastThroughputEstimation(0), + _meanLatency(0.0), + _packetDelayVariance(0.0), + _packetErrorRatio(0.0), + _packetLossRatio(0), + _lastComputedStability(0.0), + _lastComputedRelativeQuality(0) { - memset(_ifname, 0, sizeof(_ifname)); - memset(_addrString, 0, sizeof(_addrString)); - _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _ageSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _errSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + prepareBuffers(); } Path(const int64_t localSocket,const InetAddress &addr) : @@ -131,37 +130,35 @@ public: _latency(0xffff), _addr(addr), _ipScope(addr.ipScope()), - _currentPacketSampleCounter(0), - _meanPacketErrorRatio(0.0), - _meanLatency(0.0), - _lastLatencyUpdate(0), - _jitter(0.0), - _lastPathQualitySampleTime(0), - _lastComputedQuality(0.0), - _lastPathQualityEstimate(0), - _meanAge(0.0), + _lastAck(0), + _lastThroughputEstimation(0), + _lastQoSMeasurement(0), + _unackedBytes(0), + _expectingAckAsOf(0), + _packetsReceivedSinceLastAck(0), _meanThroughput(0.0), - _packetLossRatio(0) + _maxLifetimeThroughput(0), + _bytesAckedSinceLastThroughputEstimation(0), + _meanLatency(0.0), + _packetDelayVariance(0.0), + _packetErrorRatio(0.0), + _packetLossRatio(0), + _lastComputedStability(0.0), + _lastComputedRelativeQuality(0) { - memset(_ifname, 0, sizeof(_ifname)); - memset(_addrString, 0, sizeof(_addrString)); - _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _ageSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _errSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + prepareBuffers(); } ~Path() { delete _throughputSamples; - delete _ageSamples; delete _latencySamples; - delete _errSamples; - + delete _qualitySamples; + delete _packetValiditySamples; _throughputSamples = NULL; - _ageSamples = NULL; _latencySamples = NULL; - _errSamples = NULL; + _qualitySamples = NULL; + _packetValiditySamples = NULL; } /** @@ -209,7 +206,6 @@ public: else { _latency = l; } - _lastLatencyUpdate = now; _latencySamples->push(l); } @@ -299,194 +295,273 @@ public: } /** - * @return An estimate of path quality -- higher is better. + * Take note that we're expecting a VERB_ACK on this path as of a specific time + * + * @param now Current time + * @param packetId ID of the packet + * @param payloadLength Number of bytes we're is expecting a reply to */ - inline float computeQuality(const int64_t now) + inline void expectingAck(int64_t now, int64_t packetId, uint16_t payloadLength) { - float latency_contrib = _meanLatency ? (float)1.0 / _meanLatency : 0; - float jitter_contrib = _jitter ? (float)1.0 / _jitter : 0; - float throughput_contrib = _meanThroughput ? _meanThroughput / 1000000 : 0; // in Mbps - float age_contrib = _meanAge > 0 ? (float)sqrt(_meanAge) : 1; - float error_contrib = (float)1.0 - _meanPacketErrorRatio; - float sum = (latency_contrib + jitter_contrib + throughput_contrib + error_contrib) / age_contrib; - _lastComputedQuality = sum * (long)((_ipScope) + 1); - return _lastComputedQuality; + _expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? _expectingAckAsOf : now; + _unackedBytes += payloadLength; + _outgoingPacketRecords[packetId] = now; } /** - * Since quality estimates can become expensive we should cache the most recent result for traffic allocation - * algorithms which may need to reference this value multiple times through the course of their execution. + * Record that we've received a VERB_ACK on this path, also compute throughput if required. + * + * @param now Current time + * @param ackedBytes Number of bytes awknowledged by other peer */ - inline float lastComputedQuality() { - return _lastComputedQuality; + inline void receivedAck(int64_t now, int32_t ackedBytes) + { + _expectingAckAsOf = 0; + _unackedBytes = (ackedBytes > _unackedBytes) ? 0 : _unackedBytes - ackedBytes; + int64_t timeSinceThroughputEstimate = (now - _lastThroughputEstimation); + if (timeSinceThroughputEstimate >= ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL) { + uint64_t throughput = (float)(_bytesAckedSinceLastThroughputEstimation) / ((float)timeSinceThroughputEstimate / (float)1000); + _throughputSamples->push(throughput); + _maxLifetimeThroughput = throughput > _maxLifetimeThroughput ? throughput : _maxLifetimeThroughput; + _lastThroughputEstimation = now; + _bytesAckedSinceLastThroughputEstimation = 0; + } else { + _bytesAckedSinceLastThroughputEstimation += ackedBytes; + } } /** - * @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to + * @return Number of bytes this peer is responsible for ACKing since last ACK */ - inline char *getName() { return _ifname; } + inline int32_t bytesToAck() + { + int32_t bytesToAck = 0; + for (int i=0; i<_packetsReceivedSinceLastAck; i++) { + bytesToAck += _recorded_len[i]; + } + return bytesToAck; + } /** - * @return Estimated throughput in bps of this link + * @return Number of bytes thusfar sent that have not been awknowledged by the remote peer */ - inline uint64_t getThroughput() { return _phy->getThroughput((PhySocket *)((uintptr_t)_localSocket)); } + inline int64_t unackedSentBytes() + { + return _unackedBytes; + } /** - * @return Packet delay varience + * Account for the fact that an ACK was just sent. Reset counters, timers, and clear statistics buffers + * + * @param Current time */ - inline float jitter() { return _jitter; } + inline void sentAck(int64_t now) + { + memset(_recorded_id, 0, sizeof(_recorded_id)); + memset(_recorded_ts, 0, sizeof(_recorded_ts)); + memset(_recorded_len, 0, sizeof(_recorded_len)); + _packetsReceivedSinceLastAck = 0; + _lastAck = now; + } /** - * @return Previously-computed mean latency + * Receive QoS data, match with recorded egress times from this peer, compute latency + * estimates. + * + * @param now Current time + * @param count Number of records + * @param rx_id table of packet IDs + * @param rx_ts table of holding times */ - inline float meanLatency() { return _meanLatency; } + inline void receivedQoS(int64_t now, int count, uint64_t *rx_id, uint8_t *rx_ts) + { + // Look up egress times and compute latency values for each record + for (int j=0; j::iterator it = _outgoingPacketRecords.find(rx_id[j]); + if (it != _outgoingPacketRecords.end()) { + uint16_t rtt = (uint16_t)(now - it->second); + uint16_t rtt_compensated = rtt - rx_ts[j]; + float latency = rtt_compensated / 2.0; + updateLatency(latency, now); + _outgoingPacketRecords.erase(it); + } + } + } /** - * @return Packet loss rate + * Generate the contents of a VERB_QOS_MEASUREMENT packet. + * + * @param now Current time + * @param qosBuffer destination buffer + * @return Size of payload */ - inline float packetLossRatio() { return _packetLossRatio; } + inline int32_t generateQoSPacket(int64_t now, char *qosBuffer) + { + int32_t len = 0; + for (int i=0; i<_packetsReceivedSinceLastAck; i++) { + uint64_t id = _recorded_id[i]; + memcpy(qosBuffer, &id, sizeof(uint64_t)); + qosBuffer+=sizeof(uint64_t); + uint8_t holdingTime = (uint8_t)(now - _recorded_ts[i]); + memcpy(qosBuffer, &holdingTime, sizeof(uint8_t)); + qosBuffer+=sizeof(uint8_t); + len+=sizeof(uint64_t)+sizeof(uint8_t); + } + return len; + } /** - * @return Mean packet error ratio + * Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers. + * + * @param Current time */ - inline float meanPacketErrorRatio() { return _meanPacketErrorRatio; } + inline void sentQoS(int64_t now) { _lastQoSMeasurement = now; } /** - * @return Current packet error ratio (possibly incomplete sample set) + * Record statistics on incoming packets. Used later to estimate QoS. + * + * @param now Current time + * @param packetId + * @param payloadLength */ - inline float currentPacketErrorRatio() { - int errorsPerSample = 0; - for (int i=0; i<_currentPacketSampleCounter; i++) { - if (_packetValidity[i] == false) { - errorsPerSample++; - } - } - return (float)errorsPerSample / (float)ZT_PATH_ERROR_SAMPLE_WIN_SZ; + inline void recordIncomingPacket(int64_t now, int64_t packetId, int32_t payloadLength) + { + _recorded_ts[_packetsReceivedSinceLastAck] = now; + _recorded_id[_packetsReceivedSinceLastAck] = packetId; + _recorded_len[_packetsReceivedSinceLastAck] = payloadLength; + _packetsReceivedSinceLastAck++; + _packetValiditySamples->push(true); } /** - * @return Whether the Path's local socket is in a CLOSED state + * @param now Current time + * @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time */ - inline bool isClosed() { return _phy->isClosed((PhySocket *)((uintptr_t)_localSocket)); } + inline bool needsToSendAck(int64_t now) { + return ((now - _lastAck) >= ZT_PATH_ACK_INTERVAL || + (_packetsReceivedSinceLastAck == ZT_PATH_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck; + } /** - * @return The state of a Path's local socket + * @param now Current time + * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time */ - inline int getState() { return _phy->getState((PhySocket *)((uintptr_t)_localSocket)); } + inline bool needsToSendQoS(int64_t now) { + return ((_packetsReceivedSinceLastAck >= ZT_PATH_QOS_TABLE_SIZE) || + ((now - _lastQoSMeasurement) > ZT_PATH_QOS_INTERVAL)) && _packetsReceivedSinceLastAck; + } /** - * @return Whether this socket may have been erased by the virtual physical link layer + * How much time has elapsed since we've been expecting a VERB_ACK on this path. This value + * is used to determine a more relevant path "age". This lets us penalize paths which are no + * longer ACKing, but not those that simple aren't being used to carry traffic at the + * current time. */ - inline bool isValidState() { return _phy->isValidState((PhySocket *)((uintptr_t)_localSocket)); } + inline int64_t ackAge(int64_t now) { return _expectingAckAsOf ? now - _expectingAckAsOf : 0; } /** - * @return Whether the path quality monitors have collected enough data to provide a quality value - * TODO: expand this + * The maximum observed throughput for this path */ - inline bool monitorsReady() { - return _latencySamples->count() && _ageSamples->count() && _throughputSamples->count(); - } + inline uint64_t maxLifetimeThroughput() { return _maxLifetimeThroughput; } /** - * @return A pointer to a cached copy of the address string for this Path (For debugging only) + * @return The mean throughput (in bits/s) of this link */ - inline char *getAddressString() { return _addrString; } + inline float meanThroughput() { return _meanThroughput; } /** - * Handle path sampling, computation of quality estimates, and other periodic tasks - * @param now Current time + * Assign a new relative quality value for this path in the aggregate link + * + * @param rq Quality of this path in comparison to other paths available to this peer */ - inline void measureLink(int64_t now) { - // Sample path properties and store them in a continuously-revolving buffer - if (now - _lastPathQualitySampleTime > ZT_PATH_QUALITY_SAMPLE_INTERVAL) { - _lastPathQualitySampleTime = now; - _throughputSamples->push(getThroughput()); // Thoughtput in bits/s - _ageSamples->push(now - _lastIn); // Age (time since last received packet) - if (now - _lastLatencyUpdate > ZT_PATH_LATENCY_SAMPLE_INTERVAL) { - _lastLatencyUpdate = now; - // Record 0 bp/s. Since we're using this to detect possible packet loss - updateLatency(0, now); - } - } - // Compute statistical values for use in link quality estimates - if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { - _lastPathQualityComputeTime = now; - // Cache Path address string - address().toString(_addrString); - _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, ZT_PATH_INTERFACE_NAME_SZ); // Cache Interface name - // Derived values - if (_throughputSamples->count()) { - _packetLossRatio = (float)_throughputSamples->zeroCount() / (float)_throughputSamples->count(); - } - _meanThroughput = _throughputSamples->mean(); - _meanAge = _ageSamples->mean(); - _meanLatency = _latencySamples->mean(); - // Jitter - // SEE: RFC 3393, RFC 4689 - _jitter = _latencySamples->stddev(); - _meanPacketErrorRatio = _errSamples->mean(); // Packet Error Ratio (PER) - } - // Periodically compute a path quality estimate - if (now - _lastPathQualityEstimate > ZT_PATH_QUALITY_ESTIMATE_INTERVAL) { - computeQuality(now); - } - } + inline void updateRelativeQuality(float rq) { _lastComputedRelativeQuality = rq; } /** - * @param buf Buffer to store resultant string - * @return Description of path, in ASCII string format + * @return Quality of this path compared to others in the aggregate link */ - inline char *toString(char *buf) { - sprintf(buf,"%6s, q=%8.3f, %5.3f Mb/s, j=%8.2f, ml=%8.2f, meanAge=%8.2f, addr=%45s", - getName(), - lastComputedQuality(), - (float)meanThroughput() / (float)1000000, - jitter(), - meanLatency(), - meanAge(), - getAddressString()); - return buf; - } + inline float relativeQuality() { return _lastComputedRelativeQuality; } /** - * Record whether a packet is considered invalid by MAC/compression/cipher checks. This - * could be an indication of a bit error. This function will keep a running counter of - * up to a given window size and with each counter overflow it will compute a mean error rate - * and store that in a continuously shifting sample window. - * - * @param isValid Whether the packet in question is considered invalid + * @return Stability estimates can become expensive to compute, we cache the most recent result. */ - inline void recordPacket(bool isValid) { - if (_currentPacketSampleCounter < ZT_PATH_ERROR_SAMPLE_WIN_SZ) { - _packetValidity[_currentPacketSampleCounter] = isValid; - _currentPacketSampleCounter++; - } - else { - // Sample array is full, compute an mean and stick it in the ring buffer for trend analysis - _errSamples->push(currentPacketErrorRatio()); - _currentPacketSampleCounter=0; - } - } + inline float lastComputedStability() { return _lastComputedStability; } /** - * @return The mean age (in ms) of this link + * @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to */ - inline float meanAge() { return _meanAge; } + inline char *getName() { return _ifname; } /** - * @return The mean throughput (in bits/s) of this link + * @return Packet delay varience */ - inline float meanThroughput() { return _meanThroughput; } + inline float packetDelayVariance() { return _packetDelayVariance; } /** - * @return True if this path is alive (receiving heartbeats) + * @return Previously-computed mean latency */ - inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); } + inline float meanLatency() { return _meanLatency; } + + /** + * @return Packet loss rate (PLR) + */ + inline float packetLossRatio() { return _packetLossRatio; } + + /** + * @return Packet error ratio (PER) + */ + inline float packetErrorRatio() { return _packetErrorRatio; } + + /** + * Record an invalid incoming packet. This packet failed MAC/compression/cipher checks and will now + * contribute to a Packet Error Ratio (PER). + */ + inline void recordInvalidPacket() { _packetValiditySamples->push(false); } /** - * @return True if this path hasn't received a packet in a "significant" amount of time + * @return A pointer to a cached copy of the address string for this Path (For debugging only) */ - inline bool stale(const int64_t now) const { return ((now - _lastIn) > ZT_LINK_SPEED_TEST_INTERVAL * 10); } + inline char *getAddressString() { return _addrString; } + + /** + * Compute and cache stability and performance metrics. The resultant stability coefficint is a measure of how "well behaved" + * this path is. This figure is substantially different from (but required for the estimation of the path's overall "quality". + * + * @param now Current time + */ + inline void processBackgroundPathMeasurements(int64_t now, const int64_t peerId) { + if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { + _lastPathQualityComputeTime = now; + _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, 16); + address().toString(_addrString); + _meanThroughput = _throughputSamples->mean(); + _meanLatency = _latencySamples->mean(); + _packetDelayVariance = _latencySamples->stddev(); // Similar to "jitter" (SEE: RFC 3393, RFC 4689) + // If no packet validity samples, assume PER==0 + _packetErrorRatio = 1 - (_packetValiditySamples->count() ? _packetValiditySamples->mean() : 1); + // Compute path stability + // Normalize measurements with wildly different ranges into a reasonable range + float normalized_pdv = Utils::normalize(_packetDelayVariance, 0, ZT_PATH_MAX_PDV, 0, 10); + float normalized_la = Utils::normalize(_meanLatency, 0, ZT_PATH_MAX_MEAN_LATENCY, 0, 10); + float throughput_cv = _throughputSamples->mean() > 0 ? _throughputSamples->stddev() / _throughputSamples->mean() : 1; + // Form an exponential cutoff and apply contribution weights + float pdv_contrib = exp((-1)*normalized_pdv) * ZT_PATH_CONTRIB_PDV; + float latency_contrib = exp((-1)*normalized_la) * ZT_PATH_CONTRIB_LATENCY; + float throughput_disturbance_contrib = exp((-1)*throughput_cv) * ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE; + // Obey user-defined ignored contributions + pdv_contrib = ZT_PATH_CONTRIB_PDV > 0.0 ? pdv_contrib : 1; + latency_contrib = ZT_PATH_CONTRIB_LATENCY > 0.0 ? latency_contrib : 1; + throughput_disturbance_contrib = ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE > 0.0 ? throughput_disturbance_contrib : 1; + // Compute the quality product + _lastComputedStability = pdv_contrib + latency_contrib + throughput_disturbance_contrib; + _lastComputedStability *= 1 - _packetErrorRatio; + _qualitySamples->push(_lastComputedStability); + } + } + + /** + * @return True if this path is alive (receiving heartbeats) + */ + inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); } /** * @return True if this path needs a heartbeat @@ -508,6 +583,21 @@ public: */ inline int64_t lastTrustEstablishedPacketReceived() const { return _lastTrustEstablishedPacketReceived; } + /** + * Initialize statistical buffers + */ + inline void prepareBuffers() { + _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _qualitySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _packetValiditySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + memset(_ifname, 0, 16); + memset(_recorded_id, 0, sizeof(_recorded_id)); + memset(_recorded_ts, 0, sizeof(_recorded_ts)); + memset(_recorded_len, 0, sizeof(_recorded_len)); + memset(_addrString, 0, sizeof(_addrString)); + } + private: volatile int64_t _lastOut; volatile int64_t _lastIn; @@ -519,32 +609,42 @@ private: InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often AtomicCounter __refCount; - // Packet Error Ratio (PER) - int _packetValidity[ZT_PATH_ERROR_SAMPLE_WIN_SZ]; - int _currentPacketSampleCounter; - volatile float _meanPacketErrorRatio; + uint64_t _recorded_id[ZT_PATH_QOS_TABLE_SIZE]; + uint64_t _recorded_ts[ZT_PATH_QOS_TABLE_SIZE]; + uint16_t _recorded_len[ZT_PATH_QOS_TABLE_SIZE]; - // Latency and Jitter - volatile float _meanLatency; - int64_t _lastLatencyUpdate; - volatile float _jitter; + std::map _outgoingPacketRecords; + + int64_t _lastAck; + int64_t _lastThroughputEstimation; + int64_t _lastQoSMeasurement; + + int64_t _unackedBytes; + int64_t _expectingAckAsOf; + int16_t _packetsReceivedSinceLastAck; - int64_t _lastPathQualitySampleTime; - float _lastComputedQuality; - int64_t _lastPathQualityEstimate; - float _meanAge; float _meanThroughput; + uint64_t _maxLifetimeThroughput; + uint64_t _bytesAckedSinceLastThroughputEstimation; - // Circular buffers used to efficiently store large time series - RingBuffer *_throughputSamples; - RingBuffer *_latencySamples; - RingBuffer *_ageSamples; - RingBuffer *_errSamples; + volatile float _meanLatency; + float _packetDelayVariance; + float _packetErrorRatio; float _packetLossRatio; - char _ifname[ZT_PATH_INTERFACE_NAME_SZ]; + // cached estimates + float _lastComputedStability; + float _lastComputedRelativeQuality; + + // cached human-readable strings for tracing purposes + char _ifname[16]; char _addrString[256]; + + RingBuffer *_throughputSamples; + RingBuffer *_latencySamples; + RingBuffer *_qualitySamples; + RingBuffer *_packetValiditySamples; }; } // namespace ZeroTier diff --git a/node/Peer.cpp b/node/Peer.cpp index c46ed751..8deaa362 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -24,8 +24,8 @@ * of your own application. */ -#include "../version.h" +#include "../version.h" #include "Constants.hpp" #include "Peer.hpp" #include "Node.hpp" @@ -36,6 +36,7 @@ #include "Trace.hpp" #include "InetAddress.hpp" #include "RingBuffer.hpp" +#include "Utils.hpp" namespace ZeroTier { @@ -61,13 +62,13 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _id(peerIdentity), _directPathPushCutoffCount(0), _credentialsCutoffCount(0), - _linkBalanceStatus(false), - _linkRedundancyStatus(false) + _linkIsBalanced(false), + _linkIsRedundant(false), + _lastAggregateStatsReport(0) { if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) throw ZT_EXCEPTION_INVALID_ARGUMENT; _pathChoiceHist = new RingBuffer(ZT_MULTIPATH_PROPORTION_WIN_SZ); - _flowBalanceHist = new RingBuffer(ZT_MULTIPATH_PROPORTION_WIN_SZ); } void Peer::received( @@ -75,6 +76,7 @@ void Peer::received( const SharedPtr &path, const unsigned int hops, const uint64_t packetId, + const unsigned int payloadLength, const Packet::Verb verb, const uint64_t inRePacketId, const Packet::Verb inReVerb, @@ -103,13 +105,13 @@ void Peer::received( { Mutex::Lock _l(_paths_m); if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { - if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { - _lastPathPrune = now; - prunePaths(); + recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, now); + if (path->needsToSendQoS(now)) { + sendQOS_MEASUREMENT(tPtr, path, path->localSocket(), path->address(), now); } for(unsigned int i=0;imeasureLink(now); + _paths[i].p->processBackgroundPathMeasurements(now, _id.address().toInt()); } } } @@ -117,7 +119,6 @@ void Peer::received( if (hops == 0) { // If this is a direct packet (no hops), update existing paths or learn new ones - bool havePath = false; { Mutex::Lock _l(_paths_m); @@ -164,6 +165,19 @@ void Peer::received( } } + // If we find a pre-existing path with the same address, just replace it. + // If we don't find anything we can replace, just use the replacePath that we previously decided on. + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + for(unsigned int i=0;iaddress().ss_family == path->address().ss_family && _paths[i].p->address().ipsEqual2(path->address())) { + replacePath = i; + break; + } + } + } + } + if (replacePath != ZT_MAX_PEER_NETWORK_PATHS) { if (verb == Packet::VERB_OK) { RR->t->peerLearnedNewPath(tPtr,networkId,*this,path,packetId); @@ -252,6 +266,117 @@ void Peer::received( } } +void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, int64_t now) +{ + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + path->expectingAck(now, packetId, payloadLength); + } + } +} + +void Peer::recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, int64_t now) +{ + if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + if (path->needsToSendAck(now)) { + sendACK(tPtr, path, path->localSocket(), path->address(), now); + } + path->recordIncomingPacket(now, packetId, payloadLength); + } +} + +float Peer::computeAggregateLinkRelativeQuality(int64_t now) +{ + float maxStability = 0; + float totalRelativeQuality = 0; + float maxThroughput = 1; + float maxScope = 0; + float relStability[ZT_MAX_PEER_NETWORK_PATHS]; + float relThroughput[ZT_MAX_PEER_NETWORK_PATHS]; + memset(&relStability, 0, sizeof(relStability)); + memset(&relThroughput, 0, sizeof(relThroughput)); + // Survey all paths + for(unsigned int i=0;ilastComputedStability(); + relThroughput[i] = _paths[i].p->maxLifetimeThroughput(); + maxStability = relStability[i] > maxStability ? relStability[i] : maxStability; + maxThroughput = relThroughput[i] > maxThroughput ? relThroughput[i] : maxThroughput; + maxScope = _paths[i].p->ipScope() > maxScope ? _paths[i].p->ipScope() : maxScope; + } + } + // Convert to relative values + for(unsigned int i=0;iackAge(now), 0, ZT_PATH_MAX_AGE, 0, 10); + float age_contrib = exp((-1)*normalized_ma); + float relScope = ((float)(_paths[i].p->ipScope()+1) / (maxScope + 1)); + float relQuality = + (relStability[i] * ZT_PATH_CONTRIB_STABILITY) + + (fmax(1, relThroughput[i]) * ZT_PATH_CONTRIB_THROUGHPUT) + + relScope * ZT_PATH_CONTRIB_SCOPE; + relQuality *= age_contrib; + totalRelativeQuality += relQuality; + _paths[i].p->updateRelativeQuality(relQuality); + } + } + return (float)1.0 / totalRelativeQuality; // Used later to convert relative quantities into flow allocations +} + +float Peer::computeAggregateLinkPacketDelayVariance() +{ + float pdv = 0.0; + for(unsigned int i=0;irelativeQuality() * _paths[i].p->packetDelayVariance(); + } + } + return pdv; +} + +float Peer::computeAggregateLinkMeanLatency() +{ + float ml = 0.0; + for(unsigned int i=0;irelativeQuality() * _paths[i].p->meanLatency(); + } + } + return ml; +} + +int Peer::aggregateLinkPhysicalPathCount() +{ + std::map ifnamemap; + int pathCount = 0; + int64_t now = RR->node->now(); + for(unsigned int i=0;ialive(now)) { + if (!ifnamemap[_paths[i].p->getName()]) { + ifnamemap[_paths[i].p->getName()] = true; + pathCount++; + } + } + } + return pathCount; +} + +int Peer::aggregateLinkLogicalPathCount() +{ + int pathCount = 0; + int64_t now = RR->node->now(); + for(unsigned int i=0;ialive(now)) { + pathCount++; + } + } + return pathCount; +} + SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) { Mutex::Lock _l(_paths_m); @@ -264,7 +389,7 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) if (RR->node->getMultipathMode() == ZT_MULTIPATH_NONE) { long bestPathQuality = 2147483647; for(unsigned int i=0;iisValidState()) { + if (_paths[i].p) { if ((includeExpired)||((now - _paths[i].lr) < ZT_PEER_PATH_EXPIRATION)) { const long q = _paths[i].p->quality(now) / _paths[i].priority; if (q <= bestPathQuality) { @@ -280,23 +405,14 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) return SharedPtr(); } - if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { - _lastPathPrune = now; - prunePaths(); - } for(unsigned int i=0;imeasureLink(now); + _paths[i].p->processBackgroundPathMeasurements(now, _id.address().toInt()); } } /** * Randomly distribute traffic across all paths - * - * Behavior: - * - If path DOWN: Stop randomly choosing that path - * - If path UP: Start randomly choosing that path - * - If all paths are unresponsive: randomly choose from all paths */ int numAlivePaths = 0; int numStalePaths = 0; @@ -307,15 +423,13 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) memset(&stalePaths, -1, sizeof(stalePaths)); for(unsigned int i=0;iisValidState()) { - if (_paths[i].p->alive(now)) { - alivePaths[numAlivePaths] = i; - numAlivePaths++; - } - else { - stalePaths[numStalePaths] = i; - numStalePaths++; - } + if (_paths[i].p->alive(now)) { + alivePaths[numAlivePaths] = i; + numAlivePaths++; + } + else { + stalePaths[numStalePaths] = i; + numStalePaths++; } } } @@ -337,160 +451,104 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) * Proportionally allocate traffic according to dynamic path quality measurements */ if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) { - float relq[ZT_MAX_PEER_NETWORK_PATHS]; - memset(&relq, 0, sizeof(relq)); float alloc[ZT_MAX_PEER_NETWORK_PATHS]; memset(&alloc, 0, sizeof(alloc)); - - // Survey - // - // Take a survey of all available link qualities. We use this to determine if we - // can skip this algorithm altogether and if not, to establish baseline for physical - // link quality used in later calculations. - // - // We find the min/max quality of our currently-active links so - // that we can form a relative scale to rank each link proportionally - // to each other link. - uint16_t alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; - uint16_t stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; + int numAlivePaths = 0; + int numStalePaths = 0; + int alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; + int stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; memset(&alivePaths, -1, sizeof(alivePaths)); memset(&stalePaths, -1, sizeof(stalePaths)); - uint16_t numAlivePaths = 0; - uint16_t numStalePaths = 0; - float minQuality = 10000; - float maxQuality = -1; - float currQuality; - for(uint16_t i=0;iisValidState()) { - if (!_paths[i].p->monitorsReady()) { - // TODO: This should fix itself anyway but we should test whether forcing the use of a new path will - // aid in establishing flow balance more quickly. - } - // Compute quality here, going forward we will use lastComputedQuality() - currQuality = _paths[i].p->computeQuality(now); - if (!_paths[i].p->stale(now)) { + // Attempt to find an excuse not to use the rest of this algorithm + // Alive or Stale? + for(unsigned int i=0;ialive(now)) { + alivePaths[numAlivePaths] = i; numAlivePaths++; - } - else { + } else { + stalePaths[numStalePaths] = i; numStalePaths++; } - if (currQuality > maxQuality) { - maxQuality = currQuality; - bestPath = i; - } - if (currQuality < minQuality) { - minQuality = currQuality; - } - relq[i] = currQuality; + // Record a default path to use as a short-circuit for the rest of the algorithm + bestPath = i; } } - - // Attempt to find an excuse not to use the rest of this algorithm - if (bestPath == ZT_MAX_PEER_NETWORK_PATHS || (numAlivePaths == 0 && numStalePaths == 0)) { + if (numAlivePaths == 0 && numStalePaths == 0) { return SharedPtr(); - } if (numAlivePaths == 1) { - //return _paths[bestPath].p; - } if (numStalePaths == 1) { - //return _paths[bestPath].p; - } - - // Relative quality - // - // The strongest link will have a value of 1.0 whereas every other - // link will have a value which represents some fraction of the strongest link. - float totalRelativeQuality = 0; - for(unsigned int i=0;iisValidState()) { - relq[i] /= maxQuality ? maxQuality : 1; - totalRelativeQuality += relq[i]; - } + } if (numAlivePaths == 1 || numStalePaths == 1) { + return _paths[bestPath].p; } - - // Convert the relative quality values into flow allocations. - // Additionally, determine whether each path in the flow is - // contributing more or less than its target allocation. If - // it is contributing more than required, don't allow it to be - // randomly selected for the next packet. If however the path - // needs to contribute more to the flow, we should record - float imbalance = 0; - float qualityScalingFactor = (float)1.0 / totalRelativeQuality; + // Compare paths to each-other + float qualityScalingFactor = computeAggregateLinkRelativeQuality(now); + // Convert set of relative performances into an allocation set for(uint16_t i=0;icountValue(i); - // Compute traffic allocation for each path in the flow - if (_paths[i].p && _paths[i].p->isValidState()) { - // Allocation - // This is the percentage of traffic we want to send over a given path - alloc[i] = relq[i] * qualityScalingFactor; - float currProportion = numPktSentWithinWin / (float)ZT_MULTIPATH_PROPORTION_WIN_SZ; - float targetProportion = alloc[i]; - float diffProportion = currProportion - targetProportion; - // Imbalance - // - // This is the sum of the distances of each path's currently observed flow contributions - // from its most recent target allocation. In other words, this is a measure of how closely we - // are adhering to our desired allocations. It is worth noting that this value can be greater - // than 1.0 if a significant change to allocations is made by the algorithm, this will - // eventually correct itself. - imbalance += fabs(diffProportion); - if (diffProportion < 0) { - alloc[i] = targetProportion; - } - else { - alloc[i] = targetProportion; - } - } - } - - // Compute and record current flow balance - float balance = (float)1.0 - imbalance; - if (balance >= ZT_MULTIPATH_FLOW_BALANCE_THESHOLD) { - if (!_linkBalanceStatus) { - _linkBalanceStatus = true; - RR->t->peerLinkBalanced(NULL,0,*this); - } - } - else { - if (_linkBalanceStatus) { - _linkBalanceStatus = false; - RR->t->peerLinkImbalanced(NULL,0,*this); + if (_paths[i].p) { + alloc[i] = _paths[i].p->relativeQuality() * qualityScalingFactor; } } - - // Record the current flow balance. Later used for computing a mean flow balance value. - _flowBalanceHist->push(balance); - - // Randomly choose path from allocated candidates + // Randomly choose path according to their allocations unsigned int r; Utils::getSecureRandom(&r, 1); float rf = (float)(r %= 100) / 100; for(int i=0;iisValidState() && _paths[i].p->address().isV4()) { - if (alloc[i] > 0 && rf < alloc[i]) { + if (_paths[i].p) { + if (rf < alloc[i]) { bestPath = i; _pathChoiceHist->push(bestPath); // Record which path we chose break; } - if (alloc[i] > 0) { - rf -= alloc[i]; - } - else { - rf -= alloc[i]*-1; - } + rf -= alloc[i]; } } if (bestPath < ZT_MAX_PEER_NETWORK_PATHS) { return _paths[bestPath].p; } - return SharedPtr(); } + return SharedPtr(); +} - // Adhere to a user-defined interface/allocation scheme - if (RR->node->getMultipathMode() == ZT_MULTIPATH_MANUALLY_BALANCED) { - // TODO +char *Peer::interfaceListStr() +{ + std::map ifnamemap; + char tmp[32]; + const int64_t now = RR->node->now(); + char *ptr = _interfaceListStr; + bool imbalanced = false; + memset(_interfaceListStr, 0, sizeof(_interfaceListStr)); + int alivePathCount = aggregateLinkLogicalPathCount(); + for(unsigned int i=0;ialive(now)) { + int ipv = _paths[i].p->address().isV4(); + // If this is acting as an aggregate link, check allocations + float targetAllocation = 1.0 / alivePathCount; + float currentAllocation = 1.0; + if (alivePathCount > 1) { + currentAllocation = (float)_pathChoiceHist->countValue(i) / (float)_pathChoiceHist->count(); + if (fabs(targetAllocation - currentAllocation) > ZT_PATH_IMBALANCE_THRESHOLD) { + imbalanced = true; + } + } + char *ipvStr = ipv ? (char*)"ipv4" : (char*)"ipv6"; + sprintf(tmp, "(%s, %s, %5.4f)", _paths[i].p->getName(), ipvStr, currentAllocation); + // Prevent duplicates + if(ifnamemap[_paths[i].p->getName()] != ipv) { + memcpy(ptr, tmp, strlen(tmp)); + ptr += strlen(tmp); + *ptr = ' '; + ptr++; + ifnamemap[_paths[i].p->getName()] = ipv; + } + } } - - return SharedPtr(); + ptr--; // Overwrite trailing space + if (imbalanced) { + sprintf(tmp, ", is IMBALANCED"); + memcpy(ptr, tmp, sizeof(tmp)); + } else { + *ptr = '\0'; + } + return _interfaceListStr; } void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &other) const @@ -614,6 +672,35 @@ void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &o } } +void Peer::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) +{ + Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ACK); + uint32_t bytesToAck = path->bytesToAck(); + outp.append(bytesToAck); + if (atAddress) { + outp.armor(_key,false); + RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); + } else { + RR->sw->send(tPtr,outp,false); + } + path->sentAck(now); +} + +void Peer::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) +{ + Packet outp(_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT); + char qosData[ZT_PATH_MAX_QOS_PACKET_SZ]; + path->generateQoSPacket(now,qosData); + outp.append(qosData,sizeof(qosData)); + if (atAddress) { + outp.armor(_key,false); + RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); + } else { + RR->sw->send(tPtr,outp,false); + } + path->sentQoS(now); +} + void Peer::sendHELLO(void *tPtr,const int64_t localSocket,const InetAddress &atAddress,int64_t now) { Packet outp(_id.address(),RR->identity.address(),Packet::VERB_HELLO); @@ -688,6 +775,25 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD); _lastSentFullHello = now; + // Emit traces regarding the status of aggregate links + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + int alivePathCount = aggregateLinkPhysicalPathCount(); + if ((now - _lastAggregateStatsReport) > ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL) { + _lastAggregateStatsReport = now; + if (alivePathCount) { + RR->t->peerLinkAggregateStatistics(NULL,*this); + } + } + // Report link redundancy + if (alivePathCount < 2 && _linkIsRedundant) { + _linkIsRedundant = !_linkIsRedundant; + RR->t->peerLinkNoLongerRedundant(NULL,*this); + } if (alivePathCount > 1 && !_linkIsRedundant) { + _linkIsRedundant = !_linkIsRedundant; + RR->t->peerLinkNowRedundant(NULL,*this); + } + } + // Right now we only keep pinging links that have the maximum priority. The // priority is used to track cluster redirections, meaning that when a cluster // redirects us its redirect target links override all other links and we @@ -726,22 +832,6 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) return sent; } -unsigned int Peer::prunePaths() -{ - unsigned int pruned = 0; - for(unsigned int i=0;iisClosed() || !_paths[i].p->isValidState()) { - _paths[i].lr = 0; - _paths[i].p.zero(); - _paths[i].priority = 1; - pruned++; - } - } - } - return pruned; -} - void Peer::clusterRedirect(void *tPtr,const SharedPtr &originatingPath,const InetAddress &remoteAddress,const int64_t now) { SharedPtr np(RR->topology->getPath(originatingPath->localSocket(),remoteAddress)); diff --git a/node/Peer.hpp b/node/Peer.hpp index 9873729b..2f723b07 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -68,9 +68,7 @@ public: ~Peer() { Utils::burn(_key,sizeof(_key)); delete _pathChoiceHist; - delete _flowBalanceHist; _pathChoiceHist = NULL; - _flowBalanceHist = NULL; } /** @@ -114,6 +112,7 @@ public: const SharedPtr &path, const unsigned int hops, const uint64_t packetId, + const unsigned int payloadLength, const Packet::Verb verb, const uint64_t inRePacketId, const Packet::Verb inReVerb, @@ -158,7 +157,74 @@ public: } /** - * Get the most appropriate direct path based on current multipath configuration + * Record statistics on outgoing packets + * + * @param path Path over which packet was sent + * @param id Packet ID + * @param len Length of packet payload + * @param verb Packet verb + * @param now Current time + */ + void recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now); + + /** + * Record statistics on incoming packets + * + * @param path Path over which packet was sent + * @param id Packet ID + * @param len Length of packet payload + * @param verb Packet verb + * @param now Current time + */ + void recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now); + + /** + * Send an ACK to peer for the most recent packets received + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param localSocket Raw socket the ACK packet will be sent over + * @param atAddress Destination for the ACK packet + * @param now Current time + */ + void sendACK(void *tPtr, const SharedPtr &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now); + + /** + * Send a QoS packet to peer so that it can evaluate the quality of this link + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param localSocket Raw socket the QoS packet will be sent over + * @param atAddress Destination for the QoS packet + * @param now Current time + */ + void sendQOS_MEASUREMENT(void *tPtr, const SharedPtr &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now); + + /** + * @return The relative quality values for each path + */ + float computeAggregateLinkRelativeQuality(int64_t now); + + /** + * @return The aggregate link Packet Delay Variance (PDV) + */ + float computeAggregateLinkPacketDelayVariance(); + + /** + * @return The aggregate link mean latenct + */ + float computeAggregateLinkMeanLatency(); + + /** + * @return The number of currently alive "physical" paths in the aggregate link + */ + int aggregateLinkPhysicalPathCount(); + + /** + * @return The number of currently alive "logical" paths in the aggregate link + */ + int aggregateLinkLogicalPathCount(); + + /** + * Get the most appropriate direct path based on current multipath and QoS configuration * * @param now Current time * @param includeExpired If true, include even expired paths @@ -166,6 +232,12 @@ public: */ SharedPtr getAppropriatePath(int64_t now, bool includeExpired); + /** + * Generate a human-readable string of interface names making up the aggregate link, also include + * moving allocation and IP version number for each (for tracing) + */ + char *interfaceListStr(); + /** * Send VERB_RENDEZVOUS to this and another peer via the best common IP scope and path */ @@ -549,11 +621,12 @@ private: AtomicCounter __refCount; RingBuffer *_pathChoiceHist; - RingBuffer *_flowBalanceHist; - bool _linkBalanceStatus; - bool _linkRedundancyStatus; + bool _linkIsBalanced; + bool _linkIsRedundant; + uint64_t _lastAggregateStatsReport; + char _interfaceListStr[256]; // 16 characters * 16 paths in a link }; } // namespace ZeroTier diff --git a/node/RingBuffer.hpp b/node/RingBuffer.hpp index cd384749..32ae037c 100644 --- a/node/RingBuffer.hpp +++ b/node/RingBuffer.hpp @@ -172,6 +172,11 @@ public: write(&value, 1); } + /** + * @return The most recently pushed element on the buffer + */ + T get_most_recent() { return *(buf + end); } + /** * @param dest Destination buffer * @param n Size (in terms of number of elements) of the destination buffer @@ -218,10 +223,7 @@ public: /** * @return The number of slots that are unused in the buffer */ - size_t getFree() - { - return size - count(); - } + size_t getFree() { return size - count(); } /** * @return The arithmetic mean of the contents of the buffer @@ -229,45 +231,67 @@ public: float mean() { size_t iterator = begin; - float mean = 0; - for (size_t i=0; irecordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), now); + if (trustedPathId) { packet.setTrusted(trustedPathId); } else { diff --git a/node/Trace.cpp b/node/Trace.cpp index 01a8da55..f47a029b 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -106,24 +106,24 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId, } } -void Trace::peerLinkNowRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath) +void Trace::peerLinkNowRedundant(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is fully redundant",peer.address().toInt(),networkId); + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is fully redundant",peer.address().toInt()); } -void Trace::peerLinkNoLongerRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath) +void Trace::peerLinkNoLongerRedundant(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is no longer redundant",peer.address().toInt(),networkId); + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is no longer redundant",peer.address().toInt()); } -void Trace::peerLinkBalanced(void *const tPtr,const uint64_t networkId,Peer &peer) +void Trace::peerLinkAggregateStatistics(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is balanced",peer.address().toInt(),networkId); -} - -void Trace::peerLinkImbalanced(void *const tPtr,const uint64_t networkId,Peer &peer) -{ - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is unbalanced",peer.address().toInt(),networkId); + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has packet delay variance (%.0f ms), mean latency (%.0f ms)", + peer.address().toInt(), + peer.aggregateLinkPhysicalPathCount(), + peer.interfaceListStr(), + peer.computeAggregateLinkPacketDelayVariance(), + peer.computeAggregateLinkMeanLatency()); } void Trace::peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath,const uint64_t packetId) diff --git a/node/Trace.hpp b/node/Trace.hpp index b01163d6..734e84a5 100644 --- a/node/Trace.hpp +++ b/node/Trace.hpp @@ -122,10 +122,10 @@ public: void peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &path,const uint64_t packetId,const Packet::Verb verb); - void peerLinkNowRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); - void peerLinkNoLongerRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); - void peerLinkBalanced(void *const tPtr,const uint64_t networkId,Peer &peer); - void peerLinkImbalanced(void *const tPtr,const uint64_t networkId,Peer &peer); + void peerLinkNowRedundant(void *const tPtr,Peer &peer); + void peerLinkNoLongerRedundant(void *const tPtr,Peer &peer); + + void peerLinkAggregateStatistics(void *const tPtr,Peer &peer); void peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath,const uint64_t packetId); void peerRedirected(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); diff --git a/node/Utils.hpp b/node/Utils.hpp index a24f2c9a..6ce67328 100644 --- a/node/Utils.hpp +++ b/node/Utils.hpp @@ -261,6 +261,14 @@ public: return l; } + static inline float normalize(float value, int64_t bigMin, int64_t bigMax, int32_t targetMin, int32_t targetMax) + { + int64_t bigSpan = bigMax - bigMin; + int64_t smallSpan = targetMax - targetMin; + float valueScaled = (value - (float)bigMin) / (float)bigSpan; + return (float)targetMin + valueScaled * (float)smallSpan; + } + /** * Generate secure random bytes * diff --git a/osdep/Binder.hpp b/osdep/Binder.hpp index 6e13836c..1f06021b 100644 --- a/osdep/Binder.hpp +++ b/osdep/Binder.hpp @@ -456,20 +456,6 @@ public: return false; } - /** - * Get a list of socket pointers for all bindings. - * - * @return A list of socket pointers for current bindings - */ - inline std::vector getBoundSockets() - { - std::vector sockets; - for (int i=0; i _bindingCount; diff --git a/osdep/Phy.hpp b/osdep/Phy.hpp index 2e276a2a..5e659767 100644 --- a/osdep/Phy.hpp +++ b/osdep/Phy.hpp @@ -27,8 +27,6 @@ #ifndef ZT_PHY_HPP #define ZT_PHY_HPP -#include "../osdep/OSUtils.hpp" - #include #include #include @@ -88,22 +86,6 @@ namespace ZeroTier { */ typedef void PhySocket; -struct link_test_record -{ - link_test_record(PhySocket *_s, uint64_t _id, uint64_t _egress_time, uint32_t _length) : - s(_s), - id(_id), - egress_time(_egress_time), - length(_length) - { - // - } - PhySocket *s; - uint64_t id; - uint64_t egress_time; - uint32_t length; -}; - /** * Simple templated non-blocking sockets implementation * @@ -170,19 +152,13 @@ private: ZT_PHY_SOCKET_UNIX_LISTEN = 0x08 }; - struct PhySocketImpl - { - PhySocketImpl() : - throughput(0) - { - memset(ifname, 0, sizeof(ifname)); - } + struct PhySocketImpl { + PhySocketImpl() { memset(ifname, 0, sizeof(ifname)); } PhySocketType type; ZT_PHY_SOCKFD_TYPE sock; void *uptr; // user-settable pointer ZT_PHY_SOCKADDR_STORAGE_TYPE saddr; // remote for TCP_OUT and TCP_IN, local for TCP_LISTEN, RAW, and UDP char ifname[16]; - uint64_t throughput; }; std::list _socks; @@ -198,7 +174,6 @@ private: bool _noDelay; bool _noCheck; - std::vector link_test_records; public: /** @@ -282,7 +257,9 @@ public: */ static inline void getIfName(PhySocket *s, char *nameBuf, int buflen) { - memcpy(nameBuf, reinterpret_cast(s)->ifname, buflen); + if (s) { + memcpy(nameBuf, reinterpret_cast(s)->ifname, buflen); + } } /** @@ -292,18 +269,9 @@ public: */ static inline void setIfName(PhySocket *s, char *ifname, int len) { - memcpy(&(reinterpret_cast(s)->ifname), ifname, len); - } - - /** - * Get result of most recent throughput test - * - * @param s Socket object - */ - inline uint64_t getThroughput(PhySocket *s) - { - PhySocketImpl *sws = (reinterpret_cast(s)); - return sws ? sws->throughput : 0; + if (s) { + memcpy(&(reinterpret_cast(s)->ifname), ifname, len); + } } /** @@ -339,105 +307,9 @@ public: */ inline bool isValidState(PhySocket *s) { - PhySocketImpl *sws = (reinterpret_cast(s)); - return sws->type >= ZT_PHY_SOCKET_CLOSED && sws->type <= ZT_PHY_SOCKET_UNIX_LISTEN; - } - - /** - * Send a datagram of a known size to a selected peer and record egress time. The peer - * shall eventually respond by echoing back a smaller datagram. - * - * @param s Socket object - * @param remoteAddress Address of remote peer to receive link test packet - * @param data Buffer containing random packet data - * @param len Length of packet data buffer - * @return Number of bytes successfully written to socket - */ - inline int test_link_speed(PhySocket *s, const struct sockaddr *to, void *data, uint32_t len) { - if (!reinterpret_cast(s)) { - return 0; - } - uint64_t *buf = (uint64_t*)data; - uint64_t id = buf[0]; - if (to->sa_family != AF_INET && to->sa_family != AF_INET6) { - return 0; - } - uint64_t egress_time = OSUtils::now(); - PhySocketImpl *sws = (reinterpret_cast(s)); -#if defined(_WIN32) || defined(_WIN64) - int w = ::sendto(sws->sock,reinterpret_cast(data),len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) -#else - int w = ::sendto(sws->sock,data,len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); -#endif - if (w > 0) { - link_test_records.push_back(new link_test_record(s, id, egress_time, len)); - } - return w; - } - - /** - * Remove link speed test records which have timed-out and record a 0 bits/s measurement - */ - inline void refresh_link_speed_records() - { - for(size_t i=0;iegress_time > ZT_LINK_TEST_TIMEOUT) { - PhySocketImpl *sws = (reinterpret_cast(link_test_records[i]->s)); - if (sws) { - sws->throughput = 0; - } - link_test_records.erase(link_test_records.begin() + i); - } - } - } - - /** - * Upon receipt of a link speed test datagram we echo back only the identification portion - * - * @param s Socket object - * @param from Address of remote peer that sent this datagram - * @param data Buffer containing datagram's contents - * @param len Length of datagram - * @return Number of bytes successfully written to socket in response - */ - inline int respond_to_link_test(PhySocket *s,const struct sockaddr *from,void *data,unsigned long len) { - PhySocketImpl *sws = (reinterpret_cast(s)); - uint64_t *id = (uint64_t*)data; -#if defined(_WIN32) || defined(_WIN64) - int w = ::sendto(sws->sock,reinterpret_cast(id),sizeof(id[0]),0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); -#else - int w = ::sendto(sws->sock,id,sizeof(id[0]),0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); -#endif - return w; - } - - /** - * Upon receipt of a response to our original link test datagram, correlate this new datagram with the record - * of the one we sent. Compute the transit time and update the throughput field of the relevant socket. This - * value will later be read by the path quality estimation logic located in Path.hpp. - * - * @param s Socket object - * @param from Address of remote peer that sent this datagram - * @param data Buffer containing datagram contents (ID of original link test datagram) - * @param len Length of datagram - * @return true if datagram correponded to previous record, false if otherwise - */ - inline bool handle_link_test_response(PhySocket *s,const struct sockaddr *from,void *data,unsigned long len) { - uint64_t *id = (uint64_t*)data; - for(size_t i=0;iid == id[0]) { - float rtt = (OSUtils::now()-link_test_records[i]->egress_time) / (float)1000; // s - uint32_t sz = (link_test_records[i]->length) * 8; // bits - float transit_time = rtt / (float)2.0; - uint64_t raw = (uint64_t)(sz / transit_time); - PhySocketImpl *sws = (reinterpret_cast(s)); - if (sws) { - sws->throughput = raw; - } - delete link_test_records[i]; - link_test_records.erase(link_test_records.begin() + i); - return true; - } + if (s) { + PhySocketImpl *sws = (reinterpret_cast(s)); + return sws->type >= ZT_PHY_SOCKET_CLOSED && sws->type <= ZT_PHY_SOCKET_UNIX_LISTEN; } return false; } diff --git a/service/OneService.cpp b/service/OneService.cpp index 051629bc..cf2a6eda 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -37,7 +37,6 @@ #include "../version.h" #include "../include/ZeroTierOne.h" -#include "../include/ZeroTierDebug.h" #include "../node/Constants.hpp" #include "../node/Mutex.hpp" @@ -458,9 +457,6 @@ public: // Last potential sleep/wake event uint64_t _lastRestart; - // Last time link throughput was tested - uint64_t _lastLinkSpeedTest; - // Deadline for the next background task service function volatile int64_t _nextBackgroundTaskDeadline; @@ -881,26 +877,6 @@ public: lastMultipathModeUpdate = now; _node->setMultipathMode(_multipathMode); } - // Test link speeds - // TODO: This logic should eventually find its way into the core or as part of a passive - // measure within the protocol. - if (_multipathMode && ((now - _lastLinkSpeedTest) >= ZT_LINK_SPEED_TEST_INTERVAL)) { - _phy.refresh_link_speed_records(); - _lastLinkSpeedTest = now; - // Generate random data to fill UDP packet - uint64_t pktBuf[ZT_LINK_TEST_DATAGRAM_SZ / sizeof(uint64_t)]; - Utils::getSecureRandom(pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); - ZT_PeerList *pl = _node->peers(); - std::vector sockets = _binder.getBoundSockets(); - for (int i=0; ipeerCount;++j) { - for (int k=0; k<(ZT_MAX_PEER_NETWORK_PATHS/4); k++) { - Utils::getSecureRandom(pktBuf, 8); // generate one random integer for unique id - _phy.test_link_speed(sockets[i], (struct sockaddr*)&(pl->peers[j].paths[k].address), pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); - } - } - } - } // Run background task processor in core if it's time to do so int64_t dl = _nextBackgroundTaskDeadline; @@ -1799,15 +1775,6 @@ public: inline void phyOnDatagram(PhySocket *sock,void **uptr,const struct sockaddr *localAddr,const struct sockaddr *from,void *data,unsigned long len) { - if (_multipathMode) { - // Handle link test packets (should eventually be moved into the protocol itself) - if (len == ZT_LINK_TEST_DATAGRAM_SZ) { - _phy.respond_to_link_test(sock, from, data, len); - } - if (len == ZT_LINK_TEST_DATAGRAM_RESPONSE_SZ) { - _phy.handle_link_test_response(sock, from, data, len); - } - } if ((len >= 16)&&(reinterpret_cast(from)->ipScope() == InetAddress::IP_SCOPE_GLOBAL)) _lastDirectReceiveFromGlobal = OSUtils::now(); const ZT_ResultCode rc = _node->processWirePacket( -- cgit v1.2.3 From 91a22a686a17f8cedc58690592a84f5d4a3ab5e0 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Fri, 1 Jun 2018 18:03:59 -0700 Subject: Added auto-escalation to multipath if both peers support it. Improved QoS/ACK tracking. Related bug fixes --- node/Constants.hpp | 27 +++++++-- node/IncomingPacket.cpp | 52 +++++++++-------- node/Path.hpp | 148 ++++++++++++++++++++++++++++++++---------------- node/Peer.cpp | 51 +++++++++-------- node/Peer.hpp | 34 ++++++++--- 5 files changed, 200 insertions(+), 112 deletions(-) (limited to 'node') diff --git a/node/Constants.hpp b/node/Constants.hpp index 227497de..fddfce6a 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -334,6 +334,11 @@ #define ZT_PATH_CONTRIB_THROUGHPUT 1.50 / 3.0 #define ZT_PATH_CONTRIB_SCOPE 0.75 / 3.0 +/** + * How often a QoS packet is sent + */ +#define ZT_PATH_QOS_INTERVAL 3000 + /** * Min and max acceptable sizes for a VERB_QOS_MEASUREMENT packet */ @@ -343,7 +348,22 @@ /** * How many ID:sojourn time pairs in a single QoS packet */ -#define ZT_PATH_QOS_TABLE_SIZE (ZT_PATH_MAX_QOS_PACKET_SZ * 8) / (64 + 8) +#define ZT_PATH_QOS_TABLE_SIZE (ZT_PATH_MAX_QOS_PACKET_SZ * 8) / (64 + 16) + +/** + * Maximum number of outgoing packets we monitor for QoS information + */ +#define ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS 128 + +/** + * How often we check the age of QoS records + */ +#define ZT_PATH_QOS_RECORD_PURGE_INTERVAL 1000 + +/** + * Timeout for QoS records + */ +#define ZT_PATH_QOS_TIMEOUT ZT_PATH_QOS_INTERVAL * 2 /** * How often the service tests the path throughput @@ -355,11 +375,6 @@ */ #define ZT_PATH_ACK_INTERVAL 250 -/** - * How often a QoS packet is sent - */ -#define ZT_PATH_QOS_INTERVAL 1000 - /** * How often an aggregate link statistics report is emitted into this tracing system */ diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index e7227412..c6d19021 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -206,43 +206,45 @@ bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const Shared { /* Dissect incoming ACK packet. From this we can estimate current throughput of the path, establish known * maximums and detect packet loss. */ - - if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if (peer->localMultipathSupport()) { int32_t ackedBytes; - memcpy(&ackedBytes, payload(), sizeof(int32_t)); + if (payloadLength() != sizeof(ackedBytes)) { + return true; // ignore + } + memcpy(&ackedBytes, payload(), sizeof(ackedBytes)); _path->receivedAck(RR->node->now(), Utils::ntoh(ackedBytes)); + peer->inferRemoteMultipathEnabled(); } return true; } - bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) { - /* Dissect incoming QoS packet. From this we can compute latency values and their variance. * The latency variance is used as a measure of "jitter". */ - - if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { - if (payloadLength() < ZT_PATH_MAX_QOS_PACKET_SZ && payloadLength() > ZT_PATH_MIN_QOS_PACKET_SZ) { - const int64_t now = RR->node->now(); - uint64_t rx_id[ZT_PATH_QOS_TABLE_SIZE]; - uint8_t rx_ts[ZT_PATH_QOS_TABLE_SIZE]; - char *begin = (char *)payload(); - char *ptr = begin; - int count = 0; - int len = payloadLength(); - // Read packet IDs and latency compensation intervals for each packet tracked by thie QoS packet - while (ptr < (begin + len)) { - memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t)); - rx_id[count] = Utils::ntoh(rx_id[count]); - ptr+=sizeof(uint64_t); - memcpy((void*)&rx_ts[count], ptr, sizeof(uint8_t)); - ptr+=sizeof(uint8_t); - count++; - } - _path->receivedQoS(now, count, rx_id, rx_ts); + if (peer->localMultipathSupport()) { + if (payloadLength() > ZT_PATH_MAX_QOS_PACKET_SZ || payloadLength() < ZT_PATH_MIN_QOS_PACKET_SZ) { + return true; // ignore } + const int64_t now = RR->node->now(); + uint64_t rx_id[ZT_PATH_QOS_TABLE_SIZE]; + uint16_t rx_ts[ZT_PATH_QOS_TABLE_SIZE]; + char *begin = (char *)payload(); + char *ptr = begin; + int count = 0; + int len = payloadLength(); + // Read packet IDs and latency compensation intervals for each packet tracked by thie QoS packet + while (ptr < (begin + len) && (count < ZT_PATH_QOS_TABLE_SIZE)) { + memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t)); + ptr+=sizeof(uint64_t); + memcpy((void*)&rx_ts[count], ptr, sizeof(uint16_t)); + ptr+=sizeof(uint16_t); + count++; + } + _path->receivedQoS(now, count, rx_id, rx_ts); + peer->inferRemoteMultipathEnabled(); } + return true; } diff --git a/node/Path.hpp b/node/Path.hpp index 0278d919..e7448190 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -40,6 +40,7 @@ #include "AtomicCounter.hpp" #include "Utils.hpp" #include "RingBuffer.hpp" +#include "Packet.hpp" #include "../osdep/Phy.hpp" @@ -105,9 +106,11 @@ public: _lastAck(0), _lastThroughputEstimation(0), _lastQoSMeasurement(0), + _lastQoSRecordPurge(0), _unackedBytes(0), _expectingAckAsOf(0), _packetsReceivedSinceLastAck(0), + _packetsReceivedSinceLastQoS(0), _meanThroughput(0.0), _maxLifetimeThroughput(0), _bytesAckedSinceLastThroughputEstimation(0), @@ -133,9 +136,11 @@ public: _lastAck(0), _lastThroughputEstimation(0), _lastQoSMeasurement(0), + _lastQoSRecordPurge(0), _unackedBytes(0), _expectingAckAsOf(0), _packetsReceivedSinceLastAck(0), + _packetsReceivedSinceLastQoS(0), _meanThroughput(0.0), _maxLifetimeThroughput(0), _bytesAckedSinceLastThroughputEstimation(0), @@ -147,6 +152,7 @@ public: _lastComputedRelativeQuality(0) { prepareBuffers(); + _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, 16); } ~Path() @@ -295,17 +301,52 @@ public: } /** - * Take note that we're expecting a VERB_ACK on this path as of a specific time + * Record statistics on outgoing packets. Used later to estimate QoS metrics. * * @param now Current time - * @param packetId ID of the packet - * @param payloadLength Number of bytes we're is expecting a reply to + * @param packetId ID of packet + * @param payloadLength Length of payload + * @param verb Packet verb */ - inline void expectingAck(int64_t now, int64_t packetId, uint16_t payloadLength) + inline void recordOutgoingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) { - _expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? _expectingAckAsOf : now; - _unackedBytes += payloadLength; - _outgoingPacketRecords[packetId] = now; + Mutex::Lock _l(_statistics_m); + if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + if (packetId % 2 == 0) { // even -> use for ACK + _unackedBytes += payloadLength; + // Take note that we're expecting a VERB_ACK on this path as of a specific time + _expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? _expectingAckAsOf : now; + } + else { // odd -> use for QoS + if (_outQoSRecords.size() < ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS) { + _outQoSRecords[packetId] = now; + } + } + } + } + + /** + * Record statistics on incoming packets. Used later to estimate QoS metrics. + * + * @param now Current time + * @param packetId ID of packet + * @param payloadLength Length of payload + * @param verb Packet verb + */ + inline void recordIncomingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) + { + Mutex::Lock _l(_statistics_m); + if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + if (packetId % 2 == 0) { // even -> use for ACK + _inACKRecords[packetId] = payloadLength; + _packetsReceivedSinceLastAck++; + } + else { // odd -> use for QoS + _inQoSRecords[packetId] = now; + _packetsReceivedSinceLastQoS++; + } + _packetValiditySamples->push(true); + } } /** @@ -335,9 +376,12 @@ public: */ inline int32_t bytesToAck() { + Mutex::Lock _l(_statistics_m); int32_t bytesToAck = 0; - for (int i=0; i<_packetsReceivedSinceLastAck; i++) { - bytesToAck += _recorded_len[i]; + std::map::iterator it = _inACKRecords.begin(); + while (it != _inACKRecords.end()) { + bytesToAck += it->second; + it++; } return bytesToAck; } @@ -357,9 +401,8 @@ public: */ inline void sentAck(int64_t now) { - memset(_recorded_id, 0, sizeof(_recorded_id)); - memset(_recorded_ts, 0, sizeof(_recorded_ts)); - memset(_recorded_len, 0, sizeof(_recorded_len)); + Mutex::Lock _l(_statistics_m); + _inACKRecords.clear(); _packetsReceivedSinceLastAck = 0; _lastAck = now; } @@ -373,17 +416,19 @@ public: * @param rx_id table of packet IDs * @param rx_ts table of holding times */ - inline void receivedQoS(int64_t now, int count, uint64_t *rx_id, uint8_t *rx_ts) + inline void receivedQoS(int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts) { + Mutex::Lock _l(_statistics_m); // Look up egress times and compute latency values for each record + std::map::iterator it; for (int j=0; j::iterator it = _outgoingPacketRecords.find(rx_id[j]); - if (it != _outgoingPacketRecords.end()) { + it = _outQoSRecords.find(rx_id[j]); + if (it != _outQoSRecords.end()) { uint16_t rtt = (uint16_t)(now - it->second); uint16_t rtt_compensated = rtt - rx_ts[j]; float latency = rtt_compensated / 2.0; updateLatency(latency, now); - _outgoingPacketRecords.erase(it); + _outQoSRecords.erase(it); } } } @@ -397,15 +442,20 @@ public: */ inline int32_t generateQoSPacket(int64_t now, char *qosBuffer) { + Mutex::Lock _l(_statistics_m); int32_t len = 0; - for (int i=0; i<_packetsReceivedSinceLastAck; i++) { - uint64_t id = _recorded_id[i]; + std::map::iterator it = _inQoSRecords.begin(); + int i=0; + while (i<_packetsReceivedSinceLastQoS && it != _inQoSRecords.end()) { + uint64_t id = it->first; memcpy(qosBuffer, &id, sizeof(uint64_t)); qosBuffer+=sizeof(uint64_t); - uint8_t holdingTime = (uint8_t)(now - _recorded_ts[i]); - memcpy(qosBuffer, &holdingTime, sizeof(uint8_t)); - qosBuffer+=sizeof(uint8_t); - len+=sizeof(uint64_t)+sizeof(uint8_t); + uint16_t holdingTime = (now - it->second); + memcpy(qosBuffer, &holdingTime, sizeof(uint16_t)); + qosBuffer+=sizeof(uint16_t); + len+=sizeof(uint64_t)+sizeof(uint16_t); + _inQoSRecords.erase(it++); + i++; } return len; } @@ -415,22 +465,9 @@ public: * * @param Current time */ - inline void sentQoS(int64_t now) { _lastQoSMeasurement = now; } - - /** - * Record statistics on incoming packets. Used later to estimate QoS. - * - * @param now Current time - * @param packetId - * @param payloadLength - */ - inline void recordIncomingPacket(int64_t now, int64_t packetId, int32_t payloadLength) - { - _recorded_ts[_packetsReceivedSinceLastAck] = now; - _recorded_id[_packetsReceivedSinceLastAck] = packetId; - _recorded_len[_packetsReceivedSinceLastAck] = payloadLength; - _packetsReceivedSinceLastAck++; - _packetValiditySamples->push(true); + inline void sentQoS(int64_t now) { + _packetsReceivedSinceLastQoS = 0; + _lastQoSMeasurement = now; } /** @@ -447,8 +484,8 @@ public: * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time */ inline bool needsToSendQoS(int64_t now) { - return ((_packetsReceivedSinceLastAck >= ZT_PATH_QOS_TABLE_SIZE) || - ((now - _lastQoSMeasurement) > ZT_PATH_QOS_INTERVAL)) && _packetsReceivedSinceLastAck; + return ((_packetsReceivedSinceLastQoS >= ZT_PATH_QOS_TABLE_SIZE) || + ((now - _lastQoSMeasurement) > ZT_PATH_QOS_INTERVAL)) && _packetsReceivedSinceLastQoS; } /** @@ -523,15 +560,16 @@ public: inline char *getAddressString() { return _addrString; } /** - * Compute and cache stability and performance metrics. The resultant stability coefficint is a measure of how "well behaved" + * Compute and cache stability and performance metrics. The resultant stability coefficient is a measure of how "well behaved" * this path is. This figure is substantially different from (but required for the estimation of the path's overall "quality". * * @param now Current time */ inline void processBackgroundPathMeasurements(int64_t now, const int64_t peerId) { + Mutex::Lock _l(_statistics_m); + // Compute path stability if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { _lastPathQualityComputeTime = now; - _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, 16); address().toString(_addrString); _meanThroughput = _throughputSamples->mean(); _meanLatency = _latencySamples->mean(); @@ -556,6 +594,17 @@ public: _lastComputedStability *= 1 - _packetErrorRatio; _qualitySamples->push(_lastComputedStability); } + // Prevent QoS records from sticking around for too long + if (now - _lastQoSRecordPurge > ZT_PATH_QOS_RECORD_PURGE_INTERVAL) + { + std::map::iterator it = _outQoSRecords.begin(); + while (it != _outQoSRecords.end()) { + // Time since egress of tracked packet + if ((now - it->second) >= ZT_PATH_QOS_TIMEOUT) { + _outQoSRecords.erase(it++); + } else { it++; } + } + } } /** @@ -592,13 +641,12 @@ public: _qualitySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); _packetValiditySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); memset(_ifname, 0, 16); - memset(_recorded_id, 0, sizeof(_recorded_id)); - memset(_recorded_ts, 0, sizeof(_recorded_ts)); - memset(_recorded_len, 0, sizeof(_recorded_len)); memset(_addrString, 0, sizeof(_addrString)); } private: + Mutex _statistics_m; + volatile int64_t _lastOut; volatile int64_t _lastIn; volatile int64_t _lastTrustEstablishedPacketReceived; @@ -609,19 +657,19 @@ private: InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often AtomicCounter __refCount; - uint64_t _recorded_id[ZT_PATH_QOS_TABLE_SIZE]; - uint64_t _recorded_ts[ZT_PATH_QOS_TABLE_SIZE]; - uint16_t _recorded_len[ZT_PATH_QOS_TABLE_SIZE]; - - std::map _outgoingPacketRecords; + std::map _outQoSRecords; // id:egress_time + std::map _inQoSRecords; // id:now + std::map _inACKRecords; // id:len int64_t _lastAck; int64_t _lastThroughputEstimation; int64_t _lastQoSMeasurement; + int64_t _lastQoSRecordPurge; int64_t _unackedBytes; int64_t _expectingAckAsOf; int16_t _packetsReceivedSinceLastAck; + int16_t _packetsReceivedSinceLastQoS; float _meanThroughput; uint64_t _maxLifetimeThroughput; diff --git a/node/Peer.cpp b/node/Peer.cpp index 8deaa362..a22bbffe 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -64,6 +64,7 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _credentialsCutoffCount(0), _linkIsBalanced(false), _linkIsRedundant(false), + _remotePeerMultipathEnabled(false), _lastAggregateStatsReport(0) { if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) @@ -104,8 +105,10 @@ void Peer::received( { Mutex::Lock _l(_paths_m); - if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { - recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, now); + + recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, now); + + if (canUseMultipath()) { if (path->needsToSendQoS(now)) { sendQOS_MEASUREMENT(tPtr, path, path->localSocket(), path->address(), now); } @@ -167,7 +170,7 @@ void Peer::received( // If we find a pre-existing path with the same address, just replace it. // If we don't find anything we can replace, just use the replacePath that we previously decided on. - if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if (canUseMultipath()) { for(unsigned int i=0;iaddress().ss_family == path->address().ss_family && _paths[i].p->address().ipsEqual2(path->address())) { @@ -269,21 +272,19 @@ void Peer::received( void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now) { - if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { - if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { - path->expectingAck(now, packetId, payloadLength); - } + if (localMultipathSupport()) { + path->recordOutgoingPacket(now, packetId, payloadLength, verb); } } void Peer::recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now) { - if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + if (localMultipathSupport()) { if (path->needsToSendAck(now)) { sendACK(tPtr, path, path->localSocket(), path->address(), now); } - path->recordIncomingPacket(now, packetId, payloadLength); + path->recordIncomingPacket(now, packetId, payloadLength, verb); } } @@ -384,9 +385,9 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) /** * Send traffic across the highest quality path only. This algorithm will still - * use the old path quality metric. + * use the old path quality metric from protocol version 9. */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_NONE) { + if (!canUseMultipath()) { long bestPathQuality = 2147483647; for(unsigned int i=0;i Peer::getAppropriatePath(int64_t now, bool includeExpired) stalePaths[numStalePaths] = i; numStalePaths++; } - // Record a default path to use as a short-circuit for the rest of the algorithm + // Record a default path to use as a short-circuit for the rest of the algorithm (if needed) bestPath = i; } } + + // Compare paths to each-other + float qualityScalingFactor = computeAggregateLinkRelativeQuality(now); + if (numAlivePaths == 0 && numStalePaths == 0) { return SharedPtr(); } if (numAlivePaths == 1 || numStalePaths == 1) { return _paths[bestPath].p; } - // Compare paths to each-other - float qualityScalingFactor = computeAggregateLinkRelativeQuality(now); + // Convert set of relative performances into an allocation set for(uint16_t i=0;igetName(), ipvStr, currentAllocation); + sprintf(tmp, "(%s, %s, %.3f)", _paths[i].p->getName(), ipvStr, currentAllocation); // Prevent duplicates if(ifnamemap[_paths[i].p->getName()] != ipv) { memcpy(ptr, tmp, strlen(tmp)); @@ -543,7 +547,7 @@ char *Peer::interfaceListStr() } ptr--; // Overwrite trailing space if (imbalanced) { - sprintf(tmp, ", is IMBALANCED"); + sprintf(tmp, ", is asymmetrical"); memcpy(ptr, tmp, sizeof(tmp)); } else { *ptr = '\0'; @@ -688,10 +692,11 @@ void Peer::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSoc void Peer::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) { + const int64_t _now = RR->node->now(); Packet outp(_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT); char qosData[ZT_PATH_MAX_QOS_PACKET_SZ]; - path->generateQoSPacket(now,qosData); - outp.append(qosData,sizeof(qosData)); + int16_t len = path->generateQoSPacket(_now,qosData); + outp.append(qosData,len); if (atAddress) { outp.armor(_key,false); RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); @@ -775,17 +780,15 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD); _lastSentFullHello = now; - // Emit traces regarding the status of aggregate links - if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + // Emit traces regarding aggregate link status + if (canUseMultipath()) { int alivePathCount = aggregateLinkPhysicalPathCount(); if ((now - _lastAggregateStatsReport) > ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL) { _lastAggregateStatsReport = now; if (alivePathCount) { RR->t->peerLinkAggregateStatistics(NULL,*this); } - } - // Report link redundancy - if (alivePathCount < 2 && _linkIsRedundant) { + } if (alivePathCount < 2 && _linkIsRedundant) { _linkIsRedundant = !_linkIsRedundant; RR->t->peerLinkNoLongerRedundant(NULL,*this); } if (alivePathCount > 1 && !_linkIsRedundant) { @@ -821,7 +824,7 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) } } else break; } - if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if (canUseMultipath()) { while(j < ZT_MAX_PEER_NETWORK_PATHS) { _paths[j].lr = 0; _paths[j].p.zero(); diff --git a/node/Peer.hpp b/node/Peer.hpp index 2f723b07..6e2f1d08 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -27,18 +27,13 @@ #ifndef ZT_PEER_HPP #define ZT_PEER_HPP -#include - -#include "Constants.hpp" - -#include -#include #include -#include #include "../include/ZeroTierOne.h" +#include "Constants.hpp" #include "RuntimeEnvironment.hpp" +#include "Node.hpp" #include "Path.hpp" #include "Address.hpp" #include "Utils.hpp" @@ -416,6 +411,29 @@ public: inline bool remoteVersionKnown() const { return ((_vMajor > 0)||(_vMinor > 0)||(_vRevision > 0)); } + /** + * Record that the remote peer does have multipath enabled. As is evident by the receipt of a VERB_ACK + * or a VERB_QOS_MEASUREMENT packet at some point in the past. Until this flag is set, the local client + * shall assume that multipath is not enabled and should only use classical Protocol 9 logic. + */ + inline void inferRemoteMultipathEnabled() { _remotePeerMultipathEnabled = true; } + + /** + * @return Whether the local client supports and is configured to use multipath + */ + inline bool localMultipathSupport() { return ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9)); } + + /** + * @return Whether the remote peer supports and is configured to use multipath + */ + inline bool remoteMultipathSupport() { return (_remotePeerMultipathEnabled && (_vProto > 9)); } + + /** + * @return Whether this client can use multipath to communicate with this peer. True if both peers are using + * the correct protocol and if both peers have multipath enabled. False if otherwise. + */ + inline bool canUseMultipath() { return (localMultipathSupport() && remoteMultipathSupport()); } + /** * @return True if peer has received a trust established packet (e.g. common network membership) in the past ZT_TRUST_EXPIRATION ms */ @@ -624,6 +642,8 @@ private: bool _linkIsBalanced; bool _linkIsRedundant; + bool _remotePeerMultipathEnabled; + uint64_t _lastAggregateStatsReport; char _interfaceListStr[256]; // 16 characters * 16 paths in a link -- cgit v1.2.3 From b6d97af4514ec433bfbb6d8e6b696a8d62e98bef Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Thu, 7 Jun 2018 15:26:18 -0700 Subject: Added rate gates for QOS and ACK packets --- node/Constants.hpp | 17 +++++++++++++++++ node/IncomingPacket.cpp | 4 ++++ node/Peer.hpp | 28 ++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) (limited to 'node') diff --git a/node/Constants.hpp b/node/Constants.hpp index fddfce6a..b85c1b93 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -274,6 +274,23 @@ */ #define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000 +/** + * Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processesing cutoff + */ +#define ZT_PATH_QOS_ACK_CUTOFF_TIME 30000 + +/** + * Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be + * processesed within cutoff time. Separate totals are kept for each type but + * the limit is the same for both. + * + * This limits how often this peer will compute statistical estimates + * of various QoS measures from a VERB_QOS_MEASUREMENT or VERB_ACK packets to + * CUTOFF_LIMIT times per CUTOFF_TIME milliseconds per peer to prevent + * this from being useful for DOS amplification attacks. + */ +#define ZT_PATH_QOS_ACK_CUTOFF_LIMIT 16 + /** * Path choice history window size. This is used to keep track of which paths were * previously selected so that we can maintain a target allocation over time. diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index c6d19021..70dcff5d 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -204,6 +204,8 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,void *tPtr,const Shar bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) { + if (!peer->rateGateACK(RR->node->now())) + return true; /* Dissect incoming ACK packet. From this we can estimate current throughput of the path, establish known * maximums and detect packet loss. */ if (peer->localMultipathSupport()) { @@ -220,6 +222,8 @@ bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const Shared } bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) { + if (!peer->rateGateQoS(RR->node->now())) + return true; /* Dissect incoming QoS packet. From this we can compute latency values and their variance. * The latency variance is used as a measure of "jitter". */ if (peer->localMultipathSupport()) { diff --git a/node/Peer.hpp b/node/Peer.hpp index 6e2f1d08..8c2c496b 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -523,6 +523,30 @@ public: return false; } + /** + * Rate limit gate for VERB_QOS_MEASUREMENT + */ + inline bool rateGateQoS(const int64_t now) + { + if ((now - _lastQoSReceive) <= ZT_PATH_QOS_ACK_CUTOFF_TIME) + ++_QoSCutoffCount; + else _QoSCutoffCount = 0; + _lastQoSReceive = now; + return (_QoSCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT); + } + + /** + * Rate limit gate for VERB_ACK + */ + inline bool rateGateACK(const int64_t now) + { + if ((now - _lastACKReceive) <= ZT_PATH_QOS_ACK_CUTOFF_TIME) + ++_ACKCutoffCount; + else _ACKCutoffCount = 0; + _lastACKReceive = now; + return (_ACKCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT); + } + /** * Serialize a peer for storage in local cache * @@ -620,6 +644,8 @@ private: int64_t _lastComRequestSent; int64_t _lastCredentialsReceived; int64_t _lastTrustEstablishedPacketReceived; + int64_t _lastQoSReceive; + int64_t _lastACKReceive; int64_t _lastSentFullHello; int64_t _lastPathPrune; @@ -635,6 +661,8 @@ private: unsigned int _directPathPushCutoffCount; unsigned int _credentialsCutoffCount; + unsigned int _QoSCutoffCount; + unsigned int _ACKCutoffCount; AtomicCounter __refCount; -- cgit v1.2.3 From 9681fedbb44a25ffa1108b88a4795e017746ca6c Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Thu, 7 Jun 2018 17:25:27 -0700 Subject: Spellcheck sweep across codebase --- RELEASE-NOTES.md | 8 ++++---- include/ZeroTierOne.h | 8 ++++---- node/Capability.hpp | 6 +++--- node/CertificateOfMembership.hpp | 2 +- node/Constants.hpp | 4 ++-- node/Identity.cpp | 4 ++-- node/IncomingPacket.cpp | 2 +- node/InetAddress.cpp | 26 +++++++++++++------------- node/MulticastGroup.hpp | 2 +- node/Network.hpp | 2 +- node/NetworkConfig.hpp | 2 +- node/Packet.hpp | 18 +++++++++--------- node/Path.hpp | 6 +++--- node/Peer.cpp | 2 +- node/Peer.hpp | 2 +- node/Salsa20.cpp | 2 +- node/SelfAwareness.cpp | 2 +- node/Tag.hpp | 2 +- node/Topology.cpp | 2 +- osdep/OSUtils.cpp | 2 +- osdep/WindowsEthernetTap.hpp | 2 +- service/OneService.cpp | 20 ++++---------------- 22 files changed, 57 insertions(+), 69 deletions(-) (limited to 'node') diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 54dd1375..bec144f0 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -36,7 +36,7 @@ ZeroTier Release Notes * Fixed two very rare multithreading issues that were only observed on certain systems * Platform-Specific Changes * MacOS - * Installer now loads the kernel extension right away so that High Sierra users will see the prompt to authorize it. This is done in the "Security & Privacy" preference pane and must be done driectly on the console (not via remote desktop). On High Sierra and newer kexts must be authorized at the console via security settings system preferences pane. + * Installer now loads the kernel extension right away so that High Sierra users will see the prompt to authorize it. This is done in the "Security & Privacy" preference pane and must be done directly on the console (not via remote desktop). On High Sierra and newer kexts must be authorized at the console via security settings system preferences pane. * Windows * The Windows installer should now install the driver without requiring a special prompt in most cases. This should make it easier for our packages to be accepted into and updated in the Chocolatey repository and should make it easier to perform remote installs across groups of machines using IT management and provisioning tools. * The Windows official packages are now signed with an EV certificate (with hardware key). @@ -78,7 +78,7 @@ The largest new feature in 1.2.0, and the product of many months of work, is our Rules allow you to filter packets on your network and vector traffic to security observers. Security observation can be performed in-band using REDIRECT or out of band using TEE. -Tags and capabilites provide advanced methods for implementing fine grained permission structures and micro-segmentation schemes without bloating the size and complexity of your rules table. +Tags and capabilities provide advanced methods for implementing fine grained permission structures and micro-segmentation schemes without bloating the size and complexity of your rules table. See the [rules engine announcement blog post](https://www.zerotier.com/blog/?p=927) for an in-depth discussion of theory and implementation. The [manual](https://www.zerotier.com/manual.shtml) contains detailed information on rule, tag, and capability use, and the `rule-compiler/` subfolder of the ZeroTier source tree contains a JavaScript function to compile rules in our human-readable rule definition language into rules suitable for import into a network controller. (ZeroTier Central uses this same script to compile rules on [my.zerotier.com](https://my.zerotier.com/).) @@ -161,7 +161,7 @@ A special kind of public network called an ad-hoc network may be accessed by joi | Start of port range (hex) Reserved ZeroTier address prefix indicating a controller-less network -Ad-hoc networks are public (no access control) networks that have no network controller. Instead their configuration and other credentials are generated locally. Ad-hoc networks permit only IPv6 UDP and TCP unicast traffic (no multicast or broadcast) using 6plane format NDP-emulated IPv6 addresses. In addition an ad-hoc network ID encodes an IP port range. UDP packets and TCP SYN (connection open) packets are only allowed to desintation ports within the encoded range. +Ad-hoc networks are public (no access control) networks that have no network controller. Instead their configuration and other credentials are generated locally. Ad-hoc networks permit only IPv6 UDP and TCP unicast traffic (no multicast or broadcast) using 6plane format NDP-emulated IPv6 addresses. In addition an ad-hoc network ID encodes an IP port range. UDP packets and TCP SYN (connection open) packets are only allowed to destination ports within the encoded range. For example `ff00160016000000` is an ad-hoc network allowing only SSH, while `ff0000ffff000000` is an ad-hoc network allowing any UDP or TCP port. @@ -176,7 +176,7 @@ If you have data in an old SQLite3 controller we've included a NodeJS script in ## Major Bug Fixes in 1.2.0 * **The Windows HyperV 100% CPU bug is FINALLY DEAD**: This long-running problem turns out to have been an issue with Windows itself, but one we were triggering by placing invalid data into the Windows registry. Microsoft is aware of the issue but we've also fixed the triggering problem on our side. ZeroTier should now co-exist quite well with HyperV and should now be able to be bridged with a HyperV virtual switch. - * **Segmenation faults on musl-libc based Linux systems**: Alpine Linux and some embedded Linux systems that use musl libc (a minimal libc) experienced segmentation faults. These were due to a smaller default stack size. A work-around that sets the stack size for new threads has been added. + * **Segmentation faults on musl-libc based Linux systems**: Alpine Linux and some embedded Linux systems that use musl libc (a minimal libc) experienced segmentation faults. These were due to a smaller default stack size. A work-around that sets the stack size for new threads has been added. * **Windows firewall blocks local JSON API**: On some Windows systems the firewall likes to block 127.0.0.1:9993 for mysterious reasons. This is now fixed in the installer via the addition of another firewall exemption rule. * **UI crash on embedded Windows due to missing fonts**: The MSI installer now ships fonts and will install them if they are not present, so this should be fixed. diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index 15e15bd1..b3927c2e 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -411,7 +411,7 @@ enum ZT_ResultCode ZT_RESULT_ERROR_UNSUPPORTED_OPERATION = 1001, /** - * The requestion operation was given a bad parameter or was called in an invalid state + * The requested operation was given a bad parameter or was called in an invalid state */ ZT_RESULT_ERROR_BAD_PARAMETER = 1002 }; @@ -1498,7 +1498,7 @@ typedef int (*ZT_WirePacketSendFunction)( /** * Function to check whether a path should be used for ZeroTier traffic * - * Paramters: + * Parameters: * (1) Node * (2) User pointer * (3) ZeroTier address or 0 for none/any @@ -1531,7 +1531,7 @@ typedef int (*ZT_PathCheckFunction)( * (1) Node * (2) User pointer * (3) ZeroTier address (least significant 40 bits) - * (4) Desried address family or -1 for any + * (4) Desired address family or -1 for any * (5) Buffer to fill with result * * If provided this function will be occasionally called to get physical @@ -1696,7 +1696,7 @@ ZT_SDK_API enum ZT_ResultCode ZT_Node_processBackgroundTasks(ZT_Node *node,void * Join a network * * This may generate calls to the port config callback before it returns, - * or these may be deffered if a netconf is not available yet. + * or these may be differed if a netconf is not available yet. * * If we are already a member of the network, nothing is done and OK is * returned. diff --git a/node/Capability.hpp b/node/Capability.hpp index 91a46566..775532d9 100644 --- a/node/Capability.hpp +++ b/node/Capability.hpp @@ -52,7 +52,7 @@ class RuntimeEnvironment; * (1) Evaluates its capabilities in ascending order of ID to determine * which capability allows it to transmit this packet. * (2) If it has not done so lately, it then sends this capability to the - * receving peer ("presents" it). + * receiving peer ("presents" it). * (3) The sender then sends the packet. * * On the receiving side the receiver evaluates the capabilities presented @@ -64,7 +64,7 @@ class RuntimeEnvironment; * * Capabilities support a chain of custody. This is currently unused but * in the future would allow the publication of capabilities that can be - * handed off between nodes. Limited transferrability of capabilities is + * handed off between nodes. Limited transferability of capabilities is * a feature of true capability based security. */ class Capability : public Credential @@ -81,7 +81,7 @@ public: * @param id Capability ID * @param nwid Network ID * @param ts Timestamp (at controller) - * @param mccl Maximum custody chain length (1 to create non-transferrable capability) + * @param mccl Maximum custody chain length (1 to create non-transferable capability) * @param rules Network flow rules for this capability * @param ruleCount Number of flow rules */ diff --git a/node/CertificateOfMembership.hpp b/node/CertificateOfMembership.hpp index b5a90007..7fd38ad7 100644 --- a/node/CertificateOfMembership.hpp +++ b/node/CertificateOfMembership.hpp @@ -243,7 +243,7 @@ public: * Compare two certificates for parameter agreement * * This compares this certificate with the other and returns true if all - * paramters in this cert are present in the other and if they agree to + * parameters in this cert are present in the other and if they agree to * within this cert's max delta value for each given parameter. * * Tuples present in other but not in this cert are ignored, but any diff --git a/node/Constants.hpp b/node/Constants.hpp index b85c1b93..d86775c5 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -275,13 +275,13 @@ #define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000 /** - * Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processesing cutoff + * Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff */ #define ZT_PATH_QOS_ACK_CUTOFF_TIME 30000 /** * Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be - * processesed within cutoff time. Separate totals are kept for each type but + * processed within cutoff time. Separate totals are kept for each type but * the limit is the same for both. * * This limits how often this peer will compute statistical estimates diff --git a/node/Identity.cpp b/node/Identity.cpp index 03f27083..1687746b 100644 --- a/node/Identity.cpp +++ b/node/Identity.cpp @@ -50,8 +50,8 @@ static inline void _computeMemoryHardHash(const void *publicKey,unsigned int pub SHA512::hash(digest,publicKey,publicKeyBytes); // Initialize genmem[] using Salsa20 in a CBC-like configuration since - // ordinary Salsa20 is randomly seekable. This is good for a cipher - // but is not what we want for sequential memory-harndess. + // ordinary Salsa20 is randomly seek-able. This is good for a cipher + // but is not what we want for sequential memory-hardness. memset(genmem,0,ZT_IDENTITY_GEN_MEMORY); Salsa20 s20(digest,(char *)digest + 32); s20.crypt20((char *)genmem,(char *)genmem,64); diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index 70dcff5d..6bb9734c 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -237,7 +237,7 @@ bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr, char *ptr = begin; int count = 0; int len = payloadLength(); - // Read packet IDs and latency compensation intervals for each packet tracked by thie QoS packet + // Read packet IDs and latency compensation intervals for each packet tracked by this QoS packet while (ptr < (begin + len) && (count < ZT_PATH_QOS_TABLE_SIZE)) { memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t)); ptr+=sizeof(uint64_t); diff --git a/node/InetAddress.cpp b/node/InetAddress.cpp index 36b4e434..5d6ad07f 100644 --- a/node/InetAddress.cpp +++ b/node/InetAddress.cpp @@ -5,7 +5,7 @@ * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or - * (at your oion) any later version. + * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -62,23 +62,23 @@ InetAddress::IpScope InetAddress::ipScope() const case 0x37: return IP_SCOPE_PSEUDOPRIVATE; // 55.0.0.0/8 (US DoD) case 0x38: return IP_SCOPE_PSEUDOPRIVATE; // 56.0.0.0/8 (US Postal Service) case 0x64: - if ((ip & 0xffc00000) == 0x64400000) return IP_SCOPE_PRIVATE; // 100.64.0.0/10 + if ((ip & 0xffc00000) == 0x64400000) return IP_SCOPE_PRIVATE; // 100.64.0.0/10 break; case 0x7f: return IP_SCOPE_LOOPBACK; // 127.0.0.0/8 case 0xa9: - if ((ip & 0xffff0000) == 0xa9fe0000) return IP_SCOPE_LINK_LOCAL; // 169.254.0.0/16 + if ((ip & 0xffff0000) == 0xa9fe0000) return IP_SCOPE_LINK_LOCAL; // 169.254.0.0/16 break; case 0xac: - if ((ip & 0xfff00000) == 0xac100000) return IP_SCOPE_PRIVATE; // 172.16.0.0/12 + if ((ip & 0xfff00000) == 0xac100000) return IP_SCOPE_PRIVATE; // 172.16.0.0/12 break; case 0xc0: - if ((ip & 0xffff0000) == 0xc0a80000) return IP_SCOPE_PRIVATE; // 192.168.0.0/16 + if ((ip & 0xffff0000) == 0xc0a80000) return IP_SCOPE_PRIVATE; // 192.168.0.0/16 break; case 0xff: return IP_SCOPE_NONE; // 255.0.0.0/8 (broadcast, or unused/unusable) } switch(ip >> 28) { - case 0xe: return IP_SCOPE_MULTICAST; // 224.0.0.0/4 - case 0xf: return IP_SCOPE_PSEUDOPRIVATE; // 240.0.0.0/4 ("reserved," usually unusable) + case 0xe: return IP_SCOPE_MULTICAST; // 224.0.0.0/4 + case 0xf: return IP_SCOPE_PSEUDOPRIVATE; // 240.0.0.0/4 ("reserved," usually unusable) } return IP_SCOPE_GLOBAL; } break; @@ -86,21 +86,21 @@ InetAddress::IpScope InetAddress::ipScope() const case AF_INET6: { const unsigned char *ip = reinterpret_cast(reinterpret_cast(this)->sin6_addr.s6_addr); if ((ip[0] & 0xf0) == 0xf0) { - if (ip[0] == 0xff) return IP_SCOPE_MULTICAST; // ff00::/8 + if (ip[0] == 0xff) return IP_SCOPE_MULTICAST; // ff00::/8 if ((ip[0] == 0xfe)&&((ip[1] & 0xc0) == 0x80)) { unsigned int k = 2; while ((!ip[k])&&(k < 15)) ++k; if ((k == 15)&&(ip[15] == 0x01)) - return IP_SCOPE_LOOPBACK; // fe80::1/128 - else return IP_SCOPE_LINK_LOCAL; // fe80::/10 + return IP_SCOPE_LOOPBACK; // fe80::1/128 + else return IP_SCOPE_LINK_LOCAL; // fe80::/10 } - if ((ip[0] & 0xfe) == 0xfc) return IP_SCOPE_PRIVATE; // fc00::/7 + if ((ip[0] & 0xfe) == 0xfc) return IP_SCOPE_PRIVATE; // fc00::/7 } unsigned int k = 0; while ((!ip[k])&&(k < 15)) ++k; if (k == 15) { // all 0's except last byte - if (ip[15] == 0x01) return IP_SCOPE_LOOPBACK; // ::1/128 - if (ip[15] == 0x00) return IP_SCOPE_NONE; // ::/128 + if (ip[15] == 0x01) return IP_SCOPE_LOOPBACK; // ::1/128 + if (ip[15] == 0x00) return IP_SCOPE_NONE; // ::/128 } return IP_SCOPE_GLOBAL; } break; diff --git a/node/MulticastGroup.hpp b/node/MulticastGroup.hpp index 0f4a621e..0a318136 100644 --- a/node/MulticastGroup.hpp +++ b/node/MulticastGroup.hpp @@ -68,7 +68,7 @@ public: * Derive the multicast group used for address resolution (ARP/NDP) for an IP * * @param ip IP address (port field is ignored) - * @return Multicat group for ARP/NDP + * @return Multicast group for ARP/NDP */ static inline MulticastGroup deriveMulticastGroupForAddressResolution(const InetAddress &ip) { diff --git a/node/Network.hpp b/node/Network.hpp index 95b5483a..38f03eb3 100644 --- a/node/Network.hpp +++ b/node/Network.hpp @@ -438,6 +438,6 @@ private: AtomicCounter __refCount; }; -} // naemspace ZeroTier +} // namespace ZeroTier #endif diff --git a/node/NetworkConfig.hpp b/node/NetworkConfig.hpp index 44066c86..8900bda5 100644 --- a/node/NetworkConfig.hpp +++ b/node/NetworkConfig.hpp @@ -562,7 +562,7 @@ public: char name[ZT_MAX_NETWORK_SHORT_NAME_LENGTH + 1]; /** - * Certficiate of membership (for private networks) + * Certificate of membership (for private networks) */ CertificateOfMembership com; }; diff --git a/node/Packet.hpp b/node/Packet.hpp index 6869691e..8e82bd34 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -150,7 +150,7 @@ * * In cryptography, a "break" means something different from what it means in * common discussion. If a cipher is 256 bits strong and someone finds a way - * to reduce key search to 254 bits, this constitues a "break" in the academic + * to reduce key search to 254 bits, this constitutes a "break" in the academic * literature. 254 bits is still far beyond what can be leveraged to accomplish * a "break" as most people would understand it -- the actual decryption and * reading of traffic. @@ -249,7 +249,7 @@ */ #define ZT_PROTO_MIN_FRAGMENT_LENGTH ZT_PACKET_FRAGMENT_IDX_PAYLOAD -// Field incides for parsing verbs ------------------------------------------- +// Field indices for parsing verbs ------------------------------------------- // Some verbs have variable-length fields. Those aren't fully defined here // yet-- instead they are parsed using relative indexes in IncomingPacket. @@ -734,7 +734,7 @@ public: * Credentials can be for any number of networks. * * The use of a zero byte to terminate the COM section is for legacy - * backward compatiblity. Newer fields are prefixed with a length. + * backward compatibility. Newer fields are prefixed with a length. * * OK/ERROR are not generated. */ @@ -751,7 +751,7 @@ public: * This message requests network configuration from a node capable of * providing it. * - * Respones to this are always whole configs intended for the recipient. + * Responses to this are always whole configs intended for the recipient. * For patches and other updates a NETWORK_CONFIG is sent instead. * * It would be valid and correct as of 1.2.0 to use NETWORK_CONFIG always, @@ -884,7 +884,7 @@ public: * <[6] MAC address of multicast group> * <[4] 32-bit ADI for multicast group> * <[1] flags> - * [<[...] network certficate of membership (DEPRECATED)>] + * [<[...] network certificate of membership (DEPRECATED)>] * [<[...] implicit gather results if flag 0x01 is set>] * * OK flags (same bits as request flags): @@ -933,7 +933,7 @@ public: // 0x11 -- deprecated /** - * An acknowledgement of receipt of a series of recent packets from another + * An acknowledgment of receipt of a series of recent packets from another * peer. This is used to calculate relative throughput values and to detect * packet loss. Only VERB_FRAME and VERB_EXT_FRAME packets are counted. * @@ -967,7 +967,7 @@ public: * The number of possible records per QoS packet is: (1400 * 8) / 72 = 155 * This packet should be sent very rarely (every few seconds) as it can be * somewhat large if the connection is saturated. Future versions might use - * a bloom table to probablistically determine these values in a vastly + * a bloom table to probabilistically determine these values in a vastly * more space-efficient manner. * * Note: The 'internal packet sojourn time' is a slight misnomer as it is a @@ -1000,7 +1000,7 @@ public: * * This message contains a remote trace event. Remote trace events can * be sent to observers configured at the network level for those that - * pertain directly to actiity on a network, or to global observers if + * pertain directly to activity on a network, or to global observers if * locally configured. * * The instance ID is a random 64-bit value generated by each ZeroTier @@ -1297,7 +1297,7 @@ public: * Encrypt/decrypt a separately armored portion of a packet * * This is currently only used to mask portions of HELLO as an extra - * security precation since most of that message is sent in the clear. + * security precaution since most of that message is sent in the clear. * * This must NEVER be used more than once in the same packet, as doing * so will result in re-use of the same key stream. diff --git a/node/Path.hpp b/node/Path.hpp index e7448190..80a7895a 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -353,7 +353,7 @@ public: * Record that we've received a VERB_ACK on this path, also compute throughput if required. * * @param now Current time - * @param ackedBytes Number of bytes awknowledged by other peer + * @param ackedBytes Number of bytes acknowledged by other peer */ inline void receivedAck(int64_t now, int32_t ackedBytes) { @@ -387,7 +387,7 @@ public: } /** - * @return Number of bytes thusfar sent that have not been awknowledged by the remote peer + * @return Number of bytes thus far sent that have not been acknowledged by the remote peer */ inline int64_t unackedSentBytes() { @@ -529,7 +529,7 @@ public: inline char *getName() { return _ifname; } /** - * @return Packet delay varience + * @return Packet delay variance */ inline float packetDelayVariance() { return _packetDelayVariance; } diff --git a/node/Peer.cpp b/node/Peer.cpp index a22bbffe..877ec2fd 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -140,7 +140,7 @@ void Peer::received( if ((!havePath)&&(RR->node->shouldUsePathForZeroTierTraffic(tPtr,_id.address(),path->localSocket(),path->address()))) { Mutex::Lock _l(_paths_m); - // Paths are redunant if they duplicate an alive path to the same IP or + // Paths are redundant if they duplicate an alive path to the same IP or // with the same local socket and address family. bool redundant = false; for(unsigned int i=0;i SelfAwareness::getSymmetricNatPredictions() * * Since flows are encrypted and authenticated they could not actually * read or modify traffic, but they could gather meta-data for forensics - * purpsoes or use this as a DOS attack vector. */ + * purposes or use this as a DOS attack vector. */ std::map< uint32_t,unsigned int > maxPortByIp; InetAddress theOneTrueSurface; diff --git a/node/Tag.hpp b/node/Tag.hpp index d2e932c2..1d3e97fa 100644 --- a/node/Tag.hpp +++ b/node/Tag.hpp @@ -58,7 +58,7 @@ class RuntimeEnvironment; * values. * * Unlike capabilities tags are signed only by the issuer and are never - * transferrable. + * transferable. */ class Tag : public Credential { diff --git a/node/Topology.cpp b/node/Topology.cpp index 7c526b41..7e32f205 100644 --- a/node/Topology.cpp +++ b/node/Topology.cpp @@ -138,7 +138,7 @@ SharedPtr Topology::getPeer(void *tPtr,const Address &zta) } return SharedPtr(); } - } catch ( ... ) {} // ignore invalid identities or other strage failures + } catch ( ... ) {} // ignore invalid identities or other strange failures return SharedPtr(); } diff --git a/osdep/OSUtils.cpp b/osdep/OSUtils.cpp index cadd4e6b..8b7fd948 100644 --- a/osdep/OSUtils.cpp +++ b/osdep/OSUtils.cpp @@ -366,7 +366,7 @@ std::vector OSUtils::split(const char *s,const char *const sep,cons if (buf.size() > 0) { fields.push_back(buf); buf.clear(); - } // else skip runs of seperators + } // else skip runs of separators } else buf.push_back(*s); } ++s; diff --git a/osdep/WindowsEthernetTap.hpp b/osdep/WindowsEthernetTap.hpp index 1e36bdd8..4f0f89c4 100644 --- a/osdep/WindowsEthernetTap.hpp +++ b/osdep/WindowsEthernetTap.hpp @@ -71,7 +71,7 @@ public: static std::string destroyAllPersistentTapDevices(); /** - * Uninstall a specific persistent tap device by instance ID + * Uninstalls a specific persistent tap device by instance ID * * @param instanceId Device instance ID * @return Empty string on success, otherwise an error message diff --git a/service/OneService.cpp b/service/OneService.cpp index 152dca7b..e56a4827 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -422,7 +422,7 @@ public: unsigned int _primaryPort; volatile unsigned int _udpPortPickerCounter; - // Local configuration and memo-ized information from it + // Local configuration and memoized information from it json _localConfig; Hashtable< uint64_t,std::vector > _v4Hints; Hashtable< uint64_t,std::vector > _v6Hints; @@ -438,7 +438,7 @@ public: * To attempt to handle NAT/gateway craziness we use three local UDP ports: * * [0] is the normal/default port, usually 9993 - * [1] is a port dervied from our ZeroTier address + * [1] is a port derived from our ZeroTier address * [2] is a port computed from the normal/default for use with uPnP/NAT-PMP mappings * * [2] exists because on some gateways trying to do regular NAT-t interferes @@ -1130,16 +1130,7 @@ public: #ifdef __SYNOLOGY__ // Authenticate via Synology's built-in cgi script if (!isAuth) { - /* - fprintf(stderr, "path = %s\n", path.c_str()); - fprintf(stderr, "headers.size=%d\n", headers.size()); - std::map::const_iterator it(headers.begin()); - while(it != headers.end()) { - fprintf(stderr,"header[%s] = %s\n", (it->first).c_str(), (it->second).c_str()); - it++; - } - */ - // parse out url args + // Parse out url args int synotoken_pos = path.find("SynoToken"); int argpos = path.find("?"); if(synotoken_pos != std::string::npos && argpos != std::string::npos) { @@ -1152,10 +1143,7 @@ public: setenv("HTTP_COOKIE", cookie_val.c_str(), true); setenv("HTTP_X_SYNO_TOKEN", synotoken_val.c_str(), true); setenv("REMOTE_ADDR", ah2->second.c_str(),true); - //fprintf(stderr, "HTTP_COOKIE: %s\n",std::getenv ("HTTP_COOKIE")); - //fprintf(stderr, "HTTP_X_SYNO_TOKEN: %s\n",std::getenv ("HTTP_X_SYNO_TOKEN")); - //fprintf(stderr, "REMOTE_ADDR: %s\n",std::getenv ("REMOTE_ADDR")); - // check synology web auth + // Check Synology web auth char user[256], buf[1024]; FILE *fp = NULL; bzero(user, 256); -- cgit v1.2.3 From 6fddf31db31a6f3d0f9c6dd7e611543f56d6fc2f Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 12 Jun 2018 15:24:12 -0700 Subject: Improved rate limit logic for QoS/ACK packets. Also reduced how often processBackgroundPathMeasurements() is called --- node/Constants.hpp | 7 +------ node/Path.hpp | 8 +++----- node/Peer.cpp | 3 ++- node/Peer.hpp | 36 ++++++++++++++++++++---------------- 4 files changed, 26 insertions(+), 28 deletions(-) (limited to 'node') diff --git a/node/Constants.hpp b/node/Constants.hpp index d86775c5..d4f6faaf 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -289,7 +289,7 @@ * CUTOFF_LIMIT times per CUTOFF_TIME milliseconds per peer to prevent * this from being useful for DOS amplification attacks. */ -#define ZT_PATH_QOS_ACK_CUTOFF_LIMIT 16 +#define ZT_PATH_QOS_ACK_CUTOFF_LIMIT 128 /** * Path choice history window size. This is used to keep track of which paths were @@ -372,11 +372,6 @@ */ #define ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS 128 -/** - * How often we check the age of QoS records - */ -#define ZT_PATH_QOS_RECORD_PURGE_INTERVAL 1000 - /** * Timeout for QoS records */ diff --git a/node/Path.hpp b/node/Path.hpp index 80a7895a..71615d50 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -566,9 +566,9 @@ public: * @param now Current time */ inline void processBackgroundPathMeasurements(int64_t now, const int64_t peerId) { - Mutex::Lock _l(_statistics_m); // Compute path stability if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { + Mutex::Lock _l(_statistics_m); _lastPathQualityComputeTime = now; address().toString(_addrString); _meanThroughput = _throughputSamples->mean(); @@ -593,10 +593,8 @@ public: _lastComputedStability = pdv_contrib + latency_contrib + throughput_disturbance_contrib; _lastComputedStability *= 1 - _packetErrorRatio; _qualitySamples->push(_lastComputedStability); - } - // Prevent QoS records from sticking around for too long - if (now - _lastQoSRecordPurge > ZT_PATH_QOS_RECORD_PURGE_INTERVAL) - { + + // Prevent QoS records from sticking around for too long std::map::iterator it = _outQoSRecords.begin(); while (it != _outQoSRecords.end()) { // Time since egress of tracked packet diff --git a/node/Peer.cpp b/node/Peer.cpp index 877ec2fd..8b385ecc 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -24,7 +24,6 @@ * of your own application. */ - #include "../version.h" #include "Constants.hpp" #include "Peer.hpp" @@ -55,6 +54,8 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _lastCredentialsReceived(0), _lastTrustEstablishedPacketReceived(0), _lastSentFullHello(0), + _lastACKWindowReset(0), + _lastQoSWindowReset(0), _vProto(0), _vMajor(0), _vMinor(0), diff --git a/node/Peer.hpp b/node/Peer.hpp index f7c0dbbd..8807662d 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -524,27 +524,31 @@ public: } /** - * Rate limit gate for VERB_QOS_MEASUREMENT + * Rate limit gate for VERB_ACK */ - inline bool rateGateQoS(const int64_t now) + inline bool rateGateACK(const int64_t now) { - if ((now - _lastQoSReceive) <= ZT_PATH_QOS_ACK_CUTOFF_TIME) - ++_QoSCutoffCount; - else _QoSCutoffCount = 0; - _lastQoSReceive = now; - return (_QoSCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT); + if ((now - _lastACKWindowReset) >= ZT_PATH_QOS_ACK_CUTOFF_TIME) { + _lastACKWindowReset = now; + _ACKCutoffCount = 0; + } else { + ++_ACKCutoffCount; + } + return (_ACKCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT); } /** - * Rate limit gate for VERB_ACK + * Rate limit gate for VERB_QOS_MEASUREMENT */ - inline bool rateGateACK(const int64_t now) + inline bool rateGateQoS(const int64_t now) { - if ((now - _lastACKReceive) <= ZT_PATH_QOS_ACK_CUTOFF_TIME) - ++_ACKCutoffCount; - else _ACKCutoffCount = 0; - _lastACKReceive = now; - return (_ACKCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT); + if ((now - _lastQoSWindowReset) >= ZT_PATH_QOS_ACK_CUTOFF_TIME) { + _lastQoSWindowReset = now; + _QoSCutoffCount = 0; + } else { + ++_QoSCutoffCount; + } + return (_QoSCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT); } /** @@ -644,10 +648,10 @@ private: int64_t _lastComRequestSent; int64_t _lastCredentialsReceived; int64_t _lastTrustEstablishedPacketReceived; - int64_t _lastQoSReceive; - int64_t _lastACKReceive; int64_t _lastSentFullHello; int64_t _lastPathPrune; + int64_t _lastACKWindowReset; + int64_t _lastQoSWindowReset; uint16_t _vProto; uint16_t _vMajor; -- cgit v1.2.3 From 0faa655b834be45d379c84769604b5c43f734650 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 12 Jun 2018 16:30:46 -0700 Subject: Reduced how often relative path qualities and aggregate link allocations are computed --- node/Constants.hpp | 3 +-- node/Path.hpp | 19 +++++++++++++++++-- node/Peer.cpp | 33 +++++++++++++++------------------ node/Peer.hpp | 7 +++++-- 4 files changed, 38 insertions(+), 24 deletions(-) (limited to 'node') diff --git a/node/Constants.hpp b/node/Constants.hpp index d4f6faaf..0d3692f1 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -305,8 +305,7 @@ #define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_MULTIPATH_PEER_PING_PERIOD * 2 /** - * Interval used for rate-limiting the computation of path quality estimates. Set at 0 - * to compute as new packets arrive with no delay. + * Interval used for rate-limiting the computation of path quality estimates. */ #define ZT_PATH_QUALITY_COMPUTE_INTERVAL 1000 diff --git a/node/Path.hpp b/node/Path.hpp index 71615d50..e0426323 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -119,7 +119,8 @@ public: _packetErrorRatio(0.0), _packetLossRatio(0), _lastComputedStability(0.0), - _lastComputedRelativeQuality(0) + _lastComputedRelativeQuality(0), + _lastAllocation(0.0) { prepareBuffers(); } @@ -149,7 +150,8 @@ public: _packetErrorRatio(0.0), _packetLossRatio(0), _lastComputedStability(0.0), - _lastComputedRelativeQuality(0) + _lastComputedRelativeQuality(0), + _lastAllocation(0.0) { prepareBuffers(); _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, 16); @@ -518,6 +520,18 @@ public: */ inline float relativeQuality() { return _lastComputedRelativeQuality; } + /** + * Assign a new allocation value for this path in the aggregate link + * + * @param allocation Percentage of traffic to be sent over this path to a peer + */ + inline void updateComponentAllocationOfAggregateLink(float allocation) { _lastAllocation = allocation; } + + /** + * @return Percentage of traffic allocated to this path in the aggregate link + */ + inline float allocation() { return _lastAllocation; } + /** * @return Stability estimates can become expensive to compute, we cache the most recent result. */ @@ -682,6 +696,7 @@ private: // cached estimates float _lastComputedStability; float _lastComputedRelativeQuality; + float _lastAllocation; // cached human-readable strings for tracing purposes char _ifname[16]; diff --git a/node/Peer.cpp b/node/Peer.cpp index 8b385ecc..55132bba 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -66,7 +66,8 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _linkIsBalanced(false), _linkIsRedundant(false), _remotePeerMultipathEnabled(false), - _lastAggregateStatsReport(0) + _lastAggregateStatsReport(0), + _lastAggregateAllocation(0) { if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) throw ZT_EXCEPTION_INVALID_ARGUMENT; @@ -289,7 +290,7 @@ void Peer::recordIncomingPacket(void *tPtr, const SharedPtr &path, const u } } -float Peer::computeAggregateLinkRelativeQuality(int64_t now) +void Peer::computeAggregateProportionalAllocation(int64_t now) { float maxStability = 0; float totalRelativeQuality = 0; @@ -326,7 +327,12 @@ float Peer::computeAggregateLinkRelativeQuality(int64_t now) _paths[i].p->updateRelativeQuality(relQuality); } } - return (float)1.0 / totalRelativeQuality; // Used later to convert relative quantities into flow allocations + // Convert set of relative performances into an allocation set + for(uint16_t i=0;iupdateComponentAllocationOfAggregateLink(_paths[i].p->relativeQuality() / totalRelativeQuality); + } + } } float Peer::computeAggregateLinkPacketDelayVariance() @@ -453,8 +459,6 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) * Proportionally allocate traffic according to dynamic path quality measurements */ if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) { - float alloc[ZT_MAX_PEER_NETWORK_PATHS]; - memset(&alloc, 0, sizeof(alloc)); int numAlivePaths = 0; int numStalePaths = 0; int alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; @@ -476,34 +480,27 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) bestPath = i; } } - - // Compare paths to each-other - float qualityScalingFactor = computeAggregateLinkRelativeQuality(now); - + if ((now - _lastAggregateAllocation) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) { + _lastAggregateAllocation = now; + computeAggregateProportionalAllocation(now); + } if (numAlivePaths == 0 && numStalePaths == 0) { return SharedPtr(); } if (numAlivePaths == 1 || numStalePaths == 1) { return _paths[bestPath].p; } - - // Convert set of relative performances into an allocation set - for(uint16_t i=0;irelativeQuality() * qualityScalingFactor; - } - } // Randomly choose path according to their allocations unsigned int r; Utils::getSecureRandom(&r, 1); float rf = (float)(r %= 100) / 100; for(int i=0;iallocation()) { bestPath = i; _pathChoiceHist->push(bestPath); // Record which path we chose break; } - rf -= alloc[i]; + rf -= _paths[i].p->allocation(); } } if (bestPath < ZT_MAX_PEER_NETWORK_PATHS) { diff --git a/node/Peer.hpp b/node/Peer.hpp index 8807662d..21e8cbf2 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -194,9 +194,11 @@ public: void sendQOS_MEASUREMENT(void *tPtr, const SharedPtr &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now); /** - * @return The relative quality values for each path + * Compute relative quality values and allocations for the components of the aggregate link + * + * @param now Current time */ - float computeAggregateLinkRelativeQuality(int64_t now); + void computeAggregateProportionalAllocation(int64_t now); /** * @return The aggregate link Packet Delay Variance (PDV) @@ -677,6 +679,7 @@ private: bool _remotePeerMultipathEnabled; uint64_t _lastAggregateStatsReport; + uint64_t _lastAggregateAllocation; char _interfaceListStr[256]; // 16 characters * 16 paths in a link }; -- cgit v1.2.3 From f8005b88adfe93af477528bec748882da8fa9bea Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Wed, 13 Jun 2018 14:50:37 -0700 Subject: Minor cleanup. More efficient push() operation for RingBuffer --- node/Path.hpp | 7 ------- node/Peer.hpp | 4 ++-- node/RingBuffer.hpp | 9 +++++++-- 3 files changed, 9 insertions(+), 11 deletions(-) (limited to 'node') diff --git a/node/Path.hpp b/node/Path.hpp index e0426323..30655877 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -161,11 +161,9 @@ public: { delete _throughputSamples; delete _latencySamples; - delete _qualitySamples; delete _packetValiditySamples; _throughputSamples = NULL; _latencySamples = NULL; - _qualitySamples = NULL; _packetValiditySamples = NULL; } @@ -580,7 +578,6 @@ public: * @param now Current time */ inline void processBackgroundPathMeasurements(int64_t now, const int64_t peerId) { - // Compute path stability if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { Mutex::Lock _l(_statistics_m); _lastPathQualityComputeTime = now; @@ -606,8 +603,6 @@ public: // Compute the quality product _lastComputedStability = pdv_contrib + latency_contrib + throughput_disturbance_contrib; _lastComputedStability *= 1 - _packetErrorRatio; - _qualitySamples->push(_lastComputedStability); - // Prevent QoS records from sticking around for too long std::map::iterator it = _outQoSRecords.begin(); while (it != _outQoSRecords.end()) { @@ -650,7 +645,6 @@ public: inline void prepareBuffers() { _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _qualitySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); _packetValiditySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); memset(_ifname, 0, 16); memset(_addrString, 0, sizeof(_addrString)); @@ -704,7 +698,6 @@ private: RingBuffer *_throughputSamples; RingBuffer *_latencySamples; - RingBuffer *_qualitySamples; RingBuffer *_packetValiditySamples; }; diff --git a/node/Peer.hpp b/node/Peer.hpp index 21e8cbf2..9361f665 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -678,8 +678,8 @@ private: bool _linkIsRedundant; bool _remotePeerMultipathEnabled; - uint64_t _lastAggregateStatsReport; - uint64_t _lastAggregateAllocation; + int64_t _lastAggregateStatsReport; + int64_t _lastAggregateAllocation; char _interfaceListStr[256]; // 16 characters * 16 paths in a link }; diff --git a/node/RingBuffer.hpp b/node/RingBuffer.hpp index 32ae037c..e8d0d238 100644 --- a/node/RingBuffer.hpp +++ b/node/RingBuffer.hpp @@ -37,7 +37,7 @@ namespace ZeroTier { /** - * A revolving (ring) buffer. + * A circular buffer * * For fast handling of continuously-evolving variables (such as path quality metrics). * Using this, we can maintain longer sliding historical windows for important path @@ -169,7 +169,12 @@ public: if (count() == size) { consume(1); } - write(&value, 1); + const size_t first_chunk = std::min((size_t)1, size - end); + *(buf + end) = value; + end = (end + first_chunk) % size; + if (begin == end) { + wrap = true; + } } /** -- cgit v1.2.3 From 17fbb020e733fab8d8be933bb4981927015a10f5 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Thu, 14 Jun 2018 16:34:45 -0700 Subject: Added multipath field to zerotier-cli status output. Adjusted how path estimates are computed and cached --- include/ZeroTierOne.h | 57 +++++++++++++++++++++++++++++++----- node/Node.cpp | 11 +++++++ node/Path.hpp | 79 ++++++++++++++++++++++++++++++-------------------- node/Peer.cpp | 4 +-- node/Peer.hpp | 14 +++++---- service/OneService.cpp | 51 +++++++++++++++++++++++++++++++- 6 files changed, 169 insertions(+), 47 deletions(-) (limited to 'node') diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index b3927c2e..a100afd9 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -449,13 +449,6 @@ enum ZT_MultipathMode * Will cease sending traffic over links that appear to be stale. */ ZT_MULTIPATH_PROPORTIONALLY_BALANCED = 2, - - /** - * Traffic is allocated across a user-defined interface/allocation - * - * Will cease sending traffic over links that appear to be stale. - */ - ZT_MULTIPATH_MANUALLY_BALANCED = 3 }; /** @@ -1221,6 +1214,56 @@ typedef struct */ uint64_t trustedPathId; + /** + * One-way latency + */ + float latency; + + /** + * How much latency varies over time + */ + float packetDelayVariance; + + /** + * How much observed throughput varies over time + */ + float throughputDisturbCoeff; + + /** + * Packet Error Ratio (PER) + */ + float packetErrorRatio; + + /** + * Packet Loss Ratio (PLR) + */ + float packetLossRatio; + + /** + * Stability of the path + */ + float stability; + + /** + * Current throughput (moving average) + */ + uint64_t throughput; + + /** + * Maximum observed throughput for this path + */ + uint64_t maxThroughput; + + /** + * Percentage of traffic allocated to this path + */ + float allocation; + + /** + * Name of physical interface (for monitoring) + */ + char *ifname; + /** * Is path expired? */ diff --git a/node/Node.cpp b/node/Node.cpp index 71e5b6a7..24deeae2 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -474,6 +474,17 @@ ZT_PeerList *Node::peers() const p->paths[p->pathCount].trustedPathId = RR->topology->getOutboundPathTrust((*path)->address()); p->paths[p->pathCount].expired = 0; p->paths[p->pathCount].preferred = ((*path) == bestp) ? 1 : 0; + p->paths[p->pathCount].latency = (*path)->latency(); + p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance(); + p->paths[p->pathCount].throughputDisturbCoeff = (*path)->throughputDisturbanceCoefficient(); + p->paths[p->pathCount].packetErrorRatio = (*path)->packetErrorRatio(); + p->paths[p->pathCount].packetLossRatio = (*path)->packetLossRatio(); + p->paths[p->pathCount].stability = (*path)->lastComputedStability(); + p->paths[p->pathCount].throughput = (*path)->meanThroughput(); + p->paths[p->pathCount].maxThroughput = (*path)->maxLifetimeThroughput(); + p->paths[p->pathCount].allocation = (*path)->allocation(); + p->paths[p->pathCount].ifname = (*path)->getName(); + ++p->pathCount; } } diff --git a/node/Path.hpp b/node/Path.hpp index 30655877..6162be20 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -111,15 +111,16 @@ public: _expectingAckAsOf(0), _packetsReceivedSinceLastAck(0), _packetsReceivedSinceLastQoS(0), - _meanThroughput(0.0), _maxLifetimeThroughput(0), + _lastComputedMeanThroughput(0), _bytesAckedSinceLastThroughputEstimation(0), - _meanLatency(0.0), - _packetDelayVariance(0.0), - _packetErrorRatio(0.0), - _packetLossRatio(0), + _lastComputedMeanLatency(0.0), + _lastComputedPacketDelayVariance(0.0), + _lastComputedPacketErrorRatio(0.0), + _lastComputedPacketLossRatio(0), _lastComputedStability(0.0), _lastComputedRelativeQuality(0), + _lastComputedThroughputDistCoeff(0.0), _lastAllocation(0.0) { prepareBuffers(); @@ -142,15 +143,16 @@ public: _expectingAckAsOf(0), _packetsReceivedSinceLastAck(0), _packetsReceivedSinceLastQoS(0), - _meanThroughput(0.0), _maxLifetimeThroughput(0), + _lastComputedMeanThroughput(0), _bytesAckedSinceLastThroughputEstimation(0), - _meanLatency(0.0), - _packetDelayVariance(0.0), - _packetErrorRatio(0.0), - _packetLossRatio(0), + _lastComputedMeanLatency(0.0), + _lastComputedPacketDelayVariance(0.0), + _lastComputedPacketErrorRatio(0.0), + _lastComputedPacketLossRatio(0), _lastComputedStability(0.0), _lastComputedRelativeQuality(0), + _lastComputedThroughputDistCoeff(0.0), _lastAllocation(0.0) { prepareBuffers(); @@ -162,9 +164,11 @@ public: delete _throughputSamples; delete _latencySamples; delete _packetValiditySamples; + delete _throughputDisturbanceSamples; _throughputSamples = NULL; _latencySamples = NULL; _packetValiditySamples = NULL; + _throughputDisturbanceSamples = NULL; } /** @@ -311,7 +315,7 @@ public: inline void recordOutgoingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) { Mutex::Lock _l(_statistics_m); - if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { if (packetId % 2 == 0) { // even -> use for ACK _unackedBytes += payloadLength; // Take note that we're expecting a VERB_ACK on this path as of a specific time @@ -336,7 +340,7 @@ public: inline void recordIncomingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) { Mutex::Lock _l(_statistics_m); - if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { if (packetId % 2 == 0) { // even -> use for ACK _inACKRecords[packetId] = payloadLength; _packetsReceivedSinceLastAck++; @@ -497,14 +501,14 @@ public: inline int64_t ackAge(int64_t now) { return _expectingAckAsOf ? now - _expectingAckAsOf : 0; } /** - * The maximum observed throughput for this path + * The maximum observed throughput (in bits/s) for this path */ inline uint64_t maxLifetimeThroughput() { return _maxLifetimeThroughput; } /** * @return The mean throughput (in bits/s) of this link */ - inline float meanThroughput() { return _meanThroughput; } + inline uint64_t meanThroughput() { return _lastComputedMeanThroughput; } /** * Assign a new relative quality value for this path in the aggregate link @@ -543,22 +547,22 @@ public: /** * @return Packet delay variance */ - inline float packetDelayVariance() { return _packetDelayVariance; } + inline float packetDelayVariance() { return _lastComputedPacketDelayVariance; } /** * @return Previously-computed mean latency */ - inline float meanLatency() { return _meanLatency; } + inline float meanLatency() { return _lastComputedMeanLatency; } /** * @return Packet loss rate (PLR) */ - inline float packetLossRatio() { return _packetLossRatio; } + inline float packetLossRatio() { return _lastComputedPacketLossRatio; } /** * @return Packet error ratio (PER) */ - inline float packetErrorRatio() { return _packetErrorRatio; } + inline float packetErrorRatio() { return _lastComputedPacketErrorRatio; } /** * Record an invalid incoming packet. This packet failed MAC/compression/cipher checks and will now @@ -571,38 +575,46 @@ public: */ inline char *getAddressString() { return _addrString; } + /** + * @return The current throughput disturbance coefficient + */ + inline float throughputDisturbanceCoefficient() { return _lastComputedThroughputDistCoeff; } + /** * Compute and cache stability and performance metrics. The resultant stability coefficient is a measure of how "well behaved" * this path is. This figure is substantially different from (but required for the estimation of the path's overall "quality". * * @param now Current time */ - inline void processBackgroundPathMeasurements(int64_t now, const int64_t peerId) { + inline void processBackgroundPathMeasurements(int64_t now) { if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { Mutex::Lock _l(_statistics_m); _lastPathQualityComputeTime = now; address().toString(_addrString); - _meanThroughput = _throughputSamples->mean(); - _meanLatency = _latencySamples->mean(); - _packetDelayVariance = _latencySamples->stddev(); // Similar to "jitter" (SEE: RFC 3393, RFC 4689) + _lastComputedMeanLatency = _latencySamples->mean(); + _lastComputedPacketDelayVariance = _latencySamples->stddev(); // Similar to "jitter" (SEE: RFC 3393, RFC 4689) + _lastComputedMeanThroughput = (uint64_t)_throughputSamples->mean(); // If no packet validity samples, assume PER==0 - _packetErrorRatio = 1 - (_packetValiditySamples->count() ? _packetValiditySamples->mean() : 1); + _lastComputedPacketErrorRatio = 1 - (_packetValiditySamples->count() ? _packetValiditySamples->mean() : 1); // Compute path stability // Normalize measurements with wildly different ranges into a reasonable range - float normalized_pdv = Utils::normalize(_packetDelayVariance, 0, ZT_PATH_MAX_PDV, 0, 10); - float normalized_la = Utils::normalize(_meanLatency, 0, ZT_PATH_MAX_MEAN_LATENCY, 0, 10); + float normalized_pdv = Utils::normalize(_lastComputedPacketDelayVariance, 0, ZT_PATH_MAX_PDV, 0, 10); + float normalized_la = Utils::normalize(_lastComputedMeanLatency, 0, ZT_PATH_MAX_MEAN_LATENCY, 0, 10); float throughput_cv = _throughputSamples->mean() > 0 ? _throughputSamples->stddev() / _throughputSamples->mean() : 1; // Form an exponential cutoff and apply contribution weights float pdv_contrib = exp((-1)*normalized_pdv) * ZT_PATH_CONTRIB_PDV; float latency_contrib = exp((-1)*normalized_la) * ZT_PATH_CONTRIB_LATENCY; + // Throughput Disturbance Coefficient float throughput_disturbance_contrib = exp((-1)*throughput_cv) * ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE; + _throughputDisturbanceSamples->push(throughput_cv); + _lastComputedThroughputDistCoeff = _throughputDisturbanceSamples->mean(); // Obey user-defined ignored contributions pdv_contrib = ZT_PATH_CONTRIB_PDV > 0.0 ? pdv_contrib : 1; latency_contrib = ZT_PATH_CONTRIB_LATENCY > 0.0 ? latency_contrib : 1; throughput_disturbance_contrib = ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE > 0.0 ? throughput_disturbance_contrib : 1; - // Compute the quality product + // Stability _lastComputedStability = pdv_contrib + latency_contrib + throughput_disturbance_contrib; - _lastComputedStability *= 1 - _packetErrorRatio; + _lastComputedStability *= 1 - _lastComputedPacketErrorRatio; // Prevent QoS records from sticking around for too long std::map::iterator it = _outQoSRecords.begin(); while (it != _outQoSRecords.end()) { @@ -646,6 +658,7 @@ public: _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); _packetValiditySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _throughputDisturbanceSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); memset(_ifname, 0, 16); memset(_addrString, 0, sizeof(_addrString)); } @@ -677,19 +690,20 @@ private: int16_t _packetsReceivedSinceLastAck; int16_t _packetsReceivedSinceLastQoS; - float _meanThroughput; uint64_t _maxLifetimeThroughput; + uint64_t _lastComputedMeanThroughput; uint64_t _bytesAckedSinceLastThroughputEstimation; - volatile float _meanLatency; - float _packetDelayVariance; + float _lastComputedMeanLatency; + float _lastComputedPacketDelayVariance; - float _packetErrorRatio; - float _packetLossRatio; + float _lastComputedPacketErrorRatio; + float _lastComputedPacketLossRatio; // cached estimates float _lastComputedStability; float _lastComputedRelativeQuality; + float _lastComputedThroughputDistCoeff; float _lastAllocation; // cached human-readable strings for tracing purposes @@ -699,6 +713,7 @@ private: RingBuffer *_throughputSamples; RingBuffer *_latencySamples; RingBuffer *_packetValiditySamples; + RingBuffer *_throughputDisturbanceSamples; }; } // namespace ZeroTier diff --git a/node/Peer.cpp b/node/Peer.cpp index 55132bba..0f471b07 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -116,7 +116,7 @@ void Peer::received( } for(unsigned int i=0;iprocessBackgroundPathMeasurements(now, _id.address().toInt()); + _paths[i].p->processBackgroundPathMeasurements(now); } } } @@ -415,7 +415,7 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) for(unsigned int i=0;iprocessBackgroundPathMeasurements(now, _id.address().toInt()); + _paths[i].p->processBackgroundPathMeasurements(now); } } diff --git a/node/Peer.hpp b/node/Peer.hpp index 9361f665..ddbe6f77 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -353,14 +353,18 @@ public: inline int64_t isActive(int64_t now) const { return ((now - _lastNontrivialReceive) < ZT_PEER_ACTIVITY_TIMEOUT); } /** - * @return Latency in milliseconds of best path or 0xffff if unknown / no paths + * @return Latency in milliseconds of best/aggregate path or 0xffff if unknown / no paths */ inline unsigned int latency(const int64_t now) { - SharedPtr bp(getAppropriatePath(now,false)); - if (bp) - return bp->latency(); - return 0xffff; + if (RR->node->getMultipathMode()) { + return (int)computeAggregateLinkMeanLatency(); + } else { + SharedPtr bp(getAppropriatePath(now,false)); + if (bp) + return bp->latency(); + return 0xffff; + } } /** diff --git a/service/OneService.cpp b/service/OneService.cpp index e56a4827..a1a9d981 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -298,6 +298,39 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer) pj["paths"] = pa; } +static void _peerAggregateLinkToJson(nlohmann::json &pj,const ZT_Peer *peer) +{ + char tmp[256]; + OSUtils::ztsnprintf(tmp,sizeof(tmp),"%.10llx",peer->address); + pj["aggregateLinkLatency"] = peer->latency; + + nlohmann::json pa = nlohmann::json::array(); + for(unsigned int i=0;ipathCount;++i) { + //int64_t lastSend = peer->paths[i].lastSend; + //int64_t lastReceive = peer->paths[i].lastReceive; + nlohmann::json j; + j["address"] = reinterpret_cast(&(peer->paths[i].address))->toString(tmp); + //j["lastSend"] = (lastSend < 0) ? 0 : lastSend; + //j["lastReceive"] = (lastReceive < 0) ? 0 : lastReceive; + //j["trustedPathId"] = peer->paths[i].trustedPathId; + //j["active"] = (bool)(peer->paths[i].expired == 0); + //j["expired"] = (bool)(peer->paths[i].expired != 0); + //j["preferred"] = (bool)(peer->paths[i].preferred != 0); + j["latency"] = peer->paths[i].latency; + //j["packetDelayVariance"] = peer->paths[i].packetDelayVariance; + //j["throughputDisturbCoeff"] = peer->paths[i].throughputDisturbCoeff; + //j["packetErrorRatio"] = peer->paths[i].packetErrorRatio; + //j["packetLossRatio"] = peer->paths[i].packetLossRatio; + j["stability"] = peer->paths[i].stability; + j["throughput"] = peer->paths[i].throughput; + //j["maxThroughput"] = peer->paths[i].maxThroughput; + j["allocation"] = peer->paths[i].allocation; + j["ifname"] = peer->paths[i].ifname; + pa.push_back(j); + } + pj["paths"] = pa; +} + static void _moonToJson(nlohmann::json &mj,const World &world) { char tmp[4096]; @@ -1189,7 +1222,23 @@ public: json &settings = res["config"]["settings"]; settings["primaryPort"] = OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; settings["allowTcpFallbackRelay"] = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],_allowTcpFallbackRelay); - settings["multipathMode"] = OSUtils::jsonInt(settings["multipathMode"],_multipathMode); + + if (_multipathMode) { + json &multipathConfig = res["multipath"]; + ZT_PeerList *pl = _node->peers(); + char peerAddrStr[256]; + if (pl) { + for(unsigned long i=0;ipeerCount;++i) { + if (pl->peers[i].role == ZT_PEER_ROLE_LEAF) { + nlohmann::json pj; + _peerAggregateLinkToJson(pj,&(pl->peers[i])); + OSUtils::ztsnprintf(peerAddrStr,sizeof(peerAddrStr),"%.10llx",pl->peers[i].address); + multipathConfig[peerAddrStr] = (pj); + } + } + } + } + #ifdef ZT_USE_MINIUPNPC settings["portMappingEnabled"] = OSUtils::jsonBool(settings["portMappingEnabled"],true); #else -- cgit v1.2.3 From 52264d5e284c49817fa0a4dd4f1f0c9feeced41d Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Thu, 14 Jun 2018 16:48:35 -0700 Subject: Fixed aggregateLinkLatency(). Also changed unit for throughput from byte to bit --- node/Path.hpp | 2 +- node/Peer.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'node') diff --git a/node/Path.hpp b/node/Path.hpp index 6162be20..fb202306 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -365,7 +365,7 @@ public: _unackedBytes = (ackedBytes > _unackedBytes) ? 0 : _unackedBytes - ackedBytes; int64_t timeSinceThroughputEstimate = (now - _lastThroughputEstimation); if (timeSinceThroughputEstimate >= ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL) { - uint64_t throughput = (float)(_bytesAckedSinceLastThroughputEstimation) / ((float)timeSinceThroughputEstimate / (float)1000); + uint64_t throughput = (float)(_bytesAckedSinceLastThroughputEstimation * 8) / ((float)timeSinceThroughputEstimate / (float)1000); _throughputSamples->push(throughput); _maxLifetimeThroughput = throughput > _maxLifetimeThroughput ? throughput : _maxLifetimeThroughput; _lastThroughputEstimation = now; diff --git a/node/Peer.cpp b/node/Peer.cpp index 0f471b07..1d581ab8 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -349,12 +349,14 @@ float Peer::computeAggregateLinkPacketDelayVariance() float Peer::computeAggregateLinkMeanLatency() { float ml = 0.0; + int pathCount = 0; for(unsigned int i=0;irelativeQuality() * _paths[i].p->meanLatency(); } } - return ml; + return ml / pathCount; } int Peer::aggregateLinkPhysicalPathCount() -- cgit v1.2.3 From bdcdccfcc3157e62a4bc8078e583876cbef41223 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Fri, 22 Jun 2018 16:30:20 -0700 Subject: Improved path selection, more efficient traffic allocation, lower QoS/ACK overhead --- include/ZeroTierOne.h | 5 ++ node/Constants.hpp | 15 ++++- node/Node.cpp | 6 +- node/Path.hpp | 20 +++---- node/Peer.cpp | 150 ++++++++++++++++++++++++++----------------------- node/Peer.hpp | 36 ++++++++++-- service/OneService.cpp | 2 +- 7 files changed, 143 insertions(+), 91 deletions(-) (limited to 'node') diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index a100afd9..5b228e17 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -1315,6 +1315,11 @@ typedef struct */ unsigned int pathCount; + /** + * Whether this peer was ever reachable via an aggregate link + */ + bool hadAggregateLink; + /** * Known network paths to peer */ diff --git a/node/Constants.hpp b/node/Constants.hpp index 0d3692f1..420343ad 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -274,6 +274,19 @@ */ #define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000 +/** + * Packets are only used for QoS/ACK statistical sampling if their packet ID is divisible by + * this integer. This is to provide a mechanism for both peers to agree on which packets need + * special treatment without having to exchange information. Changing this value would be + * a breaking change and would necessitate a protocol version upgrade. Since each incoming and + * outgoing packet ID is checked against this value its evaluation is of the form: + * (id & (divisor - 1)) == 0, thus the divisor must be a power of 2. + * + * This value is set at (16) so that given a normally-distributed RNG output we will sample + * 1/16th (or ~6.25%) of packets. + */ +#define ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR 0x10 + /** * Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff */ @@ -384,7 +397,7 @@ /** * Minimum amount of time between each ACK packet */ -#define ZT_PATH_ACK_INTERVAL 250 +#define ZT_PATH_ACK_INTERVAL 1000 /** * How often an aggregate link statistics report is emitted into this tracing system diff --git a/node/Node.cpp b/node/Node.cpp index 24deeae2..9b10dfdd 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -450,6 +450,7 @@ ZT_PeerList *Node::peers() const for(std::vector< std::pair< Address,SharedPtr > >::iterator pi(peers.begin());pi!=peers.end();++pi) { ZT_Peer *p = &(pl->peers[pl->peerCount++]); p->address = pi->second->address().toInt(); + p->hadAggregateLink = 0; if (pi->second->remoteVersionKnown()) { p->versionMajor = pi->second->remoteVersionMajor(); p->versionMinor = pi->second->remoteVersionMinor(); @@ -466,6 +467,7 @@ ZT_PeerList *Node::peers() const std::vector< SharedPtr > paths(pi->second->paths(_now)); SharedPtr bestp(pi->second->getAppropriatePath(_now,false)); + p->hadAggregateLink |= pi->second->hasAggregateLink(); p->pathCount = 0; for(std::vector< SharedPtr >::iterator path(paths.begin());path!=paths.end();++path) { ZT_FAST_MEMCPY(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage)); @@ -475,14 +477,14 @@ ZT_PeerList *Node::peers() const p->paths[p->pathCount].expired = 0; p->paths[p->pathCount].preferred = ((*path) == bestp) ? 1 : 0; p->paths[p->pathCount].latency = (*path)->latency(); - p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance(); + p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance(); p->paths[p->pathCount].throughputDisturbCoeff = (*path)->throughputDisturbanceCoefficient(); p->paths[p->pathCount].packetErrorRatio = (*path)->packetErrorRatio(); p->paths[p->pathCount].packetLossRatio = (*path)->packetLossRatio(); p->paths[p->pathCount].stability = (*path)->lastComputedStability(); p->paths[p->pathCount].throughput = (*path)->meanThroughput(); p->paths[p->pathCount].maxThroughput = (*path)->maxLifetimeThroughput(); - p->paths[p->pathCount].allocation = (*path)->allocation(); + p->paths[p->pathCount].allocation = (float)(*path)->allocation() / (float)255; p->paths[p->pathCount].ifname = (*path)->getName(); ++p->pathCount; diff --git a/node/Path.hpp b/node/Path.hpp index fb202306..cafff8cf 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -121,7 +121,7 @@ public: _lastComputedStability(0.0), _lastComputedRelativeQuality(0), _lastComputedThroughputDistCoeff(0.0), - _lastAllocation(0.0) + _lastAllocation(0) { prepareBuffers(); } @@ -153,7 +153,7 @@ public: _lastComputedStability(0.0), _lastComputedRelativeQuality(0), _lastComputedThroughputDistCoeff(0.0), - _lastAllocation(0.0) + _lastAllocation(0) { prepareBuffers(); _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, 16); @@ -316,12 +316,10 @@ public: { Mutex::Lock _l(_statistics_m); if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { - if (packetId % 2 == 0) { // even -> use for ACK + if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) { _unackedBytes += payloadLength; // Take note that we're expecting a VERB_ACK on this path as of a specific time _expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? _expectingAckAsOf : now; - } - else { // odd -> use for QoS if (_outQoSRecords.size() < ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS) { _outQoSRecords[packetId] = now; } @@ -341,11 +339,9 @@ public: { Mutex::Lock _l(_statistics_m); if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { - if (packetId % 2 == 0) { // even -> use for ACK + if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) { _inACKRecords[packetId] = payloadLength; _packetsReceivedSinceLastAck++; - } - else { // odd -> use for QoS _inQoSRecords[packetId] = now; _packetsReceivedSinceLastQoS++; } @@ -527,12 +523,12 @@ public: * * @param allocation Percentage of traffic to be sent over this path to a peer */ - inline void updateComponentAllocationOfAggregateLink(float allocation) { _lastAllocation = allocation; } + inline void updateComponentAllocationOfAggregateLink(unsigned char allocation) { _lastAllocation = allocation; } /** * @return Percentage of traffic allocated to this path in the aggregate link */ - inline float allocation() { return _lastAllocation; } + inline unsigned char allocation() { return _lastAllocation; } /** * @return Stability estimates can become expensive to compute, we cache the most recent result. @@ -704,7 +700,9 @@ private: float _lastComputedStability; float _lastComputedRelativeQuality; float _lastComputedThroughputDistCoeff; - float _lastAllocation; + unsigned char _lastAllocation; + + // cached human-readable strings for tracing purposes char _ifname[16]; diff --git a/node/Peer.cpp b/node/Peer.cpp index 1d581ab8..21bbfabe 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -56,6 +56,12 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _lastSentFullHello(0), _lastACKWindowReset(0), _lastQoSWindowReset(0), + _lastMultipathCompatibilityCheck(0), + _freeRandomByte(0), + _uniqueAlivePathCount(0), + _localMultipathSupported(false), + _remoteMultipathSupported(false), + _canUseMultipath(false), _vProto(0), _vMajor(0), _vMinor(0), @@ -69,6 +75,7 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _lastAggregateStatsReport(0), _lastAggregateAllocation(0) { + Utils::getSecureRandom(&_freeRandomByte, 1); if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) throw ZT_EXCEPTION_INVALID_ARGUMENT; _pathChoiceHist = new RingBuffer(ZT_MULTIPATH_PROPORTION_WIN_SZ); @@ -110,7 +117,7 @@ void Peer::received( recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, now); - if (canUseMultipath()) { + if (_canUseMultipath) { if (path->needsToSendQoS(now)) { sendQOS_MEASUREMENT(tPtr, path, path->localSocket(), path->address(), now); } @@ -145,17 +152,23 @@ void Peer::received( // Paths are redundant if they duplicate an alive path to the same IP or // with the same local socket and address family. bool redundant = false; + unsigned int replacePath = ZT_MAX_PEER_NETWORK_PATHS; for(unsigned int i=0;ialive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) || (_paths[i].p->address().ipsEqual2(path->address())) ) ) { + if ( (_paths[i].p->alive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) || (_paths[i].p->address().ipsEqual2(path->address())) ) ) { redundant = true; break; } + // If the path is the same address and port, simply assume this is a replacement + if ( (_paths[i].p->address().ipsEqual2(path->address()) && (_paths[i].p->address().port() == path->address().port()))) { + replacePath = i; + break; + } } else break; } - - if (!redundant) { - unsigned int replacePath = ZT_MAX_PEER_NETWORK_PATHS; + // If the path isn't a duplicate of the same localSocket AND we haven't already determined a replacePath, + // then find the worst path and replace it. + if (!redundant && replacePath == ZT_MAX_PEER_NETWORK_PATHS) { int replacePathQuality = 0; for(unsigned int i=0;iaddress().ss_family == path->address().ss_family && _paths[i].p->address().ipsEqual2(path->address())) { - replacePath = i; - break; - } - } - } - } - - if (replacePath != ZT_MAX_PEER_NETWORK_PATHS) { - if (verb == Packet::VERB_OK) { - RR->t->peerLearnedNewPath(tPtr,networkId,*this,path,packetId); - _paths[replacePath].lr = now; - _paths[replacePath].p = path; - _paths[replacePath].priority = 1; - } else { - attemptToContact = true; - } + } + if (replacePath != ZT_MAX_PEER_NETWORK_PATHS) { + if (verb == Packet::VERB_OK) { + RR->t->peerLearnedNewPath(tPtr,networkId,*this,path,packetId); + _paths[replacePath].lr = now; + _paths[replacePath].p = path; + _paths[replacePath].priority = 1; + } else { + attemptToContact = true; } } } @@ -274,7 +273,9 @@ void Peer::received( void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now) { - if (localMultipathSupport()) { + // Grab second byte from packetId to use as a source of entropy in the next path selection + _freeRandomByte = (packetId & 0xFF00) >> 8; + if (_canUseMultipath) { path->recordOutgoingPacket(now, packetId, payloadLength, verb); } } @@ -282,7 +283,7 @@ void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t pack void Peer::recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now) { - if (localMultipathSupport()) { + if (_canUseMultipath) { if (path->needsToSendAck(now)) { sendACK(tPtr, path, path->localSocket(), path->address(), now); } @@ -323,6 +324,9 @@ void Peer::computeAggregateProportionalAllocation(int64_t now) + (fmax(1, relThroughput[i]) * ZT_PATH_CONTRIB_THROUGHPUT) + relScope * ZT_PATH_CONTRIB_SCOPE; relQuality *= age_contrib; + // Arbitrary cutoffs + relQuality = relQuality > (1.00 / 100.0) ? relQuality : 0.0; + relQuality = relQuality < (99.0 / 100.0) ? relQuality : 1.0; totalRelativeQuality += relQuality; _paths[i].p->updateRelativeQuality(relQuality); } @@ -330,12 +334,12 @@ void Peer::computeAggregateProportionalAllocation(int64_t now) // Convert set of relative performances into an allocation set for(uint16_t i=0;iupdateComponentAllocationOfAggregateLink(_paths[i].p->relativeQuality() / totalRelativeQuality); + _paths[i].p->updateComponentAllocationOfAggregateLink((_paths[i].p->relativeQuality() / totalRelativeQuality) * 255); } } } -float Peer::computeAggregateLinkPacketDelayVariance() +int Peer::computeAggregateLinkPacketDelayVariance() { float pdv = 0.0; for(unsigned int i=0;i Peer::getAppropriatePath(int64_t now, bool includeExpired) * Send traffic across the highest quality path only. This algorithm will still * use the old path quality metric from protocol version 9. */ - if (!canUseMultipath()) { + if (!_canUseMultipath) { long bestPathQuality = 2147483647; for(unsigned int i=0;i Peer::getAppropriatePath(int64_t now, bool includeExpired) } } } - unsigned int r; - Utils::getSecureRandom(&r, 1); + unsigned int r = _freeRandomByte; if (numAlivePaths > 0) { - // pick a random out of the set deemed "alive" int rf = r % numAlivePaths; return _paths[alivePaths[rf]].p; } else if(numStalePaths > 0) { - // resort to trying any non-expired path + // Resort to trying any non-expired path int rf = r % numStalePaths; return _paths[stalePaths[rf]].p; } @@ -461,40 +463,12 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) * Proportionally allocate traffic according to dynamic path quality measurements */ if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) { - int numAlivePaths = 0; - int numStalePaths = 0; - int alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; - int stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; - memset(&alivePaths, -1, sizeof(alivePaths)); - memset(&stalePaths, -1, sizeof(stalePaths)); - // Attempt to find an excuse not to use the rest of this algorithm - // Alive or Stale? - for(unsigned int i=0;ialive(now)) { - alivePaths[numAlivePaths] = i; - numAlivePaths++; - } else { - stalePaths[numStalePaths] = i; - numStalePaths++; - } - // Record a default path to use as a short-circuit for the rest of the algorithm (if needed) - bestPath = i; - } - } if ((now - _lastAggregateAllocation) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) { _lastAggregateAllocation = now; computeAggregateProportionalAllocation(now); } - if (numAlivePaths == 0 && numStalePaths == 0) { - return SharedPtr(); - } if (numAlivePaths == 1 || numStalePaths == 1) { - return _paths[bestPath].p; - } // Randomly choose path according to their allocations - unsigned int r; - Utils::getSecureRandom(&r, 1); - float rf = (float)(r %= 100) / 100; + float rf = _freeRandomByte; for(int i=0;iallocation()) { @@ -676,6 +650,41 @@ void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &o } } +inline void Peer::processBackgroundPeerTasks(int64_t now) +{ + // Determine current multipath compatibility with other peer + if ((now - _lastMultipathCompatibilityCheck) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) { + // Cache number of available paths so that we can short-circuit multipath logic elsewhere + // + // We also take notice of duplicate paths (same IP only) because we may have + // recently received a direct path push from a peer and our list might contain + // a dead path which hasn't been fully recognized as such. In this case we + // don't want the duplicate to trigger execution of multipath code prematurely. + // + // This is done to support the behavior of auto multipath enable/disable + // without user intervention. + int currAlivePathCount = 0; + int duplicatePathsFound = 0; + for (unsigned int i=0;iaddress().ipsEqual2(_paths[j].p->address()) && i != j) { + duplicatePathsFound+=1; + break; + } + } + } + } + _uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2)); + _lastMultipathCompatibilityCheck = now; + _localMultipathSupported = ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9)); + _remoteMultipathSupported = _vProto > 9; + // If both peers support multipath and more than one path exist, we can use multipath logic + _canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1); + } +} + void Peer::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) { Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ACK); @@ -774,14 +783,15 @@ void Peer::tryMemorizedPath(void *tPtr,int64_t now) unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) { unsigned int sent = 0; - Mutex::Lock _l(_paths_m); const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD); _lastSentFullHello = now; + processBackgroundPeerTasks(now); + // Emit traces regarding aggregate link status - if (canUseMultipath()) { + if (_canUseMultipath) { int alivePathCount = aggregateLinkPhysicalPathCount(); if ((now - _lastAggregateStatsReport) > ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL) { _lastAggregateStatsReport = now; diff --git a/node/Peer.hpp b/node/Peer.hpp index ddbe6f77..a32eaad0 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -203,12 +203,12 @@ public: /** * @return The aggregate link Packet Delay Variance (PDV) */ - float computeAggregateLinkPacketDelayVariance(); + int computeAggregateLinkPacketDelayVariance(); /** * @return The aggregate link mean latency */ - float computeAggregateLinkMeanLatency(); + int computeAggregateLinkMeanLatency(); /** * @return The number of currently alive "physical" paths in the aggregate link @@ -357,7 +357,7 @@ public: */ inline unsigned int latency(const int64_t now) { - if (RR->node->getMultipathMode()) { + if (_canUseMultipath) { return (int)computeAggregateLinkMeanLatency(); } else { SharedPtr bp(getAppropriatePath(now,false)); @@ -417,6 +417,14 @@ public: inline bool remoteVersionKnown() const { return ((_vMajor > 0)||(_vMinor > 0)||(_vRevision > 0)); } + /** + * Periodically update known multipath activation constraints. This is done so that we know when and when + * not to use multipath logic. Doing this once every few seconds is sufficient. + * + * @param now Current time + */ + inline void processBackgroundPeerTasks(int64_t now); + /** * Record that the remote peer does have multipath enabled. As is evident by the receipt of a VERB_ACK * or a VERB_QOS_MEASUREMENT packet at some point in the past. Until this flag is set, the local client @@ -427,18 +435,18 @@ public: /** * @return Whether the local client supports and is configured to use multipath */ - inline bool localMultipathSupport() { return ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9)); } + inline bool localMultipathSupport() { return _localMultipathSupported; } /** * @return Whether the remote peer supports and is configured to use multipath */ - inline bool remoteMultipathSupport() { return (_remotePeerMultipathEnabled && (_vProto > 9)); } + inline bool remoteMultipathSupport() { return _remoteMultipathSupported; } /** * @return Whether this client can use multipath to communicate with this peer. True if both peers are using * the correct protocol and if both peers have multipath enabled. False if otherwise. */ - inline bool canUseMultipath() { return (localMultipathSupport() && remoteMultipathSupport()); } + inline bool canUseMultipath() { return _canUseMultipath; } /** * @return True if peer has received a trust established packet (e.g. common network membership) in the past ZT_TRUST_EXPIRATION ms @@ -557,6 +565,13 @@ public: return (_QoSCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT); } + /** + * @return Whether this peer is reachable via an aggregate link + */ + inline bool hasAggregateLink() { + return _localMultipathSupported && _remoteMultipathSupported && _remotePeerMultipathEnabled; + } + /** * Serialize a peer for storage in local cache * @@ -658,6 +673,15 @@ private: int64_t _lastPathPrune; int64_t _lastACKWindowReset; int64_t _lastQoSWindowReset; + int64_t _lastMultipathCompatibilityCheck; + + unsigned char _freeRandomByte; + + int _uniqueAlivePathCount; + + bool _localMultipathSupported; + bool _remoteMultipathSupported; + bool _canUseMultipath; uint16_t _vProto; uint16_t _vMajor; diff --git a/service/OneService.cpp b/service/OneService.cpp index a1a9d981..9b12f17b 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1229,7 +1229,7 @@ public: char peerAddrStr[256]; if (pl) { for(unsigned long i=0;ipeerCount;++i) { - if (pl->peers[i].role == ZT_PEER_ROLE_LEAF) { + if (pl->peers[i].hadAggregateLink) { nlohmann::json pj; _peerAggregateLinkToJson(pj,&(pl->peers[i])); OSUtils::ztsnprintf(peerAddrStr,sizeof(peerAddrStr),"%.10llx",pl->peers[i].address); -- cgit v1.2.3 From 28cb40529d04e61a188e9d4a2a316690703ea605 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 10 Jul 2018 16:50:12 -0700 Subject: Rough draft of fq-codel implementation --- node/Constants.hpp | 35 ++++++ node/Network.cpp | 16 ++- node/Network.hpp | 10 +- node/Node.cpp | 1 + node/OutboundMulticast.cpp | 3 +- node/Switch.cpp | 275 +++++++++++++++++++++++++++++++++++++++++++-- node/Switch.hpp | 94 ++++++++++++++++ 7 files changed, 419 insertions(+), 15 deletions(-) (limited to 'node') diff --git a/node/Constants.hpp b/node/Constants.hpp index 420343ad..5f21201e 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -410,6 +410,41 @@ */ #define ZT_PATH_IMBALANCE_THRESHOLD 0.20 +/** + * Max allowable time spent in any queue + */ +#define ZT_QOS_TARGET 5 // ms + +/** + * Time period where the time spent in the queue by a packet should fall below + * target at least once + */ +#define ZT_QOS_INTERVAL 100 // ms + +/** + * The number of bytes that each queue is allowed to send during each DRR cycle. + * This approximates a single-byte-based fairness queuing scheme + */ +#define ZT_QOS_QUANTUM ZT_DEFAULT_MTU + +/** + * The maximum total number of packets that can be queued among all + * active/inactive, old/new queues + */ +#define ZT_QOS_MAX_ENQUEUED_PACKETS 1024 + +/** + * Number of QoS queues (buckets) + */ +#define ZT_QOS_NUM_BUCKETS 9 + +/** + * All unspecified traffic is put in this bucket. Anything in a bucket with a smaller + * value is de-prioritized. Anything in a bucket with a higher value is prioritized over + * other traffic. + */ +#define ZT_QOS_DEFAULT_BUCKET 0 + /** * How frequently to send heartbeats over in-use paths */ diff --git a/node/Network.cpp b/node/Network.cpp index a75d9fd1..a5c2fc3e 100644 --- a/node/Network.cpp +++ b/node/Network.cpp @@ -106,7 +106,8 @@ static _doZtFilterResult _doZtFilter( const unsigned int ruleCount, Address &cc, // MUTABLE -- set to TEE destination if TEE action is taken or left alone otherwise unsigned int &ccLength, // MUTABLE -- set to length of packet payload to TEE - bool &ccWatch) // MUTABLE -- set to true for WATCH target as opposed to normal TEE + bool &ccWatch, // MUTABLE -- set to true for WATCH target as opposed to normal TEE + uint8_t &qosBucket) // MUTABLE -- set to the value of the argument provided to the matching action { // Set to true if we are a TEE/REDIRECT/WATCH target bool superAccept = false; @@ -621,7 +622,8 @@ bool Network::filterOutgoingPacket( const uint8_t *frameData, const unsigned int frameLen, const unsigned int etherType, - const unsigned int vlanId) + const unsigned int vlanId, + uint8_t &qosBucket) { const int64_t now = RR->node->now(); Address ztFinalDest(ztDest); @@ -636,7 +638,7 @@ bool Network::filterOutgoingPacket( Membership *const membership = (ztDest) ? _memberships.get(ztDest) : (Membership *)0; - switch(_doZtFilter(RR,rrl,_config,membership,false,ztSource,ztFinalDest,macSource,macDest,frameData,frameLen,etherType,vlanId,_config.rules,_config.ruleCount,cc,ccLength,ccWatch)) { + switch(_doZtFilter(RR,rrl,_config,membership,false,ztSource,ztFinalDest,macSource,macDest,frameData,frameLen,etherType,vlanId,_config.rules,_config.ruleCount,cc,ccLength,ccWatch,qosBucket)) { case DOZTFILTER_NO_MATCH: { for(unsigned int c=0;c<_config.capabilityCount;++c) { @@ -644,7 +646,7 @@ bool Network::filterOutgoingPacket( Address cc2; unsigned int ccLength2 = 0; bool ccWatch2 = false; - switch (_doZtFilter(RR,crrl,_config,membership,false,ztSource,ztFinalDest,macSource,macDest,frameData,frameLen,etherType,vlanId,_config.capabilities[c].rules(),_config.capabilities[c].ruleCount(),cc2,ccLength2,ccWatch2)) { + switch (_doZtFilter(RR,crrl,_config,membership,false,ztSource,ztFinalDest,macSource,macDest,frameData,frameLen,etherType,vlanId,_config.capabilities[c].rules(),_config.capabilities[c].ruleCount(),cc2,ccLength2,ccWatch2,qosBucket)) { case DOZTFILTER_NO_MATCH: case DOZTFILTER_DROP: // explicit DROP in a capability just terminates its evaluation and is an anti-pattern break; @@ -759,11 +761,13 @@ int Network::filterIncomingPacket( bool ccWatch = false; const Capability *c = (Capability *)0; + uint8_t qosBucket = 255; // For incoming packets this is a dummy value + Mutex::Lock _l(_lock); Membership &membership = _membership(sourcePeer->address()); - switch (_doZtFilter(RR,rrl,_config,&membership,true,sourcePeer->address(),ztFinalDest,macSource,macDest,frameData,frameLen,etherType,vlanId,_config.rules,_config.ruleCount,cc,ccLength,ccWatch)) { + switch (_doZtFilter(RR,rrl,_config,&membership,true,sourcePeer->address(),ztFinalDest,macSource,macDest,frameData,frameLen,etherType,vlanId,_config.rules,_config.ruleCount,cc,ccLength,ccWatch,qosBucket)) { case DOZTFILTER_NO_MATCH: { Membership::CapabilityIterator mci(membership,_config); @@ -772,7 +776,7 @@ int Network::filterIncomingPacket( Address cc2; unsigned int ccLength2 = 0; bool ccWatch2 = false; - switch(_doZtFilter(RR,crrl,_config,&membership,true,sourcePeer->address(),ztFinalDest,macSource,macDest,frameData,frameLen,etherType,vlanId,c->rules(),c->ruleCount(),cc2,ccLength2,ccWatch2)) { + switch(_doZtFilter(RR,crrl,_config,&membership,true,sourcePeer->address(),ztFinalDest,macSource,macDest,frameData,frameLen,etherType,vlanId,c->rules(),c->ruleCount(),cc2,ccLength2,ccWatch2,qosBucket)) { case DOZTFILTER_NO_MATCH: case DOZTFILTER_DROP: // explicit DROP in a capability just terminates its evaluation and is an anti-pattern break; diff --git a/node/Network.hpp b/node/Network.hpp index 38f03eb3..2baab511 100644 --- a/node/Network.hpp +++ b/node/Network.hpp @@ -132,7 +132,8 @@ public: const uint8_t *frameData, const unsigned int frameLen, const unsigned int etherType, - const unsigned int vlanId); + const unsigned int vlanId, + uint8_t &qosBucket); /** * Apply filters to an incoming packet @@ -297,6 +298,13 @@ public: */ void learnBridgeRoute(const MAC &mac,const Address &addr); + /** + * Whether QoS is in effect for this network + */ + bool QoSEnabled() { + return false; + } + /** * Learn a multicast group that is bridged to our tap device * diff --git a/node/Node.cpp b/node/Node.cpp index 9b10dfdd..576b2e4a 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -368,6 +368,7 @@ ZT_ResultCode Node::leave(uint64_t nwid,void **uptr,void *tptr) { Mutex::Lock _l(_networks_m); SharedPtr *nw = _networks.get(nwid); + RR->sw->removeNetworkQoSControlBlock(nwid); if (!nw) return ZT_RESULT_OK; if (uptr) diff --git a/node/OutboundMulticast.cpp b/node/OutboundMulticast.cpp index d7a7b4d8..2391771f 100644 --- a/node/OutboundMulticast.cpp +++ b/node/OutboundMulticast.cpp @@ -85,7 +85,8 @@ void OutboundMulticast::sendOnly(const RuntimeEnvironment *RR,void *tPtr,const A { const SharedPtr nw(RR->node->network(_nwid)); const Address toAddr2(toAddr); - if ((nw)&&(nw->filterOutgoingPacket(tPtr,true,RR->identity.address(),toAddr2,_macSrc,_macDest,_frameData,_frameLen,_etherType,0))) { + uint8_t QoSBucket = 255; // Dummy value + if ((nw)&&(nw->filterOutgoingPacket(tPtr,true,RR->identity.address(),toAddr2,_macSrc,_macDest,_frameData,_frameLen,_etherType,0,QoSBucket))) { _packet.newInitializationVector(); _packet.setDestination(toAddr2); RR->node->expectReplyTo(_packet.packetId()); diff --git a/node/Switch.cpp b/node/Switch.cpp index d53bf53e..fddbd581 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -266,6 +266,8 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const } } + uint8_t qosBucket = ZT_QOS_DEFAULT_BUCKET; + if (to.isMulticast()) { MulticastGroup multicastGroup(to,0); @@ -383,7 +385,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const network->learnBridgedMulticastGroup(tPtr,multicastGroup,RR->node->now()); // First pass sets noTee to false, but noTee is set to true in OutboundMulticast to prevent duplicates. - if (!network->filterOutgoingPacket(tPtr,false,RR->identity.address(),Address(),from,to,(const uint8_t *)data,len,etherType,vlanId)) { + if (!network->filterOutgoingPacket(tPtr,false,RR->identity.address(),Address(),from,to,(const uint8_t *)data,len,etherType,vlanId,qosBucket)) { RR->t->outgoingNetworkFrameDropped(tPtr,network,from,to,etherType,vlanId,len,"filter blocked"); return; } @@ -407,7 +409,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const Address toZT(to.toAddress(network->id())); // since in-network MACs are derived from addresses and network IDs, we can reverse this SharedPtr toPeer(RR->topology->getPeer(tPtr,toZT)); - if (!network->filterOutgoingPacket(tPtr,false,RR->identity.address(),toZT,from,to,(const uint8_t *)data,len,etherType,vlanId)) { + if (!network->filterOutgoingPacket(tPtr,false,RR->identity.address(),toZT,from,to,(const uint8_t *)data,len,etherType,vlanId,qosBucket)) { RR->t->outgoingNetworkFrameDropped(tPtr,network,from,to,etherType,vlanId,len,"filter blocked"); return; } @@ -422,7 +424,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const outp.append(data,len); if (!network->config().disableCompression()) outp.compress(); - send(tPtr,outp,true); + aqm_enqueue(tPtr,network,outp,true,qosBucket); } else { Packet outp(toZT,RR->identity.address(),Packet::VERB_FRAME); outp.append(network->id()); @@ -430,7 +432,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const outp.append(data,len); if (!network->config().disableCompression()) outp.compress(); - send(tPtr,outp,true); + aqm_enqueue(tPtr,network,outp,true,qosBucket); } } else { @@ -439,7 +441,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const // We filter with a NULL destination ZeroTier address first. Filtrations // for each ZT destination are also done below. This is the same rationale // and design as for multicast. - if (!network->filterOutgoingPacket(tPtr,false,RR->identity.address(),Address(),from,to,(const uint8_t *)data,len,etherType,vlanId)) { + if (!network->filterOutgoingPacket(tPtr,false,RR->identity.address(),Address(),from,to,(const uint8_t *)data,len,etherType,vlanId,qosBucket)) { RR->t->outgoingNetworkFrameDropped(tPtr,network,from,to,etherType,vlanId,len,"filter blocked"); return; } @@ -477,7 +479,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const } for(unsigned int b=0;bfilterOutgoingPacket(tPtr,true,RR->identity.address(),bridges[b],from,to,(const uint8_t *)data,len,etherType,vlanId)) { + if (network->filterOutgoingPacket(tPtr,true,RR->identity.address(),bridges[b],from,to,(const uint8_t *)data,len,etherType,vlanId,qosBucket)) { Packet outp(bridges[b],RR->identity.address(),Packet::VERB_EXT_FRAME); outp.append(network->id()); outp.append((uint8_t)0x00); @@ -487,7 +489,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const outp.append(data,len); if (!network->config().disableCompression()) outp.compress(); - send(tPtr,outp,true); + aqm_enqueue(tPtr,network,outp,true,qosBucket); } else { RR->t->outgoingNetworkFrameDropped(tPtr,network,from,to,etherType,vlanId,len,"filter blocked (bridge replication)"); } @@ -495,6 +497,263 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const } } +void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket) +{ + if(!network->QoSEnabled()) { + send(tPtr, packet, encrypt); + return; + } + NetworkQoSControlBlock *nqcb = _netQueueControlBlock[network->id()]; + if (!nqcb) { + // DEBUG_INFO("creating network QoS control block (NQCB) for network %llx", network->id()); + nqcb = new NetworkQoSControlBlock(); + _netQueueControlBlock[network->id()] = nqcb; + // Initialize ZT_QOS_NUM_BUCKETS queues and place them in the INACTIVE list + // These queues will be shuffled between the new/old/inactive lists by the enqueue/dequeue algorithm + for (int i=0; iinactiveQueues.push_back(new ManagedQueue(i)); + } + } + + if (packet.verb() != Packet::VERB_FRAME && packet.verb() != Packet::VERB_EXT_FRAME) { + // DEBUG_INFO("skipping, no QoS for this packet, verb=%x", packet.verb()); + // just send packet normally, no QoS for ZT protocol traffic + send(tPtr, packet, encrypt); + } + + _aqm_m.lock(); + + // Enqueue packet and move queue to appropriate list + + const Address dest(packet.destination()); + TXQueueEntry *txEntry = new TXQueueEntry(dest,RR->node->now(),packet,encrypt); + + ManagedQueue *selectedQueue = nullptr; + for (int i=0; ioldQueues.size()) { // search old queues first (I think this is best since old would imply most recent usage of the queue) + if (nqcb->oldQueues[i]->id == qosBucket) { + selectedQueue = nqcb->oldQueues[i]; + } + } if (i < nqcb->newQueues.size()) { // search new queues (this would imply not often-used queues) + if (nqcb->newQueues[i]->id == qosBucket) { + selectedQueue = nqcb->newQueues[i]; + } + } if (i < nqcb->inactiveQueues.size()) { // search inactive queues + if (nqcb->inactiveQueues[i]->id == qosBucket) { + selectedQueue = nqcb->inactiveQueues[i]; + // move queue to end of NEW queue list + selectedQueue->byteCredit = ZT_QOS_QUANTUM; + // DEBUG_INFO("moving q=%p from INACTIVE to NEW list", selectedQueue); + nqcb->newQueues.push_back(selectedQueue); + nqcb->inactiveQueues.erase(nqcb->inactiveQueues.begin() + i); + } + } + } + if (!selectedQueue) { + return; + } + + selectedQueue->q.push_back(txEntry); + selectedQueue->byteLength+=txEntry->packet.payloadLength(); + nqcb->_currEnqueuedPackets++; + + // DEBUG_INFO("nq=%2lu, oq=%2lu, iq=%2lu, nqcb.size()=%3d, bucket=%2d, q=%p", nqcb->newQueues.size(), nqcb->oldQueues.size(), nqcb->inactiveQueues.size(), nqcb->_currEnqueuedPackets, qosBucket, selectedQueue); + + // Drop a packet if necessary + ManagedQueue *selectedQueueToDropFrom = nullptr; + if (nqcb->_currEnqueuedPackets > ZT_QOS_MAX_ENQUEUED_PACKETS) + { + // DEBUG_INFO("too many enqueued packets (%d), finding packet to drop", nqcb->_currEnqueuedPackets); + int maxQueueLength = 0; + for (int i=0; ioldQueues.size()) { + if (nqcb->oldQueues[i]->byteLength > maxQueueLength) { + maxQueueLength = nqcb->oldQueues[i]->byteLength; + selectedQueueToDropFrom = nqcb->oldQueues[i]; + } + } if (i < nqcb->newQueues.size()) { + if (nqcb->newQueues[i]->byteLength > maxQueueLength) { + maxQueueLength = nqcb->newQueues[i]->byteLength; + selectedQueueToDropFrom = nqcb->newQueues[i]; + } + } if (i < nqcb->inactiveQueues.size()) { + if (nqcb->inactiveQueues[i]->byteLength > maxQueueLength) { + maxQueueLength = nqcb->inactiveQueues[i]->byteLength; + selectedQueueToDropFrom = nqcb->inactiveQueues[i]; + } + } + } + if (selectedQueueToDropFrom) { + // DEBUG_INFO("dropping packet from head of largest queue (%d payload bytes)", maxQueueLength); + int sizeOfDroppedPacket = selectedQueueToDropFrom->q.front()->packet.payloadLength(); + delete selectedQueueToDropFrom->q.front(); + selectedQueueToDropFrom->q.pop_front(); + selectedQueueToDropFrom->byteLength-=sizeOfDroppedPacket; + nqcb->_currEnqueuedPackets--; + } + } + _aqm_m.unlock(); + aqm_dequeue(tPtr); +} + +uint64_t Switch::control_law(uint64_t t, int count) +{ + return t + ZT_QOS_INTERVAL / sqrt(count); +} + +Switch::dqr Switch::dodequeue(ManagedQueue *q, uint64_t now) +{ + dqr r; + r.ok_to_drop = false; + r.p = q->q.front(); + + if (r.p == NULL) { + q->first_above_time = 0; + return r; + } + uint64_t sojourn_time = now - r.p->creationTime; + if (sojourn_time < ZT_QOS_TARGET || q->byteLength <= ZT_DEFAULT_MTU) { + // went below - stay below for at least interval + q->first_above_time = 0; + } else { + if (q->first_above_time == 0) { + // just went above from below. if still above at + // first_above_time, will say it's ok to drop. + q->first_above_time = now + ZT_QOS_INTERVAL; + } else if (now >= q->first_above_time) { + r.ok_to_drop = true; + } + } + return r; +} + +Switch::TXQueueEntry * Switch::CoDelDequeue(ManagedQueue *q, bool isNew, uint64_t now) +{ + dqr r = dodequeue(q, now); + + if (q->dropping) { + if (!r.ok_to_drop) { + q->dropping = false; + } + while (now >= q->drop_next && q->dropping) { + q->q.pop_front(); // drop + r = dodequeue(q, now); + if (!r.ok_to_drop) { + // leave dropping state + q->dropping = false; + } else { + ++(q->count); + // schedule the next drop. + q->drop_next = control_law(q->drop_next, q->count); + } + } + } else if (r.ok_to_drop) { + q->q.pop_front(); // drop + r = dodequeue(q, now); + q->dropping = true; + q->count = (q->count > 2 && now - q->drop_next < 8*ZT_QOS_INTERVAL)? + q->count - 2 : 1; + q->drop_next = control_law(now, q->count); + } + return r.p; +} + +void Switch::aqm_dequeue(void *tPtr) +{ + // Cycle through network-specific QoS control blocks + for(std::map::iterator nqcb(_netQueueControlBlock.begin());nqcb!=_netQueueControlBlock.end();) { + if (!(*nqcb).second->_currEnqueuedPackets) { + return; + } + + uint64_t now = RR->node->now(); + TXQueueEntry *entryToEmit = nullptr; + std::vector *currQueues = &((*nqcb).second->newQueues); + std::vector *oldQueues = &((*nqcb).second->oldQueues); + std::vector *inactiveQueues = &((*nqcb).second->inactiveQueues); + + _aqm_m.lock(); + + // Attempt dequeue from queues in NEW list + bool examiningNewQueues = true; + while (currQueues->size()) { + ManagedQueue *queueAtFrontOfList = currQueues->front(); + if (queueAtFrontOfList->byteCredit < 0) { + queueAtFrontOfList->byteCredit += ZT_QOS_QUANTUM; + // Move to list of OLD queues + // DEBUG_INFO("moving q=%p from NEW to OLD list", queueAtFrontOfList); + oldQueues->push_back(queueAtFrontOfList); + currQueues->erase(currQueues->begin()); + } else { + entryToEmit = CoDelDequeue(queueAtFrontOfList, examiningNewQueues, now); + if (!entryToEmit) { + // Move to end of list of OLD queues + // DEBUG_INFO("moving q=%p from NEW to OLD list", queueAtFrontOfList); + oldQueues->push_back(queueAtFrontOfList); + currQueues->erase(currQueues->begin()); + } + else { + int len = entryToEmit->packet.payloadLength(); + queueAtFrontOfList->byteLength -= len; + queueAtFrontOfList->byteCredit -= len; + // Send the packet! + queueAtFrontOfList->q.pop_front(); + send(tPtr, entryToEmit->packet, entryToEmit->encrypt); + (*nqcb).second->_currEnqueuedPackets--; + } + if (queueAtFrontOfList) { + //DEBUG_INFO("dequeuing from q=%p, len=%lu in NEW list (byteCredit=%d)", queueAtFrontOfList, queueAtFrontOfList->q.size(), queueAtFrontOfList->byteCredit); + } + break; + } + } + + // Attempt dequeue from queues in OLD list + examiningNewQueues = false; + currQueues = &((*nqcb).second->oldQueues); + while (currQueues->size()) { + ManagedQueue *queueAtFrontOfList = currQueues->front(); + if (queueAtFrontOfList->byteCredit < 0) { + queueAtFrontOfList->byteCredit += ZT_QOS_QUANTUM; + oldQueues->push_back(queueAtFrontOfList); + currQueues->erase(currQueues->begin()); + } else { + entryToEmit = CoDelDequeue(queueAtFrontOfList, examiningNewQueues, now); + if (!entryToEmit) { + //DEBUG_INFO("moving q=%p from OLD to INACTIVE list", queueAtFrontOfList); + // Move to inactive list of queues + inactiveQueues->push_back(queueAtFrontOfList); + currQueues->erase(currQueues->begin()); + } + else { + int len = entryToEmit->packet.payloadLength(); + queueAtFrontOfList->byteLength -= len; + queueAtFrontOfList->byteCredit -= len; + queueAtFrontOfList->q.pop_front(); + send(tPtr, entryToEmit->packet, entryToEmit->encrypt); + (*nqcb).second->_currEnqueuedPackets--; + } + if (queueAtFrontOfList) { + //DEBUG_INFO("dequeuing from q=%p, len=%lu in OLD list (byteCredit=%d)", queueAtFrontOfList, queueAtFrontOfList->q.size(), queueAtFrontOfList->byteCredit); + } + break; + } + } + nqcb++; + _aqm_m.unlock(); + } +} + +void Switch::removeNetworkQoSControlBlock(uint64_t nwid) +{ + NetworkQoSControlBlock *nq = _netQueueControlBlock[nwid]; + if (nq) { + _netQueueControlBlock.erase(nwid); + delete nq; + nq = NULL; + } +} + void Switch::send(void *tPtr,Packet &packet,bool encrypt) { const Address dest(packet.destination()); @@ -550,6 +809,7 @@ void Switch::doAnythingWaitingForPeer(void *tPtr,const SharedPtr &peer) { Mutex::Lock _l(_txQueue_m); + for(std::list< TXQueueEntry >::iterator txi(_txQueue.begin());txi!=_txQueue.end();) { if (txi->dest == peer->address()) { if (_trySend(tPtr,txi->packet,txi->encrypt)) { @@ -574,6 +834,7 @@ unsigned long Switch::doTimerTasks(void *tPtr,int64_t now) std::vector
needWhois; { Mutex::Lock _l(_txQueue_m); + for(std::list< TXQueueEntry >::iterator txi(_txQueue.begin());txi!=_txQueue.end();) { if (_trySend(tPtr,txi->packet,txi->encrypt)) { _txQueue.erase(txi++); diff --git a/node/Switch.hpp b/node/Switch.hpp index 906f418e..5f60fc46 100644 --- a/node/Switch.hpp +++ b/node/Switch.hpp @@ -59,6 +59,14 @@ class Peer; */ class Switch { + struct ManagedQueue; + struct TXQueueEntry; + + typedef struct { + TXQueueEntry *p; + bool ok_to_drop; + } dqr; + public: Switch(const RuntimeEnvironment *renv); @@ -87,6 +95,62 @@ public: */ void onLocalEthernet(void *tPtr,const SharedPtr &network,const MAC &from,const MAC &to,unsigned int etherType,unsigned int vlanId,const void *data,unsigned int len); + /** + * Determines the next drop schedule for packets in the TX queue + * + * @param t Current time + * @param count Number of packets dropped this round + */ + uint64_t control_law(uint64_t t, int count); + + /** + * Selects a packet eligible for transmission from a TX queue. According to the control law, multiple packets + * may be intentionally dropped before a packet is returned to the AQM scheduler. + * + * @param q The TX queue that is being dequeued from + * @param now Current time + */ + dqr dodequeue(ManagedQueue *q, uint64_t now); + + /** + * Presents a packet to the AQM scheduler. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param network Network that the packet shall be sent over + * @param packet Packet to be sent + * @param encrypt Encrypt packet payload? (always true except for HELLO) + * @param qosBucket Which bucket the rule-system determined this packet should fall into + */ + void aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket); + + /** + * Performs a single AQM cycle and dequeues and transmits all eligible packets on all networks + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + */ + void aqm_dequeue(void *tPtr); + + /** + * Calls the dequeue mechanism and adjust queue state variables + * + * @param q The TX queue that is being dequeued from + * @param isNew Whether or not this queue is in the NEW list + * @param now Current time + */ + Switch::TXQueueEntry * CoDelDequeue(ManagedQueue *q, bool isNew, uint64_t now); + + /** + * Removes QoS Queues and flow state variables for a specific network. These queues are created + * automatically upon the transmission of the first packet from this peer to another peer on the + * given network. + * + * The reason for existence of queues and flow state variables specific to each network is so that + * each network's QoS rules function independently. + * + * @param nwid Network ID + */ + void removeNetworkQoSControlBlock(uint64_t nwid); + /** * Send a packet to a ZeroTier address (destination in packet) * @@ -199,6 +263,7 @@ private: }; std::list< TXQueueEntry > _txQueue; Mutex _txQueue_m; + Mutex _aqm_m; // Tracks sending of VERB_RENDEZVOUS to relaying peers struct _LastUniteKey @@ -220,6 +285,35 @@ private: }; Hashtable< _LastUniteKey,uint64_t > _lastUniteAttempt; // key is always sorted in ascending order, for set-like behavior Mutex _lastUniteAttempt_m; + + // Queue with additional flow state variables + struct ManagedQueue + { + ManagedQueue(int id) : + id(id), + byteCredit(ZT_QOS_QUANTUM), + byteLength(0), + dropping(false) + {} + int id; + int byteCredit; + int byteLength; + uint64_t first_above_time; + uint32_t count; + uint64_t drop_next; + bool dropping; + uint64_t drop_next_time; + std::list< TXQueueEntry *> q; + }; + // To implement fq_codel we need to maintain a queue of queues + struct NetworkQoSControlBlock + { + int _currEnqueuedPackets; + std::vector newQueues; + std::vector oldQueues; + std::vector inactiveQueues; + }; + std::map _netQueueControlBlock; }; } // namespace ZeroTier -- cgit v1.2.3 From 65b0030342704cdbace07693a22d3e8048f7f244 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Wed, 11 Jul 2018 16:55:13 -0700 Subject: Added basic QoS rule handling --- include/ZeroTierOne.h | 10 ++++++++++ node/Network.cpp | 8 +++++++- rule-compiler/rule-compiler.js | 5 ++++- 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'node') diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index 5b228e17..e4157b96 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -744,6 +744,11 @@ enum ZT_VirtualNetworkRuleType */ ZT_NETWORK_RULE_ACTION_BREAK = 5, + /** + * Place a matching frame in the specified QoS bucket + */ + ZT_NETWORK_RULE_ACTION_PRIORITY = 6, + /** * Maximum ID for an ACTION, anything higher is a MATCH */ @@ -934,6 +939,11 @@ typedef struct uint32_t flags; uint16_t length; } fwd; + + /** + * Quality of Service (QoS) bucket we want a frame to be placed in + */ + uint8_t qosBucket; } v; } ZT_VirtualNetworkRule; diff --git a/node/Network.cpp b/node/Network.cpp index a5c2fc3e..69a74281 100644 --- a/node/Network.cpp +++ b/node/Network.cpp @@ -29,6 +29,8 @@ #include #include +#include "../include/ZeroTierDebug.h" + #include "Constants.hpp" #include "../version.h" #include "Network.hpp" @@ -107,7 +109,7 @@ static _doZtFilterResult _doZtFilter( Address &cc, // MUTABLE -- set to TEE destination if TEE action is taken or left alone otherwise unsigned int &ccLength, // MUTABLE -- set to length of packet payload to TEE bool &ccWatch, // MUTABLE -- set to true for WATCH target as opposed to normal TEE - uint8_t &qosBucket) // MUTABLE -- set to the value of the argument provided to the matching action + uint8_t &qosBucket) // MUTABLE -- set to the value of the argument provided to PRIORITY { // Set to true if we are a TEE/REDIRECT/WATCH target bool superAccept = false; @@ -125,6 +127,10 @@ static _doZtFilterResult _doZtFilter( if ((unsigned int)rt <= (unsigned int)ZT_NETWORK_RULE_ACTION__MAX_ID) { if (thisSetMatches) { switch(rt) { + case ZT_NETWORK_RULE_ACTION_PRIORITY: + qosBucket = (rules[rn].v.qosBucket >= 0 || rules[rn].v.qosBucket <= 8) ? rules[rn].v.qosBucket : 4; // 4 = default bucket (no priority) + return DOZTFILTER_ACCEPT; + case ZT_NETWORK_RULE_ACTION_DROP: return DOZTFILTER_DROP; diff --git a/rule-compiler/rule-compiler.js b/rule-compiler/rule-compiler.js index bd84824e..38134b74 100644 --- a/rule-compiler/rule-compiler.js +++ b/rule-compiler/rule-compiler.js @@ -65,7 +65,8 @@ const OPEN_BLOCK_KEYWORDS = { 'tee': true, 'watch': true, 'redirect': true, - 'break': true + 'break': true, + 'priority': true }; // Reserved words that can't be used as tag, capability, or rule set names @@ -81,6 +82,7 @@ const RESERVED_WORDS = { 'watch': true, 'redirect': true, 'break': true, + 'priority': true, 'ztsrc': true, 'ztdest': true, @@ -131,6 +133,7 @@ const KEYWORD_TO_API_MAP = { 'watch': 'ACTION_WATCH', 'redirect': 'ACTION_REDIRECT', 'break': 'ACTION_BREAK', + 'priority': 'ACTION_PRIORITY', 'ztsrc': 'MATCH_SOURCE_ZEROTIER_ADDRESS', 'ztdest': 'MATCH_DEST_ZEROTIER_ADDRESS', -- cgit v1.2.3 From 88abd6ffc61b76fc244b9f7e885d6d9d64cb506e Mon Sep 17 00:00:00 2001 From: Dave Cottlehuber Date: Fri, 22 Jun 2018 21:05:53 +0000 Subject: add support for debug flags in FreeBSD --- include/ZeroTierDebug.h | 5 ++++- make-bsd.mk | 2 +- node/Trace.cpp | 1 + node/Utils.hpp | 16 ++++++++++++++-- 4 files changed, 20 insertions(+), 4 deletions(-) (limited to 'node') diff --git a/include/ZeroTierDebug.h b/include/ZeroTierDebug.h index 8e5366f0..752c40da 100644 --- a/include/ZeroTierDebug.h +++ b/include/ZeroTierDebug.h @@ -76,6 +76,9 @@ #ifdef __APPLE__ #define ZT_THREAD_ID (long)0 // (long)gettid() #endif +#ifdef __FreeBSD__ + #define ZT_THREAD_ID (long)0 // (long)gettid() +#endif #ifdef _WIN32 #define ZT_THREAD_ID (long)0 // #endif @@ -96,7 +99,7 @@ #define DEBUG_INFO(fmt, ...) fprintf(stderr, ZT_GRN "INFO [%ld]: %17s:%5d:%25s: " fmt "\n" \ ZT_RESET, ZT_THREAD_ID, ZT_FILENAME, __LINE__, __FUNCTION__, __VA_ARGS__) #endif - #if defined(__linux__) or defined(__APPLE__) + #if defined(__linux__) or defined(__APPLE__) or defined(__FreeBSD__) #define DEBUG_INFO(fmt, args ...) fprintf(stderr, ZT_GRN "INFO [%ld]: %17s:%5d:%25s: " fmt "\n" \ ZT_RESET, ZT_THREAD_ID, ZT_FILENAME, __LINE__, __FUNCTION__, ##args) #endif diff --git a/make-bsd.mk b/make-bsd.mk index 713cafc0..d17d5803 100644 --- a/make-bsd.mk +++ b/make-bsd.mk @@ -159,7 +159,7 @@ clean: rm -rf *.a *.o node/*.o controller/*.o osdep/*.o service/*.o ext/http-parser/*.o build-* zerotier-one zerotier-idtool zerotier-selftest zerotier-cli $(ONE_OBJS) $(CORE_OBJS) debug: FORCE - gmake -j 4 ZT_DEBUG=1 + $(MAKE) -j ZT_DEBUG=1 install: one rm -f /usr/local/sbin/zerotier-one diff --git a/node/Trace.cpp b/node/Trace.cpp index 386edaac..451f5806 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -40,6 +40,7 @@ #include "Tag.hpp" #include "Capability.hpp" #include "Revocation.hpp" +#include "../include/ZeroTierDebug.h" namespace ZeroTier { diff --git a/node/Utils.hpp b/node/Utils.hpp index a24f2c9a..cb107391 100644 --- a/node/Utils.hpp +++ b/node/Utils.hpp @@ -38,6 +38,10 @@ #include #include +#if defined(__FreeBSD__) +#include +#endif + #include "Constants.hpp" #ifdef __LINUX__ @@ -411,8 +415,12 @@ public: static inline uint64_t hton(uint64_t n) { #if __BYTE_ORDER == __LITTLE_ENDIAN -#if defined(__GNUC__) && (!defined(__OpenBSD__)) +#if defined(__GNUC__) +#if defined(__FreeBSD__) + return bswap64(n); +#elif (!defined(__OpenBSD__)) return __builtin_bswap64(n); +#endif #else return ( ((n & 0x00000000000000FFULL) << 56) | @@ -440,8 +448,12 @@ public: static inline uint64_t ntoh(uint64_t n) { #if __BYTE_ORDER == __LITTLE_ENDIAN -#if defined(__GNUC__) && !defined(__OpenBSD__) +#if defined(__GNUC__) +#if defined(__FreeBSD__) + return bswap64(n); +#elif (!defined(__OpenBSD__)) return __builtin_bswap64(n); +#endif #else return ( ((n & 0x00000000000000FFULL) << 56) | -- cgit v1.2.3 From 7b87e9c0cb671227a10807c52779e8d81400d753 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 11 Sep 2018 16:55:46 -0700 Subject: Added ability for OneService to monitor local.conf for changes and automatically reload --- node/Constants.hpp | 5 ++ service/OneService.cpp | 204 +++++++++++++++++++++++++++---------------------- 2 files changed, 117 insertions(+), 92 deletions(-) (limited to 'node') diff --git a/node/Constants.hpp b/node/Constants.hpp index 0de14b85..73a00c3e 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -275,6 +275,11 @@ */ #define ZT_PING_CHECK_INVERVAL 5000 +/** + * How often the local.conf file is checked for changes + */ +#define ZT_LOCAL_CONF_FILE_CHECK_INTERVAL 10000 + /** * How frequently to check for changes to the system's network interfaces. When * the service decides to use this constant it's because we want to react more diff --git a/service/OneService.cpp b/service/OneService.cpp index fc5c189c..91d10d22 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -67,9 +67,12 @@ #include #include #include +#include +#define stat _stat #else #include #include +#include #include #include #include @@ -468,6 +471,8 @@ public: std::vector< std::string > _interfacePrefixBlacklist; Mutex _localConfig_m; + std::vector explicitBind; + /* * To attempt to handle NAT/gateway craziness we use three local UDP ports: * @@ -639,98 +644,8 @@ public: _node = new Node(this,(void *)0,&cb,OSUtils::now()); } - // Read local configuration - std::vector explicitBind; - { - std::map ppc; - - // LEGACY: support old "trustedpaths" flat file - FILE *trustpaths = fopen((_homePath + ZT_PATH_SEPARATOR_S "trustedpaths").c_str(),"r"); - if (trustpaths) { - fprintf(stderr,"WARNING: 'trustedpaths' flat file format is deprecated in favor of path definitions in local.conf" ZT_EOL_S); - char buf[1024]; - while (fgets(buf,sizeof(buf),trustpaths)) { - int fno = 0; - char *saveptr = (char *)0; - uint64_t trustedPathId = 0; - InetAddress trustedPathNetwork; - for(char *f=Utils::stok(buf,"=\r\n \t",&saveptr);(f);f=Utils::stok((char *)0,"=\r\n \t",&saveptr)) { - if (fno == 0) { - trustedPathId = Utils::hexStrToU64(f); - } else if (fno == 1) { - trustedPathNetwork = InetAddress(f); - } else break; - ++fno; - } - if ( (trustedPathId != 0) && ((trustedPathNetwork.ss_family == AF_INET)||(trustedPathNetwork.ss_family == AF_INET6)) && (trustedPathNetwork.netmaskBits() > 0) ) { - ppc[trustedPathNetwork].trustedPathId = trustedPathId; - ppc[trustedPathNetwork].mtu = 0; // use default - } - } - fclose(trustpaths); - } - - // Read local config file - Mutex::Lock _l2(_localConfig_m); - std::string lcbuf; - if (OSUtils::readFile((_homePath + ZT_PATH_SEPARATOR_S "local.conf").c_str(),lcbuf)) { - try { - _localConfig = OSUtils::jsonParse(lcbuf); - if (!_localConfig.is_object()) { - fprintf(stderr,"WARNING: unable to parse local.conf (root element is not a JSON object)" ZT_EOL_S); - } - } catch ( ... ) { - fprintf(stderr,"WARNING: unable to parse local.conf (invalid JSON)" ZT_EOL_S); - } - } - - // Get any trusted paths in local.conf (we'll parse the rest of physical[] elsewhere) - json &physical = _localConfig["physical"]; - if (physical.is_object()) { - for(json::iterator phy(physical.begin());phy!=physical.end();++phy) { - InetAddress net(OSUtils::jsonString(phy.key(),"").c_str()); - if (net) { - if (phy.value().is_object()) { - uint64_t tpid; - if ((tpid = OSUtils::jsonInt(phy.value()["trustedPathId"],0ULL)) != 0ULL) { - if ((net.ss_family == AF_INET)||(net.ss_family == AF_INET6)) - ppc[net].trustedPathId = tpid; - } - ppc[net].mtu = (int)OSUtils::jsonInt(phy.value()["mtu"],0ULL); // 0 means use default - } - } - } - } - - json &settings = _localConfig["settings"]; - if (settings.is_object()) { - // Allow controller DB path to be put somewhere else - const std::string cdbp(OSUtils::jsonString(settings["controllerDbPath"],"")); - if (cdbp.length() > 0) - _controllerDbPath = cdbp; - - // Bind to wildcard instead of to specific interfaces (disables full tunnel capability) - json &bind = settings["bind"]; - if (bind.is_array()) { - for(unsigned long i=0;i 0) { - InetAddress ip(ips.c_str()); - if ((ip.ss_family == AF_INET)||(ip.ss_family == AF_INET6)) - explicitBind.push_back(ip); - } - } - } - } - - // Set trusted paths if there are any - if (ppc.size() > 0) { - for(std::map::iterator i(ppc.begin());i!=ppc.end();++i) - _node->setPhysicalPathConfiguration(reinterpret_cast(&(i->first)),&(i->second)); - } - } - - // Apply other runtime configuration from local.conf + // local.conf + readLocalSettings(); applyLocalConfig(); // Make sure we can use the primary port, and hunt for one if configured to do so @@ -854,6 +769,7 @@ public: int64_t lastMultipathModeUpdate = 0; int64_t lastCleanedPeersDb = 0; int64_t lastLocalInterfaceAddressCheck = (clockShouldBe - ZT_LOCAL_INTERFACE_CHECK_INTERVAL) + 15000; // do this in 15s to give portmapper time to configure and other things time to settle + int64_t lastLocalConfFileCheck = OSUtils::now(); for(;;) { _run_m.lock(); if (!_run) { @@ -882,6 +798,19 @@ public: _updater->apply(); } + // Reload local.conf if anything changed recently + if ((now - lastLocalConfFileCheck) >= ZT_LOCAL_CONF_FILE_CHECK_INTERVAL) { + lastLocalConfFileCheck = now; + struct stat result; + if(stat((_homePath + ZT_PATH_SEPARATOR_S "local.conf").c_str(), &result)==0) { + int64_t mod_time = result.st_mtime * 1000; + if ((now - mod_time) <= ZT_LOCAL_CONF_FILE_CHECK_INTERVAL) { + readLocalSettings(); + applyLocalConfig(); + } + } + } + // Refresh bindings in case device's interfaces have changed, and also sync routes to update any shadow routes (e.g. shadow default) if (((now - lastBindRefresh) >= (_multipathMode ? ZT_BINDER_REFRESH_PERIOD / 8 : ZT_BINDER_REFRESH_PERIOD))||(restarted)) { lastBindRefresh = now; @@ -999,6 +928,97 @@ public: return _termReason; } + void readLocalSettings() + { + // Read local configuration + std::map ppc; + + // LEGACY: support old "trustedpaths" flat file + FILE *trustpaths = fopen((_homePath + ZT_PATH_SEPARATOR_S "trustedpaths").c_str(),"r"); + if (trustpaths) { + fprintf(stderr,"WARNING: 'trustedpaths' flat file format is deprecated in favor of path definitions in local.conf" ZT_EOL_S); + char buf[1024]; + while (fgets(buf,sizeof(buf),trustpaths)) { + int fno = 0; + char *saveptr = (char *)0; + uint64_t trustedPathId = 0; + InetAddress trustedPathNetwork; + for(char *f=Utils::stok(buf,"=\r\n \t",&saveptr);(f);f=Utils::stok((char *)0,"=\r\n \t",&saveptr)) { + if (fno == 0) { + trustedPathId = Utils::hexStrToU64(f); + } else if (fno == 1) { + trustedPathNetwork = InetAddress(f); + } else break; + ++fno; + } + if ( (trustedPathId != 0) && ((trustedPathNetwork.ss_family == AF_INET)||(trustedPathNetwork.ss_family == AF_INET6)) && (trustedPathNetwork.netmaskBits() > 0) ) { + ppc[trustedPathNetwork].trustedPathId = trustedPathId; + ppc[trustedPathNetwork].mtu = 0; // use default + } + } + fclose(trustpaths); + } + + // Read local config file + Mutex::Lock _l2(_localConfig_m); + std::string lcbuf; + if (OSUtils::readFile((_homePath + ZT_PATH_SEPARATOR_S "local.conf").c_str(),lcbuf)) { + try { + _localConfig = OSUtils::jsonParse(lcbuf); + if (!_localConfig.is_object()) { + fprintf(stderr,"WARNING: unable to parse local.conf (root element is not a JSON object)" ZT_EOL_S); + } + } catch ( ... ) { + fprintf(stderr,"WARNING: unable to parse local.conf (invalid JSON)" ZT_EOL_S); + } + } + + // Get any trusted paths in local.conf (we'll parse the rest of physical[] elsewhere) + json &physical = _localConfig["physical"]; + if (physical.is_object()) { + for(json::iterator phy(physical.begin());phy!=physical.end();++phy) { + InetAddress net(OSUtils::jsonString(phy.key(),"").c_str()); + if (net) { + if (phy.value().is_object()) { + uint64_t tpid; + if ((tpid = OSUtils::jsonInt(phy.value()["trustedPathId"],0ULL)) != 0ULL) { + if ((net.ss_family == AF_INET)||(net.ss_family == AF_INET6)) + ppc[net].trustedPathId = tpid; + } + ppc[net].mtu = (int)OSUtils::jsonInt(phy.value()["mtu"],0ULL); // 0 means use default + } + } + } + } + + json &settings = _localConfig["settings"]; + if (settings.is_object()) { + // Allow controller DB path to be put somewhere else + const std::string cdbp(OSUtils::jsonString(settings["controllerDbPath"],"")); + if (cdbp.length() > 0) + _controllerDbPath = cdbp; + + // Bind to wildcard instead of to specific interfaces (disables full tunnel capability) + json &bind = settings["bind"]; + if (bind.is_array()) { + for(unsigned long i=0;i 0) { + InetAddress ip(ips.c_str()); + if ((ip.ss_family == AF_INET)||(ip.ss_family == AF_INET6)) + explicitBind.push_back(ip); + } + } + } + } + + // Set trusted paths if there are any + if (ppc.size() > 0) { + for(std::map::iterator i(ppc.begin());i!=ppc.end();++i) + _node->setPhysicalPathConfiguration(reinterpret_cast(&(i->first)),&(i->second)); + } + } + virtual ReasonForTermination reasonForTermination() const { Mutex::Lock _l(_termReason_m); -- cgit v1.2.3