From 9574d635c162015a91a9aa8a3fd2c2886689d06a Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Wed, 10 Jan 2018 16:05:39 -0800 Subject: Add Vault configuration option parsing to local.conf { "settings": { ... "valut": { "vaultURL": "...", "vaultKey": "...", "vaultPath": "..." } } } --- service/OneService.cpp | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'service') diff --git a/service/OneService.cpp b/service/OneService.cpp index 4854d68d..56f2551e 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -477,6 +477,12 @@ public: PortMapper *_portMapper; #endif + // HashiCorp Vault Settings + bool _vaultEnabled; + std::string _vaultURL; + std::string _vaultKey; + std::string _vaultPath; // defaults to cubbyhole/zerotier/identity.secret for per-access key storage + // Set to false to force service to stop volatile bool _run; Mutex _run_m; @@ -509,6 +515,10 @@ public: #ifdef ZT_USE_MINIUPNPC ,_portMapper((PortMapper *)0) #endif + ,_vaultEnabled(false) + ,_vaultURL() + ,_vaultKey() + ,_vaultPath("cubbyhole/zerotier/identity.secret") ,_run(true) { _ports[0] = 0; @@ -653,6 +663,9 @@ public: for(std::map::iterator i(ppc.begin());i!=ppc.end();++i) _node->setPhysicalPathConfiguration(reinterpret_cast(&(i->first)),&(i->second)); } + + json &vaultConfig = _localConfig["vault"]; + } // Apply other runtime configuration from local.conf @@ -1510,6 +1523,24 @@ public: _allowManagementFrom.push_back(nw); } } + + json &vault = settings["valut"]; + if (vault.is_object()) { + const std::string url(OSUtils::jsonString(vault["vaultURL"], "").c_str()); + if (!url.empty()) + _vaultURL = url; + + const std::string key(OSUtils::jsonString(vault["vaultKey"], "").c_str()); + if (!key.empty()) + _vaultKey = key; + + const std::string path(OSUtils::jsonString(vault["vaultPath"], "").c_str()); + if (!path.empty()) + _vaultPath = path; + + if (!_vaultURL.empty() && !_vaultKey.empty()) + _vaultEnabled = true; + } } // Checks if a managed IP or 
route target is allowed -- cgit v1.2.3 From 5ff0653f9e194a499fe1ac085b1a9b1b51643017 Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Wed, 10 Jan 2018 16:50:58 -0800 Subject: Hooks into StateGet and StatePut for grabbing identity.secret from Vault --- service/OneService.cpp | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) (limited to 'service') diff --git a/service/OneService.cpp b/service/OneService.cpp index 56f2551e..672e3799 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -480,7 +480,7 @@ public: // HashiCorp Vault Settings bool _vaultEnabled; std::string _vaultURL; - std::string _vaultKey; + std::string _vaultToken; std::string _vaultPath; // defaults to cubbyhole/zerotier/identity.secret for per-access key storage // Set to false to force service to stop @@ -517,7 +517,7 @@ public: #endif ,_vaultEnabled(false) ,_vaultURL() - ,_vaultKey() + ,_vaultToken() ,_vaultPath("cubbyhole/zerotier/identity.secret") ,_run(true) { @@ -1530,15 +1530,15 @@ public: if (!url.empty()) _vaultURL = url; - const std::string key(OSUtils::jsonString(vault["vaultKey"], "").c_str()); - if (!key.empty()) - _vaultKey = key; + const std::string token(OSUtils::jsonString(vault["vaultToken"], "").c_str()); + if (!token.empty()) + _vaultToken = token; const std::string path(OSUtils::jsonString(vault["vaultPath"], "").c_str()); if (!path.empty()) _vaultPath = path; - if (!_vaultURL.empty() && !_vaultKey.empty()) + if (!_vaultURL.empty() && !_vaultToken.empty()) _vaultEnabled = true; } } @@ -2109,8 +2109,18 @@ public: } } + inline void nodeVaultPutIdentitySecret(const void *data, int len) + { + return; + } + inline void nodeStatePutFunction(enum ZT_StateObjectType type,const uint64_t id[2],const void *data,int len) { + if (_vaultEnabled && type == ZT_STATE_OBJECT_IDENTITY_SECRET) { + nodeVaultPutIdentitySecret(data, len); + return; + } + char p[1024]; FILE *f; bool secure = false; @@ -2176,9 +2186,22 @@ public: 
OSUtils::rm(p); } } + + inline int nodeVaultGetIdentitySecret(void *data, unsigned int maxlen) + { + return 0; + } inline int nodeStateGetFunction(enum ZT_StateObjectType type,const uint64_t id[2],void *data,unsigned int maxlen) { + if (_vaultEnabled && type == ZT_STATE_OBJECT_IDENTITY_SECRET) { + int retval = nodeVaultGetIdentitySecret(data, maxlen); + if (retval >= 0) + return retval; + + // else continue file based lookup + } + char p[4096]; switch(type) { case ZT_STATE_OBJECT_IDENTITY_PUBLIC: @@ -2206,6 +2229,15 @@ public: if (f) { int n = (int)fread(data,1,maxlen,f); fclose(f); + + if (_vaultEnabled && type == ZT_STATE_OBJECT_IDENTITY_SECRET) { + // If we've gotten here while Vault is enabled, Vault does not know the key and it's been + // read from disk instead. + // + // We should put the value in Vault and remove the local file. + nodeVaultPutIdentitySecret(data, n); + unlink(p); + } if (n >= 0) return n; } -- cgit v1.2.3 From 77930607230eccdcb7011f6c70465bab3c32cc38 Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Fri, 16 Feb 2018 14:30:27 -0800 Subject: Add HashiCorp Vault storage of ZeroTier's public & secret identity Adds a "vault" section to local.conf. Example local.conf: { "config": { "vault": { "vaultURL": "https://some.vault.host:8200", "vaultToken": "my-super-secret-vault-token", "vaultPath": "secure/place/to/put/identity" } } Additionally, the following environment variables can be set. Environment variables override local.conf: VAULT_ADDR VAULT_TOKEN VAULT_PATH Identities will be placed in the keys "public" and "secret" under the user specified path. If no path is specified, they will be placed in the token specific cubbyhole. If identity.public and identity.secret exist on disk and vault is configured, they will be automatically added to Vault and removed from disk. TODO: * Decide behavior for if Vault cannot be reached. 
* Add libcurl as a dependency in Linux & Mac builds * Add libcurl as a requirement for linux packages --- service/OneService.cpp | 262 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 224 insertions(+), 38 deletions(-) (limited to 'service') diff --git a/service/OneService.cpp b/service/OneService.cpp index 672e3799..7013d9dc 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -81,6 +81,12 @@ #include "../ext/http-parser/http_parser.h" #endif +#if ZT_VAULT_SUPPORT +extern "C" { +#include +} +#endif + #include "../ext/json/json.hpp" using json = nlohmann::json; @@ -158,6 +164,14 @@ namespace ZeroTier { typedef BSDEthernetTap EthernetTap; } // TCP activity timeout #define ZT_TCP_ACTIVITY_TIMEOUT 60000 +#if ZT_VAULT_SUPPORT +size_t curlResponseWrite(void *ptr, size_t size, size_t nmemb, std::string *data) +{ + data->append((char*)ptr, size * nmemb); + return size * nmemb; +} +#endif + namespace ZeroTier { namespace { @@ -478,10 +492,12 @@ public: #endif // HashiCorp Vault Settings +#if ZT_VAULT_SUPPORT bool _vaultEnabled; std::string _vaultURL; std::string _vaultToken; std::string _vaultPath; // defaults to cubbyhole/zerotier/identity.secret for per-access key storage +#endif // Set to false to force service to stop volatile bool _run; @@ -518,12 +534,16 @@ public: ,_vaultEnabled(false) ,_vaultURL() ,_vaultToken() - ,_vaultPath("cubbyhole/zerotier/identity.secret") + ,_vaultPath("cubbyhole/zerotier") ,_run(true) { _ports[0] = 0; _ports[1] = 0; _ports[2] = 0; + +#if ZT_VAULT_SUPPORT + curl_global_init(CURL_GLOBAL_DEFAULT); +#endif } virtual ~OneServiceImpl() @@ -560,20 +580,6 @@ public: _authToken = _trimString(_authToken); } - { - struct ZT_Node_Callbacks cb; - cb.version = 0; - cb.stateGetFunction = SnodeStateGetFunction; - cb.statePutFunction = SnodeStatePutFunction; - cb.wirePacketSendFunction = SnodeWirePacketSendFunction; - cb.virtualNetworkFrameFunction = SnodeVirtualNetworkFrameFunction; - cb.virtualNetworkConfigFunction = 
SnodeVirtualNetworkConfigFunction; - cb.eventCallback = SnodeEventCallback; - cb.pathCheckFunction = SnodePathCheckFunction; - cb.pathLookupFunction = SnodePathLookupFunction; - _node = new Node(this,(void *)0,&cb,OSUtils::now()); - } - // Read local configuration std::vector explicitBind; { @@ -663,14 +669,25 @@ public: for(std::map::iterator i(ppc.begin());i!=ppc.end();++i) _node->setPhysicalPathConfiguration(reinterpret_cast(&(i->first)),&(i->second)); } - - json &vaultConfig = _localConfig["vault"]; - } // Apply other runtime configuration from local.conf applyLocalConfig(); + { + struct ZT_Node_Callbacks cb; + cb.version = 0; + cb.stateGetFunction = SnodeStateGetFunction; + cb.statePutFunction = SnodeStatePutFunction; + cb.wirePacketSendFunction = SnodeWirePacketSendFunction; + cb.virtualNetworkFrameFunction = SnodeVirtualNetworkFrameFunction; + cb.virtualNetworkConfigFunction = SnodeVirtualNetworkConfigFunction; + cb.eventCallback = SnodeEventCallback; + cb.pathCheckFunction = SnodePathCheckFunction; + cb.pathLookupFunction = SnodePathLookupFunction; + _node = new Node(this, (void *)0, &cb, OSUtils::now()); + } + // Make sure we can use the primary port, and hunt for one if configured to do so const int portTrials = (_primaryPort == 0) ? 256 : 1; // if port is 0, pick random for(int k=0;k= 0) return retval; // else continue file based lookup } +#endif char p[4096]; switch(type) { @@ -2229,15 +2413,17 @@ public: if (f) { int n = (int)fread(data,1,maxlen,f); fclose(f); - - if (_vaultEnabled && type == ZT_STATE_OBJECT_IDENTITY_SECRET) { +#if ZT_VAULT_SUPPORT + if (_vaultEnabled && (type == ZT_STATE_OBJECT_IDENTITY_SECRET || type == ZT_STATE_OBJECT_IDENTITY_PUBLIC)) { // If we've gotten here while Vault is enabled, Vault does not know the key and it's been // read from disk instead. // // We should put the value in Vault and remove the local file. 
- nodeVaultPutIdentitySecret(data, n); - unlink(p); + if (nodeVaultPutIdentity(type, data, n)) { + unlink(p); + } } +#endif if (n >= 0) return n; } -- cgit v1.2.3 From 2d289a330870458ed5f0be4619d48eb551cb152d Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Fri, 16 Feb 2018 15:20:07 -0800 Subject: Just a little cleanup --- service/OneService.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'service') diff --git a/service/OneService.cpp b/service/OneService.cpp index 7013d9dc..7340c954 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -531,10 +531,12 @@ public: #ifdef ZT_USE_MINIUPNPC ,_portMapper((PortMapper *)0) #endif +#ifdef ZT_VAULT_SUPPORT ,_vaultEnabled(false) ,_vaultURL() ,_vaultToken() ,_vaultPath("cubbyhole/zerotier") +#endif ,_run(true) { _ports[0] = 0; @@ -551,6 +553,10 @@ public: _binder.closeAll(_phy); _phy.close(_localControlSocket4); _phy.close(_localControlSocket6); +#if ZT_VAULT_SUPPORT + curl_global_cleanup(); +#endif + #ifdef ZT_USE_MINIUPNPC delete _portMapper; #endif @@ -1541,6 +1547,7 @@ public: } } +#if ZT_VAULT_SUPPORT json &vault = settings["vault"]; if (vault.is_object()) { const std::string url(OSUtils::jsonString(vault["vaultURL"], "").c_str()); @@ -1579,6 +1586,7 @@ public: if (!_vaultURL.empty() && !_vaultToken.empty()) { _vaultEnabled = true; } +#endif } // Checks if a managed IP or route target is allowed -- cgit v1.2.3 From 836d897aecc193ec3477e67858237a3f97819024 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 1 May 2018 13:39:06 -0700 Subject: Re-ordered local.conf and node init logic, also split software update config into separate method to fix bad node pointer reference --- service/OneService.cpp | 56 ++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 24 deletions(-) (limited to 'service') diff --git a/service/OneService.cpp b/service/OneService.cpp index e7229717..80b42818 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -591,10 +591,9 
@@ public: } // Read local configuration + std::map ppc; std::vector explicitBind; { - std::map ppc; - // LEGACY: support old "trustedpaths" flat file FILE *trustpaths = fopen((_homePath + ZT_PATH_SEPARATOR_S "trustedpaths").c_str(),"r"); if (trustpaths) { @@ -673,12 +672,6 @@ public: } } } - - // Set trusted paths if there are any - if (ppc.size() > 0) { - for(std::map::iterator i(ppc.begin());i!=ppc.end();++i) - _node->setPhysicalPathConfiguration(reinterpret_cast(&(i->first)),&(i->second)); - } } // Apply other runtime configuration from local.conf @@ -698,6 +691,16 @@ public: _node = new Node(this, (void *)0, &cb, OSUtils::now()); } + // Apply software update specific configuration from local.conf + applySoftwareUpdateLocalConfig(); + + // Set trusted paths if there are any + if (ppc.size() > 0) { + for(std::map::iterator i(ppc.begin());i!=ppc.end();++i) + _node->setPhysicalPathConfiguration(reinterpret_cast(&(i->first)),&(i->second)); + } + ppc.clear(); + // Make sure we can use the primary port, and hunt for one if configured to do so const int portTrials = (_primaryPort == 0) ? 
256 : 1; // if port is 0, pick random for(int k=0;ksetUpdateDistribution(udist); - _updater->setChannel(OSUtils::jsonString(settings["softwareUpdateChannel"],ZT_SOFTWARE_UPDATE_DEFAULT_CHANNEL)); - } else { - delete _updater; - _updater = (SoftwareUpdater *)0; - _updateAutoApply = false; - } -#endif - json &ignoreIfs = settings["interfacePrefixBlacklist"]; if (ignoreIfs.is_array()) { for(unsigned long i=0;isetUpdateDistribution(udist); + _updater->setChannel(OSUtils::jsonString(settings["softwareUpdateChannel"],ZT_SOFTWARE_UPDATE_DEFAULT_CHANNEL)); + } else { + delete _updater; + _updater = (SoftwareUpdater *)0; + _updateAutoApply = false; + } +#endif + } + // Checks if a managed IP or route target is allowed bool checkIfManagedIsAllowed(const NetworkState &n,const InetAddress &target) { -- cgit v1.2.3 From 6a2ba4baca326272c45930208b70cfedf8cb1638 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 1 May 2018 16:32:15 -0700 Subject: Introduced basic multipath support --- include/ZeroTierOne.h | 36 ++++++ node/Constants.hpp | 92 ++++++++++++++ node/IncomingPacket.cpp | 5 +- node/Multicaster.cpp | 2 +- node/Node.cpp | 4 +- node/Node.hpp | 5 + node/Path.hpp | 265 +++++++++++++++++++++++++++++++++++++++- node/Peer.cpp | 289 +++++++++++++++++++++++++++++++++++++++++--- node/Peer.hpp | 38 ++++-- node/RingBuffer.hpp | 315 ++++++++++++++++++++++++++++++++++++++++++++++++ node/Switch.cpp | 6 +- node/Topology.hpp | 2 +- node/Trace.cpp | 20 +++ node/Trace.hpp | 6 + osdep/Binder.hpp | 15 +++ osdep/Phy.hpp | 195 +++++++++++++++++++++++++++++- service/OneService.cpp | 56 ++++++++- 17 files changed, 1310 insertions(+), 41 deletions(-) create mode 100644 node/RingBuffer.hpp (limited to 'service') diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index 6da53a73..15e15bd1 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -422,6 +422,42 @@ enum ZT_ResultCode */ #define ZT_ResultCode_isFatal(x) ((((int)(x)) >= 100)&&(((int)(x)) < 1000)) +/** + * The 
multipath algorithm in use by this node. + */ +enum ZT_MultipathMode +{ + /** + * No active multipath. + * + * Traffic is merely sent over the strongest path. That being + * said, this mode will automatically failover in the event that a link goes down. + */ + ZT_MULTIPATH_NONE = 0, + + /** + * Traffic is randomly distributed among all active paths. + * + * Will cease sending traffic over links that appear to be stale. + */ + ZT_MULTIPATH_RANDOM = 1, + + /** + * Traffic is allocated across all active paths in proportion to their strength and + * reliability. + * + * Will cease sending traffic over links that appear to be stale. + */ + ZT_MULTIPATH_PROPORTIONALLY_BALANCED = 2, + + /** + * Traffic is allocated across a user-defined interface/allocation + * + * Will cease sending traffic over links that appear to be stale. + */ + ZT_MULTIPATH_MANUALLY_BALANCED = 3 +}; + /** * Status codes sent to status update callback when things happen */ diff --git a/node/Constants.hpp b/node/Constants.hpp index e2a35dce..80083abe 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -267,6 +267,98 @@ */ #define ZT_PING_CHECK_INVERVAL 5000 +/** + * Length of interface name + */ +#define ZT_PATH_INTERFACE_NAME_SZ 16 + +/** + * How frequently to check for changes to the system's network interfaces. When + * the service decides to use this constant it's because we want to react more + * quickly to new interfaces that pop up or go down. + */ +#define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000 + +/** + * Path choice history window size. This is used to keep track of which paths were + * previously selected so that we can maintain a target allocation over time. + */ +#define ZT_MULTIPATH_PROPORTION_WIN_SZ 128 + +/** + * Threshold for flow to be considered balanced. 
+ */ +#define ZT_MULTIPATH_FLOW_BALANCE_THESHOLD 0.80 + +/** + * Number of samples to consider when computing path statistics + */ +#define ZT_PATH_QUALITY_METRIC_WIN_SZ 128 + +/** + * How often important path metrics are sampled (in ms). These metrics are later used + * for path quality estimates + */ +#define ZT_PATH_QUALITY_SAMPLE_INTERVAL 100 + +/** + * How often new path quality estimates are computed + */ +#define ZT_PATH_QUALITY_ESTIMATE_INTERVAL 100 + +/** + * How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL + * since we will record a 0 bit/s measurement if no valid latency measurement was made within this + * window of time. + */ +#define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_PING_CHECK_INVERVAL * 2 + +/** + * Interval used for rate-limiting the computation of path quality estimates. Set at 0 + * to compute as new packets arrive with no delay. + */ +#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 0 + +/** + * Path error rate history window size. This is used to keep track of packet error + * measurements over a path's medium-term history. + */ +#define ZT_PATH_ERROR_HIST_WIN_SZ 10 + +/** + * The number of packet error measurements in each sample + */ +#define ZT_PATH_ERROR_SAMPLE_WIN_SZ 1024 + +/** + * How often a peer will prune its own paths. Pruning is important when multipath is + * enabled because we want to prevent the allocation algorithms from sending anything + * out on known dead paths. Additionally, quickly marking paths as dead helps when + * a new path is learned and needs to replace an older path. + */ +#define ZT_CLOSED_PATH_PRUNING_INTERVAL 1000 + +/** + * Datagram used to test link throughput. Contents are random. + */ +#define ZT_LINK_TEST_DATAGRAM_SZ 1024 + +/** + * Size of datagram expected as a reply to a link speed test + */ +#define ZT_LINK_TEST_DATAGRAM_RESPONSE_SZ 8 + +/** + * Time before a link test datagram is considered lost. 
Any corresponding + * timing records that would have been used to compute a RTT are purged. + */ +#define ZT_LINK_TEST_TIMEOUT 10000 + +/** + * How often the service tests the link throughput. + */ +#define ZT_LINK_SPEED_TEST_INTERVAL 1000 + /** * How frequently to send heartbeats over in-use paths */ diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index ff4fc94b..8f6dda63 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -80,6 +80,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) if (!trusted) { if (!dearmor(peer->key())) { RR->t->incomingPacketMessageAuthenticationFailure(tPtr,_path,packetId(),sourceAddress,hops(),"invalid MAC"); + _path->recordPacket(false); return true; } } @@ -89,6 +90,8 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) return true; } + _path->recordPacket(true); + const Packet::Verb v = verb(); switch(v) { //case Packet::VERB_NOP: @@ -446,7 +449,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP } if (!hops()) - _path->updateLatency((unsigned int)latency); + _path->updateLatency((unsigned int)latency, RR->node->now()); peer->setRemoteVersion(vProto,vMajor,vMinor,vRevision); diff --git a/node/Multicaster.cpp b/node/Multicaster.cpp index 753e4ee0..b2a5a205 100644 --- a/node/Multicaster.cpp +++ b/node/Multicaster.cpp @@ -190,7 +190,7 @@ void Multicaster::send( for(unsigned int i=0;i p(RR->topology->getPeerNoCache(multicastReplicators[i])); if ((p)&&(p->isAlive(now))) { - const SharedPtr pp(p->getBestPath(now,false)); + const SharedPtr pp(p->getAppropriatePath(now,false)); if ((pp)&&(pp->latency() < bestMulticastReplicatorLatency)) { bestMulticastReplicatorLatency = pp->latency(); bestMulticastReplicatorPath = pp; diff --git a/node/Node.cpp b/node/Node.cpp index db511430..71e5b6a7 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -234,7 +234,7 @@ public: } if ((!contacted)&&(_bestCurrentUpstream)) { - const SharedPtr 
up(_bestCurrentUpstream->getBestPath(_now,true)); + const SharedPtr up(_bestCurrentUpstream->getAppropriatePath(_now,true)); if (up) p->sendHELLO(_tPtr,up->localSocket(),up->address(),_now); } @@ -465,7 +465,7 @@ ZT_PeerList *Node::peers() const p->role = RR->topology->role(pi->second->identity().address()); std::vector< SharedPtr > paths(pi->second->paths(_now)); - SharedPtr bestp(pi->second->getBestPath(_now,false)); + SharedPtr bestp(pi->second->getAppropriatePath(_now,false)); p->pathCount = 0; for(std::vector< SharedPtr >::iterator path(paths.begin());path!=paths.end();++path) { ZT_FAST_MEMCPY(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage)); diff --git a/node/Node.hpp b/node/Node.hpp index 79284b63..4ad0e3cd 100644 --- a/node/Node.hpp +++ b/node/Node.hpp @@ -260,6 +260,9 @@ public: inline const Address &remoteTraceTarget() const { return _remoteTraceTarget; } inline Trace::Level remoteTraceLevel() const { return _remoteTraceLevel; } + inline void setMultipathMode(uint8_t mode) { _multipathMode = mode; } + inline uint8_t getMultipathMode() { return _multipathMode; } + private: RuntimeEnvironment _RR; RuntimeEnvironment *RR; @@ -284,6 +287,8 @@ private: Address _remoteTraceTarget; enum Trace::Level _remoteTraceLevel; + uint8_t _multipathMode; + volatile int64_t _now; int64_t _lastPingCheck; int64_t _lastHousekeepingRun; diff --git a/node/Path.hpp b/node/Path.hpp index e12328ff..5751d326 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -39,6 +39,9 @@ #include "SharedPtr.hpp" #include "AtomicCounter.hpp" #include "Utils.hpp" +#include "RingBuffer.hpp" + +#include "../osdep/Phy.hpp" /** * Maximum return value of preferenceRank() @@ -55,6 +58,7 @@ class RuntimeEnvironment; class Path { friend class SharedPtr; + Phy *_phy; public: /** @@ -93,22 +97,71 @@ public: _lastOut(0), _lastIn(0), _lastTrustEstablishedPacketReceived(0), + _lastPathQualityComputeTime(0), _localSocket(-1), _latency(0xffff), _addr(), - 
_ipScope(InetAddress::IP_SCOPE_NONE) + _ipScope(InetAddress::IP_SCOPE_NONE), + _currentPacketSampleCounter(0), + _meanPacketErrorRatio(0.0), + _meanLatency(0.0), + _lastLatencyUpdate(0), + _jitter(0.0), + _lastPathQualitySampleTime(0), + _lastComputedQuality(0.0), + _lastPathQualityEstimate(0), + _meanAge(0.0), + _meanThroughput(0.0), + _packetLossRatio(0) { + memset(_ifname, 0, sizeof(_ifname)); + memset(_addrString, 0, sizeof(_addrString)); + _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _ageSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _errSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); } Path(const int64_t localSocket,const InetAddress &addr) : _lastOut(0), _lastIn(0), _lastTrustEstablishedPacketReceived(0), + _lastPathQualityComputeTime(0), _localSocket(localSocket), _latency(0xffff), _addr(addr), - _ipScope(addr.ipScope()) + _ipScope(addr.ipScope()), + _currentPacketSampleCounter(0), + _meanPacketErrorRatio(0.0), + _meanLatency(0.0), + _lastLatencyUpdate(0), + _jitter(0.0), + _lastPathQualitySampleTime(0), + _lastComputedQuality(0.0), + _lastPathQualityEstimate(0), + _meanAge(0.0), + _meanThroughput(0.0), + _packetLossRatio(0) + { + memset(_ifname, 0, sizeof(_ifname)); + memset(_addrString, 0, sizeof(_addrString)); + _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _ageSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _errSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + } + + ~Path() { + delete _throughputSamples; + delete _ageSamples; + delete _latencySamples; + delete _errSamples; + + _throughputSamples = NULL; + _ageSamples = NULL; + _latencySamples = NULL; + _errSamples = NULL; } /** @@ -147,12 +200,17 @@ public: * * @param l Measured latency */ - inline void updateLatency(const unsigned int l) + inline void updateLatency(const unsigned 
int l, int64_t now) { unsigned int pl = _latency; - if (pl < 0xffff) + if (pl < 0xffff) { _latency = (pl + l) / 2; - else _latency = l; + } + else { + _latency = l; + } + _lastLatencyUpdate = now; + _latencySamples->push(l); } /** @@ -240,11 +298,180 @@ public: return (((age < (ZT_PATH_HEARTBEAT_PERIOD + 5000)) ? l : (l + 0xffff + age)) * (long)((ZT_INETADDRESS_MAX_SCOPE - _ipScope) + 1)); } + /** + * @return An estimate of path quality -- higher is better. + */ + inline float computeQuality(const int64_t now) + { + float latency_contrib = _meanLatency ? 1.0 / _meanLatency : 0; + float jitter_contrib = _jitter ? 1.0 / _jitter : 0; + float throughput_contrib = _meanThroughput ? _meanThroughput / 1000000 : 0; // in Mbps + float age_contrib = _meanAge > 0 ? (float)sqrt(_meanAge) : 1; + float error_contrib = 1.0 - _meanPacketErrorRatio; + float sum = (latency_contrib + jitter_contrib + throughput_contrib + error_contrib) / age_contrib; + _lastComputedQuality = sum * (long)((_ipScope) + 1); + return _lastComputedQuality; + } + + /** + * Since quality estimates can become expensive we should cache the most recent result for traffic allocation + * algorithms which may need to reference this value multiple times through the course of their execution. 
+ */ + inline float lastComputedQuality() { + return _lastComputedQuality; + } + + /** + * @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to + */ + inline char *getName() { return _ifname; } + + /** + * @return Estimated throughput in bps of this link + */ + inline uint64_t getThroughput() { return _phy->getThroughput((PhySocket *)((uintptr_t)_localSocket)); } + + /** + * @return Packet delay varience + */ + inline float jitter() { return _jitter; } + + /** + * @return Previously-computed mean latency + */ + inline float meanLatency() { return _meanLatency; } + + /** + * @return Packet loss rate + */ + inline float packetLossRatio() { return _packetLossRatio; } + + /** + * @return Mean packet error ratio + */ + inline float meanPacketErrorRatio() { return _meanPacketErrorRatio; } + + /** + * @return Current packet error ratio (possibly incomplete sample set) + */ + inline float currentPacketErrorRatio() { + int errorsPerSample = 0; + for (int i=0; i<_currentPacketSampleCounter; i++) { + if (_packetValidity[i] == false) { + errorsPerSample++; + } + } + return (float)errorsPerSample / (float)ZT_PATH_ERROR_SAMPLE_WIN_SZ; + } + + /** + * @return Whether the Path's local socket is in a CLOSED state + */ + inline bool isClosed() { return _phy->isClosed((PhySocket *)((uintptr_t)_localSocket)); } + + /** + * @return The state of a Path's local socket + */ + inline int getState() { return _phy->getState((PhySocket *)((uintptr_t)_localSocket)); } + + /** + * @return Whether this socket may have been erased by the virtual physical link layer + */ + inline bool isValidState() { return _phy->isValidState((PhySocket *)((uintptr_t)_localSocket)); } + + /** + * @return Whether the path quality monitors have collected enough data to provide a quality value + * TODO: expand this + */ + inline bool monitorsReady() { + return _latencySamples->count() && _ageSamples->count() && _throughputSamples->count(); + } + + /** + * 
@return A pointer to a cached copy of the address string for this Path (For debugging only) + */ + inline char *getAddressString() { return _addrString; } + + /** + * Handle path sampling, computation of quality estimates, and other periodic tasks + * @param now Current time + */ + inline void measureLink(int64_t now) { + // Sample path properties and store them in a continuously-revolving buffer + if (now - _lastPathQualitySampleTime > ZT_PATH_QUALITY_SAMPLE_INTERVAL) { + _lastPathQualitySampleTime = now; + _throughputSamples->push(getThroughput()); // Thoughtput in bits/s + _ageSamples->push(now - _lastIn); // Age (time since last received packet) + if (now - _lastLatencyUpdate > ZT_PATH_LATENCY_SAMPLE_INTERVAL) { + _lastLatencyUpdate = now; + // Record 0 bp/s. Since we're using this to detect possible packet loss + updateLatency(0, now); + } + } + // Compute statistical values for use in link quality estimates + if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { + _lastPathQualityComputeTime = now; + // Cache Path address string + address().toString(_addrString); + _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, ZT_PATH_INTERFACE_NAME_SZ); // Cache Interface name + // Derived values + if (_throughputSamples->count()) { + _packetLossRatio = (float)_throughputSamples->zeroCount() / (float)_throughputSamples->count(); + } + _meanThroughput = _throughputSamples->mean(); + _meanAge = _ageSamples->mean(); + _meanLatency = _latencySamples->mean(); + // Jitter + // SEE: RFC 3393, RFC 4689 + _jitter = _latencySamples->stddev(); + _meanPacketErrorRatio = _errSamples->mean(); // Packet Error Ratio (PER) + } + // Periodically compute a path quality estimate + if (now - _lastPathQualityEstimate > ZT_PATH_QUALITY_ESTIMATE_INTERVAL) { + computeQuality(now); + } + } + + /** + * Record whether a packet is considered invalid by MAC/compression/cipher checks. This + * could be an indication of a bit error. 
This function will keep a running counter of + * up to a given window size and with each counter overflow it will compute a mean error rate + * and store that in a continuously shifting sample window. + * + * @param isValid Whether the packet in question is considered invalid + */ + inline void recordPacket(bool isValid) { + if (_currentPacketSampleCounter < ZT_PATH_ERROR_SAMPLE_WIN_SZ) { + _packetValidity[_currentPacketSampleCounter] = isValid; + _currentPacketSampleCounter++; + } + else { + // Sample array is full, compute an mean and stick it in the ring buffer for trend analysis + _errSamples->push(currentPacketErrorRatio()); + _currentPacketSampleCounter=0; + } + } + + /** + * @return The mean age (in ms) of this link + */ + inline float meanAge() { return _meanAge; } + + /** + * @return The mean throughput (in bits/s) of this link + */ + inline float meanThroughput() { return _meanThroughput; } + /** * @return True if this path is alive (receiving heartbeats) */ inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); } + /** + * @return True if this path hasn't received a packet in a "significant" amount of time + */ + inline bool stale(const int64_t now) const { return ((now - _lastIn) > ZT_LINK_SPEED_TEST_INTERVAL * 10); } + /** * @return True if this path needs a heartbeat */ @@ -269,11 +496,39 @@ private: volatile int64_t _lastOut; volatile int64_t _lastIn; volatile int64_t _lastTrustEstablishedPacketReceived; + volatile int64_t _lastPathQualityComputeTime; int64_t _localSocket; volatile unsigned int _latency; InetAddress _addr; InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often AtomicCounter __refCount; + + // Packet Error Ratio (PER) + int _packetValidity[ZT_PATH_ERROR_SAMPLE_WIN_SZ]; + int _currentPacketSampleCounter; + volatile float _meanPacketErrorRatio; + + // Latency and Jitter + volatile float _meanLatency; + int64_t _lastLatencyUpdate; + volatile float 
_jitter; + + int64_t _lastPathQualitySampleTime; + float _lastComputedQuality; + int64_t _lastPathQualityEstimate; + float _meanAge; + float _meanThroughput; + + // Circular buffers used to efficiently store large time series + RingBuffer *_throughputSamples; + RingBuffer *_latencySamples; + RingBuffer *_ageSamples; + RingBuffer *_errSamples; + + float _packetLossRatio; + + char _ifname[ZT_PATH_INTERFACE_NAME_SZ]; + char _addrString[256]; }; } // namespace ZeroTier diff --git a/node/Peer.cpp b/node/Peer.cpp index 71afd852..862a4529 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -35,6 +35,7 @@ #include "Packet.hpp" #include "Trace.hpp" #include "InetAddress.hpp" +#include "RingBuffer.hpp" namespace ZeroTier { @@ -59,10 +60,14 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _vRevision(0), _id(peerIdentity), _directPathPushCutoffCount(0), - _credentialsCutoffCount(0) + _credentialsCutoffCount(0), + _linkBalanceStatus(false), + _linkRedundancyStatus(false) { if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) throw ZT_EXCEPTION_INVALID_ARGUMENT; + _pathChoiceHist = new RingBuffer(ZT_MULTIPATH_PROPORTION_WIN_SZ); + _flowBalanceHist = new RingBuffer(ZT_MULTIPATH_PROPORTION_WIN_SZ); } void Peer::received( @@ -95,6 +100,18 @@ void Peer::received( path->trustedPacketReceived(now); } + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { + _lastPathPrune = now; + prunePaths(); + } + for(unsigned int i=0;imeasureLink(now); + } + } + } + if (hops == 0) { // If this is a direct packet (no hops), update existing paths or learn new ones @@ -232,26 +249,246 @@ void Peer::received( } } -SharedPtr Peer::getBestPath(int64_t now,bool includeExpired) const +SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) { Mutex::Lock _l(_paths_m); - unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS; - long bestPathQuality = 2147483647; + + /** + * Send 
traffic across the highest quality path only. This algorithm will still + * use the old path quality metric. + */ + if (RR->node->getMultipathMode() == ZT_MULTIPATH_NONE) { + long bestPathQuality = 2147483647; + for(unsigned int i=0;iisValidState()) { + if ((includeExpired)||((now - _paths[i].lr) < ZT_PEER_PATH_EXPIRATION)) { + const long q = _paths[i].p->quality(now) / _paths[i].priority; + if (q <= bestPathQuality) { + bestPathQuality = q; + bestPath = i; + } + } + } else break; + } + if (bestPath != ZT_MAX_PEER_NETWORK_PATHS) { + return _paths[bestPath].p; + } + return SharedPtr(); + } + + if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { + _lastPathPrune = now; + prunePaths(); + } for(unsigned int i=0;iquality(now) / _paths[i].priority; - if (q <= bestPathQuality) { - bestPathQuality = q; + _paths[i].p->measureLink(now); + } + } + + /** + * Randomly distribute traffic across all paths + * + * Behavior: + * - If path DOWN: Stop randomly choosing that path + * - If path UP: Start randomly choosing that path + * - If all paths are unresponsive: randomly choose from all paths + */ + int numAlivePaths = 0; + int numStalePaths = 0; + if (RR->node->getMultipathMode() == ZT_MULTIPATH_RANDOM) { + int alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; + int stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; + memset(&alivePaths, -1, sizeof(alivePaths)); + memset(&stalePaths, -1, sizeof(stalePaths)); + for(unsigned int i=0;iisValidState()) { + if (_paths[i].p->alive(now)) { + alivePaths[numAlivePaths] = i; + numAlivePaths++; + } + else { + stalePaths[numStalePaths] = i; + numStalePaths++; + } + } + } + } + unsigned int r; + Utils::getSecureRandom(&r, 1); + if (numAlivePaths > 0) { + // pick a random out of the set deemed "alive" + int rf = (float)(r %= numAlivePaths); + return _paths[alivePaths[rf]].p; + } + else if(numStalePaths > 0) { + // resort to trying any non-expired path + int rf = (float)(r %= numStalePaths); + return _paths[stalePaths[rf]].p; + } + } + + /** + * 
Proportionally allocate traffic according to dynamic path quality measurements + */ + if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) { + float relq[ZT_MAX_PEER_NETWORK_PATHS]; + memset(&relq, 0, sizeof(relq)); + float alloc[ZT_MAX_PEER_NETWORK_PATHS]; + memset(&alloc, 0, sizeof(alloc)); + + // Survey + // + // Take a survey of all available link qualities. We use this to determine if we + // can skip this algorithm altogether and if not, to establish baseline for physical + // link quality used in later calculations. + // + // We find the min/max quality of our currently-active links so + // that we can form a relative scale to rank each link proportionally + // to each other link. + uint16_t alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; + uint16_t stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; + memset(&alivePaths, -1, sizeof(alivePaths)); + memset(&stalePaths, -1, sizeof(stalePaths)); + uint16_t numAlivePaths = 0; + uint16_t numStalePaths = 0; + float minQuality = 10000; + float maxQuality = -1; + float currQuality; + for(uint16_t i=0;iisValidState()) { + if (!_paths[i].p->monitorsReady()) { + // TODO: This should fix itself anyway but we should test whether forcing the use of a new path will + // aid in establishing flow balance more quickly. 
+ } + // Compute quality here, going forward we will use lastComputedQuality() + currQuality = _paths[i].p->computeQuality(now); + if (!_paths[i].p->stale(now)) { + alivePaths[i] = currQuality; + numAlivePaths++; + } + else { + stalePaths[i] = currQuality; + numStalePaths++; + } + if (currQuality > maxQuality) { + maxQuality = currQuality; bestPath = i; } + if (currQuality < minQuality) { + minQuality = currQuality; + } + relq[i] = currQuality; } - } else break; + } + + // Attempt to find an excuse not to use the rest of this algorithm + if (bestPath == ZT_MAX_PEER_NETWORK_PATHS || (numAlivePaths == 0 && numStalePaths == 0)) { + return SharedPtr(); + } if (numAlivePaths == 1) { + return _paths[bestPath].p; + } if (numStalePaths == 1) { + return _paths[bestPath].p; + } + + // Relative quality + // + // The strongest link will have a value of 1.0 whereas every other + // link will have a value which represents some fraction of the strongest link. + float totalRelativeQuality = 0; + for(unsigned int i=0;iisValidState()) { + relq[i] /= maxQuality ? maxQuality : 1; + totalRelativeQuality += relq[i]; + } + } + + // Convert the relative quality values into flow allocations. + // Additionally, determine whether each path in the flow is + // contributing more or less than its target allocation. If + // it is contributing more than required, don't allow it to be + // randomly selected for the next packet. 
If however the path + // needs to contribute more to the flow, we should record + float imbalance = 0; + float qualityScalingFactor = 1.0 / totalRelativeQuality; + for(uint16_t i=0;icountValue((float)i); + // Compute traffic allocation for each path in the flow + if (_paths[i].p && _paths[i].p->isValidState()) { + // Allocation + // This is the percentage of traffic we want to send over a given path + alloc[i] = relq[i] * qualityScalingFactor; + float currProportion = numPktSentWithinWin / (float)ZT_MULTIPATH_PROPORTION_WIN_SZ; + float targetProportion = alloc[i]; + float diffProportion = currProportion - targetProportion; + // Imbalance + // + // This is the sum of the distances of each path's currently observed flow contributions + // from its most recent target allocation. In other words, this is a measure of how closely we + // are adhering to our desired allocations. It is worth noting that this value can be greater + // than 1.0 if a significant change to allocations is made by the algorithm, this will + // eventually correct itself. + imbalance += fabs(diffProportion); + if (diffProportion < 0) { + alloc[i] = targetProportion; + } + else { + alloc[i] = targetProportion; + } + } + } + + // Compute and record current flow balance + float balance = 1.0 - imbalance; + if (balance >= ZT_MULTIPATH_FLOW_BALANCE_THESHOLD) { + if (!_linkBalanceStatus) { + _linkBalanceStatus = true; + RR->t->peerLinkBalanced(NULL,0,*this); + } + } + else { + if (_linkBalanceStatus) { + _linkBalanceStatus = false; + RR->t->peerLinkImbalanced(NULL,0,*this); + } + } + + // Record the current flow balance. Later used for computing a mean flow balance value. 
+ _flowBalanceHist->push(balance); + + // Randomly choose path from allocated candidates + unsigned int r; + Utils::getSecureRandom(&r, 1); + float rf = (float)(r %= 100) / 100; + for(int i=0;iisValidState() && _paths[i].p->address().isV4()) { + if (alloc[i] > 0 && rf < alloc[i]) { + bestPath = i; + _pathChoiceHist->push(bestPath); // Record which path we chose + break; + } + if (alloc[i] > 0) { + rf -= alloc[i]; + } + else { + rf -= alloc[i]*-1; + } + } + } + if (bestPath < ZT_MAX_PEER_NETWORK_PATHS) { + return _paths[bestPath].p; + } + return SharedPtr(); + } + + // Adhere to a user-defined interface/allocation scheme + if (RR->node->getMultipathMode() == ZT_MULTIPATH_MANUALLY_BALANCED) { + // TODO } - if (bestPath != ZT_MAX_PEER_NETWORK_PATHS) - return _paths[bestPath].p; return SharedPtr(); } @@ -477,16 +714,34 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) } } else break; } - while(j < ZT_MAX_PEER_NETWORK_PATHS) { - _paths[j].lr = 0; - _paths[j].p.zero(); - _paths[j].priority = 1; - ++j; + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + while(j < ZT_MAX_PEER_NETWORK_PATHS) { + _paths[j].lr = 0; + _paths[j].p.zero(); + _paths[j].priority = 1; + ++j; + } } - return sent; } +unsigned int Peer::prunePaths() +{ + Mutex::Lock _l(_paths_m); + unsigned int pruned = 0; + for(unsigned int i=0;iisClosed() || !_paths[i].p->isValidState()) { + _paths[i].lr = 0; + _paths[i].p.zero(); + _paths[i].priority = 1; + pruned++; + } + } + } + return pruned; +} + void Peer::clusterRedirect(void *tPtr,const SharedPtr &originatingPath,const InetAddress &remoteAddress,const int64_t now) { SharedPtr np(RR->topology->getPath(originatingPath->localSocket(),remoteAddress)); diff --git a/node/Peer.hpp b/node/Peer.hpp index b6f3c695..9873729b 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -65,7 +65,13 @@ private: Peer() {} // disabled to prevent bugs -- should not be constructed uninitialized public: - ~Peer() { Utils::burn(_key,sizeof(_key)); } + ~Peer() { + 
Utils::burn(_key,sizeof(_key)); + delete _pathChoiceHist; + delete _flowBalanceHist; + _pathChoiceHist = NULL; + _flowBalanceHist = NULL; + } /** * Construct a new peer @@ -145,20 +151,20 @@ public: */ inline bool sendDirect(void *tPtr,const void *data,unsigned int len,int64_t now,bool force) { - SharedPtr bp(getBestPath(now,force)); + SharedPtr bp(getAppropriatePath(now,force)); if (bp) return bp->send(RR,tPtr,data,len,now); return false; } /** - * Get the best current direct path + * Get the most appropriate direct path based on current multipath configuration * * @param now Current time * @param includeExpired If true, include even expired paths * @return Best current path or NULL if none */ - SharedPtr getBestPath(int64_t now,bool includeExpired) const; + SharedPtr getAppropriatePath(int64_t now, bool includeExpired); /** * Send VERB_RENDEZVOUS to this and another peer via the best common IP scope and path @@ -212,6 +218,16 @@ public: */ unsigned int doPingAndKeepalive(void *tPtr,int64_t now); + /** + * Clear paths whose localSocket(s) are in a CLOSED state or have an otherwise INVALID state. + * This should be called frequently so that we can detect and remove unproductive or invalid paths. + * + * Under the hood this is done periodically based on ZT_CLOSED_PATH_PRUNING_INTERVAL. 
+ * + * @return Number of paths that were pruned this round + */ + unsigned int prunePaths(); + /** * Process a cluster redirect sent by this peer * @@ -270,9 +286,9 @@ public: /** * @return Latency in milliseconds of best path or 0xffff if unknown / no paths */ - inline unsigned int latency(const int64_t now) const + inline unsigned int latency(const int64_t now) { - SharedPtr bp(getBestPath(now,false)); + SharedPtr bp(getAppropriatePath(now,false)); if (bp) return bp->latency(); return 0xffff; @@ -289,7 +305,7 @@ public: * * @return Relay quality score computed from latency and other factors, lower is better */ - inline unsigned int relayQuality(const int64_t now) const + inline unsigned int relayQuality(const int64_t now) { const uint64_t tsr = now - _lastReceive; if (tsr >= ZT_PEER_ACTIVITY_TIMEOUT) @@ -515,6 +531,7 @@ private: int64_t _lastCredentialsReceived; int64_t _lastTrustEstablishedPacketReceived; int64_t _lastSentFullHello; + int64_t _lastPathPrune; uint16_t _vProto; uint16_t _vMajor; @@ -530,6 +547,13 @@ private: unsigned int _credentialsCutoffCount; AtomicCounter __refCount; + + RingBuffer *_pathChoiceHist; + RingBuffer *_flowBalanceHist; + + bool _linkBalanceStatus; + bool _linkRedundancyStatus; + }; } // namespace ZeroTier diff --git a/node/RingBuffer.hpp b/node/RingBuffer.hpp new file mode 100644 index 00000000..62f1cf47 --- /dev/null +++ b/node/RingBuffer.hpp @@ -0,0 +1,315 @@ +/* + * ZeroTier One - Network Virtualization Everywhere + * Copyright (C) 2011-2018 ZeroTier, Inc. https://www.zerotier.com/ + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * -- + * + * You can be released from the requirements of the license by purchasing + * a commercial license. Buying such a license is mandatory as soon as you + * develop commercial closed-source software that incorporates or links + * directly against ZeroTier software without disclosing the source code + * of your own application. + */ + +#ifndef ZT_RINGBUFFER_H +#define ZT_RINGBUFFER_H + +#include +#include +#include +#include +#include +#include + +namespace ZeroTier { + +/** + * A revolving (ring) buffer. + * + * For fast handling of continuously-evolving variables (such as path quality metrics). + * Using this, we can maintain longer sliding historical windows for important path + * metrics without the need for potentially expensive calls to memcpy/memmove. + * + * Some basic statistical functionality is implemented here in an attempt + * to reduce the complexity of code needed to interact with this type of buffer. + */ + +template +class RingBuffer +{ +private: + T * buf; + size_t size; + size_t begin; + size_t end; + bool wrap; + +public: + + /** + * create a RingBuffer with space for up to size elements. 
+ */ + explicit RingBuffer(size_t size) + : size(size), + begin(0), + end(0), + wrap(false) + { + buf = new T[size]; + memset(buf, 0, sizeof(T) * size); + } + + /** + * @return A pointer to the underlying buffer + */ + T* get_buf() + { + return buf + begin; + } + + /** + * Adjust buffer index pointer as if we copied data in + * @param n Number of elements to copy in + * @return Number of elements we copied in + */ + size_t produce(size_t n) + { + n = std::min(n, getFree()); + if (n == 0) { + return n; + } + const size_t first_chunk = std::min(n, size - end); + end = (end + first_chunk) % size; + if (first_chunk < n) { + const size_t second_chunk = n - first_chunk; + end = (end + second_chunk) % size; + } + if (begin == end) { + wrap = true; + } + return n; + } + + /** + * Fast erase, O(1). + * Merely reset the buffer pointer, doesn't erase contents + */ + void reset() + { + consume(count()); + } + + /** + * adjust buffer index pointer as if we copied data out + * @param n Number of elements we copied from the buffer + * @return Number of elements actually available from the buffer + */ + size_t consume(size_t n) + { + n = std::min(n, count()); + if (n == 0) { + return n; + } + if (wrap) { + wrap = false; + } + const size_t first_chunk = std::min(n, size - begin); + begin = (begin + first_chunk) % size; + if (first_chunk < n) { + const size_t second_chunk = n - first_chunk; + begin = (begin + second_chunk) % size; + } + return n; + } + + /** + * @param data Buffer that is to be written to the ring + * @param n Number of elements to write to the buffer + */ + size_t write(const T * data, size_t n) + { + n = std::min(n, getFree()); + if (n == 0) { + return n; + } + const size_t first_chunk = std::min(n, size - end); + memcpy(buf + end, data, first_chunk * sizeof(T)); + end = (end + first_chunk) % size; + if (first_chunk < n) { + const size_t second_chunk = n - first_chunk; + memcpy(buf + end, data + first_chunk, second_chunk * sizeof(T)); + end = (end + second_chunk) 
% size; + } + if (begin == end) { + wrap = true; + } + return n; + } + + /** + * Place a single value on the buffer. If the buffer is full, consume a value first. + * + * @param value A single value to be placed in the buffer + */ + void push(const T value) + { + if (count() == size) { + consume(1); + } + write(&value, 1); + } + + /** + * @param dest Destination buffer + * @param n Size (in terms of number of elements) of the destination buffer + * @return Number of elements read from the buffer + */ + size_t read(T * dest, size_t n) + { + n = std::min(n, count()); + if (n == 0) { + return n; + } + if (wrap) { + wrap = false; + } + const size_t first_chunk = std::min(n, size - begin); + memcpy(dest, buf + begin, first_chunk * sizeof(T)); + begin = (begin + first_chunk) % size; + if (first_chunk < n) { + const size_t second_chunk = n - first_chunk; + memcpy(dest + first_chunk, buf + begin, second_chunk * sizeof(T)); + begin = (begin + second_chunk) % size; + } + return n; + } + + /** + * Return how many elements are in the buffer, O(1). + * + * @return The number of elements in the buffer + */ + size_t count() + { + if (end == begin) { + return wrap ? 
size : 0; + } + else if (end > begin) { + return end - begin; + } + else { + return size + end - begin; + } + } + + /** + * @return The number of slots that are unused in the buffer + */ + size_t getFree() + { + return size - count(); + } + + /** + * @return The arithmetic mean of the contents of the buffer + */ + T mean() + { + size_t iterator = begin; + T mean = 0; + for (int i=0; i peer(RR->topology->getPeer(tPtr,destination)); if (peer) { - viaPath = peer->getBestPath(now,false); + viaPath = peer->getAppropriatePath(now,false); if (!viaPath) { peer->tryMemorizedPath(tPtr,now); // periodically attempt memorized or statically defined paths, if any are known const SharedPtr relay(RR->topology->getUpstreamPeer()); - if ( (!relay) || (!(viaPath = relay->getBestPath(now,false))) ) { - if (!(viaPath = peer->getBestPath(now,true))) + if ( (!relay) || (!(viaPath = relay->getAppropriatePath(now,false))) ) { + if (!(viaPath = peer->getAppropriatePath(now,true))) return false; } } diff --git a/node/Topology.hpp b/node/Topology.hpp index 63946a32..5d726007 100644 --- a/node/Topology.hpp +++ b/node/Topology.hpp @@ -299,7 +299,7 @@ public: Address *a = (Address *)0; SharedPtr *p = (SharedPtr *)0; while (i.next(a,p)) { - const SharedPtr pp((*p)->getBestPath(now,false)); + const SharedPtr pp((*p)->getAppropriatePath(now,false)); if (pp) ++cnt; } diff --git a/node/Trace.cpp b/node/Trace.cpp index 386edaac..01a8da55 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -106,6 +106,26 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId, } } +void Trace::peerLinkNowRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath) +{ + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is fully redundant",peer.address().toInt(),networkId); +} + +void Trace::peerLinkNoLongerRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath) +{ + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on 
network %.16llx is no longer redundant",peer.address().toInt(),networkId); +} + +void Trace::peerLinkBalanced(void *const tPtr,const uint64_t networkId,Peer &peer) +{ + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is balanced",peer.address().toInt(),networkId); +} + +void Trace::peerLinkImbalanced(void *const tPtr,const uint64_t networkId,Peer &peer) +{ + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is unbalanced",peer.address().toInt(),networkId); +} + void Trace::peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath,const uint64_t packetId) { char tmp[128]; diff --git a/node/Trace.hpp b/node/Trace.hpp index 2a2fca6c..b01163d6 100644 --- a/node/Trace.hpp +++ b/node/Trace.hpp @@ -121,6 +121,12 @@ public: void resettingPathsInScope(void *const tPtr,const Address &reporter,const InetAddress &reporterPhysicalAddress,const InetAddress &myPhysicalAddress,const InetAddress::IpScope scope); void peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &path,const uint64_t packetId,const Packet::Verb verb); + + void peerLinkNowRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); + void peerLinkNoLongerRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); + void peerLinkBalanced(void *const tPtr,const uint64_t networkId,Peer &peer); + void peerLinkImbalanced(void *const tPtr,const uint64_t networkId,Peer &peer); + void peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath,const uint64_t packetId); void peerRedirected(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); diff --git a/osdep/Binder.hpp b/osdep/Binder.hpp index 93fad9f1..6e13836c 100644 --- a/osdep/Binder.hpp +++ b/osdep/Binder.hpp @@ -388,6 +388,7 @@ public: _bindings[_bindingCount].udpSock = udps; _bindings[_bindingCount].tcpListenSock = tcps; 
_bindings[_bindingCount].address = ii->first; + phy.setIfName(udps, (char*)ii->second.c_str(), ii->second.length()); ++_bindingCount; } } else { @@ -455,6 +456,20 @@ public: return false; } + /** + * Get a list of socket pointers for all bindings. + * + * @return A list of socket pointers for current bindings + */ + inline std::vector getBoundSockets() + { + std::vector sockets; + for (int i=0; i _bindingCount; diff --git a/osdep/Phy.hpp b/osdep/Phy.hpp index e359ccdd..d4f68681 100644 --- a/osdep/Phy.hpp +++ b/osdep/Phy.hpp @@ -27,6 +27,8 @@ #ifndef ZT_PHY_HPP #define ZT_PHY_HPP +#include "../osdep/OSUtils.hpp" + #include #include #include @@ -86,6 +88,22 @@ namespace ZeroTier { */ typedef void PhySocket; +struct link_test_record +{ + link_test_record(PhySocket *_s, uint64_t _id, uint64_t _egress_time, uint32_t _length) : + s(_s), + id(_id), + egress_time(_egress_time), + length(_length) + { + // + } + PhySocket *s; + uint64_t id; + uint64_t egress_time; + uint32_t length; +}; + /** * Simple templated non-blocking sockets implementation * @@ -154,10 +172,17 @@ private: struct PhySocketImpl { + PhySocketImpl() : + throughput(0) + { + memset(ifname, 0, sizeof(ifname)); + } PhySocketType type; ZT_PHY_SOCKFD_TYPE sock; void *uptr; // user-settable pointer ZT_PHY_SOCKADDR_STORAGE_TYPE saddr; // remote for TCP_OUT and TCP_IN, local for TCP_LISTEN, RAW, and UDP + char ifname[16]; + uint64_t throughput; }; std::list _socks; @@ -173,6 +198,7 @@ private: bool _noDelay; bool _noCheck; + std::vector link_test_records; public: /** @@ -249,6 +275,173 @@ public: */ static inline void** getuptr(PhySocket *s) throw() { return &(reinterpret_cast(s)->uptr); } + /** + * @param s Socket object + * @param nameBuf Buffer to store name of interface which this Socket object is bound to + * @param buflen Length of buffer to copy name into + */ + static inline void getIfName(PhySocket *s, char *nameBuf, int buflen) + { + memcpy(nameBuf, reinterpret_cast(s)->ifname, buflen); + } + + /** + * 
@param s Socket object + * @param ifname Buffer containing name of interface that this Socket object is bound to + * @param len Length of name of interface + */ + static inline void setIfName(PhySocket *s, char *ifname, int len) + { + memcpy(&(reinterpret_cast(s)->ifname), ifname, len); + } + + /** + * Get result of most recent throughput test + * + * @param s Socket object + */ + inline uint64_t getThroughput(PhySocket *s) + { + PhySocketImpl *sws = (reinterpret_cast(s)); + return sws ? sws->throughput : 0; + } + + /** + * Whether or not the socket object is in a closed state + * + * @param s Socket object + * @return true if socket is closed, false if otherwise + */ + inline bool isClosed(PhySocket *s) + { + PhySocketImpl *sws = (reinterpret_cast(s)); + return sws->type == ZT_PHY_SOCKET_CLOSED; + } + + /** + * Get state of socket object + * + * @param s Socket object + * @return State of socket + */ + inline int getState(PhySocket *s) + { + PhySocketImpl *sws = (reinterpret_cast(s)); + return sws->type; + } + + /** + * In the event that this socket is erased, we need a way to convey to the multipath logic + * that this path is no longer valid. + * + * @param s Socket object + * @return Whether the state of this socket is within an acceptable range of values + */ + inline bool isValidState(PhySocket *s) + { + PhySocketImpl *sws = (reinterpret_cast(s)); + return sws->type >= ZT_PHY_SOCKET_CLOSED && sws->type <= ZT_PHY_SOCKET_UNIX_LISTEN; + } + + /** + * Send a datagram of a known size to a selected peer and record egress time. The peer + * shall eventually respond by echoing back a smaller datagram. 
+ * + * @param s Socket object + * @param remoteAddress Address of remote peer to receive link test packet + * @param data Buffer containing random packet data + * @param len Length of packet data buffer + * @return Number of bytes successfully written to socket + */ + inline int test_link_speed(PhySocket *s, const struct sockaddr *to, void *data, uint32_t len) { + if (!reinterpret_cast(s)) { + return 0; + } + uint64_t *buf = (uint64_t*)data; + uint64_t id = buf[0]; + if (to->sa_family != AF_INET && to->sa_family != AF_INET6) { + return 0; + } + uint64_t egress_time = OSUtils::now(); + PhySocketImpl *sws = (reinterpret_cast(s)); +#if defined(_WIN32) || defined(_WIN64) + return ((long)::sendto(sws->sock,reinterpret_cast(data),len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) == (long)len); +#else + int w = ::sendto(sws->sock,data,len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); +#endif + if (w > 0) { + link_test_records.push_back(new link_test_record(s, id, egress_time, len)); + } + return w; + } + + /** + * Remove link speed test records which have timed-out and record a 0 bits/s measurement + */ + inline void refresh_link_speed_records() + { + for(int i=0;iegress_time > ZT_LINK_TEST_TIMEOUT) { + PhySocketImpl *sws = (reinterpret_cast(link_test_records[i]->s)); + if (sws) { + sws->throughput = 0; + } + link_test_records.erase(link_test_records.begin() + i); + } + } + } + + /** + * Upon receipt of a link speed test datagram we echo back only the identification portion + * + * @param s Socket object + * @param from Address of remote peer that sent this datagram + * @param data Buffer containing datagram's contents + * @param len Length of datagram + * @return Number of bytes successfully written to socket in response + */ + inline int respond_to_link_test(PhySocket *s,const struct sockaddr *from,void *data,unsigned long len) { + PhySocketImpl *sws = (reinterpret_cast(s)); 
+ uint64_t *id = (uint64_t*)data; +#if defined(_WIN32) || defined(_WIN64) + return ((long)::sendto(sws->sock,reinterpret_cast(data),len,0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) == (long)len); +#else + int w = ::sendto(sws->sock,id,sizeof(id[0]),0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); +#endif + return w; + } + + /** + * Upon receipt of a response to our original link test datagram, correlate this new datagram with the record + * of the one we sent. Compute the transit time and update the throughput field of the relevant socket. This + * value will later be read by the path quality estimation logic located in Path.hpp. + * + * @param s Socket object + * @param from Address of remote peer that sent this datagram + * @param data Buffer containing datagram contents (ID of original link test datagram) + * @param len Length of datagram + * @return true if datagram correponded to previous record, false if otherwise + */ + inline bool handle_link_test_response(PhySocket *s,const struct sockaddr *from,void *data,unsigned long len) { + uint64_t *id = (uint64_t*)data; + for(int i=0;iid == id[0]) { + float rtt = (OSUtils::now()-link_test_records[i]->egress_time) / (float)1000; // s + uint32_t sz = (link_test_records[i]->length) * 8; // bits + float transit_time = rtt / 2.0; + int64_t raw = sz / transit_time; + PhySocketImpl *sws = (reinterpret_cast(s)); + if (sws) { + sws->throughput = raw; + } + delete link_test_records[i]; + link_test_records.erase(link_test_records.begin() + i); + return true; + } + } + return false; + } + /** * Cause poll() to stop waiting immediately * @@ -985,7 +1178,7 @@ public: ZT_PHY_SOCKFD_TYPE sock = s->sock; // if closed, s->sock becomes invalid as s is no longer dereferencable if ((FD_ISSET(sock,&wfds))&&(FD_ISSET(sock,&_writefds))) { try { - _handler->phyOnUnixWritable((PhySocket *)&(*s),&(s->uptr),false); + 
_handler->phyOnUnixWritable((PhySocket *)&(*s),&(s->uptr)); } catch ( ... ) {} } if (FD_ISSET(sock,&rfds)) { diff --git a/service/OneService.cpp b/service/OneService.cpp index 80b42818..24456f71 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -37,6 +37,7 @@ #include "../version.h" #include "../include/ZeroTierOne.h" +#include "../include/ZeroTierDebug.h" #include "../node/Constants.hpp" #include "../node/Mutex.hpp" @@ -417,6 +418,7 @@ public: PhySocket *_localControlSocket6; bool _updateAutoApply; bool _allowTcpFallbackRelay; + unsigned int _multipathMode; unsigned int _primaryPort; volatile unsigned int _udpPortPickerCounter; @@ -455,6 +457,9 @@ public: // Last potential sleep/wake event uint64_t _lastRestart; + // Last time link throughput was tested + uint64_t _lastLinkSpeedTest; + // Deadline for the next background task service function volatile int64_t _nextBackgroundTaskDeadline; @@ -818,6 +823,7 @@ public: _lastRestart = clockShouldBe; int64_t lastTapMulticastGroupCheck = 0; int64_t lastBindRefresh = 0; + int64_t lastMultipathModeUpdate = 0; int64_t lastUpdateCheck = clockShouldBe; int64_t lastCleanedPeersDb = 0; int64_t lastLocalInterfaceAddressCheck = (clockShouldBe - ZT_LOCAL_INTERFACE_CHECK_INTERVAL) + 15000; // do this in 15s to give portmapper time to configure and other things time to settle @@ -849,8 +855,9 @@ public: _updater->apply(); } - // Refresh bindings in case device's interfaces have changed, and also sync routes to update any shadow routes (e.g. shadow default) - if (((now - lastBindRefresh) >= ZT_BINDER_REFRESH_PERIOD)||(restarted)) { + // Refresh bindings + int interfaceRefreshPeriod = _multipathMode ? 
ZT_MULTIPATH_BINDER_REFRESH_PERIOD : ZT_BINDER_REFRESH_PERIOD; + if (((now - lastBindRefresh) >= interfaceRefreshPeriod)||(restarted)) { lastBindRefresh = now; unsigned int p[3]; unsigned int pc = 0; @@ -867,6 +874,34 @@ public: } } } + // Update multipath mode (if needed) + if (((now - lastMultipathModeUpdate) >= interfaceRefreshPeriod)||(restarted)) { + lastMultipathModeUpdate = now; + _node->setMultipathMode(_multipathMode); + } + + // Test link speeds + // TODO: This logic should eventually find its way into the core or as part of a passive + // measure within the protocol. + if (_multipathMode && ((now - _lastLinkSpeedTest) >= ZT_LINK_SPEED_TEST_INTERVAL)) { + _phy.refresh_link_speed_records(); + _lastLinkSpeedTest = now; + // Generate random data to fill UDP packet + uint64_t pktBuf[ZT_LINK_TEST_DATAGRAM_SZ / sizeof(uint64_t)]; + Utils::getSecureRandom(pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); + ZT_PeerList *pl = _node->peers(); + // get bindings (specifically just the sockets) + std::vector sockets = _binder.getBoundSockets(); + // interfaces + for (int i=0; ipeerCount;++j) { + for (int k=0; k<(ZT_MAX_PEER_NETWORK_PATHS/4); k++) { + Utils::getSecureRandom(pktBuf, 8); // generate one random integer for unique id + _phy.test_link_speed(sockets[i], (struct sockaddr*)&(pl->peers[j].paths[k].address), pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); + } + } + } + } // Run background task processor in core if it's time to do so int64_t dl = _nextBackgroundTaskDeadline; @@ -1190,6 +1225,7 @@ public: json &settings = res["config"]["settings"]; settings["primaryPort"] = OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; settings["allowTcpFallbackRelay"] = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],_allowTcpFallbackRelay); + settings["multipathMode"] = OSUtils::jsonInt(settings["multipathMode"],_multipathMode); #ifdef ZT_USE_MINIUPNPC settings["portMappingEnabled"] = OSUtils::jsonBool(settings["portMappingEnabled"],true); #else @@ -1518,6 +1554,11 @@ 
public: _primaryPort = (unsigned int)OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; _allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true); + _multipathMode = OSUtils::jsonInt(settings["multipathMode"],0); + if (_multipathMode != 0 && _allowTcpFallbackRelay) { + fprintf(stderr,"WARNING: multipathMode cannot be used with allowTcpFallbackRelay. Disabling allowTcpFallbackRelay"); + _allowTcpFallbackRelay = false; + } _portMappingEnabled = OSUtils::jsonBool(settings["portMappingEnabled"],true); json &ignoreIfs = settings["interfacePrefixBlacklist"]; @@ -1758,6 +1799,15 @@ public: inline void phyOnDatagram(PhySocket *sock,void **uptr,const struct sockaddr *localAddr,const struct sockaddr *from,void *data,unsigned long len) { + if (_multipathMode) { + // Handle link test packets (should eventually be moved into the protocol itself) + if (len == ZT_LINK_TEST_DATAGRAM_SZ) { + _phy.respond_to_link_test(sock, from, data, len); + } + if (len == ZT_LINK_TEST_DATAGRAM_RESPONSE_SZ) { + _phy.handle_link_test_response(sock, from, data, len); + } + } if ((len >= 16)&&(reinterpret_cast(from)->ipScope() == InetAddress::IP_SCOPE_GLOBAL)) _lastDirectReceiveFromGlobal = OSUtils::now(); const ZT_ResultCode rc = _node->processWirePacket( @@ -2007,7 +2057,7 @@ public: inline void phyOnUnixAccept(PhySocket *sockL,PhySocket *sockN,void **uptrL,void **uptrN) {} inline void phyOnUnixClose(PhySocket *sock,void **uptr) {} inline void phyOnUnixData(PhySocket *sock,void **uptr,void *data,unsigned long len) {} - inline void phyOnUnixWritable(PhySocket *sock,void **uptr,bool lwip_invoked) {} + inline void phyOnUnixWritable(PhySocket *sock,void **uptr) {} inline int nodeVirtualNetworkConfigFunction(uint64_t nwid,void **nuptr,enum ZT_VirtualNetworkConfigOperation op,const ZT_VirtualNetworkConfig *nwc) { -- cgit v1.2.3 From 1debe2292d85e2d377064f74246244ac607046bf Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Wed, 2 May 2018 11:22:07 -0700 
Subject: Cleanup. Misc type conversion and signedness fixes --- node/Path.hpp | 6 +++--- node/Peer.cpp | 12 +++++------- node/RingBuffer.hpp | 28 ++++++++++++++-------------- osdep/Phy.hpp | 12 ++++++------ service/OneService.cpp | 5 ++--- 5 files changed, 30 insertions(+), 33 deletions(-) (limited to 'service') diff --git a/node/Path.hpp b/node/Path.hpp index 5751d326..7ce6e0f1 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -303,11 +303,11 @@ public: */ inline float computeQuality(const int64_t now) { - float latency_contrib = _meanLatency ? 1.0 / _meanLatency : 0; - float jitter_contrib = _jitter ? 1.0 / _jitter : 0; + float latency_contrib = _meanLatency ? (float)1.0 / _meanLatency : 0; + float jitter_contrib = _jitter ? (float)1.0 / _jitter : 0; float throughput_contrib = _meanThroughput ? _meanThroughput / 1000000 : 0; // in Mbps float age_contrib = _meanAge > 0 ? (float)sqrt(_meanAge) : 1; - float error_contrib = 1.0 - _meanPacketErrorRatio; + float error_contrib = (float)1.0 - _meanPacketErrorRatio; float sum = (latency_contrib + jitter_contrib + throughput_contrib + error_contrib) / age_contrib; _lastComputedQuality = sum * (long)((_ipScope) + 1); return _lastComputedQuality; diff --git a/node/Peer.cpp b/node/Peer.cpp index 862a4529..bbc6d6d2 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -320,12 +320,12 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) Utils::getSecureRandom(&r, 1); if (numAlivePaths > 0) { // pick a random out of the set deemed "alive" - int rf = (float)(r %= numAlivePaths); + int rf = r % numAlivePaths; return _paths[alivePaths[rf]].p; } else if(numStalePaths > 0) { // resort to trying any non-expired path - int rf = (float)(r %= numStalePaths); + int rf = r % numStalePaths; return _paths[stalePaths[rf]].p; } } @@ -366,11 +366,9 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) // Compute quality here, going forward we will use lastComputedQuality() currQuality = 
_paths[i].p->computeQuality(now); if (!_paths[i].p->stale(now)) { - alivePaths[i] = currQuality; numAlivePaths++; } else { - stalePaths[i] = currQuality; numStalePaths++; } if (currQuality > maxQuality) { @@ -412,10 +410,10 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) // randomly selected for the next packet. If however the path // needs to contribute more to the flow, we should record float imbalance = 0; - float qualityScalingFactor = 1.0 / totalRelativeQuality; + float qualityScalingFactor = (float)1.0 / totalRelativeQuality; for(uint16_t i=0;icountValue((float)i); + int numPktSentWithinWin = (int)_pathChoiceHist->countValue(i); // Compute traffic allocation for each path in the flow if (_paths[i].p && _paths[i].p->isValidState()) { // Allocation @@ -442,7 +440,7 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) } // Compute and record current flow balance - float balance = 1.0 - imbalance; + float balance = (float)1.0 - imbalance; if (balance >= ZT_MULTIPATH_FLOW_BALANCE_THESHOLD) { if (!_linkBalanceStatus) { _linkBalanceStatus = true; diff --git a/node/RingBuffer.hpp b/node/RingBuffer.hpp index 62f1cf47..cd384749 100644 --- a/node/RingBuffer.hpp +++ b/node/RingBuffer.hpp @@ -226,34 +226,34 @@ public: /** * @return The arithmetic mean of the contents of the buffer */ - T mean() + float mean() { size_t iterator = begin; - T mean = 0; - for (int i=0; i(s)); #if defined(_WIN32) || defined(_WIN64) - return ((long)::sendto(sws->sock,reinterpret_cast(data),len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) == (long)len); + int w = ::sendto(sws->sock,reinterpret_cast(data),len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) #else int w = ::sendto(sws->sock,data,len,0,to,(to->sa_family == AF_INET6) ? 
sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); #endif @@ -380,7 +380,7 @@ public: */ inline void refresh_link_speed_records() { - for(int i=0;iegress_time > ZT_LINK_TEST_TIMEOUT) { PhySocketImpl *sws = (reinterpret_cast(link_test_records[i]->s)); if (sws) { @@ -404,7 +404,7 @@ public: PhySocketImpl *sws = (reinterpret_cast(s)); uint64_t *id = (uint64_t*)data; #if defined(_WIN32) || defined(_WIN64) - return ((long)::sendto(sws->sock,reinterpret_cast(data),len,0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) == (long)len); + int w = ::sendto(sws->sock,reinterpret_cast(id),sizeof(id[0]),0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); #else int w = ::sendto(sws->sock,id,sizeof(id[0]),0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); #endif @@ -424,12 +424,12 @@ public: */ inline bool handle_link_test_response(PhySocket *s,const struct sockaddr *from,void *data,unsigned long len) { uint64_t *id = (uint64_t*)data; - for(int i=0;iid == id[0]) { float rtt = (OSUtils::now()-link_test_records[i]->egress_time) / (float)1000; // s uint32_t sz = (link_test_records[i]->length) * 8; // bits - float transit_time = rtt / 2.0; - int64_t raw = sz / transit_time; + float transit_time = rtt / (float)2.0; + uint64_t raw = (uint64_t)(sz / transit_time); PhySocketImpl *sws = (reinterpret_cast(s)); if (sws) { sws->throughput = raw; diff --git a/service/OneService.cpp b/service/OneService.cpp index 24456f71..27930a52 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -879,7 +879,6 @@ public: lastMultipathModeUpdate = now; _node->setMultipathMode(_multipathMode); } - // Test link speeds // TODO: This logic should eventually find its way into the core or as part of a passive // measure within the protocol. 
@@ -894,7 +893,7 @@ public: std::vector sockets = _binder.getBoundSockets(); // interfaces for (int i=0; ipeerCount;++j) { + for(size_t j=0;jpeerCount;++j) { for (int k=0; k<(ZT_MAX_PEER_NETWORK_PATHS/4); k++) { Utils::getSecureRandom(pktBuf, 8); // generate one random integer for unique id _phy.test_link_speed(sockets[i], (struct sockaddr*)&(pl->peers[j].paths[k].address), pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); @@ -1554,7 +1553,7 @@ public: _primaryPort = (unsigned int)OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; _allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true); - _multipathMode = OSUtils::jsonInt(settings["multipathMode"],0); + _multipathMode = (unsigned int)OSUtils::jsonInt(settings["multipathMode"],0); if (_multipathMode != 0 && _allowTcpFallbackRelay) { fprintf(stderr,"WARNING: multipathMode cannot be used with allowTcpFallbackRelay. Disabling allowTcpFallbackRelay"); _allowTcpFallbackRelay = false; -- cgit v1.2.3 From 91c8e82c428cea9d9e0a6911fbd1820212871ad8 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Wed, 2 May 2018 15:24:14 -0700 Subject: Adjusted locking order of _paths_m for path pruning. Other minor multipath changes --- node/Constants.hpp | 11 +++++++++-- node/Path.hpp | 16 ++++++++++++++++ node/Peer.cpp | 24 +++++++++++++----------- service/OneService.cpp | 7 ++++--- 4 files changed, 42 insertions(+), 16 deletions(-) (limited to 'service') diff --git a/node/Constants.hpp b/node/Constants.hpp index 80083abe..ee2ff0a6 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -293,7 +293,7 @@ /** * Number of samples to consider when computing path statistics */ -#define ZT_PATH_QUALITY_METRIC_WIN_SZ 128 +#define ZT_PATH_QUALITY_METRIC_WIN_SZ 64 /** * How often important path metrics are sampled (in ms). These metrics are later used @@ -311,7 +311,7 @@ * since we will record a 0 bit/s measurement if no valid latency measurement was made within this * window of time. 
*/ -#define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_PING_CHECK_INVERVAL * 2 +#define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_MULTIPATH_PEER_PING_PERIOD * 2 /** * Interval used for rate-limiting the computation of path quality estimates. Set at 0 @@ -374,6 +374,13 @@ */ #define ZT_PEER_PING_PERIOD 60000 +/** + * Delay between full-fledge pings of directly connected peers. + * With multipath bonding enabled ping peers more often to measure + * packet loss and latency. + */ +#define ZT_MULTIPATH_PEER_PING_PERIOD 5000 + /** * Paths are considered expired if they have not sent us a real packet in this long */ diff --git a/node/Path.hpp b/node/Path.hpp index 7ce6e0f1..e6bcecf0 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -432,6 +432,22 @@ public: } } + /** + * @param buf Buffer to store resultant string + * @return Description of path, in ASCII string format + */ + inline char *toString(char *buf) { + sprintf(buf,"%6s, q=%8.3f, %5.3f Mb/s, j=%8.2f, ml=%8.2f, meanAge=%8.2f, addr=%45s", + getName(), + lastComputedQuality(), + (float)meanThroughput() / (float)1000000, + jitter(), + meanLatency(), + meanAge(), + getAddressString()); + return buf; + } + /** * Record whether a packet is considered invalid by MAC/compression/cipher checks. This * could be an indication of a bit error. 
This function will keep a running counter of diff --git a/node/Peer.cpp b/node/Peer.cpp index bbc6d6d2..c46ed751 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -100,14 +100,17 @@ void Peer::received( path->trustedPacketReceived(now); } - if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { - if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { - _lastPathPrune = now; - prunePaths(); - } - for(unsigned int i=0;imeasureLink(now); + { + Mutex::Lock _l(_paths_m); + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { + _lastPathPrune = now; + prunePaths(); + } + for(unsigned int i=0;imeasureLink(now); + } } } } @@ -386,9 +389,9 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) if (bestPath == ZT_MAX_PEER_NETWORK_PATHS || (numAlivePaths == 0 && numStalePaths == 0)) { return SharedPtr(); } if (numAlivePaths == 1) { - return _paths[bestPath].p; + //return _paths[bestPath].p; } if (numStalePaths == 1) { - return _paths[bestPath].p; + //return _paths[bestPath].p; } // Relative quality @@ -725,7 +728,6 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) unsigned int Peer::prunePaths() { - Mutex::Lock _l(_paths_m); unsigned int pruned = 0; for(unsigned int i=0;i= interfaceRefreshPeriod)||(restarted)) { lastBindRefresh = now; @@ -889,9 +891,7 @@ public: uint64_t pktBuf[ZT_LINK_TEST_DATAGRAM_SZ / sizeof(uint64_t)]; Utils::getSecureRandom(pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); ZT_PeerList *pl = _node->peers(); - // get bindings (specifically just the sockets) std::vector sockets = _binder.getBoundSockets(); - // interfaces for (int i=0; ipeerCount;++j) { for (int k=0; k<(ZT_MAX_PEER_NETWORK_PATHS/4); k++) { @@ -936,7 +936,8 @@ public: } // Sync information about physical network interfaces - if ((now - lastLocalInterfaceAddressCheck) >= ZT_LOCAL_INTERFACE_CHECK_INTERVAL) { + int interfaceAddressCheckInterval = _multipathMode ? 
ZT_MULTIPATH_LOCAL_INTERFACE_CHECK_INTERVAL : ZT_LOCAL_INTERFACE_CHECK_INTERVAL; + if ((now - lastLocalInterfaceAddressCheck) >= interfaceAddressCheckInterval) { lastLocalInterfaceAddressCheck = now; _node->clearLocalInterfaceAddresses(); -- cgit v1.2.3 From 46a7a2be2e1c4d09d1da2bf26b110a31ec3d0661 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Wed, 30 May 2018 17:45:29 -0700 Subject: Added VERB_ACK and VERB_QOS_MEASUREMENT, refined notion of path quality --- include/ZeroTierDebug.h | 2 +- make-linux.mk | 3 + make-mac.mk | 5 +- node/Constants.hpp | 92 +++++----- node/IncomingPacket.cpp | 119 +++++++++---- node/IncomingPacket.hpp | 2 + node/Packet.hpp | 52 +++++- node/Path.hpp | 458 +++++++++++++++++++++++++++++------------------- node/Peer.cpp | 428 ++++++++++++++++++++++++++------------------ node/Peer.hpp | 85 ++++++++- node/RingBuffer.hpp | 81 ++++++--- node/Switch.cpp | 2 + node/Trace.cpp | 22 +-- node/Trace.hpp | 8 +- node/Utils.hpp | 8 + osdep/Binder.hpp | 14 -- osdep/Phy.hpp | 150 ++-------------- service/OneService.cpp | 33 ---- 18 files changed, 899 insertions(+), 665 deletions(-) (limited to 'service') diff --git a/include/ZeroTierDebug.h b/include/ZeroTierDebug.h index 8e5366f0..a60179b7 100644 --- a/include/ZeroTierDebug.h +++ b/include/ZeroTierDebug.h @@ -86,7 +86,7 @@ #include #define ZT_LOG_TAG "ZTSDK" #endif -#if defined(ZT_TRACE) +#if defined(ZT_DEBUG_TRACE) #if ZT_MSG_INFO == true #if defined(__ANDROID__) #define DEBUG_INFO(fmt, args...) 
((void)__android_log_print(ANDROID_LOG_VERBOSE, ZT_LOG_TAG, \ diff --git a/make-linux.mk b/make-linux.mk index 0f5ef384..749c24b8 100644 --- a/make-linux.mk +++ b/make-linux.mk @@ -45,6 +45,9 @@ ONE_OBJS+=ext/http-parser/http_parser.o ifeq ($(ZT_TRACE),1) override DEFS+=-DZT_TRACE endif +ifeq ($(ZT_DEBUG_TRACE),1) + DEFS+=-DZT_DEBUG_TRACE +endif ifeq ($(ZT_RULES_ENGINE_DEBUGGING),1) override DEFS+=-DZT_RULES_ENGINE_DEBUGGING diff --git a/make-mac.mk b/make-mac.mk index 1178437a..12357f68 100644 --- a/make-mac.mk +++ b/make-mac.mk @@ -43,7 +43,10 @@ ONE_OBJS+=ext/libnatpmp/natpmp.o ext/libnatpmp/getgateway.o ext/miniupnpc/connec # Build with address sanitization library for advanced debugging (clang) ifeq ($(ZT_SANITIZE),1) - SANFLAGS+=-fsanitize=address -DASAN_OPTIONS=symbolize=1 + DEFS+=-fsanitize=address -DASAN_OPTIONS=symbolize=1 +endif +ifeq ($(ZT_DEBUG_TRACE),1) + DEFS+=-DZT_DEBUG_TRACE endif # Debug mode -- dump trace output, build binary with -g ifeq ($(ZT_DEBUG),1) diff --git a/node/Constants.hpp b/node/Constants.hpp index ee2ff0a6..227497de 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -267,11 +267,6 @@ */ #define ZT_PING_CHECK_INVERVAL 5000 -/** - * Length of interface name - */ -#define ZT_PATH_INTERFACE_NAME_SZ 16 - /** * How frequently to check for changes to the system's network interfaces. When * the service decides to use this constant it's because we want to react more @@ -286,78 +281,95 @@ #define ZT_MULTIPATH_PROPORTION_WIN_SZ 128 /** - * Threshold for flow to be considered balanced. + * How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL + * since we will record a 0 bit/s measurement if no valid latency measurement was made within this + * window of time. 
*/ -#define ZT_MULTIPATH_FLOW_BALANCE_THESHOLD 0.80 +#define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_MULTIPATH_PEER_PING_PERIOD * 2 /** - * Number of samples to consider when computing path statistics + * Interval used for rate-limiting the computation of path quality estimates. Set at 0 + * to compute as new packets arrive with no delay. */ -#define ZT_PATH_QUALITY_METRIC_WIN_SZ 64 +#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 1000 /** - * How often important path metrics are sampled (in ms). These metrics are later used - * for path quality estimates + * Number of samples to consider when computing real-time path statistics */ -#define ZT_PATH_QUALITY_SAMPLE_INTERVAL 100 +#define ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ 128 /** - * How often new path quality estimates are computed + * Number of samples to consider when computing performing long-term path quality analysis. + * By default this value is set to ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ but can + * be set to any value greater than that to observe longer-term path quality behavior. */ -#define ZT_PATH_QUALITY_ESTIMATE_INTERVAL 100 +#define ZT_PATH_QUALITY_METRIC_WIN_SZ ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ /** - * How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL - * since we will record a 0 bit/s measurement if no valid latency measurement was made within this - * window of time. + * Maximum acceptable Packet Delay Variance (PDV) over a path */ -#define ZT_PATH_LATENCY_SAMPLE_INTERVAL ZT_MULTIPATH_PEER_PING_PERIOD * 2 +#define ZT_PATH_MAX_PDV 1000 /** - * Interval used for rate-limiting the computation of path quality estimates. Set at 0 - * to compute as new packets arrive with no delay. 
+ * Maximum acceptable time interval between expectation and receipt of at least one ACK over a path + */ +#define ZT_PATH_MAX_AGE 30000 + +/** + * Maximum acceptable mean latency over a path + */ +#define ZT_PATH_MAX_MEAN_LATENCY 1000 + +/** + * How much each factor contributes to the "stability" score of a path + */ +#define ZT_PATH_CONTRIB_PDV 1.0 / 3.0 +#define ZT_PATH_CONTRIB_LATENCY 1.0 / 3.0 +#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE 1.0 / 3.0 + +/** + * How much each factor contributes to the "quality" score of a path */ -#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 0 +#define ZT_PATH_CONTRIB_STABILITY 0.75 / 3.0 +#define ZT_PATH_CONTRIB_THROUGHPUT 1.50 / 3.0 +#define ZT_PATH_CONTRIB_SCOPE 0.75 / 3.0 /** - * Path error rate history window size. This is used to keep track of packet error - * measurements over a path's medium-term history. + * Min and max acceptable sizes for a VERB_QOS_MEASUREMENT packet */ -#define ZT_PATH_ERROR_HIST_WIN_SZ 10 +#define ZT_PATH_MIN_QOS_PACKET_SZ 8 + 1 +#define ZT_PATH_MAX_QOS_PACKET_SZ 1400 /** - * The number of packet error measurements in each sample + * How many ID:sojourn time pairs in a single QoS packet */ -#define ZT_PATH_ERROR_SAMPLE_WIN_SZ 1024 +#define ZT_PATH_QOS_TABLE_SIZE (ZT_PATH_MAX_QOS_PACKET_SZ * 8) / (64 + 8) /** - * How often a peer will prune its own paths. Pruning is important when multipath is - * enabled because we want to prevent the allocation algorithms from sending anything - * out on known dead paths. Additionally, quickly marking paths as dead helps when - * a new path is learned and needs to replace an older path. + * How often the service tests the path throughput */ -#define ZT_CLOSED_PATH_PRUNING_INTERVAL 1000 +#define ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL ZT_PATH_ACK_INTERVAL * 8 /** - * Datagram used to test link throughput. Contents are random. 
+ * Minimum amount of time between each ACK packet */ -#define ZT_LINK_TEST_DATAGRAM_SZ 1024 +#define ZT_PATH_ACK_INTERVAL 250 /** - * Size of datagram expected as a reply to a link speed test + * How often a QoS packet is sent */ -#define ZT_LINK_TEST_DATAGRAM_RESPONSE_SZ 8 +#define ZT_PATH_QOS_INTERVAL 1000 /** - * Time before a link test datagram is considered lost. Any corresponding - * timing records that would have been used to compute a RTT are purged. + * How often an aggregate link statistics report is emitted into this tracing system */ -#define ZT_LINK_TEST_TIMEOUT 10000 +#define ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL 60000 /** - * How often the service tests the link throughput. + * How much an aggregate link's component paths can vary from their target allocation + * before the link is considered to be in a state of imbalance. */ -#define ZT_LINK_SPEED_TEST_INTERVAL 1000 +#define ZT_PATH_IMBALANCE_THRESHOLD 0.20 /** * How frequently to send heartbeats over in-use paths diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index 8f6dda63..e7227412 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -80,7 +80,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) if (!trusted) { if (!dearmor(peer->key())) { RR->t->incomingPacketMessageAuthenticationFailure(tPtr,_path,packetId(),sourceAddress,hops(),"invalid MAC"); - _path->recordPacket(false); + _path->recordInvalidPacket(); return true; } } @@ -90,15 +90,15 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) return true; } - _path->recordPacket(true); - const Packet::Verb v = verb(); switch(v) { //case Packet::VERB_NOP: default: // ignore unknown verbs, but if they pass auth check they are "received" - peer->received(tPtr,_path,hops(),packetId(),v,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0); return true; case Packet::VERB_HELLO: return 
_doHELLO(RR,tPtr,true); + case Packet::VERB_ACK: return _doACK(RR,tPtr,peer); + case Packet::VERB_QOS_MEASUREMENT: return _doQOS_MEASUREMENT(RR,tPtr,peer); case Packet::VERB_ERROR: return _doERROR(RR,tPtr,peer); case Packet::VERB_OK: return _doOK(RR,tPtr,peer); case Packet::VERB_WHOIS: return _doWHOIS(RR,tPtr,peer); @@ -197,11 +197,55 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,void *tPtr,const Shar default: break; } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId); + + return true; +} + +bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) +{ + /* Dissect incoming ACK packet. From this we can estimate current throughput of the path, establish known + * maximums and detect packet loss. */ + + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + int32_t ackedBytes; + memcpy(&ackedBytes, payload(), sizeof(int32_t)); + _path->receivedAck(RR->node->now(), Utils::ntoh(ackedBytes)); + } return true; } +bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) +{ + + /* Dissect incoming QoS packet. From this we can compute latency values and their variance. + * The latency variance is used as a measure of "jitter". 
*/ + + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if (payloadLength() < ZT_PATH_MAX_QOS_PACKET_SZ && payloadLength() > ZT_PATH_MIN_QOS_PACKET_SZ) { + const int64_t now = RR->node->now(); + uint64_t rx_id[ZT_PATH_QOS_TABLE_SIZE]; + uint8_t rx_ts[ZT_PATH_QOS_TABLE_SIZE]; + char *begin = (char *)payload(); + char *ptr = begin; + int count = 0; + int len = payloadLength(); + // Read packet IDs and latency compensation intervals for each packet tracked by thie QoS packet + while (ptr < (begin + len)) { + memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t)); + rx_id[count] = Utils::ntoh(rx_id[count]); + ptr+=sizeof(uint64_t); + memcpy((void*)&rx_ts[count], ptr, sizeof(uint8_t)); + ptr+=sizeof(uint8_t); + count++; + } + _path->receivedQoS(now, count, rx_id, rx_ts); + } + } + return true; +} + bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool alreadyAuthenticated) { const int64_t now = RR->node->now(); @@ -398,7 +442,7 @@ bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool _path->send(RR,tPtr,outp.data(),outp.size(),now); peer->setRemoteVersion(protoVersion,vMajor,vMinor,vRevision); // important for this to go first so received() knows the version - peer->received(tPtr,_path,hops(),pid,Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0); return true; } @@ -448,8 +492,9 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP } } - if (!hops()) + if (!hops() && (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE)) { _path->updateLatency((unsigned int)latency, RR->node->now()); + } peer->setRemoteVersion(vProto,vMajor,vMinor,vRevision); @@ -510,7 +555,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP default: break; } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId); + 
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId); return true; } @@ -545,7 +590,7 @@ bool IncomingPacket::_doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const Shar _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0); return true; } @@ -569,7 +614,7 @@ bool IncomingPacket::_doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0); return true; } @@ -598,7 +643,7 @@ bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const Shar _sendErrorNeedCredentials(RR,tPtr,peer,nwid); } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid); return true; } @@ -621,7 +666,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const if (!network->gate(tPtr,peer)) { RR->t->incomingNetworkAccessDenied(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,true); _sendErrorNeedCredentials(RR,tPtr,peer,nwid); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); return true; } @@ -633,7 +678,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const const uint8_t *const frameData = (const uint8_t *)field(comLen + ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD,frameLen); if 
((!from)||(from == network->mac())) { - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } @@ -644,19 +689,19 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const network->learnBridgeRoute(from,peer->address()); } else { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (remote)"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } } else if (to != network->mac()) { if (to.isMulticast()) { if (network->config().multicastLimit == 0) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"multicast disabled"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } } else if (!network->config().permitsBridging(RR->identity.address())) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (local)"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + 
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } } @@ -676,9 +721,9 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); } else { - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); } return true; @@ -698,7 +743,7 @@ bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const Share outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); - peer->received(tPtr,_path,hops(),pid,Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0); return true; } @@ -743,7 +788,7 @@ bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,c } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); return true; } @@ -866,7 +911,7 @@ bool IncomingPacket::_doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *t } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? 
network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); return true; } @@ -892,7 +937,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,void _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hopCount,requestPacketId,Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid); return true; } @@ -913,7 +958,7 @@ bool IncomingPacket::_doNETWORK_CONFIG(const RuntimeEnvironment *RR,void *tPtr,c } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0); return true; } @@ -956,7 +1001,7 @@ bool IncomingPacket::_doMULTICAST_GATHER(const RuntimeEnvironment *RR,void *tPtr } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid); return true; } @@ -982,7 +1027,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, if (!network->gate(tPtr,peer)) { RR->t->incomingNetworkAccessDenied(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,true); _sendErrorNeedCredentials(RR,tPtr,peer,nwid); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); return true; } @@ -1006,19 +1051,19 @@ bool 
IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, if (network->config().multicastLimit == 0) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"multicast disabled"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); return true; } if ((frameLen > 0)&&(frameLen <= ZT_MAX_MTU)) { if (!to.mac().isMulticast()) { RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"destination not multicast"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } if ((!from)||(from.isMulticast())||(from == network->mac())) { RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"invalid source MAC"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } @@ -1032,7 +1077,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, network->learnBridgeRoute(from,peer->address()); } else { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"bridging not allowed (remote)"); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // 
trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay return true; } } @@ -1055,10 +1100,10 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); } else { _sendErrorNeedCredentials(RR,tPtr,peer,nwid); - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); } return true; @@ -1070,7 +1115,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt // First, subject this to a rate limit if (!peer->rateGatePushDirectPaths(now)) { - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); return true; } @@ -1094,7 +1139,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt case 4: { const InetAddress a(field(ptr,4),4,at(ptr + 4)); if ( - ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget + ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget (!( ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_CLUSTER_REDIRECT) == 0) && (peer->hasActivePathTo(now,a)) )) && // not already known (RR->node->shouldUsePathForZeroTierTraffic(tPtr,peer->address(),_path->localSocket(),a)) ) // should use path { @@ -1108,7 +1153,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt case 6: { const InetAddress 
a(field(ptr,16),16,at(ptr + 16)); if ( - ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget + ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget (!( ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_CLUSTER_REDIRECT) == 0) && (peer->hasActivePathTo(now,a)) )) && // not already known (RR->node->shouldUsePathForZeroTierTraffic(tPtr,peer->address(),_path->localSocket(),a)) ) // should use path { @@ -1123,7 +1168,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt ptr += addrLen; } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); return true; } @@ -1139,7 +1184,7 @@ bool IncomingPacket::_doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,con RR->node->postEvent(tPtr,ZT_EVENT_USER_MESSAGE,reinterpret_cast(&um)); } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0); return true; } @@ -1163,7 +1208,7 @@ bool IncomingPacket::_doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,con } } - peer->received(tPtr,_path,hops(),packetId(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0); return true; } diff --git a/node/IncomingPacket.hpp b/node/IncomingPacket.hpp index 88f4f066..9144277c 100644 --- a/node/IncomingPacket.hpp +++ b/node/IncomingPacket.hpp @@ -125,6 +125,8 @@ private: // been authenticated, decrypted, decompressed, and classified. 
bool _doERROR(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool alreadyAuthenticated); + bool _doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); + bool _doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); diff --git a/node/Packet.hpp b/node/Packet.hpp index 27da6fb5..6869691e 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -419,7 +419,7 @@ public: template Fragment(const Buffer &b) : - Buffer(b) + Buffer(b) { } @@ -930,7 +930,53 @@ public: */ VERB_PUSH_DIRECT_PATHS = 0x10, - // 0x11, 0x12 -- deprecated + // 0x11 -- deprecated + + /** + * An acknowledgement of receipt of a series of recent packets from another + * peer. This is used to calculate relative throughput values and to detect + * packet loss. Only VERB_FRAME and VERB_EXT_FRAME packets are counted. + * + * ACK response format: + * <[4] 32-bit number of bytes received since last ACK> + * + * Upon receipt of this packet, the local peer will verify that the correct + * number of bytes were received by the remote peer. If these values do + * not agree that could be an indicator of packet loss. + * + * Additionally, the local peer knows the interval of time that has + * elapsed since the last received ACK. With this information it can compute + * a rough estimate of the current throughput. + * + * This is sent at a maximum rate of once per every ZT_PATH_ACK_INTERVAL + */ + VERB_ACK = 0x12, + + /** + * A packet containing timing measurements useful for estimating path quality. + * Composed of a list of pairs for an + * arbitrary set of recent packets. This is used to sample for latency and + * packet delay variance (PDV, "jitter"). 
+ * + * QoS record format: + * + * <[8] 64-bit packet ID of previously-received packet> + * <[1] 8-bit packet sojourn time> + * <...repeat until end of max 1400 byte packet...> + * + * The number of possible records per QoS packet is: (1400 * 8) / 72 = 155 + * This packet should be sent very rarely (every few seconds) as it can be + * somewhat large if the connection is saturated. Future versions might use + * a bloom table to probablistically determine these values in a vastly + * more space-efficient manner. + * + * Note: The 'internal packet sojourn time' is a slight misnomer as it is a + * measure of the amount of time between when a packet was received and the + * egress time of its tracking QoS packet. + * + * This is sent at a maximum rate of once per every ZT_PATH_QOS_INTERVAL + */ + VERB_QOS_MEASUREMENT = 0x13, /** * A message with arbitrary user-definable content: @@ -999,7 +1045,7 @@ public: template Packet(const Buffer &b) : - Buffer(b) + Buffer(b) { } diff --git a/node/Path.hpp b/node/Path.hpp index e6bcecf0..0278d919 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -102,24 +102,23 @@ public: _latency(0xffff), _addr(), _ipScope(InetAddress::IP_SCOPE_NONE), - _currentPacketSampleCounter(0), - _meanPacketErrorRatio(0.0), - _meanLatency(0.0), - _lastLatencyUpdate(0), - _jitter(0.0), - _lastPathQualitySampleTime(0), - _lastComputedQuality(0.0), - _lastPathQualityEstimate(0), - _meanAge(0.0), + _lastAck(0), + _lastThroughputEstimation(0), + _lastQoSMeasurement(0), + _unackedBytes(0), + _expectingAckAsOf(0), + _packetsReceivedSinceLastAck(0), _meanThroughput(0.0), - _packetLossRatio(0) + _maxLifetimeThroughput(0), + _bytesAckedSinceLastThroughputEstimation(0), + _meanLatency(0.0), + _packetDelayVariance(0.0), + _packetErrorRatio(0.0), + _packetLossRatio(0), + _lastComputedStability(0.0), + _lastComputedRelativeQuality(0) { - memset(_ifname, 0, sizeof(_ifname)); - memset(_addrString, 0, sizeof(_addrString)); - _throughputSamples = new 
RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _ageSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _errSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + prepareBuffers(); } Path(const int64_t localSocket,const InetAddress &addr) : @@ -131,37 +130,35 @@ public: _latency(0xffff), _addr(addr), _ipScope(addr.ipScope()), - _currentPacketSampleCounter(0), - _meanPacketErrorRatio(0.0), - _meanLatency(0.0), - _lastLatencyUpdate(0), - _jitter(0.0), - _lastPathQualitySampleTime(0), - _lastComputedQuality(0.0), - _lastPathQualityEstimate(0), - _meanAge(0.0), + _lastAck(0), + _lastThroughputEstimation(0), + _lastQoSMeasurement(0), + _unackedBytes(0), + _expectingAckAsOf(0), + _packetsReceivedSinceLastAck(0), _meanThroughput(0.0), - _packetLossRatio(0) + _maxLifetimeThroughput(0), + _bytesAckedSinceLastThroughputEstimation(0), + _meanLatency(0.0), + _packetDelayVariance(0.0), + _packetErrorRatio(0.0), + _packetLossRatio(0), + _lastComputedStability(0.0), + _lastComputedRelativeQuality(0) { - memset(_ifname, 0, sizeof(_ifname)); - memset(_addrString, 0, sizeof(_addrString)); - _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _ageSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); - _errSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + prepareBuffers(); } ~Path() { delete _throughputSamples; - delete _ageSamples; delete _latencySamples; - delete _errSamples; - + delete _qualitySamples; + delete _packetValiditySamples; _throughputSamples = NULL; - _ageSamples = NULL; _latencySamples = NULL; - _errSamples = NULL; + _qualitySamples = NULL; + _packetValiditySamples = NULL; } /** @@ -209,7 +206,6 @@ public: else { _latency = l; } - _lastLatencyUpdate = now; _latencySamples->push(l); } @@ -299,194 +295,273 @@ public: } /** - * @return An estimate of path quality -- higher is better. 
+ * Take note that we're expecting a VERB_ACK on this path as of a specific time + * + * @param now Current time + * @param packetId ID of the packet + * @param payloadLength Number of bytes we're is expecting a reply to */ - inline float computeQuality(const int64_t now) + inline void expectingAck(int64_t now, int64_t packetId, uint16_t payloadLength) { - float latency_contrib = _meanLatency ? (float)1.0 / _meanLatency : 0; - float jitter_contrib = _jitter ? (float)1.0 / _jitter : 0; - float throughput_contrib = _meanThroughput ? _meanThroughput / 1000000 : 0; // in Mbps - float age_contrib = _meanAge > 0 ? (float)sqrt(_meanAge) : 1; - float error_contrib = (float)1.0 - _meanPacketErrorRatio; - float sum = (latency_contrib + jitter_contrib + throughput_contrib + error_contrib) / age_contrib; - _lastComputedQuality = sum * (long)((_ipScope) + 1); - return _lastComputedQuality; + _expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? _expectingAckAsOf : now; + _unackedBytes += payloadLength; + _outgoingPacketRecords[packetId] = now; } /** - * Since quality estimates can become expensive we should cache the most recent result for traffic allocation - * algorithms which may need to reference this value multiple times through the course of their execution. + * Record that we've received a VERB_ACK on this path, also compute throughput if required. + * + * @param now Current time + * @param ackedBytes Number of bytes awknowledged by other peer */ - inline float lastComputedQuality() { - return _lastComputedQuality; + inline void receivedAck(int64_t now, int32_t ackedBytes) + { + _expectingAckAsOf = 0; + _unackedBytes = (ackedBytes > _unackedBytes) ? 
0 : _unackedBytes - ackedBytes; + int64_t timeSinceThroughputEstimate = (now - _lastThroughputEstimation); + if (timeSinceThroughputEstimate >= ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL) { + uint64_t throughput = (float)(_bytesAckedSinceLastThroughputEstimation) / ((float)timeSinceThroughputEstimate / (float)1000); + _throughputSamples->push(throughput); + _maxLifetimeThroughput = throughput > _maxLifetimeThroughput ? throughput : _maxLifetimeThroughput; + _lastThroughputEstimation = now; + _bytesAckedSinceLastThroughputEstimation = 0; + } else { + _bytesAckedSinceLastThroughputEstimation += ackedBytes; + } } /** - * @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to + * @return Number of bytes this peer is responsible for ACKing since last ACK */ - inline char *getName() { return _ifname; } + inline int32_t bytesToAck() + { + int32_t bytesToAck = 0; + for (int i=0; i<_packetsReceivedSinceLastAck; i++) { + bytesToAck += _recorded_len[i]; + } + return bytesToAck; + } /** - * @return Estimated throughput in bps of this link + * @return Number of bytes thusfar sent that have not been awknowledged by the remote peer */ - inline uint64_t getThroughput() { return _phy->getThroughput((PhySocket *)((uintptr_t)_localSocket)); } + inline int64_t unackedSentBytes() + { + return _unackedBytes; + } /** - * @return Packet delay varience + * Account for the fact that an ACK was just sent. Reset counters, timers, and clear statistics buffers + * + * @param Current time */ - inline float jitter() { return _jitter; } + inline void sentAck(int64_t now) + { + memset(_recorded_id, 0, sizeof(_recorded_id)); + memset(_recorded_ts, 0, sizeof(_recorded_ts)); + memset(_recorded_len, 0, sizeof(_recorded_len)); + _packetsReceivedSinceLastAck = 0; + _lastAck = now; + } /** - * @return Previously-computed mean latency + * Receive QoS data, match with recorded egress times from this peer, compute latency + * estimates. 
+ * + * @param now Current time + * @param count Number of records + * @param rx_id table of packet IDs + * @param rx_ts table of holding times */ - inline float meanLatency() { return _meanLatency; } + inline void receivedQoS(int64_t now, int count, uint64_t *rx_id, uint8_t *rx_ts) + { + // Look up egress times and compute latency values for each record + for (int j=0; j::iterator it = _outgoingPacketRecords.find(rx_id[j]); + if (it != _outgoingPacketRecords.end()) { + uint16_t rtt = (uint16_t)(now - it->second); + uint16_t rtt_compensated = rtt - rx_ts[j]; + float latency = rtt_compensated / 2.0; + updateLatency(latency, now); + _outgoingPacketRecords.erase(it); + } + } + } /** - * @return Packet loss rate + * Generate the contents of a VERB_QOS_MEASUREMENT packet. + * + * @param now Current time + * @param qosBuffer destination buffer + * @return Size of payload */ - inline float packetLossRatio() { return _packetLossRatio; } + inline int32_t generateQoSPacket(int64_t now, char *qosBuffer) + { + int32_t len = 0; + for (int i=0; i<_packetsReceivedSinceLastAck; i++) { + uint64_t id = _recorded_id[i]; + memcpy(qosBuffer, &id, sizeof(uint64_t)); + qosBuffer+=sizeof(uint64_t); + uint8_t holdingTime = (uint8_t)(now - _recorded_ts[i]); + memcpy(qosBuffer, &holdingTime, sizeof(uint8_t)); + qosBuffer+=sizeof(uint8_t); + len+=sizeof(uint64_t)+sizeof(uint8_t); + } + return len; + } /** - * @return Mean packet error ratio + * Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers. + * + * @param Current time */ - inline float meanPacketErrorRatio() { return _meanPacketErrorRatio; } + inline void sentQoS(int64_t now) { _lastQoSMeasurement = now; } /** - * @return Current packet error ratio (possibly incomplete sample set) + * Record statistics on incoming packets. Used later to estimate QoS. 
+ * + * @param now Current time + * @param packetId + * @param payloadLength */ - inline float currentPacketErrorRatio() { - int errorsPerSample = 0; - for (int i=0; i<_currentPacketSampleCounter; i++) { - if (_packetValidity[i] == false) { - errorsPerSample++; - } - } - return (float)errorsPerSample / (float)ZT_PATH_ERROR_SAMPLE_WIN_SZ; + inline void recordIncomingPacket(int64_t now, int64_t packetId, int32_t payloadLength) + { + _recorded_ts[_packetsReceivedSinceLastAck] = now; + _recorded_id[_packetsReceivedSinceLastAck] = packetId; + _recorded_len[_packetsReceivedSinceLastAck] = payloadLength; + _packetsReceivedSinceLastAck++; + _packetValiditySamples->push(true); } /** - * @return Whether the Path's local socket is in a CLOSED state + * @param now Current time + * @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time */ - inline bool isClosed() { return _phy->isClosed((PhySocket *)((uintptr_t)_localSocket)); } + inline bool needsToSendAck(int64_t now) { + return ((now - _lastAck) >= ZT_PATH_ACK_INTERVAL || + (_packetsReceivedSinceLastAck == ZT_PATH_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck; + } /** - * @return The state of a Path's local socket + * @param now Current time + * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time */ - inline int getState() { return _phy->getState((PhySocket *)((uintptr_t)_localSocket)); } + inline bool needsToSendQoS(int64_t now) { + return ((_packetsReceivedSinceLastAck >= ZT_PATH_QOS_TABLE_SIZE) || + ((now - _lastQoSMeasurement) > ZT_PATH_QOS_INTERVAL)) && _packetsReceivedSinceLastAck; + } /** - * @return Whether this socket may have been erased by the virtual physical link layer + * How much time has elapsed since we've been expecting a VERB_ACK on this path. This value + * is used to determine a more relevant path "age". This lets us penalize paths which are no + * longer ACKing, but not those that simple aren't being used to carry traffic at the + * current time. 
*/ - inline bool isValidState() { return _phy->isValidState((PhySocket *)((uintptr_t)_localSocket)); } + inline int64_t ackAge(int64_t now) { return _expectingAckAsOf ? now - _expectingAckAsOf : 0; } /** - * @return Whether the path quality monitors have collected enough data to provide a quality value - * TODO: expand this + * The maximum observed throughput for this path */ - inline bool monitorsReady() { - return _latencySamples->count() && _ageSamples->count() && _throughputSamples->count(); - } + inline uint64_t maxLifetimeThroughput() { return _maxLifetimeThroughput; } /** - * @return A pointer to a cached copy of the address string for this Path (For debugging only) + * @return The mean throughput (in bits/s) of this link */ - inline char *getAddressString() { return _addrString; } + inline float meanThroughput() { return _meanThroughput; } /** - * Handle path sampling, computation of quality estimates, and other periodic tasks - * @param now Current time + * Assign a new relative quality value for this path in the aggregate link + * + * @param rq Quality of this path in comparison to other paths available to this peer */ - inline void measureLink(int64_t now) { - // Sample path properties and store them in a continuously-revolving buffer - if (now - _lastPathQualitySampleTime > ZT_PATH_QUALITY_SAMPLE_INTERVAL) { - _lastPathQualitySampleTime = now; - _throughputSamples->push(getThroughput()); // Thoughtput in bits/s - _ageSamples->push(now - _lastIn); // Age (time since last received packet) - if (now - _lastLatencyUpdate > ZT_PATH_LATENCY_SAMPLE_INTERVAL) { - _lastLatencyUpdate = now; - // Record 0 bp/s. 
Since we're using this to detect possible packet loss - updateLatency(0, now); - } - } - // Compute statistical values for use in link quality estimates - if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { - _lastPathQualityComputeTime = now; - // Cache Path address string - address().toString(_addrString); - _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, ZT_PATH_INTERFACE_NAME_SZ); // Cache Interface name - // Derived values - if (_throughputSamples->count()) { - _packetLossRatio = (float)_throughputSamples->zeroCount() / (float)_throughputSamples->count(); - } - _meanThroughput = _throughputSamples->mean(); - _meanAge = _ageSamples->mean(); - _meanLatency = _latencySamples->mean(); - // Jitter - // SEE: RFC 3393, RFC 4689 - _jitter = _latencySamples->stddev(); - _meanPacketErrorRatio = _errSamples->mean(); // Packet Error Ratio (PER) - } - // Periodically compute a path quality estimate - if (now - _lastPathQualityEstimate > ZT_PATH_QUALITY_ESTIMATE_INTERVAL) { - computeQuality(now); - } - } + inline void updateRelativeQuality(float rq) { _lastComputedRelativeQuality = rq; } /** - * @param buf Buffer to store resultant string - * @return Description of path, in ASCII string format + * @return Quality of this path compared to others in the aggregate link */ - inline char *toString(char *buf) { - sprintf(buf,"%6s, q=%8.3f, %5.3f Mb/s, j=%8.2f, ml=%8.2f, meanAge=%8.2f, addr=%45s", - getName(), - lastComputedQuality(), - (float)meanThroughput() / (float)1000000, - jitter(), - meanLatency(), - meanAge(), - getAddressString()); - return buf; - } + inline float relativeQuality() { return _lastComputedRelativeQuality; } /** - * Record whether a packet is considered invalid by MAC/compression/cipher checks. This - * could be an indication of a bit error. 
This function will keep a running counter of - * up to a given window size and with each counter overflow it will compute a mean error rate - * and store that in a continuously shifting sample window. - * - * @param isValid Whether the packet in question is considered invalid + * @return Stability estimates can become expensive to compute, we cache the most recent result. */ - inline void recordPacket(bool isValid) { - if (_currentPacketSampleCounter < ZT_PATH_ERROR_SAMPLE_WIN_SZ) { - _packetValidity[_currentPacketSampleCounter] = isValid; - _currentPacketSampleCounter++; - } - else { - // Sample array is full, compute an mean and stick it in the ring buffer for trend analysis - _errSamples->push(currentPacketErrorRatio()); - _currentPacketSampleCounter=0; - } - } + inline float lastComputedStability() { return _lastComputedStability; } /** - * @return The mean age (in ms) of this link + * @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to */ - inline float meanAge() { return _meanAge; } + inline char *getName() { return _ifname; } /** - * @return The mean throughput (in bits/s) of this link + * @return Packet delay varience */ - inline float meanThroughput() { return _meanThroughput; } + inline float packetDelayVariance() { return _packetDelayVariance; } /** - * @return True if this path is alive (receiving heartbeats) + * @return Previously-computed mean latency */ - inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); } + inline float meanLatency() { return _meanLatency; } + + /** + * @return Packet loss rate (PLR) + */ + inline float packetLossRatio() { return _packetLossRatio; } + + /** + * @return Packet error ratio (PER) + */ + inline float packetErrorRatio() { return _packetErrorRatio; } + + /** + * Record an invalid incoming packet. This packet failed MAC/compression/cipher checks and will now + * contribute to a Packet Error Ratio (PER). 
+ */ + inline void recordInvalidPacket() { _packetValiditySamples->push(false); } /** - * @return True if this path hasn't received a packet in a "significant" amount of time + * @return A pointer to a cached copy of the address string for this Path (For debugging only) */ - inline bool stale(const int64_t now) const { return ((now - _lastIn) > ZT_LINK_SPEED_TEST_INTERVAL * 10); } + inline char *getAddressString() { return _addrString; } + + /** + * Compute and cache stability and performance metrics. The resultant stability coefficint is a measure of how "well behaved" + * this path is. This figure is substantially different from (but required for the estimation of the path's overall "quality". + * + * @param now Current time + */ + inline void processBackgroundPathMeasurements(int64_t now, const int64_t peerId) { + if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { + _lastPathQualityComputeTime = now; + _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, 16); + address().toString(_addrString); + _meanThroughput = _throughputSamples->mean(); + _meanLatency = _latencySamples->mean(); + _packetDelayVariance = _latencySamples->stddev(); // Similar to "jitter" (SEE: RFC 3393, RFC 4689) + // If no packet validity samples, assume PER==0 + _packetErrorRatio = 1 - (_packetValiditySamples->count() ? _packetValiditySamples->mean() : 1); + // Compute path stability + // Normalize measurements with wildly different ranges into a reasonable range + float normalized_pdv = Utils::normalize(_packetDelayVariance, 0, ZT_PATH_MAX_PDV, 0, 10); + float normalized_la = Utils::normalize(_meanLatency, 0, ZT_PATH_MAX_MEAN_LATENCY, 0, 10); + float throughput_cv = _throughputSamples->mean() > 0 ? 
_throughputSamples->stddev() / _throughputSamples->mean() : 1; + // Form an exponential cutoff and apply contribution weights + float pdv_contrib = exp((-1)*normalized_pdv) * ZT_PATH_CONTRIB_PDV; + float latency_contrib = exp((-1)*normalized_la) * ZT_PATH_CONTRIB_LATENCY; + float throughput_disturbance_contrib = exp((-1)*throughput_cv) * ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE; + // Obey user-defined ignored contributions + pdv_contrib = ZT_PATH_CONTRIB_PDV > 0.0 ? pdv_contrib : 1; + latency_contrib = ZT_PATH_CONTRIB_LATENCY > 0.0 ? latency_contrib : 1; + throughput_disturbance_contrib = ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE > 0.0 ? throughput_disturbance_contrib : 1; + // Compute the quality product + _lastComputedStability = pdv_contrib + latency_contrib + throughput_disturbance_contrib; + _lastComputedStability *= 1 - _packetErrorRatio; + _qualitySamples->push(_lastComputedStability); + } + } + + /** + * @return True if this path is alive (receiving heartbeats) + */ + inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); } /** * @return True if this path needs a heartbeat @@ -508,6 +583,21 @@ public: */ inline int64_t lastTrustEstablishedPacketReceived() const { return _lastTrustEstablishedPacketReceived; } + /** + * Initialize statistical buffers + */ + inline void prepareBuffers() { + _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _qualitySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _packetValiditySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + memset(_ifname, 0, 16); + memset(_recorded_id, 0, sizeof(_recorded_id)); + memset(_recorded_ts, 0, sizeof(_recorded_ts)); + memset(_recorded_len, 0, sizeof(_recorded_len)); + memset(_addrString, 0, sizeof(_addrString)); + } + private: volatile int64_t _lastOut; volatile int64_t _lastIn; @@ -519,32 +609,42 @@ private: InetAddress::IpScope _ipScope; // 
memoize this since it's a computed value checked often AtomicCounter __refCount; - // Packet Error Ratio (PER) - int _packetValidity[ZT_PATH_ERROR_SAMPLE_WIN_SZ]; - int _currentPacketSampleCounter; - volatile float _meanPacketErrorRatio; + uint64_t _recorded_id[ZT_PATH_QOS_TABLE_SIZE]; + uint64_t _recorded_ts[ZT_PATH_QOS_TABLE_SIZE]; + uint16_t _recorded_len[ZT_PATH_QOS_TABLE_SIZE]; - // Latency and Jitter - volatile float _meanLatency; - int64_t _lastLatencyUpdate; - volatile float _jitter; + std::map _outgoingPacketRecords; + + int64_t _lastAck; + int64_t _lastThroughputEstimation; + int64_t _lastQoSMeasurement; + + int64_t _unackedBytes; + int64_t _expectingAckAsOf; + int16_t _packetsReceivedSinceLastAck; - int64_t _lastPathQualitySampleTime; - float _lastComputedQuality; - int64_t _lastPathQualityEstimate; - float _meanAge; float _meanThroughput; + uint64_t _maxLifetimeThroughput; + uint64_t _bytesAckedSinceLastThroughputEstimation; - // Circular buffers used to efficiently store large time series - RingBuffer *_throughputSamples; - RingBuffer *_latencySamples; - RingBuffer *_ageSamples; - RingBuffer *_errSamples; + volatile float _meanLatency; + float _packetDelayVariance; + float _packetErrorRatio; float _packetLossRatio; - char _ifname[ZT_PATH_INTERFACE_NAME_SZ]; + // cached estimates + float _lastComputedStability; + float _lastComputedRelativeQuality; + + // cached human-readable strings for tracing purposes + char _ifname[16]; char _addrString[256]; + + RingBuffer *_throughputSamples; + RingBuffer *_latencySamples; + RingBuffer *_qualitySamples; + RingBuffer *_packetValiditySamples; }; } // namespace ZeroTier diff --git a/node/Peer.cpp b/node/Peer.cpp index c46ed751..8deaa362 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -24,8 +24,8 @@ * of your own application. 
*/ -#include "../version.h" +#include "../version.h" #include "Constants.hpp" #include "Peer.hpp" #include "Node.hpp" @@ -36,6 +36,7 @@ #include "Trace.hpp" #include "InetAddress.hpp" #include "RingBuffer.hpp" +#include "Utils.hpp" namespace ZeroTier { @@ -61,13 +62,13 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _id(peerIdentity), _directPathPushCutoffCount(0), _credentialsCutoffCount(0), - _linkBalanceStatus(false), - _linkRedundancyStatus(false) + _linkIsBalanced(false), + _linkIsRedundant(false), + _lastAggregateStatsReport(0) { if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) throw ZT_EXCEPTION_INVALID_ARGUMENT; _pathChoiceHist = new RingBuffer(ZT_MULTIPATH_PROPORTION_WIN_SZ); - _flowBalanceHist = new RingBuffer(ZT_MULTIPATH_PROPORTION_WIN_SZ); } void Peer::received( @@ -75,6 +76,7 @@ void Peer::received( const SharedPtr &path, const unsigned int hops, const uint64_t packetId, + const unsigned int payloadLength, const Packet::Verb verb, const uint64_t inRePacketId, const Packet::Verb inReVerb, @@ -103,13 +105,13 @@ void Peer::received( { Mutex::Lock _l(_paths_m); if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { - if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { - _lastPathPrune = now; - prunePaths(); + recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, now); + if (path->needsToSendQoS(now)) { + sendQOS_MEASUREMENT(tPtr, path, path->localSocket(), path->address(), now); } for(unsigned int i=0;imeasureLink(now); + _paths[i].p->processBackgroundPathMeasurements(now, _id.address().toInt()); } } } @@ -117,7 +119,6 @@ void Peer::received( if (hops == 0) { // If this is a direct packet (no hops), update existing paths or learn new ones - bool havePath = false; { Mutex::Lock _l(_paths_m); @@ -164,6 +165,19 @@ void Peer::received( } } + // If we find a pre-existing path with the same address, just replace it. 
+ // If we don't find anything we can replace, just use the replacePath that we previously decided on. + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + for(unsigned int i=0;iaddress().ss_family == path->address().ss_family && _paths[i].p->address().ipsEqual2(path->address())) { + replacePath = i; + break; + } + } + } + } + if (replacePath != ZT_MAX_PEER_NETWORK_PATHS) { if (verb == Packet::VERB_OK) { RR->t->peerLearnedNewPath(tPtr,networkId,*this,path,packetId); @@ -252,6 +266,117 @@ void Peer::received( } } +void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, int64_t now) +{ + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + path->expectingAck(now, packetId, payloadLength); + } + } +} + +void Peer::recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, int64_t now) +{ + if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + if (path->needsToSendAck(now)) { + sendACK(tPtr, path, path->localSocket(), path->address(), now); + } + path->recordIncomingPacket(now, packetId, payloadLength); + } +} + +float Peer::computeAggregateLinkRelativeQuality(int64_t now) +{ + float maxStability = 0; + float totalRelativeQuality = 0; + float maxThroughput = 1; + float maxScope = 0; + float relStability[ZT_MAX_PEER_NETWORK_PATHS]; + float relThroughput[ZT_MAX_PEER_NETWORK_PATHS]; + memset(&relStability, 0, sizeof(relStability)); + memset(&relThroughput, 0, sizeof(relThroughput)); + // Survey all paths + for(unsigned int i=0;ilastComputedStability(); + relThroughput[i] = _paths[i].p->maxLifetimeThroughput(); + maxStability = relStability[i] > maxStability ? relStability[i] : maxStability; + maxThroughput = relThroughput[i] > maxThroughput ? relThroughput[i] : maxThroughput; + maxScope = _paths[i].p->ipScope() > maxScope ? 
_paths[i].p->ipScope() : maxScope; + } + } + // Convert to relative values + for(unsigned int i=0;iackAge(now), 0, ZT_PATH_MAX_AGE, 0, 10); + float age_contrib = exp((-1)*normalized_ma); + float relScope = ((float)(_paths[i].p->ipScope()+1) / (maxScope + 1)); + float relQuality = + (relStability[i] * ZT_PATH_CONTRIB_STABILITY) + + (fmax(1, relThroughput[i]) * ZT_PATH_CONTRIB_THROUGHPUT) + + relScope * ZT_PATH_CONTRIB_SCOPE; + relQuality *= age_contrib; + totalRelativeQuality += relQuality; + _paths[i].p->updateRelativeQuality(relQuality); + } + } + return (float)1.0 / totalRelativeQuality; // Used later to convert relative quantities into flow allocations +} + +float Peer::computeAggregateLinkPacketDelayVariance() +{ + float pdv = 0.0; + for(unsigned int i=0;irelativeQuality() * _paths[i].p->packetDelayVariance(); + } + } + return pdv; +} + +float Peer::computeAggregateLinkMeanLatency() +{ + float ml = 0.0; + for(unsigned int i=0;irelativeQuality() * _paths[i].p->meanLatency(); + } + } + return ml; +} + +int Peer::aggregateLinkPhysicalPathCount() +{ + std::map ifnamemap; + int pathCount = 0; + int64_t now = RR->node->now(); + for(unsigned int i=0;ialive(now)) { + if (!ifnamemap[_paths[i].p->getName()]) { + ifnamemap[_paths[i].p->getName()] = true; + pathCount++; + } + } + } + return pathCount; +} + +int Peer::aggregateLinkLogicalPathCount() +{ + int pathCount = 0; + int64_t now = RR->node->now(); + for(unsigned int i=0;ialive(now)) { + pathCount++; + } + } + return pathCount; +} + SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) { Mutex::Lock _l(_paths_m); @@ -264,7 +389,7 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) if (RR->node->getMultipathMode() == ZT_MULTIPATH_NONE) { long bestPathQuality = 2147483647; for(unsigned int i=0;iisValidState()) { + if (_paths[i].p) { if ((includeExpired)||((now - _paths[i].lr) < ZT_PEER_PATH_EXPIRATION)) { const long q = _paths[i].p->quality(now) / _paths[i].priority; if (q <= 
bestPathQuality) { @@ -280,23 +405,14 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) return SharedPtr(); } - if ((now - _lastPathPrune) > ZT_CLOSED_PATH_PRUNING_INTERVAL) { - _lastPathPrune = now; - prunePaths(); - } for(unsigned int i=0;imeasureLink(now); + _paths[i].p->processBackgroundPathMeasurements(now, _id.address().toInt()); } } /** * Randomly distribute traffic across all paths - * - * Behavior: - * - If path DOWN: Stop randomly choosing that path - * - If path UP: Start randomly choosing that path - * - If all paths are unresponsive: randomly choose from all paths */ int numAlivePaths = 0; int numStalePaths = 0; @@ -307,15 +423,13 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) memset(&stalePaths, -1, sizeof(stalePaths)); for(unsigned int i=0;iisValidState()) { - if (_paths[i].p->alive(now)) { - alivePaths[numAlivePaths] = i; - numAlivePaths++; - } - else { - stalePaths[numStalePaths] = i; - numStalePaths++; - } + if (_paths[i].p->alive(now)) { + alivePaths[numAlivePaths] = i; + numAlivePaths++; + } + else { + stalePaths[numStalePaths] = i; + numStalePaths++; } } } @@ -337,160 +451,104 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) * Proportionally allocate traffic according to dynamic path quality measurements */ if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) { - float relq[ZT_MAX_PEER_NETWORK_PATHS]; - memset(&relq, 0, sizeof(relq)); float alloc[ZT_MAX_PEER_NETWORK_PATHS]; memset(&alloc, 0, sizeof(alloc)); - - // Survey - // - // Take a survey of all available link qualities. We use this to determine if we - // can skip this algorithm altogether and if not, to establish baseline for physical - // link quality used in later calculations. - // - // We find the min/max quality of our currently-active links so - // that we can form a relative scale to rank each link proportionally - // to each other link. 
- uint16_t alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; - uint16_t stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; + int numAlivePaths = 0; + int numStalePaths = 0; + int alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; + int stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; memset(&alivePaths, -1, sizeof(alivePaths)); memset(&stalePaths, -1, sizeof(stalePaths)); - uint16_t numAlivePaths = 0; - uint16_t numStalePaths = 0; - float minQuality = 10000; - float maxQuality = -1; - float currQuality; - for(uint16_t i=0;iisValidState()) { - if (!_paths[i].p->monitorsReady()) { - // TODO: This should fix itself anyway but we should test whether forcing the use of a new path will - // aid in establishing flow balance more quickly. - } - // Compute quality here, going forward we will use lastComputedQuality() - currQuality = _paths[i].p->computeQuality(now); - if (!_paths[i].p->stale(now)) { + // Attempt to find an excuse not to use the rest of this algorithm + // Alive or Stale? + for(unsigned int i=0;ialive(now)) { + alivePaths[numAlivePaths] = i; numAlivePaths++; - } - else { + } else { + stalePaths[numStalePaths] = i; numStalePaths++; } - if (currQuality > maxQuality) { - maxQuality = currQuality; - bestPath = i; - } - if (currQuality < minQuality) { - minQuality = currQuality; - } - relq[i] = currQuality; + // Record a default path to use as a short-circuit for the rest of the algorithm + bestPath = i; } } - - // Attempt to find an excuse not to use the rest of this algorithm - if (bestPath == ZT_MAX_PEER_NETWORK_PATHS || (numAlivePaths == 0 && numStalePaths == 0)) { + if (numAlivePaths == 0 && numStalePaths == 0) { return SharedPtr(); - } if (numAlivePaths == 1) { - //return _paths[bestPath].p; - } if (numStalePaths == 1) { - //return _paths[bestPath].p; - } - - // Relative quality - // - // The strongest link will have a value of 1.0 whereas every other - // link will have a value which represents some fraction of the strongest link. 
- float totalRelativeQuality = 0; - for(unsigned int i=0;iisValidState()) { - relq[i] /= maxQuality ? maxQuality : 1; - totalRelativeQuality += relq[i]; - } + } if (numAlivePaths == 1 || numStalePaths == 1) { + return _paths[bestPath].p; } - - // Convert the relative quality values into flow allocations. - // Additionally, determine whether each path in the flow is - // contributing more or less than its target allocation. If - // it is contributing more than required, don't allow it to be - // randomly selected for the next packet. If however the path - // needs to contribute more to the flow, we should record - float imbalance = 0; - float qualityScalingFactor = (float)1.0 / totalRelativeQuality; + // Compare paths to each-other + float qualityScalingFactor = computeAggregateLinkRelativeQuality(now); + // Convert set of relative performances into an allocation set for(uint16_t i=0;icountValue(i); - // Compute traffic allocation for each path in the flow - if (_paths[i].p && _paths[i].p->isValidState()) { - // Allocation - // This is the percentage of traffic we want to send over a given path - alloc[i] = relq[i] * qualityScalingFactor; - float currProportion = numPktSentWithinWin / (float)ZT_MULTIPATH_PROPORTION_WIN_SZ; - float targetProportion = alloc[i]; - float diffProportion = currProportion - targetProportion; - // Imbalance - // - // This is the sum of the distances of each path's currently observed flow contributions - // from its most recent target allocation. In other words, this is a measure of how closely we - // are adhering to our desired allocations. It is worth noting that this value can be greater - // than 1.0 if a significant change to allocations is made by the algorithm, this will - // eventually correct itself. 
- imbalance += fabs(diffProportion); - if (diffProportion < 0) { - alloc[i] = targetProportion; - } - else { - alloc[i] = targetProportion; - } - } - } - - // Compute and record current flow balance - float balance = (float)1.0 - imbalance; - if (balance >= ZT_MULTIPATH_FLOW_BALANCE_THESHOLD) { - if (!_linkBalanceStatus) { - _linkBalanceStatus = true; - RR->t->peerLinkBalanced(NULL,0,*this); - } - } - else { - if (_linkBalanceStatus) { - _linkBalanceStatus = false; - RR->t->peerLinkImbalanced(NULL,0,*this); + if (_paths[i].p) { + alloc[i] = _paths[i].p->relativeQuality() * qualityScalingFactor; } } - - // Record the current flow balance. Later used for computing a mean flow balance value. - _flowBalanceHist->push(balance); - - // Randomly choose path from allocated candidates + // Randomly choose path according to their allocations unsigned int r; Utils::getSecureRandom(&r, 1); float rf = (float)(r %= 100) / 100; for(int i=0;iisValidState() && _paths[i].p->address().isV4()) { - if (alloc[i] > 0 && rf < alloc[i]) { + if (_paths[i].p) { + if (rf < alloc[i]) { bestPath = i; _pathChoiceHist->push(bestPath); // Record which path we chose break; } - if (alloc[i] > 0) { - rf -= alloc[i]; - } - else { - rf -= alloc[i]*-1; - } + rf -= alloc[i]; } } if (bestPath < ZT_MAX_PEER_NETWORK_PATHS) { return _paths[bestPath].p; } - return SharedPtr(); } + return SharedPtr(); +} - // Adhere to a user-defined interface/allocation scheme - if (RR->node->getMultipathMode() == ZT_MULTIPATH_MANUALLY_BALANCED) { - // TODO +char *Peer::interfaceListStr() +{ + std::map ifnamemap; + char tmp[32]; + const int64_t now = RR->node->now(); + char *ptr = _interfaceListStr; + bool imbalanced = false; + memset(_interfaceListStr, 0, sizeof(_interfaceListStr)); + int alivePathCount = aggregateLinkLogicalPathCount(); + for(unsigned int i=0;ialive(now)) { + int ipv = _paths[i].p->address().isV4(); + // If this is acting as an aggregate link, check allocations + float targetAllocation = 1.0 / 
alivePathCount; + float currentAllocation = 1.0; + if (alivePathCount > 1) { + currentAllocation = (float)_pathChoiceHist->countValue(i) / (float)_pathChoiceHist->count(); + if (fabs(targetAllocation - currentAllocation) > ZT_PATH_IMBALANCE_THRESHOLD) { + imbalanced = true; + } + } + char *ipvStr = ipv ? (char*)"ipv4" : (char*)"ipv6"; + sprintf(tmp, "(%s, %s, %5.4f)", _paths[i].p->getName(), ipvStr, currentAllocation); + // Prevent duplicates + if(ifnamemap[_paths[i].p->getName()] != ipv) { + memcpy(ptr, tmp, strlen(tmp)); + ptr += strlen(tmp); + *ptr = ' '; + ptr++; + ifnamemap[_paths[i].p->getName()] = ipv; + } + } } - - return SharedPtr(); + ptr--; // Overwrite trailing space + if (imbalanced) { + sprintf(tmp, ", is IMBALANCED"); + memcpy(ptr, tmp, sizeof(tmp)); + } else { + *ptr = '\0'; + } + return _interfaceListStr; } void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &other) const @@ -614,6 +672,35 @@ void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &o } } +void Peer::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) +{ + Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ACK); + uint32_t bytesToAck = path->bytesToAck(); + outp.append(bytesToAck); + if (atAddress) { + outp.armor(_key,false); + RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); + } else { + RR->sw->send(tPtr,outp,false); + } + path->sentAck(now); +} + +void Peer::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) +{ + Packet outp(_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT); + char qosData[ZT_PATH_MAX_QOS_PACKET_SZ]; + path->generateQoSPacket(now,qosData); + outp.append(qosData,sizeof(qosData)); + if (atAddress) { + outp.armor(_key,false); + RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); + } else { + RR->sw->send(tPtr,outp,false); + } + 
path->sentQoS(now); +} + void Peer::sendHELLO(void *tPtr,const int64_t localSocket,const InetAddress &atAddress,int64_t now) { Packet outp(_id.address(),RR->identity.address(),Packet::VERB_HELLO); @@ -688,6 +775,25 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD); _lastSentFullHello = now; + // Emit traces regarding the status of aggregate links + if (RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) { + int alivePathCount = aggregateLinkPhysicalPathCount(); + if ((now - _lastAggregateStatsReport) > ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL) { + _lastAggregateStatsReport = now; + if (alivePathCount) { + RR->t->peerLinkAggregateStatistics(NULL,*this); + } + } + // Report link redundancy + if (alivePathCount < 2 && _linkIsRedundant) { + _linkIsRedundant = !_linkIsRedundant; + RR->t->peerLinkNoLongerRedundant(NULL,*this); + } if (alivePathCount > 1 && !_linkIsRedundant) { + _linkIsRedundant = !_linkIsRedundant; + RR->t->peerLinkNowRedundant(NULL,*this); + } + } + // Right now we only keep pinging links that have the maximum priority. 
The // priority is used to track cluster redirections, meaning that when a cluster // redirects us its redirect target links override all other links and we @@ -726,22 +832,6 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) return sent; } -unsigned int Peer::prunePaths() -{ - unsigned int pruned = 0; - for(unsigned int i=0;iisClosed() || !_paths[i].p->isValidState()) { - _paths[i].lr = 0; - _paths[i].p.zero(); - _paths[i].priority = 1; - pruned++; - } - } - } - return pruned; -} - void Peer::clusterRedirect(void *tPtr,const SharedPtr &originatingPath,const InetAddress &remoteAddress,const int64_t now) { SharedPtr np(RR->topology->getPath(originatingPath->localSocket(),remoteAddress)); diff --git a/node/Peer.hpp b/node/Peer.hpp index 9873729b..2f723b07 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -68,9 +68,7 @@ public: ~Peer() { Utils::burn(_key,sizeof(_key)); delete _pathChoiceHist; - delete _flowBalanceHist; _pathChoiceHist = NULL; - _flowBalanceHist = NULL; } /** @@ -114,6 +112,7 @@ public: const SharedPtr &path, const unsigned int hops, const uint64_t packetId, + const unsigned int payloadLength, const Packet::Verb verb, const uint64_t inRePacketId, const Packet::Verb inReVerb, @@ -158,7 +157,74 @@ public: } /** - * Get the most appropriate direct path based on current multipath configuration + * Record statistics on outgoing packets + * + * @param path Path over which packet was sent + * @param id Packet ID + * @param len Length of packet payload + * @param verb Packet verb + * @param now Current time + */ + void recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now); + + /** + * Record statistics on incoming packets + * + * @param path Path over which packet was sent + * @param id Packet ID + * @param len Length of packet payload + * @param verb Packet verb + * @param now Current time + */ + void recordIncomingPacket(void *tPtr, const SharedPtr &path, const 
uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now); + + /** + * Send an ACK to peer for the most recent packets received + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param localSocket Raw socket the ACK packet will be sent over + * @param atAddress Destination for the ACK packet + * @param now Current time + */ + void sendACK(void *tPtr, const SharedPtr &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now); + + /** + * Send a QoS packet to peer so that it can evaluate the quality of this link + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param localSocket Raw socket the QoS packet will be sent over + * @param atAddress Destination for the QoS packet + * @param now Current time + */ + void sendQOS_MEASUREMENT(void *tPtr, const SharedPtr &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now); + + /** + * @return The relative quality values for each path + */ + float computeAggregateLinkRelativeQuality(int64_t now); + + /** + * @return The aggregate link Packet Delay Variance (PDV) + */ + float computeAggregateLinkPacketDelayVariance(); + + /** + * @return The aggregate link mean latenct + */ + float computeAggregateLinkMeanLatency(); + + /** + * @return The number of currently alive "physical" paths in the aggregate link + */ + int aggregateLinkPhysicalPathCount(); + + /** + * @return The number of currently alive "logical" paths in the aggregate link + */ + int aggregateLinkLogicalPathCount(); + + /** + * Get the most appropriate direct path based on current multipath and QoS configuration * * @param now Current time * @param includeExpired If true, include even expired paths @@ -166,6 +232,12 @@ public: */ SharedPtr getAppropriatePath(int64_t now, bool includeExpired); + /** + * Generate a human-readable string of interface names making up the aggregate link, also 
include + * moving allocation and IP version number for each (for tracing) + */ + char *interfaceListStr(); + /** * Send VERB_RENDEZVOUS to this and another peer via the best common IP scope and path */ @@ -549,11 +621,12 @@ private: AtomicCounter __refCount; RingBuffer *_pathChoiceHist; - RingBuffer *_flowBalanceHist; - bool _linkBalanceStatus; - bool _linkRedundancyStatus; + bool _linkIsBalanced; + bool _linkIsRedundant; + uint64_t _lastAggregateStatsReport; + char _interfaceListStr[256]; // 16 characters * 16 paths in a link }; } // namespace ZeroTier diff --git a/node/RingBuffer.hpp b/node/RingBuffer.hpp index cd384749..32ae037c 100644 --- a/node/RingBuffer.hpp +++ b/node/RingBuffer.hpp @@ -172,6 +172,11 @@ public: write(&value, 1); } + /** + * @return The most recently pushed element on the buffer + */ + T get_most_recent() { return *(buf + end); } + /** * @param dest Destination buffer * @param n Size (in terms of number of elements) of the destination buffer @@ -218,10 +223,7 @@ public: /** * @return The number of slots that are unused in the buffer */ - size_t getFree() - { - return size - count(); - } + size_t getFree() { return size - count(); } /** * @return The arithmetic mean of the contents of the buffer @@ -229,45 +231,67 @@ public: float mean() { size_t iterator = begin; - float mean = 0; - for (size_t i=0; irecordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), now); + if (trustedPathId) { packet.setTrusted(trustedPathId); } else { diff --git a/node/Trace.cpp b/node/Trace.cpp index 01a8da55..f47a029b 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -106,24 +106,24 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId, } } -void Trace::peerLinkNowRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath) +void Trace::peerLinkNowRedundant(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is fully 
redundant",peer.address().toInt(),networkId); + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is fully redundant",peer.address().toInt()); } -void Trace::peerLinkNoLongerRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath) +void Trace::peerLinkNoLongerRedundant(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is no longer redundant",peer.address().toInt(),networkId); + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is no longer redundant",peer.address().toInt()); } -void Trace::peerLinkBalanced(void *const tPtr,const uint64_t networkId,Peer &peer) +void Trace::peerLinkAggregateStatistics(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is balanced",peer.address().toInt(),networkId); -} - -void Trace::peerLinkImbalanced(void *const tPtr,const uint64_t networkId,Peer &peer) -{ - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx on network %.16llx is unbalanced",peer.address().toInt(),networkId); + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has packet delay variance (%.0f ms), mean latency (%.0f ms)", + peer.address().toInt(), + peer.aggregateLinkPhysicalPathCount(), + peer.interfaceListStr(), + peer.computeAggregateLinkPacketDelayVariance(), + peer.computeAggregateLinkMeanLatency()); } void Trace::peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath,const uint64_t packetId) diff --git a/node/Trace.hpp b/node/Trace.hpp index b01163d6..734e84a5 100644 --- a/node/Trace.hpp +++ b/node/Trace.hpp @@ -122,10 +122,10 @@ public: void peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &path,const uint64_t packetId,const Packet::Verb verb); - void peerLinkNowRedundant(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); - void peerLinkNoLongerRedundant(void *const tPtr,const uint64_t networkId,Peer 
&peer,const SharedPtr &newPath); - void peerLinkBalanced(void *const tPtr,const uint64_t networkId,Peer &peer); - void peerLinkImbalanced(void *const tPtr,const uint64_t networkId,Peer &peer); + void peerLinkNowRedundant(void *const tPtr,Peer &peer); + void peerLinkNoLongerRedundant(void *const tPtr,Peer &peer); + + void peerLinkAggregateStatistics(void *const tPtr,Peer &peer); void peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath,const uint64_t packetId); void peerRedirected(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath); diff --git a/node/Utils.hpp b/node/Utils.hpp index a24f2c9a..6ce67328 100644 --- a/node/Utils.hpp +++ b/node/Utils.hpp @@ -261,6 +261,14 @@ public: return l; } + static inline float normalize(float value, int64_t bigMin, int64_t bigMax, int32_t targetMin, int32_t targetMax) + { + int64_t bigSpan = bigMax - bigMin; + int64_t smallSpan = targetMax - targetMin; + float valueScaled = (value - (float)bigMin) / (float)bigSpan; + return (float)targetMin + valueScaled * (float)smallSpan; + } + /** * Generate secure random bytes * diff --git a/osdep/Binder.hpp b/osdep/Binder.hpp index 6e13836c..1f06021b 100644 --- a/osdep/Binder.hpp +++ b/osdep/Binder.hpp @@ -456,20 +456,6 @@ public: return false; } - /** - * Get a list of socket pointers for all bindings. 
- * - * @return A list of socket pointers for current bindings - */ - inline std::vector getBoundSockets() - { - std::vector sockets; - for (int i=0; i _bindingCount; diff --git a/osdep/Phy.hpp b/osdep/Phy.hpp index 2e276a2a..5e659767 100644 --- a/osdep/Phy.hpp +++ b/osdep/Phy.hpp @@ -27,8 +27,6 @@ #ifndef ZT_PHY_HPP #define ZT_PHY_HPP -#include "../osdep/OSUtils.hpp" - #include #include #include @@ -88,22 +86,6 @@ namespace ZeroTier { */ typedef void PhySocket; -struct link_test_record -{ - link_test_record(PhySocket *_s, uint64_t _id, uint64_t _egress_time, uint32_t _length) : - s(_s), - id(_id), - egress_time(_egress_time), - length(_length) - { - // - } - PhySocket *s; - uint64_t id; - uint64_t egress_time; - uint32_t length; -}; - /** * Simple templated non-blocking sockets implementation * @@ -170,19 +152,13 @@ private: ZT_PHY_SOCKET_UNIX_LISTEN = 0x08 }; - struct PhySocketImpl - { - PhySocketImpl() : - throughput(0) - { - memset(ifname, 0, sizeof(ifname)); - } + struct PhySocketImpl { + PhySocketImpl() { memset(ifname, 0, sizeof(ifname)); } PhySocketType type; ZT_PHY_SOCKFD_TYPE sock; void *uptr; // user-settable pointer ZT_PHY_SOCKADDR_STORAGE_TYPE saddr; // remote for TCP_OUT and TCP_IN, local for TCP_LISTEN, RAW, and UDP char ifname[16]; - uint64_t throughput; }; std::list _socks; @@ -198,7 +174,6 @@ private: bool _noDelay; bool _noCheck; - std::vector link_test_records; public: /** @@ -282,7 +257,9 @@ public: */ static inline void getIfName(PhySocket *s, char *nameBuf, int buflen) { - memcpy(nameBuf, reinterpret_cast(s)->ifname, buflen); + if (s) { + memcpy(nameBuf, reinterpret_cast(s)->ifname, buflen); + } } /** @@ -292,18 +269,9 @@ public: */ static inline void setIfName(PhySocket *s, char *ifname, int len) { - memcpy(&(reinterpret_cast(s)->ifname), ifname, len); - } - - /** - * Get result of most recent throughput test - * - * @param s Socket object - */ - inline uint64_t getThroughput(PhySocket *s) - { - PhySocketImpl *sws = (reinterpret_cast(s)); - 
return sws ? sws->throughput : 0; + if (s) { + memcpy(&(reinterpret_cast(s)->ifname), ifname, len); + } } /** @@ -339,105 +307,9 @@ public: */ inline bool isValidState(PhySocket *s) { - PhySocketImpl *sws = (reinterpret_cast(s)); - return sws->type >= ZT_PHY_SOCKET_CLOSED && sws->type <= ZT_PHY_SOCKET_UNIX_LISTEN; - } - - /** - * Send a datagram of a known size to a selected peer and record egress time. The peer - * shall eventually respond by echoing back a smaller datagram. - * - * @param s Socket object - * @param remoteAddress Address of remote peer to receive link test packet - * @param data Buffer containing random packet data - * @param len Length of packet data buffer - * @return Number of bytes successfully written to socket - */ - inline int test_link_speed(PhySocket *s, const struct sockaddr *to, void *data, uint32_t len) { - if (!reinterpret_cast(s)) { - return 0; - } - uint64_t *buf = (uint64_t*)data; - uint64_t id = buf[0]; - if (to->sa_family != AF_INET && to->sa_family != AF_INET6) { - return 0; - } - uint64_t egress_time = OSUtils::now(); - PhySocketImpl *sws = (reinterpret_cast(s)); -#if defined(_WIN32) || defined(_WIN64) - int w = ::sendto(sws->sock,reinterpret_cast(data),len,0,to,(to->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)) -#else - int w = ::sendto(sws->sock,data,len,0,to,(to->sa_family == AF_INET6) ? 
sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); -#endif - if (w > 0) { - link_test_records.push_back(new link_test_record(s, id, egress_time, len)); - } - return w; - } - - /** - * Remove link speed test records which have timed-out and record a 0 bits/s measurement - */ - inline void refresh_link_speed_records() - { - for(size_t i=0;iegress_time > ZT_LINK_TEST_TIMEOUT) { - PhySocketImpl *sws = (reinterpret_cast(link_test_records[i]->s)); - if (sws) { - sws->throughput = 0; - } - link_test_records.erase(link_test_records.begin() + i); - } - } - } - - /** - * Upon receipt of a link speed test datagram we echo back only the identification portion - * - * @param s Socket object - * @param from Address of remote peer that sent this datagram - * @param data Buffer containing datagram's contents - * @param len Length of datagram - * @return Number of bytes successfully written to socket in response - */ - inline int respond_to_link_test(PhySocket *s,const struct sockaddr *from,void *data,unsigned long len) { - PhySocketImpl *sws = (reinterpret_cast(s)); - uint64_t *id = (uint64_t*)data; -#if defined(_WIN32) || defined(_WIN64) - int w = ::sendto(sws->sock,reinterpret_cast(id),sizeof(id[0]),0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); -#else - int w = ::sendto(sws->sock,id,sizeof(id[0]),0,from,(from->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); -#endif - return w; - } - - /** - * Upon receipt of a response to our original link test datagram, correlate this new datagram with the record - * of the one we sent. Compute the transit time and update the throughput field of the relevant socket. This - * value will later be read by the path quality estimation logic located in Path.hpp. 
- * - * @param s Socket object - * @param from Address of remote peer that sent this datagram - * @param data Buffer containing datagram contents (ID of original link test datagram) - * @param len Length of datagram - * @return true if datagram correponded to previous record, false if otherwise - */ - inline bool handle_link_test_response(PhySocket *s,const struct sockaddr *from,void *data,unsigned long len) { - uint64_t *id = (uint64_t*)data; - for(size_t i=0;iid == id[0]) { - float rtt = (OSUtils::now()-link_test_records[i]->egress_time) / (float)1000; // s - uint32_t sz = (link_test_records[i]->length) * 8; // bits - float transit_time = rtt / (float)2.0; - uint64_t raw = (uint64_t)(sz / transit_time); - PhySocketImpl *sws = (reinterpret_cast(s)); - if (sws) { - sws->throughput = raw; - } - delete link_test_records[i]; - link_test_records.erase(link_test_records.begin() + i); - return true; - } + if (s) { + PhySocketImpl *sws = (reinterpret_cast(s)); + return sws->type >= ZT_PHY_SOCKET_CLOSED && sws->type <= ZT_PHY_SOCKET_UNIX_LISTEN; } return false; } diff --git a/service/OneService.cpp b/service/OneService.cpp index 051629bc..cf2a6eda 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -37,7 +37,6 @@ #include "../version.h" #include "../include/ZeroTierOne.h" -#include "../include/ZeroTierDebug.h" #include "../node/Constants.hpp" #include "../node/Mutex.hpp" @@ -458,9 +457,6 @@ public: // Last potential sleep/wake event uint64_t _lastRestart; - // Last time link throughput was tested - uint64_t _lastLinkSpeedTest; - // Deadline for the next background task service function volatile int64_t _nextBackgroundTaskDeadline; @@ -881,26 +877,6 @@ public: lastMultipathModeUpdate = now; _node->setMultipathMode(_multipathMode); } - // Test link speeds - // TODO: This logic should eventually find its way into the core or as part of a passive - // measure within the protocol. 
- if (_multipathMode && ((now - _lastLinkSpeedTest) >= ZT_LINK_SPEED_TEST_INTERVAL)) { - _phy.refresh_link_speed_records(); - _lastLinkSpeedTest = now; - // Generate random data to fill UDP packet - uint64_t pktBuf[ZT_LINK_TEST_DATAGRAM_SZ / sizeof(uint64_t)]; - Utils::getSecureRandom(pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); - ZT_PeerList *pl = _node->peers(); - std::vector sockets = _binder.getBoundSockets(); - for (int i=0; ipeerCount;++j) { - for (int k=0; k<(ZT_MAX_PEER_NETWORK_PATHS/4); k++) { - Utils::getSecureRandom(pktBuf, 8); // generate one random integer for unique id - _phy.test_link_speed(sockets[i], (struct sockaddr*)&(pl->peers[j].paths[k].address), pktBuf, ZT_LINK_TEST_DATAGRAM_SZ); - } - } - } - } // Run background task processor in core if it's time to do so int64_t dl = _nextBackgroundTaskDeadline; @@ -1799,15 +1775,6 @@ public: inline void phyOnDatagram(PhySocket *sock,void **uptr,const struct sockaddr *localAddr,const struct sockaddr *from,void *data,unsigned long len) { - if (_multipathMode) { - // Handle link test packets (should eventually be moved into the protocol itself) - if (len == ZT_LINK_TEST_DATAGRAM_SZ) { - _phy.respond_to_link_test(sock, from, data, len); - } - if (len == ZT_LINK_TEST_DATAGRAM_RESPONSE_SZ) { - _phy.handle_link_test_response(sock, from, data, len); - } - } if ((len >= 16)&&(reinterpret_cast(from)->ipScope() == InetAddress::IP_SCOPE_GLOBAL)) _lastDirectReceiveFromGlobal = OSUtils::now(); const ZT_ResultCode rc = _node->processWirePacket( -- cgit v1.2.3 From 4dd093efc92055174ebb7388408db1ff9148d8b6 Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Mon, 4 Jun 2018 11:07:12 -0700 Subject: cant compare character arrays with == --- service/OneService.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'service') diff --git a/service/OneService.cpp b/service/OneService.cpp index 04d8c8df..091beacc 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1608,11 +1608,13 @@ public: // Nuke applied 
routes that are no longer in n.config.routes[] and/or are not allowed for(std::list< SharedPtr >::iterator mr(n.managedRoutes.begin());mr!=n.managedRoutes.end();) { bool haveRoute = false; + if ( (checkIfManagedIsAllowed(n,(*mr)->target())) && (((*mr)->via().ss_family != (*mr)->target().ss_family)||(!matchIpOnly(myIps,(*mr)->via()))) ) { for(unsigned int i=0;i(&(n.config.routes[i].target)); const InetAddress *const via = reinterpret_cast(&(n.config.routes[i].via)); - if ( ((*mr)->target() == *target) && ( ((via->ss_family == target->ss_family)&&((*mr)->via().ipsEqual(*via))) || (tapdev == (*mr)->device()) ) ) { + + if ( ((*mr)->target() == *target) && ( ((via->ss_family == target->ss_family)&&((*mr)->via().ipsEqual(*via))) || (strcmp(tapdev,(*mr)->device())) ) ) { haveRoute = true; break; } -- cgit v1.2.3 From 62210e57f1c81aec43b714b22313f8c3a0569e2c Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Mon, 4 Jun 2018 11:24:24 -0700 Subject: helps to have an ==0 on a strcmp --- service/OneService.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'service') diff --git a/service/OneService.cpp b/service/OneService.cpp index 091beacc..91cf49ee 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1614,7 +1614,7 @@ public: const InetAddress *const target = reinterpret_cast(&(n.config.routes[i].target)); const InetAddress *const via = reinterpret_cast(&(n.config.routes[i].via)); - if ( ((*mr)->target() == *target) && ( ((via->ss_family == target->ss_family)&&((*mr)->via().ipsEqual(*via))) || (strcmp(tapdev,(*mr)->device())) ) ) { + if ( ((*mr)->target() == *target) && ( ((via->ss_family == target->ss_family)&&((*mr)->via().ipsEqual(*via))) || (strcmp(tapdev,(*mr)->device())==0) ) ) { haveRoute = true; break; } -- cgit v1.2.3 From b22405b64b0b26fa1d5d8932cf212e4ae7760632 Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Mon, 4 Jun 2018 12:24:12 -0700 Subject: rtnetlink integrated. 
no more callouts to iproute2 --- make-linux.mk | 2 +- osdep/LinuxEthernetTap.cpp | 104 +++----- osdep/LinuxNetLink.cpp | 640 ++++++++++++++++++++++++++++++++++----------- osdep/LinuxNetLink.hpp | 16 +- osdep/ManagedRoute.cpp | 24 +- osdep/ManagedRoute.hpp | 10 +- service/OneService.cpp | 13 +- 7 files changed, 566 insertions(+), 243 deletions(-) (limited to 'service') diff --git a/make-linux.mk b/make-linux.mk index 69dc5619..1776fa35 100644 --- a/make-linux.mk +++ b/make-linux.mk @@ -19,7 +19,7 @@ include objects.mk ONE_OBJS+=osdep/LinuxEthernetTap.o ONE_OBJS+=osdep/LinuxNetLink.o -NLTEST_OBJS+=osdep/LinuxNetLink.o +NLTEST_OBJS+=osdep/LinuxNetLink.o node/InetAddress.o node/Utils.o node/Salsa20.o NLTEST_OBJS+=nltest.o # Auto-detect miniupnpc and nat-pmp as well and use system libs if present, diff --git a/osdep/LinuxEthernetTap.cpp b/osdep/LinuxEthernetTap.cpp index 06bbbada..5dc21391 100644 --- a/osdep/LinuxEthernetTap.cpp +++ b/osdep/LinuxEthernetTap.cpp @@ -56,6 +56,7 @@ #include "../node/Dictionary.hpp" #include "OSUtils.hpp" #include "LinuxEthernetTap.hpp" +#include "LinuxNetLink.hpp" // ff:ff:ff:ff:ff:ff with no ADI static const ZeroTier::MulticastGroup _blindWildcardMulticastGroup(ZeroTier::MAC(0xff),0); @@ -97,6 +98,9 @@ LinuxEthernetTap::LinuxEthernetTap( char procpath[128],nwids[32]; struct stat sbuf; + // ensure netlink connection is started + (void)LinuxNetLink::getInstance(); + OSUtils::ztsnprintf(nwids,sizeof(nwids),"%.16llx",nwid); Mutex::Lock _l(__tapCreateLock); // create only one tap at a time, globally @@ -263,18 +267,8 @@ bool LinuxEthernetTap::enabled() const static bool ___removeIp(const std::string &_dev,const InetAddress &ip) { - long cpid = (long)vfork(); - if (cpid == 0) { - OSUtils::redirectUnixOutputs("/dev/null",(const char *)0); - setenv("PATH", "/sbin:/bin:/usr/sbin:/usr/bin", 1); - char iptmp[128]; - ::execlp("ip","ip","addr","del",ip.toString(iptmp),"dev",_dev.c_str(),(const char *)0); - ::_exit(-1); - } else { - int exitcode = -1; 
- ::waitpid(cpid,&exitcode,0); - return (exitcode == 0); - } + LinuxNetLink::getInstance().removeAddress(ip, _dev.c_str()); + return true; } #ifdef __SYNOLOGY__ @@ -285,49 +279,32 @@ bool LinuxEthernetTap::addIpSyn(std::vector ips) std::string cfg_contents = "DEVICE="+_dev+"\nBOOTPROTO=static"; int ip4=0,ip6=0,ip4_tot=0,ip6_tot=0; - long cpid = (long)vfork(); - if (cpid == 0) { - OSUtils::redirectUnixOutputs("/dev/null",(const char *)0); - setenv("PATH", "/sbin:/bin:/usr/sbin:/usr/bin", 1); - // We must know if there is at least (one) of each protocol version so we - // can properly enumerate address/netmask combinations in the ifcfg-dev file - for(int i=0; i<(int)ips.size(); i++) { - if (ips[i].isV4()) - ip4_tot++; - else - ip6_tot++; - } - // Assemble and write contents of ifcfg-dev file - for(int i=0; i<(int)ips.size(); i++) { - if (ips[i].isV4()) { - char iptmp[64],iptmp2[64]; - std::string numstr4 = ip4_tot > 1 ? std::to_string(ip4) : ""; - cfg_contents += "\nIPADDR"+numstr4+"="+ips[i].toIpString(iptmp) - + "\nNETMASK"+numstr4+"="+ips[i].netmask().toIpString(iptmp2)+"\n"; - ip4++; - } - else { - char iptmp[64],iptmp2[64]; - std::string numstr6 = ip6_tot > 1 ? 
std::to_string(ip6) : ""; - cfg_contents += "\nIPV6ADDR"+numstr6+"="+ips[i].toIpString(iptmp) - + "\nNETMASK"+numstr6+"="+ips[i].netmask().toIpString(iptmp2)+"\n"; - ip6++; - } - } - OSUtils::writeFile(filepath.c_str(), cfg_contents.c_str(), cfg_contents.length()); - // Finaly, add IPs - for(int i=0; i<(int)ips.size(); i++){ - char iptmp[128],iptmp2[128]; - if (ips[i].isV4()) - ::execlp("ip","ip","addr","add",ips[i].toString(iptmp),"broadcast",ips[i].broadcast().toIpString(iptmp2),"dev",_dev.c_str(),(const char *)0); - else - ::execlp("ip","ip","addr","add",ips[i].toString(iptmp),"dev",_dev.c_str(),(const char *)0); + for(int i=0; i<(int)ips.size(); i++) { + if (ips[i].isV4()) + ip4_tot++; + else + ip6_tot++; + } + // Assemble and write contents of ifcfg-dev file + for(int i=0; i<(int)ips.size(); i++) { + if (ips[i].isV4()) { + char iptmp[64],iptmp2[64]; + std::string numstr4 = ip4_tot > 1 ? std::to_string(ip4) : ""; + cfg_contents += "\nIPADDR"+numstr4+"="+ips[i].toIpString(iptmp) + + "\nNETMASK"+numstr4+"="+ips[i].netmask().toIpString(iptmp2)+"\n"; + ip4++; + } else { + char iptmp[64],iptmp2[64]; + std::string numstr6 = ip6_tot > 1 ? 
std::to_string(ip6) : ""; + cfg_contents += "\nIPV6ADDR"+numstr6+"="+ips[i].toIpString(iptmp) + + "\nNETMASK"+numstr6+"="+ips[i].netmask().toIpString(iptmp2)+"\n"; + ip6++; } - ::_exit(-1); - } else if (cpid > 0) { - int exitcode = -1; - ::waitpid(cpid,&exitcode,0); - return (exitcode == 0); + } + OSUtils::writeFile(filepath.c_str(), cfg_contents.c_str(), cfg_contents.length()); + // Finaly, add IPs + for(int i=0; i<(int)ips.size(); i++){ + LinuxNetLink::getInstance().addAddress(ips[i], _dev.c_str()); } return true; } @@ -348,24 +325,9 @@ bool LinuxEthernetTap::addIp(const InetAddress &ip) ___removeIp(_dev,*i); } - long cpid = (long)vfork(); - if (cpid == 0) { - OSUtils::redirectUnixOutputs("/dev/null",(const char *)0); - setenv("PATH", "/sbin:/bin:/usr/sbin:/usr/bin", 1); - char iptmp[128],iptmp2[128]; - if (ip.isV4()) { - ::execlp("ip","ip","addr","add",ip.toString(iptmp),"broadcast",ip.broadcast().toIpString(iptmp2),"dev",_dev.c_str(),(const char *)0); - } else { - ::execlp("ip","ip","addr","add",ip.toString(iptmp),"dev",_dev.c_str(),(const char *)0); - } - ::_exit(-1); - } else if (cpid > 0) { - int exitcode = -1; - ::waitpid(cpid,&exitcode,0); - return (exitcode == 0); - } + LinuxNetLink::getInstance().addAddress(ip, _dev.c_str()); - return false; + return true; } bool LinuxEthernetTap::removeIp(const InetAddress &ip) diff --git a/osdep/LinuxNetLink.cpp b/osdep/LinuxNetLink.cpp index 79483b96..7ad687fb 100644 --- a/osdep/LinuxNetLink.cpp +++ b/osdep/LinuxNetLink.cpp @@ -54,39 +54,36 @@ LinuxNetLink::LinuxNetLink() : _t() , _running(false) , _routes_ipv4() + , _rv4_m() , _routes_ipv6() + , _rv6_m() , _seq(0) + , _interfaces() + , _if_m() , _fd(socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) , _la({0}) { // set socket timeout to 1 sec so we're not permablocking recv() calls - struct timeval tv; - tv.tv_sec = 1; - tv.tv_usec = 0; - if(setsockopt(_fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof(tv)) != 0) { - fprintf(stderr, "setsockopt failed: %s\n", 
strerror(errno)); - } + _setSocketTimeout(_fd, 1); _la.nl_family = AF_NETLINK; - _la.nl_pid = getpid(); + _la.nl_pid = getpid()+1; _la.nl_groups = RTMGRP_LINK|RTMGRP_IPV4_IFADDR|RTMGRP_IPV6_IFADDR|RTMGRP_IPV4_ROUTE|RTMGRP_IPV6_ROUTE|RTMGRP_NOTIFY; if (bind(_fd, (struct sockaddr*)&_la, sizeof(_la))) { fprintf(stderr, "Error connecting to RTNETLINK: %s\n", strerror(errno)); ::exit(1); } - _running = true; - _t = Thread::start(this); - fprintf(stderr, "Requesting IPV4 Routes\n"); _requestIPv4Routes(); - Thread::sleep(10); fprintf(stderr, "Requesting IPV6 Routes\n"); _requestIPv6Routes(); - Thread::sleep(10); fprintf(stderr, "Requesting Interface List\n"); _requestInterfaceList(); + + _running = true; + _t = Thread::start(this); } LinuxNetLink::~LinuxNetLink() @@ -97,7 +94,17 @@ LinuxNetLink::~LinuxNetLink() ::close(_fd); } -void LinuxNetLink::threadMain() throw() +void LinuxNetLink::_setSocketTimeout(int fd, int seconds) +{ + struct timeval tv; + tv.tv_sec = seconds; + tv.tv_usec = 0; + if(setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof(tv)) != 0) { + fprintf(stderr, "setsockopt failed: %s\n", strerror(errno)); + } +} + +int LinuxNetLink::_doRecv(int fd) { char buf[8192]; char *p = NULL; @@ -106,26 +113,24 @@ void LinuxNetLink::threadMain() throw() int rtn = 0; p = buf; - while(_running) { - rtn = recv(_fd, p, sizeof(buf) - nll, 0); + while(true) { + rtn = recv(fd, p, sizeof(buf) - nll, 0); if (rtn > 0) { nlp = (struct nlmsghdr *)p; if(nlp->nlmsg_type == NLMSG_ERROR && (nlp->nlmsg_flags & NLM_F_ACK) != NLM_F_ACK) { - fprintf(stderr, "NLMSG_ERROR\n"); struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(nlp); if (err->error != 0) { fprintf(stderr, "rtnetlink error: %s\n", strerror(-(err->error))); } p = buf; nll = 0; - continue; + break; } if (nlp->nlmsg_type == NLMSG_NOOP) { - fprintf(stderr, "noop\n"); - continue; + break; } if( (nlp->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI || (nlp->nlmsg_type == NLMSG_DONE)) @@ -134,7 +139,7 @@ void 
LinuxNetLink::threadMain() throw() _processMessage(nlp, nll); p = buf; nll = 0; - continue; + break; } p += rtn; nll += rtn; @@ -144,7 +149,7 @@ void LinuxNetLink::threadMain() throw() fprintf(stderr, "NLMSG_OVERRUN: Data lost\n"); p = buf; nll = 0; - continue; + break; } nll += rtn; @@ -152,8 +157,21 @@ void LinuxNetLink::threadMain() throw() _processMessage(nlp, nll); p = buf; nll = 0; + break; + } else { + break; } - else { + } + return rtn; +} + +void LinuxNetLink::threadMain() throw() +{ + int rtn = 0; + + while(_running) { + rtn = _doRecv(_fd); + if (rtn <= 0) { Thread::sleep(100); continue; } @@ -258,6 +276,7 @@ void LinuxNetLink::_routeAdded(struct nlmsghdr *nlp) { char dsts[40] = {0}; char gws[40] = {0}; + char srcs[40] = {0}; char ifs[16] = {0}; char ms[24] = {0}; @@ -272,6 +291,9 @@ void LinuxNetLink::_routeAdded(struct nlmsghdr *nlp) case RTA_DST: inet_ntop(rtp->rtm_family, RTA_DATA(rtap), dsts, rtp->rtm_family == AF_INET ? 24 : 40); break; + case RTA_SRC: + inet_ntop(rtp->rtm_family, RTA_DATA(rtap), srcs, rtp->rtm_family == AF_INET ? 24: 40); + break; case RTA_GATEWAY: inet_ntop(rtp->rtm_family, RTA_DATA(rtap), gws, rtp->rtm_family == AF_INET ? 24 : 40); break; @@ -282,13 +304,14 @@ void LinuxNetLink::_routeAdded(struct nlmsghdr *nlp) } sprintf(ms, "%d", rtp->rtm_dst_len); - fprintf(stderr, "Route Added: dst %s/%s gw %s if %s\n", dsts, ms, gws, ifs); + fprintf(stderr, "Route Added: dst %s/%s gw %s src %s if %s\n", dsts, ms, gws, srcs, ifs); } void LinuxNetLink::_routeDeleted(struct nlmsghdr *nlp) { char dsts[40] = {0}; char gws[40] = {0}; + char srcs[40] = {0}; char ifs[16] = {0}; char ms[24] = {0}; @@ -303,6 +326,9 @@ void LinuxNetLink::_routeDeleted(struct nlmsghdr *nlp) case RTA_DST: inet_ntop(rtp->rtm_family, RTA_DATA(rtap), dsts, rtp->rtm_family == AF_INET ? 24 : 40); break; + case RTA_SRC: + inet_ntop(rtp->rtm_family, RTA_DATA(rtap), srcs, rtp->rtm_family == AF_INET ? 
24 : 40); + break; case RTA_GATEWAY: inet_ntop(rtp->rtm_family, RTA_DATA(rtap), gws, rtp->rtm_family == AF_INET ? 24 : 40); break; @@ -313,7 +339,7 @@ void LinuxNetLink::_routeDeleted(struct nlmsghdr *nlp) } sprintf(ms, "%d", rtp->rtm_dst_len); - fprintf(stderr, "Route Deleted: dst %s/%s gw %s if %s\n", dsts, ms, gws, ifs); + fprintf(stderr, "Route Deleted: dst %s/%s gw %s src %s if %s\n", dsts, ms, gws, srcs, ifs); } void LinuxNetLink::_linkAdded(struct nlmsghdr *nlp) @@ -348,12 +374,15 @@ void LinuxNetLink::_linkAdded(struct nlmsghdr *nlp) } } - struct iface_entry &entry = _interfaces[ifip->ifi_index]; - entry.index = ifip->ifi_index; - memcpy(entry.ifacename, ifname, sizeof(ifname)); - memcpy(entry.mac, mac, sizeof(mac)); - memcpy(entry.mac_bin, mac_bin, 6); - entry.mtu = mtu; + { + Mutex::Lock l(_if_m); + struct iface_entry &entry = _interfaces[ifip->ifi_index]; + entry.index = ifip->ifi_index; + memcpy(entry.ifacename, ifname, sizeof(ifname)); + memcpy(entry.mac, mac, sizeof(mac)); + memcpy(entry.mac_bin, mac_bin, 6); + entry.mtu = mtu; + } fprintf(stderr, "Link Added: %s mac: %s, mtu: %d\n", ifname, mac, mtu); } @@ -389,13 +418,33 @@ void LinuxNetLink::_linkDeleted(struct nlmsghdr *nlp) } fprintf(stderr, "Link Deleted: %s mac: %s, mtu: %d\n", ifname, mac, mtu); - if(_interfaces.contains(ifip->ifi_index)) { - _interfaces.erase(ifip->ifi_index); + { + Mutex::Lock l(_if_m); + if(_interfaces.contains(ifip->ifi_index)) { + _interfaces.erase(ifip->ifi_index); + } } } void LinuxNetLink::_requestIPv4Routes() { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + + _setSocketTimeout(fd); + + struct sockaddr_nl la; + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + la.nl_groups = RTMGRP_IPV4_ROUTE; + if(bind(fd, (struct sockaddr*)&la, sizeof(la))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + struct nl_route_req 
req; bzero(&req, sizeof(req)); req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); @@ -422,11 +471,32 @@ void LinuxNetLink::_requestIPv4Routes() msg.msg_iov = &iov; msg.msg_iovlen = 1; - sendmsg(_fd, &msg, 0); + sendmsg(fd, &msg, 0); + + _doRecv(fd); + + close(fd); } void LinuxNetLink::_requestIPv6Routes() { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + + _setSocketTimeout(fd); + + struct sockaddr_nl la; + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + la.nl_groups = RTMGRP_IPV6_ROUTE; + if(bind(fd, (struct sockaddr*)&la, sizeof(struct sockaddr_nl))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + struct nl_route_req req; bzero(&req, sizeof(req)); req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); @@ -453,11 +523,32 @@ void LinuxNetLink::_requestIPv6Routes() msg.msg_iov = &iov; msg.msg_iovlen = 1; - sendmsg(_fd, &msg, 0); + sendmsg(fd, &msg, 0); + + _doRecv(fd); + + close(fd); } void LinuxNetLink::_requestInterfaceList() { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + + _setSocketTimeout(fd); + + struct sockaddr_nl la; + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + la.nl_groups = RTMGRP_LINK; + if(bind(fd, (struct sockaddr*)&la, sizeof(struct sockaddr_nl))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + struct nl_if_req req; bzero(&req, sizeof(req)); req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); @@ -482,11 +573,43 @@ void LinuxNetLink::_requestInterfaceList() iov.iov_len = req.nl.nlmsg_len; msg.msg_iov = &iov; msg.msg_iovlen = 1; - sendmsg(_fd, &msg, 0); + sendmsg(fd, &msg, 0); + + _doRecv(fd); + + close(fd); } -void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName) +void 
LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, const InetAddress &src, const char *ifaceName) { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + + _setSocketTimeout(fd); + + struct sockaddr_nl la; + bzero(&la, sizeof(la)); + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + + if(bind(fd, (struct sockaddr*)&la, sizeof(struct sockaddr_nl))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + + char tmp[64]; + char tmp2[64]; + char tmp3[64]; + fprintf(stderr, "Adding Route. target: %s via: %s src: %s iface: %s\n", target.toString(tmp), via.toString(tmp2), src.toString(tmp3), ifaceName); + + if(!target) { + fprintf(stderr, "Uhhhh adding an empty route?!?!?"); + return; + } + int rtl = sizeof(struct rtmsg); struct nl_route_req req; bzero(&req, sizeof(req)); @@ -494,49 +617,54 @@ void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, c struct rtattr *rtap = (struct rtattr *)req.buf; rtap->rta_type = RTA_DST; if (target.isV4()) { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in_addr); - memcpy((void*)((char*)rtap+sizeof(struct rtattr)), &((struct sockaddr_in*)&target)->sin_addr, sizeof(struct in_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in*)&target)->sin_addr, sizeof(struct in_addr)); } else { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in6_addr); - memcpy((void*)((char*)rtap+sizeof(struct rtattr)), &((struct sockaddr_in6*)&target)->sin6_addr, sizeof(struct in6_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in6*)&target)->sin6_addr, sizeof(struct in6_addr)); } rtl += rtap->rta_len; - int interface_index = -1; - if (ifaceName != NULL) { - Hashtable::Iterator iter(_interfaces); - int *k = NULL; - iface_entry *v = NULL; - 
while(iter.next(k, v)) { - if(strcmp(ifaceName, v->ifacename) == 0) { - interface_index = v->index; - break; - } - } - if (interface_index != -1) { - rtap = (struct rtattr *) (((char*)rtap) + rtap->rta_len); - rtap->rta_type = RTA_OIF; - rtap->rta_len = sizeof(struct rtattr)+sizeof(int); - memcpy(((char*)rtap)+sizeof(rtattr), &interface_index, sizeof(int)); - rtl += rtap->rta_len; - } - } - if(via) { rtap = (struct rtattr *)(((char*)rtap)+rtap->rta_len); rtap->rta_type = RTA_GATEWAY; if(via.isV4()) { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in_addr); - memcpy((char*)rtap+sizeof(struct rtattr), &((struct sockaddr_in*)&via)->sin_addr, sizeof(struct in_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in*)&via)->sin_addr, sizeof(struct in_addr)); } else { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in6_addr); - memcpy((char*)rtap+sizeof(struct rtattr), &((struct sockaddr_in6*)&via)->sin6_addr, sizeof(struct in6_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in6*)&via)->sin6_addr, sizeof(struct in6_addr)); } rtl += rtap->rta_len; + } else if (src) { + rtap = (struct rtattr *)(((char*)rtap)+rtap->rta_len); + rtap->rta_type = RTA_SRC; + if(src.isV4()) { + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in*)&src)->sin_addr, sizeof(struct in_addr)); + + } else { + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in6*)&src)->sin6_addr, sizeof(struct in6_addr)); + } + req.rt.rtm_src_len = src.netmaskBits(); } + if (ifaceName != NULL) { + int interface_index = _indexForInterface(ifaceName); + if (interface_index != -1) { + rtap = (struct rtattr *) (((char*)rtap) + rtap->rta_len); + rtap->rta_type = RTA_OIF; + rtap->rta_len = RTA_LENGTH(sizeof(int)); + memcpy(RTA_DATA(rtap), &interface_index, sizeof(int)); + rtl += rtap->rta_len; + } + } + + + 
req.nl.nlmsg_len = NLMSG_LENGTH(rtl); - req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE | NLM_F_ACK; req.nl.nlmsg_type = RTM_NEWROUTE; req.nl.nlmsg_pid = 0; req.nl.nlmsg_seq = ++_seq; @@ -546,6 +674,7 @@ void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, c req.rt.rtm_scope = RT_SCOPE_UNIVERSE; req.rt.rtm_type = RTN_UNICAST; req.rt.rtm_dst_len = target.netmaskBits(); + req.rt.rtm_flags = 0; struct sockaddr_nl pa; bzero(&pa, sizeof(pa)); @@ -562,11 +691,41 @@ void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, c iov.iov_len = req.nl.nlmsg_len; msg.msg_iov = &iov; msg.msg_iovlen = 1; - sendmsg(_fd, &msg, 0); + sendmsg(fd, &msg, 0); + + _doRecv(fd); + + close(fd); } -void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName) +void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, const InetAddress &src, const char *ifaceName) { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + + _setSocketTimeout(fd); + + struct sockaddr_nl la; + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + + if(bind(fd, (struct sockaddr*)&la, sizeof(struct sockaddr_nl))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + char tmp[64]; + char tmp2[64]; + char tmp3[64]; + fprintf(stderr, "Removing Route. 
target: %s via: %s src: %s iface: %s\n", target.toString(tmp), via.toString(tmp2), src.toString(tmp3), ifaceName); + + if(!target) { + fprintf(stderr, "Uhhhh deleting an empty route?!?!?"); + return; + } + int rtl = sizeof(struct rtmsg); struct nl_route_req req; bzero(&req, sizeof(req)); @@ -574,47 +733,52 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c struct rtattr *rtap = (struct rtattr *)req.buf; rtap->rta_type = RTA_DST; if (target.isV4()) { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in_addr); - memcpy((void*)((char*)rtap+sizeof(struct rtattr)), &((struct sockaddr_in*)&target)->sin_addr, sizeof(struct in_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in*)&target)->sin_addr, sizeof(struct in_addr)); } else { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in6_addr); - memcpy((void*)((char*)rtap+sizeof(struct rtattr)), &((struct sockaddr_in6*)&target)->sin6_addr, sizeof(struct in6_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in6*)&target)->sin6_addr, sizeof(struct in6_addr)); } rtl += rtap->rta_len; - int interface_index = -1; - if (ifaceName != NULL) { - Hashtable::Iterator iter(_interfaces); - int *k = NULL; - iface_entry *v = NULL; - while(iter.next(k, v)) { - if(strcmp(ifaceName, v->ifacename) == 0) { - interface_index = v->index; - break; - } - } - if (interface_index != -1) { - rtap = (struct rtattr *) (((char*)rtap) + rtap->rta_len); - rtap->rta_type = RTA_OIF; - rtap->rta_len = sizeof(struct rtattr)+sizeof(int); - memcpy(((char*)rtap)+sizeof(rtattr), &interface_index, sizeof(int)); - rtl += rtap->rta_len; - } - } - if(via) { rtap = (struct rtattr *)(((char*)rtap)+rtap->rta_len); rtap->rta_type = RTA_GATEWAY; if(via.isV4()) { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in_addr); - memcpy((char*)rtap+sizeof(struct rtattr), &((struct sockaddr_in*)&via)->sin_addr, sizeof(struct 
in_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in*)&via)->sin_addr, sizeof(struct in_addr)); } else { - rtap->rta_len = sizeof(struct rtattr)+sizeof(struct in6_addr); - memcpy((char*)rtap+sizeof(struct rtattr), &((struct sockaddr_in6*)&via)->sin6_addr, sizeof(struct in6_addr)); + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in6*)&via)->sin6_addr, sizeof(struct in6_addr)); } rtl += rtap->rta_len; + } else if (src) { + rtap = (struct rtattr *)(((char*)rtap)+rtap->rta_len); + rtap->rta_type = RTA_SRC; + if(src.isV4()) { + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in*)&src)->sin_addr, sizeof(struct in_addr)); + + } else { + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &((struct sockaddr_in6*)&src)->sin6_addr, sizeof(struct in6_addr)); + } + req.rt.rtm_src_len = src.netmaskBits(); } + if (ifaceName != NULL) { + int interface_index = _indexForInterface(ifaceName); + if (interface_index != -1) { + rtap = (struct rtattr *) (((char*)rtap) + rtap->rta_len); + rtap->rta_type = RTA_OIF; + rtap->rta_len = RTA_LENGTH(sizeof(int)); + memcpy(RTA_DATA(rtap), &interface_index, sizeof(int)); + rtl += rtap->rta_len; + } + } + + + req.nl.nlmsg_len = NLMSG_LENGTH(rtl); req.nl.nlmsg_flags = NLM_F_REQUEST; req.nl.nlmsg_type = RTM_DELROUTE; @@ -626,6 +790,7 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c req.rt.rtm_scope = RT_SCOPE_UNIVERSE; req.rt.rtm_type = RTN_UNICAST; req.rt.rtm_dst_len = target.netmaskBits(); + req.rt.rtm_flags = 0; struct sockaddr_nl pa; bzero(&pa, sizeof(pa)); @@ -642,73 +807,226 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c iov.iov_len = req.nl.nlmsg_len; msg.msg_iov = &iov; msg.msg_iovlen = 1; - sendmsg(_fd, &msg, 0); -} + sendmsg(fd, &msg, 0); -// void LinuxNetLink::addInterface(const char 
*iface, unsigned int mtu, const MAC &mac) -// { -// int rtl = sizeof(struct ifinfomsg); -// struct nl_if_req req; -// bzero(&req, sizeof(nl_if_req)); - -// struct rtattr *rtap = (struct rtattr *)req.buf; -// rtap->rta_type = IFLA_IFNAME; -// rtap->rta_len = sizeof(struct rtattr)+strlen(iface)+1; -// rtl += rtap->rta_len; - -// rtap = (struct rtattr*)(((char*)rtap)+rtap->rta_len); -// rtap->rta_type = IFLA_MTU; -// rtap->rta_len = sizeof(struct rtattr)+sizeof(unsigned int); -// rtl += rtap->rta_len; - -// rtap = (struct rtattr*)(((char*)rtap)+rtap->rta_len); -// rtap->rta_type = IFLA_ADDRESS; -// rtap->rta_len = sizeof(struct rtattr)+6; -// mac.copyTo(((char*)rtap)+sizeof(struct rtattr), 6); -// rtl += rtap->rta_len; - -// IFLA_LINKINFO; -// req.nl.nlmsg_len = NLMSG_LENGTH(rtl); -// req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; -// req.nl.nlmsg_type = RTM_NEWLINK; -// req.nl.nlmsg_pid = 0; -// req.nl.nlmsg_seq = ++_seq; - -// req.ifa.ifi_family = AF_UNSPEC; -// req.ifa.ifi_type = 0; // TODO figure this one out -// req.ifa.ifi_index = 0; -// req.ifa.ifi_flags = IFF_UP; - -// struct sockaddr_nl pa; -// bzero(&pa, sizeof(pa)); -// pa.nl_family = AF_NETLINK; - -// struct msghdr msg; -// bzero(&msg, sizeof(msg)); -// msg.msg_name = (void*)&pa; -// msg.msg_namelen = sizeof(pa); - -// struct iovec iov; -// iov.iov_base = (void*)&req.nl; -// iov.iov_len = req.nl.nlmsg_len; -// msg.msg_iov = &iov; -// msg.msg_iovlen = 1; -// sendmsg(_fd, &msg, 0); -// } - -// void LinuxNetLink::removeInterface(const char *iface) -// { - -// } + _doRecv(fd); + + close(fd); +} void LinuxNetLink::addAddress(const InetAddress &addr, const char *iface) { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + _setSocketTimeout(fd); + + struct sockaddr_nl la; + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + if (addr.isV4()) { + la.nl_groups = RTMGRP_IPV4_IFADDR; + } 
else { + la.nl_groups = RTMGRP_IPV6_IFADDR; + } + if(bind(fd, (struct sockaddr*)&la, sizeof(struct sockaddr_nl))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + + char tmp[128]; + fprintf(stderr, "Adding IP address %s to interface %s", addr.toString(tmp), iface); + int interface_index = _indexForInterface(iface); + + if (interface_index == -1) { + fprintf(stderr, "Unable to find index for interface %s\n", iface); + return; + } + + int rtl = sizeof(struct ifaddrmsg); + struct nl_adr_req req; + bzero(&req, sizeof(struct nl_adr_req)); + + struct rtattr *rtap = (struct rtattr *)req.buf;; + if(addr.isV4()) { + struct sockaddr_in *addr_v4 = (struct sockaddr_in*)&addr; + rtap->rta_type = IFA_ADDRESS; + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &addr_v4->sin_addr, sizeof(struct in_addr)); + rtl += rtap->rta_len; + + rtap = (struct rtattr*)(((char*)rtap) + rtap->rta_len); + rtap->rta_type = IFA_LOCAL; + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &addr_v4->sin_addr, sizeof(struct in_addr)); + rtl += rtap->rta_len; + + InetAddress broadcast = addr.broadcast(); + if(broadcast) { + rtap = (struct rtattr*)(((char*)rtap)+rtap->rta_len); + struct sockaddr_in *bcast = (struct sockaddr_in*)&broadcast; + rtap->rta_type = IFA_BROADCAST; + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &bcast->sin_addr, sizeof(struct in_addr)); + rtl += rtap->rta_len; + } + } else { //V6 + rtap->rta_type = IFA_ADDRESS; + struct sockaddr_in6 *addr_v6 = (struct sockaddr_in6*)&addr; + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &addr_v6->sin6_addr, sizeof(struct in6_addr)); + rtl += rtap->rta_len; + } + + if (iface) { + rtap = (struct rtattr*)(((char*)rtap)+rtap->rta_len); + rtap->rta_type = IFA_LABEL; + rtap->rta_len = RTA_LENGTH(strlen(iface)); + memcpy(RTA_DATA(rtap), iface, strlen(iface)); + rtl += rtap->rta_len; + } + + 
req.nl.nlmsg_len = NLMSG_LENGTH(rtl); + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.nl.nlmsg_type = RTM_NEWADDR; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++_seq; + req.ifa.ifa_family = addr.ss_family; + req.ifa.ifa_prefixlen = addr.port(); + req.ifa.ifa_flags = IFA_F_PERMANENT; + req.ifa.ifa_scope = 0; + req.ifa.ifa_index = interface_index; + + struct sockaddr_nl pa; + bzero(&pa, sizeof(sockaddr_nl)); + pa.nl_family = AF_NETLINK; + + struct msghdr msg; + bzero(&msg, sizeof(msg)); + msg.msg_name = (void*)&pa; + msg.msg_namelen = sizeof(pa); + + struct iovec iov; + iov.iov_base = (void*)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + sendmsg(fd, &msg, 0); + + _doRecv(fd); + + close(fd); } void LinuxNetLink::removeAddress(const InetAddress &addr, const char *iface) { + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) { + fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); + return; + } + + _setSocketTimeout(fd); + + struct sockaddr_nl la; + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + if (addr.isV4()) { + la.nl_groups = RTMGRP_IPV4_IFADDR; + } else { + la.nl_groups = RTMGRP_IPV6_IFADDR; + } + if(bind(fd, (struct sockaddr*)&la, sizeof(struct sockaddr_nl))) { + fprintf(stderr, "Error binding RTNETLINK: %s\n", strerror(errno)); + return; + } + + char tmp[128]; + fprintf(stderr, "Removing IP address %s from interface %s", addr.toString(tmp), iface); + + int interface_index = _indexForInterface(iface); + + if (interface_index == -1) { + fprintf(stderr, "Unable to find index for interface %s\n", iface); + return; + } + + int rtl = sizeof(struct ifaddrmsg); + struct nl_adr_req req; + bzero(&req, sizeof(struct nl_adr_req)); + + struct rtattr *rtap = (struct rtattr *)req.buf; + if(addr.isV4()) { + struct sockaddr_in *addr_v4 = (struct sockaddr_in*)&addr; + rtap->rta_type = IFA_ADDRESS; + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + 
memcpy(RTA_DATA(rtap), &addr_v4->sin_addr, sizeof(struct in_addr)); + rtl += rtap->rta_len; + + rtap = (struct rtattr*)(((char*)rtap) + rtap->rta_len); + rtap->rta_type = IFA_LOCAL; + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &addr_v4->sin_addr, sizeof(struct in_addr)); + rtl += rtap->rta_len; + + InetAddress broadcast = addr.broadcast(); + if(broadcast) { + rtap = (struct rtattr*)(((char*)rtap)+rtap->rta_len); + struct sockaddr_in *bcast = (struct sockaddr_in*)&broadcast; + rtap->rta_type = IFA_BROADCAST; + rtap->rta_len = RTA_LENGTH(sizeof(struct in_addr)); + memcpy(RTA_DATA(rtap), &bcast->sin_addr, sizeof(struct in_addr)); + rtl += rtap->rta_len; + } + } else { //V6 + rtap->rta_type = IFA_ADDRESS; + struct sockaddr_in6 *addr_v6 = (struct sockaddr_in6*)&addr; + rtap->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); + memcpy(RTA_DATA(rtap), &addr_v6->sin6_addr, sizeof(struct in6_addr)); + rtl += rtap->rta_len; + } + + if (iface) { + rtap = (struct rtattr*)(((char*)rtap)+rtap->rta_len); + rtap->rta_type = IFA_LABEL; + rtap->rta_len = RTA_LENGTH(strlen(iface)); + memcpy(RTA_DATA(rtap), iface, strlen(iface)); + rtl += rtap->rta_len; + } + + req.nl.nlmsg_len = NLMSG_LENGTH(rtl); + req.nl.nlmsg_flags = NLM_F_REQUEST; + req.nl.nlmsg_type = RTM_DELADDR; + req.nl.nlmsg_pid = 0; + req.nl.nlmsg_seq = ++_seq; + req.ifa.ifa_family = addr.ss_family; + req.ifa.ifa_prefixlen = addr.port(); + req.ifa.ifa_flags = IFA_F_PERMANENT; + req.ifa.ifa_scope = 0; + req.ifa.ifa_index = interface_index; + + struct sockaddr_nl pa; + bzero(&pa, sizeof(sockaddr_nl)); + pa.nl_family = AF_NETLINK; + + struct msghdr msg; + bzero(&msg, sizeof(msg)); + msg.msg_name = (void*)&pa; + msg.msg_namelen = sizeof(pa); + struct iovec iov; + iov.iov_base = (void*)&req.nl; + iov.iov_len = req.nl.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + sendmsg(fd, &msg, 0); + + _doRecv(fd); + + close(fd); } RouteList LinuxNetLink::getIPV4Routes() const @@ -721,4 +1039,20 @@ 
RouteList LinuxNetLink::getIPV6Routes() const return _routes_ipv6; } -} // namespace ZeroTier \ No newline at end of file +int LinuxNetLink::_indexForInterface(const char *iface) +{ + Mutex::Lock l(_if_m); + int interface_index = -1; + Hashtable::Iterator iter(_interfaces); + int *k = NULL; + iface_entry *v = NULL; + while(iter.next(k,v)) { + if(strcmp(iface, v->ifacename) == 0) { + interface_index = v->index; + break; + } + } + return interface_index; +} + +} // namespace ZeroTier diff --git a/osdep/LinuxNetLink.hpp b/osdep/LinuxNetLink.hpp index ad457772..681aa46f 100644 --- a/osdep/LinuxNetLink.hpp +++ b/osdep/LinuxNetLink.hpp @@ -39,6 +39,7 @@ #include "../node/MAC.hpp" #include "Thread.hpp" #include "../node/Hashtable.hpp" +#include "../node/Mutex.hpp" namespace ZeroTier { @@ -70,19 +71,18 @@ public: LinuxNetLink(LinuxNetLink const&) = delete; void operator=(LinuxNetLink const&) = delete; - void addRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName); - void delRoute(const InetAddress &target, const InetAddress &via, const char *ifaceName); + void addRoute(const InetAddress &target, const InetAddress &via, const InetAddress &src, const char *ifaceName); + void delRoute(const InetAddress &target, const InetAddress &via, const InetAddress &src, const char *ifaceName); RouteList getIPV4Routes() const; RouteList getIPV6Routes() const; - // void addInterface(const char *iface, unsigned int mtu, const MAC &mac); - // void removeInterface(const char *iface); - void addAddress(const InetAddress &addr, const char *iface); void removeAddress(const InetAddress &addr, const char *iface); void threadMain() throw(); private: + int _doRecv(int fd); + void _processMessage(struct nlmsghdr *nlp, int nll); void _routeAdded(struct nlmsghdr *nlp); void _routeDeleted(struct nlmsghdr *nlp); @@ -95,12 +95,17 @@ private: void _requestIPv4Routes(); void _requestIPv6Routes(); + int _indexForInterface(const char *iface); + + void _setSocketTimeout(int fd, int 
seconds = 1); Thread _t; bool _running; RouteList _routes_ipv4; + Mutex _rv4_m; RouteList _routes_ipv6; + Mutex _rv6_m; uint32_t _seq; @@ -112,6 +117,7 @@ private: unsigned int mtu; }; Hashtable _interfaces; + Mutex _if_m; // socket communication vars; int _fd; diff --git a/osdep/ManagedRoute.cpp b/osdep/ManagedRoute.cpp index fe7c6267..99277c9f 100644 --- a/osdep/ManagedRoute.cpp +++ b/osdep/ManagedRoute.cpp @@ -285,21 +285,23 @@ static void _routeCmd(const char *op,const InetAddress &target,const InetAddress #ifdef __LINUX__ // ---------------------------------------------------------- #define ZT_ROUTING_SUPPORT_FOUND 1 -static void _routeCmd(const char *op, const InetAddress &target, const InetAddress &via, const char *localInterface) +static void _routeCmd(const char *op, const InetAddress &target, const InetAddress &via, const InetAddress &src, const char *localInterface) { - if ((strcmp(op, "add") == 0 || strcmp(op, "replace") == 0)) { - LinuxNetLink::getInstance().addRoute(target, via, localInterface); - } else if ((strcmp(op, "remove") == 0 || strcmp(op, "del") == 0)) { - LinuxNetLink::getInstance().delRoute(target, via, localInterface); - } - return; - char targetStr[64] = {0}; char viaStr[64] = {0}; InetAddress nmsk = target.netmask(); char nmskStr[64] = {0}; fprintf(stderr, "Received Route Cmd: %s target: %s via: %s netmask: %s localInterface: %s\n", op, target.toString(targetStr), via.toString(viaStr), nmsk.toString(nmskStr), localInterface); + + if ((strcmp(op, "add") == 0 || strcmp(op, "replace") == 0)) { + LinuxNetLink::getInstance().addRoute(target, via, src, localInterface); + } else if ((strcmp(op, "remove") == 0 || strcmp(op, "del") == 0)) { + LinuxNetLink::getInstance().delRoute(target, via, src, localInterface); + } + return; + + int fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);; struct rtentry route = {0}; @@ -600,11 +602,11 @@ bool ManagedRoute::sync() if (!_applied.count(leftt)) { _applied[leftt] = false; // boolean unused - 
_routeCmd("replace",leftt,_via,(_via) ? (const char *)0 : _device); + _routeCmd("replace",leftt,_via,_src,_device); } if ((rightt)&&(!_applied.count(rightt))) { _applied[rightt] = false; // boolean unused - _routeCmd("replace",rightt,_via,(_via) ? (const char *)0 : _device); + _routeCmd("replace",rightt,_via,_src,_device); } #endif // __LINUX__ ---------------------------------------------------------- @@ -651,7 +653,7 @@ void ManagedRoute::remove() #endif // __BSD__ ------------------------------------------------------------ #ifdef __LINUX__ // ---------------------------------------------------------- - _routeCmd("del",r->first,_via,(_via) ? (const char *)0 : _device); + _routeCmd("del",r->first,_via,_src,_device); #endif // __LINUX__ ---------------------------------------------------------- #ifdef __WINDOWS__ // -------------------------------------------------------- diff --git a/osdep/ManagedRoute.hpp b/osdep/ManagedRoute.hpp index 779ad6a1..301c54a8 100644 --- a/osdep/ManagedRoute.hpp +++ b/osdep/ManagedRoute.hpp @@ -49,14 +49,20 @@ class ManagedRoute friend class SharedPtr; public: - ManagedRoute(const InetAddress &target,const InetAddress &via,const char *device) + ManagedRoute(const InetAddress &target,const InetAddress &via,const InetAddress &src,const char *device) { _target = target; _via = via; + _src = src; if (via.ss_family == AF_INET) _via.setPort(32); else if (via.ss_family == AF_INET6) _via.setPort(128); + if (src.ss_family == AF_INET) { + _src.setPort(32); + } else if (src.ss_family == AF_INET6) { + _src.setPort(128); + } Utils::scopy(_device,sizeof(_device),device); _systemDevice[0] = (char)0; } @@ -87,6 +93,7 @@ public: inline const InetAddress &target() const { return _target; } inline const InetAddress &via() const { return _via; } + inline const InetAddress &src() const { return _src; } inline const char *device() const { return _device; } private: @@ -95,6 +102,7 @@ private: InetAddress _target; InetAddress _via; + InetAddress _src; 
InetAddress _systemVia; // for route overrides std::map _applied; // routes currently applied char _device[128]; diff --git a/service/OneService.cpp b/service/OneService.cpp index 91cf49ee..1851c88c 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1631,6 +1631,17 @@ public: for(unsigned int i=0;i(&(n.config.routes[i].target)); const InetAddress *const via = reinterpret_cast(&(n.config.routes[i].via)); + InetAddress *src = NULL; + for (unsigned int j=0; j(&(n.config.assignedAddresses[j])); + if (target->isV4() && tmp->isV4()) { + src = reinterpret_cast(&(n.config.assignedAddresses[j])); + break; + } else if (target->isV6() && tmp->isV6()) { + src = reinterpret_cast(&(n.config.assignedAddresses[j])); + break; + } + } if ( (!checkIfManagedIsAllowed(n,*target)) || ((via->ss_family == target->ss_family)&&(matchIpOnly(myIps,*via))) ) continue; @@ -1662,7 +1673,7 @@ public: continue; // Add and apply new routes - n.managedRoutes.push_back(SharedPtr(new ManagedRoute(*target,*via,tapdev))); + n.managedRoutes.push_back(SharedPtr(new ManagedRoute(*target,*via,*src,tapdev))); if (!n.managedRoutes.back()->sync()) n.managedRoutes.pop_back(); } -- cgit v1.2.3 From 9681fedbb44a25ffa1108b88a4795e017746ca6c Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Thu, 7 Jun 2018 17:25:27 -0700 Subject: Spellcheck sweep across codebase --- RELEASE-NOTES.md | 8 ++++---- include/ZeroTierOne.h | 8 ++++---- node/Capability.hpp | 6 +++--- node/CertificateOfMembership.hpp | 2 +- node/Constants.hpp | 4 ++-- node/Identity.cpp | 4 ++-- node/IncomingPacket.cpp | 2 +- node/InetAddress.cpp | 26 +++++++++++++------------- node/MulticastGroup.hpp | 2 +- node/Network.hpp | 2 +- node/NetworkConfig.hpp | 2 +- node/Packet.hpp | 18 +++++++++--------- node/Path.hpp | 6 +++--- node/Peer.cpp | 2 +- node/Peer.hpp | 2 +- node/Salsa20.cpp | 2 +- node/SelfAwareness.cpp | 2 +- node/Tag.hpp | 2 +- node/Topology.cpp | 2 +- osdep/OSUtils.cpp | 2 +- osdep/WindowsEthernetTap.hpp | 2 +- 
service/OneService.cpp | 20 ++++---------------- 22 files changed, 57 insertions(+), 69 deletions(-) (limited to 'service') diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 54dd1375..bec144f0 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -36,7 +36,7 @@ ZeroTier Release Notes * Fixed two very rare multithreading issues that were only observed on certain systems * Platform-Specific Changes * MacOS - * Installer now loads the kernel extension right away so that High Sierra users will see the prompt to authorize it. This is done in the "Security & Privacy" preference pane and must be done driectly on the console (not via remote desktop). On High Sierra and newer kexts must be authorized at the console via security settings system preferences pane. + * Installer now loads the kernel extension right away so that High Sierra users will see the prompt to authorize it. This is done in the "Security & Privacy" preference pane and must be done directly on the console (not via remote desktop). On High Sierra and newer kexts must be authorized at the console via security settings system preferences pane. * Windows * The Windows installer should now install the driver without requiring a special prompt in most cases. This should make it easier for our packages to be accepted into and updated in the Chocolatey repository and should make it easier to perform remote installs across groups of machines using IT management and provisioning tools. * The Windows official packages are now signed with an EV certificate (with hardware key). @@ -78,7 +78,7 @@ The largest new feature in 1.2.0, and the product of many months of work, is our Rules allow you to filter packets on your network and vector traffic to security observers. Security observation can be performed in-band using REDIRECT or out of band using TEE. 
-Tags and capabilites provide advanced methods for implementing fine grained permission structures and micro-segmentation schemes without bloating the size and complexity of your rules table. +Tags and capabilities provide advanced methods for implementing fine grained permission structures and micro-segmentation schemes without bloating the size and complexity of your rules table. See the [rules engine announcement blog post](https://www.zerotier.com/blog/?p=927) for an in-depth discussion of theory and implementation. The [manual](https://www.zerotier.com/manual.shtml) contains detailed information on rule, tag, and capability use, and the `rule-compiler/` subfolder of the ZeroTier source tree contains a JavaScript function to compile rules in our human-readable rule definition language into rules suitable for import into a network controller. (ZeroTier Central uses this same script to compile rules on [my.zerotier.com](https://my.zerotier.com/).) @@ -161,7 +161,7 @@ A special kind of public network called an ad-hoc network may be accessed by joi | Start of port range (hex) Reserved ZeroTier address prefix indicating a controller-less network -Ad-hoc networks are public (no access control) networks that have no network controller. Instead their configuration and other credentials are generated locally. Ad-hoc networks permit only IPv6 UDP and TCP unicast traffic (no multicast or broadcast) using 6plane format NDP-emulated IPv6 addresses. In addition an ad-hoc network ID encodes an IP port range. UDP packets and TCP SYN (connection open) packets are only allowed to desintation ports within the encoded range. +Ad-hoc networks are public (no access control) networks that have no network controller. Instead their configuration and other credentials are generated locally. Ad-hoc networks permit only IPv6 UDP and TCP unicast traffic (no multicast or broadcast) using 6plane format NDP-emulated IPv6 addresses. In addition an ad-hoc network ID encodes an IP port range. 
UDP packets and TCP SYN (connection open) packets are only allowed to destination ports within the encoded range. For example `ff00160016000000` is an ad-hoc network allowing only SSH, while `ff0000ffff000000` is an ad-hoc network allowing any UDP or TCP port. @@ -176,7 +176,7 @@ If you have data in an old SQLite3 controller we've included a NodeJS script in ## Major Bug Fixes in 1.2.0 * **The Windows HyperV 100% CPU bug is FINALLY DEAD**: This long-running problem turns out to have been an issue with Windows itself, but one we were triggering by placing invalid data into the Windows registry. Microsoft is aware of the issue but we've also fixed the triggering problem on our side. ZeroTier should now co-exist quite well with HyperV and should now be able to be bridged with a HyperV virtual switch. - * **Segmenation faults on musl-libc based Linux systems**: Alpine Linux and some embedded Linux systems that use musl libc (a minimal libc) experienced segmentation faults. These were due to a smaller default stack size. A work-around that sets the stack size for new threads has been added. + * **Segmentation faults on musl-libc based Linux systems**: Alpine Linux and some embedded Linux systems that use musl libc (a minimal libc) experienced segmentation faults. These were due to a smaller default stack size. A work-around that sets the stack size for new threads has been added. * **Windows firewall blocks local JSON API**: On some Windows systems the firewall likes to block 127.0.0.1:9993 for mysterious reasons. This is now fixed in the installer via the addition of another firewall exemption rule. * **UI crash on embedded Windows due to missing fonts**: The MSI installer now ships fonts and will install them if they are not present, so this should be fixed. 
diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index 15e15bd1..b3927c2e 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -411,7 +411,7 @@ enum ZT_ResultCode ZT_RESULT_ERROR_UNSUPPORTED_OPERATION = 1001, /** - * The requestion operation was given a bad parameter or was called in an invalid state + * The requested operation was given a bad parameter or was called in an invalid state */ ZT_RESULT_ERROR_BAD_PARAMETER = 1002 }; @@ -1498,7 +1498,7 @@ typedef int (*ZT_WirePacketSendFunction)( /** * Function to check whether a path should be used for ZeroTier traffic * - * Paramters: + * Parameters: * (1) Node * (2) User pointer * (3) ZeroTier address or 0 for none/any @@ -1531,7 +1531,7 @@ typedef int (*ZT_PathCheckFunction)( * (1) Node * (2) User pointer * (3) ZeroTier address (least significant 40 bits) - * (4) Desried address family or -1 for any + * (4) Desired address family or -1 for any * (5) Buffer to fill with result * * If provided this function will be occasionally called to get physical @@ -1696,7 +1696,7 @@ ZT_SDK_API enum ZT_ResultCode ZT_Node_processBackgroundTasks(ZT_Node *node,void * Join a network * * This may generate calls to the port config callback before it returns, - * or these may be deffered if a netconf is not available yet. + * or these may be deferred if a netconf is not available yet. * * If we are already a member of the network, nothing is done and OK is * returned. diff --git a/node/Capability.hpp b/node/Capability.hpp index 91a46566..775532d9 100644 --- a/node/Capability.hpp +++ b/node/Capability.hpp @@ -52,7 +52,7 @@ class RuntimeEnvironment; * (1) Evaluates its capabilities in ascending order of ID to determine * which capability allows it to transmit this packet. * (2) If it has not done so lately, it then sends this capability to the - * receving peer ("presents" it). + * receiving peer ("presents" it). * (3) The sender then sends the packet. 
* * On the receiving side the receiver evaluates the capabilities presented @@ -64,7 +64,7 @@ class RuntimeEnvironment; * * Capabilities support a chain of custody. This is currently unused but * in the future would allow the publication of capabilities that can be - * handed off between nodes. Limited transferrability of capabilities is + * handed off between nodes. Limited transferability of capabilities is * a feature of true capability based security. */ class Capability : public Credential @@ -81,7 +81,7 @@ public: * @param id Capability ID * @param nwid Network ID * @param ts Timestamp (at controller) - * @param mccl Maximum custody chain length (1 to create non-transferrable capability) + * @param mccl Maximum custody chain length (1 to create non-transferable capability) * @param rules Network flow rules for this capability * @param ruleCount Number of flow rules */ diff --git a/node/CertificateOfMembership.hpp b/node/CertificateOfMembership.hpp index b5a90007..7fd38ad7 100644 --- a/node/CertificateOfMembership.hpp +++ b/node/CertificateOfMembership.hpp @@ -243,7 +243,7 @@ public: * Compare two certificates for parameter agreement * * This compares this certificate with the other and returns true if all - * paramters in this cert are present in the other and if they agree to + * parameters in this cert are present in the other and if they agree to * within this cert's max delta value for each given parameter. 
* * Tuples present in other but not in this cert are ignored, but any diff --git a/node/Constants.hpp b/node/Constants.hpp index b85c1b93..d86775c5 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -275,13 +275,13 @@ #define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000 /** - * Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processesing cutoff + * Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff */ #define ZT_PATH_QOS_ACK_CUTOFF_TIME 30000 /** * Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be - * processesed within cutoff time. Separate totals are kept for each type but + * processed within cutoff time. Separate totals are kept for each type but * the limit is the same for both. * * This limits how often this peer will compute statistical estimates diff --git a/node/Identity.cpp b/node/Identity.cpp index 03f27083..1687746b 100644 --- a/node/Identity.cpp +++ b/node/Identity.cpp @@ -50,8 +50,8 @@ static inline void _computeMemoryHardHash(const void *publicKey,unsigned int pub SHA512::hash(digest,publicKey,publicKeyBytes); // Initialize genmem[] using Salsa20 in a CBC-like configuration since - // ordinary Salsa20 is randomly seekable. This is good for a cipher - // but is not what we want for sequential memory-harndess. + // ordinary Salsa20 is randomly seek-able. This is good for a cipher + // but is not what we want for sequential memory-hardness. 
memset(genmem,0,ZT_IDENTITY_GEN_MEMORY); Salsa20 s20(digest,(char *)digest + 32); s20.crypt20((char *)genmem,(char *)genmem,64); diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index 70dcff5d..6bb9734c 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -237,7 +237,7 @@ bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr, char *ptr = begin; int count = 0; int len = payloadLength(); - // Read packet IDs and latency compensation intervals for each packet tracked by thie QoS packet + // Read packet IDs and latency compensation intervals for each packet tracked by this QoS packet while (ptr < (begin + len) && (count < ZT_PATH_QOS_TABLE_SIZE)) { memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t)); ptr+=sizeof(uint64_t); diff --git a/node/InetAddress.cpp b/node/InetAddress.cpp index 36b4e434..5d6ad07f 100644 --- a/node/InetAddress.cpp +++ b/node/InetAddress.cpp @@ -5,7 +5,7 @@ * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or - * (at your oion) any later version. + * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -62,23 +62,23 @@ InetAddress::IpScope InetAddress::ipScope() const case 0x37: return IP_SCOPE_PSEUDOPRIVATE; // 55.0.0.0/8 (US DoD) case 0x38: return IP_SCOPE_PSEUDOPRIVATE; // 56.0.0.0/8 (US Postal Service) case 0x64: - if ((ip & 0xffc00000) == 0x64400000) return IP_SCOPE_PRIVATE; // 100.64.0.0/10 + if ((ip & 0xffc00000) == 0x64400000) return IP_SCOPE_PRIVATE; // 100.64.0.0/10 break; case 0x7f: return IP_SCOPE_LOOPBACK; // 127.0.0.0/8 case 0xa9: - if ((ip & 0xffff0000) == 0xa9fe0000) return IP_SCOPE_LINK_LOCAL; // 169.254.0.0/16 + if ((ip & 0xffff0000) == 0xa9fe0000) return IP_SCOPE_LINK_LOCAL; // 169.254.0.0/16 break; case 0xac: - if ((ip & 0xfff00000) == 0xac100000) return IP_SCOPE_PRIVATE; // 172.16.0.0/12 + if ((ip & 0xfff00000) == 0xac100000) return IP_SCOPE_PRIVATE; // 172.16.0.0/12 break; case 0xc0: - if ((ip & 0xffff0000) == 0xc0a80000) return IP_SCOPE_PRIVATE; // 192.168.0.0/16 + if ((ip & 0xffff0000) == 0xc0a80000) return IP_SCOPE_PRIVATE; // 192.168.0.0/16 break; case 0xff: return IP_SCOPE_NONE; // 255.0.0.0/8 (broadcast, or unused/unusable) } switch(ip >> 28) { - case 0xe: return IP_SCOPE_MULTICAST; // 224.0.0.0/4 - case 0xf: return IP_SCOPE_PSEUDOPRIVATE; // 240.0.0.0/4 ("reserved," usually unusable) + case 0xe: return IP_SCOPE_MULTICAST; // 224.0.0.0/4 + case 0xf: return IP_SCOPE_PSEUDOPRIVATE; // 240.0.0.0/4 ("reserved," usually unusable) } return IP_SCOPE_GLOBAL; } break; @@ -86,21 +86,21 @@ InetAddress::IpScope InetAddress::ipScope() const case AF_INET6: { const unsigned char *ip = reinterpret_cast(reinterpret_cast(this)->sin6_addr.s6_addr); if ((ip[0] & 0xf0) == 0xf0) { - if (ip[0] == 0xff) return IP_SCOPE_MULTICAST; // ff00::/8 + if (ip[0] == 0xff) return IP_SCOPE_MULTICAST; // ff00::/8 if ((ip[0] == 0xfe)&&((ip[1] & 0xc0) == 0x80)) { unsigned int k = 2; while ((!ip[k])&&(k < 15)) ++k; if ((k == 15)&&(ip[15] == 
0x01)) - return IP_SCOPE_LOOPBACK; // fe80::1/128 - else return IP_SCOPE_LINK_LOCAL; // fe80::/10 + return IP_SCOPE_LOOPBACK; // fe80::1/128 + else return IP_SCOPE_LINK_LOCAL; // fe80::/10 } - if ((ip[0] & 0xfe) == 0xfc) return IP_SCOPE_PRIVATE; // fc00::/7 + if ((ip[0] & 0xfe) == 0xfc) return IP_SCOPE_PRIVATE; // fc00::/7 } unsigned int k = 0; while ((!ip[k])&&(k < 15)) ++k; if (k == 15) { // all 0's except last byte - if (ip[15] == 0x01) return IP_SCOPE_LOOPBACK; // ::1/128 - if (ip[15] == 0x00) return IP_SCOPE_NONE; // ::/128 + if (ip[15] == 0x01) return IP_SCOPE_LOOPBACK; // ::1/128 + if (ip[15] == 0x00) return IP_SCOPE_NONE; // ::/128 } return IP_SCOPE_GLOBAL; } break; diff --git a/node/MulticastGroup.hpp b/node/MulticastGroup.hpp index 0f4a621e..0a318136 100644 --- a/node/MulticastGroup.hpp +++ b/node/MulticastGroup.hpp @@ -68,7 +68,7 @@ public: * Derive the multicast group used for address resolution (ARP/NDP) for an IP * * @param ip IP address (port field is ignored) - * @return Multicat group for ARP/NDP + * @return Multicast group for ARP/NDP */ static inline MulticastGroup deriveMulticastGroupForAddressResolution(const InetAddress &ip) { diff --git a/node/Network.hpp b/node/Network.hpp index 95b5483a..38f03eb3 100644 --- a/node/Network.hpp +++ b/node/Network.hpp @@ -438,6 +438,6 @@ private: AtomicCounter __refCount; }; -} // naemspace ZeroTier +} // namespace ZeroTier #endif diff --git a/node/NetworkConfig.hpp b/node/NetworkConfig.hpp index 44066c86..8900bda5 100644 --- a/node/NetworkConfig.hpp +++ b/node/NetworkConfig.hpp @@ -562,7 +562,7 @@ public: char name[ZT_MAX_NETWORK_SHORT_NAME_LENGTH + 1]; /** - * Certficiate of membership (for private networks) + * Certificate of membership (for private networks) */ CertificateOfMembership com; }; diff --git a/node/Packet.hpp b/node/Packet.hpp index 6869691e..8e82bd34 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -150,7 +150,7 @@ * * In cryptography, a "break" means something different from what it 
means in * common discussion. If a cipher is 256 bits strong and someone finds a way - * to reduce key search to 254 bits, this constitues a "break" in the academic + * to reduce key search to 254 bits, this constitutes a "break" in the academic * literature. 254 bits is still far beyond what can be leveraged to accomplish * a "break" as most people would understand it -- the actual decryption and * reading of traffic. @@ -249,7 +249,7 @@ */ #define ZT_PROTO_MIN_FRAGMENT_LENGTH ZT_PACKET_FRAGMENT_IDX_PAYLOAD -// Field incides for parsing verbs ------------------------------------------- +// Field indices for parsing verbs ------------------------------------------- // Some verbs have variable-length fields. Those aren't fully defined here // yet-- instead they are parsed using relative indexes in IncomingPacket. @@ -734,7 +734,7 @@ public: * Credentials can be for any number of networks. * * The use of a zero byte to terminate the COM section is for legacy - * backward compatiblity. Newer fields are prefixed with a length. + * backward compatibility. Newer fields are prefixed with a length. * * OK/ERROR are not generated. */ @@ -751,7 +751,7 @@ public: * This message requests network configuration from a node capable of * providing it. * - * Respones to this are always whole configs intended for the recipient. + * Responses to this are always whole configs intended for the recipient. * For patches and other updates a NETWORK_CONFIG is sent instead. * * It would be valid and correct as of 1.2.0 to use NETWORK_CONFIG always, @@ -884,7 +884,7 @@ public: * <[6] MAC address of multicast group> * <[4] 32-bit ADI for multicast group> * <[1] flags> - * [<[...] network certficate of membership (DEPRECATED)>] + * [<[...] network certificate of membership (DEPRECATED)>] * [<[...] 
implicit gather results if flag 0x01 is set>] * * OK flags (same bits as request flags): @@ -933,7 +933,7 @@ public: // 0x11 -- deprecated /** - * An acknowledgement of receipt of a series of recent packets from another + * An acknowledgment of receipt of a series of recent packets from another * peer. This is used to calculate relative throughput values and to detect * packet loss. Only VERB_FRAME and VERB_EXT_FRAME packets are counted. * @@ -967,7 +967,7 @@ public: * The number of possible records per QoS packet is: (1400 * 8) / 72 = 155 * This packet should be sent very rarely (every few seconds) as it can be * somewhat large if the connection is saturated. Future versions might use - * a bloom table to probablistically determine these values in a vastly + * a bloom table to probabilistically determine these values in a vastly * more space-efficient manner. * * Note: The 'internal packet sojourn time' is a slight misnomer as it is a @@ -1000,7 +1000,7 @@ public: * * This message contains a remote trace event. Remote trace events can * be sent to observers configured at the network level for those that - * pertain directly to actiity on a network, or to global observers if + * pertain directly to activity on a network, or to global observers if * locally configured. * * The instance ID is a random 64-bit value generated by each ZeroTier @@ -1297,7 +1297,7 @@ public: * Encrypt/decrypt a separately armored portion of a packet * * This is currently only used to mask portions of HELLO as an extra - * security precation since most of that message is sent in the clear. + * security precaution since most of that message is sent in the clear. * * This must NEVER be used more than once in the same packet, as doing * so will result in re-use of the same key stream. 
diff --git a/node/Path.hpp b/node/Path.hpp index e7448190..80a7895a 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -353,7 +353,7 @@ public: * Record that we've received a VERB_ACK on this path, also compute throughput if required. * * @param now Current time - * @param ackedBytes Number of bytes awknowledged by other peer + * @param ackedBytes Number of bytes acknowledged by other peer */ inline void receivedAck(int64_t now, int32_t ackedBytes) { @@ -387,7 +387,7 @@ public: } /** - * @return Number of bytes thusfar sent that have not been awknowledged by the remote peer + * @return Number of bytes thus far sent that have not been acknowledged by the remote peer */ inline int64_t unackedSentBytes() { @@ -529,7 +529,7 @@ public: inline char *getName() { return _ifname; } /** - * @return Packet delay varience + * @return Packet delay variance */ inline float packetDelayVariance() { return _packetDelayVariance; } diff --git a/node/Peer.cpp b/node/Peer.cpp index a22bbffe..877ec2fd 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -140,7 +140,7 @@ void Peer::received( if ((!havePath)&&(RR->node->shouldUsePathForZeroTierTraffic(tPtr,_id.address(),path->localSocket(),path->address()))) { Mutex::Lock _l(_paths_m); - // Paths are redunant if they duplicate an alive path to the same IP or + // Paths are redundant if they duplicate an alive path to the same IP or // with the same local socket and address family. bool redundant = false; for(unsigned int i=0;i SelfAwareness::getSymmetricNatPredictions() * * Since flows are encrypted and authenticated they could not actually * read or modify traffic, but they could gather meta-data for forensics - * purpsoes or use this as a DOS attack vector. */ + * purposes or use this as a DOS attack vector. 
*/ std::map< uint32_t,unsigned int > maxPortByIp; InetAddress theOneTrueSurface; diff --git a/node/Tag.hpp b/node/Tag.hpp index d2e932c2..1d3e97fa 100644 --- a/node/Tag.hpp +++ b/node/Tag.hpp @@ -58,7 +58,7 @@ class RuntimeEnvironment; * values. * * Unlike capabilities tags are signed only by the issuer and are never - * transferrable. + * transferable. */ class Tag : public Credential { diff --git a/node/Topology.cpp b/node/Topology.cpp index 7c526b41..7e32f205 100644 --- a/node/Topology.cpp +++ b/node/Topology.cpp @@ -138,7 +138,7 @@ SharedPtr Topology::getPeer(void *tPtr,const Address &zta) } return SharedPtr(); } - } catch ( ... ) {} // ignore invalid identities or other strage failures + } catch ( ... ) {} // ignore invalid identities or other strange failures return SharedPtr(); } diff --git a/osdep/OSUtils.cpp b/osdep/OSUtils.cpp index cadd4e6b..8b7fd948 100644 --- a/osdep/OSUtils.cpp +++ b/osdep/OSUtils.cpp @@ -366,7 +366,7 @@ std::vector OSUtils::split(const char *s,const char *const sep,cons if (buf.size() > 0) { fields.push_back(buf); buf.clear(); - } // else skip runs of seperators + } // else skip runs of separators } else buf.push_back(*s); } ++s; diff --git a/osdep/WindowsEthernetTap.hpp b/osdep/WindowsEthernetTap.hpp index 1e36bdd8..4f0f89c4 100644 --- a/osdep/WindowsEthernetTap.hpp +++ b/osdep/WindowsEthernetTap.hpp @@ -71,7 +71,7 @@ public: static std::string destroyAllPersistentTapDevices(); /** - * Uninstall a specific persistent tap device by instance ID + * Uninstalls a specific persistent tap device by instance ID * * @param instanceId Device instance ID * @return Empty string on success, otherwise an error message diff --git a/service/OneService.cpp b/service/OneService.cpp index 152dca7b..e56a4827 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -422,7 +422,7 @@ public: unsigned int _primaryPort; volatile unsigned int _udpPortPickerCounter; - // Local configuration and memo-ized information from it + // Local 
configuration and memoized information from it json _localConfig; Hashtable< uint64_t,std::vector > _v4Hints; Hashtable< uint64_t,std::vector > _v6Hints; @@ -438,7 +438,7 @@ public: * To attempt to handle NAT/gateway craziness we use three local UDP ports: * * [0] is the normal/default port, usually 9993 - * [1] is a port dervied from our ZeroTier address + * [1] is a port derived from our ZeroTier address * [2] is a port computed from the normal/default for use with uPnP/NAT-PMP mappings * * [2] exists because on some gateways trying to do regular NAT-t interferes @@ -1130,16 +1130,7 @@ public: #ifdef __SYNOLOGY__ // Authenticate via Synology's built-in cgi script if (!isAuth) { - /* - fprintf(stderr, "path = %s\n", path.c_str()); - fprintf(stderr, "headers.size=%d\n", headers.size()); - std::map::const_iterator it(headers.begin()); - while(it != headers.end()) { - fprintf(stderr,"header[%s] = %s\n", (it->first).c_str(), (it->second).c_str()); - it++; - } - */ - // parse out url args + // Parse out url args int synotoken_pos = path.find("SynoToken"); int argpos = path.find("?"); if(synotoken_pos != std::string::npos && argpos != std::string::npos) { @@ -1152,10 +1143,7 @@ public: setenv("HTTP_COOKIE", cookie_val.c_str(), true); setenv("HTTP_X_SYNO_TOKEN", synotoken_val.c_str(), true); setenv("REMOTE_ADDR", ah2->second.c_str(),true); - //fprintf(stderr, "HTTP_COOKIE: %s\n",std::getenv ("HTTP_COOKIE")); - //fprintf(stderr, "HTTP_X_SYNO_TOKEN: %s\n",std::getenv ("HTTP_X_SYNO_TOKEN")); - //fprintf(stderr, "REMOTE_ADDR: %s\n",std::getenv ("REMOTE_ADDR")); - // check synology web auth + // Check Synology web auth char user[256], buf[1024]; FILE *fp = NULL; bzero(user, 256); -- cgit v1.2.3 From 17fbb020e733fab8d8be933bb4981927015a10f5 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Thu, 14 Jun 2018 16:34:45 -0700 Subject: Added multipath field to zerotier-cli status output. 
Adjusted how path estimates are computed and cached --- include/ZeroTierOne.h | 57 +++++++++++++++++++++++++++++++----- node/Node.cpp | 11 +++++++ node/Path.hpp | 79 ++++++++++++++++++++++++++++++-------------------- node/Peer.cpp | 4 +-- node/Peer.hpp | 14 +++++---- service/OneService.cpp | 51 +++++++++++++++++++++++++++++++- 6 files changed, 169 insertions(+), 47 deletions(-) (limited to 'service') diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index b3927c2e..a100afd9 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -449,13 +449,6 @@ enum ZT_MultipathMode * Will cease sending traffic over links that appear to be stale. */ ZT_MULTIPATH_PROPORTIONALLY_BALANCED = 2, - - /** - * Traffic is allocated across a user-defined interface/allocation - * - * Will cease sending traffic over links that appear to be stale. - */ - ZT_MULTIPATH_MANUALLY_BALANCED = 3 }; /** @@ -1221,6 +1214,56 @@ typedef struct */ uint64_t trustedPathId; + /** + * One-way latency + */ + float latency; + + /** + * How much latency varies over time + */ + float packetDelayVariance; + + /** + * How much observed throughput varies over time + */ + float throughputDisturbCoeff; + + /** + * Packet Error Ratio (PER) + */ + float packetErrorRatio; + + /** + * Packet Loss Ratio (PLR) + */ + float packetLossRatio; + + /** + * Stability of the path + */ + float stability; + + /** + * Current throughput (moving average) + */ + uint64_t throughput; + + /** + * Maximum observed throughput for this path + */ + uint64_t maxThroughput; + + /** + * Percentage of traffic allocated to this path + */ + float allocation; + + /** + * Name of physical interface (for monitoring) + */ + char *ifname; + /** * Is path expired? 
*/ diff --git a/node/Node.cpp b/node/Node.cpp index 71e5b6a7..24deeae2 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -474,6 +474,17 @@ ZT_PeerList *Node::peers() const p->paths[p->pathCount].trustedPathId = RR->topology->getOutboundPathTrust((*path)->address()); p->paths[p->pathCount].expired = 0; p->paths[p->pathCount].preferred = ((*path) == bestp) ? 1 : 0; + p->paths[p->pathCount].latency = (*path)->latency(); + p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance(); + p->paths[p->pathCount].throughputDisturbCoeff = (*path)->throughputDisturbanceCoefficient(); + p->paths[p->pathCount].packetErrorRatio = (*path)->packetErrorRatio(); + p->paths[p->pathCount].packetLossRatio = (*path)->packetLossRatio(); + p->paths[p->pathCount].stability = (*path)->lastComputedStability(); + p->paths[p->pathCount].throughput = (*path)->meanThroughput(); + p->paths[p->pathCount].maxThroughput = (*path)->maxLifetimeThroughput(); + p->paths[p->pathCount].allocation = (*path)->allocation(); + p->paths[p->pathCount].ifname = (*path)->getName(); + ++p->pathCount; } } diff --git a/node/Path.hpp b/node/Path.hpp index 30655877..6162be20 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -111,15 +111,16 @@ public: _expectingAckAsOf(0), _packetsReceivedSinceLastAck(0), _packetsReceivedSinceLastQoS(0), - _meanThroughput(0.0), _maxLifetimeThroughput(0), + _lastComputedMeanThroughput(0), _bytesAckedSinceLastThroughputEstimation(0), - _meanLatency(0.0), - _packetDelayVariance(0.0), - _packetErrorRatio(0.0), - _packetLossRatio(0), + _lastComputedMeanLatency(0.0), + _lastComputedPacketDelayVariance(0.0), + _lastComputedPacketErrorRatio(0.0), + _lastComputedPacketLossRatio(0), _lastComputedStability(0.0), _lastComputedRelativeQuality(0), + _lastComputedThroughputDistCoeff(0.0), _lastAllocation(0.0) { prepareBuffers(); @@ -142,15 +143,16 @@ public: _expectingAckAsOf(0), _packetsReceivedSinceLastAck(0), _packetsReceivedSinceLastQoS(0), - _meanThroughput(0.0), 
_maxLifetimeThroughput(0), + _lastComputedMeanThroughput(0), _bytesAckedSinceLastThroughputEstimation(0), - _meanLatency(0.0), - _packetDelayVariance(0.0), - _packetErrorRatio(0.0), - _packetLossRatio(0), + _lastComputedMeanLatency(0.0), + _lastComputedPacketDelayVariance(0.0), + _lastComputedPacketErrorRatio(0.0), + _lastComputedPacketLossRatio(0), _lastComputedStability(0.0), _lastComputedRelativeQuality(0), + _lastComputedThroughputDistCoeff(0.0), _lastAllocation(0.0) { prepareBuffers(); @@ -162,9 +164,11 @@ public: delete _throughputSamples; delete _latencySamples; delete _packetValiditySamples; + delete _throughputDisturbanceSamples; _throughputSamples = NULL; _latencySamples = NULL; _packetValiditySamples = NULL; + _throughputDisturbanceSamples = NULL; } /** @@ -311,7 +315,7 @@ public: inline void recordOutgoingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) { Mutex::Lock _l(_statistics_m); - if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { if (packetId % 2 == 0) { // even -> use for ACK _unackedBytes += payloadLength; // Take note that we're expecting a VERB_ACK on this path as of a specific time @@ -336,7 +340,7 @@ public: inline void recordIncomingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) { Mutex::Lock _l(_statistics_m); - if (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME) { + if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { if (packetId % 2 == 0) { // even -> use for ACK _inACKRecords[packetId] = payloadLength; _packetsReceivedSinceLastAck++; @@ -497,14 +501,14 @@ public: inline int64_t ackAge(int64_t now) { return _expectingAckAsOf ? 
now - _expectingAckAsOf : 0; } /** - * The maximum observed throughput for this path + * The maximum observed throughput (in bits/s) for this path */ inline uint64_t maxLifetimeThroughput() { return _maxLifetimeThroughput; } /** * @return The mean throughput (in bits/s) of this link */ - inline float meanThroughput() { return _meanThroughput; } + inline uint64_t meanThroughput() { return _lastComputedMeanThroughput; } /** * Assign a new relative quality value for this path in the aggregate link @@ -543,22 +547,22 @@ public: /** * @return Packet delay variance */ - inline float packetDelayVariance() { return _packetDelayVariance; } + inline float packetDelayVariance() { return _lastComputedPacketDelayVariance; } /** * @return Previously-computed mean latency */ - inline float meanLatency() { return _meanLatency; } + inline float meanLatency() { return _lastComputedMeanLatency; } /** * @return Packet loss rate (PLR) */ - inline float packetLossRatio() { return _packetLossRatio; } + inline float packetLossRatio() { return _lastComputedPacketLossRatio; } /** * @return Packet error ratio (PER) */ - inline float packetErrorRatio() { return _packetErrorRatio; } + inline float packetErrorRatio() { return _lastComputedPacketErrorRatio; } /** * Record an invalid incoming packet. This packet failed MAC/compression/cipher checks and will now @@ -571,38 +575,46 @@ public: */ inline char *getAddressString() { return _addrString; } + /** + * @return The current throughput disturbance coefficient + */ + inline float throughputDisturbanceCoefficient() { return _lastComputedThroughputDistCoeff; } + /** * Compute and cache stability and performance metrics. The resultant stability coefficient is a measure of how "well behaved" * this path is. This figure is substantially different from (but required for the estimation of the path's overall "quality". 
* * @param now Current time */ - inline void processBackgroundPathMeasurements(int64_t now, const int64_t peerId) { + inline void processBackgroundPathMeasurements(int64_t now) { if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { Mutex::Lock _l(_statistics_m); _lastPathQualityComputeTime = now; address().toString(_addrString); - _meanThroughput = _throughputSamples->mean(); - _meanLatency = _latencySamples->mean(); - _packetDelayVariance = _latencySamples->stddev(); // Similar to "jitter" (SEE: RFC 3393, RFC 4689) + _lastComputedMeanLatency = _latencySamples->mean(); + _lastComputedPacketDelayVariance = _latencySamples->stddev(); // Similar to "jitter" (SEE: RFC 3393, RFC 4689) + _lastComputedMeanThroughput = (uint64_t)_throughputSamples->mean(); // If no packet validity samples, assume PER==0 - _packetErrorRatio = 1 - (_packetValiditySamples->count() ? _packetValiditySamples->mean() : 1); + _lastComputedPacketErrorRatio = 1 - (_packetValiditySamples->count() ? _packetValiditySamples->mean() : 1); // Compute path stability // Normalize measurements with wildly different ranges into a reasonable range - float normalized_pdv = Utils::normalize(_packetDelayVariance, 0, ZT_PATH_MAX_PDV, 0, 10); - float normalized_la = Utils::normalize(_meanLatency, 0, ZT_PATH_MAX_MEAN_LATENCY, 0, 10); + float normalized_pdv = Utils::normalize(_lastComputedPacketDelayVariance, 0, ZT_PATH_MAX_PDV, 0, 10); + float normalized_la = Utils::normalize(_lastComputedMeanLatency, 0, ZT_PATH_MAX_MEAN_LATENCY, 0, 10); float throughput_cv = _throughputSamples->mean() > 0 ? 
_throughputSamples->stddev() / _throughputSamples->mean() : 1; // Form an exponential cutoff and apply contribution weights float pdv_contrib = exp((-1)*normalized_pdv) * ZT_PATH_CONTRIB_PDV; float latency_contrib = exp((-1)*normalized_la) * ZT_PATH_CONTRIB_LATENCY; + // Throughput Disturbance Coefficient float throughput_disturbance_contrib = exp((-1)*throughput_cv) * ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE; + _throughputDisturbanceSamples->push(throughput_cv); + _lastComputedThroughputDistCoeff = _throughputDisturbanceSamples->mean(); // Obey user-defined ignored contributions pdv_contrib = ZT_PATH_CONTRIB_PDV > 0.0 ? pdv_contrib : 1; latency_contrib = ZT_PATH_CONTRIB_LATENCY > 0.0 ? latency_contrib : 1; throughput_disturbance_contrib = ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE > 0.0 ? throughput_disturbance_contrib : 1; - // Compute the quality product + // Stability _lastComputedStability = pdv_contrib + latency_contrib + throughput_disturbance_contrib; - _lastComputedStability *= 1 - _packetErrorRatio; + _lastComputedStability *= 1 - _lastComputedPacketErrorRatio; // Prevent QoS records from sticking around for too long std::map::iterator it = _outQoSRecords.begin(); while (it != _outQoSRecords.end()) { @@ -646,6 +658,7 @@ public: _throughputSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); _latencySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); _packetValiditySamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); + _throughputDisturbanceSamples = new RingBuffer(ZT_PATH_QUALITY_METRIC_WIN_SZ); memset(_ifname, 0, 16); memset(_addrString, 0, sizeof(_addrString)); } @@ -677,19 +690,20 @@ private: int16_t _packetsReceivedSinceLastAck; int16_t _packetsReceivedSinceLastQoS; - float _meanThroughput; uint64_t _maxLifetimeThroughput; + uint64_t _lastComputedMeanThroughput; uint64_t _bytesAckedSinceLastThroughputEstimation; - volatile float _meanLatency; - float _packetDelayVariance; + float _lastComputedMeanLatency; + float 
_lastComputedPacketDelayVariance; - float _packetErrorRatio; - float _packetLossRatio; + float _lastComputedPacketErrorRatio; + float _lastComputedPacketLossRatio; // cached estimates float _lastComputedStability; float _lastComputedRelativeQuality; + float _lastComputedThroughputDistCoeff; float _lastAllocation; // cached human-readable strings for tracing purposes @@ -699,6 +713,7 @@ private: RingBuffer *_throughputSamples; RingBuffer *_latencySamples; RingBuffer *_packetValiditySamples; + RingBuffer *_throughputDisturbanceSamples; }; } // namespace ZeroTier diff --git a/node/Peer.cpp b/node/Peer.cpp index 55132bba..0f471b07 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -116,7 +116,7 @@ void Peer::received( } for(unsigned int i=0;iprocessBackgroundPathMeasurements(now, _id.address().toInt()); + _paths[i].p->processBackgroundPathMeasurements(now); } } } @@ -415,7 +415,7 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) for(unsigned int i=0;iprocessBackgroundPathMeasurements(now, _id.address().toInt()); + _paths[i].p->processBackgroundPathMeasurements(now); } } diff --git a/node/Peer.hpp b/node/Peer.hpp index 9361f665..ddbe6f77 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -353,14 +353,18 @@ public: inline int64_t isActive(int64_t now) const { return ((now - _lastNontrivialReceive) < ZT_PEER_ACTIVITY_TIMEOUT); } /** - * @return Latency in milliseconds of best path or 0xffff if unknown / no paths + * @return Latency in milliseconds of best/aggregate path or 0xffff if unknown / no paths */ inline unsigned int latency(const int64_t now) { - SharedPtr bp(getAppropriatePath(now,false)); - if (bp) - return bp->latency(); - return 0xffff; + if (RR->node->getMultipathMode()) { + return (int)computeAggregateLinkMeanLatency(); + } else { + SharedPtr bp(getAppropriatePath(now,false)); + if (bp) + return bp->latency(); + return 0xffff; + } } /** diff --git a/service/OneService.cpp b/service/OneService.cpp index e56a4827..a1a9d981 100644 --- 
a/service/OneService.cpp +++ b/service/OneService.cpp @@ -298,6 +298,39 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer) pj["paths"] = pa; } +static void _peerAggregateLinkToJson(nlohmann::json &pj,const ZT_Peer *peer) +{ + char tmp[256]; + OSUtils::ztsnprintf(tmp,sizeof(tmp),"%.10llx",peer->address); + pj["aggregateLinkLatency"] = peer->latency; + + nlohmann::json pa = nlohmann::json::array(); + for(unsigned int i=0;ipathCount;++i) { + //int64_t lastSend = peer->paths[i].lastSend; + //int64_t lastReceive = peer->paths[i].lastReceive; + nlohmann::json j; + j["address"] = reinterpret_cast(&(peer->paths[i].address))->toString(tmp); + //j["lastSend"] = (lastSend < 0) ? 0 : lastSend; + //j["lastReceive"] = (lastReceive < 0) ? 0 : lastReceive; + //j["trustedPathId"] = peer->paths[i].trustedPathId; + //j["active"] = (bool)(peer->paths[i].expired == 0); + //j["expired"] = (bool)(peer->paths[i].expired != 0); + //j["preferred"] = (bool)(peer->paths[i].preferred != 0); + j["latency"] = peer->paths[i].latency; + //j["packetDelayVariance"] = peer->paths[i].packetDelayVariance; + //j["throughputDisturbCoeff"] = peer->paths[i].throughputDisturbCoeff; + //j["packetErrorRatio"] = peer->paths[i].packetErrorRatio; + //j["packetLossRatio"] = peer->paths[i].packetLossRatio; + j["stability"] = peer->paths[i].stability; + j["throughput"] = peer->paths[i].throughput; + //j["maxThroughput"] = peer->paths[i].maxThroughput; + j["allocation"] = peer->paths[i].allocation; + j["ifname"] = peer->paths[i].ifname; + pa.push_back(j); + } + pj["paths"] = pa; +} + static void _moonToJson(nlohmann::json &mj,const World &world) { char tmp[4096]; @@ -1189,7 +1222,23 @@ public: json &settings = res["config"]["settings"]; settings["primaryPort"] = OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; settings["allowTcpFallbackRelay"] = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],_allowTcpFallbackRelay); - settings["multipathMode"] = 
OSUtils::jsonInt(settings["multipathMode"],_multipathMode); + + if (_multipathMode) { + json &multipathConfig = res["multipath"]; + ZT_PeerList *pl = _node->peers(); + char peerAddrStr[256]; + if (pl) { + for(unsigned long i=0;ipeerCount;++i) { + if (pl->peers[i].role == ZT_PEER_ROLE_LEAF) { + nlohmann::json pj; + _peerAggregateLinkToJson(pj,&(pl->peers[i])); + OSUtils::ztsnprintf(peerAddrStr,sizeof(peerAddrStr),"%.10llx",pl->peers[i].address); + multipathConfig[peerAddrStr] = (pj); + } + } + } + } + #ifdef ZT_USE_MINIUPNPC settings["portMappingEnabled"] = OSUtils::jsonBool(settings["portMappingEnabled"],true); #else -- cgit v1.2.3 From bdcdccfcc3157e62a4bc8078e583876cbef41223 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Fri, 22 Jun 2018 16:30:20 -0700 Subject: Improved path selection, more efficient traffic allocation, lower QoS/ACK overhead --- include/ZeroTierOne.h | 5 ++ node/Constants.hpp | 15 ++++- node/Node.cpp | 6 +- node/Path.hpp | 20 +++---- node/Peer.cpp | 150 ++++++++++++++++++++++++++----------------------- node/Peer.hpp | 36 ++++++++++-- service/OneService.cpp | 2 +- 7 files changed, 143 insertions(+), 91 deletions(-) (limited to 'service') diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index a100afd9..5b228e17 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -1315,6 +1315,11 @@ typedef struct */ unsigned int pathCount; + /** + * Whether this peer was ever reachable via an aggregate link + */ + bool hadAggregateLink; + /** * Known network paths to peer */ diff --git a/node/Constants.hpp b/node/Constants.hpp index 0d3692f1..420343ad 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -274,6 +274,19 @@ */ #define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000 +/** + * Packets are only used for QoS/ACK statistical sampling if their packet ID is divisible by + * this integer. This is to provide a mechanism for both peers to agree on which packets need + * special treatment without having to exchange information. 
Changing this value would be + * a breaking change and would necessitate a protocol version upgrade. Since each incoming and + * outgoing packet ID is checked against this value its evaluation is of the form: + * (id & (divisor - 1)) == 0, thus the divisor must be a power of 2. + * + * This value is set at (16) so that given a normally-distributed RNG output we will sample + * 1/16th (or ~6.25%) of packets. + */ +#define ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR 0x10 + /** * Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff */ @@ -384,7 +397,7 @@ /** * Minimum amount of time between each ACK packet */ -#define ZT_PATH_ACK_INTERVAL 250 +#define ZT_PATH_ACK_INTERVAL 1000 /** * How often an aggregate link statistics report is emitted into this tracing system diff --git a/node/Node.cpp b/node/Node.cpp index 24deeae2..9b10dfdd 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -450,6 +450,7 @@ ZT_PeerList *Node::peers() const for(std::vector< std::pair< Address,SharedPtr > >::iterator pi(peers.begin());pi!=peers.end();++pi) { ZT_Peer *p = &(pl->peers[pl->peerCount++]); p->address = pi->second->address().toInt(); + p->hadAggregateLink = 0; if (pi->second->remoteVersionKnown()) { p->versionMajor = pi->second->remoteVersionMajor(); p->versionMinor = pi->second->remoteVersionMinor(); @@ -466,6 +467,7 @@ ZT_PeerList *Node::peers() const std::vector< SharedPtr > paths(pi->second->paths(_now)); SharedPtr bestp(pi->second->getAppropriatePath(_now,false)); + p->hadAggregateLink |= pi->second->hasAggregateLink(); p->pathCount = 0; for(std::vector< SharedPtr >::iterator path(paths.begin());path!=paths.end();++path) { ZT_FAST_MEMCPY(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage)); @@ -475,14 +477,14 @@ ZT_PeerList *Node::peers() const p->paths[p->pathCount].expired = 0; p->paths[p->pathCount].preferred = ((*path) == bestp) ? 
1 : 0; p->paths[p->pathCount].latency = (*path)->latency(); - p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance(); + p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance(); p->paths[p->pathCount].throughputDisturbCoeff = (*path)->throughputDisturbanceCoefficient(); p->paths[p->pathCount].packetErrorRatio = (*path)->packetErrorRatio(); p->paths[p->pathCount].packetLossRatio = (*path)->packetLossRatio(); p->paths[p->pathCount].stability = (*path)->lastComputedStability(); p->paths[p->pathCount].throughput = (*path)->meanThroughput(); p->paths[p->pathCount].maxThroughput = (*path)->maxLifetimeThroughput(); - p->paths[p->pathCount].allocation = (*path)->allocation(); + p->paths[p->pathCount].allocation = (float)(*path)->allocation() / (float)255; p->paths[p->pathCount].ifname = (*path)->getName(); ++p->pathCount; diff --git a/node/Path.hpp b/node/Path.hpp index fb202306..cafff8cf 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -121,7 +121,7 @@ public: _lastComputedStability(0.0), _lastComputedRelativeQuality(0), _lastComputedThroughputDistCoeff(0.0), - _lastAllocation(0.0) + _lastAllocation(0) { prepareBuffers(); } @@ -153,7 +153,7 @@ public: _lastComputedStability(0.0), _lastComputedRelativeQuality(0), _lastComputedThroughputDistCoeff(0.0), - _lastAllocation(0.0) + _lastAllocation(0) { prepareBuffers(); _phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, 16); @@ -316,12 +316,10 @@ public: { Mutex::Lock _l(_statistics_m); if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { - if (packetId % 2 == 0) { // even -> use for ACK + if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) { _unackedBytes += payloadLength; // Take note that we're expecting a VERB_ACK on this path as of a specific time _expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? 
_expectingAckAsOf : now; - } - else { // odd -> use for QoS if (_outQoSRecords.size() < ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS) { _outQoSRecords[packetId] = now; } @@ -341,11 +339,9 @@ public: { Mutex::Lock _l(_statistics_m); if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { - if (packetId % 2 == 0) { // even -> use for ACK + if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) { _inACKRecords[packetId] = payloadLength; _packetsReceivedSinceLastAck++; - } - else { // odd -> use for QoS _inQoSRecords[packetId] = now; _packetsReceivedSinceLastQoS++; } @@ -527,12 +523,12 @@ public: * * @param allocation Percentage of traffic to be sent over this path to a peer */ - inline void updateComponentAllocationOfAggregateLink(float allocation) { _lastAllocation = allocation; } + inline void updateComponentAllocationOfAggregateLink(unsigned char allocation) { _lastAllocation = allocation; } /** * @return Percentage of traffic allocated to this path in the aggregate link */ - inline float allocation() { return _lastAllocation; } + inline unsigned char allocation() { return _lastAllocation; } /** * @return Stability estimates can become expensive to compute, we cache the most recent result. 
@@ -704,7 +700,9 @@ private: float _lastComputedStability; float _lastComputedRelativeQuality; float _lastComputedThroughputDistCoeff; - float _lastAllocation; + unsigned char _lastAllocation; + + // cached human-readable strings for tracing purposes char _ifname[16]; diff --git a/node/Peer.cpp b/node/Peer.cpp index 1d581ab8..21bbfabe 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -56,6 +56,12 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _lastSentFullHello(0), _lastACKWindowReset(0), _lastQoSWindowReset(0), + _lastMultipathCompatibilityCheck(0), + _freeRandomByte(0), + _uniqueAlivePathCount(0), + _localMultipathSupported(false), + _remoteMultipathSupported(false), + _canUseMultipath(false), _vProto(0), _vMajor(0), _vMinor(0), @@ -69,6 +75,7 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _lastAggregateStatsReport(0), _lastAggregateAllocation(0) { + Utils::getSecureRandom(&_freeRandomByte, 1); if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) throw ZT_EXCEPTION_INVALID_ARGUMENT; _pathChoiceHist = new RingBuffer(ZT_MULTIPATH_PROPORTION_WIN_SZ); @@ -110,7 +117,7 @@ void Peer::received( recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, now); - if (canUseMultipath()) { + if (_canUseMultipath) { if (path->needsToSendQoS(now)) { sendQOS_MEASUREMENT(tPtr, path, path->localSocket(), path->address(), now); } @@ -145,17 +152,23 @@ void Peer::received( // Paths are redundant if they duplicate an alive path to the same IP or // with the same local socket and address family. 
bool redundant = false; + unsigned int replacePath = ZT_MAX_PEER_NETWORK_PATHS; for(unsigned int i=0;ialive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) || (_paths[i].p->address().ipsEqual2(path->address())) ) ) { + if ( (_paths[i].p->alive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) || (_paths[i].p->address().ipsEqual2(path->address())) ) ) { redundant = true; break; } + // If the path is the same address and port, simply assume this is a replacement + if ( (_paths[i].p->address().ipsEqual2(path->address()) && (_paths[i].p->address().port() == path->address().port()))) { + replacePath = i; + break; + } } else break; } - - if (!redundant) { - unsigned int replacePath = ZT_MAX_PEER_NETWORK_PATHS; + // If the path isn't a duplicate of the same localSocket AND we haven't already determined a replacePath, + // then find the worst path and replace it. 
+ if (!redundant && replacePath == ZT_MAX_PEER_NETWORK_PATHS) { int replacePathQuality = 0; for(unsigned int i=0;iaddress().ss_family == path->address().ss_family && _paths[i].p->address().ipsEqual2(path->address())) { - replacePath = i; - break; - } - } - } - } - - if (replacePath != ZT_MAX_PEER_NETWORK_PATHS) { - if (verb == Packet::VERB_OK) { - RR->t->peerLearnedNewPath(tPtr,networkId,*this,path,packetId); - _paths[replacePath].lr = now; - _paths[replacePath].p = path; - _paths[replacePath].priority = 1; - } else { - attemptToContact = true; - } + } + if (replacePath != ZT_MAX_PEER_NETWORK_PATHS) { + if (verb == Packet::VERB_OK) { + RR->t->peerLearnedNewPath(tPtr,networkId,*this,path,packetId); + _paths[replacePath].lr = now; + _paths[replacePath].p = path; + _paths[replacePath].priority = 1; + } else { + attemptToContact = true; } } } @@ -274,7 +273,9 @@ void Peer::received( void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now) { - if (localMultipathSupport()) { + // Grab second byte from packetId to use as a source of entropy in the next path selection + _freeRandomByte = (packetId & 0xFF00) >> 8; + if (_canUseMultipath) { path->recordOutgoingPacket(now, packetId, payloadLength, verb); } } @@ -282,7 +283,7 @@ void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t pack void Peer::recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now) { - if (localMultipathSupport()) { + if (_canUseMultipath) { if (path->needsToSendAck(now)) { sendACK(tPtr, path, path->localSocket(), path->address(), now); } @@ -323,6 +324,9 @@ void Peer::computeAggregateProportionalAllocation(int64_t now) + (fmax(1, relThroughput[i]) * ZT_PATH_CONTRIB_THROUGHPUT) + relScope * ZT_PATH_CONTRIB_SCOPE; relQuality *= age_contrib; + // Arbitrary cutoffs + relQuality = relQuality > (1.00 / 100.0) ? 
relQuality : 0.0; + relQuality = relQuality < (99.0 / 100.0) ? relQuality : 1.0; totalRelativeQuality += relQuality; _paths[i].p->updateRelativeQuality(relQuality); } @@ -330,12 +334,12 @@ void Peer::computeAggregateProportionalAllocation(int64_t now) // Convert set of relative performances into an allocation set for(uint16_t i=0;iupdateComponentAllocationOfAggregateLink(_paths[i].p->relativeQuality() / totalRelativeQuality); + _paths[i].p->updateComponentAllocationOfAggregateLink((_paths[i].p->relativeQuality() / totalRelativeQuality) * 255); } } } -float Peer::computeAggregateLinkPacketDelayVariance() +int Peer::computeAggregateLinkPacketDelayVariance() { float pdv = 0.0; for(unsigned int i=0;i Peer::getAppropriatePath(int64_t now, bool includeExpired) * Send traffic across the highest quality path only. This algorithm will still * use the old path quality metric from protocol version 9. */ - if (!canUseMultipath()) { + if (!_canUseMultipath) { long bestPathQuality = 2147483647; for(unsigned int i=0;i Peer::getAppropriatePath(int64_t now, bool includeExpired) } } } - unsigned int r; - Utils::getSecureRandom(&r, 1); + unsigned int r = _freeRandomByte; if (numAlivePaths > 0) { - // pick a random out of the set deemed "alive" int rf = r % numAlivePaths; return _paths[alivePaths[rf]].p; } else if(numStalePaths > 0) { - // resort to trying any non-expired path + // Resort to trying any non-expired path int rf = r % numStalePaths; return _paths[stalePaths[rf]].p; } @@ -461,40 +463,12 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) * Proportionally allocate traffic according to dynamic path quality measurements */ if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) { - int numAlivePaths = 0; - int numStalePaths = 0; - int alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; - int stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; - memset(&alivePaths, -1, sizeof(alivePaths)); - memset(&stalePaths, -1, sizeof(stalePaths)); - // Attempt to find an 
excuse not to use the rest of this algorithm - // Alive or Stale? - for(unsigned int i=0;ialive(now)) { - alivePaths[numAlivePaths] = i; - numAlivePaths++; - } else { - stalePaths[numStalePaths] = i; - numStalePaths++; - } - // Record a default path to use as a short-circuit for the rest of the algorithm (if needed) - bestPath = i; - } - } if ((now - _lastAggregateAllocation) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) { _lastAggregateAllocation = now; computeAggregateProportionalAllocation(now); } - if (numAlivePaths == 0 && numStalePaths == 0) { - return SharedPtr(); - } if (numAlivePaths == 1 || numStalePaths == 1) { - return _paths[bestPath].p; - } // Randomly choose path according to their allocations - unsigned int r; - Utils::getSecureRandom(&r, 1); - float rf = (float)(r %= 100) / 100; + float rf = _freeRandomByte; for(int i=0;iallocation()) { @@ -676,6 +650,41 @@ void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &o } } +inline void Peer::processBackgroundPeerTasks(int64_t now) +{ + // Determine current multipath compatibility with other peer + if ((now - _lastMultipathCompatibilityCheck) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) { + // Cache number of available paths so that we can short-circuit multipath logic elsewhere + // + // We also take notice of duplicate paths (same IP only) because we may have + // recently received a direct path push from a peer and our list might contain + // a dead path which hasn't been fully recognized as such. In this case we + // don't want the duplicate to trigger execution of multipath code prematurely. + // + // This is done to support the behavior of auto multipath enable/disable + // without user intervention. 
+ int currAlivePathCount = 0; + int duplicatePathsFound = 0; + for (unsigned int i=0;iaddress().ipsEqual2(_paths[j].p->address()) && i != j) { + duplicatePathsFound+=1; + break; + } + } + } + } + _uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2)); + _lastMultipathCompatibilityCheck = now; + _localMultipathSupported = ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9)); + _remoteMultipathSupported = _vProto > 9; + // If both peers support multipath and more than one path exist, we can use multipath logic + _canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1); + } +} + void Peer::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) { Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ACK); @@ -774,14 +783,15 @@ void Peer::tryMemorizedPath(void *tPtr,int64_t now) unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) { unsigned int sent = 0; - Mutex::Lock _l(_paths_m); const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD); _lastSentFullHello = now; + processBackgroundPeerTasks(now); + // Emit traces regarding aggregate link status - if (canUseMultipath()) { + if (_canUseMultipath) { int alivePathCount = aggregateLinkPhysicalPathCount(); if ((now - _lastAggregateStatsReport) > ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL) { _lastAggregateStatsReport = now; diff --git a/node/Peer.hpp b/node/Peer.hpp index ddbe6f77..a32eaad0 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -203,12 +203,12 @@ public: /** * @return The aggregate link Packet Delay Variance (PDV) */ - float computeAggregateLinkPacketDelayVariance(); + int computeAggregateLinkPacketDelayVariance(); /** * @return The aggregate link mean latency */ - float computeAggregateLinkMeanLatency(); + int computeAggregateLinkMeanLatency(); /** * @return The number of currently alive "physical" paths in the aggregate link @@ 
-357,7 +357,7 @@ public: */ inline unsigned int latency(const int64_t now) { - if (RR->node->getMultipathMode()) { + if (_canUseMultipath) { return (int)computeAggregateLinkMeanLatency(); } else { SharedPtr bp(getAppropriatePath(now,false)); @@ -417,6 +417,14 @@ public: inline bool remoteVersionKnown() const { return ((_vMajor > 0)||(_vMinor > 0)||(_vRevision > 0)); } + /** + * Periodically update known multipath activation constraints. This is done so that we know when and when + * not to use multipath logic. Doing this once every few seconds is sufficient. + * + * @param now Current time + */ + inline void processBackgroundPeerTasks(int64_t now); + /** * Record that the remote peer does have multipath enabled. As is evident by the receipt of a VERB_ACK * or a VERB_QOS_MEASUREMENT packet at some point in the past. Until this flag is set, the local client @@ -427,18 +435,18 @@ public: /** * @return Whether the local client supports and is configured to use multipath */ - inline bool localMultipathSupport() { return ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9)); } + inline bool localMultipathSupport() { return _localMultipathSupported; } /** * @return Whether the remote peer supports and is configured to use multipath */ - inline bool remoteMultipathSupport() { return (_remotePeerMultipathEnabled && (_vProto > 9)); } + inline bool remoteMultipathSupport() { return _remoteMultipathSupported; } /** * @return Whether this client can use multipath to communicate with this peer. True if both peers are using * the correct protocol and if both peers have multipath enabled. False if otherwise. */ - inline bool canUseMultipath() { return (localMultipathSupport() && remoteMultipathSupport()); } + inline bool canUseMultipath() { return _canUseMultipath; } /** * @return True if peer has received a trust established packet (e.g. 
common network membership) in the past ZT_TRUST_EXPIRATION ms @@ -557,6 +565,13 @@ public: return (_QoSCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT); } + /** + * @return Whether this peer is reachable via an aggregate link + */ + inline bool hasAggregateLink() { + return _localMultipathSupported && _remoteMultipathSupported && _remotePeerMultipathEnabled; + } + /** * Serialize a peer for storage in local cache * @@ -658,6 +673,15 @@ private: int64_t _lastPathPrune; int64_t _lastACKWindowReset; int64_t _lastQoSWindowReset; + int64_t _lastMultipathCompatibilityCheck; + + unsigned char _freeRandomByte; + + int _uniqueAlivePathCount; + + bool _localMultipathSupported; + bool _remoteMultipathSupported; + bool _canUseMultipath; uint16_t _vProto; uint16_t _vMajor; diff --git a/service/OneService.cpp b/service/OneService.cpp index a1a9d981..9b12f17b 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1229,7 +1229,7 @@ public: char peerAddrStr[256]; if (pl) { for(unsigned long i=0;ipeerCount;++i) { - if (pl->peers[i].role == ZT_PEER_ROLE_LEAF) { + if (pl->peers[i].hadAggregateLink) { nlohmann::json pj; _peerAggregateLinkToJson(pj,&(pl->peers[i])); OSUtils::ztsnprintf(peerAddrStr,sizeof(peerAddrStr),"%.10llx",pl->peers[i].address); -- cgit v1.2.3 From 515d7962b01aa08de1dd8e07a7c22a7d2b730340 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Thu, 9 Aug 2018 11:39:42 -0700 Subject: Build fixes. 
--- service/OneService.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'service') diff --git a/service/OneService.cpp b/service/OneService.cpp index a34db4b4..b4539b84 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1632,6 +1632,18 @@ public: const InetAddress *const target = reinterpret_cast(&(n.config.routes[i].target)); const InetAddress *const via = reinterpret_cast(&(n.config.routes[i].via)); + InetAddress *src = NULL; + for (unsigned int j=0; j(&(n.config.assignedAddresses[j])); + if (target->isV4() && tmp->isV4()) { + src = reinterpret_cast(&(n.config.assignedAddresses[j])); + break; + } else if (target->isV6() && tmp->isV6()) { + src = reinterpret_cast(&(n.config.assignedAddresses[j])); + break; + } + } + if ( (!checkIfManagedIsAllowed(n,*target)) || ((via->ss_family == target->ss_family)&&(matchIpOnly(myIps,*via))) ) continue; @@ -1659,7 +1671,7 @@ public: continue; // Add and apply new routes - n.managedRoutes.push_back(SharedPtr(new ManagedRoute(*target,*via,tapdev))); + n.managedRoutes.push_back(SharedPtr(new ManagedRoute(*target,*via,*src,tapdev))); if (!n.managedRoutes.back()->sync()) n.managedRoutes.pop_back(); #endif @@ -1922,7 +1934,7 @@ public: inline void phyOnUnixAccept(PhySocket *sockL,PhySocket *sockN,void **uptrL,void **uptrN) {} inline void phyOnUnixClose(PhySocket *sock,void **uptr) {} inline void phyOnUnixData(PhySocket *sock,void **uptr,void *data,unsigned long len) {} - inline void phyOnUnixWritable(PhySocket *sock,void **uptr,bool lwip_invoked) {} + inline void phyOnUnixWritable(PhySocket *sock,void **uptr) {} inline int nodeVirtualNetworkConfigFunction(uint64_t nwid,void **nuptr,enum ZT_VirtualNetworkConfigOperation op,const ZT_VirtualNetworkConfig *nwc) { -- cgit v1.2.3 From 0e4cfd897b4d13298671e68b52f85cc5ed864b8d Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Thu, 9 Aug 2018 13:40:49 -0700 Subject: Multipath-related amendments from merge into edge --- 
service/OneService.cpp | 80 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 16 deletions(-) (limited to 'service') diff --git a/service/OneService.cpp b/service/OneService.cpp index b4539b84..c9786829 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -283,6 +283,39 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer) pj["paths"] = pa; } +static void _peerAggregateLinkToJson(nlohmann::json &pj,const ZT_Peer *peer) +{ + char tmp[256]; + OSUtils::ztsnprintf(tmp,sizeof(tmp),"%.10llx",peer->address); + pj["aggregateLinkLatency"] = peer->latency; + + nlohmann::json pa = nlohmann::json::array(); + for(unsigned int i=0;ipathCount;++i) { + int64_t lastSend = peer->paths[i].lastSend; + int64_t lastReceive = peer->paths[i].lastReceive; + nlohmann::json j; + j["address"] = reinterpret_cast(&(peer->paths[i].address))->toString(tmp); + j["lastSend"] = (lastSend < 0) ? 0 : lastSend; + j["lastReceive"] = (lastReceive < 0) ? 0 : lastReceive; + //j["trustedPathId"] = peer->paths[i].trustedPathId; + //j["active"] = (bool)(peer->paths[i].expired == 0); + //j["expired"] = (bool)(peer->paths[i].expired != 0); + //j["preferred"] = (bool)(peer->paths[i].preferred != 0); + j["latency"] = peer->paths[i].latency; + j["pdv"] = peer->paths[i].packetDelayVariance; + //j["throughputDisturbCoeff"] = peer->paths[i].throughputDisturbCoeff; + //j["packetErrorRatio"] = peer->paths[i].packetErrorRatio; + //j["packetLossRatio"] = peer->paths[i].packetLossRatio; + j["stability"] = peer->paths[i].stability; + j["throughput"] = peer->paths[i].throughput; + //j["maxThroughput"] = peer->paths[i].maxThroughput; + j["allocation"] = peer->paths[i].allocation; + j["ifname"] = peer->paths[i].ifname; + pa.push_back(j); + } + pj["paths"] = pa; +} + static void _moonToJson(nlohmann::json &mj,const World &world) { char tmp[4096]; @@ -403,6 +436,7 @@ public: PhySocket *_localControlSocket6; bool _updateAutoApply; bool _allowTcpFallbackRelay; + 
unsigned int _multipathMode; unsigned int _primaryPort; volatile unsigned int _udpPortPickerCounter; @@ -780,6 +814,7 @@ public: int64_t lastTapMulticastGroupCheck = 0; int64_t lastBindRefresh = 0; int64_t lastUpdateCheck = clockShouldBe; + int64_t lastMultipathModeUpdate = 0; int64_t lastCleanedPeersDb = 0; int64_t lastLocalInterfaceAddressCheck = (clockShouldBe - ZT_LOCAL_INTERFACE_CHECK_INTERVAL) + 15000; // do this in 15s to give portmapper time to configure and other things time to settle for(;;) { @@ -811,7 +846,7 @@ public: } // Refresh bindings in case device's interfaces have changed, and also sync routes to update any shadow routes (e.g. shadow default) - if (((now - lastBindRefresh) >= ZT_BINDER_REFRESH_PERIOD)||(restarted)) { + if (((now - lastBindRefresh) >= (_multipathMode ? ZT_BINDER_REFRESH_PERIOD / 8 : ZT_BINDER_REFRESH_PERIOD))||(restarted)) { lastBindRefresh = now; unsigned int p[3]; unsigned int pc = 0; @@ -828,6 +863,11 @@ public: } } } + // Update multipath mode (if needed) + if (((now - lastMultipathModeUpdate) >= ZT_BINDER_REFRESH_PERIOD / 8)||(restarted)) { + lastMultipathModeUpdate = now; + _node->setMultipathMode(_multipathMode); + } // Run background task processor in core if it's time to do so int64_t dl = _nextBackgroundTaskDeadline; @@ -863,7 +903,7 @@ public: } // Sync information about physical network interfaces - if ((now - lastLocalInterfaceAddressCheck) >= ZT_LOCAL_INTERFACE_CHECK_INTERVAL) { + if ((now - lastLocalInterfaceAddressCheck) >= (_multipathMode ? 
ZT_LOCAL_INTERFACE_CHECK_INTERVAL / 8 : ZT_LOCAL_INTERFACE_CHECK_INTERVAL)) { lastLocalInterfaceAddressCheck = now; _node->clearLocalInterfaceAddresses(); @@ -1082,16 +1122,6 @@ public: #ifdef __SYNOLOGY__ // Authenticate via Synology's built-in cgi script if (!isAuth) { - /* - fprintf(stderr, "path = %s\n", path.c_str()); - fprintf(stderr, "headers.size=%d\n", headers.size()); - std::map::const_iterator it(headers.begin()); - while(it != headers.end()) { - fprintf(stderr,"header[%s] = %s\n", (it->first).c_str(), (it->second).c_str()); - it++; - } - */ - // parse out url args int synotoken_pos = path.find("SynoToken"); int argpos = path.find("?"); if(synotoken_pos != std::string::npos && argpos != std::string::npos) { @@ -1104,10 +1134,6 @@ public: setenv("HTTP_COOKIE", cookie_val.c_str(), true); setenv("HTTP_X_SYNO_TOKEN", synotoken_val.c_str(), true); setenv("REMOTE_ADDR", ah2->second.c_str(),true); - //fprintf(stderr, "HTTP_COOKIE: %s\n",std::getenv ("HTTP_COOKIE")); - //fprintf(stderr, "HTTP_X_SYNO_TOKEN: %s\n",std::getenv ("HTTP_X_SYNO_TOKEN")); - //fprintf(stderr, "REMOTE_ADDR: %s\n",std::getenv ("REMOTE_ADDR")); - // check synology web auth char user[256], buf[1024]; FILE *fp = NULL; bzero(user, 256); @@ -1153,6 +1179,23 @@ public: json &settings = res["config"]["settings"]; settings["primaryPort"] = OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; settings["allowTcpFallbackRelay"] = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],_allowTcpFallbackRelay); + + if (_multipathMode) { + json &multipathConfig = res["multipath"]; + ZT_PeerList *pl = _node->peers(); + char peerAddrStr[256]; + if (pl) { + for(unsigned long i=0;ipeerCount;++i) { + if (pl->peers[i].hadAggregateLink) { + nlohmann::json pj; + _peerAggregateLinkToJson(pj,&(pl->peers[i])); + OSUtils::ztsnprintf(peerAddrStr,sizeof(peerAddrStr),"%.10llx",pl->peers[i].address); + multipathConfig[peerAddrStr] = (pj); + } + } + } + } + #ifdef ZT_USE_MINIUPNPC 
settings["portMappingEnabled"] = OSUtils::jsonBool(settings["portMappingEnabled"],true); #else @@ -1481,6 +1524,11 @@ public: _primaryPort = (unsigned int)OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; _allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true); + _multipathMode = (unsigned int)OSUtils::jsonInt(settings["multipathMode"],0); + if (_multipathMode != 0 && _allowTcpFallbackRelay) { + fprintf(stderr,"WARNING: multipathMode cannot be used with allowTcpFallbackRelay. Disabling allowTcpFallbackRelay"); + _allowTcpFallbackRelay = false; + } _portMappingEnabled = OSUtils::jsonBool(settings["portMappingEnabled"],true); #ifndef ZT_SDK -- cgit v1.2.3 From 76b4ec12a0d680b1607d24a62c2b515cbda786df Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Thu, 9 Aug 2018 14:32:26 -0700 Subject: Vault-related amendments --- service/OneService.cpp | 252 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 252 insertions(+) (limited to 'service') diff --git a/service/OneService.cpp b/service/OneService.cpp index c9786829..d215421d 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -81,6 +81,12 @@ #include "../ext/http-parser/http_parser.h" #endif +#if ZT_VAULT_SUPPORT +extern "C" { +#include +} +#endif + #include "../ext/json/json.hpp" using json = nlohmann::json; @@ -162,6 +168,14 @@ namespace ZeroTier { typedef BSDEthernetTap EthernetTap; } // TCP activity timeout #define ZT_TCP_ACTIVITY_TIMEOUT 60000 +#if ZT_VAULT_SUPPORT +size_t curlResponseWrite(void *ptr, size_t size, size_t nmemb, std::string *data) +{ + data->append((char*)ptr, size * nmemb); + return size * nmemb; +} +#endif + namespace ZeroTier { namespace { @@ -515,6 +529,14 @@ public: PortMapper *_portMapper; #endif + // HashiCorp Vault Settings +#if ZT_VAULT_SUPPORT + bool _vaultEnabled; + std::string _vaultURL; + std::string _vaultToken; + std::string _vaultPath; // defaults to cubbyhole/zerotier/identity.secret for per-access key 
storage +#endif + // Set to false to force service to stop volatile bool _run; Mutex _run_m; @@ -546,12 +568,21 @@ public: ,_portMappingEnabled(true) #ifdef ZT_USE_MINIUPNPC ,_portMapper((PortMapper *)0) +#endif +#ifdef ZT_VAULT_SUPPORT + ,_vaultEnabled(false) + ,_vaultURL() + ,_vaultToken() + ,_vaultPath("cubbyhole/zerotier") #endif ,_run(true) { _ports[0] = 0; _ports[1] = 0; _ports[2] = 0; +#if ZT_VAULT_SUPPORT + curl_global_init(CURL_GLOBAL_DEFAULT); +#endif } virtual ~OneServiceImpl() @@ -559,6 +590,10 @@ public: _binder.closeAll(_phy); _phy.close(_localControlSocket4); _phy.close(_localControlSocket6); +#if ZT_VAULT_SUPPORT + curl_global_cleanup(); +#endif + #ifdef ZT_USE_MINIUPNPC delete _portMapper; #endif @@ -1566,6 +1601,47 @@ public: } } +#if ZT_VAULT_SUPPORT + json &vault = settings["vault"]; + if (vault.is_object()) { + const std::string url(OSUtils::jsonString(vault["vaultURL"], "").c_str()); + if (!url.empty()) { + _vaultURL = url; + } + + const std::string token(OSUtils::jsonString(vault["vaultToken"], "").c_str()); + if (!token.empty()) { + _vaultToken = token; + } + + const std::string path(OSUtils::jsonString(vault["vaultPath"], "").c_str()); + if (!path.empty()) { + _vaultPath = path; + } + } + + // also check environment variables for values. 
Environment variables + // will override local.conf variables + const std::string envURL(getenv("VAULT_ADDR")); + if (!envURL.empty()) { + _vaultURL = envURL; + } + + const std::string envToken(getenv("VAULT_TOKEN")); + if (!envToken.empty()) { + _vaultToken = envToken; + } + + const std::string envPath(getenv("VAULT_PATH")); + if (!envPath.empty()) { + _vaultPath = envPath; + } + + if (!_vaultURL.empty() && !_vaultToken.empty()) { + _vaultEnabled = true; + } +#endif + // Checks if a managed IP or route target is allowed bool checkIfManagedIsAllowed(const NetworkState &n,const InetAddress &target) { @@ -2142,8 +2218,88 @@ public: } } +#if ZT_VAULT_SUPPORT + inline bool nodeVaultPutIdentity(enum ZT_StateObjectType type, const void *data, int len) + { + bool retval = false; + if (type != ZT_STATE_OBJECT_IDENTITY_PUBLIC && type != ZT_STATE_OBJECT_IDENTITY_SECRET) { + return retval; + } + + CURL *curl = curl_easy_init(); + if (curl) { + char token[512] = { 0 }; + snprintf(token, sizeof(token), "X-Vault-Token: %s", _vaultToken.c_str()); + + struct curl_slist *chunk = NULL; + chunk = curl_slist_append(chunk, token); + + + char content_type[512] = { 0 }; + snprintf(content_type, sizeof(content_type), "Content-Type: application/json"); + + chunk = curl_slist_append(chunk, content_type); + + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, chunk); + + char url[2048] = { 0 }; + snprintf(url, sizeof(url), "%s/v1/%s", _vaultURL.c_str(), _vaultPath.c_str()); + + curl_easy_setopt(curl, CURLOPT_URL, url); + + json d = json::object(); + if (type == ZT_STATE_OBJECT_IDENTITY_PUBLIC) { + std::string key((const char*)data, len); + d["public"] = key; + } + else if (type == ZT_STATE_OBJECT_IDENTITY_SECRET) { + std::string key((const char*)data, len); + d["secret"] = key; + } + + if (!d.empty()) { + std::string post = d.dump(); + + if (!post.empty()) { + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, post.c_str()); + curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, post.length()); + +#ifndef NDEBUG + 
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); +#endif + + CURLcode res = curl_easy_perform(curl); + if (res == CURLE_OK) { + long response_code = 0; + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code); + if (response_code == 200 || response_code == 204) { + retval = true; + } + } + } + } + + curl_easy_cleanup(curl); + curl = NULL; + curl_slist_free_all(chunk); + chunk = NULL; + } + + return retval; + } +#endif + inline void nodeStatePutFunction(enum ZT_StateObjectType type,const uint64_t id[2],const void *data,int len) { +#if ZT_VAULT_SUPPORT + if (_vaultEnabled && (type == ZT_STATE_OBJECT_IDENTITY_SECRET || type == ZT_STATE_OBJECT_IDENTITY_PUBLIC)) { + if (nodeVaultPutIdentity(type, data, len)) { + // value successfully written to Vault + return; + } + // else fallback to disk + } +#endif char p[1024]; FILE *f; bool secure = false; @@ -2210,8 +2366,93 @@ public: } } +#if ZT_VAULT_SUPPORT + inline int nodeVaultGetIdentity(enum ZT_StateObjectType type, void *data, unsigned int maxlen) + { + if (type != ZT_STATE_OBJECT_IDENTITY_SECRET && type != ZT_STATE_OBJECT_IDENTITY_PUBLIC) { + return -1; + } + + int ret = -1; + CURL *curl = curl_easy_init(); + if (curl) { + char token[512] = { 0 }; + snprintf(token, sizeof(token), "X-Vault-Token: %s", _vaultToken.c_str()); + + struct curl_slist *chunk = NULL; + chunk = curl_slist_append(chunk, token); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, chunk); + + char url[2048] = { 0 }; + snprintf(url, sizeof(url), "%s/v1/%s", _vaultURL.c_str(), _vaultPath.c_str()); + + curl_easy_setopt(curl, CURLOPT_URL, url); + + std::string response; + std::string res_headers; + + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &curlResponseWrite); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response); + curl_easy_setopt(curl, CURLOPT_HEADERDATA, &res_headers); + +#ifndef NDEBUG + curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); +#endif + + CURLcode res = curl_easy_perform(curl); + + if (res == CURLE_OK) { + long response_code = 0; + 
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code); + if (response_code == 200) { + try { + json payload = json::parse(response); + if (!payload["data"].is_null()) { + json &d = payload["data"]; + if (type == ZT_STATE_OBJECT_IDENTITY_SECRET) { + std::string secret = OSUtils::jsonString(d["secret"],""); + + if (!secret.empty()) { + ret = (int)secret.length(); + memcpy(data, secret.c_str(), ret); + } + } + else if (type == ZT_STATE_OBJECT_IDENTITY_PUBLIC) { + std::string pub = OSUtils::jsonString(d["public"],""); + + if (!pub.empty()) { + ret = (int)pub.length(); + memcpy(data, pub.c_str(), ret); + } + } + } + } + catch (...) { + ret = -1; + } + } + } + + curl_easy_cleanup(curl); + curl = NULL; + curl_slist_free_all(chunk); + chunk = NULL; + } + return ret; + } +#endif + inline int nodeStateGetFunction(enum ZT_StateObjectType type,const uint64_t id[2],void *data,unsigned int maxlen) { +#if ZT_VAULT_SUPPORT + if (_vaultEnabled && (type == ZT_STATE_OBJECT_IDENTITY_SECRET || type == ZT_STATE_OBJECT_IDENTITY_PUBLIC) ) { + int retval = nodeVaultGetIdentity(type, data, maxlen); + if (retval >= 0) + return retval; + + // else continue file based lookup + } +#endif char p[4096]; switch(type) { case ZT_STATE_OBJECT_IDENTITY_PUBLIC: @@ -2239,6 +2480,17 @@ public: if (f) { int n = (int)fread(data,1,maxlen,f); fclose(f); +#if ZT_VAULT_SUPPORT + if (_vaultEnabled && (type == ZT_STATE_OBJECT_IDENTITY_SECRET || type == ZT_STATE_OBJECT_IDENTITY_PUBLIC)) { + // If we've gotten here while Vault is enabled, Vault does not know the key and it's been + // read from disk instead. + // + // We should put the value in Vault and remove the local file. 
+ if (nodeVaultPutIdentity(type, data, n)) { + unlink(p); + } + } +#endif if (n >= 0) return n; } -- cgit v1.2.3 From 0fddebc7913bffc2b3fd98ae3076e60c0d429396 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Mon, 10 Sep 2018 13:06:05 -0700 Subject: Added ability to manually set secondary and tertiary ports in local.conf --- service/OneService.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'service') diff --git a/service/OneService.cpp b/service/OneService.cpp index d215421d..fc5c189c 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -452,6 +452,8 @@ public: bool _allowTcpFallbackRelay; unsigned int _multipathMode; unsigned int _primaryPort; + unsigned int _secondaryPort; + unsigned int _tertiaryPort; volatile unsigned int _udpPortPickerCounter; // Local configuration and memo-ized information from it @@ -470,7 +472,7 @@ public: * To attempt to handle NAT/gateway craziness we use three local UDP ports: * * [0] is the normal/default port, usually 9993 - * [1] is a port dervied from our ZeroTier address + * [1] is a port derived from our ZeroTier address * [2] is a port computed from the normal/default for use with uPnP/NAT-PMP mappings * * [2] exists because on some gateways trying to do regular NAT-t interferes @@ -777,7 +779,7 @@ public: // This exists because there are buggy NATs out there that fail if more // than one device behind the same NAT tries to use the same internal // private address port number. Buggy NATs are a running theme. - _ports[1] = 20000 + ((unsigned int)_node->address() % 45500); + _ports[1] = (_secondaryPort == 0) ? 20000 + ((unsigned int)_node->address() % 45500) : _secondaryPort; for(int i=0;;++i) { if (i > 1000) { _ports[1] = 0; @@ -795,7 +797,7 @@ public: // use the other two ports for that because some NATs do really funky // stuff with ports that are explicitly mapped that breaks things. if (_ports[1]) { - _ports[2] = _ports[1]; + _ports[2] = (_tertiaryPort == 0) ? 
_ports[1] : _tertiaryPort; for(int i=0;;++i) { if (i > 1000) { _ports[2] = 0; @@ -1559,9 +1561,14 @@ public: _primaryPort = (unsigned int)OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; _allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true); + _secondaryPort = (unsigned int)OSUtils::jsonInt(settings["secondaryPort"],0); + _tertiaryPort = (unsigned int)OSUtils::jsonInt(settings["tertiaryPort"],0); + if (_secondaryPort != 0 || _tertiaryPort != 0) { + fprintf(stderr,"WARNING: using manually-specified ports. This can cause NAT issues." ZT_EOL_S); + } _multipathMode = (unsigned int)OSUtils::jsonInt(settings["multipathMode"],0); if (_multipathMode != 0 && _allowTcpFallbackRelay) { - fprintf(stderr,"WARNING: multipathMode cannot be used with allowTcpFallbackRelay. Disabling allowTcpFallbackRelay"); + fprintf(stderr,"WARNING: multipathMode cannot be used with allowTcpFallbackRelay. Disabling allowTcpFallbackRelay" ZT_EOL_S); _allowTcpFallbackRelay = false; } _portMappingEnabled = OSUtils::jsonBool(settings["portMappingEnabled"],true); -- cgit v1.2.3 From 7b87e9c0cb671227a10807c52779e8d81400d753 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 11 Sep 2018 16:55:46 -0700 Subject: Added ability for OneService to monitor local.conf for changes and automatically reload --- node/Constants.hpp | 5 ++ service/OneService.cpp | 204 +++++++++++++++++++++++++++---------------------- 2 files changed, 117 insertions(+), 92 deletions(-) (limited to 'service') diff --git a/node/Constants.hpp b/node/Constants.hpp index 0de14b85..73a00c3e 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -275,6 +275,11 @@ */ #define ZT_PING_CHECK_INVERVAL 5000 +/** + * How often the local.conf file is checked for changes + */ +#define ZT_LOCAL_CONF_FILE_CHECK_INTERVAL 10000 + /** * How frequently to check for changes to the system's network interfaces. 
When * the service decides to use this constant it's because we want to react more diff --git a/service/OneService.cpp b/service/OneService.cpp index fc5c189c..91d10d22 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -67,9 +67,12 @@ #include #include #include +#include +#define stat _stat #else #include #include +#include #include #include #include @@ -468,6 +471,8 @@ public: std::vector< std::string > _interfacePrefixBlacklist; Mutex _localConfig_m; + std::vector explicitBind; + /* * To attempt to handle NAT/gateway craziness we use three local UDP ports: * @@ -639,98 +644,8 @@ public: _node = new Node(this,(void *)0,&cb,OSUtils::now()); } - // Read local configuration - std::vector explicitBind; - { - std::map ppc; - - // LEGACY: support old "trustedpaths" flat file - FILE *trustpaths = fopen((_homePath + ZT_PATH_SEPARATOR_S "trustedpaths").c_str(),"r"); - if (trustpaths) { - fprintf(stderr,"WARNING: 'trustedpaths' flat file format is deprecated in favor of path definitions in local.conf" ZT_EOL_S); - char buf[1024]; - while (fgets(buf,sizeof(buf),trustpaths)) { - int fno = 0; - char *saveptr = (char *)0; - uint64_t trustedPathId = 0; - InetAddress trustedPathNetwork; - for(char *f=Utils::stok(buf,"=\r\n \t",&saveptr);(f);f=Utils::stok((char *)0,"=\r\n \t",&saveptr)) { - if (fno == 0) { - trustedPathId = Utils::hexStrToU64(f); - } else if (fno == 1) { - trustedPathNetwork = InetAddress(f); - } else break; - ++fno; - } - if ( (trustedPathId != 0) && ((trustedPathNetwork.ss_family == AF_INET)||(trustedPathNetwork.ss_family == AF_INET6)) && (trustedPathNetwork.netmaskBits() > 0) ) { - ppc[trustedPathNetwork].trustedPathId = trustedPathId; - ppc[trustedPathNetwork].mtu = 0; // use default - } - } - fclose(trustpaths); - } - - // Read local config file - Mutex::Lock _l2(_localConfig_m); - std::string lcbuf; - if (OSUtils::readFile((_homePath + ZT_PATH_SEPARATOR_S "local.conf").c_str(),lcbuf)) { - try { - _localConfig = OSUtils::jsonParse(lcbuf); - 
if (!_localConfig.is_object()) { - fprintf(stderr,"WARNING: unable to parse local.conf (root element is not a JSON object)" ZT_EOL_S); - } - } catch ( ... ) { - fprintf(stderr,"WARNING: unable to parse local.conf (invalid JSON)" ZT_EOL_S); - } - } - - // Get any trusted paths in local.conf (we'll parse the rest of physical[] elsewhere) - json &physical = _localConfig["physical"]; - if (physical.is_object()) { - for(json::iterator phy(physical.begin());phy!=physical.end();++phy) { - InetAddress net(OSUtils::jsonString(phy.key(),"").c_str()); - if (net) { - if (phy.value().is_object()) { - uint64_t tpid; - if ((tpid = OSUtils::jsonInt(phy.value()["trustedPathId"],0ULL)) != 0ULL) { - if ((net.ss_family == AF_INET)||(net.ss_family == AF_INET6)) - ppc[net].trustedPathId = tpid; - } - ppc[net].mtu = (int)OSUtils::jsonInt(phy.value()["mtu"],0ULL); // 0 means use default - } - } - } - } - - json &settings = _localConfig["settings"]; - if (settings.is_object()) { - // Allow controller DB path to be put somewhere else - const std::string cdbp(OSUtils::jsonString(settings["controllerDbPath"],"")); - if (cdbp.length() > 0) - _controllerDbPath = cdbp; - - // Bind to wildcard instead of to specific interfaces (disables full tunnel capability) - json &bind = settings["bind"]; - if (bind.is_array()) { - for(unsigned long i=0;i 0) { - InetAddress ip(ips.c_str()); - if ((ip.ss_family == AF_INET)||(ip.ss_family == AF_INET6)) - explicitBind.push_back(ip); - } - } - } - } - - // Set trusted paths if there are any - if (ppc.size() > 0) { - for(std::map::iterator i(ppc.begin());i!=ppc.end();++i) - _node->setPhysicalPathConfiguration(reinterpret_cast(&(i->first)),&(i->second)); - } - } - - // Apply other runtime configuration from local.conf + // local.conf + readLocalSettings(); applyLocalConfig(); // Make sure we can use the primary port, and hunt for one if configured to do so @@ -854,6 +769,7 @@ public: int64_t lastMultipathModeUpdate = 0; int64_t lastCleanedPeersDb = 0; int64_t 
lastLocalInterfaceAddressCheck = (clockShouldBe - ZT_LOCAL_INTERFACE_CHECK_INTERVAL) + 15000; // do this in 15s to give portmapper time to configure and other things time to settle + int64_t lastLocalConfFileCheck = OSUtils::now(); for(;;) { _run_m.lock(); if (!_run) { @@ -882,6 +798,19 @@ public: _updater->apply(); } + // Reload local.conf if anything changed recently + if ((now - lastLocalConfFileCheck) >= ZT_LOCAL_CONF_FILE_CHECK_INTERVAL) { + lastLocalConfFileCheck = now; + struct stat result; + if(stat((_homePath + ZT_PATH_SEPARATOR_S "local.conf").c_str(), &result)==0) { + int64_t mod_time = result.st_mtime * 1000; + if ((now - mod_time) <= ZT_LOCAL_CONF_FILE_CHECK_INTERVAL) { + readLocalSettings(); + applyLocalConfig(); + } + } + } + // Refresh bindings in case device's interfaces have changed, and also sync routes to update any shadow routes (e.g. shadow default) if (((now - lastBindRefresh) >= (_multipathMode ? ZT_BINDER_REFRESH_PERIOD / 8 : ZT_BINDER_REFRESH_PERIOD))||(restarted)) { lastBindRefresh = now; @@ -999,6 +928,97 @@ public: return _termReason; } + void readLocalSettings() + { + // Read local configuration + std::map ppc; + + // LEGACY: support old "trustedpaths" flat file + FILE *trustpaths = fopen((_homePath + ZT_PATH_SEPARATOR_S "trustedpaths").c_str(),"r"); + if (trustpaths) { + fprintf(stderr,"WARNING: 'trustedpaths' flat file format is deprecated in favor of path definitions in local.conf" ZT_EOL_S); + char buf[1024]; + while (fgets(buf,sizeof(buf),trustpaths)) { + int fno = 0; + char *saveptr = (char *)0; + uint64_t trustedPathId = 0; + InetAddress trustedPathNetwork; + for(char *f=Utils::stok(buf,"=\r\n \t",&saveptr);(f);f=Utils::stok((char *)0,"=\r\n \t",&saveptr)) { + if (fno == 0) { + trustedPathId = Utils::hexStrToU64(f); + } else if (fno == 1) { + trustedPathNetwork = InetAddress(f); + } else break; + ++fno; + } + if ( (trustedPathId != 0) && ((trustedPathNetwork.ss_family == AF_INET)||(trustedPathNetwork.ss_family == AF_INET6)) && 
(trustedPathNetwork.netmaskBits() > 0) ) { + ppc[trustedPathNetwork].trustedPathId = trustedPathId; + ppc[trustedPathNetwork].mtu = 0; // use default + } + } + fclose(trustpaths); + } + + // Read local config file + Mutex::Lock _l2(_localConfig_m); + std::string lcbuf; + if (OSUtils::readFile((_homePath + ZT_PATH_SEPARATOR_S "local.conf").c_str(),lcbuf)) { + try { + _localConfig = OSUtils::jsonParse(lcbuf); + if (!_localConfig.is_object()) { + fprintf(stderr,"WARNING: unable to parse local.conf (root element is not a JSON object)" ZT_EOL_S); + } + } catch ( ... ) { + fprintf(stderr,"WARNING: unable to parse local.conf (invalid JSON)" ZT_EOL_S); + } + } + + // Get any trusted paths in local.conf (we'll parse the rest of physical[] elsewhere) + json &physical = _localConfig["physical"]; + if (physical.is_object()) { + for(json::iterator phy(physical.begin());phy!=physical.end();++phy) { + InetAddress net(OSUtils::jsonString(phy.key(),"").c_str()); + if (net) { + if (phy.value().is_object()) { + uint64_t tpid; + if ((tpid = OSUtils::jsonInt(phy.value()["trustedPathId"],0ULL)) != 0ULL) { + if ((net.ss_family == AF_INET)||(net.ss_family == AF_INET6)) + ppc[net].trustedPathId = tpid; + } + ppc[net].mtu = (int)OSUtils::jsonInt(phy.value()["mtu"],0ULL); // 0 means use default + } + } + } + } + + json &settings = _localConfig["settings"]; + if (settings.is_object()) { + // Allow controller DB path to be put somewhere else + const std::string cdbp(OSUtils::jsonString(settings["controllerDbPath"],"")); + if (cdbp.length() > 0) + _controllerDbPath = cdbp; + + // Bind to wildcard instead of to specific interfaces (disables full tunnel capability) + json &bind = settings["bind"]; + if (bind.is_array()) { + for(unsigned long i=0;i 0) { + InetAddress ip(ips.c_str()); + if ((ip.ss_family == AF_INET)||(ip.ss_family == AF_INET6)) + explicitBind.push_back(ip); + } + } + } + } + + // Set trusted paths if there are any + if (ppc.size() > 0) { + for(std::map::iterator 
i(ppc.begin());i!=ppc.end();++i) + _node->setPhysicalPathConfiguration(reinterpret_cast(&(i->first)),&(i->second)); + } + } + virtual ReasonForTermination reasonForTermination() const { Mutex::Lock _l(_termReason_m); -- cgit v1.2.3