Date: Wed, 7 Oct 2015 16:11:50 -0700
Subject: Finally add an ECHO.
---
node/IncomingPacket.cpp | 15 ++++++++++++++-
node/IncomingPacket.hpp | 1 +
node/Packet.cpp | 1 +
node/Packet.hpp | 28 ++++++++++++++++------------
4 files changed, 32 insertions(+), 13 deletions(-)
diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp
index cfe5a6c3..083c47f8 100644
--- a/node/IncomingPacket.cpp
+++ b/node/IncomingPacket.cpp
@@ -83,6 +83,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR)
case Packet::VERB_RENDEZVOUS: return _doRENDEZVOUS(RR,peer);
case Packet::VERB_FRAME: return _doFRAME(RR,peer);
case Packet::VERB_EXT_FRAME: return _doEXT_FRAME(RR,peer);
+ case Packet::VERB_ECHO: return _doECHO(RR,peer);
case Packet::VERB_MULTICAST_LIKE: return _doMULTICAST_LIKE(RR,peer);
case Packet::VERB_NETWORK_MEMBERSHIP_CERTIFICATE: return _doNETWORK_MEMBERSHIP_CERTIFICATE(RR,peer);
case Packet::VERB_NETWORK_CONFIG_REQUEST: return _doNETWORK_CONFIG_REQUEST(RR,peer);
@@ -569,6 +570,18 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,const SharedPtr &peer)
+{
+ try {
+ Packet outp(peer->address(),RR->identity.address(),Packet::VERB_OK);
+ outp.append((unsigned char)Packet::VERB_ECHO);
+ outp.append(packetId());
+ outp.append(field(ZT_PACKET_IDX_PAYLOAD,size() - ZT_PACKET_IDX_PAYLOAD),size() - ZT_PACKET_IDX_PAYLOAD);
+ RR->sw->send(outp,true,0);
+ } catch ( ... ) {}
+ return true;
+}
+
bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,const SharedPtr &peer)
{
try {
@@ -636,7 +649,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,cons
outp.append(netconfStr.data(),(unsigned int)netconfStr.length());
outp.compress();
outp.armor(peer->key(),true);
- if (outp.size() > ZT_PROTO_MAX_PACKET_LENGTH) {
+ if (outp.size() > ZT_PROTO_MAX_PACKET_LENGTH) { // sanity check
TRACE("NETWORK_CONFIG_REQUEST failed: internal error: netconf size %u is too large",(unsigned int)netconfStr.length());
} else {
RR->node->putPacket(_localAddress,_remoteAddress,outp.data(),outp.size());
diff --git a/node/IncomingPacket.hpp b/node/IncomingPacket.hpp
index fd7a06c0..f5dd4b27 100644
--- a/node/IncomingPacket.hpp
+++ b/node/IncomingPacket.hpp
@@ -138,6 +138,7 @@ private:
bool _doRENDEZVOUS(const RuntimeEnvironment *RR,const SharedPtr &peer);
bool _doFRAME(const RuntimeEnvironment *RR,const SharedPtr &peer);
bool _doEXT_FRAME(const RuntimeEnvironment *RR,const SharedPtr &peer);
+ bool _doECHO(const RuntimeEnvironment *RR,const SharedPtr &peer);
bool _doMULTICAST_LIKE(const RuntimeEnvironment *RR,const SharedPtr &peer);
bool _doNETWORK_MEMBERSHIP_CERTIFICATE(const RuntimeEnvironment *RR,const SharedPtr &peer);
bool _doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,const SharedPtr &peer);
diff --git a/node/Packet.cpp b/node/Packet.cpp
index 2d973dff..2fb7d488 100644
--- a/node/Packet.cpp
+++ b/node/Packet.cpp
@@ -45,6 +45,7 @@ const char *Packet::verbString(Verb v)
case VERB_RENDEZVOUS: return "RENDEZVOUS";
case VERB_FRAME: return "FRAME";
case VERB_EXT_FRAME: return "EXT_FRAME";
+ case VERB_ECHO: return "ECHO";
case VERB_MULTICAST_LIKE: return "MULTICAST_LIKE";
case VERB_NETWORK_MEMBERSHIP_CERTIFICATE: return "NETWORK_MEMBERSHIP_CERTIFICATE";
case VERB_NETWORK_CONFIG_REQUEST: return "NETWORK_CONFIG_REQUEST";
diff --git a/node/Packet.hpp b/node/Packet.hpp
index 93b594e5..01aadad0 100644
--- a/node/Packet.hpp
+++ b/node/Packet.hpp
@@ -46,22 +46,20 @@
#include "../ext/lz4/lz4.h"
/**
- * Protocol version -- incremented only for MAJOR changes
+ * Protocol version -- incremented only for major changes
*
* 1 - 0.2.0 ... 0.2.5
* 2 - 0.3.0 ... 0.4.5
- * * Added signature and originating peer to multicast frame
- * * Double size of multicast frame bloom filter
+ * + Added signature and originating peer to multicast frame
+ * + Double size of multicast frame bloom filter
* 3 - 0.5.0 ... 0.6.0
- * * Yet another multicast redesign
- * * New crypto completely changes key agreement cipher
+ * + Yet another multicast redesign
+ * + New crypto completely changes key agreement cipher
* 4 - 0.6.0 ... 1.0.6
- * * New identity format based on hashcash design
+ * + New identity format based on hashcash design
* 5 - 1.0.6 ... CURRENT
- * * Supports CIRCUIT_TEST and friends, otherwise compatibie w/v4
- *
- * This isn't going to change again for a long time unless your
- * author wakes up again at 4am with another great idea. :P
+ * + Supports circuit test, proof of work, and echo
+ * + Otherwise backward compatible with 4
*/
#define ZT_PROTO_VERSION 5
@@ -660,8 +658,14 @@ public:
*/
VERB_EXT_FRAME = 7,
- /* DEPRECATED */
- //VERB_P5_MULTICAST_FRAME = 8,
+ /**
+ * ECHO request (a.k.a. ping):
+ * <[...] arbitrary payload to be echoed back>
+ *
+ * This generates OK with a copy of the transmitted payload. No ERROR
+ * is generated. Response to ECHO requests is optional.
+ */
+ VERB_ECHO = 8,
/**
* Announce interest in multicast group(s):
--
cgit v1.2.3
From 0ce0bc00d220ba67af03bf0cc01c8fe9f9104039 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko
Date: Wed, 7 Oct 2015 16:20:54 -0700
Subject: Make sure received() gets called for some new messages, and docs.
---
node/IncomingPacket.cpp | 17 +++++++++++++----
node/Packet.hpp | 2 ++
2 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp
index 083c47f8..4dd308cd 100644
--- a/node/IncomingPacket.cpp
+++ b/node/IncomingPacket.cpp
@@ -573,11 +573,13 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,const SharedPtr &peer)
{
try {
+ const uint64_t pid = packetId();
Packet outp(peer->address(),RR->identity.address(),Packet::VERB_OK);
outp.append((unsigned char)Packet::VERB_ECHO);
- outp.append(packetId());
+ outp.append((uint64_t)pid);
outp.append(field(ZT_PACKET_IDX_PAYLOAD,size() - ZT_PACKET_IDX_PAYLOAD),size() - ZT_PACKET_IDX_PAYLOAD);
- RR->sw->send(outp,true,0);
+ RR->node->putPacket(_localAddress,_remoteAddress,outp.data(),outp.size());
+ peer->received(RR,_localAddress,_remoteAddress,hops(),pid,Packet::VERB_ECHO,0,Packet::VERB_NOP);
} catch ( ... ) {}
return true;
}
@@ -881,6 +883,8 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha
}
ptr += addrLen;
}
+
+ peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP);
} catch (std::exception &exc) {
TRACE("dropped PUSH_DIRECT_PATHS from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),exc.what());
} catch ( ... ) {
@@ -1045,6 +1049,8 @@ bool IncomingPacket::_doCIRCUIT_TEST(const RuntimeEnvironment *RR,const SharedPt
RR->sw->send(outp,true,originatorCredentialNetworkId);
}
}
+
+ peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_CIRCUIT_TEST,0,Packet::VERB_NOP);
} catch (std::exception &exc) {
TRACE("dropped CIRCUIT_TEST from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),exc.what());
} catch ( ... ) {
@@ -1063,6 +1069,7 @@ bool IncomingPacket::_doREQUEST_PROOF_OF_WORK(const RuntimeEnvironment *RR,const
try {
// Right now this is only allowed from root servers -- may be allowed from controllers and relays later.
if (RR->topology->isRoot(peer->identity())) {
+ const uint64_t pid = packetId();
const unsigned int difficulty = (*this)[ZT_PACKET_IDX_PAYLOAD + 1];
const unsigned int challengeLength = at(ZT_PACKET_IDX_PAYLOAD + 2);
if (challengeLength > ZT_PROTO_MAX_PACKET_LENGTH)
@@ -1079,7 +1086,7 @@ bool IncomingPacket::_doREQUEST_PROOF_OF_WORK(const RuntimeEnvironment *RR,const
TRACE("PROOF_OF_WORK computed for %s: difficulty==%u, challengeLength==%u, result: %.16llx%.16llx",peer->address().toString().c_str(),difficulty,challengeLength,Utils::ntoh(*(reinterpret_cast(result))),Utils::ntoh(*(reinterpret_cast(result + 8))));
Packet outp(peer->address(),RR->identity.address(),Packet::VERB_OK);
outp.append((unsigned char)Packet::VERB_REQUEST_PROOF_OF_WORK);
- outp.append(packetId());
+ outp.append(pid);
outp.append((uint16_t)sizeof(result));
outp.append(result,sizeof(result));
outp.armor(peer->key(),true);
@@ -1087,7 +1094,7 @@ bool IncomingPacket::_doREQUEST_PROOF_OF_WORK(const RuntimeEnvironment *RR,const
} else {
Packet outp(peer->address(),RR->identity.address(),Packet::VERB_ERROR);
outp.append((unsigned char)Packet::VERB_REQUEST_PROOF_OF_WORK);
- outp.append(packetId());
+ outp.append(pid);
outp.append((unsigned char)Packet::ERROR_INVALID_REQUEST);
outp.armor(peer->key(),true);
RR->node->putPacket(_localAddress,_remoteAddress,outp.data(),outp.size());
@@ -1098,6 +1105,8 @@ bool IncomingPacket::_doREQUEST_PROOF_OF_WORK(const RuntimeEnvironment *RR,const
TRACE("dropped REQUEST_PROOF_OF_WORK from %s(%s): unrecognized proof of work type",peer->address().toString().c_str(),_remoteAddress.toString().c_str());
break;
}
+
+ peer->received(RR,_localAddress,_remoteAddress,hops(),pid,Packet::VERB_REQUEST_PROOF_OF_WORK,0,Packet::VERB_NOP);
} else {
TRACE("dropped REQUEST_PROOF_OF_WORK from %s(%s): not trusted enough",peer->address().toString().c_str(),_remoteAddress.toString().c_str());
}
diff --git a/node/Packet.hpp b/node/Packet.hpp
index 01aadad0..6f3cb72f 100644
--- a/node/Packet.hpp
+++ b/node/Packet.hpp
@@ -664,6 +664,8 @@ public:
*
* This generates OK with a copy of the transmitted payload. No ERROR
* is generated. Response to ECHO requests is optional.
+ *
+ * Note that fragmented ECHO packets may not work.
*/
VERB_ECHO = 8,
--
cgit v1.2.3
From fea1b6b2c3d004ad53c5997800229c070ccee79b Mon Sep 17 00:00:00 2001
From: Adam Ierymenko
Date: Wed, 7 Oct 2015 16:25:08 -0700
Subject: docs
---
node/Packet.hpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/node/Packet.hpp b/node/Packet.hpp
index 6f3cb72f..1c7a6f5e 100644
--- a/node/Packet.hpp
+++ b/node/Packet.hpp
@@ -365,6 +365,10 @@ namespace ZeroTier {
* immutable. This is because intermediate nodes can increment the hop
* count up to 7 (protocol max).
*
+ * A hop count of 7 also indicates that receiving peers should not attempt
+ * to learn direct paths from this packet. (Right now direct paths are only
+ * learned from direct packets anyway.)
+ *
* http://tonyarcieri.com/all-the-crypto-code-youve-ever-written-is-probably-broken
*
* For unencrypted packets, MAC is computed on plaintext. Only HELLO is ever
--
cgit v1.2.3
From 9347d6c866f6cae357448762f064a481fb765c00 Mon Sep 17 00:00:00 2001
From: Grant Limberg
Date: Wed, 7 Oct 2015 18:04:40 -0700
Subject: Make it so ZeroTierOne.h can be used with a C compiler again.
---
include/ZeroTierOne.h | 4 ++--
node/Node.cpp | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h
index 990f682d..175cedc5 100644
--- a/include/ZeroTierOne.h
+++ b/include/ZeroTierOne.h
@@ -1255,7 +1255,7 @@ void ZT_Node_freeQueryResult(ZT_Node *node,void *qr);
* @param trust How much do you trust the local network under this interface?
* @return Boolean: non-zero if address was accepted and added
*/
-int ZT_Node_addLocalInterfaceAddress(ZT_Node *node,const struct sockaddr_storage *addr,int metric,ZT_LocalInterfaceAddressTrust trust);
+int ZT_Node_addLocalInterfaceAddress(ZT_Node *node,const struct sockaddr_storage *addr,int metric, enum ZT_LocalInterfaceAddressTrust trust);
/**
* Clear local interface addresses
@@ -1296,7 +1296,7 @@ void ZT_Node_setNetconfMaster(ZT_Node *node,void *networkConfigMasterInstance);
* @param reportCallback Function to call each time a report is received
* @return OK or error if, for example, test is too big for a packet or support isn't compiled in
*/
-ZT_ResultCode ZT_Node_circuitTestBegin(ZT_Node *node,ZT_CircuitTest *test,void (*reportCallback)(ZT_Node *,ZT_CircuitTest *,const ZT_CircuitTestReport *));
+enum ZT_ResultCode ZT_Node_circuitTestBegin(ZT_Node *node,ZT_CircuitTest *test,void (*reportCallback)(ZT_Node *, ZT_CircuitTest *,const ZT_CircuitTestReport *));
/**
* Stop listening for results to a given circuit test
diff --git a/node/Node.cpp b/node/Node.cpp
index d5ca147c..7f469b97 100644
--- a/node/Node.cpp
+++ b/node/Node.cpp
@@ -779,7 +779,7 @@ void ZT_Node_setNetconfMaster(ZT_Node *node,void *networkControllerInstance)
} catch ( ... ) {}
}
-ZT_ResultCode ZT_Node_circuitTestBegin(ZT_Node *node,ZT_CircuitTest *test,void (*reportCallback)(ZT_Node *,ZT_CircuitTest *,const ZT_CircuitTestReport *))
+enum ZT_ResultCode ZT_Node_circuitTestBegin(ZT_Node *node,ZT_CircuitTest *test,void (*reportCallback)(ZT_Node *,ZT_CircuitTest *,const ZT_CircuitTestReport *))
{
try {
return reinterpret_cast(node)->circuitTestBegin(test,reportCallback);
@@ -795,7 +795,7 @@ void ZT_Node_circuitTestEnd(ZT_Node *node,ZT_CircuitTest *test)
} catch ( ... ) {}
}
-int ZT_Node_addLocalInterfaceAddress(ZT_Node *node,const struct sockaddr_storage *addr,int metric,ZT_LocalInterfaceAddressTrust trust)
+int ZT_Node_addLocalInterfaceAddress(ZT_Node *node,const struct sockaddr_storage *addr,int metric, enum ZT_LocalInterfaceAddressTrust trust)
{
try {
return reinterpret_cast(node)->addLocalInterfaceAddress(addr,metric,trust);
--
cgit v1.2.3
From 273f0d18b0bc907d90e746f5836576150f4cde22 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko
Date: Thu, 8 Oct 2015 09:05:25 -0700
Subject: docs
---
node/Packet.hpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/node/Packet.hpp b/node/Packet.hpp
index 1c7a6f5e..4dfa73f0 100644
--- a/node/Packet.hpp
+++ b/node/Packet.hpp
@@ -345,11 +345,11 @@ namespace ZeroTier {
* ZeroTier packet
*
* Packet format:
- * <[8] random initialization vector (doubles as 64-bit packet ID)>
+ * <[8] 64-bit random packet ID and crypto initialization vector>
* <[5] destination ZT address>
* <[5] source ZT address>
* <[1] flags/cipher (top 5 bits) and ZT hop count (last 3 bits)>
- * <[8] 8-bit MAC (currently first 8 bytes of poly1305 tag)>
+ * <[8] 64-bit MAC>
* [... -- begin encryption envelope -- ...]
* <[1] encrypted flags (top 3 bits) and verb (last 5 bits)>
* [... verb-specific payload ...]
--
cgit v1.2.3
From a3876353ca1cb07213c5e1c5208b531caeda5523 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko
Date: Thu, 8 Oct 2015 13:25:38 -0700
Subject: Abiltiy to post a test via the controller web API, and parsing of
CIRCUIT_TEST_REPORT messages.
---
controller/SqliteNetworkController.cpp | 50 ++++++++++++++++++
controller/SqliteNetworkController.hpp | 8 +++
node/IncomingPacket.cpp | 94 +++++++++++++++++++---------------
3 files changed, 111 insertions(+), 41 deletions(-)
diff --git a/controller/SqliteNetworkController.cpp b/controller/SqliteNetworkController.cpp
index 334ccc75..861cdbcf 100644
--- a/controller/SqliteNetworkController.cpp
+++ b/controller/SqliteNetworkController.cpp
@@ -505,6 +505,52 @@ unsigned int SqliteNetworkController::handleControlPlaneHttpPOST(
}
return _doCPGet(path,urlArgs,headers,body,responseBody,responseContentType);
+ } else if ((path.size() == 3)&&(path[2] == "test")) {
+ ZT_CircuitTest *test = (ZT_CircuitTest *)malloc(sizeof(ZT_CircuitTest));
+ memset(test,0,sizeof(ZT_CircuitTest));
+
+ Utils::getSecureRandom(&(test->testId),sizeof(test->testId));
+ test->credentialNetworkId = nwid;
+ test->ptr = (void *)this;
+
+ json_value *j = json_parse(body.c_str(),body.length());
+ if (j) {
+ if (j->type == json_object) {
+ for(unsigned int k=0;ku.object.length;++k) {
+
+ if (!strcmp(j->u.object.values[k].name,"hops")) {
+ if (j->u.object.values[k].value->type == json_array) {
+ for(unsigned int kk=0;kku.object.values[k].value->u.array.length;++kk) {
+ json_value *hop = j->u.object.values[k].value->u.array.values[kk];
+ if (hop->type == json_array) {
+ for(unsigned int kkk=0;kkku.array.length;++kkk) {
+ if (hop->u.array.values[kkk].type == json_string) {
+ test->hops[test->hopCount].addresses[test->hops[test->hopCount].breadth++] = Utils::hexStrToU64(hop->u.array.values[kkk].u.string.ptr) & 0xffffffffffULL;
+ }
+ }
+ ++test->hopCount;
+ }
+ }
+ }
+ } else if (!strcmp(j->u.object.values[k].name,"reportAtEveryHop")) {
+ if (j->u.object.values[k].value->type == json_boolean)
+ test->reportAtEveryHop = (j->u.object.values[k].value->u.boolean == 0) ? 0 : 1;
+ }
+
+ }
+ }
+ json_value_free(j);
+ }
+
+ if (!test->hopCount) {
+ ::free((void *)test);
+ return 500;
+ }
+
+ test->timestamp = OSUtils::now();
+ _node->circuitTestBegin(test,&(SqliteNetworkController::_circuitTestCallback));
+
+ return 200;
} // else 404
} else {
@@ -1819,4 +1865,8 @@ NetworkController::ResultCode SqliteNetworkController::_doNetworkConfigRequest(c
return NetworkController::NETCONF_QUERY_OK;
}
+static void _circuitTestCallback(ZT_Node *node,ZT_CircuitTest *test,const ZT_CircuitTestReport *report)
+{
+}
+
} // namespace ZeroTier
diff --git a/controller/SqliteNetworkController.hpp b/controller/SqliteNetworkController.hpp
index 68529e39..7a01487c 100644
--- a/controller/SqliteNetworkController.hpp
+++ b/controller/SqliteNetworkController.hpp
@@ -43,6 +43,9 @@
// Number of in-memory last log entries to maintain per user
#define ZT_SQLITENETWORKCONTROLLER_IN_MEMORY_LOG_SIZE 32
+// How long do circuit tests "live"? This is just to prevent buildup in memory.
+#define ZT_SQLITENETWORKCONTROLLER_CIRCUIT_TEST_TIMEOUT 300000
+
namespace ZeroTier {
class Node;
@@ -106,6 +109,8 @@ private:
const Dictionary &metaData,
Dictionary &netconf);
+ static void _circuitTestCallback(ZT_Node *node,ZT_CircuitTest *test,const ZT_CircuitTestReport *report);
+
Node *_node;
std::string _dbPath;
std::string _circuitTestPath;
@@ -140,6 +145,9 @@ private:
// Last log entries by address and network ID pair
std::map< std::pair,_LLEntry > _lastLog;
+ // Circuit tests outstanding
+ std::map< uint64_t,ZT_CircuitTest * > _circuitTests;
+
sqlite3 *_db;
sqlite3_stmt *_sGetNetworkById;
diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp
index 4dd308cd..0aadc104 100644
--- a/node/IncomingPacket.cpp
+++ b/node/IncomingPacket.cpp
@@ -175,10 +175,8 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,const SharedPtr
}
peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_ERROR,inRePacketId,inReVerb);
- } catch (std::exception &ex) {
- TRACE("dropped ERROR from %s(%s): unexpected exception: %s",peer->address().toString().c_str(),_remoteAddress.toString().c_str(),ex.what());
} catch ( ... ) {
- TRACE("dropped ERROR from %s(%s): unexpected exception: (unknown)",peer->address().toString().c_str(),_remoteAddress.toString().c_str());
+ TRACE("dropped ERROR from %s(%s): unexpected exception",peer->address().toString().c_str(),_remoteAddress.toString().c_str());
}
return true;
}
@@ -291,8 +289,6 @@ bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR)
_remoteAddress.serialize(outp);
outp.armor(peer->key(),true);
RR->node->putPacket(_localAddress,_remoteAddress,outp.data(),outp.size());
- } catch (std::exception &ex) {
- TRACE("dropped HELLO from %s(%s): %s",source().toString().c_str(),_remoteAddress.toString().c_str(),ex.what());
} catch ( ... ) {
TRACE("dropped HELLO from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
}
@@ -395,10 +391,8 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,const SharedPtr &p
}
peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_OK,inRePacketId,inReVerb);
- } catch (std::exception &ex) {
- TRACE("dropped OK from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),ex.what());
} catch ( ... ) {
- TRACE("dropped OK from %s(%s): unexpected exception: (unknown)",source().toString().c_str(),_remoteAddress.toString().c_str());
+ TRACE("dropped OK from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
}
return true;
}
@@ -453,8 +447,6 @@ bool IncomingPacket::_doRENDEZVOUS(const RuntimeEnvironment *RR,const SharedPtr<
} else {
TRACE("ignored RENDEZVOUS from %s(%s) to meet unknown peer %s",peer->address().toString().c_str(),_remoteAddress.toString().c_str(),with.toString().c_str());
}
- } catch (std::exception &ex) {
- TRACE("dropped RENDEZVOUS from %s(%s): %s",peer->address().toString().c_str(),_remoteAddress.toString().c_str(),ex.what());
} catch ( ... ) {
TRACE("dropped RENDEZVOUS from %s(%s): unexpected exception",peer->address().toString().c_str(),_remoteAddress.toString().c_str());
}
@@ -487,10 +479,8 @@ bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,const SharedPtr
} else {
TRACE("dropped FRAME from %s(%s): we are not connected to network %.16llx",source().toString().c_str(),_remoteAddress.toString().c_str(),at(ZT_PROTO_VERB_FRAME_IDX_NETWORK_ID));
}
- } catch (std::exception &ex) {
- TRACE("dropped FRAME from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),ex.what());
} catch ( ... ) {
- TRACE("dropped FRAME from %s(%s): unexpected exception: (unknown)",source().toString().c_str(),_remoteAddress.toString().c_str());
+ TRACE("dropped FRAME from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
}
return true;
}
@@ -562,10 +552,8 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,const SharedPtr(ZT_PROTO_VERB_FRAME_IDX_NETWORK_ID));
}
- } catch (std::exception &ex) {
- TRACE("dropped EXT_FRAME from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),ex.what());
} catch ( ... ) {
- TRACE("dropped EXT_FRAME from %s(%s): unexpected exception: (unknown)",source().toString().c_str(),_remoteAddress.toString().c_str());
+ TRACE("dropped EXT_FRAME from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
}
return true;
}
@@ -580,7 +568,9 @@ bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,const SharedPtr
outp.append(field(ZT_PACKET_IDX_PAYLOAD,size() - ZT_PACKET_IDX_PAYLOAD),size() - ZT_PACKET_IDX_PAYLOAD);
RR->node->putPacket(_localAddress,_remoteAddress,outp.data(),outp.size());
peer->received(RR,_localAddress,_remoteAddress,hops(),pid,Packet::VERB_ECHO,0,Packet::VERB_NOP);
- } catch ( ... ) {}
+ } catch ( ... ) {
+ TRACE("dropped ECHO from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
+ }
return true;
}
@@ -594,10 +584,8 @@ bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,const Shared
RR->mc->add(now,at(ptr),MulticastGroup(MAC(field(ptr + 8,6),6),at(ptr + 14)),peer->address());
peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP);
- } catch (std::exception &ex) {
- TRACE("dropped MULTICAST_LIKE from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),ex.what());
} catch ( ... ) {
- TRACE("dropped MULTICAST_LIKE from %s(%s): unexpected exception: (unknown)",source().toString().c_str(),_remoteAddress.toString().c_str());
+ TRACE("dropped MULTICAST_LIKE from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
}
return true;
}
@@ -614,10 +602,8 @@ bool IncomingPacket::_doNETWORK_MEMBERSHIP_CERTIFICATE(const RuntimeEnvironment
}
peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_NETWORK_MEMBERSHIP_CERTIFICATE,0,Packet::VERB_NOP);
- } catch (std::exception &ex) {
- TRACE("dropped NETWORK_MEMBERSHIP_CERTIFICATE from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),ex.what());
} catch ( ... ) {
- TRACE("dropped NETWORK_MEMBERSHIP_CERTIFICATE from %s(%s): unexpected exception: (unknown)",source().toString().c_str(),_remoteAddress.toString().c_str());
+ TRACE("dropped NETWORK_MEMBERSHIP_CERTIFICATE from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
}
return true;
}
@@ -700,10 +686,8 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,cons
outp.armor(peer->key(),true);
RR->node->putPacket(_localAddress,_remoteAddress,outp.data(),outp.size());
}
- } catch (std::exception &exc) {
- TRACE("dropped NETWORK_CONFIG_REQUEST from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),exc.what());
} catch ( ... ) {
- TRACE("dropped NETWORK_CONFIG_REQUEST from %s(%s): unexpected exception: (unknown)",source().toString().c_str(),_remoteAddress.toString().c_str());
+ TRACE("dropped NETWORK_CONFIG_REQUEST from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
}
return true;
}
@@ -720,10 +704,8 @@ bool IncomingPacket::_doNETWORK_CONFIG_REFRESH(const RuntimeEnvironment *RR,cons
ptr += 8;
}
peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_NETWORK_CONFIG_REFRESH,0,Packet::VERB_NOP);
- } catch (std::exception &exc) {
- TRACE("dropped NETWORK_CONFIG_REFRESH from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),exc.what());
} catch ( ... ) {
- TRACE("dropped NETWORK_CONFIG_REFRESH from %s(%s): unexpected exception: (unknown)",source().toString().c_str(),_remoteAddress.toString().c_str());
+ TRACE("dropped NETWORK_CONFIG_REFRESH from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
}
return true;
}
@@ -751,10 +733,8 @@ bool IncomingPacket::_doMULTICAST_GATHER(const RuntimeEnvironment *RR,const Shar
}
peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP);
- } catch (std::exception &exc) {
- TRACE("dropped MULTICAST_GATHER from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),exc.what());
} catch ( ... ) {
- TRACE("dropped MULTICAST_GATHER from %s(%s): unexpected exception: (unknown)",source().toString().c_str(),_remoteAddress.toString().c_str());
+ TRACE("dropped MULTICAST_GATHER from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
}
return true;
}
@@ -842,10 +822,8 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,const Share
} // else ignore -- not a member of this network
peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP);
- } catch (std::exception &exc) {
- TRACE("dropped MULTICAST_FRAME from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),exc.what());
} catch ( ... ) {
- TRACE("dropped MULTICAST_FRAME from %s(%s): unexpected exception: (unknown)",source().toString().c_str(),_remoteAddress.toString().c_str());
+ TRACE("dropped MULTICAST_FRAME from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
}
return true;
}
@@ -885,10 +863,8 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,const Sha
}
peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP);
- } catch (std::exception &exc) {
- TRACE("dropped PUSH_DIRECT_PATHS from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),exc.what());
} catch ( ... ) {
- TRACE("dropped PUSH_DIRECT_PATHS from %s(%s): unexpected exception: (unknown)",source().toString().c_str(),_remoteAddress.toString().c_str());
+ TRACE("dropped PUSH_DIRECT_PATHS from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
}
return true;
}
@@ -1051,16 +1027,52 @@ bool IncomingPacket::_doCIRCUIT_TEST(const RuntimeEnvironment *RR,const SharedPt
}
peer->received(RR,_localAddress,_remoteAddress,hops(),packetId(),Packet::VERB_CIRCUIT_TEST,0,Packet::VERB_NOP);
- } catch (std::exception &exc) {
- TRACE("dropped CIRCUIT_TEST from %s(%s): unexpected exception: %s",source().toString().c_str(),_remoteAddress.toString().c_str(),exc.what());
} catch ( ... ) {
- TRACE("dropped CIRCUIT_TEST from %s(%s): unexpected exception: (unknown)",source().toString().c_str(),_remoteAddress.toString().c_str());
+ TRACE("dropped CIRCUIT_TEST from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
}
return true;
}
bool IncomingPacket::_doCIRCUIT_TEST_REPORT(const RuntimeEnvironment *RR,const SharedPtr &peer)
{
+ try {
+ ZT_CircuitTestReport report;
+ memset(&report,0,sizeof(report));
+
+ report.address = peer->address().toInt();
+ report.testId = at(ZT_PACKET_IDX_PAYLOAD + 8);
+ report.timestamp = at(ZT_PACKET_IDX_PAYLOAD);
+ report.remoteTimestamp = at(ZT_PACKET_IDX_PAYLOAD + 16);
+ report.sourcePacketId = at(ZT_PACKET_IDX_PAYLOAD + 44);
+ report.flags = at(ZT_PACKET_IDX_PAYLOAD + 36);
+ report.sourcePacketHopCount = (*this)[ZT_PACKET_IDX_PAYLOAD + 52];
+ report.errorCode = at(ZT_PACKET_IDX_PAYLOAD + 34);
+ report.vendor = (enum ZT_Vendor)((*this)[ZT_PACKET_IDX_PAYLOAD + 24]);
+ report.protocolVersion = (*this)[ZT_PACKET_IDX_PAYLOAD + 25];
+ report.majorVersion = (*this)[ZT_PACKET_IDX_PAYLOAD + 26];
+ report.minorVersion = (*this)[ZT_PACKET_IDX_PAYLOAD + 27];
+ report.revision = at(ZT_PACKET_IDX_PAYLOAD + 28);
+ report.platform = (enum ZT_Platform)at(ZT_PACKET_IDX_PAYLOAD + 30);
+ report.architecture = (enum ZT_Architecture)at(ZT_PACKET_IDX_PAYLOAD + 32);
+
+ const unsigned int receivedOnLocalAddressLen = reinterpret_cast(&(report.receivedOnLocalAddress))->deserialize(*this,ZT_PACKET_IDX_PAYLOAD + 53);
+ const unsigned int receivedFromRemoteAddressLen = reinterpret_cast(&(report.receivedFromRemoteAddress))->deserialize(*this,ZT_PACKET_IDX_PAYLOAD + 53 + receivedOnLocalAddressLen);
+
+ unsigned int nhptr = ZT_PACKET_IDX_PAYLOAD + 53 + receivedOnLocalAddressLen + receivedFromRemoteAddressLen;
+ nhptr += at(nhptr) + 2; // add "additional field" length, which right now will be zero
+
+ report.nextHopCount = (*this)[nhptr++];
+ if (report.nextHopCount > ZT_CIRCUIT_TEST_MAX_HOP_BREADTH) // sanity check, shouldn't be possible
+ report.nextHopCount = ZT_CIRCUIT_TEST_MAX_HOP_BREADTH;
+ for(unsigned int h=0;h(nhptr); nhptr += 8;
+ nhptr += reinterpret_cast(&(report.nextHops[h].physicalAddress))->deserialize(*this,nhptr);
+ }
+
+ RR->node->postCircuitTestReport(&report);
+ } catch ( ... ) {
+ TRACE("dropped CIRCUIT_TEST_REPORT from %s(%s): unexpected exception",source().toString().c_str(),_remoteAddress.toString().c_str());
+ }
return true;
}
--
cgit v1.2.3
From 59da8b2a4b3e36605886944f3fa111870bbb8a2c Mon Sep 17 00:00:00 2001
From: Adam Ierymenko
Date: Thu, 8 Oct 2015 15:44:06 -0700
Subject: Logging of circuit test results to disk.
---
controller/SqliteNetworkController.cpp | 72 ++++++++++++++++++++++++++++++++--
1 file changed, 69 insertions(+), 3 deletions(-)
diff --git a/controller/SqliteNetworkController.cpp b/controller/SqliteNetworkController.cpp
index 861cdbcf..13a0cadd 100644
--- a/controller/SqliteNetworkController.cpp
+++ b/controller/SqliteNetworkController.cpp
@@ -524,8 +524,8 @@ unsigned int SqliteNetworkController::handleControlPlaneHttpPOST(
json_value *hop = j->u.object.values[k].value->u.array.values[kk];
if (hop->type == json_array) {
for(unsigned int kkk=0;kkku.array.length;++kkk) {
- if (hop->u.array.values[kkk].type == json_string) {
- test->hops[test->hopCount].addresses[test->hops[test->hopCount].breadth++] = Utils::hexStrToU64(hop->u.array.values[kkk].u.string.ptr) & 0xffffffffffULL;
+ if (hop->u.array.values[kkk]->type == json_string) {
+ test->hops[test->hopCount].addresses[test->hops[test->hopCount].breadth++] = Utils::hexStrToU64(hop->u.array.values[kkk]->u.string.ptr) & 0xffffffffffULL;
}
}
++test->hopCount;
@@ -548,6 +548,7 @@ unsigned int SqliteNetworkController::handleControlPlaneHttpPOST(
}
test->timestamp = OSUtils::now();
+ _circuitTests[test->testId] = test;
_node->circuitTestBegin(test,&(SqliteNetworkController::_circuitTestCallback));
return 200;
@@ -1865,8 +1866,73 @@ NetworkController::ResultCode SqliteNetworkController::_doNetworkConfigRequest(c
return NetworkController::NETCONF_QUERY_OK;
}
-static void _circuitTestCallback(ZT_Node *node,ZT_CircuitTest *test,const ZT_CircuitTestReport *report)
+void SqliteNetworkController::_circuitTestCallback(ZT_Node *node,ZT_CircuitTest *test,const ZT_CircuitTestReport *report)
{
+ static Mutex circuitTestWriteLock;
+
+ const uint64_t now = OSUtils::now();
+
+ SqliteNetworkController *const c = reinterpret_cast(test->ptr);
+ char tmp[128];
+
+ std::string reportSavePath(c->_circuitTestPath);
+ OSUtils::mkdir(reportSavePath);
+ Utils::snprintf(tmp,sizeof(tmp),ZT_PATH_SEPARATOR_S"%.16llx",test->credentialNetworkId);
+ reportSavePath.append(tmp);
+ OSUtils::mkdir(reportSavePath);
+ Utils::snprintf(tmp,sizeof(tmp),ZT_PATH_SEPARATOR_S"%.16llx_%.16llx",test->timestamp,test->testId);
+ reportSavePath.append(tmp);
+ OSUtils::mkdir(reportSavePath);
+ Utils::snprintf(tmp,sizeof(tmp),ZT_PATH_SEPARATOR_S"%.10llx",report->address);
+ reportSavePath.append(tmp);
+
+ {
+ Mutex::Lock _l(circuitTestWriteLock);
+ FILE *f = fopen(reportSavePath.c_str(),"a");
+ if (!f)
+ return;
+ fseek(f,0,SEEK_END);
+ fprintf(f,"%s{\n"
+ "\t\"address\": \"%.10llx\","ZT_EOL_S
+ "\t\"testId\": \"%.16llx\","ZT_EOL_S
+ "\t\"timestamp\": %llu,"ZT_EOL_S
+ "\t\"receivedTimestamp\": %llu,"ZT_EOL_S
+ "\t\"remoteTimestamp\": %llu,"ZT_EOL_S
+ "\t\"sourcePacketId\": \"%.16llx\","ZT_EOL_S
+ "\t\"flags\": %llu,"ZT_EOL_S
+ "\t\"sourcePacketHopCount\": %u,"ZT_EOL_S
+ "\t\"errorCode\": %u,"ZT_EOL_S
+ "\t\"vendor\": %d,"ZT_EOL_S
+ "\t\"protocolVersion\": %u,"ZT_EOL_S
+ "\t\"majorVersion\": %u,"ZT_EOL_S
+ "\t\"minorVersion\": %u,"ZT_EOL_S
+ "\t\"revision\": %u,"ZT_EOL_S
+ "\t\"platform\": %d,"ZT_EOL_S
+ "\t\"architecture\": %d,"ZT_EOL_S
+ "\t\"receivedOnLocalAddress\": \"%s\","ZT_EOL_S
+ "\t\"receivedFromRemoteAddress\": \"%s\""ZT_EOL_S
+ "}",
+ ((ftell(f) > 0) ? ",\n" : ""),
+ report->address,
+ test->testId,
+ report->timestamp,
+ now,
+ report->remoteTimestamp,
+ report->sourcePacketId,
+ report->flags,
+ report->sourcePacketHopCount,
+ report->errorCode,
+ (int)report->vendor,
+ report->protocolVersion,
+ report->majorVersion,
+ report->minorVersion,
+ report->revision,
+ (int)report->platform,
+ (int)report->architecture,
+ reinterpret_cast(&(report->receivedOnLocalAddress))->toString().c_str(),
+ reinterpret_cast(&(report->receivedFromRemoteAddress))->toString().c_str());
+ fclose(f);
+ }
}
} // namespace ZeroTier
--
cgit v1.2.3
From 160278c489b8ec2f11235f839836f0f014990fda Mon Sep 17 00:00:00 2001
From: Adam Ierymenko
Date: Thu, 8 Oct 2015 17:42:53 -0700
Subject: Little bit of reorg in Salsa20 which seems to speed things up very
slightly.
---
node/Salsa20.cpp | 75 ++++++++++++++++++++------------------------------------
1 file changed, 26 insertions(+), 49 deletions(-)
diff --git a/node/Salsa20.cpp b/node/Salsa20.cpp
index f8cf8591..dec14faf 100644
--- a/node/Salsa20.cpp
+++ b/node/Salsa20.cpp
@@ -175,41 +175,34 @@ void Salsa20::encrypt(const void *in,void *out,unsigned int bytes)
__m128i X1 = _mm_loadu_si128((const __m128i *)&(_state.v[1]));
__m128i X2 = _mm_loadu_si128((const __m128i *)&(_state.v[2]));
__m128i X3 = _mm_loadu_si128((const __m128i *)&(_state.v[3]));
+ __m128i T;
__m128i X0s = X0;
__m128i X1s = X1;
__m128i X2s = X2;
__m128i X3s = X3;
for (i=0;i<_roundsDiv4;++i) {
- __m128i T = _mm_add_epi32(X0, X3);
- X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 7));
- X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
- X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
- X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
- X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 13));
- X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 19));
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
- X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
- X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
- X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 7));
- X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 25));
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
- X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
- X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
- X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 13));
- X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 19));
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
- X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
- X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
@@ -218,34 +211,26 @@ void Salsa20::encrypt(const void *in,void *out,unsigned int bytes)
// --
T = _mm_add_epi32(X0, X3);
- X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 7));
- X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 25));
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
- X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
- X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
- X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 13));
- X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 19));
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
- X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
- X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
- X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 7));
- X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 25));
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
- X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
- X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
- X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 13));
- X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 19));
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
- X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
- X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
@@ -257,22 +242,14 @@ void Salsa20::encrypt(const void *in,void *out,unsigned int bytes)
X2 = _mm_add_epi32(X2s,X2);
X3 = _mm_add_epi32(X3s,X3);
- {
- __m128i k02 = _mm_or_si128(_mm_slli_epi64(X0, 32), _mm_srli_epi64(X3, 32));
- k02 = _mm_shuffle_epi32(k02, _MM_SHUFFLE(0, 1, 2, 3));
- __m128i k13 = _mm_or_si128(_mm_slli_epi64(X1, 32), _mm_srli_epi64(X0, 32));
- k13 = _mm_shuffle_epi32(k13, _MM_SHUFFLE(0, 1, 2, 3));
- __m128i k20 = _mm_or_si128(_mm_and_si128(X2, _S20SSECONSTANTS.maskLo32), _mm_and_si128(X1, _S20SSECONSTANTS.maskHi32));
- __m128i k31 = _mm_or_si128(_mm_and_si128(X3, _S20SSECONSTANTS.maskLo32), _mm_and_si128(X2, _S20SSECONSTANTS.maskHi32));
-
- const float *const mv = (const float *)m;
- float *const cv = (float *)c;
-
- _mm_storeu_ps(cv,_mm_castsi128_ps(_mm_xor_si128(_mm_unpackhi_epi64(k02,k20),_mm_castps_si128(_mm_loadu_ps(mv)))));
- _mm_storeu_ps(cv + 4,_mm_castsi128_ps(_mm_xor_si128(_mm_unpackhi_epi64(k13,k31),_mm_castps_si128(_mm_loadu_ps(mv + 4)))));
- _mm_storeu_ps(cv + 8,_mm_castsi128_ps(_mm_xor_si128(_mm_unpacklo_epi64(k20,k02),_mm_castps_si128(_mm_loadu_ps(mv + 8)))));
- _mm_storeu_ps(cv + 12,_mm_castsi128_ps(_mm_xor_si128(_mm_unpacklo_epi64(k31,k13),_mm_castps_si128(_mm_loadu_ps(mv + 12)))));
- }
+ __m128i k02 = _mm_shuffle_epi32(_mm_or_si128(_mm_slli_epi64(X0, 32), _mm_srli_epi64(X3, 32)), _MM_SHUFFLE(0, 1, 2, 3));
+ __m128i k13 = _mm_shuffle_epi32(_mm_or_si128(_mm_slli_epi64(X1, 32), _mm_srli_epi64(X0, 32)), _MM_SHUFFLE(0, 1, 2, 3));
+ __m128i k20 = _mm_or_si128(_mm_and_si128(X2, _S20SSECONSTANTS.maskLo32), _mm_and_si128(X1, _S20SSECONSTANTS.maskHi32));
+ __m128i k31 = _mm_or_si128(_mm_and_si128(X3, _S20SSECONSTANTS.maskLo32), _mm_and_si128(X2, _S20SSECONSTANTS.maskHi32));
+ _mm_storeu_ps(reinterpret_cast(c),_mm_castsi128_ps(_mm_xor_si128(_mm_unpackhi_epi64(k02,k20),_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(m))))));
+ _mm_storeu_ps(reinterpret_cast(c) + 4,_mm_castsi128_ps(_mm_xor_si128(_mm_unpackhi_epi64(k13,k31),_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(m) + 4)))));
+ _mm_storeu_ps(reinterpret_cast(c) + 8,_mm_castsi128_ps(_mm_xor_si128(_mm_unpacklo_epi64(k20,k02),_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(m) + 8)))));
+ _mm_storeu_ps(reinterpret_cast(c) + 12,_mm_castsi128_ps(_mm_xor_si128(_mm_unpacklo_epi64(k31,k13),_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(m) + 12)))));
if (!(++_state.i[8])) {
++_state.i[5]; // state reordered for SSE
--
cgit v1.2.3
From 3fa6dd377f479774ae2726f24748f41458329272 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko
Date: Fri, 9 Oct 2015 08:51:57 -0700
Subject: docs
---
node/Packet.hpp | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/node/Packet.hpp b/node/Packet.hpp
index 4dfa73f0..5c7253bf 100644
--- a/node/Packet.hpp
+++ b/node/Packet.hpp
@@ -669,7 +669,8 @@ public:
* This generates OK with a copy of the transmitted payload. No ERROR
* is generated. Response to ECHO requests is optional.
*
- * Note that fragmented ECHO packets may not work.
+ * Support for fragmented echo packets is optional and their use is not
+ * recommended.
*/
VERB_ECHO = 8,
@@ -680,8 +681,18 @@ public:
* <[4] multicast additional distinguishing information (ADI)>
* [... additional tuples of network/address/adi ...]
*
- * LIKEs are sent to peers with whom you have a direct peer to peer
- * connection, and always including root servers.
+ * LIKEs may be sent to any peer, though a good implementation should
+ * restrict them to peers on the same network they're for and to network
+ * controllers and root servers. In the current network, root servers
+ * will provide the service of final multicast cache.
+ *
+ * It is recommended that NETWORK_MEMBERSHIP_CERTIFICATE pushes be sent
+ * along with MULTICAST_LIKE when pushing LIKEs to peers that do not
+ * share a network membership (such as root servers), since this can be
+ * used to authenticate GATHER requests and limit responses to peers
+ * authorized to talk on a network. (Should be an optional field here,
+ * but saving one or two packets every five minutes is not worth an
+ * ugly hack or protocol rev.)
*
* OK/ERROR are not generated.
*/
--
cgit v1.2.3
From 0c498556d5b11c101d2b18cf85cff2d53aa97d58 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko
Date: Fri, 9 Oct 2015 09:39:27 -0700
Subject: Unroll Salsa20 fully for a little more speed (non-SSE now almost as
fast as SSE)
---
node/Identity.cpp | 9 +-
node/IncomingPacket.cpp | 8 +-
node/Node.cpp | 6 +-
node/Packet.cpp | 12 +-
node/Salsa20.cpp | 1206 ++++++++++++++++++++++++++++++++++++++++++-----
node/Salsa20.hpp | 40 +-
selftest.cpp | 42 +-
7 files changed, 1138 insertions(+), 185 deletions(-)
diff --git a/node/Identity.cpp b/node/Identity.cpp
index 8765da51..e5aaf13d 100644
--- a/node/Identity.cpp
+++ b/node/Identity.cpp
@@ -41,7 +41,6 @@
#define ZT_IDENTITY_GEN_HASHCASH_FIRST_BYTE_LESS_THAN 17
#define ZT_IDENTITY_GEN_MEMORY 2097152
-#define ZT_IDENTITY_GEN_SALSA20_ROUNDS 20
namespace ZeroTier {
@@ -55,8 +54,8 @@ static inline void _computeMemoryHardHash(const void *publicKey,unsigned int pub
// ordinary Salsa20 is randomly seekable. This is good for a cipher
// but is not what we want for sequential memory-harndess.
memset(genmem,0,ZT_IDENTITY_GEN_MEMORY);
- Salsa20 s20(digest,256,(char *)digest + 32,ZT_IDENTITY_GEN_SALSA20_ROUNDS);
- s20.encrypt((char *)genmem,(char *)genmem,64);
+ Salsa20 s20(digest,256,(char *)digest + 32);
+ s20.encrypt20((char *)genmem,(char *)genmem,64);
for(unsigned long i=64;i(candidate));
SHA512::hash(shabuf,candidate,16 + challengeLength);
- s20.init(shabuf,256,&s20iv,12);
+ s20.init(shabuf,256,&s20iv);
memset(salsabuf,0,sizeof(salsabuf));
- s20.encrypt(salsabuf,salsabuf,sizeof(salsabuf));
+ s20.encrypt12(salsabuf,salsabuf,sizeof(salsabuf));
SHA512::hash(shabuf,salsabuf,sizeof(salsabuf));
d = difficulty;
@@ -1186,9 +1186,9 @@ bool IncomingPacket::testSalsa2012Sha512ProofOfWorkResult(unsigned int difficult
memcpy(candidate + 16,challenge,challengeLength);
SHA512::hash(shabuf,candidate,16 + challengeLength);
- s20.init(shabuf,256,&s20iv,12);
+ s20.init(shabuf,256,&s20iv);
memset(salsabuf,0,sizeof(salsabuf));
- s20.encrypt(salsabuf,salsabuf,sizeof(salsabuf));
+ s20.encrypt12(salsabuf,salsabuf,sizeof(salsabuf));
SHA512::hash(shabuf,salsabuf,sizeof(salsabuf));
d = difficulty;
diff --git a/node/Node.cpp b/node/Node.cpp
index 7f469b97..84452146 100644
--- a/node/Node.cpp
+++ b/node/Node.cpp
@@ -88,9 +88,9 @@ Node::Node(
{
char foo[32];
Utils::getSecureRandom(foo,32);
- _prng.init(foo,256,foo,8);
+ _prng.init(foo,256,foo);
memset(_prngStream,0,sizeof(_prngStream));
- _prng.encrypt(_prngStream,_prngStream,sizeof(_prngStream));
+ _prng.encrypt12(_prngStream,_prngStream,sizeof(_prngStream));
}
std::string idtmp(dataStoreGet("identity.secret"));
@@ -574,7 +574,7 @@ uint64_t Node::prng()
{
unsigned int p = (++_prngStreamPtr % (sizeof(_prngStream) / sizeof(uint64_t)));
if (!p)
- _prng.encrypt(_prngStream,_prngStream,sizeof(_prngStream));
+ _prng.encrypt12(_prngStream,_prngStream,sizeof(_prngStream));
return _prngStream[p];
}
diff --git a/node/Packet.cpp b/node/Packet.cpp
index 2fb7d488..f11ae1b8 100644
--- a/node/Packet.cpp
+++ b/node/Packet.cpp
@@ -92,14 +92,14 @@ void Packet::armor(const void *key,bool encryptPayload)
setCipher(encryptPayload ? ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012 : ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE);
_salsa20MangleKey((const unsigned char *)key,mangledKey);
- Salsa20 s20(mangledKey,256,field(ZT_PACKET_IDX_IV,8),ZT_PROTO_SALSA20_ROUNDS);
+ Salsa20 s20(mangledKey,256,field(ZT_PACKET_IDX_IV,8)/*,ZT_PROTO_SALSA20_ROUNDS*/);
// MAC key is always the first 32 bytes of the Salsa20 key stream
// This is the same construction DJB's NaCl library uses
- s20.encrypt(ZERO_KEY,macKey,sizeof(macKey));
+ s20.encrypt12(ZERO_KEY,macKey,sizeof(macKey));
if (encryptPayload)
- s20.encrypt(payload,payload,payloadLen);
+ s20.encrypt12(payload,payload,payloadLen);
Poly1305::compute(mac,payload,payloadLen,macKey);
memcpy(field(ZT_PACKET_IDX_MAC,8),mac,8);
@@ -116,15 +116,15 @@ bool Packet::dearmor(const void *key)
if ((cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE)||(cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012)) {
_salsa20MangleKey((const unsigned char *)key,mangledKey);
- Salsa20 s20(mangledKey,256,field(ZT_PACKET_IDX_IV,8),ZT_PROTO_SALSA20_ROUNDS);
+ Salsa20 s20(mangledKey,256,field(ZT_PACKET_IDX_IV,8)/*,ZT_PROTO_SALSA20_ROUNDS*/);
- s20.encrypt(ZERO_KEY,macKey,sizeof(macKey));
+ s20.encrypt12(ZERO_KEY,macKey,sizeof(macKey));
Poly1305::compute(mac,payload,payloadLen,macKey);
if (!Utils::secureEq(mac,field(ZT_PACKET_IDX_MAC,8),8))
return false;
if (cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012)
- s20.decrypt(payload,payload,payloadLen);
+ s20.decrypt12(payload,payload,payloadLen);
return true;
} else return false; // unrecognized cipher suite
diff --git a/node/Salsa20.cpp b/node/Salsa20.cpp
index dec14faf..3aa19ac6 100644
--- a/node/Salsa20.cpp
+++ b/node/Salsa20.cpp
@@ -66,7 +66,7 @@ static const _s20sseconsts _S20SSECONSTANTS;
namespace ZeroTier {
-void Salsa20::init(const void *key,unsigned int kbits,const void *iv,unsigned int rounds)
+void Salsa20::init(const void *key,unsigned int kbits,const void *iv)
throw()
{
#ifdef ZT_SALSA20_SSE
@@ -121,11 +121,9 @@ void Salsa20::init(const void *key,unsigned int kbits,const void *iv,unsigned in
_state.i[15] = U8TO32_LITTLE(constants + 12);
_state.i[0] = U8TO32_LITTLE(constants + 0);
#endif
-
- _roundsDiv4 = rounds / 4;
}
-void Salsa20::encrypt(const void *in,void *out,unsigned int bytes)
+void Salsa20::encrypt12(const void *in,void *out,unsigned int bytes)
throw()
{
uint8_t tmp[64];
@@ -181,61 +179,149 @@ void Salsa20::encrypt(const void *in,void *out,unsigned int bytes)
__m128i X2s = X2;
__m128i X3s = X3;
- for (i=0;i<_roundsDiv4;++i) {
- T = _mm_add_epi32(X0, X3);
- X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
- T = _mm_add_epi32(X1, X0);
- X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
- T = _mm_add_epi32(X2, X1);
- X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
- T = _mm_add_epi32(X3, X2);
- X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
-
- X1 = _mm_shuffle_epi32(X1, 0x93);
- X2 = _mm_shuffle_epi32(X2, 0x4E);
- X3 = _mm_shuffle_epi32(X3, 0x39);
-
- T = _mm_add_epi32(X0, X1);
- X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
- T = _mm_add_epi32(X3, X0);
- X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
- T = _mm_add_epi32(X2, X3);
- X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
- T = _mm_add_epi32(X1, X2);
- X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
-
- X1 = _mm_shuffle_epi32(X1, 0x39);
- X2 = _mm_shuffle_epi32(X2, 0x4E);
- X3 = _mm_shuffle_epi32(X3, 0x93);
-
- // --
-
- T = _mm_add_epi32(X0, X3);
- X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
- T = _mm_add_epi32(X1, X0);
- X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
- T = _mm_add_epi32(X2, X1);
- X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
- T = _mm_add_epi32(X3, X2);
- X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
-
- X1 = _mm_shuffle_epi32(X1, 0x93);
- X2 = _mm_shuffle_epi32(X2, 0x4E);
- X3 = _mm_shuffle_epi32(X3, 0x39);
-
- T = _mm_add_epi32(X0, X1);
- X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
- T = _mm_add_epi32(X3, X0);
- X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
- T = _mm_add_epi32(X2, X3);
- X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
- T = _mm_add_epi32(X1, X2);
- X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
-
- X1 = _mm_shuffle_epi32(X1, 0x39);
- X2 = _mm_shuffle_epi32(X2, 0x4E);
- X3 = _mm_shuffle_epi32(X3, 0x93);
- }
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
X0 = _mm_add_epi32(X0s,X0);
X1 = _mm_add_epi32(X1s,X1);
@@ -273,76 +359,942 @@ void Salsa20::encrypt(const void *in,void *out,unsigned int bytes)
x14 = j14;
x15 = j15;
- for(i=0;i<_roundsDiv4;++i) {
- x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
- x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
- x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
- x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
- x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
- x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
- x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
- x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
- x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
- x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
- x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
- x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
- x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
- x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
- x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
- x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
- x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
- x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
- x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
- x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
- x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
- x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
- x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
- x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
- x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
- x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
- x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
- x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
- x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
- x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
- x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
- x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
-
- // --
-
- x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
- x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
- x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
- x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
- x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
- x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
- x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
- x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
- x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
- x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
- x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
- x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
- x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
- x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
- x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
- x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
- x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
- x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
- x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
- x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
- x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
- x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
- x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
- x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
- x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
- x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
- x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
- x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
- x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
- x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
- x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
- x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ x0 = PLUS(x0,j0);
+ x1 = PLUS(x1,j1);
+ x2 = PLUS(x2,j2);
+ x3 = PLUS(x3,j3);
+ x4 = PLUS(x4,j4);
+ x5 = PLUS(x5,j5);
+ x6 = PLUS(x6,j6);
+ x7 = PLUS(x7,j7);
+ x8 = PLUS(x8,j8);
+ x9 = PLUS(x9,j9);
+ x10 = PLUS(x10,j10);
+ x11 = PLUS(x11,j11);
+ x12 = PLUS(x12,j12);
+ x13 = PLUS(x13,j13);
+ x14 = PLUS(x14,j14);
+ x15 = PLUS(x15,j15);
+
+ U32TO8_LITTLE(c + 0,XOR(x0,U8TO32_LITTLE(m + 0)));
+ U32TO8_LITTLE(c + 4,XOR(x1,U8TO32_LITTLE(m + 4)));
+ U32TO8_LITTLE(c + 8,XOR(x2,U8TO32_LITTLE(m + 8)));
+ U32TO8_LITTLE(c + 12,XOR(x3,U8TO32_LITTLE(m + 12)));
+ U32TO8_LITTLE(c + 16,XOR(x4,U8TO32_LITTLE(m + 16)));
+ U32TO8_LITTLE(c + 20,XOR(x5,U8TO32_LITTLE(m + 20)));
+ U32TO8_LITTLE(c + 24,XOR(x6,U8TO32_LITTLE(m + 24)));
+ U32TO8_LITTLE(c + 28,XOR(x7,U8TO32_LITTLE(m + 28)));
+ U32TO8_LITTLE(c + 32,XOR(x8,U8TO32_LITTLE(m + 32)));
+ U32TO8_LITTLE(c + 36,XOR(x9,U8TO32_LITTLE(m + 36)));
+ U32TO8_LITTLE(c + 40,XOR(x10,U8TO32_LITTLE(m + 40)));
+ U32TO8_LITTLE(c + 44,XOR(x11,U8TO32_LITTLE(m + 44)));
+ U32TO8_LITTLE(c + 48,XOR(x12,U8TO32_LITTLE(m + 48)));
+ U32TO8_LITTLE(c + 52,XOR(x13,U8TO32_LITTLE(m + 52)));
+ U32TO8_LITTLE(c + 56,XOR(x14,U8TO32_LITTLE(m + 56)));
+ U32TO8_LITTLE(c + 60,XOR(x15,U8TO32_LITTLE(m + 60)));
+
+ if (!(++j8)) {
+ ++j9;
+ /* stopping at 2^70 bytes per nonce is user's responsibility */
+ }
+#endif
+
+ if (bytes <= 64) {
+ if (bytes < 64) {
+ for (i = 0;i < bytes;++i)
+ ctarget[i] = c[i];
+ }
+
+#ifndef ZT_SALSA20_SSE
+ _state.i[8] = j8;
+ _state.i[9] = j9;
+#endif
+
+ return;
+ }
+
+ bytes -= 64;
+ c += 64;
+ m += 64;
+ }
+}
+
+void Salsa20::encrypt20(const void *in,void *out,unsigned int bytes)
+ throw()
+{
+ uint8_t tmp[64];
+ const uint8_t *m = (const uint8_t *)in;
+ uint8_t *c = (uint8_t *)out;
+ uint8_t *ctarget = c;
+ unsigned int i;
+
+#ifndef ZT_SALSA20_SSE
+ uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+ uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+#endif
+
+ if (!bytes)
+ return;
+
+#ifndef ZT_SALSA20_SSE
+ j0 = _state.i[0];
+ j1 = _state.i[1];
+ j2 = _state.i[2];
+ j3 = _state.i[3];
+ j4 = _state.i[4];
+ j5 = _state.i[5];
+ j6 = _state.i[6];
+ j7 = _state.i[7];
+ j8 = _state.i[8];
+ j9 = _state.i[9];
+ j10 = _state.i[10];
+ j11 = _state.i[11];
+ j12 = _state.i[12];
+ j13 = _state.i[13];
+ j14 = _state.i[14];
+ j15 = _state.i[15];
+#endif
+
+ for (;;) {
+ if (bytes < 64) {
+ for (i = 0;i < bytes;++i)
+ tmp[i] = m[i];
+ m = tmp;
+ ctarget = c;
+ c = tmp;
}
+#ifdef ZT_SALSA20_SSE
+ __m128i X0 = _mm_loadu_si128((const __m128i *)&(_state.v[0]));
+ __m128i X1 = _mm_loadu_si128((const __m128i *)&(_state.v[1]));
+ __m128i X2 = _mm_loadu_si128((const __m128i *)&(_state.v[2]));
+ __m128i X3 = _mm_loadu_si128((const __m128i *)&(_state.v[3]));
+ __m128i T;
+ __m128i X0s = X0;
+ __m128i X1s = X1;
+ __m128i X2s = X2;
+ __m128i X3s = X3;
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ // 2X round -------------------------------------------------------------
+ T = _mm_add_epi32(X0, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X1, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X3, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x93);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x39);
+ T = _mm_add_epi32(X0, X1);
+ X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
+ T = _mm_add_epi32(X3, X0);
+ X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
+ T = _mm_add_epi32(X2, X3);
+ X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
+ T = _mm_add_epi32(X1, X2);
+ X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
+ X1 = _mm_shuffle_epi32(X1, 0x39);
+ X2 = _mm_shuffle_epi32(X2, 0x4E);
+ X3 = _mm_shuffle_epi32(X3, 0x93);
+
+ X0 = _mm_add_epi32(X0s,X0);
+ X1 = _mm_add_epi32(X1s,X1);
+ X2 = _mm_add_epi32(X2s,X2);
+ X3 = _mm_add_epi32(X3s,X3);
+
+ __m128i k02 = _mm_shuffle_epi32(_mm_or_si128(_mm_slli_epi64(X0, 32), _mm_srli_epi64(X3, 32)), _MM_SHUFFLE(0, 1, 2, 3));
+ __m128i k13 = _mm_shuffle_epi32(_mm_or_si128(_mm_slli_epi64(X1, 32), _mm_srli_epi64(X0, 32)), _MM_SHUFFLE(0, 1, 2, 3));
+ __m128i k20 = _mm_or_si128(_mm_and_si128(X2, _S20SSECONSTANTS.maskLo32), _mm_and_si128(X1, _S20SSECONSTANTS.maskHi32));
+ __m128i k31 = _mm_or_si128(_mm_and_si128(X3, _S20SSECONSTANTS.maskLo32), _mm_and_si128(X2, _S20SSECONSTANTS.maskHi32));
+ _mm_storeu_ps(reinterpret_cast(c),_mm_castsi128_ps(_mm_xor_si128(_mm_unpackhi_epi64(k02,k20),_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(m))))));
+ _mm_storeu_ps(reinterpret_cast(c) + 4,_mm_castsi128_ps(_mm_xor_si128(_mm_unpackhi_epi64(k13,k31),_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(m) + 4)))));
+ _mm_storeu_ps(reinterpret_cast(c) + 8,_mm_castsi128_ps(_mm_xor_si128(_mm_unpacklo_epi64(k20,k02),_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(m) + 8)))));
+ _mm_storeu_ps(reinterpret_cast(c) + 12,_mm_castsi128_ps(_mm_xor_si128(_mm_unpacklo_epi64(k31,k13),_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(m) + 12)))));
+
+ if (!(++_state.i[8])) {
+ ++_state.i[5]; // state reordered for SSE
+ /* stopping at 2^70 bytes per nonce is user's responsibility */
+ }
+#else
+ x0 = j0;
+ x1 = j1;
+ x2 = j2;
+ x3 = j3;
+ x4 = j4;
+ x5 = j5;
+ x6 = j6;
+ x7 = j7;
+ x8 = j8;
+ x9 = j9;
+ x10 = j10;
+ x11 = j11;
+ x12 = j12;
+ x13 = j13;
+ x14 = j14;
+ x15 = j15;
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
+ // 2X round -------------------------------------------------------------
+ x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
+ x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
+ x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
+ x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
+ x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
+ x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
+ x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
+ x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
+ x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
+ x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
+ x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
+ x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
+ x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
+ x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
+ x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
+ x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
+ x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
+ x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
+ x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
+ x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
+ x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
+ x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
+ x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
+ x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
+ x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
+ x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
+ x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
+ x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
+ x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
+ x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
+ x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
+
x0 = PLUS(x0,j0);
x1 = PLUS(x1,j1);
x2 = PLUS(x2,j2);
diff --git a/node/Salsa20.hpp b/node/Salsa20.hpp
index 84baf3da..a2082bea 100644
--- a/node/Salsa20.hpp
+++ b/node/Salsa20.hpp
@@ -35,12 +35,11 @@ public:
* @param key Key bits
* @param kbits Number of key bits: 128 or 256 (recommended)
* @param iv 64-bit initialization vector
- * @param rounds Number of rounds: 8, 12, or 20
*/
- Salsa20(const void *key,unsigned int kbits,const void *iv,unsigned int rounds)
+ Salsa20(const void *key,unsigned int kbits,const void *iv)
throw()
{
- init(key,kbits,iv,rounds);
+ init(key,kbits,iv);
}
/**
@@ -49,21 +48,43 @@ public:
* @param key Key bits
* @param kbits Number of key bits: 128 or 256 (recommended)
* @param iv 64-bit initialization vector
- * @param rounds Number of rounds: 8, 12, or 20
*/
- void init(const void *key,unsigned int kbits,const void *iv,unsigned int rounds)
+ void init(const void *key,unsigned int kbits,const void *iv)
throw();
/**
- * Encrypt data
+ * Encrypt data using Salsa20/12
*
* @param in Input data
* @param out Output buffer
* @param bytes Length of data
*/
- void encrypt(const void *in,void *out,unsigned int bytes)
+ void encrypt12(const void *in,void *out,unsigned int bytes)
throw();
+ /**
+ * Encrypt data using Salsa20/20
+ *
+ * @param in Input data
+ * @param out Output buffer
+ * @param bytes Length of data
+ */
+ void encrypt20(const void *in,void *out,unsigned int bytes)
+ throw();
+
+ /**
+ * Decrypt data
+ *
+ * @param in Input data
+ * @param out Output buffer
+ * @param bytes Length of data
+ */
+ inline void decrypt12(const void *in,void *out,unsigned int bytes)
+ throw()
+ {
+ encrypt12(in,out,bytes);
+ }
+
/**
* Decrypt data
*
@@ -71,10 +92,10 @@ public:
* @param out Output buffer
* @param bytes Length of data
*/
- inline void decrypt(const void *in,void *out,unsigned int bytes)
+ inline void decrypt20(const void *in,void *out,unsigned int bytes)
throw()
{
- encrypt(in,out,bytes);
+ encrypt20(in,out,bytes);
}
private:
@@ -84,7 +105,6 @@ private:
#endif // ZT_SALSA20_SSE
uint32_t i[16];
} _state;
- unsigned int _roundsDiv4;
};
} // namespace ZeroTier
diff --git a/selftest.cpp b/selftest.cpp
index 090839ee..4ba76c0b 100644
--- a/selftest.cpp
+++ b/selftest.cpp
@@ -162,27 +162,27 @@ static int testCrypto()
memset(buf2,0,sizeof(buf2));
memset(buf3,0,sizeof(buf3));
Salsa20 s20;
- s20.init("12345678123456781234567812345678",256,"12345678",20);
- s20.encrypt(buf1,buf2,sizeof(buf1));
- s20.init("12345678123456781234567812345678",256,"12345678",20);
- s20.decrypt(buf2,buf3,sizeof(buf2));
+ s20.init("12345678123456781234567812345678",256,"12345678");
+ s20.encrypt20(buf1,buf2,sizeof(buf1));
+ s20.init("12345678123456781234567812345678",256,"12345678");
+ s20.decrypt20(buf2,buf3,sizeof(buf2));
if (memcmp(buf1,buf3,sizeof(buf1))) {
std::cout << "FAIL (encrypt/decrypt test)" << std::endl;
return -1;
}
}
- Salsa20 s20(s20TV0Key,256,s20TV0Iv,20);
+ Salsa20 s20(s20TV0Key,256,s20TV0Iv);
memset(buf1,0,sizeof(buf1));
memset(buf2,0,sizeof(buf2));
- s20.encrypt(buf1,buf2,64);
+ s20.encrypt20(buf1,buf2,64);
if (memcmp(buf2,s20TV0Ks,64)) {
std::cout << "FAIL (test vector 0)" << std::endl;
return -1;
}
- s20.init(s2012TV0Key,256,s2012TV0Iv,12);
+ s20.init(s2012TV0Key,256,s2012TV0Iv);
memset(buf1,0,sizeof(buf1));
memset(buf2,0,sizeof(buf2));
- s20.encrypt(buf1,buf2,64);
+ s20.encrypt12(buf1,buf2,64);
if (memcmp(buf2,s2012TV0Ks,64)) {
std::cout << "FAIL (test vector 1)" << std::endl;
return -1;
@@ -195,34 +195,16 @@ static int testCrypto()
std::cout << "[crypto] Salsa20 SSE: DISABLED" << std::endl;
#endif
- std::cout << "[crypto] Benchmarking Salsa20/8... "; std::cout.flush();
- {
- unsigned char *bb = (unsigned char *)::malloc(1234567);
- for(unsigned int i=0;i<1234567;++i)
- bb[i] = (unsigned char)i;
- Salsa20 s20(s20TV0Key,256,s20TV0Iv,8);
- double bytes = 0.0;
- uint64_t start = OSUtils::now();
- for(unsigned int i=0;i<200;++i) {
- s20.encrypt(bb,bb,1234567);
- bytes += 1234567.0;
- }
- uint64_t end = OSUtils::now();
- SHA512::hash(buf1,bb,1234567);
- std::cout << ((bytes / 1048576.0) / ((double)(end - start) / 1000.0)) << " MiB/second (" << Utils::hex(buf1,16) << ')' << std::endl;
- ::free((void *)bb);
- }
-
std::cout << "[crypto] Benchmarking Salsa20/12... "; std::cout.flush();
{
unsigned char *bb = (unsigned char *)::malloc(1234567);
for(unsigned int i=0;i<1234567;++i)
bb[i] = (unsigned char)i;
- Salsa20 s20(s20TV0Key,256,s20TV0Iv,12);
+ Salsa20 s20(s20TV0Key,256,s20TV0Iv);
double bytes = 0.0;
uint64_t start = OSUtils::now();
for(unsigned int i=0;i<200;++i) {
- s20.encrypt(bb,bb,1234567);
+ s20.encrypt12(bb,bb,1234567);
bytes += 1234567.0;
}
uint64_t end = OSUtils::now();
@@ -236,11 +218,11 @@ static int testCrypto()
unsigned char *bb = (unsigned char *)::malloc(1234567);
for(unsigned int i=0;i<1234567;++i)
bb[i] = (unsigned char)i;
- Salsa20 s20(s20TV0Key,256,s20TV0Iv,20);
+ Salsa20 s20(s20TV0Key,256,s20TV0Iv);
double bytes = 0.0;
uint64_t start = OSUtils::now();
for(unsigned int i=0;i<200;++i) {
- s20.encrypt(bb,bb,1234567);
+ s20.encrypt20(bb,bb,1234567);
bytes += 1234567.0;
}
uint64_t end = OSUtils::now();
--
cgit v1.2.3
From c2bbec2f050da996f660f2ae28b365330ebff633 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko
Date: Fri, 9 Oct 2015 10:14:20 -0700
Subject: Docker example (and useful for testing)
---
.gitignore | 2 ++
examples/docker/Dockerfile | 19 +++++++++++++++++++
examples/docker/README.md | 8 ++++++++
examples/docker/main.sh | 25 +++++++++++++++++++++++++
4 files changed, 54 insertions(+)
create mode 100644 examples/docker/Dockerfile
create mode 100644 examples/docker/README.md
create mode 100644 examples/docker/main.sh
diff --git a/.gitignore b/.gitignore
index 498119e3..b5d71690 100755
--- a/.gitignore
+++ b/.gitignore
@@ -29,6 +29,8 @@ Thumbs.db
# *nix/Mac build droppings
/build-*
/ZeroTierOneInstaller-*
+/examples/docker/zerotier-one
+/examples/docker/test-*.env
# Miscellaneous file types that we don't want to check in
*.log
diff --git a/examples/docker/Dockerfile b/examples/docker/Dockerfile
new file mode 100644
index 00000000..a4274924
--- /dev/null
+++ b/examples/docker/Dockerfile
@@ -0,0 +1,19 @@
+FROM centos:7
+
+MAINTAINER https://www.zerotier.com/
+
+RUN yum -y update && yum clean all
+
+EXPOSE 9993/udp
+
+RUN mkdir -p /var/lib/zerotier-one
+RUN mkdir -p /var/lib/zerotier-one/networks.d
+RUN ln -sf /var/lib/zerotier-one/zerotier-one /usr/local/bin/zerotier-cli
+RUN ln -sf /var/lib/zerotier-one/zerotier-one /usr/local/bin/zerotier-idtool
+
+ADD zerotier-one /var/lib/zerotier-one/
+
+ADD main.sh /
+RUN chmod a+x /main.sh
+
+CMD ["./main.sh"]
diff --git a/examples/docker/README.md b/examples/docker/README.md
new file mode 100644
index 00000000..4dae52f3
--- /dev/null
+++ b/examples/docker/README.md
@@ -0,0 +1,8 @@
+Simple Dockerfile Example
+======
+
+This is a simple Docker example using ZeroTier One in normal tun/tap mode. It uses a Dockerfile to build an image containing ZeroTier One and a main.sh that launches it with an identity supplied via the Docker environment via the ZEROTIER\_IDENTITY\_SECRET and ZEROTIER\_NETWORK variables. The Dockerfile assumes that the zerotier-one binary is in the build folder.
+
+This is not a very secure way to load an identity secret, but it's useful for testing since it allows you to repeatedly launch Docker containers with the same identity. For production we'd recommend using something like Hashicorp Vault, or modifying main.sh to leave identities unspecified and allow the container to generate a new identity at runtime. Then you could script approval of containers using the controller API, approving them as they launch, etc. (We are working on better ways of doing mass provisioning.)
+
+To use in normal tun/tap mode with Docker, containers must be run with the options "--device=/dev/net/tun --cap-add=NET_ADMIN". The main.sh script supplied here will complain and exit if these options are not present (no /dev/net/tun device).
diff --git a/examples/docker/main.sh b/examples/docker/main.sh
new file mode 100644
index 00000000..e9febb13
--- /dev/null
+++ b/examples/docker/main.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+export PATH=/bin:/usr/bin:/usr/local/bin:/sbin:/usr/sbin
+
+if [ ! -c "/dev/net/tun" ]; then
+ echo 'FATAL: must be docker run with: --device=/dev/net/tun --cap-add=NET_ADMIN'
+ exit 1
+fi
+
+if [ -z "$ZEROTIER_IDENTITY_SECRET" ]; then
+ echo 'FATAL: ZEROTIER_IDENTITY_SECRET not set -- aborting!'
+ exit 1
+fi
+
+if [ -z "$ZEROTIER_NETWORK" ]; then
+ echo 'Warning: ZEROTIER_NETWORK not set, you will need to docker exec zerotier-cli to join a network.'
+else
+ # The existence of a .conf will cause the service to "remember" this network
+ touch /var/lib/zerotier-one/networks.d/$ZEROTIER_NETWORK.conf
+fi
+
+rm -f /var/lib/zerotier-one/identity.*
+echo "$ZEROTIER_IDENTITY_SECRET" >identity.secret
+
+/var/lib/zerotier-one/zerotier-one
--
cgit v1.2.3
From 9a2565115119f4c56bada376974ed77c6b2661c7 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko
Date: Fri, 9 Oct 2015 10:14:45 -0700
Subject: .
---
examples/docker/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/docker/README.md b/examples/docker/README.md
index 4dae52f3..fbc93481 100644
--- a/examples/docker/README.md
+++ b/examples/docker/README.md
@@ -5,4 +5,4 @@ This is a simple Docker example using ZeroTier One in normal tun/tap mode. It us
This is not a very secure way to load an identity secret, but it's useful for testing since it allows you to repeatedly launch Docker containers with the same identity. For production we'd recommend using something like Hashicorp Vault, or modifying main.sh to leave identities unspecified and allow the container to generate a new identity at runtime. Then you could script approval of containers using the controller API, approving them as they launch, etc. (We are working on better ways of doing mass provisioning.)
-To use in normal tun/tap mode with Docker, containers must be run with the options "--device=/dev/net/tun --cap-add=NET_ADMIN". The main.sh script supplied here will complain and exit if these options are not present (no /dev/net/tun device).
+To use in normal tun/tap mode with Docker, containers must be run with the options "--device=/dev/net/tun --privileged". The main.sh script supplied here will complain and exit if these options are not present (no /dev/net/tun device).
--
cgit v1.2.3
From e33adad8f5b1bb64cc4c5b318a8fb95077407419 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko
Date: Fri, 9 Oct 2015 12:15:42 -0700
Subject: Script to quickly generate test docker env files.
---
examples/docker/maketestenv.sh | 11 +++++++++++
1 file changed, 11 insertions(+)
create mode 100755 examples/docker/maketestenv.sh
diff --git a/examples/docker/maketestenv.sh b/examples/docker/maketestenv.sh
new file mode 100755
index 00000000..275692e1
--- /dev/null
+++ b/examples/docker/maketestenv.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+if [ -z "$1" -o -z "$2" ]; then
+ echo 'Usage: maketestenv.sh