summaryrefslogtreecommitdiff
path: root/node/Salsa20.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'node/Salsa20.hpp')
-rw-r--r--node/Salsa20.hpp128
1 files changed, 86 insertions, 42 deletions
diff --git a/node/Salsa20.hpp b/node/Salsa20.hpp
index 7e4c1e53..bfb6d9d9 100644
--- a/node/Salsa20.hpp
+++ b/node/Salsa20.hpp
@@ -10,6 +10,7 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
+#include <string.h>
#include "Constants.hpp"
#include "Utils.hpp"
@@ -30,76 +31,119 @@ namespace ZeroTier {
class Salsa20
{
public:
- Salsa20() throw() {}
-
+ Salsa20() {} // empty body: does not call init(). NOTE(review): presumably init() must be called before crypt12()/crypt20() -- confirm
~Salsa20() { Utils::burn(&_state,sizeof(_state)); } // hands the whole _state to Utils::burn on destruction -- presumably to wipe key material; confirm against Utils
/**
- * @param key Key bits
- * @param kbits Number of key bits: 128 or 256 (recommended)
- * @param iv 64-bit initialization vector
+ * XOR d with s
+ *
+ * Uses SSE when ZT_SALSA20_SSE is defined; otherwise XORs 64-bit words
+ * (unless ZT_NO_TYPE_PUNNING is defined) and finishes byte by byte. Packet
+ * uses this with alternative Salsa20 implementations; it lives here because
+ * the SSE headers are already included in this file.
+ *
+ * @param d Destination to XOR
+ * @param s Source bytes to XOR with destination
+ * @param len Length of s and d in bytes
*/
- Salsa20(const void *key,unsigned int kbits,const void *iv)
- throw()
+ static inline void memxor(uint8_t *d,const uint8_t *s,unsigned int len) // in place: d[i] ^= s[i] for every i in [0,len)
+ {
- init(key,kbits,iv);
+#ifdef ZT_SALSA20_SSE
+ while (len >= 128) { // bulk path: eight 16-byte vectors (128 bytes) per iteration
+ __m128i s0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s)); // loadu = unaligned load, so s and d need no particular alignment
+ __m128i s1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 16));
+ __m128i s2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 32));
+ __m128i s3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 48));
+ __m128i s4 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 64));
+ __m128i s5 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 80));
+ __m128i s6 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 96));
+ __m128i s7 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 112));
+ __m128i d0 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d));
+ __m128i d1 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 16));
+ __m128i d2 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 32));
+ __m128i d3 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 48));
+ __m128i d4 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 64));
+ __m128i d5 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 80));
+ __m128i d6 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 96));
+ __m128i d7 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 112));
+ d0 = _mm_xor_si128(d0,s0); // all sixteen loads are issued before any XOR or store
+ d1 = _mm_xor_si128(d1,s1);
+ d2 = _mm_xor_si128(d2,s2);
+ d3 = _mm_xor_si128(d3,s3);
+ d4 = _mm_xor_si128(d4,s4);
+ d5 = _mm_xor_si128(d5,s5);
+ d6 = _mm_xor_si128(d6,s6);
+ d7 = _mm_xor_si128(d7,s7);
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(d),d0); // unaligned stores write the results back over d
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 16),d1);
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 32),d2);
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 48),d3);
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 64),d4);
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 80),d5);
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 96),d6);
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 112),d7);
+ s += 128;
+ d += 128;
+ len -= 128;
+ }
+ while (len >= 16) { // then one 16-byte vector at a time for what is left of the bulk
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(d),_mm_xor_si128(_mm_loadu_si128(reinterpret_cast<__m128i *>(d)),_mm_loadu_si128(reinterpret_cast<const __m128i *>(s)))); // load d, load s, XOR, store to d
+ s += 16;
+ d += 16;
+ len -= 16;
+ }
+#else
+#ifndef ZT_NO_TYPE_PUNNING
+ while (len >= 16) { // non-SSE path: two 64-bit XORs (16 bytes) per iteration
+ (*reinterpret_cast<uint64_t *>(d)) ^= (*reinterpret_cast<const uint64_t *>(s)); // NOTE(review): dereferences byte pointers as uint64_t with no alignment check -- presumably ZT_NO_TYPE_PUNNING is defined on targets where that is unsafe; confirm
+ s += 8;
+ d += 8;
+ (*reinterpret_cast<uint64_t *>(d)) ^= (*reinterpret_cast<const uint64_t *>(s));
+ s += 8;
+ d += 8;
+ len -= 16;
+ }
+#endif
+#endif
+ while (len) { // byte-wise tail: the remainder left by the wider loops, or everything when neither is compiled in
+ --len;
+ *(d++) ^= *(s++);
+ }
}
/**
- * Initialize cipher
- *
- * @param key Key bits
- * @param kbits Number of key bits: 128 or 256 (recommended)
+ * @param key 256-bit (32 byte) key
* @param iv 64-bit initialization vector
*/
- void init(const void *key,unsigned int kbits,const void *iv)
- throw();
-
- /**
- * Encrypt data using Salsa20/12
- *
- * @param in Input data
- * @param out Output buffer
- * @param bytes Length of data
- */
- void encrypt12(const void *in,void *out,unsigned int bytes)
- throw();
+ Salsa20(const void *key,const void *iv) // constructs and keys in one step
+ {
+ init(key,iv); // forwards both pointers unchanged to init()
+ }
/**
- * Encrypt data using Salsa20/20
+ * Initialize cipher
*
- * @param in Input data
- * @param out Output buffer
- * @param bytes Length of data
+ * @param key 256-bit (32 byte) key
+ * @param iv 64-bit initialization vector
*/
- void encrypt20(const void *in,void *out,unsigned int bytes)
- throw();
+ void init(const void *key,const void *iv); // declaration only; the body is not in this class body
/**
- * Decrypt data
+ * Encrypt/decrypt data using Salsa20/12
*
* @param in Input data
* @param out Output buffer
* @param bytes Length of data
*/
- inline void decrypt12(const void *in,void *out,unsigned int bytes)
- throw()
- {
- encrypt12(in,out,bytes);
- }
+ void crypt12(const void *in,void *out,unsigned int bytes); // one entry point for both encryption and decryption; declaration only
/**
- * Decrypt data
+ * Encrypt/decrypt data using Salsa20/20
*
* @param in Input data
* @param out Output buffer
* @param bytes Length of data
*/
- inline void decrypt20(const void *in,void *out,unsigned int bytes)
- throw()
- {
- encrypt20(in,out,bytes);
- }
+ void crypt20(const void *in,void *out,unsigned int bytes); // one entry point for both encryption and decryption; declaration only
private:
union {