diff options
author | Adam Ierymenko <adam.ierymenko@zerotier.com> | 2018-04-25 06:39:02 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-04-25 06:39:02 -0700 |
commit | 42ec780a6f6eedef4d8b1d8218bd72fc6ed75cc0 (patch) | |
tree | 7bf86c4d92d6a0f77eced79bfc33313c62c7b6dd /node/Salsa20.hpp | |
parent | 18c9dc8a0649c866eff9f299f20fa5b19c502e52 (diff) | |
parent | 4608880fb06700822d01e9e5d6729fcdeb82b64b (diff) | |
download | infinitytier-42ec780a6f6eedef4d8b1d8218bd72fc6ed75cc0.tar.gz infinitytier-42ec780a6f6eedef4d8b1d8218bd72fc6ed75cc0.zip |
Merge branch 'dev' into netbsd-support
Diffstat (limited to 'node/Salsa20.hpp')
-rw-r--r-- | node/Salsa20.hpp | 128 |
1 files changed, 86 insertions, 42 deletions
diff --git a/node/Salsa20.hpp b/node/Salsa20.hpp index 7e4c1e53..bfb6d9d9 100644 --- a/node/Salsa20.hpp +++ b/node/Salsa20.hpp @@ -10,6 +10,7 @@ #include <stdio.h> #include <stdint.h> #include <stdlib.h> +#include <string.h> #include "Constants.hpp" #include "Utils.hpp" @@ -30,76 +31,119 @@ namespace ZeroTier { class Salsa20 { public: - Salsa20() throw() {} - + Salsa20() {} ~Salsa20() { Utils::burn(&_state,sizeof(_state)); } /** - * @param key Key bits - * @param kbits Number of key bits: 128 or 256 (recommended) - * @param iv 64-bit initialization vector + * XOR d with s + * + * This is done efficiently using e.g. SSE if available. It's used when + * alternative Salsa20 implementations are used in Packet and is here + * since this is where all the SSE stuff is already included. + * + * @param d Destination to XOR + * @param s Source bytes to XOR with destination + * @param len Length of s and d */ - Salsa20(const void *key,unsigned int kbits,const void *iv) - throw() + static inline void memxor(uint8_t *d,const uint8_t *s,unsigned int len) { - init(key,kbits,iv); +#ifdef ZT_SALSA20_SSE + while (len >= 128) { + __m128i s0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s)); + __m128i s1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 16)); + __m128i s2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 32)); + __m128i s3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 48)); + __m128i s4 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 64)); + __m128i s5 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 80)); + __m128i s6 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 96)); + __m128i s7 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + 112)); + __m128i d0 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d)); + __m128i d1 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 16)); + __m128i d2 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 32)); + __m128i d3 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 48)); + __m128i d4 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 64)); + __m128i d5 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 80)); + __m128i d6 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 96)); + __m128i d7 = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + 112)); + d0 = _mm_xor_si128(d0,s0); + d1 = _mm_xor_si128(d1,s1); + d2 = _mm_xor_si128(d2,s2); + d3 = _mm_xor_si128(d3,s3); + d4 = _mm_xor_si128(d4,s4); + d5 = _mm_xor_si128(d5,s5); + d6 = _mm_xor_si128(d6,s6); + d7 = _mm_xor_si128(d7,s7); + _mm_storeu_si128(reinterpret_cast<__m128i *>(d),d0); + _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 16),d1); + _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 32),d2); + _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 48),d3); + _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 64),d4); + _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 80),d5); + _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 96),d6); + _mm_storeu_si128(reinterpret_cast<__m128i *>(d + 112),d7); + s += 128; + d += 128; + len -= 128; + } + while (len >= 16) { + _mm_storeu_si128(reinterpret_cast<__m128i *>(d),_mm_xor_si128(_mm_loadu_si128(reinterpret_cast<__m128i *>(d)),_mm_loadu_si128(reinterpret_cast<const __m128i *>(s)))); + s += 16; + d += 16; + len -= 16; + } +#else +#ifndef ZT_NO_TYPE_PUNNING + while (len >= 16) { + (*reinterpret_cast<uint64_t *>(d)) ^= (*reinterpret_cast<const uint64_t *>(s)); + s += 8; + d += 8; + (*reinterpret_cast<uint64_t *>(d)) ^= (*reinterpret_cast<const uint64_t *>(s)); + s += 8; + d += 8; + len -= 16; + } +#endif +#endif + while (len) { + --len; + *(d++) ^= *(s++); + } } /** - * Initialize cipher - * - * @param key Key bits - * @param kbits Number of key bits: 128 or 256 (recommended) + * @param key 256-bit (32 byte) key * @param iv 64-bit initialization vector */ - void init(const void *key,unsigned int kbits,const void *iv) - throw(); - - /** - * Encrypt data using Salsa20/12 - * - * @param in Input data - * @param out Output buffer - * @param bytes Length of data - */ - void encrypt12(const void *in,void *out,unsigned int bytes) - throw(); + Salsa20(const void *key,const void *iv) + { + init(key,iv); + } /** - * Encrypt data using Salsa20/20 + * Initialize cipher * - * @param in Input data - * @param out Output buffer - * @param bytes Length of data + * @param key Key bits + * @param iv 64-bit initialization vector */ - void encrypt20(const void *in,void *out,unsigned int bytes) - throw(); + void init(const void *key,const void *iv); /** - * Decrypt data + * Encrypt/decrypt data using Salsa20/12 * * @param in Input data * @param out Output buffer * @param bytes Length of data */ - inline void decrypt12(const void *in,void *out,unsigned int bytes) - throw() - { - encrypt12(in,out,bytes); - } + void crypt12(const void *in,void *out,unsigned int bytes); /** - * Decrypt data + * Encrypt/decrypt data using Salsa20/20 * * @param in Input data * @param out Output buffer * @param bytes Length of data */ - inline void decrypt20(const void *in,void *out,unsigned int bytes) - throw() - { - encrypt20(in,out,bytes); - } + void crypt20(const void *in,void *out,unsigned int bytes); private: union { |