summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ext/arm32-neon-salsa2012-asm/salsa2012.h 8
-rw-r--r-- make-linux.mk 13
-rw-r--r-- selftest.cpp 23
3 files changed, 42 insertions, 2 deletions
diff --git a/ext/arm32-neon-salsa2012-asm/salsa2012.h b/ext/arm32-neon-salsa2012-asm/salsa2012.h
index 7820a2e6..719b2e0c 100644
--- a/ext/arm32-neon-salsa2012-asm/salsa2012.h
+++ b/ext/arm32-neon-salsa2012-asm/salsa2012.h
@@ -1,6 +1,14 @@
#ifndef ZT_SALSA2012_ARM32NEON_ASM
#define ZT_SALSA2012_ARM32NEON_ASM
+#if defined(__linux__) || defined(linux) || defined(__LINUX__) || defined(__linux)
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+#define zt_arm_has_neon() (getauxval(AT_HWCAP) & HWCAP_NEON)
+#else
+#define zt_arm_has_neon() (true)
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/make-linux.mk b/make-linux.mk
index 2d2f023f..87d29afe 100644
--- a/make-linux.mk
+++ b/make-linux.mk
@@ -98,30 +98,37 @@ endif
ifeq ($(CC_MACH),arm)
ZT_ARCHITECTURE=3
override DEFS+=-DZT_NO_TYPE_PUNNING
+ ZT_USE_ARM32_NEON_ASM_SALSA2012=1
endif
ifeq ($(CC_MACH),armel)
ZT_ARCHITECTURE=3
override DEFS+=-DZT_NO_TYPE_PUNNING
+ ZT_USE_ARM32_NEON_ASM_SALSA2012=1
endif
ifeq ($(CC_MACH),armhf)
ZT_ARCHITECTURE=3
override DEFS+=-DZT_NO_TYPE_PUNNING
+ ZT_USE_ARM32_NEON_ASM_SALSA2012=1
endif
ifeq ($(CC_MACH),armv6)
ZT_ARCHITECTURE=3
override DEFS+=-DZT_NO_TYPE_PUNNING
+ ZT_USE_ARM32_NEON_ASM_SALSA2012=1
endif
ifeq ($(CC_MACH),armv6zk)
ZT_ARCHITECTURE=3
override DEFS+=-DZT_NO_TYPE_PUNNING
+ ZT_USE_ARM32_NEON_ASM_SALSA2012=1
endif
ifeq ($(CC_MACH),armv6kz)
ZT_ARCHITECTURE=3
override DEFS+=-DZT_NO_TYPE_PUNNING
+ ZT_USE_ARM32_NEON_ASM_SALSA2012=1
endif
ifeq ($(CC_MACH),armv7)
ZT_ARCHITECTURE=3
override DEFS+=-DZT_NO_TYPE_PUNNING
+ ZT_USE_ARM32_NEON_ASM_SALSA2012=1
endif
ifeq ($(CC_MACH),arm64)
ZT_ARCHITECTURE=4
@@ -158,11 +165,15 @@ endif
# Disable software updates by default on Linux since that is normally done with package management
override DEFS+=-DZT_BUILD_PLATFORM=1 -DZT_BUILD_ARCHITECTURE=$(ZT_ARCHITECTURE) -DZT_SOFTWARE_UPDATE_DEFAULT="\"disable\""
-# Use X64 ASM Salsa20/12 on X86_64 target
+# Build faster crypto on some targets
ifeq ($(ZT_USE_X64_ASM_SALSA2012),1)
override DEFS+=-DZT_USE_X64_ASM_SALSA2012
override OBJS+=ext/x64-salsa2012-asm/salsa2012.o
endif
+ifeq ($(ZT_USE_ARM32_NEON_ASM_SALSA2012),1)
+ override DEFS+=-DZT_USE_ARM32_NEON_ASM_SALSA2012
+ override OBJS+=ext/arm32-neon-salsa2012-asm/salsa2012.o
+endif
# Static builds, which are currently done for a number of Linux targets
ifeq ($(ZT_STATIC),1)
diff --git a/selftest.cpp b/selftest.cpp
index e68cf047..6df2e272 100644
--- a/selftest.cpp
+++ b/selftest.cpp
@@ -57,6 +57,9 @@
#ifdef ZT_USE_X64_ASM_SALSA2012
#include "ext/x64-salsa2012-asm/salsa2012.h"
#endif
+#ifdef ZT_USE_ARM32_NEON_ASM_SALSA2012
+#include "ext/arm32-neon-salsa2012-asm/salsa2012.h"
+#endif
#ifdef __WINDOWS__
#include <tchar.h>
@@ -215,7 +218,7 @@ static int testCrypto()
double bytes = 0.0;
uint64_t start = OSUtils::now();
for(unsigned int i=0;i<200;++i) {
- zt_salsa2012_amd64_xmm6(bb, 1234567, s20TV0Iv, s20TV0Key);
+ zt_salsa2012_amd64_xmm6(bb,1234567,s20TV0Iv,s20TV0Key);
bytes += 1234567.0;
}
uint64_t end = OSUtils::now();
@@ -224,6 +227,24 @@ static int testCrypto()
}
#endif
+#ifdef ZT_USE_ARM32_NEON_ASM_SALSA2012
+ if (zt_arm_has_neon()) {
+ std::cout << "[crypto] Benchmarking Salsa20/12 fast arm32/neon ASM... "; std::cout.flush();
+ {
+ unsigned char *bb = (unsigned char *)::malloc(1234567);
+ double bytes = 0.0;
+ uint64_t start = OSUtils::now();
+ for(unsigned int i=0;i<200;++i) {
+ zt_salsa2012_armneon3_xor(bb,(const unsigned char *)0,1234567,s20TV0Iv,s20TV0Key);
+ bytes += 1234567.0;
+ }
+ uint64_t end = OSUtils::now();
+ std::cout << ((bytes / 1048576.0) / ((double)(end - start) / 1024.0)) << " MiB/second" << std::endl;
+ ::free((void *)bb);
+ }
+ }
+#endif
+
std::cout << "[crypto] Benchmarking Salsa20/20... "; std::cout.flush();
{
unsigned char *bb = (unsigned char *)::malloc(1234567);