Diffstat (limited to 'src/libstrongswan/plugins/aesni')
-rw-r--r--  src/libstrongswan/plugins/aesni/Makefile.am    |   26
-rw-r--r--  src/libstrongswan/plugins/aesni/Makefile.in    |  793
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_cbc.c    |  671
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_cbc.h    |   48
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_ccm.c    |  914
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_ccm.h    |   50
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_cmac.c   |  371
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_cmac.h   |   52
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_ctr.c    |  643
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_ctr.h    |   48
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_gcm.c    | 1447
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_gcm.h    |   50
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_key.c    |  301
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_key.h    |   65
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_plugin.c |  125
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_plugin.h |   42
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_xcbc.c   |  367
-rw-r--r--  src/libstrongswan/plugins/aesni/aesni_xcbc.h   |   52
18 files changed, 6065 insertions, 0 deletions
diff --git a/src/libstrongswan/plugins/aesni/Makefile.am b/src/libstrongswan/plugins/aesni/Makefile.am
new file mode 100644
index 000000000..2fe85c66c
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/Makefile.am
@@ -0,0 +1,26 @@
+AM_CPPFLAGS = \
+ -I$(top_srcdir)/src/libstrongswan
+
+AM_CFLAGS = \
+ -maes \
+ -mpclmul \
+ -mssse3 \
+ $(PLUGIN_CFLAGS)
+
+if MONOLITHIC
+noinst_LTLIBRARIES = libstrongswan-aesni.la
+else
+plugin_LTLIBRARIES = libstrongswan-aesni.la
+endif
+
+libstrongswan_aesni_la_SOURCES = \
+ aesni_key.h aesni_key.c \
+ aesni_cbc.h aesni_cbc.c \
+ aesni_ctr.h aesni_ctr.c \
+ aesni_ccm.h aesni_ccm.c \
+ aesni_gcm.h aesni_gcm.c \
+ aesni_xcbc.h aesni_xcbc.c \
+ aesni_cmac.h aesni_cmac.c \
+ aesni_plugin.h aesni_plugin.c
+
+libstrongswan_aesni_la_LDFLAGS = -module -avoid-version
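
The -maes, -mpclmul and -mssse3 flags only allow the compiler to emit
AES-NI, PCLMULQDQ and SSSE3 instructions; whether the CPU actually
provides them still has to be verified at runtime before the plugin
registers its algorithms. A minimal stand-alone sketch of such a check,
assuming GCC's <cpuid.h> and its bit_AES/bit_PCLMUL/bit_SSSE3 feature
masks (the plugin itself relies on libstrongswan's CPU feature detection,
not on this hypothetical helper):

    #include <cpuid.h>
    #include <stdbool.h>

    static bool have_aesni_features(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* CPUID leaf 1: ECX carries the AES, PCLMUL and SSSE3 bits */
        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        {
            return false;
        }
        return (ecx & bit_AES) && (ecx & bit_PCLMUL) && (ecx & bit_SSSE3);
    }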
diff --git a/src/libstrongswan/plugins/aesni/Makefile.in b/src/libstrongswan/plugins/aesni/Makefile.in
new file mode 100644
index 000000000..34adaa390
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/Makefile.in
@@ -0,0 +1,793 @@
+# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/libstrongswan/plugins/aesni
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
+ $(top_srcdir)/depcomp
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/config/libtool.m4 \
+ $(top_srcdir)/m4/config/ltoptions.m4 \
+ $(top_srcdir)/m4/config/ltsugar.m4 \
+ $(top_srcdir)/m4/config/ltversion.m4 \
+ $(top_srcdir)/m4/config/lt~obsolete.m4 \
+ $(top_srcdir)/m4/macros/split-package-version.m4 \
+ $(top_srcdir)/m4/macros/with.m4 \
+ $(top_srcdir)/m4/macros/enable-disable.m4 \
+ $(top_srcdir)/m4/macros/add-plugin.m4 \
+ $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__installdirs = "$(DESTDIR)$(plugindir)"
+LTLIBRARIES = $(noinst_LTLIBRARIES) $(plugin_LTLIBRARIES)
+libstrongswan_aesni_la_LIBADD =
+am_libstrongswan_aesni_la_OBJECTS = aesni_key.lo aesni_cbc.lo \
+ aesni_ctr.lo aesni_ccm.lo aesni_gcm.lo aesni_xcbc.lo \
+ aesni_cmac.lo aesni_plugin.lo
+libstrongswan_aesni_la_OBJECTS = $(am_libstrongswan_aesni_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+libstrongswan_aesni_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
+ $(AM_CFLAGS) $(CFLAGS) $(libstrongswan_aesni_la_LDFLAGS) \
+ $(LDFLAGS) -o $@
+@MONOLITHIC_FALSE@am_libstrongswan_aesni_la_rpath = -rpath \
+@MONOLITHIC_FALSE@ $(plugindir)
+@MONOLITHIC_TRUE@am_libstrongswan_aesni_la_rpath =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(libstrongswan_aesni_la_SOURCES)
+DIST_SOURCES = $(libstrongswan_aesni_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BFDLIB = @BFDLIB@
+BTLIB = @BTLIB@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+COVERAGE_CFLAGS = @COVERAGE_CFLAGS@
+COVERAGE_LDFLAGS = @COVERAGE_LDFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLIB = @DLLIB@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+EASY_INSTALL = @EASY_INSTALL@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GEM = @GEM@
+GENHTML = @GENHTML@
+GPERF = @GPERF@
+GPRBUILD = @GPRBUILD@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LCOV = @LCOV@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MYSQLCFLAG = @MYSQLCFLAG@
+MYSQLCONFIG = @MYSQLCONFIG@
+MYSQLLIB = @MYSQLLIB@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OPENSSL_LIB = @OPENSSL_LIB@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PACKAGE_VERSION_BUILD = @PACKAGE_VERSION_BUILD@
+PACKAGE_VERSION_MAJOR = @PACKAGE_VERSION_MAJOR@
+PACKAGE_VERSION_MINOR = @PACKAGE_VERSION_MINOR@
+PACKAGE_VERSION_REVIEW = @PACKAGE_VERSION_REVIEW@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERL = @PERL@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+PLUGIN_CFLAGS = @PLUGIN_CFLAGS@
+PTHREADLIB = @PTHREADLIB@
+PYTHON = @PYTHON@
+PYTHONEGGINSTALLDIR = @PYTHONEGGINSTALLDIR@
+PYTHON_EXEC_PREFIX = @PYTHON_EXEC_PREFIX@
+PYTHON_PLATFORM = @PYTHON_PLATFORM@
+PYTHON_PREFIX = @PYTHON_PREFIX@
+PYTHON_VERSION = @PYTHON_VERSION@
+PY_TEST = @PY_TEST@
+RANLIB = @RANLIB@
+RTLIB = @RTLIB@
+RUBY = @RUBY@
+RUBYGEMDIR = @RUBYGEMDIR@
+RUBYINCLUDE = @RUBYINCLUDE@
+RUBYLIB = @RUBYLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SOCKLIB = @SOCKLIB@
+STRIP = @STRIP@
+UNWINDLIB = @UNWINDLIB@
+VERSION = @VERSION@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+aikgen_plugins = @aikgen_plugins@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+attest_plugins = @attest_plugins@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+c_plugins = @c_plugins@
+charon_natt_port = @charon_natt_port@
+charon_plugins = @charon_plugins@
+charon_udp_port = @charon_udp_port@
+clearsilver_LIBS = @clearsilver_LIBS@
+cmd_plugins = @cmd_plugins@
+datadir = @datadir@
+datarootdir = @datarootdir@
+dbusservicedir = @dbusservicedir@
+dev_headers = @dev_headers@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+fips_mode = @fips_mode@
+gtk_CFLAGS = @gtk_CFLAGS@
+gtk_LIBS = @gtk_LIBS@
+h_plugins = @h_plugins@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+imcvdir = @imcvdir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+ipsec_script = @ipsec_script@
+ipsec_script_upper = @ipsec_script_upper@
+ipsecdir = @ipsecdir@
+ipsecgroup = @ipsecgroup@
+ipseclibdir = @ipseclibdir@
+ipsecuser = @ipsecuser@
+json_CFLAGS = @json_CFLAGS@
+json_LIBS = @json_LIBS@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libiptc_CFLAGS = @libiptc_CFLAGS@
+libiptc_LIBS = @libiptc_LIBS@
+linux_headers = @linux_headers@
+localedir = @localedir@
+localstatedir = @localstatedir@
+maemo_CFLAGS = @maemo_CFLAGS@
+maemo_LIBS = @maemo_LIBS@
+manager_plugins = @manager_plugins@
+mandir = @mandir@
+medsrv_plugins = @medsrv_plugins@
+mkdir_p = @mkdir_p@
+nm_CFLAGS = @nm_CFLAGS@
+nm_LIBS = @nm_LIBS@
+nm_ca_dir = @nm_ca_dir@
+nm_plugins = @nm_plugins@
+oldincludedir = @oldincludedir@
+pcsclite_CFLAGS = @pcsclite_CFLAGS@
+pcsclite_LIBS = @pcsclite_LIBS@
+pdfdir = @pdfdir@
+piddir = @piddir@
+pkgpyexecdir = @pkgpyexecdir@
+pkgpythondir = @pkgpythondir@
+pki_plugins = @pki_plugins@
+plugindir = @plugindir@
+pool_plugins = @pool_plugins@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+pyexecdir = @pyexecdir@
+pythondir = @pythondir@
+random_device = @random_device@
+resolv_conf = @resolv_conf@
+routing_table = @routing_table@
+routing_table_prio = @routing_table_prio@
+s_plugins = @s_plugins@
+sbindir = @sbindir@
+scepclient_plugins = @scepclient_plugins@
+scripts_plugins = @scripts_plugins@
+sharedstatedir = @sharedstatedir@
+soup_CFLAGS = @soup_CFLAGS@
+soup_LIBS = @soup_LIBS@
+srcdir = @srcdir@
+starter_plugins = @starter_plugins@
+strongswan_conf = @strongswan_conf@
+strongswan_options = @strongswan_options@
+swanctldir = @swanctldir@
+sysconfdir = @sysconfdir@
+systemd_daemon_CFLAGS = @systemd_daemon_CFLAGS@
+systemd_daemon_LIBS = @systemd_daemon_LIBS@
+systemd_journal_CFLAGS = @systemd_journal_CFLAGS@
+systemd_journal_LIBS = @systemd_journal_LIBS@
+systemdsystemunitdir = @systemdsystemunitdir@
+t_plugins = @t_plugins@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+urandom_device = @urandom_device@
+xml_CFLAGS = @xml_CFLAGS@
+xml_LIBS = @xml_LIBS@
+AM_CPPFLAGS = \
+ -I$(top_srcdir)/src/libstrongswan
+
+AM_CFLAGS = \
+ -maes \
+ -mpclmul \
+ -mssse3 \
+ $(PLUGIN_CFLAGS)
+
+@MONOLITHIC_TRUE@noinst_LTLIBRARIES = libstrongswan-aesni.la
+@MONOLITHIC_FALSE@plugin_LTLIBRARIES = libstrongswan-aesni.la
+libstrongswan_aesni_la_SOURCES = \
+ aesni_key.h aesni_key.c \
+ aesni_cbc.h aesni_cbc.c \
+ aesni_ctr.h aesni_ctr.c \
+ aesni_ccm.h aesni_ccm.c \
+ aesni_gcm.h aesni_gcm.c \
+ aesni_xcbc.h aesni_xcbc.c \
+ aesni_cmac.h aesni_cmac.c \
+ aesni_plugin.h aesni_plugin.c
+
+libstrongswan_aesni_la_LDFLAGS = -module -avoid-version
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/libstrongswan/plugins/aesni/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu src/libstrongswan/plugins/aesni/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+install-pluginLTLIBRARIES: $(plugin_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ @list='$(plugin_LTLIBRARIES)'; test -n "$(plugindir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(MKDIR_P) '$(DESTDIR)$(plugindir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(plugindir)" || exit 1; \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(plugindir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(plugindir)"; \
+ }
+
+uninstall-pluginLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(plugin_LTLIBRARIES)'; test -n "$(plugindir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(plugindir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(plugindir)/$$f"; \
+ done
+
+clean-pluginLTLIBRARIES:
+ -test -z "$(plugin_LTLIBRARIES)" || rm -f $(plugin_LTLIBRARIES)
+ @list='$(plugin_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+libstrongswan-aesni.la: $(libstrongswan_aesni_la_OBJECTS) $(libstrongswan_aesni_la_DEPENDENCIES) $(EXTRA_libstrongswan_aesni_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(libstrongswan_aesni_la_LINK) $(am_libstrongswan_aesni_la_rpath) $(libstrongswan_aesni_la_OBJECTS) $(libstrongswan_aesni_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_cbc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_ccm.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_cmac.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_ctr.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_gcm.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_key.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_plugin.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_xcbc.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+ for dir in "$(DESTDIR)$(plugindir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+ clean-pluginLTLIBRARIES mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-pluginLTLIBRARIES
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-pluginLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-noinstLTLIBRARIES clean-pluginLTLIBRARIES \
+ cscopelist-am ctags ctags-am distclean distclean-compile \
+ distclean-generic distclean-libtool distclean-tags distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
+ install-info install-info-am install-man install-pdf \
+ install-pdf-am install-pluginLTLIBRARIES install-ps \
+ install-ps-am install-strip installcheck installcheck-am \
+ installdirs maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \
+ uninstall-am uninstall-pluginLTLIBRARIES
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/libstrongswan/plugins/aesni/aesni_cbc.c b/src/libstrongswan/plugins/aesni/aesni_cbc.c
new file mode 100644
index 000000000..78ada7663
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_cbc.c
@@ -0,0 +1,671 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "aesni_cbc.h"
+#include "aesni_key.h"
+
+/**
+ * Pipeline parallelism we use for CBC decryption: decryption can be
+ * pipelined because all ciphertext blocks are available up front, whereas
+ * CBC encryption is inherently serial due to the ciphertext feedback
+ */
+#define CBC_DECRYPT_PARALLELISM 4
+
+typedef struct private_aesni_cbc_t private_aesni_cbc_t;
+
+/**
+ * CBC en/decryption method type
+ */
+typedef void (*aesni_cbc_fn_t)(aesni_key_t*, u_int, u_char*, u_char*, u_char*);
+
+/**
+ * Private data of an aesni_cbc_t object.
+ */
+struct private_aesni_cbc_t {
+
+ /**
+ * Public aesni_cbc_t interface.
+ */
+ aesni_cbc_t public;
+
+ /**
+ * Key size
+ */
+ u_int key_size;
+
+ /**
+ * Encryption key schedule
+ */
+ aesni_key_t *ekey;
+
+ /**
+ * Decryption key schedule
+ */
+ aesni_key_t *dkey;
+
+ /**
+ * Encryption method
+ */
+ aesni_cbc_fn_t encrypt;
+
+ /**
+ * Decryption method
+ */
+ aesni_cbc_fn_t decrypt;
+};
+
+/**
+ * AES-128 CBC encryption
+ */
+static void encrypt_cbc128(aesni_key_t *key, u_int blocks, u_char *in,
+ u_char *iv, u_char *out)
+{
+ __m128i *ks, t, fb, *bi, *bo;
+ int i;
+
+ ks = key->schedule;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ fb = _mm_loadu_si128((__m128i*)iv);
+ for (i = 0; i < blocks; i++)
+ {
+ t = _mm_loadu_si128(bi + i);
+ fb = _mm_xor_si128(t, fb);
+ fb = _mm_xor_si128(fb, ks[0]);
+
+ fb = _mm_aesenc_si128(fb, ks[1]);
+ fb = _mm_aesenc_si128(fb, ks[2]);
+ fb = _mm_aesenc_si128(fb, ks[3]);
+ fb = _mm_aesenc_si128(fb, ks[4]);
+ fb = _mm_aesenc_si128(fb, ks[5]);
+ fb = _mm_aesenc_si128(fb, ks[6]);
+ fb = _mm_aesenc_si128(fb, ks[7]);
+ fb = _mm_aesenc_si128(fb, ks[8]);
+ fb = _mm_aesenc_si128(fb, ks[9]);
+
+ fb = _mm_aesenclast_si128(fb, ks[10]);
+ _mm_storeu_si128(bo + i, fb);
+ }
+}
+
+/**
+ * AES-128 CBC decryption
+ */
+static void decrypt_cbc128(aesni_key_t *key, u_int blocks, u_char *in,
+ u_char *iv, u_char *out)
+{
+ __m128i *ks, last, *bi, *bo;
+ __m128i t1, t2, t3, t4;
+ __m128i f1, f2, f3, f4;
+ u_int i, pblocks;
+
+ ks = key->schedule;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+ pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);
+
+ f1 = _mm_loadu_si128((__m128i*)iv);
+
+ for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
+ {
+ t1 = _mm_loadu_si128(bi + i + 0);
+ t2 = _mm_loadu_si128(bi + i + 1);
+ t3 = _mm_loadu_si128(bi + i + 2);
+ t4 = _mm_loadu_si128(bi + i + 3);
+
+ f2 = t1;
+ f3 = t2;
+ f4 = t3;
+ last = t4;
+
+ t1 = _mm_xor_si128(t1, ks[0]);
+ t2 = _mm_xor_si128(t2, ks[0]);
+ t3 = _mm_xor_si128(t3, ks[0]);
+ t4 = _mm_xor_si128(t4, ks[0]);
+
+ t1 = _mm_aesdec_si128(t1, ks[1]);
+ t2 = _mm_aesdec_si128(t2, ks[1]);
+ t3 = _mm_aesdec_si128(t3, ks[1]);
+ t4 = _mm_aesdec_si128(t4, ks[1]);
+ t1 = _mm_aesdec_si128(t1, ks[2]);
+ t2 = _mm_aesdec_si128(t2, ks[2]);
+ t3 = _mm_aesdec_si128(t3, ks[2]);
+ t4 = _mm_aesdec_si128(t4, ks[2]);
+ t1 = _mm_aesdec_si128(t1, ks[3]);
+ t2 = _mm_aesdec_si128(t2, ks[3]);
+ t3 = _mm_aesdec_si128(t3, ks[3]);
+ t4 = _mm_aesdec_si128(t4, ks[3]);
+ t1 = _mm_aesdec_si128(t1, ks[4]);
+ t2 = _mm_aesdec_si128(t2, ks[4]);
+ t3 = _mm_aesdec_si128(t3, ks[4]);
+ t4 = _mm_aesdec_si128(t4, ks[4]);
+ t1 = _mm_aesdec_si128(t1, ks[5]);
+ t2 = _mm_aesdec_si128(t2, ks[5]);
+ t3 = _mm_aesdec_si128(t3, ks[5]);
+ t4 = _mm_aesdec_si128(t4, ks[5]);
+ t1 = _mm_aesdec_si128(t1, ks[6]);
+ t2 = _mm_aesdec_si128(t2, ks[6]);
+ t3 = _mm_aesdec_si128(t3, ks[6]);
+ t4 = _mm_aesdec_si128(t4, ks[6]);
+ t1 = _mm_aesdec_si128(t1, ks[7]);
+ t2 = _mm_aesdec_si128(t2, ks[7]);
+ t3 = _mm_aesdec_si128(t3, ks[7]);
+ t4 = _mm_aesdec_si128(t4, ks[7]);
+ t1 = _mm_aesdec_si128(t1, ks[8]);
+ t2 = _mm_aesdec_si128(t2, ks[8]);
+ t3 = _mm_aesdec_si128(t3, ks[8]);
+ t4 = _mm_aesdec_si128(t4, ks[8]);
+ t1 = _mm_aesdec_si128(t1, ks[9]);
+ t2 = _mm_aesdec_si128(t2, ks[9]);
+ t3 = _mm_aesdec_si128(t3, ks[9]);
+ t4 = _mm_aesdec_si128(t4, ks[9]);
+
+ t1 = _mm_aesdeclast_si128(t1, ks[10]);
+ t2 = _mm_aesdeclast_si128(t2, ks[10]);
+ t3 = _mm_aesdeclast_si128(t3, ks[10]);
+ t4 = _mm_aesdeclast_si128(t4, ks[10]);
+ t1 = _mm_xor_si128(t1, f1);
+ t2 = _mm_xor_si128(t2, f2);
+ t3 = _mm_xor_si128(t3, f3);
+ t4 = _mm_xor_si128(t4, f4);
+ _mm_storeu_si128(bo + i + 0, t1);
+ _mm_storeu_si128(bo + i + 1, t2);
+ _mm_storeu_si128(bo + i + 2, t3);
+ _mm_storeu_si128(bo + i + 3, t4);
+ f1 = last;
+ }
+
+ for (i = pblocks; i < blocks; i++)
+ {
+ last = _mm_loadu_si128(bi + i);
+ t1 = _mm_xor_si128(last, ks[0]);
+
+ t1 = _mm_aesdec_si128(t1, ks[1]);
+ t1 = _mm_aesdec_si128(t1, ks[2]);
+ t1 = _mm_aesdec_si128(t1, ks[3]);
+ t1 = _mm_aesdec_si128(t1, ks[4]);
+ t1 = _mm_aesdec_si128(t1, ks[5]);
+ t1 = _mm_aesdec_si128(t1, ks[6]);
+ t1 = _mm_aesdec_si128(t1, ks[7]);
+ t1 = _mm_aesdec_si128(t1, ks[8]);
+ t1 = _mm_aesdec_si128(t1, ks[9]);
+
+ t1 = _mm_aesdeclast_si128(t1, ks[10]);
+ t1 = _mm_xor_si128(t1, f1);
+ _mm_storeu_si128(bo + i, t1);
+ f1 = last;
+ }
+}
+
+/**
+ * AES-192 CBC encryption
+ */
+static void encrypt_cbc192(aesni_key_t *key, u_int blocks, u_char *in,
+ u_char *iv, u_char *out)
+{
+ __m128i *ks, t, fb, *bi, *bo;
+ int i;
+
+ ks = key->schedule;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ fb = _mm_loadu_si128((__m128i*)iv);
+ for (i = 0; i < blocks; i++)
+ {
+ t = _mm_loadu_si128(bi + i);
+ fb = _mm_xor_si128(t, fb);
+ fb = _mm_xor_si128(fb, ks[0]);
+
+ fb = _mm_aesenc_si128(fb, ks[1]);
+ fb = _mm_aesenc_si128(fb, ks[2]);
+ fb = _mm_aesenc_si128(fb, ks[3]);
+ fb = _mm_aesenc_si128(fb, ks[4]);
+ fb = _mm_aesenc_si128(fb, ks[5]);
+ fb = _mm_aesenc_si128(fb, ks[6]);
+ fb = _mm_aesenc_si128(fb, ks[7]);
+ fb = _mm_aesenc_si128(fb, ks[8]);
+ fb = _mm_aesenc_si128(fb, ks[9]);
+ fb = _mm_aesenc_si128(fb, ks[10]);
+ fb = _mm_aesenc_si128(fb, ks[11]);
+
+ fb = _mm_aesenclast_si128(fb, ks[12]);
+ _mm_storeu_si128(bo + i, fb);
+ }
+}
+
+/**
+ * AES-192 CBC decryption
+ */
+static void decrypt_cbc192(aesni_key_t *key, u_int blocks, u_char *in,
+ u_char *iv, u_char *out)
+{
+ __m128i *ks, last, *bi, *bo;
+ __m128i t1, t2, t3, t4;
+ __m128i f1, f2, f3, f4;
+ u_int i, pblocks;
+
+ ks = key->schedule;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+ pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);
+
+ f1 = _mm_loadu_si128((__m128i*)iv);
+
+ for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
+ {
+ t1 = _mm_loadu_si128(bi + i + 0);
+ t2 = _mm_loadu_si128(bi + i + 1);
+ t3 = _mm_loadu_si128(bi + i + 2);
+ t4 = _mm_loadu_si128(bi + i + 3);
+
+ f2 = t1;
+ f3 = t2;
+ f4 = t3;
+ last = t4;
+
+ t1 = _mm_xor_si128(t1, ks[0]);
+ t2 = _mm_xor_si128(t2, ks[0]);
+ t3 = _mm_xor_si128(t3, ks[0]);
+ t4 = _mm_xor_si128(t4, ks[0]);
+
+ t1 = _mm_aesdec_si128(t1, ks[1]);
+ t2 = _mm_aesdec_si128(t2, ks[1]);
+ t3 = _mm_aesdec_si128(t3, ks[1]);
+ t4 = _mm_aesdec_si128(t4, ks[1]);
+ t1 = _mm_aesdec_si128(t1, ks[2]);
+ t2 = _mm_aesdec_si128(t2, ks[2]);
+ t3 = _mm_aesdec_si128(t3, ks[2]);
+ t4 = _mm_aesdec_si128(t4, ks[2]);
+ t1 = _mm_aesdec_si128(t1, ks[3]);
+ t2 = _mm_aesdec_si128(t2, ks[3]);
+ t3 = _mm_aesdec_si128(t3, ks[3]);
+ t4 = _mm_aesdec_si128(t4, ks[3]);
+ t1 = _mm_aesdec_si128(t1, ks[4]);
+ t2 = _mm_aesdec_si128(t2, ks[4]);
+ t3 = _mm_aesdec_si128(t3, ks[4]);
+ t4 = _mm_aesdec_si128(t4, ks[4]);
+ t1 = _mm_aesdec_si128(t1, ks[5]);
+ t2 = _mm_aesdec_si128(t2, ks[5]);
+ t3 = _mm_aesdec_si128(t3, ks[5]);
+ t4 = _mm_aesdec_si128(t4, ks[5]);
+ t1 = _mm_aesdec_si128(t1, ks[6]);
+ t2 = _mm_aesdec_si128(t2, ks[6]);
+ t3 = _mm_aesdec_si128(t3, ks[6]);
+ t4 = _mm_aesdec_si128(t4, ks[6]);
+ t1 = _mm_aesdec_si128(t1, ks[7]);
+ t2 = _mm_aesdec_si128(t2, ks[7]);
+ t3 = _mm_aesdec_si128(t3, ks[7]);
+ t4 = _mm_aesdec_si128(t4, ks[7]);
+ t1 = _mm_aesdec_si128(t1, ks[8]);
+ t2 = _mm_aesdec_si128(t2, ks[8]);
+ t3 = _mm_aesdec_si128(t3, ks[8]);
+ t4 = _mm_aesdec_si128(t4, ks[8]);
+ t1 = _mm_aesdec_si128(t1, ks[9]);
+ t2 = _mm_aesdec_si128(t2, ks[9]);
+ t3 = _mm_aesdec_si128(t3, ks[9]);
+ t4 = _mm_aesdec_si128(t4, ks[9]);
+ t1 = _mm_aesdec_si128(t1, ks[10]);
+ t2 = _mm_aesdec_si128(t2, ks[10]);
+ t3 = _mm_aesdec_si128(t3, ks[10]);
+ t4 = _mm_aesdec_si128(t4, ks[10]);
+ t1 = _mm_aesdec_si128(t1, ks[11]);
+ t2 = _mm_aesdec_si128(t2, ks[11]);
+ t3 = _mm_aesdec_si128(t3, ks[11]);
+ t4 = _mm_aesdec_si128(t4, ks[11]);
+
+ t1 = _mm_aesdeclast_si128(t1, ks[12]);
+ t2 = _mm_aesdeclast_si128(t2, ks[12]);
+ t3 = _mm_aesdeclast_si128(t3, ks[12]);
+ t4 = _mm_aesdeclast_si128(t4, ks[12]);
+ t1 = _mm_xor_si128(t1, f1);
+ t2 = _mm_xor_si128(t2, f2);
+ t3 = _mm_xor_si128(t3, f3);
+ t4 = _mm_xor_si128(t4, f4);
+ _mm_storeu_si128(bo + i + 0, t1);
+ _mm_storeu_si128(bo + i + 1, t2);
+ _mm_storeu_si128(bo + i + 2, t3);
+ _mm_storeu_si128(bo + i + 3, t4);
+ f1 = last;
+ }
+
+ for (i = pblocks; i < blocks; i++)
+ {
+ last = _mm_loadu_si128(bi + i);
+ t1 = _mm_xor_si128(last, ks[0]);
+
+ t1 = _mm_aesdec_si128(t1, ks[1]);
+ t1 = _mm_aesdec_si128(t1, ks[2]);
+ t1 = _mm_aesdec_si128(t1, ks[3]);
+ t1 = _mm_aesdec_si128(t1, ks[4]);
+ t1 = _mm_aesdec_si128(t1, ks[5]);
+ t1 = _mm_aesdec_si128(t1, ks[6]);
+ t1 = _mm_aesdec_si128(t1, ks[7]);
+ t1 = _mm_aesdec_si128(t1, ks[8]);
+ t1 = _mm_aesdec_si128(t1, ks[9]);
+ t1 = _mm_aesdec_si128(t1, ks[10]);
+ t1 = _mm_aesdec_si128(t1, ks[11]);
+
+ t1 = _mm_aesdeclast_si128(t1, ks[12]);
+ t1 = _mm_xor_si128(t1, f1);
+ _mm_storeu_si128(bo + i, t1);
+ f1 = last;
+ }
+}
+
+/**
+ * AES-256 CBC encryption
+ */
+static void encrypt_cbc256(aesni_key_t *key, u_int blocks, u_char *in,
+ u_char *iv, u_char *out)
+{
+ __m128i *ks, t, fb, *bi, *bo;
+ int i;
+
+ ks = key->schedule;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ fb = _mm_loadu_si128((__m128i*)iv);
+ for (i = 0; i < blocks; i++)
+ {
+ t = _mm_loadu_si128(bi + i);
+ fb = _mm_xor_si128(t, fb);
+ fb = _mm_xor_si128(fb, ks[0]);
+
+ fb = _mm_aesenc_si128(fb, ks[1]);
+ fb = _mm_aesenc_si128(fb, ks[2]);
+ fb = _mm_aesenc_si128(fb, ks[3]);
+ fb = _mm_aesenc_si128(fb, ks[4]);
+ fb = _mm_aesenc_si128(fb, ks[5]);
+ fb = _mm_aesenc_si128(fb, ks[6]);
+ fb = _mm_aesenc_si128(fb, ks[7]);
+ fb = _mm_aesenc_si128(fb, ks[8]);
+ fb = _mm_aesenc_si128(fb, ks[9]);
+ fb = _mm_aesenc_si128(fb, ks[10]);
+ fb = _mm_aesenc_si128(fb, ks[11]);
+ fb = _mm_aesenc_si128(fb, ks[12]);
+ fb = _mm_aesenc_si128(fb, ks[13]);
+
+ fb = _mm_aesenclast_si128(fb, ks[14]);
+ _mm_storeu_si128(bo + i, fb);
+ }
+}
+
+/**
+ * AES-256 CBC decryption
+ */
+static void decrypt_cbc256(aesni_key_t *key, u_int blocks, u_char *in,
+ u_char *iv, u_char *out)
+{
+ __m128i *ks, last, *bi, *bo;
+ __m128i t1, t2, t3, t4;
+ __m128i f1, f2, f3, f4;
+ u_int i, pblocks;
+
+ ks = key->schedule;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+ pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);
+
+ f1 = _mm_loadu_si128((__m128i*)iv);
+
+ for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
+ {
+ t1 = _mm_loadu_si128(bi + i + 0);
+ t2 = _mm_loadu_si128(bi + i + 1);
+ t3 = _mm_loadu_si128(bi + i + 2);
+ t4 = _mm_loadu_si128(bi + i + 3);
+
+ f2 = t1;
+ f3 = t2;
+ f4 = t3;
+ last = t4;
+
+ t1 = _mm_xor_si128(t1, ks[0]);
+ t2 = _mm_xor_si128(t2, ks[0]);
+ t3 = _mm_xor_si128(t3, ks[0]);
+ t4 = _mm_xor_si128(t4, ks[0]);
+
+ t1 = _mm_aesdec_si128(t1, ks[1]);
+ t2 = _mm_aesdec_si128(t2, ks[1]);
+ t3 = _mm_aesdec_si128(t3, ks[1]);
+ t4 = _mm_aesdec_si128(t4, ks[1]);
+ t1 = _mm_aesdec_si128(t1, ks[2]);
+ t2 = _mm_aesdec_si128(t2, ks[2]);
+ t3 = _mm_aesdec_si128(t3, ks[2]);
+ t4 = _mm_aesdec_si128(t4, ks[2]);
+ t1 = _mm_aesdec_si128(t1, ks[3]);
+ t2 = _mm_aesdec_si128(t2, ks[3]);
+ t3 = _mm_aesdec_si128(t3, ks[3]);
+ t4 = _mm_aesdec_si128(t4, ks[3]);
+ t1 = _mm_aesdec_si128(t1, ks[4]);
+ t2 = _mm_aesdec_si128(t2, ks[4]);
+ t3 = _mm_aesdec_si128(t3, ks[4]);
+ t4 = _mm_aesdec_si128(t4, ks[4]);
+ t1 = _mm_aesdec_si128(t1, ks[5]);
+ t2 = _mm_aesdec_si128(t2, ks[5]);
+ t3 = _mm_aesdec_si128(t3, ks[5]);
+ t4 = _mm_aesdec_si128(t4, ks[5]);
+ t1 = _mm_aesdec_si128(t1, ks[6]);
+ t2 = _mm_aesdec_si128(t2, ks[6]);
+ t3 = _mm_aesdec_si128(t3, ks[6]);
+ t4 = _mm_aesdec_si128(t4, ks[6]);
+ t1 = _mm_aesdec_si128(t1, ks[7]);
+ t2 = _mm_aesdec_si128(t2, ks[7]);
+ t3 = _mm_aesdec_si128(t3, ks[7]);
+ t4 = _mm_aesdec_si128(t4, ks[7]);
+ t1 = _mm_aesdec_si128(t1, ks[8]);
+ t2 = _mm_aesdec_si128(t2, ks[8]);
+ t3 = _mm_aesdec_si128(t3, ks[8]);
+ t4 = _mm_aesdec_si128(t4, ks[8]);
+ t1 = _mm_aesdec_si128(t1, ks[9]);
+ t2 = _mm_aesdec_si128(t2, ks[9]);
+ t3 = _mm_aesdec_si128(t3, ks[9]);
+ t4 = _mm_aesdec_si128(t4, ks[9]);
+ t1 = _mm_aesdec_si128(t1, ks[10]);
+ t2 = _mm_aesdec_si128(t2, ks[10]);
+ t3 = _mm_aesdec_si128(t3, ks[10]);
+ t4 = _mm_aesdec_si128(t4, ks[10]);
+ t1 = _mm_aesdec_si128(t1, ks[11]);
+ t2 = _mm_aesdec_si128(t2, ks[11]);
+ t3 = _mm_aesdec_si128(t3, ks[11]);
+ t4 = _mm_aesdec_si128(t4, ks[11]);
+ t1 = _mm_aesdec_si128(t1, ks[12]);
+ t2 = _mm_aesdec_si128(t2, ks[12]);
+ t3 = _mm_aesdec_si128(t3, ks[12]);
+ t4 = _mm_aesdec_si128(t4, ks[12]);
+ t1 = _mm_aesdec_si128(t1, ks[13]);
+ t2 = _mm_aesdec_si128(t2, ks[13]);
+ t3 = _mm_aesdec_si128(t3, ks[13]);
+ t4 = _mm_aesdec_si128(t4, ks[13]);
+
+ t1 = _mm_aesdeclast_si128(t1, ks[14]);
+ t2 = _mm_aesdeclast_si128(t2, ks[14]);
+ t3 = _mm_aesdeclast_si128(t3, ks[14]);
+ t4 = _mm_aesdeclast_si128(t4, ks[14]);
+ t1 = _mm_xor_si128(t1, f1);
+ t2 = _mm_xor_si128(t2, f2);
+ t3 = _mm_xor_si128(t3, f3);
+ t4 = _mm_xor_si128(t4, f4);
+ _mm_storeu_si128(bo + i + 0, t1);
+ _mm_storeu_si128(bo + i + 1, t2);
+ _mm_storeu_si128(bo + i + 2, t3);
+ _mm_storeu_si128(bo + i + 3, t4);
+ f1 = last;
+ }
+
+ for (i = pblocks; i < blocks; i++)
+ {
+ last = _mm_loadu_si128(bi + i);
+ t1 = _mm_xor_si128(last, ks[0]);
+
+ t1 = _mm_aesdec_si128(t1, ks[1]);
+ t1 = _mm_aesdec_si128(t1, ks[2]);
+ t1 = _mm_aesdec_si128(t1, ks[3]);
+ t1 = _mm_aesdec_si128(t1, ks[4]);
+ t1 = _mm_aesdec_si128(t1, ks[5]);
+ t1 = _mm_aesdec_si128(t1, ks[6]);
+ t1 = _mm_aesdec_si128(t1, ks[7]);
+ t1 = _mm_aesdec_si128(t1, ks[8]);
+ t1 = _mm_aesdec_si128(t1, ks[9]);
+ t1 = _mm_aesdec_si128(t1, ks[10]);
+ t1 = _mm_aesdec_si128(t1, ks[11]);
+ t1 = _mm_aesdec_si128(t1, ks[12]);
+ t1 = _mm_aesdec_si128(t1, ks[13]);
+
+ t1 = _mm_aesdeclast_si128(t1, ks[14]);
+ t1 = _mm_xor_si128(t1, f1);
+ _mm_storeu_si128(bo + i, t1);
+ f1 = last;
+ }
+}
+
+/**
+ * Do en-/decryption using the given key schedule, either inline or into a
+ * newly allocated chunk
+ */
+static bool crypt(aesni_cbc_fn_t fn, aesni_key_t *key,
+ chunk_t data, chunk_t iv, chunk_t *out)
+{
+ u_char *buf;
+
+ if (!key || iv.len != AES_BLOCK_SIZE || data.len % AES_BLOCK_SIZE)
+ {
+ return FALSE;
+ }
+ if (out)
+ {
+ *out = chunk_alloc(data.len);
+ buf = out->ptr;
+ }
+ else
+ {
+ buf = data.ptr;
+ }
+ fn(key, data.len / AES_BLOCK_SIZE, data.ptr, iv.ptr, buf);
+ return TRUE;
+}
+
+METHOD(crypter_t, encrypt, bool,
+ private_aesni_cbc_t *this, chunk_t data, chunk_t iv, chunk_t *encrypted)
+{
+ return crypt(this->encrypt, this->ekey, data, iv, encrypted);
+}
+
+METHOD(crypter_t, decrypt, bool,
+ private_aesni_cbc_t *this, chunk_t data, chunk_t iv, chunk_t *decrypted)
+{
+ return crypt(this->decrypt, this->dkey, data, iv, decrypted);
+}
+
+METHOD(crypter_t, get_block_size, size_t,
+ private_aesni_cbc_t *this)
+{
+ return AES_BLOCK_SIZE;
+}
+
+METHOD(crypter_t, get_iv_size, size_t,
+ private_aesni_cbc_t *this)
+{
+ return AES_BLOCK_SIZE;
+}
+
+METHOD(crypter_t, get_key_size, size_t,
+ private_aesni_cbc_t *this)
+{
+ return this->key_size;
+}
+
+METHOD(crypter_t, set_key, bool,
+ private_aesni_cbc_t *this, chunk_t key)
+{
+ if (key.len != this->key_size)
+ {
+ return FALSE;
+ }
+
+ DESTROY_IF(this->ekey);
+ DESTROY_IF(this->dkey);
+
+ this->ekey = aesni_key_create(TRUE, key);
+ this->dkey = aesni_key_create(FALSE, key);
+
+ return this->ekey && this->dkey;
+}
+
+METHOD(crypter_t, destroy, void,
+ private_aesni_cbc_t *this)
+{
+ DESTROY_IF(this->ekey);
+ DESTROY_IF(this->dkey);
+ free_align(this);
+}
+
+/**
+ * See header
+ */
+aesni_cbc_t *aesni_cbc_create(encryption_algorithm_t algo, size_t key_size)
+{
+ private_aesni_cbc_t *this;
+
+ if (algo != ENCR_AES_CBC)
+ {
+ return NULL;
+ }
+ switch (key_size)
+ {
+ case 0:
+ key_size = 16;
+ break;
+ case 16:
+ case 24:
+ case 32:
+ break;
+ default:
+ return NULL;
+ }
+
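+ /* allocated with 16-byte (__m128i) alignment; destroy() releases the
+  * object with free_align() accordingly */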
+ INIT_ALIGN(this, sizeof(__m128i),
+ .public = {
+ .crypter = {
+ .encrypt = _encrypt,
+ .decrypt = _decrypt,
+ .get_block_size = _get_block_size,
+ .get_iv_size = _get_iv_size,
+ .get_key_size = _get_key_size,
+ .set_key = _set_key,
+ .destroy = _destroy,
+ },
+ },
+ .key_size = key_size,
+ );
+
+ switch (key_size)
+ {
+ case 16:
+ this->encrypt = encrypt_cbc128;
+ this->decrypt = decrypt_cbc128;
+ break;
+ case 24:
+ this->encrypt = encrypt_cbc192;
+ this->decrypt = decrypt_cbc192;
+ break;
+ case 32:
+ this->encrypt = encrypt_cbc256;
+ this->decrypt = decrypt_cbc256;
+ break;
+ }
+
+ return &this->public;
+}
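
The unrolled intrinsics above implement standard CBC chaining: on
encryption C_i = E_K(P_i XOR C_{i-1}), on decryption P_i = D_K(C_i) XOR
C_{i-1}. A scalar sketch of the same chaining, with aes_encrypt_block()
and aes_decrypt_block() as hypothetical one-block primitives standing in
for the unrolled AES-NI rounds, shows why only decryption pipelines:
every D_K(C_i) is independent, while each encryption input depends on the
previous ciphertext block:

    #include <stdint.h>
    #include <string.h>

    /* hypothetical one-block AES primitives, not part of the plugin */
    void aes_encrypt_block(const void *key, const uint8_t in[16],
                           uint8_t out[16]);
    void aes_decrypt_block(const void *key, const uint8_t in[16],
                           uint8_t out[16]);

    void cbc_encrypt(const void *key, unsigned blocks, const uint8_t *in,
                     const uint8_t *iv, uint8_t *out)
    {
        uint8_t fb[16];
        unsigned i, j;

        memcpy(fb, iv, 16);
        for (i = 0; i < blocks; i++)
        {
            for (j = 0; j < 16; j++)
            {   /* feedback: plaintext XORed with previous ciphertext */
                fb[j] ^= in[i * 16 + j];
            }
            aes_encrypt_block(key, fb, fb);
            memcpy(out + i * 16, fb, 16);
        }
    }

    void cbc_decrypt(const void *key, unsigned blocks, const uint8_t *in,
                     const uint8_t *iv, uint8_t *out)
    {
        uint8_t prev[16], last[16], tmp[16];
        unsigned i, j;

        memcpy(prev, iv, 16);
        for (i = 0; i < blocks; i++)
        {   /* save C_i first, as in == out is allowed (inline mode) */
            memcpy(last, in + i * 16, 16);
            /* all D_K(C_i) are independent, hence the 4-way pipelining */
            aes_decrypt_block(key, last, tmp);
            for (j = 0; j < 16; j++)
            {
                out[i * 16 + j] = tmp[j] ^ prev[j];
            }
            memcpy(prev, last, 16);
        }
    }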
diff --git a/src/libstrongswan/plugins/aesni/aesni_cbc.h b/src/libstrongswan/plugins/aesni/aesni_cbc.h
new file mode 100644
index 000000000..c004ec611
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_cbc.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_cbc aesni_cbc
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_CBC_H_
+#define AESNI_CBC_H_
+
+#include <library.h>
+
+typedef struct aesni_cbc_t aesni_cbc_t;
+
+/**
+ * CBC mode crypter using AES-NI
+ */
+struct aesni_cbc_t {
+
+ /**
+ * Implements crypter interface
+ */
+ crypter_t crypter;
+};
+
+/**
+ * Create an aesni_cbc instance.
+ *
+ * @param algo encryption algorithm, ENCR_AES_CBC
+ * @param key_size AES key size, in bytes
+ * @return AES-CBC crypter, NULL if not supported
+ */
+aesni_cbc_t *aesni_cbc_create(encryption_algorithm_t algo, size_t key_size);
+
+#endif /** AESNI_CBC_H_ @}*/
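
The returned object is driven through libstrongswan's generic crypter_t
interface, matching the METHOD implementations in aesni_cbc.c. A minimal
usage sketch with placeholder all-zero key/IV/data buffers (illustrative
values only):

    #include "aesni_cbc.h"

    static bool cbc_demo(void)
    {
        u_char kbuf[16] = {0}, ivbuf[16] = {0}, buf[32] = {0};
        chunk_t key = chunk_create(kbuf, sizeof(kbuf));
        chunk_t iv = chunk_create(ivbuf, sizeof(ivbuf));
        chunk_t data = chunk_create(buf, sizeof(buf));
        chunk_t encrypted;
        aesni_cbc_t *cbc;
        bool ok = FALSE;

        cbc = aesni_cbc_create(ENCR_AES_CBC, 16);    /* AES-128 */
        if (cbc)
        {
            /* data.len must be a multiple of 16, iv.len exactly 16 */
            ok = cbc->crypter.set_key(&cbc->crypter, key) &&
                 cbc->crypter.encrypt(&cbc->crypter, data, iv, &encrypted);
            if (ok)
            {
                chunk_free(&encrypted);
            }
            cbc->crypter.destroy(&cbc->crypter);
        }
        return ok;
    }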
diff --git a/src/libstrongswan/plugins/aesni/aesni_ccm.c b/src/libstrongswan/plugins/aesni/aesni_ccm.c
new file mode 100644
index 000000000..d523bc17a
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_ccm.c
@@ -0,0 +1,914 @@
+/*
+ * Copyright (C) 2010-2015 Martin Willi
+ * Copyright (C) 2010-2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "aesni_ccm.h"
+#include "aesni_key.h"
+
+#include <crypto/iv/iv_gen_seq.h>
+
+#include <tmmintrin.h>
+
+#define SALT_SIZE 3
+#define IV_SIZE 8
+#define NONCE_SIZE (SALT_SIZE + IV_SIZE) /* 11 */
+#define Q_SIZE (AES_BLOCK_SIZE - NONCE_SIZE - 1) /* 4 */
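+/* the flags byte encodes q_len as q - 1 = 3; the 4-byte length field
+ * frames payloads of up to 2^32 - 1 bytes (CCM with L = 4) */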
+
+typedef struct private_aesni_ccm_t private_aesni_ccm_t;
+
+/**
+ * CCM en/decryption method type
+ */
+typedef void (*aesni_ccm_fn_t)(private_aesni_ccm_t*, size_t, u_char*, u_char*,
+ u_char*, size_t, u_char*, u_char*);
+
+/**
+ * Private data of an aesni_ccm_t object.
+ */
+struct private_aesni_ccm_t {
+
+ /**
+ * Public aesni_ccm_t interface.
+ */
+ aesni_ccm_t public;
+
+ /**
+ * Encryption key schedule
+ */
+ aesni_key_t *key;
+
+ /**
+ * IV generator.
+ */
+ iv_gen_t *iv_gen;
+
+ /**
+ * Length of the integrity check value
+ */
+ size_t icv_size;
+
+ /**
+ * Length of the key in bytes
+ */
+ size_t key_size;
+
+ /**
+ * CCM encryption function
+ */
+ aesni_ccm_fn_t encrypt;
+
+ /**
+ * CCM decryption function
+ */
+ aesni_ccm_fn_t decrypt;
+
+ /**
+ * salt to add to nonce
+ */
+ u_char salt[SALT_SIZE];
+};
+
+/**
+ * First block with control information
+ */
+typedef struct __attribute__((packed)) {
+ BITFIELD4(u_int8_t,
+ /* size of p length field q, as q-1 */
+ q_len: 3,
+ /* size of our ICV t, as (t-2)/2 */
+ t_len: 3,
+ /* do we have associated data */
+ assoc: 1,
+ reserved: 1,
+ ) flags;
+ /* nonce value */
+ struct __attribute__((packed)) {
+ u_char salt[SALT_SIZE];
+ u_char iv[IV_SIZE];
+ } nonce;
+ /* length of plain text, q */
+ u_char q[Q_SIZE];
+} b0_t;
+
+/**
+ * Counter block
+ */
+typedef struct __attribute__((packed)) {
+ BITFIELD3(u_int8_t,
+ /* size of p length field q, as q-1 */
+ q_len: 3,
+ zero: 3,
+ reserved: 2,
+ ) flags;
+ /* nonce value */
+ struct __attribute__((packed)) {
+ u_char salt[SALT_SIZE];
+ u_char iv[IV_SIZE];
+ } nonce;
+ /* counter value */
+ u_char i[Q_SIZE];
+} ctr_t;
+
+/**
+ * Build the first block B0
+ */
+static void build_b0(private_aesni_ccm_t *this, size_t len, size_t alen,
+ u_char *iv, void *out)
+{
+ b0_t *block = out;
+
+ block->flags.reserved = 0;
+ block->flags.assoc = alen ? 1 : 0;
+ block->flags.t_len = (this->icv_size - 2) / 2;
+ block->flags.q_len = Q_SIZE - 1;
+ memcpy(block->nonce.salt, this->salt, SALT_SIZE);
+ memcpy(block->nonce.iv, iv, IV_SIZE);
+ htoun32(block->q, len);
+}
+
+/**
+ * Build a counter block for counter i
+ */
+static void build_ctr(private_aesni_ccm_t *this, u_int32_t i, u_char *iv,
+ void *out)
+{
+ ctr_t *ctr = out;
+
+ ctr->flags.reserved = 0;
+ ctr->flags.zero = 0;
+ ctr->flags.q_len = Q_SIZE - 1;
+ memcpy(ctr->nonce.salt, this->salt, SALT_SIZE);
+ memcpy(ctr->nonce.iv, iv, IV_SIZE);
+ htoun32(ctr->i, i);
+}
+
+/**
+ * Calculate the ICV for the b0 and associated data
+ */
+static __m128i icv_header(private_aesni_ccm_t *this, size_t len, u_char *iv,
+ u_int16_t alen, u_char *assoc)
+{
+ __m128i *ks, b, t, c;
+ u_int i, round, blocks, rem;
+
+ ks = this->key->schedule;
+ build_b0(this, len, alen, iv, &b);
+ c = _mm_loadu_si128(&b);
+ c = _mm_xor_si128(c, ks[0]);
+ for (round = 1; round < this->key->rounds; round++)
+ {
+ c = _mm_aesenc_si128(c, ks[round]);
+ }
+ c = _mm_aesenclast_si128(c, ks[this->key->rounds]);
+
+ if (alen)
+ {
+ blocks = (alen + sizeof(alen)) / AES_BLOCK_SIZE;
+ rem = (alen + sizeof(alen)) % AES_BLOCK_SIZE;
+ if (rem)
+ {
+ blocks++;
+ }
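+ /* the 2-byte length prefix shifts the block grid: block i of the
+  * padded assoc stream starts at assoc - sizeof(alen) + i * 16 */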
+ for (i = 0; i < blocks; i++)
+ {
+ if (i == 0)
+ { /* first block */
+ memset(&b, 0, sizeof(b));
+ htoun16(&b, alen);
+ memcpy(((u_char*)&b) + sizeof(alen), assoc,
+ min(alen, sizeof(b) - sizeof(alen)));
+ t = _mm_loadu_si128(&b);
+ }
+ else if (i == blocks - 1 && rem)
+ { /* last block with padding */
+ memset(&b, 0, sizeof(b));
+ memcpy(&b, ((__m128i*)(assoc - sizeof(alen))) + i, rem);
+ t = _mm_loadu_si128(&b);
+ }
+ else
+ { /* full block */
+ t = _mm_loadu_si128(((__m128i*)(assoc - sizeof(alen))) + i);
+ }
+ c = _mm_xor_si128(t, c);
+ c = _mm_xor_si128(c, ks[0]);
+ for (round = 1; round < this->key->rounds; round++)
+ {
+ c = _mm_aesenc_si128(c, ks[round]);
+ }
+ c = _mm_aesenclast_si128(c, ks[this->key->rounds]);
+ }
+ }
+ return c;
+}
+
+/**
+ * En-/Decrypt the ICV, trim and store it
+ */
+static void crypt_icv(private_aesni_ccm_t *this, u_char *iv,
+ __m128i c, u_char *icv)
+{
+ __m128i *ks, b, t;
+ u_int round;
+
+ ks = this->key->schedule;
+ build_ctr(this, 0, iv, &b);
+
+ t = _mm_loadu_si128(&b);
+ t = _mm_xor_si128(t, ks[0]);
+ for (round = 1; round < this->key->rounds; round++)
+ {
+ t = _mm_aesenc_si128(t, ks[round]);
+ }
+ t = _mm_aesenclast_si128(t, ks[this->key->rounds]);
+
+ t = _mm_xor_si128(t, c);
+
+ _mm_storeu_si128(&b, t);
+ memcpy(icv, &b, this->icv_size);
+}
+
+/**
+ * Do big-endian increment on x: byte-swap the counter block, increment it
+ * with a little-endian 64-bit add, and swap it back
+ */
+static inline __m128i increment_be(__m128i x)
+{
+ __m128i swap;
+
+ swap = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+
+ x = _mm_shuffle_epi8(x, swap);
+ x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1));
+ x = _mm_shuffle_epi8(x, swap);
+
+ return x;
+}
+
+/**
+ * Encrypt a remaining incomplete block
+ */
+static __m128i encrypt_ccm_rem(aesni_key_t *key, u_int rem, __m128i state,
+ void *in, void *out, __m128i c)
+{
+ __m128i *ks, t, b, d;
+ u_int round;
+
+ ks = key->schedule;
+ memset(&b, 0, sizeof(b));
+ memcpy(&b, in, rem);
+ d = _mm_loadu_si128(&b);
+
+ c = _mm_xor_si128(d, c);
+ c = _mm_xor_si128(c, ks[0]);
+ t = _mm_xor_si128(state, ks[0]);
+ for (round = 1; round < key->rounds; round++)
+ {
+ c = _mm_aesenc_si128(c, ks[round]);
+ t = _mm_aesenc_si128(t, ks[round]);
+ }
+ c = _mm_aesenclast_si128(c, ks[key->rounds]);
+ t = _mm_aesenclast_si128(t, ks[key->rounds]);
+
+ t = _mm_xor_si128(t, d);
+ _mm_storeu_si128(&b, t);
+
+ memcpy(out, &b, rem);
+
+ return c;
+}
+
+/**
+ * Decrypt a remaining incomplete block
+ */
+static __m128i decrypt_ccm_rem(aesni_key_t *key, u_int rem, __m128i state,
+ void *in, void *out, __m128i c)
+{
+ __m128i *ks, t, b, d;
+ u_int round;
+
+ ks = key->schedule;
+ memset(&b, 0, sizeof(b));
+ memcpy(&b, in, rem);
+ d = _mm_loadu_si128(&b);
+
+ t = _mm_xor_si128(state, ks[0]);
+ for (round = 1; round < key->rounds; round++)
+ {
+ t = _mm_aesenc_si128(t, ks[round]);
+ }
+ t = _mm_aesenclast_si128(t, ks[key->rounds]);
+ t = _mm_xor_si128(t, d);
+ _mm_storeu_si128(&b, t);
+
+ memset((u_char*)&b + rem, 0, sizeof(b) - rem);
+ t = _mm_loadu_si128(&b);
+ c = _mm_xor_si128(t, c);
+ c = _mm_xor_si128(c, ks[0]);
+ for (round = 1; round < key->rounds; round++)
+ {
+ c = _mm_aesenc_si128(c, ks[round]);
+ }
+ c = _mm_aesenclast_si128(c, ks[key->rounds]);
+
+ memcpy(out, &b, rem);
+
+ return c;
+}
+
+/**
+ * AES-128 CCM encryption/ICV generation
+ */
+static void encrypt_ccm128(private_aesni_ccm_t *this,
+ size_t len, u_char *in, u_char *out, u_char *iv,
+ size_t alen, u_char *assoc, u_char *icv)
+{
+ __m128i *ks, d, t, c, b, state, *bi, *bo;
+ u_int blocks, rem, i;
+
+ c = icv_header(this, len, iv, alen, assoc);
+ build_ctr(this, 1, iv, &b);
+ state = _mm_load_si128(&b);
+ blocks = len / AES_BLOCK_SIZE;
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
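+ /* interleave two independent AES flows: 'c' advances the CBC-MAC over
+  * the plaintext while 't' encrypts the CTR keystream block */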
+ for (i = 0; i < blocks; i++)
+ {
+ d = _mm_loadu_si128(bi + i);
+
+ c = _mm_xor_si128(d, c);
+ c = _mm_xor_si128(c, ks[0]);
+ t = _mm_xor_si128(state, ks[0]);
+
+ c = _mm_aesenc_si128(c, ks[1]);
+ t = _mm_aesenc_si128(t, ks[1]);
+ c = _mm_aesenc_si128(c, ks[2]);
+ t = _mm_aesenc_si128(t, ks[2]);
+ c = _mm_aesenc_si128(c, ks[3]);
+ t = _mm_aesenc_si128(t, ks[3]);
+ c = _mm_aesenc_si128(c, ks[4]);
+ t = _mm_aesenc_si128(t, ks[4]);
+ c = _mm_aesenc_si128(c, ks[5]);
+ t = _mm_aesenc_si128(t, ks[5]);
+ c = _mm_aesenc_si128(c, ks[6]);
+ t = _mm_aesenc_si128(t, ks[6]);
+ c = _mm_aesenc_si128(c, ks[7]);
+ t = _mm_aesenc_si128(t, ks[7]);
+ c = _mm_aesenc_si128(c, ks[8]);
+ t = _mm_aesenc_si128(t, ks[8]);
+ c = _mm_aesenc_si128(c, ks[9]);
+ t = _mm_aesenc_si128(t, ks[9]);
+
+ c = _mm_aesenclast_si128(c, ks[10]);
+ t = _mm_aesenclast_si128(t, ks[10]);
+
+ t = _mm_xor_si128(t, d);
+ _mm_storeu_si128(bo + i, t);
+
+ state = increment_be(state);
+ }
+
+ if (rem)
+ {
+ c = encrypt_ccm_rem(this->key, rem, state, bi + blocks, bo + blocks, c);
+ }
+ crypt_icv(this, iv, c, icv);
+}
+
+/**
+ * AES-128 CCM decryption/ICV generation
+ */
+static void decrypt_ccm128(private_aesni_ccm_t *this,
+ size_t len, u_char *in, u_char *out, u_char *iv,
+ size_t alen, u_char *assoc, u_char *icv)
+{
+ __m128i *ks, d, t, c, b, state, *bi, *bo;
+ u_int blocks, rem, i;
+
+ c = icv_header(this, len, iv, alen, assoc);
+ build_ctr(this, 1, iv, &b);
+ state = _mm_load_si128(&b);
+ blocks = len / AES_BLOCK_SIZE;
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
+ for (i = 0; i < blocks; i++)
+ {
+ d = _mm_loadu_si128(bi + i);
+
+ t = _mm_xor_si128(state, ks[0]);
+
+ t = _mm_aesenc_si128(t, ks[1]);
+ t = _mm_aesenc_si128(t, ks[2]);
+ t = _mm_aesenc_si128(t, ks[3]);
+ t = _mm_aesenc_si128(t, ks[4]);
+ t = _mm_aesenc_si128(t, ks[5]);
+ t = _mm_aesenc_si128(t, ks[6]);
+ t = _mm_aesenc_si128(t, ks[7]);
+ t = _mm_aesenc_si128(t, ks[8]);
+ t = _mm_aesenc_si128(t, ks[9]);
+
+ t = _mm_aesenclast_si128(t, ks[10]);
+ t = _mm_xor_si128(t, d);
+ _mm_storeu_si128(bo + i, t);
+
+ c = _mm_xor_si128(t, c);
+ c = _mm_xor_si128(c, ks[0]);
+
+ c = _mm_aesenc_si128(c, ks[1]);
+ c = _mm_aesenc_si128(c, ks[2]);
+ c = _mm_aesenc_si128(c, ks[3]);
+ c = _mm_aesenc_si128(c, ks[4]);
+ c = _mm_aesenc_si128(c, ks[5]);
+ c = _mm_aesenc_si128(c, ks[6]);
+ c = _mm_aesenc_si128(c, ks[7]);
+ c = _mm_aesenc_si128(c, ks[8]);
+ c = _mm_aesenc_si128(c, ks[9]);
+
+ c = _mm_aesenclast_si128(c, ks[10]);
+
+ state = increment_be(state);
+ }
+
+ if (rem)
+ {
+ c = decrypt_ccm_rem(this->key, rem, state, bi + blocks, bo + blocks, c);
+ }
+ crypt_icv(this, iv, c, icv);
+}
+
+/**
+ * AES-192 CCM encryption/ICV generation
+ */
+static void encrypt_ccm192(private_aesni_ccm_t *this,
+ size_t len, u_char *in, u_char *out, u_char *iv,
+ size_t alen, u_char *assoc, u_char *icv)
+{
+ __m128i *ks, d, t, c, b, state, *bi, *bo;
+ u_int blocks, rem, i;
+
+ c = icv_header(this, len, iv, alen, assoc);
+ build_ctr(this, 1, iv, &b);
+ state = _mm_load_si128(&b);
+ blocks = len / AES_BLOCK_SIZE;
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
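+	/* same interleaving as the AES-128 variant, with twelve rounds */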
+ for (i = 0; i < blocks; i++)
+ {
+ d = _mm_loadu_si128(bi + i);
+
+ c = _mm_xor_si128(d, c);
+ c = _mm_xor_si128(c, ks[0]);
+ t = _mm_xor_si128(state, ks[0]);
+
+ c = _mm_aesenc_si128(c, ks[1]);
+ t = _mm_aesenc_si128(t, ks[1]);
+ c = _mm_aesenc_si128(c, ks[2]);
+ t = _mm_aesenc_si128(t, ks[2]);
+ c = _mm_aesenc_si128(c, ks[3]);
+ t = _mm_aesenc_si128(t, ks[3]);
+ c = _mm_aesenc_si128(c, ks[4]);
+ t = _mm_aesenc_si128(t, ks[4]);
+ c = _mm_aesenc_si128(c, ks[5]);
+ t = _mm_aesenc_si128(t, ks[5]);
+ c = _mm_aesenc_si128(c, ks[6]);
+ t = _mm_aesenc_si128(t, ks[6]);
+ c = _mm_aesenc_si128(c, ks[7]);
+ t = _mm_aesenc_si128(t, ks[7]);
+ c = _mm_aesenc_si128(c, ks[8]);
+ t = _mm_aesenc_si128(t, ks[8]);
+ c = _mm_aesenc_si128(c, ks[9]);
+ t = _mm_aesenc_si128(t, ks[9]);
+ c = _mm_aesenc_si128(c, ks[10]);
+ t = _mm_aesenc_si128(t, ks[10]);
+ c = _mm_aesenc_si128(c, ks[11]);
+ t = _mm_aesenc_si128(t, ks[11]);
+
+ c = _mm_aesenclast_si128(c, ks[12]);
+ t = _mm_aesenclast_si128(t, ks[12]);
+
+ t = _mm_xor_si128(t, d);
+ _mm_storeu_si128(bo + i, t);
+
+ state = increment_be(state);
+ }
+
+ if (rem)
+ {
+ c = encrypt_ccm_rem(this->key, rem, state, bi + blocks, bo + blocks, c);
+ }
+ crypt_icv(this, iv, c, icv);
+}
+
+/**
+ * AES-192 CCM decryption/ICV generation
+ */
+static void decrypt_ccm192(private_aesni_ccm_t *this,
+ size_t len, u_char *in, u_char *out, u_char *iv,
+ size_t alen, u_char *assoc, u_char *icv)
+{
+ __m128i *ks, d, t, c, b, state, *bi, *bo;
+ u_int blocks, rem, i;
+
+ c = icv_header(this, len, iv, alen, assoc);
+ build_ctr(this, 1, iv, &b);
+ state = _mm_load_si128(&b);
+ blocks = len / AES_BLOCK_SIZE;
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
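+	/* same scheme as the AES-128 variant, with twelve rounds */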
+ for (i = 0; i < blocks; i++)
+ {
+ d = _mm_loadu_si128(bi + i);
+
+ t = _mm_xor_si128(state, ks[0]);
+
+ t = _mm_aesenc_si128(t, ks[1]);
+ t = _mm_aesenc_si128(t, ks[2]);
+ t = _mm_aesenc_si128(t, ks[3]);
+ t = _mm_aesenc_si128(t, ks[4]);
+ t = _mm_aesenc_si128(t, ks[5]);
+ t = _mm_aesenc_si128(t, ks[6]);
+ t = _mm_aesenc_si128(t, ks[7]);
+ t = _mm_aesenc_si128(t, ks[8]);
+ t = _mm_aesenc_si128(t, ks[9]);
+ t = _mm_aesenc_si128(t, ks[10]);
+ t = _mm_aesenc_si128(t, ks[11]);
+
+ t = _mm_aesenclast_si128(t, ks[12]);
+ t = _mm_xor_si128(t, d);
+ _mm_storeu_si128(bo + i, t);
+
+ c = _mm_xor_si128(t, c);
+ c = _mm_xor_si128(c, ks[0]);
+
+ c = _mm_aesenc_si128(c, ks[1]);
+ c = _mm_aesenc_si128(c, ks[2]);
+ c = _mm_aesenc_si128(c, ks[3]);
+ c = _mm_aesenc_si128(c, ks[4]);
+ c = _mm_aesenc_si128(c, ks[5]);
+ c = _mm_aesenc_si128(c, ks[6]);
+ c = _mm_aesenc_si128(c, ks[7]);
+ c = _mm_aesenc_si128(c, ks[8]);
+ c = _mm_aesenc_si128(c, ks[9]);
+ c = _mm_aesenc_si128(c, ks[10]);
+ c = _mm_aesenc_si128(c, ks[11]);
+
+ c = _mm_aesenclast_si128(c, ks[12]);
+
+ state = increment_be(state);
+ }
+
+ if (rem)
+ {
+ c = decrypt_ccm_rem(this->key, rem, state, bi + blocks, bo + blocks, c);
+ }
+ crypt_icv(this, iv, c, icv);
+}
+
+/**
+ * AES-256 CCM encryption/ICV generation
+ */
+static void encrypt_ccm256(private_aesni_ccm_t *this,
+ size_t len, u_char *in, u_char *out, u_char *iv,
+ size_t alen, u_char *assoc, u_char *icv)
+{
+ __m128i *ks, d, t, c, b, state, *bi, *bo;
+ u_int blocks, rem, i;
+
+ c = icv_header(this, len, iv, alen, assoc);
+ build_ctr(this, 1, iv, &b);
+ state = _mm_load_si128(&b);
+ blocks = len / AES_BLOCK_SIZE;
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
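+	/* same interleaving as the AES-128 variant, with fourteen rounds */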
+ for (i = 0; i < blocks; i++)
+ {
+ d = _mm_loadu_si128(bi + i);
+
+ c = _mm_xor_si128(d, c);
+ c = _mm_xor_si128(c, ks[0]);
+ t = _mm_xor_si128(state, ks[0]);
+
+ c = _mm_aesenc_si128(c, ks[1]);
+ t = _mm_aesenc_si128(t, ks[1]);
+ c = _mm_aesenc_si128(c, ks[2]);
+ t = _mm_aesenc_si128(t, ks[2]);
+ c = _mm_aesenc_si128(c, ks[3]);
+ t = _mm_aesenc_si128(t, ks[3]);
+ c = _mm_aesenc_si128(c, ks[4]);
+ t = _mm_aesenc_si128(t, ks[4]);
+ c = _mm_aesenc_si128(c, ks[5]);
+ t = _mm_aesenc_si128(t, ks[5]);
+ c = _mm_aesenc_si128(c, ks[6]);
+ t = _mm_aesenc_si128(t, ks[6]);
+ c = _mm_aesenc_si128(c, ks[7]);
+ t = _mm_aesenc_si128(t, ks[7]);
+ c = _mm_aesenc_si128(c, ks[8]);
+ t = _mm_aesenc_si128(t, ks[8]);
+ c = _mm_aesenc_si128(c, ks[9]);
+ t = _mm_aesenc_si128(t, ks[9]);
+ c = _mm_aesenc_si128(c, ks[10]);
+ t = _mm_aesenc_si128(t, ks[10]);
+ c = _mm_aesenc_si128(c, ks[11]);
+ t = _mm_aesenc_si128(t, ks[11]);
+ c = _mm_aesenc_si128(c, ks[12]);
+ t = _mm_aesenc_si128(t, ks[12]);
+ c = _mm_aesenc_si128(c, ks[13]);
+ t = _mm_aesenc_si128(t, ks[13]);
+
+ c = _mm_aesenclast_si128(c, ks[14]);
+ t = _mm_aesenclast_si128(t, ks[14]);
+
+ t = _mm_xor_si128(t, d);
+ _mm_storeu_si128(bo + i, t);
+
+ state = increment_be(state);
+ }
+
+ if (rem)
+ {
+ c = encrypt_ccm_rem(this->key, rem, state, bi + blocks, bo + blocks, c);
+ }
+ crypt_icv(this, iv, c, icv);
+}
+
+/**
+ * AES-256 CCM decryption/ICV generation
+ */
+static void decrypt_ccm256(private_aesni_ccm_t *this,
+ size_t len, u_char *in, u_char *out, u_char *iv,
+ size_t alen, u_char *assoc, u_char *icv)
+{
+ __m128i *ks, d, t, c, b, state, *bi, *bo;
+ u_int blocks, rem, i;
+
+ c = icv_header(this, len, iv, alen, assoc);
+ build_ctr(this, 1, iv, &b);
+ state = _mm_load_si128(&b);
+ blocks = len / AES_BLOCK_SIZE;
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
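+	/* same scheme as the AES-128 variant, with fourteen rounds */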
+ for (i = 0; i < blocks; i++)
+ {
+ d = _mm_loadu_si128(bi + i);
+
+ t = _mm_xor_si128(state, ks[0]);
+
+ t = _mm_aesenc_si128(t, ks[1]);
+ t = _mm_aesenc_si128(t, ks[2]);
+ t = _mm_aesenc_si128(t, ks[3]);
+ t = _mm_aesenc_si128(t, ks[4]);
+ t = _mm_aesenc_si128(t, ks[5]);
+ t = _mm_aesenc_si128(t, ks[6]);
+ t = _mm_aesenc_si128(t, ks[7]);
+ t = _mm_aesenc_si128(t, ks[8]);
+ t = _mm_aesenc_si128(t, ks[9]);
+ t = _mm_aesenc_si128(t, ks[10]);
+ t = _mm_aesenc_si128(t, ks[11]);
+ t = _mm_aesenc_si128(t, ks[12]);
+ t = _mm_aesenc_si128(t, ks[13]);
+
+ t = _mm_aesenclast_si128(t, ks[14]);
+ t = _mm_xor_si128(t, d);
+ _mm_storeu_si128(bo + i, t);
+
+ c = _mm_xor_si128(t, c);
+ c = _mm_xor_si128(c, ks[0]);
+
+ c = _mm_aesenc_si128(c, ks[1]);
+ c = _mm_aesenc_si128(c, ks[2]);
+ c = _mm_aesenc_si128(c, ks[3]);
+ c = _mm_aesenc_si128(c, ks[4]);
+ c = _mm_aesenc_si128(c, ks[5]);
+ c = _mm_aesenc_si128(c, ks[6]);
+ c = _mm_aesenc_si128(c, ks[7]);
+ c = _mm_aesenc_si128(c, ks[8]);
+ c = _mm_aesenc_si128(c, ks[9]);
+ c = _mm_aesenc_si128(c, ks[10]);
+ c = _mm_aesenc_si128(c, ks[11]);
+ c = _mm_aesenc_si128(c, ks[12]);
+ c = _mm_aesenc_si128(c, ks[13]);
+
+ c = _mm_aesenclast_si128(c, ks[14]);
+
+ state = increment_be(state);
+ }
+
+ if (rem)
+ {
+ c = decrypt_ccm_rem(this->key, rem, state, bi + blocks, bo + blocks, c);
+ }
+ crypt_icv(this, iv, c, icv);
+}
+
+METHOD(aead_t, encrypt, bool,
+ private_aesni_ccm_t *this, chunk_t plain, chunk_t assoc, chunk_t iv,
+ chunk_t *encr)
+{
+ u_char *out;
+
+ if (!this->key || iv.len != IV_SIZE)
+ {
+ return FALSE;
+ }
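+	/* without an output chunk we encrypt in-place; the caller must provide
+	 * room for the ICV after the plaintext */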
+ out = plain.ptr;
+ if (encr)
+ {
+ *encr = chunk_alloc(plain.len + this->icv_size);
+ out = encr->ptr;
+ }
+ this->encrypt(this, plain.len, plain.ptr, out, iv.ptr,
+ assoc.len, assoc.ptr, out + plain.len);
+ return TRUE;
+}
+
+METHOD(aead_t, decrypt, bool,
+ private_aesni_ccm_t *this, chunk_t encr, chunk_t assoc, chunk_t iv,
+ chunk_t *plain)
+{
+ u_char *out, icv[this->icv_size];
+
+ if (!this->key || iv.len != IV_SIZE || encr.len < this->icv_size)
+ {
+ return FALSE;
+ }
+ encr.len -= this->icv_size;
+ out = encr.ptr;
+ if (plain)
+ {
+ *plain = chunk_alloc(encr.len);
+ out = plain->ptr;
+ }
+
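+	/* decrypt and recompute the ICV, then compare it in constant time */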
+ this->decrypt(this, encr.len, encr.ptr, out, iv.ptr,
+ assoc.len, assoc.ptr, icv);
+ return memeq_const(icv, encr.ptr + encr.len, this->icv_size);
+}
+
+METHOD(aead_t, get_block_size, size_t,
+ private_aesni_ccm_t *this)
+{
+ return 1;
+}
+
+METHOD(aead_t, get_icv_size, size_t,
+ private_aesni_ccm_t *this)
+{
+ return this->icv_size;
+}
+
+METHOD(aead_t, get_iv_size, size_t,
+ private_aesni_ccm_t *this)
+{
+ return IV_SIZE;
+}
+
+METHOD(aead_t, get_iv_gen, iv_gen_t*,
+ private_aesni_ccm_t *this)
+{
+ return this->iv_gen;
+}
+
+METHOD(aead_t, get_key_size, size_t,
+ private_aesni_ccm_t *this)
+{
+ return this->key_size + SALT_SIZE;
+}
+
+METHOD(aead_t, set_key, bool,
+ private_aesni_ccm_t *this, chunk_t key)
+{
+ if (key.len != this->key_size + SALT_SIZE)
+ {
+ return FALSE;
+ }
+
+ memcpy(this->salt, key.ptr + key.len - SALT_SIZE, SALT_SIZE);
+ key.len -= SALT_SIZE;
+
+ DESTROY_IF(this->key);
+ this->key = aesni_key_create(TRUE, key);
+ return TRUE;
+}
+
+METHOD(aead_t, destroy, void,
+ private_aesni_ccm_t *this)
+{
+ DESTROY_IF(this->key);
+ this->iv_gen->destroy(this->iv_gen);
+ free_align(this);
+}
+
+/**
+ * See header
+ */
+aesni_ccm_t *aesni_ccm_create(encryption_algorithm_t algo,
+ size_t key_size, size_t salt_size)
+{
+ private_aesni_ccm_t *this;
+ size_t icv_size;
+
+ switch (key_size)
+ {
+ case 0:
+ key_size = 16;
+ break;
+ case 16:
+ case 24:
+ case 32:
+ break;
+ default:
+ return NULL;
+ }
+ if (salt_size && salt_size != SALT_SIZE)
+ {
+ /* currently not supported */
+ return NULL;
+ }
+ switch (algo)
+ {
+		case ENCR_AES_CCM_ICV8:
+			icv_size = 8;
+			break;
+		case ENCR_AES_CCM_ICV12:
+			icv_size = 12;
+			break;
+		case ENCR_AES_CCM_ICV16:
+			icv_size = 16;
+			break;
+ default:
+ return NULL;
+ }
+
+ INIT_ALIGN(this, sizeof(__m128i),
+ .public = {
+ .aead = {
+ .encrypt = _encrypt,
+ .decrypt = _decrypt,
+ .get_block_size = _get_block_size,
+ .get_icv_size = _get_icv_size,
+ .get_iv_size = _get_iv_size,
+ .get_iv_gen = _get_iv_gen,
+ .get_key_size = _get_key_size,
+ .set_key = _set_key,
+ .destroy = _destroy,
+ },
+ },
+ .key_size = key_size,
+ .iv_gen = iv_gen_seq_create(),
+ .icv_size = icv_size,
+ );
+
+ switch (key_size)
+ {
+ case 16:
+ this->encrypt = encrypt_ccm128;
+ this->decrypt = decrypt_ccm128;
+ break;
+ case 24:
+ this->encrypt = encrypt_ccm192;
+ this->decrypt = decrypt_ccm192;
+ break;
+ case 32:
+ this->encrypt = encrypt_ccm256;
+ this->decrypt = decrypt_ccm256;
+ break;
+ }
+
+ return &this->public;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_ccm.h b/src/libstrongswan/plugins/aesni/aesni_ccm.h
new file mode 100644
index 000000000..69612b515
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_ccm.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_ccm aesni_ccm
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_CCM_H_
+#define AESNI_CCM_H_
+
+#include <library.h>
+
+typedef struct aesni_ccm_t aesni_ccm_t;
+
+/**
+ * CCM mode AEAD using AES-NI
+ */
+struct aesni_ccm_t {
+
+ /**
+ * Implements aead_t interface
+ */
+ aead_t aead;
+};
+
+/**
+ * Create an aesni_ccm_t instance.
+ *
+ * @param algo encryption algorithm, ENCR_AES_CCM*
+ * @param key_size AES key size, in bytes
+ * @param salt_size size of salt value
+ * @return AES-CCM AEAD, NULL if not supported
+ */
+aesni_ccm_t *aesni_ccm_create(encryption_algorithm_t algo,
+ size_t key_size, size_t salt_size);
+
+#endif /** AESNI_CCM_H_ @}*/
diff --git a/src/libstrongswan/plugins/aesni/aesni_cmac.c b/src/libstrongswan/plugins/aesni/aesni_cmac.c
new file mode 100644
index 000000000..d6a87e6d7
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_cmac.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (C) 2012 Tobias Brunner
+ * Hochschule fuer Technik Rapperswil
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "aesni_cmac.h"
+#include "aesni_key.h"
+
+#include <crypto/prfs/mac_prf.h>
+#include <crypto/signers/mac_signer.h>
+
+typedef struct private_mac_t private_mac_t;
+
+/**
+ * Private data of a mac_t object.
+ */
+struct private_mac_t {
+
+ /**
+ * Public interface.
+ */
+ mac_t public;
+
+ /**
+ * Key schedule for key K
+ */
+ aesni_key_t *k;
+
+ /**
+ * K1
+ */
+ __m128i k1;
+
+ /**
+ * K2
+ */
+ __m128i k2;
+
+ /**
+ * T
+ */
+ __m128i t;
+
+ /**
+ * remaining, unprocessed bytes in append mode
+ */
+ u_char rem[AES_BLOCK_SIZE];
+
+ /**
+ * number of bytes in remaining
+ */
+ int rem_size;
+};
+
+METHOD(mac_t, get_mac, bool,
+ private_mac_t *this, chunk_t data, u_int8_t *out)
+{
+ __m128i *ks, t, l, *bi;
+ u_int blocks, rem, i;
+
+ if (!this->k)
+ {
+ return FALSE;
+ }
+
+ ks = this->k->schedule;
+ t = this->t;
+
+ if (this->rem_size + data.len > AES_BLOCK_SIZE)
+ {
+ /* T := 0x00000000000000000000000000000000 (initially)
+ * for each block M_i (except the last)
+ * X := T XOR M_i;
+ * T := AES-128(K, X);
+ */
+
+ /* append data to remaining bytes, process block M_1 */
+ memcpy(this->rem + this->rem_size, data.ptr,
+ AES_BLOCK_SIZE - this->rem_size);
+ data = chunk_skip(data, AES_BLOCK_SIZE - this->rem_size);
+
+ t = _mm_xor_si128(t, _mm_loadu_si128((__m128i*)this->rem));
+
+ t = _mm_xor_si128(t, ks[0]);
+ t = _mm_aesenc_si128(t, ks[1]);
+ t = _mm_aesenc_si128(t, ks[2]);
+ t = _mm_aesenc_si128(t, ks[3]);
+ t = _mm_aesenc_si128(t, ks[4]);
+ t = _mm_aesenc_si128(t, ks[5]);
+ t = _mm_aesenc_si128(t, ks[6]);
+ t = _mm_aesenc_si128(t, ks[7]);
+ t = _mm_aesenc_si128(t, ks[8]);
+ t = _mm_aesenc_si128(t, ks[9]);
+ t = _mm_aesenclast_si128(t, ks[10]);
+
+		/* determine the complete blocks M_2 ... M_n-1, withholding the
+		 * last (possibly partial) block M_n */
+ bi = (__m128i*)data.ptr;
+ rem = data.len % AES_BLOCK_SIZE;
+ blocks = data.len / AES_BLOCK_SIZE;
+ if (!rem && blocks)
+ { /* don't do last block */
+ rem = AES_BLOCK_SIZE;
+ blocks--;
+ }
+
+		/* process blocks M_2 ... M_n-1 */
+ for (i = 0; i < blocks; i++)
+ {
+ t = _mm_xor_si128(t, _mm_loadu_si128(bi + i));
+
+ t = _mm_xor_si128(t, ks[0]);
+ t = _mm_aesenc_si128(t, ks[1]);
+ t = _mm_aesenc_si128(t, ks[2]);
+ t = _mm_aesenc_si128(t, ks[3]);
+ t = _mm_aesenc_si128(t, ks[4]);
+ t = _mm_aesenc_si128(t, ks[5]);
+ t = _mm_aesenc_si128(t, ks[6]);
+ t = _mm_aesenc_si128(t, ks[7]);
+ t = _mm_aesenc_si128(t, ks[8]);
+ t = _mm_aesenc_si128(t, ks[9]);
+ t = _mm_aesenclast_si128(t, ks[10]);
+ }
+
+ /* store remaining bytes of block M_n */
+ memcpy(this->rem, data.ptr + data.len - rem, rem);
+ this->rem_size = rem;
+ }
+ else
+ {
+ /* no complete block (or last block), just copy into remaining */
+ memcpy(this->rem + this->rem_size, data.ptr, data.len);
+ this->rem_size += data.len;
+ }
+ if (out)
+ {
+ /* if last block is complete
+ * M_last := M_n XOR K1;
+ * else
+ * M_last := padding(M_n) XOR K2;
+ */
+ if (this->rem_size == AES_BLOCK_SIZE)
+ {
+ l = _mm_loadu_si128((__m128i*)this->rem);
+ l = _mm_xor_si128(l, this->k1);
+ }
+ else
+ {
+ /* padding(x) = x || 10^i where i is 128-8*r-1
+ * That is, padding(x) is the concatenation of x and a single '1',
+ * followed by the minimum number of '0's, so that the total length is
+ * equal to 128 bits.
+ */
+ if (this->rem_size < AES_BLOCK_SIZE)
+ {
+ memset(this->rem + this->rem_size, 0,
+ AES_BLOCK_SIZE - this->rem_size);
+ this->rem[this->rem_size] = 0x80;
+ }
+ l = _mm_loadu_si128((__m128i*)this->rem);
+ l = _mm_xor_si128(l, this->k2);
+ }
+ /* T := M_last XOR T;
+ * T := AES-128(K,T);
+ */
+ t = _mm_xor_si128(l, t);
+
+ t = _mm_xor_si128(t, ks[0]);
+ t = _mm_aesenc_si128(t, ks[1]);
+ t = _mm_aesenc_si128(t, ks[2]);
+ t = _mm_aesenc_si128(t, ks[3]);
+ t = _mm_aesenc_si128(t, ks[4]);
+ t = _mm_aesenc_si128(t, ks[5]);
+ t = _mm_aesenc_si128(t, ks[6]);
+ t = _mm_aesenc_si128(t, ks[7]);
+ t = _mm_aesenc_si128(t, ks[8]);
+ t = _mm_aesenc_si128(t, ks[9]);
+ t = _mm_aesenclast_si128(t, ks[10]);
+
+ _mm_storeu_si128((__m128i*)out, t);
+
+ /* reset state */
+ t = _mm_setzero_si128();
+ this->rem_size = 0;
+ }
+ this->t = t;
+ return TRUE;
+}
+
+METHOD(mac_t, get_mac_size, size_t,
+ private_mac_t *this)
+{
+ return AES_BLOCK_SIZE;
+}
+
+/**
+ * Left-shift the given chunk by one bit, treating it as a big-endian integer.
+ */
+static void bit_shift(chunk_t chunk)
+{
+ size_t i;
+
+ for (i = 0; i < chunk.len; i++)
+ {
+ chunk.ptr[i] <<= 1;
+ if (i < chunk.len - 1 && chunk.ptr[i + 1] & 0x80)
+ {
+ chunk.ptr[i] |= 0x01;
+ }
+ }
+}
+
+METHOD(mac_t, set_key, bool,
+ private_mac_t *this, chunk_t key)
+{
+ __m128i rb, msb, l, a;
+ u_int round;
+ chunk_t k;
+
+ this->t = _mm_setzero_si128();
+ this->rem_size = 0;
+
+ /* we support variable keys as defined in RFC 4615 */
+ if (key.len == AES_BLOCK_SIZE)
+ {
+ k = key;
+ }
+ else
+ { /* use cmac recursively to resize longer or shorter keys */
+ k = chunk_alloca(AES_BLOCK_SIZE);
+ memset(k.ptr, 0, k.len);
+ if (!set_key(this, k) || !get_mac(this, key, k.ptr))
+ {
+ return FALSE;
+ }
+ }
+
+ DESTROY_IF(this->k);
+ this->k = aesni_key_create(TRUE, k);
+ if (!this->k)
+ {
+ return FALSE;
+ }
+
+ /*
+ * Rb = 0x00000000000000000000000000000087
+ * L = 0x00000000000000000000000000000000 encrypted with K
+ * if MSB(L) == 0
+ * K1 = L << 1
+ * else
+ * K1 = (L << 1) XOR Rb
+ * if MSB(K1) == 0
+ * K2 = K1 << 1
+ * else
+ * K2 = (K1 << 1) XOR Rb
+ */
+
+ rb = _mm_set_epi32(0x87000000, 0, 0, 0);
+ msb = _mm_set_epi32(0, 0, 0, 0x80);
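+	/* with the little-endian lane order, Rb sets block byte 15 to 0x87,
+	 * while msb masks the most significant bit of block byte 0 */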
+
+ l = _mm_setzero_si128();
+
+ l = _mm_xor_si128(l, this->k->schedule[0]);
+ for (round = 1; round < this->k->rounds; round++)
+ {
+ l = _mm_aesenc_si128(l, this->k->schedule[round]);
+ }
+ l = _mm_aesenclast_si128(l, this->k->schedule[this->k->rounds]);
+
+ this->k1 = l;
+ bit_shift(chunk_from_thing(this->k1));
+ a = _mm_and_si128(l, msb);
+ if (memchr(&a, 0x80, 1))
+ {
+ this->k1 = _mm_xor_si128(this->k1, rb);
+ }
+ this->k2 = this->k1;
+ bit_shift(chunk_from_thing(this->k2));
+ a = _mm_and_si128(this->k1, msb);
+ if (memchr(&a, 0x80, 1))
+ {
+ this->k2 = _mm_xor_si128(this->k2, rb);
+ }
+
+ return TRUE;
+}
+
+METHOD(mac_t, destroy, void,
+ private_mac_t *this)
+{
+ DESTROY_IF(this->k);
+ memwipe(&this->k1, sizeof(this->k1));
+ memwipe(&this->k2, sizeof(this->k2));
+ free_align(this);
+}
+
+/*
+ * Described in header
+ */
+mac_t *aesni_cmac_create(encryption_algorithm_t algo, size_t key_size)
+{
+ private_mac_t *this;
+
+ INIT_ALIGN(this, sizeof(__m128i),
+ .public = {
+ .get_mac = _get_mac,
+ .get_mac_size = _get_mac_size,
+ .set_key = _set_key,
+ .destroy = _destroy,
+ },
+ );
+
+ return &this->public;
+}
+
+/*
+ * Described in header.
+ */
+prf_t *aesni_cmac_prf_create(pseudo_random_function_t algo)
+{
+ mac_t *cmac;
+
+ switch (algo)
+ {
+ case PRF_AES128_CMAC:
+ cmac = aesni_cmac_create(ENCR_AES_CBC, 16);
+ break;
+ default:
+ return NULL;
+ }
+ if (cmac)
+ {
+ return mac_prf_create(cmac);
+ }
+ return NULL;
+}
+
+/*
+ * Described in header
+ */
+signer_t *aesni_cmac_signer_create(integrity_algorithm_t algo)
+{
+ size_t truncation;
+ mac_t *cmac;
+
+ switch (algo)
+ {
+ case AUTH_AES_CMAC_96:
+ cmac = aesni_cmac_create(ENCR_AES_CBC, 16);
+ truncation = 12;
+ break;
+ default:
+ return NULL;
+ }
+ if (cmac)
+ {
+ return mac_signer_create(cmac, truncation);
+ }
+ return NULL;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_cmac.h b/src/libstrongswan/plugins/aesni/aesni_cmac.h
new file mode 100644
index 000000000..5f0af7393
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_cmac.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_cmac aesni_cmac
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_CMAC_H_
+#define AESNI_CMAC_H_
+
+#include <crypto/mac.h>
+#include <crypto/prfs/prf.h>
+#include <crypto/signers/signer.h>
+
+/**
+ * Create a generic mac_t object using AESNI CMAC.
+ *
+ * @param algo underlying encryption algorithm
+ * @param key_size	size of encryption key, in bytes
+ * @return			mac_t object
+ */
+mac_t *aesni_cmac_create(encryption_algorithm_t algo, size_t key_size);
+
+/**
+ * Creates a new prf_t object based on AESNI CMAC.
+ *
+ * @param algo algorithm to implement
+ * @return prf_t object, NULL if not supported
+ */
+prf_t *aesni_cmac_prf_create(pseudo_random_function_t algo);
+
+/**
+ * Creates a new signer_t object based on AESNI CMAC.
+ *
+ * @param algo algorithm to implement
+ * @return signer_t, NULL if not supported
+ */
+signer_t *aesni_cmac_signer_create(integrity_algorithm_t algo);
+
+#endif /** AESNI_CMAC_H_ @}*/
diff --git a/src/libstrongswan/plugins/aesni/aesni_ctr.c b/src/libstrongswan/plugins/aesni/aesni_ctr.c
new file mode 100644
index 000000000..989813814
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_ctr.c
@@ -0,0 +1,643 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "aesni_ctr.h"
+#include "aesni_key.h"
+
+#include <tmmintrin.h>
+
+/**
+ * Pipeline parallelism we use for CTR en/decryption
+ */
+#define CTR_CRYPT_PARALLELISM 4
+
+typedef struct private_aesni_ctr_t private_aesni_ctr_t;
+
+/**
+ * CTR en/decryption method type
+ */
+typedef void (*aesni_ctr_fn_t)(private_aesni_ctr_t*, size_t, u_char*, u_char*);
+
+/**
+ * Private data of an aesni_ctr_t object.
+ */
+struct private_aesni_ctr_t {
+
+ /**
+ * Public aesni_ctr_t interface.
+ */
+ aesni_ctr_t public;
+
+ /**
+ * Key size
+ */
+ u_int key_size;
+
+ /**
+ * Key schedule
+ */
+ aesni_key_t *key;
+
+ /**
+ * Encryption method
+ */
+ aesni_ctr_fn_t crypt;
+
+ /**
+ * Counter state
+ */
+ struct {
+ char nonce[4];
+ char iv[8];
+ u_int32_t counter;
+ } __attribute__((packed, aligned(sizeof(__m128i)))) state;
+};
+
+/**
+ * Do big-endian increment on x
+ */
+static inline __m128i increment_be(__m128i x)
+{
+ __m128i swap;
+
+ swap = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+
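+	/* byte-swap to little-endian, add one in the low 64-bit lane, swap back */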
+ x = _mm_shuffle_epi8(x, swap);
+ x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1));
+ x = _mm_shuffle_epi8(x, swap);
+
+ return x;
+}
+
+/**
+ * AES-128 CTR encryption
+ */
+static void encrypt_ctr128(private_aesni_ctr_t *this,
+ size_t len, u_char *in, u_char *out)
+{
+ __m128i t1, t2, t3, t4;
+ __m128i d1, d2, d3, d4;
+ __m128i *ks, state, b, *bi, *bo;
+ u_int i, blocks, pblocks, rem;
+
+ state = _mm_load_si128((__m128i*)&this->state);
+ blocks = len / AES_BLOCK_SIZE;
+ pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM);
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
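+	/* encrypt four counter blocks in parallel to hide the AESENC latency */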
+ for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM)
+ {
+ d1 = _mm_loadu_si128(bi + i + 0);
+ d2 = _mm_loadu_si128(bi + i + 1);
+ d3 = _mm_loadu_si128(bi + i + 2);
+ d4 = _mm_loadu_si128(bi + i + 3);
+
+ t1 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+ t2 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+ t3 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+ t4 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t2 = _mm_aesenc_si128(t2, ks[1]);
+ t3 = _mm_aesenc_si128(t3, ks[1]);
+ t4 = _mm_aesenc_si128(t4, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t2 = _mm_aesenc_si128(t2, ks[2]);
+ t3 = _mm_aesenc_si128(t3, ks[2]);
+ t4 = _mm_aesenc_si128(t4, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t2 = _mm_aesenc_si128(t2, ks[3]);
+ t3 = _mm_aesenc_si128(t3, ks[3]);
+ t4 = _mm_aesenc_si128(t4, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t2 = _mm_aesenc_si128(t2, ks[4]);
+ t3 = _mm_aesenc_si128(t3, ks[4]);
+ t4 = _mm_aesenc_si128(t4, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t2 = _mm_aesenc_si128(t2, ks[5]);
+ t3 = _mm_aesenc_si128(t3, ks[5]);
+ t4 = _mm_aesenc_si128(t4, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t2 = _mm_aesenc_si128(t2, ks[6]);
+ t3 = _mm_aesenc_si128(t3, ks[6]);
+ t4 = _mm_aesenc_si128(t4, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t2 = _mm_aesenc_si128(t2, ks[7]);
+ t3 = _mm_aesenc_si128(t3, ks[7]);
+ t4 = _mm_aesenc_si128(t4, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t2 = _mm_aesenc_si128(t2, ks[8]);
+ t3 = _mm_aesenc_si128(t3, ks[8]);
+ t4 = _mm_aesenc_si128(t4, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t2 = _mm_aesenc_si128(t2, ks[9]);
+ t3 = _mm_aesenc_si128(t3, ks[9]);
+ t4 = _mm_aesenc_si128(t4, ks[9]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[10]);
+ t2 = _mm_aesenclast_si128(t2, ks[10]);
+ t3 = _mm_aesenclast_si128(t3, ks[10]);
+ t4 = _mm_aesenclast_si128(t4, ks[10]);
+ t1 = _mm_xor_si128(t1, d1);
+ t2 = _mm_xor_si128(t2, d2);
+ t3 = _mm_xor_si128(t3, d3);
+ t4 = _mm_xor_si128(t4, d4);
+ _mm_storeu_si128(bo + i + 0, t1);
+ _mm_storeu_si128(bo + i + 1, t2);
+ _mm_storeu_si128(bo + i + 2, t3);
+ _mm_storeu_si128(bo + i + 3, t4);
+ }
+
+ for (i = pblocks; i < blocks; i++)
+ {
+ d1 = _mm_loadu_si128(bi + i);
+
+ t1 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[10]);
+ t1 = _mm_xor_si128(t1, d1);
+ _mm_storeu_si128(bo + i, t1);
+ }
+
+ if (rem)
+ {
+ memset(&b, 0, sizeof(b));
+ memcpy(&b, bi + blocks, rem);
+
+ d1 = _mm_loadu_si128(&b);
+ t1 = _mm_xor_si128(state, ks[0]);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[10]);
+ t1 = _mm_xor_si128(t1, d1);
+ _mm_storeu_si128(&b, t1);
+
+ memcpy(bo + blocks, &b, rem);
+ }
+}
+
+/**
+ * AES-192 CTR encryption
+ */
+static void encrypt_ctr192(private_aesni_ctr_t *this,
+ size_t len, u_char *in, u_char *out)
+{
+ __m128i t1, t2, t3, t4;
+ __m128i d1, d2, d3, d4;
+ __m128i *ks, state, b, *bi, *bo;
+ u_int i, blocks, pblocks, rem;
+
+ state = _mm_load_si128((__m128i*)&this->state);
+ blocks = len / AES_BLOCK_SIZE;
+ pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM);
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
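+	/* same four-block pipeline as the AES-128 variant, with twelve rounds */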
+ for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM)
+ {
+ d1 = _mm_loadu_si128(bi + i + 0);
+ d2 = _mm_loadu_si128(bi + i + 1);
+ d3 = _mm_loadu_si128(bi + i + 2);
+ d4 = _mm_loadu_si128(bi + i + 3);
+
+ t1 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+ t2 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+ t3 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+ t4 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t2 = _mm_aesenc_si128(t2, ks[1]);
+ t3 = _mm_aesenc_si128(t3, ks[1]);
+ t4 = _mm_aesenc_si128(t4, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t2 = _mm_aesenc_si128(t2, ks[2]);
+ t3 = _mm_aesenc_si128(t3, ks[2]);
+ t4 = _mm_aesenc_si128(t4, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t2 = _mm_aesenc_si128(t2, ks[3]);
+ t3 = _mm_aesenc_si128(t3, ks[3]);
+ t4 = _mm_aesenc_si128(t4, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t2 = _mm_aesenc_si128(t2, ks[4]);
+ t3 = _mm_aesenc_si128(t3, ks[4]);
+ t4 = _mm_aesenc_si128(t4, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t2 = _mm_aesenc_si128(t2, ks[5]);
+ t3 = _mm_aesenc_si128(t3, ks[5]);
+ t4 = _mm_aesenc_si128(t4, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t2 = _mm_aesenc_si128(t2, ks[6]);
+ t3 = _mm_aesenc_si128(t3, ks[6]);
+ t4 = _mm_aesenc_si128(t4, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t2 = _mm_aesenc_si128(t2, ks[7]);
+ t3 = _mm_aesenc_si128(t3, ks[7]);
+ t4 = _mm_aesenc_si128(t4, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t2 = _mm_aesenc_si128(t2, ks[8]);
+ t3 = _mm_aesenc_si128(t3, ks[8]);
+ t4 = _mm_aesenc_si128(t4, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t2 = _mm_aesenc_si128(t2, ks[9]);
+ t3 = _mm_aesenc_si128(t3, ks[9]);
+ t4 = _mm_aesenc_si128(t4, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t2 = _mm_aesenc_si128(t2, ks[10]);
+ t3 = _mm_aesenc_si128(t3, ks[10]);
+ t4 = _mm_aesenc_si128(t4, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+ t2 = _mm_aesenc_si128(t2, ks[11]);
+ t3 = _mm_aesenc_si128(t3, ks[11]);
+ t4 = _mm_aesenc_si128(t4, ks[11]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[12]);
+ t2 = _mm_aesenclast_si128(t2, ks[12]);
+ t3 = _mm_aesenclast_si128(t3, ks[12]);
+ t4 = _mm_aesenclast_si128(t4, ks[12]);
+ t1 = _mm_xor_si128(t1, d1);
+ t2 = _mm_xor_si128(t2, d2);
+ t3 = _mm_xor_si128(t3, d3);
+ t4 = _mm_xor_si128(t4, d4);
+ _mm_storeu_si128(bo + i + 0, t1);
+ _mm_storeu_si128(bo + i + 1, t2);
+ _mm_storeu_si128(bo + i + 2, t3);
+ _mm_storeu_si128(bo + i + 3, t4);
+ }
+
+ for (i = pblocks; i < blocks; i++)
+ {
+ d1 = _mm_loadu_si128(bi + i);
+
+ t1 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[12]);
+ t1 = _mm_xor_si128(t1, d1);
+ _mm_storeu_si128(bo + i, t1);
+ }
+
+ if (rem)
+ {
+ memset(&b, 0, sizeof(b));
+ memcpy(&b, bi + blocks, rem);
+
+ d1 = _mm_loadu_si128(&b);
+ t1 = _mm_xor_si128(state, ks[0]);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[12]);
+ t1 = _mm_xor_si128(t1, d1);
+ _mm_storeu_si128(&b, t1);
+
+ memcpy(bo + blocks, &b, rem);
+ }
+}
+
+/**
+ * AES-256 CTR encryption
+ */
+static void encrypt_ctr256(private_aesni_ctr_t *this,
+ size_t len, u_char *in, u_char *out)
+{
+ __m128i t1, t2, t3, t4;
+ __m128i d1, d2, d3, d4;
+ __m128i *ks, state, b, *bi, *bo;
+ u_int i, blocks, pblocks, rem;
+
+ state = _mm_load_si128((__m128i*)&this->state);
+ blocks = len / AES_BLOCK_SIZE;
+ pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM);
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
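+	/* same four-block pipeline as the AES-128 variant, with fourteen rounds */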
+ for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM)
+ {
+ d1 = _mm_loadu_si128(bi + i + 0);
+ d2 = _mm_loadu_si128(bi + i + 1);
+ d3 = _mm_loadu_si128(bi + i + 2);
+ d4 = _mm_loadu_si128(bi + i + 3);
+
+ t1 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+ t2 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+ t3 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+ t4 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t2 = _mm_aesenc_si128(t2, ks[1]);
+ t3 = _mm_aesenc_si128(t3, ks[1]);
+ t4 = _mm_aesenc_si128(t4, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t2 = _mm_aesenc_si128(t2, ks[2]);
+ t3 = _mm_aesenc_si128(t3, ks[2]);
+ t4 = _mm_aesenc_si128(t4, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t2 = _mm_aesenc_si128(t2, ks[3]);
+ t3 = _mm_aesenc_si128(t3, ks[3]);
+ t4 = _mm_aesenc_si128(t4, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t2 = _mm_aesenc_si128(t2, ks[4]);
+ t3 = _mm_aesenc_si128(t3, ks[4]);
+ t4 = _mm_aesenc_si128(t4, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t2 = _mm_aesenc_si128(t2, ks[5]);
+ t3 = _mm_aesenc_si128(t3, ks[5]);
+ t4 = _mm_aesenc_si128(t4, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t2 = _mm_aesenc_si128(t2, ks[6]);
+ t3 = _mm_aesenc_si128(t3, ks[6]);
+ t4 = _mm_aesenc_si128(t4, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t2 = _mm_aesenc_si128(t2, ks[7]);
+ t3 = _mm_aesenc_si128(t3, ks[7]);
+ t4 = _mm_aesenc_si128(t4, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t2 = _mm_aesenc_si128(t2, ks[8]);
+ t3 = _mm_aesenc_si128(t3, ks[8]);
+ t4 = _mm_aesenc_si128(t4, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t2 = _mm_aesenc_si128(t2, ks[9]);
+ t3 = _mm_aesenc_si128(t3, ks[9]);
+ t4 = _mm_aesenc_si128(t4, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t2 = _mm_aesenc_si128(t2, ks[10]);
+ t3 = _mm_aesenc_si128(t3, ks[10]);
+ t4 = _mm_aesenc_si128(t4, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+ t2 = _mm_aesenc_si128(t2, ks[11]);
+ t3 = _mm_aesenc_si128(t3, ks[11]);
+ t4 = _mm_aesenc_si128(t4, ks[11]);
+ t1 = _mm_aesenc_si128(t1, ks[12]);
+ t2 = _mm_aesenc_si128(t2, ks[12]);
+ t3 = _mm_aesenc_si128(t3, ks[12]);
+ t4 = _mm_aesenc_si128(t4, ks[12]);
+ t1 = _mm_aesenc_si128(t1, ks[13]);
+ t2 = _mm_aesenc_si128(t2, ks[13]);
+ t3 = _mm_aesenc_si128(t3, ks[13]);
+ t4 = _mm_aesenc_si128(t4, ks[13]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[14]);
+ t2 = _mm_aesenclast_si128(t2, ks[14]);
+ t3 = _mm_aesenclast_si128(t3, ks[14]);
+ t4 = _mm_aesenclast_si128(t4, ks[14]);
+ t1 = _mm_xor_si128(t1, d1);
+ t2 = _mm_xor_si128(t2, d2);
+ t3 = _mm_xor_si128(t3, d3);
+ t4 = _mm_xor_si128(t4, d4);
+ _mm_storeu_si128(bo + i + 0, t1);
+ _mm_storeu_si128(bo + i + 1, t2);
+ _mm_storeu_si128(bo + i + 2, t3);
+ _mm_storeu_si128(bo + i + 3, t4);
+ }
+
+ for (i = pblocks; i < blocks; i++)
+ {
+ d1 = _mm_loadu_si128(bi + i);
+
+ t1 = _mm_xor_si128(state, ks[0]);
+ state = increment_be(state);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+ t1 = _mm_aesenc_si128(t1, ks[12]);
+ t1 = _mm_aesenc_si128(t1, ks[13]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[14]);
+ t1 = _mm_xor_si128(t1, d1);
+ _mm_storeu_si128(bo + i, t1);
+ }
+
+ if (rem)
+ {
+ memset(&b, 0, sizeof(b));
+ memcpy(&b, bi + blocks, rem);
+
+ d1 = _mm_loadu_si128(&b);
+ t1 = _mm_xor_si128(state, ks[0]);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+ t1 = _mm_aesenc_si128(t1, ks[12]);
+ t1 = _mm_aesenc_si128(t1, ks[13]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[14]);
+ t1 = _mm_xor_si128(t1, d1);
+ _mm_storeu_si128(&b, t1);
+
+ memcpy(bo + blocks, &b, rem);
+ }
+}
+
+METHOD(crypter_t, crypt, bool,
+ private_aesni_ctr_t *this, chunk_t in, chunk_t iv, chunk_t *out)
+{
+ u_char *buf;
+
+ if (!this->key || iv.len != sizeof(this->state.iv))
+ {
+ return FALSE;
+ }
+ memcpy(this->state.iv, iv.ptr, sizeof(this->state.iv));
+ this->state.counter = htonl(1);
+
+ buf = in.ptr;
+ if (out)
+ {
+ *out = chunk_alloc(in.len);
+ buf = out->ptr;
+ }
+ this->crypt(this, in.len, in.ptr, buf);
+ return TRUE;
+}
+
+METHOD(crypter_t, get_block_size, size_t,
+ private_aesni_ctr_t *this)
+{
+ return 1;
+}
+
+METHOD(crypter_t, get_iv_size, size_t,
+ private_aesni_ctr_t *this)
+{
+ return sizeof(this->state.iv);
+}
+
+METHOD(crypter_t, get_key_size, size_t,
+ private_aesni_ctr_t *this)
+{
+ return this->key_size + sizeof(this->state.nonce);
+}
+
+METHOD(crypter_t, set_key, bool,
+ private_aesni_ctr_t *this, chunk_t key)
+{
+ if (key.len != get_key_size(this))
+ {
+ return FALSE;
+ }
+
+ memcpy(this->state.nonce, key.ptr + key.len - sizeof(this->state.nonce),
+ sizeof(this->state.nonce));
+ key.len -= sizeof(this->state.nonce);
+
+ DESTROY_IF(this->key);
+ this->key = aesni_key_create(TRUE, key);
+
+ return this->key;
+}
+
+METHOD(crypter_t, destroy, void,
+ private_aesni_ctr_t *this)
+{
+ DESTROY_IF(this->key);
+ free_align(this);
+}
+
+/**
+ * See header
+ */
+aesni_ctr_t *aesni_ctr_create(encryption_algorithm_t algo, size_t key_size)
+{
+ private_aesni_ctr_t *this;
+
+ if (algo != ENCR_AES_CTR)
+ {
+ return NULL;
+ }
+ switch (key_size)
+ {
+ case 0:
+ key_size = 16;
+ break;
+ case 16:
+ case 24:
+ case 32:
+ break;
+ default:
+ return NULL;
+ }
+
+ INIT_ALIGN(this, sizeof(__m128i),
+ .public = {
+ .crypter = {
+ .encrypt = _crypt,
+ .decrypt = _crypt,
+ .get_block_size = _get_block_size,
+ .get_iv_size = _get_iv_size,
+ .get_key_size = _get_key_size,
+ .set_key = _set_key,
+ .destroy = _destroy,
+ },
+ },
+ .key_size = key_size,
+ );
+
+ switch (key_size)
+ {
+ case 16:
+ this->crypt = encrypt_ctr128;
+ break;
+ case 24:
+ this->crypt = encrypt_ctr192;
+ break;
+ case 32:
+ this->crypt = encrypt_ctr256;
+ break;
+ }
+
+ return &this->public;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_ctr.h b/src/libstrongswan/plugins/aesni/aesni_ctr.h
new file mode 100644
index 000000000..6126a2c75
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_ctr.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_ctr aesni_ctr
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_CTR_H_
+#define AESNI_CTR_H_
+
+#include <library.h>
+
+typedef struct aesni_ctr_t aesni_ctr_t;
+
+/**
+ * CTR mode crypter using AES-NI
+ */
+struct aesni_ctr_t {
+
+ /**
+ * Implements crypter interface
+ */
+ crypter_t crypter;
+};
+
+/**
+ * Create an aesni_ctr_t instance.
+ *
+ * @param algo			encryption algorithm, ENCR_AES_CTR
+ * @param key_size AES key size, in bytes
+ * @return AES-CTR crypter, NULL if not supported
+ */
+aesni_ctr_t *aesni_ctr_create(encryption_algorithm_t algo, size_t key_size);
+
+#endif /** AESNI_CTR_H_ @}*/
diff --git a/src/libstrongswan/plugins/aesni/aesni_gcm.c b/src/libstrongswan/plugins/aesni/aesni_gcm.c
new file mode 100644
index 000000000..53c0b144e
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_gcm.c
@@ -0,0 +1,1447 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "aesni_gcm.h"
+#include "aesni_key.h"
+
+#include <crypto/iv/iv_gen_seq.h>
+
+#include <tmmintrin.h>
+
+#define NONCE_SIZE 12
+#define IV_SIZE 8
+#define SALT_SIZE (NONCE_SIZE - IV_SIZE)
+
+/**
+ * Pipeline parallelism we use for GCM en/decryption
+ */
+#define GCM_CRYPT_PARALLELISM 4
+
+typedef struct private_aesni_gcm_t private_aesni_gcm_t;
+
+/**
+ * GCM en/decryption method type
+ */
+typedef void (*aesni_gcm_fn_t)(private_aesni_gcm_t*, size_t, u_char*, u_char*,
+ u_char*, size_t, u_char*, u_char*);
+
+/**
+ * Private data of an aesni_gcm_t object.
+ */
+struct private_aesni_gcm_t {
+
+ /**
+ * Public aesni_gcm_t interface.
+ */
+ aesni_gcm_t public;
+
+ /**
+ * Encryption key schedule
+ */
+ aesni_key_t *key;
+
+ /**
+ * IV generator.
+ */
+ iv_gen_t *iv_gen;
+
+ /**
+ * Length of the integrity check value
+ */
+ size_t icv_size;
+
+ /**
+ * Length of the key in bytes
+ */
+ size_t key_size;
+
+ /**
+ * GCM encryption function
+ */
+ aesni_gcm_fn_t encrypt;
+
+ /**
+ * GCM decryption function
+ */
+ aesni_gcm_fn_t decrypt;
+
+ /**
+ * salt to add to nonce
+ */
+ u_char salt[SALT_SIZE];
+
+ /**
+ * GHASH subkey H, big-endian
+ */
+ __m128i h;
+
+ /**
+ * GHASH key H^2, big-endian
+ */
+ __m128i hh;
+
+ /**
+ * GHASH key H^3, big-endian
+ */
+ __m128i hhh;
+
+ /**
+ * GHASH key H^4, big-endian
+ */
+ __m128i hhhh;
+};
+
+/**
+ * Byte-swap a 128-bit integer
+ */
+static inline __m128i swap128(__m128i x)
+{
+ return _mm_shuffle_epi8(x,
+ _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
+}
+
+/**
+ * Multiply two blocks in GF128
+ */
+static __m128i mult_block(__m128i h, __m128i y)
+{
+ __m128i t1, t2, t3, t4, t5, t6;
+
+ y = swap128(y);
+
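+	/* 256-bit carry-less product from four 64x64 bit CLMUL multiplications */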
+ t1 = _mm_clmulepi64_si128(h, y, 0x00);
+ t2 = _mm_clmulepi64_si128(h, y, 0x01);
+ t3 = _mm_clmulepi64_si128(h, y, 0x10);
+ t4 = _mm_clmulepi64_si128(h, y, 0x11);
+
+ t2 = _mm_xor_si128(t2, t3);
+ t3 = _mm_slli_si128(t2, 8);
+ t2 = _mm_srli_si128(t2, 8);
+ t1 = _mm_xor_si128(t1, t3);
+ t4 = _mm_xor_si128(t4, t2);
+
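+	/* shift the product left by one bit to account for the reflected
+	 * bit order GCM uses */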
+ t5 = _mm_srli_epi32(t1, 31);
+ t1 = _mm_slli_epi32(t1, 1);
+ t6 = _mm_srli_epi32(t4, 31);
+ t4 = _mm_slli_epi32(t4, 1);
+
+ t3 = _mm_srli_si128(t5, 12);
+ t6 = _mm_slli_si128(t6, 4);
+ t5 = _mm_slli_si128(t5, 4);
+ t1 = _mm_or_si128(t1, t5);
+ t4 = _mm_or_si128(t4, t6);
+ t4 = _mm_or_si128(t4, t3);
+
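+	/* reduce modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1 */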
+ t5 = _mm_slli_epi32(t1, 31);
+ t6 = _mm_slli_epi32(t1, 30);
+ t3 = _mm_slli_epi32(t1, 25);
+
+ t5 = _mm_xor_si128(t5, t6);
+ t5 = _mm_xor_si128(t5, t3);
+ t6 = _mm_srli_si128(t5, 4);
+ t4 = _mm_xor_si128(t4, t6);
+ t5 = _mm_slli_si128(t5, 12);
+ t1 = _mm_xor_si128(t1, t5);
+ t4 = _mm_xor_si128(t4, t1);
+
+ t5 = _mm_srli_epi32(t1, 1);
+ t2 = _mm_srli_epi32(t1, 2);
+ t3 = _mm_srli_epi32(t1, 7);
+ t4 = _mm_xor_si128(t4, t2);
+ t4 = _mm_xor_si128(t4, t3);
+ t4 = _mm_xor_si128(t4, t5);
+
+ return swap128(t4);
+}
+
+/**
+ * Multiply four consecutive blocks by their respective GHASH key, XOR
+ */
+static inline __m128i mult4xor(__m128i h1, __m128i h2, __m128i h3, __m128i h4,
+ __m128i d1, __m128i d2, __m128i d3, __m128i d4)
+{
+ __m128i t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
+
+ d1 = swap128(d1);
+ d2 = swap128(d2);
+ d3 = swap128(d3);
+ d4 = swap128(d4);
+
+ t0 = _mm_clmulepi64_si128(h1, d1, 0x00);
+ t1 = _mm_clmulepi64_si128(h2, d2, 0x00);
+ t2 = _mm_clmulepi64_si128(h3, d3, 0x00);
+ t3 = _mm_clmulepi64_si128(h4, d4, 0x00);
+ t8 = _mm_xor_si128(t0, t1);
+ t8 = _mm_xor_si128(t8, t2);
+ t8 = _mm_xor_si128(t8, t3);
+
+ t4 = _mm_clmulepi64_si128(h1, d1, 0x11);
+ t5 = _mm_clmulepi64_si128(h2, d2, 0x11);
+ t6 = _mm_clmulepi64_si128(h3, d3, 0x11);
+ t7 = _mm_clmulepi64_si128(h4, d4, 0x11);
+ t9 = _mm_xor_si128(t4, t5);
+ t9 = _mm_xor_si128(t9, t6);
+ t9 = _mm_xor_si128(t9, t7);
+
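+	/* Karatsuba-style middle terms: (h_hi ^ h_lo) * (d_hi ^ d_lo) per pair */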
+ t0 = _mm_shuffle_epi32(h1, 78);
+ t4 = _mm_shuffle_epi32(d1, 78);
+ t0 = _mm_xor_si128(t0, h1);
+ t4 = _mm_xor_si128(t4, d1);
+ t1 = _mm_shuffle_epi32(h2, 78);
+ t5 = _mm_shuffle_epi32(d2, 78);
+ t1 = _mm_xor_si128(t1, h2);
+ t5 = _mm_xor_si128(t5, d2);
+ t2 = _mm_shuffle_epi32(h3, 78);
+ t6 = _mm_shuffle_epi32(d3, 78);
+ t2 = _mm_xor_si128(t2, h3);
+ t6 = _mm_xor_si128(t6, d3);
+ t3 = _mm_shuffle_epi32(h4, 78);
+ t7 = _mm_shuffle_epi32(d4, 78);
+ t3 = _mm_xor_si128(t3, h4);
+ t7 = _mm_xor_si128(t7, d4);
+
+ t0 = _mm_clmulepi64_si128(t0, t4, 0x00);
+ t1 = _mm_clmulepi64_si128(t1, t5, 0x00);
+ t2 = _mm_clmulepi64_si128(t2, t6, 0x00);
+ t3 = _mm_clmulepi64_si128(t3, t7, 0x00);
+ t0 = _mm_xor_si128(t0, t8);
+ t0 = _mm_xor_si128(t0, t9);
+ t0 = _mm_xor_si128(t1, t0);
+ t0 = _mm_xor_si128(t2, t0);
+
+ t0 = _mm_xor_si128(t3, t0);
+ t4 = _mm_slli_si128(t0, 8);
+ t0 = _mm_srli_si128(t0, 8);
+ t3 = _mm_xor_si128(t4, t8);
+ t6 = _mm_xor_si128(t0, t9);
+ t7 = _mm_srli_epi32(t3, 31);
+ t8 = _mm_srli_epi32(t6, 31);
+ t3 = _mm_slli_epi32(t3, 1);
+ t6 = _mm_slli_epi32(t6, 1);
+ t9 = _mm_srli_si128(t7, 12);
+ t8 = _mm_slli_si128(t8, 4);
+ t7 = _mm_slli_si128(t7, 4);
+ t3 = _mm_or_si128(t3, t7);
+ t6 = _mm_or_si128(t6, t8);
+ t6 = _mm_or_si128(t6, t9);
+ t7 = _mm_slli_epi32(t3, 31);
+ t8 = _mm_slli_epi32(t3, 30);
+ t9 = _mm_slli_epi32(t3, 25);
+ t7 = _mm_xor_si128(t7, t8);
+ t7 = _mm_xor_si128(t7, t9);
+ t8 = _mm_srli_si128(t7, 4);
+ t7 = _mm_slli_si128(t7, 12);
+ t3 = _mm_xor_si128(t3, t7);
+ t2 = _mm_srli_epi32(t3, 1);
+ t4 = _mm_srli_epi32(t3, 2);
+ t5 = _mm_srli_epi32(t3, 7);
+ t2 = _mm_xor_si128(t2, t4);
+ t2 = _mm_xor_si128(t2, t5);
+ t2 = _mm_xor_si128(t2, t8);
+ t3 = _mm_xor_si128(t3, t2);
+ t6 = _mm_xor_si128(t6, t3);
+
+ return swap128(t6);
+}
+
+/**
+ * GHASH on a single block
+ */
+static __m128i ghash(__m128i h, __m128i y, __m128i x)
+{
+ return mult_block(h, _mm_xor_si128(y, x));
+}
+
+/**
+ * Start constructing the ICV for the associated data
+ */
+static __m128i icv_header(private_aesni_gcm_t *this, void *assoc, size_t alen)
+{
+ u_int blocks, pblocks, rem, i;
+ __m128i h1, h2, h3, h4, d1, d2, d3, d4;
+ __m128i y, last, *ab;
+
+ h1 = this->hhhh;
+ h2 = this->hhh;
+ h3 = this->hh;
+ h4 = this->h;
+
+ y = _mm_setzero_si128();
+ ab = assoc;
+ blocks = alen / AES_BLOCK_SIZE;
+ pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM);
+ rem = alen % AES_BLOCK_SIZE;
+ for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM)
+ {
+ d1 = _mm_loadu_si128(ab + i + 0);
+ d2 = _mm_loadu_si128(ab + i + 1);
+ d3 = _mm_loadu_si128(ab + i + 2);
+ d4 = _mm_loadu_si128(ab + i + 3);
+ y = _mm_xor_si128(y, d1);
+ y = mult4xor(h1, h2, h3, h4, y, d2, d3, d4);
+ }
+ for (i = pblocks; i < blocks; i++)
+ {
+ y = ghash(this->h, y, _mm_loadu_si128(ab + i));
+ }
+ if (rem)
+ {
+ last = _mm_setzero_si128();
+ memcpy(&last, ab + blocks, rem);
+
+ y = ghash(this->h, y, last);
+ }
+
+ return y;
+}
+
+/**
+ * Complete the ICV by hashing an assoc/data length block
+ */
+static __m128i icv_tailer(private_aesni_gcm_t *this, __m128i y,
+ size_t alen, size_t dlen)
+{
+ __m128i b;
+
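+	/* the final GHASH block holds the bit lengths of assoc and data */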
+ htoun64(&b, alen * 8);
+ htoun64((u_char*)&b + sizeof(u_int64_t), dlen * 8);
+
+ return ghash(this->h, y, b);
+}
+
+/**
+ * En-/Decrypt the ICV, trim and store it
+ */
+static void icv_crypt(private_aesni_gcm_t *this, __m128i y, __m128i j,
+ u_char *icv)
+{
+ __m128i *ks, t, b;
+ u_int round;
+
+ ks = this->key->schedule;
+ t = _mm_xor_si128(j, ks[0]);
+ for (round = 1; round < this->key->rounds; round++)
+ {
+ t = _mm_aesenc_si128(t, ks[round]);
+ }
+ t = _mm_aesenclast_si128(t, ks[this->key->rounds]);
+
+ t = _mm_xor_si128(y, t);
+
+ _mm_storeu_si128(&b, t);
+ memcpy(icv, &b, this->icv_size);
+}
+
+/**
+ * Do big-endian increment on x
+ */
+static inline __m128i increment_be(__m128i x)
+{
+ x = swap128(x);
+ x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1));
+ x = swap128(x);
+
+ return x;
+}
+
+/**
+ * Generate the block J0
+ */
+static inline __m128i create_j(private_aesni_gcm_t *this, u_char *iv)
+{
+ u_char j[AES_BLOCK_SIZE];
+
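+	/* J0 = salt || IV || 0x00000001 */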
+ memcpy(j, this->salt, SALT_SIZE);
+ memcpy(j + SALT_SIZE, iv, IV_SIZE);
+ htoun32(j + SALT_SIZE + IV_SIZE, 1);
+
+ return _mm_loadu_si128((__m128i*)j);
+}
+
+/**
+ * Encrypt a remaining incomplete block, return updated Y
+ */
+static __m128i encrypt_gcm_rem(private_aesni_gcm_t *this, u_int rem,
+ void *in, void *out, __m128i cb, __m128i y)
+{
+ __m128i *ks, t, b;
+ u_int round;
+
+ memset(&b, 0, sizeof(b));
+ memcpy(&b, in, rem);
+
+ ks = this->key->schedule;
+ t = _mm_xor_si128(cb, ks[0]);
+ for (round = 1; round < this->key->rounds; round++)
+ {
+ t = _mm_aesenc_si128(t, ks[round]);
+ }
+ t = _mm_aesenclast_si128(t, ks[this->key->rounds]);
+ b = _mm_xor_si128(t, b);
+
+ memcpy(out, &b, rem);
+
+ memset((u_char*)&b + rem, 0, AES_BLOCK_SIZE - rem);
+ return ghash(this->h, y, b);
+}
+
+/**
+ * Decrypt a remaining incomplete block, return updated Y
+ */
+static __m128i decrypt_gcm_rem(private_aesni_gcm_t *this, u_int rem,
+ void *in, void *out, __m128i cb, __m128i y)
+{
+ __m128i *ks, t, b;
+ u_int round;
+
+ memset(&b, 0, sizeof(b));
+ memcpy(&b, in, rem);
+
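+	/* GHASH covers the ciphertext, so hash the block before decrypting */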
+ y = ghash(this->h, y, b);
+
+ ks = this->key->schedule;
+ t = _mm_xor_si128(cb, ks[0]);
+ for (round = 1; round < this->key->rounds; round++)
+ {
+ t = _mm_aesenc_si128(t, ks[round]);
+ }
+ t = _mm_aesenclast_si128(t, ks[this->key->rounds]);
+ b = _mm_xor_si128(t, b);
+
+ memcpy(out, &b, rem);
+
+ return y;
+}
+
+/**
+ * AES-128 GCM encryption/ICV generation
+ */
+static void encrypt_gcm128(private_aesni_gcm_t *this,
+ size_t len, u_char *in, u_char *out, u_char *iv,
+ size_t alen, u_char *assoc, u_char *icv)
+{
+ __m128i d1, d2, d3, d4, t1, t2, t3, t4;
+ __m128i *ks, y, j, cb, *bi, *bo;
+ u_int blocks, pblocks, rem, i;
+
+ j = create_j(this, iv);
+ cb = increment_be(j);
+ y = icv_header(this, assoc, alen);
+ blocks = len / AES_BLOCK_SIZE;
+ pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM);
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
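+	/* encrypt four counter blocks in parallel, then fold the resulting
+	 * ciphertext blocks into GHASH in a single mult4xor pass */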
+ for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM)
+ {
+ d1 = _mm_loadu_si128(bi + i + 0);
+ d2 = _mm_loadu_si128(bi + i + 1);
+ d3 = _mm_loadu_si128(bi + i + 2);
+ d4 = _mm_loadu_si128(bi + i + 3);
+
+ t1 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t2 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t3 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t4 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t2 = _mm_aesenc_si128(t2, ks[1]);
+ t3 = _mm_aesenc_si128(t3, ks[1]);
+ t4 = _mm_aesenc_si128(t4, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t2 = _mm_aesenc_si128(t2, ks[2]);
+ t3 = _mm_aesenc_si128(t3, ks[2]);
+ t4 = _mm_aesenc_si128(t4, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t2 = _mm_aesenc_si128(t2, ks[3]);
+ t3 = _mm_aesenc_si128(t3, ks[3]);
+ t4 = _mm_aesenc_si128(t4, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t2 = _mm_aesenc_si128(t2, ks[4]);
+ t3 = _mm_aesenc_si128(t3, ks[4]);
+ t4 = _mm_aesenc_si128(t4, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t2 = _mm_aesenc_si128(t2, ks[5]);
+ t3 = _mm_aesenc_si128(t3, ks[5]);
+ t4 = _mm_aesenc_si128(t4, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t2 = _mm_aesenc_si128(t2, ks[6]);
+ t3 = _mm_aesenc_si128(t3, ks[6]);
+ t4 = _mm_aesenc_si128(t4, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t2 = _mm_aesenc_si128(t2, ks[7]);
+ t3 = _mm_aesenc_si128(t3, ks[7]);
+ t4 = _mm_aesenc_si128(t4, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t2 = _mm_aesenc_si128(t2, ks[8]);
+ t3 = _mm_aesenc_si128(t3, ks[8]);
+ t4 = _mm_aesenc_si128(t4, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t2 = _mm_aesenc_si128(t2, ks[9]);
+ t3 = _mm_aesenc_si128(t3, ks[9]);
+ t4 = _mm_aesenc_si128(t4, ks[9]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[10]);
+ t2 = _mm_aesenclast_si128(t2, ks[10]);
+ t3 = _mm_aesenclast_si128(t3, ks[10]);
+ t4 = _mm_aesenclast_si128(t4, ks[10]);
+
+ t1 = _mm_xor_si128(t1, d1);
+ t2 = _mm_xor_si128(t2, d2);
+ t3 = _mm_xor_si128(t3, d3);
+ t4 = _mm_xor_si128(t4, d4);
+
+ y = _mm_xor_si128(y, t1);
+ y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, t2, t3, t4);
+
+ _mm_storeu_si128(bo + i + 0, t1);
+ _mm_storeu_si128(bo + i + 1, t2);
+ _mm_storeu_si128(bo + i + 2, t3);
+ _mm_storeu_si128(bo + i + 3, t4);
+ }
+
+ for (i = pblocks; i < blocks; i++)
+ {
+ d1 = _mm_loadu_si128(bi + i);
+
+ t1 = _mm_xor_si128(cb, ks[0]);
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t1 = _mm_aesenclast_si128(t1, ks[10]);
+
+ t1 = _mm_xor_si128(t1, d1);
+ _mm_storeu_si128(bo + i, t1);
+
+ y = ghash(this->h, y, t1);
+
+ cb = increment_be(cb);
+ }
+
+ if (rem)
+ {
+ y = encrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y);
+ }
+ y = icv_tailer(this, y, alen, len);
+ icv_crypt(this, y, j, icv);
+}
+
+/**
+ * AES-128 GCM decryption/ICV generation
+ */
+static void decrypt_gcm128(private_aesni_gcm_t *this,
+ size_t len, u_char *in, u_char *out, u_char *iv,
+ size_t alen, u_char *assoc, u_char *icv)
+{
+ __m128i d1, d2, d3, d4, t1, t2, t3, t4;
+ __m128i *ks, y, j, cb, *bi, *bo;
+ u_int blocks, pblocks, rem, i;
+
+ j = create_j(this, iv);
+ cb = increment_be(j);
+ y = icv_header(this, assoc, alen);
+ blocks = len / AES_BLOCK_SIZE;
+ pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM);
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
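+	/* fold the ciphertext into GHASH first, as it is input to the hash and
+	 * independent of the keystream */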
+ for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM)
+ {
+ d1 = _mm_loadu_si128(bi + i + 0);
+ d2 = _mm_loadu_si128(bi + i + 1);
+ d3 = _mm_loadu_si128(bi + i + 2);
+ d4 = _mm_loadu_si128(bi + i + 3);
+
+ y = _mm_xor_si128(y, d1);
+ y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, d2, d3, d4);
+
+ t1 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t2 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t3 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t4 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t2 = _mm_aesenc_si128(t2, ks[1]);
+ t3 = _mm_aesenc_si128(t3, ks[1]);
+ t4 = _mm_aesenc_si128(t4, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t2 = _mm_aesenc_si128(t2, ks[2]);
+ t3 = _mm_aesenc_si128(t3, ks[2]);
+ t4 = _mm_aesenc_si128(t4, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t2 = _mm_aesenc_si128(t2, ks[3]);
+ t3 = _mm_aesenc_si128(t3, ks[3]);
+ t4 = _mm_aesenc_si128(t4, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t2 = _mm_aesenc_si128(t2, ks[4]);
+ t3 = _mm_aesenc_si128(t3, ks[4]);
+ t4 = _mm_aesenc_si128(t4, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t2 = _mm_aesenc_si128(t2, ks[5]);
+ t3 = _mm_aesenc_si128(t3, ks[5]);
+ t4 = _mm_aesenc_si128(t4, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t2 = _mm_aesenc_si128(t2, ks[6]);
+ t3 = _mm_aesenc_si128(t3, ks[6]);
+ t4 = _mm_aesenc_si128(t4, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t2 = _mm_aesenc_si128(t2, ks[7]);
+ t3 = _mm_aesenc_si128(t3, ks[7]);
+ t4 = _mm_aesenc_si128(t4, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t2 = _mm_aesenc_si128(t2, ks[8]);
+ t3 = _mm_aesenc_si128(t3, ks[8]);
+ t4 = _mm_aesenc_si128(t4, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t2 = _mm_aesenc_si128(t2, ks[9]);
+ t3 = _mm_aesenc_si128(t3, ks[9]);
+ t4 = _mm_aesenc_si128(t4, ks[9]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[10]);
+ t2 = _mm_aesenclast_si128(t2, ks[10]);
+ t3 = _mm_aesenclast_si128(t3, ks[10]);
+ t4 = _mm_aesenclast_si128(t4, ks[10]);
+
+ t1 = _mm_xor_si128(t1, d1);
+ t2 = _mm_xor_si128(t2, d2);
+ t3 = _mm_xor_si128(t3, d3);
+ t4 = _mm_xor_si128(t4, d4);
+
+ _mm_storeu_si128(bo + i + 0, t1);
+ _mm_storeu_si128(bo + i + 1, t2);
+ _mm_storeu_si128(bo + i + 2, t3);
+ _mm_storeu_si128(bo + i + 3, t4);
+ }
+
+ for (i = pblocks; i < blocks; i++)
+ {
+ d1 = _mm_loadu_si128(bi + i);
+
+ y = ghash(this->h, y, d1);
+
+ t1 = _mm_xor_si128(cb, ks[0]);
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t1 = _mm_aesenclast_si128(t1, ks[10]);
+
+ t1 = _mm_xor_si128(t1, d1);
+ _mm_storeu_si128(bo + i, t1);
+
+ cb = increment_be(cb);
+ }
+
+ if (rem)
+ {
+ y = decrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y);
+ }
+ y = icv_tailer(this, y, alen, len);
+ icv_crypt(this, y, j, icv);
+}
+
+/**
+ * AES-192 GCM encryption/ICV generation
+ */
+static void encrypt_gcm192(private_aesni_gcm_t *this,
+ size_t len, u_char *in, u_char *out, u_char *iv,
+ size_t alen, u_char *assoc, u_char *icv)
+{
+ __m128i d1, d2, d3, d4, t1, t2, t3, t4;
+ __m128i *ks, y, j, cb, *bi, *bo;
+ u_int blocks, pblocks, rem, i;
+
+ j = create_j(this, iv);
+ cb = increment_be(j);
+ y = icv_header(this, assoc, alen);
+ blocks = len / AES_BLOCK_SIZE;
+ pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM);
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
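+	/* identical to the AES-128 variant, but with the 12 rounds of AES-192
+	 * (round keys ks[0]..ks[12]) */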
+ for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM)
+ {
+ d1 = _mm_loadu_si128(bi + i + 0);
+ d2 = _mm_loadu_si128(bi + i + 1);
+ d3 = _mm_loadu_si128(bi + i + 2);
+ d4 = _mm_loadu_si128(bi + i + 3);
+
+ t1 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t2 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t3 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t4 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t2 = _mm_aesenc_si128(t2, ks[1]);
+ t3 = _mm_aesenc_si128(t3, ks[1]);
+ t4 = _mm_aesenc_si128(t4, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t2 = _mm_aesenc_si128(t2, ks[2]);
+ t3 = _mm_aesenc_si128(t3, ks[2]);
+ t4 = _mm_aesenc_si128(t4, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t2 = _mm_aesenc_si128(t2, ks[3]);
+ t3 = _mm_aesenc_si128(t3, ks[3]);
+ t4 = _mm_aesenc_si128(t4, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t2 = _mm_aesenc_si128(t2, ks[4]);
+ t3 = _mm_aesenc_si128(t3, ks[4]);
+ t4 = _mm_aesenc_si128(t4, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t2 = _mm_aesenc_si128(t2, ks[5]);
+ t3 = _mm_aesenc_si128(t3, ks[5]);
+ t4 = _mm_aesenc_si128(t4, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t2 = _mm_aesenc_si128(t2, ks[6]);
+ t3 = _mm_aesenc_si128(t3, ks[6]);
+ t4 = _mm_aesenc_si128(t4, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t2 = _mm_aesenc_si128(t2, ks[7]);
+ t3 = _mm_aesenc_si128(t3, ks[7]);
+ t4 = _mm_aesenc_si128(t4, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t2 = _mm_aesenc_si128(t2, ks[8]);
+ t3 = _mm_aesenc_si128(t3, ks[8]);
+ t4 = _mm_aesenc_si128(t4, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t2 = _mm_aesenc_si128(t2, ks[9]);
+ t3 = _mm_aesenc_si128(t3, ks[9]);
+ t4 = _mm_aesenc_si128(t4, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t2 = _mm_aesenc_si128(t2, ks[10]);
+ t3 = _mm_aesenc_si128(t3, ks[10]);
+ t4 = _mm_aesenc_si128(t4, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+ t2 = _mm_aesenc_si128(t2, ks[11]);
+ t3 = _mm_aesenc_si128(t3, ks[11]);
+ t4 = _mm_aesenc_si128(t4, ks[11]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[12]);
+ t2 = _mm_aesenclast_si128(t2, ks[12]);
+ t3 = _mm_aesenclast_si128(t3, ks[12]);
+ t4 = _mm_aesenclast_si128(t4, ks[12]);
+
+ t1 = _mm_xor_si128(t1, d1);
+ t2 = _mm_xor_si128(t2, d2);
+ t3 = _mm_xor_si128(t3, d3);
+ t4 = _mm_xor_si128(t4, d4);
+
+ y = _mm_xor_si128(y, t1);
+ y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, t2, t3, t4);
+
+ _mm_storeu_si128(bo + i + 0, t1);
+ _mm_storeu_si128(bo + i + 1, t2);
+ _mm_storeu_si128(bo + i + 2, t3);
+ _mm_storeu_si128(bo + i + 3, t4);
+ }
+
+ for (i = pblocks; i < blocks; i++)
+ {
+ d1 = _mm_loadu_si128(bi + i);
+
+ t1 = _mm_xor_si128(cb, ks[0]);
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+ t1 = _mm_aesenclast_si128(t1, ks[12]);
+
+ t1 = _mm_xor_si128(t1, d1);
+ _mm_storeu_si128(bo + i, t1);
+
+ y = ghash(this->h, y, t1);
+
+ cb = increment_be(cb);
+ }
+
+ if (rem)
+ {
+ y = encrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y);
+ }
+ y = icv_tailer(this, y, alen, len);
+ icv_crypt(this, y, j, icv);
+}
+
+/**
+ * AES-192 GCM decryption/ICV generation
+ */
+static void decrypt_gcm192(private_aesni_gcm_t *this,
+ size_t len, u_char *in, u_char *out, u_char *iv,
+ size_t alen, u_char *assoc, u_char *icv)
+{
+ __m128i d1, d2, d3, d4, t1, t2, t3, t4;
+ __m128i *ks, y, j, cb, *bi, *bo;
+ u_int blocks, pblocks, rem, i;
+
+ j = create_j(this, iv);
+ cb = increment_be(j);
+ y = icv_header(this, assoc, alen);
+ blocks = len / AES_BLOCK_SIZE;
+ pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM);
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
+ for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM)
+ {
+ d1 = _mm_loadu_si128(bi + i + 0);
+ d2 = _mm_loadu_si128(bi + i + 1);
+ d3 = _mm_loadu_si128(bi + i + 2);
+ d4 = _mm_loadu_si128(bi + i + 3);
+
+ y = _mm_xor_si128(y, d1);
+ y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, d2, d3, d4);
+
+ t1 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t2 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t3 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t4 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t2 = _mm_aesenc_si128(t2, ks[1]);
+ t3 = _mm_aesenc_si128(t3, ks[1]);
+ t4 = _mm_aesenc_si128(t4, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t2 = _mm_aesenc_si128(t2, ks[2]);
+ t3 = _mm_aesenc_si128(t3, ks[2]);
+ t4 = _mm_aesenc_si128(t4, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t2 = _mm_aesenc_si128(t2, ks[3]);
+ t3 = _mm_aesenc_si128(t3, ks[3]);
+ t4 = _mm_aesenc_si128(t4, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t2 = _mm_aesenc_si128(t2, ks[4]);
+ t3 = _mm_aesenc_si128(t3, ks[4]);
+ t4 = _mm_aesenc_si128(t4, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t2 = _mm_aesenc_si128(t2, ks[5]);
+ t3 = _mm_aesenc_si128(t3, ks[5]);
+ t4 = _mm_aesenc_si128(t4, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t2 = _mm_aesenc_si128(t2, ks[6]);
+ t3 = _mm_aesenc_si128(t3, ks[6]);
+ t4 = _mm_aesenc_si128(t4, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t2 = _mm_aesenc_si128(t2, ks[7]);
+ t3 = _mm_aesenc_si128(t3, ks[7]);
+ t4 = _mm_aesenc_si128(t4, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t2 = _mm_aesenc_si128(t2, ks[8]);
+ t3 = _mm_aesenc_si128(t3, ks[8]);
+ t4 = _mm_aesenc_si128(t4, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t2 = _mm_aesenc_si128(t2, ks[9]);
+ t3 = _mm_aesenc_si128(t3, ks[9]);
+ t4 = _mm_aesenc_si128(t4, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t2 = _mm_aesenc_si128(t2, ks[10]);
+ t3 = _mm_aesenc_si128(t3, ks[10]);
+ t4 = _mm_aesenc_si128(t4, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+ t2 = _mm_aesenc_si128(t2, ks[11]);
+ t3 = _mm_aesenc_si128(t3, ks[11]);
+ t4 = _mm_aesenc_si128(t4, ks[11]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[12]);
+ t2 = _mm_aesenclast_si128(t2, ks[12]);
+ t3 = _mm_aesenclast_si128(t3, ks[12]);
+ t4 = _mm_aesenclast_si128(t4, ks[12]);
+
+ t1 = _mm_xor_si128(t1, d1);
+ t2 = _mm_xor_si128(t2, d2);
+ t3 = _mm_xor_si128(t3, d3);
+ t4 = _mm_xor_si128(t4, d4);
+
+ _mm_storeu_si128(bo + i + 0, t1);
+ _mm_storeu_si128(bo + i + 1, t2);
+ _mm_storeu_si128(bo + i + 2, t3);
+ _mm_storeu_si128(bo + i + 3, t4);
+ }
+
+ for (i = pblocks; i < blocks; i++)
+ {
+ d1 = _mm_loadu_si128(bi + i);
+
+ y = ghash(this->h, y, d1);
+
+ t1 = _mm_xor_si128(cb, ks[0]);
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+ t1 = _mm_aesenclast_si128(t1, ks[12]);
+
+ t1 = _mm_xor_si128(t1, d1);
+ _mm_storeu_si128(bo + i, t1);
+
+ cb = increment_be(cb);
+ }
+
+ if (rem)
+ {
+ y = decrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y);
+ }
+ y = icv_tailer(this, y, alen, len);
+ icv_crypt(this, y, j, icv);
+}
+
+/**
+ * AES-256 GCM encryption/ICV generation
+ */
+static void encrypt_gcm256(private_aesni_gcm_t *this,
+ size_t len, u_char *in, u_char *out, u_char *iv,
+ size_t alen, u_char *assoc, u_char *icv)
+{
+ __m128i d1, d2, d3, d4, t1, t2, t3, t4;
+ __m128i *ks, y, j, cb, *bi, *bo;
+ u_int blocks, pblocks, rem, i;
+
+ j = create_j(this, iv);
+ cb = increment_be(j);
+ y = icv_header(this, assoc, alen);
+ blocks = len / AES_BLOCK_SIZE;
+ pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM);
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
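+	/* identical to the smaller key sizes, but with the 14 rounds of AES-256
+	 * (round keys ks[0]..ks[14]) */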
+ for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM)
+ {
+ d1 = _mm_loadu_si128(bi + i + 0);
+ d2 = _mm_loadu_si128(bi + i + 1);
+ d3 = _mm_loadu_si128(bi + i + 2);
+ d4 = _mm_loadu_si128(bi + i + 3);
+
+ t1 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t2 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t3 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t4 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t2 = _mm_aesenc_si128(t2, ks[1]);
+ t3 = _mm_aesenc_si128(t3, ks[1]);
+ t4 = _mm_aesenc_si128(t4, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t2 = _mm_aesenc_si128(t2, ks[2]);
+ t3 = _mm_aesenc_si128(t3, ks[2]);
+ t4 = _mm_aesenc_si128(t4, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t2 = _mm_aesenc_si128(t2, ks[3]);
+ t3 = _mm_aesenc_si128(t3, ks[3]);
+ t4 = _mm_aesenc_si128(t4, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t2 = _mm_aesenc_si128(t2, ks[4]);
+ t3 = _mm_aesenc_si128(t3, ks[4]);
+ t4 = _mm_aesenc_si128(t4, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t2 = _mm_aesenc_si128(t2, ks[5]);
+ t3 = _mm_aesenc_si128(t3, ks[5]);
+ t4 = _mm_aesenc_si128(t4, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t2 = _mm_aesenc_si128(t2, ks[6]);
+ t3 = _mm_aesenc_si128(t3, ks[6]);
+ t4 = _mm_aesenc_si128(t4, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t2 = _mm_aesenc_si128(t2, ks[7]);
+ t3 = _mm_aesenc_si128(t3, ks[7]);
+ t4 = _mm_aesenc_si128(t4, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t2 = _mm_aesenc_si128(t2, ks[8]);
+ t3 = _mm_aesenc_si128(t3, ks[8]);
+ t4 = _mm_aesenc_si128(t4, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t2 = _mm_aesenc_si128(t2, ks[9]);
+ t3 = _mm_aesenc_si128(t3, ks[9]);
+ t4 = _mm_aesenc_si128(t4, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t2 = _mm_aesenc_si128(t2, ks[10]);
+ t3 = _mm_aesenc_si128(t3, ks[10]);
+ t4 = _mm_aesenc_si128(t4, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+ t2 = _mm_aesenc_si128(t2, ks[11]);
+ t3 = _mm_aesenc_si128(t3, ks[11]);
+ t4 = _mm_aesenc_si128(t4, ks[11]);
+ t1 = _mm_aesenc_si128(t1, ks[12]);
+ t2 = _mm_aesenc_si128(t2, ks[12]);
+ t3 = _mm_aesenc_si128(t3, ks[12]);
+ t4 = _mm_aesenc_si128(t4, ks[12]);
+ t1 = _mm_aesenc_si128(t1, ks[13]);
+ t2 = _mm_aesenc_si128(t2, ks[13]);
+ t3 = _mm_aesenc_si128(t3, ks[13]);
+ t4 = _mm_aesenc_si128(t4, ks[13]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[14]);
+ t2 = _mm_aesenclast_si128(t2, ks[14]);
+ t3 = _mm_aesenclast_si128(t3, ks[14]);
+ t4 = _mm_aesenclast_si128(t4, ks[14]);
+
+ t1 = _mm_xor_si128(t1, d1);
+ t2 = _mm_xor_si128(t2, d2);
+ t3 = _mm_xor_si128(t3, d3);
+ t4 = _mm_xor_si128(t4, d4);
+
+ y = _mm_xor_si128(y, t1);
+ y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, t2, t3, t4);
+
+ _mm_storeu_si128(bo + i + 0, t1);
+ _mm_storeu_si128(bo + i + 1, t2);
+ _mm_storeu_si128(bo + i + 2, t3);
+ _mm_storeu_si128(bo + i + 3, t4);
+ }
+
+ for (i = pblocks; i < blocks; i++)
+ {
+ d1 = _mm_loadu_si128(bi + i);
+
+ t1 = _mm_xor_si128(cb, ks[0]);
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+ t1 = _mm_aesenc_si128(t1, ks[12]);
+ t1 = _mm_aesenc_si128(t1, ks[13]);
+ t1 = _mm_aesenclast_si128(t1, ks[14]);
+
+ t1 = _mm_xor_si128(t1, d1);
+ _mm_storeu_si128(bo + i, t1);
+
+ y = ghash(this->h, y, t1);
+
+ cb = increment_be(cb);
+ }
+
+ if (rem)
+ {
+ y = encrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y);
+ }
+ y = icv_tailer(this, y, alen, len);
+ icv_crypt(this, y, j, icv);
+}
+
+/**
+ * AES-256 GCM decryption/ICV generation
+ */
+static void decrypt_gcm256(private_aesni_gcm_t *this,
+ size_t len, u_char *in, u_char *out, u_char *iv,
+ size_t alen, u_char *assoc, u_char *icv)
+{
+ __m128i d1, d2, d3, d4, t1, t2, t3, t4;
+ __m128i *ks, y, j, cb, *bi, *bo;
+ u_int blocks, pblocks, rem, i;
+
+ j = create_j(this, iv);
+ cb = increment_be(j);
+ y = icv_header(this, assoc, alen);
+ blocks = len / AES_BLOCK_SIZE;
+ pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM);
+ rem = len % AES_BLOCK_SIZE;
+ bi = (__m128i*)in;
+ bo = (__m128i*)out;
+
+ ks = this->key->schedule;
+
+ for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM)
+ {
+ d1 = _mm_loadu_si128(bi + i + 0);
+ d2 = _mm_loadu_si128(bi + i + 1);
+ d3 = _mm_loadu_si128(bi + i + 2);
+ d4 = _mm_loadu_si128(bi + i + 3);
+
+ y = _mm_xor_si128(y, d1);
+ y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, d2, d3, d4);
+
+ t1 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t2 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t3 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+ t4 = _mm_xor_si128(cb, ks[0]);
+ cb = increment_be(cb);
+
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t2 = _mm_aesenc_si128(t2, ks[1]);
+ t3 = _mm_aesenc_si128(t3, ks[1]);
+ t4 = _mm_aesenc_si128(t4, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t2 = _mm_aesenc_si128(t2, ks[2]);
+ t3 = _mm_aesenc_si128(t3, ks[2]);
+ t4 = _mm_aesenc_si128(t4, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t2 = _mm_aesenc_si128(t2, ks[3]);
+ t3 = _mm_aesenc_si128(t3, ks[3]);
+ t4 = _mm_aesenc_si128(t4, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t2 = _mm_aesenc_si128(t2, ks[4]);
+ t3 = _mm_aesenc_si128(t3, ks[4]);
+ t4 = _mm_aesenc_si128(t4, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t2 = _mm_aesenc_si128(t2, ks[5]);
+ t3 = _mm_aesenc_si128(t3, ks[5]);
+ t4 = _mm_aesenc_si128(t4, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t2 = _mm_aesenc_si128(t2, ks[6]);
+ t3 = _mm_aesenc_si128(t3, ks[6]);
+ t4 = _mm_aesenc_si128(t4, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t2 = _mm_aesenc_si128(t2, ks[7]);
+ t3 = _mm_aesenc_si128(t3, ks[7]);
+ t4 = _mm_aesenc_si128(t4, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t2 = _mm_aesenc_si128(t2, ks[8]);
+ t3 = _mm_aesenc_si128(t3, ks[8]);
+ t4 = _mm_aesenc_si128(t4, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t2 = _mm_aesenc_si128(t2, ks[9]);
+ t3 = _mm_aesenc_si128(t3, ks[9]);
+ t4 = _mm_aesenc_si128(t4, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t2 = _mm_aesenc_si128(t2, ks[10]);
+ t3 = _mm_aesenc_si128(t3, ks[10]);
+ t4 = _mm_aesenc_si128(t4, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+ t2 = _mm_aesenc_si128(t2, ks[11]);
+ t3 = _mm_aesenc_si128(t3, ks[11]);
+ t4 = _mm_aesenc_si128(t4, ks[11]);
+ t1 = _mm_aesenc_si128(t1, ks[12]);
+ t2 = _mm_aesenc_si128(t2, ks[12]);
+ t3 = _mm_aesenc_si128(t3, ks[12]);
+ t4 = _mm_aesenc_si128(t4, ks[12]);
+ t1 = _mm_aesenc_si128(t1, ks[13]);
+ t2 = _mm_aesenc_si128(t2, ks[13]);
+ t3 = _mm_aesenc_si128(t3, ks[13]);
+ t4 = _mm_aesenc_si128(t4, ks[13]);
+
+ t1 = _mm_aesenclast_si128(t1, ks[14]);
+ t2 = _mm_aesenclast_si128(t2, ks[14]);
+ t3 = _mm_aesenclast_si128(t3, ks[14]);
+ t4 = _mm_aesenclast_si128(t4, ks[14]);
+
+ t1 = _mm_xor_si128(t1, d1);
+ t2 = _mm_xor_si128(t2, d2);
+ t3 = _mm_xor_si128(t3, d3);
+ t4 = _mm_xor_si128(t4, d4);
+
+ _mm_storeu_si128(bo + i + 0, t1);
+ _mm_storeu_si128(bo + i + 1, t2);
+ _mm_storeu_si128(bo + i + 2, t3);
+ _mm_storeu_si128(bo + i + 3, t4);
+ }
+
+ for (i = pblocks; i < blocks; i++)
+ {
+ d1 = _mm_loadu_si128(bi + i);
+
+ y = ghash(this->h, y, d1);
+
+ t1 = _mm_xor_si128(cb, ks[0]);
+ t1 = _mm_aesenc_si128(t1, ks[1]);
+ t1 = _mm_aesenc_si128(t1, ks[2]);
+ t1 = _mm_aesenc_si128(t1, ks[3]);
+ t1 = _mm_aesenc_si128(t1, ks[4]);
+ t1 = _mm_aesenc_si128(t1, ks[5]);
+ t1 = _mm_aesenc_si128(t1, ks[6]);
+ t1 = _mm_aesenc_si128(t1, ks[7]);
+ t1 = _mm_aesenc_si128(t1, ks[8]);
+ t1 = _mm_aesenc_si128(t1, ks[9]);
+ t1 = _mm_aesenc_si128(t1, ks[10]);
+ t1 = _mm_aesenc_si128(t1, ks[11]);
+ t1 = _mm_aesenc_si128(t1, ks[12]);
+ t1 = _mm_aesenc_si128(t1, ks[13]);
+ t1 = _mm_aesenclast_si128(t1, ks[14]);
+
+ t1 = _mm_xor_si128(t1, d1);
+ _mm_storeu_si128(bo + i, t1);
+
+ cb = increment_be(cb);
+ }
+
+ if (rem)
+ {
+ y = decrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y);
+ }
+ y = icv_tailer(this, y, alen, len);
+ icv_crypt(this, y, j, icv);
+}
+
+METHOD(aead_t, encrypt, bool,
+ private_aesni_gcm_t *this, chunk_t plain, chunk_t assoc, chunk_t iv,
+ chunk_t *encr)
+{
+ u_char *out;
+
+ if (!this->key || iv.len != IV_SIZE)
+ {
+ return FALSE;
+ }
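+	/* encrypt in-place if the caller passed no output chunk; the ICV is
+	 * written right after the ciphertext in either case */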
+ out = plain.ptr;
+ if (encr)
+ {
+ *encr = chunk_alloc(plain.len + this->icv_size);
+ out = encr->ptr;
+ }
+ this->encrypt(this, plain.len, plain.ptr, out, iv.ptr,
+ assoc.len, assoc.ptr, out + plain.len);
+ return TRUE;
+}
+
+METHOD(aead_t, decrypt, bool,
+ private_aesni_gcm_t *this, chunk_t encr, chunk_t assoc, chunk_t iv,
+ chunk_t *plain)
+{
+ u_char *out, icv[this->icv_size];
+
+ if (!this->key || iv.len != IV_SIZE || encr.len < this->icv_size)
+ {
+ return FALSE;
+ }
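+	/* strip the trailing ICV off the ciphertext; it is recomputed below
+	 * and compared in constant time */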
+ encr.len -= this->icv_size;
+ out = encr.ptr;
+ if (plain)
+ {
+ *plain = chunk_alloc(encr.len);
+ out = plain->ptr;
+ }
+ this->decrypt(this, encr.len, encr.ptr, out, iv.ptr,
+ assoc.len, assoc.ptr, icv);
+ return memeq_const(icv, encr.ptr + encr.len, this->icv_size);
+}
+
+METHOD(aead_t, get_block_size, size_t,
+ private_aesni_gcm_t *this)
+{
+ return 1;
+}
+
+METHOD(aead_t, get_icv_size, size_t,
+ private_aesni_gcm_t *this)
+{
+ return this->icv_size;
+}
+
+METHOD(aead_t, get_iv_size, size_t,
+ private_aesni_gcm_t *this)
+{
+ return IV_SIZE;
+}
+
+METHOD(aead_t, get_iv_gen, iv_gen_t*,
+ private_aesni_gcm_t *this)
+{
+ return this->iv_gen;
+}
+
+METHOD(aead_t, get_key_size, size_t,
+ private_aesni_gcm_t *this)
+{
+ return this->key_size + SALT_SIZE;
+}
+
+METHOD(aead_t, set_key, bool,
+ private_aesni_gcm_t *this, chunk_t key)
+{
+ u_int round;
+ __m128i *ks, h;
+
+ if (key.len != this->key_size + SALT_SIZE)
+ {
+ return FALSE;
+ }
+
+ memcpy(this->salt, key.ptr + key.len - SALT_SIZE, SALT_SIZE);
+ key.len -= SALT_SIZE;
+
+ DESTROY_IF(this->key);
+ this->key = aesni_key_create(TRUE, key);
+
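+	/* derive the GHASH subkey H by encrypting the all-zero block */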
+ ks = this->key->schedule;
+ h = _mm_xor_si128(_mm_setzero_si128(), ks[0]);
+ for (round = 1; round < this->key->rounds; round++)
+ {
+ h = _mm_aesenc_si128(h, ks[round]);
+ }
+ h = _mm_aesenclast_si128(h, ks[this->key->rounds]);
+
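+	/* precompute H^2, H^3 and H^4 so mult4xor() can fold four blocks per
+	 * GHASH reduction */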
+ this->h = h;
+ h = swap128(h);
+ this->hh = mult_block(h, this->h);
+ this->hhh = mult_block(h, this->hh);
+ this->hhhh = mult_block(h, this->hhh);
+ this->h = swap128(this->h);
+ this->hh = swap128(this->hh);
+ this->hhh = swap128(this->hhh);
+ this->hhhh = swap128(this->hhhh);
+
+ return TRUE;
+}
+
+METHOD(aead_t, destroy, void,
+ private_aesni_gcm_t *this)
+{
+ DESTROY_IF(this->key);
+ memwipe(&this->h, sizeof(this->h));
+ memwipe(&this->hh, sizeof(this->hh));
+ memwipe(&this->hhh, sizeof(this->hhh));
+ memwipe(&this->hhhh, sizeof(this->hhhh));
+ this->iv_gen->destroy(this->iv_gen);
+ free_align(this);
+}
+
+/**
+ * See header
+ */
+aesni_gcm_t *aesni_gcm_create(encryption_algorithm_t algo,
+ size_t key_size, size_t salt_size)
+{
+ private_aesni_gcm_t *this;
+ size_t icv_size;
+
+ switch (key_size)
+ {
+ case 0:
+ key_size = 16;
+ break;
+ case 16:
+ case 24:
+ case 32:
+ break;
+ default:
+ return NULL;
+ }
+ if (salt_size && salt_size != SALT_SIZE)
+ {
+ /* currently not supported */
+ return NULL;
+ }
+ switch (algo)
+ {
+ case ENCR_AES_GCM_ICV8:
+ icv_size = 8;
+ break;
+ case ENCR_AES_GCM_ICV12:
+ icv_size = 12;
+ break;
+ case ENCR_AES_GCM_ICV16:
+ icv_size = 16;
+ break;
+ default:
+ return NULL;
+ }
+
+ INIT_ALIGN(this, sizeof(__m128i),
+ .public = {
+ .aead = {
+ .encrypt = _encrypt,
+ .decrypt = _decrypt,
+ .get_block_size = _get_block_size,
+ .get_icv_size = _get_icv_size,
+ .get_iv_size = _get_iv_size,
+ .get_iv_gen = _get_iv_gen,
+ .get_key_size = _get_key_size,
+ .set_key = _set_key,
+ .destroy = _destroy,
+ },
+ },
+ .key_size = key_size,
+ .iv_gen = iv_gen_seq_create(),
+ .icv_size = icv_size,
+ );
+
+ switch (key_size)
+ {
+ case 16:
+ this->encrypt = encrypt_gcm128;
+ this->decrypt = decrypt_gcm128;
+ break;
+ case 24:
+ this->encrypt = encrypt_gcm192;
+ this->decrypt = decrypt_gcm192;
+ break;
+ case 32:
+ this->encrypt = encrypt_gcm256;
+ this->decrypt = decrypt_gcm256;
+ break;
+ }
+
+ return &this->public;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_gcm.h b/src/libstrongswan/plugins/aesni/aesni_gcm.h
new file mode 100644
index 000000000..5a256c8db
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_gcm.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_gcm aesni_gcm
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_GCM_H_
+#define AESNI_GCM_H_
+
+#include <library.h>
+
+typedef struct aesni_gcm_t aesni_gcm_t;
+
+/**
+ * GCM mode AEAD using AES-NI
+ */
+struct aesni_gcm_t {
+
+ /**
+ * Implements aead_t interface
+ */
+ aead_t aead;
+};
+
+/**
+ * Create an aesni_gcm instance.
+ *
+ * @param algo encryption algorithm, ENCR_AES_GCM*
+ * @param key_size AES key size, in bytes
+ * @param salt_size size of salt value
+ * @return AES-GCM AEAD, NULL if not supported
+ */
+aesni_gcm_t *aesni_gcm_create(encryption_algorithm_t algo,
+ size_t key_size, size_t salt_size);
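+
+/*
+ * Usage sketch (hypothetical caller; the constants are assumptions, and the
+ * key later passed to set_key() must be key_size plus 4 salt bytes):
+ *
+ *   aesni_gcm_t *gcm;
+ *
+ *   gcm = aesni_gcm_create(ENCR_AES_GCM_ICV16, 16, 4);
+ *   if (gcm)
+ *   {
+ *       ...
+ *       gcm->aead.destroy(&gcm->aead);
+ *   }
+ */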
+
+#endif /** AESNI_GCM_H_ @}*/
diff --git a/src/libstrongswan/plugins/aesni/aesni_key.c b/src/libstrongswan/plugins/aesni/aesni_key.c
new file mode 100644
index 000000000..523266a30
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_key.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "aesni_key.h"
+
+/**
+ * Rounds used for each AES key size
+ */
+#define AES128_ROUNDS 10
+#define AES192_ROUNDS 12
+#define AES256_ROUNDS 14
+
+typedef struct private_aesni_key_t private_aesni_key_t;
+
+/**
+ * Private data of an aesni_key_t object.
+ */
+struct private_aesni_key_t {
+
+ /**
+ * Public aesni_key_t interface.
+ */
+ aesni_key_t public;
+};
+
+/**
+ * Invert round encryption keys to get a decryption key schedule
+ */
+static void reverse_key(aesni_key_t *this)
+{
+ __m128i t[this->rounds + 1];
+ int i;
+
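+	/* the decryption schedule is the encryption schedule in reverse order,
+	 * with InvMixColumns (aesimc) applied to all inner round keys */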
+ for (i = 0; i <= this->rounds; i++)
+ {
+ t[i] = this->schedule[i];
+ }
+ this->schedule[this->rounds] = t[0];
+ for (i = 1; i < this->rounds; i++)
+ {
+ this->schedule[this->rounds - i] = _mm_aesimc_si128(t[i]);
+ }
+ this->schedule[0] = t[this->rounds];
+
+ memwipe(t, sizeof(t));
+}
+
+/**
+ * Assist in creating a 128-bit round key
+ */
+static __m128i assist128(__m128i a, __m128i b)
+{
+ __m128i c;
+
+ b = _mm_shuffle_epi32(b, 0xff);
+ c = _mm_slli_si128(a, 0x04);
+ a = _mm_xor_si128(a, c);
+ c = _mm_slli_si128(c, 0x04);
+ a = _mm_xor_si128(a, c);
+ c = _mm_slli_si128(c, 0x04);
+ a = _mm_xor_si128(a, c);
+ a = _mm_xor_si128(a, b);
+
+ return a;
+}
+
+/**
+ * Expand a 128-bit key to encryption round keys
+ */
+static void expand128(__m128i *key, __m128i *schedule)
+{
+ __m128i t;
+
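+	/* each aeskeygenassist step applies the AES round constant for that
+	 * round (0x01 through 0x36) */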
+ schedule[0] = t = _mm_loadu_si128(key);
+ schedule[1] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x01));
+ schedule[2] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x02));
+ schedule[3] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x04));
+ schedule[4] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x08));
+ schedule[5] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x10));
+ schedule[6] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x20));
+ schedule[7] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x40));
+ schedule[8] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x80));
+ schedule[9] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x1b));
+ schedule[10] = assist128(t, _mm_aeskeygenassist_si128(t, 0x36));
+}
+
+/**
+ * Assist in creating a 192-bit round key
+ */
+static __m128i assist192(__m128i b, __m128i c, __m128i *a)
+{
+ __m128i t;
+
+ b = _mm_shuffle_epi32(b, 0x55);
+ t = _mm_slli_si128(*a, 0x04);
+ *a = _mm_xor_si128(*a, t);
+ t = _mm_slli_si128(t, 0x04);
+ *a = _mm_xor_si128(*a, t);
+ t = _mm_slli_si128(t, 0x04);
+ *a = _mm_xor_si128(*a, t);
+ *a = _mm_xor_si128(*a, b);
+ b = _mm_shuffle_epi32(*a, 0xff);
+ t = _mm_slli_si128(c, 0x04);
+ t = _mm_xor_si128(c, t);
+ t = _mm_xor_si128(t, b);
+
+ return t;
+}
+
+/**
+ * return a[63:0] | b[63:0] << 64
+ */
+static __m128i _mm_shuffle_i00(__m128i a, __m128i b)
+{
+ return (__m128i)_mm_shuffle_pd((__m128d)a, (__m128d)b, 0);
+}
+
+/**
+ * return a[127:64] >> 64 | b[63:0] << 64
+ */
+static __m128i _mm_shuffle_i01(__m128i a, __m128i b)
+{
+ return (__m128i)_mm_shuffle_pd((__m128d)a, (__m128d)b, 1);
+}
+
+/**
+ * Expand a 192-bit encryption key to round keys
+ */
+static void expand192(__m128i *key, __m128i *schedule)
+{
+ __m128i t1, t2, t3;
+
+ schedule[0] = t1 = _mm_loadu_si128(key);
+ t2 = t3 = _mm_loadu_si128(key + 1);
+
+ t2 = assist192(_mm_aeskeygenassist_si128(t2, 0x1), t2, &t1);
+ schedule[1] = _mm_shuffle_i00(t3, t1);
+ schedule[2] = _mm_shuffle_i01(t1, t2);
+ t2 = t3 = assist192(_mm_aeskeygenassist_si128(t2, 0x2), t2, &t1);
+ schedule[3] = t1;
+
+ t2 = assist192(_mm_aeskeygenassist_si128(t2, 0x4), t2, &t1);
+ schedule[4] = _mm_shuffle_i00(t3, t1);
+ schedule[5] = _mm_shuffle_i01(t1, t2);
+ t2 = t3 = assist192(_mm_aeskeygenassist_si128(t2, 0x8), t2, &t1);
+ schedule[6] = t1;
+
+ t2 = assist192(_mm_aeskeygenassist_si128(t2, 0x10), t2, &t1);
+ schedule[7] = _mm_shuffle_i00(t3, t1);
+ schedule[8] = _mm_shuffle_i01(t1, t2);
+ t2 = t3 = assist192(_mm_aeskeygenassist_si128(t2, 0x20), t2, &t1);
+ schedule[9] = t1;
+
+ t2 = assist192(_mm_aeskeygenassist_si128(t2, 0x40), t2, &t1);
+ schedule[10] = _mm_shuffle_i00(t3, t1);
+ schedule[11] = _mm_shuffle_i01(t1, t2);
+ assist192(_mm_aeskeygenassist_si128(t2, 0x80), t2, &t1);
+ schedule[12] = t1;
+}
+
+/**
+ * Assist in creating a 256-bit round key
+ */
+static __m128i assist256_1(__m128i a, __m128i b)
+{
+ __m128i x, y;
+
+ b = _mm_shuffle_epi32(b, 0xff);
+ y = _mm_slli_si128(a, 0x04);
+ x = _mm_xor_si128(a, y);
+ y = _mm_slli_si128(y, 0x04);
+ x = _mm_xor_si128(x, y);
+ y = _mm_slli_si128(y, 0x04);
+ x = _mm_xor_si128(x, y);
+ x = _mm_xor_si128(x, b);
+
+ return x;
+}
+
+/**
+ * Assist in creating a 256-bit round key
+ */
+static __m128i assist256_2(__m128i a, __m128i b)
+{
+ __m128i x, y, z;
+
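+	/* even-numbered AES-256 round keys use SubWord without RotWord or a
+	 * round constant; the 0xaa shuffle selects the plain SubWord result */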
+ y = _mm_aeskeygenassist_si128(a, 0x00);
+ z = _mm_shuffle_epi32(y, 0xaa);
+ y = _mm_slli_si128(b, 0x04);
+ x = _mm_xor_si128(b, y);
+ y = _mm_slli_si128(y, 0x04);
+ x = _mm_xor_si128(x, y);
+ y = _mm_slli_si128(y, 0x04);
+ x = _mm_xor_si128(x, y);
+ x = _mm_xor_si128(x, z);
+
+ return x;
+}
+
+/**
+ * Expand a 256-bit encryption key to round keys
+ */
+static void expand256(__m128i *key, __m128i *schedule)
+{
+ __m128i t1, t2;
+
+ schedule[0] = t1 = _mm_loadu_si128(key);
+ schedule[1] = t2 = _mm_loadu_si128(key + 1);
+
+ schedule[2] = t1 = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x01));
+ schedule[3] = t2 = assist256_2(t1, t2);
+
+ schedule[4] = t1 = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x02));
+ schedule[5] = t2 = assist256_2(t1, t2);
+
+ schedule[6] = t1 = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x04));
+ schedule[7] = t2 = assist256_2(t1, t2);
+
+ schedule[8] = t1 = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x08));
+ schedule[9] = t2 = assist256_2(t1, t2);
+
+ schedule[10] = t1 = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x10));
+ schedule[11] = t2 = assist256_2(t1, t2);
+
+ schedule[12] = t1 = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x20));
+ schedule[13] = t2 = assist256_2(t1, t2);
+
+ schedule[14] = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x40));
+}
+
+METHOD(aesni_key_t, destroy, void,
+ private_aesni_key_t *this)
+{
+ memwipe(this, sizeof(*this) + (this->public.rounds + 1) * AES_BLOCK_SIZE);
+ free_align(this);
+}
+
+/**
+ * See header
+ */
+aesni_key_t *aesni_key_create(bool encrypt, chunk_t key)
+{
+ private_aesni_key_t *this;
+ int rounds;
+
+ switch (key.len)
+ {
+ case 16:
+ rounds = AES128_ROUNDS;
+ break;
+ case 24:
+ rounds = AES192_ROUNDS;
+ break;
+ case 32:
+ rounds = AES256_ROUNDS;
+ break;
+ default:
+ return NULL;
+ }
+
+ INIT_EXTRA_ALIGN(this, (rounds + 1) * AES_BLOCK_SIZE, sizeof(__m128i),
+ .public = {
+ .destroy = _destroy,
+ .rounds = rounds,
+ },
+ );
+
+ switch (key.len)
+ {
+ case 16:
+ expand128((__m128i*)key.ptr, this->public.schedule);
+ break;
+ case 24:
+ expand192((__m128i*)key.ptr, this->public.schedule);
+ break;
+ case 32:
+ expand256((__m128i*)key.ptr, this->public.schedule);
+ break;
+ default:
+ break;
+ }
+
+ if (!encrypt)
+ {
+ reverse_key(&this->public);
+ }
+
+ return &this->public;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_key.h b/src/libstrongswan/plugins/aesni/aesni_key.h
new file mode 100644
index 000000000..12dcd221d
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_key.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_key aesni_key
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_KEY_H_
+#define AESNI_KEY_H_
+
+#include <library.h>
+
+#include <wmmintrin.h>
+
+/**
+ * AES block size, in bytes
+ */
+#define AES_BLOCK_SIZE 16
+
+typedef struct aesni_key_t aesni_key_t;
+
+/**
+ * Key schedule for encryption/decryption using AES-NI.
+ */
+struct aesni_key_t {
+
+ /**
+ * Destroy a aesni_key_t.
+ */
+ void (*destroy)(aesni_key_t *this);
+
+ /**
+ * Number of AES rounds (10, 12, 14)
+ */
+ int rounds;
+
+ /**
+ * Key schedule: one round key per round, plus the initial whitening key (round 0)
+ */
+ __attribute__((aligned(sizeof(__m128i)))) __m128i schedule[];
+};
+
+/**
+ * Create an AESNI key schedule instance.
+ *
+ * @param encrypt TRUE for encryption schedule, FALSE for decryption
+ * @param key non-expanded crypto key, 16, 24 or 32 bytes
+ * @return key schedule, NULL on invalid key size
+ */
+aesni_key_t *aesni_key_create(bool encrypt, chunk_t key);
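+
+/*
+ * Usage sketch (hypothetical; raw_key is an assumed chunk_t holding the
+ * 16, 24 or 32 byte key):
+ *
+ *   aesni_key_t *key;
+ *
+ *   key = aesni_key_create(TRUE, raw_key);
+ *   if (key)
+ *   {
+ *       ...
+ *       key->destroy(key);
+ *   }
+ */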
+
+#endif /** AESNI_KEY_H_ @}*/
diff --git a/src/libstrongswan/plugins/aesni/aesni_plugin.c b/src/libstrongswan/plugins/aesni/aesni_plugin.c
new file mode 100644
index 000000000..b92419dc4
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_plugin.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "aesni_plugin.h"
+#include "aesni_cbc.h"
+#include "aesni_ctr.h"
+#include "aesni_ccm.h"
+#include "aesni_gcm.h"
+#include "aesni_xcbc.h"
+#include "aesni_cmac.h"
+
+#include <stdio.h>
+
+#include <library.h>
+#include <utils/debug.h>
+#include <utils/cpu_feature.h>
+
+typedef struct private_aesni_plugin_t private_aesni_plugin_t;
+typedef enum cpuid_feature_t cpuid_feature_t;
+
+/**
+ * private data of aesni_plugin
+ */
+struct private_aesni_plugin_t {
+
+ /**
+ * public functions
+ */
+ aesni_plugin_t public;
+};
+
+METHOD(plugin_t, get_name, char*,
+ private_aesni_plugin_t *this)
+{
+ return "aesni";
+}
+
+METHOD(plugin_t, get_features, int,
+ private_aesni_plugin_t *this, plugin_feature_t *features[])
+{
+ static plugin_feature_t f[] = {
+ PLUGIN_REGISTER(CRYPTER, aesni_cbc_create),
+ PLUGIN_PROVIDE(CRYPTER, ENCR_AES_CBC, 16),
+ PLUGIN_PROVIDE(CRYPTER, ENCR_AES_CBC, 24),
+ PLUGIN_PROVIDE(CRYPTER, ENCR_AES_CBC, 32),
+ PLUGIN_REGISTER(CRYPTER, aesni_ctr_create),
+ PLUGIN_PROVIDE(CRYPTER, ENCR_AES_CTR, 16),
+ PLUGIN_PROVIDE(CRYPTER, ENCR_AES_CTR, 24),
+ PLUGIN_PROVIDE(CRYPTER, ENCR_AES_CTR, 32),
+ PLUGIN_REGISTER(AEAD, aesni_ccm_create),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV8, 16),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV12, 16),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV16, 16),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV8, 24),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV12, 24),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV16, 24),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV8, 32),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV12, 32),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV16, 32),
+ PLUGIN_REGISTER(AEAD, aesni_gcm_create),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV8, 16),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV12, 16),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV16, 16),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV8, 24),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV12, 24),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV16, 24),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV8, 32),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV12, 32),
+ PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV16, 32),
+ PLUGIN_REGISTER(PRF, aesni_xcbc_prf_create),
+ PLUGIN_PROVIDE(PRF, PRF_AES128_XCBC),
+ PLUGIN_REGISTER(SIGNER, aesni_xcbc_signer_create),
+ PLUGIN_PROVIDE(SIGNER, AUTH_AES_XCBC_96),
+ PLUGIN_REGISTER(PRF, aesni_cmac_prf_create),
+ PLUGIN_PROVIDE(PRF, PRF_AES128_CMAC),
+ PLUGIN_REGISTER(SIGNER, aesni_cmac_signer_create),
+ PLUGIN_PROVIDE(SIGNER, AUTH_AES_CMAC_96),
+ };
+
+ *features = f;
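+	/* GHASH/GMAC relies on carry-less multiplication, so PCLMULQDQ is
+	 * required in addition to AES-NI */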
+ if (cpu_feature_available(CPU_FEATURE_AESNI | CPU_FEATURE_PCLMULQDQ))
+ {
+ return countof(f);
+ }
+ return 0;
+}
+
+METHOD(plugin_t, destroy, void,
+ private_aesni_plugin_t *this)
+{
+ free(this);
+}
+
+/*
+ * see header file
+ */
+plugin_t *aesni_plugin_create()
+{
+ private_aesni_plugin_t *this;
+
+ INIT(this,
+ .public = {
+ .plugin = {
+ .get_name = _get_name,
+ .get_features = _get_features,
+ .reload = (void*)return_false,
+ .destroy = _destroy,
+ },
+ },
+ );
+
+ return &this->public.plugin;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_plugin.h b/src/libstrongswan/plugins/aesni/aesni_plugin.h
new file mode 100644
index 000000000..2b0c92c25
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_plugin.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni aesni
+ * @ingroup plugins
+ *
+ * @defgroup aesni_plugin aesni_plugin
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_PLUGIN_H_
+#define AESNI_PLUGIN_H_
+
+#include <plugins/plugin.h>
+
+typedef struct aesni_plugin_t aesni_plugin_t;
+
+/**
+ * Plugin providing crypto primitives based on Intel AES-NI instructions.
+ */
+struct aesni_plugin_t {
+
+ /**
+ * implements plugin interface
+ */
+ plugin_t plugin;
+};
+
+#endif /** AESNI_PLUGIN_H_ @}*/
diff --git a/src/libstrongswan/plugins/aesni/aesni_xcbc.c b/src/libstrongswan/plugins/aesni/aesni_xcbc.c
new file mode 100644
index 000000000..24a75cec0
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_xcbc.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright (C) 2008-2015 Martin Willi
+ * Copyright (C) 2012 Tobias Brunner
+ * Hochschule fuer Technik Rapperswil
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "aesni_xcbc.h"
+#include "aesni_key.h"
+
+#include <crypto/prfs/mac_prf.h>
+#include <crypto/signers/mac_signer.h>
+
+typedef struct private_aesni_mac_t private_aesni_mac_t;
+
+/**
+ * Private data of a mac_t object.
+ */
+struct private_aesni_mac_t {
+
+ /**
+ * Public mac_t interface.
+ */
+ mac_t public;
+
+ /**
+ * Key schedule for K1
+ */
+ aesni_key_t *k1;
+
+ /**
+ * K2, derived key used for complete final blocks (RFC 3566)
+ */
+ __m128i k2;
+
+ /**
+ * K3, derived key used for padded final blocks (RFC 3566)
+ */
+ __m128i k3;
+
+ /**
+ * E, the running XCBC chaining value E[i] (RFC 3566)
+ */
+ __m128i e;
+
+ /**
+ * remaining, unprocessed bytes in append mode
+ */
+ u_char rem[AES_BLOCK_SIZE];
+
+ /**
+ * number of bytes used in remaining
+ */
+ int rem_size;
+
+ /**
+ * TRUE if we have zero bytes to xcbc in final()
+ */
+ bool zero;
+};
+
+METHOD(mac_t, get_mac, bool,
+ private_aesni_mac_t *this, chunk_t data, u_int8_t *out)
+{
+ __m128i *ks, e, *bi;
+ u_int blocks, rem, i;
+
+ if (!this->k1)
+ {
+ return FALSE;
+ }
+
+ ks = this->k1->schedule;
+
+ e = this->e;
+
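+	/* buffer partial blocks across calls; the final block is only folded in
+	 * when a digest is requested (out != NULL), since it needs K2/K3 */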
+ if (data.len)
+ {
+ this->zero = FALSE;
+ }
+
+ if (this->rem_size + data.len > AES_BLOCK_SIZE)
+ {
+ /* (3) For each block M[i], where i = 1 ... n-1:
+ * XOR M[i] with E[i-1], then encrypt the result with Key K1,
+ * yielding E[i].
+ */
+
+ /* append data to remaining bytes, process block M[1] */
+ memcpy(this->rem + this->rem_size, data.ptr,
+ AES_BLOCK_SIZE - this->rem_size);
+ data = chunk_skip(data, AES_BLOCK_SIZE - this->rem_size);
+
+ e = _mm_xor_si128(e, _mm_loadu_si128((__m128i*)this->rem));
+
+ e = _mm_xor_si128(e, ks[0]);
+ e = _mm_aesenc_si128(e, ks[1]);
+ e = _mm_aesenc_si128(e, ks[2]);
+ e = _mm_aesenc_si128(e, ks[3]);
+ e = _mm_aesenc_si128(e, ks[4]);
+ e = _mm_aesenc_si128(e, ks[5]);
+ e = _mm_aesenc_si128(e, ks[6]);
+ e = _mm_aesenc_si128(e, ks[7]);
+ e = _mm_aesenc_si128(e, ks[8]);
+ e = _mm_aesenc_si128(e, ks[9]);
+ e = _mm_aesenclast_si128(e, ks[10]);
+
+ bi = (__m128i*)data.ptr;
+ rem = data.len % AES_BLOCK_SIZE;
+ blocks = data.len / AES_BLOCK_SIZE;
+ if (!rem && blocks)
+ { /* don't do last block */
+ rem = AES_BLOCK_SIZE;
+ blocks--;
+ }
+
+ /* process blocks M[2] ... M[n-1] */
+ for (i = 0; i < blocks; i++)
+ {
+ e = _mm_xor_si128(e, _mm_loadu_si128(bi + i));
+
+ e = _mm_xor_si128(e, ks[0]);
+ e = _mm_aesenc_si128(e, ks[1]);
+ e = _mm_aesenc_si128(e, ks[2]);
+ e = _mm_aesenc_si128(e, ks[3]);
+ e = _mm_aesenc_si128(e, ks[4]);
+ e = _mm_aesenc_si128(e, ks[5]);
+ e = _mm_aesenc_si128(e, ks[6]);
+ e = _mm_aesenc_si128(e, ks[7]);
+ e = _mm_aesenc_si128(e, ks[8]);
+ e = _mm_aesenc_si128(e, ks[9]);
+ e = _mm_aesenclast_si128(e, ks[10]);
+ }
+
+ /* store remaining bytes of block M[n] */
+ memcpy(this->rem, data.ptr + data.len - rem, rem);
+ this->rem_size = rem;
+ }
+ else
+ {
+ /* no complete block, just copy into remaining */
+ memcpy(this->rem + this->rem_size, data.ptr, data.len);
+ this->rem_size += data.len;
+ }
+
+ if (out)
+ {
+ /* (4) For block M[n]: */
+ if (this->rem_size == AES_BLOCK_SIZE && !this->zero)
+ {
+ /* a) If the blocksize of M[n] is 128 bits:
+ * XOR M[n] with E[n-1] and Key K2, then encrypt the result with
+ * Key K1, yielding E[n].
+ */
+ e = _mm_xor_si128(e, this->k2);
+ }
+ else
+ {
+ /* b) If the blocksize of M[n] is less than 128 bits:
+ *
+ * i) Pad M[n] with a single "1" bit, followed by the number of
+ * "0" bits (possibly none) required to increase M[n]'s
+ * blocksize to 128 bits.
+ */
+ if (this->rem_size < AES_BLOCK_SIZE)
+ {
+ memset(this->rem + this->rem_size, 0,
+ AES_BLOCK_SIZE - this->rem_size);
+ this->rem[this->rem_size] = 0x80;
+ }
+ /* ii) XOR M[n] with E[n-1] and Key K3, then encrypt the result
+ * with Key K1, yielding E[n].
+ */
+ e = _mm_xor_si128(e, this->k3);
+ }
+ e = _mm_xor_si128(e, _mm_loadu_si128((__m128i*)this->rem));
+
+ e = _mm_xor_si128(e, ks[0]);
+ e = _mm_aesenc_si128(e, ks[1]);
+ e = _mm_aesenc_si128(e, ks[2]);
+ e = _mm_aesenc_si128(e, ks[3]);
+ e = _mm_aesenc_si128(e, ks[4]);
+ e = _mm_aesenc_si128(e, ks[5]);
+ e = _mm_aesenc_si128(e, ks[6]);
+ e = _mm_aesenc_si128(e, ks[7]);
+ e = _mm_aesenc_si128(e, ks[8]);
+ e = _mm_aesenc_si128(e, ks[9]);
+ e = _mm_aesenclast_si128(e, ks[10]);
+ _mm_storeu_si128((__m128i*)out, e);
+
+ /* (2) Define E[0] = 0x00000000000000000000000000000000 */
+ e = _mm_setzero_si128();
+ this->rem_size = 0;
+ this->zero = TRUE;
+ }
+ this->e = e;
+ return TRUE;
+}
+
+METHOD(mac_t, get_mac_size, size_t,
+ private_aesni_mac_t *this)
+{
+ return AES_BLOCK_SIZE;
+}
+
+METHOD(mac_t, set_key, bool,
+ private_aesni_mac_t *this, chunk_t key)
+{
+ __m128i t1, t2, t3;
+ u_char k1[AES_BLOCK_SIZE];
+ u_int round;
+ chunk_t k;
+
+ /* reset state */
+ this->e = _mm_setzero_si128();
+ this->rem_size = 0;
+ this->zero = TRUE;
+
+ /* handle variable-length keys as specified in RFC 4434, if required */
+ if (key.len == AES_BLOCK_SIZE)
+ {
+ k = key;
+ }
+ else if (key.len < AES_BLOCK_SIZE)
+ { /* pad short keys */
+ k = chunk_alloca(AES_BLOCK_SIZE);
+ memset(k.ptr, 0, k.len);
+ memcpy(k.ptr, key.ptr, key.len);
+ }
+ else
+ { /* shorten key using XCBC */
+ k = chunk_alloca(AES_BLOCK_SIZE);
+ memset(k.ptr, 0, k.len);
+ if (!set_key(this, k) || !get_mac(this, key, k.ptr))
+ {
+ return FALSE;
+ }
+ }
+
+ /*
+ * (1) Derive 3 128-bit keys (K1, K2 and K3) from the 128-bit secret
+ * key K, as follows:
+ * K1 = 0x01010101010101010101010101010101 encrypted with Key K
+ * K2 = 0x02020202020202020202020202020202 encrypted with Key K
+ * K3 = 0x03030303030303030303030303030303 encrypted with Key K
+ */
+
+ DESTROY_IF(this->k1);
+ this->k1 = aesni_key_create(TRUE, k);
+ if (!this->k1)
+ {
+ return FALSE;
+ }
+
+ t1 = _mm_set1_epi8(0x01);
+ t2 = _mm_set1_epi8(0x02);
+ t3 = _mm_set1_epi8(0x03);
+
+ t1 = _mm_xor_si128(t1, this->k1->schedule[0]);
+ t2 = _mm_xor_si128(t2, this->k1->schedule[0]);
+ t3 = _mm_xor_si128(t3, this->k1->schedule[0]);
+
+ for (round = 1; round < this->k1->rounds; round++)
+ {
+ t1 = _mm_aesenc_si128(t1, this->k1->schedule[round]);
+ t2 = _mm_aesenc_si128(t2, this->k1->schedule[round]);
+ t3 = _mm_aesenc_si128(t3, this->k1->schedule[round]);
+ }
+
+ t1 = _mm_aesenclast_si128(t1, this->k1->schedule[this->k1->rounds]);
+ t2 = _mm_aesenclast_si128(t2, this->k1->schedule[this->k1->rounds]);
+ t3 = _mm_aesenclast_si128(t3, this->k1->schedule[this->k1->rounds]);
+
+ _mm_storeu_si128((__m128i*)k1, t1);
+ this->k2 = t2;
+ this->k3 = t3;
+
+ this->k1->destroy(this->k1);
+ this->k1 = aesni_key_create(TRUE, chunk_from_thing(k1));
+
+ memwipe(k1, AES_BLOCK_SIZE);
+ return this->k1 != NULL;
+}
+
+METHOD(mac_t, destroy, void,
+ private_aesni_mac_t *this)
+{
+ DESTROY_IF(this->k1);
+ memwipe(&this->k2, sizeof(this->k2));
+ memwipe(&this->k3, sizeof(this->k3));
+ free_align(this);
+}
+
+/*
+ * Described in header
+ */
+mac_t *aesni_xcbc_create(encryption_algorithm_t algo, size_t key_size)
+{
+ private_aesni_mac_t *this;
+
+ INIT_ALIGN(this, sizeof(__m128i),
+ .public = {
+ .get_mac = _get_mac,
+ .get_mac_size = _get_mac_size,
+ .set_key = _set_key,
+ .destroy = _destroy,
+ },
+ );
+
+ return &this->public;
+}
+
+/*
+ * Described in header.
+ */
+prf_t *aesni_xcbc_prf_create(pseudo_random_function_t algo)
+{
+ mac_t *xcbc;
+
+ switch (algo)
+ {
+ case PRF_AES128_XCBC:
+ xcbc = aesni_xcbc_create(ENCR_AES_CBC, 16);
+ break;
+ default:
+ return NULL;
+ }
+ if (xcbc)
+ {
+ return mac_prf_create(xcbc);
+ }
+ return NULL;
+}
+
+/*
+ * Described in header
+ */
+signer_t *aesni_xcbc_signer_create(integrity_algorithm_t algo)
+{
+ size_t trunc;
+ mac_t *xcbc;
+
+ switch (algo)
+ {
+ case AUTH_AES_XCBC_96:
+ xcbc = aesni_xcbc_create(ENCR_AES_CBC, 16);
+ trunc = 12;
+ break;
+ default:
+ return NULL;
+ }
+ if (xcbc)
+ {
+ return mac_signer_create(xcbc, trunc);
+ }
+ return NULL;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_xcbc.h b/src/libstrongswan/plugins/aesni/aesni_xcbc.h
new file mode 100644
index 000000000..53f559feb
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_xcbc.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_xcbc aesni_xcbc
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_XCBC_H_
+#define AESNI_XCBC_H_
+
+#include <crypto/mac.h>
+#include <crypto/prfs/prf.h>
+#include <crypto/signers/signer.h>
+
+/**
+ * Create a generic mac_t object using AESNI XCBC
+ *
+ * @param algo underlying encryption algorithm
+ * @param key_size size of encryption key, in bytes
+ */
+mac_t *aesni_xcbc_create(encryption_algorithm_t algo, size_t key_size);
+
+/**
+ * Creates a new prf_t object based on AESNI XCBC.
+ *
+ * @param algo algorithm to implement
+ * @return prf_t object, NULL if not supported
+ */
+prf_t *aesni_xcbc_prf_create(pseudo_random_function_t algo);
+
+/**
+ * Creates a new signer_t object based on AESNI XCBC.
+ *
+ * @param algo algorithm to implement
+ * @return signer_t, NULL if not supported
+ */
+signer_t *aesni_xcbc_signer_create(integrity_algorithm_t algo);
+
+#endif /** AESNI_XCBC_H_ @}*/