Diffstat (limited to 'src/libstrongswan/plugins/aesni')
18 files changed, 6065 insertions, 0 deletions
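Note on the build flags: the Makefile.am below compiles every plugin source with -maes, -mpclmul and -mssse3, so the resulting object code may contain AES-NI, PCLMULQDQ and SSSE3 instructions and must only be executed on CPUs providing these extensions (the plugin is typically switched on with the --enable-aesni configure option). As a minimal standalone sketch of such a runtime check — not part of this patch, with an illustrative helper name and main() — all three feature bits can be queried via GCC's <cpuid.h>:

#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

/* Query CPUID leaf 1 and test ECX for SSSE3 (bit 9), PCLMULQDQ (bit 1)
 * and AES-NI (bit 25); the bit_* masks come with GCC's cpuid.h. */
static bool have_aesni_features(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
	{
		return false;
	}
	return (ecx & bit_SSSE3) && (ecx & bit_PCLMUL) && (ecx & bit_AES);
}

int main(void)
{
	printf("AES-NI/PCLMULQDQ/SSSE3 %savailable\n",
		   have_aesni_features() ? "" : "not ");
	return 0;
}

aesni_plugin.c, which is part of this changeset, presumably performs an equivalent feature test before registering the algorithms.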
diff --git a/src/libstrongswan/plugins/aesni/Makefile.am b/src/libstrongswan/plugins/aesni/Makefile.am new file mode 100644 index 000000000..2fe85c66c --- /dev/null +++ b/src/libstrongswan/plugins/aesni/Makefile.am @@ -0,0 +1,26 @@ +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/libstrongswan + +AM_CFLAGS = \ + -maes \ + -mpclmul \ + -mssse3 \ + $(PLUGIN_CFLAGS) + +if MONOLITHIC +noinst_LTLIBRARIES = libstrongswan-aesni.la +else +plugin_LTLIBRARIES = libstrongswan-aesni.la +endif + +libstrongswan_aesni_la_SOURCES = \ + aesni_key.h aesni_key.c \ + aesni_cbc.h aesni_cbc.c \ + aesni_ctr.h aesni_ctr.c \ + aesni_ccm.h aesni_ccm.c \ + aesni_gcm.h aesni_gcm.c \ + aesni_xcbc.h aesni_xcbc.c \ + aesni_cmac.h aesni_cmac.c \ + aesni_plugin.h aesni_plugin.c + +libstrongswan_aesni_la_LDFLAGS = -module -avoid-version diff --git a/src/libstrongswan/plugins/aesni/Makefile.in b/src/libstrongswan/plugins/aesni/Makefile.in new file mode 100644 index 000000000..34adaa390 --- /dev/null +++ b/src/libstrongswan/plugins/aesni/Makefile.in @@ -0,0 +1,793 @@ +# Makefile.in generated by automake 1.14.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/libstrongswan/plugins/aesni +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/config/libtool.m4 \ + $(top_srcdir)/m4/config/ltoptions.m4 \ + $(top_srcdir)/m4/config/ltsugar.m4 \ + $(top_srcdir)/m4/config/ltversion.m4 \ + $(top_srcdir)/m4/config/lt~obsolete.m4 \ + $(top_srcdir)/m4/macros/split-package-version.m4 \ + $(top_srcdir)/m4/macros/with.m4 \ + $(top_srcdir)/m4/macros/enable-disable.m4 \ + $(top_srcdir)/m4/macros/add-plugin.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' 
+am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(plugindir)" +LTLIBRARIES = $(noinst_LTLIBRARIES) $(plugin_LTLIBRARIES) +libstrongswan_aesni_la_LIBADD = +am_libstrongswan_aesni_la_OBJECTS = aesni_key.lo aesni_cbc.lo \ + aesni_ctr.lo aesni_ccm.lo aesni_gcm.lo aesni_xcbc.lo \ + aesni_cmac.lo aesni_plugin.lo +libstrongswan_aesni_la_OBJECTS = $(am_libstrongswan_aesni_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libstrongswan_aesni_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(libstrongswan_aesni_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@MONOLITHIC_FALSE@am_libstrongswan_aesni_la_rpath = -rpath \ +@MONOLITHIC_FALSE@ $(plugindir) +@MONOLITHIC_TRUE@am_libstrongswan_aesni_la_rpath = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libstrongswan_aesni_la_SOURCES) +DIST_SOURCES = $(libstrongswan_aesni_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BFDLIB = @BFDLIB@ +BTLIB = @BTLIB@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +COVERAGE_CFLAGS = @COVERAGE_CFLAGS@ +COVERAGE_LDFLAGS = @COVERAGE_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLIB = @DLLIB@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +EASY_INSTALL = @EASY_INSTALL@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GEM = @GEM@ +GENHTML = @GENHTML@ +GPERF = @GPERF@ +GPRBUILD = @GPRBUILD@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LCOV = @LCOV@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MYSQLCFLAG = @MYSQLCFLAG@ +MYSQLCONFIG = @MYSQLCONFIG@ +MYSQLLIB = @MYSQLLIB@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENSSL_LIB = @OPENSSL_LIB@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PACKAGE_VERSION_BUILD = @PACKAGE_VERSION_BUILD@ +PACKAGE_VERSION_MAJOR = @PACKAGE_VERSION_MAJOR@ +PACKAGE_VERSION_MINOR = @PACKAGE_VERSION_MINOR@ +PACKAGE_VERSION_REVIEW = @PACKAGE_VERSION_REVIEW@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERL = @PERL@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PLUGIN_CFLAGS = @PLUGIN_CFLAGS@ +PTHREADLIB = @PTHREADLIB@ +PYTHON = @PYTHON@ +PYTHONEGGINSTALLDIR = @PYTHONEGGINSTALLDIR@ +PYTHON_EXEC_PREFIX = @PYTHON_EXEC_PREFIX@ +PYTHON_PLATFORM = @PYTHON_PLATFORM@ +PYTHON_PREFIX = @PYTHON_PREFIX@ +PYTHON_VERSION = @PYTHON_VERSION@ +PY_TEST = @PY_TEST@ +RANLIB = @RANLIB@ +RTLIB = @RTLIB@ +RUBY = @RUBY@ +RUBYGEMDIR = @RUBYGEMDIR@ +RUBYINCLUDE = @RUBYINCLUDE@ +RUBYLIB = @RUBYLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOCKLIB = @SOCKLIB@ +STRIP = @STRIP@ +UNWINDLIB = @UNWINDLIB@ +VERSION = @VERSION@ +YACC = @YACC@ +YFLAGS = @YFLAGS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +aikgen_plugins = @aikgen_plugins@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +attest_plugins = @attest_plugins@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +c_plugins = @c_plugins@ 
+charon_natt_port = @charon_natt_port@ +charon_plugins = @charon_plugins@ +charon_udp_port = @charon_udp_port@ +clearsilver_LIBS = @clearsilver_LIBS@ +cmd_plugins = @cmd_plugins@ +datadir = @datadir@ +datarootdir = @datarootdir@ +dbusservicedir = @dbusservicedir@ +dev_headers = @dev_headers@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +fips_mode = @fips_mode@ +gtk_CFLAGS = @gtk_CFLAGS@ +gtk_LIBS = @gtk_LIBS@ +h_plugins = @h_plugins@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +imcvdir = @imcvdir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +ipsec_script = @ipsec_script@ +ipsec_script_upper = @ipsec_script_upper@ +ipsecdir = @ipsecdir@ +ipsecgroup = @ipsecgroup@ +ipseclibdir = @ipseclibdir@ +ipsecuser = @ipsecuser@ +json_CFLAGS = @json_CFLAGS@ +json_LIBS = @json_LIBS@ +libdir = @libdir@ +libexecdir = @libexecdir@ +libiptc_CFLAGS = @libiptc_CFLAGS@ +libiptc_LIBS = @libiptc_LIBS@ +linux_headers = @linux_headers@ +localedir = @localedir@ +localstatedir = @localstatedir@ +maemo_CFLAGS = @maemo_CFLAGS@ +maemo_LIBS = @maemo_LIBS@ +manager_plugins = @manager_plugins@ +mandir = @mandir@ +medsrv_plugins = @medsrv_plugins@ +mkdir_p = @mkdir_p@ +nm_CFLAGS = @nm_CFLAGS@ +nm_LIBS = @nm_LIBS@ +nm_ca_dir = @nm_ca_dir@ +nm_plugins = @nm_plugins@ +oldincludedir = @oldincludedir@ +pcsclite_CFLAGS = @pcsclite_CFLAGS@ +pcsclite_LIBS = @pcsclite_LIBS@ +pdfdir = @pdfdir@ +piddir = @piddir@ +pkgpyexecdir = @pkgpyexecdir@ +pkgpythondir = @pkgpythondir@ +pki_plugins = @pki_plugins@ +plugindir = @plugindir@ +pool_plugins = @pool_plugins@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +pyexecdir = @pyexecdir@ +pythondir = @pythondir@ +random_device = @random_device@ +resolv_conf = @resolv_conf@ +routing_table = @routing_table@ +routing_table_prio = @routing_table_prio@ +s_plugins = @s_plugins@ +sbindir = @sbindir@ +scepclient_plugins = @scepclient_plugins@ +scripts_plugins = @scripts_plugins@ +sharedstatedir = @sharedstatedir@ +soup_CFLAGS = @soup_CFLAGS@ +soup_LIBS = @soup_LIBS@ +srcdir = @srcdir@ +starter_plugins = @starter_plugins@ +strongswan_conf = @strongswan_conf@ +strongswan_options = @strongswan_options@ +swanctldir = @swanctldir@ +sysconfdir = @sysconfdir@ +systemd_daemon_CFLAGS = @systemd_daemon_CFLAGS@ +systemd_daemon_LIBS = @systemd_daemon_LIBS@ +systemd_journal_CFLAGS = @systemd_journal_CFLAGS@ +systemd_journal_LIBS = @systemd_journal_LIBS@ +systemdsystemunitdir = @systemdsystemunitdir@ +t_plugins = @t_plugins@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +urandom_device = @urandom_device@ +xml_CFLAGS = @xml_CFLAGS@ +xml_LIBS = @xml_LIBS@ +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/libstrongswan + +AM_CFLAGS = \ + -maes \ + -mpclmul \ + -mssse3 \ + $(PLUGIN_CFLAGS) + +@MONOLITHIC_TRUE@noinst_LTLIBRARIES = libstrongswan-aesni.la +@MONOLITHIC_FALSE@plugin_LTLIBRARIES = libstrongswan-aesni.la +libstrongswan_aesni_la_SOURCES = \ + aesni_key.h aesni_key.c \ + aesni_cbc.h aesni_cbc.c \ + aesni_ctr.h aesni_ctr.c \ + aesni_ccm.h aesni_ccm.c \ + aesni_gcm.h aesni_gcm.c \ + aesni_xcbc.h aesni_xcbc.c \ + aesni_cmac.h aesni_cmac.c \ + aesni_plugin.h aesni_plugin.c + +libstrongswan_aesni_la_LDFLAGS = -module -avoid-version +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in 
$?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/libstrongswan/plugins/aesni/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/libstrongswan/plugins/aesni/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +install-pluginLTLIBRARIES: $(plugin_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(plugin_LTLIBRARIES)'; test -n "$(plugindir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(plugindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(plugindir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(plugindir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(plugindir)"; \ + } + +uninstall-pluginLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(plugin_LTLIBRARIES)'; test -n "$(plugindir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(plugindir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(plugindir)/$$f"; \ + done + +clean-pluginLTLIBRARIES: + -test -z "$(plugin_LTLIBRARIES)" || rm -f $(plugin_LTLIBRARIES) + @list='$(plugin_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libstrongswan-aesni.la: $(libstrongswan_aesni_la_OBJECTS) $(libstrongswan_aesni_la_DEPENDENCIES) $(EXTRA_libstrongswan_aesni_la_DEPENDENCIES) + $(AM_V_CCLD)$(libstrongswan_aesni_la_LINK) $(am_libstrongswan_aesni_la_rpath) $(libstrongswan_aesni_la_OBJECTS) $(libstrongswan_aesni_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_cbc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_ccm.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_cmac.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_ctr.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_gcm.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_key.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_plugin.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aesni_xcbc.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e 
"s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(plugindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + clean-pluginLTLIBRARIES mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-pluginLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-pluginLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES clean-pluginLTLIBRARIES \ + cscopelist-am ctags ctags-am distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-pluginLTLIBRARIES install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-pluginLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/libstrongswan/plugins/aesni/aesni_cbc.c b/src/libstrongswan/plugins/aesni/aesni_cbc.c new file mode 100644 index 000000000..78ada7663 --- /dev/null +++ b/src/libstrongswan/plugins/aesni/aesni_cbc.c @@ -0,0 +1,671 @@ +/* + * Copyright (C) 2015 Martin Willi + * Copyright (C) 2015 revosec AG + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include "aesni_cbc.h" +#include "aesni_key.h" + +/** + * Pipeline parallelism we use for CBC decryption + */ +#define CBC_DECRYPT_PARALLELISM 4 + +typedef struct private_aesni_cbc_t private_aesni_cbc_t; + +/** + * CBC en/decryption method type + */ +typedef void (*aesni_cbc_fn_t)(aesni_key_t*, u_int, u_char*, u_char*, u_char*); + +/** + * Private data of an aesni_cbc_t object. + */ +struct private_aesni_cbc_t { + + /** + * Public aesni_cbc_t interface. 
+ */ + aesni_cbc_t public; + + /** + * Key size + */ + u_int key_size; + + /** + * Encryption key schedule + */ + aesni_key_t *ekey; + + /** + * Decryption key schedule + */ + aesni_key_t *dkey; + + /** + * Encryption method + */ + aesni_cbc_fn_t encrypt; + + /** + * Decryption method + */ + aesni_cbc_fn_t decrypt; +}; + +/** + * AES-128 CBC encryption + */ +static void encrypt_cbc128(aesni_key_t *key, u_int blocks, u_char *in, + u_char *iv, u_char *out) +{ + __m128i *ks, t, fb, *bi, *bo; + int i; + + ks = key->schedule; + bi = (__m128i*)in; + bo = (__m128i*)out; + + fb = _mm_loadu_si128((__m128i*)iv); + for (i = 0; i < blocks; i++) + { + t = _mm_loadu_si128(bi + i); + fb = _mm_xor_si128(t, fb); + fb = _mm_xor_si128(fb, ks[0]); + + fb = _mm_aesenc_si128(fb, ks[1]); + fb = _mm_aesenc_si128(fb, ks[2]); + fb = _mm_aesenc_si128(fb, ks[3]); + fb = _mm_aesenc_si128(fb, ks[4]); + fb = _mm_aesenc_si128(fb, ks[5]); + fb = _mm_aesenc_si128(fb, ks[6]); + fb = _mm_aesenc_si128(fb, ks[7]); + fb = _mm_aesenc_si128(fb, ks[8]); + fb = _mm_aesenc_si128(fb, ks[9]); + + fb = _mm_aesenclast_si128(fb, ks[10]); + _mm_storeu_si128(bo + i, fb); + } +} + +/** + * AES-128 CBC decryption + */ +static void decrypt_cbc128(aesni_key_t *key, u_int blocks, u_char *in, + u_char *iv, u_char *out) +{ + __m128i *ks, last, *bi, *bo; + __m128i t1, t2, t3, t4; + __m128i f1, f2, f3, f4; + u_int i, pblocks; + + ks = key->schedule; + bi = (__m128i*)in; + bo = (__m128i*)out; + pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM); + + f1 = _mm_loadu_si128((__m128i*)iv); + + for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM) + { + t1 = _mm_loadu_si128(bi + i + 0); + t2 = _mm_loadu_si128(bi + i + 1); + t3 = _mm_loadu_si128(bi + i + 2); + t4 = _mm_loadu_si128(bi + i + 3); + + f2 = t1; + f3 = t2; + f4 = t3; + last = t4; + + t1 = _mm_xor_si128(t1, ks[0]); + t2 = _mm_xor_si128(t2, ks[0]); + t3 = _mm_xor_si128(t3, ks[0]); + t4 = _mm_xor_si128(t4, ks[0]); + + t1 = _mm_aesdec_si128(t1, ks[1]); + t2 = _mm_aesdec_si128(t2, ks[1]); + t3 = _mm_aesdec_si128(t3, ks[1]); + t4 = _mm_aesdec_si128(t4, ks[1]); + t1 = _mm_aesdec_si128(t1, ks[2]); + t2 = _mm_aesdec_si128(t2, ks[2]); + t3 = _mm_aesdec_si128(t3, ks[2]); + t4 = _mm_aesdec_si128(t4, ks[2]); + t1 = _mm_aesdec_si128(t1, ks[3]); + t2 = _mm_aesdec_si128(t2, ks[3]); + t3 = _mm_aesdec_si128(t3, ks[3]); + t4 = _mm_aesdec_si128(t4, ks[3]); + t1 = _mm_aesdec_si128(t1, ks[4]); + t2 = _mm_aesdec_si128(t2, ks[4]); + t3 = _mm_aesdec_si128(t3, ks[4]); + t4 = _mm_aesdec_si128(t4, ks[4]); + t1 = _mm_aesdec_si128(t1, ks[5]); + t2 = _mm_aesdec_si128(t2, ks[5]); + t3 = _mm_aesdec_si128(t3, ks[5]); + t4 = _mm_aesdec_si128(t4, ks[5]); + t1 = _mm_aesdec_si128(t1, ks[6]); + t2 = _mm_aesdec_si128(t2, ks[6]); + t3 = _mm_aesdec_si128(t3, ks[6]); + t4 = _mm_aesdec_si128(t4, ks[6]); + t1 = _mm_aesdec_si128(t1, ks[7]); + t2 = _mm_aesdec_si128(t2, ks[7]); + t3 = _mm_aesdec_si128(t3, ks[7]); + t4 = _mm_aesdec_si128(t4, ks[7]); + t1 = _mm_aesdec_si128(t1, ks[8]); + t2 = _mm_aesdec_si128(t2, ks[8]); + t3 = _mm_aesdec_si128(t3, ks[8]); + t4 = _mm_aesdec_si128(t4, ks[8]); + t1 = _mm_aesdec_si128(t1, ks[9]); + t2 = _mm_aesdec_si128(t2, ks[9]); + t3 = _mm_aesdec_si128(t3, ks[9]); + t4 = _mm_aesdec_si128(t4, ks[9]); + + t1 = _mm_aesdeclast_si128(t1, ks[10]); + t2 = _mm_aesdeclast_si128(t2, ks[10]); + t3 = _mm_aesdeclast_si128(t3, ks[10]); + t4 = _mm_aesdeclast_si128(t4, ks[10]); + t1 = _mm_xor_si128(t1, f1); + t2 = _mm_xor_si128(t2, f2); + t3 = _mm_xor_si128(t3, f3); + t4 = _mm_xor_si128(t4, f4); + _mm_storeu_si128(bo + i + 
0, t1); + _mm_storeu_si128(bo + i + 1, t2); + _mm_storeu_si128(bo + i + 2, t3); + _mm_storeu_si128(bo + i + 3, t4); + f1 = last; + } + + for (i = pblocks; i < blocks; i++) + { + last = _mm_loadu_si128(bi + i); + t1 = _mm_xor_si128(last, ks[0]); + + t1 = _mm_aesdec_si128(t1, ks[1]); + t1 = _mm_aesdec_si128(t1, ks[2]); + t1 = _mm_aesdec_si128(t1, ks[3]); + t1 = _mm_aesdec_si128(t1, ks[4]); + t1 = _mm_aesdec_si128(t1, ks[5]); + t1 = _mm_aesdec_si128(t1, ks[6]); + t1 = _mm_aesdec_si128(t1, ks[7]); + t1 = _mm_aesdec_si128(t1, ks[8]); + t1 = _mm_aesdec_si128(t1, ks[9]); + + t1 = _mm_aesdeclast_si128(t1, ks[10]); + t1 = _mm_xor_si128(t1, f1); + _mm_storeu_si128(bo + i, t1); + f1 = last; + } +} + +/** + * AES-192 CBC encryption + */ +static void encrypt_cbc192(aesni_key_t *key, u_int blocks, u_char *in, + u_char *iv, u_char *out) +{ + __m128i *ks, t, fb, *bi, *bo; + int i; + + ks = key->schedule; + bi = (__m128i*)in; + bo = (__m128i*)out; + + fb = _mm_loadu_si128((__m128i*)iv); + for (i = 0; i < blocks; i++) + { + t = _mm_loadu_si128(bi + i); + fb = _mm_xor_si128(t, fb); + fb = _mm_xor_si128(fb, ks[0]); + + fb = _mm_aesenc_si128(fb, ks[1]); + fb = _mm_aesenc_si128(fb, ks[2]); + fb = _mm_aesenc_si128(fb, ks[3]); + fb = _mm_aesenc_si128(fb, ks[4]); + fb = _mm_aesenc_si128(fb, ks[5]); + fb = _mm_aesenc_si128(fb, ks[6]); + fb = _mm_aesenc_si128(fb, ks[7]); + fb = _mm_aesenc_si128(fb, ks[8]); + fb = _mm_aesenc_si128(fb, ks[9]); + fb = _mm_aesenc_si128(fb, ks[10]); + fb = _mm_aesenc_si128(fb, ks[11]); + + fb = _mm_aesenclast_si128(fb, ks[12]); + _mm_storeu_si128(bo + i, fb); + } +} + +/** + * AES-192 CBC decryption + */ +static void decrypt_cbc192(aesni_key_t *key, u_int blocks, u_char *in, + u_char *iv, u_char *out) +{ + __m128i *ks, last, *bi, *bo; + __m128i t1, t2, t3, t4; + __m128i f1, f2, f3, f4; + u_int i, pblocks; + + ks = key->schedule; + bi = (__m128i*)in; + bo = (__m128i*)out; + pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM); + + f1 = _mm_loadu_si128((__m128i*)iv); + + for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM) + { + t1 = _mm_loadu_si128(bi + i + 0); + t2 = _mm_loadu_si128(bi + i + 1); + t3 = _mm_loadu_si128(bi + i + 2); + t4 = _mm_loadu_si128(bi + i + 3); + + f2 = t1; + f3 = t2; + f4 = t3; + last = t4; + + t1 = _mm_xor_si128(t1, ks[0]); + t2 = _mm_xor_si128(t2, ks[0]); + t3 = _mm_xor_si128(t3, ks[0]); + t4 = _mm_xor_si128(t4, ks[0]); + + t1 = _mm_aesdec_si128(t1, ks[1]); + t2 = _mm_aesdec_si128(t2, ks[1]); + t3 = _mm_aesdec_si128(t3, ks[1]); + t4 = _mm_aesdec_si128(t4, ks[1]); + t1 = _mm_aesdec_si128(t1, ks[2]); + t2 = _mm_aesdec_si128(t2, ks[2]); + t3 = _mm_aesdec_si128(t3, ks[2]); + t4 = _mm_aesdec_si128(t4, ks[2]); + t1 = _mm_aesdec_si128(t1, ks[3]); + t2 = _mm_aesdec_si128(t2, ks[3]); + t3 = _mm_aesdec_si128(t3, ks[3]); + t4 = _mm_aesdec_si128(t4, ks[3]); + t1 = _mm_aesdec_si128(t1, ks[4]); + t2 = _mm_aesdec_si128(t2, ks[4]); + t3 = _mm_aesdec_si128(t3, ks[4]); + t4 = _mm_aesdec_si128(t4, ks[4]); + t1 = _mm_aesdec_si128(t1, ks[5]); + t2 = _mm_aesdec_si128(t2, ks[5]); + t3 = _mm_aesdec_si128(t3, ks[5]); + t4 = _mm_aesdec_si128(t4, ks[5]); + t1 = _mm_aesdec_si128(t1, ks[6]); + t2 = _mm_aesdec_si128(t2, ks[6]); + t3 = _mm_aesdec_si128(t3, ks[6]); + t4 = _mm_aesdec_si128(t4, ks[6]); + t1 = _mm_aesdec_si128(t1, ks[7]); + t2 = _mm_aesdec_si128(t2, ks[7]); + t3 = _mm_aesdec_si128(t3, ks[7]); + t4 = _mm_aesdec_si128(t4, ks[7]); + t1 = _mm_aesdec_si128(t1, ks[8]); + t2 = _mm_aesdec_si128(t2, ks[8]); + t3 = _mm_aesdec_si128(t3, ks[8]); + t4 = _mm_aesdec_si128(t4, ks[8]); + t1 = 
_mm_aesdec_si128(t1, ks[9]); + t2 = _mm_aesdec_si128(t2, ks[9]); + t3 = _mm_aesdec_si128(t3, ks[9]); + t4 = _mm_aesdec_si128(t4, ks[9]); + t1 = _mm_aesdec_si128(t1, ks[10]); + t2 = _mm_aesdec_si128(t2, ks[10]); + t3 = _mm_aesdec_si128(t3, ks[10]); + t4 = _mm_aesdec_si128(t4, ks[10]); + t1 = _mm_aesdec_si128(t1, ks[11]); + t2 = _mm_aesdec_si128(t2, ks[11]); + t3 = _mm_aesdec_si128(t3, ks[11]); + t4 = _mm_aesdec_si128(t4, ks[11]); + + t1 = _mm_aesdeclast_si128(t1, ks[12]); + t2 = _mm_aesdeclast_si128(t2, ks[12]); + t3 = _mm_aesdeclast_si128(t3, ks[12]); + t4 = _mm_aesdeclast_si128(t4, ks[12]); + t1 = _mm_xor_si128(t1, f1); + t2 = _mm_xor_si128(t2, f2); + t3 = _mm_xor_si128(t3, f3); + t4 = _mm_xor_si128(t4, f4); + _mm_storeu_si128(bo + i + 0, t1); + _mm_storeu_si128(bo + i + 1, t2); + _mm_storeu_si128(bo + i + 2, t3); + _mm_storeu_si128(bo + i + 3, t4); + f1 = last; + } + + for (i = pblocks; i < blocks; i++) + { + last = _mm_loadu_si128(bi + i); + t1 = _mm_xor_si128(last, ks[0]); + + t1 = _mm_aesdec_si128(t1, ks[1]); + t1 = _mm_aesdec_si128(t1, ks[2]); + t1 = _mm_aesdec_si128(t1, ks[3]); + t1 = _mm_aesdec_si128(t1, ks[4]); + t1 = _mm_aesdec_si128(t1, ks[5]); + t1 = _mm_aesdec_si128(t1, ks[6]); + t1 = _mm_aesdec_si128(t1, ks[7]); + t1 = _mm_aesdec_si128(t1, ks[8]); + t1 = _mm_aesdec_si128(t1, ks[9]); + t1 = _mm_aesdec_si128(t1, ks[10]); + t1 = _mm_aesdec_si128(t1, ks[11]); + + t1 = _mm_aesdeclast_si128(t1, ks[12]); + t1 = _mm_xor_si128(t1, f1); + _mm_storeu_si128(bo + i, t1); + f1 = last; + } +} + +/** + * AES-256 CBC encryption + */ +static void encrypt_cbc256(aesni_key_t *key, u_int blocks, u_char *in, + u_char *iv, u_char *out) +{ + __m128i *ks, t, fb, *bi, *bo; + int i; + + ks = key->schedule; + bi = (__m128i*)in; + bo = (__m128i*)out; + + fb = _mm_loadu_si128((__m128i*)iv); + for (i = 0; i < blocks; i++) + { + t = _mm_loadu_si128(bi + i); + fb = _mm_xor_si128(t, fb); + fb = _mm_xor_si128(fb, ks[0]); + + fb = _mm_aesenc_si128(fb, ks[1]); + fb = _mm_aesenc_si128(fb, ks[2]); + fb = _mm_aesenc_si128(fb, ks[3]); + fb = _mm_aesenc_si128(fb, ks[4]); + fb = _mm_aesenc_si128(fb, ks[5]); + fb = _mm_aesenc_si128(fb, ks[6]); + fb = _mm_aesenc_si128(fb, ks[7]); + fb = _mm_aesenc_si128(fb, ks[8]); + fb = _mm_aesenc_si128(fb, ks[9]); + fb = _mm_aesenc_si128(fb, ks[10]); + fb = _mm_aesenc_si128(fb, ks[11]); + fb = _mm_aesenc_si128(fb, ks[12]); + fb = _mm_aesenc_si128(fb, ks[13]); + + fb = _mm_aesenclast_si128(fb, ks[14]); + _mm_storeu_si128(bo + i, fb); + } +} + +/** + * AES-256 CBC decryption + */ +static void decrypt_cbc256(aesni_key_t *key, u_int blocks, u_char *in, + u_char *iv, u_char *out) +{ + __m128i *ks, last, *bi, *bo; + __m128i t1, t2, t3, t4; + __m128i f1, f2, f3, f4; + u_int i, pblocks; + + ks = key->schedule; + bi = (__m128i*)in; + bo = (__m128i*)out; + pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM); + + f1 = _mm_loadu_si128((__m128i*)iv); + + for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM) + { + t1 = _mm_loadu_si128(bi + i + 0); + t2 = _mm_loadu_si128(bi + i + 1); + t3 = _mm_loadu_si128(bi + i + 2); + t4 = _mm_loadu_si128(bi + i + 3); + + f2 = t1; + f3 = t2; + f4 = t3; + last = t4; + + t1 = _mm_xor_si128(t1, ks[0]); + t2 = _mm_xor_si128(t2, ks[0]); + t3 = _mm_xor_si128(t3, ks[0]); + t4 = _mm_xor_si128(t4, ks[0]); + + t1 = _mm_aesdec_si128(t1, ks[1]); + t2 = _mm_aesdec_si128(t2, ks[1]); + t3 = _mm_aesdec_si128(t3, ks[1]); + t4 = _mm_aesdec_si128(t4, ks[1]); + t1 = _mm_aesdec_si128(t1, ks[2]); + t2 = _mm_aesdec_si128(t2, ks[2]); + t3 = _mm_aesdec_si128(t3, ks[2]); + t4 = 
_mm_aesdec_si128(t4, ks[2]); + t1 = _mm_aesdec_si128(t1, ks[3]); + t2 = _mm_aesdec_si128(t2, ks[3]); + t3 = _mm_aesdec_si128(t3, ks[3]); + t4 = _mm_aesdec_si128(t4, ks[3]); + t1 = _mm_aesdec_si128(t1, ks[4]); + t2 = _mm_aesdec_si128(t2, ks[4]); + t3 = _mm_aesdec_si128(t3, ks[4]); + t4 = _mm_aesdec_si128(t4, ks[4]); + t1 = _mm_aesdec_si128(t1, ks[5]); + t2 = _mm_aesdec_si128(t2, ks[5]); + t3 = _mm_aesdec_si128(t3, ks[5]); + t4 = _mm_aesdec_si128(t4, ks[5]); + t1 = _mm_aesdec_si128(t1, ks[6]); + t2 = _mm_aesdec_si128(t2, ks[6]); + t3 = _mm_aesdec_si128(t3, ks[6]); + t4 = _mm_aesdec_si128(t4, ks[6]); + t1 = _mm_aesdec_si128(t1, ks[7]); + t2 = _mm_aesdec_si128(t2, ks[7]); + t3 = _mm_aesdec_si128(t3, ks[7]); + t4 = _mm_aesdec_si128(t4, ks[7]); + t1 = _mm_aesdec_si128(t1, ks[8]); + t2 = _mm_aesdec_si128(t2, ks[8]); + t3 = _mm_aesdec_si128(t3, ks[8]); + t4 = _mm_aesdec_si128(t4, ks[8]); + t1 = _mm_aesdec_si128(t1, ks[9]); + t2 = _mm_aesdec_si128(t2, ks[9]); + t3 = _mm_aesdec_si128(t3, ks[9]); + t4 = _mm_aesdec_si128(t4, ks[9]); + t1 = _mm_aesdec_si128(t1, ks[10]); + t2 = _mm_aesdec_si128(t2, ks[10]); + t3 = _mm_aesdec_si128(t3, ks[10]); + t4 = _mm_aesdec_si128(t4, ks[10]); + t1 = _mm_aesdec_si128(t1, ks[11]); + t2 = _mm_aesdec_si128(t2, ks[11]); + t3 = _mm_aesdec_si128(t3, ks[11]); + t4 = _mm_aesdec_si128(t4, ks[11]); + t1 = _mm_aesdec_si128(t1, ks[12]); + t2 = _mm_aesdec_si128(t2, ks[12]); + t3 = _mm_aesdec_si128(t3, ks[12]); + t4 = _mm_aesdec_si128(t4, ks[12]); + t1 = _mm_aesdec_si128(t1, ks[13]); + t2 = _mm_aesdec_si128(t2, ks[13]); + t3 = _mm_aesdec_si128(t3, ks[13]); + t4 = _mm_aesdec_si128(t4, ks[13]); + + t1 = _mm_aesdeclast_si128(t1, ks[14]); + t2 = _mm_aesdeclast_si128(t2, ks[14]); + t3 = _mm_aesdeclast_si128(t3, ks[14]); + t4 = _mm_aesdeclast_si128(t4, ks[14]); + t1 = _mm_xor_si128(t1, f1); + t2 = _mm_xor_si128(t2, f2); + t3 = _mm_xor_si128(t3, f3); + t4 = _mm_xor_si128(t4, f4); + _mm_storeu_si128(bo + i + 0, t1); + _mm_storeu_si128(bo + i + 1, t2); + _mm_storeu_si128(bo + i + 2, t3); + _mm_storeu_si128(bo + i + 3, t4); + f1 = last; + } + + for (i = pblocks; i < blocks; i++) + { + last = _mm_loadu_si128(bi + i); + t1 = _mm_xor_si128(last, ks[0]); + + t1 = _mm_aesdec_si128(t1, ks[1]); + t1 = _mm_aesdec_si128(t1, ks[2]); + t1 = _mm_aesdec_si128(t1, ks[3]); + t1 = _mm_aesdec_si128(t1, ks[4]); + t1 = _mm_aesdec_si128(t1, ks[5]); + t1 = _mm_aesdec_si128(t1, ks[6]); + t1 = _mm_aesdec_si128(t1, ks[7]); + t1 = _mm_aesdec_si128(t1, ks[8]); + t1 = _mm_aesdec_si128(t1, ks[9]); + t1 = _mm_aesdec_si128(t1, ks[10]); + t1 = _mm_aesdec_si128(t1, ks[11]); + t1 = _mm_aesdec_si128(t1, ks[12]); + t1 = _mm_aesdec_si128(t1, ks[13]); + + t1 = _mm_aesdeclast_si128(t1, ks[14]); + t1 = _mm_xor_si128(t1, f1); + _mm_storeu_si128(bo + i, t1); + f1 = last; + } +} + +/** + * Do inline or allocated de/encryption using key schedule + */ +static bool crypt(aesni_cbc_fn_t fn, aesni_key_t *key, + chunk_t data, chunk_t iv, chunk_t *out) +{ + u_char *buf; + + if (!key || iv.len != AES_BLOCK_SIZE || data.len % AES_BLOCK_SIZE) + { + return FALSE; + } + if (out) + { + *out = chunk_alloc(data.len); + buf = out->ptr; + } + else + { + buf = data.ptr; + } + fn(key, data.len / AES_BLOCK_SIZE, data.ptr, iv.ptr, buf); + return TRUE; +} + +METHOD(crypter_t, encrypt, bool, + private_aesni_cbc_t *this, chunk_t data, chunk_t iv, chunk_t *encrypted) +{ + return crypt(this->encrypt, this->ekey, data, iv, encrypted); +} + +METHOD(crypter_t, decrypt, bool, + private_aesni_cbc_t *this, chunk_t data, chunk_t iv, chunk_t *decrypted) +{ + return 
crypt(this->decrypt, this->dkey, data, iv, decrypted); +} + +METHOD(crypter_t, get_block_size, size_t, + private_aesni_cbc_t *this) +{ + return AES_BLOCK_SIZE; +} + +METHOD(crypter_t, get_iv_size, size_t, + private_aesni_cbc_t *this) +{ + return AES_BLOCK_SIZE; +} + +METHOD(crypter_t, get_key_size, size_t, + private_aesni_cbc_t *this) +{ + return this->key_size; +} + +METHOD(crypter_t, set_key, bool, + private_aesni_cbc_t *this, chunk_t key) +{ + if (key.len != this->key_size) + { + return FALSE; + } + + DESTROY_IF(this->ekey); + DESTROY_IF(this->dkey); + + this->ekey = aesni_key_create(TRUE, key); + this->dkey = aesni_key_create(FALSE, key); + + return this->ekey && this->dkey; +} + +METHOD(crypter_t, destroy, void, + private_aesni_cbc_t *this) +{ + DESTROY_IF(this->ekey); + DESTROY_IF(this->dkey); + free_align(this); +} + +/** + * See header + */ +aesni_cbc_t *aesni_cbc_create(encryption_algorithm_t algo, size_t key_size) +{ + private_aesni_cbc_t *this; + + if (algo != ENCR_AES_CBC) + { + return NULL; + } + switch (key_size) + { + case 0: + key_size = 16; + break; + case 16: + case 24: + case 32: + break; + default: + return NULL; + } + + INIT_ALIGN(this, sizeof(__m128i), + .public = { + .crypter = { + .encrypt = _encrypt, + .decrypt = _decrypt, + .get_block_size = _get_block_size, + .get_iv_size = _get_iv_size, + .get_key_size = _get_key_size, + .set_key = _set_key, + .destroy = _destroy, + }, + }, + .key_size = key_size, + ); + + switch (key_size) + { + case 16: + this->encrypt = encrypt_cbc128; + this->decrypt = decrypt_cbc128; + break; + case 24: + this->encrypt = encrypt_cbc192; + this->decrypt = decrypt_cbc192; + break; + case 32: + this->encrypt = encrypt_cbc256; + this->decrypt = decrypt_cbc256; + break; + } + + return &this->public; +} diff --git a/src/libstrongswan/plugins/aesni/aesni_cbc.h b/src/libstrongswan/plugins/aesni/aesni_cbc.h new file mode 100644 index 000000000..c004ec611 --- /dev/null +++ b/src/libstrongswan/plugins/aesni/aesni_cbc.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2015 Martin Willi + * Copyright (C) 2015 revosec AG + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +/** + * @defgroup aesni_cbc aesni_cbc + * @{ @ingroup aesni + */ + +#ifndef AESNI_CBC_H_ +#define AESNI_CBC_H_ + +#include <library.h> + +typedef struct aesni_cbc_t aesni_cbc_t; + +/** + * CBC mode crypter using AES-NI + */ +struct aesni_cbc_t { + + /** + * Implements crypter interface + */ + crypter_t crypter; +}; + +/** + * Create a aesni_cbc instance. 
+ * + * @param algo			encryption algorithm, ENCR_AES_CBC + * @param key_size		AES key size, in bytes + * @return				AES-CBC crypter, NULL if not supported + */ +aesni_cbc_t *aesni_cbc_create(encryption_algorithm_t algo, size_t key_size); + +#endif /** AESNI_CBC_H_ @}*/ diff --git a/src/libstrongswan/plugins/aesni/aesni_ccm.c b/src/libstrongswan/plugins/aesni/aesni_ccm.c new file mode 100644 index 000000000..d523bc17a --- /dev/null +++ b/src/libstrongswan/plugins/aesni/aesni_ccm.c @@ -0,0 +1,914 @@ +/* + * Copyright (C) 2010-2015 Martin Willi + * Copyright (C) 2010-2015 revosec AG + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include "aesni_ccm.h" +#include "aesni_key.h" + +#include <crypto/iv/iv_gen_seq.h> + +#include <tmmintrin.h> + +#define SALT_SIZE 3 +#define IV_SIZE 8 +#define NONCE_SIZE (SALT_SIZE + IV_SIZE) /* 11 */ +#define Q_SIZE (AES_BLOCK_SIZE - NONCE_SIZE - 1) /* 4 */ + +typedef struct private_aesni_ccm_t private_aesni_ccm_t; + +/** + * CCM en/decryption method type + */ +typedef void (*aesni_ccm_fn_t)(private_aesni_ccm_t*, size_t, u_char*, u_char*, + u_char*, size_t, u_char*, u_char*); + +/** + * Private data of an aesni_ccm_t object. + */ +struct private_aesni_ccm_t { + + /** + * Public aesni_ccm_t interface. + */ + aesni_ccm_t public; + + /** + * Encryption key schedule + */ + aesni_key_t *key; + + /** + * IV generator. + */ + iv_gen_t *iv_gen; + + /** + * Length of the integrity check value + */ + size_t icv_size; + + /** + * Length of the key in bytes + */ + size_t key_size; + + /** + * CCM encryption function + */ + aesni_ccm_fn_t encrypt; + + /** + * CCM decryption function + */ + aesni_ccm_fn_t decrypt; + + /** + * salt to add to nonce + */ + u_char salt[SALT_SIZE]; +}; + +/** + * First block with control information + */ +typedef struct __attribute__((packed)) { + BITFIELD4(u_int8_t, + /* size of p length field q, as q-1 */ + q_len: 3, + /* size of our ICV t, as (t-2)/2 */ + t_len: 3, + /* do we have associated data */ + assoc: 1, + reserved: 1, + ) flags; + /* nonce value */ + struct __attribute__((packed)) { + u_char salt[SALT_SIZE]; + u_char iv[IV_SIZE]; + } nonce; + /* length of plain text, q */ + u_char q[Q_SIZE]; +} b0_t; + +/** + * Counter block + */ +typedef struct __attribute__((packed)) { + BITFIELD3(u_int8_t, + /* size of p length field q, as q-1 */ + q_len: 3, + zero: 3, + reserved: 2, + ) flags; + /* nonce value */ + struct __attribute__((packed)) { + u_char salt[SALT_SIZE]; + u_char iv[IV_SIZE]; + } nonce; + /* counter value */ + u_char i[Q_SIZE]; +} ctr_t; + +/** + * Build the first block B0 + */ +static void build_b0(private_aesni_ccm_t *this, size_t len, size_t alen, + u_char *iv, void *out) +{ + b0_t *block = out; + + block->flags.reserved = 0; + block->flags.assoc = alen ?
1 : 0; + block->flags.t_len = (this->icv_size - 2) / 2; + block->flags.q_len = Q_SIZE - 1; + memcpy(block->nonce.salt, this->salt, SALT_SIZE); + memcpy(block->nonce.iv, iv, IV_SIZE); + htoun32(block->q, len); +} + +/** + * Build a counter block for counter i + */ +static void build_ctr(private_aesni_ccm_t *this, u_int32_t i, u_char *iv, + void *out) +{ + ctr_t *ctr = out; + + ctr->flags.reserved = 0; + ctr->flags.zero = 0; + ctr->flags.q_len = Q_SIZE - 1; + memcpy(ctr->nonce.salt, this->salt, SALT_SIZE); + memcpy(ctr->nonce.iv, iv, IV_SIZE); + htoun32(ctr->i, i); +} + +/** + * Calculate the ICV for the b0 and associated data + */ +static __m128i icv_header(private_aesni_ccm_t *this, size_t len, u_char *iv, + u_int16_t alen, u_char *assoc) +{ + __m128i *ks, b, t, c; + u_int i, round, blocks, rem; + + ks = this->key->schedule; + build_b0(this, len, alen, iv, &b); + c = _mm_loadu_si128(&b); + c = _mm_xor_si128(c, ks[0]); + for (round = 1; round < this->key->rounds; round++) + { + c = _mm_aesenc_si128(c, ks[round]); + } + c = _mm_aesenclast_si128(c, ks[this->key->rounds]); + + if (alen) + { + blocks = (alen + sizeof(alen)) / AES_BLOCK_SIZE; + rem = (alen + sizeof(alen)) % AES_BLOCK_SIZE; + if (rem) + { + blocks++; + } + for (i = 0; i < blocks; i++) + { + if (i == 0) + { /* first block */ + memset(&b, 0, sizeof(b)); + htoun16(&b, alen); + memcpy(((u_char*)&b) + sizeof(alen), assoc, + min(alen, sizeof(b) - sizeof(alen))); + t = _mm_loadu_si128(&b); + } + else if (i == blocks - 1 && rem) + { /* last block with padding */ + memset(&b, 0, sizeof(b)); + memcpy(&b, ((__m128i*)(assoc - sizeof(alen))) + i, rem); + t = _mm_loadu_si128(&b); + } + else + { /* full block */ + t = _mm_loadu_si128(((__m128i*)(assoc - sizeof(alen))) + i); + } + c = _mm_xor_si128(t, c); + c = _mm_xor_si128(c, ks[0]); + for (round = 1; round < this->key->rounds; round++) + { + c = _mm_aesenc_si128(c, ks[round]); + } + c = _mm_aesenclast_si128(c, ks[this->key->rounds]); + } + } + return c; +} + +/** + * En-/Decrypt the ICV, trim and store it + */ +static void crypt_icv(private_aesni_ccm_t *this, u_char *iv, + __m128i c, u_char *icv) +{ + __m128i *ks, b, t; + u_int round; + + ks = this->key->schedule; + build_ctr(this, 0, iv, &b); + + t = _mm_loadu_si128(&b); + t = _mm_xor_si128(t, ks[0]); + for (round = 1; round < this->key->rounds; round++) + { + t = _mm_aesenc_si128(t, ks[round]); + } + t = _mm_aesenclast_si128(t, ks[this->key->rounds]); + + t = _mm_xor_si128(t, c); + + _mm_storeu_si128(&b, t); + memcpy(icv, &b, this->icv_size); +} + +/** + * Do big-endian increment on x + */ +static inline __m128i increment_be(__m128i x) +{ + __m128i swap; + + swap = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + x = _mm_shuffle_epi8(x, swap); + x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1)); + x = _mm_shuffle_epi8(x, swap); + + return x; +} + +/** + * Encrypt a remaining incomplete block + */ +static __m128i encrypt_ccm_rem(aesni_key_t *key, u_int rem, __m128i state, + void *in, void *out, __m128i c) +{ + __m128i *ks, t, b, d; + u_int round; + + ks = key->schedule; + memset(&b, 0, sizeof(b)); + memcpy(&b, in, rem); + d = _mm_loadu_si128(&b); + + c = _mm_xor_si128(d, c); + c = _mm_xor_si128(c, ks[0]); + t = _mm_xor_si128(state, ks[0]); + for (round = 1; round < key->rounds; round++) + { + c = _mm_aesenc_si128(c, ks[round]); + t = _mm_aesenc_si128(t, ks[round]); + } + c = _mm_aesenclast_si128(c, ks[key->rounds]); + t = _mm_aesenclast_si128(t, ks[key->rounds]); + + t = _mm_xor_si128(t, d); + _mm_storeu_si128(&b, t); + 
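+ /* note: the CBC-MAC state c above absorbed the zero-padded final plaintext block, while below only the rem trailing bytes of the counter-mode encrypted block are copied to the output */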
+ memcpy(out, &b, rem); + + return c; +} + +/** + * Decrypt a remaining incomplete block + */ +static __m128i decrypt_ccm_rem(aesni_key_t *key, u_int rem, __m128i state, + void *in, void *out, __m128i c) +{ + __m128i *ks, t, b, d; + u_int round; + + ks = key->schedule; + memset(&b, 0, sizeof(b)); + memcpy(&b, in, rem); + d = _mm_loadu_si128(&b); + + t = _mm_xor_si128(state, ks[0]); + for (round = 1; round < key->rounds; round++) + { + t = _mm_aesenc_si128(t, ks[round]); + } + t = _mm_aesenclast_si128(t, ks[key->rounds]); + t = _mm_xor_si128(t, d); + _mm_storeu_si128(&b, t); + + memset((u_char*)&b + rem, 0, sizeof(b) - rem); + t = _mm_loadu_si128(&b); + c = _mm_xor_si128(t, c); + c = _mm_xor_si128(c, ks[0]); + for (round = 1; round < key->rounds; round++) + { + c = _mm_aesenc_si128(c, ks[round]); + } + c = _mm_aesenclast_si128(c, ks[key->rounds]); + + memcpy(out, &b, rem); + + return c; +} + +/** + * AES-128 CCM encryption/ICV generation + */ +static void encrypt_ccm128(private_aesni_ccm_t *this, + size_t len, u_char *in, u_char *out, u_char *iv, + size_t alen, u_char *assoc, u_char *icv) +{ + __m128i *ks, d, t, c, b, state, *bi, *bo; + u_int blocks, rem, i; + + c = icv_header(this, len, iv, alen, assoc); + build_ctr(this, 1, iv, &b); + state = _mm_load_si128(&b); + blocks = len / AES_BLOCK_SIZE; + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < blocks; i++) + { + d = _mm_loadu_si128(bi + i); + + c = _mm_xor_si128(d, c); + c = _mm_xor_si128(c, ks[0]); + t = _mm_xor_si128(state, ks[0]); + + c = _mm_aesenc_si128(c, ks[1]); + t = _mm_aesenc_si128(t, ks[1]); + c = _mm_aesenc_si128(c, ks[2]); + t = _mm_aesenc_si128(t, ks[2]); + c = _mm_aesenc_si128(c, ks[3]); + t = _mm_aesenc_si128(t, ks[3]); + c = _mm_aesenc_si128(c, ks[4]); + t = _mm_aesenc_si128(t, ks[4]); + c = _mm_aesenc_si128(c, ks[5]); + t = _mm_aesenc_si128(t, ks[5]); + c = _mm_aesenc_si128(c, ks[6]); + t = _mm_aesenc_si128(t, ks[6]); + c = _mm_aesenc_si128(c, ks[7]); + t = _mm_aesenc_si128(t, ks[7]); + c = _mm_aesenc_si128(c, ks[8]); + t = _mm_aesenc_si128(t, ks[8]); + c = _mm_aesenc_si128(c, ks[9]); + t = _mm_aesenc_si128(t, ks[9]); + + c = _mm_aesenclast_si128(c, ks[10]); + t = _mm_aesenclast_si128(t, ks[10]); + + t = _mm_xor_si128(t, d); + _mm_storeu_si128(bo + i, t); + + state = increment_be(state); + } + + if (rem) + { + c = encrypt_ccm_rem(this->key, rem, state, bi + blocks, bo + blocks, c); + } + crypt_icv(this, iv, c, icv); +} + +/** + * AES-128 CCM decryption/ICV generation + */ +static void decrypt_ccm128(private_aesni_ccm_t *this, + size_t len, u_char *in, u_char *out, u_char *iv, + size_t alen, u_char *assoc, u_char *icv) +{ + __m128i *ks, d, t, c, b, state, *bi, *bo; + u_int blocks, rem, i; + + c = icv_header(this, len, iv, alen, assoc); + build_ctr(this, 1, iv, &b); + state = _mm_load_si128(&b); + blocks = len / AES_BLOCK_SIZE; + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < blocks; i++) + { + d = _mm_loadu_si128(bi + i); + + t = _mm_xor_si128(state, ks[0]); + + t = _mm_aesenc_si128(t, ks[1]); + t = _mm_aesenc_si128(t, ks[2]); + t = _mm_aesenc_si128(t, ks[3]); + t = _mm_aesenc_si128(t, ks[4]); + t = _mm_aesenc_si128(t, ks[5]); + t = _mm_aesenc_si128(t, ks[6]); + t = _mm_aesenc_si128(t, ks[7]); + t = _mm_aesenc_si128(t, ks[8]); + t = _mm_aesenc_si128(t, ks[9]); + + t = _mm_aesenclast_si128(t, ks[10]); + t = _mm_xor_si128(t, d); + _mm_storeu_si128(bo + i, t); + + c = 
_mm_xor_si128(t, c); + c = _mm_xor_si128(c, ks[0]); + + c = _mm_aesenc_si128(c, ks[1]); + c = _mm_aesenc_si128(c, ks[2]); + c = _mm_aesenc_si128(c, ks[3]); + c = _mm_aesenc_si128(c, ks[4]); + c = _mm_aesenc_si128(c, ks[5]); + c = _mm_aesenc_si128(c, ks[6]); + c = _mm_aesenc_si128(c, ks[7]); + c = _mm_aesenc_si128(c, ks[8]); + c = _mm_aesenc_si128(c, ks[9]); + + c = _mm_aesenclast_si128(c, ks[10]); + + state = increment_be(state); + } + + if (rem) + { + c = decrypt_ccm_rem(this->key, rem, state, bi + blocks, bo + blocks, c); + } + crypt_icv(this, iv, c, icv); +} + +/** + * AES-192 CCM encryption/ICV generation + */ +static void encrypt_ccm192(private_aesni_ccm_t *this, + size_t len, u_char *in, u_char *out, u_char *iv, + size_t alen, u_char *assoc, u_char *icv) +{ + __m128i *ks, d, t, c, b, state, *bi, *bo; + u_int blocks, rem, i; + + c = icv_header(this, len, iv, alen, assoc); + build_ctr(this, 1, iv, &b); + state = _mm_load_si128(&b); + blocks = len / AES_BLOCK_SIZE; + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < blocks; i++) + { + d = _mm_loadu_si128(bi + i); + + c = _mm_xor_si128(d, c); + c = _mm_xor_si128(c, ks[0]); + t = _mm_xor_si128(state, ks[0]); + + c = _mm_aesenc_si128(c, ks[1]); + t = _mm_aesenc_si128(t, ks[1]); + c = _mm_aesenc_si128(c, ks[2]); + t = _mm_aesenc_si128(t, ks[2]); + c = _mm_aesenc_si128(c, ks[3]); + t = _mm_aesenc_si128(t, ks[3]); + c = _mm_aesenc_si128(c, ks[4]); + t = _mm_aesenc_si128(t, ks[4]); + c = _mm_aesenc_si128(c, ks[5]); + t = _mm_aesenc_si128(t, ks[5]); + c = _mm_aesenc_si128(c, ks[6]); + t = _mm_aesenc_si128(t, ks[6]); + c = _mm_aesenc_si128(c, ks[7]); + t = _mm_aesenc_si128(t, ks[7]); + c = _mm_aesenc_si128(c, ks[8]); + t = _mm_aesenc_si128(t, ks[8]); + c = _mm_aesenc_si128(c, ks[9]); + t = _mm_aesenc_si128(t, ks[9]); + c = _mm_aesenc_si128(c, ks[10]); + t = _mm_aesenc_si128(t, ks[10]); + c = _mm_aesenc_si128(c, ks[11]); + t = _mm_aesenc_si128(t, ks[11]); + + c = _mm_aesenclast_si128(c, ks[12]); + t = _mm_aesenclast_si128(t, ks[12]); + + t = _mm_xor_si128(t, d); + _mm_storeu_si128(bo + i, t); + + state = increment_be(state); + } + + if (rem) + { + c = encrypt_ccm_rem(this->key, rem, state, bi + blocks, bo + blocks, c); + } + crypt_icv(this, iv, c, icv); +} + +/** + * AES-192 CCM decryption/ICV generation + */ +static void decrypt_ccm192(private_aesni_ccm_t *this, + size_t len, u_char *in, u_char *out, u_char *iv, + size_t alen, u_char *assoc, u_char *icv) +{ + __m128i *ks, d, t, c, b, state, *bi, *bo; + u_int blocks, rem, i; + + c = icv_header(this, len, iv, alen, assoc); + build_ctr(this, 1, iv, &b); + state = _mm_load_si128(&b); + blocks = len / AES_BLOCK_SIZE; + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < blocks; i++) + { + d = _mm_loadu_si128(bi + i); + + t = _mm_xor_si128(state, ks[0]); + + t = _mm_aesenc_si128(t, ks[1]); + t = _mm_aesenc_si128(t, ks[2]); + t = _mm_aesenc_si128(t, ks[3]); + t = _mm_aesenc_si128(t, ks[4]); + t = _mm_aesenc_si128(t, ks[5]); + t = _mm_aesenc_si128(t, ks[6]); + t = _mm_aesenc_si128(t, ks[7]); + t = _mm_aesenc_si128(t, ks[8]); + t = _mm_aesenc_si128(t, ks[9]); + t = _mm_aesenc_si128(t, ks[10]); + t = _mm_aesenc_si128(t, ks[11]); + + t = _mm_aesenclast_si128(t, ks[12]); + t = _mm_xor_si128(t, d); + _mm_storeu_si128(bo + i, t); + + c = _mm_xor_si128(t, c); + c = _mm_xor_si128(c, ks[0]); + + c = _mm_aesenc_si128(c, ks[1]); + c = _mm_aesenc_si128(c, ks[2]); + c = 
_mm_aesenc_si128(c, ks[3]); + c = _mm_aesenc_si128(c, ks[4]); + c = _mm_aesenc_si128(c, ks[5]); + c = _mm_aesenc_si128(c, ks[6]); + c = _mm_aesenc_si128(c, ks[7]); + c = _mm_aesenc_si128(c, ks[8]); + c = _mm_aesenc_si128(c, ks[9]); + c = _mm_aesenc_si128(c, ks[10]); + c = _mm_aesenc_si128(c, ks[11]); + + c = _mm_aesenclast_si128(c, ks[12]); + + state = increment_be(state); + } + + if (rem) + { + c = decrypt_ccm_rem(this->key, rem, state, bi + blocks, bo + blocks, c); + } + crypt_icv(this, iv, c, icv); +} + +/** + * AES-256 CCM encryption/ICV generation + */ +static void encrypt_ccm256(private_aesni_ccm_t *this, + size_t len, u_char *in, u_char *out, u_char *iv, + size_t alen, u_char *assoc, u_char *icv) +{ + __m128i *ks, d, t, c, b, state, *bi, *bo; + u_int blocks, rem, i; + + c = icv_header(this, len, iv, alen, assoc); + build_ctr(this, 1, iv, &b); + state = _mm_load_si128(&b); + blocks = len / AES_BLOCK_SIZE; + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < blocks; i++) + { + d = _mm_loadu_si128(bi + i); + + c = _mm_xor_si128(d, c); + c = _mm_xor_si128(c, ks[0]); + t = _mm_xor_si128(state, ks[0]); + + c = _mm_aesenc_si128(c, ks[1]); + t = _mm_aesenc_si128(t, ks[1]); + c = _mm_aesenc_si128(c, ks[2]); + t = _mm_aesenc_si128(t, ks[2]); + c = _mm_aesenc_si128(c, ks[3]); + t = _mm_aesenc_si128(t, ks[3]); + c = _mm_aesenc_si128(c, ks[4]); + t = _mm_aesenc_si128(t, ks[4]); + c = _mm_aesenc_si128(c, ks[5]); + t = _mm_aesenc_si128(t, ks[5]); + c = _mm_aesenc_si128(c, ks[6]); + t = _mm_aesenc_si128(t, ks[6]); + c = _mm_aesenc_si128(c, ks[7]); + t = _mm_aesenc_si128(t, ks[7]); + c = _mm_aesenc_si128(c, ks[8]); + t = _mm_aesenc_si128(t, ks[8]); + c = _mm_aesenc_si128(c, ks[9]); + t = _mm_aesenc_si128(t, ks[9]); + c = _mm_aesenc_si128(c, ks[10]); + t = _mm_aesenc_si128(t, ks[10]); + c = _mm_aesenc_si128(c, ks[11]); + t = _mm_aesenc_si128(t, ks[11]); + c = _mm_aesenc_si128(c, ks[12]); + t = _mm_aesenc_si128(t, ks[12]); + c = _mm_aesenc_si128(c, ks[13]); + t = _mm_aesenc_si128(t, ks[13]); + + c = _mm_aesenclast_si128(c, ks[14]); + t = _mm_aesenclast_si128(t, ks[14]); + + t = _mm_xor_si128(t, d); + _mm_storeu_si128(bo + i, t); + + state = increment_be(state); + } + + if (rem) + { + c = encrypt_ccm_rem(this->key, rem, state, bi + blocks, bo + blocks, c); + } + crypt_icv(this, iv, c, icv); +} + +/** + * AES-256 CCM decryption/ICV generation + */ +static void decrypt_ccm256(private_aesni_ccm_t *this, + size_t len, u_char *in, u_char *out, u_char *iv, + size_t alen, u_char *assoc, u_char *icv) +{ + __m128i *ks, d, t, c, b, state, *bi, *bo; + u_int blocks, rem, i; + + c = icv_header(this, len, iv, alen, assoc); + build_ctr(this, 1, iv, &b); + state = _mm_load_si128(&b); + blocks = len / AES_BLOCK_SIZE; + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < blocks; i++) + { + d = _mm_loadu_si128(bi + i); + + t = _mm_xor_si128(state, ks[0]); + + t = _mm_aesenc_si128(t, ks[1]); + t = _mm_aesenc_si128(t, ks[2]); + t = _mm_aesenc_si128(t, ks[3]); + t = _mm_aesenc_si128(t, ks[4]); + t = _mm_aesenc_si128(t, ks[5]); + t = _mm_aesenc_si128(t, ks[6]); + t = _mm_aesenc_si128(t, ks[7]); + t = _mm_aesenc_si128(t, ks[8]); + t = _mm_aesenc_si128(t, ks[9]); + t = _mm_aesenc_si128(t, ks[10]); + t = _mm_aesenc_si128(t, ks[11]); + t = _mm_aesenc_si128(t, ks[12]); + t = _mm_aesenc_si128(t, ks[13]); + + t = _mm_aesenclast_si128(t, ks[14]); + t = _mm_xor_si128(t, d); + _mm_storeu_si128(bo + 
i, t); + + c = _mm_xor_si128(t, c); + c = _mm_xor_si128(c, ks[0]); + + c = _mm_aesenc_si128(c, ks[1]); + c = _mm_aesenc_si128(c, ks[2]); + c = _mm_aesenc_si128(c, ks[3]); + c = _mm_aesenc_si128(c, ks[4]); + c = _mm_aesenc_si128(c, ks[5]); + c = _mm_aesenc_si128(c, ks[6]); + c = _mm_aesenc_si128(c, ks[7]); + c = _mm_aesenc_si128(c, ks[8]); + c = _mm_aesenc_si128(c, ks[9]); + c = _mm_aesenc_si128(c, ks[10]); + c = _mm_aesenc_si128(c, ks[11]); + c = _mm_aesenc_si128(c, ks[12]); + c = _mm_aesenc_si128(c, ks[13]); + + c = _mm_aesenclast_si128(c, ks[14]); + + state = increment_be(state); + } + + if (rem) + { + c = decrypt_ccm_rem(this->key, rem, state, bi + blocks, bo + blocks, c); + } + crypt_icv(this, iv, c, icv); +} + +METHOD(aead_t, encrypt, bool, + private_aesni_ccm_t *this, chunk_t plain, chunk_t assoc, chunk_t iv, + chunk_t *encr) +{ + u_char *out; + + if (!this->key || iv.len != IV_SIZE) + { + return FALSE; + } + out = plain.ptr; + if (encr) + { + *encr = chunk_alloc(plain.len + this->icv_size); + out = encr->ptr; + } + this->encrypt(this, plain.len, plain.ptr, out, iv.ptr, + assoc.len, assoc.ptr, out + plain.len); + return TRUE; +} + +METHOD(aead_t, decrypt, bool, + private_aesni_ccm_t *this, chunk_t encr, chunk_t assoc, chunk_t iv, + chunk_t *plain) +{ + u_char *out, icv[this->icv_size]; + + if (!this->key || iv.len != IV_SIZE || encr.len < this->icv_size) + { + return FALSE; + } + encr.len -= this->icv_size; + out = encr.ptr; + if (plain) + { + *plain = chunk_alloc(encr.len); + out = plain->ptr; + } + + this->decrypt(this, encr.len, encr.ptr, out, iv.ptr, + assoc.len, assoc.ptr, icv); + return memeq_const(icv, encr.ptr + encr.len, this->icv_size); +} + +METHOD(aead_t, get_block_size, size_t, + private_aesni_ccm_t *this) +{ + return 1; +} + +METHOD(aead_t, get_icv_size, size_t, + private_aesni_ccm_t *this) +{ + return this->icv_size; +} + +METHOD(aead_t, get_iv_size, size_t, + private_aesni_ccm_t *this) +{ + return IV_SIZE; +} + +METHOD(aead_t, get_iv_gen, iv_gen_t*, + private_aesni_ccm_t *this) +{ + return this->iv_gen; +} + +METHOD(aead_t, get_key_size, size_t, + private_aesni_ccm_t *this) +{ + return this->key_size + SALT_SIZE; +} + +METHOD(aead_t, set_key, bool, + private_aesni_ccm_t *this, chunk_t key) +{ + if (key.len != this->key_size + SALT_SIZE) + { + return FALSE; + } + + memcpy(this->salt, key.ptr + key.len - SALT_SIZE, SALT_SIZE); + key.len -= SALT_SIZE; + + DESTROY_IF(this->key); + this->key = aesni_key_create(TRUE, key); + return TRUE; +} + +METHOD(aead_t, destroy, void, + private_aesni_ccm_t *this) +{ + DESTROY_IF(this->key); + this->iv_gen->destroy(this->iv_gen); + free_align(this); +} + +/** + * See header + */ +aesni_ccm_t *aesni_ccm_create(encryption_algorithm_t algo, + size_t key_size, size_t salt_size) +{ + private_aesni_ccm_t *this; + size_t icv_size; + + switch (key_size) + { + case 0: + key_size = 16; + break; + case 16: + case 24: + case 32: + break; + default: + return NULL; + } + if (salt_size && salt_size != SALT_SIZE) + { + /* currently not supported */ + return NULL; + } + switch (algo) + { + case ENCR_AES_CCM_ICV8: + algo = ENCR_AES_CBC; + icv_size = 8; + break; + case ENCR_AES_CCM_ICV12: + algo = ENCR_AES_CBC; + icv_size = 12; + break; + case ENCR_AES_CCM_ICV16: + algo = ENCR_AES_CBC; + icv_size = 16; + break; + default: + return NULL; + } + + INIT_ALIGN(this, sizeof(__m128i), + .public = { + .aead = { + .encrypt = _encrypt, + .decrypt = _decrypt, + .get_block_size = _get_block_size, + .get_icv_size = _get_icv_size, + .get_iv_size = _get_iv_size, + 
.get_iv_gen = _get_iv_gen,
+				.get_key_size = _get_key_size,
+				.set_key = _set_key,
+				.destroy = _destroy,
+			},
+		},
+		.key_size = key_size,
+		.iv_gen = iv_gen_seq_create(),
+		.icv_size = icv_size,
+	);
+
+	switch (key_size)
+	{
+		case 16:
+			this->encrypt = encrypt_ccm128;
+			this->decrypt = decrypt_ccm128;
+			break;
+		case 24:
+			this->encrypt = encrypt_ccm192;
+			this->decrypt = decrypt_ccm192;
+			break;
+		case 32:
+			this->encrypt = encrypt_ccm256;
+			this->decrypt = decrypt_ccm256;
+			break;
+	}
+
+	return &this->public;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_ccm.h b/src/libstrongswan/plugins/aesni/aesni_ccm.h
new file mode 100644
index 000000000..69612b515
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_ccm.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_ccm aesni_ccm
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_CCM_H_
+#define AESNI_CCM_H_
+
+#include <library.h>
+
+typedef struct aesni_ccm_t aesni_ccm_t;
+
+/**
+ * CCM mode AEAD using AES-NI
+ */
+struct aesni_ccm_t {
+
+	/**
+	 * Implements aead_t interface
+	 */
+	aead_t aead;
+};
+
+/**
+ * Create an aesni_ccm instance.
+ *
+ * @param algo			encryption algorithm, ENCR_AES_CCM*
+ * @param key_size		AES key size, in bytes
+ * @param salt_size		size of salt value
+ * @return				AES-CCM AEAD, NULL if not supported
+ */
+aesni_ccm_t *aesni_ccm_create(encryption_algorithm_t algo,
+							  size_t key_size, size_t salt_size);
+
+#endif /** AESNI_CCM_H_ @}*/
diff --git a/src/libstrongswan/plugins/aesni/aesni_cmac.c b/src/libstrongswan/plugins/aesni/aesni_cmac.c
new file mode 100644
index 000000000..d6a87e6d7
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_cmac.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (C) 2012 Tobias Brunner
+ * Hochschule fuer Technik Rapperswil
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "aesni_cmac.h"
+#include "aesni_key.h"
+
+#include <crypto/prfs/mac_prf.h>
+#include <crypto/signers/mac_signer.h>
+
+typedef struct private_mac_t private_mac_t;
+
+/**
+ * Private data of a mac_t object.
+ */
+struct private_mac_t {
+
+	/**
+	 * Public interface.
+ */ + mac_t public; + + /** + * Key schedule for key K + */ + aesni_key_t *k; + + /** + * K1 + */ + __m128i k1; + + /** + * K2 + */ + __m128i k2; + + /** + * T + */ + __m128i t; + + /** + * remaining, unprocessed bytes in append mode + */ + u_char rem[AES_BLOCK_SIZE]; + + /** + * number of bytes in remaining + */ + int rem_size; +}; + +METHOD(mac_t, get_mac, bool, + private_mac_t *this, chunk_t data, u_int8_t *out) +{ + __m128i *ks, t, l, *bi; + u_int blocks, rem, i; + + if (!this->k) + { + return FALSE; + } + + ks = this->k->schedule; + t = this->t; + + if (this->rem_size + data.len > AES_BLOCK_SIZE) + { + /* T := 0x00000000000000000000000000000000 (initially) + * for each block M_i (except the last) + * X := T XOR M_i; + * T := AES-128(K, X); + */ + + /* append data to remaining bytes, process block M_1 */ + memcpy(this->rem + this->rem_size, data.ptr, + AES_BLOCK_SIZE - this->rem_size); + data = chunk_skip(data, AES_BLOCK_SIZE - this->rem_size); + + t = _mm_xor_si128(t, _mm_loadu_si128((__m128i*)this->rem)); + + t = _mm_xor_si128(t, ks[0]); + t = _mm_aesenc_si128(t, ks[1]); + t = _mm_aesenc_si128(t, ks[2]); + t = _mm_aesenc_si128(t, ks[3]); + t = _mm_aesenc_si128(t, ks[4]); + t = _mm_aesenc_si128(t, ks[5]); + t = _mm_aesenc_si128(t, ks[6]); + t = _mm_aesenc_si128(t, ks[7]); + t = _mm_aesenc_si128(t, ks[8]); + t = _mm_aesenc_si128(t, ks[9]); + t = _mm_aesenclast_si128(t, ks[10]); + + /* process blocks M_2 ... M_n-1 */ + bi = (__m128i*)data.ptr; + rem = data.len % AES_BLOCK_SIZE; + blocks = data.len / AES_BLOCK_SIZE; + if (!rem && blocks) + { /* don't do last block */ + rem = AES_BLOCK_SIZE; + blocks--; + } + + /* process blocks M[2] ... M[n-1] */ + for (i = 0; i < blocks; i++) + { + t = _mm_xor_si128(t, _mm_loadu_si128(bi + i)); + + t = _mm_xor_si128(t, ks[0]); + t = _mm_aesenc_si128(t, ks[1]); + t = _mm_aesenc_si128(t, ks[2]); + t = _mm_aesenc_si128(t, ks[3]); + t = _mm_aesenc_si128(t, ks[4]); + t = _mm_aesenc_si128(t, ks[5]); + t = _mm_aesenc_si128(t, ks[6]); + t = _mm_aesenc_si128(t, ks[7]); + t = _mm_aesenc_si128(t, ks[8]); + t = _mm_aesenc_si128(t, ks[9]); + t = _mm_aesenclast_si128(t, ks[10]); + } + + /* store remaining bytes of block M_n */ + memcpy(this->rem, data.ptr + data.len - rem, rem); + this->rem_size = rem; + } + else + { + /* no complete block (or last block), just copy into remaining */ + memcpy(this->rem + this->rem_size, data.ptr, data.len); + this->rem_size += data.len; + } + if (out) + { + /* if last block is complete + * M_last := M_n XOR K1; + * else + * M_last := padding(M_n) XOR K2; + */ + if (this->rem_size == AES_BLOCK_SIZE) + { + l = _mm_loadu_si128((__m128i*)this->rem); + l = _mm_xor_si128(l, this->k1); + } + else + { + /* padding(x) = x || 10^i where i is 128-8*r-1 + * That is, padding(x) is the concatenation of x and a single '1', + * followed by the minimum number of '0's, so that the total length is + * equal to 128 bits. 
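+			 * For example, a 5-byte remainder m is padded to
+			 * m || 0x80 || 0x00 (ten zero bytes), i.e. a single '1' bit
+			 * followed by 87 '0' bits, giving a full 128-bit block.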
+ */ + if (this->rem_size < AES_BLOCK_SIZE) + { + memset(this->rem + this->rem_size, 0, + AES_BLOCK_SIZE - this->rem_size); + this->rem[this->rem_size] = 0x80; + } + l = _mm_loadu_si128((__m128i*)this->rem); + l = _mm_xor_si128(l, this->k2); + } + /* T := M_last XOR T; + * T := AES-128(K,T); + */ + t = _mm_xor_si128(l, t); + + t = _mm_xor_si128(t, ks[0]); + t = _mm_aesenc_si128(t, ks[1]); + t = _mm_aesenc_si128(t, ks[2]); + t = _mm_aesenc_si128(t, ks[3]); + t = _mm_aesenc_si128(t, ks[4]); + t = _mm_aesenc_si128(t, ks[5]); + t = _mm_aesenc_si128(t, ks[6]); + t = _mm_aesenc_si128(t, ks[7]); + t = _mm_aesenc_si128(t, ks[8]); + t = _mm_aesenc_si128(t, ks[9]); + t = _mm_aesenclast_si128(t, ks[10]); + + _mm_storeu_si128((__m128i*)out, t); + + /* reset state */ + t = _mm_setzero_si128(); + this->rem_size = 0; + } + this->t = t; + return TRUE; +} + +METHOD(mac_t, get_mac_size, size_t, + private_mac_t *this) +{ + return AES_BLOCK_SIZE; +} + +/** + * Left-shift the given chunk by one bit. + */ +static void bit_shift(chunk_t chunk) +{ + size_t i; + + for (i = 0; i < chunk.len; i++) + { + chunk.ptr[i] <<= 1; + if (i < chunk.len - 1 && chunk.ptr[i + 1] & 0x80) + { + chunk.ptr[i] |= 0x01; + } + } +} + +METHOD(mac_t, set_key, bool, + private_mac_t *this, chunk_t key) +{ + __m128i rb, msb, l, a; + u_int round; + chunk_t k; + + this->t = _mm_setzero_si128(); + this->rem_size = 0; + + /* we support variable keys as defined in RFC 4615 */ + if (key.len == AES_BLOCK_SIZE) + { + k = key; + } + else + { /* use cmac recursively to resize longer or shorter keys */ + k = chunk_alloca(AES_BLOCK_SIZE); + memset(k.ptr, 0, k.len); + if (!set_key(this, k) || !get_mac(this, key, k.ptr)) + { + return FALSE; + } + } + + DESTROY_IF(this->k); + this->k = aesni_key_create(TRUE, k); + if (!this->k) + { + return FALSE; + } + + /* + * Rb = 0x00000000000000000000000000000087 + * L = 0x00000000000000000000000000000000 encrypted with K + * if MSB(L) == 0 + * K1 = L << 1 + * else + * K1 = (L << 1) XOR Rb + * if MSB(K1) == 0 + * K2 = K1 << 1 + * else + * K2 = (K1 << 1) XOR Rb + */ + + rb = _mm_set_epi32(0x87000000, 0, 0, 0); + msb = _mm_set_epi32(0, 0, 0, 0x80); + + l = _mm_setzero_si128(); + + l = _mm_xor_si128(l, this->k->schedule[0]); + for (round = 1; round < this->k->rounds; round++) + { + l = _mm_aesenc_si128(l, this->k->schedule[round]); + } + l = _mm_aesenclast_si128(l, this->k->schedule[this->k->rounds]); + + this->k1 = l; + bit_shift(chunk_from_thing(this->k1)); + a = _mm_and_si128(l, msb); + if (memchr(&a, 0x80, 1)) + { + this->k1 = _mm_xor_si128(this->k1, rb); + } + this->k2 = this->k1; + bit_shift(chunk_from_thing(this->k2)); + a = _mm_and_si128(this->k1, msb); + if (memchr(&a, 0x80, 1)) + { + this->k2 = _mm_xor_si128(this->k2, rb); + } + + return TRUE; +} + +METHOD(mac_t, destroy, void, + private_mac_t *this) +{ + DESTROY_IF(this->k); + memwipe(&this->k1, sizeof(this->k1)); + memwipe(&this->k2, sizeof(this->k2)); + free_align(this); +} + +/* + * Described in header + */ +mac_t *aesni_cmac_create(encryption_algorithm_t algo, size_t key_size) +{ + private_mac_t *this; + + INIT_ALIGN(this, sizeof(__m128i), + .public = { + .get_mac = _get_mac, + .get_mac_size = _get_mac_size, + .set_key = _set_key, + .destroy = _destroy, + }, + ); + + return &this->public; +} + +/* + * Described in header. 
+ */
+prf_t *aesni_cmac_prf_create(pseudo_random_function_t algo)
+{
+	mac_t *cmac;
+
+	switch (algo)
+	{
+		case PRF_AES128_CMAC:
+			cmac = aesni_cmac_create(ENCR_AES_CBC, 16);
+			break;
+		default:
+			return NULL;
+	}
+	if (cmac)
+	{
+		return mac_prf_create(cmac);
+	}
+	return NULL;
+}
+
+/*
+ * Described in header.
+ */
+signer_t *aesni_cmac_signer_create(integrity_algorithm_t algo)
+{
+	size_t truncation;
+	mac_t *cmac;
+
+	switch (algo)
+	{
+		case AUTH_AES_CMAC_96:
+			cmac = aesni_cmac_create(ENCR_AES_CBC, 16);
+			truncation = 12;
+			break;
+		default:
+			return NULL;
+	}
+	if (cmac)
+	{
+		return mac_signer_create(cmac, truncation);
+	}
+	return NULL;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_cmac.h b/src/libstrongswan/plugins/aesni/aesni_cmac.h
new file mode 100644
index 000000000..5f0af7393
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_cmac.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_cmac aesni_cmac
+ * @{ @ingroup aesni
+ */
+
+#ifndef CMAC_H_
+#define CMAC_H_
+
+#include <crypto/mac.h>
+#include <crypto/prfs/prf.h>
+#include <crypto/signers/signer.h>
+
+/**
+ * Create a generic mac_t object using AESNI CMAC.
+ *
+ * @param algo			underlying encryption algorithm
+ * @param key_size		size of encryption key, in bytes
+ */
+mac_t *aesni_cmac_create(encryption_algorithm_t algo, size_t key_size);
+
+/**
+ * Creates a new prf_t object based on AESNI CMAC.
+ *
+ * @param algo			algorithm to implement
+ * @return				prf_t object, NULL if not supported
+ */
+prf_t *aesni_cmac_prf_create(pseudo_random_function_t algo);
+
+/**
+ * Creates a new signer_t object based on AESNI CMAC.
+ *
+ * @param algo			algorithm to implement
+ * @return				signer_t, NULL if not supported
+ */
+signer_t *aesni_cmac_signer_create(integrity_algorithm_t algo);
+
+#endif /** CMAC_H_ @}*/
diff --git a/src/libstrongswan/plugins/aesni/aesni_ctr.c b/src/libstrongswan/plugins/aesni/aesni_ctr.c
new file mode 100644
index 000000000..989813814
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_ctr.c
@@ -0,0 +1,643 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */ + +#include "aesni_ctr.h" +#include "aesni_key.h" + +#include <tmmintrin.h> + +/** + * Pipeline parallelism we use for CTR en/decryption + */ +#define CTR_CRYPT_PARALLELISM 4 + +typedef struct private_aesni_ctr_t private_aesni_ctr_t; + +/** + * CTR en/decryption method type + */ +typedef void (*aesni_ctr_fn_t)(private_aesni_ctr_t*, size_t, u_char*, u_char*); + +/** + * Private data of an aesni_ctr_t object. + */ +struct private_aesni_ctr_t { + + /** + * Public aesni_ctr_t interface. + */ + aesni_ctr_t public; + + /** + * Key size + */ + u_int key_size; + + /** + * Key schedule + */ + aesni_key_t *key; + + /** + * Encryption method + */ + aesni_ctr_fn_t crypt; + + /** + * Counter state + */ + struct { + char nonce[4]; + char iv[8]; + u_int32_t counter; + } __attribute__((packed, aligned(sizeof(__m128i)))) state; +}; + +/** + * Do big-endian increment on x + */ +static inline __m128i increment_be(__m128i x) +{ + __m128i swap; + + swap = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + + x = _mm_shuffle_epi8(x, swap); + x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1)); + x = _mm_shuffle_epi8(x, swap); + + return x; +} + +/** + * AES-128 CTR encryption + */ +static void encrypt_ctr128(private_aesni_ctr_t *this, + size_t len, u_char *in, u_char *out) +{ + __m128i t1, t2, t3, t4; + __m128i d1, d2, d3, d4; + __m128i *ks, state, b, *bi, *bo; + u_int i, blocks, pblocks, rem; + + state = _mm_load_si128((__m128i*)&this->state); + blocks = len / AES_BLOCK_SIZE; + pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM); + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM) + { + d1 = _mm_loadu_si128(bi + i + 0); + d2 = _mm_loadu_si128(bi + i + 1); + d3 = _mm_loadu_si128(bi + i + 2); + d4 = _mm_loadu_si128(bi + i + 3); + + t1 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + t2 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + t3 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + t4 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t2 = _mm_aesenc_si128(t2, ks[1]); + t3 = _mm_aesenc_si128(t3, ks[1]); + t4 = _mm_aesenc_si128(t4, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t2 = _mm_aesenc_si128(t2, ks[2]); + t3 = _mm_aesenc_si128(t3, ks[2]); + t4 = _mm_aesenc_si128(t4, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t2 = _mm_aesenc_si128(t2, ks[3]); + t3 = _mm_aesenc_si128(t3, ks[3]); + t4 = _mm_aesenc_si128(t4, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t2 = _mm_aesenc_si128(t2, ks[4]); + t3 = _mm_aesenc_si128(t3, ks[4]); + t4 = _mm_aesenc_si128(t4, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t2 = _mm_aesenc_si128(t2, ks[5]); + t3 = _mm_aesenc_si128(t3, ks[5]); + t4 = _mm_aesenc_si128(t4, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t2 = _mm_aesenc_si128(t2, ks[6]); + t3 = _mm_aesenc_si128(t3, ks[6]); + t4 = _mm_aesenc_si128(t4, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t2 = _mm_aesenc_si128(t2, ks[7]); + t3 = _mm_aesenc_si128(t3, ks[7]); + t4 = _mm_aesenc_si128(t4, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t2 = _mm_aesenc_si128(t2, ks[8]); + t3 = _mm_aesenc_si128(t3, ks[8]); + t4 = _mm_aesenc_si128(t4, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t2 = _mm_aesenc_si128(t2, ks[9]); + t3 = _mm_aesenc_si128(t3, ks[9]); + t4 = _mm_aesenc_si128(t4, ks[9]); + + t1 = _mm_aesenclast_si128(t1, ks[10]); + t2 = _mm_aesenclast_si128(t2, ks[10]); + t3 = _mm_aesenclast_si128(t3, 
ks[10]); + t4 = _mm_aesenclast_si128(t4, ks[10]); + t1 = _mm_xor_si128(t1, d1); + t2 = _mm_xor_si128(t2, d2); + t3 = _mm_xor_si128(t3, d3); + t4 = _mm_xor_si128(t4, d4); + _mm_storeu_si128(bo + i + 0, t1); + _mm_storeu_si128(bo + i + 1, t2); + _mm_storeu_si128(bo + i + 2, t3); + _mm_storeu_si128(bo + i + 3, t4); + } + + for (i = pblocks; i < blocks; i++) + { + d1 = _mm_loadu_si128(bi + i); + + t1 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + + t1 = _mm_aesenclast_si128(t1, ks[10]); + t1 = _mm_xor_si128(t1, d1); + _mm_storeu_si128(bo + i, t1); + } + + if (rem) + { + memset(&b, 0, sizeof(b)); + memcpy(&b, bi + blocks, rem); + + d1 = _mm_loadu_si128(&b); + t1 = _mm_xor_si128(state, ks[0]); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + + t1 = _mm_aesenclast_si128(t1, ks[10]); + t1 = _mm_xor_si128(t1, d1); + _mm_storeu_si128(&b, t1); + + memcpy(bo + blocks, &b, rem); + } +} + +/** + * AES-192 CTR encryption + */ +static void encrypt_ctr192(private_aesni_ctr_t *this, + size_t len, u_char *in, u_char *out) +{ + __m128i t1, t2, t3, t4; + __m128i d1, d2, d3, d4; + __m128i *ks, state, b, *bi, *bo; + u_int i, blocks, pblocks, rem; + + state = _mm_load_si128((__m128i*)&this->state); + blocks = len / AES_BLOCK_SIZE; + pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM); + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM) + { + d1 = _mm_loadu_si128(bi + i + 0); + d2 = _mm_loadu_si128(bi + i + 1); + d3 = _mm_loadu_si128(bi + i + 2); + d4 = _mm_loadu_si128(bi + i + 3); + + t1 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + t2 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + t3 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + t4 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t2 = _mm_aesenc_si128(t2, ks[1]); + t3 = _mm_aesenc_si128(t3, ks[1]); + t4 = _mm_aesenc_si128(t4, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t2 = _mm_aesenc_si128(t2, ks[2]); + t3 = _mm_aesenc_si128(t3, ks[2]); + t4 = _mm_aesenc_si128(t4, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t2 = _mm_aesenc_si128(t2, ks[3]); + t3 = _mm_aesenc_si128(t3, ks[3]); + t4 = _mm_aesenc_si128(t4, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t2 = _mm_aesenc_si128(t2, ks[4]); + t3 = _mm_aesenc_si128(t3, ks[4]); + t4 = _mm_aesenc_si128(t4, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t2 = _mm_aesenc_si128(t2, ks[5]); + t3 = _mm_aesenc_si128(t3, ks[5]); + t4 = _mm_aesenc_si128(t4, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t2 = _mm_aesenc_si128(t2, ks[6]); + t3 = _mm_aesenc_si128(t3, ks[6]); + t4 = _mm_aesenc_si128(t4, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t2 = _mm_aesenc_si128(t2, ks[7]); + t3 = _mm_aesenc_si128(t3, ks[7]); + t4 = _mm_aesenc_si128(t4, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t2 = 
_mm_aesenc_si128(t2, ks[8]); + t3 = _mm_aesenc_si128(t3, ks[8]); + t4 = _mm_aesenc_si128(t4, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t2 = _mm_aesenc_si128(t2, ks[9]); + t3 = _mm_aesenc_si128(t3, ks[9]); + t4 = _mm_aesenc_si128(t4, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t2 = _mm_aesenc_si128(t2, ks[10]); + t3 = _mm_aesenc_si128(t3, ks[10]); + t4 = _mm_aesenc_si128(t4, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + t2 = _mm_aesenc_si128(t2, ks[11]); + t3 = _mm_aesenc_si128(t3, ks[11]); + t4 = _mm_aesenc_si128(t4, ks[11]); + + t1 = _mm_aesenclast_si128(t1, ks[12]); + t2 = _mm_aesenclast_si128(t2, ks[12]); + t3 = _mm_aesenclast_si128(t3, ks[12]); + t4 = _mm_aesenclast_si128(t4, ks[12]); + t1 = _mm_xor_si128(t1, d1); + t2 = _mm_xor_si128(t2, d2); + t3 = _mm_xor_si128(t3, d3); + t4 = _mm_xor_si128(t4, d4); + _mm_storeu_si128(bo + i + 0, t1); + _mm_storeu_si128(bo + i + 1, t2); + _mm_storeu_si128(bo + i + 2, t3); + _mm_storeu_si128(bo + i + 3, t4); + } + + for (i = pblocks; i < blocks; i++) + { + d1 = _mm_loadu_si128(bi + i); + + t1 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + + t1 = _mm_aesenclast_si128(t1, ks[12]); + t1 = _mm_xor_si128(t1, d1); + _mm_storeu_si128(bo + i, t1); + } + + if (rem) + { + memset(&b, 0, sizeof(b)); + memcpy(&b, bi + blocks, rem); + + d1 = _mm_loadu_si128(&b); + t1 = _mm_xor_si128(state, ks[0]); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + + t1 = _mm_aesenclast_si128(t1, ks[12]); + t1 = _mm_xor_si128(t1, d1); + _mm_storeu_si128(&b, t1); + + memcpy(bo + blocks, &b, rem); + } +} + +/** + * AES-256 CTR encryption + */ +static void encrypt_ctr256(private_aesni_ctr_t *this, + size_t len, u_char *in, u_char *out) +{ + __m128i t1, t2, t3, t4; + __m128i d1, d2, d3, d4; + __m128i *ks, state, b, *bi, *bo; + u_int i, blocks, pblocks, rem; + + state = _mm_load_si128((__m128i*)&this->state); + blocks = len / AES_BLOCK_SIZE; + pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM); + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM) + { + d1 = _mm_loadu_si128(bi + i + 0); + d2 = _mm_loadu_si128(bi + i + 1); + d3 = _mm_loadu_si128(bi + i + 2); + d4 = _mm_loadu_si128(bi + i + 3); + + t1 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + t2 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + t3 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + t4 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t2 = _mm_aesenc_si128(t2, ks[1]); + t3 = _mm_aesenc_si128(t3, ks[1]); + t4 = _mm_aesenc_si128(t4, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t2 = _mm_aesenc_si128(t2, ks[2]); + t3 = _mm_aesenc_si128(t3, ks[2]); + t4 = 
_mm_aesenc_si128(t4, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t2 = _mm_aesenc_si128(t2, ks[3]); + t3 = _mm_aesenc_si128(t3, ks[3]); + t4 = _mm_aesenc_si128(t4, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t2 = _mm_aesenc_si128(t2, ks[4]); + t3 = _mm_aesenc_si128(t3, ks[4]); + t4 = _mm_aesenc_si128(t4, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t2 = _mm_aesenc_si128(t2, ks[5]); + t3 = _mm_aesenc_si128(t3, ks[5]); + t4 = _mm_aesenc_si128(t4, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t2 = _mm_aesenc_si128(t2, ks[6]); + t3 = _mm_aesenc_si128(t3, ks[6]); + t4 = _mm_aesenc_si128(t4, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t2 = _mm_aesenc_si128(t2, ks[7]); + t3 = _mm_aesenc_si128(t3, ks[7]); + t4 = _mm_aesenc_si128(t4, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t2 = _mm_aesenc_si128(t2, ks[8]); + t3 = _mm_aesenc_si128(t3, ks[8]); + t4 = _mm_aesenc_si128(t4, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t2 = _mm_aesenc_si128(t2, ks[9]); + t3 = _mm_aesenc_si128(t3, ks[9]); + t4 = _mm_aesenc_si128(t4, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t2 = _mm_aesenc_si128(t2, ks[10]); + t3 = _mm_aesenc_si128(t3, ks[10]); + t4 = _mm_aesenc_si128(t4, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + t2 = _mm_aesenc_si128(t2, ks[11]); + t3 = _mm_aesenc_si128(t3, ks[11]); + t4 = _mm_aesenc_si128(t4, ks[11]); + t1 = _mm_aesenc_si128(t1, ks[12]); + t2 = _mm_aesenc_si128(t2, ks[12]); + t3 = _mm_aesenc_si128(t3, ks[12]); + t4 = _mm_aesenc_si128(t4, ks[12]); + t1 = _mm_aesenc_si128(t1, ks[13]); + t2 = _mm_aesenc_si128(t2, ks[13]); + t3 = _mm_aesenc_si128(t3, ks[13]); + t4 = _mm_aesenc_si128(t4, ks[13]); + + t1 = _mm_aesenclast_si128(t1, ks[14]); + t2 = _mm_aesenclast_si128(t2, ks[14]); + t3 = _mm_aesenclast_si128(t3, ks[14]); + t4 = _mm_aesenclast_si128(t4, ks[14]); + t1 = _mm_xor_si128(t1, d1); + t2 = _mm_xor_si128(t2, d2); + t3 = _mm_xor_si128(t3, d3); + t4 = _mm_xor_si128(t4, d4); + _mm_storeu_si128(bo + i + 0, t1); + _mm_storeu_si128(bo + i + 1, t2); + _mm_storeu_si128(bo + i + 2, t3); + _mm_storeu_si128(bo + i + 3, t4); + } + + for (i = pblocks; i < blocks; i++) + { + d1 = _mm_loadu_si128(bi + i); + + t1 = _mm_xor_si128(state, ks[0]); + state = increment_be(state); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + t1 = _mm_aesenc_si128(t1, ks[12]); + t1 = _mm_aesenc_si128(t1, ks[13]); + + t1 = _mm_aesenclast_si128(t1, ks[14]); + t1 = _mm_xor_si128(t1, d1); + _mm_storeu_si128(bo + i, t1); + } + + if (rem) + { + memset(&b, 0, sizeof(b)); + memcpy(&b, bi + blocks, rem); + + d1 = _mm_loadu_si128(&b); + t1 = _mm_xor_si128(state, ks[0]); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + t1 = _mm_aesenc_si128(t1, ks[12]); + t1 = _mm_aesenc_si128(t1, ks[13]); + + t1 = _mm_aesenclast_si128(t1, ks[14]); + t1 = _mm_xor_si128(t1, d1); + _mm_storeu_si128(&b, t1); + + memcpy(bo + blocks, &b, rem); + } +} + 
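+/*
+ * A minimal usage sketch for the crypter this file implements (an
+ * illustration only; key20, iv8 and data are hypothetical chunks): the
+ * key is the AES key with the 4-byte counter nonce appended, the IV is
+ * 8 bytes, and decryption uses the very same keystream XOR as encryption:
+ *
+ *   aesni_ctr_t *ctr = aesni_ctr_create(ENCR_AES_CTR, 16);
+ *   chunk_t out;
+ *
+ *   if (ctr && ctr->crypter.set_key(&ctr->crypter, key20) &&
+ *       ctr->crypter.encrypt(&ctr->crypter, data, iv8, &out))
+ *   {
+ *       ...out holds data XORed with the AES-CTR keystream...
+ *   }
+ *   ctr->crypter.destroy(&ctr->crypter);
+ */
+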
+METHOD(crypter_t, crypt, bool,
+	private_aesni_ctr_t *this, chunk_t in, chunk_t iv, chunk_t *out)
+{
+	u_char *buf;
+
+	if (!this->key || iv.len != sizeof(this->state.iv))
+	{
+		return FALSE;
+	}
+	memcpy(this->state.iv, iv.ptr, sizeof(this->state.iv));
+	this->state.counter = htonl(1);
+
+	buf = in.ptr;
+	if (out)
+	{
+		*out = chunk_alloc(in.len);
+		buf = out->ptr;
+	}
+	this->crypt(this, in.len, in.ptr, buf);
+	return TRUE;
+}
+
+METHOD(crypter_t, get_block_size, size_t,
+	private_aesni_ctr_t *this)
+{
+	return 1;
+}
+
+METHOD(crypter_t, get_iv_size, size_t,
+	private_aesni_ctr_t *this)
+{
+	return sizeof(this->state.iv);
+}
+
+METHOD(crypter_t, get_key_size, size_t,
+	private_aesni_ctr_t *this)
+{
+	return this->key_size + sizeof(this->state.nonce);
+}
+
+METHOD(crypter_t, set_key, bool,
+	private_aesni_ctr_t *this, chunk_t key)
+{
+	if (key.len != get_key_size(this))
+	{
+		return FALSE;
+	}
+
+	memcpy(this->state.nonce, key.ptr + key.len - sizeof(this->state.nonce),
+		   sizeof(this->state.nonce));
+	key.len -= sizeof(this->state.nonce);
+
+	DESTROY_IF(this->key);
+	this->key = aesni_key_create(TRUE, key);
+
+	return this->key;
+}
+
+METHOD(crypter_t, destroy, void,
+	private_aesni_ctr_t *this)
+{
+	DESTROY_IF(this->key);
+	free_align(this);
+}
+
+/**
+ * See header
+ */
+aesni_ctr_t *aesni_ctr_create(encryption_algorithm_t algo, size_t key_size)
+{
+	private_aesni_ctr_t *this;
+
+	if (algo != ENCR_AES_CTR)
+	{
+		return NULL;
+	}
+	switch (key_size)
+	{
+		case 0:
+			key_size = 16;
+			break;
+		case 16:
+		case 24:
+		case 32:
+			break;
+		default:
+			return NULL;
+	}
+
+	INIT_ALIGN(this, sizeof(__m128i),
+		.public = {
+			.crypter = {
+				.encrypt = _crypt,
+				.decrypt = _crypt,
+				.get_block_size = _get_block_size,
+				.get_iv_size = _get_iv_size,
+				.get_key_size = _get_key_size,
+				.set_key = _set_key,
+				.destroy = _destroy,
+			},
+		},
+		.key_size = key_size,
+	);
+
+	switch (key_size)
+	{
+		case 16:
+			this->crypt = encrypt_ctr128;
+			break;
+		case 24:
+			this->crypt = encrypt_ctr192;
+			break;
+		case 32:
+			this->crypt = encrypt_ctr256;
+			break;
+	}
+
+	return &this->public;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_ctr.h b/src/libstrongswan/plugins/aesni/aesni_ctr.h
new file mode 100644
index 000000000..6126a2c75
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_ctr.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_ctr aesni_ctr
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_CTR_H_
+#define AESNI_CTR_H_
+
+#include <library.h>
+
+typedef struct aesni_ctr_t aesni_ctr_t;
+
+/**
+ * CTR mode crypter using AES-NI
+ */
+struct aesni_ctr_t {
+
+	/**
+	 * Implements crypter interface
+	 */
+	crypter_t crypter;
+};
+
+/**
+ * Create an aesni_ctr instance.
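+ *
+ * The key passed to set_key() is the AES key with the 4-byte counter
+ * nonce appended; get_key_size() reflects this combined length.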
+ *
+ * @param algo			encryption algorithm, ENCR_AES_CTR
+ * @param key_size		AES key size, in bytes
+ * @return				AES-CTR crypter, NULL if not supported
+ */
+aesni_ctr_t *aesni_ctr_create(encryption_algorithm_t algo, size_t key_size);
+
+#endif /** AESNI_CTR_H_ @}*/
diff --git a/src/libstrongswan/plugins/aesni/aesni_gcm.c b/src/libstrongswan/plugins/aesni/aesni_gcm.c
new file mode 100644
index 000000000..53c0b144e
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_gcm.c
@@ -0,0 +1,1447 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "aesni_gcm.h"
+#include "aesni_key.h"
+
+#include <crypto/iv/iv_gen_seq.h>
+
+#include <tmmintrin.h>
+
+#define NONCE_SIZE 12
+#define IV_SIZE 8
+#define SALT_SIZE (NONCE_SIZE - IV_SIZE)
+
+/**
+ * Pipeline parallelism we use for GCM en/decryption
+ */
+#define GCM_CRYPT_PARALLELISM 4
+
+typedef struct private_aesni_gcm_t private_aesni_gcm_t;
+
+/**
+ * GCM en/decryption method type
+ */
+typedef void (*aesni_gcm_fn_t)(private_aesni_gcm_t*, size_t, u_char*, u_char*,
+							   u_char*, size_t, u_char*, u_char*);
+
+/**
+ * Private data of an aesni_gcm_t object.
+ */
+struct private_aesni_gcm_t {
+
+	/**
+	 * Public aesni_gcm_t interface.
+	 */
+	aesni_gcm_t public;
+
+	/**
+	 * Encryption key schedule
+	 */
+	aesni_key_t *key;
+
+	/**
+	 * IV generator.
+ */ + iv_gen_t *iv_gen; + + /** + * Length of the integrity check value + */ + size_t icv_size; + + /** + * Length of the key in bytes + */ + size_t key_size; + + /** + * GCM encryption function + */ + aesni_gcm_fn_t encrypt; + + /** + * GCM decryption function + */ + aesni_gcm_fn_t decrypt; + + /** + * salt to add to nonce + */ + u_char salt[SALT_SIZE]; + + /** + * GHASH subkey H, big-endian + */ + __m128i h; + + /** + * GHASH key H^2, big-endian + */ + __m128i hh; + + /** + * GHASH key H^3, big-endian + */ + __m128i hhh; + + /** + * GHASH key H^4, big-endian + */ + __m128i hhhh; +}; + +/** + * Byte-swap a 128-bit integer + */ +static inline __m128i swap128(__m128i x) +{ + return _mm_shuffle_epi8(x, + _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); +} + +/** + * Multiply two blocks in GF128 + */ +static __m128i mult_block(__m128i h, __m128i y) +{ + __m128i t1, t2, t3, t4, t5, t6; + + y = swap128(y); + + t1 = _mm_clmulepi64_si128(h, y, 0x00); + t2 = _mm_clmulepi64_si128(h, y, 0x01); + t3 = _mm_clmulepi64_si128(h, y, 0x10); + t4 = _mm_clmulepi64_si128(h, y, 0x11); + + t2 = _mm_xor_si128(t2, t3); + t3 = _mm_slli_si128(t2, 8); + t2 = _mm_srli_si128(t2, 8); + t1 = _mm_xor_si128(t1, t3); + t4 = _mm_xor_si128(t4, t2); + + t5 = _mm_srli_epi32(t1, 31); + t1 = _mm_slli_epi32(t1, 1); + t6 = _mm_srli_epi32(t4, 31); + t4 = _mm_slli_epi32(t4, 1); + + t3 = _mm_srli_si128(t5, 12); + t6 = _mm_slli_si128(t6, 4); + t5 = _mm_slli_si128(t5, 4); + t1 = _mm_or_si128(t1, t5); + t4 = _mm_or_si128(t4, t6); + t4 = _mm_or_si128(t4, t3); + + t5 = _mm_slli_epi32(t1, 31); + t6 = _mm_slli_epi32(t1, 30); + t3 = _mm_slli_epi32(t1, 25); + + t5 = _mm_xor_si128(t5, t6); + t5 = _mm_xor_si128(t5, t3); + t6 = _mm_srli_si128(t5, 4); + t4 = _mm_xor_si128(t4, t6); + t5 = _mm_slli_si128(t5, 12); + t1 = _mm_xor_si128(t1, t5); + t4 = _mm_xor_si128(t4, t1); + + t5 = _mm_srli_epi32(t1, 1); + t2 = _mm_srli_epi32(t1, 2); + t3 = _mm_srli_epi32(t1, 7); + t4 = _mm_xor_si128(t4, t2); + t4 = _mm_xor_si128(t4, t3); + t4 = _mm_xor_si128(t4, t5); + + return swap128(t4); +} + +/** + * Multiply four consecutive blocks by their respective GHASH key, XOR + */ +static inline __m128i mult4xor(__m128i h1, __m128i h2, __m128i h3, __m128i h4, + __m128i d1, __m128i d2, __m128i d3, __m128i d4) +{ + __m128i t0, t1, t2, t3, t4, t5, t6, t7, t8, t9; + + d1 = swap128(d1); + d2 = swap128(d2); + d3 = swap128(d3); + d4 = swap128(d4); + + t0 = _mm_clmulepi64_si128(h1, d1, 0x00); + t1 = _mm_clmulepi64_si128(h2, d2, 0x00); + t2 = _mm_clmulepi64_si128(h3, d3, 0x00); + t3 = _mm_clmulepi64_si128(h4, d4, 0x00); + t8 = _mm_xor_si128(t0, t1); + t8 = _mm_xor_si128(t8, t2); + t8 = _mm_xor_si128(t8, t3); + + t4 = _mm_clmulepi64_si128(h1, d1, 0x11); + t5 = _mm_clmulepi64_si128(h2, d2, 0x11); + t6 = _mm_clmulepi64_si128(h3, d3, 0x11); + t7 = _mm_clmulepi64_si128(h4, d4, 0x11); + t9 = _mm_xor_si128(t4, t5); + t9 = _mm_xor_si128(t9, t6); + t9 = _mm_xor_si128(t9, t7); + + t0 = _mm_shuffle_epi32(h1, 78); + t4 = _mm_shuffle_epi32(d1, 78); + t0 = _mm_xor_si128(t0, h1); + t4 = _mm_xor_si128(t4, d1); + t1 = _mm_shuffle_epi32(h2, 78); + t5 = _mm_shuffle_epi32(d2, 78); + t1 = _mm_xor_si128(t1, h2); + t5 = _mm_xor_si128(t5, d2); + t2 = _mm_shuffle_epi32(h3, 78); + t6 = _mm_shuffle_epi32(d3, 78); + t2 = _mm_xor_si128(t2, h3); + t6 = _mm_xor_si128(t6, d3); + t3 = _mm_shuffle_epi32(h4, 78); + t7 = _mm_shuffle_epi32(d4, 78); + t3 = _mm_xor_si128(t3, h4); + t7 = _mm_xor_si128(t7, d4); + + t0 = _mm_clmulepi64_si128(t0, t4, 0x00); + t1 = _mm_clmulepi64_si128(t1, t5, 0x00); 
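+	/* Karatsuba middle products for the third and fourth block follow */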
+ t2 = _mm_clmulepi64_si128(t2, t6, 0x00); + t3 = _mm_clmulepi64_si128(t3, t7, 0x00); + t0 = _mm_xor_si128(t0, t8); + t0 = _mm_xor_si128(t0, t9); + t0 = _mm_xor_si128(t1, t0); + t0 = _mm_xor_si128(t2, t0); + + t0 = _mm_xor_si128(t3, t0); + t4 = _mm_slli_si128(t0, 8); + t0 = _mm_srli_si128(t0, 8); + t3 = _mm_xor_si128(t4, t8); + t6 = _mm_xor_si128(t0, t9); + t7 = _mm_srli_epi32(t3, 31); + t8 = _mm_srli_epi32(t6, 31); + t3 = _mm_slli_epi32(t3, 1); + t6 = _mm_slli_epi32(t6, 1); + t9 = _mm_srli_si128(t7, 12); + t8 = _mm_slli_si128(t8, 4); + t7 = _mm_slli_si128(t7, 4); + t3 = _mm_or_si128(t3, t7); + t6 = _mm_or_si128(t6, t8); + t6 = _mm_or_si128(t6, t9); + t7 = _mm_slli_epi32(t3, 31); + t8 = _mm_slli_epi32(t3, 30); + t9 = _mm_slli_epi32(t3, 25); + t7 = _mm_xor_si128(t7, t8); + t7 = _mm_xor_si128(t7, t9); + t8 = _mm_srli_si128(t7, 4); + t7 = _mm_slli_si128(t7, 12); + t3 = _mm_xor_si128(t3, t7); + t2 = _mm_srli_epi32(t3, 1); + t4 = _mm_srli_epi32(t3, 2); + t5 = _mm_srli_epi32(t3, 7); + t2 = _mm_xor_si128(t2, t4); + t2 = _mm_xor_si128(t2, t5); + t2 = _mm_xor_si128(t2, t8); + t3 = _mm_xor_si128(t3, t2); + t6 = _mm_xor_si128(t6, t3); + + return swap128(t6); +} + +/** + * GHASH on a single block + */ +static __m128i ghash(__m128i h, __m128i y, __m128i x) +{ + return mult_block(h, _mm_xor_si128(y, x)); +} + +/** + * Start constructing the ICV for the associated data + */ +static __m128i icv_header(private_aesni_gcm_t *this, void *assoc, size_t alen) +{ + u_int blocks, pblocks, rem, i; + __m128i h1, h2, h3, h4, d1, d2, d3, d4; + __m128i y, last, *ab; + + h1 = this->hhhh; + h2 = this->hhh; + h3 = this->hh; + h4 = this->h; + + y = _mm_setzero_si128(); + ab = assoc; + blocks = alen / AES_BLOCK_SIZE; + pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM); + rem = alen % AES_BLOCK_SIZE; + for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM) + { + d1 = _mm_loadu_si128(ab + i + 0); + d2 = _mm_loadu_si128(ab + i + 1); + d3 = _mm_loadu_si128(ab + i + 2); + d4 = _mm_loadu_si128(ab + i + 3); + y = _mm_xor_si128(y, d1); + y = mult4xor(h1, h2, h3, h4, y, d2, d3, d4); + } + for (i = pblocks; i < blocks; i++) + { + y = ghash(this->h, y, _mm_loadu_si128(ab + i)); + } + if (rem) + { + last = _mm_setzero_si128(); + memcpy(&last, ab + blocks, rem); + + y = ghash(this->h, y, last); + } + + return y; +} + +/** + * Complete the ICV by hashing a assoc/data length block + */ +static __m128i icv_tailer(private_aesni_gcm_t *this, __m128i y, + size_t alen, size_t dlen) +{ + __m128i b; + + htoun64(&b, alen * 8); + htoun64((u_char*)&b + sizeof(u_int64_t), dlen * 8); + + return ghash(this->h, y, b); +} + +/** + * En-/Decrypt the ICV, trim and store it + */ +static void icv_crypt(private_aesni_gcm_t *this, __m128i y, __m128i j, + u_char *icv) +{ + __m128i *ks, t, b; + u_int round; + + ks = this->key->schedule; + t = _mm_xor_si128(j, ks[0]); + for (round = 1; round < this->key->rounds; round++) + { + t = _mm_aesenc_si128(t, ks[round]); + } + t = _mm_aesenclast_si128(t, ks[this->key->rounds]); + + t = _mm_xor_si128(y, t); + + _mm_storeu_si128(&b, t); + memcpy(icv, &b, this->icv_size); +} + +/** + * Do big-endian increment on x + */ +static inline __m128i increment_be(__m128i x) +{ + x = swap128(x); + x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1)); + x = swap128(x); + + return x; +} + +/** + * Generate the block J0 + */ +static inline __m128i create_j(private_aesni_gcm_t *this, u_char *iv) +{ + u_char j[AES_BLOCK_SIZE]; + + memcpy(j, this->salt, SALT_SIZE); + memcpy(j + SALT_SIZE, iv, IV_SIZE); + htoun32(j + SALT_SIZE + IV_SIZE, 1); + 
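+	/* j now holds J0 = salt || IV || 0x00000001, the initial counter
+	 * block GCM defines for 96-bit nonces */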
+ return _mm_loadu_si128((__m128i*)j); +} + +/** + * Encrypt a remaining incomplete block, return updated Y + */ +static __m128i encrypt_gcm_rem(private_aesni_gcm_t *this, u_int rem, + void *in, void *out, __m128i cb, __m128i y) +{ + __m128i *ks, t, b; + u_int round; + + memset(&b, 0, sizeof(b)); + memcpy(&b, in, rem); + + ks = this->key->schedule; + t = _mm_xor_si128(cb, ks[0]); + for (round = 1; round < this->key->rounds; round++) + { + t = _mm_aesenc_si128(t, ks[round]); + } + t = _mm_aesenclast_si128(t, ks[this->key->rounds]); + b = _mm_xor_si128(t, b); + + memcpy(out, &b, rem); + + memset((u_char*)&b + rem, 0, AES_BLOCK_SIZE - rem); + return ghash(this->h, y, b); +} + +/** + * Decrypt a remaining incomplete block, return updated Y + */ +static __m128i decrypt_gcm_rem(private_aesni_gcm_t *this, u_int rem, + void *in, void *out, __m128i cb, __m128i y) +{ + __m128i *ks, t, b; + u_int round; + + memset(&b, 0, sizeof(b)); + memcpy(&b, in, rem); + + y = ghash(this->h, y, b); + + ks = this->key->schedule; + t = _mm_xor_si128(cb, ks[0]); + for (round = 1; round < this->key->rounds; round++) + { + t = _mm_aesenc_si128(t, ks[round]); + } + t = _mm_aesenclast_si128(t, ks[this->key->rounds]); + b = _mm_xor_si128(t, b); + + memcpy(out, &b, rem); + + return y; +} + +/** + * AES-128 GCM encryption/ICV generation + */ +static void encrypt_gcm128(private_aesni_gcm_t *this, + size_t len, u_char *in, u_char *out, u_char *iv, + size_t alen, u_char *assoc, u_char *icv) +{ + __m128i d1, d2, d3, d4, t1, t2, t3, t4; + __m128i *ks, y, j, cb, *bi, *bo; + u_int blocks, pblocks, rem, i; + + j = create_j(this, iv); + cb = increment_be(j); + y = icv_header(this, assoc, alen); + blocks = len / AES_BLOCK_SIZE; + pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM); + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM) + { + d1 = _mm_loadu_si128(bi + i + 0); + d2 = _mm_loadu_si128(bi + i + 1); + d3 = _mm_loadu_si128(bi + i + 2); + d4 = _mm_loadu_si128(bi + i + 3); + + t1 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t2 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t3 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t4 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t2 = _mm_aesenc_si128(t2, ks[1]); + t3 = _mm_aesenc_si128(t3, ks[1]); + t4 = _mm_aesenc_si128(t4, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t2 = _mm_aesenc_si128(t2, ks[2]); + t3 = _mm_aesenc_si128(t3, ks[2]); + t4 = _mm_aesenc_si128(t4, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t2 = _mm_aesenc_si128(t2, ks[3]); + t3 = _mm_aesenc_si128(t3, ks[3]); + t4 = _mm_aesenc_si128(t4, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t2 = _mm_aesenc_si128(t2, ks[4]); + t3 = _mm_aesenc_si128(t3, ks[4]); + t4 = _mm_aesenc_si128(t4, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t2 = _mm_aesenc_si128(t2, ks[5]); + t3 = _mm_aesenc_si128(t3, ks[5]); + t4 = _mm_aesenc_si128(t4, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t2 = _mm_aesenc_si128(t2, ks[6]); + t3 = _mm_aesenc_si128(t3, ks[6]); + t4 = _mm_aesenc_si128(t4, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t2 = _mm_aesenc_si128(t2, ks[7]); + t3 = _mm_aesenc_si128(t3, ks[7]); + t4 = _mm_aesenc_si128(t4, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t2 = _mm_aesenc_si128(t2, ks[8]); + t3 = _mm_aesenc_si128(t3, ks[8]); + t4 = _mm_aesenc_si128(t4, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t2 = _mm_aesenc_si128(t2, ks[9]); + t3 = 
_mm_aesenc_si128(t3, ks[9]); + t4 = _mm_aesenc_si128(t4, ks[9]); + + t1 = _mm_aesenclast_si128(t1, ks[10]); + t2 = _mm_aesenclast_si128(t2, ks[10]); + t3 = _mm_aesenclast_si128(t3, ks[10]); + t4 = _mm_aesenclast_si128(t4, ks[10]); + + t1 = _mm_xor_si128(t1, d1); + t2 = _mm_xor_si128(t2, d2); + t3 = _mm_xor_si128(t3, d3); + t4 = _mm_xor_si128(t4, d4); + + y = _mm_xor_si128(y, t1); + y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, t2, t3, t4); + + _mm_storeu_si128(bo + i + 0, t1); + _mm_storeu_si128(bo + i + 1, t2); + _mm_storeu_si128(bo + i + 2, t3); + _mm_storeu_si128(bo + i + 3, t4); + } + + for (i = pblocks; i < blocks; i++) + { + d1 = _mm_loadu_si128(bi + i); + + t1 = _mm_xor_si128(cb, ks[0]); + t1 = _mm_aesenc_si128(t1, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t1 = _mm_aesenclast_si128(t1, ks[10]); + + t1 = _mm_xor_si128(t1, d1); + _mm_storeu_si128(bo + i, t1); + + y = ghash(this->h, y, t1); + + cb = increment_be(cb); + } + + if (rem) + { + y = encrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y); + } + y = icv_tailer(this, y, alen, len); + icv_crypt(this, y, j, icv); +} + +/** + * AES-128 GCM decryption/ICV generation + */ +static void decrypt_gcm128(private_aesni_gcm_t *this, + size_t len, u_char *in, u_char *out, u_char *iv, + size_t alen, u_char *assoc, u_char *icv) +{ + __m128i d1, d2, d3, d4, t1, t2, t3, t4; + __m128i *ks, y, j, cb, *bi, *bo; + u_int blocks, pblocks, rem, i; + + j = create_j(this, iv); + cb = increment_be(j); + y = icv_header(this, assoc, alen); + blocks = len / AES_BLOCK_SIZE; + pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM); + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM) + { + d1 = _mm_loadu_si128(bi + i + 0); + d2 = _mm_loadu_si128(bi + i + 1); + d3 = _mm_loadu_si128(bi + i + 2); + d4 = _mm_loadu_si128(bi + i + 3); + + y = _mm_xor_si128(y, d1); + y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, d2, d3, d4); + + t1 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t2 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t3 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t4 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t2 = _mm_aesenc_si128(t2, ks[1]); + t3 = _mm_aesenc_si128(t3, ks[1]); + t4 = _mm_aesenc_si128(t4, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t2 = _mm_aesenc_si128(t2, ks[2]); + t3 = _mm_aesenc_si128(t3, ks[2]); + t4 = _mm_aesenc_si128(t4, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t2 = _mm_aesenc_si128(t2, ks[3]); + t3 = _mm_aesenc_si128(t3, ks[3]); + t4 = _mm_aesenc_si128(t4, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t2 = _mm_aesenc_si128(t2, ks[4]); + t3 = _mm_aesenc_si128(t3, ks[4]); + t4 = _mm_aesenc_si128(t4, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t2 = _mm_aesenc_si128(t2, ks[5]); + t3 = _mm_aesenc_si128(t3, ks[5]); + t4 = _mm_aesenc_si128(t4, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t2 = _mm_aesenc_si128(t2, ks[6]); + t3 = _mm_aesenc_si128(t3, ks[6]); + t4 = _mm_aesenc_si128(t4, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t2 = _mm_aesenc_si128(t2, ks[7]); + t3 = _mm_aesenc_si128(t3, ks[7]); + t4 = _mm_aesenc_si128(t4, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t2 
= _mm_aesenc_si128(t2, ks[8]); + t3 = _mm_aesenc_si128(t3, ks[8]); + t4 = _mm_aesenc_si128(t4, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t2 = _mm_aesenc_si128(t2, ks[9]); + t3 = _mm_aesenc_si128(t3, ks[9]); + t4 = _mm_aesenc_si128(t4, ks[9]); + + t1 = _mm_aesenclast_si128(t1, ks[10]); + t2 = _mm_aesenclast_si128(t2, ks[10]); + t3 = _mm_aesenclast_si128(t3, ks[10]); + t4 = _mm_aesenclast_si128(t4, ks[10]); + + t1 = _mm_xor_si128(t1, d1); + t2 = _mm_xor_si128(t2, d2); + t3 = _mm_xor_si128(t3, d3); + t4 = _mm_xor_si128(t4, d4); + + _mm_storeu_si128(bo + i + 0, t1); + _mm_storeu_si128(bo + i + 1, t2); + _mm_storeu_si128(bo + i + 2, t3); + _mm_storeu_si128(bo + i + 3, t4); + } + + for (i = pblocks; i < blocks; i++) + { + d1 = _mm_loadu_si128(bi + i); + + y = ghash(this->h, y, d1); + + t1 = _mm_xor_si128(cb, ks[0]); + t1 = _mm_aesenc_si128(t1, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t1 = _mm_aesenclast_si128(t1, ks[10]); + + t1 = _mm_xor_si128(t1, d1); + _mm_storeu_si128(bo + i, t1); + + cb = increment_be(cb); + } + + if (rem) + { + y = decrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y); + } + y = icv_tailer(this, y, alen, len); + icv_crypt(this, y, j, icv); +} + +/** + * AES-192 GCM encryption/ICV generation + */ +static void encrypt_gcm192(private_aesni_gcm_t *this, + size_t len, u_char *in, u_char *out, u_char *iv, + size_t alen, u_char *assoc, u_char *icv) +{ + __m128i d1, d2, d3, d4, t1, t2, t3, t4; + __m128i *ks, y, j, cb, *bi, *bo; + u_int blocks, pblocks, rem, i; + + j = create_j(this, iv); + cb = increment_be(j); + y = icv_header(this, assoc, alen); + blocks = len / AES_BLOCK_SIZE; + pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM); + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM) + { + d1 = _mm_loadu_si128(bi + i + 0); + d2 = _mm_loadu_si128(bi + i + 1); + d3 = _mm_loadu_si128(bi + i + 2); + d4 = _mm_loadu_si128(bi + i + 3); + + t1 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t2 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t3 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t4 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t2 = _mm_aesenc_si128(t2, ks[1]); + t3 = _mm_aesenc_si128(t3, ks[1]); + t4 = _mm_aesenc_si128(t4, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t2 = _mm_aesenc_si128(t2, ks[2]); + t3 = _mm_aesenc_si128(t3, ks[2]); + t4 = _mm_aesenc_si128(t4, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t2 = _mm_aesenc_si128(t2, ks[3]); + t3 = _mm_aesenc_si128(t3, ks[3]); + t4 = _mm_aesenc_si128(t4, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t2 = _mm_aesenc_si128(t2, ks[4]); + t3 = _mm_aesenc_si128(t3, ks[4]); + t4 = _mm_aesenc_si128(t4, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t2 = _mm_aesenc_si128(t2, ks[5]); + t3 = _mm_aesenc_si128(t3, ks[5]); + t4 = _mm_aesenc_si128(t4, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t2 = _mm_aesenc_si128(t2, ks[6]); + t3 = _mm_aesenc_si128(t3, ks[6]); + t4 = _mm_aesenc_si128(t4, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t2 = _mm_aesenc_si128(t2, ks[7]); + t3 = _mm_aesenc_si128(t3, ks[7]); + t4 = _mm_aesenc_si128(t4, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t2 = _mm_aesenc_si128(t2, 
ks[8]); + t3 = _mm_aesenc_si128(t3, ks[8]); + t4 = _mm_aesenc_si128(t4, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t2 = _mm_aesenc_si128(t2, ks[9]); + t3 = _mm_aesenc_si128(t3, ks[9]); + t4 = _mm_aesenc_si128(t4, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t2 = _mm_aesenc_si128(t2, ks[10]); + t3 = _mm_aesenc_si128(t3, ks[10]); + t4 = _mm_aesenc_si128(t4, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + t2 = _mm_aesenc_si128(t2, ks[11]); + t3 = _mm_aesenc_si128(t3, ks[11]); + t4 = _mm_aesenc_si128(t4, ks[11]); + + t1 = _mm_aesenclast_si128(t1, ks[12]); + t2 = _mm_aesenclast_si128(t2, ks[12]); + t3 = _mm_aesenclast_si128(t3, ks[12]); + t4 = _mm_aesenclast_si128(t4, ks[12]); + + t1 = _mm_xor_si128(t1, d1); + t2 = _mm_xor_si128(t2, d2); + t3 = _mm_xor_si128(t3, d3); + t4 = _mm_xor_si128(t4, d4); + + y = _mm_xor_si128(y, t1); + y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, t2, t3, t4); + + _mm_storeu_si128(bo + i + 0, t1); + _mm_storeu_si128(bo + i + 1, t2); + _mm_storeu_si128(bo + i + 2, t3); + _mm_storeu_si128(bo + i + 3, t4); + } + + for (i = pblocks; i < blocks; i++) + { + d1 = _mm_loadu_si128(bi + i); + + t1 = _mm_xor_si128(cb, ks[0]); + t1 = _mm_aesenc_si128(t1, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + t1 = _mm_aesenclast_si128(t1, ks[12]); + + t1 = _mm_xor_si128(t1, d1); + _mm_storeu_si128(bo + i, t1); + + y = ghash(this->h, y, t1); + + cb = increment_be(cb); + } + + if (rem) + { + y = encrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y); + } + y = icv_tailer(this, y, alen, len); + icv_crypt(this, y, j, icv); +} + +/** + * AES-192 GCM decryption/ICV generation + */ +static void decrypt_gcm192(private_aesni_gcm_t *this, + size_t len, u_char *in, u_char *out, u_char *iv, + size_t alen, u_char *assoc, u_char *icv) +{ + __m128i d1, d2, d3, d4, t1, t2, t3, t4; + __m128i *ks, y, j, cb, *bi, *bo; + u_int blocks, pblocks, rem, i; + + j = create_j(this, iv); + cb = increment_be(j); + y = icv_header(this, assoc, alen); + blocks = len / AES_BLOCK_SIZE; + pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM); + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM) + { + d1 = _mm_loadu_si128(bi + i + 0); + d2 = _mm_loadu_si128(bi + i + 1); + d3 = _mm_loadu_si128(bi + i + 2); + d4 = _mm_loadu_si128(bi + i + 3); + + y = _mm_xor_si128(y, d1); + y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, d2, d3, d4); + + t1 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t2 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t3 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t4 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t2 = _mm_aesenc_si128(t2, ks[1]); + t3 = _mm_aesenc_si128(t3, ks[1]); + t4 = _mm_aesenc_si128(t4, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t2 = _mm_aesenc_si128(t2, ks[2]); + t3 = _mm_aesenc_si128(t3, ks[2]); + t4 = _mm_aesenc_si128(t4, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t2 = _mm_aesenc_si128(t2, ks[3]); + t3 = _mm_aesenc_si128(t3, ks[3]); + t4 = _mm_aesenc_si128(t4, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t2 = _mm_aesenc_si128(t2, ks[4]); + t3 = 
_mm_aesenc_si128(t3, ks[4]); + t4 = _mm_aesenc_si128(t4, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t2 = _mm_aesenc_si128(t2, ks[5]); + t3 = _mm_aesenc_si128(t3, ks[5]); + t4 = _mm_aesenc_si128(t4, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t2 = _mm_aesenc_si128(t2, ks[6]); + t3 = _mm_aesenc_si128(t3, ks[6]); + t4 = _mm_aesenc_si128(t4, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t2 = _mm_aesenc_si128(t2, ks[7]); + t3 = _mm_aesenc_si128(t3, ks[7]); + t4 = _mm_aesenc_si128(t4, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t2 = _mm_aesenc_si128(t2, ks[8]); + t3 = _mm_aesenc_si128(t3, ks[8]); + t4 = _mm_aesenc_si128(t4, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t2 = _mm_aesenc_si128(t2, ks[9]); + t3 = _mm_aesenc_si128(t3, ks[9]); + t4 = _mm_aesenc_si128(t4, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t2 = _mm_aesenc_si128(t2, ks[10]); + t3 = _mm_aesenc_si128(t3, ks[10]); + t4 = _mm_aesenc_si128(t4, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + t2 = _mm_aesenc_si128(t2, ks[11]); + t3 = _mm_aesenc_si128(t3, ks[11]); + t4 = _mm_aesenc_si128(t4, ks[11]); + + t1 = _mm_aesenclast_si128(t1, ks[12]); + t2 = _mm_aesenclast_si128(t2, ks[12]); + t3 = _mm_aesenclast_si128(t3, ks[12]); + t4 = _mm_aesenclast_si128(t4, ks[12]); + + t1 = _mm_xor_si128(t1, d1); + t2 = _mm_xor_si128(t2, d2); + t3 = _mm_xor_si128(t3, d3); + t4 = _mm_xor_si128(t4, d4); + + _mm_storeu_si128(bo + i + 0, t1); + _mm_storeu_si128(bo + i + 1, t2); + _mm_storeu_si128(bo + i + 2, t3); + _mm_storeu_si128(bo + i + 3, t4); + } + + for (i = pblocks; i < blocks; i++) + { + d1 = _mm_loadu_si128(bi + i); + + y = ghash(this->h, y, d1); + + t1 = _mm_xor_si128(cb, ks[0]); + t1 = _mm_aesenc_si128(t1, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + t1 = _mm_aesenclast_si128(t1, ks[12]); + + t1 = _mm_xor_si128(t1, d1); + _mm_storeu_si128(bo + i, t1); + + cb = increment_be(cb); + } + + if (rem) + { + y = decrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y); + } + y = icv_tailer(this, y, alen, len); + icv_crypt(this, y, j, icv); +} + +/** + * AES-256 GCM encryption/ICV generation + */ +static void encrypt_gcm256(private_aesni_gcm_t *this, + size_t len, u_char *in, u_char *out, u_char *iv, + size_t alen, u_char *assoc, u_char *icv) +{ + __m128i d1, d2, d3, d4, t1, t2, t3, t4; + __m128i *ks, y, j, cb, *bi, *bo; + u_int blocks, pblocks, rem, i; + + j = create_j(this, iv); + cb = increment_be(j); + y = icv_header(this, assoc, alen); + blocks = len / AES_BLOCK_SIZE; + pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM); + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM) + { + d1 = _mm_loadu_si128(bi + i + 0); + d2 = _mm_loadu_si128(bi + i + 1); + d3 = _mm_loadu_si128(bi + i + 2); + d4 = _mm_loadu_si128(bi + i + 3); + + t1 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t2 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t3 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t4 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t2 = _mm_aesenc_si128(t2, ks[1]); + t3 = _mm_aesenc_si128(t3, ks[1]); + t4 = _mm_aesenc_si128(t4, ks[1]); + t1 = 
_mm_aesenc_si128(t1, ks[2]); + t2 = _mm_aesenc_si128(t2, ks[2]); + t3 = _mm_aesenc_si128(t3, ks[2]); + t4 = _mm_aesenc_si128(t4, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t2 = _mm_aesenc_si128(t2, ks[3]); + t3 = _mm_aesenc_si128(t3, ks[3]); + t4 = _mm_aesenc_si128(t4, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t2 = _mm_aesenc_si128(t2, ks[4]); + t3 = _mm_aesenc_si128(t3, ks[4]); + t4 = _mm_aesenc_si128(t4, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t2 = _mm_aesenc_si128(t2, ks[5]); + t3 = _mm_aesenc_si128(t3, ks[5]); + t4 = _mm_aesenc_si128(t4, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t2 = _mm_aesenc_si128(t2, ks[6]); + t3 = _mm_aesenc_si128(t3, ks[6]); + t4 = _mm_aesenc_si128(t4, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t2 = _mm_aesenc_si128(t2, ks[7]); + t3 = _mm_aesenc_si128(t3, ks[7]); + t4 = _mm_aesenc_si128(t4, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t2 = _mm_aesenc_si128(t2, ks[8]); + t3 = _mm_aesenc_si128(t3, ks[8]); + t4 = _mm_aesenc_si128(t4, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t2 = _mm_aesenc_si128(t2, ks[9]); + t3 = _mm_aesenc_si128(t3, ks[9]); + t4 = _mm_aesenc_si128(t4, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t2 = _mm_aesenc_si128(t2, ks[10]); + t3 = _mm_aesenc_si128(t3, ks[10]); + t4 = _mm_aesenc_si128(t4, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + t2 = _mm_aesenc_si128(t2, ks[11]); + t3 = _mm_aesenc_si128(t3, ks[11]); + t4 = _mm_aesenc_si128(t4, ks[11]); + t1 = _mm_aesenc_si128(t1, ks[12]); + t2 = _mm_aesenc_si128(t2, ks[12]); + t3 = _mm_aesenc_si128(t3, ks[12]); + t4 = _mm_aesenc_si128(t4, ks[12]); + t1 = _mm_aesenc_si128(t1, ks[13]); + t2 = _mm_aesenc_si128(t2, ks[13]); + t3 = _mm_aesenc_si128(t3, ks[13]); + t4 = _mm_aesenc_si128(t4, ks[13]); + + t1 = _mm_aesenclast_si128(t1, ks[14]); + t2 = _mm_aesenclast_si128(t2, ks[14]); + t3 = _mm_aesenclast_si128(t3, ks[14]); + t4 = _mm_aesenclast_si128(t4, ks[14]); + + t1 = _mm_xor_si128(t1, d1); + t2 = _mm_xor_si128(t2, d2); + t3 = _mm_xor_si128(t3, d3); + t4 = _mm_xor_si128(t4, d4); + + y = _mm_xor_si128(y, t1); + y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, t2, t3, t4); + + _mm_storeu_si128(bo + i + 0, t1); + _mm_storeu_si128(bo + i + 1, t2); + _mm_storeu_si128(bo + i + 2, t3); + _mm_storeu_si128(bo + i + 3, t4); + } + + for (i = pblocks; i < blocks; i++) + { + d1 = _mm_loadu_si128(bi + i); + + t1 = _mm_xor_si128(cb, ks[0]); + t1 = _mm_aesenc_si128(t1, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + t1 = _mm_aesenc_si128(t1, ks[12]); + t1 = _mm_aesenc_si128(t1, ks[13]); + t1 = _mm_aesenclast_si128(t1, ks[14]); + + t1 = _mm_xor_si128(t1, d1); + _mm_storeu_si128(bo + i, t1); + + y = ghash(this->h, y, t1); + + cb = increment_be(cb); + } + + if (rem) + { + y = encrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y); + } + y = icv_tailer(this, y, alen, len); + icv_crypt(this, y, j, icv); +} + +/** + * AES-256 GCM decryption/ICV generation + */ +static void decrypt_gcm256(private_aesni_gcm_t *this, + size_t len, u_char *in, u_char *out, u_char *iv, + size_t alen, u_char *assoc, u_char *icv) +{ + __m128i d1, d2, d3, d4, t1, t2, t3, t4; + __m128i *ks, y, j, cb, *bi, *bo; + u_int blocks, pblocks, rem, i; + + j = create_j(this, iv); + cb = increment_be(j); 
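+ /* Editor's note (not part of the original commit): as in the 128- and
+  * 192-bit variants, j holds the initial counter block J0 built from
+  * salt and IV, cb = J0 + 1 is the first counter block fed to AES, y
+  * accumulates the GHASH state, and pblocks rounds the block count down
+  * to a multiple of GCM_CRYPT_PARALLELISM so that four blocks can be
+  * processed per loop iteration. */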
+ y = icv_header(this, assoc, alen); + blocks = len / AES_BLOCK_SIZE; + pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM); + rem = len % AES_BLOCK_SIZE; + bi = (__m128i*)in; + bo = (__m128i*)out; + + ks = this->key->schedule; + + for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM) + { + d1 = _mm_loadu_si128(bi + i + 0); + d2 = _mm_loadu_si128(bi + i + 1); + d3 = _mm_loadu_si128(bi + i + 2); + d4 = _mm_loadu_si128(bi + i + 3); + + y = _mm_xor_si128(y, d1); + y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, d2, d3, d4); + + t1 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t2 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t3 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + t4 = _mm_xor_si128(cb, ks[0]); + cb = increment_be(cb); + + t1 = _mm_aesenc_si128(t1, ks[1]); + t2 = _mm_aesenc_si128(t2, ks[1]); + t3 = _mm_aesenc_si128(t3, ks[1]); + t4 = _mm_aesenc_si128(t4, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t2 = _mm_aesenc_si128(t2, ks[2]); + t3 = _mm_aesenc_si128(t3, ks[2]); + t4 = _mm_aesenc_si128(t4, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t2 = _mm_aesenc_si128(t2, ks[3]); + t3 = _mm_aesenc_si128(t3, ks[3]); + t4 = _mm_aesenc_si128(t4, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t2 = _mm_aesenc_si128(t2, ks[4]); + t3 = _mm_aesenc_si128(t3, ks[4]); + t4 = _mm_aesenc_si128(t4, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t2 = _mm_aesenc_si128(t2, ks[5]); + t3 = _mm_aesenc_si128(t3, ks[5]); + t4 = _mm_aesenc_si128(t4, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t2 = _mm_aesenc_si128(t2, ks[6]); + t3 = _mm_aesenc_si128(t3, ks[6]); + t4 = _mm_aesenc_si128(t4, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t2 = _mm_aesenc_si128(t2, ks[7]); + t3 = _mm_aesenc_si128(t3, ks[7]); + t4 = _mm_aesenc_si128(t4, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t2 = _mm_aesenc_si128(t2, ks[8]); + t3 = _mm_aesenc_si128(t3, ks[8]); + t4 = _mm_aesenc_si128(t4, ks[8]); + t1 = _mm_aesenc_si128(t1, ks[9]); + t2 = _mm_aesenc_si128(t2, ks[9]); + t3 = _mm_aesenc_si128(t3, ks[9]); + t4 = _mm_aesenc_si128(t4, ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t2 = _mm_aesenc_si128(t2, ks[10]); + t3 = _mm_aesenc_si128(t3, ks[10]); + t4 = _mm_aesenc_si128(t4, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + t2 = _mm_aesenc_si128(t2, ks[11]); + t3 = _mm_aesenc_si128(t3, ks[11]); + t4 = _mm_aesenc_si128(t4, ks[11]); + t1 = _mm_aesenc_si128(t1, ks[12]); + t2 = _mm_aesenc_si128(t2, ks[12]); + t3 = _mm_aesenc_si128(t3, ks[12]); + t4 = _mm_aesenc_si128(t4, ks[12]); + t1 = _mm_aesenc_si128(t1, ks[13]); + t2 = _mm_aesenc_si128(t2, ks[13]); + t3 = _mm_aesenc_si128(t3, ks[13]); + t4 = _mm_aesenc_si128(t4, ks[13]); + + t1 = _mm_aesenclast_si128(t1, ks[14]); + t2 = _mm_aesenclast_si128(t2, ks[14]); + t3 = _mm_aesenclast_si128(t3, ks[14]); + t4 = _mm_aesenclast_si128(t4, ks[14]); + + t1 = _mm_xor_si128(t1, d1); + t2 = _mm_xor_si128(t2, d2); + t3 = _mm_xor_si128(t3, d3); + t4 = _mm_xor_si128(t4, d4); + + _mm_storeu_si128(bo + i + 0, t1); + _mm_storeu_si128(bo + i + 1, t2); + _mm_storeu_si128(bo + i + 2, t3); + _mm_storeu_si128(bo + i + 3, t4); + } + + for (i = pblocks; i < blocks; i++) + { + d1 = _mm_loadu_si128(bi + i); + + y = ghash(this->h, y, d1); + + t1 = _mm_xor_si128(cb, ks[0]); + t1 = _mm_aesenc_si128(t1, ks[1]); + t1 = _mm_aesenc_si128(t1, ks[2]); + t1 = _mm_aesenc_si128(t1, ks[3]); + t1 = _mm_aesenc_si128(t1, ks[4]); + t1 = _mm_aesenc_si128(t1, ks[5]); + t1 = _mm_aesenc_si128(t1, ks[6]); + t1 = _mm_aesenc_si128(t1, ks[7]); + t1 = _mm_aesenc_si128(t1, ks[8]); + t1 = _mm_aesenc_si128(t1, 
ks[9]); + t1 = _mm_aesenc_si128(t1, ks[10]); + t1 = _mm_aesenc_si128(t1, ks[11]); + t1 = _mm_aesenc_si128(t1, ks[12]); + t1 = _mm_aesenc_si128(t1, ks[13]); + t1 = _mm_aesenclast_si128(t1, ks[14]); + + t1 = _mm_xor_si128(t1, d1); + _mm_storeu_si128(bo + i, t1); + + cb = increment_be(cb); + } + + if (rem) + { + y = decrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y); + } + y = icv_tailer(this, y, alen, len); + icv_crypt(this, y, j, icv); +} + +METHOD(aead_t, encrypt, bool, + private_aesni_gcm_t *this, chunk_t plain, chunk_t assoc, chunk_t iv, + chunk_t *encr) +{ + u_char *out; + + if (!this->key || iv.len != IV_SIZE) + { + return FALSE; + } + out = plain.ptr; + if (encr) + { + *encr = chunk_alloc(plain.len + this->icv_size); + out = encr->ptr; + } + this->encrypt(this, plain.len, plain.ptr, out, iv.ptr, + assoc.len, assoc.ptr, out + plain.len); + return TRUE; +} + +METHOD(aead_t, decrypt, bool, + private_aesni_gcm_t *this, chunk_t encr, chunk_t assoc, chunk_t iv, + chunk_t *plain) +{ + u_char *out, icv[this->icv_size]; + + if (!this->key || iv.len != IV_SIZE || encr.len < this->icv_size) + { + return FALSE; + } + encr.len -= this->icv_size; + out = encr.ptr; + if (plain) + { + *plain = chunk_alloc(encr.len); + out = plain->ptr; + } + this->decrypt(this, encr.len, encr.ptr, out, iv.ptr, + assoc.len, assoc.ptr, icv); + return memeq_const(icv, encr.ptr + encr.len, this->icv_size); +} + +METHOD(aead_t, get_block_size, size_t, + private_aesni_gcm_t *this) +{ + return 1; +} + +METHOD(aead_t, get_icv_size, size_t, + private_aesni_gcm_t *this) +{ + return this->icv_size; +} + +METHOD(aead_t, get_iv_size, size_t, + private_aesni_gcm_t *this) +{ + return IV_SIZE; +} + +METHOD(aead_t, get_iv_gen, iv_gen_t*, + private_aesni_gcm_t *this) +{ + return this->iv_gen; +} + +METHOD(aead_t, get_key_size, size_t, + private_aesni_gcm_t *this) +{ + return this->key_size + SALT_SIZE; +} + +METHOD(aead_t, set_key, bool, + private_aesni_gcm_t *this, chunk_t key) +{ + u_int round; + __m128i *ks, h; + + if (key.len != this->key_size + SALT_SIZE) + { + return FALSE; + } + + memcpy(this->salt, key.ptr + key.len - SALT_SIZE, SALT_SIZE); + key.len -= SALT_SIZE; + + DESTROY_IF(this->key); + this->key = aesni_key_create(TRUE, key); + + ks = this->key->schedule; + h = _mm_xor_si128(_mm_setzero_si128(), ks[0]); + for (round = 1; round < this->key->rounds; round++) + { + h = _mm_aesenc_si128(h, ks[round]); + } + h = _mm_aesenclast_si128(h, ks[this->key->rounds]); + + this->h = h; + h = swap128(h); + this->hh = mult_block(h, this->h); + this->hhh = mult_block(h, this->hh); + this->hhhh = mult_block(h, this->hhh); + this->h = swap128(this->h); + this->hh = swap128(this->hh); + this->hhh = swap128(this->hhh); + this->hhhh = swap128(this->hhhh); + + return TRUE; +} + +METHOD(aead_t, destroy, void, + private_aesni_gcm_t *this) +{ + DESTROY_IF(this->key); + memwipe(&this->h, sizeof(this->h)); + memwipe(&this->hh, sizeof(this->hh)); + memwipe(&this->hhh, sizeof(this->hhh)); + memwipe(&this->hhhh, sizeof(this->hhhh)); + this->iv_gen->destroy(this->iv_gen); + free_align(this); +} + +/** + * See header + */ +aesni_gcm_t *aesni_gcm_create(encryption_algorithm_t algo, + size_t key_size, size_t salt_size) +{ + private_aesni_gcm_t *this; + size_t icv_size; + + switch (key_size) + { + case 0: + key_size = 16; + break; + case 16: + case 24: + case 32: + break; + default: + return NULL; + } + if (salt_size && salt_size != SALT_SIZE) + { + /* currently not supported */ + return NULL; + } + switch (algo) + { + case ENCR_AES_GCM_ICV8: + 
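+ /* editor's note: the three ENCR_AES_GCM_ICV* variants differ only in
+  * the length of the truncated ICV; algo is not referenced again after
+  * this switch */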
algo = ENCR_AES_CBC;
+ icv_size = 8;
+ break;
+ case ENCR_AES_GCM_ICV12:
+ algo = ENCR_AES_CBC;
+ icv_size = 12;
+ break;
+ case ENCR_AES_GCM_ICV16:
+ algo = ENCR_AES_CBC;
+ icv_size = 16;
+ break;
+ default:
+ return NULL;
+ }
+
+ INIT_ALIGN(this, sizeof(__m128i),
+ .public = {
+ .aead = {
+ .encrypt = _encrypt,
+ .decrypt = _decrypt,
+ .get_block_size = _get_block_size,
+ .get_icv_size = _get_icv_size,
+ .get_iv_size = _get_iv_size,
+ .get_iv_gen = _get_iv_gen,
+ .get_key_size = _get_key_size,
+ .set_key = _set_key,
+ .destroy = _destroy,
+ },
+ },
+ .key_size = key_size,
+ .iv_gen = iv_gen_seq_create(),
+ .icv_size = icv_size,
+ );
+
+ switch (key_size)
+ {
+ case 16:
+ this->encrypt = encrypt_gcm128;
+ this->decrypt = decrypt_gcm128;
+ break;
+ case 24:
+ this->encrypt = encrypt_gcm192;
+ this->decrypt = decrypt_gcm192;
+ break;
+ case 32:
+ this->encrypt = encrypt_gcm256;
+ this->decrypt = decrypt_gcm256;
+ break;
+ }
+
+ return &this->public;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_gcm.h b/src/libstrongswan/plugins/aesni/aesni_gcm.h
new file mode 100644
index 000000000..5a256c8db
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_gcm.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_gcm aesni_gcm
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_GCM_H_
+#define AESNI_GCM_H_
+
+#include <library.h>
+
+typedef struct aesni_gcm_t aesni_gcm_t;
+
+/**
+ * GCM mode AEAD using AES-NI
+ */
+struct aesni_gcm_t {
+
+ /**
+ * Implements aead_t interface
+ */
+ aead_t aead;
+};
+
+/**
+ * Create an aesni_gcm instance.
+ *
+ * @param algo encryption algorithm, ENCR_AES_GCM*
+ * @param key_size AES key size, in bytes
+ * @param salt_size size of salt value
+ * @return AES-GCM AEAD, NULL if not supported
+ */
+aesni_gcm_t *aesni_gcm_create(encryption_algorithm_t algo,
+ size_t key_size, size_t salt_size);
+
+#endif /** AESNI_GCM_H_ @}*/
diff --git a/src/libstrongswan/plugins/aesni/aesni_key.c b/src/libstrongswan/plugins/aesni/aesni_key.c
new file mode 100644
index 000000000..523266a30
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_key.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */ + +#include "aesni_key.h" + +/** + * Rounds used for each AES key size + */ +#define AES128_ROUNDS 10 +#define AES192_ROUNDS 12 +#define AES256_ROUNDS 14 + +typedef struct private_aesni_key_t private_aesni_key_t; + +/** + * Private data of an aesni_key_t object. + */ +struct private_aesni_key_t { + + /** + * Public aesni_key_t interface. + */ + aesni_key_t public; +}; + +/** + * Invert round encryption keys to get a decryption key schedule + */ +static void reverse_key(aesni_key_t *this) +{ + __m128i t[this->rounds + 1]; + int i; + + for (i = 0; i <= this->rounds; i++) + { + t[i] = this->schedule[i]; + } + this->schedule[this->rounds] = t[0]; + for (i = 1; i < this->rounds; i++) + { + this->schedule[this->rounds - i] = _mm_aesimc_si128(t[i]); + } + this->schedule[0] = t[this->rounds]; + + memwipe(t, sizeof(t)); +} + +/** + * Assist in creating a 128-bit round key + */ +static __m128i assist128(__m128i a, __m128i b) +{ + __m128i c; + + b = _mm_shuffle_epi32(b ,0xff); + c = _mm_slli_si128(a, 0x04); + a = _mm_xor_si128(a, c); + c = _mm_slli_si128(c, 0x04); + a = _mm_xor_si128(a, c); + c = _mm_slli_si128(c, 0x04); + a = _mm_xor_si128(a, c); + a = _mm_xor_si128(a, b); + + return a; +} + +/** + * Expand a 128-bit key to encryption round keys + */ +static void expand128(__m128i *key, __m128i *schedule) +{ + __m128i t; + + schedule[0] = t = _mm_loadu_si128(key); + schedule[1] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x01)); + schedule[2] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x02)); + schedule[3] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x04)); + schedule[4] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x08)); + schedule[5] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x10)); + schedule[6] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x20)); + schedule[7] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x40)); + schedule[8] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x80)); + schedule[9] = t = assist128(t, _mm_aeskeygenassist_si128(t, 0x1b)); + schedule[10] = assist128(t, _mm_aeskeygenassist_si128(t, 0x36)); +} + +/** + * Assist in creating a 192-bit round key + */ +static __m128i assist192(__m128i b, __m128i c, __m128i *a) +{ + __m128i t; + + b = _mm_shuffle_epi32(b, 0x55); + t = _mm_slli_si128(*a, 0x04); + *a = _mm_xor_si128(*a, t); + t = _mm_slli_si128(t, 0x04); + *a = _mm_xor_si128(*a, t); + t = _mm_slli_si128(t, 0x04); + *a = _mm_xor_si128(*a, t); + *a = _mm_xor_si128(*a, b); + b = _mm_shuffle_epi32(*a, 0xff); + t = _mm_slli_si128(c, 0x04); + t = _mm_xor_si128(c, t); + t = _mm_xor_si128(t, b); + + return t; +} + +/** + * return a[63:0] | b[63:0] << 64 + */ +static __m128i _mm_shuffle_i00(__m128i a, __m128i b) +{ + return (__m128i)_mm_shuffle_pd((__m128d)a, (__m128d)b, 0); +} + +/** + * return a[127:64] >> 64 | b[63:0] << 64 + */ +static __m128i _mm_shuffle_i01(__m128i a, __m128i b) +{ + return (__m128i)_mm_shuffle_pd((__m128d)a, (__m128d)b, 1); +} + +/** + * Expand a 192-bit encryption key to round keys + */ +static void expand192(__m128i *key, __m128i *schedule) +{ + __m128i t1, t2, t3; + + schedule[0] = t1 = _mm_loadu_si128(key); + t2 = t3 = _mm_loadu_si128(key + 1); + + t2 = assist192(_mm_aeskeygenassist_si128(t2, 0x1), t2, &t1); + schedule[1] = _mm_shuffle_i00(t3, t1); + schedule[2] = _mm_shuffle_i01(t1, t2); + t2 = t3 = assist192(_mm_aeskeygenassist_si128(t2, 0x2), t2, &t1); + schedule[3] = t1; + + t2 = assist192(_mm_aeskeygenassist_si128(t2, 0x4), t2, &t1); + schedule[4] = _mm_shuffle_i00(t3, t1); + schedule[5] = _mm_shuffle_i01(t1, t2); + t2 = 
t3 = assist192(_mm_aeskeygenassist_si128(t2, 0x8), t2, &t1);
+ schedule[6] = t1;
+
+ t2 = assist192(_mm_aeskeygenassist_si128(t2, 0x10), t2, &t1);
+ schedule[7] = _mm_shuffle_i00(t3, t1);
+ schedule[8] = _mm_shuffle_i01(t1, t2);
+ t2 = t3 = assist192(_mm_aeskeygenassist_si128(t2, 0x20), t2, &t1);
+ schedule[9] = t1;
+
+ t2 = assist192(_mm_aeskeygenassist_si128(t2, 0x40), t2, &t1);
+ schedule[10] = _mm_shuffle_i00(t3, t1);
+ schedule[11] = _mm_shuffle_i01(t1, t2);
+ assist192(_mm_aeskeygenassist_si128(t2, 0x80), t2, &t1);
+ schedule[12] = t1;
+}
+
+/**
+ * Assist in creating a 256-bit round key
+ */
+static __m128i assist256_1(__m128i a, __m128i b)
+{
+ __m128i x, y;
+
+ b = _mm_shuffle_epi32(b, 0xff);
+ y = _mm_slli_si128(a, 0x04);
+ x = _mm_xor_si128(a, y);
+ y = _mm_slli_si128(y, 0x04);
+ x = _mm_xor_si128(x, y);
+ y = _mm_slli_si128(y, 0x04);
+ x = _mm_xor_si128(x, y);
+ x = _mm_xor_si128(x, b);
+
+ return x;
+}
+
+/**
+ * Assist in creating a 256-bit round key
+ */
+static __m128i assist256_2(__m128i a, __m128i b)
+{
+ __m128i x, y, z;
+
+ y = _mm_aeskeygenassist_si128(a, 0x00);
+ z = _mm_shuffle_epi32(y, 0xaa);
+ y = _mm_slli_si128(b, 0x04);
+ x = _mm_xor_si128(b, y);
+ y = _mm_slli_si128(y, 0x04);
+ x = _mm_xor_si128(x, y);
+ y = _mm_slli_si128(y, 0x04);
+ x = _mm_xor_si128(x, y);
+ x = _mm_xor_si128(x, z);
+
+ return x;
+}
+
+/**
+ * Expand a 256-bit encryption key to round keys
+ */
+static void expand256(__m128i *key, __m128i *schedule)
+{
+ __m128i t1, t2;
+
+ schedule[0] = t1 = _mm_loadu_si128(key);
+ schedule[1] = t2 = _mm_loadu_si128(key + 1);
+
+ schedule[2] = t1 = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x01));
+ schedule[3] = t2 = assist256_2(t1, t2);
+
+ schedule[4] = t1 = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x02));
+ schedule[5] = t2 = assist256_2(t1, t2);
+
+ schedule[6] = t1 = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x04));
+ schedule[7] = t2 = assist256_2(t1, t2);
+
+ schedule[8] = t1 = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x08));
+ schedule[9] = t2 = assist256_2(t1, t2);
+
+ schedule[10] = t1 = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x10));
+ schedule[11] = t2 = assist256_2(t1, t2);
+
+ schedule[12] = t1 = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x20));
+ schedule[13] = t2 = assist256_2(t1, t2);
+
+ schedule[14] = assist256_1(t1, _mm_aeskeygenassist_si128(t2, 0x40));
+}
+
+METHOD(aesni_key_t, destroy, void,
+ private_aesni_key_t *this)
+{
+ memwipe(this, sizeof(*this) + (this->public.rounds + 1) * AES_BLOCK_SIZE);
+ free_align(this);
+}
+
+/**
+ * See header
+ */
+aesni_key_t *aesni_key_create(bool encrypt, chunk_t key)
+{
+ private_aesni_key_t *this;
+ int rounds;
+
+ switch (key.len)
+ {
+ case 16:
+ rounds = AES128_ROUNDS;
+ break;
+ case 24:
+ rounds = AES192_ROUNDS;
+ break;
+ case 32:
+ rounds = AES256_ROUNDS;
+ break;
+ default:
+ return NULL;
+ }
+
+ INIT_EXTRA_ALIGN(this, (rounds + 1) * AES_BLOCK_SIZE, sizeof(__m128i),
+ .public = {
+ .destroy = _destroy,
+ .rounds = rounds,
+ },
+ );
+
+ switch (key.len)
+ {
+ case 16:
+ expand128((__m128i*)key.ptr, this->public.schedule);
+ break;
+ case 24:
+ expand192((__m128i*)key.ptr, this->public.schedule);
+ break;
+ case 32:
+ expand256((__m128i*)key.ptr, this->public.schedule);
+ break;
+ default:
+ break;
+ }
+
+ if (!encrypt)
+ {
+ reverse_key(&this->public);
+ }
+
+ return &this->public;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_key.h b/src/libstrongswan/plugins/aesni/aesni_key.h
new file mode 100644
index 000000000..12dcd221d
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_key.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_key aesni_key
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_KEY_H_
+#define AESNI_KEY_H_
+
+#include <library.h>
+
+#include <wmmintrin.h>
+
+/**
+ * AES block size, in bytes
+ */
+#define AES_BLOCK_SIZE 16
+
+typedef struct aesni_key_t aesni_key_t;
+
+/**
+ * Key schedule for encryption/decryption using AES-NI.
+ */
+struct aesni_key_t {
+
+ /**
+ * Destroy an aesni_key_t.
+ */
+ void (*destroy)(aesni_key_t *this);
+
+ /**
+ * Number of AES rounds (10, 12, 14)
+ */
+ int rounds;
+
+ /**
+ * Key schedule, one round key per round plus the round 0 (whitening) key
+ */
+ __attribute__((aligned(sizeof(__m128i)))) __m128i schedule[];
+};
+
+/**
+ * Create an AESNI key schedule instance.
+ *
+ * @param encrypt TRUE for encryption schedule, FALSE for decryption
+ * @param key non-expanded crypto key, 16, 24 or 32 bytes
+ * @return key schedule, NULL on invalid key size
+ */
+aesni_key_t *aesni_key_create(bool encrypt, chunk_t key);
+
+#endif /** AESNI_KEY_H_ @}*/
diff --git a/src/libstrongswan/plugins/aesni/aesni_plugin.c b/src/libstrongswan/plugins/aesni/aesni_plugin.c
new file mode 100644
index 000000000..b92419dc4
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_plugin.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */ + +#include "aesni_plugin.h" +#include "aesni_cbc.h" +#include "aesni_ctr.h" +#include "aesni_ccm.h" +#include "aesni_gcm.h" +#include "aesni_xcbc.h" +#include "aesni_cmac.h" + +#include <stdio.h> + +#include <library.h> +#include <utils/debug.h> +#include <utils/cpu_feature.h> + +typedef struct private_aesni_plugin_t private_aesni_plugin_t; +typedef enum cpuid_feature_t cpuid_feature_t; + +/** + * private data of aesni_plugin + */ +struct private_aesni_plugin_t { + + /** + * public functions + */ + aesni_plugin_t public; +}; + +METHOD(plugin_t, get_name, char*, + private_aesni_plugin_t *this) +{ + return "aesni"; +} + +METHOD(plugin_t, get_features, int, + private_aesni_plugin_t *this, plugin_feature_t *features[]) +{ + static plugin_feature_t f[] = { + PLUGIN_REGISTER(CRYPTER, aesni_cbc_create), + PLUGIN_PROVIDE(CRYPTER, ENCR_AES_CBC, 16), + PLUGIN_PROVIDE(CRYPTER, ENCR_AES_CBC, 24), + PLUGIN_PROVIDE(CRYPTER, ENCR_AES_CBC, 32), + PLUGIN_REGISTER(CRYPTER, aesni_ctr_create), + PLUGIN_PROVIDE(CRYPTER, ENCR_AES_CTR, 16), + PLUGIN_PROVIDE(CRYPTER, ENCR_AES_CTR, 24), + PLUGIN_PROVIDE(CRYPTER, ENCR_AES_CTR, 32), + PLUGIN_REGISTER(AEAD, aesni_ccm_create), + PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV8, 16), + PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV12, 16), + PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV16, 16), + PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV8, 24), + PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV12, 24), + PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV16, 24), + PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV8, 32), + PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV12, 32), + PLUGIN_PROVIDE(AEAD, ENCR_AES_CCM_ICV16, 32), + PLUGIN_REGISTER(AEAD, aesni_gcm_create), + PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV8, 16), + PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV12, 16), + PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV16, 16), + PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV8, 24), + PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV12, 24), + PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV16, 24), + PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV8, 32), + PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV12, 32), + PLUGIN_PROVIDE(AEAD, ENCR_AES_GCM_ICV16, 32), + PLUGIN_REGISTER(PRF, aesni_xcbc_prf_create), + PLUGIN_PROVIDE(PRF, PRF_AES128_XCBC), + PLUGIN_REGISTER(SIGNER, aesni_xcbc_signer_create), + PLUGIN_PROVIDE(SIGNER, AUTH_AES_XCBC_96), + PLUGIN_REGISTER(PRF, aesni_cmac_prf_create), + PLUGIN_PROVIDE(PRF, PRF_AES128_CMAC), + PLUGIN_REGISTER(SIGNER, aesni_cmac_signer_create), + PLUGIN_PROVIDE(SIGNER, AUTH_AES_CMAC_96), + }; + + *features = f; + if (cpu_feature_available(CPU_FEATURE_AESNI | CPU_FEATURE_PCLMULQDQ)) + { + return countof(f); + } + return 0; +} + +METHOD(plugin_t, destroy, void, + private_aesni_plugin_t *this) +{ + free(this); +} + +/* + * see header file + */ +plugin_t *aesni_plugin_create() +{ + private_aesni_plugin_t *this; + + INIT(this, + .public = { + .plugin = { + .get_name = _get_name, + .get_features = _get_features, + .reload = (void*)return_false, + .destroy = _destroy, + }, + }, + ); + + return &this->public.plugin; +} diff --git a/src/libstrongswan/plugins/aesni/aesni_plugin.h b/src/libstrongswan/plugins/aesni/aesni_plugin.h new file mode 100644 index 000000000..2b0c92c25 --- /dev/null +++ b/src/libstrongswan/plugins/aesni/aesni_plugin.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2015 Martin Willi + * Copyright (C) 2015 revosec AG + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
See <http://www.fsf.org/copyleft/gpl.txt>. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +/** + * @defgroup aesni aesni + * @ingroup plugins + * + * @defgroup aesni_plugin aesni_plugin + * @{ @ingroup aesni + */ + +#ifndef AESNI_PLUGIN_H_ +#define AESNI_PLUGIN_H_ + +#include <plugins/plugin.h> + +typedef struct aesni_plugin_t aesni_plugin_t; + +/** + * Plugin providing crypto primitives based on Intel AES-NI instructions. + */ +struct aesni_plugin_t { + + /** + * implements plugin interface + */ + plugin_t plugin; +}; + +#endif /** AESNI_PLUGIN_H_ @}*/ diff --git a/src/libstrongswan/plugins/aesni/aesni_xcbc.c b/src/libstrongswan/plugins/aesni/aesni_xcbc.c new file mode 100644 index 000000000..24a75cec0 --- /dev/null +++ b/src/libstrongswan/plugins/aesni/aesni_xcbc.c @@ -0,0 +1,367 @@ +/* + * Copyright (C) 2008-2015 Martin Willi + * Copyright (C) 2012 Tobias Brunner + * Hochschule fuer Technik Rapperswil + * Copyright (C) 2015 revosec AG + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include "aesni_xcbc.h" +#include "aesni_key.h" + +#include <crypto/prfs/mac_prf.h> +#include <crypto/signers/mac_signer.h> + +typedef struct private_aesni_mac_t private_aesni_mac_t; + +/** + * Private data of a mac_t object. + */ +struct private_aesni_mac_t { + + /** + * Public mac_t interface. + */ + mac_t public; + + /** + * Key schedule for K1 + */ + aesni_key_t *k1; + + /** + * k2 + */ + __m128i k2; + + /** + * k3 + */ + __m128i k3; + + /** + * E + */ + __m128i e; + + /** + * remaining, unprocessed bytes in append mode + */ + u_char rem[AES_BLOCK_SIZE]; + + /** + * number of bytes used in remaining + */ + int rem_size; + + /** + * TRUE if we have zero bytes to xcbc in final() + */ + bool zero; +}; + +METHOD(mac_t, get_mac, bool, + private_aesni_mac_t *this, chunk_t data, u_int8_t *out) +{ + __m128i *ks, e, *bi; + u_int blocks, rem, i; + + if (!this->k1) + { + return FALSE; + } + + ks = this->k1->schedule; + + e = this->e; + + if (data.len) + { + this->zero = FALSE; + } + + if (this->rem_size + data.len > AES_BLOCK_SIZE) + { + /* (3) For each block M[i], where i = 1 ... n-1: + * XOR M[i] with E[i-1], then encrypt the result with Key K1, + * yielding E[i]. 
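+ *
+ * (Editor's note: the numbered steps quoted in these comments follow
+ * the AES-XCBC-MAC-96 definition in RFC 3566, section 4.)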
+ */ + + /* append data to remaining bytes, process block M[1] */ + memcpy(this->rem + this->rem_size, data.ptr, + AES_BLOCK_SIZE - this->rem_size); + data = chunk_skip(data, AES_BLOCK_SIZE - this->rem_size); + + e = _mm_xor_si128(e, _mm_loadu_si128((__m128i*)this->rem)); + + e = _mm_xor_si128(e, ks[0]); + e = _mm_aesenc_si128(e, ks[1]); + e = _mm_aesenc_si128(e, ks[2]); + e = _mm_aesenc_si128(e, ks[3]); + e = _mm_aesenc_si128(e, ks[4]); + e = _mm_aesenc_si128(e, ks[5]); + e = _mm_aesenc_si128(e, ks[6]); + e = _mm_aesenc_si128(e, ks[7]); + e = _mm_aesenc_si128(e, ks[8]); + e = _mm_aesenc_si128(e, ks[9]); + e = _mm_aesenclast_si128(e, ks[10]); + + bi = (__m128i*)data.ptr; + rem = data.len % AES_BLOCK_SIZE; + blocks = data.len / AES_BLOCK_SIZE; + if (!rem && blocks) + { /* don't do last block */ + rem = AES_BLOCK_SIZE; + blocks--; + } + + /* process blocks M[2] ... M[n-1] */ + for (i = 0; i < blocks; i++) + { + e = _mm_xor_si128(e, _mm_loadu_si128(bi + i)); + + e = _mm_xor_si128(e, ks[0]); + e = _mm_aesenc_si128(e, ks[1]); + e = _mm_aesenc_si128(e, ks[2]); + e = _mm_aesenc_si128(e, ks[3]); + e = _mm_aesenc_si128(e, ks[4]); + e = _mm_aesenc_si128(e, ks[5]); + e = _mm_aesenc_si128(e, ks[6]); + e = _mm_aesenc_si128(e, ks[7]); + e = _mm_aesenc_si128(e, ks[8]); + e = _mm_aesenc_si128(e, ks[9]); + e = _mm_aesenclast_si128(e, ks[10]); + } + + /* store remaining bytes of block M[n] */ + memcpy(this->rem, data.ptr + data.len - rem, rem); + this->rem_size = rem; + } + else + { + /* no complete block, just copy into remaining */ + memcpy(this->rem + this->rem_size, data.ptr, data.len); + this->rem_size += data.len; + } + + if (out) + { + /* (4) For block M[n]: */ + if (this->rem_size == AES_BLOCK_SIZE && !this->zero) + { + /* a) If the blocksize of M[n] is 128 bits: + * XOR M[n] with E[n-1] and Key K2, then encrypt the result with + * Key K1, yielding E[n]. + */ + e = _mm_xor_si128(e, this->k2); + } + else + { + /* b) If the blocksize of M[n] is less than 128 bits: + * + * i) Pad M[n] with a single "1" bit, followed by the number of + * "0" bits (possibly none) required to increase M[n]'s + * blocksize to 128 bits. + */ + if (this->rem_size < AES_BLOCK_SIZE) + { + memset(this->rem + this->rem_size, 0, + AES_BLOCK_SIZE - this->rem_size); + this->rem[this->rem_size] = 0x80; + } + /* ii) XOR M[n] with E[n-1] and Key K3, then encrypt the result + * with Key K1, yielding E[n]. 
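+ * (Editor's note: both the complete-block case (a) above and this
+ * padded case (b) then XOR M[n] with E[n-1] and encrypt with K1; only
+ * the K2/K3 constant differs.)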
+ */ + e = _mm_xor_si128(e, this->k3); + } + e = _mm_xor_si128(e, _mm_loadu_si128((__m128i*)this->rem)); + + e = _mm_xor_si128(e, ks[0]); + e = _mm_aesenc_si128(e, ks[1]); + e = _mm_aesenc_si128(e, ks[2]); + e = _mm_aesenc_si128(e, ks[3]); + e = _mm_aesenc_si128(e, ks[4]); + e = _mm_aesenc_si128(e, ks[5]); + e = _mm_aesenc_si128(e, ks[6]); + e = _mm_aesenc_si128(e, ks[7]); + e = _mm_aesenc_si128(e, ks[8]); + e = _mm_aesenc_si128(e, ks[9]); + e = _mm_aesenclast_si128(e, ks[10]); + _mm_storeu_si128((__m128i*)out, e); + + /* (2) Define E[0] = 0x00000000000000000000000000000000 */ + e = _mm_setzero_si128(); + this->rem_size = 0; + this->zero = TRUE; + } + this->e = e; + return TRUE; +} + +METHOD(mac_t, get_mac_size, size_t, + private_aesni_mac_t *this) +{ + return AES_BLOCK_SIZE; +} + +METHOD(mac_t, set_key, bool, + private_aesni_mac_t *this, chunk_t key) +{ + __m128i t1, t2, t3; + u_char k1[AES_BLOCK_SIZE]; + u_int round; + chunk_t k; + + /* reset state */ + this->e = _mm_setzero_si128(); + this->rem_size = 0; + this->zero = TRUE; + + /* Create RFC4434 variable keys if required */ + if (key.len == AES_BLOCK_SIZE) + { + k = key; + } + else if (key.len < AES_BLOCK_SIZE) + { /* pad short keys */ + k = chunk_alloca(AES_BLOCK_SIZE); + memset(k.ptr, 0, k.len); + memcpy(k.ptr, key.ptr, key.len); + } + else + { /* shorten key using XCBC */ + k = chunk_alloca(AES_BLOCK_SIZE); + memset(k.ptr, 0, k.len); + if (!set_key(this, k) || !get_mac(this, key, k.ptr)) + { + return FALSE; + } + } + + /* + * (1) Derive 3 128-bit keys (K1, K2 and K3) from the 128-bit secret + * key K, as follows: + * K1 = 0x01010101010101010101010101010101 encrypted with Key K + * K2 = 0x02020202020202020202020202020202 encrypted with Key K + * K3 = 0x03030303030303030303030303030303 encrypted with Key K + */ + + DESTROY_IF(this->k1); + this->k1 = aesni_key_create(TRUE, k); + if (!this->k1) + { + return FALSE; + } + + t1 = _mm_set1_epi8(0x01); + t2 = _mm_set1_epi8(0x02); + t3 = _mm_set1_epi8(0x03); + + t1 = _mm_xor_si128(t1, this->k1->schedule[0]); + t2 = _mm_xor_si128(t2, this->k1->schedule[0]); + t3 = _mm_xor_si128(t3, this->k1->schedule[0]); + + for (round = 1; round < this->k1->rounds; round++) + { + t1 = _mm_aesenc_si128(t1, this->k1->schedule[round]); + t2 = _mm_aesenc_si128(t2, this->k1->schedule[round]); + t3 = _mm_aesenc_si128(t3, this->k1->schedule[round]); + } + + t1 = _mm_aesenclast_si128(t1, this->k1->schedule[this->k1->rounds]); + t2 = _mm_aesenclast_si128(t2, this->k1->schedule[this->k1->rounds]); + t3 = _mm_aesenclast_si128(t3, this->k1->schedule[this->k1->rounds]); + + _mm_storeu_si128((__m128i*)k1, t1); + this->k2 = t2; + this->k3 = t3; + + this->k1->destroy(this->k1); + this->k1 = aesni_key_create(TRUE, chunk_from_thing(k1)); + + memwipe(k1, AES_BLOCK_SIZE); + return this->k1 != NULL; +} + +METHOD(mac_t, destroy, void, + private_aesni_mac_t *this) +{ + DESTROY_IF(this->k1); + memwipe(&this->k2, sizeof(this->k2)); + memwipe(&this->k3, sizeof(this->k3)); + free_align(this); +} + +/* + * Described in header + */ +mac_t *aesni_xcbc_create(encryption_algorithm_t algo, size_t key_size) +{ + private_aesni_mac_t *this; + + INIT_ALIGN(this, sizeof(__m128i), + .public = { + .get_mac = _get_mac, + .get_mac_size = _get_mac_size, + .set_key = _set_key, + .destroy = _destroy, + }, + ); + + return &this->public; +} + +/* + * Described in header. 
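+ *
+ * (Editor's note: the PRF hands the full 128-bit MAC to mac_prf_create(),
+ * matching AES-XCBC-PRF-128 from RFC 4434, while the signer below
+ * truncates the MAC to 96 bits for AUTH_AES_XCBC_96.)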
+ */
+prf_t *aesni_xcbc_prf_create(pseudo_random_function_t algo)
+{
+ mac_t *xcbc;
+
+ switch (algo)
+ {
+ case PRF_AES128_XCBC:
+ xcbc = aesni_xcbc_create(ENCR_AES_CBC, 16);
+ break;
+ default:
+ return NULL;
+ }
+ if (xcbc)
+ {
+ return mac_prf_create(xcbc);
+ }
+ return NULL;
+}
+
+/*
+ * Described in header
+ */
+signer_t *aesni_xcbc_signer_create(integrity_algorithm_t algo)
+{
+ size_t trunc;
+ mac_t *xcbc;
+
+ switch (algo)
+ {
+ case AUTH_AES_XCBC_96:
+ xcbc = aesni_xcbc_create(ENCR_AES_CBC, 16);
+ trunc = 12;
+ break;
+ default:
+ return NULL;
+ }
+ if (xcbc)
+ {
+ return mac_signer_create(xcbc, trunc);
+ }
+ return NULL;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_xcbc.h b/src/libstrongswan/plugins/aesni/aesni_xcbc.h
new file mode 100644
index 000000000..53f559feb
--- /dev/null
+++ b/src/libstrongswan/plugins/aesni/aesni_xcbc.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_xcbc aesni_xcbc
+ * @{ @ingroup aesni
+ */
+
+#ifndef AESNI_XCBC_H_
+#define AESNI_XCBC_H_
+
+#include <crypto/mac.h>
+#include <crypto/prfs/prf.h>
+#include <crypto/signers/signer.h>
+
+/**
+ * Create a generic mac_t object using AESNI XCBC
+ *
+ * @param algo underlying encryption algorithm
+ * @param key_size size of encryption key, in bytes
+ * @return mac_t object
+ */
+mac_t *aesni_xcbc_create(encryption_algorithm_t algo, size_t key_size);
+
+/**
+ * Creates a new prf_t object based on AESNI XCBC.
+ *
+ * @param algo algorithm to implement
+ * @return prf_t object, NULL if not supported
+ */
+prf_t *aesni_xcbc_prf_create(pseudo_random_function_t algo);
+
+/**
+ * Creates a new signer_t object based on AESNI XCBC.
+ *
+ * @param algo algorithm to implement
+ * @return signer_t, NULL if not supported
+ */
+signer_t *aesni_xcbc_signer_create(integrity_algorithm_t algo);
+
+#endif /** AESNI_XCBC_H_ @}*/
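
Editor's addendum (not part of the diff above): every AES computation in
these files follows the same AES-NI skeleton: XOR the state with round
key 0, _mm_aesenc_si128 for the middle rounds, and _mm_aesenclast_si128
for the final round. As a minimal sketch of that skeleton, assuming an
already-expanded AES-128 schedule of 11 round keys such as the one
aesni_key_t.schedule holds, single-block encryption could look like:

#include <wmmintrin.h>

/* Editor's sketch: encrypt one 16-byte block with an expanded AES-128
 * key schedule (11 round keys); build with -maes, as Makefile.am does. */
static void aes128_encrypt_block(const __m128i ks[11],
                                 const unsigned char in[16],
                                 unsigned char out[16])
{
    __m128i t = _mm_loadu_si128((const __m128i*)in);
    int round;

    t = _mm_xor_si128(t, ks[0]);            /* round 0: key whitening */
    for (round = 1; round < 10; round++)
    {
        t = _mm_aesenc_si128(t, ks[round]); /* rounds 1..9 */
    }
    t = _mm_aesenclast_si128(t, ks[10]);    /* round 10: no MixColumns */
    _mm_storeu_si128((__m128i*)out, t);
}

The functions in this diff unroll these rounds completely and interleave
four blocks per iteration, hiding the multi-cycle latency of aesenc
behind independent instructions; the loop form above is only meant to
make the shared structure explicit.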