==== libntru-0.5/.gitignore ====

# Object files
*.o

# Compiler-generated assembly files
src/sha1-mb-x86_64.s
src/sha256-mb-x86_64.s

# Libraries
*.lib

# Shared objects (inc. Windows DLLs)
*.dll
*.so

# Executables
*.exe
*.out

==== libntru-0.5/LICENSE ====

Copyright (c) 2012, Tim Buktu
Copyright (c) 2016, Shay Gueron and Fabian Schlieker
Copyright (c) 2006, CRYPTOGAMS by <appro@openssl.org>. All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain copyright notices, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

* Neither the name of the CRYPTOGAMS nor the names of its copyright holder and contributors may be used to endorse or promote products derived from this software without specific prior written permission.

ALTERNATIVELY, provided that this notice is retained in full, this product may be distributed under the terms of the GNU General Public License (GPL), in which case the provisions of the GPL apply INSTEAD OF those given above.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
libntru-0.5/Makefile000066400000000000000000000005101271556312200144750ustar00rootroot00000000000000ifeq ($(OS), Windows_NT) include Makefile.win else ifeq ($(shell uname), Darwin) include Makefile.osx else ifeq ($(shell uname), OS/2) include Makefile.os2 else ifeq ($(shell uname), FreeBSD) include Makefile.bsd else ifeq ($(shell uname), OpenBSD) include Makefile.bsd else include Makefile.linux endif libntru-0.5/Makefile.bsd000066400000000000000000000124611271556312200152540ustar00rootroot00000000000000CC?=cc AS=$(CC) -c AR?=ar CFLAGS?=-g CFLAGS+=-Wall -Wextra -Wno-unused-parameter SSSE3_FLAG = $(shell /usr/bin/grep -o SSSE3 /var/run/dmesg.boot | /usr/bin/head -1) ifneq ($(SSE), no) ifeq ($(SSSE3_FLAG), SSSE3) SSE=yes endif endif AVX2_FLAG = $(shell /usr/bin/grep -o AVX2 /var/run/dmesg.boot | /usr/bin/head -1) ifneq ($(AVX2), no) ifeq ($(AVX2_FLAG), AVX2) AVX2=yes endif endif ifeq ($(AVX2), yes) SSE=yes endif ifeq ($(SSE), no) AVX2=no endif ifeq ($(SSE), yes) CFLAGS+=-mssse3 endif ifeq ($(AVX2), yes) CFLAGS+=-mavx2 endif # use -march=native if we're compiling for x86 BENCH_ARCH_OPTION= MACHINE=$(shell uname -m | sed 's/i.86/i386/g') ifeq ($(SSE), yes) ifeq ($(MACHINE), i386) BENCH_ARCH_OPTION=-march=native endif ifeq ($(MACHINE), amd64) BENCH_ARCH_OPTION=-march=native endif endif OPTFLAGS=-O2 bench: OPTFLAGS=-O3 $(BENCH_ARCH_OPTION) CFLAGS+=$(OPTFLAGS) ifneq ($(shell uname), OpenBSD) LIBS+=-lrt endif SRCDIR=src TESTDIR=tests LIB_OBJS=bitstring.o encparams.o hash.o idxgen.o key.o mgf.o ntru.o poly.o rand.o arith.o sha1.o sha2.o nist_ctr_drbg.o rijndael.o ifeq ($(SSE), yes) ifeq ($(MACHINE), amd64) LIB_OBJS+=sha1-mb-x86_64.o sha256-mb-x86_64.o endif endif TEST_OBJS=test_bitstring.o test_hash.o test_idxgen.o test_key.o test_ntru.o test.o test_poly.o test_util.o VERSION=0.5 INST_PFX=/usr INST_LIBDIR=$(INST_PFX)/lib INST_INCLUDE=$(INST_PFX)/include/libntru INST_DOCDIR=$(INST_PFX)/share/doc/libntru-$(VERSION) INST_HEADERS=ntru.h types.h key.h encparams.h hash.h rand.h err.h PERL=/usr/bin/perl PERLASM_SCHEME=elf LIB_OBJS_PATHS=$(patsubst %,$(SRCDIR)/%,$(LIB_OBJS)) TEST_OBJS_PATHS=$(patsubst %,$(TESTDIR)/%,$(TEST_OBJS)) DIST_NAME=libntru-$(VERSION) MAKEFILENAME=$(lastword $(MAKEFILE_LIST)) .PHONY: all lib install uninstall dist test clean distclean all: lib lib: libntru.so static-lib: libntru.a libntru.so: $(LIB_OBJS_PATHS) $(CC) $(CFLAGS) $(CPPFLAGS) -shared -Wl,-soname,libntru.so -o libntru.so $(LIB_OBJS_PATHS) $(LDFLAGS) $(LIBS) libntru.a: $(LIB_OBJS_PATHS) $(AR) cru libntru.a $(LIB_OBJS_PATHS) install: install-lib install-doc install-headers install-lib: lib test -d "$(DESTDIR)$(INST_LIBDIR)" || mkdir -p "$(DESTDIR)$(INST_LIBDIR)" install -m 0755 libntru.so "$(DESTDIR)$(INST_LIBDIR)/libntru.so" install-static-lib: static-lib test -d "$(DESTDIR)$(INST_LIBDIR)" || mkdir -p "$(DESTDIR)$(INST_LIBDIR)" install -m 0755 libntru.a "$(DESTDIR)$(INST_LIBDIR)/libntru.a" install-doc: test -d "$(DESTDIR)$(INST_DOCDIR)" || mkdir -p "$(DESTDIR)$(INST_DOCDIR)" install -m 0644 README.md "$(DESTDIR)$(INST_DOCDIR)/README.md" install-headers: test -d "$(DESTDIR)$(INST_INCLUDE)" || mkdir -p "$(DESTDIR)$(INST_INCLUDE)" for header in $(INST_HEADERS) ; do \ install -m 0644 "$(SRCDIR)/$$header" "$(DESTDIR)$(INST_INCLUDE)/" ; \ done uninstall: uninstall-lib uninstall-doc uninstall-headers uninstall-lib: rm -f "$(DESTDIR)$(INST_LIBDIR)/libntru.so" uninstall-static-lib: rm -f "$(DESTDIR)$(INST_LIBDIR)/libntru.a" uninstall-doc: rm -f "$(DESTDIR)$(INST_DOCDIR)/README.md" rmdir "$(DESTDIR)$(INST_DOCDIR)/" uninstall-headers: 
for header in $(INST_HEADERS) ; do \ rm "$(DESTDIR)$(INST_INCLUDE)/$$header" ; \ done rmdir "$(DESTDIR)$(INST_INCLUDE)/" dist: rm -rf $(DIST_NAME) mkdir $(DIST_NAME) mkdir $(DIST_NAME)/$(SRCDIR) mkdir $(DIST_NAME)/$(TESTDIR) cp Makefile Makefile.win Makefile.osx README.md LICENSE PATENTS $(DIST_NAME) cp $(SRCDIR)/*.c $(DIST_NAME)/$(SRCDIR) cp $(SRCDIR)/*.h $(DIST_NAME)/$(SRCDIR) cp $(TESTDIR)/*.c $(DIST_NAME)/$(TESTDIR) cp $(TESTDIR)/*.h $(DIST_NAME)/$(TESTDIR) tar cf $(DIST_NAME).tar.xz $(DIST_NAME) --lzma rm -rf $(DIST_NAME) test: $(MAKE) -f $(MAKEFILENAME) testnoham @echo @echo Testing patent-reduced build LD_LIBRARY_PATH=. ./testnoham $(MAKE) -f $(MAKEFILENAME) testham @echo @echo Testing full build LD_LIBRARY_PATH=. ./testham testham: clean lib $(TEST_OBJS_PATHS) @echo CFLAGS=$(CFLAGS) $(CC) $(CFLAGS) -o testham $(TEST_OBJS_PATHS) -L. -lntru -lm testnoham: CFLAGS += -DNTRU_AVOID_HAMMING_WT_PATENT testnoham: clean lib $(TEST_OBJS_PATHS) @echo CFLAGS=$(CFLAGS) $(CC) $(CFLAGS) -o testnoham $(TEST_OBJS_PATHS) -L. -lntru -lm bench: static-lib $(CC) $(CFLAGS) $(CPPFLAGS) -o bench $(SRCDIR)/bench.c $(LDFLAGS) $(LIBS) -L. -lntru hybrid: static-lib $(CC) $(CFLAGS) $(CPPFLAGS) -o hybrid $(SRCDIR)/hybrid.c $(LDFLAGS) $(LIBS) -L. -lntru -lcrypto $(SRCDIR)/%.o: $(SRCDIR)/%.c $(CC) $(CFLAGS) $(CPPFLAGS) -c -fPIC $< -o $@ $(SRCDIR)/sha1-mb-x86_64.s: $(SRCDIR)/sha1-mb-x86_64.pl; CC=$(CC) ASM="$(AS)" $(PERL) $(SRCDIR)/sha1-mb-x86_64.pl $(PERLASM_SCHEME) > $@ $(SRCDIR)/sha1-mb-x86_64.o: $(SRCDIR)/sha1-mb-x86_64.s $(AS) $(SRCDIR)/sha1-mb-x86_64.s -o $@ $(SRCDIR)/sha256-mb-x86_64.s: $(SRCDIR)/sha256-mb-x86_64.pl; CC=$(CC) ASM="$(AS)" $(PERL) $(SRCDIR)/sha256-mb-x86_64.pl $(PERLASM_SCHEME) > $@ $(SRCDIR)/sha256-mb-x86_64.o: $(SRCDIR)/sha256-mb-x86_64.s $(AS) $(SRCDIR)/sha256-mb-x86_64.s -o $@ tests/%.o: tests/%.c $(CC) $(CFLAGS) $(CPPFLAGS) -fPIC -I$(SRCDIR) -c $< -o $@ clean: @# also clean files generated on other OSes rm -f $(SRCDIR)/*.o $(SRCDIR)/*.s $(TESTDIR)/*.o libntru.so libntru.a libntru.dylib libntru.dll testham testnoham testham.exe testnoham.exe bench bench.exe hybrid hybrid.exe distclean: clean rm -rf $(DIST_NAME) rm -f $(DIST_NAME).tar.xz $(DIST_NAME).zip libntru-0.5/Makefile.linux000066400000000000000000000123401271556312200156370ustar00rootroot00000000000000CC?=gcc AS=$(CC) -c AR?=ar CFLAGS?=-g CFLAGS+=-Wall -Wextra -Wno-unused-parameter SSSE3_FLAG = $(shell /bin/grep -m 1 -o ssse3 /proc/cpuinfo) ifneq ($(SSE), no) ifeq ($(SSSE3_FLAG), ssse3) SSE=yes endif endif AVX2_FLAG = $(shell /bin/grep -m 1 -o avx2 /proc/cpuinfo) ifneq ($(AVX2), no) ifeq ($(AVX2_FLAG), avx2) AVX2=yes endif endif ifeq ($(AVX2), yes) SSE=yes endif ifeq ($(SSE), no) AVX2=no endif ifeq ($(SSE), yes) CFLAGS+=-mssse3 endif ifeq ($(AVX2), yes) CFLAGS+=-mavx2 endif # use -march=native if we're compiling for x86 BENCH_ARCH_OPTION= MACHINE=$(shell uname -m | sed 's/i.86/i386/g') ifeq ($(SSE), yes) ifeq ($(MACHINE), i386) BENCH_ARCH_OPTION=-march=native endif ifeq ($(MACHINE), x86_64) BENCH_ARCH_OPTION=-march=native endif endif OPTFLAGS=-O2 bench: OPTFLAGS=-O3 $(BENCH_ARCH_OPTION) CFLAGS+=$(OPTFLAGS) LIBS+=-lrt SRCDIR=src TESTDIR=tests LIB_OBJS=bitstring.o encparams.o hash.o idxgen.o key.o mgf.o ntru.o poly.o rand.o arith.o sha1.o sha2.o nist_ctr_drbg.o rijndael.o ifeq ($(SSE), yes) ifeq ($(MACHINE), x86_64) LIB_OBJS+=sha1-mb-x86_64.o sha256-mb-x86_64.o endif endif TEST_OBJS=test_bitstring.o test_hash.o test_idxgen.o test_key.o test_ntru.o test.o test_poly.o test_util.o VERSION=0.5 INST_PFX=/usr INST_LIBDIR=$(INST_PFX)/lib 
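# Note: the install/uninstall targets below prefix every path with
# $(DESTDIR), so a staged, packaging-style install can be done with e.g.
#   make install DESTDIR=/tmp/stage
# (files then land under /tmp/stage/usr instead of /usr).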
INST_INCLUDE=$(INST_PFX)/include/libntru INST_DOCDIR=$(INST_PFX)/share/doc/libntru-$(VERSION) INST_HEADERS=ntru.h types.h key.h encparams.h hash.h rand.h err.h PERL=/usr/bin/perl PERLASM_SCHEME=elf LIB_OBJS_PATHS=$(patsubst %,$(SRCDIR)/%,$(LIB_OBJS)) TEST_OBJS_PATHS=$(patsubst %,$(TESTDIR)/%,$(TEST_OBJS)) DIST_NAME=libntru-$(VERSION) MAKEFILENAME=$(lastword $(MAKEFILE_LIST)) .PHONY: all lib install uninstall dist test clean distclean all: lib lib: libntru.so static-lib: libntru.a libntru.so: $(LIB_OBJS_PATHS) $(CC) $(CFLAGS) $(CPPFLAGS) -shared -Wl,-soname,libntru.so -o libntru.so $(LIB_OBJS_PATHS) $(LDFLAGS) $(LIBS) libntru.a: $(LIB_OBJS_PATHS) $(AR) cru libntru.a $(LIB_OBJS_PATHS) install: install-lib install-doc install-headers install-lib: lib test -d "$(DESTDIR)$(INST_LIBDIR)" || mkdir -p "$(DESTDIR)$(INST_LIBDIR)" install -m 0755 libntru.so "$(DESTDIR)$(INST_LIBDIR)/libntru.so" install-static-lib: static-lib test -d "$(DESTDIR)$(INST_LIBDIR)" || mkdir -p "$(DESTDIR)$(INST_LIBDIR)" install -m 0755 libntru.a "$(DESTDIR)$(INST_LIBDIR)/libntru.a" install-doc: test -d "$(DESTDIR)$(INST_DOCDIR)" || mkdir -p "$(DESTDIR)$(INST_DOCDIR)" install -m 0644 README.md "$(DESTDIR)$(INST_DOCDIR)/README.md" install-headers: test -d "$(DESTDIR)$(INST_INCLUDE)" || mkdir -p "$(DESTDIR)$(INST_INCLUDE)" for header in $(INST_HEADERS) ; do \ install -m 0644 "$(SRCDIR)/$$header" "$(DESTDIR)$(INST_INCLUDE)/" ; \ done uninstall: uninstall-lib uninstall-doc uninstall-headers uninstall-lib: rm -f "$(DESTDIR)$(INST_LIBDIR)/libntru.so" uninstall-static-lib: rm -f "$(DESTDIR)$(INST_LIBDIR)/libntru.a" uninstall-doc: rm -f "$(DESTDIR)$(INST_DOCDIR)/README.md" rmdir "$(DESTDIR)$(INST_DOCDIR)/" uninstall-headers: for header in $(INST_HEADERS) ; do \ rm "$(DESTDIR)$(INST_INCLUDE)/$$header" ; \ done rmdir "$(DESTDIR)$(INST_INCLUDE)/" dist: rm -rf $(DIST_NAME) mkdir $(DIST_NAME) mkdir $(DIST_NAME)/$(SRCDIR) mkdir $(DIST_NAME)/$(TESTDIR) cp Makefile Makefile.win Makefile.osx README.md LICENSE PATENTS $(DIST_NAME) cp $(SRCDIR)/*.c $(DIST_NAME)/$(SRCDIR) cp $(SRCDIR)/*.h $(DIST_NAME)/$(SRCDIR) cp $(TESTDIR)/*.c $(DIST_NAME)/$(TESTDIR) cp $(TESTDIR)/*.h $(DIST_NAME)/$(TESTDIR) tar cf $(DIST_NAME).tar.xz $(DIST_NAME) --lzma rm -rf $(DIST_NAME) test: $(MAKE) -f $(MAKEFILENAME) testnoham @echo @echo Testing patent-reduced build LD_LIBRARY_PATH=. ./testnoham $(MAKE) -f $(MAKEFILENAME) testham @echo @echo Testing full build LD_LIBRARY_PATH=. ./testham testham: clean lib $(TEST_OBJS_PATHS) @echo CFLAGS=$(CFLAGS) $(CC) $(CFLAGS) -o testham $(TEST_OBJS_PATHS) -L. -lntru -lm testnoham: CFLAGS += -DNTRU_AVOID_HAMMING_WT_PATENT testnoham: clean lib $(TEST_OBJS_PATHS) @echo CFLAGS=$(CFLAGS) $(CC) $(CFLAGS) -o testnoham $(TEST_OBJS_PATHS) -L. -lntru -lm bench: static-lib $(CC) $(CFLAGS) $(CPPFLAGS) -o bench $(SRCDIR)/bench.c $(LDFLAGS) $(LIBS) -L. -lntru hybrid: static-lib $(CC) $(CFLAGS) $(CPPFLAGS) -o hybrid $(SRCDIR)/hybrid.c $(LDFLAGS) $(LIBS) -L. 
-lntru -lcrypto $(SRCDIR)/%.o: $(SRCDIR)/%.c $(CC) $(CFLAGS) $(CPPFLAGS) -c -fPIC $< -o $@ $(SRCDIR)/sha1-mb-x86_64.s: $(SRCDIR)/sha1-mb-x86_64.pl; CC=$(CC) ASM="$(AS)" $(PERL) $(SRCDIR)/sha1-mb-x86_64.pl $(PERLASM_SCHEME) > $@ $(SRCDIR)/sha1-mb-x86_64.o: $(SRCDIR)/sha1-mb-x86_64.s $(AS) $(SRCDIR)/sha1-mb-x86_64.s -o $@ $(SRCDIR)/sha256-mb-x86_64.s: $(SRCDIR)/sha256-mb-x86_64.pl; CC=$(CC) ASM="$(AS)" $(PERL) $(SRCDIR)/sha256-mb-x86_64.pl $(PERLASM_SCHEME) > $@ $(SRCDIR)/sha256-mb-x86_64.o: $(SRCDIR)/sha256-mb-x86_64.s $(AS) $(SRCDIR)/sha256-mb-x86_64.s -o $@ tests/%.o: tests/%.c $(CC) $(CFLAGS) $(CPPFLAGS) -fPIC -I$(SRCDIR) -c $< -o $@ clean: @# also clean files generated on other OSes rm -f $(SRCDIR)/*.o $(SRCDIR)/*.s $(TESTDIR)/*.o libntru.so libntru.a libntru.dylib libntru.dll testham testnoham testham.exe testnoham.exe bench bench.exe hybrid hybrid.exe distclean: clean rm -rf $(DIST_NAME) rm -f $(DIST_NAME).tar.xz $(DIST_NAME).zip libntru-0.5/Makefile.os2000066400000000000000000000104151271556312200152040ustar00rootroot00000000000000CC?=gcc AS=$(CC) -c OPTFLAGS=-O2 bench: OPTFLAGS=-O3 -march=native CFLAGS?=-g $(OPTFLAGS) CFLAGS+=-Wall -Wextra -Wno-unused-parameter ifeq ($(AVX2), yes) CFLAGS+=-mavx2 SSE=yes endif ifeq ($(SSE), yes) CFLAGS+=-mssse3 endif LIBS+=-lrt SRCDIR=src TESTDIR=tests LIB_OBJS=bitstring.o encparams.o hash.o idxgen.o key.o mgf.o ntru.o poly.o rand.o arith.o sha1.o sha2.o nist_ctr_drbg.o rijndael.o ifeq ($(SSE), yes) LIB_OBJS+=sha1-mb-x86_64.o sha256-mb-x86_64.o endif TEST_OBJS=test_bitstring.o test_hash.o test_idxgen.o test_key.o test_ntru.o test.o test_poly.o test_util.o VERSION=0.5 INST_PFX=%PROGRAMFILES% INST_LIBDIR=$(INST_PFX)\libntru INST_INCLUDE=$(INST_PFX)\libntru\include INST_DOCDIR=$(INST_PFX)\libntru INST_HEADERS=ntru.h types.h key.h encparams.h hash.h rand.h err.h PERL=c:\mingw\msys\1.0\bin\perl PERLASM_SCHEME=coff LIB_OBJS_PATHS=$(patsubst %,$(SRCDIR)/%,$(LIB_OBJS)) TEST_OBJS_PATHS=$(patsubst %,$(TESTDIR)/%,$(TEST_OBJS)) DIST_NAME=libntru-$(VERSION) MAKEFILENAME=$(lastword $(MAKEFILE_LIST)) .PHONY: all lib install dist test clean distclean all: lib lib: $(LIB_OBJS_PATHS) $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o libntru.dll $(LIB_OBJS_PATHS) install: lib mkdir "$(DESTDIR)$(INST_PFX)" mkdir "$(DESTDIR)$(INST_LIBDIR)" mkdir "$(DESTDIR)$(INST_INCLUDE)" mkdir "$(DESTDIR)$(INST_DOCDIR)" copy libntru.dll "$(DESTDIR)$(INST_LIBDIR)" copy README.md "$(DESTDIR)$(INST_DOCDIR)" for %%h in ($(INST_HEADERS)) do \ copy $(SRCDIR)\%%h "$(INST_INCLUDE)" uninstall: rm -f "$(DESTDIR)$(INST_LIBDIR)\libntru.dll" rm -f "$(DESTDIR)$(INST_DOCDIR)\README.md" for %%h in ($(DESTDIR)$(INST_HEADERS)) do \ rm -f "$(DESTDIR)$(INST_INCLUDE)\%%h" rmdir "$(DESTDIR)$(INST_INCLUDE)" rmdir "$(DESTDIR)$(INST_LIBDIR)" rmdir "$(DESTDIR)$(INST_DOCDIR)" dist: rm -f $(DIST_NAME)\$(SRCDIR)\*.c rm -f $(DIST_NAME)\$(SRCDIR)\*.h rmdir $(DIST_NAME)\$(SRCDIR) rm -f $(DIST_NAME)\$(TESTDIR)\*.c rm -f $(DIST_NAME)\$(TESTDIR)\*.h rmdir $(DIST_NAME)\$(TESTDIR) rm -f /q $(DIST_NAME)\*.* rmdir $(DIST_NAME) mkdir $(DIST_NAME) mkdir $(DIST_NAME)\$(SRCDIR) mkdir $(DIST_NAME)\$(TESTDIR) copy Makefile $(DIST_NAME) copy Makefile.win $(DIST_NAME) copy Makefile.osx $(DIST_NAME) copy README.md $(DIST_NAME) copy LICENSE $(DIST_NAME) copy PATENTS $(DIST_NAME) copy $(SRCDIR)\*.c $(DIST_NAME)\$(SRCDIR) copy $(SRCDIR)\*.h $(DIST_NAME)\$(SRCDIR) copy $(TESTDIR)\*.c $(DIST_NAME)\$(TESTDIR) copy $(TESTDIR)\*.h $(DIST_NAME)\$(TESTDIR) test: $(MAKE) -f $(MAKEFILENAME) testnoham @echo. 
@echo Testing patent-reduced build testnoham.exe $(MAKE) -f $(MAKEFILENAME) testham @echo. @echo Testing full build testham.exe testham: clean lib $(TEST_OBJS_PATHS) @echo CFLAGS=$(CFLAGS) $(CC) $(CFLAGS) -o testham.exe $(TEST_OBJS_PATHS) -L. -lntru -lm testnoham: CFLAGS += -DNTRU_AVOID_HAMMING_WT_PATENT testnoham: clean lib $(TEST_OBJS_PATHS) @echo CFLAGS=$(CFLAGS) $(CC) $(CFLAGS) -o testnoham.exe $(TEST_OBJS_PATHS) -L. -lntru -lm bench: lib $(CC) $(CFLAGS) $(CPPFLAGS) -o bench $(SRCDIR)/bench.c -L. -lntru hybrid: lib $(CC) $(CFLAGS) $(CPPFLAGS) -o hybrid $(SRCDIR)/hybrid.c $(LDFLAGS) -L. -lntru -lcrypto -lgdi32 $(SRCDIR)/%.o: $(SRCDIR)/%.c $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ $(SRCDIR)/sha1-mb-x86_64.s: $(SRCDIR)/sha1-mb-x86_64.pl; CC=$(CC) ASM="$(AS)" $(PERL) $(SRCDIR)/sha1-mb-x86_64.pl $(PERLASM_SCHEME) > $@ $(SRCDIR)/sha1-mb-x86_64.o: $(SRCDIR)/sha1-mb-x86_64.s $(AS) $(SRCDIR)/sha1-mb-x86_64.s -o $@ $(SRCDIR)/sha256-mb-x86_64.s: $(SRCDIR)/sha256-mb-x86_64.pl; CC=$(CC) ASM="$(AS)" $(PERL) $(SRCDIR)/sha256-mb-x86_64.pl $(PERLASM_SCHEME) > $@ $(SRCDIR)/sha256-mb-x86_64.o: $(SRCDIR)/sha256-mb-x86_64.s $(AS) $(SRCDIR)/sha256-mb-x86_64.s -o $@ $(TESTDIR)/%.o: tests/%.c $(CC) $(CFLAGS) $(CPPFLAGS) -I$(SRCDIR) -c $< -o $@ clean: rm -f $(SRCDIR)\*.o rm -f $(SRCDIR)\*.s rm -f $(TESTDIR)\*.o rm -f libntru.dll rm -f testham.exe rm -f testnoham.exe rm -f bench.exe rm -f libntru.so rm -f libntru.dylib rm -f testham rm -f testnoham rm -f bench rm -f hybrid rm -f hybrid.exe distclean: clean rm -f $(DIST_NAME)\$(SRCDIR)\*.c rm -f $(DIST_NAME)\$(SRCDIR)\*.h rmdir $(DIST_NAME)\$(SRCDIR) rm -f $(DIST_NAME)\$(TESTDIR)\*.c rm -f $(DIST_NAME)\$(TESTDIR)\*.h rmdir $(DIST_NAME)\$(TESTDIR) rm -f $(DIST_NAME)\*.* rmdir $(DIST_NAME) rm -f $(DIST_NAME).zip rm -f $(DIST_NAME).tar.xz libntru-0.5/Makefile.osx000066400000000000000000000106441271556312200153160ustar00rootroot00000000000000CC?=gcc AS=$(CC) -c OPTFLAGS=-O2 bench: OPTFLAGS=-O3 CFLAGS=-g -Wall -Wextra -Wno-unused-parameter $(OPTFLAGS) SSSE3_FLAG = $(shell /usr/sbin/sysctl machdep.cpu.features | grep -m 1 -ow SSSE3) ifneq ($(SSE), no) ifeq ($(SSSE3_FLAG), SSSE3) SSE=yes endif endif AVX2_FLAG = $(shell /usr/sbin/sysctl machdep.cpu.features | grep -m 1 -ow AVX2) ifneq ($(AVX2), no) ifeq ($(AVX2_FLAG), AVX2) AVX2=yes endif endif ifeq ($(AVX2), yes) SSE=yes endif ifeq ($(SSE), no) AVX2=no endif ifeq ($(SSE), yes) CFLAGS+=-mssse3 endif ifeq ($(SSE), no) CFLAGS+=-march=x86-64 endif ifeq ($(AVX2), yes) CFLAGS+=-mavx2 endif SRCDIR=src TESTDIR=tests LIB_OBJS=bitstring.o encparams.o hash.o idxgen.o key.o mgf.o ntru.o poly.o rand.o arith.o sha1.o sha2.o nist_ctr_drbg.o rijndael.o ifeq ($(SSE), yes) LIB_OBJS+=sha1-mb-x86_64.o sha256-mb-x86_64.o endif TEST_OBJS=test_bitstring.o test_hash.o test_idxgen.o test_key.o test_ntru.o test.o test_poly.o test_util.o VERSION=0.5 INST_PFX=/usr INST_LIBDIR=$(INST_PFX)/lib INST_INCLUDE=$(INST_PFX)/include/libntru INST_DOCDIR=$(INST_PFX)/share/doc/libntru INST_HEADERS=ntru.h types.h key.h encparams.h hash.h rand.h err.h PERL=/usr/bin/perl PERLASM_SCHEME=macosx LIB_OBJS_PATHS=$(patsubst %,$(SRCDIR)/%,$(LIB_OBJS)) TEST_OBJS_PATHS=$(patsubst %,$(TESTDIR)/%,$(TEST_OBJS)) DIST_NAME=libntru-$(VERSION) MAKEFILENAME=$(lastword $(MAKEFILE_LIST)) .PHONY: all lib install uninstall dist test clean distclean all: lib lib: $(LIB_OBJS_PATHS) $(CC) $(CFLAGS) $(CPPFLAGS) -dynamiclib -o libntru.dylib $(LIB_OBJS_PATHS) $(LDFLAGS) $(LIBS) install: lib test -d "$(DESTDIR)$(INST_PFX)" || mkdir -p "$(DESTDIR)$(INST_PFX)" test -d 
"$(DESTDIR)$(INST_LIBDIR)" || mkdir "$(DESTDIR)$(INST_LIBDIR)" test -d "$(DESTDIR)$(INST_INCLUDE)" || mkdir -p "$(DESTDIR)$(INST_INCLUDE)" test -d "$(DESTDIR)$(INST_DOCDIR)" || mkdir -p "$(DESTDIR)$(INST_DOCDIR)" install -m 0755 libntru.so "$(DESTDIR)$(INST_LIBDIR)/libntru.so" install -m 0644 README.md "$(DESTDIR)$(INST_DOCDIR)/README.md" for header in $(INST_HEADERS) ; do \ install -m 0644 "$(SRCDIR)/$$header" "$(DESTDIR)$(INST_INCLUDE)/" ; \ done uninstall: rm -f "$(DESTDIR)$(INST_LIBDIR)/libntru.so" rm -f "$(DESTDIR)$(INST_DOCDIR)/README.md" rmdir "$(DESTDIR)$(INST_DOCDIR)/" for header in $(INST_HEADERS) ; do \ rm "$(DESTDIR)$(INST_INCLUDE)/$$header" ; \ done rmdir "$(DESTDIR)$(INST_INCLUDE)/" dist: rm -rf $(DIST_NAME) mkdir $(DIST_NAME) mkdir $(DIST_NAME)/$(SRCDIR) mkdir $(DIST_NAME)/$(TESTDIR) cp Makefile Makefile.win Makefile.osx README.md LICENSE PATENTS $(DIST_NAME) cp $(SRCDIR)/*.c $(DIST_NAME)/$(SRCDIR) cp $(SRCDIR)/*.h $(DIST_NAME)/$(SRCDIR) cp $(TESTDIR)/*.c $(DIST_NAME)/$(TESTDIR) cp $(TESTDIR)/*.h $(DIST_NAME)/$(TESTDIR) tar cf $(DIST_NAME).tar.xz $(DIST_NAME) --lzma rm -rf $(DIST_NAME) test: $(MAKE) -f $(MAKEFILENAME) testnoham @echo @echo Testing patent-reduced build DYLD_LIBRARY_PATH=. ./testnoham $(MAKE) -f $(MAKEFILENAME) testham @echo @echo Testing full build DYLD_LIBRARY_PATH=. ./testham testham: clean lib $(TEST_OBJS_PATHS) @echo CFLAGS=$(CFLAGS) $(CC) $(CFLAGS) -o testham $(TEST_OBJS_PATHS) -L. -lntru -lm testnoham: CFLAGS += -DNTRU_AVOID_HAMMING_WT_PATENT testnoham: clean lib $(TEST_OBJS_PATHS) @echo CFLAGS=$(CFLAGS) $(CC) $(CFLAGS) -o testnoham $(TEST_OBJS_PATHS) -L. -lntru -lm bench: lib $(CC) $(CFLAGS) $(CPPFLAGS) -o bench $(SRCDIR)/bench.c $(LDFLAGS) -L. -lntru hybrid: lib $(CC) $(CFLAGS) $(CPPFLAGS) -o hybrid $(SRCDIR)/hybrid.c $(LDFLAGS) -L. 
-lntru -lcrypto $(SRCDIR)/%.o: $(SRCDIR)/%.c $(CC) $(CFLAGS) $(CPPFLAGS) -c -fPIC $< -o $@ $(SRCDIR)/sha1-mb-x86_64.s: $(SRCDIR)/sha1-mb-x86_64.pl; CC=$(CC) ASM="$(AS)" $(PERL) $(SRCDIR)/sha1-mb-x86_64.pl $(PERLASM_SCHEME) > $@ $(SRCDIR)/sha1-mb-x86_64.o: $(SRCDIR)/sha1-mb-x86_64.s $(AS) $(SRCDIR)/sha1-mb-x86_64.s -o $@ $(SRCDIR)/sha256-mb-x86_64.s: $(SRCDIR)/sha256-mb-x86_64.pl; CC=$(CC) ASM="$(AS)" $(PERL) $(SRCDIR)/sha256-mb-x86_64.pl $(PERLASM_SCHEME) > $@ $(SRCDIR)/sha256-mb-x86_64.o: $(SRCDIR)/sha256-mb-x86_64.s $(AS) $(SRCDIR)/sha256-mb-x86_64.s -o $@ tests/%.o: tests/%.c $(CC) $(CFLAGS) $(CPPFLAGS) -fPIC -I$(SRCDIR) -c $< -o $@ clean: @# also clean files generated on other OSes rm -f $(SRCDIR)/*.o $(SRCDIR)/*.s $(TESTDIR)/*.o libntru.so libntru.a libntru.dylib libntru.dll testham testnoham testham.exe testnoham.exe bench bench.exe hybrid hybrid.exe distclean: clean rm -rf $(DIST_NAME) rm -f $(DIST_NAME).tar.xz $(DIST_NAME).zip libntru-0.5/Makefile.win000066400000000000000000000133121271556312200152750ustar00rootroot00000000000000CC?=gcc AS=$(CC) -c OPTFLAGS=-O2 bench: OPTFLAGS=-O3 -march=native CFLAGS?=-g $(OPTFLAGS) CFLAGS+=-Wall -Wextra -Wno-unused-parameter ifeq ($(AVX2), yes) CFLAGS+=-mavx2 SSE=yes endif ifeq ($(SSE), yes) CFLAGS+=-mssse3 endif SRCDIR=src TESTDIR=tests LIB_OBJS=bitstring.o encparams.o hash.o idxgen.o key.o mgf.o ntru.o poly.o rand.o arith.o sha1.o sha2.o nist_ctr_drbg.o rijndael.o ifeq ($(SSE), yes) LIB_OBJS+=sha1-mb-x86_64.o sha256-mb-x86_64.o endif TEST_OBJS=test_bitstring.o test_hash.o test_idxgen.o test_key.o test_ntru.o test.o test_poly.o test_util.o VERSION=0.5 INST_PFX=%PROGRAMFILES% INST_LIBDIR=$(INST_PFX)\libntru INST_INCLUDE=$(INST_PFX)\libntru\include INST_DOCDIR=$(INST_PFX)\libntru INST_HEADERS=ntru.h types.h key.h encparams.h hash.h rand.h err.h PERL=c:\mingw\msys\1.0\bin\perl PERLASM_SCHEME=coff LIB_OBJS_PATHS=$(patsubst %,$(SRCDIR)/%,$(LIB_OBJS)) TEST_OBJS_PATHS=$(patsubst %,$(TESTDIR)/%,$(TEST_OBJS)) DIST_NAME=libntru-$(VERSION) MAKEFILENAME=$(lastword $(MAKEFILE_LIST)) .PHONY: all lib install dist test clean distclean all: lib lib: $(LIB_OBJS_PATHS) $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o libntru.dll $(LIB_OBJS_PATHS) -lws2_32 -ladvapi32 install: lib if not exist "$(DESTDIR)$(INST_PFX)" mkdir "$(DESTDIR)$(INST_PFX)" if not exist "$(DESTDIR)$(INST_LIBDIR)" mkdir "$(DESTDIR)$(INST_LIBDIR)" if not exist "$(DESTDIR)$(INST_INCLUDE)" mkdir "$(DESTDIR)$(INST_INCLUDE)" if not exist "$(DESTDIR)$(INST_DOCDIR)" mkdir "$(DESTDIR)$(INST_DOCDIR)" copy libntru.dll "$(DESTDIR)$(INST_LIBDIR)" copy README.md "$(DESTDIR)$(INST_DOCDIR)" for %%h in ($(INST_HEADERS)) do \ copy $(SRCDIR)\%%h "$(INST_INCLUDE)" uninstall: if exist "$(DESTDIR)$(INST_LIBDIR)\libntru.dll" del "$(DESTDIR)$(INST_LIBDIR)\libntru.dll" if exist "$(DESTDIR)$(INST_DOCDIR)\README.md" del "$(DESTDIR)$(INST_DOCDIR)\README.md" for %%h in ($(DESTDIR)$(INST_HEADERS)) do \ if exist "$(DESTDIR)$(INST_INCLUDE)\%%h" del "$(DESTDIR)$(INST_INCLUDE)\%%h" if exist "$(DESTDIR)$(INST_INCLUDE)" rmdir "$(DESTDIR)$(INST_INCLUDE)" if exist "$(DESTDIR)$(INST_LIBDIR)" rmdir "$(DESTDIR)$(INST_LIBDIR)" if exist "$(DESTDIR)$(INST_DOCDIR)" rmdir "$(DESTDIR)$(INST_DOCDIR)" dist: @if exist $(DIST_NAME)\$(SRCDIR)\*.c del $(DIST_NAME)\$(SRCDIR)\*.c @if exist $(DIST_NAME)\$(SRCDIR)\*.h del $(DIST_NAME)\$(SRCDIR)\*.h @if exist $(DIST_NAME)\$(SRCDIR) rmdir $(DIST_NAME)\$(SRCDIR) @if exist $(DIST_NAME)\$(TESTDIR)\*.c del $(DIST_NAME)\$(TESTDIR)\*.c @if exist $(DIST_NAME)\$(TESTDIR)\*.h del $(DIST_NAME)\$(TESTDIR)\*.h @if exist 
$(DIST_NAME)\$(TESTDIR) rmdir $(DIST_NAME)\$(TESTDIR) @if exist $(DIST_NAME)\*.* del /q $(DIST_NAME)\*.* @if exist $(DIST_NAME) rmdir $(DIST_NAME) mkdir $(DIST_NAME) mkdir $(DIST_NAME)\$(SRCDIR) mkdir $(DIST_NAME)\$(TESTDIR) copy Makefile $(DIST_NAME) copy Makefile.win $(DIST_NAME) copy Makefile.osx $(DIST_NAME) copy README.md $(DIST_NAME) copy LICENSE $(DIST_NAME) copy PATENTS $(DIST_NAME) copy $(SRCDIR)\*.c $(DIST_NAME)\$(SRCDIR) copy $(SRCDIR)\*.h $(DIST_NAME)\$(SRCDIR) copy $(TESTDIR)\*.c $(DIST_NAME)\$(TESTDIR) copy $(TESTDIR)\*.h $(DIST_NAME)\$(TESTDIR) @rem zip is provided by MinGW zip -rm $(DIST_NAME).zip $(DIST_NAME) test: $(MAKE) -f $(MAKEFILENAME) testnoham @echo. @echo Testing patent-reduced build testnoham.exe $(MAKE) -f $(MAKEFILENAME) testham @echo. @echo Testing full build testham.exe testham: clean lib $(TEST_OBJS_PATHS) @echo CFLAGS=$(CFLAGS) $(CC) $(CFLAGS) -o testham.exe $(TEST_OBJS_PATHS) $(LDFLAGS) -L. -llibntru -lm -lws2_32 testnoham: CFLAGS += -DNTRU_AVOID_HAMMING_WT_PATENT testnoham: clean lib $(TEST_OBJS_PATHS) @echo CFLAGS=$(CFLAGS) $(CC) $(CFLAGS) -o testnoham.exe $(TEST_OBJS_PATHS) $(LDFLAGS) -L. -llibntru -lm -lws2_32 bench: lib $(CC) $(CFLAGS) $(CPPFLAGS) -o bench $(SRCDIR)/bench.c $(LDFLAGS) -L. -llibntru hybrid: lib $(CC) $(CFLAGS) $(CPPFLAGS) -o hybrid $(SRCDIR)/hybrid.c $(LDFLAGS) -L. -llibntru -lcrypto -lgdi32 $(SRCDIR)/%.o: $(SRCDIR)/%.c $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ $(SRCDIR)/sha1-mb-x86_64.s: $(SRCDIR)/sha1-mb-x86_64.pl; CC=$(CC) ASM="$(AS)" $(PERL) $(SRCDIR)/sha1-mb-x86_64.pl $(PERLASM_SCHEME) > $@ $(SRCDIR)/sha1-mb-x86_64.o: $(SRCDIR)/sha1-mb-x86_64.s $(AS) $(SRCDIR)/sha1-mb-x86_64.s -o $@ $(SRCDIR)/sha256-mb-x86_64.s: $(SRCDIR)/sha256-mb-x86_64.pl; CC=$(CC) ASM="$(AS)" $(PERL) $(SRCDIR)/sha256-mb-x86_64.pl $(PERLASM_SCHEME) > $@ $(SRCDIR)/sha256-mb-x86_64.o: $(SRCDIR)/sha256-mb-x86_64.s $(AS) $(SRCDIR)/sha256-mb-x86_64.s -o $@ $(TESTDIR)/%.o: tests/%.c $(CC) $(CFLAGS) $(CPPFLAGS) -I$(SRCDIR) -c $< -o $@ clean: @if exist $(SRCDIR)\*.o del $(SRCDIR)\*.o @if exist $(SRCDIR)\*.s del $(SRCDIR)\*.s @if exist $(TESTDIR)\*.o del $(TESTDIR)\*.o @if exist libntru.dll del libntru.dll @if exist testham.exe del testham.exe @if exist testnoham.exe del testnoham.exe @if exist bench.exe del bench.exe @if exist hybrid.exe del hybrid.exe @rem ***** clean files generated on other OSes ***** @if exist libntru.so del libntru.so @if exist libntru.a del libntru.a @if exist libntru.dylib del libntru.dylib @if exist testham del testnoham @if exist testnoham del testnoham @if exist bench del bench @if exist hybrid del hybrid distclean: clean @if exist $(DIST_NAME)\$(SRCDIR)\*.c del $(DIST_NAME)\$(SRCDIR)\*.c @if exist $(DIST_NAME)\$(SRCDIR)\*.h del $(DIST_NAME)\$(SRCDIR)\*.h @if exist $(DIST_NAME)\$(SRCDIR) rmdir $(DIST_NAME)\$(SRCDIR) @if exist $(DIST_NAME)\$(TESTDIR)\*.c del $(DIST_NAME)\$(TESTDIR)\*.c @if exist $(DIST_NAME)\$(TESTDIR)\*.h del $(DIST_NAME)\$(TESTDIR)\*.h @if exist $(DIST_NAME)\$(TESTDIR) rmdir $(DIST_NAME)\$(TESTDIR) @if exist $(DIST_NAME)\*.* del /q $(DIST_NAME)\*.* @if exist $(DIST_NAME) rmdir $(DIST_NAME) @if exist $(DIST_NAME).zip del $(DIST_NAME).zip @if exist $(DIST_NAME).tar.xz del $(DIST_NAME).tar.xz libntru-0.5/PATENTS000066400000000000000000000004451271556312200141050ustar00rootroot00000000000000This code implements inventions covered by claims of U.S. patents 6,081,597 (the basic NTRUEncrypt patent) and 7,031,468 (patent on the low Hamming weight optimization), and by similar patents in other countries. 
The patents are held by Security Innovation, Inc. (www.securityinnovation.com).

==== libntru-0.5/README.md ====

# C implementation of NTRUEncrypt

An implementation of the public-key encryption scheme NTRUEncrypt in C, following the IEEE P1363.1 standard.

NTRU's main strengths are high performance and resistance to quantum computer attacks. Its main drawback is that it is patent encumbered. The patents expire in 2021; when built with the NTRU_AVOID_HAMMING_WT_PATENT flag, libntru becomes patent-free in 2017.

Benchmark results:

![Benchmark results](https://tbuktu.github.io/ntru/images/bench.png?raw=true "Benchmark results")

For more information on the NTRUEncrypt algorithm, see the NTRU introduction page at https://tbuktu.github.com/ntru/.

## Compiling

Run ```make``` to build the library, or ```make test``` to run unit tests. ```make bench``` builds a benchmark program. On *BSD, use ```gmake``` instead of ```make```.

The ```SSE``` environment variable enables SSSE3 support (```SSE=yes```) or disables it (```SSE=no```). The default on Linux, BSD, and MacOS is to autodetect SSSE3 on the build host; the Windows default is no SSSE3. The ```AVX2``` environment variable controls AVX2 support and works just like the ```SSE``` variable.

## Usage

    #include "ntru.h"

    /* key generation */
    struct NtruEncParams params = NTRU_DEFAULT_PARAMS_128_BITS; /* see section "Parameter Sets" below */
    NtruRandGen rng_def = NTRU_RNG_DEFAULT;
    NtruRandContext rand_ctx_def;
    if (ntru_rand_init(&rand_ctx_def, &rng_def) != NTRU_SUCCESS)
        printf("rng fail\n");
    NtruEncKeyPair kp;
    if (ntru_gen_key_pair(&params, &kp, &rand_ctx_def) != NTRU_SUCCESS)
        printf("keygen fail\n");

    /* deterministic key generation from password */
    uint8_t seed[17];
    strcpy((char*)seed, "my test password");
    NtruRandGen rng_ctr_drbg = NTRU_RNG_CTR_DRBG;
    NtruRandContext rand_ctx_ctr_drbg;
    if (ntru_rand_init_det(&rand_ctx_ctr_drbg, &rng_ctr_drbg, seed, strlen((char*)seed)) != NTRU_SUCCESS)
        printf("rng fail\n");
    if (ntru_gen_key_pair(&params, &kp, &rand_ctx_ctr_drbg) != NTRU_SUCCESS)
        printf("keygen fail\n");

    /* encryption */
    uint8_t msg[9];
    strcpy((char*)msg, "whatever");
    uint8_t enc[ntru_enc_len(&params)];
    if (ntru_encrypt(msg, strlen((char*)msg), &kp.pub, &params, &rand_ctx_def, enc) != NTRU_SUCCESS)
        printf("encrypt fail\n");

    /* decryption */
    uint8_t dec[ntru_max_msg_len(&params)];
    uint16_t dec_len;
    if (ntru_decrypt((uint8_t*)&enc, &kp, &params, (uint8_t*)&dec, &dec_len) != NTRU_SUCCESS)
        printf("decrypt fail\n");

    /* generate another public key for the existing private key */
    NtruEncPubKey pub2;
    if (ntru_gen_pub(&params, &kp.priv, &pub2, &rand_ctx_def) != NTRU_SUCCESS)
        printf("pub key generation fail\n");

    /* release RNG resources */
    if (ntru_rand_release(&rand_ctx_def) != NTRU_SUCCESS)
        printf("rng fail\n");
    if (ntru_rand_release(&rand_ctx_ctr_drbg) != NTRU_SUCCESS)
        printf("rng fail\n");

    /* export key to uint8_t array */
    uint8_t pub_arr[ntru_pub_len(&params)];
    ntru_export_pub(&kp.pub, pub_arr);

    /* import key from uint8_t array */
    NtruEncPubKey pub;
    ntru_import_pub(pub_arr, &pub);

For encryption of messages longer than `ntru_max_msg_len(...)`, see `src/hybrid.c` (requires OpenSSL lib+headers, use `make hybrid` to build).

## Parameter Sets

| Name | Strength | Sizes (CText/Pub/Priv) | Enc / Dec Time (Rel.) | Pat. Until |
|:------------------------------ |:--------- |:---------------------- |:--------------------- |:------------ |
| EES401EP1 | 112 bits | 552 / 556 / 264 | 2.9 / 3.7 | Aug 19, 2017 |
| EES541EP1 | 112 bits | 744 / 748 / 132 | 1.7 / 2.5 | Aug 19, 2017 |
| EES659EP1 | 112 bits | 907 / 911 / 104 | 1.6 / 2.4 | Aug 19, 2017 |
| EES401EP2 | 112 bits | 552 / 556 / 67 | 1.0 / 1.4 | Aug 24, 2021 |
| NTRU_DEFAULT_PARAMS_112_BITS | 112 bits | Synonym for EES401EP2 or EES401EP1, dep. on NTRU_AVOID_HAMMING_WT_PATENT | | |
| EES449EP1 | 128 bits | 618 / 622 / 311 | 3.2 / 4.5 | Aug 19, 2017 |
| EES613EP1 | 128 bits | 843 / 847 / 147 | 1.9 / 2.8 | Aug 19, 2017 |
| EES761EP1 | 128 bits | 1047 / 1051 / 114 | 1.8 / 2.7 | Aug 19, 2017 |
| EES439EP1 | 128 bits | 604 / 608 / 68 | 1.2 / 1.6 | Aug 24, 2021 |
| EES443EP1 | 128 bits | 610 / 614 / 68 | 1.2 / 1.6 | Aug 24, 2021 |
| NTRU_DEFAULT_PARAMS_128_BITS | 128 bits | Synonym for EES443EP1 or EES449EP1, dep. on NTRU_AVOID_HAMMING_WT_PATENT | | |
| EES677EP1 | 192 bits | 931 / 935 / 402 | 5.4 / 7.5 | Aug 19, 2017 |
| EES887EP1 | 192 bits | 1220 / 1224 / 212 | 3.5 / 5.1 | Aug 19, 2017 |
| EES1087EP1 | 192 bits | 1495 / 1499 / 183 | 3.5 / 5.1 | Aug 19, 2017 |
| EES593EP1 | 192 bits | 816 / 820 / 87 | 1.8 / 2.5 | Aug 24, 2021 |
| EES587EP1 | 192 bits | 808 / 812 / 87 | 2.1 / 2.7 | Aug 24, 2021 |
| NTRU_DEFAULT_PARAMS_192_BITS | 192 bits | Synonym for EES587EP1 or EES677EP1, dep. on NTRU_AVOID_HAMMING_WT_PATENT | | |
| EES1087EP2 | 256 bits | 1495 / 1499 / 339 | 5.8 / 8.5 | Aug 19, 2017 |
| EES1171EP1 | 256 bits | 1611 / 1615 / 301 | 5.4 / 8.0 | Aug 19, 2017 |
| EES1499EP1 | 256 bits | 2062 / 2066 / 227 | 5.4 / 8.1 | Aug 19, 2017 |
| EES743EP1 | 256 bits | 1022 / 1026 / 111 | 2.4 / 3.4 | Aug 24, 2021 |
| NTRU_DEFAULT_PARAMS_256_BITS | 256 bits | Synonym for EES743EP1 or EES1087EP2, dep. on NTRU_AVOID_HAMMING_WT_PATENT | | |

## Random Number Generators

* Use NTRU_RNG_DEFAULT for non-deterministic keys and non-deterministic encryption
* Use NTRU_RNG_CTR_DRBG for deterministic keys and deterministic encryption

Other RNGs are NTRU_RNG_WINCRYPT, NTRU_RNG_DEVURANDOM, and NTRU_RNG_DEVRANDOM, but these may be removed in a future release.

To use your own RNG, make an array of 3 function pointers: ```{init, generate, release}``` with the following signatures:

* ```uint8_t init(NtruRandContext *rand_ctx, NtruRandGen *rand_gen);```
* ```uint8_t generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx);```
* ```uint8_t release(NtruRandContext *rand_ctx);```

Ignore ```rand_ctx->seed``` in ```init()``` if your RNG is non-deterministic. A minimal example is sketched below, after the next section.

## Supported Platforms

libntru has been tested on Linux, FreeBSD, OpenBSD, Mac OS X, and Windows (MinGW).

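As a concrete illustration of the custom-RNG interface described under "Random Number Generators" above, here is a minimal sketch. It is hypothetical example code, not part of libntru: the function names are invented, plain `rand()` is not cryptographically secure, and the assumed success/failure return convention (1 = success) should be checked against the built-in generators in `src/rand.c`.

    #include <stdlib.h>
    #include "ntru.h"

    /* assumed convention: return 1 on success, 0 on failure */
    uint8_t my_rng_init(NtruRandContext *rand_ctx, NtruRandGen *rand_gen) {
        return 1;   /* no state to set up in this toy example */
    }

    uint8_t my_rng_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx) {
        uint16_t i;
        for (i=0; i<len; i++)
            rand_data[i] = rand() & 0xff;   /* NOT secure -- illustration only */
        return 1;
    }

    uint8_t my_rng_release(NtruRandContext *rand_ctx) {
        return 1;   /* nothing to free */
    }

    NtruRandGen my_rng = {my_rng_init, my_rng_generate, my_rng_release};
    /* then pass &my_rng wherever the examples above use &rng_def */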
## Further reading

* Wikipedia article: https://en.wikipedia.org/wiki/NTRUEncrypt
* Original NTRUEncrypt paper: https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.25.8422&rep=rep1&type=pdf
* Follow-up NTRUEncrypt paper: https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.64.6834&rep=rep1&type=pdf
* NTRU articles (technical and mathematical): https://www.securityinnovation.com/products/encryption-libraries/ntru-crypto/ntru-resources.html
* EESS: http://grouper.ieee.org/groups/1363/lattPK/submissions/EESS1v2.pdf
* Jeffrey Hoffstein et al: An Introduction to Mathematical Cryptography, Springer-Verlag, ISBN 978-0-387-77993-5

==== libntru-0.5/changelog ====

0.5 (5/14/2016)
 * RNGs: NTRU_RNG_DEFAULT uses CTR_DRBG now, NTRU_RNG_CTR_DRBG replaces
   NTRU_RNG_IGF2. This change breaks the API!
 * Denser encoding of private keys. This change breaks binary compatibility!
 * new API feature: multiple public keys for the same private key
 * new parameter sets: EES443EP1 and EES587EP1
 * new #defines for referencing param sets by strength: NTRU_DEFAULT_PARAMS_xxx_BITS
 * BSD support
 * bugfixes:
   o fixed deterministic encryption on big-endian machines
   o fixed some incorrect error codes when key generation failed
   o fixed generation of the polynomial g, which deviated from the standard
   o fixed SSE detection on BSD
   o fixed buffer overruns (thanks Jeffrey Quesnelle)
   o fixed a resource leak
   o fixed tests on ARMv6
   o fixed calculation of M_len
 * AVX2 optimizations (thanks Shay Gueron and Fabian Schlieker)
 * minor optimizations

0.4.1 (5/11/2015)
 * Experimental support for OS/2 (thanks Elbert Pol)
 * Fixed build when the CC environment variable is set
 * Fixed a memory leak

0.4 (5/4/2015)
 * massive performance improvements
 * new Makefile targets for distros (thanks Julian Ospald)
 * made the library C++ friendly (thanks Ruben De Smet)
 * new functions: ntru_params_from_key_pair() and ntru_params_from_priv_key()
 * fixed a memory leak
 * fixed parameter values
 * fixed data-dependent branches

0.3 (11/14/2014)
 * option to exclude code that is patented beyond 2017
 * RNG-related API changes for thread safety and closeability of OS handles
 * several bugs fixed
 * sample code for NTRU+AES encryption

0.2 (3/30/2014)
 * supports all EES parameter sets
 * supports deterministic encryption and key generation
 * works on Windows (MinGW)
 * no longer depends on OpenSSL
 * several bugs fixed
 * performance improvements

0.1 (3/19/2012)
Initial release

==== libntru-0.5/src/arith.c ====

#include "arith.h"

uint8_t ntru_log2(uint16_t n) {
    uint8_t log = 0;
    while (n > 1) {
        n /= 2;
        log++;
    }
    return log;
}

==== libntru-0.5/src/arith.h ====

#ifndef NTRU_ARITH_H
#define NTRU_ARITH_H

#include <stdint.h>

/**
 * @brief logarithm
 *
 * Returns floor(log(n)).
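 * Here log is the base-2 logarithm: for example, ntru_log2(2048) = 11
 * and ntru_log2(3) = 1, since the loop in arith.c above halves n until
 * it reaches 1.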
 *
 * @param n
 * @return
 */
uint8_t ntru_log2(uint16_t n);

#endif /* NTRU_ARITH_H */

==== libntru-0.5/src/bench.c ====

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "ntru.h"

#define NUM_ITER_KEYGEN 50
#define NUM_ITER_ENCDEC 10000

/*
 * The __MACH__ and __MINGW32__ code below is from
 * https://github.com/credentials/silvia/commit/e327067cf7feaf62ac0bde84d13ee47372c0094e
 */

#ifdef __MACH__
/*
 * Mac OS X does not have clock_gettime for some reason
 *
 * Use solution from here to fix it:
 * http://stackoverflow.com/questions/5167269/clock-gettime-alternative-in-mac-os-x
 */
#define CLOCK_REALTIME 0
#include <mach/clock.h>
#include <mach/mach.h>

void clock_gettime(uint32_t clock, struct timespec* the_time) {
    clock_serv_t cclock;
    mach_timespec_t mts;
    host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
    clock_get_time(cclock, &mts);
    mach_port_deallocate(mach_task_self(), cclock);

    the_time->tv_sec = mts.tv_sec;
    the_time->tv_nsec = mts.tv_nsec;
}
#endif /* __MACH__ */

#ifdef __MINGW32__
/*
 * MinGW does not have clock_gettime for some reason
 *
 * Use solution from here to fix it:
 * http://stackoverflow.com/questions/5404277/porting-clock-gettime-to-windows
 */
#include <windows.h>
#include <stdint.h>
#define CLOCK_REALTIME 0

LARGE_INTEGER getFILETIMEoffset() {
    SYSTEMTIME s;
    FILETIME f;
    LARGE_INTEGER t;

    s.wYear = 1970;
    s.wMonth = 1;
    s.wDay = 1;
    s.wHour = 0;
    s.wMinute = 0;
    s.wSecond = 0;
    s.wMilliseconds = 0;
    SystemTimeToFileTime(&s, &f);
    t.QuadPart = f.dwHighDateTime;
    t.QuadPart <<= 32;
    t.QuadPart |= f.dwLowDateTime;
    return (t);
}

void clock_gettime(uint32_t X, struct timespec *ts) {
    LARGE_INTEGER t;
    FILETIME f;
    double nanoseconds;
    static LARGE_INTEGER offset;
    static double frequencyToNanoseconds;
    static uint32_t initialized = 0;
    static BOOL usePerformanceCounter = 0;

    if (!initialized) {
        LARGE_INTEGER performanceFrequency;
        initialized = 1;
        usePerformanceCounter = QueryPerformanceFrequency(&performanceFrequency);
        if (usePerformanceCounter) {
            QueryPerformanceCounter(&offset);
            frequencyToNanoseconds = (double)performanceFrequency.QuadPart / 1000000000.;
        }
        else {
            offset = getFILETIMEoffset();
            frequencyToNanoseconds = 0.010;
        }
    }
    if (usePerformanceCounter)
        QueryPerformanceCounter(&t);
    else {
        GetSystemTimeAsFileTime(&f);
        t.QuadPart = f.dwHighDateTime;
        t.QuadPart <<= 32;
        t.QuadPart |= f.dwLowDateTime;
    }

    t.QuadPart -= offset.QuadPart;
    nanoseconds = (double)t.QuadPart / frequencyToNanoseconds;
    t.QuadPart = nanoseconds;
    ts->tv_sec = t.QuadPart / 1000000000;
    ts->tv_nsec = t.QuadPart % 1000000000;
}
#endif /* __MINGW32__ */

int compare_double(const void *p1, const void *p2) {
    double t1 = *(double*)p1;
    double t2 = *(double*)p2;
    return t1<t2 ? -1 : (t1>t2 ? 1 : 0);
}
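/*
 * The benchmark reports the median of the timing samples rather than the
 * mean, so occasional slow outliers (scheduler preemption, cold caches)
 * do not skew the result. compare_double returns an explicit -1/0/1
 * because qsort wants an int; casting the difference of two doubles to
 * int would truncate small deltas to 0 and misorder them.
 */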
double median(double *samples, int num_samples) {
    if (num_samples == 0)
        return 0;
    if (num_samples == 1)
        return samples[0];

    qsort(samples, num_samples, sizeof(samples[0]), compare_double);
    if (num_samples%2 == 0)
        return (samples[num_samples/2-1]+samples[num_samples/2]) / 2;
    else
        return samples[num_samples/2];
}

void print_time(char *label, double *samples, int num_samples) {
    double time = median(samples, num_samples);
    double per_sec = 1000000.0 / time;
#ifdef WIN32
    printf("%s %dus=%d/sec ", label, (uint32_t)time, (uint32_t)per_sec);
#else
    printf("%s %dμs=%d/sec ", label, (uint32_t)time, (uint32_t)per_sec);
#endif
    fflush(stdout);
}

int main(int argc, char **argv) {
    printf("Please wait...\n");

    NtruEncParams param_arr[] = ALL_PARAM_SETS;
    uint8_t success = 1;
    uint8_t param_idx;
    for (param_idx=0; param_idx

==== libntru-0.5/src/bitstring.c ====

#include <string.h>
#include "bitstring.h"

void ntru_append_byte(NtruBitStr *a, uint8_t b) {
    if (a->num_bytes == 0) {
        a->num_bytes = 1;
        a->buf[0] = b;
        a->last_byte_bits = 8;
    }
    else if (a->last_byte_bits == 8)
        a->buf[a->num_bytes++] = b;
    else {
        uint8_t s = 8 - a->last_byte_bits;
        a->buf[a->num_bytes-1] |= b << a->last_byte_bits;
        a->buf[a->num_bytes++] = b >> s;
    }
}

void ntru_append(NtruBitStr *a, uint8_t *b, uint16_t blen) {
    uint16_t i;
    for (i=0; i<blen; i++)
        ntru_append_byte(a, b[i]);
}

void ntru_trailing(NtruBitStr *a, uint8_t num_bits, NtruBitStr *b) {
    b->num_bytes = (num_bits+7) / 8;
    memcpy(&b->buf, &a->buf, sizeof b->buf);
    b->last_byte_bits = num_bits % 8;
    if (b->last_byte_bits == 0)
        b->last_byte_bits = 8;
    else {
        int8_t s = b->last_byte_bits;
        b->buf[b->num_bytes-1] = b->buf[b->num_bytes-1] & ((1<<s)-1);
    }
}

uint16_t ntru_leading(NtruBitStr *a, uint8_t num_bits) {
    uint16_t start_bit = (a->num_bytes-1)*8 + a->last_byte_bits - num_bits;
    uint16_t start_byte = start_bit / 8;
    uint8_t start_bit_in_byte = start_bit % 8;
    uint16_t sum = a->buf[start_byte] >> start_bit_in_byte;
    uint8_t shift = 8 - start_bit_in_byte;
    uint16_t i;
    for (i=start_byte+1; i<a->num_bytes-1; i++) {
        sum |= a->buf[i] << shift;
        shift += 8;
    }
    uint8_t final_bits = num_bits - shift;   /* #bits in the final byte */
    uint8_t afin = a->buf[a->num_bytes-1];
    sum |= (afin & ((1<<final_bits)-1)) << shift;
    return sum;
}

void ntru_truncate(NtruBitStr *a, uint8_t num_bits) {
    a->num_bytes -= num_bits / 8;
    a->last_byte_bits -= num_bits %= 8;
    if (a->last_byte_bits < 0) {
        a->last_byte_bits += 8;
        a->num_bytes--;
    }
}

==== libntru-0.5/src/bitstring.h ====

#ifndef NTRU_BITSTRING_H
#define NTRU_BITSTRING_H

#include <stdint.h>
#include "encparams.h"

typedef struct NtruBitStr {
    uint8_t buf[NTRU_MAX_BIT_STR_LEN];
    uint16_t num_bytes;      /* includes the last byte even if only some of its bits are used */
    int8_t last_byte_bits;   /* last_byte_bits <= 8 */
} NtruBitStr;

void ntru_append(NtruBitStr *a, uint8_t *b, uint16_t blen);

void ntru_trailing(NtruBitStr *a, uint8_t num_bits, NtruBitStr *b);

uint16_t ntru_leading(NtruBitStr *a, uint8_t num_bits);

void ntru_truncate(NtruBitStr *a, uint8_t num_bits);

#endif /* NTRU_BITSTRING_H */

==== libntru-0.5/src/encparams.c ====

#include "encparams.h"
#include "arith.h"

const NtruEncParams EES401EP1 = {\
    "EES401EP1",     /* name */\
    401,             /* N */\
    2048,            /* q */\
    0,               /* prod_flag */\
    113,             /* df */\
    0,\
    0,\
    133,             /* dg */\
    113,             /* dm0 */\
    112,             /* db */\
    11,              /* c */\
    32,              /* min_calls_r */\
    9,               /* min_calls_mask */\
    1,               /* hash_seed */\
    {0, 2, 4},       /* oid */\
    ntru_sha1,       /* hash */\
    ntru_sha1_4way,  /* hash_4way */\
    ntru_sha1_8way,  /* hash_8way */\
    20,              /* hlen */\
    114              /* pklen */\
};

const NtruEncParams EES449EP1 = {\
    "EES449EP1",     /* name */\
    449,             /* N */\
    2048,            /* q */\
    0,               /* prod_flag */\
    134,             /* df */\
    0,\
    0,\
    149,             /* dg */\
    134,             /* dm0 */\
    128,             /* db */\
    9,               /* c */\
    31,              /* min_calls_r */\
    9,               /* min_calls_mask */\
    1,               /*
hash_seed */\ {0, 3, 3}, /* oid */\ ntru_sha1, /* hash */\ ntru_sha1_4way, /* hash_4way */\ ntru_sha1_8way, /* hash_8way */\ 20, /* hlen */\ 128 /* pklen */\ }; const NtruEncParams EES677EP1 = {\ "EES677EP1", /* name */\ 677, /* N */\ 2048, /* q */\ 0, /* prod_flag */\ 157, /* df */\ 0,\ 0,\ 225, /* dg */\ 157, /* dm0 */\ 192, /* db */\ 11, /* c */\ 27, /* min_calls_r */\ 9, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 5, 3}, /* oid */\ ntru_sha256, /* hash */\ ntru_sha256_4way, /* hash_4way */\ ntru_sha256_8way, /* hash_8way */\ 32, /* hlen */\ 192 /* pklen */\ }; const NtruEncParams EES1087EP2 = {\ "EES1087EP2", /* name */\ 1087, /* N */\ 2048, /* q */\ 0, /* prod_flag */\ 120, /* df */\ 0,\ 0,\ 362, /* dg */\ 120, /* dm0 */\ 256, /* db */\ 13, /* c */\ 25, /* min_calls_r */\ 14, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 6, 3}, /* oid */\ ntru_sha256, /* hash */\ ntru_sha256_4way, /* hash_4way */\ ntru_sha256_8way, /* hash_8way */\ 32, /* hlen */\ 256 /* pklen */\ }; const NtruEncParams EES541EP1 = {\ "EES541EP1", /* name */\ 541, /* N */\ 2048, /* q */\ 0, /* prod_flag */\ 49, /* df */\ 0,\ 0,\ 180, /* dg */\ 49, /* dm0 */\ 112, /* db */\ 12, /* c */\ 15, /* min_calls_r */\ 11, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 2, 5}, /* oid */\ ntru_sha1, /* hash */\ ntru_sha1_4way, /* hash_4way */\ ntru_sha1_8way, /* hash_8way */\ 20, /* hlen */\ 112 /* pklen */\ }; const NtruEncParams EES613EP1 = {\ "EES613EP1", /* name */\ 613, /* N */\ 2048, /* q */\ 0, /* prod_flag */\ 55, /* df */\ 0,\ 0,\ 204, /* dg */\ 55, /* dm0 */\ 128, /* db */\ 11, /* c */\ 16, /* min_calls_r */\ 13, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 3, 4}, /* oid */\ ntru_sha1, /* hash */\ ntru_sha1_4way, /* hash_4way */\ ntru_sha1_8way, /* hash_8way */\ 20, /* hlen */\ 128 /* pklen */\ }; const NtruEncParams EES887EP1 = {\ "EES887EP1", /* name */\ 887, /* N */\ 2048, /* q */\ 0, /* prod_flag */\ 81, /* df */\ 0,\ 0,\ 295, /* dg */\ 81, /* dm0 */\ 192, /* db */\ 10, /* c */\ 13, /* min_calls_r */\ 12, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 5, 4}, /* oid */\ ntru_sha256, /* hash */\ ntru_sha256_4way, /* hash_4way */\ ntru_sha256_8way, /* hash_8way */\ 32, /* hlen */\ 192 /* pklen */\ }; const NtruEncParams EES1171EP1 = {\ "EES1171EP1", /* name */\ 1171, /* N */\ 2048, /* q */\ 0, /* prod_flag */\ 106, /* df */\ 0,\ 0,\ 390, /* dg */\ 106, /* dm0 */\ 256, /* db */\ 12, /* c */\ 20, /* min_calls_r */\ 15, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 6, 4}, /* oid */\ ntru_sha256, /* hash */\ ntru_sha256_4way, /* hash_4way */\ ntru_sha256_8way, /* hash_8way */\ 32, /* hlen */\ 256 /* pklen */\ }; const NtruEncParams EES659EP1 = {\ "EES659EP1", /* name */\ 659, /* N */\ 2048, /* q */\ 0, /* prod_flag */\ 38, /* df */\ 0,\ 0,\ 219, /* dg */\ 38, /* dm0 */\ 112, /* db */\ 11, /* c */\ 11, /* min_calls_r */\ 14, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 2, 6}, /* oid */\ ntru_sha1, /* hash */\ ntru_sha1_4way, /* hash_4way */\ ntru_sha1_8way, /* hash_8way */\ 20, /* hlen */\ 112 /* pklen */\ }; const NtruEncParams EES761EP1 = {\ "EES761EP1", /* name */\ 761, /* N */\ 2048, /* q */\ 0, /* prod_flag */\ 42, /* df */\ 0,\ 0,\ 253, /* dg */\ 42, /* dm0 */\ 128, /* db */\ 12, /* c */\ 13, /* min_calls_r */\ 16, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 3, 5}, /* oid */\ ntru_sha1, /* hash */\ ntru_sha1_4way, /* hash_4way */\ ntru_sha1_8way, /* hash_8way */\ 20, /* hlen */\ 128 /* pklen */\ }; const NtruEncParams EES1087EP1 = {\ "EES1087EP1", /* name */\ 1087, /* N */\ 2048, /* q */\ 0, /* prod_flag */\ 63, /* 
df */\ 0,\ 0,\ 362, /* dg */\ 63, /* dm0 */\ 192, /* db */\ 13, /* c */\ 13, /* min_calls_r */\ 14, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 5, 5}, /* oid */\ ntru_sha256, /* hash */\ ntru_sha256_4way, /* hash_4way */\ ntru_sha256_8way, /* hash_8way */\ 32, /* hlen */\ 192 /* pklen */\ }; const NtruEncParams EES1499EP1 = {\ "EES1499EP1", /* name */\ 1499, /* N */\ 2048, /* q */\ 0, /* prod_flag */\ 79, /* df */\ 0,\ 0,\ 499, /* dg */\ 79, /* dm0 */\ 256, /* db */\ 13, /* c */\ 17, /* min_calls_r */\ 19, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 6, 5}, /* oid */\ ntru_sha256, /* hash */\ ntru_sha256_4way, /* hash_4way */\ ntru_sha256_8way, /* hash_8way */\ 32, /* hlen */\ 256 /* pklen */\ }; #ifndef NTRU_AVOID_HAMMING_WT_PATENT const NtruEncParams EES401EP2 = {\ "EES401EP2", /* name */\ 401, /* N */\ 2048, /* q */\ 1, /* prod_flag */\ 8, /* df1 */\ 8, /* df2 */\ 6, /* df3 */\ 133, /* dg */\ 101, /* dm0 */\ 112, /* db */\ 11, /* c */\ 10, /* min_calls_r */\ 6, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 2, 16}, /* oid */\ ntru_sha1, /* hash */\ ntru_sha1_4way, /* hash_4way */\ ntru_sha1_8way, /* hash_8way */\ 20, /* hlen */\ 112 /* pklen */\ }; const NtruEncParams EES439EP1 = {\ "EES439EP1", /* name */\ 439, /* N */\ 2048, /* q */\ 1, /* prod_flag */\ 9, /* df1 */\ 8, /* df2 */\ 5, /* df3 */\ 146, /* dg */\ 112, /* dm0 */\ 128, /* db */\ 9, /* c */\ 15, /* min_calls_r */\ 6, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 3, 16}, /* oid */\ ntru_sha1, /* hash */\ ntru_sha1_4way, /* hash_4way */\ ntru_sha1_8way, /* hash_8way */\ 20, /* hlen */\ 128 /* pklen */\ }; const NtruEncParams EES443EP1 = {\ "EES443EP1", /* name */\ 443, /* N */\ 2048, /* q */\ 1, /* prod_flag */\ 9, /* df1 */\ 8, /* df2 */\ 5, /* df3 */\ 148, /* dg */\ 115, /* dm0 */\ 128, /* db */\ 9, /* c */\ 8, /* min_calls_r */\ 5, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 3, 17}, /* oid */\ ntru_sha256, /* hash */\ ntru_sha256_4way, /* hash_4way */\ ntru_sha256_8way, /* hash_8way */\ 32, /* hlen */\ 128 /* pklen */\ }; const NtruEncParams EES593EP1 = {\ "EES593EP1", /* name */\ 593, /* N */\ 2048, /* q */\ 1, /* prod_flag */\ 10, /* df1 */\ 10, /* df2 */\ 8, /* df3 */\ 197, /* dg */\ 158, /* dm0 */\ 192, /* db */\ 11, /* c */\ 12, /* min_calls_r */\ 5, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 5, 16}, /* oid */\ ntru_sha256, /* hash */\ ntru_sha256_4way, /* hash_4way */\ ntru_sha256_8way, /* hash_8way */\ 32, /* hlen */\ 192 /* pklen */\ }; const NtruEncParams EES587EP1 = {\ "EES587EP1", /* name */\ 587, /* N */\ 2048, /* q */\ 1, /* prod_flag */\ 10, /* df1 */\ 10, /* df2 */\ 8, /* df3 */\ 196, /* dg */\ 157, /* dm0 */\ 192, /* db */\ 11, /* c */\ 13, /* min_calls_r */\ 7, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 5, 17}, /* oid */\ ntru_sha256, /* hash */\ ntru_sha256_4way, /* hash_4way */\ ntru_sha256_8way, /* hash_8way */\ 32, /* hlen */\ 192 /* pklen */\ }; const NtruEncParams EES743EP1 = {\ "EES743EP1", /* name */\ 743, /* N */\ 2048, /* q */\ 1, /* prod_flag */\ 11, /* df1 */\ 11, /* df2 */\ 15, /* df3 */\ 247, /* dg */\ 204, /* dm0 */\ 256, /* db */\ 13, /* c */\ 12, /* min_calls_r */\ 7, /* min_calls_mask */\ 1, /* hash_seed */\ {0, 6, 16}, /* oid */\ ntru_sha256, /* hash */\ ntru_sha256_4way, /* hash_4way */\ ntru_sha256_8way, /* hash_8way */\ 32, /* hlen */\ 256 /* pklen */\ }; #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ uint16_t ntru_enc_len(const NtruEncParams *params) { return ntru_enc_len_Nq(params->N, params->q); } uint16_t ntru_enc_len_Nq(uint16_t N, uint16_t q) { /* make sure q is a power of 2 
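       (a power of two has exactly one bit set, so q & (q-1) == 0,
       while any other nonzero q shares at least one bit with q-1
       and fails the test below)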
*/ if (q & (q-1)) return 0; uint16_t len_bits = N * ntru_log2(q); uint16_t len_bytes = (len_bits+7) / 8; return len_bytes; } libntru-0.5/src/encparams.h000066400000000000000000000154461271556312200157640ustar00rootroot00000000000000#ifndef NTRU_ENCPARAMS_H #define NTRU_ENCPARAMS_H #include #include "hash.h" /* max hash output length in bytes */ #define NTRU_MAX_HASH_LEN 64 /** upper limit for the parameter c in NtruEncParams */ #define NTRU_MAX_C 32 /** max length of a bit string in bytes */ #define NTRU_MAX_BIT_STR_LEN (NTRU_MAX_HASH_LEN * (NTRU_MAX_C+1)) /* A set of parameters for NtruEncrypt */ typedef struct NtruEncParams { /* name of the parameter set */ char name[11]; /* number of polynomial coefficients */ uint16_t N; /* modulus */ uint16_t q; /* 1 for product-form private keys, 0 for ternary */ uint8_t prod_flag; /* * number of ones in the private polynomial f1 (if prod=1) or f (if prod=0) */ uint16_t df1; /* * number of ones in the private polynomial f2; ignored if prod=0 */ uint16_t df2; /* * number of ones in the private polynomial f3; ignored if prod=0 */ uint16_t df3; /* * number of ones in the polynomial g (used during key generation) */ uint16_t dg; /* * minimum acceptable number of -1's, 0's, and 1's in the polynomial m' * in the last encryption step */ uint16_t dm0; /* number of random bits to prepend to the message */ uint16_t db; /* a parameter for the Index Generation Function */ uint16_t c; /* minimum number of hash calls for the IGF to make */ uint16_t min_calls_r; /* minimum number of calls to generate the masking polynomial */ uint16_t min_calls_mask; /* * whether to hash the seed in the MGF first (1) or * use the seed directly (0) */ uint8_t hash_seed; /* three bytes that uniquely identify the parameter set */ uint8_t oid[3]; /* hash function, e.g. ntru_sha256 */ void (*hash)(uint8_t[], uint16_t, uint8_t[]); /* hash function for 4 inputs, e.g. ntru_sha256_4way */ void (*hash_4way)(uint8_t *[4], uint16_t, uint8_t *[4]); /* hash function for 8 inputs, e.g. ntru_sha256_8way */ void (*hash_8way)(uint8_t *[8], uint16_t, uint8_t *[8]); /* output length of the hash function */ uint16_t hlen; /* number of bits of the public key to hash */ uint16_t pklen; } NtruEncParams; /* * An IEEE 1361.1 parameter set that gives 112 bits of security and is optimized for key size. */ extern const NtruEncParams EES401EP1; /* * An IEEE 1361.1 parameter set that gives 128 bits of security and is optimized for key size. */ extern const NtruEncParams EES449EP1; /* * An IEEE 1361.1 parameter set that gives 192 bits of security and is optimized for key size. */ extern const NtruEncParams EES677EP1; /* * An IEEE 1361.1 parameter set that gives 256 bits of security and is optimized for key size. */ extern const NtruEncParams EES1087EP2; /* * An IEEE 1361.1 parameter set that gives 112 bits of security and is * a tradeoff between key size and encryption/decryption speed. */ extern const NtruEncParams EES541EP1; /* * An IEEE 1361.1 parameter set that gives 128 bits of security and is * a tradeoff between key size and encryption/decryption speed. */ extern const NtruEncParams EES613EP1; /* * An IEEE 1361.1 parameter set that gives 192 bits of security and is * a tradeoff between key size and encryption/decryption speed. */ extern const NtruEncParams EES887EP1; /* * An IEEE 1361.1 parameter set that gives 256 bits of security and is * a tradeoff between key size and encryption/decryption speed. 
*/ extern const NtruEncParams EES1171EP1; /* * An IEEE 1361.1 parameter set that gives 112 bits of security and is * optimized for encryption/decryption speed. */ extern const NtruEncParams EES659EP1; /* * An IEEE 1361.1 parameter set that gives 128 bits of security and is * optimized for encryption/decryption speed. */ extern const NtruEncParams EES761EP1; /* * An IEEE 1361.1 parameter set that gives 192 bits of security and is * optimized for encryption/decryption speed. */ extern const NtruEncParams EES1087EP1; /* * An IEEE 1361.1 parameter set that gives 256 bits of security and is * optimized for encryption/decryption speed. */ extern const NtruEncParams EES1499EP1; #ifndef NTRU_AVOID_HAMMING_WT_PATENT /* * A product-form parameter set that gives 112 bits of security. */ extern const NtruEncParams EES401EP2; /* * A product-form parameter set that gives 128 bits of security. * DEPRECATED -- use EES443EP1 instead. */ extern const NtruEncParams EES439EP1; /* * A product-form parameter set that gives 128 bits of security. */ extern const NtruEncParams EES443EP1; /* * A product-form parameter set that gives 192 bits of security. * DEPRECATED -- use EES587EP1 instead. */ extern const NtruEncParams EES593EP1; /* * A product-form parameter set that gives 192 bits of security. */ extern const NtruEncParams EES587EP1; /* * A product-form parameter set that gives 256 bits of security. */ extern const NtruEncParams EES743EP1; #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ #ifndef NTRU_AVOID_HAMMING_WT_PATENT /* * The default parameter set for 112 bits of security. */ #define NTRU_DEFAULT_PARAMS_112_BITS EES401EP2 /* * The default parameter set for 128 bits of security. */ #define NTRU_DEFAULT_PARAMS_128_BITS EES443EP1 /* * The default parameter set for 192 bits of security. */ #define NTRU_DEFAULT_PARAMS_192_BITS EES587EP1 /* * The default parameter set for 256 bits of security. */ #define NTRU_DEFAULT_PARAMS_256_BITS EES743EP1 #define ALL_PARAM_SETS {EES401EP1, EES449EP1, EES677EP1, EES1087EP2, EES541EP1, EES613EP1, EES887EP1, EES1171EP1, EES659EP1, EES761EP1, EES1087EP1, EES1499EP1, EES401EP2, EES439EP1, EES443EP1, EES593EP1, EES587EP1, EES743EP1} #else /* * The default parameter set for 112 bits of security. */ #define NTRU_DEFAULT_PARAMS_112_BITS EES541EP1 /* * The default parameter set for 128 bits of security. */ #define NTRU_DEFAULT_PARAMS_128_BITS EES613EP1 /* * The default parameter set for 192 bits of security. */ #define NTRU_DEFAULT_PARAMS_192_BITS EES887EP1 /* * The default parameter set for 256 bits of security. */ #define NTRU_DEFAULT_PARAMS_256_BITS EES1171EP1 #define ALL_PARAM_SETS {EES401EP1, EES449EP1, EES677EP1, EES1087EP2, EES541EP1, EES613EP1, EES887EP1, EES1171EP1, EES659EP1, EES761EP1, EES1087EP1, EES1499EP1} #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ /** * @brief Ciphertext length * * Returns the length of an encrypted message in bytes for a given parameter set. * * @param params * @return the length in bytes or 0 if params->q is not a power of two */ uint16_t ntru_enc_len(const NtruEncParams *params); /** * @brief Ciphertext length * * Returns the length of an encrypted message in bytes for a given N and q value. 
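 * (Example: EES443EP1 has N = 443 and q = 2048 = 2^11, so
 * len_bits = 443 * 11 = 4873 and the result is (4873+7)/8 = 610 bytes,
 * matching the EES443EP1 ciphertext size in the README table.)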
* * @param N * @param q * @return the length in bytes or 0 if q is not a power of two */ uint16_t ntru_enc_len_Nq(uint16_t N, uint16_t q); #endif /* NTRU_ENCPARAMS_H */ libntru-0.5/src/err.h000066400000000000000000000006741271556312200146000ustar00rootroot00000000000000#ifndef NTRU_ERR_H #define NTRU_ERR_H #define NTRU_SUCCESS 0 #define NTRU_ERR_OUT_OF_MEMORY 1 #define NTRU_ERR_PRNG 2 #define NTRU_ERR_MSG_TOO_LONG 3 #define NTRU_ERR_INVALID_MAX_LEN 4 #define NTRU_ERR_DM0_VIOLATION 5 #define NTRU_ERR_NO_ZERO_PAD 6 #define NTRU_ERR_INVALID_ENCODING 7 #define NTRU_ERR_NULL_ARG 8 #define NTRU_ERR_UNKNOWN_PARAM_SET 9 #define NTRU_ERR_INVALID_PARAM 10 #define NTRU_ERR_INVALID_KEY 11 #endif /* NTRU_ERR_H */ libntru-0.5/src/hash.c000066400000000000000000000365241271556312200147310ustar00rootroot00000000000000#include #include #if defined __SSSE3__ && _LP64 #include #endif #ifdef WIN32 #include #else #include #endif #include "sph_sha1.h" #include "sph_sha2.h" #include "hash.h" void ntru_sha1(uint8_t *input, uint16_t input_len, uint8_t *digest) { sph_sha1_context context; sph_sha1_init(&context); sph_sha1(&context, input, input_len); sph_sha1_close(&context, digest); } void ntru_sha256(uint8_t *input, uint16_t input_len, uint8_t *digest) { sph_sha256_context context; sph_sha256_init(&context); sph_sha256(&context, input, input_len); sph_sha256_close(&context, digest); } #if defined __SSSE3__ && _LP64 typedef struct { uint32_t A[8], B[8], C[8], D[8], E[8]; uint32_t Nl,Nh; uint32_t data[8][16]; uint8_t num; /* 1 or 2 */ } SHA1_MB_CTX; typedef struct { uint32_t A[8]; uint32_t B[8]; uint32_t C[8]; uint32_t D[8]; uint32_t E[8]; uint32_t F[8]; uint32_t G[8]; uint32_t H[8]; uint32_t Nl, Nh; uint8_t num; /* 1 or 2 */ uint32_t data[8][16]; } SHA256_MB_CTX; typedef struct { uint8_t *ptr; uint32_t blocks; } HASH_DESC; #ifdef __AVX2__ /* don’t detect SHA extensions for now, just report AVX/AVX2 */ uint32_t OPENSSL_ia32cap_P[] __attribute__((visibility("hidden"))) = {0, 1<<28, 1<<5, 0}; #else uint32_t OPENSSL_ia32cap_P[] __attribute__((visibility("hidden"))) = {0, 0, 0, 0}; #endif extern void sha1_multi_block(SHA1_MB_CTX *, HASH_DESC *, int num); extern void sha256_multi_block(SHA256_MB_CTX *, HASH_DESC *, int num); void SHA1_MB_Init(SHA1_MB_CTX *ctx) { memset(ctx, 0, sizeof(*ctx)); __m128i a = _mm_set1_epi32(0x67452301); __m128i b = _mm_set1_epi32(0xefcdab89); __m128i c = _mm_set1_epi32(0x98badcfe); __m128i d = _mm_set1_epi32(0x10325476); __m128i e = _mm_set1_epi32(0xc3d2e1f0); _mm_storeu_si128((__m128i*)&ctx->A, a); _mm_storeu_si128((__m128i*)&ctx->B, b); _mm_storeu_si128((__m128i*)&ctx->C, c); _mm_storeu_si128((__m128i*)&ctx->D, d); _mm_storeu_si128((__m128i*)&ctx->E, e); } void SHA1_MB_Init8(SHA1_MB_CTX *ctx) { /* init A[0]..A[3], B[0]..B[3], ... */ SHA1_MB_Init(ctx); /* init A[4]..A[7], B[4]..B[7], ... */ __m128i a = _mm_set1_epi32(0x67452301); __m128i b = _mm_set1_epi32(0xefcdab89); __m128i c = _mm_set1_epi32(0x98badcfe); __m128i d = _mm_set1_epi32(0x10325476); __m128i e = _mm_set1_epi32(0xc3d2e1f0); _mm_storeu_si128((__m128i*)&ctx->A[4], a); _mm_storeu_si128((__m128i*)&ctx->B[4], b); _mm_storeu_si128((__m128i*)&ctx->C[4], c); _mm_storeu_si128((__m128i*)&ctx->D[4], d); _mm_storeu_si128((__m128i*)&ctx->E[4], e); } void SHA1_MB_Update(SHA1_MB_CTX *ctx, uint8_t *data[4], size_t len) { if (len == 0) return; uint32_t l = (ctx->Nl + (((uint32_t) len) << 3)) & 0xffffffffUL; /* * 95-05-24 eay Fixed a bug with the overflow handling, thanks to Wei Dai * for pointing it out. 
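 * (Nl and Nh together hold the running message length in bits as one 64-bit value split across two 32-bit words; the checks below carry an overflow of Nl into Nh.)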
*/ if (l < ctx->Nl) /* overflow */ ctx->Nh++; ctx->Nh += (uint32_t) (len >> 29); /* might cause compiler warning on * 16-bit */ ctx->Nl = l; uint8_t *data_[4]; uint8_t i; for (i=0; i<4; i++) data_[i] = data[i]; size_t n = len / 64; if (n > 0) { HASH_DESC hdesc[4]; for (i=0; i<4; i++) { hdesc[i].ptr = data[i]; hdesc[i].blocks = n; } sha1_multi_block(ctx, hdesc, 1); n *= 64; for (i=0; i<4; i++) data_[i] += n; len -= n; } if (len != 0) { ctx->num = (uint32_t)len; for (i=0; i<4; i++) { uint8_t *d = (uint8_t*)ctx->data[i]; memcpy(d, data_[i], len); } } } void SHA1_MB_Update8(SHA1_MB_CTX *ctx, uint8_t *data[8], size_t len) { if (len == 0) return; uint32_t l = (ctx->Nl + (((uint32_t) len) << 3)) & 0xffffffffUL; /* * 95-05-24 eay Fixed a bug with the overflow handling, thanks to Wei Dai * for pointing it out. */ if (l < ctx->Nl) /* overflow */ ctx->Nh++; ctx->Nh += (uint32_t) (len >> 29); /* might cause compiler warning on * 16-bit */ ctx->Nl = l; uint8_t *data_[8]; uint8_t i; for (i=0; i<8; i++) data_[i] = data[i]; size_t n = len / 64; if (n > 0) { HASH_DESC hdesc[8]; for (i=0; i<8; i++) { hdesc[i].ptr = data[i]; hdesc[i].blocks = n; } sha1_multi_block(ctx, hdesc, 2); n *= 64; for (i=0; i<8; i++) data_[i] += n; len -= n; } if (len != 0) { ctx->num = (uint32_t)len; for (i=0; i<8; i++) { uint8_t *d = (uint8_t*)ctx->data[i]; memcpy(d, data_[i], len); } } } void SHA1_MB_Final(uint8_t *digest[4], SHA1_MB_CTX *ctx) { size_t n = ctx->num; uint8_t i; for (i=0; i<4; i++) { uint8_t *d = (uint8_t*)ctx->data[i]; *(d+n) = 0x80; } n++; for (i=0; i<4; i++) memset(((uint8_t*)ctx->data[i]) + n, 0, 64 - n); if (n > (64 - 8)) { n = 0; HASH_DESC hdesc[4]; for (i=0; i<4; i++) { hdesc[i].ptr = (uint8_t*)ctx->data[i]; hdesc[i].blocks = 1; } sha1_multi_block(ctx, hdesc, 1); } for (i=0; i<4; i++) { uint8_t *d = (uint8_t*)&ctx->data[i]; memset(d+n, 0, 64-8-n); d += 64 - 8; uint32_t *d32 = (uint32_t*)d; *d32 = ntohl(ctx->Nh); d += 4; d32 = (uint32_t*)d; *d32 = ntohl(ctx->Nl); } HASH_DESC hdesc[4]; for (i=0; i<4; i++) { hdesc[i].ptr = (uint8_t*)ctx->data[i]; hdesc[i].blocks = 1; } sha1_multi_block(ctx, hdesc, 1); for (i=0; i<4; i++) { uint32_t *d32 = (uint32_t*)digest[i]; *(d32++) = ntohl(ctx->A[i]); *(d32++) = ntohl(ctx->B[i]); *(d32++) = ntohl(ctx->C[i]); *(d32++) = ntohl(ctx->D[i]); *d32 = ntohl(ctx->E[i]); } } void SHA1_MB_Final8(uint8_t *digest[8], SHA1_MB_CTX *ctx) { size_t n = ctx->num; uint8_t i; for (i=0; i<8; i++) { uint8_t *d = (uint8_t*)ctx->data[i]; *(d+n) = 0x80; } n++; for (i=0; i<8; i++) memset(((uint8_t*)ctx->data[i]) + n, 0, 64 - n); if (n > (64 - 8)) { n = 0; HASH_DESC hdesc[8]; for (i=0; i<8; i++) { hdesc[i].ptr = (uint8_t*)ctx->data[i]; hdesc[i].blocks = 1; } sha1_multi_block(ctx, hdesc, 2); } for (i=0; i<8; i++) { uint8_t *d = (uint8_t*)&ctx->data[i]; memset(d+n, 0, 64-8-n); d += 64 - 8; uint32_t *d32 = (uint32_t*)d; *d32 = ntohl(ctx->Nh); d += 4; d32 = (uint32_t*)d; *d32 = ntohl(ctx->Nl); } HASH_DESC hdesc[8]; for (i=0; i<8; i++) { hdesc[i].ptr = (uint8_t*)ctx->data[i]; hdesc[i].blocks = 1; } sha1_multi_block(ctx, hdesc, 2); for (i=0; i<8; i++) { uint32_t *d32 = (uint32_t*)digest[i]; *(d32++) = ntohl(ctx->A[i]); *(d32++) = ntohl(ctx->B[i]); *(d32++) = ntohl(ctx->C[i]); *(d32++) = ntohl(ctx->D[i]); *d32 = ntohl(ctx->E[i]); } } void ntru_sha1_4way(uint8_t *input[4], uint16_t input_len, uint8_t *digest[4]) { SHA1_MB_CTX ctx; SHA1_MB_Init(&ctx); SHA1_MB_Update(&ctx, input, input_len); SHA1_MB_Final(digest, &ctx); } void ntru_sha1_8way(uint8_t *input[8], uint16_t input_len, uint8_t *digest[8]) { 
SHA1_MB_CTX ctx; SHA1_MB_Init8(&ctx); SHA1_MB_Update8(&ctx, input, input_len); SHA1_MB_Final8(digest, &ctx); } void SHA256_MB_Init(SHA256_MB_CTX *ctx) { memset(ctx, 0, sizeof(*ctx)); __m128i a = _mm_set1_epi32(0x6a09e667); __m128i b = _mm_set1_epi32(0xbb67ae85); __m128i c = _mm_set1_epi32(0x3c6ef372); __m128i d = _mm_set1_epi32(0xa54ff53a); __m128i e = _mm_set1_epi32(0x510e527f); __m128i f = _mm_set1_epi32(0x9b05688c); __m128i g = _mm_set1_epi32(0x1f83d9ab); __m128i h = _mm_set1_epi32(0x5be0cd19); _mm_storeu_si128((__m128i*)&ctx->A, a); _mm_storeu_si128((__m128i*)&ctx->B, b); _mm_storeu_si128((__m128i*)&ctx->C, c); _mm_storeu_si128((__m128i*)&ctx->D, d); _mm_storeu_si128((__m128i*)&ctx->E, e); _mm_storeu_si128((__m128i*)&ctx->F, f); _mm_storeu_si128((__m128i*)&ctx->G, g); _mm_storeu_si128((__m128i*)&ctx->H, h); } void SHA256_MB_Init8(SHA256_MB_CTX *ctx) { /* init A[0]..A[3], B[0]..B[3], ... */ SHA256_MB_Init(ctx); /* init A[4]..A[7], B[4]..B[7], ... */ __m128i a = _mm_set1_epi32(0x6a09e667); __m128i b = _mm_set1_epi32(0xbb67ae85); __m128i c = _mm_set1_epi32(0x3c6ef372); __m128i d = _mm_set1_epi32(0xa54ff53a); __m128i e = _mm_set1_epi32(0x510e527f); __m128i f = _mm_set1_epi32(0x9b05688c); __m128i g = _mm_set1_epi32(0x1f83d9ab); __m128i h = _mm_set1_epi32(0x5be0cd19); _mm_storeu_si128((__m128i*)&ctx->A[4], a); _mm_storeu_si128((__m128i*)&ctx->B[4], b); _mm_storeu_si128((__m128i*)&ctx->C[4], c); _mm_storeu_si128((__m128i*)&ctx->D[4], d); _mm_storeu_si128((__m128i*)&ctx->E[4], e); _mm_storeu_si128((__m128i*)&ctx->F[4], f); _mm_storeu_si128((__m128i*)&ctx->G[4], g); _mm_storeu_si128((__m128i*)&ctx->H[4], h); } void SHA256_MB_Update(SHA256_MB_CTX *ctx, uint8_t *data[4], size_t len) { if (len == 0) return; uint32_t l = (ctx->Nl + (((uint32_t) len) << 3)) & 0xffffffffUL; /* * 95-05-24 eay Fixed a bug with the overflow handling, thanks to Wei Dai * for pointing it out. */ if (l < ctx->Nl) /* overflow */ ctx->Nh++; ctx->Nh += (uint32_t) (len >> 29); /* might cause compiler warning on * 16-bit */ ctx->Nl = l; uint8_t *data_[4]; uint8_t i; for (i=0; i<4; i++) data_[i] = data[i]; size_t n = len / 64; if (n > 0) { HASH_DESC hdesc[4]; for (i=0; i<4; i++) { hdesc[i].ptr = data[i]; hdesc[i].blocks = n; } sha256_multi_block(ctx, hdesc, 1); n *= 64; for (i=0; i<4; i++) data_[i] += n; len -= n; } if (len != 0) { ctx->num = (uint32_t)len; for (i=0; i<4; i++) { uint8_t *d = (uint8_t*)ctx->data[i]; memcpy(d, data_[i], len); } } } void SHA256_MB_Update8(SHA256_MB_CTX *ctx, uint8_t *data[8], size_t len) { if (len == 0) return; uint32_t l = (ctx->Nl + (((uint32_t) len) << 3)) & 0xffffffffUL; /* * 95-05-24 eay Fixed a bug with the overflow handling, thanks to Wei Dai * for pointing it out. 
*/ if (l < ctx->Nl) /* overflow */ ctx->Nh++; ctx->Nh += (uint32_t) (len >> 29); /* might cause compiler warning on * 16-bit */ ctx->Nl = l; uint8_t *data_[8]; uint8_t i; for (i=0; i<8; i++) data_[i] = data[i]; size_t n = len / 64; if (n > 0) { HASH_DESC hdesc[8]; for (i=0; i<8; i++) { hdesc[i].ptr = data[i]; hdesc[i].blocks = n; } sha256_multi_block(ctx, hdesc, 2); n *= 64; for (i=0; i<8; i++) data_[i] += n; len -= n; } if (len != 0) { ctx->num = (uint32_t)len; for (i=0; i<8; i++) { uint8_t *d = (uint8_t*)ctx->data[i]; memcpy(d, data_[i], len); } } } void SHA256_MB_Final(uint8_t *digest[4], SHA256_MB_CTX *ctx) { size_t n = ctx->num; uint8_t i; for (i=0; i<4; i++) { uint8_t *d = (uint8_t*)ctx->data[i]; *(d+n) = 0x80; } n++; for (i=0; i<4; i++) memset(((uint8_t*)ctx->data[i]) + n, 0, 64 - n); if (n > (64 - 8)) { n = 0; HASH_DESC hdesc[4]; for (i=0; i<4; i++) { hdesc[i].ptr = (uint8_t*)ctx->data[i]; hdesc[i].blocks = 1; } sha256_multi_block(ctx, hdesc, 1); } for (i=0; i<4; i++) { uint8_t *d = (uint8_t*)&ctx->data[i]; memset(d+n, 0, 64-8-n); d += 64 - 8; uint32_t *d32 = (uint32_t*)d; *d32 = ntohl(ctx->Nh); d += 4; d32 = (uint32_t*)d; *d32 = ntohl(ctx->Nl); } HASH_DESC hdesc[4]; for (i=0; i<4; i++) { hdesc[i].ptr = (uint8_t*)ctx->data[i]; hdesc[i].blocks = 1; } sha256_multi_block(ctx, hdesc, 1); for (i=0; i<4; i++) { uint32_t *d32 = (uint32_t*)digest[i]; *(d32++) = ntohl(ctx->A[i]); *(d32++) = ntohl(ctx->B[i]); *(d32++) = ntohl(ctx->C[i]); *(d32++) = ntohl(ctx->D[i]); *(d32++) = ntohl(ctx->E[i]); *(d32++) = ntohl(ctx->F[i]); *(d32++) = ntohl(ctx->G[i]); *d32 = ntohl(ctx->H[i]); } } void SHA256_MB_Final8(uint8_t *digest[8], SHA256_MB_CTX *ctx) { size_t n = ctx->num; uint8_t i; for (i=0; i<8; i++) { uint8_t *d = (uint8_t*)ctx->data[i]; *(d+n) = 0x80; } n++; for (i=0; i<8; i++) memset(((uint8_t*)ctx->data[i]) + n, 0, 64 - n); if (n > (64 - 8)) { n = 0; HASH_DESC hdesc[8]; for (i=0; i<8; i++) { hdesc[i].ptr = (uint8_t*)ctx->data[i]; hdesc[i].blocks = 1; } sha256_multi_block(ctx, hdesc, 2); } for (i=0; i<8; i++) { uint8_t *d = (uint8_t*)&ctx->data[i]; memset(d+n, 0, 64-8-n); d += 64 - 8; uint32_t *d32 = (uint32_t*)d; *d32 = ntohl(ctx->Nh); d += 4; d32 = (uint32_t*)d; *d32 = ntohl(ctx->Nl); } HASH_DESC hdesc[8]; for (i=0; i<8; i++) { hdesc[i].ptr = (uint8_t*)ctx->data[i]; hdesc[i].blocks = 1; } sha256_multi_block(ctx, hdesc, 2); for (i=0; i<8; i++) { uint32_t *d32 = (uint32_t*)digest[i]; *(d32++) = ntohl(ctx->A[i]); *(d32++) = ntohl(ctx->B[i]); *(d32++) = ntohl(ctx->C[i]); *(d32++) = ntohl(ctx->D[i]); *(d32++) = ntohl(ctx->E[i]); *(d32++) = ntohl(ctx->F[i]); *(d32++) = ntohl(ctx->G[i]); *d32 = ntohl(ctx->H[i]); } } void ntru_sha256_4way(uint8_t *input[4], uint16_t input_len, uint8_t *digest[4]) { SHA256_MB_CTX ctx; SHA256_MB_Init(&ctx); SHA256_MB_Update(&ctx, input, input_len); SHA256_MB_Final(digest, &ctx); } void ntru_sha256_8way(uint8_t *input[8], uint16_t input_len, uint8_t *digest[8]) { SHA256_MB_CTX ctx; SHA256_MB_Init8(&ctx); SHA256_MB_Update8(&ctx, input, input_len); SHA256_MB_Final8(digest, &ctx); } #else /* non-SSE code */ void ntru_sha1_4way(uint8_t *input[4], uint16_t input_len, uint8_t *digest[4]) { uint8_t i; for (i=0; i<4; i++) ntru_sha1(input[i], input_len, digest[i]); } void ntru_sha1_8way(uint8_t *input[8], uint16_t input_len, uint8_t *digest[8]) { uint8_t i; for (i=0; i<8; i++) ntru_sha1(input[i], input_len, digest[i]); } void ntru_sha256_4way(uint8_t *input[4], uint16_t input_len, uint8_t *digest[4]) { uint8_t i; for (i=0; i<4; i++) ntru_sha256(input[i], input_len, digest[i]); } 
void ntru_sha256_8way(uint8_t *input[8], uint16_t input_len, uint8_t *digest[8]) { uint8_t i; for (i=0; i<8; i++) ntru_sha256(input[i], input_len, digest[i]); } #endif /* __SSSE3__ && _LP64 */ libntru-0.5/src/hash.h000066400000000000000000000010571271556312200147270ustar00rootroot00000000000000#ifndef NTRU_HASH_H #define NTRU_HASH_H #include void ntru_sha1(uint8_t *input, uint16_t input_len, uint8_t *digest); void ntru_sha1_4way(uint8_t *input[4], uint16_t input_len, uint8_t *digest[4]); void ntru_sha1_8way(uint8_t *input[8], uint16_t input_len, uint8_t *digest[8]); void ntru_sha256(uint8_t *input, uint16_t input_len, uint8_t *digest); void ntru_sha256_4way(uint8_t *input[4], uint16_t input_len, uint8_t *digest[4]); void ntru_sha256_8way(uint8_t *input[8], uint16_t input_len, uint8_t *digest[8]); #endif /* NTRU_HASH_H */ libntru-0.5/src/hybrid.c000066400000000000000000000115451271556312200152630ustar00rootroot00000000000000#include #include #include "ntru.h" #include /***************************************************************************** * Sample code that shows how to do hybrid encryption using NTRU and AES. * *****************************************************************************/ int openssl_encr_decr(uint8_t *inbuf, int inlen, uint8_t *outbuf, int *outlen, uint8_t *key, uint8_t *iv, int do_encrypt) { EVP_CIPHER_CTX ctx; EVP_CIPHER_CTX_init(&ctx); EVP_CipherInit_ex(&ctx, EVP_aes_128_cbc(), NULL, NULL, NULL, do_encrypt); EVP_CipherInit_ex(&ctx, NULL, NULL, key, iv, do_encrypt); int retval = EVP_CipherUpdate(&ctx, outbuf, outlen, inbuf, inlen); int pad_bytes = 0; if (retval && !EVP_CipherFinal_ex(&ctx, outbuf+*outlen, &pad_bytes)) retval = 0; *outlen += pad_bytes; EVP_CIPHER_CTX_cleanup(&ctx); return retval; } /** * @brief Hybrid Encryption * * Encrypts a message of arbitrary length. * * @param msg The message to encrypt * @param msg_len length of msg * @param pub the public key to encrypt the message with * @param params the NtruEncrypt parameters to use * @param rand_ctx an initialized random number generator. See ntru_rand_init() in rand.h. * @param enc output parameter; a pointer to store the encrypted message. Must accommodate * ntru_enc_len(params)+msg_len+16 bytes. * @param enc_len output parameter; number of bytes written * @return NTRU_SUCCESS on success, or one of the NTRU_ERR_ codes on failure; 99 for OpenSSL error */ uint8_t ntru_encrypt_hybrid(uint8_t *msg, uint16_t msg_len, NtruEncPubKey *pub, const NtruEncParams *params, NtruRandContext *rand_ctx, uint8_t *enc, int *enc_len) { uint8_t key_iv[32]; /* key + iv */ if (ntru_rand_generate(key_iv, 32, rand_ctx) != NTRU_SUCCESS) return NTRU_ERR_PRNG; int retval = ntru_encrypt(key_iv, 32, pub, params, rand_ctx, enc); /* put encrypted sym key + iv at the beginning */ int outlen; if (!openssl_encr_decr(msg, msg_len, enc+ntru_enc_len(params), &outlen, key_iv, key_iv+16, 1)) /* followed by the encrypted msg */ retval = 99; *enc_len = outlen; *enc_len += ntru_enc_len(params); /* add length of encrypted sym key + iv */ memset(key_iv, 0, 32); return retval; } /** * @brief Hybrid Decryption * * Decrypts a message encrypted with ntru_encrypt_hybrid(). * * @param enc The message to decrypt * @param enc_len length of enc * @param kp a key pair that contains the public key the message was encrypted * with, and the corresponding private key * @param params the NtruEncrypt parameters the message was encrypted with * @param dec output parameter; a pointer to store the decrypted message. 
Must accommodate * enc_len-ntru_enc_len(params) bytes. * @param dec_len output parameter; pointer to store the length of dec * @return NTRU_SUCCESS on success, or one of the NTRU_ERR_ codes on failure; 99 for OpenSSL error */ uint8_t ntru_decrypt_hybrid(uint8_t *enc, int enc_len, NtruEncKeyPair *kp, const NtruEncParams *params, uint8_t *dec, int *dec_len) { uint8_t key_iv[32]; uint16_t key_len; uint8_t retval = ntru_decrypt(enc, kp, params, key_iv, &key_len); if (retval != NTRU_SUCCESS) return retval; if (!openssl_encr_decr(enc+ntru_enc_len(params), enc_len-ntru_enc_len(params), dec, dec_len, key_iv, key_iv+16, 0)) retval = 99; return retval; } int main(int arc, char **argv) { char plain_char[123]; strcpy(plain_char, "This text is too long to fit in a NTRU message, so we'll use " \ "symmetric encryption and then NTRU-encrypt the symmetric key."); uint8_t plain[strlen(plain_char)]; unsigned i; for (i=0; i #include #include "idxgen.h" #include "ntru_endian.h" void ntru_IGF_init(uint8_t *seed, uint16_t seed_len, const NtruEncParams *params, NtruIGFState *s) { s->Z = seed; s->zlen = seed_len; s->N = params->N; s->c = params->c; s->rnd_thresh = (1<c) - (1<c)%s->N; s->hlen = params->hlen; s->rem_len = params->min_calls_r * 8 * s->hlen; s->hash = params->hash; s->hash_4way = params->hash_4way; s->hash_8way = params->hash_8way; s->counter = 0; s->buf.num_bytes = 0; s->buf.last_byte_bits = 0; while (s->counter < params->min_calls_r-7) { uint8_t H_arr[8][NTRU_MAX_HASH_LEN]; uint16_t inp_len = s->zlen + sizeof s->counter; uint8_t j; uint8_t hash_inp_arr[8][inp_len]; uint8_t *hash_inp[8]; for (j=0; j<8; j++) { memcpy(&hash_inp_arr[j], (uint8_t*)s->Z, s->zlen); uint16_t counter_endian = htole16(s->counter); memcpy((uint8_t*)&hash_inp_arr[j] + s->zlen, &counter_endian, sizeof s->counter); hash_inp[j] = hash_inp_arr[j]; s->counter++; } uint8_t *H[8]; for (j=0; j<8; j++) H[j] = H_arr[j]; s->hash_8way(hash_inp, inp_len, H); for (j=0; j<8; j++) ntru_append(&s->buf, H[j], s->hlen); } while (s->counter < params->min_calls_r-3) { uint8_t H_arr[4][NTRU_MAX_HASH_LEN]; uint16_t inp_len = s->zlen + sizeof s->counter; uint8_t j; uint8_t hash_inp_arr[4][inp_len]; uint8_t *hash_inp[4]; for (j=0; j<4; j++) { memcpy(&hash_inp_arr[j], (uint8_t*)s->Z, s->zlen); uint16_t counter_endian = htole16(s->counter); memcpy((uint8_t*)&hash_inp_arr[j] + s->zlen, &counter_endian, sizeof s->counter); hash_inp[j] = hash_inp_arr[j]; s->counter++; } uint8_t *H[4]; for (j=0; j<4; j++) H[j] = H_arr[j]; s->hash_4way(hash_inp, inp_len, H); for (j=0; j<4; j++) ntru_append(&s->buf, H[j], s->hlen); } while (s->counter < params->min_calls_r) { uint8_t H[NTRU_MAX_HASH_LEN]; uint16_t inp_len = s->zlen + sizeof s->counter; uint8_t hash_inp[inp_len]; memcpy(&hash_inp, (uint8_t*)s->Z, s->zlen); uint16_t counter_endian = htole16(s->counter); memcpy((uint8_t*)&hash_inp + s->zlen, &counter_endian, sizeof s->counter); s->hash((uint8_t*)&hash_inp, inp_len, (uint8_t*)&H); ntru_append(&s->buf, (uint8_t*)&H, s->hlen); s->counter++; } } void ntru_IGF_next(NtruIGFState *s, uint16_t *i) { uint16_t N = s-> N; uint16_t c = s-> c; uint8_t H[NTRU_MAX_HASH_LEN]; for (;;) { if (s->rem_len < c) { NtruBitStr M; ntru_trailing(&s->buf, s->rem_len, &M); uint16_t tmp_len = c - s->rem_len; uint16_t c_thresh = s->counter + (tmp_len+s->hlen-1) / s->hlen; while (s->counter < c_thresh) { uint16_t inp_len = s->zlen + sizeof s->counter; uint8_t hash_inp[inp_len]; memcpy(&hash_inp, (uint8_t*)s->Z, s->zlen); uint16_t counter_endian = htole16(s->counter); memcpy((uint8_t*)&hash_inp 
+ s->zlen, &counter_endian, sizeof s->counter); s->hash((uint8_t*)&hash_inp, inp_len, (uint8_t*)&H); ntru_append(&M, (uint8_t*)&H, s->hlen); s->counter++; s->rem_len += 8 * s->hlen; } s->buf = M; } *i = ntru_leading(&s->buf, c); /* assume c<32 */ ntru_truncate(&s->buf, c); s->rem_len -= c; if (*i < s->rnd_thresh) { /* if (*i < (1<= N) *i -= N; return; } } } libntru-0.5/src/idxgen.h000066400000000000000000000020751271556312200152630ustar00rootroot00000000000000#ifndef NTRU_IDXGEN_H #define NTRU_IDXGEN_H #include #include "encparams.h" #include "bitstring.h" typedef struct NtruIGFState { uint16_t N; uint16_t c; uint16_t rnd_thresh; /* value below which random numbers are accepted */ uint8_t *Z; uint16_t zlen; uint16_t rem_len; NtruBitStr buf; uint16_t counter; void (*hash)(uint8_t[], uint16_t, uint8_t[]); void (*hash_4way)(uint8_t*[4], uint16_t, uint8_t*[4]); void (*hash_8way)(uint8_t*[8], uint16_t, uint8_t*[8]); uint16_t hlen; } NtruIGFState; /** * @brief IGF initialization * * Initializes the Index Generation Function. * Based on IGF-2 from IEEE P1363.1 section 8.4.2.1. * * @param seed * @param seed_len * @param params * @param s */ void ntru_IGF_init(uint8_t *seed, uint16_t seed_len, const NtruEncParams *params, NtruIGFState *s); /** * @brief IGF next index * * Returns the next index. * Based on IGF-2 from IEEE P1363.1 section 8.4.2.1. * * @param s * @param i */ void ntru_IGF_next(NtruIGFState *s, uint16_t *i); #endif /* NTRU_IDXGEN_H */ libntru-0.5/src/key.c000066400000000000000000000175051271556312200145740ustar00rootroot00000000000000#include #include #ifdef WIN32 #include #else #include #endif #include "key.h" #include "poly.h" #include "encparams.h" #include "types.h" #include "arith.h" #include "err.h" void ntru_export_pub(NtruEncPubKey *key, uint8_t *arr) { /* write N */ uint16_t N_endian = htons(key->h.N); memcpy(arr, &N_endian, sizeof N_endian); arr += sizeof N_endian; /* write q */ uint16_t q_endian = htons(key->q); memcpy(arr, &q_endian, sizeof q_endian); arr += sizeof q_endian; /* write h */ ntru_to_arr_32(&key->h, key->q, arr); } uint16_t ntru_import_pub(uint8_t *arr, NtruEncPubKey *key) { uint8_t *arr_head = arr; /* read N */ uint16_t N_endian; memcpy(&N_endian, arr_head, sizeof N_endian); uint16_t N = ntohs(N_endian); key->h.N = N; /* read q */ arr_head += sizeof N_endian; uint16_t q_endian; memcpy(&q_endian, arr_head, sizeof q_endian); uint16_t q = ntohs(q_endian); key->q = q; arr_head += sizeof q_endian; /* read h */ ntru_from_arr(arr_head, N, q, &key->h); arr_head += ntru_enc_len_Nq(N, q); return arr_head - arr; } uint16_t ntru_pub_len(const NtruEncParams *params) { return 4 + ntru_enc_len(params); } uint16_t ntru_tern_to_arr(NtruTernPoly *poly, uint8_t *arr) { uint8_t *arr_head = arr; /* write #ones and #neg_ones */ uint16_t num_ones = htons(poly->num_ones); memcpy(arr_head, &num_ones, sizeof num_ones); arr_head += sizeof num_ones; uint16_t num_neg_ones = htons(poly->num_neg_ones); memcpy(arr_head, &num_neg_ones, sizeof num_neg_ones); arr_head += sizeof num_neg_ones; /* write indices of ones and negative ones */ uint8_t bits_per_idx = ntru_log2(poly->N-1) + 1; uint32_t buf = 0; uint8_t buf_size = 0; /* #bits in buf */ uint16_t i; for (i=0; inum_ones; i++) { uint16_t idx = poly->ones[i]; buf |= idx << buf_size; buf_size += bits_per_idx; while (buf_size > 8) { *arr_head = buf & 0xFF; arr_head++; buf >>= 8; buf_size -= 8; } } for (i=0; inum_neg_ones; i++) { uint16_t idx = poly->neg_ones[i]; buf |= idx << buf_size; buf_size += bits_per_idx; while (buf_size > 8) { *arr_head 
= buf & 0xFF; arr_head++; buf >>= 8; buf_size -= 8; } } if (buf_size > 0) { *arr_head = buf & 0xFF; arr_head++; } return arr_head - arr; } uint16_t ntru_export_priv(NtruEncPrivKey *key, uint8_t *arr) { uint8_t *arr_head = arr; uint8_t prod_flag = key->t.prod_flag; /* write N */ #ifndef NTRU_AVOID_HAMMING_WT_PATENT uint16_t N_endian = prod_flag ? htons(key->t.poly.prod.N) : htons(key->t.poly.tern.N); #else uint16_t N_endian = htons(key->t.poly.tern.N); #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ memcpy(arr_head, &N_endian, sizeof N_endian); arr_head += sizeof N_endian; /* write q */ uint16_t q_endian = htons(key->q); memcpy(arr_head, &q_endian, sizeof q_endian); arr_head += sizeof q_endian; /* write flags */ *arr_head = 3 | (prod_flag?4:0); arr_head++; /* write f1, f2, f3 */ #ifndef NTRU_AVOID_HAMMING_WT_PATENT if (prod_flag) { arr_head += ntru_tern_to_arr(&key->t.poly.prod.f1, arr_head); arr_head += ntru_tern_to_arr(&key->t.poly.prod.f2, arr_head); arr_head += ntru_tern_to_arr(&key->t.poly.prod.f3, arr_head); } else #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ arr_head += ntru_tern_to_arr(&key->t.poly.tern, arr_head); return arr_head - arr; } uint16_t ntru_tern_from_arr(uint8_t *arr, uint16_t N, NtruTernPoly *poly) { poly->N = N; uint8_t *arr_head = arr; /* read #ones and #neg_ones */ uint16_t num_ones; memcpy(&num_ones, arr_head, sizeof num_ones); poly->num_ones = ntohs(num_ones); arr_head += sizeof num_ones; uint16_t num_neg_ones; memcpy(&num_neg_ones, arr_head, sizeof num_neg_ones); poly->num_neg_ones = ntohs(num_neg_ones); arr_head += sizeof num_neg_ones; /* read indices of ones and negative ones */ uint8_t bits_per_idx = ntru_log2(N-1) + 1; uint16_t mask = (1<<bits_per_idx) - 1; uint32_t buf = 0; uint8_t buf_size = 0; /* #bits in buf */ uint16_t i; for (i=0; i<poly->num_ones; i++) { while (buf_size < bits_per_idx) { buf |= *arr_head << buf_size; arr_head++; buf_size += 8; } poly->ones[i] = buf & mask; buf >>= bits_per_idx; buf_size -= bits_per_idx; } for (i=0; i<poly->num_neg_ones; i++) { while (buf_size < bits_per_idx) { buf |= *arr_head << buf_size; arr_head++; buf_size += 8; } poly->neg_ones[i] = buf & mask; buf >>= bits_per_idx; buf_size -= bits_per_idx; } return arr_head - arr; } void ntru_import_priv(uint8_t *arr, NtruEncPrivKey *key) { /* read N */ uint16_t N; memcpy(&N, arr, sizeof N); N = ntohs(N); arr += sizeof N; /* read q */ uint16_t q; memcpy(&q, arr, sizeof q); key->q = ntohs(q); arr += sizeof q; /* read flags and check bit 2 */ uint8_t flags = *arr; key->t.prod_flag = (flags&4) != 0; arr++; #ifndef NTRU_AVOID_HAMMING_WT_PATENT if (key->t.prod_flag) { key->t.poly.prod.N = N; arr += ntru_tern_from_arr(arr, N, &key->t.poly.prod.f1); arr += ntru_tern_from_arr(arr, N, &key->t.poly.prod.f2); arr += ntru_tern_from_arr(arr, N, &key->t.poly.prod.f3); } else #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ { key->t.poly.tern.N = N; arr += ntru_tern_from_arr(arr, key->t.poly.tern.N, &key->t.poly.tern); } } uint16_t ntru_priv_len(const NtruEncParams *params) { uint8_t bits_per_idx = ntru_log2(params->N-1) + 1; if (params->prod_flag) { uint16_t poly1_len = 4 + (bits_per_idx*2*params->df1+7) / 8; uint16_t poly2_len = 4 + (bits_per_idx*2*params->df2+7) / 8; uint16_t poly3_len = 4 + (bits_per_idx*2*params->df3+7) / 8; return 5 + poly1_len + poly2_len + poly3_len; } else return 5 + 4 + (bits_per_idx*2*params->df1+7) / 8; } uint8_t ntru_params_from_key_pair(NtruEncKeyPair *kp, NtruEncParams *params) { return ntru_params_from_priv_key(&kp->priv, params); } uint8_t ntru_params_from_priv_key(NtruEncPrivKey *key, NtruEncParams *params) { if (!key || !params) return NTRU_ERR_NULL_ARG; size_t i = 0; struct NtruEncParams
all[] = ALL_PARAM_SETS; for (i=0; i<sizeof(all)/sizeof(all[0]); i++) { uint16_t df; uint16_t N; #ifndef NTRU_AVOID_HAMMING_WT_PATENT if (key->t.prod_flag) { df = key->t.poly.prod.f1.num_ones; N = key->t.poly.prod.N; } else { #endif df = key->t.poly.tern.num_ones; N = key->t.poly.tern.N; #ifndef NTRU_AVOID_HAMMING_WT_PATENT } #endif if (N==all[i].N && df==all[i].df1) { strcpy(params->name, all[i].name); params->N = all[i].N; params->q = all[i].q; params->prod_flag = all[i].prod_flag; params->df1 = all[i].df1; params->df2 = all[i].df2; params->df3 = all[i].df3; params->dm0 = all[i].dm0; params->db = all[i].db; params->c = all[i].c; params->min_calls_r = all[i].min_calls_r; params->min_calls_mask = all[i].min_calls_mask; params->hash_seed = all[i].hash_seed; memcpy(params->oid, all[i].oid, sizeof(all[i].oid)); params->hash = all[i].hash; params->hlen = all[i].hlen; params->pklen = all[i].pklen; return NTRU_SUCCESS; } } return NTRU_ERR_UNKNOWN_PARAM_SET; } libntru-0.5/src/key.h000066400000000000000000000011461271556312200145730ustar00rootroot00000000000000#ifndef NTRU_KEY_H #define NTRU_KEY_H #include "types.h" #include "encparams.h" void ntru_export_pub(NtruEncPubKey *key, uint8_t *arr); uint16_t ntru_import_pub(uint8_t *arr, NtruEncPubKey *key); uint16_t ntru_export_priv(NtruEncPrivKey *key, uint8_t *arr); void ntru_import_priv(uint8_t *arr, NtruEncPrivKey *key); uint16_t ntru_pub_len(const NtruEncParams *params); uint16_t ntru_priv_len(const NtruEncParams *params); uint8_t ntru_params_from_key_pair(NtruEncKeyPair *kp, NtruEncParams *params); uint8_t ntru_params_from_priv_key(NtruEncPrivKey *key, NtruEncParams *params); #endif /* NTRU_KEY_H */ libntru-0.5/src/md_helper.c000066400000000000000000000240521271556312200157360ustar00rootroot00000000000000/* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */ /* * This file contains some functions which implement the external data * handling and padding for Merkle-Damgard hash functions which follow * the conventions set out by MD4 (little-endian) or SHA-1 (big-endian). * * API: this file is meant to be included, not compiled as a stand-alone * file. Some macros must be defined: * RFUN name for the round function * HASH "short name" for the hash function * BE32 defined for big-endian, 32-bit based (e.g. SHA-1) * LE32 defined for little-endian, 32-bit based (e.g. MD5) * BE64 defined for big-endian, 64-bit based (e.g. SHA-512) * LE64 defined for little-endian, 64-bit based (no example yet) * PW01 if defined, append 0x01 instead of 0x80 (for Tiger) * BLEN if defined, length of a message block (in bytes) * PLW1 if defined, length is defined on one 64-bit word only (for Tiger) * PLW4 if defined, length is defined on four 64-bit words (for WHIRLPOOL) * SVAL if defined, reference to the context state information * * BLEN is used when a message block is not 16 (32-bit or 64-bit) words: * this is used for instance for Tiger, which works on 64-bit words but * uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are * ignored if 32-bit words are used; if 64-bit words are used and PLW1 is * set, then only one word (64 bits) will be used to encode the input * message length (in bits), otherwise two words will be used (as in * SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but * not PLW1), four 64-bit words will be used to encode the message length * (in bits). Note that regardless of those settings, only 64-bit message * lengths are supported (in bits): messages longer than 2 Exabytes will be * improperly hashed (this is unlikely to happen soon: 2 Exabytes is about * 2 millions Terabytes, which is huge).
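 * (The 32-bit, big-endian SHA-1/SHA-256 code in this library uses the plain configuration: BLEN, PW01, PLW1 and PLW4 are all left undefined, so blocks are 64 bytes and the bit length occupies the last two 32-bit words of the final block.)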
* * If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close() * function. This is used for Tiger2, which is identical to Tiger except * when it comes to the padding (Tiger2 uses the standard 0x80 byte instead * of the 0x01 from original Tiger). * * The RFUN function is invoked with two arguments, the first pointing to * aligned data (as a "const void *"), the second being state information * from the context structure. By default, this state information is the * "val" field from the context, and this field is assumed to be an array * of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64). * from the context structure. The "val" field can have any type, except * for the output encoding which assumes that it is an array of "sph_u32" * values. By defining NO_OUTPUT, this last step is deactivated; the * includer code is then responsible for writing out the hash result. When * NO_OUTPUT is defined, the third parameter to the "close()" function is * ignored. * * ==========================(LICENSE BEGIN)============================ * * Copyright (c) 2007-2010 Projet RNRT SAPHIR * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * ===========================(LICENSE END)============================= * * @author Thomas Pornin */ #ifdef _MSC_VER #pragma warning (disable: 4146) #endif #undef SPH_XCAT #define SPH_XCAT(a, b) SPH_XCAT_(a, b) #undef SPH_XCAT_ #define SPH_XCAT_(a, b) a ## b #undef SPH_BLEN #undef SPH_WLEN #if defined BE64 || defined LE64 #define SPH_BLEN 128U #define SPH_WLEN 8U #else #define SPH_BLEN 64U #define SPH_WLEN 4U #endif #ifdef BLEN #undef SPH_BLEN #define SPH_BLEN BLEN #endif #undef SPH_MAXPAD #if defined PLW1 #define SPH_MAXPAD (SPH_BLEN - SPH_WLEN) #elif defined PLW4 #define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 2)) #else #define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 1)) #endif #undef SPH_VAL #undef SPH_NO_OUTPUT #ifdef SVAL #define SPH_VAL SVAL #define SPH_NO_OUTPUT 1 #else #define SPH_VAL sc->val #endif #ifndef CLOSE_ONLY #ifdef SPH_UPTR static void SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len) #else void SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len) #endif { SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; unsigned current; sc = cc; #if SPH_64 current = (unsigned)sc->count & (SPH_BLEN - 1U); #else current = (unsigned)sc->count_low & (SPH_BLEN - 1U); #endif while (len > 0) { unsigned clen; #if !SPH_64 sph_u32 clow, clow2; #endif clen = SPH_BLEN - current; if (clen > len) clen = len; memcpy(sc->buf + current, data, clen); data = (const unsigned char *)data + clen; current += clen; len -= clen; if (current == SPH_BLEN) { RFUN(sc->buf, SPH_VAL); current = 0; } #if SPH_64 sc->count += clen; #else clow = sc->count_low; clow2 = SPH_T32(clow + clen); sc->count_low = clow2; if (clow2 < clow) sc->count_high ++; #endif } } #ifdef SPH_UPTR void SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len) { SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; unsigned current; size_t orig_len; #if !SPH_64 sph_u32 clow, clow2; #endif if (len < (2 * SPH_BLEN)) { SPH_XCAT(HASH, _short)(cc, data, len); return; } sc = cc; #if SPH_64 current = (unsigned)sc->count & (SPH_BLEN - 1U); #else current = (unsigned)sc->count_low & (SPH_BLEN - 1U); #endif if (current > 0) { unsigned t; t = SPH_BLEN - current; SPH_XCAT(HASH, _short)(cc, data, t); data = (const unsigned char *)data + t; len -= t; } #if !SPH_UNALIGNED if (((SPH_UPTR)data & (SPH_WLEN - 1U)) != 0) { SPH_XCAT(HASH, _short)(cc, data, len); return; } #endif orig_len = len; while (len >= SPH_BLEN) { RFUN(data, SPH_VAL); len -= SPH_BLEN; data = (const unsigned char *)data + SPH_BLEN; } if (len > 0) memcpy(sc->buf, data, len); #if SPH_64 sc->count += (sph_u64)orig_len; #else clow = sc->count_low; clow2 = SPH_T32(clow + orig_len); sc->count_low = clow2; if (clow2 < clow) sc->count_high ++; /* * This code handles the improbable situation where "size_t" is * greater than 32 bits, and yet we do not have a 64-bit type. */ orig_len >>= 12; orig_len >>= 10; orig_len >>= 10; sc->count_high += orig_len; #endif } #endif #endif /* * Perform padding and produce result. The context is NOT reinitialized * by this function. 
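 * Padding works as follows: the marker byte 0x80 (0x01 when PW01 is defined) is appended, the block is zero-filled up to SPH_MAXPAD (processing one extra block first if the marker landed past SPH_MAXPAD), the message bit count is encoded, and RFUN is run on the final block.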
*/ static void SPH_XCAT(HASH, _addbits_and_close)(void *cc, unsigned ub, unsigned n, void *dst, unsigned rnum) { SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; unsigned current, u; #if !SPH_64 sph_u32 low, high; #endif sc = cc; #if SPH_64 current = (unsigned)sc->count & (SPH_BLEN - 1U); #else current = (unsigned)sc->count_low & (SPH_BLEN - 1U); #endif #ifdef PW01 sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n); #else { unsigned z; z = 0x80 >> n; sc->buf[current ++] = ((ub & -z) | z) & 0xFF; } #endif if (current > SPH_MAXPAD) { memset(sc->buf + current, 0, SPH_BLEN - current); RFUN(sc->buf, SPH_VAL); memset(sc->buf, 0, SPH_MAXPAD); } else { memset(sc->buf + current, 0, SPH_MAXPAD - current); } #if defined BE64 #if defined PLW1 sph_enc64be_aligned(sc->buf + SPH_MAXPAD, SPH_T64(sc->count << 3) + (sph_u64)n); #elif defined PLW4 memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN); sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, sc->count >> 61); sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN, SPH_T64(sc->count << 3) + (sph_u64)n); #else sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61); sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, SPH_T64(sc->count << 3) + (sph_u64)n); #endif #elif defined LE64 #if defined PLW1 sph_enc64le_aligned(sc->buf + SPH_MAXPAD, SPH_T64(sc->count << 3) + (sph_u64)n); #elif defined PLW1 sph_enc64le_aligned(sc->buf + SPH_MAXPAD, SPH_T64(sc->count << 3) + (sph_u64)n); sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61); memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN); #else sph_enc64le_aligned(sc->buf + SPH_MAXPAD, SPH_T64(sc->count << 3) + (sph_u64)n); sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61); #endif #else #if SPH_64 #ifdef BE32 sph_enc64be_aligned(sc->buf + SPH_MAXPAD, SPH_T64(sc->count << 3) + (sph_u64)n); #else sph_enc64le_aligned(sc->buf + SPH_MAXPAD, SPH_T64(sc->count << 3) + (sph_u64)n); #endif #else low = sc->count_low; high = SPH_T32((sc->count_high << 3) | (low >> 29)); low = SPH_T32(low << 3) + (sph_u32)n; #ifdef BE32 sph_enc32be(sc->buf + SPH_MAXPAD, high); sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low); #else sph_enc32le(sc->buf + SPH_MAXPAD, low); sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high); #endif #endif #endif RFUN(sc->buf, SPH_VAL); #ifdef SPH_NO_OUTPUT (void)dst; (void)rnum; (void)u; #else for (u = 0; u < rnum; u ++) { #if defined BE64 sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]); #elif defined LE64 sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]); #elif defined BE32 sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]); #else sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]); #endif } #endif } static void SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum) { SPH_XCAT(HASH, _addbits_and_close)(cc, 0, 0, dst, rnum); } libntru-0.5/src/md_helper.h000066400000000000000000000251361271556312200157470ustar00rootroot00000000000000/* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */ /* * This file contains some functions which implement the external data * handling and padding for Merkle-Damgard hash functions which follow * the conventions set out by MD4 (little-endian) or SHA-1 (big-endian). * * API: this file is meant to be included, not compiled as a stand-alone * file. Some macros must be defined: * RFUN name for the round function * HASH "short name" for the hash function * BE32 defined for big-endian, 32-bit based (e.g. SHA-1) * LE32 defined for little-endian, 32-bit based (e.g. 
MD5) * BE64 defined for big-endian, 64-bit based (e.g. SHA-512) * LE64 defined for little-endian, 64-bit based (no example yet) * PW01 if defined, append 0x01 instead of 0x80 (for Tiger) * BLEN if defined, length of a message block (in bytes) * PLW1 if defined, length is defined on one 64-bit word only (for Tiger) * PLW4 if defined, length is defined on four 64-bit words (for WHIRLPOOL) * SVAL if defined, reference to the context state information * * BLEN is used when a message block is not 16 (32-bit or 64-bit) words: * this is used for instance for Tiger, which works on 64-bit words but * uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are * ignored if 32-bit words are used; if 64-bit words are used and PLW1 is * set, then only one word (64 bits) will be used to encode the input * message length (in bits), otherwise two words will be used (as in * SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but * not PLW1), four 64-bit words will be used to encode the message length * (in bits). Note that regardless of those settings, only 64-bit message * lengths are supported (in bits): messages longer than 2 Exabytes will be * improperly hashed (this is unlikely to happen soon: 2 Exabytes is about * 2 millions Terabytes, which is huge). * * If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close() * function. This is used for Tiger2, which is identical to Tiger except * when it comes to the padding (Tiger2 uses the standard 0x80 byte instead * of the 0x01 from original Tiger). * * The RFUN function is invoked with two arguments, the first pointing to * aligned data (as a "const void *"), the second being state information * from the context structure. By default, this state information is the * "val" field from the context, and this field is assumed to be an array * of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64). * from the context structure. The "val" field can have any type, except * for the output encoding which assumes that it is an array of "sph_u32" * values. By defining NO_OUTPUT, this last step is deactivated; the * includer code is then responsible for writing out the hash result. When * NO_OUTPUT is defined, the third parameter to the "close()" function is * ignored. * * ==========================(LICENSE BEGIN)============================ * * Copyright (c) 2007-2010 Projet RNRT SAPHIR * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * ===========================(LICENSE END)============================= * * @author Thomas Pornin */ #ifdef _MSC_VER #pragma warning (disable: 4146) #endif #undef SPH_XCAT #define SPH_XCAT(a, b) SPH_XCAT_(a, b) #undef SPH_XCAT_ #define SPH_XCAT_(a, b) a ## b #undef SPH_BLEN #undef SPH_WLEN #if defined BE64 || defined LE64 #define SPH_BLEN 128U #define SPH_WLEN 8U #else #define SPH_BLEN 64U #define SPH_WLEN 4U #endif #ifdef BLEN #undef SPH_BLEN #define SPH_BLEN BLEN #endif #undef SPH_MAXPAD #if defined PLW1 #define SPH_MAXPAD (SPH_BLEN - SPH_WLEN) #elif defined PLW4 #define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 2)) #else #define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 1)) #endif #undef SPH_VAL #undef SPH_NO_OUTPUT #ifdef SVAL #define SPH_VAL SVAL #define SPH_NO_OUTPUT 1 #else #define SPH_VAL sc->val #endif #ifndef CLOSE_ONLY #ifdef SPH_UPTR static void SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len) #else void SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len) #endif { SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; unsigned current; sc = cc; #if SPH_64 current = (unsigned)sc->count & (SPH_BLEN - 1U); #else current = (unsigned)sc->count_low & (SPH_BLEN - 1U); #endif while (len > 0) { unsigned clen; #if !SPH_64 sph_u32 clow, clow2; #endif clen = SPH_BLEN - current; if (clen > len) clen = len; memcpy(sc->buf + current, data, clen); data = (const unsigned char *)data + clen; current += clen; len -= clen; if (current == SPH_BLEN) { RFUN(sc->buf, SPH_VAL); current = 0; } #if SPH_64 sc->count += clen; #else clow = sc->count_low; clow2 = SPH_T32(clow + clen); sc->count_low = clow2; if (clow2 < clow) sc->count_high ++; #endif } } #ifdef SPH_UPTR void SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len) { SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; unsigned current; size_t orig_len; #if !SPH_64 sph_u32 clow, clow2; #endif if (len < (2 * SPH_BLEN)) { SPH_XCAT(HASH, _short)(cc, data, len); return; } sc = cc; #if SPH_64 current = (unsigned)sc->count & (SPH_BLEN - 1U); #else current = (unsigned)sc->count_low & (SPH_BLEN - 1U); #endif if (current > 0) { unsigned t; t = SPH_BLEN - current; SPH_XCAT(HASH, _short)(cc, data, t); data = (const unsigned char *)data + t; len -= t; } #if !SPH_UNALIGNED if (((SPH_UPTR)data & (SPH_WLEN - 1U)) != 0) { SPH_XCAT(HASH, _short)(cc, data, len); return; } #endif orig_len = len; while (len >= SPH_BLEN) { RFUN(data, SPH_VAL); len -= SPH_BLEN; data = (const unsigned char *)data + SPH_BLEN; } if (len > 0) memcpy(sc->buf, data, len); #if SPH_64 sc->count += (sph_u64)orig_len; #else clow = sc->count_low; clow2 = SPH_T32(clow + orig_len); sc->count_low = clow2; if (clow2 < clow) sc->count_high ++; /* * This code handles the improbable situation where "size_t" is * greater than 32 bits, and yet we do not have a 64-bit type. */ orig_len >>= 12; orig_len >>= 10; orig_len >>= 10; sc->count_high += orig_len; #endif } #endif #endif /* * Perform padding and produce result. The context is NOT reinitialized * by this function. 
*/ static void SPH_XCAT(HASH, _addbits_and_close)(void *cc, unsigned ub, unsigned n, void *dst, unsigned rnum) { SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; unsigned current, u; #if !SPH_64 sph_u32 low, high; #endif sc = cc; #if SPH_64 current = (unsigned)sc->count & (SPH_BLEN - 1U); #else current = (unsigned)sc->count_low & (SPH_BLEN - 1U); #endif #ifdef PW01 sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n); #else { unsigned z; z = 0x80 >> n; sc->buf[current ++] = ((ub & -z) | z) & 0xFF; } #endif if (current > SPH_MAXPAD) { memset(sc->buf + current, 0, SPH_BLEN - current); RFUN(sc->buf, SPH_VAL); memset(sc->buf, 0, SPH_MAXPAD); } else { memset(sc->buf + current, 0, SPH_MAXPAD - current); } #if defined BE64 #if defined PLW1 sph_enc64be_aligned(sc->buf + SPH_MAXPAD, SPH_T64(sc->count << 3) + (sph_u64)n); #elif defined PLW4 memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN); sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, sc->count >> 61); sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN, SPH_T64(sc->count << 3) + (sph_u64)n); #else sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61); sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, SPH_T64(sc->count << 3) + (sph_u64)n); #endif #elif defined LE64 #if defined PLW1 sph_enc64le_aligned(sc->buf + SPH_MAXPAD, SPH_T64(sc->count << 3) + (sph_u64)n); #elif defined PLW1 sph_enc64le_aligned(sc->buf + SPH_MAXPAD, SPH_T64(sc->count << 3) + (sph_u64)n); sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61); memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN); #else sph_enc64le_aligned(sc->buf + SPH_MAXPAD, SPH_T64(sc->count << 3) + (sph_u64)n); sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61); #endif #else #if SPH_64 #ifdef BE32 sph_enc64be_aligned(sc->buf + SPH_MAXPAD, SPH_T64(sc->count << 3) + (sph_u64)n); #else sph_enc64le_aligned(sc->buf + SPH_MAXPAD, SPH_T64(sc->count << 3) + (sph_u64)n); #endif #else low = sc->count_low; high = SPH_T32((sc->count_high << 3) | (low >> 29)); low = SPH_T32(low << 3) + (sph_u32)n; #ifdef BE32 sph_enc32be(sc->buf + SPH_MAXPAD, high); sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low); #else sph_enc32le(sc->buf + SPH_MAXPAD, low); sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high); #endif #endif #endif RFUN(sc->buf, SPH_VAL); #ifdef SPH_NO_OUTPUT (void)dst; (void)rnum; (void)u; #else for (u = 0; u < rnum; u ++) { #if defined BE64 sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]); #elif defined LE64 sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]); #elif defined BE32 sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]); #else sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]); #endif } #endif } static void SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum) { SPH_XCAT(HASH, _addbits_and_close)(cc, 0, 0, dst, rnum); } libntru-0.5/src/mgf.c000066400000000000000000000233061271556312200145510ustar00rootroot00000000000000#include #ifdef WIN32 #include #include #else #include #endif #include "encparams.h" #include "poly.h" int16_t NTRU_MGF_TRIT_TBL[243][5] = { { 0, 0, 0, 0, 0}, { 1, 0, 0, 0, 0}, {-1, 0, 0, 0, 0}, { 0, 1, 0, 0, 0}, { 1, 1, 0, 0, 0}, {-1, 1, 0, 0, 0}, { 0, -1, 0, 0, 0}, { 1, -1, 0, 0, 0}, {-1, -1, 0, 0, 0}, { 0, 0, 1, 0, 0}, { 1, 0, 1, 0, 0}, {-1, 0, 1, 0, 0}, { 0, 1, 1, 0, 0}, { 1, 1, 1, 0, 0}, {-1, 1, 1, 0, 0}, { 0, -1, 1, 0, 0}, { 1, -1, 1, 0, 0}, {-1, -1, 1, 0, 0}, { 0, 0, -1, 0, 0}, { 1, 0, -1, 0, 0}, {-1, 0, -1, 0, 0}, { 0, 1, -1, 0, 0}, { 1, 1, -1, 0, 0}, {-1, 1, -1, 0, 0}, { 0, -1, -1, 0, 0}, { 1, -1, -1, 0, 0}, 
{-1, -1, -1, 0, 0}, { 0, 0, 0, 1, 0}, { 1, 0, 0, 1, 0}, {-1, 0, 0, 1, 0}, { 0, 1, 0, 1, 0}, { 1, 1, 0, 1, 0}, {-1, 1, 0, 1, 0}, { 0, -1, 0, 1, 0}, { 1, -1, 0, 1, 0}, {-1, -1, 0, 1, 0}, { 0, 0, 1, 1, 0}, { 1, 0, 1, 1, 0}, {-1, 0, 1, 1, 0}, { 0, 1, 1, 1, 0}, { 1, 1, 1, 1, 0}, {-1, 1, 1, 1, 0}, { 0, -1, 1, 1, 0}, { 1, -1, 1, 1, 0}, {-1, -1, 1, 1, 0}, { 0, 0, -1, 1, 0}, { 1, 0, -1, 1, 0}, {-1, 0, -1, 1, 0}, { 0, 1, -1, 1, 0}, { 1, 1, -1, 1, 0}, {-1, 1, -1, 1, 0}, { 0, -1, -1, 1, 0}, { 1, -1, -1, 1, 0}, {-1, -1, -1, 1, 0}, { 0, 0, 0, -1, 0}, { 1, 0, 0, -1, 0}, {-1, 0, 0, -1, 0}, { 0, 1, 0, -1, 0}, { 1, 1, 0, -1, 0}, {-1, 1, 0, -1, 0}, { 0, -1, 0, -1, 0}, { 1, -1, 0, -1, 0}, {-1, -1, 0, -1, 0}, { 0, 0, 1, -1, 0}, { 1, 0, 1, -1, 0}, {-1, 0, 1, -1, 0}, { 0, 1, 1, -1, 0}, { 1, 1, 1, -1, 0}, {-1, 1, 1, -1, 0}, { 0, -1, 1, -1, 0}, { 1, -1, 1, -1, 0}, {-1, -1, 1, -1, 0}, { 0, 0, -1, -1, 0}, { 1, 0, -1, -1, 0}, {-1, 0, -1, -1, 0}, { 0, 1, -1, -1, 0}, { 1, 1, -1, -1, 0}, {-1, 1, -1, -1, 0}, { 0, -1, -1, -1, 0}, { 1, -1, -1, -1, 0}, {-1, -1, -1, -1, 0}, { 0, 0, 0, 0, 1}, { 1, 0, 0, 0, 1}, {-1, 0, 0, 0, 1}, { 0, 1, 0, 0, 1}, { 1, 1, 0, 0, 1}, {-1, 1, 0, 0, 1}, { 0, -1, 0, 0, 1}, { 1, -1, 0, 0, 1}, {-1, -1, 0, 0, 1}, { 0, 0, 1, 0, 1}, { 1, 0, 1, 0, 1}, {-1, 0, 1, 0, 1}, { 0, 1, 1, 0, 1}, { 1, 1, 1, 0, 1}, {-1, 1, 1, 0, 1}, { 0, -1, 1, 0, 1}, { 1, -1, 1, 0, 1}, {-1, -1, 1, 0, 1}, { 0, 0, -1, 0, 1}, { 1, 0, -1, 0, 1}, {-1, 0, -1, 0, 1}, { 0, 1, -1, 0, 1}, { 1, 1, -1, 0, 1}, {-1, 1, -1, 0, 1}, { 0, -1, -1, 0, 1}, { 1, -1, -1, 0, 1}, {-1, -1, -1, 0, 1}, { 0, 0, 0, 1, 1}, { 1, 0, 0, 1, 1}, {-1, 0, 0, 1, 1}, { 0, 1, 0, 1, 1}, { 1, 1, 0, 1, 1}, {-1, 1, 0, 1, 1}, { 0, -1, 0, 1, 1}, { 1, -1, 0, 1, 1}, {-1, -1, 0, 1, 1}, { 0, 0, 1, 1, 1}, { 1, 0, 1, 1, 1}, {-1, 0, 1, 1, 1}, { 0, 1, 1, 1, 1}, { 1, 1, 1, 1, 1}, {-1, 1, 1, 1, 1}, { 0, -1, 1, 1, 1}, { 1, -1, 1, 1, 1}, {-1, -1, 1, 1, 1}, { 0, 0, -1, 1, 1}, { 1, 0, -1, 1, 1}, {-1, 0, -1, 1, 1}, { 0, 1, -1, 1, 1}, { 1, 1, -1, 1, 1}, {-1, 1, -1, 1, 1}, { 0, -1, -1, 1, 1}, { 1, -1, -1, 1, 1}, {-1, -1, -1, 1, 1}, { 0, 0, 0, -1, 1}, { 1, 0, 0, -1, 1}, {-1, 0, 0, -1, 1}, { 0, 1, 0, -1, 1}, { 1, 1, 0, -1, 1}, {-1, 1, 0, -1, 1}, { 0, -1, 0, -1, 1}, { 1, -1, 0, -1, 1}, {-1, -1, 0, -1, 1}, { 0, 0, 1, -1, 1}, { 1, 0, 1, -1, 1}, {-1, 0, 1, -1, 1}, { 0, 1, 1, -1, 1}, { 1, 1, 1, -1, 1}, {-1, 1, 1, -1, 1}, { 0, -1, 1, -1, 1}, { 1, -1, 1, -1, 1}, {-1, -1, 1, -1, 1}, { 0, 0, -1, -1, 1}, { 1, 0, -1, -1, 1}, {-1, 0, -1, -1, 1}, { 0, 1, -1, -1, 1}, { 1, 1, -1, -1, 1}, {-1, 1, -1, -1, 1}, { 0, -1, -1, -1, 1}, { 1, -1, -1, -1, 1}, {-1, -1, -1, -1, 1}, { 0, 0, 0, 0, -1}, { 1, 0, 0, 0, -1}, {-1, 0, 0, 0, -1}, { 0, 1, 0, 0, -1}, { 1, 1, 0, 0, -1}, {-1, 1, 0, 0, -1}, { 0, -1, 0, 0, -1}, { 1, -1, 0, 0, -1}, {-1, -1, 0, 0, -1}, { 0, 0, 1, 0, -1}, { 1, 0, 1, 0, -1}, {-1, 0, 1, 0, -1}, { 0, 1, 1, 0, -1}, { 1, 1, 1, 0, -1}, {-1, 1, 1, 0, -1}, { 0, -1, 1, 0, -1}, { 1, -1, 1, 0, -1}, {-1, -1, 1, 0, -1}, { 0, 0, -1, 0, -1}, { 1, 0, -1, 0, -1}, {-1, 0, -1, 0, -1}, { 0, 1, -1, 0, -1}, { 1, 1, -1, 0, -1}, {-1, 1, -1, 0, -1}, { 0, -1, -1, 0, -1}, { 1, -1, -1, 0, -1}, {-1, -1, -1, 0, -1}, { 0, 0, 0, 1, -1}, { 1, 0, 0, 1, -1}, {-1, 0, 0, 1, -1}, { 0, 1, 0, 1, -1}, { 1, 1, 0, 1, -1}, {-1, 1, 0, 1, -1}, { 0, -1, 0, 1, -1}, { 1, -1, 0, 1, -1}, {-1, -1, 0, 1, -1}, { 0, 0, 1, 1, -1}, { 1, 0, 1, 1, -1}, {-1, 0, 1, 1, -1}, { 0, 1, 1, 1, -1}, { 1, 1, 1, 1, -1}, {-1, 1, 1, 1, -1}, { 0, -1, 1, 1, -1}, { 1, -1, 1, 1, -1}, {-1, -1, 1, 1, -1}, { 0, 0, -1, 1, -1}, { 1, 0, -1, 1, -1}, {-1, 0, -1, 1, -1}, { 0, 1, -1, 1, -1}, { 
1, 1, -1, 1, -1}, {-1, 1, -1, 1, -1}, { 0, -1, -1, 1, -1}, { 1, -1, -1, 1, -1}, {-1, -1, -1, 1, -1}, { 0, 0, 0, -1, -1}, { 1, 0, 0, -1, -1}, {-1, 0, 0, -1, -1}, { 0, 1, 0, -1, -1}, { 1, 1, 0, -1, -1}, {-1, 1, 0, -1, -1}, { 0, -1, 0, -1, -1}, { 1, -1, 0, -1, -1}, {-1, -1, 0, -1, -1}, { 0, 0, 1, -1, -1}, { 1, 0, 1, -1, -1}, {-1, 0, 1, -1, -1}, { 0, 1, 1, -1, -1}, { 1, 1, 1, -1, -1}, {-1, 1, 1, -1, -1}, { 0, -1, 1, -1, -1}, { 1, -1, 1, -1, -1}, {-1, -1, 1, -1, -1}, { 0, 0, -1, -1, -1}, { 1, 0, -1, -1, -1}, {-1, 0, -1, -1, -1}, { 0, 1, -1, -1, -1}, { 1, 1, -1, -1, -1}, {-1, 1, -1, -1, -1}, { 0, -1, -1, -1, -1}, { 1, -1, -1, -1, -1}, {-1, -1, -1, -1, -1} }; void ntru_MGF(uint8_t *seed, uint16_t seed_len, const NtruEncParams *params, NtruIntPoly *i) { uint16_t N = params->N; i->N = N; uint16_t min_calls_mask = params->min_calls_mask; uint16_t hlen = params->hlen; uint8_t buf[min_calls_mask * hlen]; uint16_t buf_len = 0; uint8_t Z[hlen]; params->hash(seed, seed_len, (uint8_t*)&Z); /* hashSeed is always true */ uint16_t counter = 0; uint8_t H[hlen]; uint16_t inp_len = hlen + sizeof counter; uint8_t hash_inp[inp_len]; while (counter < min_calls_mask-7) { uint8_t H_arr[8][NTRU_MAX_HASH_LEN]; uint8_t j; uint8_t hash_inp_arr[8][inp_len]; uint8_t *hash_inp[8]; for (j=0; j<8; j++) { uint16_t counter_endian = htons(counter); /* convert to network byte order */ memcpy(&hash_inp_arr[j], Z, sizeof Z); memcpy((uint8_t*)&hash_inp_arr[j] + sizeof Z, &counter_endian, sizeof counter_endian); hash_inp[j] = hash_inp_arr[j]; counter++; } uint8_t *H[8]; for (j=0; j<8; j++) H[j] = H_arr[j]; params->hash_8way(hash_inp, inp_len, H); uint16_t i; for (j=0; j<8; j++) for (i=0; ihash_4way(hash_inp, inp_len, H); uint16_t i; for (j=0; j<4; j++) for (i=0; ihash((uint8_t*)&hash_inp, inp_len, (uint8_t*)&H); uint16_t i; for (i=0; icoeffs[cur], NTRU_MGF_TRIT_TBL[O], 10); /* copy 5 uint16_t's */ cur += 5; if (cur >= N) return; } if (cur >= N) return; memcpy(&hash_inp, Z, sizeof Z); memcpy((uint8_t*)&hash_inp + hlen, &counter, sizeof counter); params->hash((uint8_t*)&hash_inp, inp_len, (uint8_t*)&H); memcpy(&buf, &H, hlen); buf_len = hlen; } } libntru-0.5/src/mgf.h000066400000000000000000000007601271556312200145550ustar00rootroot00000000000000#ifndef NTRU_MGF_H #define NTRU_MGF_H #include /** * @brief Mask Generation Function * * An implementation of MGF-TP-1 from P1363.1 section 8.4.1.1. * * @param seed seed for the deterministic random number generator * @param seed_len length of seed * @param params NTRUEncrypt parameters * @param i output parameter: the generated ternary polynomial */ void ntru_MGF(uint8_t *seed, uint16_t seed_len, const NtruEncParams *params, NtruIntPoly *i); #endif /* NTRU_MGF_H */ libntru-0.5/src/nist_aes_rijndael.h000066400000000000000000000036201271556312200174570ustar00rootroot00000000000000/* * Copyright (c) 2007 Henric Jungheim * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* * Interface adapter for Rijndael implementation (for use by NIST SP 800-90 CTR_DRBG) */ #ifndef NIST_AES_RIJNDAEL_H #define NIST_AES_RIJNDAEL_H #ifndef __RIJNDAEL_H #include "rijndael.h" #endif #define NIST_AES_MAXKEYBITS 256 #define NIST_AES_MAXKEYBYTES (NIST_AES_MAXKEYBITS / 8) #define NIST_AES_MAXKEYINTS (NIST_AES_MAXKEYBYTES / sizeof(int)) #define NIST_AES_BLOCKSIZEBITS 128 #define NIST_AES_BLOCKSIZEBYTES (NIST_AES_BLOCKSIZEBITS / 8) #define NIST_AES_BLOCKSIZEINTS (NIST_AES_BLOCKSIZEBYTES / sizeof(int)) typedef struct { int Nr; /* key-length-dependent number of rounds */ unsigned int ek[4*(AES_MAXROUNDS + 1)]; /* encrypt key schedule */ } NIST_AES_ENCRYPT_CTX; static __inline void NIST_AES_ECB_Encrypt(const NIST_AES_ENCRYPT_CTX* ctx, const void* src, void* dst) { rijndaelEncrypt(ctx->ek, ctx->Nr, (const unsigned char *)src, (unsigned char *)dst); } static __inline int NIST_AES_Schedule_Encryption(NIST_AES_ENCRYPT_CTX* ctx, const void* key, int bits) { ctx->Nr = rijndaelKeySetupEnc(ctx->ek, (const unsigned char *)key, bits); if (!ctx->Nr) return 1; return 0; } #endif /* NIST_AES_RIJNDAEL_H */ libntru-0.5/src/nist_config.h000066400000000000000000000024531271556312200163070ustar00rootroot00000000000000/* * Copyright (c) 2007 Henric Jungheim * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* * NIST SP 800-90 Configuration (Random Number Generator) */ #ifndef NIST_CONFIG_H #define NIST_CONFIG_H #define NIST_IS_LITTLE_ENDIAN 1 /* #define NIST_ZEROIZE 1 */ #if 0 /* Use the VIA padlock hardware as the AES implementation */ #ifndef NIST_AES_PADLOCK_H_ #include "nist_aes_padlock.h" #endif #else #ifndef NIST_AES_RIJNDAEL_H_ #include "nist_aes_rijndael.h" #endif #endif /* Use AES-256 as the block cipher */ #ifndef NIST_CTR_DRBG_AES256_H #include "nist_ctr_drbg_aes256.h" #endif #endif /* NIST_CONFIG_H */ libntru-0.5/src/nist_ctr_drbg.c000066400000000000000000000363211271556312200166240ustar00rootroot00000000000000/* * Copyright (c) 2007 Henric Jungheim * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * NIST SP 800-90 CTR_DRBG (Random Number Generator)
 */

#include "nist_ctr_drbg.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

/*
 * NIST SP 800-90 March 2007
 * 10.4.2 Derivation Function Using a Block Cipher Algorithm
 * Global Constants
 */
static NIST_Key nist_cipher_df_ctx;
static unsigned char nist_cipher_df_encrypted_iv[NIST_BLOCK_SEEDLEN / NIST_BLOCK_OUTLEN][NIST_BLOCK_OUTLEN_BYTES];

/*
 * NIST SP 800-90 March 2007
 * 10.2.1.3.2 The Process Steps for Instantiation When a Derivation
 * Function is Used
 * Global Constants
 */
static NIST_Key nist_cipher_zero_ctx;

/*
 * NIST SP 800-90 March 2007
 * 10.2.1.5.2 The Process Steps for Generating Pseudorandom Bits When a
 * Derivation Function is Used for the DRBG Implementation
 * Global Constants
 */
static const unsigned int nist_ctr_drgb_generate_null_input[NIST_BLOCK_SEEDLEN_INTS] = { 0 };

/*
 * Utility
 */
/*
 * nist_increment_block
 * Increment the output block as a big-endian number.
 */
static void nist_increment_block(unsigned int* V)
{
    int i;
    unsigned int x;

    for (i = NIST_BLOCK_OUTLEN_INTS - 1; i >= 0; --i) {
        x = NIST_NTOHL(V[i]) + 1;
        V[i] = NIST_HTONL(x);
        if (x) /* There was only a carry if we are zero */
            return;
    }
}

/*
 * NIST SP 800-90 March 2007
 * 10.4.3 BCC Function
 */
static void nist_ctr_drbg_bcc_update(const NIST_Key* ctx, const unsigned int* data, int n, unsigned int *chaining_value)
{
    int i, j;
    unsigned int input_block[NIST_BLOCK_OUTLEN_INTS];

    /* [4] for i = 1 to n */
    for (i = 0; i < n; ++i) {
        /* [4.1] input_block = chaining_value XOR block_i */
        for (j = 0; j < (int)NIST_BLOCK_OUTLEN_INTS; ++j)
            input_block[j] = chaining_value[j] ^ *data++;

        /* [4.2] chaining_value = Block_Encrypt(Key, input_block) */
        Block_Encrypt(ctx, &input_block[0], &chaining_value[0]);
    }

    /* [5] output_block = chaining_value */
    /* chaining_value already is output_block, so no copy is required */
}

static void nist_ctr_drbg_bcc(NIST_Key* ctx, const unsigned int* data, int n, unsigned int *output_block)
{
    unsigned int* chaining_value = output_block;

    /* [1] chaining_value = 0^outlen */
    memset(&chaining_value[0], 0, NIST_BLOCK_OUTLEN_BYTES);

    nist_ctr_drbg_bcc_update(ctx, data, n, output_block);
}

/*
 * NIST SP 800-90 March 2007
 * 10.4.2 Derivation Function Using a Block Cipher Algorithm
 */
typedef struct {
    int index;
    unsigned char S[NIST_BLOCK_OUTLEN_BYTES];
} NIST_CTR_DRBG_DF_BCC_CTX;

static __inline int check_int_alignment(const void* p)
{
    /*
     * It would be great if "intptr_t" could be found in
     * some standard place.
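     * C99's <stdint.h> does define intptr_t/uintptr_t, but this code also
     * targets pre-C99 compilers, hence the ptrdiff_t arithmetic below.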
*/ ptrdiff_t ip = (const char *)p - (const char *)0; if (ip & (sizeof(int) - 1)) return 0; return 1; } static void nist_ctr_drbg_df_bcc_init(NIST_CTR_DRBG_DF_BCC_CTX* ctx, int L, int N) { unsigned int* S = (unsigned int *)ctx->S; /* [4] S = L || N || input_string || 0x80 */ S[0] = NIST_HTONL(L); S[1] = NIST_HTONL(N); ctx->index = 2 * sizeof(S[0]); } static void nist_ctr_drbg_df_bcc_update(NIST_CTR_DRBG_DF_BCC_CTX* ctx, const char* input_string, int input_string_length, unsigned int* temp) { int i, len; int index = ctx->index; unsigned char* S = ctx->S; if (index) { assert(index < NIST_BLOCK_OUTLEN_BYTES); len = NIST_BLOCK_OUTLEN_BYTES - index; if (input_string_length < len) len = input_string_length; memcpy(&S[index], input_string, len); index += len; input_string += len; input_string_length -= len; if (index < NIST_BLOCK_OUTLEN_BYTES) { ctx->index = index; return; } /* We have a full block in S, so let's process it */ /* [9.2] BCC */ nist_ctr_drbg_bcc_update(&nist_cipher_df_ctx, (unsigned int *)&S[0], 1, temp); index = 0; } /* ctx->S is empty, so let's handle as many input blocks as we can */ len = input_string_length / NIST_BLOCK_OUTLEN_BYTES; if (len > 0) { if (check_int_alignment(input_string)) { /* [9.2] BCC */ nist_ctr_drbg_bcc_update(&nist_cipher_df_ctx, (const unsigned int *)input_string, len, temp); input_string += len * NIST_BLOCK_OUTLEN_BYTES; input_string_length -= len * NIST_BLOCK_OUTLEN_BYTES; } else { for (i = 0; i < len; ++i) { memcpy(&S[0], input_string, NIST_BLOCK_OUTLEN_BYTES); /* [9.2] BCC */ nist_ctr_drbg_bcc_update(&nist_cipher_df_ctx, (unsigned int *)&S[0], 1, temp); input_string += NIST_BLOCK_OUTLEN_BYTES; input_string_length -= NIST_BLOCK_OUTLEN_BYTES; } } } assert(input_string_length < NIST_BLOCK_OUTLEN_BYTES); if (input_string_length) { memcpy(&S[0], input_string, input_string_length); index = input_string_length; } ctx->index = index; } static void nist_ctr_drbg_df_bcc_final(NIST_CTR_DRBG_DF_BCC_CTX* ctx, unsigned int* temp) { int index; unsigned char* S = ctx->S; static const char endmark[] = { 0x80 }; nist_ctr_drbg_df_bcc_update(ctx, endmark, sizeof(endmark), temp); index = ctx->index; if (index) { memset(&S[index], 0, NIST_BLOCK_OUTLEN_BYTES - index); /* [9.2] BCC */ nist_ctr_drbg_bcc_update(&nist_cipher_df_ctx, (unsigned int *)&S[0], 1, temp); } } static int nist_ctr_drbg_block_cipher_df(const char* input_string[], unsigned int L[], int input_string_count, unsigned char* output_string, unsigned int N) { int j, k, blocks, sum_L; unsigned int *temp; unsigned int *X; NIST_Key ctx; NIST_CTR_DRBG_DF_BCC_CTX df_bcc_ctx; unsigned int buffer[NIST_BLOCK_SEEDLEN_INTS]; /* * NIST SP 800-90 March 2007 10.4.2 states that 512 bits is * the maximum length for the approved block cipher algorithms. */ unsigned int output_buffer[512 / 8 / sizeof(unsigned int)]; if (N > sizeof(output_buffer) || N < 1) return 1; sum_L = 0; for (j = 0; j < input_string_count; ++j) sum_L += L[j]; /* [6] temp = Null string */ temp = buffer; /* [9] while len(temp) < keylen + outlen, do */ for (j = 0; j < NIST_BLOCK_SEEDLEN / NIST_BLOCK_OUTLEN; ++j) { /* [9.2] temp = temp || BCC(K, (IV || S)) */ /* Since we have precomputed BCC(K, IV), we start with that... 
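           (nist_ctr_drbg_block_cipher_df_initialize() below fills
           nist_cipher_df_encrypted_iv[j] with BCC(K, IV(j)) once at startup)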
*/ memcpy(&temp[0], &nist_cipher_df_encrypted_iv[j][0], NIST_BLOCK_OUTLEN_BYTES); nist_ctr_drbg_df_bcc_init(&df_bcc_ctx, sum_L, N); /* Compute the rest of BCC(K, (IV || S)) */ for (k = 0; k < input_string_count; ++k) nist_ctr_drbg_df_bcc_update(&df_bcc_ctx, input_string[k], L[k], temp); nist_ctr_drbg_df_bcc_final(&df_bcc_ctx, temp); temp += NIST_BLOCK_OUTLEN_INTS; } nist_zeroize(&df_bcc_ctx, sizeof(df_bcc_ctx)); /* [6] temp = Null string */ temp = buffer; /* [10] K = Leftmost keylen bits of temp */ Block_Schedule_Encryption(&ctx, &temp[0]); /* [11] X = next outlen bits of temp */ X = &temp[NIST_BLOCK_KEYLEN_INTS]; /* [12] temp = Null string */ temp = output_buffer; /* [13] While len(temp) < number_of_bits_to_return, do */ blocks = (int)(N / NIST_BLOCK_OUTLEN_BYTES); if (N & (NIST_BLOCK_OUTLEN_BYTES - 1)) ++blocks; for (j = 0; j < blocks; ++j) { /* [13.1] X = Block_Encrypt(K, X) */ Block_Encrypt(&ctx, X, temp); X = temp; temp += NIST_BLOCK_OUTLEN_INTS; } /* [14] requested_bits = Leftmost number_of_bits_to_return of temp */ memcpy(output_string, output_buffer, N); nist_zeroize(&ctx, sizeof(ctx)); return 0; } static int nist_ctr_drbg_block_cipher_df_initialize() { int i, err; unsigned char K[NIST_BLOCK_KEYLEN_BYTES]; unsigned int IV[NIST_BLOCK_OUTLEN_INTS]; /* [8] K = Leftmost keylen bits of 0x00010203 ... 1D1E1F */ for (i = 0; i < (int)sizeof(K); ++i) K[i] = (unsigned char)i; err = Block_Schedule_Encryption(&nist_cipher_df_ctx, K); if (err) return err; /* * Precompute the partial BCC result from encrypting the IVs: * nist_cipher_df_encrypted_iv[i] = BCC(K, IV(i)) */ /* [7] i = 0 */ /* [9.1] IV = i || 0^(outlen - len(i)) */ memset(&IV[0], 0, sizeof(IV)); /* [9.3] i = i + 1 */ for (i = 0; i < NIST_BLOCK_SEEDLEN / NIST_BLOCK_OUTLEN; ++i) { /* [9.1] IV = i || 0^(outlen - len(i)) */ IV[0] = NIST_HTONL(i); /* [9.2] temp = temp || BCC(K, (IV || S)) (the IV part, at least) */ nist_ctr_drbg_bcc(&nist_cipher_df_ctx, &IV[0], 1, (unsigned int *)&nist_cipher_df_encrypted_iv[i][0]); } return 0; } /* * NIST SP 800-90 March 2007 * 10.2.1.2 The Update Function */ static void nist_ctr_drbg_update(NIST_CTR_DRBG* drbg, const unsigned int* provided_data) { int i; unsigned int temp[NIST_BLOCK_SEEDLEN_INTS]; unsigned int* output_block; /* 2. 
while (len(temp) < seedlen) do */ for (output_block = temp; output_block < &temp[NIST_BLOCK_SEEDLEN_INTS]; output_block += NIST_BLOCK_OUTLEN_INTS) { /* 2.1 V = (V + 1) mod 2^outlen */ nist_increment_block(&drbg->V[0]); /* 2.2 output_block = Block_Encrypt(K, V) */ Block_Encrypt(&drbg->ctx, drbg->V, output_block); } /* 3 temp is already of size seedlen (NIST_BLOCK_SEEDLEN_INTS) */ /* 4 (part 1) temp = temp XOR provided_data */ for (i = 0; i < (int)NIST_BLOCK_KEYLEN_INTS; ++i) temp[i] ^= *provided_data++; /* 5 Key = leftmost keylen bits of temp */ Block_Schedule_Encryption(&drbg->ctx, &temp[0]); /* 4 (part 2) combined with 6 V = rightmost outlen bits of temp */ for (i = 0; i < (int)NIST_BLOCK_OUTLEN_INTS; ++i) drbg->V[i] = temp[NIST_BLOCK_KEYLEN_INTS + i] ^ *provided_data++; } /* * NIST SP 800-90 March 2007 * 10.2.1.3.2 The Process Steps for Instantiation When a Derivation * Function is Used */ int nist_ctr_drbg_instantiate(NIST_CTR_DRBG* drbg, const void* entropy_input, int entropy_input_length, const void* nonce, int nonce_length, const void* personalization_string, int personalization_string_length) { int err, count; unsigned int seed_material[NIST_BLOCK_SEEDLEN_INTS]; unsigned int length[3]; const char *input_string[3]; /* [1] seed_material = entropy_input || nonce || personalization_string */ input_string[0] = entropy_input; length[0] = entropy_input_length; input_string[1] = nonce; length[1] = nonce_length; count = 2; if (personalization_string) { input_string[count] = personalization_string; length[count] = personalization_string_length; ++count; } /* [2] seed_material = Block_Cipher_df(seed_material, seedlen) */ err = nist_ctr_drbg_block_cipher_df(input_string, length, count, (unsigned char *)seed_material, sizeof(seed_material)); if (err) return err; /* [3] Key = 0^keylen */ memcpy(&drbg->ctx, &nist_cipher_zero_ctx, sizeof(drbg->ctx)); /* [4] V = 0^outlen */ memset(&drbg->V, 0, sizeof(drbg->V)); /* [5] (Key, V) = Update(seed_material, Key, V) */ nist_ctr_drbg_update(drbg, seed_material); /* [6] reseed_counter = 1 */ drbg->reseed_counter = 1; return 0; } static int nist_ctr_drbg_instantiate_initialize() { int err; unsigned char K[NIST_BLOCK_KEYLEN_BYTES]; memset(&K[0], 0, sizeof(K)); err = Block_Schedule_Encryption(&nist_cipher_zero_ctx, &K[0]); return err; } /* * NIST SP 800-90 March 2007 * 10.2.1.4.2 The Process Steps for Reseeding When a Derivation * Function is Used */ int nist_ctr_drbg_reseed(NIST_CTR_DRBG* drbg, const void* entropy_input, int entropy_input_length, const void* additional_input, int additional_input_length) { int err, count; const char *input_string[2]; unsigned int length[2]; unsigned int seed_material[NIST_BLOCK_SEEDLEN_INTS]; /* [1] seed_material = entropy_input || additional_input */ input_string[0] = entropy_input; length[0] = entropy_input_length; count = 1; if (additional_input) { input_string[count] = additional_input; length[count] = additional_input_length; ++count; } /* [2] seed_material = Block_Cipher_df(seed_material, seedlen) */ err = nist_ctr_drbg_block_cipher_df(input_string, length, count, (unsigned char *)seed_material, sizeof(seed_material)); if (err) return err; /* [3] (Key, V) = Update(seed_material, Key, V) */ nist_ctr_drbg_update(drbg, seed_material); /* [4] reseed_counter = 1 */ drbg->reseed_counter = 1; return 0; } /* * NIST SP 800-90 March 2007 * 10.2.1.5.2 The Process Steps for Generating Pseudorandom Bits When a * Derivation Function is Used for the DRBG Implementation */ static void nist_ctr_drbg_generate_block(NIST_CTR_DRBG* drbg, 
unsigned int* output_block) { /* [4.1] V = (V + 1) mod 2^outlen */ nist_increment_block(&drbg->V[0]); /* [4.2] output_block = Block_Encrypt(Key, V) */ Block_Encrypt(&drbg->ctx, &drbg->V[0], output_block); } int nist_ctr_drbg_generate(NIST_CTR_DRBG* drbg, void* output_string, int output_string_length, const void* additional_input, int additional_input_length) { int i, len, err; int blocks = output_string_length / NIST_BLOCK_OUTLEN_BYTES; unsigned char* p; unsigned int* temp; const char *input_string[1]; unsigned int length[1]; unsigned int buffer[NIST_BLOCK_OUTLEN_BYTES]; unsigned int additional_input_buffer[NIST_BLOCK_SEEDLEN_INTS]; if (output_string_length < 1) return 1; /* [1] If reseed_counter > reseed_interval ... */ if (drbg->reseed_counter >= NIST_CTR_DRBG_RESEED_INTERVAL) return 1; /* [2] If (addional_input != Null), then */ if (additional_input) { input_string[0] = additional_input; length[0] = additional_input_length; /* [2.1] additional_input = Block_Cipher_df(additional_input, seedlen) */ err = nist_ctr_drbg_block_cipher_df(input_string, length, 1, (unsigned char *)additional_input_buffer, sizeof(additional_input_buffer)); if (err) return err; /* [2.2] (Key, V) = Update(additional_input, Key, V) */ nist_ctr_drbg_update(drbg, additional_input_buffer); } if (blocks && check_int_alignment(output_string)) { /* [3] temp = Null */ temp = (unsigned int *)output_string; for (i = 0; i < blocks; ++i) { nist_ctr_drbg_generate_block(drbg, temp); temp += NIST_BLOCK_OUTLEN_INTS; output_string_length -= NIST_BLOCK_OUTLEN_BYTES; } output_string = (unsigned char *)temp; } /* [3] temp = Null */ temp = buffer; len = NIST_BLOCK_OUTLEN_BYTES; /* [4] While (len(temp) < requested_number_of_bits) do: */ p = output_string; while (output_string_length > 0) { nist_ctr_drbg_generate_block(drbg, temp); if (output_string_length < NIST_BLOCK_OUTLEN_BYTES) len = output_string_length; memcpy(p, temp, len); p += len; output_string_length -= len; } /* [6] (Key, V) = Update(additional_input, Key, V) */ nist_ctr_drbg_update(drbg, additional_input ? &additional_input_buffer[0] : &nist_ctr_drgb_generate_null_input[0]); /* [7] reseed_counter = reseed_counter + 1 */ ++drbg->reseed_counter; return 0; } int nist_ctr_initialize() { int err; err = nist_ctr_drbg_instantiate_initialize(); if (err) return err; err = nist_ctr_drbg_block_cipher_df_initialize(); if (err) return err; return 0; } int nist_ctr_drbg_destroy(NIST_CTR_DRBG* drbg) { nist_zeroize(drbg, sizeof(*drbg)); drbg->reseed_counter = ~0U; return 1; } libntru-0.5/src/nist_ctr_drbg.h000066400000000000000000000062561271556312200166350ustar00rootroot00000000000000/* * Copyright (c) 2007 Henric Jungheim * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ /* * NIST SP 800-90 CTR_DRBG (Random Number Generator) */ #ifndef NIST_CTR_DRBG_H #define NIST_CTR_DRBG_H #ifndef NIST_CONFIG_H #include "nist_config.h" #endif #define NIST_BLOCK_SEEDLEN (NIST_BLOCK_KEYLEN + NIST_BLOCK_OUTLEN) #define NIST_BLOCK_SEEDLEN_BYTES (NIST_BLOCK_SEEDLEN / 8) #define NIST_BLOCK_SEEDLEN_INTS (NIST_BLOCK_SEEDLEN_BYTES / sizeof(int)) #ifndef NIST_KEY_ALIGNMENT #define NIST_KEY_ALIGNMENT #endif typedef struct { unsigned int reseed_counter; NIST_Key ctx; unsigned int V[NIST_BLOCK_OUTLEN_INTS]; } NIST_KEY_ALIGNMENT NIST_CTR_DRBG; int nist_ctr_initialize(void); int nist_ctr_drbg_generate(NIST_CTR_DRBG* drbg, void* output_string, int output_string_length, const void* additional_input, int additional_input_length); int nist_ctr_drbg_reseed(NIST_CTR_DRBG* drbg, const void* entropy_input, int entropy_input_length, const void* additional_input, int additional_input_length); int nist_ctr_drbg_instantiate(NIST_CTR_DRBG* drbg, const void* entropy_input, int entropy_input_length, const void* nonce, int nonce_length, const void* personalization_string, int personalization_string_length); int nist_ctr_drbg_destroy(NIST_CTR_DRBG* ); void nist_dump_simple_hex(const void* data, int length); void nist_dump_hex(const void* data, int length); void nist_dump_named_hex(const char* name, const void* data, int length); void nist_dump_ctr_drbg(const NIST_CTR_DRBG* drbg); void nist_dump_block_ctx(const NIST_Key* ctx); #ifdef NIST_ZEROIZE #define nist_zeroize(p, s) memset(p, 0, s) #else #define nist_zeroize(p, s) do { } while(0) #endif /* * The test vectors will indicate failure if the * byte ordering is set incorrectly. * Pretending to be little endian will improve performance * slightly but should not have an impact on * security (a byte-swapped nonce is still a nonce). */ #ifndef NIST_HTONL #if defined(NIST_IS_LITTLE_ENDIAN) #define NIST_HTONL(x) nist_htonl(x) static __inline unsigned int nist_htonl(unsigned int x) { return ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)); } #elif defined(NIST_IS_BIG_ENDIAN) #define NIST_HTONL(x) (x) #else #error "Define NIST_HTONL or define NIST_IS_{BIG|LITTLE}_ENDIAN." #endif #endif /* NIST_HTONL */ #ifndef NIST_NTOHL /* BE->H and H->BE are usually the same. */ #define NIST_NTOHL(x) NIST_HTONL(x) #endif /* NIST_NTOHL */ #endif /* NIST_CTR_DRBG_H */ libntru-0.5/src/nist_ctr_drbg_aes256.h000066400000000000000000000034601271556312200177140ustar00rootroot00000000000000/* * Copyright (c) 2007 Henric Jungheim * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ /* * NIST SP 800-90 CTR_DRBG (Random Number Generator) */ #ifndef NIST_CTR_DRBG_AES256_H #define NIST_CTR_DRBG_AES256_H /* Choose AES-256 as the underlying block cipher */ #define NIST_BLOCK_KEYLEN (256) #define NIST_BLOCK_KEYLEN_BYTES (NIST_BLOCK_KEYLEN / 8) #define NIST_BLOCK_KEYLEN_INTS (NIST_BLOCK_KEYLEN_BYTES / sizeof(int)) #define NIST_BLOCK_OUTLEN (NIST_AES_BLOCKSIZEBITS) #define NIST_BLOCK_OUTLEN_BYTES (NIST_BLOCK_OUTLEN / 8) #define NIST_BLOCK_OUTLEN_INTS (NIST_BLOCK_OUTLEN_BYTES / sizeof(int)) typedef NIST_AES_ENCRYPT_CTX NIST_Key; #define Block_Encrypt(ctx, src, dst) NIST_AES_ECB_Encrypt(ctx, src, dst) #define Block_Schedule_Encryption(ctx, key) NIST_AES_Schedule_Encryption(ctx, key, NIST_BLOCK_KEYLEN) /* * NIST SP 800-90 March 2007 * 10.2 DRBG Mechanism Based on Block Ciphers * * Table 3 specifies the reseed interval as * <= 2^48. We are only using a 32-bit counter, * so we set the reseed interval a bit lower. */ #define NIST_CTR_DRBG_RESEED_INTERVAL (100000) #endif /* NIST_CTR_DRBG_AES256_H */ libntru-0.5/src/ntru.c000066400000000000000000000370361271556312200147750ustar00rootroot00000000000000#include #include #include #include "ntru.h" #include "rand.h" #include "poly.h" #include "idxgen.h" #include "mgf.h" /** Whether to ensure g is invertible when generating a key */ #define NTRU_CHECK_INVERTIBILITY_G 0 const int8_t NTRU_COEFF1_TABLE[] = {0, 0, 0, 1, 1, 1, -1, -1}; const int8_t NTRU_COEFF2_TABLE[] = {0, 1, -1, 0, 1, -1, 0, 1}; /* Generates a random g. If NTRU_CHECK_INVERTIBILITY_G, g will be invertible mod q */ uint8_t ntru_gen_g(const NtruEncParams *params, NtruPrivPoly *g, NtruRandContext *rand_ctx) { uint16_t N = params->N; uint16_t dg = params->dg; for (;;) { if (!ntru_rand_tern(N, dg, dg, &g->poly.tern, rand_ctx)) return NTRU_ERR_PRNG; g->prod_flag = 0; if (!NTRU_CHECK_INVERTIBILITY_G) break; NtruIntPoly gq; if (ntru_invert(g, params->q-1, &gq)) break; } return NTRU_SUCCESS; } uint8_t ntru_gen_key_pair_single(const NtruEncParams *params, NtruEncPrivKey *priv, NtruEncPubKey *pub, NtruIntPoly *fq, NtruRandContext *rand_ctx) { uint16_t N = params->N; uint16_t q = params->q; uint16_t df1 = params->df1; #ifndef NTRU_AVOID_HAMMING_WT_PATENT uint16_t df2 = params->df2; uint16_t df3 = params->df3; #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ if (q & (q-1)) /* check that modulus is a power of 2 */ return NTRU_ERR_INVALID_PARAM; /* choose a random f that is invertible mod q */ #ifndef NTRU_AVOID_HAMMING_WT_PATENT if (params->prod_flag) { NtruPrivPoly *t = &priv->t; t->prod_flag = 1; t->poly.prod.N = N; priv->q = q; for (;;) { /* choose random t, find the inverse of 3t+1 */ if (!ntru_rand_prod(N, df1, df2, df3, df3, &t->poly.prod, rand_ctx)) return NTRU_ERR_PRNG; if (ntru_invert(t, q-1, fq)) break; } } else #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ { NtruPrivPoly *t = &priv->t; t->prod_flag = 0; priv->q = q; for (;;) { /* choose random t, find the inverse of 3t+1 */ if (!ntru_rand_tern(N, df1, df1, &t->poly.tern, rand_ctx)) return NTRU_ERR_PRNG; if (ntru_invert(t, q-1, fq)) break; } } /* choose a random g */ NtruPrivPoly g; uint8_t result = ntru_gen_g(params, &g, rand_ctx); if (result != NTRU_SUCCESS) return result; NtruIntPoly *h = &pub->h; if (!ntru_mult_priv(&g, fq, h, q-1)) return NTRU_ERR_INVALID_PARAM; ntru_mult_fac(h, 3); ntru_mod_mask(h, q-1); ntru_clear_priv(&g); pub->q = q; return NTRU_SUCCESS; } uint8_t ntru_gen_key_pair(const NtruEncParams *params, NtruEncKeyPair *kp, NtruRandContext *rand_ctx) { NtruIntPoly fq; uint8_t result = ntru_gen_key_pair_single(params, 
&kp->priv, &kp->pub, &fq, rand_ctx); ntru_clear_int(&fq); return result; } uint8_t ntru_gen_key_pair_multi(const NtruEncParams *params, NtruEncPrivKey *priv, NtruEncPubKey *pub, NtruRandContext *rand_ctx, uint32_t num_pub) { uint16_t q = params->q; NtruIntPoly fq; uint8_t result = ntru_gen_key_pair_single(params, priv, pub, &fq, rand_ctx); if (result != NTRU_SUCCESS) return result; uint32_t i; for (i=1; iq; NtruIntPoly fq; if (!ntru_invert(&priv->t, q-1, &fq)) return NTRU_ERR_INVALID_KEY; NtruIntPoly *h = &pub->h; NtruPrivPoly g; uint8_t result = ntru_gen_g(params, &g, rand_ctx); if (result != NTRU_SUCCESS) return result; if (!ntru_mult_priv(&g, &fq, h, q-1)) return NTRU_ERR_INVALID_PARAM; ntru_clear_int(&fq); ntru_mult_fac(h, 3); ntru_mod_mask(h, q-1); pub->q = q; return NTRU_SUCCESS; } /** * @brief byte array to ternary polynomial * * Decodes a uint8_t array encoded with ntru_to_sves() back to a polynomial with N * coefficients between -1 and 1. * Ignores any excess bytes. * See P1363.1 section 9.2.2. * * @param M an encoded ternary polynomial * @param M_len number of elements in M * @param N number of coefficients to generate * @param poly output parameter; pointer to write the polynomial to */ void ntru_from_sves(uint8_t *M, uint16_t M_len, uint16_t N, NtruIntPoly *poly) { poly->N = N; uint16_t coeff_idx = 0; uint16_t i = 0; while (i<(M_len+2)/3*3 && coeff_idxcoeffs[coeff_idx++] = NTRU_COEFF1_TABLE[coeff_tbl_idx]; poly->coeffs[coeff_idx++] = NTRU_COEFF2_TABLE[coeff_tbl_idx]; chunk >>= 3; } } while (coeff_idx < N) poly->coeffs[coeff_idx++] = 0; } /** * @brief Ternary polynomial to byte array * * Encodes a polynomial whose elements are between 0 and 2, to a uint8_t array. * The (2*i)-th coefficient and the (2*i+1)-th coefficient must not both equal * 2 for any integer i, so this method is only safe to use with arrays * produced by ntru_from_sves(). * See P1363.1 section 9.2.3. 
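 *
 * Each pair of coefficients (c1,c2) is packed into the 3-bit group c1*3+c2;
 * eight such groups, i.e. 16 coefficients, fill three bytes of output.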
* * @param poly a ternary polynomial * @param data output parameter; must accommodate ceil(num_bits/8)+3 bytes * @return 1 for success, 0 for invalid encoding */ uint8_t ntru_to_sves(NtruIntPoly *poly, uint8_t *data) { uint16_t N = poly->N; uint16_t num_bits = (N*3+1) / 2; memset(data, 0, (num_bits+7)/8); uint16_t i; uint16_t start = 0; uint16_t end = N/2*2; /* if there is an odd number of coeffs, throw away the highest one */ memset(&poly->coeffs[N], 0, 2*15); /* we process coefficients in blocks of 16, so clear the last block */ uint16_t d_idx = 0; uint8_t valid = 1; for (i=start; icoeffs[i++]; int16_t coeff2 = poly->coeffs[i++]; if (coeff1==2 && coeff2==2) valid = 0; int16_t c = coeff1*3 + coeff2; data[d_idx] = c; coeff1 = poly->coeffs[i++]; coeff2 = poly->coeffs[i++]; if (coeff1==2 && coeff2==2) valid = 0; c = coeff1*3 + coeff2; data[d_idx] |= c << 3; coeff1 = poly->coeffs[i++]; coeff2 = poly->coeffs[i++]; if (coeff1==2 && coeff2==2) valid = 0; c = coeff1*3 + coeff2; data[d_idx] |= c << 6; d_idx++; data[d_idx] = c >> 2; coeff1 = poly->coeffs[i++]; coeff2 = poly->coeffs[i++]; if (coeff1==2 && coeff2==2) valid = 0; c = coeff1*3 + coeff2; data[d_idx] |= c << 1; coeff1 = poly->coeffs[i++]; coeff2 = poly->coeffs[i++]; if (coeff1==2 && coeff2==2) valid = 0; c = coeff1*3 + coeff2; data[d_idx] |= c << 4; coeff1 = poly->coeffs[i++]; coeff2 = poly->coeffs[i++]; if (coeff1==2 && coeff2==2) valid = 0; c = coeff1*3 + coeff2; data[d_idx] |= c << 7; d_idx++; data[d_idx] = c >> 1; coeff1 = poly->coeffs[i++]; coeff2 = poly->coeffs[i++]; if (coeff1==2 && coeff2==2) valid = 0; c = coeff1*3 + coeff2; data[d_idx] |= c << 2; coeff1 = poly->coeffs[i++]; coeff2 = poly->coeffs[i++]; if (coeff1==2 && coeff2==2) valid = 0; c = coeff1*3 + coeff2; data[d_idx] |= c << 5; d_idx++; } return valid; } /** * @brief Seed generation * * Generates a seed for the Blinding Polynomial Generation Function. 
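 * The seed is the concatenation OID | msg | b | htrunc, where htrunc is the
 * encoded public key h truncated to pklen bits (see the code below).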
* * @param msg the plain-text message * @param msg_len number of characters in msg * @param h the public key * @param b db bits of random data * @param params encryption parameters * @param seed output parameter; an array to write the seed value to */ void ntru_get_seed(uint8_t *msg, uint16_t msg_len, NtruIntPoly *h, uint8_t *b, const NtruEncParams *params, uint8_t *seed) { uint16_t oid_len = sizeof params->oid; uint16_t pklen = params->pklen; uint8_t bh[ntru_enc_len(params)]; ntru_to_arr(h, params->q, (uint8_t*)&bh); uint8_t htrunc[pklen/8]; memcpy(&htrunc, &bh, pklen/8); /* seed = OID|m|b|htrunc */ uint16_t blen = params->db/8; memcpy(seed, ¶ms->oid, oid_len); seed += oid_len; memcpy(seed, msg, msg_len); seed += msg_len; memcpy(seed, b, blen); seed += blen; memcpy(seed, &htrunc, pklen/8); } void ntru_gen_tern_poly(NtruIGFState *s, uint16_t df, NtruTernPoly *p) { p->N = s->N; p->num_ones = df; p->num_neg_ones = df; uint16_t idx; uint16_t r[p->N]; memset(r, 0, sizeof r); uint16_t t = 0; while (t < df) { ntru_IGF_next(s, &idx); if (!r[idx]) { p->neg_ones[t] = idx; r[idx] = 1; t++; } } t = 0; while (t < df) { ntru_IGF_next(s, &idx); if (!r[idx]) { p->ones[t] = idx; r[idx] = 1; t++; } } } void ntru_gen_blind_poly(uint8_t *seed, uint16_t seed_len, const NtruEncParams *params, NtruPrivPoly *r) { NtruIGFState s; ntru_IGF_init(seed, seed_len, params, &s); #ifndef NTRU_AVOID_HAMMING_WT_PATENT if (params->prod_flag) { r->poly.prod.N = s.N; ntru_gen_tern_poly(&s, params->df1, &r->poly.prod.f1); ntru_gen_tern_poly(&s, params->df2, &r->poly.prod.f2); ntru_gen_tern_poly(&s, params->df3, &r->poly.prod.f3); } else #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ { r->poly.tern.N = s.N; ntru_gen_tern_poly(&s, params->df1, &r->poly.tern); } r->prod_flag = params->prod_flag; } /* All elements of p->coeffs must be in the [0..2] range */ uint8_t ntru_check_rep_weight(NtruIntPoly *p, uint16_t dm0) { uint16_t i; uint16_t weights[3]; weights[0] = weights[1] = weights[2] = 0; for (i=0; iN; i++) weights[p->coeffs[i]]++; return (weights[0]>=dm0 && weights[1]>=dm0 && weights[2]>=dm0); } uint8_t ntru_encrypt(uint8_t *msg, uint16_t msg_len, NtruEncPubKey *pub, const NtruEncParams *params, NtruRandContext *rand_ctx, uint8_t *enc) { uint16_t N = params->N; uint16_t q = params->q; uint16_t db = params->db; uint16_t max_len_bytes = ntru_max_msg_len(params); uint16_t dm0 = params->dm0; if (q & (q-1)) /* check that modulus is a power of 2 */ return NTRU_ERR_INVALID_PARAM; if (max_len_bytes > 255) return NTRU_ERR_INVALID_MAX_LEN; if (msg_len > max_len_bytes) return NTRU_ERR_MSG_TOO_LONG; for (;;) { /* M = b|octL|msg|p0 */ uint8_t b[db/8]; if (ntru_rand_generate(b, db/8, rand_ctx) != NTRU_SUCCESS) return NTRU_ERR_PRNG; uint16_t M_len = db/8 + 1 + max_len_bytes + 1; uint8_t M[M_len]; memcpy(&M, &b, db/8); uint8_t *M_head = (uint8_t*)&M + db/8; *M_head = msg_len; M_head++; memcpy(M_head, msg, msg_len); M_head += msg_len; memset(M_head, 0, max_len_bytes+1-msg_len); NtruIntPoly mtrin; ntru_from_sves((uint8_t*)&M, M_len, N, &mtrin); uint16_t blen = params->db / 8; uint16_t sdata_len = sizeof(params->oid) + msg_len + blen + blen; uint8_t sdata[sdata_len]; ntru_get_seed(msg, msg_len, &pub->h, (uint8_t*)&b, params, (uint8_t*)&sdata); NtruIntPoly R; NtruPrivPoly r; ntru_gen_blind_poly((uint8_t*)&sdata, sdata_len, params, &r); if (!ntru_mult_priv(&r, &pub->h, &R, q-1)) return NTRU_ERR_INVALID_PARAM; uint16_t oR4_len = (N*2+7) / 8; uint8_t oR4[oR4_len]; ntru_to_arr4(&R, (uint8_t*)&oR4); NtruIntPoly mask; ntru_MGF((uint8_t*)&oR4, oR4_len, 
params, &mask); ntru_add(&mtrin, &mask); ntru_mod3(&mtrin); if (!ntru_check_rep_weight(&mtrin, dm0)) continue; ntru_add(&R, &mtrin); ntru_to_arr(&R, q, enc); return NTRU_SUCCESS; } } void ntru_decrypt_poly(NtruIntPoly *e, NtruEncPrivKey *priv, uint16_t q, NtruIntPoly *d) { ntru_mult_priv(&priv->t, e, d, q-1); ntru_mult_fac(d, 3); ntru_add(d, e); ntru_mod_center(d, q); ntru_mod3(d); } uint8_t ntru_decrypt(uint8_t *enc, NtruEncKeyPair *kp, const NtruEncParams *params, uint8_t *dec, uint16_t *dec_len) { uint16_t N = params->N; uint16_t q = params->q; uint16_t db = params->db; uint16_t max_len_bytes = ntru_max_msg_len(params); uint16_t dm0 = params->dm0; if (q & (q-1)) /* check that modulus is a power of 2 */ return NTRU_ERR_INVALID_PARAM; if (max_len_bytes > 255) return NTRU_ERR_INVALID_MAX_LEN; uint16_t blen = db / 8; uint8_t retcode = NTRU_SUCCESS; NtruIntPoly e; ntru_from_arr(enc, N, q, &e); NtruIntPoly ci; ntru_decrypt_poly(&e, &kp->priv, q, &ci); if (!ntru_check_rep_weight(&ci, dm0) && retcode==NTRU_SUCCESS) retcode = NTRU_ERR_DM0_VIOLATION; NtruIntPoly cR = e; ntru_sub(&cR, &ci); ntru_mod_mask(&cR, q-1); uint16_t coR4_len = (N*2+7) / 8; uint8_t coR4[coR4_len]; ntru_to_arr4(&cR, (uint8_t*)&coR4); NtruIntPoly mask; ntru_MGF((uint8_t*)&coR4, coR4_len, params, &mask); NtruIntPoly cmtrin = ci; ntru_sub(&cmtrin, &mask); ntru_mod3(&cmtrin); uint16_t cM_len_bits = (N*3+1) / 2; uint16_t cM_len_bytes = (cM_len_bits+7) / 8; uint8_t cM[cM_len_bytes+3]; /* 3 extra bytes for ntru_to_sves() */ if (!ntru_to_sves(&cmtrin, (uint8_t*)&cM) && retcode==NTRU_SUCCESS) retcode = NTRU_ERR_INVALID_ENCODING; uint8_t cb[blen]; uint8_t *cM_head = cM; memcpy(cb, cM_head, blen); cM_head += blen; uint8_t cl = *cM_head; /* llen=1, so read one byte */ cM_head++; if (cl > max_len_bytes) { if (retcode == NTRU_SUCCESS) retcode = NTRU_ERR_MSG_TOO_LONG; cl = max_len_bytes; /* prevent buffer overrun in memcpy below */ } memcpy(dec, cM_head, cl); cM_head += cl; uint8_t *i; for (i=cM_head; ioid) + cl + blen + db/8; uint8_t sdata[sdata_len]; ntru_get_seed(dec, cl, &kp->pub.h, (uint8_t*)&cb, params, (uint8_t*)&sdata); NtruPrivPoly cr; ntru_gen_blind_poly((uint8_t*)&sdata, sdata_len, params, &cr); NtruIntPoly cR_prime; ntru_mult_priv(&cr, &kp->pub.h, &cR_prime, q-1); if (!ntru_equals_int(&cR_prime, &cR) && retcode==NTRU_SUCCESS) retcode = NTRU_ERR_INVALID_ENCODING; *dec_len = cl; return retcode; } uint8_t ntru_max_msg_len(const NtruEncParams *params) { uint16_t N = params->N; uint8_t llen = 1; /* ceil(log2(max_len)) */ uint16_t db = params->db; uint16_t max_msg_len; max_msg_len = N/2*3/8 - llen - db/8; return max_msg_len; } libntru-0.5/src/ntru.h000066400000000000000000000122721271556312200147750ustar00rootroot00000000000000#ifndef NTRU_NTRU_H #define NTRU_NTRU_H #ifdef __cplusplus extern "C" { #endif /* __cplusplus*/ #include "types.h" #include "key.h" #include "encparams.h" #include "rand.h" #include "err.h" /** * @brief Key generation * * Generates a NtruEncrypt key pair. * If a deterministic RNG is used, the key pair will be deterministic for a given random seed; * otherwise, the key pair will be completely random. * * @param params the NtruEncrypt parameters to use * @param kp pointer to write the key pair to (output parameter) * @param rand_ctx an initialized random number generator. See ntru_rand_init() in rand.h. 
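 *
 * Example (a minimal sketch; assumes the EES439EP1 parameter set from
 * encparams.h and the default RNG from rand.h, error checking omitted):
 *
 *   NtruEncParams params = EES439EP1;
 *   NtruRandGen rng = NTRU_RNG_DEFAULT;
 *   NtruRandContext rand_ctx;
 *   ntru_rand_init(&rand_ctx, &rng);
 *   NtruEncKeyPair kp;
 *   ntru_gen_key_pair(&params, &kp, &rand_ctx);
 *   ntru_rand_release(&rand_ctx);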
* @return NTRU_SUCCESS for success, or a NTRU_ERR_ code for failure */ uint8_t ntru_gen_key_pair(const NtruEncParams *params, NtruEncKeyPair *kp, NtruRandContext *rand_ctx); /** * @brief Key generation with multiple public keys * * Generates num_pub NtruEncrypt key pairs. They all share a private key but their public keys * differ. The private key decrypts messages encrypted for any of the public keys. * Note that when decrypting, the public key of the key pair passed into ntru_decrypt() must * match the public key used for encrypting the message. * If a deterministic RNG is used, the key pair will be deterministic for a given random seed; * otherwise, the key pair will be completely random. * * @param params the NtruEncrypt parameters to use * @param priv the private key (output parameter) * @param pub an array of length num_pub or more (output parameter) * @param rand_ctx an initialized random number generator. See ntru_rand_init() in rand.h. * @param num_pub the number of public keys to generate * @return NTRU_SUCCESS for success, or a NTRU_ERR_ code for failure */ uint8_t ntru_gen_key_pair_multi(const NtruEncParams *params, NtruEncPrivKey *priv, NtruEncPubKey *pub, NtruRandContext *rand_ctx, uint32_t num_pub); /** * @brief New public key * * Generates a new public key for an existing private key. The new public key can be used * interchangeably with the existing public key(s). * Generating n keys via ntru_gen_key_pair_multi() is more efficient than generating one * and then calling ntru_gen_pub() n-1 times, so if the number of public keys needed is * known beforehand and if speed matters, ntru_gen_key_pair_multi() should be used. * Note that when decrypting, the public key of the key pair passed into ntru_decrypt() must * match the public key used for encrypting the message. * If a deterministic RNG is used, the key will be deterministic for a given random seed; * otherwise, the key will be completely random. * * @param params the NtruEncrypt parameters to use * @param priv a private key * @param pub the new public key (output parameter) * @param rand_ctx an initialized random number generator. See ntru_rand_init() in rand.h. * @param num_pub the number of public keys to generate * @return NTRU_SUCCESS for success, or a NTRU_ERR_ code for failure */ uint8_t ntru_gen_pub(const NtruEncParams *params, NtruEncPrivKey *priv, NtruEncPubKey *pub, NtruRandContext *rand_ctx); /** * @brief Encryption * * Encrypts a message. * If a deterministic RNG is used, the encrypted message will also be deterministic for a given * combination of plain text, key, and random seed. * See P1363.1 section 9.2.2. * * @param msg The message to encrypt * @param msg_len length of msg. Must not exceed ntru_max_msg_len(params). To encrypt * bulk data, encrypt with a symmetric key, then NTRU-encrypt that key. * @param pub the public key to encrypt the message with * @param params the NtruEncrypt parameters to use * @param rand_ctx an initialized random number generator. See ntru_rand_init() in rand.h. * @param enc output parameter; a pointer to store the encrypted message. Must accommodate ntru_enc_len(params) bytes. * @return NTRU_SUCCESS on success, or one of the NTRU_ERR_ codes on failure */ uint8_t ntru_encrypt(uint8_t *msg, uint16_t msg_len, NtruEncPubKey *pub, const NtruEncParams *params, NtruRandContext *rand_ctx, uint8_t *enc); /** * @brief Decryption * * Decrypts a message. * See P1363.1 section 9.2.3. 
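 *
 * Example round trip (a sketch; continues the kp, params, and rand_ctx from
 * the ntru_gen_key_pair() example above, error checking omitted):
 *
 *   uint8_t msg[] = "hello";
 *   uint8_t enc[ntru_enc_len(&params)];
 *   ntru_encrypt(msg, sizeof msg, &kp.pub, &params, &rand_ctx, enc);
 *   uint8_t dec[ntru_max_msg_len(&params)];
 *   uint16_t dec_len;
 *   ntru_decrypt(enc, &kp, &params, dec, &dec_len);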
 *
 * @param enc The message to decrypt
 * @param kp a key pair that contains the public key the message was encrypted with, and the corresponding private key
 * @param params the NtruEncrypt parameters the message was encrypted with
 * @param dec output parameter; a pointer to store the decrypted message. Must accommodate ntru_max_msg_len(params) bytes.
 * @param dec_len output parameter; pointer to store the length of dec
 * @return NTRU_SUCCESS on success, or one of the NTRU_ERR_ codes on failure
 */
uint8_t ntru_decrypt(uint8_t *enc, NtruEncKeyPair *kp, const NtruEncParams *params, uint8_t *dec, uint16_t *dec_len);

/**
 * @brief Maximum message length
 *
 * Returns the maximum length a plaintext message can be.
 * Depending on the parameter set, the maximum lengths for the predefined
 * parameter sets are between 60 and 248.
 * For longer messages, use hybrid encryption.
 *
 * @param params an NtruEncrypt parameter set
 * @return the maximum number of bytes in a message
 */
uint8_t ntru_max_msg_len(const NtruEncParams *params);

#ifdef __cplusplus
}
#endif /* __cplusplus*/

#endif /* NTRU_NTRU_H */
libntru-0.5/src/ntru_endian.h000066400000000000000000000013001271556312200163020ustar00rootroot00000000000000#ifndef NTRU_ENDIAN_H
#define NTRU_ENDIAN_H

#ifdef __APPLE__
#include <libkern/OSByteOrder.h>
#define htole64(x) OSSwapHostToLittleInt64(x)
#define htole32(x) OSSwapHostToLittleInt32(x)
#define htole16(x) OSSwapHostToLittleInt16(x)
#endif

#ifdef __MINGW32__
/* assume little endian */
#define htole64(x) (x)
#define htole32(x) (x)
#define htole16(x) (x)
#endif

#ifdef __FreeBSD__
#include <sys/endian.h>
#endif

#ifdef __GLIBC__
#if __GLIBC__ <= 2 || ( __GLIBC__ == 2 && __GLIBC_MINOR__ < 9 )
#ifndef __powerpc__
/* assume little endian */
#define htole64(x) (x)
#define htole32(x) (x)
#define htole16(x) (x)
#endif
#endif
#endif

#ifdef __OS2__
#include <machine/endian.h>
#endif

#endif /* NTRU_ENDIAN_H */
libntru-0.5/src/poly.c000066400000000000000000001750651271556312200147710ustar00rootroot00000000000000#include <stdint.h>
#include <string.h>
#ifdef __SSSE3__
#include <tmmintrin.h>
#endif
#ifdef __AVX2__
#include <immintrin.h>
#endif
#include "poly.h"
#include "rand.h"
#include "err.h"
#include "arith.h"
#include "encparams.h"
#include "ntru_endian.h"

#define NTRU_SPARSE_THRESH 14
#define NTRU_KARATSUBA_THRESH_16 40
#define NTRU_KARATSUBA_THRESH_64 120

uint8_t ntru_num_bits(uint16_t n) {
    uint8_t b = 1;
    while (n >>= 1)
        b++;
    return b;
}

uint8_t ntru_rand_tern(uint16_t N, uint16_t num_ones, uint16_t num_neg_ones, NtruTernPoly *poly, NtruRandContext *rand_ctx) {
    int16_t coeffs[N];
    memset(&coeffs, 0, N * sizeof coeffs[0]);

    uint16_t rand_len = num_ones + num_neg_ones + 10;   /* 10 more to avoid calling the RNG again, for up to 10 collisions */
    uint16_t rand_data[rand_len];
    if (ntru_rand_generate((uint8_t*)rand_data, rand_len*2, rand_ctx) != NTRU_SUCCESS)
        return 0;
    uint16_t r_idx = 0;   /* index into rand_data */

    uint16_t bits = ntru_num_bits(N);
    uint16_t i = 0;
    while (i < num_ones) {
        uint16_t r = htole16(rand_data[r_idx]) >> (8*sizeof r - bits);   /* 0 <= r < 2^bits */
        r_idx++;
        /* refill rand_data if we run out */
        if (r_idx >= rand_len) {
            if (ntru_rand_generate((uint8_t*)rand_data, rand_len*2, rand_ctx) != NTRU_SUCCESS)
                return 0;
            r_idx = 0;
        }
        if (r<N && !coeffs[r]) {
            poly->ones[i] = r;
            coeffs[r] = 1;
            i++;
        }
    }

    i = 0;
    while (i < num_neg_ones) {
        uint16_t r = htole16(rand_data[r_idx]) >> (8*sizeof r - bits);   /* 0 <= r < 2^bits */
        r_idx++;
        /* refill rand_data if we run out */
        if (r_idx >= rand_len) {
            if (ntru_rand_generate((uint8_t*)rand_data, rand_len*2, rand_ctx) != NTRU_SUCCESS)
                return 0;
            r_idx = 0;
        }
        if (r<N && !coeffs[r]) {
            poly->neg_ones[i] = r;
            coeffs[r] = -1;
            i++;
        }
    }

    poly->N
= N; poly->num_ones = num_ones; poly->num_neg_ones = num_neg_ones; return 1; } #ifndef NTRU_AVOID_HAMMING_WT_PATENT uint8_t ntru_rand_prod(uint16_t N, uint16_t df1, uint16_t df2, uint16_t df3_ones, uint16_t df3_neg_ones, NtruProdPoly *poly, NtruRandContext *rand_ctx) { poly->N = N; uint8_t result = ntru_rand_tern(N, df1, df1, &poly->f1, rand_ctx); result &= ntru_rand_tern(N, df2, df2, &poly->f2, rand_ctx); result &= ntru_rand_tern(N, df3_ones, df3_neg_ones, &poly->f3, rand_ctx); return result; } #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ void ntru_add(NtruIntPoly *a, NtruIntPoly *b) { uint16_t i; for (i=0; iN; i++) a->coeffs[i] += b->coeffs[i]; } void ntru_add_mod2_32(uint32_t *a, uint32_t *b, uint16_t len) { uint16_t i; for (i=0; iN; i++) a->coeffs[i] -= b->coeffs[i]; } void ntru_neg_mod(NtruIntPoly *a, uint16_t modulus) { uint16_t i; for (i=0; iN; i++) a->coeffs[i] = modulus - a->coeffs[i]; } uint8_t ntru_mult_int(NtruIntPoly *a, NtruIntPoly *b, NtruIntPoly *c, uint16_t mod_mask) { #ifdef __AVX2__ return ntru_mult_int_avx2(a, b, c, mod_mask); #elif __SSSE3__ return ntru_mult_int_sse(a, b, c, mod_mask); #elif _LP64 return ntru_mult_int_64(a, b, c, mod_mask); #else return ntru_mult_int_16(a, b, c, mod_mask); #endif } void ntru_mult_int_16_base(int16_t *a, int16_t *b, int16_t *c, uint16_t len, uint16_t N, uint16_t mod_mask) { memset(c, 0, 2*(2*len-1)); /* only needed if N < NTRU_KARATSUBA_THRESH_16 */ uint16_t c_idx = 0; uint16_t k; for (k=0; k<2*len-1; k++) { int16_t ck = 0; uint16_t i; int16_t istart = k - len + 1; if (istart < 0) istart = 0; int16_t iend = k + 1; if (iend > len) iend = len; int16_t a_idx = k - istart; for (i=istart; i= N) c_idx = 0; } } void ntru_mult_karatsuba_16(int16_t *a, int16_t *b, int16_t *c, uint16_t len, uint16_t N) { if (len < NTRU_KARATSUBA_THRESH_16) ntru_mult_int_16_base(a, b, c, len, N, -1); else { uint16_t len2 = len / 2; int16_t z0[NTRU_INT_POLY_SIZE]; int16_t z1[NTRU_INT_POLY_SIZE]; int16_t z2[NTRU_INT_POLY_SIZE]; /* z0, z2 */ ntru_mult_karatsuba_16(a, b, z0, len2, N); ntru_mult_karatsuba_16(a+len2, b+len2, z2, len-len2, N); /* z1 */ int16_t lh1[NTRU_INT_POLY_SIZE]; int16_t lh2[NTRU_INT_POLY_SIZE]; uint16_t i; for (i=0; i= N) c_idx = 0; } c_idx = 2 * len2; if (c_idx >= N) c_idx = 0; for (i=0; i<2*(len-len2)-1; i++) { c[c_idx] += z2[i]; c_idx++; if (c_idx >= N) c_idx = 0; } } } uint8_t ntru_mult_int_16(NtruIntPoly *a, NtruIntPoly *b, NtruIntPoly *c, uint16_t mod_mask) { uint16_t N = a->N; if (N != b->N) return 0; c->N = N; ntru_mult_karatsuba_16((int16_t*)&a->coeffs, (int16_t*)&b->coeffs, (int16_t*)&c->coeffs, N, N); ntru_mod_mask(c, mod_mask); return 1; } void ntru_mult_int_64_base(int16_t *a, int16_t *b, int16_t *c, uint16_t len, uint16_t N, uint16_t mod_mask) { uint16_t N2 = (len+1) / 2; uint64_t mod_mask_64 = mod_mask + (mod_mask<<25); /* make 64-bit versions of a and b */ uint64_t a64[N2]; uint64_t b64[N2]; uint16_t i; for (i=0; i> 50; overflow_ctr_rem--; if (!overflow_ctr_rem) { uint16_t k; for (k=0; k= N) k = 0; c[k] += c64[i] >> 25; if (++k >= N) k = 0; } } void ntru_mult_karatsuba_64(int16_t *a, int16_t *b, int16_t *c, uint16_t len, uint16_t N, uint16_t mod_mask) { if (len < NTRU_KARATSUBA_THRESH_64) ntru_mult_int_64_base(a, b, c, len, N, mod_mask); else { uint16_t len2 = len / 2; int16_t z0[NTRU_INT_POLY_SIZE]; int16_t z1[NTRU_INT_POLY_SIZE]; int16_t z2[NTRU_INT_POLY_SIZE]; /* z0, z2 */ ntru_mult_karatsuba_64(a, b, z0, len2, N, mod_mask); ntru_mult_karatsuba_64(a+len2, b+len2, z2, len-len2, N, mod_mask); /* z1 */ int16_t lh1[NTRU_INT_POLY_SIZE]; 
int16_t lh2[NTRU_INT_POLY_SIZE]; uint16_t i; for (i=0; i= N) c_idx = 0; } c_idx = 2 * len2; if (c_idx >= N) c_idx = 0; for (i=0; i<2*(len-len2)-1; i++) { c[c_idx] += z2[i]; c_idx++; if (c_idx >= N) c_idx = 0; } } } uint8_t ntru_mult_int_64(NtruIntPoly *a, NtruIntPoly *b, NtruIntPoly *c, uint16_t mod_mask) { uint16_t N = a->N; if (N != b->N) return 0; c->N = N; ntru_mult_karatsuba_64((int16_t*)&a->coeffs, (int16_t*)&b->coeffs, (int16_t*)&c->coeffs, N, N, mod_mask); ntru_mod_mask(c, mod_mask); return 1; } #ifdef __SSSE3__ uint8_t ntru_mult_int_sse(NtruIntPoly *a, NtruIntPoly *b, NtruIntPoly *c, uint16_t mod_mask) { uint16_t N = a->N; if (N != b->N) return 0; c->N = N; int16_t c_coeffs[2*NTRU_INT_POLY_SIZE]; /* double capacity for intermediate result */ memset(&c_coeffs, 0, sizeof(c_coeffs)); uint16_t k; for (k=N; kcoeffs[k] = 0; b->coeffs[k] = 0; } for (k=0; kcoeffs[k+j]); /* indices 0..7 */ __m128i a128 = _mm_lddqu_si128((__m128i*)&a->coeffs[0]); __m128i c128 = _mm_lddqu_si128((__m128i*)&c_coeffs[k]); for (j=0; j<8; j++) { __m128i product = _mm_mullo_epi16(a128, b128[j]); c128 = _mm_add_epi16(c128, product); a128 = _mm_slli_si128(a128, 2); } _mm_storeu_si128((__m128i*)&c_coeffs[k], c128); /* indices 8... */ uint16_t i; for (i=8; icoeffs[i-7]); __m128i a128_1 = _mm_lddqu_si128((__m128i*)&a->coeffs[i]); for (j=0; j<8; j++) { __m128i product = _mm_mullo_epi16(a128_1, b128[j]); c128 = _mm_add_epi16(c128, product); a128_0 = _mm_slli_si128(a128_0, 2); a128_1 = _mm_alignr_epi8(a128_1, a128_0, 14); } _mm_storeu_si128((__m128i*)&c_coeffs[k+i], c128); } } /* no need to SSE-ify the following loop b/c the compiler auto-vectorizes it */ for (k=0; kcoeffs[k] = c_coeffs[k] + c_coeffs[N+k]; ntru_mod_mask(c, mod_mask); return 1; } #endif /* __SSSE3__ */ #ifdef __AVX2__ uint8_t ntru_mult_int_avx2(NtruIntPoly *a, NtruIntPoly *b, NtruIntPoly *c, uint16_t mod_mask) { uint16_t N = a->N; if (N != b->N) return 0; c->N = N; int16_t c_coeffs[2*NTRU_INT_POLY_SIZE]; /* double capacity for intermediate result */ memset(&c_coeffs, 0, sizeof(c_coeffs)); uint16_t k; for (k=N; kcoeffs[k] = 0; b->coeffs[k] = 0; } for (k=0; kcoeffs[k+j])), _mm_set1_epi16(b->coeffs[k+8+j]),1); } /* indices 0..7 */ __m128i tmp_a = _mm_lddqu_si128((__m128i*)&a->coeffs[0]); __m256i a256 = _mm256_broadcastsi128_si256(tmp_a); __m256i c256 = _mm256_lddqu_si256((__m256i*)&c_coeffs[k]); for (j=0; j<8; j++) { __m256i product = _mm256_mullo_epi16(a256, b256[j]); c256 = _mm256_add_epi16(c256, product); a256 = _mm256_bslli_epi128(a256, 2); } _mm256_storeu_si256((__m256i*)&c_coeffs[k], c256); /* indices 8... 
*/ uint16_t i; for (i=8; icoeffs[i-7]); __m256i a256_0 = _mm256_broadcastsi128_si256(tmp_0); __m128i tmp_1 = _mm_lddqu_si128((__m128i*)&a->coeffs[i]); __m256i a256_1 = _mm256_broadcastsi128_si256(tmp_1); for (j=0; j<8; j++) { __m256i product = _mm256_mullo_epi16(a256_1, b256[j]); c256 = _mm256_add_epi16(c256, product); a256_0 = _mm256_bslli_epi128(a256_0, 2); a256_1 = _mm256_alignr_epi8(a256_1, a256_0, 14); } _mm256_storeu_si256((__m256i*)&c_coeffs[k+i], c256); } } /* no need to SSE-ify the following loop b/c the compiler auto-vectorizes it */ for (k=0; kcoeffs[k] = c_coeffs[k] + c_coeffs[N+k]; ntru_mod_mask(c, mod_mask); return 1; } #endif /* __AVX2__ */ uint8_t ntru_mult_tern(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask) { #ifdef __AVX2__ return ntru_mult_tern_avx2(a, b, c, mod_mask); #elif __SSSE3__ return ntru_mult_tern_sse(a, b, c, mod_mask); #elif _LP64 return ntru_mult_tern_64(a, b, c, mod_mask); #else return ntru_mult_tern_32(a, b, c, mod_mask); #endif } uint8_t ntru_mult_tern_32(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask) { uint16_t N = a->N; if (N != b->N) return 0; c->N = N; memset(&c->coeffs, 0, N * sizeof c->coeffs[0]); uint32_t mod_mask_32 = mod_mask; mod_mask_32 += mod_mask_32 << 16; typedef uint32_t __attribute__((__may_alias__)) uint32_t_alias; /* make sure a.coeffs[i] <= mod_mask */ ntru_mod_mask(a, mod_mask_32); uint16_t overflow_ctr_start = (1<<16)/(mod_mask+1) - 1; uint16_t overflow_ctr_rem = overflow_ctr_start; /* add coefficients that are multiplied by 1 */ uint16_t i; for (i=0; inum_ones; i++) { int16_t j; int16_t k = b->ones[i]; uint16_t j_end = N-2ones[i] ? 0 : N-2-b->ones[i]; for (j=0; jcoeffs[k]) += *((uint32_t_alias*)&a->coeffs[j]); for (; kcoeffs[k] += a->coeffs[j]; for (k=0; jcoeffs[k]) += *((uint32_t_alias*)&a->coeffs[j]); for (; jcoeffs[k] += a->coeffs[j]; overflow_ctr_rem--; if (!overflow_ctr_rem) { ntru_mod_mask(c, mod_mask); overflow_ctr_rem = overflow_ctr_start; } } /* use inverse mask for subtraction */ mod_mask_32 = ~mod_mask_32; for (i=0; icoeffs[i]) |= mod_mask_32; for (; icoeffs[i] |= mod_mask_32; /* subtract coefficients that are multiplied by -1 */ overflow_ctr_rem = overflow_ctr_start; for (i=0; inum_neg_ones; i++) { int16_t j; int16_t k = b->neg_ones[i]; uint16_t j_end = N-2neg_ones[i] ? 0 : N-2-b->neg_ones[i]; for (j=0; jcoeffs[k]) -= *((uint32_t_alias*)&a->coeffs[j]); for (; kcoeffs[k] -= a->coeffs[j]; for (k=0; jcoeffs[k]) -= *((uint32_t_alias*)&a->coeffs[j]); for (; jcoeffs[k] -= a->coeffs[j]; overflow_ctr_rem--; if (!overflow_ctr_rem) { for (j=0; jcoeffs[j]) |= mod_mask_32; for (; jcoeffs[j] |= mod_mask_32; overflow_ctr_rem = overflow_ctr_start; } } ntru_mod_mask(c, mod_mask); return 1; } uint8_t ntru_mult_tern_64(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask) { uint16_t N = a->N; if (N != b->N) return 0; memset(&c->coeffs, 0, N * sizeof c->coeffs[0]); c->N = N; uint64_t mod_mask_64 = mod_mask; mod_mask_64 += mod_mask_64 << 16; mod_mask_64 += mod_mask_64 << 32; typedef uint64_t __attribute__((__may_alias__)) uint64_t_alias; /* make sure a.coeffs[i] <= mod_mask */ ntru_mod_mask(a, mod_mask); uint16_t overflow_ctr_start = (1<<16)/(mod_mask+1) - 1; uint16_t overflow_ctr_rem = overflow_ctr_start; /* add coefficients that are multiplied by 1 */ uint16_t i; for (i=0; inum_ones; i++) { int16_t j; int16_t k = b->ones[i]; uint16_t j_end = N-4ones[i] ? 
0 : N-4-b->ones[i]; for (j=0; jcoeffs[k]) += *((uint64_t_alias*)&a->coeffs[j]); for (; kcoeffs[k] += a->coeffs[j]; for (k=0; jcoeffs[k]) += *((uint64_t_alias*)&a->coeffs[j]); for (; jcoeffs[k] += a->coeffs[j]; overflow_ctr_rem--; if (!overflow_ctr_rem) { ntru_mod_mask(c, mod_mask); overflow_ctr_rem = overflow_ctr_start; } } /* use inverse mask for subtraction */ mod_mask_64 = ~mod_mask_64; for (i=0; icoeffs[i]) |= mod_mask_64; for (; icoeffs[i] |= mod_mask_64; /* subtract coefficients that are multiplied by -1 */ overflow_ctr_rem = overflow_ctr_start; for (i=0; inum_neg_ones; i++) { int16_t j; int16_t k = b->neg_ones[i]; uint16_t j_end = N-4neg_ones[i] ? 0 : N-4-b->neg_ones[i]; for (j=0; jcoeffs[k]) -= *((uint64_t_alias*)&a->coeffs[j]); for (; kcoeffs[k] -= a->coeffs[j]; for (k=0; jcoeffs[k]) -= *((uint64_t_alias*)&a->coeffs[j]); for (; jcoeffs[k] -= a->coeffs[j]; overflow_ctr_rem--; if (!overflow_ctr_rem) { for (j=0; jcoeffs[j]) |= mod_mask_64; for (; jcoeffs[j] |= mod_mask_64; overflow_ctr_rem = overflow_ctr_start; } } ntru_mod_mask(c, mod_mask); return 1; } #ifdef __SSSE3__ /* Optimized for small df */ uint8_t ntru_mult_tern_sse_sparse(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask) { uint16_t N = a->N; if (N != b->N) return 0; memset(&c->coeffs, 0, N * sizeof c->coeffs[0]); c->N = N; /* add coefficients that are multiplied by 1 */ uint16_t i; for (i=0; inum_ones; i++) { int16_t j; int16_t k = b->ones[i]; uint16_t j_end = Nones[i] ? 0 : N-b->ones[i]; /* it is safe not to truncate the last block of 8 coefficients */ /* because there is extra room at the end of the coeffs array */ for (j=0; jcoeffs[k]); __m128i aj = _mm_lddqu_si128((__m128i*)&a->coeffs[j]); __m128i ca = _mm_add_epi16(ck, aj); _mm_storeu_si128((__m128i*)&c->coeffs[k], ca); } j = j_end; for (k=0; jcoeffs[k]); __m128i aj = _mm_lddqu_si128((__m128i*)&a->coeffs[j]); __m128i ca = _mm_add_epi16(ck, aj); _mm_storeu_si128((__m128i*)&c->coeffs[k], ca); } for (; jcoeffs[k] += a->coeffs[j]; } /* subtract coefficients that are multiplied by -1 */ for (i=0; inum_neg_ones; i++) { int16_t j; int16_t k = b->neg_ones[i]; uint16_t j_end = Nneg_ones[i] ? 
0 : N-b->neg_ones[i]; /* it is safe not to truncate the last block of 8 coefficients */ /* because there is extra room at the end of the coeffs array */ for (j=0; jcoeffs[k]); __m128i aj = _mm_lddqu_si128((__m128i*)&a->coeffs[j]); __m128i ca = _mm_sub_epi16(ck, aj); _mm_storeu_si128((__m128i*)&c->coeffs[k], ca); } j = j_end; for (k=0; jcoeffs[k]); __m128i aj = _mm_lddqu_si128((__m128i*)&a->coeffs[j]); __m128i ca = _mm_sub_epi16(ck, aj); _mm_storeu_si128((__m128i*)&c->coeffs[k], ca); } for (; jcoeffs[k] -= a->coeffs[j]; } ntru_mod_mask(c, mod_mask); return 1; } /* Optimized for large df */ uint8_t ntru_mult_tern_sse_dense(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask) { uint16_t N = a->N; if (N != b->N) return 0; c->N = N; uint16_t i; for (i=N; icoeffs[i] = 0; int16_t c_coeffs_arr[8+2*NTRU_INT_POLY_SIZE]; /* double capacity for intermediate result + another 8 */ int16_t *c_coeffs = c_coeffs_arr + 8; memset(&c_coeffs_arr, 0, sizeof(c_coeffs_arr)); __m128i a_coeffs0[8]; a_coeffs0[0] = _mm_lddqu_si128((__m128i*)&a->coeffs[0]); for (i=1; i<8; i++) a_coeffs0[i] = _mm_slli_si128(a_coeffs0[i-1], 2); /* add coefficients that are multiplied by 1 */ for (i=0; inum_ones; i++) { int16_t k = b->ones[i]; /* process the first num_coeffs0 coefficients, 1<=num_coeffs0<=8 */ uint8_t num_bytes0 = 16 - (((size_t)&c_coeffs[k])%16); uint8_t num_coeffs0 = num_bytes0 / 2; /* c_coeffs[k+num_coeffs0] is 16-byte aligned */ k -= 8 - num_coeffs0; __m128i *ck = (__m128i*)&c_coeffs[k]; __m128i aj = a_coeffs0[8-num_coeffs0]; __m128i ca = _mm_add_epi16(*ck, aj); _mm_store_si128(ck, ca); k += 8; /* process the remaining coefficients in blocks of 8. */ /* it is safe not to truncate the last block of 8 coefficients */ /* because there is extra room at the end of the coeffs array */ ck = (__m128i*)&c_coeffs[k]; int16_t j; for (j=num_coeffs0; jcoeffs[j]); __m128i ca = _mm_add_epi16(*ck, aj); _mm_store_si128(ck, ca); ck++; } } /* subtract coefficients that are multiplied by -1 */ for (i=0; inum_neg_ones; i++) { int16_t k = b->neg_ones[i]; /* process the first num_coeffs0 coefficients, 1<=num_coeffs0<=8 */ uint8_t num_bytes0 = 16 - (((size_t)&c_coeffs[k])%16); uint8_t num_coeffs0 = num_bytes0 / 2; /* c_coeffs[k+num_coeffs0] is 16-byte aligned */ k -= 8 - num_coeffs0; __m128i *ck = (__m128i*)&c_coeffs[k]; __m128i aj = a_coeffs0[8-num_coeffs0]; __m128i ca = _mm_sub_epi16(*ck, aj); _mm_store_si128(ck, ca); k += 8; /* process the remaining coefficients in blocks of 8. 
*/ /* it is safe not to truncate the last block of 8 coefficients */ /* because there is extra room at the end of the coeffs array */ ck = (__m128i*)&c_coeffs[k]; int16_t j; for (j=num_coeffs0; jcoeffs[j]); __m128i ca = _mm_sub_epi16(*ck, aj); _mm_store_si128(ck, ca); ck++; } } /* reduce c_coeffs[0..2N-1] to [0..N-1] and apply mod_mask to reduce values mod q */ /* handle the first coefficients individually if c_coeffs is not 16-byte aligned */ for (i=0; ((size_t)&c_coeffs[i])%16; i++) c->coeffs[i] = (c_coeffs[i] + c_coeffs[N+i]) & mod_mask; /* handle the remaining ones in blocks of 8 */ __m128i mod_mask_128 = _mm_set1_epi16(mod_mask); __m128i *ci = (__m128i*)(&c_coeffs[i]); for (; icoeffs[i], c128_0); ci++; } return 1; } uint8_t ntru_mult_tern_sse(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask) { if (b->num_onesnum_neg_onesN; if (N != b->N) return 0; memset(&c->coeffs, 0, N * sizeof c->coeffs[0]); c->N = N; /* add coefficients that are multiplied by 1 */ uint16_t i; for (i=0; inum_ones; i++) { int16_t j; int16_t k = b->ones[i]; uint16_t j_end = Nones[i] ? 0 : N-b->ones[i]; /* it is safe not to truncate the last block of 8 coefficients */ /* because there is extra room at the end of the coeffs array */ for (j=0; jcoeffs[k]); __m256i aj = _mm256_lddqu_si256((__m256i*)&a->coeffs[j]); __m256i ca = _mm256_add_epi16(ck, aj); _mm256_storeu_si256((__m256i*)&c->coeffs[k], ca); } j = j_end; for (k=0; jcoeffs[k]); __m256i aj = _mm256_lddqu_si256((__m256i*)&a->coeffs[j]); __m256i ca = _mm256_add_epi16(ck, aj); _mm256_storeu_si256((__m256i*)&c->coeffs[k], ca); } for (; jcoeffs[k] += a->coeffs[j]; } /* subtract coefficients that are multiplied by -1 */ for (i=0; inum_neg_ones; i++) { int16_t j; int16_t k = b->neg_ones[i]; uint16_t j_end = Nneg_ones[i] ? 0 : N-b->neg_ones[i]; /* it is safe not to truncate the last block of 8 coefficients */ /* because there is extra room at the end of the coeffs array */ for (j=0; jcoeffs[k]); __m256i aj = _mm256_lddqu_si256((__m256i*)&a->coeffs[j]); __m256i ca = _mm256_sub_epi16(ck, aj); _mm256_storeu_si256((__m256i*)&c->coeffs[k], ca); } j = j_end; for (k=0; jcoeffs[k]); __m256i aj = _mm256_lddqu_si256((__m256i*)&a->coeffs[j]); __m256i ca = _mm256_sub_epi16(ck, aj); _mm256_storeu_si256((__m256i*)&c->coeffs[k], ca); } for (; jcoeffs[k] -= a->coeffs[j]; } ntru_mod_mask(c, mod_mask); return 1; } /* Optimized for large df */ uint8_t ntru_mult_tern_avx2_dense(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask) { uint16_t N = a->N; if (N != b->N) return 0; c->N = N; uint16_t i; for (i=N; icoeffs[i] = 0; int16_t c_coeffs_arr[16+2*NTRU_INT_POLY_SIZE]; /* double capacity for intermediate result + another 8 */ int16_t *c_coeffs = c_coeffs_arr + 16; memset(&c_coeffs_arr, 0, sizeof(c_coeffs_arr)); __m256i a_coeffs0[16]; a_coeffs0[0] = _mm256_lddqu_si256((__m256i*)&a->coeffs[0]); for (i=1; i<16; i++) { /* Emulate the SSE full-register shifting behaviour in AVX2 (the */ /* corresponding _mm256_slli_si256 instruction shifts the two */ /* 128-bit lanes independently instead of the whole register). */ /* Two AVX2 instructions are needed for this. 
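       (one _mm256_permute2x128_si256 to fetch the preceding lane, then an
       _mm256_alignr_epi8 to shift bytes across the lane boundary)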
*/ __m256i mask = _mm256_permute2x128_si256(a_coeffs0[i-1], a_coeffs0[i-1], _MM_SHUFFLE(0,0,2,0) ); a_coeffs0[i] = _mm256_alignr_epi8(a_coeffs0[i-1],mask,14); } /* add coefficients that are multiplied by 1 */ for (i=0; inum_ones; i++) { int16_t k = b->ones[i]; /* process the first num_coeffs0 coefficients, 1<=num_coeffs0<=8 */ uint8_t num_bytes0 = 32 - (((size_t)&c_coeffs[k])%32); uint8_t num_coeffs0 = num_bytes0 / 2; /* c_coeffs[k+num_coeffs0] is 32-byte aligned */ k -= 16 - num_coeffs0; __m256i *ck = (__m256i*)&c_coeffs[k]; __m256i aj = a_coeffs0[16-num_coeffs0]; __m256i ca = _mm256_add_epi16(*ck, aj); _mm256_store_si256(ck, ca); k += 16; /* process the remaining coefficients in blocks of 16. */ /* it is safe not to truncate the last block of 16 coefficients */ /* because there is extra room at the end of the coeffs array */ ck = (__m256i*)&c_coeffs[k]; int16_t j; for (j=num_coeffs0; jcoeffs[j]); __m256i ca = _mm256_add_epi16(*ck, aj); _mm256_store_si256(ck, ca); ck++; } } /* subtract coefficients that are multiplied by -1 */ for (i=0; inum_neg_ones; i++) { int16_t k = b->neg_ones[i]; /* process the first num_coeffs0 coefficients, 1<=num_coeffs0<=8 */ uint8_t num_bytes0 = 32 - (((size_t)&c_coeffs[k])%32); uint8_t num_coeffs0 = num_bytes0 / 2; /* c_coeffs[k+num_coeffs0] is 32-byte aligned */ k -= 16 - num_coeffs0; __m256i *ck = (__m256i*)&c_coeffs[k]; __m256i aj = a_coeffs0[16-num_coeffs0]; __m256i ca = _mm256_sub_epi16(*ck, aj); _mm256_store_si256(ck, ca); k += 16; /* process the remaining coefficients in blocks of 16. */ /* it is safe not to truncate the last block of 16 coefficients */ /* because there is extra room at the end of the coeffs array */ ck = (__m256i*)&c_coeffs[k]; int16_t j; for (j=num_coeffs0; jcoeffs[j]); __m256i ca = _mm256_sub_epi16(*ck, aj); _mm256_store_si256(ck, ca); ck++; } } /* reduce c_coeffs[0..2N-1] to [0..N-1] and apply mod_mask to reduce values mod q */ /* handle the first coefficients individually if c_coeffs is not 16-byte aligned */ for (i=0; ((size_t)&c_coeffs[i])%32; i++) c->coeffs[i] = (c_coeffs[i] + c_coeffs[N+i]) & mod_mask; /* handle the remaining ones in blocks of 16 */ __m256i mod_mask_256 = _mm256_set1_epi16(mod_mask); __m256i *ci = (__m256i*)(&c_coeffs[i]); for (; icoeffs[i], c256_0); ci++; } return 1; } uint8_t ntru_mult_tern_avx2(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask) { if (b->num_onesnum_neg_onesN; if (N != b->N) return 0; c->N = N; memset(&c->coeffs, 0, N * sizeof c->coeffs[0]); NtruIntPoly temp; ntru_mult_tern(a, &b->f1, &temp, mod_mask); ntru_mult_tern(&temp, &b->f2, c, mod_mask); NtruIntPoly f3a; ntru_mult_tern(a, &b->f3, &f3a, mod_mask); ntru_add(c, &f3a); ntru_mod_mask(c, mod_mask); return 1; } #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ uint8_t ntru_mult_priv(NtruPrivPoly *a, NtruIntPoly *b, NtruIntPoly *c, uint16_t mod_mask) { #ifndef NTRU_AVOID_HAMMING_WT_PATENT if (a->prod_flag) return ntru_mult_prod(b, &a->poly.prod, c, mod_mask); else #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ return ntru_mult_tern(b, &a->poly.tern, c, mod_mask); } /** NtruPrivPoly to binary (coefficients reduced mod 2), 64 bit version */ void ntru_priv_to_mod2_64(NtruPrivPoly *a, uint64_t *b_coeffs64) { #ifndef NTRU_AVOID_HAMMING_WT_PATENT if (a->prod_flag) { NtruProdPoly *prod = &a->poly.prod; uint16_t N = prod->N; uint16_t N64 = (prod->N+63) / 64; memset(b_coeffs64, 0, N64*8); uint16_t i, j, bidx; for (i=0; if1.num_ones; i++) { for (j=0; jf2.num_ones; j++) { bidx = prod->f1.ones[i] + prod->f2.ones[j]; if (bidx >= N) bidx -= N; 
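                /* over GF(2) the +1 and -1 coefficients are indistinguishable,
                   so each product term of f1*f2 simply toggles the bit at
                   index (i+j) mod N */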
                b_coeffs64[bidx/64] ^= ((uint64_t)1) << (bidx%64);
            }
            for (j=0; j<prod->f2.num_neg_ones; j++) {
                bidx = prod->f1.ones[i] + prod->f2.neg_ones[j];
                if (bidx >= N)
                    bidx -= N;
                b_coeffs64[bidx/64] ^= ((uint64_t)1) << (bidx%64);
            }
        }
        for (i=0; i<prod->f1.num_neg_ones; i++) {
            for (j=0; j<prod->f2.num_ones; j++) {
                bidx = prod->f1.neg_ones[i] + prod->f2.ones[j];
                if (bidx >= N)
                    bidx -= N;
                b_coeffs64[bidx/64] ^= ((uint64_t)1) << (bidx%64);
            }
            for (j=0; j<prod->f2.num_neg_ones; j++) {
                bidx = prod->f1.neg_ones[i] + prod->f2.neg_ones[j];
                if (bidx >= N)
                    bidx -= N;
                b_coeffs64[bidx/64] ^= ((uint64_t)1) << (bidx%64);
            }
        }
        for (i=0; i<prod->f3.num_ones; i++) {
            uint16_t ai = prod->f3.ones[i];
            b_coeffs64[ai/64] ^= ((uint64_t)1) << (ai%64);
        }
        for (i=0; i<prod->f3.num_neg_ones; i++) {
            uint16_t ai = prod->f3.neg_ones[i];
            b_coeffs64[ai/64] ^= ((uint64_t)1) << (ai%64);
        }
    } else
#endif /* NTRU_AVOID_HAMMING_WT_PATENT */
    {
        NtruTernPoly *tern = &a->poly.tern;
        uint16_t N64 = (tern->N+63) / 64;
        memset(b_coeffs64, 0, N64*8);
        uint16_t i;
        for (i=0; i<tern->num_ones; i++) {
            uint16_t ai = tern->ones[i];
            b_coeffs64[ai/64] ^= ((uint64_t)1) << (ai%64);
        }
        for (i=0; i<tern->num_neg_ones; i++) {
            uint16_t ai = tern->neg_ones[i];
            b_coeffs64[ai/64] ^= ((uint64_t)1) << (ai%64);
        }
    }
}

/** NtruPrivPoly to binary (coefficients reduced mod 2), 32 bit version */
void ntru_priv_to_mod2_32(NtruPrivPoly *a, uint32_t *b_coeffs32) {
#ifndef NTRU_AVOID_HAMMING_WT_PATENT
    if (a->prod_flag) {
        NtruProdPoly *prod = &a->poly.prod;
        uint16_t N = prod->N;
        uint16_t N32 = (prod->N+31) / 32;
        memset(b_coeffs32, 0, N32*4);
        uint16_t i, j, bidx;
        for (i=0; i<prod->f1.num_ones; i++) {
            for (j=0; j<prod->f2.num_ones; j++) {
                bidx = prod->f1.ones[i] + prod->f2.ones[j];
                if (bidx >= N)
                    bidx -= N;
                b_coeffs32[bidx/32] ^= ((uint32_t)1) << (bidx%32);
            }
            for (j=0; j<prod->f2.num_neg_ones; j++) {
                bidx = prod->f1.ones[i] + prod->f2.neg_ones[j];
                if (bidx >= N)
                    bidx -= N;
                b_coeffs32[bidx/32] ^= ((uint32_t)1) << (bidx%32);
            }
        }
        for (i=0; i<prod->f1.num_neg_ones; i++) {
            for (j=0; j<prod->f2.num_ones; j++) {
                bidx = prod->f1.neg_ones[i] + prod->f2.ones[j];
                if (bidx >= N)
                    bidx -= N;
                b_coeffs32[bidx/32] ^= ((uint32_t)1) << (bidx%32);
            }
            for (j=0; j<prod->f2.num_neg_ones; j++) {
                bidx = prod->f1.neg_ones[i] + prod->f2.neg_ones[j];
                if (bidx >= N)
                    bidx -= N;
                b_coeffs32[bidx/32] ^= ((uint32_t)1) << (bidx%32);
            }
        }
        for (i=0; i<prod->f3.num_ones; i++) {
            uint16_t ai = prod->f3.ones[i];
            b_coeffs32[ai/32] ^= ((uint32_t)1) << (ai%32);
        }
        for (i=0; i<prod->f3.num_neg_ones; i++) {
            uint16_t ai = prod->f3.neg_ones[i];
            b_coeffs32[ai/32] ^= ((uint32_t)1) << (ai%32);
        }
    } else
#endif /* NTRU_AVOID_HAMMING_WT_PATENT */
    {
        NtruTernPoly *tern = &a->poly.tern;
        uint16_t N32 = (tern->N+31) / 32;
        memset(b_coeffs32, 0, N32*4);
        uint16_t i;
        for (i=0; i<tern->num_ones; i++) {
            uint16_t ai = tern->ones[i];
            b_coeffs32[ai/32] ^= ((uint32_t)1) << (ai%32);
        }
        for (i=0; i<tern->num_neg_ones; i++) {
            uint16_t ai = tern->neg_ones[i];
            b_coeffs32[ai/32] ^= ((uint32_t)1) << (ai%32);
        }
    }
}

void ntru_to_arr_64(NtruIntPoly *p, uint16_t q, uint8_t *a) {
    uint16_t N = p->N;
    uint8_t log_q = ntru_log2(q);
    uint16_t enc_bytes = ntru_enc_len_Nq(N, q);
    uint16_t rem = enc_bytes % sizeof(uint64_t);
    uint16_t quo = enc_bytes / sizeof(uint64_t);
    uint16_t enc_last_int = rem ? quo : quo - 1;
    uint16_t enc_last_int_valid = rem ?
rem : sizeof(uint64_t); uint64_t last = 0; typedef uint64_t __attribute__((__may_alias__)) *uint64_t_alias; uint64_t *a64 = (uint64_t_alias)a; uint16_t a_idx = 0; /* index into a64 */ uint8_t bit_idx = 0; /* next unused bit of a64[a_idx] */ a64[0] = 0; uint16_t p_idx; uint64_t mod_mask = q - 1; for (p_idx=0; p_idxcoeffs[p_idx] & mod_mask; if (bit_idx < 64-log_q) { if (a_idx == enc_last_int) last |= coeff << bit_idx; else a64[a_idx] |= coeff << bit_idx; bit_idx += log_q; } else { a64[a_idx] |= coeff << bit_idx; a_idx++; bit_idx += log_q - 64; if (a_idx == enc_last_int) last = coeff >> (log_q - bit_idx); else a64[a_idx] = coeff >> (log_q-bit_idx); } } /* reverse byte order on big-endian machines */ uint16_t i; for (i = 0; i <= a_idx; i++) { if (i == enc_last_int) { last = htole64(last); memcpy(&a64[i], &last, enc_last_int_valid); } else a64[i] = htole64(a64[i]); } } void ntru_to_arr_32(NtruIntPoly *p, uint16_t q, uint8_t *a) { uint16_t N = p->N; uint8_t log_q = ntru_log2(q); uint16_t enc_bytes = ntru_enc_len_Nq(N, q); uint16_t rem = enc_bytes % sizeof(uint32_t); uint16_t quo = enc_bytes / sizeof(uint32_t); uint16_t enc_last_int = rem ? quo : quo - 1; uint16_t enc_last_int_valid = rem ? rem : sizeof(uint32_t); uint32_t last = 0; typedef uint32_t __attribute__((__may_alias__)) *uint32_t_alias; uint32_t *a32 = (uint32_t_alias)a; uint16_t a_idx = 0; /* index into a32 */ uint8_t bit_idx = 0; /* next unused bit of a32[a_idx] */ a32[0] = 0; uint16_t p_idx; uint32_t mod_mask = q - 1; for (p_idx=0; p_idxcoeffs[p_idx] & mod_mask; if (bit_idx < 32-log_q) { if (a_idx == enc_last_int) last |= coeff << bit_idx; else a32[a_idx] |= coeff << bit_idx; bit_idx += log_q; } else { a32[a_idx] |= coeff << bit_idx; a_idx++; bit_idx += log_q - 32; if (a_idx == enc_last_int) last = coeff >> (log_q - bit_idx); else a32[a_idx] = coeff >> (log_q-bit_idx); } } /* reverse byte order on big-endian machines */ uint16_t i; for (i = 0; i <= a_idx; i++) { if (i == enc_last_int) { last = htole32(last); memcpy(&a32[i], &last, enc_last_int_valid); } else a32[i] = htole32(a32[i]); } } #ifdef __SSSE3__ void ntru_to_arr_sse_2048(NtruIntPoly *p, uint8_t *a) { /* mask{n} masks bits n..n+10 except for mask64 which masks bits 64..66 */ __m128i mask0 = {(1<<11)-1, 0}; __m128i mask11 = _mm_slli_epi64(mask0, 11); __m128i mask22 = _mm_slli_epi64(mask11, 11); __m128i mask33 = _mm_slli_epi64(mask22, 11); __m128i mask44 = _mm_slli_epi64(mask33, 11); __m128i mask55 = {(uint64_t)((1<<9)-1) << 55, 3}; __m128i mask64 = {0, 3}; __m128i mask66 = {0, ((1<<11)-1) << 2}; __m128i mask77 = _mm_slli_epi64(mask66, 11); __m128i mask88 = _mm_slli_epi64(mask77, 11); __m128i mask99 = _mm_slli_epi64(mask88, 11); uint16_t a_idx = 0; uint16_t p_idx; uint16_t N = p->N; for (p_idx=0; p_idxcoeffs[p_idx]); /* 8 coeffs of p starting at p_idx */ __m128i a128 = _mm_and_si128(p128, mask0); /* bits [0..10] -> [0..10] */ a128 = _mm_or_si128(a128, _mm_and_si128(_mm_srli_epi64(p128, 5), mask11)); /* [16..26] -> [11..21] */ a128 = _mm_or_si128(a128, _mm_and_si128(_mm_srli_epi64(p128, 10), mask22)); /* [32..42] -> [22..32] */ a128 = _mm_or_si128(a128, _mm_and_si128(_mm_srli_epi64(p128, 15), mask33)); /* [48..58] -> [33..43] */ __m128i p128_64 = _mm_srli_si128(p128, 8); a128 = _mm_or_si128(a128, _mm_and_si128(_mm_slli_epi64(p128_64, 44), mask44)); /* [64..74] -> [44..54] */ a128 = _mm_or_si128(a128, _mm_and_si128(_mm_slli_epi64(p128_64, 39), mask55)); /* [80..88] -> [55..63] */ a128 = _mm_or_si128(a128, _mm_and_si128(_mm_srli_epi64(p128, 25), mask64)); /* [89..90] -> [64..65] 
*/ a128 = _mm_or_si128(a128, _mm_and_si128(_mm_srli_epi64(p128, 30), mask66)); /* [96..111] -> [66..76] */ a128 = _mm_or_si128(a128, _mm_and_si128(_mm_srli_epi64(p128, 35), mask77)); /* [112..127] -> [77..87] */ _mm_storeu_si128((__m128i*)&a[a_idx], a128); a_idx += 11; } /* remaining coeffs (up to 10) */ __m128i p128 = _mm_lddqu_si128((__m128i*)&p->coeffs[p_idx]); /* 8 coeffs of p starting at p_idx */ __m128i a128 = _mm_setzero_si128(); if (N-p_idx > 0) a128 = _mm_and_si128(p128, mask0); /* bits [0..10] -> [0..10] */ if (N-p_idx > 1) a128 = _mm_or_si128(a128, _mm_and_si128(_mm_srli_epi64(p128, 5), mask11)); /* [16..26] -> [11..21] */ if (N-p_idx > 2) a128 = _mm_or_si128(a128, _mm_and_si128(_mm_srli_epi64(p128, 10), mask22)); /* [32..42] -> [22..32] */ if (N-p_idx > 3) a128 = _mm_or_si128(a128, _mm_and_si128(_mm_srli_epi64(p128, 15), mask33)); /* [48..58] -> [33..43] */ __m128i p128_64 = _mm_srli_si128(p128, 8); if (N-p_idx > 4) a128 = _mm_or_si128(a128, _mm_and_si128(_mm_slli_epi64(p128_64, 44), mask44)); /* [64..74] -> [44..54] */ if (N-p_idx > 5) { a128 = _mm_or_si128(a128, _mm_and_si128(_mm_slli_epi64(p128_64, 39), mask55)); /* [80..88] -> [55..63] */ a128 = _mm_or_si128(a128, _mm_and_si128(_mm_srli_epi64(p128, 25), mask64)); /* [89..90] -> [64..65] */ } if (N-p_idx > 6) a128 = _mm_or_si128(a128, _mm_and_si128(_mm_srli_epi64(p128, 30), mask66)); /* [96..111] -> [66..76] */ if (N-p_idx > 7) a128 = _mm_or_si128(a128, _mm_and_si128(_mm_srli_epi64(p128, 35), mask77)); /* [112..127] -> [77..87] */ if (N-p_idx > 8) { p128 = _mm_lddqu_si128((__m128i*)&p->coeffs[p_idx+8]); /* coeffs p_idx+8 through p_idx+15 */ p128_64 = _mm_slli_si128(p128, 8); a128 = _mm_or_si128(a128, _mm_and_si128(_mm_slli_epi64(p128_64, 24), mask88)); /* [0..15] -> [88..98] */ } if (N-p_idx > 9) a128 = _mm_or_si128(a128, _mm_and_si128(_mm_slli_epi64(p128_64, 19), mask99)); /* [16..31] -> [99..109] */ uint8_t a_last[16]; _mm_storeu_si128((__m128i*)a_last, a128); memcpy(&a[a_idx], a_last, ((N-p_idx)*11+7)/8); } #endif /* __SSSE3__ */ void ntru_to_arr(NtruIntPoly *p, uint16_t q, uint8_t *a) { #ifdef __SSSE3__ if (q == 2048) ntru_to_arr_sse_2048(p, a); else ntru_to_arr_32(p, q, a); #elif _LP64 ntru_to_arr_64(p, q, a); #else ntru_to_arr_32(p, q, a); #endif } void ntru_to_arr4(NtruIntPoly *p, uint8_t *arr) { uint16_t i = 0; while (i < p->N-3) { int8_t c0 = p->coeffs[i] & 3; int8_t c1 = p->coeffs[i+1] & 3; int8_t c2 = p->coeffs[i+2] & 3; int8_t c3 = p->coeffs[i+3] & 3; int16_t d = c0 + (c1<<2) + (c2<<4) + (c3<<6); arr[i/4] = d; i += 4; } /* handle the last 0 to 3 coefficients */ if (i >= p->N) return; uint16_t last = i / 4; arr[last] = p->coeffs[i] & 3; i++; if (i >= p->N) return; arr[last] |= (p->coeffs[i]&3) << 2; i++; if (i >= p->N) return; arr[last] |= (p->coeffs[i]&3) << 4; i++; if (i >= p->N) return; arr[last] |= (p->coeffs[i]&3) << 6; } void ntru_from_arr(uint8_t *arr, uint16_t N, uint16_t q, NtruIntPoly *p) { p->N = N; memset(&p->coeffs, 0, N * sizeof p->coeffs[0]); uint8_t bits_per_coeff = ntru_log2(q); uint32_t mask = 0xFFFFFFFF >> (32-bits_per_coeff); /* for truncating values to bitsPerCoeff bits */ uint16_t byte_idx = 0; uint8_t bit_idx = 0; /* next bit in arr[byte_idx] */ uint32_t coeff_buf = 0; /* contains (bit_idx) bits */ uint8_t coeff_bits = 0; /* length of coeffBuf */ uint16_t coeff_idx = 0; /* index into coeffs */ while (coeff_idx < N) { /* copy bits_per_coeff or more into coeff_buf */ while (coeff_bits < bits_per_coeff) { coeff_buf += (arr[byte_idx]&0xFF) << coeff_bits; coeff_bits += 8 - bit_idx; byte_idx++; 
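            /* only the first byte of a coefficient can start at a non-zero
               bit offset; every subsequent byte is consumed whole */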
            bit_idx = 0;
        }
        /* low bits_per_coeff bits = next coefficient */
        p->coeffs[coeff_idx] = coeff_buf & mask;
        coeff_idx++;
        coeff_buf >>= bits_per_coeff;
        coeff_bits -= bits_per_coeff;
    }
}

void ntru_mult_fac(NtruIntPoly *a, int16_t factor) {
    uint16_t i;
    for (i=0; i<a->N; i++)
        a->coeffs[i] *= factor;
}

#ifdef __SSSE3__
void ntru_mod_sse(NtruIntPoly *p, uint16_t mod_mask) {
    uint16_t i;
    __m128i mod_mask_128 = _mm_set1_epi16(mod_mask);
    for (i=0; i<p->N; i+=8) {
        __m128i a = _mm_lddqu_si128((__m128i*)&p->coeffs[i]);
        a = _mm_and_si128(a, mod_mask_128);
        _mm_storeu_si128((__m128i*)&p->coeffs[i], a);
    }
}
#endif

#ifdef __AVX2__
void ntru_mod_avx2(NtruIntPoly *p, uint16_t mod_mask) {
    uint16_t i;
    __m256i mod_mask_256 = _mm256_set1_epi16(mod_mask);
    for (i=0; i<p->N; i+=16) {
        __m256i a = _mm256_lddqu_si256((__m256i*)&p->coeffs[i]);
        a = _mm256_and_si256(a, mod_mask_256);
        _mm256_storeu_si256((__m256i*)&p->coeffs[i], a);
    }
}
#endif /* __AVX2__ */

void ntru_mod_64(NtruIntPoly *p, uint16_t mod_mask) {
    typedef uint64_t __attribute__((__may_alias__)) uint64_t_alias;
    uint64_t mod_mask_64 = mod_mask;
    mod_mask_64 += mod_mask_64 << 16;
    mod_mask_64 += mod_mask_64 << 32;
    uint16_t i;
    for (i=0; i<p->N; i+=4)
        *((uint64_t_alias*)&p->coeffs[i]) &= mod_mask_64;
}

void ntru_mod_32(NtruIntPoly *p, uint16_t modulus) {
    typedef uint32_t __attribute__((__may_alias__)) uint32_t_alias;
    uint32_t mod_mask = modulus - 1;
    mod_mask += mod_mask << 16;
    uint16_t i;
    for (i=0; i<p->N; i+=2)
        *((uint32_t_alias*)&p->coeffs[i]) &= mod_mask;
}

void ntru_mod_mask(NtruIntPoly *p, uint16_t mod_mask) {
#ifdef __AVX2__
    ntru_mod_avx2(p, mod_mask);
#elif __SSSE3__
    ntru_mod_sse(p, mod_mask);
#elif _LP64
    ntru_mod_64(p, mod_mask);
#else
    ntru_mod_32(p, mod_mask+1);
#endif
}

void ntru_mod3_standard(NtruIntPoly *p) {
    uint16_t i;
    for (i=0; i<p->N; i++) {
        int8_t c = p->coeffs[i] % 3;
        if (c == -2)
            c = 1;
        if (c == -1)
            c = 2;
        p->coeffs[i] = c;
    }
}

#ifdef __SSSE3__
/* (i%3)+3 for i=0..7 */
__m128i NTRU_MOD3_LUT = {0x0403050403050403, 0};

/**
 * SSE version of ntru_mod3.
 * Based on Douglas W Jones' mod3 function at
 * http://homepage.cs.uiowa.edu/~jones/bcd/mod.shtml.
 */
void ntru_mod3_sse(NtruIntPoly *p) {
    uint16_t i;
    for (i=0; i<(p->N+7)/8*8; i+=8) {
        __m128i a = _mm_lddqu_si128((__m128i*)&p->coeffs[i]);

        /* make positive */
        __m128i _3000 = _mm_set1_epi16(3000);
        a = _mm_add_epi16(a, _3000);

        /* a = (a>>8) + (a&0xFF);  (sum base 2**8 digits) */
        __m128i a1 = _mm_srli_epi16(a, 8);
        __m128i mask = _mm_set1_epi16(0x00FF);
        __m128i a2 = _mm_and_si128(a, mask);
        a = _mm_add_epi16(a1, a2);

        /* a = (a>>4) + (a&0xF);  (sum base 2**4 digits; worst case 0x3B) */
        a1 = _mm_srli_epi16(a, 4);
        mask = _mm_set1_epi16(0x000F);
        a2 = _mm_and_si128(a, mask);
        a = _mm_add_epi16(a1, a2);

        /* a = (a>>2) + (a&0x3);  (sum base 2**2 digits; worst case 0x1B) */
        a1 = _mm_srli_epi16(a, 2);
        mask = _mm_set1_epi16(0x0003);
        a2 = _mm_and_si128(a, mask);
        a = _mm_add_epi16(a1, a2);

        /* a = (a>>2) + (a&0x3);  (sum base 2**2 digits; worst case 0x7) */
        a1 = _mm_srli_epi16(a, 2);
        mask = _mm_set1_epi16(0x0003);
        a2 = _mm_and_si128(a, mask);
        a = _mm_add_epi16(a1, a2);

        __m128i a_mod3 = _mm_shuffle_epi8(NTRU_MOD3_LUT, a);
        /* _mm_shuffle_epi8 changed bytes 1, 3, 5, ... to non-zero;
           change them back to zero */
        mask = _mm_set1_epi16(0x00FF);
        a_mod3 = _mm_and_si128(a_mod3, mask);
        /* subtract 3 so coefficients are in the 0..2 range */
        __m128i three = _mm_set1_epi16(0x0003);
        a_mod3 = _mm_sub_epi16(a_mod3, three);

        _mm_storeu_si128((__m128i*)&p->coeffs[i], a_mod3);
    }
}
#endif /* __SSSE3__ */

#ifdef __AVX2__
__m256i NTRU_MOD3_LUT_AVX = {0x0403050403050403, 0, 0x0403050403050403, 0};

void ntru_mod3_avx2(NtruIntPoly *p) {
    uint16_t i;
    for (i=0; i<(p->N+15)/16*16; i+=16) {
        __m256i a = _mm256_lddqu_si256((__m256i*)&p->coeffs[i]);

        /* make positive */
        __m256i _3000 = _mm256_set1_epi16(3000);
        a = _mm256_add_epi16(a, _3000);

        /* a = (a>>8) + (a&0xFF);  (sum base 2**8 digits) */
        __m256i a1 = _mm256_srli_epi16(a, 8);
        __m256i mask = _mm256_set1_epi16(0x00FF);
        __m256i a2 = _mm256_and_si256(a, mask);
        a = _mm256_add_epi16(a1, a2);

        /* a = (a>>4) + (a&0xF);  (sum base 2**4 digits; worst case 0x3B) */
        a1 = _mm256_srli_epi16(a, 4);
        mask = _mm256_set1_epi16(0x000F);
        a2 = _mm256_and_si256(a, mask);
        a = _mm256_add_epi16(a1, a2);

        /* a = (a>>2) + (a&0x3);  (sum base 2**2 digits; worst case 0x1B) */
        a1 = _mm256_srli_epi16(a, 2);
        mask = _mm256_set1_epi16(0x0003);
        a2 = _mm256_and_si256(a, mask);
        a = _mm256_add_epi16(a1, a2);

        /* a = (a>>2) + (a&0x3);  (sum base 2**2 digits; worst case 0x7) */
        a1 = _mm256_srli_epi16(a, 2);
        mask = _mm256_set1_epi16(0x0003);
        a2 = _mm256_and_si256(a, mask);
        a = _mm256_add_epi16(a1, a2);

        __m256i a_mod3 = _mm256_shuffle_epi8(NTRU_MOD3_LUT_AVX, a);
        /* _mm256_shuffle_epi8 changed bytes 1, 3, 5, ... to non-zero;
           change them back to zero */
        mask = _mm256_set1_epi16(0x00FF);
        a_mod3 = _mm256_and_si256(a_mod3, mask);
        /* subtract 3 so coefficients are in the 0..2 range */
        __m256i three = _mm256_set1_epi16(0x0003);
        a_mod3 = _mm256_sub_epi16(a_mod3, three);

        _mm256_storeu_si256((__m256i*)&p->coeffs[i], a_mod3);
    }
}
#endif /* __AVX2__ */

void ntru_mod3(NtruIntPoly *p) {
#ifdef __AVX2__
    ntru_mod3_avx2(p);
#elif __SSSE3__
    ntru_mod3_sse(p);
#else
    ntru_mod3_standard(p);
#endif /* __SSSE3__ */
}

void ntru_mod_center(NtruIntPoly *p, uint16_t modulus) {
    uint16_t m2 = modulus / 2;
    uint16_t mod_mask = modulus - 1;
    uint16_t i;
    for (i=0; i<p->N; i++) {
        uint16_t c = p->coeffs[i] & mod_mask;   // note that c is unsigned
        if (c > m2)
            c -= modulus;
        p->coeffs[i] = c;
    }
}

uint8_t ntru_equals1(NtruIntPoly *p) {
    uint16_t i;
    for (i=1; i<p->N; i++)
        if (p->coeffs[i] != 0)
            return 0;
    return p->coeffs[0] == 1;
}

uint8_t ntru_equals_int(NtruIntPoly *a, NtruIntPoly *b) {
    if (a->N != b->N)
        return 0;
    uint16_t i;
    for (i=0; i<a->N; i++)
        if (a->coeffs[i] != b->coeffs[i])
            return 0;
    return 1;
}

uint16_t ntru_deg_64(uint64_t *coeffs, uint16_t len) {
    uint16_t deg = 64*len - 1;
    len--;
    while (len>0 && coeffs[len]==0) {
        len--;
        deg -= 64;
    }
    while (coeffs[len]>>(deg%64)==0 && deg>0)
        deg--;
    return deg;
}

uint16_t ntru_deg_32(uint32_t *coeffs, uint16_t len) {
    uint16_t deg = 32*len - 1;
    len--;
    while (len>0 && coeffs[len]==0) {
        len--;
        deg -= 32;
    }
    while (coeffs[len]>>(deg%32)==0 && deg>0)
        deg--;
    return deg;
}

void ntru_clear_tern(NtruTernPoly *p) {
    memset(&p->ones, 0, p->num_ones * sizeof p->ones[0]);
    memset(&p->neg_ones, 0, p->num_neg_ones * sizeof p->neg_ones[0]);
}

void ntru_clear_priv(NtruPrivPoly *p) {
#ifndef NTRU_AVOID_HAMMING_WT_PATENT
    if (p->prod_flag) {
        ntru_clear_tern(&p->poly.prod.f1);
        ntru_clear_tern(&p->poly.prod.f2);
        ntru_clear_tern(&p->poly.prod.f3);
    }
    else
#endif /* NTRU_AVOID_HAMMING_WT_PATENT */
        ntru_clear_tern(&p->poly.tern);
}

void ntru_clear_int(NtruIntPoly *p) {
    uint16_t i;
    for (i=0; i<p->N; i++)
        p->coeffs[i] = 0;
}
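/*
 * Illustrative scalar reference (not part of the libntru API) for the
 * Douglas W Jones digit-sum trick that ntru_mod3_sse()/ntru_mod3_avx2()
 * above apply to 8 or 16 coefficients at a time. Because 4, 16 and 256 are
 * all congruent to 1 mod 3, repeatedly folding a value into the sum of its
 * base-4/16/256 digits preserves it mod 3 while shrinking it into lookup
 * range. The function name and the canonical 0..2 output range are
 * assumptions made for this sketch only.
 */
static inline int16_t ntru_mod3_coeff_sketch(int16_t coeff) {
    uint16_t a = (uint16_t)(coeff + 3000);  /* make positive; 3000 % 3 == 0 */
    a = (a >> 8) + (a & 0xFF);  /* sum base 2**8 digits; result <= 510 */
    a = (a >> 4) + (a & 0xF);   /* sum base 2**4 digits */
    a = (a >> 2) + (a & 0x3);   /* sum base 2**2 digits */
    a = (a >> 2) + (a & 0x3);   /* one more round; a is now in 0..5 */
    static const int16_t lut[8] = {0, 1, 2, 0, 1, 2, 0, 1};  /* i%3 for i=0..7 */
    return lut[a];
}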
/** * @brief Lift inverse * * Given a polynomial a and the inverse of (1+3a) mod 2, this function * calculates the inverse of (1+3a) mod q. * * @param a a polynomial such that Fq = (1+3a)^(-1) (mod 2) * @param Fq the inverse of 1+3a modulo 2 * @param q the modulus */ void ntru_lift_inverse(NtruPrivPoly *a, NtruIntPoly *Fq, uint16_t q) { NtruIntPoly temp1, temp2; uint32_t v = 2; while (v < q) { v *= v; /* temp1 = (1+3a)*Fq */ ntru_mult_priv(a, Fq, &temp1, q-1); ntru_mult_fac(&temp1, 3); ntru_add(&temp1, Fq); ntru_neg_mod(&temp1, q); temp1.coeffs[0] += 2; memcpy(&temp2, Fq, sizeof *Fq); ntru_mult_int(&temp1, &temp2, Fq, q-1); } } uint8_t ntru_invert(NtruPrivPoly *a, uint16_t mod_mask, NtruIntPoly *Fq) { #ifdef _LP64 return ntru_invert_64(a, mod_mask, Fq); #else return ntru_invert_32(a, mod_mask, Fq); #endif } uint8_t ntru_invert_32(NtruPrivPoly *a, uint16_t mod_mask, NtruIntPoly *Fq) { int16_t i; #ifndef NTRU_AVOID_HAMMING_WT_PATENT uint16_t N = a->prod_flag ? a->poly.prod.N : a->poly.tern.N; #else uint16_t N = a->poly.tern.N; #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ uint16_t k = 0; uint16_t N32 = (N+1+31) / 32; /* #uint32_t's needed for N+1 coeffs */ /* b = 1 */ uint32_t b_coeffs32_arr[N32]; uint32_t *b_coeffs32 = b_coeffs32_arr; memset(b_coeffs32+1, 0, (N32-1)*4); b_coeffs32[0] = 1; /* c = 0 */ uint32_t c_coeffs32_arr[N32]; uint32_t *c_coeffs32 = c_coeffs32_arr; memset(c_coeffs32, 0, N32*4); /* f=3a+1; skip multiplication by 3 because f=3f (mod 2) */ uint32_t f_coeffs32_arr[N32]; uint32_t *f_coeffs32 = f_coeffs32_arr; ntru_priv_to_mod2_32(a, f_coeffs32); f_coeffs32[0] ^= 1; /* g(x) = x^N − 1 */ uint32_t g_coeffs32_arr[N32]; uint32_t *g_coeffs32 = g_coeffs32_arr; memset(g_coeffs32, 0, N32*4); g_coeffs32[0] = 1; g_coeffs32[N/32] |= ((uint32_t)1) << (N%32); uint16_t deg_f = ntru_deg_32(f_coeffs32, N32); uint16_t deg_g = N; for (;;) { uint16_t num_zeros = 0; /* while f[0]==0 */ while ((f_coeffs32[num_zeros/32]&(((uint32_t)1)<<(num_zeros%32)))==0 && num_zeros<=N) num_zeros++; if (num_zeros >= N) /* not invertible */ return 0; k += num_zeros; /* right-shift f, left-shift c num_zeros coefficients each */ if (num_zeros >= 32) { memmove(c_coeffs32+num_zeros/32, c_coeffs32, N32*4-num_zeros/32*4); memset(c_coeffs32, 0, num_zeros/32*4); memmove(f_coeffs32, f_coeffs32+num_zeros/32, N32*4-num_zeros/32*4); memset(f_coeffs32+N32-num_zeros/32, 0, num_zeros/32*4); deg_f -= num_zeros / 32 * 32; num_zeros %= 32; } if (num_zeros > 0) { /* c(x) = c(x)*(x^num_zeros) */ for (i=N32-1; i>0; i--) { c_coeffs32[i] <<= num_zeros; c_coeffs32[i] |= c_coeffs32[i-1] >> (32-num_zeros); } c_coeffs32[0] <<= num_zeros; /* f(x) = f(x)/(x^num_zeros) */ for (i=1; i>= num_zeros; f_coeffs32[i-1] |= f_coeffs32[i] << (32-num_zeros); } f_coeffs32[i-1] >>= num_zeros; } deg_f -= num_zeros; if (deg_f==0 && f_coeffs32[0]==1) /* if f==1 */ break; if (deg_f < deg_g) { /* exchange f and g */ uint32_t *temp_coeffs = f_coeffs32; f_coeffs32 = g_coeffs32; g_coeffs32 = temp_coeffs; uint16_t temp = deg_f; deg_f = deg_g; deg_g = temp; /* exchange b and c */ temp_coeffs = b_coeffs32; b_coeffs32 = c_coeffs32; c_coeffs32 = temp_coeffs; } ntru_add_mod2_32(f_coeffs32, g_coeffs32, N32); /* adding f+g may have lowered the degree of f */ while (deg_f>0 && (f_coeffs32[deg_f/32]&(((uint32_t)1)<<(deg_f%32)))==0) deg_f--; ntru_add_mod2_32(b_coeffs32, c_coeffs32, N32); } if ((b_coeffs32[(N+1-1)/32]&(((uint32_t)1)<<((N+1-1)%32))) != 0) /* if (b[N]!=0) */ return 0; /* Fq(x) = x^(N-k) * b(x) */ memset(&Fq->coeffs, 0, N * sizeof Fq->coeffs[0]); Fq->N = N; int16_t 
j = 0; while (k >= N) k -= N; for (i=N-1; i>=0; i--) { j = i - k; if (j < 0) j += N; Fq->coeffs[j] = (b_coeffs32[i/32]>>(i%32)) & 1; /* Fq->coeffs[j]=b[i] */ } ntru_lift_inverse(a, Fq, mod_mask+1); return 1; } uint8_t ntru_invert_64(NtruPrivPoly *a, uint16_t mod_mask, NtruIntPoly *Fq) { #ifndef NTRU_AVOID_HAMMING_WT_PATENT uint16_t N = a->prod_flag ? a->poly.prod.N : a->poly.tern.N; #else uint16_t N = a->poly.tern.N; #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ uint16_t k = 0; uint16_t N64 = (N+1+63) / 64; /* #uint64_t's needed for N+1 coeffs */ /* b = 1 */ uint64_t b_coeffs64_arr[N64]; uint64_t *b_coeffs64 = b_coeffs64_arr; memset(b_coeffs64+1, 0, (N64-1)*8); b_coeffs64[0] = 1; /* c = 0 */ uint64_t c_coeffs64_arr[N64]; uint64_t *c_coeffs64 = c_coeffs64_arr; memset(c_coeffs64, 0, N64*8); /* f=3a+1; skip multiplication by 3 because f=3f (mod 2) */ uint64_t f_coeffs64_arr[N64]; uint64_t *f_coeffs64 = f_coeffs64_arr; ntru_priv_to_mod2_64(a, f_coeffs64); f_coeffs64[0] ^= 1; /* g(x) = x^N − 1 */ uint64_t g_coeffs64_arr[N64]; uint64_t *g_coeffs64 = g_coeffs64_arr; memset(g_coeffs64, 0, N64*8); g_coeffs64[0] = 1; g_coeffs64[N/64] |= ((uint64_t)1) << (N%64); uint16_t deg_f = ntru_deg_64(f_coeffs64, N64); uint16_t deg_g = N; for (;;) { uint16_t num_zeros = 0; /* while f[0]==0 */ while ((f_coeffs64[num_zeros/64]&(((uint64_t)1)<<(num_zeros%64)))==0 && num_zeros<=N) num_zeros++; if (num_zeros >= N) /* not invertible */ return 0; k += num_zeros; /* right-shift f, left-shift c num_zeros coefficients each */ if (num_zeros >= 64) { memmove(c_coeffs64+num_zeros/64, c_coeffs64, N64*8-num_zeros/64*8); memset(c_coeffs64, 0, num_zeros/64*8); memmove(f_coeffs64, f_coeffs64+num_zeros/64, N64*8-num_zeros/64*8); memset(f_coeffs64+N64-num_zeros/64, 0, num_zeros/64*8); deg_f -= num_zeros / 64 * 64; num_zeros %= 64; } if (num_zeros > 0) { int16_t i; /* c(x) = c(x)*(x^num_zeros) */ for (i=N64-1; i>0; i--) { c_coeffs64[i] <<= num_zeros; c_coeffs64[i] |= c_coeffs64[i-1] >> (64-num_zeros); } c_coeffs64[0] <<= num_zeros; /* f(x) = f(x)/(x^num_zeros) */ for (i=1; i>= num_zeros; f_coeffs64[i-1] |= f_coeffs64[i] << (64-num_zeros); } f_coeffs64[i-1] >>= num_zeros; } deg_f -= num_zeros; if (deg_f==0 && f_coeffs64[0]==1) /* if f==1 */ break; if (deg_f < deg_g) { /* exchange f and g */ uint64_t *temp_coeffs = f_coeffs64; f_coeffs64 = g_coeffs64; g_coeffs64 = temp_coeffs; uint16_t temp = deg_f; deg_f = deg_g; deg_g = temp; /* exchange b and c */ temp_coeffs = b_coeffs64; b_coeffs64 = c_coeffs64; c_coeffs64 = temp_coeffs; } ntru_add_mod2_64(f_coeffs64, g_coeffs64, N64); /* adding f+g may have lowered the degree of f */ while (deg_f>0 && (f_coeffs64[deg_f/64]&(((uint64_t)1)<<(deg_f%64)))==0) deg_f--; ntru_add_mod2_64(b_coeffs64, c_coeffs64, N64); } if ((b_coeffs64[(N+1-1)/64]&(((uint64_t)1)<<((N+1-1)%64))) != 0) /* if (b[N]!=0) */ return 0; /* Fq(x) = x^(N-k) * b(x) */ memset(&Fq->coeffs, 0, N * sizeof Fq->coeffs[0]); Fq->N = N; int16_t j = 0; while (k >= N) k -= N; int16_t i; for (i=N-1; i>=0; i--) { j = i - k; if (j < 0) j += N; Fq->coeffs[j] = (b_coeffs64[i/64]>>(i%64)) & 1; /* Fq->coeffs[j]=b[i] */ } ntru_lift_inverse(a, Fq, mod_mask+1); return 1; } libntru-0.5/src/poly.h000066400000000000000000000370431271556312200147730ustar00rootroot00000000000000#ifndef NTRU_POLY_H #define NTRU_POLY_H #include #include "rand.h" #include "types.h" /** * @brief Random ternary polynomial * * Generates a random ternary polynomial. 
* * @param N the number of coefficients; must be NTRU_MAX_DEGREE or less * @param num_ones number of ones * @param num_neg_ones number of negative ones * @param poly output parameter; a pointer to store the new polynomial * @param rand_ctx a random number generator * @return 1 for success, 0 for failure */ uint8_t ntru_rand_tern(uint16_t N, uint16_t num_ones, uint16_t num_neg_ones, NtruTernPoly *poly, NtruRandContext *rand_ctx); #ifndef NTRU_AVOID_HAMMING_WT_PATENT /** * @brief Random product-form polynomial * * Generates a random product-form polynomial consisting of 3 random ternary polynomials. * * @param N the number of coefficients; must be NTRU_MAX_DEGREE or less * @param df1 number of ones and negative ones in the first ternary polynomial * @param df2 number of ones and negative ones in the second ternary polynomial * @param df3_ones number of ones ones in the third ternary polynomial * @param df3_neg_ones number of negative ones in the third ternary polynomial * @param poly output parameter; a pointer to store the new polynomial * @param rand_ctx a random number generator * @return 1 for success, 0 for failure */ uint8_t ntru_rand_prod(uint16_t N, uint16_t df1, uint16_t df2, uint16_t df3_ones, uint16_t df3_neg_ones, NtruProdPoly *poly, NtruRandContext *rand_ctx); #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ /** * @brief Addition of two polynomials * * Adds a NtruIntPoly to another. * The polynomial b must not have more coefficients than a. * * @param a input and output parameter; coefficients are overwritten * @param b a polynomial to add to the polynomial a */ void ntru_add(NtruIntPoly *a, NtruIntPoly *b); /** * @brief Subtraction of two polynomials * * Subtracts a NtruIntPoly from another. * The polynomial b must not have more coefficients than a. * * @param a input and output parameter; coefficients are overwritten * @param b a polynomial to subtract from the polynomial a */ void ntru_sub(NtruIntPoly *a, NtruIntPoly *b); /** * @brief General polynomial by ternary polynomial multiplication * * Multiplies a NtruIntPoly by a NtruTernPoly. The number of coefficients * must be the same for both polynomials. * * @param a a general polynomial * @param b a ternary polynomial * @param c output parameter; a pointer to store the new polynomial * @param mod_mask an AND mask to apply; must be a power of two minus one * @return 0 if the number of coefficients differ, 1 otherwise */ uint8_t ntru_mult_tern(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask); /** * @brief General polynomial by ternary polynomial multiplication, 32 bit version * * Multiplies a NtruIntPoly by a NtruTernPoly. The number of coefficients * must be the same for both polynomials. * Uses 32-bit arithmetic. * * @param a a general polynomial * @param b a ternary polynomial * @param c output parameter; a pointer to store the new polynomial * @param mod_mask an AND mask to apply; must be a power of two minus one * @return 0 if the number of coefficients differ, 1 otherwise */ uint8_t ntru_mult_tern_32(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask); /** * @brief General polynomial by ternary polynomial multiplication, 64 bit version * * Multiplies a NtruIntPoly by a NtruTernPoly. The number of coefficients * must be the same for both polynomials. * Uses 64-bit arithmetic. 
* * @param a a general polynomial * @param b a ternary polynomial * @param c output parameter; a pointer to store the new polynomial * @param mod_mask an AND mask to apply; must be a power of two minus one * @return 0 if the number of coefficients differ, 1 otherwise */ uint8_t ntru_mult_tern_64(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask); /** * @brief General polynomial by ternary polynomial multiplication, SSSE3 version * * Multiplies a NtruIntPoly by a NtruTernPoly. The number of coefficients * must be the same for both polynomials. * This variant requires SSSE3 support. * * @param a a general polynomial * @param b a ternary polynomial * @param c output parameter; a pointer to store the new polynomial * @param mod_mask an AND mask to apply; must be a power of two minus one * @return 0 if the number of coefficients differ, 1 otherwise */ uint8_t ntru_mult_tern_sse(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask); /** * @brief General polynomial by ternary polynomial multiplication, AVX2 version * * Multiplies a NtruIntPoly by a NtruTernPoly. The number of coefficients * must be the same for both polynomials. * This variant requires AVX2 support. * * @param a a general polynomial * @param b a ternary polynomial * @param c output parameter; a pointer to store the new polynomial * @param mod_mask an AND mask to apply; must be a power of two minus one * @return 0 if the number of coefficients differ, 1 otherwise */ uint8_t ntru_mult_tern_avx2(NtruIntPoly *a, NtruTernPoly *b, NtruIntPoly *c, uint16_t mod_mask); #ifndef NTRU_AVOID_HAMMING_WT_PATENT /** * @brief General polynomial by product-form polynomial multiplication * * Multiplies a NtruIntPoly by a NtruProdPoly. The number of coefficients * must be the same for both polynomials. * * @param a a general polynomial * @param b a product-form polynomial * @param c output parameter; a pointer to store the new polynomial * @param mod_mask an AND mask to apply; must be a power of two minus one * @return 0 if the number of coefficients differ, 1 otherwise */ uint8_t ntru_mult_prod(NtruIntPoly *a, NtruProdPoly *b, NtruIntPoly *c, uint16_t mod_mask); #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ /** * @brief General polynomial by private polynomial multiplication * * Multiplies a NtruIntPoly by a NtruPrivPoly, i.e. a NtruTernPoly or * a NtruProdPoly. The number of coefficients must be the same for both * polynomials. * * @param a a "private" polynomial * @param b a general polynomial * @param c output parameter; a pointer to store the new polynomial * @param mod_mask an AND mask to apply; must be a power of two minus one * @return 0 if the number of coefficients differ, 1 otherwise */ uint8_t ntru_mult_priv(NtruPrivPoly *a, NtruIntPoly *b, NtruIntPoly *c, uint16_t mod_mask); /** * @brief Polynomial to binary * * Converts a NtruIntPoly to a uint8_t array. Each coefficient is encoded * in (log q) bits. * Uses 32-bit arithmetic. * * @param p a polynomial * @param q the modulus; must be a power of two * @param a output parameter; a pointer to store the encoded polynomial. * No extra room is needed at the end. */ void ntru_to_arr_32(NtruIntPoly *p, uint16_t q, uint8_t *a); /** * @brief Polynomial to binary * * Converts a NtruIntPoly to a uint8_t array. Each coefficient is encoded * in (log q) bits. * Uses 64-bit arithmetic. * * @param p a polynomial * @param q the modulus; must be a power of two * @param a output parameter; a pointer to store the encoded polynomial. 
* Must accommodate at least 7 more bytes than the result takes up. */ void ntru_to_arr_64(NtruIntPoly *p, uint16_t q, uint8_t *a); /** * @brief Polynomial to binary * * Converts a NtruIntPoly to a uint8_t array. q is assumed to be 2048, so * each coefficient is encoded in 11 bits. * Requires SSSE3 support. * * @param p a polynomial * @param a output parameter; a pointer to store the encoded polynomial. * Must accommodate at least 7 more bytes than the result takes up. */ void ntru_to_arr_sse_2048(NtruIntPoly *p, uint8_t *a); /** * @brief Polynomial to binary * * Converts a NtruIntPoly to a uint8_t array. Each coefficient is encoded * in (log q) bits. * * @param p a polynomial * @param q the modulus; must be a power of two * @param a output parameter; a pointer to store the encoded polynomial */ void ntru_to_arr(NtruIntPoly *p, uint16_t q, uint8_t *a); /** * @brief Polynomial to binary modulo 4 * * Optimized version of ntru_to_arr() for q=4. * Encodes the low 2 bits of all coefficients in a uint8_t array. * * @param p a polynomial * @param arr output parameter; a pointer to store the encoded polynomial */ void ntru_to_arr4(NtruIntPoly *p, uint8_t *arr); void ntru_from_arr(uint8_t *arr, uint16_t N, uint16_t q, NtruIntPoly *p); /** * @brief Multiplies a polynomial by a factor * * Multiplies each coefficient of an NtruIntPoly by an integer. * * @param a input and output parameter; coefficients are overwritten * @param factor the factor to multiply by */ void ntru_mult_fac(NtruIntPoly *a, int16_t factor); /** * @brief Multiplication of two general polynomials with a modulus * * Multiplies a NtruIntPoly by another, taking the coefficient values modulo an integer. * The number of coefficients must be the same for both polynomials. * * @param a input and output parameter; coefficients are overwritten * @param b a polynomial to multiply by * @param c output parameter; a pointer to store the new polynomial * @param mod_mask an AND mask to apply to the coefficients of c * @return 0 if the number of coefficients differ, 1 otherwise */ uint8_t ntru_mult_int(NtruIntPoly *a, NtruIntPoly *b, NtruIntPoly *c, uint16_t mod_mask); /** * @brief Multiplication of two general polynomials with a modulus * * Multiplies a NtruIntPoly by another, taking the coefficient values modulo an integer. * The number of coefficients must be the same for both polynomials. * Uses 16-bit arithmetic. * * @param a input and output parameter; coefficients are overwritten * @param b a polynomial to multiply by * @param c output parameter; a pointer to store the new polynomial * @param mod_mask an AND mask to apply to the coefficients of c * @return 0 if the number of coefficients differ, 1 otherwise */ uint8_t ntru_mult_int_16(NtruIntPoly *a, NtruIntPoly *b, NtruIntPoly *c, uint16_t mod_mask); /** * @brief Multiplication of two general polynomials with a modulus, 64 bit version * * Multiplies a NtruIntPoly by another, taking the coefficient values modulo an integer. * The number of coefficients must be the same for both polynomials. * Uses 64-bit arithmetic. 
* * @param a input and output parameter; coefficients are overwritten * @param b a polynomial to multiply by * @param c output parameter; a pointer to store the new polynomial * @param mod_mask an AND mask to apply to the coefficients of c * @return 0 if the number of coefficients differ, 1 otherwise */ uint8_t ntru_mult_int_64(NtruIntPoly *a, NtruIntPoly *b, NtruIntPoly *c, uint16_t mod_mask); /** * @brief Multiplication of two general polynomials with a modulus, SSSE3 version * * Multiplies a NtruIntPoly by another, taking the coefficient values modulo an integer. * The number of coefficients must be the same for both polynomials. * Requires SSSE3 support. * * @param a input and output parameter; coefficients are overwritten * @param b a polynomial to multiply by * @param c output parameter; a pointer to store the new polynomial * @param mod_mask an AND mask to apply to the coefficients of c * @return 0 if the number of coefficients differ, 1 otherwise */ uint8_t ntru_mult_int_sse(NtruIntPoly *a, NtruIntPoly *b, NtruIntPoly *c, uint16_t mod_mask); /** * @brief Multiplication of two general polynomials with a modulus, AVX2 version * * Multiplies a NtruIntPoly by another, taking the coefficient values modulo an integer. * The number of coefficients must be the same for both polynomials. * Requires AVX2 support. * * @param a input and output parameter; coefficients are overwritten * @param b a polynomial to multiply by * @param c output parameter; a pointer to store the new polynomial * @param mod_mask an AND mask to apply to the coefficients of c * @return 0 if the number of coefficients differ, 1 otherwise */ uint8_t ntru_mult_int_avx2(NtruIntPoly *a, NtruIntPoly *b, NtruIntPoly *c, uint16_t mod_mask); /** * @brief Reduction modulo a power of two * * Reduces the coefficients of an NtruIntPoly modulo a power of two. * * @param p input and output parameter; coefficients are overwritten * @param mod_mask an AND mask to apply to the coefficients of c */ void ntru_mod_mask(NtruIntPoly *p, uint16_t mod_mask); /** * @brief Reduction modulo 3 * * Reduces the coefficients of an NtruIntPoly modulo 3 such that all * coefficients are ternary. * * @param p input and output parameter; coefficients are overwritten */ void ntru_mod3(NtruIntPoly *p); /** * @brief Reduction modulo an integer, centered * * Reduces the coefficients of an NtruIntPoly modulo an integer such that * -q/2 <= p->coeffs[i] < q/2 for all coefficients. * * @param p input and output parameter; coefficients are overwritten * @param modulus the modulus to apply to the coefficients of p */ void ntru_mod_center(NtruIntPoly *p, uint16_t modulus); /** * @brief Equality with one * * Tests if p(x) = 1 * * @param p a polynomial * @return 1 iff all coefficients are equal to zero, except for the lowest coefficient which must equal 1 */ uint8_t ntru_equals1(NtruIntPoly *p); /** * @brief Equality of two polynomials * * Tests if a(x) = b(x) * * @param a a polynomial * @param b a polynomial * @return 1 iff all coefficients are equal */ uint8_t ntru_equals_int(NtruIntPoly *a, NtruIntPoly *b); /** * @brief Erases a private polynomial * * Overwrites all coefficients of a private (i.e., ternary or product-form) * polynomial with zeros. * * @param p a polynomial */ void ntru_clear_priv(NtruPrivPoly *p); /** * @brief Erases a general polynomial * * Overwrites all coefficients of a polynomial with zeros. 
* * @param p a polynomial */ void ntru_clear_int(NtruIntPoly *p); /** * @brief Inverse modulo q * * Computes the inverse of 1+3a mod q; q must be a power of 2. * Returns 0 if the polynomial is not invertible, 1 otherwise. * The algorithm is described in "Almost Inverses and Fast NTRU Key Generation" at * http://www.securityinnovation.com/uploads/Crypto/NTRUTech014.pdf * * @param a a ternary or product-form polynomial * @param mod_mask an AND mask to apply; must be a power of two minus one * @param Fq output parameter; a pointer to store the new polynomial * @return 1 if a is invertible, 0 otherwise */ uint8_t ntru_invert(NtruPrivPoly *a, uint16_t mod_mask, NtruIntPoly *Fq); /** * @brief Inverse modulo q * * Computes the inverse of 1+3a mod q; q must be a power of 2. * Returns 0 if the polynomial is not invertible, 1 otherwise. * The algorithm is described in "Almost Inverses and Fast NTRU Key Generation" at * http://www.securityinnovation.com/uploads/Crypto/NTRUTech014.pdf * This function uses 32-bit arithmetic. * * @param a a ternary or product-form polynomial * @param mod_mask an AND mask to apply; must be a power of two minus one * @param Fq output parameter; a pointer to store the new polynomial * @return 1 if a is invertible, 0 otherwise */ uint8_t ntru_invert_32(NtruPrivPoly *a, uint16_t mod_mask, NtruIntPoly *Fq); /** * @brief Inverse modulo q * * Computes the inverse of 1+3a mod q; q must be a power of 2. * Returns 0 if the polynomial is not invertible, 1 otherwise. * The algorithm is described in "Almost Inverses and Fast NTRU Key Generation" at * http://www.securityinnovation.com/uploads/Crypto/NTRUTech014.pdf * This function uses 64-bit arithmetic. * * @param a a ternary or product-form polynomial * @param mod_mask an AND mask to apply; must be a power of two minus one * @param Fq output parameter; a pointer to store the new polynomial * @return 1 if a is invertible, 0 otherwise */ uint8_t ntru_invert_64(NtruPrivPoly *a, uint16_t mod_mask, NtruIntPoly *Fq); #endif /* NTRU_POLY_H */ libntru-0.5/src/rand.c000066400000000000000000000153451271556312200147300ustar00rootroot00000000000000#include #include #include #include #include "rand.h" #include "err.h" #include "encparams.h" #include "nist_ctr_drbg.h" #ifdef WIN32 #define WIN32_LEAN_AND_MEAN #include #include #endif const char NTRU_PERS_STRING[] = "libntru"; /* personalization string for CTR-DRBG */ uint8_t ntru_rand_init(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen) { rand_ctx->rand_gen = rand_gen; rand_ctx->seed = NULL; return rand_gen->init(rand_ctx, rand_gen) ? NTRU_SUCCESS : NTRU_ERR_PRNG; } uint8_t ntru_rand_init_det(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen, uint8_t *seed, uint16_t seed_len) { rand_ctx->seed = malloc(seed_len); if (rand_ctx->seed == NULL) return NTRU_ERR_PRNG; memcpy(rand_ctx->seed, seed, seed_len); rand_ctx->seed_len = seed_len; rand_ctx->rand_gen = rand_gen; return rand_gen->init(rand_ctx, rand_gen) ? NTRU_SUCCESS : NTRU_ERR_PRNG; } uint8_t ntru_rand_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx) { return rand_ctx->rand_gen->generate(rand_data, len, rand_ctx) ? NTRU_SUCCESS : NTRU_ERR_PRNG; } uint8_t ntru_rand_release(NtruRandContext *rand_ctx) { if (rand_ctx->seed != NULL) free(rand_ctx->seed); return rand_ctx->rand_gen->release(rand_ctx) ? 
NTRU_SUCCESS : NTRU_ERR_PRNG; } #ifdef WIN32 uint8_t ntru_rand_wincrypt_init(NtruRandContext *rand_ctx, NtruRandGen *rand_gen) { HCRYPTPROV *hCryptProv = malloc(sizeof(HCRYPTPROV)); if (hCryptProv == NULL) return 0; uint8_t result = CryptAcquireContext(hCryptProv, NULL, NULL, PROV_RSA_FULL, 0); if (!result) { if (GetLastError() == (DWORD)NTE_BAD_KEYSET) /* see http://support.microsoft.com/kb/238187 */ result = CryptAcquireContext(hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_NEWKEYSET); if (!result) { free(hCryptProv); return 0; } } rand_ctx->state = hCryptProv; return 1; } uint8_t ntru_rand_wincrypt_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx) { HCRYPTPROV *hCryptProv = (HCRYPTPROV*)rand_ctx->state; return CryptGenRandom(*hCryptProv, len, rand_data); } uint8_t ntru_rand_wincrypt_release(NtruRandContext *rand_ctx) { HCRYPTPROV *hCryptProv = (HCRYPTPROV*)rand_ctx->state; uint8_t result = CryptReleaseContext(*hCryptProv, 0); free(hCryptProv); return result; } #else uint8_t ntru_rand_device_init(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen, char *filename) { int rand_fd = open(filename, O_RDONLY); if (rand_fd >= 0) { /* save rand_fd in rand_ctx->state */ int *fd_ptr = malloc(sizeof(int)); if (fd_ptr == NULL) { close(rand_fd); return 0; } *fd_ptr = rand_fd; rand_ctx->state = fd_ptr; } return rand_fd >= 0; } uint8_t ntru_rand_device_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx) { int rand_fd = *((int*)rand_ctx->state); ssize_t bytes_read = read(rand_fd, rand_data, len); return bytes_read == len; } uint8_t ntru_rand_device_release(NtruRandContext *rand_ctx) { int rand_fd = *((int*)rand_ctx->state); free(rand_ctx->state); return close(rand_fd) >= 0; } uint8_t ntru_rand_devurandom_init(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen) { return ntru_rand_device_init(rand_ctx, rand_gen, "/dev/urandom"); } uint8_t ntru_rand_devurandom_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx) { return ntru_rand_device_generate(rand_data, len, rand_ctx); } uint8_t ntru_rand_devurandom_release(NtruRandContext *rand_ctx) { return ntru_rand_device_release(rand_ctx); } uint8_t ntru_rand_devrandom_init(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen) { return ntru_rand_device_init(rand_ctx, rand_gen, "/dev/random"); } uint8_t ntru_rand_devrandom_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx) { return ntru_rand_device_generate(rand_data, len, rand_ctx); } uint8_t ntru_rand_devrandom_release(NtruRandContext *rand_ctx) { return ntru_rand_device_release(rand_ctx); } #endif /* !WIN32 */ uint8_t ntru_rand_ctr_drbg_init(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen) { rand_ctx->state = malloc(sizeof(NIST_CTR_DRBG)); if (!rand_ctx->state) return 0; uint16_t pers_string_size = strlen(NTRU_PERS_STRING) * sizeof(NTRU_PERS_STRING[0]); return nist_ctr_drbg_instantiate(rand_ctx->state, rand_ctx->seed, rand_ctx->seed_len, NULL, 0, NTRU_PERS_STRING, pers_string_size) == 0; } uint8_t ntru_rand_ctr_drbg_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx) { nist_ctr_drbg_generate(rand_ctx->state, rand_data, len, NULL, 0); return 1; } uint8_t ntru_rand_ctr_drbg_release(NtruRandContext *rand_ctx) { uint8_t result = nist_ctr_drbg_destroy(rand_ctx->state); free(rand_ctx->state); return result; } uint8_t ntru_get_entropy(uint8_t *buffer, uint16_t len) { #ifdef WIN32 /* initialize */ HCRYPTPROV hCryptProv; uint8_t result = CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, 0); if 
(!result) { if (GetLastError() == (DWORD)NTE_BAD_KEYSET) /* see http://support.microsoft.com/kb/238187 */ result = CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_NEWKEYSET); if (!result) return 0; } /* generate */ result &= CryptGenRandom(hCryptProv, len, buffer); /* release */ result &= CryptReleaseContext(hCryptProv, 0); return result; #else /* open /dev/urandom */ int rand_fd = open("/dev/urandom", O_RDONLY); uint8_t result = rand_fd >= 0; /* read */ ssize_t bytes_read = read(rand_fd, buffer, len); result &= bytes_read == len; /* close */ result &= close(rand_fd) >= 0; return result; #endif /* !WIN32 */ } uint8_t ntru_rand_default_init(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen) { uint8_t result = 1; result &= nist_ctr_initialize() == 0; rand_ctx->state = malloc(sizeof(NIST_CTR_DRBG)); if (!rand_ctx->state) return 0; uint8_t entropy[32]; result &= ntru_get_entropy(entropy, 32); uint16_t pers_string_size = strlen(NTRU_PERS_STRING) * sizeof(NTRU_PERS_STRING[0]); result &= nist_ctr_drbg_instantiate(rand_ctx->state, entropy, 32, NULL, 0, NTRU_PERS_STRING, pers_string_size) == 0; return result; } uint8_t ntru_rand_default_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx) { nist_ctr_drbg_generate(rand_ctx->state, rand_data, len, NULL, 0); return 1; } uint8_t ntru_rand_default_release(NtruRandContext *rand_ctx) { uint8_t result = nist_ctr_drbg_destroy(rand_ctx->state); free(rand_ctx->state); return result; } libntru-0.5/src/rand.h000066400000000000000000000063531271556312200147340ustar00rootroot00000000000000#ifndef NTRU_RAND_H #define NTRU_RAND_H #include "types.h" struct NtruRandGen; typedef struct NtruRandContext { struct NtruRandGen *rand_gen; uint8_t *seed; /* for deterministic RNGs */ uint16_t seed_len; /* for deterministic RNGs */ void *state; } NtruRandContext; typedef struct NtruRandGen { uint8_t (*init)(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen); /* a pointer to a function that takes an array and an array size, and fills the array with random data */ uint8_t (*generate)(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx); uint8_t (*release)(NtruRandContext *rand_ctx); } NtruRandGen; /** Returns NTRU_SUCCESS or NTRU_ERR_PRNG */ uint8_t ntru_rand_init(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen); /** Returns NTRU_SUCCESS or NTRU_ERR_PRNG */ uint8_t ntru_rand_init_det(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen, uint8_t *seed, uint16_t seed_len); /** Returns NTRU_SUCCESS or NTRU_ERR_PRNG */ uint8_t ntru_rand_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx); /** Returns NTRU_SUCCESS or NTRU_ERR_PRNG */ uint8_t ntru_rand_release(NtruRandContext *rand_ctx); #ifdef WIN32 #define NTRU_RNG_WINCRYPT {ntru_rand_wincrypt_init, ntru_rand_wincrypt_generate, ntru_rand_wincrypt_release} /* CryptGenRandom-based RNG */ uint8_t ntru_rand_wincrypt_init(NtruRandContext *rand_ctx, NtruRandGen *rand_gen); uint8_t ntru_rand_wincrypt_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx); uint8_t ntru_rand_wincrypt_release(NtruRandContext *rand_ctx); #else #define NTRU_RNG_DEVURANDOM {ntru_rand_devurandom_init, ntru_rand_devurandom_generate, ntru_rand_devurandom_release} #define NTRU_RNG_DEVRANDOM {ntru_rand_devrandom_init, ntru_rand_devrandom_generate, ntru_rand_devrandom_release} /* /dev/random-based RNG */ uint8_t ntru_rand_devrandom_init(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen); uint8_t ntru_rand_devrandom_generate(uint8_t rand_data[], uint16_t len, 
NtruRandContext *rand_ctx); uint8_t ntru_rand_devrandom_release(NtruRandContext *rand_ctx); /* /dev/urandom-based RNG */ uint8_t ntru_rand_devurandom_init(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen); uint8_t ntru_rand_devurandom_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx); uint8_t ntru_rand_devurandom_release(NtruRandContext *rand_ctx); #endif /* !WIN32 */ /** default RNG: CTR_DRBG seeded from /dev/urandom (on *nix) or CryptGenRandom() (on Windows) */ uint8_t ntru_rand_default_init(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen); uint8_t ntru_rand_default_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx); uint8_t ntru_rand_default_release(NtruRandContext *rand_ctx); #define NTRU_RNG_DEFAULT {ntru_rand_default_init, ntru_rand_default_generate, ntru_rand_default_release} /* deterministic RNG based on CTR_DRBG */ uint8_t ntru_rand_ctr_drbg_init(NtruRandContext *rand_ctx, struct NtruRandGen *rand_gen); uint8_t ntru_rand_ctr_drbg_generate(uint8_t rand_data[], uint16_t len, NtruRandContext *rand_ctx); uint8_t ntru_rand_ctr_drbg_release(NtruRandContext *rand_ctx); #define NTRU_RNG_CTR_DRBG {ntru_rand_ctr_drbg_init, ntru_rand_ctr_drbg_generate, ntru_rand_ctr_drbg_release} #endif /* NTRU_RAND_H */ libntru-0.5/src/rijndael.c000066400000000000000000001633541271556312200156000ustar00rootroot00000000000000/* $OpenBSD: rijndael.c,v 1.18 2005/05/25 05:47:53 markus Exp $ */ /** * rijndael-alg-fst.c * * @version 3.0 (December 2000) * * Optimised ANSI C code for the Rijndael cipher (now AES) * * @author Vincent Rijmen * @author Antoon Bosselaers * @author Paulo Barreto * * This code is hereby placed in the public domain. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ //#include //#include typedef unsigned char u_char; #include "rijndael.h" #define FULL_UNROLL /* Te0[x] = S [x].[02, 01, 01, 03]; Te1[x] = S [x].[03, 02, 01, 01]; Te2[x] = S [x].[01, 03, 02, 01]; Te3[x] = S [x].[01, 01, 03, 02]; Te4[x] = S [x].[01, 01, 01, 01]; Td0[x] = Si[x].[0e, 09, 0d, 0b]; Td1[x] = Si[x].[0b, 0e, 09, 0d]; Td2[x] = Si[x].[0d, 0b, 0e, 09]; Td3[x] = Si[x].[09, 0d, 0b, 0e]; Td4[x] = Si[x].[01, 01, 01, 01]; */ static const u32 Te0[256] = { 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU, 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U, 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU, 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU, 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU, 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU, 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU, 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U, 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU, 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU, 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U, 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU, 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU, 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU, 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU, 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU, 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U, 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU, 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU, 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU, 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU, 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U, 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U, 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U, 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U, 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU, 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U, 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U, 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU, 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU, 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U, 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U, 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U, 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU, 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U, 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU, 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U, 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU, 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U, 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U, 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU, 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U, 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U, 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U, 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U, 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U, 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U, 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U, 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U, 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU, 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U, 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U, 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U, 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U, 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U, 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U, 0x87cece49U, 
0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU, 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U, 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U, 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U, 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU, }; static const u32 Te1[256] = { 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU, 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U, 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU, 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U, 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU, 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U, 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU, 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U, 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U, 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU, 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U, 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U, 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U, 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU, 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U, 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U, 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU, 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U, 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U, 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U, 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU, 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU, 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U, 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU, 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU, 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U, 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU, 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U, 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU, 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U, 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U, 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U, 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU, 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U, 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU, 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U, 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU, 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U, 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U, 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU, 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU, 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU, 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U, 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U, 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU, 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U, 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU, 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U, 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU, 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U, 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU, 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU, 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U, 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU, 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U, 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU, 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U, 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U, 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U, 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU, 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU, 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U, 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 
0x111e0f0fU, 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U, }; static const u32 Te2[256] = { 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU, 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U, 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU, 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U, 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU, 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U, 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU, 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U, 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U, 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU, 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U, 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U, 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U, 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU, 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U, 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U, 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU, 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U, 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U, 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U, 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU, 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU, 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U, 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU, 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU, 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U, 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU, 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U, 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU, 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U, 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U, 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U, 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU, 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U, 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU, 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U, 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU, 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U, 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U, 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU, 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU, 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU, 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U, 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U, 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU, 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U, 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU, 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U, 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU, 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U, 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU, 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU, 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U, 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU, 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U, 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU, 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U, 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U, 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U, 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU, 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU, 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U, 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU, 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U, }; static const u32 Te3[256] = { 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U, 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 
0xc5c55491U, 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U, 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU, 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU, 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU, 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U, 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU, 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU, 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U, 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U, 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU, 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU, 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU, 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU, 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU, 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U, 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU, 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU, 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U, 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U, 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U, 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U, 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U, 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU, 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U, 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU, 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU, 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U, 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U, 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U, 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU, 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U, 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU, 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU, 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U, 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U, 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU, 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U, 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU, 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U, 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U, 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U, 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U, 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU, 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U, 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU, 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U, 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU, 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U, 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU, 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU, 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU, 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU, 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U, 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U, 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U, 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U, 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U, 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U, 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU, 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U, 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU, }; static const u32 Te4[256] = { 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU, 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U, 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU, 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U, 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU, 0xfafafafaU, 
0x59595959U, 0x47474747U, 0xf0f0f0f0U, 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU, 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U, 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U, 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU, 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U, 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U, 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U, 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU, 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U, 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U, 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU, 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U, 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U, 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U, 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU, 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU, 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U, 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU, 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU, 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U, 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU, 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U, 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU, 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U, 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U, 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U, 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU, 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U, 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU, 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U, 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU, 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U, 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U, 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU, 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU, 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU, 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U, 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U, 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU, 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U, 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU, 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U, 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU, 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U, 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU, 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU, 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U, 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU, 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U, 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU, 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U, 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U, 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U, 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU, 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU, 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U, 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU, 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, }; static const u32 Td0[256] = { 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU, 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U, 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U, 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 
0x27b971ddU, 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U, 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U, 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U, 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU, 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU, 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU, 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U, 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U, 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU, 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU, 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU, 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU, 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U, 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U, 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U, 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U, 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U, 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU, 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U, 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U, 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU, 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U, 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U, 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU, 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU, 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U, 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU, 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U, 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U, 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U, }; static const u32 Td1[256] = { 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U, 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U, 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U, 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U, 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU, 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU, 0x58704868U, 
0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU, 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U, 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U, 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU, 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U, 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U, 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU, 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U, 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU, 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU, 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U, 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU, 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U, 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U, 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U, 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U, 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU, 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU, 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU, 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U, 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U, 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U, 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U, 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U, 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U, 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU, 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U, 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U, 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U, }; static const u32 Td2[256] = { 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U, 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U, 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U, 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U, 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU, 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U, 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U, 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 
0x825ced16U, 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U, 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU, 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U, 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U, 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU, 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U, 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U, 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU, 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU, 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU, 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U, 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U, 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U, 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU, 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U, 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU, 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U, 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U, 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U, 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U, 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U, 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U, 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U, 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U, 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU, 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U, }; static const u32 Td3[256] = { 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU, 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U, 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U, 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU, 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU, 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU, 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U, 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U, 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U, 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU, 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U, 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, 0x83ec390bU, 
0x60efaa40U, 0x719f065eU, 0x6e1051bdU, 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U, 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U, 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U, 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U, 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU, 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U, 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU, 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U, 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU, 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U, 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U, 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U, 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU, 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U, 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU, 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU, 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U, 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U, 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U, 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU, 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU, 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U, 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU, 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, }; static const u32 Td4[256] = { 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U, 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U, 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU, 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU, 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U, 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U, 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U, 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU, 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U, 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU, 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU, 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU, 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U, 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U, 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U, 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U, 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U, 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U, 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU, 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U, 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U, 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU, 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 
0x57575757U, 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U, 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U, 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU, 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U, 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U, 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU, 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U, 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U, 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU, 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U, 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU, 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU, 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U, 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U, 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U, 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U, 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU, 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U, 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U, 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU, 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU, 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU, 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U, 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU, 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U, 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U, 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U, 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U, 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU, 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U, 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU, 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU, 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU, 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU, 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U, 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU, 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U, 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU, 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U, 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U, 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU, }; static const u32 rcon[] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ }; #define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3])) #define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); } /** * Expand the cipher key into the encryption key schedule. * * @return the number of rounds for the given cipher key size. 
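 *
 * (Implementation note: GETU32/PUTU32 above pack and unpack 32-bit
 * words big-endian, so the bytes {0x01,0x02,0x03,0x04} always load as
 * 0x01020304 regardless of host byte order, and the key schedule below
 * yields identical round keys on little- and big-endian machines.)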
*/ int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) { int i = 0; u32 temp; rk[0] = GETU32(cipherKey ); rk[1] = GETU32(cipherKey + 4); rk[2] = GETU32(cipherKey + 8); rk[3] = GETU32(cipherKey + 12); if (keyBits == 128) { for (;;) { temp = rk[3]; rk[4] = rk[0] ^ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ (Te4[(temp ) & 0xff] & 0x0000ff00) ^ (Te4[(temp >> 24) ] & 0x000000ff) ^ rcon[i]; rk[5] = rk[1] ^ rk[4]; rk[6] = rk[2] ^ rk[5]; rk[7] = rk[3] ^ rk[6]; if (++i == 10) { return 10; } rk += 4; } } rk[4] = GETU32(cipherKey + 16); rk[5] = GETU32(cipherKey + 20); if (keyBits == 192) { for (;;) { temp = rk[ 5]; rk[ 6] = rk[ 0] ^ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ (Te4[(temp ) & 0xff] & 0x0000ff00) ^ (Te4[(temp >> 24) ] & 0x000000ff) ^ rcon[i]; rk[ 7] = rk[ 1] ^ rk[ 6]; rk[ 8] = rk[ 2] ^ rk[ 7]; rk[ 9] = rk[ 3] ^ rk[ 8]; if (++i == 8) { return 12; } rk[10] = rk[ 4] ^ rk[ 9]; rk[11] = rk[ 5] ^ rk[10]; rk += 6; } } rk[6] = GETU32(cipherKey + 24); rk[7] = GETU32(cipherKey + 28); if (keyBits == 256) { for (;;) { temp = rk[ 7]; rk[ 8] = rk[ 0] ^ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ (Te4[(temp ) & 0xff] & 0x0000ff00) ^ (Te4[(temp >> 24) ] & 0x000000ff) ^ rcon[i]; rk[ 9] = rk[ 1] ^ rk[ 8]; rk[10] = rk[ 2] ^ rk[ 9]; rk[11] = rk[ 3] ^ rk[10]; if (++i == 7) { return 14; } temp = rk[11]; rk[12] = rk[ 4] ^ (Te4[(temp >> 24) ] & 0xff000000) ^ (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^ (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^ (Te4[(temp ) & 0xff] & 0x000000ff); rk[13] = rk[ 5] ^ rk[12]; rk[14] = rk[ 6] ^ rk[13]; rk[15] = rk[ 7] ^ rk[14]; rk += 8; } } return 0; } /** * Expand the cipher key into the decryption key schedule. * * @return the number of rounds for the given cipher key size. 
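 *
 * (Implementation note: the routine below first reverses the order of
 * the round keys produced by rijndaelKeySetupEnc, then applies
 * InvMixColumns to every round key except the first and the last. The
 * Td0[Te4[x] & 0xff] ^ ... expression works because Td0..Td3 combine
 * the inverse S-box with InvMixColumns while Te4 is the forward S-box;
 * the two S-boxes cancel, leaving a table-driven InvMixColumns applied
 * to each 32-bit key word.)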
*/ int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) { int Nr, i, j; u32 temp; /* expand the cipher key: */ Nr = rijndaelKeySetupEnc(rk, cipherKey, keyBits); /* invert the order of the round keys: */ for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) { temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; } /* apply the inverse MixColumn transform to all round keys but the first and the last: */ for (i = 1; i < Nr; i++) { rk += 4; rk[0] = Td0[Te4[(rk[0] >> 24) ] & 0xff] ^ Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^ Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^ Td3[Te4[(rk[0] ) & 0xff] & 0xff]; rk[1] = Td0[Te4[(rk[1] >> 24) ] & 0xff] ^ Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^ Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^ Td3[Te4[(rk[1] ) & 0xff] & 0xff]; rk[2] = Td0[Te4[(rk[2] >> 24) ] & 0xff] ^ Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^ Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^ Td3[Te4[(rk[2] ) & 0xff] & 0xff]; rk[3] = Td0[Te4[(rk[3] >> 24) ] & 0xff] ^ Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^ Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^ Td3[Te4[(rk[3] ) & 0xff] & 0xff]; } return Nr; } void rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16], u8 ct[16]) { u32 s0, s1, s2, s3, t0, t1, t2, t3; #ifndef FULL_UNROLL int r; #endif /* ?FULL_UNROLL */ /* * map byte array block to cipher state * and add initial round key: */ s0 = GETU32(pt ) ^ rk[0]; s1 = GETU32(pt + 4) ^ rk[1]; s2 = GETU32(pt + 8) ^ rk[2]; s3 = GETU32(pt + 12) ^ rk[3]; #ifdef FULL_UNROLL /* round 1: */ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4]; t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5]; t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6]; t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7]; /* round 2: */ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8]; s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9]; s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10]; s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11]; /* round 3: */ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12]; t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13]; t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14]; t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15]; /* round 4: */ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16]; s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17]; s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18]; s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19]; /* round 5: */ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20]; t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21]; t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22]; t3 = Te0[s3 >> 24] ^ 
Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23]; /* round 6: */ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24]; s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25]; s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26]; s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27]; /* round 7: */ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28]; t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29]; t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30]; t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31]; /* round 8: */ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32]; s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33]; s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34]; s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35]; /* round 9: */ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36]; t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37]; t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38]; t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39]; if (Nr > 10) { /* round 10: */ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40]; s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41]; s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42]; s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43]; /* round 11: */ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44]; t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45]; t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46]; t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47]; if (Nr > 12) { /* round 12: */ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48]; s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49]; s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50]; s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51]; /* round 13: */ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52]; t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53]; t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54]; t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55]; } } rk += Nr << 2; #else /* !FULL_UNROLL */ /* * Nr - 1 full rounds: */ r = Nr >> 1; for (;;) { t0 = Te0[(s0 >> 24) ] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[(s3 ) & 0xff] ^ rk[4]; t1 = Te0[(s1 >> 24) ] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[(s0 ) & 0xff] 
^ rk[5]; t2 = Te0[(s2 >> 24) ] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[(s1 ) & 0xff] ^ rk[6]; t3 = Te0[(s3 >> 24) ] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[(s2 ) & 0xff] ^ rk[7]; rk += 8; if (--r == 0) { break; } s0 = Te0[(t0 >> 24) ] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[(t3 ) & 0xff] ^ rk[0]; s1 = Te0[(t1 >> 24) ] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[(t0 ) & 0xff] ^ rk[1]; s2 = Te0[(t2 >> 24) ] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[(t1 ) & 0xff] ^ rk[2]; s3 = Te0[(t3 >> 24) ] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[(t2 ) & 0xff] ^ rk[3]; } #endif /* ?FULL_UNROLL */ /* * apply last round and * map cipher state to byte array block: */ s0 = (Te4[(t0 >> 24) ] & 0xff000000) ^ (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ (Te4[(t3 ) & 0xff] & 0x000000ff) ^ rk[0]; PUTU32(ct , s0); s1 = (Te4[(t1 >> 24) ] & 0xff000000) ^ (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ (Te4[(t0 ) & 0xff] & 0x000000ff) ^ rk[1]; PUTU32(ct + 4, s1); s2 = (Te4[(t2 >> 24) ] & 0xff000000) ^ (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ (Te4[(t1 ) & 0xff] & 0x000000ff) ^ rk[2]; PUTU32(ct + 8, s2); s3 = (Te4[(t3 >> 24) ] & 0xff000000) ^ (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ (Te4[(t2 ) & 0xff] & 0x000000ff) ^ rk[3]; PUTU32(ct + 12, s3); } static void rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16], u8 pt[16]) { u32 s0, s1, s2, s3, t0, t1, t2, t3; #ifndef FULL_UNROLL int r; #endif /* ?FULL_UNROLL */ /* * map byte array block to cipher state * and add initial round key: */ s0 = GETU32(ct ) ^ rk[0]; s1 = GETU32(ct + 4) ^ rk[1]; s2 = GETU32(ct + 8) ^ rk[2]; s3 = GETU32(ct + 12) ^ rk[3]; #ifdef FULL_UNROLL /* round 1: */ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4]; t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5]; t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6]; t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7]; /* round 2: */ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8]; s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9]; s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10]; s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11]; /* round 3: */ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12]; t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13]; t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14]; t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15]; /* round 4: */ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16]; s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17]; s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18]; s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19]; /* round 5: */ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20]; t1 = Td0[s1 
>> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21]; t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22]; t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23]; /* round 6: */ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24]; s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25]; s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26]; s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27]; /* round 7: */ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28]; t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29]; t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30]; t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31]; /* round 8: */ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32]; s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33]; s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34]; s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35]; /* round 9: */ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36]; t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37]; t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38]; t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39]; if (Nr > 10) { /* round 10: */ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40]; s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41]; s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42]; s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43]; /* round 11: */ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44]; t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45]; t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46]; t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47]; if (Nr > 12) { /* round 12: */ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48]; s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49]; s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50]; s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51]; /* round 13: */ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52]; t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53]; t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54]; t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55]; } } rk += Nr << 2; #else /* !FULL_UNROLL */ /* * Nr - 1 full rounds: */ r = Nr >> 1; 
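	/*
	 * Each pass of the loop below performs two decryption rounds
	 * (s -> t using rk[4..7], then t -> s using rk[0..3]), so the
	 * r = Nr/2 passes cover the Nr - 1 full rounds; the loop breaks
	 * after the s -> t half, leaving the state in t for the final
	 * Td4 round that follows.
	 */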
for (;;) { t0 = Td0[(s0 >> 24) ] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[(s1 ) & 0xff] ^ rk[4]; t1 = Td0[(s1 >> 24) ] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[(s2 ) & 0xff] ^ rk[5]; t2 = Td0[(s2 >> 24) ] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[(s3 ) & 0xff] ^ rk[6]; t3 = Td0[(s3 >> 24) ] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[(s0 ) & 0xff] ^ rk[7]; rk += 8; if (--r == 0) { break; } s0 = Td0[(t0 >> 24) ] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[(t1 ) & 0xff] ^ rk[0]; s1 = Td0[(t1 >> 24) ] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[(t2 ) & 0xff] ^ rk[1]; s2 = Td0[(t2 >> 24) ] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[(t3 ) & 0xff] ^ rk[2]; s3 = Td0[(t3 >> 24) ] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[(t0 ) & 0xff] ^ rk[3]; } #endif /* ?FULL_UNROLL */ /* * apply last round and * map cipher state to byte array block: */ s0 = (Td4[(t0 >> 24) ] & 0xff000000) ^ (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ (Td4[(t1 ) & 0xff] & 0x000000ff) ^ rk[0]; PUTU32(pt , s0); s1 = (Td4[(t1 >> 24) ] & 0xff000000) ^ (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ (Td4[(t2 ) & 0xff] & 0x000000ff) ^ rk[1]; PUTU32(pt + 4, s1); s2 = (Td4[(t2 >> 24) ] & 0xff000000) ^ (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ (Td4[(t3 ) & 0xff] & 0x000000ff) ^ rk[2]; PUTU32(pt + 8, s2); s3 = (Td4[(t3 >> 24) ] & 0xff000000) ^ (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ (Td4[(t0 ) & 0xff] & 0x000000ff) ^ rk[3]; PUTU32(pt + 12, s3); } /* setup key context for encryption only */ int rijndael_set_key_enc_only(rijndael_ctx *ctx, const u_char *key, int bits) { int rounds; rounds = rijndaelKeySetupEnc(ctx->ek, key, bits); if (rounds == 0) return -1; ctx->Nr = rounds; ctx->enc_only = 1; return 0; } /* setup key context for both encryption and decryption */ int rijndael_set_key(rijndael_ctx *ctx, const u_char *key, int bits) { int rounds; rounds = rijndaelKeySetupEnc(ctx->ek, key, bits); if (rounds == 0) return -1; if (rijndaelKeySetupDec(ctx->dk, key, bits) != rounds) return -1; ctx->Nr = rounds; ctx->enc_only = 0; return 0; } void rijndael_decrypt(const rijndael_ctx *ctx, const u_char *src, u_char *dst) { rijndaelDecrypt(ctx->dk, ctx->Nr, src, dst); } void rijndael_encrypt(const rijndael_ctx *ctx, const u_char *src, u_char *dst) { rijndaelEncrypt(ctx->ek, ctx->Nr, src, dst); } libntru-0.5/src/rijndael.h000066400000000000000000000044221271556312200155730ustar00rootroot00000000000000/* $OpenBSD: rijndael.h,v 1.12 2007/05/27 05:43:17 tedu Exp $ */ /** * rijndael-alg-fst.h * * @version 3.0 (December 2000) * * Optimised ANSI C code for the Rijndael cipher (now AES) * * @author Vincent Rijmen * @author Antoon Bosselaers * @author Paulo Barreto * * This code is hereby placed in the public domain. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __RIJNDAEL_H #define __RIJNDAEL_H #define AES_MAXKEYBITS (256) #define AES_MAXKEYBYTES (AES_MAXKEYBITS/8) /* for 256-bit keys, fewer for less */ #define AES_MAXROUNDS 14 typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; /* The structure for key information */ typedef struct { int enc_only; /* context contains only encrypt schedule */ int Nr; /* key-length-dependent number of rounds */ u32 ek[4*(AES_MAXROUNDS + 1)]; /* encrypt key schedule */ u32 dk[4*(AES_MAXROUNDS + 1)]; /* decrypt key schedule */ } rijndael_ctx; /*int rijndael_set_key(rijndael_ctx *, const u_char *, int); int rijndael_set_key_enc_only(rijndael_ctx *, const u_char *, int); void rijndael_decrypt(const rijndael_ctx *, const u_char *src, u_char *dst); void rijndael_encrypt(const rijndael_ctx *, const u_char *src, u_char *dst); */ int rijndaelKeySetupEnc(unsigned int [], const unsigned char [], int); int rijndaelKeySetupDec(unsigned int [], const unsigned char [], int); void rijndaelEncrypt(const unsigned int [], int, const unsigned char [], unsigned char []); #endif /* __RIJNDAEL_H */ libntru-0.5/src/sha1-mb-x86_64.pl000077500000000000000000001115101271556312200163530ustar00rootroot00000000000000#!/usr/bin/env perl # ==================================================================== # Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== # Multi-buffer SHA1 procedure processes n buffers in parallel by # placing buffer data to designated lane of SIMD register. n is # naturally limited to 4 on pre-AVX2 processors and to 8 on # AVX2-capable processors such as Haswell. # # this +aesni(i) sha1 aesni-sha1 gain(iv) # ------------------------------------------------------------------- # Westmere(ii) 10.7/n +1.28=3.96(n=4) 5.30 6.66 +68% # Atom(ii) 18.1/n +3.93=8.46(n=4) 9.37 12.8 +51% # Sandy Bridge (8.16 +5.15=13.3)/n 4.99 5.98 +80% # Ivy Bridge (8.08 +5.14=13.2)/n 4.60 5.54 +68% # Haswell(iii) (8.96 +5.00=14.0)/n 3.57 4.55 +160% # Bulldozer (9.76 +5.76=15.5)/n 5.95 6.37 +64% # # (i) multi-block CBC encrypt with 128-bit key; # (ii) (HASH+AES)/n does not apply to Westmere for n>3 and Atom, # because of lower AES-NI instruction throughput; # (iii) "this" is for n=8, when we gather twice as much data, result # for n=4 is 8.00+4.44=12.4; # (iv) presented improvement coefficients are asymptotic limits and # in real-life application are somewhat lower, e.g. 
for 2KB # fragments they range from 30% to 100% (on Haswell); $flavour = shift; $output = shift; if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; $avx=0; if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` =~ /GNU assembler version ([2-9]\.[0-9]+)/) { $avx = ($1>=2.19) + ($1>=2.22); } if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { $avx = ($1>=2.09) + ($1>=2.10); } if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && `ml64 2>&1` =~ /Version ([0-9]+)\./) { $avx = ($1>=10) + ($1>=11); } if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } open OUT,"| \"$^X\" $xlate $flavour $output"; *STDOUT=*OUT; # void sha1_multi_block ( # struct { unsigned int A[8]; # unsigned int B[8]; # unsigned int C[8]; # unsigned int D[8]; # unsigned int E[8]; } *ctx, # struct { void *ptr; int blocks; } inp[8], # int num); /* 1 or 2 */ # $ctx="%rdi"; # 1st arg $inp="%rsi"; # 2nd arg $num="%edx"; @ptr=map("%r$_",(8..11)); $Tbl="%rbp"; @V=($A,$B,$C,$D,$E)=map("%xmm$_",(0..4)); ($t0,$t1,$t2,$t3,$tx)=map("%xmm$_",(5..9)); @Xi=map("%xmm$_",(10..14)); $K="%xmm15"; if (1) { # Atom-specific optimization aiming to eliminate pshufb with high # registers [and thus get rid of 48 cycles accumulated penalty] @Xi=map("%xmm$_",(0..4)); ($tx,$t0,$t1,$t2,$t3)=map("%xmm$_",(5..9)); @V=($A,$B,$C,$D,$E)=map("%xmm$_",(10..14)); } $REG_SZ=16; sub Xi_off { my $off = shift; $off %= 16; $off *= $REG_SZ; $off<256 ? "$off-128(%rax)" : "$off-256-128(%rbx)"; } sub BODY_00_19 { my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; my $k=$i+2; # Loads are performed 2+3/4 iterations in advance. 3/4 means that out # of 4 words you would expect to be loaded per given iteration one is # spilled to next iteration. In other words indices in four input # streams are distributed as following: # # $i==0: 0,0,0,0,1,1,1,1,2,2,2, # $i==1: 2,3,3,3, # $i==2: 3,4,4,4, # ... # $i==13: 14,15,15,15, # $i==14: 15 # # Then at $i==15 Xupdate is applied one iteration in advance... 
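# The generated code below gathers the next input word of each of the
# four streams with movd, interleaves the four words with punpckldq so
# that SIMD lane k carries stream k, and byte-swaps every lane to
# big-endian with pshufb $tx (the pbswap mask); the SHA-1 round
# function then operates on all four buffers at once.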
$code.=<<___ if ($i==0); movd (@ptr[0]),@Xi[0] lea `16*4`(@ptr[0]),@ptr[0] movd (@ptr[1]),@Xi[2] # borrow @Xi[2] lea `16*4`(@ptr[1]),@ptr[1] movd (@ptr[2]),@Xi[3] # borrow @Xi[3] lea `16*4`(@ptr[2]),@ptr[2] movd (@ptr[3]),@Xi[4] # borrow @Xi[4] lea `16*4`(@ptr[3]),@ptr[3] punpckldq @Xi[3],@Xi[0] movd `4*$j-16*4`(@ptr[0]),@Xi[1] punpckldq @Xi[4],@Xi[2] movd `4*$j-16*4`(@ptr[1]),$t3 punpckldq @Xi[2],@Xi[0] movd `4*$j-16*4`(@ptr[2]),$t2 pshufb $tx,@Xi[0] ___ $code.=<<___ if ($i<14); # just load input movd `4*$j-16*4`(@ptr[3]),$t1 punpckldq $t2,@Xi[1] movdqa $a,$t2 paddd $K,$e # e+=K_00_19 punpckldq $t1,$t3 movdqa $b,$t1 movdqa $b,$t0 pslld \$5,$t2 pandn $d,$t1 pand $c,$t0 punpckldq $t3,@Xi[1] movdqa $a,$t3 movdqa @Xi[0],`&Xi_off($i)` paddd @Xi[0],$e # e+=X[i] movd `4*$k-16*4`(@ptr[0]),@Xi[2] psrld \$27,$t3 pxor $t1,$t0 # Ch(b,c,d) movdqa $b,$t1 por $t3,$t2 # rol(a,5) movd `4*$k-16*4`(@ptr[1]),$t3 pslld \$30,$t1 paddd $t0,$e # e+=Ch(b,c,d) psrld \$2,$b paddd $t2,$e # e+=rol(a,5) pshufb $tx,@Xi[1] movd `4*$k-16*4`(@ptr[2]),$t2 por $t1,$b # b=rol(b,30) ___ $code.=<<___ if ($i==14); # just load input movd `4*$j-16*4`(@ptr[3]),$t1 punpckldq $t2,@Xi[1] movdqa $a,$t2 paddd $K,$e # e+=K_00_19 punpckldq $t1,$t3 movdqa $b,$t1 movdqa $b,$t0 pslld \$5,$t2 prefetcht0 63(@ptr[0]) pandn $d,$t1 pand $c,$t0 punpckldq $t3,@Xi[1] movdqa $a,$t3 movdqa @Xi[0],`&Xi_off($i)` paddd @Xi[0],$e # e+=X[i] psrld \$27,$t3 pxor $t1,$t0 # Ch(b,c,d) movdqa $b,$t1 prefetcht0 63(@ptr[1]) por $t3,$t2 # rol(a,5) pslld \$30,$t1 paddd $t0,$e # e+=Ch(b,c,d) prefetcht0 63(@ptr[2]) psrld \$2,$b paddd $t2,$e # e+=rol(a,5) pshufb $tx,@Xi[1] prefetcht0 63(@ptr[3]) por $t1,$b # b=rol(b,30) ___ $code.=<<___ if ($i>=13 && $i<15); movdqa `&Xi_off($j+2)`,@Xi[3] # preload "X[2]" ___ $code.=<<___ if ($i>=15); # apply Xupdate pxor @Xi[-2],@Xi[1] # "X[13]" movdqa `&Xi_off($j+2)`,@Xi[3] # "X[2]" movdqa $a,$t2 pxor `&Xi_off($j+8)`,@Xi[1] paddd $K,$e # e+=K_00_19 movdqa $b,$t1 pslld \$5,$t2 pxor @Xi[3],@Xi[1] movdqa $b,$t0 pandn $d,$t1 movdqa @Xi[1],$tx pand $c,$t0 movdqa $a,$t3 psrld \$31,$tx paddd @Xi[1],@Xi[1] movdqa @Xi[0],`&Xi_off($i)` paddd @Xi[0],$e # e+=X[i] psrld \$27,$t3 pxor $t1,$t0 # Ch(b,c,d) movdqa $b,$t1 por $t3,$t2 # rol(a,5) pslld \$30,$t1 paddd $t0,$e # e+=Ch(b,c,d) psrld \$2,$b paddd $t2,$e # e+=rol(a,5) por $tx,@Xi[1] # rol \$1,@Xi[1] por $t1,$b # b=rol(b,30) ___ push(@Xi,shift(@Xi)); } sub BODY_20_39 { my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; $code.=<<___ if ($i<79); pxor @Xi[-2],@Xi[1] # "X[13]" movdqa `&Xi_off($j+2)`,@Xi[3] # "X[2]" movdqa $a,$t2 movdqa $d,$t0 pxor `&Xi_off($j+8)`,@Xi[1] paddd $K,$e # e+=K_20_39 pslld \$5,$t2 pxor $b,$t0 movdqa $a,$t3 ___ $code.=<<___ if ($i<72); movdqa @Xi[0],`&Xi_off($i)` ___ $code.=<<___ if ($i<79); paddd @Xi[0],$e # e+=X[i] pxor @Xi[3],@Xi[1] psrld \$27,$t3 pxor $c,$t0 # Parity(b,c,d) movdqa $b,$t1 pslld \$30,$t1 movdqa @Xi[1],$tx por $t3,$t2 # rol(a,5) psrld \$31,$tx paddd $t0,$e # e+=Parity(b,c,d) paddd @Xi[1],@Xi[1] psrld \$2,$b paddd $t2,$e # e+=rol(a,5) por $tx,@Xi[1] # rol(@Xi[1],1) por $t1,$b # b=rol(b,30) ___ $code.=<<___ if ($i==79); movdqa $a,$t2 paddd $K,$e # e+=K_20_39 movdqa $d,$t0 pslld \$5,$t2 pxor $b,$t0 movdqa $a,$t3 paddd @Xi[0],$e # e+=X[i] psrld \$27,$t3 movdqa $b,$t1 pxor $c,$t0 # Parity(b,c,d) pslld \$30,$t1 por $t3,$t2 # rol(a,5) paddd $t0,$e # e+=Parity(b,c,d) psrld \$2,$b paddd $t2,$e # e+=rol(a,5) por $t1,$b # b=rol(b,30) ___ push(@Xi,shift(@Xi)); } sub BODY_40_59 { my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; $code.=<<___; pxor @Xi[-2],@Xi[1] # "X[13]" movdqa 
`&Xi_off($j+2)`,@Xi[3] # "X[2]" movdqa $a,$t2 movdqa $d,$t1 pxor `&Xi_off($j+8)`,@Xi[1] pxor @Xi[3],@Xi[1] paddd $K,$e # e+=K_40_59 pslld \$5,$t2 movdqa $a,$t3 pand $c,$t1 movdqa $d,$t0 movdqa @Xi[1],$tx psrld \$27,$t3 paddd $t1,$e pxor $c,$t0 movdqa @Xi[0],`&Xi_off($i)` paddd @Xi[0],$e # e+=X[i] por $t3,$t2 # rol(a,5) psrld \$31,$tx pand $b,$t0 movdqa $b,$t1 pslld \$30,$t1 paddd @Xi[1],@Xi[1] paddd $t0,$e # e+=Maj(b,d,c) psrld \$2,$b paddd $t2,$e # e+=rol(a,5) por $tx,@Xi[1] # rol(@X[1],1) por $t1,$b # b=rol(b,30) ___ push(@Xi,shift(@Xi)); } $code.=<<___; .text .extern OPENSSL_ia32cap_P .globl sha1_multi_block .type sha1_multi_block,\@function,3 .align 32 sha1_multi_block: mov OPENSSL_ia32cap_P+4(%rip),%rcx bt \$61,%rcx # check SHA bit jc _shaext_shortcut ___ $code.=<<___ if ($avx); test \$`1<<28`,%ecx jnz _avx_shortcut ___ $code.=<<___; mov %rsp,%rax push %rbx push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp movaps %xmm6,(%rsp) movaps %xmm7,0x10(%rsp) movaps %xmm8,0x20(%rsp) movaps %xmm9,0x30(%rsp) movaps %xmm10,-0x78(%rax) movaps %xmm11,-0x68(%rax) movaps %xmm12,-0x58(%rax) movaps %xmm13,-0x48(%rax) movaps %xmm14,-0x38(%rax) movaps %xmm15,-0x28(%rax) ___ $code.=<<___; sub \$`$REG_SZ*18`,%rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp .Lbody: lea K_XX_XX(%rip),$Tbl lea `$REG_SZ*16`(%rsp),%rbx .Loop_grande: mov $num,`$REG_SZ*17+8`(%rsp) # original $num xor $num,$num ___ for($i=0;$i<4;$i++) { $code.=<<___; mov `16*$i+0`($inp),@ptr[$i] # input pointer mov `16*$i+8`($inp),%ecx # number of blocks cmp $num,%ecx cmovg %ecx,$num # find maximum test %ecx,%ecx mov %ecx,`4*$i`(%rbx) # initialize counters cmovle $Tbl,@ptr[$i] # cancel input ___ } $code.=<<___; test $num,$num jz .Ldone movdqu 0x00($ctx),$A # load context lea 128(%rsp),%rax movdqu 0x20($ctx),$B movdqu 0x40($ctx),$C movdqu 0x60($ctx),$D movdqu 0x80($ctx),$E movdqa 0x60($Tbl),$tx # pbswap_mask movdqa -0x20($Tbl),$K # K_00_19 jmp .Loop .align 32 .Loop: ___ for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } $code.=" movdqa 0x00($Tbl),$K\n"; # K_20_39 for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } $code.=" movdqa 0x20($Tbl),$K\n"; # K_40_59 for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } $code.=" movdqa 0x40($Tbl),$K\n"; # K_60_79 for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } $code.=<<___; movdqa (%rbx),@Xi[0] # pull counters mov \$1,%ecx cmp 4*0(%rbx),%ecx # examine counters pxor $t2,$t2 cmovge $Tbl,@ptr[0] # cancel input cmp 4*1(%rbx),%ecx movdqa @Xi[0],@Xi[1] cmovge $Tbl,@ptr[1] cmp 4*2(%rbx),%ecx pcmpgtd $t2,@Xi[1] # mask value cmovge $Tbl,@ptr[2] cmp 4*3(%rbx),%ecx paddd @Xi[1],@Xi[0] # counters-- cmovge $Tbl,@ptr[3] movdqu 0x00($ctx),$t0 pand @Xi[1],$A movdqu 0x20($ctx),$t1 pand @Xi[1],$B paddd $t0,$A movdqu 0x40($ctx),$t2 pand @Xi[1],$C paddd $t1,$B movdqu 0x60($ctx),$t3 pand @Xi[1],$D paddd $t2,$C movdqu 0x80($ctx),$tx pand @Xi[1],$E movdqu $A,0x00($ctx) paddd $t3,$D movdqu $B,0x20($ctx) paddd $tx,$E movdqu $C,0x40($ctx) movdqu $D,0x60($ctx) movdqu $E,0x80($ctx) movdqa @Xi[0],(%rbx) # save counters movdqa 0x60($Tbl),$tx # pbswap_mask movdqa -0x20($Tbl),$K # K_00_19 dec $num jnz .Loop mov `$REG_SZ*17+8`(%rsp),$num lea $REG_SZ($ctx),$ctx lea `16*$REG_SZ/4`($inp),$inp dec $num jnz .Loop_grande .Ldone: mov `$REG_SZ*17`(%rsp),%rax # original %rsp ___ $code.=<<___ if ($win64); movaps -0xb8(%rax),%xmm6 movaps -0xa8(%rax),%xmm7 movaps -0x98(%rax),%xmm8 movaps -0x88(%rax),%xmm9 movaps -0x78(%rax),%xmm10 movaps -0x68(%rax),%xmm11 movaps
-0x58(%rax),%xmm12 movaps -0x48(%rax),%xmm13 movaps -0x38(%rax),%xmm14 movaps -0x28(%rax),%xmm15 ___ $code.=<<___; mov -16(%rax),%rbp mov -8(%rax),%rbx lea (%rax),%rsp .Lepilogue: ret .size sha1_multi_block,.-sha1_multi_block ___ {{{ my ($ABCD0,$E0,$E0_,$BSWAP,$ABCD1,$E1,$E1_)=map("%xmm$_",(0..3,8..10)); my @MSG0=map("%xmm$_",(4..7)); my @MSG1=map("%xmm$_",(11..14)); $code.=<<___; .type sha1_multi_block_shaext,\@function,3 .align 32 sha1_multi_block_shaext: _shaext_shortcut: mov %rsp,%rax push %rbx push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp movaps %xmm6,(%rsp) movaps %xmm7,0x10(%rsp) movaps %xmm8,0x20(%rsp) movaps %xmm9,0x30(%rsp) movaps %xmm10,-0x78(%rax) movaps %xmm11,-0x68(%rax) movaps %xmm12,-0x58(%rax) movaps %xmm13,-0x48(%rax) movaps %xmm14,-0x38(%rax) movaps %xmm15,-0x28(%rax) ___ $code.=<<___; sub \$`$REG_SZ*18`,%rsp shl \$1,$num # we process a pair at a time and \$-256,%rsp lea 0x40($ctx),$ctx # size optimization mov %rax,`$REG_SZ*17`(%rsp) # original %rsp .Lbody_shaext: lea `$REG_SZ*16`(%rsp),%rbx movdqa K_XX_XX+0x80(%rip),$BSWAP # byte-n-word swap .Loop_grande_shaext: mov $num,`$REG_SZ*17+8`(%rsp) # original $num xor $num,$num ___ for($i=0;$i<2;$i++) { $code.=<<___; mov `16*$i+0`($inp),@ptr[$i] # input pointer mov `16*$i+8`($inp),%ecx # number of blocks cmp $num,%ecx cmovg %ecx,$num # find maximum test %ecx,%ecx mov %ecx,`4*$i`(%rbx) # initialize counters cmovle %rsp,@ptr[$i] # cancel input ___ } $code.=<<___; test $num,$num jz .Ldone_shaext movq 0x00-0x40($ctx),$ABCD0 # a1.a0 movq 0x20-0x40($ctx),@MSG0[0]# b1.b0 movq 0x40-0x40($ctx),@MSG0[1]# c1.c0 movq 0x60-0x40($ctx),@MSG0[2]# d1.d0 movq 0x80-0x40($ctx),@MSG0[3]# e1.e0 punpckldq @MSG0[0],$ABCD0 # b1.a1.b0.a0 punpckldq @MSG0[2],@MSG0[1] # d1.c1.d0.c0 movdqa $ABCD0,$ABCD1 punpcklqdq @MSG0[1],$ABCD0 # d0.c0.b0.a0 punpckhqdq @MSG0[1],$ABCD1 # d1.c1.b1.a1 pshufd \$0b00111111,@MSG0[3],$E0 pshufd \$0b01111111,@MSG0[3],$E1 pshufd \$0b00011011,$ABCD0,$ABCD0 pshufd \$0b00011011,$ABCD1,$ABCD1 jmp .Loop_shaext .align 32 .Loop_shaext: movdqu 0x00(@ptr[0]),@MSG0[0] movdqu 0x00(@ptr[1]),@MSG1[0] movdqu 0x10(@ptr[0]),@MSG0[1] movdqu 0x10(@ptr[1]),@MSG1[1] movdqu 0x20(@ptr[0]),@MSG0[2] pshufb $BSWAP,@MSG0[0] movdqu 0x20(@ptr[1]),@MSG1[2] pshufb $BSWAP,@MSG1[0] movdqu 0x30(@ptr[0]),@MSG0[3] lea 0x40(@ptr[0]),@ptr[0] pshufb $BSWAP,@MSG0[1] movdqu 0x30(@ptr[1]),@MSG1[3] lea 0x40(@ptr[1]),@ptr[1] pshufb $BSWAP,@MSG1[1] movdqa $E0,0x50(%rsp) # offload paddd @MSG0[0],$E0 movdqa $E1,0x70(%rsp) paddd @MSG1[0],$E1 movdqa $ABCD0,0x40(%rsp) # offload movdqa $ABCD0,$E0_ movdqa $ABCD1,0x60(%rsp) movdqa $ABCD1,$E1_ sha1rnds4 \$0,$E0,$ABCD0 # 0-3 sha1nexte @MSG0[1],$E0_ sha1rnds4 \$0,$E1,$ABCD1 # 0-3 sha1nexte @MSG1[1],$E1_ pshufb $BSWAP,@MSG0[2] prefetcht0 127(@ptr[0]) sha1msg1 @MSG0[1],@MSG0[0] pshufb $BSWAP,@MSG1[2] prefetcht0 127(@ptr[1]) sha1msg1 @MSG1[1],@MSG1[0] pshufb $BSWAP,@MSG0[3] movdqa $ABCD0,$E0 pshufb $BSWAP,@MSG1[3] movdqa $ABCD1,$E1 sha1rnds4 \$0,$E0_,$ABCD0 # 4-7 sha1nexte @MSG0[2],$E0 sha1rnds4 \$0,$E1_,$ABCD1 # 4-7 sha1nexte @MSG1[2],$E1 pxor @MSG0[2],@MSG0[0] sha1msg1 @MSG0[2],@MSG0[1] pxor @MSG1[2],@MSG1[0] sha1msg1 @MSG1[2],@MSG1[1] ___ for($i=2;$i<20-4;$i++) { $code.=<<___; movdqa $ABCD0,$E0_ movdqa $ABCD1,$E1_ sha1rnds4 \$`int($i/5)`,$E0,$ABCD0 # 8-11 sha1nexte @MSG0[3],$E0_ sha1rnds4 \$`int($i/5)`,$E1,$ABCD1 # 8-11 sha1nexte @MSG1[3],$E1_ sha1msg2 @MSG0[3],@MSG0[0] sha1msg2 @MSG1[3],@MSG1[0] pxor @MSG0[3],@MSG0[1] sha1msg1 @MSG0[3],@MSG0[2] pxor @MSG1[3],@MSG1[1] sha1msg1 @MSG1[3],@MSG1[2] ___ ($E0,$E0_)=($E0_,$E0);
($E1,$E1_)=($E1_,$E1); push(@MSG0,shift(@MSG0)); push(@MSG1,shift(@MSG1)); } $code.=<<___; movdqa $ABCD0,$E0_ movdqa $ABCD1,$E1_ sha1rnds4 \$3,$E0,$ABCD0 # 64-67 sha1nexte @MSG0[3],$E0_ sha1rnds4 \$3,$E1,$ABCD1 # 64-67 sha1nexte @MSG1[3],$E1_ sha1msg2 @MSG0[3],@MSG0[0] sha1msg2 @MSG1[3],@MSG1[0] pxor @MSG0[3],@MSG0[1] pxor @MSG1[3],@MSG1[1] mov \$1,%ecx pxor @MSG0[2],@MSG0[2] # zero cmp 4*0(%rbx),%ecx # examine counters cmovge %rsp,@ptr[0] # cancel input movdqa $ABCD0,$E0 movdqa $ABCD1,$E1 sha1rnds4 \$3,$E0_,$ABCD0 # 68-71 sha1nexte @MSG0[0],$E0 sha1rnds4 \$3,$E1_,$ABCD1 # 68-71 sha1nexte @MSG1[0],$E1 sha1msg2 @MSG0[0],@MSG0[1] sha1msg2 @MSG1[0],@MSG1[1] cmp 4*1(%rbx),%ecx cmovge %rsp,@ptr[1] movq (%rbx),@MSG0[0] # pull counters movdqa $ABCD0,$E0_ movdqa $ABCD1,$E1_ sha1rnds4 \$3,$E0,$ABCD0 # 72-75 sha1nexte @MSG0[1],$E0_ sha1rnds4 \$3,$E1,$ABCD1 # 72-75 sha1nexte @MSG1[1],$E1_ pshufd \$0x00,@MSG0[0],@MSG1[2] pshufd \$0x55,@MSG0[0],@MSG1[3] movdqa @MSG0[0],@MSG0[1] pcmpgtd @MSG0[2],@MSG1[2] pcmpgtd @MSG0[2],@MSG1[3] movdqa $ABCD0,$E0 movdqa $ABCD1,$E1 sha1rnds4 \$3,$E0_,$ABCD0 # 76-79 sha1nexte $MSG0[2],$E0 sha1rnds4 \$3,$E1_,$ABCD1 # 76-79 sha1nexte $MSG0[2],$E1 pcmpgtd @MSG0[2],@MSG0[1] # counter mask pand @MSG1[2],$ABCD0 pand @MSG1[2],$E0 pand @MSG1[3],$ABCD1 pand @MSG1[3],$E1 paddd @MSG0[1],@MSG0[0] # counters-- paddd 0x40(%rsp),$ABCD0 paddd 0x50(%rsp),$E0 paddd 0x60(%rsp),$ABCD1 paddd 0x70(%rsp),$E1 movq @MSG0[0],(%rbx) # save counters dec $num jnz .Loop_shaext mov `$REG_SZ*17+8`(%rsp),$num pshufd \$0b00011011,$ABCD0,$ABCD0 pshufd \$0b00011011,$ABCD1,$ABCD1 movdqa $ABCD0,@MSG0[0] punpckldq $ABCD1,$ABCD0 # b1.b0.a1.a0 punpckhdq $ABCD1,@MSG0[0] # d1.d0.c1.c0 punpckhdq $E1,$E0 # e1.e0.xx.xx movq $ABCD0,0x00-0x40($ctx) # a1.a0 psrldq \$8,$ABCD0 movq @MSG0[0],0x40-0x40($ctx)# c1.c0 psrldq \$8,@MSG0[0] movq $ABCD0,0x20-0x40($ctx) # b1.b0 psrldq \$8,$E0 movq @MSG0[0],0x60-0x40($ctx)# d1.d0 movq $E0,0x80-0x40($ctx) # e1.e0 lea `$REG_SZ/2`($ctx),$ctx lea `16*2`($inp),$inp dec $num jnz .Loop_grande_shaext .Ldone_shaext: #mov `$REG_SZ*17`(%rsp),%rax # original %rsp ___ $code.=<<___ if ($win64); movaps -0xb8(%rax),%xmm6 movaps -0xa8(%rax),%xmm7 movaps -0x98(%rax),%xmm8 movaps -0x88(%rax),%xmm9 movaps -0x78(%rax),%xmm10 movaps -0x68(%rax),%xmm11 movaps -0x58(%rax),%xmm12 movaps -0x48(%rax),%xmm13 movaps -0x38(%rax),%xmm14 movaps -0x28(%rax),%xmm15 ___ $code.=<<___; mov -16(%rax),%rbp mov -8(%rax),%rbx lea (%rax),%rsp .Lepilogue_shaext: ret .size sha1_multi_block_shaext,.-sha1_multi_block_shaext ___ }}} if ($avx) {{{ sub BODY_00_19_avx { my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; my $k=$i+2; my $vpack = $REG_SZ==16 ? "vpunpckldq" : "vinserti128"; my $ptr_n = $REG_SZ==16 ? 
@ptr[1] : @ptr[4]; $code.=<<___ if ($i==0 && $REG_SZ==16); vmovd (@ptr[0]),@Xi[0] lea `16*4`(@ptr[0]),@ptr[0] vmovd (@ptr[1]),@Xi[2] # borrow Xi[2] lea `16*4`(@ptr[1]),@ptr[1] vpinsrd \$1,(@ptr[2]),@Xi[0],@Xi[0] lea `16*4`(@ptr[2]),@ptr[2] vpinsrd \$1,(@ptr[3]),@Xi[2],@Xi[2] lea `16*4`(@ptr[3]),@ptr[3] vmovd `4*$j-16*4`(@ptr[0]),@Xi[1] vpunpckldq @Xi[2],@Xi[0],@Xi[0] vmovd `4*$j-16*4`($ptr_n),$t3 vpshufb $tx,@Xi[0],@Xi[0] ___ $code.=<<___ if ($i<15 && $REG_SZ==16); # just load input vpinsrd \$1,`4*$j-16*4`(@ptr[2]),@Xi[1],@Xi[1] vpinsrd \$1,`4*$j-16*4`(@ptr[3]),$t3,$t3 ___ $code.=<<___ if ($i==0 && $REG_SZ==32); vmovd (@ptr[0]),@Xi[0] lea `16*4`(@ptr[0]),@ptr[0] vmovd (@ptr[4]),@Xi[2] # borrow Xi[2] lea `16*4`(@ptr[4]),@ptr[4] vmovd (@ptr[1]),$t2 lea `16*4`(@ptr[1]),@ptr[1] vmovd (@ptr[5]),$t1 lea `16*4`(@ptr[5]),@ptr[5] vpinsrd \$1,(@ptr[2]),@Xi[0],@Xi[0] lea `16*4`(@ptr[2]),@ptr[2] vpinsrd \$1,(@ptr[6]),@Xi[2],@Xi[2] lea `16*4`(@ptr[6]),@ptr[6] vpinsrd \$1,(@ptr[3]),$t2,$t2 lea `16*4`(@ptr[3]),@ptr[3] vpunpckldq $t2,@Xi[0],@Xi[0] vpinsrd \$1,(@ptr[7]),$t1,$t1 lea `16*4`(@ptr[7]),@ptr[7] vpunpckldq $t1,@Xi[2],@Xi[2] vmovd `4*$j-16*4`(@ptr[0]),@Xi[1] vinserti128 @Xi[2],@Xi[0],@Xi[0] vmovd `4*$j-16*4`($ptr_n),$t3 vpshufb $tx,@Xi[0],@Xi[0] ___ $code.=<<___ if ($i<15 && $REG_SZ==32); # just load input vmovd `4*$j-16*4`(@ptr[1]),$t2 vmovd `4*$j-16*4`(@ptr[5]),$t1 vpinsrd \$1,`4*$j-16*4`(@ptr[2]),@Xi[1],@Xi[1] vpinsrd \$1,`4*$j-16*4`(@ptr[6]),$t3,$t3 vpinsrd \$1,`4*$j-16*4`(@ptr[3]),$t2,$t2 vpunpckldq $t2,@Xi[1],@Xi[1] vpinsrd \$1,`4*$j-16*4`(@ptr[7]),$t1,$t1 vpunpckldq $t1,$t3,$t3 ___ $code.=<<___ if ($i<14); vpaddd $K,$e,$e # e+=K_00_19 vpslld \$5,$a,$t2 vpandn $d,$b,$t1 vpand $c,$b,$t0 vmovdqa @Xi[0],`&Xi_off($i)` vpaddd @Xi[0],$e,$e # e+=X[i] $vpack $t3,@Xi[1],@Xi[1] vpsrld \$27,$a,$t3 vpxor $t1,$t0,$t0 # Ch(b,c,d) vmovd `4*$k-16*4`(@ptr[0]),@Xi[2] vpslld \$30,$b,$t1 vpor $t3,$t2,$t2 # rol(a,5) vmovd `4*$k-16*4`($ptr_n),$t3 vpaddd $t0,$e,$e # e+=Ch(b,c,d) vpsrld \$2,$b,$b vpaddd $t2,$e,$e # e+=rol(a,5) vpshufb $tx,@Xi[1],@Xi[1] vpor $t1,$b,$b # b=rol(b,30) ___ $code.=<<___ if ($i==14); vpaddd $K,$e,$e # e+=K_00_19 prefetcht0 63(@ptr[0]) vpslld \$5,$a,$t2 vpandn $d,$b,$t1 vpand $c,$b,$t0 vmovdqa @Xi[0],`&Xi_off($i)` vpaddd @Xi[0],$e,$e # e+=X[i] $vpack $t3,@Xi[1],@Xi[1] vpsrld \$27,$a,$t3 prefetcht0 63(@ptr[1]) vpxor $t1,$t0,$t0 # Ch(b,c,d) vpslld \$30,$b,$t1 vpor $t3,$t2,$t2 # rol(a,5) prefetcht0 63(@ptr[2]) vpaddd $t0,$e,$e # e+=Ch(b,c,d) vpsrld \$2,$b,$b vpaddd $t2,$e,$e # e+=rol(a,5) prefetcht0 63(@ptr[3]) vpshufb $tx,@Xi[1],@Xi[1] vpor $t1,$b,$b # b=rol(b,30) ___ $code.=<<___ if ($i>=13 && $i<15); vmovdqa `&Xi_off($j+2)`,@Xi[3] # preload "X[2]" ___ $code.=<<___ if ($i>=15); # apply Xupdate vpxor @Xi[-2],@Xi[1],@Xi[1] # "X[13]" vmovdqa `&Xi_off($j+2)`,@Xi[3] # "X[2]" vpaddd $K,$e,$e # e+=K_00_19 vpslld \$5,$a,$t2 vpandn $d,$b,$t1 `"prefetcht0 63(@ptr[4])" if ($i==15 && $REG_SZ==32)` vpand $c,$b,$t0 vmovdqa @Xi[0],`&Xi_off($i)` vpaddd @Xi[0],$e,$e # e+=X[i] vpxor `&Xi_off($j+8)`,@Xi[1],@Xi[1] vpsrld \$27,$a,$t3 vpxor $t1,$t0,$t0 # Ch(b,c,d) vpxor @Xi[3],@Xi[1],@Xi[1] `"prefetcht0 63(@ptr[5])" if ($i==15 && $REG_SZ==32)` vpslld \$30,$b,$t1 vpor $t3,$t2,$t2 # rol(a,5) vpaddd $t0,$e,$e # e+=Ch(b,c,d) `"prefetcht0 63(@ptr[6])" if ($i==15 && $REG_SZ==32)` vpsrld \$31,@Xi[1],$tx vpaddd @Xi[1],@Xi[1],@Xi[1] vpsrld \$2,$b,$b `"prefetcht0 63(@ptr[7])" if ($i==15 && $REG_SZ==32)` vpaddd $t2,$e,$e # e+=rol(a,5) vpor $tx,@Xi[1],@Xi[1] # rol \$1,@Xi[1] vpor $t1,$b,$b # b=rol(b,30) ___ 
push(@Xi,shift(@Xi)); } sub BODY_20_39_avx { my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; $code.=<<___ if ($i<79); vpxor @Xi[-2],@Xi[1],@Xi[1] # "X[13]" vmovdqa `&Xi_off($j+2)`,@Xi[3] # "X[2]" vpslld \$5,$a,$t2 vpaddd $K,$e,$e # e+=K_20_39 vpxor $b,$d,$t0 ___ $code.=<<___ if ($i<72); vmovdqa @Xi[0],`&Xi_off($i)` ___ $code.=<<___ if ($i<79); vpaddd @Xi[0],$e,$e # e+=X[i] vpxor `&Xi_off($j+8)`,@Xi[1],@Xi[1] vpsrld \$27,$a,$t3 vpxor $c,$t0,$t0 # Parity(b,c,d) vpxor @Xi[3],@Xi[1],@Xi[1] vpslld \$30,$b,$t1 vpor $t3,$t2,$t2 # rol(a,5) vpaddd $t0,$e,$e # e+=Parity(b,c,d) vpsrld \$31,@Xi[1],$tx vpaddd @Xi[1],@Xi[1],@Xi[1] vpsrld \$2,$b,$b vpaddd $t2,$e,$e # e+=rol(a,5) vpor $tx,@Xi[1],@Xi[1] # rol(@Xi[1],1) vpor $t1,$b,$b # b=rol(b,30) ___ $code.=<<___ if ($i==79); vpslld \$5,$a,$t2 vpaddd $K,$e,$e # e+=K_20_39 vpxor $b,$d,$t0 vpsrld \$27,$a,$t3 vpaddd @Xi[0],$e,$e # e+=X[i] vpxor $c,$t0,$t0 # Parity(b,c,d) vpslld \$30,$b,$t1 vpor $t3,$t2,$t2 # rol(a,5) vpaddd $t0,$e,$e # e+=Parity(b,c,d) vpsrld \$2,$b,$b vpaddd $t2,$e,$e # e+=rol(a,5) vpor $t1,$b,$b # b=rol(b,30) ___ push(@Xi,shift(@Xi)); } sub BODY_40_59_avx { my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; $code.=<<___; vpxor @Xi[-2],@Xi[1],@Xi[1] # "X[13]" vmovdqa `&Xi_off($j+2)`,@Xi[3] # "X[2]" vpaddd $K,$e,$e # e+=K_40_59 vpslld \$5,$a,$t2 vpand $c,$d,$t1 vpxor `&Xi_off($j+8)`,@Xi[1],@Xi[1] vpaddd $t1,$e,$e vpsrld \$27,$a,$t3 vpxor $c,$d,$t0 vpxor @Xi[3],@Xi[1],@Xi[1] vmovdqu @Xi[0],`&Xi_off($i)` vpaddd @Xi[0],$e,$e # e+=X[i] vpor $t3,$t2,$t2 # rol(a,5) vpsrld \$31,@Xi[1],$tx vpand $b,$t0,$t0 vpaddd @Xi[1],@Xi[1],@Xi[1] vpslld \$30,$b,$t1 vpaddd $t0,$e,$e # e+=Maj(b,d,c) vpsrld \$2,$b,$b vpaddd $t2,$e,$e # e+=rol(a,5) vpor $tx,@Xi[1],@Xi[1] # rol(@X[1],1) vpor $t1,$b,$b # b=rol(b,30) ___ push(@Xi,shift(@Xi)); } $code.=<<___; .type sha1_multi_block_avx,\@function,3 .align 32 sha1_multi_block_avx: _avx_shortcut: ___ $code.=<<___ if ($avx>1); shr \$32,%rcx cmp \$2,$num jb .Lavx test \$`1<<5`,%ecx jnz _avx2_shortcut jmp .Lavx .align 32 .Lavx: ___ $code.=<<___; mov %rsp,%rax push %rbx push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp movaps %xmm6,(%rsp) movaps %xmm7,0x10(%rsp) movaps %xmm8,0x20(%rsp) movaps %xmm9,0x30(%rsp) movaps %xmm10,-0x78(%rax) movaps %xmm11,-0x68(%rax) movaps %xmm12,-0x58(%rax) movaps %xmm13,-0x48(%rax) movaps %xmm14,-0x38(%rax) movaps %xmm15,-0x28(%rax) ___ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp .Lbody_avx: lea K_XX_XX(%rip),$Tbl lea `$REG_SZ*16`(%rsp),%rbx vzeroupper .Loop_grande_avx: mov $num,`$REG_SZ*17+8`(%rsp) # original $num xor $num,$num ___ for($i=0;$i<4;$i++) { $code.=<<___; mov `16*$i+0`($inp),@ptr[$i] # input pointer mov `16*$i+8`($inp),%ecx # number of blocks cmp $num,%ecx cmovg %ecx,$num # find maximum test %ecx,%ecx mov %ecx,`4*$i`(%rbx) # initialize counters cmovle $Tbl,@ptr[$i] # cancel input ___ } $code.=<<___; test $num,$num jz .Ldone_avx vmovdqu 0x00($ctx),$A # load context lea 128(%rsp),%rax vmovdqu 0x20($ctx),$B vmovdqu 0x40($ctx),$C vmovdqu 0x60($ctx),$D vmovdqu 0x80($ctx),$E vmovdqu 0x60($Tbl),$tx # pbswap_mask jmp .Loop_avx .align 32 .Loop_avx: ___ $code.=" vmovdqa -0x20($Tbl),$K\n"; # K_00_19 for($i=0;$i<20;$i++) { &BODY_00_19_avx($i,@V); unshift(@V,pop(@V)); } $code.=" vmovdqa 0x00($Tbl),$K\n"; # K_20_39 for(;$i<40;$i++) { &BODY_20_39_avx($i,@V); unshift(@V,pop(@V)); } $code.=" vmovdqa 0x20($Tbl),$K\n"; # K_40_59 for(;$i<60;$i++) { &BODY_40_59_avx($i,@V); unshift(@V,pop(@V)); } $code.=" vmovdqa 0x40($Tbl),$K\n"; # K_60_79 for(;$i<80;$i++) { 
&BODY_20_39_avx($i,@V); unshift(@V,pop(@V)); } $code.=<<___; mov \$1,%ecx ___ for($i=0;$i<4;$i++) { $code.=<<___; cmp `4*$i`(%rbx),%ecx # examine counters cmovge $Tbl,@ptr[$i] # cancel input ___ } $code.=<<___; vmovdqu (%rbx),$t0 # pull counters vpxor $t2,$t2,$t2 vmovdqa $t0,$t1 vpcmpgtd $t2,$t1,$t1 # mask value vpaddd $t1,$t0,$t0 # counters-- vpand $t1,$A,$A vpand $t1,$B,$B vpaddd 0x00($ctx),$A,$A vpand $t1,$C,$C vpaddd 0x20($ctx),$B,$B vpand $t1,$D,$D vpaddd 0x40($ctx),$C,$C vpand $t1,$E,$E vpaddd 0x60($ctx),$D,$D vpaddd 0x80($ctx),$E,$E vmovdqu $A,0x00($ctx) vmovdqu $B,0x20($ctx) vmovdqu $C,0x40($ctx) vmovdqu $D,0x60($ctx) vmovdqu $E,0x80($ctx) vmovdqu $t0,(%rbx) # save counters vmovdqu 0x60($Tbl),$tx # pbswap_mask dec $num jnz .Loop_avx mov `$REG_SZ*17+8`(%rsp),$num lea $REG_SZ($ctx),$ctx lea `16*$REG_SZ/4`($inp),$inp dec $num jnz .Loop_grande_avx .Ldone_avx: mov `$REG_SZ*17`(%rsp),%rax # original %rsp vzeroupper ___ $code.=<<___ if ($win64); movaps -0xb8(%rax),%xmm6 movaps -0xa8(%rax),%xmm7 movaps -0x98(%rax),%xmm8 movaps -0x88(%rax),%xmm9 movaps -0x78(%rax),%xmm10 movaps -0x68(%rax),%xmm11 movaps -0x58(%rax),%xmm12 movaps -0x48(%rax),%xmm13 movaps -0x38(%rax),%xmm14 movaps -0x28(%rax),%xmm15 ___ $code.=<<___; mov -16(%rax),%rbp mov -8(%rax),%rbx lea (%rax),%rsp .Lepilogue_avx: ret .size sha1_multi_block_avx,.-sha1_multi_block_avx ___ if ($avx>1) { $code =~ s/\`([^\`]*)\`/eval $1/gem; $REG_SZ=32; @ptr=map("%r$_",(12..15,8..11)); @V=($A,$B,$C,$D,$E)=map("%ymm$_",(0..4)); ($t0,$t1,$t2,$t3,$tx)=map("%ymm$_",(5..9)); @Xi=map("%ymm$_",(10..14)); $K="%ymm15"; $code.=<<___; .type sha1_multi_block_avx2,\@function,3 .align 32 sha1_multi_block_avx2: _avx2_shortcut: mov %rsp,%rax push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp movaps %xmm6,(%rsp) movaps %xmm7,0x10(%rsp) movaps %xmm8,0x20(%rsp) movaps %xmm9,0x30(%rsp) movaps %xmm10,0x40(%rsp) movaps %xmm11,0x50(%rsp) movaps %xmm12,-0x78(%rax) movaps %xmm13,-0x68(%rax) movaps %xmm14,-0x58(%rax) movaps %xmm15,-0x48(%rax) ___ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp .Lbody_avx2: lea K_XX_XX(%rip),$Tbl shr \$1,$num vzeroupper .Loop_grande_avx2: mov $num,`$REG_SZ*17+8`(%rsp) # original $num xor $num,$num lea `$REG_SZ*16`(%rsp),%rbx ___ for($i=0;$i<8;$i++) { $code.=<<___; mov `16*$i+0`($inp),@ptr[$i] # input pointer mov `16*$i+8`($inp),%ecx # number of blocks cmp $num,%ecx cmovg %ecx,$num # find maximum test %ecx,%ecx mov %ecx,`4*$i`(%rbx) # initialize counters cmovle $Tbl,@ptr[$i] # cancel input ___ } $code.=<<___; vmovdqu 0x00($ctx),$A # load context lea 128(%rsp),%rax vmovdqu 0x20($ctx),$B lea 256+128(%rsp),%rbx vmovdqu 0x40($ctx),$C vmovdqu 0x60($ctx),$D vmovdqu 0x80($ctx),$E vmovdqu 0x60($Tbl),$tx # pbswap_mask jmp .Loop_avx2 .align 32 .Loop_avx2: ___ $code.=" vmovdqa -0x20($Tbl),$K\n"; # K_00_19 for($i=0;$i<20;$i++) { &BODY_00_19_avx($i,@V); unshift(@V,pop(@V)); } $code.=" vmovdqa 0x00($Tbl),$K\n"; # K_20_39 for(;$i<40;$i++) { &BODY_20_39_avx($i,@V); unshift(@V,pop(@V)); } $code.=" vmovdqa 0x20($Tbl),$K\n"; # K_40_59 for(;$i<60;$i++) { &BODY_40_59_avx($i,@V); unshift(@V,pop(@V)); } $code.=" vmovdqa 0x40($Tbl),$K\n"; # K_60_79 for(;$i<80;$i++) { &BODY_20_39_avx($i,@V); unshift(@V,pop(@V)); } $code.=<<___; mov \$1,%ecx lea `$REG_SZ*16`(%rsp),%rbx ___ for($i=0;$i<8;$i++) { $code.=<<___; cmp `4*$i`(%rbx),%ecx # examine counters cmovge $Tbl,@ptr[$i] # cancel input ___ } $code.=<<___; vmovdqu (%rbx),$t0 # pull counters vpxor
$t2,$t2,$t2 vmovdqa $t0,$t1 vpcmpgtd $t2,$t1,$t1 # mask value vpaddd $t1,$t0,$t0 # counters-- vpand $t1,$A,$A vpand $t1,$B,$B vpaddd 0x00($ctx),$A,$A vpand $t1,$C,$C vpaddd 0x20($ctx),$B,$B vpand $t1,$D,$D vpaddd 0x40($ctx),$C,$C vpand $t1,$E,$E vpaddd 0x60($ctx),$D,$D vpaddd 0x80($ctx),$E,$E vmovdqu $A,0x00($ctx) vmovdqu $B,0x20($ctx) vmovdqu $C,0x40($ctx) vmovdqu $D,0x60($ctx) vmovdqu $E,0x80($ctx) vmovdqu $t0,(%rbx) # save counters lea 256+128(%rsp),%rbx vmovdqu 0x60($Tbl),$tx # pbswap_mask dec $num jnz .Loop_avx2 #mov `$REG_SZ*17+8`(%rsp),$num #lea $REG_SZ($ctx),$ctx #lea `16*$REG_SZ/4`($inp),$inp #dec $num #jnz .Loop_grande_avx2 .Ldone_avx2: mov `$REG_SZ*17`(%rsp),%rax # original %rsp vzeroupper ___ $code.=<<___ if ($win64); movaps -0xd8(%rax),%xmm6 movaps -0xc8(%rax),%xmm7 movaps -0xb8(%rax),%xmm8 movaps -0xa8(%rax),%xmm9 movaps -0x98(%rax),%xmm10 movaps -0x88(%rax),%xmm11 movaps -0x78(%rax),%xmm12 movaps -0x68(%rax),%xmm13 movaps -0x58(%rax),%xmm14 movaps -0x48(%rax),%xmm15 ___ $code.=<<___; mov -48(%rax),%r15 mov -40(%rax),%r14 mov -32(%rax),%r13 mov -24(%rax),%r12 mov -16(%rax),%rbp mov -8(%rax),%rbx lea (%rax),%rsp .Lepilogue_avx2: ret .size sha1_multi_block_avx2,.-sha1_multi_block_avx2 ___ } }}} $code.=<<___; .align 256 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19 K_XX_XX: .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap .byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 .asciz "SHA1 multi-block transform for x86_64, CRYPTOGAMS by <appro\@openssl.org>" ___ if ($win64) { # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, # CONTEXT *context,DISPATCHER_CONTEXT *disp) $rec="%rcx"; $frame="%rdx"; $context="%r8"; $disp="%r9"; $code.=<<___; .extern __imp_RtlVirtualUnwind .type se_handler,\@abi-omnipotent .align 16 se_handler: push %rsi push %rdi push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 pushfq sub \$64,%rsp mov 120($context),%rax # pull context->Rax mov 248($context),%rbx # pull context->Rip mov 8($disp),%rsi # disp->ImageBase mov 56($disp),%r11 # disp->HandlerData mov 0(%r11),%r10d # HandlerData[0] lea (%rsi,%r10),%r10 # end of prologue label cmp %r10,%rbx # context->Rip<.Lbody jb .Lin_prologue mov 152($context),%rax # pull context->Rsp mov 4(%r11),%r10d # HandlerData[1] lea (%rsi,%r10),%r10 # epilogue label cmp %r10,%rbx # context->Rip>=.Lepilogue jae .Lin_prologue mov `16*17`(%rax),%rax # pull saved stack pointer mov -8(%rax),%rbx mov -16(%rax),%rbp mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp lea -24-10*16(%rax),%rsi lea 512($context),%rdi # &context.Xmm6 mov \$20,%ecx .long 0xa548f3fc # cld; rep movsq .Lin_prologue: mov 8(%rax),%rdi mov 16(%rax),%rsi mov %rax,152($context) # restore context->Rsp mov %rsi,168($context) # restore context->Rsi mov %rdi,176($context) # restore context->Rdi mov 40($disp),%rdi # disp->ContextRecord mov $context,%rsi # context mov \$154,%ecx # sizeof(CONTEXT) .long 0xa548f3fc # cld; rep movsq mov $disp,%rsi xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER mov 8(%rsi),%rdx # arg2,
disp->ImageBase mov 0(%rsi),%r8 # arg3, disp->ControlPc mov 16(%rsi),%r9 # arg4, disp->FunctionEntry mov 40(%rsi),%r10 # disp->ContextRecord lea 56(%rsi),%r11 # &disp->HandlerData lea 24(%rsi),%r12 # &disp->EstablisherFrame mov %r10,32(%rsp) # arg5 mov %r11,40(%rsp) # arg6 mov %r12,48(%rsp) # arg7 mov %rcx,56(%rsp) # arg8, (NULL) call *__imp_RtlVirtualUnwind(%rip) mov \$1,%eax # ExceptionContinueSearch add \$64,%rsp popfq pop %r15 pop %r14 pop %r13 pop %r12 pop %rbp pop %rbx pop %rdi pop %rsi ret .size se_handler,.-se_handler ___ $code.=<<___ if ($avx>1); .type avx2_handler,\@abi-omnipotent .align 16 avx2_handler: push %rsi push %rdi push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 pushfq sub \$64,%rsp mov 120($context),%rax # pull context->Rax mov 248($context),%rbx # pull context->Rip mov 8($disp),%rsi # disp->ImageBase mov 56($disp),%r11 # disp->HandlerData mov 0(%r11),%r10d # HandlerData[0] lea (%rsi,%r10),%r10 # end of prologue label cmp %r10,%rbx # context->Rip<body label jb .Lin_prologue mov 152($context),%rax # pull context->Rsp mov 4(%r11),%r10d # HandlerData[1] lea (%rsi,%r10),%r10 # epilogue label cmp %r10,%rbx # context->Rip>=epilogue label jae .Lin_prologue mov `32*17`($context),%rax # pull saved stack pointer mov -8(%rax),%rbx mov -16(%rax),%rbp mov -24(%rax),%r12 mov -32(%rax),%r13 mov -40(%rax),%r14 mov -48(%rax),%r15 mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp mov %r12,216($context) # restore context->R12 mov %r13,224($context) # restore context->R13 mov %r14,232($context) # restore context->R14 mov %r15,240($context) # restore context->R15 lea -56-10*16(%rax),%rsi lea 512($context),%rdi # &context.Xmm6 mov \$20,%ecx .long 0xa548f3fc # cld; rep movsq jmp .Lin_prologue .size avx2_handler,.-avx2_handler ___ $code.=<<___; .section .pdata .align 4 .rva .LSEH_begin_sha1_multi_block .rva .LSEH_end_sha1_multi_block .rva .LSEH_info_sha1_multi_block .rva .LSEH_begin_sha1_multi_block_shaext .rva .LSEH_end_sha1_multi_block_shaext .rva .LSEH_info_sha1_multi_block_shaext ___ $code.=<<___ if ($avx); .rva .LSEH_begin_sha1_multi_block_avx .rva .LSEH_end_sha1_multi_block_avx .rva .LSEH_info_sha1_multi_block_avx ___ $code.=<<___ if ($avx>1); .rva .LSEH_begin_sha1_multi_block_avx2 .rva .LSEH_end_sha1_multi_block_avx2 .rva .LSEH_info_sha1_multi_block_avx2 ___ $code.=<<___; .section .xdata .align 8 .LSEH_info_sha1_multi_block: .byte 9,0,0,0 .rva se_handler .rva .Lbody,.Lepilogue # HandlerData[] .LSEH_info_sha1_multi_block_shaext: .byte 9,0,0,0 .rva se_handler .rva .Lbody_shaext,.Lepilogue_shaext # HandlerData[] ___ $code.=<<___ if ($avx); .LSEH_info_sha1_multi_block_avx: .byte 9,0,0,0 .rva se_handler .rva .Lbody_avx,.Lepilogue_avx # HandlerData[] ___ $code.=<<___ if ($avx>1); .LSEH_info_sha1_multi_block_avx2: .byte 9,0,0,0 .rva avx2_handler .rva .Lbody_avx2,.Lepilogue_avx2 # HandlerData[] ___ } #################################################################### sub rex { local *opcode=shift; my ($dst,$src)=@_; my $rex=0; $rex|=0x04 if ($dst>=8); $rex|=0x01 if ($src>=8); unshift @opcode,$rex|0x40 if ($rex); } sub sha1rnds4 { if (@_[0] =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { my @opcode=(0x0f,0x3a,0xcc); rex(\@opcode,$3,$2); push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M my $c=$1; push @opcode,$c=~/^0/?oct($c):$c; return ".byte\t".join(',',@opcode); } else { return "sha1rnds4\t".@_[0]; } } sub sha1op38 { my $instr = shift; my %opcodelet = ( "sha1nexte" => 0xc8, "sha1msg1" => 0xc9, "sha1msg2" => 0xca ); if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { my
@opcode=(0x0f,0x38); rex(\@opcode,$2,$1); push @opcode,$opcodelet{$instr}; push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M return ".byte\t".join(',',@opcode); } else { return $instr."\t".@_[0]; } } foreach (split("\n",$code)) { s/\`([^\`]*)\`/eval($1)/ge; s/\b(sha1rnds4)\s+(.*)/sha1rnds4($2)/geo or s/\b(sha1[^\s]*)\s+(.*)/sha1op38($1,$2)/geo or s/\b(vmov[dq])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go or s/\b(vmovdqu)\b(.+)%x%ymm([0-9]+)/$1$2%xmm$3/go or s/\b(vpinsr[qd])\b(.+)%ymm([0-9]+),%ymm([0-9]+)/$1$2%xmm$3,%xmm$4/go or s/\b(vpextr[qd])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go or s/\b(vinserti128)\b(\s+)%ymm/$1$2\$1,%xmm/go or s/\b(vpbroadcast[qd]\s+)%ymm([0-9]+)/$1%xmm$2/go; print $_,"\n"; } close STDOUT; libntru-0.5/src/sha1.c000066400000000000000000000313341271556312200146340ustar00rootroot00000000000000/* $Id: sha1.c 216 2010-06-08 09:46:57Z tp $ */ /* * SHA-1 implementation. * * ==========================(LICENSE BEGIN)============================ * * Copyright (c) 2007-2010 Projet RNRT SAPHIR * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * ===========================(LICENSE END)============================= * * @author Thomas Pornin <thomas.pornin@cryptolog.com> */ #include <stddef.h> #include <string.h> #include "sph_sha1.h" #define F(B, C, D) ((((C) ^ (D)) & (B)) ^ (D)) #define G(B, C, D) ((B) ^ (C) ^ (D)) #define H(B, C, D) (((D) & (C)) | (((D) | (C)) & (B))) #define I(B, C, D) G(B, C, D) #define ROTL SPH_ROTL32 #define K1 SPH_C32(0x5A827999) #define K2 SPH_C32(0x6ED9EBA1) #define K3 SPH_C32(0x8F1BBCDC) #define K4 SPH_C32(0xCA62C1D6) static const sph_u32 IV[5] = { SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), SPH_C32(0x98BADCFE), SPH_C32(0x10325476), SPH_C32(0xC3D2E1F0) }; /* * This macro defines the body for a SHA-1 compression function * implementation. The "in" parameter should evaluate, when applied to a * numerical input parameter from 0 to 15, to an expression which yields * the corresponding input block. The "r" parameter should evaluate to * an array or pointer expression designating the array of 5 words which * contains the input and output of the compression function.
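 *
 * An illustrative (hypothetical) instantiation: given a word array "w"
 * and a 5-word state "st", one could write
 *
 *   #define MY_IN(x)   w[x]
 *   SHA1_ROUND_BODY(MY_IN, st);
 *   #undef MY_IN
 *
 * which is the same pattern used by sha1_round() and sph_sha1_comp()
 * below.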
*/ #define SHA1_ROUND_BODY(in, r) do { \ sph_u32 A, B, C, D, E; \ sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \ sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \ \ A = (r)[0]; \ B = (r)[1]; \ C = (r)[2]; \ D = (r)[3]; \ E = (r)[4]; \ \ W00 = in(0); \ E = SPH_T32(ROTL(A, 5) + F(B, C, D) + E + W00 + K1); \ B = ROTL(B, 30); \ W01 = in(1); \ D = SPH_T32(ROTL(E, 5) + F(A, B, C) + D + W01 + K1); \ A = ROTL(A, 30); \ W02 = in(2); \ C = SPH_T32(ROTL(D, 5) + F(E, A, B) + C + W02 + K1); \ E = ROTL(E, 30); \ W03 = in(3); \ B = SPH_T32(ROTL(C, 5) + F(D, E, A) + B + W03 + K1); \ D = ROTL(D, 30); \ W04 = in(4); \ A = SPH_T32(ROTL(B, 5) + F(C, D, E) + A + W04 + K1); \ C = ROTL(C, 30); \ W05 = in(5); \ E = SPH_T32(ROTL(A, 5) + F(B, C, D) + E + W05 + K1); \ B = ROTL(B, 30); \ W06 = in(6); \ D = SPH_T32(ROTL(E, 5) + F(A, B, C) + D + W06 + K1); \ A = ROTL(A, 30); \ W07 = in(7); \ C = SPH_T32(ROTL(D, 5) + F(E, A, B) + C + W07 + K1); \ E = ROTL(E, 30); \ W08 = in(8); \ B = SPH_T32(ROTL(C, 5) + F(D, E, A) + B + W08 + K1); \ D = ROTL(D, 30); \ W09 = in(9); \ A = SPH_T32(ROTL(B, 5) + F(C, D, E) + A + W09 + K1); \ C = ROTL(C, 30); \ W10 = in(10); \ E = SPH_T32(ROTL(A, 5) + F(B, C, D) + E + W10 + K1); \ B = ROTL(B, 30); \ W11 = in(11); \ D = SPH_T32(ROTL(E, 5) + F(A, B, C) + D + W11 + K1); \ A = ROTL(A, 30); \ W12 = in(12); \ C = SPH_T32(ROTL(D, 5) + F(E, A, B) + C + W12 + K1); \ E = ROTL(E, 30); \ W13 = in(13); \ B = SPH_T32(ROTL(C, 5) + F(D, E, A) + B + W13 + K1); \ D = ROTL(D, 30); \ W14 = in(14); \ A = SPH_T32(ROTL(B, 5) + F(C, D, E) + A + W14 + K1); \ C = ROTL(C, 30); \ W15 = in(15); \ E = SPH_T32(ROTL(A, 5) + F(B, C, D) + E + W15 + K1); \ B = ROTL(B, 30); \ W00 = ROTL(W13 ^ W08 ^ W02 ^ W00, 1); \ D = SPH_T32(ROTL(E, 5) + F(A, B, C) + D + W00 + K1); \ A = ROTL(A, 30); \ W01 = ROTL(W14 ^ W09 ^ W03 ^ W01, 1); \ C = SPH_T32(ROTL(D, 5) + F(E, A, B) + C + W01 + K1); \ E = ROTL(E, 30); \ W02 = ROTL(W15 ^ W10 ^ W04 ^ W02, 1); \ B = SPH_T32(ROTL(C, 5) + F(D, E, A) + B + W02 + K1); \ D = ROTL(D, 30); \ W03 = ROTL(W00 ^ W11 ^ W05 ^ W03, 1); \ A = SPH_T32(ROTL(B, 5) + F(C, D, E) + A + W03 + K1); \ C = ROTL(C, 30); \ W04 = ROTL(W01 ^ W12 ^ W06 ^ W04, 1); \ E = SPH_T32(ROTL(A, 5) + G(B, C, D) + E + W04 + K2); \ B = ROTL(B, 30); \ W05 = ROTL(W02 ^ W13 ^ W07 ^ W05, 1); \ D = SPH_T32(ROTL(E, 5) + G(A, B, C) + D + W05 + K2); \ A = ROTL(A, 30); \ W06 = ROTL(W03 ^ W14 ^ W08 ^ W06, 1); \ C = SPH_T32(ROTL(D, 5) + G(E, A, B) + C + W06 + K2); \ E = ROTL(E, 30); \ W07 = ROTL(W04 ^ W15 ^ W09 ^ W07, 1); \ B = SPH_T32(ROTL(C, 5) + G(D, E, A) + B + W07 + K2); \ D = ROTL(D, 30); \ W08 = ROTL(W05 ^ W00 ^ W10 ^ W08, 1); \ A = SPH_T32(ROTL(B, 5) + G(C, D, E) + A + W08 + K2); \ C = ROTL(C, 30); \ W09 = ROTL(W06 ^ W01 ^ W11 ^ W09, 1); \ E = SPH_T32(ROTL(A, 5) + G(B, C, D) + E + W09 + K2); \ B = ROTL(B, 30); \ W10 = ROTL(W07 ^ W02 ^ W12 ^ W10, 1); \ D = SPH_T32(ROTL(E, 5) + G(A, B, C) + D + W10 + K2); \ A = ROTL(A, 30); \ W11 = ROTL(W08 ^ W03 ^ W13 ^ W11, 1); \ C = SPH_T32(ROTL(D, 5) + G(E, A, B) + C + W11 + K2); \ E = ROTL(E, 30); \ W12 = ROTL(W09 ^ W04 ^ W14 ^ W12, 1); \ B = SPH_T32(ROTL(C, 5) + G(D, E, A) + B + W12 + K2); \ D = ROTL(D, 30); \ W13 = ROTL(W10 ^ W05 ^ W15 ^ W13, 1); \ A = SPH_T32(ROTL(B, 5) + G(C, D, E) + A + W13 + K2); \ C = ROTL(C, 30); \ W14 = ROTL(W11 ^ W06 ^ W00 ^ W14, 1); \ E = SPH_T32(ROTL(A, 5) + G(B, C, D) + E + W14 + K2); \ B = ROTL(B, 30); \ W15 = ROTL(W12 ^ W07 ^ W01 ^ W15, 1); \ D = SPH_T32(ROTL(E, 5) + G(A, B, C) + D + W15 + K2); \ A = ROTL(A, 30); \ W00 = ROTL(W13 ^ W08 ^ W02 ^ W00, 1); \ C = SPH_T32(ROTL(D, 
5) + G(E, A, B) + C + W00 + K2); \ E = ROTL(E, 30); \ W01 = ROTL(W14 ^ W09 ^ W03 ^ W01, 1); \ B = SPH_T32(ROTL(C, 5) + G(D, E, A) + B + W01 + K2); \ D = ROTL(D, 30); \ W02 = ROTL(W15 ^ W10 ^ W04 ^ W02, 1); \ A = SPH_T32(ROTL(B, 5) + G(C, D, E) + A + W02 + K2); \ C = ROTL(C, 30); \ W03 = ROTL(W00 ^ W11 ^ W05 ^ W03, 1); \ E = SPH_T32(ROTL(A, 5) + G(B, C, D) + E + W03 + K2); \ B = ROTL(B, 30); \ W04 = ROTL(W01 ^ W12 ^ W06 ^ W04, 1); \ D = SPH_T32(ROTL(E, 5) + G(A, B, C) + D + W04 + K2); \ A = ROTL(A, 30); \ W05 = ROTL(W02 ^ W13 ^ W07 ^ W05, 1); \ C = SPH_T32(ROTL(D, 5) + G(E, A, B) + C + W05 + K2); \ E = ROTL(E, 30); \ W06 = ROTL(W03 ^ W14 ^ W08 ^ W06, 1); \ B = SPH_T32(ROTL(C, 5) + G(D, E, A) + B + W06 + K2); \ D = ROTL(D, 30); \ W07 = ROTL(W04 ^ W15 ^ W09 ^ W07, 1); \ A = SPH_T32(ROTL(B, 5) + G(C, D, E) + A + W07 + K2); \ C = ROTL(C, 30); \ W08 = ROTL(W05 ^ W00 ^ W10 ^ W08, 1); \ E = SPH_T32(ROTL(A, 5) + H(B, C, D) + E + W08 + K3); \ B = ROTL(B, 30); \ W09 = ROTL(W06 ^ W01 ^ W11 ^ W09, 1); \ D = SPH_T32(ROTL(E, 5) + H(A, B, C) + D + W09 + K3); \ A = ROTL(A, 30); \ W10 = ROTL(W07 ^ W02 ^ W12 ^ W10, 1); \ C = SPH_T32(ROTL(D, 5) + H(E, A, B) + C + W10 + K3); \ E = ROTL(E, 30); \ W11 = ROTL(W08 ^ W03 ^ W13 ^ W11, 1); \ B = SPH_T32(ROTL(C, 5) + H(D, E, A) + B + W11 + K3); \ D = ROTL(D, 30); \ W12 = ROTL(W09 ^ W04 ^ W14 ^ W12, 1); \ A = SPH_T32(ROTL(B, 5) + H(C, D, E) + A + W12 + K3); \ C = ROTL(C, 30); \ W13 = ROTL(W10 ^ W05 ^ W15 ^ W13, 1); \ E = SPH_T32(ROTL(A, 5) + H(B, C, D) + E + W13 + K3); \ B = ROTL(B, 30); \ W14 = ROTL(W11 ^ W06 ^ W00 ^ W14, 1); \ D = SPH_T32(ROTL(E, 5) + H(A, B, C) + D + W14 + K3); \ A = ROTL(A, 30); \ W15 = ROTL(W12 ^ W07 ^ W01 ^ W15, 1); \ C = SPH_T32(ROTL(D, 5) + H(E, A, B) + C + W15 + K3); \ E = ROTL(E, 30); \ W00 = ROTL(W13 ^ W08 ^ W02 ^ W00, 1); \ B = SPH_T32(ROTL(C, 5) + H(D, E, A) + B + W00 + K3); \ D = ROTL(D, 30); \ W01 = ROTL(W14 ^ W09 ^ W03 ^ W01, 1); \ A = SPH_T32(ROTL(B, 5) + H(C, D, E) + A + W01 + K3); \ C = ROTL(C, 30); \ W02 = ROTL(W15 ^ W10 ^ W04 ^ W02, 1); \ E = SPH_T32(ROTL(A, 5) + H(B, C, D) + E + W02 + K3); \ B = ROTL(B, 30); \ W03 = ROTL(W00 ^ W11 ^ W05 ^ W03, 1); \ D = SPH_T32(ROTL(E, 5) + H(A, B, C) + D + W03 + K3); \ A = ROTL(A, 30); \ W04 = ROTL(W01 ^ W12 ^ W06 ^ W04, 1); \ C = SPH_T32(ROTL(D, 5) + H(E, A, B) + C + W04 + K3); \ E = ROTL(E, 30); \ W05 = ROTL(W02 ^ W13 ^ W07 ^ W05, 1); \ B = SPH_T32(ROTL(C, 5) + H(D, E, A) + B + W05 + K3); \ D = ROTL(D, 30); \ W06 = ROTL(W03 ^ W14 ^ W08 ^ W06, 1); \ A = SPH_T32(ROTL(B, 5) + H(C, D, E) + A + W06 + K3); \ C = ROTL(C, 30); \ W07 = ROTL(W04 ^ W15 ^ W09 ^ W07, 1); \ E = SPH_T32(ROTL(A, 5) + H(B, C, D) + E + W07 + K3); \ B = ROTL(B, 30); \ W08 = ROTL(W05 ^ W00 ^ W10 ^ W08, 1); \ D = SPH_T32(ROTL(E, 5) + H(A, B, C) + D + W08 + K3); \ A = ROTL(A, 30); \ W09 = ROTL(W06 ^ W01 ^ W11 ^ W09, 1); \ C = SPH_T32(ROTL(D, 5) + H(E, A, B) + C + W09 + K3); \ E = ROTL(E, 30); \ W10 = ROTL(W07 ^ W02 ^ W12 ^ W10, 1); \ B = SPH_T32(ROTL(C, 5) + H(D, E, A) + B + W10 + K3); \ D = ROTL(D, 30); \ W11 = ROTL(W08 ^ W03 ^ W13 ^ W11, 1); \ A = SPH_T32(ROTL(B, 5) + H(C, D, E) + A + W11 + K3); \ C = ROTL(C, 30); \ W12 = ROTL(W09 ^ W04 ^ W14 ^ W12, 1); \ E = SPH_T32(ROTL(A, 5) + I(B, C, D) + E + W12 + K4); \ B = ROTL(B, 30); \ W13 = ROTL(W10 ^ W05 ^ W15 ^ W13, 1); \ D = SPH_T32(ROTL(E, 5) + I(A, B, C) + D + W13 + K4); \ A = ROTL(A, 30); \ W14 = ROTL(W11 ^ W06 ^ W00 ^ W14, 1); \ C = SPH_T32(ROTL(D, 5) + I(E, A, B) + C + W14 + K4); \ E = ROTL(E, 30); \ W15 = ROTL(W12 ^ W07 ^ W01 ^ W15, 1); \ B = SPH_T32(ROTL(C, 5) + I(D, E, A) + B 
+ W15 + K4); \ D = ROTL(D, 30); \ W00 = ROTL(W13 ^ W08 ^ W02 ^ W00, 1); \ A = SPH_T32(ROTL(B, 5) + I(C, D, E) + A + W00 + K4); \ C = ROTL(C, 30); \ W01 = ROTL(W14 ^ W09 ^ W03 ^ W01, 1); \ E = SPH_T32(ROTL(A, 5) + I(B, C, D) + E + W01 + K4); \ B = ROTL(B, 30); \ W02 = ROTL(W15 ^ W10 ^ W04 ^ W02, 1); \ D = SPH_T32(ROTL(E, 5) + I(A, B, C) + D + W02 + K4); \ A = ROTL(A, 30); \ W03 = ROTL(W00 ^ W11 ^ W05 ^ W03, 1); \ C = SPH_T32(ROTL(D, 5) + I(E, A, B) + C + W03 + K4); \ E = ROTL(E, 30); \ W04 = ROTL(W01 ^ W12 ^ W06 ^ W04, 1); \ B = SPH_T32(ROTL(C, 5) + I(D, E, A) + B + W04 + K4); \ D = ROTL(D, 30); \ W05 = ROTL(W02 ^ W13 ^ W07 ^ W05, 1); \ A = SPH_T32(ROTL(B, 5) + I(C, D, E) + A + W05 + K4); \ C = ROTL(C, 30); \ W06 = ROTL(W03 ^ W14 ^ W08 ^ W06, 1); \ E = SPH_T32(ROTL(A, 5) + I(B, C, D) + E + W06 + K4); \ B = ROTL(B, 30); \ W07 = ROTL(W04 ^ W15 ^ W09 ^ W07, 1); \ D = SPH_T32(ROTL(E, 5) + I(A, B, C) + D + W07 + K4); \ A = ROTL(A, 30); \ W08 = ROTL(W05 ^ W00 ^ W10 ^ W08, 1); \ C = SPH_T32(ROTL(D, 5) + I(E, A, B) + C + W08 + K4); \ E = ROTL(E, 30); \ W09 = ROTL(W06 ^ W01 ^ W11 ^ W09, 1); \ B = SPH_T32(ROTL(C, 5) + I(D, E, A) + B + W09 + K4); \ D = ROTL(D, 30); \ W10 = ROTL(W07 ^ W02 ^ W12 ^ W10, 1); \ A = SPH_T32(ROTL(B, 5) + I(C, D, E) + A + W10 + K4); \ C = ROTL(C, 30); \ W11 = ROTL(W08 ^ W03 ^ W13 ^ W11, 1); \ E = SPH_T32(ROTL(A, 5) + I(B, C, D) + E + W11 + K4); \ B = ROTL(B, 30); \ W12 = ROTL(W09 ^ W04 ^ W14 ^ W12, 1); \ D = SPH_T32(ROTL(E, 5) + I(A, B, C) + D + W12 + K4); \ A = ROTL(A, 30); \ W13 = ROTL(W10 ^ W05 ^ W15 ^ W13, 1); \ C = SPH_T32(ROTL(D, 5) + I(E, A, B) + C + W13 + K4); \ E = ROTL(E, 30); \ W14 = ROTL(W11 ^ W06 ^ W00 ^ W14, 1); \ B = SPH_T32(ROTL(C, 5) + I(D, E, A) + B + W14 + K4); \ D = ROTL(D, 30); \ W15 = ROTL(W12 ^ W07 ^ W01 ^ W15, 1); \ A = SPH_T32(ROTL(B, 5) + I(C, D, E) + A + W15 + K4); \ C = ROTL(C, 30); \ \ (r)[0] = SPH_T32(r[0] + A); \ (r)[1] = SPH_T32(r[1] + B); \ (r)[2] = SPH_T32(r[2] + C); \ (r)[3] = SPH_T32(r[3] + D); \ (r)[4] = SPH_T32(r[4] + E); \ } while (0) /* * One round of SHA-1. The data must be aligned for 32-bit access. */ static void sha1_round(const unsigned char *data, sph_u32 r[5]) { #define SHA1_IN(x) sph_dec32be_aligned(data + (4 * (x))) SHA1_ROUND_BODY(SHA1_IN, r); #undef SHA1_IN } /* see sph_sha1.h */ void sph_sha1_init(void *cc) { sph_sha1_context *sc; sc = cc; memcpy(sc->val, IV, sizeof IV); #if SPH_64 sc->count = 0; #else sc->count_high = sc->count_low = 0; #endif } #define RFUN sha1_round #define HASH sha1 #define BE32 1 #include "md_helper.c" /* see sph_sha1.h */ void sph_sha1_close(void *cc, void *dst) { sha1_close(cc, dst, 5); sph_sha1_init(cc); } /* see sph_sha1.h */ void sph_sha1_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) { sha1_addbits_and_close(cc, ub, n, dst, 5); sph_sha1_init(cc); } /* see sph_sha1.h */ void sph_sha1_comp(const sph_u32 msg[16], sph_u32 val[5]) { #define SHA1_IN(x) msg[x] SHA1_ROUND_BODY(SHA1_IN, val); #undef SHA1_IN } libntru-0.5/src/sha2.c000066400000000000000000000644071271556312200146440ustar00rootroot00000000000000/* $Id: sha2.c 227 2010-06-16 17:28:38Z tp $ */ /* * SHA-224 / SHA-256 implementation. 
* * ==========================(LICENSE BEGIN)============================ * * Copyright (c) 2007-2010 Projet RNRT SAPHIR * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * ===========================(LICENSE END)============================= * * @author Thomas Pornin <thomas.pornin@cryptolog.com> */ #include <stddef.h> #include <string.h> #include "sph_sha2.h" #if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SHA2 #define SPH_SMALL_FOOTPRINT_SHA2 1 #endif #define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z)) #define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X))) #define ROTR SPH_ROTR32 #define BSG2_0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) #define BSG2_1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) #define SSG2_0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SPH_T32((x) >> 3)) #define SSG2_1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SPH_T32((x) >> 10)) static const sph_u32 H224[8] = { SPH_C32(0xC1059ED8), SPH_C32(0x367CD507), SPH_C32(0x3070DD17), SPH_C32(0xF70E5939), SPH_C32(0xFFC00B31), SPH_C32(0x68581511), SPH_C32(0x64F98FA7), SPH_C32(0xBEFA4FA4) }; static const sph_u32 H256[8] = { SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372), SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C), SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19) }; /* * The SHA2_ROUND_BODY defines the body for a SHA-224 / SHA-256 * compression function implementation. The "in" parameter should * evaluate, when applied to a numerical input parameter from 0 to 15, * to an expression which yields the corresponding input block. The "r" * parameter should evaluate to an array or pointer expression * designating the array of 8 words which contains the input and output * of the compression function.
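 *
 * An illustrative (hypothetical) instantiation: given a word array "w"
 * and an 8-word state "st", one could write
 *
 *   #define MY_IN(x)   w[x]
 *   SHA2_ROUND_BODY(MY_IN, st);
 *   #undef MY_IN
 *
 * which is the same pattern used by sha2_round() and sph_sha224_comp()
 * below.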
*/ #if SPH_SMALL_FOOTPRINT_SHA2 static const sph_u32 K[64] = { SPH_C32(0x428A2F98), SPH_C32(0x71374491), SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5), SPH_C32(0x3956C25B), SPH_C32(0x59F111F1), SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5), SPH_C32(0xD807AA98), SPH_C32(0x12835B01), SPH_C32(0x243185BE), SPH_C32(0x550C7DC3), SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE), SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174), SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786), SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC), SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA), SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA), SPH_C32(0x983E5152), SPH_C32(0xA831C66D), SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7), SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147), SPH_C32(0x06CA6351), SPH_C32(0x14292967), SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138), SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13), SPH_C32(0x650A7354), SPH_C32(0x766A0ABB), SPH_C32(0x81C2C92E), SPH_C32(0x92722C85), SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B), SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3), SPH_C32(0xD192E819), SPH_C32(0xD6990624), SPH_C32(0xF40E3585), SPH_C32(0x106AA070), SPH_C32(0x19A4C116), SPH_C32(0x1E376C08), SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5), SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A), SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3), SPH_C32(0x748F82EE), SPH_C32(0x78A5636F), SPH_C32(0x84C87814), SPH_C32(0x8CC70208), SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB), SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2) }; #define SHA2_MEXP1(in, pc) do { \ W[pc] = in(pc); \ } while (0) #define SHA2_MEXP2(in, pc) do { \ W[(pc) & 0x0F] = SPH_T32(SSG2_1(W[((pc) - 2) & 0x0F]) \ + W[((pc) - 7) & 0x0F] \ + SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]); \ } while (0) #define SHA2_STEPn(n, a, b, c, d, e, f, g, h, in, pc) do { \ sph_u32 t1, t2; \ SHA2_MEXP ## n(in, pc); \ t1 = SPH_T32(h + BSG2_1(e) + CH(e, f, g) \ + K[pcount + (pc)] + W[(pc) & 0x0F]); \ t2 = SPH_T32(BSG2_0(a) + MAJ(a, b, c)); \ d = SPH_T32(d + t1); \ h = SPH_T32(t1 + t2); \ } while (0) #define SHA2_STEP1(a, b, c, d, e, f, g, h, in, pc) \ SHA2_STEPn(1, a, b, c, d, e, f, g, h, in, pc) #define SHA2_STEP2(a, b, c, d, e, f, g, h, in, pc) \ SHA2_STEPn(2, a, b, c, d, e, f, g, h, in, pc) #define SHA2_ROUND_BODY(in, r) do { \ sph_u32 A, B, C, D, E, F, G, H; \ sph_u32 W[16]; \ unsigned pcount; \ \ A = (r)[0]; \ B = (r)[1]; \ C = (r)[2]; \ D = (r)[3]; \ E = (r)[4]; \ F = (r)[5]; \ G = (r)[6]; \ H = (r)[7]; \ pcount = 0; \ SHA2_STEP1(A, B, C, D, E, F, G, H, in, 0); \ SHA2_STEP1(H, A, B, C, D, E, F, G, in, 1); \ SHA2_STEP1(G, H, A, B, C, D, E, F, in, 2); \ SHA2_STEP1(F, G, H, A, B, C, D, E, in, 3); \ SHA2_STEP1(E, F, G, H, A, B, C, D, in, 4); \ SHA2_STEP1(D, E, F, G, H, A, B, C, in, 5); \ SHA2_STEP1(C, D, E, F, G, H, A, B, in, 6); \ SHA2_STEP1(B, C, D, E, F, G, H, A, in, 7); \ SHA2_STEP1(A, B, C, D, E, F, G, H, in, 8); \ SHA2_STEP1(H, A, B, C, D, E, F, G, in, 9); \ SHA2_STEP1(G, H, A, B, C, D, E, F, in, 10); \ SHA2_STEP1(F, G, H, A, B, C, D, E, in, 11); \ SHA2_STEP1(E, F, G, H, A, B, C, D, in, 12); \ SHA2_STEP1(D, E, F, G, H, A, B, C, in, 13); \ SHA2_STEP1(C, D, E, F, G, H, A, B, in, 14); \ SHA2_STEP1(B, C, D, E, F, G, H, A, in, 15); \ for (pcount = 16; pcount < 64; pcount += 16) { \ SHA2_STEP2(A, B, C, D, E, F, G, H, in, 0); \ SHA2_STEP2(H, A, B, C, D, E, F, G, in, 1); \ SHA2_STEP2(G, H, A, B, C, D, E, F, in, 2); \ SHA2_STEP2(F, G, H, A, B, C, D, E, in, 3); \ SHA2_STEP2(E, F, G, H, A, B, C, D, in, 4); \ SHA2_STEP2(D, E, F, G, H, A, B, C, in, 5); \ SHA2_STEP2(C, D, E, F, G, H, A, B, in, 6); \ SHA2_STEP2(B, C, D, E, F, G, H, A, in, 7); \ SHA2_STEP2(A, B, C, D, E, F, G, H, in, 8); 
\ SHA2_STEP2(H, A, B, C, D, E, F, G, in, 9); \ SHA2_STEP2(G, H, A, B, C, D, E, F, in, 10); \ SHA2_STEP2(F, G, H, A, B, C, D, E, in, 11); \ SHA2_STEP2(E, F, G, H, A, B, C, D, in, 12); \ SHA2_STEP2(D, E, F, G, H, A, B, C, in, 13); \ SHA2_STEP2(C, D, E, F, G, H, A, B, in, 14); \ SHA2_STEP2(B, C, D, E, F, G, H, A, in, 15); \ } \ (r)[0] = SPH_T32((r)[0] + A); \ (r)[1] = SPH_T32((r)[1] + B); \ (r)[2] = SPH_T32((r)[2] + C); \ (r)[3] = SPH_T32((r)[3] + D); \ (r)[4] = SPH_T32((r)[4] + E); \ (r)[5] = SPH_T32((r)[5] + F); \ (r)[6] = SPH_T32((r)[6] + G); \ (r)[7] = SPH_T32((r)[7] + H); \ } while (0) #else #define SHA2_ROUND_BODY(in, r) do { \ sph_u32 A, B, C, D, E, F, G, H, T1, T2; \ sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \ sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \ \ A = (r)[0]; \ B = (r)[1]; \ C = (r)[2]; \ D = (r)[3]; \ E = (r)[4]; \ F = (r)[5]; \ G = (r)[6]; \ H = (r)[7]; \ W00 = in(0); \ T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + SPH_C32(0x428A2F98) + W00); \ T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ D = SPH_T32(D + T1); \ H = SPH_T32(T1 + T2); \ W01 = in(1); \ T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + SPH_C32(0x71374491) + W01); \ T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ C = SPH_T32(C + T1); \ G = SPH_T32(T1 + T2); \ W02 = in(2); \ T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + SPH_C32(0xB5C0FBCF) + W02); \ T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ B = SPH_T32(B + T1); \ F = SPH_T32(T1 + T2); \ W03 = in(3); \ T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + SPH_C32(0xE9B5DBA5) + W03); \ T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ A = SPH_T32(A + T1); \ E = SPH_T32(T1 + T2); \ W04 = in(4); \ T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + SPH_C32(0x3956C25B) + W04); \ T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ H = SPH_T32(H + T1); \ D = SPH_T32(T1 + T2); \ W05 = in(5); \ T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + SPH_C32(0x59F111F1) + W05); \ T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ G = SPH_T32(G + T1); \ C = SPH_T32(T1 + T2); \ W06 = in(6); \ T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + SPH_C32(0x923F82A4) + W06); \ T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ F = SPH_T32(F + T1); \ B = SPH_T32(T1 + T2); \ W07 = in(7); \ T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + SPH_C32(0xAB1C5ED5) + W07); \ T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ E = SPH_T32(E + T1); \ A = SPH_T32(T1 + T2); \ W08 = in(8); \ T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + SPH_C32(0xD807AA98) + W08); \ T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ D = SPH_T32(D + T1); \ H = SPH_T32(T1 + T2); \ W09 = in(9); \ T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + SPH_C32(0x12835B01) + W09); \ T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ C = SPH_T32(C + T1); \ G = SPH_T32(T1 + T2); \ W10 = in(10); \ T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + SPH_C32(0x243185BE) + W10); \ T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ B = SPH_T32(B + T1); \ F = SPH_T32(T1 + T2); \ W11 = in(11); \ T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + SPH_C32(0x550C7DC3) + W11); \ T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ A = SPH_T32(A + T1); \ E = SPH_T32(T1 + T2); \ W12 = in(12); \ T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + SPH_C32(0x72BE5D74) + W12); \ T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ H = SPH_T32(H + T1); \ D = SPH_T32(T1 + T2); \ W13 = in(13); \ T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + SPH_C32(0x80DEB1FE) + W13); \ T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ G = SPH_T32(G + T1); \ C = SPH_T32(T1 + T2); \ W14 = in(14); \ T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + SPH_C32(0x9BDC06A7) + W14); \ T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ F = SPH_T32(F + 
T1); \ B = SPH_T32(T1 + T2); \ W15 = in(15); \ T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + SPH_C32(0xC19BF174) + W15); \ T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ E = SPH_T32(E + T1); \ A = SPH_T32(T1 + T2); \ W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \ T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + SPH_C32(0xE49B69C1) + W00); \ T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ D = SPH_T32(D + T1); \ H = SPH_T32(T1 + T2); \ W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \ T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + SPH_C32(0xEFBE4786) + W01); \ T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ C = SPH_T32(C + T1); \ G = SPH_T32(T1 + T2); \ W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \ T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + SPH_C32(0x0FC19DC6) + W02); \ T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ B = SPH_T32(B + T1); \ F = SPH_T32(T1 + T2); \ W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \ T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + SPH_C32(0x240CA1CC) + W03); \ T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ A = SPH_T32(A + T1); \ E = SPH_T32(T1 + T2); \ W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \ T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + SPH_C32(0x2DE92C6F) + W04); \ T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ H = SPH_T32(H + T1); \ D = SPH_T32(T1 + T2); \ W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \ T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + SPH_C32(0x4A7484AA) + W05); \ T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ G = SPH_T32(G + T1); \ C = SPH_T32(T1 + T2); \ W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \ T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + SPH_C32(0x5CB0A9DC) + W06); \ T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ F = SPH_T32(F + T1); \ B = SPH_T32(T1 + T2); \ W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \ T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + SPH_C32(0x76F988DA) + W07); \ T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ E = SPH_T32(E + T1); \ A = SPH_T32(T1 + T2); \ W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \ T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + SPH_C32(0x983E5152) + W08); \ T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ D = SPH_T32(D + T1); \ H = SPH_T32(T1 + T2); \ W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \ T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + SPH_C32(0xA831C66D) + W09); \ T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ C = SPH_T32(C + T1); \ G = SPH_T32(T1 + T2); \ W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \ T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + SPH_C32(0xB00327C8) + W10); \ T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ B = SPH_T32(B + T1); \ F = SPH_T32(T1 + T2); \ W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \ T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + SPH_C32(0xBF597FC7) + W11); \ T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ A = SPH_T32(A + T1); \ E = SPH_T32(T1 + T2); \ W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \ T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + SPH_C32(0xC6E00BF3) + W12); \ T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ H = SPH_T32(H + T1); \ D = SPH_T32(T1 + T2); \ W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \ T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + SPH_C32(0xD5A79147) + W13); \ T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ G = SPH_T32(G + T1); \ C = SPH_T32(T1 + T2); \ W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \ T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + SPH_C32(0x06CA6351) + W14); \ T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ F = SPH_T32(F + T1); \ B = SPH_T32(T1 + T2); \ W15 = SPH_T32(SSG2_1(W13) + W08 + 
SSG2_0(W00) + W15); \ T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + SPH_C32(0x14292967) + W15); \ T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ E = SPH_T32(E + T1); \ A = SPH_T32(T1 + T2); \ W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \ T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + SPH_C32(0x27B70A85) + W00); \ T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ D = SPH_T32(D + T1); \ H = SPH_T32(T1 + T2); \ W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \ T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + SPH_C32(0x2E1B2138) + W01); \ T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ C = SPH_T32(C + T1); \ G = SPH_T32(T1 + T2); \ W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \ T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + SPH_C32(0x4D2C6DFC) + W02); \ T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ B = SPH_T32(B + T1); \ F = SPH_T32(T1 + T2); \ W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \ T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + SPH_C32(0x53380D13) + W03); \ T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ A = SPH_T32(A + T1); \ E = SPH_T32(T1 + T2); \ W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \ T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + SPH_C32(0x650A7354) + W04); \ T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ H = SPH_T32(H + T1); \ D = SPH_T32(T1 + T2); \ W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \ T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + SPH_C32(0x766A0ABB) + W05); \ T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ G = SPH_T32(G + T1); \ C = SPH_T32(T1 + T2); \ W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \ T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + SPH_C32(0x81C2C92E) + W06); \ T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ F = SPH_T32(F + T1); \ B = SPH_T32(T1 + T2); \ W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \ T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + SPH_C32(0x92722C85) + W07); \ T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ E = SPH_T32(E + T1); \ A = SPH_T32(T1 + T2); \ W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \ T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + SPH_C32(0xA2BFE8A1) + W08); \ T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ D = SPH_T32(D + T1); \ H = SPH_T32(T1 + T2); \ W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \ T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + SPH_C32(0xA81A664B) + W09); \ T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ C = SPH_T32(C + T1); \ G = SPH_T32(T1 + T2); \ W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \ T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + SPH_C32(0xC24B8B70) + W10); \ T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ B = SPH_T32(B + T1); \ F = SPH_T32(T1 + T2); \ W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \ T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + SPH_C32(0xC76C51A3) + W11); \ T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ A = SPH_T32(A + T1); \ E = SPH_T32(T1 + T2); \ W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \ T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + SPH_C32(0xD192E819) + W12); \ T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ H = SPH_T32(H + T1); \ D = SPH_T32(T1 + T2); \ W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \ T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + SPH_C32(0xD6990624) + W13); \ T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ G = SPH_T32(G + T1); \ C = SPH_T32(T1 + T2); \ W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \ T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + SPH_C32(0xF40E3585) + W14); \ T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ F = SPH_T32(F + T1); \ B = SPH_T32(T1 + T2); \ W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \ T1 = 
SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + SPH_C32(0x106AA070) + W15); \ T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ E = SPH_T32(E + T1); \ A = SPH_T32(T1 + T2); \ W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \ T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + SPH_C32(0x19A4C116) + W00); \ T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ D = SPH_T32(D + T1); \ H = SPH_T32(T1 + T2); \ W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \ T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + SPH_C32(0x1E376C08) + W01); \ T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ C = SPH_T32(C + T1); \ G = SPH_T32(T1 + T2); \ W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \ T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + SPH_C32(0x2748774C) + W02); \ T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ B = SPH_T32(B + T1); \ F = SPH_T32(T1 + T2); \ W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \ T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + SPH_C32(0x34B0BCB5) + W03); \ T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ A = SPH_T32(A + T1); \ E = SPH_T32(T1 + T2); \ W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \ T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + SPH_C32(0x391C0CB3) + W04); \ T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ H = SPH_T32(H + T1); \ D = SPH_T32(T1 + T2); \ W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \ T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + SPH_C32(0x4ED8AA4A) + W05); \ T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ G = SPH_T32(G + T1); \ C = SPH_T32(T1 + T2); \ W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \ T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + SPH_C32(0x5B9CCA4F) + W06); \ T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ F = SPH_T32(F + T1); \ B = SPH_T32(T1 + T2); \ W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \ T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + SPH_C32(0x682E6FF3) + W07); \ T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ E = SPH_T32(E + T1); \ A = SPH_T32(T1 + T2); \ W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \ T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + SPH_C32(0x748F82EE) + W08); \ T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ D = SPH_T32(D + T1); \ H = SPH_T32(T1 + T2); \ W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \ T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + SPH_C32(0x78A5636F) + W09); \ T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ C = SPH_T32(C + T1); \ G = SPH_T32(T1 + T2); \ W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \ T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + SPH_C32(0x84C87814) + W10); \ T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ B = SPH_T32(B + T1); \ F = SPH_T32(T1 + T2); \ W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \ T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + SPH_C32(0x8CC70208) + W11); \ T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ A = SPH_T32(A + T1); \ E = SPH_T32(T1 + T2); \ W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \ T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + SPH_C32(0x90BEFFFA) + W12); \ T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ H = SPH_T32(H + T1); \ D = SPH_T32(T1 + T2); \ W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \ T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + SPH_C32(0xA4506CEB) + W13); \ T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ G = SPH_T32(G + T1); \ C = SPH_T32(T1 + T2); \ W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \ T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + SPH_C32(0xBEF9A3F7) + W14); \ T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ F = SPH_T32(F + T1); \ B = SPH_T32(T1 + T2); \ W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \ T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, 
H) \ + SPH_C32(0xC67178F2) + W15); \ T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ E = SPH_T32(E + T1); \ A = SPH_T32(T1 + T2); \ (r)[0] = SPH_T32((r)[0] + A); \ (r)[1] = SPH_T32((r)[1] + B); \ (r)[2] = SPH_T32((r)[2] + C); \ (r)[3] = SPH_T32((r)[3] + D); \ (r)[4] = SPH_T32((r)[4] + E); \ (r)[5] = SPH_T32((r)[5] + F); \ (r)[6] = SPH_T32((r)[6] + G); \ (r)[7] = SPH_T32((r)[7] + H); \ } while (0) #endif /* * One round of SHA-224 / SHA-256. The data must be aligned for 32-bit access. */ static void sha2_round(const unsigned char *data, sph_u32 r[8]) { #define SHA2_IN(x) sph_dec32be_aligned(data + (4 * (x))) SHA2_ROUND_BODY(SHA2_IN, r); #undef SHA2_IN } /* see sph_sha2.h */ void sph_sha224_init(void *cc) { sph_sha224_context *sc; sc = cc; memcpy(sc->val, H224, sizeof H224); #if SPH_64 sc->count = 0; #else sc->count_high = sc->count_low = 0; #endif } /* see sph_sha2.h */ void sph_sha256_init(void *cc) { sph_sha256_context *sc; sc = cc; memcpy(sc->val, H256, sizeof H256); #if SPH_64 sc->count = 0; #else sc->count_high = sc->count_low = 0; #endif } #define RFUN sha2_round #define HASH sha224 #define BE32 1 #include "md_helper.h" /* see sph_sha2.h */ void sph_sha224_close(void *cc, void *dst) { sha224_close(cc, dst, 7); sph_sha224_init(cc); } /* see sph_sha2.h */ void sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) { sha224_addbits_and_close(cc, ub, n, dst, 7); sph_sha224_init(cc); } /* see sph_sha2.h */ void sph_sha256_close(void *cc, void *dst) { sha224_close(cc, dst, 8); sph_sha256_init(cc); } /* see sph_sha2.h */ void sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) { sha224_addbits_and_close(cc, ub, n, dst, 8); sph_sha256_init(cc); } /* see sph_sha2.h */ void sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8]) { #define SHA2_IN(x) msg[x] SHA2_ROUND_BODY(SHA2_IN, val); #undef SHA2_IN } libntru-0.5/src/sha256-mb-x86_64.pl000077500000000000000000001125101271556312200165300ustar00rootroot00000000000000#!/usr/bin/env perl # ==================================================================== # Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== # Multi-buffer SHA256 procedure processes n buffers in parallel by # placing buffer data to designated lane of SIMD register. n is # naturally limited to 4 on pre-AVX2 processors and to 8 on # AVX2-capable processors such as Haswell. # # this +aesni(i) sha256 aesni-sha256 gain(iv) # ------------------------------------------------------------------- # Westmere(ii) 23.3/n +1.28=7.11(n=4) 12.3 +3.75=16.1 +126% # Atom(ii) 38.7/n +3.93=13.6(n=4) 20.8 +5.69=26.5 +95% # Sandy Bridge (20.5 +5.15=25.7)/n 11.6 13.0 +103% # Ivy Bridge (20.4 +5.14=25.5)/n 10.3 11.6 +82% # Haswell(iii) (21.0 +5.00=26.0)/n 7.80 8.79 +170% # Bulldozer (21.6 +5.76=27.4)/n 13.6 13.7 +100% # # (i) multi-block CBC encrypt with 128-bit key; # (ii) (HASH+AES)/n does not apply to Westmere for n>3 and Atom, # because of lower AES-NI instruction throughput, nor is there # AES-NI-SHA256 stitch for these processors; # (iii) "this" is for n=8, when we gather twice as much data, result # for n=4 is 20.3+4.44=24.7; # (iv) presented improvement coefficients are asymptotic limits and # in real-life application are somewhat lower, e.g. 
for 2KB # fragments they range from 75% to 130% (on Haswell); $flavour = shift; $output = shift; if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; $avx=0; if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` =~ /GNU assembler version ([2-9]\.[0-9]+)/) { $avx = ($1>=2.19) + ($1>=2.22); } if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { $avx = ($1>=2.09) + ($1>=2.10); } if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && `ml64 2>&1` =~ /Version ([0-9]+)\./) { $avx = ($1>=10) + ($1>=11); } if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } open OUT,"| \"$^X\" $xlate $flavour $output"; *STDOUT=*OUT; # void sha256_multi_block ( # struct { unsigned int A[8]; # unsigned int B[8]; # unsigned int C[8]; # unsigned int D[8]; # unsigned int E[8]; # unsigned int F[8]; # unsigned int G[8]; # unsigned int H[8]; } *ctx, # struct { void *ptr; int blocks; } inp[8], # int num); /* 1 or 2 */ # $ctx="%rdi"; # 1st arg $inp="%rsi"; # 2nd arg $num="%edx"; # 3rd arg @ptr=map("%r$_",(8..11)); $Tbl="%rbp"; @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("%xmm$_",(8..15)); ($t1,$t2,$t3,$axb,$bxc,$Xi,$Xn,$sigma)=map("%xmm$_",(0..7)); $REG_SZ=16; sub Xi_off { my $off = shift; $off %= 16; $off *= $REG_SZ; $off<256 ? "$off-128(%rax)" : "$off-256-128(%rbx)"; } sub ROUND_00_15 { my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; $code.=<<___ if ($i<15); movd `4*$i`(@ptr[0]),$Xi movd `4*$i`(@ptr[1]),$t1 movd `4*$i`(@ptr[2]),$t2 movd `4*$i`(@ptr[3]),$t3 punpckldq $t2,$Xi punpckldq $t3,$t1 punpckldq $t1,$Xi ___ $code.=<<___ if ($i==15); movd `4*$i`(@ptr[0]),$Xi lea `16*4`(@ptr[0]),@ptr[0] movd `4*$i`(@ptr[1]),$t1 lea `16*4`(@ptr[1]),@ptr[1] movd `4*$i`(@ptr[2]),$t2 lea `16*4`(@ptr[2]),@ptr[2] movd `4*$i`(@ptr[3]),$t3 lea `16*4`(@ptr[3]),@ptr[3] punpckldq $t2,$Xi punpckldq $t3,$t1 punpckldq $t1,$Xi ___ $code.=<<___; movdqa $e,$sigma `"pshufb $Xn,$Xi" if ($i<=15 && ($i&1)==0)` movdqa $e,$t3 `"pshufb $Xn,$Xi" if ($i<=15 && ($i&1)==1)` psrld \$6,$sigma movdqa $e,$t2 pslld \$7,$t3 movdqa $Xi,`&Xi_off($i)` paddd $h,$Xi # Xi+=h psrld \$11,$t2 pxor $t3,$sigma pslld \$21-7,$t3 paddd `32*($i%8)-128`($Tbl),$Xi # Xi+=K[round] pxor $t2,$sigma psrld \$25-11,$t2 movdqa $e,$t1 `"prefetcht0 63(@ptr[0])" if ($i==15)` pxor $t3,$sigma movdqa $e,$axb # borrow $axb pslld \$26-21,$t3 pandn $g,$t1 pand $f,$axb pxor $t2,$sigma `"prefetcht0 63(@ptr[1])" if ($i==15)` movdqa $a,$t2 pxor $t3,$sigma # Sigma1(e) movdqa $a,$t3 psrld \$2,$t2 paddd $sigma,$Xi # Xi+=Sigma1(e) pxor $axb,$t1 # Ch(e,f,g) movdqa $b,$axb movdqa $a,$sigma pslld \$10,$t3 pxor $a,$axb # a^b, b^c in next round `"prefetcht0 63(@ptr[2])" if ($i==15)` psrld \$13,$sigma pxor $t3,$t2 paddd $t1,$Xi # Xi+=Ch(e,f,g) pslld \$19-10,$t3 pand $axb,$bxc pxor $sigma,$t2 `"prefetcht0 63(@ptr[3])" if ($i==15)` psrld \$22-13,$sigma pxor $t3,$t2 movdqa $b,$h pslld \$30-19,$t3 pxor $t2,$sigma pxor $bxc,$h # h=Maj(a,b,c)=Ch(a^b,c,b) paddd $Xi,$d # d+=Xi pxor $t3,$sigma # Sigma0(a) paddd $Xi,$h # h+=Xi paddd $sigma,$h # h+=Sigma0(a) ___ $code.=<<___ if (($i%8)==7); lea `32*8`($Tbl),$Tbl ___ ($axb,$bxc)=($bxc,$axb); } sub ROUND_16_XX { my $i=shift; $code.=<<___; movdqa `&Xi_off($i+1)`,$Xn 
paddd `&Xi_off($i+9)`,$Xi # Xi+=X[i+9] movdqa $Xn,$sigma movdqa $Xn,$t2 psrld \$3,$sigma movdqa $Xn,$t3 psrld \$7,$t2 movdqa `&Xi_off($i+14)`,$t1 pslld \$14,$t3 pxor $t2,$sigma psrld \$18-7,$t2 movdqa $t1,$axb # borrow $axb pxor $t3,$sigma pslld \$25-14,$t3 pxor $t2,$sigma psrld \$10,$t1 movdqa $axb,$t2 psrld \$17,$axb pxor $t3,$sigma # sigma0(X[i+1]) pslld \$13,$t2 paddd $sigma,$Xi # Xi+=sigma0(e) pxor $axb,$t1 psrld \$19-17,$axb pxor $t2,$t1 pslld \$15-13,$t2 pxor $axb,$t1 pxor $t2,$t1 # sigma0(X[i+14]) paddd $t1,$Xi # Xi+=sigma1(X[i+14]) ___ &ROUND_00_15($i,@_); ($Xi,$Xn)=($Xn,$Xi); } $code.=<<___; .text .extern OPENSSL_ia32cap_P .globl sha256_multi_block .type sha256_multi_block,\@function,3 .align 32 sha256_multi_block: mov OPENSSL_ia32cap_P+4(%rip),%rcx bt \$61,%rcx # check SHA bit jc _shaext_shortcut ___ $code.=<<___ if ($avx); test \$`1<<28`,%ecx jnz _avx_shortcut ___ $code.=<<___; mov %rsp,%rax push %rbx push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp movaps %xmm6,(%rsp) movaps %xmm7,0x10(%rsp) movaps %xmm8,0x20(%rsp) movaps %xmm9,0x30(%rsp) movaps %xmm10,-0x78(%rax) movaps %xmm11,-0x68(%rax) movaps %xmm12,-0x58(%rax) movaps %xmm13,-0x48(%rax) movaps %xmm14,-0x38(%rax) movaps %xmm15,-0x28(%rax) ___ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp .Lbody: lea K256+128(%rip),$Tbl lea `$REG_SZ*16`(%rsp),%rbx lea 0x80($ctx),$ctx # size optimization .Loop_grande: mov $num,`$REG_SZ*17+8`(%rsp) # original $num xor $num,$num ___ for($i=0;$i<4;$i++) { $code.=<<___; mov `16*$i+0`($inp),@ptr[$i] # input pointer mov `16*$i+8`($inp),%ecx # number of blocks cmp $num,%ecx cmovg %ecx,$num # find maximum test %ecx,%ecx mov %ecx,`4*$i`(%rbx) # initialize counters cmovle $Tbl,@ptr[$i] # cancel input ___ } $code.=<<___; test $num,$num jz .Ldone movdqu 0x00-0x80($ctx),$A # load context lea 128(%rsp),%rax movdqu 0x20-0x80($ctx),$B movdqu 0x40-0x80($ctx),$C movdqu 0x60-0x80($ctx),$D movdqu 0x80-0x80($ctx),$E movdqu 0xa0-0x80($ctx),$F movdqu 0xc0-0x80($ctx),$G movdqu 0xe0-0x80($ctx),$H movdqu .Lpbswap(%rip),$Xn jmp .Loop .align 32 .Loop: movdqa $C,$bxc pxor $B,$bxc # magic seed ___ for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); } $code.=<<___; movdqu `&Xi_off($i)`,$Xi mov \$3,%ecx jmp .Loop_16_xx .align 32 .Loop_16_xx: ___ for(;$i<32;$i++) { &ROUND_16_XX($i,@V); unshift(@V,pop(@V)); } $code.=<<___; dec %ecx jnz .Loop_16_xx mov \$1,%ecx lea K256+128(%rip),$Tbl movdqa (%rbx),$sigma # pull counters cmp 4*0(%rbx),%ecx # examine counters pxor $t1,$t1 cmovge $Tbl,@ptr[0] # cancel input cmp 4*1(%rbx),%ecx movdqa $sigma,$Xn cmovge $Tbl,@ptr[1] cmp 4*2(%rbx),%ecx pcmpgtd $t1,$Xn # mask value cmovge $Tbl,@ptr[2] cmp 4*3(%rbx),%ecx paddd $Xn,$sigma # counters-- cmovge $Tbl,@ptr[3] movdqu 0x00-0x80($ctx),$t1 pand $Xn,$A movdqu 0x20-0x80($ctx),$t2 pand $Xn,$B movdqu 0x40-0x80($ctx),$t3 pand $Xn,$C movdqu 0x60-0x80($ctx),$Xi pand $Xn,$D paddd $t1,$A movdqu 0x80-0x80($ctx),$t1 pand $Xn,$E paddd $t2,$B movdqu 0xa0-0x80($ctx),$t2 pand $Xn,$F paddd $t3,$C movdqu 0xc0-0x80($ctx),$t3 pand $Xn,$G paddd $Xi,$D movdqu 0xe0-0x80($ctx),$Xi pand $Xn,$H paddd $t1,$E paddd $t2,$F movdqu $A,0x00-0x80($ctx) paddd $t3,$G movdqu $B,0x20-0x80($ctx) paddd $Xi,$H movdqu $C,0x40-0x80($ctx) movdqu $D,0x60-0x80($ctx) movdqu $E,0x80-0x80($ctx) movdqu $F,0xa0-0x80($ctx) movdqu $G,0xc0-0x80($ctx) movdqu $H,0xe0-0x80($ctx) movdqa $sigma,(%rbx) # save counters movdqa .Lpbswap(%rip),$Xn dec $num jnz .Loop mov `$REG_SZ*17+8`(%rsp),$num lea $REG_SZ($ctx),$ctx lea 
`16*$REG_SZ/4`($inp),$inp dec $num jnz .Loop_grande .Ldone: mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp ___ $code.=<<___ if ($win64); movaps -0xb8(%rax),%xmm6 movaps -0xa8(%rax),%xmm7 movaps -0x98(%rax),%xmm8 movaps -0x88(%rax),%xmm9 movaps -0x78(%rax),%xmm10 movaps -0x68(%rax),%xmm11 movaps -0x58(%rax),%xmm12 movaps -0x48(%rax),%xmm13 movaps -0x38(%rax),%xmm14 movaps -0x28(%rax),%xmm15 ___ $code.=<<___; mov -16(%rax),%rbp mov -8(%rax),%rbx lea (%rax),%rsp .Lepilogue: ret .size sha256_multi_block,.-sha256_multi_block ___ {{{ my ($Wi,$TMP0,$TMP1,$TMPx,$ABEF0,$CDGH0,$ABEF1,$CDGH1)=map("%xmm$_",(0..3,12..15)); my @MSG0=map("%xmm$_",(4..7)); my @MSG1=map("%xmm$_",(8..11)); $code.=<<___; .type sha256_multi_block_shaext,\@function,3 .align 32 sha256_multi_block_shaext: _shaext_shortcut: mov %rsp,%rax push %rbx push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp movaps %xmm6,(%rsp) movaps %xmm7,0x10(%rsp) movaps %xmm8,0x20(%rsp) movaps %xmm9,0x30(%rsp) movaps %xmm10,-0x78(%rax) movaps %xmm11,-0x68(%rax) movaps %xmm12,-0x58(%rax) movaps %xmm13,-0x48(%rax) movaps %xmm14,-0x38(%rax) movaps %xmm15,-0x28(%rax) ___ $code.=<<___; sub \$`$REG_SZ*18`,%rsp shl \$1,$num # we process pair at a time and \$-256,%rsp lea 0x80($ctx),$ctx # size optimization mov %rax,`$REG_SZ*17`(%rsp) # original %rsp .Lbody_shaext: lea `$REG_SZ*16`(%rsp),%rbx lea K256_shaext+0x80(%rip),$Tbl .Loop_grande_shaext: mov $num,`$REG_SZ*17+8`(%rsp) # orignal $num xor $num,$num ___ for($i=0;$i<2;$i++) { $code.=<<___; mov `16*$i+0`($inp),@ptr[$i] # input pointer mov `16*$i+8`($inp),%ecx # number of blocks cmp $num,%ecx cmovg %ecx,$num # find maximum test %ecx,%ecx mov %ecx,`4*$i`(%rbx) # initialize counters cmovle %rsp,@ptr[$i] # cancel input ___ } $code.=<<___; test $num,$num jz .Ldone_shaext movq 0x00-0x80($ctx),$ABEF0 # A1.A0 movq 0x20-0x80($ctx),@MSG0[0] # B1.B0 movq 0x40-0x80($ctx),$CDGH0 # C1.C0 movq 0x60-0x80($ctx),@MSG0[1] # D1.D0 movq 0x80-0x80($ctx),@MSG1[0] # E1.E0 movq 0xa0-0x80($ctx),@MSG1[1] # F1.F0 movq 0xc0-0x80($ctx),@MSG1[2] # G1.G0 movq 0xe0-0x80($ctx),@MSG1[3] # H1.H0 punpckldq @MSG0[0],$ABEF0 # B1.A1.B0.A0 punpckldq @MSG0[1],$CDGH0 # D1.C1.D0.C0 punpckldq @MSG1[1],@MSG1[0] # F1.E1.F0.E0 punpckldq @MSG1[3],@MSG1[2] # H1.G1.H0.G0 movdqa K256_shaext-0x10(%rip),$TMPx # byte swap movdqa $ABEF0,$ABEF1 movdqa $CDGH0,$CDGH1 punpcklqdq @MSG1[0],$ABEF0 # F0.E0.B0.A0 punpcklqdq @MSG1[2],$CDGH0 # H0.G0.D0.C0 punpckhqdq @MSG1[0],$ABEF1 # F1.E1.B1.A1 punpckhqdq @MSG1[2],$CDGH1 # H1.G1.D1.C1 pshufd \$0b00011011,$ABEF0,$ABEF0 pshufd \$0b00011011,$CDGH0,$CDGH0 pshufd \$0b00011011,$ABEF1,$ABEF1 pshufd \$0b00011011,$CDGH1,$CDGH1 jmp .Loop_shaext .align 32 .Loop_shaext: movdqu 0x00(@ptr[0]),@MSG0[0] movdqu 0x00(@ptr[1]),@MSG1[0] movdqu 0x10(@ptr[0]),@MSG0[1] movdqu 0x10(@ptr[1]),@MSG1[1] movdqu 0x20(@ptr[0]),@MSG0[2] pshufb $TMPx,@MSG0[0] movdqu 0x20(@ptr[1]),@MSG1[2] pshufb $TMPx,@MSG1[0] movdqu 0x30(@ptr[0]),@MSG0[3] lea 0x40(@ptr[0]),@ptr[0] movdqu 0x30(@ptr[1]),@MSG1[3] lea 0x40(@ptr[1]),@ptr[1] movdqa 0*16-0x80($Tbl),$Wi pshufb $TMPx,@MSG0[1] paddd @MSG0[0],$Wi pxor $ABEF0,@MSG0[0] # black magic movdqa $Wi,$TMP0 movdqa 0*16-0x80($Tbl),$TMP1 pshufb $TMPx,@MSG1[1] paddd @MSG1[0],$TMP1 movdqa $CDGH0,0x50(%rsp) # offload sha256rnds2 $ABEF0,$CDGH0 # 0-3 pxor $ABEF1,@MSG1[0] # black magic movdqa $TMP1,$Wi movdqa $CDGH1,0x70(%rsp) sha256rnds2 $ABEF1,$CDGH1 # 0-3 pshufd \$0x0e,$TMP0,$Wi pxor $ABEF0,@MSG0[0] # black magic movdqa $ABEF0,0x40(%rsp) # offload sha256rnds2 $CDGH0,$ABEF0 pshufd \$0x0e,$TMP1,$Wi pxor $ABEF1,@MSG1[0] # black 
magic movdqa $ABEF1,0x60(%rsp) movdqa 1*16-0x80($Tbl),$TMP0 paddd @MSG0[1],$TMP0 pshufb $TMPx,@MSG0[2] sha256rnds2 $CDGH1,$ABEF1 movdqa $TMP0,$Wi movdqa 1*16-0x80($Tbl),$TMP1 paddd @MSG1[1],$TMP1 sha256rnds2 $ABEF0,$CDGH0 # 4-7 movdqa $TMP1,$Wi prefetcht0 127(@ptr[0]) pshufb $TMPx,@MSG0[3] pshufb $TMPx,@MSG1[2] prefetcht0 127(@ptr[1]) sha256rnds2 $ABEF1,$CDGH1 # 4-7 pshufd \$0x0e,$TMP0,$Wi pshufb $TMPx,@MSG1[3] sha256msg1 @MSG0[1],@MSG0[0] sha256rnds2 $CDGH0,$ABEF0 pshufd \$0x0e,$TMP1,$Wi movdqa 2*16-0x80($Tbl),$TMP0 paddd @MSG0[2],$TMP0 sha256rnds2 $CDGH1,$ABEF1 movdqa $TMP0,$Wi movdqa 2*16-0x80($Tbl),$TMP1 paddd @MSG1[2],$TMP1 sha256rnds2 $ABEF0,$CDGH0 # 8-11 sha256msg1 @MSG1[1],@MSG1[0] movdqa $TMP1,$Wi movdqa @MSG0[3],$TMPx sha256rnds2 $ABEF1,$CDGH1 # 8-11 pshufd \$0x0e,$TMP0,$Wi palignr \$4,@MSG0[2],$TMPx paddd $TMPx,@MSG0[0] movdqa @MSG1[3],$TMPx palignr \$4,@MSG1[2],$TMPx sha256msg1 @MSG0[2],@MSG0[1] sha256rnds2 $CDGH0,$ABEF0 pshufd \$0x0e,$TMP1,$Wi movdqa 3*16-0x80($Tbl),$TMP0 paddd @MSG0[3],$TMP0 sha256rnds2 $CDGH1,$ABEF1 sha256msg1 @MSG1[2],@MSG1[1] movdqa $TMP0,$Wi movdqa 3*16-0x80($Tbl),$TMP1 paddd $TMPx,@MSG1[0] paddd @MSG1[3],$TMP1 sha256msg2 @MSG0[3],@MSG0[0] sha256rnds2 $ABEF0,$CDGH0 # 12-15 movdqa $TMP1,$Wi movdqa @MSG0[0],$TMPx palignr \$4,@MSG0[3],$TMPx sha256rnds2 $ABEF1,$CDGH1 # 12-15 sha256msg2 @MSG1[3],@MSG1[0] pshufd \$0x0e,$TMP0,$Wi paddd $TMPx,@MSG0[1] movdqa @MSG1[0],$TMPx palignr \$4,@MSG1[3],$TMPx sha256msg1 @MSG0[3],@MSG0[2] sha256rnds2 $CDGH0,$ABEF0 pshufd \$0x0e,$TMP1,$Wi movdqa 4*16-0x80($Tbl),$TMP0 paddd @MSG0[0],$TMP0 sha256rnds2 $CDGH1,$ABEF1 sha256msg1 @MSG1[3],@MSG1[2] ___ for($i=4;$i<16-3;$i++) { $code.=<<___; movdqa $TMP0,$Wi movdqa $i*16-0x80($Tbl),$TMP1 paddd $TMPx,@MSG1[1] paddd @MSG1[0],$TMP1 sha256msg2 @MSG0[0],@MSG0[1] sha256rnds2 $ABEF0,$CDGH0 # 16-19... movdqa $TMP1,$Wi movdqa @MSG0[1],$TMPx palignr \$4,@MSG0[0],$TMPx sha256rnds2 $ABEF1,$CDGH1 # 16-19... 
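	# note: the two lanes advance in lockstep -- MSG0[] carries buffer 0's
	# message schedule and MSG1[] buffer 1's, while each sha256rnds2 retires
	# two rounds for a single lane, so one pass through this unrolled body
	# completes four rounds per lane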
sha256msg2 @MSG1[0],@MSG1[1] pshufd \$0x0e,$TMP0,$Wi paddd $TMPx,@MSG0[2] movdqa @MSG1[1],$TMPx palignr \$4,@MSG1[0],$TMPx sha256msg1 @MSG0[0],@MSG0[3] sha256rnds2 $CDGH0,$ABEF0 pshufd \$0x0e,$TMP1,$Wi movdqa `($i+1)*16`-0x80($Tbl),$TMP0 paddd @MSG0[1],$TMP0 sha256rnds2 $CDGH1,$ABEF1 sha256msg1 @MSG1[0],@MSG1[3] ___ push(@MSG0,shift(@MSG0)); push(@MSG1,shift(@MSG1)); } $code.=<<___; movdqa $TMP0,$Wi movdqa 13*16-0x80($Tbl),$TMP1 paddd $TMPx,@MSG1[1] paddd @MSG1[0],$TMP1 sha256msg2 @MSG0[0],@MSG0[1] sha256rnds2 $ABEF0,$CDGH0 # 52-55 movdqa $TMP1,$Wi movdqa @MSG0[1],$TMPx palignr \$4,@MSG0[0],$TMPx sha256rnds2 $ABEF1,$CDGH1 # 52-55 sha256msg2 @MSG1[0],@MSG1[1] pshufd \$0x0e,$TMP0,$Wi paddd $TMPx,@MSG0[2] movdqa @MSG1[1],$TMPx palignr \$4,@MSG1[0],$TMPx nop sha256rnds2 $CDGH0,$ABEF0 pshufd \$0x0e,$TMP1,$Wi movdqa 14*16-0x80($Tbl),$TMP0 paddd @MSG0[1],$TMP0 sha256rnds2 $CDGH1,$ABEF1 movdqa $TMP0,$Wi movdqa 14*16-0x80($Tbl),$TMP1 paddd $TMPx,@MSG1[2] paddd @MSG1[1],$TMP1 sha256msg2 @MSG0[1],@MSG0[2] nop sha256rnds2 $ABEF0,$CDGH0 # 56-59 movdqa $TMP1,$Wi mov \$1,%ecx pxor @MSG0[1],@MSG0[1] # zero sha256rnds2 $ABEF1,$CDGH1 # 56-59 sha256msg2 @MSG1[1],@MSG1[2] pshufd \$0x0e,$TMP0,$Wi movdqa 15*16-0x80($Tbl),$TMP0 paddd @MSG0[2],$TMP0 movq (%rbx),@MSG0[2] # pull counters nop sha256rnds2 $CDGH0,$ABEF0 pshufd \$0x0e,$TMP1,$Wi movdqa 15*16-0x80($Tbl),$TMP1 paddd @MSG1[2],$TMP1 sha256rnds2 $CDGH1,$ABEF1 movdqa $TMP0,$Wi cmp 4*0(%rbx),%ecx # examine counters cmovge %rsp,@ptr[0] # cancel input cmp 4*1(%rbx),%ecx cmovge %rsp,@ptr[1] pshufd \$0x00,@MSG0[2],@MSG1[0] sha256rnds2 $ABEF0,$CDGH0 # 60-63 movdqa $TMP1,$Wi pshufd \$0x55,@MSG0[2],@MSG1[1] movdqa @MSG0[2],@MSG1[2] sha256rnds2 $ABEF1,$CDGH1 # 60-63 pshufd \$0x0e,$TMP0,$Wi pcmpgtd @MSG0[1],@MSG1[0] pcmpgtd @MSG0[1],@MSG1[1] sha256rnds2 $CDGH0,$ABEF0 pshufd \$0x0e,$TMP1,$Wi pcmpgtd @MSG0[1],@MSG1[2] # counter mask movdqa K256_shaext-0x10(%rip),$TMPx sha256rnds2 $CDGH1,$ABEF1 pand @MSG1[0],$CDGH0 pand @MSG1[1],$CDGH1 pand @MSG1[0],$ABEF0 pand @MSG1[1],$ABEF1 paddd @MSG0[2],@MSG1[2] # counters-- paddd 0x50(%rsp),$CDGH0 paddd 0x70(%rsp),$CDGH1 paddd 0x40(%rsp),$ABEF0 paddd 0x60(%rsp),$ABEF1 movq @MSG1[2],(%rbx) # save counters dec $num jnz .Loop_shaext mov `$REG_SZ*17+8`(%rsp),$num pshufd \$0b00011011,$ABEF0,$ABEF0 pshufd \$0b00011011,$CDGH0,$CDGH0 pshufd \$0b00011011,$ABEF1,$ABEF1 pshufd \$0b00011011,$CDGH1,$CDGH1 movdqa $ABEF0,@MSG0[0] movdqa $CDGH0,@MSG0[1] punpckldq $ABEF1,$ABEF0 # B1.B0.A1.A0 punpckhdq $ABEF1,@MSG0[0] # F1.F0.E1.E0 punpckldq $CDGH1,$CDGH0 # D1.D0.C1.C0 punpckhdq $CDGH1,@MSG0[1] # H1.H0.G1.G0 movq $ABEF0,0x00-0x80($ctx) # A1.A0 psrldq \$8,$ABEF0 movq @MSG0[0],0x80-0x80($ctx) # E1.E0 psrldq \$8,@MSG0[0] movq $ABEF0,0x20-0x80($ctx) # B1.B0 movq @MSG0[0],0xa0-0x80($ctx) # F1.F0 movq $CDGH0,0x40-0x80($ctx) # C1.C0 psrldq \$8,$CDGH0 movq @MSG0[1],0xc0-0x80($ctx) # G1.G0 psrldq \$8,@MSG0[1] movq $CDGH0,0x60-0x80($ctx) # D1.D0 movq @MSG0[1],0xe0-0x80($ctx) # H1.H0 lea `$REG_SZ/2`($ctx),$ctx lea `16*2`($inp),$inp dec $num jnz .Loop_grande_shaext .Ldone_shaext: #mov `$REG_SZ*17`(%rsp),%rax # original %rsp ___ $code.=<<___ if ($win64); movaps -0xb8(%rax),%xmm6 movaps -0xa8(%rax),%xmm7 movaps -0x98(%rax),%xmm8 movaps -0x88(%rax),%xmm9 movaps -0x78(%rax),%xmm10 movaps -0x68(%rax),%xmm11 movaps -0x58(%rax),%xmm12 movaps -0x48(%rax),%xmm13 movaps -0x38(%rax),%xmm14 movaps -0x28(%rax),%xmm15 ___ $code.=<<___; mov -16(%rax),%rbp mov -8(%rax),%rbx lea (%rax),%rsp .Lepilogue_shaext: ret .size sha256_multi_block_shaext,.-sha256_multi_block_shaext ___ 
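The transposed layout documented in the prototype comment near the top of this script (word k of every lane stored contiguously in ctx, one {ptr,blocks} descriptor per lane) dictates how a caller must prepare its arguments. Below is a minimal C sketch of driving the four-lane SSE path; the struct definitions mirror the script's own comment, but the IV seeding, the requirement that each lane supply whole pre-padded 64-byte blocks, and the helpers mb_init/hash4 are illustrative assumptions, not part of this file:

#include <string.h>

/* mirrors the prototype comment at the top of this script */
typedef struct {
    unsigned int A[8], B[8], C[8], D[8], E[8], F[8], G[8], H[8];
} SHA256_MB_CTX;

typedef struct {
    void *ptr;     /* whole 64-byte blocks; padding is the caller's job */
    int blocks;    /* block count; 0 cancels the lane */
} HASH_DESC;

void sha256_multi_block(SHA256_MB_CTX *ctx, HASH_DESC inp[8], int num);

/* hypothetical helper: seed every lane of the transposed state with
 * the standard SHA-256 initial values */
static void mb_init(SHA256_MB_CTX *c)
{
    static const unsigned int iv[8] = {
        0x6a09e667u, 0xbb67ae85u, 0x3c6ef372u, 0xa54ff53au,
        0x510e527fu, 0x9b05688cu, 0x1f83d9abu, 0x5be0cd19u
    };
    unsigned int *w[8] = { c->A, c->B, c->C, c->D, c->E, c->F, c->G, c->H };
    int i, j;
    for (i = 0; i < 8; i++)
        for (j = 0; j < 8; j++)
            w[i][j] = iv[i];
}

/* hypothetical caller: hash four independent pre-padded messages */
static void hash4(unsigned char *msg[4], const int nblocks[4])
{
    SHA256_MB_CTX ctx;
    HASH_DESC inp[8];
    int i;

    memset(inp, 0, sizeof inp);   /* blocks==0 cancels unused lanes */
    mb_init(&ctx);
    for (i = 0; i < 4; i++) {
        inp[i].ptr    = msg[i];
        inp[i].blocks = nblocks[i];
    }
    sha256_multi_block(&ctx, inp, 1);
    /* lane j's digest now sits in ctx.A[j]..ctx.H[j] as 32-bit words */
}

On this SSE path, num counts groups of four descriptors (the .Loop_grande iterations); the prototype comment allows 1 or 2, so num=2 would consume eight descriptors in two passes.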
}}} if ($avx) {{{ sub ROUND_00_15_avx { my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; $code.=<<___ if ($i<15 && $REG_SZ==16); vmovd `4*$i`(@ptr[0]),$Xi vmovd `4*$i`(@ptr[1]),$t1 vpinsrd \$1,`4*$i`(@ptr[2]),$Xi,$Xi vpinsrd \$1,`4*$i`(@ptr[3]),$t1,$t1 vpunpckldq $t1,$Xi,$Xi vpshufb $Xn,$Xi,$Xi ___ $code.=<<___ if ($i==15 && $REG_SZ==16); vmovd `4*$i`(@ptr[0]),$Xi lea `16*4`(@ptr[0]),@ptr[0] vmovd `4*$i`(@ptr[1]),$t1 lea `16*4`(@ptr[1]),@ptr[1] vpinsrd \$1,`4*$i`(@ptr[2]),$Xi,$Xi lea `16*4`(@ptr[2]),@ptr[2] vpinsrd \$1,`4*$i`(@ptr[3]),$t1,$t1 lea `16*4`(@ptr[3]),@ptr[3] vpunpckldq $t1,$Xi,$Xi vpshufb $Xn,$Xi,$Xi ___ $code.=<<___ if ($i<15 && $REG_SZ==32); vmovd `4*$i`(@ptr[0]),$Xi vmovd `4*$i`(@ptr[4]),$t1 vmovd `4*$i`(@ptr[1]),$t2 vmovd `4*$i`(@ptr[5]),$t3 vpinsrd \$1,`4*$i`(@ptr[2]),$Xi,$Xi vpinsrd \$1,`4*$i`(@ptr[6]),$t1,$t1 vpinsrd \$1,`4*$i`(@ptr[3]),$t2,$t2 vpunpckldq $t2,$Xi,$Xi vpinsrd \$1,`4*$i`(@ptr[7]),$t3,$t3 vpunpckldq $t3,$t1,$t1 vinserti128 $t1,$Xi,$Xi vpshufb $Xn,$Xi,$Xi ___ $code.=<<___ if ($i==15 && $REG_SZ==32); vmovd `4*$i`(@ptr[0]),$Xi lea `16*4`(@ptr[0]),@ptr[0] vmovd `4*$i`(@ptr[4]),$t1 lea `16*4`(@ptr[4]),@ptr[4] vmovd `4*$i`(@ptr[1]),$t2 lea `16*4`(@ptr[1]),@ptr[1] vmovd `4*$i`(@ptr[5]),$t3 lea `16*4`(@ptr[5]),@ptr[5] vpinsrd \$1,`4*$i`(@ptr[2]),$Xi,$Xi lea `16*4`(@ptr[2]),@ptr[2] vpinsrd \$1,`4*$i`(@ptr[6]),$t1,$t1 lea `16*4`(@ptr[6]),@ptr[6] vpinsrd \$1,`4*$i`(@ptr[3]),$t2,$t2 lea `16*4`(@ptr[3]),@ptr[3] vpunpckldq $t2,$Xi,$Xi vpinsrd \$1,`4*$i`(@ptr[7]),$t3,$t3 lea `16*4`(@ptr[7]),@ptr[7] vpunpckldq $t3,$t1,$t1 vinserti128 $t1,$Xi,$Xi vpshufb $Xn,$Xi,$Xi ___ $code.=<<___; vpsrld \$6,$e,$sigma vpslld \$26,$e,$t3 vmovdqu $Xi,`&Xi_off($i)` vpaddd $h,$Xi,$Xi # Xi+=h vpsrld \$11,$e,$t2 vpxor $t3,$sigma,$sigma vpslld \$21,$e,$t3 vpaddd `32*($i%8)-128`($Tbl),$Xi,$Xi # Xi+=K[round] vpxor $t2,$sigma,$sigma vpsrld \$25,$e,$t2 vpxor $t3,$sigma,$sigma `"prefetcht0 63(@ptr[0])" if ($i==15)` vpslld \$7,$e,$t3 vpandn $g,$e,$t1 vpand $f,$e,$axb # borrow $axb `"prefetcht0 63(@ptr[1])" if ($i==15)` vpxor $t2,$sigma,$sigma vpsrld \$2,$a,$h # borrow $h vpxor $t3,$sigma,$sigma # Sigma1(e) `"prefetcht0 63(@ptr[2])" if ($i==15)` vpslld \$30,$a,$t2 vpxor $axb,$t1,$t1 # Ch(e,f,g) vpxor $a,$b,$axb # a^b, b^c in next round `"prefetcht0 63(@ptr[3])" if ($i==15)` vpxor $t2,$h,$h vpaddd $sigma,$Xi,$Xi # Xi+=Sigma1(e) vpsrld \$13,$a,$t2 `"prefetcht0 63(@ptr[4])" if ($i==15 && $REG_SZ==32)` vpslld \$19,$a,$t3 vpaddd $t1,$Xi,$Xi # Xi+=Ch(e,f,g) vpand $axb,$bxc,$bxc `"prefetcht0 63(@ptr[5])" if ($i==15 && $REG_SZ==32)` vpxor $t2,$h,$sigma vpsrld \$22,$a,$t2 vpxor $t3,$sigma,$sigma `"prefetcht0 63(@ptr[6])" if ($i==15 && $REG_SZ==32)` vpslld \$10,$a,$t3 vpxor $bxc,$b,$h # h=Maj(a,b,c)=Ch(a^b,c,b) vpaddd $Xi,$d,$d # d+=Xi `"prefetcht0 63(@ptr[7])" if ($i==15 && $REG_SZ==32)` vpxor $t2,$sigma,$sigma vpxor $t3,$sigma,$sigma # Sigma0(a) vpaddd $Xi,$h,$h # h+=Xi vpaddd $sigma,$h,$h # h+=Sigma0(a) ___ $code.=<<___ if (($i%8)==7); add \$`32*8`,$Tbl ___ ($axb,$bxc)=($bxc,$axb); } sub ROUND_16_XX_avx { my $i=shift; $code.=<<___; vmovdqu `&Xi_off($i+1)`,$Xn vpaddd `&Xi_off($i+9)`,$Xi,$Xi # Xi+=X[i+9] vpsrld \$3,$Xn,$sigma vpsrld \$7,$Xn,$t2 vpslld \$25,$Xn,$t3 vpxor $t2,$sigma,$sigma vpsrld \$18,$Xn,$t2 vpxor $t3,$sigma,$sigma vpslld \$14,$Xn,$t3 vmovdqu `&Xi_off($i+14)`,$t1 vpsrld \$10,$t1,$axb # borrow $axb vpxor $t2,$sigma,$sigma vpsrld \$17,$t1,$t2 vpxor $t3,$sigma,$sigma # sigma0(X[i+1]) vpslld \$15,$t1,$t3 vpaddd $sigma,$Xi,$Xi # Xi+=sigma0(e) vpxor $t2,$axb,$sigma vpsrld \$19,$t1,$t2 vpxor $t3,$sigma,$sigma 
vpslld \$13,$t1,$t3 vpxor $t2,$sigma,$sigma vpxor $t3,$sigma,$sigma # sigma0(X[i+14]) vpaddd $sigma,$Xi,$Xi # Xi+=sigma1(X[i+14]) ___ &ROUND_00_15_avx($i,@_); ($Xi,$Xn)=($Xn,$Xi); } $code.=<<___; .type sha256_multi_block_avx,\@function,3 .align 32 sha256_multi_block_avx: _avx_shortcut: ___ $code.=<<___ if ($avx>1); shr \$32,%rcx cmp \$2,$num jb .Lavx test \$`1<<5`,%ecx jnz _avx2_shortcut jmp .Lavx .align 32 .Lavx: ___ $code.=<<___; mov %rsp,%rax push %rbx push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp movaps %xmm6,(%rsp) movaps %xmm7,0x10(%rsp) movaps %xmm8,0x20(%rsp) movaps %xmm9,0x30(%rsp) movaps %xmm10,-0x78(%rax) movaps %xmm11,-0x68(%rax) movaps %xmm12,-0x58(%rax) movaps %xmm13,-0x48(%rax) movaps %xmm14,-0x38(%rax) movaps %xmm15,-0x28(%rax) ___ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp .Lbody_avx: lea K256+128(%rip),$Tbl lea `$REG_SZ*16`(%rsp),%rbx lea 0x80($ctx),$ctx # size optimization .Loop_grande_avx: mov $num,`$REG_SZ*17+8`(%rsp) # original $num xor $num,$num ___ for($i=0;$i<4;$i++) { $code.=<<___; mov `16*$i+0`($inp),@ptr[$i] # input pointer mov `16*$i+8`($inp),%ecx # number of blocks cmp $num,%ecx cmovg %ecx,$num # find maximum test %ecx,%ecx mov %ecx,`4*$i`(%rbx) # initialize counters cmovle $Tbl,@ptr[$i] # cancel input ___ } $code.=<<___; test $num,$num jz .Ldone_avx vmovdqu 0x00-0x80($ctx),$A # load context lea 128(%rsp),%rax vmovdqu 0x20-0x80($ctx),$B vmovdqu 0x40-0x80($ctx),$C vmovdqu 0x60-0x80($ctx),$D vmovdqu 0x80-0x80($ctx),$E vmovdqu 0xa0-0x80($ctx),$F vmovdqu 0xc0-0x80($ctx),$G vmovdqu 0xe0-0x80($ctx),$H vmovdqu .Lpbswap(%rip),$Xn jmp .Loop_avx .align 32 .Loop_avx: vpxor $B,$C,$bxc # magic seed ___ for($i=0;$i<16;$i++) { &ROUND_00_15_avx($i,@V); unshift(@V,pop(@V)); } $code.=<<___; vmovdqu `&Xi_off($i)`,$Xi mov \$3,%ecx jmp .Loop_16_xx_avx .align 32 .Loop_16_xx_avx: ___ for(;$i<32;$i++) { &ROUND_16_XX_avx($i,@V); unshift(@V,pop(@V)); } $code.=<<___; dec %ecx jnz .Loop_16_xx_avx mov \$1,%ecx lea K256+128(%rip),$Tbl ___ for($i=0;$i<4;$i++) { $code.=<<___; cmp `4*$i`(%rbx),%ecx # examine counters cmovge $Tbl,@ptr[$i] # cancel input ___ } $code.=<<___; vmovdqa (%rbx),$sigma # pull counters vpxor $t1,$t1,$t1 vmovdqa $sigma,$Xn vpcmpgtd $t1,$Xn,$Xn # mask value vpaddd $Xn,$sigma,$sigma # counters-- vmovdqu 0x00-0x80($ctx),$t1 vpand $Xn,$A,$A vmovdqu 0x20-0x80($ctx),$t2 vpand $Xn,$B,$B vmovdqu 0x40-0x80($ctx),$t3 vpand $Xn,$C,$C vmovdqu 0x60-0x80($ctx),$Xi vpand $Xn,$D,$D vpaddd $t1,$A,$A vmovdqu 0x80-0x80($ctx),$t1 vpand $Xn,$E,$E vpaddd $t2,$B,$B vmovdqu 0xa0-0x80($ctx),$t2 vpand $Xn,$F,$F vpaddd $t3,$C,$C vmovdqu 0xc0-0x80($ctx),$t3 vpand $Xn,$G,$G vpaddd $Xi,$D,$D vmovdqu 0xe0-0x80($ctx),$Xi vpand $Xn,$H,$H vpaddd $t1,$E,$E vpaddd $t2,$F,$F vmovdqu $A,0x00-0x80($ctx) vpaddd $t3,$G,$G vmovdqu $B,0x20-0x80($ctx) vpaddd $Xi,$H,$H vmovdqu $C,0x40-0x80($ctx) vmovdqu $D,0x60-0x80($ctx) vmovdqu $E,0x80-0x80($ctx) vmovdqu $F,0xa0-0x80($ctx) vmovdqu $G,0xc0-0x80($ctx) vmovdqu $H,0xe0-0x80($ctx) vmovdqu $sigma,(%rbx) # save counters vmovdqu .Lpbswap(%rip),$Xn dec $num jnz .Loop_avx mov `$REG_SZ*17+8`(%rsp),$num lea $REG_SZ($ctx),$ctx lea `16*$REG_SZ/4`($inp),$inp dec $num jnz .Loop_grande_avx .Ldone_avx: mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp vzeroupper ___ $code.=<<___ if ($win64); movaps -0xb8(%rax),%xmm6 movaps -0xa8(%rax),%xmm7 movaps -0x98(%rax),%xmm8 movaps -0x88(%rax),%xmm9 movaps -0x78(%rax),%xmm10 movaps -0x68(%rax),%xmm11 movaps -0x58(%rax),%xmm12 movaps -0x48(%rax),%xmm13 movaps -0x38(%rax),%xmm14 
movaps -0x28(%rax),%xmm15 ___ $code.=<<___; mov -16(%rax),%rbp mov -8(%rax),%rbx lea (%rax),%rsp .Lepilogue_avx: ret .size sha256_multi_block_avx,.-sha256_multi_block_avx ___ if ($avx>1) { $code =~ s/\`([^\`]*)\`/eval $1/gem; $REG_SZ=32; @ptr=map("%r$_",(12..15,8..11)); @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("%ymm$_",(8..15)); ($t1,$t2,$t3,$axb,$bxc,$Xi,$Xn,$sigma)=map("%ymm$_",(0..7)); $code.=<<___; .type sha256_multi_block_avx2,\@function,3 .align 32 sha256_multi_block_avx2: _avx2_shortcut: mov %rsp,%rax push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp movaps %xmm6,(%rsp) movaps %xmm7,0x10(%rsp) movaps %xmm8,0x20(%rsp) movaps %xmm9,0x30(%rsp) movaps %xmm10,0x40(%rsp) movaps %xmm11,0x50(%rsp) movaps %xmm12,-0x78(%rax) movaps %xmm13,-0x68(%rax) movaps %xmm14,-0x58(%rax) movaps %xmm15,-0x48(%rax) ___ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp .Lbody_avx2: lea K256+128(%rip),$Tbl lea 0x80($ctx),$ctx # size optimization .Loop_grande_avx2: mov $num,`$REG_SZ*17+8`(%rsp) # original $num xor $num,$num lea `$REG_SZ*16`(%rsp),%rbx ___ for($i=0;$i<8;$i++) { $code.=<<___; mov `16*$i+0`($inp),@ptr[$i] # input pointer mov `16*$i+8`($inp),%ecx # number of blocks cmp $num,%ecx cmovg %ecx,$num # find maximum test %ecx,%ecx mov %ecx,`4*$i`(%rbx) # initialize counters cmovle $Tbl,@ptr[$i] # cancel input ___ } $code.=<<___; vmovdqu 0x00-0x80($ctx),$A # load context lea 128(%rsp),%rax vmovdqu 0x20-0x80($ctx),$B lea 256+128(%rsp),%rbx vmovdqu 0x40-0x80($ctx),$C vmovdqu 0x60-0x80($ctx),$D vmovdqu 0x80-0x80($ctx),$E vmovdqu 0xa0-0x80($ctx),$F vmovdqu 0xc0-0x80($ctx),$G vmovdqu 0xe0-0x80($ctx),$H vmovdqu .Lpbswap(%rip),$Xn jmp .Loop_avx2 .align 32 .Loop_avx2: vpxor $B,$C,$bxc # magic seed ___ for($i=0;$i<16;$i++) { &ROUND_00_15_avx($i,@V); unshift(@V,pop(@V)); } $code.=<<___; vmovdqu `&Xi_off($i)`,$Xi mov \$3,%ecx jmp .Loop_16_xx_avx2 .align 32 .Loop_16_xx_avx2: ___ for(;$i<32;$i++) { &ROUND_16_XX_avx($i,@V); unshift(@V,pop(@V)); } $code.=<<___; dec %ecx jnz .Loop_16_xx_avx2 mov \$1,%ecx lea `$REG_SZ*16`(%rsp),%rbx lea K256+128(%rip),$Tbl ___ for($i=0;$i<8;$i++) { $code.=<<___; cmp `4*$i`(%rbx),%ecx # examine counters cmovge $Tbl,@ptr[$i] # cancel input ___ } $code.=<<___; vmovdqa (%rbx),$sigma # pull counters vpxor $t1,$t1,$t1 vmovdqa $sigma,$Xn vpcmpgtd $t1,$Xn,$Xn # mask value vpaddd $Xn,$sigma,$sigma # counters-- vmovdqu 0x00-0x80($ctx),$t1 vpand $Xn,$A,$A vmovdqu 0x20-0x80($ctx),$t2 vpand $Xn,$B,$B vmovdqu 0x40-0x80($ctx),$t3 vpand $Xn,$C,$C vmovdqu 0x60-0x80($ctx),$Xi vpand $Xn,$D,$D vpaddd $t1,$A,$A vmovdqu 0x80-0x80($ctx),$t1 vpand $Xn,$E,$E vpaddd $t2,$B,$B vmovdqu 0xa0-0x80($ctx),$t2 vpand $Xn,$F,$F vpaddd $t3,$C,$C vmovdqu 0xc0-0x80($ctx),$t3 vpand $Xn,$G,$G vpaddd $Xi,$D,$D vmovdqu 0xe0-0x80($ctx),$Xi vpand $Xn,$H,$H vpaddd $t1,$E,$E vpaddd $t2,$F,$F vmovdqu $A,0x00-0x80($ctx) vpaddd $t3,$G,$G vmovdqu $B,0x20-0x80($ctx) vpaddd $Xi,$H,$H vmovdqu $C,0x40-0x80($ctx) vmovdqu $D,0x60-0x80($ctx) vmovdqu $E,0x80-0x80($ctx) vmovdqu $F,0xa0-0x80($ctx) vmovdqu $G,0xc0-0x80($ctx) vmovdqu $H,0xe0-0x80($ctx) vmovdqu $sigma,(%rbx) # save counters lea 256+128(%rsp),%rbx vmovdqu .Lpbswap(%rip),$Xn dec $num jnz .Loop_avx2 #mov `$REG_SZ*17+8`(%rsp),$num #lea $REG_SZ($ctx),$ctx #lea `16*$REG_SZ/4`($inp),$inp #dec $num #jnz .Loop_grande_avx2 .Ldone_avx2: mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp vzeroupper ___ $code.=<<___ if ($win64); movaps -0xd8(%rax),%xmm6 movaps -0xc8(%rax),%xmm7 movaps -0xb8(%rax),%xmm8 
movaps -0xa8(%rax),%xmm9 movaps -0x98(%rax),%xmm10 movaps -0x88(%rax),%xmm11 movaps -0x78(%rax),%xmm12 movaps -0x68(%rax),%xmm13 movaps -0x58(%rax),%xmm14 movaps -0x48(%rax),%xmm15 ___ $code.=<<___; mov -48(%rax),%r15 mov -40(%rax),%r14 mov -32(%rax),%r13 mov -24(%rax),%r12 mov -16(%rax),%rbp mov -8(%rax),%rbx lea (%rax),%rsp .Lepilogue_avx2: ret .size sha256_multi_block_avx2,.-sha256_multi_block_avx2 ___ } }}} $code.=<<___; .align 256 K256: ___ sub TABLE { foreach (@_) { $code.=<<___; .long $_,$_,$_,$_ .long $_,$_,$_,$_ ___ } } &TABLE( 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5, 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3, 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc, 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7, 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13, 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3, 0xd192e819,0xd6990624,0xf40e3585,0x106aa070, 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5, 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208, 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 ); $code.=<<___; .Lpbswap: .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap K256_shaext: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .asciz "SHA256 multi-block transform for x86_64, CRYPTOGAMS by " ___ if ($win64) { # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, # CONTEXT *context,DISPATCHER_CONTEXT *disp) $rec="%rcx"; $frame="%rdx"; $context="%r8"; $disp="%r9"; $code.=<<___; .extern __imp_RtlVirtualUnwind .type se_handler,\@abi-omnipotent .align 16 se_handler: push %rsi push %rdi push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 pushfq sub \$64,%rsp mov 120($context),%rax # pull context->Rax mov 248($context),%rbx # pull context->Rip mov 8($disp),%rsi # disp->ImageBase mov 56($disp),%r11 # disp->HandlerData mov 0(%r11),%r10d # HandlerData[0] lea (%rsi,%r10),%r10 # end of prologue label cmp %r10,%rbx # context->Rip<.Lbody jb .Lin_prologue mov 152($context),%rax # pull context->Rsp mov 4(%r11),%r10d # HandlerData[1] lea (%rsi,%r10),%r10 # epilogue label cmp %r10,%rbx # context->Rip>=.Lepilogue jae .Lin_prologue mov `16*17`(%rax),%rax # pull saved stack pointer mov -8(%rax),%rbx mov -16(%rax),%rbp mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp lea -24-10*16(%rax),%rsi lea 512($context),%rdi # &context.Xmm6 mov \$20,%ecx .long 0xa548f3fc # cld; rep movsq .Lin_prologue: mov 8(%rax),%rdi mov 16(%rax),%rsi mov %rax,152($context) # restore context->Rsp mov 
%rsi,168($context) # restore context->Rsi mov %rdi,176($context) # restore context->Rdi mov 40($disp),%rdi # disp->ContextRecord mov $context,%rsi # context mov \$154,%ecx # sizeof(CONTEXT) .long 0xa548f3fc # cld; rep movsq mov $disp,%rsi xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER mov 8(%rsi),%rdx # arg2, disp->ImageBase mov 0(%rsi),%r8 # arg3, disp->ControlPc mov 16(%rsi),%r9 # arg4, disp->FunctionEntry mov 40(%rsi),%r10 # disp->ContextRecord lea 56(%rsi),%r11 # &disp->HandlerData lea 24(%rsi),%r12 # &disp->EstablisherFrame mov %r10,32(%rsp) # arg5 mov %r11,40(%rsp) # arg6 mov %r12,48(%rsp) # arg7 mov %rcx,56(%rsp) # arg8, (NULL) call *__imp_RtlVirtualUnwind(%rip) mov \$1,%eax # ExceptionContinueSearch add \$64,%rsp popfq pop %r15 pop %r14 pop %r13 pop %r12 pop %rbp pop %rbx pop %rdi pop %rsi ret .size se_handler,.-se_handler ___ $code.=<<___ if ($avx>1); .type avx2_handler,\@abi-omnipotent .align 16 avx2_handler: push %rsi push %rdi push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 pushfq sub \$64,%rsp mov 120($context),%rax # pull context->Rax mov 248($context),%rbx # pull context->Rip mov 8($disp),%rsi # disp->ImageBase mov 56($disp),%r11 # disp->HandlerData mov 0(%r11),%r10d # HandlerData[0] lea (%rsi,%r10),%r10 # end of prologue label cmp %r10,%rbx # context->RipRsp mov 4(%r11),%r10d # HandlerData[1] lea (%rsi,%r10),%r10 # epilogue label cmp %r10,%rbx # context->Rip>=epilogue label jae .Lin_prologue mov `32*17`($context),%rax # pull saved stack pointer mov -8(%rax),%rbx mov -16(%rax),%rbp mov -24(%rax),%r12 mov -32(%rax),%r13 mov -40(%rax),%r14 mov -48(%rax),%r15 mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp mov %r12,216($context) # restore cotnext->R12 mov %r13,224($context) # restore cotnext->R13 mov %r14,232($context) # restore cotnext->R14 mov %r15,240($context) # restore cotnext->R15 lea -56-10*16(%rax),%rsi lea 512($context),%rdi # &context.Xmm6 mov \$20,%ecx .long 0xa548f3fc # cld; rep movsq jmp .Lin_prologue .size avx2_handler,.-avx2_handler ___ $code.=<<___; .section .pdata .align 4 .rva .LSEH_begin_sha256_multi_block .rva .LSEH_end_sha256_multi_block .rva .LSEH_info_sha256_multi_block .rva .LSEH_begin_sha256_multi_block_shaext .rva .LSEH_end_sha256_multi_block_shaext .rva .LSEH_info_sha256_multi_block_shaext ___ $code.=<<___ if ($avx); .rva .LSEH_begin_sha256_multi_block_avx .rva .LSEH_end_sha256_multi_block_avx .rva .LSEH_info_sha256_multi_block_avx ___ $code.=<<___ if ($avx>1); .rva .LSEH_begin_sha256_multi_block_avx2 .rva .LSEH_end_sha256_multi_block_avx2 .rva .LSEH_info_sha256_multi_block_avx2 ___ $code.=<<___; .section .xdata .align 8 .LSEH_info_sha256_multi_block: .byte 9,0,0,0 .rva se_handler .rva .Lbody,.Lepilogue # HandlerData[] .LSEH_info_sha256_multi_block_shaext: .byte 9,0,0,0 .rva se_handler .rva .Lbody_shaext,.Lepilogue_shaext # HandlerData[] ___ $code.=<<___ if ($avx); .LSEH_info_sha256_multi_block_avx: .byte 9,0,0,0 .rva se_handler .rva .Lbody_avx,.Lepilogue_avx # HandlerData[] ___ $code.=<<___ if ($avx>1); .LSEH_info_sha256_multi_block_avx2: .byte 9,0,0,0 .rva avx2_handler .rva .Lbody_avx2,.Lepilogue_avx2 # HandlerData[] ___ } #################################################################### sub rex { local *opcode=shift; my ($dst,$src)=@_; my $rex=0; $rex|=0x04 if ($dst>=8); $rex|=0x01 if ($src>=8); unshift @opcode,$rex|0x40 if ($rex); } sub sha256op38 { my $instr = shift; my %opcodelet = ( "sha256rnds2" => 0xcb, "sha256msg1" => 0xcc, "sha256msg2" => 0xcd ); if (defined($opcodelet{$instr}) && @_[0] =~ 
/%xmm([0-9]+),\s*%xmm([0-9]+)/) { my @opcode=(0x0f,0x38); rex(\@opcode,$2,$1); push @opcode,$opcodelet{$instr}; push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M return ".byte\t".join(',',@opcode); } else { return $instr."\t".@_[0]; } } foreach (split("\n",$code)) { s/\`([^\`]*)\`/eval($1)/ge; s/\b(sha256[^\s]*)\s+(.*)/sha256op38($1,$2)/geo or s/\b(vmov[dq])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go or s/\b(vmovdqu)\b(.+)%x%ymm([0-9]+)/$1$2%xmm$3/go or s/\b(vpinsr[qd])\b(.+)%ymm([0-9]+),%ymm([0-9]+)/$1$2%xmm$3,%xmm$4/go or s/\b(vpextr[qd])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go or s/\b(vinserti128)\b(\s+)%ymm/$1$2\$1,%xmm/go or s/\b(vpbroadcast[qd]\s+)%ymm([0-9]+)/$1%xmm$2/go; print $_,"\n"; } close STDOUT; libntru-0.5/src/sph_sha1.h000066400000000000000000000110461271556312200155110ustar00rootroot00000000000000/* $Id: sph_sha1.h 216 2010-06-08 09:46:57Z tp $ */ /** * SHA-1 interface. * * SHA-1 is described in FIPS 180-1 (now superseded by FIPS 180-2, but the * description of SHA-1 is still included and has not changed). FIPS * standards can be found at: http://csrc.nist.gov/publications/fips/ * * @warning A theoretical collision attack against SHA-1, with work * factor 2^63, has been published. SHA-1 should not be used in new * protocol designs. * * ==========================(LICENSE BEGIN)============================ * * Copyright (c) 2007-2010 Projet RNRT SAPHIR * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * ===========================(LICENSE END)============================= * * @file sph_sha1.h * @author Thomas Pornin */ #ifndef SPH_SHA1_H__ #define SPH_SHA1_H__ #include #include "sph_types.h" /** * Output size (in bits) for SHA-1. */ #define SPH_SIZE_sha1 160 /** * This structure is a context for SHA-1 computations: it contains the * intermediate values and some data from the last entered block. Once * a SHA-1 computation has been performed, the context can be reused for * another computation. * * The contents of this structure are private. A running SHA-1 computation * can be cloned by copying the context (e.g. with a simple * memcpy()). */ typedef struct { #ifndef DOXYGEN_IGNORE unsigned char buf[64]; /* first field, for alignment */ sph_u32 val[5]; #if SPH_64 sph_u64 count; #else sph_u32 count_high, count_low; #endif #endif } sph_sha1_context; /** * Initialize a SHA-1 context. This process performs no memory allocation. * * @param cc the SHA-1 context (pointer to a sph_sha1_context) */ void sph_sha1_init(void *cc); /** * Process some data bytes. 
It is acceptable that len is zero * (in which case this function does nothing). * * @param cc the SHA-1 context * @param data the input data * @param len the input data length (in bytes) */ void sph_sha1(void *cc, const void *data, size_t len); /** * Terminate the current SHA-1 computation and output the result into the * provided buffer. The destination buffer must be wide enough to * accomodate the result (20 bytes). The context is automatically * reinitialized. * * @param cc the SHA-1 context * @param dst the destination buffer */ void sph_sha1_close(void *cc, void *dst); /** * Add a few additional bits (0 to 7) to the current computation, then * terminate it and output the result in the provided buffer, which must * be wide enough to accomodate the result (20 bytes). If bit number i * in ub has value 2^i, then the extra bits are those * numbered 7 downto 8-n (this is the big-endian convention at the byte * level). The context is automatically reinitialized. * * @param cc the SHA-1 context * @param ub the extra bits * @param n the number of extra bits (0 to 7) * @param dst the destination buffer */ void sph_sha1_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); /** * Apply the SHA-1 compression function on the provided data. The * msg parameter contains the 16 32-bit input blocks, * as numerical values (hence after the big-endian decoding). The * val parameter contains the 5 32-bit input blocks for * the compression function; the output is written in place in this * array. * * @param msg the message block (16 values) * @param val the function 160-bit input and output */ void sph_sha1_comp(const sph_u32 msg[16], sph_u32 val[5]); #endif libntru-0.5/src/sph_sha2.h000066400000000000000000000275501271556312200155210ustar00rootroot00000000000000/* $Id: sph_sha2.h 216 2010-06-08 09:46:57Z tp $ */ /** * SHA-224, SHA-256, SHA-384 and SHA-512 interface. * * SHA-256 has been published in FIPS 180-2, now amended with a change * notice to include SHA-224 as well (which is a simple variation on * SHA-256). SHA-384 and SHA-512 are also defined in FIPS 180-2. FIPS * standards can be found at: * http://csrc.nist.gov/publications/fips/ * * ==========================(LICENSE BEGIN)============================ * * Copyright (c) 2007-2010 Projet RNRT SAPHIR * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * ===========================(LICENSE END)============================= * * @file sph_sha2.h * @author Thomas Pornin */ #ifndef SPH_SHA2_H__ #define SPH_SHA2_H__ #include #include "sph_types.h" /** * Output size (in bits) for SHA-224. */ #define SPH_SIZE_sha224 224 /** * Output size (in bits) for SHA-256. */ #define SPH_SIZE_sha256 256 /** * This structure is a context for SHA-224 computations: it contains the * intermediate values and some data from the last entered block. Once * a SHA-224 computation has been performed, the context can be reused for * another computation. * * The contents of this structure are private. A running SHA-224 computation * can be cloned by copying the context (e.g. with a simple * memcpy()). */ typedef struct { #ifndef DOXYGEN_IGNORE unsigned char buf[64]; /* first field, for alignment */ sph_u32 val[8]; #if SPH_64 sph_u64 count; #else sph_u32 count_high, count_low; #endif #endif } sph_sha224_context; /** * This structure is a context for SHA-256 computations. It is identical * to the SHA-224 context. However, a context is initialized for SHA-224 * or SHA-256, but not both (the internal IV is not the * same). */ typedef sph_sha224_context sph_sha256_context; /** * Initialize a SHA-224 context. This process performs no memory allocation. * * @param cc the SHA-224 context (pointer to * a sph_sha224_context) */ void sph_sha224_init(void *cc); /** * Process some data bytes. It is acceptable that len is zero * (in which case this function does nothing). * * @param cc the SHA-224 context * @param data the input data * @param len the input data length (in bytes) */ void sph_sha224(void *cc, const void *data, size_t len); /** * Terminate the current SHA-224 computation and output the result into the * provided buffer. The destination buffer must be wide enough to * accomodate the result (28 bytes). The context is automatically * reinitialized. * * @param cc the SHA-224 context * @param dst the destination buffer */ void sph_sha224_close(void *cc, void *dst); /** * Add a few additional bits (0 to 7) to the current computation, then * terminate it and output the result in the provided buffer, which must * be wide enough to accomodate the result (28 bytes). If bit number i * in ub has value 2^i, then the extra bits are those * numbered 7 downto 8-n (this is the big-endian convention at the byte * level). The context is automatically reinitialized. * * @param cc the SHA-224 context * @param ub the extra bits * @param n the number of extra bits (0 to 7) * @param dst the destination buffer */ void sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); /** * Apply the SHA-224 compression function on the provided data. The * msg parameter contains the 16 32-bit input blocks, * as numerical values (hence after the big-endian decoding). The * val parameter contains the 8 32-bit input blocks for * the compression function; the output is written in place in this * array. * * @param msg the message block (16 values) * @param val the function 256-bit input and output */ void sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8]); /** * Initialize a SHA-256 context. This process performs no memory allocation. * * @param cc the SHA-256 context (pointer to * a sph_sha256_context) */ void sph_sha256_init(void *cc); #ifdef DOXYGEN_IGNORE /** * Process some data bytes, for SHA-256. 
This function is identical to * sha_224() * * @param cc the SHA-224 context * @param data the input data * @param len the input data length (in bytes) */ void sph_sha256(void *cc, const void *data, size_t len); #endif #ifndef DOXYGEN_IGNORE #define sph_sha256 sph_sha224 #endif /** * Terminate the current SHA-256 computation and output the result into the * provided buffer. The destination buffer must be wide enough to * accomodate the result (32 bytes). The context is automatically * reinitialized. * * @param cc the SHA-256 context * @param dst the destination buffer */ void sph_sha256_close(void *cc, void *dst); /** * Add a few additional bits (0 to 7) to the current computation, then * terminate it and output the result in the provided buffer, which must * be wide enough to accomodate the result (32 bytes). If bit number i * in ub has value 2^i, then the extra bits are those * numbered 7 downto 8-n (this is the big-endian convention at the byte * level). The context is automatically reinitialized. * * @param cc the SHA-256 context * @param ub the extra bits * @param n the number of extra bits (0 to 7) * @param dst the destination buffer */ void sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); #ifdef DOXYGEN_IGNORE /** * Apply the SHA-256 compression function on the provided data. This * function is identical to sha224_comp(). * * @param msg the message block (16 values) * @param val the function 256-bit input and output */ void sph_sha256_comp(const sph_u32 msg[16], sph_u32 val[8]); #endif #ifndef DOXYGEN_IGNORE #define sph_sha256_comp sph_sha224_comp #endif #if SPH_64 /** * Output size (in bits) for SHA-384. */ #define SPH_SIZE_sha384 384 /** * Output size (in bits) for SHA-512. */ #define SPH_SIZE_sha512 512 /** * This structure is a context for SHA-384 computations: it contains the * intermediate values and some data from the last entered block. Once * a SHA-384 computation has been performed, the context can be reused for * another computation. * * The contents of this structure are private. A running SHA-384 computation * can be cloned by copying the context (e.g. with a simple * memcpy()). */ typedef struct { #ifndef DOXYGEN_IGNORE unsigned char buf[128]; /* first field, for alignment */ sph_u64 val[8]; sph_u64 count; #endif } sph_sha384_context; /** * Initialize a SHA-384 context. This process performs no memory allocation. * * @param cc the SHA-384 context (pointer to * a sph_sha384_context) */ void sph_sha384_init(void *cc); /** * Process some data bytes. It is acceptable that len is zero * (in which case this function does nothing). * * @param cc the SHA-384 context * @param data the input data * @param len the input data length (in bytes) */ void sph_sha384(void *cc, const void *data, size_t len); /** * Terminate the current SHA-384 computation and output the result into the * provided buffer. The destination buffer must be wide enough to * accomodate the result (48 bytes). The context is automatically * reinitialized. * * @param cc the SHA-384 context * @param dst the destination buffer */ void sph_sha384_close(void *cc, void *dst); /** * Add a few additional bits (0 to 7) to the current computation, then * terminate it and output the result in the provided buffer, which must * be wide enough to accomodate the result (48 bytes). If bit number i * in ub has value 2^i, then the extra bits are those * numbered 7 downto 8-n (this is the big-endian convention at the byte * level). The context is automatically reinitialized. 
* * @param cc the SHA-384 context * @param ub the extra bits * @param n the number of extra bits (0 to 7) * @param dst the destination buffer */ void sph_sha384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); /** * Apply the SHA-384 compression function on the provided data. The * msg parameter contains the 16 64-bit input blocks, * as numerical values (hence after the big-endian decoding). The * val parameter contains the 8 64-bit input blocks for * the compression function; the output is written in place in this * array. * * @param msg the message block (16 values) * @param val the function 512-bit input and output */ void sph_sha384_comp(const sph_u64 msg[16], sph_u64 val[8]); /** * This structure is a context for SHA-512 computations. It is identical * to the SHA-384 context. However, a context is initialized for SHA-384 * or SHA-512, but not both (the internal IV is not the * same). */ typedef sph_sha384_context sph_sha512_context; /** * Initialize a SHA-512 context. This process performs no memory allocation. * * @param cc the SHA-512 context (pointer to * a sph_sha512_context) */ void sph_sha512_init(void *cc); #ifdef DOXYGEN_IGNORE /** * Process some data bytes, for SHA-512. This function is identical to * sph_sha384(). * * @param cc the SHA-384 context * @param data the input data * @param len the input data length (in bytes) */ void sph_sha512(void *cc, const void *data, size_t len); #endif #ifndef DOXYGEN_IGNORE #define sph_sha512 sph_sha384 #endif /** * Terminate the current SHA-512 computation and output the result into the * provided buffer. The destination buffer must be wide enough to * accomodate the result (64 bytes). The context is automatically * reinitialized. * * @param cc the SHA-512 context * @param dst the destination buffer */ void sph_sha512_close(void *cc, void *dst); /** * Add a few additional bits (0 to 7) to the current computation, then * terminate it and output the result in the provided buffer, which must * be wide enough to accomodate the result (64 bytes). If bit number i * in ub has value 2^i, then the extra bits are those * numbered 7 downto 8-n (this is the big-endian convention at the byte * level). The context is automatically reinitialized. * * @param cc the SHA-512 context * @param ub the extra bits * @param n the number of extra bits (0 to 7) * @param dst the destination buffer */ void sph_sha512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); #ifdef DOXYGEN_IGNORE /** * Apply the SHA-512 compression function. This function is identical to * sph_sha384_comp(). * * @param msg the message block (16 values) * @param val the function 512-bit input and output */ void sph_sha512_comp(const sph_u64 msg[16], sph_u64 val[8]); #endif #ifndef DOXYGEN_IGNORE #define sph_sha512_comp sph_sha384_comp #endif #endif #endif libntru-0.5/src/sph_types.h000066400000000000000000002041121271556312200160170ustar00rootroot00000000000000/* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */ /** * Basic type definitions. * * This header file defines the generic integer types that will be used * for the implementation of hash functions; it also contains helper * functions which encode and decode multi-byte integer values, using * either little-endian or big-endian conventions. * * This file contains a compile-time test on the size of a byte * (the unsigned char C type). If bytes are not octets, * i.e. if they do not have a size of exactly 8 bits, then compilation * is aborted. 
Architectures where bytes are not octets are relatively * rare, even in the embedded devices market. We forbid non-octet bytes * because there is no clear convention on how octet streams are encoded * on such systems. * * ==========================(LICENSE BEGIN)============================ * * Copyright (c) 2007-2010 Projet RNRT SAPHIR * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * ===========================(LICENSE END)============================= * * @file sph_types.h * @author Thomas Pornin */ #ifndef SPH_TYPES_H__ #define SPH_TYPES_H__ #include <limits.h> /* * All our I/O functions are defined over octet streams. We do not know * how to handle input data if bytes are not octets. */ #if CHAR_BIT != 8 #error This code requires 8-bit bytes #endif /* ============= BEGIN documentation block for Doxygen ============ */ #ifdef DOXYGEN_IGNORE /** @mainpage sphlib C code documentation * * @section overview Overview * * sphlib is a library which contains implementations of * various cryptographic hash functions. These pages have been generated * with doxygen and * document the API for the C implementations. * * The API is described in appropriate header files, which are available * in the "Files" section. Each hash function family has its own header, * whose name begins with "sph_" and contains the family * name. For instance, the API for the RIPEMD hash functions is available * in the header file sph_ripemd.h. * * @section principles API structure and conventions * * @subsection io Input/output conventions * * In all generality, hash functions operate over strings of bits. * Individual bits are rarely encountered in C programming or actual * communication protocols; most protocols converge on the ubiquitous * "octet" which is a group of eight bits. Data is thus expressed as a * stream of octets. The C programming language contains the notion of a * "byte", which is a data unit managed under the type "unsigned * char". The C standard prescribes that a byte should hold at * least eight bits, but possibly more. Most modern architectures, even * in the embedded world, feature eight-bit bytes, i.e. map bytes to * octets. * * Nevertheless, for some of the implemented hash functions, an extra * API has been added, which allows the input of arbitrary sequences of * bits: when the computation is about to be closed, 1 to 7 extra bits * can be added. The functions for which this API is implemented include * the SHA-2 functions and all SHA-3 candidates.
 * * sphlib defines hash functions which may hash octet streams, * i.e. streams of bits where the number of bits is a multiple of eight. * The data input functions in the sphlib API expect data * as anonymous pointers ("const void *") with a length * (of type "size_t") which gives the input data chunk length * in bytes. A byte is assumed to be an octet; the sph_types.h * header contains a compile-time test which prevents compilation on * architectures where this property is not met. * * The hash function output is also converted into bytes. All currently * implemented hash functions have an output width which is a multiple of * eight, and this is likely to remain true for new designs. * * Most hash functions internally convert input data into 32-bit or 64-bit * words, using either little-endian or big-endian conversion. The hash * output also often consists of such words, which are encoded into output * bytes with a similar endianness convention. Some hash functions have * been only loosely specified on that subject; when necessary, * sphlib has been tested against published "reference" * implementations in order to use the same conventions. * * @subsection shortname Function short name * * Each implemented hash function has a "short name" which is used * internally to derive the identifiers for the functions and context * structures which the function uses. For instance, MD5 has the short * name "md5". Short names are listed in the next section, * for the implemented hash functions. In subsequent sections, the * short name will be assumed to be "XXX": replace with the * actual hash function name to get the C identifier. * * Note: some functions within the same family share the same core * elements, such as update function or context structure. Correspondingly, * some of the defined types or functions may actually be macros which * transparently evaluate to another type or function name. * * @subsection context Context structure * * Each implemented hash function has its own context structure, available * under the type name "sph_XXX_context" for the hash function * with short name "XXX". This structure holds all needed * state for a running hash computation. * * The contents of these structures are meant to be opaque, and private * to the implementation. However, these contents are specified in the * header files so that application code which uses sphlib * may access the size of those structures. * * The caller is responsible for allocating the context structure, * whether by dynamic allocation (malloc() or equivalent), * static allocation (a global permanent variable), as an automatic * variable ("on the stack"), or by any other means which ensures proper * structure alignment. sphlib code performs no dynamic * allocation by itself. * * The context must be initialized before use, using the * sph_XXX_init() function. This function sets the context * state to proper initial values for hashing. * * Since all state data is contained within the context structure, * sphlib is thread-safe and reentrant: several hash * computations may be performed in parallel, provided that they do not * operate on the same context. Moreover, a running computation can be * cloned by copying the context (with a simple memcpy()): * the context and its clone are then independent and may be updated * with new data and/or closed without interfering with each other. * Similarly, a context structure can be moved in memory at will: * context structures contain no pointer, in particular no pointer to * themselves.
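 *
 * As an illustrative sketch (this example is not part of the original
 * documentation), a complete one-shot SHA-256 computation follows the
 * init/update/close pattern described above:
 *
 *   #include "sph_sha2.h"
 *
 *   sph_sha256_context cc;
 *   unsigned char out[32];
 *   sph_sha256_init(&cc);
 *   sph_sha256(&cc, "abc", 3);
 *   sph_sha256_close(&cc, out);   // out receives the 32-byte digest
 *
 * The same pattern applies to every implemented function, with "sha256"
 * replaced by the appropriate short name.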
 * * @subsection dataio Data input * * Hashed data is input with the sph_XXX() function, which * takes as parameters a pointer to the context, a pointer to the data * to hash, and the number of data bytes to hash. The context is updated * with the new data. * * Data can be input in one or several calls, with arbitrary input lengths. * However, it is best, performance-wise, to input data by relatively big * chunks (say a few kilobytes), because this allows sphlib to * optimize things and avoid internal copying. * * When all data has been input, the context can be closed with * sph_XXX_close(). The hash output is computed and written * into the provided buffer. The caller must take care to provide a * buffer of appropriate length; e.g., when using SHA-1, the output is * a 20-byte value, therefore the output buffer must be at least 20 bytes * long. * * For some hash functions, the sph_XXX_addbits_and_close() * function can be used instead of sph_XXX_close(). This * function can take a few extra bits to be added at * the end of the input message. This allows hashing messages with a * bit length which is not a multiple of 8. The extra bits are provided * as an unsigned integer value, and a bit count. The bit count must be * between 0 and 7, inclusive. The extra bits are provided as bits 7 to * 0 (bits of numerical value 128, 64, 32... down to 0), in that order. * For instance, to add three bits of value 1, 1 and 0, the unsigned * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count * will be 3. * * The SPH_SIZE_XXX macro is defined for each hash function; * it evaluates to the function output size, expressed in bits. For instance, * SPH_SIZE_sha1 evaluates to 160. * * When closed, the context is automatically reinitialized and can be * immediately used for another computation. It is not necessary to call * sph_XXX_init() after a close. Note that * sph_XXX_init() can still be called to "reset" a context, * i.e. forget previously input data, and get back to the initial state. * * @subsection alignment Data alignment * * "Alignment" is a property of data, which is said to be "properly * aligned" when its emplacement in memory is such that the data can * be optimally read by full words. This depends on the type of access; * basically, some hash functions will read data by 32-bit or 64-bit * words. sphlib does not mandate such alignment for input * data, but using aligned data can substantially improve performance. * * As a rule, it is best to input data by chunks whose length (in bytes) * is a multiple of eight, and which begin at "generally aligned" * addresses, such as the base address returned by a call to * malloc(). * * @section functions Implemented functions * * We give here the list of implemented functions. They are grouped by * family; to each family corresponds a specific header file. Each * individual function has its associated "short name". Please refer to * the documentation for that header file to get details on the hash * function denomination and provenance. * * Note: the functions marked with a '(64)' in the list below are * available only if the C compiler provides an integer type of length * 64 bits or more. Such a type is mandatory in the latest C standard * (ISO 9899:1999, aka "C99") and is present in several older compilers * as well, so chances are that such a type is available.
* * - HAVAL family: file sph_haval.h * - HAVAL-128/3 (128-bit, 3 passes): short name: haval128_3 * - HAVAL-128/4 (128-bit, 4 passes): short name: haval128_4 * - HAVAL-128/5 (128-bit, 5 passes): short name: haval128_5 * - HAVAL-160/3 (160-bit, 3 passes): short name: haval160_3 * - HAVAL-160/4 (160-bit, 4 passes): short name: haval160_4 * - HAVAL-160/5 (160-bit, 5 passes): short name: haval160_5 * - HAVAL-192/3 (192-bit, 3 passes): short name: haval192_3 * - HAVAL-192/4 (192-bit, 4 passes): short name: haval192_4 * - HAVAL-192/5 (192-bit, 5 passes): short name: haval192_5 * - HAVAL-224/3 (224-bit, 3 passes): short name: haval224_3 * - HAVAL-224/4 (224-bit, 4 passes): short name: haval224_4 * - HAVAL-224/5 (224-bit, 5 passes): short name: haval224_5 * - HAVAL-256/3 (256-bit, 3 passes): short name: haval256_3 * - HAVAL-256/4 (256-bit, 4 passes): short name: haval256_4 * - HAVAL-256/5 (256-bit, 5 passes): short name: haval256_5 * - MD2: file sph_md2.h, short name: md2 * - MD4: file sph_md4.h, short name: md4 * - MD5: file sph_md5.h, short name: md5 * - PANAMA: file sph_panama.h, short name: panama * - RadioGatun family: file sph_radiogatun.h * - RadioGatun[32]: short name: radiogatun32 * - RadioGatun[64]: short name: radiogatun64 (64) * - RIPEMD family: file sph_ripemd.h * - RIPEMD: short name: ripemd * - RIPEMD-128: short name: ripemd128 * - RIPEMD-160: short name: ripemd160 * - SHA-0: file sph_sha0.h, short name: sha0 * - SHA-1: file sph_sha1.h, short name: sha1 * - SHA-2 family, 32-bit hashes: file sph_sha2.h * - SHA-224: short name: sha224 * - SHA-256: short name: sha256 * - SHA-384: short name: sha384 (64) * - SHA-512: short name: sha512 (64) * - Tiger family: file sph_tiger.h * - Tiger: short name: tiger (64) * - Tiger2: short name: tiger2 (64) * - WHIRLPOOL family: file sph_whirlpool.h * - WHIRLPOOL-0: short name: whirlpool0 (64) * - WHIRLPOOL-1: short name: whirlpool1 (64) * - WHIRLPOOL: short name: whirlpool (64) * * The fourteen second-round SHA-3 candidates are also implemented; * when applicable, the implementations follow the "final" specifications * as published for the third round of the SHA-3 competition (BLAKE, * Groestl, JH, Keccak and Skein have been tweaked for third round). 
 * * - BLAKE family: file sph_blake.h * - BLAKE-224: short name: blake224 * - BLAKE-256: short name: blake256 * - BLAKE-384: short name: blake384 * - BLAKE-512: short name: blake512 * - BMW (Blue Midnight Wish) family: file sph_bmw.h * - BMW-224: short name: bmw224 * - BMW-256: short name: bmw256 * - BMW-384: short name: bmw384 (64) * - BMW-512: short name: bmw512 (64) * - CubeHash family: file sph_cubehash.h (specified as * CubeHash16/32 in the CubeHash specification) * - CubeHash-224: short name: cubehash224 * - CubeHash-256: short name: cubehash256 * - CubeHash-384: short name: cubehash384 * - CubeHash-512: short name: cubehash512 * - ECHO family: file sph_echo.h * - ECHO-224: short name: echo224 * - ECHO-256: short name: echo256 * - ECHO-384: short name: echo384 * - ECHO-512: short name: echo512 * - Fugue family: file sph_fugue.h * - Fugue-224: short name: fugue224 * - Fugue-256: short name: fugue256 * - Fugue-384: short name: fugue384 * - Fugue-512: short name: fugue512 * - Groestl family: file sph_groestl.h * - Groestl-224: short name: groestl224 * - Groestl-256: short name: groestl256 * - Groestl-384: short name: groestl384 * - Groestl-512: short name: groestl512 * - Hamsi family: file sph_hamsi.h * - Hamsi-224: short name: hamsi224 * - Hamsi-256: short name: hamsi256 * - Hamsi-384: short name: hamsi384 * - Hamsi-512: short name: hamsi512 * - JH family: file sph_jh.h * - JH-224: short name: jh224 * - JH-256: short name: jh256 * - JH-384: short name: jh384 * - JH-512: short name: jh512 * - Keccak family: file sph_keccak.h * - Keccak-224: short name: keccak224 * - Keccak-256: short name: keccak256 * - Keccak-384: short name: keccak384 * - Keccak-512: short name: keccak512 * - Luffa family: file sph_luffa.h * - Luffa-224: short name: luffa224 * - Luffa-256: short name: luffa256 * - Luffa-384: short name: luffa384 * - Luffa-512: short name: luffa512 * - Shabal family: file sph_shabal.h * - Shabal-192: short name: shabal192 * - Shabal-224: short name: shabal224 * - Shabal-256: short name: shabal256 * - Shabal-384: short name: shabal384 * - Shabal-512: short name: shabal512 * - SHAvite-3 family: file sph_shavite.h * - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"): * short name: shavite224 * - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"): * short name: shavite256 * - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"): * short name: shavite384 * - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"): * short name: shavite512 * - SIMD family: file sph_simd.h * - SIMD-224: short name: simd224 * - SIMD-256: short name: simd256 * - SIMD-384: short name: simd384 * - SIMD-512: short name: simd512 * - Skein family: file sph_skein.h * - Skein-224 (nominally specified as Skein-512-224): short name: * skein224 (64) * - Skein-256 (nominally specified as Skein-512-256): short name: * skein256 (64) * - Skein-384 (nominally specified as Skein-512-384): short name: * skein384 (64) * - Skein-512 (nominally specified as Skein-512-512): short name: * skein512 (64) * * For the second-round SHA-3 candidates, the functions are as specified * for round 2, i.e. with the "tweaks" that some candidates added * between round 1 and round 2. Also, some of the submitted packages for * round 2 contained errors, in the specification, reference code, or * both. sphlib implements the corrected versions. */ /** @hideinitializer * Unsigned integer type whose length is at least 32 bits; on most * architectures, it will have a width of exactly 32 bits.
Unsigned C * types implement arithmetic modulo a power of 2; use the * SPH_T32() macro to ensure that the value is truncated * to exactly 32 bits. Unless otherwise specified, all macros and * functions which accept sph_u32 values assume that these * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures * where sph_u32 is larger than that. */ typedef __arch_dependant__ sph_u32; /** @hideinitializer * Signed integer type corresponding to sph_u32; it has * width 32 bits or more. */ typedef __arch_dependant__ sph_s32; /** @hideinitializer * Unsigned integer type whose length is at least 64 bits; on most * architectures which feature such a type, it will have a width of * exactly 64 bits. C99-compliant platforms will have this type; it * is also defined when the GNU compiler (gcc) is used, and on * platforms where unsigned long is large enough. If this * type is not available, then some hash functions which depend on * a 64-bit type will not be available (most notably SHA-384, SHA-512, * Tiger and WHIRLPOOL). */ typedef __arch_dependant__ sph_u64; /** @hideinitializer * Signed integer type corresponding to sph_u64; it has * width 64 bits or more. */ typedef __arch_dependant__ sph_s64; /** * This macro expands the token x into a suitable * constant expression of type sph_u32. Depending on * how this type is defined, a suffix such as UL may * be appended to the argument. * * @param x the token to expand into a suitable constant expression */ #define SPH_C32(x) /** * Truncate a 32-bit value to exactly 32 bits. On most systems, this is * a no-op, recognized as such by the compiler. * * @param x the value to truncate (of type sph_u32) */ #define SPH_T32(x) /** * Rotate a 32-bit value by a number of bits to the left. The rotate * count must reside between 1 and 31. This macro assumes that its * first argument fits in 32 bits (no extra bit allowed on machines where * sph_u32 is wider); both arguments may be evaluated * several times. * * @param x the value to rotate (of type sph_u32) * @param n the rotation count (between 1 and 31, inclusive) */ #define SPH_ROTL32(x, n) /** * Rotate a 32-bit value by a number of bits to the right. The rotate * count must reside between 1 and 31. This macro assumes that its * first argument fits in 32 bits (no extra bit allowed on machines where * sph_u32 is wider); both arguments may be evaluated * several times. * * @param x the value to rotate (of type sph_u32) * @param n the rotation count (between 1 and 31, inclusive) */ #define SPH_ROTR32(x, n) /** * This macro is defined on systems for which a 64-bit type has been * detected, and is used for sph_u64. */ #define SPH_64 /** * This macro is defined on systems where the "native" integer size is * 64 bits (64-bit values fit in one register). */ #define SPH_64_TRUE /** * This macro expands the token x into a suitable * constant expression of type sph_u64. Depending on * how this type is defined, a suffix such as ULL may * be appended to the argument. This macro is defined only if a * 64-bit type was detected and used for sph_u64. * * @param x the token to expand into a suitable constant expression */ #define SPH_C64(x) /** * Truncate a 64-bit value to exactly 64 bits. On most systems, this is * a no-op, recognized as such by the compiler. This macro is defined only * if a 64-bit type was detected and used for sph_u64. * * @param x the value to truncate (of type sph_u64) */ #define SPH_T64(x) /** * Rotate a 64-bit value by a number of bits to the left. The rotate * count must reside between 1 and 63.
This macro assumes that its * first argument fits in 64 bits (no extra bit allowed on machines where * sph_u64 is wider); both arguments may be evaluated * several times. This macro is defined only if a 64-bit type was detected * and used for sph_u64. * * @param x the value to rotate (of type sph_u64) * @param n the rotation count (between 1 and 63, inclusive) */ #define SPH_ROTL64(x, n) /** * Rotate a 64-bit value by a number of bits to the right. The rotate * count must reside between 1 and 63. This macro assumes that its * first argument fits in 64 bits (no extra bit allowed on machines where * sph_u64 is wider); both arguments may be evaluated * several times. This macro is defined only if a 64-bit type was detected * and used for sph_u64. * * @param x the value to rotate (of type sph_u64) * @param n the rotation count (between 1 and 63, inclusive) */ #define SPH_ROTR64(x, n) /** * This macro evaluates to inline or an equivalent construction, * if available on the compilation platform, or to nothing otherwise. This * is used to declare inline functions, for which the compiler should * endeavour to include the code directly in the caller. Inline functions * are typically defined in header files as replacement for macros. */ #define SPH_INLINE /** * This macro is defined if the platform has been detected as using * little-endian convention. This implies that the sph_u32 * type (and the sph_u64 type also, if it is defined) has * an exact width (i.e. exactly 32-bit, respectively 64-bit). */ #define SPH_LITTLE_ENDIAN /** * This macro is defined if the platform has been detected as using * big-endian convention. This implies that the sph_u32 * type (and the sph_u64 type also, if it is defined) has * an exact width (i.e. exactly 32-bit, respectively 64-bit). */ #define SPH_BIG_ENDIAN /** * This macro is defined if 32-bit words (and 64-bit words, if defined) * can be read from and written to memory efficiently in little-endian * convention. This is the case for little-endian platforms, and also * for the big-endian platforms which have special little-endian access * opcodes (e.g. Ultrasparc). */ #define SPH_LITTLE_FAST /** * This macro is defined if 32-bit words (and 64-bit words, if defined) * can be read from and written to memory efficiently in big-endian * convention. This is the case for big-endian platforms, and also * for the little-endian platforms which have special big-endian access * opcodes. */ #define SPH_BIG_FAST /** * On some platforms, this macro is defined to an unsigned integer type * into which pointer values may be cast. The resulting value can then * be tested for being a multiple of 2, 4 or 8, indicating an aligned * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses. */ #define SPH_UPTR /** * When defined, this macro indicates that unaligned memory accesses * are possible with only a minor penalty, and thus should be preferred * over strategies which first copy data to an aligned buffer. */ #define SPH_UNALIGNED /** * Byte-swap a 32-bit word (i.e. 0x12345678 becomes * 0x78563412). This is an inline function which resorts * to inline assembly on some platforms, for better performance. * * @param x the 32-bit value to byte-swap * @return the byte-swapped value */ static inline sph_u32 sph_bswap32(sph_u32 x); /** * Byte-swap a 64-bit word. This is an inline function which resorts * to inline assembly on some platforms, for better performance.
This * function is defined only if a suitable 64-bit type was found for * sph_u64 * * @param x the 64-bit value to byte-swap * @return the byte-swapped value */ static inline sph_u64 sph_bswap64(sph_u64 x); /** * Decode a 16-bit unsigned value from memory, in little-endian convention * (least significant byte comes first). * * @param src the source address * @return the decoded value */ static inline unsigned sph_dec16le(const void *src); /** * Encode a 16-bit unsigned value into memory, in little-endian convention * (least significant byte comes first). * * @param dst the destination buffer * @param val the value to encode */ static inline void sph_enc16le(void *dst, unsigned val); /** * Decode a 16-bit unsigned value from memory, in big-endian convention * (most significant byte comes first). * * @param src the source address * @return the decoded value */ static inline unsigned sph_dec16be(const void *src); /** * Encode a 16-bit unsigned value into memory, in big-endian convention * (most significant byte comes first). * * @param dst the destination buffer * @param val the value to encode */ static inline void sph_enc16be(void *dst, unsigned val); /** * Decode a 32-bit unsigned value from memory, in little-endian convention * (least significant byte comes first). * * @param src the source address * @return the decoded value */ static inline sph_u32 sph_dec32le(const void *src); /** * Decode a 32-bit unsigned value from memory, in little-endian convention * (least significant byte comes first). This function assumes that the * source address is suitably aligned for a direct access, if the platform * supports such things; it can thus be marginally faster than the generic * sph_dec32le() function. * * @param src the source address * @return the decoded value */ static inline sph_u32 sph_dec32le_aligned(const void *src); /** * Encode a 32-bit unsigned value into memory, in little-endian convention * (least significant byte comes first). * * @param dst the destination buffer * @param val the value to encode */ static inline void sph_enc32le(void *dst, sph_u32 val); /** * Encode a 32-bit unsigned value into memory, in little-endian convention * (least significant byte comes first). This function assumes that the * destination address is suitably aligned for a direct access, if the * platform supports such things; it can thus be marginally faster than * the generic sph_enc32le() function. * * @param dst the destination buffer * @param val the value to encode */ static inline void sph_enc32le_aligned(void *dst, sph_u32 val); /** * Decode a 32-bit unsigned value from memory, in big-endian convention * (most significant byte comes first). * * @param src the source address * @return the decoded value */ static inline sph_u32 sph_dec32be(const void *src); /** * Decode a 32-bit unsigned value from memory, in big-endian convention * (most significant byte comes first). This function assumes that the * source address is suitably aligned for a direct access, if the platform * supports such things; it can thus be marginally faster than the generic * sph_dec32be() function. * * @param src the source address * @return the decoded value */ static inline sph_u32 sph_dec32be_aligned(const void *src); /** * Encode a 32-bit unsigned value into memory, in big-endian convention * (most significant byte comes first). 
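 *
 * For example (illustrative, not part of the original documentation),
 * sph_enc32be(buf, 0x01020304) writes the bytes 0x01, 0x02, 0x03, 0x04
 * into buf[0] through buf[3], in that order.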
* * @param dst the destination buffer * @param val the value to encode */ static inline void sph_enc32be(void *dst, sph_u32 val); /** * Encode a 32-bit unsigned value into memory, in big-endian convention * (most significant byte comes first). This function assumes that the * destination address is suitably aligned for a direct access, if the * platform supports such things; it can thus be marginally faster than * the generic sph_enc32be() function. * * @param dst the destination buffer * @param val the value to encode */ static inline void sph_enc32be_aligned(void *dst, sph_u32 val); /** * Decode a 64-bit unsigned value from memory, in little-endian convention * (least significant byte comes first). This function is defined only * if a suitable 64-bit type was detected and used for sph_u64. * * @param src the source address * @return the decoded value */ static inline sph_u64 sph_dec64le(const void *src); /** * Decode a 64-bit unsigned value from memory, in little-endian convention * (least significant byte comes first). This function assumes that the * source address is suitably aligned for a direct access, if the platform * supports such things; it can thus be marginally faster than the generic * sph_dec64le() function. This function is defined only * if a suitable 64-bit type was detected and used for sph_u64. * * @param src the source address * @return the decoded value */ static inline sph_u64 sph_dec64le_aligned(const void *src); /** * Encode a 64-bit unsigned value into memory, in little-endian convention * (least significant byte comes first). This function is defined only * if a suitable 64-bit type was detected and used for sph_u64. * * @param dst the destination buffer * @param val the value to encode */ static inline void sph_enc64le(void *dst, sph_u64 val); /** * Encode a 64-bit unsigned value into memory, in little-endian convention * (least significant byte comes first). This function assumes that the * destination address is suitably aligned for a direct access, if the * platform supports such things; it can thus be marginally faster than * the generic sph_enc64le() function. This function is defined * only if a suitable 64-bit type was detected and used for * sph_u64. * * @param dst the destination buffer * @param val the value to encode */ static inline void sph_enc64le_aligned(void *dst, sph_u64 val); /** * Decode a 64-bit unsigned value from memory, in big-endian convention * (most significant byte comes first). This function is defined only * if a suitable 64-bit type was detected and used for sph_u64. * * @param src the source address * @return the decoded value */ static inline sph_u64 sph_dec64be(const void *src); /** * Decode a 64-bit unsigned value from memory, in big-endian convention * (most significant byte comes first). This function assumes that the * source address is suitably aligned for a direct access, if the platform * supports such things; it can thus be marginally faster than the generic * sph_dec64be() function. This function is defined only * if a suitable 64-bit type was detected and used for sph_u64. * * @param src the source address * @return the decoded value */ static inline sph_u64 sph_dec64be_aligned(const void *src); /** * Encode a 64-bit unsigned value into memory, in big-endian convention * (most significant byte comes first). This function is defined only * if a suitable 64-bit type was detected and used for sph_u64. 
 * * @param dst the destination buffer * @param val the value to encode */ static inline void sph_enc64be(void *dst, sph_u64 val); /** * Encode a 64-bit unsigned value into memory, in big-endian convention * (most significant byte comes first). This function assumes that the * destination address is suitably aligned for a direct access, if the * platform supports such things; it can thus be marginally faster than * the generic sph_enc64be() function. This function is defined * only if a suitable 64-bit type was detected and used for * sph_u64. * * @param dst the destination buffer * @param val the value to encode */ static inline void sph_enc64be_aligned(void *dst, sph_u64 val); #endif /* ============== END documentation block for Doxygen ============= */ #ifndef DOXYGEN_IGNORE /* * We want to define the types "sph_u32" and "sph_u64" which hold * unsigned values of at least, respectively, 32 and 64 bits. These * tests should select appropriate types for most platforms. The * macro "SPH_64" is defined if the 64-bit type is supported. */ #undef SPH_64 #undef SPH_64_TRUE #if defined __STDC__ && __STDC_VERSION__ >= 199901L /* * On C99 implementations, we can use <stdint.h> to get an exact 64-bit * type, if any, or otherwise use a wider type (which must exist, for * C99 conformance). */ #include <stdint.h> #ifdef UINT32_MAX typedef uint32_t sph_u32; typedef int32_t sph_s32; #else typedef uint_fast32_t sph_u32; typedef int_fast32_t sph_s32; #endif #if !SPH_NO_64 #ifdef UINT64_MAX typedef uint64_t sph_u64; typedef int64_t sph_s64; #else typedef uint_fast64_t sph_u64; typedef int_fast64_t sph_s64; #endif #endif #define SPH_C32(x) ((sph_u32)(x)) #if !SPH_NO_64 #define SPH_C64(x) ((sph_u64)(x)) #define SPH_64 1 #endif #else /* * On non-C99 systems, we use "unsigned int" if it is wide enough, * "unsigned long" otherwise. This supports all "reasonable" architectures. * We have to be cautious: pre-C99 preprocessors handle constants * differently in '#if' expressions. Hence the shifts to test UINT_MAX. */ #if ((UINT_MAX >> 11) >> 11) >= 0x3FF typedef unsigned int sph_u32; typedef int sph_s32; #define SPH_C32(x) ((sph_u32)(x ## U)) #else typedef unsigned long sph_u32; typedef long sph_s32; #define SPH_C32(x) ((sph_u32)(x ## UL)) #endif #if !SPH_NO_64 /* * We want a 64-bit type. We use "unsigned long" if it is wide enough (as * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9), * "unsigned long long" otherwise, if available. We use ULLONG_MAX to * test whether "unsigned long long" is available; we also know that * gcc features this type, even if the libc headers do not know it. */ #if ((ULONG_MAX >> 31) >> 31) >= 3 typedef unsigned long sph_u64; typedef long sph_s64; #define SPH_C64(x) ((sph_u64)(x ## UL)) #define SPH_64 1 #elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__ typedef unsigned long long sph_u64; typedef long long sph_s64; #define SPH_C64(x) ((sph_u64)(x ## ULL)) #define SPH_64 1 #else /* * No 64-bit type... */ #endif #endif #endif /* * If the "unsigned long" type has length 64 bits or more, then this is * a "true" 64-bit architecture. This is also true with Visual C on * amd64, even though the "long" type is limited to 32 bits. */ #if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64) #define SPH_64_TRUE 1 #endif /* * Implementation note: some processors have specific opcodes to perform * a rotation. Recent versions of gcc recognize the expressions below and * use the relevant opcodes, when appropriate.
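 *
 * Illustrative note (not part of the original comment): with the
 * definitions below, SPH_ROTL32(x, 8) expands to
 *
 *   SPH_T32(((x) << (8)) | ((x) >> (32 - (8))))
 *
 * which recent gcc versions typically compile down to a single rotate
 * instruction (e.g. "roll" on x86).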
*/ #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) #define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) #if SPH_64 #define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) #define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) #define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) #endif #ifndef DOXYGEN_IGNORE /* * Define SPH_INLINE to be an "inline" qualifier, if available. We define * some small macro-like functions which benefit greatly from being inlined. */ #if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__ #define SPH_INLINE inline #elif defined _MSC_VER #define SPH_INLINE __inline #else #define SPH_INLINE #endif #endif /* * We define some macros which qualify the architecture. These macros * may be explicitly set externally (e.g. as compiler parameters). The * code below sets those macros if they are not already defined. * * Most macros are boolean, thus evaluate to either zero or non-zero. * The SPH_UPTR macro is special, in that it evaluates to a C type, * or is not defined. * * SPH_UPTR if defined: unsigned type to cast pointers into * * SPH_UNALIGNED non-zero if unaligned accesses are efficient * SPH_LITTLE_ENDIAN non-zero if architecture is known to be little-endian * SPH_BIG_ENDIAN non-zero if architecture is known to be big-endian * SPH_LITTLE_FAST non-zero if little-endian decoding is fast * SPH_BIG_FAST non-zero if big-endian decoding is fast * * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN * _must_ be non-zero in those situations. The 32-bit and 64-bit types * _must_ also have an exact width. * * SPH_SPARCV9_GCC_32 UltraSPARC-compatible with gcc, 32-bit mode * SPH_SPARCV9_GCC_64 UltraSPARC-compatible with gcc, 64-bit mode * SPH_SPARCV9_GCC UltraSPARC-compatible with gcc * SPH_I386_GCC x86-compatible (32-bit) with gcc * SPH_I386_MSVC x86-compatible (32-bit) with Microsoft Visual C * SPH_AMD64_GCC x86-compatible (64-bit) with gcc * SPH_AMD64_MSVC x86-compatible (64-bit) with Microsoft Visual C * SPH_PPC32_GCC PowerPC, 32-bit, with gcc * SPH_PPC64_GCC PowerPC, 64-bit, with gcc * * TODO: enhance automatic detection, for more architectures and compilers. * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with * some very fast functions (e.g. MD4) when using unaligned input data. * The CPU-specific-with-GCC macros are useful only for inline assembly, * normally restrained to this header file. */ /* * 32-bit x86, aka "i386 compatible". */ #if defined __i386__ || defined _M_IX86 #define SPH_DETECT_UNALIGNED 1 #define SPH_DETECT_LITTLE_ENDIAN 1 #define SPH_DETECT_UPTR sph_u32 #ifdef __GNUC__ #define SPH_DETECT_I386_GCC 1 #endif #ifdef _MSC_VER #define SPH_DETECT_I386_MSVC 1 #endif /* * 64-bit x86, hereafter known as "amd64". */ #elif defined __x86_64 || defined _M_X64 #define SPH_DETECT_UNALIGNED 1 #define SPH_DETECT_LITTLE_ENDIAN 1 #define SPH_DETECT_UPTR sph_u64 #ifdef __GNUC__ #define SPH_DETECT_AMD64_GCC 1 #endif #ifdef _MSC_VER #define SPH_DETECT_AMD64_MSVC 1 #endif /* * 64-bit Sparc architecture (implies v9). */ #elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \ || defined __sparcv9 #define SPH_DETECT_BIG_ENDIAN 1 #define SPH_DETECT_UPTR sph_u64 #ifdef __GNUC__ #define SPH_DETECT_SPARCV9_GCC_64 1 #define SPH_DETECT_LITTLE_FAST 1 #endif /* * 32-bit Sparc.
*/ #elif (defined __sparc__ || defined __sparc) \ && !(defined __sparcv9 || defined __arch64__) #define SPH_DETECT_BIG_ENDIAN 1 #define SPH_DETECT_UPTR sph_u32 #if defined __GNUC__ && defined __sparc_v9__ #define SPH_DETECT_SPARCV9_GCC_32 1 #define SPH_DETECT_LITTLE_FAST 1 #endif /* * ARM, little-endian. */ #elif defined __arm__ && __ARMEL__ #define SPH_DETECT_LITTLE_ENDIAN 1 /* * MIPS, little-endian. */ #elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__ #define SPH_DETECT_LITTLE_ENDIAN 1 /* * MIPS, big-endian. */ #elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__ #define SPH_DETECT_BIG_ENDIAN 1 /* * PowerPC. */ #elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \ || defined _ARCH_PPC /* * Note: we do not declare cross-endian access to be "fast": even if * using inline assembly, implementation should still assume that * keeping the decoded word in a temporary is faster than decoding * it again. */ #if defined __GNUC__ #if SPH_64_TRUE #define SPH_DETECT_PPC64_GCC 1 #else #define SPH_DETECT_PPC32_GCC 1 #endif #endif #if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN #define SPH_DETECT_BIG_ENDIAN 1 #elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN #define SPH_DETECT_LITTLE_ENDIAN 1 #endif /* * Itanium, 64-bit. */ #elif defined __ia64 || defined __ia64__ \ || defined __itanium__ || defined _M_IA64 #if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN #define SPH_DETECT_BIG_ENDIAN 1 #else #define SPH_DETECT_LITTLE_ENDIAN 1 #endif #if defined __LP64__ || defined _LP64 #define SPH_DETECT_UPTR sph_u64 #else #define SPH_DETECT_UPTR sph_u32 #endif #endif #if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64 #define SPH_DETECT_SPARCV9_GCC 1 #endif #if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED #define SPH_UNALIGNED SPH_DETECT_UNALIGNED #endif #if defined SPH_DETECT_UPTR && !defined SPH_UPTR #define SPH_UPTR SPH_DETECT_UPTR #endif #if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN #define SPH_LITTLE_ENDIAN SPH_DETECT_LITTLE_ENDIAN #endif #if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN #define SPH_BIG_ENDIAN SPH_DETECT_BIG_ENDIAN #endif #if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST #define SPH_LITTLE_FAST SPH_DETECT_LITTLE_FAST #endif #if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST #define SPH_BIG_FAST SPH_DETECT_BIG_FAST #endif #if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32 #define SPH_SPARCV9_GCC_32 SPH_DETECT_SPARCV9_GCC_32 #endif #if defined SPH_DETECT_SPARCV9_GCC_64 && !defined SPH_SPARCV9_GCC_64 #define SPH_SPARCV9_GCC_64 SPH_DETECT_SPARCV9_GCC_64 #endif #if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC #define SPH_SPARCV9_GCC SPH_DETECT_SPARCV9_GCC #endif #if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC #define SPH_I386_GCC SPH_DETECT_I386_GCC #endif #if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC #define SPH_I386_MSVC SPH_DETECT_I386_MSVC #endif #if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC #define SPH_AMD64_GCC SPH_DETECT_AMD64_GCC #endif #if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC #define SPH_AMD64_MSVC SPH_DETECT_AMD64_MSVC #endif #if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC #define SPH_PPC32_GCC SPH_DETECT_PPC32_GCC #endif #if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC #define SPH_PPC64_GCC SPH_DETECT_PPC64_GCC #endif #if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST #define SPH_LITTLE_FAST 1 #endif #if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST #define SPH_BIG_FAST 1 #endif #if 
defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN) #error SPH_UPTR defined, but endianness is not known. #endif #if SPH_I386_GCC && !SPH_NO_ASM /* * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit * values. */ static SPH_INLINE sph_u32 sph_bswap32(sph_u32 x) { __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x)); return x; } #if SPH_64 static SPH_INLINE sph_u64 sph_bswap64(sph_u64 x) { return ((sph_u64)sph_bswap32((sph_u32)x) << 32) | (sph_u64)sph_bswap32((sph_u32)(x >> 32)); } #endif #elif SPH_AMD64_GCC && !SPH_NO_ASM /* * On x86 64-bit, with gcc, we use the bswapl and bswapq opcodes to * byte-swap 32-bit and 64-bit values, respectively. */ static SPH_INLINE sph_u32 sph_bswap32(sph_u32 x) { __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x)); return x; } #if SPH_64 static SPH_INLINE sph_u64 sph_bswap64(sph_u64 x) { __asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x)); return x; } #endif /* * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough * to generate proper opcodes for endianness swapping with the pure C * implementation below. * #elif SPH_I386_MSVC && !SPH_NO_ASM static __inline sph_u32 __declspec(naked) __fastcall sph_bswap32(sph_u32 x) { __asm { bswap ecx mov eax,ecx ret } } #if SPH_64 static SPH_INLINE sph_u64 sph_bswap64(sph_u64 x) { return ((sph_u64)sph_bswap32((sph_u32)x) << 32) | (sph_u64)sph_bswap32((sph_u32)(x >> 32)); } #endif * * [end of disabled code] */ #else static SPH_INLINE sph_u32 sph_bswap32(sph_u32 x) { x = SPH_T32((x << 16) | (x >> 16)); x = ((x & SPH_C32(0xFF00FF00)) >> 8) | ((x & SPH_C32(0x00FF00FF)) << 8); return x; } #if SPH_64 /** * Byte-swap a 64-bit value. * * @param x the input value * @return the byte-swapped value */ static SPH_INLINE sph_u64 sph_bswap64(sph_u64 x) { x = SPH_T64((x << 32) | (x >> 32)); x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16) | ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16); x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8) | ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8); return x; } #endif #endif #if SPH_SPARCV9_GCC && !SPH_NO_ASM /* * On UltraSPARC systems, native ordering is big-endian, but it is * possible to perform little-endian read accesses by specifying the * address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use * the opcode "lda [%reg]0x88,%dst", where %reg is the register which * contains the source address and %dst is the destination register, * or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register * to get the address space name. The latter format is better since it * combines an addition and the actual access in a single opcode; but * it requires the setting (and subsequent resetting) of %asi, which is * slow. Some operations (e.g. the MD5 compression function) combine many * successive little-endian read accesses, which may share the same * %asi setting. The macros below contain the appropriate inline * assembly.
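 *
 * As an illustrative sketch (not part of the original comment; buf, x0
 * and x1 are hypothetical), the intended usage pattern of the macros is:
 *
 *   SPH_SPARCV9_SET_ASI
 *   x0 = SPH_SPARCV9_DEC32LE(buf, 0);
 *   x1 = SPH_SPARCV9_DEC32LE(buf, 1);
 *   SPH_SPARCV9_RESET_ASI
 *
 * so that one %asi setup is amortized over many little-endian loads.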
*/ #define SPH_SPARCV9_SET_ASI \ sph_u32 sph_sparcv9_asi; \ __asm__ __volatile__ ( \ "rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi)); #define SPH_SPARCV9_RESET_ASI \ __asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi)); #define SPH_SPARCV9_DEC32LE(base, idx) ({ \ sph_u32 sph_sparcv9_tmp; \ __asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \ : "=r" (sph_sparcv9_tmp) : "r" (base)); \ sph_sparcv9_tmp; \ }) #endif static SPH_INLINE void sph_enc16be(void *dst, unsigned val) { ((unsigned char *)dst)[0] = (val >> 8); ((unsigned char *)dst)[1] = val; } static SPH_INLINE unsigned sph_dec16be(const void *src) { return ((unsigned)(((const unsigned char *)src)[0]) << 8) | (unsigned)(((const unsigned char *)src)[1]); } static SPH_INLINE void sph_enc16le(void *dst, unsigned val) { ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = val >> 8; } static SPH_INLINE unsigned sph_dec16le(const void *src) { return (unsigned)(((const unsigned char *)src)[0]) | ((unsigned)(((const unsigned char *)src)[1]) << 8); } /** * Encode a 32-bit value into the provided buffer (big endian convention). * * @param dst the destination buffer * @param val the 32-bit value to encode */ static SPH_INLINE void sph_enc32be(void *dst, sph_u32 val) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_LITTLE_ENDIAN val = sph_bswap32(val); #endif *(sph_u32 *)dst = val; #else if (((SPH_UPTR)dst & 3) == 0) { #if SPH_LITTLE_ENDIAN val = sph_bswap32(val); #endif *(sph_u32 *)dst = val; } else { ((unsigned char *)dst)[0] = (val >> 24); ((unsigned char *)dst)[1] = (val >> 16); ((unsigned char *)dst)[2] = (val >> 8); ((unsigned char *)dst)[3] = val; } #endif #else ((unsigned char *)dst)[0] = (val >> 24); ((unsigned char *)dst)[1] = (val >> 16); ((unsigned char *)dst)[2] = (val >> 8); ((unsigned char *)dst)[3] = val; #endif } /** * Encode a 32-bit value into the provided buffer (big endian convention). * The destination buffer must be properly aligned. * * @param dst the destination buffer (32-bit aligned) * @param val the value to encode */ static SPH_INLINE void sph_enc32be_aligned(void *dst, sph_u32 val) { #if SPH_LITTLE_ENDIAN *(sph_u32 *)dst = sph_bswap32(val); #elif SPH_BIG_ENDIAN *(sph_u32 *)dst = val; #else ((unsigned char *)dst)[0] = (val >> 24); ((unsigned char *)dst)[1] = (val >> 16); ((unsigned char *)dst)[2] = (val >> 8); ((unsigned char *)dst)[3] = val; #endif } /** * Decode a 32-bit value from the provided buffer (big endian convention). * * @param src the source buffer * @return the decoded value */ static SPH_INLINE sph_u32 sph_dec32be(const void *src) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_LITTLE_ENDIAN return sph_bswap32(*(const sph_u32 *)src); #else return *(const sph_u32 *)src; #endif #else if (((SPH_UPTR)src & 3) == 0) { #if SPH_LITTLE_ENDIAN return sph_bswap32(*(const sph_u32 *)src); #else return *(const sph_u32 *)src; #endif } else { return ((sph_u32)(((const unsigned char *)src)[0]) << 24) | ((sph_u32)(((const unsigned char *)src)[1]) << 16) | ((sph_u32)(((const unsigned char *)src)[2]) << 8) | (sph_u32)(((const unsigned char *)src)[3]); } #endif #else return ((sph_u32)(((const unsigned char *)src)[0]) << 24) | ((sph_u32)(((const unsigned char *)src)[1]) << 16) | ((sph_u32)(((const unsigned char *)src)[2]) << 8) | (sph_u32)(((const unsigned char *)src)[3]); #endif } /** * Decode a 32-bit value from the provided buffer (big endian convention). * The source buffer must be properly aligned. 
 * * @param src the source buffer (32-bit aligned) * @return the decoded value */ static SPH_INLINE sph_u32 sph_dec32be_aligned(const void *src) { #if SPH_LITTLE_ENDIAN return sph_bswap32(*(const sph_u32 *)src); #elif SPH_BIG_ENDIAN return *(const sph_u32 *)src; #else return ((sph_u32)(((const unsigned char *)src)[0]) << 24) | ((sph_u32)(((const unsigned char *)src)[1]) << 16) | ((sph_u32)(((const unsigned char *)src)[2]) << 8) | (sph_u32)(((const unsigned char *)src)[3]); #endif } /** * Encode a 32-bit value into the provided buffer (little endian convention). * * @param dst the destination buffer * @param val the 32-bit value to encode */ static SPH_INLINE void sph_enc32le(void *dst, sph_u32 val) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_BIG_ENDIAN val = sph_bswap32(val); #endif *(sph_u32 *)dst = val; #else if (((SPH_UPTR)dst & 3) == 0) { #if SPH_BIG_ENDIAN val = sph_bswap32(val); #endif *(sph_u32 *)dst = val; } else { ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = (val >> 8); ((unsigned char *)dst)[2] = (val >> 16); ((unsigned char *)dst)[3] = (val >> 24); } #endif #else ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = (val >> 8); ((unsigned char *)dst)[2] = (val >> 16); ((unsigned char *)dst)[3] = (val >> 24); #endif } /** * Encode a 32-bit value into the provided buffer (little endian convention). * The destination buffer must be properly aligned. * * @param dst the destination buffer (32-bit aligned) * @param val the value to encode */ static SPH_INLINE void sph_enc32le_aligned(void *dst, sph_u32 val) { #if SPH_LITTLE_ENDIAN *(sph_u32 *)dst = val; #elif SPH_BIG_ENDIAN *(sph_u32 *)dst = sph_bswap32(val); #else ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = (val >> 8); ((unsigned char *)dst)[2] = (val >> 16); ((unsigned char *)dst)[3] = (val >> 24); #endif } /** * Decode a 32-bit value from the provided buffer (little endian convention). * * @param src the source buffer * @return the decoded value */ static SPH_INLINE sph_u32 sph_dec32le(const void *src) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_BIG_ENDIAN return sph_bswap32(*(const sph_u32 *)src); #else return *(const sph_u32 *)src; #endif #else if (((SPH_UPTR)src & 3) == 0) { #if SPH_BIG_ENDIAN #if SPH_SPARCV9_GCC && !SPH_NO_ASM sph_u32 tmp; /* * "__volatile__" is needed here because without it, * gcc-3.4.3 miscompiles the code and performs the * access before the test on the address, thus triggering * a bus error... */ __asm__ __volatile__ ( "lda [%1]0x88,%0" : "=r" (tmp) : "r" (src)); return tmp; /* * On PowerPC, this turns out not to be worth the effort: the inline * assembly makes the GCC optimizer uncomfortable, which tends to nullify * the decoding gains. * * For most hash functions, using this inline assembly trick changes * hashing speed by less than 5% and often _reduces_ it. The biggest * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is * less than 10%. The speed gain on CubeHash is probably due to the * chronic shortage of registers that CubeHash endures; for the other * functions, the generic code appears to be efficient enough already.
* #elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM sph_u32 tmp; __asm__ __volatile__ ( "lwbrx %0,0,%1" : "=r" (tmp) : "r" (src)); return tmp; */ #else return sph_bswap32(*(const sph_u32 *)src); #endif #else return *(const sph_u32 *)src; #endif } else { return (sph_u32)(((const unsigned char *)src)[0]) | ((sph_u32)(((const unsigned char *)src)[1]) << 8) | ((sph_u32)(((const unsigned char *)src)[2]) << 16) | ((sph_u32)(((const unsigned char *)src)[3]) << 24); } #endif #else return (sph_u32)(((const unsigned char *)src)[0]) | ((sph_u32)(((const unsigned char *)src)[1]) << 8) | ((sph_u32)(((const unsigned char *)src)[2]) << 16) | ((sph_u32)(((const unsigned char *)src)[3]) << 24); #endif } /** * Decode a 32-bit value from the provided buffer (little endian convention). * The source buffer must be properly aligned. * * @param src the source buffer (32-bit aligned) * @return the decoded value */ static SPH_INLINE sph_u32 sph_dec32le_aligned(const void *src) { #if SPH_LITTLE_ENDIAN return *(const sph_u32 *)src; #elif SPH_BIG_ENDIAN #if SPH_SPARCV9_GCC && !SPH_NO_ASM sph_u32 tmp; __asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src)); return tmp; /* * Not worth it generally. * #elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM sph_u32 tmp; __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src)); return tmp; */ #else return sph_bswap32(*(const sph_u32 *)src); #endif #else return (sph_u32)(((const unsigned char *)src)[0]) | ((sph_u32)(((const unsigned char *)src)[1]) << 8) | ((sph_u32)(((const unsigned char *)src)[2]) << 16) | ((sph_u32)(((const unsigned char *)src)[3]) << 24); #endif } #if SPH_64 /** * Encode a 64-bit value into the provided buffer (big endian convention). * * @param dst the destination buffer * @param val the 64-bit value to encode */ static SPH_INLINE void sph_enc64be(void *dst, sph_u64 val) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_LITTLE_ENDIAN val = sph_bswap64(val); #endif *(sph_u64 *)dst = val; #else if (((SPH_UPTR)dst & 7) == 0) { #if SPH_LITTLE_ENDIAN val = sph_bswap64(val); #endif *(sph_u64 *)dst = val; } else { ((unsigned char *)dst)[0] = (val >> 56); ((unsigned char *)dst)[1] = (val >> 48); ((unsigned char *)dst)[2] = (val >> 40); ((unsigned char *)dst)[3] = (val >> 32); ((unsigned char *)dst)[4] = (val >> 24); ((unsigned char *)dst)[5] = (val >> 16); ((unsigned char *)dst)[6] = (val >> 8); ((unsigned char *)dst)[7] = val; } #endif #else ((unsigned char *)dst)[0] = (val >> 56); ((unsigned char *)dst)[1] = (val >> 48); ((unsigned char *)dst)[2] = (val >> 40); ((unsigned char *)dst)[3] = (val >> 32); ((unsigned char *)dst)[4] = (val >> 24); ((unsigned char *)dst)[5] = (val >> 16); ((unsigned char *)dst)[6] = (val >> 8); ((unsigned char *)dst)[7] = val; #endif } /** * Encode a 64-bit value into the provided buffer (big endian convention). * The destination buffer must be properly aligned. 
* * @param dst the destination buffer (64-bit aligned) * @param val the value to encode */ static SPH_INLINE void sph_enc64be_aligned(void *dst, sph_u64 val) { #if SPH_LITTLE_ENDIAN *(sph_u64 *)dst = sph_bswap64(val); #elif SPH_BIG_ENDIAN *(sph_u64 *)dst = val; #else ((unsigned char *)dst)[0] = (val >> 56); ((unsigned char *)dst)[1] = (val >> 48); ((unsigned char *)dst)[2] = (val >> 40); ((unsigned char *)dst)[3] = (val >> 32); ((unsigned char *)dst)[4] = (val >> 24); ((unsigned char *)dst)[5] = (val >> 16); ((unsigned char *)dst)[6] = (val >> 8); ((unsigned char *)dst)[7] = val; #endif } /** * Decode a 64-bit value from the provided buffer (big endian convention). * * @param src the source buffer * @return the decoded value */ static SPH_INLINE sph_u64 sph_dec64be(const void *src) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_LITTLE_ENDIAN return sph_bswap64(*(const sph_u64 *)src); #else return *(const sph_u64 *)src; #endif #else if (((SPH_UPTR)src & 7) == 0) { #if SPH_LITTLE_ENDIAN return sph_bswap64(*(const sph_u64 *)src); #else return *(const sph_u64 *)src; #endif } else { return ((sph_u64)(((const unsigned char *)src)[0]) << 56) | ((sph_u64)(((const unsigned char *)src)[1]) << 48) | ((sph_u64)(((const unsigned char *)src)[2]) << 40) | ((sph_u64)(((const unsigned char *)src)[3]) << 32) | ((sph_u64)(((const unsigned char *)src)[4]) << 24) | ((sph_u64)(((const unsigned char *)src)[5]) << 16) | ((sph_u64)(((const unsigned char *)src)[6]) << 8) | (sph_u64)(((const unsigned char *)src)[7]); } #endif #else return ((sph_u64)(((const unsigned char *)src)[0]) << 56) | ((sph_u64)(((const unsigned char *)src)[1]) << 48) | ((sph_u64)(((const unsigned char *)src)[2]) << 40) | ((sph_u64)(((const unsigned char *)src)[3]) << 32) | ((sph_u64)(((const unsigned char *)src)[4]) << 24) | ((sph_u64)(((const unsigned char *)src)[5]) << 16) | ((sph_u64)(((const unsigned char *)src)[6]) << 8) | (sph_u64)(((const unsigned char *)src)[7]); #endif } /** * Decode a 64-bit value from the provided buffer (big endian convention). * The source buffer must be properly aligned. * * @param src the source buffer (64-bit aligned) * @return the decoded value */ static SPH_INLINE sph_u64 sph_dec64be_aligned(const void *src) { #if SPH_LITTLE_ENDIAN return sph_bswap64(*(const sph_u64 *)src); #elif SPH_BIG_ENDIAN return *(const sph_u64 *)src; #else return ((sph_u64)(((const unsigned char *)src)[0]) << 56) | ((sph_u64)(((const unsigned char *)src)[1]) << 48) | ((sph_u64)(((const unsigned char *)src)[2]) << 40) | ((sph_u64)(((const unsigned char *)src)[3]) << 32) | ((sph_u64)(((const unsigned char *)src)[4]) << 24) | ((sph_u64)(((const unsigned char *)src)[5]) << 16) | ((sph_u64)(((const unsigned char *)src)[6]) << 8) | (sph_u64)(((const unsigned char *)src)[7]); #endif } /** * Encode a 64-bit value into the provided buffer (little endian convention). 
* * @param dst the destination buffer * @param val the 64-bit value to encode */ static SPH_INLINE void sph_enc64le(void *dst, sph_u64 val) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_BIG_ENDIAN val = sph_bswap64(val); #endif *(sph_u64 *)dst = val; #else if (((SPH_UPTR)dst & 7) == 0) { #if SPH_BIG_ENDIAN val = sph_bswap64(val); #endif *(sph_u64 *)dst = val; } else { ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = (val >> 8); ((unsigned char *)dst)[2] = (val >> 16); ((unsigned char *)dst)[3] = (val >> 24); ((unsigned char *)dst)[4] = (val >> 32); ((unsigned char *)dst)[5] = (val >> 40); ((unsigned char *)dst)[6] = (val >> 48); ((unsigned char *)dst)[7] = (val >> 56); } #endif #else ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = (val >> 8); ((unsigned char *)dst)[2] = (val >> 16); ((unsigned char *)dst)[3] = (val >> 24); ((unsigned char *)dst)[4] = (val >> 32); ((unsigned char *)dst)[5] = (val >> 40); ((unsigned char *)dst)[6] = (val >> 48); ((unsigned char *)dst)[7] = (val >> 56); #endif } /** * Encode a 64-bit value into the provided buffer (little endian convention). * The destination buffer must be properly aligned. * * @param dst the destination buffer (64-bit aligned) * @param val the value to encode */ static SPH_INLINE void sph_enc64le_aligned(void *dst, sph_u64 val) { #if SPH_LITTLE_ENDIAN *(sph_u64 *)dst = val; #elif SPH_BIG_ENDIAN *(sph_u64 *)dst = sph_bswap64(val); #else ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = (val >> 8); ((unsigned char *)dst)[2] = (val >> 16); ((unsigned char *)dst)[3] = (val >> 24); ((unsigned char *)dst)[4] = (val >> 32); ((unsigned char *)dst)[5] = (val >> 40); ((unsigned char *)dst)[6] = (val >> 48); ((unsigned char *)dst)[7] = (val >> 56); #endif } /** * Decode a 64-bit value from the provided buffer (little endian convention). * * @param src the source buffer * @return the decoded value */ static SPH_INLINE sph_u64 sph_dec64le(const void *src) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_BIG_ENDIAN return sph_bswap64(*(const sph_u64 *)src); #else return *(const sph_u64 *)src; #endif #else if (((SPH_UPTR)src & 7) == 0) { #if SPH_BIG_ENDIAN #if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM sph_u64 tmp; __asm__ __volatile__ ( "ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src)); return tmp; /* * Not worth it generally. 
* #elif SPH_PPC32_GCC && !SPH_NO_ASM return (sph_u64)sph_dec32le_aligned(src) | ((sph_u64)sph_dec32le_aligned( (const char *)src + 4) << 32); #elif SPH_PPC64_GCC && !SPH_NO_ASM sph_u64 tmp; __asm__ __volatile__ ( "ldbrx %0,0,%1" : "=r" (tmp) : "r" (src)); return tmp; */ #else return sph_bswap64(*(const sph_u64 *)src); #endif #else return *(const sph_u64 *)src; #endif } else { return (sph_u64)(((const unsigned char *)src)[0]) | ((sph_u64)(((const unsigned char *)src)[1]) << 8) | ((sph_u64)(((const unsigned char *)src)[2]) << 16) | ((sph_u64)(((const unsigned char *)src)[3]) << 24) | ((sph_u64)(((const unsigned char *)src)[4]) << 32) | ((sph_u64)(((const unsigned char *)src)[5]) << 40) | ((sph_u64)(((const unsigned char *)src)[6]) << 48) | ((sph_u64)(((const unsigned char *)src)[7]) << 56); } #endif #else return (sph_u64)(((const unsigned char *)src)[0]) | ((sph_u64)(((const unsigned char *)src)[1]) << 8) | ((sph_u64)(((const unsigned char *)src)[2]) << 16) | ((sph_u64)(((const unsigned char *)src)[3]) << 24) | ((sph_u64)(((const unsigned char *)src)[4]) << 32) | ((sph_u64)(((const unsigned char *)src)[5]) << 40) | ((sph_u64)(((const unsigned char *)src)[6]) << 48) | ((sph_u64)(((const unsigned char *)src)[7]) << 56); #endif } /** * Decode a 64-bit value from the provided buffer (little endian convention). * The source buffer must be properly aligned. * * @param src the source buffer (64-bit aligned) * @return the decoded value */ static SPH_INLINE sph_u64 sph_dec64le_aligned(const void *src) { #if SPH_LITTLE_ENDIAN return *(const sph_u64 *)src; #elif SPH_BIG_ENDIAN #if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM sph_u64 tmp; __asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src)); return tmp; /* * Not worth it generally. * #elif SPH_PPC32_GCC && !SPH_NO_ASM return (sph_u64)sph_dec32le_aligned(src) | ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32); #elif SPH_PPC64_GCC && !SPH_NO_ASM sph_u64 tmp; __asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src)); return tmp; */ #else return sph_bswap64(*(const sph_u64 *)src); #endif #else return (sph_u64)(((const unsigned char *)src)[0]) | ((sph_u64)(((const unsigned char *)src)[1]) << 8) | ((sph_u64)(((const unsigned char *)src)[2]) << 16) | ((sph_u64)(((const unsigned char *)src)[3]) << 24) | ((sph_u64)(((const unsigned char *)src)[4]) << 32) | ((sph_u64)(((const unsigned char *)src)[5]) << 40) | ((sph_u64)(((const unsigned char *)src)[6]) << 48) | ((sph_u64)(((const unsigned char *)src)[7]) << 56); #endif } #endif #endif /* Doxygen excluded block */ #endif libntru-0.5/src/types.h000066400000000000000000000033741271556312200151520ustar00rootroot00000000000000#ifndef NTRU_TYPES_H #define NTRU_TYPES_H #include <stdint.h> #define NTRU_MAX_DEGREE (1499+1) /* max N value for all param sets; +1 for ntru_invert_...() */ #define NTRU_INT_POLY_SIZE ((NTRU_MAX_DEGREE+16+7)&0xFFF8) /* (max #coefficients + 16) rounded up to a multiple of 8 */ #define NTRU_MAX_ONES 499 /* max(df1, df2, df3, dg) */ /** A polynomial with integer coefficients. */ typedef struct NtruIntPoly { uint16_t N; int16_t coeffs[NTRU_INT_POLY_SIZE]; } NtruIntPoly; /** A ternary polynomial, i.e. all coefficients are equal to -1, 0, or 1. */ typedef struct NtruTernPoly { uint16_t N; uint16_t num_ones; uint16_t num_neg_ones; uint16_t ones[NTRU_MAX_ONES]; uint16_t neg_ones[NTRU_MAX_ONES]; } NtruTernPoly; #ifndef NTRU_AVOID_HAMMING_WT_PATENT /** * A product-form polynomial, i.e. a polynomial of the form f1*f2+f3 * where f1,f2,f3 are very sparsely populated ternary polynomials.
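* Multiplying by such an f is cheap: f*m = f1*(f2*m) + f3*m, i.e. three sparse ternary convolutions instead of one general multiplication.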
*/ typedef struct NtruProdPoly { uint16_t N; NtruTernPoly f1, f2, f3; } NtruProdPoly; #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ /** Private polynomial, can be ternary or product-form */ typedef struct { uint8_t prod_flag; /* whether the polynomial is in product form */ union { NtruTernPoly tern; #ifndef NTRU_AVOID_HAMMING_WT_PATENT NtruProdPoly prod; #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ } poly; } NtruPrivPoly; /** * NtruEncrypt private key */ typedef struct NtruEncPrivKey { uint16_t q; NtruPrivPoly t; } NtruEncPrivKey; /** * NtruEncrypt public key */ typedef struct NtruEncPubKey { uint16_t q; NtruIntPoly h; } NtruEncPubKey; /** * NtruEncrypt key pair */ typedef struct NtruEncKeyPair { NtruEncPrivKey priv; NtruEncPubKey pub; } NtruEncKeyPair; #endif /* NTRU_TYPES_H */ libntru-0.5/src/x86_64-xlate.pl000077500000000000000000001056211271556312200162460ustar00rootroot00000000000000#!/usr/bin/env perl # Ascetic x86_64 AT&T to MASM/NASM assembler translator by . # # Why AT&T to MASM and not vice versa? Several reasons. Because AT&T # format is way easier to parse. Because it's simpler to "gear" from # Unix ABI to Windows one [see cross-reference "card" at the end of # file]. Because Linux targets were available first... # # In addition the script also "distills" code suitable for GNU # assembler, so that it can be compiled with more rigid assemblers, # such as Solaris /usr/ccs/bin/as. # # This translator is not designed to convert *arbitrary* assembler # code from AT&T format to MASM one. It's designed to convert just # enough to provide for dual-ABI OpenSSL modules development... # There *are* limitations and you might have to modify your assembler # code or this script to achieve the desired result... # # Currently recognized limitations: # # - can't use multiple ops per line; # # Dual-ABI styling rules. # # 1. Adhere to Unix register and stack layout [see cross-reference # ABI "card" at the end for explanation]. # 2. Forget about "red zone," stick to more traditional blended # stack frame allocation. If volatile storage is actually required # that is. If not, just leave the stack as is. # 3. Functions tagged with ".type name,@function" get crafted with # unified Win64 prologue and epilogue automatically. If you want # to take care of ABI differences yourself, tag functions as # ".type name,@abi-omnipotent" instead. # 4. To optimize the Win64 prologue you can specify number of input # arguments as ".type name,@function,N." Keep in mind that if N is # larger than 6, then you *have to* write "abi-omnipotent" code, # because >6 cases can't be addressed with unified prologue. # 5. Name local labels as .L*, do *not* use dynamic labels such as 1: # (sorry about latter). # 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is # required to identify the spots, where to inject Win64 epilogue! # But on the pros, it's then prefixed with rep automatically:-) # 7. Stick to explicit ip-relative addressing. If you have to use # GOTPCREL addressing, stick to mov symbol@GOTPCREL(%rip),%r??. # Both are recognized and translated to proper Win64 addressing # modes. To support legacy code a synthetic directive, .picmeup, # is implemented. It puts address of the *next* instruction into # target register, e.g.: # # .picmeup %rax # lea .Label-.(%rax),%rax # # 8. In order to provide for structured exception handling unified # Win64 prologue copies %rsp value to %rax. For further details # see SEH paragraph at the end. # 9. .init segment is allowed to contain calls to functions only. # a. 
If function accepts more than 4 arguments *and* >4th argument # is declared as non 64-bit value, do clear its upper part. my $flavour = shift; my $output = shift; if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } open STDOUT,">$output" || die "can't open $output: $!" if (defined($output)); my $gas=1; $gas=0 if ($output =~ /\.asm$/); my $elf=1; $elf=0 if (!$gas); my $win64=0; my $prefix=""; my $decor=".L"; my $masmref=8 + 50727*2**-32; # 8.00.50727 shipped with VS2005 my $masm=0; my $PTR=" PTR"; my $nasmref=2.03; my $nasm=0; if ($flavour eq "mingw64") { $gas=1; $elf=0; $win64=1; $prefix=`echo __USER_LABEL_PREFIX__ | $ENV{CC} -E -P -`; chomp($prefix); } elsif ($flavour eq "macosx") { $gas=1; $elf=0; $prefix="_"; $decor="L\$"; } elsif ($flavour eq "masm") { $gas=0; $elf=0; $masm=$masmref; $win64=1; $decor="\$L\$"; } elsif ($flavour eq "nasm") { $gas=0; $elf=0; $nasm=$nasmref; $win64=1; $decor="\$L\$"; $PTR=""; } elsif (!$gas) { if ($ENV{ASM} =~ m/nasm/ && `nasm -v` =~ m/version ([0-9]+)\.([0-9]+)/i) { $nasm = $1 + $2*0.01; $PTR=""; } elsif (`ml64 2>&1` =~ m/Version ([0-9]+)\.([0-9]+)(\.([0-9]+))?/) { $masm = $1 + $2*2**-16 + $4*2**-32; } die "no assembler found on %PATH" if (!($nasm || $masm)); $win64=1; $elf=0; $decor="\$L\$"; } my $current_segment; my $current_function; my %globals; { package opcode; # pick up opcodes sub re { my $self = shift; # single instance in enough... local *line = shift; undef $ret; if ($line =~ /^([a-z][a-z0-9]*)/i) { $self->{op} = $1; $ret = $self; $line = substr($line,@+[0]); $line =~ s/^\s+//; undef $self->{sz}; if ($self->{op} =~ /^(movz)x?([bw]).*/) { # movz is pain... $self->{op} = $1; $self->{sz} = $2; } elsif ($self->{op} =~ /call|jmp/) { $self->{sz} = ""; } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op|insrw)/) { # SSEn $self->{sz} = ""; } elsif ($self->{op} =~ /^v/) { # VEX $self->{sz} = ""; } elsif ($self->{op} =~ /mov[dq]/ && $line =~ /%xmm/) { $self->{sz} = ""; } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { $self->{op} = $1; $self->{sz} = $2; } } $ret; } sub size { my $self = shift; my $sz = shift; $self->{sz} = $sz if (defined($sz) && !defined($self->{sz})); $self->{sz}; } sub out { my $self = shift; if ($gas) { if ($self->{op} eq "movz") { # movz is pain... sprintf "%s%s%s",$self->{op},$self->{sz},shift; } elsif ($self->{op} =~ /^set/) { "$self->{op}"; } elsif ($self->{op} eq "ret") { my $epilogue = ""; if ($win64 && $current_function->{abi} eq "svr4") { $epilogue = "movq 8(%rsp),%rdi\n\t" . "movq 16(%rsp),%rsi\n\t"; } $epilogue . ".byte 0xf3,0xc3"; } elsif ($self->{op} eq "call" && !$elf && $current_segment eq ".init") { ".p2align\t3\n\t.quad"; } else { "$self->{op}$self->{sz}"; } } else { $self->{op} =~ s/^movz/movzx/; if ($self->{op} eq "ret") { $self->{op} = ""; if ($win64 && $current_function->{abi} eq "svr4") { $self->{op} = "mov rdi,QWORD${PTR}[8+rsp]\t;WIN64 epilogue\n\t". "mov rsi,QWORD${PTR}[16+rsp]\n\t"; } $self->{op} .= "DB\t0F3h,0C3h\t\t;repret"; } elsif ($self->{op} =~ /^(pop|push)f/) { $self->{op} .= $self->{sz}; } elsif ($self->{op} eq "call" && $current_segment eq ".CRT\$XCU") { $self->{op} = "\tDQ"; } $self->{op}; } } sub mnemonic { my $self=shift; my $op=shift; $self->{op}=$op if (defined($op)); $self->{op}; } } { package const; # pick up constants, which start with $ sub re { my $self = shift; # single instance in enough... 
local *line = shift; undef $ret; if ($line =~ /^\$([^,]+)/) { $self->{value} = $1; $ret = $self; $line = substr($line,@+[0]); $line =~ s/^\s+//; } $ret; } sub out { my $self = shift; if ($gas) { # Solaris /usr/ccs/bin/as can't handle multiplications # in $self->{value} $self->{value} =~ s/(?{value} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg; sprintf "\$%s",$self->{value}; } else { $self->{value} =~ s/(0b[0-1]+)/oct($1)/eig; $self->{value} =~ s/0x([0-9a-f]+)/0$1h/ig if ($masm); sprintf "%s",$self->{value}; } } } { package ea; # pick up effective addresses: expr(%reg,%reg,scale) sub re { my $self = shift; # single instance in enough... local *line = shift; undef $ret; # optional * ---vvv--- appears in indirect jmp/call if ($line =~ /^(\*?)([^\(,]*)\(([%\w,]+)\)/) { $self->{asterisk} = $1; $self->{label} = $2; ($self->{base},$self->{index},$self->{scale})=split(/,/,$3); $self->{scale} = 1 if (!defined($self->{scale})); $ret = $self; $line = substr($line,@+[0]); $line =~ s/^\s+//; if ($win64 && $self->{label} =~ s/\@GOTPCREL//) { die if (opcode->mnemonic() ne "mov"); opcode->mnemonic("lea"); } $self->{base} =~ s/^%//; $self->{index} =~ s/^%// if (defined($self->{index})); } $ret; } sub size {} sub out { my $self = shift; my $sz = shift; $self->{label} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; $self->{label} =~ s/\.L/$decor/g; # Silently convert all EAs to 64-bit. This is required for # elder GNU assembler and results in more compact code, # *but* most importantly AES module depends on this feature! $self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; $self->{base} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; # Solaris /usr/ccs/bin/as can't handle multiplications # in $self->{label}, new gas requires sign extension... use integer; $self->{label} =~ s/(?{label} =~ s/\b([0-9]+\s*[\*\/\%]\s*[0-9]+)\b/eval($1)/eg; $self->{label} =~ s/\b([0-9]+)\b/$1<<32>>32/eg; if (!$self->{label} && $self->{index} && $self->{scale}==1 && $self->{base} =~ /(rbp|r13)/) { $self->{base} = $self->{index}; $self->{index} = $1; } if ($gas) { $self->{label} =~ s/^___imp_/__imp__/ if ($flavour eq "mingw64"); if (defined($self->{index})) { sprintf "%s%s(%s,%%%s,%d)",$self->{asterisk}, $self->{label}, $self->{base}?"%$self->{base}":"", $self->{index},$self->{scale}; } else { sprintf "%s%s(%%%s)", $self->{asterisk},$self->{label},$self->{base}; } } else { %szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR", l=>"DWORD$PTR", d=>"DWORD$PTR", q=>"QWORD$PTR", o=>"OWORD$PTR", x=>"XMMWORD$PTR", y=>"YMMWORD$PTR", z=>"ZMMWORD$PTR" ); $self->{label} =~ s/\./\$/g; $self->{label} =~ s/(?{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/); ($self->{asterisk}) && ($sz="q") || (opcode->mnemonic() =~ /^v?mov([qd])$/) && ($sz=$1) || (opcode->mnemonic() =~ /^v?pinsr([qdwb])$/) && ($sz=$1) || (opcode->mnemonic() =~ /^vpbroadcast([qdwb])$/) && ($sz=$1) || (opcode->mnemonic() =~ /^vinsert[fi]128$/) && ($sz="x"); if (defined($self->{index})) { sprintf "%s[%s%s*%d%s]",$szmap{$sz}, $self->{label}?"$self->{label}+":"", $self->{index},$self->{scale}, $self->{base}?"+$self->{base}":""; } elsif ($self->{base} eq "rip") { sprintf "%s[%s]",$szmap{$sz},$self->{label}; } else { sprintf "%s[%s%s]",$szmap{$sz}, $self->{label}?"$self->{label}+":"", $self->{base}; } } } } { package register; # pick up registers, which start with %. sub re { my $class = shift; # muliple instances... 
my $self = {}; local *line = shift; undef $ret; # optional * ---vvv--- appears in indirect jmp/call if ($line =~ /^(\*?)%(\w+)/) { bless $self,$class; $self->{asterisk} = $1; $self->{value} = $2; $ret = $self; $line = substr($line,@+[0]); $line =~ s/^\s+//; } $ret; } sub size { my $self = shift; undef $ret; if ($self->{value} =~ /^r[\d]+b$/i) { $ret="b"; } elsif ($self->{value} =~ /^r[\d]+w$/i) { $ret="w"; } elsif ($self->{value} =~ /^r[\d]+d$/i) { $ret="l"; } elsif ($self->{value} =~ /^r[\w]+$/i) { $ret="q"; } elsif ($self->{value} =~ /^[a-d][hl]$/i){ $ret="b"; } elsif ($self->{value} =~ /^[\w]{2}l$/i) { $ret="b"; } elsif ($self->{value} =~ /^[\w]{2}$/i) { $ret="w"; } elsif ($self->{value} =~ /^e[a-z]{2}$/i){ $ret="l"; } $ret; } sub out { my $self = shift; if ($gas) { sprintf "%s%%%s",$self->{asterisk},$self->{value}; } else { $self->{value}; } } } { package label; # pick up labels, which end with : sub re { my $self = shift; # single instance is enough... local *line = shift; undef $ret; if ($line =~ /(^[\.\w]+)\:/) { $self->{value} = $1; $ret = $self; $line = substr($line,@+[0]); $line =~ s/^\s+//; $self->{value} =~ s/^\.L/$decor/; } $ret; } sub out { my $self = shift; if ($gas) { my $func = ($globals{$self->{value}} or $self->{value}) . ":"; if ($win64 && $current_function->{name} eq $self->{value} && $current_function->{abi} eq "svr4") { $func .= "\n"; $func .= " movq %rdi,8(%rsp)\n"; $func .= " movq %rsi,16(%rsp)\n"; $func .= " movq %rsp,%rax\n"; $func .= "${decor}SEH_begin_$current_function->{name}:\n"; my $narg = $current_function->{narg}; $narg=6 if (!defined($narg)); $func .= " movq %rcx,%rdi\n" if ($narg>0); $func .= " movq %rdx,%rsi\n" if ($narg>1); $func .= " movq %r8,%rdx\n" if ($narg>2); $func .= " movq %r9,%rcx\n" if ($narg>3); $func .= " movq 40(%rsp),%r8\n" if ($narg>4); $func .= " movq 48(%rsp),%r9\n" if ($narg>5); } $func; } elsif ($self->{value} ne "$current_function->{name}") { $self->{value} .= ":" if ($masm && $ret!~m/^\$/); $self->{value} . ":"; } elsif ($win64 && $current_function->{abi} eq "svr4") { my $func = "$current_function->{name}" . ($nasm ? ":" : "\tPROC $current_function->{scope}") . "\n"; $func .= " mov QWORD${PTR}[8+rsp],rdi\t;WIN64 prologue\n"; $func .= " mov QWORD${PTR}[16+rsp],rsi\n"; $func .= " mov rax,rsp\n"; $func .= "${decor}SEH_begin_$current_function->{name}:"; $func .= ":" if ($masm); $func .= "\n"; my $narg = $current_function->{narg}; $narg=6 if (!defined($narg)); $func .= " mov rdi,rcx\n" if ($narg>0); $func .= " mov rsi,rdx\n" if ($narg>1); $func .= " mov rdx,r8\n" if ($narg>2); $func .= " mov rcx,r9\n" if ($narg>3); $func .= " mov r8,QWORD${PTR}[40+rsp]\n" if ($narg>4); $func .= " mov r9,QWORD${PTR}[48+rsp]\n" if ($narg>5); $func .= "\n"; } else { "$current_function->{name}". ($nasm ? ":" : "\tPROC $current_function->{scope}"); } } } { package expr; # pick up expressioins sub re { my $self = shift; # single instance is enough... local *line = shift; undef $ret; if ($line =~ /(^[^,]+)/) { $self->{value} = $1; $ret = $self; $line = substr($line,@+[0]); $line =~ s/^\s+//; $self->{value} =~ s/\@PLT// if (!$elf); $self->{value} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; $self->{value} =~ s/\.L/$decor/g; } $ret; } sub out { my $self = shift; if ($nasm && opcode->mnemonic()=~m/^j(?![re]cxz)/) { "NEAR ".$self->{value}; } else { $self->{value}; } } } { package directive; # pick up directives, which start with . sub re { my $self = shift; # single instance is enough... 
local *line = shift; undef $ret; my $dir; my %opcode = # lea 2f-1f(%rip),%dst; 1: nop; 2: ( "%rax"=>0x01058d48, "%rcx"=>0x010d8d48, "%rdx"=>0x01158d48, "%rbx"=>0x011d8d48, "%rsp"=>0x01258d48, "%rbp"=>0x012d8d48, "%rsi"=>0x01358d48, "%rdi"=>0x013d8d48, "%r8" =>0x01058d4c, "%r9" =>0x010d8d4c, "%r10"=>0x01158d4c, "%r11"=>0x011d8d4c, "%r12"=>0x01258d4c, "%r13"=>0x012d8d4c, "%r14"=>0x01358d4c, "%r15"=>0x013d8d4c ); if ($line =~ /^\s*(\.\w+)/) { $dir = $1; $ret = $self; undef $self->{value}; $line = substr($line,@+[0]); $line =~ s/^\s+//; SWITCH: for ($dir) { /\.picmeup/ && do { if ($line =~ /(%r[\w]+)/i) { $dir="\t.long"; $line=sprintf "0x%x,0x90000000",$opcode{$1}; } last; }; /\.global|\.globl|\.extern/ && do { $globals{$line} = $prefix . $line; $line = $globals{$line} if ($prefix); last; }; /\.type/ && do { ($sym,$type,$narg) = split(',',$line); if ($type eq "\@function") { undef $current_function; $current_function->{name} = $sym; $current_function->{abi} = "svr4"; $current_function->{narg} = $narg; $current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE"; } elsif ($type eq "\@abi-omnipotent") { undef $current_function; $current_function->{name} = $sym; $current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE"; } $line =~ s/\@abi\-omnipotent/\@function/; $line =~ s/\@function.*/\@function/; last; }; /\.asciz/ && do { if ($line =~ /^"(.*)"$/) { $dir = ".byte"; $line = join(",",unpack("C*",$1),0); } last; }; /\.rva|\.long|\.quad/ && do { $line =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; $line =~ s/\.L/$decor/g; last; }; } if ($gas) { $self->{value} = $dir . "\t" . $line; if ($dir =~ /\.extern/) { $self->{value} = ""; # swallow extern } elsif (!$elf && $dir =~ /\.type/) { $self->{value} = ""; $self->{value} = ".def\t" . ($globals{$1} or $1) . ";\t" . (defined($globals{$1})?".scl 2;":".scl 3;") . "\t.type 32;\t.endef" if ($win64 && $line =~ /([^,]+),\@function/); } elsif (!$elf && $dir =~ /\.size/) { $self->{value} = ""; if (defined($current_function)) { $self->{value} .= "${decor}SEH_end_$current_function->{name}:" if ($win64 && $current_function->{abi} eq "svr4"); undef $current_function; } } elsif (!$elf && $dir =~ /\.align/) { $self->{value} = ".p2align\t" . (log($line)/log(2)); } elsif ($dir eq ".section") { $current_segment=$line; if (!$elf && $current_segment eq ".init") { if ($flavour eq "macosx") { $self->{value} = ".mod_init_func"; } elsif ($flavour eq "mingw64") { $self->{value} = ".section\t.ctors"; } } } elsif ($dir =~ /\.(text|data)/) { $current_segment=".$1"; } elsif ($dir =~ /\.hidden/) { if ($flavour eq "macosx") { $self->{value} = ".private_extern\t$prefix$line"; } elsif ($flavour eq "mingw64") { $self->{value} = ""; } } elsif ($dir =~ /\.comm/) { $self->{value} = "$dir\t$prefix$line"; $self->{value} =~ s|,([0-9]+),([0-9]+)$|",$1,".log($2)/log(2)|e if ($flavour eq "macosx"); } $line = ""; return $self; } # non-gas case or nasm/masm SWITCH: for ($dir) { /\.text/ && do { my $v=undef; if ($nasm) { $v="section .text code align=64\n"; } else { $v="$current_segment\tENDS\n" if ($current_segment); $current_segment = ".text\$"; $v.="$current_segment\tSEGMENT "; $v.=$masm>=$masmref ? 
"ALIGN(256)" : "PAGE"; $v.=" 'CODE'"; } $self->{value} = $v; last; }; /\.data/ && do { my $v=undef; if ($nasm) { $v="section .data data align=8\n"; } else { $v="$current_segment\tENDS\n" if ($current_segment); $current_segment = "_DATA"; $v.="$current_segment\tSEGMENT"; } $self->{value} = $v; last; }; /\.section/ && do { my $v=undef; $line =~ s/([^,]*).*/$1/; $line = ".CRT\$XCU" if ($line eq ".init"); if ($nasm) { $v="section $line"; if ($line=~/\.([px])data/) { $v.=" rdata align="; $v.=$1 eq "p"? 4 : 8; } elsif ($line=~/\.CRT\$/i) { $v.=" rdata align=8"; } } else { $v="$current_segment\tENDS\n" if ($current_segment); $v.="$line\tSEGMENT"; if ($line=~/\.([px])data/) { $v.=" READONLY"; $v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref); } elsif ($line=~/\.CRT\$/i) { $v.=" READONLY "; $v.=$masm>=$masmref ? "ALIGN(8)" : "DWORD"; } } $current_segment = $line; $self->{value} = $v; last; }; /\.extern/ && do { $self->{value} = "EXTERN\t".$line; $self->{value} .= ":NEAR" if ($masm); last; }; /\.globl|.global/ && do { $self->{value} = $masm?"PUBLIC":"global"; $self->{value} .= "\t".$line; last; }; /\.size/ && do { if (defined($current_function)) { undef $self->{value}; if ($current_function->{abi} eq "svr4") { $self->{value}="${decor}SEH_end_$current_function->{name}:"; $self->{value}.=":\n" if($masm); } $self->{value}.="$current_function->{name}\tENDP" if($masm && $current_function->{name}); undef $current_function; } last; }; /\.align/ && do { $self->{value} = "ALIGN\t".$line; last; }; /\.(value|long|rva|quad)/ && do { my $sz = substr($1,0,1); my @arr = split(/,\s*/,$line); my $last = pop(@arr); my $conv = sub { my $var=shift; $var=~s/^(0b[0-1]+)/oct($1)/eig; $var=~s/^0x([0-9a-f]+)/0$1h/ig if ($masm); if ($sz eq "D" && ($current_segment=~/.[px]data/ || $dir eq ".rva")) { $var=~s/([_a-z\$\@][_a-z0-9\$\@]*)/$nasm?"$1 wrt ..imagebase":"imagerel $1"/egi; } $var; }; $sz =~ tr/bvlrq/BWDDQ/; $self->{value} = "\tD$sz\t"; for (@arr) { $self->{value} .= &$conv($_).","; } $self->{value} .= &$conv($last); last; }; /\.byte/ && do { my @str=split(/,\s*/,$line); map(s/(0b[0-1]+)/oct($1)/eig,@str); map(s/0x([0-9a-f]+)/0$1h/ig,@str) if ($masm); while ($#str>15) { $self->{value}.="DB\t" .join(",",@str[0..15])."\n"; foreach (0..15) { shift @str; } } $self->{value}.="DB\t" .join(",",@str) if (@str); last; }; /\.comm/ && do { my @str=split(/,\s*/,$line); my $v=undef; if ($nasm) { $v.="common $prefix@str[0] @str[1]"; } else { $v="$current_segment\tENDS\n" if ($current_segment); $current_segment = "_DATA"; $v.="$current_segment\tSEGMENT\n"; $v.="COMM @str[0]:DWORD:".@str[1]/4; } $self->{value} = $v; last; }; } $line = ""; } $ret; } sub out { my $self = shift; $self->{value}; } } sub rex { local *opcode=shift; my ($dst,$src,$rex)=@_; $rex|=0x04 if($dst>=8); $rex|=0x01 if($src>=8); push @opcode,($rex|0x40) if ($rex); } # older gas and ml64 don't handle SSE>2 instructions my %regrm = ( "%eax"=>0, "%ecx"=>1, "%edx"=>2, "%ebx"=>3, "%esp"=>4, "%ebp"=>5, "%esi"=>6, "%edi"=>7 ); my $movq = sub { # elderly gas can't handle inter-register movq my $arg = shift; my @opcode=(0x66); if ($arg =~ /%xmm([0-9]+),\s*%r(\w+)/) { my ($src,$dst)=($1,$2); if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } rex(\@opcode,$src,$dst,0x8); push @opcode,0x0f,0x7e; push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M @opcode; } elsif ($arg =~ /%r(\w+),\s*%xmm([0-9]+)/) { my ($src,$dst)=($2,$1); if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } rex(\@opcode,$src,$dst,0x8); push @opcode,0x0f,0x6e; push @opcode,0xc0|(($src&7)<<3)|($dst&7); # 
ModR/M @opcode; } else { (); } }; my $pextrd = sub { if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*(%\w+)/) { my @opcode=(0x66); $imm=$1; $src=$2; $dst=$3; if ($dst =~ /%r([0-9]+)d/) { $dst = $1; } elsif ($dst =~ /%e/) { $dst = $regrm{$dst}; } rex(\@opcode,$src,$dst); push @opcode,0x0f,0x3a,0x16; push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M push @opcode,$imm; @opcode; } else { (); } }; my $pinsrd = sub { if (shift =~ /\$([0-9]+),\s*(%\w+),\s*%xmm([0-9]+)/) { my @opcode=(0x66); $imm=$1; $src=$2; $dst=$3; if ($src =~ /%r([0-9]+)/) { $src = $1; } elsif ($src =~ /%e/) { $src = $regrm{$src}; } rex(\@opcode,$dst,$src); push @opcode,0x0f,0x3a,0x22; push @opcode,0xc0|(($dst&7)<<3)|($src&7); # ModR/M push @opcode,$imm; @opcode; } else { (); } }; my $pshufb = sub { if (shift =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { my @opcode=(0x66); rex(\@opcode,$2,$1); push @opcode,0x0f,0x38,0x00; push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M @opcode; } else { (); } }; my $palignr = sub { if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { my @opcode=(0x66); rex(\@opcode,$3,$2); push @opcode,0x0f,0x3a,0x0f; push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M push @opcode,$1; @opcode; } else { (); } }; my $pclmulqdq = sub { if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { my @opcode=(0x66); rex(\@opcode,$3,$2); push @opcode,0x0f,0x3a,0x44; push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M my $c=$1; push @opcode,$c=~/^0/?oct($c):$c; @opcode; } else { (); } }; my $rdrand = sub { if (shift =~ /%[er](\w+)/) { my @opcode=(); my $dst=$1; if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } rex(\@opcode,0,$1,8); push @opcode,0x0f,0xc7,0xf0|($dst&7); @opcode; } else { (); } }; my $rdseed = sub { if (shift =~ /%[er](\w+)/) { my @opcode=(); my $dst=$1; if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } rex(\@opcode,0,$1,8); push @opcode,0x0f,0xc7,0xf8|($dst&7); @opcode; } else { (); } }; sub rxb { local *opcode=shift; my ($dst,$src1,$src2,$rxb)=@_; $rxb|=0x7<<5; $rxb&=~(0x04<<5) if($dst>=8); $rxb&=~(0x01<<5) if($src1>=8); $rxb&=~(0x02<<5) if($src2>=8); push @opcode,$rxb; } my $vprotd = sub { if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { my @opcode=(0x8f); rxb(\@opcode,$3,$2,-1,0x08); push @opcode,0x78,0xc2; push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M my $c=$1; push @opcode,$c=~/^0/?oct($c):$c; @opcode; } else { (); } }; my $vprotq = sub { if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { my @opcode=(0x8f); rxb(\@opcode,$3,$2,-1,0x08); push @opcode,0x78,0xc3; push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M my $c=$1; push @opcode,$c=~/^0/?oct($c):$c; @opcode; } else { (); } }; if ($nasm) { print <<___; default rel %define XMMWORD %define YMMWORD %define ZMMWORD ___ } elsif ($masm) { print <<___; OPTION DOTNAME ___ } while($line=<>) { chomp($line); $line =~ s|[#!].*$||; # get rid of asm-style comments... $line =~ s|/\*.*\*/||; # ... and C-style comments... $line =~ s|^\s+||; # ... and skip white spaces in beginning $line =~ s|\s+$||; # ... 
and at the end undef $label; undef $opcode; undef @args; if ($label=label->re(\$line)) { print $label->out(); } if (directive->re(\$line)) { printf "%s",directive->out(); } elsif ($opcode=opcode->re(\$line)) { my $asm = eval("\$".$opcode->mnemonic()); undef @bytes; if ((ref($asm) eq 'CODE') && scalar(@bytes=&$asm($line))) { print $gas?".byte\t":"DB\t",join(',',@bytes),"\n"; next; } ARGUMENT: while (1) { my $arg; if ($arg=register->re(\$line)) { opcode->size($arg->size()); } elsif ($arg=const->re(\$line)) { } elsif ($arg=ea->re(\$line)) { } elsif ($arg=expr->re(\$line)) { } else { last ARGUMENT; } push @args,$arg; last ARGUMENT if ($line !~ /^,/); $line =~ s/^,\s*//; } # ARGUMENT: if ($#args>=0) { my $insn; my $sz=opcode->size(); if ($gas) { $insn = $opcode->out($#args>=1?$args[$#args]->size():$sz); @args = map($_->out($sz),@args); printf "\t%s\t%s",$insn,join(",",@args); } else { $insn = $opcode->out(); foreach (@args) { my $arg = $_->out(); # $insn.=$sz compensates for movq, pinsrw, ... if ($arg =~ /^xmm[0-9]+$/) { $insn.=$sz; $sz="x" if(!$sz); last; } if ($arg =~ /^ymm[0-9]+$/) { $insn.=$sz; $sz="y" if(!$sz); last; } if ($arg =~ /^zmm[0-9]+$/) { $insn.=$sz; $sz="z" if(!$sz); last; } if ($arg =~ /^mm[0-9]+$/) { $insn.=$sz; $sz="q" if(!$sz); last; } } @args = reverse(@args); undef $sz if ($nasm && $opcode->mnemonic() eq "lea"); printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args)); } } else { printf "\t%s",$opcode->out(); } } print $line,"\n"; } print "\n$current_segment\tENDS\n" if ($current_segment && $masm); print "END\n" if ($masm); close STDOUT; ################################################# # Cross-reference x86_64 ABI "card" # # Unix Win64 # %rax * * # %rbx - - # %rcx #4 #1 # %rdx #3 #2 # %rsi #2 - # %rdi #1 - # %rbp - - # %rsp - - # %r8 #5 #3 # %r9 #6 #4 # %r10 * * # %r11 * * # %r12 - - # %r13 - - # %r14 - - # %r15 - - # # (*) volatile register # (-) preserved by callee # (#) Nth argument, volatile # # In Unix terms the top of the stack is the argument transfer area for # arguments which could not be accommodated in registers. Or in other words # the 7th [integer] argument resides at 8(%rsp) upon function entry point. # 128 bytes above %rsp constitute a "red zone" which is not touched # by signal handlers and can be used as temporary storage without # allocating a frame. # # In Win64 terms N*8 bytes on top of the stack is the argument transfer area, # which belongs to/can be overwritten by the callee. N is the number of # arguments passed to the callee, *but* not less than 4! This means that # upon function entry point the 5th argument resides at 40(%rsp), as well # as that 32 bytes from 8(%rsp) can always be used as temporary # storage [without allocating a frame]. One can actually argue that # one can assume a "red zone" above the stack pointer under Win64 as well. # The point is that the Windows kernel apparently never alters the area # above the user stack pointer in a truly asynchronous manner... # # All the above means that if the assembler programmer adheres to Unix # register and stack layout, but disregards the "red zone" existence, # it's possible to use the following prologue and epilogue to "gear" from # Unix to Win64 ABI in leaf functions with not more than 6 arguments. # # omnipotent_function: # ifdef WIN64 # movq %rdi,8(%rsp) # movq %rsi,16(%rsp) # movq %rcx,%rdi ; if 1st argument is actually present # movq %rdx,%rsi ; if 2nd argument is actually ... # movq %r8,%rdx ; if 3rd argument is ... # movq %r9,%rcx ; if 4th argument ... # movq 40(%rsp),%r8 ; if 5th ... # movq 48(%rsp),%r9 ; if 6th ... # endif # ...
# ifdef WIN64 # movq 8(%rsp),%rdi # movq 16(%rsp),%rsi # endif # ret # ################################################# # Win64 SEH, Structured Exception Handling. # # Unlike on Unix systems(*), the lack of Win64 stack unwinding information # has an undesired side-effect at run-time: if an exception is raised in # an assembler subroutine such as those in question (basically we're # referring to segmentation violations caused by malformed input # parameters), the application is briskly terminated without invoking # any exception handlers, most notably without generating a memory dump # or any user notification whatsoever. This poses a problem. It's # possible to address it by registering a custom language-specific # handler that would restore the processor context to the state at the # subroutine entry point and return an "exception is not handled, keep # unwinding" code. Writing such a handler can be a challenge... But it's # doable, though it requires a certain coding convention. Consider the # following snippet: # # .type function,@function # function: # movq %rsp,%rax # copy rsp to volatile register # pushq %r15 # save non-volatile registers # pushq %rbx # pushq %rbp # movq %rsp,%r11 # subq %rdi,%r11 # prepare [variable] stack frame # andq $-64,%r11 # movq %rax,0(%r11) # check for exceptions # movq %r11,%rsp # allocate [variable] stack frame # movq %rax,0(%rsp) # save original rsp value # magic_point: # ... # movq 0(%rsp),%rcx # pull original rsp value # movq -24(%rcx),%rbp # restore non-volatile registers # movq -16(%rcx),%rbx # movq -8(%rcx),%r15 # movq %rcx,%rsp # restore original rsp # ret # .size function,.-function # # The key is that up to magic_point a copy of the original rsp value remains # in the chosen volatile register and no non-volatile register, except for # rsp, is modified. While past magic_point rsp remains constant till # the very end of the function. In this case the custom language-specific # exception handler would look like this: # # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, # CONTEXT *context,DISPATCHER_CONTEXT *disp) # { ULONG64 *rsp = (ULONG64 *)context->Rax; # if (context->Rip >= magic_point) # { rsp = ((ULONG64 **)context->Rsp)[0]; # context->Rbp = rsp[-3]; # context->Rbx = rsp[-2]; # context->R15 = rsp[-1]; # } # context->Rsp = (ULONG64)rsp; # context->Rdi = rsp[1]; # context->Rsi = rsp[2]; # # memcpy (disp->ContextRecord,context,sizeof(CONTEXT)); # RtlVirtualUnwind(UNW_FLAG_NHANDLER,disp->ImageBase, # disp->ControlPc,disp->FunctionEntry,disp->ContextRecord, # &disp->HandlerData,&disp->EstablisherFrame,NULL); # return ExceptionContinueSearch; # } # # It's appropriate to implement this handler in assembler, directly in # the function's module. In order to do that one has to know the members' # offsets in the CONTEXT and DISPATCHER_CONTEXT structures and some constant # values.
Here they are: # # CONTEXT.Rax 120 # CONTEXT.Rcx 128 # CONTEXT.Rdx 136 # CONTEXT.Rbx 144 # CONTEXT.Rsp 152 # CONTEXT.Rbp 160 # CONTEXT.Rsi 168 # CONTEXT.Rdi 176 # CONTEXT.R8 184 # CONTEXT.R9 192 # CONTEXT.R10 200 # CONTEXT.R11 208 # CONTEXT.R12 216 # CONTEXT.R13 224 # CONTEXT.R14 232 # CONTEXT.R15 240 # CONTEXT.Rip 248 # CONTEXT.Xmm6 512 # sizeof(CONTEXT) 1232 # DISPATCHER_CONTEXT.ControlPc 0 # DISPATCHER_CONTEXT.ImageBase 8 # DISPATCHER_CONTEXT.FunctionEntry 16 # DISPATCHER_CONTEXT.EstablisherFrame 24 # DISPATCHER_CONTEXT.TargetIp 32 # DISPATCHER_CONTEXT.ContextRecord 40 # DISPATCHER_CONTEXT.LanguageHandler 48 # DISPATCHER_CONTEXT.HandlerData 56 # UNW_FLAG_NHANDLER 0 # ExceptionContinueSearch 1 # # In order to tie the handler to the function one has to compose a # couple of structures: one for the .xdata segment and one for .pdata. # # The UNWIND_INFO structure for the .xdata segment would be # # function_unwind_info: # .byte 9,0,0,0 # .rva handler # # This structure designates the exception handler for a function with a # zero-length prologue and no stack frame or frame register. # # To facilitate composing of .pdata structures, the auto-generated "gear" # prologue copies the rsp value to rax and denotes the next instruction with # a .LSEH_begin_{function_name} label. This essentially defines the SEH # styling rule mentioned in the beginning. The position of this label is # chosen in such a manner that possible exceptions raised in the "gear" # prologue would be accounted to the caller and unwound from the latter's # frame. The end of the function is marked with a respective # .LSEH_end_{function_name} label. To summarize, the .pdata segment would # contain # # .rva .LSEH_begin_function # .rva .LSEH_end_function # .rva function_unwind_info # # The reference to function_unwind_info from the .xdata segment is the # anchor. In case you wonder why the references are 32-bit .rvas and not # 64-bit .quads: references put into these two segments are required to be # *relative* to the base address of the current binary module, a.k.a. the # image base. No Win64 module, be it .exe or .dll, can be larger than # 2GB and thus such relative references can be and are accommodated in # 32 bits. # # Having reviewed the example function code, one can argue that "movq # %rsp,%rax" above is redundant. It is not! Keep in mind that on Unix # rax would contain an undefined value. If this "offends" you, use # another register and refrain from modifying rax till magic_point is # reached, i.e. as if it was a non-volatile register. If more registers # are required before [variable] frame setup is completed, note that # nobody says that you can have only one "magic point." You can # "liberate" non-volatile registers by denoting the last stack off-load # instruction and reflecting it in finer-grained unwind logic in the # handler. After all, isn't that why it's called a *language-specific* # handler... # # An attentive reader may notice that exceptions would be mishandled in # the auto-generated "gear" epilogue. Well, an exception effectively can't # occur there, because if the memory area used by it was subject to a # segmentation violation, then the exception would be raised upon the call # to the function (and as already mentioned be accounted to the caller, # which is not a problem). If you're still not comfortable, then define a # tail "magic point" just prior to the ret instruction and have the handler # treat it... # # (*) Note that we're talking about run-time, not debug-time. Lack of # unwind information makes debugging hard on both Windows and # Unix.
"Unlike" referes to the fact that on Unix signal handler # will always be invoked, core dumped and appropriate exit code # returned to parent (for user notification). libntru-0.5/tests/000077500000000000000000000000001271556312200142035ustar00rootroot00000000000000libntru-0.5/tests/test.c000066400000000000000000000010021271556312200153170ustar00rootroot00000000000000#include #include #include "test_poly.h" #include "test_ntru.h" #include "test_idxgen.h" #include "test_bitstring.h" #include "test_key.h" #include "test_hash.h" int main(int argc, char** argv) { printf("Running tests...\n"); uint8_t pass = test_poly(); pass &= test_ntru(); pass &= test_idxgen(); pass &= test_bitstring(); pass &= test_key(); pass &= test_hash(); printf("%s\n", pass?"All tests passed":"One or more tests failed"); return pass ? 0 : 1; } libntru-0.5/tests/test_bitstring.c000066400000000000000000000061671271556312200174250ustar00rootroot00000000000000#include #include "test_util.h" #include "bitstring.h" uint8_t test_append() { uint8_t c[5] = {0}; NtruBitStr b0 = {{0}, 0, 0};; c[0] = 78; ntru_append(&b0, (uint8_t*)&c, 1); uint8_t exp0[] = {78}; uint8_t valid = equals_arr(b0.buf, exp0, 1); NtruBitStr b1 = {{0}, 0, 0};; c[1] = 251; ntru_append(&b1, (uint8_t*)&c, 2); uint8_t exp1[] = {78, 251}; valid &= equals_arr(b1.buf, exp1, 2); NtruBitStr b2 = {{0}, 0, 0};; c[2] = 127; ntru_append(&b2, (uint8_t*)&c, 3); uint8_t exp2[] = {78, 251, 127}; valid &= equals_arr(b2.buf, exp2, 3); NtruBitStr b3 = {{0}, 0, 0};; c[3] = 0; ntru_append(&b3, (uint8_t*)&c, 4); uint8_t exp3[] = {78, 251, 127, 0}; valid &= equals_arr(b3.buf, exp3, 4); NtruBitStr b4 = {{0}, 0, 0};; c[4] = 100; ntru_append(&b4, (uint8_t*)&c, 5); uint8_t exp4[] = {78, 251, 127, 0, 100}; valid &= equals_arr(b4.buf, exp4, 5); return valid; } uint8_t test_trailing() { NtruBitStr b0 = {{0}, 0, 0}; uint8_t c0[] = {78}; ntru_append(&b0, (uint8_t*)&c0, 1); NtruBitStr b0_trail; ntru_trailing(&b0, 3, &b0_trail); uint8_t exp0[] = {6}; uint8_t valid = equals_arr(b0_trail.buf, exp0, 1); NtruBitStr b1 = {{0}, 0, 0}; uint8_t c1[] = {78, 251}; ntru_append(&b1, (uint8_t*)&c1, 2); NtruBitStr b1_trail; ntru_trailing(&b1, 9, &b1_trail); uint8_t exp1[] = {78, 1}; valid &= equals_arr(b1_trail.buf, exp1, 2); uint8_t c2[] = {100}; ntru_append(&b1_trail, (uint8_t*)&c2, 1); uint8_t exp2[] = {78, 201}; valid &= equals_arr((uint8_t*)&b1_trail.buf, (uint8_t*)&exp2, 2); NtruBitStr b2_trail; ntru_trailing(&b1_trail, 13, &b2_trail); uint8_t exp3[] = {78, 9}; valid &= equals_arr(b2_trail.buf, exp3, 2); ntru_trailing(&b1_trail, 11, &b2_trail); uint8_t exp4[] = {78, 1}; valid &= equals_arr(b2_trail.buf, exp4, 2); uint8_t c3[] = {100}; ntru_append(&b2_trail, (uint8_t*)&c3, 1); uint8_t exp5[] = {78, 33, 3}; valid &= equals_arr(b2_trail.buf, exp5, 3); ntru_trailing(&b2_trail, 16, &b1_trail); uint8_t exp6[] = {78, 33}; valid &= equals_arr(b1_trail.buf, exp6, 2); return valid; } uint8_t test_leading() { uint8_t valid = 1; NtruBitStr b0 = {{0}, 0, 0}; uint8_t c0[] = {78, 42}; ntru_append(&b0, (uint8_t*)&c0, 2); valid &= ntru_leading(&b0, 3) == 1; valid &= ntru_leading(&b0, 9) == 84; valid &= ntru_leading(&b0, 11) == 338; NtruBitStr b1; ntru_trailing(&b0, 11, &b1); uint8_t exp0[] = {78, 2}; valid &= equals_arr(b1.buf, exp0, 2); valid &= ntru_leading(&b1, 11) == 590; valid &= ntru_leading(&b1, 5) == 9; uint8_t c1[] = {115}; ntru_append(&b1, (uint8_t*)&c1, 1); valid &= ntru_leading(&b1, 9) == 230; valid &= ntru_leading(&b1, 11) == 922; uint8_t c2[] = {220}; ntru_append(&b1, (uint8_t*)&c2, 1); valid &= 
ntru_leading(&b1, 6) == 55; return valid; } uint8_t test_bitstring() { uint8_t valid = test_append(); valid &= test_trailing(); valid &= test_leading(); print_result("test_bitstring", valid); return valid; } libntru-0.5/tests/test_bitstring.h000066400000000000000000000001521271556312200174160ustar00rootroot00000000000000#ifndef TEST_BITSTRING_H #define TEST_BITSTRING_H #include <stdint.h> uint8_t test_bitstring(); #endif libntru-0.5/tests/test_hash.c000066400000000000000000000126651271556312200163410ustar00rootroot00000000000000#include <string.h> #include "test_util.h" #include "hash.h" #include "encparams.h" #include "rand.h" uint8_t test_hash() { char* test_string_char = "The quick brown fox jumps over the lazy dog"; size_t len = strlen(test_string_char); uint8_t test_string[len]; str_to_uint8(test_string_char, test_string); /* test ntru_sha1() */ uint8_t sha1[] = { 0x2f, 0xd4, 0xe1, 0xc6, 0x7a, 0x2d, 0x28, 0xfc, 0xed, 0x84, 0x9e, 0xe1, 0xbb, 0x76, 0xe7, 0x39, 0x1b, 0x93, 0xeb, 0x12 }; uint8_t hash1[20]; ntru_sha1(test_string, len, (uint8_t*)&hash1); int valid1 = memcmp((uint8_t*)hash1, (uint8_t*)sha1, 20) == 0; /* test ntru_sha1_4way() */ uint16_t i; NtruRandContext rand_ctx; NtruRandGen rng = NTRU_RNG_DEFAULT; valid1 &= ntru_rand_init(&rand_ctx, &rng) == NTRU_SUCCESS; for (i=0; i<100; i++) { uint16_t inp_len = i; uint8_t test_a[inp_len]; uint8_t test_b[inp_len]; uint8_t test_c[inp_len]; uint8_t test_d[inp_len]; uint8_t *hash_inp[4]; hash_inp[0] = test_a; hash_inp[1] = test_b; hash_inp[2] = test_c; hash_inp[3] = test_d; uint8_t j; for (j=0; j<4; j++) valid1 &= ntru_rand_generate(hash_inp[j], inp_len, &rand_ctx) == NTRU_SUCCESS; uint8_t H4_arr[4][20]; uint8_t *H4[4]; for (j=0; j<4; j++) H4[j] = H4_arr[j]; ntru_sha1_4way(hash_inp, inp_len, H4); for (j=0; j<4; j++) { uint8_t H1[20]; ntru_sha1(hash_inp[j], inp_len, H1); valid1 &= memcmp(H4[j], H1, 20) == 0; } } /* test ntru_sha1_8way() */ valid1 &= ntru_rand_init(&rand_ctx, &rng) == NTRU_SUCCESS; for (i=0; i<100; i++) { uint16_t inp_len = i; uint8_t test_a[inp_len]; uint8_t test_b[inp_len]; uint8_t test_c[inp_len]; uint8_t test_d[inp_len]; uint8_t test_e[inp_len]; uint8_t test_f[inp_len]; uint8_t test_g[inp_len]; uint8_t test_h[inp_len]; uint8_t *hash_inp[8]; hash_inp[0] = test_a; hash_inp[1] = test_b; hash_inp[2] = test_c; hash_inp[3] = test_d; hash_inp[4] = test_e; hash_inp[5] = test_f; hash_inp[6] = test_g; hash_inp[7] = test_h; uint8_t j; for (j=0; j<8; j++) valid1 &= ntru_rand_generate(hash_inp[j], inp_len, &rand_ctx) == NTRU_SUCCESS; uint8_t H8_arr[8][20]; uint8_t *H8[8]; for (j=0; j<8; j++) H8[j] = H8_arr[j]; ntru_sha1_8way(hash_inp, inp_len, H8); for (j=0; j<8; j++) { uint8_t H1[20]; ntru_sha1(hash_inp[j], inp_len, H1); valid1 &= memcmp(H8[j], H1, 20) == 0; } } valid1 &= ntru_rand_release(&rand_ctx) == NTRU_SUCCESS; /* test ntru_sha256() */ uint8_t sha256[] = { 0xd7, 0xa8, 0xfb, 0xb3, 0x07, 0xd7, 0x80, 0x94, 0x69, 0xca, 0x9a, 0xbc, 0xb0, 0x08, 0x2e, 0x4f, 0x8d, 0x56, 0x51, 0xe4, 0x6d, 0x3c, 0xdb, 0x76, 0x2d, 0x02, 0xd0, 0xbf, 0x37, 0xc9, 0xe5, 0x92 }; uint8_t hash256[32]; ntru_sha256(test_string, len, (uint8_t*)&hash256); int valid256 = memcmp((uint8_t*)&hash256, (uint8_t*)&sha256, 32) == 0; /* test ntru_sha256_4way() */ valid256 &= ntru_rand_init(&rand_ctx, &rng) == NTRU_SUCCESS; for (i=0; i<100; i++) { uint16_t inp_len = i; uint8_t test_a[inp_len]; uint8_t test_b[inp_len]; uint8_t test_c[inp_len]; uint8_t test_d[inp_len]; uint8_t *hash_inp[4]; hash_inp[0] = test_a; hash_inp[1] = test_b; hash_inp[2] = test_c; hash_inp[3] = test_d; uint8_t j; for (j=0;
j<4; j++) valid256 &= ntru_rand_generate(hash_inp[j], inp_len, &rand_ctx) == NTRU_SUCCESS; uint8_t H4_arr[4][32]; uint8_t *H4[4]; for (j=0; j<4; j++) H4[j] = H4_arr[j]; ntru_sha256_4way(hash_inp, inp_len, H4); for (j=0; j<4; j++) { uint8_t H1[32]; ntru_sha256(hash_inp[j], inp_len, H1); valid256 &= memcmp(H4[j], H1, 32) == 0; } } /* test ntru_sha256_8way() */ valid256 &= ntru_rand_init(&rand_ctx, &rng) == NTRU_SUCCESS; for (i=0; i<100; i++) { uint16_t inp_len = i; uint8_t test_a[inp_len]; uint8_t test_b[inp_len]; uint8_t test_c[inp_len]; uint8_t test_d[inp_len]; uint8_t test_e[inp_len]; uint8_t test_f[inp_len]; uint8_t test_g[inp_len]; uint8_t test_h[inp_len]; uint8_t *hash_inp[8]; hash_inp[0] = test_a; hash_inp[1] = test_b; hash_inp[2] = test_c; hash_inp[3] = test_d; hash_inp[4] = test_e; hash_inp[5] = test_f; hash_inp[6] = test_g; hash_inp[7] = test_h; uint8_t j; for (j=0; j<8; j++) valid256 &= ntru_rand_generate(hash_inp[j], inp_len, &rand_ctx) == NTRU_SUCCESS; uint8_t H8_arr[8][32]; uint8_t *H8[8]; for (j=0; j<8; j++) H8[j] = H8_arr[j]; ntru_sha256_8way(hash_inp, inp_len, H8); for (j=0; j<8; j++) { uint8_t H1[32]; ntru_sha256(hash_inp[j], inp_len, H1); valid256 &= memcmp(H8[j], H1, 32) == 0; } } valid256 &= ntru_rand_release(&rand_ctx) == NTRU_SUCCESS; uint8_t valid = valid1 && valid256; print_result("test_hash", valid); return valid; } libntru-0.5/tests/test_hash.h000066400000000000000000000001331271556312200163330ustar00rootroot00000000000000#ifndef TEST_HASH_H #define TEST_HASH_H #include <stdint.h> uint8_t test_hash(); #endif libntru-0.5/tests/test_idxgen.c000066400000000000000000000034451271556312200166700ustar00rootroot00000000000000#include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> #include "encparams.h" #include "idxgen.h" #include "test_util.h" /** number of calls to IGF */ #define NUM_ITER 100000 /* tests the IGF-2 implementation */ uint8_t test_idxgen() { /* seed random number generator */ time_t rs; time(&rs); srand(rs); /* generate a random IGF seed array */ uint32_t i; uint8_t seed[100]; for (i=0; i uint8_t test_idxgen(); #endif libntru-0.5/tests/test_key.c000066400000000000000000000054311271556312200162010ustar00rootroot00000000000000#include "test_key.h" #include "test_util.h" #include "ntru.h" #include "poly.h" uint8_t test_export_import() { #ifndef NTRU_AVOID_HAMMING_WT_PATENT NtruEncParams param_arr[] = {EES439EP1, EES1087EP2}; #else NtruEncParams param_arr[] = {EES1087EP2}; #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ uint8_t valid = 1; uint8_t i; for (i=0; i uint8_t test_key(); #endif libntru-0.5/tests/test_ntru.c000066400000000000000000000373001271556312200164010ustar00rootroot00000000000000#include <string.h> #ifdef WIN32 #include <malloc.h> #else #include <stdlib.h> #endif #include "test_ntru.h" #include "test_util.h" #include "ntru.h" #include "poly.h" void encrypt_poly(NtruIntPoly *m, NtruTernPoly *r, NtruIntPoly *h, NtruIntPoly *e, uint16_t q) { ntru_mult_tern(h, r, e, q); ntru_add(e, m); ntru_mod_mask(e, q-1); } void decrypt_poly(NtruIntPoly *e, NtruEncPrivKey *priv, NtruIntPoly *d, uint16_t modulus) { #ifndef NTRU_AVOID_HAMMING_WT_PATENT if (priv->t.prod_flag) ntru_mult_prod(e, &priv->t.poly.prod, d, modulus-1); else #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ ntru_mult_tern(e, &priv->t.poly.tern, d, modulus-1); ntru_mod_mask(d, modulus-1); ntru_mult_fac(d, 3); ntru_add(d, e); ntru_mod_center(d, modulus); ntru_mod3(d); uint16_t i; for (i=0; i<d->N; i++) if (d->coeffs[i] == 2) d->coeffs[i] = -1; } /** Returns 0 on error, 1 on success */ uint8_t gen_key_pair(char *seed, NtruEncParams *params, NtruEncKeyPair *kp) { uint16_t
seed_len = strlen(seed); uint8_t seed_uint8[seed_len]; str_to_uint8(seed, seed_uint8); NtruRandContext rand_ctx; NtruRandGen rng = NTRU_RNG_CTR_DRBG; ntru_rand_init_det(&rand_ctx, &rng, seed_uint8, seed_len); uint8_t result = 1; result &= ntru_gen_key_pair(params, kp, &rand_ctx) == NTRU_SUCCESS; result &= ntru_rand_release(&rand_ctx) == NTRU_SUCCESS; return result; } uint8_t test_keygen() { NtruEncParams param_arr[] = ALL_PARAM_SETS; uint8_t valid = 1; uint8_t i; NtruRandGen rng = NTRU_RNG_DEFAULT; for (i=0; i uint8_t test_ntru(); #endif libntru-0.5/tests/test_poly.c000066400000000000000000000213721271556312200163760ustar00rootroot00000000000000#include <stdint.h> #include <string.h> #include "poly.h" #include "ntru.h" #include "test_util.h" #include "test_poly.h" /** * @brief Multiplication of two general polynomials * * Multiplies a NtruIntPoly by another. The number of coefficients * must be the same for both polynomials. * * @param a a general polynomial * @param b a general polynomial * @param c output parameter; a pointer to store the new polynomial * @return 0 if the number of coefficients differs, 1 otherwise */ uint8_t ntru_mult_int_nomod(NtruIntPoly *a, NtruIntPoly *b, NtruIntPoly *c) { uint16_t N = a->N; if (N != b->N) return 0; c->N = N; uint16_t i, k; for (k=0; k<N; k++) { int16_t ck = 0; /* 16-bit accumulator; wraparound mod 2^16 is harmless because callers reduce mod a power of two */ for (i=0; i<N; i++) ck += b->coeffs[i] * a->coeffs[(N+k-i)%N]; c->coeffs[k] = ck; } return 1; } /** tests ntru_mult_int() */ uint8_t test_mult_int() { uint8_t valid = 1; /* multiplication modulo q */ NtruIntPoly a1 = {11, {-1, 1, 1, 0, -1, 0, 1, 0, 0, 1, -1}}; NtruIntPoly b1 = {11, {14, 11, 26, 24, 14, 16, 30, 7, 25, 6, 19}}; NtruIntPoly c1; ntru_mult_int(&a1, &b1, &c1, 32-1); NtruIntPoly c1_exp = {11, {3, 25, -10, 21, 10, 7, 6, 7, 5, 29, -7}}; valid &= equals_int_mod(&c1_exp, &c1, 32); /* ntru_mult_int should give the same result as ntru_mult_int_nomod followed by ntru_mod_mask */ NtruIntPoly a2 = {5, {1278, 1451, 850, 1071, 942}}; NtruIntPoly b2 = {5, {571, 52, 1096, 1800, 662}}; NtruIntPoly c2, c2_exp; valid &= ntru_mult_int(&a2, &b2, &c2, 2048-1); valid &= ntru_mult_int_nomod(&a2, &b2, &c2_exp); ntru_mod_mask(&c2_exp, 2048-1); valid &= equals_int_mod(&c2_exp, &c2, 2048); NtruRandGen rng = NTRU_RNG_DEFAULT; NtruRandContext rand_ctx; valid &= ntru_rand_init(&rand_ctx, &rng) == NTRU_SUCCESS; int i; for (i=0; i<10; i++) { uint16_t N; valid &= rand_ctx.rand_gen->generate((uint8_t*)&N, sizeof N, &rand_ctx); N = 100 + (N%(NTRU_MAX_DEGREE-100)); NtruIntPoly a3, b3, c3, c3_exp; valid &= rand_int(N, 11, &a3, &rand_ctx); valid &= rand_int(N, 11, &b3, &rand_ctx); valid &= ntru_mult_int_nomod(&a3, &b3, &c3_exp); ntru_mod_mask(&c3_exp, 2048-1); valid &= ntru_mult_int_16(&a3, &b3, &c3, 2048-1); valid &= equals_int_mod(&c3_exp, &c3, 2048); #ifndef __ARMEL__ valid &= ntru_mult_int_64(&a3, &b3, &c3, 2048-1); valid &= equals_int_mod(&c3_exp, &c3, 2048); #endif } valid &= ntru_rand_release(&rand_ctx) == NTRU_SUCCESS; print_result("test_mult_int", valid); return valid; } /* tests ntru_mult_tern() */ uint8_t test_mult_tern() { NtruRandGen rng = NTRU_RNG_DEFAULT; NtruRandContext rand_ctx; uint8_t valid = ntru_rand_init(&rand_ctx, &rng) == NTRU_SUCCESS; NtruTernPoly a; valid &= ntru_rand_tern(11, 3, 3, &a, &rand_ctx); NtruIntPoly b; valid &= rand_int(11, 5, &b, &rand_ctx); NtruIntPoly a_int; ntru_tern_to_int(&a, &a_int); NtruIntPoly c_int; ntru_mult_int(&a_int, &b, &c_int, 32-1); NtruIntPoly c_tern; ntru_mult_tern_32(&b, &a, &c_tern, 32-1); valid &= equals_int_mod(&c_tern, &c_int, 32); #ifndef __ARMEL__ ntru_mult_tern_64(&b, &a, &c_tern, 32-1); valid &= equals_int_mod(&c_tern, &c_int, 32);
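/* each optimized variant is checked against the generic ntru_mult_int() result */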
#endif #ifdef __SSSE3__ ntru_mult_tern_sse(&b, &a, &c_tern, 32-1); valid &= equals_int_mod(&c_tern, &c_int, 32); #endif int i; for (i=0; i<10; i++) { uint16_t N; valid &= rand_ctx.rand_gen->generate((uint8_t*)&N, sizeof N, &rand_ctx); N = 100 + (N%(NTRU_MAX_DEGREE-100)); uint16_t num_ones; valid &= rand_ctx.rand_gen->generate((uint8_t*)&num_ones, sizeof num_ones, &rand_ctx); num_ones %= N/2; num_ones %= NTRU_MAX_ONES; uint16_t num_neg_ones; valid &= rand_ctx.rand_gen->generate((uint8_t*)&num_neg_ones, sizeof num_neg_ones, &rand_ctx); num_neg_ones %= N/2; num_neg_ones %= NTRU_MAX_ONES; valid &= ntru_rand_tern(N, num_ones, num_neg_ones, &a, &rand_ctx); valid &= rand_int(N, 11, &b, &rand_ctx); ntru_tern_to_int(&a, &a_int); ntru_mult_int_nomod(&a_int, &b, &c_int); ntru_mult_tern_32(&b, &a, &c_tern, 2048-1); valid &= equals_int_mod(&c_tern, &c_int, 2048); #ifndef __ARMEL__ ntru_mult_tern_64(&b, &a, &c_tern, 2048-1); valid &= equals_int_mod(&c_tern, &c_int, 2048); #endif #ifdef __SSSE3__ ntru_mult_tern_sse(&b, &a, &c_tern, 2048-1); valid &= equals_int_mod(&c_tern, &c_int, 2048); #endif } valid &= ntru_rand_release(&rand_ctx) == NTRU_SUCCESS; print_result("test_mult_tern", valid); return valid; } #ifndef NTRU_AVOID_HAMMING_WT_PATENT /* tests ntru_mult_prod() */ uint8_t test_mult_prod() { uint8_t valid = 1; uint16_t i; NtruRandGen rng = NTRU_RNG_DEFAULT; NtruRandContext rand_ctx; valid &= ntru_rand_init(&rand_ctx, &rng) == NTRU_SUCCESS; uint16_t log_modulus = 11; uint16_t modulus = 1 << log_modulus; for (i=0; i<10; i++) { NtruProdPoly a; valid &= ntru_rand_prod(853, 8, 8, 8, 9, &a, &rand_ctx); NtruIntPoly b; valid &= rand_int(853, 1< uint8_t test_poly(); #endif libntru-0.5/tests/test_util.c000066400000000000000000000127721271556312200163720ustar00rootroot00000000000000#include <stdio.h> #include <string.h> #include <stdint.h> #include "test_util.h" #include "poly.h" /** For equals_hash_func() */ #define HASH_INPUT_LEN 100 uint8_t equals_int(NtruIntPoly *a, NtruIntPoly *b) { if (a->N != b->N) return 0; uint16_t i; for (i=0; i<a->N; i++) if (a->coeffs[i] != b->coeffs[i]) return 0; return 1; } uint8_t equals_int_mod(NtruIntPoly *a, NtruIntPoly *b, uint16_t modulus) { if (a->N != b->N) return 0; uint16_t i; for (i=0; i<a->N; i++) if ((a->coeffs[i]-b->coeffs[i]) % modulus) return 0; return 1; } uint8_t equals_tern(NtruTernPoly *a, NtruTernPoly *b) { if (a->N != b->N) return 0; if (a->num_ones != b->num_ones) return 0; if (a->num_neg_ones != b->num_neg_ones) return 0; uint16_t i; for (i=0; i<a->num_ones; i++) if (a->ones[i] != b->ones[i]) return 0; for (i=0; i<a->num_neg_ones; i++) if (a->neg_ones[i] != b->neg_ones[i]) return 0; return 1; } #ifndef NTRU_AVOID_HAMMING_WT_PATENT uint8_t equals_prod(NtruProdPoly *a, NtruProdPoly *b) { return a->N==b->N && equals_tern(&a->f1, &b->f1) && equals_tern(&a->f2, &b->f2) && equals_tern(&a->f3, &b->f3); } #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ uint8_t equals_key_pair(NtruEncKeyPair *kp1, NtruEncKeyPair *kp2) { if (kp1->priv.q != kp2->priv.q) return 0; #ifndef NTRU_AVOID_HAMMING_WT_PATENT if (kp1->priv.t.prod_flag && !equals_prod(&kp1->priv.t.poly.prod, &kp2->priv.t.poly.prod)) return 0; #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ if (!kp1->priv.t.prod_flag && !equals_tern(&kp1->priv.t.poly.tern, &kp2->priv.t.poly.tern)) return 0; if (kp1->pub.q != kp2->pub.q) return 0; if (!equals_int(&kp1->pub.h, &kp2->pub.h)) return 0; return 1; } uint8_t equals_arr(uint8_t *arr1, uint8_t *arr2, uint16_t len) { uint16_t i; for (i=0; iname, params2->name) == 0; equal &= params1->N == params2->N; equal &= params1->q ==
params2->q; equal &= params1->prod_flag == params2->prod_flag; equal &= params1->df1 == params2->df1; if (params1->prod_flag) { equal &= params1->df2 == params2->df2; equal &= params1->df3 == params2->df3; } equal &= params1->dm0 == params2->dm0; equal &= params1->db == params2->db; equal &= params1->c == params2->c; equal &= params1->min_calls_r == params2->min_calls_r; equal &= params1->min_calls_mask == params2->min_calls_mask; equal &= params1->hash_seed == params2->hash_seed; equal &= memcmp(params1->oid, params2->oid, sizeof(params1->oid)) == 0; equal &= equals_hash_func(params1->hash, params2->hash, params1->hlen); equal &= params1->hlen == params2->hlen; equal &= params1->pklen == params2->pklen; return equal; } uint8_t rand_int(uint16_t N, uint16_t pow2q, NtruIntPoly *poly, NtruRandContext *rand_ctx) { uint16_t rand_data[N]; if (!rand_ctx->rand_gen->generate((uint8_t*)rand_data, N*2, rand_ctx)) return 0; poly->N = N; uint16_t shift = 16 - pow2q; while ((int16_t)--N >= 0) poly->coeffs[N] = rand_data[N] >> shift; return 1; } void ntru_tern_to_int(NtruTernPoly *a, NtruIntPoly *b) { memset(&b->coeffs, 0, a->N * sizeof b->coeffs[0]); uint16_t i; for (i=0; i<a->num_ones; i++) b->coeffs[a->ones[i]] = 1; for (i=0; i<a->num_neg_ones; i++) b->coeffs[a->neg_ones[i]] = -1; b->N = a->N; } void ntru_add_tern(NtruIntPoly *a, NtruTernPoly *b) { uint16_t i; for (i=0; i<b->num_ones; i++) a->coeffs[b->ones[i]]++; for (i=0; i<b->num_neg_ones; i++) a->coeffs[b->neg_ones[i]]--; } #ifndef NTRU_AVOID_HAMMING_WT_PATENT void ntru_prod_to_int(NtruProdPoly *a, NtruIntPoly *b, uint16_t modulus) { memset(&b->coeffs, 0, a->N * sizeof b->coeffs[0]); b->N = a->N; uint16_t mod_mask = modulus - 1; NtruIntPoly c; ntru_tern_to_int(&a->f1, &c); ntru_mult_tern(&c, &a->f2, b, mod_mask); ntru_add_tern(b, &a->f3); } #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ void ntru_priv_to_int(NtruPrivPoly *a, NtruIntPoly *b, uint16_t modulus) { #ifndef NTRU_AVOID_HAMMING_WT_PATENT if (a->prod_flag) ntru_prod_to_int(&a->poly.prod, b, modulus); else #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ ntru_tern_to_int(&a->poly.tern, b); } void str_to_uint8(char *in, uint8_t *out) { size_t len = strlen(in); size_t i; for (i=0; i #include "ntru.h" uint8_t equals_int(NtruIntPoly *a, NtruIntPoly *b); uint8_t equals_int_mod(NtruIntPoly *a, NtruIntPoly *b, uint16_t modulus); uint8_t equals_key_pair(NtruEncKeyPair *kp1, NtruEncKeyPair *kp2); uint8_t equals_arr(uint8_t *arr1, uint8_t *arr2, uint16_t len); uint8_t equals_params(NtruEncParams *params1, NtruEncParams *params2); uint8_t rand_int(uint16_t N, uint16_t pow2q, NtruIntPoly *poly, NtruRandContext *rand_ctx); /** * @brief Ternary to general integer polynomial * * Converts a NtruTernPoly to an equivalent NtruIntPoly. * * @param a a ternary polynomial * @param b output parameter; a pointer to store the new polynomial */ void ntru_tern_to_int(NtruTernPoly *a, NtruIntPoly *b); #ifndef NTRU_AVOID_HAMMING_WT_PATENT /** * @brief Product-form to general polynomial * * Converts a NtruProdPoly to an equivalent NtruIntPoly. * * @param a a product-form polynomial * @param b output parameter; a pointer to store the new polynomial * @param modulus the modulus; must be a power of two */ void ntru_prod_to_int(NtruProdPoly *a, NtruIntPoly *b, uint16_t modulus); #endif /* NTRU_AVOID_HAMMING_WT_PATENT */ /** * @brief Private polynomial to general polynomial * * Converts a NtruPrivPoly (i.e. a NtruTernPoly or NtruProdPoly) to an * equivalent NtruIntPoly.
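* When NTRU_AVOID_HAMMING_WT_PATENT is defined, the product-form branch is compiled out and the polynomial is always treated as ternary.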
* * @param a a "private" polynomial * @param b output parameter; a pointer to store the new polynomial * @param modulus the modulus; must be a power of two */ void ntru_priv_to_int(NtruPrivPoly *a, NtruIntPoly *b, uint16_t modulus); /** * @brief String to uint8_t array * * Converts a char array to a uint8_t array. If char is longer than uint8_t, * only the least significant 8 bits of each element are copied. * * @param in the string to convert * @param out output parameter; a pointer to write the converted bytes to */ void str_to_uint8(char *in, uint8_t *out); void print_result(char *test_name, uint8_t valid); #endif