diff options
| author | cinap_lenrek <cinap_lenrek@felloff.net> | 2017-11-20 00:10:35 +0100 |
|---|---|---|
| committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2017-11-20 00:10:35 +0100 |
| commit | 077e719dfbf9bf2582bed80026251cc0d108c16e (patch) | |
| tree | 5e8fd7d6297f5d007ea21c85f8346eae0717ed7d | |
| parent | 1eb373945455f1ba03fa1b221529d74ca2a778ad (diff) | |
| download | plan9front-077e719dfbf9bf2582bed80026251cc0d108c16e.tar.xz | |
libsec: write optimized _chachablock() function for amd64 / sse2
doing 4 quarterrounds in parallel using 128-bit
vector registers. for second round shuffle the columns and
then shuffle back.
code is rather obvious. only trick here is for the first
quarterround PSHUFLW/PSHUFHW is used to swap the halfwords
for the <<<16 rotation.
| -rw-r--r-- | sys/src/ape/lib/sec/amd64/mkfile | 1 | ||||
| -rw-r--r-- | sys/src/ape/lib/sec/port/mkfile | 2 | ||||
| -rw-r--r-- | sys/src/libsec/amd64/chachablock.s | 74 | ||||
| -rw-r--r-- | sys/src/libsec/amd64/mkfile | 1 | ||||
| -rw-r--r-- | sys/src/libsec/port/chacha.c | 39 | ||||
| -rw-r--r-- | sys/src/libsec/port/chachablock.c | 29 | ||||
| -rw-r--r-- | sys/src/libsec/port/mkfile | 2 |
7 files changed, 112 insertions, 36 deletions
diff --git a/sys/src/ape/lib/sec/amd64/mkfile b/sys/src/ape/lib/sec/amd64/mkfile index f11c532a6..a2b3e867a 100644 --- a/sys/src/ape/lib/sec/amd64/mkfile +++ b/sys/src/ape/lib/sec/amd64/mkfile @@ -3,6 +3,7 @@ APE=/sys/src/ape LIB=/$objtype/lib/ape/libsec.a FILES=\ + chachablock\ md5block\ sha1block\ aesni\ diff --git a/sys/src/ape/lib/sec/port/mkfile b/sys/src/ape/lib/sec/port/mkfile index b25156cba..54d96bb8a 100644 --- a/sys/src/ape/lib/sec/port/mkfile +++ b/sys/src/ape/lib/sec/port/mkfile @@ -11,7 +11,7 @@ CFILES = des.c desmodes.c desECB.c desCBC.c des3ECB.c des3CBC.c\ sha1pickle.c md5pickle.c\ poly1305.c\ rc4.c\ - chacha.c\ + chacha.c chachablock.c\ salsa.c\ genrandom.c prng.c fastrand.c nfastrand.c\ probably_prime.c smallprimetest.c genprime.c dsaprimes.c\ diff --git a/sys/src/libsec/amd64/chachablock.s b/sys/src/libsec/amd64/chachablock.s new file mode 100644 index 000000000..d098c4425 --- /dev/null +++ b/sys/src/libsec/amd64/chachablock.s @@ -0,0 +1,74 @@ +#define ROTATE(n, v1, v2) \ + MOVO v1, v2; \ + PSLLL $(n), v1; \ + PSRLL $(32-n), v2; \ + POR v1, v2 + +TEXT _chachablock(SB), 0, $0 + MOVOU 0(RARG), X0 + MOVOU 16(RARG), X1 + MOVOU 32(RARG), X2 + MOVOU 48(RARG), X3 + + MOVL rounds+8(FP), CX + SHRL $1, CX + +_loop: + PADDL X1, X0 + PXOR X0, X3 + /* ROTATE(16, X3, X3) */ + PSHUFLW $(1<<0 | 0<<2 | 3<<4 | 2<<6), X3, X3 + PSHUFHW $(1<<0 | 0<<2 | 3<<4 | 2<<6), X3, X3 + + PADDL X3, X2 + MOVO X1, X4 + PXOR X2, X4 + ROTATE(12, X4, X1) + + PADDL X1, X0 + MOVO X0, X4 + PXOR X3, X4 + ROTATE(8, X4, X3) + + PADDL X3, X2 + MOVO X1, X4 + PXOR X2, X4 + ROTATE(7, X4, X1) + + PSHUFL $(1<<0 | 2<<2 | 3<<4 | 0<<6), X1, X1 + PSHUFL $(2<<0 | 3<<2 | 0<<4 | 1<<6), X2, X2 + PSHUFL $(3<<0 | 0<<2 | 1<<4 | 2<<6), X3, X3 + + PADDL X1, X0 + PXOR X0, X3 + /* ROTATE(16, X3, X3) */ + PSHUFLW $(1<<0 | 0<<2 | 3<<4 | 2<<6), X3, X3 + PSHUFHW $(1<<0 | 0<<2 | 3<<4 | 2<<6), X3, X3 + + PADDL X3, X2 + MOVO X1, X4 + PXOR X2, X4 + ROTATE(12, X4, X1) + + PADDL X1, X0 + MOVO X0, X4 + PXOR X3, X4 + 
ROTATE(8, X4, X3) + + PADDL X3, X2 + MOVO X1, X4 + PXOR X2, X4 + ROTATE(7, X4, X1) + + PSHUFL $(3<<0 | 0<<2 | 1<<4 | 2<<6), X1, X1 + PSHUFL $(2<<0 | 3<<2 | 0<<4 | 1<<6), X2, X2 + PSHUFL $(1<<0 | 2<<2 | 3<<4 | 0<<6), X3, X3 + + DECL CX + JNE _loop + + MOVOU X0, 0(RARG) + MOVOU X1, 16(RARG) + MOVOU X2, 32(RARG) + MOVOU X3, 48(RARG) + RET diff --git a/sys/src/libsec/amd64/mkfile b/sys/src/libsec/amd64/mkfile index 990d35aa4..633fde1fe 100644 --- a/sys/src/libsec/amd64/mkfile +++ b/sys/src/libsec/amd64/mkfile @@ -3,6 +3,7 @@ objtype=amd64 LIB=/$objtype/lib/libsec.a FILES=\ + chachablock\ md5block\ sha1block\ aesni\ diff --git a/sys/src/libsec/port/chacha.c b/sys/src/libsec/port/chacha.c index b885b8b92..9431ca69a 100644 --- a/sys/src/libsec/port/chacha.c +++ b/sys/src/libsec/port/chacha.c @@ -10,26 +10,13 @@ and including the changes to block number and nonce defined in RFC7539 #include "os.h" #include <libsec.h> -enum{ - Blockwords= ChachaBsize/sizeof(u32int) -}; +/* from chachablock.$O */ +extern void _chachablock(u32int x[16], int rounds); /* little-endian data order */ #define GET4(p) ((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24)) #define PUT4(p,v) (p)[0]=(v);(p)[1]=(v)>>8;(p)[2]=(v)>>16;(p)[3]=(v)>>24 -#define ROTATE(v,c) ((u32int)((v) << (c)) | ((v) >> (32 - (c)))) - -#define QUARTERROUND(ia,ib,ic,id) { \ - u32int a, b, c, d, t; \ - a = x[ia]; b = x[ib]; c = x[ic]; d = x[id]; \ - a += b; t = d^a; d = ROTATE(t,16); \ - c += d; t = b^c; b = ROTATE(t,12); \ - a += b; t = d^a; d = ROTATE(t, 8); \ - c += d; t = b^c; b = ROTATE(t, 7); \ - x[ia] = a; x[ib] = b; x[ic] = c; x[id] = d; \ -} - #define ENCRYPT(s, x, y, d) {\ u32int v; \ v = GET4(s); \ @@ -88,22 +75,6 @@ setupChachastate(Chachastate *s, uchar *key, ulong keylen, uchar *iv, ulong ivle } static void -dorounds(u32int x[Blockwords], int rounds) -{ - for(; rounds > 0; rounds -= 2) { - QUARTERROUND(0, 4, 8,12) - QUARTERROUND(1, 5, 9,13) - QUARTERROUND(2, 6,10,14) - QUARTERROUND(3, 7,11,15) - - QUARTERROUND(0, 
5,10,15) - QUARTERROUND(1, 6,11,12) - QUARTERROUND(2, 7, 8,13) - QUARTERROUND(3, 4, 9,14) - } -} - -static void hchachablock(uchar h[32], Chachastate *s) { u32int x[16]; @@ -125,7 +96,7 @@ hchachablock(uchar h[32], Chachastate *s) x[14] = s->input[14]; x[15] = s->input[15]; - dorounds(x, s->rounds); + _chachablock(x, s->rounds); PUT4(h+0*4, x[0]); PUT4(h+1*4, x[1]); @@ -183,7 +154,7 @@ chacha_setblock(Chachastate *s, u64int blockno) static void encryptblock(Chachastate *s, uchar *src, uchar *dst) { - u32int x[Blockwords]; + u32int x[16]; int i; x[0] = s->input[0]; @@ -202,7 +173,7 @@ encryptblock(Chachastate *s, uchar *src, uchar *dst) x[13] = s->input[13]; x[14] = s->input[14]; x[15] = s->input[15]; - dorounds(x, s->rounds); + _chachablock(x, s->rounds); for(i=0; i<nelem(x); i+=4){ ENCRYPT(src, x[i], s->input[i], dst); diff --git a/sys/src/libsec/port/chachablock.c b/sys/src/libsec/port/chachablock.c new file mode 100644 index 000000000..955b48107 --- /dev/null +++ b/sys/src/libsec/port/chachablock.c @@ -0,0 +1,29 @@ +#include "os.h" + +#define ROTATE(v,c) ((u32int)((v) << (c)) | ((v) >> (32 - (c)))) + +#define QUARTERROUND(ia,ib,ic,id) { \ + u32int a, b, c, d, t; \ + a = x[ia]; b = x[ib]; c = x[ic]; d = x[id]; \ + a += b; t = d^a; d = ROTATE(t,16); \ + c += d; t = b^c; b = ROTATE(t,12); \ + a += b; t = d^a; d = ROTATE(t, 8); \ + c += d; t = b^c; b = ROTATE(t, 7); \ + x[ia] = a; x[ib] = b; x[ic] = c; x[id] = d; \ +} + +void +_chachablock(u32int x[16], int rounds) +{ + for(; rounds > 0; rounds -= 2) { + QUARTERROUND(0, 4, 8,12) + QUARTERROUND(1, 5, 9,13) + QUARTERROUND(2, 6,10,14) + QUARTERROUND(3, 7,11,15) + + QUARTERROUND(0, 5,10,15) + QUARTERROUND(1, 6,11,12) + QUARTERROUND(2, 7, 8,13) + QUARTERROUND(3, 4, 9,14) + } +} diff --git a/sys/src/libsec/port/mkfile b/sys/src/libsec/port/mkfile index e0bc8becb..b6490b7bc 100644 --- a/sys/src/libsec/port/mkfile +++ b/sys/src/libsec/port/mkfile @@ -10,7 +10,7 @@ CFILES = des.c desmodes.c desECB.c desCBC.c des3ECB.c 
des3CBC.c\ sha1pickle.c md5pickle.c\ poly1305.c\ rc4.c\ - chacha.c\ + chacha.c chachablock.c\ salsa.c\ genrandom.c prng.c fastrand.c nfastrand.c\ probably_prime.c smallprimetest.c genprime.c dsaprimes.c\ |
