summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcinap_lenrek <cinap_lenrek@felloff.net>2016-06-09 23:12:46 +0200
committercinap_lenrek <cinap_lenrek@felloff.net>2016-06-09 23:12:46 +0200
commita00b6bdbfa72a1688a866edf5f825720c9874ada (patch)
tree82b2e1a62cc75c007039fe07663300706947f381
parent5cdabc5eb16db432abcf61dc126bfb7f9b2978c1 (diff)
downloadplan9front-a00b6bdbfa72a1688a866edf5f825720c9874ada.tar.xz
8c, 6c: native ROL (cyclic shift) instruction support, improve peephole optimizers
introduce rolor() function to substitute (a << c) | (a >> (bits(a) - c)) with (a <<< c) where <<< is cyclic rotation and c is constant. this almost doubles the speed of chacha encryption on 386 and amd64. the peephole optimizer used to stop when it hit a shift or rol instruction when attempting to eliminate moves by register substitution. but we do not have to stop, as long as the shift count operand is not CX (which cannot be substituted) and CX is not a subject for substitution.
-rw-r--r--sys/src/cmd/6c/cgen.c1
-rw-r--r--sys/src/cmd/6c/peep.c28
-rw-r--r--sys/src/cmd/6c/sgen.c11
-rw-r--r--sys/src/cmd/6c/txt.c10
-rw-r--r--sys/src/cmd/8c/cgen.c1
-rw-r--r--sys/src/cmd/8c/peep.c36
-rw-r--r--sys/src/cmd/8c/sgen.c14
-rw-r--r--sys/src/cmd/8c/txt.c8
-rw-r--r--sys/src/cmd/cc/cc.h2
-rw-r--r--sys/src/cmd/cc/sub.c41
10 files changed, 119 insertions, 33 deletions
diff --git a/sys/src/cmd/6c/cgen.c b/sys/src/cmd/6c/cgen.c
index 2cfbfbe66..d43d1d366 100644
--- a/sys/src/cmd/6c/cgen.c
+++ b/sys/src/cmd/6c/cgen.c
@@ -159,6 +159,7 @@ cgen(Node *n, Node *nn)
regfree(&nod);
break;
+ case OROL:
case OLSHR:
case OASHL:
case OASHR:
diff --git a/sys/src/cmd/6c/peep.c b/sys/src/cmd/6c/peep.c
index 7096ce958..9c2b3c429 100644
--- a/sys/src/cmd/6c/peep.c
+++ b/sys/src/cmd/6c/peep.c
@@ -370,15 +370,11 @@ subprop(Reg *r0)
break;
p = r->prog;
switch(p->as) {
- case ACALL:
- return 0;
-
case AIMULL:
case AIMULQ:
case AIMULW:
if(p->to.type != D_NONE)
break;
-
case ADIVB:
case ADIVL:
case ADIVQ:
@@ -393,6 +389,19 @@ subprop(Reg *r0)
case AMULQ:
case AMULW:
+ case ACWD:
+ case ACDQ:
+ case ACQO:
+
+ case AREP:
+ case AREPN:
+ case ALOOP:
+ case ALOOPEQ:
+ case ALOOPNE:
+
+ case ACALL:
+ return 0;
+
case AROLB:
case AROLL:
case AROLQ:
@@ -417,14 +426,9 @@ subprop(Reg *r0)
case ASHRL:
case ASHRQ:
case ASHRW:
-
- case AREP:
- case AREPN:
-
- case ACWD:
- case ACDQ:
- case ACQO:
- return 0;
+ if(p->from.type == D_CX && v1->type == D_CX)
+ return 0;
+ break;
case AORL:
case AORQ:
diff --git a/sys/src/cmd/6c/sgen.c b/sys/src/cmd/6c/sgen.c
index a7d751c0f..14e5f3c7e 100644
--- a/sys/src/cmd/6c/sgen.c
+++ b/sys/src/cmd/6c/sgen.c
@@ -120,7 +120,6 @@ xcom(Node *n)
*l = *(n->left);
l->xoffset += r->vconst;
n->left = l;
- r = n->right;
goto brk;
}
break;
@@ -212,7 +211,6 @@ xcom(Node *n)
if(g >= 0) {
n->left = r;
n->right = l;
- l = r;
r = n->right;
}
g = vlog(r);
@@ -288,6 +286,12 @@ xcom(Node *n)
indexshift(n);
break;
+ case OOR:
+ xcom(l);
+ xcom(r);
+ rolor(n);
+ break;
+
default:
if(l != Z)
xcom(l);
@@ -298,6 +302,8 @@ xcom(Node *n)
brk:
if(n->addable >= 10)
return;
+ l = n->left;
+ r = n->right;
if(l != Z)
n->complex = l->complex;
if(r != Z) {
@@ -344,6 +350,7 @@ brk:
}
break;
+ case OROL:
case OLSHR:
case OASHL:
case OASHR:
diff --git a/sys/src/cmd/6c/txt.c b/sys/src/cmd/6c/txt.c
index 5239b5633..861cdfde4 100644
--- a/sys/src/cmd/6c/txt.c
+++ b/sys/src/cmd/6c/txt.c
@@ -1305,6 +1305,16 @@ gopcode(int o, Type *ty, Node *f, Node *t)
a = ASALQ;
break;
+ case OROL:
+ a = AROLL;
+ if(et == TCHAR || et == TUCHAR)
+ a = AROLB;
+ if(et == TSHORT || et == TUSHORT)
+ a = AROLW;
+ if(et == TVLONG || et == TUVLONG || et == TIND)
+ a = AROLQ;
+ break;
+
case OFUNC:
a = ACALL;
break;
diff --git a/sys/src/cmd/8c/cgen.c b/sys/src/cmd/8c/cgen.c
index 35559bc15..1900f3cbc 100644
--- a/sys/src/cmd/8c/cgen.c
+++ b/sys/src/cmd/8c/cgen.c
@@ -178,6 +178,7 @@ cgen(Node *n, Node *nn)
regfree(&nod);
break;
+ case OROL:
case OLSHR:
case OASHL:
case OASHR:
diff --git a/sys/src/cmd/8c/peep.c b/sys/src/cmd/8c/peep.c
index 8f0fae88a..482f17a65 100644
--- a/sys/src/cmd/8c/peep.c
+++ b/sys/src/cmd/8c/peep.c
@@ -264,9 +264,6 @@ subprop(Reg *r0)
break;
p = r->prog;
switch(p->as) {
- case ACALL:
- return 0;
-
case AIMULL:
case AIMULW:
if(p->to.type != D_NONE)
@@ -283,6 +280,23 @@ subprop(Reg *r0)
case AMULL:
case AMULW:
+ case AREP:
+ case AREPN:
+ case ALOOP:
+ case ALOOPNE:
+
+ case ACWD:
+ case ACDQ:
+
+ case ASTOSB:
+ case ASTOSL:
+ case AMOVSB:
+ case AMOVSL:
+ case AFSTSW:
+
+ case ACALL:
+ return 0;
+
case AROLB:
case AROLL:
case AROLW:
@@ -301,19 +315,9 @@ subprop(Reg *r0)
case ASHRB:
case ASHRL:
case ASHRW:
-
- case AREP:
- case AREPN:
-
- case ACWD:
- case ACDQ:
-
- case ASTOSB:
- case ASTOSL:
- case AMOVSB:
- case AMOVSL:
- case AFSTSW:
- return 0;
+ if(p->from.type == D_CX && v1->type == D_CX)
+ return 0;
+ break;
case AORL:
case AANDL:
diff --git a/sys/src/cmd/8c/sgen.c b/sys/src/cmd/8c/sgen.c
index 9ba278c8c..6fc0ffba7 100644
--- a/sys/src/cmd/8c/sgen.c
+++ b/sys/src/cmd/8c/sgen.c
@@ -127,7 +127,6 @@ xcom(Node *n)
*l = *(n->left);
l->xoffset += r->vconst;
n->left = l;
- r = n->right;
goto brk;
}
break;
@@ -219,7 +218,6 @@ xcom(Node *n)
if(g >= 0) {
n->left = r;
n->right = l;
- l = r;
r = n->right;
}
g = vlog(r);
@@ -230,7 +228,7 @@ xcom(Node *n)
indexshift(n);
break;
}
-commute(n);
+ commute(n);
break;
case OASLDIV:
@@ -295,6 +293,13 @@ commute(n);
indexshift(n);
break;
+ case OOR:
+ xcom(l);
+ xcom(r);
+ if(typechl[n->type->etype])
+ rolor(n);
+ break;
+
default:
if(l != Z)
xcom(l);
@@ -305,6 +310,8 @@ commute(n);
brk:
if(n->addable >= 10)
return;
+ l = n->left;
+ r = n->right;
if(l != Z)
n->complex = l->complex;
if(r != Z) {
@@ -349,6 +356,7 @@ brk:
}
break;
+ case OROL:
case OLSHR:
case OASHL:
case OASHR:
diff --git a/sys/src/cmd/8c/txt.c b/sys/src/cmd/8c/txt.c
index 2974af4cc..9d64aa7e3 100644
--- a/sys/src/cmd/8c/txt.c
+++ b/sys/src/cmd/8c/txt.c
@@ -1238,6 +1238,14 @@ gopcode(int o, Type *ty, Node *f, Node *t)
a = ASALW;
break;
+ case OROL:
+ a = AROLL;
+ if(et == TCHAR || et == TUCHAR)
+ a = AROLB;
+ if(et == TSHORT || et == TUSHORT)
+ a = AROLW;
+ break;
+
case OFUNC:
a = ACALL;
break;
diff --git a/sys/src/cmd/cc/cc.h b/sys/src/cmd/cc/cc.h
index 01eb04562..dbaddffb5 100644
--- a/sys/src/cmd/cc/cc.h
+++ b/sys/src/cmd/cc/cc.h
@@ -273,6 +273,7 @@ enum
OPROTO,
OREGISTER,
ORETURN,
+ OROL,
OSET,
OSIGN,
OSIZE,
@@ -694,6 +695,7 @@ int log2(uvlong);
int vlog(Node*);
int topbit(ulong);
void simplifyshift(Node*);
+void rolor(Node*);
long typebitor(long, long);
void diag(Node*, char*, ...);
void warn(Node*, char*, ...);
diff --git a/sys/src/cmd/cc/sub.c b/sys/src/cmd/cc/sub.c
index ff1d6689d..91886cc35 100644
--- a/sys/src/cmd/cc/sub.c
+++ b/sys/src/cmd/cc/sub.c
@@ -903,6 +903,46 @@ if(debug['<'])prtree(n, "rewrite2");
n->left->op = o;
}
+/*
+ * replace (a << c) | (a >> (bits(a) - c)) with a rotate left (OROL) of a by constant c
+ */
+void
+rolor(Node *n)
+{
+ Node *l, *r;
+
+ if(!typeu[n->type->etype]) /* gated on the typeu[] table for n's type; presumably "is unsigned" -- confirm */
+ return;
+
+ l = n->left;
+ r = n->right;
+ switch(l->op){
+ case OASHL: /* left operand is the << side; the right one must be the >> side */
+ if(r->op == OLSHR)
+ break;
+ return;
+ case OLSHR: /* operands are the other way round: swap so l is the << node and r the >> node */
+ if(r->op == OASHL){
+ r = l;
+ l = n->right;
+ break;
+ }
+ default: /* also reached by fallthrough from OLSHR when r is not OASHL */
+ return;
+ }
+ if(l->right->op != OCONST || r->right->op != OCONST) /* both shift counts must be constants */
+ return;
+ if(vconst(l->right) + vconst(r->right) != ewidth[n->type->etype]*8) /* counts must add up to ewidth*8; presumably the bit width of the type -- confirm ewidth is in bytes */
+ return;
+ if(l->left->type != n->type || r->left->type != n->type) /* both shifted operands must have n's own type */
+ return;
+ if(l->left->op != ONAME || r->left->op != ONAME || l->left->sym != r->left->sym) /* both shifted operands must be names referring to the same symbol */
+ return;
+
+ *n = *l; /* n becomes a copy of the << node, keeping its operand and its constant count */
+ n->op = OROL; /* only the operator changes: the copied shift is now a rotate */
+}
+
int
side(Node *n)
{
@@ -1473,6 +1513,7 @@ Init onamesinit[] =
OPROTO, 0, "PROTO",
OREGISTER, 0, "REGISTER",
ORETURN, 0, "RETURN",
+ OROL, 0, "ROL",
OSET, 0, "SET",
OSIGN, 0, "SIGN",
OSIZE, 0, "SIZE",