51 files changed, 2016 insertions, 0 deletions
diff --git a/amd64/include/ape/float.h b/amd64/include/ape/float.h new file mode 100644 index 000000000..498286844 --- /dev/null +++ b/amd64/include/ape/float.h @@ -0,0 +1,80 @@ +#ifndef __FLOAT +#define __FLOAT +/* IEEE, default rounding */ + +#define FLT_ROUNDS 1 +#define FLT_RADIX 2 + +#define FLT_DIG 6 +#define FLT_EPSILON 1.19209290e-07 +#define FLT_MANT_DIG 24 +#define FLT_MAX 3.40282347e+38 +#define FLT_MAX_10_EXP 38 +#define FLT_MAX_EXP 128 +#define FLT_MIN 1.17549435e-38 +#define FLT_MIN_10_EXP -37 +#define FLT_MIN_EXP -125 + +#define DBL_DIG 15 +#define DBL_EPSILON 2.2204460492503131e-16 +#define DBL_MANT_DIG 53 +#define DBL_MAX 1.797693134862315708145e+308 +#define DBL_MAX_10_EXP 308 +#define DBL_MAX_EXP 1024 +#define DBL_MIN 2.225073858507201383090233e-308 +#define DBL_MIN_10_EXP -307 +#define DBL_MIN_EXP -1021 +#define LDBL_MANT_DIG DBL_MANT_DIG +#define LDBL_EPSILON DBL_EPSILON +#define LDBL_DIG DBL_DIG +#define LDBL_MIN_EXP DBL_MIN_EXP +#define LDBL_MIN DBL_MIN +#define LDBL_MIN_10_EXP DBL_MIN_10_EXP +#define LDBL_MAX_EXP DBL_MAX_EXP +#define LDBL_MAX DBL_MAX +#define LDBL_MAX_10_EXP DBL_MAX_10_EXP + +typedef union FPdbleword FPdbleword; +union FPdbleword +{ + double x; + struct { /* little endian */ + long lo; + long hi; + }; +}; + +#ifdef _RESEARCH_SOURCE +/* define stuff needed for floating conversion */ +#define IEEE_8087 1 +#define Sudden_Underflow 1 +#endif +#ifdef _PLAN9_SOURCE +/* MXCSR */ +/* fcr */ +#define FPFTZ (1<<15) /* amd64 */ +#define FPINEX (1<<12) +#define FPUNFL (1<<11) +#define FPOVFL (1<<10) +#define FPZDIV (1<<9) +#define FPDNRM (1<<8) /* amd64 */ +#define FPINVAL (1<<7) +#define FPDAZ (1<<6) /* amd64 */ +#define FPRNR (0<<13) +#define FPRZ (3<<13) +#define FPRPINF (2<<13) +#define FPRNINF (1<<13) +#define FPRMASK (3<<13) +#define FPPEXT 0 +#define FPPSGL 0 +#define FPPDBL 0 +#define FPPMASK 0 +/* fsr */ +#define FPAINEX (1<<5) +#define FPAUNFL (1<<4) +#define FPAOVFL (1<<3) +#define FPAZDIV (1<<2) +#define FPADNRM (1<<1) /* not in plan 9 */ +#define FPAINVAL (1<<0) +#endif +#endif /* __FLOAT */ diff --git a/amd64/include/ape/inttypes.h b/amd64/include/ape/inttypes.h new file mode 100644 index 000000000..0ee22cca1 --- /dev/null +++ b/amd64/include/ape/inttypes.h @@ -0,0 +1,21 @@ +#ifndef _SUSV2_SOURCE +#error "inttypes.h is SUSV2" +#endif + +#ifndef _INTTYPES_H_ +#define _INTTYPES_H_ 1 + + +typedef char int8_t; +typedef short int16_t; +typedef int int32_t; +typedef long long int64_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; + +typedef long long intptr_t; +typedef unsigned long long uintptr_t; + +#endif diff --git a/amd64/include/ape/math.h b/amd64/include/ape/math.h new file mode 100644 index 000000000..a880a01f8 --- /dev/null +++ b/amd64/include/ape/math.h @@ -0,0 +1,78 @@ +#ifndef __MATH +#define __MATH +#pragma lib "/$M/lib/ape/libap.a" + +/* a HUGE_VAL appropriate for IEEE double-precision */ +/* the correct value, 1.797693134862316e+308, causes a ken overflow */ +#define HUGE_VAL 1.79769313486231e+308 + +#ifdef __cplusplus +extern "C" { +#endif + +extern double acos(double); +extern double asin(double); +extern double atan(double); +extern double atan2(double, double); +extern double cos(double); +extern double hypot(double, double); +extern double sin(double); +extern double tan(double); +extern double cosh(double); +extern double sinh(double); +extern double tanh(double); +extern double exp(double); +extern double frexp(double, int *); +extern double 
ldexp(double, int); +extern double log(double); +extern double log10(double); +extern double modf(double, double *); +extern double pow(double, double); +extern double sqrt(double); +extern double ceil(double); +extern double fabs(double); +extern double floor(double); +extern double fmod(double, double); +extern double NaN(void); +extern int isNaN(double); +extern double Inf(int); +extern int isInf(double, int); + +#ifdef _RESEARCH_SOURCE +/* does >> treat left operand as unsigned ? */ +#define Unsigned_Shifts 1 +#define M_E 2.7182818284590452354 /* e */ +#define M_LOG2E 1.4426950408889634074 /* log 2e */ +#define M_LOG10E 0.43429448190325182765 /* log 10e */ +#define M_LN2 0.69314718055994530942 /* log e2 */ +#define M_LN10 2.30258509299404568402 /* log e10 */ +#define M_PI 3.14159265358979323846 /* pi */ +#define M_PI_2 1.57079632679489661923 /* pi/2 */ +#define M_PI_4 0.78539816339744830962 /* pi/4 */ +#define M_1_PI 0.31830988618379067154 /* 1/pi */ +#define M_2_PI 0.63661977236758134308 /* 2/pi */ +#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */ +#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ +#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ + +extern double hypot(double, double); +extern double erf(double); +extern double erfc(double); +extern double j0(double); +extern double y0(double); +extern double j1(double); +extern double y1(double); +extern double jn(int, double); +extern double yn(int, double); + +#endif + + +#ifdef __cplusplus +} +#endif + +#define isnan(x) isNaN(x) +#define isinf(x) isInf(x, 0) + +#endif /* __MATH */ diff --git a/amd64/include/ape/stdarg.h b/amd64/include/ape/stdarg.h new file mode 100644 index 000000000..cf466841f --- /dev/null +++ b/amd64/include/ape/stdarg.h @@ -0,0 +1,18 @@ +#ifndef __STDARG +#define __STDARG + +typedef char *va_list; + +#define va_start(list, start) list = (sizeof(start)<8 ? 
(char *)((long long *)&(start)+1) : \ +(char *)(&(start)+1)) +#define va_end(list) +#define va_arg(list, mode)\ + ((sizeof(mode) == 1)?\ + ((mode*)(list += 8))[-8]:\ + (sizeof(mode) == 2)?\ + ((mode*)(list += 8))[-4]:\ + (sizeof(mode) == 4)?\ + ((mode*)(list += 8))[-2]:\ + ((mode*)(list += sizeof(mode)))[-1]) + +#endif /* __STDARG */ diff --git a/amd64/include/ape/ureg.h b/amd64/include/ape/ureg.h new file mode 100644 index 000000000..f6b2aba0d --- /dev/null +++ b/amd64/include/ape/ureg.h @@ -0,0 +1,38 @@ +#ifndef __UREG_H +#define __UREG_H +#if !defined(_PLAN9_SOURCE) + This header file is an extension to ANSI/POSIX +#endif + +struct Ureg { + unsigned long long ax; + unsigned long long bx; + unsigned long long cx; + unsigned long long dx; + unsigned long long si; + unsigned long long di; + unsigned long long bp; + unsigned long long r8; + unsigned long long r9; + unsigned long long r10; + unsigned long long r11; + unsigned long long r12; + unsigned long long r13; + unsigned long long r14; + unsigned long long r15; + + unsigned short ds; + unsigned short es; + unsigned short fs; + unsigned short gs; + + unsigned long long type; + unsigned long long error; /* error code (or zero) */ + unsigned long long pc; /* pc */ + unsigned long long cs; /* old context */ + unsigned long long flags; /* old flags */ + unsigned long long sp; /* sp */ + unsigned long long ss; /* old stack segment */ +}; + +#endif diff --git a/sys/src/ape/lib/9/amd64/getcallerpc.s b/sys/src/ape/lib/9/amd64/getcallerpc.s new file mode 100644 index 000000000..15e55ea17 --- /dev/null +++ b/sys/src/ape/lib/9/amd64/getcallerpc.s @@ -0,0 +1,3 @@ +TEXT getcallerpc(SB), 1, $0 + MOVQ -8(RARG), AX + RET diff --git a/sys/src/ape/lib/9/amd64/getfcr.s b/sys/src/ape/lib/9/amd64/getfcr.s new file mode 100644 index 000000000..9fc2002da --- /dev/null +++ b/sys/src/ape/lib/9/amd64/getfcr.s @@ -0,0 +1,38 @@ + +TEXT setfcr(SB), $4 + XORL $(0x3F<<7),RARG /* bits are cleared in csr to enable them */ + ANDL $0xFFC0, RARG /* just the fcr bits */ + WAIT /* is this needed? 
*/ + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $~0x3F, AX + ORL RARG, AX + MOVL AX, 0(SP) + LDMXCSR 0(SP) + RET + +TEXT getfcr(SB), $4 + WAIT + STMXCSR 0(SP) + MOVWLZX 0(SP), AX + ANDL $0xFFC0, AX + XORL $(0x3F<<7),AX + RET + +TEXT getfsr(SB), $4 + WAIT + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $0x3F, AX + RET + +TEXT setfsr(SB), $4 + ANDL $0x3F, RARG + WAIT + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $~0x3F, AX + ORL RARG, AX + MOVL AX, 0(SP) + LDMXCSR 0(SP) + RET diff --git a/sys/src/ape/lib/ap/amd64/_seek.c b/sys/src/ape/lib/ap/amd64/_seek.c new file mode 100644 index 000000000..a9d92c7cb --- /dev/null +++ b/sys/src/ape/lib/ap/amd64/_seek.c @@ -0,0 +1,11 @@ +extern long __SEEK(long long*, int, long long, int); + +long long +_SEEK(int fd, long long o, int p) +{ + long long l; + + if(__SEEK(&l, fd, o, p) < 0) + l = -1; + return l; +} diff --git a/sys/src/ape/lib/ap/amd64/cycles.s b/sys/src/ape/lib/ap/amd64/cycles.s new file mode 100644 index 000000000..769f617bb --- /dev/null +++ b/sys/src/ape/lib/ap/amd64/cycles.s @@ -0,0 +1,5 @@ +TEXT _cycles(SB),1,$0 /* time stamp counter; cycles since power up */ + RDTSC + MOVL AX, 0(RARG) /* lo */ + MOVL DX, 4(RARG) /* hi */ + RET diff --git a/sys/src/ape/lib/ap/amd64/lock.c b/sys/src/ape/lib/ap/amd64/lock.c new file mode 100644 index 000000000..91c0ba233 --- /dev/null +++ b/sys/src/ape/lib/ap/amd64/lock.c @@ -0,0 +1,26 @@ +#define _LOCK_EXTENSION +#include "../plan9/sys9.h" +#include <lock.h> + +int tas(int*); + +void +lock(Lock *lk) +{ + while(tas(&lk->val)) + _SLEEP(0); +} + +int +canlock(Lock *lk) +{ + if(tas(&lk->val)) + return 0; + return 1; +} + +void +unlock(Lock *lk) +{ + lk->val = 0; +} diff --git a/sys/src/ape/lib/ap/amd64/main9.s b/sys/src/ape/lib/ap/amd64/main9.s new file mode 100644 index 000000000..d461dc185 --- /dev/null +++ b/sys/src/ape/lib/ap/amd64/main9.s @@ -0,0 +1,26 @@ +#define NPRIVATES 16 + +GLOBL _tos(SB), $8 +GLOBL _errnoloc(SB), $8 +GLOBL _privates(SB), $8 +GLOBL _nprivates(SB), $8 + +TEXT _main(SB), 1, $(32+NPRIVATES*8) + + /* _tos = arg */ + MOVQ AX, _tos(SB) + LEAQ 24(SP), AX + MOVQ AX, _errnoloc(SB) + LEAQ 32(SP), AX + MOVQ AX, _privates(SB) + MOVQ $NPRIVATES, _nprivates(SB) + CALL _envsetup(SB) + MOVL inargc-8(FP), RARG + LEAQ inargv+0(FP), AX + MOVQ AX, 8(SP) + MOVQ environ(SB), AX + MOVQ AX, 16(SP) + CALL main(SB) + MOVQ AX, RARG + CALL exit(SB) + RET diff --git a/sys/src/ape/lib/ap/amd64/main9p.s b/sys/src/ape/lib/ap/amd64/main9p.s new file mode 100644 index 000000000..dfd3450ea --- /dev/null +++ b/sys/src/ape/lib/ap/amd64/main9p.s @@ -0,0 +1,45 @@ +#define NPRIVATES 16 + +GLOBL _tos(SB), $8 +GLOBL _privates(SB), $8 +GLOBL _nprivates(SB), $8 + +TEXT _mainp(SB), 1, $(3*8+NPRIVATES*8) + + /* _tos = arg */ + MOVQ AX, _tos(SB) + LEAQ 8(SP), AX + MOVQ AX, _privates(SB) + MOVQ $NPRIVATES, _nprivates(SB) + + /* _profmain(); */ + CALL _profmain(SB) + + /* _tos->prof.pp = _tos->prof.next; */ + MOVQ _tos+0(SB),DX + MOVQ 4(DX),CX + MOVQ CX,(DX) + + CALL _envsetup(SB) + + /* main(argc, argv, environ); */ + MOVL inargc-8(FP), RARG + LEAQ inargv+0(FP), AX + MOVQ AX, 8(SP) + MOVQ environ(SB), AX + MOVQ AX, 16(SP) + CALL main(SB) + +loop: + MOVL AX, RARG + CALL exit(SB) + MOVQ $_profin(SB), AX /* force loading of profile */ + MOVL $0, AX + JMP loop + +TEXT _savearg(SB), 1, $0 + RET + +TEXT _callpc(SB), 1, $0 + MOVQ 8(RARG), AX + RET diff --git a/sys/src/ape/lib/ap/amd64/mkfile b/sys/src/ape/lib/ap/amd64/mkfile new file mode 100644 index 000000000..5bb7f3271 --- /dev/null +++ b/sys/src/ape/lib/ap/amd64/mkfile @@ -0,0 +1,20 @@ 
+APE=/sys/src/ape +objtype=amd64 +<$APE/config +LIB=/$objtype/lib/ape/libap.a +OFILES=\ + _seek.$O\ + cycles.$O\ + lock.$O\ + main9.$O\ + main9p.$O\ + notetramp.$O\ + setjmp.$O\ + strchr.$O\ + strlen.$O\ + tas.$O\ + +</sys/src/cmd/mksyslib + +CFLAGS=-c -D_POSIX_SOURCE -D_PLAN9_SOURCE + diff --git a/sys/src/ape/lib/ap/amd64/notetramp.c b/sys/src/ape/lib/ap/amd64/notetramp.c new file mode 100644 index 000000000..c92205175 --- /dev/null +++ b/sys/src/ape/lib/ap/amd64/notetramp.c @@ -0,0 +1,81 @@ +#include "../plan9/lib.h" +#include "../plan9/sys9.h" +#include <signal.h> +#include <setjmp.h> + +/* A stack to hold pcs when signals nest */ +#define MAXSIGSTACK 20 +typedef struct Pcstack Pcstack; +static struct Pcstack { + int sig; + void (*hdlr)(int, char*, Ureg*); + unsigned long long restorepc; + Ureg *u; +} pcstack[MAXSIGSTACK]; +static int nstack = 0; + +static void notecont(Ureg*, char*); + +void +_notetramp(int sig, void (*hdlr)(int, char*, Ureg*), Ureg *u) +{ + Pcstack *p; + + if(nstack >= MAXSIGSTACK) + _NOTED(1); /* nesting too deep; just do system default */ + p = &pcstack[nstack]; + p->restorepc = u->pc; + p->sig = sig; + p->hdlr = hdlr; + p->u = u; + nstack++; + u->pc = (unsigned long long) notecont; + _NOTED(2); /* NSAVE: clear note but hold state */ +} + +static void +notecont(Ureg *u, char *s) +{ + Pcstack *p; + void(*f)(int, char*, Ureg*); + + p = &pcstack[nstack-1]; + f = p->hdlr; + u->pc = p->restorepc; + nstack--; + (*f)(p->sig, s, u); + _NOTED(3); /* NRSTR */ +} + +#define JMPBUFPC 1 +#define JMPBUFSP 0 + +extern sigset_t _psigblocked; + +typedef struct { + sigset_t set; + sigset_t blocked; + unsigned long long jmpbuf[2]; +} sigjmp_buf_amd64; + +void +siglongjmp(sigjmp_buf j, int ret) +{ + struct Ureg *u; + sigjmp_buf_amd64 *jb; + + jb = (sigjmp_buf_amd64*)j; + + if(jb->set) + _psigblocked = jb->blocked; + if(nstack == 0 || pcstack[nstack-1].u->sp > jb->jmpbuf[JMPBUFSP]) + longjmp((void*)jb->jmpbuf, ret); + u = pcstack[nstack-1].u; + nstack--; + u->ax = ret; + if(ret == 0) + u->ax = 1; + u->pc = jb->jmpbuf[JMPBUFPC]; + u->sp = jb->jmpbuf[JMPBUFSP] + 8; + _NOTED(3); /* NRSTR */ +} diff --git a/sys/src/ape/lib/ap/amd64/setjmp.s b/sys/src/ape/lib/ap/amd64/setjmp.s new file mode 100644 index 000000000..67320cad9 --- /dev/null +++ b/sys/src/ape/lib/ap/amd64/setjmp.s @@ -0,0 +1,27 @@ +TEXT longjmp(SB), $0 + MOVL r+8(FP), AX + CMPL AX, $0 + JNE ok /* ansi: "longjmp(0) => longjmp(1)" */ + MOVL $1, AX /* bless their pointed heads */ +ok: + MOVQ 0(RARG), SP /* restore sp */ + MOVQ 8(RARG), BX /* put return pc on the stack */ + MOVQ BX, 0(SP) + RET + +TEXT setjmp(SB), $0 + MOVQ SP, 0(RARG) /* store sp */ + MOVQ 0(SP), BX /* store return pc */ + MOVQ BX, 8(RARG) + MOVL $0, AX /* return 0 */ + RET + +TEXT sigsetjmp(SB), $0 + MOVL savemask+8(FP), BX + MOVL BX, 0(RARG) + MOVL $_psigblocked(SB), 4(RARG) + MOVQ SP, 8(RARG) /* store sp */ + MOVQ 0(SP), BX /* store return pc */ + MOVQ BX, 16(RARG) + MOVL $0, AX /* return 0 */ + RET diff --git a/sys/src/ape/lib/ap/amd64/strchr.s b/sys/src/ape/lib/ap/amd64/strchr.s new file mode 100644 index 000000000..317537361 --- /dev/null +++ b/sys/src/ape/lib/ap/amd64/strchr.s @@ -0,0 +1,38 @@ + TEXT strchr(SB), $0 + + MOVQ RARG, DI + MOVB c+8(FP), AX + CMPB AX, $0 + JEQ l2 /**/ + +/* + * char is not null + */ +l1: + MOVB (DI), BX + CMPB BX, $0 + JEQ ret0 + ADDQ $1, DI + CMPB AX, BX + JNE l1 + + MOVQ DI, AX + SUBQ $1, AX + RET + +/* + * char is null + */ +l2: + MOVQ $-1, CX + CLD + + REPN; SCASB + + MOVQ DI, AX + SUBQ $1, AX + RET + +ret0: + MOVQ $0, AX + 
RET diff --git a/sys/src/ape/lib/ap/amd64/strlen.s b/sys/src/ape/lib/ap/amd64/strlen.s new file mode 100644 index 000000000..cf27cd56a --- /dev/null +++ b/sys/src/ape/lib/ap/amd64/strlen.s @@ -0,0 +1,16 @@ + TEXT strlen(SB),$0 + + MOVL $0, AX + MOVQ $-1, CX + CLD +/* + * look for end of string + */ + + MOVQ RARG, DI + REPN; SCASB + + MOVQ DI, AX + SUBQ RARG, AX + SUBQ $1, AX + RET diff --git a/sys/src/ape/lib/ap/amd64/tas.s b/sys/src/ape/lib/ap/amd64/tas.s new file mode 100644 index 000000000..7aa994b99 --- /dev/null +++ b/sys/src/ape/lib/ap/amd64/tas.s @@ -0,0 +1,5 @@ +TEXT tas(SB),$0 + + MOVL $0xdeadead,AX + XCHGL AX,(RARG) + RET diff --git a/sys/src/libc/amd64/_seek.c b/sys/src/libc/amd64/_seek.c new file mode 100644 index 000000000..d44420db2 --- /dev/null +++ b/sys/src/libc/amd64/_seek.c @@ -0,0 +1,14 @@ +#include <u.h> +#include <libc.h> + +extern int _seek(vlong*, int, vlong, int); + +vlong +seek(int fd, vlong o, int p) +{ + vlong l; + + if(_seek(&l, fd, o, p) < 0) + l = -1LL; + return l; +} diff --git a/sys/src/libc/amd64/argv0.s b/sys/src/libc/amd64/argv0.s new file mode 100644 index 000000000..fe4eb4c85 --- /dev/null +++ b/sys/src/libc/amd64/argv0.s @@ -0,0 +1,4 @@ +GLOBL argv0(SB), $8 +GLOBL _tos(SB), $8 +GLOBL _privates(SB), $8 +GLOBL _nprivates(SB), $4 diff --git a/sys/src/libc/amd64/atom.s b/sys/src/libc/amd64/atom.s new file mode 100644 index 000000000..c6618870f --- /dev/null +++ b/sys/src/libc/amd64/atom.s @@ -0,0 +1,69 @@ +TEXT ainc(SB), 1, $0 /* int ainc(int *); */ +ainclp: + MOVL (RARG), AX /* exp */ + MOVL AX, BX + INCL BX /* new */ + LOCK; CMPXCHGL BX, (RARG) + JNZ ainclp + MOVL BX, AX + RET + +TEXT adec(SB), 1, $0 /* int adec(int*); */ +adeclp: + MOVL (RARG), AX + MOVL AX, BX + DECL BX + LOCK; CMPXCHGL BX, (RARG) + JNZ adeclp + MOVL BX, AX + RET + +/* + * int cas32(u32int *p, u32int ov, u32int nv); + * int cas(uint *p, int ov, int nv); + * int casl(ulong *p, ulong ov, ulong nv); + */ + +TEXT cas32(SB), 1, $0 +TEXT cas(SB), 1, $0 +TEXT casul(SB), 1, $0 +TEXT casl(SB), 1, $0 /* back compat */ + MOVL exp+8(FP), AX + MOVL new+16(FP), BX + LOCK; CMPXCHGL BX, (RARG) + MOVL $1, AX /* use CMOVLEQ etc. here? */ + JNZ _cas32r0 +_cas32r1: + RET +_cas32r0: + DECL AX + RET + +/* + * int cas64(u64int *p, u64int ov, u64int nv); + * int casp(void **p, void *ov, void *nv); + */ + +TEXT cas64(SB), 1, $0 +TEXT casp(SB), 1, $0 + MOVQ exp+8(FP), AX + MOVQ new+16(FP), BX + LOCK; CMPXCHGQ BX, (RARG) + MOVL $1, AX /* use CMOVLEQ etc. here? 
*/ + JNZ _cas64r0 +_cas64r1: + RET +_cas64r0: + DECL AX + RET + +TEXT fas64(SB), 1, $-4 +TEXT fasp(SB), 1, $-4 + MOVQ p+8(FP), AX + LOCK; XCHGQ AX, (RARG) /* */ + RET + +TEXT fas32(SB), 1, $-4 + MOVL p+8(FP), AX + LOCK; XCHGL AX, (RARG) /* */ + RET diff --git a/sys/src/libc/amd64/cycles.s b/sys/src/libc/amd64/cycles.s new file mode 100644 index 000000000..3c0095646 --- /dev/null +++ b/sys/src/libc/amd64/cycles.s @@ -0,0 +1,5 @@ +TEXT cycles(SB),1,$0 /* time stamp counter; cycles since power up */ + RDTSC + MOVL AX, 0(RARG) /* lo */ + MOVL DX, 4(RARG) /* hi */ + RET diff --git a/sys/src/libc/amd64/getcallerpc.s b/sys/src/libc/amd64/getcallerpc.s new file mode 100644 index 000000000..15e55ea17 --- /dev/null +++ b/sys/src/libc/amd64/getcallerpc.s @@ -0,0 +1,3 @@ +TEXT getcallerpc(SB), 1, $0 + MOVQ -8(RARG), AX + RET diff --git a/sys/src/libc/amd64/getfcr.s b/sys/src/libc/amd64/getfcr.s new file mode 100644 index 000000000..9fc2002da --- /dev/null +++ b/sys/src/libc/amd64/getfcr.s @@ -0,0 +1,38 @@ + +TEXT setfcr(SB), $4 + XORL $(0x3F<<7),RARG /* bits are cleared in csr to enable them */ + ANDL $0xFFC0, RARG /* just the fcr bits */ + WAIT /* is this needed? */ + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $~0x3F, AX + ORL RARG, AX + MOVL AX, 0(SP) + LDMXCSR 0(SP) + RET + +TEXT getfcr(SB), $4 + WAIT + STMXCSR 0(SP) + MOVWLZX 0(SP), AX + ANDL $0xFFC0, AX + XORL $(0x3F<<7),AX + RET + +TEXT getfsr(SB), $4 + WAIT + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $0x3F, AX + RET + +TEXT setfsr(SB), $4 + ANDL $0x3F, RARG + WAIT + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $~0x3F, AX + ORL RARG, AX + MOVL AX, 0(SP) + LDMXCSR 0(SP) + RET diff --git a/sys/src/libc/amd64/main9.s b/sys/src/libc/amd64/main9.s new file mode 100644 index 000000000..c9d5853c2 --- /dev/null +++ b/sys/src/libc/amd64/main9.s @@ -0,0 +1,19 @@ +#define NPRIVATES 16 + +TEXT _main(SB), 1, $(2*8+NPRIVATES*8) + MOVQ AX, _tos(SB) + LEAQ 16(SP), AX + MOVQ AX, _privates(SB) + MOVL $NPRIVATES, _nprivates(SB) + MOVL inargc-8(FP), RARG + LEAQ inargv+0(FP), AX + MOVQ AX, 8(SP) + CALL main(SB) + +loop: + MOVQ $_exits<>(SB), RARG + CALL exits(SB) + JMP loop + +DATA _exits<>+0(SB)/4, $"main" +GLOBL _exits<>+0(SB), $5 diff --git a/sys/src/libc/amd64/main9p.s b/sys/src/libc/amd64/main9p.s new file mode 100644 index 000000000..353b18088 --- /dev/null +++ b/sys/src/libc/amd64/main9p.s @@ -0,0 +1,41 @@ +#define NPRIVATES 16 + +TEXT _mainp(SB), 1, $(2*8+NPRIVATES*8) + MOVQ AX, _tos(SB) /* _tos = arg */ + LEAQ 16(SP), AX + MOVQ AX, _privates(SB) + MOVL $NPRIVATES, _nprivates(SB) + + CALL _profmain(SB) /* _profmain(); */ + + MOVQ _tos+0(SB), DX /* _tos->prof.pp = _tos->prof.next; */ + MOVQ 8(DX), CX + MOVQ CX, (DX) + + MOVL inargc-8(FP), RARG /* main(argc, argv); */ + LEAQ inargv+0(FP), AX + MOVQ AX, 8(SP) + CALL main(SB) + +loop: + MOVQ $_exits<>(SB), RARG + CALL exits(SB) + MOVQ $_profin(SB), AX /* force loading of profile */ + JMP loop + +TEXT _savearg(SB), 1, $0 + MOVQ RARG, AX + RET + +TEXT _saveret(SB), 1, $0 + RET + +TEXT _restorearg(SB), 1, $0 + RET /* we want RARG in RARG */ + +TEXT _callpc(SB), 1, $0 + MOVQ 8(RARG), AX + RET + +DATA _exits<>+0(SB)/4, $"main" +GLOBL _exits<>+0(SB), $5 diff --git a/sys/src/libc/amd64/memccpy.s b/sys/src/libc/amd64/memccpy.s new file mode 100644 index 000000000..d878faaf5 --- /dev/null +++ b/sys/src/libc/amd64/memccpy.s @@ -0,0 +1,58 @@ + TEXT memccpy(SB),$0 + + MOVL n+24(FP), CX + CMPL CX, $0 + JEQ none + MOVQ p2+8(FP), DI + MOVBLZX c+16(FP), AX + CLD +/* + * find the character in the second string + */ + + REPN; SCASB + JEQ found + 
+/* + * if not found, set count to 'n' + */ +none: + MOVL $0, AX + MOVL n+24(FP), BX + JMP memcpy + +/* + * if found, set count to bytes thru character + */ +found: + MOVQ DI, AX + SUBQ p2+8(FP), AX + MOVQ AX, BX + ADDQ RARG, AX + +/* + * copy the memory + */ + +memcpy: + MOVQ RARG, DI + MOVQ p2+8(FP), SI +/* + * copy whole longs, if aligned + */ + MOVQ DI, DX + ORQ SI, DX + ANDL $3, DX + JNE c3 + MOVL BX, CX + SHRQ $2, CX + REP; MOVSL +/* + * copy the rest, by bytes + */ + ANDL $3, BX +c3: + MOVL BX, CX + REP; MOVSB + + RET diff --git a/sys/src/libc/amd64/memchr.s b/sys/src/libc/amd64/memchr.s new file mode 100644 index 000000000..3648e84f9 --- /dev/null +++ b/sys/src/libc/amd64/memchr.s @@ -0,0 +1,23 @@ + TEXT memchr(SB),$0 + + MOVL n+16(FP), CX + CMPL CX, $0 + JEQ none + MOVQ RARG, DI + MOVBLZX c+8(FP), AX + CLD +/* + * SCASB is memchr instruction + */ + + REPN; SCASB + JEQ found + +none: + MOVL $0, AX + RET + +found: + MOVQ DI, AX + SUBQ $1, AX + RET diff --git a/sys/src/libc/amd64/memcmp.s b/sys/src/libc/amd64/memcmp.s new file mode 100644 index 000000000..71098b200 --- /dev/null +++ b/sys/src/libc/amd64/memcmp.s @@ -0,0 +1,52 @@ + TEXT memcmp(SB),$0 + + MOVL n+16(FP), BX + CMPL BX, $0 + JEQ none + MOVQ RARG, DI + MOVQ p2+8(FP), SI + CLD + MOVQ DI, CX + ORQ SI, CX + ANDL $3, CX + JNE c3 +/* + * first by longs + */ + + MOVL BX, CX + SHRQ $2, CX + + REP; CMPSL + JNE found + +/* + * then by bytes + */ + ANDL $3, BX +c3: + MOVL BX, CX + REP; CMPSB + JNE found1 + +none: + MOVQ $0, AX + RET + +/* + * if long found, + * back up and look by bytes + */ +found: + MOVL $4, CX + SUBQ CX, DI + SUBQ CX, SI + REP; CMPSB + +found1: + JLS lt + MOVQ $-1, AX + RET +lt: + MOVQ $1, AX + RET diff --git a/sys/src/libc/amd64/memcpy.s b/sys/src/libc/amd64/memcpy.s new file mode 100644 index 000000000..878e80223 --- /dev/null +++ b/sys/src/libc/amd64/memcpy.s @@ -0,0 +1,81 @@ +TEXT memcpy(SB), $0 + MOVQ RARG, DI + MOVQ DI, AX /* return value */ + MOVQ p2+8(FP), SI + MOVL n+16(FP), BX + CMPL BX, $0 + JGT _ok + JEQ _return /* nothing to do if n == 0 */ + MOVL $0, SI /* fault if n < 0 */ + +/* + * check and set for backwards: + * (p2 < p1) && ((p2+n) > p1) + */ +_ok: + CMPQ SI, DI + JGT _forward + JEQ _return /* nothing to do if p2 == p1 */ + MOVQ SI, DX + ADDQ BX, DX + CMPQ DX, DI + JGT _back + +/* + * copy whole longs if aligned + */ +_forward: + CLD + MOVQ SI, DX + ORQ DI, DX + ANDL $3, DX + JNE c3f + MOVQ BX, CX + SHRQ $2, CX + ANDL $3, BX + REP; MOVSL + +/* + * copy the rest, by bytes + */ + JEQ _return /* flags set by above ANDL */ +c3f: + MOVL BX, CX + REP; MOVSB + + RET + +/* + * whole thing backwards has + * adjusted addresses + */ +_back: + ADDQ BX, DI + ADDQ BX, SI + STD + SUBQ $4, DI + SUBQ $4, SI +/* + * copy whole longs, if aligned + */ + MOVQ DI, DX + ORQ SI, DX + ANDL $3, DX + JNE c3b + MOVL BX, CX + SHRQ $2, CX + ANDL $3, BX + REP; MOVSL +/* + * copy the rest, by bytes + */ + JEQ _return /* flags set by above ANDL */ + +c3b: + ADDQ $3, DI + ADDQ $3, SI + MOVL BX, CX + REP; MOVSB + +_return: + RET diff --git a/sys/src/libc/amd64/memmove.s b/sys/src/libc/amd64/memmove.s new file mode 100644 index 000000000..1f00537fd --- /dev/null +++ b/sys/src/libc/amd64/memmove.s @@ -0,0 +1,81 @@ +TEXT memmove(SB), $0 + MOVQ RARG, DI + MOVQ DI, AX /* return value */ + MOVQ p2+8(FP), SI + MOVL n+16(FP), BX + CMPL BX, $0 + JGT _ok + JEQ _return /* nothing to do if n == 0 */ + MOVL $0, SI /* fault if n < 0 */ + +/* + * check and set for backwards: + * (p2 < p1) && ((p2+n) > p1) + */ +_ok: + CMPQ SI, DI + JGT 
_forward + JEQ _return /* nothing to do if p2 == p1 */ + MOVQ SI, DX + ADDQ BX, DX + CMPQ DX, DI + JGT _back + +/* + * copy whole longs if aligned + */ +_forward: + CLD + MOVQ SI, DX + ORQ DI, DX + ANDL $3, DX + JNE c3f + MOVQ BX, CX + SHRQ $2, CX + ANDL $3, BX + REP; MOVSL + +/* + * copy the rest, by bytes + */ + JEQ _return /* flags set by above ANDL */ +c3f: + MOVL BX, CX + REP; MOVSB + + RET + +/* + * whole thing backwards has + * adjusted addresses + */ +_back: + ADDQ BX, DI + ADDQ BX, SI + STD + SUBQ $4, DI + SUBQ $4, SI +/* + * copy whole longs, if aligned + */ + MOVQ DI, DX + ORQ SI, DX + ANDL $3, DX + JNE c3b + MOVL BX, CX + SHRQ $2, CX + ANDL $3, BX + REP; MOVSL +/* + * copy the rest, by bytes + */ + JEQ _return /* flags set by above ANDL */ + +c3b: + ADDQ $3, DI + ADDQ $3, SI + MOVL BX, CX + REP; MOVSB + +_return: + RET diff --git a/sys/src/libc/amd64/memset.s b/sys/src/libc/amd64/memset.s new file mode 100644 index 000000000..d190edee2 --- /dev/null +++ b/sys/src/libc/amd64/memset.s @@ -0,0 +1,41 @@ + TEXT memset(SB),$0 + + CLD + MOVQ RARG, DI + MOVBLZX c+8(FP), AX + MOVL n+16(FP), BX +/* + * if not enough bytes, just set bytes + */ + CMPL BX, $9 + JLS c3 +/* + * if not aligned, just set bytes + */ + MOVQ RARG, CX + ANDL $3,CX + JNE c3 +/* + * build word in AX + */ + MOVB AL, AH + MOVL AX, CX + SHLL $16, CX + ORL CX, AX +/* + * set whole longs + */ +c1: + MOVQ BX, CX + SHRQ $2, CX + ANDL $3, BX + REP; STOSL +/* + * set the rest, by bytes + */ +c3: + MOVL BX, CX + REP; STOSB +ret: + MOVQ RARG,AX + RET diff --git a/sys/src/libc/amd64/mkfile b/sys/src/libc/amd64/mkfile new file mode 100644 index 000000000..81aa7445f --- /dev/null +++ b/sys/src/libc/amd64/mkfile @@ -0,0 +1,41 @@ +objtype=amd64 +</$objtype/mkfile + +LIB=/$objtype/lib/libc.a +SFILES=\ + argv0.s\ + atom.s\ + cycles.s\ + getfcr.s\ + main9.s\ + main9p.s\ + memccpy.s\ + memchr.s\ + memcmp.s\ + memcpy.s\ + memmove.s\ + memset.s\ + muldiv.s\ + setjmp.s\ + sqrt.s\ + strcat.s\ + strchr.s\ + strcpy.s\ + strlen.s\ + tas.s\ + +CFILES=\ + _seek.c\ + getcallerpc.c\ + notejmp.c\ + +HFILES=/sys/include/libc.h + +OFILES=${CFILES:%.c=%.$O} ${SFILES:%.s=%.$O} + +UPDATE=mkfile\ + $HFILES\ + $CFILES\ + $SFILES\ + +</sys/src/cmd/mksyslib diff --git a/sys/src/libc/amd64/muldiv.s b/sys/src/libc/amd64/muldiv.s new file mode 100644 index 000000000..be67302be --- /dev/null +++ b/sys/src/libc/amd64/muldiv.s @@ -0,0 +1,12 @@ +TEXT umuldiv(SB), $0 + MOVL RARG, AX + MULL b+8(FP) + DIVL c+16(FP) + RET + +TEXT muldiv(SB), $0 + MOVL RARG, AX + IMULL b+8(FP) + IDIVL c+16(FP) + RET + END diff --git a/sys/src/libc/amd64/notejmp.c b/sys/src/libc/amd64/notejmp.c new file mode 100644 index 000000000..100c41e4c --- /dev/null +++ b/sys/src/libc/amd64/notejmp.c @@ -0,0 +1,16 @@ +#include <u.h> +#include <libc.h> +#include <ureg.h> + +void +notejmp(void *vr, jmp_buf j, int ret) +{ + struct Ureg *r = vr; + + r->ax = ret; + if(ret == 0) + r->ax = 1; + r->pc = j[JMPBUFPC]; + r->sp = j[JMPBUFSP] + 8; + noted(NCONT); +} diff --git a/sys/src/libc/amd64/setjmp.s b/sys/src/libc/amd64/setjmp.s new file mode 100644 index 000000000..520e05c49 --- /dev/null +++ b/sys/src/libc/amd64/setjmp.s @@ -0,0 +1,17 @@ +TEXT longjmp(SB), $0 + MOVL r+8(FP), AX + CMPL AX, $0 + JNE ok /* ansi: "longjmp(0) => longjmp(1)" */ + MOVL $1, AX /* bless their pointed heads */ +ok: + MOVQ 0(RARG), SP /* restore sp */ + MOVQ 8(RARG), BX /* put return pc on the stack */ + MOVQ BX, 0(SP) + RET + +TEXT setjmp(SB), $0 + MOVQ SP, 0(RARG) /* store sp */ + MOVQ 0(SP), BX /* store return pc */ + MOVQ 
BX, 8(RARG) + MOVL $0, AX /* return 0 */ + RET diff --git a/sys/src/libc/amd64/sqrt.s b/sys/src/libc/amd64/sqrt.s new file mode 100644 index 000000000..6c8338b69 --- /dev/null +++ b/sys/src/libc/amd64/sqrt.s @@ -0,0 +1,4 @@ +TEXT sqrt(SB), $0 + MOVSD a+0(FP), X0 + SQRTSD X0, X0 + RET diff --git a/sys/src/libc/amd64/strcat.s b/sys/src/libc/amd64/strcat.s new file mode 100644 index 000000000..ff0339d6a --- /dev/null +++ b/sys/src/libc/amd64/strcat.s @@ -0,0 +1,48 @@ + TEXT strcat(SB),$0 + + MOVL $0, AX + MOVQ $-1, CX + CLD + +/* + * find length of second string + */ + + MOVQ p2+8(FP), DI + REPN; SCASB + + MOVQ DI, BX + SUBQ p2+8(FP), BX + +/* + * find end of first string + */ + + MOVQ RARG, DI + REPN; SCASB + +/* + * copy the memory + */ + SUBQ $1, DI + MOVQ p2+8(FP), SI +/* + * copy whole longs, if aligned + */ + MOVQ DI, CX + ORQ SI, CX + ANDL $3, CX + JNE c3 + MOVQ BX, CX + SHRQ $2, CX + REP; MOVSL +/* + * copy the rest, by bytes + */ + ANDL $3, BX +c3: + MOVQ BX, CX + REP; MOVSB + + MOVQ RARG, AX + RET diff --git a/sys/src/libc/amd64/strchr.s b/sys/src/libc/amd64/strchr.s new file mode 100644 index 000000000..317537361 --- /dev/null +++ b/sys/src/libc/amd64/strchr.s @@ -0,0 +1,38 @@ + TEXT strchr(SB), $0 + + MOVQ RARG, DI + MOVB c+8(FP), AX + CMPB AX, $0 + JEQ l2 /**/ + +/* + * char is not null + */ +l1: + MOVB (DI), BX + CMPB BX, $0 + JEQ ret0 + ADDQ $1, DI + CMPB AX, BX + JNE l1 + + MOVQ DI, AX + SUBQ $1, AX + RET + +/* + * char is null + */ +l2: + MOVQ $-1, CX + CLD + + REPN; SCASB + + MOVQ DI, AX + SUBQ $1, AX + RET + +ret0: + MOVQ $0, AX + RET diff --git a/sys/src/libc/amd64/strcpy.s b/sys/src/libc/amd64/strcpy.s new file mode 100644 index 000000000..850ceb886 --- /dev/null +++ b/sys/src/libc/amd64/strcpy.s @@ -0,0 +1,40 @@ + TEXT strcpy(SB),$0 + + MOVL $0, AX + MOVQ $-1, CX + CLD +/* + * find end of second string + */ + + MOVQ p2+8(FP), DI + REPN; SCASB + + MOVQ DI, BX + SUBQ p2+8(FP), BX + +/* + * copy the memory + */ + MOVQ RARG, DI + MOVQ p2+8(FP), SI +/* + * copy whole longs, if aligned + */ + MOVQ DI, CX + ORQ SI, CX + ANDL $3, CX + JNE c3 + MOVQ BX, CX + SHRQ $2, CX + REP; MOVSL +/* + * copy the rest, by bytes + */ + ANDL $3, BX +c3: + MOVL BX, CX + REP; MOVSB + + MOVQ RARG, AX + RET diff --git a/sys/src/libc/amd64/strlen.s b/sys/src/libc/amd64/strlen.s new file mode 100644 index 000000000..cf27cd56a --- /dev/null +++ b/sys/src/libc/amd64/strlen.s @@ -0,0 +1,16 @@ + TEXT strlen(SB),$0 + + MOVL $0, AX + MOVQ $-1, CX + CLD +/* + * look for end of string + */ + + MOVQ RARG, DI + REPN; SCASB + + MOVQ DI, AX + SUBQ RARG, AX + SUBQ $1, AX + RET diff --git a/sys/src/libc/amd64/tas.s b/sys/src/libc/amd64/tas.s new file mode 100644 index 000000000..571828d36 --- /dev/null +++ b/sys/src/libc/amd64/tas.s @@ -0,0 +1,5 @@ +TEXT _tas(SB), 1, $0 + + MOVL $0xdeaddead,AX + XCHGL AX,(RARG) + RET diff --git a/sys/src/libmp/amd64/mkfile b/sys/src/libmp/amd64/mkfile new file mode 100644 index 000000000..c9ecdb003 --- /dev/null +++ b/sys/src/libmp/amd64/mkfile @@ -0,0 +1,20 @@ +objtype=amd64 +</$objtype/mkfile + +LIB=/$objtype/lib/libmp.a +SFILES=\ + mpdigdiv.s\ + mpvecadd.s\ + mpvecdigmuladd.s\ + mpvecdigmulsub.s\ + mpvecsub.s\ + +HFILES=/$objtype/include/u.h /sys/include/mp.h ../port/dat.h + +OFILES=${SFILES:%.s=%.$O} + +UPDATE=mkfile\ + $HFILES\ + $SFILES\ + +</sys/src/cmd/mksyslib diff --git a/sys/src/libmp/amd64/mpdigdiv.s b/sys/src/libmp/amd64/mpdigdiv.s new file mode 100644 index 000000000..6025d141a --- /dev/null +++ b/sys/src/libmp/amd64/mpdigdiv.s @@ -0,0 +1,21 @@ +TEXT mpdigdiv(SB),$0 + 
+/* MOVL dividend+0(FP),BX */ + MOVL 0(RARG),AX + MOVL 4(RARG),DX + MOVL divisor+8(FP),BX + MOVQ quotient+16(FP),DI + XORL CX,CX + CMPL DX,BX /* dividend >= 2^32 * divisor */ + JHS _divovfl + CMPL BX,CX /* divisor == 0 */ + JE _divovfl + DIVL BX /* AX = DX:AX/BX */ + MOVL AX,0(DI) + RET + + /* return all 1's */ +_divovfl: + NOTL CX + MOVL CX,0(DI) + RET diff --git a/sys/src/libmp/amd64/mpvecadd.s b/sys/src/libmp/amd64/mpvecadd.s new file mode 100644 index 000000000..326f39dad --- /dev/null +++ b/sys/src/libmp/amd64/mpvecadd.s @@ -0,0 +1,54 @@ +/* + * mpvecadd(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *sum) + * + * sum[0:alen] = a[0:alen-1] + b[0:blen-1] + * + * prereq: alen >= blen, sum has room for alen+1 digits + */ +TEXT mpvecadd(SB),$0 + + MOVL alen+8(FP),DX + MOVL blen+24(FP),CX +/* MOVL a+0(FP),SI */ + MOVQ RARG, SI + MOVQ b+16(FP),BX + SUBL CX,DX + MOVQ sum+32(FP),DI + XORL BP,BP /* this also sets carry to 0 */ + + /* skip addition if b is zero */ + TESTL CX,CX + JZ _add1 + + /* sum[0:blen-1],carry = a[0:blen-1] + b[0:blen-1] */ +_addloop1: + MOVL (SI)(BP*4), AX + ADCL (BX)(BP*4), AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _addloop1 + +_add1: + /* jump if alen > blen */ + INCL DX + MOVL DX,CX + LOOP _addloop2 + + /* sum[alen] = carry */ +_addend: + JC _addcarry + MOVL $0,(DI)(BP*4) + RET +_addcarry: + MOVL $1,(DI)(BP*4) + RET + + /* sum[blen:alen-1],carry = a[blen:alen-1] + 0 */ +_addloop2: + MOVL (SI)(BP*4),AX + ADCL $0,AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _addloop2 + JMP _addend + diff --git a/sys/src/libmp/amd64/mpvecdigmuladd.s b/sys/src/libmp/amd64/mpvecdigmuladd.s new file mode 100644 index 000000000..6599a42c8 --- /dev/null +++ b/sys/src/libmp/amd64/mpvecdigmuladd.s @@ -0,0 +1,53 @@ +/* + * mpvecdigmul(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p += b*m + * + * each step look like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += carry + * p[i] += lo + * oldhi = hi + * + * the registers are: + * hi = DX - constrained by hardware + * lo = AX - constrained by hardware + * b+n = SI - can't be BP + * p+n = DI - can't be BP + * i-n = BP + * m = BX + * oldhi = CX + * + */ +TEXT mpvecdigmuladd(SB),$0 + +/* MOVQ b+0(FP),SI */ + MOVQ RARG,SI + MOVL n+8(FP),CX + MOVL m+16(FP),BX + MOVQ p+24(FP),DI + MOVL CX,BP + NEGQ BP /* BP = -n */ + SHLL $2,CX + ADDQ CX,SI /* SI = b + n */ + ADDQ CX,DI /* DI = p + n */ + XORL CX,CX +_muladdloop: + MOVL (SI)(BP*4),AX /* lo = b[i] */ + MULL BX /* hi, lo = b[i] * m */ + ADDL CX,AX /* lo += oldhi */ + JCC _muladdnocarry1 + INCL DX /* hi += carry */ +_muladdnocarry1: + ADDL AX,(DI)(BP*4) /* p[i] += lo */ + JCC _muladdnocarry2 + INCL DX /* hi += carry */ +_muladdnocarry2: + MOVL DX,CX /* oldhi = hi */ + INCQ BP /* i++ */ + JNZ _muladdloop + XORL AX,AX + ADDL CX,(DI)(BP*4) /* p[n] + oldhi */ + ADCL AX,AX /* return carry out of p[n] */ + RET diff --git a/sys/src/libmp/amd64/mpvecdigmulsub.s b/sys/src/libmp/amd64/mpvecdigmulsub.s new file mode 100644 index 000000000..0b5a35761 --- /dev/null +++ b/sys/src/libmp/amd64/mpvecdigmulsub.s @@ -0,0 +1,53 @@ +/* + * mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p -= b*m + * + * each step look like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += carry + * p[i] += lo + * oldhi = hi + * + * the registers are: + * hi = DX - constrained by hardware + * lo = AX - constrained by hardware + * b = SI - can't be BP + * p = DI - can't be BP + * i = BP + * n = CX - constrained by LOOP instr + * m = BX + * oldhi = R8 + * + */ +TEXT mpvecdigmulsub(SB),$0 + +/* MOVL b+0(FP),SI */ + 
MOVQ RARG,SI + MOVL n+8(FP),CX + MOVL m+16(FP),BX + MOVQ p+24(FP),DI + XORL BP,BP + MOVL BP,R8 +_mulsubloop: + MOVL (SI)(BP*4),AX /* lo = b[i] */ + MULL BX /* hi, lo = b[i] * m */ + ADDL R8,AX /* lo += oldhi */ + JCC _mulsubnocarry1 + INCL DX /* hi += carry */ +_mulsubnocarry1: + SUBL AX,(DI)(BP*4) + JCC _mulsubnocarry2 + INCL DX /* hi += carry */ +_mulsubnocarry2: + MOVL DX,R8 + INCL BP + LOOP _mulsubloop + SUBL R8,(DI)(BP*4) + JCC _mulsubnocarry3 + MOVQ $-1,AX + RET +_mulsubnocarry3: + MOVQ $1,AX + RET diff --git a/sys/src/libmp/amd64/mpvecsub.s b/sys/src/libmp/amd64/mpvecsub.s new file mode 100644 index 000000000..9e1b53494 --- /dev/null +++ b/sys/src/libmp/amd64/mpvecsub.s @@ -0,0 +1,45 @@ +/* + * mpvecsub(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *diff) + * + * diff[0:alen-1] = a[0:alen-1] - b[0:blen-1] + * + * prereq: alen >= blen, diff has room for alen digits + */ +TEXT mpvecsub(SB),$0 + +/* MOVQ a+0(FP),SI */ + MOVQ RARG, SI + MOVQ b+16(FP),BX + MOVL alen+8(FP),DX + MOVL blen+24(FP),CX + MOVQ diff+32(FP),DI + SUBL CX,DX + XORL BP,BP /* this also sets carry to 0 */ + + /* skip subraction if b is zero */ + TESTL CX,CX + JZ _sub1 + + /* diff[0:blen-1],borrow = a[0:blen-1] - b[0:blen-1] */ +_subloop1: + MOVL (SI)(BP*4),AX + SBBL (BX)(BP*4),AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _subloop1 + +_sub1: + INCL DX + MOVL DX,CX + LOOP _subloop2 + RET + + /* diff[blen:alen-1] = a[blen:alen-1] - 0 */ +_subloop2: + MOVL (SI)(BP*4),AX + SBBL $0,AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _subloop2 + RET + diff --git a/sys/src/libsec/amd64/md5block.s b/sys/src/libsec/amd64/md5block.s new file mode 100644 index 000000000..4e0d00883 --- /dev/null +++ b/sys/src/libsec/amd64/md5block.s @@ -0,0 +1,212 @@ +/* + * rfc1321 requires that I include this. The code is new. The constants + * all come from the rfc (hence the copyright). We trade a table for the + * macros in rfc. The total size is a lot less. -- presotto + * + * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All + * rights reserved. + * + * License to copy and use this software is granted provided that it + * is identified as the "RSA Data Security, Inc. MD5 Message-Digest + * Algorithm" in all material mentioning or referencing this software + * or this function. + * + * License is also granted to make and use derivative works provided + * that such works are identified as "derived from the RSA Data + * Security, Inc. MD5 Message-Digest Algorithm" in all material + * mentioning or referencing the derived work. + * + * RSA Data Security, Inc. makes no representations concerning either + * the merchantability of this software or the suitability of this + * software forany particular purpose. It is provided "as is" + * without express or implied warranty of any kind. + * These notices must be retained in any copies of any part of this + * documentation and/or software. 
+ */ +#define S11 7 +#define S12 12 +#define S13 17 +#define S14 22 + +#define S21 5 +#define S22 9 +#define S23 14 +#define S24 20 + +#define S31 4 +#define S32 11 +#define S33 16 +#define S34 23 + +#define S41 6 +#define S42 10 +#define S43 15 +#define S44 21 + +/* + * SI is data + * a += FN(B,C,D); + * a += x[sh] + t[sh]; + * a = (a << S11) | (a >> (32 - S11)); + * a += b; + */ + +#define BODY1(off,V,FN,SH,A,B,C,D)\ + FN(B,C,D)\ + LEAL V(A)(DI*1),A;\ + ADDL (off)(BP),A;\ + ROLL $SH,A;\ + ADDL B,A;\ + +#define BODY(off,V,FN,SH,A,B,C,D)\ + FN(B,C,D)\ + LEAL V(A)(DI*1),A;\ + ADDL (off)(BP),A;\ + ROLL $SH,A;\ + ADDL B,A;\ + +/* + * fn1 = ((c ^ d) & b) ^ d + */ +#define FN1(B,C,D)\ + MOVL C,DI;\ + XORL D,DI;\ + ANDL B,DI;\ + XORL D,DI;\ + +/* + * fn2 = ((b ^ c) & d) ^ c; + */ +#define FN2(B,C,D)\ + MOVL B,DI;\ + XORL C,DI;\ + ANDL D,DI;\ + XORL C,DI;\ + +/* + * fn3 = b ^ c ^ d; + */ +#define FN3(B,C,D)\ + MOVL B,DI;\ + XORL C,DI;\ + XORL D,DI;\ + +/* + * fn4 = c ^ (b | ~d); + */ +#define FN4(B,C,D)\ + MOVL D,DI;\ + XORL $-1,DI;\ + ORL B,DI;\ + XORL C,DI;\ + +#define LEN 8 +#define STATE 16 + +TEXT _md5block+0(SB), $0 + + MOVQ RARG,R8 + MOVLQZX len+LEN(FP),BX + ADDQ BX,R8 + +mainloop: + MOVQ state+STATE(FP),SI + MOVL (SI),AX + MOVL 4(SI),BX + MOVL 8(SI),CX + MOVL 12(SI),DX + + BODY1( 0*4,0xd76aa478,FN1,S11,AX,BX,CX,DX) + BODY1( 1*4,0xe8c7b756,FN1,S12,DX,AX,BX,CX) + BODY1( 2*4,0x242070db,FN1,S13,CX,DX,AX,BX) + BODY1( 3*4,0xc1bdceee,FN1,S14,BX,CX,DX,AX) + + BODY1( 4*4,0xf57c0faf,FN1,S11,AX,BX,CX,DX) + BODY1( 5*4,0x4787c62a,FN1,S12,DX,AX,BX,CX) + BODY1( 6*4,0xa8304613,FN1,S13,CX,DX,AX,BX) + BODY1( 7*4,0xfd469501,FN1,S14,BX,CX,DX,AX) + + BODY1( 8*4,0x698098d8,FN1,S11,AX,BX,CX,DX) + BODY1( 9*4,0x8b44f7af,FN1,S12,DX,AX,BX,CX) + BODY1(10*4,0xffff5bb1,FN1,S13,CX,DX,AX,BX) + BODY1(11*4,0x895cd7be,FN1,S14,BX,CX,DX,AX) + + BODY1(12*4,0x6b901122,FN1,S11,AX,BX,CX,DX) + BODY1(13*4,0xfd987193,FN1,S12,DX,AX,BX,CX) + BODY1(14*4,0xa679438e,FN1,S13,CX,DX,AX,BX) + BODY1(15*4,0x49b40821,FN1,S14,BX,CX,DX,AX) + + + BODY( 1*4,0xf61e2562,FN2,S21,AX,BX,CX,DX) + BODY( 6*4,0xc040b340,FN2,S22,DX,AX,BX,CX) + BODY(11*4,0x265e5a51,FN2,S23,CX,DX,AX,BX) + BODY( 0*4,0xe9b6c7aa,FN2,S24,BX,CX,DX,AX) + + BODY( 5*4,0xd62f105d,FN2,S21,AX,BX,CX,DX) + BODY(10*4,0x02441453,FN2,S22,DX,AX,BX,CX) + BODY(15*4,0xd8a1e681,FN2,S23,CX,DX,AX,BX) + BODY( 4*4,0xe7d3fbc8,FN2,S24,BX,CX,DX,AX) + + BODY( 9*4,0x21e1cde6,FN2,S21,AX,BX,CX,DX) + BODY(14*4,0xc33707d6,FN2,S22,DX,AX,BX,CX) + BODY( 3*4,0xf4d50d87,FN2,S23,CX,DX,AX,BX) + BODY( 8*4,0x455a14ed,FN2,S24,BX,CX,DX,AX) + + BODY(13*4,0xa9e3e905,FN2,S21,AX,BX,CX,DX) + BODY( 2*4,0xfcefa3f8,FN2,S22,DX,AX,BX,CX) + BODY( 7*4,0x676f02d9,FN2,S23,CX,DX,AX,BX) + BODY(12*4,0x8d2a4c8a,FN2,S24,BX,CX,DX,AX) + + + BODY( 5*4,0xfffa3942,FN3,S31,AX,BX,CX,DX) + BODY( 8*4,0x8771f681,FN3,S32,DX,AX,BX,CX) + BODY(11*4,0x6d9d6122,FN3,S33,CX,DX,AX,BX) + BODY(14*4,0xfde5380c,FN3,S34,BX,CX,DX,AX) + + BODY( 1*4,0xa4beea44,FN3,S31,AX,BX,CX,DX) + BODY( 4*4,0x4bdecfa9,FN3,S32,DX,AX,BX,CX) + BODY( 7*4,0xf6bb4b60,FN3,S33,CX,DX,AX,BX) + BODY(10*4,0xbebfbc70,FN3,S34,BX,CX,DX,AX) + + BODY(13*4,0x289b7ec6,FN3,S31,AX,BX,CX,DX) + BODY( 0*4,0xeaa127fa,FN3,S32,DX,AX,BX,CX) + BODY( 3*4,0xd4ef3085,FN3,S33,CX,DX,AX,BX) + BODY( 6*4,0x04881d05,FN3,S34,BX,CX,DX,AX) + + BODY( 9*4,0xd9d4d039,FN3,S31,AX,BX,CX,DX) + BODY(12*4,0xe6db99e5,FN3,S32,DX,AX,BX,CX) + BODY(15*4,0x1fa27cf8,FN3,S33,CX,DX,AX,BX) + BODY( 2*4,0xc4ac5665,FN3,S34,BX,CX,DX,AX) + + + BODY( 0*4,0xf4292244,FN4,S41,AX,BX,CX,DX) + BODY( 7*4,0x432aff97,FN4,S42,DX,AX,BX,CX) + 
BODY(14*4,0xab9423a7,FN4,S43,CX,DX,AX,BX) + BODY( 5*4,0xfc93a039,FN4,S44,BX,CX,DX,AX) + + BODY(12*4,0x655b59c3,FN4,S41,AX,BX,CX,DX) + BODY( 3*4,0x8f0ccc92,FN4,S42,DX,AX,BX,CX) + BODY(10*4,0xffeff47d,FN4,S43,CX,DX,AX,BX) + BODY( 1*4,0x85845dd1,FN4,S44,BX,CX,DX,AX) + + BODY( 8*4,0x6fa87e4f,FN4,S41,AX,BX,CX,DX) + BODY(15*4,0xfe2ce6e0,FN4,S42,DX,AX,BX,CX) + BODY( 6*4,0xa3014314,FN4,S43,CX,DX,AX,BX) + BODY(13*4,0x4e0811a1,FN4,S44,BX,CX,DX,AX) + + BODY( 4*4,0xf7537e82,FN4,S41,AX,BX,CX,DX) + BODY(11*4,0xbd3af235,FN4,S42,DX,AX,BX,CX) + BODY( 2*4,0x2ad7d2bb,FN4,S43,CX,DX,AX,BX) + BODY( 9*4,0xeb86d391,FN4,S44,BX,CX,DX,AX) + + ADDQ $(16*4),BP + MOVQ state+STATE(FP),DI + ADDL AX,0(DI) + ADDL BX,4(DI) + ADDL CX,8(DI) + ADDL DX,12(DI) + + CMPQ BP,R8 + JCS mainloop + + RET diff --git a/sys/src/libsec/amd64/mkfile b/sys/src/libsec/amd64/mkfile new file mode 100644 index 000000000..f7948cad9 --- /dev/null +++ b/sys/src/libsec/amd64/mkfile @@ -0,0 +1,19 @@ +objtype=amd64 +</$objtype/mkfile + +LIB=/$objtype/lib/libsec.a +FILES=\ + md5block\ + sha1block\ + +HFILES=/sys/include/libsec.h + +SFILES=${FILES:%=%.s} + +OFILES=${FILES:%=%.$O} + +UPDATE=mkfile\ + $HFILES\ + $SFILES\ + +</sys/src/cmd/mksyslib diff --git a/sys/src/libsec/amd64/sha1block.s b/sys/src/libsec/amd64/sha1block.s new file mode 100644 index 000000000..d8283fb38 --- /dev/null +++ b/sys/src/libsec/amd64/sha1block.s @@ -0,0 +1,197 @@ +/* x = (wp[off-f] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1; + * wp[off] = x; + * x += A <<< 5; + * E += 0xca62c1d6 + x; + * x = FN(B,C,D); + * E += x; + * B >>> 2 + */ +#define BSWAPDI BYTE $0x0f; BYTE $0xcf; + +#define BODY(off,FN,V,A,B,C,D,E)\ + MOVL (off-64)(BP),DI;\ + XORL (off-56)(BP),DI;\ + XORL (off-32)(BP),DI;\ + XORL (off-12)(BP),DI;\ + ROLL $1,DI;\ + MOVL DI,off(BP);\ + LEAL V(DI)(E*1),E;\ + MOVL A,DI;\ + ROLL $5,DI;\ + ADDL DI,E;\ + FN(B,C,D)\ + ADDL DI,E;\ + RORL $2,B;\ + +#define BODY0(off,FN,V,A,B,C,D,E)\ + MOVLQZX off(BX),DI;\ + BSWAPDI;\ + MOVL DI,off(BP);\ + LEAL V(DI)(E*1),E;\ + MOVL A,DI;\ + ROLL $5,DI;\ + ADDL DI,E;\ + FN(B,C,D)\ + ADDL DI,E;\ + RORL $2,B;\ + +/* + * fn1 = (((C^D)&B)^D); + */ +#define FN1(B,C,D)\ + MOVL C,DI;\ + XORL D,DI;\ + ANDL B,DI;\ + XORL D,DI;\ + +/* + * fn24 = B ^ C ^ D + */ +#define FN24(B,C,D)\ + MOVL B,DI;\ + XORL C,DI;\ + XORL D,DI;\ + +/* + * fn3 = ((B ^ C) & (D ^= B)) ^ B + * D ^= B to restore D + */ +#define FN3(B,C,D)\ + MOVL B,DI;\ + XORL C,DI;\ + XORL B,D;\ + ANDL D,DI;\ + XORL B,DI;\ + XORL B,D;\ + +/* + * stack offsets + * void sha1block(uchar *DATA, int LEN, ulong *STATE) + */ +#define DATA 0 +#define LEN 8 +#define STATE 16 + +/* + * stack offsets for locals + * ulong w[80]; + * uchar *edata; + * ulong *w15, *w40, *w60, *w80; + * register local + * ulong *wp = BP + * ulong a = eax, b = ebx, c = ecx, d = edx, e = esi + * ulong tmp = edi + */ +#define Rpdata R8 +#define WARRAY (-8-(80*4)) +#define TMP1 (-16-(80*4)) +#define TMP2 (-24-(80*4)) +#define W15 (-32-(80*4)) +#define W40 (-40-(80*4)) +#define W60 (-48-(80*4)) +#define W80 (-56-(80*4)) +#define EDATA (-64-(80*4)) + +TEXT _sha1block+0(SB),$384 + + MOVQ RARG, Rpdata + MOVLQZX len+LEN(FP),BX + ADDQ BX, RARG + MOVQ RARG,edata+EDATA(SP) + + LEAQ aw15+(WARRAY+15*4)(SP),DI + MOVQ DI,w15+W15(SP) + LEAQ aw40+(WARRAY+40*4)(SP),DX + MOVQ DX,w40+W40(SP) + LEAQ aw60+(WARRAY+60*4)(SP),CX + MOVQ CX,w60+W60(SP) + LEAQ aw80+(WARRAY+80*4)(SP),DI + MOVQ DI,w80+W80(SP) + +mainloop: + LEAQ warray+WARRAY(SP),BP + + MOVQ state+STATE(FP),DI + MOVL (DI),AX + MOVL 4(DI),BX + MOVL BX,tmp1+TMP1(SP) + MOVL 8(DI),CX + MOVL 
12(DI),DX + MOVL 16(DI),SI + + MOVQ Rpdata,BX + +loop1: + BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI) + MOVL SI,tmp2+TMP2(SP) + BODY0(4,FN1,0x5a827999,SI,AX,tmp1+TMP1(SP),CX,DX) + MOVL tmp1+TMP1(SP),SI + BODY0(8,FN1,0x5a827999,DX,tmp2+TMP2(SP),AX,SI,CX) + BODY0(12,FN1,0x5a827999,CX,DX,tmp2+TMP2(SP),AX,SI) + MOVL SI,tmp1+TMP1(SP) + BODY0(16,FN1,0x5a827999,SI,CX,DX,tmp2+TMP2(SP),AX) + MOVL tmp2+TMP2(SP),SI + + ADDQ $20,BX + ADDQ $20,BP + CMPQ BP,w15+W15(SP) + JCS loop1 + + BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI) + ADDQ $4,BX + MOVQ BX,R8 + MOVQ tmp1+TMP1(SP),BX + + BODY(4,FN1,0x5a827999,SI,AX,BX,CX,DX) + BODY(8,FN1,0x5a827999,DX,SI,AX,BX,CX) + BODY(12,FN1,0x5a827999,CX,DX,SI,AX,BX) + BODY(16,FN1,0x5a827999,BX,CX,DX,SI,AX) + + ADDQ $20,BP + +loop2: + BODY(0,FN24,0x6ed9eba1,AX,BX,CX,DX,SI) + BODY(4,FN24,0x6ed9eba1,SI,AX,BX,CX,DX) + BODY(8,FN24,0x6ed9eba1,DX,SI,AX,BX,CX) + BODY(12,FN24,0x6ed9eba1,CX,DX,SI,AX,BX) + BODY(16,FN24,0x6ed9eba1,BX,CX,DX,SI,AX) + + ADDQ $20,BP + CMPQ BP,w40+W40(SP) + JCS loop2 + +loop3: + BODY(0,FN3,0x8f1bbcdc,AX,BX,CX,DX,SI) + BODY(4,FN3,0x8f1bbcdc,SI,AX,BX,CX,DX) + BODY(8,FN3,0x8f1bbcdc,DX,SI,AX,BX,CX) + BODY(12,FN3,0x8f1bbcdc,CX,DX,SI,AX,BX) + BODY(16,FN3,0x8f1bbcdc,BX,CX,DX,SI,AX) + + ADDQ $20,BP + CMPQ BP,w60+W60(SP) + JCS loop3 + +loop4: + BODY(0,FN24,0xca62c1d6,AX,BX,CX,DX,SI) + BODY(4,FN24,0xca62c1d6,SI,AX,BX,CX,DX) + BODY(8,FN24,0xca62c1d6,DX,SI,AX,BX,CX) + BODY(12,FN24,0xca62c1d6,CX,DX,SI,AX,BX) + BODY(16,FN24,0xca62c1d6,BX,CX,DX,SI,AX) + + ADDQ $20,BP + CMPQ BP,w80+W80(SP) + JCS loop4 + + MOVQ state+STATE(FP),DI + ADDL AX,0(DI) + ADDL BX,4(DI) + ADDL CX,8(DI) + ADDL DX,12(DI) + ADDL SI,16(DI) + + MOVQ edata+EDATA(SP),DI + CMPQ Rpdata,DI + JCS mainloop + + RET + END |
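The spinlock added in sys/src/ape/lib/ap/amd64/lock.c and tas.s boils down to an atomic exchange against the lock word: tas swaps a non-zero value in and returns the old contents, and lock spins (yielding the processor) until that old value is zero. A minimal C11 sketch of the same semantics, assuming an atomic_int lock word and using sched_yield in place of the Plan 9 _SLEEP(0) call:

#include <stdatomic.h>
#include <sched.h>

typedef struct Lock {
        atomic_int val;
} Lock;

/* swap a non-zero marker into *p and return the previous value */
static int
tas(atomic_int *p)
{
        return atomic_exchange(p, 0xdeadead);
}

void
lock(Lock *lk)
{
        while(tas(&lk->val))
                sched_yield();  /* the real code calls _SLEEP(0) */
}

int
canlock(Lock *lk)
{
        return tas(&lk->val) == 0;
}

void
unlock(Lock *lk)
{
        atomic_store(&lk->val, 0);
}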
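libc's atom.s implements ainc and adec as LOCK CMPXCHGL retry loops that return the updated value. A rough C11 equivalent of ainc (adec is the same loop with a decrement), assuming the counter is an atomic_int:

#include <stdatomic.h>

/* atomically increment *p and return the new value, as ainc does */
int
ainc(atomic_int *p)
{
        int old;

        do
                old = atomic_load(p);
        while(!atomic_compare_exchange_weak(p, &old, old + 1));
        return old + 1;
}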
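memmove.s (and the identical memcpy.s) pick the copy direction with the test noted in their comments, (p2 < p1) && ((p2+n) > p1): only when the source starts below the destination and the two regions overlap do they copy backwards with the direction flag set. A byte-at-a-time C sketch of that control flow (the name is illustrative, and the word-aligned fast path the assembly uses is left out):

#include <stdint.h>

void*
copymove(void *p1, void *p2, unsigned long n)
{
        char *d = p1;
        char *s = p2;

        if((uintptr_t)s < (uintptr_t)d && (uintptr_t)s + n > (uintptr_t)d){
                /* regions overlap and the source is below the destination: copy backwards */
                while(n > 0){
                        n--;
                        d[n] = s[n];
                }
        }else{
                /* otherwise a forward copy is safe */
                while(n > 0){
                        *d++ = *s++;
                        n--;
                }
        }
        return p1;
}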
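muldiv.s computes umuldiv(a, b, c) = a*b/c by letting MULL leave the full 64-bit product in DX:AX before DIVL divides it by c. In C the same thing is a widening multiply; note that the hardware version faults if c is zero or the quotient does not fit in 32 bits, while in a C sketch like this one dividing by zero is undefined and an oversized quotient is silently truncated by the cast:

#include <stdint.h>

/* (a*b)/c with a 64-bit intermediate product, as in umuldiv */
uint32_t
umuldiv(uint32_t a, uint32_t b, uint32_t c)
{
        return (uint32_t)(((uint64_t)a * b) / c);
}

/* signed variant, matching muldiv's IMULL/IDIVL */
int32_t
muldiv(int32_t a, int32_t b, int32_t c)
{
        return (int32_t)(((int64_t)a * b) / c);
}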
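The comment block in libmp/amd64/mpvecdigmuladd.s spells out the per-digit steps of p += b*m; the carries it tracks in DX and CX fall out naturally when the same computation is written with a 64-bit intermediate. A C sketch of what the routine computes (the function name here is only illustrative), assuming mpdigit is the 32-bit digit type this port uses:

#include <stdint.h>

typedef uint32_t mpdigit;

/* p[0..n] += b[0..n-1] * m; returns the carry out of p[n] */
int
vecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
{
        int i;
        uint64_t t;
        mpdigit hi, lo, oldhi;

        oldhi = 0;
        for(i = 0; i < n; i++){
                t = (uint64_t)b[i] * m;         /* hi,lo = b[i]*m */
                lo = (mpdigit)t;
                hi = (mpdigit)(t >> 32);
                lo += oldhi;                    /* lo += oldhi, carrying into hi */
                if(lo < oldhi)
                        hi++;
                p[i] += lo;                     /* p[i] += lo, carrying into hi */
                if(p[i] < lo)
                        hi++;
                oldhi = hi;
        }
        p[n] += oldhi;
        return p[n] < oldhi;                    /* carry out of the final digit */
}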
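The BODY/BODY1 macros in libsec/amd64/md5block.s each perform one step of an MD5 round, following the recipe in the file's opening comment: add the round function of b, c, d, the message word, and the round constant into a, rotate, then add b. Written as C macros, with F1..F4 mirroring FN1..FN4 and the constants and shift counts coming from the RFC 1321 tables:

#include <stdint.h>

/* the four round functions, as in FN1..FN4 */
#define F1(b,c,d)       ((((c) ^ (d)) & (b)) ^ (d))
#define F2(b,c,d)       ((((b) ^ (c)) & (d)) ^ (c))
#define F3(b,c,d)       ((b) ^ (c) ^ (d))
#define F4(b,c,d)       ((c) ^ ((b) | ~(d)))

#define ROTL(v,s)       (((v) << (s)) | ((v) >> (32 - (s))))

/* one round step: a,b,c,d are uint32_t state words, x the message word, t the constant, s the shift */
#define STEP(F, a, b, c, d, x, t, s) \
        ((a) += F((b), (c), (d)) + (x) + (t), \
         (a) = ROTL((a), (s)), \
         (a) += (b))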