From 0f8168038af32828fcdc39575dea0e4de0c01122 Mon Sep 17 00:00:00 2001
From: ben
Date: Tue, 26 Apr 2016 22:26:03 -0500
Subject: remove old libregexp files; add headers for upas/bayes

---
 sys/src/cmd/upas/bayes/regcomp.h |  63 ++++++++++++++++++++++
 sys/src/cmd/upas/bayes/regexp.h  |  66 +++++++++++++++++++++++
 sys/src/libregexp/regaux.c       | 113 ---------------------------------------
 sys/src/libregexp/regcomp.h      |  63 ----------------------
 4 files changed, 129 insertions(+), 176 deletions(-)
 create mode 100644 sys/src/cmd/upas/bayes/regcomp.h
 create mode 100644 sys/src/cmd/upas/bayes/regexp.h
 delete mode 100644 sys/src/libregexp/regaux.c
 delete mode 100644 sys/src/libregexp/regcomp.h

diff --git a/sys/src/cmd/upas/bayes/regcomp.h b/sys/src/cmd/upas/bayes/regcomp.h
new file mode 100644
index 000000000..402fe7d5c
--- /dev/null
+++ b/sys/src/cmd/upas/bayes/regcomp.h
@@ -0,0 +1,63 @@
+/*
+ *  substitution list: a fixed array of NSUBEXP sub-expression match slots (Resub)
+ */
+#define NSUBEXP 32	/* number of Resub entries held by one Resublist */
+typedef struct Resublist	Resublist;
+struct	Resublist
+{
+	Resub	m[NSUBEXP];	/* m[0] is the slot the _renew*thread routines compare and update */
+};
+
+/*
+ * Actions and Tokens (Reinst types); every value below is an octal constant
+ *
+ *	02xx are operators, value == precedence
+ *	03xx are tokens, i.e. operands for operators
+ */
+#define RUNE		0177	/* NOTE(review): presumably the type of a literal-character instruction -- confirm in regcomp.c */
+#define	OPERATOR	0200	/* Bitmask of all operators */
+#define	START		0200	/* Start, used for marker on stack (same value as OPERATOR) */
+#define	RBRA		0201	/* Right bracket, ) */
+#define	LBRA		0202	/* Left bracket, ( */
+#define	OR		0203	/* Alternation, | */
+#define	CAT		0204	/* Concatenation, implicit operator */
+#define	STAR		0205	/* Closure, * */
+#define	PLUS		0206	/* a+ == aa* */
+#define	QUEST		0207	/* a? == a|nothing, i.e. 0 or 1 a's */
+#define	ANY		0300	/* Any character except newline, . */
+#define	ANYNL		0301	/* Any character including newline, . */
+#define	NOP		0302	/* No operation, internal use only */
+#define	BOL		0303	/* Beginning of line, ^ */
+#define	EOL		0304	/* End of line, $ */
+#define	CCLASS		0305	/* Character class, [] */
+#define	NCCLASS		0306	/* Negated character class, [] */
+#define	END		0377	/* Terminate: match found */
+
+/*
+ *  regexec execution lists (arrays of Relist, one entry per thread)
+ */
+#define LISTSIZE	10	/* NOTE(review): presumably the initial thread-list length -- confirm in regexec.c */
+#define BIGLISTSIZE	(25*LISTSIZE)	/* 250 */
+typedef struct Relist	Relist;
+struct Relist	/* one thread; the _renew*thread routines end a list with an entry whose inst is 0 */
+{
+	Reinst*		inst;		/* Reinstruction of the thread */
+	Resublist	se;		/* matched subexpressions in this thread */
+};
+typedef struct Reljunk	Reljunk;
+struct	Reljunk	/* NOTE(review): looks like per-call matcher state shared by regexec/rregexec -- confirm */
+{
+	Relist*	relist[2];	/* two thread lists */
+	Relist*	reliste[2];	/* NOTE(review): presumably the end of each relist[] -- confirm */
+	int	starttype;
+	Rune	startchar;
+	char*	starts;	/* char* bounds ... */
+	char*	eol;
+	Rune*	rstarts;	/* ... and their Rune* counterparts */
+	Rune*	reol;
+};
+
+extern Relist*	_renewthread(Relist*, Reinst*, int, Resublist*);	/* add a thread for an instruction; 0 if it is already on the list */
+extern void	_renewmatch(Resub*, int, Resublist*);	/* save a new match into the caller's Resub array */
+extern Relist*	_renewemptythread(Relist*, Reinst*, int, char*);	/* as _renewthread, but given only a char* start pointer */
+extern Relist*	_rrenewemptythread(Relist*, Reinst*, int, Rune*);	/* Rune* variant of _renewemptythread */
diff --git a/sys/src/cmd/upas/bayes/regexp.h b/sys/src/cmd/upas/bayes/regexp.h
new file mode 100644
index 000000000..780dc8001
--- /dev/null
+++ b/sys/src/cmd/upas/bayes/regexp.h
@@ -0,0 +1,66 @@
+#pragma	src	"/sys/src/oldlibregexp"
+#pragma	lib	"oldlibregexp.a"
+
+typedef struct Resub		Resub;
+typedef struct Reclass		Reclass;
+typedef struct Reinst		Reinst;
+typedef struct Reprog		Reprog;
+
+/*
+ *	Sub expression matches: sp/rsp is the start pointer; ep/rep is presumably the end (NOTE(review): confirm)
+ */
+struct Resub{
+	union
+	{
+		char *sp;	/* set when matching a char* string */
+		Rune *rsp;	/* set when matching a Rune* string */
+	};
+	union
+	{
+		char *ep;	/* char* counterpart of sp */
+		Rune *rep;	/* Rune* counterpart of rsp */
+	};
+};
+
+/*
+ *	character class, each pair of runes in spans defines a range
+ */
+struct Reclass{
+	Rune	*end;	/* NOTE(review): presumably points just past the last used entry of spans -- confirm */
+	Rune	spans[64];	/* 64 runes, i.e. room for 32 ranges */
+};
+
+/*
+ *	Machine instructions
+ */
+struct Reinst{
+	int	type;	/* one of the Reinst type codes defined in regcomp.h */
+	union	{
+		Reclass	*cp;		/* class pointer */
+		Rune	r;		/* character */
+		int	subid;		/* sub-expression id for RBRA and LBRA */
+		Reinst	*right;		/* right child of OR */
+	};
+	union {	/* regexp relies on these two being in the same union */
+		Reinst *left;		/* left child of OR */
+		Reinst *next;		/* next instruction for CAT & LBRA */
+	};
+};
+
+/*
+ *	Reprogram definition
+ */
+struct Reprog{
+	Reinst	*startinst;	/* start pc */
+	Reclass	class[16];	/* .data */
+	Reinst	firstinst[5];	/* .text; NOTE(review): only 5 declared, presumably allocated larger by regcomp -- confirm */
+};
+
+extern Reprog	*regcomp(char*);
+extern Reprog	*regcomplit(char*);
+extern Reprog	*regcompnl(char*);
+extern void	regerror(char*);
+extern int	regexec(Reprog*, char*, Resub*, int);	/* char* subject */
+extern void	regsub(char*, char*, int, Resub*, int);
+extern int	rregexec(Reprog*, Rune*, Resub*, int);	/* Rune* subject */
+extern void	rregsub(Rune*, Rune*, int, Resub*, int);
diff --git a/sys/src/libregexp/regaux.c b/sys/src/libregexp/regaux.c
deleted file mode 100644
index a7d52eca8..000000000
--- a/sys/src/libregexp/regaux.c
+++ /dev/null
@@ -1,113 +0,0 @@
-#include
-#include
-#include "regexp.h"
-#include "regcomp.h"
-
-
-/*
- *  save a new match in mp
- */
-extern void
-_renewmatch(Resub *mp, int ms, Resublist *sp)
-{
-	int i;
-
-	if(mp==0 || ms<=0)
-		return;
-	if(mp[0].sp==0 || sp->m[0].spm[0].sp==mp[0].sp && sp->m[0].ep>mp[0].ep)){
-		for(i=0; im[i];
-		for(; iinst; p++){
-		if(p->inst == ip){
-			if(sep->m[0].sp < p->se.m[0].sp){
-				if(ms > 1)
-					p->se = *sep;
-				else
-					p->se.m[0] = sep->m[0];
-			}
-			return 0;
-		}
-	}
-	p->inst = ip;
-	if(ms > 1)
-		p->se = *sep;
-	else
-		p->se.m[0] = sep->m[0];
-	(++p)->inst = 0;
-	return p;
-}
-
-/*
- * same as renewthread, but called with
- * initial empty start pointer.
- */
-extern Relist*
-_renewemptythread(Relist *lp,	/* _relist to add to */
-	Reinst *ip,		/* instruction to add */
-	int ms,
-	char *sp)		/* pointers to subexpressions */
-{
-	Relist *p;
-
-	for(p=lp; p->inst; p++){
-		if(p->inst == ip){
-			if(sp < p->se.m[0].sp) {
-				if(ms > 1)
-					memset(&p->se, 0, sizeof(p->se));
-				p->se.m[0].sp = sp;
-			}
-			return 0;
-		}
-	}
-	p->inst = ip;
-	if(ms > 1)
-		memset(&p->se, 0, sizeof(p->se));
-	p->se.m[0].sp = sp;
-	(++p)->inst = 0;
-	return p;
-}
-
-extern Relist*
-_rrenewemptythread(Relist *lp,	/* _relist to add to */
-	Reinst *ip,		/* instruction to add */
-	int ms,
-	Rune *rsp)		/* pointers to subexpressions */
-{
-	Relist *p;
-
-	for(p=lp; p->inst; p++){
-		if(p->inst == ip){
-			if(rsp < p->se.m[0].rsp) {
-				if(ms > 1)
-					memset(&p->se, 0, sizeof(p->se));
-				p->se.m[0].rsp = rsp;
-			}
-			return 0;
-		}
-	}
-	p->inst = ip;
-	if(ms > 1)
-		memset(&p->se, 0, sizeof(p->se));
-	p->se.m[0].rsp = rsp;
-	(++p)->inst = 0;
-	return p;
-}
diff --git a/sys/src/libregexp/regcomp.h b/sys/src/libregexp/regcomp.h
deleted file mode 100644
index 402fe7d5c..000000000
--- a/sys/src/libregexp/regcomp.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- *  substitution list
- */
-#define NSUBEXP 32
-typedef struct Resublist	Resublist;
-struct	Resublist
-{
-	Resub	m[NSUBEXP];
-};
-
-/*
- * Actions and Tokens (Reinst types)
- *
- *	02xx are operators, value == precedence
- *	03xx are tokens, i.e. operands for operators
- */
-#define RUNE		0177
-#define	OPERATOR	0200	/* Bitmask of all operators */
-#define	START		0200	/* Start, used for marker on stack */
-#define	RBRA		0201	/* Right bracket, ) */
-#define	LBRA		0202	/* Left bracket, ( */
-#define	OR		0203	/* Alternation, | */
-#define	CAT		0204	/* Concatentation, implicit operator */
-#define	STAR		0205	/* Closure, * */
-#define	PLUS		0206	/* a+ == aa* */
-#define	QUEST		0207	/* a? == a|nothing, i.e. 0 or 1 a's */
-#define	ANY		0300	/* Any character except newline, . */
-#define	ANYNL		0301	/* Any character including newline, . */
-#define	NOP		0302	/* No operation, internal use only */
-#define	BOL		0303	/* Beginning of line, ^ */
-#define	EOL		0304	/* End of line, $ */
-#define	CCLASS		0305	/* Character class, [] */
-#define	NCCLASS		0306	/* Negated character class, [] */
-#define	END		0377	/* Terminate: match found */
-
-/*
- *  regexec execution lists
- */
-#define LISTSIZE	10
-#define BIGLISTSIZE	(25*LISTSIZE)
-typedef struct Relist	Relist;
-struct Relist
-{
-	Reinst*		inst;		/* Reinstruction of the thread */
-	Resublist	se;		/* matched subexpressions in this thread */
-};
-typedef struct Reljunk	Reljunk;
-struct	Reljunk
-{
-	Relist*	relist[2];
-	Relist*	reliste[2];
-	int	starttype;
-	Rune	startchar;
-	char*	starts;
-	char*	eol;
-	Rune*	rstarts;
-	Rune*	reol;
-};
-
-extern Relist*	_renewthread(Relist*, Reinst*, int, Resublist*);
-extern void	_renewmatch(Resub*, int, Resublist*);
-extern Relist*	_renewemptythread(Relist*, Reinst*, int, char*);
-extern Relist*	_rrenewemptythread(Relist*, Reinst*, int, Rune*);
--
cgit v1.2.3