diff options
author | cinap_lenrek <cinap_lenrek@felloff.net> | 2015-09-24 05:13:03 +0200 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2015-09-24 05:13:03 +0200 |
commit | bba6d26ca26a60690d50b3fe41a8778abd66cff0 (patch) | |
tree | a4422c245464ffbbf529163e776790148062a015 | |
parent | 2b5ab91775b97d9e53b2c92a45164703855029a2 (diff) | |
download | plan9front-bba6d26ca26a60690d50b3fe41a8778abd66cff0.tar.xz |
cpp: fix memory corruption due to input buffer relocation
the dynamic input buffer resize code (fillbuf()) is broken, as
the calling code assumes that the input buffer won't be relocated
in memory. instead of trying to work out all the cases where this
happens, i'm getting rid of fillbuf() and just reading the whole
file into memory in setsource().
the bug could be reproduced with something as simple as:
@{for(i in `{seq 1 10000}){echo $i ', \'; }} | cpp
-rw-r--r-- | sys/src/cmd/cpp/cpp.h | 3 | ||||
-rw-r--r-- | sys/src/cmd/cpp/hideset.c | 2 | ||||
-rw-r--r-- | sys/src/cmd/cpp/lex.c | 98 | ||||
-rw-r--r-- | sys/src/cmd/cpp/tokens.c | 2 |
4 files changed, 19 insertions, 86 deletions
diff --git a/sys/src/cmd/cpp/cpp.h b/sys/src/cmd/cpp/cpp.h index 8299614a7..2f70ffcfe 100644 --- a/sys/src/cmd/cpp/cpp.h +++ b/sys/src/cmd/cpp/cpp.h @@ -29,7 +29,6 @@ enum kwtype { KIF, KIFDEF, KIFNDEF, KELIF, KELSE, KENDIF, KINCLUDE, KDEFINE, #define ISMAC 010 /* builtin macro, e.g. __LINE__ */ #define ISVARMAC 020 /* variadic macro */ -#define EOB 0xFE /* sentinel for end of input buffer */ #define EOFC 0xFD /* sentinel for end of input file */ #define XPWS 1 /* token flag: white space to assure token sep. */ @@ -58,7 +57,6 @@ typedef struct source { uchar *inb; /* input buffer */ uchar *inp; /* input pointer */ uchar *inl; /* end of input */ - int ins; /* input buffer size */ int fd; /* input source */ int ifdepth; /* conditional nesting in include */ struct source *next; /* stack for #include */ @@ -102,7 +100,6 @@ void *domalloc(int); void dofree(void *); void error(enum errtype, char *, ...); void flushout(void); -int fillbuf(Source *); int trigraph(Source *); int foldline(Source *); Nlist *lookup(Token *, int); diff --git a/sys/src/cmd/cpp/hideset.c b/sys/src/cmd/cpp/hideset.c index 15bed4c21..b784e9599 100644 --- a/sys/src/cmd/cpp/hideset.c +++ b/sys/src/cmd/cpp/hideset.c @@ -53,7 +53,7 @@ newhideset(int hs, Nlist *np) return hs; if (nhidesets >= maxhidesets) { maxhidesets = 3*maxhidesets/2+1; - hidesets = (Hideset *)realloc(hidesets, (sizeof (Hideset *))*maxhidesets); + hidesets = (Hideset *)dorealloc(hidesets, (sizeof (Hideset *))*maxhidesets); } hs1 = (Hideset)domalloc(len*sizeof(Hideset)); memmove(hs1, nhs, len*sizeof(Hideset)); diff --git a/sys/src/cmd/cpp/lex.c b/sys/src/cmd/cpp/lex.c index 30367c3f7..e90423e93 100644 --- a/sys/src/cmd/cpp/lex.c +++ b/sys/src/cmd/cpp/lex.c @@ -42,7 +42,7 @@ enum state { CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1, CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1, S_SELF=MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR, - S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME + S_STNL, S_COMNL, 
S_EOFCOM, S_COMMENT, S_WS, S_NAME }; int tottok; @@ -271,7 +271,7 @@ expandlex(void) } } } - /* install special cases for ? (trigraphs), \ (splicing), runes, and EOB */ + /* install special cases for ? (trigraphs), \ (splicing), runes */ for (i=0; i<MAXSTATE; i++) { for (j=0; j<0xFF; j++) if (j=='?' || j=='\\' || UTF2(j) || UTF3(j)) { @@ -279,7 +279,6 @@ expandlex(void) bigfsm[j][i] = ~bigfsm[j][i]; bigfsm[j][i] &= ~QBSBIT; } - bigfsm[EOB][i] = ~S_EOB; if (bigfsm[EOFC][i]>=0) bigfsm[EOFC][i] = ~S_EOF; } @@ -313,18 +312,8 @@ gettokens(Tokenrow *trp, int reset) tp = trp->lp; ip = s->inp; - if (reset) { + if (reset) s->lineinc = 0; - if (ip>=s->inl) { /* nothing in buffer */ - s->inl = s->inb; - fillbuf(s); - ip = s->inp = s->inb; - } else if (ip >= s->inb+(3*s->ins/4)) { - memmove(s->inb, ip, 4+s->inl-ip); - s->inl = s->inb+(s->inl-ip); - ip = s->inp = s->inb; - } - } maxp = &trp->bp[trp->max]; runelen = 1; for (;;) { @@ -409,12 +398,6 @@ gettokens(Tokenrow *trp, int reset) runelen = 1; continue; - case S_EOB: - s->inp = ip; - fillbuf(cursource); - state = oldstate; - continue; - case S_EOF: tp->type = END; tp->len = 0; @@ -445,12 +428,7 @@ gettokens(Tokenrow *trp, int reset) state = COM2; ip += runelen; runelen = 1; - if (ip >= s->inb+(7*s->ins/8)) { /* very long comment */ - memmove(tp->t, ip, 4+s->inl-ip); - s->inl -= ip-tp->t; - ip = tp->t+1; - } - continue; + continue; case S_EOFCOM: error(WARNING, "EOF inside comment"); @@ -478,8 +456,6 @@ trigraph(Source *s) { int c; - while (s->inp+2 >= s->inl && fillbuf(s)!=EOF) - ; if (s->inp[1]!='?') return 0; c = 0; @@ -517,8 +493,6 @@ foldline(Source *s) int ncr = 0; recheck: - while (s->inp+1 >= s->inl && fillbuf(s)!=EOF) - ; if (s->inp[ncr+1] == '\r') { /* nonstandardly, ignore CR before line-folding */ ncr++; goto recheck; @@ -531,37 +505,6 @@ recheck: return 0; } -int -fillbuf(Source *s) -{ - int n; - - while((char *)s->inl+s->ins/8 > (char *)s->inb+s->ins) { - int l = s->inl - s->inb; - int p = s->inp - s->inb; - 
if(l < 0) - error(FATAL, "negative end of input!?"); - if(p < 0) - error(FATAL, "negative input pointer!?"); - /* double the buffer size and try again */ - s->ins *= 2; - s->inb = dorealloc(s->inb, s->ins); - s->inl = s->inb + l; - s->inp = s->inb + p; - } - if (s->fd<0 || (n=read(s->fd, (char *)s->inl, s->ins/8)) <= 0) - n = 0; - if ((*s->inp&0xff) == EOB) /* sentinel character appears in input */ - *s->inp = EOFC; - s->inl += n; - s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOB; - if (n==0) { - s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOFC; - return EOF; - } - return 0; -} - /* * Push down to new source of characters. * If fd>0 and str==NULL, then from a file `name'; @@ -571,7 +514,7 @@ Source * setsource(char *name, int fd, char *str) { Source *s = new(Source); - int len; + int n, len; s->line = 1; s->lineinc = 0; @@ -580,32 +523,25 @@ setsource(char *name, int fd, char *str) s->next = cursource; s->ifdepth = 0; cursource = s; - /* slop at right for EOB */ + /* slop at right for EOFC */ if (str) { len = strlen(str); s->inb = domalloc(len+4); - s->inp = s->inb; - strncpy((char *)s->inp, str, len); + strncpy((char *)s->inb, str, len); } else { - Dir *d; - int junk; - ulong length = 0; - d = dirfstat(fd); - if (d != nil) { - length = d->length; - free(d); - } - junk = length; - if (junk<INS) - junk = INS; - s->inb = domalloc((junk)+4); - s->inp = s->inb; len = 0; + s->inb = nil; + for(;;){ + s->inb = dorealloc(s->inb, len + INS); + if (s->fd<0 || (n=read(s->fd, (char *)s->inb + len, INS)) <= 0) + break; + len += n; + } + s->inb = dorealloc(s->inb, len + 4); } - - s->ins = INS; + s->inp = s->inb; s->inl = s->inp+len; - s->inl[0] = s->inl[1] = EOB; + s->inl[0] = s->inl[1] = s->inl[2] = s->inl[3] = EOFC; return s; } diff --git a/sys/src/cmd/cpp/tokens.c b/sys/src/cmd/cpp/tokens.c index 26ccb087c..1db8b0344 100644 --- a/sys/src/cmd/cpp/tokens.c +++ b/sys/src/cmd/cpp/tokens.c @@ -92,7 +92,7 @@ growtokenrow(Tokenrow *trp) int nlast = trp->lp - trp->bp; trp->max = 
3*trp->max/2 + 1; - trp->bp = (Token *)realloc(trp->bp, trp->max*sizeof(Token)); + trp->bp = (Token *)dorealloc(trp->bp, trp->max*sizeof(Token)); trp->lp = &trp->bp[nlast]; trp->tp = &trp->bp[ncur]; return trp->lp; |