diff options
author | cinap_lenrek <cinap_lenrek@centraldogma> | 2011-09-20 04:14:29 +0200 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@centraldogma> | 2011-09-20 04:14:29 +0200 |
commit | 6c91d99ce29f8cb6c47c0c535c9162f248f1fdb1 (patch) | |
tree | 63ef4ea08cd0e5a2a23edf9c44de52d1d5bcc7df | |
parent | 7a29aa57cbc0a5ccf015ded763b54046235275ec (diff) | |
download | plan9front-6c91d99ce29f8cb6c47c0c535c9162f248f1fdb1.tar.xz |
uhtml: remove leading utf BOM marker, html2ms: fix underline handling and escaping
-rw-r--r-- | sys/src/cmd/html2ms.c | 56 | ||||
-rw-r--r-- | sys/src/cmd/page.c | 6 | ||||
-rw-r--r-- | sys/src/cmd/uhtml.c | 5 |
3 files changed, 46 insertions, 21 deletions
diff --git a/sys/src/cmd/html2ms.c b/sys/src/cmd/html2ms.c index dc98e59f3..7adba6b95 100644 --- a/sys/src/cmd/html2ms.c +++ b/sys/src/cmd/html2ms.c @@ -142,8 +142,24 @@ onfont(Text *text, Tag *tag) } void -ona(Text *text, Tag *) +restoreunderline(Text *text, Tag *tag) { + text->underline = tag->restore; + emit(text, ""); +} + +void +ona(Text *text, Tag *tag) +{ + int i; + + for(i=0; i<tag->nattr; i++) + if(cistrcmp(tag->attr[i].attr, "href") == 0) + break; + if(i == tag->nattr) + return; + tag->restore = text->underline; + tag->close = restoreunderline; text->underline = 1; } @@ -207,10 +223,13 @@ parsecomment(void) if(n != 7 || cistrncmp(buf, "[CDATA[", 7)) continue; while((c = Bgetc(&in)) > 0){ - if(c == ']') - if(Bgetc(&in) == ']') - if(Bgetc(&in) == '>') - return; + if(c == ']'){ + if(Bgetc(&in) == ']'){ + if(Bgetc(&in) != '>') + Bungetc(&in); + return; + } + } } } } @@ -425,18 +444,17 @@ parsetext(Text *text, Tag *tag) case '\r': case ' ': case '\t': - if(text->pre == 0){ - text->space = 1; + text->space = 1; + if(text->pre == 0) continue; - } default: if(r == '\n' || r == '\r') text->pos = 0; if(text->space){ text->space = 0; if(text->underline){ - emit(text, ""); - text->pos = Bprint(&out, ".UL "); + emit(text, ".UL "); + text->pos = 1; } else if(text->pos >= 70){ text->pos = 0; Bputc(&out, '\n'); @@ -445,16 +463,15 @@ parsetext(Text *text, Tag *tag) Bputc(&out, ' '); } } - if(text->pos == 0 && r == '.'){ - text->pos++; - Bputc(&out, ' '); - } - text->pos++; - if(r == 0xA0){ + if(text->pos == 0 && r == '.') + text->pos += Bprint(&out, "\\&"); + else if(r == '\\') + text->pos += Bprint(&out, "\\&\\"); + else if(r == 0xA0){ r = ' '; - Bputc(&out, '\\'); + text->pos += Bprint(&out, "\\"); } - Bprint(&out, "%C", r); + text->pos += Bprint(&out, "%C", r); } } } @@ -473,7 +490,10 @@ main(void) Binit(&out, 1, OWRITE); memset(&text, 0, sizeof(text)); + + text.font = "R"; text.output = 1; + parsetext(&text, nil); emit(&text, "\n"); } diff --git a/sys/src/cmd/page.c 
b/sys/src/cmd/page.c index 12be5dc51..4b1d1d804 100644 --- a/sys/src/cmd/page.c +++ b/sys/src/cmd/page.c @@ -623,9 +623,9 @@ popenfile(Page *p) p->data = "lp -dstdout"; p->open = popengs; } - else if(cistrncmp(buf, "<?xml", 5) == 0 || - cistrncmp(buf, "<!DOCTYPE", 9) == 0 || - cistrncmp(buf, "<HTML", 5) == 0){ + else if(cistrstr(buf, "<?xml") || + cistrstr(buf, "<!DOCTYPE") || + cistrstr(buf, "<HTML")){ p->data = "uhtml | html2ms | troff -ms | lp -dstdout"; p->open = popengs; } diff --git a/sys/src/cmd/uhtml.c b/sys/src/cmd/uhtml.c index 49503877e..c18c76202 100644 --- a/sys/src/cmd/uhtml.c +++ b/sys/src/cmd/uhtml.c @@ -62,6 +62,11 @@ main(int argc, char *argv[]) if((nbuf = read(0, buf, sizeof(buf)-1)) < 0) sysfatal("read: %r"); buf[nbuf] = 0; + + /* useless BOM marker */ + if(memcmp(buf, "\xEF\xBB\xBF", 3)==0) + memmove(buf, buf+3, nbuf-3); + for(;;){ if(s = cistrstr(buf, "encoding=")) if(s = strval(s+9)){ |