w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

commit 2d86c8596ca1e22ab98c584d378832852978be1b
parent af2e1adbb7a2ac1882762e4c2518fddbc0763670
Author: ukai <ukai>
Date:   Tue,  3 Dec 2002 15:35:09 +0000

[w3m-dev 03509] HTML parser
* file.c (close_textarea): delete
	(HTMLtagproc1): rewrite
			delete HTML_EOL
			move HTML_LISTING, HTML_N_LISTING
			add HTML_PRE_PLAIN, HTML_N_PRE_PLAIN
			add HTML_PLAINTEXT
			end_tag
	(HTMLlineproc0): s/str/line/
			rewrite
	(completeHTMLstream): </textarea> if necessary
* fm.h (struct readbuffer): delete ignore_tag
			add end_tag
	(RB_XMPMODE): deleted
	(RB_LSTMODE): deleted
	(RB_SCRIPT): added
	(RB_STYLE): added
	(RB_*): renumber
	(R_ST_EOL): added
	(R_ST_*): renumber
	(ST_IS_TAG): check R_ST_EOL
* form.c (form_fputs_decode): remove <eol> handling
* frame.c (newFrame): remove_space()
	(CASE_TABLE_TAG): added
	(createFrameFile): rewrite
* html.c (TagMAP): delete eol
		add pre_plain, /pre_plain
* html.h (HTML_EOL): deleted
	(HTML_PRE_PLAIN): added
	(HTML_N_PRE_PLAIN): added
* table.c (visible_length): rewrite
	(visible_length_plain): added
	(maximum_visible_length_plain): added
	(do_refill): R_ST_EOL
	(table_close_select): end_tag
	(table_close_textarea): end_tag
	(TAG_ACTION_PLAIN): added
	(feed_table_tag): rewrite
	(feed_table): rewrite
* table.h (TBLM_*): reassign
	(struct table_mode): delete ignore_tag
			add end_tag
* tagtable.tab (eol): deleted
		(pre_plain): added
		(/pre_plain): added
From: Hironori SAKAMOTO <hsaka@mth.biglobe.ne.jp>

Diffstat:
MChangeLog | 48++++++++++++++++++++++++++++++++++++++++++++++++
Mfile.c | 325+++++++++++++++++++++++++++++++++++--------------------------------------------
Mfm.h | 64+++++++++++++++++++++++++++++++---------------------------------
Mform.c | 10----------
Mframe.c | 158+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Mhtml.c | 5+++--
Mhtml.h | 7++++---
Mtable.c | 254+++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
Mtable.h | 28++++++++++++----------------
Mtagtable.tab | 3++-
10 files changed, 544 insertions(+), 358 deletions(-)

diff --git a/ChangeLog b/ChangeLog @@ -1,3 +1,51 @@ +2002-12-04 Hironori SAKAMOTO <hsaka@mth.biglobe.ne.jp> + + * [w3m-dev 03509] HTML parser + * file.c (close_textarea): delete + (HTMLtagproc1): rewrite + delete HTML_EOL + move HTML_LISTING, HTML_N_LISTING + add HTML_PRE_PLAIN, HTML_N_PRE_PLAIN + add HTML_PLAINTEXT + end_tag + (HTMLlineproc0): s/str/line/ + rewrite + (completeHTMLstream): </textarea> if necessary + * fm.h (struct readbuffer): delete ignore_tag + add end_tag + (RB_XMPMODE): deleted + (RB_LSTMODE): deleted + (RB_SCRIPT): added + (RB_STYLE): added + (RB_*): renumber + (R_ST_EOL): added + (R_ST_*): renumber + (ST_IS_TAG): check R_ST_EOL + * form.c (form_fputs_decode): remove <eol> handling + * frame.c (newFrame): remove_space() + (CASE_TABLE_TAG): added + (createFrameFile): rewrite + * html.c (TagMAP): delete eol + add pre_plain, /pre_plain + * html.h (HTML_EOL): deleted + (HTML_PRE_PLAIN): added + (HTML_N_PRE_PLAIN): added + * table.c (visible_length): rewrite + (visible_length_plain): added + (maximum_visible_length_plain): added + (do_refill): R_ST_EOL + (table_close_select): end_tag + (table_close_textarea): end_tag + (TAG_ACTION_PLAIN): added + (feed_table_tag): rewrite + (feed_table): rewrite + * table.h (TBLM_*) reassign + (struct table_mode): delete ignore_tag + add end_tag + * tagtable.tab (eol): deleted + (pre_plain): added + (/pre_plain): added + 2002-12-03 Hironori SAKAMOTO <hsaka@mth.biglobe.ne.jp> * [w3m-dev 03505] Re: coredump when ssl error diff --git a/file.c b/file.c @@ -34,7 +34,6 @@ static FILE *lessopen_stream(char *path); static Buffer *loadcmdout(char *cmd, Buffer *(*loadproc) (URLFile *, Buffer *), Buffer *defaultbuf); -static void close_textarea(struct html_feed_environ *h_env); static void addnewline(Buffer *buf, char *line, Lineprop *prop, #ifdef USE_ANSI_COLOR Linecolor *color, @@ -4064,10 +4063,6 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) flushline(h_env, obuf, envs[h_env->envc].indent, 1, 
h_env->limit); h_env->blank_lines = 0; return 1; - case HTML_EOL: - if ((obuf->flag & RB_PREMODE) && obuf->pos > envs[h_env->envc].indent) - flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); - return 1; case HTML_H: if (!(obuf->flag & (RB_PREMODE | RB_IGNORE_P))) { flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); @@ -4366,46 +4361,74 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) if (obuf->nobr_level == 0) obuf->flag &= ~RB_NOBR; return 0; - case HTML_LISTING: + case HTML_PRE_PLAIN: CLOSE_P; - flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); - obuf->flag |= (RB_LSTMODE | RB_IGNORE_P); - /* istr = str; */ + if (!(obuf->flag & RB_IGNORE_P)) { + flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); + do_blankline(h_env, obuf, envs[h_env->envc].indent, 0, + h_env->limit); + } + obuf->flag |= (RB_PRE | RB_IGNORE_P); return 1; - case HTML_N_LISTING: + case HTML_N_PRE_PLAIN: CLOSE_P; - flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); - obuf->flag &= ~RB_LSTMODE; + if (!(obuf->flag & RB_IGNORE_P)) { + flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); + do_blankline(h_env, obuf, envs[h_env->envc].indent, 0, + h_env->limit); + obuf->flag |= RB_IGNORE_P; + } + obuf->flag &= ~RB_PRE; return 1; + case HTML_LISTING: case HTML_XMP: + case HTML_PLAINTEXT: CLOSE_P; - flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); - obuf->flag |= (RB_XMPMODE | RB_IGNORE_P); - /* istr = str; */ + if (!(obuf->flag & RB_IGNORE_P)) { + flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); + do_blankline(h_env, obuf, envs[h_env->envc].indent, 0, + h_env->limit); + } + obuf->flag |= (RB_PLAIN | RB_IGNORE_P); + switch (cmd) { + case HTML_LISTING: + obuf->end_tag = HTML_N_LISTING; + break; + case HTML_XMP: + obuf->end_tag = HTML_N_XMP; + break; + case HTML_PLAINTEXT: + obuf->end_tag = MAX_HTMLTAG; + break; + } return 1; + case HTML_N_LISTING: case 
HTML_N_XMP: CLOSE_P; - flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); - obuf->flag &= ~RB_XMPMODE; + if (!(obuf->flag & RB_IGNORE_P)) { + flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); + do_blankline(h_env, obuf, envs[h_env->envc].indent, 0, + h_env->limit); + obuf->flag |= RB_IGNORE_P; + } + obuf->flag &= ~RB_PLAIN; + obuf->end_tag = 0; return 1; case HTML_SCRIPT: - obuf->flag |= RB_IGNORE; - obuf->ignore_tag = Strnew_charp("</script>"); - return 1; - case HTML_N_SCRIPT: - /* should not be reached */ + obuf->flag |= RB_SCRIPT; + obuf->end_tag = HTML_N_SCRIPT; return 1; case HTML_STYLE: - obuf->flag |= RB_IGNORE; - obuf->ignore_tag = Strnew_charp("</style>"); + obuf->flag |= RB_STYLE; + obuf->end_tag = HTML_N_STYLE; return 1; - case HTML_N_STYLE: - /* should not be reached */ + case HTML_N_SCRIPT: + obuf->flag &= ~RB_SCRIPT; + obuf->end_tag = 0; return 1; - case HTML_PLAINTEXT: - flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); - obuf->flag |= RB_PLAIN; - /* istr = str; */ + case HTML_N_STYLE: + obuf->flag &= ~RB_STYLE; + obuf->end_tag = 0; return 1; case HTML_A: if (obuf->anchor) @@ -4513,7 +4536,7 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) table_mode[obuf->table_level].indent_level = 0; table_mode[obuf->table_level].nobr_level = 0; table_mode[obuf->table_level].caption = 0; - table_mode[obuf->table_level].ignore_tag = NULL; + table_mode[obuf->table_level].end_tag = 0; /* HTML_UNKNOWN */ #ifndef TABLE_EXPAND tables[obuf->table_level]->total_width = width; #else @@ -4572,9 +4595,11 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) if (tmp) HTMLlineproc1(tmp->ptr, h_env); obuf->flag |= RB_INSELECT; + obuf->end_tag = HTML_N_SELECT; return 1; case HTML_N_SELECT: obuf->flag &= ~RB_INSELECT; + obuf->end_tag = 0; tmp = process_n_select(); if (tmp) HTMLlineproc1(tmp->ptr, h_env); @@ -4587,9 +4612,14 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ 
*h_env) if (tmp) HTMLlineproc1(tmp->ptr, h_env); obuf->flag |= RB_INTXTA; + obuf->end_tag = HTML_N_TEXTAREA; return 1; case HTML_N_TEXTAREA: - close_textarea(h_env); + obuf->flag &= ~RB_INTXTA; + obuf->end_tag = 0; + tmp = process_n_textarea(); + if (tmp) + HTMLlineproc1(tmp->ptr, h_env); return 1; case HTML_ISINDEX: p = ""; @@ -5448,10 +5478,9 @@ table_width(struct html_feed_environ *h_env, int table_level) /* HTML processing first pass */ void -HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) +HTMLlineproc0(char *line, struct html_feed_environ *h_env, int internal) { Lineprop mode; - char *q; int cmd; struct readbuffer *obuf = h_env->obuf; int indent, delta; @@ -5467,25 +5496,12 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) (obuf->flag & RB_PREMODE) ? 'P' : ' ', (obuf->table_level >= 0) ? 'T' : ' ', (obuf->flag & RB_INTXTA) ? 'X' : ' ', - (obuf->flag & RB_IGNORE) ? 'I' : ' '); - fprintf(f, "HTMLlineproc1(\"%s\",%d,%lx)\n", str, h_env->limit, + (obuf->flag & (RB_SCRIPT | RB_STYLE)) ? 'S' : ' '); + fprintf(f, "HTMLlineproc1(\"%s\",%d,%lx)\n", line, h_env->limit, (unsigned long)h_env); fclose(f); } -#if 0 - /* comment processing */ - if (obuf->status == R_ST_CMNT || obuf->status == R_ST_NCMNT3 || - obuf->status == R_ST_IRRTAG) { - while (*str != '\0' && obuf->status != R_ST_NORMAL) { - next_status(*str, &obuf->status); - str++; - } - if (obuf->status != R_ST_NORMAL) - return; - } -#endif - tokbuf = Strnew(); table_start: @@ -5496,132 +5512,93 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) tbl_width = table_width(h_env, level); } - while (*str != '\0') { + while (*line != '\0') { + char *str, *p; int is_tag = FALSE; - int pre_mode = (obuf->table_level >= 0) ? - tbl_mode->pre_mode & TBLM_PLAIN : obuf->flag & RB_PLAINMODE; - - if (obuf->flag & RB_PLAIN) - goto read_as_plain; /* don't process tag */ + int pre_mode = (obuf->table_level >= 0) ? 
tbl_mode->pre_mode : + obuf->flag; + int end_tag = (obuf->table_level >= 0) ? tbl_mode->end_tag : + obuf->end_tag; - if (ST_IS_COMMENT(obuf->status)) { - read_token(h_env->tagbuf, &str, &obuf->status, pre_mode, 1); - if (obuf->status != R_ST_NORMAL) - return; - if (pre_mode) { - is_tag = TRUE; - q = h_env->tagbuf->ptr; - goto read_as_pre_mode; - } - continue; - } - if (*str == '<' || ST_IS_TAG(obuf->status)) { + if (*line == '<' || obuf->status != R_ST_NORMAL) { /* * Tag processing */ - if (ST_IS_TAG(obuf->status)) { -/*** continuation of a tag ***/ - read_token(h_env->tagbuf, &str, &obuf->status, pre_mode, 1); - } + if (obuf->status == R_ST_EOL) + obuf->status = R_ST_NORMAL; else { - if (!REALLY_THE_BEGINNING_OF_A_TAG(str)) { - /* this is NOT a beginning of a tag */ - obuf->status = R_ST_NORMAL; - if (pre_mode) - goto read_as_pre_mode; - HTMLlineproc1("&lt;", h_env); - str++; - continue; - } - read_token(h_env->tagbuf, &str, &obuf->status, pre_mode, 0); - } -#if 0 - if (ST_IS_COMMENT(obuf->status)) { - if ((obuf->table_level >= 0) ? tbl_mode->pre_mode & TBLM_IGNORE - : obuf->flag & RB_IGNORE) - /* within ignored tag, such as * - * <script>..</script>, don't process comment. */ - obuf->status = R_ST_NORMAL; - return; + read_token(h_env->tagbuf, &line, &obuf->status, + pre_mode & RB_PREMODE, obuf->status != R_ST_NORMAL); + if (obuf->status != R_ST_NORMAL) + return; } -#endif if (h_env->tagbuf->length == 0) continue; - if (obuf->status != R_ST_NORMAL) { - if (!pre_mode) { - if (Strlastchar(h_env->tagbuf) == '\n') - Strchop(h_env->tagbuf); - if (ST_IS_REAL_TAG(obuf->status)) - Strcat_char(h_env->tagbuf, ' '); + str = h_env->tagbuf->ptr; + if (*str == '<') { + if (str[1] && REALLY_THE_BEGINNING_OF_A_TAG(str)) + is_tag = TRUE; + else if (!(pre_mode & (RB_PLAIN | RB_INTXTA | RB_INSELECT | + RB_SCRIPT | RB_STYLE))) { + line = Strnew_m_charp(str + 1, line, NULL)->ptr; + str = "&lt;"; } - if ((obuf->table_level >= 0) - ? 
((tbl_mode->pre_mode & TBLM_IGNORE) && - !TAG_IS(h_env->tagbuf->ptr, tbl_mode->ignore_tag->ptr, - tbl_mode->ignore_tag->length - 1)) - : ((obuf->flag & RB_IGNORE) && - !TAG_IS(h_env->tagbuf->ptr, obuf->ignore_tag->ptr, - obuf->ignore_tag->length - 1))) - /* within ignored tag, such as * - * <script>..</script>, don't process tag. */ - obuf->status = R_ST_NORMAL; - continue; } - is_tag = TRUE; - q = h_env->tagbuf->ptr; + } + else { + read_token(tokbuf, &line, &obuf->status, pre_mode & RB_PREMODE, 0); + if (obuf->status != R_ST_NORMAL) /* R_ST_AMP ? */ + continue; + str = tokbuf->ptr; } - read_as_pre_mode: - if (obuf->flag & (RB_INTXTA | RB_INSELECT | RB_IGNORE)) { - cmd = HTML_UNKNOWN; - if (!is_tag) { - read_token(tokbuf, &str, &obuf->status, - (obuf->flag & RB_INTXTA) ? 1 : 0, 0); - if (obuf->status != R_ST_NORMAL) - continue; - q = tokbuf->ptr; - } - else { - char *p = q; - cmd = gethtmlcmd(&p); - } - - /* textarea */ - if (obuf->flag & RB_INTXTA) { - if (cmd == HTML_N_TEXTAREA) - goto proc_normal; - feed_textarea(q); + if (pre_mode & (RB_PLAIN | RB_INTXTA | RB_INSELECT | RB_SCRIPT | + RB_STYLE)) { + if (is_tag) { + p = str; + if ((tag = parse_tag(&p, internal))) { + if (tag->tagid == end_tag || + (pre_mode & RB_INSELECT && tag->tagid == HTML_N_FORM)) + goto proc_normal; + } } - else if (obuf->flag & RB_INSELECT) { - if (cmd == HTML_N_SELECT || cmd == HTML_N_FORM) + /* select */ + if (pre_mode & RB_INSELECT) { + if (obuf->table_level >= 0) goto proc_normal; - feed_select(q); + feed_select(str); + continue; } - /* script */ - else if (obuf->flag & RB_IGNORE) { - if (TAG_IS(q, obuf->ignore_tag->ptr, - obuf->ignore_tag->length - 1)) { - obuf->flag &= ~RB_IGNORE; + if (is_tag) { + if (strncmp(str, "<!--", 4) && (p = strchr(str + 1, '<'))) { + str = Strnew_charp_n(str, p - str)->ptr; + line = Strnew_m_charp(p, line, NULL)->ptr; } + is_tag = FALSE; } - continue; + if (obuf->table_level >= 0) + goto proc_normal; + /* textarea */ + if (pre_mode & RB_INTXTA) { + 
feed_textarea(str); + continue; + } + /* script */ + if (pre_mode & RB_SCRIPT) + continue; + /* style */ + if (pre_mode & RB_STYLE) + continue; } + proc_normal: if (obuf->table_level >= 0) { /* * within table: in <table>..</table>, all input tokens * are fed to the table renderer, and then the renderer * makes HTML output. */ - - if (!is_tag) { - read_token(tokbuf, &str, &obuf->status, - tbl_mode->pre_mode & TBLM_PREMODE, 0); - if (obuf->status != R_ST_NORMAL) - continue; - q = tokbuf->ptr; - } - - switch (feed_table(tbl, q, tbl_mode, tbl_width, internal)) { + switch (feed_table(tbl, str, tbl_mode, tbl_width, internal)) { case 0: /* </table> tag */ obuf->table_level--; @@ -5629,14 +5606,13 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) continue; end_table(tbl); if (obuf->table_level >= 0) { - Str tmp; struct table *tbl0 = tables[obuf->table_level]; - tmp = Sprintf("<table_alt tid=%d>", tbl0->ntable); + str = Sprintf("<table_alt tid=%d>", tbl0->ntable)->ptr; pushTable(tbl0, tbl); tbl = tbl0; tbl_mode = &table_mode[obuf->table_level]; tbl_width = table_width(h_env, obuf->table_level); - feed_table(tbl, tmp->ptr, tbl_mode, tbl_width, TRUE); + feed_table(tbl, str, tbl_mode, tbl_width, TRUE); continue; /* continue to the next */ } @@ -5659,27 +5635,17 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) continue; case 1: /* <table> tag */ - goto proc_normal; + break; default: continue; } } - proc_normal: if (is_tag) { /*** Beginning of a new tag ***/ - if ((tag = parse_tag(&q, internal))) + if ((tag = parse_tag(&str, internal))) cmd = tag->tagid; else - cmd = HTML_UNKNOWN; - if (((obuf->flag & RB_XMPMODE) && cmd != HTML_N_XMP) || - ((obuf->flag & RB_LSTMODE) && cmd != HTML_N_LISTING)) { - Str tmp = Strdup(h_env->tagbuf); - Strcat_charp(tmp, str); - str = tmp->ptr; - goto read_as_plain; - } - if (cmd == HTML_UNKNOWN) continue; /* process tags */ if (HTMLtagproc1(tag, h_env) == 0) { @@ -5701,12 +5667,12 @@ 
HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) continue; } - read_as_plain: + while (*str) { mode = get_mctype(str); delta = get_mclen(mode); if (obuf->flag & (RB_SPECIAL & ~RB_NOBR)) { char ch = *str; - if (!(obuf->flag & RB_PLAINMODE) && (*str == '&')) { + if (!(obuf->flag & RB_PLAIN) && (*str == '&')) { char *p = str; int ech = getescapechar(&p); if (ech == '\n' || ech == '\r') { @@ -5739,7 +5705,7 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) % Tabstop != 0); str++; } - else if (obuf->flag & RB_PLAINMODE) { + else if (obuf->flag & RB_PLAIN) { char *p = html_quote_char(*str); if (p) { push_charp(obuf, 1, p, PC_ASCII); @@ -5820,10 +5786,10 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) #endif /* FORMAT_NICE */ HTMLlineproc1(line->ptr, h_env); } + } } } - if (!(obuf->flag & (RB_PREMODE | RB_NOBR | RB_INTXTA | RB_INSELECT - | RB_PLAINMODE | RB_IGNORE))) { + if (!(obuf->flag & (RB_SPECIAL | RB_INTXTA | RB_INSELECT))) { char *tp; int i = 0; @@ -5849,17 +5815,6 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) } } -static void -close_textarea(struct html_feed_environ *h_env) -{ - Str tmp; - - h_env->obuf->flag &= ~RB_INTXTA; - tmp = process_n_textarea(); - if (tmp != NULL) - HTMLlineproc1(tmp->ptr, h_env); -} - extern char *NullLine; extern Lineprop NullProp[]; @@ -6135,6 +6090,8 @@ completeHTMLstream(struct html_feed_environ *h_env, struct readbuffer *obuf) push_tag(obuf, "</u>", HTML_N_U); obuf->in_under = 0; } + if (obuf->flag & RB_INTXTA) + HTMLlineproc1("</textarea>", h_env); /* for unbalanced select tag */ if (obuf->flag & RB_INSELECT) HTMLlineproc1("</select>", h_env); @@ -6142,7 +6099,7 @@ completeHTMLstream(struct html_feed_environ *h_env, struct readbuffer *obuf) /* for unbalanced table tag */ while (obuf->table_level >= 0) { table_mode[obuf->table_level].pre_mode - &= ~(TBLM_IGNORE | TBLM_XMP | TBLM_LST); + &= ~(TBLM_SCRIPT | TBLM_STYLE | TBLM_PLAIN); 
HTMLlineproc1("</table>", h_env); } } @@ -6351,8 +6308,10 @@ loadHTMLstream(URLFile *f, Buffer *newBuf, FILE * src, int internal) #endif /* USE_NNTP */ HTMLlineproc0(lineBuf2->ptr, &htmlenv1, internal); } - if (obuf.status != R_ST_NORMAL) - HTMLlineproc0(correct_irrtag(obuf.status)->ptr, &htmlenv1, internal); + if (obuf.status != R_ST_NORMAL) { + obuf.status = R_ST_EOL; + HTMLlineproc0("\n", &htmlenv1, internal); + } obuf.status = R_ST_NORMAL; completeHTMLstream(&htmlenv1, &obuf); flushline(&htmlenv1, &obuf, 0, 2, htmlenv1.limit); diff --git a/fm.h b/fm.h @@ -534,7 +534,7 @@ struct readbuffer { long flag_stack[RB_STACK_SIZE]; int flag_sp; int status; - Str ignore_tag; + unsigned char end_tag; short table_level; short nobr_level; Str anchor; @@ -557,33 +557,30 @@ struct readbuffer { #define in_stand fontstat[2] #define RB_PRE 0x01 -#define RB_XMPMODE 0x02 -#define RB_LSTMODE 0x04 +#define RB_SCRIPT 0x02 +#define RB_STYLE 0x04 #define RB_PLAIN 0x08 -#define RB_LEFT 0x80000 -#define RB_CENTER 0x10 -#define RB_RIGHT 0x20 -#define RB_ALIGN (RB_LEFT| RB_CENTER | RB_RIGHT) -#define RB_NOBR 0x40 -#define RB_P 0x80 -#define RB_PRE_INT 0x100 -#define RB_PREMODE (RB_PRE | RB_PRE_INT) -#define RB_SPECIAL (RB_PRE|RB_XMPMODE|RB_LSTMODE|RB_PLAIN|RB_NOBR|RB_PRE_INT) -#define RB_PLAINMODE (RB_XMPMODE|RB_LSTMODE|RB_PLAIN) - -#define RB_IN_DT 0x200 -#define RB_INTXTA 0x400 -#define RB_INSELECT 0x800 -#define RB_IGNORE 0x1000 -#define RB_INSEL 0x2000 -#define RB_IGNORE_P 0x4000 -#define RB_TITLE 0x8000 -#define RB_NFLUSHED 0x10000 -#define RB_NOFRAMES 0x20000 -#define RB_INTABLE 0x40000 +#define RB_LEFT 0x10 +#define RB_CENTER 0x20 +#define RB_RIGHT 0x40 +#define RB_ALIGN (RB_LEFT | RB_CENTER | RB_RIGHT) +#define RB_NOBR 0x80 +#define RB_P 0x100 +#define RB_PRE_INT 0x200 +#define RB_IN_DT 0x400 +#define RB_INTXTA 0x800 +#define RB_INSELECT 0x1000 +#define RB_IGNORE_P 0x2000 +#define RB_TITLE 0x4000 +#define RB_NFLUSHED 0x8000 +#define RB_NOFRAMES 0x10000 +#define RB_INTABLE 0x20000 
+#define RB_PREMODE (RB_PRE | RB_PRE_INT | RB_SCRIPT | RB_STYLE | RB_PLAIN | RB_INTXTA) +#define RB_SPECIAL (RB_PRE | RB_PRE_INT | RB_SCRIPT | RB_STYLE | RB_PLAIN | RB_NOBR) +#define RB_PLAIN_PRE 0x40000 #ifdef FORMAT_NICE -#define RB_FILL 0x200000 +#define RB_FILL 0x80000 #endif /* FORMAT_NICE */ #define RB_GET_ALIGN(obuf) ((obuf)->flag&RB_ALIGN) @@ -605,17 +602,18 @@ struct readbuffer { #define R_ST_DQUOTE 4 /* within double quote */ #define R_ST_EQL 5 /* = */ #define R_ST_AMP 6 /* within ampersand quote */ -#define R_ST_CMNT1 7 /* <! */ -#define R_ST_CMNT2 8 /* <!- */ -#define R_ST_CMNT 9 /* within comment */ -#define R_ST_NCMNT1 10 /* comment - */ -#define R_ST_NCMNT2 11 /* comment -- */ -#define R_ST_NCMNT3 12 /* comment -- space */ -#define R_ST_IRRTAG 13 /* within irregular tag */ +#define R_ST_EOL 7 /* end of file */ +#define R_ST_CMNT1 8 /* <! */ +#define R_ST_CMNT2 9 /* <!- */ +#define R_ST_CMNT 10 /* within comment */ +#define R_ST_NCMNT1 11 /* comment - */ +#define R_ST_NCMNT2 12 /* comment -- */ +#define R_ST_NCMNT3 13 /* comment -- space */ +#define R_ST_IRRTAG 14 /* within irregular tag */ #define ST_IS_REAL_TAG(s) ((s)==R_ST_TAG||(s)==R_ST_TAG0||(s)==R_ST_EQL) #define ST_IS_COMMENT(s) ((s)>=R_ST_CMNT1) -#define ST_IS_TAG(s) ((s)!=R_ST_NORMAL&&(s)!=R_ST_AMP&&!ST_IS_COMMENT(s)) +#define ST_IS_TAG(s) ((s)!=R_ST_NORMAL&&(s)!=R_ST_AMP&&!ST_IS_COMMENT(s)&&(s)!=R_ST_EOL) /* is this '<' really means the beginning of a tag? 
*/ #define REALLY_THE_BEGINNING_OF_A_TAG(p) \ diff --git a/form.c b/form.c @@ -430,16 +430,6 @@ form_fputs_decode(Str s, FILE * f) for (p = s->ptr; *p;) { switch (*p) { - case '<': - if (!strncasecmp(p, "<eol>", 5)) { - Strcat_char(z, '\n'); - p += 5; - } - else { - Strcat_char(z, *p); - p++; - } - break; #if !defined( __CYGWIN__ ) && !defined( __EMX__ ) case '\r': if (*(p + 1) == '\n') diff --git a/frame.c b/frame.c @@ -97,7 +97,7 @@ newFrame(struct parsed_tag *tag, Buffer *buf) body->baseURL = baseURL(buf); if (tag) { if (parsedtag_get_value(tag, ATTR_SRC, &p)) - body->url = url_quote_conv(p, buf->document_code); + body->url = url_quote_conv(remove_space(p), buf->document_code); if (parsedtag_get_value(tag, ATTR_NAME, &p) && *p != '_') body->name = url_quote_conv(p, buf->document_code); } @@ -412,6 +412,23 @@ frame_download_source(struct frame_body *b, ParsedURL *currentURL, return ret_frameset; } +#define CASE_TABLE_TAG \ + case HTML_TR:\ + case HTML_N_TR:\ + case HTML_TD:\ + case HTML_N_TD:\ + case HTML_TH:\ + case HTML_N_TH:\ + case HTML_THEAD:\ + case HTML_N_THEAD:\ + case HTML_TBODY:\ + case HTML_N_TBODY:\ + case HTML_TFOOT:\ + case HTML_N_TFOOT:\ + case HTML_COLGROUP:\ + case HTML_N_COLGROUP:\ + case HTML_COL + static int createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, int force_reload) @@ -467,8 +484,10 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, struct frameset *f_frameset; int i = c + r * f->col; char *p = ""; + int status = R_ST_NORMAL; Str tok = Strnew(); - int status; + int pre_mode = 0; + int end_tag = 0; frame = f->frame[i]; @@ -557,12 +576,13 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, break; } do { - status = R_ST_NORMAL; + int is_tag = FALSE; + char *q; + struct parsed_tag *tag; + do { if (*p == '\0') { Str tmp = StrmyUFgets(&f2); - if (tmp->length == 0 && status != R_ST_NORMAL) - tmp = correct_irrtag(status); if (tmp->length == 0) break; #ifdef 
JP_CHARSET @@ -573,21 +593,67 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, cleanup_line(tmp, HTML_MODE); p = tmp->ptr; } - if (status == R_ST_NORMAL) - read_token(tok, &p, &status, 1, 0); - else if (ST_IS_COMMENT(status)) - read_token(tok, &p, &status, 0, 0); - else - read_token(tok, &p, &status, 1, 1); + read_token(tok, &p, &status, 1, status != R_ST_NORMAL); } while (status != R_ST_NORMAL); if (tok->length == 0) continue; if (tok->ptr[0] == '<') { + is_tag = TRUE; + if (pre_mode & (RB_PLAIN | RB_INTXTA | RB_SCRIPT | + RB_STYLE)) { + q = tok->ptr; + if ((tag = parse_tag(&q, FALSE)) && + tag->tagid == end_tag) { + if (pre_mode & RB_PLAIN) { + fputs("</PRE_PLAIN>", f1); + pre_mode = 0; + end_tag = 0; + goto token_end; + } + pre_mode = 0; + end_tag = 0; + goto proc_normal; + } + if (strncmp(tok->ptr, "<!--", 4) && + (q = strchr(tok->ptr + 1, '<'))) { + tok = Strnew_charp_n(tok->ptr, q - tok->ptr); + p = Strnew_m_charp(q, p, NULL)->ptr; + status = R_ST_NORMAL; + } + is_tag = FALSE; + } + else if (pre_mode & RB_INSELECT) { + q = tok->ptr; + if ((tag = parse_tag(&q, FALSE))) { + if ((tag->tagid == end_tag) || + (tag->tagid == HTML_N_FORM)) { + if (tag->tagid == HTML_N_FORM) + fputs("</SELECT>", f1); + pre_mode = 0; + end_tag = 0; + goto proc_normal; + } + if (t_stack) { + switch (tag->tagid) { + case HTML_TABLE: + case HTML_N_TABLE: + CASE_TABLE_TAG: + fputs("</SELECT>", f1); + pre_mode = 0; + end_tag = 0; + goto proc_normal; + } + } + } + } + } + + proc_normal: + if (is_tag) { char *q = tok->ptr; int j, a_target = 0; - struct parsed_tag *tag; ParsedURL url; if (!(tag = parse_tag(&q, FALSE))) @@ -603,7 +669,7 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, case HTML_BASE: /* "BASE" is prohibit tag */ if (parsedtag_get_value(tag, ATTR_HREF, &q)) { - q = url_quote_conv(q, code); + q = url_quote_conv(remove_space(q), code); parseURL(q, &base, NULL); } if (parsedtag_get_value(tag, ATTR_TARGET, &q)) { @@ -660,18 
+726,7 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, goto token_end; } break; - case HTML_THEAD: - case HTML_N_THEAD: - case HTML_TBODY: - case HTML_N_TBODY: - case HTML_TFOOT: - case HTML_N_TFOOT: - case HTML_TD: - case HTML_N_TD: - case HTML_TR: - case HTML_N_TR: - case HTML_TH: - case HTML_N_TH: + CASE_TABLE_TAG: /* table_tags MUST be in table stack */ if (!t_stack) { Strshrinkfirst(tok, 1); @@ -682,6 +737,37 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, } break; + case HTML_SELECT: + pre_mode = RB_INSELECT; + end_tag = HTML_N_SELECT; + break; + case HTML_TEXTAREA: + pre_mode = RB_INTXTA; + end_tag = HTML_N_TEXTAREA; + break; + case HTML_SCRIPT: + pre_mode = RB_SCRIPT; + end_tag = HTML_N_SCRIPT; + break; + case HTML_STYLE: + pre_mode = RB_STYLE; + end_tag = HTML_N_STYLE; + break; + case HTML_LISTING: + pre_mode = RB_PLAIN; + end_tag = HTML_N_LISTING; + fputs("<PRE_PLAIN>", f1); + goto token_end; + case HTML_XMP: + pre_mode = RB_PLAIN; + end_tag = HTML_N_XMP; + fputs("<PRE_PLAIN>", f1); + goto token_end; + case HTML_PLAINTEXT: + pre_mode = RB_PLAIN; + end_tag = MAX_HTMLTAG; + fputs("<PRE_PLAIN>", f1); + goto token_end; default: break; } @@ -693,7 +779,8 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, if (!tag->value[j]) break; tag->value[j] = - url_quote_conv(tag->value[j], code); + url_quote_conv(remove_space(tag->value[j]), + code); parseURL2(tag->value[j], &url, &base); if (url.scheme == SCM_UNKNOWN || #ifndef USE_W3MMAILER @@ -748,11 +835,28 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, Strfputs(tok, f1); } else { - Strfputs(tok, f1); + if (pre_mode & (RB_PLAIN | RB_INTXTA)) + fprintf(f1, "%s", html_quote(tok->ptr)); + else + Strfputs(tok, f1); } token_end: Strclear(tok); } while (*p != '\0' || !iseos(f2.stream)); + if (pre_mode & RB_PLAIN) + fputs("</PRE_PLAIN>\n", f1); + else if (pre_mode & RB_INTXTA) + fputs("</TEXTAREA></FORM>\n", 
f1); + else if (pre_mode & RB_INSELECT) + fputs("</SELECT></FORM>\n", f1); + else if (pre_mode & (RB_SCRIPT | RB_STYLE)) { + if (status != R_ST_NORMAL) + fputs(correct_irrtag(status)->ptr, f1); + if (pre_mode & RB_SCRIPT) + fputs("</SCRIPT>\n", f1); + else if (pre_mode & RB_STYLE) + fputs("</STYLE>\n", f1); + } while (t_stack--) fputs("</TABLE>\n", f1); UFclose(&f2); diff --git a/html.c b/html.c @@ -248,8 +248,9 @@ TagInfo TagMAP[MAX_HTMLTAG] = { {"/input_alt", NULL, 0, TFLG_INT | TFLG_END}, /* 123 HTML_N_INPUT_ALT */ {"img_alt", ALST_IMG_ALT, MAXA_IMG_ALT, TFLG_INT}, /* 124 HTML_IMG_ALT */ {"/img_alt", NULL, 0, TFLG_INT | TFLG_END}, /* 125 HTML_N_IMG_ALT */ - {"eol", NULL, 0, TFLG_INT}, /* 126 HTML_EOL */ - {" ", ALST_NOP, MAXA_NOP, TFLG_INT}, /* 127 HTML_NOP */ + {" ", ALST_NOP, MAXA_NOP, TFLG_INT}, /* 126 HTML_NOP */ + {"pre_plain", NULL, 0, TFLG_INT}, /* 127 HTML_PRE_PLAIN */ + {"/pre_plain", NULL, 0, TFLG_INT | TFLG_END}, /* 128 HTML_N_PRE_PLAIN */ }; TagAttrInfo AttrMAP[MAX_TAGATTR] = { diff --git a/html.h b/html.h @@ -213,10 +213,11 @@ typedef struct { #define HTML_N_INPUT_ALT 123 #define HTML_IMG_ALT 124 #define HTML_N_IMG_ALT 125 -#define HTML_EOL 126 -#define HTML_NOP 127 +#define HTML_NOP 126 +#define HTML_PRE_PLAIN 127 +#define HTML_N_PRE_PLAIN 128 -#define MAX_HTMLTAG 128 +#define MAX_HTMLTAG 129 /* Tag attribute */ diff --git a/table.c b/table.c @@ -490,11 +490,11 @@ visible_length(char *str) else if (status == R_ST_AMP) { if (prev_status == R_ST_NORMAL) { Strclear(tagbuf); + len--; amp_len = 0; } else { Strcat_char(tagbuf, *str); - len++; amp_len++; } } @@ -502,10 +502,13 @@ visible_length(char *str) Strcat_char(tagbuf, *str); r2 = tagbuf->ptr; t = getescapecmd(&r2); - len += strlen(t) - 1 - amp_len; - if (*r2 != '\0') { - str -= strlen(r2); + if (!*r2 && (*t == '\r' || *t == '\n')) { + if (len > max_len) + max_len = len; + len = 0; } + else + len += strlen(t) + strlen(r2); } else if (status == R_ST_NORMAL && ST_IS_REAL_TAG(prev_status)) { ; @@ 
-516,22 +519,42 @@ visible_length(char *str) len++; } while ((visible_length_offset + len) % Tabstop != 0); } - else if (*str == '\n' || *str == '\r') { + else if (*str == '\r' || *str == '\n') { + len--; if (len > max_len) max_len = len; len = 0; } - else if (*str == '\n' || *str == '\r') - len = 0; str++; } if (status == R_ST_AMP) { r2 = tagbuf->ptr; t = getescapecmd(&r2); - len += strlen(t) - 1 - amp_len; - if (*r2 != '\0') { - len += strlen(r2); + if (*t != '\r' && *t != '\n') + len += strlen(t) + strlen(r2); + } + return len > max_len ? len : max_len; +} + +int +visible_length_plain(char *str) +{ + int len = 0, max_len = 0; + + while (*str) { + if (*str == '\t') { + do { + len++; + } while ((visible_length_offset + len) % Tabstop != 0); + } + else if (*str == '\r' || *str == '\n') { + if (len > max_len) + max_len = len; + len = 0; } + else + len++; + str++; } return len > max_len ? len : max_len; } @@ -558,6 +581,28 @@ maximum_visible_length(char *str) return maxlen; } +int +maximum_visible_length_plain(char *str) +{ + int maxlen, len; + + visible_length_offset = 0; + maxlen = visible_length_plain(str); + + if (!strchr(str, '\t')) + return maxlen; + + for (visible_length_offset = 1; visible_length_offset < Tabstop; + visible_length_offset++) { + len = visible_length_plain(str); + if (maxlen < len) { + maxlen = len; + break; + } + } + return maxlen; +} + void align(TextLine *lbuf, int width, int mode) { @@ -810,6 +855,10 @@ do_refill(struct table *tbl, int row, int col, int maxlimit) else HTMLlineproc1(l->ptr, &h_env); } + if (obuf.status != R_ST_NORMAL) { + obuf.status = R_ST_EOL; + HTMLlineproc1("\n", &h_env); + } completeHTMLstream(&h_env, &obuf); flushline(&h_env, &obuf, 0, 2, h_env.limit); if (tbl->border_mode == BORDER_NONE) { @@ -2361,6 +2410,7 @@ table_close_select(struct table *tbl, struct table_mode *mode, int width) { Str tmp = process_n_select(); mode->pre_mode &= ~TBLM_INSELECT; + mode->end_tag = 0; feed_table1(tbl, tmp, mode, width); } @@ -2369,6 
+2419,7 @@ table_close_textarea(struct table *tbl, struct table_mode *mode, int width) { Str tmp = process_n_textarea(); mode->pre_mode &= ~TBLM_INTXTA; + mode->end_tag = 0; feed_table1(tbl, tmp, mode, width); } @@ -2394,6 +2445,7 @@ table_close_anchor0(struct table *tbl, struct table_mode *mode) #define TAG_ACTION_FEED 1 #define TAG_ACTION_TABLE 2 #define TAG_ACTION_N_TABLE 3 +#define TAG_ACTION_PLAIN 4 #define CASE_TABLE_TAG \ case HTML_TABLE:\ @@ -2429,53 +2481,62 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode, cmd = tag->tagid; - if (mode->pre_mode & TBLM_IGNORE) { - switch (cmd) { - case HTML_N_STYLE: - mode->pre_mode &= ~TBLM_STYLE; + if (mode->pre_mode & TBLM_PLAIN) { + if (mode->end_tag == cmd) { + mode->pre_mode &= ~TBLM_PLAIN; + mode->end_tag = 0; + feed_table_block_tag(tbl, line, mode, 0, cmd); return TAG_ACTION_NONE; - case HTML_N_SCRIPT: + } + return TAG_ACTION_PLAIN; + } + if (mode->pre_mode & TBLM_INTXTA) { + if (mode->end_tag == cmd) { + table_close_textarea(tbl, mode, width); + return TAG_ACTION_NONE; + } + return TAG_ACTION_FEED; + } + if (mode->pre_mode & TBLM_SCRIPT) { + if (mode->end_tag == cmd) { mode->pre_mode &= ~TBLM_SCRIPT; + mode->end_tag = 0; return TAG_ACTION_NONE; - default: + } + return TAG_ACTION_PLAIN; + } + if (mode->pre_mode & TBLM_STYLE) { + if (mode->end_tag == cmd) { + mode->pre_mode &= ~TBLM_STYLE; + mode->end_tag = 0; return TAG_ACTION_NONE; } + return TAG_ACTION_PLAIN; } - - switch (cmd) { - CASE_TABLE_TAG: - if (mode->caption) - mode->caption = 0; - if (mode->pre_mode & (TBLM_IGNORE | TBLM_XMP | TBLM_LST)) - mode->pre_mode &= ~(TBLM_IGNORE | TBLM_XMP | TBLM_LST); - if (mode->pre_mode & TBLM_INTXTA) - table_close_textarea(tbl, mode, width); - if (mode->pre_mode & TBLM_INSELECT) + /* failsafe: a tag other than <option></option>and </select> in * + * <select> environment is regarded as the end of <select>. 
*/ + if (mode->pre_mode & TBLM_INSELECT) { + switch (cmd) { + CASE_TABLE_TAG: + case HTML_N_FORM: + case HTML_N_SELECT: /* mode->end_tag */ table_close_select(tbl, mode, width); + break; + default: + return TAG_ACTION_FEED; + } } - if (mode->caption) { switch (cmd) { + CASE_TABLE_TAG: case HTML_N_CAPTION: mode->caption = 0; - return TAG_ACTION_NONE; + break; default: return TAG_ACTION_FEED; } } - /* failsafe: a tag other than <option></option>and </select> in * - * <select> environment is regarded as the end of <select>. */ - if (mode->pre_mode & TBLM_INSELECT && cmd == HTML_N_FORM) { - table_close_select(tbl, mode, width); - } - - if ((mode->pre_mode & TBLM_INSELECT && cmd != HTML_N_SELECT) || - (mode->pre_mode & TBLM_INTXTA && cmd != HTML_N_TEXTAREA) || - (mode->pre_mode & TBLM_XMP && cmd != HTML_N_XMP) || - (mode->pre_mode & TBLM_LST && cmd != HTML_N_LISTING)) - return TAG_ACTION_FEED; - if (mode->pre_mode & TBLM_PRE) { switch (cmd) { case HTML_NOBR: @@ -2742,33 +2803,33 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode, case HTML_LI: case HTML_PRE: case HTML_N_PRE: + case HTML_HR: case HTML_LISTING: - case HTML_N_LISTING: case HTML_XMP: - case HTML_N_XMP: case HTML_PLAINTEXT: + case HTML_PRE_PLAIN: + case HTML_N_PRE_PLAIN: feed_table_block_tag(tbl, line, mode, 0, cmd); switch (cmd) { case HTML_PRE: + case HTML_PRE_PLAIN: mode->pre_mode |= TBLM_PRE; break; case HTML_N_PRE: + case HTML_N_PRE_PLAIN: mode->pre_mode &= ~TBLM_PRE; break; case HTML_LISTING: - mode->pre_mode |= TBLM_LST; - break; - case HTML_N_LISTING: - mode->pre_mode &= ~TBLM_LST; + mode->pre_mode |= TBLM_PLAIN; + mode->end_tag = HTML_N_LISTING; break; case HTML_XMP: - mode->pre_mode |= TBLM_XMP; - break; - case HTML_N_XMP: - mode->pre_mode &= ~TBLM_XMP; + mode->pre_mode |= TBLM_PLAIN; + mode->end_tag = HTML_N_XMP; break; case HTML_PLAINTEXT: - mode->pre_mode |= TBLM_PLAINTEXT; + mode->pre_mode |= TBLM_PLAIN; + mode->end_tag = MAX_HTMLTAG; break; } break; @@ -2857,9 +2918,7 @@ 
feed_table_tag(struct table *tbl, char *line, struct table_mode *mode, if (tmp) feed_table1(tbl, tmp, mode, width); mode->pre_mode |= TBLM_INSELECT; - break; - case HTML_N_SELECT: - table_close_select(tbl, mode, width); + mode->end_tag = HTML_N_SELECT; break; case HTML_OPTION: /* nothing */ @@ -2880,9 +2939,7 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode, if (tmp) feed_table1(tbl, tmp, mode, width); mode->pre_mode |= TBLM_INTXTA; - break; - case HTML_N_TEXTAREA: - table_close_textarea(tbl, mode, width); + mode->end_tag = HTML_N_TEXTAREA; break; case HTML_A: table_close_anchor0(tbl, mode); @@ -2969,11 +3026,11 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode, break; case HTML_SCRIPT: mode->pre_mode |= TBLM_SCRIPT; - mode->ignore_tag = Strnew_charp("</script>"); + mode->end_tag = HTML_N_SCRIPT; break; case HTML_STYLE: mode->pre_mode |= TBLM_STYLE; - mode->ignore_tag = Strnew_charp("</style>"); + mode->end_tag = HTML_N_STYLE; break; case HTML_N_A: table_close_anchor0(tbl, mode); @@ -2994,7 +3051,6 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode, case HTML_TEXTAREA_INT: case HTML_N_TEXTAREA_INT: case HTML_IMG_ALT: - case HTML_EOL: case HTML_RULE: case HTML_N_RULE: default: @@ -3014,25 +3070,29 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode, Str tmp; struct table_linfo *linfo = &tbl->linfo; - if (*line == '<') { - int action; + if (*line == '<' && line[1] && REALLY_THE_BEGINNING_OF_A_TAG(line)) { struct parsed_tag *tag; p = line; tag = parse_tag(&p, internal); if (tag) { - action = feed_table_tag(tbl, line, mode, width, tag); - if (action == TAG_ACTION_NONE) + switch (feed_table_tag(tbl, line, mode, width, tag)) { + case TAG_ACTION_NONE: return -1; - else if (action == TAG_ACTION_N_TABLE) + case TAG_ACTION_N_TABLE: return 0; - else if (action == TAG_ACTION_TABLE) { + case TAG_ACTION_TABLE: return 1; + case TAG_ACTION_PLAIN: + break; + case TAG_ACTION_FEED: + default: + if 
(parsedtag_need_reconstruct(tag)) + line = parsedtag2str(tag)->ptr; } - else if (parsedtag_need_reconstruct(tag)) - line = parsedtag2str(tag)->ptr; } else { - if (!(mode->pre_mode & TBLM_PLAIN)) + if (!(mode->pre_mode & (TBLM_PLAIN | TBLM_INTXTA | TBLM_INSELECT | + TBLM_SCRIPT | TBLM_STYLE))) return -1; } } @@ -3040,7 +3100,9 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode, Strcat_charp(tbl->caption, line); return -1; } - if (mode->pre_mode & TBLM_IGNORE) + if (mode->pre_mode & TBLM_SCRIPT) + return -1; + if (mode->pre_mode & TBLM_STYLE) return -1; if (mode->pre_mode & TBLM_INTXTA) { feed_textarea(line); @@ -3100,7 +3162,7 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode, } line = tmp->ptr; } - if (!(mode->pre_mode & TBLM_SPECIAL)) { + if (!(mode->pre_mode & (TBLM_SPECIAL & ~TBLM_NOBR))) { if (!(tbl->flag & TBL_IN_COL) || linfo->prev_spaces != 0) while (IS_SPACE(*line)) line++; @@ -3114,25 +3176,51 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode, i = skip_space(tbl, line, linfo, !(mode->pre_mode & TBLM_NOBR)); addcontentssize(tbl, visible_length(line) - i); setwidth(tbl, mode); + pushdata(tbl, tbl->row, tbl->col, line); } - else { - /* <pre> mode or something like it */ + else if (mode->pre_mode & TBLM_PRE_INT) { check_rowcol(tbl, mode); - if (mode->pre_mode & TBLM_PRE_INT && mode->nobr_offset < 0) + if (mode->nobr_offset < 0) mode->nobr_offset = tbl->tabcontentssize; - if (mode->pre_mode & TBLM_PLAIN) - i = strlen(line); - else - i = maximum_visible_length(line); - addcontentssize(tbl, i); + addcontentssize(tbl, maximum_visible_length(line)); setwidth(tbl, mode); - if (!(mode->pre_mode & TBLM_PRE_INT)) { - p = line + strlen(line) - 1; - if (*p == '\r' || *p == '\n') + pushdata(tbl, tbl->row, tbl->col, line); + } + else { + /* <pre> mode or something like it */ + check_rowcol(tbl, mode); + while (*line) { + int nl = FALSE; + if ((p = strchr(line, '\r')) || (p = strchr(line, '\n'))) { + if (*p == '\r' && 
p[1] == '\n') + p++; + if (p[1]) { + p++; + tmp = Strnew_charp_n(line, p - line); + line = p; + p = tmp->ptr; + } + else { + p = line; + line = ""; + } + nl = TRUE; + } + else { + p = line; + line = ""; + } + if (mode->pre_mode & TBLM_PLAIN) + i = maximum_visible_length_plain(p); + else + i = maximum_visible_length(p); + addcontentssize(tbl, i); + setwidth(tbl, mode); + if (nl) clearcontentssize(tbl, mode); + pushdata(tbl, tbl->row, tbl->col, p); } } - pushdata(tbl, tbl->row, tbl->col, line); return -1; } diff --git a/table.h b/table.h @@ -114,21 +114,17 @@ struct table { int sloppy_width; }; -#define TBLM_PRE 1 -#define TBLM_NOBR 2 -#define TBLM_XMP 4 -#define TBLM_LST 8 -#define TBLM_PLAINTEXT 16 -#define TBLM_PRE_INT 32 -#define TBLM_INTXTA 64 -#define TBLM_INSELECT 128 -#define TBLM_PREMODE (TBLM_PRE|TBLM_INTXTA|TBLM_INSELECT|TBLM_PLAIN) -#define TBLM_SPECIAL (TBLM_PRE|TBLM_PRE_INT|TBLM_PLAIN) -#define TBLM_PLAIN (TBLM_PLAINTEXT|TBLM_XMP|TBLM_LST) -#define TBLM_SCRIPT 256 -#define TBLM_STYLE 512 -#define TBLM_IGNORE (TBLM_SCRIPT|TBLM_STYLE) -#define TBLM_ANCHOR 1024 +#define TBLM_PRE RB_PRE +#define TBLM_SCRIPT RB_SCRIPT +#define TBLM_STYLE RB_STYLE +#define TBLM_PLAIN RB_PLAIN +#define TBLM_NOBR RB_NOBR +#define TBLM_PRE_INT RB_PRE_INT +#define TBLM_INTXTA RB_INTXTA +#define TBLM_INSELECT RB_INSELECT +#define TBLM_PREMODE (TBLM_PRE | TBLM_PRE_INT | TBLM_SCRIPT | TBLM_STYLE | TBLM_PLAIN | TBLM_INTXTA) +#define TBLM_SPECIAL (TBLM_PRE | TBLM_PRE_INT | TBLM_SCRIPT | TBLM_STYLE | TBLM_PLAIN | TBLM_NOBR) +#define TBLM_ANCHOR 0x100000 #define uchar unsigned char #define ushort unsigned short @@ -139,7 +135,7 @@ struct table_mode { short nobr_offset; char nobr_level; short anchor_offset; - Str ignore_tag; + unsigned char end_tag; }; /* Local Variables: */ diff --git a/tagtable.tab b/tagtable.tab @@ -154,7 +154,6 @@ input_alt HTML_INPUT_ALT /input_alt HTML_N_INPUT_ALT img_alt HTML_IMG_ALT /img_alt HTML_N_IMG_ALT -eol HTML_EOL pre_int HTML_PRE_INT /pre_int 
HTML_N_PRE_INT bgsound HTML_BGSOUND @@ -167,3 +166,5 @@ select_int HTML_SELECT_INT option_int HTML_OPTION_INT textarea_int HTML_TEXTAREA_INT /textarea_int HTML_N_TEXTAREA_INT +pre_plain HTML_PRE_PLAIN +/pre_plain HTML_N_PRE_PLAIN