Patch: Elm ME+ 2.5 PLalpha62 -> Elm ME+ 2.5 PLalpha63 [4/7] (4/7)
From
Kari Hurtta@21:1/5 to
All on Wed Jan 3 13:52:11 2024
[continued from previous message]
+ CHECK_SBUFFER_LEN(tf_tag_param_error);
+
+ } else {
+
+ if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
+ ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
+ uint16 lower_char = u + 0x0020;
+ add_unicode_to_string(token->atr_name,1,&lower_char);
+ } else {
+ /* Returns no status */
+ add_state_to_string(token->atr_name,token->rchar);
+ }
+ ADD_SBUFFER_CHAR(tf_tag_param_error);
+ }
+
+ } else {
+ clearit(token->tag_flags,TFLAG_seen_equals); /* No = yet */
+
+ if (0x000A /* LF '\n' (new line) */ == u) {
+ /* Consume newline */
+
+ token->have_nl = 1;
+ reset_state(token->rchar,0);
+
+ EMIT_TOKEN_CLASS(tf_tag_space);
+
+ } else if (0x0009 /* HT '\t' (horizontal tab) */ == u ||
+ 0x000C /* FF '\f' (form feed) */ == u ||
+ 0x0020 /* SPACE */ == u) {
+
+ ADD_SBUFFER_CHAR(tf_tag_space);
+
+ } else if (0x002F /* / */ == u ||
+ 0x003E /* > */ == u) {
+
+
+ token->tag_state = ts_tag_ending; /* Process for /> or > */
+
+ if (len > 0) {
+ EMIT_TOKEN_CLASS(tf_tag_space);
+ }
+
+ } else if (0x003D /* = */ == u) {
+
+ /* Start tag name and set = as first name of tag name */
+ token->atr_name = new_string(token->text_charset);
+
+ /* Returns no status */
+ add_state_to_string(token->atr_name,token->rchar);
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ /* Same time this is displayed? as error */
+
+ EMIT_TOKEN_CLASS(tf_tag_param_error);
+
+ } else {
+
+ if (len > 0) {
+ EMIT_TOKEN_CLASS(tf_tag_space);
+ }
+
+ /* Start tag name and reprocess character */
+ token->atr_name = new_string(token->text_charset);
+ }
+ }
+
+ } else if (ts_tag_comment == token->tag_state) {
+ /* <!-- */
+
+ /* May include token->tag_lookahead */
+
+ /* Not correct but this ends
+ with --> or
+ --!> (with error)
+ */
+
+ /* Enable buffer */
+
+ if (!token->tag_lookahead)
+ token->tag_lookahead = new_string(token->text_charset);
+
+ if (0x000A /* LF '\n' */ == u) {
+ /* Consume newline */
+
+ token->have_nl = 1;
+ reset_state(token->rchar,0);
+
+ free_string(& token->tag_lookahead); /* Do not look across lines */
+
+ EMIT_TOKEN_CLASS(tf_comment_chunk);
+
+ } else if (0x003E /* > */ == u) {
+
+ int lookahead_len;
+ int r;
+
+ /* Returns no status */
+ add_state_to_string(token->tag_lookahead,token->rchar);
+ lookahead_len = string_len(token->tag_lookahead);
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ if (lookahead_len >= 3 &&
+ 0 <= (r = string_matches_ascii(token->tag_lookahead,
+ cs2us("-->"),
+ 0,SMA_op_find_ascii))) {
+
+ free_string(& (token->tag_lookahead));
+ token->tag_state = ts_init; /* Tag ended */
+
+ EMIT_TOKEN_CLASS(tf_comment_end);
+ } else if (lookahead_len >= 4 &&
+ 0 <= (r = string_matches_ascii(token->tag_lookahead,
+ cs2us("--!>"),
+ 0,SMA_op_find_ascii))) {
+
+ free_string(& (token->tag_lookahead));
+ token->tag_state = ts_init; /* Tag ended */
+
+ EMIT_TOKEN_CLASS(tf_comment_error);
+ }
+
+ CHECK_SBUFFER_LEN(tf_comment_chunk);
+
+ } else if (0x0000 /* NUL */ == u) {
+ uint16 bad_char = UNICODE_BAD_CHAR;
+
+ DPRINT(Debug,20,(&Debug,
+ "get_new_tagfilter_token: Found NUL character when parsing comment text\n"));
+
+ reset_state(token->rchar,0); /* Get next character */
+
+ add_unicode_to_string(token->tag_lookahead,1,&bad_char);
+
+ token->error = 1;
+ add_unicode_to_string(token->sbuffer,1,&bad_char);
+
+ CHECK_SBUFFER_LEN(tf_comment_chunk);
+
+ } else {
+
+ /* Just append other characters */
+ /* Returns no status */
+ add_state_to_string(token->tag_lookahead,token->rchar);
+
+ ADD_SBUFFER_CHAR(tf_comment_chunk);
+ }
+
+ } else if (ts_tag_comment_start == token->tag_state) {
+
+ if (token->tag_lookahead) {
+ if (0x002D /* - */ == u ||
+ 0x003E /* > */ == u) {
+ int lookahead_len;
+
+ /* Returns no status */
+ add_state_to_string(token->tag_lookahead,token->rchar);
+ lookahead_len = string_len(token->tag_lookahead);
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ if (lookahead_len > 3) {
+ /* Leaks token->tag_lookahead */
+
+ token->tag_state = ts_tag_comment;
+
+ EMIT_TOKEN_CLASS(tf_comment_start);
+ } else if (3 == lookahead_len &&
+ string_matches_ascii(token->tag_lookahead,
+ cs2us("-->"),
+ 0,SMA_op_normal)) {
+ free_string(& (token->tag_lookahead));
+
+ /* <!----> */
+ token->tag_state = ts_init;
+
+ EMIT_TOKEN_CLASS(tf_whole_comment);
+ } else if (2 == lookahead_len &&
+ string_matches_ascii(token->tag_lookahead,
+ cs2us("->"),
+ 0,SMA_op_normal)) {
+ free_string(& (token->tag_lookahead));
+
+ /* <!---> */
+ token->tag_state = ts_init;
+
+ EMIT_TOKEN_CLASS(tf_comment_error);
+ }
+
+ } else {
+
+ /* Leaks token->tag_lookahead */
+
+ token->tag_state = ts_tag_comment;
+
+ EMIT_TOKEN_CLASS(tf_comment_start);
+ }
+
+ } else {
+
+ if (0x000A /* LF '\n' */ == u) {
+ /* Consume newline */
+
+ token->have_nl = 1;
+ reset_state(token->rchar,0);
+
+ token->tag_state = ts_tag_comment;
+
+ EMIT_TOKEN_CLASS(tf_comment_start);
+ } else if (0x002D /* - */ == u) {
+ /* Enable buffer and process again */
+ token->tag_lookahead = new_string(token->text_charset);
+ } else if (0x003E /* > */ == u) {
+ /* Closing on empty comment */
+
+ /* <!--> seen */
+
+ token->tag_state = ts_init;
+
+ EMIT_TOKEN_CLASS(tf_comment_error);
+ } else {
+ token->tag_state = ts_tag_comment;
+
+ EMIT_TOKEN_CLASS(tf_comment_start);
+ }
+ }
+
+ } else if (ts_tag_bang == token->tag_state) {
+
+ if (token->tag_lookahead) {
+ if (0x002D /* - */ == u ||
+ 0x005B /* [ */ == u ||
+ (0x0041 /* A */ <= u && u <= 0x005A /* Z */) ||
+ (0x0061 /* a */ <= u && u <= 0x007A /* z */)) {
+
+ int lookahead_len;
+
+ /* Returns no status */
+ add_state_to_string(token->tag_lookahead,token->rchar);
+ lookahead_len = string_len(token->tag_lookahead);
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ /* DOCTYPE have 7 characters
+ [CDATA[ have 7 characters
+ -- have 2 characters
+ */
+
+ if (lookahead_len > 7) {
+ free_string(& (token->tag_lookahead));
+
+ /* Bogus <! ... > */
+ token->tag_state = ts_tag_bogus_comment;
+
+ EMIT_TOKEN_CLASS(tf_bcomment_start);
+ } else if (7 == lookahead_len &&
+ string_matches_ascii(token->tag_lookahead,
+ cs2us("[CDATA["),
+ 0,SMA_op_normal)) {
+
+ free_string(& (token->tag_lookahead));
+ /* CDATA is not now supported .... */
+
+ /* TODO: Check that is CDATA allowed */
+
+ token->tag_state = ts_tag_bogus_comment;
+
+ EMIT_TOKEN_CLASS(tf_bcomment_start);
+
+
+ } else if (7 == lookahead_len &&
+ string_matches_ascii(token->tag_lookahead,
+ cs2us("DOCTYPE"),
+ SMA_ignore_case,
+ SMA_op_normal)) {
+
+ /* Seen <!DOCTYPE */
+
+ token->tag_state = ts_tag_doctype_start;
+ token->tag_quote = 0;
+
+ /* Leaks token->tag_lookahead */
+
+ EMIT_TOKEN_CLASS(tf_doctype_start);
+
+ } else if (2 == lookahead_len &&
+ string_matches_ascii(token->tag_lookahead,
+ cs2us("--"),
+ 0,SMA_op_normal)) {
+ free_string(& (token->tag_lookahead));
+
+ /* Seen <!-- */
+ token->tag_state = ts_tag_comment_start;
+ }
+
+
+
+ } else {
+ free_string(& (token->tag_lookahead));
+
+ /* Bogus <! ... > */
+ token->tag_state = ts_tag_bogus_comment;
+
+ EMIT_TOKEN_CLASS(tf_bcomment_start);
+ }
+
+ } else {
+ if (0x002D /* - */ == u ||
+ 0x0044 /* D */ == u ||
+ 0x0064 /* d */ == u ||
+ 0x005B /* [ */ == u) {
+
+ /* Enable buffer and process again */
+ token->tag_lookahead = new_string(token->text_charset);
+ } else {
+ /* Bogus <! ... > */
+
+ if (0x000A /* LF '\n' */ == u) {
+ /* Consume newline */
+
+ token->have_nl = 1;
+ reset_state(token->rchar,0);
+ }
+
+ token->tag_state = ts_tag_bogus_comment;
+
+ EMIT_TOKEN_CLASS(tf_bcomment_start);
+ }
+ }
+
+ } else if (ts_tag_endmark == token->tag_state) {
+
+ if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag) &&
+ ((0x0030 /* 0 */ <= u && u <= 0x0039 /* 9 */) ||
+ (0x0041 /* A */ <= u && u <= 0x005A /* Z */) ||
+ (0x0061 /* a */ <= u && u <= 0x007A /* z */) ||
+ 0x002D /* - */ == u)) {
+
+ /* text/enriched end tag */
+
+ if (len >= MAX_ENRICHED_TOKEN) { /* Too long */
+ EMIT_TOKEN_CLASS(tf_tag_error);
+ }
+
+ if (!token->tag_name)
+ token->tag_name = new_string(token->text_charset);
+
+ if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
+ ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
+ uint16 lower_char = u + 0x0020;
+ add_unicode_to_string(token->tag_name,1,&lower_char);
+
+ } else {
+ /* Returns no status */
+ add_state_to_string(token->tag_name,token->rchar);
+ }
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ } else if (len <= 2 &&
+ isoff(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag) &&
+ ((0x0041 /* A */ <= u && u <= 0x005A /* Z */) ||
+ (0x0061 /* a */ <= u && u <= 0x007A /* z */))) {
+
+ /* Start of end tag name */
+
+ if (!token->tag_name)
+ token->tag_name = new_string(token->text_charset);
+
+ if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
+ ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
+ uint16 lower_char = u + 0x0020;
+ add_unicode_to_string(token->tag_name,1,&lower_char);
+ } else {
+ /* Returns no status */
+ add_state_to_string(token->tag_name,token->rchar);
+ }
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ } else if (0x003E /* > */ == u) {
+ if (len > 2) {
+
+ token->tag_state = ts_init;
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ EMIT_TOKEN_CLASS(tf_whole_endtag);
+ } else {
+ token->tag_state = ts_init;
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ EMIT_TOKEN_CLASS(tf_tag_error);
+ }
+ } else if (len > 2 &&
+ isoff(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag)) {
+
+ /* Parsing name */
+
+ if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_tag_attributes) &&
+ (0x0009 /* HT '\t' */ == u ||
+ 0x000A /* LF '\n' */ == u ||
+ 0x000C /* FF '\f' */ == u ||
+ 0x0020 /* SPACE */ == u)) {
+
+ if (0x000A /* LF '\n' */ == u) {
+ /* Consume newline */
+
+ token->have_nl = 1;
+ reset_state(token->rchar,0);
+ }
+
+ /* do not include space to name */
+
+ token->tag_state = ts_tag_params;
+
+ EMIT_TOKEN_CLASS(tf_start_endtag);
+
+ } else if (0x0000 /* NUL */ == u) {
+ uint16 bad_char = UNICODE_BAD_CHAR;
+
+ DPRINT(Debug,20,(&Debug,
+ "get_new_tagfilter_token: Found NUL character when parsing end tag name\n"));
+
+ reset_state(token->rchar,0); /* Get next character */
+
+ token->error = 1;
+ add_unicode_to_string(token->sbuffer,1,&bad_char);
+
+ if (!token->tag_name)
+ token->tag_name = new_string(token->text_charset);
+ add_unicode_to_string(token->tag_name,1,&bad_char);
+
+ CHECK_SBUFFER_LEN(tf_tag_error);
+ } else {
+
+ /* Accepts eveything as tag name? */
+
+ if (!token->tag_name)
+ token->tag_name = new_string(token->text_charset);
+
+ if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
+ ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
+ uint16 lower_char = u + 0x0020;
+ add_unicode_to_string(token->tag_name,1,&lower_char);
+ } else {
+ /* Returns no status */
+ add_state_to_string(token->tag_name,token->rchar);
+ }
+
+ ADD_SBUFFER_CHAR(tf_tag_error);
+ }
+
+ } else if (isoff(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag)) {
+
+ /* Bogus </ ... > */
+
+ token->tag_state = ts_tag_bogus_comment;
+
+ if (0x000A /* LF '\n' */ == u) {
+ /* Consume newline */
+
+ token->have_nl = 1;
+ reset_state(token->rchar,0);
+ }
+
+ EMIT_TOKEN_CLASS( tf_bcomment_start);
+
+ } else {
+
+ if (0x000A /* LF '\n' */ == u) {
+ /* Consume newline */
+
+ token->have_nl = 1;
+ reset_state(token->rchar,0);
+ }
+
+ /* Parse error on tag -- do not include character */
+
+ token->tag_state = ts_init;
+
+ EMIT_TOKEN_CLASS(tf_tag_error);
+ }
+
+ } else if (ts_tag_bogus_comment == token->tag_state) {
+
+ if (0x003E /* > */ == u) {
+ token->tag_state = ts_init;
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ EMIT_TOKEN_CLASS(tf_bcomment_end);
+ } else if (0x0000 /* NUL */ == u) {
+ uint16 bad_char = UNICODE_BAD_CHAR;
+
+ DPRINT(Debug,20,(&Debug,
+ "get_new_tagfilter_token: Found NUL character when parsing < ... comment\n"));
+
+ reset_state(token->rchar,0); /* Get next character */
+
+ token->error = 1;
+ add_unicode_to_string(token->sbuffer,1,&bad_char);
+
+ CHECK_SBUFFER_LEN(tf_bcomment_chunk);
+ } else {
+ /* Accepts eveything as comment? */
+
+ if (0x000A /* LF '\n' */ == u) {
+ /* Consume newline */
+
+ token->have_nl = 1;
+ reset_state(token->rchar,0);
+
+ EMIT_TOKEN_CLASS(tf_bcomment_chunk);
+ }
+
+ CHECK_SBUFFER_LEN(tf_bcomment_chunk);
+ }
+
+ } else if (ts_tag_self_closing == token->tag_state) {
+
+ setit(token->tag_flags,TFLAG_self_closing);
+
+ if (0x003E /* > */ == u) {
+
+ token->tag_state = ts_init;
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ clearit(token->tag_flags,TFLAG_self_closing); /* Passed already */
+
+ EMIT_TOKEN_CLASS(tf_selfclosed_tag);
+
+ } else if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_tag_attributes)) {
+
+ /* Do not include to name */
+
+ if (0x000A /* LF '\n' */ == u) {
+ /* Consume newline */
+
+ token->have_nl = 1;
+ reset_state(token->rchar,0);
+ }
+
+ token->tag_state = ts_tag_params;
+
+ EMIT_TOKEN_CLASS(tf_start_tag);
+ } else {
+ /* Back to parsing on tag name */
+
+ token->tag_state = ts_tag_start;
+
+ if (0x000A /* LF '\n' */ == u) {
+ /* Consume newline */
+
+ token->have_nl = 1;
+ reset_state(token->rchar,0);
+
+ EMIT_TOKEN_CLASS(tf_tag_error);
+ } else {
+
+ if (!token->tag_name)
+ token->tag_name = new_string(token->text_charset);
+
+ if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
+ ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
+ uint16 lower_char = u + 0x0020;
+ add_unicode_to_string(token->tag_name,1,&lower_char);
+ } else {
+ /* Returns no status */
+ add_state_to_string(token->tag_name,token->rchar);
+ }
+ }
+ }
+
+ } else if (ts_tag_start == token->tag_state) {
+
+ clearit(token->tag_flags,TFLAG_self_closing); /* CLear possible bogus flag */
+
+ if (1 == len &&
+ ison(tagfilter->tagflt_mode,TAGFLT_MODE_double_smaller) &&
+ 0x003C /* < */ == u) {
+
+ /* text/enriched escaped < */
+
+ /* Note that sbuffer includes all charaters
+ part of token -- not just token value
+ */
+
+ token->tag_state = ts_init;
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ EMIT_TOKEN_CLASS(tf_double_smaller);
+
+ } else if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag) &&
+ ((0x0030 /* 0 */ <= u && u <= 0x0039 /* 9 */) ||
+ (0x0041 /* A */ <= u && u <= 0x005A /* Z */) ||
+ (0x0061 /* a */ <= u && u <= 0x007A /* z */) ||
+ 0x002D /* - */ == u)) {
+
+ /* text/enriched tag */
+
+ if (len >= MAX_ENRICHED_TOKEN) { /* Too long */
+ EMIT_TOKEN_CLASS(tf_tag_error);
+ }
+
+ if (!token->tag_name)
+ token->tag_name = new_string(token->text_charset);
+
+ if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
+ ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
+ uint16 lower_char = u + 0x0020;
+ add_unicode_to_string(token->tag_name,1,&lower_char);
+ } else {
+ /* Returns no status */
+ add_state_to_string(token->tag_name,token->rchar);
+ }
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ } else if (1 == len && tagfilter->doctype_name && 0x0021 /* ! */ == u) {
+ /* Markup declaration */
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ token->tag_state = ts_tag_bang;
+ } else if (1 == len && 0x002F /* / */ == u) {
+ /* end tag */
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ token->tag_state = ts_tag_endmark;
+ } else if (1 == len &&
+ isoff(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag) &&
+ ((0x0041 /* A */ <= u && u <= 0x005A /* Z */) ||
+ (0x0061 /* a */ <= u && u <= 0x007A /* z */))) {
+
+ /* Start of tag name */
+
+ if (!token->tag_name)
+ token->tag_name = new_string(token->text_charset);
+
+ if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
+ ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
+ uint16 lower_char = u + 0x0020;
+ add_unicode_to_string(token->tag_name,1,&lower_char);
+
+ } else {
+ /* Returns no status */
+ add_state_to_string(token->tag_name,token->rchar);
+ }
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ } else if (1 == len &&
+ isoff(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag) &&
+ 0x003F /* ? */ == u) {
+
+ /* Bogus <? > */
+
+ token->tag_state = ts_tag_bogus_comment;
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ EMIT_TOKEN_CLASS(tf_bcomment_start);
+ } else if (len > 1 && 0x003E /* > */ == u) {
+
+ token->tag_state = ts_init;
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ EMIT_TOKEN_CLASS(tf_whole_tag);
+ } else if (len > 1 &&
+ isoff(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag)) {
+
+ /* Parsing name */
+
+ if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_tag_attributes) &&
+ (0x0009 /* HT '\t' */ == u ||
+ 0x000A /* LF '\n' */ == u ||
+ 0x000C /* FF '\f' */ == u ||
+ 0x0020 /* SPACE */ == u)) {
+
+ if (0x000A /* LF '\n' */ == u) {
+ /* Consume newline */
+
+ token->have_nl = 1;
+ reset_state(token->rchar,0);
+ }
+
+ /* do not include space to name */
+
+ token->tag_state = ts_tag_params;
+
+ EMIT_TOKEN_CLASS(tf_start_tag);
+ } else if (0x002F /* / */ == u) {
+
+ token->tag_state = ts_tag_self_closing;
+
+ ADD_SBUFFER_CHAR(tf_tag_error);
+
+ } else if (0x0000 /* NUL */ == u) {
+ uint16 bad_char = UNICODE_BAD_CHAR;
+
+ DPRINT(Debug,20,(&Debug,
+ "get_new_tagfilter_token: Found NUL character when parsing tag name\n"));
+
+ reset_state(token->rchar,0); /* Get next character */
+
+ token->error = 1;
+ add_unicode_to_string(token->sbuffer,1,&bad_char);
+
+ if (!token->tag_name)
+ token->tag_name = new_string(token->text_charset);
+ add_unicode_to_string(token->tag_name,1,&bad_char);
+
+ CHECK_SBUFFER_LEN(tf_tag_error);
+ } else {
+ if (!token->tag_name)
+ token->tag_name = new_string(token->text_charset);
+
+ if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
+ ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
+ uint16 lower_char = u + 0x0020;
+ add_unicode_to_string(token->tag_name,1,&lower_char);
+ } else {
+ /* Returns no status */
+ add_state_to_string(token->tag_name,token->rchar);
+ }
+
+ /* Accepts eveything as tag name? */
+
+ ADD_SBUFFER_CHAR(tf_tag_error);
+ }
+
+ } else {
+
+ if (0x000A /* LF '\n' */ == u) {
+ /* Consume newline */
+
+ token->have_nl = 1;
+ reset_state(token->rchar,0);
+ }
+
+ /* Parse error on tag -- do not include character */
+
+ token->tag_state = ts_init;
+
+ EMIT_TOKEN_CLASS(tf_tag_error);
+ }
+
+
+ } else if (ts_init == token->tag_state) {
+
+ if (0x003C /* < */ == u) {
+
+ if (len > 0) {
+ /* Do not eat character -- next character starts tag */
+
+ EMIT_TOKEN_CLASS(tf_body);
+ }
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ token->tag_state = ts_tag_start;
+
+ } else if (0x0026 /* & */ == u &&
+ tagfilter->have_entities) {
+
+ if (len > 0) {
+ /* Do not eat character -- next character starts entity */
+
+ EMIT_TOKEN_CLASS(tf_body);
+ }
+
+ token->entity_state = ent_entity_start;
+
+ /* Start & .... */
+ token->named_reference = new_string(token->text_charset);
+ /* Returns no status */
+ add_state_to_string(token->named_reference,token->rchar);
+ token->walk_reference = tagfilter_start_reference(tagfilter->have_entities);
+
+ ADD_SBUFFER_CHAR_NOCHECK;
+
+ } else
+ goto parse_body;
+
+ } else {
+
+ if (!badstate) {
+ DPRINT(Debug,20,(&Debug,
+ "get_new_tagfilter_token: bad token->tag_state=%d\n",
+ token->tag_state));
+ badstate = 1;
+ }
+
+
+ parse_body:
+
+ /* On body of mail */
+
+ if (0x000A /* LF '\n' */ == u) {
+ token->have_nl = 1;
+
+ setit(token->tag_flags,TFLAG_seen_nl);
+
+ reset_state(token->rchar,0);
+
+ EMIT_TOKEN_CLASS(tf_body);
+ }
+
+ ADD_SBUFFER_CHAR(tf_body);
+ }
+
+ } else {
+ ch = state_getc(state_in);
+
+ process_char:
+ if (EOF == ch) {
+
+ if (!gotbyte) {
+ ret = -1;
+
+ free_string(& (token->sbuffer));
+ } else {
+ len = string_len(token->sbuffer);
+
+ ret = len;
+ }
+ token->eof = 1;
+ token->token_class = tf_body; /* EOF is outside of tag */
+
+ goto out;
+ }
+ gotbyte++;
+
+ if (! add_streambyte_to_state(token->rchar,ch)) {
+ uint16 bad_char = UNICODE_BAD_CHAR;
+
+ DPRINT(Debug,20,(&Debug,
+ "get_new_tagfilter_token: Failed to add byte %02x to character when scanning token\n"));
+ token->error = 1;
+ reset_state(token->rchar,0);
+
+ add_unicode_to_string(token->sbuffer,1,&bad_char);
+ if (token->tag_lookahead)
+ add_unicode_to_string(token->tag_lookahead,1,&bad_char);
+ if (token->tag_name)
+ add_unicode_to_string(token->tag_name,1,&bad_char);
+ if (token->atr_name)
+ add_unicode_to_string(token->atr_name,1,&bad_char);
+
+ }
+ }
+
+ } while(1);
+
+ out:
+
+ DPRINT(Debug,20,(&Debug,
+ "get_new_tagfilter_token=%d:\n",
+ ret));
+
+ DPRINT(Debug,20,(&Debug, " token_class %d",
+ token->token_class));
+ switch(token->token_class) {
+ case tf_doctype_error: DPRINT(Debug,20,(&Debug," tf_doctype_error")); break;
+ case tf_tag_atrvalue_error: DPRINT(Debug,20,(&Debug," tf_tag_atrvalue_error")); break;
+ case tf_tag_param_error: DPRINT(Debug,20,(&Debug," tf_tag_param_error")); break;
+ case tf_comment_error: DPRINT(Debug,20,(&Debug," tf_comment_error")); break;
+ case tf_bcomment_error: DPRINT(Debug,20,(&Debug," tf_bcomment_error")); break;
+ case tf_tag_error: DPRINT(Debug,20,(&Debug," tf_tag_error")); break;
+ case tf_entity_error: DPRINT(Debug,20,(&Debug," tf_entity_error")); break;
+ case tf_body: DPRINT(Debug,20,(&Debug," tf_body")); break;
+ case tf_start_tag: DPRINT(Debug,20,(&Debug," tf_start_tag")); break; /* <tag */
+ case tf_whole_tag: DPRINT(Debug,20,(&Debug," tf_whole_tag")); break; /* <tag> */
+ case tf_selfclosed_tag: DPRINT(Debug,20,(&Debug," tf_selfclosed_tag")); break; /* <tag/> */
+ case tf_bcomment_start: DPRINT(Debug,20,(&Debug," tf_bcomment_start")); break; /* <? or <! or </ */
+ case tf_bcomment_chunk: DPRINT(Debug,20,(&Debug," tf_bcomment_chunk")); break; /* <? bogus comment chunk */
+ case tf_bcomment_end: DPRINT(Debug,20,(&Debug," tf_bcomment_end")); break; /* end bogus comment > */
+ case tf_start_endtag: DPRINT(Debug,20,(&Debug," tf_start_endtag")); break; /* </tag */
+ case tf_whole_endtag: DPRINT(Debug,20,(&Debug," tf_whole_endtag")); break; /* </tag> */
+ case tf_entity: DPRINT(Debug,20,(&Debug," tf_entity")); break;
+ case tf_numeric_entity: DPRINT(Debug,20,(&Debug," tf_numeric_entity")); break; /* Numeric entity */
+ case tf_double_smaller: DPRINT(Debug,20,(&Debug," tf_double_smaller")); break; /* << as escaping */
+ case tf_span_nl: DPRINT(Debug,20,(&Debug," tf_span_nl")); break; /* span of newline (except first) */
+ case tf_comment_start: DPRINT(Debug,20,(&Debug," tf_comment_start")); break; /* <!-- */
+ case tf_whole_comment: DPRINT(Debug,20,(&Debug," tf_whole_comment")); break; /* <!----> */
+ case tf_comment_chunk: DPRINT(Debug,20,(&Debug," tf_comment_chunk")); break; /* <!-- comment chunk */
+ case tf_comment_end: DPRINT(Debug,20,(&Debug," tf_comment_end")); break; /* end comment --> */
+ case tf_tag_space: DPRINT(Debug,20,(&Debug," tf_tag_space")); break; /* space on attributes */
+ case tf_tag_atrname: DPRINT(Debug,20,(&Debug," tf_tag_atrname")); break; /* Got attribute name */
+ case tf_tag_selfclosed_end: DPRINT(Debug,20,(&Debug," tf_tag_selfclosed_end")); break; /* Got /> */
+ case tf_tag_end: DPRINT(Debug,20,(&Debug," tf_tag_end")); break; /* Got > */
+ case tf_tag_atrequal: DPRINT(Debug,20,(&Debug," tf_tag_atrequal")); break; /* Got = */
+ case tf_tag_atrvalue_start: DPRINT(Debug,20,(&Debug," tf_tag_atrvalue_start")); break; /* Start attribute value */
+ case tf_tag_atrvalue_segment: DPRINT(Debug,20,(&Debug," tf_tag_atrvalue_segment")); break; /* Part of attribute value */
+ case tf_tag_atrvalue_end: DPRINT(Debug,20,(&Debug," tf_tag_atrvalue_end")); break; /* Start attribute value */
+ case tf_doctype_start: DPRINT(Debug,20,(&Debug," tf_doctype_start")); break; /* <!DOCTYPE */
+ case tf_doctype_segment: DPRINT(Debug,20,(&Debug," tf_doctype_segment")); break; /* Part of DOCTYPE */
+ case tf_doctype_space: DPRINT(Debug,20,(&Debug," tf_doctype_space")); break; /* Space on doctype */
+ case tf_doctype_item: DPRINT(Debug,20,(&Debug," tf_doctype_item")); break; /* Collected doctype item */
+ case tf_doctype_end: DPRINT(Debug,20,(&Debug," tf_doctype_end")); break; /* DOCTYPE line ended */
+ }
+ DPRINT(Debug,20,(&Debug, "\n"));
+ if (token->tag_name) {
+ DEBUG_PRINT_STRING(Debug,20,
+ " tag_name ",
+ " tag_name > ",
+ token->tag_name);
+ }
+ if (token->atr_name) {
+ DEBUG_PRINT_STRING(Debug,20,
+ " atr_name ",
+ " atr_name > ",
+ token->atr_name);
+ }
+ if (token->atr_value_segment) {
+ DEBUG_PRINT_STRING(Debug,20,
+ " atr_value_segment ",
+ " atr_value_segment > ",
+ token->atr_value_segment);
+ }
+ if (token->named_reference) {
+ DEBUG_PRINT_STRING(Debug,20,
+ " named_reference ",
+ " named_reference > ",
+ token->named_reference);
+ }
+
+ if (UNICODE_BAD_CHAR != token->numeric_reference ||
+ tf_numeric_entity == token->token_class ||
+ ison(token->tag_flags,TFLAG_num_overflow)) {
+
+ DPRINT(Debug,20,(&Debug, " numeric_reference %04x\n",
+ token->numeric_reference));
+ }
+ if (token->doctype_item) {
+ DEBUG_PRINT_STRING(Debug,20,
+ " doctype_item ",
+ " doctype_item > ",
+ token->doctype_item);
+ }
+ if (token->have_nl) {
+ DPRINT(Debug,20,(&Debug, " have_nl\n"));
+ }
+ if (token->eof) {
+ DPRINT(Debug,20,(&Debug, " eof\n"));
+ }
+ if (token->error) {
+ DPRINT(Debug,20,(&Debug, " error\n"));
+ }
+
+ if (token->tag_state != ts_init ||
+ token->token_class != tf_body ||
+ ison(token->tag_flags,TFLAG_self_closing) ||
+ ison(token->tag_flags,TFLAG_seen_equals) ||
+ token->tag_quote ||
+ token->tag_lookahead
+ ) {
+ DPRINT(Debug,20,(&Debug, " tag_state %d",
+ token->tag_state));
+
+ switch (token->tag_state) {
+ case ts_init: DPRINT(Debug,20,(&Debug, " ts_init")); break;
+ case ts_tag_start: DPRINT(Debug,20,(&Debug, " ts_tag_start")); /* Seen < */ break;
+ case ts_tag_bang: DPRINT(Debug,20,(&Debug, " ts_tag_bang")); /* Seen <! */ break;
+ case ts_tag_endmark: DPRINT(Debug,20,(&Debug, " ts_tag_endmark")); /* Seen </ */ break;
+ case ts_tag_params: DPRINT(Debug,20,(&Debug, " ts_tag_params")); /* Seen space after < or expecting params */ break;
+ case ts_tag_ending: DPRINT(Debug,20,(&Debug, " ts_tag_ending")); /* Process for /> or > */ break;
+ case ts_tag_bogus_comment: DPRINT(Debug,20,(&Debug, " ts_tag_bogus_comment")); /* Parse to > */ break;
+ case ts_tag_self_closing: DPRINT(Debug,20,(&Debug, " ts_tag_self_closing")); /* Seen <tag/ */ break;
+ case ts_tag_comment_start: DPRINT(Debug,20,(&Debug, " ts_tag_comment_start")); /* Seen <!-- */ break;
+ case ts_tag_comment: DPRINT(Debug,20,(&Debug, " ts_tag_comment")); /* Inside of comment */ break;
+ case ts_tag_after_atrname: DPRINT(Debug,20,(&Debug, " ts_tag_after_atrname")); /* Got attribure name, parse = */ break;
+ case ts_tag_atrvalue: DPRINT(Debug,20,(&Debug, " ts_tag_atrvalue")); /* On attribure value */ break;
+ case ts_tag_after_quoted: DPRINT(Debug,20,(&Debug, " ts_tag_after_quoted")); /* After quoted value */ break;
+ case ts_tag_doctype_start: DPRINT(Debug,20,(&Debug, " ts_tag_doctype_start")); /* Seen <!DOCTYPE */ break;
+ case ts_tag_doctype_bogus: DPRINT(Debug,20,(&Debug, " ts_tag_doctype_bogus")); /* Bogus DOCTYPE line */ break;
+ }
+
+ DPRINT(Debug,20,(&Debug, "\n"));
+
+ DPRINT(Debug,20,(&Debug, " tag_quote %04x",
+ token->tag_quote));
+ switch (token->tag_quote) {
+ case 0: DPRINT(Debug,20,(&Debug, " (none)")); break;
+ case 0x0027: DPRINT(Debug,20,(&Debug, " (')")); break;
+ case 0x0022: DPRINT(Debug,20,(&Debug, " (\")")); break;
+ }
+ DPRINT(Debug,20,(&Debug, "\n"));
[continued in next message]
--- SoupGate-Win32 v1.05
* Origin: fsxNet Usenet Gateway (21:1/5)