• Patch: Elm ME+ 2.5 PLalpha62 -> Elm ME+ 2.5 PLalpha63 [4/7] (4/7)

    From Kari Hurtta@21:1/5 to All on Wed Jan 3 13:52:11 2024
    [continued from previous message]

    + CHECK_SBUFFER_LEN(tf_tag_param_error);
    +
    + } else {
    +
    + if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
    + ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
    + uint16 lower_char = u + 0x0020;
    + add_unicode_to_string(token->atr_name,1,&lower_char);
    + } else {
    + /* Returns no status */
    + add_state_to_string(token->atr_name,token->rchar);
    + }
    + ADD_SBUFFER_CHAR(tf_tag_param_error);
    + }
    +
    + } else {
    + clearit(token->tag_flags,TFLAG_seen_equals); /* No = yet */
    +
    + if (0x000A /* LF '\n' (new line) */ == u) {
    + /* Consume newline */
    +
    + token->have_nl = 1;
    + reset_state(token->rchar,0);
    +
    + EMIT_TOKEN_CLASS(tf_tag_space);
    +
    + } else if (0x0009 /* HT '\t' (horizontal tab) */ == u ||
    + 0x000C /* FF '\f' (form feed) */ == u ||
    + 0x0020 /* SPACE */ == u) {
    +
    + ADD_SBUFFER_CHAR(tf_tag_space);
    +
    + } else if (0x002F /* / */ == u ||
    + 0x003E /* > */ == u) {
    +
    +
    + token->tag_state = ts_tag_ending; /* Process for /> or > */
    +
    + if (len > 0) {
    + EMIT_TOKEN_CLASS(tf_tag_space);
    + }
    +
    + } else if (0x003D /* = */ == u) {
    +
    + /* Start attribute name and add = as its first character */
    + token->atr_name = new_string(token->text_charset);
    +
    + /* Returns no status */
    + add_state_to_string(token->atr_name,token->rchar);
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + /* At the same time this is displayed as an error? */
    +
    + EMIT_TOKEN_CLASS(tf_tag_param_error);
    +
    + } else {
    +
    + if (len > 0) {
    + EMIT_TOKEN_CLASS(tf_tag_space);
    + }
    +
    + /* Start tag name and reprocess character */
    + token->atr_name = new_string(token->text_charset);
    + }
    + }
    +
    + } else if (ts_tag_comment == token->tag_state) {
    + /* <!-- */
    +
    + /* May include token->tag_lookahead */
    +
    + /* Not strictly correct, but this ends
    + with --> or
    + --!> (with error)
    + */
    +
    + /* Enable buffer */
    +
    + if (!token->tag_lookahead)
    + token->tag_lookahead = new_string(token->text_charset);
    +
    + if (0x000A /* LF '\n' */ == u) {
    + /* Consume newline */
    +
    + token->have_nl = 1;
    + reset_state(token->rchar,0);
    +
    + free_string(& token->tag_lookahead); /* Do not look across lines */
    +
    + EMIT_TOKEN_CLASS(tf_comment_chunk);
    +
    + } else if (0x003E /* > */ == u) {
    +
    + int lookahead_len;
    + int r;
    +
    + /* Returns no status */
    + add_state_to_string(token->tag_lookahead,token->rchar);
    + lookahead_len = string_len(token->tag_lookahead);
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + if (lookahead_len >= 3 &&
    + 0 <= (r = string_matches_ascii(token->tag_lookahead,
    + cs2us("-->"),
    + 0,SMA_op_find_ascii))) {
    +
    + free_string(& (token->tag_lookahead));
    + token->tag_state = ts_init; /* Tag ended */
    +
    + EMIT_TOKEN_CLASS(tf_comment_end);
    + } else if (lookahead_len >= 4 &&
    + 0 <= (r = string_matches_ascii(token->tag_lookahead,
    + cs2us("--!>"),
    + 0,SMA_op_find_ascii))) {
    +
    + free_string(& (token->tag_lookahead));
    + token->tag_state = ts_init; /* Tag ended */
    +
    + EMIT_TOKEN_CLASS(tf_comment_error);
    + }
    +
    + CHECK_SBUFFER_LEN(tf_comment_chunk);
    +
    + } else if (0x0000 /* NUL */ == u) {
    + uint16 bad_char = UNICODE_BAD_CHAR;
    +
    + DPRINT(Debug,20,(&Debug,
    + "get_new_tagfilter_token: Found NUL character when parsing comment text\n"));
    +
    + reset_state(token->rchar,0); /* Get next character */
    +
    + add_unicode_to_string(token->tag_lookahead,1,&bad_char);
    +
    + token->error = 1;
    + add_unicode_to_string(token->sbuffer,1,&bad_char);
    +
    + CHECK_SBUFFER_LEN(tf_comment_chunk);
    +
    + } else {
    +
    + /* Just append other characters */
    + /* Returns no status */
    + add_state_to_string(token->tag_lookahead,token->rchar);
    +
    + ADD_SBUFFER_CHAR(tf_comment_chunk);
    + }
    +
    + } else if (ts_tag_comment_start == token->tag_state) {
    +
    + if (token->tag_lookahead) {
    + if (0x002D /* - */ == u ||
    + 0x003E /* > */ == u) {
    + int lookahead_len;
    +
    + /* Returns no status */
    + add_state_to_string(token->tag_lookahead,token->rchar);
    + lookahead_len = string_len(token->tag_lookahead);
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + if (lookahead_len > 3) {
    + /* Leaks token->tag_lookahead */
    +
    + token->tag_state = ts_tag_comment;
    +
    + EMIT_TOKEN_CLASS(tf_comment_start);
    + } else if (3 == lookahead_len &&
    + string_matches_ascii(token->tag_lookahead,
    + cs2us("-->"),
    + 0,SMA_op_normal)) {
    + free_string(& (token->tag_lookahead));
    +
    + /* <!----> */
    + token->tag_state = ts_init;
    +
    + EMIT_TOKEN_CLASS(tf_whole_comment);
    + } else if (2 == lookahead_len &&
    + string_matches_ascii(token->tag_lookahead,
    + cs2us("->"),
    + 0,SMA_op_normal)) {
    + free_string(& (token->tag_lookahead));
    +
    + /* <!---> */
    + token->tag_state = ts_init;
    +
    + EMIT_TOKEN_CLASS(tf_comment_error);
    + }
    +
    + } else {
    +
    + /* Leaks token->tag_lookahead */
    +
    + token->tag_state = ts_tag_comment;
    +
    + EMIT_TOKEN_CLASS(tf_comment_start);
    + }
    +
    + } else {
    +
    + if (0x000A /* LF '\n' */ == u) {
    + /* Consume newline */
    +
    + token->have_nl = 1;
    + reset_state(token->rchar,0);
    +
    + token->tag_state = ts_tag_comment;
    +
    + EMIT_TOKEN_CLASS(tf_comment_start);
    + } else if (0x002D /* - */ == u) {
    + /* Enable buffer and process again */
    + token->tag_lookahead = new_string(token->text_charset);
    + } else if (0x003E /* > */ == u) {
    + /* Closing on empty comment */
    +
    + /* <!--> seen */
    +
    + token->tag_state = ts_init;
    +
    + EMIT_TOKEN_CLASS(tf_comment_error);
    + } else {
    + token->tag_state = ts_tag_comment;
    +
    + EMIT_TOKEN_CLASS(tf_comment_start);
    + }
    + }
    +
    + } else if (ts_tag_bang == token->tag_state) {
    +
    + if (token->tag_lookahead) {
    + if (0x002D /* - */ == u ||
    + 0x005B /* [ */ == u ||
    + (0x0041 /* A */ <= u && u <= 0x005A /* Z */) ||
    + (0x0061 /* a */ <= u && u <= 0x007A /* z */)) {
    +
    + int lookahead_len;
    +
    + /* Returns no status */
    + add_state_to_string(token->tag_lookahead,token->rchar);
    + lookahead_len = string_len(token->tag_lookahead);
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + /* DOCTYPE has 7 characters
    + [CDATA[ has 7 characters
    + -- has 2 characters
    + */
    +
    + if (lookahead_len > 7) {
    + free_string(& (token->tag_lookahead));
    +
    + /* Bogus <! ... > */
    + token->tag_state = ts_tag_bogus_comment;
    +
    + EMIT_TOKEN_CLASS(tf_bcomment_start);
    + } else if (7 == lookahead_len &&
    + string_matches_ascii(token->tag_lookahead,
    + cs2us("[CDATA["),
    + 0,SMA_op_normal)) {
    +
    + free_string(& (token->tag_lookahead));
    + /* CDATA is not currently supported .... */
    +
    + /* TODO: Check whether CDATA is allowed */
    +
    + token->tag_state = ts_tag_bogus_comment;
    +
    + EMIT_TOKEN_CLASS(tf_bcomment_start);
    +
    +
    + } else if (7 == lookahead_len &&
    + string_matches_ascii(token->tag_lookahead,
    + cs2us("DOCTYPE"),
    + SMA_ignore_case,
    + SMA_op_normal)) {
    +
    + /* Seen <!DOCTYPE */
    +
    + token->tag_state = ts_tag_doctype_start;
    + token->tag_quote = 0;
    +
    + /* Leaks token->tag_lookahead */
    +
    + EMIT_TOKEN_CLASS(tf_doctype_start);
    +
    + } else if (2 == lookahead_len &&
    + string_matches_ascii(token->tag_lookahead,
    + cs2us("--"),
    + 0,SMA_op_normal)) {
    + free_string(& (token->tag_lookahead));
    +
    + /* Seen <!-- */
    + token->tag_state = ts_tag_comment_start;
    + }
    +
    +
    +
    + } else {
    + free_string(& (token->tag_lookahead));
    +
    + /* Bogus <! ... > */
    + token->tag_state = ts_tag_bogus_comment;
    +
    + EMIT_TOKEN_CLASS(tf_bcomment_start);
    + }
    +
    + } else {
    + if (0x002D /* - */ == u ||
    + 0x0044 /* D */ == u ||
    + 0x0064 /* d */ == u ||
    + 0x005B /* [ */ == u) {
    +
    + /* Enable buffer and process again */
    + token->tag_lookahead = new_string(token->text_charset);
    + } else {
    + /* Bogus <! ... > */
    +
    + if (0x000A /* LF '\n' */ == u) {
    + /* Consume newline */
    +
    + token->have_nl = 1;
    + reset_state(token->rchar,0);
    + }
    +
    + token->tag_state = ts_tag_bogus_comment;
    +
    + EMIT_TOKEN_CLASS(tf_bcomment_start);
    + }
    + }
    +
    + } else if (ts_tag_endmark == token->tag_state) {
    +
    + if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag) &&
    + ((0x0030 /* 0 */ <= u && u <= 0x0039 /* 9 */) ||
    + (0x0041 /* A */ <= u && u <= 0x005A /* Z */) ||
    + (0x0061 /* a */ <= u && u <= 0x007A /* z */) ||
    + 0x002D /* - */ == u)) {
    +
    + /* text/enriched end tag */
    +
    + if (len >= MAX_ENRICHED_TOKEN) { /* Too long */
    + EMIT_TOKEN_CLASS(tf_tag_error);
    + }
    +
    + if (!token->tag_name)
    + token->tag_name = new_string(token->text_charset);
    +
    + if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
    + ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
    + uint16 lower_char = u + 0x0020;
    + add_unicode_to_string(token->tag_name,1,&lower_char);
    +
    + } else {
    + /* Returns no status */
    + add_state_to_string(token->tag_name,token->rchar);
    + }
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + } else if (len <= 2 &&
    + isoff(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag) &&
    + ((0x0041 /* A */ <= u && u <= 0x005A /* Z */) ||
    + (0x0061 /* a */ <= u && u <= 0x007A /* z */))) {
    +
    + /* Start of end tag name */
    +
    + if (!token->tag_name)
    + token->tag_name = new_string(token->text_charset);
    +
    + if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
    + ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
    + uint16 lower_char = u + 0x0020;
    + add_unicode_to_string(token->tag_name,1,&lower_char);
    + } else {
    + /* Returns no status */
    + add_state_to_string(token->tag_name,token->rchar);
    + }
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + } else if (0x003E /* > */ == u) {
    + if (len > 2) {
    +
    + token->tag_state = ts_init;
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + EMIT_TOKEN_CLASS(tf_whole_endtag);
    + } else {
    + token->tag_state = ts_init;
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + EMIT_TOKEN_CLASS(tf_tag_error);
    + }
    + } else if (len > 2 &&
    + isoff(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag)) {
    +
    + /* Parsing name */
    +
    + if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_tag_attributes) &&
    + (0x0009 /* HT '\t' */ == u ||
    + 0x000A /* LF '\n' */ == u ||
    + 0x000C /* FF '\f' */ == u ||
    + 0x0020 /* SPACE */ == u)) {
    +
    + if (0x000A /* LF '\n' */ == u) {
    + /* Consume newline */
    +
    + token->have_nl = 1;
    + reset_state(token->rchar,0);
    + }
    +
    + /* do not include space in the name */
    +
    + token->tag_state = ts_tag_params;
    +
    + EMIT_TOKEN_CLASS(tf_start_endtag);
    +
    + } else if (0x0000 /* NUL */ == u) {
    + uint16 bad_char = UNICODE_BAD_CHAR;
    +
    + DPRINT(Debug,20,(&Debug,
    + "get_new_tagfilter_token: Found NUL character when parsing end tag name\n"));
    +
    + reset_state(token->rchar,0); /* Get next character */
    +
    + token->error = 1;
    + add_unicode_to_string(token->sbuffer,1,&bad_char);
    +
    + if (!token->tag_name)
    + token->tag_name = new_string(token->text_charset);
    + add_unicode_to_string(token->tag_name,1,&bad_char);
    +
    + CHECK_SBUFFER_LEN(tf_tag_error);
    + } else {
    +
    + /* Accepts everything as tag name? */
    +
    + if (!token->tag_name)
    + token->tag_name = new_string(token->text_charset);
    +
    + if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
    + ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
    + uint16 lower_char = u + 0x0020;
    + add_unicode_to_string(token->tag_name,1,&lower_char);
    + } else {
    + /* Returns no status */
    + add_state_to_string(token->tag_name,token->rchar);
    + }
    +
    + ADD_SBUFFER_CHAR(tf_tag_error);
    + }
    +
    + } else if (isoff(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag)) {
    +
    + /* Bogus </ ... > */
    +
    + token->tag_state = ts_tag_bogus_comment;
    +
    + if (0x000A /* LF '\n' */ == u) {
    + /* Consume newline */
    +
    + token->have_nl = 1;
    + reset_state(token->rchar,0);
    + }
    +
    + EMIT_TOKEN_CLASS(tf_bcomment_start);
    +
    + } else {
    +
    + if (0x000A /* LF '\n' */ == u) {
    + /* Consume newline */
    +
    + token->have_nl = 1;
    + reset_state(token->rchar,0);
    + }
    +
    + /* Parse error on tag -- do not include character */
    +
    + token->tag_state = ts_init;
    +
    + EMIT_TOKEN_CLASS(tf_tag_error);
    + }
    +
    + } else if (ts_tag_bogus_comment == token->tag_state) {
    +
    + if (0x003E /* > */ == u) {
    + token->tag_state = ts_init;
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + EMIT_TOKEN_CLASS(tf_bcomment_end);
    + } else if (0x0000 /* NUL */ == u) {
    + uint16 bad_char = UNICODE_BAD_CHAR;
    +
    + DPRINT(Debug,20,(&Debug,
    + "get_new_tagfilter_token: Found NUL character when parsing < ... comment\n"));
    +
    + reset_state(token->rchar,0); /* Get next character */
    +
    + token->error = 1;
    + add_unicode_to_string(token->sbuffer,1,&bad_char);
    +
    + CHECK_SBUFFER_LEN(tf_bcomment_chunk);
    + } else {
    + /* Accepts everything as comment? */
    +
    + if (0x000A /* LF '\n' */ == u) {
    + /* Consume newline */
    +
    + token->have_nl = 1;
    + reset_state(token->rchar,0);
    +
    + EMIT_TOKEN_CLASS(tf_bcomment_chunk);
    + }
    +
    + CHECK_SBUFFER_LEN(tf_bcomment_chunk);
    + }
    +
    + } else if (ts_tag_self_closing == token->tag_state) {
    +
    + setit(token->tag_flags,TFLAG_self_closing);
    +
    + if (0x003E /* > */ == u) {
    +
    + token->tag_state = ts_init;
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + clearit(token->tag_flags,TFLAG_self_closing); /* Passed already */
    +
    + EMIT_TOKEN_CLASS(tf_selfclosed_tag);
    +
    + } else if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_tag_attributes)) {
    +
    + /* Do not include in the name */
    +
    + if (0x000A /* LF '\n' */ == u) {
    + /* Consume newline */
    +
    + token->have_nl = 1;
    + reset_state(token->rchar,0);
    + }
    +
    + token->tag_state = ts_tag_params;
    +
    + EMIT_TOKEN_CLASS(tf_start_tag);
    + } else {
    + /* Back to parsing the tag name */
    +
    + token->tag_state = ts_tag_start;
    +
    + if (0x000A /* LF '\n' */ == u) {
    + /* Consume newline */
    +
    + token->have_nl = 1;
    + reset_state(token->rchar,0);
    +
    + EMIT_TOKEN_CLASS(tf_tag_error);
    + } else {
    +
    + if (!token->tag_name)
    + token->tag_name = new_string(token->text_charset);
    +
    + if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
    + ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
    + uint16 lower_char = u + 0x0020;
    + add_unicode_to_string(token->tag_name,1,&lower_char);
    + } else {
    + /* Returns no status */
    + add_state_to_string(token->tag_name,token->rchar);
    + }
    + }
    + }
    +
    + } else if (ts_tag_start == token->tag_state) {
    +
    + clearit(token->tag_flags,TFLAG_self_closing); /* Clear possible bogus flag */
    +
    + if (1 == len &&
    + ison(tagfilter->tagflt_mode,TAGFLT_MODE_double_smaller) &&
    + 0x003C /* < */ == u) {
    +
    + /* text/enriched escaped < */
    +
    + /* Note that sbuffer includes all characters
    + that are part of the token -- not just the token value
    + */
    +
    + token->tag_state = ts_init;
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + EMIT_TOKEN_CLASS(tf_double_smaller);
    +
    + } else if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag) &&
    + ((0x0030 /* 0 */ <= u && u <= 0x0039 /* 9 */) ||
    + (0x0041 /* A */ <= u && u <= 0x005A /* Z */) ||
    + (0x0061 /* a */ <= u && u <= 0x007A /* z */) ||
    + 0x002D /* - */ == u)) {
    +
    + /* text/enriched tag */
    +
    + if (len >= MAX_ENRICHED_TOKEN) { /* Too long */
    + EMIT_TOKEN_CLASS(tf_tag_error);
    + }
    +
    + if (!token->tag_name)
    + token->tag_name = new_string(token->text_charset);
    +
    + if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
    + ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
    + uint16 lower_char = u + 0x0020;
    + add_unicode_to_string(token->tag_name,1,&lower_char);
    + } else {
    + /* Returns no status */
    + add_state_to_string(token->tag_name,token->rchar);
    + }
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + } else if (1 == len && tagfilter->doctype_name && 0x0021 /* ! */ == u) {
    + /* Markup declaration */
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + token->tag_state = ts_tag_bang;
    + } else if (1 == len && 0x002F /* / */ == u) {
    + /* end tag */
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + token->tag_state = ts_tag_endmark;
    + } else if (1 == len &&
    + isoff(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag) &&
    + ((0x0041 /* A */ <= u && u <= 0x005A /* Z */) ||
    + (0x0061 /* a */ <= u && u <= 0x007A /* z */))) {
    +
    + /* Start of tag name */
    +
    + if (!token->tag_name)
    + token->tag_name = new_string(token->text_charset);
    +
    + if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
    + ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
    + uint16 lower_char = u + 0x0020;
    + add_unicode_to_string(token->tag_name,1,&lower_char);
    +
    + } else {
    + /* Returns no status */
    + add_state_to_string(token->tag_name,token->rchar);
    + }
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + } else if (1 == len &&
    + isoff(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag) &&
    + 0x003F /* ? */ == u) {
    +
    + /* Bogus <? > */
    +
    + token->tag_state = ts_tag_bogus_comment;
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + EMIT_TOKEN_CLASS(tf_bcomment_start);
    + } else if (len > 1 && 0x003E /* > */ == u) {
    +
    + token->tag_state = ts_init;
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + EMIT_TOKEN_CLASS(tf_whole_tag);
    + } else if (len > 1 &&
    + isoff(tagfilter->tagflt_mode,TAGFLT_MODE_enriched_tag)) {
    +
    + /* Parsing name */
    +
    + if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_tag_attributes) &&
    + (0x0009 /* HT '\t' */ == u ||
    + 0x000A /* LF '\n' */ == u ||
    + 0x000C /* FF '\f' */ == u ||
    + 0x0020 /* SPACE */ == u)) {
    +
    + if (0x000A /* LF '\n' */ == u) {
    + /* Consume newline */
    +
    + token->have_nl = 1;
    + reset_state(token->rchar,0);
    + }
    +
    + /* do not include space in the name */
    +
    + token->tag_state = ts_tag_params;
    +
    + EMIT_TOKEN_CLASS(tf_start_tag);
    + } else if (0x002F /* / */ == u) {
    +
    + token->tag_state = ts_tag_self_closing;
    +
    + ADD_SBUFFER_CHAR(tf_tag_error);
    +
    + } else if (0x0000 /* NUL */ == u) {
    + uint16 bad_char = UNICODE_BAD_CHAR;
    +
    + DPRINT(Debug,20,(&Debug,
    + "get_new_tagfilter_token: Found NUL character when parsing tag name\n"));
    +
    + reset_state(token->rchar,0); /* Get next character */
    +
    + token->error = 1;
    + add_unicode_to_string(token->sbuffer,1,&bad_char);
    +
    + if (!token->tag_name)
    + token->tag_name = new_string(token->text_charset);
    + add_unicode_to_string(token->tag_name,1,&bad_char);
    +
    + CHECK_SBUFFER_LEN(tf_tag_error);
    + } else {
    + if (!token->tag_name)
    + token->tag_name = new_string(token->text_charset);
    +
    + if (ison(tagfilter->tagflt_mode,TAGFLT_MODE_lowerascii) &&
    + ((0x0041 /* A */ <= u && u <= 0x005A /* Z */))) {
    + uint16 lower_char = u + 0x0020;
    + add_unicode_to_string(token->tag_name,1,&lower_char);
    + } else {
    + /* Returns no status */
    + add_state_to_string(token->tag_name,token->rchar);
    + }
    +
    + /* Accepts everything as tag name? */
    +
    + ADD_SBUFFER_CHAR(tf_tag_error);
    + }
    +
    + } else {
    +
    + if (0x000A /* LF '\n' */ == u) {
    + /* Consume newline */
    +
    + token->have_nl = 1;
    + reset_state(token->rchar,0);
    + }
    +
    + /* Parse error on tag -- do not include character */
    +
    + token->tag_state = ts_init;
    +
    + EMIT_TOKEN_CLASS(tf_tag_error);
    + }
    +
    +
    + } else if (ts_init == token->tag_state) {
    +
    + if (0x003C /* < */ == u) {
    +
    + if (len > 0) {
    + /* Do not eat character -- next character starts tag */
    +
    + EMIT_TOKEN_CLASS(tf_body);
    + }
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + token->tag_state = ts_tag_start;
    +
    + } else if (0x0026 /* & */ == u &&
    + tagfilter->have_entities) {
    +
    + if (len > 0) {
    + /* Do not eat character -- next character starts entity */
    +
    + EMIT_TOKEN_CLASS(tf_body);
    + }
    +
    + token->entity_state = ent_entity_start;
    +
    + /* Start & .... */
    + token->named_reference = new_string(token->text_charset);
    + /* Returns no status */
    + add_state_to_string(token->named_reference,token->rchar);
    + token->walk_reference = tagfilter_start_reference(tagfilter->have_entities);
    +
    + ADD_SBUFFER_CHAR_NOCHECK;
    +
    + } else
    + goto parse_body;
    +
    + } else {
    +
    + if (!badstate) {
    + DPRINT(Debug,20,(&Debug,
    + "get_new_tagfilter_token: bad token->tag_state=%d\n",
    + token->tag_state));
    + badstate = 1;
    + }
    +
    +
    + parse_body:
    +
    + /* On body of mail */
    +
    + if (0x000A /* LF '\n' */ == u) {
    + token->have_nl = 1;
    +
    + setit(token->tag_flags,TFLAG_seen_nl);
    +
    + reset_state(token->rchar,0);
    +
    + EMIT_TOKEN_CLASS(tf_body);
    + }
    +
    + ADD_SBUFFER_CHAR(tf_body);
    + }
    +
    + } else {
    + ch = state_getc(state_in);
    +
    + process_char:
    + if (EOF == ch) {
    +
    + if (!gotbyte) {
    + ret = -1;
    +
    + free_string(& (token->sbuffer));
    + } else {
    + len = string_len(token->sbuffer);
    +
    + ret = len;
    + }
    + token->eof = 1;
    + token->token_class = tf_body; /* EOF is outside of tag */
    +
    + goto out;
    + }
    + gotbyte++;
    +
    + if (! add_streambyte_to_state(token->rchar,ch)) {
    + uint16 bad_char = UNICODE_BAD_CHAR;
    +
    + DPRINT(Debug,20,(&Debug,
    + "get_new_tagfilter_token: Failed to add byte %02x to character when scanning token\n"));
    + token->error = 1;
    + reset_state(token->rchar,0);
    +
    + add_unicode_to_string(token->sbuffer,1,&bad_char);
    + if (token->tag_lookahead)
    + add_unicode_to_string(token->tag_lookahead,1,&bad_char);
    + if (token->tag_name)
    + add_unicode_to_string(token->tag_name,1,&bad_char);
    + if (token->atr_name)
    + add_unicode_to_string(token->atr_name,1,&bad_char);
    +
    + }
    + }
    +
    + } while(1);
    +
    + out:
    +
    + DPRINT(Debug,20,(&Debug,
    + "get_new_tagfilter_token=%d:\n",
    + ret));
    +
    + DPRINT(Debug,20,(&Debug, " token_class %d",
    + token->token_class));
    + switch(token->token_class) {
    + case tf_doctype_error: DPRINT(Debug,20,(&Debug," tf_doctype_error")); break;
    + case tf_tag_atrvalue_error: DPRINT(Debug,20,(&Debug," tf_tag_atrvalue_error")); break;
    + case tf_tag_param_error: DPRINT(Debug,20,(&Debug," tf_tag_param_error")); break;
    + case tf_comment_error: DPRINT(Debug,20,(&Debug," tf_comment_error")); break;
    + case tf_bcomment_error: DPRINT(Debug,20,(&Debug," tf_bcomment_error")); break;
    + case tf_tag_error: DPRINT(Debug,20,(&Debug," tf_tag_error")); break;
    + case tf_entity_error: DPRINT(Debug,20,(&Debug," tf_entity_error")); break;
    + case tf_body: DPRINT(Debug,20,(&Debug," tf_body")); break;
    + case tf_start_tag: DPRINT(Debug,20,(&Debug," tf_start_tag")); break; /* <tag */
    + case tf_whole_tag: DPRINT(Debug,20,(&Debug," tf_whole_tag")); break; /* <tag> */
    + case tf_selfclosed_tag: DPRINT(Debug,20,(&Debug," tf_selfclosed_tag")); break; /* <tag/> */
    + case tf_bcomment_start: DPRINT(Debug,20,(&Debug," tf_bcomment_start")); break; /* <? or <! or </ */
    + case tf_bcomment_chunk: DPRINT(Debug,20,(&Debug," tf_bcomment_chunk")); break; /* <? bogus comment chunk */
    + case tf_bcomment_end: DPRINT(Debug,20,(&Debug," tf_bcomment_end")); break; /* end bogus comment > */
    + case tf_start_endtag: DPRINT(Debug,20,(&Debug," tf_start_endtag")); break; /* </tag */
    + case tf_whole_endtag: DPRINT(Debug,20,(&Debug," tf_whole_endtag")); break; /* </tag> */
    + case tf_entity: DPRINT(Debug,20,(&Debug," tf_entity")); break;
    + case tf_numeric_entity: DPRINT(Debug,20,(&Debug," tf_numeric_entity")); break; /* Numeric entity */
    + case tf_double_smaller: DPRINT(Debug,20,(&Debug," tf_double_smaller")); break; /* << as escaping */
    + case tf_span_nl: DPRINT(Debug,20,(&Debug," tf_span_nl")); break; /* span of newline (except first) */
    + case tf_comment_start: DPRINT(Debug,20,(&Debug," tf_comment_start")); break; /* <!-- */
    + case tf_whole_comment: DPRINT(Debug,20,(&Debug," tf_whole_comment")); break; /* <!----> */
    + case tf_comment_chunk: DPRINT(Debug,20,(&Debug," tf_comment_chunk")); break; /* <!-- comment chunk */
    + case tf_comment_end: DPRINT(Debug,20,(&Debug," tf_comment_end")); break; /* end comment --> */
    + case tf_tag_space: DPRINT(Debug,20,(&Debug," tf_tag_space")); break; /* space on attributes */
    + case tf_tag_atrname: DPRINT(Debug,20,(&Debug," tf_tag_atrname")); break; /* Got attribute name */
    + case tf_tag_selfclosed_end: DPRINT(Debug,20,(&Debug," tf_tag_selfclosed_end")); break; /* Got /> */
    + case tf_tag_end: DPRINT(Debug,20,(&Debug," tf_tag_end")); break; /* Got > */
    + case tf_tag_atrequal: DPRINT(Debug,20,(&Debug," tf_tag_atrequal")); break; /* Got = */
    + case tf_tag_atrvalue_start: DPRINT(Debug,20,(&Debug," tf_tag_atrvalue_start")); break; /* Start attribute value */
    + case tf_tag_atrvalue_segment: DPRINT(Debug,20,(&Debug," tf_tag_atrvalue_segment")); break; /* Part of attribute value */
    + case tf_tag_atrvalue_end: DPRINT(Debug,20,(&Debug," tf_tag_atrvalue_end")); break; /* End of attribute value */
    + case tf_doctype_start: DPRINT(Debug,20,(&Debug," tf_doctype_start")); break; /* <!DOCTYPE */
    + case tf_doctype_segment: DPRINT(Debug,20,(&Debug," tf_doctype_segment")); break; /* Part of DOCTYPE */
    + case tf_doctype_space: DPRINT(Debug,20,(&Debug," tf_doctype_space")); break; /* Space on doctype */
    + case tf_doctype_item: DPRINT(Debug,20,(&Debug," tf_doctype_item")); break; /* Collected doctype item */
    + case tf_doctype_end: DPRINT(Debug,20,(&Debug," tf_doctype_end")); break; /* DOCTYPE line ended */
    + }
    + DPRINT(Debug,20,(&Debug, "\n"));
    + if (token->tag_name) {
    + DEBUG_PRINT_STRING(Debug,20,
    + " tag_name ",
    + " tag_name > ",
    + token->tag_name);
    + }
    + if (token->atr_name) {
    + DEBUG_PRINT_STRING(Debug,20,
    + " atr_name ",
    + " atr_name > ",
    + token->atr_name);
    + }
    + if (token->atr_value_segment) {
    + DEBUG_PRINT_STRING(Debug,20,
    + " atr_value_segment ",
    + " atr_value_segment > ",
    + token->atr_value_segment);
    + }
    + if (token->named_reference) {
    + DEBUG_PRINT_STRING(Debug,20,
    + " named_reference ",
    + " named_reference > ",
    + token->named_reference);
    + }
    +
    + if (UNICODE_BAD_CHAR != token->numeric_reference ||
    + tf_numeric_entity == token->token_class ||
    + ison(token->tag_flags,TFLAG_num_overflow)) {
    +
    + DPRINT(Debug,20,(&Debug, " numeric_reference %04x\n",
    + token->numeric_reference));
    + }
    + if (token->doctype_item) {
    + DEBUG_PRINT_STRING(Debug,20,
    + " doctype_item ",
    + " doctype_item > ",
    + token->doctype_item);
    + }
    + if (token->have_nl) {
    + DPRINT(Debug,20,(&Debug, " have_nl\n"));
    + }
    + if (token->eof) {
    + DPRINT(Debug,20,(&Debug, " eof\n"));
    + }
    + if (token->error) {
    + DPRINT(Debug,20,(&Debug, " error\n"));
    + }
    +
    + if (token->tag_state != ts_init ||
    + token->token_class != tf_body ||
    + ison(token->tag_flags,TFLAG_self_closing) ||
    + ison(token->tag_flags,TFLAG_seen_equals) ||
    + token->tag_quote ||
    + token->tag_lookahead
    + ) {
    + DPRINT(Debug,20,(&Debug, " tag_state %d",
    + token->tag_state));
    +
    + switch (token->tag_state) {
    + case ts_init: DPRINT(Debug,20,(&Debug, " ts_init")); break;
    + case ts_tag_start: DPRINT(Debug,20,(&Debug, " ts_tag_start")); /* Seen < */ break;
    + case ts_tag_bang: DPRINT(Debug,20,(&Debug, " ts_tag_bang")); /* Seen <! */ break;
    + case ts_tag_endmark: DPRINT(Debug,20,(&Debug, " ts_tag_endmark")); /* Seen </ */ break;
    + case ts_tag_params: DPRINT(Debug,20,(&Debug, " ts_tag_params")); /* Seen space after < or expecting params */ break;
    + case ts_tag_ending: DPRINT(Debug,20,(&Debug, " ts_tag_ending")); /* Process for /> or > */ break;
    + case ts_tag_bogus_comment: DPRINT(Debug,20,(&Debug, " ts_tag_bogus_comment")); /* Parse to > */ break;
    + case ts_tag_self_closing: DPRINT(Debug,20,(&Debug, " ts_tag_self_closing")); /* Seen <tag/ */ break;
    + case ts_tag_comment_start: DPRINT(Debug,20,(&Debug, " ts_tag_comment_start")); /* Seen <!-- */ break;
    + case ts_tag_comment: DPRINT(Debug,20,(&Debug, " ts_tag_comment")); /* Inside of comment */ break;
    + case ts_tag_after_atrname: DPRINT(Debug,20,(&Debug, " ts_tag_after_atrname")); /* Got attribute name, parse = */ break;
    + case ts_tag_atrvalue: DPRINT(Debug,20,(&Debug, " ts_tag_atrvalue")); /* On attribute value */ break;
    + case ts_tag_after_quoted: DPRINT(Debug,20,(&Debug, " ts_tag_after_quoted")); /* After quoted value */ break;
    + case ts_tag_doctype_start: DPRINT(Debug,20,(&Debug, " ts_tag_doctype_start")); /* Seen <!DOCTYPE */ break;
    + case ts_tag_doctype_bogus: DPRINT(Debug,20,(&Debug, " ts_tag_doctype_bogus")); /* Bogus DOCTYPE line */ break;
    + }
    +
    + DPRINT(Debug,20,(&Debug, "\n"));
    +
    + DPRINT(Debug,20,(&Debug, " tag_quote %04x",
    + token->tag_quote));
    + switch (token->tag_quote) {
    + case 0: DPRINT(Debug,20,(&Debug, " (none)")); break;
    + case 0x0027: DPRINT(Debug,20,(&Debug, " (')")); break;
    + case 0x0022: DPRINT(Debug,20,(&Debug, " (\")")); break;
    + }
    + DPRINT(Debug,20,(&Debug, "\n"));

    [continued in next message]

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)