1/* tags.c -- recognize HTML tags
2
3  (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
4  See tidy.h for the copyright notice.
5
6  CVS Info :
7
8    $Author: iccir $
9    $Date: 2007/01/30 23:46:52 $
10    $Revision: 1.3 $
11
12  The HTML tags are stored as 8 bit ASCII strings.
13
14*/
15
16#include "tidy-int.h"
17#include "message.h"
18#include "tmbstr.h"
19
20/* Attribute checking methods */
/* Forward declarations of the file-local, per-element attribute checkers.
   Each name below appears in the last column of one tag_defs row
   (e.g. CheckIMG on the "img" row, CheckMETA on the "meta" row).
   NOTE(review): CheckAttribs is declared outside this file; it is
   presumably a function type, so these lines declare static functions
   that are defined later in the file -- confirm. */
21static CheckAttribs CheckIMG;
22static CheckAttribs CheckLINK;
23static CheckAttribs CheckAREA;
24static CheckAttribs CheckTABLE;
25static CheckAttribs CheckCaption;
26static CheckAttribs CheckSCRIPT;
27static CheckAttribs CheckSTYLE;
28static CheckAttribs CheckHTML;
29static CheckAttribs CheckFORM;
30static CheckAttribs CheckMETA;
31
/* VERS_ELEM_<NAME>: one macro per W3C element listed in tag_defs, giving the
   OR of the version flags that apply to that element.  Every row has one
   column per flag, always in the order
     HT20 HT32 H40T H41T X10T H40F H41F X10F H40S H41S X10S XH11 XB10
   and `xxxx` fills a column whose flag is absent so that the rows stay
   aligned and can be compared by eye.
   NOTE(review): the flag constants and `xxxx` are defined outside this file;
   `xxxx` is presumably 0 (a no-op in the OR) -- confirm against the header
   that declares them. */
32#define VERS_ELEM_A          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
33#define VERS_ELEM_ABBR       (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
34#define VERS_ELEM_ACRONYM    (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
35#define VERS_ELEM_ADDRESS    (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
36#define VERS_ELEM_APPLET     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
37#define VERS_ELEM_AREA       (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
38#define VERS_ELEM_B          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
39#define VERS_ELEM_BASE       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
40#define VERS_ELEM_BASEFONT   (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
41#define VERS_ELEM_BDO        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
42#define VERS_ELEM_BIG        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
43#define VERS_ELEM_BLOCKQUOTE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
44#define VERS_ELEM_BODY       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
45#define VERS_ELEM_BR         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
46#define VERS_ELEM_BUTTON     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
47#define VERS_ELEM_CAPTION    (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
48#define VERS_ELEM_CENTER     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
49#define VERS_ELEM_CITE       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
50#define VERS_ELEM_CODE       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
51#define VERS_ELEM_COL        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
52#define VERS_ELEM_COLGROUP   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
53#define VERS_ELEM_DD         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
54#define VERS_ELEM_DEL        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
55#define VERS_ELEM_DFN        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
56#define VERS_ELEM_DIR        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
57#define VERS_ELEM_DIV        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
58#define VERS_ELEM_DL         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
59#define VERS_ELEM_DT         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
60#define VERS_ELEM_EM         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
61#define VERS_ELEM_FIELDSET   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
62#define VERS_ELEM_FONT       (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
63#define VERS_ELEM_FORM       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
64#define VERS_ELEM_FRAME      (xxxx|xxxx|xxxx|xxxx|xxxx|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
65#define VERS_ELEM_FRAMESET   (xxxx|xxxx|xxxx|xxxx|xxxx|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
66#define VERS_ELEM_H1         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
67#define VERS_ELEM_H2         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
68#define VERS_ELEM_H3         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
69#define VERS_ELEM_H4         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
70#define VERS_ELEM_H5         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
71#define VERS_ELEM_H6         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
72#define VERS_ELEM_HEAD       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
73#define VERS_ELEM_HR         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
74#define VERS_ELEM_HTML       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
75#define VERS_ELEM_I          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
76#define VERS_ELEM_IFRAME     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
77#define VERS_ELEM_IMG        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
78#define VERS_ELEM_INPUT      (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
79#define VERS_ELEM_INS        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
80#define VERS_ELEM_ISINDEX    (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
81#define VERS_ELEM_KBD        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
82#define VERS_ELEM_LABEL      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
83#define VERS_ELEM_LEGEND     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
84#define VERS_ELEM_LI         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
85#define VERS_ELEM_LINK       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
86#define VERS_ELEM_LISTING    (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
87#define VERS_ELEM_MAP        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
88#define VERS_ELEM_MENU       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
89#define VERS_ELEM_META       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
90#define VERS_ELEM_NEXTID     (HT20|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
91#define VERS_ELEM_NOFRAMES   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
92#define VERS_ELEM_NOSCRIPT   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
93#define VERS_ELEM_OBJECT     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
94#define VERS_ELEM_OL         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
95#define VERS_ELEM_OPTGROUP   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
96#define VERS_ELEM_OPTION     (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
97#define VERS_ELEM_P          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
98#define VERS_ELEM_PARAM      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
99#define VERS_ELEM_PLAINTEXT  (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
100#define VERS_ELEM_PRE        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
101#define VERS_ELEM_Q          (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
102#define VERS_ELEM_RB         (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
103#define VERS_ELEM_RBC        (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
104#define VERS_ELEM_RP         (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
105#define VERS_ELEM_RT         (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
106#define VERS_ELEM_RTC        (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
107#define VERS_ELEM_RUBY       (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
108#define VERS_ELEM_S          (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
109#define VERS_ELEM_SAMP       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
110#define VERS_ELEM_SCRIPT     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
111#define VERS_ELEM_SELECT     (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
112#define VERS_ELEM_SMALL      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
113#define VERS_ELEM_SPAN       (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
114#define VERS_ELEM_STRIKE     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
115#define VERS_ELEM_STRONG     (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
116#define VERS_ELEM_STYLE      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
117#define VERS_ELEM_SUB        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
118#define VERS_ELEM_SUP        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
119#define VERS_ELEM_TABLE      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
120#define VERS_ELEM_TBODY      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
121#define VERS_ELEM_TD         (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
122#define VERS_ELEM_TEXTAREA   (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
123#define VERS_ELEM_TFOOT      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
124#define VERS_ELEM_TH         (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
125#define VERS_ELEM_THEAD      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
126#define VERS_ELEM_TITLE      (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
127#define VERS_ELEM_TR         (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
128#define VERS_ELEM_TT         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
129#define VERS_ELEM_U          (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
130#define VERS_ELEM_UL         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
131#define VERS_ELEM_VAR        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
132#define VERS_ELEM_XMP        (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
133
/* Static table of the elements this file knows about, one Dict initializer
   per element.  Columns, left to right:
     1. tag id (TidyTag_*)
     2. element name, written in lower case
     3. version mask (VERS_ELEM_* for W3C elements, a vendor VERS_* otherwise)
     4. attribute table (&W3CAttrsFor_*[0]), NULL for the placeholder and
        proprietary rows
     5. content-model flags (CM_* values ORed together)
     6. parser routine (Parse*)
     7. attribute checker (one of the static Check* routines) or NULL
   The first row is the TidyTag_UNKNOWN placeholder.  The last row, with a
   NULL name, terminates the list and must stay last.
   NOTE(review): the Dict field names and the meaning of the individual CM_*
   flags are declared outside this file -- confirm there before relying on
   the column descriptions above. */
134static const Dict tag_defs[] =
135{
136  { TidyTag_UNKNOWN,    "unknown!",   VERS_UNKNOWN,         NULL,                       (0),                                           NULL,          NULL           },
137
138  /* W3C defined elements */
139  { TidyTag_A,          "a",          VERS_ELEM_A,          &TY_(W3CAttrsFor_A)[0],          (CM_INLINE),                                   TY_(ParseInline),   NULL           },
140  { TidyTag_ABBR,       "abbr",       VERS_ELEM_ABBR,       &TY_(W3CAttrsFor_ABBR)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
141  { TidyTag_ACRONYM,    "acronym",    VERS_ELEM_ACRONYM,    &TY_(W3CAttrsFor_ACRONYM)[0],    (CM_INLINE),                                   TY_(ParseInline),   NULL           },
142  { TidyTag_ADDRESS,    "address",    VERS_ELEM_ADDRESS,    &TY_(W3CAttrsFor_ADDRESS)[0],    (CM_BLOCK),                                    TY_(ParseInline),   NULL           },
143  { TidyTag_APPLET,     "applet",     VERS_ELEM_APPLET,     &TY_(W3CAttrsFor_APPLET)[0],     (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM),         TY_(ParseBlock),    NULL           },
144  { TidyTag_AREA,       "area",       VERS_ELEM_AREA,       &TY_(W3CAttrsFor_AREA)[0],       (CM_BLOCK|CM_EMPTY),                           TY_(ParseEmpty),    CheckAREA      },
145  { TidyTag_B,          "b",          VERS_ELEM_B,          &TY_(W3CAttrsFor_B)[0],          (CM_INLINE),                                   TY_(ParseInline),   NULL           },
146  { TidyTag_BASE,       "base",       VERS_ELEM_BASE,       &TY_(W3CAttrsFor_BASE)[0],       (CM_HEAD|CM_EMPTY),                            TY_(ParseEmpty),    NULL           },
147  { TidyTag_BASEFONT,   "basefont",   VERS_ELEM_BASEFONT,   &TY_(W3CAttrsFor_BASEFONT)[0],   (CM_INLINE|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
148  { TidyTag_BDO,        "bdo",        VERS_ELEM_BDO,        &TY_(W3CAttrsFor_BDO)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
149  { TidyTag_BIG,        "big",        VERS_ELEM_BIG,        &TY_(W3CAttrsFor_BIG)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
150  { TidyTag_BLOCKQUOTE, "blockquote", VERS_ELEM_BLOCKQUOTE, &TY_(W3CAttrsFor_BLOCKQUOTE)[0], (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
151  { TidyTag_BODY,       "body",       VERS_ELEM_BODY,       &TY_(W3CAttrsFor_BODY)[0],       (CM_HTML|CM_OPT|CM_OMITST),                    TY_(ParseBody),     NULL           },
152  { TidyTag_BR,         "br",         VERS_ELEM_BR,         &TY_(W3CAttrsFor_BR)[0],         (CM_INLINE|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
153  { TidyTag_BUTTON,     "button",     VERS_ELEM_BUTTON,     &TY_(W3CAttrsFor_BUTTON)[0],     (CM_INLINE),                                   TY_(ParseBlock),    NULL           },
154  { TidyTag_CAPTION,    "caption",    VERS_ELEM_CAPTION,    &TY_(W3CAttrsFor_CAPTION)[0],    (CM_TABLE),                                    TY_(ParseInline),   CheckCaption   },
155  { TidyTag_CENTER,     "center",     VERS_ELEM_CENTER,     &TY_(W3CAttrsFor_CENTER)[0],     (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
156  { TidyTag_CITE,       "cite",       VERS_ELEM_CITE,       &TY_(W3CAttrsFor_CITE)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
157  { TidyTag_CODE,       "code",       VERS_ELEM_CODE,       &TY_(W3CAttrsFor_CODE)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
158  { TidyTag_COL,        "col",        VERS_ELEM_COL,        &TY_(W3CAttrsFor_COL)[0],        (CM_TABLE|CM_EMPTY),                           TY_(ParseEmpty),    NULL           },
159  { TidyTag_COLGROUP,   "colgroup",   VERS_ELEM_COLGROUP,   &TY_(W3CAttrsFor_COLGROUP)[0],   (CM_TABLE|CM_OPT),                             TY_(ParseColGroup), NULL           },
160  { TidyTag_DD,         "dd",         VERS_ELEM_DD,         &TY_(W3CAttrsFor_DD)[0],         (CM_DEFLIST|CM_OPT|CM_NO_INDENT),              TY_(ParseBlock),    NULL           },
161  { TidyTag_DEL,        "del",        VERS_ELEM_DEL,        &TY_(W3CAttrsFor_DEL)[0],        (CM_INLINE|CM_BLOCK|CM_MIXED),                 TY_(ParseInline),   NULL           },
162  { TidyTag_DFN,        "dfn",        VERS_ELEM_DFN,        &TY_(W3CAttrsFor_DFN)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
163  { TidyTag_DIR,        "dir",        VERS_ELEM_DIR,        &TY_(W3CAttrsFor_DIR)[0],        (CM_BLOCK|CM_OBSOLETE),                        TY_(ParseList),     NULL           },
164  { TidyTag_DIV,        "div",        VERS_ELEM_DIV,        &TY_(W3CAttrsFor_DIV)[0],        (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
165  { TidyTag_DL,         "dl",         VERS_ELEM_DL,         &TY_(W3CAttrsFor_DL)[0],         (CM_BLOCK),                                    TY_(ParseDefList),  NULL           },
166  { TidyTag_DT,         "dt",         VERS_ELEM_DT,         &TY_(W3CAttrsFor_DT)[0],         (CM_DEFLIST|CM_OPT|CM_NO_INDENT),              TY_(ParseInline),   NULL           },
167  { TidyTag_EM,         "em",         VERS_ELEM_EM,         &TY_(W3CAttrsFor_EM)[0],         (CM_INLINE),                                   TY_(ParseInline),   NULL           },
168  { TidyTag_FIELDSET,   "fieldset",   VERS_ELEM_FIELDSET,   &TY_(W3CAttrsFor_FIELDSET)[0],   (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
169  { TidyTag_FONT,       "font",       VERS_ELEM_FONT,       &TY_(W3CAttrsFor_FONT)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
170  { TidyTag_FORM,       "form",       VERS_ELEM_FORM,       &TY_(W3CAttrsFor_FORM)[0],       (CM_BLOCK),                                    TY_(ParseBlock),    CheckFORM      },
171  { TidyTag_FRAME,      "frame",      VERS_ELEM_FRAME,      &TY_(W3CAttrsFor_FRAME)[0],      (CM_FRAMES|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
172  { TidyTag_FRAMESET,   "frameset",   VERS_ELEM_FRAMESET,   &TY_(W3CAttrsFor_FRAMESET)[0],   (CM_HTML|CM_FRAMES),                           TY_(ParseFrameSet), NULL           },
173  { TidyTag_H1,         "h1",         VERS_ELEM_H1,         &TY_(W3CAttrsFor_H1)[0],         (CM_BLOCK|CM_HEADING),                         TY_(ParseInline),   NULL           },
174  { TidyTag_H2,         "h2",         VERS_ELEM_H2,         &TY_(W3CAttrsFor_H2)[0],         (CM_BLOCK|CM_HEADING),                         TY_(ParseInline),   NULL           },
175  { TidyTag_H3,         "h3",         VERS_ELEM_H3,         &TY_(W3CAttrsFor_H3)[0],         (CM_BLOCK|CM_HEADING),                         TY_(ParseInline),   NULL           },
176  { TidyTag_H4,         "h4",         VERS_ELEM_H4,         &TY_(W3CAttrsFor_H4)[0],         (CM_BLOCK|CM_HEADING),                         TY_(ParseInline),   NULL           },
177  { TidyTag_H5,         "h5",         VERS_ELEM_H5,         &TY_(W3CAttrsFor_H5)[0],         (CM_BLOCK|CM_HEADING),                         TY_(ParseInline),   NULL           },
178  { TidyTag_H6,         "h6",         VERS_ELEM_H6,         &TY_(W3CAttrsFor_H6)[0],         (CM_BLOCK|CM_HEADING),                         TY_(ParseInline),   NULL           },
179  { TidyTag_HEAD,       "head",       VERS_ELEM_HEAD,       &TY_(W3CAttrsFor_HEAD)[0],       (CM_HTML|CM_OPT|CM_OMITST),                    TY_(ParseHead),     NULL           },
180  { TidyTag_HR,         "hr",         VERS_ELEM_HR,         &TY_(W3CAttrsFor_HR)[0],         (CM_BLOCK|CM_EMPTY),                           TY_(ParseEmpty),    NULL           },
181  { TidyTag_HTML,       "html",       VERS_ELEM_HTML,       &TY_(W3CAttrsFor_HTML)[0],       (CM_HTML|CM_OPT|CM_OMITST),                    TY_(ParseHTML),     CheckHTML      },
182  { TidyTag_I,          "i",          VERS_ELEM_I,          &TY_(W3CAttrsFor_I)[0],          (CM_INLINE),                                   TY_(ParseInline),   NULL           },
183  { TidyTag_IFRAME,     "iframe",     VERS_ELEM_IFRAME,     &TY_(W3CAttrsFor_IFRAME)[0],     (CM_INLINE),                                   TY_(ParseBlock),    NULL           },
184  { TidyTag_IMG,        "img",        VERS_ELEM_IMG,        &TY_(W3CAttrsFor_IMG)[0],        (CM_INLINE|CM_IMG|CM_EMPTY),                   TY_(ParseEmpty),    CheckIMG       },
185  { TidyTag_INPUT,      "input",      VERS_ELEM_INPUT,      &TY_(W3CAttrsFor_INPUT)[0],      (CM_INLINE|CM_IMG|CM_EMPTY),                   TY_(ParseEmpty),    NULL           },
186  { TidyTag_INS,        "ins",        VERS_ELEM_INS,        &TY_(W3CAttrsFor_INS)[0],        (CM_INLINE|CM_BLOCK|CM_MIXED),                 TY_(ParseInline),   NULL           },
187  { TidyTag_ISINDEX,    "isindex",    VERS_ELEM_ISINDEX,    &TY_(W3CAttrsFor_ISINDEX)[0],    (CM_BLOCK|CM_EMPTY),                           TY_(ParseEmpty),    NULL           },
188  { TidyTag_KBD,        "kbd",        VERS_ELEM_KBD,        &TY_(W3CAttrsFor_KBD)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
189  { TidyTag_LABEL,      "label",      VERS_ELEM_LABEL,      &TY_(W3CAttrsFor_LABEL)[0],      (CM_INLINE),                                   TY_(ParseInline),   NULL           },
190  { TidyTag_LEGEND,     "legend",     VERS_ELEM_LEGEND,     &TY_(W3CAttrsFor_LEGEND)[0],     (CM_INLINE),                                   TY_(ParseInline),   NULL           },
191  { TidyTag_LI,         "li",         VERS_ELEM_LI,         &TY_(W3CAttrsFor_LI)[0],         (CM_LIST|CM_OPT|CM_NO_INDENT),                 TY_(ParseBlock),    NULL           },
192  { TidyTag_LINK,       "link",       VERS_ELEM_LINK,       &TY_(W3CAttrsFor_LINK)[0],       (CM_HEAD|CM_EMPTY),                            TY_(ParseEmpty),    CheckLINK      },
193  { TidyTag_LISTING,    "listing",    VERS_ELEM_LISTING,    &TY_(W3CAttrsFor_LISTING)[0],    (CM_BLOCK|CM_OBSOLETE),                        TY_(ParsePre),      NULL           },
194  { TidyTag_MAP,        "map",        VERS_ELEM_MAP,        &TY_(W3CAttrsFor_MAP)[0],        (CM_INLINE),                                   TY_(ParseBlock),    NULL           },
195  { TidyTag_MENU,       "menu",       VERS_ELEM_MENU,       &TY_(W3CAttrsFor_MENU)[0],       (CM_BLOCK|CM_OBSOLETE),                        TY_(ParseList),     NULL           },
196  { TidyTag_META,       "meta",       VERS_ELEM_META,       &TY_(W3CAttrsFor_META)[0],       (CM_HEAD|CM_EMPTY),                            TY_(ParseEmpty),    CheckMETA      },
197  { TidyTag_NOFRAMES,   "noframes",   VERS_ELEM_NOFRAMES,   &TY_(W3CAttrsFor_NOFRAMES)[0],   (CM_BLOCK|CM_FRAMES),                          TY_(ParseNoFrames), NULL           },
198  { TidyTag_NOSCRIPT,   "noscript",   VERS_ELEM_NOSCRIPT,   &TY_(W3CAttrsFor_NOSCRIPT)[0],   (CM_BLOCK|CM_INLINE|CM_MIXED),                 TY_(ParseBlock),    NULL           },
199  { TidyTag_OBJECT,     "object",     VERS_ELEM_OBJECT,     &TY_(W3CAttrsFor_OBJECT)[0],     (CM_OBJECT|CM_HEAD|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock),    NULL           },
200  { TidyTag_OL,         "ol",         VERS_ELEM_OL,         &TY_(W3CAttrsFor_OL)[0],         (CM_BLOCK),                                    TY_(ParseList),     NULL           },
201  { TidyTag_OPTGROUP,   "optgroup",   VERS_ELEM_OPTGROUP,   &TY_(W3CAttrsFor_OPTGROUP)[0],   (CM_FIELD|CM_OPT),                             TY_(ParseOptGroup), NULL           },
202  { TidyTag_OPTION,     "option",     VERS_ELEM_OPTION,     &TY_(W3CAttrsFor_OPTION)[0],     (CM_FIELD|CM_OPT),                             TY_(ParseText),     NULL           },
203  { TidyTag_P,          "p",          VERS_ELEM_P,          &TY_(W3CAttrsFor_P)[0],          (CM_BLOCK|CM_OPT),                             TY_(ParseInline),   NULL           },
204  { TidyTag_PARAM,      "param",      VERS_ELEM_PARAM,      &TY_(W3CAttrsFor_PARAM)[0],      (CM_INLINE|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
205  { TidyTag_PLAINTEXT,  "plaintext",  VERS_ELEM_PLAINTEXT,  &TY_(W3CAttrsFor_PLAINTEXT)[0],  (CM_BLOCK|CM_OBSOLETE),                        TY_(ParsePre),      NULL           },
206  { TidyTag_PRE,        "pre",        VERS_ELEM_PRE,        &TY_(W3CAttrsFor_PRE)[0],        (CM_BLOCK),                                    TY_(ParsePre),      NULL           },
207  { TidyTag_Q,          "q",          VERS_ELEM_Q,          &TY_(W3CAttrsFor_Q)[0],          (CM_INLINE),                                   TY_(ParseInline),   NULL           },
208  { TidyTag_RB,         "rb",         VERS_ELEM_RB,         &TY_(W3CAttrsFor_RB)[0],         (CM_INLINE),                                   TY_(ParseInline),   NULL           },
209  { TidyTag_RBC,        "rbc",        VERS_ELEM_RBC,        &TY_(W3CAttrsFor_RBC)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
210  { TidyTag_RP,         "rp",         VERS_ELEM_RP,         &TY_(W3CAttrsFor_RP)[0],         (CM_INLINE),                                   TY_(ParseInline),   NULL           },
211  { TidyTag_RT,         "rt",         VERS_ELEM_RT,         &TY_(W3CAttrsFor_RT)[0],         (CM_INLINE),                                   TY_(ParseInline),   NULL           },
212  { TidyTag_RTC,        "rtc",        VERS_ELEM_RTC,        &TY_(W3CAttrsFor_RTC)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
213  { TidyTag_RUBY,       "ruby",       VERS_ELEM_RUBY,       &TY_(W3CAttrsFor_RUBY)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
214  { TidyTag_S,          "s",          VERS_ELEM_S,          &TY_(W3CAttrsFor_S)[0],          (CM_INLINE),                                   TY_(ParseInline),   NULL           },
215  { TidyTag_SAMP,       "samp",       VERS_ELEM_SAMP,       &TY_(W3CAttrsFor_SAMP)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
216  { TidyTag_SCRIPT,     "script",     VERS_ELEM_SCRIPT,     &TY_(W3CAttrsFor_SCRIPT)[0],     (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE),         TY_(ParseScript),   CheckSCRIPT    },
217  { TidyTag_SELECT,     "select",     VERS_ELEM_SELECT,     &TY_(W3CAttrsFor_SELECT)[0],     (CM_INLINE|CM_FIELD),                          TY_(ParseSelect),   NULL           },
218  { TidyTag_SMALL,      "small",      VERS_ELEM_SMALL,      &TY_(W3CAttrsFor_SMALL)[0],      (CM_INLINE),                                   TY_(ParseInline),   NULL           },
219  { TidyTag_SPAN,       "span",       VERS_ELEM_SPAN,       &TY_(W3CAttrsFor_SPAN)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
220  { TidyTag_STRIKE,     "strike",     VERS_ELEM_STRIKE,     &TY_(W3CAttrsFor_STRIKE)[0],     (CM_INLINE),                                   TY_(ParseInline),   NULL           },
221  { TidyTag_STRONG,     "strong",     VERS_ELEM_STRONG,     &TY_(W3CAttrsFor_STRONG)[0],     (CM_INLINE),                                   TY_(ParseInline),   NULL           },
222  { TidyTag_STYLE,      "style",      VERS_ELEM_STYLE,      &TY_(W3CAttrsFor_STYLE)[0],      (CM_HEAD),                                     TY_(ParseScript),   CheckSTYLE     },
223  { TidyTag_SUB,        "sub",        VERS_ELEM_SUB,        &TY_(W3CAttrsFor_SUB)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
224  { TidyTag_SUP,        "sup",        VERS_ELEM_SUP,        &TY_(W3CAttrsFor_SUP)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
225  { TidyTag_TABLE,      "table",      VERS_ELEM_TABLE,      &TY_(W3CAttrsFor_TABLE)[0],      (CM_BLOCK),                                    TY_(ParseTableTag), CheckTABLE     },
226  { TidyTag_TBODY,      "tbody",      VERS_ELEM_TBODY,      &TY_(W3CAttrsFor_TBODY)[0],      (CM_TABLE|CM_ROWGRP|CM_OPT),                   TY_(ParseRowGroup), NULL           },
227  { TidyTag_TD,         "td",         VERS_ELEM_TD,         &TY_(W3CAttrsFor_TD)[0],         (CM_ROW|CM_OPT|CM_NO_INDENT),                  TY_(ParseBlock),    NULL           },
228  { TidyTag_TEXTAREA,   "textarea",   VERS_ELEM_TEXTAREA,   &TY_(W3CAttrsFor_TEXTAREA)[0],   (CM_INLINE|CM_FIELD),                          TY_(ParseText),     NULL           },
229  { TidyTag_TFOOT,      "tfoot",      VERS_ELEM_TFOOT,      &TY_(W3CAttrsFor_TFOOT)[0],      (CM_TABLE|CM_ROWGRP|CM_OPT),                   TY_(ParseRowGroup), NULL           },
230  { TidyTag_TH,         "th",         VERS_ELEM_TH,         &TY_(W3CAttrsFor_TH)[0],         (CM_ROW|CM_OPT|CM_NO_INDENT),                  TY_(ParseBlock),    NULL           },
231  { TidyTag_THEAD,      "thead",      VERS_ELEM_THEAD,      &TY_(W3CAttrsFor_THEAD)[0],      (CM_TABLE|CM_ROWGRP|CM_OPT),                   TY_(ParseRowGroup), NULL           },
232  { TidyTag_TITLE,      "title",      VERS_ELEM_TITLE,      &TY_(W3CAttrsFor_TITLE)[0],      (CM_HEAD),                                     TY_(ParseTitle),    NULL           },
233  { TidyTag_TR,         "tr",         VERS_ELEM_TR,         &TY_(W3CAttrsFor_TR)[0],         (CM_TABLE|CM_OPT),                             TY_(ParseRow),      NULL           },
234  { TidyTag_TT,         "tt",         VERS_ELEM_TT,         &TY_(W3CAttrsFor_TT)[0],         (CM_INLINE),                                   TY_(ParseInline),   NULL           },
235  { TidyTag_U,          "u",          VERS_ELEM_U,          &TY_(W3CAttrsFor_U)[0],          (CM_INLINE),                                   TY_(ParseInline),   NULL           },
236  { TidyTag_UL,         "ul",         VERS_ELEM_UL,         &TY_(W3CAttrsFor_UL)[0],         (CM_BLOCK),                                    TY_(ParseList),     NULL           },
237  { TidyTag_VAR,        "var",        VERS_ELEM_VAR,        &TY_(W3CAttrsFor_VAR)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
238  { TidyTag_XMP,        "xmp",        VERS_ELEM_XMP,        &TY_(W3CAttrsFor_XMP)[0],        (CM_BLOCK|CM_OBSOLETE),                        TY_(ParsePre),      NULL           },
239  { TidyTag_NEXTID,     "nextid",     VERS_ELEM_NEXTID,     &TY_(W3CAttrsFor_NEXTID)[0],     (CM_HEAD|CM_EMPTY),                            TY_(ParseEmpty),    NULL           },
240
241  /* proprietary elements */
242  { TidyTag_ALIGN,      "align",      VERS_NETSCAPE,        NULL,                       (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
243  { TidyTag_BGSOUND,    "bgsound",    VERS_MICROSOFT,       NULL,                       (CM_HEAD|CM_EMPTY),                            TY_(ParseEmpty),    NULL           },
244  { TidyTag_BLINK,      "blink",      VERS_PROPRIETARY,     NULL,                       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
245  { TidyTag_COMMENT,    "comment",    VERS_MICROSOFT,       NULL,                       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
246  { TidyTag_EMBED,      "embed",      VERS_NETSCAPE,        NULL,                       (CM_INLINE|CM_IMG|CM_EMPTY),                   TY_(ParseEmpty),    NULL           },
247  { TidyTag_ILAYER,     "ilayer",     VERS_NETSCAPE,        NULL,                       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
248  { TidyTag_KEYGEN,     "keygen",     VERS_NETSCAPE,        NULL,                       (CM_INLINE|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
249  { TidyTag_LAYER,      "layer",      VERS_NETSCAPE,        NULL,                       (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
250  { TidyTag_MARQUEE,    "marquee",    VERS_MICROSOFT,       NULL,                       (CM_INLINE|CM_OPT),                            TY_(ParseInline),   NULL           },
251  { TidyTag_MULTICOL,   "multicol",   VERS_NETSCAPE,        NULL,                       (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
252  { TidyTag_NOBR,       "nobr",       VERS_PROPRIETARY,     NULL,                       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
253  { TidyTag_NOEMBED,    "noembed",    VERS_NETSCAPE,        NULL,                       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
254  { TidyTag_NOLAYER,    "nolayer",    VERS_NETSCAPE,        NULL,                       (CM_BLOCK|CM_INLINE|CM_MIXED),                 TY_(ParseBlock),    NULL           },
255  { TidyTag_NOSAVE,     "nosave",     VERS_NETSCAPE,        NULL,                       (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
256  { TidyTag_SERVER,     "server",     VERS_NETSCAPE,        NULL,                       (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE),         TY_(ParseScript),   NULL           },
257  { TidyTag_SERVLET,    "servlet",    VERS_SUN,             NULL,                       (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM),         TY_(ParseBlock),    NULL           },
258  { TidyTag_SPACER,     "spacer",     VERS_NETSCAPE,        NULL,                       (CM_INLINE|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
259  { TidyTag_WBR,        "wbr",        VERS_PROPRIETARY,     NULL,                       (CM_INLINE|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
260
261  /* this must be the final entry */
262  { (TidyTagId)0,        NULL,         0,                    NULL,                       (0),                                           NULL,          NULL           }
263};
264
265#if ELEMENT_HASH_LOOKUP
266static uint hash(ctmbstr s)
267{
268    uint hashval;
269
270    for (hashval = 0; *s != '\0'; s++)
271        hashval = *s + 31*hashval;
272
273    return hashval % ELEMENT_HASH_SIZE;
274}
275
276static const Dict *install(TidyTagImpl* tags, const Dict* old)
277{
278    DictHash *np;
279    uint hashval;
280
281    if (old)
282    {
283        np = (DictHash *)MemAlloc(sizeof(*np));
284        np->tag = old;
285
286        hashval = hash(old->name);
287        np->next = tags->hashtab[hashval];
288        tags->hashtab[hashval] = np;
289    }
290
291    return old;
292}
293
294static void removeFromHash( TidyTagImpl* tags, ctmbstr s )
295{
296    uint h = hash(s);
297    DictHash *p, *prev = NULL;
298    for (p = tags->hashtab[h]; p && p->tag; p = p->next)
299    {
300        if (TY_(tmbstrcmp)(s, p->tag->name) == 0)
301        {
302            DictHash* next = p->next;
303            if ( prev )
304                prev->next = next;
305            else
306                tags->hashtab[h] = next;
307            MemFree(p);
308            return;
309        }
310        prev = p;
311    }
312}
313
314static void emptyHash( TidyTagImpl* tags )
315{
316    uint i;
317    DictHash *prev, *next;
318
319    for (i = 0; i < ELEMENT_HASH_SIZE; ++i)
320    {
321        prev = NULL;
322        next = tags->hashtab[i];
323
324        while(next)
325        {
326            prev = next->next;
327            MemFree(next);
328            next = prev;
329        }
330
331        tags->hashtab[i] = NULL;
332    }
333}
334#endif /* ELEMENT_HASH_LOOKUP */
335
336static const Dict* lookup( TidyTagImpl* tags, ctmbstr s )
337{
338    const Dict *np;
339#if ELEMENT_HASH_LOOKUP
340    const DictHash* p;
341#endif
342
343    if (!s)
344        return NULL;
345
346#if ELEMENT_HASH_LOOKUP
347    /* this breaks if declared elements get changed between two   */
348    /* parser runs since Tidy would use the cached version rather */
349    /* than the new one.                                          */
350    /* However, as FreeDeclaredTags() correctly cleans the hash   */
351    /* this should not be true anymore.                           */
352    for (p = tags->hashtab[hash(s)]; p && p->tag; p = p->next)
353        if (TY_(tmbstrcmp)(s, p->tag->name) == 0)
354            return p->tag;
355
356    for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np)
357        if (TY_(tmbstrcmp)(s, np->name) == 0)
358            return install(tags, np);
359
360    for (np = tags->declared_tag_list; np; np = np->next)
361        if (TY_(tmbstrcmp)(s, np->name) == 0)
362            return install(tags, np);
363#else
364
365    for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np)
366        if (TY_(tmbstrcmp)(s, np->name) == 0)
367            return np;
368
369    for (np = tags->declared_tag_list; np; np = np->next)
370        if (TY_(tmbstrcmp)(s, np->name) == 0)
371            return np;
372
373#endif /* ELEMENT_HASH_LOOKUP */
374
375    return NULL;
376}
377
378
379static void declare( TidyTagImpl* tags,
380                     ctmbstr name, uint versions, uint model,
381                     Parser *parser, CheckAttribs *chkattrs )
382{
383    if ( name )
384    {
385        Dict* np = (Dict*) lookup( tags, name );
386        if ( np == NULL )
387        {
388            np = (Dict*) MemAlloc( sizeof(Dict) );
389            ClearMemory( np, sizeof(Dict) );
390
391            np->name = TY_(tmbstrdup)( name );
392            np->next = tags->declared_tag_list;
393            tags->declared_tag_list = np;
394        }
395
396        /* Make sure we are not over-writing predefined tags */
397        if ( np->id == TidyTag_UNKNOWN )
398        {
399          np->versions = versions;
400          np->model   |= model;
401          np->parser   = parser;
402          np->chkattrs = chkattrs;
403          np->attrvers = NULL;
404        }
405    }
406}
407
408/* public interface for finding tag by name */
409Bool TY_(FindTag)( TidyDocImpl* doc, Node *node )
410{
411    const Dict *np = NULL;
412    if ( cfgBool(doc, TidyXmlTags) )
413    {
414        node->tag = doc->tags.xml_tags;
415        return yes;
416    }
417
418    if ( node->element && (np = lookup(&doc->tags, node->element)) )
419    {
420        node->tag = np;
421        return yes;
422    }
423
424    return no;
425}
426
427const Dict* TY_(LookupTagDef)( TidyTagId tid )
428{
429    const Dict *np;
430
431    for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np )
432        if (np->id == tid)
433            return np;
434
435    return NULL;
436}
437
438Parser* TY_(FindParser)( TidyDocImpl* doc, Node *node )
439{
440    const Dict* np = lookup( &doc->tags, node->element );
441    if ( np )
442        return np->parser;
443    return NULL;
444}
445
446void TY_(DefineTag)( TidyDocImpl* doc, UserTagType tagType, ctmbstr name )
447{
448    Parser* parser = NULL;
449    uint cm = 0;
450    uint vers = VERS_PROPRIETARY;
451
452    switch (tagType)
453    {
454    case tagtype_empty:
455        cm = CM_EMPTY|CM_NO_INDENT|CM_NEW;
456        parser = TY_(ParseBlock);
457        break;
458
459    case tagtype_inline:
460        cm = CM_INLINE|CM_NO_INDENT|CM_NEW;
461        parser = TY_(ParseInline);
462        break;
463
464    case tagtype_block:
465        cm = CM_BLOCK|CM_NO_INDENT|CM_NEW;
466        parser = TY_(ParseBlock);
467        break;
468
469    case tagtype_pre:
470        cm = CM_BLOCK|CM_NO_INDENT|CM_NEW;
471        parser = TY_(ParsePre);
472        break;
473
474    case tagtype_null:
475        break;
476    }
477    if ( cm && parser )
478        declare( &doc->tags, name, vers, cm, parser, NULL );
479}
480
481TidyIterator   TY_(GetDeclaredTagList)( TidyDocImpl* doc )
482{
483    return (TidyIterator) doc->tags.declared_tag_list;
484}
485
486ctmbstr        TY_(GetNextDeclaredTag)( TidyDocImpl* ARG_UNUSED(doc),
487                                        UserTagType tagType, TidyIterator* iter )
488{
489    ctmbstr name = NULL;
490    Dict* curr;
491    for ( curr = (Dict*) *iter; name == NULL && curr != NULL; curr = curr->next )
492    {
493        switch ( tagType )
494        {
495        case tagtype_empty:
496            if ( (curr->model & CM_EMPTY) != 0 )
497                name = curr->name;
498            break;
499
500        case tagtype_inline:
501            if ( (curr->model & CM_INLINE) != 0 )
502                name = curr->name;
503            break;
504
505        case tagtype_block:
506            if ( (curr->model & CM_BLOCK) != 0 &&
507                 curr->parser == TY_(ParseBlock) )
508                name = curr->name;
509            break;
510
511        case tagtype_pre:
512            if ( (curr->model & CM_BLOCK) != 0 &&
513                 curr->parser == TY_(ParsePre) )
514                name = curr->name;
515            break;
516
517        case tagtype_null:
518            break;
519        }
520    }
521    *iter = (TidyIterator) curr;
522    return name;
523}
524
525void TY_(InitTags)( TidyDocImpl* doc )
526{
527    Dict* xml;
528    TidyTagImpl* tags = &doc->tags;
529
530    ClearMemory( tags, sizeof(TidyTagImpl) );
531
532    /* create dummy entry for all xml tags */
533    xml = (Dict*) MemAlloc( sizeof(Dict) );
534    ClearMemory( xml, sizeof(Dict) );
535    xml->name = NULL;
536    xml->versions = VERS_XML;
537    xml->model = CM_BLOCK;
538    xml->parser = NULL;
539    xml->chkattrs = NULL;
540    xml->attrvers = NULL;
541    tags->xml_tags = xml;
542}
543
544/* By default, zap all of them.  But allow
545** an single type to be specified.
546*/
547void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType )
548{
549    TidyTagImpl* tags = &doc->tags;
550    Dict *curr, *next = NULL, *prev = NULL;
551
552    for ( curr=tags->declared_tag_list; curr; curr = next )
553    {
554        Bool deleteIt = yes;
555        next = curr->next;
556        switch ( tagType )
557        {
558        case tagtype_empty:
559            deleteIt = ( curr->model & CM_EMPTY ) != 0;
560            break;
561
562        case tagtype_inline:
563            deleteIt = ( curr->model & CM_INLINE ) != 0;
564            break;
565
566        case tagtype_block:
567            deleteIt = ( (curr->model & CM_BLOCK) != 0 &&
568                         curr->parser == TY_(ParseBlock) );
569            break;
570
571        case tagtype_pre:
572            deleteIt = ( (curr->model & CM_BLOCK) != 0 &&
573                         curr->parser == TY_(ParsePre) );
574            break;
575
576        case tagtype_null:
577            break;
578        }
579
580        if ( deleteIt )
581        {
582#if ELEMENT_HASH_LOOKUP
583          removeFromHash( &doc->tags, curr->name );
584#endif
585          MemFree( curr->name );
586          MemFree( curr );
587          if ( prev )
588            prev->next = next;
589          else
590            tags->declared_tag_list = next;
591        }
592        else
593          prev = curr;
594    }
595}
596
597void TY_(FreeTags)( TidyDocImpl* doc )
598{
599    TidyTagImpl* tags = &doc->tags;
600
601#if ELEMENT_HASH_LOOKUP
602    emptyHash( tags );
603#endif
604    TY_(FreeDeclaredTags)( doc, tagtype_null );
605    MemFree( tags->xml_tags );
606
607    /* get rid of dangling tag references */
608    ClearMemory( tags, sizeof(TidyTagImpl) );
609}
610
611
612/* default method for checking an element's attributes */
613void TY_(CheckAttributes)( TidyDocImpl* doc, Node *node )
614{
615    AttVal *next, *attval = node->attributes;
616    while (attval)
617    {
618        next = attval->next;
619        TY_(CheckAttribute)( doc, node, attval );
620        attval = next;
621    }
622}
623
624/* methods for checking attributes for specific elements */
625
626void CheckIMG( TidyDocImpl* doc, Node *node )
627{
628    Bool HasAlt = TY_(AttrGetById)(node, TidyAttr_ALT) != NULL;
629    Bool HasSrc = TY_(AttrGetById)(node, TidyAttr_SRC) != NULL;
630    Bool HasUseMap = TY_(AttrGetById)(node, TidyAttr_USEMAP) != NULL;
631    Bool HasIsMap = TY_(AttrGetById)(node, TidyAttr_ISMAP) != NULL;
632    Bool HasDataFld = TY_(AttrGetById)(node, TidyAttr_DATAFLD) != NULL;
633
634    TY_(CheckAttributes)(doc, node);
635
636    if ( !HasAlt )
637    {
638        if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
639        {
640            doc->badAccess |= MISSING_IMAGE_ALT;
641            TY_(ReportMissingAttr)( doc, node, "alt" );
642        }
643
644        if ( cfgStr(doc, TidyAltText) )
645            TY_(AddAttribute)( doc, node, "alt", cfgStr(doc, TidyAltText) );
646    }
647
648    if ( !HasSrc && !HasDataFld )
649        TY_(ReportMissingAttr)( doc, node, "src" );
650
651    if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
652    {
653        if ( HasIsMap && !HasUseMap )
654            TY_(ReportAttrError)( doc, node, NULL, MISSING_IMAGEMAP);
655    }
656}
657
658void CheckCaption(TidyDocImpl* doc, Node *node)
659{
660    AttVal *attval;
661
662    TY_(CheckAttributes)(doc, node);
663
664    attval = TY_(AttrGetById)(node, TidyAttr_ALIGN);
665
666    if (!AttrHasValue(attval))
667        return;
668
669    if (AttrValueIs(attval, "left") || AttrValueIs(attval, "right"))
670        TY_(ConstrainVersion)(doc, VERS_HTML40_LOOSE);
671    else if (AttrValueIs(attval, "top") || AttrValueIs(attval, "bottom"))
672        TY_(ConstrainVersion)(doc, ~(VERS_HTML20|VERS_HTML32));
673    else
674        TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE);
675}
676
677void CheckHTML( TidyDocImpl* doc, Node *node )
678{
679    TY_(CheckAttributes)(doc, node);
680}
681
682void CheckAREA( TidyDocImpl* doc, Node *node )
683{
684    Bool HasAlt = TY_(AttrGetById)(node, TidyAttr_ALT) != NULL;
685    Bool HasHref = TY_(AttrGetById)(node, TidyAttr_HREF) != NULL;
686    Bool HasNohref = TY_(AttrGetById)(node, TidyAttr_NOHREF) != NULL;
687
688    TY_(CheckAttributes)(doc, node);
689
690    if ( !HasAlt )
691    {
692        if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
693        {
694            doc->badAccess |= MISSING_LINK_ALT;
695            TY_(ReportMissingAttr)( doc, node, "alt" );
696        }
697    }
698
699    if ( !HasHref && !HasNohref )
700        TY_(ReportMissingAttr)( doc, node, "href" );
701}
702
703void CheckTABLE( TidyDocImpl* doc, Node *node )
704{
705    AttVal* attval;
706    Bool HasSummary = TY_(AttrGetById)(node, TidyAttr_SUMMARY) != NULL;
707
708    TY_(CheckAttributes)(doc, node);
709
710    /* a missing summary attribute is bad accessibility, no matter
711       what HTML version is involved; a document without is valid */
712    if (cfg(doc, TidyAccessibilityCheckLevel) == 0)
713    {
714        if (!HasSummary)
715        {
716            doc->badAccess |= MISSING_SUMMARY;
717            TY_(ReportMissingAttr)( doc, node, "summary");
718        }
719    }
720
721    /* convert <table border> to <table border="1"> */
722    if ( cfgBool(doc, TidyXmlOut) && (attval = TY_(AttrGetById)(node, TidyAttr_BORDER)) )
723    {
724        if (attval->value == NULL)
725            attval->value = TY_(tmbstrdup)("1");
726    }
727}
728
729/* add missing type attribute when appropriate */
730void CheckSCRIPT( TidyDocImpl* doc, Node *node )
731{
732    AttVal *lang, *type;
733    char buf[16];
734
735    TY_(CheckAttributes)(doc, node);
736
737    lang = TY_(AttrGetById)(node, TidyAttr_LANGUAGE);
738    type = TY_(AttrGetById)(node, TidyAttr_TYPE);
739
740    if (!type)
741    {
742        /* check for javascript */
743        if (lang)
744        {
745            /* Test #696799. lang->value can be NULL. */
746            buf[0] = '\0';
747            TY_(tmbstrncpy)(buf, lang->value, sizeof(buf));
748            buf[10] = '\0';
749
750            if (TY_(tmbstrncasecmp)(buf, "javascript", 10) == 0 ||
751                 TY_(tmbstrncasecmp)(buf,   "jscript",  7) == 0)
752            {
753                TY_(AddAttribute)(doc, node, "type", "text/javascript");
754            }
755            else if (TY_(tmbstrcasecmp)(buf, "vbscript") == 0)
756            {
757                /* per Randy Waki 8/6/01 */
758                TY_(AddAttribute)(doc, node, "type", "text/vbscript");
759            }
760        }
761        else
762        {
763            TY_(AddAttribute)(doc, node, "type", "text/javascript");
764        }
765
766        type = TY_(AttrGetById)(node, TidyAttr_TYPE);
767
768        if (type != NULL)
769        {
770            TY_(ReportAttrError)(doc, node, type, INSERTING_ATTRIBUTE);
771        }
772        else
773        {
774            TY_(ReportMissingAttr)(doc, node, "type");
775        }
776    }
777}
778
779
780/* add missing type attribute when appropriate */
781void CheckSTYLE( TidyDocImpl* doc, Node *node )
782{
783    AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE);
784
785    TY_(CheckAttributes)( doc, node );
786
787    if ( !type || !type->value || !TY_(tmbstrlen)(type->value) )
788    {
789        type = TY_(RepairAttrValue)(doc, node, "type", "text/css");
790        TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE );
791    }
792}
793
794/* add missing type attribute when appropriate */
795void CheckLINK( TidyDocImpl* doc, Node *node )
796{
797    AttVal *rel = TY_(AttrGetById)(node, TidyAttr_REL);
798
799    TY_(CheckAttributes)( doc, node );
800
801    /* todo: <link rel="alternate stylesheet"> */
802    if (AttrValueIs(rel, "stylesheet"))
803    {
804        AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE);
805        if (!type)
806        {
807            TY_(AddAttribute)( doc, node, "type", "text/css" );
808            type = TY_(AttrGetById)(node, TidyAttr_TYPE);
809            TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE );
810        }
811    }
812}
813
814/* reports missing action attribute */
815void CheckFORM( TidyDocImpl* doc, Node *node )
816{
817    AttVal *action = TY_(AttrGetById)(node, TidyAttr_ACTION);
818
819    TY_(CheckAttributes)(doc, node);
820
821    if (!action)
822        TY_(ReportMissingAttr)(doc, node, "action");
823}
824
825/* reports missing content attribute */
826void CheckMETA( TidyDocImpl* doc, Node *node )
827{
828    AttVal *content = TY_(AttrGetById)(node, TidyAttr_CONTENT);
829
830    TY_(CheckAttributes)(doc, node);
831
832    if (!content)
833        TY_(ReportMissingAttr)( doc, node, "content" );
834    /* name or http-equiv attribute must also be set */
835}
836
837
838Bool TY_(nodeIsText)( Node* node )
839{
840  return ( node && node->type == TextNode );
841}
842
843Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node )
844{
845  if ( doc && node )
846  {
847    uint ix;
848    Lexer* lexer = doc->lexer;
849    for ( ix = node->start; ix < node->end; ++ix )
850    {
851        /* whitespace */
852        if ( !TY_(IsWhite)( lexer->lexbuf[ix] ) )
853            return yes;
854    }
855  }
856  return no;
857}
858
859Bool TY_(nodeIsElement)( Node* node )
860{
861  return ( node &&
862           (node->type == StartTag || node->type == StartEndTag) );
863}
864
865#if 0
866/* Compare & result to operand.  If equal, then all bits
867** requested are set.
868*/
869Bool nodeMatchCM( Node* node, uint contentModel )
870{
871  return ( node && node->tag &&
872           (node->tag->model & contentModel) == contentModel );
873}
874#endif
875
876/* True if any of the bits requested are set.
877*/
878Bool TY_(nodeHasCM)( Node* node, uint contentModel )
879{
880  return ( node && node->tag &&
881           (node->tag->model & contentModel) != 0 );
882}
883
884Bool TY_(nodeCMIsBlock)( Node* node )
885{
886  return TY_(nodeHasCM)( node, CM_BLOCK );
887}
888Bool TY_(nodeCMIsInline)( Node* node )
889{
890  return TY_(nodeHasCM)( node, CM_INLINE );
891}
892Bool TY_(nodeCMIsEmpty)( Node* node )
893{
894  return TY_(nodeHasCM)( node, CM_EMPTY );
895}
896
897Bool TY_(nodeIsHeader)( Node* node )
898{
899    TidyTagId tid = TagId( node  );
900    return ( tid &&
901             tid == TidyTag_H1 ||
902             tid == TidyTag_H2 ||
903             tid == TidyTag_H3 ||
904             tid == TidyTag_H4 ||
905             tid == TidyTag_H5 ||
906             tid == TidyTag_H6 );
907}
908
909uint TY_(nodeHeaderLevel)( Node* node )
910{
911    TidyTagId tid = TagId( node  );
912    switch ( tid )
913    {
914    case TidyTag_H1:
915        return 1;
916    case TidyTag_H2:
917        return 2;
918    case TidyTag_H3:
919        return 3;
920    case TidyTag_H4:
921        return 4;
922    case TidyTag_H5:
923        return 5;
924    case TidyTag_H6:
925        return 6;
926    default:
927    {
928        /* fall through */
929    }
930    }
931    return 0;
932}
933
934/*
935 * local variables:
936 * mode: c
937 * indent-tabs-mode: nil
938 * c-basic-offset: 4
939 * eval: (c-set-offset 'substatement-open 0)
940 * end:
941 */
942