1/* tags.c -- recognize HTML tags 2 3 (c) 1998-2006 (W3C) MIT, ERCIM, Keio University 4 See tidy.h for the copyright notice. 5 6 CVS Info : 7 8 $Author$ 9 $Date$ 10 $Revision$ 11 12 The HTML tags are stored as 8 bit ASCII strings. 13 14*/ 15 16#include "tidy-int.h" 17#include "message.h" 18#include "tmbstr.h" 19 20/* Attribute checking methods */ 21static CheckAttribs CheckIMG; 22static CheckAttribs CheckLINK; 23static CheckAttribs CheckAREA; 24static CheckAttribs CheckTABLE; 25static CheckAttribs CheckCaption; 26static CheckAttribs CheckSCRIPT; 27static CheckAttribs CheckSTYLE; 28static CheckAttribs CheckHTML; 29static CheckAttribs CheckFORM; 30static CheckAttribs CheckMETA; 31 32#define VERS_ELEM_A (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 33#define VERS_ELEM_ABBR (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 34#define VERS_ELEM_ACRONYM (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 35#define VERS_ELEM_ADDRESS (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 36#define VERS_ELEM_APPLET (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 37#define VERS_ELEM_AREA (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 38#define VERS_ELEM_B (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 39#define VERS_ELEM_BASE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 40#define VERS_ELEM_BASEFONT (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 41#define VERS_ELEM_BDO (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 42#define VERS_ELEM_BIG (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 43#define VERS_ELEM_BLOCKQUOTE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 44#define VERS_ELEM_BODY (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 45#define VERS_ELEM_BR (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 46#define VERS_ELEM_BUTTON (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 47#define VERS_ELEM_CAPTION (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 48#define VERS_ELEM_CENTER (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 49#define VERS_ELEM_CITE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 50#define VERS_ELEM_CODE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 51#define VERS_ELEM_COL (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 52#define VERS_ELEM_COLGROUP (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 53#define VERS_ELEM_DD (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 54#define VERS_ELEM_DEL (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 55#define VERS_ELEM_DFN (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 56#define VERS_ELEM_DIR (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 57#define VERS_ELEM_DIV (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 58#define VERS_ELEM_DL (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 59#define VERS_ELEM_DT (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 60#define VERS_ELEM_EM (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 61#define VERS_ELEM_FIELDSET (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 62#define VERS_ELEM_FONT (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 63#define VERS_ELEM_FORM (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 64#define VERS_ELEM_FRAME (xxxx|xxxx|xxxx|xxxx|xxxx|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 65#define VERS_ELEM_FRAMESET (xxxx|xxxx|xxxx|xxxx|xxxx|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 66#define VERS_ELEM_H1 (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 67#define VERS_ELEM_H2 (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 68#define VERS_ELEM_H3 (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 69#define VERS_ELEM_H4 (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 70#define VERS_ELEM_H5 (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 71#define VERS_ELEM_H6 (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 72#define VERS_ELEM_HEAD (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 73#define VERS_ELEM_HR (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 74#define VERS_ELEM_HTML (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 75#define VERS_ELEM_I (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 76#define VERS_ELEM_IFRAME (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 77#define VERS_ELEM_IMG (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 78#define VERS_ELEM_INPUT (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 79#define VERS_ELEM_INS (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 80#define VERS_ELEM_ISINDEX (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 81#define VERS_ELEM_KBD (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 82#define VERS_ELEM_LABEL (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 83#define VERS_ELEM_LEGEND (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 84#define VERS_ELEM_LI (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 85#define VERS_ELEM_LINK (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 86#define VERS_ELEM_LISTING (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx) 87#define VERS_ELEM_MAP (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 88#define VERS_ELEM_MENU (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 89#define VERS_ELEM_META (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 90#define VERS_ELEM_NEXTID (HT20|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx) 91#define VERS_ELEM_NOFRAMES (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 92#define VERS_ELEM_NOSCRIPT (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 93#define VERS_ELEM_OBJECT (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 94#define VERS_ELEM_OL (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 95#define VERS_ELEM_OPTGROUP (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 96#define VERS_ELEM_OPTION (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 97#define VERS_ELEM_P (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 98#define VERS_ELEM_PARAM (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 99#define VERS_ELEM_PLAINTEXT (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx) 100#define VERS_ELEM_PRE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 101#define VERS_ELEM_Q (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 102#define VERS_ELEM_RB (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx) 103#define VERS_ELEM_RBC (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx) 104#define VERS_ELEM_RP (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx) 105#define VERS_ELEM_RT (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx) 106#define VERS_ELEM_RTC (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx) 107#define VERS_ELEM_RUBY (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx) 108#define VERS_ELEM_S (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 109#define VERS_ELEM_SAMP (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 110#define VERS_ELEM_SCRIPT (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 111#define VERS_ELEM_SELECT (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 112#define VERS_ELEM_SMALL (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 113#define VERS_ELEM_SPAN (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 114#define VERS_ELEM_STRIKE (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 115#define VERS_ELEM_STRONG (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 116#define VERS_ELEM_STYLE (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 117#define VERS_ELEM_SUB (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 118#define VERS_ELEM_SUP (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 119#define VERS_ELEM_TABLE (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 120#define VERS_ELEM_TBODY (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 121#define VERS_ELEM_TD (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 122#define VERS_ELEM_TEXTAREA (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 123#define VERS_ELEM_TFOOT (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 124#define VERS_ELEM_TH (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 125#define VERS_ELEM_THEAD (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 126#define VERS_ELEM_TITLE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 127#define VERS_ELEM_TR (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 128#define VERS_ELEM_TT (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx) 129#define VERS_ELEM_U (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx) 130#define VERS_ELEM_UL (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 131#define VERS_ELEM_VAR (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10) 132#define VERS_ELEM_XMP (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx) 133 134static const Dict tag_defs[] = 135{ 136 { TidyTag_UNKNOWN, "unknown!", VERS_UNKNOWN, NULL, (0), NULL, NULL }, 137 138 /* W3C defined elements */ 139 { TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], (CM_INLINE), TY_(ParseInline), NULL }, 140 { TidyTag_ABBR, "abbr", VERS_ELEM_ABBR, &TY_(W3CAttrsFor_ABBR)[0], (CM_INLINE), TY_(ParseInline), NULL }, 141 { TidyTag_ACRONYM, "acronym", VERS_ELEM_ACRONYM, &TY_(W3CAttrsFor_ACRONYM)[0], (CM_INLINE), TY_(ParseInline), NULL }, 142 { TidyTag_ADDRESS, "address", VERS_ELEM_ADDRESS, &TY_(W3CAttrsFor_ADDRESS)[0], (CM_BLOCK), TY_(ParseInline), NULL }, 143 { TidyTag_APPLET, "applet", VERS_ELEM_APPLET, &TY_(W3CAttrsFor_APPLET)[0], (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL }, 144 { TidyTag_AREA, "area", VERS_ELEM_AREA, &TY_(W3CAttrsFor_AREA)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), CheckAREA }, 145 { TidyTag_B, "b", VERS_ELEM_B, &TY_(W3CAttrsFor_B)[0], (CM_INLINE), TY_(ParseInline), NULL }, 146 { TidyTag_BASE, "base", VERS_ELEM_BASE, &TY_(W3CAttrsFor_BASE)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), NULL }, 147 { TidyTag_BASEFONT, "basefont", VERS_ELEM_BASEFONT, &TY_(W3CAttrsFor_BASEFONT)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL }, 148 { TidyTag_BDO, "bdo", VERS_ELEM_BDO, &TY_(W3CAttrsFor_BDO)[0], (CM_INLINE), TY_(ParseInline), NULL }, 149 { TidyTag_BIG, "big", VERS_ELEM_BIG, &TY_(W3CAttrsFor_BIG)[0], (CM_INLINE), TY_(ParseInline), NULL }, 150 { TidyTag_BLOCKQUOTE, "blockquote", VERS_ELEM_BLOCKQUOTE, &TY_(W3CAttrsFor_BLOCKQUOTE)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, 151 { TidyTag_BODY, "body", VERS_ELEM_BODY, &TY_(W3CAttrsFor_BODY)[0], (CM_HTML|CM_OPT|CM_OMITST), TY_(ParseBody), NULL }, 152 { TidyTag_BR, "br", VERS_ELEM_BR, &TY_(W3CAttrsFor_BR)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL }, 153 { TidyTag_BUTTON, "button", VERS_ELEM_BUTTON, &TY_(W3CAttrsFor_BUTTON)[0], (CM_INLINE), TY_(ParseBlock), NULL }, 154 { TidyTag_CAPTION, "caption", VERS_ELEM_CAPTION, &TY_(W3CAttrsFor_CAPTION)[0], (CM_TABLE), TY_(ParseInline), CheckCaption }, 155 { TidyTag_CENTER, "center", VERS_ELEM_CENTER, &TY_(W3CAttrsFor_CENTER)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, 156 { TidyTag_CITE, "cite", VERS_ELEM_CITE, &TY_(W3CAttrsFor_CITE)[0], (CM_INLINE), TY_(ParseInline), NULL }, 157 { TidyTag_CODE, "code", VERS_ELEM_CODE, &TY_(W3CAttrsFor_CODE)[0], (CM_INLINE), TY_(ParseInline), NULL }, 158 { TidyTag_COL, "col", VERS_ELEM_COL, &TY_(W3CAttrsFor_COL)[0], (CM_TABLE|CM_EMPTY), TY_(ParseEmpty), NULL }, 159 { TidyTag_COLGROUP, "colgroup", VERS_ELEM_COLGROUP, &TY_(W3CAttrsFor_COLGROUP)[0], (CM_TABLE|CM_OPT), TY_(ParseColGroup), NULL }, 160 { TidyTag_DD, "dd", VERS_ELEM_DD, &TY_(W3CAttrsFor_DD)[0], (CM_DEFLIST|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL }, 161 { TidyTag_DEL, "del", VERS_ELEM_DEL, &TY_(W3CAttrsFor_DEL)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseInline), NULL }, 162 { TidyTag_DFN, "dfn", VERS_ELEM_DFN, &TY_(W3CAttrsFor_DFN)[0], (CM_INLINE), TY_(ParseInline), NULL }, 163 { TidyTag_DIR, "dir", VERS_ELEM_DIR, &TY_(W3CAttrsFor_DIR)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParseList), NULL }, 164 { TidyTag_DIV, "div", VERS_ELEM_DIV, &TY_(W3CAttrsFor_DIV)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, 165 { TidyTag_DL, "dl", VERS_ELEM_DL, &TY_(W3CAttrsFor_DL)[0], (CM_BLOCK), TY_(ParseDefList), NULL }, 166 { TidyTag_DT, "dt", VERS_ELEM_DT, &TY_(W3CAttrsFor_DT)[0], (CM_DEFLIST|CM_OPT|CM_NO_INDENT), TY_(ParseInline), NULL }, 167 { TidyTag_EM, "em", VERS_ELEM_EM, &TY_(W3CAttrsFor_EM)[0], (CM_INLINE), TY_(ParseInline), NULL }, 168 { TidyTag_FIELDSET, "fieldset", VERS_ELEM_FIELDSET, &TY_(W3CAttrsFor_FIELDSET)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, 169 { TidyTag_FONT, "font", VERS_ELEM_FONT, &TY_(W3CAttrsFor_FONT)[0], (CM_INLINE), TY_(ParseInline), NULL }, 170 { TidyTag_FORM, "form", VERS_ELEM_FORM, &TY_(W3CAttrsFor_FORM)[0], (CM_BLOCK), TY_(ParseBlock), CheckFORM }, 171 { TidyTag_FRAME, "frame", VERS_ELEM_FRAME, &TY_(W3CAttrsFor_FRAME)[0], (CM_FRAMES|CM_EMPTY), TY_(ParseEmpty), NULL }, 172 { TidyTag_FRAMESET, "frameset", VERS_ELEM_FRAMESET, &TY_(W3CAttrsFor_FRAMESET)[0], (CM_HTML|CM_FRAMES), TY_(ParseFrameSet), NULL }, 173 { TidyTag_H1, "h1", VERS_ELEM_H1, &TY_(W3CAttrsFor_H1)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL }, 174 { TidyTag_H2, "h2", VERS_ELEM_H2, &TY_(W3CAttrsFor_H2)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL }, 175 { TidyTag_H3, "h3", VERS_ELEM_H3, &TY_(W3CAttrsFor_H3)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL }, 176 { TidyTag_H4, "h4", VERS_ELEM_H4, &TY_(W3CAttrsFor_H4)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL }, 177 { TidyTag_H5, "h5", VERS_ELEM_H5, &TY_(W3CAttrsFor_H5)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL }, 178 { TidyTag_H6, "h6", VERS_ELEM_H6, &TY_(W3CAttrsFor_H6)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL }, 179 { TidyTag_HEAD, "head", VERS_ELEM_HEAD, &TY_(W3CAttrsFor_HEAD)[0], (CM_HTML|CM_OPT|CM_OMITST), TY_(ParseHead), NULL }, 180 { TidyTag_HR, "hr", VERS_ELEM_HR, &TY_(W3CAttrsFor_HR)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), NULL }, 181 { TidyTag_HTML, "html", VERS_ELEM_HTML, &TY_(W3CAttrsFor_HTML)[0], (CM_HTML|CM_OPT|CM_OMITST), TY_(ParseHTML), CheckHTML }, 182 { TidyTag_I, "i", VERS_ELEM_I, &TY_(W3CAttrsFor_I)[0], (CM_INLINE), TY_(ParseInline), NULL }, 183 { TidyTag_IFRAME, "iframe", VERS_ELEM_IFRAME, &TY_(W3CAttrsFor_IFRAME)[0], (CM_INLINE), TY_(ParseBlock), NULL }, 184 { TidyTag_IMG, "img", VERS_ELEM_IMG, &TY_(W3CAttrsFor_IMG)[0], (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), CheckIMG }, 185 { TidyTag_INPUT, "input", VERS_ELEM_INPUT, &TY_(W3CAttrsFor_INPUT)[0], (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), NULL }, 186 { TidyTag_INS, "ins", VERS_ELEM_INS, &TY_(W3CAttrsFor_INS)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseInline), NULL }, 187 { TidyTag_ISINDEX, "isindex", VERS_ELEM_ISINDEX, &TY_(W3CAttrsFor_ISINDEX)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), NULL }, 188 { TidyTag_KBD, "kbd", VERS_ELEM_KBD, &TY_(W3CAttrsFor_KBD)[0], (CM_INLINE), TY_(ParseInline), NULL }, 189 { TidyTag_LABEL, "label", VERS_ELEM_LABEL, &TY_(W3CAttrsFor_LABEL)[0], (CM_INLINE), TY_(ParseInline), NULL }, 190 { TidyTag_LEGEND, "legend", VERS_ELEM_LEGEND, &TY_(W3CAttrsFor_LEGEND)[0], (CM_INLINE), TY_(ParseInline), NULL }, 191 { TidyTag_LI, "li", VERS_ELEM_LI, &TY_(W3CAttrsFor_LI)[0], (CM_LIST|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL }, 192 { TidyTag_LINK, "link", VERS_ELEM_LINK, &TY_(W3CAttrsFor_LINK)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), CheckLINK }, 193 { TidyTag_LISTING, "listing", VERS_ELEM_LISTING, &TY_(W3CAttrsFor_LISTING)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL }, 194 { TidyTag_MAP, "map", VERS_ELEM_MAP, &TY_(W3CAttrsFor_MAP)[0], (CM_INLINE), TY_(ParseBlock), NULL }, 195 { TidyTag_MENU, "menu", VERS_ELEM_MENU, &TY_(W3CAttrsFor_MENU)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParseList), NULL }, 196 { TidyTag_META, "meta", VERS_ELEM_META, &TY_(W3CAttrsFor_META)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), CheckMETA }, 197 { TidyTag_NOFRAMES, "noframes", VERS_ELEM_NOFRAMES, &TY_(W3CAttrsFor_NOFRAMES)[0], (CM_BLOCK|CM_FRAMES), TY_(ParseNoFrames), NULL }, 198 { TidyTag_NOSCRIPT, "noscript", VERS_ELEM_NOSCRIPT, &TY_(W3CAttrsFor_NOSCRIPT)[0], (CM_BLOCK|CM_INLINE|CM_MIXED), TY_(ParseBlock), NULL }, 199 { TidyTag_OBJECT, "object", VERS_ELEM_OBJECT, &TY_(W3CAttrsFor_OBJECT)[0], (CM_OBJECT|CM_HEAD|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL }, 200 { TidyTag_OL, "ol", VERS_ELEM_OL, &TY_(W3CAttrsFor_OL)[0], (CM_BLOCK), TY_(ParseList), NULL }, 201 { TidyTag_OPTGROUP, "optgroup", VERS_ELEM_OPTGROUP, &TY_(W3CAttrsFor_OPTGROUP)[0], (CM_FIELD|CM_OPT), TY_(ParseOptGroup), NULL }, 202 { TidyTag_OPTION, "option", VERS_ELEM_OPTION, &TY_(W3CAttrsFor_OPTION)[0], (CM_FIELD|CM_OPT), TY_(ParseText), NULL }, 203 { TidyTag_P, "p", VERS_ELEM_P, &TY_(W3CAttrsFor_P)[0], (CM_BLOCK|CM_OPT), TY_(ParseInline), NULL }, 204 { TidyTag_PARAM, "param", VERS_ELEM_PARAM, &TY_(W3CAttrsFor_PARAM)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL }, 205 { TidyTag_PLAINTEXT, "plaintext", VERS_ELEM_PLAINTEXT, &TY_(W3CAttrsFor_PLAINTEXT)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL }, 206 { TidyTag_PRE, "pre", VERS_ELEM_PRE, &TY_(W3CAttrsFor_PRE)[0], (CM_BLOCK), TY_(ParsePre), NULL }, 207 { TidyTag_Q, "q", VERS_ELEM_Q, &TY_(W3CAttrsFor_Q)[0], (CM_INLINE), TY_(ParseInline), NULL }, 208 { TidyTag_RB, "rb", VERS_ELEM_RB, &TY_(W3CAttrsFor_RB)[0], (CM_INLINE), TY_(ParseInline), NULL }, 209 { TidyTag_RBC, "rbc", VERS_ELEM_RBC, &TY_(W3CAttrsFor_RBC)[0], (CM_INLINE), TY_(ParseInline), NULL }, 210 { TidyTag_RP, "rp", VERS_ELEM_RP, &TY_(W3CAttrsFor_RP)[0], (CM_INLINE), TY_(ParseInline), NULL }, 211 { TidyTag_RT, "rt", VERS_ELEM_RT, &TY_(W3CAttrsFor_RT)[0], (CM_INLINE), TY_(ParseInline), NULL }, 212 { TidyTag_RTC, "rtc", VERS_ELEM_RTC, &TY_(W3CAttrsFor_RTC)[0], (CM_INLINE), TY_(ParseInline), NULL }, 213 { TidyTag_RUBY, "ruby", VERS_ELEM_RUBY, &TY_(W3CAttrsFor_RUBY)[0], (CM_INLINE), TY_(ParseInline), NULL }, 214 { TidyTag_S, "s", VERS_ELEM_S, &TY_(W3CAttrsFor_S)[0], (CM_INLINE), TY_(ParseInline), NULL }, 215 { TidyTag_SAMP, "samp", VERS_ELEM_SAMP, &TY_(W3CAttrsFor_SAMP)[0], (CM_INLINE), TY_(ParseInline), NULL }, 216 { TidyTag_SCRIPT, "script", VERS_ELEM_SCRIPT, &TY_(W3CAttrsFor_SCRIPT)[0], (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), CheckSCRIPT }, 217 { TidyTag_SELECT, "select", VERS_ELEM_SELECT, &TY_(W3CAttrsFor_SELECT)[0], (CM_INLINE|CM_FIELD), TY_(ParseSelect), NULL }, 218 { TidyTag_SMALL, "small", VERS_ELEM_SMALL, &TY_(W3CAttrsFor_SMALL)[0], (CM_INLINE), TY_(ParseInline), NULL }, 219 { TidyTag_SPAN, "span", VERS_ELEM_SPAN, &TY_(W3CAttrsFor_SPAN)[0], (CM_INLINE), TY_(ParseInline), NULL }, 220 { TidyTag_STRIKE, "strike", VERS_ELEM_STRIKE, &TY_(W3CAttrsFor_STRIKE)[0], (CM_INLINE), TY_(ParseInline), NULL }, 221 { TidyTag_STRONG, "strong", VERS_ELEM_STRONG, &TY_(W3CAttrsFor_STRONG)[0], (CM_INLINE), TY_(ParseInline), NULL }, 222 { TidyTag_STYLE, "style", VERS_ELEM_STYLE, &TY_(W3CAttrsFor_STYLE)[0], (CM_HEAD), TY_(ParseScript), CheckSTYLE }, 223 { TidyTag_SUB, "sub", VERS_ELEM_SUB, &TY_(W3CAttrsFor_SUB)[0], (CM_INLINE), TY_(ParseInline), NULL }, 224 { TidyTag_SUP, "sup", VERS_ELEM_SUP, &TY_(W3CAttrsFor_SUP)[0], (CM_INLINE), TY_(ParseInline), NULL }, 225 { TidyTag_TABLE, "table", VERS_ELEM_TABLE, &TY_(W3CAttrsFor_TABLE)[0], (CM_BLOCK), TY_(ParseTableTag), CheckTABLE }, 226 { TidyTag_TBODY, "tbody", VERS_ELEM_TBODY, &TY_(W3CAttrsFor_TBODY)[0], (CM_TABLE|CM_ROWGRP|CM_OPT), TY_(ParseRowGroup), NULL }, 227 { TidyTag_TD, "td", VERS_ELEM_TD, &TY_(W3CAttrsFor_TD)[0], (CM_ROW|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL }, 228 { TidyTag_TEXTAREA, "textarea", VERS_ELEM_TEXTAREA, &TY_(W3CAttrsFor_TEXTAREA)[0], (CM_INLINE|CM_FIELD), TY_(ParseText), NULL }, 229 { TidyTag_TFOOT, "tfoot", VERS_ELEM_TFOOT, &TY_(W3CAttrsFor_TFOOT)[0], (CM_TABLE|CM_ROWGRP|CM_OPT), TY_(ParseRowGroup), NULL }, 230 { TidyTag_TH, "th", VERS_ELEM_TH, &TY_(W3CAttrsFor_TH)[0], (CM_ROW|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL }, 231 { TidyTag_THEAD, "thead", VERS_ELEM_THEAD, &TY_(W3CAttrsFor_THEAD)[0], (CM_TABLE|CM_ROWGRP|CM_OPT), TY_(ParseRowGroup), NULL }, 232 { TidyTag_TITLE, "title", VERS_ELEM_TITLE, &TY_(W3CAttrsFor_TITLE)[0], (CM_HEAD), TY_(ParseTitle), NULL }, 233 { TidyTag_TR, "tr", VERS_ELEM_TR, &TY_(W3CAttrsFor_TR)[0], (CM_TABLE|CM_OPT), TY_(ParseRow), NULL }, 234 { TidyTag_TT, "tt", VERS_ELEM_TT, &TY_(W3CAttrsFor_TT)[0], (CM_INLINE), TY_(ParseInline), NULL }, 235 { TidyTag_U, "u", VERS_ELEM_U, &TY_(W3CAttrsFor_U)[0], (CM_INLINE), TY_(ParseInline), NULL }, 236 { TidyTag_UL, "ul", VERS_ELEM_UL, &TY_(W3CAttrsFor_UL)[0], (CM_BLOCK), TY_(ParseList), NULL }, 237 { TidyTag_VAR, "var", VERS_ELEM_VAR, &TY_(W3CAttrsFor_VAR)[0], (CM_INLINE), TY_(ParseInline), NULL }, 238 { TidyTag_XMP, "xmp", VERS_ELEM_XMP, &TY_(W3CAttrsFor_XMP)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL }, 239 { TidyTag_NEXTID, "nextid", VERS_ELEM_NEXTID, &TY_(W3CAttrsFor_NEXTID)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), NULL }, 240 241 /* proprietary elements */ 242 { TidyTag_ALIGN, "align", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL }, 243 { TidyTag_BGSOUND, "bgsound", VERS_MICROSOFT, NULL, (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), NULL }, 244 { TidyTag_BLINK, "blink", VERS_PROPRIETARY, NULL, (CM_INLINE), TY_(ParseInline), NULL }, 245 { TidyTag_COMMENT, "comment", VERS_MICROSOFT, NULL, (CM_INLINE), TY_(ParseInline), NULL }, 246 { TidyTag_EMBED, "embed", VERS_NETSCAPE, NULL, (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), NULL }, 247 { TidyTag_ILAYER, "ilayer", VERS_NETSCAPE, NULL, (CM_INLINE), TY_(ParseInline), NULL }, 248 { TidyTag_KEYGEN, "keygen", VERS_NETSCAPE, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL }, 249 { TidyTag_LAYER, "layer", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL }, 250 { TidyTag_MARQUEE, "marquee", VERS_MICROSOFT, NULL, (CM_INLINE|CM_OPT), TY_(ParseInline), NULL }, 251 { TidyTag_MULTICOL, "multicol", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL }, 252 { TidyTag_NOBR, "nobr", VERS_PROPRIETARY, NULL, (CM_INLINE), TY_(ParseInline), NULL }, 253 { TidyTag_NOEMBED, "noembed", VERS_NETSCAPE, NULL, (CM_INLINE), TY_(ParseInline), NULL }, 254 { TidyTag_NOLAYER, "nolayer", VERS_NETSCAPE, NULL, (CM_BLOCK|CM_INLINE|CM_MIXED), TY_(ParseBlock), NULL }, 255 { TidyTag_NOSAVE, "nosave", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL }, 256 { TidyTag_SERVER, "server", VERS_NETSCAPE, NULL, (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), NULL }, 257 { TidyTag_SERVLET, "servlet", VERS_SUN, NULL, (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL }, 258 { TidyTag_SPACER, "spacer", VERS_NETSCAPE, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL }, 259 { TidyTag_WBR, "wbr", VERS_PROPRIETARY, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL }, 260 261 /* this must be the final entry */ 262 { (TidyTagId)0, NULL, 0, NULL, (0), NULL, NULL } 263}; 264 265#if ELEMENT_HASH_LOOKUP 266static uint hash(ctmbstr s) 267{ 268 uint hashval; 269 270 for (hashval = 0; *s != '\0'; s++) 271 hashval = *s + 31*hashval; 272 273 return hashval % ELEMENT_HASH_SIZE; 274} 275 276static const Dict *install(TidyTagImpl* tags, const Dict* old) 277{ 278 DictHash *np; 279 uint hashval; 280 281 if (old) 282 { 283 np = (DictHash *)MemAlloc(sizeof(*np)); 284 np->tag = old; 285 286 hashval = hash(old->name); 287 np->next = tags->hashtab[hashval]; 288 tags->hashtab[hashval] = np; 289 } 290 291 return old; 292} 293 294static void removeFromHash( TidyTagImpl* tags, ctmbstr s ) 295{ 296 uint h = hash(s); 297 DictHash *p, *prev = NULL; 298 for (p = tags->hashtab[h]; p && p->tag; p = p->next) 299 { 300 if (TY_(tmbstrcmp)(s, p->tag->name) == 0) 301 { 302 DictHash* next = p->next; 303 if ( prev ) 304 prev->next = next; 305 else 306 tags->hashtab[h] = next; 307 MemFree(p); 308 return; 309 } 310 prev = p; 311 } 312} 313 314static void emptyHash( TidyTagImpl* tags ) 315{ 316 uint i; 317 DictHash *prev, *next; 318 319 for (i = 0; i < ELEMENT_HASH_SIZE; ++i) 320 { 321 prev = NULL; 322 next = tags->hashtab[i]; 323 324 while(next) 325 { 326 prev = next->next; 327 MemFree(next); 328 next = prev; 329 } 330 331 tags->hashtab[i] = NULL; 332 } 333} 334#endif /* ELEMENT_HASH_LOOKUP */ 335 336static const Dict* lookup( TidyTagImpl* tags, ctmbstr s ) 337{ 338 const Dict *np; 339#if ELEMENT_HASH_LOOKUP 340 const DictHash* p; 341#endif 342 343 if (!s) 344 return NULL; 345 346#if ELEMENT_HASH_LOOKUP 347 /* this breaks if declared elements get changed between two */ 348 /* parser runs since Tidy would use the cached version rather */ 349 /* than the new one. */ 350 /* However, as FreeDeclaredTags() correctly cleans the hash */ 351 /* this should not be true anymore. */ 352 for (p = tags->hashtab[hash(s)]; p && p->tag; p = p->next) 353 if (TY_(tmbstrcmp)(s, p->tag->name) == 0) 354 return p->tag; 355 356 for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np) 357 if (TY_(tmbstrcmp)(s, np->name) == 0) 358 return install(tags, np); 359 360 for (np = tags->declared_tag_list; np; np = np->next) 361 if (TY_(tmbstrcmp)(s, np->name) == 0) 362 return install(tags, np); 363#else 364 365 for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np) 366 if (TY_(tmbstrcmp)(s, np->name) == 0) 367 return np; 368 369 for (np = tags->declared_tag_list; np; np = np->next) 370 if (TY_(tmbstrcmp)(s, np->name) == 0) 371 return np; 372 373#endif /* ELEMENT_HASH_LOOKUP */ 374 375 return NULL; 376} 377 378 379static void declare( TidyTagImpl* tags, 380 ctmbstr name, uint versions, uint model, 381 Parser *parser, CheckAttribs *chkattrs ) 382{ 383 if ( name ) 384 { 385 Dict* np = (Dict*) lookup( tags, name ); 386 if ( np == NULL ) 387 { 388 np = (Dict*) MemAlloc( sizeof(Dict) ); 389 ClearMemory( np, sizeof(Dict) ); 390 391 np->name = TY_(tmbstrdup)( name ); 392 np->next = tags->declared_tag_list; 393 tags->declared_tag_list = np; 394 } 395 396 /* Make sure we are not over-writing predefined tags */ 397 if ( np->id == TidyTag_UNKNOWN ) 398 { 399 np->versions = versions; 400 np->model |= model; 401 np->parser = parser; 402 np->chkattrs = chkattrs; 403 np->attrvers = NULL; 404 } 405 } 406} 407 408/* public interface for finding tag by name */ 409Bool TY_(FindTag)( TidyDocImpl* doc, Node *node ) 410{ 411 const Dict *np = NULL; 412 if ( cfgBool(doc, TidyXmlTags) ) 413 { 414 node->tag = doc->tags.xml_tags; 415 return yes; 416 } 417 418 if ( node->element && (np = lookup(&doc->tags, node->element)) ) 419 { 420 node->tag = np; 421 return yes; 422 } 423 424 return no; 425} 426 427const Dict* TY_(LookupTagDef)( TidyTagId tid ) 428{ 429 const Dict *np; 430 431 for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np ) 432 if (np->id == tid) 433 return np; 434 435 return NULL; 436} 437 438Parser* TY_(FindParser)( TidyDocImpl* doc, Node *node ) 439{ 440 const Dict* np = lookup( &doc->tags, node->element ); 441 if ( np ) 442 return np->parser; 443 return NULL; 444} 445 446void TY_(DefineTag)( TidyDocImpl* doc, UserTagType tagType, ctmbstr name ) 447{ 448 Parser* parser = NULL; 449 uint cm = 0; 450 uint vers = VERS_PROPRIETARY; 451 452 switch (tagType) 453 { 454 case tagtype_empty: 455 cm = CM_EMPTY|CM_NO_INDENT|CM_NEW; 456 parser = TY_(ParseBlock); 457 break; 458 459 case tagtype_inline: 460 cm = CM_INLINE|CM_NO_INDENT|CM_NEW; 461 parser = TY_(ParseInline); 462 break; 463 464 case tagtype_block: 465 cm = CM_BLOCK|CM_NO_INDENT|CM_NEW; 466 parser = TY_(ParseBlock); 467 break; 468 469 case tagtype_pre: 470 cm = CM_BLOCK|CM_NO_INDENT|CM_NEW; 471 parser = TY_(ParsePre); 472 break; 473 474 case tagtype_null: 475 break; 476 } 477 if ( cm && parser ) 478 declare( &doc->tags, name, vers, cm, parser, NULL ); 479} 480 481TidyIterator TY_(GetDeclaredTagList)( TidyDocImpl* doc ) 482{ 483 return (TidyIterator) doc->tags.declared_tag_list; 484} 485 486ctmbstr TY_(GetNextDeclaredTag)( TidyDocImpl* ARG_UNUSED(doc), 487 UserTagType tagType, TidyIterator* iter ) 488{ 489 ctmbstr name = NULL; 490 Dict* curr; 491 for ( curr = (Dict*) *iter; name == NULL && curr != NULL; curr = curr->next ) 492 { 493 switch ( tagType ) 494 { 495 case tagtype_empty: 496 if ( (curr->model & CM_EMPTY) != 0 ) 497 name = curr->name; 498 break; 499 500 case tagtype_inline: 501 if ( (curr->model & CM_INLINE) != 0 ) 502 name = curr->name; 503 break; 504 505 case tagtype_block: 506 if ( (curr->model & CM_BLOCK) != 0 && 507 curr->parser == TY_(ParseBlock) ) 508 name = curr->name; 509 break; 510 511 case tagtype_pre: 512 if ( (curr->model & CM_BLOCK) != 0 && 513 curr->parser == TY_(ParsePre) ) 514 name = curr->name; 515 break; 516 517 case tagtype_null: 518 break; 519 } 520 } 521 *iter = (TidyIterator) curr; 522 return name; 523} 524 525void TY_(InitTags)( TidyDocImpl* doc ) 526{ 527 Dict* xml; 528 TidyTagImpl* tags = &doc->tags; 529 530 ClearMemory( tags, sizeof(TidyTagImpl) ); 531 532 /* create dummy entry for all xml tags */ 533 xml = (Dict*) MemAlloc( sizeof(Dict) ); 534 ClearMemory( xml, sizeof(Dict) ); 535 xml->name = NULL; 536 xml->versions = VERS_XML; 537 xml->model = CM_BLOCK; 538 xml->parser = NULL; 539 xml->chkattrs = NULL; 540 xml->attrvers = NULL; 541 tags->xml_tags = xml; 542} 543 544/* By default, zap all of them. But allow 545** an single type to be specified. 546*/ 547void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType ) 548{ 549 TidyTagImpl* tags = &doc->tags; 550 Dict *curr, *next = NULL, *prev = NULL; 551 552 for ( curr=tags->declared_tag_list; curr; curr = next ) 553 { 554 Bool deleteIt = yes; 555 next = curr->next; 556 switch ( tagType ) 557 { 558 case tagtype_empty: 559 deleteIt = ( curr->model & CM_EMPTY ) != 0; 560 break; 561 562 case tagtype_inline: 563 deleteIt = ( curr->model & CM_INLINE ) != 0; 564 break; 565 566 case tagtype_block: 567 deleteIt = ( (curr->model & CM_BLOCK) != 0 && 568 curr->parser == TY_(ParseBlock) ); 569 break; 570 571 case tagtype_pre: 572 deleteIt = ( (curr->model & CM_BLOCK) != 0 && 573 curr->parser == TY_(ParsePre) ); 574 break; 575 576 case tagtype_null: 577 break; 578 } 579 580 if ( deleteIt ) 581 { 582#if ELEMENT_HASH_LOOKUP 583 removeFromHash( &doc->tags, curr->name ); 584#endif 585 MemFree( curr->name ); 586 MemFree( curr ); 587 if ( prev ) 588 prev->next = next; 589 else 590 tags->declared_tag_list = next; 591 } 592 else 593 prev = curr; 594 } 595} 596 597void TY_(FreeTags)( TidyDocImpl* doc ) 598{ 599 TidyTagImpl* tags = &doc->tags; 600 601#if ELEMENT_HASH_LOOKUP 602 emptyHash( tags ); 603#endif 604 TY_(FreeDeclaredTags)( doc, tagtype_null ); 605 MemFree( tags->xml_tags ); 606 607 /* get rid of dangling tag references */ 608 ClearMemory( tags, sizeof(TidyTagImpl) ); 609} 610 611 612/* default method for checking an element's attributes */ 613void TY_(CheckAttributes)( TidyDocImpl* doc, Node *node ) 614{ 615 AttVal *next, *attval = node->attributes; 616 while (attval) 617 { 618 next = attval->next; 619 TY_(CheckAttribute)( doc, node, attval ); 620 attval = next; 621 } 622} 623 624/* methods for checking attributes for specific elements */ 625 626void CheckIMG( TidyDocImpl* doc, Node *node ) 627{ 628 Bool HasAlt = TY_(AttrGetById)(node, TidyAttr_ALT) != NULL; 629 Bool HasSrc = TY_(AttrGetById)(node, TidyAttr_SRC) != NULL; 630 Bool HasUseMap = TY_(AttrGetById)(node, TidyAttr_USEMAP) != NULL; 631 Bool HasIsMap = TY_(AttrGetById)(node, TidyAttr_ISMAP) != NULL; 632 Bool HasDataFld = TY_(AttrGetById)(node, TidyAttr_DATAFLD) != NULL; 633 634 TY_(CheckAttributes)(doc, node); 635 636 if ( !HasAlt ) 637 { 638 if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 ) 639 { 640 doc->badAccess |= MISSING_IMAGE_ALT; 641 TY_(ReportMissingAttr)( doc, node, "alt" ); 642 } 643 644 if ( cfgStr(doc, TidyAltText) ) 645 TY_(AddAttribute)( doc, node, "alt", cfgStr(doc, TidyAltText) ); 646 } 647 648 if ( !HasSrc && !HasDataFld ) 649 TY_(ReportMissingAttr)( doc, node, "src" ); 650 651 if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 ) 652 { 653 if ( HasIsMap && !HasUseMap ) 654 TY_(ReportAttrError)( doc, node, NULL, MISSING_IMAGEMAP); 655 } 656} 657 658void CheckCaption(TidyDocImpl* doc, Node *node) 659{ 660 AttVal *attval; 661 662 TY_(CheckAttributes)(doc, node); 663 664 attval = TY_(AttrGetById)(node, TidyAttr_ALIGN); 665 666 if (!AttrHasValue(attval)) 667 return; 668 669 if (AttrValueIs(attval, "left") || AttrValueIs(attval, "right")) 670 TY_(ConstrainVersion)(doc, VERS_HTML40_LOOSE); 671 else if (AttrValueIs(attval, "top") || AttrValueIs(attval, "bottom")) 672 TY_(ConstrainVersion)(doc, ~(VERS_HTML20|VERS_HTML32)); 673 else 674 TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE); 675} 676 677void CheckHTML( TidyDocImpl* doc, Node *node ) 678{ 679 TY_(CheckAttributes)(doc, node); 680} 681 682void CheckAREA( TidyDocImpl* doc, Node *node ) 683{ 684 Bool HasAlt = TY_(AttrGetById)(node, TidyAttr_ALT) != NULL; 685 Bool HasHref = TY_(AttrGetById)(node, TidyAttr_HREF) != NULL; 686 Bool HasNohref = TY_(AttrGetById)(node, TidyAttr_NOHREF) != NULL; 687 688 TY_(CheckAttributes)(doc, node); 689 690 if ( !HasAlt ) 691 { 692 if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 ) 693 { 694 doc->badAccess |= MISSING_LINK_ALT; 695 TY_(ReportMissingAttr)( doc, node, "alt" ); 696 } 697 } 698 699 if ( !HasHref && !HasNohref ) 700 TY_(ReportMissingAttr)( doc, node, "href" ); 701} 702 703void CheckTABLE( TidyDocImpl* doc, Node *node ) 704{ 705 AttVal* attval; 706 Bool HasSummary = TY_(AttrGetById)(node, TidyAttr_SUMMARY) != NULL; 707 708 TY_(CheckAttributes)(doc, node); 709 710 /* a missing summary attribute is bad accessibility, no matter 711 what HTML version is involved; a document without is valid */ 712 if (cfg(doc, TidyAccessibilityCheckLevel) == 0) 713 { 714 if (!HasSummary) 715 { 716 doc->badAccess |= MISSING_SUMMARY; 717 TY_(ReportMissingAttr)( doc, node, "summary"); 718 } 719 } 720 721 /* convert <table border> to <table border="1"> */ 722 if ( cfgBool(doc, TidyXmlOut) && (attval = TY_(AttrGetById)(node, TidyAttr_BORDER)) ) 723 { 724 if (attval->value == NULL) 725 attval->value = TY_(tmbstrdup)("1"); 726 } 727} 728 729/* add missing type attribute when appropriate */ 730void CheckSCRIPT( TidyDocImpl* doc, Node *node ) 731{ 732 AttVal *lang, *type; 733 char buf[16]; 734 735 TY_(CheckAttributes)(doc, node); 736 737 lang = TY_(AttrGetById)(node, TidyAttr_LANGUAGE); 738 type = TY_(AttrGetById)(node, TidyAttr_TYPE); 739 740 if (!type) 741 { 742 /* check for javascript */ 743 if (lang) 744 { 745 /* Test #696799. lang->value can be NULL. */ 746 buf[0] = '\0'; 747 TY_(tmbstrncpy)(buf, lang->value, sizeof(buf)); 748 buf[10] = '\0'; 749 750 if (TY_(tmbstrncasecmp)(buf, "javascript", 10) == 0 || 751 TY_(tmbstrncasecmp)(buf, "jscript", 7) == 0) 752 { 753 TY_(AddAttribute)(doc, node, "type", "text/javascript"); 754 } 755 else if (TY_(tmbstrcasecmp)(buf, "vbscript") == 0) 756 { 757 /* per Randy Waki 8/6/01 */ 758 TY_(AddAttribute)(doc, node, "type", "text/vbscript"); 759 } 760 } 761 else 762 { 763 TY_(AddAttribute)(doc, node, "type", "text/javascript"); 764 } 765 766 type = TY_(AttrGetById)(node, TidyAttr_TYPE); 767 768 if (type != NULL) 769 { 770 TY_(ReportAttrError)(doc, node, type, INSERTING_ATTRIBUTE); 771 } 772 else 773 { 774 TY_(ReportMissingAttr)(doc, node, "type"); 775 } 776 } 777} 778 779 780/* add missing type attribute when appropriate */ 781void CheckSTYLE( TidyDocImpl* doc, Node *node ) 782{ 783 AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE); 784 785 TY_(CheckAttributes)( doc, node ); 786 787 if ( !type || !type->value || !TY_(tmbstrlen)(type->value) ) 788 { 789 type = TY_(RepairAttrValue)(doc, node, "type", "text/css"); 790 TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE ); 791 } 792} 793 794/* add missing type attribute when appropriate */ 795void CheckLINK( TidyDocImpl* doc, Node *node ) 796{ 797 AttVal *rel = TY_(AttrGetById)(node, TidyAttr_REL); 798 799 TY_(CheckAttributes)( doc, node ); 800 801 /* todo: <link rel="alternate stylesheet"> */ 802 if (AttrValueIs(rel, "stylesheet")) 803 { 804 AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE); 805 if (!type) 806 { 807 TY_(AddAttribute)( doc, node, "type", "text/css" ); 808 type = TY_(AttrGetById)(node, TidyAttr_TYPE); 809 TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE ); 810 } 811 } 812} 813 814/* reports missing action attribute */ 815void CheckFORM( TidyDocImpl* doc, Node *node ) 816{ 817 AttVal *action = TY_(AttrGetById)(node, TidyAttr_ACTION); 818 819 TY_(CheckAttributes)(doc, node); 820 821 if (!action) 822 TY_(ReportMissingAttr)(doc, node, "action"); 823} 824 825/* reports missing content attribute */ 826void CheckMETA( TidyDocImpl* doc, Node *node ) 827{ 828 AttVal *content = TY_(AttrGetById)(node, TidyAttr_CONTENT); 829 830 TY_(CheckAttributes)(doc, node); 831 832 if (!content) 833 TY_(ReportMissingAttr)( doc, node, "content" ); 834 /* name or http-equiv attribute must also be set */ 835} 836 837 838Bool TY_(nodeIsText)( Node* node ) 839{ 840 return ( node && node->type == TextNode ); 841} 842 843Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node ) 844{ 845 if ( doc && node ) 846 { 847 uint ix; 848 Lexer* lexer = doc->lexer; 849 for ( ix = node->start; ix < node->end; ++ix ) 850 { 851 /* whitespace */ 852 if ( !TY_(IsWhite)( lexer->lexbuf[ix] ) ) 853 return yes; 854 } 855 } 856 return no; 857} 858 859Bool TY_(nodeIsElement)( Node* node ) 860{ 861 return ( node && 862 (node->type == StartTag || node->type == StartEndTag) ); 863} 864 865#if 0 866/* Compare & result to operand. If equal, then all bits 867** requested are set. 868*/ 869Bool nodeMatchCM( Node* node, uint contentModel ) 870{ 871 return ( node && node->tag && 872 (node->tag->model & contentModel) == contentModel ); 873} 874#endif 875 876/* True if any of the bits requested are set. 877*/ 878Bool TY_(nodeHasCM)( Node* node, uint contentModel ) 879{ 880 return ( node && node->tag && 881 (node->tag->model & contentModel) != 0 ); 882} 883 884Bool TY_(nodeCMIsBlock)( Node* node ) 885{ 886 return TY_(nodeHasCM)( node, CM_BLOCK ); 887} 888Bool TY_(nodeCMIsInline)( Node* node ) 889{ 890 return TY_(nodeHasCM)( node, CM_INLINE ); 891} 892Bool TY_(nodeCMIsEmpty)( Node* node ) 893{ 894 return TY_(nodeHasCM)( node, CM_EMPTY ); 895} 896 897Bool TY_(nodeIsHeader)( Node* node ) 898{ 899 TidyTagId tid = TagId( node ); 900 return ( tid && 901 tid == TidyTag_H1 || 902 tid == TidyTag_H2 || 903 tid == TidyTag_H3 || 904 tid == TidyTag_H4 || 905 tid == TidyTag_H5 || 906 tid == TidyTag_H6 ); 907} 908 909uint TY_(nodeHeaderLevel)( Node* node ) 910{ 911 TidyTagId tid = TagId( node ); 912 switch ( tid ) 913 { 914 case TidyTag_H1: 915 return 1; 916 case TidyTag_H2: 917 return 2; 918 case TidyTag_H3: 919 return 3; 920 case TidyTag_H4: 921 return 4; 922 case TidyTag_H5: 923 return 5; 924 case TidyTag_H6: 925 return 6; 926 default: 927 { 928 /* fall through */ 929 } 930 } 931 return 0; 932} 933 934/* 935 * local variables: 936 * mode: c 937 * indent-tabs-mode: nil 938 * c-basic-offset: 4 939 * eval: (c-set-offset 'substatement-open 0) 940 * end: 941 */ 942