1/* attrs.c -- recognize HTML attributes
2
3  (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
4  See tidy.h for the copyright notice.
5
6  CVS Info :
7
8    $Author: mrowe $
9    $Date: 2009/01/29 05:45:41 $
10    $Revision: 1.14 $
11
12*/
13
14#include "tidy-int.h"
15#include "attrs.h"
16#include "message.h"
17#include "tmbstr.h"
18#include "utf8.h"
19
20/*
21 Bind attribute types to procedures to check values.
22 You can add new procedures for better validation
23 and each procedure has access to the node in which
24 the attribute occurred as well as the attribute name
25 and its value.
26
27 By default, attributes are checked without regard
28 to the element they are found on. You have the choice
29 of making the procedure test which element is involved
30 or in writing methods for each element which controls
31 exactly how the attributes of that element are checked.
32 This latter approach is best for detecting the absence
33 of required attributes.
34*/
35
/*
 Forward declarations of the attribute value checking procedures.
 They are declared here so that the CH_* macros and the
 attribute_defs table below can refer to them by name.
 CheckClass and CheckStyleAttr are only declared when
 TIDY_APPLE_CHANGES is defined.
*/
static AttrCheck CheckAction;
static AttrCheck CheckScript;
static AttrCheck CheckName;
#ifdef TIDY_APPLE_CHANGES
static AttrCheck CheckClass;
static AttrCheck CheckStyleAttr;
#endif
static AttrCheck CheckId;
static AttrCheck CheckAlign;
static AttrCheck CheckValign;
static AttrCheck CheckBool;
static AttrCheck CheckLength;
static AttrCheck CheckTarget;
static AttrCheck CheckFsubmit;
static AttrCheck CheckClear;
static AttrCheck CheckShape;
static AttrCheck CheckNumber;
static AttrCheck CheckScope;
static AttrCheck CheckColor;
static AttrCheck CheckVType;
static AttrCheck CheckScroll;
static AttrCheck CheckTextDir;
static AttrCheck CheckLang;
static AttrCheck CheckType;
60
/*
 Attribute value categories used in the last column of
 attribute_defs. Each CH_* name expands either to the checking
 procedure bound to that category or to NULL when no procedure
 is bound to it.
*/
#define CH_PCDATA      NULL
#define CH_CHARSET     NULL
#define CH_TYPE        CheckType
#define CH_XTYPE       NULL
#define CH_CHARACTER   NULL
#define CH_URLS        NULL
#define CH_URL         TY_(CheckUrl)
#define CH_SCRIPT      CheckScript
#define CH_ALIGN       CheckAlign
#define CH_VALIGN      CheckValign
#define CH_COLOR       CheckColor
#define CH_CLEAR       CheckClear
#define CH_BORDER      CheckBool     /* kludge */
#define CH_LANG        CheckLang
#define CH_BOOL        CheckBool
#define CH_COLS        NULL
#define CH_NUMBER      CheckNumber
#define CH_LENGTH      CheckLength
#define CH_COORDS      NULL
#define CH_DATE        NULL
#define CH_TEXTDIR     CheckTextDir
#define CH_IDREFS      NULL
#define CH_IDREF       NULL
#define CH_IDDEF       CheckId
#define CH_NAME        CheckName
#define CH_TFRAME      NULL
#define CH_FBORDER     NULL
#define CH_MEDIA       NULL
#define CH_FSUBMIT     CheckFsubmit
#define CH_LINKTYPES   NULL
#define CH_TRULES      NULL
#define CH_SCOPE       CheckScope
#define CH_SHAPE       CheckShape
#define CH_SCROLL      CheckScroll
#define CH_TARGET      CheckTarget
#define CH_VTYPE       CheckVType
#define CH_ACTION      CheckAction
98
/*
 Table of the attributes known to this file. Each entry gives,
 in order: the attribute id, its name as matched by lookup(),
 a VERS_* mask for the attribute, and the procedure used to
 check its value (a CH_* macro, a Check* function, or NULL).

 The table is terminated by an entry whose name is NULL; the
 scans in lookup() stop at that entry, so it must stay last.
*/
static const Attribute attribute_defs [] =
{
  { TidyAttr_UNKNOWN,           "unknown!",          VERS_PROPRIETARY,  NULL         },
  { TidyAttr_ABBR,              "abbr",              VERS_HTML40,       CH_PCDATA    },
  { TidyAttr_ACCEPT,            "accept",            VERS_ALL,          CH_XTYPE     },
  { TidyAttr_ACCEPT_CHARSET,    "accept-charset",    VERS_HTML40,       CH_CHARSET   },
  { TidyAttr_ACCESSKEY,         "accesskey",         VERS_HTML40,       CH_CHARACTER },
  { TidyAttr_ACTION,            "action",            VERS_ALL,          CH_ACTION    },
  { TidyAttr_ADD_DATE,          "add_date",          VERS_NETSCAPE,     CH_PCDATA    }, /* A */
  { TidyAttr_ALIGN,             "align",             VERS_ALL,          CH_ALIGN     }, /* varies by element */
  { TidyAttr_ALINK,             "alink",             VERS_LOOSE,        CH_COLOR     },
  { TidyAttr_ALT,               "alt",               VERS_ALL,          CH_PCDATA    }, /* nowrap */
  { TidyAttr_ARCHIVE,           "archive",           VERS_HTML40,       CH_URLS      }, /* space or comma separated list */
  { TidyAttr_AXIS,              "axis",              VERS_HTML40,       CH_PCDATA    },
  { TidyAttr_BACKGROUND,        "background",        VERS_LOOSE,        CH_URL       },
  { TidyAttr_BGCOLOR,           "bgcolor",           VERS_LOOSE,        CH_COLOR     },
  { TidyAttr_BGPROPERTIES,      "bgproperties",      VERS_PROPRIETARY,  CH_PCDATA    }, /* BODY "fixed" fixes background */
  { TidyAttr_BORDER,            "border",            VERS_ALL,          CH_BORDER    }, /* like LENGTH + "border" */
  { TidyAttr_BORDERCOLOR,       "bordercolor",       VERS_MICROSOFT,    CH_COLOR     }, /* used on TABLE */
  { TidyAttr_BOTTOMMARGIN,      "bottommargin",      VERS_MICROSOFT,    CH_NUMBER    }, /* used on BODY */
  { TidyAttr_CELLPADDING,       "cellpadding",       VERS_FROM32,       CH_LENGTH    }, /* % or pixel values */
  { TidyAttr_CELLSPACING,       "cellspacing",       VERS_FROM32,       CH_LENGTH    },
  { TidyAttr_CHAR,              "char",              VERS_HTML40,       CH_CHARACTER },
  { TidyAttr_CHAROFF,           "charoff",           VERS_HTML40,       CH_LENGTH    },
  { TidyAttr_CHARSET,           "charset",           VERS_HTML40,       CH_CHARSET   },
  { TidyAttr_CHECKED,           "checked",           VERS_ALL,          CH_BOOL      }, /* i.e. "checked" or absent */
  { TidyAttr_CITE,              "cite",              VERS_HTML40,       CH_URL       },
#ifdef TIDY_APPLE_CHANGES
  { TidyAttr_CLASS,             "class",             VERS_HTML40,       CheckClass   },
#else
  { TidyAttr_CLASS,             "class",             VERS_HTML40,       CH_PCDATA    },
#endif
  { TidyAttr_CLASSID,           "classid",           VERS_HTML40,       CH_URL       },
  { TidyAttr_CLEAR,             "clear",             VERS_LOOSE,        CH_CLEAR     }, /* BR: left, right, all */
  { TidyAttr_CODE,              "code",              VERS_LOOSE,        CH_PCDATA    }, /* APPLET */
  { TidyAttr_CODEBASE,          "codebase",          VERS_HTML40,       CH_URL       }, /* OBJECT */
  { TidyAttr_CODETYPE,          "codetype",          VERS_HTML40,       CH_XTYPE     }, /* OBJECT */
  { TidyAttr_COLOR,             "color",             VERS_LOOSE,        CH_COLOR     }, /* BASEFONT, FONT */
  { TidyAttr_COLS,              "cols",              VERS_IFRAME,       CH_COLS      }, /* TABLE & FRAMESET */
  { TidyAttr_COLSPAN,           "colspan",           VERS_FROM32,       CH_NUMBER    },
  { TidyAttr_COMPACT,           "compact",           VERS_ALL,          CH_BOOL      }, /* lists */
  { TidyAttr_CONTENT,           "content",           VERS_ALL,          CH_PCDATA    },
  { TidyAttr_COORDS,            "coords",            VERS_FROM32,       CH_COORDS    }, /* AREA, A */
  { TidyAttr_DATA,              "data",              VERS_HTML40,       CH_URL       }, /* OBJECT */
  { TidyAttr_DATAFLD,           "datafld",           VERS_MICROSOFT,    CH_PCDATA    }, /* used on DIV, IMG */
  { TidyAttr_DATAFORMATAS,      "dataformatas",      VERS_MICROSOFT,    CH_PCDATA    }, /* used on DIV, IMG */
  { TidyAttr_DATAPAGESIZE,      "datapagesize",      VERS_MICROSOFT,    CH_NUMBER    }, /* used on DIV, IMG */
  { TidyAttr_DATASRC,           "datasrc",           VERS_MICROSOFT,    CH_URL       }, /* used on TABLE */
  { TidyAttr_DATETIME,          "datetime",          VERS_HTML40,       CH_DATE      }, /* INS, DEL */
  { TidyAttr_DECLARE,           "declare",           VERS_HTML40,       CH_BOOL      }, /* OBJECT */
  { TidyAttr_DEFER,             "defer",             VERS_HTML40,       CH_BOOL      }, /* SCRIPT */
  { TidyAttr_DIR,               "dir",               VERS_HTML40,       CH_TEXTDIR   }, /* ltr or rtl */
  { TidyAttr_DISABLED,          "disabled",          VERS_HTML40,       CH_BOOL      }, /* form fields */
  { TidyAttr_ENCODING,          "encoding",          VERS_XML,          CH_PCDATA    }, /* <?xml?> */
  { TidyAttr_ENCTYPE,           "enctype",           VERS_ALL,          CH_XTYPE     }, /* FORM */
  { TidyAttr_FACE,              "face",              VERS_LOOSE,        CH_PCDATA    }, /* BASEFONT, FONT */
  { TidyAttr_FOR,               "for",               VERS_HTML40,       CH_IDREF     }, /* LABEL */
  { TidyAttr_FRAME,             "frame",             VERS_HTML40,       CH_TFRAME    }, /* TABLE */
  { TidyAttr_FRAMEBORDER,       "frameborder",       VERS_FRAMESET,     CH_FBORDER   }, /* 0 or 1 */
  { TidyAttr_FRAMESPACING,      "framespacing",      VERS_PROPRIETARY,  CH_NUMBER    },
  { TidyAttr_GRIDX,             "gridx",             VERS_PROPRIETARY,  CH_NUMBER    }, /* TABLE Adobe golive*/
  { TidyAttr_GRIDY,             "gridy",             VERS_PROPRIETARY,  CH_NUMBER    }, /* TABLE Adobe golive */
  { TidyAttr_HEADERS,           "headers",           VERS_HTML40,       CH_IDREFS    }, /* table cells */
  { TidyAttr_HEIGHT,            "height",            VERS_ALL,          CH_LENGTH    }, /* pixels only for TH/TD */
  { TidyAttr_HREF,              "href",              VERS_ALL,          CH_URL       }, /* A, AREA, LINK and BASE */
  { TidyAttr_HREFLANG,          "hreflang",          VERS_HTML40,       CH_LANG      }, /* A, LINK */
  { TidyAttr_HSPACE,            "hspace",            VERS_ALL,          CH_NUMBER    }, /* APPLET, IMG, OBJECT */
  { TidyAttr_HTTP_EQUIV,        "http-equiv",        VERS_ALL,          CH_PCDATA    }, /* META */
  { TidyAttr_ID,                "id",                VERS_HTML40,       CH_IDDEF     },
  { TidyAttr_ISMAP,             "ismap",             VERS_ALL,          CH_BOOL      }, /* IMG */
  { TidyAttr_LABEL,             "label",             VERS_HTML40,       CH_PCDATA    }, /* OPT, OPTGROUP */
  { TidyAttr_LANG,              "lang",              VERS_HTML40,       CH_LANG      },
  { TidyAttr_LANGUAGE,          "language",          VERS_LOOSE,        CH_PCDATA    }, /* SCRIPT */
  { TidyAttr_LAST_MODIFIED,     "last_modified",     VERS_NETSCAPE,     CH_PCDATA    }, /* A */
  { TidyAttr_LAST_VISIT,        "last_visit",        VERS_NETSCAPE,     CH_PCDATA    }, /* A */
  { TidyAttr_LEFTMARGIN,        "leftmargin",        VERS_MICROSOFT,    CH_NUMBER    }, /* used on BODY */
  { TidyAttr_LINK,              "link",              VERS_LOOSE,        CH_COLOR     }, /* BODY */
  { TidyAttr_LONGDESC,          "longdesc",          VERS_HTML40,       CH_URL       }, /* IMG */
  { TidyAttr_LOWSRC,            "lowsrc",            VERS_PROPRIETARY,  CH_URL       }, /* IMG */
  { TidyAttr_MARGINHEIGHT,      "marginheight",      VERS_IFRAME,       CH_NUMBER    }, /* FRAME, IFRAME, BODY */
  { TidyAttr_MARGINWIDTH,       "marginwidth",       VERS_IFRAME,       CH_NUMBER    }, /* ditto */
  { TidyAttr_MAXLENGTH,         "maxlength",         VERS_ALL,          CH_NUMBER    }, /* INPUT */
  { TidyAttr_MEDIA,             "media",             VERS_HTML40,       CH_MEDIA     }, /* STYLE, LINK */
  { TidyAttr_METHOD,            "method",            VERS_ALL,          CH_FSUBMIT   }, /* FORM: get or post */
  { TidyAttr_MULTIPLE,          "multiple",          VERS_ALL,          CH_BOOL      }, /* SELECT */
  { TidyAttr_NAME,              "name",              VERS_ALL,          CH_NAME      },
  { TidyAttr_NOHREF,            "nohref",            VERS_FROM32,       CH_BOOL      }, /* AREA */
  { TidyAttr_NORESIZE,          "noresize",          VERS_FRAMESET,     CH_BOOL      }, /* FRAME */
  { TidyAttr_NOSHADE,           "noshade",           VERS_LOOSE,        CH_BOOL      }, /* HR */
  { TidyAttr_NOWRAP,            "nowrap",            VERS_LOOSE,        CH_BOOL      }, /* table cells */
  { TidyAttr_OBJECT,            "object",            VERS_HTML40_LOOSE, CH_PCDATA    }, /* APPLET */
  { TidyAttr_OnAFTERUPDATE,     "onafterupdate",     VERS_MICROSOFT,    CH_SCRIPT    },
  { TidyAttr_OnBEFOREUNLOAD,    "onbeforeunload",    VERS_MICROSOFT,    CH_SCRIPT    },
  { TidyAttr_OnBEFOREUPDATE,    "onbeforeupdate",    VERS_MICROSOFT,    CH_SCRIPT    },
  { TidyAttr_OnBLUR,            "onblur",            VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnCHANGE,          "onchange",          VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnCLICK,           "onclick",           VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnDATAAVAILABLE,   "ondataavailable",   VERS_MICROSOFT,    CH_SCRIPT    }, /* object, applet */
  { TidyAttr_OnDATASETCHANGED,  "ondatasetchanged",  VERS_MICROSOFT,    CH_SCRIPT    }, /* object, applet */
  { TidyAttr_OnDATASETCOMPLETE, "ondatasetcomplete", VERS_MICROSOFT,    CH_SCRIPT    },
  { TidyAttr_OnDBLCLICK,        "ondblclick",        VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnERRORUPDATE,     "onerrorupdate",     VERS_MICROSOFT,    CH_SCRIPT    }, /* form fields */
  { TidyAttr_OnFOCUS,           "onfocus",           VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnKEYDOWN,         "onkeydown",         VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnKEYPRESS,        "onkeypress",        VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnKEYUP,           "onkeyup",           VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnLOAD,            "onload",            VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnMOUSEDOWN,       "onmousedown",       VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnMOUSEMOVE,       "onmousemove",       VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnMOUSEOUT,        "onmouseout",        VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnMOUSEOVER,       "onmouseover",       VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnMOUSEUP,         "onmouseup",         VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnRESET,           "onreset",           VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnROWENTER,        "onrowenter",        VERS_MICROSOFT,    CH_SCRIPT    }, /* form fields */
  { TidyAttr_OnROWEXIT,         "onrowexit",         VERS_MICROSOFT,    CH_SCRIPT    }, /* form fields */
  { TidyAttr_OnSELECT,          "onselect",          VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnSUBMIT,          "onsubmit",          VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_OnUNLOAD,          "onunload",          VERS_EVENTS,       CH_SCRIPT    }, /* event */
  { TidyAttr_PROFILE,           "profile",           VERS_HTML40,       CH_URL       }, /* HEAD */
  { TidyAttr_PROMPT,            "prompt",            VERS_LOOSE,        CH_PCDATA    }, /* ISINDEX */
  { TidyAttr_RBSPAN,            "rbspan",            VERS_XHTML11,      CH_NUMBER    }, /* ruby markup */
  { TidyAttr_READONLY,          "readonly",          VERS_HTML40,       CH_BOOL      }, /* form fields */
  { TidyAttr_REL,               "rel",               VERS_ALL,          CH_LINKTYPES },
  { TidyAttr_REV,               "rev",               VERS_ALL,          CH_LINKTYPES },
  { TidyAttr_RIGHTMARGIN,       "rightmargin",       VERS_MICROSOFT,    CH_NUMBER    }, /* used on BODY */
  { TidyAttr_ROWS,              "rows",              VERS_ALL,          CH_NUMBER    }, /* TEXTAREA */
  { TidyAttr_ROWSPAN,           "rowspan",           VERS_ALL,          CH_NUMBER    }, /* table cells */
  { TidyAttr_RULES,             "rules",             VERS_HTML40,       CH_TRULES    }, /* TABLE */
  { TidyAttr_SCHEME,            "scheme",            VERS_HTML40,       CH_PCDATA    }, /* META */
  { TidyAttr_SCOPE,             "scope",             VERS_HTML40,       CH_SCOPE     }, /* table cells */
  { TidyAttr_SCROLLING,         "scrolling",         VERS_IFRAME,       CH_SCROLL    }, /* yes, no or auto */
  { TidyAttr_SELECTED,          "selected",          VERS_ALL,          CH_BOOL      }, /* OPTION */
  { TidyAttr_SHAPE,             "shape",             VERS_FROM32,       CH_SHAPE     }, /* AREA, A */
  { TidyAttr_SHOWGRID,          "showgrid",          VERS_PROPRIETARY,  CH_BOOL      }, /* TABLE Adobe golive */
  { TidyAttr_SHOWGRIDX,         "showgridx",         VERS_PROPRIETARY,  CH_BOOL      }, /* TABLE Adobe golive*/
  { TidyAttr_SHOWGRIDY,         "showgridy",         VERS_PROPRIETARY,  CH_BOOL      }, /* TABLE Adobe golive*/
  { TidyAttr_SIZE,              "size",              VERS_LOOSE,        CH_NUMBER    }, /* HR, FONT, BASEFONT, SELECT */
  { TidyAttr_SPAN,              "span",              VERS_HTML40,       CH_NUMBER    }, /* COL, COLGROUP */
  { TidyAttr_SRC,               "src",               VERS_ALL,          CH_URL       }, /* IMG, FRAME, IFRAME */
  { TidyAttr_STANDBY,           "standby",           VERS_HTML40,       CH_PCDATA    }, /* OBJECT */
  { TidyAttr_START,             "start",             VERS_ALL,          CH_NUMBER    }, /* OL */
#ifdef TIDY_APPLE_CHANGES
  { TidyAttr_STYLE,             "style",             VERS_HTML40,     CheckStyleAttr },
#else
  { TidyAttr_STYLE,             "style",             VERS_HTML40,       CH_PCDATA    },
#endif
  { TidyAttr_SUMMARY,           "summary",           VERS_HTML40,       CH_PCDATA    }, /* TABLE */
  { TidyAttr_TABINDEX,          "tabindex",          VERS_HTML40,       CH_NUMBER    }, /* fields, OBJECT  and A */
  { TidyAttr_TARGET,            "target",            VERS_HTML40,       CH_TARGET    }, /* names a frame/window */
  { TidyAttr_TEXT,              "text",              VERS_LOOSE,        CH_COLOR     }, /* BODY */
  { TidyAttr_TITLE,             "title",             VERS_HTML40,       CH_PCDATA    }, /* text tool tip */
  { TidyAttr_TOPMARGIN,         "topmargin",         VERS_MICROSOFT,    CH_NUMBER    }, /* used on BODY */
  { TidyAttr_TYPE,              "type",              VERS_FROM32,       CH_TYPE      }, /* also used by SPACER */
  { TidyAttr_USEMAP,            "usemap",            VERS_ALL,          CH_URL       }, /* things with images */
  { TidyAttr_VALIGN,            "valign",            VERS_FROM32,       CH_VALIGN    },
  { TidyAttr_VALUE,             "value",             VERS_ALL,          CH_PCDATA    },
  { TidyAttr_VALUETYPE,         "valuetype",         VERS_HTML40,       CH_VTYPE     }, /* PARAM: data, ref, object */
  { TidyAttr_VERSION,           "version",           VERS_ALL|VERS_XML, CH_PCDATA    }, /* HTML <?xml?> */
  { TidyAttr_VLINK,             "vlink",             VERS_LOOSE,        CH_COLOR     }, /* BODY */
  { TidyAttr_VSPACE,            "vspace",            VERS_LOOSE,        CH_NUMBER    }, /* IMG, OBJECT, APPLET */
  { TidyAttr_WIDTH,             "width",             VERS_ALL,          CH_LENGTH    }, /* pixels only for TD/TH */
  { TidyAttr_WRAP,              "wrap",              VERS_NETSCAPE,     CH_PCDATA    }, /* textarea */
  { TidyAttr_XML_LANG,          "xml:lang",          VERS_XML,          CH_LANG      }, /* XML language */
  { TidyAttr_XML_SPACE,         "xml:space",         VERS_XML,          CH_PCDATA    }, /* XML white space */

  /* todo: VERS_ALL is wrong! */
  { TidyAttr_XMLNS,             "xmlns",             VERS_ALL,          CH_PCDATA    }, /* name space */
  { TidyAttr_EVENT,             "event",             VERS_HTML40,       CH_PCDATA    }, /* reserved for <script> */
  { TidyAttr_METHODS,           "methods",           VERS_HTML20,       CH_PCDATA    }, /* for <a>, never implemented */
  { TidyAttr_N,                 "n",                 VERS_HTML20,       CH_PCDATA    }, /* for <nextid> */
  { TidyAttr_SDAFORM,           "sdaform",           VERS_HTML20,       CH_PCDATA    }, /* SDATA attribute in HTML 2.0 */
  { TidyAttr_SDAPREF,           "sdapref",           VERS_HTML20,       CH_PCDATA    }, /* SDATA attribute in HTML 2.0 */
  { TidyAttr_SDASUFF,           "sdasuff",           VERS_HTML20,       CH_PCDATA    }, /* SDATA attribute in HTML 2.0 */
  { TidyAttr_URN,               "urn",               VERS_HTML20,       CH_PCDATA    }, /* for <a>, never implemented */

  /* this must be the final entry */
  { N_TIDY_ATTRIBS,             NULL,                VERS_UNKNOWN,      NULL         }
};
277
278
279/* Apple Changes:
280   2007-03-01 iccir Due to the control flow in TY_(CheckAttribute), we cannot
281                    use RemoveAttribute() inside of a Check___ function --
282                    the resulting call to AttributeIsProprietary() will hit
                    deallocated data.  Unfortunately, a lot of the Apple-specific
284                    changes need this ability.
285
286                    The best way to fix this problem would be to have the
287                    Check___ functions return a Bool instead of a void.  If
288                    no is returned, TY_(CheckAttribute) could then call
289                    RemoveAttribute() and bail out.
290
291                    I don't want to sprinkle even more TIDY_APPLE_CHANGES into
292                    this file, however.
293
294                    For now, call MarkAttributeForRemoval() instead.  This
295                    sets the AttVal's (Attribute *)dict to a fake MarkedForRemoval.
296
297                    We then check for this value upon returning to TY_(CheckAttribute).
298*/
#ifdef TIDY_APPLE_CHANGES
/* Sentinel dictionary entry.  An AttVal whose dict points at this object
   has been flagged for removal; it is recognised by address only.
   The id slot holds an attribute id, so it uses TidyAttr_UNKNOWN
   (the same id as the first attribute_defs entry), not a tag id. */
static const Attribute MarkedForRemoval = { TidyAttr_UNKNOWN, "",  VERS_PROPRIETARY,  NULL };

/* Flag attval for removal by pointing its dict at the sentinel.
   attval must not be NULL. */
static void MarkAttributeForRemoval(AttVal* attval)
{
    attval->dict = &MarkedForRemoval;
}

/* Return non-zero if attval was flagged by MarkAttributeForRemoval().
   attval must not be NULL. */
static Bool AttributeIsMarkedForRemoval(AttVal* attval)
{
    return (attval->dict == &MarkedForRemoval);
}
#endif
312
313static uint AttributeVersions(Node* node, AttVal* attval)
314{
315    uint i;
316
317    if (!attval || !attval->dict)
318        return VERS_UNKNOWN;
319
320    if (!node || !node->tag || !node->tag->attrvers)
321        return attval->dict->versions;
322
323    for (i = 0; node->tag->attrvers[i].attribute; ++i)
324        if (node->tag->attrvers[i].attribute == attval->dict->id)
325            return node->tag->attrvers[i].versions;
326
327    return attval->dict->versions & VERS_ALL
328             ? VERS_UNKNOWN
329             : attval->dict->versions;
330
331}
332
333
334/* return the version of the attribute "id" of element "node" */
335uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id )
336{
337    uint i;
338
339    if (!node || !node->tag || !node->tag->attrvers)
340        return VERS_UNKNOWN;
341
342    for (i = 0; node->tag->attrvers[i].attribute; ++i)
343        if (node->tag->attrvers[i].attribute == id)
344            return node->tag->attrvers[i].versions;
345
346    return VERS_UNKNOWN;
347}
348
349/* returns true if the element is a W3C defined element */
350/* but the element/attribute combination is not         */
351static Bool AttributeIsProprietary(Node* node, AttVal* attval)
352{
353    if (!node || !attval)
354        return no;
355
356    if (!node->tag)
357        return no;
358
359    if (!(node->tag->versions & VERS_ALL))
360        return no;
361
362    if (AttributeVersions(node, attval) & VERS_ALL)
363        return no;
364
365    return yes;
366}
367
/* used by CheckColor() */
/* One color table entry: a color name paired with its "#RRGGBB" code. */
struct _colors
{
    ctmbstr name;   /* color name, e.g. "black"; NULL in the terminating entry */
    ctmbstr hex;    /* matching hex code, e.g. "#000000" */
};
374
/* Name <-> hex code table searched by GetColorCode() and GetColorName().
   The entry with a NULL name terminates the table and must stay last. */
static const struct _colors colors[] =
{
    { "black",   "#000000" },
    { "green",   "#008000" },
    { "silver",  "#C0C0C0" },
    { "lime",    "#00FF00" },
    { "gray",    "#808080" },
    { "olive",   "#808000" },
    { "white",   "#FFFFFF" },
    { "yellow",  "#FFFF00" },
    { "maroon",  "#800000" },
    { "navy",    "#000080" },
    { "red",     "#FF0000" },
    { "blue",    "#0000FF" },
    { "purple",  "#800080" },
    { "teal",    "#008080" },
    { "fuchsia", "#FF00FF" },
    { "aqua",    "#00FFFF" },
    { NULL,      NULL      }
};
395
396static ctmbstr GetColorCode(ctmbstr name)
397{
398    uint i;
399
400    for (i = 0; colors[i].name; ++i)
401        if (TY_(tmbstrcasecmp)(name, colors[i].name) == 0)
402            return colors[i].hex;
403
404    return NULL;
405}
406
407static ctmbstr GetColorName(ctmbstr code)
408{
409    uint i;
410
411    for (i = 0; colors[i].name; ++i)
412        if (TY_(tmbstrcasecmp)(code, colors[i].hex) == 0)
413            return colors[i].name;
414
415    return NULL;
416}
417
418#if 0
419static const struct _colors fancy_colors[] =
420{
421    { "darkgreen",            "#006400" },
422    { "antiquewhite",         "#FAEBD7" },
423    { "aqua",                 "#00FFFF" },
424    { "aquamarine",           "#7FFFD4" },
425    { "azure",                "#F0FFFF" },
426    { "beige",                "#F5F5DC" },
427    { "bisque",               "#FFE4C4" },
428    { "black",                "#000000" },
429    { "blanchedalmond",       "#FFEBCD" },
430    { "blue",                 "#0000FF" },
431    { "blueviolet",           "#8A2BE2" },
432    { "brown",                "#A52A2A" },
433    { "burlywood",            "#DEB887" },
434    { "cadetblue",            "#5F9EA0" },
435    { "chartreuse",           "#7FFF00" },
436    { "chocolate",            "#D2691E" },
437    { "coral",                "#FF7F50" },
438    { "cornflowerblue",       "#6495ED" },
439    { "cornsilk",             "#FFF8DC" },
440    { "crimson",              "#DC143C" },
441    { "cyan",                 "#00FFFF" },
442    { "darkblue",             "#00008B" },
443    { "darkcyan",             "#008B8B" },
444    { "darkgoldenrod",        "#B8860B" },
445    { "darkgray",             "#A9A9A9" },
446    { "darkgreen",            "#006400" },
447    { "darkkhaki",            "#BDB76B" },
448    { "darkmagenta",          "#8B008B" },
449    { "darkolivegreen",       "#556B2F" },
450    { "darkorange",           "#FF8C00" },
451    { "darkorchid",           "#9932CC" },
452    { "darkred",              "#8B0000" },
453    { "darksalmon",           "#E9967A" },
454    { "darkseagreen",         "#8FBC8F" },
455    { "darkslateblue",        "#483D8B" },
456    { "darkslategray",        "#2F4F4F" },
457    { "darkturquoise",        "#00CED1" },
458    { "darkviolet",           "#9400D3" },
459    { "deeppink",             "#FF1493" },
460    { "deepskyblue",          "#00BFFF" },
461    { "dimgray",              "#696969" },
462    { "dodgerblue",           "#1E90FF" },
463    { "firebrick",            "#B22222" },
464    { "floralwhite",          "#FFFAF0" },
465    { "forestgreen",          "#228B22" },
466    { "fuchsia",              "#FF00FF" },
467    { "gainsboro",            "#DCDCDC" },
468    { "ghostwhite",           "#F8F8FF" },
469    { "gold",                 "#FFD700" },
470    { "goldenrod",            "#DAA520" },
471    { "gray",                 "#808080" },
472    { "green",                "#008000" },
473    { "greenyellow",          "#ADFF2F" },
474    { "honeydew",             "#F0FFF0" },
475    { "hotpink",              "#FF69B4" },
476    { "indianred",            "#CD5C5C" },
477    { "indigo",               "#4B0082" },
478    { "ivory",                "#FFFFF0" },
479    { "khaki",                "#F0E68C" },
480    { "lavender",             "#E6E6FA" },
481    { "lavenderblush",        "#FFF0F5" },
482    { "lawngreen",            "#7CFC00" },
483    { "lemonchiffon",         "#FFFACD" },
484    { "lightblue",            "#ADD8E6" },
485    { "lightcoral",           "#F08080" },
486    { "lightcyan",            "#E0FFFF" },
487    { "lightgoldenrodyellow", "#FAFAD2" },
488    { "lightgreen",           "#90EE90" },
489    { "lightgrey",            "#D3D3D3" },
490    { "lightpink",            "#FFB6C1" },
491    { "lightsalmon",          "#FFA07A" },
492    { "lightseagreen",        "#20B2AA" },
493    { "lightskyblue",         "#87CEFA" },
494    { "lightslategray",       "#778899" },
495    { "lightsteelblue",       "#B0C4DE" },
496    { "lightyellow",          "#FFFFE0" },
497    { "lime",                 "#00FF00" },
498    { "limegreen",            "#32CD32" },
499    { "linen",                "#FAF0E6" },
500    { "magenta",              "#FF00FF" },
501    { "maroon",               "#800000" },
502    { "mediumaquamarine",     "#66CDAA" },
503    { "mediumblue",           "#0000CD" },
504    { "mediumorchid",         "#BA55D3" },
505    { "mediumpurple",         "#9370DB" },
506    { "mediumseagreen",       "#3CB371" },
507    { "mediumslateblue",      "#7B68EE" },
508    { "mediumspringgreen",    "#00FA9A" },
509    { "mediumturquoise",      "#48D1CC" },
510    { "mediumvioletred",      "#C71585" },
511    { "midnightblue",         "#191970" },
512    { "mintcream",            "#F5FFFA" },
513    { "mistyrose",            "#FFE4E1" },
514    { "moccasin",             "#FFE4B5" },
515    { "navajowhite",          "#FFDEAD" },
516    { "navy",                 "#000080" },
517    { "oldlace",              "#FDF5E6" },
518    { "olive",                "#808000" },
519    { "olivedrab",            "#6B8E23" },
520    { "orange",               "#FFA500" },
521    { "orangered",            "#FF4500" },
522    { "orchid",               "#DA70D6" },
523    { "palegoldenrod",        "#EEE8AA" },
524    { "palegreen",            "#98FB98" },
525    { "paleturquoise",        "#AFEEEE" },
526    { "palevioletred",        "#DB7093" },
527    { "papayawhip",           "#FFEFD5" },
528    { "peachpuff",            "#FFDAB9" },
529    { "peru",                 "#CD853F" },
530    { "pink",                 "#FFC0CB" },
531    { "plum",                 "#DDA0DD" },
532    { "powderblue",           "#B0E0E6" },
533    { "purple",               "#800080" },
534    { "red",                  "#FF0000" },
535    { "rosybrown",            "#BC8F8F" },
536    { "royalblue",            "#4169E1" },
537    { "saddlebrown",          "#8B4513" },
538    { "salmon",               "#FA8072" },
539    { "sandybrown",           "#F4A460" },
540    { "seagreen",             "#2E8B57" },
541    { "seashell",             "#FFF5EE" },
542    { "sienna",               "#A0522D" },
543    { "silver",               "#C0C0C0" },
544    { "skyblue",              "#87CEEB" },
545    { "slateblue",            "#6A5ACD" },
546    { "slategray",            "#708090" },
547    { "snow",                 "#FFFAFA" },
548    { "springgreen",          "#00FF7F" },
549    { "steelblue",            "#4682B4" },
550    { "tan",                  "#D2B48C" },
551    { "teal",                 "#008080" },
552    { "thistle",              "#D8BFD8" },
553    { "tomato",               "#FF6347" },
554    { "turquoise",            "#40E0D0" },
555    { "violet",               "#EE82EE" },
556    { "wheat",                "#F5DEB3" },
557    { "white",                "#FFFFFF" },
558    { "whitesmoke",           "#F5F5F5" },
559    { "yellow",               "#FFFF00" },
560    { "yellowgreen",          "#9ACD32" },
561    { NULL,                   NULL      }
562};
563#endif
564
565#if ATTRIBUTE_HASH_LOOKUP
566static uint hash(ctmbstr s)
567{
568    uint hashval;
569
570    for (hashval = 0; *s != '\0'; s++)
571        hashval = *s + 31*hashval;
572
573    return hashval % ATTRIBUTE_HASH_SIZE;
574}
575
576static const Attribute *install(TidyAttribImpl * attribs, const Attribute* old)
577{
578    AttrHash *np;
579    uint hashval;
580
581    if (old)
582    {
583        np = (AttrHash *)MemAlloc(sizeof(*np));
584        np->attr = old;
585
586        hashval = hash(old->name);
587        np->next = attribs->hashtab[hashval];
588        attribs->hashtab[hashval] = np;
589    }
590
591    return old;
592}
593
594static void removeFromHash( TidyAttribImpl * attribs, ctmbstr s )
595{
596    uint h = hash(s);
597    AttrHash *p, *prev = NULL;
598    for (p = attribs->hashtab[h]; p && p->attr; p = p->next)
599    {
600        if (TY_(tmbstrcmp)(s, p->attr->name) == 0)
601        {
602            AttrHash* next = p->next;
603            if ( prev )
604                prev->next = next;
605            else
606                attribs->hashtab[h] = next;
607            MemFree(p);
608            return;
609        }
610        prev = p;
611    }
612}
613
614static void emptyHash( TidyAttribImpl * attribs )
615{
616    AttrHash *dict, *next;
617    uint i;
618
619    for (i = 0; i < ATTRIBUTE_HASH_SIZE; ++i)
620    {
621        dict = attribs->hashtab[i];
622
623        while(dict)
624        {
625            next = dict->next;
626            MemFree(dict);
627            dict = next;
628        }
629
630        attribs->hashtab[i] = NULL;
631    }
632}
633#endif
634
635static const Attribute* lookup(TidyDocImpl* ARG_UNUSED(doc),
636                               TidyAttribImpl* ARG_UNUSED(attribs),
637                               ctmbstr atnam)
638{
639#ifdef TIDY_APPLE_CHANGES
640    static Attribute unknownEventHandler;
641#endif
642
643    const Attribute *np;
644#if ATTRIBUTE_HASH_LOOKUP
645    const AttrHash *p;
646#endif
647
648    if (!atnam)
649        return NULL;
650
651#if ATTRIBUTE_HASH_LOOKUP
652    for (p = attribs->hashtab[hash(atnam)]; p && p->attr; p = p->next)
653        if (TY_(tmbstrcmp)(atnam, p->attr->name) == 0)
654            return p->attr;
655
656    for (np = attribute_defs; np && np->name; ++np)
657        if (TY_(tmbstrcmp)(atnam, np->name) == 0)
658            return install(attribs, np);
659#else
660    for (np = attribute_defs; np && np->name; ++np)
661        if (TY_(tmbstrcmp)(atnam, np->name) == 0)
662            return np;
663#endif
664
665#ifdef TIDY_APPLE_CHANGES
666    if (!unknownEventHandler.name) {
667        unknownEventHandler.name = "onunknowneventhandler";
668        unknownEventHandler.versions = VERS_ALL;
669        unknownEventHandler.attrchk = CH_SCRIPT;
670    }
671
672    /* When sanitizing against XSS problems we strip all onfoo-style event handlers to prevent potential
673       security problems caused by event handlers that we aren't explicitly aware of, such as was the case
674       with <rdar://problem/6507826>. */
675    if (cfgBool(doc, TidySanitizeAgainstXSS)) {
676        if (TY_(tmbstrncasecmp)(atnam, "on", 2) == 0)
677            return &unknownEventHandler;
678    }
679#endif
680
681    return NULL;
682}
683
684
685/* Locate attributes by type */
686AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id )
687{
688   AttVal* av;
689   for ( av = node->attributes; av; av = av->next )
690   {
691     if ( AttrIsId(av, id) )
692         return av;
693   }
694   return NULL;
695}
696
697/* public method for finding attribute definition by name */
698const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval )
699{
700    if ( attval )
701       return lookup( doc, &doc->attribs, attval->attribute );
702    return NULL;
703}
704
705AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name )
706{
707    AttVal *attr;
708    for (attr = node->attributes; attr != NULL; attr = attr->next)
709    {
710        if (attr->attribute && TY_(tmbstrcmp)(attr->attribute, name) == 0)
711            break;
712    }
713    return attr;
714}
715
716AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
717                           Node *node, ctmbstr name, ctmbstr value )
718{
719    AttVal *av = TY_(NewAttribute)();
720    av->delim = '"';
721    av->attribute = TY_(tmbstrdup)(name);
722
723    if (value)
724        av->value = TY_(tmbstrdup)(value);
725    else
726        av->value = NULL;
727
728    av->dict = lookup(doc, &doc->attribs, name);
729
730    TY_(InsertAttributeAtEnd)(node, av);
731    return av;
732}
733
734AttVal* TY_(RepairAttrValue)(TidyDocImpl* doc, Node* node, ctmbstr name, ctmbstr value)
735{
736    AttVal* old = TY_(GetAttrByName)(node, name);
737
738    if (old)
739    {
740        if (old->value)
741            MemFree(old->value);
742        if (value)
743            old->value = TY_(tmbstrdup)(value);
744        else
745            old->value = NULL;
746
747        return old;
748    }
749    else
750        return TY_(AddAttribute)(doc, node, name, value);
751}
752
753static Bool CheckAttrType( TidyDocImpl* doc,
754                           ctmbstr attrname, AttrCheck type )
755{
756    const Attribute* np = lookup( doc, &doc->attribs, attrname );
757    return (Bool)( np && np->attrchk == type );
758}
759
760Bool TY_(IsUrl)( TidyDocImpl* doc, ctmbstr attrname )
761{
762    return CheckAttrType( doc, attrname, CH_URL );
763}
764
765/*
766Bool IsBool( TidyDocImpl* doc, ctmbstr attrname )
767{
768    return CheckAttrType( doc, attrname, CH_BOOL );
769}
770*/
771
772Bool TY_(IsScript)( TidyDocImpl* doc, ctmbstr attrname )
773{
774    return CheckAttrType( doc, attrname, CH_SCRIPT );
775}
776
777/* may id or name serve as anchor? */
778Bool TY_(IsAnchorElement)( TidyDocImpl* ARG_UNUSED(doc), Node* node)
779{
780    TidyTagId tid = TagId( node );
781    if ( tid == TidyTag_A      ||
782         tid == TidyTag_APPLET ||
783         tid == TidyTag_FORM   ||
784         tid == TidyTag_FRAME  ||
785         tid == TidyTag_IFRAME ||
786         tid == TidyTag_IMG    ||
787         tid == TidyTag_MAP )
788        return yes;
789
790    return no;
791}
792
793/*
794  In CSS1, selectors can contain only the characters A-Z, 0-9,
795  and Unicode characters 161-255, plus dash (-); they cannot start
796  with a dash or a digit; they can also contain escaped characters
797  and any Unicode character as a numeric code (see next item).
798
799  The backslash followed by at most four hexadecimal digits
800  (0..9A..F) stands for the Unicode character with that number.
801
802  Any character except a hexadecimal digit can be escaped to remove
803  its special meaning, by putting a backslash in front.
804
805  #508936 - CSS class naming for -clean option
806*/
807Bool TY_(IsCSS1Selector)( ctmbstr buf )
808{
809    Bool valid = yes;
810    int esclen = 0;
811    byte c;
812    int pos;
813
814    for ( pos=0; valid && (c = *buf++); ++pos )
815    {
816        if ( c == '\\' )
817        {
818            esclen = 1;  /* ab\555\444 is 4 chars {'a', 'b', \555, \444} */
819        }
820        else if ( isdigit( c ) )
821        {
822            /* Digit not 1st, unless escaped (Max length "\112F") */
823            if ( esclen > 0 )
824                valid = ( ++esclen < 6 );
825            if ( valid )
826                valid = ( pos>0 || esclen>0 );
827        }
828        else
829        {
830            valid = (
831                esclen > 0                       /* Escaped? Anything goes. */
832                || ( pos>0 && c == '-' )         /* Dash cannot be 1st char */
833                || isalpha(c)                    /* a-z, A-Z anywhere */
834                || ( c >= 161 )                  /* Unicode 161-255 anywhere */
835            );
836            esclen = 0;
837        }
838    }
839    return valid;
840}
841
842/* free single anchor */
843static void FreeAnchor(Anchor *a)
844{
845    if ( a )
846        MemFree( a->name );
847    MemFree( a );
848}
849
850/* removes anchor for specific node */
851void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, Node *node )
852{
853    TidyAttribImpl* attribs = &doc->attribs;
854    Anchor *delme = NULL, *curr, *prev = NULL;
855
856    for ( curr=attribs->anchor_list; curr!=NULL; curr=curr->next )
857    {
858        if ( curr->node == node )
859        {
860            if ( prev )
861                prev->next = curr->next;
862            else
863                attribs->anchor_list = curr->next;
864            delme = curr;
865            break;
866        }
867        prev = curr;
868    }
869    FreeAnchor( delme );
870}
871
872/* initialize new anchor */
873static Anchor* NewAnchor( ctmbstr name, Node* node )
874{
875    Anchor *a = (Anchor*) MemAlloc( sizeof(Anchor) );
876
877    a->name = TY_(tmbstrdup)( name );
878    a->name = TY_(tmbstrtolower)(a->name);
879    a->node = node;
880    a->next = NULL;
881
882    return a;
883}
884
885/* add new anchor to namespace */
886static Anchor* AddAnchor( TidyDocImpl* doc, ctmbstr name, Node *node )
887{
888    TidyAttribImpl* attribs = &doc->attribs;
889    Anchor *a = NewAnchor( name, node );
890
891    if ( attribs->anchor_list == NULL)
892         attribs->anchor_list = a;
893    else
894    {
895        Anchor *here =  attribs->anchor_list;
896        while (here->next)
897            here = here->next;
898        here->next = a;
899    }
900
901    return attribs->anchor_list;
902}
903
904/* return node associated with anchor */
905static Node* GetNodeByAnchor( TidyDocImpl* doc, ctmbstr name )
906{
907    TidyAttribImpl* attribs = &doc->attribs;
908    Anchor *found;
909    tmbstr lname = TY_(tmbstrdup)(name);
910    lname = TY_(tmbstrtolower)(lname);
911
912    for ( found = attribs->anchor_list; found != NULL; found = found->next )
913    {
914        if ( TY_(tmbstrcmp)(found->name, lname) == 0 )
915            break;
916    }
917
918    MemFree(lname);
919    if ( found )
920        return found->node;
921    return NULL;
922}
923
924/* free all anchors */
925void TY_(FreeAnchors)( TidyDocImpl* doc )
926{
927    TidyAttribImpl* attribs = &doc->attribs;
928    Anchor* a;
929    while (NULL != (a = attribs->anchor_list) )
930    {
931        attribs->anchor_list = a->next;
932        FreeAnchor(a);
933    }
934}
935
936/* public method for inititializing attribute dictionary */
937void TY_(InitAttrs)( TidyDocImpl* doc )
938{
939    ClearMemory( &doc->attribs, sizeof(TidyAttribImpl) );
940#ifdef _DEBUG
941    {
942      /* Attribute ID is index position in Attribute type lookup table */
943      uint ix;
944      for ( ix=0; ix < N_TIDY_ATTRIBS; ++ix )
945      {
946        const Attribute* dict = &attribute_defs[ ix ];
947        assert( (uint) dict->id == ix );
948      }
949    }
950#endif
951}
952
953/* free all declared attributes */
954static void FreeDeclaredAttributes( TidyDocImpl* doc )
955{
956    TidyAttribImpl* attribs = &doc->attribs;
957    Attribute* dict;
958    while ( NULL != (dict = attribs->declared_attr_list) )
959    {
960        attribs->declared_attr_list = dict->next;
961#if ATTRIBUTE_HASH_LOOKUP
962        removeFromHash( &doc->attribs, dict->name );
963#endif
964        MemFree( dict->name );
965        MemFree( dict );
966    }
967}
968
/* Release everything the document's attribute tables own: the hash
   entries (only when ATTRIBUTE_HASH_LOOKUP is compiled in), the anchor
   list, and the list of declared attributes. */
void TY_(FreeAttrTable)( TidyDocImpl* doc )
{
#if ATTRIBUTE_HASH_LOOKUP
    /* drop the hash entries first; they only point at attribute records */
    emptyHash( &doc->attribs );
#endif
    TY_(FreeAnchors)( doc );
    FreeDeclaredAttributes( doc );
}
977
978void TY_(AppendToClassAttr)( AttVal *classattr, ctmbstr classname )
979{
980    uint len = TY_(tmbstrlen)(classattr->value) +
981        TY_(tmbstrlen)(classname) + 2;
982    tmbstr s = (tmbstr) MemAlloc( len );
983    s[0] = '\0';
984    if (classattr->value)
985    {
986        TY_(tmbstrcpy)( s, classattr->value );
987        TY_(tmbstrcat)( s, " " );
988    }
989    TY_(tmbstrcat)( s, classname );
990    if (classattr->value)
991        MemFree( classattr->value );
992    classattr->value = s;
993}
994
995/* concatenate styles */
996static void AppendToStyleAttr( AttVal *styleattr, ctmbstr styleprop )
997{
998    /*
999    this doesn't handle CSS comments and
1000    leading/trailing white-space very well
1001    see http://www.w3.org/TR/css-style-attr
1002    */
1003    uint end = TY_(tmbstrlen)(styleattr->value);
1004
1005    if (end >0 && styleattr->value[end - 1] == ';')
1006    {
1007        /* attribute ends with declaration seperator */
1008
1009        styleattr->value = (tmbstr) MemRealloc(styleattr->value,
1010            end + TY_(tmbstrlen)(styleprop) + 2);
1011
1012        TY_(tmbstrcat)(styleattr->value, " ");
1013        TY_(tmbstrcat)(styleattr->value, styleprop);
1014    }
1015    else if (end >0 && styleattr->value[end - 1] == '}')
1016    {
1017        /* attribute ends with rule set */
1018
1019        styleattr->value = (tmbstr) MemRealloc(styleattr->value,
1020            end + TY_(tmbstrlen)(styleprop) + 6);
1021
1022        TY_(tmbstrcat)(styleattr->value, " { ");
1023        TY_(tmbstrcat)(styleattr->value, styleprop);
1024        TY_(tmbstrcat)(styleattr->value, " }");
1025    }
1026    else
1027    {
1028        /* attribute ends with property value */
1029
1030        styleattr->value = (tmbstr) MemRealloc(styleattr->value,
1031            end + TY_(tmbstrlen)(styleprop) + 3);
1032
1033        if (end > 0)
1034            TY_(tmbstrcat)(styleattr->value, "; ");
1035        TY_(tmbstrcat)(styleattr->value, styleprop);
1036    }
1037}
1038
/*
 the same attribute name can't be used
 more than once in each element

 Walks the attribute list of `node` and, for every pair of attributes
 with the same id (AttrsHaveSameId), resolves the duplicate:
   - class attributes are joined when TidyJoinClasses is set and both
     have a value;
   - style attributes are joined when TidyJoinStyles is set and both
     have a value;
   - otherwise one of the two is removed, chosen by TidyDuplicateAttrs.
 Attributes carrying asp or php content are never compared.
 Every removal is reported through ReportAttrError before the attribute
 is removed.
*/
void TY_(RepairDuplicateAttributes)( TidyDocImpl* doc, Node *node)
{
    AttVal *first;

    /* `first` is advanced inside the body because it may be replaced */
    for (first = node->attributes; first != NULL;)
    {
        AttVal *second;
        /* set when `first` itself was removed and replaced by its successor */
        Bool firstRedefined = no;

        /* skip attributes that hold asp/php content */
        if (!(first->asp == NULL && first->php == NULL))
        {
            first = first->next;
            continue;
        }

        /* compare `first` with every attribute that follows it */
        for (second = first->next; second != NULL;)
        {
            AttVal *temp;

            /* not a plain attribute, or not the same id: move on */
            if (!(second->asp == NULL && second->php == NULL &&
                AttrsHaveSameId(first, second)))
            {
                second = second->next;
                continue;
            }

            /* first and second attribute have same local name */
            /* now determine what to do with this duplicate... */

            if (attrIsCLASS(first) && cfgBool(doc, TidyJoinClasses)
                && AttrHasValue(first) && AttrHasValue(second))
            {
                /* concatenate classes */

                TY_(AppendToClassAttr)(first, second->value);

                /* remember the successor before `second` is removed */
                temp = second->next;
                TY_(ReportAttrError)( doc, node, second, JOINING_ATTRIBUTE);
                TY_(RemoveAttribute)( doc, node, second );
                second = temp;
            }
            else if (attrIsSTYLE(first) && cfgBool(doc, TidyJoinStyles)
                     && AttrHasValue(first) && AttrHasValue(second))
            {
                /* concatenate styles */
                AppendToStyleAttr( first, second->value );

                temp = second->next;
                TY_(ReportAttrError)( doc, node, second, JOINING_ATTRIBUTE);
                TY_(RemoveAttribute)( doc, node, second );
                second = temp;
            }
            else if ( cfg(doc, TidyDuplicateAttrs) == TidyKeepLast )
            {
                /* drop the earlier attribute; its successor becomes the
                   new `first` and the outer loop must not advance again */
                temp = first->next;
                TY_(ReportAttrError)( doc, node, first, REPEATED_ATTRIBUTE);
                TY_(RemoveAttribute)( doc, node, first );
                firstRedefined = yes;
                first = temp;
                second = second->next;
            }
            else /* TidyDuplicateAttrs == TidyKeepFirst */
            {
                /* drop the later attribute */
                temp = second->next;
                TY_(ReportAttrError)( doc, node, second, REPEATED_ATTRIBUTE);
                TY_(RemoveAttribute)( doc, node, second );
                second = temp;
            }
        }
        /* advance only if `first` was not already replaced above */
        if (!firstRedefined)
            first = first->next;
    }
}
1115
/* ignore unknown attributes for proprietary elements

   Runs the checks for one attribute of `node`:
     - when the attribute has a dictionary entry, XML-only attributes
       switch the lexer to "voyager" mode (and, unless HTML output was
       requested, turn on XHTML and XML output), the document version is
       constrained, and the attribute's check procedure is called;
     - with TIDY_APPLE_CHANGES, an attribute marked for removal is removed;
     - otherwise a proprietary attribute is reported and, when
       TidyDropPropAttrs is set, removed.
   Returns the dictionary entry read on entry (NULL for unknown
   attributes). */
const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attval )
{
    /* fetched up front: attval may be removed further down */
    const Attribute* attribute = attval->dict;

    if ( attribute != NULL )
    {
        if (attribute->versions & VERS_XML)
        {
            doc->lexer->isvoyager = yes;
            if (!cfgBool(doc, TidyHtmlOut))
            {
                TY_(SetOptionBool)(doc, TidyXhtmlOut, yes);
                TY_(SetOptionBool)(doc, TidyXmlOut, yes);
            }
        }

        TY_(ConstrainVersion)(doc, AttributeVersions(node, attval));

        if (attribute->attrchk)
            attribute->attrchk( doc, node, attval );
    }

#ifdef TIDY_APPLE_CHANGES
    if (AttributeIsMarkedForRemoval(attval))
    {
        TY_(RemoveAttribute)( doc, node, attval );
    }
    else
    /* the `else` above deliberately binds to the `if` after the #endif */
#endif

    if (AttributeIsProprietary(node, attval))
    {
        TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);

        if (cfgBool(doc, TidyDropPropAttrs))
            TY_(RemoveAttribute)( doc, node, attval );
    }

    return attribute;
}
1157
1158Bool TY_(IsBoolAttribute)(AttVal *attval)
1159{
1160    const Attribute *attribute = ( attval ? attval->dict : NULL );
1161    if ( attribute && attribute->attrchk == CH_BOOL )
1162        return yes;
1163    return no;
1164}
1165
1166Bool TY_(attrIsEvent)( AttVal* attval )
1167{
1168    TidyAttrId atid = AttrId( attval );
1169
1170    return (atid == TidyAttr_OnAFTERUPDATE     ||
1171            atid == TidyAttr_OnBEFOREUNLOAD    ||
1172            atid == TidyAttr_OnBEFOREUPDATE    ||
1173            atid == TidyAttr_OnBLUR            ||
1174            atid == TidyAttr_OnCHANGE          ||
1175            atid == TidyAttr_OnCLICK           ||
1176            atid == TidyAttr_OnDATAAVAILABLE   ||
1177            atid == TidyAttr_OnDATASETCHANGED  ||
1178            atid == TidyAttr_OnDATASETCOMPLETE ||
1179            atid == TidyAttr_OnDBLCLICK        ||
1180            atid == TidyAttr_OnERRORUPDATE     ||
1181            atid == TidyAttr_OnFOCUS           ||
1182            atid == TidyAttr_OnKEYDOWN         ||
1183            atid == TidyAttr_OnKEYPRESS        ||
1184            atid == TidyAttr_OnKEYUP           ||
1185            atid == TidyAttr_OnLOAD            ||
1186            atid == TidyAttr_OnMOUSEDOWN       ||
1187            atid == TidyAttr_OnMOUSEMOVE       ||
1188            atid == TidyAttr_OnMOUSEOUT        ||
1189            atid == TidyAttr_OnMOUSEOVER       ||
1190            atid == TidyAttr_OnMOUSEUP         ||
1191            atid == TidyAttr_OnRESET           ||
1192            atid == TidyAttr_OnROWENTER        ||
1193            atid == TidyAttr_OnROWEXIT         ||
1194            atid == TidyAttr_OnSELECT          ||
1195            atid == TidyAttr_OnSUBMIT          ||
1196            atid == TidyAttr_OnUNLOAD);
1197}
1198
1199static void CheckLowerCaseAttrValue( TidyDocImpl* doc, Node *node, AttVal *attval)
1200{
1201    tmbstr p;
1202    Bool hasUpper = no;
1203
1204    if (!AttrHasValue(attval))
1205        return;
1206
1207    p = attval->value;
1208
1209    while (*p)
1210    {
1211        if (TY_(IsUpper)(*p)) /* #501230 - fix by Terry Teague - 09 Jan 02 */
1212        {
1213            hasUpper = yes;
1214            break;
1215        }
1216        p++;
1217    }
1218
1219    if (hasUpper)
1220    {
1221        Lexer* lexer = doc->lexer;
1222        if (lexer->isvoyager)
1223            TY_(ReportAttrError)( doc, node, attval, ATTR_VALUE_NOT_LCASE);
1224
1225        if ( lexer->isvoyager || cfgBool(doc, TidyLowerLiterals) )
1226            attval->value = TY_(tmbstrtolower)(attval->value);
1227    }
1228}
1229
1230/* methods for checking value of a specific attribute */
1231
/* Check (and optionally repair) the value of a URL-valued attribute.

   Steps, in order:
     1. A missing value is reported (MISSING_ATTR_VALUE) and nothing else
        is done.
     2. TIDY_APPLE_CHANGES only: when a base URI is configured and the
        value has no ':' before its first '/', the value is replaced by
        base URI + value, joined by exactly one '/'.
     3. TIDY_APPLE_CHANGES only: with TidySanitizeAgainstXSS set, a value
        that starts with "javascript:", "script:", "vbscript:" or "file:"
        (case-insensitive) marks the attribute for removal and returns.
     4. Backslashes are counted and, with TidyFixBackslash, turned into
        '/' in place.  Bytes above 0x7e, at or below 0x20, and '<' / '>'
        are counted as needing escapes.
     5. With TidyFixUri, the counted bytes are rewritten as %XX in a newly
        allocated value.
     6. Backslashes and illegal characters are reported; illegal
        characters also set BC_INVALID_URI in doc->badChars. */
void TY_(CheckUrl)( TidyDocImpl* doc, Node *node, AttVal *attval)
{
    tmbchar c;
    tmbstr dest, p;
    uint escape_count = 0, backslash_count = 0;
    uint i, pos = 0;
    uint len;

/* Apple Changes:
   2007-02-18 iccir Rewrote support for absoluting relative URLs
*/
#ifdef TIDY_APPLE_CHANGES
    Bool ends_with_slash, starts_with_slash, already_absolute = no;
    ctmbstr base_uri;
    uint base_uri_len;
#endif

    if (!AttrHasValue(attval))
    {
        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
        return;
    }

    p = attval->value;

#ifdef TIDY_APPLE_CHANGES
    /* values starting with '/' use the absolute-path base URI,
       all others the relative-path base URI */
    starts_with_slash = (p[0] == '/');
    base_uri = cfgStr(doc, starts_with_slash ? TidyAbsolutePathBaseUri : TidyRelativePathBaseUri);

    if (base_uri && base_uri[0])
    {
        /* a ':' before the first '/' is taken to mean the value is
           already absolute */
        for (i = 0; 0 != (c = p[i]); ++i)
        {
            if (c == ':')
            {
                already_absolute = yes;
                break;
            }
            else if (c == '/')
            {
                break;
            }
        }

        if (!already_absolute)
        {
            /* +2: room for an inserted '/' and the terminating NUL */
            base_uri_len = tmbstrlen(base_uri);
            len = tmbstrlen(p) + base_uri_len + 2;
            dest = (tmbstr) MemAlloc(len);

            /*
                If the current value started with a slash or our base uri ends with a slash,
                the format can be %s%s.  Else, we need to insert a slash in between.
            */
            ends_with_slash = (base_uri[base_uri_len - 1] == '/');

            if (starts_with_slash && ends_with_slash)
            {
                /* both sides bring a slash: skip the value's leading one */
                sprintf(dest, "%s%s",  base_uri, p+1);
            }
            else if (starts_with_slash || ends_with_slash)
            {
                sprintf(dest, "%s%s",  base_uri, p);
            }
            else
            {
                sprintf(dest, "%s/%s", base_uri, p);
            }

            /* the attribute now owns the combined string */
            MemFree(attval->value);
            attval->value = dest;
            p = dest;
        }
    }
#endif


/* Apple Changes:
   2007-02-01 iccir If TidySanitizeAgainstXSS is set, remove any URL attribute which contains embedded scripts
*/
#ifdef TIDY_APPLE_CHANGES
    if (cfgBool(doc, TidySanitizeAgainstXSS))
    {
        c = p[0];

        /* Check first character as an optimization. */
        if (c != 'h' && c != 'H')
        {
            if (tmbstrncasecmp(p, "javascript:", 11) == 0 ||
                tmbstrncasecmp(p, "script:",     7)  == 0 ||
                tmbstrncasecmp(p, "vbscript:",   9)  == 0 ||
                tmbstrncasecmp(p, "file:",       5)  == 0)
            {
                MarkAttributeForRemoval( attval );
                return;
            }
        }
    }
#endif

    /* first pass: count backslashes (fixing them in place when asked to)
       and characters that would need escaping */
    for (i = 0; '\0' != (c = p[i]); ++i)
    {
        if (c == '\\')
        {
            ++backslash_count;
            if ( cfgBool(doc, TidyFixBackslash) )
                p[i] = '/';
        }
        else if ((c > 0x7e) || (c <= 0x20) || (strchr("<>", c)))
            ++escape_count;
    }

    if ( cfgBool(doc, TidyFixUri) && escape_count )
    {
        /* each escaped byte grows from 1 to 3 characters ("%XX") */
        len = TY_(tmbstrlen)(p) + escape_count * 2 + 1;
        dest = (tmbstr) MemAlloc(len);

        /* second pass: copy, replacing the counted bytes by %XX */
        for (i = 0; 0 != (c = p[i]); ++i)
        {
            if ((c > 0x7e) || (c <= 0x20) || (strchr("<>", c)))
                pos += sprintf( dest + pos, "%%%02X", (byte)c );
            else
                dest[pos++] = c;
        }
        dest[pos] = 0;

        MemFree(attval->value);
        attval->value = dest;
    }
    if ( backslash_count )
    {
        if ( cfgBool(doc, TidyFixBackslash) )
            TY_(ReportAttrError)( doc, node, attval, FIXED_BACKSLASH );
        else
            TY_(ReportAttrError)( doc, node, attval, BACKSLASH_IN_URI );
    }
    if ( escape_count )
    {
        if ( cfgBool(doc, TidyFixUri) )
            TY_(ReportAttrError)( doc, node, attval, ESCAPED_ILLEGAL_URI);
        else
            TY_(ReportAttrError)( doc, node, attval, ILLEGAL_URI_REFERENCE);

        doc->badChars |= BC_INVALID_URI;
    }
}
1378
1379/* RFC 2396, section 4.2 states:
1380     "[...] in the case of HTML's FORM element, [...] an
1381     empty URI reference represents the base URI of the
1382     current document and should be replaced by that URI
1383     when transformed into a request."
1384*/
1385void CheckAction( TidyDocImpl* doc, Node *node, AttVal *attval)
1386{
1387    if (AttrHasValue(attval))
1388        TY_(CheckUrl)( doc, node, attval );
1389}
1390
1391/* Apple Changes:
1392   2007-01-31 iccir If TidySanitizeAgainstXSS is set, remove all on* (onBlur, onClick, etc) attributes
1393*/
#ifdef TIDY_APPLE_CHANGES
/* Check procedure for script attributes (Apple build): the attribute is
   marked for removal when TidySanitizeAgainstXSS is set; otherwise it is
   left alone.  `node` is not used. */
void CheckScript( TidyDocImpl* doc, Node *node, AttVal *attval )
{
    if ( cfgBool(doc, TidySanitizeAgainstXSS) )
        MarkAttributeForRemoval( attval );
}
#else
/* Check procedure for script attributes (default build): performs no
   checks at all. */
void CheckScript( TidyDocImpl* ARG_UNUSED(doc), Node* ARG_UNUSED(node),
                  AttVal* ARG_UNUSED(attval))
{
}
#endif
1406
1407Bool TY_(IsValidHTMLID)(ctmbstr id)
1408{
1409    ctmbstr s = id;
1410
1411    if (!s)
1412        return no;
1413
1414    if (!TY_(IsLetter)(*s++))
1415        return no;
1416
1417    while (*s)
1418        if (!TY_(IsNamechar)(*s++))
1419            return no;
1420
1421    return yes;
1422
1423}
1424
1425Bool TY_(IsValidXMLID)(ctmbstr id)
1426{
1427    ctmbstr s = id;
1428    tchar c;
1429
1430    if (!s)
1431        return no;
1432
1433    c = *s++;
1434    if (c > 0x7F)
1435        s += TY_(GetUTF8)(s, &c);
1436
1437    if (!(TY_(IsXMLLetter)(c) || c == '_' || c == ':'))
1438        return no;
1439
1440    while (*s)
1441    {
1442        c = (unsigned char)*s;
1443
1444        if (c > 0x7F)
1445            s += TY_(GetUTF8)(s, &c);
1446
1447        ++s;
1448
1449        if (!TY_(IsXMLNamechar)(c))
1450            return no;
1451    }
1452
1453    return yes;
1454}
1455
1456static Bool IsValidNMTOKEN(ctmbstr name)
1457{
1458    ctmbstr s = name;
1459    tchar c;
1460
1461    if (!s)
1462        return no;
1463
1464    while (*s)
1465    {
1466        c = (unsigned char)*s;
1467
1468        if (c > 0x7F)
1469            s += TY_(GetUTF8)(s, &c);
1470
1471        ++s;
1472
1473        if (!TY_(IsXMLNamechar)(c))
1474            return no;
1475    }
1476
1477    return yes;
1478}
1479
1480static Bool AttrValueIsAmong(AttVal *attval, ctmbstr const list[])
1481{
1482    const ctmbstr *v;
1483    for (v = list; *v; ++v)
1484        if (AttrValueIs(attval, *v))
1485            return yes;
1486    return no;
1487}
1488
1489static void CheckAttrValidity( TidyDocImpl* doc, Node *node, AttVal *attval,
1490                               ctmbstr const list[])
1491{
1492    if (!AttrHasValue(attval))
1493    {
1494        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1495        return;
1496    }
1497
1498    CheckLowerCaseAttrValue( doc, node, attval );
1499
1500    if (!AttrValueIsAmong(attval, list))
1501        TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1502}
1503
1504void CheckName( TidyDocImpl* doc, Node *node, AttVal *attval)
1505{
1506    Node *old;
1507
1508    if (!AttrHasValue(attval))
1509    {
1510        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1511        return;
1512    }
1513
1514    if ( TY_(IsAnchorElement)(doc, node) )
1515    {
1516        if (cfgBool(doc, TidyXmlOut) && !IsValidNMTOKEN(attval->value))
1517            TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1518
1519        if ((old = GetNodeByAnchor(doc, attval->value)) &&  old != node)
1520        {
1521            TY_(ReportAttrError)( doc, node, attval, ANCHOR_NOT_UNIQUE);
1522        }
1523        else
1524            AddAnchor( doc, attval->value, node );
1525    }
1526}
1527
1528/* Apple Changes:
1529   2007-01-30 iccir Add support for dropping 'class' attributes with a certain prefix
1530   2007-02-02 iccir When a style attribute is encountered, remove it if TidySanitizeAgainstXSS is set
1531*/
1532#ifdef TIDY_APPLE_CHANGES
1533void CheckClass( TidyDocImpl* doc, Node *node, AttVal *attval)
1534{
1535    ctmbstr prefix = cfgStr(doc, TidyDropClassesWithPrefix);
1536
1537    if (prefix && attval->value)
1538    {
1539        tmbstr value = attval->value;
1540        uint len = tmbstrlen(prefix);
1541
1542        if (tmbstrlen(value) >= len && tmbstrncasecmp(prefix, value, len) == 0)
1543        {
1544            MarkAttributeForRemoval( attval );
1545        }
1546    }
1547}
1548
1549void CheckStyleAttr( TidyDocImpl* doc, Node *node, AttVal *attval)
1550{
1551    if ( cfgBool(doc, TidySanitizeAgainstXSS) )
1552        MarkAttributeForRemoval( attval );
1553}
1554#endif
1555
1556void CheckId( TidyDocImpl* doc, Node *node, AttVal *attval )
1557{
1558    Lexer* lexer = doc->lexer;
1559    Node *old;
1560
1561/* Apple Changes:
1562   2007-01-30 iccir Add support for dropping 'id' attributes with a certain prefix
1563*/
1564#ifdef TIDY_APPLE_CHANGES
1565    ctmbstr prefix = cfgStr(doc, TidyDropIdsWithPrefix);
1566
1567    if (prefix && attval->value)
1568    {
1569        tmbstr value = attval->value;
1570        uint len = tmbstrlen(prefix);
1571
1572        if (tmbstrlen(value) >= len && tmbstrncasecmp(prefix, value, len) == 0)
1573        {
1574            MarkAttributeForRemoval( attval );
1575            return;
1576        }
1577    }
1578#endif
1579
1580    if (!AttrHasValue(attval))
1581    {
1582        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1583        return;
1584    }
1585
1586    if (!TY_(IsValidHTMLID)(attval->value))
1587    {
1588        if (lexer->isvoyager && TY_(IsValidXMLID)(attval->value))
1589            TY_(ReportAttrError)( doc, node, attval, XML_ID_SYNTAX);
1590        else
1591            TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1592    }
1593
1594    if ((old = GetNodeByAnchor(doc, attval->value)) &&  old != node)
1595    {
1596        TY_(ReportAttrError)( doc, node, attval, ANCHOR_NOT_UNIQUE);
1597    }
1598    else
1599        AddAnchor( doc, attval->value, node );
1600}
1601
1602void CheckBool( TidyDocImpl* doc, Node *node, AttVal *attval)
1603{
1604    if (!AttrHasValue(attval))
1605        return;
1606
1607    CheckLowerCaseAttrValue( doc, node, attval );
1608}
1609
1610void CheckAlign( TidyDocImpl* doc, Node *node, AttVal *attval)
1611{
1612    ctmbstr const values[] = {"left", "right", "center", "justify", NULL};
1613
1614    /* IMG, OBJECT, APPLET and EMBED use align for vertical position */
1615    if (node->tag && (node->tag->model & CM_IMG))
1616    {
1617        CheckValign( doc, node, attval );
1618        return;
1619    }
1620
1621    if (!AttrHasValue(attval))
1622    {
1623        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1624        return;
1625    }
1626
1627    CheckLowerCaseAttrValue( doc, node, attval);
1628
1629    /* currently CheckCaption(...) takes care of the remaining cases */
1630    if (nodeIsCAPTION(node))
1631        return;
1632
1633    if (!AttrValueIsAmong(attval, values))
1634    {
1635        /* align="char" is allowed for elements with CM_TABLE|CM_ROW
1636           except CAPTION which is excluded above, */
1637        if( !(AttrValueIs(attval, "char")
1638              && TY_(nodeHasCM)(node, CM_TABLE|CM_ROW)) )
1639             TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1640    }
1641}
1642
1643void CheckValign( TidyDocImpl* doc, Node *node, AttVal *attval)
1644{
1645    ctmbstr const values[] = {"top", "middle", "bottom", "baseline", NULL};
1646    ctmbstr const values2[] = {"left", "right", NULL};
1647    ctmbstr const valuesp[] = {"texttop", "absmiddle", "absbottom",
1648                               "textbottom", NULL};
1649
1650    if (!AttrHasValue(attval))
1651    {
1652        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1653        return;
1654    }
1655
1656    CheckLowerCaseAttrValue( doc, node, attval );
1657
1658    if (AttrValueIsAmong(attval, values))
1659    {
1660            /* all is fine */
1661    }
1662    else if (AttrValueIsAmong(attval, values2))
1663    {
1664        if (!(node->tag && (node->tag->model & CM_IMG)))
1665            TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1666    }
1667    else if (AttrValueIsAmong(attval, valuesp))
1668    {
1669        TY_(ConstrainVersion)( doc, VERS_PROPRIETARY );
1670        TY_(ReportAttrError)( doc, node, attval, PROPRIETARY_ATTR_VALUE);
1671    }
1672    else
1673        TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1674}
1675
1676void CheckLength( TidyDocImpl* doc, Node *node, AttVal *attval)
1677{
1678    tmbstr p;
1679
1680    if (!AttrHasValue(attval))
1681    {
1682        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1683        return;
1684    }
1685
1686    /* don't check for <col width=...> and <colgroup width=...> */
1687    if (attrIsWIDTH(attval) && (nodeIsCOL(node) || nodeIsCOLGROUP(node)))
1688        return;
1689
1690    p = attval->value;
1691
1692    if (!TY_(IsDigit)(*p++))
1693    {
1694        TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1695    }
1696    else
1697    {
1698        while (*p)
1699        {
1700            if (!TY_(IsDigit)(*p) && *p != '%')
1701            {
1702                TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1703                break;
1704            }
1705            ++p;
1706        }
1707    }
1708}
1709
1710void CheckTarget( TidyDocImpl* doc, Node *node, AttVal *attval)
1711{
1712    ctmbstr const values[] = {"_blank", "_self", "_parent", "_top", NULL};
1713
1714    if (!AttrHasValue(attval))
1715    {
1716        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1717        return;
1718    }
1719
1720    /* target names must begin with A-Za-z ... */
1721    if (TY_(IsLetter)(attval->value[0]))
1722        return;
1723
1724    /* or be one of the allowed list */
1725    if (!AttrValueIsAmong(attval, values))
1726        TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1727}
1728
1729void CheckFsubmit( TidyDocImpl* doc, Node *node, AttVal *attval)
1730{
1731    ctmbstr const values[] = {"get", "post", NULL};
1732    CheckAttrValidity( doc, node, attval, values );
1733}
1734
1735void CheckClear( TidyDocImpl* doc, Node *node, AttVal *attval)
1736{
1737    ctmbstr const values[] = {"none", "left", "right", "all", NULL};
1738
1739    if (!AttrHasValue(attval))
1740    {
1741        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1742        if (attval->value == NULL)
1743            attval->value = TY_(tmbstrdup)( "none" );
1744        return;
1745    }
1746
1747    CheckLowerCaseAttrValue( doc, node, attval );
1748
1749    if (!AttrValueIsAmong(attval, values))
1750        TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1751}
1752
1753void CheckShape( TidyDocImpl* doc, Node *node, AttVal *attval)
1754{
1755    ctmbstr const values[] = {"rect", "default", "circle", "poly", NULL};
1756    CheckAttrValidity( doc, node, attval, values );
1757}
1758
1759void CheckScope( TidyDocImpl* doc, Node *node, AttVal *attval)
1760{
1761    ctmbstr const values[] = {"row", "rowgroup", "col", "colgroup", NULL};
1762    CheckAttrValidity( doc, node, attval, values );
1763}
1764
1765void CheckNumber( TidyDocImpl* doc, Node *node, AttVal *attval)
1766{
1767    tmbstr p;
1768
1769    if (!AttrHasValue(attval))
1770    {
1771        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1772        return;
1773    }
1774
1775    /* don't check <frameset cols=... rows=...> */
1776    if ( nodeIsFRAMESET(node) &&
1777        (attrIsCOLS(attval) || attrIsROWS(attval)))
1778     return;
1779
1780    p  = attval->value;
1781
1782    /* font size may be preceded by + or - */
1783    if ( nodeIsFONT(node) && (*p == '+' || *p == '-') )
1784        ++p;
1785
1786    while (*p)
1787    {
1788        if (!TY_(IsDigit)(*p))
1789        {
1790            TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1791            break;
1792        }
1793        ++p;
1794    }
1795}
1796
1797/* check hexadecimal color value */
1798static Bool IsValidColorCode(ctmbstr color)
1799{
1800    uint i;
1801
1802    if (TY_(tmbstrlen)(color) != 6)
1803        return no;
1804
1805    /* check if valid hex digits and letters */
1806    for (i = 0; i < 6; i++)
1807        if (!TY_(IsDigit)(color[i]) && !strchr("abcdef", TY_(ToLower)(color[i])))
1808            return no;
1809
1810    return yes;
1811}
1812
1813/* check color syntax and beautify value by option */
1814void CheckColor( TidyDocImpl* doc, Node *node, AttVal *attval)
1815{
1816    Bool valid = no;
1817    tmbstr given;
1818
1819    if (!AttrHasValue(attval))
1820    {
1821        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1822        return;
1823    }
1824
1825    given = attval->value;
1826
1827    /* 727851 - add hash to hash-less color values */
1828    if (given[0] != '#' && (valid = IsValidColorCode(given)))
1829    {
1830        tmbstr cp, s;
1831
1832        cp = s = (tmbstr) MemAlloc(2 + TY_(tmbstrlen)(given));
1833        *cp++ = '#';
1834        while ('\0' != (*cp++ = *given++))
1835            continue;
1836
1837        TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE_REPLACED);
1838
1839        MemFree(attval->value);
1840        given = attval->value = s;
1841    }
1842
1843    if (!valid && given[0] == '#')
1844        valid = IsValidColorCode(given + 1);
1845
1846    if (valid && given[0] == '#' && cfgBool(doc, TidyReplaceColor))
1847    {
1848        ctmbstr newName = GetColorName(given);
1849
1850        if (newName)
1851        {
1852            MemFree(attval->value);
1853            given = attval->value = TY_(tmbstrdup)(newName);
1854        }
1855    }
1856
1857    /* if it is not a valid color code, it is a color name */
1858    if (!valid)
1859        valid = GetColorCode(given) != NULL;
1860
1861    if (valid && given[0] == '#')
1862        attval->value = TY_(tmbstrtoupper)(attval->value);
1863    else if (valid)
1864        attval->value = TY_(tmbstrtolower)(attval->value);
1865
1866    if (!valid)
1867        TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1868}
1869
1870/* check valuetype attribute for element param */
1871void CheckVType( TidyDocImpl* doc, Node *node, AttVal *attval)
1872{
1873    ctmbstr const values[] = {"data", "object", "ref", NULL};
1874    CheckAttrValidity( doc, node, attval, values );
1875}
1876
1877/* checks scrolling attribute */
1878void CheckScroll( TidyDocImpl* doc, Node *node, AttVal *attval)
1879{
1880    ctmbstr const values[] = {"no", "auto", "yes", NULL};
1881    CheckAttrValidity( doc, node, attval, values );
1882}
1883
1884/* checks dir attribute */
1885void CheckTextDir( TidyDocImpl* doc, Node *node, AttVal *attval)
1886{
1887    ctmbstr const values[] = {"rtl", "ltr", NULL};
1888    CheckAttrValidity( doc, node, attval, values );
1889}
1890
1891/* checks lang and xml:lang attributes */
1892void CheckLang( TidyDocImpl* doc, Node *node, AttVal *attval)
1893{
1894    /* empty xml:lang is allowed through XML 1.0 SE errata */
1895    if (!AttrHasValue(attval) && !attrIsXML_LANG(attval))
1896    {
1897        if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
1898        {
1899            TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE );
1900        }
1901        return;
1902    }
1903}
1904
1905/* checks type attribute */
1906void CheckType( TidyDocImpl* doc, Node *node, AttVal *attval)
1907{
1908    ctmbstr const valuesINPUT[] = {"text", "password", "checkbox", "radio",
1909                                   "submit", "reset", "file", "hidden",
1910                                   "image", "button", NULL};
1911    ctmbstr const valuesBUTTON[] = {"button", "submit", "reset", NULL};
1912    ctmbstr const valuesUL[] = {"disc", "square", "circle", NULL};
1913    ctmbstr const valuesOL[] = {"1", "a", "i", NULL};
1914
1915    if (nodeIsINPUT(node))
1916        CheckAttrValidity( doc, node, attval, valuesINPUT );
1917    else if (nodeIsBUTTON(node))
1918        CheckAttrValidity( doc, node, attval, valuesBUTTON );
1919    else if (nodeIsUL(node))
1920        CheckAttrValidity( doc, node, attval, valuesUL );
1921    else if (nodeIsOL(node))
1922    {
1923        if (!AttrHasValue(attval))
1924        {
1925            TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1926            return;
1927        }
1928        if (!AttrValueIsAmong(attval, valuesOL))
1929            TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1930    }
1931    else if (nodeIsLI(node))
1932    {
1933        if (!AttrHasValue(attval))
1934        {
1935            TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1936            return;
1937        }
1938        if (AttrValueIsAmong(attval, valuesUL))
1939            CheckLowerCaseAttrValue( doc, node, attval );
1940        else if (!AttrValueIsAmong(attval, valuesOL))
1941            TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1942    }
1943    return;
1944}
1945
1946/*
1947 * local variables:
1948 * mode: c
1949 * indent-tabs-mode: nil
1950 * c-basic-offset: 4
1951 * eval: (c-set-offset 'substatement-open 0)
1952 * end:
1953 */
1954