1/* attrs.c -- recognize HTML attributes 2 3 (c) 1998-2006 (W3C) MIT, ERCIM, Keio University 4 See tidy.h for the copyright notice. 5 6 CVS Info : 7 8 $Author: mrowe $ 9 $Date: 2009/01/29 05:45:41 $ 10 $Revision: 1.14 $ 11 12*/ 13 14#include "tidy-int.h" 15#include "attrs.h" 16#include "message.h" 17#include "tmbstr.h" 18#include "utf8.h" 19 20/* 21 Bind attribute types to procedures to check values. 22 You can add new procedures for better validation 23 and each procedure has access to the node in which 24 the attribute occurred as well as the attribute name 25 and its value. 26 27 By default, attributes are checked without regard 28 to the element they are found on. You have the choice 29 of making the procedure test which element is involved 30 or in writing methods for each element which controls 31 exactly how the attributes of that element are checked. 32 This latter approach is best for detecting the absence 33 of required attributes. 34*/ 35 36static AttrCheck CheckAction; 37static AttrCheck CheckScript; 38static AttrCheck CheckName; 39#ifdef TIDY_APPLE_CHANGES 40static AttrCheck CheckClass; 41static AttrCheck CheckStyleAttr; 42#endif 43static AttrCheck CheckId; 44static AttrCheck CheckAlign; 45static AttrCheck CheckValign; 46static AttrCheck CheckBool; 47static AttrCheck CheckLength; 48static AttrCheck CheckTarget; 49static AttrCheck CheckFsubmit; 50static AttrCheck CheckClear; 51static AttrCheck CheckShape; 52static AttrCheck CheckNumber; 53static AttrCheck CheckScope; 54static AttrCheck CheckColor; 55static AttrCheck CheckVType; 56static AttrCheck CheckScroll; 57static AttrCheck CheckTextDir; 58static AttrCheck CheckLang; 59static AttrCheck CheckType; 60 61#define CH_PCDATA NULL 62#define CH_CHARSET NULL 63#define CH_TYPE CheckType 64#define CH_XTYPE NULL 65#define CH_CHARACTER NULL 66#define CH_URLS NULL 67#define CH_URL TY_(CheckUrl) 68#define CH_SCRIPT CheckScript 69#define CH_ALIGN CheckAlign 70#define CH_VALIGN CheckValign 71#define CH_COLOR CheckColor 72#define CH_CLEAR CheckClear 73#define CH_BORDER CheckBool /* kludge */ 74#define CH_LANG CheckLang 75#define CH_BOOL CheckBool 76#define CH_COLS NULL 77#define CH_NUMBER CheckNumber 78#define CH_LENGTH CheckLength 79#define CH_COORDS NULL 80#define CH_DATE NULL 81#define CH_TEXTDIR CheckTextDir 82#define CH_IDREFS NULL 83#define CH_IDREF NULL 84#define CH_IDDEF CheckId 85#define CH_NAME CheckName 86#define CH_TFRAME NULL 87#define CH_FBORDER NULL 88#define CH_MEDIA NULL 89#define CH_FSUBMIT CheckFsubmit 90#define CH_LINKTYPES NULL 91#define CH_TRULES NULL 92#define CH_SCOPE CheckScope 93#define CH_SHAPE CheckShape 94#define CH_SCROLL CheckScroll 95#define CH_TARGET CheckTarget 96#define CH_VTYPE CheckVType 97#define CH_ACTION CheckAction 98 99static const Attribute attribute_defs [] = 100{ 101 { TidyAttr_UNKNOWN, "unknown!", VERS_PROPRIETARY, NULL }, 102 { TidyAttr_ABBR, "abbr", VERS_HTML40, CH_PCDATA }, 103 { TidyAttr_ACCEPT, "accept", VERS_ALL, CH_XTYPE }, 104 { TidyAttr_ACCEPT_CHARSET, "accept-charset", VERS_HTML40, CH_CHARSET }, 105 { TidyAttr_ACCESSKEY, "accesskey", VERS_HTML40, CH_CHARACTER }, 106 { TidyAttr_ACTION, "action", VERS_ALL, CH_ACTION }, 107 { TidyAttr_ADD_DATE, "add_date", VERS_NETSCAPE, CH_PCDATA }, /* A */ 108 { TidyAttr_ALIGN, "align", VERS_ALL, CH_ALIGN }, /* varies by element */ 109 { TidyAttr_ALINK, "alink", VERS_LOOSE, CH_COLOR }, 110 { TidyAttr_ALT, "alt", VERS_ALL, CH_PCDATA }, /* nowrap */ 111 { TidyAttr_ARCHIVE, "archive", VERS_HTML40, CH_URLS }, /* space or comma separated list */ 112 { TidyAttr_AXIS, "axis", VERS_HTML40, CH_PCDATA }, 113 { TidyAttr_BACKGROUND, "background", VERS_LOOSE, CH_URL }, 114 { TidyAttr_BGCOLOR, "bgcolor", VERS_LOOSE, CH_COLOR }, 115 { TidyAttr_BGPROPERTIES, "bgproperties", VERS_PROPRIETARY, CH_PCDATA }, /* BODY "fixed" fixes background */ 116 { TidyAttr_BORDER, "border", VERS_ALL, CH_BORDER }, /* like LENGTH + "border" */ 117 { TidyAttr_BORDERCOLOR, "bordercolor", VERS_MICROSOFT, CH_COLOR }, /* used on TABLE */ 118 { TidyAttr_BOTTOMMARGIN, "bottommargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */ 119 { TidyAttr_CELLPADDING, "cellpadding", VERS_FROM32, CH_LENGTH }, /* % or pixel values */ 120 { TidyAttr_CELLSPACING, "cellspacing", VERS_FROM32, CH_LENGTH }, 121 { TidyAttr_CHAR, "char", VERS_HTML40, CH_CHARACTER }, 122 { TidyAttr_CHAROFF, "charoff", VERS_HTML40, CH_LENGTH }, 123 { TidyAttr_CHARSET, "charset", VERS_HTML40, CH_CHARSET }, 124 { TidyAttr_CHECKED, "checked", VERS_ALL, CH_BOOL }, /* i.e. "checked" or absent */ 125 { TidyAttr_CITE, "cite", VERS_HTML40, CH_URL }, 126#ifdef TIDY_APPLE_CHANGES 127 { TidyAttr_CLASS, "class", VERS_HTML40, CheckClass }, 128#else 129 { TidyAttr_CLASS, "class", VERS_HTML40, CH_PCDATA }, 130#endif 131 { TidyAttr_CLASSID, "classid", VERS_HTML40, CH_URL }, 132 { TidyAttr_CLEAR, "clear", VERS_LOOSE, CH_CLEAR }, /* BR: left, right, all */ 133 { TidyAttr_CODE, "code", VERS_LOOSE, CH_PCDATA }, /* APPLET */ 134 { TidyAttr_CODEBASE, "codebase", VERS_HTML40, CH_URL }, /* OBJECT */ 135 { TidyAttr_CODETYPE, "codetype", VERS_HTML40, CH_XTYPE }, /* OBJECT */ 136 { TidyAttr_COLOR, "color", VERS_LOOSE, CH_COLOR }, /* BASEFONT, FONT */ 137 { TidyAttr_COLS, "cols", VERS_IFRAME, CH_COLS }, /* TABLE & FRAMESET */ 138 { TidyAttr_COLSPAN, "colspan", VERS_FROM32, CH_NUMBER }, 139 { TidyAttr_COMPACT, "compact", VERS_ALL, CH_BOOL }, /* lists */ 140 { TidyAttr_CONTENT, "content", VERS_ALL, CH_PCDATA }, 141 { TidyAttr_COORDS, "coords", VERS_FROM32, CH_COORDS }, /* AREA, A */ 142 { TidyAttr_DATA, "data", VERS_HTML40, CH_URL }, /* OBJECT */ 143 { TidyAttr_DATAFLD, "datafld", VERS_MICROSOFT, CH_PCDATA }, /* used on DIV, IMG */ 144 { TidyAttr_DATAFORMATAS, "dataformatas", VERS_MICROSOFT, CH_PCDATA }, /* used on DIV, IMG */ 145 { TidyAttr_DATAPAGESIZE, "datapagesize", VERS_MICROSOFT, CH_NUMBER }, /* used on DIV, IMG */ 146 { TidyAttr_DATASRC, "datasrc", VERS_MICROSOFT, CH_URL }, /* used on TABLE */ 147 { TidyAttr_DATETIME, "datetime", VERS_HTML40, CH_DATE }, /* INS, DEL */ 148 { TidyAttr_DECLARE, "declare", VERS_HTML40, CH_BOOL }, /* OBJECT */ 149 { TidyAttr_DEFER, "defer", VERS_HTML40, CH_BOOL }, /* SCRIPT */ 150 { TidyAttr_DIR, "dir", VERS_HTML40, CH_TEXTDIR }, /* ltr or rtl */ 151 { TidyAttr_DISABLED, "disabled", VERS_HTML40, CH_BOOL }, /* form fields */ 152 { TidyAttr_ENCODING, "encoding", VERS_XML, CH_PCDATA }, /* <?xml?> */ 153 { TidyAttr_ENCTYPE, "enctype", VERS_ALL, CH_XTYPE }, /* FORM */ 154 { TidyAttr_FACE, "face", VERS_LOOSE, CH_PCDATA }, /* BASEFONT, FONT */ 155 { TidyAttr_FOR, "for", VERS_HTML40, CH_IDREF }, /* LABEL */ 156 { TidyAttr_FRAME, "frame", VERS_HTML40, CH_TFRAME }, /* TABLE */ 157 { TidyAttr_FRAMEBORDER, "frameborder", VERS_FRAMESET, CH_FBORDER }, /* 0 or 1 */ 158 { TidyAttr_FRAMESPACING, "framespacing", VERS_PROPRIETARY, CH_NUMBER }, 159 { TidyAttr_GRIDX, "gridx", VERS_PROPRIETARY, CH_NUMBER }, /* TABLE Adobe golive*/ 160 { TidyAttr_GRIDY, "gridy", VERS_PROPRIETARY, CH_NUMBER }, /* TABLE Adobe golive */ 161 { TidyAttr_HEADERS, "headers", VERS_HTML40, CH_IDREFS }, /* table cells */ 162 { TidyAttr_HEIGHT, "height", VERS_ALL, CH_LENGTH }, /* pixels only for TH/TD */ 163 { TidyAttr_HREF, "href", VERS_ALL, CH_URL }, /* A, AREA, LINK and BASE */ 164 { TidyAttr_HREFLANG, "hreflang", VERS_HTML40, CH_LANG }, /* A, LINK */ 165 { TidyAttr_HSPACE, "hspace", VERS_ALL, CH_NUMBER }, /* APPLET, IMG, OBJECT */ 166 { TidyAttr_HTTP_EQUIV, "http-equiv", VERS_ALL, CH_PCDATA }, /* META */ 167 { TidyAttr_ID, "id", VERS_HTML40, CH_IDDEF }, 168 { TidyAttr_ISMAP, "ismap", VERS_ALL, CH_BOOL }, /* IMG */ 169 { TidyAttr_LABEL, "label", VERS_HTML40, CH_PCDATA }, /* OPT, OPTGROUP */ 170 { TidyAttr_LANG, "lang", VERS_HTML40, CH_LANG }, 171 { TidyAttr_LANGUAGE, "language", VERS_LOOSE, CH_PCDATA }, /* SCRIPT */ 172 { TidyAttr_LAST_MODIFIED, "last_modified", VERS_NETSCAPE, CH_PCDATA }, /* A */ 173 { TidyAttr_LAST_VISIT, "last_visit", VERS_NETSCAPE, CH_PCDATA }, /* A */ 174 { TidyAttr_LEFTMARGIN, "leftmargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */ 175 { TidyAttr_LINK, "link", VERS_LOOSE, CH_COLOR }, /* BODY */ 176 { TidyAttr_LONGDESC, "longdesc", VERS_HTML40, CH_URL }, /* IMG */ 177 { TidyAttr_LOWSRC, "lowsrc", VERS_PROPRIETARY, CH_URL }, /* IMG */ 178 { TidyAttr_MARGINHEIGHT, "marginheight", VERS_IFRAME, CH_NUMBER }, /* FRAME, IFRAME, BODY */ 179 { TidyAttr_MARGINWIDTH, "marginwidth", VERS_IFRAME, CH_NUMBER }, /* ditto */ 180 { TidyAttr_MAXLENGTH, "maxlength", VERS_ALL, CH_NUMBER }, /* INPUT */ 181 { TidyAttr_MEDIA, "media", VERS_HTML40, CH_MEDIA }, /* STYLE, LINK */ 182 { TidyAttr_METHOD, "method", VERS_ALL, CH_FSUBMIT }, /* FORM: get or post */ 183 { TidyAttr_MULTIPLE, "multiple", VERS_ALL, CH_BOOL }, /* SELECT */ 184 { TidyAttr_NAME, "name", VERS_ALL, CH_NAME }, 185 { TidyAttr_NOHREF, "nohref", VERS_FROM32, CH_BOOL }, /* AREA */ 186 { TidyAttr_NORESIZE, "noresize", VERS_FRAMESET, CH_BOOL }, /* FRAME */ 187 { TidyAttr_NOSHADE, "noshade", VERS_LOOSE, CH_BOOL }, /* HR */ 188 { TidyAttr_NOWRAP, "nowrap", VERS_LOOSE, CH_BOOL }, /* table cells */ 189 { TidyAttr_OBJECT, "object", VERS_HTML40_LOOSE, CH_PCDATA }, /* APPLET */ 190 { TidyAttr_OnAFTERUPDATE, "onafterupdate", VERS_MICROSOFT, CH_SCRIPT }, 191 { TidyAttr_OnBEFOREUNLOAD, "onbeforeunload", VERS_MICROSOFT, CH_SCRIPT }, 192 { TidyAttr_OnBEFOREUPDATE, "onbeforeupdate", VERS_MICROSOFT, CH_SCRIPT }, 193 { TidyAttr_OnBLUR, "onblur", VERS_EVENTS, CH_SCRIPT }, /* event */ 194 { TidyAttr_OnCHANGE, "onchange", VERS_EVENTS, CH_SCRIPT }, /* event */ 195 { TidyAttr_OnCLICK, "onclick", VERS_EVENTS, CH_SCRIPT }, /* event */ 196 { TidyAttr_OnDATAAVAILABLE, "ondataavailable", VERS_MICROSOFT, CH_SCRIPT }, /* object, applet */ 197 { TidyAttr_OnDATASETCHANGED, "ondatasetchanged", VERS_MICROSOFT, CH_SCRIPT }, /* object, applet */ 198 { TidyAttr_OnDATASETCOMPLETE, "ondatasetcomplete", VERS_MICROSOFT, CH_SCRIPT }, 199 { TidyAttr_OnDBLCLICK, "ondblclick", VERS_EVENTS, CH_SCRIPT }, /* event */ 200 { TidyAttr_OnERRORUPDATE, "onerrorupdate", VERS_MICROSOFT, CH_SCRIPT }, /* form fields */ 201 { TidyAttr_OnFOCUS, "onfocus", VERS_EVENTS, CH_SCRIPT }, /* event */ 202 { TidyAttr_OnKEYDOWN, "onkeydown", VERS_EVENTS, CH_SCRIPT }, /* event */ 203 { TidyAttr_OnKEYPRESS, "onkeypress", VERS_EVENTS, CH_SCRIPT }, /* event */ 204 { TidyAttr_OnKEYUP, "onkeyup", VERS_EVENTS, CH_SCRIPT }, /* event */ 205 { TidyAttr_OnLOAD, "onload", VERS_EVENTS, CH_SCRIPT }, /* event */ 206 { TidyAttr_OnMOUSEDOWN, "onmousedown", VERS_EVENTS, CH_SCRIPT }, /* event */ 207 { TidyAttr_OnMOUSEMOVE, "onmousemove", VERS_EVENTS, CH_SCRIPT }, /* event */ 208 { TidyAttr_OnMOUSEOUT, "onmouseout", VERS_EVENTS, CH_SCRIPT }, /* event */ 209 { TidyAttr_OnMOUSEOVER, "onmouseover", VERS_EVENTS, CH_SCRIPT }, /* event */ 210 { TidyAttr_OnMOUSEUP, "onmouseup", VERS_EVENTS, CH_SCRIPT }, /* event */ 211 { TidyAttr_OnRESET, "onreset", VERS_EVENTS, CH_SCRIPT }, /* event */ 212 { TidyAttr_OnROWENTER, "onrowenter", VERS_MICROSOFT, CH_SCRIPT }, /* form fields */ 213 { TidyAttr_OnROWEXIT, "onrowexit", VERS_MICROSOFT, CH_SCRIPT }, /* form fields */ 214 { TidyAttr_OnSELECT, "onselect", VERS_EVENTS, CH_SCRIPT }, /* event */ 215 { TidyAttr_OnSUBMIT, "onsubmit", VERS_EVENTS, CH_SCRIPT }, /* event */ 216 { TidyAttr_OnUNLOAD, "onunload", VERS_EVENTS, CH_SCRIPT }, /* event */ 217 { TidyAttr_PROFILE, "profile", VERS_HTML40, CH_URL }, /* HEAD */ 218 { TidyAttr_PROMPT, "prompt", VERS_LOOSE, CH_PCDATA }, /* ISINDEX */ 219 { TidyAttr_RBSPAN, "rbspan", VERS_XHTML11, CH_NUMBER }, /* ruby markup */ 220 { TidyAttr_READONLY, "readonly", VERS_HTML40, CH_BOOL }, /* form fields */ 221 { TidyAttr_REL, "rel", VERS_ALL, CH_LINKTYPES }, 222 { TidyAttr_REV, "rev", VERS_ALL, CH_LINKTYPES }, 223 { TidyAttr_RIGHTMARGIN, "rightmargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */ 224 { TidyAttr_ROWS, "rows", VERS_ALL, CH_NUMBER }, /* TEXTAREA */ 225 { TidyAttr_ROWSPAN, "rowspan", VERS_ALL, CH_NUMBER }, /* table cells */ 226 { TidyAttr_RULES, "rules", VERS_HTML40, CH_TRULES }, /* TABLE */ 227 { TidyAttr_SCHEME, "scheme", VERS_HTML40, CH_PCDATA }, /* META */ 228 { TidyAttr_SCOPE, "scope", VERS_HTML40, CH_SCOPE }, /* table cells */ 229 { TidyAttr_SCROLLING, "scrolling", VERS_IFRAME, CH_SCROLL }, /* yes, no or auto */ 230 { TidyAttr_SELECTED, "selected", VERS_ALL, CH_BOOL }, /* OPTION */ 231 { TidyAttr_SHAPE, "shape", VERS_FROM32, CH_SHAPE }, /* AREA, A */ 232 { TidyAttr_SHOWGRID, "showgrid", VERS_PROPRIETARY, CH_BOOL }, /* TABLE Adobe golive */ 233 { TidyAttr_SHOWGRIDX, "showgridx", VERS_PROPRIETARY, CH_BOOL }, /* TABLE Adobe golive*/ 234 { TidyAttr_SHOWGRIDY, "showgridy", VERS_PROPRIETARY, CH_BOOL }, /* TABLE Adobe golive*/ 235 { TidyAttr_SIZE, "size", VERS_LOOSE, CH_NUMBER }, /* HR, FONT, BASEFONT, SELECT */ 236 { TidyAttr_SPAN, "span", VERS_HTML40, CH_NUMBER }, /* COL, COLGROUP */ 237 { TidyAttr_SRC, "src", VERS_ALL, CH_URL }, /* IMG, FRAME, IFRAME */ 238 { TidyAttr_STANDBY, "standby", VERS_HTML40, CH_PCDATA }, /* OBJECT */ 239 { TidyAttr_START, "start", VERS_ALL, CH_NUMBER }, /* OL */ 240#ifdef TIDY_APPLE_CHANGES 241 { TidyAttr_STYLE, "style", VERS_HTML40, CheckStyleAttr }, 242#else 243 { TidyAttr_STYLE, "style", VERS_HTML40, CH_PCDATA }, 244#endif 245 { TidyAttr_SUMMARY, "summary", VERS_HTML40, CH_PCDATA }, /* TABLE */ 246 { TidyAttr_TABINDEX, "tabindex", VERS_HTML40, CH_NUMBER }, /* fields, OBJECT and A */ 247 { TidyAttr_TARGET, "target", VERS_HTML40, CH_TARGET }, /* names a frame/window */ 248 { TidyAttr_TEXT, "text", VERS_LOOSE, CH_COLOR }, /* BODY */ 249 { TidyAttr_TITLE, "title", VERS_HTML40, CH_PCDATA }, /* text tool tip */ 250 { TidyAttr_TOPMARGIN, "topmargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */ 251 { TidyAttr_TYPE, "type", VERS_FROM32, CH_TYPE }, /* also used by SPACER */ 252 { TidyAttr_USEMAP, "usemap", VERS_ALL, CH_URL }, /* things with images */ 253 { TidyAttr_VALIGN, "valign", VERS_FROM32, CH_VALIGN }, 254 { TidyAttr_VALUE, "value", VERS_ALL, CH_PCDATA }, 255 { TidyAttr_VALUETYPE, "valuetype", VERS_HTML40, CH_VTYPE }, /* PARAM: data, ref, object */ 256 { TidyAttr_VERSION, "version", VERS_ALL|VERS_XML, CH_PCDATA }, /* HTML <?xml?> */ 257 { TidyAttr_VLINK, "vlink", VERS_LOOSE, CH_COLOR }, /* BODY */ 258 { TidyAttr_VSPACE, "vspace", VERS_LOOSE, CH_NUMBER }, /* IMG, OBJECT, APPLET */ 259 { TidyAttr_WIDTH, "width", VERS_ALL, CH_LENGTH }, /* pixels only for TD/TH */ 260 { TidyAttr_WRAP, "wrap", VERS_NETSCAPE, CH_PCDATA }, /* textarea */ 261 { TidyAttr_XML_LANG, "xml:lang", VERS_XML, CH_LANG }, /* XML language */ 262 { TidyAttr_XML_SPACE, "xml:space", VERS_XML, CH_PCDATA }, /* XML white space */ 263 264 /* todo: VERS_ALL is wrong! */ 265 { TidyAttr_XMLNS, "xmlns", VERS_ALL, CH_PCDATA }, /* name space */ 266 { TidyAttr_EVENT, "event", VERS_HTML40, CH_PCDATA }, /* reserved for <script> */ 267 { TidyAttr_METHODS, "methods", VERS_HTML20, CH_PCDATA }, /* for <a>, never implemented */ 268 { TidyAttr_N, "n", VERS_HTML20, CH_PCDATA }, /* for <nextid> */ 269 { TidyAttr_SDAFORM, "sdaform", VERS_HTML20, CH_PCDATA }, /* SDATA attribute in HTML 2.0 */ 270 { TidyAttr_SDAPREF, "sdapref", VERS_HTML20, CH_PCDATA }, /* SDATA attribute in HTML 2.0 */ 271 { TidyAttr_SDASUFF, "sdasuff", VERS_HTML20, CH_PCDATA }, /* SDATA attribute in HTML 2.0 */ 272 { TidyAttr_URN, "urn", VERS_HTML20, CH_PCDATA }, /* for <a>, never implemented */ 273 274 /* this must be the final entry */ 275 { N_TIDY_ATTRIBS, NULL, VERS_UNKNOWN, NULL } 276}; 277 278 279/* Apple Changes: 280 2007-03-01 iccir Due to the control flow in TY_(CheckAttribute), we cannot 281 use RemoveAttribute() inside of a Check___ function -- 282 the resulting call to AttributeIsProprietary() will hit 283 dealloced data. Unfortuately, a lot of the Apple-specific 284 changes need this ability. 285 286 The best way to fix this problem would be to have the 287 Check___ functions return a Bool instead of a void. If 288 no is returned, TY_(CheckAttribute) could then call 289 RemoveAttribute() and bail out. 290 291 I don't want to sprinkle even more TIDY_APPLE_CHANGES into 292 this file, however. 293 294 For now, call MarkAttributeForRemoval() instead. This 295 sets the AttVal's (Attribute *)dict to a fake MarkedForRemoval. 296 297 We then check for this value upon returning to TY_(CheckAttribute). 298*/ 299#ifdef TIDY_APPLE_CHANGES 300static const Attribute MarkedForRemoval = { TidyTag_UNKNOWN, "", VERS_PROPRIETARY, NULL }; 301 302static void MarkAttributeForRemoval(AttVal* attval) 303{ 304 attval->dict = &MarkedForRemoval; 305} 306 307static Bool AttributeIsMarkedForRemoval(AttVal* attval) 308{ 309 return (attval->dict == &MarkedForRemoval); 310} 311#endif 312 313static uint AttributeVersions(Node* node, AttVal* attval) 314{ 315 uint i; 316 317 if (!attval || !attval->dict) 318 return VERS_UNKNOWN; 319 320 if (!node || !node->tag || !node->tag->attrvers) 321 return attval->dict->versions; 322 323 for (i = 0; node->tag->attrvers[i].attribute; ++i) 324 if (node->tag->attrvers[i].attribute == attval->dict->id) 325 return node->tag->attrvers[i].versions; 326 327 return attval->dict->versions & VERS_ALL 328 ? VERS_UNKNOWN 329 : attval->dict->versions; 330 331} 332 333 334/* return the version of the attribute "id" of element "node" */ 335uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id ) 336{ 337 uint i; 338 339 if (!node || !node->tag || !node->tag->attrvers) 340 return VERS_UNKNOWN; 341 342 for (i = 0; node->tag->attrvers[i].attribute; ++i) 343 if (node->tag->attrvers[i].attribute == id) 344 return node->tag->attrvers[i].versions; 345 346 return VERS_UNKNOWN; 347} 348 349/* returns true if the element is a W3C defined element */ 350/* but the element/attribute combination is not */ 351static Bool AttributeIsProprietary(Node* node, AttVal* attval) 352{ 353 if (!node || !attval) 354 return no; 355 356 if (!node->tag) 357 return no; 358 359 if (!(node->tag->versions & VERS_ALL)) 360 return no; 361 362 if (AttributeVersions(node, attval) & VERS_ALL) 363 return no; 364 365 return yes; 366} 367 368/* used by CheckColor() */ 369struct _colors 370{ 371 ctmbstr name; 372 ctmbstr hex; 373}; 374 375static const struct _colors colors[] = 376{ 377 { "black", "#000000" }, 378 { "green", "#008000" }, 379 { "silver", "#C0C0C0" }, 380 { "lime", "#00FF00" }, 381 { "gray", "#808080" }, 382 { "olive", "#808000" }, 383 { "white", "#FFFFFF" }, 384 { "yellow", "#FFFF00" }, 385 { "maroon", "#800000" }, 386 { "navy", "#000080" }, 387 { "red", "#FF0000" }, 388 { "blue", "#0000FF" }, 389 { "purple", "#800080" }, 390 { "teal", "#008080" }, 391 { "fuchsia", "#FF00FF" }, 392 { "aqua", "#00FFFF" }, 393 { NULL, NULL } 394}; 395 396static ctmbstr GetColorCode(ctmbstr name) 397{ 398 uint i; 399 400 for (i = 0; colors[i].name; ++i) 401 if (TY_(tmbstrcasecmp)(name, colors[i].name) == 0) 402 return colors[i].hex; 403 404 return NULL; 405} 406 407static ctmbstr GetColorName(ctmbstr code) 408{ 409 uint i; 410 411 for (i = 0; colors[i].name; ++i) 412 if (TY_(tmbstrcasecmp)(code, colors[i].hex) == 0) 413 return colors[i].name; 414 415 return NULL; 416} 417 418#if 0 419static const struct _colors fancy_colors[] = 420{ 421 { "darkgreen", "#006400" }, 422 { "antiquewhite", "#FAEBD7" }, 423 { "aqua", "#00FFFF" }, 424 { "aquamarine", "#7FFFD4" }, 425 { "azure", "#F0FFFF" }, 426 { "beige", "#F5F5DC" }, 427 { "bisque", "#FFE4C4" }, 428 { "black", "#000000" }, 429 { "blanchedalmond", "#FFEBCD" }, 430 { "blue", "#0000FF" }, 431 { "blueviolet", "#8A2BE2" }, 432 { "brown", "#A52A2A" }, 433 { "burlywood", "#DEB887" }, 434 { "cadetblue", "#5F9EA0" }, 435 { "chartreuse", "#7FFF00" }, 436 { "chocolate", "#D2691E" }, 437 { "coral", "#FF7F50" }, 438 { "cornflowerblue", "#6495ED" }, 439 { "cornsilk", "#FFF8DC" }, 440 { "crimson", "#DC143C" }, 441 { "cyan", "#00FFFF" }, 442 { "darkblue", "#00008B" }, 443 { "darkcyan", "#008B8B" }, 444 { "darkgoldenrod", "#B8860B" }, 445 { "darkgray", "#A9A9A9" }, 446 { "darkgreen", "#006400" }, 447 { "darkkhaki", "#BDB76B" }, 448 { "darkmagenta", "#8B008B" }, 449 { "darkolivegreen", "#556B2F" }, 450 { "darkorange", "#FF8C00" }, 451 { "darkorchid", "#9932CC" }, 452 { "darkred", "#8B0000" }, 453 { "darksalmon", "#E9967A" }, 454 { "darkseagreen", "#8FBC8F" }, 455 { "darkslateblue", "#483D8B" }, 456 { "darkslategray", "#2F4F4F" }, 457 { "darkturquoise", "#00CED1" }, 458 { "darkviolet", "#9400D3" }, 459 { "deeppink", "#FF1493" }, 460 { "deepskyblue", "#00BFFF" }, 461 { "dimgray", "#696969" }, 462 { "dodgerblue", "#1E90FF" }, 463 { "firebrick", "#B22222" }, 464 { "floralwhite", "#FFFAF0" }, 465 { "forestgreen", "#228B22" }, 466 { "fuchsia", "#FF00FF" }, 467 { "gainsboro", "#DCDCDC" }, 468 { "ghostwhite", "#F8F8FF" }, 469 { "gold", "#FFD700" }, 470 { "goldenrod", "#DAA520" }, 471 { "gray", "#808080" }, 472 { "green", "#008000" }, 473 { "greenyellow", "#ADFF2F" }, 474 { "honeydew", "#F0FFF0" }, 475 { "hotpink", "#FF69B4" }, 476 { "indianred", "#CD5C5C" }, 477 { "indigo", "#4B0082" }, 478 { "ivory", "#FFFFF0" }, 479 { "khaki", "#F0E68C" }, 480 { "lavender", "#E6E6FA" }, 481 { "lavenderblush", "#FFF0F5" }, 482 { "lawngreen", "#7CFC00" }, 483 { "lemonchiffon", "#FFFACD" }, 484 { "lightblue", "#ADD8E6" }, 485 { "lightcoral", "#F08080" }, 486 { "lightcyan", "#E0FFFF" }, 487 { "lightgoldenrodyellow", "#FAFAD2" }, 488 { "lightgreen", "#90EE90" }, 489 { "lightgrey", "#D3D3D3" }, 490 { "lightpink", "#FFB6C1" }, 491 { "lightsalmon", "#FFA07A" }, 492 { "lightseagreen", "#20B2AA" }, 493 { "lightskyblue", "#87CEFA" }, 494 { "lightslategray", "#778899" }, 495 { "lightsteelblue", "#B0C4DE" }, 496 { "lightyellow", "#FFFFE0" }, 497 { "lime", "#00FF00" }, 498 { "limegreen", "#32CD32" }, 499 { "linen", "#FAF0E6" }, 500 { "magenta", "#FF00FF" }, 501 { "maroon", "#800000" }, 502 { "mediumaquamarine", "#66CDAA" }, 503 { "mediumblue", "#0000CD" }, 504 { "mediumorchid", "#BA55D3" }, 505 { "mediumpurple", "#9370DB" }, 506 { "mediumseagreen", "#3CB371" }, 507 { "mediumslateblue", "#7B68EE" }, 508 { "mediumspringgreen", "#00FA9A" }, 509 { "mediumturquoise", "#48D1CC" }, 510 { "mediumvioletred", "#C71585" }, 511 { "midnightblue", "#191970" }, 512 { "mintcream", "#F5FFFA" }, 513 { "mistyrose", "#FFE4E1" }, 514 { "moccasin", "#FFE4B5" }, 515 { "navajowhite", "#FFDEAD" }, 516 { "navy", "#000080" }, 517 { "oldlace", "#FDF5E6" }, 518 { "olive", "#808000" }, 519 { "olivedrab", "#6B8E23" }, 520 { "orange", "#FFA500" }, 521 { "orangered", "#FF4500" }, 522 { "orchid", "#DA70D6" }, 523 { "palegoldenrod", "#EEE8AA" }, 524 { "palegreen", "#98FB98" }, 525 { "paleturquoise", "#AFEEEE" }, 526 { "palevioletred", "#DB7093" }, 527 { "papayawhip", "#FFEFD5" }, 528 { "peachpuff", "#FFDAB9" }, 529 { "peru", "#CD853F" }, 530 { "pink", "#FFC0CB" }, 531 { "plum", "#DDA0DD" }, 532 { "powderblue", "#B0E0E6" }, 533 { "purple", "#800080" }, 534 { "red", "#FF0000" }, 535 { "rosybrown", "#BC8F8F" }, 536 { "royalblue", "#4169E1" }, 537 { "saddlebrown", "#8B4513" }, 538 { "salmon", "#FA8072" }, 539 { "sandybrown", "#F4A460" }, 540 { "seagreen", "#2E8B57" }, 541 { "seashell", "#FFF5EE" }, 542 { "sienna", "#A0522D" }, 543 { "silver", "#C0C0C0" }, 544 { "skyblue", "#87CEEB" }, 545 { "slateblue", "#6A5ACD" }, 546 { "slategray", "#708090" }, 547 { "snow", "#FFFAFA" }, 548 { "springgreen", "#00FF7F" }, 549 { "steelblue", "#4682B4" }, 550 { "tan", "#D2B48C" }, 551 { "teal", "#008080" }, 552 { "thistle", "#D8BFD8" }, 553 { "tomato", "#FF6347" }, 554 { "turquoise", "#40E0D0" }, 555 { "violet", "#EE82EE" }, 556 { "wheat", "#F5DEB3" }, 557 { "white", "#FFFFFF" }, 558 { "whitesmoke", "#F5F5F5" }, 559 { "yellow", "#FFFF00" }, 560 { "yellowgreen", "#9ACD32" }, 561 { NULL, NULL } 562}; 563#endif 564 565#if ATTRIBUTE_HASH_LOOKUP 566static uint hash(ctmbstr s) 567{ 568 uint hashval; 569 570 for (hashval = 0; *s != '\0'; s++) 571 hashval = *s + 31*hashval; 572 573 return hashval % ATTRIBUTE_HASH_SIZE; 574} 575 576static const Attribute *install(TidyAttribImpl * attribs, const Attribute* old) 577{ 578 AttrHash *np; 579 uint hashval; 580 581 if (old) 582 { 583 np = (AttrHash *)MemAlloc(sizeof(*np)); 584 np->attr = old; 585 586 hashval = hash(old->name); 587 np->next = attribs->hashtab[hashval]; 588 attribs->hashtab[hashval] = np; 589 } 590 591 return old; 592} 593 594static void removeFromHash( TidyAttribImpl * attribs, ctmbstr s ) 595{ 596 uint h = hash(s); 597 AttrHash *p, *prev = NULL; 598 for (p = attribs->hashtab[h]; p && p->attr; p = p->next) 599 { 600 if (TY_(tmbstrcmp)(s, p->attr->name) == 0) 601 { 602 AttrHash* next = p->next; 603 if ( prev ) 604 prev->next = next; 605 else 606 attribs->hashtab[h] = next; 607 MemFree(p); 608 return; 609 } 610 prev = p; 611 } 612} 613 614static void emptyHash( TidyAttribImpl * attribs ) 615{ 616 AttrHash *dict, *next; 617 uint i; 618 619 for (i = 0; i < ATTRIBUTE_HASH_SIZE; ++i) 620 { 621 dict = attribs->hashtab[i]; 622 623 while(dict) 624 { 625 next = dict->next; 626 MemFree(dict); 627 dict = next; 628 } 629 630 attribs->hashtab[i] = NULL; 631 } 632} 633#endif 634 635static const Attribute* lookup(TidyDocImpl* ARG_UNUSED(doc), 636 TidyAttribImpl* ARG_UNUSED(attribs), 637 ctmbstr atnam) 638{ 639#ifdef TIDY_APPLE_CHANGES 640 static Attribute unknownEventHandler; 641#endif 642 643 const Attribute *np; 644#if ATTRIBUTE_HASH_LOOKUP 645 const AttrHash *p; 646#endif 647 648 if (!atnam) 649 return NULL; 650 651#if ATTRIBUTE_HASH_LOOKUP 652 for (p = attribs->hashtab[hash(atnam)]; p && p->attr; p = p->next) 653 if (TY_(tmbstrcmp)(atnam, p->attr->name) == 0) 654 return p->attr; 655 656 for (np = attribute_defs; np && np->name; ++np) 657 if (TY_(tmbstrcmp)(atnam, np->name) == 0) 658 return install(attribs, np); 659#else 660 for (np = attribute_defs; np && np->name; ++np) 661 if (TY_(tmbstrcmp)(atnam, np->name) == 0) 662 return np; 663#endif 664 665#ifdef TIDY_APPLE_CHANGES 666 if (!unknownEventHandler.name) { 667 unknownEventHandler.name = "onunknowneventhandler"; 668 unknownEventHandler.versions = VERS_ALL; 669 unknownEventHandler.attrchk = CH_SCRIPT; 670 } 671 672 /* When sanitizing against XSS problems we strip all onfoo-style event handlers to prevent potential 673 security problems caused by event handlers that we aren't explicitly aware of, such as was the case 674 with <rdar://problem/6507826>. */ 675 if (cfgBool(doc, TidySanitizeAgainstXSS)) { 676 if (TY_(tmbstrncasecmp)(atnam, "on", 2) == 0) 677 return &unknownEventHandler; 678 } 679#endif 680 681 return NULL; 682} 683 684 685/* Locate attributes by type */ 686AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id ) 687{ 688 AttVal* av; 689 for ( av = node->attributes; av; av = av->next ) 690 { 691 if ( AttrIsId(av, id) ) 692 return av; 693 } 694 return NULL; 695} 696 697/* public method for finding attribute definition by name */ 698const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval ) 699{ 700 if ( attval ) 701 return lookup( doc, &doc->attribs, attval->attribute ); 702 return NULL; 703} 704 705AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name ) 706{ 707 AttVal *attr; 708 for (attr = node->attributes; attr != NULL; attr = attr->next) 709 { 710 if (attr->attribute && TY_(tmbstrcmp)(attr->attribute, name) == 0) 711 break; 712 } 713 return attr; 714} 715 716AttVal* TY_(AddAttribute)( TidyDocImpl* doc, 717 Node *node, ctmbstr name, ctmbstr value ) 718{ 719 AttVal *av = TY_(NewAttribute)(); 720 av->delim = '"'; 721 av->attribute = TY_(tmbstrdup)(name); 722 723 if (value) 724 av->value = TY_(tmbstrdup)(value); 725 else 726 av->value = NULL; 727 728 av->dict = lookup(doc, &doc->attribs, name); 729 730 TY_(InsertAttributeAtEnd)(node, av); 731 return av; 732} 733 734AttVal* TY_(RepairAttrValue)(TidyDocImpl* doc, Node* node, ctmbstr name, ctmbstr value) 735{ 736 AttVal* old = TY_(GetAttrByName)(node, name); 737 738 if (old) 739 { 740 if (old->value) 741 MemFree(old->value); 742 if (value) 743 old->value = TY_(tmbstrdup)(value); 744 else 745 old->value = NULL; 746 747 return old; 748 } 749 else 750 return TY_(AddAttribute)(doc, node, name, value); 751} 752 753static Bool CheckAttrType( TidyDocImpl* doc, 754 ctmbstr attrname, AttrCheck type ) 755{ 756 const Attribute* np = lookup( doc, &doc->attribs, attrname ); 757 return (Bool)( np && np->attrchk == type ); 758} 759 760Bool TY_(IsUrl)( TidyDocImpl* doc, ctmbstr attrname ) 761{ 762 return CheckAttrType( doc, attrname, CH_URL ); 763} 764 765/* 766Bool IsBool( TidyDocImpl* doc, ctmbstr attrname ) 767{ 768 return CheckAttrType( doc, attrname, CH_BOOL ); 769} 770*/ 771 772Bool TY_(IsScript)( TidyDocImpl* doc, ctmbstr attrname ) 773{ 774 return CheckAttrType( doc, attrname, CH_SCRIPT ); 775} 776 777/* may id or name serve as anchor? */ 778Bool TY_(IsAnchorElement)( TidyDocImpl* ARG_UNUSED(doc), Node* node) 779{ 780 TidyTagId tid = TagId( node ); 781 if ( tid == TidyTag_A || 782 tid == TidyTag_APPLET || 783 tid == TidyTag_FORM || 784 tid == TidyTag_FRAME || 785 tid == TidyTag_IFRAME || 786 tid == TidyTag_IMG || 787 tid == TidyTag_MAP ) 788 return yes; 789 790 return no; 791} 792 793/* 794 In CSS1, selectors can contain only the characters A-Z, 0-9, 795 and Unicode characters 161-255, plus dash (-); they cannot start 796 with a dash or a digit; they can also contain escaped characters 797 and any Unicode character as a numeric code (see next item). 798 799 The backslash followed by at most four hexadecimal digits 800 (0..9A..F) stands for the Unicode character with that number. 801 802 Any character except a hexadecimal digit can be escaped to remove 803 its special meaning, by putting a backslash in front. 804 805 #508936 - CSS class naming for -clean option 806*/ 807Bool TY_(IsCSS1Selector)( ctmbstr buf ) 808{ 809 Bool valid = yes; 810 int esclen = 0; 811 byte c; 812 int pos; 813 814 for ( pos=0; valid && (c = *buf++); ++pos ) 815 { 816 if ( c == '\\' ) 817 { 818 esclen = 1; /* ab\555\444 is 4 chars {'a', 'b', \555, \444} */ 819 } 820 else if ( isdigit( c ) ) 821 { 822 /* Digit not 1st, unless escaped (Max length "\112F") */ 823 if ( esclen > 0 ) 824 valid = ( ++esclen < 6 ); 825 if ( valid ) 826 valid = ( pos>0 || esclen>0 ); 827 } 828 else 829 { 830 valid = ( 831 esclen > 0 /* Escaped? Anything goes. */ 832 || ( pos>0 && c == '-' ) /* Dash cannot be 1st char */ 833 || isalpha(c) /* a-z, A-Z anywhere */ 834 || ( c >= 161 ) /* Unicode 161-255 anywhere */ 835 ); 836 esclen = 0; 837 } 838 } 839 return valid; 840} 841 842/* free single anchor */ 843static void FreeAnchor(Anchor *a) 844{ 845 if ( a ) 846 MemFree( a->name ); 847 MemFree( a ); 848} 849 850/* removes anchor for specific node */ 851void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, Node *node ) 852{ 853 TidyAttribImpl* attribs = &doc->attribs; 854 Anchor *delme = NULL, *curr, *prev = NULL; 855 856 for ( curr=attribs->anchor_list; curr!=NULL; curr=curr->next ) 857 { 858 if ( curr->node == node ) 859 { 860 if ( prev ) 861 prev->next = curr->next; 862 else 863 attribs->anchor_list = curr->next; 864 delme = curr; 865 break; 866 } 867 prev = curr; 868 } 869 FreeAnchor( delme ); 870} 871 872/* initialize new anchor */ 873static Anchor* NewAnchor( ctmbstr name, Node* node ) 874{ 875 Anchor *a = (Anchor*) MemAlloc( sizeof(Anchor) ); 876 877 a->name = TY_(tmbstrdup)( name ); 878 a->name = TY_(tmbstrtolower)(a->name); 879 a->node = node; 880 a->next = NULL; 881 882 return a; 883} 884 885/* add new anchor to namespace */ 886static Anchor* AddAnchor( TidyDocImpl* doc, ctmbstr name, Node *node ) 887{ 888 TidyAttribImpl* attribs = &doc->attribs; 889 Anchor *a = NewAnchor( name, node ); 890 891 if ( attribs->anchor_list == NULL) 892 attribs->anchor_list = a; 893 else 894 { 895 Anchor *here = attribs->anchor_list; 896 while (here->next) 897 here = here->next; 898 here->next = a; 899 } 900 901 return attribs->anchor_list; 902} 903 904/* return node associated with anchor */ 905static Node* GetNodeByAnchor( TidyDocImpl* doc, ctmbstr name ) 906{ 907 TidyAttribImpl* attribs = &doc->attribs; 908 Anchor *found; 909 tmbstr lname = TY_(tmbstrdup)(name); 910 lname = TY_(tmbstrtolower)(lname); 911 912 for ( found = attribs->anchor_list; found != NULL; found = found->next ) 913 { 914 if ( TY_(tmbstrcmp)(found->name, lname) == 0 ) 915 break; 916 } 917 918 MemFree(lname); 919 if ( found ) 920 return found->node; 921 return NULL; 922} 923 924/* free all anchors */ 925void TY_(FreeAnchors)( TidyDocImpl* doc ) 926{ 927 TidyAttribImpl* attribs = &doc->attribs; 928 Anchor* a; 929 while (NULL != (a = attribs->anchor_list) ) 930 { 931 attribs->anchor_list = a->next; 932 FreeAnchor(a); 933 } 934} 935 936/* public method for inititializing attribute dictionary */ 937void TY_(InitAttrs)( TidyDocImpl* doc ) 938{ 939 ClearMemory( &doc->attribs, sizeof(TidyAttribImpl) ); 940#ifdef _DEBUG 941 { 942 /* Attribute ID is index position in Attribute type lookup table */ 943 uint ix; 944 for ( ix=0; ix < N_TIDY_ATTRIBS; ++ix ) 945 { 946 const Attribute* dict = &attribute_defs[ ix ]; 947 assert( (uint) dict->id == ix ); 948 } 949 } 950#endif 951} 952 953/* free all declared attributes */ 954static void FreeDeclaredAttributes( TidyDocImpl* doc ) 955{ 956 TidyAttribImpl* attribs = &doc->attribs; 957 Attribute* dict; 958 while ( NULL != (dict = attribs->declared_attr_list) ) 959 { 960 attribs->declared_attr_list = dict->next; 961#if ATTRIBUTE_HASH_LOOKUP 962 removeFromHash( &doc->attribs, dict->name ); 963#endif 964 MemFree( dict->name ); 965 MemFree( dict ); 966 } 967} 968 969void TY_(FreeAttrTable)( TidyDocImpl* doc ) 970{ 971#if ATTRIBUTE_HASH_LOOKUP 972 emptyHash( &doc->attribs ); 973#endif 974 TY_(FreeAnchors)( doc ); 975 FreeDeclaredAttributes( doc ); 976} 977 978void TY_(AppendToClassAttr)( AttVal *classattr, ctmbstr classname ) 979{ 980 uint len = TY_(tmbstrlen)(classattr->value) + 981 TY_(tmbstrlen)(classname) + 2; 982 tmbstr s = (tmbstr) MemAlloc( len ); 983 s[0] = '\0'; 984 if (classattr->value) 985 { 986 TY_(tmbstrcpy)( s, classattr->value ); 987 TY_(tmbstrcat)( s, " " ); 988 } 989 TY_(tmbstrcat)( s, classname ); 990 if (classattr->value) 991 MemFree( classattr->value ); 992 classattr->value = s; 993} 994 995/* concatenate styles */ 996static void AppendToStyleAttr( AttVal *styleattr, ctmbstr styleprop ) 997{ 998 /* 999 this doesn't handle CSS comments and 1000 leading/trailing white-space very well 1001 see http://www.w3.org/TR/css-style-attr 1002 */ 1003 uint end = TY_(tmbstrlen)(styleattr->value); 1004 1005 if (end >0 && styleattr->value[end - 1] == ';') 1006 { 1007 /* attribute ends with declaration seperator */ 1008 1009 styleattr->value = (tmbstr) MemRealloc(styleattr->value, 1010 end + TY_(tmbstrlen)(styleprop) + 2); 1011 1012 TY_(tmbstrcat)(styleattr->value, " "); 1013 TY_(tmbstrcat)(styleattr->value, styleprop); 1014 } 1015 else if (end >0 && styleattr->value[end - 1] == '}') 1016 { 1017 /* attribute ends with rule set */ 1018 1019 styleattr->value = (tmbstr) MemRealloc(styleattr->value, 1020 end + TY_(tmbstrlen)(styleprop) + 6); 1021 1022 TY_(tmbstrcat)(styleattr->value, " { "); 1023 TY_(tmbstrcat)(styleattr->value, styleprop); 1024 TY_(tmbstrcat)(styleattr->value, " }"); 1025 } 1026 else 1027 { 1028 /* attribute ends with property value */ 1029 1030 styleattr->value = (tmbstr) MemRealloc(styleattr->value, 1031 end + TY_(tmbstrlen)(styleprop) + 3); 1032 1033 if (end > 0) 1034 TY_(tmbstrcat)(styleattr->value, "; "); 1035 TY_(tmbstrcat)(styleattr->value, styleprop); 1036 } 1037} 1038 1039/* 1040 the same attribute name can't be used 1041 more than once in each element 1042*/ 1043void TY_(RepairDuplicateAttributes)( TidyDocImpl* doc, Node *node) 1044{ 1045 AttVal *first; 1046 1047 for (first = node->attributes; first != NULL;) 1048 { 1049 AttVal *second; 1050 Bool firstRedefined = no; 1051 1052 if (!(first->asp == NULL && first->php == NULL)) 1053 { 1054 first = first->next; 1055 continue; 1056 } 1057 1058 for (second = first->next; second != NULL;) 1059 { 1060 AttVal *temp; 1061 1062 if (!(second->asp == NULL && second->php == NULL && 1063 AttrsHaveSameId(first, second))) 1064 { 1065 second = second->next; 1066 continue; 1067 } 1068 1069 /* first and second attribute have same local name */ 1070 /* now determine what to do with this duplicate... */ 1071 1072 if (attrIsCLASS(first) && cfgBool(doc, TidyJoinClasses) 1073 && AttrHasValue(first) && AttrHasValue(second)) 1074 { 1075 /* concatenate classes */ 1076 1077 TY_(AppendToClassAttr)(first, second->value); 1078 1079 temp = second->next; 1080 TY_(ReportAttrError)( doc, node, second, JOINING_ATTRIBUTE); 1081 TY_(RemoveAttribute)( doc, node, second ); 1082 second = temp; 1083 } 1084 else if (attrIsSTYLE(first) && cfgBool(doc, TidyJoinStyles) 1085 && AttrHasValue(first) && AttrHasValue(second)) 1086 { 1087 AppendToStyleAttr( first, second->value ); 1088 1089 temp = second->next; 1090 TY_(ReportAttrError)( doc, node, second, JOINING_ATTRIBUTE); 1091 TY_(RemoveAttribute)( doc, node, second ); 1092 second = temp; 1093 } 1094 else if ( cfg(doc, TidyDuplicateAttrs) == TidyKeepLast ) 1095 { 1096 temp = first->next; 1097 TY_(ReportAttrError)( doc, node, first, REPEATED_ATTRIBUTE); 1098 TY_(RemoveAttribute)( doc, node, first ); 1099 firstRedefined = yes; 1100 first = temp; 1101 second = second->next; 1102 } 1103 else /* TidyDuplicateAttrs == TidyKeepFirst */ 1104 { 1105 temp = second->next; 1106 TY_(ReportAttrError)( doc, node, second, REPEATED_ATTRIBUTE); 1107 TY_(RemoveAttribute)( doc, node, second ); 1108 second = temp; 1109 } 1110 } 1111 if (!firstRedefined) 1112 first = first->next; 1113 } 1114} 1115 1116/* ignore unknown attributes for proprietary elements */ 1117const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attval ) 1118{ 1119 const Attribute* attribute = attval->dict; 1120 1121 if ( attribute != NULL ) 1122 { 1123 if (attribute->versions & VERS_XML) 1124 { 1125 doc->lexer->isvoyager = yes; 1126 if (!cfgBool(doc, TidyHtmlOut)) 1127 { 1128 TY_(SetOptionBool)(doc, TidyXhtmlOut, yes); 1129 TY_(SetOptionBool)(doc, TidyXmlOut, yes); 1130 } 1131 } 1132 1133 TY_(ConstrainVersion)(doc, AttributeVersions(node, attval)); 1134 1135 if (attribute->attrchk) 1136 attribute->attrchk( doc, node, attval ); 1137 } 1138 1139#ifdef TIDY_APPLE_CHANGES 1140 if (AttributeIsMarkedForRemoval(attval)) 1141 { 1142 TY_(RemoveAttribute)( doc, node, attval ); 1143 } 1144 else 1145#endif 1146 1147 if (AttributeIsProprietary(node, attval)) 1148 { 1149 TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE); 1150 1151 if (cfgBool(doc, TidyDropPropAttrs)) 1152 TY_(RemoveAttribute)( doc, node, attval ); 1153 } 1154 1155 return attribute; 1156} 1157 1158Bool TY_(IsBoolAttribute)(AttVal *attval) 1159{ 1160 const Attribute *attribute = ( attval ? attval->dict : NULL ); 1161 if ( attribute && attribute->attrchk == CH_BOOL ) 1162 return yes; 1163 return no; 1164} 1165 1166Bool TY_(attrIsEvent)( AttVal* attval ) 1167{ 1168 TidyAttrId atid = AttrId( attval ); 1169 1170 return (atid == TidyAttr_OnAFTERUPDATE || 1171 atid == TidyAttr_OnBEFOREUNLOAD || 1172 atid == TidyAttr_OnBEFOREUPDATE || 1173 atid == TidyAttr_OnBLUR || 1174 atid == TidyAttr_OnCHANGE || 1175 atid == TidyAttr_OnCLICK || 1176 atid == TidyAttr_OnDATAAVAILABLE || 1177 atid == TidyAttr_OnDATASETCHANGED || 1178 atid == TidyAttr_OnDATASETCOMPLETE || 1179 atid == TidyAttr_OnDBLCLICK || 1180 atid == TidyAttr_OnERRORUPDATE || 1181 atid == TidyAttr_OnFOCUS || 1182 atid == TidyAttr_OnKEYDOWN || 1183 atid == TidyAttr_OnKEYPRESS || 1184 atid == TidyAttr_OnKEYUP || 1185 atid == TidyAttr_OnLOAD || 1186 atid == TidyAttr_OnMOUSEDOWN || 1187 atid == TidyAttr_OnMOUSEMOVE || 1188 atid == TidyAttr_OnMOUSEOUT || 1189 atid == TidyAttr_OnMOUSEOVER || 1190 atid == TidyAttr_OnMOUSEUP || 1191 atid == TidyAttr_OnRESET || 1192 atid == TidyAttr_OnROWENTER || 1193 atid == TidyAttr_OnROWEXIT || 1194 atid == TidyAttr_OnSELECT || 1195 atid == TidyAttr_OnSUBMIT || 1196 atid == TidyAttr_OnUNLOAD); 1197} 1198 1199static void CheckLowerCaseAttrValue( TidyDocImpl* doc, Node *node, AttVal *attval) 1200{ 1201 tmbstr p; 1202 Bool hasUpper = no; 1203 1204 if (!AttrHasValue(attval)) 1205 return; 1206 1207 p = attval->value; 1208 1209 while (*p) 1210 { 1211 if (TY_(IsUpper)(*p)) /* #501230 - fix by Terry Teague - 09 Jan 02 */ 1212 { 1213 hasUpper = yes; 1214 break; 1215 } 1216 p++; 1217 } 1218 1219 if (hasUpper) 1220 { 1221 Lexer* lexer = doc->lexer; 1222 if (lexer->isvoyager) 1223 TY_(ReportAttrError)( doc, node, attval, ATTR_VALUE_NOT_LCASE); 1224 1225 if ( lexer->isvoyager || cfgBool(doc, TidyLowerLiterals) ) 1226 attval->value = TY_(tmbstrtolower)(attval->value); 1227 } 1228} 1229 1230/* methods for checking value of a specific attribute */ 1231 1232void TY_(CheckUrl)( TidyDocImpl* doc, Node *node, AttVal *attval) 1233{ 1234 tmbchar c; 1235 tmbstr dest, p; 1236 uint escape_count = 0, backslash_count = 0; 1237 uint i, pos = 0; 1238 uint len; 1239 1240/* Apple Changes: 1241 2007-02-18 iccir Rewrote support for absoluting relative URLs 1242*/ 1243#ifdef TIDY_APPLE_CHANGES 1244 Bool ends_with_slash, starts_with_slash, already_absolute = no; 1245 ctmbstr base_uri; 1246 uint base_uri_len; 1247#endif 1248 1249 if (!AttrHasValue(attval)) 1250 { 1251 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1252 return; 1253 } 1254 1255 p = attval->value; 1256 1257#ifdef TIDY_APPLE_CHANGES 1258 starts_with_slash = (p[0] == '/'); 1259 base_uri = cfgStr(doc, starts_with_slash ? TidyAbsolutePathBaseUri : TidyRelativePathBaseUri); 1260 1261 if (base_uri && base_uri[0]) 1262 { 1263 for (i = 0; 0 != (c = p[i]); ++i) 1264 { 1265 if (c == ':') 1266 { 1267 already_absolute = yes; 1268 break; 1269 } 1270 else if (c == '/') 1271 { 1272 break; 1273 } 1274 } 1275 1276 if (!already_absolute) 1277 { 1278 base_uri_len = tmbstrlen(base_uri); 1279 len = tmbstrlen(p) + base_uri_len + 2; 1280 dest = (tmbstr) MemAlloc(len); 1281 1282 /* 1283 If the current value started with a slash or our base uri ends with a slash, 1284 the format can be %s%s. Else, we need to insert a slash in between. 1285 */ 1286 ends_with_slash = (base_uri[base_uri_len - 1] == '/'); 1287 1288 if (starts_with_slash && ends_with_slash) 1289 { 1290 sprintf(dest, "%s%s", base_uri, p+1); 1291 } 1292 else if (starts_with_slash || ends_with_slash) 1293 { 1294 sprintf(dest, "%s%s", base_uri, p); 1295 } 1296 else 1297 { 1298 sprintf(dest, "%s/%s", base_uri, p); 1299 } 1300 1301 MemFree(attval->value); 1302 attval->value = dest; 1303 p = dest; 1304 } 1305 } 1306#endif 1307 1308 1309/* Apple Changes: 1310 2007-02-01 iccir If TidySanitizeAgainstXSS is set, remove any URL attribute which contains embedded scripts 1311*/ 1312#ifdef TIDY_APPLE_CHANGES 1313 if (cfgBool(doc, TidySanitizeAgainstXSS)) 1314 { 1315 c = p[0]; 1316 1317 /* Check first character as an optimization. */ 1318 if (c != 'h' && c != 'H') 1319 { 1320 if (tmbstrncasecmp(p, "javascript:", 11) == 0 || 1321 tmbstrncasecmp(p, "script:", 7) == 0 || 1322 tmbstrncasecmp(p, "vbscript:", 9) == 0 || 1323 tmbstrncasecmp(p, "file:", 5) == 0) 1324 { 1325 MarkAttributeForRemoval( attval ); 1326 return; 1327 } 1328 } 1329 } 1330#endif 1331 1332 for (i = 0; '\0' != (c = p[i]); ++i) 1333 { 1334 if (c == '\\') 1335 { 1336 ++backslash_count; 1337 if ( cfgBool(doc, TidyFixBackslash) ) 1338 p[i] = '/'; 1339 } 1340 else if ((c > 0x7e) || (c <= 0x20) || (strchr("<>", c))) 1341 ++escape_count; 1342 } 1343 1344 if ( cfgBool(doc, TidyFixUri) && escape_count ) 1345 { 1346 len = TY_(tmbstrlen)(p) + escape_count * 2 + 1; 1347 dest = (tmbstr) MemAlloc(len); 1348 1349 for (i = 0; 0 != (c = p[i]); ++i) 1350 { 1351 if ((c > 0x7e) || (c <= 0x20) || (strchr("<>", c))) 1352 pos += sprintf( dest + pos, "%%%02X", (byte)c ); 1353 else 1354 dest[pos++] = c; 1355 } 1356 dest[pos] = 0; 1357 1358 MemFree(attval->value); 1359 attval->value = dest; 1360 } 1361 if ( backslash_count ) 1362 { 1363 if ( cfgBool(doc, TidyFixBackslash) ) 1364 TY_(ReportAttrError)( doc, node, attval, FIXED_BACKSLASH ); 1365 else 1366 TY_(ReportAttrError)( doc, node, attval, BACKSLASH_IN_URI ); 1367 } 1368 if ( escape_count ) 1369 { 1370 if ( cfgBool(doc, TidyFixUri) ) 1371 TY_(ReportAttrError)( doc, node, attval, ESCAPED_ILLEGAL_URI); 1372 else 1373 TY_(ReportAttrError)( doc, node, attval, ILLEGAL_URI_REFERENCE); 1374 1375 doc->badChars |= BC_INVALID_URI; 1376 } 1377} 1378 1379/* RFC 2396, section 4.2 states: 1380 "[...] in the case of HTML's FORM element, [...] an 1381 empty URI reference represents the base URI of the 1382 current document and should be replaced by that URI 1383 when transformed into a request." 1384*/ 1385void CheckAction( TidyDocImpl* doc, Node *node, AttVal *attval) 1386{ 1387 if (AttrHasValue(attval)) 1388 TY_(CheckUrl)( doc, node, attval ); 1389} 1390 1391/* Apple Changes: 1392 2007-01-31 iccir If TidySanitizeAgainstXSS is set, remove all on* (onBlur, onClick, etc) attributes 1393*/ 1394#ifdef TIDY_APPLE_CHANGES 1395void CheckScript( TidyDocImpl* doc, Node *node, AttVal *attval ) 1396{ 1397 if ( cfgBool(doc, TidySanitizeAgainstXSS) ) 1398 MarkAttributeForRemoval( attval ); 1399} 1400#else 1401void CheckScript( TidyDocImpl* ARG_UNUSED(doc), Node* ARG_UNUSED(node), 1402 AttVal* ARG_UNUSED(attval)) 1403{ 1404} 1405#endif 1406 1407Bool TY_(IsValidHTMLID)(ctmbstr id) 1408{ 1409 ctmbstr s = id; 1410 1411 if (!s) 1412 return no; 1413 1414 if (!TY_(IsLetter)(*s++)) 1415 return no; 1416 1417 while (*s) 1418 if (!TY_(IsNamechar)(*s++)) 1419 return no; 1420 1421 return yes; 1422 1423} 1424 1425Bool TY_(IsValidXMLID)(ctmbstr id) 1426{ 1427 ctmbstr s = id; 1428 tchar c; 1429 1430 if (!s) 1431 return no; 1432 1433 c = *s++; 1434 if (c > 0x7F) 1435 s += TY_(GetUTF8)(s, &c); 1436 1437 if (!(TY_(IsXMLLetter)(c) || c == '_' || c == ':')) 1438 return no; 1439 1440 while (*s) 1441 { 1442 c = (unsigned char)*s; 1443 1444 if (c > 0x7F) 1445 s += TY_(GetUTF8)(s, &c); 1446 1447 ++s; 1448 1449 if (!TY_(IsXMLNamechar)(c)) 1450 return no; 1451 } 1452 1453 return yes; 1454} 1455 1456static Bool IsValidNMTOKEN(ctmbstr name) 1457{ 1458 ctmbstr s = name; 1459 tchar c; 1460 1461 if (!s) 1462 return no; 1463 1464 while (*s) 1465 { 1466 c = (unsigned char)*s; 1467 1468 if (c > 0x7F) 1469 s += TY_(GetUTF8)(s, &c); 1470 1471 ++s; 1472 1473 if (!TY_(IsXMLNamechar)(c)) 1474 return no; 1475 } 1476 1477 return yes; 1478} 1479 1480static Bool AttrValueIsAmong(AttVal *attval, ctmbstr const list[]) 1481{ 1482 const ctmbstr *v; 1483 for (v = list; *v; ++v) 1484 if (AttrValueIs(attval, *v)) 1485 return yes; 1486 return no; 1487} 1488 1489static void CheckAttrValidity( TidyDocImpl* doc, Node *node, AttVal *attval, 1490 ctmbstr const list[]) 1491{ 1492 if (!AttrHasValue(attval)) 1493 { 1494 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1495 return; 1496 } 1497 1498 CheckLowerCaseAttrValue( doc, node, attval ); 1499 1500 if (!AttrValueIsAmong(attval, list)) 1501 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1502} 1503 1504void CheckName( TidyDocImpl* doc, Node *node, AttVal *attval) 1505{ 1506 Node *old; 1507 1508 if (!AttrHasValue(attval)) 1509 { 1510 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1511 return; 1512 } 1513 1514 if ( TY_(IsAnchorElement)(doc, node) ) 1515 { 1516 if (cfgBool(doc, TidyXmlOut) && !IsValidNMTOKEN(attval->value)) 1517 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1518 1519 if ((old = GetNodeByAnchor(doc, attval->value)) && old != node) 1520 { 1521 TY_(ReportAttrError)( doc, node, attval, ANCHOR_NOT_UNIQUE); 1522 } 1523 else 1524 AddAnchor( doc, attval->value, node ); 1525 } 1526} 1527 1528/* Apple Changes: 1529 2007-01-30 iccir Add support for dropping 'class' attributes with a certain prefix 1530 2007-02-02 iccir When a style attribute is encountered, remove it if TidySanitizeAgainstXSS is set 1531*/ 1532#ifdef TIDY_APPLE_CHANGES 1533void CheckClass( TidyDocImpl* doc, Node *node, AttVal *attval) 1534{ 1535 ctmbstr prefix = cfgStr(doc, TidyDropClassesWithPrefix); 1536 1537 if (prefix && attval->value) 1538 { 1539 tmbstr value = attval->value; 1540 uint len = tmbstrlen(prefix); 1541 1542 if (tmbstrlen(value) >= len && tmbstrncasecmp(prefix, value, len) == 0) 1543 { 1544 MarkAttributeForRemoval( attval ); 1545 } 1546 } 1547} 1548 1549void CheckStyleAttr( TidyDocImpl* doc, Node *node, AttVal *attval) 1550{ 1551 if ( cfgBool(doc, TidySanitizeAgainstXSS) ) 1552 MarkAttributeForRemoval( attval ); 1553} 1554#endif 1555 1556void CheckId( TidyDocImpl* doc, Node *node, AttVal *attval ) 1557{ 1558 Lexer* lexer = doc->lexer; 1559 Node *old; 1560 1561/* Apple Changes: 1562 2007-01-30 iccir Add support for dropping 'id' attributes with a certain prefix 1563*/ 1564#ifdef TIDY_APPLE_CHANGES 1565 ctmbstr prefix = cfgStr(doc, TidyDropIdsWithPrefix); 1566 1567 if (prefix && attval->value) 1568 { 1569 tmbstr value = attval->value; 1570 uint len = tmbstrlen(prefix); 1571 1572 if (tmbstrlen(value) >= len && tmbstrncasecmp(prefix, value, len) == 0) 1573 { 1574 MarkAttributeForRemoval( attval ); 1575 return; 1576 } 1577 } 1578#endif 1579 1580 if (!AttrHasValue(attval)) 1581 { 1582 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1583 return; 1584 } 1585 1586 if (!TY_(IsValidHTMLID)(attval->value)) 1587 { 1588 if (lexer->isvoyager && TY_(IsValidXMLID)(attval->value)) 1589 TY_(ReportAttrError)( doc, node, attval, XML_ID_SYNTAX); 1590 else 1591 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1592 } 1593 1594 if ((old = GetNodeByAnchor(doc, attval->value)) && old != node) 1595 { 1596 TY_(ReportAttrError)( doc, node, attval, ANCHOR_NOT_UNIQUE); 1597 } 1598 else 1599 AddAnchor( doc, attval->value, node ); 1600} 1601 1602void CheckBool( TidyDocImpl* doc, Node *node, AttVal *attval) 1603{ 1604 if (!AttrHasValue(attval)) 1605 return; 1606 1607 CheckLowerCaseAttrValue( doc, node, attval ); 1608} 1609 1610void CheckAlign( TidyDocImpl* doc, Node *node, AttVal *attval) 1611{ 1612 ctmbstr const values[] = {"left", "right", "center", "justify", NULL}; 1613 1614 /* IMG, OBJECT, APPLET and EMBED use align for vertical position */ 1615 if (node->tag && (node->tag->model & CM_IMG)) 1616 { 1617 CheckValign( doc, node, attval ); 1618 return; 1619 } 1620 1621 if (!AttrHasValue(attval)) 1622 { 1623 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1624 return; 1625 } 1626 1627 CheckLowerCaseAttrValue( doc, node, attval); 1628 1629 /* currently CheckCaption(...) takes care of the remaining cases */ 1630 if (nodeIsCAPTION(node)) 1631 return; 1632 1633 if (!AttrValueIsAmong(attval, values)) 1634 { 1635 /* align="char" is allowed for elements with CM_TABLE|CM_ROW 1636 except CAPTION which is excluded above, */ 1637 if( !(AttrValueIs(attval, "char") 1638 && TY_(nodeHasCM)(node, CM_TABLE|CM_ROW)) ) 1639 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1640 } 1641} 1642 1643void CheckValign( TidyDocImpl* doc, Node *node, AttVal *attval) 1644{ 1645 ctmbstr const values[] = {"top", "middle", "bottom", "baseline", NULL}; 1646 ctmbstr const values2[] = {"left", "right", NULL}; 1647 ctmbstr const valuesp[] = {"texttop", "absmiddle", "absbottom", 1648 "textbottom", NULL}; 1649 1650 if (!AttrHasValue(attval)) 1651 { 1652 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1653 return; 1654 } 1655 1656 CheckLowerCaseAttrValue( doc, node, attval ); 1657 1658 if (AttrValueIsAmong(attval, values)) 1659 { 1660 /* all is fine */ 1661 } 1662 else if (AttrValueIsAmong(attval, values2)) 1663 { 1664 if (!(node->tag && (node->tag->model & CM_IMG))) 1665 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1666 } 1667 else if (AttrValueIsAmong(attval, valuesp)) 1668 { 1669 TY_(ConstrainVersion)( doc, VERS_PROPRIETARY ); 1670 TY_(ReportAttrError)( doc, node, attval, PROPRIETARY_ATTR_VALUE); 1671 } 1672 else 1673 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1674} 1675 1676void CheckLength( TidyDocImpl* doc, Node *node, AttVal *attval) 1677{ 1678 tmbstr p; 1679 1680 if (!AttrHasValue(attval)) 1681 { 1682 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1683 return; 1684 } 1685 1686 /* don't check for <col width=...> and <colgroup width=...> */ 1687 if (attrIsWIDTH(attval) && (nodeIsCOL(node) || nodeIsCOLGROUP(node))) 1688 return; 1689 1690 p = attval->value; 1691 1692 if (!TY_(IsDigit)(*p++)) 1693 { 1694 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1695 } 1696 else 1697 { 1698 while (*p) 1699 { 1700 if (!TY_(IsDigit)(*p) && *p != '%') 1701 { 1702 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1703 break; 1704 } 1705 ++p; 1706 } 1707 } 1708} 1709 1710void CheckTarget( TidyDocImpl* doc, Node *node, AttVal *attval) 1711{ 1712 ctmbstr const values[] = {"_blank", "_self", "_parent", "_top", NULL}; 1713 1714 if (!AttrHasValue(attval)) 1715 { 1716 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1717 return; 1718 } 1719 1720 /* target names must begin with A-Za-z ... */ 1721 if (TY_(IsLetter)(attval->value[0])) 1722 return; 1723 1724 /* or be one of the allowed list */ 1725 if (!AttrValueIsAmong(attval, values)) 1726 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1727} 1728 1729void CheckFsubmit( TidyDocImpl* doc, Node *node, AttVal *attval) 1730{ 1731 ctmbstr const values[] = {"get", "post", NULL}; 1732 CheckAttrValidity( doc, node, attval, values ); 1733} 1734 1735void CheckClear( TidyDocImpl* doc, Node *node, AttVal *attval) 1736{ 1737 ctmbstr const values[] = {"none", "left", "right", "all", NULL}; 1738 1739 if (!AttrHasValue(attval)) 1740 { 1741 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1742 if (attval->value == NULL) 1743 attval->value = TY_(tmbstrdup)( "none" ); 1744 return; 1745 } 1746 1747 CheckLowerCaseAttrValue( doc, node, attval ); 1748 1749 if (!AttrValueIsAmong(attval, values)) 1750 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1751} 1752 1753void CheckShape( TidyDocImpl* doc, Node *node, AttVal *attval) 1754{ 1755 ctmbstr const values[] = {"rect", "default", "circle", "poly", NULL}; 1756 CheckAttrValidity( doc, node, attval, values ); 1757} 1758 1759void CheckScope( TidyDocImpl* doc, Node *node, AttVal *attval) 1760{ 1761 ctmbstr const values[] = {"row", "rowgroup", "col", "colgroup", NULL}; 1762 CheckAttrValidity( doc, node, attval, values ); 1763} 1764 1765void CheckNumber( TidyDocImpl* doc, Node *node, AttVal *attval) 1766{ 1767 tmbstr p; 1768 1769 if (!AttrHasValue(attval)) 1770 { 1771 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1772 return; 1773 } 1774 1775 /* don't check <frameset cols=... rows=...> */ 1776 if ( nodeIsFRAMESET(node) && 1777 (attrIsCOLS(attval) || attrIsROWS(attval))) 1778 return; 1779 1780 p = attval->value; 1781 1782 /* font size may be preceded by + or - */ 1783 if ( nodeIsFONT(node) && (*p == '+' || *p == '-') ) 1784 ++p; 1785 1786 while (*p) 1787 { 1788 if (!TY_(IsDigit)(*p)) 1789 { 1790 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1791 break; 1792 } 1793 ++p; 1794 } 1795} 1796 1797/* check hexadecimal color value */ 1798static Bool IsValidColorCode(ctmbstr color) 1799{ 1800 uint i; 1801 1802 if (TY_(tmbstrlen)(color) != 6) 1803 return no; 1804 1805 /* check if valid hex digits and letters */ 1806 for (i = 0; i < 6; i++) 1807 if (!TY_(IsDigit)(color[i]) && !strchr("abcdef", TY_(ToLower)(color[i]))) 1808 return no; 1809 1810 return yes; 1811} 1812 1813/* check color syntax and beautify value by option */ 1814void CheckColor( TidyDocImpl* doc, Node *node, AttVal *attval) 1815{ 1816 Bool valid = no; 1817 tmbstr given; 1818 1819 if (!AttrHasValue(attval)) 1820 { 1821 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1822 return; 1823 } 1824 1825 given = attval->value; 1826 1827 /* 727851 - add hash to hash-less color values */ 1828 if (given[0] != '#' && (valid = IsValidColorCode(given))) 1829 { 1830 tmbstr cp, s; 1831 1832 cp = s = (tmbstr) MemAlloc(2 + TY_(tmbstrlen)(given)); 1833 *cp++ = '#'; 1834 while ('\0' != (*cp++ = *given++)) 1835 continue; 1836 1837 TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE_REPLACED); 1838 1839 MemFree(attval->value); 1840 given = attval->value = s; 1841 } 1842 1843 if (!valid && given[0] == '#') 1844 valid = IsValidColorCode(given + 1); 1845 1846 if (valid && given[0] == '#' && cfgBool(doc, TidyReplaceColor)) 1847 { 1848 ctmbstr newName = GetColorName(given); 1849 1850 if (newName) 1851 { 1852 MemFree(attval->value); 1853 given = attval->value = TY_(tmbstrdup)(newName); 1854 } 1855 } 1856 1857 /* if it is not a valid color code, it is a color name */ 1858 if (!valid) 1859 valid = GetColorCode(given) != NULL; 1860 1861 if (valid && given[0] == '#') 1862 attval->value = TY_(tmbstrtoupper)(attval->value); 1863 else if (valid) 1864 attval->value = TY_(tmbstrtolower)(attval->value); 1865 1866 if (!valid) 1867 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1868} 1869 1870/* check valuetype attribute for element param */ 1871void CheckVType( TidyDocImpl* doc, Node *node, AttVal *attval) 1872{ 1873 ctmbstr const values[] = {"data", "object", "ref", NULL}; 1874 CheckAttrValidity( doc, node, attval, values ); 1875} 1876 1877/* checks scrolling attribute */ 1878void CheckScroll( TidyDocImpl* doc, Node *node, AttVal *attval) 1879{ 1880 ctmbstr const values[] = {"no", "auto", "yes", NULL}; 1881 CheckAttrValidity( doc, node, attval, values ); 1882} 1883 1884/* checks dir attribute */ 1885void CheckTextDir( TidyDocImpl* doc, Node *node, AttVal *attval) 1886{ 1887 ctmbstr const values[] = {"rtl", "ltr", NULL}; 1888 CheckAttrValidity( doc, node, attval, values ); 1889} 1890 1891/* checks lang and xml:lang attributes */ 1892void CheckLang( TidyDocImpl* doc, Node *node, AttVal *attval) 1893{ 1894 /* empty xml:lang is allowed through XML 1.0 SE errata */ 1895 if (!AttrHasValue(attval) && !attrIsXML_LANG(attval)) 1896 { 1897 if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 ) 1898 { 1899 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE ); 1900 } 1901 return; 1902 } 1903} 1904 1905/* checks type attribute */ 1906void CheckType( TidyDocImpl* doc, Node *node, AttVal *attval) 1907{ 1908 ctmbstr const valuesINPUT[] = {"text", "password", "checkbox", "radio", 1909 "submit", "reset", "file", "hidden", 1910 "image", "button", NULL}; 1911 ctmbstr const valuesBUTTON[] = {"button", "submit", "reset", NULL}; 1912 ctmbstr const valuesUL[] = {"disc", "square", "circle", NULL}; 1913 ctmbstr const valuesOL[] = {"1", "a", "i", NULL}; 1914 1915 if (nodeIsINPUT(node)) 1916 CheckAttrValidity( doc, node, attval, valuesINPUT ); 1917 else if (nodeIsBUTTON(node)) 1918 CheckAttrValidity( doc, node, attval, valuesBUTTON ); 1919 else if (nodeIsUL(node)) 1920 CheckAttrValidity( doc, node, attval, valuesUL ); 1921 else if (nodeIsOL(node)) 1922 { 1923 if (!AttrHasValue(attval)) 1924 { 1925 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1926 return; 1927 } 1928 if (!AttrValueIsAmong(attval, valuesOL)) 1929 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1930 } 1931 else if (nodeIsLI(node)) 1932 { 1933 if (!AttrHasValue(attval)) 1934 { 1935 TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); 1936 return; 1937 } 1938 if (AttrValueIsAmong(attval, valuesUL)) 1939 CheckLowerCaseAttrValue( doc, node, attval ); 1940 else if (!AttrValueIsAmong(attval, valuesOL)) 1941 TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1942 } 1943 return; 1944} 1945 1946/* 1947 * local variables: 1948 * mode: c 1949 * indent-tabs-mode: nil 1950 * c-basic-offset: 4 1951 * eval: (c-set-offset 'substatement-open 0) 1952 * end: 1953 */ 1954