1/* localize.c -- text strings and routines to handle errors and general messages 2 3 (c) 1998-2006 (W3C) MIT, ERCIM, Keio University 4 Portions Copyright University of Toronto 5 See tidy.h and access.h for the copyright notice. 6 7 You should only need to edit this file and tidy.c 8 to localize HTML tidy. *** This needs checking *** 9 10 CVS Info : 11 12 $Author: iccir $ 13 $Date: 2007/02/21 02:46:28 $ 14 $Revision: 1.6 $ 15 16*/ 17 18#include "tidy-int.h" 19#include "lexer.h" 20#include "streamio.h" 21#include "message.h" 22#include "tmbstr.h" 23#include "utf8.h" 24 25/* used to point to Web Accessibility Guidelines */ 26#define ACCESS_URL "http://www.w3.org/WAI/GL" 27 28/* points to the Adaptive Technology Resource Centre at the 29** University of Toronto 30*/ 31#define ATRC_ACCESS_URL "http://www.aprompt.ca/Tidy/accessibilitychecks.html" 32 33#include "version.h" 34 35ctmbstr TY_(ReleaseDate)(void) 36{ 37 return TY_(release_date); 38} 39 40static struct _msgfmt 41{ 42 uint code; 43 ctmbstr fmt; 44} const msgFormat[] = 45{ 46/* ReportEncodingWarning */ 47 { ENCODING_MISMATCH, "specified input encoding (%s) does not match actual input encoding (%s)" }, /* Warning */ 48 49/* ReportEncodingError */ 50 { VENDOR_SPECIFIC_CHARS, "%s invalid character code %s" }, /* Error */ 51 { INVALID_SGML_CHARS, "%s invalid character code %s" }, /* Error */ 52 { INVALID_UTF8, "%s invalid UTF-8 bytes (char. code %s)" }, /* Error */ 53 { INVALID_UTF16, "%s invalid UTF-16 surrogate pair (char. code %s)" }, /* Error */ 54 { INVALID_NCR, "%s invalid numeric character reference %s" }, /* Error */ 55 56/* ReportEntityError */ 57 { MISSING_SEMICOLON, "entity \"%s\" doesn't end in ';'" }, /* Warning in HTML, Error in XML/XHTML */ 58 { MISSING_SEMICOLON_NCR, "numeric character reference \"%s\" doesn't end in ';'" }, /* Warning in HTML, Error in XML/XHTML */ 59 { UNESCAPED_AMPERSAND, "unescaped & which should be written as &" }, /* Warning in HTML, Error in XHTML */ 60 { UNKNOWN_ENTITY, "unescaped & or unknown entity \"%s\"" }, /* Error */ 61 { APOS_UNDEFINED, "named entity ' only defined in XML/XHTML" }, /* Error in HTML (should only occur for HTML input) */ 62 63/* ReportAttrError */ 64 65 /* attribute name */ 66 { INSERTING_ATTRIBUTE, "%s inserting \"%s\" attribute" }, /* Warning in CheckLINK, Error otherwise */ 67 { MISSING_ATTR_VALUE, "%s attribute \"%s\" lacks value" }, /* Warning in CheckUrl, Error otherwise */ 68 { UNKNOWN_ATTRIBUTE, "%s unknown attribute \"%s\"" }, /* Error */ 69 { PROPRIETARY_ATTRIBUTE, "%s proprietary attribute \"%s\"" }, /* Error */ 70 { JOINING_ATTRIBUTE, "%s joining values of repeated attribute \"%s\"" }, /* Error */ 71 { XML_ATTRIBUTE_VALUE, "%s has XML attribute \"%s\"" }, /* Error (but deprecated) */ 72 73 /* attribute value */ 74 { XML_ID_SYNTAX, "%s ID \"%s\" uses XML ID syntax" }, /* Warning if XHTML, Error if HTML */ 75 { ATTR_VALUE_NOT_LCASE, "%s attribute value \"%s\" must be lower case for XHTML" }, /* Error if XHTML input, Notice if HTML input and XHTML outout */ 76 { PROPRIETARY_ATTR_VALUE, "%s proprietary attribute value \"%s\"" }, /* Error */ 77 { ANCHOR_NOT_UNIQUE, "%s anchor \"%s\" already defined" }, /* Error */ 78 79 /* attribute name, attribute value */ 80 { BAD_ATTRIBUTE_VALUE, "%s attribute \"%s\" has invalid value \"%s\"" }, /* Error */ 81 { BAD_ATTRIBUTE_VALUE_REPLACED, "%s attribute \"%s\" had invalid value \"%s\" and has been replaced" }, /* Error */ 82 { INVALID_ATTRIBUTE, "%s attribute name \"%s\" (value=\"%s\") is invalid" }, /* Error */ 83 84 /* attribute value, attribute name */ 85 { REPEATED_ATTRIBUTE, "%s dropping value \"%s\" for repeated attribute \"%s\"" }, /* Error */ 86 87 /* no arguments */ 88 { INVALID_XML_ID, "%s cannot copy name attribute to id" }, /* Warning */ 89 { UNEXPECTED_GT, "%s missing '>' for end of tag" }, /* Warning if HTML, Error if XML/XHTML */ 90 { UNEXPECTED_QUOTEMARK, "%s unexpected or duplicate quote mark" }, /* Error */ 91 { MISSING_QUOTEMARK, "%s attribute with missing trailing quote mark" }, /* Error */ 92 { UNEXPECTED_END_OF_FILE_ATTR, "%s end of file while parsing attributes" }, /* Error */ 93 { ID_NAME_MISMATCH, "%s id and name attribute value mismatch" }, /* Error */ 94 { BACKSLASH_IN_URI, "%s URI reference contains backslash. Typo?" }, /* Error */ 95 { FIXED_BACKSLASH, "%s converting backslash in URI to slash" }, /* Error */ 96 { ILLEGAL_URI_REFERENCE, "%s improperly escaped URI reference" }, /* Error */ 97 { ESCAPED_ILLEGAL_URI, "%s escaping malformed URI reference" }, /* Error */ 98 { NEWLINE_IN_URI, "%s discarding newline in URI reference" }, /* Error */ 99 { WHITE_IN_URI, "%s discarding whitespace in URI reference" }, /* Error */ 100 { UNEXPECTED_EQUALSIGN, "%s unexpected '=', expected attribute name" }, /* Error */ 101 { MISSING_IMAGEMAP, "%s should use client-side image map" }, /* Warning (but deprecated) */ 102 103/* ReportMissingAttr */ 104 { MISSING_ATTRIBUTE, "%s lacks \"%s\" attribute" }, /* Error */ 105/* ReportWarning */ 106 { NESTED_EMPHASIS, "nested emphasis %s" }, /* Warning */ 107 { NESTED_QUOTATION, "nested q elements, possible typo." }, /* Warning */ 108 { OBSOLETE_ELEMENT, "replacing obsolete element %s by %s" }, /* Warning */ 109 { COERCE_TO_ENDTAG_WARN, "<%s> is probably intended as </%s>" }, /* Warning */ 110 111/* ReportNotice */ 112 { TRIM_EMPTY_ELEMENT, "trimming empty %s" }, /* Notice */ 113 { REPLACING_ELEMENT, "replacing %s by %s" }, /* Notice */ 114 115/* ReportError */ 116 { COERCE_TO_ENDTAG, "<%s> is probably intended as </%s>" }, /* Error */ 117 { REPLACING_UNEX_ELEMENT, "replacing unexpected %s by %s" }, /* Error */ 118 { MISSING_ENDTAG_FOR, "missing </%s>" }, /* Error */ 119 { MISSING_ENDTAG_BEFORE, "missing </%s> before %s" }, /* Error */ 120 { DISCARDING_UNEXPECTED, "discarding unexpected %s" }, /* Error */ 121 { NON_MATCHING_ENDTAG, "replacing unexpected %s by </%s>" }, /* Error */ 122 { TAG_NOT_ALLOWED_IN, "%s isn't allowed in <%s> elements" }, /* Error */ 123 { MISSING_STARTTAG, "missing <%s>" }, /* Error */ 124 { UNEXPECTED_ENDTAG, "unexpected </%s>" }, /* Error */ 125 { TOO_MANY_ELEMENTS, "too many %s elements" }, /* Error */ 126 { USING_BR_INPLACE_OF, "using <br> in place of %s" }, /* Error */ 127 { INSERTING_TAG, "inserting implicit <%s>" }, /* Error */ 128 { CANT_BE_NESTED, "%s can't be nested" }, /* Error */ 129 { PROPRIETARY_ELEMENT, "%s is not approved by W3C" }, /* Error */ 130 { ILLEGAL_NESTING, "%s shouldn't be nested" }, /* Error */ 131 { NOFRAMES_CONTENT, "%s not inside 'noframes' element" }, /* Error */ 132 { UNEXPECTED_END_OF_FILE, "unexpected end of file %s" }, /* Error */ 133 { ELEMENT_NOT_EMPTY, "%s element not empty or not closed" }, /* Error */ 134 { UNEXPECTED_ENDTAG_IN, "unexpected </%s> in <%s>" }, /* Error */ 135 { TOO_MANY_ELEMENTS_IN, "too many %s elements in <%s>" }, /* Error */ 136 { UNESCAPED_ELEMENT, "unescaped %s in pre content" }, /* Error (but deprecated) */ 137 138 /* no arguments */ 139 { DOCTYPE_AFTER_TAGS, "<!DOCTYPE> isn't allowed after elements" }, /* Error */ 140 { MISSING_TITLE_ELEMENT, "inserting missing 'title' element" }, /* Error */ 141 { INCONSISTENT_VERSION, "HTML DOCTYPE doesn't match content" }, /* Error */ 142 { MISSING_DOCTYPE, "missing <!DOCTYPE> declaration" }, /* Error */ 143 { CONTENT_AFTER_BODY, "content occurs after end of body" }, /* Error */ 144 { MALFORMED_COMMENT, "adjacent hyphens within comment" }, /* Error */ 145 { BAD_COMMENT_CHARS, "expecting -- or >" }, /* Error */ 146 { BAD_CDATA_CONTENT, "'<' + '/' + letter not allowed here" }, /* Error */ 147 { INCONSISTENT_NAMESPACE, "HTML namespace doesn't match content" }, /* Error */ 148 { SPACE_PRECEDING_XMLDECL, "removing whitespace preceding XML Declaration" }, /* Error */ 149 { MALFORMED_DOCTYPE, "discarding malformed <!DOCTYPE>" }, /* Error */ 150 { BAD_XML_COMMENT, "XML comments can't contain --" }, /* Error (but deprecated) */ 151 { DTYPE_NOT_UPPER_CASE, "SYSTEM, PUBLIC, W3C, DTD, EN must be upper case" }, /* Error (but deprecated) */ 152 { ENCODING_IO_CONFLICT, "Output encoding does not work with standard output" }, /* Error (but deprecated) */ 153 154/* ReportFatal */ 155 { SUSPECTED_MISSING_QUOTE, "missing quote mark for attribute value" }, /* Error? (not really sometimes) */ 156 { DUPLICATE_FRAMESET, "repeated FRAMESET element" }, /* Error */ 157 { UNKNOWN_ELEMENT, "%s is not recognized!" }, /* Error */ 158 { UNEXPECTED_ENDTAG, "unexpected </%s>" }, /* Error */ 159 160#if SUPPORT_ACCESSIBILITY_CHECKS 161 162/* ReportAccess */ 163/* 164 List of error/warning messages. The error code corresponds to 165 the check that is listed in the AERT (HTML specifications). 166*/ 167 { IMG_MISSING_ALT, "[1.1.1.1]: <img> missing 'alt' text." }, /* Access */ 168 { IMG_ALT_SUSPICIOUS_FILENAME, "[1.1.1.2]: suspicious 'alt' text (filename)." }, /* Access */ 169 { IMG_ALT_SUSPICIOUS_FILE_SIZE, "[1.1.1.3]: suspicious 'alt' text (file size)." }, /* Access */ 170 { IMG_ALT_SUSPICIOUS_PLACEHOLDER, "[1.1.1.4]: suspicious 'alt' text (placeholder)." }, /* Access */ 171 { IMG_ALT_SUSPICIOUS_TOO_LONG, "[1.1.1.10]: suspicious 'alt' text (too long)." }, /* Access */ 172 { IMG_MISSING_LONGDESC_DLINK, "[1.1.2.1]: <img> missing 'longdesc' and d-link." }, /* Access */ 173 { IMG_MISSING_DLINK, "[1.1.2.2]: <img> missing d-link." }, /* Access */ 174 { IMG_MISSING_LONGDESC, "[1.1.2.3]: <img> missing 'longdesc'." }, /* Access */ 175 { IMG_BUTTON_MISSING_ALT, "[1.1.3.1]: <img> (button) missing 'alt' text." }, /* Access */ 176 { APPLET_MISSING_ALT, "[1.1.4.1]: <applet> missing alternate content." }, /* Access */ 177 { OBJECT_MISSING_ALT, "[1.1.5.1]: <object> missing alternate content." }, /* Access */ 178 { AUDIO_MISSING_TEXT_WAV, "[1.1.6.1]: audio missing text transcript (wav)." }, /* Access */ 179 { AUDIO_MISSING_TEXT_AU, "[1.1.6.2]: audio missing text transcript (au)." }, /* Access */ 180 { AUDIO_MISSING_TEXT_AIFF, "[1.1.6.3]: audio missing text transcript (aiff)." }, /* Access */ 181 { AUDIO_MISSING_TEXT_SND, "[1.1.6.4]: audio missing text transcript (snd)." }, /* Access */ 182 { AUDIO_MISSING_TEXT_RA, "[1.1.6.5]: audio missing text transcript (ra)." }, /* Access */ 183 { AUDIO_MISSING_TEXT_RM, "[1.1.6.6]: audio missing text transcript (rm)." }, /* Access */ 184 { FRAME_MISSING_LONGDESC, "[1.1.8.1]: <frame> may require 'longdesc'." }, /* Access */ 185 { AREA_MISSING_ALT, "[1.1.9.1]: <area> missing 'alt' text." }, /* Access */ 186 { SCRIPT_MISSING_NOSCRIPT, "[1.1.10.1]: <script> missing <noscript> section." }, /* Access */ 187 { ASCII_REQUIRES_DESCRIPTION, "[1.1.12.1]: ascii art requires description." }, /* Access */ 188 { IMG_MAP_SERVER_REQUIRES_TEXT_LINKS, "[1.2.1.1]: image map (server-side) requires text links." }, /* Access */ 189 { MULTIMEDIA_REQUIRES_TEXT, "[1.4.1.1]: multimedia requires synchronized text equivalents." }, /* Access */ 190 { IMG_MAP_CLIENT_MISSING_TEXT_LINKS, "[1.5.1.1]: image map (client-side) missing text links." }, /* Access */ 191 { INFORMATION_NOT_CONVEYED_IMAGE, "[2.1.1.1]: ensure information not conveyed through color alone (image)." }, /* Access */ 192 { INFORMATION_NOT_CONVEYED_APPLET, "[2.1.1.2]: ensure information not conveyed through color alone (applet)." }, /* Access */ 193 { INFORMATION_NOT_CONVEYED_OBJECT, "[2.1.1.3]: ensure information not conveyed through color alone (object)." }, /* Access */ 194 { INFORMATION_NOT_CONVEYED_SCRIPT, "[2.1.1.4]: ensure information not conveyed through color alone (script)." }, /* Access */ 195 { INFORMATION_NOT_CONVEYED_INPUT, "[2.1.1.5]: ensure information not conveyed through color alone (input)." }, /* Access */ 196 { COLOR_CONTRAST_TEXT, "[2.2.1.1]: poor color contrast (text)." }, /* Access */ 197 { COLOR_CONTRAST_LINK, "[2.2.1.2]: poor color contrast (link)." }, /* Access */ 198 { COLOR_CONTRAST_ACTIVE_LINK, "[2.2.1.3]: poor color contrast (active link)." }, /* Access */ 199 { COLOR_CONTRAST_VISITED_LINK, "[2.2.1.4]: poor color contrast (visited link)." }, /* Access */ 200 { DOCTYPE_MISSING, "[3.2.1.1]: <doctype> missing." }, /* Access */ 201 { STYLE_SHEET_CONTROL_PRESENTATION, "[3.3.1.1]: use style sheets to control presentation." }, /* Access */ 202 { HEADERS_IMPROPERLY_NESTED, "[3.5.1.1]: headers improperly nested." }, /* Access */ 203 { POTENTIAL_HEADER_BOLD, "[3.5.2.1]: potential header (bold)." }, /* Access */ 204 { POTENTIAL_HEADER_ITALICS, "[3.5.2.2]: potential header (italics)." }, /* Access */ 205 { POTENTIAL_HEADER_UNDERLINE, "[3.5.2.3]: potential header (underline)." }, /* Access */ 206 { HEADER_USED_FORMAT_TEXT, "[3.5.3.1]: header used to format text." }, /* Access */ 207 { LIST_USAGE_INVALID_UL, "[3.6.1.1]: list usage invalid <ul>." }, /* Access */ 208 { LIST_USAGE_INVALID_OL, "[3.6.1.2]: list usage invalid <ol>." }, /* Access */ 209 { LIST_USAGE_INVALID_LI, "[3.6.1.4]: list usage invalid <li>." }, /* Access */ 210 { INDICATE_CHANGES_IN_LANGUAGE, "[4.1.1.1]: indicate changes in language." }, /* Access */ 211 { LANGUAGE_NOT_IDENTIFIED, "[4.3.1.1]: language not identified." }, /* Access */ 212 { LANGUAGE_INVALID, "[4.3.1.2]: language attribute invalid." }, /* Access */ 213 { DATA_TABLE_MISSING_HEADERS, "[5.1.2.1]: data <table> missing row/column headers (all)." }, /* Access */ 214 { DATA_TABLE_MISSING_HEADERS_COLUMN, "[5.1.2.2]: data <table> missing row/column headers (1 col)." }, /* Access */ 215 { DATA_TABLE_MISSING_HEADERS_ROW, "[5.1.2.3]: data <table> missing row/column headers (1 row)." }, /* Access */ 216 { DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS, "[5.2.1.1]: data <table> may require markup (column headers)." }, /* Access */ 217 { DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS, "[5.2.1.2]: data <table> may require markup (row headers)." }, /* Access */ 218 { LAYOUT_TABLES_LINEARIZE_PROPERLY, "[5.3.1.1]: verify layout tables linearize properly." }, /* Access */ 219 { LAYOUT_TABLE_INVALID_MARKUP, "[5.4.1.1]: invalid markup used in layout <table>." }, /* Access */ 220 { TABLE_MISSING_SUMMARY, "[5.5.1.1]: <table> missing summary." }, /* Access */ 221 { TABLE_SUMMARY_INVALID_NULL, "[5.5.1.2]: <table> summary invalid (null)." }, /* Access */ 222 { TABLE_SUMMARY_INVALID_SPACES, "[5.5.1.3]: <table> summary invalid (spaces)." }, /* Access */ 223 { TABLE_SUMMARY_INVALID_PLACEHOLDER, "[5.5.1.6]: <table> summary invalid (placeholder text)." }, /* Access */ 224 { TABLE_MISSING_CAPTION, "[5.5.2.1]: <table> missing <caption>." }, /* Access */ 225 { TABLE_MAY_REQUIRE_HEADER_ABBR, "[5.6.1.1]: <table> may require header abbreviations." }, /* Access */ 226 { TABLE_MAY_REQUIRE_HEADER_ABBR_NULL, "[5.6.1.2]: <table> header abbreviations invalid (null)." }, /* Access */ 227 { TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES, "[5.6.1.3]: <table> header abbreviations invalid (spaces)." }, /* Access */ 228 { STYLESHEETS_REQUIRE_TESTING_LINK, "[6.1.1.1]: style sheets require testing (link)." }, /* Access */ 229 { STYLESHEETS_REQUIRE_TESTING_STYLE_ELEMENT, "[6.1.1.2]: style sheets require testing (style element)." }, /* Access */ 230 { STYLESHEETS_REQUIRE_TESTING_STYLE_ATTR, "[6.1.1.3]: style sheets require testing (style attribute)." }, /* Access */ 231 { FRAME_SRC_INVALID, "[6.2.1.1]: <frame> source invalid." }, /* Access */ 232 { TEXT_EQUIVALENTS_REQUIRE_UPDATING_APPLET, "[6.2.2.1]: text equivalents require updating (applet)." }, /* Access */ 233 { TEXT_EQUIVALENTS_REQUIRE_UPDATING_SCRIPT, "[6.2.2.2]: text equivalents require updating (script)." }, /* Access */ 234 { TEXT_EQUIVALENTS_REQUIRE_UPDATING_OBJECT, "[6.2.2.3]: text equivalents require updating (object)." }, /* Access */ 235 { PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_SCRIPT, "[6.3.1.1]: programmatic objects require testing (script)." }, /* Access */ 236 { PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_OBJECT, "[6.3.1.2]: programmatic objects require testing (object)." }, /* Access */ 237 { PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_EMBED, "[6.3.1.3]: programmatic objects require testing (embed)." }, /* Access */ 238 { PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_APPLET, "[6.3.1.4]: programmatic objects require testing (applet)." }, /* Access */ 239 { FRAME_MISSING_NOFRAMES, "[6.5.1.1]: <frameset> missing <noframes> section." }, /* Access */ 240 { NOFRAMES_INVALID_NO_VALUE, "[6.5.1.2]: <noframes> section invalid (no value)." }, /* Access */ 241 { NOFRAMES_INVALID_CONTENT, "[6.5.1.3]: <noframes> section invalid (content)." }, /* Access */ 242 { NOFRAMES_INVALID_LINK, "[6.5.1.4]: <noframes> section invalid (link)." }, /* Access */ 243 { REMOVE_FLICKER_SCRIPT, "[7.1.1.1]: remove flicker (script)." }, /* Access */ 244 { REMOVE_FLICKER_OBJECT, "[7.1.1.2]: remove flicker (object)." }, /* Access */ 245 { REMOVE_FLICKER_EMBED, "[7.1.1.3]: remove flicker (embed)." }, /* Access */ 246 { REMOVE_FLICKER_APPLET, "[7.1.1.4]: remove flicker (applet)." }, /* Access */ 247 { REMOVE_FLICKER_ANIMATED_GIF, "[7.1.1.5]: remove flicker (animated gif)." }, /* Access */ 248 { REMOVE_BLINK_MARQUEE, "[7.2.1.1]: remove blink/marquee." }, /* Access */ 249 { REMOVE_AUTO_REFRESH, "[7.4.1.1]: remove auto-refresh." }, /* Access */ 250 { REMOVE_AUTO_REDIRECT, "[7.5.1.1]: remove auto-redirect." }, /* Access */ 251 { ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_SCRIPT, "[8.1.1.1]: ensure programmatic objects are accessible (script)." }, /* Access */ 252 { ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_OBJECT, "[8.1.1.2]: ensure programmatic objects are accessible (object)." }, /* Access */ 253 { ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_APPLET, "[8.1.1.3]: ensure programmatic objects are accessible (applet)." }, /* Access */ 254 { ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_EMBED, "[8.1.1.4]: ensure programmatic objects are accessible (embed)." }, /* Access */ 255 { IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION, "[9.1.1.1]: image map (server-side) requires conversion." }, /* Access */ 256 { SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_DOWN, "[9.3.1.1]: <script> not keyboard accessible (onMouseDown)." }, /* Access */ 257 { SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_UP, "[9.3.1.2]: <script> not keyboard accessible (onMouseUp)." }, /* Access */ 258 { SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_CLICK, "[9.3.1.3]: <script> not keyboard accessible (onClick)." }, /* Access */ 259 { SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OVER, "[9.3.1.4]: <script> not keyboard accessible (onMouseOver)." }, /* Access */ 260 { SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OUT, "[9.3.1.5]: <script> not keyboard accessible (onMouseOut)." }, /* Access */ 261 { SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_MOVE, "[9.3.1.6]: <script> not keyboard accessible (onMouseMove)." }, /* Access */ 262 { NEW_WINDOWS_REQUIRE_WARNING_NEW, "[10.1.1.1]: new windows require warning (_new)." }, /* Access */ 263 { NEW_WINDOWS_REQUIRE_WARNING_BLANK, "[10.1.1.2]: new windows require warning (_blank)." }, /* Access */ 264 { FORM_CONTROL_REQUIRES_DEFAULT_TEXT, "[10.4.1.1]: form control requires default text." }, /* Access */ 265 { FORM_CONTROL_DEFAULT_TEXT_INVALID_NULL, "[10.4.1.2]: form control default text invalid (null)." }, /* Access */ 266 { FORM_CONTROL_DEFAULT_TEXT_INVALID_SPACES, "[10.4.1.3]: form control default text invalid (spaces)." }, /* Access */ 267 { REPLACE_DEPRECATED_HTML_APPLET, "[11.2.1.1]: replace deprecated html <applet>." }, /* Access */ 268 { REPLACE_DEPRECATED_HTML_BASEFONT, "[11.2.1.2]: replace deprecated html <basefont>." }, /* Access */ 269 { REPLACE_DEPRECATED_HTML_CENTER, "[11.2.1.3]: replace deprecated html <center>." }, /* Access */ 270 { REPLACE_DEPRECATED_HTML_DIR, "[11.2.1.4]: replace deprecated html <dir>." }, /* Access */ 271 { REPLACE_DEPRECATED_HTML_FONT, "[11.2.1.5]: replace deprecated html <font>." }, /* Access */ 272 { REPLACE_DEPRECATED_HTML_ISINDEX, "[11.2.1.6]: replace deprecated html <isindex>." }, /* Access */ 273 { REPLACE_DEPRECATED_HTML_MENU, "[11.2.1.7]: replace deprecated html <menu>." }, /* Access */ 274 { REPLACE_DEPRECATED_HTML_S, "[11.2.1.8]: replace deprecated html <s>." }, /* Access */ 275 { REPLACE_DEPRECATED_HTML_STRIKE, "[11.2.1.9]: replace deprecated html <strike>." }, /* Access */ 276 { REPLACE_DEPRECATED_HTML_U, "[11.2.1.10]: replace deprecated html <u>." }, /* Access */ 277 { FRAME_MISSING_TITLE, "[12.1.1.1]: <frame> missing title." }, /* Access */ 278 { FRAME_TITLE_INVALID_NULL, "[12.1.1.2]: <frame> title invalid (null)." }, /* Access */ 279 { FRAME_TITLE_INVALID_SPACES, "[12.1.1.3]: <frame> title invalid (spaces)." }, /* Access */ 280 { ASSOCIATE_LABELS_EXPLICITLY, "[12.4.1.1]: associate labels explicitly with form controls." }, /* Access */ 281 { ASSOCIATE_LABELS_EXPLICITLY_FOR, "[12.4.1.2]: associate labels explicitly with form controls (for)." }, /* Access */ 282 { ASSOCIATE_LABELS_EXPLICITLY_ID, "[12.4.1.3]: associate labels explicitly with form controls (id)." }, /* Access */ 283 { LINK_TEXT_NOT_MEANINGFUL, "[13.1.1.1]: link text not meaningful." }, /* Access */ 284 { LINK_TEXT_MISSING, "[13.1.1.2]: link text missing." }, /* Access */ 285 { LINK_TEXT_TOO_LONG, "[13.1.1.3]: link text too long." }, /* Access */ 286 { LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE, "[13.1.1.4]: link text not meaningful (click here)." }, /* Access */ 287 { METADATA_MISSING, "[13.2.1.1]: Metadata missing." }, /* Access */ 288 { METADATA_MISSING_LINK, "[13.2.1.2]: Metadata missing (link element)." }, /* Access */ 289 { METADATA_MISSING_REDIRECT_AUTOREFRESH, "[13.2.1.3]: Metadata missing (redirect/auto-refresh)." }, /* Access */ 290 { SKIPOVER_ASCII_ART, "[13.10.1.1]: skip over ascii art." }, /* Access */ 291 292#endif /* SUPPORT_ACCESSIBILITY_CHECKS */ 293 294 /* must be last */ 295 { 0, NULL } 296}; 297 298static ctmbstr GetFormatFromCode(uint code) 299{ 300 uint i; 301 302 for (i = 0; msgFormat[i].fmt; ++i) 303 if (msgFormat[i].code == code) 304 return msgFormat[i].fmt; 305 306 return NULL; 307} 308 309/* 310 Documentation of configuration options 311*/ 312 313/* Cross references */ 314static const TidyOptionId TidyXmlDeclLinks[] = 315 { TidyCharEncoding, TidyOutCharEncoding, TidyUnknownOption }; 316static const TidyOptionId TidyJoinClassesLinks[] = 317 { TidyJoinStyles, TidyDuplicateAttrs, TidyUnknownOption }; 318static const TidyOptionId TidyJoinStylesLinks[] = 319 { TidyJoinClasses, TidyDuplicateAttrs, TidyUnknownOption }; 320static const TidyOptionId TidyDuplicateAttrsLinks[] = 321 { TidyJoinClasses, TidyJoinStyles, TidyUnknownOption }; 322static const TidyOptionId TidyIndentContentLinks[] = 323 { TidyIndentSpaces, TidyUnknownOption }; 324static const TidyOptionId TidyIndentSpacesLinks[] = 325 { TidyIndentContent, TidyUnknownOption }; 326static const TidyOptionId TidyWrapAttValsLinks[] = 327 { TidyWrapScriptlets, TidyUnknownOption }; 328static const TidyOptionId TidyWrapScriptletsLinks[] = 329 { TidyWrapAttVals, TidyUnknownOption }; 330static const TidyOptionId TidyCharEncodingLinks[] = 331 { TidyInCharEncoding, TidyOutCharEncoding, TidyUnknownOption }; 332static const TidyOptionId TidyInCharEncodingLinks[] = 333 { TidyCharEncoding, TidyUnknownOption }; 334static const TidyOptionId TidyOutCharEncodingLinks[] = 335 { TidyCharEncoding, TidyUnknownOption }; 336static const TidyOptionId TidyErrFileLinks[] = 337 { TidyOutFile, TidyUnknownOption }; 338static const TidyOptionId TidyOutFileLinks[] = 339 { TidyErrFile, TidyUnknownOption }; 340static const TidyOptionId TidyBlockTagsLinks[] = 341 { TidyEmptyTags, TidyInlineTags, TidyPreTags, TidyUnknownOption }; 342static const TidyOptionId TidyEmptyTagsLinks[] = 343 { TidyBlockTags, TidyInlineTags, TidyPreTags, TidyUnknownOption }; 344static const TidyOptionId TidyInlineTagsLinks[] = 345 { TidyBlockTags, TidyEmptyTags, TidyPreTags, TidyUnknownOption }; 346static const TidyOptionId TidyPreTagsLinks[] = 347 { TidyBlockTags, TidyEmptyTags, TidyInlineTags, TidyUnknownOption }; 348static const TidyOptionId TidyMergeDivsLinks[] = 349 { TidyMakeClean, TidyUnknownOption }; 350static const TidyOptionId TidyAsciiCharsLinks[] = 351 { TidyMakeClean, TidyUnknownOption }; 352static const TidyOptionId TidyNumEntitiesLinks[] = 353 { TidyDoctype, TidyUnknownOption }; 354 355/* Documentation of options */ 356static const TidyOptionDoc option_docs[] = 357{ 358 {TidyXmlDecl, 359 "This option specifies if Tidy should add the XML declaration when " 360 "outputting XML or XHTML. Note that if the input already includes an " 361 "<?xml ... ?> declaration then this option will be ignored. " 362 "If the encoding for the output is different from \"ascii\", one of the " 363 "utf encodings or \"raw\", the declaration is always added as required by " 364 "the XML standard. " 365 , TidyXmlDeclLinks 366 }, 367 {TidyXmlSpace, 368 "This option specifies if Tidy should add xml:space=\"preserve\" to " 369 "elements such as <PRE>, <STYLE> and <SCRIPT> when " 370 "generating XML. This is needed if the whitespace in such elements is to " 371 "be parsed appropriately without having access to the DTD. " 372 }, 373 {TidyAltText, 374 "This option specifies the default \"alt=\" text Tidy uses for " 375 "<IMG> attributes. This feature is dangerous as it suppresses " 376 "further accessibility warnings. You are responsible for making your " 377 "documents accessible to people who can not see the images! " 378 }, 379 {TidyXmlPIs, 380 "This option specifies if Tidy should change the parsing of processing " 381 "instructions to require ?> as the terminator rather than >. This " 382 "option is automatically set if the input is in XML. " 383 }, 384 {TidyMakeBare, 385 "This option specifies if Tidy should strip Microsoft specific HTML " 386 "from Word 2000 documents, and output spaces rather than non-breaking " 387 "spaces where they exist in the input. " 388 }, 389 {TidyCSSPrefix, 390 "This option specifies the prefix that Tidy uses for styles rules. By " 391 "default, \"c\" will be used. " 392 }, 393 {TidyMakeClean, 394 "This option specifies if Tidy " 395 "should strip out surplus presentational tags and attributes replacing " 396 "them by style rules and structural markup as appropriate. It works well " 397 "on the HTML saved by Microsoft Office products. " 398 }, 399 {TidyDoctype, 400 "This option specifies the DOCTYPE declaration generated by Tidy. If set " 401 "to \"omit\" the output won't contain a DOCTYPE declaration. If set to " 402 "\"auto\" (the default) Tidy will use an educated guess based upon the " 403 "contents of the document. If set to \"strict\", Tidy will set the DOCTYPE " 404 "to the strict DTD. If set to \"loose\", the DOCTYPE is set to the loose " 405 "(transitional) DTD. Alternatively, you can supply a string for the formal " 406 "public identifier (FPI).<br />" 407 "<br />" 408 "For example: <br />" 409 "doctype: \"-//ACME//DTD HTML 3.14159//EN\"<br />" 410 "<br />" 411 "If you specify the FPI for an XHTML document, Tidy will set the " 412 "system identifier to an empty string. For an HTML document, Tidy adds a " 413 "system identifier only if one was already present in order to preserve " 414 "the processing mode of some browsers. Tidy leaves the DOCTYPE for " 415 "generic XML documents unchanged. <code>--doctype omit</code> implies " 416 "<code>--numeric-entities yes</code>. This option does not offer a " 417 "validation of the document conformance. " 418 }, 419 {TidyDropEmptyParas, 420 "This option specifies if Tidy should discard empty paragraphs. " 421 }, 422 {TidyDropFontTags, 423 "This option specifies if Tidy should discard <FONT> and " 424 "<CENTER> tags without creating the corresponding style rules. This " 425 "option can be set independently of the clean option. " 426 }, 427 {TidyDropPropAttrs, 428 "This option specifies if Tidy should strip out proprietary attributes, " 429 "such as MS data binding attributes. " 430 }, 431 {TidyEncloseBlockText, 432 "This option specifies if Tidy should insert a <P> element to " 433 "enclose any text it finds in any element that allows mixed content for " 434 "HTML transitional but not HTML strict. " 435 }, 436 {TidyEncloseBodyText, 437 "This option specifies if Tidy should enclose any text it finds in the " 438 "body element within a <P> element. This is useful when you want to " 439 "take existing HTML and use it with a style sheet. " 440 }, 441 {TidyEscapeCdata, 442 "This option specifies if Tidy should convert <![CDATA[]]> " 443 "sections to normal text. " 444 }, 445 {TidyFixComments, 446 "This option specifies if Tidy should replace unexpected hyphens with " 447 "\"=\" characters when it comes across adjacent hyphens. The default is " 448 "yes. This option is provided for users of Cold Fusion which uses the " 449 "comment syntax: <!--- ---> " 450 }, 451 {TidyFixUri, 452 "This option specifies if Tidy should check attribute values that carry " 453 "URIs for illegal characters and if such are found, escape them as HTML 4 " 454 "recommends. " 455 }, 456 {TidyHideComments, 457 "This option specifies if Tidy should print out comments. " 458 }, 459 {TidyHideEndTags, 460 "This option specifies if Tidy should omit optional end-tags when " 461 "generating the pretty printed markup. This option is ignored if you are " 462 "outputting to XML. " 463 }, 464 {TidyIndentCdata, 465 "This option specifies if Tidy should indent <![CDATA[]]> sections. " 466 }, 467 {TidyXmlTags, 468 "This option specifies if Tidy should use the XML parser rather than the " 469 "error correcting HTML parser. " 470 }, 471 {TidyJoinClasses, 472 "This option specifies if Tidy should combine class names to generate " 473 "a single new class name, if multiple class assignments are detected on " 474 "an element. " 475 , TidyJoinClassesLinks 476 }, 477 {TidyJoinStyles, 478 "This option specifies if Tidy should combine styles to generate a single " 479 "new style, if multiple style values are detected on an element. " 480 , TidyJoinStylesLinks 481 }, 482 {TidyLogicalEmphasis, 483 "This option specifies if Tidy should replace any occurrence of <I> " 484 "by <EM> and any occurrence of <B> by <STRONG>. In both " 485 "cases, the attributes are preserved unchanged. This option can be set " 486 "independently of the clean and drop-font-tags options. " 487 }, 488 {TidyLowerLiterals, 489 "This option specifies if Tidy should convert the value of an attribute " 490 "that takes a list of predefined values to lower case. This is required " 491 "for XHTML documents. " 492 }, 493 {TidyMergeDivs, 494 "Can be used to modify behavior of -c (--clean yes) option. " 495 "This option specifies if Tidy should merge nested <div> such as " 496 "\"<div><div>...</div></div>\". If set to " 497 "\"auto\", the attributes of the inner <div> are moved to the " 498 "outer one. As well, nested <div> with ID attributes are not " 499 "merged. If set to \"yes\", the attributes of the inner <div> " 500 "are discarded with the exception of \"class\" and \"style\". " 501 ,TidyMergeDivsLinks 502 }, 503#if SUPPORT_ASIAN_ENCODINGS 504 {TidyNCR, 505 "This option specifies if Tidy should allow numeric character references. " 506 }, 507#endif 508 {TidyBlockTags, 509 "This option specifies new block-level tags. This option takes a space or " 510 "comma separated list of tag names. Unless you declare new tags, Tidy will " 511 "refuse to generate a tidied file if the input includes previously unknown " 512 "tags. Note you can't change the content model for elements such as " 513 "<TABLE>, <UL>, <OL> and <DL>. " 514 ,TidyBlockTagsLinks 515 }, 516 {TidyEmptyTags, 517 "This option specifies new empty inline tags. This option takes a space " 518 "or comma separated list of tag names. Unless you declare new tags, Tidy " 519 "will refuse to generate a tidied file if the input includes previously " 520 "unknown tags. Remember to also declare empty tags as either inline or " 521 "blocklevel. " 522 ,TidyEmptyTagsLinks 523 }, 524 {TidyInlineTags, 525 "This option specifies new non-empty inline tags. This option takes a " 526 "space or comma separated list of tag names. Unless you declare new tags, " 527 "Tidy will refuse to generate a tidied file if the input includes " 528 "previously unknown tags. " 529 ,TidyInlineTagsLinks 530 }, 531 { TidyPreTags, 532 "This option specifies " 533 "new tags that are to be processed in exactly the same way as HTML's " 534 "<PRE> element. This option takes a space or comma separated list " 535 "of tag names. Unless you declare new tags, Tidy will refuse to generate " 536 "a tidied file if the input includes previously unknown tags. Note you " 537 "can not as yet add new CDATA elements (similar to <SCRIPT>). " 538 ,TidyPreTagsLinks 539 }, 540 {TidyNumEntities, 541 "This option specifies if Tidy should output entities other than the " 542 "built-in HTML entities (&amp;, &lt;, &gt; and &quot;) in " 543 "the numeric rather than the named entity form. Only entities compatible " 544 "with the DOCTYPE declaration generated are used. Entities that can be " 545 "represented in the output encoding are translated correspondingly. " 546 ,TidyNumEntitiesLinks 547 }, 548 {TidyHtmlOut, 549 "This option specifies if Tidy should generate pretty printed output, " 550 "writing it as HTML. " 551 }, 552 {TidyXhtmlOut, 553 "This option specifies if Tidy should generate pretty printed output, " 554 "writing it as extensible HTML. " 555 "This option causes Tidy to set the DOCTYPE and default namespace as " 556 "appropriate to XHTML. If a DOCTYPE or namespace is given they will " 557 "checked for consistency with the content of the document. In the case of " 558 "an inconsistency, the corrected values will appear in the output. For " 559 "XHTML, entities can be written as named or numeric entities according to " 560 "the setting of the \"numeric-entities\" option. The original case of tags " 561 "and attributes will be preserved, regardless of other options. " 562 }, 563 {TidyXmlOut, 564 "This option specifies if Tidy should pretty print output, writing it as " 565 "well-formed XML. Any entities not defined in XML 1.0 will be written as " 566 "numeric entities to allow them to be parsed by a XML parser. The original " 567 "case of tags and attributes will be preserved, regardless of other " 568 "options. " 569 }, 570 {TidyQuoteAmpersand, 571 "This option specifies if Tidy should output unadorned & characters as " 572 "&amp;. " 573 }, 574 {TidyQuoteMarks, 575 "This option specifies if Tidy should output " characters as " 576 "&quot; as is preferred by some editing environments. The apostrophe " 577 "character ' is written out as &#39; since many web browsers don't yet " 578 "support &apos;. " 579 }, 580 {TidyQuoteNbsp, 581 "This option specifies if Tidy should output non-breaking space characters " 582 "as entities, rather than as the Unicode character value 160 (decimal). " 583 }, 584 {TidyDuplicateAttrs, 585 "This option specifies if Tidy should keep the first or last attribute, if " 586 "an attribute is repeated, e.g. has two align attributes. " 587 , TidyDuplicateAttrsLinks 588 }, 589 {TidyReplaceColor, 590 "This option specifies if Tidy should replace numeric values in color " 591 "attributes by HTML/XHTML color names where defined, e.g. replace " 592 "\"#ffffff\" with \"white\". " 593 }, 594 {TidyBodyOnly, 595 "This option specifies if Tidy should print only the contents of the " 596 "body tag as an HTML fragment. Useful for incorporating existing whole " 597 "pages as a portion of another page. " 598 }, 599 {TidyUpperCaseAttrs, 600 "This option specifies if Tidy should output attribute names in upper " 601 "case. The default is no, which results in lower case attribute names, " 602 "except for XML input, where the original case is preserved. " 603 }, 604 {TidyUpperCaseTags, 605 "This option specifies if Tidy should output tag names in upper case. " 606 "The default is no, which results in lower case tag names, except for XML " 607 "input, where the original case is preserved. " 608 }, 609 {TidyWord2000, 610 "This option specifies if Tidy should go to great pains to strip out all " 611 "the surplus stuff Microsoft Word 2000 inserts when you save Word " 612 "documents as \"Web pages\". Doesn't handle embedded images or VML. " 613 "You should consider using Word's \"Save As: Web Page, Filtered\". " 614 }, 615 {TidyAccessibilityCheckLevel, 616 "This option specifies what level of accessibility checking, if any, " 617 "that Tidy should do. Level 0 is equivalent to Tidy Classic's " 618 "accessibility checking. " 619 "For more information on Tidy's accessibility checking, visit the " 620 "<a href=\"http://www.aprompt.ca/Tidy/accessibilitychecks.html\" " 621 ">Adaptive Technology Resource Centre at the University of Toronto</a>. " 622 }, 623 {TidyShowErrors, 624 "This option specifies the number Tidy uses to determine if further errors " 625 "should be shown. If set to 0, then no errors are shown. " 626 }, 627 {TidyShowWarnings, 628 "This option specifies if Tidy should suppress warnings. This can be " 629 "useful when a few errors are hidden in a flurry of warnings. " 630 }, 631 {TidyBreakBeforeBR, 632 "This option specifies if Tidy should output a line break before each " 633 "<BR> element. " 634 }, 635 {TidyIndentContent, 636 "This option specifies if Tidy should indent block-level tags. If set to " 637 "\"auto\", this option causes Tidy to decide whether or not to indent the " 638 "content of tags such as TITLE, H1-H6, LI, TD, TD, or P depending on " 639 "whether or not the content includes a block-level element. You are " 640 "advised to avoid setting indent to yes as this can expose layout bugs in " 641 "some browsers. " 642 ,TidyIndentContentLinks 643 }, 644 {TidyIndentAttributes, 645 "This option specifies if Tidy should begin each attribute on a new line. " 646 }, 647 {TidyIndentSpaces, 648 "This option specifies the number of spaces Tidy uses to indent content, " 649 "when indentation is enabled. " 650 ,TidyIndentSpacesLinks 651 }, 652 {TidyLiteralAttribs, 653 "This option specifies if Tidy should ensure that whitespace characters " 654 "within attribute values are passed through unchanged. " 655 }, 656 {TidyShowMarkup, 657 "This option specifies if Tidy should generate a pretty printed version " 658 "of the markup. Note that Tidy won't generate a pretty printed version if " 659 "it finds significant errors (see force-output). " 660 }, 661#if SUPPORT_ASIAN_ENCODINGS 662 {TidyPunctWrap, 663 "This option specifies if Tidy should line wrap after some Unicode or " 664 "Chinese punctuation characters. " 665 }, 666#endif 667 {TidyBurstSlides, 668 "Currently not used. Tidy Classic only. " 669 }, 670 {TidyTabSize, 671 "This option specifies the number of columns that Tidy uses between " 672 "successive tab stops. It is used to map tabs to spaces when reading the " 673 "input. Tidy never outputs tabs. " 674 }, 675 {TidyVertSpace, 676 "This option specifies if Tidy should add some empty lines for " 677 "readability. " 678 }, 679 {TidyWrapLen, 680 "This option specifies the right margin Tidy uses for line wrapping. Tidy " 681 "tries to wrap lines so that they do not exceed this length. Set wrap to " 682 "zero if you want to disable line wrapping. " 683 }, 684 {TidyWrapAsp, 685 "This option specifies if Tidy should line wrap text contained within ASP " 686 "pseudo elements, which look like: <% ... %>. " 687 }, 688 {TidyWrapAttVals, 689 "This option specifies if Tidy should line wrap attribute values, for " 690 "easier editing. This option can be set independently of " 691 "wrap-script-literals. " 692 ,TidyWrapAttValsLinks 693 }, 694 {TidyWrapJste, 695 "This option specifies if Tidy should line wrap text contained within " 696 "JSTE pseudo elements, which look like: <# ... #>. " 697 }, 698 {TidyWrapPhp, 699 "This option specifies if Tidy should line wrap text contained within PHP " 700 "pseudo elements, which look like: <?php ... ?>. " 701 }, 702 {TidyWrapScriptlets, 703 "This option specifies if Tidy should line wrap string literals that " 704 "appear in script attributes. Tidy wraps long script string literals by " 705 "inserting a backslash character before the line break. " 706 ,TidyWrapScriptletsLinks 707 }, 708 {TidyWrapSection, 709 "This option specifies if Tidy should line wrap text contained within " 710 "<![ ... ]> section tags. " 711 }, 712 {TidyAsciiChars, 713 "Can be used to modify behavior of -c (--clean yes) option. If set " 714 "to \"yes\" when using -c, &emdash;, &rdquo;, and other named " 715 "character entities are downgraded to their closest ascii equivalents. " 716 ,TidyAsciiCharsLinks 717 }, 718 {TidyCharEncoding, 719 "This option specifies the character encoding Tidy uses for both the input " 720 "and output. For ascii, Tidy will accept Latin-1 (ISO-8859-1) character " 721 "values, but will use entities for all characters whose value > 127. " 722 "For raw, Tidy will output values above 127 without translating them into " 723 "entities. For latin1, characters above 255 will be written as entities. " 724 "For utf8, Tidy assumes that both input and output is encoded as UTF-8. " 725 "You can use iso2022 for files encoded using the ISO-2022 family of " 726 "encodings e.g. ISO-2022-JP. For mac and win1252, Tidy will accept vendor " 727 "specific character values, but will use entities for all characters whose " 728 "value > 127. " 729 ,TidyCharEncodingLinks 730 }, 731 {TidyInCharEncoding, 732 "This option specifies the character encoding Tidy uses for the input. See " 733 "char-encoding for more info. " 734 ,TidyInCharEncodingLinks 735 }, 736#if SUPPORT_ASIAN_ENCODINGS 737 {TidyLanguage, 738 "Currently not used, but this option specifies the language Tidy uses " 739 "(for instance \"en\"). " 740 }, 741#endif 742#if SUPPORT_UTF16_ENCODINGS 743 {TidyOutputBOM, 744 "This option specifies if Tidy should write a Unicode Byte Order Mark " 745 "character (BOM; also known as Zero Width No-Break Space; has value of " 746 "U+FEFF) to the beginning of the output; only for UTF-8 and UTF-16 output " 747 "encodings. If set to \"auto\", this option causes Tidy to write a BOM to " 748 "the output only if a BOM was present at the beginning of the input. A BOM " 749 "is always written for XML/XHTML output using UTF-16 output encodings. " 750 }, 751#endif 752 {TidyOutCharEncoding, 753 "This option specifies the character encoding Tidy uses for the output. " 754 "See char-encoding for more info. May only be different from " 755 "input-encoding for Latin encodings (ascii, latin0, latin1, mac, win1252, " 756 "ibm858). " 757 ,TidyOutCharEncodingLinks 758 }, 759 {TidyNewline, 760 "The default is appropriate to the current platform: CRLF on PC-DOS, " 761 "MS-Windows and OS/2, CR on Classic Mac OS, and LF everywhere else " 762 "(Unix and Linux). " 763 }, 764 {TidyErrFile, 765 "This option specifies the error file Tidy uses for errors and warnings. " 766 "Normally errors and warnings are output to \"stderr\". " 767 ,TidyErrFileLinks 768 }, 769 {TidyFixBackslash, 770 "This option specifies if Tidy should replace backslash characters " 771 "\"<code>\\</code>\" in URLs by forward slashes \"<code>/</code>\". " 772 }, 773 {TidyForceOutput, 774 "This option specifies if Tidy should produce output even if errors are " 775 "encountered. Use this option with care - if Tidy reports an error, this " 776 "means Tidy was not able to, or is not sure how to, fix the error, so the " 777 "resulting output may not reflect your intention. " 778 }, 779 {TidyEmacs, 780 "This option specifies if Tidy should change the format for reporting " 781 "errors and warnings to a format that is more easily parsed by GNU Emacs. " 782 }, 783 {TidyEmacsFile, 784 "Used internally. " 785 }, 786 {TidyKeepFileTimes, 787 "This option specifies if Tidy should keep the original modification time " 788 "of files that Tidy modifies in place. The default is no. Setting the " 789 "option to yes allows you to tidy files without causing these files to be " 790 "uploaded to a web server when using a tool such as SiteCopy. Note this " 791 "feature is not supported on some platforms. " 792 }, 793 {TidyOutFile, 794 "This option specifies the output file Tidy uses for markup. Normally " 795 "markup is written to \"stdout\". " 796 ,TidyOutFileLinks 797 }, 798 {TidyQuiet, 799 "This option specifies if Tidy should output the summary of the numbers " 800 "of errors and warnings, or the welcome or informational messages. " 801 }, 802 {TidySlideStyle, 803 "Currently not used. Tidy Classic only. " 804 }, 805 {TidyMark, 806 "This option specifies if Tidy should add a meta element to the document " 807 "head to indicate that the document has been tidied. Tidy won't add a meta " 808 "element if one is already present. " 809 }, 810 {TidyWriteBack, 811 "This option specifies if Tidy should write back the tidied markup to the " 812 "same file it read from. You are advised to keep copies of important files " 813 "before tidying them, as on rare occasions the result may not be what you " 814 "expect. " 815 }, 816 {TidyDecorateInferredUL, 817 "This option specifies if Tidy should decorate inferred UL elements with " 818 "some CSS markup to avoid indentation to the right. " 819 }, 820 {N_TIDY_OPTIONS, 821 NULL 822 } 823}; 824 825const TidyOptionDoc* TY_(OptGetDocDesc)( TidyOptionId optId ) 826{ 827 uint i = 0; 828 829 while( option_docs[i].opt != N_TIDY_OPTIONS ) 830 { 831 if ( option_docs[i].opt == optId ) 832 return &option_docs[i]; 833 ++i; 834 } 835 return NULL; 836} 837 838 839static char* LevelPrefix( TidyReportLevel level, char* buf, size_t count ) 840{ 841 *buf = 0; 842 switch ( level ) 843 { 844 case TidyInfo: 845 TY_(tmbstrncpy)( buf, "Info: ", count ); 846 break; 847 case TidyWarning: 848 TY_(tmbstrncpy)( buf, "Warning: ", count ); 849 break; 850 case TidyConfig: 851 TY_(tmbstrncpy)( buf, "Config: ", count ); 852 break; 853 case TidyAccess: 854 TY_(tmbstrncpy)( buf, "Access: ", count ); 855 break; 856 case TidyError: 857 TY_(tmbstrncpy)( buf, "Error: ", count ); 858 break; 859 case TidyBadDocument: 860 TY_(tmbstrncpy)( buf, "Document: ", count ); 861 break; 862 case TidyFatal: 863 TY_(tmbstrncpy)( buf, "panic: ", count ); 864 break; 865 } 866 return buf + TY_(tmbstrlen)( buf ); 867} 868 869/* Updates document message counts and 870** compares counts to options to see if message 871** display should go forward. 872*/ 873static Bool UpdateCount( TidyDocImpl* doc, TidyReportLevel level ) 874{ 875 /* keep quiet after <ShowErrors> errors */ 876 Bool go = ( doc->errors < cfg(doc, TidyShowErrors) ); 877 878 switch ( level ) 879 { 880 case TidyInfo: 881 doc->infoMessages++; 882 break; 883 case TidyWarning: 884 doc->warnings++; 885 go = go && cfgBool( doc, TidyShowWarnings ); 886 break; 887 case TidyConfig: 888 doc->optionErrors++; 889 break; 890 case TidyAccess: 891 doc->accessErrors++; 892 break; 893 case TidyError: 894 doc->errors++; 895 break; 896 case TidyBadDocument: 897 doc->docErrors++; 898 break; 899 case TidyFatal: 900 /* Ack! */; 901 break; 902 } 903 904 return go; 905} 906 907static char* ReportPosition(TidyDocImpl* doc, int line, int col, char* buf, size_t count) 908{ 909 *buf = 0; 910 911 /* Change formatting to be parsable by GNU Emacs */ 912 if ( cfgBool(doc, TidyEmacs) && cfgStr(doc, TidyEmacsFile) ) 913 TY_(tmbsnprintf)(buf, count, "%s:%d:%d: ", 914 cfgStr(doc, TidyEmacsFile), line, col); 915 else /* traditional format */ 916 TY_(tmbsnprintf)(buf, count, "line %d column %d - ", line, col); 917 return buf + TY_(tmbstrlen)( buf ); 918} 919 920/* General message writing routine. 921** Each message is a single warning, error, etc. 922** 923** This routine will keep track of counts and, 924** if the caller has set a filter, it will be 925** called. The new preferred way of handling 926** Tidy diagnostics output is either a) define 927** a new output sink or b) install a message 928** filter routine. 929*/ 930 931static void messagePos( TidyDocImpl* doc, TidyReportLevel level, 932 int line, int col, ctmbstr msg, va_list args ) 933#ifdef __GNUC__ 934__attribute__((format(printf, 5, 0))) 935#endif 936; 937static void messagePos( TidyDocImpl* doc, TidyReportLevel level, 938 int line, int col, ctmbstr msg, va_list args ) 939{ 940 char messageBuf[ 2048 ]; 941 Bool go = UpdateCount( doc, level ); 942 943 if ( go ) 944 { 945 TY_(tmbvsnprintf)(messageBuf, sizeof(messageBuf), msg, args); 946 if ( doc->mssgFilt ) 947 { 948 TidyDoc tdoc = tidyImplToDoc( doc ); 949 go = doc->mssgFilt( tdoc, level, line, col, messageBuf ); 950 } 951 } 952 953 if ( go ) 954 { 955 char buf[ 64 ]; 956 const char *cp; 957 if ( line > 0 && col > 0 ) 958 { 959 ReportPosition(doc, line, col, buf, sizeof(buf)); 960 for ( cp = buf; *cp; ++cp ) 961 TY_(WriteChar)( *cp, doc->errout ); 962 } 963 964 LevelPrefix( level, buf, sizeof(buf) ); 965 for ( cp = buf; *cp; ++cp ) 966 TY_(WriteChar)( *cp, doc->errout ); 967 968 for ( cp = messageBuf; *cp; ++cp ) 969 TY_(WriteChar)( *cp, doc->errout ); 970 TY_(WriteChar)( '\n', doc->errout ); 971 } 972} 973 974/* Reports error at current Lexer line/column. */ 975static 976void message( TidyDocImpl* doc, TidyReportLevel level, ctmbstr msg, ... ) 977#ifdef __GNUC__ 978__attribute__((format(printf, 3, 4))) 979#endif 980; 981 982/* Reports error at node line/column. */ 983static 984void messageNode( TidyDocImpl* doc, TidyReportLevel level, 985 Node* node, ctmbstr msg, ... ) 986#ifdef __GNUC__ 987__attribute__((format(printf, 4, 5))) 988#endif 989; 990 991/* Reports error at given line/column. */ 992static 993void messageLexer( TidyDocImpl* doc, TidyReportLevel level, 994 ctmbstr msg, ... ) 995#ifdef __GNUC__ 996__attribute__((format(printf, 3, 4))) 997#endif 998; 999 1000/* For general reporting. Emits nothing if --quiet yes */ 1001/* Apple Changes: 1002 2007-02-07 iccir tidy_out needs to be exported for binary 1003 compatibility. Hence, it cannot be static. 1004 2007-02-20 iccir Removing .exp file, so tidy_out needs TIDY_EXPORT 1005*/ 1006#ifndef TIDY_APPLE_CHANGES 1007static 1008#else 1009TIDY_EXPORT 1010#endif 1011void tidy_out( TidyDocImpl* doc, ctmbstr msg, ... ) 1012#ifdef __GNUC__ 1013__attribute__((format(printf, 2, 3))) 1014#endif 1015; 1016 1017 1018void message( TidyDocImpl* doc, TidyReportLevel level, ctmbstr msg, ... ) 1019{ 1020 va_list args; 1021 va_start( args, msg ); 1022 messagePos( doc, level, 0, 0, msg, args ); 1023 va_end( args ); 1024} 1025 1026 1027void messageLexer( TidyDocImpl* doc, TidyReportLevel level, ctmbstr msg, ... ) 1028{ 1029 int line = ( doc->lexer ? doc->lexer->lines : 0 ); 1030 int col = ( doc->lexer ? doc->lexer->columns : 0 ); 1031 1032 va_list args; 1033 va_start( args, msg ); 1034 messagePos( doc, level, line, col, msg, args ); 1035 va_end( args ); 1036} 1037 1038void messageNode( TidyDocImpl* doc, TidyReportLevel level, Node* node, 1039 ctmbstr msg, ... ) 1040{ 1041 int line = ( node ? node->line : 1042 ( doc->lexer ? doc->lexer->lines : 0 ) ); 1043 int col = ( node ? node->column : 1044 ( doc->lexer ? doc->lexer->columns : 0 ) ); 1045 1046 va_list args; 1047 va_start( args, msg ); 1048 messagePos( doc, level, line, col, msg, args ); 1049 va_end( args ); 1050} 1051 1052void tidy_out( TidyDocImpl* doc, ctmbstr msg, ... ) 1053{ 1054 if ( !cfgBool(doc, TidyQuiet) ) 1055 { 1056 ctmbstr cp; 1057 char buf[ 2048 ]; 1058 1059 va_list args; 1060 va_start( args, msg ); 1061 TY_(tmbvsnprintf)(buf, sizeof(buf), msg, args); 1062 va_end( args ); 1063 1064 for ( cp=buf; *cp; ++cp ) 1065 TY_(WriteChar)( *cp, doc->errout ); 1066 } 1067} 1068 1069#if 0 1070void ShowVersion( TidyDocImpl* doc ) 1071{ 1072 ctmbstr platform = "", helper = ""; 1073 1074#ifdef PLATFORM_NAME 1075 platform = PLATFORM_NAME; 1076 helper = " for "; 1077#endif 1078 1079 tidy_out( doc, "\nHTML Tidy%s%s (release date: %s; built on %s, at %s)\n" 1080 "See http://tidy.sourceforge.net/ for details.\n", 1081 helper, platform, TY_(release_date), __DATE__, __TIME__ ); 1082} 1083#endif 1084 1085void TY_(FileError)( TidyDocImpl* doc, ctmbstr file, TidyReportLevel level ) 1086{ 1087 message( doc, level, "Can't open \"%s\"\n", file ); 1088} 1089 1090static char* TagToString(Node* tag, char* buf, size_t count) 1091{ 1092 *buf = 0; 1093 if (tag) 1094 { 1095 if (TY_(nodeIsElement)(tag)) 1096 TY_(tmbsnprintf)(buf, count, "<%s>", tag->element); 1097 else if (tag->type == EndTag) 1098 TY_(tmbsnprintf)(buf, count, "</%s>", tag->element); 1099 else if (tag->type == DocTypeTag) 1100 TY_(tmbsnprintf)(buf, count, "<!DOCTYPE>"); 1101 else if (tag->type == TextNode) 1102 TY_(tmbsnprintf)(buf, count, "plain text"); 1103 else if (tag->type == XmlDecl) 1104 TY_(tmbsnprintf)(buf, count, "XML declaration"); 1105 else if (tag->element) 1106 TY_(tmbsnprintf)(buf, count, "%s", tag->element); 1107 } 1108 return buf + TY_(tmbstrlen)(buf); 1109} 1110 1111/* lexer is not defined when this is called */ 1112void TY_(ReportUnknownOption)( TidyDocImpl* doc, ctmbstr option ) 1113{ 1114 assert( option != NULL ); 1115 message( doc, TidyConfig, "unknown option: %s", option ); 1116} 1117 1118/* lexer is not defined when this is called */ 1119void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option ) 1120{ 1121 assert( option != NULL ); 1122 message( doc, TidyConfig, 1123 "missing or malformed argument for option: %s", option ); 1124} 1125 1126static void NtoS(int n, tmbstr str) 1127{ 1128 tmbchar buf[40]; 1129 int i; 1130 1131 for (i = 0;; ++i) 1132 { 1133 buf[i] = (tmbchar)( (n % 10) + '0' ); 1134 1135 n = n / 10; 1136 1137 if (n == 0) 1138 break; 1139 } 1140 1141 n = i; 1142 1143 while (i >= 0) 1144 { 1145 str[n-i] = buf[i]; 1146 --i; 1147 } 1148 1149 str[n+1] = '\0'; 1150} 1151 1152void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding) 1153{ 1154 switch(code) 1155 { 1156 case ENCODING_MISMATCH: 1157 messageLexer(doc, TidyWarning, GetFormatFromCode(code), 1158 TY_(CharEncodingName)(doc->docIn->encoding), 1159 TY_(CharEncodingName)(encoding)); 1160 doc->badChars |= BC_ENCODING_MISMATCH; 1161 break; 1162 } 1163} 1164 1165void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded) 1166{ 1167 char buf[ 32 ] = {'\0'}; 1168 1169 ctmbstr action = discarded ? "discarding" : "replacing"; 1170 ctmbstr fmt = GetFormatFromCode(code); 1171 1172 /* An encoding mismatch is currently treated as a non-fatal error */ 1173 switch (code) 1174 { 1175 case VENDOR_SPECIFIC_CHARS: 1176 NtoS(c, buf); 1177 doc->badChars |= BC_VENDOR_SPECIFIC_CHARS; 1178 break; 1179 1180 case INVALID_SGML_CHARS: 1181 NtoS(c, buf); 1182 doc->badChars |= BC_INVALID_SGML_CHARS; 1183 break; 1184 1185 case INVALID_UTF8: 1186 TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c); 1187 doc->badChars |= BC_INVALID_UTF8; 1188 break; 1189 1190#if SUPPORT_UTF16_ENCODINGS 1191 case INVALID_UTF16: 1192 TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c); 1193 doc->badChars |= BC_INVALID_UTF16; 1194 break; 1195#endif 1196 1197 case INVALID_NCR: 1198 NtoS(c, buf); 1199 doc->badChars |= BC_INVALID_NCR; 1200 break; 1201 } 1202 1203 if (fmt) 1204 messageLexer( doc, TidyWarning, fmt, action, buf ); 1205} 1206 1207void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, 1208 int ARG_UNUSED(c) ) 1209{ 1210 ctmbstr entityname = ( entity ? entity : "NULL" ); 1211 ctmbstr fmt = GetFormatFromCode(code); 1212 1213 if (fmt) 1214 messageLexer( doc, TidyWarning, fmt, entityname ); 1215} 1216 1217void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code) 1218{ 1219 char const *name = "NULL", *value = "NULL"; 1220 char tagdesc[64]; 1221 ctmbstr fmt = GetFormatFromCode(code); 1222 1223 assert( fmt != NULL ); 1224 1225 TagToString(node, tagdesc, sizeof(tagdesc)); 1226 1227 if (av) 1228 { 1229 if (av->attribute) 1230 name = av->attribute; 1231 if (av->value) 1232 value = av->value; 1233 } 1234 1235 switch (code) 1236 { 1237 case UNKNOWN_ATTRIBUTE: 1238 case INSERTING_ATTRIBUTE: 1239 case MISSING_ATTR_VALUE: 1240 case XML_ATTRIBUTE_VALUE: 1241 case PROPRIETARY_ATTRIBUTE: 1242 case JOINING_ATTRIBUTE: 1243 messageNode(doc, TidyWarning, node, fmt, tagdesc, name); 1244 break; 1245 1246 case BAD_ATTRIBUTE_VALUE: 1247 case BAD_ATTRIBUTE_VALUE_REPLACED: 1248 case INVALID_ATTRIBUTE: 1249 messageNode(doc, TidyWarning, node, fmt, tagdesc, name, value); 1250 break; 1251 1252 case UNEXPECTED_QUOTEMARK: 1253 case MISSING_QUOTEMARK: 1254 case ID_NAME_MISMATCH: 1255 case BACKSLASH_IN_URI: 1256 case FIXED_BACKSLASH: 1257 case ILLEGAL_URI_REFERENCE: 1258 case ESCAPED_ILLEGAL_URI: 1259 case NEWLINE_IN_URI: 1260 case WHITE_IN_URI: 1261 case UNEXPECTED_GT: 1262 case INVALID_XML_ID: 1263 case UNEXPECTED_EQUALSIGN: 1264 messageNode(doc, TidyWarning, node, fmt, tagdesc); 1265 break; 1266 1267 case XML_ID_SYNTAX: 1268 case PROPRIETARY_ATTR_VALUE: 1269 case ANCHOR_NOT_UNIQUE: 1270 case ATTR_VALUE_NOT_LCASE: 1271 messageNode(doc, TidyWarning, node, fmt, tagdesc, value); 1272 break; 1273 1274 1275 case MISSING_IMAGEMAP: 1276 messageNode(doc, TidyWarning, node, fmt, tagdesc); 1277 doc->badAccess |= MISSING_IMAGE_MAP; 1278 break; 1279 1280 case REPEATED_ATTRIBUTE: 1281 messageNode(doc, TidyWarning, node, fmt, tagdesc, value, name); 1282 break; 1283 1284 case UNEXPECTED_END_OF_FILE_ATTR: 1285 /* on end of file adjust reported position to end of input */ 1286 doc->lexer->lines = doc->docIn->curline; 1287 doc->lexer->columns = doc->docIn->curcol; 1288 messageLexer(doc, TidyWarning, fmt, tagdesc); 1289 break; 1290 } 1291} 1292 1293void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name ) 1294{ 1295 char tagdesc[ 64 ]; 1296 ctmbstr fmt = GetFormatFromCode(MISSING_ATTRIBUTE); 1297 1298 assert( fmt != NULL ); 1299 TagToString(node, tagdesc, sizeof(tagdesc)); 1300 messageNode( doc, TidyWarning, node, fmt, tagdesc, name ); 1301} 1302 1303#if SUPPORT_ACCESSIBILITY_CHECKS 1304 1305/********************************************************* 1306* Accessibility 1307* 1308* DisplayHTMLTableAlgorithm() 1309* 1310* If the table does contain 2 or more logical levels of 1311* row or column headers, the HTML 4 table algorithm 1312* to show the author how the headers are currently associated 1313* with the cells. 1314*********************************************************/ 1315 1316void TY_(DisplayHTMLTableAlgorithm)( TidyDocImpl* doc ) 1317{ 1318 tidy_out(doc, " \n"); 1319 tidy_out(doc, " - First, search left from the cell's position to find row header cells.\n"); 1320 tidy_out(doc, " - Then search upwards to find column header cells.\n"); 1321 tidy_out(doc, " - The search in a given direction stops when the edge of the table is\n"); 1322 tidy_out(doc, " reached or when a data cell is found after a header cell.\n"); 1323 tidy_out(doc, " - Row headers are inserted into the list in the order they appear in\n"); 1324 tidy_out(doc, " the table. \n"); 1325 tidy_out(doc, " - For left-to-right tables, headers are inserted from left to right.\n"); 1326 tidy_out(doc, " - Column headers are inserted after row headers, in \n"); 1327 tidy_out(doc, " the order they appear in the table, from top to bottom. \n"); 1328 tidy_out(doc, " - If a header cell has the headers attribute set, then the headers \n"); 1329 tidy_out(doc, " referenced by this attribute are inserted into the list and the \n"); 1330 tidy_out(doc, " search stops for the current direction.\n"); 1331 tidy_out(doc, " TD cells that set the axis attribute are also treated as header cells.\n"); 1332 tidy_out(doc, " \n"); 1333} 1334 1335void TY_(ReportAccessWarning)( TidyDocImpl* doc, Node* node, uint code ) 1336{ 1337 ctmbstr fmt = GetFormatFromCode(code); 1338 doc->badAccess = yes; 1339 messageNode( doc, TidyAccess, node, fmt ); 1340} 1341 1342void TY_(ReportAccessError)( TidyDocImpl* doc, Node* node, uint code ) 1343{ 1344 ctmbstr fmt = GetFormatFromCode(code); 1345 doc->badAccess = yes; 1346 messageNode( doc, TidyAccess, node, fmt ); 1347} 1348 1349#endif /* SUPPORT_ACCESSIBILITY_CHECKS */ 1350 1351void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code) 1352{ 1353 Node* rpt = (element ? element : node); 1354 ctmbstr fmt = GetFormatFromCode(code); 1355 char nodedesc[256] = { 0 }; 1356 char elemdesc[256] = { 0 }; 1357 1358 assert( fmt != NULL ); 1359 1360 TagToString(node, nodedesc, sizeof(nodedesc)); 1361 1362 switch (code) 1363 { 1364 case NESTED_QUOTATION: 1365 messageNode(doc, TidyWarning, rpt, fmt); 1366 break; 1367 1368 case OBSOLETE_ELEMENT: 1369 TagToString(element, elemdesc, sizeof(elemdesc)); 1370 messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc); 1371 break; 1372 1373 case NESTED_EMPHASIS: 1374 messageNode(doc, TidyWarning, rpt, fmt, nodedesc); 1375 break; 1376 case COERCE_TO_ENDTAG_WARN: 1377 messageNode(doc, TidyWarning, rpt, fmt, node->element, node->element); 1378 break; 1379 } 1380} 1381 1382void TY_(ReportNotice)(TidyDocImpl* doc, Node *element, Node *node, uint code) 1383{ 1384 Node* rpt = ( element ? element : node ); 1385 ctmbstr fmt = GetFormatFromCode(code); 1386 char nodedesc[256] = { 0 }; 1387 char elemdesc[256] = { 0 }; 1388 1389 assert( fmt != NULL ); 1390 1391 TagToString(node, nodedesc, sizeof(nodedesc)); 1392 1393 switch (code) 1394 { 1395 case TRIM_EMPTY_ELEMENT: 1396 TagToString(element, elemdesc, sizeof(nodedesc)); 1397 messageNode(doc, TidyWarning, element, fmt, elemdesc); 1398 break; 1399 1400 case REPLACING_ELEMENT: 1401 TagToString(element, elemdesc, sizeof(elemdesc)); 1402 messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc); 1403 break; 1404 } 1405} 1406 1407void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code) 1408{ 1409 char nodedesc[ 256 ] = {0}; 1410 char elemdesc[ 256 ] = {0}; 1411 Node* rpt = ( element ? element : node ); 1412 ctmbstr fmt = GetFormatFromCode(code); 1413 1414 assert( fmt != NULL ); 1415 1416 TagToString(node, nodedesc, sizeof(nodedesc)); 1417 1418 switch ( code ) 1419 { 1420 case MISSING_STARTTAG: 1421 case UNEXPECTED_ENDTAG: 1422 case TOO_MANY_ELEMENTS: 1423 case INSERTING_TAG: 1424 messageNode(doc, TidyWarning, node, fmt, node->element); 1425 break; 1426 1427 case USING_BR_INPLACE_OF: 1428 case CANT_BE_NESTED: 1429 case PROPRIETARY_ELEMENT: 1430 case UNESCAPED_ELEMENT: 1431 case NOFRAMES_CONTENT: 1432 messageNode(doc, TidyWarning, node, fmt, nodedesc); 1433 break; 1434 1435 case MISSING_TITLE_ELEMENT: 1436 case INCONSISTENT_VERSION: 1437 case MALFORMED_DOCTYPE: 1438 case CONTENT_AFTER_BODY: 1439 case MALFORMED_COMMENT: 1440 case BAD_COMMENT_CHARS: 1441 case BAD_XML_COMMENT: 1442 case BAD_CDATA_CONTENT: 1443 case INCONSISTENT_NAMESPACE: 1444 case DOCTYPE_AFTER_TAGS: 1445 case DTYPE_NOT_UPPER_CASE: 1446 messageNode(doc, TidyWarning, rpt, fmt); 1447 break; 1448 1449 case COERCE_TO_ENDTAG: 1450 case NON_MATCHING_ENDTAG: 1451 messageNode(doc, TidyWarning, rpt, fmt, node->element, node->element); 1452 break; 1453 1454 case UNEXPECTED_ENDTAG_IN: 1455 case TOO_MANY_ELEMENTS_IN: 1456 messageNode(doc, TidyWarning, node, fmt, node->element, element->element); 1457 break; 1458 1459 case ENCODING_IO_CONFLICT: 1460 case MISSING_DOCTYPE: 1461 case SPACE_PRECEDING_XMLDECL: 1462 messageNode(doc, TidyWarning, node, fmt); 1463 break; 1464 1465 case TRIM_EMPTY_ELEMENT: 1466 case ILLEGAL_NESTING: 1467 case UNEXPECTED_END_OF_FILE: 1468 case ELEMENT_NOT_EMPTY: 1469 TagToString(element, elemdesc, sizeof(elemdesc)); 1470 messageNode(doc, TidyWarning, element, fmt, elemdesc); 1471 break; 1472 1473 1474 case MISSING_ENDTAG_FOR: 1475 messageNode(doc, TidyWarning, rpt, fmt, element->element); 1476 break; 1477 1478 case MISSING_ENDTAG_BEFORE: 1479 messageNode(doc, TidyWarning, rpt, fmt, element->element, nodedesc); 1480 break; 1481 1482 case DISCARDING_UNEXPECTED: 1483 /* Force error if in a bad form */ 1484 messageNode(doc, doc->badForm ? TidyError : TidyWarning, node, fmt, nodedesc); 1485 break; 1486 1487 case TAG_NOT_ALLOWED_IN: 1488 messageNode(doc, TidyWarning, rpt, fmt, nodedesc, element->element); 1489 break; 1490 1491 case REPLACING_UNEX_ELEMENT: 1492 TagToString(element, elemdesc, sizeof(elemdesc)); 1493 messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc); 1494 break; 1495 } 1496} 1497 1498void TY_(ReportFatal)( TidyDocImpl* doc, Node *element, Node *node, uint code) 1499{ 1500 char nodedesc[ 256 ] = {0}; 1501 Node* rpt = ( element ? element : node ); 1502 ctmbstr fmt = GetFormatFromCode(code); 1503 1504 switch ( code ) 1505 { 1506 case SUSPECTED_MISSING_QUOTE: 1507 case DUPLICATE_FRAMESET: 1508 messageNode(doc, TidyError, rpt, fmt); 1509 break; 1510 1511 case UNKNOWN_ELEMENT: 1512 TagToString(node, nodedesc, sizeof(nodedesc)); 1513 messageNode( doc, TidyError, node, fmt, nodedesc ); 1514 break; 1515 1516 case UNEXPECTED_ENDTAG_IN: 1517 messageNode(doc, TidyError, node, fmt, node->element, element->element); 1518 break; 1519 1520 case UNEXPECTED_ENDTAG: /* generated by XML docs */ 1521 messageNode(doc, TidyError, node, fmt, node->element); 1522 break; 1523 } 1524} 1525 1526void TY_(ErrorSummary)( TidyDocImpl* doc ) 1527{ 1528 /* adjust badAccess to that its NULL if frames are ok */ 1529 ctmbstr encnam = "specified"; 1530 int charenc = cfg( doc, TidyCharEncoding ); 1531 if ( charenc == WIN1252 ) 1532 encnam = "Windows-1252"; 1533 else if ( charenc == MACROMAN ) 1534 encnam = "MacRoman"; 1535 else if ( charenc == IBM858 ) 1536 encnam = "ibm858"; 1537 else if ( charenc == LATIN0 ) 1538 encnam = "latin0"; 1539 1540 if ( doc->badAccess & (USING_FRAMES | USING_NOFRAMES) ) 1541 { 1542 if (!((doc->badAccess & USING_FRAMES) && !(doc->badAccess & USING_NOFRAMES))) 1543 doc->badAccess &= ~(USING_FRAMES | USING_NOFRAMES); 1544 } 1545 1546 if (doc->badChars) 1547 { 1548#if 0 1549 if ( doc->badChars & WINDOWS_CHARS ) 1550 { 1551 tidy_out(doc, "Characters codes for the Microsoft Windows fonts in the range\n"); 1552 tidy_out(doc, "128 - 159 may not be recognized on other platforms. You are\n"); 1553 tidy_out(doc, "instead recommended to use named entities, e.g. ™ rather\n"); 1554 tidy_out(doc, "than Windows character code 153 (0x2122 in Unicode). Note that\n"); 1555 tidy_out(doc, "as of February 1998 few browsers support the new entities.\n\n"); 1556 } 1557#endif 1558 if (doc->badChars & BC_VENDOR_SPECIFIC_CHARS) 1559 { 1560 1561 tidy_out(doc, "It is unlikely that vendor-specific, system-dependent encodings\n"); 1562 tidy_out(doc, "work widely enough on the World Wide Web; you should avoid using the \n"); 1563 tidy_out(doc, "%s", encnam ); 1564 tidy_out(doc, " character encoding, instead you are recommended to\n" ); 1565 tidy_out(doc, "use named entities, e.g. ™.\n\n"); 1566 } 1567 if ((doc->badChars & BC_INVALID_SGML_CHARS) || (doc->badChars & BC_INVALID_NCR)) 1568 { 1569 tidy_out(doc, "Character codes 128 to 159 (U+0080 to U+009F) are not allowed in HTML;\n"); 1570 tidy_out(doc, "even if they were, they would likely be unprintable control characters.\n"); 1571 tidy_out(doc, "Tidy assumed you wanted to refer to a character with the same byte value in the \n"); 1572 tidy_out(doc, "%s", encnam ); 1573 tidy_out(doc, " encoding and replaced that reference with the Unicode equivalent.\n\n" ); 1574 } 1575 if (doc->badChars & BC_INVALID_UTF8) 1576 { 1577 tidy_out(doc, "Character codes for UTF-8 must be in the range: U+0000 to U+10FFFF.\n"); 1578 tidy_out(doc, "The definition of UTF-8 in Annex D of ISO/IEC 10646-1:2000 also\n"); 1579 tidy_out(doc, "allows for the use of five- and six-byte sequences to encode\n"); 1580 tidy_out(doc, "characters that are outside the range of the Unicode character set;\n"); 1581 tidy_out(doc, "those five- and six-byte sequences are illegal for the use of\n"); 1582 tidy_out(doc, "UTF-8 as a transformation of Unicode characters. ISO/IEC 10646\n"); 1583 tidy_out(doc, "does not allow mapping of unpaired surrogates, nor U+FFFE and U+FFFF\n"); 1584 tidy_out(doc, "(but it does allow other noncharacters). For more information please refer to\n"); 1585 tidy_out(doc, "http://www.unicode.org/unicode and http://www.cl.cam.ac.uk/~mgk25/unicode.html\n\n"); 1586 } 1587 1588#if SUPPORT_UTF16_ENCODINGS 1589 1590 if (doc->badChars & BC_INVALID_UTF16) 1591 { 1592 tidy_out(doc, "Character codes for UTF-16 must be in the range: U+0000 to U+10FFFF.\n"); 1593 tidy_out(doc, "The definition of UTF-16 in Annex C of ISO/IEC 10646-1:2000 does not allow the\n"); 1594 tidy_out(doc, "mapping of unpaired surrogates. For more information please refer to\n"); 1595 tidy_out(doc, "http://www.unicode.org/unicode and http://www.cl.cam.ac.uk/~mgk25/unicode.html\n\n"); 1596 } 1597 1598#endif 1599 1600 if (doc->badChars & BC_INVALID_URI) 1601 { 1602 tidy_out(doc, "URIs must be properly escaped, they must not contain unescaped\n"); 1603 tidy_out(doc, "characters below U+0021 including the space character and not\n"); 1604 tidy_out(doc, "above U+007E. Tidy escapes the URI for you as recommended by\n"); 1605 tidy_out(doc, "HTML 4.01 section B.2.1 and XML 1.0 section 4.2.2. Some user agents\n"); 1606 tidy_out(doc, "use another algorithm to escape such URIs and some server-sided\n"); 1607 tidy_out(doc, "scripts depend on that. If you want to depend on that, you must\n"); 1608 tidy_out(doc, "escape the URI by your own. For more information please refer to\n"); 1609 tidy_out(doc, "http://www.w3.org/International/O-URL-and-ident.html\n\n"); 1610 } 1611 } 1612 1613 if (doc->badForm) 1614 { 1615 tidy_out(doc, "You may need to move one or both of the <form> and </form>\n"); 1616 tidy_out(doc, "tags. HTML elements should be properly nested and form elements\n"); 1617 tidy_out(doc, "are no exception. For instance you should not place the <form>\n"); 1618 tidy_out(doc, "in one table cell and the </form> in another. If the <form> is\n"); 1619 tidy_out(doc, "placed before a table, the </form> cannot be placed inside the\n"); 1620 tidy_out(doc, "table! Note that one form can't be nested inside another!\n\n"); 1621 } 1622 1623 if (doc->badAccess) 1624 { 1625 if ( cfg(doc, TidyAccessibilityCheckLevel) > 0 ) 1626 { 1627 tidy_out(doc, "For further advice on how to make your pages accessible, see\n"); 1628 tidy_out(doc, "%s", ACCESS_URL ); 1629 tidy_out(doc, " and\n" ); 1630 tidy_out(doc, "%s", ATRC_ACCESS_URL ); 1631 tidy_out(doc, ".\n" ); 1632 tidy_out(doc, "You may also want to try \"http://www.cast.org/bobby/\" which is a free Web-based\n"); 1633 tidy_out(doc, "service for checking URLs for accessibility.\n\n"); 1634 } 1635 else 1636 { 1637 if (doc->badAccess & MISSING_SUMMARY) 1638 { 1639 tidy_out(doc, "The table summary attribute should be used to describe\n"); 1640 tidy_out(doc, "the table structure. It is very helpful for people using\n"); 1641 tidy_out(doc, "non-visual browsers. The scope and headers attributes for\n"); 1642 tidy_out(doc, "table cells are useful for specifying which headers apply\n"); 1643 tidy_out(doc, "to each table cell, enabling non-visual browsers to provide\n"); 1644 tidy_out(doc, "a meaningful context for each cell.\n\n"); 1645 } 1646 1647 if (doc->badAccess & MISSING_IMAGE_ALT) 1648 { 1649 tidy_out(doc, "The alt attribute should be used to give a short description\n"); 1650 tidy_out(doc, "of an image; longer descriptions should be given with the\n"); 1651 tidy_out(doc, "longdesc attribute which takes a URL linked to the description.\n"); 1652 tidy_out(doc, "These measures are needed for people using non-graphical browsers.\n\n"); 1653 } 1654 1655 if (doc->badAccess & MISSING_IMAGE_MAP) 1656 { 1657 tidy_out(doc, "Use client-side image maps in preference to server-side image\n"); 1658 tidy_out(doc, "maps as the latter are inaccessible to people using non-\n"); 1659 tidy_out(doc, "graphical browsers. In addition, client-side maps are easier\n"); 1660 tidy_out(doc, "to set up and provide immediate feedback to users.\n\n"); 1661 } 1662 1663 if (doc->badAccess & MISSING_LINK_ALT) 1664 { 1665 tidy_out(doc, "For hypertext links defined using a client-side image map, you\n"); 1666 tidy_out(doc, "need to use the alt attribute to provide a textual description\n"); 1667 tidy_out(doc, "of the link for people using non-graphical browsers.\n\n"); 1668 } 1669 1670 if ((doc->badAccess & USING_FRAMES) && !(doc->badAccess & USING_NOFRAMES)) 1671 { 1672 tidy_out(doc, "Pages designed using frames presents problems for\n"); 1673 tidy_out(doc, "people who are either blind or using a browser that\n"); 1674 tidy_out(doc, "doesn't support frames. A frames-based page should always\n"); 1675 tidy_out(doc, "include an alternative layout inside a NOFRAMES element.\n\n"); 1676 } 1677 1678 tidy_out(doc, "For further advice on how to make your pages accessible\n"); 1679 tidy_out(doc, "see " ); 1680 tidy_out(doc, ACCESS_URL ); 1681 tidy_out(doc, ". You may also want to try\n" ); 1682 tidy_out(doc, "\"http://www.cast.org/bobby/\" which is a free Web-based\n"); 1683 tidy_out(doc, "service for checking URLs for accessibility.\n\n"); 1684 } 1685 } 1686 1687 if (doc->badLayout) 1688 { 1689 if (doc->badLayout & USING_LAYER) 1690 { 1691 tidy_out(doc, "The Cascading Style Sheets (CSS) Positioning mechanism\n"); 1692 tidy_out(doc, "is recommended in preference to the proprietary <LAYER>\n"); 1693 tidy_out(doc, "element due to limited vendor support for LAYER.\n\n"); 1694 } 1695 1696 if (doc->badLayout & USING_SPACER) 1697 { 1698 tidy_out(doc, "You are recommended to use CSS for controlling white\n"); 1699 tidy_out(doc, "space (e.g. for indentation, margins and line spacing).\n"); 1700 tidy_out(doc, "The proprietary <SPACER> element has limited vendor support.\n\n"); 1701 } 1702 1703 if (doc->badLayout & USING_FONT) 1704 { 1705 tidy_out(doc, "You are recommended to use CSS to specify the font and\n"); 1706 tidy_out(doc, "properties such as its size and color. This will reduce\n"); 1707 tidy_out(doc, "the size of HTML files and make them easier to maintain\n"); 1708 tidy_out(doc, "compared with using <FONT> elements.\n\n"); 1709 } 1710 1711 if (doc->badLayout & USING_NOBR) 1712 { 1713 tidy_out(doc, "You are recommended to use CSS to control line wrapping.\n"); 1714 tidy_out(doc, "Use \"white-space: nowrap\" to inhibit wrapping in place\n"); 1715 tidy_out(doc, "of inserting <NOBR>...</NOBR> into the markup.\n\n"); 1716 } 1717 1718 if (doc->badLayout & USING_BODY) 1719 { 1720 tidy_out(doc, "You are recommended to use CSS to specify page and link colors\n"); 1721 } 1722 } 1723} 1724 1725#if 0 1726void TY_(UnknownOption)( TidyDocImpl* doc, char c ) 1727{ 1728 message( doc, TidyConfig, 1729 "unrecognized option -%c use -help to list options\n", c ); 1730} 1731 1732void TY_(UnknownFile)( TidyDocImpl* doc, ctmbstr program, ctmbstr file ) 1733{ 1734 message( doc, TidyConfig, 1735 "%s: can't open file \"%s\"\n", program, file ); 1736} 1737#endif 1738 1739void TY_(NeedsAuthorIntervention)( TidyDocImpl* doc ) 1740{ 1741 tidy_out(doc, "This document has errors that must be fixed before\n"); 1742 tidy_out(doc, "using HTML Tidy to generate a tidied up version.\n\n"); 1743} 1744 1745void TY_(GeneralInfo)( TidyDocImpl* doc ) 1746{ 1747 tidy_out(doc, "To learn more about HTML Tidy see http://tidy.sourceforge.net\n"); 1748 tidy_out(doc, "Please send bug reports to html-tidy@w3.org\n"); 1749 tidy_out(doc, "HTML and CSS specifications are available from http://www.w3.org/\n"); 1750 tidy_out(doc, "Lobby your company to join W3C, see http://www.w3.org/Consortium\n"); 1751} 1752 1753#if SUPPORT_ACCESSIBILITY_CHECKS 1754 1755void TY_(AccessibilityHelloMessage)( TidyDocImpl* doc ) 1756{ 1757 tidy_out( doc, "\n" ); 1758 tidy_out( doc, "Accessibility Checks: Version 0.1\n" ); 1759 tidy_out( doc, "\n" ); 1760} 1761 1762#endif /* SUPPORT_ACCESSIBILITY_CHECKS */ 1763 1764#if 0 1765void TY_(HelloMessage)( TidyDocImpl* doc, ctmbstr date, ctmbstr filename ) 1766{ 1767 tmbchar buf[ 2048 ]; 1768 ctmbstr platform = "", helper = ""; 1769 ctmbstr msgfmt = "\nHTML Tidy for %s (vers %s; built on %s, at %s)\n" 1770 "Parsing \"%s\"\n"; 1771 1772#ifdef PLATFORM_NAME 1773 platform = PLATFORM_NAME; 1774 helper = " for "; 1775#endif 1776 1777 if ( TY_(tmbstrcmp)(filename, "stdin") == 0 ) 1778 { 1779 /* Filename will be ignored at end of varargs */ 1780 msgfmt = "\nHTML Tidy for %s (vers %s; built on %s, at %s)\n" 1781 "Parsing console input (stdin)\n"; 1782 } 1783 1784 TY_(tmbsnprintf)(buf, sizeof(buf), msgfmt, helper, platform, 1785 date, __DATE__, __TIME__, filename); 1786 tidy_out( doc, buf ); 1787} 1788#endif 1789 1790void TY_(ReportMarkupVersion)( TidyDocImpl* doc ) 1791{ 1792 if (doc->givenDoctype) 1793 { 1794 /* todo: deal with non-ASCII characters in FPI */ 1795 message(doc, TidyInfo, "Doctype given is \"%s\"", doc->givenDoctype); 1796 } 1797 1798 if ( ! cfgBool(doc, TidyXmlTags) ) 1799 { 1800 Bool isXhtml = doc->lexer->isvoyager; 1801 uint apparentVers; 1802 ctmbstr vers; 1803 1804 apparentVers = TY_(ApparentVersion)( doc ); 1805 1806 vers = TY_(HTMLVersionNameFromCode)( apparentVers, isXhtml ); 1807 1808 if (!vers) 1809 vers = "HTML Proprietary"; 1810 1811 message( doc, TidyInfo, "Document content looks like %s", vers ); 1812 1813 /* Warn about missing sytem identifier (SI) in emitted doctype */ 1814 if ( TY_(WarnMissingSIInEmittedDocType)( doc ) ) 1815 message( doc, TidyInfo, "No system identifier in emitted doctype" ); 1816 } 1817} 1818 1819void TY_(ReportNumWarnings)( TidyDocImpl* doc ) 1820{ 1821 if ( doc->warnings > 0 || doc->errors > 0 ) 1822 { 1823 tidy_out( doc, "%u %s, %u %s were found!", 1824 doc->warnings, doc->warnings == 1 ? "warning" : "warnings", 1825 doc->errors, doc->errors == 1 ? "error" : "errors" ); 1826 1827 if ( doc->errors > cfg(doc, TidyShowErrors) || 1828 !cfgBool(doc, TidyShowWarnings) ) 1829 tidy_out( doc, " Not all warnings/errors were shown.\n\n" ); 1830 else 1831 tidy_out( doc, "\n\n" ); 1832 } 1833 else 1834 tidy_out( doc, "No warnings or errors were found.\n\n" ); 1835} 1836 1837/* 1838 * local variables: 1839 * mode: c 1840 * indent-tabs-mode: nil 1841 * c-basic-offset: 4 1842 * eval: (c-set-offset 'substatement-open 0) 1843 * end: 1844 */ 1845