1/* 2 * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. 3 */ 4/* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21package com.sun.org.apache.xml.internal.serializer; 22 23import java.io.IOException; 24import java.util.Properties; 25 26import javax.xml.transform.Result; 27 28import org.xml.sax.Attributes; 29import org.xml.sax.SAXException; 30 31import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; 32import com.sun.org.apache.xml.internal.serializer.utils.Utils; 33 34/** 35 * This serializer takes a series of SAX or 36 * SAX-like events and writes its output 37 * to the given stream. 38 * 39 * This class is not a public API, it is public 40 * because it is used from another package. 41 * 42 * @xsl.usage internal 43 */ 44public final class ToHTMLStream extends ToStream 45{ 46 47 /** This flag is set while receiving events from the DTD */ 48 protected boolean m_inDTD = false; 49 50 /** True if the previous element is a block element. */ 51 private boolean m_isprevblock = false; 52 53 /** 54 * Map that tells which XML characters should have special treatment, and it 55 * provides character to entity name lookup. 
*/
    private static final CharInfo m_htmlcharInfo =
        CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

    /**
     * Trie of {@code ElemDesc} objects keyed by HTML element name. Entries
     * are stored under upper-case names and fetched with lower-case ones
     * further down, i.e. the lookup is case insensitive.
     */
    static final Trie m_elementFlags = new Trie();

    static {
        initTagReference(m_elementFlags);
    }

    /**
     * Populates the given trie with one {@code ElemDesc} per known HTML
     * element and then records, per element, which attributes hold URLs
     * ({@code ElemDesc.ATTRURL}) and which may be written in minimized,
     * value-less form ({@code ElemDesc.ATTREMPTY}).
     *
     * @param m_elementFlags the trie to fill; every element gets its own
     *                       {@code ElemDesc} instance because attribute
     *                       information is attached to it afterwards.
     */
    static void initTagReference(Trie m_elementFlags) {

        // ---- HTML 4.0 loose DTD ----
        m_elementFlags.put("BASEFONT", new ElemDesc(ElemDesc.EMPTY));
        m_elementFlags.put("FRAME", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
        for (String tag : new String[] {"FRAMESET", "NOFRAMES"}) {
            m_elementFlags.put(tag, new ElemDesc(ElemDesc.BLOCK));
        }
        m_elementFlags.put("ISINDEX", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
        m_elementFlags.put("APPLET", new ElemDesc(ElemDesc.WHITESPACESENSITIVE));
        for (String tag : new String[] {"CENTER", "DIR", "MENU"}) {
            m_elementFlags.put(tag, new ElemDesc(ElemDesc.BLOCK));
        }

        // ---- HTML 4.0 strict DTD ----
        for (String tag : new String[] {"TT", "I", "B", "BIG", "SMALL"}) {
            m_elementFlags.put(tag, new ElemDesc(ElemDesc.FONTSTYLE));
        }
        for (String tag : new String[] {
                "EM", "STRONG", "DFN", "CODE", "SAMP",
                "KBD", "VAR", "CITE", "ABBR", "ACRONYM"}) {
            m_elementFlags.put(tag, new ElemDesc(ElemDesc.PHRASE));
        }
        for (String tag : new String[] {"SUP", "SUB", "SPAN", "BDO"}) {
            m_elementFlags.put(tag, new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
        }
        m_elementFlags.put(
            "BR",
            new ElemDesc(
                ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.EMPTY | ElemDesc.BLOCK));
        m_elementFlags.put("BODY", new ElemDesc(ElemDesc.BLOCK));
        for (String tag : new String[] {"ADDRESS", "DIV"}) {
            m_elementFlags.put(
                tag,
                new ElemDesc(
                    ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        }
        m_elementFlags.put("A", new ElemDesc(ElemDesc.SPECIAL));
        m_elementFlags.put(
            "MAP",
            new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
        m_elementFlags.put("AREA", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
        m_elementFlags.put(
            "LINK",
            new ElemDesc(ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
        m_elementFlags.put(
            "IMG",
            new ElemDesc(
                ElemDesc.SPECIAL
                    | ElemDesc.ASPECIAL
                    | ElemDesc.EMPTY
                    | ElemDesc.WHITESPACESENSITIVE));
        m_elementFlags.put(
            "OBJECT",
            new ElemDesc(
                ElemDesc.SPECIAL
                    | ElemDesc.ASPECIAL
                    | ElemDesc.HEADMISC
                    | ElemDesc.WHITESPACESENSITIVE));
        m_elementFlags.put("PARAM", new ElemDesc(ElemDesc.EMPTY));
        m_elementFlags.put(
            "HR",
            new ElemDesc(
                ElemDesc.BLOCK
                    | ElemDesc.BLOCKFORM
                    | ElemDesc.BLOCKFORMFIELDSET
                    | ElemDesc.EMPTY));
        m_elementFlags.put(
            "P",
            new ElemDesc(
                ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        for (String tag : new String[] {"H1", "H2", "H3", "H4", "H5", "H6"}) {
            m_elementFlags.put(tag, new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
        }
        m_elementFlags.put("PRE", new ElemDesc(ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
        m_elementFlags.put("Q", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
        m_elementFlags.put(
            "BLOCKQUOTE",
            new ElemDesc(
                ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        for (String tag : new String[] {"INS", "DEL"}) {
            m_elementFlags.put(tag, new ElemDesc(0));
        }
        m_elementFlags.put(
            "DL",
            new ElemDesc(
                ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        for (String tag : new String[] {"DT", "DD"}) {
            m_elementFlags.put(tag, new ElemDesc(ElemDesc.BLOCK));
        }
        for (String tag : new String[] {"OL", "UL"}) {
            m_elementFlags.put(tag, new ElemDesc(ElemDesc.LIST | ElemDesc.BLOCK));
        }
        for (String tag : new String[] {"LI", "FORM"}) {
            m_elementFlags.put(tag, new ElemDesc(ElemDesc.BLOCK));
        }
        m_elementFlags.put("LABEL", new ElemDesc(ElemDesc.FORMCTRL));
        m_elementFlags.put(
            "INPUT",
            new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
        m_elementFlags.put(
            "SELECT",
            new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
        for (String tag : new String[] {"OPTGROUP", "OPTION"}) {
            m_elementFlags.put(tag, new ElemDesc(0));
        }
        m_elementFlags.put(
            "TEXTAREA",
            new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
        m_elementFlags.put(
            "FIELDSET",
            new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
        m_elementFlags.put("LEGEND", new ElemDesc(0));
        m_elementFlags.put(
            "BUTTON",
            new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
        m_elementFlags.put(
            "TABLE",
            new ElemDesc(
                ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        for (String tag : new String[] {"CAPTION", "THEAD", "TFOOT", "TBODY", "COLGROUP"}) {
            m_elementFlags.put(tag, new ElemDesc(ElemDesc.BLOCK));
        }
        m_elementFlags.put("COL", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
        m_elementFlags.put("TR", new ElemDesc(ElemDesc.BLOCK));
        for (String tag : new String[] {"TH", "TD"}) {
            m_elementFlags.put(tag, new ElemDesc(0));
        }
        m_elementFlags.put("HEAD", new ElemDesc(ElemDesc.BLOCK | ElemDesc.HEADELEM));
        m_elementFlags.put("TITLE", new ElemDesc(ElemDesc.BLOCK));
        m_elementFlags.put("BASE", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
        m_elementFlags.put(
            "META",
            new ElemDesc(ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
        m_elementFlags.put(
            "STYLE",
            new ElemDesc(ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
        m_elementFlags.put(
            "SCRIPT",
            new ElemDesc(
                ElemDesc.SPECIAL
                    | ElemDesc.ASPECIAL
                    | ElemDesc.HEADMISC
                    | ElemDesc.RAW));
        m_elementFlags.put(
            "NOSCRIPT",
            new ElemDesc(
                ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        m_elementFlags.put("HTML", new ElemDesc(ElemDesc.BLOCK));

        // ---- Presentational elements of the transitional (loose) DTD ----
        for (String tag : new String[] {"FONT", "S", "STRIKE", "U", "NOBR"}) {
            m_elementFlags.put(tag, new ElemDesc(ElemDesc.FONTSTYLE));
        }

        // IFRAME: HTML 4.0, section 16.5; LAYER and ILAYER: Netscape 4 extensions
        for (String tag : new String[] {"IFRAME", "LAYER", "ILAYER"}) {
            m_elementFlags.put(
                tag,
                new ElemDesc(
                    ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        }

        // ---- Attribute information ----
        // Elements with a single flagged attribute are handled with one
        // chained call; the others go through the local below.
        ElemDesc desc;

        desc = (ElemDesc) m_elementFlags.get("a");
        desc.setAttr("HREF", ElemDesc.ATTRURL);
        desc.setAttr("NAME", ElemDesc.ATTRURL);

        desc = (ElemDesc) m_elementFlags.get("area");
        desc.setAttr("HREF", ElemDesc.ATTRURL);
        desc.setAttr("NOHREF", ElemDesc.ATTREMPTY);

        ((ElemDesc) m_elementFlags.get("base")).setAttr("HREF", ElemDesc.ATTRURL);

        ((ElemDesc) m_elementFlags.get("button")).setAttr("DISABLED", ElemDesc.ATTREMPTY);

        ((ElemDesc) m_elementFlags.get("blockquote")).setAttr("CITE", ElemDesc.ATTRURL);

        ((ElemDesc) m_elementFlags.get("del")).setAttr("CITE", ElemDesc.ATTRURL);

        ((ElemDesc) m_elementFlags.get("dir")).setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("div");
        desc.setAttr("SRC", ElemDesc.ATTRURL);      // Netscape 4 extension
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension

        ((ElemDesc) m_elementFlags.get("dl")).setAttr("COMPACT", ElemDesc.ATTREMPTY);

        ((ElemDesc) m_elementFlags.get("form")).setAttr("ACTION", ElemDesc.ATTRURL);

        // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
        desc = (ElemDesc) m_elementFlags.get("frame");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
        desc.setAttr("NORESIZE", ElemDesc.ATTREMPTY);

        ((ElemDesc) m_elementFlags.get("head")).setAttr("PROFILE", ElemDesc.ATTRURL);

        ((ElemDesc) m_elementFlags.get("hr")).setAttr("NOSHADE", ElemDesc.ATTREMPTY);

        // HTML 4.0, section 16.5
        desc = (ElemDesc) m_elementFlags.get("iframe");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("LONGDESC", ElemDesc.ATTRURL);

        // Netscape 4 extension
        ((ElemDesc) m_elementFlags.get("ilayer")).setAttr("SRC", ElemDesc.ATTRURL);

        desc = (ElemDesc) m_elementFlags.get("img");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
        desc.setAttr("USEMAP", ElemDesc.ATTRURL);
        desc.setAttr("ISMAP", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("input");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("USEMAP", ElemDesc.ATTRURL);
        desc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
        desc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
        desc.setAttr("READONLY", ElemDesc.ATTREMPTY);

        ((ElemDesc) m_elementFlags.get("ins")).setAttr("CITE", ElemDesc.ATTRURL);

        // Netscape 4 extension
        ((ElemDesc) m_elementFlags.get("layer")).setAttr("SRC", ElemDesc.ATTRURL);

        ((ElemDesc) m_elementFlags.get("link")).setAttr("HREF", ElemDesc.ATTRURL);

        ((ElemDesc) m_elementFlags.get("menu")).setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("object");
        desc.setAttr("CLASSID", ElemDesc.ATTRURL);
        desc.setAttr("CODEBASE", ElemDesc.ATTRURL);
        desc.setAttr("DATA", ElemDesc.ATTRURL);
        desc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
        desc.setAttr("USEMAP", ElemDesc.ATTRURL);
        desc.setAttr("DECLARE", ElemDesc.ATTREMPTY);

        ((ElemDesc) m_elementFlags.get("ol")).setAttr("COMPACT", ElemDesc.ATTREMPTY);

        ((ElemDesc) m_elementFlags.get("optgroup")).setAttr("DISABLED", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("option");
        desc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

        ((ElemDesc) m_elementFlags.get("q")).setAttr("CITE", ElemDesc.ATTRURL);

        desc = (ElemDesc) m_elementFlags.get("script");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("FOR", ElemDesc.ATTRURL);
        desc.setAttr("DEFER", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("select");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
        desc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);

        // Internet-Explorer extension
        ((ElemDesc) m_elementFlags.get("table")).setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        ((ElemDesc) m_elementFlags.get("td")).setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("textarea");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
        desc.setAttr("READONLY", ElemDesc.ATTREMPTY);

        ((ElemDesc) m_elementFlags.get("th")).setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        // The nowrap attribute of a tr element is both
        // a Netscape and Internet-Explorer extension
        ((ElemDesc) m_elementFlags.get("tr")).setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        ((ElemDesc) m_elementFlags.get("ul")).setAttr("COMPACT", ElemDesc.ATTREMPTY);
    }

    /**
     * Fallback description handed out for element names that have no entry
     * in the trie; it carries only the BLOCK flag.
     */
    private static final ElemDesc m_dummy = new ElemDesc(ElemDesc.BLOCK);

    /** True if URLs should be specially escaped with the %xx form. */
    private boolean m_specialEscapeURLs = true;

    /** True if the META tag should be omitted. */
    private boolean m_omitMetaTag = false;

    /**
     * Switches special URL escaping on or off.
     *
     * @param bool True if URLs should be specially escaped with the %xx form.
     */
    public void setSpecialEscapeURLs(boolean bool)
    {
        this.m_specialEscapeURLs = bool;
    }

    /**
     * Tells if the formatter should omit the META tag.
     *
     * @param bool True if the META tag should be omitted.
540 */ 541 public void setOmitMetaTag(boolean bool) 542 { 543 m_omitMetaTag = bool; 544 } 545 546 /** 547 * Specifies an output format for this serializer. It the 548 * serializer has already been associated with an output format, 549 * it will switch to the new format. This method should not be 550 * called while the serializer is in the process of serializing 551 * a document. 552 * 553 * This method can be called multiple times before starting 554 * the serialization of a particular result-tree. In principle 555 * all serialization parameters can be changed, with the exception 556 * of method="html" (it must be method="html" otherwise we 557 * shouldn't even have a ToHTMLStream object here!) 558 * 559 * @param format The output format or serialzation parameters 560 * to use. 561 */ 562 public void setOutputFormat(Properties format) 563 { 564 565 m_specialEscapeURLs = 566 OutputPropertyUtils.getBooleanProperty( 567 OutputPropertiesFactory.S_USE_URL_ESCAPING, 568 format); 569 570 m_omitMetaTag = 571 OutputPropertyUtils.getBooleanProperty( 572 OutputPropertiesFactory.S_OMIT_META_TAG, 573 format); 574 575 super.setOutputFormat(format); 576 } 577 578 /** 579 * Tells if the formatter should use special URL escaping. 580 * 581 * @return True if URLs should be specially escaped with the %xx form. 582 */ 583 private final boolean getSpecialEscapeURLs() 584 { 585 return m_specialEscapeURLs; 586 } 587 588 /** 589 * Tells if the formatter should omit the META tag. 590 * 591 * @return True if the META tag should be omitted. 592 */ 593 private final boolean getOmitMetaTag() 594 { 595 return m_omitMetaTag; 596 } 597 598 /** 599 * Get a description of the given element. 600 * 601 * @param name non-null name of element, case insensitive. 602 * 603 * @return non-null reference to ElemDesc, which may be m_dummy if no 604 * element description matches the given name. 
605 */ 606 public static final ElemDesc getElemDesc(String name) 607 { 608 /* this method used to return m_dummy when name was null 609 * but now it doesn't check and and requires non-null name. 610 */ 611 Object obj = m_elementFlags.get(name); 612 if (null != obj) 613 return (ElemDesc)obj; 614 return m_dummy; 615 } 616 617 /** 618 * A Trie that is just a copy of the "static" one. 619 * We need this one to be able to use the faster, but not thread-safe 620 * method Trie.get2(name) 621 */ 622 private Trie m_htmlInfo = new Trie(m_elementFlags); 623 /** 624 * Calls to this method could be replaced with calls to 625 * getElemDesc(name), but this one should be faster. 626 */ 627 private ElemDesc getElemDesc2(String name) 628 { 629 Object obj = m_htmlInfo.get2(name); 630 if (null != obj) 631 return (ElemDesc)obj; 632 return m_dummy; 633 } 634 635 /** 636 * Default constructor. 637 */ 638 public ToHTMLStream() 639 { 640 641 super(); 642 m_charInfo = m_htmlcharInfo; 643 // initialize namespaces 644 m_prefixMap = new NamespaceMappings(); 645 646 } 647 648 /** The name of the current element. */ 649// private String m_currentElementName = null; 650 651 /** 652 * Receive notification of the beginning of a document. 653 * 654 * @throws org.xml.sax.SAXException Any SAX exception, possibly 655 * wrapping another exception. 
656 * 657 * @throws org.xml.sax.SAXException 658 */ 659 protected void startDocumentInternal() throws org.xml.sax.SAXException 660 { 661 super.startDocumentInternal(); 662 663 m_needToCallStartDocument = false; 664 m_needToOutputDocTypeDecl = true; 665 m_startNewLine = false; 666 setOmitXMLDeclaration(true); 667 668 if (true == m_needToOutputDocTypeDecl) 669 { 670 String doctypeSystem = getDoctypeSystem(); 671 String doctypePublic = getDoctypePublic(); 672 if ((null != doctypeSystem) || (null != doctypePublic)) 673 { 674 final java.io.Writer writer = m_writer; 675 try 676 { 677 writer.write("<!DOCTYPE html"); 678 679 if (null != doctypePublic) 680 { 681 writer.write(" PUBLIC \""); 682 writer.write(doctypePublic); 683 writer.write('"'); 684 } 685 686 if (null != doctypeSystem) 687 { 688 if (null == doctypePublic) 689 writer.write(" SYSTEM \""); 690 else 691 writer.write(" \""); 692 693 writer.write(doctypeSystem); 694 writer.write('"'); 695 } 696 697 writer.write('>'); 698 outputLineSep(); 699 } 700 catch(IOException e) 701 { 702 throw new SAXException(e); 703 } 704 } 705 } 706 707 m_needToOutputDocTypeDecl = false; 708 } 709 710 /** 711 * Receive notification of the end of a document. 712 * 713 * @throws org.xml.sax.SAXException Any SAX exception, possibly 714 * wrapping another exception. 715 * 716 * @throws org.xml.sax.SAXException 717 */ 718 public final void endDocument() throws org.xml.sax.SAXException 719 { 720 if (m_doIndent) { 721 flushCharactersBuffer(); 722 } 723 flushPending(); 724 if (m_doIndent && !m_isprevtext) 725 { 726 try 727 { 728 outputLineSep(); 729 } 730 catch(IOException e) 731 { 732 throw new SAXException(e); 733 } 734 } 735 736 flushWriter(); 737 if (m_tracer != null) 738 super.fireEndDoc(); 739 } 740 741 /** 742 * If the previous is an inline element, won't insert a new line before the 743 * text. 
744 * 745 */ 746 protected boolean shouldIndentForText() { 747 return super.shouldIndentForText() && m_isprevblock; 748 } 749 750 /** 751 * Only check m_doIndent, disregard m_ispreserveSpace. 752 * 753 * @return True if the content should be formatted. 754 */ 755 protected boolean shouldFormatOutput() { 756 return m_doIndent; 757 } 758 759 /** 760 * Receive notification of the beginning of an element. 761 * 762 * 763 * @param namespaceURI 764 * @param localName 765 * @param name 766 * The element type name. 767 * @param atts 768 * The attributes attached to the element, if any. 769 * @throws org.xml.sax.SAXException 770 * Any SAX exception, possibly wrapping another exception. 771 * @see #endElement 772 * @see org.xml.sax.AttributeList 773 */ 774 public void startElement( 775 String namespaceURI, 776 String localName, 777 String name, 778 Attributes atts) 779 throws SAXException 780 { 781 if (m_doIndent) { 782 // will add extra one if having namespace but no matter 783 m_childNodeNum++; 784 flushCharactersBuffer(); 785 } 786 ElemContext elemContext = m_elemContext; 787 788 // clean up any pending things first 789 if (elemContext.m_startTagOpen) 790 { 791 closeStartTag(); 792 elemContext.m_startTagOpen = false; 793 } 794 else if (m_cdataTagOpen) 795 { 796 closeCDATA(); 797 m_cdataTagOpen = false; 798 } 799 else if (m_needToCallStartDocument) 800 { 801 startDocumentInternal(); 802 m_needToCallStartDocument = false; 803 } 804 805 806 // if this element has a namespace then treat it like XML 807 if (null != namespaceURI && namespaceURI.length() > 0) 808 { 809 super.startElement(namespaceURI, localName, name, atts); 810 811 return; 812 } 813 814 try 815 { 816 // getElemDesc2(name) is faster than getElemDesc(name) 817 ElemDesc elemDesc = getElemDesc2(name); 818 int elemFlags = elemDesc.getFlags(); 819 820 // deal with indentation issues first 821 if (m_doIndent) 822 { 823 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0; 824 if ((elemContext.m_elementName != 
null) 825 // If this element is a block element, 826 // or if this is not a block element, then if the 827 // previous is neither a text nor an inline 828 && (isBlockElement || (!(m_isprevtext || !m_isprevblock)))) 829 { 830 m_startNewLine = true; 831 832 indent(); 833 } 834 m_isprevblock = isBlockElement; 835 } 836 837 // save any attributes for later processing 838 if (atts != null) 839 addAttributes(atts); 840 841 m_isprevtext = false; 842 final java.io.Writer writer = m_writer; 843 writer.write('<'); 844 writer.write(name); 845 846 if (m_doIndent) { 847 m_childNodeNumStack.add(m_childNodeNum); 848 m_childNodeNum = 0; 849 } 850 851 if (m_tracer != null) 852 firePseudoAttributes(); 853 854 if ((elemFlags & ElemDesc.EMPTY) != 0) 855 { 856 // an optimization for elements which are expected 857 // to be empty. 858 m_elemContext = elemContext.push(); 859 /* XSLTC sometimes calls namespaceAfterStartElement() 860 * so we need to remember the name 861 */ 862 m_elemContext.m_elementName = name; 863 m_elemContext.m_elementDesc = elemDesc; 864 return; 865 } 866 else 867 { 868 elemContext = elemContext.push(namespaceURI,localName,name); 869 m_elemContext = elemContext; 870 elemContext.m_elementDesc = elemDesc; 871 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0; 872 873 // set m_startNewLine for the next element 874 if (m_doIndent) { 875 // elemFlags is equivalent to m_elemContext.m_elementDesc.getFlags(), 876 // in this branch m_elemContext.m_elementName is not null 877 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0; 878 if (isBlockElement) 879 m_startNewLine = true; 880 } 881 } 882 883 884 if ((elemFlags & ElemDesc.HEADELEM) != 0) 885 { 886 // This is the <HEAD> element, do some special processing 887 closeStartTag(); 888 elemContext.m_startTagOpen = false; 889 if (!m_omitMetaTag) 890 { 891 if (m_doIndent) 892 indent(); 893 writer.write( 894 "<META http-equiv=\"Content-Type\" content=\"text/html; charset="); 895 String encoding = getEncoding(); 896 
String encode = Encodings.getMimeEncoding(encoding); 897 writer.write(encode); 898 writer.write("\">"); 899 } 900 } 901 } 902 catch (IOException e) 903 { 904 throw new SAXException(e); 905 } 906 } 907 908 /** 909 * Receive notification of the end of an element. 910 * 911 * 912 * @param namespaceURI 913 * @param localName 914 * @param name The element type name 915 * @throws org.xml.sax.SAXException Any SAX exception, possibly 916 * wrapping another exception. 917 */ 918 public final void endElement( 919 final String namespaceURI, 920 final String localName, 921 final String name) 922 throws org.xml.sax.SAXException 923 { 924 if (m_doIndent) { 925 flushCharactersBuffer(); 926 } 927 // deal with any pending issues 928 if (m_cdataTagOpen) 929 closeCDATA(); 930 931 // if the element has a namespace, treat it like XML, not HTML 932 if (null != namespaceURI && namespaceURI.length() > 0) 933 { 934 super.endElement(namespaceURI, localName, name); 935 936 return; 937 } 938 939 try 940 { 941 942 ElemContext elemContext = m_elemContext; 943 final ElemDesc elemDesc = elemContext.m_elementDesc; 944 final int elemFlags = elemDesc.getFlags(); 945 final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0; 946 947 // deal with any indentation issues 948 if (m_doIndent) 949 { 950 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0; 951 boolean shouldIndent = false; 952 953 // If this element is a block element, 954 // or if this is not a block element, then if the previous is 955 // neither a text nor an inline 956 if (isBlockElement || (!(m_isprevtext || !m_isprevblock))) 957 { 958 m_startNewLine = true; 959 shouldIndent = true; 960 } 961 if (!elemContext.m_startTagOpen && shouldIndent && (m_childNodeNum > 1 || !m_isprevtext)) 962 indent(elemContext.m_currentElemDepth - 1); 963 964 m_isprevblock = isBlockElement; 965 } 966 967 final java.io.Writer writer = m_writer; 968 if (!elemContext.m_startTagOpen) 969 { 970 writer.write("</"); 971 writer.write(name); 972 
writer.write('>'); 973 } 974 else 975 { 976 // the start-tag open when this method was called, 977 // so we need to process it now. 978 979 if (m_tracer != null) 980 super.fireStartElem(name); 981 982 // the starting tag was still open when we received this endElement() call 983 // so we need to process any gathered attributes NOW, before they go away. 984 int nAttrs = m_attributes.getLength(); 985 if (nAttrs > 0) 986 { 987 processAttributes(m_writer, nAttrs); 988 // clear attributes object for re-use with next element 989 m_attributes.clear(); 990 } 991 if (!elemEmpty) 992 { 993 // As per Dave/Paul recommendation 12/06/2000 994 // if (shouldIndent) 995 // writer.write('>'); 996 // indent(m_currentIndent); 997 998 writer.write("></"); 999 writer.write(name); 1000 writer.write('>'); 1001 } 1002 else 1003 { 1004 writer.write('>'); 1005 } 1006 } 1007 1008 if (m_doIndent) { 1009 m_childNodeNum = m_childNodeNumStack.remove(m_childNodeNumStack.size() - 1); 1010 // clean up because the element has ended 1011 m_isprevtext = false; 1012 } 1013 // fire off the end element event 1014 if (m_tracer != null) 1015 super.fireEndElem(name); 1016 1017 // OPTIMIZE-EMPTY 1018 if (elemEmpty) 1019 { 1020 // a quick exit if the HTML element had no children. 1021 // This block of code can be removed if the corresponding block of code 1022 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed 1023 m_elemContext = elemContext.m_prev; 1024 return; 1025 } 1026 1027 // some more clean because the element has ended. 1028 m_elemContext = elemContext.m_prev; 1029// m_isRawStack.pop(); 1030 } 1031 catch (IOException e) 1032 { 1033 throw new SAXException(e); 1034 } 1035 } 1036 1037 /** 1038 * Process an attribute. 1039 * @param writer The writer to write the processed output to. 1040 * @param name The name of the attribute. 1041 * @param value The value of the attribute. 1042 * @param elemDesc The description of the HTML element 1043 * that has this attribute. 
1044 * 1045 * @throws org.xml.sax.SAXException 1046 */ 1047 protected void processAttribute( 1048 java.io.Writer writer, 1049 String name, 1050 String value, 1051 ElemDesc elemDesc) 1052 throws IOException 1053 { 1054 writer.write(' '); 1055 1056 if ( ((value.length() == 0) || value.equalsIgnoreCase(name)) 1057 && elemDesc != null 1058 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY)) 1059 { 1060 writer.write(name); 1061 } 1062 else 1063 { 1064 // %REVIEW% %OPT% 1065 // Two calls to single-char write may NOT 1066 // be more efficient than one to string-write... 1067 writer.write(name); 1068 writer.write("=\""); 1069 if ( elemDesc != null 1070 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL)) 1071 writeAttrURI(writer, value, m_specialEscapeURLs); 1072 else 1073 writeAttrString(writer, value, this.getEncoding()); 1074 writer.write('"'); 1075 1076 } 1077 } 1078 1079 /** 1080 * Tell if a character is an ASCII digit. 1081 */ 1082 private boolean isASCIIDigit(char c) 1083 { 1084 return (c >= '0' && c <= '9'); 1085 } 1086 1087 /** 1088 * Make an integer into an HH hex value. 1089 * Does no checking on the size of the input, since this 1090 * is only meant to be used locally by writeAttrURI. 1091 * 1092 * @param i must be a value less than 255. 1093 * 1094 * @return should be a two character string. 1095 */ 1096 private static String makeHHString(int i) 1097 { 1098 String s = Integer.toHexString(i).toUpperCase(); 1099 if (s.length() == 1) 1100 { 1101 s = "0" + s; 1102 } 1103 return s; 1104 } 1105 1106 /** 1107 * Dmitri Ilyin: Makes sure if the String is HH encoded sign. 
1108 * @param str must be 2 characters long 1109 * 1110 * @return true or false 1111 */ 1112 private boolean isHHSign(String str) 1113 { 1114 boolean sign = true; 1115 try 1116 { 1117 char r = (char) Integer.parseInt(str, 16); 1118 } 1119 catch (NumberFormatException e) 1120 { 1121 sign = false; 1122 } 1123 return sign; 1124 } 1125 1126 /** 1127 * Write the specified <var>string</var> after substituting non ASCII characters, 1128 * with <CODE>%HH</CODE>, where HH is the hex of the byte value. 1129 * 1130 * @param string String to convert to XML format. 1131 * @param doURLEscaping True if we should try to encode as 1132 * per http://www.ietf.org/rfc/rfc2396.txt. 1133 * 1134 * @throws org.xml.sax.SAXException if a bad surrogate pair is detected. 1135 */ 1136 public void writeAttrURI( 1137 final java.io.Writer writer, String string, boolean doURLEscaping) 1138 throws IOException 1139 { 1140 // http://www.ietf.org/rfc/rfc2396.txt says: 1141 // A URI is always in an "escaped" form, since escaping or unescaping a 1142 // completed URI might change its semantics. Normally, the only time 1143 // escape encodings can safely be made is when the URI is being created 1144 // from its component parts; each component may have its own set of 1145 // characters that are reserved, so only the mechanism responsible for 1146 // generating or interpreting that component can determine whether or 1147 // not escaping a character will change its semantics. Likewise, a URI 1148 // must be separated into its components before the escaped characters 1149 // within those components can be safely decoded. 1150 // 1151 // ...So we do our best to do limited escaping of the URL, without 1152 // causing damage. If the URL is already properly escaped, in theory, this 1153 // function should not change the string value. 
1154 1155 final int end = string.length(); 1156 if (end > m_attrBuff.length) 1157 { 1158 m_attrBuff = new char[end*2 + 1]; 1159 } 1160 string.getChars(0,end, m_attrBuff, 0); 1161 final char[] chars = m_attrBuff; 1162 1163 int cleanStart = 0; 1164 int cleanLength = 0; 1165 1166 1167 char ch = 0; 1168 for (int i = 0; i < end; i++) 1169 { 1170 ch = chars[i]; 1171 1172 if ((ch < 32) || (ch > 126)) 1173 { 1174 if (cleanLength > 0) 1175 { 1176 writer.write(chars, cleanStart, cleanLength); 1177 cleanLength = 0; 1178 } 1179 if (doURLEscaping) 1180 { 1181 // Encode UTF16 to UTF8. 1182 // Reference is Unicode, A Primer, by Tony Graham. 1183 // Page 92. 1184 1185 // Note that Kay doesn't escape 0x20... 1186 // if(ch == 0x20) // Not sure about this... -sb 1187 // { 1188 // writer.write(ch); 1189 // } 1190 // else 1191 if (ch <= 0x7F) 1192 { 1193 writer.write('%'); 1194 writer.write(makeHHString(ch)); 1195 } 1196 else if (ch <= 0x7FF) 1197 { 1198 // Clear low 6 bits before rotate, put high 4 bits in low byte, 1199 // and set two high bits. 1200 int high = (ch >> 6) | 0xC0; 1201 int low = (ch & 0x3F) | 0x80; 1202 // First 6 bits, + high bit 1203 writer.write('%'); 1204 writer.write(makeHHString(high)); 1205 writer.write('%'); 1206 writer.write(makeHHString(low)); 1207 } 1208 else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate 1209 { 1210 // I'm sure this can be done in 3 instructions, but I choose 1211 // to try and do it exactly like it is done in the book, at least 1212 // until we are sure this is totally clean. I don't think performance 1213 // is a big issue with this particular function, though I could be 1214 // wrong. Also, the stuff below clearly does more masking than 1215 // it needs to do. 1216 1217 // Clear high 6 bits. 
1218 int highSurrogate = ((int) ch) & 0x03FF; 1219 1220 // Middle 4 bits (wwww) + 1 1221 // "Note that the value of wwww from the high surrogate bit pattern 1222 // is incremented to make the uuuuu bit pattern in the scalar value 1223 // so the surrogate pair don't address the BMP." 1224 int wwww = ((highSurrogate & 0x03C0) >> 6); 1225 int uuuuu = wwww + 1; 1226 1227 // next 4 bits 1228 int zzzz = (highSurrogate & 0x003C) >> 2; 1229 1230 // low 2 bits 1231 int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; 1232 1233 // Get low surrogate character. 1234 ch = chars[++i]; 1235 1236 // Clear high 6 bits. 1237 int lowSurrogate = ((int) ch) & 0x03FF; 1238 1239 // put the middle 4 bits into the bottom of yyyyyy (byte 3) 1240 yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); 1241 1242 // bottom 6 bits. 1243 int xxxxxx = (lowSurrogate & 0x003F); 1244 1245 int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu 1246 int byte2 = 1247 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; 1248 int byte3 = 0x80 | yyyyyy; 1249 int byte4 = 0x80 | xxxxxx; 1250 1251 writer.write('%'); 1252 writer.write(makeHHString(byte1)); 1253 writer.write('%'); 1254 writer.write(makeHHString(byte2)); 1255 writer.write('%'); 1256 writer.write(makeHHString(byte3)); 1257 writer.write('%'); 1258 writer.write(makeHHString(byte4)); 1259 } 1260 else 1261 { 1262 int high = (ch >> 12) | 0xE0; // top 4 bits 1263 int middle = ((ch & 0x0FC0) >> 6) | 0x80; 1264 // middle 6 bits 1265 int low = (ch & 0x3F) | 0x80; 1266 // First 6 bits, + high bit 1267 writer.write('%'); 1268 writer.write(makeHHString(high)); 1269 writer.write('%'); 1270 writer.write(makeHHString(middle)); 1271 writer.write('%'); 1272 writer.write(makeHHString(low)); 1273 } 1274 1275 } 1276 else if (escapingNotNeeded(ch)) 1277 { 1278 writer.write(ch); 1279 } 1280 else 1281 { 1282 writer.write("&#"); 1283 writer.write(Integer.toString(ch)); 1284 writer.write(';'); 1285 } 1286 // In this character range we have first written out any previously 
accumulated 1287 // "clean" characters, then processed the current more complicated character, 1288 // which may have incremented "i". 1289 // We now we reset the next possible clean character. 1290 cleanStart = i + 1; 1291 } 1292 // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as 1293 // not allowing quotes in the URI proper syntax, nor in the fragment 1294 // identifier, we believe that it's OK to double escape quotes. 1295 else if (ch == '"') 1296 { 1297 // If the character is a '%' number number, try to avoid double-escaping. 1298 // There is a question if this is legal behavior. 1299 1300 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded 1301 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little. 1302 1303 // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) ) 1304 1305 // We are no longer escaping '%' 1306 1307 if (cleanLength > 0) 1308 { 1309 writer.write(chars, cleanStart, cleanLength); 1310 cleanLength = 0; 1311 } 1312 1313 1314 // Mike Kay encodes this as ", so he may know something I don't? 1315 if (doURLEscaping) 1316 writer.write("%22"); 1317 else 1318 writer.write("""); // we have to escape this, I guess. 1319 1320 // We have written out any clean characters, then the escaped '%' and now we 1321 // We now we reset the next possible clean character. 1322 cleanStart = i + 1; 1323 } 1324 else if (ch == '&') 1325 { 1326 // HTML 4.01 reads, "Authors should use "&" (ASCII decimal 38) 1327 // instead of "&" to avoid confusion with the beginning of a character 1328 // reference (entity reference open delimiter). 
1329 if (cleanLength > 0) 1330 { 1331 writer.write(chars, cleanStart, cleanLength); 1332 cleanLength = 0; 1333 } 1334 writer.write("&"); 1335 cleanStart = i + 1; 1336 } 1337 else 1338 { 1339 // no processing for this character, just count how 1340 // many characters in a row that we have that need no processing 1341 cleanLength++; 1342 } 1343 } 1344 1345 // are there any clean characters at the end of the array 1346 // that we haven't processed yet? 1347 if (cleanLength > 1) 1348 { 1349 // if the whole string can be written out as-is do so 1350 // otherwise write out the clean chars at the end of the 1351 // array 1352 if (cleanStart == 0) 1353 writer.write(string); 1354 else 1355 writer.write(chars, cleanStart, cleanLength); 1356 } 1357 else if (cleanLength == 1) 1358 { 1359 // a little optimization for 1 clean character 1360 // (we could have let the previous if(...) handle them all) 1361 writer.write(ch); 1362 } 1363 } 1364 1365 /** 1366 * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>, 1367 * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>. 1368 * 1369 * @param string String to convert to XML format. 1370 * @param encoding CURRENTLY NOT IMPLEMENTED. 
1371 * 1372 * @throws org.xml.sax.SAXException 1373 */ 1374 public void writeAttrString( 1375 final java.io.Writer writer, String string, String encoding) 1376 throws IOException 1377 { 1378 final int end = string.length(); 1379 if (end > m_attrBuff.length) 1380 { 1381 m_attrBuff = new char[end * 2 + 1]; 1382 } 1383 string.getChars(0, end, m_attrBuff, 0); 1384 final char[] chars = m_attrBuff; 1385 1386 1387 1388 int cleanStart = 0; 1389 int cleanLength = 0; 1390 1391 char ch = 0; 1392 for (int i = 0; i < end; i++) 1393 { 1394 ch = chars[i]; 1395 1396 // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE); 1397 // System.out.println("ch: "+(int)ch); 1398 // System.out.println("m_maxCharacter: "+(int)m_maxCharacter); 1399 // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); 1400 if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) 1401 { 1402 cleanLength++; 1403 } 1404 else if ('<' == ch || '>' == ch) 1405 { 1406 cleanLength++; // no escaping in this case, as specified in 15.2 1407 } 1408 else if ( 1409 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1])) 1410 { 1411 cleanLength++; // no escaping in this case, as specified in 15.2 1412 } 1413 else 1414 { 1415 if (cleanLength > 0) 1416 { 1417 writer.write(chars,cleanStart,cleanLength); 1418 cleanLength = 0; 1419 } 1420 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true); 1421 1422 if (i != pos) 1423 { 1424 i = pos - 1; 1425 } 1426 else 1427 { 1428 if (Encodings.isHighUTF16Surrogate(ch)) 1429 { 1430 1431 writeUTF16Surrogate(ch, chars, i, end); 1432 i++; // two input characters processed 1433 // this increments by one and the for() 1434 // loop itself increments by another one. 1435 } 1436 1437 // The next is kind of a hack to keep from escaping in the case 1438 // of Shift_JIS and the like. 
1439 1440 /* 1441 else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF) 1442 && (ch != 160)) 1443 { 1444 writer.write(ch); // no escaping in this case 1445 } 1446 else 1447 */ 1448 String outputStringForChar = m_charInfo.getOutputStringForChar(ch); 1449 if (null != outputStringForChar) 1450 { 1451 writer.write(outputStringForChar); 1452 } 1453 else if (escapingNotNeeded(ch)) 1454 { 1455 writer.write(ch); // no escaping in this case 1456 } 1457 else 1458 { 1459 writer.write("&#"); 1460 writer.write(Integer.toString(ch)); 1461 writer.write(';'); 1462 } 1463 } 1464 cleanStart = i + 1; 1465 } 1466 } // end of for() 1467 1468 // are there any clean characters at the end of the array 1469 // that we haven't processed yet? 1470 if (cleanLength > 1) 1471 { 1472 // if the whole string can be written out as-is do so 1473 // otherwise write out the clean chars at the end of the 1474 // array 1475 if (cleanStart == 0) 1476 writer.write(string); 1477 else 1478 writer.write(chars, cleanStart, cleanLength); 1479 } 1480 else if (cleanLength == 1) 1481 { 1482 // a little optimization for 1 clean character 1483 // (we could have let the previous if(...) handle them all) 1484 writer.write(ch); 1485 } 1486 } 1487 1488 1489 1490 /** 1491 * Receive notification of character data. 1492 * 1493 * <p>The Parser will call this method to report each chunk of 1494 * character data. 
SAX parsers may return all contiguous character 1495 * data in a single chunk, or they may split it into several 1496 * chunks; however, all of the characters in any single event 1497 * must come from the same external entity, so that the Locator 1498 * provides useful information.</p> 1499 * 1500 * <p>The application must not attempt to read from the array 1501 * outside of the specified range.</p> 1502 * 1503 * <p>Note that some parsers will report whitespace using the 1504 * ignorableWhitespace() method rather than this one (validating 1505 * parsers must do so).</p> 1506 * 1507 * @param chars The characters from the XML document. 1508 * @param start The start position in the array. 1509 * @param length The number of characters to read from the array. 1510 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1511 * wrapping another exception. 1512 * @see #ignorableWhitespace 1513 * @see org.xml.sax.Locator 1514 * 1515 * @throws org.xml.sax.SAXException 1516 */ 1517 public final void characters(char chars[], int start, int length) 1518 throws org.xml.sax.SAXException 1519 { 1520 1521 if (m_elemContext.m_isRaw) 1522 { 1523 try 1524 { 1525 if (m_elemContext.m_startTagOpen) 1526 { 1527 closeStartTag(); 1528 m_elemContext.m_startTagOpen = false; 1529 } 1530 1531// With m_ispreserve just set true it looks like shouldIndent() 1532// will always return false, so drop any possible indentation. 
1533// if (shouldIndent()) 1534// indent(); 1535 1536 // writer.write("<![CDATA["); 1537 // writer.write(chars, start, length); 1538 writeNormalizedChars(chars, start, length, false, m_lineSepUse); 1539 m_isprevtext = true; 1540 // writer.write("]]>"); 1541 1542 // time to generate characters event 1543 if (m_tracer != null) 1544 super.fireCharEvent(chars, start, length); 1545 1546 return; 1547 } 1548 catch (IOException ioe) 1549 { 1550 throw new org.xml.sax.SAXException( 1551 Utils.messages.createMessage( 1552 MsgKey.ER_OIERROR, 1553 null), 1554 ioe); 1555 //"IO error", ioe); 1556 } 1557 } 1558 else 1559 { 1560 super.characters(chars, start, length); 1561 } 1562 } 1563 1564 /** 1565 * Receive notification of cdata. 1566 * 1567 * <p>The Parser will call this method to report each chunk of 1568 * character data. SAX parsers may return all contiguous character 1569 * data in a single chunk, or they may split it into several 1570 * chunks; however, all of the characters in any single event 1571 * must come from the same external entity, so that the Locator 1572 * provides useful information.</p> 1573 * 1574 * <p>The application must not attempt to read from the array 1575 * outside of the specified range.</p> 1576 * 1577 * <p>Note that some parsers will report whitespace using the 1578 * ignorableWhitespace() method rather than this one (validating 1579 * parsers must do so).</p> 1580 * 1581 * @param ch The characters from the XML document. 1582 * @param start The start position in the array. 1583 * @param length The number of characters to read from the array. 1584 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1585 * wrapping another exception. 
1586 * @see #ignorableWhitespace 1587 * @see org.xml.sax.Locator 1588 * 1589 * @throws org.xml.sax.SAXException 1590 */ 1591 public final void cdata(char ch[], int start, int length) 1592 throws org.xml.sax.SAXException 1593 { 1594 if ((null != m_elemContext.m_elementName) 1595 && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT") 1596 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE"))) 1597 { 1598 try 1599 { 1600 if (m_elemContext.m_startTagOpen) 1601 { 1602 closeStartTag(); 1603 m_elemContext.m_startTagOpen = false; 1604 } 1605 1606 if (shouldIndent()) 1607 indent(); 1608 1609 // writer.write(ch, start, length); 1610 writeNormalizedChars(ch, start, length, true, m_lineSepUse); 1611 } 1612 catch (IOException ioe) 1613 { 1614 throw new org.xml.sax.SAXException( 1615 Utils.messages.createMessage( 1616 MsgKey.ER_OIERROR, 1617 null), 1618 ioe); 1619 //"IO error", ioe); 1620 } 1621 } 1622 else 1623 { 1624 super.cdata(ch, start, length); 1625 } 1626 } 1627 1628 /** 1629 * Receive notification of a processing instruction. 1630 * 1631 * @param target The processing instruction target. 1632 * @param data The processing instruction data, or null if 1633 * none was supplied. 1634 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1635 * wrapping another exception. 1636 * 1637 * @throws org.xml.sax.SAXException 1638 */ 1639 public void processingInstruction(String target, String data) 1640 throws org.xml.sax.SAXException 1641 { 1642 if (m_doIndent) { 1643 m_childNodeNum++; 1644 flushCharactersBuffer(); 1645 } 1646 // Process any pending starDocument and startElement first. 1647 flushPending(); 1648 1649 // Use a fairly nasty hack to tell if the next node is supposed to be 1650 // unescaped text. 
1651 if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) 1652 { 1653 startNonEscaping(); 1654 } 1655 else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) 1656 { 1657 endNonEscaping(); 1658 } 1659 else 1660 { 1661 try 1662 { 1663 if (m_elemContext.m_startTagOpen) 1664 { 1665 closeStartTag(); 1666 m_elemContext.m_startTagOpen = false; 1667 } 1668 else if (m_needToCallStartDocument) 1669 startDocumentInternal(); 1670 1671 if (shouldIndent()) 1672 indent(); 1673 1674 final java.io.Writer writer = m_writer; 1675 //writer.write("<?" + target); 1676 writer.write("<?"); 1677 writer.write(target); 1678 1679 if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0))) 1680 writer.write(' '); 1681 1682 //writer.write(data + ">"); // different from XML 1683 writer.write(data); // different from XML 1684 writer.write('>'); // different from XML 1685 1686 // Always output a newline char if not inside of an 1687 // element. The whitespace is not significant in that 1688 // case. 1689 if (m_elemContext.m_currentElemDepth <= 0) 1690 outputLineSep(); 1691 1692 m_startNewLine = true; 1693 } 1694 catch(IOException e) 1695 { 1696 throw new SAXException(e); 1697 } 1698 } 1699 1700 // now generate the PI event 1701 if (m_tracer != null) 1702 super.fireEscapingEvent(target, data); 1703 } 1704 1705 /** 1706 * Receive notivication of a entityReference. 1707 * 1708 * @param name non-null reference to entity name string. 
1709 * 1710 * @throws org.xml.sax.SAXException 1711 */ 1712 public final void entityReference(String name) 1713 throws org.xml.sax.SAXException 1714 { 1715 try 1716 { 1717 1718 final java.io.Writer writer = m_writer; 1719 writer.write('&'); 1720 writer.write(name); 1721 writer.write(';'); 1722 1723 } catch(IOException e) 1724 { 1725 throw new SAXException(e); 1726 } 1727 } 1728 /** 1729 * @see ExtendedContentHandler#endElement(String) 1730 */ 1731 public final void endElement(String elemName) throws SAXException 1732 { 1733 endElement(null, null, elemName); 1734 } 1735 1736 /** 1737 * Process the attributes, which means to write out the currently 1738 * collected attributes to the writer. The attributes are not 1739 * cleared by this method 1740 * 1741 * @param writer the writer to write processed attributes to. 1742 * @param nAttrs the number of attributes in m_attributes 1743 * to be processed 1744 * 1745 * @throws org.xml.sax.SAXException 1746 */ 1747 public void processAttributes(java.io.Writer writer, int nAttrs) 1748 throws IOException,SAXException 1749 { 1750 /* 1751 * process the collected attributes 1752 */ 1753 for (int i = 0; i < nAttrs; i++) 1754 { 1755 processAttribute( 1756 writer, 1757 m_attributes.getQName(i), 1758 m_attributes.getValue(i), 1759 m_elemContext.m_elementDesc); 1760 } 1761 } 1762 1763 /** 1764 * For the enclosing elements starting tag write out out any attributes 1765 * followed by ">" 1766 * 1767 *@throws org.xml.sax.SAXException 1768 */ 1769 protected void closeStartTag() throws SAXException 1770 { 1771 try 1772 { 1773 1774 // finish processing attributes, time to fire off the start element event 1775 if (m_tracer != null) 1776 super.fireStartElem(m_elemContext.m_elementName); 1777 1778 int nAttrs = m_attributes.getLength(); 1779 if (nAttrs>0) 1780 { 1781 processAttributes(m_writer, nAttrs); 1782 // clear attributes object for re-use with next element 1783 m_attributes.clear(); 1784 } 1785 1786 m_writer.write('>'); 1787 1788 /* 
whether Xalan or XSLTC, we have the prefix mappings now, so 1789 * lets determine if the current element is specified in the cdata- 1790 * section-elements list. 1791 */ 1792 if (m_StringOfCDATASections != null) 1793 m_elemContext.m_isCdataSection = isCdataSection(); 1794 1795 } 1796 catch(IOException e) 1797 { 1798 throw new SAXException(e); 1799 } 1800 } 1801 1802 /** 1803 * This method is used when a prefix/uri namespace mapping 1804 * is indicated after the element was started with a 1805 * startElement() and before and endElement(). 1806 * startPrefixMapping(prefix,uri) would be used before the 1807 * startElement() call. 1808 * @param uri the URI of the namespace 1809 * @param prefix the prefix associated with the given URI. 1810 * 1811 * @see ExtendedContentHandler#namespaceAfterStartElement(String, String) 1812 */ 1813 public void namespaceAfterStartElement(String prefix, String uri) 1814 throws SAXException 1815 { 1816 // hack for XSLTC with finding URI for default namespace 1817 if (m_elemContext.m_elementURI == null) 1818 { 1819 String prefix1 = getPrefixPart(m_elemContext.m_elementName); 1820 if (prefix1 == null && EMPTYSTRING.equals(prefix)) 1821 { 1822 // the elements URI is not known yet, and it 1823 // doesn't have a prefix, and we are currently 1824 // setting the uri for prefix "", so we have 1825 // the uri for the element... lets remember it 1826 m_elemContext.m_elementURI = uri; 1827 } 1828 } 1829 startPrefixMapping(prefix,uri,false); 1830 } 1831 1832 public void startDTD(String name, String publicId, String systemId) 1833 throws SAXException 1834 { 1835 m_inDTD = true; 1836 super.startDTD(name, publicId, systemId); 1837 } 1838 1839 /** 1840 * Report the end of DTD declarations. 1841 * @throws org.xml.sax.SAXException The application may raise an exception. 
1842 * @see #startDTD 1843 */ 1844 public void endDTD() throws org.xml.sax.SAXException 1845 { 1846 m_inDTD = false; 1847 /* for ToHTMLStream the DOCTYPE is entirely output in the 1848 * startDocumentInternal() method, so don't do anything here 1849 */ 1850 } 1851 /** 1852 * This method does nothing. 1853 */ 1854 public void attributeDecl( 1855 String eName, 1856 String aName, 1857 String type, 1858 String valueDefault, 1859 String value) 1860 throws SAXException 1861 { 1862 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1863 } 1864 1865 /** 1866 * This method does nothing. 1867 */ 1868 public void elementDecl(String name, String model) throws SAXException 1869 { 1870 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1871 } 1872 /** 1873 * This method does nothing. 1874 */ 1875 public void internalEntityDecl(String name, String value) 1876 throws SAXException 1877 { 1878 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1879 } 1880 /** 1881 * This method does nothing. 1882 */ 1883 public void externalEntityDecl( 1884 String name, 1885 String publicId, 1886 String systemId) 1887 throws SAXException 1888 { 1889 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1890 } 1891 1892 /** 1893 * This method is used to add an attribute to the currently open element. 1894 * The caller has guaranted that this attribute is unique, which means that it 1895 * not been seen before and will not be seen again. 1896 * 1897 * @param name the qualified name of the attribute 1898 * @param value the value of the attribute which can contain only 1899 * ASCII printable characters characters in the range 32 to 127 inclusive. 1900 * @param flags the bit values of this integer give optimization information. 
1901 */ 1902 public void addUniqueAttribute(String name, String value, int flags) 1903 throws SAXException 1904 { 1905 try 1906 { 1907 final java.io.Writer writer = m_writer; 1908 if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt) 1909 { 1910 // "flags" has indicated that the characters 1911 // '>' '<' '&' and '"' are not in the value and 1912 // m_htmlcharInfo has recorded that there are no other 1913 // entities in the range 0 to 127 so we write out the 1914 // value directly 1915 writer.write(' '); 1916 writer.write(name); 1917 writer.write("=\""); 1918 writer.write(value); 1919 writer.write('"'); 1920 } 1921 else if ( 1922 (flags & HTML_ATTREMPTY) > 0 1923 && (value.length() == 0 || value.equalsIgnoreCase(name))) 1924 { 1925 writer.write(' '); 1926 writer.write(name); 1927 } 1928 else 1929 { 1930 writer.write(' '); 1931 writer.write(name); 1932 writer.write("=\""); 1933 if ((flags & HTML_ATTRURL) > 0) 1934 { 1935 writeAttrURI(writer, value, m_specialEscapeURLs); 1936 } 1937 else 1938 { 1939 writeAttrString(writer, value, this.getEncoding()); 1940 } 1941 writer.write('"'); 1942 } 1943 } catch (IOException e) { 1944 throw new SAXException(e); 1945 } 1946 } 1947 1948 public void comment(char ch[], int start, int length) 1949 throws SAXException 1950 { 1951 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1952 if (m_inDTD) 1953 return; 1954 super.comment(ch, start, length); 1955 } 1956 1957 public boolean reset() 1958 { 1959 boolean ret = super.reset(); 1960 if (!ret) 1961 return false; 1962 initToHTMLStream(); 1963 return true; 1964 } 1965 1966 private void initToHTMLStream() 1967 { 1968 m_isprevblock = false; 1969 m_inDTD = false; 1970 m_omitMetaTag = false; 1971 m_specialEscapeURLs = true; 1972 } 1973 1974 static class Trie 1975 { 1976 /** 1977 * A digital search trie for 7-bit ASCII text 1978 * The API is a subset of java.util.Hashtable 1979 * The key must be a 7-bit ASCII string 1980 * The value may be any Java Object 
1981 * One can get an object stored in a trie from its key, 1982 * but the search is either case sensitive or case 1983 * insensitive to the characters in the key, and this 1984 * choice of sensitivity or insensitivity is made when 1985 * the Trie is created, before any objects are put in it. 1986 * 1987 * This class is a copy of the one in com.sun.org.apache.xml.internal.utils. 1988 * It exists to cut the serializers dependancy on that package. 1989 * 1990 * @xsl.usage internal 1991 */ 1992 1993 /** Size of the m_nextChar array. */ 1994 public static final int ALPHA_SIZE = 128; 1995 1996 /** The root node of the tree. */ 1997 final Node m_Root; 1998 1999 /** helper buffer to convert Strings to char arrays */ 2000 private char[] m_charBuffer = new char[0]; 2001 2002 /** true if the search for an object is lower case only with the key */ 2003 private final boolean m_lowerCaseOnly; 2004 2005 /** 2006 * Construct the trie that has a case insensitive search. 2007 */ 2008 public Trie() 2009 { 2010 m_Root = new Node(); 2011 m_lowerCaseOnly = false; 2012 } 2013 2014 /** 2015 * Construct the trie given the desired case sensitivity with the key. 2016 * @param lowerCaseOnly true if the search keys are to be loser case only, 2017 * not case insensitive. 2018 */ 2019 public Trie(boolean lowerCaseOnly) 2020 { 2021 m_Root = new Node(); 2022 m_lowerCaseOnly = lowerCaseOnly; 2023 } 2024 2025 /** 2026 * Put an object into the trie for lookup. 2027 * 2028 * @param key must be a 7-bit ASCII string 2029 * @param value any java object. 2030 * 2031 * @return The old object that matched key, or null. 
2032 */ 2033 public Object put(String key, Object value) 2034 { 2035 2036 final int len = key.length(); 2037 if (len > m_charBuffer.length) 2038 { 2039 // make the biggest buffer ever needed in get(String) 2040 m_charBuffer = new char[len]; 2041 } 2042 2043 Node node = m_Root; 2044 2045 for (int i = 0; i < len; i++) 2046 { 2047 Node nextNode = 2048 node.m_nextChar[Character.toLowerCase(key.charAt(i))]; 2049 2050 if (nextNode != null) 2051 { 2052 node = nextNode; 2053 } 2054 else 2055 { 2056 for (; i < len; i++) 2057 { 2058 Node newNode = new Node(); 2059 if (m_lowerCaseOnly) 2060 { 2061 // put this value into the tree only with a lower case key 2062 node.m_nextChar[Character.toLowerCase( 2063 key.charAt(i))] = 2064 newNode; 2065 } 2066 else 2067 { 2068 // put this value into the tree with a case insensitive key 2069 node.m_nextChar[Character.toUpperCase( 2070 key.charAt(i))] = 2071 newNode; 2072 node.m_nextChar[Character.toLowerCase( 2073 key.charAt(i))] = 2074 newNode; 2075 } 2076 node = newNode; 2077 } 2078 break; 2079 } 2080 } 2081 2082 Object ret = node.m_Value; 2083 2084 node.m_Value = value; 2085 2086 return ret; 2087 } 2088 2089 /** 2090 * Get an object that matches the key. 2091 * 2092 * @param key must be a 7-bit ASCII string 2093 * 2094 * @return The object that matches the key, or null. 2095 */ 2096 public Object get(final String key) 2097 { 2098 2099 final int len = key.length(); 2100 2101 /* If the name is too long, we won't find it, this also keeps us 2102 * from overflowing m_charBuffer 2103 */ 2104 if (m_charBuffer.length < len) 2105 return null; 2106 2107 Node node = m_Root; 2108 switch (len) // optimize the look up based on the number of chars 2109 { 2110 // case 0 looks silly, but the generated bytecode runs 2111 // faster for lookup of elements of length 2 with this in 2112 // and a fair bit faster. Don't know why. 
2113 case 0 : 2114 { 2115 return null; 2116 } 2117 2118 case 1 : 2119 { 2120 final char ch = key.charAt(0); 2121 if (ch < ALPHA_SIZE) 2122 { 2123 node = node.m_nextChar[ch]; 2124 if (node != null) 2125 return node.m_Value; 2126 } 2127 return null; 2128 } 2129 // comment out case 2 because the default is faster 2130 // case 2 : 2131 // { 2132 // final char ch0 = key.charAt(0); 2133 // final char ch1 = key.charAt(1); 2134 // if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE) 2135 // { 2136 // node = node.m_nextChar[ch0]; 2137 // if (node != null) 2138 // { 2139 // 2140 // if (ch1 < ALPHA_SIZE) 2141 // { 2142 // node = node.m_nextChar[ch1]; 2143 // if (node != null) 2144 // return node.m_Value; 2145 // } 2146 // } 2147 // } 2148 // return null; 2149 // } 2150 default : 2151 { 2152 for (int i = 0; i < len; i++) 2153 { 2154 // A thread-safe way to loop over the characters 2155 final char ch = key.charAt(i); 2156 if (ALPHA_SIZE <= ch) 2157 { 2158 // the key is not 7-bit ASCII so we won't find it here 2159 return null; 2160 } 2161 2162 node = node.m_nextChar[ch]; 2163 if (node == null) 2164 return null; 2165 } 2166 2167 return node.m_Value; 2168 } 2169 } 2170 } 2171 2172 /** 2173 * The node representation for the trie. 2174 * @xsl.usage internal 2175 */ 2176 private class Node 2177 { 2178 2179 /** 2180 * Constructor, creates a Node[ALPHA_SIZE]. 2181 */ 2182 Node() 2183 { 2184 m_nextChar = new Node[ALPHA_SIZE]; 2185 m_Value = null; 2186 } 2187 2188 /** The next nodes. */ 2189 final Node m_nextChar[]; 2190 2191 /** The value. */ 2192 Object m_Value; 2193 } 2194 /** 2195 * Construct the trie from another Trie. 2196 * Both the existing Trie and this new one share the same table for 2197 * lookup, and it is assumed that the table is fully populated and 2198 * not changing anymore. 2199 * 2200 * @param existingTrie the Trie that this one is a copy of. 2201 */ 2202 public Trie(Trie existingTrie) 2203 { 2204 // copy some fields from the existing Trie into this one. 
2205 m_Root = existingTrie.m_Root; 2206 m_lowerCaseOnly = existingTrie.m_lowerCaseOnly; 2207 2208 // get a buffer just big enough to hold the longest key in the table. 2209 int max = existingTrie.getLongestKeyLength(); 2210 m_charBuffer = new char[max]; 2211 } 2212 2213 /** 2214 * Get an object that matches the key. 2215 * This method is faster than get(), but is not thread-safe. 2216 * 2217 * @param key must be a 7-bit ASCII string 2218 * 2219 * @return The object that matches the key, or null. 2220 */ 2221 public Object get2(final String key) 2222 { 2223 2224 final int len = key.length(); 2225 2226 /* If the name is too long, we won't find it, this also keeps us 2227 * from overflowing m_charBuffer 2228 */ 2229 if (m_charBuffer.length < len) 2230 return null; 2231 2232 Node node = m_Root; 2233 switch (len) // optimize the look up based on the number of chars 2234 { 2235 // case 0 looks silly, but the generated bytecode runs 2236 // faster for lookup of elements of length 2 with this in 2237 // and a fair bit faster. Don't know why. 2238 case 0 : 2239 { 2240 return null; 2241 } 2242 2243 case 1 : 2244 { 2245 final char ch = key.charAt(0); 2246 if (ch < ALPHA_SIZE) 2247 { 2248 node = node.m_nextChar[ch]; 2249 if (node != null) 2250 return node.m_Value; 2251 } 2252 return null; 2253 } 2254 default : 2255 { 2256 /* Copy string into array. This is not thread-safe because 2257 * it modifies the contents of m_charBuffer. If multiple 2258 * threads were to use this Trie they all would be 2259 * using this same array (not good). So this 2260 * method is not thread-safe, but it is faster because 2261 * converting to a char[] and looping over elements of 2262 * the array is faster than a String's charAt(i). 
2263 */ 2264 key.getChars(0, len, m_charBuffer, 0); 2265 2266 for (int i = 0; i < len; i++) 2267 { 2268 final char ch = m_charBuffer[i]; 2269 if (ALPHA_SIZE <= ch) 2270 { 2271 // the key is not 7-bit ASCII so we won't find it here 2272 return null; 2273 } 2274 2275 node = node.m_nextChar[ch]; 2276 if (node == null) 2277 return null; 2278 } 2279 2280 return node.m_Value; 2281 } 2282 } 2283 } 2284 2285 /** 2286 * Get the length of the longest key used in the table. 2287 */ 2288 public int getLongestKeyLength() 2289 { 2290 return m_charBuffer.length; 2291 } 2292 } 2293} 2294