1/* tidylib.c -- internal library definitions 2 3 (c) 1998-2006 (W3C) MIT, ERCIM, Keio University 4 See tidy.h for the copyright notice. 5 6 CVS Info : 7 8 $Author: iccir $ 9 $Date: 2007/01/30 23:46:52 $ 10 $Revision: 1.3 $ 11 12 Defines HTML Tidy API implemented by tidy library. 13 14 Very rough initial cut for discussion purposes. 15 16 Public interface is const-correct and doesn't explicitly depend 17 on any globals. Thus, thread-safety may be introduced w/out 18 changing the interface. 19 20 Looking ahead to a C++ wrapper, C functions always pass 21 this-equivalent as 1st arg. 22 23 Created 2001-05-20 by Charles Reitzel 24 25*/ 26 27#include <errno.h> 28 29#include "tidy-int.h" 30#include "parser.h" 31#include "clean.h" 32#include "config.h" 33#include "message.h" 34#include "pprint.h" 35#include "entities.h" 36#include "tmbstr.h" 37#include "utf8.h" 38#include "mappedio.h" 39 40#ifdef TIDY_WIN32_MLANG_SUPPORT 41#include "win32tc.h" 42#endif 43 44#ifdef NEVER 45TidyDocImpl* tidyDocToImpl( TidyDoc tdoc ) 46{ 47 return (TidyDocImpl*) tdoc; 48} 49TidyDoc tidyImplToDoc( TidyDocImpl* impl ) 50{ 51 return (TidyDoc) impl; 52} 53 54Node* tidyNodeToImpl( TidyNode tnod ) 55{ 56 return (Node*) tnod; 57} 58TidyNode tidyImplToNode( Node* node ) 59{ 60 return (TidyNode) node; 61} 62 63AttVal* tidyAttrToImpl( TidyAttr tattr ) 64{ 65 return (AttVal*) tattr; 66} 67TidyAttr tidyImplToAttr( AttVal* attval ) 68{ 69 return (TidyAttr) attval; 70} 71 72const TidyOptionImpl* tidyOptionToImpl( TidyOption topt ) 73{ 74 return (const TidyOptionImpl*) topt; 75} 76TidyOption tidyImplToOption( const TidyOptionImpl* option ) 77{ 78 return (TidyOption) option; 79} 80#endif 81 82/* Tidy public interface 83** 84** Most functions return an integer: 85** 86** 0 -> SUCCESS 87** >0 -> WARNING 88** <0 -> ERROR 89** 90*/ 91 92TidyDoc TIDY_CALL tidyCreate(void) 93{ 94 TidyDocImpl* impl = tidyDocCreate(); 95 return tidyImplToDoc( impl ); 96} 97 98void TIDY_CALL tidyRelease( TidyDoc tdoc ) 99{ 100 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 101 tidyDocRelease( impl ); 102} 103 104TidyDocImpl* tidyDocCreate(void) 105{ 106 TidyDocImpl* doc = (TidyDocImpl*)MemAlloc( sizeof(TidyDocImpl) ); 107 ClearMemory( doc, sizeof(*doc) ); 108 109 TY_(InitMap)(); 110 TY_(InitTags)( doc ); 111 TY_(InitAttrs)( doc ); 112 TY_(InitConfig)( doc ); 113 TY_(InitPrintBuf)( doc ); 114 115 /* By default, wire tidy messages to standard error. 116 ** Document input will be set by parsing routines. 117 ** Document output will be set by pretty print routines. 118 ** Config input will be set by config parsing routines. 119 ** But we need to start off with a way to report errors. 120 */ 121 doc->errout = TY_(StdErrOutput)(); 122 return doc; 123} 124 125void tidyDocRelease( TidyDocImpl* doc ) 126{ 127 /* doc in/out opened and closed by parse/print routines */ 128 if ( doc ) 129 { 130 assert( doc->docIn == NULL ); 131 assert( doc->docOut == NULL ); 132 133 TY_(ReleaseStreamOut)( doc->errout ); 134 doc->errout = NULL; 135 136 TY_(FreePrintBuf)( doc ); 137 TY_(FreeLexer)( doc ); 138 TY_(FreeNode)(doc, &doc->root); 139 ClearMemory(&doc->root, sizeof(Node)); 140 141 if (doc->givenDoctype) 142 MemFree(doc->givenDoctype); 143 144 TY_(FreeConfig)( doc ); 145 TY_(FreeAttrTable)( doc ); 146 TY_(FreeTags)( doc ); 147 MemFree( doc ); 148 } 149} 150 151/* Let application store a chunk of data w/ each Tidy tdocance. 152** Useful for callbacks. 153*/ 154void TIDY_CALL tidySetAppData( TidyDoc tdoc, void* appData ) 155{ 156 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 157 if ( impl ) 158 impl->appData = appData; 159} 160void* TIDY_CALL tidyGetAppData( TidyDoc tdoc ) 161{ 162 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 163 if ( impl ) 164 return impl->appData; 165 return NULL; 166} 167 168ctmbstr TIDY_CALL tidyReleaseDate(void) 169{ 170 return TY_(ReleaseDate)(); 171} 172 173 174/* Get/set configuration options 175*/ 176Bool TIDY_CALL tidySetOptionCallback( TidyDoc tdoc, TidyOptCallback pOptCallback ) 177{ 178 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 179 if ( impl ) 180 { 181 impl->pOptCallback = pOptCallback; 182 return yes; 183 } 184 return no; 185} 186 187 188int TIDY_CALL tidyLoadConfig( TidyDoc tdoc, ctmbstr cfgfil ) 189{ 190 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 191 if ( impl ) 192 return TY_(ParseConfigFile)( impl, cfgfil ); 193 return -EINVAL; 194} 195 196int TIDY_CALL tidyLoadConfigEnc( TidyDoc tdoc, ctmbstr cfgfil, ctmbstr charenc ) 197{ 198 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 199 if ( impl ) 200 return TY_(ParseConfigFileEnc)( impl, cfgfil, charenc ); 201 return -EINVAL; 202} 203 204int TIDY_CALL tidySetCharEncoding( TidyDoc tdoc, ctmbstr encnam ) 205{ 206 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 207 if ( impl ) 208 { 209 int enc = TY_(CharEncodingId)( encnam ); 210 if ( enc >= 0 && TY_(AdjustCharEncoding)(impl, enc) ) 211 return 0; 212 213 TY_(ReportBadArgument)( impl, "char-encoding" ); 214 } 215 return -EINVAL; 216} 217 218int TIDY_CALL tidySetInCharEncoding( TidyDoc tdoc, ctmbstr encnam ) 219{ 220 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 221 if ( impl ) 222 { 223 int enc = TY_(CharEncodingId)( encnam ); 224 if ( enc >= 0 && TY_(SetOptionInt)( impl, TidyInCharEncoding, enc ) ) 225 return 0; 226 227 TY_(ReportBadArgument)( impl, "in-char-encoding" ); 228 } 229 return -EINVAL; 230} 231 232int TIDY_CALL tidySetOutCharEncoding( TidyDoc tdoc, ctmbstr encnam ) 233{ 234 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 235 if ( impl ) 236 { 237 int enc = TY_(CharEncodingId)( encnam ); 238 if ( enc >= 0 && TY_(SetOptionInt)( impl, TidyOutCharEncoding, enc ) ) 239 return 0; 240 241 TY_(ReportBadArgument)( impl, "out-char-encoding" ); 242 } 243 return -EINVAL; 244} 245 246TidyOptionId TIDY_CALL tidyOptGetIdForName( ctmbstr optnam ) 247{ 248 const TidyOptionImpl* option = TY_(lookupOption)( optnam ); 249 if ( option ) 250 return option->id; 251 return N_TIDY_OPTIONS; /* Error */ 252} 253 254TidyIterator TIDY_CALL tidyGetOptionList( TidyDoc tdoc ) 255{ 256 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 257 if ( impl ) 258 return TY_(getOptionList)( impl ); 259 return (TidyIterator) -1; 260} 261 262TidyOption TIDY_CALL tidyGetNextOption( TidyDoc tdoc, TidyIterator* pos ) 263{ 264 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 265 const TidyOptionImpl* option = NULL; 266 if ( impl ) 267 option = TY_(getNextOption)( impl, pos ); 268 else if ( pos ) 269 *pos = 0; 270 return tidyImplToOption( option ); 271} 272 273 274TidyOption TIDY_CALL tidyGetOption( TidyDoc ARG_UNUSED(tdoc), TidyOptionId optId ) 275{ 276 const TidyOptionImpl* option = TY_(getOption)( optId ); 277 return tidyImplToOption( option ); 278} 279TidyOption TIDY_CALL tidyGetOptionByName( TidyDoc ARG_UNUSED(doc), ctmbstr optnam ) 280{ 281 const TidyOptionImpl* option = TY_(lookupOption)( optnam ); 282 return tidyImplToOption( option ); 283} 284 285TidyOptionId TIDY_CALL tidyOptGetId( TidyOption topt ) 286{ 287 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 288 if ( option ) 289 return option->id; 290 return N_TIDY_OPTIONS; 291} 292ctmbstr TIDY_CALL tidyOptGetName( TidyOption topt ) 293{ 294 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 295 if ( option ) 296 return option->name; 297 return NULL; 298} 299TidyOptionType TIDY_CALL tidyOptGetType( TidyOption topt ) 300{ 301 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 302 if ( option ) 303 return option->type; 304 return (TidyOptionType) -1; 305} 306TidyConfigCategory TIDY_CALL tidyOptGetCategory( TidyOption topt ) 307{ 308 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 309 if ( option ) 310 return option->category; 311 return (TidyConfigCategory) -1; 312} 313ctmbstr TIDY_CALL tidyOptGetDefault( TidyOption topt ) 314{ 315 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 316 if ( option && option->type == TidyString ) 317 return (ctmbstr) option->dflt; 318 return NULL; 319} 320ulong TIDY_CALL tidyOptGetDefaultInt( TidyOption topt ) 321{ 322 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 323 if ( option && option->type != TidyString ) 324 return option->dflt; 325 return ~0U; 326} 327Bool TIDY_CALL tidyOptGetDefaultBool( TidyOption topt ) 328{ 329 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 330 if ( option && option->type != TidyString ) 331 return ( option->dflt ? yes : no ); 332 return no; 333} 334Bool TIDY_CALL tidyOptIsReadOnly( TidyOption topt ) 335{ 336 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 337 if ( option ) 338 return ( option->parser == NULL ); 339 return yes; 340} 341 342 343TidyIterator TIDY_CALL tidyOptGetPickList( TidyOption topt ) 344{ 345 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 346 if ( option ) 347 return TY_(getOptionPickList)( option ); 348 return (TidyIterator) -1; 349} 350ctmbstr TIDY_CALL tidyOptGetNextPick( TidyOption topt, TidyIterator* pos ) 351{ 352 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 353 if ( option ) 354 return TY_(getNextOptionPick)( option, pos ); 355 return NULL; 356} 357 358 359ctmbstr TIDY_CALL tidyOptGetValue( TidyDoc tdoc, TidyOptionId optId ) 360{ 361 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 362 ctmbstr optval = NULL; 363 if ( impl ) 364 optval = cfgStr( impl, optId ); 365 return optval; 366} 367Bool TIDY_CALL tidyOptSetValue( TidyDoc tdoc, TidyOptionId optId, ctmbstr val ) 368{ 369 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 370 if ( impl ) 371 return TY_(ParseConfigValue)( impl, optId, val ); 372 return no; 373} 374Bool TIDY_CALL tidyOptParseValue( TidyDoc tdoc, ctmbstr optnam, ctmbstr val ) 375{ 376 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 377 if ( impl ) 378 return TY_(ParseConfigOption)( impl, optnam, val ); 379 return no; 380} 381 382ulong TIDY_CALL tidyOptGetInt( TidyDoc tdoc, TidyOptionId optId ) 383{ 384 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 385 ulong opti = 0; 386 if ( impl ) 387 opti = cfg( impl, optId ); 388 return opti; 389} 390 391Bool TIDY_CALL tidyOptSetInt( TidyDoc tdoc, TidyOptionId optId, ulong val ) 392{ 393 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 394 if ( impl ) 395 return TY_(SetOptionInt)( impl, optId, val ); 396 return no; 397} 398 399Bool TIDY_CALL tidyOptGetBool( TidyDoc tdoc, TidyOptionId optId ) 400{ 401 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 402 Bool optb = no; 403 if ( impl ) 404 { 405 const TidyOptionImpl* option = TY_(getOption)( optId ); 406 if ( option ) 407 { 408 optb = cfgBool( impl, optId ); 409 } 410 } 411 return optb; 412} 413 414Bool TIDY_CALL tidyOptSetBool( TidyDoc tdoc, TidyOptionId optId, Bool val ) 415{ 416 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 417 if ( impl ) 418 return TY_(SetOptionBool)( impl, optId, val ); 419 return no; 420} 421 422ctmbstr TIDY_CALL tidyOptGetEncName( TidyDoc tdoc, TidyOptionId optId ) 423{ 424 uint enc = tidyOptGetInt( tdoc, optId ); 425 return TY_(CharEncodingOptName)( enc ); 426} 427 428ctmbstr TIDY_CALL tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId ) 429{ 430 const TidyOptionImpl* option = TY_(getOption)( optId ); 431 if ( option && option->pickList ) 432 { 433 uint ix, pick = tidyOptGetInt( tdoc, optId ); 434 const ctmbstr* pL = option->pickList; 435 for ( ix=0; *pL && ix < pick; ++ix ) 436 ++pL; 437 if ( *pL ) 438 return *pL; 439 } 440 return NULL; 441} 442 443 444TidyIterator TIDY_CALL tidyOptGetDeclTagList( TidyDoc tdoc ) 445{ 446 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 447 TidyIterator declIter = 0; 448 if ( impl ) 449 declIter = TY_(GetDeclaredTagList)( impl ); 450 return declIter; 451} 452 453ctmbstr TIDY_CALL tidyOptGetNextDeclTag( TidyDoc tdoc, TidyOptionId optId, 454 TidyIterator* iter ) 455{ 456 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 457 ctmbstr tagnam = NULL; 458 if ( impl ) 459 { 460 UserTagType tagtyp = tagtype_null; 461 if ( optId == TidyInlineTags ) 462 tagtyp = tagtype_inline; 463 else if ( optId == TidyBlockTags ) 464 tagtyp = tagtype_block; 465 else if ( optId == TidyEmptyTags ) 466 tagtyp = tagtype_empty; 467 else if ( optId == TidyPreTags ) 468 tagtyp = tagtype_pre; 469 if ( tagtyp != tagtype_null ) 470 tagnam = TY_(GetNextDeclaredTag)( impl, tagtyp, iter ); 471 } 472 return tagnam; 473} 474 475ctmbstr TIDY_CALL tidyOptGetDoc( TidyDoc ARG_UNUSED(tdoc), TidyOption opt ) 476{ 477 const TidyOptionId optId = tidyOptGetId( opt ); 478 const TidyOptionDoc* docDesc = TY_(OptGetDocDesc)( optId ); 479 return docDesc ? docDesc->doc : NULL; 480} 481 482TidyIterator TIDY_CALL tidyOptGetDocLinksList( TidyDoc ARG_UNUSED(tdoc), TidyOption opt ) 483{ 484 const TidyOptionId optId = tidyOptGetId( opt ); 485 const TidyOptionDoc* docDesc = TY_(OptGetDocDesc)( optId ); 486 if (docDesc && docDesc->links) 487 return (TidyIterator)docDesc->links; 488 return (TidyIterator)NULL; 489} 490 491TidyOption TIDY_CALL tidyOptGetNextDocLinks( TidyDoc tdoc, TidyIterator* pos ) 492{ 493 const TidyOptionId* curr = (TidyOptionId *)*pos; 494 TidyOption opt; 495 496 if (*curr == TidyUnknownOption) 497 { 498 *pos = (TidyIterator)NULL; 499 return (TidyOption)0; 500 } 501 opt = tidyGetOption(tdoc, *curr); 502 curr++; 503 *pos = (*curr == TidyUnknownOption ) ? 504 (TidyIterator)NULL:(TidyIterator)curr; 505 return opt; 506} 507 508int TIDY_CALL tidyOptSaveFile( TidyDoc tdoc, ctmbstr cfgfil ) 509{ 510 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 511 if ( impl ) 512 return TY_(SaveConfigFile)( impl, cfgfil ); 513 return -EINVAL; 514} 515 516int TIDY_CALL tidyOptSaveSink( TidyDoc tdoc, TidyOutputSink* sink ) 517{ 518 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 519 if ( impl ) 520 return TY_(SaveConfigSink)( impl, sink ); 521 return -EINVAL; 522} 523 524Bool TIDY_CALL tidyOptSnapshot( TidyDoc tdoc ) 525{ 526 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 527 if ( impl ) 528 { 529 TY_(TakeConfigSnapshot)( impl ); 530 return yes; 531 } 532 return no; 533} 534Bool TIDY_CALL tidyOptResetToSnapshot( TidyDoc tdoc ) 535{ 536 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 537 if ( impl ) 538 { 539 TY_(ResetConfigToSnapshot)( impl ); 540 return yes; 541 } 542 return no; 543} 544Bool TIDY_CALL tidyOptResetAllToDefault( TidyDoc tdoc ) 545{ 546 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 547 if ( impl ) 548 { 549 TY_(ResetConfigToDefault)( impl ); 550 return yes; 551 } 552 return no; 553} 554 555Bool TIDY_CALL tidyOptResetToDefault( TidyDoc tdoc, TidyOptionId optId ) 556{ 557 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 558 if ( impl ) 559 return TY_(ResetOptionToDefault)( impl, optId ); 560 return no; 561} 562 563Bool TIDY_CALL tidyOptDiffThanDefault( TidyDoc tdoc ) 564{ 565 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 566 if ( impl ) 567 return TY_(ConfigDiffThanDefault)( impl ); 568 return no; 569} 570Bool TIDY_CALL tidyOptDiffThanSnapshot( TidyDoc tdoc ) 571{ 572 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 573 if ( impl ) 574 return TY_(ConfigDiffThanSnapshot)( impl ); 575 return no; 576} 577 578Bool TIDY_CALL tidyOptCopyConfig( TidyDoc to, TidyDoc from ) 579{ 580 TidyDocImpl* docTo = tidyDocToImpl( to ); 581 TidyDocImpl* docFrom = tidyDocToImpl( from ); 582 if ( docTo && docFrom ) 583 { 584 TY_(CopyConfig)( docTo, docFrom ); 585 return yes; 586 } 587 return no; 588} 589 590 591/* I/O and Message handling interface 592** 593** By default, Tidy will define, create and use 594** tdocances of input and output handlers for 595** standard C buffered I/O (i.e. FILE* stdin, 596** FILE* stdout and FILE* stderr for content 597** input, content output and diagnostic output, 598** respectively. A FILE* cfgFile input handler 599** will be used for config files. Command line 600** options will just be set directly. 601*/ 602 603/* Use TidyReportFilter to filter messages by diagnostic level: 604** info, warning, etc. Just set diagnostic output 605** handler to redirect all diagnostics output. Return true 606** to proceed with output, false to cancel. 607*/ 608Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc, TidyReportFilter filt ) 609{ 610 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 611 if ( impl ) 612 { 613 impl->mssgFilt = filt; 614 return yes; 615 } 616 return no; 617} 618 619#if 0 /* Not yet */ 620int tidySetContentOutputSink( TidyDoc tdoc, TidyOutputSink* outp ) 621{ 622 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 623 if ( impl ) 624 { 625 impl->docOut = outp; 626 return 0; 627 } 628 return -EINVAL; 629} 630int tidySetDiagnosticOutputSink( TidyDoc tdoc, TidyOutputSink* outp ) 631{ 632 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 633 if ( impl ) 634 { 635 impl->msgOut = outp; 636 return 0; 637 } 638 return -EINVAL; 639} 640 641 642/* Library helpers 643*/ 644cmbstr tidyLookupMessage( TidyDoc tdoc, int errorNo ) 645{ 646 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 647 cmbstr mssg = NULL; 648 if ( impl ) 649 mssg = tidyMessage_Lookup( impl->messages, errorNo ); 650 return mssg; 651} 652#endif 653 654 655FILE* TIDY_CALL tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam ) 656{ 657 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 658 if ( impl ) 659 { 660 FILE* errout = fopen( errfilnam, "wb" ); 661 if ( errout ) 662 { 663 uint outenc = cfg( impl, TidyOutCharEncoding ); 664 uint nl = cfg( impl, TidyNewline ); 665 TY_(ReleaseStreamOut)( impl->errout ); 666 impl->errout = TY_(FileOutput)( errout, outenc, nl ); 667 return errout; 668 } 669 else /* Emit message to current error sink */ 670 TY_(FileError)( impl, errfilnam, TidyError ); 671 } 672 return NULL; 673} 674 675int TIDY_CALL tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer* errbuf ) 676{ 677 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 678 if ( impl ) 679 { 680 uint outenc = cfg( impl, TidyOutCharEncoding ); 681 uint nl = cfg( impl, TidyNewline ); 682 TY_(ReleaseStreamOut)( impl->errout ); 683 impl->errout = TY_(BufferOutput)( errbuf, outenc, nl ); 684 return ( impl->errout ? 0 : -ENOMEM ); 685 } 686 return -EINVAL; 687} 688 689int TIDY_CALL tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink ) 690{ 691 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 692 if ( impl ) 693 { 694 uint outenc = cfg( impl, TidyOutCharEncoding ); 695 uint nl = cfg( impl, TidyNewline ); 696 TY_(ReleaseStreamOut)( impl->errout ); 697 impl->errout = TY_(UserOutput)( sink, outenc, nl ); 698 return ( impl->errout ? 0 : -ENOMEM ); 699 } 700 return -EINVAL; 701} 702 703 704/* Document info */ 705int TIDY_CALL tidyStatus( TidyDoc tdoc ) 706{ 707 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 708 int tidyStat = -EINVAL; 709 if ( impl ) 710 tidyStat = tidyDocStatus( impl ); 711 return tidyStat; 712} 713int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc ARG_UNUSED(tdoc) ) 714{ 715/* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */ 716 return 0; 717} 718Bool TIDY_CALL tidyDetectedXhtml( TidyDoc ARG_UNUSED(tdoc) ) 719{ 720/* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */ 721 return no; 722} 723Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc ARG_UNUSED(tdoc) ) 724{ 725/* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */ 726 return no; 727} 728 729uint TIDY_CALL tidyErrorCount( TidyDoc tdoc ) 730{ 731 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 732 uint count = 0xFFFFFFFF; 733 if ( impl ) 734 count = impl->errors; 735 return count; 736} 737uint TIDY_CALL tidyWarningCount( TidyDoc tdoc ) 738{ 739 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 740 uint count = 0xFFFFFFFF; 741 if ( impl ) 742 count = impl->warnings; 743 return count; 744} 745uint TIDY_CALL tidyAccessWarningCount( TidyDoc tdoc ) 746{ 747 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 748 uint count = 0xFFFFFFFF; 749 if ( impl ) 750 count = impl->accessErrors; 751 return count; 752} 753uint TIDY_CALL tidyConfigErrorCount( TidyDoc tdoc ) 754{ 755 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 756 uint count = 0xFFFFFFFF; 757 if ( impl ) 758 count = impl->optionErrors; 759 return count; 760} 761 762 763/* Error reporting functions 764*/ 765void TIDY_CALL tidyErrorSummary( TidyDoc tdoc ) 766{ 767 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 768 if ( impl ) 769 TY_(ErrorSummary)( impl ); 770} 771void TIDY_CALL tidyGeneralInfo( TidyDoc tdoc ) 772{ 773 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 774 if ( impl ) 775 TY_(GeneralInfo)( impl ); 776} 777 778 779/* I/O Functions 780** 781** Initial version supports only whole-file operations. 782** Do not expose Tidy StreamIn or Out data structures - yet. 783*/ 784 785/* Parse/load Functions 786** 787** HTML/XHTML version determined from input. 788*/ 789int TIDY_CALL tidyParseFile( TidyDoc tdoc, ctmbstr filnam ) 790{ 791 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 792 return tidyDocParseFile( doc, filnam ); 793} 794int TIDY_CALL tidyParseStdin( TidyDoc tdoc ) 795{ 796 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 797 return tidyDocParseStdin( doc ); 798} 799int TIDY_CALL tidyParseString( TidyDoc tdoc, ctmbstr content ) 800{ 801 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 802 return tidyDocParseString( doc, content ); 803} 804int TIDY_CALL tidyParseBuffer( TidyDoc tdoc, TidyBuffer* inbuf ) 805{ 806 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 807 return tidyDocParseBuffer( doc, inbuf ); 808} 809int TIDY_CALL tidyParseSource( TidyDoc tdoc, TidyInputSource* source ) 810{ 811 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 812 return tidyDocParseSource( doc, source ); 813} 814 815 816int tidyDocParseFile( TidyDocImpl* doc, ctmbstr filnam ) 817{ 818#ifdef _WIN32 819 return TY_(DocParseFileWithMappedFile)( doc, filnam ); 820#else 821 int status = -ENOENT; 822 FILE* fin = fopen( filnam, "rb" ); 823 824#if PRESERVE_FILE_TIMES 825 struct stat sbuf = {0}; 826 /* get last modified time */ 827 ClearMemory( &doc->filetimes, sizeof(doc->filetimes) ); 828 if ( fin && cfgBool(doc,TidyKeepFileTimes) && 829 fstat(fileno(fin), &sbuf) != -1 ) 830 { 831 doc->filetimes.actime = sbuf.st_atime; 832 doc->filetimes.modtime = sbuf.st_mtime; 833 } 834#endif 835 836 if ( fin ) 837 { 838 StreamIn* in = TY_(FileInput)( doc, fin, cfg( doc, TidyInCharEncoding )); 839 if ( !in ) 840 { 841 fclose( fin ); 842 return status; 843 } 844 status = tidyDocParseStream( doc, in ); 845 TY_(freeFileSource)(&in->source, yes); 846 TY_(freeStreamIn)(in); 847 } 848 else /* Error message! */ 849 TY_(FileError)( doc, filnam, TidyError ); 850 return status; 851#endif 852} 853 854int tidyDocParseStdin( TidyDocImpl* doc ) 855{ 856 StreamIn* in = TY_(FileInput)( doc, stdin, cfg( doc, TidyInCharEncoding )); 857 int status = tidyDocParseStream( doc, in ); 858 TY_(freeStreamIn)(in); 859 return status; 860} 861 862int tidyDocParseBuffer( TidyDocImpl* doc, TidyBuffer* inbuf ) 863{ 864 int status = -EINVAL; 865 if ( inbuf ) 866 { 867 StreamIn* in = TY_(BufferInput)( doc, inbuf, cfg( doc, TidyInCharEncoding )); 868 status = tidyDocParseStream( doc, in ); 869 TY_(freeStreamIn)(in); 870 } 871 return status; 872} 873 874int tidyDocParseString( TidyDocImpl* doc, ctmbstr content ) 875{ 876 int status = -EINVAL; 877 TidyBuffer inbuf = {0}; 878 StreamIn* in = NULL; 879 880 if ( content ) 881 { 882 tidyBufAttach( &inbuf, (byte*)content, TY_(tmbstrlen)(content)+1 ); 883 in = TY_(BufferInput)( doc, &inbuf, cfg( doc, TidyInCharEncoding )); 884 status = tidyDocParseStream( doc, in ); 885 tidyBufDetach( &inbuf ); 886 TY_(freeStreamIn)(in); 887 } 888 return status; 889} 890 891int tidyDocParseSource( TidyDocImpl* doc, TidyInputSource* source ) 892{ 893 StreamIn* in = TY_(UserInput)( doc, source, cfg( doc, TidyInCharEncoding )); 894 int status = tidyDocParseStream( doc, in ); 895 TY_(freeStreamIn)(in); 896 return status; 897} 898 899 900/* Print/save Functions 901** 902*/ 903int TIDY_CALL tidySaveFile( TidyDoc tdoc, ctmbstr filnam ) 904{ 905 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 906 return tidyDocSaveFile( doc, filnam ); 907} 908int TIDY_CALL tidySaveStdout( TidyDoc tdoc ) 909{ 910 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 911 return tidyDocSaveStdout( doc ); 912} 913int TIDY_CALL tidySaveString( TidyDoc tdoc, tmbstr buffer, uint* buflen ) 914{ 915 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 916 return tidyDocSaveString( doc, buffer, buflen ); 917} 918int TIDY_CALL tidySaveBuffer( TidyDoc tdoc, TidyBuffer* outbuf ) 919{ 920 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 921 return tidyDocSaveBuffer( doc, outbuf ); 922} 923int TIDY_CALL tidySaveSink( TidyDoc tdoc, TidyOutputSink* sink ) 924{ 925 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 926 return tidyDocSaveSink( doc, sink ); 927} 928 929int tidyDocSaveFile( TidyDocImpl* doc, ctmbstr filnam ) 930{ 931 int status = -ENOENT; 932 FILE* fout = NULL; 933 934 /* Don't zap input file if no output */ 935 if ( doc->errors > 0 && 936 cfgBool(doc, TidyWriteBack) && !cfgBool(doc, TidyForceOutput) ) 937 status = tidyDocStatus( doc ); 938 else 939 fout = fopen( filnam, "wb" ); 940 941 if ( fout ) 942 { 943 uint outenc = cfg( doc, TidyOutCharEncoding ); 944 uint nl = cfg( doc, TidyNewline ); 945 StreamOut* out = TY_(FileOutput)( fout, outenc, nl ); 946 947 status = tidyDocSaveStream( doc, out ); 948 949 fclose( fout ); 950 MemFree( out ); 951 952#if PRESERVE_FILE_TIMES 953 if ( doc->filetimes.actime ) 954 { 955 /* set file last accessed/modified times to original values */ 956 utime( filnam, &doc->filetimes ); 957 ClearMemory( &doc->filetimes, sizeof(doc->filetimes) ); 958 } 959#endif /* PRESERVFILETIMES */ 960 } 961 if ( status < 0 ) /* Error message! */ 962 TY_(FileError)( doc, filnam, TidyError ); 963 return status; 964} 965 966 967 968/* Note, _setmode() does NOT work on Win2K Pro w/ VC++ 6.0 SP3. 969** The code has been left in in case it works w/ other compilers 970** or operating systems. If stdout is in Text mode, be aware that 971** it will garble UTF16 documents. In text mode, when it encounters 972** a single byte of value 10 (0xA), it will insert a single byte 973** value 13 (0xD) just before it. This has the effect of garbling 974** the entire document. 975*/ 976 977#if !defined(NO_SETMODE_SUPPORT) 978 979#if defined(_WIN32) || defined(OS2_OS) 980#include <fcntl.h> 981#include <io.h> 982#endif 983 984#endif 985 986int tidyDocSaveStdout( TidyDocImpl* doc ) 987{ 988#if !defined(NO_SETMODE_SUPPORT) 989 990#if defined(_WIN32) || defined(OS2_OS) 991 int oldstdoutmode = -1, oldstderrmode = -1; 992#endif 993 994#endif 995 int status = 0; 996 uint outenc = cfg( doc, TidyOutCharEncoding ); 997 uint nl = cfg( doc, TidyNewline ); 998 StreamOut* out = TY_(FileOutput)( stdout, outenc, nl ); 999 1000#if !defined(NO_SETMODE_SUPPORT) 1001 1002#if defined(_WIN32) || defined(OS2_OS) 1003 oldstdoutmode = setmode( fileno(stdout), _O_BINARY ); 1004 oldstderrmode = setmode( fileno(stderr), _O_BINARY ); 1005#endif 1006 1007#endif 1008 1009 if ( 0 == status ) 1010 status = tidyDocSaveStream( doc, out ); 1011 1012 fflush(stdout); 1013 fflush(stderr); 1014 1015#if !defined(NO_SETMODE_SUPPORT) 1016 1017#if defined(_WIN32) || defined(OS2_OS) 1018 if ( oldstdoutmode != -1 ) 1019 oldstdoutmode = setmode( fileno(stdout), oldstdoutmode ); 1020 if ( oldstderrmode != -1 ) 1021 oldstderrmode = setmode( fileno(stderr), oldstderrmode ); 1022#endif 1023 1024#endif 1025 1026 MemFree( out ); 1027 return status; 1028} 1029 1030int tidyDocSaveString( TidyDocImpl* doc, tmbstr buffer, uint* buflen ) 1031{ 1032 uint outenc = cfg( doc, TidyOutCharEncoding ); 1033 uint nl = cfg( doc, TidyNewline ); 1034 TidyBuffer outbuf = {0}; 1035 1036 StreamOut* out = TY_(BufferOutput)( &outbuf, outenc, nl ); 1037 int status = tidyDocSaveStream( doc, out ); 1038 1039 if ( outbuf.size > *buflen ) 1040 status = -ENOMEM; 1041 else 1042 memcpy( buffer, outbuf.bp, outbuf.size ); 1043 1044 *buflen = outbuf.size; 1045 tidyBufFree( &outbuf ); 1046 MemFree( out ); 1047 return status; 1048} 1049 1050int tidyDocSaveBuffer( TidyDocImpl* doc, TidyBuffer* outbuf ) 1051{ 1052 int status = -EINVAL; 1053 if ( outbuf ) 1054 { 1055 uint outenc = cfg( doc, TidyOutCharEncoding ); 1056 uint nl = cfg( doc, TidyNewline ); 1057 StreamOut* out = TY_(BufferOutput)( outbuf, outenc, nl ); 1058 1059 status = tidyDocSaveStream( doc, out ); 1060 MemFree( out ); 1061 } 1062 return status; 1063} 1064 1065int tidyDocSaveSink( TidyDocImpl* doc, TidyOutputSink* sink ) 1066{ 1067 uint outenc = cfg( doc, TidyOutCharEncoding ); 1068 uint nl = cfg( doc, TidyNewline ); 1069 StreamOut* out = TY_(UserOutput)( sink, outenc, nl ); 1070 int status = tidyDocSaveStream( doc, out ); 1071 MemFree( out ); 1072 return status; 1073} 1074 1075int tidyDocStatus( TidyDocImpl* doc ) 1076{ 1077 if ( doc->errors > 0 ) 1078 return 2; 1079 if ( doc->warnings > 0 || doc->accessErrors > 0 ) 1080 return 1; 1081 return 0; 1082} 1083 1084 1085 1086int TIDY_CALL tidyCleanAndRepair( TidyDoc tdoc ) 1087{ 1088 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 1089 if ( impl ) 1090 return tidyDocCleanAndRepair( impl ); 1091 return -EINVAL; 1092} 1093 1094int TIDY_CALL tidyRunDiagnostics( TidyDoc tdoc ) 1095{ 1096 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 1097 if ( impl ) 1098 return tidyDocRunDiagnostics( impl ); 1099 return -EINVAL; 1100} 1101 1102 1103/* Workhorse functions. 1104** 1105** Parse requires input source, all input config items 1106** and diagnostic sink to have all been set before calling. 1107** 1108** Emit likewise requires that document sink and all 1109** pretty printing options have been set. 1110*/ 1111static ctmbstr integrity = "\nPanic - tree has lost its integrity\n"; 1112 1113int tidyDocParseStream( TidyDocImpl* doc, StreamIn* in ) 1114{ 1115 Bool xmlIn = cfgBool( doc, TidyXmlTags ); 1116 int bomEnc; 1117 1118 assert( doc != NULL && in != NULL ); 1119 assert( doc->docIn == NULL ); 1120 doc->docIn = in; 1121 1122 TY_(TakeConfigSnapshot)( doc ); /* Save config state */ 1123 TY_(FreeLexer)( doc ); 1124 TY_(FreeAnchors)( doc ); 1125 1126 TY_(FreeNode)(doc, &doc->root); 1127 ClearMemory(&doc->root, sizeof(Node)); 1128 1129 if (doc->givenDoctype) 1130 MemFree(doc->givenDoctype); 1131 1132 doc->givenDoctype = NULL; 1133 1134 doc->lexer = TY_(NewLexer)( doc ); 1135 /* doc->lexer->root = &doc->root; */ 1136 doc->root.line = doc->lexer->lines; 1137 doc->root.column = doc->lexer->columns; 1138 doc->inputHadBOM = no; 1139 1140 bomEnc = TY_(ReadBOMEncoding)(in); 1141 1142 if (bomEnc != -1) 1143 { 1144 in->encoding = bomEnc; 1145 TY_(SetOptionInt)(doc, TidyInCharEncoding, bomEnc); 1146 } 1147 1148#ifdef TIDY_WIN32_MLANG_SUPPORT 1149 if (in->encoding > WIN32MLANG) 1150 TY_(Win32MLangInitInputTranscoder)(in, in->encoding); 1151#endif /* TIDY_WIN32_MLANG_SUPPORT */ 1152 1153 /* Tidy doesn't alter the doctype for generic XML docs */ 1154 if ( xmlIn ) 1155 { 1156 TY_(ParseXMLDocument)( doc ); 1157 if ( !TY_(CheckNodeIntegrity)( &doc->root ) ) 1158 FatalError( integrity ); 1159 } 1160 else 1161 { 1162 doc->warnings = 0; 1163 TY_(ParseDocument)( doc ); 1164 if ( !TY_(CheckNodeIntegrity)( &doc->root ) ) 1165 FatalError( integrity ); 1166 } 1167 1168#ifdef TIDY_WIN32_MLANG_SUPPORT 1169 TY_(Win32MLangUninitInputTranscoder)(in); 1170#endif /* TIDY_WIN32_MLANG_SUPPORT */ 1171 1172 doc->docIn = NULL; 1173 return tidyDocStatus( doc ); 1174} 1175 1176int tidyDocRunDiagnostics( TidyDocImpl* doc ) 1177{ 1178 Bool quiet = cfgBool( doc, TidyQuiet ); 1179 Bool force = cfgBool( doc, TidyForceOutput ); 1180 1181 if ( !quiet ) 1182 { 1183 1184 TY_(ReportMarkupVersion)( doc ); 1185 TY_(ReportNumWarnings)( doc ); 1186 } 1187 1188 if ( doc->errors > 0 && !force ) 1189 TY_(NeedsAuthorIntervention)( doc ); 1190 1191 return tidyDocStatus( doc ); 1192} 1193 1194int tidyDocCleanAndRepair( TidyDocImpl* doc ) 1195{ 1196 Bool word2K = cfgBool( doc, TidyWord2000 ); 1197 Bool logical = cfgBool( doc, TidyLogicalEmphasis ); 1198 Bool clean = cfgBool( doc, TidyMakeClean ); 1199 Bool dropFont = cfgBool( doc, TidyDropFontTags ); 1200 Bool htmlOut = cfgBool( doc, TidyHtmlOut ); 1201 Bool xmlOut = cfgBool( doc, TidyXmlOut ); 1202 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 1203 Bool xmlDecl = cfgBool( doc, TidyXmlDecl ); 1204 Bool tidyMark = cfgBool( doc, TidyMark ); 1205 Bool tidyXmlTags = cfgBool( doc, TidyXmlTags ); 1206 Node* node; 1207 1208 if (tidyXmlTags) 1209 return tidyDocStatus( doc ); 1210 1211 /* simplifies <b><b> ... </b> ...</b> etc. */ 1212 TY_(NestedEmphasis)( doc, &doc->root ); 1213 1214 /* cleans up <dir>indented text</dir> etc. */ 1215 TY_(List2BQ)( doc, &doc->root ); 1216 TY_(BQ2Div)( doc, &doc->root ); 1217 1218 /* replaces i by em and b by strong */ 1219 if ( logical ) 1220 TY_(EmFromI)( doc, &doc->root ); 1221 1222 if ( word2K && TY_(IsWord2000)(doc) ) 1223 { 1224 /* prune Word2000's <![if ...]> ... <![endif]> */ 1225 TY_(DropSections)( doc, &doc->root ); 1226 1227 /* drop style & class attributes and empty p, span elements */ 1228 TY_(CleanWord2000)( doc, &doc->root ); 1229 TY_(DropEmptyElements)(doc, &doc->root); 1230 } 1231 1232 /* replaces presentational markup by style rules */ 1233 if ( clean || dropFont ) 1234 TY_(CleanDocument)( doc ); 1235 1236 /* Move terminating <br /> tags from out of paragraphs */ 1237 /*! Do we want to do this for all block-level elements? */ 1238 1239 /* This is disabled due to http://tidy.sf.net/bug/681116 */ 1240#if 0 1241 FixBrakes( doc, TY_(FindBody)( doc )); 1242#endif 1243 1244 /* Reconcile http-equiv meta element with output encoding */ 1245 if (cfg( doc, TidyOutCharEncoding) != RAW 1246#ifndef NO_NATIVE_ISO2022_SUPPORT 1247 && cfg( doc, TidyOutCharEncoding) != ISO2022 1248#endif 1249 ) 1250 TY_(VerifyHTTPEquiv)( doc, TY_(FindHEAD)( doc )); 1251 1252 if ( !TY_(CheckNodeIntegrity)( &doc->root ) ) 1253 FatalError( integrity ); 1254 1255 /* remember given doctype for reporting */ 1256 node = TY_(FindDocType)(doc); 1257 if (node) 1258 { 1259 AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC"); 1260 if (AttrHasValue(fpi)) 1261 { 1262 if (doc->givenDoctype) 1263 MemFree(doc->givenDoctype); 1264 doc->givenDoctype = TY_(tmbstrdup)(fpi->value); 1265 } 1266 } 1267 1268 if ( doc->root.content ) 1269 { 1270 /* If we had XHTML input but want HTML output */ 1271 if ( htmlOut && doc->lexer->isvoyager ) 1272 { 1273 Node* node = TY_(FindDocType)(doc); 1274 /* Remove reference, but do not free */ 1275 if (node) 1276 TY_(RemoveNode)(node); 1277 } 1278 1279 if (xhtmlOut && !htmlOut) 1280 { 1281 TY_(SetXHTMLDocType)(doc); 1282 TY_(FixAnchors)(doc, &doc->root, yes, yes); 1283 TY_(FixXhtmlNamespace)(doc, yes); 1284 TY_(FixLanguageInformation)(doc, &doc->root, yes, yes); 1285 } 1286 else 1287 { 1288 TY_(FixDocType)(doc); 1289 TY_(FixAnchors)(doc, &doc->root, yes, yes); 1290 TY_(FixXhtmlNamespace)(doc, no); 1291 TY_(FixLanguageInformation)(doc, &doc->root, no, yes); 1292 } 1293 1294 if (tidyMark ) 1295 TY_(AddGenerator)(doc); 1296 } 1297 1298 /* ensure presence of initial <?xml version="1.0"?> */ 1299 if ( xmlOut && xmlDecl ) 1300 TY_(FixXmlDecl)( doc ); 1301 1302 return tidyDocStatus( doc ); 1303} 1304 1305int tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out ) 1306{ 1307 Bool showMarkup = cfgBool( doc, TidyShowMarkup ); 1308 Bool forceOutput = cfgBool( doc, TidyForceOutput ); 1309#if SUPPORT_UTF16_ENCODINGS 1310 Bool outputBOM = ( cfgAutoBool(doc, TidyOutputBOM) == TidyYesState ); 1311 Bool smartBOM = ( cfgAutoBool(doc, TidyOutputBOM) == TidyAutoState ); 1312#endif 1313 Bool xmlOut = cfgBool( doc, TidyXmlOut ); 1314 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 1315 Bool bodyOnly = cfgBool( doc, TidyBodyOnly ); 1316 1317 Bool dropComments = cfgBool(doc, TidyHideComments); 1318 Bool makeClean = cfgBool(doc, TidyMakeClean); 1319 Bool asciiChars = cfgBool(doc, TidyAsciiChars); 1320 Bool makeBare = cfgBool(doc, TidyMakeBare); 1321 Bool escapeCDATA = cfgBool(doc, TidyEscapeCdata); 1322 1323 if (escapeCDATA) 1324 TY_(ConvertCDATANodes)(doc, &doc->root); 1325 1326 if (dropComments) 1327 TY_(DropComments)(doc, &doc->root); 1328 1329 if (makeClean) 1330 { 1331 /* noop */ 1332 TY_(DropFontElements)(doc, &doc->root, NULL); 1333 TY_(WbrToSpace)(doc, &doc->root); 1334 } 1335 1336 if ((makeClean && asciiChars) || makeBare) 1337 TY_(DowngradeTypography)(doc, &doc->root); 1338 1339 if (makeBare) 1340 /* Note: no longer replaces in */ 1341 /* attribute values / non-text tokens */ 1342 TY_(NormalizeSpaces)(doc->lexer, &doc->root); 1343 else 1344 TY_(ReplacePreformattedSpaces)(doc, &doc->root); 1345 1346 if ( showMarkup && (doc->errors == 0 || forceOutput) ) 1347 { 1348#if SUPPORT_UTF16_ENCODINGS 1349 /* Output a Byte Order Mark if required */ 1350 if ( outputBOM || (doc->inputHadBOM && smartBOM) ) 1351 TY_(outBOM)( out ); 1352#endif 1353 1354 /* No longer necessary. No DOCTYPE == HTML 3.2, 1355 ** which gives you only the basic character entities, 1356 ** which are safe in any browser. 1357 ** if ( !TY_(FindDocType)(doc) ) 1358 ** TY_(SetOptionBool)( doc, TidyNumEntities, yes ); 1359 */ 1360 1361 doc->docOut = out; 1362 if ( xmlOut && !xhtmlOut ) 1363 TY_(PPrintXMLTree)( doc, NORMAL, 0, &doc->root ); 1364 else if ( bodyOnly ) 1365 TY_(PrintBody)( doc ); 1366 else 1367 TY_(PPrintTree)( doc, NORMAL, 0, &doc->root ); 1368 1369 TY_(PFlushLine)( doc, 0 ); 1370 doc->docOut = NULL; 1371 } 1372 1373 TY_(ResetConfigToSnapshot)( doc ); 1374 return tidyDocStatus( doc ); 1375} 1376 1377/* Tree traversal functions 1378** 1379** The big issue here is the degree to which we should mimic 1380** a DOM and/or SAX nodes. 1381** 1382** Is it 100% possible (and, if so, how difficult is it) to 1383** emit SAX events from this API? If SAX events are possible, 1384** is that 100% of data needed to build a DOM? 1385*/ 1386 1387TidyNode TIDY_CALL tidyGetRoot( TidyDoc tdoc ) 1388{ 1389 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 1390 return tidyImplToNode( &impl->root ); 1391} 1392 1393TidyNode TIDY_CALL tidyGetHtml( TidyDoc tdoc ) 1394{ 1395 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 1396 Node* node = NULL; 1397 if ( impl ) 1398 node = TY_(FindHTML)( impl ); 1399 return tidyImplToNode( node ); 1400} 1401 1402TidyNode TIDY_CALL tidyGetHead( TidyDoc tdoc ) 1403{ 1404 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 1405 Node* node = NULL; 1406 if ( impl ) 1407 node = TY_(FindHEAD)( impl ); 1408 return tidyImplToNode( node ); 1409} 1410 1411TidyNode TIDY_CALL tidyGetBody( TidyDoc tdoc ) 1412{ 1413 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 1414 Node* node = NULL; 1415 if ( impl ) 1416 node = TY_(FindBody)( impl ); 1417 return tidyImplToNode( node ); 1418} 1419 1420/* parent / child */ 1421TidyNode TIDY_CALL tidyGetParent( TidyNode tnod ) 1422{ 1423 Node* nimp = tidyNodeToImpl( tnod ); 1424 return tidyImplToNode( nimp->parent ); 1425} 1426TidyNode TIDY_CALL tidyGetChild( TidyNode tnod ) 1427{ 1428 Node* nimp = tidyNodeToImpl( tnod ); 1429 return tidyImplToNode( nimp->content ); 1430} 1431 1432/* siblings */ 1433TidyNode TIDY_CALL tidyGetNext( TidyNode tnod ) 1434{ 1435 Node* nimp = tidyNodeToImpl( tnod ); 1436 return tidyImplToNode( nimp->next ); 1437} 1438TidyNode TIDY_CALL tidyGetPrev( TidyNode tnod ) 1439{ 1440 Node* nimp = tidyNodeToImpl( tnod ); 1441 return tidyImplToNode( nimp->prev ); 1442} 1443 1444/* Node info */ 1445TidyNodeType TIDY_CALL tidyNodeGetType( TidyNode tnod ) 1446{ 1447 Node* nimp = tidyNodeToImpl( tnod ); 1448 TidyNodeType ntyp = TidyNode_Root; 1449 if ( nimp ) 1450 ntyp = (TidyNodeType) nimp->type; 1451 return ntyp; 1452} 1453 1454uint TIDY_CALL tidyNodeLine( TidyNode tnod ) 1455{ 1456 Node* nimp = tidyNodeToImpl( tnod ); 1457 uint line = 0; 1458 if ( nimp ) 1459 line = nimp->line; 1460 return line; 1461} 1462uint TIDY_CALL tidyNodeColumn( TidyNode tnod ) 1463{ 1464 Node* nimp = tidyNodeToImpl( tnod ); 1465 uint col = 0; 1466 if ( nimp ) 1467 col = nimp->column; 1468 return col; 1469} 1470 1471ctmbstr TIDY_CALL tidyNodeGetName( TidyNode tnod ) 1472{ 1473 Node* nimp = tidyNodeToImpl( tnod ); 1474 ctmbstr nnam = NULL; 1475 if ( nimp ) 1476 nnam = nimp->element; 1477 return nnam; 1478} 1479 1480 1481Bool TIDY_CALL tidyNodeHasText( TidyDoc tdoc, TidyNode tnod ) 1482{ 1483 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 1484 if ( doc ) 1485 return TY_(nodeHasText)( doc, tidyNodeToImpl(tnod) ); 1486 return no; 1487} 1488 1489 1490Bool TIDY_CALL tidyNodeGetText( TidyDoc tdoc, TidyNode tnod, TidyBuffer* outbuf ) 1491{ 1492 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 1493 Node* nimp = tidyNodeToImpl( tnod ); 1494 if ( doc && nimp && outbuf ) 1495 { 1496 uint outenc = cfg( doc, TidyOutCharEncoding ); 1497 uint nl = cfg( doc, TidyNewline ); 1498 StreamOut* out = TY_(BufferOutput)( outbuf, outenc, nl ); 1499 Bool xmlOut = cfgBool( doc, TidyXmlOut ); 1500 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 1501 1502 doc->docOut = out; 1503 if ( xmlOut && !xhtmlOut ) 1504 TY_(PPrintXMLTree)( doc, NORMAL, 0, nimp ); 1505 else 1506 TY_(PPrintTree)( doc, NORMAL, 0, nimp ); 1507 1508 TY_(PFlushLine)( doc, 0 ); 1509 doc->docOut = NULL; 1510 1511 MemFree( out ); 1512 return yes; 1513 } 1514 return no; 1515} 1516 1517 1518Bool TIDY_CALL tidyNodeIsProp( TidyDoc ARG_UNUSED(tdoc), TidyNode tnod ) 1519{ 1520 Node* nimp = tidyNodeToImpl( tnod ); 1521 Bool isProprietary = yes; 1522 if ( nimp ) 1523 { 1524 switch ( nimp->type ) 1525 { 1526 case RootNode: 1527 case DocTypeTag: 1528 case CommentTag: 1529 case XmlDecl: 1530 case ProcInsTag: 1531 case TextNode: 1532 case CDATATag: 1533 isProprietary = no; 1534 break; 1535 1536 case SectionTag: 1537 case AspTag: 1538 case JsteTag: 1539 case PhpTag: 1540 isProprietary = yes; 1541 break; 1542 1543 case StartTag: 1544 case EndTag: 1545 case StartEndTag: 1546 isProprietary = ( nimp->tag 1547 ? (nimp->tag->versions&VERS_PROPRIETARY)!=0 1548 : yes ); 1549 break; 1550 } 1551 } 1552 return isProprietary; 1553} 1554 1555TidyTagId TIDY_CALL tidyNodeGetId(TidyNode tnod) 1556{ 1557 Node* nimp = tidyNodeToImpl(tnod); 1558 1559 TidyTagId tagId = TidyTag_UNKNOWN; 1560 if (nimp && nimp->tag) 1561 tagId = nimp->tag->id; 1562 1563 return tagId; 1564} 1565 1566 1567/* Null for non-element nodes and all pure HTML 1568cmbstr tidyNodeNsLocal( TidyNode tnod ) 1569{ 1570} 1571cmbstr tidyNodeNsPrefix( TidyNode tnod ) 1572{ 1573} 1574cmbstr tidyNodeNsUri( TidyNode tnod ) 1575{ 1576} 1577*/ 1578 1579/* Iterate over attribute values */ 1580TidyAttr TIDY_CALL tidyAttrFirst( TidyNode tnod ) 1581{ 1582 Node* nimp = tidyNodeToImpl( tnod ); 1583 AttVal* attval = NULL; 1584 if ( nimp ) 1585 attval = nimp->attributes; 1586 return tidyImplToAttr( attval ); 1587} 1588TidyAttr TIDY_CALL tidyAttrNext( TidyAttr tattr ) 1589{ 1590 AttVal* attval = tidyAttrToImpl( tattr ); 1591 AttVal* nxtval = NULL; 1592 if ( attval ) 1593 nxtval = attval->next; 1594 return tidyImplToAttr( nxtval ); 1595} 1596 1597ctmbstr TIDY_CALL tidyAttrName( TidyAttr tattr ) 1598{ 1599 AttVal* attval = tidyAttrToImpl( tattr ); 1600 ctmbstr anam = NULL; 1601 if ( attval ) 1602 anam = attval->attribute; 1603 return anam; 1604} 1605ctmbstr TIDY_CALL tidyAttrValue( TidyAttr tattr ) 1606{ 1607 AttVal* attval = tidyAttrToImpl( tattr ); 1608 ctmbstr aval = NULL; 1609 if ( attval ) 1610 aval = attval->value; 1611 return aval; 1612} 1613 1614/* Null for pure HTML 1615ctmbstr tidyAttrNsLocal( TidyAttr tattr ) 1616{ 1617} 1618ctmbstr tidyAttrNsPrefix( TidyAttr tattr ) 1619{ 1620} 1621ctmbstr tidyAttrNsUri( TidyAttr tattr ) 1622{ 1623} 1624*/ 1625 1626TidyAttrId TIDY_CALL tidyAttrGetId( TidyAttr tattr ) 1627{ 1628 AttVal* attval = tidyAttrToImpl( tattr ); 1629 TidyAttrId attrId = TidyAttr_UNKNOWN; 1630 if ( attval && attval->dict ) 1631 attrId = attval->dict->id; 1632 return attrId; 1633} 1634Bool TIDY_CALL tidyAttrIsProp( TidyAttr tattr ) 1635{ 1636 AttVal* attval = tidyAttrToImpl( tattr ); 1637 Bool isProprietary = yes; 1638 if ( attval ) 1639 isProprietary = ( attval->dict 1640 ? (attval->dict->versions & VERS_PROPRIETARY) != 0 1641 : yes ); 1642 return isProprietary; 1643} 1644 1645/* 1646 * local variables: 1647 * mode: c 1648 * indent-tabs-mode: nil 1649 * c-basic-offset: 4 1650 * eval: (c-set-offset 'substatement-open 0) 1651 * end: 1652 */ 1653