///////////////////////////////////////////////////////////////////////////// // Name: src/html/htmlpars.cpp // Purpose: wxHtmlParser class (generic parser) // Author: Vaclav Slavik // RCS-ID: $Id: htmlpars.cpp 66413 2010-12-20 17:40:05Z JS $ // Copyright: (c) 1999 Vaclav Slavik // Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// #include "wx/wxprec.h" #ifdef __BORLANDC__ #pragma hdrstop #endif #if wxUSE_HTML && wxUSE_STREAMS #ifndef WXPRECOMP #include "wx/dynarray.h" #include "wx/log.h" #include "wx/intl.h" #include "wx/app.h" #endif #include "wx/tokenzr.h" #include "wx/wfstream.h" #include "wx/url.h" #include "wx/fontmap.h" #include "wx/html/htmldefs.h" #include "wx/html/htmlpars.h" #include "wx/arrimpl.cpp" #ifdef __WXWINCE__ #include "wx/msw/wince/missing.h" // for bsearch() #endif // DLL options compatibility check: WX_CHECK_BUILD_OPTIONS("wxHTML") const wxChar *wxTRACE_HTML_DEBUG = _T("htmldebug"); //----------------------------------------------------------------------------- // wxHtmlParser helpers //----------------------------------------------------------------------------- class wxHtmlTextPiece { public: wxHtmlTextPiece(int pos, int lng) : m_pos(pos), m_lng(lng) {} int m_pos, m_lng; }; WX_DECLARE_OBJARRAY(wxHtmlTextPiece, wxHtmlTextPieces); WX_DEFINE_OBJARRAY(wxHtmlTextPieces) class wxHtmlParserState { public: wxHtmlTag *m_curTag; wxHtmlTag *m_tags; wxHtmlTextPieces *m_textPieces; int m_curTextPiece; wxString m_source; wxHtmlParserState *m_nextState; }; //----------------------------------------------------------------------------- // wxHtmlParser //----------------------------------------------------------------------------- IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject) wxHtmlParser::wxHtmlParser() : wxObject(), m_HandlersHash(wxKEY_STRING), m_FS(NULL), m_HandlersStack(NULL) { m_entitiesParser = new wxHtmlEntitiesParser; m_Tags = NULL; m_CurTag = NULL; m_TextPieces = NULL; m_CurTextPiece = 0; m_SavedStates = NULL; } wxHtmlParser::~wxHtmlParser() { while (RestoreState()) {} DestroyDOMTree(); if (m_HandlersStack) { wxList& tmp = *m_HandlersStack; wxList::iterator it, en; for( it = tmp.begin(), en = tmp.end(); it != en; ++it ) delete (wxHashTable*)*it; tmp.clear(); } delete m_HandlersStack; m_HandlersHash.Clear(); WX_CLEAR_LIST(wxList, m_HandlersList); delete m_entitiesParser; } wxObject* wxHtmlParser::Parse(const wxString& source) { InitParser(source); DoParsing(); wxObject *result = GetProduct(); DoneParser(); return result; } void wxHtmlParser::InitParser(const wxString& source) { SetSource(source); m_stopParsing = false; } void wxHtmlParser::DoneParser() { DestroyDOMTree(); } void wxHtmlParser::SetSource(const wxString& src) { DestroyDOMTree(); m_Source = src; CreateDOMTree(); m_CurTag = NULL; m_CurTextPiece = 0; } void wxHtmlParser::CreateDOMTree() { wxHtmlTagsCache cache(m_Source); m_TextPieces = new wxHtmlTextPieces; CreateDOMSubTree(NULL, 0, m_Source.length(), &cache); m_CurTextPiece = 0; } extern bool wxIsCDATAElement(const wxChar *tag); void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, int begin_pos, int end_pos, wxHtmlTagsCache *cache) { if (end_pos <= begin_pos) return; wxChar c; int i = begin_pos; int textBeginning = begin_pos; // If the tag contains CDATA text, we include the text between beginning // and ending tag verbosely. Setting i=end_pos will skip to the very // end of this function where text piece is added, bypassing any child // tags parsing (CDATA element can't have child elements by definition): if (cur != NULL && wxIsCDATAElement(cur->GetName().c_str())) { i = end_pos; } while (i < end_pos) { c = m_Source.GetChar(i); if (c == wxT('<')) { // add text to m_TextPieces: if (i - textBeginning > 0) m_TextPieces->Add( wxHtmlTextPiece(textBeginning, i - textBeginning)); // if it is a comment, skip it: if (i < end_pos-6 && m_Source.GetChar(i+1) == wxT('!') && m_Source.GetChar(i+2) == wxT('-') && m_Source.GetChar(i+3) == wxT('-')) { // Comments begin with "