1///////////////////////////////////////////////////////////////////////////// 2// Name: src/html/htmltag.cpp 3// Purpose: wxHtmlTag class (represents single tag) 4// Author: Vaclav Slavik 5// RCS-ID: $Id: htmltag.cpp 53433 2008-05-03 00:40:29Z VZ $ 6// Copyright: (c) 1999 Vaclav Slavik 7// Licence: wxWindows licence 8///////////////////////////////////////////////////////////////////////////// 9 10#include "wx/wxprec.h" 11 12#ifdef __BORLANDC__ 13 #pragma hdrstop 14#endif 15 16#if wxUSE_HTML 17 18#include "wx/html/htmltag.h" 19 20#ifndef WXPRECOMP 21 #include "wx/colour.h" 22#endif 23 24#include "wx/html/htmlpars.h" 25#include <stdio.h> // for vsscanf 26#include <stdarg.h> 27 28 29//----------------------------------------------------------------------------- 30// wxHtmlTagsCache 31//----------------------------------------------------------------------------- 32 33struct wxHtmlCacheItem 34{ 35 // this is "pos" value passed to wxHtmlTag's constructor. 36 // it is position of '<' character of the tag 37 int Key; 38 39 // end positions for the tag: 40 // end1 is '<' of ending tag, 41 // end2 is '>' or both are 42 // -1 if there is no ending tag for this one... 43 // or -2 if this is ending tag </...> 44 int End1, End2; 45 46 // name of this tag 47 wxChar *Name; 48}; 49 50 51IMPLEMENT_CLASS(wxHtmlTagsCache,wxObject) 52 53#define CACHE_INCREMENT 64 54 55bool wxIsCDATAElement(const wxChar *tag) 56{ 57 return (wxStrcmp(tag, _T("SCRIPT")) == 0) || 58 (wxStrcmp(tag, _T("STYLE")) == 0); 59} 60 61wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source) 62{ 63 const wxChar *src = source.c_str(); 64 int lng = source.length(); 65 wxChar tagBuffer[256]; 66 67 m_Cache = NULL; 68 m_CacheSize = 0; 69 m_CachePos = 0; 70 71 int pos = 0; 72 while (pos < lng) 73 { 74 if (src[pos] == wxT('<')) // tag found: 75 { 76 if (m_CacheSize % CACHE_INCREMENT == 0) 77 m_Cache = (wxHtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wxHtmlCacheItem)); 78 int tg = m_CacheSize++; 79 int stpos = pos++; 80 m_Cache[tg].Key = stpos; 81 82 int i; 83 for ( i = 0; 84 pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 && 85 src[pos] != wxT('>') && !wxIsspace(src[pos]); 86 i++, pos++ ) 87 { 88 tagBuffer[i] = (wxChar)wxToupper(src[pos]); 89 } 90 tagBuffer[i] = _T('\0'); 91 92 m_Cache[tg].Name = new wxChar[i+1]; 93 memcpy(m_Cache[tg].Name, tagBuffer, (i+1)*sizeof(wxChar)); 94 95 while (pos < lng && src[pos] != wxT('>')) pos++; 96 97 if (src[stpos+1] == wxT('/')) // ending tag: 98 { 99 m_Cache[tg].End1 = m_Cache[tg].End2 = -2; 100 // find matching begin tag: 101 for (i = tg; i >= 0; i--) 102 if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, tagBuffer+1) == 0)) 103 { 104 m_Cache[i].End1 = stpos; 105 m_Cache[i].End2 = pos + 1; 106 break; 107 } 108 } 109 else 110 { 111 m_Cache[tg].End1 = m_Cache[tg].End2 = -1; 112 113 if (wxIsCDATAElement(tagBuffer)) 114 { 115 // store the orig pos in case we are missing the closing 116 // tag (see below) 117 wxInt32 old_pos = pos; 118 bool foundCloseTag = false; 119 120 // find next matching tag 121 int tag_len = wxStrlen(tagBuffer); 122 while (pos < lng) 123 { 124 // find the ending tag 125 while (pos + 1 < lng && 126 (src[pos] != '<' || src[pos+1] != '/')) 127 ++pos; 128 if (src[pos] == '<') 129 ++pos; 130 131 // see if it matches 132 int match_pos = 0; 133 while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') { 134 // cast to wxChar needed to suppress warning in 135 // Unicode build 136 if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) { 137 ++match_pos; 138 } 139 else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') || 140 src[pos] == wxT('\r') || src[pos] == wxT('\t')) { 141 // need to skip over these 142 } 143 else { 144 match_pos = 0; 145 } 146 ++pos; 147 } 148 149 // found a match 150 if (match_pos == tag_len) 151 { 152 pos = pos - tag_len - 3; 153 foundCloseTag = true; 154 break; 155 } 156 else // keep looking for the closing tag 157 { 158 ++pos; 159 } 160 } 161 if (!foundCloseTag) 162 { 163 // we didn't find closing tag; this means the markup 164 // is incorrect and the best thing we can do is to 165 // ignore the unclosed tag and continue parsing as if 166 // it didn't exist: 167 pos = old_pos; 168 } 169 } 170 } 171 } 172 173 pos++; 174 } 175 176 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore: 177 for (int i = 0; i < m_CacheSize; i++) 178 { 179 delete[] m_Cache[i].Name; 180 m_Cache[i].Name = NULL; 181 } 182} 183 184void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2) 185{ 186 if (m_Cache == NULL) return; 187 if (m_Cache[m_CachePos].Key != at) 188 { 189 int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1; 190 do 191 { 192 if ( m_CachePos < 0 || m_CachePos == m_CacheSize ) 193 { 194 // something is very wrong with HTML, give up by returning an 195 // impossibly large value which is going to be ignored by the 196 // caller 197 *end1 = 198 *end2 = INT_MAX; 199 return; 200 } 201 202 m_CachePos += delta; 203 } 204 while (m_Cache[m_CachePos].Key != at); 205 } 206 *end1 = m_Cache[m_CachePos].End1; 207 *end2 = m_Cache[m_CachePos].End2; 208} 209 210 211 212 213//----------------------------------------------------------------------------- 214// wxHtmlTag 215//----------------------------------------------------------------------------- 216 217IMPLEMENT_CLASS(wxHtmlTag,wxObject) 218 219wxHtmlTag::wxHtmlTag(wxHtmlTag *parent, 220 const wxString& source, int pos, int end_pos, 221 wxHtmlTagsCache *cache, 222 wxHtmlEntitiesParser *entParser) : wxObject() 223{ 224 /* Setup DOM relations */ 225 226 m_Next = NULL; 227 m_FirstChild = m_LastChild = NULL; 228 m_Parent = parent; 229 if (parent) 230 { 231 m_Prev = m_Parent->m_LastChild; 232 if (m_Prev == NULL) 233 m_Parent->m_FirstChild = this; 234 else 235 m_Prev->m_Next = this; 236 m_Parent->m_LastChild = this; 237 } 238 else 239 m_Prev = NULL; 240 241 /* Find parameters and their values: */ 242 243 int i; 244 wxChar c; 245 246 // fill-in name, params and begin pos: 247 i = pos+1; 248 249 // find tag's name and convert it to uppercase: 250 while ((i < end_pos) && 251 ((c = source[i++]) != wxT(' ') && c != wxT('\r') && 252 c != wxT('\n') && c != wxT('\t') && 253 c != wxT('>'))) 254 { 255 if ((c >= wxT('a')) && (c <= wxT('z'))) 256 c -= (wxT('a') - wxT('A')); 257 m_Name << c; 258 } 259 260 // if the tag has parameters, read them and "normalize" them, 261 // i.e. convert to uppercase, replace whitespaces by spaces and 262 // remove whitespaces around '=': 263 if (source[i-1] != wxT('>')) 264 { 265 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \ 266 c == wxT('\n') || c == wxT('\t')) 267 wxString pname, pvalue; 268 wxChar quote; 269 enum 270 { 271 ST_BEFORE_NAME = 1, 272 ST_NAME, 273 ST_BEFORE_EQ, 274 ST_BEFORE_VALUE, 275 ST_VALUE 276 } state; 277 278 quote = 0; 279 state = ST_BEFORE_NAME; 280 while (i < end_pos) 281 { 282 c = source[i++]; 283 284 if (c == wxT('>') && !(state == ST_VALUE && quote != 0)) 285 { 286 if (state == ST_BEFORE_EQ || state == ST_NAME) 287 { 288 m_ParamNames.Add(pname); 289 m_ParamValues.Add(wxEmptyString); 290 } 291 else if (state == ST_VALUE && quote == 0) 292 { 293 m_ParamNames.Add(pname); 294 if (entParser) 295 m_ParamValues.Add(entParser->Parse(pvalue)); 296 else 297 m_ParamValues.Add(pvalue); 298 } 299 break; 300 } 301 switch (state) 302 { 303 case ST_BEFORE_NAME: 304 if (!IS_WHITE(c)) 305 { 306 pname = c; 307 state = ST_NAME; 308 } 309 break; 310 case ST_NAME: 311 if (IS_WHITE(c)) 312 state = ST_BEFORE_EQ; 313 else if (c == wxT('=')) 314 state = ST_BEFORE_VALUE; 315 else 316 pname << c; 317 break; 318 case ST_BEFORE_EQ: 319 if (c == wxT('=')) 320 state = ST_BEFORE_VALUE; 321 else if (!IS_WHITE(c)) 322 { 323 m_ParamNames.Add(pname); 324 m_ParamValues.Add(wxEmptyString); 325 pname = c; 326 state = ST_NAME; 327 } 328 break; 329 case ST_BEFORE_VALUE: 330 if (!IS_WHITE(c)) 331 { 332 if (c == wxT('"') || c == wxT('\'')) 333 quote = c, pvalue = wxEmptyString; 334 else 335 quote = 0, pvalue = c; 336 state = ST_VALUE; 337 } 338 break; 339 case ST_VALUE: 340 if ((quote != 0 && c == quote) || 341 (quote == 0 && IS_WHITE(c))) 342 { 343 m_ParamNames.Add(pname); 344 if (quote == 0) 345 { 346 // VS: backward compatibility, no real reason, 347 // but wxHTML code relies on this... :( 348 pvalue.MakeUpper(); 349 } 350 if (entParser) 351 m_ParamValues.Add(entParser->Parse(pvalue)); 352 else 353 m_ParamValues.Add(pvalue); 354 state = ST_BEFORE_NAME; 355 } 356 else 357 pvalue << c; 358 break; 359 } 360 } 361 362 #undef IS_WHITE 363 } 364 m_Begin = i; 365 366 cache->QueryTag(pos, &m_End1, &m_End2); 367 if (m_End1 > end_pos) m_End1 = end_pos; 368 if (m_End2 > end_pos) m_End2 = end_pos; 369} 370 371wxHtmlTag::~wxHtmlTag() 372{ 373 wxHtmlTag *t1, *t2; 374 t1 = m_FirstChild; 375 while (t1) 376 { 377 t2 = t1->GetNextSibling(); 378 delete t1; 379 t1 = t2; 380 } 381} 382 383bool wxHtmlTag::HasParam(const wxString& par) const 384{ 385 return (m_ParamNames.Index(par, false) != wxNOT_FOUND); 386} 387 388wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const 389{ 390 int index = m_ParamNames.Index(par, false); 391 if (index == wxNOT_FOUND) 392 return wxEmptyString; 393 if (with_commas) 394 { 395 // VS: backward compatibility, seems to be never used by wxHTML... 396 wxString s; 397 s << wxT('"') << m_ParamValues[index] << wxT('"'); 398 return s; 399 } 400 else 401 return m_ParamValues[index]; 402} 403 404int wxHtmlTag::ScanParam(const wxString& par, 405 const wxChar *format, 406 void *param) const 407{ 408 wxString parval = GetParam(par); 409 return wxSscanf(parval, format, param); 410} 411 412bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const 413{ 414 wxCHECK_MSG( clr, false, _T("invalid colour argument") ); 415 416 wxString str = GetParam(par); 417 418 // handle colours defined in HTML 4.0 first: 419 if (str.length() > 1 && str[0] != _T('#')) 420 { 421 #define HTML_COLOUR(name, r, g, b) \ 422 if (str.IsSameAs(wxT(name), false)) \ 423 { clr->Set(r, g, b); return true; } 424 HTML_COLOUR("black", 0x00,0x00,0x00) 425 HTML_COLOUR("silver", 0xC0,0xC0,0xC0) 426 HTML_COLOUR("gray", 0x80,0x80,0x80) 427 HTML_COLOUR("white", 0xFF,0xFF,0xFF) 428 HTML_COLOUR("maroon", 0x80,0x00,0x00) 429 HTML_COLOUR("red", 0xFF,0x00,0x00) 430 HTML_COLOUR("purple", 0x80,0x00,0x80) 431 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF) 432 HTML_COLOUR("green", 0x00,0x80,0x00) 433 HTML_COLOUR("lime", 0x00,0xFF,0x00) 434 HTML_COLOUR("olive", 0x80,0x80,0x00) 435 HTML_COLOUR("yellow", 0xFF,0xFF,0x00) 436 HTML_COLOUR("navy", 0x00,0x00,0x80) 437 HTML_COLOUR("blue", 0x00,0x00,0xFF) 438 HTML_COLOUR("teal", 0x00,0x80,0x80) 439 HTML_COLOUR("aqua", 0x00,0xFF,0xFF) 440 #undef HTML_COLOUR 441 } 442 443 // then try to parse #rrggbb representations or set from other well 444 // known names (note that this doesn't strictly conform to HTML spec, 445 // but it doesn't do real harm -- but it *must* be done after the standard 446 // colors are handled above): 447 if (clr->Set(str)) 448 return true; 449 450 return false; 451} 452 453bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const 454{ 455 if ( !HasParam(par) ) 456 return false; 457 458 long i; 459 if ( !GetParam(par).ToLong(&i) ) 460 return false; 461 462 *clr = (int)i; 463 return true; 464} 465 466wxString wxHtmlTag::GetAllParams() const 467{ 468 // VS: this function is for backward compatibility only, 469 // never used by wxHTML 470 wxString s; 471 size_t cnt = m_ParamNames.GetCount(); 472 for (size_t i = 0; i < cnt; i++) 473 { 474 s << m_ParamNames[i]; 475 s << wxT('='); 476 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND) 477 s << wxT('\'') << m_ParamValues[i] << wxT('\''); 478 else 479 s << wxT('"') << m_ParamValues[i] << wxT('"'); 480 } 481 return s; 482} 483 484wxHtmlTag *wxHtmlTag::GetFirstSibling() const 485{ 486 if (m_Parent) 487 return m_Parent->m_FirstChild; 488 else 489 { 490 wxHtmlTag *cur = (wxHtmlTag*)this; 491 while (cur->m_Prev) 492 cur = cur->m_Prev; 493 return cur; 494 } 495} 496 497wxHtmlTag *wxHtmlTag::GetLastSibling() const 498{ 499 if (m_Parent) 500 return m_Parent->m_LastChild; 501 else 502 { 503 wxHtmlTag *cur = (wxHtmlTag*)this; 504 while (cur->m_Next) 505 cur = cur->m_Next; 506 return cur; 507 } 508} 509 510wxHtmlTag *wxHtmlTag::GetNextTag() const 511{ 512 if (m_FirstChild) return m_FirstChild; 513 if (m_Next) return m_Next; 514 wxHtmlTag *cur = m_Parent; 515 if (!cur) return NULL; 516 while (cur->m_Parent && !cur->m_Next) 517 cur = cur->m_Parent; 518 return cur->m_Next; 519} 520 521#endif 522