1/////////////////////////////////////////////////////////////////////////////
2// Name:        src/html/htmltag.cpp
3// Purpose:     wxHtmlTag class (represents single tag)
4// Author:      Vaclav Slavik
5// RCS-ID:      $Id: htmltag.cpp 53433 2008-05-03 00:40:29Z VZ $
6// Copyright:   (c) 1999 Vaclav Slavik
7// Licence:     wxWindows licence
8/////////////////////////////////////////////////////////////////////////////
9
10#include "wx/wxprec.h"
11
12#ifdef __BORLANDC__
13    #pragma hdrstop
14#endif
15
16#if wxUSE_HTML
17
18#include "wx/html/htmltag.h"
19
20#ifndef WXPRECOMP
21    #include "wx/colour.h"
22#endif
23
24#include "wx/html/htmlpars.h"
25#include <stdio.h> // for vsscanf
26#include <stdarg.h>
27
28
29//-----------------------------------------------------------------------------
30// wxHtmlTagsCache
31//-----------------------------------------------------------------------------
32
33struct wxHtmlCacheItem
34{
35    // this is "pos" value passed to wxHtmlTag's constructor.
36    // it is position of '<' character of the tag
37    int Key;
38
39    // end positions for the tag:
40    // end1 is '<' of ending tag,
41    // end2 is '>' or both are
42    // -1 if there is no ending tag for this one...
43    // or -2 if this is ending tag  </...>
44    int End1, End2;
45
46    // name of this tag
47    wxChar *Name;
48};
49
50
// NOTE(review): IMPLEMENT_CLASS is defined outside this file; presumably it
// emits the run-time class information tying wxHtmlTagsCache to its base
// class wxObject -- confirm against the wxWidgets object headers.
IMPLEMENT_CLASS(wxHtmlTagsCache,wxObject)

// Number of wxHtmlCacheItem slots by which the m_Cache array is grown each
// time it fills up (see the realloc() call in the wxHtmlTagsCache constructor).
#define CACHE_INCREMENT  64
54
55bool wxIsCDATAElement(const wxChar *tag)
56{
57    return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
58           (wxStrcmp(tag, _T("STYLE")) == 0);
59}
60
// Builds the tags cache for the given HTML source.
//
// The source is scanned once from start to end. For every '<' encountered a
// wxHtmlCacheItem is appended to m_Cache, recording the position of the '<'
// (Key) and, once the corresponding ending tag is seen, the positions of that
// ending tag (End1/End2). Ending tags themselves are stored with End1 == End2
// == -2; opening tags that never get a matching ending tag keep -1.
//
// The content of SCRIPT and STYLE elements is skipped up to their closing
// tag, so '<' characters inside them do not produce cache entries.
//
// Tag names are kept (upper-cased) only during the scan in order to pair
// ending tags with opening ones; they are freed before the constructor
// returns.
wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
{
    const wxChar *src = source.c_str();
    int lng = source.length();
    // holds the upper-cased name of the tag currently being scanned; longer
    // names are truncated to fit
    wxChar tagBuffer[256];

    m_Cache = NULL;
    m_CacheSize = 0;
    m_CachePos = 0;

    int pos = 0;
    while (pos < lng)
    {
        if (src[pos] == wxT('<'))   // tag found:
        {
            // grow the array by CACHE_INCREMENT items whenever the current
            // capacity is exhausted
            // NOTE(review): the realloc() result is not checked for NULL
            if (m_CacheSize % CACHE_INCREMENT == 0)
                m_Cache = (wxHtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wxHtmlCacheItem));
            int tg = m_CacheSize++;
            int stpos = pos++;
            m_Cache[tg].Key = stpos;

            // copy the tag name (everything up to the first whitespace or
            // '>') into tagBuffer, converting it to upper case; for ending
            // tags the leading '/' is part of the copied text
            int i;
            for ( i = 0;
                  pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 &&
                  src[pos] != wxT('>') && !wxIsspace(src[pos]);
                  i++, pos++ )
            {
                tagBuffer[i] = (wxChar)wxToupper(src[pos]);
            }
            tagBuffer[i] = _T('\0');

            m_Cache[tg].Name = new wxChar[i+1];
            memcpy(m_Cache[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));

            // skip the rest of the tag (its parameters) up to the closing '>'
            while (pos < lng && src[pos] != wxT('>')) pos++;

            if (src[stpos+1] == wxT('/')) // ending tag:
            {
                m_Cache[tg].End1 = m_Cache[tg].End2 = -2;
                // find matching begin tag: walk backwards to the nearest
                // still-unclosed tag with the same name (tagBuffer+1 skips
                // the '/')
                for (i = tg; i >= 0; i--)
                    if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, tagBuffer+1) == 0))
                    {
                        m_Cache[i].End1 = stpos;
                        m_Cache[i].End2 = pos + 1;
                        break;
                    }
            }
            else
            {
                // opening tag: no ending tag known yet
                m_Cache[tg].End1 = m_Cache[tg].End2 = -1;

                if (wxIsCDATAElement(tagBuffer))
                {
                    // store the orig pos in case we are missing the closing
                    // tag (see below)
                    wxInt32 old_pos = pos;
                    bool foundCloseTag = false;

                    // find next matching tag
                    int tag_len = wxStrlen(tagBuffer);
                    while (pos < lng)
                    {
                        // find the ending tag, i.e. the next "</" sequence
                        while (pos + 1 < lng &&
                               (src[pos] != '<' || src[pos+1] != '/'))
                            ++pos;
                        if (src[pos] == '<')
                            ++pos;

                        // see if it matches: compare the following characters
                        // case-insensitively against the element name,
                        // stopping at the next '<' or '>'
                        int match_pos = 0;
                        while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
                            // cast to wxChar needed to suppress warning in
                            // Unicode build
                            if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
                                ++match_pos;
                            }
                            else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
                                src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
                                // need to skip over these
                            }
                            else {
                                // mismatch (this also covers the '/' itself):
                                // restart matching the name from its
                                // beginning
                                match_pos = 0;
                            }
                            ++pos;
                        }

                        // found a match
                        if (match_pos == tag_len)
                        {
                            // rewind to the character preceding the '<' of
                            // the closing tag (name length + "</" + 1); the
                            // pos++ at the bottom of the outer loop then
                            // lands on that '<', so the closing tag gets
                            // cached like any other ending tag
                            // NOTE(review): this arithmetic assumes no
                            // whitespace was skipped while matching
                            pos = pos - tag_len - 3;
                            foundCloseTag = true;
                            break;
                        }
                        else // keep looking for the closing tag
                        {
                            ++pos;
                        }
                    }
                    if (!foundCloseTag)
                    {
                        // we didn't find closing tag; this means the markup
                        // is incorrect and the best thing we can do is to
                        // ignore the unclosed tag and continue parsing as if
                        // it didn't exist:
                        pos = old_pos;
                    }
                }
            }
        }

        pos++;
    }

    // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
    for (int i = 0; i < m_CacheSize; i++)
    {
        delete[] m_Cache[i].Name;
        m_Cache[i].Name = NULL;
    }
}
183
184void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
185{
186    if (m_Cache == NULL) return;
187    if (m_Cache[m_CachePos].Key != at)
188    {
189        int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
190        do
191        {
192            if ( m_CachePos < 0 || m_CachePos == m_CacheSize )
193            {
194                // something is very wrong with HTML, give up by returning an
195                // impossibly large value which is going to be ignored by the
196                // caller
197                *end1 =
198                *end2 = INT_MAX;
199                return;
200            }
201
202            m_CachePos += delta;
203        }
204        while (m_Cache[m_CachePos].Key != at);
205    }
206    *end1 = m_Cache[m_CachePos].End1;
207    *end2 = m_Cache[m_CachePos].End2;
208}
209
210
211
212
213//-----------------------------------------------------------------------------
214// wxHtmlTag
215//-----------------------------------------------------------------------------
216
// NOTE(review): IMPLEMENT_CLASS is defined outside this file; presumably it
// emits the run-time class information tying wxHtmlTag to its base class
// wxObject -- confirm against the wxWidgets object headers.
IMPLEMENT_CLASS(wxHtmlTag,wxObject)
218
219wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
220                     const wxString& source, int pos, int end_pos,
221                     wxHtmlTagsCache *cache,
222                     wxHtmlEntitiesParser *entParser) : wxObject()
223{
224    /* Setup DOM relations */
225
226    m_Next = NULL;
227    m_FirstChild = m_LastChild = NULL;
228    m_Parent = parent;
229    if (parent)
230    {
231        m_Prev = m_Parent->m_LastChild;
232        if (m_Prev == NULL)
233            m_Parent->m_FirstChild = this;
234        else
235            m_Prev->m_Next = this;
236        m_Parent->m_LastChild = this;
237    }
238    else
239        m_Prev = NULL;
240
241    /* Find parameters and their values: */
242
243    int i;
244    wxChar c;
245
246    // fill-in name, params and begin pos:
247    i = pos+1;
248
249    // find tag's name and convert it to uppercase:
250    while ((i < end_pos) &&
251           ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
252             c != wxT('\n') && c != wxT('\t') &&
253             c != wxT('>')))
254    {
255        if ((c >= wxT('a')) && (c <= wxT('z')))
256            c -= (wxT('a') - wxT('A'));
257        m_Name << c;
258    }
259
260    // if the tag has parameters, read them and "normalize" them,
261    // i.e. convert to uppercase, replace whitespaces by spaces and
262    // remove whitespaces around '=':
263    if (source[i-1] != wxT('>'))
264    {
265        #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
266                             c == wxT('\n') || c == wxT('\t'))
267        wxString pname, pvalue;
268        wxChar quote;
269        enum
270        {
271            ST_BEFORE_NAME = 1,
272            ST_NAME,
273            ST_BEFORE_EQ,
274            ST_BEFORE_VALUE,
275            ST_VALUE
276        } state;
277
278        quote = 0;
279        state = ST_BEFORE_NAME;
280        while (i < end_pos)
281        {
282            c = source[i++];
283
284            if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
285            {
286                if (state == ST_BEFORE_EQ || state == ST_NAME)
287                {
288                    m_ParamNames.Add(pname);
289                    m_ParamValues.Add(wxEmptyString);
290                }
291                else if (state == ST_VALUE && quote == 0)
292                {
293                    m_ParamNames.Add(pname);
294                    if (entParser)
295                        m_ParamValues.Add(entParser->Parse(pvalue));
296                    else
297                        m_ParamValues.Add(pvalue);
298                }
299                break;
300            }
301            switch (state)
302            {
303                case ST_BEFORE_NAME:
304                    if (!IS_WHITE(c))
305                    {
306                        pname = c;
307                        state = ST_NAME;
308                    }
309                    break;
310                case ST_NAME:
311                    if (IS_WHITE(c))
312                        state = ST_BEFORE_EQ;
313                    else if (c == wxT('='))
314                        state = ST_BEFORE_VALUE;
315                    else
316                        pname << c;
317                    break;
318                case ST_BEFORE_EQ:
319                    if (c == wxT('='))
320                        state = ST_BEFORE_VALUE;
321                    else if (!IS_WHITE(c))
322                    {
323                        m_ParamNames.Add(pname);
324                        m_ParamValues.Add(wxEmptyString);
325                        pname = c;
326                        state = ST_NAME;
327                    }
328                    break;
329                case ST_BEFORE_VALUE:
330                    if (!IS_WHITE(c))
331                    {
332                        if (c == wxT('"') || c == wxT('\''))
333                            quote = c, pvalue = wxEmptyString;
334                        else
335                            quote = 0, pvalue = c;
336                        state = ST_VALUE;
337                    }
338                    break;
339                case ST_VALUE:
340                    if ((quote != 0 && c == quote) ||
341                        (quote == 0 && IS_WHITE(c)))
342                    {
343                        m_ParamNames.Add(pname);
344                        if (quote == 0)
345                        {
346                            // VS: backward compatibility, no real reason,
347                            //     but wxHTML code relies on this... :(
348                            pvalue.MakeUpper();
349                        }
350                        if (entParser)
351                            m_ParamValues.Add(entParser->Parse(pvalue));
352                        else
353                            m_ParamValues.Add(pvalue);
354                        state = ST_BEFORE_NAME;
355                    }
356                    else
357                        pvalue << c;
358                    break;
359            }
360        }
361
362        #undef IS_WHITE
363    }
364    m_Begin = i;
365
366    cache->QueryTag(pos, &m_End1, &m_End2);
367    if (m_End1 > end_pos) m_End1 = end_pos;
368    if (m_End2 > end_pos) m_End2 = end_pos;
369}
370
371wxHtmlTag::~wxHtmlTag()
372{
373    wxHtmlTag *t1, *t2;
374    t1 = m_FirstChild;
375    while (t1)
376    {
377        t2 = t1->GetNextSibling();
378        delete t1;
379        t1 = t2;
380    }
381}
382
383bool wxHtmlTag::HasParam(const wxString& par) const
384{
385    return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
386}
387
388wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
389{
390    int index = m_ParamNames.Index(par, false);
391    if (index == wxNOT_FOUND)
392        return wxEmptyString;
393    if (with_commas)
394    {
395        // VS: backward compatibility, seems to be never used by wxHTML...
396        wxString s;
397        s << wxT('"') << m_ParamValues[index] << wxT('"');
398        return s;
399    }
400    else
401        return m_ParamValues[index];
402}
403
404int wxHtmlTag::ScanParam(const wxString& par,
405                         const wxChar *format,
406                         void *param) const
407{
408    wxString parval = GetParam(par);
409    return wxSscanf(parval, format, param);
410}
411
412bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
413{
414    wxCHECK_MSG( clr, false, _T("invalid colour argument") );
415
416    wxString str = GetParam(par);
417
418    // handle colours defined in HTML 4.0 first:
419    if (str.length() > 1 && str[0] != _T('#'))
420    {
421        #define HTML_COLOUR(name, r, g, b)              \
422            if (str.IsSameAs(wxT(name), false))         \
423                { clr->Set(r, g, b); return true; }
424        HTML_COLOUR("black",   0x00,0x00,0x00)
425        HTML_COLOUR("silver",  0xC0,0xC0,0xC0)
426        HTML_COLOUR("gray",    0x80,0x80,0x80)
427        HTML_COLOUR("white",   0xFF,0xFF,0xFF)
428        HTML_COLOUR("maroon",  0x80,0x00,0x00)
429        HTML_COLOUR("red",     0xFF,0x00,0x00)
430        HTML_COLOUR("purple",  0x80,0x00,0x80)
431        HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
432        HTML_COLOUR("green",   0x00,0x80,0x00)
433        HTML_COLOUR("lime",    0x00,0xFF,0x00)
434        HTML_COLOUR("olive",   0x80,0x80,0x00)
435        HTML_COLOUR("yellow",  0xFF,0xFF,0x00)
436        HTML_COLOUR("navy",    0x00,0x00,0x80)
437        HTML_COLOUR("blue",    0x00,0x00,0xFF)
438        HTML_COLOUR("teal",    0x00,0x80,0x80)
439        HTML_COLOUR("aqua",    0x00,0xFF,0xFF)
440        #undef HTML_COLOUR
441    }
442
443    // then try to parse #rrggbb representations or set from other well
444    // known names (note that this doesn't strictly conform to HTML spec,
445    // but it doesn't do real harm -- but it *must* be done after the standard
446    // colors are handled above):
447    if (clr->Set(str))
448        return true;
449
450    return false;
451}
452
453bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
454{
455    if ( !HasParam(par) )
456        return false;
457
458    long i;
459    if ( !GetParam(par).ToLong(&i) )
460        return false;
461
462    *clr = (int)i;
463    return true;
464}
465
466wxString wxHtmlTag::GetAllParams() const
467{
468    // VS: this function is for backward compatibility only,
469    //     never used by wxHTML
470    wxString s;
471    size_t cnt = m_ParamNames.GetCount();
472    for (size_t i = 0; i < cnt; i++)
473    {
474        s << m_ParamNames[i];
475        s << wxT('=');
476        if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
477            s << wxT('\'') << m_ParamValues[i] << wxT('\'');
478        else
479            s << wxT('"') << m_ParamValues[i] << wxT('"');
480    }
481    return s;
482}
483
484wxHtmlTag *wxHtmlTag::GetFirstSibling() const
485{
486    if (m_Parent)
487        return m_Parent->m_FirstChild;
488    else
489    {
490        wxHtmlTag *cur = (wxHtmlTag*)this;
491        while (cur->m_Prev)
492            cur = cur->m_Prev;
493        return cur;
494    }
495}
496
497wxHtmlTag *wxHtmlTag::GetLastSibling() const
498{
499    if (m_Parent)
500        return m_Parent->m_LastChild;
501    else
502    {
503        wxHtmlTag *cur = (wxHtmlTag*)this;
504        while (cur->m_Next)
505            cur = cur->m_Next;
506        return cur;
507    }
508}
509
510wxHtmlTag *wxHtmlTag::GetNextTag() const
511{
512    if (m_FirstChild) return m_FirstChild;
513    if (m_Next) return m_Next;
514    wxHtmlTag *cur = m_Parent;
515    if (!cur) return NULL;
516    while (cur->m_Parent && !cur->m_Next)
517        cur = cur->m_Parent;
518    return cur->m_Next;
519}
520
521#endif
522