1/////////////////////////////////////////////////////////////////////////////
2// Name:        src/common/tokenzr.cpp
3// Purpose:     String tokenizer
4// Author:      Guilhem Lavaux
5// Modified by: Vadim Zeitlin (almost full rewrite)
6// Created:     04/22/98
7// RCS-ID:      $Id: tokenzr.cpp 39694 2006-06-13 11:30:40Z ABX $
8// Copyright:   (c) Guilhem Lavaux
9// Licence:     wxWindows licence
10/////////////////////////////////////////////////////////////////////////////
11
12// ============================================================================
13// declarations
14// ============================================================================
15
16// ----------------------------------------------------------------------------
17// headers
18// ----------------------------------------------------------------------------
19
20// For compilers that support precompilation, includes "wx.h".
21#include "wx/wxprec.h"
22
23#ifdef __BORLANDC__
24    #pragma hdrstop
25#endif
26
27#include "wx/tokenzr.h"
28
29#ifndef WX_PRECOMP
30    #include "wx/arrstr.h"
31#endif
32
33// Required for wxIs... functions
34#include <ctype.h>
35
36// ============================================================================
37// implementation
38// ============================================================================
39
40// ----------------------------------------------------------------------------
41// wxStringTokenizer construction
42// ----------------------------------------------------------------------------
43
44wxStringTokenizer::wxStringTokenizer(const wxString& str,
45                                     const wxString& delims,
46                                     wxStringTokenizerMode mode)
47{
48    SetString(str, delims, mode);
49}
50
51void wxStringTokenizer::SetString(const wxString& str,
52                                  const wxString& delims,
53                                  wxStringTokenizerMode mode)
54{
55    if ( mode == wxTOKEN_DEFAULT )
56    {
57        // by default, we behave like strtok() if the delimiters are only
58        // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
59        // whitespace delimiters, strtok() behaviour is better because we want
60        // to count consecutive spaces as one delimiter)
61        const wxChar *p;
62        for ( p = delims.c_str(); *p; p++ )
63        {
64            if ( !wxIsspace(*p) )
65                break;
66        }
67
68        if ( *p )
69        {
70            // not whitespace char in delims
71            mode = wxTOKEN_RET_EMPTY;
72        }
73        else
74        {
75            // only whitespaces
76            mode = wxTOKEN_STRTOK;
77        }
78    }
79
80    m_delims = delims;
81    m_mode = mode;
82
83    Reinit(str);
84}
85
86void wxStringTokenizer::Reinit(const wxString& str)
87{
88    wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
89
90    m_string = str;
91    m_pos = 0;
92    m_lastDelim = _T('\0');
93}
94
95// ----------------------------------------------------------------------------
96// access to the tokens
97// ----------------------------------------------------------------------------
98
99// do we have more of them?
100bool wxStringTokenizer::HasMoreTokens() const
101{
102    wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") );
103
104    if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos )
105    {
106        // there are non delimiter characters left, so we do have more tokens
107        return true;
108    }
109
110    switch ( m_mode )
111    {
112        case wxTOKEN_RET_EMPTY:
113        case wxTOKEN_RET_DELIMS:
114            // special hack for wxTOKEN_RET_EMPTY: we should return the initial
115            // empty token even if there are only delimiters after it
116            return m_pos == 0 && !m_string.empty();
117
118        case wxTOKEN_RET_EMPTY_ALL:
119            // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had
120            // already returned the trailing empty token after the last
121            // delimiter by examining m_lastDelim: it is set to NUL if we run
122            // up to the end of the string in GetNextToken(), but if it is not
123            // NUL yet we still have this last token to return even if m_pos is
124            // already at m_string.length()
125            return m_pos < m_string.length() || m_lastDelim != _T('\0');
126
127        case wxTOKEN_INVALID:
128        case wxTOKEN_DEFAULT:
129            wxFAIL_MSG( _T("unexpected tokenizer mode") );
130            // fall through
131
132        case wxTOKEN_STRTOK:
133            // never return empty delimiters
134            break;
135    }
136
137    return false;
138}
139
140// count the number of (remaining) tokens in the string
141size_t wxStringTokenizer::CountTokens() const
142{
143    wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
144
145    // VZ: this function is IMHO not very useful, so it's probably not very
146    //     important if its implementation here is not as efficient as it
147    //     could be -- but OTOH like this we're sure to get the correct answer
148    //     in all modes
149    wxStringTokenizer tkz(m_string.c_str() + m_pos, m_delims, m_mode);
150
151    size_t count = 0;
152    while ( tkz.HasMoreTokens() )
153    {
154        count++;
155
156        (void)tkz.GetNextToken();
157    }
158
159    return count;
160}
161
162// ----------------------------------------------------------------------------
163// token extraction
164// ----------------------------------------------------------------------------
165
166wxString wxStringTokenizer::GetNextToken()
167{
168    wxString token;
169    do
170    {
171        if ( !HasMoreTokens() )
172        {
173            break;
174        }
175
176        // find the end of this token
177        size_t pos = m_string.find_first_of(m_delims, m_pos);
178
179        // and the start of the next one
180        if ( pos == wxString::npos )
181        {
182            // no more delimiters, the token is everything till the end of
183            // string
184            token.assign(m_string, m_pos, wxString::npos);
185
186            // skip the token
187            m_pos = m_string.length();
188
189            // it wasn't terminated
190            m_lastDelim = _T('\0');
191        }
192        else // we found a delimiter at pos
193        {
194            // in wxTOKEN_RET_DELIMS mode we return the delimiter character
195            // with token, otherwise leave it out
196            size_t len = pos - m_pos;
197            if ( m_mode == wxTOKEN_RET_DELIMS )
198                len++;
199
200            token.assign(m_string, m_pos, len);
201
202            // skip the token and the trailing delimiter
203            m_pos = pos + 1;
204
205            m_lastDelim = m_string[pos];
206        }
207    }
208    while ( !AllowEmpty() && token.empty() );
209
210    return token;
211}
212
213// ----------------------------------------------------------------------------
214// public functions
215// ----------------------------------------------------------------------------
216
217wxArrayString wxStringTokenize(const wxString& str,
218                               const wxString& delims,
219                               wxStringTokenizerMode mode)
220{
221    wxArrayString tokens;
222    wxStringTokenizer tk(str, delims, mode);
223    while ( tk.HasMoreTokens() )
224    {
225        tokens.Add(tk.GetNextToken());
226    }
227
228    return tokens;
229}
230