1///////////////////////////////////////////////////////////////////////////// 2// Name: src/common/tokenzr.cpp 3// Purpose: String tokenizer 4// Author: Guilhem Lavaux 5// Modified by: Vadim Zeitlin (almost full rewrite) 6// Created: 04/22/98 7// RCS-ID: $Id: tokenzr.cpp 39694 2006-06-13 11:30:40Z ABX $ 8// Copyright: (c) Guilhem Lavaux 9// Licence: wxWindows licence 10///////////////////////////////////////////////////////////////////////////// 11 12// ============================================================================ 13// declarations 14// ============================================================================ 15 16// ---------------------------------------------------------------------------- 17// headers 18// ---------------------------------------------------------------------------- 19 20// For compilers that support precompilation, includes "wx.h". 21#include "wx/wxprec.h" 22 23#ifdef __BORLANDC__ 24 #pragma hdrstop 25#endif 26 27#include "wx/tokenzr.h" 28 29#ifndef WX_PRECOMP 30 #include "wx/arrstr.h" 31#endif 32 33// Required for wxIs... functions 34#include <ctype.h> 35 36// ============================================================================ 37// implementation 38// ============================================================================ 39 40// ---------------------------------------------------------------------------- 41// wxStringTokenizer construction 42// ---------------------------------------------------------------------------- 43 44wxStringTokenizer::wxStringTokenizer(const wxString& str, 45 const wxString& delims, 46 wxStringTokenizerMode mode) 47{ 48 SetString(str, delims, mode); 49} 50 51void wxStringTokenizer::SetString(const wxString& str, 52 const wxString& delims, 53 wxStringTokenizerMode mode) 54{ 55 if ( mode == wxTOKEN_DEFAULT ) 56 { 57 // by default, we behave like strtok() if the delimiters are only 58 // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for 59 // whitespace delimiters, strtok() behaviour is better because we want 60 // to count consecutive spaces as one delimiter) 61 const wxChar *p; 62 for ( p = delims.c_str(); *p; p++ ) 63 { 64 if ( !wxIsspace(*p) ) 65 break; 66 } 67 68 if ( *p ) 69 { 70 // not whitespace char in delims 71 mode = wxTOKEN_RET_EMPTY; 72 } 73 else 74 { 75 // only whitespaces 76 mode = wxTOKEN_STRTOK; 77 } 78 } 79 80 m_delims = delims; 81 m_mode = mode; 82 83 Reinit(str); 84} 85 86void wxStringTokenizer::Reinit(const wxString& str) 87{ 88 wxASSERT_MSG( IsOk(), _T("you should call SetString() first") ); 89 90 m_string = str; 91 m_pos = 0; 92 m_lastDelim = _T('\0'); 93} 94 95// ---------------------------------------------------------------------------- 96// access to the tokens 97// ---------------------------------------------------------------------------- 98 99// do we have more of them? 100bool wxStringTokenizer::HasMoreTokens() const 101{ 102 wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") ); 103 104 if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos ) 105 { 106 // there are non delimiter characters left, so we do have more tokens 107 return true; 108 } 109 110 switch ( m_mode ) 111 { 112 case wxTOKEN_RET_EMPTY: 113 case wxTOKEN_RET_DELIMS: 114 // special hack for wxTOKEN_RET_EMPTY: we should return the initial 115 // empty token even if there are only delimiters after it 116 return m_pos == 0 && !m_string.empty(); 117 118 case wxTOKEN_RET_EMPTY_ALL: 119 // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had 120 // already returned the trailing empty token after the last 121 // delimiter by examining m_lastDelim: it is set to NUL if we run 122 // up to the end of the string in GetNextToken(), but if it is not 123 // NUL yet we still have this last token to return even if m_pos is 124 // already at m_string.length() 125 return m_pos < m_string.length() || m_lastDelim != _T('\0'); 126 127 case wxTOKEN_INVALID: 128 case wxTOKEN_DEFAULT: 129 wxFAIL_MSG( _T("unexpected tokenizer mode") ); 130 // fall through 131 132 case wxTOKEN_STRTOK: 133 // never return empty delimiters 134 break; 135 } 136 137 return false; 138} 139 140// count the number of (remaining) tokens in the string 141size_t wxStringTokenizer::CountTokens() const 142{ 143 wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") ); 144 145 // VZ: this function is IMHO not very useful, so it's probably not very 146 // important if its implementation here is not as efficient as it 147 // could be -- but OTOH like this we're sure to get the correct answer 148 // in all modes 149 wxStringTokenizer tkz(m_string.c_str() + m_pos, m_delims, m_mode); 150 151 size_t count = 0; 152 while ( tkz.HasMoreTokens() ) 153 { 154 count++; 155 156 (void)tkz.GetNextToken(); 157 } 158 159 return count; 160} 161 162// ---------------------------------------------------------------------------- 163// token extraction 164// ---------------------------------------------------------------------------- 165 166wxString wxStringTokenizer::GetNextToken() 167{ 168 wxString token; 169 do 170 { 171 if ( !HasMoreTokens() ) 172 { 173 break; 174 } 175 176 // find the end of this token 177 size_t pos = m_string.find_first_of(m_delims, m_pos); 178 179 // and the start of the next one 180 if ( pos == wxString::npos ) 181 { 182 // no more delimiters, the token is everything till the end of 183 // string 184 token.assign(m_string, m_pos, wxString::npos); 185 186 // skip the token 187 m_pos = m_string.length(); 188 189 // it wasn't terminated 190 m_lastDelim = _T('\0'); 191 } 192 else // we found a delimiter at pos 193 { 194 // in wxTOKEN_RET_DELIMS mode we return the delimiter character 195 // with token, otherwise leave it out 196 size_t len = pos - m_pos; 197 if ( m_mode == wxTOKEN_RET_DELIMS ) 198 len++; 199 200 token.assign(m_string, m_pos, len); 201 202 // skip the token and the trailing delimiter 203 m_pos = pos + 1; 204 205 m_lastDelim = m_string[pos]; 206 } 207 } 208 while ( !AllowEmpty() && token.empty() ); 209 210 return token; 211} 212 213// ---------------------------------------------------------------------------- 214// public functions 215// ---------------------------------------------------------------------------- 216 217wxArrayString wxStringTokenize(const wxString& str, 218 const wxString& delims, 219 wxStringTokenizerMode mode) 220{ 221 wxArrayString tokens; 222 wxStringTokenizer tk(str, delims, mode); 223 while ( tk.HasMoreTokens() ) 224 { 225 tokens.Add(tk.GetNextToken()); 226 } 227 228 return tokens; 229} 230