1// Copyright 2012 The Kyua Authors.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9//   notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above copyright
11//   notice, this list of conditions and the following disclaimer in the
12//   documentation and/or other materials provided with the distribution.
13// * Neither the name of Google Inc. nor the names of its contributors
14//   may be used to endorse or promote products derived from this software
15//   without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29#include "utils/text/operations.ipp"
30
31#include <sstream>
32
33#include "utils/format/macros.hpp"
34#include "utils/sanity.hpp"
35
36namespace text = utils::text;
37
38
39/// Replaces XML special characters from an input string.
40///
41/// The list of XML special characters is specified here:
42///     http://www.w3.org/TR/xml11/#charsets
43///
44/// \param in The input to quote.
45///
46/// \return A quoted string without any XML special characters.
47std::string
48text::escape_xml(const std::string& in)
49{
50    std::ostringstream quoted;
51
52    for (std::string::const_iterator it = in.begin();
53         it != in.end(); ++it) {
54        unsigned char c = (unsigned char)*it;
55        if (c == '"') {
56            quoted << "&quot;";
57        } else if (c == '&') {
58            quoted << "&amp;";
59        } else if (c == '<') {
60            quoted << "&lt;";
61        } else if (c == '>') {
62            quoted << "&gt;";
63        } else if (c == '\'') {
64            quoted << "&apos;";
65        } else if ((c >= 0x01 && c <= 0x08) ||
66                   (c >= 0x0B && c <= 0x0C) ||
67                   (c >= 0x0E && c <= 0x1F) ||
68                   (c >= 0x7F && c <= 0x84) ||
69                   (c >= 0x86 && c <= 0x9F)) {
70            // for RestrictedChar characters, escape them
71            // as '&amp;#[decimal ASCII value];'
72            // so that in the XML file we will see the escaped
73            // character.
74            quoted << "&amp;#" << static_cast< std::string::size_type >(*it)
75                   << ";";
76        } else {
77            quoted << *it;
78        }
79    }
80    return quoted.str();
81}
82
83
84/// Surrounds a string with quotes, escaping the quote itself if needed.
85///
86/// \param text The string to quote.
87/// \param quote The quote character to use.
88///
89/// \return The quoted string.
90std::string
91text::quote(const std::string& text, const char quote)
92{
93    std::ostringstream quoted;
94    quoted << quote;
95
96    std::string::size_type start_pos = 0;
97    std::string::size_type last_pos = text.find(quote);
98    while (last_pos != std::string::npos) {
99        quoted << text.substr(start_pos, last_pos - start_pos) << '\\';
100        start_pos = last_pos;
101        last_pos = text.find(quote, start_pos + 1);
102    }
103    quoted << text.substr(start_pos);
104
105    quoted << quote;
106    return quoted.str();
107}
108
109
110/// Fills a paragraph to the specified length.
111///
112/// This preserves any sequence of spaces in the input and any possible
113/// newlines.  Sequences of spaces may be split in half (and thus one space is
114/// lost), but the rest of the spaces will be preserved as either trailing or
115/// leading spaces.
116///
117/// \param input The string to refill.
118/// \param target_width The width to refill the paragraph to.
119///
120/// \return The refilled paragraph as a sequence of independent lines.
121std::vector< std::string >
122text::refill(const std::string& input, const std::size_t target_width)
123{
124    std::vector< std::string > output;
125
126    std::string::size_type start = 0;
127    while (start < input.length()) {
128        std::string::size_type width;
129        if (start + target_width >= input.length())
130            width = input.length() - start;
131        else {
132            if (input[start + target_width] == ' ') {
133                width = target_width;
134            } else {
135                const std::string::size_type pos = input.find_last_of(
136                    " ", start + target_width - 1);
137                if (pos == std::string::npos || pos < start + 1) {
138                    width = input.find_first_of(" ", start + target_width);
139                    if (width == std::string::npos)
140                        width = input.length() - start;
141                    else
142                        width -= start;
143                } else {
144                    width = pos - start;
145                }
146            }
147        }
148        INV(width != std::string::npos);
149        INV(start + width <= input.length());
150        INV(input[start + width] == ' ' || input[start + width] == '\0');
151        output.push_back(input.substr(start, width));
152
153        start += width + 1;
154    }
155
156    if (input.empty()) {
157        INV(output.empty());
158        output.push_back("");
159    }
160
161    return output;
162}
163
164
165/// Fills a paragraph to the specified length.
166///
167/// See the documentation for refill() for additional details.
168///
169/// \param input The string to refill.
170/// \param target_width The width to refill the paragraph to.
171///
172/// \return The refilled paragraph as a string with embedded newlines.
173std::string
174text::refill_as_string(const std::string& input, const std::size_t target_width)
175{
176    return join(refill(input, target_width), "\n");
177}
178
179
180/// Replaces all occurrences of a substring in a string.
181///
182/// \param input The string in which to perform the replacement.
183/// \param search The pattern to be replaced.
184/// \param replacement The substring to replace search with.
185///
186/// \return A copy of input with the replacements performed.
187std::string
188text::replace_all(const std::string& input, const std::string& search,
189                  const std::string& replacement)
190{
191    std::string output;
192
193    std::string::size_type pos, lastpos = 0;
194    while ((pos = input.find(search, lastpos)) != std::string::npos) {
195        output += input.substr(lastpos, pos - lastpos);
196        output += replacement;
197        lastpos = pos + search.length();
198    }
199    output += input.substr(lastpos);
200
201    return output;
202}
203
204
205/// Splits a string into different components.
206///
207/// \param str The string to split.
208/// \param delimiter The separator to use to split the words.
209///
210/// \return The different words in the input string as split by the provided
211/// delimiter.
212std::vector< std::string >
213text::split(const std::string& str, const char delimiter)
214{
215    std::vector< std::string > words;
216    if (!str.empty()) {
217        std::string::size_type pos = str.find(delimiter);
218        words.push_back(str.substr(0, pos));
219        while (pos != std::string::npos) {
220            ++pos;
221            const std::string::size_type next = str.find(delimiter, pos);
222            words.push_back(str.substr(pos, next - pos));
223            pos = next;
224        }
225    }
226    return words;
227}
228
229
230/// Converts a string to a boolean.
231///
232/// \param str The string to convert.
233///
234/// \return The converted string, if the input string was valid.
235///
236/// \throw std::value_error If the input string does not represent a valid
237///     boolean value.
238template<>
239bool
240text::to_type(const std::string& str)
241{
242    if (str == "true")
243        return true;
244    else if (str == "false")
245        return false;
246    else
247        throw value_error(F("Invalid boolean value '%s'") % str);
248}
249
250
251/// Identity function for to_type, for genericity purposes.
252///
253/// \param str The string to convert.
254///
255/// \return The input string.
256template<>
257std::string
258text::to_type(const std::string& str)
259{
260    return str;
261}
262