1219820Sjeff// Copyright 2012 The Kyua Authors.
2219820Sjeff// All rights reserved.
3219820Sjeff//
4219820Sjeff// Redistribution and use in source and binary forms, with or without
5219820Sjeff// modification, are permitted provided that the following conditions are
6219820Sjeff// met:
7219820Sjeff//
8219820Sjeff// * Redistributions of source code must retain the above copyright
9219820Sjeff//   notice, this list of conditions and the following disclaimer.
10219820Sjeff// * Redistributions in binary form must reproduce the above copyright
11219820Sjeff//   notice, this list of conditions and the following disclaimer in the
12219820Sjeff//   documentation and/or other materials provided with the distribution.
13219820Sjeff// * Neither the name of Google Inc. nor the names of its contributors
14219820Sjeff//   may be used to endorse or promote products derived from this software
15219820Sjeff//   without specific prior written permission.
16219820Sjeff//
17219820Sjeff// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18219820Sjeff// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19219820Sjeff// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20219820Sjeff// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21219820Sjeff// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22219820Sjeff// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23219820Sjeff// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24219820Sjeff// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25219820Sjeff// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26219820Sjeff// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27219820Sjeff// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28219820Sjeff
29219820Sjeff#include "utils/text/operations.ipp"
30219820Sjeff
31219820Sjeff#include <sstream>
32219820Sjeff
33219820Sjeff#include "utils/format/macros.hpp"
34219820Sjeff#include "utils/sanity.hpp"
35219820Sjeff
36219820Sjeffnamespace text = utils::text;
37219820Sjeff
38219820Sjeff
/// Replaces XML special characters from an input string.
///
/// The five markup characters (", &, <, > and ') are replaced by their
/// predefined entities.  Bytes that fall within the restricted ranges handled
/// below (0x01-0x08, 0x0B-0x0C, 0x0E-0x1F, 0x7F-0x84 and 0x86-0x9F) are
/// replaced by the literal text "&amp;#N;", where N is the decimal value of
/// the byte in the 0-255 range.  Every other byte is copied verbatim.
///
/// The list of XML special characters is specified here:
///     http://www.w3.org/TR/xml11/#charsets
///
/// \param in The input to quote.
///
/// \return A quoted string without any XML special characters.
std::string
text::escape_xml(const std::string& in)
{
    std::ostringstream quoted;

    for (std::string::const_iterator it = in.begin();
         it != in.end(); ++it) {
        // Always reason about the unsigned value of the byte: plain char may
        // be signed, in which case bytes >= 0x80 would be negative and would
        // turn into huge numbers when widened for printing.
        const unsigned char c = static_cast< unsigned char >(*it);

        switch (c) {
        case '"':
            quoted << "&quot;";
            break;
        case '&':
            quoted << "&amp;";
            break;
        case '<':
            quoted << "&lt;";
            break;
        case '>':
            quoted << "&gt;";
            break;
        case '\'':
            quoted << "&apos;";
            break;
        default:
            if ((c >= 0x01 && c <= 0x08) ||
                (c >= 0x0B && c <= 0x0C) ||
                (c >= 0x0E && c <= 0x1F) ||
                (c >= 0x7F && c <= 0x84) ||
                (c >= 0x86 && c <= 0x9F)) {
                // For RestrictedChar characters, emit
                // '&amp;#[decimal byte value];' so that the XML file shows
                // the escaped form of the character rather than the raw byte.
                quoted << "&amp;#" << static_cast< unsigned int >(c) << ";";
            } else {
                quoted << *it;
            }
            break;
        }
    }
    return quoted.str();
}
82219820Sjeff
83219820Sjeff
/// Wraps a string in a pair of quote characters.
///
/// Every occurrence of the quote character inside the string is preceded by a
/// backslash in the result.  No other character, the backslash included, is
/// altered.
///
/// \param text The string to quote.
/// \param quote The quote character to use.
///
/// \return The quoted string.
std::string
text::quote(const std::string& text, const char quote)
{
    // Start with the opening quote and append the input one character at a
    // time, slipping in a backslash right before each embedded quote.
    std::string result(1, quote);

    for (std::string::const_iterator ch = text.begin(); ch != text.end();
         ++ch) {
        if (*ch == quote)
            result += '\\';
        result += *ch;
    }

    result += quote;
    return result;
}
108219820Sjeff
109219820Sjeff
/// Breaks a paragraph into lines of roughly the requested width.
///
/// Lines are only ever broken at a space, and the space at which a line is
/// broken is dropped.  For each line, the break point is chosen as follows:
/// the space located exactly at the target width if there is one; otherwise
/// the last space before that point; otherwise the first space after it (so a
/// word longer than the target width is kept whole and overflows).  Runs of
/// spaces are otherwise preserved, which means they can show up as leading or
/// trailing spaces of the resulting lines.
///
/// \param input The string to refill.
/// \param target_width The width to refill the paragraph to.
///
/// \return The refilled paragraph as a sequence of independent lines.  An
/// empty input yields a single empty line.
std::vector< std::string >
text::refill(const std::string& input, const std::size_t target_width)
{
    typedef std::string::size_type size_type;

    std::vector< std::string > lines;

    if (input.empty()) {
        lines.push_back("");
        return lines;
    }

    const size_type total = input.length();
    size_type begin = 0;
    while (begin < total) {
        // Index of the character right after a line of exactly target_width.
        const size_type limit = begin + target_width;

        size_type span;
        if (limit >= total) {
            // Everything that is left fits in this line.
            span = total - begin;
        } else if (input[limit] == ' ') {
            // A space sits exactly at the boundary: break right there.
            span = target_width;
        } else {
            const size_type prev_space = input.find_last_of(' ', limit - 1);
            if (prev_space != std::string::npos && prev_space > begin) {
                // Break at the last space that precedes the boundary.
                span = prev_space - begin;
            } else {
                // No usable space before the boundary; let the line overflow
                // up to the next space, or to the end of the input.
                const size_type next_space = input.find_first_of(' ', limit);
                if (next_space == std::string::npos)
                    span = total - begin;
                else
                    span = next_space - begin;
            }
        }
        INV(span != std::string::npos);
        INV(begin + span <= total);
        INV(input[begin + span] == ' ' || input[begin + span] == '\0');
        lines.push_back(input.substr(begin, span));

        // Skip over the space at which the line was broken.
        begin += span + 1;
    }

    return lines;
}
163219820Sjeff
164219820Sjeff
165219820Sjeff/// Fills a paragraph to the specified length.
166219820Sjeff///
167219820Sjeff/// See the documentation for refill() for additional details.
168219820Sjeff///
169219820Sjeff/// \param input The string to refill.
170219820Sjeff/// \param target_width The width to refill the paragraph to.
171219820Sjeff///
172219820Sjeff/// \return The refilled paragraph as a string with embedded newlines.
173219820Sjeffstd::string
174219820Sjefftext::refill_as_string(const std::string& input, const std::size_t target_width)
175219820Sjeff{
176219820Sjeff    return join(refill(input, target_width), "\n");
177219820Sjeff}
178219820Sjeff
179219820Sjeff
/// Replaces all occurrences of a substring in a string.
///
/// Occurrences are located from left to right and do not overlap.  The text
/// inserted by a replacement is never rescanned for further matches.
///
/// \param input The string in which to perform the replacement.
/// \param search The pattern to be replaced.  If empty, no replacement takes
///     place and the input is returned untouched.
/// \param replacement The substring to replace search with.
///
/// \return A copy of input with the replacements performed.
std::string
text::replace_all(const std::string& input, const std::string& search,
                  const std::string& replacement)
{
    // An empty pattern matches at every position without consuming any
    // input, so the scan below would never advance.  There is nothing
    // meaningful to replace in that case.
    if (search.empty())
        return input;

    std::string output;

    std::string::size_type lastpos = 0;
    std::string::size_type pos;
    while ((pos = input.find(search, lastpos)) != std::string::npos) {
        // Copy the untouched text preceding the match, then the replacement.
        output += input.substr(lastpos, pos - lastpos);
        output += replacement;
        // Resume right after the match so that matches never overlap.
        lastpos = pos + search.length();
    }
    // Whatever follows the last match is copied verbatim.
    output += input.substr(lastpos);

    return output;
}
203219820Sjeff
204219820Sjeff
/// Breaks a string into the fields separated by a delimiter.
///
/// Adjacent delimiters, as well as a delimiter at the very beginning or end of
/// the string, produce empty fields.  An empty input string is special-cased
/// and produces no fields at all.
///
/// \param str The string to split.
/// \param delimiter The separator to use to split the words.
///
/// \return The different words in the input string as split by the provided
/// delimiter.
std::vector< std::string >
text::split(const std::string& str, const char delimiter)
{
    std::vector< std::string > fields;
    if (str.empty())
        return fields;

    std::string::size_type begin = 0;
    for (;;) {
        const std::string::size_type end = str.find(delimiter, begin);
        if (end == std::string::npos) {
            // Last field: everything from the previous delimiter to the end.
            fields.push_back(str.substr(begin));
            break;
        }
        fields.push_back(str.substr(begin, end - begin));
        begin = end + 1;
    }
    return fields;
}
228219820Sjeff
229219820Sjeff
230219820Sjeff/// Converts a string to a boolean.
231219820Sjeff///
232219820Sjeff/// \param str The string to convert.
233219820Sjeff///
234219820Sjeff/// \return The converted string, if the input string was valid.
235219820Sjeff///
236219820Sjeff/// \throw std::value_error If the input string does not represent a valid
237219820Sjeff///     boolean value.
238219820Sjefftemplate<>
239219820Sjeffbool
240219820Sjefftext::to_type(const std::string& str)
241219820Sjeff{
242219820Sjeff    if (str == "true")
243219820Sjeff        return true;
244219820Sjeff    else if (str == "false")
245219820Sjeff        return false;
246219820Sjeff    else
247        throw value_error(F("Invalid boolean value '%s'") % str);
248}
249
250
251/// Identity function for to_type, for genericity purposes.
252///
253/// \param str The string to convert.
254///
255/// \return The input string.
256template<>
257std::string
258text::to_type(const std::string& str)
259{
260    return str;
261}
262