1// Copyright 2012 The Kyua Authors. 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are 6// met: 7// 8// * Redistributions of source code must retain the above copyright 9// notice, this list of conditions and the following disclaimer. 10// * Redistributions in binary form must reproduce the above copyright 11// notice, this list of conditions and the following disclaimer in the 12// documentation and/or other materials provided with the distribution. 13// * Neither the name of Google Inc. nor the names of its contributors 14// may be used to endorse or promote products derived from this software 15// without specific prior written permission. 16// 17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29#include "utils/text/operations.ipp" 30 31#include <sstream> 32 33#include "utils/format/macros.hpp" 34#include "utils/sanity.hpp" 35 36namespace text = utils::text; 37 38 39/// Replaces XML special characters from an input string. 40/// 41/// The list of XML special characters is specified here: 42/// http://www.w3.org/TR/xml11/#charsets 43/// 44/// \param in The input to quote. 45/// 46/// \return A quoted string without any XML special characters. 47std::string 48text::escape_xml(const std::string& in) 49{ 50 std::ostringstream quoted; 51 52 for (std::string::const_iterator it = in.begin(); 53 it != in.end(); ++it) { 54 unsigned char c = (unsigned char)*it; 55 if (c == '"') { 56 quoted << """; 57 } else if (c == '&') { 58 quoted << "&"; 59 } else if (c == '<') { 60 quoted << "<"; 61 } else if (c == '>') { 62 quoted << ">"; 63 } else if (c == '\'') { 64 quoted << "'"; 65 } else if ((c >= 0x01 && c <= 0x08) || 66 (c >= 0x0B && c <= 0x0C) || 67 (c >= 0x0E && c <= 0x1F) || 68 (c >= 0x7F && c <= 0x84) || 69 (c >= 0x86 && c <= 0x9F)) { 70 // for RestrictedChar characters, escape them 71 // as '&#[decimal ASCII value];' 72 // so that in the XML file we will see the escaped 73 // character. 74 quoted << "&#" << static_cast< std::string::size_type >(*it) 75 << ";"; 76 } else { 77 quoted << *it; 78 } 79 } 80 return quoted.str(); 81} 82 83 84/// Surrounds a string with quotes, escaping the quote itself if needed. 85/// 86/// \param text The string to quote. 87/// \param quote The quote character to use. 88/// 89/// \return The quoted string. 90std::string 91text::quote(const std::string& text, const char quote) 92{ 93 std::ostringstream quoted; 94 quoted << quote; 95 96 std::string::size_type start_pos = 0; 97 std::string::size_type last_pos = text.find(quote); 98 while (last_pos != std::string::npos) { 99 quoted << text.substr(start_pos, last_pos - start_pos) << '\\'; 100 start_pos = last_pos; 101 last_pos = text.find(quote, start_pos + 1); 102 } 103 quoted << text.substr(start_pos); 104 105 quoted << quote; 106 return quoted.str(); 107} 108 109 110/// Fills a paragraph to the specified length. 111/// 112/// This preserves any sequence of spaces in the input and any possible 113/// newlines. Sequences of spaces may be split in half (and thus one space is 114/// lost), but the rest of the spaces will be preserved as either trailing or 115/// leading spaces. 116/// 117/// \param input The string to refill. 118/// \param target_width The width to refill the paragraph to. 119/// 120/// \return The refilled paragraph as a sequence of independent lines. 121std::vector< std::string > 122text::refill(const std::string& input, const std::size_t target_width) 123{ 124 std::vector< std::string > output; 125 126 std::string::size_type start = 0; 127 while (start < input.length()) { 128 std::string::size_type width; 129 if (start + target_width >= input.length()) 130 width = input.length() - start; 131 else { 132 if (input[start + target_width] == ' ') { 133 width = target_width; 134 } else { 135 const std::string::size_type pos = input.find_last_of( 136 " ", start + target_width - 1); 137 if (pos == std::string::npos || pos < start + 1) { 138 width = input.find_first_of(" ", start + target_width); 139 if (width == std::string::npos) 140 width = input.length() - start; 141 else 142 width -= start; 143 } else { 144 width = pos - start; 145 } 146 } 147 } 148 INV(width != std::string::npos); 149 INV(start + width <= input.length()); 150 INV(input[start + width] == ' ' || input[start + width] == '\0'); 151 output.push_back(input.substr(start, width)); 152 153 start += width + 1; 154 } 155 156 if (input.empty()) { 157 INV(output.empty()); 158 output.push_back(""); 159 } 160 161 return output; 162} 163 164 165/// Fills a paragraph to the specified length. 166/// 167/// See the documentation for refill() for additional details. 168/// 169/// \param input The string to refill. 170/// \param target_width The width to refill the paragraph to. 171/// 172/// \return The refilled paragraph as a string with embedded newlines. 173std::string 174text::refill_as_string(const std::string& input, const std::size_t target_width) 175{ 176 return join(refill(input, target_width), "\n"); 177} 178 179 180/// Replaces all occurrences of a substring in a string. 181/// 182/// \param input The string in which to perform the replacement. 183/// \param search The pattern to be replaced. 184/// \param replacement The substring to replace search with. 185/// 186/// \return A copy of input with the replacements performed. 187std::string 188text::replace_all(const std::string& input, const std::string& search, 189 const std::string& replacement) 190{ 191 std::string output; 192 193 std::string::size_type pos, lastpos = 0; 194 while ((pos = input.find(search, lastpos)) != std::string::npos) { 195 output += input.substr(lastpos, pos - lastpos); 196 output += replacement; 197 lastpos = pos + search.length(); 198 } 199 output += input.substr(lastpos); 200 201 return output; 202} 203 204 205/// Splits a string into different components. 206/// 207/// \param str The string to split. 208/// \param delimiter The separator to use to split the words. 209/// 210/// \return The different words in the input string as split by the provided 211/// delimiter. 212std::vector< std::string > 213text::split(const std::string& str, const char delimiter) 214{ 215 std::vector< std::string > words; 216 if (!str.empty()) { 217 std::string::size_type pos = str.find(delimiter); 218 words.push_back(str.substr(0, pos)); 219 while (pos != std::string::npos) { 220 ++pos; 221 const std::string::size_type next = str.find(delimiter, pos); 222 words.push_back(str.substr(pos, next - pos)); 223 pos = next; 224 } 225 } 226 return words; 227} 228 229 230/// Converts a string to a boolean. 231/// 232/// \param str The string to convert. 233/// 234/// \return The converted string, if the input string was valid. 235/// 236/// \throw std::value_error If the input string does not represent a valid 237/// boolean value. 238template<> 239bool 240text::to_type(const std::string& str) 241{ 242 if (str == "true") 243 return true; 244 else if (str == "false") 245 return false; 246 else 247 throw value_error(F("Invalid boolean value '%s'") % str); 248} 249 250 251/// Identity function for to_type, for genericity purposes. 252/// 253/// \param str The string to convert. 254/// 255/// \return The input string. 256template<> 257std::string 258text::to_type(const std::string& str) 259{ 260 return str; 261} 262