// Copyright 2012 The Kyua Authors.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors
//   may be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28219820Sjeff 29219820Sjeff#include "utils/text/operations.ipp" 30219820Sjeff 31219820Sjeff#include <sstream> 32219820Sjeff 33219820Sjeff#include "utils/format/macros.hpp" 34219820Sjeff#include "utils/sanity.hpp" 35219820Sjeff 36219820Sjeffnamespace text = utils::text; 37219820Sjeff 38219820Sjeff 39219820Sjeff/// Replaces XML special characters from an input string. 40219820Sjeff/// 41219820Sjeff/// The list of XML special characters is specified here: 42219820Sjeff/// http://www.w3.org/TR/xml11/#charsets 43219820Sjeff/// 44219820Sjeff/// \param in The input to quote. 45219820Sjeff/// 46219820Sjeff/// \return A quoted string without any XML special characters. 47219820Sjeffstd::string 48219820Sjefftext::escape_xml(const std::string& in) 49219820Sjeff{ 50219820Sjeff std::ostringstream quoted; 51219820Sjeff 52219820Sjeff for (std::string::const_iterator it = in.begin(); 53219820Sjeff it != in.end(); ++it) { 54219820Sjeff unsigned char c = (unsigned char)*it; 55219820Sjeff if (c == '"') { 56219820Sjeff quoted << """; 57219820Sjeff } else if (c == '&') { 58219820Sjeff quoted << "&"; 59219820Sjeff } else if (c == '<') { 60219820Sjeff quoted << "<"; 61219820Sjeff } else if (c == '>') { 62219820Sjeff quoted << ">"; 63219820Sjeff } else if (c == '\'') { 64219820Sjeff quoted << "'"; 65219820Sjeff } else if ((c >= 0x01 && c <= 0x08) || 66219820Sjeff (c >= 0x0B && c <= 0x0C) || 67219820Sjeff (c >= 0x0E && c <= 0x1F) || 68219820Sjeff (c >= 0x7F && c <= 0x84) || 69219820Sjeff (c >= 0x86 && c <= 0x9F)) { 70219820Sjeff // for RestrictedChar characters, escape them 71219820Sjeff // as '&#[decimal ASCII value];' 72219820Sjeff // so that in the XML file we will see the escaped 73219820Sjeff // character. 
74219820Sjeff quoted << "&#" << static_cast< std::string::size_type >(*it) 75219820Sjeff << ";"; 76219820Sjeff } else { 77219820Sjeff quoted << *it; 78219820Sjeff } 79219820Sjeff } 80219820Sjeff return quoted.str(); 81219820Sjeff} 82219820Sjeff 83219820Sjeff 84219820Sjeff/// Surrounds a string with quotes, escaping the quote itself if needed. 85219820Sjeff/// 86219820Sjeff/// \param text The string to quote. 87219820Sjeff/// \param quote The quote character to use. 88219820Sjeff/// 89219820Sjeff/// \return The quoted string. 90219820Sjeffstd::string 91219820Sjefftext::quote(const std::string& text, const char quote) 92219820Sjeff{ 93219820Sjeff std::ostringstream quoted; 94219820Sjeff quoted << quote; 95219820Sjeff 96219820Sjeff std::string::size_type start_pos = 0; 97219820Sjeff std::string::size_type last_pos = text.find(quote); 98219820Sjeff while (last_pos != std::string::npos) { 99219820Sjeff quoted << text.substr(start_pos, last_pos - start_pos) << '\\'; 100219820Sjeff start_pos = last_pos; 101219820Sjeff last_pos = text.find(quote, start_pos + 1); 102219820Sjeff } 103219820Sjeff quoted << text.substr(start_pos); 104219820Sjeff 105219820Sjeff quoted << quote; 106219820Sjeff return quoted.str(); 107219820Sjeff} 108219820Sjeff 109219820Sjeff 110219820Sjeff/// Fills a paragraph to the specified length. 111219820Sjeff/// 112219820Sjeff/// This preserves any sequence of spaces in the input and any possible 113219820Sjeff/// newlines. Sequences of spaces may be split in half (and thus one space is 114219820Sjeff/// lost), but the rest of the spaces will be preserved as either trailing or 115219820Sjeff/// leading spaces. 116219820Sjeff/// 117219820Sjeff/// \param input The string to refill. 118219820Sjeff/// \param target_width The width to refill the paragraph to. 119219820Sjeff/// 120219820Sjeff/// \return The refilled paragraph as a sequence of independent lines. 
121219820Sjeffstd::vector< std::string > 122219820Sjefftext::refill(const std::string& input, const std::size_t target_width) 123219820Sjeff{ 124219820Sjeff std::vector< std::string > output; 125219820Sjeff 126219820Sjeff std::string::size_type start = 0; 127219820Sjeff while (start < input.length()) { 128219820Sjeff std::string::size_type width; 129219820Sjeff if (start + target_width >= input.length()) 130219820Sjeff width = input.length() - start; 131219820Sjeff else { 132219820Sjeff if (input[start + target_width] == ' ') { 133219820Sjeff width = target_width; 134219820Sjeff } else { 135219820Sjeff const std::string::size_type pos = input.find_last_of( 136219820Sjeff " ", start + target_width - 1); 137219820Sjeff if (pos == std::string::npos || pos < start + 1) { 138219820Sjeff width = input.find_first_of(" ", start + target_width); 139219820Sjeff if (width == std::string::npos) 140219820Sjeff width = input.length() - start; 141219820Sjeff else 142219820Sjeff width -= start; 143219820Sjeff } else { 144219820Sjeff width = pos - start; 145219820Sjeff } 146219820Sjeff } 147219820Sjeff } 148219820Sjeff INV(width != std::string::npos); 149219820Sjeff INV(start + width <= input.length()); 150219820Sjeff INV(input[start + width] == ' ' || input[start + width] == '\0'); 151219820Sjeff output.push_back(input.substr(start, width)); 152219820Sjeff 153219820Sjeff start += width + 1; 154219820Sjeff } 155219820Sjeff 156219820Sjeff if (input.empty()) { 157219820Sjeff INV(output.empty()); 158219820Sjeff output.push_back(""); 159219820Sjeff } 160219820Sjeff 161219820Sjeff return output; 162219820Sjeff} 163219820Sjeff 164219820Sjeff 165219820Sjeff/// Fills a paragraph to the specified length. 166219820Sjeff/// 167219820Sjeff/// See the documentation for refill() for additional details. 168219820Sjeff/// 169219820Sjeff/// \param input The string to refill. 170219820Sjeff/// \param target_width The width to refill the paragraph to. 
171219820Sjeff/// 172219820Sjeff/// \return The refilled paragraph as a string with embedded newlines. 173219820Sjeffstd::string 174219820Sjefftext::refill_as_string(const std::string& input, const std::size_t target_width) 175219820Sjeff{ 176219820Sjeff return join(refill(input, target_width), "\n"); 177219820Sjeff} 178219820Sjeff 179219820Sjeff 180219820Sjeff/// Replaces all occurrences of a substring in a string. 181219820Sjeff/// 182219820Sjeff/// \param input The string in which to perform the replacement. 183219820Sjeff/// \param search The pattern to be replaced. 184219820Sjeff/// \param replacement The substring to replace search with. 185219820Sjeff/// 186219820Sjeff/// \return A copy of input with the replacements performed. 187219820Sjeffstd::string 188219820Sjefftext::replace_all(const std::string& input, const std::string& search, 189219820Sjeff const std::string& replacement) 190219820Sjeff{ 191219820Sjeff std::string output; 192219820Sjeff 193219820Sjeff std::string::size_type pos, lastpos = 0; 194219820Sjeff while ((pos = input.find(search, lastpos)) != std::string::npos) { 195219820Sjeff output += input.substr(lastpos, pos - lastpos); 196219820Sjeff output += replacement; 197219820Sjeff lastpos = pos + search.length(); 198219820Sjeff } 199219820Sjeff output += input.substr(lastpos); 200219820Sjeff 201219820Sjeff return output; 202219820Sjeff} 203219820Sjeff 204219820Sjeff 205219820Sjeff/// Splits a string into different components. 206219820Sjeff/// 207219820Sjeff/// \param str The string to split. 208219820Sjeff/// \param delimiter The separator to use to split the words. 209219820Sjeff/// 210219820Sjeff/// \return The different words in the input string as split by the provided 211219820Sjeff/// delimiter. 
212219820Sjeffstd::vector< std::string > 213219820Sjefftext::split(const std::string& str, const char delimiter) 214219820Sjeff{ 215219820Sjeff std::vector< std::string > words; 216219820Sjeff if (!str.empty()) { 217219820Sjeff std::string::size_type pos = str.find(delimiter); 218219820Sjeff words.push_back(str.substr(0, pos)); 219219820Sjeff while (pos != std::string::npos) { 220219820Sjeff ++pos; 221219820Sjeff const std::string::size_type next = str.find(delimiter, pos); 222219820Sjeff words.push_back(str.substr(pos, next - pos)); 223219820Sjeff pos = next; 224219820Sjeff } 225219820Sjeff } 226219820Sjeff return words; 227219820Sjeff} 228219820Sjeff 229219820Sjeff 230219820Sjeff/// Converts a string to a boolean. 231219820Sjeff/// 232219820Sjeff/// \param str The string to convert. 233219820Sjeff/// 234219820Sjeff/// \return The converted string, if the input string was valid. 235219820Sjeff/// 236219820Sjeff/// \throw std::value_error If the input string does not represent a valid 237219820Sjeff/// boolean value. 238219820Sjefftemplate<> 239219820Sjeffbool 240219820Sjefftext::to_type(const std::string& str) 241219820Sjeff{ 242219820Sjeff if (str == "true") 243219820Sjeff return true; 244219820Sjeff else if (str == "false") 245219820Sjeff return false; 246219820Sjeff else 247 throw value_error(F("Invalid boolean value '%s'") % str); 248} 249 250 251/// Identity function for to_type, for genericity purposes. 252/// 253/// \param str The string to convert. 254/// 255/// \return The input string. 256template<> 257std::string 258text::to_type(const std::string& str) 259{ 260 return str; 261} 262