/*
 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
 * Copyright (C) 2005-2010, 2013-2014 Apple Inc. All rights reserved.
 * Copyright (C) 2009 Google Inc. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#ifndef StringImpl_h
#define StringImpl_h

#include <limits.h>
#include <unicode/uchar.h>
#include <unicode/ustring.h>
#include <wtf/ASCIICType.h>
#include <wtf/Forward.h>
#include <wtf/MathExtras.h>
#include <wtf/StdLibExtras.h>
#include <wtf/StringHasher.h>
#include <wtf/Vector.h>
#include <wtf/text/ConversionMode.h>

#if USE(CF)
typedef const struct __CFString * CFStringRef;
#endif

#ifdef __OBJC__
@class NSString;
#endif

// Forward declarations of the JSC types that are befriended by StringImpl below.
namespace JSC {
namespace LLInt { class Data; }
class LLIntOffsetsExtractor;
}

namespace WTF {

// Translator types that are befriended by StringImpl below.
struct CStringTranslator;
template<typename CharacterType> struct HashAndCharactersTranslator;
struct HashAndUTF8CharactersTranslator;
struct LCharBufferTranslator;
struct CharBufferFromLiteralDataTranslator;
struct SubstringTranslator;
struct UCharBufferTranslator;
template<typename> class RetainPtr;

enum TextCaseSensitivity {
    TextCaseSensitive,
    TextCaseInsensitive
};

// Predicates over a single UChar, taken by removeCharacters()/find() and by
// stripWhiteSpace()/simplifyWhiteSpace() respectively.
typedef bool (*CharacterMatchFunctionPtr)(UChar);
typedef bool (*IsWhiteSpaceFunctionPtr)(UChar);

// Define STRING_STATS to turn on run time statistics of string sizes and memory usage
#undef STRING_STATS

#ifdef STRING_STATS
// Counters that are updated through the STRING_STATS_* macros defined below.
struct StringStats {
    // Counts one more 8-bit string; its length is added to the 8-bit data
    // total unless the string is a substring.
    inline void add8BitString(unsigned length, bool isSubString = false)
    {
        ++m_totalNumberStrings;
        ++m_number8BitStrings;
        if (!isSubString)
            m_total8BitData += length;
    }

    // Counts one more 16-bit string; its length is added to the 16-bit data
    // total unless the string is a substring.
    inline void add16BitString(unsigned length, bool isSubString = false)
    {
        ++m_totalNumberStrings;
        ++m_number16BitStrings;
        if (!isSubString)
            m_total16BitData += length;
    }

    inline void addUpconvertedString(unsigned length)
    {
        ++m_numberUpconvertedStrings;
        m_totalUpconvertedData += length;
    }

    void removeString(StringImpl*);
    void printStats();

    static const unsigned s_printStringStatsFrequency = 5000;
    static unsigned s_stringRemovesTillPrintStats;

    unsigned m_totalNumberStrings;
    unsigned m_number8BitStrings;
    unsigned m_number16BitStrings;
    unsigned m_numberUpconvertedStrings;
    unsigned long long m_total8BitData;
    unsigned long long m_total16BitData;
    unsigned long long m_totalUpconvertedData;
};

#define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringStats().add8BitString(length, isSubString)
#define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringStats().add16BitString(length, isSubString)
#define STRING_STATS_ADD_UPCONVERTED_STRING(length) StringImpl::stringStats().addUpconvertedString(length)
#define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string)
#else
// With STRING_STATS undefined every statistics hook expands to a no-op expression.
#define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) ((void)0)
#define STRING_STATS_ADD_16BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) ((void)0)
#define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0)
#define STRING_STATS_REMOVE_STRING(string) ((void)0)
#endif

// Ref-counted string storage. The characters are either 8-bit (LChar) or
// 16-bit (UChar), selected by the s_hashFlag8BitBuffer bit of m_hashAndFlags.
// m_hashAndFlags also carries the buffer ownership kind, the atomic and
// "did report cost" flags, and the cached hash (shifted left by s_flagCount).
class StringImpl {
    WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED;
    friend struct WTF::CStringTranslator;
    template<typename CharacterType> friend struct WTF::HashAndCharactersTranslator;
    friend struct WTF::HashAndUTF8CharactersTranslator;
    friend struct WTF::CharBufferFromLiteralDataTranslator;
    friend struct WTF::LCharBufferTranslator;
    friend struct WTF::SubstringTranslator;
    friend struct WTF::UCharBufferTranslator;
    friend class AtomicStringImpl;
    friend class JSC::LLInt::Data;
    friend class JSC::LLIntOffsetsExtractor;

private:
    // Kind of character buffer; read back from the two low bits of
    // m_hashAndFlags by bufferOwnership() (mask s_hashMaskBufferOwnership).
    // BufferSubstring strings keep a pointer to the string whose characters
    // they share at tailPointer<StringImpl*>() (see substringBuffer()).
    enum BufferOwnership {
        BufferInternal,
        BufferOwned,
        BufferSubstring,
    };

    // Used to construct static strings, which have a special refCount that can never hit zero.
    // This means that the static string will never be destroyed, which is important because
    // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
    friend class NeverDestroyed<StringImpl>;
    enum ConstructEmptyStringTag { ConstructEmptyString };
    StringImpl(ConstructEmptyStringTag)
        : m_refCount(s_refCountFlagIsStaticString)
        , m_length(0)
        , m_data8(reinterpret_cast<const LChar*>(&m_length))
        , m_hashAndFlags(s_hashFlag8BitBuffer | s_hashFlagIsAtomic | BufferOwned)
    {
        // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
        // with impunity. The empty string is special because it is never entered into
        // AtomicString's HashKey, but still needs to compare correctly.
        STRING_STATS_ADD_8BIT_STRING(m_length);

        hash();
    }

    // FIXME: there has to be a less hacky way to do this.
    enum Force8Bit { Force8BitConstructor };
    // Create a normal 8-bit string with internal storage (BufferInternal)
    StringImpl(unsigned length, Force8Bit)
        : m_refCount(s_refCountIncrement)
        , m_length(length)
        , m_data8(tailPointer<LChar>())
        , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal)
    {
        ASSERT(m_data8);
        ASSERT(m_length);

        STRING_STATS_ADD_8BIT_STRING(m_length);
    }

    // Create a normal 16-bit string with internal storage (BufferInternal)
    StringImpl(unsigned length)
        : m_refCount(s_refCountIncrement)
        , m_length(length)
        , m_data16(tailPointer<UChar>())
        , m_hashAndFlags(BufferInternal)
    {
        ASSERT(m_data16);
        ASSERT(m_length);

        STRING_STATS_ADD_16BIT_STRING(m_length);
    }

    // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
    StringImpl(MallocPtr<LChar> characters, unsigned length)
        : m_refCount(s_refCountIncrement)
        , m_length(length)
        , m_data8(characters.leakPtr())
        , m_hashAndFlags(s_hashFlag8BitBuffer | BufferOwned)
    {
        ASSERT(m_data8);
        ASSERT(m_length);

        STRING_STATS_ADD_8BIT_STRING(m_length);
    }

    // The two ConstructWithoutCopying constructors store the caller's pointer
    // directly and tag the string BufferInternal. The data pointer therefore
    // differs from tailPointer<T>(), which is what requiresCopy() tests for.
    enum ConstructWithoutCopyingTag { ConstructWithoutCopying };
    StringImpl(const UChar* characters, unsigned length, ConstructWithoutCopyingTag)
        : m_refCount(s_refCountIncrement)
        , m_length(length)
        , m_data16(characters)
        , m_hashAndFlags(BufferInternal)
    {
        ASSERT(m_data16);
        ASSERT(m_length);

        // Reported with length 0, so no character data is added to the statistics.
        STRING_STATS_ADD_16BIT_STRING(0);
    }

    StringImpl(const LChar* characters, unsigned length, ConstructWithoutCopyingTag)
        : m_refCount(s_refCountIncrement)
        , m_length(length)
        , m_data8(characters)
        , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal)
    {
        ASSERT(m_data8);
        ASSERT(m_length);

        // Reported with length 0, so no character data is added to the statistics.
        STRING_STATS_ADD_8BIT_STRING(0);
    }

    // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
    StringImpl(MallocPtr<UChar> characters, unsigned length)
        : m_refCount(s_refCountIncrement)
        , m_length(length)
        , m_data16(characters.leakPtr())
        , m_hashAndFlags(BufferOwned)
    {
        ASSERT(m_data16);
        ASSERT(m_length);

        STRING_STATS_ADD_16BIT_STRING(m_length);
    }

    // Used to create new strings that are a substring of an existing 8-bit StringImpl (BufferSubstring)
    StringImpl(const LChar* characters, unsigned length, PassRefPtr<StringImpl> base)
        : m_refCount(s_refCountIncrement)
        , m_length(length)
        , m_data8(characters)
        , m_hashAndFlags(s_hashFlag8BitBuffer | BufferSubstring)
    {
        ASSERT(is8Bit());
        ASSERT(m_data8);
        ASSERT(m_length);
        ASSERT(base->bufferOwnership() != BufferSubstring);

        // The reference held by 'base' is leaked into the tail slot.
        substringBuffer() = base.leakRef();

        STRING_STATS_ADD_8BIT_STRING2(m_length, true);
    }

    // Used to create new strings that are a substring of an existing 16-bit StringImpl (BufferSubstring)
    StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
        : m_refCount(s_refCountIncrement)
        , m_length(length)
        , m_data16(characters)
        , m_hashAndFlags(BufferSubstring)
    {
        ASSERT(!is8Bit());
        ASSERT(m_data16);
        ASSERT(m_length);
        ASSERT(base->bufferOwnership() != BufferSubstring);

        // The reference held by 'base' is leaked into the tail slot.
        substringBuffer() = base.leakRef();

        STRING_STATS_ADD_16BIT_STRING2(m_length, true);
    }

    // Creates an empty string that is not static (see isEmptyUnique()); its
    // flags come from hashAndFlagsForEmptyUnique().
    enum CreateEmptyUniqueTag { CreateEmptyUnique };
    StringImpl(CreateEmptyUniqueTag)
        : m_refCount(s_refCountIncrement)
        , m_length(0)
        // We expect m_length to be initialized to 0 as we use it
        // to represent a null terminated buffer.
        , m_data8(reinterpret_cast<const LChar*>(&m_length))
        , m_hashAndFlags(hashAndFlagsForEmptyUnique())
    {
        ASSERT(m_data8);

        STRING_STATS_ADD_8BIT_STRING(m_length);
    }

    ~StringImpl();

public:
    // Called by deref() when the reference count reaches zero.
    WTF_EXPORT_STRING_API static void destroy(StringImpl*);

    WTF_EXPORT_STRING_API static PassRef<StringImpl> create(const UChar*, unsigned length);
    WTF_EXPORT_STRING_API static PassRef<StringImpl> create(const LChar*, unsigned length);
    WTF_EXPORT_STRING_API static PassRef<StringImpl> create8BitIfPossible(const UChar*, unsigned length);
    template<size_t inlineCapacity>
    static PassRef<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector)
    {
        return create8BitIfPossible(vector.data(), vector.size());
    }
    WTF_EXPORT_STRING_API static PassRef<StringImpl> create8BitIfPossible(const UChar*);

    // The 'char' overloads forward to the LChar overloads.
    ALWAYS_INLINE static PassRef<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); }
    WTF_EXPORT_STRING_API static PassRef<StringImpl> create(const LChar*);
    ALWAYS_INLINE static PassRef<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); }

    // Creates a BufferSubstring string that shares the characters of the
    // 8-bit string 'rep', starting at 'offset'. Returns the shared empty
    // string when 'length' is 0. If 'rep' is itself a substring, the new
    // string refers to rep's base string instead.
    static ALWAYS_INLINE PassRef<StringImpl> createSubstringSharingImpl8(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
    {
        ASSERT(rep);
        ASSERT(length <= rep->length());

        if (!length)
            return *empty();

        ASSERT(rep->is8Bit());
        StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->substringBuffer() : rep.get();

        // We allocate a buffer that contains both the StringImpl struct as well as the pointer to the owner string.
        StringImpl* stringImpl = static_cast<StringImpl*>(fastMalloc(allocationSize<StringImpl*>(1)));
        return adoptRef(*new (NotNull, stringImpl) StringImpl(rep->m_data8 + offset, length, ownerRep));
    }

    // Same as createSubstringSharingImpl8(), but handles both 8-bit and
    // 16-bit 'rep' by selecting the matching substring constructor.
    static ALWAYS_INLINE PassRef<StringImpl> createSubstringSharingImpl(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
    {
        ASSERT(rep);
        ASSERT(length <= rep->length());

        if (!length)
            return *empty();

        StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->substringBuffer() : rep.get();

        // We allocate a buffer that contains both the StringImpl struct as well as the pointer to the owner string.
        StringImpl* stringImpl = static_cast<StringImpl*>(fastMalloc(allocationSize<StringImpl*>(1)));
        if (rep->is8Bit())
            return adoptRef(*new (NotNull, stringImpl) StringImpl(rep->m_data8 + offset, length, ownerRep));
        return adoptRef(*new (NotNull, stringImpl) StringImpl(rep->m_data16 + offset, length, ownerRep));
    }

    // Creates a string from a character array via createWithoutCopying().
    // charactersCount includes the terminator, so the string length is
    // charactersCount - 1; empty literals are rejected at compile time.
    template<unsigned charactersCount>
    ALWAYS_INLINE static PassRef<StringImpl> createFromLiteral(const char (&characters)[charactersCount])
    {
        COMPILE_ASSERT(charactersCount > 1, StringImplFromLiteralNotEmpty);
        COMPILE_ASSERT((charactersCount - 1 <= ((unsigned(~0) - sizeof(StringImpl)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow);

        return createWithoutCopying(reinterpret_cast<const LChar*>(characters), charactersCount - 1);
    }

    // FIXME: Transition off of these functions to createWithoutCopying instead.
    WTF_EXPORT_STRING_API static PassRef<StringImpl> createFromLiteral(const char* characters, unsigned length);
    WTF_EXPORT_STRING_API static PassRef<StringImpl> createFromLiteral(const char* characters);

    WTF_EXPORT_STRING_API static PassRef<StringImpl> createWithoutCopying(const UChar* characters, unsigned length);
    WTF_EXPORT_STRING_API static PassRef<StringImpl> createWithoutCopying(const LChar* characters, unsigned length);

    WTF_EXPORT_STRING_API static PassRef<StringImpl> createUninitialized(unsigned length, LChar*& data);
    WTF_EXPORT_STRING_API static PassRef<StringImpl> createUninitialized(unsigned length, UChar*& data);
    // Allocates a string with internal storage for 'length' characters of
    // type T and sets 'output' to that storage. For length 0 it returns
    // empty() and sets 'output' to 0. If the allocation size would overflow
    // or tryFastMalloc fails, it returns 0 and sets 'output' to 0.
    template <typename T> static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, T*& output)
    {
        if (!length) {
            output = 0;
            return empty();
        }

        if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(T))) {
            output = 0;
            return 0;
        }
        StringImpl* resultImpl;
        if (!tryFastMalloc(allocationSize<T>(length)).getValue(resultImpl)) {
            output = 0;
            return 0;
        }
        output = resultImpl->tailPointer<T>();

        return constructInternal<T>(resultImpl, length);
    }

    static PassRef<StringImpl> createEmptyUnique()
    {
        return adoptRef(*new StringImpl(CreateEmptyUnique));
    }

    // Reallocate the StringImpl. The originalString must be only owned by the PassRefPtr,
    // and the buffer ownership must be BufferInternal. Just like the input pointer of realloc(),
    // the originalString can't be used after this function.
    static PassRef<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data);
    static PassRef<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data);

    // Member offsets and flag bit values, exposed for code outside the class.
    static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAndFlags); }
    static unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; }
    static unsigned flagIsAtomic() { return s_hashFlagIsAtomic; }
    static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8); }

    // Takes over the vector's buffer (releaseBuffer()) as a BufferOwned
    // string. Crashes if the size does not fit in 'unsigned'; returns the
    // shared empty string for an empty vector.
    template<typename CharType, size_t inlineCapacity, typename OverflowHandler>
    static PassRef<StringImpl> adopt(Vector<CharType, inlineCapacity, OverflowHandler>& vector)
    {
        if (size_t size = vector.size()) {
            ASSERT(vector.data());
            if (size > std::numeric_limits<unsigned>::max())
                CRASH();
            return adoptRef(*new StringImpl(vector.releaseBuffer(), size));
        }
        return *empty();
    }

    WTF_EXPORT_STRING_API static PassRef<StringImpl> adopt(StringBuffer<UChar>&);
    WTF_EXPORT_STRING_API static PassRef<StringImpl> adopt(StringBuffer<LChar>&);

    unsigned length() const { return m_length; }
    bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; }

    // Each accessor asserts that the string has the matching character width.
    ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_data8; }
    ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return m_data16; }

    template <typename CharType>
    ALWAYS_INLINE const CharType *characters() const;

    // Returns the size in bytes of the character data on the first call and
    // 0 on later calls; the first call sets s_hashFlagDidReportCost.
    size_t cost() const
    {
        // For substrings, return the cost of the base string.
        if (bufferOwnership() == BufferSubstring)
            return substringBuffer()->cost();

        if (m_hashAndFlags & s_hashFlagDidReportCost)
            return 0;

        m_hashAndFlags |= s_hashFlagDidReportCost;
        size_t result = m_length;
        if (!is8Bit())
            result <<= 1;
        return result;
    }

    // Returns the size in bytes of the character data (for substrings, the
    // base string's costDuringGC()) divided by refCount(), rounded up.
    // Static strings report 0.
    size_t costDuringGC()
    {
        if (isStatic())
            return 0;

        if (bufferOwnership() == BufferSubstring)
            return divideRoundedUp(substringBuffer()->costDuringGC(), refCount());

        size_t result = m_length;
        if (!is8Bit())
            result <<= 1;
        return divideRoundedUp(result, refCount());
    }

    WTF_EXPORT_STRING_API size_t sizeInBytes() const;

    bool isEmptyUnique() const
    {
        return !length() && !isStatic();
    }

    bool isAtomic() const { return m_hashAndFlags & s_hashFlagIsAtomic; }
    // Sets or clears the atomic flag; must not be called on static or
    // empty-unique strings (asserted).
    void setIsAtomic(bool isAtomic)
    {
        ASSERT(!isStatic());
        ASSERT(!isEmptyUnique());
        if (isAtomic)
            m_hashAndFlags |= s_hashFlagIsAtomic;
        else
            m_hashAndFlags &= ~s_hashFlagIsAtomic;
    }

#ifdef STRING_STATS
    bool isSubString() const { return bufferOwnership() == BufferSubstring; }
#endif

    static WTF_EXPORT_STRING_API CString utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode = LenientConversion);
    WTF_EXPORT_STRING_API CString utf8ForRange(unsigned offset, unsigned length, ConversionMode = LenientConversion) const;
    WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const;

private:
    static WTF_EXPORT_STRING_API bool utf8Impl(const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode);

    // The high bits of 'hash' are always empty, but we prefer to store our flags
    // in the low bits because it makes them slightly more efficient to access.
    // So, we shift left and right when setting and getting our hash code.
    void setHash(unsigned hash) const
    {
        ASSERT(!hasHash());
        // Multiple clients assume that StringHasher is the canonical string hash function.
        ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(m_data8, m_length) : StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length)));
        ASSERT(!(hash & (s_flagMask << (8 * sizeof(hash) - s_flagCount)))); // Verify that enough high bits are empty.

        hash <<= s_flagCount;
        ASSERT(!(hash & m_hashAndFlags)); // Verify that enough low bits are empty after shift.
        ASSERT(hash); // Verify that 0 is a valid sentinel hash value.

        m_hashAndFlags |= hash; // Store hash with flags in low bits.
    }

    // The stored hash with the flag bits shifted out; 0 means "no hash yet".
    unsigned rawHash() const
    {
        return m_hashAndFlags >> s_flagCount;
    }

public:
    bool hasHash() const
    {
        return rawHash() != 0;
    }

    unsigned existingHash() const
    {
        ASSERT(hasHash());
        return rawHash();
    }

    // Returns the cached hash, falling back to hashSlowCase() when none is stored.
    unsigned hash() const
    {
        if (hasHash())
            return existingHash();
        return hashSlowCase();
    }

    bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; }

    // m_refCount counts in steps of s_refCountIncrement; its bottom bit is
    // the static-string flag.
    inline size_t refCount() const
    {
        return m_refCount / s_refCountIncrement;
    }

    inline bool hasOneRef() const
    {
        return m_refCount == s_refCountIncrement;
    }

    // This method is useful for assertions.
    inline bool hasAtLeastOneRef() const
    {
        return !!m_refCount;
    }

    inline void ref()
    {
        ASSERT(!isCompilationThread());
        m_refCount += s_refCountIncrement;
    }

    // Drops one reference. The string is destroyed only when the whole
    // counter becomes 0, which cannot happen while the static flag bit is set.
    inline void deref()
    {
        ASSERT(!isCompilationThread());
        unsigned tempRefCount = m_refCount - s_refCountIncrement;
        if (!tempRefCount) {
            StringImpl::destroy(this);
            return;
        }
        m_refCount = tempRefCount;
    }

    WTF_EXPORT_PRIVATE static StringImpl* empty();

    // FIXME: Does this really belong in StringImpl?
    // Copies numCharacters elements from source to destination. Counts up to
    // s_copyCharsInlineCutOff are copied with inline loops (on x86/x86_64
    // first in uint32_t-sized steps); larger counts use memcpy.
    template <typename T> static void copyChars(T* destination, const T* source, unsigned numCharacters)
    {
        if (numCharacters == 1) {
            *destination = *source;
            return;
        }

        if (numCharacters <= s_copyCharsInlineCutOff) {
            unsigned i = 0;
#if (CPU(X86) || CPU(X86_64))
            const unsigned charsPerInt = sizeof(uint32_t) / sizeof(T);

            if (numCharacters > charsPerInt) {
                // Largest multiple of charsPerInt that is <= numCharacters.
                unsigned stopCount = numCharacters & ~(charsPerInt - 1);

                const uint32_t* srcCharacters = reinterpret_cast<const uint32_t*>(source);
                uint32_t* destCharacters = reinterpret_cast<uint32_t*>(destination);
                for (unsigned j = 0; i < stopCount; i += charsPerInt, ++j)
                    destCharacters[j] = srcCharacters[j];
            }
#endif
            // Copy whatever the word-sized loop above did not cover.
            for (; i < numCharacters; ++i)
                destination[i] = source[i];
        } else
            memcpy(destination, source, numCharacters * sizeof(T));
    }

    // Widening copy: each LChar is converted to a UChar.
    ALWAYS_INLINE static void copyChars(UChar* destination, const LChar* source, unsigned numCharacters)
    {
        for (unsigned i = 0; i < numCharacters; ++i)
            destination[i] = source[i];
    }

    // Some string features, like refcounting and the atomicity flag, are not
    // thread-safe. We achieve thread safety by isolation, giving each thread
    // its own copy of the string.
    PassRef<StringImpl> isolatedCopy() const;

    WTF_EXPORT_STRING_API PassRef<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);

    // Returns the character at index i as a UChar, for either character width.
    UChar at(unsigned i) const
    {
        ASSERT_WITH_SECURITY_IMPLICATION(i < m_length);
        if (is8Bit())
            return m_data8[i];
        return m_data16[i];
    }
    UChar operator[](unsigned i) const { return at(i); }
    WTF_EXPORT_STRING_API UChar32 characterStartingAt(unsigned);

    WTF_EXPORT_STRING_API bool containsOnlyWhitespace();

    int toIntStrict(bool* ok = 0, int base = 10);
    unsigned toUIntStrict(bool* ok = 0, int base = 10);
    int64_t toInt64Strict(bool* ok = 0, int base = 10);
    uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
    intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);

    WTF_EXPORT_STRING_API int toInt(bool* ok = 0); // ignores trailing garbage
    unsigned toUInt(bool* ok = 0); // ignores trailing garbage
    int64_t toInt64(bool* ok = 0); // ignores trailing garbage
    uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
    intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage

    // FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage.
    // Like the non-strict functions above, these return the value when there is trailing garbage.
    // It would be better if these were more consistent with the above functions instead.
    double toDouble(bool* ok = 0);
    float toFloat(bool* ok = 0);

    WTF_EXPORT_STRING_API PassRef<StringImpl> convertToASCIILowercase();
    WTF_EXPORT_STRING_API PassRef<StringImpl> lower();
    WTF_EXPORT_STRING_API PassRef<StringImpl> upper();
    WTF_EXPORT_STRING_API PassRef<StringImpl> lower(const AtomicString& localeIdentifier);
    WTF_EXPORT_STRING_API PassRef<StringImpl> upper(const AtomicString& localeIdentifier);

    WTF_EXPORT_STRING_API PassRef<StringImpl> fill(UChar);
    // FIXME: Do we need fill(char) or can we just do the right thing if UChar is ASCII?

    PassRef<StringImpl> foldCase();

    PassRef<StringImpl> stripWhiteSpace();
    PassRef<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr);
    WTF_EXPORT_STRING_API PassRef<StringImpl> simplifyWhiteSpace();
    PassRef<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr);

    PassRef<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
    template <typename CharType>
    ALWAYS_INLINE PassRef<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr);

    size_t find(LChar character, unsigned start = 0);
    size_t find(char character, unsigned start = 0);
    size_t find(UChar character, unsigned start = 0);
    WTF_EXPORT_STRING_API size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
    size_t find(const LChar*, unsigned index = 0);
    ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) { return find(reinterpret_cast<const LChar*>(s), index); }
    WTF_EXPORT_STRING_API size_t find(StringImpl*);
    WTF_EXPORT_STRING_API size_t find(StringImpl*, unsigned index);
    size_t findIgnoringCase(const LChar*, unsigned index = 0);
    ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { return findIgnoringCase(reinterpret_cast<const LChar*>(s), index); }
    WTF_EXPORT_STRING_API size_t findIgnoringCase(StringImpl*, unsigned index = 0);

    WTF_EXPORT_STRING_API size_t findNextLineStart(unsigned index = UINT_MAX);

    WTF_EXPORT_STRING_API size_t reverseFind(UChar, unsigned index = UINT_MAX);
    WTF_EXPORT_STRING_API size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
    WTF_EXPORT_STRING_API size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX);

    WTF_EXPORT_STRING_API bool startsWith(const StringImpl*) const;
    // The case-insensitive path checks that reverseFindIgnoringCase(str, 0) reports a match at index 0.
    bool startsWith(StringImpl* str, bool caseSensitive) { return caseSensitive ? startsWith(str) : (reverseFindIgnoringCase(str, 0) == 0); }
    WTF_EXPORT_STRING_API bool startsWith(UChar) const;
    WTF_EXPORT_STRING_API bool startsWith(const char*, unsigned matchLength, bool caseSensitive) const;
    // Array overload: matchLength counts the terminator, so matchLength - 1 characters are compared.
    template<unsigned matchLength>
    bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return startsWith(prefix, matchLength - 1, caseSensitive); }

    WTF_EXPORT_STRING_API bool endsWith(StringImpl*, bool caseSensitive = true);
    WTF_EXPORT_STRING_API bool endsWith(UChar) const;
    WTF_EXPORT_STRING_API bool endsWith(const char*, unsigned matchLength, bool caseSensitive) const;
    // Array overload: matchLength counts the terminator, so matchLength - 1 characters are compared.
    template<unsigned matchLength>
    bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return endsWith(prefix, matchLength - 1, caseSensitive); }

    WTF_EXPORT_STRING_API PassRef<StringImpl> replace(UChar, UChar);
    WTF_EXPORT_STRING_API PassRef<StringImpl> replace(UChar, StringImpl*);
    ALWAYS_INLINE PassRef<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); }
    WTF_EXPORT_STRING_API PassRef<StringImpl> replace(UChar, const LChar*, unsigned replacementLength);
    PassRef<StringImpl> replace(UChar, const UChar*, unsigned replacementLength);
    WTF_EXPORT_STRING_API PassRef<StringImpl> replace(StringImpl*, StringImpl*);
    WTF_EXPORT_STRING_API PassRef<StringImpl> replace(unsigned index, unsigned len, StringImpl*);

    WTF_EXPORT_STRING_API UCharDirection defaultWritingDirection(bool* hasStrongDirectionality = nullptr);

#if USE(CF)
    RetainPtr<CFStringRef> createCFString();
#endif
#ifdef __OBJC__
    WTF_EXPORT_STRING_API operator NSString*();
#endif

#ifdef STRING_STATS
    ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; }
#endif

    WTF_EXPORT_STRING_API static const UChar latin1CaseFoldTable[256];

private:
    // Returns true for every ownership kind other than BufferInternal. For
    // BufferInternal it returns true only when the data pointer is this
    // object's own tail storage, i.e. false when the string points at
    // characters stored elsewhere.
    bool requiresCopy() const
    {
        if (bufferOwnership() != BufferInternal)
            return true;

        if (is8Bit())
            return m_data8 == tailPointer<LChar>();
        return m_data16 == tailPointer<UChar>();
    }

    // Bytes needed for a StringImpl followed by tailElementCount elements of type T.
    template<typename T>
    static size_t allocationSize(unsigned tailElementCount)
    {
        return tailOffset<T>() + tailElementCount * sizeof(T);
    }

    // Byte offset from the start of the object to its tail storage for
    // elements of type T.
    template<typename T>
    static ptrdiff_t tailOffset()
    {
#if COMPILER(MSVC)
        // MSVC doesn't support alignof yet.
        return roundUpToMultipleOf<sizeof(T)>(sizeof(StringImpl));
#else
        // End of the last data member (m_hashAndFlags), rounded up to T's alignment.
        return roundUpToMultipleOf<alignof(T)>(offsetof(StringImpl, m_hashAndFlags) + sizeof(StringImpl::m_hashAndFlags));
#endif
    }

    template<typename T>
    const T* tailPointer() const
    {
        return reinterpret_cast<const T*>(reinterpret_cast<const uint8_t*>(this) + tailOffset<T>());
    }

    template<typename T>
    T* tailPointer()
    {
        return reinterpret_cast<T*>(reinterpret_cast<uint8_t*>(this) + tailOffset<T>());
    }

    // For BufferSubstring strings (asserted): the pointer stored in the tail
    // slot, i.e. the string whose characters are shared.
    StringImpl* const& substringBuffer() const
    {
        ASSERT(bufferOwnership() == BufferSubstring);

        return *tailPointer<StringImpl*>();
    }

    StringImpl*& substringBuffer()
    {
        ASSERT(bufferOwnership() == BufferSubstring);

        return *tailPointer<StringImpl*>();
    }

    // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
    static const unsigned s_copyCharsInlineCutOff = 20;

    BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_hashAndFlags & s_hashMaskBufferOwnership); }
    template <class UCharPredicate> PassRef<StringImpl> stripMatchedCharacters(UCharPredicate);
    template <typename CharType, class UCharPredicate> PassRef<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate);
    template <typename CharType> static PassRef<StringImpl> constructInternal(StringImpl*, unsigned);
    template <typename CharType> static PassRef<StringImpl> createUninitializedInternal(unsigned, CharType*&);
    template <typename CharType> static PassRef<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharType*&);
    template <typename CharType> static PassRef<StringImpl> reallocateInternal(PassRefPtr<StringImpl>, unsigned, CharType*&);
    template <typename CharType> static PassRef<StringImpl> createInternal(const CharType*, unsigned);
    WTF_EXPORT_PRIVATE NEVER_INLINE unsigned hashSlowCase() const;
    WTF_EXPORT_PRIVATE unsigned hashAndFlagsForEmptyUnique();

    // The bottom bit in the ref count indicates a static (immortal) string.
    static const unsigned s_refCountFlagIsStaticString = 0x1;
    static const unsigned s_refCountIncrement = 0x2; // This allows us to ref / deref without disturbing the static string flag.

    // The bottom 6 bits in the hash are flags.
    static const unsigned s_flagCount = 6;
    static const unsigned s_flagMask = (1u << s_flagCount) - 1;
    COMPILE_ASSERT(s_flagCount <= StringHasher::flagCount, StringHasher_reserves_enough_bits_for_StringImpl_flags);

    static const unsigned s_hashFlag8BitBuffer = 1u << 5;
    static const unsigned s_hashFlagIsAtomic = 1u << 4;
    static const unsigned s_hashFlagDidReportCost = 1u << 3;
    // Bits 0 and 1 hold the BufferOwnership value.
    static const unsigned s_hashMaskBufferOwnership = 1u | (1u << 1);

#ifdef STRING_STATS
    WTF_EXPORTDATA static StringStats m_stringStats;
#endif

public:
    struct StaticASCIILiteral {
        // These member variables must match the layout of StringImpl.
        unsigned m_refCount;
        unsigned m_length;
        const LChar* m_data8;
        unsigned m_hashAndFlags;

        // These values mimic ConstructFromLiteral.
        static const unsigned s_initialRefCount = s_refCountIncrement;
        static const unsigned s_initialFlags = s_hashFlag8BitBuffer | BufferInternal;
        static const unsigned s_hashShift = s_flagCount;
    };

#ifndef NDEBUG
    // Asserts that a hash is stored and that it equals the StringHasher value
    // for the 8-bit characters (characters8() itself asserts is8Bit()).
    void assertHashIsCorrect()
    {
        ASSERT(hasHash());
        ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters8(), length()));
    }
#endif

private:
    // These member variables must match the layout of StaticASCIILiteral.
    unsigned m_refCount;
    unsigned m_length;
    union {
        const LChar* m_data8;
        const UChar* m_data16;
    };
    mutable unsigned m_hashAndFlags;
};

// Compile-time check that StringImpl and StaticASCIILiteral have the same size.
static_assert(sizeof(StringImpl) == sizeof(StringImpl::StaticASCIILiteral), "");

#if !ASSERT_DISABLED
// StringImpls created from StaticASCIILiteral will ASSERT
// in the generic ValueCheck<T>::checkConsistency
// as they are not allocated by fastMalloc.
// We don't currently have any way to detect that case
// so we ignore the consistency check for all StringImpl*.
832template<> struct 833ValueCheck<StringImpl*> { 834 static void checkConsistency(const StringImpl*) { } 835}; 836#endif 837 838template <> 839ALWAYS_INLINE PassRef<StringImpl> StringImpl::constructInternal<LChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length, Force8BitConstructor)); } 840template <> 841ALWAYS_INLINE PassRef<StringImpl> StringImpl::constructInternal<UChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length)); } 842 843template <> 844ALWAYS_INLINE const LChar* StringImpl::characters<LChar>() const { return characters8(); } 845 846template <> 847ALWAYS_INLINE const UChar* StringImpl::characters<UChar>() const { return characters16(); } 848 849WTF_EXPORT_STRING_API bool equal(const StringImpl*, const StringImpl*); 850WTF_EXPORT_STRING_API bool equal(const StringImpl*, const LChar*); 851inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast<const LChar*>(b)); } 852WTF_EXPORT_STRING_API bool equal(const StringImpl*, const LChar*, unsigned); 853WTF_EXPORT_STRING_API bool equal(const StringImpl*, const UChar*, unsigned); 854inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); } 855inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); } 856inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); } 857WTF_EXPORT_STRING_API bool equal(const StringImpl& a, const StringImpl& b); 858 859template<typename T> 860inline T loadUnaligned(const char* s) 861{ 862#if COMPILER(CLANG) 863 T tmp; 864 memcpy(&tmp, s, sizeof(T)); 865 return tmp; 866#else 867 // This may result in undefined behavior due to unaligned access. 868 return *reinterpret_cast<const T*>(s); 869#endif 870} 871 872// Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe. 
873#if CPU(X86_64) || CPU(ARM64) 874ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length) 875{ 876 unsigned dwordLength = length >> 3; 877 878 const char* a = reinterpret_cast<const char*>(aLChar); 879 const char* b = reinterpret_cast<const char*>(bLChar); 880 881 if (dwordLength) { 882 for (unsigned i = 0; i != dwordLength; ++i) { 883 if (loadUnaligned<uint64_t>(a) != loadUnaligned<uint64_t>(b)) 884 return false; 885 886 a += sizeof(uint64_t); 887 b += sizeof(uint64_t); 888 } 889 } 890 891 if (length & 4) { 892 if (loadUnaligned<uint32_t>(a) != loadUnaligned<uint32_t>(b)) 893 return false; 894 895 a += sizeof(uint32_t); 896 b += sizeof(uint32_t); 897 } 898 899 if (length & 2) { 900 if (loadUnaligned<uint16_t>(a) != loadUnaligned<uint16_t>(b)) 901 return false; 902 903 a += sizeof(uint16_t); 904 b += sizeof(uint16_t); 905 } 906 907 if (length & 1 && (*reinterpret_cast<const LChar*>(a) != *reinterpret_cast<const LChar*>(b))) 908 return false; 909 910 return true; 911} 912 913ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length) 914{ 915 unsigned dwordLength = length >> 2; 916 917 const char* a = reinterpret_cast<const char*>(aUChar); 918 const char* b = reinterpret_cast<const char*>(bUChar); 919 920 if (dwordLength) { 921 for (unsigned i = 0; i != dwordLength; ++i) { 922 if (loadUnaligned<uint64_t>(a) != loadUnaligned<uint64_t>(b)) 923 return false; 924 925 a += sizeof(uint64_t); 926 b += sizeof(uint64_t); 927 } 928 } 929 930 if (length & 2) { 931 if (loadUnaligned<uint32_t>(a) != loadUnaligned<uint32_t>(b)) 932 return false; 933 934 a += sizeof(uint32_t); 935 b += sizeof(uint32_t); 936 } 937 938 if (length & 1 && (*reinterpret_cast<const UChar*>(a) != *reinterpret_cast<const UChar*>(b))) 939 return false; 940 941 return true; 942} 943#elif CPU(X86) 944ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length) 945{ 946 const char* a = reinterpret_cast<const char*>(aLChar); 947 const 
char* b = reinterpret_cast<const char*>(bLChar); 948 949 unsigned wordLength = length >> 2; 950 for (unsigned i = 0; i != wordLength; ++i) { 951 if (loadUnaligned<uint32_t>(a) != loadUnaligned<uint32_t>(b)) 952 return false; 953 a += sizeof(uint32_t); 954 b += sizeof(uint32_t); 955 } 956 957 length &= 3; 958 959 if (length) { 960 const LChar* aRemainder = reinterpret_cast<const LChar*>(a); 961 const LChar* bRemainder = reinterpret_cast<const LChar*>(b); 962 963 for (unsigned i = 0; i < length; ++i) { 964 if (aRemainder[i] != bRemainder[i]) 965 return false; 966 } 967 } 968 969 return true; 970} 971 972ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length) 973{ 974 const char* a = reinterpret_cast<const char*>(aUChar); 975 const char* b = reinterpret_cast<const char*>(bUChar); 976 977 unsigned wordLength = length >> 1; 978 for (unsigned i = 0; i != wordLength; ++i) { 979 if (loadUnaligned<uint32_t>(a) != loadUnaligned<uint32_t>(b)) 980 return false; 981 a += sizeof(uint32_t); 982 b += sizeof(uint32_t); 983 } 984 985 if (length & 1 && *reinterpret_cast<const UChar*>(a) != *reinterpret_cast<const UChar*>(b)) 986 return false; 987 988 return true; 989} 990#elif PLATFORM(IOS) && WTF_ARM_ARCH_AT_LEAST(7) 991ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) 992{ 993 bool isEqual = false; 994 uint32_t aValue; 995 uint32_t bValue; 996 asm("subs %[length], #4\n" 997 "blo 2f\n" 998 999 "0:\n" // Label 0 = Start of loop over 32 bits. 
1000 "ldr %[aValue], [%[a]], #4\n" 1001 "ldr %[bValue], [%[b]], #4\n" 1002 "cmp %[aValue], %[bValue]\n" 1003 "bne 66f\n" 1004 "subs %[length], #4\n" 1005 "bhs 0b\n" 1006 1007 // At this point, length can be: 1008 // -0: 00000000000000000000000000000000 (0 bytes left) 1009 // -1: 11111111111111111111111111111111 (3 bytes left) 1010 // -2: 11111111111111111111111111111110 (2 bytes left) 1011 // -3: 11111111111111111111111111111101 (1 byte left) 1012 // -4: 11111111111111111111111111111100 (length was 0) 1013 // The pointers are at the correct position. 1014 "2:\n" // Label 2 = End of loop over 32 bits, check for pair of characters. 1015 "tst %[length], #2\n" 1016 "beq 1f\n" 1017 "ldrh %[aValue], [%[a]], #2\n" 1018 "ldrh %[bValue], [%[b]], #2\n" 1019 "cmp %[aValue], %[bValue]\n" 1020 "bne 66f\n" 1021 1022 "1:\n" // Label 1 = Check for a single character left. 1023 "tst %[length], #1\n" 1024 "beq 42f\n" 1025 "ldrb %[aValue], [%[a]]\n" 1026 "ldrb %[bValue], [%[b]]\n" 1027 "cmp %[aValue], %[bValue]\n" 1028 "bne 66f\n" 1029 1030 "42:\n" // Label 42 = Success. 1031 "mov %[isEqual], #1\n" 1032 "66:\n" // Label 66 = End without changing isEqual to 1. 1033 : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue) 1034 : 1035 : 1036 ); 1037 return isEqual; 1038} 1039 1040ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) 1041{ 1042 bool isEqual = false; 1043 uint32_t aValue; 1044 uint32_t bValue; 1045 asm("subs %[length], #2\n" 1046 "blo 1f\n" 1047 1048 "0:\n" // Label 0 = Start of loop over 32 bits. 
1049 "ldr %[aValue], [%[a]], #4\n" 1050 "ldr %[bValue], [%[b]], #4\n" 1051 "cmp %[aValue], %[bValue]\n" 1052 "bne 66f\n" 1053 "subs %[length], #2\n" 1054 "bhs 0b\n" 1055 1056 // At this point, length can be: 1057 // -0: 00000000000000000000000000000000 (0 bytes left) 1058 // -1: 11111111111111111111111111111111 (1 character left, 2 bytes) 1059 // -2: 11111111111111111111111111111110 (length was zero) 1060 // The pointers are at the correct position. 1061 "1:\n" // Label 1 = Check for a single character left. 1062 "tst %[length], #1\n" 1063 "beq 42f\n" 1064 "ldrh %[aValue], [%[a]]\n" 1065 "ldrh %[bValue], [%[b]]\n" 1066 "cmp %[aValue], %[bValue]\n" 1067 "bne 66f\n" 1068 1069 "42:\n" // Label 42 = Success. 1070 "mov %[isEqual], #1\n" 1071 "66:\n" // Label 66 = End without changing isEqual to 1. 1072 : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue) 1073 : 1074 : 1075 ); 1076 return isEqual; 1077} 1078#else 1079ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) { return !memcmp(a, b, length); } 1080ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) { return !memcmp(a, b, length * sizeof(UChar)); } 1081#endif 1082 1083ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) 1084{ 1085 for (unsigned i = 0; i < length; ++i) { 1086 if (a[i] != b[i]) 1087 return false; 1088 } 1089 return true; 1090} 1091 1092ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); } 1093 1094WTF_EXPORT_STRING_API bool equalIgnoringCase(const StringImpl*, const StringImpl*); 1095WTF_EXPORT_STRING_API bool equalIgnoringCase(const StringImpl*, const LChar*); 1096inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { return equalIgnoringCase(b, a); } 1097WTF_EXPORT_STRING_API bool equalIgnoringCase(const LChar*, const LChar*, unsigned); 1098WTF_EXPORT_STRING_API bool equalIgnoringCase(const UChar*, const 
LChar*, unsigned); 1099inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) { return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); } 1100inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } 1101inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } 1102inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } 1103inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) 1104{ 1105 ASSERT(length >= 0); 1106 return !u_memcasecmp(a, b, length, U_FOLD_CASE_DEFAULT); 1107} 1108WTF_EXPORT_STRING_API bool equalIgnoringCaseNonNull(const StringImpl*, const StringImpl*); 1109 1110WTF_EXPORT_STRING_API bool equalIgnoringNullity(StringImpl*, StringImpl*); 1111WTF_EXPORT_STRING_API bool equalIgnoringNullity(const UChar*, size_t length, StringImpl*); 1112 1113template<typename CharacterType> 1114inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0) 1115{ 1116 while (index < length) { 1117 if (characters[index] == matchCharacter) 1118 return index; 1119 ++index; 1120 } 1121 return notFound; 1122} 1123 1124ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0) 1125{ 1126 return find(characters, length, static_cast<UChar>(matchCharacter), index); 1127} 1128 1129inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) 1130{ 1131 if (matchCharacter & ~0xFF) 1132 return notFound; 1133 return find(characters, length, static_cast<LChar>(matchCharacter), index); 1134} 1135 1136inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) 1137{ 
1138 while (index < length) { 1139 if (matchFunction(characters[index])) 1140 return index; 1141 ++index; 1142 } 1143 return notFound; 1144} 1145 1146inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) 1147{ 1148 while (index < length) { 1149 if (matchFunction(characters[index])) 1150 return index; 1151 ++index; 1152 } 1153 return notFound; 1154} 1155 1156template<typename CharacterType> 1157inline size_t findNextLineStart(const CharacterType* characters, unsigned length, unsigned index = 0) 1158{ 1159 while (index < length) { 1160 CharacterType c = characters[index++]; 1161 if ((c != '\n') && (c != '\r')) 1162 continue; 1163 1164 // There can only be a start of a new line if there are more characters 1165 // beyond the current character. 1166 if (index < length) { 1167 // The 3 common types of line terminators are 1. \r\n (Windows), 1168 // 2. \r (old MacOS) and 3. \n (Unix'es). 1169 1170 if (c == '\n') 1171 return index; // Case 3: just \n. 1172 1173 CharacterType c2 = characters[index]; 1174 if (c2 != '\n') 1175 return index; // Case 2: just \r. 1176 1177 // Case 1: \r\n. 1178 // But, there's only a start of a new line if there are more 1179 // characters beyond the \r\n. 
1180 if (++index < length) 1181 return index; 1182 } 1183 } 1184 return notFound; 1185} 1186 1187template<typename CharacterType> 1188inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index = UINT_MAX) 1189{ 1190 if (!length) 1191 return notFound; 1192 if (index >= length) 1193 index = length - 1; 1194 CharacterType c = characters[index]; 1195 while ((c != '\n') && (c != '\r')) { 1196 if (!index--) 1197 return notFound; 1198 c = characters[index]; 1199 } 1200 return index; 1201} 1202 1203template<typename CharacterType> 1204inline size_t reverseFind(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = UINT_MAX) 1205{ 1206 if (!length) 1207 return notFound; 1208 if (index >= length) 1209 index = length - 1; 1210 while (characters[index] != matchCharacter) { 1211 if (!index--) 1212 return notFound; 1213 } 1214 return index; 1215} 1216 1217ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = UINT_MAX) 1218{ 1219 return reverseFind(characters, length, static_cast<UChar>(matchCharacter), index); 1220} 1221 1222inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX) 1223{ 1224 if (matchCharacter & ~0xFF) 1225 return notFound; 1226 return reverseFind(characters, length, static_cast<LChar>(matchCharacter), index); 1227} 1228 1229inline size_t StringImpl::find(LChar character, unsigned start) 1230{ 1231 if (is8Bit()) 1232 return WTF::find(characters8(), m_length, character, start); 1233 return WTF::find(characters16(), m_length, character, start); 1234} 1235 1236ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start) 1237{ 1238 return find(static_cast<LChar>(character), start); 1239} 1240 1241inline size_t StringImpl::find(UChar character, unsigned start) 1242{ 1243 if (is8Bit()) 1244 return WTF::find(characters8(), m_length, character, start); 1245 
return WTF::find(characters16(), m_length, character, start); 1246} 1247 1248template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b) 1249{ 1250 return equalIgnoringNullity(a.data(), a.size(), b); 1251} 1252 1253template<typename CharacterType1, typename CharacterType2> 1254inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2) 1255{ 1256 const unsigned lmin = l1 < l2 ? l1 : l2; 1257 unsigned pos = 0; 1258 while (pos < lmin && *c1 == *c2) { 1259 ++c1; 1260 ++c2; 1261 ++pos; 1262 } 1263 1264 if (pos < lmin) 1265 return (c1[0] > c2[0]) ? 1 : -1; 1266 1267 if (l1 == l2) 1268 return 0; 1269 1270 return (l1 > l2) ? 1 : -1; 1271} 1272 1273inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2) 1274{ 1275 return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters8()); 1276} 1277 1278inline int codePointCompare16(const StringImpl* string1, const StringImpl* string2) 1279{ 1280 return codePointCompare(string1->length(), string2->length(), string1->characters16(), string2->characters16()); 1281} 1282 1283inline int codePointCompare8To16(const StringImpl* string1, const StringImpl* string2) 1284{ 1285 return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters16()); 1286} 1287 1288inline int codePointCompare(const StringImpl* string1, const StringImpl* string2) 1289{ 1290 if (!string1) 1291 return (string2 && string2->length()) ? -1 : 0; 1292 1293 if (!string2) 1294 return string1->length() ? 
1 : 0; 1295 1296 bool string1Is8Bit = string1->is8Bit(); 1297 bool string2Is8Bit = string2->is8Bit(); 1298 if (string1Is8Bit) { 1299 if (string2Is8Bit) 1300 return codePointCompare8(string1, string2); 1301 return codePointCompare8To16(string1, string2); 1302 } 1303 if (string2Is8Bit) 1304 return -codePointCompare8To16(string2, string1); 1305 return codePointCompare16(string1, string2); 1306} 1307 1308inline bool isSpaceOrNewline(UChar c) 1309{ 1310 // Use isASCIISpace() for basic Latin-1. 1311 // This will include newlines, which aren't included in Unicode DirWS. 1312 return c <= 0x7F ? isASCIISpace(c) : u_charDirection(c) == U_WHITE_SPACE_NEUTRAL; 1313} 1314 1315template<typename CharacterType> 1316inline unsigned lengthOfNullTerminatedString(const CharacterType* string) 1317{ 1318 ASSERT(string); 1319 size_t length = 0; 1320 while (string[length]) 1321 ++length; 1322 1323 RELEASE_ASSERT(length < std::numeric_limits<unsigned>::max()); 1324 return static_cast<unsigned>(length); 1325} 1326 1327inline PassRef<StringImpl> StringImpl::isolatedCopy() const 1328{ 1329 if (!requiresCopy()) { 1330 if (is8Bit()) 1331 return StringImpl::createWithoutCopying(m_data8, m_length); 1332 return StringImpl::createWithoutCopying(m_data16, m_length); 1333 } 1334 1335 if (is8Bit()) 1336 return create(m_data8, m_length); 1337 return create(m_data16, m_length); 1338} 1339 1340struct StringHash; 1341 1342// StringHash is the default hash for StringImpl* and RefPtr<StringImpl> 1343template<typename T> struct DefaultHash; 1344template<> struct DefaultHash<StringImpl*> { 1345 typedef StringHash Hash; 1346}; 1347template<> struct DefaultHash<RefPtr<StringImpl>> { 1348 typedef StringHash Hash; 1349}; 1350 1351} // namespace WTF 1352 1353using WTF::StringImpl; 1354using WTF::equal; 1355using WTF::TextCaseSensitivity; 1356using WTF::TextCaseSensitive; 1357using WTF::TextCaseInsensitive; 1358 1359#endif 1360