1/* 2 * Copyright (C) 2004-2008, 2013-2014 Apple Inc. All rights reserved. 3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> 4 * Copyright (C) 2012 Google Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23#include "config.h" 24#include "AtomicString.h" 25 26#include "AtomicStringTable.h" 27#include "HashSet.h" 28#include "IntegerToStringConversion.h" 29#include "StringHash.h" 30#include "Threading.h" 31#include "WTFThreadData.h" 32#include "dtoa.h" 33#include <wtf/unicode/UTF8.h> 34 35#if USE(WEB_THREAD) 36#include "TCSpinLock.h" 37#endif 38 39namespace WTF { 40 41using namespace Unicode; 42 43static_assert(sizeof(AtomicString) == sizeof(String), "AtomicString and String must be same size!"); 44 45#if USE(WEB_THREAD) 46 47class AtomicStringTableLocker : public SpinLockHolder { 48 WTF_MAKE_NONCOPYABLE(AtomicStringTableLocker); 49 50 static SpinLock s_stringTableLock; 51public: 52 AtomicStringTableLocker() 53 : SpinLockHolder(&s_stringTableLock) 54 { 55 } 56}; 57 58SpinLock AtomicStringTableLocker::s_stringTableLock = SPINLOCK_INITIALIZER; 59 60#else 61 62class AtomicStringTableLocker { 63 WTF_MAKE_NONCOPYABLE(AtomicStringTableLocker); 64public: 65 AtomicStringTableLocker() { } 66}; 67 68#endif // USE(WEB_THREAD) 69 70static ALWAYS_INLINE HashSet<StringImpl*>& stringTable() 71{ 72 return wtfThreadData().atomicStringTable()->table(); 73} 74 75template<typename T, typename HashTranslator> 76static inline PassRefPtr<StringImpl> addToStringTable(const T& value) 77{ 78 AtomicStringTableLocker locker; 79 80 HashSet<StringImpl*>::AddResult addResult = stringTable().add<HashTranslator>(value); 81 82 // If the string is newly-translated, then we need to adopt it. 83 // The boolean in the pair tells us if that is so. 84 return addResult.isNewEntry ? adoptRef(*addResult.iterator) : *addResult.iterator; 85} 86 87struct CStringTranslator { 88 static unsigned hash(const LChar* c) 89 { 90 return StringHasher::computeHashAndMaskTop8Bits(c); 91 } 92 93 static inline bool equal(StringImpl* r, const LChar* s) 94 { 95 return WTF::equal(r, s); 96 } 97 98 static void translate(StringImpl*& location, const LChar* const& c, unsigned hash) 99 { 100 location = &StringImpl::create(c).leakRef(); 101 location->setHash(hash); 102 location->setIsAtomic(true); 103 } 104}; 105 106PassRefPtr<StringImpl> AtomicString::add(const LChar* c) 107{ 108 if (!c) 109 return 0; 110 if (!*c) 111 return StringImpl::empty(); 112 113 return addToStringTable<const LChar*, CStringTranslator>(c); 114} 115 116template<typename CharacterType> 117struct HashTranslatorCharBuffer { 118 const CharacterType* s; 119 unsigned length; 120}; 121 122typedef HashTranslatorCharBuffer<UChar> UCharBuffer; 123struct UCharBufferTranslator { 124 static unsigned hash(const UCharBuffer& buf) 125 { 126 return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length); 127 } 128 129 static bool equal(StringImpl* const& str, const UCharBuffer& buf) 130 { 131 return WTF::equal(str, buf.s, buf.length); 132 } 133 134 static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash) 135 { 136 location = &StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef(); 137 location->setHash(hash); 138 location->setIsAtomic(true); 139 } 140}; 141 142template<typename CharacterType> 143struct HashAndCharacters { 144 unsigned hash; 145 const CharacterType* characters; 146 unsigned length; 147}; 148 149template<typename CharacterType> 150struct HashAndCharactersTranslator { 151 static unsigned hash(const HashAndCharacters<CharacterType>& buffer) 152 { 153 ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length)); 154 return buffer.hash; 155 } 156 157 static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer) 158 { 159 return WTF::equal(string, buffer.characters, buffer.length); 160 } 161 162 static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash) 163 { 164 location = &StringImpl::create(buffer.characters, buffer.length).leakRef(); 165 location->setHash(hash); 166 location->setIsAtomic(true); 167 } 168}; 169 170struct HashAndUTF8Characters { 171 unsigned hash; 172 const char* characters; 173 unsigned length; 174 unsigned utf16Length; 175}; 176 177struct HashAndUTF8CharactersTranslator { 178 static unsigned hash(const HashAndUTF8Characters& buffer) 179 { 180 return buffer.hash; 181 } 182 183 static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer) 184 { 185 if (buffer.utf16Length != string->length()) 186 return false; 187 188 // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same. 189 if (buffer.utf16Length != buffer.length) { 190 if (string->is8Bit()) 191 return equalLatin1WithUTF8(string->characters8(), buffer.characters, buffer.characters + buffer.length); 192 193 return equalUTF16WithUTF8(string->characters16(), buffer.characters, buffer.characters + buffer.length); 194 } 195 196 if (string->is8Bit()) { 197 const LChar* stringCharacters = string->characters8(); 198 199 for (unsigned i = 0; i < buffer.length; ++i) { 200 ASSERT(isASCII(buffer.characters[i])); 201 if (stringCharacters[i] != buffer.characters[i]) 202 return false; 203 } 204 205 return true; 206 } 207 208 const UChar* stringCharacters = string->characters16(); 209 210 for (unsigned i = 0; i < buffer.length; ++i) { 211 ASSERT(isASCII(buffer.characters[i])); 212 if (stringCharacters[i] != buffer.characters[i]) 213 return false; 214 } 215 216 return true; 217 } 218 219 static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash) 220 { 221 UChar* target; 222 RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target); 223 224 bool isAllASCII; 225 const char* source = buffer.characters; 226 if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK) 227 ASSERT_NOT_REACHED(); 228 229 if (isAllASCII) 230 newString = StringImpl::create(buffer.characters, buffer.length); 231 232 location = newString.release().leakRef(); 233 location->setHash(hash); 234 location->setIsAtomic(true); 235 } 236}; 237 238PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length) 239{ 240 if (!s) 241 return 0; 242 243 if (!length) 244 return StringImpl::empty(); 245 246 UCharBuffer buffer = { s, length }; 247 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); 248} 249 250PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash) 251{ 252 ASSERT(s); 253 ASSERT(existingHash); 254 255 if (!length) 256 return StringImpl::empty(); 257 258 HashAndCharacters<UChar> buffer = { existingHash, s, length }; 259 return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar>>(buffer); 260} 261 262PassRefPtr<StringImpl> AtomicString::add(const UChar* s) 263{ 264 if (!s) 265 return 0; 266 267 unsigned length = 0; 268 while (s[length] != UChar(0)) 269 ++length; 270 271 if (!length) 272 return StringImpl::empty(); 273 274 UCharBuffer buffer = { s, length }; 275 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); 276} 277 278struct SubstringLocation { 279 StringImpl* baseString; 280 unsigned start; 281 unsigned length; 282}; 283 284struct SubstringTranslator { 285 static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash) 286 { 287 location = &StringImpl::createSubstringSharingImpl(buffer.baseString, buffer.start, buffer.length).leakRef(); 288 location->setHash(hash); 289 location->setIsAtomic(true); 290 } 291}; 292 293struct SubstringTranslator8 : SubstringTranslator { 294 static unsigned hash(const SubstringLocation& buffer) 295 { 296 return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length); 297 } 298 299 static bool equal(StringImpl* const& string, const SubstringLocation& buffer) 300 { 301 return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length); 302 } 303}; 304 305struct SubstringTranslator16 : SubstringTranslator { 306 static unsigned hash(const SubstringLocation& buffer) 307 { 308 return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length); 309 } 310 311 static bool equal(StringImpl* const& string, const SubstringLocation& buffer) 312 { 313 return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length); 314 } 315}; 316 317PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length) 318{ 319 if (!baseString) 320 return nullptr; 321 322 if (!length || start >= baseString->length()) 323 return StringImpl::empty(); 324 325 unsigned maxLength = baseString->length() - start; 326 if (length >= maxLength) { 327 if (!start) 328 return add(baseString); 329 length = maxLength; 330 } 331 332 SubstringLocation buffer = { baseString, start, length }; 333 if (baseString->is8Bit()) 334 return addToStringTable<SubstringLocation, SubstringTranslator8>(buffer); 335 return addToStringTable<SubstringLocation, SubstringTranslator16>(buffer); 336} 337 338typedef HashTranslatorCharBuffer<LChar> LCharBuffer; 339struct LCharBufferTranslator { 340 static unsigned hash(const LCharBuffer& buf) 341 { 342 return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length); 343 } 344 345 static bool equal(StringImpl* const& str, const LCharBuffer& buf) 346 { 347 return WTF::equal(str, buf.s, buf.length); 348 } 349 350 static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash) 351 { 352 location = &StringImpl::create(buf.s, buf.length).leakRef(); 353 location->setHash(hash); 354 location->setIsAtomic(true); 355 } 356}; 357 358typedef HashTranslatorCharBuffer<char> CharBuffer; 359struct CharBufferFromLiteralDataTranslator { 360 static unsigned hash(const CharBuffer& buf) 361 { 362 return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length); 363 } 364 365 static bool equal(StringImpl* const& str, const CharBuffer& buf) 366 { 367 return WTF::equal(str, buf.s, buf.length); 368 } 369 370 static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash) 371 { 372 location = &StringImpl::createFromLiteral(buf.s, buf.length).leakRef(); 373 location->setHash(hash); 374 location->setIsAtomic(true); 375 } 376}; 377 378PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length) 379{ 380 if (!s) 381 return 0; 382 383 if (!length) 384 return StringImpl::empty(); 385 386 LCharBuffer buffer = { s, length }; 387 return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer); 388} 389 390PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length) 391{ 392 ASSERT(characters); 393 ASSERT(length); 394 395 CharBuffer buffer = { characters, length }; 396 return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer); 397} 398 399PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl& string) 400{ 401 if (!string.length()) 402 return StringImpl::empty(); 403 404 ASSERT_WITH_MESSAGE(!string.isAtomic(), "AtomicString should not hit the slow case if the string is already atomic."); 405 406 AtomicStringTableLocker locker; 407 auto addResult = stringTable().add(&string); 408 409 if (addResult.isNewEntry) { 410 ASSERT(*addResult.iterator == &string); 411 string.setIsAtomic(true); 412 } 413 414 return *addResult.iterator; 415} 416 417PassRefPtr<StringImpl> AtomicString::addSlowCase(AtomicStringTable& stringTable, StringImpl& string) 418{ 419 if (!string.length()) 420 return StringImpl::empty(); 421 422 ASSERT_WITH_MESSAGE(!string.isAtomic(), "AtomicString should not hit the slow case if the string is already atomic."); 423 424 AtomicStringTableLocker locker; 425 auto addResult = stringTable.table().add(&string); 426 427 if (addResult.isNewEntry) { 428 ASSERT(*addResult.iterator == &string); 429 string.setIsAtomic(true); 430 } 431 432 return *addResult.iterator; 433} 434 435template<typename CharacterType> 436static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl) 437{ 438 HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->characters<CharacterType>(), stringImpl->length() }; 439 return stringTable().find<HashAndCharactersTranslator<CharacterType>>(buffer); 440} 441 442AtomicStringImpl* AtomicString::findStringWithHash(const StringImpl& stringImpl) 443{ 444 ASSERT(stringImpl.existingHash()); 445 446 if (!stringImpl.length()) 447 return static_cast<AtomicStringImpl*>(StringImpl::empty()); 448 449 AtomicStringTableLocker locker; 450 HashSet<StringImpl*>::iterator iterator; 451 if (stringImpl.is8Bit()) 452 iterator = findString<LChar>(&stringImpl); 453 else 454 iterator = findString<UChar>(&stringImpl); 455 if (iterator == stringTable().end()) 456 return 0; 457 return static_cast<AtomicStringImpl*>(*iterator); 458} 459 460void AtomicString::remove(StringImpl* string) 461{ 462 ASSERT(string->isAtomic()); 463 AtomicStringTableLocker locker; 464 HashSet<StringImpl*>& atomicStringTable = stringTable(); 465 HashSet<StringImpl*>::iterator iterator = atomicStringTable.find(string); 466 ASSERT_WITH_MESSAGE(iterator != atomicStringTable.end(), "The string being removed is atomic in the string table of an other thread!"); 467 atomicStringTable.remove(iterator); 468} 469 470AtomicString AtomicString::lower() const 471{ 472 // Note: This is a hot function in the Dromaeo benchmark. 473 StringImpl* impl = this->impl(); 474 if (UNLIKELY(!impl)) 475 return AtomicString(); 476 477 RefPtr<StringImpl> lowercasedString = impl->lower(); 478 if (LIKELY(lowercasedString == impl)) 479 return *this; 480 481 AtomicString result; 482 result.m_string = addSlowCase(*lowercasedString); 483 return result; 484} 485 486AtomicString AtomicString::convertToASCIILowercase() const 487{ 488 StringImpl* impl = this->impl(); 489 if (UNLIKELY(!impl)) 490 return AtomicString(); 491 492 // Convert short strings without allocating a new StringImpl, since 493 // there's a good chance these strings are already in the atomic 494 // string table and so no memory allocation will be required. 495 unsigned length; 496 const unsigned localBufferSize = 100; 497 if (impl->is8Bit() && (length = impl->length()) <= localBufferSize) { 498 const LChar* characters = impl->characters8(); 499 unsigned failingIndex; 500 for (unsigned i = 0; i < length; ++i) { 501 if (UNLIKELY(isASCIIUpper(characters[i]))) { 502 failingIndex = i; 503 goto SlowPath; 504 } 505 } 506 return *this; 507SlowPath: 508 LChar localBuffer[localBufferSize]; 509 for (unsigned i = 0; i < failingIndex; ++i) 510 localBuffer[i] = characters[i]; 511 for (unsigned i = failingIndex; i < length; ++i) 512 localBuffer[i] = toASCIILower(characters[i]); 513 return AtomicString(localBuffer, length); 514 } 515 516 RefPtr<StringImpl> convertedString = impl->convertToASCIILowercase(); 517 if (LIKELY(convertedString == impl)) 518 return *this; 519 520 AtomicString result; 521 result.m_string = addSlowCase(*convertedString); 522 return result; 523} 524 525AtomicStringImpl* AtomicString::findSlowCase(StringImpl& string) 526{ 527 ASSERT_WITH_MESSAGE(!string.isAtomic(), "AtomicStringImpls should return from the fast case."); 528 529 AtomicStringTableLocker locker; 530 HashSet<StringImpl*>& atomicStringTable = stringTable(); 531 auto iterator = atomicStringTable.find(&string); 532 if (iterator != atomicStringTable.end()) 533 return static_cast<AtomicStringImpl*>(*iterator); 534 return nullptr; 535} 536 537AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd) 538{ 539 HashAndUTF8Characters buffer; 540 buffer.characters = charactersStart; 541 buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length); 542 543 if (!buffer.hash) 544 return nullAtom; 545 546 AtomicString atomicString; 547 atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer); 548 return atomicString; 549} 550 551AtomicString AtomicString::number(int number) 552{ 553 return numberToStringSigned<AtomicString>(number); 554} 555 556AtomicString AtomicString::number(unsigned number) 557{ 558 return numberToStringUnsigned<AtomicString>(number); 559} 560 561AtomicString AtomicString::number(double number) 562{ 563 NumberToStringBuffer buffer; 564 return String(numberToFixedPrecisionString(number, 6, buffer, true)); 565} 566 567AtomicStringImpl* AtomicString::find(LChar* characters, unsigned length) 568{ 569 AtomicStringTableLocker locker; 570 auto& table = stringTable(); 571 572 LCharBuffer buffer = { characters, length }; 573 auto iterator = table.find<LCharBufferTranslator>(buffer); 574 if (iterator != table.end()) 575 return static_cast<AtomicStringImpl*>(*iterator); 576 return nullptr; 577} 578 579AtomicStringImpl* AtomicString::find(UChar* characters, unsigned length) 580{ 581 AtomicStringTableLocker locker; 582 auto& table = stringTable(); 583 584 UCharBuffer buffer = { characters, length }; 585 auto iterator = table.find<UCharBufferTranslator>(buffer); 586 if (iterator != table.end()) 587 return static_cast<AtomicStringImpl*>(*iterator); 588 return nullptr; 589} 590 591#if !ASSERT_DISABLED 592bool AtomicString::isInAtomicStringTable(StringImpl* string) 593{ 594 AtomicStringTableLocker locker; 595 return stringTable().contains(string); 596} 597#endif 598 599#ifndef NDEBUG 600void AtomicString::show() const 601{ 602 m_string.show(); 603} 604#endif 605 606} // namespace WTF 607