1/*
2 * Copyright (C) 2004-2008, 2013-2014 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
4 * Copyright (C) 2012 Google Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB.  If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include "config.h"
24#include "AtomicString.h"
25
26#include "AtomicStringTable.h"
27#include "HashSet.h"
28#include "IntegerToStringConversion.h"
29#include "StringHash.h"
30#include "Threading.h"
31#include "WTFThreadData.h"
32#include "dtoa.h"
33#include <wtf/unicode/UTF8.h>
34
35#if USE(WEB_THREAD)
36#include "TCSpinLock.h"
37#endif
38
39namespace WTF {
40
41using namespace Unicode;
42
43static_assert(sizeof(AtomicString) == sizeof(String), "AtomicString and String must be same size!");
44
45#if USE(WEB_THREAD)
46
47class AtomicStringTableLocker : public SpinLockHolder {
48    WTF_MAKE_NONCOPYABLE(AtomicStringTableLocker);
49
50    static SpinLock s_stringTableLock;
51public:
52    AtomicStringTableLocker()
53        : SpinLockHolder(&s_stringTableLock)
54    {
55    }
56};
57
58SpinLock AtomicStringTableLocker::s_stringTableLock = SPINLOCK_INITIALIZER;
59
60#else
61
62class AtomicStringTableLocker {
63    WTF_MAKE_NONCOPYABLE(AtomicStringTableLocker);
64public:
65    AtomicStringTableLocker() { }
66};
67
68#endif // USE(WEB_THREAD)
69
70static ALWAYS_INLINE HashSet<StringImpl*>& stringTable()
71{
72    return wtfThreadData().atomicStringTable()->table();
73}
74
75template<typename T, typename HashTranslator>
76static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
77{
78    AtomicStringTableLocker locker;
79
80    HashSet<StringImpl*>::AddResult addResult = stringTable().add<HashTranslator>(value);
81
82    // If the string is newly-translated, then we need to adopt it.
83    // The boolean in the pair tells us if that is so.
84    return addResult.isNewEntry ? adoptRef(*addResult.iterator) : *addResult.iterator;
85}
86
87struct CStringTranslator {
88    static unsigned hash(const LChar* c)
89    {
90        return StringHasher::computeHashAndMaskTop8Bits(c);
91    }
92
93    static inline bool equal(StringImpl* r, const LChar* s)
94    {
95        return WTF::equal(r, s);
96    }
97
98    static void translate(StringImpl*& location, const LChar* const& c, unsigned hash)
99    {
100        location = &StringImpl::create(c).leakRef();
101        location->setHash(hash);
102        location->setIsAtomic(true);
103    }
104};
105
106PassRefPtr<StringImpl> AtomicString::add(const LChar* c)
107{
108    if (!c)
109        return 0;
110    if (!*c)
111        return StringImpl::empty();
112
113    return addToStringTable<const LChar*, CStringTranslator>(c);
114}
115
116template<typename CharacterType>
117struct HashTranslatorCharBuffer {
118    const CharacterType* s;
119    unsigned length;
120};
121
122typedef HashTranslatorCharBuffer<UChar> UCharBuffer;
123struct UCharBufferTranslator {
124    static unsigned hash(const UCharBuffer& buf)
125    {
126        return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
127    }
128
129    static bool equal(StringImpl* const& str, const UCharBuffer& buf)
130    {
131        return WTF::equal(str, buf.s, buf.length);
132    }
133
134    static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
135    {
136        location = &StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef();
137        location->setHash(hash);
138        location->setIsAtomic(true);
139    }
140};
141
142template<typename CharacterType>
143struct HashAndCharacters {
144    unsigned hash;
145    const CharacterType* characters;
146    unsigned length;
147};
148
149template<typename CharacterType>
150struct HashAndCharactersTranslator {
151    static unsigned hash(const HashAndCharacters<CharacterType>& buffer)
152    {
153        ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length));
154        return buffer.hash;
155    }
156
157    static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer)
158    {
159        return WTF::equal(string, buffer.characters, buffer.length);
160    }
161
162    static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash)
163    {
164        location = &StringImpl::create(buffer.characters, buffer.length).leakRef();
165        location->setHash(hash);
166        location->setIsAtomic(true);
167    }
168};
169
170struct HashAndUTF8Characters {
171    unsigned hash;
172    const char* characters;
173    unsigned length;
174    unsigned utf16Length;
175};
176
177struct HashAndUTF8CharactersTranslator {
178    static unsigned hash(const HashAndUTF8Characters& buffer)
179    {
180        return buffer.hash;
181    }
182
183    static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
184    {
185        if (buffer.utf16Length != string->length())
186            return false;
187
188        // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
189        if (buffer.utf16Length != buffer.length) {
190            if (string->is8Bit())
191                return equalLatin1WithUTF8(string->characters8(), buffer.characters, buffer.characters + buffer.length);
192
193            return equalUTF16WithUTF8(string->characters16(), buffer.characters, buffer.characters + buffer.length);
194        }
195
196        if (string->is8Bit()) {
197            const LChar* stringCharacters = string->characters8();
198
199            for (unsigned i = 0; i < buffer.length; ++i) {
200                ASSERT(isASCII(buffer.characters[i]));
201                if (stringCharacters[i] != buffer.characters[i])
202                    return false;
203            }
204
205            return true;
206        }
207
208        const UChar* stringCharacters = string->characters16();
209
210        for (unsigned i = 0; i < buffer.length; ++i) {
211            ASSERT(isASCII(buffer.characters[i]));
212            if (stringCharacters[i] != buffer.characters[i])
213                return false;
214        }
215
216        return true;
217    }
218
219    static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
220    {
221        UChar* target;
222        RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target);
223
224        bool isAllASCII;
225        const char* source = buffer.characters;
226        if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK)
227            ASSERT_NOT_REACHED();
228
229        if (isAllASCII)
230            newString = StringImpl::create(buffer.characters, buffer.length);
231
232        location = newString.release().leakRef();
233        location->setHash(hash);
234        location->setIsAtomic(true);
235    }
236};
237
238PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
239{
240    if (!s)
241        return 0;
242
243    if (!length)
244        return StringImpl::empty();
245
246    UCharBuffer buffer = { s, length };
247    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
248}
249
250PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
251{
252    ASSERT(s);
253    ASSERT(existingHash);
254
255    if (!length)
256        return StringImpl::empty();
257
258    HashAndCharacters<UChar> buffer = { existingHash, s, length };
259    return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar>>(buffer);
260}
261
262PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
263{
264    if (!s)
265        return 0;
266
267    unsigned length = 0;
268    while (s[length] != UChar(0))
269        ++length;
270
271    if (!length)
272        return StringImpl::empty();
273
274    UCharBuffer buffer = { s, length };
275    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
276}
277
278struct SubstringLocation {
279    StringImpl* baseString;
280    unsigned start;
281    unsigned length;
282};
283
284struct SubstringTranslator {
285    static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash)
286    {
287        location = &StringImpl::createSubstringSharingImpl(buffer.baseString, buffer.start, buffer.length).leakRef();
288        location->setHash(hash);
289        location->setIsAtomic(true);
290    }
291};
292
293struct SubstringTranslator8 : SubstringTranslator {
294    static unsigned hash(const SubstringLocation& buffer)
295    {
296        return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length);
297    }
298
299    static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
300    {
301        return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length);
302    }
303};
304
305struct SubstringTranslator16 : SubstringTranslator {
306    static unsigned hash(const SubstringLocation& buffer)
307    {
308        return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length);
309    }
310
311    static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
312    {
313        return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length);
314    }
315};
316
317PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length)
318{
319    if (!baseString)
320        return nullptr;
321
322    if (!length || start >= baseString->length())
323        return StringImpl::empty();
324
325    unsigned maxLength = baseString->length() - start;
326    if (length >= maxLength) {
327        if (!start)
328            return add(baseString);
329        length = maxLength;
330    }
331
332    SubstringLocation buffer = { baseString, start, length };
333    if (baseString->is8Bit())
334        return addToStringTable<SubstringLocation, SubstringTranslator8>(buffer);
335    return addToStringTable<SubstringLocation, SubstringTranslator16>(buffer);
336}
337
338typedef HashTranslatorCharBuffer<LChar> LCharBuffer;
339struct LCharBufferTranslator {
340    static unsigned hash(const LCharBuffer& buf)
341    {
342        return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
343    }
344
345    static bool equal(StringImpl* const& str, const LCharBuffer& buf)
346    {
347        return WTF::equal(str, buf.s, buf.length);
348    }
349
350    static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash)
351    {
352        location = &StringImpl::create(buf.s, buf.length).leakRef();
353        location->setHash(hash);
354        location->setIsAtomic(true);
355    }
356};
357
358typedef HashTranslatorCharBuffer<char> CharBuffer;
359struct CharBufferFromLiteralDataTranslator {
360    static unsigned hash(const CharBuffer& buf)
361    {
362        return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length);
363    }
364
365    static bool equal(StringImpl* const& str, const CharBuffer& buf)
366    {
367        return WTF::equal(str, buf.s, buf.length);
368    }
369
370    static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash)
371    {
372        location = &StringImpl::createFromLiteral(buf.s, buf.length).leakRef();
373        location->setHash(hash);
374        location->setIsAtomic(true);
375    }
376};
377
378PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length)
379{
380    if (!s)
381        return 0;
382
383    if (!length)
384        return StringImpl::empty();
385
386    LCharBuffer buffer = { s, length };
387    return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer);
388}
389
390PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length)
391{
392    ASSERT(characters);
393    ASSERT(length);
394
395    CharBuffer buffer = { characters, length };
396    return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer);
397}
398
399PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl& string)
400{
401    if (!string.length())
402        return StringImpl::empty();
403
404    ASSERT_WITH_MESSAGE(!string.isAtomic(), "AtomicString should not hit the slow case if the string is already atomic.");
405
406    AtomicStringTableLocker locker;
407    auto addResult = stringTable().add(&string);
408
409    if (addResult.isNewEntry) {
410        ASSERT(*addResult.iterator == &string);
411        string.setIsAtomic(true);
412    }
413
414    return *addResult.iterator;
415}
416
417PassRefPtr<StringImpl> AtomicString::addSlowCase(AtomicStringTable& stringTable, StringImpl& string)
418{
419    if (!string.length())
420        return StringImpl::empty();
421
422    ASSERT_WITH_MESSAGE(!string.isAtomic(), "AtomicString should not hit the slow case if the string is already atomic.");
423
424    AtomicStringTableLocker locker;
425    auto addResult = stringTable.table().add(&string);
426
427    if (addResult.isNewEntry) {
428        ASSERT(*addResult.iterator == &string);
429        string.setIsAtomic(true);
430    }
431
432    return *addResult.iterator;
433}
434
435template<typename CharacterType>
436static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl)
437{
438    HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->characters<CharacterType>(), stringImpl->length() };
439    return stringTable().find<HashAndCharactersTranslator<CharacterType>>(buffer);
440}
441
442AtomicStringImpl* AtomicString::findStringWithHash(const StringImpl& stringImpl)
443{
444    ASSERT(stringImpl.existingHash());
445
446    if (!stringImpl.length())
447        return static_cast<AtomicStringImpl*>(StringImpl::empty());
448
449    AtomicStringTableLocker locker;
450    HashSet<StringImpl*>::iterator iterator;
451    if (stringImpl.is8Bit())
452        iterator = findString<LChar>(&stringImpl);
453    else
454        iterator = findString<UChar>(&stringImpl);
455    if (iterator == stringTable().end())
456        return 0;
457    return static_cast<AtomicStringImpl*>(*iterator);
458}
459
460void AtomicString::remove(StringImpl* string)
461{
462    ASSERT(string->isAtomic());
463    AtomicStringTableLocker locker;
464    HashSet<StringImpl*>& atomicStringTable = stringTable();
465    HashSet<StringImpl*>::iterator iterator = atomicStringTable.find(string);
466    ASSERT_WITH_MESSAGE(iterator != atomicStringTable.end(), "The string being removed is atomic in the string table of an other thread!");
467    atomicStringTable.remove(iterator);
468}
469
470AtomicString AtomicString::lower() const
471{
472    // Note: This is a hot function in the Dromaeo benchmark.
473    StringImpl* impl = this->impl();
474    if (UNLIKELY(!impl))
475        return AtomicString();
476
477    RefPtr<StringImpl> lowercasedString = impl->lower();
478    if (LIKELY(lowercasedString == impl))
479        return *this;
480
481    AtomicString result;
482    result.m_string = addSlowCase(*lowercasedString);
483    return result;
484}
485
486AtomicString AtomicString::convertToASCIILowercase() const
487{
488    StringImpl* impl = this->impl();
489    if (UNLIKELY(!impl))
490        return AtomicString();
491
492    // Convert short strings without allocating a new StringImpl, since
493    // there's a good chance these strings are already in the atomic
494    // string table and so no memory allocation will be required.
495    unsigned length;
496    const unsigned localBufferSize = 100;
497    if (impl->is8Bit() && (length = impl->length()) <= localBufferSize) {
498        const LChar* characters = impl->characters8();
499        unsigned failingIndex;
500        for (unsigned i = 0; i < length; ++i) {
501            if (UNLIKELY(isASCIIUpper(characters[i]))) {
502                failingIndex = i;
503                goto SlowPath;
504            }
505        }
506        return *this;
507SlowPath:
508        LChar localBuffer[localBufferSize];
509        for (unsigned i = 0; i < failingIndex; ++i)
510            localBuffer[i] = characters[i];
511        for (unsigned i = failingIndex; i < length; ++i)
512            localBuffer[i] = toASCIILower(characters[i]);
513        return AtomicString(localBuffer, length);
514    }
515
516    RefPtr<StringImpl> convertedString = impl->convertToASCIILowercase();
517    if (LIKELY(convertedString == impl))
518        return *this;
519
520    AtomicString result;
521    result.m_string = addSlowCase(*convertedString);
522    return result;
523}
524
525AtomicStringImpl* AtomicString::findSlowCase(StringImpl& string)
526{
527    ASSERT_WITH_MESSAGE(!string.isAtomic(), "AtomicStringImpls should return from the fast case.");
528
529    AtomicStringTableLocker locker;
530    HashSet<StringImpl*>& atomicStringTable = stringTable();
531    auto iterator = atomicStringTable.find(&string);
532    if (iterator != atomicStringTable.end())
533        return static_cast<AtomicStringImpl*>(*iterator);
534    return nullptr;
535}
536
537AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
538{
539    HashAndUTF8Characters buffer;
540    buffer.characters = charactersStart;
541    buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
542
543    if (!buffer.hash)
544        return nullAtom;
545
546    AtomicString atomicString;
547    atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
548    return atomicString;
549}
550
551AtomicString AtomicString::number(int number)
552{
553    return numberToStringSigned<AtomicString>(number);
554}
555
556AtomicString AtomicString::number(unsigned number)
557{
558    return numberToStringUnsigned<AtomicString>(number);
559}
560
561AtomicString AtomicString::number(double number)
562{
563    NumberToStringBuffer buffer;
564    return String(numberToFixedPrecisionString(number, 6, buffer, true));
565}
566
567AtomicStringImpl* AtomicString::find(LChar* characters, unsigned length)
568{
569    AtomicStringTableLocker locker;
570    auto& table = stringTable();
571
572    LCharBuffer buffer = { characters, length };
573    auto iterator = table.find<LCharBufferTranslator>(buffer);
574    if (iterator != table.end())
575        return static_cast<AtomicStringImpl*>(*iterator);
576    return nullptr;
577}
578
579AtomicStringImpl* AtomicString::find(UChar* characters, unsigned length)
580{
581    AtomicStringTableLocker locker;
582    auto& table = stringTable();
583
584    UCharBuffer buffer = { characters, length };
585    auto iterator = table.find<UCharBufferTranslator>(buffer);
586    if (iterator != table.end())
587        return static_cast<AtomicStringImpl*>(*iterator);
588    return nullptr;
589}
590
591#if !ASSERT_DISABLED
592bool AtomicString::isInAtomicStringTable(StringImpl* string)
593{
594    AtomicStringTableLocker locker;
595    return stringTable().contains(string);
596}
597#endif
598
599#ifndef NDEBUG
600void AtomicString::show() const
601{
602    m_string.show();
603}
604#endif
605
606} // namespace WTF
607