1/*
2 * Copyright (C) 2011 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#ifndef BloomFilter_h
27#define BloomFilter_h
28
29#include <wtf/text/AtomicString.h>
30
31namespace WTF {
32
33// Counting bloom filter with k=2 and 8 bit counters. Uses 2^keyBits bytes of memory.
34// False positive rate is approximately (1-e^(-2n/m))^2, where n is the number of unique
35// keys and m is the table size (==2^keyBits).
36template <unsigned keyBits>
37class BloomFilter {
38    WTF_MAKE_FAST_ALLOCATED;
39public:
40    static_assert(keyBits <= 16, "BloomFilter key size must be less than or equal to 16!");
41
42    static const size_t tableSize = 1 << keyBits;
43    static const unsigned keyMask = (1 << keyBits) - 1;
44    static uint8_t maximumCount() { return std::numeric_limits<uint8_t>::max(); }
45
46    BloomFilter() { clear(); }
47
48    void add(unsigned hash);
49    void remove(unsigned hash);
50
51    // The filter may give false positives (claim it may contain a key it doesn't)
52    // but never false negatives (claim it doesn't contain a key it does).
53    bool mayContain(unsigned hash) const { return firstSlot(hash) && secondSlot(hash); }
54
55    // The filter must be cleared before reuse even if all keys are removed.
56    // Otherwise overflowed keys will stick around.
57    void clear();
58
59    void add(const AtomicString& string) { add(string.impl()->existingHash()); }
60    void add(const String& string) { add(string.impl()->hash()); }
61    void remove(const AtomicString& string) { remove(string.impl()->existingHash()); }
62    void remove(const String& string) { remove(string.impl()->hash()); }
63
64    bool mayContain(const AtomicString& string) const { return mayContain(string.impl()->existingHash()); }
65    bool mayContain(const String& string) const { return mayContain(string.impl()->hash()); }
66
67#if !ASSERT_DISABLED
68    // Slow.
69    bool likelyEmpty() const;
70    bool isClear() const;
71#endif
72
73private:
74    uint8_t& firstSlot(unsigned hash) { return m_table[hash & keyMask]; }
75    uint8_t& secondSlot(unsigned hash) { return m_table[(hash >> 16) & keyMask]; }
76    const uint8_t& firstSlot(unsigned hash) const { return m_table[hash & keyMask]; }
77    const uint8_t& secondSlot(unsigned hash) const { return m_table[(hash >> 16) & keyMask]; }
78
79    uint8_t m_table[tableSize];
80};
81
82template <unsigned keyBits>
83inline void BloomFilter<keyBits>::add(unsigned hash)
84{
85    uint8_t& first = firstSlot(hash);
86    uint8_t& second = secondSlot(hash);
87    if (LIKELY(first < maximumCount()))
88        ++first;
89    if (LIKELY(second < maximumCount()))
90        ++second;
91}
92
93template <unsigned keyBits>
94inline void BloomFilter<keyBits>::remove(unsigned hash)
95{
96    uint8_t& first = firstSlot(hash);
97    uint8_t& second = secondSlot(hash);
98    ASSERT(first);
99    ASSERT(second);
100    // In case of an overflow, the slot sticks in the table until clear().
101    if (LIKELY(first < maximumCount()))
102        --first;
103    if (LIKELY(second < maximumCount()))
104        --second;
105}
106
107template <unsigned keyBits>
108inline void BloomFilter<keyBits>::clear()
109{
110    memset(m_table, 0, tableSize);
111}
112
113#if !ASSERT_DISABLED
114template <unsigned keyBits>
115bool BloomFilter<keyBits>::likelyEmpty() const
116{
117    for (size_t n = 0; n < tableSize; ++n) {
118        if (m_table[n] && m_table[n] != maximumCount())
119            return false;
120    }
121    return true;
122}
123
124template <unsigned keyBits>
125bool BloomFilter<keyBits>::isClear() const
126{
127    for (size_t n = 0; n < tableSize; ++n) {
128        if (m_table[n])
129            return false;
130    }
131    return true;
132}
133#endif
134
135}
136
137using WTF::BloomFilter;
138
139#endif
140