/*
 * Copyright © 2012  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_SET_PRIVATE_HH
#define HB_SET_PRIVATE_HH

#include "hb-private.hh"
#include "hb-object-private.hh"


/*
 * The set digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like a Bloom
 * filter or a quotient filter; however, they are much smaller, faster,
 * and designed to fit the requirements of our use in glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but they become fully flooded and ineffective if the
 * coverage is spread all over the place.
 *
 * The frozen set can be used instead of a digest to trade more
 * memory for 100% accuracy, but in practice that does not look like
 * an attractive trade-off.
 */

template <typename mask_t, unsigned int shift>
struct hb_set_digest_lowest_bits_t
{
  ASSERT_POD ();

  static const unsigned int mask_bytes = sizeof (mask_t);
  static const unsigned int mask_bits = sizeof (mask_t) * 8;
  static const unsigned int num_bits = 0
                                       + (mask_bytes >= 1 ? 3 : 0)
                                       + (mask_bytes >= 2 ? 1 : 0)
                                       + (mask_bytes >= 4 ? 1 : 0)
                                       + (mask_bytes >= 8 ? 1 : 0)
                                       + (mask_bytes >= 16 ? 1 : 0)
                                       + 0;

  ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
  ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);

  inline void init (void) {
    mask = 0;
  }

  inline void add (hb_codepoint_t g) {
    mask |= mask_for (g);
  }

  inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
    if ((b >> shift) - (a >> shift) >= mask_bits - 1)
      /* The range covers every bit of the mask: flood it. */
      mask = (mask_t) -1;
    else {
      /* Set every bit from the bit for a up to the bit for b,
       * wrapping around the mask if necessary. */
      mask_t ma = mask_for (a);
      mask_t mb = mask_for (b);
      mask |= mb + (mb - ma) - (mb < ma);
    }
  }

  inline bool may_have (hb_codepoint_t g) const {
    return !!(mask & mask_for (g));
  }

  private:

  /* Drop the low shift bits of g, then use the next num_bits bits
   * as a bit index into the mask. */
  static inline mask_t mask_for (hb_codepoint_t g) {
    return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
  }
  mask_t mask;
};
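
/*
 * A worked illustration of the digest above (values assumed for the
 * example, not taken from actual callers): with mask_t = uint32_t and
 * shift = 4, mask_bits is 32 and a glyph g maps to bit ((g >> 4) & 31).
 * A lookup covering the local range 100..130 sets only bits 6..8 and
 * stays selective, whereas glyphs scattered hundreds of codepoints
 * apart quickly flood all 32 bits and may_have() always returns true.
 *
 *   hb_set_digest_lowest_bits_t<uint32_t, 4> digest;
 *   digest.init ();
 *   digest.add_range (100, 130); // sets bits 6..8 only
 *   digest.may_have (105);       // true: bit 6 is set
 *   digest.may_have (135);       // true: bit 8 collides (false positive)
 *   digest.may_have (200);       // false: bit 12 is not set
 */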

template <typename head_t, typename tail_t>
struct hb_set_digest_combiner_t
{
  ASSERT_POD ();

  inline void init (void) {
    head.init ();
    tail.init ();
  }

  inline void add (hb_codepoint_t g) {
    head.add (g);
    tail.add (g);
  }

  inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
    head.add_range (a, b);
    tail.add_range (a, b);
  }

  /* Only claim possible membership if both filters agree. */
  inline bool may_have (hb_codepoint_t g) const {
    return head.may_have (g) && tail.may_have (g);
  }

  private:
  head_t head;
  tail_t tail;
};


/*
 * hb_set_digest_t
 *
 * This is a combination of digests that performs "best".
 * There is not much science to this: it's a result of intuition
 * and testing.
 */
typedef hb_set_digest_combiner_t
<
  hb_set_digest_lowest_bits_t<unsigned long, 4>,
  hb_set_digest_combiner_t
  <
    hb_set_digest_lowest_bits_t<unsigned long, 0>,
    hb_set_digest_lowest_bits_t<unsigned long, 9>
  >
> hb_set_digest_t;
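
/*
 * A minimal usage sketch (illustrative only): build a digest over the
 * glyphs a lookup can touch, then use may_have() as a cheap pre-filter
 * before the exact coverage check.  The nested combiner only reports a
 * possible member if all three underlying filters (shifts 4, 0 and 9)
 * agree, which keeps false positives down; a negative answer is exact.
 *
 *   hb_set_digest_t digest;
 *   digest.init ();
 *   digest.add (42);
 *   digest.add_range (0x0600, 0x06FF);
 *   hb_codepoint_t glyph = 0x0640;
 *   if (digest.may_have (glyph))
 *   {
 *     // Fall through to the exact lookup; only a "no" is definitive.
 *   }
 */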



/*
 * hb_set_t
 */


/* TODO Make this faster and more memory-efficient. */

struct hb_set_t
{
  friend struct hb_frozen_set_t;

  hb_object_header_t header;
  ASSERT_POD ();
  bool in_error;

  inline void init (void) {
    hb_object_init (this);
    clear ();
  }
  inline void fini (void) {
  }
  inline void clear (void) {
    if (unlikely (hb_object_is_inert (this)))
      return;
    in_error = false;
    memset (elts, 0, sizeof elts);
  }
  inline bool is_empty (void) const {
    for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
      if (elts[i])
        return false;
    return true;
  }
  inline void add (hb_codepoint_t g)
  {
    if (unlikely (in_error)) return;
    if (unlikely (g == INVALID)) return;
    if (unlikely (g > MAX_G)) return;
    elt (g) |= mask (g);
  }
  inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
  {
    if (unlikely (in_error)) return;
    /* TODO Speedup */
    for (unsigned int i = a; i < b + 1; i++)
      add (i);
  }
  inline void del (hb_codepoint_t g)
  {
    if (unlikely (in_error)) return;
    if (unlikely (g > MAX_G)) return;
    elt (g) &= ~mask (g);
  }
  inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
  {
    if (unlikely (in_error)) return;
    /* TODO Speedup */
    for (unsigned int i = a; i < b + 1; i++)
      del (i);
  }
  inline bool has (hb_codepoint_t g) const
  {
    if (unlikely (g > MAX_G)) return false;
    return !!(elt (g) & mask (g));
  }
  inline bool intersects (hb_codepoint_t first,
                          hb_codepoint_t last) const
  {
    if (unlikely (first > MAX_G)) return false;
    if (unlikely (last  > MAX_G)) last = MAX_G;
    unsigned int end = last + 1;
    for (hb_codepoint_t i = first; i < end; i++)
      if (has (i))
        return true;
    return false;
  }
  inline bool is_equal (const hb_set_t *other) const
  {
    for (unsigned int i = 0; i < ELTS; i++)
      if (elts[i] != other->elts[i])
        return false;
    return true;
  }
  inline void set (const hb_set_t *other)
  {
    if (unlikely (in_error)) return;
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] = other->elts[i];
  }
  inline void union_ (const hb_set_t *other)
  {
    if (unlikely (in_error)) return;
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] |= other->elts[i];
  }
  inline void intersect (const hb_set_t *other)
  {
    if (unlikely (in_error)) return;
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] &= other->elts[i];
  }
  inline void subtract (const hb_set_t *other)
  {
    if (unlikely (in_error)) return;
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] &= ~other->elts[i];
  }
  inline void symmetric_difference (const hb_set_t *other)
  {
    if (unlikely (in_error)) return;
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] ^= other->elts[i];
  }
  inline void invert (void)
  {
    if (unlikely (in_error)) return;
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] = ~elts[i];
  }
  /* Iteration: pass INVALID to start from the smallest member. */
  inline bool next (hb_codepoint_t *codepoint) const
  {
    if (unlikely (*codepoint == INVALID)) {
      hb_codepoint_t i = get_min ();
      if (i != INVALID) {
        *codepoint = i;
        return true;
      } else {
        *codepoint = INVALID;
        return false;
      }
    }
    for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
      if (has (i)) {
        *codepoint = i;
        return true;
      }
    *codepoint = INVALID;
    return false;
  }
  inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
  {
    hb_codepoint_t i;

    i = *last;
    if (!next (&i))
    {
      *last = *first = INVALID;
      return false;
    }

    *last = *first = i;
    while (next (&i) && i == *last + 1)
      (*last)++;

    return true;
  }

  inline unsigned int get_population (void) const
  {
    unsigned int count = 0;
    for (unsigned int i = 0; i < ELTS; i++)
      count += _hb_popcount32 (elts[i]);
    return count;
  }
  inline hb_codepoint_t get_min (void) const
  {
    for (unsigned int i = 0; i < ELTS; i++)
      if (elts[i])
        for (unsigned int j = 0; j < BITS; j++)
          if (elts[i] & (1u << j))
            return i * BITS + j;
    return INVALID;
  }
  inline hb_codepoint_t get_max (void) const
  {
    for (unsigned int i = ELTS; i; i--)
      if (elts[i - 1])
        for (unsigned int j = BITS; j; j--)
          if (elts[i - 1] & (1u << (j - 1)))
            return (i - 1) * BITS + (j - 1);
    return INVALID;
  }

  typedef uint32_t elt_t;
  static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
  static const unsigned int SHIFT = 5;
  static const unsigned int BITS = (1 << SHIFT);
  static const unsigned int MASK = BITS - 1;
  static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
  static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;

  elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
  elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
  elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }

  elt_t elts[ELTS]; /* XXX 8kb */

  ASSERT_STATIC (sizeof (elt_t) * 8 == BITS);
  ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G);
};
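
/*
 * A minimal hb_set_t usage sketch (illustrative only):
 *
 *   hb_set_t s;
 *   s.init ();
 *   s.add (10);
 *   s.add_range (20, 25);
 *   bool present = s.has (10);             // true
 *   hb_codepoint_t g = hb_set_t::INVALID;  // sentinel starts iteration
 *   while (s.next (&g))
 *     ;  // visits 10, 20, 21, ..., 25 in increasing order
 *   s.fini ();
 */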

struct hb_frozen_set_t
{
  static const unsigned int SHIFT = hb_set_t::SHIFT;
  static const unsigned int BITS = hb_set_t::BITS;
  static const unsigned int MASK = hb_set_t::MASK;
  typedef hb_set_t::elt_t elt_t;

  inline void init (const hb_set_t &set)
  {
    start = count = 0;
    elts = NULL;

    unsigned int max = set.get_max ();
    if (max == set.INVALID)
      return;
    unsigned int min = set.get_min ();
    const elt_t &min_elt = set.elt (min);

    start = min & ~MASK;
    count = max - start + 1;
    unsigned int num_elts = (count + BITS - 1) / BITS;
    unsigned int elts_size = num_elts * sizeof (elt_t);
    elts = (elt_t *) malloc (elts_size);
    if (unlikely (!elts))
    {
      start = count = 0;
      return;
    }
    /* Copy just the elements spanning [min, max] out of the set. */
    memcpy (elts, &min_elt, elts_size);
  }

  inline void fini (void)
  {
    if (elts)
      free (elts);
  }

  inline bool has (hb_codepoint_t g) const
  {
    /* hb_codepoint_t is unsigned, so this also rejects g < start. */
    g -= start;
    if (unlikely (g >= count)) return false;
    return !!(elt (g) & mask (g));
  }

  elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
  elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }

  private:
  hb_codepoint_t start, count;
  elt_t *elts;
};
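
/*
 * A minimal hb_frozen_set_t usage sketch (illustrative only): freeze a
 * populated hb_set_t into a compact, read-only snapshot that spans only
 * [min, max], query it exactly, then release the storage it allocated.
 *
 *   hb_set_t s;
 *   s.init ();
 *   s.add_range (0x20, 0x7E);
 *
 *   hb_frozen_set_t frozen;
 *   frozen.init (s);
 *   bool yes = frozen.has (0x41);  // true
 *   bool no  = frozen.has (0x7F);  // false: past the frozen range
 *   frozen.fini ();                // frees the copied elements
 *   s.fini ();
 */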


#endif /* HB_SET_PRIVATE_HH */
