hb-ot-shape-complex-use.cc revision 13240:8c09472c3de2
1/*
2 * Copyright © 2015  Mozilla Foundation.
3 * Copyright © 2015  Google, Inc.
4 *
5 *  This is part of HarfBuzz, a text shaping library.
6 *
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
12 *
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
18 *
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 *
25 * Mozilla Author(s): Jonathan Kew
26 * Google Author(s): Behdad Esfahbod
27 */
28
29#include "hb-ot-shape-complex-use-private.hh"
30#include "hb-ot-shape-complex-arabic-private.hh"
31
32/* buffer var allocations */
33#define use_category() complex_var_u8_0()
34
35
36/*
37 * Universal Shaping Engine.
38 * https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
39 */
40
41static const hb_tag_t
42basic_features[] =
43{
44  /*
45   * Basic features.
46   * These features are applied all at once, before reordering.
47   */
48  HB_TAG('r','k','r','f'),
49  HB_TAG('a','b','v','f'),
50  HB_TAG('b','l','w','f'),
51  HB_TAG('h','a','l','f'),
52  HB_TAG('p','s','t','f'),
53  HB_TAG('v','a','t','u'),
54  HB_TAG('c','j','c','t'),
55};
56static const hb_tag_t
57arabic_features[] =
58{
59  HB_TAG('i','s','o','l'),
60  HB_TAG('i','n','i','t'),
61  HB_TAG('m','e','d','i'),
62  HB_TAG('f','i','n','a'),
63  /* The spec doesn't specify these but we apply anyway, since our Arabic shaper
64   * does.  These are only used in Syriac spec. */
65  HB_TAG('m','e','d','2'),
66  HB_TAG('f','i','n','2'),
67  HB_TAG('f','i','n','3'),
68};
/* Joining forms, in the same order as the leading entries of
 * arabic_features so a form can index that table directly.
 * The Syriac-only forms are not needed here. */
enum joining_form_t {
  ISOL  = 0,
  INIT  = 1,
  MEDI  = 2,
  FINA  = 3,
  _NONE = 4  /* No joining form has been assigned. */
};
77static const hb_tag_t
78other_features[] =
79{
80  /*
81   * Other features.
82   * These features are applied all at once, after reordering.
83   */
84  HB_TAG('a','b','v','s'),
85  HB_TAG('b','l','w','s'),
86  HB_TAG('h','a','l','n'),
87  HB_TAG('p','r','e','s'),
88  HB_TAG('p','s','t','s'),
89  /* Positioning features, though we don't care about the types. */
90  HB_TAG('d','i','s','t'),
91  HB_TAG('a','b','v','m'),
92  HB_TAG('b','l','w','m'),
93};
94
95static void
96setup_syllables (const hb_ot_shape_plan_t *plan,
97                 hb_font_t *font,
98                 hb_buffer_t *buffer);
99static void
100clear_substitution_flags (const hb_ot_shape_plan_t *plan,
101                          hb_font_t *font,
102                          hb_buffer_t *buffer);
103static void
104record_rphf (const hb_ot_shape_plan_t *plan,
105             hb_font_t *font,
106             hb_buffer_t *buffer);
107static void
108record_pref (const hb_ot_shape_plan_t *plan,
109             hb_font_t *font,
110             hb_buffer_t *buffer);
111static void
112reorder (const hb_ot_shape_plan_t *plan,
113         hb_font_t *font,
114         hb_buffer_t *buffer);
115
116static void
117collect_features_use (hb_ot_shape_planner_t *plan)
118{
119  hb_ot_map_builder_t *map = &plan->map;
120
121  /* Do this before any lookups have been applied. */
122  map->add_gsub_pause (setup_syllables);
123
124  /* "Default glyph pre-processing group" */
125  map->add_global_bool_feature (HB_TAG('l','o','c','l'));
126  map->add_global_bool_feature (HB_TAG('c','c','m','p'));
127  map->add_global_bool_feature (HB_TAG('n','u','k','t'));
128  map->add_global_bool_feature (HB_TAG('a','k','h','n'));
129
130  /* "Reordering group" */
131  map->add_gsub_pause (clear_substitution_flags);
132  map->add_feature (HB_TAG('r','p','h','f'), 1, F_MANUAL_ZWJ);
133  map->add_gsub_pause (record_rphf);
134  map->add_gsub_pause (clear_substitution_flags);
135  map->add_feature (HB_TAG('p','r','e','f'), 1, F_GLOBAL | F_MANUAL_ZWJ);
136  map->add_gsub_pause (record_pref);
137
138  /* "Orthographic unit shaping group" */
139  for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
140    map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
141
142  map->add_gsub_pause (reorder);
143
144  /* "Topographical features" */
145  for (unsigned int i = 0; i < ARRAY_LENGTH (arabic_features); i++)
146    map->add_feature (arabic_features[i], 1, F_NONE);
147  map->add_gsub_pause (NULL);
148
149  /* "Standard typographic presentation" and "Positional feature application" */
150  for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
151    map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
152}
153
154struct use_shape_plan_t
155{
156  ASSERT_POD ();
157
158  hb_mask_t rphf_mask;
159
160  arabic_shape_plan_t *arabic_plan;
161};
162
163static bool
164has_arabic_joining (hb_script_t script)
165{
166  /* List of scripts that have data in arabic-table. */
167  switch ((int) script)
168  {
169    /* Unicode-1.1 additions */
170    case HB_SCRIPT_ARABIC:
171
172    /* Unicode-3.0 additions */
173    case HB_SCRIPT_MONGOLIAN:
174    case HB_SCRIPT_SYRIAC:
175
176    /* Unicode-5.0 additions */
177    case HB_SCRIPT_NKO:
178    case HB_SCRIPT_PHAGS_PA:
179
180    /* Unicode-6.0 additions */
181    case HB_SCRIPT_MANDAIC:
182
183    /* Unicode-7.0 additions */
184    case HB_SCRIPT_MANICHAEAN:
185    case HB_SCRIPT_PSALTER_PAHLAVI:
186
187      return true;
188
189    default:
190      return false;
191  }
192}
193
194static void *
195data_create_use (const hb_ot_shape_plan_t *plan)
196{
197  use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t));
198  if (unlikely (!use_plan))
199    return NULL;
200
201  use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f'));
202
203  if (has_arabic_joining (plan->props.script))
204  {
205    use_plan->arabic_plan = (arabic_shape_plan_t *) data_create_arabic (plan);
206    if (unlikely (!use_plan->arabic_plan))
207    {
208      free (use_plan);
209      return NULL;
210    }
211  }
212
213  return use_plan;
214}
215
216static void
217data_destroy_use (void *data)
218{
219  use_shape_plan_t *use_plan = (use_shape_plan_t *) data;
220
221  if (use_plan->arabic_plan)
222    data_destroy_arabic (use_plan->arabic_plan);
223
224  free (data);
225}
226
/* Syllable kinds; stored in the low four bits of each glyph's syllable()
 * value (callers in this file extract it with `& 0x0F`). */
enum syllable_type_t {
  independent_cluster              = 0,
  virama_terminated_cluster        = 1,
  consonant_cluster                = 2,
  vowel_cluster                    = 3,
  number_joiner_terminated_cluster = 4,
  numeral_cluster                  = 5,
  symbol_cluster                   = 6,
  broken_cluster                   = 7,
};
237
238#include "hb-ot-shape-complex-use-machine.hh"
239
240
241static void
242setup_masks_use (const hb_ot_shape_plan_t *plan,
243                 hb_buffer_t              *buffer,
244                 hb_font_t                *font HB_UNUSED)
245{
246  const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
247
248  /* Do this before allocating use_category(). */
249  if (use_plan->arabic_plan)
250  {
251    setup_masks_arabic_plan (use_plan->arabic_plan, buffer, plan->props.script);
252  }
253
254  HB_BUFFER_ALLOCATE_VAR (buffer, use_category);
255
256  /* We cannot setup masks here.  We save information about characters
257   * and setup masks later on in a pause-callback. */
258
259  unsigned int count = buffer->len;
260  hb_glyph_info_t *info = buffer->info;
261  for (unsigned int i = 0; i < count; i++)
262    info[i].use_category() = hb_use_get_categories (info[i].codepoint);
263}
264
265static void
266setup_rphf_mask (const hb_ot_shape_plan_t *plan,
267                 hb_buffer_t *buffer)
268{
269  const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
270
271  hb_mask_t mask = use_plan->rphf_mask;
272  if (!mask) return;
273
274  hb_glyph_info_t *info = buffer->info;
275
276  foreach_syllable (buffer, start, end)
277  {
278    unsigned int limit = info[start].use_category() == USE_R ? 1 : MIN (3u, end - start);
279    for (unsigned int i = start; i < start + limit; i++)
280      info[i].mask |= mask;
281  }
282}
283
284static void
285setup_topographical_masks (const hb_ot_shape_plan_t *plan,
286                           hb_buffer_t *buffer)
287{
288
289  ASSERT_STATIC (INIT < 4 && ISOL < 4 && MEDI < 4 && FINA < 4);
290  hb_mask_t masks[4], all_masks = 0;
291  for (unsigned int i = 0; i < 4; i++)
292  {
293    masks[i] = plan->map.get_1_mask (arabic_features[i]);
294    if (masks[i] == plan->map.get_global_mask ())
295      masks[i] = 0;
296    all_masks |= masks[i];
297  }
298  if (!all_masks)
299    return;
300  hb_mask_t other_masks = ~all_masks;
301
302  unsigned int last_start = 0;
303  joining_form_t last_form = _NONE;
304  hb_glyph_info_t *info = buffer->info;
305  foreach_syllable (buffer, start, end)
306  {
307    syllable_type_t syllable_type = (syllable_type_t) (info[start].syllable() & 0x0F);
308    switch (syllable_type)
309    {
310      case independent_cluster:
311      case symbol_cluster:
312        /* These don't join.  Nothing to do. */
313        last_form = _NONE;
314        break;
315
316      case virama_terminated_cluster:
317      case consonant_cluster:
318      case vowel_cluster:
319      case number_joiner_terminated_cluster:
320      case numeral_cluster:
321      case broken_cluster:
322
323        bool join = last_form == FINA || last_form == ISOL;
324
325        if (join)
326        {
327          /* Fixup previous syllable's form. */
328          last_form = last_form == FINA ? MEDI : INIT;
329          for (unsigned int i = last_start; i < start; i++)
330            info[i].mask = (info[i].mask & other_masks) | masks[last_form];
331        }
332
333        /* Form for this syllable. */
334        last_form = join ? FINA : ISOL;
335        for (unsigned int i = start; i < end; i++)
336          info[i].mask = (info[i].mask & other_masks) | masks[last_form];
337
338        break;
339    }
340
341    last_start = start;
342  }
343}
344
345static void
346setup_syllables (const hb_ot_shape_plan_t *plan,
347                 hb_font_t *font HB_UNUSED,
348                 hb_buffer_t *buffer)
349{
350  find_syllables (buffer);
351  setup_rphf_mask (plan, buffer);
352  setup_topographical_masks (plan, buffer);
353}
354
355static void
356clear_substitution_flags (const hb_ot_shape_plan_t *plan,
357                          hb_font_t *font HB_UNUSED,
358                          hb_buffer_t *buffer)
359{
360  hb_glyph_info_t *info = buffer->info;
361  unsigned int count = buffer->len;
362  for (unsigned int i = 0; i < count; i++)
363    _hb_glyph_info_clear_substituted_and_ligated_and_multiplied (&info[i]);
364}
365
366static void
367record_rphf (const hb_ot_shape_plan_t *plan,
368             hb_font_t *font,
369             hb_buffer_t *buffer)
370{
371  const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
372
373  hb_mask_t mask = use_plan->rphf_mask;
374  if (!mask) return;
375  hb_glyph_info_t *info = buffer->info;
376
377  foreach_syllable (buffer, start, end)
378  {
379    /* Mark a substituted repha as USE_R. */
380    for (unsigned int i = start; i < end && (info[i].mask & mask); i++)
381      if (_hb_glyph_info_substituted (&info[i]))
382      {
383        info[i].use_category() = USE_R;
384        break;
385      }
386  }
387}
388
389static void
390record_pref (const hb_ot_shape_plan_t *plan,
391             hb_font_t *font,
392             hb_buffer_t *buffer)
393{
394  hb_glyph_info_t *info = buffer->info;
395
396  foreach_syllable (buffer, start, end)
397  {
398    /* Mark a substituted pref as VPre, as they behave the same way. */
399    for (unsigned int i = start; i < end; i++)
400      if (_hb_glyph_info_substituted (&info[i]))
401      {
402        info[i].use_category() = USE_VPre;
403        break;
404      }
405  }
406}
407
408static void
409reorder_syllable (hb_buffer_t *buffer, unsigned int start, unsigned int end)
410{
411  syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
412  /* Only a few syllable types need reordering. */
413  if (unlikely (!(FLAG_SAFE (syllable_type) &
414                  (FLAG (virama_terminated_cluster) |
415                   FLAG (consonant_cluster) |
416                   FLAG (vowel_cluster) |
417                   FLAG (broken_cluster) |
418                   0))))
419    return;
420
421  hb_glyph_info_t *info = buffer->info;
422
423#define HALANT_FLAGS FLAG(USE_H)
424#define BASE_FLAGS (FLAG (USE_B) | FLAG (USE_GB) | FLAG (USE_IV))
425
426  /* Move things forward. */
427  if (info[start].use_category() == USE_R && end - start > 1)
428  {
429    /* Got a repha.  Reorder it to after first base, before first halant. */
430    for (unsigned int i = start + 1; i < end; i++)
431      if (FLAG_UNSAFE (info[i].use_category()) & (HALANT_FLAGS | BASE_FLAGS))
432      {
433        /* If we hit a halant, move before it; otherwise it's a base: move to it's
434         * place, and shift things in between backward. */
435
436        if (info[i].use_category() == USE_H)
437          i--;
438
439        buffer->merge_clusters (start, i + 1);
440        hb_glyph_info_t t = info[start];
441        memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0]));
442        info[i] = t;
443
444        break;
445      }
446  }
447
448  /* Move things back. */
449  unsigned int j = end;
450  for (unsigned int i = start; i < end; i++)
451  {
452    uint32_t flag = FLAG_UNSAFE (info[i].use_category());
453    if (flag & (HALANT_FLAGS | BASE_FLAGS))
454    {
455      /* If we hit a halant, move before it; otherwise it's a base: move to it's
456       * place, and shift things in between backward. */
457      if (info[i].use_category() == USE_H)
458        j = i + 1;
459      else
460        j = i;
461    }
462    else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) &&
463             /* Only move the first component of a MultipleSubst. */
464             0 == _hb_glyph_info_get_lig_comp (&info[i]) &&
465             j < i)
466    {
467      buffer->merge_clusters (j, i + 1);
468      hb_glyph_info_t t = info[i];
469      memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0]));
470      info[j] = t;
471    }
472  }
473}
474
475static inline void
476insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
477                       hb_font_t *font,
478                       hb_buffer_t *buffer)
479{
480  /* Note: This loop is extra overhead, but should not be measurable. */
481  bool has_broken_syllables = false;
482  unsigned int count = buffer->len;
483  hb_glyph_info_t *info = buffer->info;
484  for (unsigned int i = 0; i < count; i++)
485    if ((info[i].syllable() & 0x0F) == broken_cluster)
486    {
487      has_broken_syllables = true;
488      break;
489    }
490  if (likely (!has_broken_syllables))
491    return;
492
493
494  hb_codepoint_t dottedcircle_glyph;
495  if (!font->get_glyph (0x25CCu, 0, &dottedcircle_glyph))
496    return;
497
498  hb_glyph_info_t dottedcircle = {0};
499  if (!font->get_glyph (0x25CCu, 0, &dottedcircle.codepoint))
500    return;
501  dottedcircle.use_category() = hb_use_get_categories (0x25CC);
502
503  buffer->clear_output ();
504
505  buffer->idx = 0;
506
507  unsigned int last_syllable = 0;
508  while (buffer->idx < buffer->len)
509  {
510    unsigned int syllable = buffer->cur().syllable();
511    syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
512    if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
513    {
514      last_syllable = syllable;
515
516      hb_glyph_info_t info = dottedcircle;
517      info.cluster = buffer->cur().cluster;
518      info.mask = buffer->cur().mask;
519      info.syllable() = buffer->cur().syllable();
520      /* TODO Set glyph_props? */
521
522      /* Insert dottedcircle after possible Repha. */
523      while (buffer->idx < buffer->len &&
524             last_syllable == buffer->cur().syllable() &&
525             buffer->cur().use_category() == USE_R)
526        buffer->next_glyph ();
527
528      buffer->output_info (info);
529    }
530    else
531      buffer->next_glyph ();
532  }
533
534  buffer->swap_buffers ();
535}
536
537static void
538reorder (const hb_ot_shape_plan_t *plan,
539         hb_font_t *font,
540         hb_buffer_t *buffer)
541{
542  insert_dotted_circles (plan, font, buffer);
543
544  hb_glyph_info_t *info = buffer->info;
545
546  foreach_syllable (buffer, start, end)
547    reorder_syllable (buffer, start, end);
548
549  /* Zero syllables now... */
550  unsigned int count = buffer->len;
551  for (unsigned int i = 0; i < count; i++)
552    info[i].syllable() = 0;
553
554  HB_BUFFER_DEALLOCATE_VAR (buffer, use_category);
555}
556
557static bool
558compose_use (const hb_ot_shape_normalize_context_t *c,
559             hb_codepoint_t  a,
560             hb_codepoint_t  b,
561             hb_codepoint_t *ab)
562{
563  /* Avoid recomposing split matras. */
564  if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
565    return false;
566
567  return c->unicode->compose (a, b, ab);
568}
569
570
571const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
572{
573  "use",
574  collect_features_use,
575  NULL, /* override_features */
576  data_create_use,
577  data_destroy_use,
578  NULL, /* preprocess_text */
579  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
580  NULL, /* decompose */
581  compose_use,
582  setup_masks_use,
583  HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
584  false, /* fallback_position */
585};
586