1/*
 * Copyright © 2015  Mozilla Foundation.
 * Copyright © 2015  Google, Inc.
4 *
5 *  This is part of HarfBuzz, a text shaping library.
6 *
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
12 *
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
18 *
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 *
25 * Mozilla Author(s): Jonathan Kew
26 * Google Author(s): Behdad Esfahbod
27 */
28
29#include "hb-ot-shape-complex-use-private.hh"
30#include "hb-ot-shape-complex-arabic-private.hh"
31
/* Buffer var allocations: the per-glyph USE category is kept in the
 * complex_var_u8_0() slot of each glyph info. */
33#define use_category() complex_var_u8_0()
34
35
36/*
37 * Universal Shaping Engine.
38 * https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
39 */
40
41static const hb_tag_t
42basic_features[] =
43{
44  /*
45   * Basic features.
46   * These features are applied all at once, before reordering.
47   */
48  HB_TAG('r','k','r','f'),
49  HB_TAG('a','b','v','f'),
50  HB_TAG('b','l','w','f'),
51  HB_TAG('h','a','l','f'),
52  HB_TAG('p','s','t','f'),
53  HB_TAG('v','a','t','u'),
54  HB_TAG('c','j','c','t'),
55};
56static const hb_tag_t
57arabic_features[] =
58{
59  HB_TAG('i','s','o','l'),
60  HB_TAG('i','n','i','t'),
61  HB_TAG('m','e','d','i'),
62  HB_TAG('f','i','n','a'),
63  /* The spec doesn't specify these but we apply anyway, since our Arabic shaper
64   * does.  These are only used in Syriac spec. */
65  HB_TAG('m','e','d','2'),
66  HB_TAG('f','i','n','2'),
67  HB_TAG('f','i','n','3'),
68};
/*
 * Joining form of a syllable.  The four real forms are listed in the same
 * order as the leading entries of arabic_features, because they are used
 * as indices into a mask array built from that table.  The Syriac-only
 * forms are not needed here.
 */
enum joining_form_t
{
  ISOL,
  INIT,
  MEDI,
  FINA,
  _NONE  /* No joining form; set for syllable types that do not join. */
};
77static const hb_tag_t
78other_features[] =
79{
80  /*
81   * Other features.
82   * These features are applied all at once, after reordering.
83   */
84  HB_TAG('a','b','v','s'),
85  HB_TAG('b','l','w','s'),
86  HB_TAG('h','a','l','n'),
87  HB_TAG('p','r','e','s'),
88  HB_TAG('p','s','t','s'),
89  /* Positioning features, though we don't care about the types. */
90  HB_TAG('d','i','s','t'),
91  HB_TAG('a','b','v','m'),
92  HB_TAG('b','l','w','m'),
93};
94
95static void
96setup_syllables (const hb_ot_shape_plan_t *plan,
97                 hb_font_t *font,
98                 hb_buffer_t *buffer);
99static void
100clear_substitution_flags (const hb_ot_shape_plan_t *plan,
101                          hb_font_t *font,
102                          hb_buffer_t *buffer);
103static void
104record_rphf (const hb_ot_shape_plan_t *plan,
105             hb_font_t *font,
106             hb_buffer_t *buffer);
107static void
108record_pref (const hb_ot_shape_plan_t *plan,
109             hb_font_t *font,
110             hb_buffer_t *buffer);
111static void
112reorder (const hb_ot_shape_plan_t *plan,
113         hb_font_t *font,
114         hb_buffer_t *buffer);
115
116static void
117collect_features_use (hb_ot_shape_planner_t *plan)
118{
119  hb_ot_map_builder_t *map = &plan->map;
120
121  /* Do this before any lookups have been applied. */
122  map->add_gsub_pause (setup_syllables);
123
124  /* "Default glyph pre-processing group" */
125  map->add_global_bool_feature (HB_TAG('l','o','c','l'));
126  map->add_global_bool_feature (HB_TAG('c','c','m','p'));
127  map->add_global_bool_feature (HB_TAG('n','u','k','t'));
128  map->add_global_bool_feature (HB_TAG('a','k','h','n'));
129
130  /* "Reordering group" */
131  map->add_gsub_pause (clear_substitution_flags);
132  map->add_feature (HB_TAG('r','p','h','f'), 1, F_MANUAL_ZWJ);
133  map->add_gsub_pause (record_rphf);
134  map->add_gsub_pause (clear_substitution_flags);
135  map->add_feature (HB_TAG('p','r','e','f'), 1, F_GLOBAL | F_MANUAL_ZWJ);
136  map->add_gsub_pause (record_pref);
137
138  /* "Orthographic unit shaping group" */
139  for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
140    map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
141
142  map->add_gsub_pause (reorder);
143
144  /* "Topographical features" */
145  for (unsigned int i = 0; i < ARRAY_LENGTH (arabic_features); i++)
146    map->add_feature (arabic_features[i], 1, F_NONE);
147  map->add_gsub_pause (NULL);
148
149  /* "Standard typographic presentation" and "Positional feature application" */
150  for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
151    map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
152}
153
154struct use_shape_plan_t
155{
156  ASSERT_POD ();
157
158  hb_mask_t rphf_mask;
159
160  arabic_shape_plan_t *arabic_plan;
161};
162
163static bool
164has_arabic_joining (hb_script_t script)
165{
166  /* List of scripts that have data in arabic-table. */
167  switch ((int) script)
168  {
169    /* Unicode-1.1 additions */
170    case HB_SCRIPT_ARABIC:
171
172    /* Unicode-3.0 additions */
173    case HB_SCRIPT_MONGOLIAN:
174    case HB_SCRIPT_SYRIAC:
175
176    /* Unicode-5.0 additions */
177    case HB_SCRIPT_NKO:
178    case HB_SCRIPT_PHAGS_PA:
179
180    /* Unicode-6.0 additions */
181    case HB_SCRIPT_MANDAIC:
182
183    /* Unicode-7.0 additions */
184    case HB_SCRIPT_MANICHAEAN:
185    case HB_SCRIPT_PSALTER_PAHLAVI:
186
187    /* Unicode-9.0 additions */
188    case HB_SCRIPT_ADLAM:
189
190      return true;
191
192    default:
193      return false;
194  }
195}
196
197static void *
198data_create_use (const hb_ot_shape_plan_t *plan)
199{
200  use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t));
201  if (unlikely (!use_plan))
202    return NULL;
203
204  use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f'));
205
206  if (has_arabic_joining (plan->props.script))
207  {
208    use_plan->arabic_plan = (arabic_shape_plan_t *) data_create_arabic (plan);
209    if (unlikely (!use_plan->arabic_plan))
210    {
211      free (use_plan);
212      return NULL;
213    }
214  }
215
216  return use_plan;
217}
218
219static void
220data_destroy_use (void *data)
221{
222  use_shape_plan_t *use_plan = (use_shape_plan_t *) data;
223
224  if (use_plan->arabic_plan)
225    data_destroy_arabic (use_plan->arabic_plan);
226
227  free (data);
228}
229
/*
 * Syllable types.  Code in this file reads the type back from the low four
 * bits of a glyph's syllable() value.  Keep the order unchanged.
 */
enum syllable_type_t {
  independent_cluster,              /* does not join */
  virama_terminated_cluster,        /* joins; reordered */
  standard_cluster,                 /* joins; reordered */
  number_joiner_terminated_cluster, /* joins */
  numeral_cluster,                  /* joins */
  symbol_cluster,                   /* does not join */
  broken_cluster,                   /* joins; reordered; gets a dotted circle */
  non_cluster,                      /* does not join */
};
240
241#include "hb-ot-shape-complex-use-machine.hh"
242
243
244static void
245setup_masks_use (const hb_ot_shape_plan_t *plan,
246                 hb_buffer_t              *buffer,
247                 hb_font_t                *font HB_UNUSED)
248{
249  const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
250
251  /* Do this before allocating use_category(). */
252  if (use_plan->arabic_plan)
253  {
254    setup_masks_arabic_plan (use_plan->arabic_plan, buffer, plan->props.script);
255  }
256
257  HB_BUFFER_ALLOCATE_VAR (buffer, use_category);
258
259  /* We cannot setup masks here.  We save information about characters
260   * and setup masks later on in a pause-callback. */
261
262  unsigned int count = buffer->len;
263  hb_glyph_info_t *info = buffer->info;
264  for (unsigned int i = 0; i < count; i++)
265    info[i].use_category() = hb_use_get_categories (info[i].codepoint);
266}
267
268static void
269setup_rphf_mask (const hb_ot_shape_plan_t *plan,
270                 hb_buffer_t *buffer)
271{
272  const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
273
274  hb_mask_t mask = use_plan->rphf_mask;
275  if (!mask) return;
276
277  hb_glyph_info_t *info = buffer->info;
278
279  foreach_syllable (buffer, start, end)
280  {
281    unsigned int limit = info[start].use_category() == USE_R ? 1 : MIN (3u, end - start);
282    for (unsigned int i = start; i < start + limit; i++)
283      info[i].mask |= mask;
284  }
285}
286
287static void
288setup_topographical_masks (const hb_ot_shape_plan_t *plan,
289                           hb_buffer_t *buffer)
290{
291  const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
292  if (use_plan->arabic_plan)
293    return;
294
295  ASSERT_STATIC (INIT < 4 && ISOL < 4 && MEDI < 4 && FINA < 4);
296  hb_mask_t masks[4], all_masks = 0;
297  for (unsigned int i = 0; i < 4; i++)
298  {
299    masks[i] = plan->map.get_1_mask (arabic_features[i]);
300    if (masks[i] == plan->map.get_global_mask ())
301      masks[i] = 0;
302    all_masks |= masks[i];
303  }
304  if (!all_masks)
305    return;
306  hb_mask_t other_masks = ~all_masks;
307
308  unsigned int last_start = 0;
309  joining_form_t last_form = _NONE;
310  hb_glyph_info_t *info = buffer->info;
311  foreach_syllable (buffer, start, end)
312  {
313    syllable_type_t syllable_type = (syllable_type_t) (info[start].syllable() & 0x0F);
314    switch (syllable_type)
315    {
316      case independent_cluster:
317      case symbol_cluster:
318      case non_cluster:
319        /* These don't join.  Nothing to do. */
320        last_form = _NONE;
321        break;
322
323      case virama_terminated_cluster:
324      case standard_cluster:
325      case number_joiner_terminated_cluster:
326      case numeral_cluster:
327      case broken_cluster:
328
329        bool join = last_form == FINA || last_form == ISOL;
330
331        if (join)
332        {
333          /* Fixup previous syllable's form. */
334          last_form = last_form == FINA ? MEDI : INIT;
335          for (unsigned int i = last_start; i < start; i++)
336            info[i].mask = (info[i].mask & other_masks) | masks[last_form];
337        }
338
339        /* Form for this syllable. */
340        last_form = join ? FINA : ISOL;
341        for (unsigned int i = start; i < end; i++)
342          info[i].mask = (info[i].mask & other_masks) | masks[last_form];
343
344        break;
345    }
346
347    last_start = start;
348  }
349}
350
351static void
352setup_syllables (const hb_ot_shape_plan_t *plan,
353                 hb_font_t *font HB_UNUSED,
354                 hb_buffer_t *buffer)
355{
356  find_syllables (buffer);
357  setup_rphf_mask (plan, buffer);
358  setup_topographical_masks (plan, buffer);
359}
360
361static void
362clear_substitution_flags (const hb_ot_shape_plan_t *plan,
363                          hb_font_t *font HB_UNUSED,
364                          hb_buffer_t *buffer)
365{
366  hb_glyph_info_t *info = buffer->info;
367  unsigned int count = buffer->len;
368  for (unsigned int i = 0; i < count; i++)
369    _hb_glyph_info_clear_substituted (&info[i]);
370}
371
372static void
373record_rphf (const hb_ot_shape_plan_t *plan,
374             hb_font_t *font,
375             hb_buffer_t *buffer)
376{
377  const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
378
379  hb_mask_t mask = use_plan->rphf_mask;
380  if (!mask) return;
381  hb_glyph_info_t *info = buffer->info;
382
383  foreach_syllable (buffer, start, end)
384  {
385    /* Mark a substituted repha as USE_R. */
386    for (unsigned int i = start; i < end && (info[i].mask & mask); i++)
387      if (_hb_glyph_info_substituted (&info[i]))
388      {
389        info[i].use_category() = USE_R;
390        break;
391      }
392  }
393}
394
395static void
396record_pref (const hb_ot_shape_plan_t *plan,
397             hb_font_t *font,
398             hb_buffer_t *buffer)
399{
400  hb_glyph_info_t *info = buffer->info;
401
402  foreach_syllable (buffer, start, end)
403  {
404    /* Mark a substituted pref as VPre, as they behave the same way. */
405    for (unsigned int i = start; i < end; i++)
406      if (_hb_glyph_info_substituted (&info[i]))
407      {
408        info[i].use_category() = USE_VPre;
409        break;
410      }
411  }
412}
413
414static inline bool
415is_halant (const hb_glyph_info_t &info)
416{
417  return info.use_category() == USE_H && !_hb_glyph_info_ligated (&info);
418}
419
420static void
421reorder_syllable (hb_buffer_t *buffer, unsigned int start, unsigned int end)
422{
423  syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
424  /* Only a few syllable types need reordering. */
425  if (unlikely (!(FLAG_SAFE (syllable_type) &
426                  (FLAG (virama_terminated_cluster) |
427                   FLAG (standard_cluster) |
428                   FLAG (broken_cluster) |
429                   0))))
430    return;
431
432  hb_glyph_info_t *info = buffer->info;
433
434#define BASE_FLAGS (FLAG (USE_B) | FLAG (USE_GB))
435
436  /* Move things forward. */
437  if (info[start].use_category() == USE_R && end - start > 1)
438  {
439    /* Got a repha.  Reorder it to after first base, before first halant. */
440    for (unsigned int i = start + 1; i < end; i++)
441      if ((FLAG_UNSAFE (info[i].use_category()) & (BASE_FLAGS)) || is_halant (info[i]))
442      {
443        /* If we hit a halant, move before it; otherwise it's a base: move to it's
444         * place, and shift things in between backward. */
445
446        if (is_halant (info[i]))
447          i--;
448
449        buffer->merge_clusters (start, i + 1);
450        hb_glyph_info_t t = info[start];
451        memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0]));
452        info[i] = t;
453
454        break;
455      }
456  }
457
458  /* Move things back. */
459  unsigned int j = end;
460  for (unsigned int i = start; i < end; i++)
461  {
462    uint32_t flag = FLAG_UNSAFE (info[i].use_category());
463    if ((flag & (BASE_FLAGS)) || is_halant (info[i]))
464    {
465      /* If we hit a halant, move after it; otherwise it's a base: move to it's
466       * place, and shift things in between backward. */
467      if (is_halant (info[i]))
468        j = i + 1;
469      else
470        j = i;
471    }
472    else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) &&
473             /* Only move the first component of a MultipleSubst. */
474             0 == _hb_glyph_info_get_lig_comp (&info[i]) &&
475             j < i)
476    {
477      buffer->merge_clusters (j, i + 1);
478      hb_glyph_info_t t = info[i];
479      memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0]));
480      info[j] = t;
481    }
482  }
483}
484
485static inline void
486insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
487                       hb_font_t *font,
488                       hb_buffer_t *buffer)
489{
490  /* Note: This loop is extra overhead, but should not be measurable. */
491  bool has_broken_syllables = false;
492  unsigned int count = buffer->len;
493  hb_glyph_info_t *info = buffer->info;
494  for (unsigned int i = 0; i < count; i++)
495    if ((info[i].syllable() & 0x0F) == broken_cluster)
496    {
497      has_broken_syllables = true;
498      break;
499    }
500  if (likely (!has_broken_syllables))
501    return;
502
503  hb_glyph_info_t dottedcircle = {0};
504  if (!font->get_nominal_glyph (0x25CCu, &dottedcircle.codepoint))
505    return;
506  dottedcircle.use_category() = hb_use_get_categories (0x25CC);
507
508  buffer->clear_output ();
509
510  buffer->idx = 0;
511  unsigned int last_syllable = 0;
512  while (buffer->idx < buffer->len && !buffer->in_error)
513  {
514    unsigned int syllable = buffer->cur().syllable();
515    syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
516    if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
517    {
518      last_syllable = syllable;
519
520      hb_glyph_info_t ginfo = dottedcircle;
521      ginfo.cluster = buffer->cur().cluster;
522      ginfo.mask = buffer->cur().mask;
523      ginfo.syllable() = buffer->cur().syllable();
524      /* TODO Set glyph_props? */
525
526      /* Insert dottedcircle after possible Repha. */
527      while (buffer->idx < buffer->len && !buffer->in_error &&
528             last_syllable == buffer->cur().syllable() &&
529             buffer->cur().use_category() == USE_R)
530        buffer->next_glyph ();
531
532      buffer->output_info (ginfo);
533    }
534    else
535      buffer->next_glyph ();
536  }
537
538  buffer->swap_buffers ();
539}
540
541static void
542reorder (const hb_ot_shape_plan_t *plan,
543         hb_font_t *font,
544         hb_buffer_t *buffer)
545{
546  insert_dotted_circles (plan, font, buffer);
547
548  hb_glyph_info_t *info = buffer->info;
549
550  foreach_syllable (buffer, start, end)
551    reorder_syllable (buffer, start, end);
552
553  /* Zero syllables now... */
554  unsigned int count = buffer->len;
555  for (unsigned int i = 0; i < count; i++)
556    info[i].syllable() = 0;
557
558  HB_BUFFER_DEALLOCATE_VAR (buffer, use_category);
559}
560
561static bool
562decompose_use (const hb_ot_shape_normalize_context_t *c,
563                hb_codepoint_t  ab,
564                hb_codepoint_t *a,
565                hb_codepoint_t *b)
566{
567  switch (ab)
568  {
569    /* Chakma:
570     * Special case where the Unicode decomp gives matras in the wrong order
571     * for cluster validation.
572     */
573    case 0x1112Eu : *a = 0x11127u; *b= 0x11131u; return true;
574    case 0x1112Fu : *a = 0x11127u; *b= 0x11132u; return true;
575
576    /*
577     * Decompose split matras that don't have Unicode decompositions.
578     */
579
580    /* Limbu */
581    case 0x1925u  : *a = 0x1920u; *b= 0x1923u; return true;
582    case 0x1926u  : *a = 0x1920u; *b= 0x1924u; return true;
583
584    /* Balinese */
585    case 0x1B3Cu  : *a = 0x1B42u; *b= 0x1B3Cu; return true;
586
587#if 0
588    /* Lepcha */
589    case 0x1C29u  : *a = no decomp, -> LEFT; return true;
590
591    /* Javanese */
592    case 0xA9C0u  : *a = no decomp, -> RIGHT; return true;
593
594    /* Sharada */
595    case 0x111BFu  : *a = no decomp, -> ABOVE; return true;
596#endif
597  }
598
599  return (bool) c->unicode->decompose (ab, a, b);
600}
601
602static bool
603compose_use (const hb_ot_shape_normalize_context_t *c,
604             hb_codepoint_t  a,
605             hb_codepoint_t  b,
606             hb_codepoint_t *ab)
607{
608  /* Avoid recomposing split matras. */
609  if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
610    return false;
611
612  return (bool)c->unicode->compose (a, b, ab);
613}
614
615
616const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
617{
618  "use",
619  collect_features_use,
620  NULL, /* override_features */
621  data_create_use,
622  data_destroy_use,
623  NULL, /* preprocess_text */
624  NULL, /* postprocess_glyphs */
625  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
626  decompose_use,
627  compose_use,
628  setup_masks_use,
629  NULL, /* disable_otl */
630  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
631  false, /* fallback_position */
632};
633