1/* Line breaking of UTF-32 strings.
2   Copyright (C) 2001-2003, 2006-2010 Free Software Foundation, Inc.
3   Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5   This program is free software: you can redistribute it and/or modify it
6   under the terms of the GNU Lesser General Public License as published
7   by the Free Software Foundation; either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#include <config.h>
19
20/* Specification.  */
21#include "unilbrk.h"
22
23#include <stdlib.h>
24
25#include "unilbrk/lbrktables.h"
26#include "uniwidth/cjk.h"
27
28void
29u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding, char *p)
30{
31  int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID : LBP_AL);
32  const uint32_t *s_end = s + n;
33  int last_prop = LBP_BK; /* line break property of last non-space character */
34  char *seen_space = NULL; /* Was a space seen after the last non-space character? */
35  char *seen_space2 = NULL; /* At least two spaces after the last non-space? */
36
37  while (s < s_end)
38    {
39      ucs4_t uc = *s;
40      int prop = unilbrkprop_lookup (uc);
41
42      if (prop == LBP_BK)
43        {
44          /* Mandatory break.  */
45          *p = UC_BREAK_MANDATORY;
46          last_prop = LBP_BK;
47          seen_space = NULL;
48          seen_space2 = NULL;
49        }
50      else
51        {
52          char *q;
53
54          /* Resolve property values whose behaviour is not fixed.  */
55          switch (prop)
56            {
57            case LBP_AI:
58              /* Resolve ambiguous.  */
59              prop = LBP_AI_REPLACEMENT;
60              break;
61            case LBP_CB:
62              /* This is arbitrary.  */
63              prop = LBP_ID;
64              break;
65            case LBP_SA:
66              /* We don't handle complex scripts yet.
67                 Treat LBP_SA like LBP_XX.  */
68            case LBP_XX:
69              /* This is arbitrary.  */
70              prop = LBP_AL;
71              break;
72            }
73
74          /* Deal with spaces and combining characters.  */
75          q = p;
76          if (prop == LBP_SP)
77            {
78              /* Don't break just before a space.  */
79              *p = UC_BREAK_PROHIBITED;
80              seen_space2 = seen_space;
81              seen_space = p;
82            }
83          else if (prop == LBP_ZW)
84            {
85              /* Don't break just before a zero-width space.  */
86              *p = UC_BREAK_PROHIBITED;
87              last_prop = LBP_ZW;
88              seen_space = NULL;
89              seen_space2 = NULL;
90            }
91          else if (prop == LBP_CM)
92            {
93              /* Don't break just before a combining character, except immediately after a
94                 zero-width space.  */
95              if (last_prop == LBP_ZW)
96                {
97                  /* Break after zero-width space.  */
98                  *p = UC_BREAK_POSSIBLE;
99                  /* A combining character turns a preceding space into LBP_ID.  */
100                  last_prop = LBP_ID;
101                }
102              else
103                {
104                  *p = UC_BREAK_PROHIBITED;
105                  /* A combining character turns a preceding space into LBP_ID.  */
106                  if (seen_space != NULL)
107                    {
108                      q = seen_space;
109                      seen_space = seen_space2;
110                      prop = LBP_ID;
111                      goto lookup_via_table;
112                    }
113                }
114            }
115          else
116            {
117             lookup_via_table:
118              /* prop must be usable as an index for table 7.3 of UTR #14.  */
119              if (!(prop >= 0 && prop < sizeof (unilbrk_table) / sizeof (unilbrk_table[0])))
120                abort ();
121
122              if (last_prop == LBP_BK)
123                {
124                  /* Don't break at the beginning of a line.  */
125                  *q = UC_BREAK_PROHIBITED;
126                }
127              else if (last_prop == LBP_ZW)
128                {
129                  /* Break after zero-width space.  */
130                  *q = UC_BREAK_POSSIBLE;
131                }
132              else
133                {
134                  switch (unilbrk_table [last_prop] [prop])
135                    {
136                    case D:
137                      *q = UC_BREAK_POSSIBLE;
138                      break;
139                    case I:
140                      *q = (seen_space != NULL ? UC_BREAK_POSSIBLE : UC_BREAK_PROHIBITED);
141                      break;
142                    case P:
143                      *q = UC_BREAK_PROHIBITED;
144                      break;
145                    default:
146                      abort ();
147                    }
148                }
149              last_prop = prop;
150              seen_space = NULL;
151              seen_space2 = NULL;
152            }
153        }
154
155      s++;
156      p++;
157    }
158}
159