1/* Line breaking of UTF-8 strings.
2   Copyright (C) 2001-2003, 2006-2010 Free Software Foundation, Inc.
3   Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5   This program is free software: you can redistribute it and/or modify it
6   under the terms of the GNU Lesser General Public License as published
7   by the Free Software Foundation; either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#include <config.h>
19
20/* Specification.  */
21#include "unilbrk.h"
22
23#include "unistr.h"
24#include "uniwidth.h"
25
26int
27u8_width_linebreaks (const uint8_t *s, size_t n,
28                     int width, int start_column, int at_end_columns,
29                     const char *o, const char *encoding,
30                     char *p)
31{
32  const uint8_t *s_end;
33  char *last_p;
34  int last_column;
35  int piece_width;
36
37  u8_possible_linebreaks (s, n, encoding, p);
38
39  s_end = s + n;
40  last_p = NULL;
41  last_column = start_column;
42  piece_width = 0;
43  while (s < s_end)
44    {
45      ucs4_t uc;
46      int count = u8_mbtouc_unsafe (&uc, s, s_end - s);
47
48      /* Respect the override.  */
49      if (o != NULL && *o != UC_BREAK_UNDEFINED)
50        *p = *o;
51
52      if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY)
53        {
54          /* An atomic piece of text ends here.  */
55          if (last_p != NULL && last_column + piece_width > width)
56            {
57              /* Insert a line break.  */
58              *last_p = UC_BREAK_POSSIBLE;
59              last_column = 0;
60            }
61        }
62
63      if (*p == UC_BREAK_MANDATORY)
64        {
65          /* uc is a line break character.  */
66          /* Start a new piece at column 0.  */
67          last_p = NULL;
68          last_column = 0;
69          piece_width = 0;
70        }
71      else
72        {
73          /* uc is not a line break character.  */
74          int w;
75
76          if (*p == UC_BREAK_POSSIBLE)
77            {
78              /* Start a new piece.  */
79              last_p = p;
80              last_column += piece_width;
81              piece_width = 0;
82              /* No line break for the moment, may be turned into
83                 UC_BREAK_POSSIBLE later, via last_p. */
84            }
85
86          *p = UC_BREAK_PROHIBITED;
87
88          w = uc_width (uc, encoding);
89          if (w >= 0) /* ignore control characters in the string */
90            piece_width += w;
91        }
92
93      s += count;
94      p += count;
95      if (o != NULL)
96        o += count;
97    }
98
99  /* The last atomic piece of text ends here.  */
100  if (last_p != NULL && last_column + piece_width + at_end_columns > width)
101    {
102      /* Insert a line break.  */
103      *last_p = UC_BREAK_POSSIBLE;
104      last_column = 0;
105    }
106
107  return last_column + piece_width;
108}
109
110
111#ifdef TEST
112
113#include <stdio.h>
114#include <stdlib.h>
115#include <string.h>
116
/* Read the contents of an input stream, and return it, terminated with a NUL
   byte.
   The result is allocated with realloc; the caller is responsible for
   passing it to free.
   On a read error or on memory exhaustion, a message is printed to stderr
   and the program exits with status 1, so the function never returns
   NULL.  */
char *
read_file (FILE *stream)
{
#define BUFSIZE 4096
  char *buf = NULL;
  size_t alloc = 0;  /* number of bytes allocated at buf */
  size_t size = 0;   /* number of bytes read so far; always <= alloc */
  char *resized;

  while (! feof (stream))
    {
      size_t count;

      /* Make sure there is room for one more full chunk.  */
      if (alloc - size < BUFSIZE)
        {
          size_t needed;
          size_t grown;

          /* Guard against size_t overflow of size + BUFSIZE and of the
             final size + 1.  */
          if (size > (size_t) -1 - BUFSIZE - 1)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          needed = size + BUFSIZE;

          /* Grow by a factor of 1.5, but at least to what is needed.
             If alloc + alloc / 2 wraps around, the result is smaller
             than alloc and therefore smaller than needed, so the
             comparison below also covers that case.  */
          grown = alloc + alloc / 2;
          if (grown < needed)
            grown = needed;

          resized = realloc (buf, grown);
          if (resized == NULL)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          buf = resized;
          alloc = grown;
        }

      count = fread (buf + size, 1, BUFSIZE, stream);
      /* A short read may be caused by an error as well as by end of
         file; report the error right away instead of relying on the
         next fread call failing again.  */
      if (ferror (stream))
        {
          perror ("fread");
          exit (1);
        }
      size += count;
    }

  /* Shrink (or, for empty input, create) the buffer to its final size,
     including the terminating NUL byte.  */
  resized = realloc (buf, size + 1);
  if (resized == NULL)
    {
      fprintf (stderr, "out of memory\n");
      exit (1);
    }
  buf = resized;
  buf[size] = '\0';
  return buf;
#undef BUFSIZE
}
164
165int
166main (int argc, char * argv[])
167{
168  if (argc == 2)
169    {
170      /* Insert line breaks for a given width.  */
171      int width = atoi (argv[1]);
172      char *input = read_file (stdin);
173      int length = strlen (input);
174      char *breaks = malloc (length);
175      int i;
176
177      u8_width_linebreaks ((uint8_t *) input, length, width, 0, 0, NULL, "UTF-8", breaks);
178
179      for (i = 0; i < length; i++)
180        {
181          switch (breaks[i])
182            {
183            case UC_BREAK_POSSIBLE:
184              putc ('\n', stdout);
185              break;
186            case UC_BREAK_MANDATORY:
187              break;
188            case UC_BREAK_PROHIBITED:
189              break;
190            default:
191              abort ();
192            }
193          putc (input[i], stdout);
194        }
195
196      free (breaks);
197
198      return 0;
199    }
200  else
201    return 1;
202}
203
204#endif /* TEST */
205