1/* Line breaking of UTF-8 strings. 2 Copyright (C) 2001-2003, 2006-2010 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2001. 4 5 This program is free software: you can redistribute it and/or modify it 6 under the terms of the GNU Lesser General Public License as published 7 by the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#include <config.h> 19 20/* Specification. */ 21#include "unilbrk.h" 22 23#include "unistr.h" 24#include "uniwidth.h" 25 26int 27u8_width_linebreaks (const uint8_t *s, size_t n, 28 int width, int start_column, int at_end_columns, 29 const char *o, const char *encoding, 30 char *p) 31{ 32 const uint8_t *s_end; 33 char *last_p; 34 int last_column; 35 int piece_width; 36 37 u8_possible_linebreaks (s, n, encoding, p); 38 39 s_end = s + n; 40 last_p = NULL; 41 last_column = start_column; 42 piece_width = 0; 43 while (s < s_end) 44 { 45 ucs4_t uc; 46 int count = u8_mbtouc_unsafe (&uc, s, s_end - s); 47 48 /* Respect the override. */ 49 if (o != NULL && *o != UC_BREAK_UNDEFINED) 50 *p = *o; 51 52 if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY) 53 { 54 /* An atomic piece of text ends here. */ 55 if (last_p != NULL && last_column + piece_width > width) 56 { 57 /* Insert a line break. */ 58 *last_p = UC_BREAK_POSSIBLE; 59 last_column = 0; 60 } 61 } 62 63 if (*p == UC_BREAK_MANDATORY) 64 { 65 /* uc is a line break character. */ 66 /* Start a new piece at column 0. */ 67 last_p = NULL; 68 last_column = 0; 69 piece_width = 0; 70 } 71 else 72 { 73 /* uc is not a line break character. */ 74 int w; 75 76 if (*p == UC_BREAK_POSSIBLE) 77 { 78 /* Start a new piece. */ 79 last_p = p; 80 last_column += piece_width; 81 piece_width = 0; 82 /* No line break for the moment, may be turned into 83 UC_BREAK_POSSIBLE later, via last_p. */ 84 } 85 86 *p = UC_BREAK_PROHIBITED; 87 88 w = uc_width (uc, encoding); 89 if (w >= 0) /* ignore control characters in the string */ 90 piece_width += w; 91 } 92 93 s += count; 94 p += count; 95 if (o != NULL) 96 o += count; 97 } 98 99 /* The last atomic piece of text ends here. */ 100 if (last_p != NULL && last_column + piece_width + at_end_columns > width) 101 { 102 /* Insert a line break. */ 103 *last_p = UC_BREAK_POSSIBLE; 104 last_column = 0; 105 } 106 107 return last_column + piece_width; 108} 109 110 111#ifdef TEST 112 113#include <stdio.h> 114#include <stdlib.h> 115#include <string.h> 116 117/* Read the contents of an input stream, and return it, terminated with a NUL 118 byte. */ 119char * 120read_file (FILE *stream) 121{ 122#define BUFSIZE 4096 123 char *buf = NULL; 124 int alloc = 0; 125 int size = 0; 126 int count; 127 128 while (! feof (stream)) 129 { 130 if (size + BUFSIZE > alloc) 131 { 132 alloc = alloc + alloc / 2; 133 if (alloc < size + BUFSIZE) 134 alloc = size + BUFSIZE; 135 buf = realloc (buf, alloc); 136 if (buf == NULL) 137 { 138 fprintf (stderr, "out of memory\n"); 139 exit (1); 140 } 141 } 142 count = fread (buf + size, 1, BUFSIZE, stream); 143 if (count == 0) 144 { 145 if (ferror (stream)) 146 { 147 perror ("fread"); 148 exit (1); 149 } 150 } 151 else 152 size += count; 153 } 154 buf = realloc (buf, size + 1); 155 if (buf == NULL) 156 { 157 fprintf (stderr, "out of memory\n"); 158 exit (1); 159 } 160 buf[size] = '\0'; 161 return buf; 162#undef BUFSIZE 163} 164 165int 166main (int argc, char * argv[]) 167{ 168 if (argc == 2) 169 { 170 /* Insert line breaks for a given width. */ 171 int width = atoi (argv[1]); 172 char *input = read_file (stdin); 173 int length = strlen (input); 174 char *breaks = malloc (length); 175 int i; 176 177 u8_width_linebreaks ((uint8_t *) input, length, width, 0, 0, NULL, "UTF-8", breaks); 178 179 for (i = 0; i < length; i++) 180 { 181 switch (breaks[i]) 182 { 183 case UC_BREAK_POSSIBLE: 184 putc ('\n', stdout); 185 break; 186 case UC_BREAK_MANDATORY: 187 break; 188 case UC_BREAK_PROHIBITED: 189 break; 190 default: 191 abort (); 192 } 193 putc (input[i], stdout); 194 } 195 196 free (breaks); 197 198 return 0; 199 } 200 else 201 return 1; 202} 203 204#endif /* TEST */ 205