Deleted Added
sdiff udiff text old ( 128345 ) new ( 161475 )
full compact
1/*
2 * Copyright (C) 1984-2002 Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information about less, or for information on how to
8 * contact the author, see the README file.
9 */
10
11
12/*
13 * Functions to define the character set
14 * and do things specific to the character set.
15 */
16
#include "less.h"
#include <stdio.h>
#if HAVE_LOCALE
#include <locale.h>
#include <ctype.h>
#endif
22
23public int utf_mode = 0;
24
25/*
26 * Predefined character sets,
27 * selected by the LESSCHARSET environment variable.
28 */
29struct charset {
30 char *name;
31 int *p_flag;
32 char *desc;
33} charsets[] = {
34 { "ascii", NULL, "8bcccbcc18b95.b" },
35 { "dos", NULL, "8bcccbcc12bc5b223.b" },
36 { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." },
37 { "IBM-1047", NULL, "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" },
38 { "iso8859", NULL, "8bcccbcc18b95.33b." },
39 { "koi8-r", NULL, "8bcccbcc18b95.b128." },
40 { "next", NULL, "8bcccbcc18b95.bb125.bb" },
41 { "utf-8", &utf_mode, "8bcccbcc18b." },
42 { NULL, NULL, NULL }
43};
44
/*
 * Alternate names for predefined character sets.
 *
 *	name	the alias
 *	oname	the charset name the alias stands for
 */
struct cs_alias {
	char *name;
	char *oname;
};

/*
 * The alias table.  The all-NULL entry marks the end.
 */
struct cs_alias cs_aliases[] = {
	{ "latin1",	"iso8859" },
	{ "latin9",	"iso8859" },
	{ NULL,		NULL }
};
53
54#define IS_BINARY_CHAR 01
55#define IS_CONTROL_CHAR 02
56
57static char chardef[256];
58static char *binfmt = NULL;
59public int binattr = AT_STANDOUT;
60
61
62/*
63 * Define a charset, given a description string.
64 * The string consists of 256 letters,
65 * one for each character in the charset.
66 * If the string is shorter than 256 letters, missing letters

--- 59 unchanged lines hidden (view full) ---

126 *cp++ = v;
127}
128
129/*
130 * Define a charset, given a charset name.
131 * The valid charset names are listed in the "charsets" array.
132 */
133 static int
134icharset(name)
135 register char *name;
136{
137 register struct charset *p;
138 register struct cs_alias *a;
139
140 if (name == NULL || *name == '\0')
141 return (0);
142
143 /* First see if the name is an alias. */

--- 12 unchanged lines hidden (view full) ---

156 {
157 ichardef(p->desc);
158 if (p->p_flag != NULL)
159 *(p->p_flag) = 1;
160 return (1);
161 }
162 }
163
164 error("invalid charset name", NULL_PARG);
165 quit(QUIT_ERROR);
166 /*NOTREACHED*/
167 return (0);
168}
169
170#if HAVE_LOCALE
171/*
172 * Define a charset, given a locale name.
173 */
174 static void
175ilocale()
176{
177 register int c;
178
179 setlocale(LC_ALL, "");
180 for (c = 0; c < (int) sizeof(chardef); c++)
181 {
182 if (isprint(c))
183 chardef[c] = 0;
184 else if (iscntrl(c))
185 chardef[c] = IS_CONTROL_CHAR;
186 else
187 chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR;
188 }
189}
190#endif
191
192/*
193 * Define the printing format for control chars.
194 */
195 public void
196setbinfmt(s)
197 char *s;
198{
199 if (s == NULL || *s == '\0')
200 s = "*s<%X>";
201 /*
202 * Select the attributes if it starts with "*".
203 */
204 if (*s == '*')
205 {
206 switch (s[1])
207 {
208 case 'd': binattr = AT_BOLD; break;
209 case 'k': binattr = AT_BLINK; break;
210 case 's': binattr = AT_STANDOUT; break;
211 case 'u': binattr = AT_UNDERLINE; break;
212 default: binattr = AT_NORMAL; break;
213 }
214 s += 2;
215 }
216 binfmt = s;
217}
218
219/*
220 * Initialize charset data structures.
221 */
222 public void
223init_charset()
224{
225 register char *s;
226
227 s = lgetenv("LESSBINFMT");
228 setbinfmt(s);
229
230 /*
231 * See if environment variable LESSCHARSET is defined.
232 */
233 s = lgetenv("LESSCHARSET");
234 if (icharset(s))
235 return;
236 /*
237 * LESSCHARSET is not defined: try LESSCHARDEF.
238 */
239 s = lgetenv("LESSCHARDEF");
240 if (s != NULL && *s != '\0')
241 {
242 ichardef(s);
243 return;
244 }
245
246#if HAVE_STRSTR
247 /*
248 * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used.
249 */
250 if ((s = lgetenv("LC_ALL")) != NULL ||
251 (s = lgetenv("LC_CTYPE")) != NULL ||
252 (s = lgetenv("LANG")) != NULL)
253 {
254 if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL)
255 if (icharset("utf-8"))
256 return;
257 }
258#endif
259
260#if HAVE_LOCALE
261 /*
262 * Use setlocale.
263 */
264 ilocale();
265#else
266#if MSDOS_COMPILER
267 /*
268 * Default to "dos".
269 */
270 (void) icharset("dos");
271#else
272 /*
273 * Default to "latin1".
274 */
275 (void) icharset("latin1");
276#endif
277#endif
278}
279
280/*
281 * Is a given character a "binary" character?
282 */
283 public int
284binary_char(c)
285 unsigned char c;
286{
287 c &= 0377;
288 return (chardef[c] & IS_BINARY_CHAR);

--- 13 unchanged lines hidden (view full) ---

302/*
303 * Return the printable form of a character.
304 * For example, in the "ascii" charset '\3' is printed as "^C".
305 */
306 public char *
307prchar(c)
308 int c;
309{
310 static char buf[8];
311
312 c &= 0377;
313 if (!control_char(c))
314 sprintf(buf, "%c", c);
315 else if (c == ESC)
316 sprintf(buf, "ESC");
317#if IS_EBCDIC_HOST
318 else if (!binary_char(c) && c < 64)
319 sprintf(buf, "^%c",
320 /*
321 * This array roughly inverts CONTROL() #defined in less.h,
322 * and should be kept in sync with CONTROL() and IBM-1047.
323 */
324 "@ABC.I.?...KLMNO"
325 "PQRS.JH.XY.."
326 "\\]^_"
327 "......W[.....EFG"
328 "..V....D....TU.Z"[c]);
329#else
330 else if (c < 128 && !control_char(c ^ 0100))
331 sprintf(buf, "^%c", c ^ 0100);
332#endif
333 else
334 sprintf(buf, binfmt, c);
335 return (buf);
336}