Deleted Added
full compact
1/*
2 * Copyright (C) 1984-2000 Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information about less, or for information on how to
8 * contact the author, see the README file.
9 */
10
11
12/*
13 * Functions to define the character set
14 * and do things specific to the character set.
15 */
16
17#include "less.h"
18#if HAVE_LOCALE
19#include <locale.h>
20#include <ctype.h>
21#endif
22
23public int utf_mode = 0;
24
25/*
26 * Predefined character sets,
27 * selected by the LESSCHARSET environment variable.
28 */
29struct charset {
30 char *name;
31 int *p_flag;
32 char *desc;
33} charsets[] = {
34 { "ascii", NULL, "8bcccbcc18b95.b" },
35 { "dos", NULL, "8bcccbcc12bc5b95.b." },
35 { "dos", NULL, "8bcccbcc12bc5b223.b" },
36 { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." },
37 { "IBM-1047", NULL, "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" },
38 { "iso8859", NULL, "8bcccbcc18b95.33b." },
39 { "koi8-r", NULL, "8bcccbcc18b95.b128." },
39 { "latin1", NULL, "8bcccbcc18b95.33b." },
40 { "next", NULL, "8bcccbcc18b95.bb125.bb" },
41 { "utf-8", &utf_mode, "8bcccbcc18b." },
42 { NULL, NULL, NULL }
43};
44
/*
 * Alternate names for predefined character sets.
 * "name" is the alias and "oname" is the charset name it stands for.
 */
struct cs_alias {
	char *name;
	char *oname;
};

/* Alias table; the entry with a NULL name marks the end. */
struct cs_alias cs_aliases[] = {
	{ "latin1",	"iso8859" },
	{ "latin9",	"iso8859" },
	{ NULL,		NULL }
};
53
54#define IS_BINARY_CHAR 01
55#define IS_CONTROL_CHAR 02
56
57static char chardef[256];
58static char *binfmt = NULL;
59public int binattr = AT_STANDOUT;
60
61
62/*
63 * Define a charset, given a description string.
64 * The string consists of 256 letters,
65 * one for each character in the charset.
66 * If the string is shorter than 256 letters, missing letters
67 * are taken to be identical to the last one.
68 * A decimal number followed by a letter is taken to be a
69 * repetition of the letter.
70 *
71 * Each letter is one of:
72 * . normal character
73 * b binary character
74 * c control character
75 */
76 static void
77ichardef(s)
78 char *s;
79{
80 register char *cp;
81 register int n;
82 register char v;
83
84 n = 0;
85 v = 0;
86 cp = chardef;
87 while (*s != '\0')
88 {
89 switch (*s++)
90 {
91 case '.':
92 v = 0;
93 break;
94 case 'c':
95 v = IS_CONTROL_CHAR;
96 break;
97 case 'b':
98 v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
99 break;
100
101 case '0': case '1': case '2': case '3': case '4':
102 case '5': case '6': case '7': case '8': case '9':
103 n = (10 * n) + (s[-1] - '0');
104 continue;
105
106 default:
107 error("invalid chardef", NULL_PARG);
108 quit(QUIT_ERROR);
109 /*NOTREACHED*/
110 }
111
112 do
113 {
114 if (cp >= chardef + sizeof(chardef))
115 {
116 error("chardef longer than 256", NULL_PARG);
117 quit(QUIT_ERROR);
118 /*NOTREACHED*/
119 }
120 *cp++ = v;
121 } while (--n > 0);
122 n = 0;
123 }
124
125 while (cp < chardef + sizeof(chardef))
126 *cp++ = v;
127}
128
129/*
130 * Define a charset, given a charset name.
131 * The valid charset names are listed in the "charsets" array.
132 */
133 static int
134icharset(name)
135 register char *name;
136{
137 register struct charset *p;
138 register struct cs_alias *a;
139
140 if (name == NULL || *name == '\0')
141 return (0);
142
143 /* First see if the name is an alias. */
144 for (a = cs_aliases; a->name != NULL; a++)
145 {
146 if (strcmp(name, a->name) == 0)
147 {
148 name = a->oname;
149 break;
150 }
151 }
152
153 for (p = charsets; p->name != NULL; p++)
154 {
155 if (strcmp(name, p->name) == 0)
156 {
157 ichardef(p->desc);
158 if (p->p_flag != NULL)
159 *(p->p_flag) = 1;
160 return (1);
161 }
162 }
163
164 error("invalid charset name", NULL_PARG);
165 quit(QUIT_ERROR);
166 /*NOTREACHED*/
167}
168
#if HAVE_LOCALE
/*
 * Define a charset from the locale named by the environment.
 * Each character code is classified with isprint() and iscntrl():
 * printable codes are normal, non-printable control codes are
 * control characters, and everything else is binary.
 */
	static void
ilocale()
{
	int ch;
	char flags;

	setlocale(LC_ALL, "");
	for (ch = 0; ch < (int) sizeof(chardef); ch++)
	{
		if (isprint(ch))
			flags = 0;
		else if (iscntrl(ch))
			flags = IS_CONTROL_CHAR;
		else
			flags = IS_BINARY_CHAR|IS_CONTROL_CHAR;
		chardef[ch] = flags;
	}
}
#endif
190
191/*
192 * Define the printing format for control chars.
193 */
194 public void
195setbinfmt(s)
196 char *s;
197{
198 if (s == NULL || *s == '\0')
199 s = "*s<%X>";
200 /*
201 * Select the attributes if it starts with "*".
202 */
203 if (*s == '*')
204 {
205 switch (s[1])
206 {
207 case 'd': binattr = AT_BOLD; break;
208 case 'k': binattr = AT_BLINK; break;
209 case 's': binattr = AT_STANDOUT; break;
210 case 'u': binattr = AT_UNDERLINE; break;
211 default: binattr = AT_NORMAL; break;
212 }
213 s += 2;
214 }
215 binfmt = s;
216}
217
218/*
219 * Initialize charset data structures.
220 */
221 public void
222init_charset()
223{
224 register char *s;
225
226 s = lgetenv("LESSBINFMT");
227 setbinfmt(s);
228
229 /*
230 * See if environment variable LESSCHARSET is defined.
231 */
232 s = lgetenv("LESSCHARSET");
233 if (icharset(s))
234 return;
235 /*
236 * LESSCHARSET is not defined: try LESSCHARDEF.
237 */
238 s = lgetenv("LESSCHARDEF");
239 if (s != NULL && *s != '\0')
240 {
241 ichardef(s);
242 return;
243 }
244
245#if HAVE_STRSTR
246 /*
247 * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used.
248 */
249 if ((s = lgetenv("LC_ALL")) != NULL ||
250 (s = lgetenv("LC_CTYPE")) != NULL ||
251 (s = lgetenv("LANG")) != NULL)
252 {
253 if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL)
254 if (icharset("utf-8"))
255 return;
256 }
257#endif
258
259#if HAVE_LOCALE
260 /*
261 * Use setlocale.
262 */
263 ilocale();
264#else
265#if MSDOS_COMPILER
266 /*
267 * Default to "dos".
268 */
269 (void) icharset("dos");
270#else
271 /*
272 * Default to "latin1".
273 */
274 (void) icharset("latin1");
275#endif
276#endif
277}
278
279/*
280 * Is a given character a "binary" character?
281 */
282 public int
283binary_char(c)
284 unsigned char c;
285{
286 c &= 0377;
287 return (chardef[c] & IS_BINARY_CHAR);
288}
289
290/*
291 * Is a given character a "control" character?
292 */
293 public int
294control_char(c)
295 int c;
296{
297 c &= 0377;
298 return (chardef[c] & IS_CONTROL_CHAR);
299}
300
301/*
302 * Return the printable form of a character.
303 * For example, in the "ascii" charset '\3' is printed as "^C".
304 */
305 public char *
306prchar(c)
307 int c;
308{
309 static char buf[8];
310
311 c &= 0377;
312 if (!control_char(c))
313 sprintf(buf, "%c", c);
314 else if (c == ESC)
315 sprintf(buf, "ESC");
289 else if (c < 128 && !control_char(c ^ 0100))
290 sprintf(buf, "^%c", c ^ 0100);
316#if IS_EBCDIC_HOST
317 else if (!binary_char(c) && c < 64)
318 sprintf(buf, "^%c",
319 /*
320 * This array roughly inverts CONTROL() #defined in less.h,
321 * and should be kept in sync with CONTROL() and IBM-1047.
322 */
323 "@ABC.I.?...KLMNO"
324 "PQRS.JH.XY.."
325 "\\]^_"
326 "......W[.....EFG"
327 "..V....D....TU.Z"[c]);
328#else
329 else if (c < 128 && !control_char(c ^ 0100))
330 sprintf(buf, "^%c", c ^ 0100);
331#endif
332 else
333 sprintf(buf, binfmt, c);
334 return (buf);
335}