1/*
2 * compat.c - compatibility routines for the deprived
3 *
4 * This file is part of zsh, the Z shell.
5 *
6 * Copyright (c) 1992-1997 Paul Falstad
7 * All rights reserved.
8 *
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
14 *
15 * In no event shall Paul Falstad or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Paul Falstad and the Zsh Development Group have been advised of
19 * the possibility of such damage.
20 *
21 * Paul Falstad and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose.  The software
24 * provided hereunder is on an "as is" basis, and Paul Falstad and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
27 *
28 */
29
30#include "zsh.mdh"
31#include "compat.pro"
32
/* Return pointer to first occurrence of string t *
 * in string s.  Return NULL if not present.      */
35
36#ifndef HAVE_STRSTR
/*
 * Fallback strstr(): locate the first occurrence of the string t
 * within the string s.
 *
 * Returns a pointer to the start of the match inside s, or NULL if t
 * does not occur in s.  An empty t matches at the start of s, even
 * when s itself is empty.
 */
char *
strstr(const char *s, const char *t)
{
    const char *p1, *p2;

    /* The scan below never runs for an empty s, so an empty t *
     * has to be recognised before entering it.                */
    if (!*t)
        return (char *)s;

    for (; *s; s++) {
        /* Advance while both strings agree.  This stops at the end *
         * of t, at a mismatch, or at the end of s (whose NUL can   *
         * never equal the non-NUL *p2).                            */
        for (p1 = s, p2 = t; *p2 && *p1 == *p2; p1++, p2++)
            ;
        if (!*p2)
            return (char *)s;
    }
    return NULL;
}
51#endif
52
53
54#ifndef HAVE_GETHOSTNAME
/*
 * Fallback gethostname() built on uname().
 *
 * Copies the node name reported by uname() into name, which has room
 * for namelen bytes including the terminating NUL.
 *
 * Returns 0 on success.  Returns -1 if uname() fails (errno is left as
 * uname() set it) or if the node name plus its NUL does not fit in
 * namelen bytes (errno is set to EINVAL).
 */
int
gethostname(char *name, size_t namelen)
{
    struct utsname uts;

    /* If uname() fails, uts is not filled in and must not be read. */
    if (uname(&uts) < 0)
	return -1;
    if (strlen(uts.nodename) >= namelen) {
	errno = EINVAL;
	return -1;
    }
    strcpy(name, uts.nodename);
    return 0;
}
68#endif
69
70
71#ifndef HAVE_GETTIMEOFDAY
/*
 * Fallback gettimeofday() with one-second resolution.
 *
 * Stores the current calendar time from time() in tv->tv_sec and zero
 * in tv->tv_usec.  tz is accepted for interface compatibility only and
 * is never read or written.  Always returns 0.
 */
int
gettimeofday(struct timeval *tv, struct timezone *tz)
{
    (void)tz;
    tv->tv_usec = 0;
    /* Assign directly: forcing the value through long could *
     * truncate it where time_t is wider than long.          */
    tv->tv_sec = time(NULL);
    return 0;
}
79#endif
80
81
82/* compute the difference between two calendar times */
83
84#ifndef HAVE_DIFFTIME
/*
 * Fallback difftime(): both calendar times are converted to double and
 * the result is t2 minus t1 (positive when t2 is the larger value).
 */
double
difftime(time_t t2, time_t t1)
{
    double later = (double)t2;
    double earlier = (double)t1;

    return later - earlier;
}
90#endif
91
92
93#ifndef HAVE_STRERROR
extern char *sys_errlist[];
extern int sys_nerr;

/* Get error message string associated with a particular  *
 * error number, and return a pointer to that string.     *
 * Error numbers that are negative or not less than       *
 * sys_nerr have no entry in sys_errlist[]; for those a   *
 * generic "Unknown error" string is returned instead of  *
 * reading outside the table.                             */

char *
strerror(int errnum)
{
    static char unknown[] = "Unknown error";

    if (errnum < 0 || errnum >= sys_nerr)
	return unknown;
    return (sys_errlist[errnum]);
}
105#endif
106
107
108#if 0
109/* pathconf(_PC_PATH_MAX) is not currently useful to zsh.  The value *
110 * returned varies depending on a number of factors, e.g. the amount *
111 * of memory available to the operating system at a given time; thus *
112 * it can't be used for buffer allocation, or even as an indication  *
113 * of whether an attempt to use or create a given pathname may fail  *
114 * at any future time.                                               *
115 *                                                                   *
116 * The call is also permitted to fail if the argument path is not an *
117 * existing directory, so even to make sense of that one must search *
118 * for a valid directory somewhere in the path and adjust.  Even if  *
119 * it succeeds, the return value is relative to the input directory, *
120 * and therefore potentially relative to the length of the shortest  *
121 * path either to that directory or to our working directory.        *
122 *                                                                   *
123 * Finally, see the note below for glibc; detection of pathconf() is *
124 * not by itself an indication that it works reliably.               */
125
126/* The documentation for pathconf() says something like:             *
127 *     The limit is returned, if one exists.  If the system  does    *
128 *     not  have  a  limit  for  the  requested  resource,  -1 is    *
129 *     returned, and errno is unchanged.  If there is  an  error,    *
130 *     -1  is returned, and errno is set to reflect the nature of    *
131 *     the error.                                                    *
132 *                                                                   *
133 * System calls are not permitted to set errno to 0; but we must (or *
134 * some other flag value) in order to determine that the resource is *
135 * unlimited.  What use is leaving errno unchanged?  Instead, define *
136 * a wrapper that resets errno to 0 and returns 0 for "the system    *
137 * does not have a limit," so that -1 always means a real error.     */
138
139/**/
140mod_export long
141zpathmax(char *dir)
142{
143#ifdef HAVE_PATHCONF
144    long pathmax;
145
146    errno = 0;
147    if ((pathmax = pathconf(dir, _PC_PATH_MAX)) >= 0) {
148	/* Some versions of glibc pathconf return a hardwired value! */
149	return pathmax;
150    } else if (errno == EINVAL || errno == ENOENT || errno == ENOTDIR) {
151	/* Work backward to find a directory, until we run out of path. */
152	char *tail = strrchr(dir, '/');
153	while (tail > dir && tail[-1] == '/')
154	    --tail;
155	if (tail > dir) {
156	    *tail = 0;
157	    pathmax = zpathmax(dir);
158	    *tail = '/';
159	} else {
160	    errno = 0;
161	    if (tail)
162		pathmax = pathconf("/", _PC_PATH_MAX);
163	    else
164		pathmax = pathconf(".", _PC_PATH_MAX);
165	}
166	if (pathmax > 0) {
167	    long taillen = (tail ? strlen(tail) : (strlen(dir) + 1));
168	    if (taillen < pathmax)
169		return pathmax - taillen;
170	    else
171		errno = ENAMETOOLONG;
172	}
173    }
174    if (errno)
175	return -1;
176    else
177	return 0; /* pathmax should be considered unlimited */
178#else
179    long dirlen = strlen(dir);
180
181    /* The following is wrong if dir is not an absolute path. */
182    return ((long) ((dirlen >= PATH_MAX) ?
183		    ((errno = ENAMETOOLONG), -1) :
184		    ((errno = 0), PATH_MAX - dirlen)));
185#endif
186}
187#endif /* 0 */
188
189#ifdef HAVE_SYSCONF
190/*
191 * This is replaced by a macro from system.h if not HAVE_SYSCONF.
192 * 0 is returned by sysconf if _SC_OPEN_MAX is unavailable;
193 * -1 is returned on error
194 *
195 * Neither of these should happen, but resort to OPEN_MAX rather
196 * than return 0 or -1 just in case.
197 *
198 * We'll limit the open maximum to ZSH_INITIAL_OPEN_MAX to
199 * avoid probing ridiculous numbers of file descriptors.
200 */
201
202/**/
203mod_export long
204zopenmax(void)
205{
206    long openmax;
207
208    if ((openmax = sysconf(_SC_OPEN_MAX)) < 1) {
209	openmax = OPEN_MAX;
210    } else if (openmax > OPEN_MAX) {
211	/* On some systems, "limit descriptors unlimited" or the  *
212	 * equivalent will set openmax to a huge number.  Unless  *
213	 * there actually is a file descriptor > OPEN_MAX already *
214	 * open, nothing in zsh requires the true maximum, and in *
215	 * fact it causes inefficiency elsewhere if we report it. *
216	 * So, report the maximum of OPEN_MAX or the largest open *
217	 * descriptor (is there a better way to find that?).      */
218	long i, j = OPEN_MAX;
219	if (openmax > ZSH_INITIAL_OPEN_MAX)
220	    openmax = ZSH_INITIAL_OPEN_MAX;
221	for (i = j; i < openmax; i += (errno != EINTR)) {
222	    errno = 0;
223	    if (fcntl(i, F_GETFL, 0) < 0 &&
224		(errno == EBADF || errno == EINTR))
225		continue;
226	    j = i;
227	}
228	openmax = j;
229    }
230
231    return (max_zsh_fd > openmax) ? max_zsh_fd : openmax;
232}
233#endif
234
235/*
236 * Rationalise the current directory, returning the string.
237 *
238 * If "d" is not NULL, it is used to store information about the
239 * directory.  The returned name is also present in d->dirname and is in
240 * permanently allocated memory.  The handling of this case depends on
241 * whether the fchdir() system call is available; if it is, it is assumed
242 * the caller is able to restore the current directory.  On successfully
243 * identifying the directory the function returns immediately rather
244 * than ascending the hierarchy.
245 *
246 * If "d" is NULL, no assumption about the caller's behaviour is
247 * made.  The returned string is in heap memory.  This case is
248 * always handled by changing directory up the hierarchy.
249 *
250 * On Cygwin or other systems where USE_GETCWD is defined (at the
251 * time of writing only QNX), we skip all the above and use the
252 * getcwd() system call.
253 */
254
255/**/
256mod_export char *
257zgetdir(struct dirsav *d)
258{
259    char nbuf[PATH_MAX+3];
260    char *buf;
261    int bufsiz, pos;
262    struct stat sbuf;
263    ino_t pino;
264    dev_t pdev;
265#if !defined(__CYGWIN__) && !defined(USE_GETCWD)
266    struct dirent *de;
267    DIR *dir;
268    dev_t dev;
269    ino_t ino;
270    int len;
271#endif
272
273    buf = zhalloc(bufsiz = PATH_MAX);
274    pos = bufsiz - 1;
275    buf[pos] = '\0';
276    strcpy(nbuf, "../");
277    if (stat(".", &sbuf) < 0) {
278	return NULL;
279    }
280
281    /* Record the initial inode and device */
282    pino = sbuf.st_ino;
283    pdev = sbuf.st_dev;
284    if (d)
285	d->ino = pino, d->dev = pdev;
286#if !defined(__CYGWIN__) && !defined(USE_GETCWD)
287#ifdef HAVE_FCHDIR
288    else
289#endif
290	holdintr();
291
292    for (;;) {
293	/* Examine the parent of the current directory. */
294	if (stat("..", &sbuf) < 0)
295	    break;
296
297	/* Inode and device of curtent directory */
298	ino = pino;
299	dev = pdev;
300	/* Inode and device of current directory's parent */
301	pino = sbuf.st_ino;
302	pdev = sbuf.st_dev;
303
304	/* If they're the same, we've reached the root directory. */
305	if (ino == pino && dev == pdev) {
306	    if (!buf[pos])
307		buf[--pos] = '/';
308	    if (d) {
309#ifndef HAVE_FCHDIR
310		zchdir(buf + pos);
311		noholdintr();
312#endif
313		return d->dirname = ztrdup(buf + pos);
314	    }
315	    zchdir(buf + pos);
316	    noholdintr();
317	    return buf + pos;
318	}
319
320	/* Search the parent for the current directory. */
321	if (!(dir = opendir("..")))
322	    break;
323
324	while ((de = readdir(dir))) {
325	    char *fn = de->d_name;
326	    /* Ignore `.' and `..'. */
327	    if (fn[0] == '.' &&
328		(fn[1] == '\0' ||
329		 (fn[1] == '.' && fn[2] == '\0')))
330		continue;
331#ifdef HAVE_STRUCT_DIRENT_D_STAT
332	    if(de->d_stat.st_dev == dev && de->d_stat.st_ino == ino) {
333		/* Found the directory we're currently in */
334		strncpy(nbuf + 3, fn, PATH_MAX);
335		break;
336	    }
337#else /* !HAVE_STRUCT_DIRENT_D_STAT */
338# ifdef HAVE_STRUCT_DIRENT_D_INO
339	    if (dev != pdev || (ino_t) de->d_ino == ino)
340# endif /* HAVE_STRUCT_DIRENT_D_INO */
341	    {
342		/* Maybe found directory, need to check device & inode */
343		strncpy(nbuf + 3, fn, PATH_MAX);
344		lstat(nbuf, &sbuf);
345		if (sbuf.st_dev == dev && sbuf.st_ino == ino)
346		    break;
347	    }
348#endif /* !HAVE_STRUCT_DIRENT_D_STAT */
349	}
350	closedir(dir);
351	if (!de)
352	    break;		/* Not found */
353	/*
354	 * We get the "/" free just by copying from nbuf+2 instead
355	 * of nbuf+3, which is where we copied the path component.
356	 * This means buf[pos] is always a "/".
357	 */
358	len = strlen(nbuf + 2);
359	pos -= len;
360	while (pos <= 1) {
361	    char *newbuf = zhalloc(2*bufsiz);
362	    memcpy(newbuf + bufsiz, buf, bufsiz);
363	    buf = newbuf;
364	    pos += bufsiz;
365	    bufsiz *= 2;
366	}
367	memcpy(buf + pos, nbuf + 2, len);
368#ifdef HAVE_FCHDIR
369	if (d)
370	    return d->dirname = ztrdup(buf + pos + 1);
371#endif
372	if (chdir(".."))
373	    break;
374    }
375
376    /*
377     * Fix up the directory, if necessary.
378     * We're changing back down the hierarchy, ignore the
379     * "/" at buf[pos].
380     */
381    if (d) {
382#ifndef HAVE_FCHDIR
383	if (buf[pos])
384	    zchdir(buf + pos + 1);
385	noholdintr();
386#endif
387	return NULL;
388    }
389
390    if (buf[pos])
391	zchdir(buf + pos + 1);
392    noholdintr();
393
394#else  /* __CYGWIN__, USE_GETCWD cases */
395
396    if (!getcwd(buf, bufsiz)) {
397	if (d) {
398	    return NULL;
399	}
400    } else {
401	if (d) {
402	    return d->dirname = ztrdup(buf);
403	}
404	return buf;
405    }
406#endif
407
408    /*
409     * Something bad happened.
410     * This has been seen when inside a special directory,
411     * such as the Netapp .snapshot directory, that doesn't
412     * appear as a directory entry in the parent directory.
413     * We'll just need our best guess.
414     *
415     * We only get here from zgetcwd(); let that fall back to pwd.
416     */
417
418    return NULL;
419}
420
421/*
422 * Try to find the current directory.
423 * If we couldn't work it out internally, fall back to getcwd().
424 * If it fails, fall back to pwd; if zgetcwd() is being used
425 * to set pwd, pwd should be NULL and we just return ".".
426 */
427
428/**/
429char *
430zgetcwd(void)
431{
432    char *ret = zgetdir(NULL);
433#ifdef HAVE_GETCWD
434    if (!ret) {
435#ifdef GETCWD_CALLS_MALLOC
436	char *cwd = getcwd(NULL, 0);
437	if (cwd) {
438	    ret = dupstring(cwd);
439	    free(cwd);
440	}
441#else
442	char *cwdbuf = zalloc(PATH_MAX);
443	ret = getcwd(cwdbuf, PATH_MAX);
444	if (ret)
445	    ret = dupstring(ret);
446	free(cwdbuf);
447#endif /* GETCWD_CALLS_MALLOC */
448    }
449#endif /* HAVE_GETCWD */
450    if (!ret)
451	ret = pwd;
452    if (!ret)
453	ret = dupstring(".");
454    return ret;
455}
456
457/* chdir with arbitrary long pathname.  Returns 0 on success, -1 on normal *
458 * failure and -2 when chdir failed and the current directory is lost.  */
459
460/**/
461mod_export int
462zchdir(char *dir)
463{
464    char *s;
465    int currdir = -2;
466
467    for (;;) {
468	if (!*dir || chdir(dir) == 0) {
469#ifdef HAVE_FCHDIR
470           if (currdir >= 0)
471               close(currdir);
472#endif
473	    return 0;
474	}
475	if ((errno != ENAMETOOLONG && errno != ENOMEM) ||
476	    strlen(dir) < PATH_MAX)
477	    break;
478	for (s = dir + PATH_MAX - 1; s > dir && *s != '/'; s--)
479	    ;
480	if (s == dir)
481	    break;
482#ifdef HAVE_FCHDIR
483	if (currdir == -2)
484	    currdir = open(".", O_RDONLY|O_NOCTTY);
485#endif
486	*s = '\0';
487	if (chdir(dir) < 0) {
488	    *s = '/';
489	    break;
490	}
491#ifndef HAVE_FCHDIR
492	currdir = -1;
493#endif
494	*s = '/';
495	while (*++s == '/')
496	    ;
497	dir = s;
498    }
499#ifdef HAVE_FCHDIR
500    if (currdir >= 0) {
501	if (fchdir(currdir) < 0) {
502	    close(currdir);
503	    return -2;
504	}
505	close(currdir);
506	return -1;
507    }
508#endif
509    return currdir == -2 ? -1 : -2;
510}
511
512/*
513 * How to print out a 64 bit integer.  This isn't needed (1) if longs
514 * are 64 bit, since ordinary %ld will work (2) if we couldn't find a
515 * 64 bit type anyway.
516 */
517/**/
518#ifdef ZSH_64_BIT_TYPE
519/**/
520mod_export char *
521output64(zlong val)
522{
523    static char llbuf[DIGBUFSIZE];
524    convbase(llbuf, val, 0);
525    return llbuf;
526}
527/**/
528#endif /* ZSH_64_BIT_TYPE */
529
530#ifndef HAVE_STRTOUL
531
532/*
533 * Copyright (c) 1990, 1993
534 *	The Regents of the University of California.  All rights reserved.
535 *
536 * Redistribution and use in source and binary forms, with or without
537 * modification, are permitted provided that the following conditions
538 * are met:
539 * 1. Redistributions of source code must retain the above copyright
540 *    notice, this list of conditions and the following disclaimer.
541 * 2. Redistributions in binary form must reproduce the above copyright
542 *    notice, this list of conditions and the following disclaimer in the
543 *    documentation and/or other materials provided with the distribution.
544 * 3. Neither the name of the University nor the names of its contributors
545 *    may be used to endorse or promote products derived from this software
546 *    without specific prior written permission.
547 *
548 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
549 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
550 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
551 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
552 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
553 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
554 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
555 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
556 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
557 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
558 * SUCH DAMAGE.
559 */
560
/*
 * Convert a string to an unsigned long integer.
 *
 * Ignores `locale' stuff.  Assumes that the upper and lower case
 * alphabets and digits are each contiguous.
 *
 * Leading white space and an optional sign are skipped.  If base is 0
 * the base is taken from the prefix: "0x"/"0X" followed by a hex digit
 * means 16, a leading "0" means 8, anything else 10.  With base 16 the
 * same hex prefix is accepted.  A base that is neither 0 nor in the
 * range 2..36 sets errno to EINVAL and returns 0.
 *
 * If endptr is not NULL, *endptr is set to the first character that
 * was not consumed, or to nptr if no digits were converted.  On
 * overflow ULONG_MAX is returned and errno is set to ERANGE.  A
 * leading '-' negates the converted value in unsigned arithmetic.
 */
unsigned long
strtoul(const char *nptr, char **endptr, int base)
{
	const char *s;
	unsigned long acc, cutoff;
	int c;
	int neg, any, cutlim;

	/* endptr may be NULL */

	s = nptr;
	do {
		c = (unsigned char) *s++;
	} while (isspace(c));
	if (c == '-') {
		neg = 1;
		c = (unsigned char) *s++;
	} else {
		neg = 0;
		if (c == '+')
			c = (unsigned char) *s++;
	}
	/*
	 * Only swallow "0x" when a hex digit follows; otherwise the
	 * "0" alone is the number and the 'x' is left unconsumed.
	 */
	if ((base == 0 || base == 16) &&
	    c == '0' && (*s == 'x' || *s == 'X') &&
	    isxdigit((unsigned char) s[1])) {
		c = (unsigned char) s[1];
		s += 2;
		base = 16;
	}
	if (base == 0)
		base = c == '0' ? 8 : 10;
	if (base < 2 || base > 36) {
		errno = EINVAL;
		if (endptr != NULL)
			*endptr = (char *)nptr;
		return 0;
	}

	/*
	 * cutoff is the largest value that can still be multiplied by
	 * base; cutlim is the largest digit that may follow it.
	 */
	cutoff = ULONG_MAX / (unsigned long)base;
	cutlim = (int)(ULONG_MAX % (unsigned long)base);
	for (acc = 0, any = 0;; c = (unsigned char) *s++) {
		if (isdigit(c))
			c -= '0';
		else if (isalpha(c)) {
			c -= isupper(c) ? 'A' - 10 : 'a' - 10;
		} else
			break;
		if (c >= base)
			break;
		/* after overflow, keep consuming digits but ignore them */
		if (any < 0)
			continue;
		if (acc > cutoff || (acc == cutoff && c > cutlim)) {
			any = -1;
			acc = ULONG_MAX;
			errno = ERANGE;
		} else {
			any = 1;
			acc *= (unsigned long)base;
			acc += c;
		}
	}
	if (neg && any > 0)
		acc = -acc;
	if (endptr != NULL)
		*endptr = (char *)(any ? s - 1 : nptr);
	return (acc);
}
630#endif /* HAVE_STRTOUL */
631
632/**/
633#if defined(BROKEN_WCWIDTH) && (defined(__STDC_ISO_10646__) || defined(__APPLE__))
634
635/*
 * This is an implementation of wcwidth() and wcswidth() (defined in
 * IEEE Std 1003.1-2001) for Unicode.
638 *
639 * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
640 * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
641 *
642 * In fixed-width output devices, Latin characters all occupy a single
643 * "cell" position of equal width, whereas ideographic CJK characters
644 * occupy two such cells. Interoperability between terminal-line
645 * applications and (teletype-style) character terminals using the
646 * UTF-8 encoding requires agreement on which character should advance
647 * the cursor by how many cell positions. No established formal
648 * standards exist at present on which Unicode character shall occupy
649 * how many cell positions on character terminals. These routines are
650 * a first attempt of defining such behavior based on simple rules
651 * applied to data provided by the Unicode Consortium.
652 *
653 * For some graphical characters, the Unicode standard explicitly
654 * defines a character-cell width via the definition of the East Asian
655 * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
656 * In all these cases, there is no ambiguity about which width a
657 * terminal shall use. For characters in the East Asian Ambiguous (A)
658 * class, the width choice depends purely on a preference of backward
659 * compatibility with either historic CJK or Western practice.
660 * Choosing single-width for these characters is easy to justify as
661 * the appropriate long-term solution, as the CJK practice of
662 * displaying these characters as double-width comes from historic
663 * implementation simplicity (8-bit encoded characters were displayed
664 * single-width and 16-bit ones double-width, even for Greek,
665 * Cyrillic, etc.) and not any typographic considerations.
666 *
667 * Much less clear is the choice of width for the Not East Asian
668 * (Neutral) class. Existing practice does not dictate a width for any
669 * of these characters. It would nevertheless make sense
670 * typographically to allocate two character cells to characters such
671 * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
672 * represented adequately with a single-width glyph. The following
673 * routines at present merely assign a single-cell width to all
674 * neutral characters, in the interest of simplicity. This is not
675 * entirely satisfactory and should be reconsidered before
676 * establishing a formal standard in this area. At the moment, the
677 * decision which Not East Asian (Neutral) characters should be
678 * represented by double-width glyphs cannot yet be answered by
679 * applying a simple rule from the Unicode database content. Setting
680 * up a proper standard for the behavior of UTF-8 character terminals
681 * will require a careful analysis not only of each Unicode character,
682 * but also of each presentation form, something the author of these
683 * routines has avoided to do so far.
684 *
685 * http://www.unicode.org/unicode/reports/tr11/
686 *
687 * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
688 *
689 * Permission to use, copy, modify, and distribute this software
690 * for any purpose and without fee is hereby granted. The author
691 * disclaims all warranties with regard to this software.
692 *
693 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
694 */
695
/* One table entry: the inclusive range of codes first..last. */
struct interval {
  int first;
  int last;
};

/*
 * Auxiliary function for binary search in interval table.
 *
 * table is searched between index 0 and index max (the index of the
 * last entry, not the entry count).  Returns 1 if ucs lies inside one
 * of the intervals and 0 otherwise.
 */
static int bisearch(wchar_t ucs, const struct interval *table, int max) {
  int lo = 0;
  int hi = max;

  /* Reject anything outside the span covered by the whole table. */
  if (ucs < table[0].first || ucs > table[hi].last)
    return 0;

  while (lo <= hi) {
    int mid = (lo + hi) / 2;
    const struct interval *probe = &table[mid];

    if (ucs > probe->last) {
      lo = mid + 1;
    } else if (ucs < probe->first) {
      hi = mid - 1;
    } else {
      return 1;
    }
  }

  return 0;
}
720
721
722/* The following two functions define the column width of an ISO 10646
723 * character as follows:
724 *
725 *    - The null character (U+0000) has a column width of 0.
726 *
727 *    - Other C0/C1 control characters and DEL will lead to a return
728 *      value of -1.
729 *
730 *    - Non-spacing and enclosing combining characters (general
731 *      category code Mn or Me in the Unicode database) have a
732 *      column width of 0.
733 *
734 *    - SOFT HYPHEN (U+00AD) has a column width of 1.
735 *
736 *    - Other format characters (general category code Cf in the Unicode
737 *      database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
738 *
739 *    - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
740 *      have a column width of 0.
741 *
742 *    - Spacing characters in the East Asian Wide (W) or East Asian
743 *      Full-width (F) category as defined in Unicode Technical
744 *      Report #11 have a column width of 2.
745 *
746 *    - All remaining characters (including all printable
747 *      ISO 8859-1 and WGL4 characters, Unicode control characters,
748 *      etc.) have a column width of 1.
749 *
750 * This implementation assumes that wchar_t characters are encoded
751 * in ISO 10646.
752 */
753
754/**/
755int
756mk_wcwidth(wchar_t ucs)
757{
758  /* sorted list of non-overlapping intervals of non-spacing characters */
759  /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
760  static const struct interval combining[] = {
761    { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
762    { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
763    { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
764    { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
765    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
766    { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
767    { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
768    { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
769    { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
770    { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
771    { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
772    { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
773    { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
774    { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
775    { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
776    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
777    { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
778    { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
779    { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
780    { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
781    { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
782    { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
783    { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
784    { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
785    { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
786    { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
787    { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
788    { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
789    { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
790    { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
791    { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
792    { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
793    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
794    { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
795    { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
796    { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
797    { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
798    { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
799    { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
800    { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
801    { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
802    { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
803    { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
804    { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
805    { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
806    { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
807    { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
808    { 0xE0100, 0xE01EF }
809  };
810
811  /* test for 8-bit control characters */
812  if (ucs == 0)
813    return 0;
814  if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
815    return -1;
816
817  /* binary search in table of non-spacing characters */
818  if (bisearch(ucs, combining,
819	       sizeof(combining) / sizeof(struct interval) - 1))
820    return 0;
821
822  /* if we arrive here, ucs is not a combining or C0/C1 control character */
823
824  return 1 +
825    (ucs >= 0x1100 &&
826     (ucs <= 0x115f ||                    /* Hangul Jamo init. consonants */
827      ucs == 0x2329 || ucs == 0x232a ||
828      (ucs >= 0x2e80 && ucs <= 0xa4cf &&
829       ucs != 0x303f) ||                  /* CJK ... Yi */
830      (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
831      (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
832      (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */
833      (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
834      (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */
835      (ucs >= 0xffe0 && ucs <= 0xffe6) ||
836      (ucs >= 0x20000 && ucs <= 0x2fffd) ||
837      (ucs >= 0x30000 && ucs <= 0x3fffd)));
838}
839
840
841/*
842 * The following functions are part of the original wcwidth.c:
843 * we don't use them but I've kept them in case - pws.
844 */
845#if 0
/*
 * Sum of mk_wcwidth() over the string pwcs, looking at no more than n
 * characters and stopping at a null character.  Returns -1 as soon as
 * any character has a negative width.
 */
int mk_wcswidth(const wchar_t *pwcs, size_t n)
{
  int total = 0;

  while (*pwcs && n-- > 0) {
    int cell = mk_wcwidth(*pwcs);

    if (cell < 0)
      return -1;
    total += cell;
    pwcs++;
  }

  return total;
}
858
859
860/*
861 * The following functions are the same as mk_wcwidth() and
862 * mk_wcswidth(), except that spacing characters in the East Asian
863 * Ambiguous (A) category as defined in Unicode Technical Report #11
864 * have a column width of 2. This variant might be useful for users of
865 * CJK legacy encodings who want to migrate to UCS without changing
866 * the traditional terminal character-width behaviour. It is not
867 * otherwise recommended for general use.
868 */
869int mk_wcwidth_cjk(wchar_t ucs)
870{
871  /* sorted list of non-overlapping intervals of East Asian Ambiguous
872   * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */
873  static const struct interval ambiguous[] = {
874    { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
875    { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 },
876    { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
877    { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
878    { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
879    { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
880    { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
881    { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
882    { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
883    { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
884    { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
885    { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
886    { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
887    { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
888    { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
889    { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
890    { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
891    { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
892    { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
893    { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
894    { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
895    { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
896    { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
897    { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
898    { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
899    { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
900    { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
901    { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
902    { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
903    { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 },
904    { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 },
905    { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B },
906    { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 },
907    { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 },
908    { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E },
909    { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 },
910    { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 },
911    { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F },
912    { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 },
913    { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF },
914    { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x254B },
915    { 0x2550, 0x2573 }, { 0x2580, 0x258F }, { 0x2592, 0x2595 },
916    { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 },
917    { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 },
918    { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 },
919    { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF }, { 0x2605, 0x2606 },
920    { 0x2609, 0x2609 }, { 0x260E, 0x260F }, { 0x2614, 0x2615 },
921    { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 },
922    { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 },
923    { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F },
924    { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF },
925    { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD }
926  };
927
928  /* binary search in table of non-spacing characters */
929  if (bisearch(ucs, ambiguous,
930	       sizeof(ambiguous) / sizeof(struct interval) - 1))
931    return 2;
932
933  return mk_wcwidth(ucs);
934}
935
936
/*
 * Sum of mk_wcwidth_cjk() over the string pwcs, looking at no more
 * than n characters and stopping at a null character.  Returns -1 as
 * soon as any character has a negative width.
 */
int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n)
{
  int total = 0;

  while (*pwcs && n-- > 0) {
    int cell = mk_wcwidth_cjk(*pwcs);

    if (cell < 0)
      return -1;
    total += cell;
    pwcs++;
  }

  return total;
}
949#endif /* 0 */
950
951/**/
952#endif /* BROKEN_WCWIDTH && (__STDC_ISO_10646__ || __APPLE__) */
953
954