bwstring.c revision 298089
138451Smsmith/*-
238451Smsmith * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
338451Smsmith * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
498542Smckusick * All rights reserved.
598542Smckusick *
698542Smckusick * Redistribution and use in source and binary forms, with or without
798542Smckusick * modification, are permitted provided that the following conditions
898542Smckusick * are met:
998542Smckusick * 1. Redistributions of source code must retain the above copyright
1098542Smckusick *    notice, this list of conditions and the following disclaimer.
1198542Smckusick * 2. Redistributions in binary form must reproduce the above copyright
1298542Smckusick *    notice, this list of conditions and the following disclaimer in the
1398542Smckusick *    documentation and/or other materials provided with the distribution.
1438451Smsmith *
1538451Smsmith * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1638451Smsmith * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1738451Smsmith * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1838451Smsmith * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1938451Smsmith * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2038451Smsmith * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2138451Smsmith * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2238451Smsmith * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2338451Smsmith * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2438451Smsmith * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2538451Smsmith * SUCH DAMAGE.
2638451Smsmith */
2738451Smsmith
2838451Smsmith#include <sys/cdefs.h>
2938451Smsmith__FBSDID("$FreeBSD: head/usr.bin/sort/bwstring.c 298089 2016-04-15 22:31:22Z pfg $");
3038451Smsmith
3138451Smsmith#include <ctype.h>
3238451Smsmith#include <errno.h>
3338451Smsmith#include <err.h>
3438451Smsmith#include <langinfo.h>
3538451Smsmith#include <math.h>
3638451Smsmith#include <stdlib.h>
3738451Smsmith#include <string.h>
3838451Smsmith#include <wchar.h>
3938451Smsmith#include <wctype.h>
4038451Smsmith
4138451Smsmith#include "bwstring.h"
4238451Smsmith#include "sort.h"
4338451Smsmith
/*
 * Read by bwscoll(): in single-byte mode a true value selects plain
 * memcmp() ordering instead of strcoll().  It is not assigned in the
 * code visible here; presumably set by option handling -- TODO confirm.
 */
bool byte_sort;

/*
 * Tables of 12 upper-cased abbreviated month names, filled in by
 * initialise_months() and searched by bws_month_score().  wmonths holds
 * wide strings (MB_CUR_MAX > 1), cmonths holds byte strings
 * (MB_CUR_MAX == 1).  An entry is NULL when no name was stored for it.
 */
static wchar_t **wmonths;
static unsigned char **cmonths;
4838451Smsmith
4938451Smsmith/* initialise months */
5038451Smsmith
5138451Smsmithvoid
5238451Smsmithinitialise_months(void)
5338451Smsmith{
5438451Smsmith	const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
5538451Smsmith	    ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
5638451Smsmith	    ABMON_11, ABMON_12 };
5738451Smsmith	unsigned char *tmp;
5838451Smsmith	size_t len;
5938451Smsmith
6038451Smsmith	if (MB_CUR_MAX == 1) {
6138451Smsmith		if (cmonths == NULL) {
6238451Smsmith			unsigned char *m;
6338451Smsmith
6438451Smsmith			cmonths = sort_malloc(sizeof(unsigned char*) * 12);
6538451Smsmith			for (int i = 0; i < 12; i++) {
6638451Smsmith				cmonths[i] = NULL;
6738451Smsmith				tmp = (unsigned char *) nl_langinfo(item[i]);
6838451Smsmith				if (debug_sort)
6938451Smsmith					printf("month[%d]=%s\n", i, tmp);
7084221Sdillon				if (*tmp == '\0')
7184221Sdillon					continue;
7284221Sdillon				m = sort_strdup(tmp);
7338451Smsmith				len = strlen(tmp);
7438451Smsmith				for (unsigned int j = 0; j < len; j++)
7538451Smsmith					m[j] = toupper(m[j]);
7638451Smsmith				cmonths[i] = m;
7738451Smsmith			}
7896477Sphk		}
7938451Smsmith
8038451Smsmith	} else {
8138451Smsmith		if (wmonths == NULL) {
8238451Smsmith			wchar_t *m;
8338451Smsmith
8438451Smsmith			wmonths = sort_malloc(sizeof(wchar_t *) * 12);
8538451Smsmith			for (int i = 0; i < 12; i++) {
8639468Smsmith				wmonths[i] = NULL;
8787631Sjhb				tmp = (unsigned char *) nl_langinfo(item[i]);
8838451Smsmith				if (debug_sort)
8938451Smsmith					printf("month[%d]=%s\n", i, tmp);
9038451Smsmith				if (*tmp == '\0')
9138451Smsmith					continue;
9259766Sjlemon				len = strlen(tmp);
9338451Smsmith				m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
9438451Smsmith				if (mbstowcs(m, (char*)tmp, len) ==
9559766Sjlemon				    ((size_t) - 1)) {
9659766Sjlemon					sort_free(m);
9759766Sjlemon					continue;
9859766Sjlemon				}
9987631Sjhb				m[len] = L'\0';
10059766Sjlemon				for (unsigned int j = 0; j < len; j++)
10159766Sjlemon					m[j] = towupper(m[j]);
10259766Sjlemon				wmonths[i] = m;
10338451Smsmith			}
10438451Smsmith		}
10538451Smsmith	}
10638451Smsmith}
10738451Smsmith
/*
 * Collate two NUL-terminated wide strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ through errno, the
 * result of wcscmp() is used instead; should errno be non-zero after that
 * call as well, the strings are compared element by element.  errno is
 * cleared before each library call.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	const wchar_t *p1, *p2;
	int cmp;

	errno = 0;
	cmp = wcscoll(s1, s2);
	if (errno != EILSEQ)
		return (cmp);

	errno = 0;
	cmp = wcscmp(s1, s2);
	if (errno == 0)
		return (cmp);

	/* Last resort: walk both strings in step. */
	for (p1 = s1, p2 = s2; ; ++p1, ++p2) {
		if (*p1 == L'\0')
			return ((*p2 == L'\0') ? 0 : -1);
		if (*p2 == L'\0')
			return (+1);
		if (*p1 != *p2)
			return ((int)(*p1 - *p2));
	}
}
13738451Smsmith
13838451Smsmith/* counterparts of wcs functions */
13938451Smsmith
14038451Smsmithvoid
14138451Smsmithbwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
14238451Smsmith{
14338451Smsmith
14438451Smsmith	if (MB_CUR_MAX == 1)
14592913Sobrien		fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
14692913Sobrien	else
14738451Smsmith		fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
14838451Smsmith}
14938451Smsmith
15038451Smsmithconst void* bwsrawdata(const struct bwstring *bws)
15139665Smsmith{
15239665Smsmith
15339665Smsmith	return (&(bws->data));
15438451Smsmith}
15538451Smsmith
15638451Smsmithsize_t bwsrawlen(const struct bwstring *bws)
15739665Smsmith{
158278602Sian
15938451Smsmith	return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len));
16038451Smsmith}
16138451Smsmith
16238451Smsmithsize_t
16338451Smsmithbws_memsize(const struct bwstring *bws)
16438451Smsmith{
16538451Smsmith
16638451Smsmith	return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
16738451Smsmith	    (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring)));
16838451Smsmith}
16998542Smckusick
17098542Smckusickvoid
17198542Smckusickbws_setlen(struct bwstring *bws, size_t newlen)
17298542Smckusick{
17398542Smckusick
17498542Smckusick	if (bws && newlen != bws->len && newlen <= bws->len) {
17538451Smsmith		bws->len = newlen;
17638451Smsmith		if (MB_CUR_MAX == 1)
17738451Smsmith			bws->data.cstr[newlen] = '\0';
17838451Smsmith		else
17938451Smsmith			bws->data.wstr[newlen] = L'\0';
18092913Sobrien	}
18138451Smsmith}
18238451Smsmith
18338451Smsmith/*
18438451Smsmith * Allocate a new binary string of specified size
18538451Smsmith */
186134760Siedowsestruct bwstring *
18738451Smsmithbwsalloc(size_t sz)
18839665Smsmith{
18938451Smsmith	struct bwstring *ret;
19038451Smsmith
19138451Smsmith	if (MB_CUR_MAX == 1)
19238451Smsmith		ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
19338451Smsmith	else
19438451Smsmith		ret = sort_malloc(sizeof(struct bwstring) +
19538451Smsmith		    SIZEOF_WCHAR_STRING(sz + 1));
19638451Smsmith	ret->len = sz;
19738451Smsmith
19838451Smsmith	if (MB_CUR_MAX == 1)
19998542Smckusick		ret->data.cstr[ret->len] = '\0';
20098542Smckusick	else
20138451Smsmith		ret->data.wstr[ret->len] = L'\0';
20292913Sobrien
20392913Sobrien	return (ret);
20438451Smsmith}
20538451Smsmith
20698542Smckusick/*
20738451Smsmith * Create a copy of binary string.
20838451Smsmith * New string size equals the length of the old string.
20938451Smsmith */
21038451Smsmithstruct bwstring *
21138451Smsmithbwsdup(const struct bwstring *s)
21238451Smsmith{
21338451Smsmith
21438451Smsmith	if (s == NULL)
21538451Smsmith		return (NULL);
21638451Smsmith	else {
21738451Smsmith		struct bwstring *ret = bwsalloc(s->len);
21838451Smsmith
21938451Smsmith		if (MB_CUR_MAX == 1)
22038451Smsmith			memcpy(ret->data.cstr, s->data.cstr, (s->len));
22138451Smsmith		else
22238451Smsmith			memcpy(ret->data.wstr, s->data.wstr,
22338451Smsmith			    SIZEOF_WCHAR_STRING(s->len));
22438451Smsmith
22538451Smsmith		return (ret);
22638451Smsmith	}
22738451Smsmith}
22838451Smsmith
22938451Smsmith/*
23038451Smsmith * Create a new binary string from a wide character buffer.
23138451Smsmith */
23238451Smsmithstruct bwstring *
23338451Smsmithbwssbdup(const wchar_t *str, size_t len)
23498542Smckusick{
23538451Smsmith
23638451Smsmith	if (str == NULL)
23738451Smsmith		return ((len == 0) ? bwsalloc(0) : NULL);
23838451Smsmith	else {
23938451Smsmith		struct bwstring *ret;
24038451Smsmith
24138451Smsmith		ret = bwsalloc(len);
24238451Smsmith
24338451Smsmith		if (MB_CUR_MAX == 1)
24438451Smsmith			for (size_t i = 0; i < len; ++i)
24538451Smsmith				ret->data.cstr[i] = (unsigned char) str[i];
24638451Smsmith		else
24738451Smsmith			memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len));
24838451Smsmith
24938451Smsmith		return (ret);
25038451Smsmith	}
25138451Smsmith}
25238451Smsmith
25338451Smsmith/*
25438451Smsmith * Create a new binary string from a raw binary buffer.
25538451Smsmith */
25698542Smckusickstruct bwstring *
25738451Smsmithbwscsbdup(const unsigned char *str, size_t len)
25838451Smsmith{
25938451Smsmith	struct bwstring *ret;
26038451Smsmith
26138451Smsmith	ret = bwsalloc(len);
26238451Smsmith
26338451Smsmith	if (str) {
26438451Smsmith		if (MB_CUR_MAX == 1)
26538451Smsmith			memcpy(ret->data.cstr, str, len);
26638451Smsmith		else {
26739665Smsmith			mbstate_t mbs;
268278602Sian			const char *s;
26938451Smsmith			size_t charlen, chars, cptr;
27038451Smsmith
27138451Smsmith			charlen = chars = 0;
27238451Smsmith			cptr = 0;
27338451Smsmith			s = (const char *) str;
27438451Smsmith
27538451Smsmith			memset(&mbs, 0, sizeof(mbs));
27638451Smsmith
27738451Smsmith			while (cptr < len) {
27838451Smsmith				size_t n = MB_CUR_MAX;
27938451Smsmith
28038451Smsmith				if (n > len - cptr)
28138451Smsmith					n = len - cptr;
28238451Smsmith				charlen = mbrlen(s + cptr, n, &mbs);
28338451Smsmith				switch (charlen) {
28438451Smsmith				case 0:
28538451Smsmith					/* FALLTHROUGH */
28638451Smsmith				case (size_t) -1:
28798542Smckusick					/* FALLTHROUGH */
28898542Smckusick				case (size_t) -2:
28998542Smckusick					ret->data.wstr[chars++] =
29098542Smckusick					    (unsigned char) s[cptr];
29138451Smsmith					++cptr;
29238451Smsmith					break;
29338451Smsmith				default:
29438451Smsmith					n = mbrtowc(ret->data.wstr + (chars++),
29538451Smsmith					    s + cptr, charlen, &mbs);
29638451Smsmith					if ((n == (size_t)-1) || (n == (size_t)-2))
29738451Smsmith						/* NOTREACHED */
29838451Smsmith						err(2, "mbrtowc error");
29987631Sjhb					cptr += charlen;
30087631Sjhb				}
30187631Sjhb			}
30287631Sjhb
30387631Sjhb			ret->len = chars;
30487631Sjhb			ret->data.wstr[ret->len] = L'\0';
30587631Sjhb		}
30687631Sjhb	}
30792913Sobrien	return (ret);
30892913Sobrien}
30987631Sjhb
/*
 * Release a binary string through sort_free().  A NULL argument is
 * ignored.
 */
void
bwsfree(const struct bwstring *s)
{

	if (s == NULL)
		return;
	sort_free(s);
}
32098542Smckusick
32187631Sjhb/*
32287631Sjhb * Copy content of src binary string to dst.
32387631Sjhb * If the capacity of the dst string is not sufficient,
32487631Sjhb * then the data is truncated.
32587631Sjhb */
32687631Sjhbsize_t
32798542Smckusickbwscpy(struct bwstring *dst, const struct bwstring *src)
32898542Smckusick{
32987631Sjhb	size_t nums = src->len;
33087631Sjhb
33187631Sjhb	if (nums > dst->len)
33287631Sjhb		nums = dst->len;
33387631Sjhb	dst->len = nums;
33498542Smckusick
33598542Smckusick	if (MB_CUR_MAX == 1) {
33687631Sjhb		memcpy(dst->data.cstr, src->data.cstr, nums);
33787631Sjhb		dst->data.cstr[dst->len] = '\0';
33887631Sjhb	} else {
33987631Sjhb		memcpy(dst->data.wstr, src->data.wstr,
34087631Sjhb		    SIZEOF_WCHAR_STRING(nums + 1));
34187631Sjhb		dst->data.wstr[dst->len] = L'\0';
34287631Sjhb	}
34387631Sjhb
34487631Sjhb	return (nums);
34587631Sjhb}
34687631Sjhb
34787631Sjhb/*
34887631Sjhb * Copy content of src binary string to dst,
349278602Sian * with specified number of symbols to be copied.
35087631Sjhb * If the capacity of the dst string is not sufficient,
35187631Sjhb * then the data is truncated.
35287631Sjhb */
35387631Sjhbstruct bwstring *
35487631Sjhbbwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
35587631Sjhb{
35687631Sjhb	size_t nums = src->len;
35787631Sjhb
35887631Sjhb	if (nums > dst->len)
35987631Sjhb		nums = dst->len;
36087631Sjhb	if (nums > size)
36187631Sjhb		nums = size;
36298542Smckusick	dst->len = nums;
36387631Sjhb
36487631Sjhb	if (MB_CUR_MAX == 1) {
36587631Sjhb		memcpy(dst->data.cstr, src->data.cstr, nums);
36687631Sjhb		dst->data.cstr[dst->len] = '\0';
36787631Sjhb	} else {
368278602Sian		memcpy(dst->data.wstr, src->data.wstr,
36987631Sjhb		    SIZEOF_WCHAR_STRING(nums + 1));
37087631Sjhb		dst->data.wstr[dst->len] = L'\0';
37187631Sjhb	}
37287631Sjhb
37387631Sjhb	return (dst);
37487631Sjhb}
37587631Sjhb
37638451Smsmith/*
37738451Smsmith * Copy content of src binary string to dst,
37838451Smsmith * with specified number of symbols to be copied.
37938451Smsmith * An offset value can be specified, from the start of src string.
38038451Smsmith * If the capacity of the dst string is not sufficient,
38138451Smsmith * then the data is truncated.
38238451Smsmith */
38338451Smsmithstruct bwstring *
38438451Smsmithbwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
38592913Sobrien    size_t size)
38692913Sobrien{
38738451Smsmith
38898542Smckusick	if (offset >= src->len) {
38998542Smckusick		dst->data.wstr[0] = 0;
39038451Smsmith		dst->len = 0;
39138451Smsmith	} else {
39238451Smsmith		size_t nums = src->len - offset;
39338451Smsmith
39438451Smsmith		if (nums > dst->len)
39598542Smckusick			nums = dst->len;
39638451Smsmith		if (nums > size)
39738451Smsmith			nums = size;
39887631Sjhb		dst->len = nums;
39987631Sjhb		if (MB_CUR_MAX == 1) {
40087631Sjhb			memcpy(dst->data.cstr, src->data.cstr + offset,
40138451Smsmith			    (nums));
40238451Smsmith			dst->data.cstr[dst->len] = '\0';
40338451Smsmith		} else {
40438451Smsmith			memcpy(dst->data.wstr, src->data.wstr + offset,
40538451Smsmith			    SIZEOF_WCHAR_STRING(nums));
40638451Smsmith			dst->data.wstr[dst->len] = L'\0';
40738451Smsmith		}
40838451Smsmith	}
409278602Sian	return (dst);
41038451Smsmith}
41138451Smsmith
41238451Smsmith/*
41338451Smsmith * Write binary string to the file.
41438451Smsmith * The output is ended either with '\n' (nl == true)
41538451Smsmith * or '\0' (nl == false).
41638451Smsmith */
41738451Smsmithsize_t
41838451Smsmithbwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
41938451Smsmith{
42038451Smsmith
42138451Smsmith	if (MB_CUR_MAX == 1) {
42238451Smsmith		size_t len = bws->len;
42338451Smsmith
42438451Smsmith		if (!zero_ended) {
42538451Smsmith			bws->data.cstr[len] = '\n';
42638451Smsmith
42738451Smsmith			if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
42838451Smsmith				err(2, NULL);
42938451Smsmith
43038451Smsmith			bws->data.cstr[len] = '\0';
43198542Smckusick		} else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
43298542Smckusick			err(2, NULL);
43338451Smsmith
43438451Smsmith		return (len + 1);
43538451Smsmith
43638451Smsmith	} else {
43738451Smsmith		wchar_t eols;
43838451Smsmith		size_t printed = 0;
43938451Smsmith
44038451Smsmith		eols = zero_ended ? btowc('\0') : btowc('\n');
44138451Smsmith
44238451Smsmith		while (printed < BWSLEN(bws)) {
44338451Smsmith			const wchar_t *s = bws->data.wstr + printed;
44438451Smsmith
44538451Smsmith			if (*s == L'\0') {
44638451Smsmith				int nums;
44792913Sobrien
44892913Sobrien				nums = fwprintf(f, L"%lc", *s);
44938451Smsmith
45038451Smsmith				if (nums != 1)
45138451Smsmith					err(2, NULL);
45238451Smsmith				++printed;
45338451Smsmith			} else {
45438451Smsmith				int nums;
45538451Smsmith
45638451Smsmith				nums = fwprintf(f, L"%ls", s);
45738451Smsmith
45898542Smckusick				if (nums < 1)
45938451Smsmith					err(2, NULL);
46038451Smsmith				printed += nums;
46138451Smsmith			}
46238451Smsmith		}
46338451Smsmith		fwprintf(f, L"%lc", eols);
46438451Smsmith		return (printed + 1);
46538451Smsmith	}
46638451Smsmith}
46738451Smsmith
46838451Smsmith/*
46938451Smsmith * Allocate and read a binary string from file.
47038451Smsmith * The strings are nl-ended or zero-ended, depending on the sort setting.
47138451Smsmith */
47238451Smsmithstruct bwstring *
47338451Smsmithbwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
47438451Smsmith{
47538451Smsmith	wint_t eols;
47638451Smsmith
47738451Smsmith	eols = zero_ended ? btowc('\0') : btowc('\n');
47838451Smsmith
47938451Smsmith	if (!zero_ended && (MB_CUR_MAX > 1)) {
48038451Smsmith		wchar_t *ret;
48138451Smsmith
48238451Smsmith		ret = fgetwln(f, len);
48338451Smsmith
48438451Smsmith		if (ret == NULL) {
48538451Smsmith			if (!feof(f))
48638451Smsmith				err(2, NULL);
48738451Smsmith			return (NULL);
48898542Smckusick		}
48998542Smckusick		if (*len > 0) {
49038451Smsmith			if (ret[*len - 1] == (wchar_t)eols)
49138451Smsmith				--(*len);
49238451Smsmith		}
49338451Smsmith		return (bwssbdup(ret, *len));
49439468Smsmith
49539468Smsmith	} else if (!zero_ended && (MB_CUR_MAX == 1)) {
49638451Smsmith		char *ret;
49738451Smsmith
49892913Sobrien		ret = fgetln(f, len);
49992913Sobrien
50038451Smsmith		if (ret == NULL) {
50138451Smsmith			if (!feof(f))
50238451Smsmith				err(2, NULL);
50398542Smckusick			return (NULL);
50438451Smsmith		}
50538451Smsmith		if (*len > 0) {
50638451Smsmith			if (ret[*len - 1] == '\n')
50738451Smsmith				--(*len);
50839468Smsmith		}
50938451Smsmith		return (bwscsbdup((unsigned char*)ret, *len));
51038451Smsmith
51138451Smsmith	} else {
51238451Smsmith		*len = 0;
51338451Smsmith
51438451Smsmith		if (feof(f))
51538451Smsmith			return (NULL);
51698542Smckusick
51738451Smsmith		if (2 >= rb->fgetwln_z_buffer_size) {
518278602Sian			rb->fgetwln_z_buffer_size += 256;
51998542Smckusick			rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
52098542Smckusick			    sizeof(wchar_t) * rb->fgetwln_z_buffer_size);
52198542Smckusick		}
52298542Smckusick		rb->fgetwln_z_buffer[*len] = 0;
52398542Smckusick
52498542Smckusick		if (MB_CUR_MAX == 1)
52598542Smckusick			while (!feof(f)) {
52698542Smckusick				int c;
52798542Smckusick
52898542Smckusick				c = fgetc(f);
52998542Smckusick
530107555Sjake				if (c == EOF) {
53198542Smckusick					if (*len == 0)
53298542Smckusick						return (NULL);
53398542Smckusick					goto line_read_done;
53498542Smckusick				}
53598542Smckusick				if (c == eols)
53698542Smckusick					goto line_read_done;
53738451Smsmith
53838451Smsmith				if (*len + 1 >= rb->fgetwln_z_buffer_size) {
53938451Smsmith					rb->fgetwln_z_buffer_size += 256;
54038451Smsmith					rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
54138451Smsmith					    SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
54238451Smsmith				}
54338451Smsmith
54498542Smckusick				rb->fgetwln_z_buffer[*len] = c;
54592913Sobrien				rb->fgetwln_z_buffer[++(*len)] = 0;
54638451Smsmith			}
54738451Smsmith		else
54838451Smsmith			while (!feof(f)) {
54938451Smsmith				wint_t c = 0;
55038451Smsmith
55138451Smsmith				c = fgetwc(f);
55238451Smsmith
55338451Smsmith				if (c == WEOF) {
55438451Smsmith					if (*len == 0)
55538451Smsmith						return (NULL);
55638451Smsmith					goto line_read_done;
55738451Smsmith				}
55839468Smsmith				if (c == eols)
55939468Smsmith					goto line_read_done;
56039468Smsmith
56139468Smsmith				if (*len + 1 >= rb->fgetwln_z_buffer_size) {
56239468Smsmith					rb->fgetwln_z_buffer_size += 256;
56338451Smsmith					rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
56438451Smsmith					    SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
56538451Smsmith				}
56638451Smsmith
56738451Smsmith				rb->fgetwln_z_buffer[*len] = c;
56838451Smsmith				rb->fgetwln_z_buffer[++(*len)] = 0;
56938451Smsmith			}
57038451Smsmith
57138451Smsmithline_read_done:
57238451Smsmith		/* we do not count the last 0 */
57338451Smsmith		return (bwssbdup(rb->fgetwln_z_buffer, *len));
57438451Smsmith	}
57538451Smsmith}
57698542Smckusick
57738451Smsmithint
57838451Smsmithbwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
57938451Smsmith    size_t offset, size_t len)
58038451Smsmith{
58138451Smsmith	size_t cmp_len, len1, len2;
58238451Smsmith	int res = 0;
58338451Smsmith
58438451Smsmith	cmp_len = 0;
58592913Sobrien	len1 = bws1->len;
58638451Smsmith	len2 = bws2->len;
58738451Smsmith
58838451Smsmith	if (len1 <= offset) {
58938451Smsmith		return ((len2 <= offset) ? 0 : -1);
59038451Smsmith	} else {
59138451Smsmith		if (len2 <= offset)
59238451Smsmith			return (+1);
59338451Smsmith		else {
59438451Smsmith			len1 -= offset;
59538451Smsmith			len2 -= offset;
59638451Smsmith
59738451Smsmith			cmp_len = len1;
59838451Smsmith
59938451Smsmith			if (len2 < cmp_len)
60038451Smsmith				cmp_len = len2;
60138451Smsmith
60238451Smsmith			if (len < cmp_len)
60338451Smsmith				cmp_len = len;
60438451Smsmith
60538451Smsmith			if (MB_CUR_MAX == 1) {
60638451Smsmith				const unsigned char *s1, *s2;
60738451Smsmith
60838451Smsmith				s1 = bws1->data.cstr + offset;
60938451Smsmith				s2 = bws2->data.cstr + offset;
61038451Smsmith
61138451Smsmith				res = memcmp(s1, s2, cmp_len);
61238451Smsmith
61338451Smsmith			} else {
61438451Smsmith				const wchar_t *s1, *s2;
61538451Smsmith
61638451Smsmith				s1 = bws1->data.wstr + offset;
61738451Smsmith				s2 = bws2->data.wstr + offset;
61898542Smckusick
61998542Smckusick				res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
62038451Smsmith			}
62138451Smsmith		}
62238451Smsmith	}
62338451Smsmith
62438451Smsmith	if (res == 0) {
62538451Smsmith		if (len1 < cmp_len && len1 < len2)
62638451Smsmith			res = -1;
62738451Smsmith		else if (len2 < cmp_len && len2 < len1)
62838451Smsmith			res = +1;
62938451Smsmith	}
63038451Smsmith
63138451Smsmith	return (res);
63238451Smsmith}
63398542Smckusick
63498542Smckusickint
63598542Smckusickbwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
63698542Smckusick{
63798542Smckusick	size_t len1, len2, cmp_len;
63838451Smsmith	int res;
63938451Smsmith
64038451Smsmith	len1 = bws1->len;
64138451Smsmith	len2 = bws2->len;
64238451Smsmith
64398542Smckusick	len1 -= offset;
64492913Sobrien	len2 -= offset;
64538451Smsmith
64638451Smsmith	cmp_len = len1;
64739665Smsmith
64898542Smckusick	if (len2 < cmp_len)
64938451Smsmith		cmp_len = len2;
65038451Smsmith
65138451Smsmith	res = bwsncmp(bws1, bws2, offset, cmp_len);
652278602Sian
65338451Smsmith	if (res == 0) {
65438451Smsmith		if( len1 < len2)
65538451Smsmith			res = -1;
65638451Smsmith		else if (len2 < len1)
65738451Smsmith			res = +1;
65838451Smsmith	}
65938451Smsmith
66038451Smsmith	return (res);
66138451Smsmith}
66238451Smsmith
66338451Smsmithint
66438451Smsmithbws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
66538451Smsmith{
66638451Smsmith	wchar_t c1, c2;
66738451Smsmith	size_t i = 0;
66838451Smsmith
66938451Smsmith	for (i = 0; i < len; ++i) {
67038451Smsmith		c1 = bws_get_iter_value(iter1);
67138451Smsmith		c2 = bws_get_iter_value(iter2);
67238451Smsmith		if (c1 != c2)
67338451Smsmith			return (c1 - c2);
67438451Smsmith		iter1 = bws_iterator_inc(iter1, 1);
67538451Smsmith		iter2 = bws_iterator_inc(iter2, 1);
67638451Smsmith	}
67738451Smsmith
67838451Smsmith	return (0);
67938451Smsmith}
68038451Smsmith
681134760Siedowseint
68238451Smsmithbwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
68338451Smsmith{
68439665Smsmith	size_t len1, len2;
68539468Smsmith
68639468Smsmith	len1 = bws1->len;
68738451Smsmith	len2 = bws2->len;
68838451Smsmith
68939665Smsmith	if (len1 <= offset)
69038451Smsmith		return ((len2 <= offset) ? 0 : -1);
69138451Smsmith	else {
69238451Smsmith		if (len2 <= offset)
69338451Smsmith			return (+1);
69438451Smsmith		else {
69538451Smsmith			len1 -= offset;
69638451Smsmith			len2 -= offset;
69738451Smsmith
69838451Smsmith			if (MB_CUR_MAX == 1) {
69938451Smsmith				const unsigned char *s1, *s2;
70092913Sobrien
70138451Smsmith				s1 = bws1->data.cstr + offset;
70238451Smsmith				s2 = bws2->data.cstr + offset;
70338451Smsmith
70438451Smsmith				if (byte_sort) {
70538451Smsmith					int res = 0;
70638451Smsmith
70738451Smsmith					if (len1 > len2) {
70838451Smsmith						res = memcmp(s1, s2, len2);
70939665Smsmith						if (!res)
71038451Smsmith							res = +1;
71138451Smsmith					} else if (len1 < len2) {
71239665Smsmith						res = memcmp(s1, s2, len1);
71339665Smsmith						if (!res)
71438451Smsmith							res = -1;
71538451Smsmith					} else
71638451Smsmith						res = memcmp(s1, s2, len1);
71738451Smsmith
71838451Smsmith					return (res);
71938451Smsmith
72038451Smsmith				} else {
72138451Smsmith					int res = 0;
72238451Smsmith					size_t i, maxlen;
72338451Smsmith
72438451Smsmith					i = 0;
72538451Smsmith					maxlen = len1;
72638451Smsmith
72738451Smsmith					if (maxlen > len2)
72838451Smsmith						maxlen = len2;
72992913Sobrien
73092913Sobrien					while (i < maxlen) {
73138451Smsmith						/* goto next non-zero part: */
73238451Smsmith						while ((i < maxlen) &&
73338451Smsmith						    !s1[i] && !s2[i])
73492913Sobrien							++i;
73538451Smsmith
73638451Smsmith						if (i >= maxlen)
73798542Smckusick							break;
73838451Smsmith
73938451Smsmith						if (s1[i] == 0) {
74038451Smsmith							if (s2[i] == 0)
74138451Smsmith								/* NOTREACHED */
74238451Smsmith								err(2, "bwscoll error 01");
74338451Smsmith							else
74438451Smsmith								return (-1);
74538451Smsmith						} else if (s2[i] == 0)
74638451Smsmith							return (+1);
74738451Smsmith
74838451Smsmith						res = strcoll((const char*)(s1 + i), (const char*)(s2 + i));
74938451Smsmith						if (res)
75038451Smsmith							return (res);
75138451Smsmith
75238451Smsmith						while ((i < maxlen) &&
75338451Smsmith						    s1[i] && s2[i])
75438451Smsmith							++i;
75538451Smsmith
75638451Smsmith						if (i >= maxlen)
75738451Smsmith							break;
75838451Smsmith
75987631Sjhb						if (s1[i] == 0) {
76087631Sjhb							if (s2[i] == 0) {
76187631Sjhb								++i;
76287631Sjhb								continue;
76387631Sjhb							} else
76487631Sjhb								return (-1);
76587631Sjhb						} else if (s2[i] == 0)
76687631Sjhb							return (+1);
76787631Sjhb						else
76887631Sjhb							/* NOTREACHED */
76987631Sjhb							err(2, "bwscoll error 02");
77087631Sjhb					}
77192913Sobrien
77287631Sjhb					if (len1 < len2)
77387631Sjhb						return (-1);
77492913Sobrien					else if (len1 > len2)
77587631Sjhb						return (+1);
77687631Sjhb
77787631Sjhb					return (0);
77898542Smckusick				}
77987631Sjhb			} else {
78087631Sjhb				const wchar_t *s1, *s2;
78187631Sjhb				size_t i, maxlen;
78287631Sjhb				int res = 0;
78387631Sjhb
78487631Sjhb				s1 = bws1->data.wstr + offset;
78587631Sjhb				s2 = bws2->data.wstr + offset;
78687631Sjhb
78787631Sjhb				i = 0;
78887631Sjhb				maxlen = len1;
78987631Sjhb
79087631Sjhb				if (maxlen > len2)
79187631Sjhb					maxlen = len2;
79287631Sjhb
79387631Sjhb				while (i < maxlen) {
79487631Sjhb
79587631Sjhb					/* goto next non-zero part: */
79638451Smsmith					while ((i < maxlen) &&
79738451Smsmith					    !s1[i] && !s2[i])
79838451Smsmith						++i;
79938451Smsmith
80038451Smsmith					if (i >= maxlen)
80138451Smsmith						break;
80292913Sobrien
80338451Smsmith					if (s1[i] == 0) {
80438451Smsmith						if (s2[i] == 0)
80538451Smsmith							/* NOTREACHED */
80638451Smsmith							err(2, "bwscoll error 1");
80738451Smsmith						else
80838451Smsmith							return (-1);
80938451Smsmith					} else if (s2[i] == 0)
81038451Smsmith						return (+1);
81138451Smsmith
81298542Smckusick					res = wide_str_coll(s1 + i, s2 + i);
81338451Smsmith					if (res)
81438451Smsmith						return (res);
815124811Sjhb
81638451Smsmith					while ((i < maxlen) && s1[i] && s2[i])
81738451Smsmith						++i;
81838451Smsmith
81938451Smsmith					if (i >= maxlen)
82038451Smsmith						break;
82138451Smsmith
82238451Smsmith					if (s1[i] == 0) {
82338451Smsmith						if (s2[i] == 0) {
82438451Smsmith							++i;
82538451Smsmith							continue;
82692913Sobrien						} else
82738451Smsmith							return (-1);
82838451Smsmith					} else if (s2[i] == 0)
82998542Smckusick						return (+1);
83098542Smckusick					else
83198542Smckusick						/* NOTREACHED */
83298542Smckusick						err(2, "bwscoll error 2");
83338451Smsmith				}
83438451Smsmith
83538451Smsmith				if (len1 < len2)
83659766Sjlemon					return (-1);
83759766Sjlemon				else if (len1 > len2)
83859766Sjlemon					return (+1);
83959766Sjlemon
84059766Sjlemon				return (0);
84159766Sjlemon			}
84259766Sjlemon		}
84359766Sjlemon	}
84459766Sjlemon}
84559766Sjlemon
84659766Sjlemon/*
84759766Sjlemon * Correction of the system API
84859766Sjlemon */
84998542Smckusickdouble
85059766Sjlemonbwstod(struct bwstring *s0, bool *empty)
85159766Sjlemon{
85259766Sjlemon	double ret = 0;
85359766Sjlemon
85459766Sjlemon	if (MB_CUR_MAX == 1) {
85559766Sjlemon		unsigned char *end, *s;
85659766Sjlemon		char *ep;
85759766Sjlemon
85859766Sjlemon		s = s0->data.cstr;
85959766Sjlemon		end = s + s0->len;
86059766Sjlemon		ep = NULL;
86159766Sjlemon
862		while (isblank(*s) && s < end)
863			++s;
864
865		if (!isprint(*s)) {
866			*empty = true;
867			return (0);
868		}
869
870		ret = strtod((char*)s, &ep);
871		if ((unsigned char*) ep == s) {
872			*empty = true;
873			return (0);
874		}
875	} else {
876		wchar_t *end, *ep, *s;
877
878		s = s0->data.wstr;
879		end = s + s0->len;
880		ep = NULL;
881
882		while (iswblank(*s) && s < end)
883			++s;
884
885		if (!iswprint(*s)) {
886			*empty = true;
887			return (0);
888		}
889
890		ret = wcstod(s, &ep);
891		if (ep == s) {
892			*empty = true;
893			return (0);
894		}
895	}
896
897	*empty = false;
898	return (ret);
899}
900
901/*
902 * A helper function for monthcoll.  If a line matches
903 * a month name, it returns (number of the month - 1),
904 * while if there is no match, it just return -1.
905 */
906
907int
908bws_month_score(const struct bwstring *s0)
909{
910
911	if (MB_CUR_MAX == 1) {
912		const unsigned char *end, *s;
913		size_t len;
914
915		s = s0->data.cstr;
916		end = s + s0->len;
917
918		while (isblank(*s) && s < end)
919			++s;
920
921		len = strlen((const char*)s);
922
923		for (int i = 11; i >= 0; --i) {
924			if (cmonths[i] &&
925			    (s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i]))))
926				return (i);
927		}
928
929	} else {
930		const wchar_t *end, *s;
931		size_t len;
932
933		s = s0->data.wstr;
934		end = s + s0->len;
935
936		while (iswblank(*s) && s < end)
937			++s;
938
939		len = wcslen(s);
940
941		for (int i = 11; i >= 0; --i) {
942			if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
943				return (i);
944		}
945	}
946
947	return (-1);
948}
949
950/*
951 * Rips out leading blanks (-b).
952 */
953struct bwstring *
954ignore_leading_blanks(struct bwstring *str)
955{
956
957	if (MB_CUR_MAX == 1) {
958		unsigned char *dst, *end, *src;
959
960		src = str->data.cstr;
961		dst = src;
962		end = src + str->len;
963
964		while (src < end && isblank(*src))
965			++src;
966
967		if (src != dst) {
968			size_t newlen;
969
970			newlen = BWSLEN(str) - (src - dst);
971
972			while (src < end) {
973				*dst = *src;
974				++dst;
975				++src;
976			}
977			bws_setlen(str, newlen);
978		}
979	} else {
980		wchar_t *dst, *end, *src;
981
982		src = str->data.wstr;
983		dst = src;
984		end = src + str->len;
985
986		while (src < end && iswblank(*src))
987			++src;
988
989		if (src != dst) {
990
991			size_t newlen = BWSLEN(str) - (src - dst);
992
993			while (src < end) {
994				*dst = *src;
995				++dst;
996				++src;
997			}
998			bws_setlen(str, newlen);
999
1000		}
1001	}
1002	return (str);
1003}
1004
1005/*
1006 * Rips out nonprinting characters (-i).
1007 */
1008struct bwstring *
1009ignore_nonprinting(struct bwstring *str)
1010{
1011	size_t newlen = str->len;
1012
1013	if (MB_CUR_MAX == 1) {
1014		unsigned char *dst, *end, *src;
1015		unsigned char c;
1016
1017		src = str->data.cstr;
1018		dst = src;
1019		end = src + str->len;
1020
1021		while (src < end) {
1022			c = *src;
1023			if (isprint(c)) {
1024				*dst = c;
1025				++dst;
1026				++src;
1027			} else {
1028				++src;
1029				--newlen;
1030			}
1031		}
1032	} else {
1033		wchar_t *dst, *end, *src;
1034		wchar_t c;
1035
1036		src = str->data.wstr;
1037		dst = src;
1038		end = src + str->len;
1039
1040		while (src < end) {
1041			c = *src;
1042			if (iswprint(c)) {
1043				*dst = c;
1044				++dst;
1045				++src;
1046			} else {
1047				++src;
1048				--newlen;
1049			}
1050		}
1051	}
1052	bws_setlen(str, newlen);
1053
1054	return (str);
1055}
1056
1057/*
1058 * Rips out any characters that are not alphanumeric characters
1059 * nor blanks (-d).
1060 */
1061struct bwstring *
1062dictionary_order(struct bwstring *str)
1063{
1064	size_t newlen = str->len;
1065
1066	if (MB_CUR_MAX == 1) {
1067		unsigned char *dst, *end, *src;
1068		unsigned char c;
1069
1070		src = str->data.cstr;
1071		dst = src;
1072		end = src + str->len;
1073
1074		while (src < end) {
1075			c = *src;
1076			if (isalnum(c) || isblank(c)) {
1077				*dst = c;
1078				++dst;
1079				++src;
1080			} else {
1081				++src;
1082				--newlen;
1083			}
1084		}
1085	} else {
1086		wchar_t *dst, *end, *src;
1087		wchar_t c;
1088
1089		src = str->data.wstr;
1090		dst = src;
1091		end = src + str->len;
1092
1093		while (src < end) {
1094			c = *src;
1095			if (iswalnum(c) || iswblank(c)) {
1096				*dst = c;
1097				++dst;
1098				++src;
1099			} else {
1100				++src;
1101				--newlen;
1102			}
1103		}
1104	}
1105	bws_setlen(str, newlen);
1106
1107	return (str);
1108}
1109
1110/*
1111 * Converts string to lower case(-f).
1112 */
1113struct bwstring *
1114ignore_case(struct bwstring *str)
1115{
1116
1117	if (MB_CUR_MAX == 1) {
1118		unsigned char *end, *s;
1119
1120		s = str->data.cstr;
1121		end = s + str->len;
1122
1123		while (s < end) {
1124			*s = toupper(*s);
1125			++s;
1126		}
1127	} else {
1128		wchar_t *end, *s;
1129
1130		s = str->data.wstr;
1131		end = s + str->len;
1132
1133		while (s < end) {
1134			*s = towupper(*s);
1135			++s;
1136		}
1137	}
1138	return (str);
1139}
1140
1141void
1142bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1143{
1144
1145	if (MB_CUR_MAX == 1)
1146		warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
1147	else
1148		warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
1149}
1150