1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include <ctype.h>
34#include <errno.h>
35#include <err.h>
36#include <langinfo.h>
37#include <math.h>
38#include <stdlib.h>
39#include <string.h>
40#include <wchar.h>
41#include <wctype.h>
42
43#include "bwstring.h"
44#include "sort.h"
45
/*
 * When true, bwscoll() orders single-byte strings with memcmp() instead of
 * strcoll().  It is not assigned anywhere in this file.
 */
bool byte_sort;

/*
 * Upper-cased abbreviated month names, one slot per month.  Both tables are
 * filled in by initialise_months(); which one is built depends on MB_CUR_MAX.
 */
static wchar_t **wmonths;
static unsigned char **cmonths;
50
51/* initialise months */
52
53void
54initialise_months(void)
55{
56	const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
57	    ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
58	    ABMON_11, ABMON_12 };
59	unsigned char *tmp;
60	size_t len;
61
62	if (MB_CUR_MAX == 1) {
63		if (cmonths == NULL) {
64			unsigned char *m;
65
66			cmonths = sort_malloc(sizeof(unsigned char*) * 12);
67			for (int i = 0; i < 12; i++) {
68				cmonths[i] = NULL;
69				tmp = (unsigned char *) nl_langinfo(item[i]);
70				if (debug_sort)
71					printf("month[%d]=%s\n", i, tmp);
72				if (*tmp == '\0')
73					continue;
74				m = sort_strdup(tmp);
75				len = strlen(tmp);
76				for (unsigned int j = 0; j < len; j++)
77					m[j] = toupper(m[j]);
78				cmonths[i] = m;
79			}
80		}
81
82	} else {
83		if (wmonths == NULL) {
84			wchar_t *m;
85
86			wmonths = sort_malloc(sizeof(wchar_t *) * 12);
87			for (int i = 0; i < 12; i++) {
88				wmonths[i] = NULL;
89				tmp = (unsigned char *) nl_langinfo(item[i]);
90				if (debug_sort)
91					printf("month[%d]=%s\n", i, tmp);
92				if (*tmp == '\0')
93					continue;
94				len = strlen(tmp);
95				m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
96				if (mbstowcs(m, (char*)tmp, len) ==
97				    ((size_t) - 1)) {
98					sort_free(m);
99					continue;
100				}
101				m[len] = L'\0';
102				for (unsigned int j = 0; j < len; j++)
103					m[j] = towupper(m[j]);
104				wmonths[i] = m;
105			}
106		}
107	}
108}
109
/*
 * Collate two NUL-terminated wide-character strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ the result of wcscmp()
 * is used instead, and should that leave errno set as well, the strings
 * are compared character by character.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	int result;

	errno = 0;
	result = wcscoll(s1, s2);
	if (errno != EILSEQ)
		return (result);

	errno = 0;
	result = wcscmp(s1, s2);
	if (errno == 0)
		return (result);

	/* Last resort: plain code-point comparison. */
	for (;; ++s1, ++s2) {
		if (*s1 == L'\0')
			return ((*s2 == L'\0') ? 0 : -1);
		if (*s2 == L'\0')
			return (+1);
		if (*s1 != *s2)
			return ((int)(*s1 - *s2));
	}
}
139
140/* counterparts of wcs functions */
141
142void
143bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
144{
145
146	if (MB_CUR_MAX == 1)
147		fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
148	else
149		fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
150}
151
152const void* bwsrawdata(const struct bwstring *bws)
153{
154
155	return (&(bws->data));
156}
157
158size_t bwsrawlen(const struct bwstring *bws)
159{
160
161	return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len));
162}
163
164size_t
165bws_memsize(const struct bwstring *bws)
166{
167
168	return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
169	    (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring)));
170}
171
172void
173bws_setlen(struct bwstring *bws, size_t newlen)
174{
175
176	if (bws && newlen != bws->len && newlen <= bws->len) {
177		bws->len = newlen;
178		if (MB_CUR_MAX == 1)
179			bws->data.cstr[newlen] = '\0';
180		else
181			bws->data.wstr[newlen] = L'\0';
182	}
183}
184
185/*
186 * Allocate a new binary string of specified size
187 */
188struct bwstring *
189bwsalloc(size_t sz)
190{
191	struct bwstring *ret;
192
193	if (MB_CUR_MAX == 1)
194		ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
195	else
196		ret = sort_malloc(sizeof(struct bwstring) +
197		    SIZEOF_WCHAR_STRING(sz + 1));
198	ret->len = sz;
199
200	if (MB_CUR_MAX == 1)
201		ret->data.cstr[ret->len] = '\0';
202	else
203		ret->data.wstr[ret->len] = L'\0';
204
205	return (ret);
206}
207
208/*
209 * Create a copy of binary string.
210 * New string size equals the length of the old string.
211 */
212struct bwstring *
213bwsdup(const struct bwstring *s)
214{
215
216	if (s == NULL)
217		return (NULL);
218	else {
219		struct bwstring *ret = bwsalloc(s->len);
220
221		if (MB_CUR_MAX == 1)
222			memcpy(ret->data.cstr, s->data.cstr, (s->len));
223		else
224			memcpy(ret->data.wstr, s->data.wstr,
225			    SIZEOF_WCHAR_STRING(s->len));
226
227		return (ret);
228	}
229}
230
231/*
232 * Create a new binary string from a wide character buffer.
233 */
234struct bwstring *
235bwssbdup(const wchar_t *str, size_t len)
236{
237
238	if (str == NULL)
239		return ((len == 0) ? bwsalloc(0) : NULL);
240	else {
241		struct bwstring *ret;
242
243		ret = bwsalloc(len);
244
245		if (MB_CUR_MAX == 1)
246			for (size_t i = 0; i < len; ++i)
247				ret->data.cstr[i] = (unsigned char) str[i];
248		else
249			memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len));
250
251		return (ret);
252	}
253}
254
255/*
256 * Create a new binary string from a raw binary buffer.
257 */
258struct bwstring *
259bwscsbdup(const unsigned char *str, size_t len)
260{
261	struct bwstring *ret;
262
263	ret = bwsalloc(len);
264
265	if (str) {
266		if (MB_CUR_MAX == 1)
267			memcpy(ret->data.cstr, str, len);
268		else {
269			mbstate_t mbs;
270			const char *s;
271			size_t charlen, chars, cptr;
272
273			chars = 0;
274			cptr = 0;
275			s = (const char *) str;
276
277			memset(&mbs, 0, sizeof(mbs));
278
279			while (cptr < len) {
280				size_t n = MB_CUR_MAX;
281
282				if (n > len - cptr)
283					n = len - cptr;
284				charlen = mbrlen(s + cptr, n, &mbs);
285				switch (charlen) {
286				case 0:
287					/* FALLTHROUGH */
288				case (size_t) -1:
289					/* FALLTHROUGH */
290				case (size_t) -2:
291					ret->data.wstr[chars++] =
292					    (unsigned char) s[cptr];
293					++cptr;
294					break;
295				default:
296					n = mbrtowc(ret->data.wstr + (chars++),
297					    s + cptr, charlen, &mbs);
298					if ((n == (size_t)-1) || (n == (size_t)-2))
299						/* NOTREACHED */
300						err(2, "mbrtowc error");
301					cptr += charlen;
302				}
303			}
304
305			ret->len = chars;
306			ret->data.wstr[ret->len] = L'\0';
307		}
308	}
309	return (ret);
310}
311
/*
 * Release a binary string through sort_free().  A NULL argument is
 * ignored.
 */
void
bwsfree(const struct bwstring *s)
{

	if (s == NULL)
		return;

	sort_free(s);
}
322
323/*
324 * Copy content of src binary string to dst.
325 * If the capacity of the dst string is not sufficient,
326 * then the data is truncated.
327 */
328size_t
329bwscpy(struct bwstring *dst, const struct bwstring *src)
330{
331	size_t nums = src->len;
332
333	if (nums > dst->len)
334		nums = dst->len;
335	dst->len = nums;
336
337	if (MB_CUR_MAX == 1) {
338		memcpy(dst->data.cstr, src->data.cstr, nums);
339		dst->data.cstr[dst->len] = '\0';
340	} else {
341		memcpy(dst->data.wstr, src->data.wstr,
342		    SIZEOF_WCHAR_STRING(nums + 1));
343		dst->data.wstr[dst->len] = L'\0';
344	}
345
346	return (nums);
347}
348
349/*
350 * Copy content of src binary string to dst,
351 * with specified number of symbols to be copied.
352 * If the capacity of the dst string is not sufficient,
353 * then the data is truncated.
354 */
355struct bwstring *
356bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
357{
358	size_t nums = src->len;
359
360	if (nums > dst->len)
361		nums = dst->len;
362	if (nums > size)
363		nums = size;
364	dst->len = nums;
365
366	if (MB_CUR_MAX == 1) {
367		memcpy(dst->data.cstr, src->data.cstr, nums);
368		dst->data.cstr[dst->len] = '\0';
369	} else {
370		memcpy(dst->data.wstr, src->data.wstr,
371		    SIZEOF_WCHAR_STRING(nums + 1));
372		dst->data.wstr[dst->len] = L'\0';
373	}
374
375	return (dst);
376}
377
378/*
379 * Copy content of src binary string to dst,
380 * with specified number of symbols to be copied.
381 * An offset value can be specified, from the start of src string.
382 * If the capacity of the dst string is not sufficient,
383 * then the data is truncated.
384 */
385struct bwstring *
386bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
387    size_t size)
388{
389
390	if (offset >= src->len) {
391		dst->data.wstr[0] = 0;
392		dst->len = 0;
393	} else {
394		size_t nums = src->len - offset;
395
396		if (nums > dst->len)
397			nums = dst->len;
398		if (nums > size)
399			nums = size;
400		dst->len = nums;
401		if (MB_CUR_MAX == 1) {
402			memcpy(dst->data.cstr, src->data.cstr + offset,
403			    (nums));
404			dst->data.cstr[dst->len] = '\0';
405		} else {
406			memcpy(dst->data.wstr, src->data.wstr + offset,
407			    SIZEOF_WCHAR_STRING(nums));
408			dst->data.wstr[dst->len] = L'\0';
409		}
410	}
411	return (dst);
412}
413
414/*
415 * Write binary string to the file.
416 * The output is ended either with '\n' (nl == true)
417 * or '\0' (nl == false).
418 */
419size_t
420bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
421{
422
423	if (MB_CUR_MAX == 1) {
424		size_t len = bws->len;
425
426		if (!zero_ended) {
427			bws->data.cstr[len] = '\n';
428
429			if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
430				err(2, NULL);
431
432			bws->data.cstr[len] = '\0';
433		} else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
434			err(2, NULL);
435
436		return (len + 1);
437
438	} else {
439		wchar_t eols;
440		size_t printed = 0;
441
442		eols = zero_ended ? btowc('\0') : btowc('\n');
443
444		while (printed < BWSLEN(bws)) {
445			const wchar_t *s = bws->data.wstr + printed;
446
447			if (*s == L'\0') {
448				int nums;
449
450				nums = fwprintf(f, L"%lc", *s);
451
452				if (nums != 1)
453					err(2, NULL);
454				++printed;
455			} else {
456				int nums;
457
458				nums = fwprintf(f, L"%ls", s);
459
460				if (nums < 1)
461					err(2, NULL);
462				printed += nums;
463			}
464		}
465		fwprintf(f, L"%lc", eols);
466		return (printed + 1);
467	}
468}
469
470/*
471 * Allocate and read a binary string from file.
472 * The strings are nl-ended or zero-ended, depending on the sort setting.
473 */
474struct bwstring *
475bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
476{
477	wint_t eols;
478
479	eols = zero_ended ? btowc('\0') : btowc('\n');
480
481	if (!zero_ended && (MB_CUR_MAX > 1)) {
482		wchar_t *ret;
483
484		ret = fgetwln(f, len);
485
486		if (ret == NULL) {
487			if (!feof(f))
488				err(2, NULL);
489			return (NULL);
490		}
491		if (*len > 0) {
492			if (ret[*len - 1] == (wchar_t)eols)
493				--(*len);
494		}
495		return (bwssbdup(ret, *len));
496
497	} else if (!zero_ended && (MB_CUR_MAX == 1)) {
498		char *ret;
499
500		ret = fgetln(f, len);
501
502		if (ret == NULL) {
503			if (!feof(f))
504				err(2, NULL);
505			return (NULL);
506		}
507		if (*len > 0) {
508			if (ret[*len - 1] == '\n')
509				--(*len);
510		}
511		return (bwscsbdup((unsigned char*)ret, *len));
512
513	} else {
514		*len = 0;
515
516		if (feof(f))
517			return (NULL);
518
519		if (2 >= rb->fgetwln_z_buffer_size) {
520			rb->fgetwln_z_buffer_size += 256;
521			rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
522			    sizeof(wchar_t) * rb->fgetwln_z_buffer_size);
523		}
524		rb->fgetwln_z_buffer[*len] = 0;
525
526		if (MB_CUR_MAX == 1)
527			while (!feof(f)) {
528				int c;
529
530				c = fgetc(f);
531
532				if (c == EOF) {
533					if (*len == 0)
534						return (NULL);
535					goto line_read_done;
536				}
537				if (c == eols)
538					goto line_read_done;
539
540				if (*len + 1 >= rb->fgetwln_z_buffer_size) {
541					rb->fgetwln_z_buffer_size += 256;
542					rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
543					    SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
544				}
545
546				rb->fgetwln_z_buffer[*len] = c;
547				rb->fgetwln_z_buffer[++(*len)] = 0;
548			}
549		else
550			while (!feof(f)) {
551				wint_t c = 0;
552
553				c = fgetwc(f);
554
555				if (c == WEOF) {
556					if (*len == 0)
557						return (NULL);
558					goto line_read_done;
559				}
560				if (c == eols)
561					goto line_read_done;
562
563				if (*len + 1 >= rb->fgetwln_z_buffer_size) {
564					rb->fgetwln_z_buffer_size += 256;
565					rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
566					    SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
567				}
568
569				rb->fgetwln_z_buffer[*len] = c;
570				rb->fgetwln_z_buffer[++(*len)] = 0;
571			}
572
573line_read_done:
574		/* we do not count the last 0 */
575		return (bwssbdup(rb->fgetwln_z_buffer, *len));
576	}
577}
578
579int
580bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
581    size_t offset, size_t len)
582{
583	size_t cmp_len, len1, len2;
584	int res = 0;
585
586	len1 = bws1->len;
587	len2 = bws2->len;
588
589	if (len1 <= offset) {
590		return ((len2 <= offset) ? 0 : -1);
591	} else {
592		if (len2 <= offset)
593			return (+1);
594		else {
595			len1 -= offset;
596			len2 -= offset;
597
598			cmp_len = len1;
599
600			if (len2 < cmp_len)
601				cmp_len = len2;
602
603			if (len < cmp_len)
604				cmp_len = len;
605
606			if (MB_CUR_MAX == 1) {
607				const unsigned char *s1, *s2;
608
609				s1 = bws1->data.cstr + offset;
610				s2 = bws2->data.cstr + offset;
611
612				res = memcmp(s1, s2, cmp_len);
613
614			} else {
615				const wchar_t *s1, *s2;
616
617				s1 = bws1->data.wstr + offset;
618				s2 = bws2->data.wstr + offset;
619
620				res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
621			}
622		}
623	}
624
625	if (res == 0) {
626		if (len1 < cmp_len && len1 < len2)
627			res = -1;
628		else if (len2 < cmp_len && len2 < len1)
629			res = +1;
630	}
631
632	return (res);
633}
634
635int
636bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
637{
638	size_t len1, len2, cmp_len;
639	int res;
640
641	len1 = bws1->len;
642	len2 = bws2->len;
643
644	len1 -= offset;
645	len2 -= offset;
646
647	cmp_len = len1;
648
649	if (len2 < cmp_len)
650		cmp_len = len2;
651
652	res = bwsncmp(bws1, bws2, offset, cmp_len);
653
654	if (res == 0) {
655		if( len1 < len2)
656			res = -1;
657		else if (len2 < len1)
658			res = +1;
659	}
660
661	return (res);
662}
663
664int
665bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
666{
667	wchar_t c1, c2;
668	size_t i = 0;
669
670	for (i = 0; i < len; ++i) {
671		c1 = bws_get_iter_value(iter1);
672		c2 = bws_get_iter_value(iter2);
673		if (c1 != c2)
674			return (c1 - c2);
675		iter1 = bws_iterator_inc(iter1, 1);
676		iter2 = bws_iterator_inc(iter2, 1);
677	}
678
679	return (0);
680}
681
682int
683bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
684{
685	size_t len1, len2;
686
687	len1 = bws1->len;
688	len2 = bws2->len;
689
690	if (len1 <= offset)
691		return ((len2 <= offset) ? 0 : -1);
692	else {
693		if (len2 <= offset)
694			return (+1);
695		else {
696			len1 -= offset;
697			len2 -= offset;
698
699			if (MB_CUR_MAX == 1) {
700				const unsigned char *s1, *s2;
701
702				s1 = bws1->data.cstr + offset;
703				s2 = bws2->data.cstr + offset;
704
705				if (byte_sort) {
706					int res = 0;
707
708					if (len1 > len2) {
709						res = memcmp(s1, s2, len2);
710						if (!res)
711							res = +1;
712					} else if (len1 < len2) {
713						res = memcmp(s1, s2, len1);
714						if (!res)
715							res = -1;
716					} else
717						res = memcmp(s1, s2, len1);
718
719					return (res);
720
721				} else {
722					int res = 0;
723					size_t i, maxlen;
724
725					i = 0;
726					maxlen = len1;
727
728					if (maxlen > len2)
729						maxlen = len2;
730
731					while (i < maxlen) {
732						/* goto next non-zero part: */
733						while ((i < maxlen) &&
734						    !s1[i] && !s2[i])
735							++i;
736
737						if (i >= maxlen)
738							break;
739
740						if (s1[i] == 0) {
741							if (s2[i] == 0)
742								/* NOTREACHED */
743								err(2, "bwscoll error 01");
744							else
745								return (-1);
746						} else if (s2[i] == 0)
747							return (+1);
748
749						res = strcoll((const char*)(s1 + i), (const char*)(s2 + i));
750						if (res)
751							return (res);
752
753						while ((i < maxlen) &&
754						    s1[i] && s2[i])
755							++i;
756
757						if (i >= maxlen)
758							break;
759
760						if (s1[i] == 0) {
761							if (s2[i] == 0) {
762								++i;
763								continue;
764							} else
765								return (-1);
766						} else if (s2[i] == 0)
767							return (+1);
768						else
769							/* NOTREACHED */
770							err(2, "bwscoll error 02");
771					}
772
773					if (len1 < len2)
774						return (-1);
775					else if (len1 > len2)
776						return (+1);
777
778					return (0);
779				}
780			} else {
781				const wchar_t *s1, *s2;
782				size_t i, maxlen;
783				int res = 0;
784
785				s1 = bws1->data.wstr + offset;
786				s2 = bws2->data.wstr + offset;
787
788				i = 0;
789				maxlen = len1;
790
791				if (maxlen > len2)
792					maxlen = len2;
793
794				while (i < maxlen) {
795
796					/* goto next non-zero part: */
797					while ((i < maxlen) &&
798					    !s1[i] && !s2[i])
799						++i;
800
801					if (i >= maxlen)
802						break;
803
804					if (s1[i] == 0) {
805						if (s2[i] == 0)
806							/* NOTREACHED */
807							err(2, "bwscoll error 1");
808						else
809							return (-1);
810					} else if (s2[i] == 0)
811						return (+1);
812
813					res = wide_str_coll(s1 + i, s2 + i);
814					if (res)
815						return (res);
816
817					while ((i < maxlen) && s1[i] && s2[i])
818						++i;
819
820					if (i >= maxlen)
821						break;
822
823					if (s1[i] == 0) {
824						if (s2[i] == 0) {
825							++i;
826							continue;
827						} else
828							return (-1);
829					} else if (s2[i] == 0)
830						return (+1);
831					else
832						/* NOTREACHED */
833						err(2, "bwscoll error 2");
834				}
835
836				if (len1 < len2)
837					return (-1);
838				else if (len1 > len2)
839					return (+1);
840
841				return (0);
842			}
843		}
844	}
845}
846
847/*
848 * Correction of the system API
849 */
850double
851bwstod(struct bwstring *s0, bool *empty)
852{
853	double ret = 0;
854
855	if (MB_CUR_MAX == 1) {
856		unsigned char *end, *s;
857		char *ep;
858
859		s = s0->data.cstr;
860		end = s + s0->len;
861		ep = NULL;
862
863		while (isblank(*s) && s < end)
864			++s;
865
866		if (!isprint(*s)) {
867			*empty = true;
868			return (0);
869		}
870
871		ret = strtod((char*)s, &ep);
872		if ((unsigned char*) ep == s) {
873			*empty = true;
874			return (0);
875		}
876	} else {
877		wchar_t *end, *ep, *s;
878
879		s = s0->data.wstr;
880		end = s + s0->len;
881		ep = NULL;
882
883		while (iswblank(*s) && s < end)
884			++s;
885
886		if (!iswprint(*s)) {
887			*empty = true;
888			return (0);
889		}
890
891		ret = wcstod(s, &ep);
892		if (ep == s) {
893			*empty = true;
894			return (0);
895		}
896	}
897
898	*empty = false;
899	return (ret);
900}
901
902/*
903 * A helper function for monthcoll.  If a line matches
904 * a month name, it returns (number of the month - 1),
905 * while if there is no match, it just return -1.
906 */
907
908int
909bws_month_score(const struct bwstring *s0)
910{
911
912	if (MB_CUR_MAX == 1) {
913		const unsigned char *end, *s;
914
915		s = s0->data.cstr;
916		end = s + s0->len;
917
918		while (isblank(*s) && s < end)
919			++s;
920
921		for (int i = 11; i >= 0; --i) {
922			if (cmonths[i] &&
923			    (s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i]))))
924				return (i);
925		}
926
927	} else {
928		const wchar_t *end, *s;
929
930		s = s0->data.wstr;
931		end = s + s0->len;
932
933		while (iswblank(*s) && s < end)
934			++s;
935
936		for (int i = 11; i >= 0; --i) {
937			if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
938				return (i);
939		}
940	}
941
942	return (-1);
943}
944
945/*
946 * Rips out leading blanks (-b).
947 */
948struct bwstring *
949ignore_leading_blanks(struct bwstring *str)
950{
951
952	if (MB_CUR_MAX == 1) {
953		unsigned char *dst, *end, *src;
954
955		src = str->data.cstr;
956		dst = src;
957		end = src + str->len;
958
959		while (src < end && isblank(*src))
960			++src;
961
962		if (src != dst) {
963			size_t newlen;
964
965			newlen = BWSLEN(str) - (src - dst);
966
967			while (src < end) {
968				*dst = *src;
969				++dst;
970				++src;
971			}
972			bws_setlen(str, newlen);
973		}
974	} else {
975		wchar_t *dst, *end, *src;
976
977		src = str->data.wstr;
978		dst = src;
979		end = src + str->len;
980
981		while (src < end && iswblank(*src))
982			++src;
983
984		if (src != dst) {
985
986			size_t newlen = BWSLEN(str) - (src - dst);
987
988			while (src < end) {
989				*dst = *src;
990				++dst;
991				++src;
992			}
993			bws_setlen(str, newlen);
994
995		}
996	}
997	return (str);
998}
999
1000/*
1001 * Rips out nonprinting characters (-i).
1002 */
1003struct bwstring *
1004ignore_nonprinting(struct bwstring *str)
1005{
1006	size_t newlen = str->len;
1007
1008	if (MB_CUR_MAX == 1) {
1009		unsigned char *dst, *end, *src;
1010		unsigned char c;
1011
1012		src = str->data.cstr;
1013		dst = src;
1014		end = src + str->len;
1015
1016		while (src < end) {
1017			c = *src;
1018			if (isprint(c)) {
1019				*dst = c;
1020				++dst;
1021				++src;
1022			} else {
1023				++src;
1024				--newlen;
1025			}
1026		}
1027	} else {
1028		wchar_t *dst, *end, *src;
1029		wchar_t c;
1030
1031		src = str->data.wstr;
1032		dst = src;
1033		end = src + str->len;
1034
1035		while (src < end) {
1036			c = *src;
1037			if (iswprint(c)) {
1038				*dst = c;
1039				++dst;
1040				++src;
1041			} else {
1042				++src;
1043				--newlen;
1044			}
1045		}
1046	}
1047	bws_setlen(str, newlen);
1048
1049	return (str);
1050}
1051
1052/*
1053 * Rips out any characters that are not alphanumeric characters
1054 * nor blanks (-d).
1055 */
1056struct bwstring *
1057dictionary_order(struct bwstring *str)
1058{
1059	size_t newlen = str->len;
1060
1061	if (MB_CUR_MAX == 1) {
1062		unsigned char *dst, *end, *src;
1063		unsigned char c;
1064
1065		src = str->data.cstr;
1066		dst = src;
1067		end = src + str->len;
1068
1069		while (src < end) {
1070			c = *src;
1071			if (isalnum(c) || isblank(c)) {
1072				*dst = c;
1073				++dst;
1074				++src;
1075			} else {
1076				++src;
1077				--newlen;
1078			}
1079		}
1080	} else {
1081		wchar_t *dst, *end, *src;
1082		wchar_t c;
1083
1084		src = str->data.wstr;
1085		dst = src;
1086		end = src + str->len;
1087
1088		while (src < end) {
1089			c = *src;
1090			if (iswalnum(c) || iswblank(c)) {
1091				*dst = c;
1092				++dst;
1093				++src;
1094			} else {
1095				++src;
1096				--newlen;
1097			}
1098		}
1099	}
1100	bws_setlen(str, newlen);
1101
1102	return (str);
1103}
1104
1105/*
1106 * Converts string to lower case(-f).
1107 */
1108struct bwstring *
1109ignore_case(struct bwstring *str)
1110{
1111
1112	if (MB_CUR_MAX == 1) {
1113		unsigned char *end, *s;
1114
1115		s = str->data.cstr;
1116		end = s + str->len;
1117
1118		while (s < end) {
1119			*s = toupper(*s);
1120			++s;
1121		}
1122	} else {
1123		wchar_t *end, *s;
1124
1125		s = str->data.wstr;
1126		end = s + str->len;
1127
1128		while (s < end) {
1129			*s = towupper(*s);
1130			++s;
1131		}
1132	}
1133	return (str);
1134}
1135
1136void
1137bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1138{
1139
1140	if (MB_CUR_MAX == 1)
1141		warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
1142	else
1143		warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
1144}
1145