1/*	$OpenBSD: bwstring.c,v 1.9 2019/05/15 09:33:34 schwarze Exp $	*/
2
3/*-
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <ctype.h>
31#include <errno.h>
32#include <err.h>
33#include <langinfo.h>
34#include <math.h>
35#include <stdlib.h>
36#include <string.h>
37#include <wchar.h>
38#include <wctype.h>
39
40#include "bwstring.h"
41#include "sort.h"
42
/*
 * Tables of the twelve abbreviated month names, upper-cased.
 * initialise_months() fills wmonths when sort_mb_cur_max != 1 and
 * cmonths when sort_mb_cur_max == 1; entries may be NULL.
 */
static wchar_t **wmonths;
static char **cmonths;
45
46/* initialise months */
47
48void
49initialise_months(void)
50{
51	const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
52	    ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
53	    ABMON_11, ABMON_12 };
54	char *tmp;
55	size_t len;
56
57	if (sort_mb_cur_max == 1) {
58		if (cmonths == NULL) {
59			char *m;
60			unsigned int j;
61			int i;
62
63			cmonths = sort_malloc(sizeof(char *) * 12);
64			for (i = 0; i < 12; i++) {
65				cmonths[i] = NULL;
66				tmp = nl_langinfo(item[i]);
67				if (debug_sort)
68					printf("month[%d]=%s\n", i, tmp);
69				if (*tmp == '\0')
70					continue;
71				m = sort_strdup(tmp);
72				len = strlen(tmp);
73				for (j = 0; j < len; j++)
74					m[j] = toupper(m[j]);
75				cmonths[i] = m;
76			}
77		}
78	} else {
79		if (wmonths == NULL) {
80			unsigned int j;
81			wchar_t *m;
82			int i;
83
84			wmonths = sort_malloc(sizeof(wchar_t *) * 12);
85			for (i = 0; i < 12; i++) {
86				wmonths[i] = NULL;
87				tmp = nl_langinfo(item[i]);
88				if (debug_sort)
89					printf("month[%d]=%s\n", i, tmp);
90				if (*tmp == '\0')
91					continue;
92				len = strlen(tmp);
93				m = sort_reallocarray(NULL, len + 1,
94				    sizeof(wchar_t));
95				if (mbstowcs(m, tmp, len) == (size_t)-1) {
96					sort_free(m);
97					continue;
98				}
99				m[len] = L'\0';
100				for (j = 0; j < len; j++)
101					m[j] = towupper(m[j]);
102				wmonths[i] = m;
103			}
104		}
105	}
106}
107
/*
 * Collate two NUL-terminated wide strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ, wcscmp() is used
 * instead, and if that also leaves errno set the strings are compared
 * character by character.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	const wchar_t *p1, *p2;
	int cmp;

	errno = 0;
	cmp = wcscoll(s1, s2);
	if (errno != EILSEQ)
		return cmp;

	errno = 0;
	cmp = wcscmp(s1, s2);
	if (errno == 0)
		return cmp;

	/* Last resort: plain code point comparison. */
	for (p1 = s1, p2 = s2; ; ++p1, ++p2) {
		if (*p1 == L'\0')
			return (*p2 == L'\0') ? 0 : -1;
		if (*p2 == L'\0')
			return 1;
		if (*p1 != *p2)
			return (int)*p1 - (int)*p2;
	}
}
138
139/* counterparts of wcs functions */
140
141void
142bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
143{
144	if (sort_mb_cur_max == 1)
145		fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
146	else
147		fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
148}
149
150const void *
151bwsrawdata(const struct bwstring *bws)
152{
153	return &(bws->data);
154}
155
156size_t
157bwsrawlen(const struct bwstring *bws)
158{
159	return (sort_mb_cur_max == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len);
160}
161
162size_t
163bws_memsize(const struct bwstring *bws)
164{
165	return (sort_mb_cur_max == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
166	    (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring));
167}
168
169void
170bws_setlen(struct bwstring *bws, size_t newlen)
171{
172	if (bws && newlen != bws->len && newlen <= bws->len) {
173		bws->len = newlen;
174		if (sort_mb_cur_max == 1)
175			bws->data.cstr[newlen] = '\0';
176		else
177			bws->data.wstr[newlen] = L'\0';
178	}
179}
180
181/*
182 * Allocate a new binary string of specified size
183 */
184struct bwstring *
185bwsalloc(size_t sz)
186{
187	struct bwstring *ret;
188
189	if (sort_mb_cur_max == 1) {
190		ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
191		ret->data.cstr[sz] = '\0';
192	} else {
193		ret = sort_malloc(sizeof(struct bwstring) +
194		    SIZEOF_WCHAR_STRING(sz + 1));
195		ret->data.wstr[sz] = L'\0';
196	}
197	ret->len = sz;
198
199	return ret;
200}
201
202/*
203 * Create a copy of binary string.
204 * New string size equals the length of the old string.
205 */
206struct bwstring *
207bwsdup(const struct bwstring *s)
208{
209	struct bwstring *ret;
210
211	if (s == NULL)
212		return NULL;
213
214	ret = bwsalloc(s->len);
215
216	if (sort_mb_cur_max == 1)
217		memcpy(ret->data.cstr, s->data.cstr, s->len);
218	else
219		memcpy(ret->data.wstr, s->data.wstr,
220		    SIZEOF_WCHAR_STRING(s->len));
221
222	return ret;
223}
224
225/*
226 * Create a new binary string from a wide character buffer.
227 */
228struct bwstring *
229bwssbdup(const wchar_t *str, size_t len)
230{
231	if (str == NULL)
232		return (len == 0) ? bwsalloc(0) : NULL;
233	else {
234		struct bwstring *ret;
235		size_t i;
236
237		ret = bwsalloc(len);
238
239		if (sort_mb_cur_max == 1)
240			for (i = 0; i < len; ++i)
241				ret->data.cstr[i] = (unsigned char) str[i];
242		else
243			memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len));
244
245		return ret;
246	}
247}
248
249/*
250 * Create a new binary string from a raw binary buffer.
251 */
252struct bwstring *
253bwscsbdup(const unsigned char *str, size_t len)
254{
255	struct bwstring *ret;
256
257	ret = bwsalloc(len);
258
259	if (str) {
260		if (sort_mb_cur_max == 1)
261			memcpy(ret->data.cstr, str, len);
262		else {
263			mbstate_t mbs;
264			const char *s;
265			size_t charlen, chars, cptr;
266
267			chars = 0;
268			cptr = 0;
269			s = (const char *) str;
270
271			memset(&mbs, 0, sizeof(mbs));
272
273			while (cptr < len) {
274				size_t n = sort_mb_cur_max;
275
276				if (n > len - cptr)
277					n = len - cptr;
278				charlen = mbrlen(s + cptr, n, &mbs);
279				switch (charlen) {
280				case 0:
281					/* FALLTHROUGH */
282				case (size_t) -1:
283					/* FALLTHROUGH */
284				case (size_t) -2:
285					ret->data.wstr[chars++] =
286					    (unsigned char) s[cptr];
287					++cptr;
288					break;
289				default:
290					n = mbrtowc(ret->data.wstr + (chars++),
291					    s + cptr, charlen, &mbs);
292					if ((n == (size_t)-1) || (n == (size_t)-2))
293						/* NOTREACHED */
294						err(2, "mbrtowc error");
295					cptr += charlen;
296				};
297			}
298
299			ret->len = chars;
300			ret->data.wstr[ret->len] = L'\0';
301		}
302	}
303	return ret;
304}
305
/*
 * Release a binary string by handing it to sort_free().
 */
void
bwsfree(struct bwstring *s)
{
	sort_free(s);
}
314
315/*
316 * Copy content of src binary string to dst.
317 * If the capacity of the dst string is not sufficient,
318 * then the data is truncated.
319 */
320size_t
321bwscpy(struct bwstring *dst, const struct bwstring *src)
322{
323	size_t nums = src->len;
324
325	if (nums > dst->len)
326		nums = dst->len;
327	dst->len = nums;
328
329	if (sort_mb_cur_max == 1) {
330		memcpy(dst->data.cstr, src->data.cstr, nums);
331		dst->data.cstr[dst->len] = '\0';
332	} else {
333		memcpy(dst->data.wstr, src->data.wstr,
334		    SIZEOF_WCHAR_STRING(nums + 1));
335		dst->data.wstr[dst->len] = L'\0';
336	}
337
338	return nums;
339}
340
341/*
342 * Copy content of src binary string to dst,
343 * with specified number of symbols to be copied.
344 * If the capacity of the dst string is not sufficient,
345 * then the data is truncated.
346 */
347struct bwstring *
348bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
349{
350	size_t nums = src->len;
351
352	if (nums > dst->len)
353		nums = dst->len;
354	if (nums > size)
355		nums = size;
356	dst->len = nums;
357
358	if (sort_mb_cur_max == 1) {
359		memcpy(dst->data.cstr, src->data.cstr, nums);
360		dst->data.cstr[dst->len] = '\0';
361	} else {
362		memcpy(dst->data.wstr, src->data.wstr,
363		    SIZEOF_WCHAR_STRING(nums + 1));
364		dst->data.wstr[dst->len] = L'\0';
365	}
366
367	return dst;
368}
369
370/*
371 * Copy content of src binary string to dst,
372 * with specified number of symbols to be copied.
373 * An offset value can be specified, from the start of src string.
374 * If the capacity of the dst string is not sufficient,
375 * then the data is truncated.
376 */
377struct bwstring *
378bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
379    size_t size)
380{
381	if (offset >= src->len) {
382		dst->data.wstr[0] = 0;
383		dst->len = 0;
384	} else {
385		size_t nums = src->len - offset;
386
387		if (nums > dst->len)
388			nums = dst->len;
389		if (nums > size)
390			nums = size;
391		dst->len = nums;
392		if (sort_mb_cur_max == 1) {
393			memcpy(dst->data.cstr, src->data.cstr + offset,
394			    (nums));
395			dst->data.cstr[dst->len] = '\0';
396		} else {
397			memcpy(dst->data.wstr, src->data.wstr + offset,
398			    SIZEOF_WCHAR_STRING(nums));
399			dst->data.wstr[dst->len] = L'\0';
400		}
401	}
402	return dst;
403}
404
405/*
406 * Write binary string to the file.
407 * The output is ended either with '\n' (nl == true)
408 * or '\0' (nl == false).
409 */
410size_t
411bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
412{
413	if (sort_mb_cur_max == 1) {
414		size_t len = bws->len;
415
416		if (!zero_ended) {
417			bws->data.cstr[len] = '\n';
418
419			if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
420				err(2, NULL);
421
422			bws->data.cstr[len] = '\0';
423		} else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
424			err(2, NULL);
425
426		return len + 1;
427
428	} else {
429		wchar_t eols;
430		size_t printed = 0;
431
432		eols = zero_ended ? btowc('\0') : btowc('\n');
433
434		while (printed < BWSLEN(bws)) {
435			const wchar_t *s = bws->data.wstr + printed;
436
437			if (*s == L'\0') {
438				int nums;
439
440				nums = fwprintf(f, L"%lc", *s);
441
442				if (nums != 1)
443					err(2, NULL);
444				++printed;
445			} else {
446				int nums;
447
448				nums = fwprintf(f, L"%ls", s);
449
450				if (nums < 1)
451					err(2, NULL);
452				printed += nums;
453			}
454		}
455		fwprintf(f, L"%lc", eols);
456		return printed + 1;
457	}
458}
459
/*
 * Allocate and read a binary string from file.
 * The strings are nl-ended or zero-ended, depending on the sort setting.
 *
 * f          - stream to read from
 * len        - receives the length of the line, terminator excluded
 * zero_ended - true when lines end with '\0' instead of '\n'
 * rb         - holds the growable buffer used in zero-ended mode
 *
 * Returns a newly allocated string, or NULL when nothing more could be
 * read.  In the newline modes a NULL result from fgetln()/fgetwln()
 * without end-of-file set terminates the program through err(2, NULL).
 */
struct bwstring *
bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
{
	wint_t eols;

	/* Line terminator, as a wide character. */
	eols = zero_ended ? btowc('\0') : btowc('\n');

	if (!zero_ended && (sort_mb_cur_max > 1)) {
		/* Newline-terminated, multibyte: read a whole wide line. */
		wchar_t *ret;

		ret = fgetwln(f, len);

		if (ret == NULL) {
			if (!feof(f))
				err(2, NULL);
			return NULL;
		}
		/* Drop the trailing terminator, if the line has one. */
		if (*len > 0) {
			if (ret[*len - 1] == (wchar_t)eols)
				--(*len);
		}
		return bwssbdup(ret, *len);

	} else if (!zero_ended && (sort_mb_cur_max == 1)) {
		/* Newline-terminated, single-byte: read a whole byte line. */
		char *ret;

		ret = fgetln(f, len);

		if (ret == NULL) {
			if (!feof(f))
				err(2, NULL);
			return NULL;
		}
		/* Drop the trailing newline, if the line has one. */
		if (*len > 0) {
			if (ret[*len - 1] == '\n')
				--(*len);
		}
		return bwscsbdup((unsigned char *)ret, *len);

	} else {
		/*
		 * Zero-terminated input: collect one character at a time
		 * into rb->fgetwln_z_buffer, which holds wchar_t elements
		 * in both single-byte and multibyte mode.
		 */
		*len = 0;

		if (feof(f))
			return NULL;

		/* Make sure the buffer exists before the first store. */
		if (2 >= rb->fgetwln_z_buffer_size) {
			rb->fgetwln_z_buffer_size += 256;
			rb->fgetwln_z_buffer =
			    sort_reallocarray(rb->fgetwln_z_buffer,
			    rb->fgetwln_z_buffer_size, sizeof(wchar_t));
		}
		rb->fgetwln_z_buffer[*len] = 0;

		if (sort_mb_cur_max == 1) {
			while (!feof(f)) {
				int c;

				c = fgetc(f);

				if (c == EOF) {
					/* Nothing collected: no line at all. */
					if (*len == 0)
						return NULL;
					goto line_read_done;
				}
				if (c == eols)
					goto line_read_done;

				/* Grow in steps of 256 elements. */
				if (*len + 1 >= rb->fgetwln_z_buffer_size) {
					rb->fgetwln_z_buffer_size += 256;
					rb->fgetwln_z_buffer =
					    sort_reallocarray(rb->fgetwln_z_buffer,
					    rb->fgetwln_z_buffer_size, sizeof(wchar_t));
				}

				/* Append the character and keep a 0 after it. */
				rb->fgetwln_z_buffer[*len] = c;
				rb->fgetwln_z_buffer[++(*len)] = 0;
			}
		} else {
			while (!feof(f)) {
				wint_t c = 0;

				c = fgetwc(f);

				if (c == WEOF) {
					/* Nothing collected: no line at all. */
					if (*len == 0)
						return NULL;
					goto line_read_done;
				}
				if (c == eols)
					goto line_read_done;

				/* Grow in steps of 256 elements. */
				if (*len + 1 >= rb->fgetwln_z_buffer_size) {
					rb->fgetwln_z_buffer_size += 256;
					rb->fgetwln_z_buffer =
					    sort_reallocarray(rb->fgetwln_z_buffer,
					    rb->fgetwln_z_buffer_size, sizeof(wchar_t));
				}

				/* Append the character and keep a 0 after it. */
				rb->fgetwln_z_buffer[*len] = c;
				rb->fgetwln_z_buffer[++(*len)] = 0;
			}
		}

line_read_done:
		/* we do not count the last 0 */
		return bwssbdup(rb->fgetwln_z_buffer, *len);
	}
}
572
573int
574bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
575    size_t offset, size_t len)
576{
577	size_t cmp_len, len1, len2;
578	int res = 0;
579
580	len1 = bws1->len;
581	len2 = bws2->len;
582
583	if (len1 <= offset) {
584		return (len2 <= offset) ? 0 : -1;
585	} else {
586		if (len2 <= offset)
587			return 1;
588		else {
589			len1 -= offset;
590			len2 -= offset;
591
592			cmp_len = len1;
593
594			if (len2 < cmp_len)
595				cmp_len = len2;
596
597			if (len < cmp_len)
598				cmp_len = len;
599
600			if (sort_mb_cur_max == 1) {
601				const unsigned char *s1, *s2;
602
603				s1 = bws1->data.cstr + offset;
604				s2 = bws2->data.cstr + offset;
605
606				res = memcmp(s1, s2, cmp_len);
607
608			} else {
609				const wchar_t *s1, *s2;
610
611				s1 = bws1->data.wstr + offset;
612				s2 = bws2->data.wstr + offset;
613
614				res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
615			}
616		}
617	}
618
619	if (res == 0) {
620		if (len1 < cmp_len && len1 < len2)
621			res = -1;
622		else if (len2 < cmp_len && len2 < len1)
623			res = +1;
624	}
625
626	return res;
627}
628
629int
630bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
631{
632	size_t len1, len2, cmp_len;
633	int res;
634
635	len1 = bws1->len;
636	len2 = bws2->len;
637
638	len1 -= offset;
639	len2 -= offset;
640
641	cmp_len = len1;
642
643	if (len2 < cmp_len)
644		cmp_len = len2;
645
646	res = bwsncmp(bws1, bws2, offset, cmp_len);
647
648	if (res == 0) {
649		if (len1 < len2)
650			res = -1;
651		else if (len2 < len1)
652			res = +1;
653	}
654
655	return res;
656}
657
658int
659bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
660{
661	wchar_t c1, c2;
662	size_t i = 0;
663
664	for (i = 0; i < len; ++i) {
665		c1 = bws_get_iter_value(iter1);
666		c2 = bws_get_iter_value(iter2);
667		if (c1 != c2)
668			return c1 - c2;
669		iter1 = bws_iterator_inc(iter1, 1);
670		iter2 = bws_iterator_inc(iter2, 1);
671	}
672
673	return 0;
674}
675
/*
 * Collate two binary strings from the given offset onwards.
 *
 * A string that ends at or before offset sorts before one that extends
 * past it; two such strings compare equal.
 *
 * In single-byte mode the remaining bytes are compared with memcmp(),
 * the shorter string sorting first when it is a prefix of the other.
 * In wide mode the strings may contain embedded NUL characters, so they
 * are compared one NUL-delimited segment at a time with wide_str_coll().
 */
int
bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
{
	size_t len1, len2;

	len1 = bws1->len;
	len2 = bws2->len;

	if (len1 <= offset)
		return (len2 <= offset) ? 0 : -1;

	if (len2 <= offset)
		return 1;

	/* From here on len1 and len2 count the characters after offset. */
	len1 -= offset;
	len2 -= offset;

	if (sort_mb_cur_max == 1) {
		const unsigned char *s1, *s2;
		int res;

		s1 = bws1->data.cstr + offset;
		s2 = bws2->data.cstr + offset;

		/* Compare the common length; break ties by length. */
		if (len1 > len2) {
			res = memcmp(s1, s2, len2);
			if (!res)
				res = +1;
		} else if (len1 < len2) {
			res = memcmp(s1, s2, len1);
			if (!res)
				res = -1;
		} else
			res = memcmp(s1, s2, len1);

		return res;
	} else {
		const wchar_t *s1, *s2;
		size_t i, maxlen;
		int res = 0;

		s1 = bws1->data.wstr + offset;
		s2 = bws2->data.wstr + offset;

		i = 0;
		/* maxlen is the length both strings have in common. */
		maxlen = len1;

		if (maxlen > len2)
			maxlen = len2;

		while (i < maxlen) {

			/* goto next non-zero part: */
			while (i < maxlen &&
			    s1[i] == L'\0' && s2[i] == L'\0')
				++i;

			if (i >= maxlen)
				break;

			/* A NUL on one side only: that side sorts first. */
			if (s1[i] == L'\0') {
				if (s2[i] == L'\0')
					/* NOTREACHED */
					err(2, "bwscoll error 1");
				else
					return -1;
			} else if (s2[i] == L'\0')
				return 1;

			/* Collate the segments that start at i. */
			res = wide_str_coll(s1 + i, s2 + i);
			if (res)
				return res;

			/* Equal segments: advance to where one of them ends. */
			while (i < maxlen && s1[i] != L'\0' && s2[i] != L'\0')
				++i;

			if (i >= maxlen)
				break;

			if (s1[i] == L'\0') {
				if (s2[i] == L'\0') {
					/* Both segments end here; step over the NUL. */
					++i;
					continue;
				} else
					return -1;
			} else if (s2[i] == L'\0')
				return 1;
			else
				/* NOTREACHED */
				err(2, "bwscoll error 2");
		}

		/* Common part is equal: the shorter string sorts first. */
		if (len1 == len2)
			return 0;
		return len1 < len2 ? -1 : 1;
	}
}
773
774/*
775 * Correction of the system API
776 */
777double
778bwstod(struct bwstring *s0, bool *empty)
779{
780	double ret = 0;
781
782	if (sort_mb_cur_max == 1) {
783		char *ep, *end, *s;
784
785		s = (char *)s0->data.cstr;
786		end = s + s0->len;
787		ep = NULL;
788
789		while (isblank((unsigned char)*s) && s < end)
790			++s;
791
792		if (!isprint((unsigned char)*s)) {
793			*empty = true;
794			return 0;
795		}
796
797		ret = strtod(s, &ep);
798		if (ep == s) {
799			*empty = true;
800			return 0;
801		}
802	} else {
803		wchar_t *end, *ep, *s;
804
805		s = s0->data.wstr;
806		end = s + s0->len;
807		ep = NULL;
808
809		while (iswblank(*s) && s < end)
810			++s;
811
812		if (!iswprint(*s)) {
813			*empty = true;
814			return 0;
815		}
816
817		ret = wcstod(s, &ep);
818		if (ep == s) {
819			*empty = true;
820			return 0;
821		}
822	}
823
824	*empty = false;
825	return ret;
826}
827
828/*
829 * A helper function for monthcoll.  If a line matches
830 * a month name, it returns (number of the month - 1),
831 * while if there is no match, it just return -1.
832 */
833int
834bws_month_score(const struct bwstring *s0)
835{
836	if (sort_mb_cur_max == 1) {
837		const char *end, *s;
838		int i;
839
840		s = (char *)s0->data.cstr;
841		end = s + s0->len;
842
843		while (isblank((unsigned char)*s) && s < end)
844			++s;
845
846		for (i = 11; i >= 0; --i) {
847			if (cmonths[i] &&
848			    (s == strstr(s, cmonths[i])))
849				return i;
850		}
851	} else {
852		const wchar_t *end, *s;
853		int i;
854
855		s = s0->data.wstr;
856		end = s + s0->len;
857
858		while (iswblank(*s) && s < end)
859			++s;
860
861		for (i = 11; i >= 0; --i) {
862			if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
863				return i;
864		}
865	}
866
867	return -1;
868}
869
870/*
871 * Rips out leading blanks (-b).
872 */
873struct bwstring *
874ignore_leading_blanks(struct bwstring *str)
875{
876	if (sort_mb_cur_max == 1) {
877		unsigned char *dst, *end, *src;
878
879		src = str->data.cstr;
880		dst = src;
881		end = src + str->len;
882
883		while (src < end && isblank(*src))
884			++src;
885
886		if (src != dst) {
887			size_t newlen;
888
889			newlen = BWSLEN(str) - (src - dst);
890
891			while (src < end) {
892				*dst = *src;
893				++dst;
894				++src;
895			}
896			bws_setlen(str, newlen);
897		}
898	} else {
899		wchar_t *dst, *end, *src;
900
901		src = str->data.wstr;
902		dst = src;
903		end = src + str->len;
904
905		while (src < end && iswblank(*src))
906			++src;
907
908		if (src != dst) {
909
910			size_t newlen = BWSLEN(str) - (src - dst);
911
912			while (src < end) {
913				*dst = *src;
914				++dst;
915				++src;
916			}
917			bws_setlen(str, newlen);
918
919		}
920	}
921	return str;
922}
923
924/*
925 * Rips out nonprinting characters (-i).
926 */
927struct bwstring *
928ignore_nonprinting(struct bwstring *str)
929{
930	size_t newlen = str->len;
931
932	if (sort_mb_cur_max == 1) {
933		unsigned char *dst, *end, *src;
934		unsigned char c;
935
936		src = str->data.cstr;
937		dst = src;
938		end = src + str->len;
939
940		while (src < end) {
941			c = *src;
942			if (isprint(c)) {
943				*dst = c;
944				++dst;
945				++src;
946			} else {
947				++src;
948				--newlen;
949			}
950		}
951	} else {
952		wchar_t *dst, *end, *src;
953		wchar_t c;
954
955		src = str->data.wstr;
956		dst = src;
957		end = src + str->len;
958
959		while (src < end) {
960			c = *src;
961			if (iswprint(c)) {
962				*dst = c;
963				++dst;
964				++src;
965			} else {
966				++src;
967				--newlen;
968			}
969		}
970	}
971	bws_setlen(str, newlen);
972
973	return str;
974}
975
976/*
977 * Rips out any characters that are not alphanumeric characters
978 * nor blanks (-d).
979 */
980struct bwstring *
981dictionary_order(struct bwstring *str)
982{
983	size_t newlen = str->len;
984
985	if (sort_mb_cur_max == 1) {
986		unsigned char *dst, *end, *src;
987		unsigned char c;
988
989		src = str->data.cstr;
990		dst = src;
991		end = src + str->len;
992
993		while (src < end) {
994			c = *src;
995			if (isalnum(c) || isblank(c)) {
996				*dst = c;
997				++dst;
998				++src;
999			} else {
1000				++src;
1001				--newlen;
1002			}
1003		}
1004	} else {
1005		wchar_t *dst, *end, *src;
1006		wchar_t c;
1007
1008		src = str->data.wstr;
1009		dst = src;
1010		end = src + str->len;
1011
1012		while (src < end) {
1013			c = *src;
1014			if (iswalnum(c) || iswblank(c)) {
1015				*dst = c;
1016				++dst;
1017				++src;
1018			} else {
1019				++src;
1020				--newlen;
1021			}
1022		}
1023	}
1024	bws_setlen(str, newlen);
1025
1026	return str;
1027}
1028
1029/*
1030 * Converts string to lower case(-f).
1031 */
1032struct bwstring *
1033ignore_case(struct bwstring *str)
1034{
1035	if (sort_mb_cur_max == 1) {
1036		unsigned char *end, *s;
1037
1038		s = str->data.cstr;
1039		end = s + str->len;
1040
1041		while (s < end) {
1042			*s = toupper(*s);
1043			++s;
1044		}
1045	} else {
1046		wchar_t *end, *s;
1047
1048		s = str->data.wstr;
1049		end = s + str->len;
1050
1051		while (s < end) {
1052			*s = towupper(*s);
1053			++s;
1054		}
1055	}
1056	return str;
1057}
1058
1059void
1060bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1061{
1062	if (sort_mb_cur_max == 1)
1063		warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
1064	else
1065		warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
1066}
1067