1/*	$OpenBSD: vsort.c,v 1.2 2015/04/01 21:46:38 millert Exp $	*/
2
3/*-
4 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
5 * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/types.h>
31
32#include <ctype.h>
33#include <stdlib.h>
34#include <string.h>
35
36#include "sort.h"
37#include "vsort.h"
38
39static inline bool
40isdigit_clocale(wchar_t c)
41{
42	return (c >= L'0' && c <= L'9');
43}
44
45static inline bool
46isalpha_clocale(wchar_t c)
47{
48	return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
49}
50
51static inline bool
52isalnum_clocale(wchar_t c)
53{
54	return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
55	    (c >= L'0' && c <= L'9'));
56}
57
58/*
59 * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
60 * Set length of string before suffix.
61 */
62static void
63find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
64{
65	wchar_t c;
66	size_t clen;
67	bool expect_alpha, sfx;
68
69	sfx = false;
70	expect_alpha = false;
71	*len = 0;
72	clen = 0;
73
74	while ((si < se) && (c = bws_get_iter_value(si))) {
75		if (expect_alpha) {
76			expect_alpha = false;
77			if (!isalpha_clocale(c) && (c != L'~'))
78				sfx = false;
79		} else if (c == L'.') {
80			expect_alpha = true;
81			if (!sfx) {
82				sfx = true;
83				*len = clen;
84			}
85		} else if (!isalnum_clocale(c) && (c != L'~'))
86			sfx = false;
87
88		si = bws_iterator_inc(si, 1);
89		++clen;
90	}
91
92	/* This code must be here to make the implementation compatible
93	 * with WORDING of GNU sort documentation.
94	 * But the GNU sort implementation is not following its own
95	 * documentation.  GNU sort allows empty file extensions
96	 * (just dot with nothing after); but the regular expression in
97	 * their documentation does not allow empty file extensions.
98	 * We chose to make our implementation compatible with GNU sort
99	 * implementation.  If they will ever fix their bug, this code
100	 * must be uncommented. Or they may choose to fix the info page,
101	 * then the code stays commented.
102	 *
103	 if (expect_alpha)
104	 	sfx = false;
105	 */
106
107	if (!sfx)
108		*len = clen;
109}
110
111static inline int
112cmp_chars(wchar_t c1, wchar_t c2)
113{
114	if (c1 == c2)
115		return 0;
116
117	if (c1 == L'~')
118		return -1;
119	if (c2 == L'~')
120		return 1;
121
122	if (isdigit_clocale(c1) || !c1)
123		return (isdigit_clocale(c2) || !c2) ? 0 : -1;
124
125	if (isdigit_clocale(c2) || !c2)
126		return 1;
127
128	if (isalpha_clocale(c1))
129		return isalpha_clocale(c2) ? (int)c1 - (int)c2 : -1;
130
131	if (isalpha_clocale(c2))
132		return 1;
133
134	return (int)c1 - (int)c2;
135}
136
137static int
138cmpversions(bwstring_iterator si1, bwstring_iterator se1,
139    bwstring_iterator si2, bwstring_iterator se2)
140{
141	int cmp, diff;
142
143	while ((si1 < se1) || (si2 < se2)) {
144		diff = 0;
145
146		while (((si1 < se1) &&
147		    !isdigit_clocale(bws_get_iter_value(si1))) ||
148		    ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
149			wchar_t c1, c2;
150
151			c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
152			c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
153
154			cmp = cmp_chars(c1, c2);
155			if (cmp)
156				return cmp;
157
158			if (si1 < se1)
159				si1 = bws_iterator_inc(si1, 1);
160			if (si2 < se2)
161				si2 = bws_iterator_inc(si2, 1);
162		}
163
164		while (bws_get_iter_value(si1) == L'0')
165			si1 = bws_iterator_inc(si1, 1);
166
167		while (bws_get_iter_value(si2) == L'0')
168			si2 = bws_iterator_inc(si2, 1);
169
170		while (isdigit_clocale(bws_get_iter_value(si1)) &&
171		    isdigit_clocale(bws_get_iter_value(si2))) {
172			if (!diff)
173				diff = ((int)bws_get_iter_value(si1) -
174				    (int)bws_get_iter_value(si2));
175			si1 = bws_iterator_inc(si1, 1);
176			si2 = bws_iterator_inc(si2, 1);
177		}
178
179		if (isdigit_clocale(bws_get_iter_value(si1)))
180			return 1;
181
182		if (isdigit_clocale(bws_get_iter_value(si2)))
183			return -1;
184
185		if (diff)
186			return diff;
187	}
188
189	return 0;
190}
191
192/*
193 * Compare two version strings
194 */
195int
196vcmp(struct bwstring *s1, struct bwstring *s2)
197{
198	bwstring_iterator si1, si2;
199	wchar_t c1, c2;
200	size_t len1, len2, slen1, slen2;
201	int cmp_bytes, cmp_res;
202
203	if (s1 == s2)
204		return 0;
205
206	cmp_bytes = bwscmp(s1, s2, 0);
207	if (cmp_bytes == 0)
208		return 0;
209
210	len1 = slen1 = BWSLEN(s1);
211	len2 = slen2 = BWSLEN(s2);
212
213	if (slen1 < 1)
214		return -1;
215	if (slen2 < 1)
216		return 1;
217
218	si1 = bws_begin(s1);
219	si2 = bws_begin(s2);
220
221	c1 = bws_get_iter_value(si1);
222	c2 = bws_get_iter_value(si2);
223
224	if (c1 == L'.' && (slen1 == 1))
225		return -1;
226
227	if (c2 == L'.' && (slen2 == 1))
228		return 1;
229
230	if (slen1 == 2 && c1 == L'.' &&
231	    bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
232		return -1;
233	if (slen2 == 2 && c2 == L'.' &&
234	    bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
235		return 1;
236
237	if (c1 == L'.' && c2 != L'.')
238		return -1;
239	if (c1 != L'.' && c2 == L'.')
240		return 1;
241
242	if (c1 == L'.' && c2 == L'.') {
243		si1 = bws_iterator_inc(si1, 1);
244		si2 = bws_iterator_inc(si2, 1);
245	}
246
247	find_suffix(si1, bws_end(s1), &len1);
248	find_suffix(si2, bws_end(s2), &len2);
249
250	if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
251		return cmp_bytes;
252
253	cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
254	    bws_iterator_inc(si2, len2));
255
256	if (cmp_res == 0)
257	  cmp_res = cmp_bytes;
258
259	return cmp_res;
260}
261