1/*-
2 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
3 * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31#include <sys/types.h>
32
33#include <ctype.h>
34#include <stdlib.h>
35#include <string.h>
36
37#include "sort.h"
38#include "vsort.h"
39
40static inline bool
41isdigit_clocale(wchar_t c)
42{
43
44	return (c >= L'0' && c <= L'9');
45}
46
47static inline bool
48isalpha_clocale(wchar_t c)
49{
50
51	return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
52}
53
54static inline bool
55isalnum_clocale(wchar_t c)
56{
57
58	return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
59	    (c >= L'0' && c <= L'9'));
60}
61
62/*
63 * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
64 * Set length of string before suffix.
65 */
66static void
67find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
68{
69	wchar_t c;
70	size_t clen;
71	bool expect_alpha, sfx;
72
73	sfx = false;
74	expect_alpha = false;
75	*len = 0;
76	clen = 0;
77
78	while ((si < se) && (c = bws_get_iter_value(si))) {
79		if (expect_alpha) {
80			expect_alpha = false;
81			if (!isalpha_clocale(c) && (c != L'~'))
82				sfx = false;
83		} else if (c == L'.') {
84			expect_alpha = true;
85			if (!sfx) {
86				sfx = true;
87				*len = clen;
88			}
89		} else if (!isalnum_clocale(c) && (c != L'~'))
90			sfx = false;
91
92		si = bws_iterator_inc(si, 1);
93		++clen;
94	}
95
96	/* This code must be here to make the implementation compatible
97	 * with WORDING of GNU sort documentation.
98	 * But the GNU sort implementation is not following its own
99	 * documentation.  GNU sort allows empty file extensions
100	 * (just dot with nothing after); but the regular expression in
101	 * their documentation does not allow empty file extensions.
102	 * We chose to make our implementation compatible with GNU sort
103	 * implementation.  If they will ever fix their bug, this code
104	 * must be uncommented. Or they may choose to fix the info page,
105	 * then the code stays commented.
106	 *
107	 if (expect_alpha)
108	 	sfx = false;
109	 */
110
111	if (!sfx)
112		*len = clen;
113}
114
115static inline int
116cmp_chars(wchar_t c1, wchar_t c2)
117{
118
119	if (c1 == c2)
120		return (0);
121
122	if (c1 == L'~')
123		return (-1);
124	if (c2 == L'~')
125		return (+1);
126
127	if (isdigit_clocale(c1) || !c1)
128		return ((isdigit_clocale(c2) || !c2) ? 0 : -1);
129
130	if (isdigit_clocale(c2) || !c2)
131		return (+1);
132
133	if (isalpha_clocale(c1))
134		return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1);
135
136	if (isalpha_clocale(c2))
137		return (+1);
138
139	return ((int) c1 - (int) c2);
140}
141
142static int
143cmpversions(bwstring_iterator si1, bwstring_iterator se1,
144    bwstring_iterator si2, bwstring_iterator se2)
145{
146	int cmp, diff;
147
148	while ((si1 < se1) || (si2 < se2)) {
149		diff = 0;
150
151		while (((si1 < se1) &&
152		    !isdigit_clocale(bws_get_iter_value(si1))) ||
153		    ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
154			wchar_t c1, c2;
155
156			c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
157			c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
158
159			cmp = cmp_chars(c1, c2);
160			if (cmp)
161				return (cmp);
162
163			if (si1 < se1)
164				si1 = bws_iterator_inc(si1, 1);
165			if (si2 < se2)
166				si2 = bws_iterator_inc(si2, 1);
167		}
168
169		while (bws_get_iter_value(si1) == L'0')
170			si1 = bws_iterator_inc(si1, 1);
171
172		while (bws_get_iter_value(si2) == L'0')
173			si2 = bws_iterator_inc(si2, 1);
174
175		while (isdigit_clocale(bws_get_iter_value(si1)) &&
176		    isdigit_clocale(bws_get_iter_value(si2))) {
177			if (!diff)
178				diff = ((int)bws_get_iter_value(si1) -
179				    (int)bws_get_iter_value(si2));
180			si1 = bws_iterator_inc(si1, 1);
181			si2 = bws_iterator_inc(si2, 1);
182		}
183
184		if (isdigit_clocale(bws_get_iter_value(si1)))
185			return (1);
186
187		if (isdigit_clocale(bws_get_iter_value(si2)))
188			return (-1);
189
190		if (diff)
191			return (diff);
192	}
193
194	return (0);
195}
196
197/*
198 * Compare two version strings
199 */
200int
201vcmp(struct bwstring *s1, struct bwstring *s2)
202{
203	bwstring_iterator si1, si2;
204	wchar_t c1, c2;
205	size_t len1, len2, slen1, slen2;
206	int cmp_bytes, cmp_res;
207
208	if (s1 == s2)
209		return (0);
210
211	cmp_bytes = bwscmp(s1, s2, 0);
212	if (cmp_bytes == 0)
213		return (0);
214
215	len1 = slen1 = BWSLEN(s1);
216	len2 = slen2 = BWSLEN(s2);
217
218	if (slen1 < 1)
219		return (-1);
220	if (slen2 < 1)
221		return (+1);
222
223	si1 = bws_begin(s1);
224	si2 = bws_begin(s2);
225
226	c1 = bws_get_iter_value(si1);
227	c2 = bws_get_iter_value(si2);
228
229	if (c1 == L'.' && (slen1 == 1))
230		return (-1);
231
232	if (c2 == L'.' && (slen2 == 1))
233		return (+1);
234
235	if (slen1 == 2 && c1 == L'.' &&
236	    bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
237		return (-1);
238	if (slen2 == 2 && c2 == L'.' &&
239	    bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
240		return (+1);
241
242	if (c1 == L'.' && c2 != L'.')
243		return (-1);
244	if (c1 != L'.' && c2 == L'.')
245		return (+1);
246
247	if (c1 == L'.' && c2 == L'.') {
248		si1 = bws_iterator_inc(si1, 1);
249		si2 = bws_iterator_inc(si2, 1);
250	}
251
252	find_suffix(si1, bws_end(s1), &len1);
253	find_suffix(si2, bws_end(s2), &len2);
254
255	if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
256		return (cmp_bytes);
257
258	cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
259	    bws_iterator_inc(si2, len2));
260
261	if (cmp_res == 0)
262	  cmp_res = cmp_bytes;
263
264	return (cmp_res);
265}
266