1#
2# Copyright (c) 2023 Klara, Inc.
3#
4# SPDX-License-Identifier: BSD-2-Clause
5#
6
#
# Character counting needs a multibyte (UTF-8) character type, while
# the diagnostics matched by the tests must stay in their
# non-localized form.
#
LC_CTYPE=C.UTF-8
LC_MESSAGES=C
export LC_CTYPE LC_MESSAGES
13
#
# Size, in bytes, of the read buffer used by wc (64 KiB).
#
MAXBSIZE=$((64 * 1024))
18
#
# Sample text containing multibyte (UTF-8) characters.  The value ends
# in a newline of its own, and the tests emit it with printf "%s\n",
# so the expected counts below describe the text plus one extra
# trailing newline:
#
#   tvl   lines
#   tvw   words
#   tvc   bytes
#   tvm   characters
#   tvcL  length of the longest line in bytes
#   tvmL  length of the longest line in characters
#
tv="Der bode en underlig gråsprængt en
på den yderste nøgne ø; –
han gjorde visst intet menneske mén
hverken på land eller sjø;
dog stundom gnistred hans øjne stygt, –
helst mod uroligt vejr, –
og da mente folk, at han var forrykt,
og da var der få, som uden frykt
kom Terje Vigen nær.
"
tvl=10
tvw=55
tvc=300
tvm=283
tvcL=42
tvmL=39
38
#
# Check every counting mode of wc against one input file, first with
# the file on standard input and then with the file named on the
# command line.
#
#   $1  name of the input file
#   $2  expected line count
#   $3  expected word count
#   $4  expected byte count
#   $5  expected character count (optional; defaults to $4)
#
atf_check_wc() {
	local path="$1"
	local lines="$2"
	local words="$3"
	local bytes="$4"
	local chars="${5-$4}"
	# Regex fragment for the default "lines words bytes" output.
	local triple="${lines} +${words} +${bytes}"
	local pair

	# Input on stdin: the output consists of the counts alone.
	atf_check -o match:"^ +${triple}\$" wc <"${path}"
	for pair in "l:${lines}" "w:${words}" "c:${bytes}" "m:${chars}" ; do
		# ${pair%%:*} is the option letter, ${pair#*:} the count.
		atf_check -o match:"^ +${pair#*:}\$" \
		    wc "-${pair%%:*}" <"${path}"
	done

	# Input by name: the counts are followed by the file name.
	atf_check -o match:"^ +${triple} ${path}\$" wc "${path}"
	for pair in "l:${lines}" "w:${words}" "c:${bytes}" "m:${chars}" ; do
		atf_check -o match:"^ +${pair#*:} ${path}\$" \
		    wc "-${pair%%:*}" "${path}"
	done
}
64
atf_test_case basic
basic_head()
{
	atf_set descr 'Basic test case'
}
# One line holding two words: 1 line, 2 words, 4 bytes.
basic_body()
{
	local input=foo

	printf 'a b\n' >"${input}"
	atf_check_wc "${input}" 1 2 4
}
75
atf_test_case blank
blank_head()
{
	atf_set descr 'Input containing only blank lines'
}
# Three bare newlines: 3 lines, no words, 3 bytes.
blank_body()
{
	local input=foo

	printf '\n\n\n' >"${input}"
	atf_check_wc "${input}" 3 0 3
}
86
atf_test_case empty
empty_head()
{
	atf_set descr 'Empty input'
}
# A zero-length file: every count is expected to be 0.
empty_body()
{
	local input=foo

	: >"${input}"
	atf_check_wc "${input}" 0 0 0
}
97
atf_test_case invalid
invalid_head()
{
	atf_set descr 'Invalid multibyte input'
}
# Feed wc -m a file containing the byte 0377 and expect both a
# diagnostic on stderr and a count of 4 on stdout.
invalid_body()
{
	local input=foo

	printf 'a\377b\n' >"${input}"
	atf_check \
	    -e match:'Illegal byte sequence' \
	    -o match:"^ +4 ${input}\$" \
	    wc -m "${input}"
}
111
atf_test_case multiline
multiline_head()
{
	atf_set "descr" "Multiline, multibyte input"
}
# Write the sample text followed by one extra newline, run the common
# checks on it, then verify the longest-line option (-L) combined with
# byte (-c) and character (-m) counting, both by name and on stdin.
multiline_body()
{
	printf "%s\n" "$tv" >foo
	atf_check_wc foo $tvl $tvw $tvc $tvm
	# The patterns are anchored at the end as well, so that a count
	# with extra trailing digits (or a longer file name) is rejected.
	# longest line in bytes
	atf_check -o match:"^ +$tvc +$tvcL foo\$" wc -cL foo
	atf_check -o match:"^ +$tvc +$tvcL\$" wc -cL <foo
	# longest line in characters
	atf_check -o match:"^ +$tvm +$tvmL foo\$" wc -mL foo
	atf_check -o match:"^ +$tvm +$tvmL\$" wc -mL <foo
}
128
atf_test_case multiline_repeated
multiline_repeated_head()
{
	atf_set descr 'Multiline input exceeding the input buffer size'
}
# Write the sample text 1000 times, five copies per printf call, each
# copy followed by an extra newline, and expect every count to scale
# with the number of copies.
multiline_repeated_body()
{
	local copies=0

	while [ ${copies} -lt 1000 ] ; do
		printf '%s\n%s\n%s\n%s\n%s\n' "$tv" "$tv" "$tv" "$tv" "$tv"
		copies=$((copies + 5))
	done >foo
	atf_check_wc foo \
	    $((tvl * copies)) \
	    $((tvw * copies)) \
	    $((tvc * copies)) \
	    $((tvm * copies))
}
143
atf_test_case nul
nul_head()
{
	atf_set descr 'Input containing NUL'
}
# "a", a NUL byte, "b" and a newline: 1 line, 1 word, 4 bytes.
nul_body()
{
	local input=foo

	printf 'a\0b\n' >"${input}"
	atf_check_wc "${input}" 1 1 4
}
154
atf_test_case poop
poop_head()
{
	atf_set "descr" "Multibyte sequence across buffer boundary"
}
# Build a file longer than MAXBSIZE out of 4-byte UTF-8 sequences
# spaced 5 bytes apart, then run the common checks with the line, word,
# byte and character totals accumulated while writing it.
poop_body()
{
	local l=0 w=0 c=0 m=0
	# One 4-byte UTF-8 sequence, spelled as octal escapes for
	# printf so that the bytes written do not depend on how this
	# script itself is encoded.  The bytes are F0 9F 92 A9, i.e.
	# U+1F4A9 (presumably the character this test is named after).
	local p='\360\237\222\251'
	# printf format for one output line: 16 sequences separated
	# by 15 dots and ended by a newline, i.e. 80 bytes and 32
	# characters.
	local row="${p}.${p}.${p}.${p}.${p}.${p}.${p}.${p}.${p}.${p}.${p}.${p}.${p}.${p}.${p}.${p}\n"

	# The code below produces a stream of 4-byte UTF-8 sequences
	# aligned on 5-byte boundaries, ensuring that the first full
	# read of length MAXBSIZE will end in a partial sequence —
	# unless MAXBSIZE is a multiple of 5 (not possible since it's
	# a power of 2) or one less than a multiple of 5 (e.g. 2^18 =
	# 262,144 = (52429 * 5) - 1) in which case we prepend a single
	# newline to push our sequence out of phase.
	atf_check_not_equal 0 $((MAXBSIZE % 5))
	:>foo
	if [ $((MAXBSIZE % 5)) -eq 4 ] ; then
		printf "\n"
		l=$((l + 1))
		c=$((c + 1))
		m=$((m + 1))
	fi >>foo
	# Keep appending lines until the file is longer than MAXBSIZE.
	while [ $c -le $MAXBSIZE ] ; do
		printf "${row}"
		l=$((l + 1))
		w=$((w + 1))
		c=$((c + 80)) # 80 bytes
		m=$((m + 32)) # 32 multibyte characters
	done >>foo
	atf_check_wc foo $l $w $c $m
}
187
atf_test_case total
total_head()
{
	atf_set descr 'Multiple inputs'
}
# Give wc two identical files and expect a "total" line carrying twice
# the line, word and byte counts of the sample text.
total_body()
{
	local f
	local lines=$((tvl*2)) words=$((tvw*2)) bytes=$((tvc*2))

	for f in foo bar ; do
		printf '%s\n' "$tv" >"${f}"
	done
	atf_check \
	    -o match:"^ +${lines} +${words} +${bytes} total\$" \
	    wc foo bar
}
201
atf_test_case unterminated
unterminated_head()
{
	atf_set descr 'Input not ending in newline'
}
# Two words without a final newline: 0 lines, 2 words, 3 bytes.
unterminated_body()
{
	local input=foo

	printf 'a b' >"${input}"
	atf_check_wc "${input}" 0 2 3
}
212
atf_test_case usage
usage_head()
{
	atf_set descr 'Trigger usage message'
}
# Pass wc the option "-?" and expect exit status 1 together with a
# usage message on stderr.
usage_body()
{
	atf_check \
	    -s exit:1 \
	    -e match:'usage: wc' \
	    wc '-?'
}
222
atf_test_case whitespace
whitespace_head()
{
	atf_set descr 'Input containing only whitespace and newlines'
}
# An empty line, a line with a space and a line with a tab:
# 3 lines, no words, 5 bytes.
whitespace_body()
{
	local input=foo

	printf '\n \n\t\n' >"${input}"
	atf_check_wc "${input}" 3 0 5
}
233
# Register every test case defined in this file, in the same order as
# they are defined.
atf_init_test_cases()
{
	local tc

	for tc in \
	    basic \
	    blank \
	    empty \
	    invalid \
	    multiline \
	    multiline_repeated \
	    nul \
	    poop \
	    total \
	    unterminated \
	    usage \
	    whitespace
	do
		atf_add_test_case "${tc}"
	done
}
249