#
# Copyright (c) 2023 Klara, Inc.
#
# SPDX-License-Identifier: BSD-2-Clause
#
660786Sps
#
# wc has to see a multibyte character set for these tests, while its
# diagnostics must stay unlocalized so they can be matched literally.
#
LC_CTYPE=C.UTF-8
LC_MESSAGES=C
export LC_CTYPE LC_MESSAGES

#
# Size of wc's read buffer.
#
MAXBSIZE=65536
1860786Sps
#
# Sample text containing multibyte characters.  The tv* values below are
# the counts wc is expected to report for the text followed by one extra
# newline, i.e. for the output of printf "%s\n" "$tv":
#
#   tvl   lines
#   tvw   words
#   tvc   bytes
#   tvm   characters
#   tvcL  length of the longest line in bytes
#   tvmL  length of the longest line in characters
#
# The text must be stored as UTF-8; if the non-ASCII characters are
# re-encoded, the byte and character counts no longer match.
#
tv="Der bode en underlig gråsprængt en
på den yderste nøgne ø; –
han gjorde visst intet menneske mén
hverken på land eller sjø;
dog stundom gnistred hans øjne stygt, –
helst mod uroligt vejr, –
og da mente folk, at han var forrykt,
og da var der få, som uden frykt
kom Terje Vigen nær.
"
tvl=10
tvw=55
tvc=300
tvm=283
tvcL=42
tvmL=39
3860786Sps
#
# Check wc's output for one input file, first feeding the file on
# standard input and then naming it on the command line.
#
#   $1  name of the input file
#   $2  expected line count
#   $3  expected word count
#   $4  expected byte count
#   $5  expected character count (optional; defaults to the byte count)
#
# Each pass checks the default output followed by -l, -w, -c and -m.
#
atf_check_wc() {
	local path="$1"
	local nlines="$2" nwords="$3" nbytes="$4"
	local nchars="${5-$4}"
	local checks="l:${nlines} w:${nwords} c:${nbytes} m:${nchars}"
	local item flag want

	# Input on stdin: wc prints the counts only.
	atf_check -o match:"^ +${nlines} +${nwords} +${nbytes}\$" wc <"${path}"
	for item in ${checks} ; do
		flag="${item%%:*}"
		want="${item#*:}"
		atf_check -o match:"^ +${want}\$" wc "-${flag}" <"${path}"
	done

	# Input named as an operand: the counts are followed by the name.
	atf_check -o match:"^ +${nlines} +${nwords} +${nbytes} ${path}\$" wc "$path"
	for item in ${checks} ; do
		flag="${item%%:*}"
		want="${item#*:}"
		atf_check -o match:"^ +${want} ${path}\$" wc "-${flag}" "$path"
	done
}
6460786Sps
atf_test_case basic
basic_head()
{
	atf_set descr "Basic test case"
}
basic_body()
{
	local input=foo

	# One line holding two words: 1 line, 2 words, 4 bytes.
	printf "a b\n" >"$input"
	atf_check_wc "$input" 1 2 4
}
7560786Sps
atf_test_case blank
blank_head()
{
	atf_set descr "Input containing only blank lines"
}
blank_body()
{
	local input=foo

	# Three newlines and nothing else: 3 lines, 0 words, 3 bytes.
	printf "\n\n\n" >"$input"
	atf_check_wc "$input" 3 0 3
}
8660786Sps
atf_test_case empty
empty_head()
{
	atf_set descr "Empty input"
}
empty_body()
{
	local input=foo

	# A zero-length file: every count is expected to be 0.
	printf "" >"$input"
	atf_check_wc "$input" 0 0 0
}
9760786Sps
atf_test_case invalid
invalid_head()
{
	atf_set descr "Invalid multibyte input"
}
invalid_body()
{
	# The byte 0xff is not valid in the UTF-8 locale the tests run in.
	# wc -m is expected to complain on stderr and still report a
	# count of 4 for the four bytes written here.
	printf "a\377b\n" >foo
	atf_check \
	    -o match:"^ +4 foo\$" \
	    -e match:"Illegal byte sequence" \
	    wc -m foo
}
11160786Sps
atf_test_case multiline
multiline_head()
{
	atf_set "descr" "Multiline, multibyte input"
}
multiline_body()
{
	# Write the sample text plus one newline and verify the basic
	# counts for it.
	printf "%s\n" "$tv" >foo
	atf_check_wc foo $tvl $tvw $tvc $tvm
	# The patterns below are anchored at both ends, as in
	# atf_check_wc, so that a longer number (e.g. 420 where 42 is
	# expected) or trailing output does not pass unnoticed.
	# longest line in bytes
	atf_check -o match:"^ +$tvc +$tvcL foo\$" wc -cL foo
	atf_check -o match:"^ +$tvc +$tvcL\$" wc -cL <foo
	# longest line in characters
	atf_check -o match:"^ +$tvm +$tvmL foo\$" wc -mL foo
	atf_check -o match:"^ +$tvm +$tvmL\$" wc -mL <foo
}
12860786Sps
atf_test_case multiline_repeated
multiline_repeated_head()
{
	atf_set descr "Multiline input exceeding the input buffer size"
}
multiline_repeated_body()
{
	local copies=0

	# Write the sample text five copies at a time until the file
	# holds 1000 of them; every expected count scales with the
	# number of copies.
	while [ $copies -lt 1000 ] ; do
		printf "%1\$s\n%1\$s\n%1\$s\n%1\$s\n%1\$s\n" "$tv"
		copies=$((copies + 5))
	done >foo
	atf_check_wc foo \
	    $((tvl * copies)) \
	    $((tvw * copies)) \
	    $((tvc * copies)) \
	    $((tvm * copies))
}
14360786Sps
atf_test_case nul
nul_head()
{
	atf_set descr "Input containing NUL"
}
nul_body()
{
	local input=foo

	# "a", NUL, "b", newline: expected to count as 1 line, a single
	# word, and 4 bytes.
	printf "a\0b\n" >"$input"
	atf_check_wc "$input" 1 1 4
}
15460786Sps
atf_test_case poop
poop_head()
{
	atf_set "descr" "Multibyte sequence across buffer boundary"
}
poop_body()
{
	local l=0 w=0 c=0 m=0
	# U+1F4A9 encoded as UTF-8 (f0 9f 92 a9), spelled with octal
	# escapes for printf so that the test data cannot be damaged if
	# this script is ever re-encoded.
	local seq='\360\237\222\251'
	local line="" i=0

	# Build the printf format for one line of input: 16 four-byte
	# characters separated by 15 periods and ended by a newline,
	# i.e. 80 bytes and 32 characters.
	while [ $i -lt 15 ] ; do
		line="${line}${seq}."
		i=$((i + 1))
	done
	line="${line}${seq}\n"

	# The code below produces a stream of 4-byte UTF-8 sequences
	# aligned on 5-byte boundaries, ensuring that the first full
	# read of length MAXBSIZE will end in a partial sequence,
	# unless MAXBSIZE is a multiple of 5 (not possible since it's
	# a power of 2) or one less than a multiple of 5 (e.g. 2^18 =
	# 262,144 = (52429 * 5) - 1) in which case we prepend a single
	# newline to push our sequence out of phase.
	atf_check_not_equal 0 $((MAXBSIZE % 5))
	:>foo
	if [ $((MAXBSIZE % 5)) -eq 4 ] ; then
		printf "\n"
		l=$((l + 1))
		c=$((c + 1))
		m=$((m + 1))
	fi >>foo
	# Keep appending lines until the file is larger than the buffer.
	while [ $c -le $MAXBSIZE ] ; do
		printf "$line"
		l=$((l + 1))
		w=$((w + 1))
		c=$((c + 80)) # 80 bytes
		m=$((m + 32)) # 32 multibyte characters
	done >>foo
	atf_check_wc foo $l $w $c $m
}
18760786Sps
atf_test_case total
total_head()
{
	atf_set descr "Multiple inputs"
}
total_body()
{
	local lines=$((tvl*2)) words=$((tvw*2)) bytes=$((tvc*2))
	local f

	# Two identical inputs; only the "total" line is checked, and it
	# must show twice the counts of a single copy.
	for f in foo bar ; do
		printf "%s\n" "$tv" >"$f"
	done
	atf_check \
	    -o match:"^ +${lines} +${words} +${bytes} total\$" \
	    wc foo bar
}
20160786Sps
atf_test_case unterminated
unterminated_head()
{
	atf_set descr "Input not ending in newline"
}
unterminated_body()
{
	local input=foo

	# Two words without a final newline: 0 lines, 2 words, 3 bytes.
	printf "a b" >"$input"
	atf_check_wc "$input" 0 2 3
}
21260786Sps
atf_test_case usage
usage_head()
{
	atf_set descr "Trigger usage message"
}
usage_body()
{
	# Passing -? is expected to make wc exit with status 1 and
	# print its usage message on stderr.
	atf_check \
	    -s exit:1 \
	    -e match:"usage: wc" \
	    wc '-?'
}
22260786Sps
atf_test_case whitespace
whitespace_head()
{
	atf_set descr "Input containing only whitespace and newlines"
}
whitespace_body()
{
	local input=foo

	# An empty line, a line with a space and a line with a tab:
	# 3 lines, 0 words, 5 bytes.
	printf "\n \n\t\n" >"$input"
	atf_check_wc "$input" 3 0 5
}
23360786Sps
# Register every test case defined in this file, in alphabetical order.
atf_init_test_cases()
{
	local tc

	for tc in \
	    basic blank empty invalid multiline multiline_repeated \
	    nul poop total unterminated usage whitespace
	do
		atf_add_test_case "$tc"
	done
}
24960786Sps