#
# Copyright (c) 2023 Klara, Inc.
#
# SPDX-License-Identifier: BSD-2-Clause
#

#
# These tests need to run in a multibyte locale with non-localized
# error messages.
#
export LC_CTYPE=C.UTF-8
export LC_MESSAGES=C

#
# Size of wc's read buffer.
#
MAXBSIZE=65536

#
# Sample text containing multibyte characters
#
# The text ends with a newline of its own, so emitting it with
# printf "%s\n" yields a trailing empty line, which is included in
# the expected counts below.
#
tv="Der bode en underlig gråsprængt en
på den yderste nøgne ø; —
han gjorde visst intet menneske mén
hverken på land eller sjø;
dog stundom gnistred hans øjne stygt, —
helst mod uroligt vejr, —
og da mente folk, at han var forrykt,
og da var der få, som uden frykt
kom Terje Vigen nær.
"
# Expected results for the sample text followed by one extra newline:
tvl=10		# lines
tvw=55		# words
tvc=300		# bytes
tvm=283		# characters
tvcL=42		# longest line, in bytes
tvmL=39		# longest line, in characters

#
# Run a series of tests using the same input file. The first argument
# is the name of the file. The next three are the expected line,
# word, and byte counts. The optional fifth is the expected character
# count; if not provided, it is expected to be identical to the byte
# count.
#
atf_check_wc() {
	local file="$1"
	local l="$2"
	local w="$3"
	local c="$4"
	# The character count defaults to the byte count when no fifth
	# argument is given.
	local m="${5-$4}"

	# Input on stdin: the expected output carries no file name.
	atf_check -o match:"^ +${l} +${w} +${c}\$" wc <"${file}"
	atf_check -o match:"^ +${l}\$" wc -l <"${file}"
	atf_check -o match:"^ +${w}\$" wc -w <"${file}"
	atf_check -o match:"^ +${c}\$" wc -c <"${file}"
	atf_check -o match:"^ +${m}\$" wc -m <"${file}"
	# Input as a named operand: the expected output ends with the
	# file name.
	atf_check -o match:"^ +${l} +${w} +${c} ${file}\$" wc "$file"
	atf_check -o match:"^ +${l} ${file}\$" wc -l "$file"
	atf_check -o match:"^ +${w} ${file}\$" wc -w "$file"
	atf_check -o match:"^ +${c} ${file}\$" wc -c "$file"
	atf_check -o match:"^ +${m} ${file}\$" wc -m "$file"
}

atf_test_case basic
basic_head()
{
	atf_set "descr" "Basic test case"
}
basic_body()
{
	printf "a b\n" >foo
	atf_check_wc foo 1 2 4
}

atf_test_case blank
blank_head()
{
	atf_set "descr" "Input containing only blank lines"
}
blank_body()
{
	printf "\n\n\n" >foo
	atf_check_wc foo 3 0 3
}

atf_test_case empty
empty_head()
{
	atf_set "descr" "Empty input"
}
empty_body()
{
	printf "" >foo
	atf_check_wc foo 0 0 0
}

atf_test_case invalid
invalid_head()
{
	atf_set "descr" "Invalid multibyte input"
}
invalid_body()
{
	# Byte 0377 (0xff) can never appear in valid UTF-8.  The test
	# expects a diagnostic on stderr and a count of 4 on stdout.
	printf "a\377b\n" >foo
	atf_check \
	    -e match:"Illegal byte sequence" \
	    -o match:"^ +4 foo$" \
	    wc -m foo
}

atf_test_case multiline
multiline_head()
{
	atf_set "descr" "Multiline, multibyte input"
}
multiline_body()
{
	printf "%s\n" "$tv" >foo
	atf_check_wc foo $tvl $tvw $tvc $tvm
	# longest line in bytes
	atf_check -o match:"^ +$tvc +$tvcL foo" wc -cL foo
	atf_check -o match:"^ +$tvc +$tvcL" wc -cL <foo
	# longest line in characters
	atf_check -o match:"^ +$tvm +$tvmL foo" wc -mL foo
	atf_check -o match:"^ +$tvm +$tvmL" wc -mL <foo
}

atf_test_case multiline_repeated
multiline_repeated_head()
{
	atf_set "descr" "Multiline input exceeding the input buffer size"
}
multiline_repeated_body()
{
	local c=0
	# Each iteration writes five copies of the sample text; c ends
	# up holding the total number of copies written to foo.
	while [ $c -lt 1000 ] ; do
		printf "%1\$s\n%1\$s\n%1\$s\n%1\$s\n%1\$s\n" "$tv"
		c=$((c+5))
	done >foo
	atf_check_wc foo $((tvl*c)) $((tvw*c)) $((tvc*c)) $((tvm*c))
}

atf_test_case nul
nul_head()
{
	atf_set "descr" "Input containing NUL"
}
nul_body()
{
	printf "a\0b\n" >foo
	atf_check_wc foo 1 1 4
}

atf_test_case poop
poop_head()
{
	atf_set "descr" "Multibyte sequence across buffer boundary"
}
poop_body()
{
	local l=0 w=0 c=0 m=0
	# The code below produces a stream of 4-byte UTF-8 sequences
	# aligned on 5-byte boundaries, ensuring that the first full
	# read of length MAXBSIZE will end in a partial sequence —
	# unless MAXBSIZE is a multiple of 5 (not possible since it's
	# a power of 2) or one less than a multiple of 5 (e.g. 2^18 =
	# 262,144 = (52429 * 5) - 1) in which case we prepend a single
	# newline to push our sequence out of phase.
	atf_check_not_equal 0 $((MAXBSIZE % 5))
	:>foo
	if [ $((MAXBSIZE % 5)) -eq 4 ] ; then
		printf "\n"
		l=$((l + 1))
		c=$((c + 1))
		m=$((m + 1))
	fi >>foo
	# Keep appending lines until the file is larger than one read
	# buffer, tracking the expected counts as we go.
	while [ $c -le $MAXBSIZE ] ; do
		printf "💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩\n"
		l=$((l + 1))
		w=$((w + 1))
		c=$((c + 80)) # 80 bytes
		m=$((m + 32)) # 32 multibyte characters
	done >>foo
	atf_check_wc foo $l $w $c $m
}

atf_test_case total
total_head()
{
	atf_set "descr" "Multiple inputs"
}
total_body()
{
	printf "%s\n" "$tv" >foo
	printf "%s\n" "$tv" >bar
	# Only the summary line is checked; both inputs are identical,
	# so each total is twice the single-file count.
	atf_check \
	    -o match:"^ +$((tvl*2)) +$((tvw*2)) +$((tvc*2)) total$" \
	    wc foo bar
}

atf_test_case unterminated
unterminated_head()
{
	atf_set "descr" "Input not ending in newline"
}
unterminated_body()
{
	printf "a b" >foo
	atf_check_wc foo 0 2 3
}

atf_test_case usage
usage_head()
{
	atf_set "descr" "Trigger usage message"
}
usage_body()
{
	# The "?" is escaped so the shell passes it through literally
	# instead of treating it as a glob pattern.
	atf_check -s exit:1 -e match:"usage: wc" wc -\?
}

atf_test_case whitespace
whitespace_head()
{
	atf_set "descr" "Input containing only whitespace and newlines"
}
whitespace_body()
{
	printf "\n \n\t\n" >foo
	atf_check_wc foo 3 0 5
}

atf_init_test_cases()
{
	atf_add_test_case basic
	atf_add_test_case blank
	atf_add_test_case empty
	atf_add_test_case invalid
	atf_add_test_case multiline
	atf_add_test_case multiline_repeated
	atf_add_test_case nul
	atf_add_test_case poop
	atf_add_test_case total
	atf_add_test_case unterminated
	atf_add_test_case usage
	atf_add_test_case whitespace
}