1# This testcase is part of GDB, the GNU debugger. 2 3# Copyright 2001-2020 Free Software Foundation, Inc. 4 5# This program is free software; you can redistribute it and/or modify 6# it under the terms of the GNU General Public License as published by 7# the Free Software Foundation; either version 3 of the License, or 8# (at your option) any later version. 9# 10# This program is distributed in the hope that it will be useful, 11# but WITHOUT ANY WARRANTY; without even the implied warranty of 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13# GNU General Public License for more details. 14# 15# You should have received a copy of the GNU General Public License 16# along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18# Please email any bugs, comments, and/or additions to this file to: 19# bug-gdb@gnu.org 20 21# Test GDB's character set support. 22 23 24standard_testfile .c charset-malloc.c 25 26if { [prepare_for_testing "failed to prepare" ${testfile} [list $srcfile $srcfile2]] } { 27 return -1 28} 29 30# Parse the output from a `show charset' command. Return the host 31# and target charset as a two-element list. 32proc parse_show_charset_output {testname} { 33 global gdb_prompt 34 35 gdb_expect { 36 -re "The host character set is \"(.*)\"\\.\[\r\n\]+The target character set is \"(.*)\"\\.\[\r\n\]+The target wide character set is \"(.*)\"\\.\[\r\n\]+$gdb_prompt $" { 37 set host_charset $expect_out(1,string) 38 set target_charset $expect_out(2,string) 39 set retlist [list $host_charset $target_charset] 40 pass $testname 41 } 42 -re "The host character set is \"(.*)\"\\.\[\r\n\]+$gdb_prompt $" { 43 set host_charset $expect_out(1,string) 44 set retlist [list $host_charset] 45 pass $testname 46 } 47 -re "The target character set is \"(.*)\"\\.\[\r\n\]+$gdb_prompt $" { 48 set target_charset $expect_out(1,string) 49 set retlist [list $target_charset] 50 pass $testname 51 } 52 -re ".*$gdb_prompt $" { 53 fail $testname 54 } 55 timeout { 56 fail "$testname (timeout)" 57 } 58 } 59 60 return $retlist 61} 62 63 64# Try the various `show charset' commands. 65 66send_gdb "show charset\n" 67set show_charset [parse_show_charset_output "show charset"] 68 69send_gdb "show target-charset\n" 70set show_target_charset \ 71 [lindex [parse_show_charset_output "show target-charset"] 0] 72 73if {[lsearch -exact $show_charset $show_target_charset] >= 0} { 74 pass "check `show target-charset' against `show charset'" 75} else { 76 fail "check `show target-charset' against `show charset'" 77} 78 79send_gdb "show host-charset\n" 80set show_host_charset \ 81 [lindex [parse_show_charset_output "show host-charset"] 0] 82 83if {[lsearch -exact $show_charset $show_host_charset] >= 0} { 84 pass "check `show host-charset' against `show charset'" 85} else { 86 fail "check `show host-charset' against `show charset'" 87} 88 89# Try a malformed `set charset'. 90gdb_test "set charset" \ 91 "Requires an argument. Valid arguments are.*" \ 92 "try malformed `set charset'" 93 94# Try using `set host-charset' on an invalid character set. 95gdb_test "set host-charset my_grandma_bonnie" \ 96 "Undefined item: \"my_grandma_bonnie\"." \ 97 "try `set host-charset' with invalid charset" 98 99# Try using `set target-charset' on an invalid character set. 100gdb_test "set target-charset my_grandma_bonnie" \ 101 "Undefined item: \"my_grandma_bonnie\"." \ 102 "try `set target-charset' with invalid charset" 103 104# A Tcl array mapping the names of all the character sets we've seen 105# to "1" if the character set can be used as a host character set, or 106# "0" otherwise. We can use `array names charsets' just to get a list 107# of all character sets. 108array set charsets {} 109 110proc all_charset_names {} { 111 global charsets 112 return [array names charsets] 113} 114 115proc valid_host_charset {charset} { 116 global charsets 117 return [expr {[info exists charsets($charset)] && $charsets($charset)}] 118} 119 120proc valid_target_charset {charset} { 121 global charsets 122 return [info exists charsets($charset)] 123} 124 125send_gdb "set host-charset\n" 126gdb_expect { 127 -re "Requires an argument. Valid arguments are (.*)\\.\r\n$gdb_prompt $" { 128 set host_charset_list $expect_out(1,string) 129 regsub -all {, } $host_charset_list {,} host_charset_list 130 foreach host_charset [split $host_charset_list ","] { 131 set charsets($host_charset) 1 132 } 133 pass "capture valid host charsets" 134 } 135 136 -re ".*$gdb_prompt $" { 137 fail "capture valid host charsets" 138 } 139 140 timeout { 141 fail "(timeout) capture valid host charsets" 142 } 143} 144 145# If gdb was built with a phony iconv, it will only have two character 146# sets: "auto" and the default. In this situation, this set of tests 147# is pointless. 148if {[llength [array names charsets]] < 3} { 149 untested "fewer than 3 charsets" 150 return -1 151} 152 153send_gdb "set target-charset\n" 154gdb_expect { 155 -re "Requires an argument. Valid arguments are (.*)\\.\r\n$gdb_prompt $" { 156 set target_charset_list $expect_out(1,string) 157 regsub -all {, } $target_charset_list {,} target_charset_list 158 foreach target_charset [split $target_charset_list ","] { 159 if {! [info exists charsets($target_charset)]} { 160 set charsets($target_charset) 0 161 } 162 } 163 pass "capture valid target charsets" 164 } 165 166 -re ".*$gdb_prompt $" { 167 fail "capture valid target charsets" 168 } 169 170 timeout { 171 fail "(timeout) capture valid target charsets" 172 } 173} 174 175# We don't want to test all the charset names here, since that would 176# be too many combinations. We we pick a subset. 177set charset_subset {ASCII ISO-8859-1 EBCDIC-US IBM1047} 178foreach_with_prefix host_charset $charset_subset { 179 if {[valid_host_charset $host_charset]} { 180 181 set testname "try `set host-charset $host_charset'" 182 send_gdb "set host-charset $host_charset\n" 183 gdb_expect { 184 -re "GDB doesn't know of any character set named.*\[\r\n]+${gdb_prompt} $" { 185 # How did it get into `charsets' then? 186 fail "$testname (didn't recognize name)" 187 } 188 -re "GDB can't use `.*' as its host character set\\.\[\r\n]+${gdb_prompt} $" { 189 # Well, then why does its `charsets' entry say it can? 190 fail $testname 191 } 192 -re "${gdb_prompt} $" { 193 pass $testname 194 } 195 timeout { 196 fail "$testname (timeout)" 197 } 198 } 199 200 # Check that the command actually had its intended effect: 201 # $host_charset should now be the host character set. 202 send_gdb "show charset\n" 203 set result [parse_show_charset_output "parse `show charset' after `set host-charset $host_charset'"] 204 if {! [string compare [lindex $result 0] $host_charset]} { 205 pass "check effect of `set host-charset $host_charset'" 206 } else { 207 fail "check effect of `set host-charset $host_charset'" 208 } 209 210 # Now try setting every possible target character set, 211 # given that host charset. 212 foreach target_charset $charset_subset { 213 if {![valid_target_charset $target_charset]} { 214 continue 215 } 216 set testname "try `set target-charset $target_charset'" 217 send_gdb "set target-charset $target_charset\n" 218 gdb_expect { 219 -re "GDB doesn't know of any character set named.*\[\r\n]+${gdb_prompt} $" { 220 fail "$testname (didn't recognize name)" 221 } 222 -re "GDB can't convert from the .* character set to .*\\.\[\r\n\]+${gdb_prompt} $" { 223 # This is a serious problem. GDB should be able to convert 224 # between any arbitrary pair of character sets. 225 fail "$testname (can't convert)" 226 } 227 -re "${gdb_prompt} $" { 228 pass $testname 229 } 230 timeout { 231 fail "$testname (timeout)" 232 } 233 } 234 235 # Check that the command actually had its intended effect: 236 # $target_charset should now be the target charset. 237 send_gdb "show charset\n" 238 set result [parse_show_charset_output "parse `show charset' after `set target-charset $target_charset'"] 239 if {! [string compare $result [list $host_charset $target_charset]]} { 240 pass "check effect of `set target-charset $target_charset'" 241 } else { 242 fail "check effect of `set target-charset $target_charset'" 243 } 244 245 # Test handling of characters in the host charset which 246 # can't be translated into the target charset. \xA2 is 247 # `cent' in ISO-8859-1, which has no equivalent in ASCII. 248 # 249 # On some systems, the pseudo-tty through which we 250 # communicate with GDB insists on stripping the high bit 251 # from input characters, meaning that `cent' turns into 252 # `"'. Since ISO-8859-1 and ASCII are identical in the 253 # lower 128 characters, it's tough to see how we can test 254 # this behavior on such systems, so we just xfail it. 255 # 256 # Note: the \x16 (Control-V) is an escape to allow \xA2 to 257 # get past readline. 258 if {! [string compare $host_charset iso-8859-1] && ! [string compare $target_charset ascii]} { 259 260 set testname "untranslatable character in character literal" 261 send_gdb "print '\x16\xA2'\n" 262 gdb_expect { 263 -re "There is no character corresponding to .* in the target character set .*\\.\[\r\n\]+$gdb_prompt $" { 264 pass $testname 265 } 266 -re " = 34 '\"'\[\r\n\]+$gdb_prompt $" { 267 xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)" 268 } 269 -re "$gdb_prompt $" { 270 fail $testname 271 } 272 timeout { 273 fail "$testname (timeout)" 274 } 275 } 276 277 set testname "untranslatable character in string literal" 278 # If the PTTY zeros bit seven, then this turns into 279 # print """ 280 # which gets us a syntax error. We don't care. 281 send_gdb "print \"\x16\xA2\"\n" 282 gdb_expect { 283 -re "There is no character corresponding to .* in the target character set .*\\.\[\r\n\]+$gdb_prompt $" { 284 pass $testname 285 } 286 -re "Unterminated string in expression.\[\r\n\]+$gdb_prompt $" { 287 xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)" 288 } 289 -re "$gdb_prompt $" { 290 fail $testname 291 } 292 timeout { 293 fail "$testname (timeout)" 294 } 295 } 296 297 set testname "untranslatable characters in backslash escape" 298 send_gdb "print '\\\x16\xA2'\n" 299 gdb_expect { 300 -re "The escape sequence .* is equivalent to plain .*, which has no equivalent\[\r\n\]+in the .* character set\\.\[\r\n\]+$gdb_prompt $" { 301 pass $testname 302 } 303 -re " = 34 '\"'\[\r\n\]+$gdb_prompt $" { 304 xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)" 305 } 306 -re "$gdb_prompt $" { 307 fail $testname 308 } 309 timeout { 310 fail "$testname (timeout)" 311 } 312 } 313 } 314 } 315 } 316} 317 318 319# Set the host character set to plain ASCII, and try actually printing 320# some strings in various target character sets. We need to run the 321# test program to the point at which the strings have been 322# initialized. 323gdb_test "break ${srcfile}:[gdb_get_line_number "all strings initialized"]" \ 324 ".*Breakpoint.* at .*" \ 325 "set breakpoint after all strings have been initialized" 326gdb_run_cmd 327gdb_test "" "Breakpoint.*all strings initialized.*" "run until all strings have been initialized" 328 329# We only try the wide character tests on machines where the wchar_t 330# typedef in the test case has the right size. 331set wchar_size [get_sizeof wchar_t 99] 332set wchar_ok 0 333if {$wchar_size == 2} { 334 lappend charset_subset UTF-16 335 set wchar_ok 1 336} elseif {$wchar_size == 4} { 337 lappend charset_subset UTF-32 338 set wchar_ok 1 339} 340 341gdb_test_no_output "set host-charset ASCII" 342foreach target_charset $charset_subset { 343 if {![valid_target_charset $target_charset]} { 344 continue 345 } 346 347 if {$target_charset == "UTF-32" || $target_charset == "UTF-16"} { 348 set param target-wide-charset 349 set L L 350 } else { 351 set param target-charset 352 set L "" 353 } 354 gdb_test_no_output "set $param $target_charset" 355 356 # Try printing the null character. There seems to be a bug in 357 # gdb_test that requires us to use gdb_expect here. 358 send_gdb "print $L'\\0'\n" 359 gdb_expect { 360 -re "\\\$${decimal} = 0 $L'\\\\000'\[\r\n\]+$gdb_prompt $" { 361 pass "print the null character in ${target_charset}" 362 } 363 -re "$gdb_prompt $" { 364 fail "print the null character in ${target_charset}" 365 } 366 timeout { 367 fail "print the null character in ${target_charset} (timeout)" 368 } 369 } 370 371 # Compute the name of the variable in the test program that holds 372 # a string in $target_charset. The variable's name is the 373 # character set's name, in lower-case, with all non-identifier 374 # characters replaced with '_', with "_string" stuck on the end. 375 if {$target_charset == "UTF-16"} { 376 # We still use the utf_32_string variable -- but the size is 377 # correct for UTF-16. 378 set var_name utf_32_string 379 } else { 380 set var_name [string tolower "${target_charset}_string"] 381 regsub -all -- "\[^a-z0-9_\]" $var_name "_" var_name 382 } 383 384 # Compute a regexp matching the results we expect. This is static, 385 # but it's easier than writing it out. 386 regsub -all "." "abfnrtv" "(\\\\&|x)" escapes 387 set uppercase "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 388 set lowercase "abcdefghijklmnopqrstuvwxyz" 389 set digits "0123456789" 390 set octal_escape "\\\\\[0-9\]+" 391 392 send_gdb "print $var_name\n" 393 # ${escapes}${uppercase}${lowercase}${digits}${octal}${octal} 394 gdb_expect { 395 -re ".* = $L\"(\\\\a|x)(\\\\b|x)(\\\\f|x)(\\\\n|x)(\\\\r|x)(\\\\t|x)(\\\\v|x)${uppercase}${lowercase}${digits}(${octal_escape}|x)+\"\[\r\n\]+$gdb_prompt $" { 396 pass "print string in $target_charset" 397 } 398 -re "$gdb_prompt $" { 399 fail "print string in $target_charset" 400 } 401 timeout { 402 fail "print string in $target_charset (timeout)" 403 } 404 } 405 406 # Try entering a character literal, and see if it comes back unchanged. 407 gdb_test "print $L'A'" \ 408 " = \[0-9-\]+ $L'A'" \ 409 "parse character literal in ${target_charset}" 410 411 # Check that the character literal was encoded correctly. 412 gdb_test "print $L'A' == $var_name\[7\]" \ 413 " = 1" \ 414 "check value of parsed character literal in ${target_charset}" 415 416 # Try entering a string literal, and see if it comes back unchanged. 417 gdb_test "print $L\"abcdefABCDEF012345\"" \ 418 " = $L\"abcdefABCDEF012345\"" \ 419 "parse string literal in ${target_charset}" 420 421 # Check that the string literal was encoded correctly. 422 gdb_test "print $L\"q\"\[0\] == $var_name\[49\]" \ 423 " = 1" \ 424 "check value of parsed string literal in ${target_charset}" 425 426 # Test handling of characters in the target charset which 427 # can't be translated into the host charset. 428 if {! [string compare $target_charset iso-8859-1]} { 429 gdb_test "print iso_8859_1_string\[69\]" \ 430 " = \[0-9-\]+ '\\\\242'" \ 431 "print character with no equivalent in host character set" 432 gdb_test "print iso_8859_1_string + 70" \ 433 " = ${hex} \"\\\\242.*\"" \ 434 "print string with no equivalent in host character set" 435 } 436 437 # Make sure that we don't apply the ISO-8859-1 `print_literally' 438 # function to ASCII. 439 if {! [string compare $target_charset ascii]} { 440 gdb_test "print iso_8859_1_string\[69\]" \ 441 " = \[0-9-\]+ '\\\\242'" \ 442 "print ASCII unprintable character" 443 gdb_test "print iso_8859_1_string + 70" \ 444 " = ${hex} \"\\\\242.*\"" \ 445 "print ASCII unprintable string" 446 } 447 448 # Try printing characters with backslash escape equivalents. 449 set escapees {a b f n r t v} 450 for {set i 0} {$i < [llength $escapees]} {incr i} { 451 set escape [lindex $escapees $i] 452 send_gdb "print $var_name\[$i\]\n" 453 set have_escape 1 454 gdb_expect { 455 -re "= \[0-9-\]+ $L'\\\\${escape}'\[\r\n\]+$gdb_prompt $" { 456 pass "try printing '\\${escape}' in ${target_charset}" 457 } 458 -re "= \[0-9-\]+ 'x'\[\r\n\]+$gdb_prompt $" { 459 xfail "try printing '\\${escape}' in ${target_charset} (no such escape)" 460 set have_escape 0 461 } 462 -re "$gdb_prompt $" { 463 fail "try printing '\\${escape}' in ${target_charset}" 464 } 465 timeout { 466 fail "try printing '\\${escape}' in ${target_charset} (timeout)" 467 } 468 } 469 470 if {$have_escape} { 471 472 # Try parsing a backslash escape in a character literal. 473 gdb_test "print $L'\\${escape}' == $var_name\[$i\]" \ 474 " = 1" \ 475 "check value of '\\${escape}' in ${target_charset}" 476 477 # Try parsing a backslash escape in a string literal. 478 gdb_test "print $L\"\\${escape}\"\[0\] == $var_name\[$i\]" \ 479 " = 1" \ 480 "check value of \"\\${escape}\" in ${target_charset}" 481 } 482 } 483 484 # Try printing a character escape that doesn't exist. We should 485 # get the unescaped character, in the target character set. 486 gdb_test "print $L'\\q'" " = \[0-9-\]+ $L'q'" \ 487 "print escape that doesn't exist in $target_charset" 488 gdb_test "print $L'\\q' == $var_name\[49\]" " = 1" \ 489 "check value of escape that doesn't exist in $target_charset" 490} 491 492# Reset the target charset. 493gdb_test_no_output "set target-charset UTF-8" 494 495# \242 is not a valid UTF-8 character. 496gdb_test "print \"\\242\"" " = \"\\\\242\"" \ 497 "non-representable target character" 498 499gdb_test "print '\\x'" "\\\\x escape without a following hex digit." 500gdb_test "print '\\u'" "\\\\u escape without a following hex digit." 501gdb_test "print '\\9'" " = \[0-9\]+ '9'" 502 503# An octal escape can only be 3 digits. 504gdb_test "print \"\\1011\"" " = \"A1\"" 505 506# Tests for wide- or unicode- strings. L is the prefix letter to use, 507# either "L" (for wide strings), "u" (for UTF-16), or "U" (for UTF-32). 508# NAME is used in the test names and should be related to the prefix 509# letter in some easy-to-undestand way. 510proc test_wide_or_unicode {L name} { 511 gdb_test "print $L\"ab\" $L\"c\"" " = $L\"abc\"" \ 512 "basic $name string concatenation" 513 gdb_test "print $L\"ab\" \"c\"" " = $L\"abc\"" \ 514 "narrow and $name string concatenation" 515 gdb_test "print \"ab\" $L\"c\"" " = $L\"abc\"" \ 516 "$name and narrow string concatenation" 517 gdb_test "print $L\"\\xe\" $L\"c\"" " = $L\"\\\\016c\"" \ 518 "$name string concatenation with escape" 519 gdb_test "print $L\"\" \"abcdef\" \"g\"" \ 520 "$L\"abcdefg\"" \ 521 "concatenate three strings with empty $name string" 522 523 gdb_test "print $L'a'" "= \[0-9\]+ $L'a'" \ 524 "basic $name character" 525} 526 527if {$wchar_ok} { 528 test_wide_or_unicode L wide 529} 530 531set ucs2_ok [expr {[get_sizeof char16_t 99] == 2}] 532 533if ![valid_host_charset "UTF-16"] { 534 verbose -log "Disabling UTF-16 tests." 535 set ucs2_ok 0 536} 537 538if {$ucs2_ok} { 539 test_wide_or_unicode u UTF-16 540} 541 542set ucs4_ok [expr {[get_sizeof char32_t 99] == 4}] 543if {$ucs4_ok} { 544 test_wide_or_unicode U UTF-32 545} 546 547# Test an invalid string combination. 548proc test_combination {L1 name1 L2 name2} { 549 gdb_test "print $L1\"abc\" $L2\"def\"" \ 550 "Undefined string concatenation." \ 551 "undefined concatenation of $name1 and $name2" 552} 553 554if {$wchar_ok && $ucs2_ok} { 555 test_combination L wide u UTF-16 556} 557if {$wchar_ok && $ucs4_ok} { 558 test_combination L wide U UTF-32 559 # Regression test for a typedef to a typedef. 560 gdb_test "print myvar" "= \[0-9\]+ L'A'" \ 561 "typedef to wchar_t" 562} 563if {$ucs2_ok && $ucs4_ok} { 564 test_combination u UTF-16 U UTF-32 565} 566 567if {$ucs2_ok} { 568 set go 1 569 gdb_test_multiple "python print ('hello, world!')" \ 570 "verify python support for charset tests" { 571 -re "not supported.*$gdb_prompt $" { 572 unsupported "python support is disabled" 573 set go 0 574 } 575 -re "$gdb_prompt $" {} 576 } 577 578 if {$go} { 579 gdb_test "print u\"abcdef\"" " = u\"abcdef\"" \ 580 "set up for python printing of utf-16 string" 581 582 gdb_test "python print (gdb.history(0).string())" "abcdef" \ 583 "extract utf-16 string using python" 584 } 585} 586 587# Regression test for a cleanup bug in the charset code. 588gdb_test "print 'a' == 'a' || 'b' == 'b'" \ 589 ".* = 1" \ 590 "EVAL_SKIP cleanup handling regression test" 591 592 593proc string_display { var_name set_prefix x_size x_type} { 594 with_test_prefix "set_prefix=$set_prefix" { 595 gdb_test_no_output "set ${var_name} = ${set_prefix}\"Test String\\0with zeroes\""\ 596 "assign ${var_name} with prefix ${set_prefix}" 597 gdb_test "x /2${x_size}s ${var_name}" ".*\t${x_type}\"Test String\"\[\r\n\]+.*\t${x_type}\"with zeroes\"" \ 598 "display String ${var_name} with x/${x_size}s" 599 } 600} 601 602if {$ucs2_ok} { 603 string_display String16 u h u 604 if {$wchar_size == 2} { 605 string_display String16 L h u 606 } 607} 608 609string_display String32 U w U 610if {$wchar_size == 4} { 611 string_display String32 L w U 612} 613 614 615foreach name {short int long} { 616 # We're really just checking to make sure this doesn't give an 617 # error. 618 gdb_test "print ${name}_array = \"hi\"" \ 619 " = {.*}" \ 620 "assign string to $name array" 621} 622 623 624gdb_exit 625