1#!/usr/bin/tclsh 2 3# gen_unicode_test.tcl -- 4# 5# This program parses the RFC 3454 file and generates the 6# corresponding unicode.test file with unicode package tests. 7# The input to this program should be NormalizationTest.txt. 8# It can be downloaded from: 9# ftp://ftp.unicode.org/Public/UNIDATA/NormalizationTest.txt 10# Short test suite is generated by default. If you want to generate 11# all tests (more than 300000 test cases) add suffix 'full' as the 12# third argument. 13# 14# Usage: gen_unicode_test.tcl infile outdir ?full? 15# 16# RCS: @(#) $Id: gen_unicode_test.tcl,v 1.1 2008/01/29 02:18:10 patthoyts Exp $ 17 18package require struct::list 19 20set short_test_list [list \ 21 "LATIN CAPITAL LETTER D, COMBINING DOT ABOVE, COMBINING DOT BELOW" \ 22 "NO-BREAK SPACE" \ 23 "VULGAR FRACTION ONE HALF" \ 24 "ORIYA LETTER RRA" \ 25 "KANNADA VOWEL SIGN EE" \ 26 "TIBETAN LETTER GHA" \ 27 "MODIFIER LETTER CAPITAL A" \ 28 "GREEK SMALL LETTER EPSILON WITH PSILI AND OXIA" \ 29 "KANGXI RADICAL SPROUT" \ 30 "HIRAGANA LETTER DE" \ 31 "KATAKANA LETTER PA" \ 32 "HANGUL LETTER SIOS-PIEUP" \ 33 "HANGUL SYLLABLE GYANG" \ 34 "CJK COMPATIBILITY IDEOGRAPH-F98E" \ 35 "ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM" \ 36 "ARABIC LIGATURE AIN WITH JEEM ISOLATED FORM" \ 37 "FULLWIDTH DIGIT THREE" \ 38 "LATIN SMALL LETTER A, COMBINING CYRILLIC TITLO, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B" \ 39 "LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, COMBINING RING OVERLAY, LATIN SMALL LETTER B" \ 40 "HANGUL SYLLABLE BYO, COMBINING TILDE OVERLAY, HANGUL JONGSEONG TIKEUT"] 41 42set fd [open [lindex $argv 0]] 43 44set all_tests {} 45set n 0 46while {[gets $fd line] >= 0} { 47 set line [string trim $line] 48 if {![regexp \ 49 {^([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);.*\) (.*)} \ 50 $line -> c(1) c(2) c(3) c(4) c(5) title]} continue 51 52 set q 1 53 foreach i {1 2 3 4 5} { 54 set s($i) {} 55 set us($i) "" 56 foreach xnum $c($i) { 57 set uc [scan $xnum %x] 58 if {$uc > 0xffff} { 59 set q 0 60 } 61 lappend s($i) $uc 62 append us($i) \\u$xnum 63 } 64 } 65 if {!$q} { 66 # Test case contains character which is greater than 0xFFFF and can't 67 # be represented in Tcl 68 continue 69 } 70 set test($n) [list $s(1) $s(2) $s(3) $s(4) $s(5) $title] 71 set test1($n) [list $us(1) $us(2) $us(3) $us(4) $us(5) $title] 72 if {[lsearch $short_test_list $title] >= 0} { 73 lappend all_tests $n 74 } 75 incr n 76} 77 78close $fd 79 80if {[string equal [lindex $argv 2] full]} { 81 set all_tests [struct::list iota $n] 82} 83 84set f [open [file join [lindex $argv 1] unicode.test] w] 85fconfigure $f -translation lf 86puts $f \ 87"# unicode.test 88# 89# Tests for the unicode package. This file is automatically generated by 90# the gen_unicode_test.tcl script. Do not modify this file by hands. 91# 92# RCS: @(#) \$Id\$ 93 94# ------------------------------------------------------------------------- 95 96source \[file join \\ 97 \[file dirname \[file dirname \[file join \[pwd\] \[info script\]\]\]\] \\ 98 devtools testutilities.tcl\] 99 100testsNeedTcl 8.3 101testsNeedTcltest 1.0 102 103testing { 104 useLocalFile unicode_data.tcl 105 useLocalFile unicode.tcl 106} 107 108# ------------------------------------------------------------------------- 109" 110 111set j 0 112foreach i $all_tests { 113 puts $f \ 114" 115test unicode-1.[incr j] {normalizeS D: [lindex $test1($i) 5]} { 116 unicode::normalizeS D \"[lindex $test1($i) 0]\" 117} \"[lindex $test1($i) 2]\" 118 119test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} { 120 unicode::normalize D [list [lindex $test($i) 1]] 121} {[lindex $test($i) 2]} 122 123test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} { 124 unicode::normalize D [list [lindex $test($i) 2]] 125} {[lindex $test($i) 2]} 126 127test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} { 128 unicode::normalize D [list [lindex $test($i) 3]] 129} {[lindex $test($i) 4]} 130 131test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} { 132 unicode::normalize D [list [lindex $test($i) 4]] 133} {[lindex $test($i) 4]} 134" 135} 136 137set j 0 138foreach i $all_tests { 139 puts $f \ 140" 141test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} { 142 unicode::normalize C [list [lindex $test($i) 0]] 143} {[lindex $test($i) 1]} 144 145test unicode-2.[incr j] {normalizeS C: [lindex $test1($i) 5]} { 146 unicode::normalizeS C \"[lindex $test1($i) 1]\" 147} \"[lindex $test1($i) 1]\" 148 149test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} { 150 unicode::normalize C [list [lindex $test($i) 2]] 151} {[lindex $test($i) 1]} 152 153test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} { 154 unicode::normalize C [list [lindex $test($i) 3]] 155} {[lindex $test($i) 3]} 156 157test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} { 158 unicode::normalize C [list [lindex $test($i) 4]] 159} {[lindex $test($i) 3]} 160" 161} 162 163set j 0 164foreach i $all_tests { 165 puts $f \ 166" 167test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} { 168 unicode::normalize KD [list [lindex $test($i) 0]] 169} {[lindex $test($i) 4]} 170 171test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} { 172 unicode::normalize KD [list [lindex $test($i) 1]] 173} {[lindex $test($i) 4]} 174 175test unicode-3.[incr j] {normalizeS KD: [lindex $test1($i) 5]} { 176 unicode::normalizeS KD \"[lindex $test1($i) 2]\" 177} \"[lindex $test1($i) 4]\" 178 179test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} { 180 unicode::normalize KD [list [lindex $test($i) 3]] 181} {[lindex $test($i) 4]} 182 183test unicode-1.[incr j] {normalize KD: [lindex $test($i) 5]} { 184 unicode::normalize KD [list [lindex $test($i) 4]] 185} {[lindex $test($i) 4]} 186" 187} 188 189set j 0 190foreach i $all_tests { 191 puts $f \ 192" 193test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} { 194 unicode::normalize KC [list [lindex $test($i) 0]] 195} {[lindex $test($i) 3]} 196 197test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} { 198 unicode::normalize KC [list [lindex $test($i) 1]] 199} {[lindex $test($i) 3]} 200 201test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} { 202 unicode::normalize KC [list [lindex $test($i) 2]] 203} {[lindex $test($i) 3]} 204 205test unicode-4.[incr j] {normalizeS KC: [lindex $test1($i) 5]} { 206 unicode::normalizeS KC \"[lindex $test1($i) 3]\" 207} \"[lindex $test1($i) 3]\" 208 209test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} { 210 unicode::normalize KC [list [lindex $test($i) 4]] 211} {[lindex $test($i) 3]} 212" 213} 214 215puts $f \ 216" 217test unicode-5.1 {fromstring} { 218 unicode::fromstring \"\\u0403\\u0405\\u0406\\u041f\\u0034\" 219} {1027 1029 1030 1055 52} 220 221test unicode-5.2 {fromstring} { 222 unicode::fromstring \"\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u0008\\u0009\\u000a\\u000b\\u000c\\u000d\" 223} {1 2 3 4 5 6 7 8 9 10 11 12 13} 224 225test unicode-6.1 {tostring} { 226 unicode::tostring {16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1} 227} \"\\u0010\\u000f\\u000e\\u000d\\u000c\\u000b\\u000a\\u0009\\u0008\\u0007\\u0006\\u0005\\u0004\\u0003\\u0002\\u0001\" 228 229test unicode-6.2 {tostring} { 230 unicode::tostring {12345 12346 12347 12348 12349 12350 12351} 231} \"\\u3039\\u303a\\u303b\\u303c\\u303d\\u303e\\u303f\" 232 233test unicode-7.1 {normalize bad form} { 234 catch {unicode::normalize S \"\"} result 235 set result 236} \"::unicode::normalize: Only D, C, KD and KC forms are allowed\" 237 238test unicode-8.1 {normalizeS bad form} { 239 catch {unicode::normalizeS S \"\"} result 240 set result 241} \"::unicode::normalizeS: Only D, C, KD and KC forms are allowed\" 242 243::tcltest::cleanupTests 244" 245 246close $f 247 248