#!/bin/sh
# The next line is executed by /bin/sh, but not tcl \
exec tclsh "$0" ${1+"$@"}

#
# uniClass.tcl --
#
#	Generates the character ranges and singletons that are used in
#	generic/regc_locale.c for translation of character classes.
#	This file must be generated using a tclsh that contains the
#	correct corresponding tclUniData.c file (generated by uniParse.tcl)
#	in order for the class ranges to match.
#
#	The generated C declarations are written to standard output.
#

# emitRange --
#
#	Records one run of consecutive code points in the global
#	accumulators.  A run of three or more code points is appended to
#	"ranges" as a single first/last pair; a run of one or two code
#	points is appended to "chars" as individual values.  A line break
#	is inserted after every 4th range and after every 9th singleton.
#
# Arguments:
#	first	First code point of the run.
#	last	Last code point of the run (equal to first for a run of one).
#
# Side effects:
#	Appends to the globals ranges or chars and increments numranges
#	or numchars accordingly.

proc emitRange {first last} {
    global ranges numranges chars numchars

    if {$first < ($last-1)} {
        append ranges [format "{0x%04x, 0x%04x}, " \
                $first $last]
        if {[incr numranges] % 4 == 0} {
            append ranges "\n "
        }
    } else {
        append chars [format "0x%04x, " $first]
        incr numchars
        if {$numchars % 9 == 0} {
            append chars "\n "
        }
        # A run of exactly two code points is stored as two singletons.
        if {$first != $last} {
            append chars [format "0x%04x, " $last]
            incr numchars
            if {$numchars % 9 == 0} {
                append chars "\n "
            }
        }
    }
}

# genTable --
#
#	Scans code points 0 through 0xFFFF, groups the ones for which
#	"string is $type" is true into runs of consecutive values, and
#	prints the resulting range table and singleton table as C
#	declarations together with their NUM_..._RANGE and NUM_..._CHAR
#	size macros.  When a table would be empty, a C comment saying so
#	is printed instead.
#
# Arguments:
#	type	Character class name accepted by "string is".
#
# Side effects:
#	Resets and then fills the globals first, last, ranges, numranges,
#	chars and numchars; writes to standard output.

proc genTable {type} {
    global first last ranges numranges chars numchars

    # -2 is the "no run started yet" sentinel.  It is chosen so that
    # code point 0 is never mistaken for a continuation (last + 1 is -1).
    set first -2
    set last -2

    set ranges " "
    set numranges 0
    set chars " "
    set numchars 0

    for {set i 0} {$i <= 0xFFFF} {incr i} {
        if {[string is $type [format %c $i]]} {
            if {$i == ($last + 1)} {
                set last $i
            } else {
                # Flush the previous run, if there is one.  The test is
                # ">= 0" rather than "> 0" so that a run beginning at
                # code point 0 is not silently dropped.
                if {$first >= 0} {
                    emitRange $first $last
                }
                set first $i
                set last $i
            }
        }
    }

    # Flush the final run.  Skip this when the class has no members at
    # all; otherwise the sentinel would be emitted as a bogus singleton.
    if {$first >= 0} {
        emitRange $first $last
    }

    # Drop the trailing separator and any pending line-break padding.
    set ranges [string trimright $ranges "\t\n ,"]
    set chars [string trimright $chars "\t\n ,"]
    if {$ranges != ""} {
        puts "static crange ${type}RangeTable\[\] = {\n$ranges\n};\n"
        puts "#define NUM_[string toupper $type]_RANGE (sizeof(${type}RangeTable)/sizeof(crange))\n"
    } else {
        puts "/* no contiguous ranges of $type characters */\n"
    }
    if {$chars != ""} {
        puts "static chr ${type}CharTable\[\] = {\n$chars\n};\n"
        puts "#define NUM_[string toupper $type]_CHAR (sizeof(${type}CharTable)/sizeof(chr))\n"
    } else {
        puts "/* no singletons of $type characters */\n"
    }
}

# Main script: print the banner, then one pair of tables per class,
# then the closing banner.

puts "/*
 * Declarations of Unicode character ranges. This code
 * is automatically generated by the tools/uniClass.tcl script
 * and used in generic/regc_locale.c. Do not modify by hand.
 */
"

foreach {type desc} {
    alpha "alphabetic characters"
    digit "decimal digit characters"
    punct "punctuation characters"
    space "white space characters"
    lower "lowercase characters"
    upper "uppercase characters"
    graph "unicode print characters excluding space"
} {
    puts "/* Unicode: $desc */\n"
    genTable $type
}

puts "/*
 * End of auto-generated Unicode character ranges declarations.
 */"