1# -*- tcl -*-
2#
3# Copyright (c) 2009 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
4# Operations with characters: (Un)quoting.
5
6# ### ### ### ######### ######### #########
7## Requisites
8
9package require Tcl 8.5
10
11namespace eval char {
12    namespace export unquote quote
13    namespace ensemble create
14    namespace eval quote {
15	namespace export tcl string comment cstring
16	namespace ensemble create
17    }
18}
19
20# ### ### ### ######### ######### #########
21## API
22
23proc ::char::unquote {args} {
24    if {1 == [llength $args]} { return [Unquote {*}$args] }
25    set res {}
26    foreach ch $args { lappend res [Unquote $ch] }
27    return $res
28}
29
30proc ::char::Unquote {ch} {
31
32    # A character, stored in quoted form is transformed back into a
33    # proper Tcl character (i.e. the internal representation).
34
35    switch -exact -- $ch {
36	"\\n"  {return \n}
37	"\\t"  {return \t}
38	"\\r"  {return \r}
39	"\\["  {return \[}
40	"\\]"  {return \]}
41	"\\'"  {return '}
42	"\\\"" {return "\""}
43	"\\\\" {return \\}
44    }
45
46    if {[regexp {^\\([0-2][0-7][0-7])$} $ch -> ocode]} {
47	return [format %c $ocode]
48
49    } elseif {[regexp {^\\([0-7][0-7]?)$} $ch -> ocode]} {
50	return [format %c 0$ocode]
51
52    } elseif {[regexp {^\\u([[:xdigit:]][[:xdigit:]]?[[:xdigit:]]?[[:xdigit:]]?)$} $ch -> hcode]} {
53	return [format %c 0x$hcode]
54
55    }
56
57    return $ch
58}
59
60proc ::char::quote::tcl {args} {
61    if {1 == [llength $args]} { return [Tcl {*}$args] }
62    set res {}
63    foreach ch $args { lappend res [Tcl $ch] }
64    return $res
65}
66
67proc ::char::quote::Tcl {ch} {
68    # Converts a Tcl character (internal representation) into a string
69    # which is accepted by the Tcl parser, will regenerate the
70    # character in question and is 7bit ASCII.
71
72    # Special characters
73
74    switch -exact -- $ch {
75	"\n" {return "\\n"}
76	"\r" {return "\\r"}
77	"\t" {return "\\t"}
78	"\\" - "\;" -
79	" "  - "\"" -
80	"("  - ")"  -
81	"\{" - "\}" -
82	"\[" - "\]" {
83	    # Quote space and all the brackets as well, using octal,
84	    # for easy impure list-ness.
85
86	    scan $ch %c chcode
87	    return \\[format %o $chcode]
88	}
89    }
90
91    scan $ch %c chcode
92
93    # Control characters: Octal
94    if {[::string is control -strict $ch]} {
95	return \\[format %o $chcode]
96    }
97
98    # Beyond 7-bit ASCII: Unicode
99
100    if {$chcode > 127} {
101	return \\u[format %04x $chcode]
102    }
103
104    # Regular character: Is its own representation.
105
106    return $ch
107}
108
109proc ::char::quote::string {args} {
110    if {1 == [llength $args]} { return [String {*}$args] }
111    set res {}
112    foreach ch $args { lappend res [String $ch] }
113    return $res
114}
115
116proc ::char::quote::String {ch} {
117    # Converts a Tcl character (internal representation) into a string
118    # which is accepted by the Tcl parser and will generate a human
119    # readable representation of the character in question, one which
120    # when written to a channel (via puts) describes the character
121    # without using any unprintable characters. It may use backslash-
122    # quoting. High utf characters are quoted to avoid problems with
123    # the still prevalent ascii terminals. It is assumed that the
124    # string will be used in a ""-quoted environment.
125
126    # Special characters
127
128    switch -exact -- $ch {
129	" "  {return "<blank>"}
130	"\n" {return "\\\\n"}
131	"\r" {return "\\\\r"}
132	"\t" {return "\\\\t"}
133	"\"" - "\\" - "\;" -
134	"("  - ")"  -
135	"\{" - "\}" -
136	"\[" - "\]" {
137	    return \\$ch
138	}
139    }
140
141    scan $ch %c chcode
142
143    # Control characters: Octal
144    if {[::string is control -strict $ch]} {
145	return \\\\[format %o $chcode]
146    }
147
148    # Beyond 7-bit ASCII: Unicode
149
150    if {$chcode > 127} {
151	return \\\\u[format %04x $chcode]
152    }
153
154    # Regular character: Is its own representation.
155
156    return $ch
157}
158
159proc ::char::quote::cstring {args} {
160    if {1 == [llength $args]} { return [CString {*}$args] }
161    set res {}
162    foreach ch $args { lappend res [CString $ch] }
163    return $res
164}
165
166proc ::char::quote::CString {ch} {
167    # Converts a Tcl character (internal representation) into a string
168    # which is accepted by the Tcl parser and will generate a human
169    # readable representation of the character in question, one which
170    # when written to a channel (via puts) describes the character
171    # without using any unprintable characters. It may use backslash-
172    # quoting. High utf characters are quoted to avoid problems with
173    # the still prevalent ascii terminals. It is assumed that the
174    # string will be used in a ""-quoted environment.
175
176    # Special characters
177
178    switch -exact -- $ch {
179	"\n" {return "\\\\n"}
180	"\r" {return "\\\\r"}
181	"\t" {return "\\\\t"}
182	"\"" - "\\" {
183	    return \\$ch
184	}
185    }
186
187    scan $ch %c chcode
188
189    # Control characters: Octal
190    if {[::string is control -strict $ch]} {
191	return \\\\[format %o $chcode]
192    }
193
194    # Beyond 7-bit ASCII: Unicode
195
196    if {$chcode > 127} {
197	return \\\\u[format %04x $chcode]
198    }
199
200    # Regular character: Is its own representation.
201
202    return $ch
203}
204
205proc ::char::quote::comment {args} {
206    if {1 == [llength $args]} { return [Comment {*}$args] }
207    set res {}
208    foreach ch $args { lappend res [Comment $ch] }
209    return $res
210}
211
212proc ::char::quote::Comment {ch} {
213    # Converts a Tcl character (internal representation) into a string
214    # which is accepted by the Tcl parser when used within a Tcl
215    # comment.
216
217    # Special characters
218
219    switch -exact -- $ch {
220	" "  {return "<blank>"}
221	"\n" {return "\\n"}
222	"\r" {return "\\r"}
223	"\t" {return "\\t"}
224	"\"" -
225	"\{" - "\}" -
226	"("  - ")"  {
227	    return \\$ch
228	}
229    }
230
231    scan $ch %c chcode
232
233    # Control characters: Octal
234    if {[::string is control -strict $ch]} {
235	return \\[format %o $chcode]
236    }
237
238    # Beyond 7-bit ASCII: Unicode
239
240    if {$chcode > 127} {
241	return \\u[format %04x $chcode]
242    }
243
244    # Regular character: Is its own representation.
245
246    return $ch
247}
248
249# ### ### ### ######### ######### #########
250## Ready
251
252package provide char 1
253
254