1# base64.tcl --
2#
3# Encode/Decode base64 for a string
4# Stephen Uhler / Brent Welch (c) 1997 Sun Microsystems
5# The decoder was done for exmh by Chris Garrigues
6#
7# Copyright (c) 1998-2000 by Ajuba Solutions.
8# See the file "license.terms" for information on usage and redistribution
9# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
10#
11# RCS: @(#) $Id: base64.tcl,v 1.32 2010/07/06 19:15:40 andreas_kupries Exp $
12
13# Version 1.0   implemented Base64_Encode, Base64_Decode
14# Version 2.0   uses the base64 namespace
15# Version 2.1   fixes various decode bugs and adds options to encode
16# Version 2.2   is much faster, Tcl8.0 compatible
17# Version 2.2.1 bugfixes
18# Version 2.2.2 bugfixes
19# Version 2.3   bugfixes and extended to support Trf
20
21# @mdgen EXCLUDE: base64c.tcl
22
23package require Tcl 8.2
24namespace eval ::base64 {
25    namespace export encode decode
26}
27
28if {![catch {package require Trf 2.0}]} {
29    # Trf is available, so implement the functionality provided here
30    # in terms of calls to Trf for speed.
31
32    # ::base64::encode --
33    #
34    #	Base64 encode a given string.
35    #
36    # Arguments:
37    #	args	?-maxlen maxlen? ?-wrapchar wrapchar? string
38    #
39    #		If maxlen is 0, the output is not wrapped.
40    #
41    # Results:
42    #	A Base64 encoded version of $string, wrapped at $maxlen characters
43    #	by $wrapchar.
44
45    proc ::base64::encode {args} {
46	# Set the default wrapchar and maximum line length to match
47	# the settings for MIME encoding (RFC 3548, RFC 2045). These
48	# are the settings used by Trf as well. Various RFCs allow for
49	# different wrapping characters and wraplengths, so these may
50	# be overridden by command line options.
51	set wrapchar "\n"
52	set maxlen 76
53
54	if { [llength $args] == 0 } {
55	    error "wrong # args: should be \"[lindex [info level 0] 0]\
56		    ?-maxlen maxlen? ?-wrapchar wrapchar? string\""
57	}
58
59	set optionStrings [list "-maxlen" "-wrapchar"]
60	for {set i 0} {$i < [llength $args] - 1} {incr i} {
61	    set arg [lindex $args $i]
62	    set index [lsearch -glob $optionStrings "${arg}*"]
63	    if { $index == -1 } {
64		error "unknown option \"$arg\": must be -maxlen or -wrapchar"
65	    }
66	    incr i
67	    if { $i >= [llength $args] - 1 } {
68		error "value for \"$arg\" missing"
69	    }
70	    set val [lindex $args $i]
71
72	    # The name of the variable to assign the value to is extracted
73	    # from the list of known options, all of which have an
74	    # associated variable of the same name as the option without
75	    # a leading "-". The [string range] command is used to strip
76	    # of the leading "-" from the name of the option.
77	    #
78	    # FRINK: nocheck
79	    set [string range [lindex $optionStrings $index] 1 end] $val
80	}
81
82	# [string is] requires Tcl8.2; this works with 8.0 too
83	if {[catch {expr {$maxlen % 2}}]} {
84	    return -code error "expected integer but got \"$maxlen\""
85	} elseif {$maxlen < 0} {
86	    return -code error "expected positive integer but got \"$maxlen\""
87	}
88
89	set string [lindex $args end]
90	set result [::base64 -mode encode -- $string]
91
92	# Trf's encoder implicitly uses the settings -maxlen 76,
93	# -wrapchar \n for its output. We may have to reflow this for
94	# the settings chosen by the user. A second difference is that
95	# Trf closes the output with the wrap char sequence,
96	# always. The code here doesn't. Therefore 'trimright' is
97	# needed in the fast cases.
98
99	if {($maxlen == 76) && [string equal $wrapchar \n]} {
100	    # Both maxlen and wrapchar are identical to Trf's
101	    # settings. This is the super-fast case, because nearly
102	    # nothing has to be done. Only thing to do is strip a
103	    # terminating wrapchar.
104	    set result [string trimright $result]
105	} elseif {$maxlen == 76} {
106	    # wrapchar has to be different here, length is the
107	    # same. We can use 'string map' to transform the wrap
108	    # information.
109	    set result [string map [list \n $wrapchar] \
110			    [string trimright $result]]
111	} elseif {$maxlen == 0} {
112	    # Have to reflow the output to no wrapping. Another fast
113	    # case using only 'string map'. 'trimright' is not needed
114	    # here.
115
116	    set result [string map [list \n ""] $result]
117	} else {
118	    # Have to reflow the output from 76 to the chosen maxlen,
119	    # and possibly change the wrap sequence as well.
120
121	    # Note: After getting rid of the old wrap sequence we
122	    # extract the relevant segments from the string without
123	    # modifying the string. Modification, i.e. removal of the
124	    # processed part, means 'shifting down characters in
125	    # memory', making the algorithm O(n^2). By avoiding the
126	    # modification we stay in O(n).
127
128	    set result [string map [list \n ""] $result]
129	    set l [expr {[string length $result]-$maxlen}]
130	    for {set off 0} {$off < $l} {incr off $maxlen} {
131		append res [string range $result $off [expr {$off+$maxlen-1}]] $wrapchar
132	    }
133	    append res [string range $result $off end]
134	    set result $res
135	}
136
137	return $result
138    }
139
140    # ::base64::decode --
141    #
142    #	Base64 decode a given string.
143    #
144    # Arguments:
145    #	string	The string to decode.  Characters not in the base64
146    #		alphabet are ignored (e.g., newlines)
147    #
148    # Results:
149    #	The decoded value.
150
151    proc ::base64::decode {string} {
152	regsub -all {\s} $string {} string
153	::base64 -mode decode -- $string
154    }
155
156} else {
157    # Without Trf use a pure tcl implementation
158
159    namespace eval base64 {
160	variable base64 {}
161	variable base64_en {}
162
163	# We create the auxiliary array base64_tmp, it will be unset later.
164	variable base64_tmp
165	variable i
166
167	set i 0
168	foreach char {A B C D E F G H I J K L M N O P Q R S T U V W X Y Z \
169		a b c d e f g h i j k l m n o p q r s t u v w x y z \
170		0 1 2 3 4 5 6 7 8 9 + /} {
171	    set base64_tmp($char) $i
172	    lappend base64_en $char
173	    incr i
174	}
175
176	#
177	# Create base64 as list: to code for instance C<->3, specify
178	# that [lindex $base64 67] be 3 (C is 67 in ascii); non-coded
179	# ascii chars get a {}. we later use the fact that lindex on a
180	# non-existing index returns {}, and that [expr {} < 0] is true
181	#
182
183	# the last ascii char is 'z'
184	variable char
185	variable len
186	variable val
187
188	scan z %c len
189	for {set i 0} {$i <= $len} {incr i} {
190	    set char [format %c $i]
191	    set val {}
192	    if {[info exists base64_tmp($char)]} {
193		set val $base64_tmp($char)
194	    } else {
195		set val {}
196	    }
197	    lappend base64 $val
198	}
199
200	# code the character "=" as -1; used to signal end of message
201	scan = %c i
202	set base64 [lreplace $base64 $i $i -1]
203
204	# remove unneeded variables
205	unset base64_tmp i char len val
206
207	namespace export encode decode
208    }
209
210    # ::base64::encode --
211    #
212    #	Base64 encode a given string.
213    #
214    # Arguments:
215    #	args	?-maxlen maxlen? ?-wrapchar wrapchar? string
216    #
217    #		If maxlen is 0, the output is not wrapped.
218    #
219    # Results:
220    #	A Base64 encoded version of $string, wrapped at $maxlen characters
221    #	by $wrapchar.
222
223    proc ::base64::encode {args} {
224	set base64_en $::base64::base64_en
225
226	# Set the default wrapchar and maximum line length to match
227	# the settings for MIME encoding (RFC 3548, RFC 2045). These
228	# are the settings used by Trf as well. Various RFCs allow for
229	# different wrapping characters and wraplengths, so these may
230	# be overridden by command line options.
231	set wrapchar "\n"
232	set maxlen 76
233
234	if { [llength $args] == 0 } {
235	    error "wrong # args: should be \"[lindex [info level 0] 0]\
236		    ?-maxlen maxlen? ?-wrapchar wrapchar? string\""
237	}
238
239	set optionStrings [list "-maxlen" "-wrapchar"]
240	for {set i 0} {$i < [llength $args] - 1} {incr i} {
241	    set arg [lindex $args $i]
242	    set index [lsearch -glob $optionStrings "${arg}*"]
243	    if { $index == -1 } {
244		error "unknown option \"$arg\": must be -maxlen or -wrapchar"
245	    }
246	    incr i
247	    if { $i >= [llength $args] - 1 } {
248		error "value for \"$arg\" missing"
249	    }
250	    set val [lindex $args $i]
251
252	    # The name of the variable to assign the value to is extracted
253	    # from the list of known options, all of which have an
254	    # associated variable of the same name as the option without
255	    # a leading "-". The [string range] command is used to strip
256	    # of the leading "-" from the name of the option.
257	    #
258	    # FRINK: nocheck
259	    set [string range [lindex $optionStrings $index] 1 end] $val
260	}
261
262	# [string is] requires Tcl8.2; this works with 8.0 too
263	if {[catch {expr {$maxlen % 2}}]} {
264	    return -code error "expected integer but got \"$maxlen\""
265	} elseif {$maxlen < 0} {
266	    return -code error "expected positive integer but got \"$maxlen\""
267	}
268
269	set string [lindex $args end]
270
271	set result {}
272	set state 0
273	set length 0
274
275
276	# Process the input bytes 3-by-3
277
278	binary scan $string c* X
279
280	foreach {x y z} $X {
281	    ADD [lindex $base64_en [expr {($x >>2) & 0x3F}]]
282	    if {$y != {}} {
283		ADD [lindex $base64_en [expr {(($x << 4) & 0x30) | (($y >> 4) & 0xF)}]]
284		if {$z != {}} {
285		    ADD [lindex $base64_en [expr {(($y << 2) & 0x3C) | (($z >> 6) & 0x3)}]]
286		    ADD [lindex $base64_en [expr {($z & 0x3F)}]]
287		} else {
288		    set state 2
289		    break
290		}
291	    } else {
292		set state 1
293		break
294	    }
295	}
296	if {$state == 1} {
297	    ADD [lindex $base64_en [expr {(($x << 4) & 0x30)}]]
298	    ADD =
299	    ADD =
300	} elseif {$state == 2} {
301	    ADD [lindex $base64_en [expr {(($y << 2) & 0x3C)}]]
302	    ADD =
303	}
304	return $result
305    }
306
307    proc ::base64::ADD {x} {
308	# The line length check is always done before appending so
309	# that we don't get an extra newline if the output is a
310	# multiple of $maxlen chars long.
311
312	upvar 1 maxlen maxlen length length result result wrapchar wrapchar
313	if {$maxlen && $length >= $maxlen} {
314	    append result $wrapchar
315	    set length 0
316	}
317	append result $x
318	incr length
319	return
320    }
321
322    # ::base64::decode --
323    #
324    #	Base64 decode a given string.
325    #
326    # Arguments:
327    #	string	The string to decode.  Characters not in the base64
328    #		alphabet are ignored (e.g., newlines)
329    #
330    # Results:
331    #	The decoded value.
332
333    proc ::base64::decode {string} {
334	if {[string length $string] == 0} {return ""}
335
336	set base64 $::base64::base64
337	set output "" ; # Fix for [Bug 821126]
338
339	binary scan $string c* X
340	foreach x $X {
341	    set bits [lindex $base64 $x]
342	    if {$bits >= 0} {
343		if {[llength [lappend nums $bits]] == 4} {
344		    foreach {v w z y} $nums break
345		    set a [expr {($v << 2) | ($w >> 4)}]
346		    set b [expr {(($w & 0xF) << 4) | ($z >> 2)}]
347		    set c [expr {(($z & 0x3) << 6) | $y}]
348		    append output [binary format ccc $a $b $c]
349		    set nums {}
350		}
351	    } elseif {$bits == -1} {
352		# = indicates end of data.  Output whatever chars are left.
353		# The encoding algorithm dictates that we can only have 1 or 2
354		# padding characters.  If x=={}, we must (*) have 12 bits of input
355		# (enough for 1 8-bit output).  If x!={}, we have 18 bits of
356		# input (enough for 2 8-bit outputs).
357		#
358		# (*) If we don't then the input is broken (bug 2976290).
359
360		foreach {v w z} $nums break
361
362		# Bug 2976290
363		if {$w == {}} {
364		    return -code error "Not enough data to process padding"
365		}
366
367		set a [expr {($v << 2) | (($w & 0x30) >> 4)}]
368		if {$z == {}} {
369		    append output [binary format c $a ]
370		} else {
371		    set b [expr {(($w & 0xF) << 4) | (($z & 0x3C) >> 2)}]
372		    append output [binary format cc $a $b]
373		}
374		break
375	    } else {
376		# RFC 2045 says that line breaks and other characters not part
377		# of the Base64 alphabet must be ignored, and that the decoder
378		# can optionally emit a warning or reject the message.  We opt
379		# not to do so, but to just ignore the character.
380		continue
381	    }
382	}
383	return $output
384    }
385}
386
387package provide base64 2.4.2
388