1(*
2    Title:      Standard Basis Library: Word and LargeWord Structure
3    Copyright   David Matthews 1999, 2005, 2012, 2016
4
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License version 2.1 as published by the Free Software Foundation.
8    
9    This library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Lesser General Public License for more details.
13    
14    You should have received a copy of the GNU Lesser General Public
15    License along with this library; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17*)
18
19(*
20This file contains definitions of both LargeWord and Word.  SysWord is
21defined to be LargeWord.
22The only purpose of LargeWord is so that it can be used, as SysWord, to
23hold the full machine word values for certain operating-system calls.
24*)
25
26(* This uses the global definition of type "word" made in the compiler.
27   That type has special status as the default for literals of the form
28   0wn in the absence of any other type information. *)
29local
30    type largeword = LargeWord.word
31    and shortword = Word.word
32
33    (* Extract a word value from a character stream. *)
34    (* There's a complication here which is similar to that with 0x for
35       Int.scan.  A word value may, optionally, be preceded by 0w or
36       for hex values 0wx, 0wX, 0x or 0X.  Since this is optional it is
37       possible for the value after the 0w to be anything, not just a
38       valid number, in which case the result is the 0 and the continuation
39       is w... *)
40    fun scanWord radix getc src =
41        let
42        (* Some of this code duplicates code in Int.scan.  It would
43           be better to avoid that if we could. The difficulty is that
44           Int.scan allows the number to begin with a sign and also
45           another 0x for hex values. *)
46        val base: LargeInt.int =
47            case radix of
48                StringCvt.BIN => 2
49              | StringCvt.OCT => 8
50              | StringCvt.DEC => 10
51              | StringCvt.HEX => 16
52        
53        (* Read the digits, accumulating the result in acc.  isOk is true
54           once we have read a valid digit. *)
55        fun read_digits src acc isOk =
56            case getc src of
57                NONE => if isOk then SOME(acc, src) else NONE
58              | SOME(ch, src') =>
59                if Char.ord ch >= Char.ord #"0"
60                   andalso Char.ord ch < (Char.ord #"0" + LargeInt.toInt base)
61                then read_digits src'
62                        (acc*base + LargeInt.fromInt(Char.ord ch - Char.ord #"0")) true
63                else (* Invalid character - either end of number or bad no. *)
64                    if isOk then SOME(acc, src) else NONE
65
66        fun read_hex_digits src acc isOk =
67            case getc src of
68                NONE => if isOk then SOME(acc, src) else NONE
69              | SOME(ch, src') =>
70                if Char.ord ch >= Char.ord #"0"
71                   andalso Char.ord ch <= Char.ord #"9"
72                then read_hex_digits src'
73                        (acc*16 + LargeInt.fromInt(Char.ord ch - Char.ord #"0")) true
74                else if Char.ord ch >= Char.ord #"A"
75                   andalso Char.ord ch <= Char.ord #"F"
76                then read_hex_digits src'
77                        (acc*16 + LargeInt.fromInt(Char.ord ch - Char.ord #"A" + 10)) true
78                else if Char.ord ch >= Char.ord #"a"
79                   andalso Char.ord ch <= Char.ord #"f"
80                then read_hex_digits src'
81                        (acc*16 + LargeInt.fromInt(Char.ord ch - Char.ord #"a" + 10)) true
82                else (* Invalid character - either end of number or bad no. *)
83                    if isOk then SOME(acc, src) else NONE
84
85        fun read_number src =
86            case radix of
87                StringCvt.HEX => read_hex_digits src 0 false
88              | _ => (* Binary, octal and decimal *) read_digits src 0 false
89        in
90        case getc src of
91            NONE => NONE
92         |  SOME(#"0", src') =>
93            let (* May be the start of the number or may be 0w, 0x etc. *)
94                val after0 = 
95                    case getc src' of
96                        NONE => NONE
97                      | SOME(ch, src'') =>
98                        if ch = #"w"
99                        then if radix = StringCvt.HEX
100                        then (* Is it 0wx, 0wX ? *)
101                            (
102                            case getc src'' of
103                                NONE => NONE
104                              | SOME(ch, src''') =>
105                                if ch = #"x" orelse ch = #"X"
106                                then read_number src''' (* Skip the 0wx *)
107                                else read_number src'' (* Skip the 0w *)
108                            )
109                        else read_number src'' (* Skip the 0w *)
110                        else if (ch = #"x" orelse ch = #"X") andalso radix = StringCvt.HEX
111                        then read_number src''
112                        else read_number src (* Include the 0 in the input *)
113            in
114                (* If the string *)
115                case after0 of
116                    NONE => (* No valid number after it, return the zero .*)
117                        SOME(0, src')
118                  | res => res
119            end
120
121         |  SOME(ch, src') =>
122                if Char.isSpace ch (* Skip white space. *)
123                then scanWord radix getc src' (* Recurse *)
124                else (* See if it's a valid digit. *)
125                    read_number src
126        end (* scanWord *)
127
128    (* Conversion from arbitrary precision integer may involve extracting the low-order word
129       from a long-integer representation.  *)
130    local
131        val getLowOrderWord: LargeInt.int -> LargeWord.word =
132            RunCall.rtsCallFull1 "PolyGetLowOrderAsLargeWord"
133        val isShortInt: LargeInt.int -> bool = RunCall.isShort
134    in
135        fun wordFromLargeInt (i: LargeInt.int): word =
136            if isShortInt i
137            then RunCall.unsafeCast i
138            else Word.fromLargeWord(getLowOrderWord i)
139            
140        and largeWordFromLargeInt (i: LargeInt.int): LargeWord.word =
141            if isShortInt i
142            then Word.toLargeX(RunCall.unsafeCast i)
143            else getLowOrderWord i
144    end
145
146    (* We have to use the full conversion if int is arbitrary precision.  If int is
147       fixed precision this will be optimised away. *)
148    fun wordFromInt(i: int): word =
149        if Bootstrap.intIsArbitraryPrecision
150        then wordFromLargeInt(LargeInt.fromInt i)
151        else RunCall.unsafeCast i
152
153    (* The maximum word is the largest tagged value.  The maximum large-word is
154       the largest value that will fit in a machine word. *)
155    local
156        fun power2' n 0 : LargeInt.int = n
157         |  power2' n i = power2' (2*n) (i-1)
158        val power2 = power2' 1
159        val bitsInWord: int = (RunCall.unsafeCast LibrarySupport.wordSize) * 8
160    in
161        val wordSize = bitsInWord - 1 (* 31 or 63 bits *)
162        val maxWordP1: LargeInt.int = power2 wordSize (* One more than the maximum word *)
163        val maxWord: LargeInt.int = maxWordP1 - 1
164        val largeWordSize = bitsInWord
165        val maxLargeWord = power2 largeWordSize - 1
166        val largeWordTopBit: LargeInt.int = maxWordP1 (* The top bit of a large word *)
167        val maxWordAsWord = wordFromLargeInt maxWord
168    end
169
170in
171    structure Word :> WORD where type word = shortword =
172    struct
173        
174        (* Word.word is represented using the short (tagged) integer format.
175           It is, though, unsigned so large word values are represented in the
176           same form as negative integers.  *)
177        type word = word
178        val fromInt = wordFromInt
179        and wordSize = wordSize
180        and fromLargeInt = wordFromLargeInt
181
182        (* Conversion to signed integer is simple. *)
183        val toIntX: word->int = RunCall.unsafeCast
184        and toLargeIntX: word -> LargeInt.int = RunCall.unsafeCast
185        
186        (* Conversion to unsigned integer has to treat values with the sign bit
187           set specially. *)
188        fun toLargeInt x =
189            let
190                val signed = toLargeIntX x
191            in
192                if signed < 0 then maxWordP1 + signed else signed
193            end
194
195        fun toInt x = LargeInt.toInt(toLargeInt x)
196
197        fun scan radix getc src =
198            case scanWord radix getc src of
199                NONE => NONE
200            |   SOME(res, src') =>
201                    if res > maxWord then raise General.Overflow
202                    else SOME(fromLargeInt res, src')
203
204        (* TODO: Implement this directly? *)
205        val fromString = StringCvt.scanString (scan StringCvt.HEX)
206
207        infix >> << ~>>
208        
209        (* We can format the result using the large integer format function. *)
210        fun fmt radix i = LargeInt.fmt radix (toLargeInt i)
211        val toString = fmt StringCvt.HEX
212    
213        fun compare (i, j) =
214            if i < j then General.LESS
215            else if i > j then General.GREATER else General.EQUAL
216        
217        fun min (i, j) = if i < j then i else j
218        and max (i, j) = if i > j then i else j
219        
220        open Word (* Include all the initial definitions. *)
221
222        fun notb x = xorb(maxWordAsWord, x)
223
224    end (* Word *)
225
226    (* LargeWord.word values have one more bit of precision than Word,word values and
227       are always "boxed" i.e. held in a one word piece of memory with the "byte" bit set. *)
228    structure LargeWord:> WORD where type word = largeword =
229    struct
230        open LargeWord (* Add in the built-ins. *)
231        type word = largeword
232        val wordSize = largeWordSize
233
234        (* As this is LargeWord we don't need to do anything here. *)
235        fun toLargeWord x = x
236        and toLargeWordX x = x
237        and fromLargeWord x = x
238        val toLarge = toLargeWord and toLargeX = toLargeWordX and fromLarge = fromLargeWord
239        val fromLargeInt = largeWordFromLargeInt
240
241        local
242            val shortToWord: LargeInt.int -> largeword = Word.toLargeWordX o RunCall.unsafeCast
243            val longToInt: largeword -> LargeInt.int = RunCall.unsafeCast o Word.fromLargeWord
244            val zero: largeword = shortToWord 0
245
246            infix << orb andb
247
248            local
249                open Int
250            in
251                val topBitAsLargeWord: largeword =
252                    (* The top bit *) shortToWord 1 << Word.fromInt(largeWordSize - 1)
253            end
254
255            fun topBitClear (x: largeword) : bool = (x andb topBitAsLargeWord) = zero 
256        in
257
258            fun toLargeInt x =
259            let
260                val asInt: LargeInt.int = longToInt x
261                open LargeInt (* <, + and - are all LargeInt ops. *)
262            in
263                (if asInt < 0 then maxWordP1 + asInt else asInt) +
264                (if topBitClear x then 0 else largeWordTopBit)
265            end
266            and toLargeIntX x =
267            let
268                val asInt: LargeInt.int = longToInt x
269                open LargeInt
270            in
271                (if asInt < 0 then maxWordP1 + asInt else asInt) -
272                (if topBitClear x then 0 else largeWordTopBit)
273            end
274           
275            val zero = zero
276            val maxLargeWordAsLargeWord = fromLargeInt maxLargeWord
277        end
278
279        fun ~ x = zero - x
280        fun notb x = xorb(maxLargeWordAsLargeWord, x)
281
282        (* If int is fixed precision an int is the same size as a word and will always fit within a
283           large-word value. *)
284        fun fromInt(i: int): word =
285            if Bootstrap.intIsArbitraryPrecision
286            then fromLargeInt(LargeInt.fromInt i)
287            else Word.toLargeWord(Word.fromInt i)
288
289        and toInt(w: word): int =
290            if Bootstrap.intIsArbitraryPrecision
291            then LargeInt.toInt(toLargeInt w)
292            else Word.toInt(Word.fromLargeWord w)
293            
294        and toIntX(w: word): int =
295            if Bootstrap.intIsArbitraryPrecision
296            then LargeInt.toInt(toLargeIntX w)
297            else Word.toIntX(Word.fromLargeWord w)
298
299        fun scan radix getc src =
300            case scanWord radix getc src of
301                NONE => NONE
302            |   SOME(res, src') =>
303                    if LargeInt.>(res, maxLargeWord) then raise General.Overflow
304                    else SOME(fromLargeInt res, src')
305
306        val fromString = StringCvt.scanString (scan StringCvt.HEX)
307
308        fun compare (i, j) =
309            if i < j then General.LESS
310            else if i > j then General.GREATER else General.EQUAL
311        
312        fun min (i, j) = if i < j then i else j
313        and max (i, j) = if i > j then i else j
314
315        (* We can format the result using the large integer format function.
316           Large unsigned values may be outside the short integer range. *)
317        fun fmt radix i = LargeInt.fmt radix (toLargeInt i)
318        val toString = fmt StringCvt.HEX
319    end;
320end;
321
322local
323    (* Install the pretty printer for Word.word *)
324    fun prettyWord _ _ x =
325        PolyML.PrettyString("0wx" ^ Word.toString x)
326    and prettyLarge _ _ x =
327        PolyML.PrettyString("0wx" ^ LargeWord.toString x)
328in
329    val () = PolyML.addPrettyPrinter prettyWord
330    val () = PolyML.addPrettyPrinter prettyLarge
331end;
332
333(* Converter to word values.  These must be installed outside the structure
334   because they depend on the type identifiers. *)
335local
336
337    (* The string may be either 0wnnn or 0wxXXX *)
338    fun getRadix s =
339        if String.size s > 2 andalso String.sub(s, 2) = #"x"
340        then StringCvt.HEX else StringCvt.DEC
341
342    fun convWord s =
343        let
344        val radix = getRadix s
345        in
346            case StringCvt.scanString (Word.scan radix) s of
347                NONE => raise RunCall.Conversion "Invalid word constant"
348              | SOME res => res
349        end
350    and convLarge s =
351        let
352        val radix = getRadix s
353        in
354            case StringCvt.scanString (LargeWord.scan radix) s of
355                NONE => raise RunCall.Conversion "Invalid word constant"
356              | SOME res => res
357        end
358
359in
360    (* Install this as a conversion function for word literals.
361       Unlike other overloaded functions there's no need to
362       ensure that overloaded conversion functions are installed
363       at the top-level.  The compiler has type "word" built in
364       and will use this conversion function for literals of the
365       form 0w... in preference to any other (e.g. for Word8.word)
366       if unification does not give an explicit type.
367       However, because LargeWord.word is abstract we have to
368       install the convertor outside the structure. *)
369    val () = RunCall.addOverload convWord "convWord"
370    val () = RunCall.addOverload convLarge "convWord"
371end;
372
373structure SysWord = LargeWord;
374
375(* Add the overloaded operators.  Do this outside the structure so
376   that we can capture the inline code.  We've already done this for
377   word (=Word.word) in the prelude. *)
378
379val () = RunCall.addOverload LargeWord.~ "~";
380val () = RunCall.addOverload LargeWord.+ "+";
381val () = RunCall.addOverload LargeWord.- "-";
382val () = RunCall.addOverload LargeWord.* "*";
383val () = RunCall.addOverload LargeWord.div "div";
384val () = RunCall.addOverload LargeWord.mod "mod";
385val () = RunCall.addOverload LargeWord.< "<";
386val () = RunCall.addOverload LargeWord.> ">";
387val () = RunCall.addOverload LargeWord.<= "<=";
388val () = RunCall.addOverload LargeWord.>= ">=";
389
390
391
392