1(*
2    Copyright (c) 2015-18, 2020 David C.J. Matthews
3    
4    Copyright (c) 2000
5        Cambridge University Technical Services Limited
6
7    This library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Lesser General Public
9    License version 2.1 as published by the Free Software Foundation.
10    
11    This library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15    
16    You should have received a copy of the GNU Lesser General Public
17    License along with this library; if not, write to the Free Software
18    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19*)
20
21functor INTCODECONS (
22structure DEBUG: DEBUG
23
24structure PRETTY: PRETTYSIG
25
26) : INTCODECONSSIG =
27
28struct
29    open CODE_ARRAY
30    open DEBUG
31    open Address
32    open Misc
33
34    infix 5 << <<+ <<- >> >>+ >>- ~>> ~>>+ ~>>- (* Shift operators *)
35    infix 3 andb orb xorb andbL orbL xorbL andb8 orb8 xorb8
36    
37    val op << = Word.<< and op >> = Word.>> and op ~>> = Word.~>>
38
39    val wordToWord8 = Word8.fromLargeWord o Word.toLargeWord
40    and word8ToWord = Word.fromLargeWord o Word8.toLargeWord
41    
42    (* Typically the compiler is built on a little-endian machine but it could
43       be run on a machine with either endian-ness.  We have to find out the
44       endian-ness when we run.  There are separate versions of the compiler
45       for 32-bit and 64-bit so that can be a constant.  *)
46    local
47        val isBigEndian: unit -> bool = RunCall.rtsCallFast1 "PolyIsBigEndian"
48    in
49        val isBigEndian = isBigEndian()
50    end
51
52    val opcode_jump              = 0wx02    (* 8-bit unsigned jump forward. *)
53    and opcode_jumpFalse         = 0wx03    (* Test top of stack. Take 8-bit unsigned jump if false. *)
54    and opcode_loadMLWord        = 0wx04
55    and opcode_storeMLWord       = 0wx05
56    and opcode_alloc_ref         = 0wx06
57    and opcode_blockMoveWord     = 0wx07
58    and opcode_loadUntagged      = 0wx08
59    and opcode_storeUntagged     = 0wx09
60    and opcode_case16            = 0wx0a
61    and opcode_callClosure       = 0wx0c
62    and opcode_returnW           = 0wx0d
63    and opcode_containerB        = 0wx0e
64    and opcode_raiseEx           = 0wx10
65    and opcode_callConstAddr16   = 0wx11
66    and opcode_callConstAddr8    = 0wx12
67    and opcode_localW            = 0wx13
68    and opcode_callLocalB        = 0wx16
69    and opcode_constAddr16       = 0wx1a
70    and opcode_constIntW         = 0wx1b
71    and opcode_jumpBack8         = 0wx1e   (* 8-bit unsigned jump backwards - relative to end of instr. *)
72    and opcode_returnB           = 0wx1f
73    and opcode_jumpBack16        = 0wx20    (* 16-bit unsigned jump backwards - relative to end of instr. *)
74    and opcode_indirectLocalBB   = 0wx21
75    and opcode_localB            = 0wx22
76    and opcode_indirectB         = 0wx23
77    and opcode_moveToContainerB  = 0wx24
78    and opcode_setStackValB      = 0wx25
79    and opcode_resetB            = 0wx26
80    and opcode_resetRB           = 0wx27
81    and opcode_constIntB         = 0wx28
82    and opcode_local_0           = 0wx29
83    and opcode_local_1           = 0wx2a
84    and opcode_local_2           = 0wx2b
85    and opcode_local_3           = 0wx2c
86    and opcode_local_4           = 0wx2d
87    and opcode_local_5           = 0wx2e
88    and opcode_local_6           = 0wx2f
89    and opcode_local_7           = 0wx30
90    and opcode_local_8           = 0wx31
91    and opcode_local_9           = 0wx32
92    and opcode_local_10          = 0wx33
93    and opcode_local_11          = 0wx34
94    and opcode_indirect_0        = 0wx35
95    and opcode_indirect_1        = 0wx36
96    and opcode_indirect_2        = 0wx37
97    and opcode_indirect_3        = 0wx38
98    and opcode_indirect_4        = 0wx39
99    and opcode_indirect_5        = 0wx3a
100    and opcode_const_0           = 0wx3b
101    and opcode_const_1           = 0wx3c
102    and opcode_const_2           = 0wx3d
103    and opcode_const_3           = 0wx3e
104    and opcode_const_4           = 0wx3f
105    and opcode_const_10          = 0wx40
106    and opcode_return_1          = 0wx42
107    and opcode_return_2          = 0wx43
108    and opcode_return_3          = 0wx44
109    and opcode_local_12          = 0wx45
110    and opcode_jumpTrue          = 0wx46
111    and opcode_jump16True        = 0wx47
112    and opcode_local_13          = 0wx49
113    and opcode_local_14          = 0wx4a
114    and opcode_local_15          = 0wx4b
115    and opcode_reset_1           = 0wx50
116    and opcode_reset_2           = 0wx51
117    and opcode_indirectClosureBB = 0wx54
118    and opcode_resetR_1          = 0wx64
119    and opcode_resetR_2          = 0wx65
120    and opcode_resetR_3          = 0wx66
121    and opcode_tupleB            = 0wx68
122    and opcode_tuple_2           = 0wx69
123    and opcode_tuple_3           = 0wx6a
124    and opcode_tuple_4           = 0wx6b
125    and opcode_lock              = 0wx6c
126    and opcode_ldexc             = 0wx6d
127    and opcode_indirectContainerB= 0wx74
128    and opcode_moveToMutClosureB = 0wx75
129    and opcode_allocMutClosureB  = 0wx76
130    and opcode_indirectClosureB0 = 0wx77
131    and opcode_pushHandler       = 0wx78
132    and opcode_indirectClosureB1 = 0wx7a
133    and opcode_tailbb            = 0wx7b
134    and opcode_indirectClosureB2 = 0wx7c
135    and opcode_setHandler        = 0wx81
136    and opcode_callFastRTS0      = 0wx83
137    and opcode_callFastRTS1      = 0wx84
138    and opcode_callFastRTS2      = 0wx85
139    and opcode_callFastRTS3      = 0wx86
140    and opcode_callFastRTS4      = 0wx87
141    and opcode_callFastRTS5      = 0wx88
142    (*and opcode_callFullRTS0      = 0wx89 (* Legacy *)
143    and opcode_callFullRTS1      = 0wx8a
144    and opcode_callFullRTS2      = 0wx8b
145    and opcode_callFullRTS3      = 0wx8c
146    and opcode_callFullRTS4      = 0wx8d
147    and opcode_callFullRTS5      = 0wx8e*)
148    and opcode_notBoolean        = 0wx91
149    and opcode_isTagged          = 0wx92
150    and opcode_cellLength        = 0wx93
151    and opcode_cellFlags         = 0wx94
152    and opcode_clearMutable      = 0wx95
153    and opcode_atomicIncr        = 0wx97
154    and opcode_atomicDecr        = 0wx98
155    and opcode_equalWord         = 0wxa0
156    and opcode_lessSigned        = 0wxa2
157    and opcode_lessUnsigned      = 0wxa3
158    and opcode_lessEqSigned      = 0wxa4
159    and opcode_lessEqUnsigned    = 0wxa5
160    and opcode_greaterSigned     = 0wxa6
161    and opcode_greaterUnsigned   = 0wxa7
162    and opcode_greaterEqSigned   = 0wxa8
163    and opcode_greaterEqUnsigned = 0wxa9
164    and opcode_fixedAdd          = 0wxaa
165    and opcode_fixedSub          = 0wxab
166    and opcode_fixedMult         = 0wxac
167    and opcode_fixedQuot         = 0wxad
168    and opcode_fixedRem          = 0wxae
169    and opcode_wordAdd           = 0wxb1
170    and opcode_wordSub           = 0wxb2
171    and opcode_wordMult          = 0wxb3
172    and opcode_wordDiv           = 0wxb4
173    and opcode_wordMod           = 0wxb5
174    and opcode_wordAnd           = 0wxb7
175    and opcode_wordOr            = 0wxb8
176    and opcode_wordXor           = 0wxb9
177    and opcode_wordShiftLeft     = 0wxba
178    and opcode_wordShiftRLog     = 0wxbb
179    and opcode_allocByteMem      = 0wxbd
180    and opcode_indirectLocalB1   = 0wxc1
181    and opcode_isTaggedLocalB    = 0wxc2
182    and opcode_jumpNEqLocalInd   = 0wxc3
183    and opcode_jumpTaggedLocal   = 0wxc4
184    and opcode_jumpNEqLocal      = 0wxc5
185    and opcode_indirect0Local0   = 0wxc6
186    and opcode_indirectLocalB0   = 0wxc7
187    and opcode_closureB          = 0wxd0
188    and opcode_getThreadId       = 0wxd9
189    and opcode_allocWordMemory   = 0wxda
190    and opcode_loadMLByte        = 0wxdc
191    and opcode_storeMLByte       = 0wxe4
192    and opcode_blockMoveByte     = 0wxec
193    and opcode_blockEqualByte    = 0wxed
194    and opcode_blockCompareByte  = 0wxee
195    and opcode_deleteHandler     = 0wxf1 (* Just deletes the handler - no jump. *)
196    and opcode_jump16            = 0wxf7
197    and opcode_jump16False       = 0wxf8
198    and opcode_setHandler16      = 0wxf9
199    and opcode_constAddr8        = 0wxfa
200    (*and opcode_stackSize8        = 0wxfb*)
201    and opcode_stackSize16       = 0wxfc
202    and opcode_escape            = 0wxfe (* For two-byte opcodes. *)
203    (*and opcode_enterIntX86       = 0wxff*) (* Reserved - this is the first byte of a call *)
204
205    (* Extended opcodes - preceded by 0xfe escape *)
206    val ext_opcode_containerW        = 0wx0b
207    and ext_opcode_allocMutClosureW  = 0wx0f    (* Allocate a mutable closure for mutual recursion *)
208    and ext_opcode_indirectClosureW  = 0wx10
209    and ext_opcode_indirectContainerW= 0wx11
210    and ext_opcode_indirectW         = 0wx14
211    and ext_opcode_moveToContainerW  = 0wx15
212    and ext_opcode_moveToMutClosureW = 0wx16
213    and ext_opcode_setStackValW      = 0wx17
214    and ext_opcode_resetW            = 0wx18
215    and ext_opcode_resetR_w          = 0wx19
216    and ext_opcode_callFastRTSRRtoR  = 0wx1c
217    and ext_opcode_callFastRTSRGtoR  = 0wx1d
218    and ext_opcode_jump32True        = 0wx48
219    and ext_opcode_floatAbs          = 0wx56
220    and ext_opcode_floatNeg          = 0wx57
221    and ext_opcode_fixedIntToFloat   = 0wx58
222    and ext_opcode_floatToReal       = 0wx59
223    and ext_opcode_realToFloat       = 0wx5a
224    and ext_opcode_floatEqual        = 0wx5b
225    and ext_opcode_floatLess         = 0wx5c
226    and ext_opcode_floatLessEq       = 0wx5d
227    and ext_opcode_floatGreater      = 0wx5e
228    and ext_opcode_floatGreaterEq    = 0wx5f
229    and ext_opcode_floatAdd          = 0wx60
230    and ext_opcode_floatSub          = 0wx61
231    and ext_opcode_floatMult         = 0wx62
232    and ext_opcode_floatDiv          = 0wx63
233    and ext_opcode_tupleW            = 0wx67
234    and ext_opcode_realToInt         = 0wx6e
235    and ext_opcode_floatToInt        = 0wx6f
236    and ext_opcode_callFastRTSFtoF   = 0wx70
237    and ext_opcode_callFastRTSGtoF   = 0wx71
238    and ext_opcode_callFastRTSFFtoF  = 0wx72
239    and ext_opcode_callFastRTSFGtoF  = 0wx73
240    and ext_opcode_realUnordered     = 0wx79
241    and ext_opcode_floatUnordered    = 0wx7a
242    and ext_opcode_tail              = 0wx7c
243    and ext_opcode_callFastRTSRtoR   = 0wx8f
244    and ext_opcode_callFastRTSGtoR   = 0wx90
245    and ext_opcode_atomicReset       = 0wx99
246    and ext_opcode_longWToTagged     = 0wx9a
247    and ext_opcode_signedToLongW     = 0wx9b
248    and ext_opcode_unsignedToLongW   = 0wx9c
249    and ext_opcode_realAbs           = 0wx9d
250    and ext_opcode_realNeg           = 0wx9e
251    and ext_opcode_fixedIntToReal    = 0wx9f
252    and ext_opcode_fixedDiv          = 0wxaf
253    and ext_opcode_fixedMod          = 0wxb0
254    and ext_opcode_wordShiftRArith   = 0wxbc
255    and ext_opcode_lgWordEqual       = 0wxbe
256    and ext_opcode_lgWordLess        = 0wxc0
257    and ext_opcode_lgWordLessEq      = 0wxc1
258    and ext_opcode_lgWordGreater     = 0wxc2
259    and ext_opcode_lgWordGreaterEq   = 0wxc3
260    and ext_opcode_lgWordAdd         = 0wxc4
261    and ext_opcode_lgWordSub         = 0wxc5
262    and ext_opcode_lgWordMult        = 0wxc6
263    and ext_opcode_lgWordDiv         = 0wxc7
264    and ext_opcode_lgWordMod         = 0wxc8
265    and ext_opcode_lgWordAnd         = 0wxc9
266    and ext_opcode_lgWordOr          = 0wxca
267    and ext_opcode_lgWordXor         = 0wxcb
268    and ext_opcode_lgWordShiftLeft   = 0wxcc
269    and ext_opcode_lgWordShiftRLog   = 0wxcd
270    and ext_opcode_lgWordShiftRArith = 0wxce
271    and ext_opcode_realEqual         = 0wxcf
272    and ext_opcode_closureW          = 0wxd0
273    and ext_opcode_realLess          = 0wxd1
274    and ext_opcode_realLessEq        = 0wxd2
275    and ext_opcode_realGreater       = 0wxd3
276    and ext_opcode_realGreaterEq     = 0wxd4
277    and ext_opcode_realAdd           = 0wxd5
278    and ext_opcode_realSub           = 0wxd6
279    and ext_opcode_realMult          = 0wxd7
280    and ext_opcode_realDiv           = 0wxd8
281    and ext_opcode_loadC8            = 0wxdd
282    and ext_opcode_loadC16           = 0wxde
283    and ext_opcode_loadC32           = 0wxdf
284    and ext_opcode_loadC64           = 0wxe0
285    and ext_opcode_loadCFloat        = 0wxe1
286    and ext_opcode_loadCDouble       = 0wxe2
287    and ext_opcode_storeC8           = 0wxe5
288    and ext_opcode_storeC16          = 0wxe6
289    and ext_opcode_storeC32          = 0wxe7
290    and ext_opcode_storeC64          = 0wxe8
291    and ext_opcode_storeCFloat       = 0wxe9
292    and ext_opcode_storeCDouble      = 0wxea
293    and ext_opcode_jump32            = 0wxf2 (* 32-bit signed jump, forwards or backwards. *)
294    and ext_opcode_jump32False       = 0wxf3 (* Test top item. Take 32-bit signed jump if false. *)
295    and ext_opcode_constAddr32       = 0wxf4 (* Followed by a 32-bit offset. Load a constant at that address. *)
296    and ext_opcode_setHandler32      = 0wxf5 (* Setup a handler whose address is given by the 32-bit signed offset. *)
297    and ext_opcode_case32            = 0wxf6 (* Indexed case with 32-bit offsets *)
298    and ext_opcode_allocCSpace       = 0wxfd
299    and ext_opcode_freeCSpace        = 0wxfe
300
301    (* A Label is a ref that is later set to the location.
302       Several labels can be linked together so that they are only set
303       at a single point.
304       Only forward jumps are linked so when we come to finally set the
305       label we will have the full list. *)
306    type labels = Word.word ref list ref
307
308    (* Used for jump, jumpFalse, setHandler and delHandler. *)
309    datatype jumpTypes = Jump | JumpBack | JumpFalse | JumpTrue | SetHandler
310
311    datatype opcode =
312        SimpleCode of Word8.word list           (* Bytes that don't need any special treatment *)
313    |   LabelCode of labels            (* A label - forwards or backwards. *)
314    |   JumpInstruction of { label: labels, jumpType: jumpTypes, size: jumpSize ref }   (* Jumps or SetHandler. *)
315    |   PushConstant of { constNum: int, size : jumpSize ref, isCall: bool }
316    |   PushShort of Word.word
317    |   IndexedCase of { labels: labels list, size : jumpSize ref }
318    |   LoadLocal of Word8.word (* Locals - simplifies peephole optimisation. *)
319    |   IndirectLocal of { localAddr: Word8.word, indirect: Word8.word } (* Ditto *)
320    |   UncondTransfer of Word8.word list (* Raisex, return and tail. *)
321    |   IsTaggedLocalB of Word8.word
322    |   JumpOnIsTaggedLocalB of { label: labels, size: jumpSize ref, localAddr: Word8.word }
323    |   JumpNotEqualLocalInd0BB of { label: labels, size: jumpSize ref, localAddr: Word8.word, const: Word8.word }
324    |   JumpNotEqualLocalConstBB of { label: labels, size: jumpSize ref, localAddr: Word8.word, const: Word8.word }
325    
326    and jumpSize = Size8 | Size16 | Size32
327
328    and code = Code of 
329    {
330        constVec:       machineWord list ref, (* Vector of words to be put at end *)
331        procName:       string,         (* Name of the procedure. *)
332        printAssemblyCode:bool,            (* Whether to print the code when we finish. *)
333        printStream:    string->unit,    (* The stream to use *)
334        stage1Code:     opcode list ref,
335        enterIntMode:   int (* 0 => None, 1 => X86. *)
336    }
337    
338    val getEnterIntMode: unit -> int = RunCall.rtsCallFast0 "PolyInterpretedEnterIntMode"
339
340    (* create and initialise a code segment *)
341    fun codeCreate (name : string, parameters) = 
342    let
343        val printStream = PRETTY.getSimplePrinter(parameters, [])
344    in
345        Code
346        { 
347            constVec         = ref [],
348            procName         = name,
349            printAssemblyCode = DEBUG.getParameter DEBUG.assemblyCodeTag parameters,
350            printStream    = printStream,
351            stage1Code       = ref [],
352            enterIntMode     = getEnterIntMode()
353        }
354    end
355
356    (* Find the offset in the constant area of a constant. *)
357    (* The first has offset 0.                             *)
358    fun addConstToVec (valu, Code{constVec, ...}) =
359    let
360        (* Search the list to see if the constant is already there. *)
361        fun findConst valu [] num =
362            (* Add to the list *)
363            (
364                constVec    := ! constVec @ [valu];
365                num
366            )
367        |   findConst valu (h :: t) num =
368                if wordEq (valu, h)
369                then num
370                else findConst valu t (num + 1) (* Not equal *)
371    in
372        findConst valu (! constVec) 0
373    end
374
375    fun printCode (seg: codeVec, procName: string, endcode, printStream) =
376    let
377        val () = printStream "\n";
378        val () = if procName = "" (* No name *) then printStream "?" else printStream procName;
379        val () = printStream ":\n";
380
381        (* prints a string representation of a number *)
382        fun printHex (v) = printStream(Word.fmt StringCvt.HEX v);
383 
384        val ptr = ref 0w0;
385
386        (* Gets "length" bytes from locations "addr", "addr"+1...
387           Returns an unsigned number. *)
388        fun getB (0, _, _) = 0w0
389        |   getB (length, addr, seg) =
390                (getB (length - 1, addr + 0w1, seg) << 0w8) + word8ToWord (codeVecGet (seg, addr))
391
392        (* Prints a relative address. *)
393        fun printDisp (len, spacer: string) =
394        let
395            val ad = getB(len, !ptr, seg) + !ptr + Word.fromInt len
396            val () = printStream spacer;
397            val () = printHex ad;
398        in
399            ptr := !ptr + Word.fromInt len
400        end
401
402        (* Prints an operand of an instruction *)
403        fun printOp (len, spacer : string) =
404        let
405            val () = printStream spacer;
406            val () = printHex (getB (len, !ptr, seg))
407        in
408            ptr := !ptr + Word.fromInt len
409        end;
410
411    in
412        while !ptr < endcode do
413        let
414            val addr = !ptr
415        in
416            printHex addr; (* The address. *)
417
418            let (* It's an instruction. *)
419                val ()  = printStream "\t"
420                val opc = codeVecGet (seg, !ptr) (* opcode *)
421                val ()  = ptr := !ptr + 0w1
422            in
423                case opc of
424                    0wx02 => (printStream "jump"; printDisp (1, "\t\t"))
425                |   0wx03 => (printStream "jumpFalse"; printDisp (1, "\t"))
426                |   0wx04 => printStream "loadMLWord"
427                |   0wx05 => printStream "storeMLWord"
428                |   0wx06 => printStream "alloc_ref"
429                |   0wx07 => printStream "blockMoveWord"
430                |   0wx08 => printStream "loadUntagged"
431                |   0wx09 => printStream "storeUntagged"
432                |   0wx0a =>
433                    let
434                        (* Have to find out how many items there are. *)
435                        val limit = getB (2, !ptr, seg);
436                        val () = printOp (2, "case16\t");
437                        val base = !ptr;
438        
439                        fun printEntry _ = (printStream "\n\t"; printHex(base + getB(2, !ptr, seg)); ptr := !ptr + 0w2)
440        
441                        fun forLoop f i n = if i >= n then () else (f i; forLoop f (i + 0w1) n)
442                    in
443                        forLoop printEntry 0w0 limit
444                    end
445                |   0wx0c => printStream "callClosure"
446                |   0wx0d => printOp(2, "returnW\t")
447                |   0wx0e => printStream "containerB"
448                |   0wx0f => printOp(2, "allocMutClosure")
449                |   0wx10 => printStream "raiseEx"
450                |   0wx11 => printDisp (2, "callConstAddr16\t")
451                |   0wx12 => printDisp (1, "callConstAddr8\t")
452                |   0wx13 => printOp(2, "localW\t")
453                |   0wx16 => printOp(1, "callLocalB\t")
454                |   0wx1a => (printStream "constAddr16"; printDisp (2, "\t"))
455                |   0wx1b => printOp(2, "constIntW\t")
456                |   0wx1e =>
457                    ((* Should be negative *)
458                        printStream "jumpBack8\t";
459                        printHex((!ptr - 0w1) - getB(1, !ptr, seg));
460                        ptr := !ptr + 0w1
461                    )
462                |   0wx1f => printOp(1, "returnB\t")
463                |   0wx20 =>
464                    (
465                        printStream "jumpBack16\t";
466                        printHex((!ptr - 0w1) - getB(2, !ptr, seg));
467                        ptr := !ptr + 0w2
468                    )
469                |   0wx21 => (printOp(1, "indirectLocalBB\t"); printOp(1, ","))
470                |   0wx22 => printOp(1, "localB\t")
471                |   0wx23 => printOp(1, "indirectB\t")
472                |   0wx24 => printOp(1, "moveToContainerB\t")
473                |   0wx25 => printOp(1, "setStackValB\t")
474                |   0wx26 => printOp(1, "resetB\t")
475                |   0wx27 => printOp(1, "resetRB\t")
476                |   0wx28 => printOp(1, "constIntB\t")
477                |   0wx29 => printStream "local_0"
478                |   0wx2a => printStream "local_1"
479                |   0wx2b => printStream "local_2"
480                |   0wx2c => printStream "local_3"
481                |   0wx2d => printStream "local_4"
482                |   0wx2e => printStream "local_5"
483                |   0wx2f => printStream "local_6"
484                |   0wx30 => printStream "local_7"
485                |   0wx31 => printStream "local_8"
486                |   0wx32 => printStream "local_9"
487                |   0wx33 => printStream "local_10"
488                |   0wx34 => printStream "local_11"
489                |   0wx35 => printStream "indirect_0"
490                |   0wx36 => printStream "indirect_1"
491                |   0wx37 => printStream "indirect_2"
492                |   0wx38 => printStream "indirect_3"
493                |   0wx39 => printStream "indirect_4"
494                |   0wx3a => printStream "indirect_5"
495                |   0wx3b => printStream "const_0"
496                |   0wx3c => printStream "const_1"
497                |   0wx3d => printStream "const_2"
498                |   0wx3e => printStream "const_3"
499                |   0wx3f => printStream "const_4"
500                |   0wx40 => printStream "const_10"
501                |   0wx41 => printStream "return_0"
502                |   0wx42 => printStream "return_1"
503                |   0wx43 => printStream "return_2"
504                |   0wx44 => printStream "return_3"
505                |   0wx45 => printStream "local_12"
506                |   0wx46 => (printStream "jumpTrue"; printDisp (1, "\t"))
507                |   0wx47 => (printStream "jumpTrue"; printDisp (2, "\t"))
508                |   0wx49 => printStream "local_13"
509                |   0wx4a => printStream "local_14"
510                |   0wx4b => printStream "local_15"
511                |   0wx50 => printStream "reset_1"
512                |   0wx51 => printStream "reset_2"
513                |   0wx54 => (printOp(1, "indirectClosureBB\t"); printOp(1, ", "))
514                |   0wx64 => printStream "resetR_1"
515                |   0wx65 => printStream "resetR_2"
516                |   0wx66 => printStream "resetR_3"
517                |   0wx68 => printOp(1, "tupleB\t")
518                |   0wx69 => printStream "tuple_2"
519                |   0wx6a => printStream "tuple_3"
520                |   0wx6b => printStream "tuple_4"
521                |   0wx6c => printStream "lock"
522                |   0wx6d => printStream "ldexc"
523                |   0wx74 => printOp(1, "indirectContainerB\t")
524                |   0wx75 => printOp(1, "moveToMutClosureB\t")
525                |   0wx76 => printOp(1, "allocMutClosureB\t")
526                |   0wx77 => printOp(1, "indirectClosureB0\t")
527                |   0wx78 => printStream "pushHandler"
528                |   0wx7a => printOp(1, "indirectClosureB1\t")
529                |   0wx7b => (printOp (1, "tailbb\t"); printOp (1, ","))
530                |   0wx7c => printOp(1, "indirectClosureB2\t")
531                |   0wx7d => printOp(1, "tail3b\t")
532                |   0wx7e => printOp(1, "tail4b\t")
533                |   0wx7f => printStream "tail3_2"
534                |   0wx80 => printStream "tail3_3"
535                |   0wx81 => (printStream "setHandler"; printDisp (1, "\t"))
536                |   0wx83 => printStream "callFastRTS0"
537                |   0wx84 => printStream "callFastRTS1"
538                |   0wx85 => printStream "callFastRTS2"
539                |   0wx86 => printStream "callFastRTS3"
540                |   0wx87 => printStream "callFastRTS4"
541                |   0wx88 => printStream "callFastRTS5"
542                |   0wx91 => printStream "notBoolean"
543                |   0wx92 => printStream "isTagged"
544                |   0wx93 => printStream "cellLength"
545                |   0wx94 => printStream "cellFlags"
546                |   0wx95 => printStream "clearMutable"
547                |   0wx97 => printStream "atomicIncr"
548                |   0wx98 => printStream "atomicDecr"
549                |   0wxa0 => printStream "equalWord"
550                |   0wxa1 => printOp(1, "equalWordConstB\t")
551                |   0wxa2 => printStream "lessSigned"
552                |   0wxa3 => printStream "lessUnsigned"
553                |   0wxa4 => printStream "lessEqSigned"
554                |   0wxa5 => printStream "lessEqUnsigned"
555                |   0wxa6 => printStream "greaterSigned"
556                |   0wxa7 => printStream "greaterUnsigned"
557                |   0wxa8 => printStream "greaterEqSigned"
558                |   0wxa9 => printStream "greaterEqUnsigned"
559                |   0wxaa => printStream "fixedAdd"
560                |   0wxab => printStream "fixedSub"
561                |   0wxac => printStream "fixedMult"
562                |   0wxad => printStream "fixedQuot"
563                |   0wxae => printStream "fixedRem"
564                |   0wxb1 => printStream "wordAdd"
565                |   0wxb2 => printStream "wordSub"
566                |   0wxb3 => printStream "wordMult"
567                |   0wxb4 => printStream "wordDiv"
568                |   0wxb5 => printStream "wordMod"
569                |   0wxb7 => printStream "wordAnd"
570                |   0wxb8 => printStream "wordOr"
571                |   0wxb9 => printStream "wordXor"
572                |   0wxba => printStream "wordShiftLeft"
573                |   0wxbb => printStream "wordShiftRLog"
574                |   0wxbd => printStream "allocByteMem"
575                |   0wxc1 => printOp(1, "indirectLocalB1\t")
576                |   0wxc2 => printOp(1, "isTaggedLocalB\t")
577                |   0wxc3 => (printOp(1, "jumpNEqLocalInd\t"); printOp(1, ","); printOp(1, ","); printDisp(1, "\t"))
578                |   0wxc4 => (printOp(1, "jumpTaggedLocal\t"); printDisp(1, "\t"))
579                |   0wxc5 => (printOp(1, "jumpNEqLocal\t"); printOp(1, ","); printOp(1, ","); printDisp(1, "\t"))
580                |   0wxc6 => printStream "indirect0Local0"
581                |   0wxc7 => printOp(1, "indirectLocalB0\t")
582                |   0wxd0 => printOp(1, "closureB\t")
583                |   0wxd9 => printStream "getThreadId"
584                |   0wxda => printStream "allocWordMemory"
585                |   0wxdc => printStream "loadMLByte"
586                |   0wxe4 => printStream "storeMLByte"
587                |   0wxec => printStream "blockMoveByte"
588                |   0wxed => printStream "blockEqualByte"
589                |   0wxee => printStream "blockCompareByte"
590                |   0wxf1 => printStream "deleteHandler"
591                |   0wxf7 => printStream "jump16"
592                |   0wxf8 => printStream "jump16False"
593                |   0wxf9 => printStream "setHandler16"
594                |   0wxfa => printDisp (1, "constAddr8\t")
595                |   0wxfb => printOp(1, "stackSize8\t")
596                |   0wxfc => printOp(2, "stackSize16\t")
597                |   0wxff => printStream "enterIntX86"
598                
599                |   0wxfe =>
600                    (
601                        case codeVecGet (seg, !ptr) before ptr := !ptr + 0w1 of
602                            0wx0b => printStream "containerW"
603                        |   0wx10 => printOp(2, "indirectClosureW\t")
604                        |   0wx11 => printOp(2, "indirectContainerW\t")
605                        |   0wx14 => printOp(2, "indirectW\t")
606                        |   0wx15 => printOp(2, "moveToContainerW\t")
607                        |   0wx16 => printOp(2, "moveToMutClosureW\t")
608                        |   0wx17 => printOp(2, "setStackValW\t")
609                        |   0wx18 => printOp(2, "resetW\t")
610                        |   0wx19 => printOp(2, "resetR_w\t")
611                        |   0wx1c => printStream "callFastRTSRRtoR"
612                        |   0wx1d => printStream "callFastRTSRGtoR"
613                        |   0wx48 => (printStream "jumpTrue"; printDisp (4, "\t"))
614                        |   0wx56 => printStream "floatAbs"
615                        |   0wx57 => printStream "floatNeg"
616                        |   0wx58 => printStream "fixedIntToFloat"
617                        |   0wx59 => printStream "floatToReal"
618                        |   0wx5a => printOp(1, "realToFloat\t")
619                        |   0wx5b => printStream "floatEqual"
620                        |   0wx5c => printStream "floatLess"
621                        |   0wx5d => printStream "floatLessEq"
622                        |   0wx5e => printStream "floatGreater"
623                        |   0wx5f => printStream "floatGreaterEq"
624                        |   0wx60 => printStream "floatAdd"
625                        |   0wx61 => printStream "floatSub"
626                        |   0wx62 => printStream "floatMult"
627                        |   0wx63 => printStream "floatDiv"
628                        |   0wx67 => printOp(2, "tupleW\t")
629                        |   0wx6e => printOp(1, "realToInt\t")
630                        |   0wx6f => printOp(1, "floatToInt\t")
631                        |   0wx70 => printStream "callFastRTSFtoF"
632                        |   0wx71 => printStream "callFastRTSGtoF"
633                        |   0wx72 => printStream "callFastRTSFFtoF"
634                        |   0wx73 => printStream "callFastRTSFGtoF"
635                        |   0wx79 => printStream "realUnordered"
636                        |   0wx7a => printStream "floatUnordered"
637                        |   0wx7c => (printOp (2, "tail\t"); printOp (2, ","))
638                        |   0wx8f => printStream "callFastRTSRtoR"
639                        |   0wx90 => printStream "callFastRTSGtoR"
640                        |   0wx99 => printStream "atomicReset"
641                        |   0wx9a => printStream "longWToTagged"
642                        |   0wx9b => printStream "signedToLongW"
643                        |   0wx9c => printStream "unsignedToLongW"
644                        |   0wx9d => printStream "realAbs"
645                        |   0wx9e => printStream "realNeg"
646                        |   0wx9f => printStream "fixedIntToReal"
647                        |   0wxaf => printStream "fixedDiv"
648                        |   0wxb0 => printStream "fixedMod"
649                        |   0wxbc => printStream "wordShiftRArith"
650                        |   0wxbe => printStream "lgWordEqual"
651                        |   0wxc0 => printStream "lgWordLess"
652                        |   0wxc1 => printStream "lgWordLessEq"
653                        |   0wxc2 => printStream "lgWordGreater"
654                        |   0wxc3 => printStream "lgWordGreaterEq"
655                        |   0wxc4 => printStream "lgWordAdd"
656                        |   0wxc5 => printStream "lgWordSub"
657                        |   0wxc6 => printStream "lgWordMult"
658                        |   0wxc7 => printStream "lgWordDiv"
659                        |   0wxc8 => printStream "lgWordMod"
660                        |   0wxc9 => printStream "lgWordAnd"
661                        |   0wxca => printStream "lgWordOr"
662                        |   0wxcb => printStream "lgWordXor"
663                        |   0wxcc => printStream "lgWordShiftLeft"
664                        |   0wxcd => printStream "lgWordShiftRLog"
665                        |   0wxce => printStream "lgWordShiftRArith"
666                        |   0wxcf => printStream "realEqual"
667                        |   0wxd0 => printOp(2, "closureW\t")
668                        |   0wxd1 => printStream "realLess"
669                        |   0wxd2 => printStream "realLessEq"
670                        |   0wxd3 => printStream "realGreater"
671                        |   0wxd4 => printStream "realGreaterEq"
672                        |   0wxd5 => printStream "realAdd"
673                        |   0wxd6 => printStream "realSub"
674                        |   0wxd7 => printStream "realMult"
675                        |   0wxd8 => printStream "realDiv"
676                        |   0wxdd => printStream "loadC8"
677                        |   0wxde => printStream "loadC16"
678                        |   0wxdf => printStream "loadC32"
679                        |   0wxe0 => printStream "loadC64"
680                        |   0wxe1 => printStream "loadCFloat"
681                        |   0wxe2 => printStream "loadCDouble"
682                        |   0wxe5 => printStream "storeC8"
683                        |   0wxe6 => printStream "storeC16"
684                        |   0wxe7 => printStream "storeC32"
685                        |   0wxe8 => printStream "storeC64"
686                        |   0wxe9 => printStream "storeCFloat"
687                        |   0wxea => printStream "storeCDouble"
688                        |   0wxf2 => printDisp (4, "jump32\t")
689                        |   0wxf3 => printDisp (4, "jump32False\t")
690                        |   0wxf4 => printDisp (4, "constAddr32\t")
691                        |   0wxf5 => printDisp (4, "setHandler32\t")
692                        |   0wxf6 =>
693                            let
694                                (* Have to find out how many items there are. *)
695                                val limit = getB (2, !ptr, seg);
696                                val () = printOp (2, "case32\t");
697                                val base = !ptr;
698        
699                                fun printEntry _ = (printStream "\n\t"; printHex(base + getB(4, !ptr, seg)); ptr := !ptr + 0w4)
700        
701                                fun forLoop f i n = if i >= n then () else (f i; forLoop f (i + 0w1) n)
702                            in
703                                forLoop printEntry 0w0 limit
704                            end
705                        |   0wxfd => printStream "allocCSpace"
706                        |   0wxfe => printStream "freeCSpace"
707                        |  _ => printStream ("unknown:0xfe 0x" ^ Word8.toString opc)
708                    )
709
710                |   opc => printStream("unknown:0x" ^ Word8.toString opc)
711
712            end; (* an instruction. *)
713
714            printStream "\n"
715        end (* main loop *)
716    end (* printCode *)
717
718    fun codeSize (SimpleCode l) = List.length l
719    |   codeSize (LabelCode _) = 0
720    |   codeSize (JumpInstruction{size=ref Size8, ...}) = 2
721    |   codeSize (JumpInstruction{size=ref Size16, ...}) = 3
722    |   codeSize (JumpInstruction{size=ref Size32, ...}) = 6
723    |   codeSize (PushConstant{size=ref Size8, ...}) = 2
724    |   codeSize (PushConstant{size=ref Size16, ...}) = 3
725    |   codeSize (PushConstant{size=ref Size32, isCall=false, ...}) = 6
726    |   codeSize (PushConstant{size=ref Size32, isCall=true, ...}) = 7
727    |   codeSize (PushShort value) =
728            if value <= 0w4 orelse value = 0w10 then 1
729            else if value < 0w256 then 2 else 3
730    |   codeSize (IndexedCase{labels, size=ref Size32, ...}) = 4 + List.length labels * 4 
731    |   codeSize (IndexedCase{labels, size=ref Size16, ...}) = 3 + List.length labels * 2 
732    |   codeSize (IndexedCase{labels=_, size=ref Size8, ...}) = raise InternalError "codeSize"
733    |   codeSize (LoadLocal w) = if w <= 0w15 then 1 else 2
734    |   codeSize (IndirectLocal{indirect=0w0, localAddr=0w0}) = 1
735    |   codeSize (IndirectLocal{indirect=0w0, ...}) = 2
736    |   codeSize (IndirectLocal{indirect=0w1, ...}) = 2
737    |   codeSize (IndirectLocal _) = 3
738    |   codeSize (UncondTransfer l) = List.length l
739    |   codeSize (IsTaggedLocalB _) = 2
740    |   codeSize (JumpOnIsTaggedLocalB{size=ref Size8, ...}) = 3
741    |   codeSize (JumpOnIsTaggedLocalB{size=ref Size16, ...}) = 5
742    |   codeSize (JumpOnIsTaggedLocalB{size=ref Size32, ...}) = 8
743
744    |   codeSize (JumpNotEqualLocalInd0BB{size=ref Size8, ...}) = 4
745    |   codeSize (JumpNotEqualLocalInd0BB{label, size, localAddr, const}) =
746            codeSize(IndirectLocal{localAddr=localAddr, indirect=0w0}) +
747                codeSize(PushShort(word8ToWord const)) + 1 +
748                codeSize(JumpInstruction{jumpType=JumpFalse, label=label, size=size})
749
750    |   codeSize (JumpNotEqualLocalConstBB{size=ref Size8, ...}) = 4
751    |   codeSize (JumpNotEqualLocalConstBB {label, size, localAddr, const}) =
752            codeSize(LoadLocal localAddr) + codeSize(PushShort(word8ToWord const)) + 1 +
753                codeSize(JumpInstruction{jumpType=JumpFalse, label=label, size=size})
754
755    (* General function to process the code.  ic is the byte counter within the original code. *)
756    fun foldCode startIc foldFn ops =
757    let
758        fun doFold(oper :: operList, ic) =
759            doFold(operList,
760                (* Get the size BEFORE any possible change. *)
761                ic + Word.fromInt(codeSize oper) before foldFn(oper, ic))
762        |   doFold(_, ic) = ic
763    in
764        doFold(ops, startIc)
765    end
766
767    (* Process the code, setting the destination of any labels.  Return the length of the code. *)
768    fun setLabels(LabelCode(ref labs) :: ops, ic) = (List.app(fn d => d := ic) labs; setLabels(ops, ic))
769    |   setLabels(oper :: ops, ic) = setLabels(ops, ic + Word.fromInt(codeSize oper))
770    |   setLabels([], ic) = ic
771
772    (* Set the sizes of branches depending on the distance to the destination. *)
773    fun setLabelsAndSizes ops =
774    let
775        val wordLength = wordSize
776
777        (* Set the labels and adjust the sizes, repeating until it never gets smaller*)
778        fun setLabAndSize(ops, lastSize) =
779        let
780            (* Calculate offsets for constants. *)
781            val endIC = Word.andb(lastSize + wordLength - 0w1, ~ wordLength)
782            val firstConstant = endIC + wordLength * 0w3
783            (* Because the constant area is word aligned we have to allow for
784               the possibility that the distance between a "load constant"
785               instruction and the target could actually increase. *)
786            val alignment = wordLength - 0w1
787        
788            fun adjust(JumpInstruction{size as ref Size32, label=ref lab, ...}, ic) =
789                let
790                    val dest = !(hd lab)
791                    val diff =
792                        if dest <= ic (* N.B. Include infinite loops as backwards. *)
793                        then ic - dest (* Backwards - Counts from start of instruction. *)
794                        else dest - (ic + 0w6) (* Forwards - Relative to the current end. *)
795                in
796                    if diff < 0wx100
797                    then size := Size8
798                    else if diff < 0wx10000
799                    then size := Size16
800                    else ()
801                end
802
803            |   adjust(JumpInstruction{size as ref Size16, label=ref lab, ...}, ic) =
804                let
805                    val dest = !(hd lab)
806                in
807                    if dest <= ic
808                    then if ic - dest < 0wx100 then size := Size8 else ()
809                    else if dest - (ic + 0w3)  < 0wx100 then size := Size8 else ()
810                end
811
812            |   adjust(IndexedCase{size as ref Size32, labels}, ic) =
813                let
814                    val startAddr = ic+0w4
815                    (* Use 16-bit case if all the offsets are 16-bits. *)
816                    fun is16bit(ref lab) =
817                    let
818                        val dest = !(hd lab)
819                    in
820                        dest > startAddr andalso dest < startAddr+0wx10000
821                    end
822                in
823                    if List.all is16bit labels
824                    then size := Size16
825                    else ()
826                end
827
828            |   adjust(PushConstant{size as ref Size32, constNum, ...}, ic) =
829                let
830                    val constAddr = firstConstant + Word.fromInt constNum * wordLength
831                    val offset = constAddr - (ic + 0w6)
832                in
833                    if offset < 0wx100-alignment then size := Size8
834                    else if offset < 0wx10000-alignment then size := Size16
835                    else ()
836                end
837
838            |   adjust(PushConstant{size as ref Size16, constNum, ...}, ic) =
839                let
840                    val constAddr = firstConstant + Word.fromInt constNum * wordLength
841                    val offset = constAddr - (ic + 0w3)
842                in
843                    if offset < 0wx100-alignment then size := Size8
844                    else ()
845                end
846
847            |   adjust(JumpOnIsTaggedLocalB{size as ref Size32, label=ref lab, ...}, ic) =
848                let
849                    val dest = !(hd lab)
850                    val diff = dest - (ic + 0w8)
851                in
852                    if diff < 0wx100
853                    then size := Size8
854                    else if diff < 0wx10000
855                    then size := Size16
856                    else ()
857                end
858
859            |   adjust(JumpOnIsTaggedLocalB{size as ref Size16, label=ref lab, ...}, ic) =
860                let
861                    val dest = !(hd lab)
862                in
863                    if dest - (ic + 0w5)  < 0wx100 then size := Size8 else ()
864                end
865
866            |   adjust(j as JumpNotEqualLocalInd0BB{size as ref Size32, label=ref lab, ...}, ic) =
867                let
868                    val dest = !(hd lab)
869                    val diff = dest - (ic + Word.fromInt(codeSize j))
870                in
871                    if diff < 0wx100
872                    then size := Size8
873                    else if diff < 0wx10000
874                    then size := Size16
875                    else ()
876                end
877
878            |   adjust(j as JumpNotEqualLocalInd0BB{size as ref Size16, label=ref lab, ...}, ic) =
879                let
880                    val dest = !(hd lab)
881                in
882                    if dest - (ic + Word.fromInt(codeSize j))  < 0wx100 then size := Size8 else ()
883                end
884
885            |   adjust(j as JumpNotEqualLocalConstBB{size as ref Size32, label=ref lab, ...}, ic) =
886                let
887                    val dest = !(hd lab)
888                    val diff = dest - (ic + Word.fromInt(codeSize j))
889                in
890                    if diff < 0wx100
891                    then size := Size8
892                    else if diff < 0wx10000
893                    then size := Size16
894                    else ()
895                end
896
897            |   adjust(j as JumpNotEqualLocalConstBB{size as ref Size16, label=ref lab, ...}, ic) =
898                let
899                    val dest = !(hd lab)
900                in
901                    if dest - (ic + Word.fromInt(codeSize j))  < 0wx100 then size := Size8 else ()
902                end
903
904            |   adjust _ = ()
905
906            val _ = foldCode 0w0 adjust ops
907            val nextSize = setLabels(ops, 0w0)
908        in
909            if nextSize < lastSize then setLabAndSize(ops, nextSize)
910            else if nextSize = lastSize then lastSize
911            else raise InternalError "setLabAndSize - size increased"
912        end
913    in
914        setLabAndSize(ops, setLabels(ops, 0w0))
915    end
916    
917    fun genCode(ops, Code {constVec, ...}) =
918    let
919        (* First pass - set the labels. *)
920        val codeSize = setLabelsAndSizes ops
921        val wordSize = wordSize
922        (* Align to wordLength. *)
923        val endIC = Word.andb(codeSize + wordSize - 0w1, ~ wordSize)
924        val paddingBytes = List.tabulate(Word.toInt(endIC - codeSize), fn _ => SimpleCode[opcode_const_0])
925        val endOfCode = endIC div wordSize
926        val firstConstant = endIC + wordSize * 0w3 (* Add 3 for no of consts, fn name and profile count. *)
927        val segSize   = endOfCode + Word.fromInt(List.length(! constVec)) + 0w4
928        val codeVec = byteVecMake segSize
929
930        val ic = ref 0w0
931        
932        fun genByte b = byteVecSet(codeVec, !ic, b) before ic := !ic + 0w1
933
934        fun genByteCode(SimpleCode bytes, _) =
935            (* Simple code - just generate the bytes. *)
936                List.app genByte bytes
937
938        |   genByteCode(UncondTransfer bytes, _) = List.app genByte bytes
939
940        |   genByteCode(LabelCode _, _) = ()
941
942        |   genByteCode(JumpInstruction{label=ref labs, jumpType, size=ref Size32, ...}, ic) =
943            let
944                val dest = !(hd labs)
945                val extOpc =
946                    case jumpType of
947                        SetHandler => ext_opcode_setHandler32
948                    |   JumpFalse => ext_opcode_jump32False
949                    |   JumpTrue => ext_opcode_jump32True
950                    |   Jump => ext_opcode_jump32
951                    |   JumpBack => ext_opcode_jump32
952                val diff = dest - (ic + 0w6)
953            in
954                genByte opcode_escape;
955                genByte extOpc;
956                genByte(wordToWord8 diff);
957                (* This may be negative so we must use an arithmetic shift. *)
958                genByte(wordToWord8(diff ~>> 0w8));
959                genByte(wordToWord8(diff ~>> 0w16));
960                genByte(wordToWord8(diff ~>> 0w24))
961            end
962
963        |   genByteCode(JumpInstruction{label=ref labs, jumpType, size=ref Size16, ...}, ic) =
964            let
965                val dest = !(hd labs)
966            in
967                if dest <= ic
968                then (* Jump back. *)
969                let
970                    val _ = jumpType = JumpBack orelse raise InternalError "genByteCode - back jump"
971                    val diff = ic - dest
972                    val _ = diff < 0wx10000 orelse raise InternalError "genByteCode - jump range"
973                in
974                    genByte opcode_jumpBack16;
975                    genByte(wordToWord8 diff);
976                    genByte(wordToWord8(diff >> 0w8))
977                end
978                else
979                let
980                    val opc =
981                        case jumpType of
982                            SetHandler => opcode_setHandler16
983                        |   JumpFalse => opcode_jump16False
984                        |   JumpTrue => opcode_jump16True
985                        |   Jump => opcode_jump16
986                        |   JumpBack => raise InternalError "genByteCode: JumpBack goes forward"
987                    val diff = dest - (ic + 0w3)
988                    val _ = diff < 0wx10000 orelse raise InternalError "genByteCode - jump range"
989                in
990                    genByte opc;
991                    genByte(wordToWord8 diff);
992                    genByte(wordToWord8(diff >> 0w8))
993                end
994            end
995
996        |   genByteCode(JumpInstruction{label=ref labs, jumpType, size=ref Size8, ...}, ic) =
997            let
998                val dest = !(hd labs)
999            in
1000                if dest <= ic
1001                then (* Jump back. *)
1002                let
1003                    val _ = jumpType = JumpBack orelse raise InternalError "genByteCode - back jump"
1004                    val diff = ic - dest
1005                    val _ = diff < 0wx100 orelse raise InternalError "genByteCode - jump range"
1006                in
1007                    genByte opcode_jumpBack8;
1008                    genByte(wordToWord8 diff)
1009                end
1010                else
1011                let
1012                    val opc =
1013                        case jumpType of
1014                            SetHandler => opcode_setHandler
1015                        |   JumpFalse => opcode_jumpFalse
1016                        |   JumpTrue => opcode_jumpTrue
1017                        |   Jump => opcode_jump
1018                        |   JumpBack => raise InternalError "genByteCode: JumpBack goes forward"
1019                    val diff = dest - (ic + 0w2)
1020                    val _ = diff < 0wx100 orelse raise InternalError "genByteCode - jump range"
1021                in
1022                    genByte opc;
1023                    genByte(wordToWord8 diff)
1024                end
1025            end
1026
1027        |   genByteCode(PushConstant{ constNum, size=ref Size32, isCall=false, ... }, ic) =
1028            let
1029                val constAddr = firstConstant + Word.fromInt constNum * wordSize
1030                (* Offsets are calculated from the END of the instruction *)
1031                val offset = constAddr - (ic + 0w6)
1032            in
1033                genByte opcode_escape;
1034                genByte ext_opcode_constAddr32;
1035                genByte(wordToWord8 offset);
1036                genByte(wordToWord8(offset >> 0w8));
1037                genByte(wordToWord8(offset >> 0w16));
1038                genByte(wordToWord8(offset >> 0w24))
1039            end
1040
1041        |   genByteCode(PushConstant{ constNum, size=ref Size32, isCall=true, ... }, ic) =
1042            (
1043                (* Turn this back into a push of a constant and call-closure. *)
1044                genByteCode(PushConstant{ constNum=constNum, size=ref Size32, isCall=false }, ic);
1045                genByte opcode_callClosure
1046            )
1047
1048        |   genByteCode(PushConstant{ constNum, size=ref Size16, isCall, ... }, ic) =
1049            let
1050                val constAddr = firstConstant + Word.fromInt constNum * wordSize
1051                val offset = constAddr - (ic + 0w3)
1052                val _ = offset < 0wx10000 orelse raise InternalError "genByteCode - constant range"
1053            in
1054                genByte(if isCall then opcode_callConstAddr16 else opcode_constAddr16);
1055                genByte(wordToWord8 offset);
1056                genByte(wordToWord8(offset >> 0w8))
1057            end
1058
1059        |   genByteCode(PushConstant{ constNum, size=ref Size8, isCall, ... }, ic) =
1060            let
1061                val constAddr = firstConstant + Word.fromInt constNum * wordSize
1062                val offset = constAddr - (ic + 0w2)
1063                val _ = offset < 0wx100 orelse raise InternalError "genByteCode - constant range"
1064            in
1065                genByte(if isCall then opcode_callConstAddr8 else opcode_constAddr8);
1066                genByte(wordToWord8 offset)
1067            end
1068
1069        |   genByteCode(PushShort 0w0, _) = genByte opcode_const_0
1070        |   genByteCode(PushShort 0w1, _) = genByte opcode_const_1
1071        |   genByteCode(PushShort 0w2, _) = genByte opcode_const_2
1072        |   genByteCode(PushShort 0w3, _) = genByte opcode_const_3
1073        |   genByteCode(PushShort 0w4, _) = genByte opcode_const_4
1074        |   genByteCode(PushShort 0w10, _) = genByte opcode_const_10
1075        |   genByteCode(PushShort value, _) =
1076            if value < 0w256 then (genByte opcode_constIntB; genByte(wordToWord8 value))
1077            else (genByte opcode_constIntW; genByte(wordToWord8 value); genByte(wordToWord8(value >> 0w8)))
1078
1079        |   genByteCode(IndexedCase{labels, size=ref Size32, ...}, ic) =
1080            let
1081                val nCases = List.length labels
1082                val () = genByte opcode_escape
1083                val () = genByte ext_opcode_case32
1084                val () = genByte(Word8.fromInt nCases)
1085                val () = genByte(Word8.fromInt (nCases div 256))
1086                val startOffset = ic+0w4 (* Offsets are relative to here. *)
1087
1088                fun putLabel(ref labs) =
1089                let
1090                    val dest = !(hd labs)
1091                    val diff = dest - startOffset
1092                    val _ = dest > startOffset orelse raise InternalError "genByteCode - indexed case"
1093                in
1094                    genByte(wordToWord8 diff);
1095                    genByte(wordToWord8(diff >> 0w8));
1096                    genByte(wordToWord8(diff >> 0w16));
1097                    genByte(wordToWord8(diff >> 0w24))
1098                end
1099            in
1100                List.app putLabel labels
1101            end
1102        
1103        |   genByteCode(IndexedCase{labels, size=ref Size16, ...}, ic) =
1104            let
1105                val nCases = List.length labels
1106                val () = genByte(opcode_case16)
1107                val () = genByte(Word8.fromInt nCases)
1108                val () = genByte(Word8.fromInt (nCases div 256))
1109                val startOffset = ic+0w3 (* Offsets are relative to here. *)
1110
1111                fun putLabel(ref labs) =
1112                let
1113                    val dest = !(hd labs)
1114                    val diff = dest - startOffset
1115                    val _ = dest > startOffset orelse raise InternalError "genByteCode - indexed case"
1116                    val _ = diff < 0wx10000 orelse raise InternalError "genByteCode - indexed case"
1117                in
1118                    genByte(wordToWord8 diff);
1119                    genByte(wordToWord8(diff >> 0w8))
1120                end
1121            in
1122                List.app putLabel labels
1123            end
1124        
1125        |   genByteCode(IndexedCase{size=ref Size8, ...}, _) = raise InternalError "genByteCode - IndexedCase byte"
1126        
1127        |   genByteCode(LoadLocal 0w0, _) = genByte opcode_local_0
1128        |   genByteCode(LoadLocal 0w1, _) = genByte opcode_local_1
1129        |   genByteCode(LoadLocal 0w2, _) = genByte opcode_local_2
1130        |   genByteCode(LoadLocal 0w3, _) = genByte opcode_local_3
1131        |   genByteCode(LoadLocal 0w4, _) = genByte opcode_local_4
1132        |   genByteCode(LoadLocal 0w5, _) = genByte opcode_local_5
1133        |   genByteCode(LoadLocal 0w6, _) = genByte opcode_local_6
1134        |   genByteCode(LoadLocal 0w7, _) = genByte opcode_local_7
1135        |   genByteCode(LoadLocal 0w8, _) = genByte opcode_local_8
1136        |   genByteCode(LoadLocal 0w9, _) = genByte opcode_local_9
1137        |   genByteCode(LoadLocal 0w10, _) = genByte opcode_local_10
1138        |   genByteCode(LoadLocal 0w11, _) = genByte opcode_local_11
1139        |   genByteCode(LoadLocal 0w12, _) = genByte opcode_local_12
1140        |   genByteCode(LoadLocal 0w13, _) = genByte opcode_local_13
1141        |   genByteCode(LoadLocal 0w14, _) = genByte opcode_local_14
1142        |   genByteCode(LoadLocal 0w15, _) = genByte opcode_local_15
1143        |   genByteCode(LoadLocal w, _) = (genByte opcode_localB; genByte w)
1144
1145        |   genByteCode(IndirectLocal{localAddr=0w0, indirect=0w0}, _) = genByte opcode_indirect0Local0
1146        |   genByteCode(IndirectLocal{localAddr, indirect=0w0}, _) =
1147                (genByte opcode_indirectLocalB0; genByte localAddr)
1148        |   genByteCode(IndirectLocal{localAddr, indirect=0w1}, _) =
1149                (genByte opcode_indirectLocalB1; genByte localAddr)
1150        |   genByteCode(IndirectLocal{localAddr, indirect}, _) =
1151                (genByte opcode_indirectLocalBB; genByte localAddr; genByte indirect)
1152
1153        |   genByteCode(IsTaggedLocalB addr, _) =
1154                (genByte opcode_isTaggedLocalB; genByte addr)
1155
1156        |   genByteCode(JumpOnIsTaggedLocalB {label=ref labs, size=ref Size8, localAddr}, ic) =
1157            let
1158                val dest = !(hd labs)
1159                val diff = dest - (ic + 0w3)
1160            in
1161                genByte opcode_jumpTaggedLocal;
1162                genByte localAddr;
1163                genByte(wordToWord8 diff)
1164            end
1165
1166        |   genByteCode(JumpOnIsTaggedLocalB {label, size, localAddr}, ic) =
1167            (
1168                (* Turn this back into the original sequence. *)
1169                genByteCode(IsTaggedLocalB localAddr, ic);
1170                genByteCode(JumpInstruction{jumpType=JumpTrue, label=label, size=size}, ic+0w2)
1171            )
1172
1173        |   genByteCode(JumpNotEqualLocalInd0BB {label=ref labs, size=ref Size8, localAddr, const}, ic) =
1174            let
1175                val dest = !(hd labs)
1176                val diff = dest - (ic + 0w4)
1177            in
1178                genByte opcode_jumpNEqLocalInd;
1179                genByte localAddr; genByte const;
1180                genByte(wordToWord8 diff)
1181            end
1182
1183        |   genByteCode(JumpNotEqualLocalInd0BB {label, size, localAddr, const}, ic) =
1184                (* Turn this back into the original sequence. *)
1185                (foldCode ic genByteCode
1186                    [IndirectLocal{localAddr=localAddr, indirect=0w0}, PushShort(word8ToWord const),
1187                     SimpleCode[opcode_equalWord],
1188                     JumpInstruction{jumpType=JumpFalse, label=label, size=size}]; ())
1189
1190        |   genByteCode(JumpNotEqualLocalConstBB {label=ref labs, size=ref Size8, localAddr, const}, ic) =
1191            let
1192                val dest = !(hd labs)
1193                val diff = dest - (ic + 0w4)
1194            in
1195                genByte opcode_jumpNEqLocal;
1196                genByte localAddr; genByte const;
1197                genByte(wordToWord8 diff)
1198            end
1199
1200        |   genByteCode(JumpNotEqualLocalConstBB {label, size, localAddr, const}, ic) =
1201                (* Turn this back into the original sequence. *)
1202                (foldCode ic genByteCode
1203                    [LoadLocal localAddr, PushShort(word8ToWord const), 
1204                     SimpleCode[opcode_equalWord],
1205                     JumpInstruction{jumpType=JumpFalse, label=label, size=size}]; ())
1206    in
1207        foldCode 0w0 genByteCode (ops @ paddingBytes);
1208        (codeVec (* Return the completed code. *), endIC (* And the size. *))
1209    end
1210
1211    fun setLong (value, addrs, seg) =
1212    let
1213        val wordLength = wordSize
1214        
1215        fun putBytes(value, a, seg, i) =
1216        if i = wordLength then ()
1217        else
1218        (
1219            byteVecSet(seg,
1220                if not isBigEndian then a+i else a+wordLength-i-0w1,
1221                Word8.fromInt(value mod 256));
1222            putBytes(value div 256, a, seg, i+0w1)
1223        )
1224    in
1225        putBytes(value, addrs, seg, 0w0)
1226    end
1227
1228    (* Peephole optimisation. *)
1229    local
1230        fun peepHole([], _, output) = List.rev output
1231        
1232        |   peepHole(LabelCode lab1 :: (instrs as LabelCode lab2 :: _), exited, output) =
1233            (
1234                (* Consecutive labels.  Merge these, discarding the first. *)
1235                lab2 := !lab1 @ !lab2;
1236                peepHole(instrs, exited, output)
1237            )
1238
1239            (* A label followed by an unconditional branch.  Forward the original label.
1240               Although JumpBack is also unconditional we don't forward those because
1241               we don't have a conditional backwards jump. *)
1242        |   peepHole((LabelCode lab1)  ::
1243                     (jump as JumpInstruction{jumpType=Jump, label=lab2, ...}) :: tl,
1244                     exited, output) =
1245            (
1246                lab2 := !lab1 @ !lab2;
1247                (* Leave the jump in the stream and leave "exited" unchanged.
1248                   This will now be unreachable if we had previously exited but
1249                   we need to take the jump if we hadn't. *)
1250                peepHole(jump :: tl, exited, output)
1251            )
1252
1253           (* Discard everything after an unconditional transfer until the next label. *)
1254        |   peepHole((label as LabelCode _) :: tl, _, output) =
1255                peepHole(tl, false, label::output)
1256        
1257        |   peepHole(_ :: tl, true, output) = peepHole(tl, true, output)
1258
1259        |   peepHole((jump as JumpInstruction{jumpType=Jump, ...}) :: tl, _, output) =
1260                peepHole(tl, true, jump :: output)
1261                
1262            (* Return, raise-exception and tail-call. *)
1263        |   peepHole((uncond as UncondTransfer _) :: tl, _, output) =
1264                peepHole(tl, true, uncond :: output)
1265
1266            (* A conditional branch round an unconditional branch.  Replace by a
1267               conditional branch with the sense reversed. *)
1268        |   peepHole((cond as JumpInstruction{jumpType=JumpFalse, label=lab1, ...}) ::
1269                (uncond as JumpInstruction{jumpType=Jump, label=lab2, size}) ::
1270                (tail as LabelCode lab3 :: _), _, output) =
1271                if lab1 = lab3
1272                then peepHole(tail, false, JumpInstruction{jumpType=JumpTrue, label=lab2, size=size} :: output)
1273                else peepHole(uncond :: tail, false, cond :: output)
1274
1275        |   peepHole((cond as JumpInstruction{jumpType=JumpTrue, label=lab1, ...}) ::
1276                (uncond as JumpInstruction{jumpType=Jump, label=lab2, size}) ::
1277                (tail as LabelCode lab3 :: _), _, output) =
1278                if lab1 = lab3
1279                then peepHole(tail, false, JumpInstruction{jumpType=JumpFalse, label=lab2, size=size} :: output)
1280                else peepHole(uncond :: tail, false, cond :: output)
1281
1282        |   peepHole(IsTaggedLocalB addr :: JumpInstruction{jumpType=JumpTrue, label, size} :: tail, _, output) =
1283                peepHole(tail, false, JumpOnIsTaggedLocalB {label=label, size=size, localAddr=addr} :: output)
1284
1285        |   peepHole((indLocal as IndirectLocal{localAddr, indirect=0w0}) ::
1286                       (instrs as PushShort const :: SimpleCode[0wxa0(*opcode_equalWord*)]  ::
1287                            JumpInstruction{jumpType=JumpFalse, label, size} :: tail), _, output) =
1288                if const < 0w256
1289                then peepHole(tail, false,
1290                        JumpNotEqualLocalInd0BB {label=label, size=size, localAddr=localAddr, const=wordToWord8 const} :: output)
1291                else peepHole(instrs, false, indLocal :: output)
1292
1293        |   peepHole((load as LoadLocal localAddr) ::
1294                       (instrs as PushShort const :: SimpleCode[0wxa0(*opcode_equalWord*)]  ::
1295                            JumpInstruction{jumpType=JumpFalse, label, size} :: tail), _, output) =
1296                if const < 0w256
1297                then peepHole(tail, false,
1298                        JumpNotEqualLocalConstBB {label=label, size=size, localAddr=localAddr, const=wordToWord8 const} :: output)
1299                else peepHole(instrs, false, load :: output)
1300
1301        |   peepHole(hd::tl, exited, output) = peepHole(tl, exited, hd::output)
1302    in
1303        fun optimise code = peepHole(code, false, [])
1304    end
1305
1306    (* Generate the code sequence to enter the interpreter when this code is called or
1307       returned to or an exception is raised.   This is only required when bootstrapping
1308       a native code compiler. *)
1309    fun genEnterInt(_, Code { enterIntMode = 0 (* None *), ...}) = []
1310    |   genEnterInt(b, Code { enterIntMode = 1 (* X86_32 *), ...}) = [SimpleCode[0wxff, 0wx55, 0wx24, b]]
1311    |   genEnterInt(b, Code { enterIntMode = 2 (* X86_64 *), ...}) = [SimpleCode[0wxff, 0wx55, 0wx48, b]]
1312    |   genEnterInt(b, Code { enterIntMode = 3 (* X86_32_64 *), ...}) = [SimpleCode[0wxff, 0wx55, 0wx48, b]]
1313    |   genEnterInt _ = raise InternalError "genEnterInt: unknown architecture value"
1314
1315    (* Adds the constants onto the code, and copies the code into a new segment *)
1316    fun copyCode {code as
1317                    Code{ printAssemblyCode, printStream,
1318                           procName, constVec, stage1Code, ...}, maxStack, numberOfArguments, resultClosure} =
1319    let
1320        val cvec = code
1321        local
1322            val revCode = optimise(List.rev(!stage1Code))
1323            (* Add a stack check.  This is only needed if the
1324               function needs more than 128 words since the call and tail functions
1325               check for this much. *)
1326        in
1327            val codeList =
1328                if maxStack < 128
1329                then revCode
1330                else SimpleCode[opcode_stackSize16, Word8.fromInt maxStack, Word8.fromInt(maxStack div 256)] :: revCode
1331        end
1332        (* Add an enterInt if necessary *)
1333        (* If we need enter-int code it must go first. *)
1334        val enterInt = genEnterInt(Word8.fromInt numberOfArguments + 0wx80, cvec)
1335        val (byteVec, endIC) = genCode(enterInt @ codeList, cvec)
1336        val wordLength = wordSize
1337  
1338        (* +3 for profile count, function name and constants count *)
1339        val numOfConst = List.length(! constVec)
1340        val endOfCode = endIC div wordLength
1341        val segSize   = endOfCode + Word.fromInt numOfConst + 0w4
1342        val firstConstant = endIC + wordLength * 0w3 (* Add 3 for no of consts, fn name and profile count. *)
1343    
1344        (* Put in the number of constants. This must go in before
1345           we actually put in any constants. *)
1346        local
1347            val lastWord = (segSize - 0w1) * wordLength
1348        in
1349            val () = setLong(numOfConst + 2, endIC, byteVec)
1350            (* Set the last word of the code to the (negative) byte offset of the start of the code area
1351               from the end of this word. *)
1352            val () = setLong((numOfConst + 3) * ~ (Word.toInt wordLength), lastWord, byteVec) 
1353        end
1354
1355        (* Now we've filled in all the size info we need to convert the segment
1356           into a proper code segment before it's safe to put in any ML values. *)
1357        val codeVec = byteVecToCodeVec(byteVec, resultClosure)
1358
1359        local
1360            val name     : string = procName
1361            val nameWord : machineWord = toMachineWord name
1362        in
1363            val () = codeVecPutWord (codeVec, endOfCode+0w1, nameWord)
1364        end
1365        (* Profile ref.  A byte ref used by the profiler in the RTS. *)
1366        local
1367            val v = RunCall.allocateByteMemory(0w1, Word.fromLargeWord(Word8.toLargeWord(Word8.orb(F_mutable, F_bytes))))
1368            fun clear 0w0 = ()
1369            |   clear i = (assignByte(v, i-0w1, 0w0); clear (i-0w1))
1370            val () = clear(wordSize)
1371        in
1372            val () = codeVecPutWord (codeVec, endOfCode+0w2, toMachineWord v)
1373        end
1374
1375        (* and then copy the constants from the constant list. *)
1376        local
1377            fun setConstant(value, num) =
1378            let
1379                val constAddr = (firstConstant div wordLength) + num
1380            in
1381                codeVecPutWord (codeVec, constAddr, value);
1382                num+0w1
1383            end
1384        in
1385            val _ = List.foldl setConstant 0w0 (!constVec)
1386        end
1387    in
1388        if printAssemblyCode
1389        then (* print out the code *)
1390            (printCode (codeVec, procName, endIC, printStream); printStream"\n")
1391        else ();
1392        codeVecLock(codeVec, resultClosure)
1393    end (* copyCode *)
1394    
1395    fun addItemToList(item, Code{stage1Code, ...}) = stage1Code := item :: !stage1Code
1396
1397    val genOpcode = addItemToList
1398    
1399    fun putBranchInstruction(brOp, label, cvec) =
1400        addItemToList(JumpInstruction{label=label, jumpType=brOp, size = ref Size32}, cvec)
1401
1402    fun setLabel(label, cvec) = addItemToList(LabelCode label, cvec)
1403    
1404    fun createLabel () = ref [ref 0w0]
1405    
1406    local
1407        fun genOpc(opc, cvec) = addItemToList(SimpleCode [opc], cvec)
1408        and genExtOpc(opc, cvec) = addItemToList(SimpleCode [opcode_escape, opc], cvec)
1409        and genOpcByte(opc, arg1, cvec) =
1410            if 0 <= arg1 andalso arg1 < 256
1411            then addItemToList(SimpleCode [opc, Word8.fromInt arg1], cvec)
1412            else raise InternalError "genOpcByte"
1413        and genExtOpcByte(opc, arg1, cvec) = 
1414            if 0 <= arg1 andalso arg1 < 256
1415            then addItemToList(SimpleCode [opcode_escape, opc, Word8.fromInt arg1], cvec)
1416            else raise InternalError "genExtOpcByte"
1417        and genExtOpcWord(opc, arg1, cvec) =
1418            if 0 <= arg1 andalso arg1 < 65536
1419            then addItemToList(SimpleCode[opcode_escape, opc, Word8.fromInt arg1, Word8.fromInt (arg1 div 256)], cvec)
1420            else raise InternalError "genExtOpcWord"
1421        
1422        open IEEEReal
1423        
1424        fun encodeRound TO_NEAREST = 0
1425        |   encodeRound TO_NEGINF = 1
1426        |   encodeRound TO_POSINF = 2
1427        |   encodeRound TO_ZERO = 3
1428    in
1429        fun genRaiseEx cvec = addItemToList(UncondTransfer [opcode_raiseEx], cvec)
1430        fun genLock cvec = genOpc (opcode_lock, cvec)
1431        fun genLdexc cvec = genOpc (opcode_ldexc, cvec)
1432        fun genPushHandler cvec = genOpc (opcode_pushHandler, cvec)
1433    
1434        fun genRTSCallFast(0, cvec) = genOpc (opcode_callFastRTS0, cvec)
1435        |   genRTSCallFast(1, cvec) = genOpc (opcode_callFastRTS1, cvec)
1436        |   genRTSCallFast(2, cvec) = genOpc (opcode_callFastRTS2, cvec)
1437        |   genRTSCallFast(3, cvec) = genOpc (opcode_callFastRTS3, cvec)
1438        |   genRTSCallFast(4, cvec) = genOpc (opcode_callFastRTS4, cvec)
1439        |   genRTSCallFast(5, cvec) = genOpc (opcode_callFastRTS5, cvec)
1440        |   genRTSCallFast(_, _) = raise InternalError "genRTSFastCall"
1441
1442        fun genContainer (size, cvec) =
1443            if size < 256
1444            then genOpcByte(opcode_containerB, size, cvec)
1445            else genExtOpcWord(ext_opcode_containerW, size, cvec)
1446
1447        fun genCase (nCases, cvec) =
1448        let
1449            val labels = List.tabulate(nCases, fn _ => createLabel())
1450        in
1451            addItemToList(IndexedCase{labels=labels, size=ref Size32}, cvec);
1452            labels
1453        end
1454        
1455        (* For the moment don't try to merge stack resets. *)
1456        fun resetStack(0, _, _) = ()
1457
1458        |   resetStack(1, true, cvec) =
1459                addItemToList(SimpleCode[opcode_resetR_1], cvec)
1460        |   resetStack(2, true, cvec) =
1461                addItemToList(SimpleCode[opcode_resetR_2], cvec)
1462        |   resetStack(3, true, cvec) =
1463                addItemToList(SimpleCode[opcode_resetR_3], cvec)
1464
1465        |   resetStack(offset, true, cvec) =
1466            if offset < 0 then raise InternalError "resetStack"
1467            else if offset > 255
1468            then genExtOpcWord(ext_opcode_resetR_w, offset, cvec)
1469            else genOpcByte(opcode_resetRB, offset, cvec)
1470            
1471        |   resetStack(1, false, cvec) =
1472                addItemToList(SimpleCode[opcode_reset_1], cvec)
1473        |   resetStack(2, false, cvec) =
1474                addItemToList(SimpleCode[opcode_reset_2], cvec)
1475        
1476        |   resetStack(offset, false, cvec) =
1477            if offset < 0 then raise InternalError "resetStack"
1478            else if offset > 255
1479            then genExtOpcWord(ext_opcode_resetW, offset, cvec)
1480            else genOpcByte(opcode_resetB, offset, cvec)
1481
1482        fun genCallClosure(Code{stage1Code as ref(PushConstant{constNum, size, isCall=false} :: tail), ...}) =
1483            stage1Code := PushConstant{constNum=constNum, size=size, isCall=true} :: tail
1484        
1485        |   genCallClosure(Code{stage1Code as ref(LoadLocal w :: tail), ...}) =
1486            stage1Code := SimpleCode [opcode_callLocalB, w] :: tail
1487
1488        |   genCallClosure(Code{stage1Code, ...}) =
1489            stage1Code := SimpleCode [opcode_callClosure] :: !stage1Code
1490
1491        fun genTailCall (toslide, slideby, cvec) =
1492        if toslide < 256 andalso slideby < 256
1493        then (* General byte case *)
1494            addItemToList(UncondTransfer[opcode_tailbb, Word8.fromInt toslide, Word8.fromInt slideby], cvec)          
1495        else (* General case. *)
1496                addItemToList(
1497                    UncondTransfer[opcode_escape, ext_opcode_tail, Word8.fromInt toslide, Word8.fromInt(toslide div 256),
1498                               Word8.fromInt slideby, Word8.fromInt (slideby div 256)], cvec)
1499
1500        fun pushConst (value : machineWord, cvec) =
1501            if isShort value andalso toShort value < 0w32768
1502            then addItemToList(PushShort(toShort value), cvec)
1503            else (* address or large short *)
1504                addItemToList(PushConstant{constNum = addConstToVec(value, cvec), size=ref Size32, isCall=false}, cvec)
1505
1506        fun genRTSCallFastRealtoReal cvec = genExtOpc (ext_opcode_callFastRTSRtoR, cvec)
1507        and genRTSCallFastRealRealtoReal cvec = genExtOpc (ext_opcode_callFastRTSRRtoR, cvec)
1508        and genRTSCallFastGeneraltoReal cvec = genExtOpc (ext_opcode_callFastRTSGtoR, cvec)
1509        and genRTSCallFastRealGeneraltoReal cvec = genExtOpc (ext_opcode_callFastRTSRGtoR, cvec)
1510        
1511        and genRTSCallFastFloattoFloat cvec = genExtOpc (ext_opcode_callFastRTSFtoF, cvec)
1512        and genRTSCallFastFloatFloattoFloat cvec = genExtOpc (ext_opcode_callFastRTSFFtoF, cvec)
1513        and genRTSCallFastGeneraltoFloat cvec = genExtOpc (ext_opcode_callFastRTSGtoF, cvec)
1514        and genRTSCallFastFloatGeneraltoFloat cvec = genExtOpc (ext_opcode_callFastRTSFGtoF, cvec)
1515        
1516        fun genDoubleToFloat(SOME rnding, cvec) = genExtOpcByte(ext_opcode_realToFloat, encodeRound rnding, cvec)
1517        |   genDoubleToFloat(NONE, cvec) = genExtOpcByte(ext_opcode_realToFloat, 5, cvec)
1518
1519        and genRealToInt(rnding, cvec) = genExtOpcByte(ext_opcode_realToInt, encodeRound rnding, cvec)
1520        and genFloatToInt(rnding, cvec) = genExtOpcByte(ext_opcode_floatToInt, encodeRound rnding, cvec)
1521        
1522        fun genEqualWordConst(w, cvec) =
1523            (pushConst(toMachineWord w, cvec); genOpc(opcode_equalWord, cvec))
1524       
1525        fun genIsTagged(Code{stage1Code as ref(LoadLocal addr :: tail), ...}) =
1526                stage1Code := IsTaggedLocalB addr :: tail
1527        |   genIsTagged cvec = genOpc(opcode_isTagged, cvec)
1528
1529        fun genIndirectSimple(0, cvec) = genOpc(opcode_indirect_0, cvec)
1530        |   genIndirectSimple(1, cvec) = genOpc(opcode_indirect_1, cvec)
1531        |   genIndirectSimple(2, cvec) = genOpc(opcode_indirect_2, cvec)
1532        |   genIndirectSimple(3, cvec) = genOpc(opcode_indirect_3, cvec)
1533        |   genIndirectSimple(4, cvec) = genOpc(opcode_indirect_4, cvec)
1534        |   genIndirectSimple(5, cvec) = genOpc(opcode_indirect_5, cvec)
1535        |   genIndirectSimple(arg1, cvec) =
1536                if arg1 < 256
1537                then genOpcByte(opcode_indirectB, arg1, cvec)
1538                else genExtOpcWord(ext_opcode_indirectW, arg1, cvec)
1539        
1540        fun genIndirectContainer(arg1, cvec) =
1541            if arg1 < 256
1542            then genOpcByte(opcode_indirectContainerB, arg1, cvec)
1543            else genExtOpcWord(ext_opcode_indirectContainerW, arg1, cvec)
1544
1545        fun genMoveToContainer (arg1, cvec) =
1546            if arg1 < 256
1547            then genOpcByte(opcode_moveToContainerB, arg1, cvec)
1548            else genExtOpcWord(ext_opcode_moveToContainerW, arg1, cvec)
1549
1550        fun genMoveToMutClosure (arg1, cvec) =
1551            if arg1 < 256
1552            then genOpcByte(opcode_moveToMutClosureB, arg1, cvec)
1553            else genExtOpcWord(ext_opcode_moveToMutClosureW, arg1, cvec)
1554
1555        fun genSetStackVal (arg1, cvec) =
1556            if arg1 < 256
1557            then genOpcByte(opcode_setStackValB, arg1, cvec)
1558            else genExtOpcWord(ext_opcode_setStackValW, arg1, cvec)
1559
1560        fun genTuple (2, cvec) = genOpc(opcode_tuple_2, cvec)
1561        |   genTuple (3, cvec) = genOpc(opcode_tuple_3, cvec)
1562        |   genTuple (4, cvec) = genOpc(opcode_tuple_4, cvec)
1563        |   genTuple (arg1, cvec) =
1564                if arg1 < 256
1565                then genOpcByte(opcode_tupleB, arg1, cvec)
1566                else genExtOpcWord(ext_opcode_tupleW, arg1, cvec)
1567 
1568        fun genAllocMutableClosure(closureSize, cvec) =
1569            if closureSize < 256
1570            then genOpcByte(opcode_allocMutClosureB, closureSize, cvec)
1571            else genExtOpcWord(ext_opcode_allocMutClosureW, closureSize, cvec)
1572
1573        fun genClosure (arg1, cvec) =
1574            if arg1 < 256
1575            then genOpcByte(opcode_closureB, arg1, cvec)
1576            else genExtOpcWord(ext_opcode_closureW, arg1, cvec)
1577
1578        fun genLocal (arg1, cvec) =
1579            if 0 <= arg1 andalso arg1 < 256 then addItemToList(LoadLocal(Word8.fromInt arg1), cvec)
1580            else addItemToList(SimpleCode[opcode_localW, Word8.fromInt arg1, Word8.fromInt(arg1 div 256)], cvec)
1581
1582        fun genIndirectClosure{ addr, item, code=cvec } =
1583        if addr < 256 andalso item < 256
1584        then
1585        (
1586            case item of
1587                0 => genOpcByte(opcode_indirectClosureB0, addr, cvec)
1588            |   1 => genOpcByte(opcode_indirectClosureB1, addr, cvec)
1589            |   2 => genOpcByte(opcode_indirectClosureB2, addr, cvec)
1590            |   _ => addItemToList(SimpleCode[opcode_indirectClosureBB, Word8.fromInt addr, Word8.fromInt item], cvec)
1591        )
1592        else
1593        (
1594            genLocal (addr, cvec);
1595            addItemToList(SimpleCode[opcode_escape, ext_opcode_indirectClosureW,
1596                Word8.fromInt item, Word8.fromInt (item div 256)], cvec)
1597        )
1598    end
1599    
1600    fun genReturn(1, cvec) = addItemToList(UncondTransfer[opcode_return_1], cvec)
1601    |   genReturn(2, cvec) = addItemToList(UncondTransfer[opcode_return_2], cvec)
1602    |   genReturn(3, cvec) = addItemToList(UncondTransfer[opcode_return_3], cvec)
1603    |   genReturn(arg1, cvec) =
1604            addItemToList(UncondTransfer(
1605                if 0 <= arg1 andalso arg1 <= 255
1606                then [opcode_returnB, Word8.fromInt arg1]
1607                else [opcode_returnW, Word8.fromInt arg1, Word8.fromInt(arg1 div 256)]),
1608                cvec)
1609    
1610    fun genIndirect (arg1, cvec as Code{stage1Code as ref(LoadLocal w :: tail), ...}) =
1611        if 0 <= arg1 andalso arg1 <= 255
1612        then stage1Code := IndirectLocal{localAddr=w, indirect=Word8.fromInt arg1} :: tail
1613        else genIndirectSimple(arg1, cvec)
1614
1615    |   genIndirect (arg1, cvec) = genIndirectSimple(arg1, cvec)
1616
1617    fun genEnterIntCatch(code as Code{stage1Code, ...}) =
1618        stage1Code := genEnterInt(0wxff, code) @ !stage1Code
1619    and genEnterIntCall(code as Code{stage1Code, ...}, args) =
1620        stage1Code := genEnterInt(Word8.fromInt args, code) @ !stage1Code
1621
1622    val opcode_notBoolean       = SimpleCode [opcode_notBoolean]
1623    val opcode_cellLength       = SimpleCode [opcode_cellLength]
1624    and opcode_cellFlags        = SimpleCode [opcode_cellFlags]
1625    and opcode_clearMutable     = SimpleCode [opcode_clearMutable]
1626    and opcode_atomicIncr       = SimpleCode [opcode_atomicIncr]
1627    and opcode_atomicDecr       = SimpleCode [opcode_atomicDecr]
1628    and opcode_atomicReset      = SimpleCode [opcode_escape, ext_opcode_atomicReset]
1629    and opcode_longWToTagged    = SimpleCode [opcode_escape, ext_opcode_longWToTagged]
1630    and opcode_signedToLongW    = SimpleCode [opcode_escape, ext_opcode_signedToLongW]
1631    and opcode_unsignedToLongW  = SimpleCode [opcode_escape, ext_opcode_unsignedToLongW]
1632    and opcode_realAbs          = SimpleCode [opcode_escape, ext_opcode_realAbs]
1633    and opcode_realNeg          = SimpleCode [opcode_escape, ext_opcode_realNeg]
1634    and opcode_fixedIntToReal   = SimpleCode [opcode_escape, ext_opcode_fixedIntToReal]
1635    and opcode_fixedIntToFloat  = SimpleCode [opcode_escape, ext_opcode_fixedIntToFloat]
1636    and opcode_floatToReal      = SimpleCode [opcode_escape, ext_opcode_floatToReal]
1637    
1638    val opcode_equalWord        = SimpleCode [opcode_equalWord]
1639    and opcode_lessSigned       = SimpleCode [opcode_lessSigned]
1640    and opcode_lessUnsigned     = SimpleCode [opcode_lessUnsigned]
1641    and opcode_lessEqSigned     = SimpleCode [opcode_lessEqSigned]
1642    and opcode_lessEqUnsigned   = SimpleCode [opcode_lessEqUnsigned]
1643    and opcode_greaterSigned    = SimpleCode [opcode_greaterSigned]
1644    and opcode_greaterUnsigned  = SimpleCode [opcode_greaterUnsigned]
1645    and opcode_greaterEqSigned  = SimpleCode [opcode_greaterEqSigned]
1646    and opcode_greaterEqUnsigned = SimpleCode [opcode_greaterEqUnsigned]
1647
1648    val opcode_fixedAdd         = SimpleCode [opcode_fixedAdd]
1649    val opcode_fixedSub         = SimpleCode [opcode_fixedSub]
1650    val opcode_fixedMult        = SimpleCode [opcode_fixedMult]
1651    val opcode_fixedQuot        = SimpleCode [opcode_fixedQuot]
1652    val opcode_fixedRem         = SimpleCode [opcode_fixedRem]
1653    val opcode_fixedDiv         = SimpleCode [opcode_escape, ext_opcode_fixedDiv]
1654    val opcode_fixedMod         = SimpleCode [opcode_escape, ext_opcode_fixedMod]
1655    val opcode_wordAdd          = SimpleCode [opcode_wordAdd]
1656    val opcode_wordSub          = SimpleCode [opcode_wordSub]
1657    val opcode_wordMult         = SimpleCode [opcode_wordMult]
1658    val opcode_wordDiv          = SimpleCode [opcode_wordDiv]
1659    val opcode_wordMod          = SimpleCode [opcode_wordMod]
1660    val opcode_wordAnd          = SimpleCode [opcode_wordAnd]
1661    val opcode_wordOr           = SimpleCode [opcode_wordOr]
1662    val opcode_wordXor          = SimpleCode [opcode_wordXor]
1663    val opcode_wordShiftLeft    = SimpleCode [opcode_wordShiftLeft]
1664    val opcode_wordShiftRLog    = SimpleCode [opcode_wordShiftRLog]
1665    val opcode_wordShiftRArith  = SimpleCode [opcode_escape, ext_opcode_wordShiftRArith]
1666    val opcode_allocByteMem     = SimpleCode [opcode_allocByteMem]
1667    val opcode_lgWordEqual      = SimpleCode [opcode_escape, ext_opcode_lgWordEqual]
1668    val opcode_lgWordLess       = SimpleCode [opcode_escape, ext_opcode_lgWordLess]
1669    val opcode_lgWordLessEq     = SimpleCode [opcode_escape, ext_opcode_lgWordLessEq]
1670    val opcode_lgWordGreater    = SimpleCode [opcode_escape, ext_opcode_lgWordGreater]
1671    val opcode_lgWordGreaterEq  = SimpleCode [opcode_escape, ext_opcode_lgWordGreaterEq]
1672    val opcode_lgWordAdd        = SimpleCode [opcode_escape, ext_opcode_lgWordAdd]
1673    val opcode_lgWordSub        = SimpleCode [opcode_escape, ext_opcode_lgWordSub]
1674    val opcode_lgWordMult       = SimpleCode [opcode_escape, ext_opcode_lgWordMult]
1675    val opcode_lgWordDiv        = SimpleCode [opcode_escape, ext_opcode_lgWordDiv]
1676    val opcode_lgWordMod        = SimpleCode [opcode_escape, ext_opcode_lgWordMod]
1677    val opcode_lgWordAnd        = SimpleCode [opcode_escape, ext_opcode_lgWordAnd]
1678    val opcode_lgWordOr         = SimpleCode [opcode_escape, ext_opcode_lgWordOr]
1679    val opcode_lgWordXor        = SimpleCode [opcode_escape, ext_opcode_lgWordXor]
1680    val opcode_lgWordShiftLeft  = SimpleCode [opcode_escape, ext_opcode_lgWordShiftLeft]
1681    val opcode_lgWordShiftRLog  = SimpleCode [opcode_escape, ext_opcode_lgWordShiftRLog]
1682    val opcode_lgWordShiftRArith = SimpleCode [opcode_escape, ext_opcode_lgWordShiftRArith]
1683    val opcode_realEqual        = SimpleCode [opcode_escape, ext_opcode_realEqual]
1684    val opcode_realLess         = SimpleCode [opcode_escape, ext_opcode_realLess]
1685    val opcode_realLessEq       = SimpleCode [opcode_escape, ext_opcode_realLessEq]
1686    val opcode_realGreater      = SimpleCode [opcode_escape, ext_opcode_realGreater]
1687    val opcode_realGreaterEq    = SimpleCode [opcode_escape, ext_opcode_realGreaterEq]
1688    val opcode_realUnordered    = SimpleCode [opcode_escape, ext_opcode_realUnordered]
1689    val opcode_realAdd          = SimpleCode [opcode_escape, ext_opcode_realAdd]
1690    val opcode_realSub          = SimpleCode [opcode_escape, ext_opcode_realSub]
1691    val opcode_realMult         = SimpleCode [opcode_escape, ext_opcode_realMult]
1692    val opcode_realDiv          = SimpleCode [opcode_escape, ext_opcode_realDiv]
1693    and opcode_floatAbs         = SimpleCode [opcode_escape, ext_opcode_floatAbs]
1694    and opcode_floatNeg         = SimpleCode [opcode_escape, ext_opcode_floatNeg]
1695    val opcode_floatEqual       = SimpleCode [opcode_escape, ext_opcode_floatEqual]
1696    val opcode_floatLess        = SimpleCode [opcode_escape, ext_opcode_floatLess]
1697    val opcode_floatLessEq      = SimpleCode [opcode_escape, ext_opcode_floatLessEq]
1698    val opcode_floatGreater     = SimpleCode [opcode_escape, ext_opcode_floatGreater]
1699    val opcode_floatGreaterEq   = SimpleCode [opcode_escape, ext_opcode_floatGreaterEq]
1700    val opcode_floatUnordered   = SimpleCode [opcode_escape, ext_opcode_floatUnordered]
1701    val opcode_floatAdd         = SimpleCode [opcode_escape, ext_opcode_floatAdd]
1702    val opcode_floatSub         = SimpleCode [opcode_escape, ext_opcode_floatSub]
1703    val opcode_floatMult        = SimpleCode [opcode_escape, ext_opcode_floatMult]
1704    val opcode_floatDiv         = SimpleCode [opcode_escape, ext_opcode_floatDiv]
1705    val opcode_getThreadId      = SimpleCode [opcode_getThreadId]
1706    val opcode_allocWordMemory  = SimpleCode [opcode_allocWordMemory]
1707    val opcode_alloc_ref        = SimpleCode [opcode_alloc_ref]
1708    val opcode_loadMLWord       = SimpleCode [opcode_loadMLWord]
1709    val opcode_loadMLByte       = SimpleCode [opcode_loadMLByte]
1710    val opcode_loadC8           = SimpleCode [opcode_escape, ext_opcode_loadC8]
1711    val opcode_loadC16          = SimpleCode [opcode_escape, ext_opcode_loadC16]
1712    val opcode_loadC32          = SimpleCode [opcode_escape, ext_opcode_loadC32]
1713    val opcode_loadC64          = SimpleCode [opcode_escape, ext_opcode_loadC64]
1714    val opcode_loadCFloat       = SimpleCode [opcode_escape, ext_opcode_loadCFloat]
1715    val opcode_loadCDouble      = SimpleCode [opcode_escape, ext_opcode_loadCDouble]
1716    val opcode_loadUntagged     = SimpleCode [opcode_loadUntagged]
1717    val opcode_storeMLWord      = SimpleCode [opcode_storeMLWord]
1718    val opcode_storeMLByte      = SimpleCode [opcode_storeMLByte]
1719    val opcode_storeC8          = SimpleCode [opcode_escape, ext_opcode_storeC8]
1720    val opcode_storeC16         = SimpleCode [opcode_escape, ext_opcode_storeC16]
1721    val opcode_storeC32         = SimpleCode [opcode_escape, ext_opcode_storeC32]
1722    val opcode_storeC64         = SimpleCode [opcode_escape, ext_opcode_storeC64]
1723    val opcode_storeCFloat      = SimpleCode [opcode_escape, ext_opcode_storeCFloat]
1724    val opcode_storeCDouble     = SimpleCode [opcode_escape, ext_opcode_storeCDouble]
1725    val opcode_storeUntagged    = SimpleCode [opcode_storeUntagged]
1726    val opcode_blockMoveWord    = SimpleCode [opcode_blockMoveWord]
1727    val opcode_blockMoveByte    = SimpleCode [opcode_blockMoveByte]
1728    val opcode_blockEqualByte   = SimpleCode [opcode_blockEqualByte]
1729    val opcode_blockCompareByte = SimpleCode [opcode_blockCompareByte]
1730    val opcode_deleteHandler    = SimpleCode [opcode_deleteHandler]
1731    val opcode_allocCSpace      = SimpleCode [opcode_escape, ext_opcode_allocCSpace]
1732    val opcode_freeCSpace       = SimpleCode [opcode_escape, ext_opcode_freeCSpace]
1733
1734    structure Sharing =
1735    struct
1736        type code = code
1737        type opcode = opcode
1738        type labels = labels
1739        type closureRef = closureRef
1740    end
1741
1742end;
1743
1744