1(*
2    Copyright (c) 2016-20 David C.J. Matthews
3
4    This library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Lesser General Public
6    License version 2.1 as published by the Free Software Foundation.
7    
8    This library is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11    Lesser General Public License for more details.
12    
13    You should have received a copy of the GNU Lesser General Public
14    License along with this library; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16*)
17
18functor X86ICodeIdentifyReferences(
19    structure ICODE: ICodeSig
20    structure DEBUG: DEBUG
21    structure INTSET: INTSETSIG
22): X86IDENTIFYREFSSIG =
23struct
24    open ICODE
25    open INTSET
26
27    type regState =
28    { 
29        active: int, refs: int, pushState: bool, prop: regProperty
30    }
31
32    (* CC states before and after.  The instruction may use the CC or ignore it. The only
33       instructions to use the CC is X87FPGetCondition.  Conditional branches
34       are handled at the block level.
35       The result of executing the instruction may be to set the condition code to a
36       defined state, an undefined state or leave it unchanged.
37       N.B. Some "instructions" may involve a stack reset that could affect the CC. *)
38    datatype outCCState = CCSet of ccRef | CCIndeterminate | CCUnchanged
39    and inCCState = CCNeeded of ccRef | CCUnused
40    
41    datatype extendedBasicBlock =
42        ExtendedBasicBlock of
43        {
44            block: {instr: x86ICode, current: intSet, active: intSet, kill: intSet } list,
45            flow: controlFlow,
46            locals: intSet, (* Defined and used entirely within the block. *)
47            imports: intSet, (* Defined outside the block, used inside it, but not needed afterwards. *)
48            exports: intSet, (* Defined within the block, possibly used inside, but used outside. *)
49            passThrough: intSet, (* Active throughout the block. May be referred to by it but needed afterwards. *)
50            loopRegs: intSet, (* Destination registers for a loop.  They will be updated by this block. *)
51            initialStacks: intSet, (* Stack items required at the start i.e. imports+passThrough for stack items. *)
52            inCCState: ccRef option, (* The state this block assumes.  If SOME _ all predecessors must set it. *)
53            outCCState: ccRef option (* The condition code set by this block.  SOME _ if at least one successor needs it. *)
54        }
55    
56    exception InternalError = Misc.InternalError
57
58    (* Return the list of blocks that are the immediate successor of this. *)
59    fun blockSuccessors(BasicBlock{flow, ...}) = successorBlocks flow
60
61    (* Find the registers from an argument. *)
62    fun argRegs(RegisterArgument rarg) = [rarg]
63    |   argRegs(MemoryLocation { base, index, cache=SOME cr, ...}) = cr  :: base :: argIndex index
64    |   argRegs(MemoryLocation { base, index, cache=NONE, ...}) = base :: argIndex index
65    |   argRegs(StackLocation { cache=SOME rarg, ...}) = [rarg]
66    |   argRegs _ = []
67    
68    and argIndex NoMemIndex = []
69    |   argIndex(MemIndex1 arg) = [arg]
70    |   argIndex(MemIndex2 arg) = [arg]
71    |   argIndex(MemIndex4 arg) = [arg]
72    |   argIndex(MemIndex8 arg) = [arg]
73    |   argIndex ObjectIndex = []
74
75    fun argStacks(StackLocation { container, ...}) = [container]
76    |   argStacks(ContainerAddr { container, ...}) = [container]
77    |   argStacks _ = []
78
79    (* Return the set of registers used by the instruction.
80       sources are registers that must have values after the instruction.
81       dests are registers that are given values or modified by the instruction. *)
82    fun getInstructionState(LoadArgument { source, dest, ...}) =
83        { sources=argRegs source, dests=[dest], sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }
84
85    |   getInstructionState(StoreArgument{ source, base, index, ...}) =
86            { sources=argRegs source @ [base] @ argIndex index, dests=[], sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }
87
88    |   getInstructionState(LoadMemReg { dest, ...}) =
89            { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }
90
91    |   getInstructionState(StoreMemReg { source, ...}) =
92            { sources=[source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }
93
94    |   getInstructionState(BeginFunction {regArgs, stackArgs, ...}) =
95            { sources=[], dests=map #1 regArgs, sStacks=[], dStacks=stackArgs, ccIn=CCUnused, ccOut=CCIndeterminate }
96
97    |   getInstructionState(FunctionCall{regArgs, stackArgs, dest, ...}) =
98        let
99            (* Non-tail-recursive.  Behaves as a normal reference to sources. *)
100            fun getSources argSource =
101            let
102                val stackSources = List.foldl(fn (arg, srcs) => argSource arg @ srcs) [] stackArgs
103                fun regSource((arg, _), srcs) = argSource arg @ srcs
104            in
105                List.foldl regSource stackSources regArgs
106            end
107        in
108            { sources=getSources argRegs, dests=[dest], sStacks=getSources argStacks, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
109        end
110
111    |   getInstructionState(TailRecursiveCall{regArgs, stackArgs, workReg, ...}) =
112        let
113            (* Tail recursive call.  References the argument sources but exits. *)
114            fun getSources argSource =
115            let
116                val stackSources = List.foldl(fn ({src, ...}, srcs) => argSource src @ srcs) [] stackArgs
117                fun regSource((arg, _), srcs) = argSource arg @ srcs
118            in
119                List.foldl regSource stackSources regArgs
120            end
121        in
122            { sources=getSources argRegs, dests=[workReg], sStacks=getSources argStacks, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
123        end
124
125    |   getInstructionState(AllocateMemoryOperation{dest, ...}) =
126            { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
127
128    |   getInstructionState(AllocateMemoryVariable{size, dest, ...}) =
129            { sources=[size], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
130
131    |   getInstructionState(InitialiseMem{size, addr, init}) =
132            { sources=[size, addr, init], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }
133
134    |   getInstructionState(InitialisationComplete) =
135            (* This is just a marker.  It doesn't actually generate any code. *)
136            { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
137
138    |   getInstructionState(BeginLoop) =
139            (* This is just a marker.  It doesn't actually generate any code. *)
140            { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
141
142    |   getInstructionState(JumpLoop{regArgs, stackArgs, workReg, ...}) =
143        let
144            fun getSources argSource =
145            let
146                val regSourceAsRegs =
147                    List.foldl(fn ((source, _), srcs) => argSource source @ srcs) [] regArgs
148            in
149                List.foldl(fn ((source, _, _), srcs) => argSource source @ srcs) regSourceAsRegs stackArgs
150            end
151            val dests = case workReg of SOME r => [r] | NONE => []
152        in
153            { sources=getSources argRegs, dests=dests, sStacks=getSources argStacks, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
154        end
155
156    |   getInstructionState(RaiseExceptionPacket{packetReg}) =
157            { sources=[packetReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
158
159    |   getInstructionState(ReserveContainer{container, ...}) =
160            { sources=[], dests=[], sStacks=[], dStacks=[container], ccIn=CCUnused, ccOut=CCUnchanged }
161
162    |   getInstructionState(IndexedCaseOperation{testReg, workReg, ...}) =
163            { sources=[testReg], dests=[workReg], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
164
165    |   getInstructionState(LockMutable{addr}) =
166            { sources=[addr], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
167
168    |   getInstructionState(WordComparison{arg1, arg2, ccRef, ...}) =
169            { sources=arg1 :: argRegs arg2, dests=[], sStacks=argStacks arg2, dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef }
170
171    |   getInstructionState(CompareLiteral{arg1, ccRef, ...}) =
172            { sources=argRegs arg1, dests=[], sStacks=argStacks arg1, dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef }
173
174    |   getInstructionState(CompareByteMem{arg1={base, index, ...}, ccRef, ...}) =
175            { sources=base :: argIndex index, dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef }
176
177    |   getInstructionState(PushExceptionHandler{workReg, ...}) =
178            { sources=[], dests=[workReg], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
179
180    |   getInstructionState(PopExceptionHandler{ workReg }) =
181            { sources=[], dests=[workReg], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
182
183    |   getInstructionState(BeginHandler{ workReg, packetReg, ...}) =
184            { sources=[], dests=[packetReg, workReg], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
185
186    |   getInstructionState(ReturnResultFromFunction{resultReg, ...}) =
187            { sources=[resultReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
188
189    |   getInstructionState(ArithmeticFunction{resultReg, operand1, operand2, ccRef, ...}) =
190            { sources=operand1 :: argRegs operand2, dests=[resultReg],
191              sStacks=argStacks operand2, dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef }
192
193    |   getInstructionState(TestTagBit{arg, ccRef, ...}) =
194            { sources=argRegs arg, dests=[], sStacks=argStacks arg, dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef }
195
196    |   getInstructionState(PushValue {arg, container, ...}) =
197            { sources=argRegs arg, dests=[], sStacks=argStacks arg, dStacks=[container], ccIn=CCUnused, ccOut=CCUnchanged }
198    
199    |   getInstructionState(CopyToCache{source, dest, ...}) =
200            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged}
201
202    |   getInstructionState(ResetStackPtr{preserveCC, ...}) =
203            { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused,
204                ccOut=if preserveCC then CCUnchanged else CCIndeterminate }
205
206    |   getInstructionState(StoreToStack {source, container, ...}) =
207        (* Although this stores into the container it must already exist. *)
208            { sources=argRegs source, dests=[], sStacks=container :: argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }
209
210    |   getInstructionState(TagValue{source, dest, ...}) =
211            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }
212
213    |   getInstructionState(UntagValue{source, dest, cache, ...}) =
214            { sources=case cache of NONE => [source] | SOME cr => [cr, source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
215
216    |   getInstructionState(LoadEffectiveAddress{base, index, dest, ...}) =
217        let
218            val bRegs =
219                case base of SOME bReg => [bReg] | _ => []
220            val iRegs = argIndex index
221        in
222            { sources=bRegs @ iRegs, dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }
223        end
224
225    |   getInstructionState(ShiftOperation{resultReg, operand, shiftAmount, ccRef, ...}) =
226            { sources=operand :: argRegs shiftAmount, dests=[resultReg],
227              sStacks=argStacks shiftAmount, dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef }
228
229    |   getInstructionState(Multiplication{resultReg, operand1, operand2, ccRef, ...}) =
230            { sources=operand1 :: argRegs operand2, dests=[resultReg],
231              sStacks=argStacks operand2, dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef }
232
233    |   getInstructionState(Division{dividend, divisor, quotient, remainder, ...}) =
234            { sources=dividend :: argRegs divisor, dests=[quotient, remainder],
235              sStacks=argStacks divisor, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
236
237    |   getInstructionState(AtomicExchangeAndAdd{base, source}) =
238            { sources=[base, source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
239
240    |   getInstructionState(BoxValue{source, dest, ...}) =
241            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
242
243    |   getInstructionState(CompareByteVectors{vec1Addr, vec2Addr, length, ccRef, ...}) =
244            { sources=[vec1Addr, vec2Addr, length], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef }
245
246    |   getInstructionState(BlockMove{srcAddr, destAddr, length, ...}) =
247            { sources=[srcAddr, destAddr, length], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
248
249    |   getInstructionState(X87Compare{arg1, arg2, ccRef, ...}) =
250            { sources=arg1 :: argRegs arg2, dests=[], sStacks=argStacks arg2,
251              dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef }
252
253    |   getInstructionState(SSE2Compare{arg1, arg2, ccRef, ...}) =
254            { sources=arg1 :: argRegs arg2, dests=[], sStacks=argStacks arg2,
255              dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef }
256
257    |   getInstructionState(X87FPGetCondition{dest, ccRef, ...}) =
258            { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCNeeded ccRef, ccOut=CCIndeterminate }
259
260    |   getInstructionState(X87FPArith{resultReg, arg1, arg2, ...}) =
261            { sources=arg1 :: argRegs arg2, dests=[resultReg],
262              sStacks=argStacks arg2, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
263
264    |   getInstructionState(X87FPUnaryOps{dest, source, ...}) =
265            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
266
267    |   getInstructionState(X87Float{dest, source}) =
268            { sources=argRegs source, dests=[dest], sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
269
270    |   getInstructionState(SSE2Float{dest, source}) =
271            { sources=argRegs source, dests=[dest], sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
272
273    |   getInstructionState(SSE2FPUnary{resultReg, source, ...}) =
274            { sources=argRegs source, dests=[resultReg],
275              sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
276
277    |   getInstructionState(SSE2FPBinary{resultReg, arg1, arg2, ...}) =
278            { sources=arg1 :: argRegs arg2, dests=[resultReg],
279              sStacks=argStacks arg2, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
280
281    |   getInstructionState(TagFloat{source, dest, ...}) =
282            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
283
284    |   getInstructionState(UntagFloat{source, dest, cache, ...}) =
285            { sources=case cache of NONE => argRegs source | SOME cr => cr :: argRegs source, dests=[dest],
286              sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
287
288    |   getInstructionState(GetSSE2ControlReg{dest}) =
289            { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
290
291    |   getInstructionState(SetSSE2ControlReg{source}) =
292            { sources=[source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
293
294    |   getInstructionState(GetX87ControlReg{dest}) =
295            { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
296
297    |   getInstructionState(SetX87ControlReg{source}) =
298            { sources=[source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
299
300    |   getInstructionState(X87RealToInt{ source, dest }) =
301            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
302
303    |   getInstructionState(SSE2RealToInt{ source, dest, ... }) =
304            { sources=argRegs source, dests=[dest], sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
305
306    |   getInstructionState(SignExtend32To64{ source, dest }) =
307            { sources=argRegs source, dests=[dest], sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }
308
309    |   getInstructionState(TouchArgument{source}) =
310            { sources=[source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }
311
312    (* These instructions can be eliminated if their register sources are not used.
313       There may be other cases. *)
314    fun eliminateable(LoadArgument _) = true
315    |   eliminateable(TagValue _) = true
316    |   eliminateable(UntagValue _) = true
317    |   eliminateable(LoadEffectiveAddress _) = true
318    |   eliminateable(BoxValue _) = true
319    |   eliminateable(CopyToCache _) = true
320    |   eliminateable(LoadMemReg _) = true
321    |   eliminateable _ = false
322
323    fun identifyRegs(blockVector, pregProps): extendedBasicBlock vector * regState vector =
324    let
325        val maxPRegs = Vector.length pregProps
326        val vectorLength = Vector.length blockVector
327        (* Initial arrays - declarationArray is the set of registers given
328           values by the block, importArray is the set of registers referenced by
329           the block and not declared locally. *)
330        val declarationArray = Array.array(vectorLength, emptySet)
331        and importArray = Array.array(vectorLength, emptySet)
332        val stackDecArray = Array.array(vectorLength, emptySet)
333        and stackImportArray = Array.array(vectorLength, emptySet)
334        and localLoopRegArray = Array.array(vectorLength, emptySet)
335        
336        (* References - this is used locally to see if a register is ever
337           actually used and also included in the result which uses it as
338           part of the choice of which register to spill. *)
339        val regRefs = Array.array(maxPRegs, 0)
340        (* Registers that must be pushed because they are required after
341           a function call.  For cache registers this means "discard". *)
342        and requirePushOrDiscard = Array.array(maxPRegs, false)
343
344        fun incrRef r = Array.update(regRefs, r, Array.sub(regRefs, r)+1)
345        
346        (* Contains the, possibly filtered, code for each block. *)
347        val resultCode = Array.array(vectorLength, NONE)
348        
349        val ccInStates = Array.array(vectorLength, CCUnused)
350        and ccOutStates = Array.array(vectorLength, CCIndeterminate)
351        
352        (* First pass - for each block build up the sets of registers defined and
353           used in the block.  We do this depth-first so that we can use "refs" to
354           see if a register is used.  If this is an instruction that can be eliminated
355           we don't need to generate it and can ignore any references it makes. *)
356        local
357            fun blockScan blockNo =
358            if isSome(Array.sub(resultCode, blockNo)) then ()
359            else
360            let
361                val () = Array.update(resultCode, blockNo, SOME []) (* Prevent looping. *)
362                val thisBlock as BasicBlock { block, flow, ...} = Vector.sub(blockVector, blockNo)
363                val successors = blockSuccessors thisBlock
364                (* Visit everything reachable first. *)
365                val () = List.app blockScan successors
366                
367                fun scanCode(instr, original as { code, decs, refs, sDecs, sRefs, occIn, occOut, loopRegs, ... }) =
368                let
369                    val { sources, dests, sStacks=stackSrcs, dStacks=stackDests, ccIn, ccOut, ... } =
370                        getInstructionState instr
371                    fun regNo(PReg i) = i
372                    and stackNo(StackLoc{rno, ...}) = rno
373                    val destRegNos = map regNo dests
374                    and sourceRegNos = map regNo sources
375                    val stackDestRegNos = map stackNo stackDests
376                    and stackSourceRegNos = map stackNo stackSrcs
377                    (* If this instruction requires a cc i.e. is SetToCondition or X87FPGetCondition we
378                       need to set this as a requirement earlier.  If this sets the CC and it is the condition
379                       we've been expecting we've satisfied it and can set the previous condition to Unused.
380                       We could use this to decide if a comparison is no longer required.  That can only
381                       happen in very specific circumstances e.g. some tests in Test176.ML so it's
382                       not worthwhile. *)
383                    val newInCC =
384                        case (ccIn, ccOut, occIn) of
385                            (cc as CCNeeded _, _, _) => cc (* This instr needs a particular cc. *)
386                        |   (CCUnused, CCSet _, _) => CCUnused
387                        |   (CCUnused, _, occIn) => occIn
388                    (* If this instruction modifies the CC check to see if it is setting an requirement. *)
389                    val _ =
390                        case (occIn, ccOut) of
391                            (CCNeeded ccRIn, CCSet ccRout) =>
392                                if ccRIn = ccRout then () else raise InternalError "CCCheck failed"
393                        |   (CCNeeded _, CCIndeterminate) => raise InternalError "CCCheck failed"
394                        |   _ => ()
395                    (* The output CC is the last CC set.  Tail instructions that don't change
396                       the CC state are ignored until we reach an instruction that sets it. *)
397                    val newOutCC = case occOut of CCUnchanged => ccOut | _ => occOut
398
399                    val instrLoopRegs =
400                        case instr of
401                            JumpLoop{regArgs, ...} => listToSet (map (regNo o #2) regArgs)
402                        |   _ => emptySet
403                in
404                    if eliminateable instr andalso
405                        List.all(fn dReg => Array.sub(regRefs, dReg) = 0) destRegNos
406                    then original (* Don't include this instruction. *)
407                    else
408                    let
409                        (* Only mark the sources as referred after we know we're going to need this.
410                           In that way we may eliminate the instruction that created this source. *)
411                        val () = List.app incrRef sourceRegNos
412                    in
413                        { code = instr :: code, decs = union(listToSet destRegNos, decs), refs = union(listToSet sourceRegNos, refs),
414                          sDecs = union(listToSet stackDestRegNos, sDecs), sRefs = union(listToSet stackSourceRegNos, sRefs),
415                          occIn = newInCC, occOut = newOutCC, loopRegs = union(loopRegs, instrLoopRegs)}
416                    end
417                end
418                
419                (* If we have a conditional branch at the end we need the condition code.  It should either
420                   be set here or in a preceding block. *)
421                val inCC = case flow of Conditional { ccRef, ...} => CCNeeded ccRef | _ => CCUnused
422
423                val { code, decs, refs, sDecs, sRefs, occIn, occOut, loopRegs, ... } =
424                    List.foldr scanCode
425                        {code=[], decs=emptySet, refs=emptySet, sDecs=emptySet, sRefs=emptySet, occIn=inCC, occOut=CCUnchanged, loopRegs=emptySet} block
426            in
427                Array.update(declarationArray, blockNo, decs);
428                (* refs includes local declarations. Remove before adding to the result. *)
429                Array.update(importArray, blockNo, minus(refs, decs));
430                Array.update(localLoopRegArray, blockNo, loopRegs);
431                Array.update(stackDecArray, blockNo, sDecs);
432                Array.update(stackImportArray, blockNo, minus(sRefs, sDecs));
433                Array.update(resultCode, blockNo, SOME code);
434                Array.update(ccInStates, blockNo, occIn);
435                Array.update(ccOutStates, blockNo, occOut)
436            end
437        in
438            val () = blockScan 0 (* Start with the root block. *)
439        end
440        
441        (* Second phase - Propagate reference information between the blocks.
442           We need to consider loops here.  Do a depth-first scan marking each
443           block.  If we find a loop we save the import information we've used.
444           If when we come to process that block we find the import information
445           is different we need to reprocess. *)
446        (* Pass through array - values used in other blocks after this that
447           are not declared in this block. *)
448        val passThroughArray = Array.array(vectorLength, emptySet)
449        val stackPassThroughArray = Array.array(vectorLength, emptySet)
450        (* Exports - those of our declarations that are used in other blocks. *)
451        val exportArray = Array.array(vectorLength, emptySet)
452        val stackExportArray = Array.array(vectorLength, emptySet)
453        (* Loop registers.  This contains the registers that are not exported
454           from or passed through this block but are used subsequently as
455           loop registers. *)
456        val loopRegArray = Array.array(vectorLength, emptySet)
457        val () = Array.copy{src=localLoopRegArray, dst=loopRegArray, di=0}
458        (* If any one of the successors requires the CC then this is set.
459           Otherwise we leave it as Unused. *)
460        val ccRequiredOut = Array.array(vectorLength, CCUnused)
461        local
462            datatype loopData =
463                Unprocessed | Processing | Processed
464            |   Looped of { regSet: intSet, loopSet: intSet, stackSet: intSet, ccState: inCCState }
465            
466            fun reprocessLoop () =
467            let
468                val reprocess = ref false
469                val loopArray = Array.array(vectorLength, Unprocessed)
470            
471                fun processBlocks blockNo =
472                    case Array.sub(loopArray, blockNo) of
473                        Processed => (* Already seen this by a different route. *)
474                            {
475                                regSet = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)),
476                                stackSet = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)),
477                                ccState = Array.sub(ccInStates, blockNo),
478                                loopSet = Array.sub(loopRegArray, blockNo)
479                            }
480                    |   Looped s => s (* We've already seen this in a loop. *)
481                    |   Processing => (* We have a loop. *)
482                        let
483                            (* Use the existing input array. *)
484                            val inputs =
485                            {
486                                regSet = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)),
487                                stackSet = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)),
488                                ccState = Array.sub(ccInStates, blockNo),
489                                loopSet = Array.sub(loopRegArray, blockNo)
490                            }
491                            val () = Array.update(loopArray, blockNo, Looped inputs)
492                        in
493                            inputs
494                        end
495                    |   Unprocessed => (* Normal case - not visited yet. *)
496                        let
497                            val () = Array.update(loopArray, blockNo, Processing)
498                            val thisBlock = Vector.sub(blockVector, blockNo)
499                            val ourDeclarations = Array.sub(declarationArray, blockNo)
500                            and ourStackDeclarations = Array.sub(stackDecArray, blockNo)
501                            and ourLocalLoopRegs = Array.sub(localLoopRegArray, blockNo)
502                            val successors = blockSuccessors thisBlock
503
504                            fun addSuccessor b =
505                            let
506                                val {regSet=theirImports, stackSet=theirStackImports, ccState=theirInState, loopSet=theirLoops} = processBlocks b
507                                (* Remove loop regs from the imports if they are actually given new
508                                   values by this block.  We don't want to pass the old loop regs through here. *)
509                                val theirImports = minus(theirImports, ourLocalLoopRegs)
510                                (* Split the imports.  If a register is a local declaration then
511                                   it becomes an export.  If it is not it becomes part of our
512                                   passThrough. *)
513                                val (addToExp, addToImp) =
514                                    INTSET.partition (fn i => member(i, ourDeclarations)) theirImports
515                                val (addToStackExp, addToStackImp) =
516                                    INTSET.partition (fn i => member(i, ourStackDeclarations)) theirStackImports
517                                (* Merge the input states from each of the successors. *)
518                                val () =
519                                    case (theirInState, Array.sub(ccRequiredOut, blockNo)) of
520                                        (CCNeeded ts, CCNeeded req) =>
521                                            if ts = req then () else raise InternalError "Mismatched states"
522                                    |   (ts as CCNeeded _, _) => Array.update(ccRequiredOut, blockNo, ts)
523                                    |   _ => ()
524                                (* Add loop registers to the set if they are not declared here.  The
525                                   only place they are declared is at the entry to the loop so that
526                                   stops them being propagated further. *)
527                                val addToLoops = minus(theirLoops, ourDeclarations)
528                            in
529                                Array.update(exportArray, blockNo,
530                                    union(Array.sub(exportArray, blockNo), addToExp));
531                                Array.update(passThroughArray, blockNo,
532                                    union(Array.sub(passThroughArray, blockNo), addToImp));
533                                Array.update(stackExportArray, blockNo,
534                                    union(Array.sub(stackExportArray, blockNo), addToStackExp));
535                                Array.update(stackPassThroughArray, blockNo,
536                                    union(Array.sub(stackPassThroughArray, blockNo), addToStackImp));
537                                Array.update(loopRegArray, blockNo,
538                                    union(Array.sub(loopRegArray, blockNo), addToLoops))
539                            end
540                            val () = List.app addSuccessor successors
541                            val ourInputs =
542                                union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo))
543                            val ourStackInputs =
544                                union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo))
545                        in
546                            (* Check that we supply the required state. *)
547                            case (Array.sub(ccRequiredOut, blockNo), Array.sub(ccOutStates, blockNo)) of
548                                (CCNeeded ccReq, CCSet ccSet) =>
549                                    if ccReq = ccSet then () else raise InternalError "Mismatched cc states"
550                            |   (CCNeeded _, CCIndeterminate) => raise InternalError "Mismatched cc states"
551                            |   (cc as CCNeeded needOut, CCUnchanged) =>
552                                (
553                                    (* We pass through the state.  If we don't use the state then we
554                                       need to set this as the input.  If we do use the state it must be
555                                       the same. *)
556                                    case Array.sub(ccInStates, blockNo) of
557                                        CCUnused => Array.update(ccInStates, blockNo, cc)
558                                    |   CCNeeded needIn =>
559                                            if needOut = needIn then () else raise InternalError "Mismatched cc states"
560                                )
561                            |   _ => ();
562                            (* Was this block used in a loop?  If so we should not be requiring a CC. *)
563                            case Array.sub(loopArray, blockNo) of
564                                Looped {regSet, stackSet, ...} =>
565                                (
566                                    case Array.sub(ccInStates, blockNo) of
567                                        CCNeeded _ => raise InternalError "Looped state needs cc" | _ => ();
568                                    if setToList regSet = setToList ourInputs andalso
569                                        setToList stackSet = setToList ourStackInputs
570                                    then ()
571                                    else reprocess := true
572                                )
573                            |   _ => ();
574                            Array.update(loopArray, blockNo, Processed);
575                            { regSet = ourInputs, stackSet = ourStackInputs,
576                              ccState = Array.sub(ccInStates, blockNo), loopSet=Array.sub(loopRegArray, blockNo)}
577                        end
578            in
579                reprocess := false;
580                processBlocks 0;
581                if !reprocess then reprocessLoop () else ()
582            end
583        in
584            val () = reprocessLoop ()
585        end
586        
587        (* Third pass - Build the result list with the active registers for each
588           instruction.  We don't include registers in the passThrough set since
589           they are active throughout the block. *)
590        local
591            (* Number of instrs for which this is active.  We use this to try to select a
592               register to push to the stack if we have too many.  Registers that have
593               only a short lifetime are less likely to be pushed than those that are
594               active longer. *)
595            val regActive = Array.array(maxPRegs, 0)
596            fun addActivity n r = Array.update(regActive, r, Array.sub(regActive, r)+n)
597            
598            fun createResultInstrs (passThrough, stackPassThrough)
599                (instr, (tail, activeAfterThis, stackActiveAfterThis)) =
600            let
601                val { sources, dests, sStacks=stackSrcs, dStacks=stackDests, ... } = getInstructionState instr
602            in
603                (* Eliminate instructions if their results are not required.  The earlier check for this
604                   will remove most cases but if we have duplicated a block we may have a register that
605                   is required elsewhere but not in this particular branch.  *)
606                if not(List.exists(fn PReg d => member(d, activeAfterThis)) dests) andalso eliminateable instr
607                then (tail, activeAfterThis, stackActiveAfterThis)
608                else
609                let
610                    fun regNo(PReg i) = i
611                    fun stackNo(StackLoc{rno, ...}) = rno
612                    val destRegNos = map regNo dests
613                    and sourceRegNos = map regNo sources
614                    val destSet = listToSet destRegNos
615                    (* Remove any sources that are present in passThrough since
616                       they are going to be active throughout the block. *)
617                    and sourceSet = minus(listToSet sourceRegNos, passThrough)
618                    val stackDestRegNos = map stackNo stackDests
619                    and stackSourceRegNos = map stackNo stackSrcs
620                    val stackDestSet = listToSet stackDestRegNos
621                    and stackSourceSet = minus(listToSet stackSourceRegNos, stackPassThrough)
622
623                    (* To compute the active set for the PREVIOUS instruction (we're processing from the
624                       end back to the start) we remove any registers that have been given values in this
625                       instruction and add anything that we are using in this instruction since they will
626                       now need to have values. *)
627                    val afterRemoveDests = minus(activeAfterThis, destSet)
628                    val stackAfterRemoveDests = minus(stackActiveAfterThis, stackDestSet)
629                    val activeForPrevious = union(sourceSet, afterRemoveDests)
630                    val stackActiveForPrevious = union(stackSourceSet, stackAfterRemoveDests)
631            
632                    (* The "active" set is the set of registers that need to be active DURING the
633                       instruction.  It includes destinations, which will usually be in
634                       "activeAfterThis", because there may be destinations that are not actually used
635                       subsequently but still need a register.  That will also include work registers.
636                       Usually sources aren't included if this is the last use but the
637                       AllocateMemoryVariable "instruction" can't set the size after the memory is
638                       allocated so the active set includes the source(s). *)
639                    val activeForInstr =
640                        case instr of
641                            FunctionCall _ => sourceSet (* Is this still needed? *)
642                        |   TailRecursiveCall _ =>
643                                (* Set the active set to the total set of registers we require including
644                                   the work register.  This ensures that we will spill as many registers
645                                   as we require when we look at the size of the active set. *)
646                                union(sourceSet, destSet)
647                        |   AllocateMemoryVariable _ => (* We can only set the size after the memory is allocated. *)
648                                union(activeAfterThis, union(sourceSet, destSet))
649                        |   BoxValue _ => (* We can only store the value in the box after the box is allocated. *)
650                                union(activeAfterThis, union(sourceSet, destSet))
651                        |   _ => union(activeAfterThis, destSet)
652                
653                    val () = List.app(addActivity 1) (setToList activeForInstr)
654
655                    local
656                        (* If we are allocating memory we have to save the current registers if
657                           they could contain an address.  We mustn't push untagged registers
658                           and we mustn't push the destination. *)
659                        fun getSaveSet dReg =
660                        let
661                            val activeAfter = union(activeAfterThis, passThrough)
662                            (* Remove any registers marked - must-not-push.  These are
663                               registers holding non-address values.  They will actually
664                               be saved by the RTS across any GC but not checked or
665                               modified by the GC.
666                               Exclude the result register. *)
667                            fun getSave i =
668                                if i = dReg
669                                then NONE
670                                else case Vector.sub(pregProps, i) of
671                                    RegPropGeneral => SOME(PReg i)
672                                |   RegPropCacheTagged => SOME(PReg i)
673                                |   RegPropUntagged => NONE
674                                |   RegPropStack _ => NONE
675                                |   RegPropCacheUntagged => NONE
676                                |   RegPropMultiple => raise InternalError "getSave: RegPropMultiple"
677                        in
678                            List.mapPartial getSave (setToList activeAfter)
679                        end
680                    in
681                        (* Sometimes we need to modify the instruction e.g. to include the set
682                           of registers to save. *)
683                        val convertedInstr =
684                            case instr of
685                                AllocateMemoryOperation{size, flags, dest, saveRegs=_} =>
686                                    AllocateMemoryOperation{size=size, flags=flags, dest=dest,
687                                        saveRegs=getSaveSet(regNo dest)}
688
689                            |   AllocateMemoryVariable{size, dest, saveRegs=_} =>
690                                    AllocateMemoryVariable{size=size, dest=dest, saveRegs=getSaveSet(regNo dest)}
691
692                            |   BoxValue{source, dest, boxKind, saveRegs=_} =>
693                                    BoxValue{source=source, dest=dest, boxKind=boxKind,
694                                        saveRegs=getSaveSet(regNo dest)}
695                        
696                            |   JumpLoop{regArgs, stackArgs, checkInterrupt = SOME _, workReg, ...} =>
697                                let
698                                    (* If we have to check for interrupts we must preserve registers across
699                                       the RTS call. *)
700                                    fun getSave i =
701                                        case Vector.sub(pregProps, i) of
702                                        RegPropGeneral => SOME(PReg i)
703                                    |   RegPropCacheTagged => SOME(PReg i)
704                                    |   RegPropUntagged => NONE
705                                    |   RegPropStack _ => NONE
706                                    |   RegPropCacheUntagged => NONE
707                                    |   RegPropMultiple => raise InternalError "getSave: RegPropMultiple"
708                                    val currentRegs = union(activeAfterThis, passThrough)
709                                    (* Have to include the loop registers.  These were previously included
710                                       automatically because they were part of the import set. *)
711                                    val check = List.mapPartial getSave (map (regNo o #2) regArgs @ setToList currentRegs)
712                                in
713                                    JumpLoop{regArgs=regArgs, stackArgs=stackArgs, checkInterrupt=SOME check, workReg=workReg}
714                                end
715                        
716                            |   FunctionCall{regArgs, stackArgs=[], dest, realDest, callKind as ConstantCode m, saveRegs=_} =>
717                                (* If this is arbitrary precision push the registers rather than marking them as "save".
718                                   stringOfWord returns 'CODE "PolyAddArbitrary"' etc. *)
719                                if (String.isSubstring "Arbitrary\"" (Address.stringOfWord m))
720                                then FunctionCall{regArgs=regArgs, stackArgs=[], callKind=callKind, dest=dest,
721                                        realDest=realDest, saveRegs=getSaveSet(regNo dest) }
722                                else instr
723                        
724                            |   instr as LoadArgument{dest=PReg dreg, ...} =>
725                                (
726                                    if member(dreg, activeAfterThis)
727                                    then ()
728                                    else print("Register " ^ Int.toString dreg ^ " inactive-" ^ PolyML.makestring instr ^ "\n");
729                                    instr
730                                )
731
732                            |   _ => instr
733                    end
734                
735                    (* FunctionCall must mark all registers as "push". *)
736                    local
737                        fun pushRegisters () =
738                        let
739                            val activeAfter = union(activeAfterThis, passThrough)
740                            fun pushAllButDests i =
741                                if List.exists(fn j => i=j) destRegNos
742                                then ()
743                                else case Vector.sub(pregProps, i) of
744                                    RegPropCacheTagged => raise InternalError "pushRegisters: cache reg"
745                                |   RegPropCacheUntagged => raise InternalError "pushRegisters: cache reg"
746                                |  _ => Array.update(requirePushOrDiscard, i, true)
747                        in
748                            (* We need to push everything active after this
749                               except the result register. *)
750                            List.app pushAllButDests (setToList activeAfter)
751                        end
752                    in
753                        val () =
754                            case instr of
755                                FunctionCall{ stackArgs=[], callKind=ConstantCode m, ...} =>
756                                if (String.isSubstring "Arbitrary\"" (Address.stringOfWord m))
757                                then ()
758                                else pushRegisters ()
759                            
760                            |   FunctionCall _ => pushRegisters ()
761                            
762                                (* It should no longer be necessary to push across a handler but
763                                   there still seem to be cases that need it. *)
764                            |   BeginHandler _ => pushRegisters ()
765                        
766                            |   CopyToCache {source=PReg srcReg, dest=PReg dstReg, ...} =>
767                                (* If the source is a cache register marked as "must push" i.e. discard,
768                                   the destination must also be discarded i.e. not available. 
769                                   Note: the source could be a non-cache register marked for pushing. *)
770                                (
771                                    case (Vector.sub(pregProps, srcReg), Array.sub(requirePushOrDiscard, srcReg)) of
772                                        (RegPropCacheTagged, true) => Array.update(requirePushOrDiscard, dstReg, true)
773                                    |   (RegPropCacheUntagged, true) => Array.update(requirePushOrDiscard, dstReg, true)
774                                    |   _ => ()
775                                )
776
777                            |   _ => ()
778                    end
779                
780                    (* Which entries are active in this instruction but not afterwards? *)
781                    val kill = union(minus(stackSourceSet, stackActiveAfterThis), minus(sourceSet, activeAfterThis))
782                in
783                    ({instr=convertedInstr, active=activeForInstr, current=activeAfterThis, kill=kill} :: tail, activeForPrevious,
784                     stackActiveForPrevious)
785                end
786            end
787
788            fun createResult blockNo =
789            let
790                val BasicBlock{ flow, ...} = Vector.sub(blockVector, blockNo)
791                val declSet   = Array.sub(declarationArray, blockNo)
792                and importSet = Array.sub(importArray, blockNo)
793                and passSet   = Array.sub(passThroughArray, blockNo)
794                and loopSet   = Array.sub(loopRegArray, blockNo)
795                and exportSet = Array.sub(exportArray, blockNo)
796                and stackPassSet = Array.sub(stackPassThroughArray, blockNo)
797                and stackImportSet = Array.sub(stackImportArray, blockNo)
798                and stackExportSet = Array.sub(stackExportArray, blockNo)
799                val filteredCode = getOpt(Array.sub(resultCode, blockNo), [])
800                (* At the end of the block we should have the exports active. *)
801                val (resultInstrs, _, _) = List.foldr (createResultInstrs (passSet, stackPassSet)) ([], exportSet, stackExportSet) filteredCode
802                (* Set the active count for the pass through. *)
803                val instrCount = List.length filteredCode
804                val () = List.app(addActivity instrCount) (setToList passSet)
805                val inCCState =
806                    case Array.sub(ccInStates, blockNo) of CCNeeded s => SOME s | CCUnused => NONE
807                val outCCState =
808                    case Array.sub(ccRequiredOut, blockNo) of CCNeeded s => SOME s | CCUnused => NONE
809            in
810                ExtendedBasicBlock {
811                    block = resultInstrs,
812                    flow=flow,
813                    locals = minus(declSet, exportSet),
814                    imports = importSet,
815                    exports = exportSet,
816                    passThrough = passSet,
817                    loopRegs = loopSet,
818                    initialStacks = union(stackPassSet, stackImportSet),
819                    inCCState = inCCState,
820                    outCCState = outCCState
821                }
822            end
823        in
824            val resultBlocks = Vector.tabulate(vectorLength, createResult)
825            val regActive = regActive
826        end
827        
828        val registerState: regState vector =
829            Vector.tabulate(maxPRegs,
830                fn i => {
831                    active = Array.sub(regActive, i),
832                    refs = Array.sub(regRefs, i),
833                    pushState = Array.sub(requirePushOrDiscard, i),
834                    prop = Vector.sub(pregProps, i)
835                }
836            )
837    in
838        (resultBlocks, registerState)
839    end
840
841    (* Exported function.  First filter out unreferenced blocks then process the
842       registers themselves. *)
843    fun identifyRegisters(blockVector, pregProps) =
844    let
845        val vectorLength = Vector.length blockVector
846        val mapArray = Array.array(vectorLength, NONE)
847        and resArray = Array.array(vectorLength, NONE)
848        val count = ref 0
849        
850        fun setReferences label =
851            case Array.sub(mapArray, label) of
852                NONE => (* Not yet visited *)
853                let
854                    val BasicBlock{flow, block} = Vector.sub(blockVector, label)
855                    (* Create a new entry for it. *)
856                    val newLabel = ! count before count := !count + 1
857                    (* Add it to the map.  Any other references will use this
858                       without reprocessing. *)
859                    val () = Array.update(mapArray, label, SOME newLabel)
860                    val newFlow =
861                        case flow of
862                            Unconditional l => Unconditional(setReferences l)
863                        |   Conditional{trueJump, falseJump, ccRef, condition} =>
864                                Conditional{trueJump=setReferences trueJump, falseJump=setReferences falseJump,
865                                        ccRef=ccRef, condition=condition}
866                        |   ExitCode => ExitCode
867                        |   IndexedBr list => IndexedBr(map setReferences list)
868                        |   SetHandler{handler, continue} =>
869                                SetHandler{handler=setReferences handler, continue=setReferences continue}
870                        |   UnconditionalHandle l => UnconditionalHandle(setReferences l)
871                        |   ConditionalHandle{handler, continue} =>
872                                ConditionalHandle{handler=setReferences handler, continue=setReferences continue}
873
874                    val () = Array.update(resArray, newLabel, SOME(BasicBlock{flow=newFlow, block=block}))
875                in
876                    newLabel
877                end
878            |   SOME lab => lab
879       
880        val _ = setReferences 0
881        
882        val newBlockVector =
883            Vector.tabulate(!count, fn i => valOf(Array.sub(resArray, i)))
884    in
885        identifyRegs(newBlockVector, pregProps)
886    end
887
888    (* Exported for use in GetConflictSets *)
889    fun getInstructionRegisters instr =
890    let
891        val {sources, dests, ...} = getInstructionState instr
892    in
893        {sources=sources, dests=dests}
894    end
895    
896    (* Exported for use in ICodeOptimise *)
897    val getInstructionCC = #ccOut o getInstructionState
898
899    structure Sharing =
900    struct
901        type x86ICode           = x86ICode
902        and reg                 = reg
903        and preg                = preg
904        and intSet              = intSet
905        and basicBlock          = basicBlock
906        and extendedBasicBlock  = extendedBasicBlock
907        and controlFlow         = controlFlow
908        and argument            = argument
909        and memoryIndex         = memoryIndex
910        and regProperty         = regProperty
911        and ccRef               = ccRef
912        and outCCState          = outCCState
913    end
914end;
915