1(* 2 Copyright (c) 2016-20 David C.J. Matthews 3 4 This library is free software; you can redistribute it and/or 5 modify it under the terms of the GNU Lesser General Public 6 License version 2.1 as published by the Free Software Foundation. 7 8 This library is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 Lesser General Public License for more details. 12 13 You should have received a copy of the GNU Lesser General Public 14 License along with this library; if not, write to the Free Software 15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16*) 17 18functor X86ICodeIdentifyReferences( 19 structure ICODE: ICodeSig 20 structure DEBUG: DEBUG 21 structure INTSET: INTSETSIG 22): X86IDENTIFYREFSSIG = 23struct 24 open ICODE 25 open INTSET 26 27 type regState = 28 { 29 active: int, refs: int, pushState: bool, prop: regProperty 30 } 31 32 (* CC states before and after. The instruction may use the CC or ignore it. The only 33 instructions to use the CC is X87FPGetCondition. Conditional branches 34 are handled at the block level. 35 The result of executing the instruction may be to set the condition code to a 36 defined state, an undefined state or leave it unchanged. 37 N.B. Some "instructions" may involve a stack reset that could affect the CC. *) 38 datatype outCCState = CCSet of ccRef | CCIndeterminate | CCUnchanged 39 and inCCState = CCNeeded of ccRef | CCUnused 40 41 datatype extendedBasicBlock = 42 ExtendedBasicBlock of 43 { 44 block: {instr: x86ICode, current: intSet, active: intSet, kill: intSet } list, 45 flow: controlFlow, 46 locals: intSet, (* Defined and used entirely within the block. *) 47 imports: intSet, (* Defined outside the block, used inside it, but not needed afterwards. *) 48 exports: intSet, (* Defined within the block, possibly used inside, but used outside. *) 49 passThrough: intSet, (* Active throughout the block. May be referred to by it but needed afterwards. *) 50 loopRegs: intSet, (* Destination registers for a loop. They will be updated by this block. *) 51 initialStacks: intSet, (* Stack items required at the start i.e. imports+passThrough for stack items. *) 52 inCCState: ccRef option, (* The state this block assumes. If SOME _ all predecessors must set it. *) 53 outCCState: ccRef option (* The condition code set by this block. SOME _ if at least one successor needs it. *) 54 } 55 56 exception InternalError = Misc.InternalError 57 58 (* Return the list of blocks that are the immediate successor of this. *) 59 fun blockSuccessors(BasicBlock{flow, ...}) = successorBlocks flow 60 61 (* Find the registers from an argument. *) 62 fun argRegs(RegisterArgument rarg) = [rarg] 63 | argRegs(MemoryLocation { base, index, cache=SOME cr, ...}) = cr :: base :: argIndex index 64 | argRegs(MemoryLocation { base, index, cache=NONE, ...}) = base :: argIndex index 65 | argRegs(StackLocation { cache=SOME rarg, ...}) = [rarg] 66 | argRegs _ = [] 67 68 and argIndex NoMemIndex = [] 69 | argIndex(MemIndex1 arg) = [arg] 70 | argIndex(MemIndex2 arg) = [arg] 71 | argIndex(MemIndex4 arg) = [arg] 72 | argIndex(MemIndex8 arg) = [arg] 73 | argIndex ObjectIndex = [] 74 75 fun argStacks(StackLocation { container, ...}) = [container] 76 | argStacks(ContainerAddr { container, ...}) = [container] 77 | argStacks _ = [] 78 79 (* Return the set of registers used by the instruction. 80 sources are registers that must have values after the instruction. 81 dests are registers that are given values or modified by the instruction. *) 82 fun getInstructionState(LoadArgument { source, dest, ...}) = 83 { sources=argRegs source, dests=[dest], sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } 84 85 | getInstructionState(StoreArgument{ source, base, index, ...}) = 86 { sources=argRegs source @ [base] @ argIndex index, dests=[], sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } 87 88 | getInstructionState(LoadMemReg { dest, ...}) = 89 { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } 90 91 | getInstructionState(StoreMemReg { source, ...}) = 92 { sources=[source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } 93 94 | getInstructionState(BeginFunction {regArgs, stackArgs, ...}) = 95 { sources=[], dests=map #1 regArgs, sStacks=[], dStacks=stackArgs, ccIn=CCUnused, ccOut=CCIndeterminate } 96 97 | getInstructionState(FunctionCall{regArgs, stackArgs, dest, ...}) = 98 let 99 (* Non-tail-recursive. Behaves as a normal reference to sources. *) 100 fun getSources argSource = 101 let 102 val stackSources = List.foldl(fn (arg, srcs) => argSource arg @ srcs) [] stackArgs 103 fun regSource((arg, _), srcs) = argSource arg @ srcs 104 in 105 List.foldl regSource stackSources regArgs 106 end 107 in 108 { sources=getSources argRegs, dests=[dest], sStacks=getSources argStacks, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 109 end 110 111 | getInstructionState(TailRecursiveCall{regArgs, stackArgs, workReg, ...}) = 112 let 113 (* Tail recursive call. References the argument sources but exits. *) 114 fun getSources argSource = 115 let 116 val stackSources = List.foldl(fn ({src, ...}, srcs) => argSource src @ srcs) [] stackArgs 117 fun regSource((arg, _), srcs) = argSource arg @ srcs 118 in 119 List.foldl regSource stackSources regArgs 120 end 121 in 122 { sources=getSources argRegs, dests=[workReg], sStacks=getSources argStacks, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 123 end 124 125 | getInstructionState(AllocateMemoryOperation{dest, ...}) = 126 { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 127 128 | getInstructionState(AllocateMemoryVariable{size, dest, ...}) = 129 { sources=[size], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 130 131 | getInstructionState(InitialiseMem{size, addr, init}) = 132 { sources=[size, addr, init], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } 133 134 | getInstructionState(InitialisationComplete) = 135 (* This is just a marker. It doesn't actually generate any code. *) 136 { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 137 138 | getInstructionState(BeginLoop) = 139 (* This is just a marker. It doesn't actually generate any code. *) 140 { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 141 142 | getInstructionState(JumpLoop{regArgs, stackArgs, workReg, ...}) = 143 let 144 fun getSources argSource = 145 let 146 val regSourceAsRegs = 147 List.foldl(fn ((source, _), srcs) => argSource source @ srcs) [] regArgs 148 in 149 List.foldl(fn ((source, _, _), srcs) => argSource source @ srcs) regSourceAsRegs stackArgs 150 end 151 val dests = case workReg of SOME r => [r] | NONE => [] 152 in 153 { sources=getSources argRegs, dests=dests, sStacks=getSources argStacks, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 154 end 155 156 | getInstructionState(RaiseExceptionPacket{packetReg}) = 157 { sources=[packetReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 158 159 | getInstructionState(ReserveContainer{container, ...}) = 160 { sources=[], dests=[], sStacks=[], dStacks=[container], ccIn=CCUnused, ccOut=CCUnchanged } 161 162 | getInstructionState(IndexedCaseOperation{testReg, workReg, ...}) = 163 { sources=[testReg], dests=[workReg], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 164 165 | getInstructionState(LockMutable{addr}) = 166 { sources=[addr], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 167 168 | getInstructionState(WordComparison{arg1, arg2, ccRef, ...}) = 169 { sources=arg1 :: argRegs arg2, dests=[], sStacks=argStacks arg2, dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } 170 171 | getInstructionState(CompareLiteral{arg1, ccRef, ...}) = 172 { sources=argRegs arg1, dests=[], sStacks=argStacks arg1, dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } 173 174 | getInstructionState(CompareByteMem{arg1={base, index, ...}, ccRef, ...}) = 175 { sources=base :: argIndex index, dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } 176 177 | getInstructionState(PushExceptionHandler{workReg, ...}) = 178 { sources=[], dests=[workReg], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 179 180 | getInstructionState(PopExceptionHandler{ workReg }) = 181 { sources=[], dests=[workReg], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 182 183 | getInstructionState(BeginHandler{ workReg, packetReg, ...}) = 184 { sources=[], dests=[packetReg, workReg], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 185 186 | getInstructionState(ReturnResultFromFunction{resultReg, ...}) = 187 { sources=[resultReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 188 189 | getInstructionState(ArithmeticFunction{resultReg, operand1, operand2, ccRef, ...}) = 190 { sources=operand1 :: argRegs operand2, dests=[resultReg], 191 sStacks=argStacks operand2, dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } 192 193 | getInstructionState(TestTagBit{arg, ccRef, ...}) = 194 { sources=argRegs arg, dests=[], sStacks=argStacks arg, dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } 195 196 | getInstructionState(PushValue {arg, container, ...}) = 197 { sources=argRegs arg, dests=[], sStacks=argStacks arg, dStacks=[container], ccIn=CCUnused, ccOut=CCUnchanged } 198 199 | getInstructionState(CopyToCache{source, dest, ...}) = 200 { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged} 201 202 | getInstructionState(ResetStackPtr{preserveCC, ...}) = 203 { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, 204 ccOut=if preserveCC then CCUnchanged else CCIndeterminate } 205 206 | getInstructionState(StoreToStack {source, container, ...}) = 207 (* Although this stores into the container it must already exist. *) 208 { sources=argRegs source, dests=[], sStacks=container :: argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } 209 210 | getInstructionState(TagValue{source, dest, ...}) = 211 { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } 212 213 | getInstructionState(UntagValue{source, dest, cache, ...}) = 214 { sources=case cache of NONE => [source] | SOME cr => [cr, source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 215 216 | getInstructionState(LoadEffectiveAddress{base, index, dest, ...}) = 217 let 218 val bRegs = 219 case base of SOME bReg => [bReg] | _ => [] 220 val iRegs = argIndex index 221 in 222 { sources=bRegs @ iRegs, dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } 223 end 224 225 | getInstructionState(ShiftOperation{resultReg, operand, shiftAmount, ccRef, ...}) = 226 { sources=operand :: argRegs shiftAmount, dests=[resultReg], 227 sStacks=argStacks shiftAmount, dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } 228 229 | getInstructionState(Multiplication{resultReg, operand1, operand2, ccRef, ...}) = 230 { sources=operand1 :: argRegs operand2, dests=[resultReg], 231 sStacks=argStacks operand2, dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } 232 233 | getInstructionState(Division{dividend, divisor, quotient, remainder, ...}) = 234 { sources=dividend :: argRegs divisor, dests=[quotient, remainder], 235 sStacks=argStacks divisor, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 236 237 | getInstructionState(AtomicExchangeAndAdd{base, source}) = 238 { sources=[base, source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 239 240 | getInstructionState(BoxValue{source, dest, ...}) = 241 { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 242 243 | getInstructionState(CompareByteVectors{vec1Addr, vec2Addr, length, ccRef, ...}) = 244 { sources=[vec1Addr, vec2Addr, length], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } 245 246 | getInstructionState(BlockMove{srcAddr, destAddr, length, ...}) = 247 { sources=[srcAddr, destAddr, length], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 248 249 | getInstructionState(X87Compare{arg1, arg2, ccRef, ...}) = 250 { sources=arg1 :: argRegs arg2, dests=[], sStacks=argStacks arg2, 251 dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } 252 253 | getInstructionState(SSE2Compare{arg1, arg2, ccRef, ...}) = 254 { sources=arg1 :: argRegs arg2, dests=[], sStacks=argStacks arg2, 255 dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } 256 257 | getInstructionState(X87FPGetCondition{dest, ccRef, ...}) = 258 { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCNeeded ccRef, ccOut=CCIndeterminate } 259 260 | getInstructionState(X87FPArith{resultReg, arg1, arg2, ...}) = 261 { sources=arg1 :: argRegs arg2, dests=[resultReg], 262 sStacks=argStacks arg2, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 263 264 | getInstructionState(X87FPUnaryOps{dest, source, ...}) = 265 { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 266 267 | getInstructionState(X87Float{dest, source}) = 268 { sources=argRegs source, dests=[dest], sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 269 270 | getInstructionState(SSE2Float{dest, source}) = 271 { sources=argRegs source, dests=[dest], sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 272 273 | getInstructionState(SSE2FPUnary{resultReg, source, ...}) = 274 { sources=argRegs source, dests=[resultReg], 275 sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 276 277 | getInstructionState(SSE2FPBinary{resultReg, arg1, arg2, ...}) = 278 { sources=arg1 :: argRegs arg2, dests=[resultReg], 279 sStacks=argStacks arg2, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 280 281 | getInstructionState(TagFloat{source, dest, ...}) = 282 { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 283 284 | getInstructionState(UntagFloat{source, dest, cache, ...}) = 285 { sources=case cache of NONE => argRegs source | SOME cr => cr :: argRegs source, dests=[dest], 286 sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 287 288 | getInstructionState(GetSSE2ControlReg{dest}) = 289 { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 290 291 | getInstructionState(SetSSE2ControlReg{source}) = 292 { sources=[source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 293 294 | getInstructionState(GetX87ControlReg{dest}) = 295 { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 296 297 | getInstructionState(SetX87ControlReg{source}) = 298 { sources=[source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 299 300 | getInstructionState(X87RealToInt{ source, dest }) = 301 { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 302 303 | getInstructionState(SSE2RealToInt{ source, dest, ... }) = 304 { sources=argRegs source, dests=[dest], sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } 305 306 | getInstructionState(SignExtend32To64{ source, dest }) = 307 { sources=argRegs source, dests=[dest], sStacks=argStacks source, dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } 308 309 | getInstructionState(TouchArgument{source}) = 310 { sources=[source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } 311 312 (* These instructions can be eliminated if their register sources are not used. 313 There may be other cases. *) 314 fun eliminateable(LoadArgument _) = true 315 | eliminateable(TagValue _) = true 316 | eliminateable(UntagValue _) = true 317 | eliminateable(LoadEffectiveAddress _) = true 318 | eliminateable(BoxValue _) = true 319 | eliminateable(CopyToCache _) = true 320 | eliminateable(LoadMemReg _) = true 321 | eliminateable _ = false 322 323 fun identifyRegs(blockVector, pregProps): extendedBasicBlock vector * regState vector = 324 let 325 val maxPRegs = Vector.length pregProps 326 val vectorLength = Vector.length blockVector 327 (* Initial arrays - declarationArray is the set of registers given 328 values by the block, importArray is the set of registers referenced by 329 the block and not declared locally. *) 330 val declarationArray = Array.array(vectorLength, emptySet) 331 and importArray = Array.array(vectorLength, emptySet) 332 val stackDecArray = Array.array(vectorLength, emptySet) 333 and stackImportArray = Array.array(vectorLength, emptySet) 334 and localLoopRegArray = Array.array(vectorLength, emptySet) 335 336 (* References - this is used locally to see if a register is ever 337 actually used and also included in the result which uses it as 338 part of the choice of which register to spill. *) 339 val regRefs = Array.array(maxPRegs, 0) 340 (* Registers that must be pushed because they are required after 341 a function call. For cache registers this means "discard". *) 342 and requirePushOrDiscard = Array.array(maxPRegs, false) 343 344 fun incrRef r = Array.update(regRefs, r, Array.sub(regRefs, r)+1) 345 346 (* Contains the, possibly filtered, code for each block. *) 347 val resultCode = Array.array(vectorLength, NONE) 348 349 val ccInStates = Array.array(vectorLength, CCUnused) 350 and ccOutStates = Array.array(vectorLength, CCIndeterminate) 351 352 (* First pass - for each block build up the sets of registers defined and 353 used in the block. We do this depth-first so that we can use "refs" to 354 see if a register is used. If this is an instruction that can be eliminated 355 we don't need to generate it and can ignore any references it makes. *) 356 local 357 fun blockScan blockNo = 358 if isSome(Array.sub(resultCode, blockNo)) then () 359 else 360 let 361 val () = Array.update(resultCode, blockNo, SOME []) (* Prevent looping. *) 362 val thisBlock as BasicBlock { block, flow, ...} = Vector.sub(blockVector, blockNo) 363 val successors = blockSuccessors thisBlock 364 (* Visit everything reachable first. *) 365 val () = List.app blockScan successors 366 367 fun scanCode(instr, original as { code, decs, refs, sDecs, sRefs, occIn, occOut, loopRegs, ... }) = 368 let 369 val { sources, dests, sStacks=stackSrcs, dStacks=stackDests, ccIn, ccOut, ... } = 370 getInstructionState instr 371 fun regNo(PReg i) = i 372 and stackNo(StackLoc{rno, ...}) = rno 373 val destRegNos = map regNo dests 374 and sourceRegNos = map regNo sources 375 val stackDestRegNos = map stackNo stackDests 376 and stackSourceRegNos = map stackNo stackSrcs 377 (* If this instruction requires a cc i.e. is SetToCondition or X87FPGetCondition we 378 need to set this as a requirement earlier. If this sets the CC and it is the condition 379 we've been expecting we've satisfied it and can set the previous condition to Unused. 380 We could use this to decide if a comparison is no longer required. That can only 381 happen in very specific circumstances e.g. some tests in Test176.ML so it's 382 not worthwhile. *) 383 val newInCC = 384 case (ccIn, ccOut, occIn) of 385 (cc as CCNeeded _, _, _) => cc (* This instr needs a particular cc. *) 386 | (CCUnused, CCSet _, _) => CCUnused 387 | (CCUnused, _, occIn) => occIn 388 (* If this instruction modifies the CC check to see if it is setting an requirement. *) 389 val _ = 390 case (occIn, ccOut) of 391 (CCNeeded ccRIn, CCSet ccRout) => 392 if ccRIn = ccRout then () else raise InternalError "CCCheck failed" 393 | (CCNeeded _, CCIndeterminate) => raise InternalError "CCCheck failed" 394 | _ => () 395 (* The output CC is the last CC set. Tail instructions that don't change 396 the CC state are ignored until we reach an instruction that sets it. *) 397 val newOutCC = case occOut of CCUnchanged => ccOut | _ => occOut 398 399 val instrLoopRegs = 400 case instr of 401 JumpLoop{regArgs, ...} => listToSet (map (regNo o #2) regArgs) 402 | _ => emptySet 403 in 404 if eliminateable instr andalso 405 List.all(fn dReg => Array.sub(regRefs, dReg) = 0) destRegNos 406 then original (* Don't include this instruction. *) 407 else 408 let 409 (* Only mark the sources as referred after we know we're going to need this. 410 In that way we may eliminate the instruction that created this source. *) 411 val () = List.app incrRef sourceRegNos 412 in 413 { code = instr :: code, decs = union(listToSet destRegNos, decs), refs = union(listToSet sourceRegNos, refs), 414 sDecs = union(listToSet stackDestRegNos, sDecs), sRefs = union(listToSet stackSourceRegNos, sRefs), 415 occIn = newInCC, occOut = newOutCC, loopRegs = union(loopRegs, instrLoopRegs)} 416 end 417 end 418 419 (* If we have a conditional branch at the end we need the condition code. It should either 420 be set here or in a preceding block. *) 421 val inCC = case flow of Conditional { ccRef, ...} => CCNeeded ccRef | _ => CCUnused 422 423 val { code, decs, refs, sDecs, sRefs, occIn, occOut, loopRegs, ... } = 424 List.foldr scanCode 425 {code=[], decs=emptySet, refs=emptySet, sDecs=emptySet, sRefs=emptySet, occIn=inCC, occOut=CCUnchanged, loopRegs=emptySet} block 426 in 427 Array.update(declarationArray, blockNo, decs); 428 (* refs includes local declarations. Remove before adding to the result. *) 429 Array.update(importArray, blockNo, minus(refs, decs)); 430 Array.update(localLoopRegArray, blockNo, loopRegs); 431 Array.update(stackDecArray, blockNo, sDecs); 432 Array.update(stackImportArray, blockNo, minus(sRefs, sDecs)); 433 Array.update(resultCode, blockNo, SOME code); 434 Array.update(ccInStates, blockNo, occIn); 435 Array.update(ccOutStates, blockNo, occOut) 436 end 437 in 438 val () = blockScan 0 (* Start with the root block. *) 439 end 440 441 (* Second phase - Propagate reference information between the blocks. 442 We need to consider loops here. Do a depth-first scan marking each 443 block. If we find a loop we save the import information we've used. 444 If when we come to process that block we find the import information 445 is different we need to reprocess. *) 446 (* Pass through array - values used in other blocks after this that 447 are not declared in this block. *) 448 val passThroughArray = Array.array(vectorLength, emptySet) 449 val stackPassThroughArray = Array.array(vectorLength, emptySet) 450 (* Exports - those of our declarations that are used in other blocks. *) 451 val exportArray = Array.array(vectorLength, emptySet) 452 val stackExportArray = Array.array(vectorLength, emptySet) 453 (* Loop registers. This contains the registers that are not exported 454 from or passed through this block but are used subsequently as 455 loop registers. *) 456 val loopRegArray = Array.array(vectorLength, emptySet) 457 val () = Array.copy{src=localLoopRegArray, dst=loopRegArray, di=0} 458 (* If any one of the successors requires the CC then this is set. 459 Otherwise we leave it as Unused. *) 460 val ccRequiredOut = Array.array(vectorLength, CCUnused) 461 local 462 datatype loopData = 463 Unprocessed | Processing | Processed 464 | Looped of { regSet: intSet, loopSet: intSet, stackSet: intSet, ccState: inCCState } 465 466 fun reprocessLoop () = 467 let 468 val reprocess = ref false 469 val loopArray = Array.array(vectorLength, Unprocessed) 470 471 fun processBlocks blockNo = 472 case Array.sub(loopArray, blockNo) of 473 Processed => (* Already seen this by a different route. *) 474 { 475 regSet = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)), 476 stackSet = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)), 477 ccState = Array.sub(ccInStates, blockNo), 478 loopSet = Array.sub(loopRegArray, blockNo) 479 } 480 | Looped s => s (* We've already seen this in a loop. *) 481 | Processing => (* We have a loop. *) 482 let 483 (* Use the existing input array. *) 484 val inputs = 485 { 486 regSet = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)), 487 stackSet = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)), 488 ccState = Array.sub(ccInStates, blockNo), 489 loopSet = Array.sub(loopRegArray, blockNo) 490 } 491 val () = Array.update(loopArray, blockNo, Looped inputs) 492 in 493 inputs 494 end 495 | Unprocessed => (* Normal case - not visited yet. *) 496 let 497 val () = Array.update(loopArray, blockNo, Processing) 498 val thisBlock = Vector.sub(blockVector, blockNo) 499 val ourDeclarations = Array.sub(declarationArray, blockNo) 500 and ourStackDeclarations = Array.sub(stackDecArray, blockNo) 501 and ourLocalLoopRegs = Array.sub(localLoopRegArray, blockNo) 502 val successors = blockSuccessors thisBlock 503 504 fun addSuccessor b = 505 let 506 val {regSet=theirImports, stackSet=theirStackImports, ccState=theirInState, loopSet=theirLoops} = processBlocks b 507 (* Remove loop regs from the imports if they are actually given new 508 values by this block. We don't want to pass the old loop regs through here. *) 509 val theirImports = minus(theirImports, ourLocalLoopRegs) 510 (* Split the imports. If a register is a local declaration then 511 it becomes an export. If it is not it becomes part of our 512 passThrough. *) 513 val (addToExp, addToImp) = 514 INTSET.partition (fn i => member(i, ourDeclarations)) theirImports 515 val (addToStackExp, addToStackImp) = 516 INTSET.partition (fn i => member(i, ourStackDeclarations)) theirStackImports 517 (* Merge the input states from each of the successors. *) 518 val () = 519 case (theirInState, Array.sub(ccRequiredOut, blockNo)) of 520 (CCNeeded ts, CCNeeded req) => 521 if ts = req then () else raise InternalError "Mismatched states" 522 | (ts as CCNeeded _, _) => Array.update(ccRequiredOut, blockNo, ts) 523 | _ => () 524 (* Add loop registers to the set if they are not declared here. The 525 only place they are declared is at the entry to the loop so that 526 stops them being propagated further. *) 527 val addToLoops = minus(theirLoops, ourDeclarations) 528 in 529 Array.update(exportArray, blockNo, 530 union(Array.sub(exportArray, blockNo), addToExp)); 531 Array.update(passThroughArray, blockNo, 532 union(Array.sub(passThroughArray, blockNo), addToImp)); 533 Array.update(stackExportArray, blockNo, 534 union(Array.sub(stackExportArray, blockNo), addToStackExp)); 535 Array.update(stackPassThroughArray, blockNo, 536 union(Array.sub(stackPassThroughArray, blockNo), addToStackImp)); 537 Array.update(loopRegArray, blockNo, 538 union(Array.sub(loopRegArray, blockNo), addToLoops)) 539 end 540 val () = List.app addSuccessor successors 541 val ourInputs = 542 union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)) 543 val ourStackInputs = 544 union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)) 545 in 546 (* Check that we supply the required state. *) 547 case (Array.sub(ccRequiredOut, blockNo), Array.sub(ccOutStates, blockNo)) of 548 (CCNeeded ccReq, CCSet ccSet) => 549 if ccReq = ccSet then () else raise InternalError "Mismatched cc states" 550 | (CCNeeded _, CCIndeterminate) => raise InternalError "Mismatched cc states" 551 | (cc as CCNeeded needOut, CCUnchanged) => 552 ( 553 (* We pass through the state. If we don't use the state then we 554 need to set this as the input. If we do use the state it must be 555 the same. *) 556 case Array.sub(ccInStates, blockNo) of 557 CCUnused => Array.update(ccInStates, blockNo, cc) 558 | CCNeeded needIn => 559 if needOut = needIn then () else raise InternalError "Mismatched cc states" 560 ) 561 | _ => (); 562 (* Was this block used in a loop? If so we should not be requiring a CC. *) 563 case Array.sub(loopArray, blockNo) of 564 Looped {regSet, stackSet, ...} => 565 ( 566 case Array.sub(ccInStates, blockNo) of 567 CCNeeded _ => raise InternalError "Looped state needs cc" | _ => (); 568 if setToList regSet = setToList ourInputs andalso 569 setToList stackSet = setToList ourStackInputs 570 then () 571 else reprocess := true 572 ) 573 | _ => (); 574 Array.update(loopArray, blockNo, Processed); 575 { regSet = ourInputs, stackSet = ourStackInputs, 576 ccState = Array.sub(ccInStates, blockNo), loopSet=Array.sub(loopRegArray, blockNo)} 577 end 578 in 579 reprocess := false; 580 processBlocks 0; 581 if !reprocess then reprocessLoop () else () 582 end 583 in 584 val () = reprocessLoop () 585 end 586 587 (* Third pass - Build the result list with the active registers for each 588 instruction. We don't include registers in the passThrough set since 589 they are active throughout the block. *) 590 local 591 (* Number of instrs for which this is active. We use this to try to select a 592 register to push to the stack if we have too many. Registers that have 593 only a short lifetime are less likely to be pushed than those that are 594 active longer. *) 595 val regActive = Array.array(maxPRegs, 0) 596 fun addActivity n r = Array.update(regActive, r, Array.sub(regActive, r)+n) 597 598 fun createResultInstrs (passThrough, stackPassThrough) 599 (instr, (tail, activeAfterThis, stackActiveAfterThis)) = 600 let 601 val { sources, dests, sStacks=stackSrcs, dStacks=stackDests, ... } = getInstructionState instr 602 in 603 (* Eliminate instructions if their results are not required. The earlier check for this 604 will remove most cases but if we have duplicated a block we may have a register that 605 is required elsewhere but not in this particular branch. *) 606 if not(List.exists(fn PReg d => member(d, activeAfterThis)) dests) andalso eliminateable instr 607 then (tail, activeAfterThis, stackActiveAfterThis) 608 else 609 let 610 fun regNo(PReg i) = i 611 fun stackNo(StackLoc{rno, ...}) = rno 612 val destRegNos = map regNo dests 613 and sourceRegNos = map regNo sources 614 val destSet = listToSet destRegNos 615 (* Remove any sources that are present in passThrough since 616 they are going to be active throughout the block. *) 617 and sourceSet = minus(listToSet sourceRegNos, passThrough) 618 val stackDestRegNos = map stackNo stackDests 619 and stackSourceRegNos = map stackNo stackSrcs 620 val stackDestSet = listToSet stackDestRegNos 621 and stackSourceSet = minus(listToSet stackSourceRegNos, stackPassThrough) 622 623 (* To compute the active set for the PREVIOUS instruction (we're processing from the 624 end back to the start) we remove any registers that have been given values in this 625 instruction and add anything that we are using in this instruction since they will 626 now need to have values. *) 627 val afterRemoveDests = minus(activeAfterThis, destSet) 628 val stackAfterRemoveDests = minus(stackActiveAfterThis, stackDestSet) 629 val activeForPrevious = union(sourceSet, afterRemoveDests) 630 val stackActiveForPrevious = union(stackSourceSet, stackAfterRemoveDests) 631 632 (* The "active" set is the set of registers that need to be active DURING the 633 instruction. It includes destinations, which will usually be in 634 "activeAfterThis", because there may be destinations that are not actually used 635 subsequently but still need a register. That will also include work registers. 636 Usually sources aren't included if this is the last use but the 637 AllocateMemoryVariable "instruction" can't set the size after the memory is 638 allocated so the active set includes the source(s). *) 639 val activeForInstr = 640 case instr of 641 FunctionCall _ => sourceSet (* Is this still needed? *) 642 | TailRecursiveCall _ => 643 (* Set the active set to the total set of registers we require including 644 the work register. This ensures that we will spill as many registers 645 as we require when we look at the size of the active set. *) 646 union(sourceSet, destSet) 647 | AllocateMemoryVariable _ => (* We can only set the size after the memory is allocated. *) 648 union(activeAfterThis, union(sourceSet, destSet)) 649 | BoxValue _ => (* We can only store the value in the box after the box is allocated. *) 650 union(activeAfterThis, union(sourceSet, destSet)) 651 | _ => union(activeAfterThis, destSet) 652 653 val () = List.app(addActivity 1) (setToList activeForInstr) 654 655 local 656 (* If we are allocating memory we have to save the current registers if 657 they could contain an address. We mustn't push untagged registers 658 and we mustn't push the destination. *) 659 fun getSaveSet dReg = 660 let 661 val activeAfter = union(activeAfterThis, passThrough) 662 (* Remove any registers marked - must-not-push. These are 663 registers holding non-address values. They will actually 664 be saved by the RTS across any GC but not checked or 665 modified by the GC. 666 Exclude the result register. *) 667 fun getSave i = 668 if i = dReg 669 then NONE 670 else case Vector.sub(pregProps, i) of 671 RegPropGeneral => SOME(PReg i) 672 | RegPropCacheTagged => SOME(PReg i) 673 | RegPropUntagged => NONE 674 | RegPropStack _ => NONE 675 | RegPropCacheUntagged => NONE 676 | RegPropMultiple => raise InternalError "getSave: RegPropMultiple" 677 in 678 List.mapPartial getSave (setToList activeAfter) 679 end 680 in 681 (* Sometimes we need to modify the instruction e.g. to include the set 682 of registers to save. *) 683 val convertedInstr = 684 case instr of 685 AllocateMemoryOperation{size, flags, dest, saveRegs=_} => 686 AllocateMemoryOperation{size=size, flags=flags, dest=dest, 687 saveRegs=getSaveSet(regNo dest)} 688 689 | AllocateMemoryVariable{size, dest, saveRegs=_} => 690 AllocateMemoryVariable{size=size, dest=dest, saveRegs=getSaveSet(regNo dest)} 691 692 | BoxValue{source, dest, boxKind, saveRegs=_} => 693 BoxValue{source=source, dest=dest, boxKind=boxKind, 694 saveRegs=getSaveSet(regNo dest)} 695 696 | JumpLoop{regArgs, stackArgs, checkInterrupt = SOME _, workReg, ...} => 697 let 698 (* If we have to check for interrupts we must preserve registers across 699 the RTS call. *) 700 fun getSave i = 701 case Vector.sub(pregProps, i) of 702 RegPropGeneral => SOME(PReg i) 703 | RegPropCacheTagged => SOME(PReg i) 704 | RegPropUntagged => NONE 705 | RegPropStack _ => NONE 706 | RegPropCacheUntagged => NONE 707 | RegPropMultiple => raise InternalError "getSave: RegPropMultiple" 708 val currentRegs = union(activeAfterThis, passThrough) 709 (* Have to include the loop registers. These were previously included 710 automatically because they were part of the import set. *) 711 val check = List.mapPartial getSave (map (regNo o #2) regArgs @ setToList currentRegs) 712 in 713 JumpLoop{regArgs=regArgs, stackArgs=stackArgs, checkInterrupt=SOME check, workReg=workReg} 714 end 715 716 | FunctionCall{regArgs, stackArgs=[], dest, realDest, callKind as ConstantCode m, saveRegs=_} => 717 (* If this is arbitrary precision push the registers rather than marking them as "save". 718 stringOfWord returns 'CODE "PolyAddArbitrary"' etc. *) 719 if (String.isSubstring "Arbitrary\"" (Address.stringOfWord m)) 720 then FunctionCall{regArgs=regArgs, stackArgs=[], callKind=callKind, dest=dest, 721 realDest=realDest, saveRegs=getSaveSet(regNo dest) } 722 else instr 723 724 | instr as LoadArgument{dest=PReg dreg, ...} => 725 ( 726 if member(dreg, activeAfterThis) 727 then () 728 else print("Register " ^ Int.toString dreg ^ " inactive-" ^ PolyML.makestring instr ^ "\n"); 729 instr 730 ) 731 732 | _ => instr 733 end 734 735 (* FunctionCall must mark all registers as "push". *) 736 local 737 fun pushRegisters () = 738 let 739 val activeAfter = union(activeAfterThis, passThrough) 740 fun pushAllButDests i = 741 if List.exists(fn j => i=j) destRegNos 742 then () 743 else case Vector.sub(pregProps, i) of 744 RegPropCacheTagged => raise InternalError "pushRegisters: cache reg" 745 | RegPropCacheUntagged => raise InternalError "pushRegisters: cache reg" 746 | _ => Array.update(requirePushOrDiscard, i, true) 747 in 748 (* We need to push everything active after this 749 except the result register. *) 750 List.app pushAllButDests (setToList activeAfter) 751 end 752 in 753 val () = 754 case instr of 755 FunctionCall{ stackArgs=[], callKind=ConstantCode m, ...} => 756 if (String.isSubstring "Arbitrary\"" (Address.stringOfWord m)) 757 then () 758 else pushRegisters () 759 760 | FunctionCall _ => pushRegisters () 761 762 (* It should no longer be necessary to push across a handler but 763 there still seem to be cases that need it. *) 764 | BeginHandler _ => pushRegisters () 765 766 | CopyToCache {source=PReg srcReg, dest=PReg dstReg, ...} => 767 (* If the source is a cache register marked as "must push" i.e. discard, 768 the destination must also be discarded i.e. not available. 769 Note: the source could be a non-cache register marked for pushing. *) 770 ( 771 case (Vector.sub(pregProps, srcReg), Array.sub(requirePushOrDiscard, srcReg)) of 772 (RegPropCacheTagged, true) => Array.update(requirePushOrDiscard, dstReg, true) 773 | (RegPropCacheUntagged, true) => Array.update(requirePushOrDiscard, dstReg, true) 774 | _ => () 775 ) 776 777 | _ => () 778 end 779 780 (* Which entries are active in this instruction but not afterwards? *) 781 val kill = union(minus(stackSourceSet, stackActiveAfterThis), minus(sourceSet, activeAfterThis)) 782 in 783 ({instr=convertedInstr, active=activeForInstr, current=activeAfterThis, kill=kill} :: tail, activeForPrevious, 784 stackActiveForPrevious) 785 end 786 end 787 788 fun createResult blockNo = 789 let 790 val BasicBlock{ flow, ...} = Vector.sub(blockVector, blockNo) 791 val declSet = Array.sub(declarationArray, blockNo) 792 and importSet = Array.sub(importArray, blockNo) 793 and passSet = Array.sub(passThroughArray, blockNo) 794 and loopSet = Array.sub(loopRegArray, blockNo) 795 and exportSet = Array.sub(exportArray, blockNo) 796 and stackPassSet = Array.sub(stackPassThroughArray, blockNo) 797 and stackImportSet = Array.sub(stackImportArray, blockNo) 798 and stackExportSet = Array.sub(stackExportArray, blockNo) 799 val filteredCode = getOpt(Array.sub(resultCode, blockNo), []) 800 (* At the end of the block we should have the exports active. *) 801 val (resultInstrs, _, _) = List.foldr (createResultInstrs (passSet, stackPassSet)) ([], exportSet, stackExportSet) filteredCode 802 (* Set the active count for the pass through. *) 803 val instrCount = List.length filteredCode 804 val () = List.app(addActivity instrCount) (setToList passSet) 805 val inCCState = 806 case Array.sub(ccInStates, blockNo) of CCNeeded s => SOME s | CCUnused => NONE 807 val outCCState = 808 case Array.sub(ccRequiredOut, blockNo) of CCNeeded s => SOME s | CCUnused => NONE 809 in 810 ExtendedBasicBlock { 811 block = resultInstrs, 812 flow=flow, 813 locals = minus(declSet, exportSet), 814 imports = importSet, 815 exports = exportSet, 816 passThrough = passSet, 817 loopRegs = loopSet, 818 initialStacks = union(stackPassSet, stackImportSet), 819 inCCState = inCCState, 820 outCCState = outCCState 821 } 822 end 823 in 824 val resultBlocks = Vector.tabulate(vectorLength, createResult) 825 val regActive = regActive 826 end 827 828 val registerState: regState vector = 829 Vector.tabulate(maxPRegs, 830 fn i => { 831 active = Array.sub(regActive, i), 832 refs = Array.sub(regRefs, i), 833 pushState = Array.sub(requirePushOrDiscard, i), 834 prop = Vector.sub(pregProps, i) 835 } 836 ) 837 in 838 (resultBlocks, registerState) 839 end 840 841 (* Exported function. First filter out unreferenced blocks then process the 842 registers themselves. *) 843 fun identifyRegisters(blockVector, pregProps) = 844 let 845 val vectorLength = Vector.length blockVector 846 val mapArray = Array.array(vectorLength, NONE) 847 and resArray = Array.array(vectorLength, NONE) 848 val count = ref 0 849 850 fun setReferences label = 851 case Array.sub(mapArray, label) of 852 NONE => (* Not yet visited *) 853 let 854 val BasicBlock{flow, block} = Vector.sub(blockVector, label) 855 (* Create a new entry for it. *) 856 val newLabel = ! count before count := !count + 1 857 (* Add it to the map. Any other references will use this 858 without reprocessing. *) 859 val () = Array.update(mapArray, label, SOME newLabel) 860 val newFlow = 861 case flow of 862 Unconditional l => Unconditional(setReferences l) 863 | Conditional{trueJump, falseJump, ccRef, condition} => 864 Conditional{trueJump=setReferences trueJump, falseJump=setReferences falseJump, 865 ccRef=ccRef, condition=condition} 866 | ExitCode => ExitCode 867 | IndexedBr list => IndexedBr(map setReferences list) 868 | SetHandler{handler, continue} => 869 SetHandler{handler=setReferences handler, continue=setReferences continue} 870 | UnconditionalHandle l => UnconditionalHandle(setReferences l) 871 | ConditionalHandle{handler, continue} => 872 ConditionalHandle{handler=setReferences handler, continue=setReferences continue} 873 874 val () = Array.update(resArray, newLabel, SOME(BasicBlock{flow=newFlow, block=block})) 875 in 876 newLabel 877 end 878 | SOME lab => lab 879 880 val _ = setReferences 0 881 882 val newBlockVector = 883 Vector.tabulate(!count, fn i => valOf(Array.sub(resArray, i))) 884 in 885 identifyRegs(newBlockVector, pregProps) 886 end 887 888 (* Exported for use in GetConflictSets *) 889 fun getInstructionRegisters instr = 890 let 891 val {sources, dests, ...} = getInstructionState instr 892 in 893 {sources=sources, dests=dests} 894 end 895 896 (* Exported for use in ICodeOptimise *) 897 val getInstructionCC = #ccOut o getInstructionState 898 899 structure Sharing = 900 struct 901 type x86ICode = x86ICode 902 and reg = reg 903 and preg = preg 904 and intSet = intSet 905 and basicBlock = basicBlock 906 and extendedBasicBlock = extendedBasicBlock 907 and controlFlow = controlFlow 908 and argument = argument 909 and memoryIndex = memoryIndex 910 and regProperty = regProperty 911 and ccRef = ccRef 912 and outCCState = outCCState 913 end 914end; 915