1(* 2 Copyright David C. J. Matthews 2016-20 3 4 This library is free software; you can redistribute it and/or 5 modify it under the terms of the GNU Lesser General Public 6 License version 2.1 as published by the Free Software Foundation. 7 8 This library is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 Lesser General Public License for more details. 12 13 You should have received a copy of the GNU Lesser General Public 14 License along with this library; if not, write to the Free Software 15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16*) 17 18functor X86ICodeToX86Code( 19 20 structure X86CODE: X86CODESIG 21 22 structure X86OPTIMISE: 23 sig 24 type operation 25 type code 26 type operations = operation list 27 type closureRef 28 29 val generateCode: {code: code, ops: operations, labelCount: int, resultClosure: closureRef } -> unit 30 31 structure Sharing: 32 sig 33 type operation = operation 34 type code = code 35 type closureRef = closureRef 36 end 37 end 38 39 structure DEBUG: DEBUG 40 41 structure ICODE: ICodeSig 42 structure IDENTIFY: X86IDENTIFYREFSSIG 43 structure INTSET: INTSETSIG 44 structure PRETTY: PRETTYSIG 45 46 structure STRONGLY: 47 sig 48 val stronglyConnectedComponents: {nodeAddress: 'a -> int, arcs: 'a -> int list } -> 'a list -> 'a list list 49 end 50 51 sharing X86CODE.Sharing = ICODE.Sharing = X86OPTIMISE.Sharing = IDENTIFY.Sharing = INTSET 52): X86ICODEGENERATESIG = 53struct 54 open IDENTIFY 55 open ICODE 56 57 open X86CODE 58 59 open Address 60 61 exception InternalError = Misc.InternalError 62 63 fun asGenReg(GenReg r) = r 64 | asGenReg _ = raise InternalError "asGenReg" 65 66 and asFPReg(FPReg r) = r 67 | asFPReg _ = raise InternalError "asFPReg" 68 69 and asXMMReg(XMMReg r) = r 70 | asXMMReg _ = raise InternalError "asXMMReg" 71 72 (* tag a short constant *) 73 fun tag c = 2 
* c + 1 74 75 local 76 val regs = 77 case targetArch of 78 Native32Bit => [edi, esi, edx, ecx, ebx, eax] 79 | Native64Bit => [r14, r13, r12, r11, r10, r9, r8, edi, esi, edx, ecx, ebx, eax] 80 | ObjectId32Bit => [r14, r13, r12, r11, r10, r9, r8, edi, esi, edx, ecx, eax] 81 in 82 val generalRegisters = List.map GenReg regs 83 end 84 85 fun opSizeToMove OpSize32 = Move32 | opSizeToMove OpSize64 = Move64 86 87 fun icodeToX86Code{blocks, functionName, stackRequired, debugSwitches, allocatedRegisters, resultClosure, ...} = 88 let 89 fun argAsGenReg(RegisterArg(GenReg r)) = r 90 | argAsGenReg _ = raise InternalError "argAsGenReg" 91 92 fun sourceAsGenRegOrMem(RegisterArg(GenReg r)) = RegisterArg r 93 | sourceAsGenRegOrMem(MemoryArg{offset, base=baseReg, index}) = 94 MemoryArg{base=baseReg, offset=offset, index=index} 95 | sourceAsGenRegOrMem(NonAddressConstArg v) = NonAddressConstArg v 96 | sourceAsGenRegOrMem(AddressConstArg v) = AddressConstArg v 97 | sourceAsGenRegOrMem _ = raise InternalError "sourceAsGenRegOrMem" 98 99 and sourceAsXMMRegOrMem(RegisterArg(XMMReg r)) = RegisterArg r 100 | sourceAsXMMRegOrMem(MemoryArg{offset, base=baseReg, index}) = 101 MemoryArg{base=baseReg, offset=offset, index=index} 102 | sourceAsXMMRegOrMem(NonAddressConstArg v) = NonAddressConstArg v 103 | sourceAsXMMRegOrMem(AddressConstArg v) = AddressConstArg v 104 | sourceAsXMMRegOrMem _ = raise InternalError "sourceAsGenRegOrMem" 105 106 (* Moves and loads. *) 107 fun llLoadArgument({ source, dest=GenReg destReg, kind=Move64Bit}, code) = 108 Move { source=sourceAsGenRegOrMem source, destination=RegisterArg destReg, moveSize=Move64 } :: code 109 110 | llLoadArgument({ source=MemoryArg mLoc, dest=GenReg destReg, kind=MoveByte}, code) = (* Load from memory. *) 111 Move{moveSize=Move8, source=MemoryArg mLoc, destination=RegisterArg destReg} :: code 112 113 | llLoadArgument({ source=MemoryArg mLoc, dest=GenReg destReg, kind=Move16Bit}, code) = (* Load from memory. 
*) 114 Move{moveSize=Move16, source=MemoryArg mLoc, destination=RegisterArg destReg} :: code 115 116 | llLoadArgument({ source, dest=GenReg destReg, kind=Move32Bit}, code) = (* Load from memory. *) 117 Move { source=sourceAsGenRegOrMem source, destination=RegisterArg destReg, moveSize=Move32 } :: code 118 119 (* Load a floating point value. *) 120 | llLoadArgument({source=MemoryArg{offset, base=baseReg, index}, dest=FPReg fpReg, kind=MoveDouble}, code) = 121 moveToOutputFP(fpReg, 122 FPLoadFromMemory{ address={base=baseReg, offset=offset, index=index}, precision=DoublePrecision } :: code) 123 124 | llLoadArgument({source=AddressConstArg addrConst, dest=FPReg fpReg, kind=MoveDouble}, code) = 125 moveToOutputFP(fpReg, FPLoadFromConst{ constant= addrConst, precision=DoublePrecision } :: code) 126 127 | llLoadArgument({source=RegisterArg(FPReg fpSrc), dest=FPReg fpDest, kind=MoveDouble}, code) = 128 (* Moving from one FP reg to another. Even if we are moving from FP0 we still do a load 129 because FPStoreToFPReg adds one to the register number to account for one value on the 130 stack. *) 131 moveToOutputFP(fpDest, FPLoadFromFPReg{source=fpSrc, lastRef=false} :: code) 132 133 (* Load or move from an XMM reg. *) 134 | llLoadArgument({source, dest=XMMReg xmmRegReg, kind=MoveDouble}, code) = 135 XMMArith { opc= SSE2MoveDouble, source=sourceAsXMMRegOrMem source, output=xmmRegReg } :: code 136 137 (* Load a floating point value. *) 138 | llLoadArgument({source=MemoryArg{offset, base=baseReg, index}, dest=FPReg fpReg, kind=MoveFloat}, code) = 139 moveToOutputFP(fpReg, 140 FPLoadFromMemory{ address={ base=baseReg, offset=offset, index=index }, precision=SinglePrecision } :: code) 141 142 | llLoadArgument({source=AddressConstArg addrConst, dest=FPReg fpReg, kind=MoveFloat}, code) = 143 moveToOutputFP(fpReg, FPLoadFromConst{ constant= addrConst, precision=SinglePrecision } :: code) 144 145 (* Load or move from an XMM reg. 
*) 146 | llLoadArgument({source, dest=XMMReg xmmRegReg, kind=MoveFloat}, code) = 147 XMMArith { opc= SSE2MoveFloat, source=sourceAsXMMRegOrMem source, output=xmmRegReg } :: code 148 149 (* Any other combinations are not allowed. *) 150 | llLoadArgument _ = raise InternalError "codeGenICode: LoadArgument" 151 152 (* Unless the destination is FP0 we need to store and pop. *) 153 and moveToOutputFP(fpDest, code) = 154 if fpDest = fp0 then code 155 else FPStoreToFPReg{output=fpDest, andPop=true} :: code 156 157 (* Store to memory *) 158 fun llStoreArgument{ source=RegisterArg(GenReg sourceReg), base, offset, index, kind=Move64Bit} = 159 Move{source=RegisterArg sourceReg, destination=MemoryArg {base=base, offset=offset, index=index}, moveSize=opSizeToMove OpSize64} 160 161 | llStoreArgument{ source=RegisterArg(GenReg sourceReg), base, offset, index, kind=MoveByte} = 162 Move{moveSize=Move8, source=RegisterArg sourceReg, destination=MemoryArg {base=base, offset=offset, index=index}} 163 164 | llStoreArgument{ source=RegisterArg(GenReg sourceReg), base, offset, index, kind=Move16Bit} = 165 Move{moveSize=Move16, source=RegisterArg sourceReg, destination=MemoryArg {base=base, offset=offset, index=index}} 166 167 | llStoreArgument{ source=RegisterArg(GenReg sourceReg), base, offset, index, kind=Move32Bit} = 168 Move{source=RegisterArg sourceReg, destination=MemoryArg {base=base, offset=offset, index=index}, moveSize=opSizeToMove OpSize32} 169 170 (* Store a short constant to memory *) 171 | llStoreArgument{ source=NonAddressConstArg srcValue, base, offset, index, kind=Move64Bit} = 172 Move{source=NonAddressConstArg srcValue, destination=MemoryArg {base=base, offset=offset, index=index}, moveSize=Move64} 173 174 | llStoreArgument{ source=NonAddressConstArg srcValue, base, offset, index, kind=Move32Bit} = 175 Move{source=NonAddressConstArg srcValue, destination=MemoryArg {base=base, offset=offset, index=index}, moveSize=Move32} 176 177 | llStoreArgument{ 
source=NonAddressConstArg srcValue, base, offset, index, kind=MoveByte} = 178 Move{moveSize=Move8, source=NonAddressConstArg srcValue, destination=MemoryArg{base=base, offset=offset, index=index}} 179 180 (* Store a long constant to memory *) 181 | llStoreArgument{ source=AddressConstArg srcValue, base, offset, index, kind} = 182 ( 183 (* This Move must be of a polyWord size. *) 184 case (kind, polyWordOpSize) of 185 (Move64Bit, OpSize64) => () 186 | (Move32Bit, OpSize32) => () 187 | _ => raise InternalError "Move of AddressConstArg"; 188 Move{moveSize=opSizeToMove polyWordOpSize, source=AddressConstArg srcValue, destination=MemoryArg {base=base, offset=offset, index=index}} 189 ) 190 191 (* Store a floating point value. *) 192 | llStoreArgument{source=RegisterArg(FPReg fpReg), offset, base=baseReg, index, kind=MoveDouble} = 193 let 194 val _ = fpReg = fp0 orelse raise InternalError "llStoreArgument: Store FPReg <> fp0" 195 in 196 FPStoreToMemory{ address={ base=baseReg, offset=offset, index=index}, precision=DoublePrecision, andPop=true } 197 end 198 199 | llStoreArgument{source=RegisterArg(XMMReg xmmRegReg), offset, base=baseReg, index, kind=MoveDouble} = 200 XMMStoreToMemory { toStore=xmmRegReg, address={base=baseReg, offset=offset, index=index}, precision=DoublePrecision } 201 202 (* Store a floating point value. 
*) 203 | llStoreArgument{source=RegisterArg(FPReg fpReg), offset, base=baseReg, index, kind=MoveFloat} = 204 let 205 val _ = fpReg = fp0 orelse raise InternalError "llStoreArgument: Store FPReg <> fp0" 206 in 207 FPStoreToMemory{address={ base=baseReg, offset=offset, index=index}, precision=SinglePrecision, andPop=true } 208 end 209 210 | llStoreArgument{source=RegisterArg(XMMReg xmmRegReg), offset, base=baseReg, index, kind=MoveFloat} = 211 XMMStoreToMemory { toStore=xmmRegReg, address={base=baseReg, offset=offset, index=index}, precision=SinglePrecision } 212 213 | llStoreArgument _ = raise InternalError "llStoreArgument: StoreArgument" 214 215 val numBlocks = Vector.length blocks 216 217 fun getAllocatedReg r = Vector.sub(allocatedRegisters, r) 218 219 val getAllocatedGenReg = asGenReg o getAllocatedReg 220 and getAllocatedFPReg = asFPReg o getAllocatedReg 221 and getAllocatedXMMReg = asXMMReg o getAllocatedReg 222 223 fun codeExtIndex NoMemIndex = NoIndex 224 | codeExtIndex(MemIndex1(PReg r)) = Index1(getAllocatedGenReg r) 225 | codeExtIndex(MemIndex2(PReg r)) = Index2(getAllocatedGenReg r) 226 | codeExtIndex(MemIndex4(PReg r)) = Index4(getAllocatedGenReg r) 227 | codeExtIndex(MemIndex8(PReg r)) = Index8(getAllocatedGenReg r) 228 | codeExtIndex ObjectIndex = raise InternalError "codeExtIndex: ObjectIndex" 229 230 local 231 fun codeExtArgument getReg (RegisterArgument(PReg r)) = RegisterArg(getReg r) 232 | codeExtArgument _ (AddressConstant m) = AddressConstArg m 233 | codeExtArgument _ (IntegerConstant i) = NonAddressConstArg i 234 | codeExtArgument _ (MemoryLocation{base=PReg bReg, offset, index=ObjectIndex, cache=NONE}) = 235 MemoryArg{base=ebx, index=Index4(getAllocatedGenReg bReg), offset=offset} 236 | codeExtArgument _ (MemoryLocation{base=PReg bReg, offset, index, cache=NONE}) = 237 MemoryArg{base=getAllocatedGenReg bReg, offset=offset, index=codeExtIndex index} 238 | codeExtArgument getReg (MemoryLocation{cache=SOME(PReg r), ...}) = RegisterArg(getReg r) 
239 | codeExtArgument _ (StackLocation{wordOffset, cache=NONE, ...}) = 240 MemoryArg{base=esp, offset=wordOffset*Word.toInt nativeWordSize, index=NoIndex} 241 | codeExtArgument getReg (StackLocation{cache=SOME(PReg r), ...}) = RegisterArg(getReg r) 242 | codeExtArgument _ (ContainerAddr _) = raise InternalError "codeExtArgument - ContainerAddr" 243 in 244 val codeExtArgument = codeExtArgument getAllocatedReg 245 and codeExtArgumentAsGenReg = codeExtArgument getAllocatedGenReg 246 and codeExtArgumentAsFPReg = codeExtArgument getAllocatedFPReg 247 and codeExtArgumentAsXMMReg = codeExtArgument getAllocatedXMMReg 248 end 249 250 fun codeCallKind Recursive = NonAddressConstArg 0 (* Jump to the start *) 251 | codeCallKind (ConstantCode v) = AddressConstArg v 252 | codeCallKind FullCall = 253 ( 254 case targetArch of 255 ObjectId32Bit => MemoryArg{base=ebx, index=Index4 edx, offset=0} 256 | _ => MemoryArg{base=edx, index=NoIndex, offset=0} 257 ) 258 259 (* Move unless the registers are the same. *) 260 fun moveIfNecessary({src, dst, kind}, code) = 261 if src = dst then code 262 else llLoadArgument({source=RegisterArg src, dest=dst, kind=kind}, code) 263 264 fun opSizeToIMove OpSize64 = Move64Bit 265 | opSizeToIMove OpSize32 = Move32Bit 266 267 datatype llsource = 268 StackSource of int 269 | OtherSource of reg regOrMemoryArg 270 271 fun sourceToX86Code(OtherSource r) = r 272 | sourceToX86Code(StackSource wordOffset) = MemoryArg{base=esp, offset=wordOffset*Word.toInt nativeWordSize, index=NoIndex} 273 274 local 275 fun indexRegister NoIndex = NONE 276 | indexRegister (Index1 r) = SOME r 277 | indexRegister (Index2 r) = SOME r 278 | indexRegister (Index4 r) = SOME r 279 | indexRegister (Index8 r) = SOME r 280 (* The registers are numbered from 0. Choose values that don't conflict with 281 the stack addresses. 
*) 282 fun regNo r = ~1 - nReg r 283 type node = {src: llsource, dst: destinations } 284 285 fun nodeAddress({dst=RegDest r, ...}: node) = regNo r 286 | nodeAddress({dst=StackDest a, ...}) = a 287 288 fun arcs({src=StackSource wordOffset, ...}: node) = [wordOffset] 289 | arcs{src=OtherSource(RegisterArg r), ...} = [regNo r] 290 | arcs{src=OtherSource(MemoryArg{base, index, ...}), ...} = 291 (case indexRegister index of NONE => [regNo(GenReg base)] | SOME r => [regNo(GenReg base), regNo(GenReg r)]) 292 | arcs _ = [] 293 in 294 val stronglyConnected = STRONGLY.stronglyConnectedComponents { nodeAddress=nodeAddress, arcs=arcs } 295 end 296 297 (* This is a general function for moving values into registers or to the stack 298 where it is possible that the source values might also be in use as destinations. 299 The stack is used for destinations only for tail recursive calls. *) 300 fun moveMultipleValues(moves, workReg: reg option, code) = 301 let 302 val _ = 303 if List.exists(fn {dst=StackDest _, ...} => true | _ => false) moves andalso not(isSome workReg) then raise InternalError "no work reg" else () 304 305 fun moveValues ([], code) = code (* We're done. *) 306 307 | moveValues (arguments, code) = 308 let 309 (* stronglyConnectedComponents does two things. It detects loops where 310 it's not possible to move items without breaking the loop but more 311 importantly it orders the dependencies so that if there are no loops we 312 can load the source and store it in the destination knowing that 313 we won't overwrite anything we might later need. *) 314 315 val ordered = stronglyConnected arguments 316 317 fun isFPReg(GenReg _) = false 318 | isFPReg(XMMReg _) = true 319 | isFPReg(FPReg _) = true 320 321 fun moveEachValue ([], code) = code 322 323 | moveEachValue ([{dst=RegDest reg, src as OtherSource(RegisterArg r)}] :: rest, code) = 324 (* Source and dest are both regs - only move if they're different. 
*) 325 if r = reg 326 then moveEachValue(rest, code) 327 else moveEachValue(rest, 328 llLoadArgument({source=sourceToX86Code src, dest=reg, kind=if isFPReg reg then MoveDouble else moveNativeWord}, code)) 329 330 | moveEachValue ([{dst=RegDest reg, src as StackSource _}] :: rest, code) = 331 (* If loading from the stack always use native word. The value could be a stack address. *) 332 moveEachValue(rest, llLoadArgument({source=sourceToX86Code src, dest=reg, kind=moveNativeWord}, code)) 333 334 | moveEachValue ([{dst=RegDest reg, src}] :: rest, code) = 335 (* Load from store or a constant. Have to use movePolyWord if it's an address constant. *) 336 moveEachValue(rest, llLoadArgument({source=sourceToX86Code src, dest=reg, kind=movePolyWord}, code)) 337 338 | moveEachValue ([{dst=StackDest _, src=OtherSource(MemoryArg _ )}] :: _, _) = 339 raise InternalError "moveEachValue - MemoryArgument" 340 341 | moveEachValue ([{dst=StackDest addr, src as StackSource wordOffset}] :: rest, code) = 342 (* Copy a stack location - needs a load and store unless the address is the same. *) 343 if addr = wordOffset 344 then moveEachValue(rest, code) 345 else 346 let 347 val workReg = valOf workReg 348 in 349 moveEachValue(rest, 350 llStoreArgument{source=RegisterArg workReg, base=esp, index=NoIndex, 351 offset = addr*Word.toInt nativeWordSize, kind=moveNativeWord} :: 352 llLoadArgument({source=sourceToX86Code src, dest=workReg, kind=moveNativeWord}, code)) 353 end 354 355 | moveEachValue ([{dst=StackDest addr, src}] :: rest, code) = 356 (* Store from a register or a constant. *) 357 moveEachValue(rest, 358 llStoreArgument{ 359 source=sourceToX86Code src, base=esp, index=NoIndex, offset = addr*Word.toInt nativeWordSize, kind=moveNativeWord} :: code) 360 361 | moveEachValue((cycle as first :: _ :: _) :: rest, code) = 362 (* We have a cycle. *) 363 let 364 (* We need to exchange some of the arguments. Doing an exchange here will 365 set the destination with the correct source. 
However we have to process 366 every subsequent entry with the swapped registers. That may well mean that 367 one of those entries becomes trivial. Using XCHG means that we can move 368 N registers in N-1 exchanges. 369 We also need to rerun stronglyConnectedComponents on at least the rest of 370 this cycle. It's easiest to flatten the rest and do everything. *) 371 (* Try to find either a register-register move or a register-stack move. 372 If not use the first. If there's a stack-register move there will 373 also be a register-stack so we don't need to look for both. *) 374 val {dst=selectDst, src=selectSrc} = 375 case List.find(fn {src=OtherSource(RegisterArg _), dst=RegDest _} => true | _ => false) cycle of 376 SOME found => found 377 | _ => 378 ( 379 case List.find(fn {dst=RegDest _, ...} => true | _ => false) cycle of 380 SOME found => found 381 | NONE => first 382 ) 383 (* This includes this entry but after the swap we'll eliminate it. *) 384 val flattened = List.foldl(fn (a, b) => a @ b) [] (cycle :: rest) 385 val destAsSource = 386 case selectDst of 387 RegDest reg => OtherSource(RegisterArg reg) 388 | StackDest s => StackSource s 389 390 (* Source is not an equality type. We can't currently handle the 391 situation where the source is a memory location. *) 392 fun match(OtherSource(RegisterArg r1), OtherSource(RegisterArg r2)) = r1 = r2 393 | match(StackSource s1, StackSource s2) = s1 = s2 394 | match(OtherSource(MemoryArg _), _) = raise InternalError "moveEachValue: cycle" 395 | match _ = false 396 397 fun swapSources{src, dst} = 398 if match(src, selectSrc) then {src=destAsSource, dst=dst} 399 else if match(src, destAsSource) then {src=selectSrc, dst=dst} 400 else {src=src, dst=dst} 401 (* Try to use register to register exchange if we can. 402 A register-to-memory exchange involves a bus lock and we'd 403 like to avoid that. 
*) 404 val exchangeCode = 405 case (selectDst, selectSrc) of 406 (RegDest(GenReg regA), OtherSource(RegisterArg(GenReg regB))) => 407 XChng { reg=regA, arg=RegisterArg regB, opSize=nativeWordOpSize } :: code 408 409 | (RegDest(XMMReg regA), OtherSource(RegisterArg(XMMReg regB))) => 410 (* This is the only case where we can have a cycle with SSE2 regs. 411 There are various ways of doing it but XORs are probably the easiest. *) 412 XMMArith{opc=SSE2Xor, source=RegisterArg regA, output=regB} :: 413 XMMArith{opc=SSE2Xor, source=RegisterArg regB, output=regA} :: 414 XMMArith{opc=SSE2Xor, source=RegisterArg regA, output=regB} :: code 415 416 | (RegDest _, OtherSource(RegisterArg _)) => 417 raise InternalError "moveEachValue: invalid register combination" 418 419 | (RegDest regA, src as StackSource addr) => 420 let 421 val workReg = valOf workReg 422 in 423 llStoreArgument{source=RegisterArg workReg, base=esp, index=NoIndex, 424 offset = addr*Word.toInt nativeWordSize, kind=moveNativeWord} :: 425 XChng { reg=asGenReg regA, arg=RegisterArg(asGenReg workReg), opSize=nativeWordOpSize } :: 426 llLoadArgument({source=sourceToX86Code src, dest=workReg, kind=moveNativeWord}, code) 427 end 428 429 | (StackDest addr, OtherSource(RegisterArg regA)) => 430 let 431 (* This doesn't actually occur because we always find the case above. 
*) 432 val workReg = valOf workReg 433 in 434 llStoreArgument{source=RegisterArg workReg, base=esp, index=NoIndex, 435 offset = addr*Word.toInt nativeWordSize, kind=moveNativeWord} :: 436 XChng { reg=asGenReg regA, arg=RegisterArg (asGenReg workReg), opSize=nativeWordOpSize } :: 437 llLoadArgument({ 438 source=MemoryArg{base=esp, offset=addr*Word.toInt nativeWordSize, index=NoIndex}, dest=workReg, kind=moveNativeWord}, code) 439 end 440 441 | (StackDest addr1, StackSource addr2) => 442 let 443 val workReg = valOf workReg 444 (* This can still happen if we have argument registers that need to be 445 loaded from stack locations and those argument registers happen to 446 contain the values to be stored into those stack locations. 447 e.g. ebx => S8; eax => S7; S8 => eax; S7 => eax. 448 Eliminating the registers results in a cycle. 449 It may be possible to avoid this by excluding the argument 450 registers (eax; ebx; r8; r9; r10) from holding values in the 451 area to be overwritten. *) 452 in 453 llStoreArgument{source=RegisterArg workReg, base=esp, index=NoIndex, 454 offset = addr1*Word.toInt nativeWordSize, kind=moveNativeWord} :: 455 XChng { reg=asGenReg workReg, 456 arg=MemoryArg{base=esp, offset=addr2*Word.toInt nativeWordSize, index=NoIndex}, 457 opSize=nativeWordOpSize } :: 458 llLoadArgument({ 459 source=MemoryArg{base=esp, offset=addr1*Word.toInt nativeWordSize, index=NoIndex}, dest=workReg, kind=moveNativeWord}, code) 460 end 461 462 | _ => raise InternalError "moveEachValue: cycle" 463 464 in 465 moveValues(List.map swapSources flattened, exchangeCode) 466 end 467 468 | moveEachValue(([]) :: _, _) = (* This should not happen - avoid warning. 
*) 469 raise InternalError "moveEachValue - empty set" 470 in 471 moveEachValue(ordered, code) 472 end 473 in 474 moveValues(moves, code) 475 end 476 477 (* Where we have multiple specific registers as either source or 478 destination there is the potential that a destination register 479 if currently in use as a source. *) 480 fun moveMultipleRegisters(regPairList, code) = 481 let 482 val regPairsAsDests = 483 List.map(fn {src, dst} => {src=OtherSource(RegisterArg src), dst=RegDest dst}) regPairList 484 in 485 moveMultipleValues(regPairsAsDests, NONE, code) 486 end 487 488 val outputLabelCount = ref 0 489 val blockToLabelMap = Array.array(numBlocks, ~1) 490 491 fun makeLabel() = Label{labelNo = ! outputLabelCount} before outputLabelCount := !outputLabelCount + 1 492 493 fun getBlockLabel blockNo = 494 case Array.sub(blockToLabelMap, blockNo) of 495 ~1 => 496 let 497 val label as Label{labelNo} = makeLabel() 498 val () = Array.update(blockToLabelMap, blockNo, labelNo) 499 in label end 500 | n => Label{labelNo=n} 501 502 (* The profile object is a single mutable with the F_bytes bit set. *) 503 local 504 val v = RunCall.allocateByteMemory(0w1, Word.fromLargeWord(Word8.toLargeWord(Word8.orb(F_mutable, F_bytes)))) 505 fun clear 0w0 = () 506 | clear i = (assignByte(v, i-0w1, 0w0); clear (i-0w1)) 507 val () = clear wordSize 508 in 509 val profileObject = toMachineWord v 510 end 511 (* Switch to indicate if we want to trace where live data has been allocated. *) 512 val addAllocatingFunction = 513 DEBUG.getParameter DEBUG.profileAllocationTag debugSwitches = 1 514 515 fun llAllocateMemoryOperation ({ size, flags, dest, saveRegs}, code) = 516 let 517 val toReg = asGenReg dest 518 val preserve = saveRegs 519 520 (* Allocate memory. N.B. Instructions are in reverse order. 
*) 521 fun allocStore{size, flags, output, preserve} = 522 if targetArch = Native64Bit andalso flags <> 0w0 523 then 524 [Move{moveSize=Move8, source=NonAddressConstArg(Word8.toLargeInt flags), destination=MemoryArg {offset= ~1, base=output, index=NoIndex}}, 525 Move{source=NonAddressConstArg(LargeInt.fromInt size), 526 destination=MemoryArg {offset= ~ (Word.toInt wordSize), base=output, index=NoIndex}, 527 moveSize=opSizeToMove polyWordOpSize}, 528 AllocStore{size=size, output=output, saveRegs=preserve}] 529 else 530 let 531 val lengthWord = IntInf.orb(IntInf.fromInt size, IntInf.<<(Word8.toLargeInt flags, 0w24)) 532 in 533 [Move{source=NonAddressConstArg lengthWord, destination=MemoryArg {offset= ~ (Word.toInt wordSize), base=output, index=NoIndex}, 534 moveSize=opSizeToMove polyWordOpSize}, 535 AllocStore{size=size, output=output, saveRegs=preserve}] 536 end 537 538 val allocCode = 539 (* If we need to add the profile object *) 540 if addAllocatingFunction 541 then 542 allocStore {size=size+1, flags=Word8.orb(flags, Address.F_profile), output=toReg, preserve=preserve} @ 543 [Move{moveSize=opSizeToMove polyWordOpSize, source=AddressConstArg profileObject, 544 destination=MemoryArg {base=toReg, offset=size*Word.toInt wordSize, index=NoIndex}}] 545 else allocStore {size=size, flags=flags, output=toReg, preserve=preserve} 546 547 (* Convert to an object index if necessary. *) 548 val convertToObjId = 549 if targetArch = ObjectId32Bit 550 then [ ShiftConstant{ shiftType=SHR, output=toReg, shift=0w2, opSize=OpSize64 }, 551 ArithToGenReg{ opc=SUB, output=toReg, source=RegisterArg ebx, opSize=nativeWordOpSize } ] 552 else [] 553 in 554 convertToObjId @ allocCode @ code 555 end 556 557 (* Check the stack limit "register". This is used both at the start of a function for genuine 558 stack checking but also in a loop to check for an interrupt. We need to save the registers 559 even across an interrupt because it can be used if another thread wants a GC. 
*) 560 fun testRegAndTrap(reg, entryPt, saveRegs) = 561 let 562 (* Normally we won't have a stack overflow so we will skip the check. *) 563 val skipCheckLab = makeLabel() 564 in 565 (* Need it in reverse order. *) 566 [ 567 JumpLabel skipCheckLab, 568 CallRTS{rtsEntry=entryPt, saveRegs=saveRegs}, 569 ConditionalBranch{test=JNB, label=skipCheckLab}, 570 ArithToGenReg{ opc=CMP, output=reg, source=MemoryArg{offset=memRegStackLimit, base=ebp, index=NoIndex}, opSize=nativeWordOpSize } 571 ] 572 end 573 574 local 575 val numRegisters = Vector.length allocatedRegisters 576 val uses = Array.array(numRegisters, false) 577 fun used(PReg r) = Array.update(uses, r, true) 578 fun isUsed(PReg r) = Array.sub(uses, r) 579 580 (* Set the registers used by the sources. This differs from getInstructionState in that we don't set 581 the base register of a memory location to "used" if we can use the cache. *) 582 fun argUses(RegisterArgument rarg) = used rarg 583 | argUses(MemoryLocation { cache=SOME cr, ...}) = used cr 584 | argUses(MemoryLocation { base, index, cache=NONE, ...}) = (used base; indexUses index) 585 | argUses(StackLocation { cache=SOME rarg, ...}) = used rarg 586 | argUses _ = () 587 588 and indexUses NoMemIndex = () 589 | indexUses(MemIndex1 arg) = used arg 590 | indexUses(MemIndex2 arg) = used arg 591 | indexUses(MemIndex4 arg) = used arg 592 | indexUses(MemIndex8 arg) = used arg 593 | indexUses ObjectIndex = () 594 595 (* LoadArgument, TagValue, CopyToCache, UntagValue and BoxValue are eliminated if their destination 596 is not used. In that case their source are not used either. 
*) 597 fun instructionUses(LoadArgument { source, dest, ...}) = if isUsed dest then argUses source else () 598 | instructionUses(StoreArgument{ source, base, index, ...}) = (argUses source; used base; indexUses index) 599 | instructionUses(LoadMemReg _) = () 600 | instructionUses(StoreMemReg {source, ...}) = used source 601 | instructionUses(BeginFunction _) = () 602 | instructionUses(FunctionCall{regArgs, stackArgs, ...}) = (List.app(argUses o #1) regArgs; List.app argUses stackArgs) 603 | instructionUses(TailRecursiveCall{regArgs, stackArgs, ...}) = (List.app(argUses o #1) regArgs; List.app(argUses o #src) stackArgs) 604 | instructionUses(AllocateMemoryOperation _) = () 605 | instructionUses(AllocateMemoryVariable{size, ...}) = used size 606 | instructionUses(InitialiseMem{size, addr, init}) = (used size; used addr; used init) 607 | instructionUses(InitialisationComplete) = () 608 | instructionUses(BeginLoop) = () 609 | instructionUses(JumpLoop{regArgs, stackArgs, ...}) = (List.app(argUses o #1) regArgs; List.app(argUses o #1) stackArgs) 610 | instructionUses(RaiseExceptionPacket{packetReg}) = used packetReg 611 | instructionUses(ReserveContainer _) = () 612 | instructionUses(IndexedCaseOperation{testReg, ...}) = used testReg 613 | instructionUses(LockMutable{addr}) = used addr 614 | instructionUses(WordComparison{arg1, arg2, ...}) = (used arg1; argUses arg2) 615 | instructionUses(CompareLiteral{arg1, ...}) = argUses arg1 616 | instructionUses(CompareByteMem{arg1={base, index, ...}, ...}) = (used base; indexUses index) 617 | instructionUses(PushExceptionHandler _) = () 618 | instructionUses(PopExceptionHandler _) = () 619 | instructionUses(BeginHandler _) = () 620 | instructionUses(ReturnResultFromFunction{resultReg, ...}) = used resultReg 621 | instructionUses(ArithmeticFunction{operand1, operand2, ...}) = (used operand1; argUses operand2) 622 | instructionUses(TestTagBit{arg, ...}) = argUses arg 623 | instructionUses(PushValue {arg, ...}) = argUses arg 624 | 
instructionUses(CopyToCache{source, dest, ...}) = if isUsed dest then used source else () 625 | instructionUses(ResetStackPtr _) = () 626 | instructionUses(StoreToStack {source, ...}) = argUses source 627 | instructionUses(TagValue{source, dest, ...}) = if isUsed dest then used source else () 628 | instructionUses(UntagValue{dest, cache=SOME cacheR, ...}) = if isUsed dest then used cacheR else () 629 | instructionUses(UntagValue{source, dest, cache=NONE, ...}) = if isUsed dest then used source else () 630 | instructionUses(LoadEffectiveAddress{base, index, ...}) = (case base of SOME bReg => used bReg | NONE => (); indexUses index) 631 | instructionUses(ShiftOperation{operand, shiftAmount, ...}) = (used operand; argUses shiftAmount) 632 | instructionUses(Multiplication{operand1, operand2, ...}) = (used operand1; argUses operand2) 633 | instructionUses(Division{dividend, divisor, ...}) = (used dividend; argUses divisor) 634 | instructionUses(AtomicExchangeAndAdd{base, source}) = (used base; used source) 635 | instructionUses(BoxValue{source, dest, ...}) = if isUsed dest then used source else () 636 | instructionUses(CompareByteVectors{vec1Addr, vec2Addr, length, ...}) = (used vec1Addr; used vec2Addr; used length) 637 | instructionUses(BlockMove{srcAddr, destAddr, length, ...}) = (used srcAddr; used destAddr; used length) 638 | instructionUses(X87Compare{arg1, arg2, ...}) = (used arg1; argUses arg2) 639 | instructionUses(SSE2Compare{arg1, arg2, ...}) = (used arg1; argUses arg2) 640 | instructionUses(X87FPGetCondition _) = () 641 | instructionUses(X87FPArith{arg1, arg2, ...}) = (used arg1; argUses arg2) 642 | instructionUses(X87FPUnaryOps{source, ...}) = used source 643 | instructionUses(X87Float{source, ...}) = argUses source 644 | instructionUses(SSE2Float{source, ...}) = argUses source 645 | instructionUses(SSE2FPUnary{source, ...}) = argUses source 646 | instructionUses(SSE2FPBinary{arg1, arg2, ...}) = (used arg1; argUses arg2) 647 | 
instructionUses(TagFloat{source, dest, ...}) = if isUsed dest then used source else () 648 | instructionUses(UntagFloat{dest, cache=SOME cacheR, ...}) = if isUsed dest then used cacheR else () 649 | instructionUses(UntagFloat{source, dest, cache=NONE, ...}) = if isUsed dest then argUses source else () 650 | instructionUses(GetSSE2ControlReg _) = () 651 | instructionUses(SetSSE2ControlReg{source}) = used source 652 | instructionUses(GetX87ControlReg _) = () 653 | instructionUses(SetX87ControlReg{source}) = used source 654 | instructionUses(X87RealToInt{source, ...}) = used source 655 | instructionUses(SSE2RealToInt{source, ...}) = argUses source 656 | instructionUses(SignExtend32To64{source, dest}) = if isUsed dest then argUses source else () 657 | instructionUses(TouchArgument{source}) = used source 658 659 (* Depth-first scan. *) 660 val visited = Array.array(numBlocks, false) 661 662 fun processBlocks blockNo = 663 if Array.sub(visited, blockNo) 664 then () (* Done or currently being done. *) 665 else 666 let 667 val () = Array.update(visited, blockNo, true) 668 val ExtendedBasicBlock { flow, block,...} = Vector.sub(blocks, blockNo) 669 val () = 670 (* Process the dependencies first. *) 671 case flow of 672 ExitCode => () 673 | Unconditional m => processBlocks m 674 | Conditional {trueJump, falseJump, ...} => 675 (processBlocks trueJump; processBlocks falseJump) 676 | IndexedBr cases => List.app processBlocks cases 677 | SetHandler{ handler, continue } => 678 (processBlocks handler; processBlocks continue) 679 | UnconditionalHandle _ => () 680 | ConditionalHandle { continue, ...} => processBlocks continue 681 (* Now this block. *) 682 in 683 List.foldr(fn ({instr, ...}, ()) => instructionUses instr) () block 684 end 685 686 in 687 val () = processBlocks 0 688 val isUsed = isUsed 689 end 690 691 (* Return the register part of a cached item. 
*)
(* If a stack or memory location has been cached in a register, substitute the register. *)
fun decache(StackLocation{cache=SOME r, ...}) = RegisterArgument r
|   decache(MemoryLocation{cache=SOME r, ...}) = RegisterArgument r
|   decache arg = arg

(* Only get the registers that are actually used. *)
val getSaveRegs = List.mapPartial(fn (reg as PReg r) => if isUsed reg then SOME(getAllocatedGenReg r) else NONE)

(* codeExtended: translate one IR instruction into low-level X86 instructions, consing them
   onto the accumulated list `code`.  The list is built in reverse order: an instruction
   consed on later executes earlier (see the "N.B. In reverse order" note below). *)
fun codeExtended _ ({instr=LoadArgument{source, dest as PReg dreg, kind}, ...}, code) =
    if not (isUsed dest)
    then code    (* The load has no used destination: drop it entirely. *)
    else
    let
        val realDestReg = getAllocatedReg dreg
    in
        case source of
            RegisterArgument(PReg sreg) =>
                (* Register to register move. Try to use the same register for the source as the destination
                   to eliminate the instruction. *)
                (* If the source is the same as the destination we don't need to do anything. *)
                moveIfNecessary({src=getAllocatedReg sreg, dst=realDestReg, kind=kind}, code)

        |   MemoryLocation{cache=SOME(PReg sreg), ...} =>
            (* This is also a register to register move but because the original load is from
               memory it could be a byte or short precision value. *)
            let
                (* Widen sub-word moves to 32-bit for the register-to-register copy. *)
                val moveKind =
                    case kind of
                        Move64Bit => Move64Bit
                    |   MoveByte => Move32Bit
                    |   Move16Bit => Move32Bit
                    |   Move32Bit => Move32Bit
                    |   MoveFloat => MoveFloat
                    |   MoveDouble => MoveDouble
            in
                moveIfNecessary({src=getAllocatedReg sreg, dst=realDestReg, kind=moveKind}, code)
            end

            (* TODO: Isn't this covered by codeExtArgument? It looks like it was added in the
               32-in-64 changes. *)
        |   StackLocation{cache=SOME(PReg sreg), ...} =>
                moveIfNecessary({src=getAllocatedReg sreg, dst=realDestReg, kind=kind}, code)

        |   source as StackLocation _ => (* Always use native loads from the stack. *)
                llLoadArgument({source=codeExtArgument source, dest=realDestReg, kind=moveNativeWord}, code)

        |   source => (* Loads of constants or from an address. *)
                llLoadArgument({source=codeExtArgument source, dest=realDestReg, kind=kind}, code)
    end

|   codeExtended _ ({instr=StoreArgument{ source, base=PReg bReg, offset, index, kind, ... }, ...}, code) =
    let
        (* With an ObjectIndex the base is scaled and ebx holds the heap base. *)
        val (baseReg, indexVal) =
            case index of
                ObjectIndex => (ebx, Index4(getAllocatedGenReg bReg))
            |   _ => (getAllocatedGenReg bReg, codeExtIndex index)
    in
        case (decache source, kind) of
            (RegisterArgument(PReg sReg), MoveByte) =>
                if targetArch <> Native32Bit
                then
                    llStoreArgument{
                        source=codeExtArgument source, base=baseReg, offset=offset, index=indexVal, kind=MoveByte} :: code
                else
                (* This is complicated on X86/32. We can't use edi or esi for the store registers. Instead
                   we reserve ecx (see special case in "identify") and use that if we have to. *)
                let
                    val realStoreReg = getAllocatedReg sReg
                    val (moveCode, storeReg) =
                        if realStoreReg = GenReg edi orelse realStoreReg = GenReg esi
                        then (moveIfNecessary({src=realStoreReg, dst=GenReg ecx, kind=moveNativeWord}, code), GenReg ecx)
                        else (code, realStoreReg)
                in
                    llStoreArgument{
                        source=RegisterArg storeReg, base=baseReg, offset=offset, index=indexVal, kind=MoveByte} ::
                    moveCode
                end

        |   _ =>
                llStoreArgument{
                    source=codeExtArgument source, base=baseReg, offset=offset, index=indexVal, kind=kind} :: code
    end

|   codeExtended _ ({instr=LoadMemReg { offset, dest=PReg pr, kind}, ...}, code) =
        (* Load from the "memory registers" pointed at by rbp. *)
        llLoadArgument({source=MemoryArg{base=rbp, offset=offset, index=NoIndex}, dest=getAllocatedReg pr, kind=kind}, code)

|   codeExtended _ ({instr=StoreMemReg { offset, source=PReg pr, kind}, ...}, code) =
        (* Store into the "memory register".
*)
        llStoreArgument{
            source=RegisterArg(getAllocatedReg pr), base=rbp, offset=offset, index=NoIndex, kind=kind} ::
        code

|   codeExtended _ ({instr=BeginFunction{regArgs, ...}, ...}, code) =
    (* Function prologue: emit the stack check (testRegAndTrap) and move each incoming
       register argument into the register the allocator chose for it. *)
    let
        val minStackCheck = 20
        val saveRegs = List.mapPartial(fn (_, GenReg r) => SOME r | _ => NONE) regArgs
        val preludeCode =
            if stackRequired >= minStackCheck
            then
            let
                (* Compute the necessary amount in edi and compare that. *)
                val stackByteAdjust = ~ (Word.toInt nativeWordSize) * stackRequired
                val testEdiCode =
                    testRegAndTrap (edi, StackOverflowCallEx, saveRegs)
            in
                (* N.B. In reverse order. *)
                testEdiCode @ [LoadAddress{output=edi, base=SOME esp, index=NoIndex, offset=stackByteAdjust, opSize=nativeWordOpSize}]
            end

            else testRegAndTrap (esp, StackOverflowCall, saveRegs)

        (* Only move arguments whose pseudo-register is actually used. *)
        val usedRegs = List.filter (isUsed o #1) regArgs
        fun mkPair(PReg pr, rr) = {src=rr,dst=getAllocatedReg pr}
        val regPairs = List.map mkPair usedRegs
    in
        moveMultipleRegisters(regPairs, preludeCode @ code)
    end

|   codeExtended _ ({instr=TailRecursiveCall{callKind, regArgs=oRegArgs, stackArgs=oStackArgs, stackAdjust, currStackSize, workReg=PReg wReg}, ...}, code) =
    let
        val regArgs = List.map (fn (arg, reg) => (decache arg, reg)) oRegArgs
        and stackArgs = List.map(fn {src, stack } => {src=decache src, stack=stack}) oStackArgs
        val workReg = getAllocatedReg wReg

        (* We must leave stack entries as stack entries for the moment. *)
        fun codeArg(StackLocation{wordOffset, cache=NONE, ...}) = StackSource wordOffset
        |   codeArg arg = OtherSource(codeExtArgument arg)

        val extStackArgs = map (fn {stack, src} => {dst=StackDest(stack+currStackSize), src=codeArg src}) stackArgs
        val extRegArgs = map (fn (a, r) => {src=codeArg a, dst=RegDest r}) regArgs

        (* Tail recursive calls are complicated because we generally have to overwrite the existing stack.
           That means storing the arguments in the right order to avoid overwriting a
           value that we are using for a different argument. *)
        fun codeTailCall(arguments: {dst: destinations, src: llsource} list, stackAdjust, code) =
            if stackAdjust < 0
            then
            let
                (* If the function we're calling takes more arguments on the stack than the
                   current function we will have to extend the stack. Do that by pushing the
                   argument whose offset is at -1. Then adjust all the offsets and repeat. *)
                val {src=argM1, ...} = valOf(List.find(fn {dst=StackDest ~1, ...} => true | _ => false) arguments)
                fun renumberArgs [] = []
                |   renumberArgs ({dst=StackDest ~1, ...} :: args) = renumberArgs args (* Remove the one we've done. *)
                |   renumberArgs ({dst, src} :: args) =
                    let
                        val newDest = case dst of StackDest d => StackDest(d+1) | regDest => regDest
                        val newSrc =
                            case src of
                                StackSource wordOffset => StackSource(wordOffset+1)
                            |   other => other
                    in
                        {dst=newDest, src=newSrc} :: renumberArgs args
                    end
            in
                codeTailCall(renumberArgs arguments, stackAdjust+1,
                    PushToStack(sourceAsGenRegOrMem(sourceToX86Code argM1)) :: code)
            end
            else
            let
                val loadArgs = moveMultipleValues(arguments, SOME workReg, code)
            in
                if stackAdjust = 0
                then loadArgs
                else ResetStack{numWords=stackAdjust, preserveCC=false} :: loadArgs
            end
    in
        JumpAddress(codeCallKind callKind) ::
        codeTailCall(extStackArgs @ extRegArgs, stackAdjust+currStackSize, code)
    end

|   codeExtended _ ({instr=FunctionCall{callKind, regArgs=oRegArgs, stackArgs=oStackArgs, dest=PReg dReg, realDest, saveRegs}, ...}, code) =
    let
        val regArgs = List.map (fn (arg, reg) => (decache arg, reg)) oRegArgs
        and stackArgs = List.map decache oStackArgs

        val destReg = getAllocatedReg dReg

        (* Push the stack arguments, adjusting any rsp-relative values for what has
           already been pushed (argNum is the number of pushes done so far). *)
        fun pushStackArgs ([], _, code) = code

        |   pushStackArgs (ContainerAddr {stackOffset, ...} ::args, argNum, code) =
            let
                val adjustedAddr = stackOffset+argNum
                (* If there is an offset relative to rsp we need to add this in. *)
                val addOffset =
                    if adjustedAddr = 0
                    then []
                    else [ArithMemConst{opc=ADD, address={offset=0, base=esp, index=NoIndex},
                              source=LargeInt.fromInt(adjustedAddr*Word.toInt nativeWordSize), opSize=nativeWordOpSize}]
            in
                pushStackArgs(args, argNum+1, addOffset @ PushToStack(RegisterArg esp) :: code)
            end

        |   pushStackArgs (StackLocation {wordOffset, container, field, ...} ::args, argNum, code) =
            let
                (* Have to adjust the offsets of stack arguments. *)
                val adjusted =
                    StackLocation{wordOffset=wordOffset+argNum, container=container, field=field+argNum,
                                  cache=NONE}
            in
                pushStackArgs(args, argNum+1, PushToStack(codeExtArgumentAsGenReg adjusted) :: code)
            end

        |   pushStackArgs (arg::args, argNum, code) =
                pushStackArgs(args, argNum+1, PushToStack(codeExtArgumentAsGenReg arg) :: code)

        val pushedArgs = pushStackArgs(stackArgs, 0, code (* Initial code *))
        (* We have to adjust any stack offset to account for the arguments we've pushed. *)
        val numStackArgs = List.length stackArgs

        (* We don't currently allow the arguments to be memory locations and instead
           force them into registers. That may be simpler especially if we can get the
           values directly into the required register. *)
        fun getRegArgs(RegisterArgument(PReg pr), reg) =
                SOME{dst=reg, src=getAllocatedReg pr}
        |   getRegArgs(StackLocation {cache=SOME(PReg pr), ...}, reg) =
                SOME{dst=reg, src=getAllocatedReg pr}
        |   getRegArgs(MemoryLocation _, _) = raise InternalError "FunctionCall - MemoryLocation"
        |   getRegArgs _ = NONE

        val loadRegArgs =
            moveMultipleRegisters(List.mapPartial getRegArgs regArgs, pushedArgs)

        (* These are all items we can load without requiring a source register.
           That includes loading from the stack. *)
        fun getConstArgs((AddressConstant m, reg), code) =
                llLoadArgument({source=AddressConstArg m, dest=reg, kind=movePolyWord}, code)
        |   getConstArgs((IntegerConstant i, reg), code) =
                llLoadArgument({source=NonAddressConstArg i, dest=reg, kind=movePolyWord}, code)
        |   getConstArgs((StackLocation { cache=SOME _, ...}, _), code) = code    (* Handled by getRegArgs. *)
        |   getConstArgs((StackLocation { wordOffset, ...}, reg), code) =
                (* Offsets are relative to rsp and must allow for the arguments pushed above. *)
                llLoadArgument({source=MemoryArg{offset=(wordOffset+numStackArgs)*Word.toInt nativeWordSize, base=esp, index=NoIndex},
                                dest=reg, kind=moveNativeWord}, code)
        |   getConstArgs((ContainerAddr {stackOffset, ...}, reg), code) =
                if stackOffset+numStackArgs = 0
                then llLoadArgument({source=RegisterArg(GenReg esp), dest=reg, kind=moveNativeWord}, code)
                else LoadAddress{ output=asGenReg reg, offset=(stackOffset+numStackArgs)*Word.toInt nativeWordSize, base=SOME esp,
                                  index=NoIndex, opSize=nativeWordOpSize } :: code
        |   getConstArgs((RegisterArgument _, _), code) = code    (* Handled by getRegArgs. *)
        |   getConstArgs((MemoryLocation _, _), code) = code
        val loadConstArgs = List.foldl getConstArgs loadRegArgs regArgs

        (* Push the registers before the call and pop them afterwards.
*)
        (* Built in reverse: the pushes precede the call, the pops follow it. *)
        fun makeSaves([], code) = CallAddress(codeCallKind callKind) :: code
        |   makeSaves(PReg reg::regs, code) =
            let
                val areg = getAllocatedGenReg reg
                (* eax carries the result and the argument registers are loaded after the
                   saves, so neither may appear in the save list. *)
                val _ = areg = eax andalso raise InternalError "codeExtended: eax in save regs"
                val _ = if List.exists(fn (_, r) => r = GenReg areg) regArgs then raise InternalError "codeExtended: arg reg in save regs" else ()
            in
                PopR areg :: makeSaves(regs, PushToStack(RegisterArg areg) :: code)
            end

    in
        moveIfNecessary({dst=destReg, src=realDest, kind=case realDest of GenReg _ => moveNativeWord | _ => MoveDouble},
            makeSaves(saveRegs, loadConstArgs))
    end

|   codeExtended _ ({instr=AllocateMemoryOperation{ size, flags, dest=PReg dReg, saveRegs}, ...}, code) =
    let
        val preserve = getSaveRegs saveRegs
    in
        llAllocateMemoryOperation({ size=size, flags=flags, dest=getAllocatedReg dReg, saveRegs=preserve}, code)
    end

|   codeExtended _ ({instr=AllocateMemoryVariable{size=PReg size, dest=PReg dest, saveRegs}, ...}, code) =
    let
        (* Simple case - no initialiser. *)
        val saveRegs = getSaveRegs saveRegs
        val sReg = getAllocatedGenReg size and dReg = getAllocatedGenReg dest
        val _ = sReg <> dReg
            orelse raise InternalError "codeGenICode-AllocateMemoryVariable"

        (* N.B. This list is in reverse execution order: allocate, untag, store length. *)
        val allocCode =
        [
            (* Store it as the length field. *)
            Move{source=RegisterArg sReg, moveSize=opSizeToMove polyWordOpSize,
                 destination=MemoryArg {base=dReg, offset= ~ (Word.toInt wordSize), index=NoIndex}},
            (* Untag the length *)
            ShiftConstant{ shiftType=SHR, output=sReg, shift=0w1, opSize=polyWordOpSize},
            (* Allocate the memory *)
            AllocStoreVariable{ size=sReg, output=dReg, saveRegs=saveRegs}
        ]
        (* Convert to an object index if necessary. *)
        val convertToObjId =
            if targetArch = ObjectId32Bit
            then [ ShiftConstant{ shiftType=SHR, output=dReg, shift=0w2, opSize=OpSize64 },
                   ArithToGenReg{ opc=SUB, output=dReg, source=RegisterArg ebx, opSize=nativeWordOpSize } ]
            else []
    in
        convertToObjId @ allocCode @ code
    end

|   codeExtended _ ({instr=InitialiseMem{size=PReg sReg, addr=PReg aReg, init=PReg iReg}, ...}, code) =
        (* We are going to use rep stosl/q to set the memory.
           That requires the length to be in ecx, the initialiser to be in eax and
           the destination to be edi. *)
        RepeatOperation (if polyWordOpSize = OpSize64 then STOS64 else STOS32)::
        moveIfNecessary({src=getAllocatedReg iReg, dst=GenReg eax, kind=moveNativeWord},
            moveIfNecessary({src=getAllocatedReg aReg, dst=GenReg edi, kind=moveNativeWord},
                moveIfNecessary({src=getAllocatedReg sReg, dst=GenReg ecx, kind=moveNativeWord}, code)))

|   codeExtended _ ({instr=InitialisationComplete, ...}, code) = StoreInitialised :: code

|   codeExtended _ ({instr=BeginLoop, ...}, code) = code    (* No code: just a marker. *)

|   codeExtended _ ({instr=JumpLoop{regArgs, stackArgs, checkInterrupt, workReg}, ...}, code) =
    let
        val workReg = Option.map (fn PReg r => getAllocatedReg r) workReg
        (* TODO: Make the sources and destinations "friends". *)
        (* We must leave stack entries as stack entries for the moment as with TailCall.
*)
        fun codeArg(StackLocation{wordOffset, ...}) = StackSource wordOffset
        |   codeArg arg = OtherSource(codeExtArgument arg)
        val extStackArgs = map (fn (src, stack, _) => {dst=StackDest stack, src=codeArg src}) stackArgs
        val extRegArgs = map (fn (a, PReg r) => {src=codeArg a, dst=RegDest(getAllocatedReg r)}) regArgs
        (* Optionally test for an interrupt/stack overflow before jumping back. *)
        val checkCode =
            case checkInterrupt of
                NONE => []
            |   SOME saveRegs => testRegAndTrap (esp, StackOverflowCall, getSaveRegs saveRegs)
    in
        checkCode @ moveMultipleValues(extStackArgs @ extRegArgs, workReg, code)
    end

|   codeExtended _ ({instr=RaiseExceptionPacket{ packetReg=PReg preg }, ...}, code) =
        (* We need a work register here. It can be any register other than eax since
           we don't preserve registers across calls. *)
        RaiseException { workReg=ecx } :: moveIfNecessary({src=getAllocatedReg preg, dst=GenReg eax, kind=moveNativeWord}, code)

|   codeExtended _ ({instr=ReserveContainer{size, ...}, ...}, code) =
        (* The memory must be cleared in case we have a GC. *)
        List.tabulate(size, fn _ => PushToStack(NonAddressConstArg(tag 0))) @ code

|   codeExtended {flow} ({instr=IndexedCaseOperation{testReg=PReg tReg, workReg=PReg wReg}, ...}, code) =
    let
        val testReg = getAllocatedReg tReg
        val workReg = getAllocatedReg wReg
        val _ = testReg <> workReg orelse raise InternalError "IndexedCaseOperation - same registers"
        (* (The inequality check is repeated on the underlying general registers.) *)
        val rReg = asGenReg testReg and wReg = asGenReg workReg
        val _ = rReg <> wReg orelse raise InternalError "IndexedCaseOperation - same registers"
        (* This should only be within a block with an IndexedBr flow type. *)
        val cases =
            case flow of IndexedBr cases => cases | _ => raise InternalError "codeGenICode: IndexedCaseOperation"
        val caseLabels = map getBlockLabel cases
        val startJumpTable = makeLabel()
        (* Compute the jump address. The index is a tagged
           integer so it is already multiplied by 2. We need to
           multiply by four to get the correct size. Subtract off the
           shifted tag. *)
        val jumpSize = ref JumpSize8
    in
        JumpTable{cases=caseLabels, jumpSize=jumpSize} :: JumpLabel startJumpTable :: JumpAddress(RegisterArg wReg) ::
        IndexedJumpCalc{ addrReg=wReg, indexReg=rReg, jumpSize=jumpSize } ::
        LoadLabelAddress{label=startJumpTable, output=wReg} :: code
    end

|   codeExtended _ ({instr=LockMutable{addr=PReg pr}, ...}, code) =
    let
        val (bReg, index) =
            if targetArch = ObjectId32Bit
            then (ebx, Index4(asGenReg(getAllocatedReg pr)))
            else (asGenReg(getAllocatedReg pr), NoIndex)
    in
        (* Mask off the mutable bit. *)
        ArithByteMemConst{opc=AND, address={base=bReg, offset= ~1, index=index}, source=0wxff - F_mutable} :: code
    end

|   codeExtended _ ({instr=WordComparison{ arg1=PReg pr, arg2, opSize, ... }, ...}, code) =
        ArithToGenReg {opc=CMP, output=asGenReg(getAllocatedReg pr), source=codeExtArgumentAsGenReg arg2, opSize=opSize} :: code

|   codeExtended _ ({instr=CompareLiteral{ arg1, arg2, opSize, ... }, ...}, code) =
    (
        case decache arg1 of (* N.B. We MUST decache since we're assuming that the base reg is not used.
*)
            RegisterArgument(PReg pr) =>
                ArithToGenReg {opc=CMP, output=asGenReg(getAllocatedReg pr), source=NonAddressConstArg arg2, opSize=opSize} :: code
        |   MemoryLocation{base=PReg br, offset, index=ObjectIndex, ...} =>
                ArithMemConst{ opc=CMP,
                    address={offset=offset, base=ebx, index=Index4(asGenReg(getAllocatedReg br))}, source=arg2, opSize=opSize } :: code
        |   MemoryLocation{base=PReg br, index, offset, ...} =>
                ArithMemConst{ opc=CMP,
                    address={offset=offset, base=asGenReg(getAllocatedReg br), index=codeExtIndex index}, source=arg2, opSize=opSize } :: code
        |   StackLocation{wordOffset, ...} =>
                ArithMemConst{ opc=CMP, address={offset=wordOffset*Word.toInt nativeWordSize, base=esp, index=NoIndex}, source=arg2, opSize=opSize } :: code
        |   _ => raise InternalError "CompareLiteral"
    )

|   codeExtended _ ({instr=CompareByteMem{ arg1={base=PReg br, offset, index}, arg2, ... }, ...}, code) =
    let
        val (bReg, index) =
            case index of
                ObjectIndex => (ebx, Index4(asGenReg(getAllocatedReg br)))
            |   _ => (asGenReg(getAllocatedReg br), codeExtIndex index)
    in
        ArithByteMemConst{ opc=CMP, address={offset=offset, base=bReg, index=index}, source=arg2 } :: code
    end

    (* Set up an exception handler. *)
|   codeExtended {flow} ({instr=PushExceptionHandler{workReg=PReg hReg}, ...}, code) =
    let (* Set up an exception handler. *)
        val workReg=getAllocatedReg hReg
        (* Although we're pushing this to the stack we need to use LEA on the
           X86/64 and some arithmetic on the X86/32. We need a work reg for that. *)
        val handleReg = asGenReg workReg
        (* This should only be within a block with a SetHandler flow type. *)
        val handleLabel =
            case flow of
                SetHandler{ handler, ...} => handler
            |   _ => raise InternalError "codeGenICode: PushExceptionHandler"
        val labelRef = getBlockLabel handleLabel
        (* Set up the handler by pushing the old handler to the stack, pushing the
           entry point and setting the handler address to the current stack pointer. *)
    in
        (
            Move{source=RegisterArg esp, destination=MemoryArg {offset=memRegHandlerRegister, base=ebp, index=NoIndex},
                 moveSize=opSizeToMove nativeWordOpSize} ::
            PushToStack(RegisterArg handleReg) ::
            LoadLabelAddress{ label=labelRef, output=handleReg} ::
            PushToStack(MemoryArg{base=ebp, offset=memRegHandlerRegister, index=NoIndex}) :: code)
    end

    (* Pop an exception handler at the end of a handled section. Executed if no exception has been raised.
       This removes items from the stack. *)
|   codeExtended _ ({instr=PopExceptionHandler{workReg=PReg wReg, ...}, ...}, code) =
    let
        val workReg = getAllocatedReg wReg
        val wReg = asGenReg workReg
    in
        (* The stack pointer has been adjusted to just above the two words that were stored
           in PushExceptionHandler. *)
        (
            Move{source=RegisterArg wReg, destination=MemoryArg {offset=memRegHandlerRegister, base=ebp, index=NoIndex},
                 moveSize=opSizeToMove nativeWordOpSize} ::
            PopR wReg ::
            ResetStack{numWords=1, preserveCC=false} :: code)
    end

    (* Start of a handler. Sets the address associated with PushExceptionHandler and
       provides a register for the packet.*)
|   codeExtended _ ({instr=BeginHandler{packetReg=PReg pReg, workReg=PReg wReg}, ...}, code) =
    let
        (* The exception packet is in rax. *)
        val realPktReg = getAllocatedReg pReg
        val realWorkreg = getAllocatedGenReg wReg
        (* The code here is almost the same as PopExceptionHandler.
           The only real difference
           is that PopExceptionHandler needs to pass the result of executing the handled code
           which could be in any register. This code needs to transmit the exception packet
           and that is always in rax. *)
        val beginHandleCode =
            Move{source=RegisterArg realWorkreg, destination=MemoryArg {offset=memRegHandlerRegister, base=ebp, index=NoIndex},
                 moveSize=opSizeToMove nativeWordOpSize} ::
            PopR realWorkreg :: ResetStack{numWords=1, preserveCC=false} ::
            Move{ source=MemoryArg{base=ebp, offset=memRegHandlerRegister, index=NoIndex},
                  destination=RegisterArg esp, moveSize=opSizeToMove nativeWordOpSize } :: code
    in
        moveIfNecessary({src=GenReg eax, dst=realPktReg, kind=moveNativeWord }, beginHandleCode)
    end

|   codeExtended _ ({instr=ReturnResultFromFunction { resultReg=PReg resReg, realReg, numStackArgs }, ...}, code) =
    let
        val resultReg = getAllocatedReg resReg
        (* If for some reason it's not in the right register we have to move it there. *)
    in
        ReturnFromFunction numStackArgs :: moveIfNecessary({src=resultReg, dst=realReg, kind=moveNativeWord}, code)
    end

|   codeExtended _ ({instr=ArithmeticFunction{oper=SUB, resultReg=PReg resReg, operand1=PReg op1Reg,
                                              operand2, opSize, ...}, ...}, code) =
        (* Subtraction - this is special because it can only be done one way round. The first argument must
           be in a register.
*) 1196 in 1197 ArithToGenReg { opc=oper, output=asGenReg realDestReg, source=op2Arg, opSize=opSize } :: 1198 moveIfNecessary({src=realOp1Reg, dst=realDestReg, kind=opSizeToIMove opSize}, code) 1199 end 1200 ) 1201 1202 | codeExtended _ ({instr=TestTagBit{arg, ...}, ...}, code) = 1203 TestByteBits{arg=codeExtArgumentAsGenReg arg, bits=0w1} :: code 1204 1205 | codeExtended _ ({instr=PushValue {arg, ...}, ...}, code) = PushToStack(codeExtArgumentAsGenReg arg) :: code 1206 1207 | codeExtended _ ({instr=CopyToCache{source=PReg sreg, dest as PReg dreg, kind}, ...}, code) = 1208 if not (isUsed dest) 1209 then code 1210 else 1211 let 1212 val realDestReg = getAllocatedReg dreg 1213 (* Get the source register using the current destination as a preference. *) 1214 val realSrcReg = getAllocatedReg sreg 1215 in 1216 (* If the source is the same as the destination we don't need to do anything. *) 1217 moveIfNecessary({src=realSrcReg, dst=realDestReg, kind=kind}, code) 1218 end 1219 1220 | codeExtended _ ({instr=ResetStackPtr {numWords, preserveCC}, ...}, code) = 1221 ( 1222 numWords >= 0 orelse raise InternalError "codeGenICode: ResetStackPtr - negative offset"; 1223 ResetStack{numWords=numWords, preserveCC=preserveCC} :: code 1224 ) 1225 1226 | codeExtended _ ({instr=StoreToStack{ source, stackOffset, ... }, ...}, code) = 1227 llStoreArgument{ 1228 source=codeExtArgument source, base=esp, offset=stackOffset*Word.toInt nativeWordSize, index=NoIndex, kind=moveNativeWord} :: code 1229 1230 | codeExtended _ ({instr=TagValue{source=PReg srcReg, dest as PReg dReg, opSize, ...}, ...}, code) = 1231 if not (isUsed dest) 1232 then code 1233 else 1234 let 1235 val regResult = asGenReg(getAllocatedReg dReg) 1236 val realSReg = asGenReg(getAllocatedReg srcReg) 1237 in 1238 (* N.B. Using LEA with a base register and an index multiplier of 1 is shorter than 1239 using no base register and a multiplier of two. 
*) 1240 (* TODO: If the value we're tagging is a byte or a 16-bit value we can use OpSize32 and possibly 1241 save the Rex byte. *) 1242 LoadAddress{ output=regResult, offset=1, base=SOME realSReg, index=Index1 realSReg, opSize=opSize } :: code 1243 end 1244 1245 | codeExtended _ ({instr=UntagValue{dest as PReg dReg, cache=SOME(PReg cacheReg), opSize, ...}, ...}, code) = 1246 if not (isUsed dest) 1247 then code 1248 else moveIfNecessary({src=getAllocatedReg cacheReg, dst=getAllocatedReg dReg, kind=opSizeToIMove opSize}, code) 1249 1250 | codeExtended _ ({instr=UntagValue{source=PReg sReg, dest as PReg dReg, isSigned, opSize, ...}, ...}, code) = 1251 if not (isUsed dest) 1252 then code 1253 else 1254 let 1255 val regResult = getAllocatedReg dReg 1256 val realSReg = getAllocatedReg sReg 1257 in 1258 (* For most cases we're going to be using a 32-bit word if this is 32-in-64. The exception 1259 is when converting a word to a signed large-word. *) 1260 ShiftConstant{ shiftType=if isSigned then SAR else SHR, output=asGenReg regResult, shift=0w1, opSize=opSize } :: 1261 moveIfNecessary({src=realSReg, dst=regResult, kind=opSizeToIMove opSize}, code) 1262 end 1263 1264 | codeExtended _ ({instr=LoadEffectiveAddress{base, offset, index=ObjectIndex, dest=PReg dReg, opSize}, ...}, code) = 1265 let 1266 val destReg = asGenReg(getAllocatedReg dReg) 1267 val bReg = 1268 case base of 1269 SOME(PReg br) => asGenReg(getAllocatedReg br) 1270 | NONE => raise InternalError "LoadEffectiveAddress - ObjectIndex but no base" 1271 in 1272 LoadAddress{ output=destReg, offset=offset, base=SOME ebx, index=Index4 bReg, opSize=opSize } :: code 1273 end 1274 1275 | codeExtended _ ({instr=LoadEffectiveAddress{base, offset, index, dest=PReg dReg, opSize}, ...}, code) = 1276 let 1277 val destReg = asGenReg(getAllocatedReg dReg) 1278 val bReg = 1279 case base of 1280 SOME(PReg br) => SOME(asGenReg(getAllocatedReg br)) 1281 | NONE => NONE 1282 val indexR = codeExtIndex index 1283 in 1284 LoadAddress{ 
            output=destReg, offset=offset, base=bReg, index=indexR, opSize=opSize } :: code
    end

|   codeExtended _ ({instr=ShiftOperation{shift, resultReg=PReg resReg, operand=PReg operReg, shiftAmount=IntegerConstant i, opSize, ...}, ...}, code) =
    (* Shift by a constant amount. *)
    let
        val realDestReg = getAllocatedReg resReg
        val realOpReg = getAllocatedReg operReg
    in
        ShiftConstant{ shiftType=shift, output=asGenReg realDestReg, shift=Word8.fromLargeInt i, opSize=opSize } ::
        moveIfNecessary({src=realOpReg, dst=realDestReg, kind=opSizeToIMove opSize}, code)
    end

|   codeExtended _ ({instr=ShiftOperation{shift, resultReg=PReg resReg, operand=PReg operReg,
                                          shiftAmount=RegisterArgument(PReg shiftReg), opSize, ...}, ...}, code) =
    let
        val realDestReg = getAllocatedReg resReg
        val realShiftReg = getAllocatedReg shiftReg
        val realOpReg = getAllocatedReg operReg
        (* We want the shift in ecx. We may not have got it there but the register
           should be free. The shift is masked to 5 or 6 bits so we have to
           check for larger shift values at a higher level.*)
    in
        ShiftVariable{ shiftType=shift, output=asGenReg realDestReg, opSize=opSize } ::
        moveIfNecessary({src=realOpReg, dst=realDestReg, kind=opSizeToIMove opSize},
            moveIfNecessary({src=realShiftReg, dst=GenReg ecx, kind=Move32Bit (* < 64*)}, code))
    end

|   codeExtended _ ({instr=ShiftOperation _, ...}, _) = raise InternalError "codeExtended - ShiftOperation"

|   codeExtended _ ({instr=
        Multiplication{resultReg=PReg resReg, operand1=PReg op1Reg,
                       operand2, opSize, ...}, ...}, code) =
    (
        case decache operand2 of
            RegisterArgument(PReg op2Reg) =>
            let
                (* Treat exactly the same as ArithmeticFunction. *)
                val realDestReg = getAllocatedGenReg resReg
                val realOp1Reg = getAllocatedGenReg op1Reg
                and realOp2Reg = getAllocatedGenReg op2Reg
                val (operandReg, moveInstr) =
                    if realOp1Reg = realDestReg
                    then (realOp2Reg, code)
                    else if realOp2Reg = realDestReg
                    then (realOp1Reg, code)
                    else (realOp2Reg, Move{source=RegisterArg realOp1Reg, destination=RegisterArg realDestReg, moveSize=opSizeToMove opSize} :: code)
            in
                MultiplyR { source=RegisterArg operandReg, output=realDestReg, opSize=opSize } :: moveInstr
            end
        |   operand2 =>
            (* Multiply operation with the first argument in a register and the second a constant or memory location. *)
            let
                val realDestReg = getAllocatedReg resReg
                val realOp1Reg = getAllocatedReg op1Reg
                val op2Arg = codeExtArgumentAsGenReg operand2
            in
                MultiplyR { output=asGenReg realDestReg, source=op2Arg, opSize=opSize } ::
                moveIfNecessary({src=realOp1Reg, dst=realDestReg, kind=opSizeToIMove opSize}, code)
            end
    )

|   codeExtended _ ({instr=Division{isSigned, dividend=PReg regDivid, divisor, quotient=PReg regQuot,
                                    remainder=PReg regRem, opSize}, ...}, code) =
    let
        (* TODO: This currently only supports the dividend in a register. LargeWord division will
           generally load the argument from a box so we could support a memory argument for that
           case. Word and integer values will always have to be detagged. *)
        (* Division is specific as to the registers. The dividend must be eax, quotient is
           eax and the remainder is edx. *)
        val realDiviReg = getAllocatedReg regDivid
        val realQuotReg = getAllocatedReg regQuot
        val realRemReg = getAllocatedReg regRem
        val divisorArg = codeExtArgument divisor
        val divisorReg = argAsGenReg divisorArg
        (* The divisor must not clash with the fixed eax/edx used by DIV/IDIV. *)
        val _ = divisorReg <> eax andalso divisorReg <> edx orelse raise InternalError "codeGenICode: Division"
        (* rdx needs to be set to the high order part of the dividend.
           For signed
           division that means sign-extending rdx, for unsigned division we clear it.
           We only need a 32-bit clear since the top 32-bits are cleared anyway. *)
        val setRDX =
            if isSigned then SignExtendForDivide opSize
            else ArithToGenReg{ opc=XOR, output=edx, source=RegisterArg edx, opSize=OpSize32 }
    in
        (* We may need to move one or more of the registers although normally that
           won't be necessary. Almost certainly only either the remainder or the
           quotient will actually be used. *)
        moveMultipleRegisters([{src=GenReg eax, dst=realQuotReg}, {src=GenReg edx, dst=realRemReg}],
            DivideAccR {arg=divisorReg, isSigned=isSigned, opSize=opSize} :: setRDX ::
            moveIfNecessary({src=realDiviReg, dst=GenReg eax, kind=opSizeToIMove opSize}, code))
    end

|   codeExtended _ ({instr=AtomicExchangeAndAdd{base=PReg bReg, source=PReg sReg}, ...}, code) =
    let
        val baseReg = asGenReg (getAllocatedReg bReg) and outReg = asGenReg (getAllocatedReg sReg)
        val address =
            if targetArch = ObjectId32Bit
            then {base=ebx, index=Index4 baseReg, offset=0}
            else {base=baseReg, index=NoIndex, offset=0}
    in
        AtomicXAdd{address=address, output=outReg, opSize=polyWordOpSize} :: code
    end

|   codeExtended _ ({instr=BoxValue{boxKind, source=PReg sReg, dest as PReg dReg, saveRegs}, ...}, code) =
    if not (isUsed dest)
    then code
    else
    (* Allocate a box of the appropriate size, store the value into it and mark the
       store complete (StoreInitialised). *)
    let
        val preserve = getSaveRegs saveRegs
        val (srcReg, boxSize, moveKind) =
            case boxKind of
                BoxLargeWord => (getAllocatedReg sReg, Word.toInt(nativeWordSize div wordSize), moveNativeWord)
            |   BoxX87Double => (getAllocatedReg sReg, Word.toInt(0w8 div wordSize), MoveDouble)
            |   BoxX87Float => (getAllocatedReg sReg, Word.toInt(0w4 div wordSize), MoveFloat)
            |   BoxSSE2Double => (getAllocatedReg sReg, Word.toInt(0w8 div wordSize), MoveDouble)
            |   BoxSSE2Float => (getAllocatedReg sReg, Word.toInt(0w4 div wordSize), MoveFloat)

        val dstReg = getAllocatedReg dReg
        val (bReg, index) =
            if targetArch = ObjectId32Bit
            then (ebx, Index4(asGenReg dstReg))
            else (asGenReg dstReg, NoIndex)
    in
        StoreInitialised ::
        llStoreArgument{ source=RegisterArg srcReg, offset=0, base=bReg, index=index, kind=moveKind} ::
        llAllocateMemoryOperation({ size=boxSize, flags=0wx1, dest=dstReg, saveRegs=preserve}, code)
    end

|   codeExtended _ ({instr=CompareByteVectors{vec1Addr=PReg v1Reg, vec2Addr=PReg v2Reg, length=PReg lReg, ...}, ...}, code) =
        (* There's a complication here. CompareByteVectors generates REPE CMPSB to compare
           the vectors but the condition code is only set if CMPSB is executed at least
           once. If the value in RCX/ECX is zero it will never be executed and the
           condition code will be unchanged. We want the result to be "equal" in that
           case so we need to ensure that is the case. It's quite possible that the
           condition code has just been set by shifting RCX/ECX to remove the tag in which
           case it will have set "equal" if the value was zero. We use CMP R/ECX,R/ECX which
           is two bytes in 32-bit.
           If we knew the length was non-zero (e.g. a constant) we could avoid this. *)
        RepeatOperation CMPS8 :: ArithToGenReg {opc=CMP, output=ecx, source=RegisterArg ecx, opSize=OpSize32} ::
        moveIfNecessary({src=getAllocatedReg v1Reg, dst=GenReg esi, kind=moveNativeWord},
            moveIfNecessary({src=getAllocatedReg v2Reg, dst=GenReg edi, kind=moveNativeWord},
                moveIfNecessary({src=getAllocatedReg lReg, dst=GenReg ecx, kind=moveNativeWord}, code)))

|   codeExtended _ ({instr=BlockMove{srcAddr=PReg sReg, destAddr=PReg dReg, length=PReg lReg, isByteMove}, ...}, code) =
        (* We may need to move these into the appropriate registers. They have been reserved but it's
           still possible the values could be in something else. *)
        RepeatOperation(if isByteMove then MOVS8 else if polyWordOpSize = OpSize64 then MOVS64 else MOVS32) ::
        moveIfNecessary({src=getAllocatedReg sReg, dst=GenReg esi, kind=moveNativeWord},
            moveIfNecessary({src=getAllocatedReg dReg, dst=GenReg edi, kind=moveNativeWord},
                moveIfNecessary({src=getAllocatedReg lReg, dst=GenReg ecx, kind=moveNativeWord}, code)))

|   codeExtended _ ({instr=X87Compare{arg1=PReg argReg, arg2, isDouble, ...}, ...}, code) =
    let
        val fpReg = getAllocatedFPReg argReg
        val _ = fpReg = fp0 orelse raise InternalError "codeGenICode: CompareFloatingPt not fp0"
        (* This currently pops the value. *)
        val precision = if isDouble then DoublePrecision else SinglePrecision
    in
        case codeExtArgumentAsFPReg arg2 of
            RegisterArg fpReg2 => FPArithR{opc=FCOMP, source=fpReg2} :: code
        |   MemoryArg{offset, base=baseReg, index=NoIndex} =>
                FPArithMemory{opc=FCOMP, base=baseReg, offset=offset, precision=precision} :: code
        |   AddressConstArg const =>
                FPArithConst{opc=FCOMP, source = const, precision=precision} :: code
        |   _ => raise InternalError "codeGenICode: CompareFloatingPt: TODO"
    end

|   codeExtended _ ({instr=SSE2Compare{arg1=PReg argReg, arg2, isDouble, ...}, ...}, code) =
    let
        val xmmReg = getAllocatedXMMReg argReg
        val arg2Code = codeExtArgumentAsXMMReg arg2
    in
        XMMArith { opc= if isDouble then SSE2CompDouble else SSE2CompSingle, output=xmmReg, source=arg2Code} :: code
    end

|   codeExtended _ ({instr=X87FPGetCondition{dest=PReg dReg, ...}, ...}, code) =
        (* Copy the X87 status word (via eax) into the destination register. *)
        moveIfNecessary({src=GenReg eax, dst=getAllocatedReg dReg, kind=Move32Bit},
            FPStatusToEAX :: code)

|   codeExtended _ ({instr=X87FPArith{opc, resultReg=PReg resReg, arg1=PReg op1Reg, arg2, isDouble}, ...}, code) =
    let
        val realDestReg = getAllocatedFPReg resReg
        val realOp1Reg = getAllocatedFPReg op1Reg
        val _ = realDestReg = fp0 orelse raise
InternalError "codeGenICode: FloatingPointArith not fp0" 1465 val _ = realOp1Reg = fp0 orelse raise InternalError "codeGenICode: FloatingPointArith not fp0" 1466 val op2Arg = codeExtArgumentAsFPReg arg2 1467 val precision = if isDouble then DoublePrecision else SinglePrecision 1468 in 1469 case op2Arg of 1470 MemoryArg{offset, base=baseReg, index=NoIndex} => 1471 FPArithMemory{opc=opc, base=baseReg, offset=offset, precision=precision} :: code 1472 | AddressConstArg const => 1473 FPArithConst{opc=opc, source = const, precision=precision} :: code 1474 | _ => raise InternalError "codeGenICode: X87FPArith: TODO" 1475 end 1476 1477 | codeExtended _ ({instr=X87FPUnaryOps{fpOp, dest=PReg resReg, source=PReg op1Reg}, ...}, code) = 1478 let 1479 val realDestReg = getAllocatedFPReg resReg 1480 val realOp1Reg = getAllocatedFPReg op1Reg 1481 val _ = realDestReg = fp0 orelse raise InternalError "codeGenICode: X87FPUnaryOps not fp0" 1482 val _ = realOp1Reg = fp0 orelse raise InternalError "codeGenICode: X87FPUnaryOps not fp0" 1483 in 1484 FPUnary fpOp :: code 1485 end 1486 1487 | codeExtended _ ({instr=X87Float{dest=PReg resReg, source}, ...}, code) = 1488 let 1489 val intSource = codeExtArgumentAsGenReg source 1490 val fpReg = getAllocatedFPReg resReg 1491 val _ = fpReg = fp0 orelse raise InternalError "codeGenICode: FloatFixedInt not fp0" 1492 in 1493 (* This is complicated. The integer value has to be in memory not in a 1494 register so we have to push it to the stack and then make sure it is 1495 popped afterwards. Because it is untagged it is unsafe to leave it. 
*) 1496 ResetStack{numWords=1, preserveCC=false} :: FPLoadInt{ base=esp, offset=0, opSize=polyWordOpSize } :: PushToStack intSource :: code 1497 end 1498 1499 | codeExtended _ ({instr=SSE2Float{dest=PReg resReg, source}, ...}, code) = 1500 let 1501 val xmmResReg = getAllocatedXMMReg resReg 1502 val srcReg = case codeExtArgumentAsGenReg source of RegisterArg srcReg => srcReg | _ => raise InternalError "FloatFixedInt: not reg" 1503 in 1504 XMMConvertFromInt{ output=xmmResReg, source=srcReg, opSize=polyWordOpSize} :: code 1505 end 1506 1507 | codeExtended _ ({instr=SSE2FPUnary{opc, resultReg=PReg resReg, source}, ...}, code) = 1508 let 1509 val realDestReg = getAllocatedXMMReg resReg 1510 val opArg = codeExtArgumentAsXMMReg source 1511 val sse2Op = 1512 case opc of 1513 SSE2UDoubleToFloat => SSE2DoubleToFloat 1514 | SSE2UFloatToDouble => SSE2FloatToDouble 1515 in 1516 XMMArith{ opc=sse2Op, output=realDestReg, source=opArg} :: code 1517 end 1518 1519 | codeExtended _ ({instr=SSE2FPBinary{opc, resultReg=PReg resReg, arg1=PReg op1Reg, arg2}, ...}, code) = 1520 let 1521 val realDestReg = getAllocatedXMMReg resReg 1522 val realOp1Reg = getAllocatedXMMReg op1Reg 1523 val op2Arg = codeExtArgumentAsXMMReg arg2 1524 (* xorpd and andpd require 128-bit arguments with 128-bit alignment. 
*) 1525 val _ = 1526 case (opc, op2Arg) of 1527 (SSE2BXor, RegisterArg _) => () 1528 | (SSE2BXor, _) => raise InternalError "codeGenICode - SSE2Xor not in register" 1529 | (SSE2BAnd, RegisterArg _) => () 1530 | (SSE2BAnd, _) => raise InternalError "codeGenICode - SSE2And not in register" 1531 | _ => () 1532 val doMove = 1533 if realDestReg = realOp1Reg 1534 then code 1535 else XMMArith { opc=SSE2MoveDouble, source=RegisterArg realOp1Reg, output=realDestReg } :: code 1536 val sse2Op = 1537 case opc of 1538 SSE2BAddDouble => SSE2AddDouble 1539 | SSE2BSubDouble => SSE2SubDouble 1540 | SSE2BMulDouble => SSE2MulDouble 1541 | SSE2BDivDouble => SSE2DivDouble 1542 | SSE2BAddSingle => SSE2AddSingle 1543 | SSE2BSubSingle => SSE2SubSingle 1544 | SSE2BMulSingle => SSE2MulSingle 1545 | SSE2BDivSingle => SSE2DivSingle 1546 | SSE2BXor => SSE2Xor 1547 | SSE2BAnd => SSE2And 1548 in 1549 XMMArith{ opc=sse2Op, output=realDestReg, source=op2Arg} :: doMove 1550 end 1551 1552 | codeExtended _ ({instr=TagFloat{source=PReg srcReg, dest as PReg dReg, ...}, ...}, code) = 1553 if not (isUsed dest) 1554 then code 1555 else 1556 let 1557 val _ = targetArch = Native64Bit orelse raise InternalError "TagFloat: not 64-bit" 1558 (* Copy the value from an XMM reg into a general reg and tag it. 
*) 1559 val regResult = asGenReg(getAllocatedReg dReg) 1560 val realSReg = getAllocatedXMMReg srcReg 1561 in 1562 ArithToGenReg { opc=ADD, output=regResult, source=NonAddressConstArg 1, opSize=polyWordOpSize } :: 1563 ShiftConstant{ shiftType=SHL, output=regResult, shift=0w32, opSize=OpSize64} :: 1564 MoveXMMRegToGenReg { source = realSReg, output = regResult } :: code 1565 end 1566 1567 | codeExtended _ ({instr=UntagFloat{dest as PReg dReg, cache=SOME(PReg cacheReg), ...}, ...}, code) = 1568 if not (isUsed dest) 1569 then code 1570 else moveIfNecessary({src=getAllocatedReg cacheReg, dst=getAllocatedReg dReg, kind=MoveFloat}, code) 1571 1572 | codeExtended _ ({instr=UntagFloat{source, dest as PReg dReg, ...}, ...}, code) = 1573 if not (isUsed dest) 1574 then code 1575 else 1576 let 1577 val regResult = getAllocatedXMMReg dReg 1578 in 1579 case codeExtArgumentAsGenReg source of 1580 RegisterArg realSReg => 1581 XMMShiftRight{ output=regResult, shift=0w4 (* Bytes - not bits *) } :: 1582 MoveGenRegToXMMReg {source=realSReg, output=regResult} :: code 1583 | MemoryArg{base, offset, index} => 1584 (* If the value is in memory we can just load the high order word. *) 1585 XMMArith { opc=SSE2MoveFloat, source=MemoryArg{base=base, offset=offset+4, index=index}, output=regResult } :: code 1586 | NonAddressConstArg ic => 1587 (* Shift down and then load from the non-constant area. *) 1588 XMMArith { opc=SSE2MoveFloat, source=NonAddressConstArg(IntInf.~>>(ic, 0w32)), output=regResult } :: code 1589 | _ => raise InternalError "UntagFloat - not register or memory" 1590 end 1591 1592 | codeExtended _ ({instr=GetSSE2ControlReg{dest=PReg dReg}, ...}, code) = 1593 let 1594 (* This has to work through memory. Reserve one word on the stack, get the 1595 MXCSR register into it and pop it to the register. 
*) 1596 val regResult = getAllocatedGenReg dReg 1597 in 1598 PopR regResult :: 1599 XMMStoreCSR{base=esp, offset=0, index=NoIndex } :: 1600 PushToStack(NonAddressConstArg 0) :: code 1601 end 1602 1603 | codeExtended _ ({instr=SetSSE2ControlReg{source=PReg sReg}, ...}, code) = 1604 let 1605 (* This has to work through memory. Push the register to the stack, 1606 store the value into the control register and remove it from the stack. *) 1607 val sourceReg = getAllocatedGenReg sReg 1608 in 1609 ResetStack{ numWords=1, preserveCC=false } :: 1610 XMMLoadCSR{base=esp, offset=0, index=NoIndex } :: 1611 PushToStack(RegisterArg sourceReg) :: code 1612 end 1613 1614 | codeExtended _ ({instr=GetX87ControlReg{dest=PReg dReg}, ...}, code) = 1615 let 1616 (* This has to work through memory. Reserve one word on the stack, get the 1617 X87 control register into it and pop it to the register. *) 1618 val regResult = getAllocatedGenReg dReg 1619 in 1620 PopR regResult :: 1621 FPStoreCtrlWord{base=esp, offset=0, index=NoIndex } :: 1622 PushToStack(NonAddressConstArg 0) :: code 1623 end 1624 1625 | codeExtended _ ({instr=SetX87ControlReg{source=PReg sReg}, ...}, code) = 1626 let 1627 (* This has to work through memory. Push the register to the stack, 1628 store the value into the control register and remove it from the stack. *) 1629 val sourceReg = getAllocatedGenReg sReg 1630 in 1631 ResetStack{ numWords=1, preserveCC=false } :: 1632 FPLoadCtrlWord{base=esp, offset=0, index=NoIndex } :: 1633 PushToStack(RegisterArg sourceReg) :: code 1634 end 1635 1636 | codeExtended _ ({instr=X87RealToInt{source=PReg sReg, dest=PReg dReg}, ...}, code) = 1637 let 1638 (* This has to work through memory. Reserve one word on the stack, 1639 convert the value into it and pop it to the register. 
*) 1640 val regResult = getAllocatedGenReg dReg 1641 val fpReg = getAllocatedFPReg sReg 1642 val _ = fpReg = fp0 orelse raise InternalError "codeGenICode: CompareFloatingPt not fp0" 1643 (* This currently pops the value. *) 1644 in 1645 PopR regResult :: 1646 FPStoreInt{base=esp, offset=0, index=NoIndex } :: 1647 PushToStack(NonAddressConstArg 0) :: code 1648 end 1649 1650 | codeExtended _ ({instr=SSE2RealToInt{source, dest=PReg dReg, isDouble, isTruncate}, ...}, code) = 1651 let 1652 (* The source is either an XMM register or memory. *) 1653 val regResult = getAllocatedGenReg dReg 1654 val opArg = codeExtArgumentAsXMMReg source 1655 in 1656 XMMStoreInt { 1657 source=opArg, precision=if isDouble then DoublePrecision else SinglePrecision, 1658 output = regResult, isTruncate=isTruncate } :: code 1659 end 1660 1661 | codeExtended _ ({instr=SignExtend32To64{source, dest=PReg dReg}, ...}, code) = 1662 let 1663 val regResult = getAllocatedGenReg dReg 1664 val opArg = codeExtArgumentAsGenReg source 1665 in 1666 Move{moveSize=Move32X64, source=opArg, destination=RegisterArg regResult } :: code 1667 end 1668 1669 | codeExtended _ ({instr=TouchArgument _, ...}, code) = code (* Don't need to do anything. *) 1670 1671 val newCode = codeCreate (functionName, profileObject, debugSwitches) 1672 1673 local 1674 (* processed - set to true when a block has been processed. *) 1675 val processed = Array.array(numBlocks, false) 1676 fun haveProcessed n = Array.sub(processed, n) 1677 1678 (* Find the blocks that reference this one. This isn't essential but 1679 allows us to try to generate blocks in the order of the control 1680 flow. This in turn may allow us to use short branches rather 1681 than long ones. *) 1682 val labelRefs = Array.array(numBlocks, []) 1683 1684 datatype flowCode = 1685 FlowCodeSimple of int 1686 | FlowCodeCMove of {code: operation list, trueJump: int, falseJump: int} 1687 1688 (* Process this recursively to set the references. 
If we have 1689 unreachable blocks, perhaps because they've been merged, we 1690 don't want to include them in the reference counting. 1691 This shouldn't happen now that IdentifyReferences removes 1692 unreferenced blocks. *) 1693 fun setReferences fromLabel toLabel = 1694 case Array.sub(labelRefs, toLabel) of 1695 [] => (* Not yet visited at all. *) 1696 let 1697 val ExtendedBasicBlock{ flow, ...} = Vector.sub(blocks, toLabel) 1698 val refs = 1699 case flow of 1700 ExitCode => [] 1701 | Unconditional lab => [lab] 1702 | Conditional{trueJump, falseJump, ... } => [trueJump, falseJump] 1703 | IndexedBr labs => labs 1704 | SetHandler { handler, continue } => [handler, continue] 1705 | UnconditionalHandle _ => [] 1706 | ConditionalHandle { continue, ...} => [continue] 1707 1708 val () = 1709 if fromLabel >= 0 then Array.update(labelRefs, toLabel, [fromLabel]) else () 1710 in 1711 List.app (setReferences toLabel) refs 1712 end 1713 1714 | refs => 1715 (* We've visiting this at least once. Just add us to the list. *) 1716 Array.update(labelRefs, toLabel, fromLabel :: refs) 1717 1718 val _ = setReferences 0 0 1719 1720 (* Process the blocks. We keep the "stack" explicit rather than using recursion 1721 because this allows us to select both arms of a conditional branch sooner. *) 1722 fun genCode(toDo, lastFlow, code) = 1723 case List.filter (not o haveProcessed) toDo of 1724 [] => 1725 let 1726 (* There's nothing left to do. We may need to add a final branch to the end. 
*) 1727 val finalBranch = 1728 case lastFlow of 1729 ExitCode => [] 1730 | IndexedBr _ => [] 1731 | Unconditional dest => [UncondBranch(getBlockLabel dest)] 1732 | Conditional { condition, trueJump, falseJump, ...} => 1733 [ 1734 UncondBranch(getBlockLabel falseJump), 1735 ConditionalBranch{test=condition, label=getBlockLabel trueJump} 1736 ] 1737 | SetHandler { continue, ...} => [UncondBranch(getBlockLabel continue)] 1738 | UnconditionalHandle _ => [] 1739 | ConditionalHandle { continue, ...} => [UncondBranch(getBlockLabel continue)] 1740 in 1741 finalBranch @ code (* Done. *) 1742 end 1743 1744 | stillToDo as head :: _ => 1745 let 1746 local 1747 (* Check the references. If all the sources that lead up to this have 1748 already been we won't have any backward jumps. *) 1749 fun available dest = List.all haveProcessed (Array.sub(labelRefs, dest)) 1750 1751 val continuation = 1752 case lastFlow of 1753 ExitCode => NONE 1754 | IndexedBr _ => NONE (* We could put the last branch in here. *) 1755 | Unconditional dest => 1756 if not (haveProcessed dest) andalso available dest 1757 then SOME(FlowCodeSimple dest) 1758 else NONE 1759 | Conditional {trueJump, falseJump, condition, ...} => 1760 let 1761 (* Can we replace this with a SETCC or CMOV? If both arms simply set 1762 a register to a value and either return or jump to the same location 1763 we can use a SETCC or a CMOV. *) 1764 val ExtendedBasicBlock { flow=tFlow, block=tBlock, ...} = Vector.sub(blocks, trueJump) 1765 and ExtendedBasicBlock { flow=fFlow, block=fBlock, ...} = Vector.sub(blocks, falseJump) 1766 1767 fun cmoveOrSetcc{condition, output, tSource=IntegerConstant trueValue, fSource=IntegerConstant falseValue, kind, code} = 1768 let (* Could use SETCC. Only if we can use LEA for multiplication. The result must be 1769 tagged so we will always have a multiplier. 
*) 1770 val (multiplier, fValue, testCondition) = 1771 if trueValue >= falseValue 1772 then (trueValue-falseValue, falseValue, condition) 1773 else (falseValue-trueValue, trueValue, invertTest condition) 1774 val destReg = asGenReg output 1775 in 1776 if not (targetArch = Native32Bit andalso (destReg=esi orelse destReg=edi)) 1777 (* We can't use Setcc with esi or edi on native 32-bit. *) 1778 andalso (multiplier = 2 orelse multiplier = 4 orelse multiplier = 8) 1779 (* We're using LEA so can only be multiplying by 2, 4 or 8. *) 1780 andalso is32bit fValue (* and we're going to put this in the offset *) 1781 then 1782 let 1783 val effectiveOpSize = 1784 (* We can generally use 32-bit LEA except if the result is negative. *) 1785 if kind = Move32Bit orelse fValue >= 0 andalso fValue+multiplier <= 0x7fffffff 1786 then OpSize32 else OpSize64 1787 val (index, base) = 1788 case multiplier of 1789 2 => (Index1 destReg, SOME destReg) 1790 | 4 => (Index4 destReg, NONE) 1791 | 8 => (Index8 destReg, NONE) 1792 | _ => (NoIndex, NONE) 1793 (* Try to put the instruction to zero the register before any compare. We can do it 1794 provided the register we're going to zero isn't used in the comparison. 
*) 1795 fun checkArg(RegisterArg r) = r <> destReg 1796 | checkArg(MemoryArg mem) = checkMem mem 1797 | checkArg _ = true 1798 1799 and checkMem{base, index=NoIndex, ...} = base <> destReg 1800 | checkMem{base, index=Index1 index, ...} = base <> destReg andalso index <> destReg 1801 | checkMem{base, index=Index2 index, ...} = base <> destReg andalso index <> destReg 1802 | checkMem{base, index=Index4 index, ...} = base <> destReg andalso index <> destReg 1803 | checkMem{base, index=Index8 index, ...} = base <> destReg andalso index <> destReg 1804 1805 val zeroReg = ArithToGenReg { opc=XOR, output=destReg, source=RegisterArg destReg, opSize=OpSize32 } 1806 1807 fun addXOR [] = NONE 1808 | addXOR ((instr as ResetStack _) :: tl) = 1809 (* If we can add the XOR before the ResetStack do so. *) 1810 Option.map(fn code => instr :: code) (addXOR tl) 1811 | addXOR ((instr as ArithToGenReg{output, source, ...}) :: tl) = 1812 if output <> destReg andalso checkArg source 1813 then SOME(instr :: zeroReg :: tl) 1814 else NONE 1815 | addXOR ((instr as ArithMemConst{address, ...}) :: tl) = 1816 if checkMem address 1817 then SOME(instr :: zeroReg :: tl) 1818 else NONE 1819 | addXOR ((instr as ArithByteMemConst{address, ...}) :: tl) = 1820 if checkMem address 1821 then SOME(instr :: zeroReg :: tl) 1822 else NONE 1823 | addXOR ((instr as XMMArith{source=MemoryArg mem, ...}) :: tl) = 1824 if checkMem mem 1825 then SOME(instr :: zeroReg :: tl) 1826 else NONE 1827 | addXOR ((instr as XMMArith _) :: tl) = SOME(instr :: zeroReg :: tl) 1828 | addXOR ((instr as TestByteBits{arg, ...}) :: tl) = 1829 if checkArg arg 1830 then SOME(instr :: zeroReg :: tl) 1831 else NONE 1832 | addXOR ((instr as RepeatOperation CMPS8) :: tl) = 1833 (* This uses edi, esi and ecx implicitly *) 1834 if destReg <> esi andalso destReg <> edi andalso destReg <> ecx 1835 then SOME(instr :: zeroReg :: tl) 1836 else NONE 1837 (* This seems to be just a conditional jump as a result of 1838 testing the condition code 
twice in Real.== *) 1839 | addXOR _ = NONE 1840 1841 (* If we can't put the XOR before the instruction we need to either zero 1842 it using a move which won't affect the CC or we use MOVZB to extend 1843 the byte value to 32/64 bits. *) 1844 val loadAddr = LoadAddress{output=destReg, offset=Int.fromLarge fValue, base=base, index=index, opSize=effectiveOpSize} 1845 and setCond = SetCondition{output=destReg, test=testCondition} 1846 val code = 1847 case addXOR code of 1848 SOME withXOR => loadAddr :: setCond :: withXOR 1849 | NONE => 1850 loadAddr :: 1851 (* We've already check that we're not using esi/edi on native 32-bits. *) 1852 Move{destination=RegisterArg destReg, source=RegisterArg destReg, moveSize=Move8} :: setCond :: code 1853 in 1854 SOME code 1855 end 1856 else NONE 1857 end 1858 1859 (* If either value is a memory location it isn't safe to load it. The base address 1860 may not be valid if the condition does not hold. *) 1861 | cmoveOrSetcc{tSource=MemoryLocation _, ...} = NONE 1862 | cmoveOrSetcc{fSource=MemoryLocation _, ...} = NONE 1863 1864 | cmoveOrSetcc{condition, output, tSource, fSource, kind, code} = 1865 if targetArch = Native32Bit 1866 then NONE (* CMov doesn't work for constants. *) 1867 else 1868 let 1869 val output = asGenReg output 1870 val codeTrue = codeExtArgumentAsGenReg tSource 1871 and codeFalse = codeExtArgumentAsGenReg fSource 1872 val opSize = 1873 case kind of 1874 Move32Bit => OpSize32 1875 | Move64Bit => OpSize64 1876 | _ => raise InternalError "move size" 1877 (* One argument has to be loaded into a register first and the other 1878 is conditionally moved. *) 1879 val loadFalseCmoveTrue = 1880 if (case codeFalse of RegisterArg regFalse => regFalse = output | _ => false) 1881 then true (* The false value is already in the right register. *) 1882 else if (case codeTrue of RegisterArg regTrue => regTrue = output | _ => false) 1883 then false (* The true value is in the right register - have to reverse. 
*) 1884 else if (case codeTrue of NonAddressConstArg _ => true | _ => false) 1885 then false (* The true value is a short constant. If we use a CMOV we will have to put that 1886 in the non-constant area and use a PC-relative reference. Try to avoid it. *) 1887 else true 1888 fun cmov{codeLoad, codeMove, condition} = 1889 let 1890 val load = 1891 case codeLoad of 1892 RegisterArg regLoad => 1893 moveIfNecessary({src=GenReg regLoad, dst=GenReg output, kind=opSizeToIMove opSize}, code) 1894 | codeLoad => 1895 Move{source=codeLoad, destination=RegisterArg output, moveSize=opSizeToMove opSize} :: code 1896 in 1897 CondMove{test=condition, output=output, source=codeMove, opSize=opSize} :: load 1898 end 1899 in 1900 if loadFalseCmoveTrue 1901 then SOME(cmov{codeLoad=codeFalse, codeMove=codeTrue, condition=condition}) 1902 else SOME(cmov{codeLoad=codeTrue, codeMove=codeFalse, condition=invertTest condition}) 1903 end 1904 1905 val isPossSetCCOrCmov = 1906 if not (haveProcessed trueJump) andalso available trueJump 1907 andalso not (haveProcessed falseJump) andalso available falseJump 1908 then case (tFlow, fFlow, tBlock, fBlock) of 1909 (ExitCode, 1910 ExitCode, 1911 [{instr=LoadArgument{dest=PReg tReg, source=tSource, kind=kindT}, ...}, 1912 {instr=ReturnResultFromFunction{resultReg=PReg resReg, realReg, numStackArgs, ...}, ...}], 1913 [{instr=LoadArgument{dest=PReg fReg, source=fSource, kind=kindF}, ...}, 1914 {instr=ReturnResultFromFunction _, ...}]) => 1915 (* The real register for the two sides should both be rax. 
*) 1916 let 1917 val realTReg = getAllocatedReg tReg and realFReg = getAllocatedReg fReg 1918 in 1919 if realTReg = realFReg andalso kindT = kindF andalso (kindT = Move32Bit orelse kindT = Move64Bit) 1920 then 1921 ( 1922 case cmoveOrSetcc{condition=condition, output=realTReg, tSource=tSource, fSource=fSource, 1923 kind=kindT, code=code} of 1924 SOME code => 1925 let 1926 val resultReg = getAllocatedReg resReg 1927 val code = 1928 ReturnFromFunction numStackArgs :: 1929 moveIfNecessary({src=resultReg, dst=realReg, kind=moveNativeWord}, code) 1930 in 1931 SOME{code=code, trueJump=trueJump, falseJump=falseJump} 1932 end 1933 | NONE => NONE 1934 ) 1935 else NONE 1936 end 1937 | (Unconditional tDest, 1938 Unconditional fDest, 1939 [{instr=LoadArgument{dest=PReg tReg, source=tSource, kind=kindT}, ...}], 1940 [{instr=LoadArgument{dest=PReg fReg, source=fSource, kind=kindF}, ...}]) => 1941 let 1942 val realTReg = getAllocatedReg tReg and realFReg = getAllocatedReg fReg 1943 in 1944 if tDest = fDest andalso realTReg = realFReg andalso kindT = kindF andalso (kindT = Move32Bit orelse kindT = Move64Bit) 1945 then 1946 ( 1947 case cmoveOrSetcc{condition=condition, output=realTReg, tSource=tSource, fSource=fSource, 1948 kind=kindT, code=code} of 1949 SOME code => SOME{code=code, trueJump=trueJump, falseJump=falseJump} 1950 | NONE => NONE 1951 ) 1952 else NONE 1953 end 1954 | _ => NONE 1955 else NONE 1956 in 1957 case isPossSetCCOrCmov of 1958 NONE => 1959 (* We can usually choose either destination and in nearly all cases 1960 it won't matter. The default branch is not to take forward jumps 1961 so if there is reason to believe that one branch is more likely 1962 we should follow that branch now and leave the other. If we 1963 have JO/JNO we assume that overflow is unusual. If one branch 1964 raises an exception we assume that that is unusual. 
*) 1965 let 1966 val (first, second) = 1967 case (condition, Vector.sub(blocks, falseJump)) of 1968 (JNO, _) => (trueJump, falseJump) 1969 | (_, ExtendedBasicBlock{ flow=ExitCode, block, ...}) => 1970 if List.exists(fn{instr=RaiseExceptionPacket _, ...} => true | _ => false) block 1971 then (trueJump, falseJump) 1972 else (falseJump, trueJump) 1973 | _ => (falseJump, trueJump) 1974 in 1975 if not (haveProcessed first) andalso available first 1976 then SOME(FlowCodeSimple first) 1977 else if not (haveProcessed second) andalso available second 1978 then SOME(FlowCodeSimple second) 1979 else NONE 1980 end 1981 | SOME args => SOME(FlowCodeCMove args) 1982 end 1983 | SetHandler { continue, ... } => 1984 (* We want the continuation if possible. We'll need a 1985 branch round the handler so that won't help. *) 1986 if not (haveProcessed continue) andalso available continue 1987 then SOME(FlowCodeSimple continue) 1988 else NONE 1989 | UnconditionalHandle _ => NONE 1990 | ConditionalHandle _ => NONE 1991 in 1992 (* First choice - continue the existing block. 1993 Second choice - the first item whose sources have all been 1994 processed. 1995 Third choice - something from the list. *) 1996 val picked = 1997 case continuation of 1998 SOME c => c 1999 | NONE => 2000 case List.find available stillToDo of 2001 SOME c => FlowCodeSimple c 2002 | NONE => FlowCodeSimple head 2003 end 2004 2005 in 2006 case picked of 2007 FlowCodeSimple picked => 2008 let 2009 val () = Array.update(processed, picked, true) 2010 2011 (* Code to terminate the previous block. *) 2012 val startCode = 2013 case lastFlow of 2014 ExitCode => [] 2015 | IndexedBr _ => [] 2016 | UnconditionalHandle _ => [] 2017 | Unconditional dest => 2018 if dest = picked then [] else [UncondBranch(getBlockLabel dest)] 2019 | ConditionalHandle { continue, ...} => 2020 if continue = picked then [] else [UncondBranch(getBlockLabel continue)] 2021 | SetHandler { continue, ... 
} => 2022 if continue = picked then [] else [UncondBranch(getBlockLabel continue)] 2023 | Conditional { condition, trueJump, falseJump, ...} => 2024 if picked = falseJump (* Usual case. *) 2025 then [ConditionalBranch{test=condition, label=getBlockLabel trueJump}] 2026 else if picked = trueJump 2027 then (* We have a jump to the true condition. Invert the jump. 2028 This is more than an optimisation. Because this immediately precedes the 2029 true block we're not going to generate a label. *) 2030 [ConditionalBranch{test=invertTest condition, label=getBlockLabel falseJump}] 2031 else 2032 [ 2033 UncondBranch(getBlockLabel falseJump), 2034 ConditionalBranch{test=condition, label=getBlockLabel trueJump} 2035 ] 2036 2037 (* Code-generate the body with the code we've done so far 2038 at the end. Add a label at the start if necessary. *) 2039 local 2040 (* If the previous block dropped through to this and this was 2041 the only reference then we don't need a label. *) 2042 fun onlyJumpingHere (lab: int) = 2043 if lab <> picked then false 2044 else case Array.sub(labelRefs, picked) of 2045 [singleton] => singleton = lab 2046 | _ => false 2047 2048 val noLabel = 2049 case lastFlow of 2050 ExitCode => picked = 0 (* Unless this was the first block. 
*) 2051 | Unconditional dest => onlyJumpingHere dest 2052 | Conditional { trueJump, falseJump, ...} => 2053 onlyJumpingHere trueJump orelse onlyJumpingHere falseJump 2054 | IndexedBr _ => false 2055 | SetHandler _ => false 2056 | UnconditionalHandle _ => false 2057 | ConditionalHandle { continue, ...} => onlyJumpingHere continue 2058 in 2059 val startLabel = if noLabel then [] else [JumpLabel(getBlockLabel picked)] 2060 end 2061 2062 val ExtendedBasicBlock { flow, block, ...} = Vector.sub(blocks, picked) 2063 2064 local 2065 fun genCodeBlock(instr, code) = codeExtended {flow=flow} (instr, code) 2066 in 2067 val bodyCode = List.foldl genCodeBlock (startLabel @ startCode @ code) block 2068 end 2069 2070 val addSet = 2071 case flow of 2072 ExitCode => [] 2073 | IndexedBr cases => cases 2074 | Unconditional dest => [dest] 2075 | Conditional {trueJump, falseJump, ...} => [falseJump, trueJump] 2076 | SetHandler { handler, continue } => [handler, continue] 2077 | UnconditionalHandle _ => [] 2078 | ConditionalHandle { continue, ...} => [continue] 2079 2080 in 2081 genCode(addSet @ stillToDo, flow, bodyCode) 2082 end 2083 2084 | FlowCodeCMove{code, trueJump, falseJump} => 2085 let 2086 (* We've generated a conditional move and possibly a return. If the 2087 trueJump and falseJump are only ever referenced from this block 2088 they're done, otherwise we still need to do them. 
*) 2089 val _ = 2090 case Array.sub(labelRefs, trueJump) of 2091 [_] => Array.update(processed, trueJump, true) 2092 | _ => () 2093 val _ = 2094 case Array.sub(labelRefs, falseJump) of 2095 [_] => Array.update(processed, falseJump, true) 2096 | _ => () 2097 val ExtendedBasicBlock { flow, ...} = Vector.sub(blocks, trueJump) 2098 val addSet = 2099 case flow of 2100 ExitCode => [] 2101 | Unconditional dest => [dest] 2102 | _ => raise InternalError "FlowCodeCMove" 2103 in 2104 genCode(addSet @ stillToDo, flow, code) 2105 end 2106 end 2107 in 2108 val ops = genCode([0], ExitCode, []) 2109 end 2110 in 2111 X86OPTIMISE.generateCode{code=newCode, ops=List.rev ops, 2112 labelCount= !outputLabelCount, resultClosure=resultClosure} 2113 end 2114 2115 val nGenRegs = List.length generalRegisters 2116 2117 structure Sharing = 2118 struct 2119 type intSet = intSet 2120 and extendedBasicBlock = extendedBasicBlock 2121 and regProperty = regProperty 2122 and reg = reg 2123 and closureRef = closureRef 2124 end 2125 2126end; 2127