;
;  Title:  Assembly code routines for the poly system.
;  Author:    David Matthews
;  Copyright (c) David C. J. Matthews 2000-2020
;
;  This library is free software; you can redistribute it and/or
;  modify it under the terms of the GNU Lesser General Public
;  License version 2.1 as published by the Free Software Foundation.
;
;  This library is distributed in the hope that it will be useful,
;  but WITHOUT ANY WARRANTY; without even the implied warranty of
;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;  Lesser General Public License for more details.
;
;  You should have received a copy of the GNU Lesser General Public
;  License along with this library; if not, write to the Free Software
;  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
;

;
; Registers used :-
;
;  rax:  First argument to function.  Result of function call.
;  rbx:  Second argument to function.
;  rcx:  General register
;  rdx:  Closure pointer in call.
;  rbp:  Points to memory used for extra registers
;  rsi:  General register.
;  rdi:  General register.
;  rsp:  Stack pointer.
;  r8:   Third argument to function
;  r9:   Fourth argument to function
;  r10:  Fifth argument to function
;  r11:  General register
;  r12:  General register
;  r13:  General register
;  r14:  General register
;  r15:  Memory allocation pointer

; Extra entries on the C stack
Fr_Size             EQU     64      ; Must be multiple of 16 to get alignment correct

; This is the argument vector passed in to X86AsmSwitchToPoly
; It is used to initialise the frame.  A few values are updated
; when ML returns.
; While ML code runs rbp holds the address of this structure, so every
; field is addressed as [ArgVector.<field>+rbp].  The field order and
; sizes must not change without changing whatever builds the structure.
ArgVector STRUCT
; -- ML machine state ---------------------------------------------------
LocalMPointer       QWORD   ?       ; Loaded into and saved from r15
HandlerRegister     QWORD   ?       ; Points at the handler address jumped to by raisexcept
LocalMbottom        QWORD   ?
StackLimit          QWORD   ?
ExceptionPacket     QWORD   ?       ; Address of packet to raise
; -- Four single-byte fields followed by padding up to the next QWORD ------
UnusedRequestCode   BYTE    ?       ; Byte: Io function to call.
UnusedFlag          BYTE    ?
ReturnReason        BYTE    ?       ; Byte: Reason for returning from ML (set by CALL_EXTRA).
UnusedRestore       BYTE    ?       ; Byte:
UnusedAlign         DWORD   ?
; -- Stack switching and entry points --------------------------------------
SaveCStack          QWORD   ?       ; Saved C stack frame
ThreadId            QWORD   ?       ; My thread id
StackPtr            QWORD   ?       ; Stack pointer
UnusedProgramCtr    QWORD   ?
HeapOverFlowCall    QWORD   ?
StackOverFlowCall   QWORD   ?
StackOverFlowCallEx QWORD   ?
TrapHandlerEntry    QWORD   ?       ; Called indirectly by CallTrapHandler
; -- Integer register save slots ---------------------------------------------
SaveRAX             QWORD   ?
SaveRBX             QWORD   ?
SaveRCX             QWORD   ?
SaveRDX             QWORD   ?
SaveRSI             QWORD   ?
SaveRDI             QWORD   ?
SaveR8              QWORD   ?
SaveR9              QWORD   ?
SaveR10             QWORD   ?
SaveR11             QWORD   ?
SaveR12             QWORD   ?
SaveR13             QWORD   ?
SaveR14             QWORD   ?
; -- Floating point save slots: 64 bits each, accessed with movsd ------------
SaveXMM0            QWORD   ?
SaveXMM1            QWORD   ?
SaveXMM2            QWORD   ?
SaveXMM3            QWORD   ?
SaveXMM4            QWORD   ?
SaveXMM5            QWORD   ?
SaveXMM6            QWORD   ?
ArgVector ENDS

; Values stored in ArgVector.ReturnReason.
RETURN_HEAP_OVERFLOW        EQU 1
RETURN_STACK_OVERFLOW       EQU 2
RETURN_STACK_OVERFLOWEX     EQU 3
RETURN_KILL_SELF            EQU 9

;
; CODE STARTS HERE
;
    .CODE

; Define standard call macro.
; Stores the reason code given as index in ArgVector.ReturnReason and
; then jumps to CallTrapHandler.

CALL_EXTRA  MACRO   index
    mov     byte ptr [ArgVector.ReturnReason+rbp],index
    jmp     CallTrapHandler
ENDM


; Enter ML code.  This is now only ever used to start a new thread.
; It is probably unnecessary to save the callee-save regs or load the ML regs.
; This does not set up a correct frame because we do not want to reserve a register for
; that.  RBP needs to be the original argument because we need to be able to modify
; the stack limit "register" from another thread in order to be able to interrupt
; this one.
; X86AsmSwitchToPoly
; In:    rcx = address of the ArgVector (copied to rbp, which keeps
;        pointing at it from then on).
; Out:   nothing is returned here: the procedure finishes with an
;        indirect jump into ML code instead of a ret.
; Loads: r15, rsp, xmm0-xmm6, rax, rbx, rcx, rdx, rsi, rdi, r8-r14 from
;        the ArgVector.
; The C stack pointer is written to ArgVector.SaveCStack so that
; CallTrapHandler can switch back to the C stack.
X86AsmSwitchToPoly  PROC FRAME
    push    rbp                             ; Standard entry sequence
    push    rbx                             ; Save callee-save registers
    push    r12
    push    r13
    push    r14
    push    r15
    push    rdi                             ; Callee save in Windows
    push    rsi                             ; Strictly, we should also save xmm6
    ; NOTE(review): no .pushreg or .allocstack directives accompany the
    ; pushes above, so no unwind codes describe them - confirm that
    ; unwinding through this frame is never required.
    .endprolog
    mov     rbp,rcx                         ; Move argument into rbp - this is definitely non-standard
    ; Fr_Size-56 is 8 bytes.  Together with the return address and the
    ; eight pushes rsp has now moved by 80 bytes from its value before
    ; the call, i.e. by a multiple of 16.
    sub     rsp,(Fr_Size-56)
    mov     [ArgVector.SaveCStack+rbp],rsp  ; Save the C stack pointer (rbp == rcx here)

    ; From here on we are on the ML stack.
    mov     r15,[ArgVector.LocalMPointer+rbp]
    mov     rsp,[ArgVector.StackPtr+rbp]

    ; Floating point registers: 64 bits each.
    movsd   xmm0,[ArgVector.SaveXMM0+rbp]
    movsd   xmm1,[ArgVector.SaveXMM1+rbp]
    movsd   xmm2,[ArgVector.SaveXMM2+rbp]
    movsd   xmm3,[ArgVector.SaveXMM3+rbp]
    movsd   xmm4,[ArgVector.SaveXMM4+rbp]
    movsd   xmm5,[ArgVector.SaveXMM5+rbp]
    movsd   xmm6,[ArgVector.SaveXMM6+rbp]

    ; Integer registers.  rcx stops being a copy of the argument here,
    ; which is why rbp is the base register for every access.
    mov     rbx,[ArgVector.SaveRBX+rbp]
    mov     rcx,[ArgVector.SaveRCX+rbp]
    mov     rdx,[ArgVector.SaveRDX+rbp]
    mov     rsi,[ArgVector.SaveRSI+rbp]
    mov     rdi,[ArgVector.SaveRDI+rbp]
    mov     r8,[ArgVector.SaveR8+rbp]
    mov     r9,[ArgVector.SaveR9+rbp]
    mov     r10,[ArgVector.SaveR10+rbp]
    mov     r11,[ArgVector.SaveR11+rbp]
    mov     r12,[ArgVector.SaveR12+rbp]
    mov     r13,[ArgVector.SaveR13+rbp]
    mov     r14,[ArgVector.SaveR14+rbp]
    mov     rax,[ArgVector.SaveRAX+rbp]
    cld                                     ; Clear this just in case

    ; Jump to the code address held in the first word of the closure (rdx).
    ; NOTE(review): in the POLYML32IN64 variant the closure word is read
    ; from rbx+rdx*4 - presumably rbx is a base address and rdx a scaled
    ; 32-bit reference; confirm against the code generator.
#ifdef POLYML32IN64
    jmp     qword ptr [rbx+rdx*4]
#else
    jmp     qword ptr [rdx]
#endif

; Everything up to here is considered as part of the X86AsmSwitchToPoly proc
X86AsmSwitchToPoly ENDP

; Save all the registers and enter the trap handler.
; It is probably unnecessary to save the FP state now.
X86TrapHandler PROTO C

; CallTrapHandler
; Reached by a jmp from CALL_EXTRA with rbp = address of the ArgVector
; and ArgVector.ReturnReason already set.
; 1. Stores the ML registers, ML stack pointer and r15 in the ArgVector.
; 2. Switches to the C stack saved in ArgVector.SaveCStack and calls
;    the function whose address is in ArgVector.TrapHandlerEntry with
;    ArgVector.ThreadId in rcx.
; 3. Reloads the ML state from the ArgVector and either returns with
;    ret on the ML stack or, if ArgVector.ExceptionPacket is not 1,
;    jumps to the current handler with the packet in rax.
CallTrapHandler:
    ; Integer registers.
    mov     [ArgVector.SaveRAX+rbp],rax
    mov     [ArgVector.SaveRBX+rbp],rbx
    mov     [ArgVector.SaveRCX+rbp],rcx
    mov     [ArgVector.SaveRDX+rbp],rdx
    mov     [ArgVector.SaveRSI+rbp],rsi
    mov     [ArgVector.SaveRDI+rbp],rdi
    ; Floating point registers: 64 bits each.
    movsd   [ArgVector.SaveXMM0+rbp],xmm0
    movsd   [ArgVector.SaveXMM1+rbp],xmm1
    movsd   [ArgVector.SaveXMM2+rbp],xmm2
    movsd   [ArgVector.SaveXMM3+rbp],xmm3
    movsd   [ArgVector.SaveXMM4+rbp],xmm4
    movsd   [ArgVector.SaveXMM5+rbp],xmm5
    movsd   [ArgVector.SaveXMM6+rbp],xmm6
    mov     [ArgVector.SaveR8+rbp],r8
    mov     [ArgVector.SaveR9+rbp],r9
    mov     [ArgVector.SaveR10+rbp],r10
    mov     [ArgVector.SaveR11+rbp],r11
    mov     [ArgVector.SaveR12+rbp],r12
    mov     [ArgVector.SaveR13+rbp],r13
    mov     [ArgVector.SaveR14+rbp],r14
    mov     [ArgVector.StackPtr+rbp],rsp        ; Save ML stack pointer
    mov     [ArgVector.LocalMPointer+rbp],r15   ; Save back heap pointer

    ; Run the handler on the C stack.
    mov     rsp,[ArgVector.SaveCStack+rbp]      ; Restore C stack pointer
    sub     rsp,32                              ; Create Windows save area
    mov     rcx,[ArgVector.ThreadId+rbp]        ; First (and only) argument
    call    [ArgVector.TrapHandlerEntry+rbp]
    add     rsp,32

    ; Back to the ML stack.  rbp is relied on to still address the
    ; ArgVector after the call.
    mov     r15,[ArgVector.LocalMPointer+rbp]
    mov     rsp,[ArgVector.StackPtr+rbp]
    movsd   xmm0,[ArgVector.SaveXMM0+rbp]
    movsd   xmm1,[ArgVector.SaveXMM1+rbp]
    movsd   xmm2,[ArgVector.SaveXMM2+rbp]
    movsd   xmm3,[ArgVector.SaveXMM3+rbp]
    movsd   xmm4,[ArgVector.SaveXMM4+rbp]
    movsd   xmm5,[ArgVector.SaveXMM5+rbp]
    movsd   xmm6,[ArgVector.SaveXMM6+rbp]
    mov     rbx,[ArgVector.SaveRBX+rbp]
    mov     rcx,[ArgVector.SaveRCX+rbp]
    mov     rdx,[ArgVector.SaveRDX+rbp]
    mov     rsi,[ArgVector.SaveRSI+rbp]
    mov     rdi,[ArgVector.SaveRDI+rbp]
    mov     r8,[ArgVector.SaveR8+rbp]
    mov     r9,[ArgVector.SaveR9+rbp]
    mov     r10,[ArgVector.SaveR10+rbp]
    mov     r11,[ArgVector.SaveR11+rbp]
    mov     r12,[ArgVector.SaveR12+rbp]
    mov     r13,[ArgVector.SaveR13+rbp]
    mov     r14,[ArgVector.SaveR14+rbp]

    ; rax is loaded last: it carries either the exception packet or
    ; the saved ML value of rax.
    mov     rax,[ArgVector.ExceptionPacket+rbp]
    cmp     rax,1                               ; Did we raise an exception?
    jnz     raisexcept
    mov     rax,[ArgVector.SaveRAX+rbp]
    cld                                         ; Clear this just in case
    ret                                         ; Return address is on the ML stack

; An exception packet was left in ArgVector.ExceptionPacket: enter the
; handler whose address is stored at the location HandlerRegister
; points to.  rax holds the packet; rcx is overwritten.
raisexcept:
    mov     rcx,[ArgVector.HandlerRegister+rbp]
    jmp     qword ptr [rcx]


; This implements atomic subtraction in the same way as atomic_decrement
; In:    rcx = address of the word to update.
; Out:   rax = value of the word after 2 has been subtracted from it.
; Clobb: rcx, flags.
PUBLIC  X86AsmAtomicDecrement
X86AsmAtomicDecrement:
    mov     rax,rcx
; Use rcx and rax because they are volatile (unlike rbx on X86/64/Unix)
    mov     rcx,-2
    ; lock xadd adds rcx to the memory word and leaves the previous
    ; contents of the word in rcx.
#ifdef POLYML32IN64
    lock xadd [rax],ecx     ;# Rax is an absolute address but this is only a word
#else
    lock xadd [rax],rcx
#endif
    sub     rcx,2           ; Previous value - 2 = the value now stored
    mov     rax,rcx
    ret

; Create a public entry point named X86AsmCallExtra<index> whose body
; is CALL_EXTRA index.  The macro argument text is pasted into the
; label name and also used as the reason code.
CREATE_EXTRA_CALL MACRO index
PUBLIC  X86AsmCallExtra&index&
X86AsmCallExtra&index&:
    CALL_EXTRA index
    ENDM

CREATE_EXTRA_CALL RETURN_HEAP_OVERFLOW
CREATE_EXTRA_CALL RETURN_STACK_OVERFLOW
CREATE_EXTRA_CALL RETURN_STACK_OVERFLOWEX

END