1;
2;  Title:  Assembly code routines for the poly system.
3;  Author:    David Matthews
4;  Copyright (c) David C. J. Matthews 2000-2020
5;
6;  This library is free software; you can redistribute it and/or
7;  modify it under the terms of the GNU Lesser General Public
8;  License version 2.1 as published by the Free Software Foundation.
9;
10;  This library is distributed in the hope that it will be useful,
11;  but WITHOUT ANY WARRANTY; without even the implied warranty of
12;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13;  Lesser General Public License for more details.
14;
15;  You should have received a copy of the GNU Lesser General Public
16;  License along with this library; if not, write to the Free Software
17;  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18;
19
20;
21; Registers used :-
22;
23;  rax:  First argument to function.  Result of function call.
24;  rbx:  Second argument to function.
25;  rcx:  General register
26;  rdx:  Closure pointer in call.
27;  rbp:  Points to memory used for extra registers
28;  rsi:  General register.
29;  rdi:  General register.
30;  rsp:  Stack pointer.
31;  r8:   Third argument to function
32;  r9:   Fourth argument to function
33;  r10:  Fifth argument to function
34;  r11:  General register
35;  r12:  General register
36;  r13:  General register
37;  r14:  General register
38;  r15:  Memory allocation pointer
39
; Extra entries on the C stack.
; X86AsmSwitchToPoly lowers rsp by (Fr_Size-56) after its register pushes, so
; this must be a multiple of 16 to get the alignment correct.
Fr_Size             EQU     4*16
42
; Argument vector passed in to X86AsmSwitchToPoly (its address arrives in rcx
; and is kept in rbp from then on).  It is used to initialise the register
; state and a few values are updated when ML returns.
; The number at the start of each comment is the field's byte offset.
ArgVector STRUCT
LocalMPointer       QWORD   ?       ;   0: Loaded into r15 on entry, written back from r15 on a trap
HandlerRegister     QWORD   ?       ;   8: Read when the trap handler has set an exception packet
LocalMbottom        QWORD   ?       ;  16: Not referenced in this file
StackLimit          QWORD   ?       ;  24: Not referenced in this file
ExceptionPacket     QWORD   ?       ;  32: Address of packet to raise; compared with 1 after a trap
UnusedRequestCode   BYTE    ?       ;  40: Unused
UnusedFlag          BYTE    ?       ;  41: Unused
ReturnReason        BYTE    ?       ;  42: Reason for returning from ML (set by CALL_EXTRA)
UnusedRestore       BYTE    ?       ;  43: Unused
UnusedAlign         DWORD   ?       ;  44: Fills the rest of this 8-byte slot
SaveCStack          QWORD   ?       ;  48: Saved C stack pointer
ThreadId            QWORD   ?       ;  56: My thread id; passed to the trap handler in rcx
StackPtr            QWORD   ?       ;  64: ML stack pointer
UnusedProgramCtr    QWORD   ?       ;  72: Unused
HeapOverFlowCall    QWORD   ?       ;  80: Not referenced in this file
StackOverFlowCall   QWORD   ?       ;  88: Not referenced in this file
StackOverFlowCallEx QWORD   ?       ;  96: Not referenced in this file
TrapHandlerEntry    QWORD   ?       ; 104: Address called by CallTrapHandler
SaveRAX             QWORD   ?       ; 112: Register save area, general registers
SaveRBX             QWORD   ?       ; 120
SaveRCX             QWORD   ?       ; 128
SaveRDX             QWORD   ?       ; 136
SaveRSI             QWORD   ?       ; 144
SaveRDI             QWORD   ?       ; 152
SaveR8              QWORD   ?       ; 160
SaveR9              QWORD   ?       ; 168
SaveR10             QWORD   ?       ; 176
SaveR11             QWORD   ?       ; 184
SaveR12             QWORD   ?       ; 192
SaveR13             QWORD   ?       ; 200
SaveR14             QWORD   ?       ; 208
SaveXMM0            QWORD   ?       ; 216: Low 64 bits only (accessed with movsd)
SaveXMM1            QWORD   ?       ; 224
SaveXMM2            QWORD   ?       ; 232
SaveXMM3            QWORD   ?       ; 240
SaveXMM4            QWORD   ?       ; 248
SaveXMM5            QWORD   ?       ; 256
SaveXMM6            QWORD   ?       ; 264
ArgVector ENDS
86
; Reason codes.  CALL_EXTRA stores one of these in ArgVector.ReturnReason
; before jumping to the trap handler path.
RETURN_HEAP_OVERFLOW        EQU     1
RETURN_STACK_OVERFLOW       EQU     2
RETURN_STACK_OVERFLOWEX     EQU     3
RETURN_KILL_SELF            EQU     9       ; No entry point in this file uses this value
91
92;
93; CODE STARTS HERE
94;
95    .CODE
96
; Standard call macro.
; CALL_EXTRA reasonCode
;   Stores reasonCode as a byte in the ReturnReason field of the argument
;   vector addressed by rbp, then jumps to CallTrapHandler.  Execution does
;   not continue after the expansion.

CALL_EXTRA  MACRO   reasonCode
    mov     byte ptr [rbp+ArgVector.ReturnReason],reasonCode
    jmp     CallTrapHandler
ENDM
103
104
; X86AsmSwitchToPoly - enter ML code.  This is now only ever used to start a
; new thread.
;
;   In:   rcx = address of the ArgVector for this thread.
;   Out:  Does not return through this procedure.  After loading the ML
;         registers from the ArgVector it jumps to the code address fetched
;         through rdx.
;
; It is probably unnecessary to save the callee-save regs or load the ML regs.
; This does not set up a correct frame because we do not want to reserve a
; register for that.  RBP needs to be the original argument because we need to
; be able to modify the stack limit "register" from another thread in order to
; be able to interrupt this one.
;
; Each push and the stack adjustment is followed by its unwind directive so
; that the unwind data generated for PROC FRAME describes the real prologue.
X86AsmSwitchToPoly  PROC FRAME
    push    rbp                             ; Standard entry sequence
    .pushreg rbp
    push    rbx                             ; Save callee-save registers
    .pushreg rbx
    push    r12
    .pushreg r12
    push    r13
    .pushreg r13
    push    r14
    .pushreg r14
    push    r15
    .pushreg r15
    push    rdi                             ; Callee save in Windows
    .pushreg rdi
    push    rsi                             ; Strictly, we should also save xmm6
    .pushreg rsi
    ; Eight pushes (64 bytes) plus the return address leave rsp 8 bytes off a
    ; 16-byte boundary (assuming the caller had it aligned at the call);
    ; Fr_Size-56 = 8 puts it back on one.
    sub     rsp,(Fr_Size-56)
    .allocstack (Fr_Size-56)
    .endprolog
    mov     rbp,rcx                         ; Move argument into rbp - this is definitely non-standard
    mov     [ArgVector.SaveCStack+rbp],rsp  ; Save the C stack pointer
    mov     r15,[ArgVector.LocalMPointer+rbp]
    mov     rsp,[ArgVector.StackPtr+rbp]    ; From here on rsp is the ML stack
    movsd   xmm0,[ArgVector.SaveXMM0+rbp]
    movsd   xmm1,[ArgVector.SaveXMM1+rbp]
    movsd   xmm2,[ArgVector.SaveXMM2+rbp]
    movsd   xmm3,[ArgVector.SaveXMM3+rbp]
    movsd   xmm4,[ArgVector.SaveXMM4+rbp]
    movsd   xmm5,[ArgVector.SaveXMM5+rbp]
    movsd   xmm6,[ArgVector.SaveXMM6+rbp]
    mov     rbx,[ArgVector.SaveRBX+rbp]
    mov     rcx,[ArgVector.SaveRCX+rbp]
    mov     rdx,[ArgVector.SaveRDX+rbp]
    mov     rsi,[ArgVector.SaveRSI+rbp]
    mov     rdi,[ArgVector.SaveRDI+rbp]
    mov     r8,[ArgVector.SaveR8+rbp]
    mov     r9,[ArgVector.SaveR9+rbp]
    mov     r10,[ArgVector.SaveR10+rbp]
    mov     r11,[ArgVector.SaveR11+rbp]
    mov     r12,[ArgVector.SaveR12+rbp]
    mov     r13,[ArgVector.SaveR13+rbp]
    mov     r14,[ArgVector.SaveR14+rbp]
    mov     rax,[ArgVector.SaveRAX+rbp]
    cld                                     ; Clear this just in case
; NOTE(review): "#ifdef" is C-preprocessor syntax rather than a MASM directive;
; presumably this file is run through a preprocessor before assembly - confirm.
; rdx is the closure pointer; the jump target is read from memory through it.
#ifdef  POLYML32IN64
    jmp     qword ptr [rbx+rdx*4]
#else
    jmp     qword ptr [rdx]
#endif

; Everything up to here is considered as part of the X86AsmSwitchToPoly proc
X86AsmSwitchToPoly ENDP
155
; CallTrapHandler - save all the registers and enter the trap handler.
; It is probably unnecessary to save the FP state now.
;
;   In:   rbp = address of the ArgVector; rsp = ML stack pointer.
;         Reached by a jump from CALL_EXTRA, which has already stored the
;         reason byte in ArgVector.ReturnReason.
;   Does: Saves rax, rbx, rcx, rdx, rsi, rdi, r8-r14, the low 64 bits of
;         xmm0-xmm6, rsp and r15 into the ArgVector, switches to the saved C
;         stack and calls the address in ArgVector.TrapHandlerEntry with the
;         thread id in rcx.  On return every saved register is reloaded.
;   Out:  If ArgVector.ExceptionPacket is 1, rax is reloaded and the routine
;         returns with "ret" on the ML stack.  Otherwise rax holds the
;         exception packet and control jumps to the address read through
;         ArgVector.HandlerRegister (rcx is overwritten on that path).
X86TrapHandler PROTO C

CallTrapHandler:
    mov     [ArgVector.SaveRAX+rbp],rax
    mov     [ArgVector.SaveRBX+rbp],rbx
    mov     [ArgVector.SaveRCX+rbp],rcx
    mov     [ArgVector.SaveRDX+rbp],rdx
    mov     [ArgVector.SaveRSI+rbp],rsi
    mov     [ArgVector.SaveRDI+rbp],rdi
    movsd   [ArgVector.SaveXMM0+rbp],xmm0
    movsd   [ArgVector.SaveXMM1+rbp],xmm1
    movsd   [ArgVector.SaveXMM2+rbp],xmm2
    movsd   [ArgVector.SaveXMM3+rbp],xmm3
    movsd   [ArgVector.SaveXMM4+rbp],xmm4
    movsd   [ArgVector.SaveXMM5+rbp],xmm5
    movsd   [ArgVector.SaveXMM6+rbp],xmm6
    mov     [ArgVector.SaveR8+rbp],r8
    mov     [ArgVector.SaveR9+rbp],r9
    mov     [ArgVector.SaveR10+rbp],r10
    mov     [ArgVector.SaveR11+rbp],r11
    mov     [ArgVector.SaveR12+rbp],r12
    mov     [ArgVector.SaveR13+rbp],r13
    mov     [ArgVector.SaveR14+rbp],r14
    mov     [ArgVector.StackPtr+rbp],rsp    ; Save ML stack pointer
    mov     [ArgVector.LocalMPointer+rbp],r15       ; Save back heap pointer
    mov     rsp,[ArgVector.SaveCStack+rbp]          ; Restore C stack pointer
    sub     rsp,32                          ; Create Windows save area
    mov     rcx,[ArgVector.ThreadId+rbp]    ; First (only) argument to the handler
    call    [ArgVector.TrapHandlerEntry+rbp]
    add     rsp,32                          ; rsp is replaced from StackPtr just below
    ; Reload the ML state.  The handler may have changed any of these fields.
    mov     r15,[ArgVector.LocalMPointer+rbp]
    mov     rsp,[ArgVector.StackPtr+rbp]
    movsd   xmm0,[ArgVector.SaveXMM0+rbp]
    movsd   xmm1,[ArgVector.SaveXMM1+rbp]
    movsd   xmm2,[ArgVector.SaveXMM2+rbp]
    movsd   xmm3,[ArgVector.SaveXMM3+rbp]
    movsd   xmm4,[ArgVector.SaveXMM4+rbp]
    movsd   xmm5,[ArgVector.SaveXMM5+rbp]
    movsd   xmm6,[ArgVector.SaveXMM6+rbp]
    mov     rbx,[ArgVector.SaveRBX+rbp]
    mov     rcx,[ArgVector.SaveRCX+rbp]
    mov     rdx,[ArgVector.SaveRDX+rbp]
    mov     rsi,[ArgVector.SaveRSI+rbp]
    mov     rdi,[ArgVector.SaveRDI+rbp]
    mov     r8,[ArgVector.SaveR8+rbp]
    mov     r9,[ArgVector.SaveR9+rbp]
    mov     r10,[ArgVector.SaveR10+rbp]
    mov     r11,[ArgVector.SaveR11+rbp]
    mov     r12,[ArgVector.SaveR12+rbp]
    mov     r13,[ArgVector.SaveR13+rbp]
    mov     r14,[ArgVector.SaveR14+rbp]
    mov     rax,[ArgVector.ExceptionPacket+rbp]
    cmp     rax,1                           ; Did we raise an exception?  (1 means none)
    jnz     raisexcept
    mov     rax,[ArgVector.SaveRAX+rbp]     ; No exception: rax gets its saved value back
    cld                                     ; Clear this just in case
    ret                                     ; Return address is taken from the ML stack

raisexcept:
    ; rax still holds the exception packet here.
    mov     rcx,[ArgVector.HandlerRegister+rbp]
    jmp     qword ptr [rcx]                 ; Handler address is the word at [HandlerRegister]
219
220
; X86AsmAtomicDecrement
; This implements atomic subtraction in the same way as atomic_decrement.
;
;   In:   rcx = address of the word to update.
;   Out:  rax = the value of the word after 2 has been subtracted from it.
;   Clobbers: flags.  Only rax is written; it is volatile in the Windows ABI.
;
; The locked xadd adds -2 to memory and leaves the previous contents in rax;
; subtracting 2 from that gives the value that was stored.
PUBLIC  X86AsmAtomicDecrement
X86AsmAtomicDecrement:
    mov     rax,-2
#ifdef  POLYML32IN64
    lock xadd [rcx],eax     ; rcx is an absolute address but this is only a word
#else
    lock xadd [rcx],rax
#endif
    sub     rax,2
    ret
235
; CREATE_EXTRA_CALL reasonName
;   Defines a public entry point whose name is X86AsmCallExtra followed by
;   the text of reasonName.  The entry point consists of one CALL_EXTRA
;   expansion with reasonName as its argument.
CREATE_EXTRA_CALL MACRO reasonName
PUBLIC  X86AsmCallExtra&reasonName&
X86AsmCallExtra&reasonName&:
    CALL_EXTRA reasonName
ENDM

; One entry point per reason code that ML code can request.
CREATE_EXTRA_CALL RETURN_HEAP_OVERFLOW
CREATE_EXTRA_CALL RETURN_STACK_OVERFLOW
CREATE_EXTRA_CALL RETURN_STACK_OVERFLOWEX
245
246END
247