1/*
2   Title:  Assembly code routines for the poly system.
3   Author:    David Matthews
4   Copyright (c) David C. J. Matthews 2000-2020
5
6   This library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License version 2.1 as published by the Free Software Foundation.
9
10   This library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with this library; if not, write to the Free Software
17   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18*/
19
/*
   This is the 64-bit gas version of the assembly code file.
   There are separate versions for 32/64 bits and for MASM (Intel syntax)
   and GCC (gas syntax).
*/
25
26/*
27 Registers used :-
28
29  %rax: First argument to function.  Result of function call.
30  %rbx: Second argument to function.
31  %rcx: General register
32  %rdx: Closure pointer in call.
33  %rbp: Points to memory used for extra registers
34  %rsi: General register.
35  %rdi: General register.
36  %rsp: Stack pointer.
37  %r8:   Third argument to function
38  %r9:   Fourth argument to function
39  %r10:  Fifth argument to function
40  %r11:  General register
41  %r12:  General register
42  %r13:  General register
43  %r14:  General register
44  %r15:  Memory allocation pointer
45*/
46
47
48#include "config.h"
/* EXTNAME(x) gives the spelling of symbol x used for exported labels:
   an underscore is prepended when SYMBOLS_REQUIRE_UNDERSCORE is defined,
   otherwise the name is used unchanged. */
#ifndef SYMBOLS_REQUIRE_UNDERSCORE
#define EXTNAME(x)  x
#else
#define EXTNAME(x)  _##x
#endif

/* Under Cygwin define _WIN32 so that the _WIN32 branches in this file
   (Windows calling conventions) are selected. */
#if defined(__CYGWIN__)
#define _WIN32 1
#endif
58
/* GLOBAL is the directive used to export a symbol: .globl when MACOSX is
   defined, .global otherwise. */
#ifndef MACOSX
#define GLOBAL .global
#else
#define GLOBAL .globl
#endif

/* INLINE_ROUTINE(id) begins a hand-coded function: it exports the symbol
   EXTNAME(id) and defines its label at the current location. */
#define INLINE_ROUTINE(id) \
GLOBAL EXTNAME(id); \
EXTNAME(id):
69
/* Number of bytes X86AsmSwitchToPoly reserves on the C stack below the
   saved %rbp (the pushed callee-saved registers are counted in this). */
#define Fr_Size                 64         /* Must be multiple of 16 to get alignment correct */

/* Byte offsets into the argument vector passed in to X86AsmSwitchToPoly.
   The vector is addressed through %rbp while ML code is running.  It is
   used to initialise the frame and a few values are updated when ML
   returns. */

/* ML state */
#define Arg_LocalMpointer       0x00  /* Loaded into / saved from %r15 */
#define Arg_HandlerRegister     0x08
#define Arg_LocalMbottom        0x10
#define Arg_StackLimit          0x18
#define Arg_ExceptionPacket     0x20  /* Address of packet to raise */

/* Single-byte fields */
#define Arg_RequestCode         0x28  /* Byte: Io function to call. */
#define Arg_ReturnReason        0x2a  /* Byte: Reason for returning from ML. */
#define Arg_UnusedRestore       0x2b  /* Byte: Full/partial restore */

/* Stack switching and the trap handler */
#define Arg_SaveCStack          0x30  /* Current stack base */
#define Arg_ThreadId            0x38  /* My thread id */
#define Arg_StackPtr            0x40  /* Stack Pointer */
#define Arg_TrapHandlerEntry    0x68  /* Address called by CallTrapHandler */

/* Save area for the integer registers, one 8-byte slot each */
#define Arg_SaveRAX             0x70
#define Arg_SaveRBX             0x78
#define Arg_SaveRCX             0x80
#define Arg_SaveRDX             0x88
#define Arg_SaveRSI             0x90
#define Arg_SaveRDI             0x98
#define Arg_SaveR8              0xa0
#define Arg_SaveR9              0xa8
#define Arg_SaveR10             0xb0
#define Arg_SaveR11             0xb8
#define Arg_SaveR12             0xc0
#define Arg_SaveR13             0xc8
#define Arg_SaveR14             0xd0

/* Save area for %xmm0-%xmm6, one 8-byte slot each (accessed with movsd) */
#define Arg_SaveXMM0            0xd8
#define Arg_SaveXMM1            0xe0
#define Arg_SaveXMM2            0xe8
#define Arg_SaveXMM3            0xf0
#define Arg_SaveXMM4            0xf8
#define Arg_SaveXMM5            0x100
#define Arg_SaveXMM6            0x108
108
109/* IO function numbers.  These are functions that are called
110   to handle special cases in this code */
111#include "sys.h"
112
/* Reason codes.  The first three are written to the Arg_ReturnReason byte
   by the X86AsmCallExtra* entry points before the trap handler is called.
   RETURN_RAISE_OVERFLOW is not referenced in this part of the file. */
#define RETURN_HEAP_OVERFLOW        1
#define RETURN_STACK_OVERFLOW       2
#define RETURN_STACK_OVERFLOWEX     3
#define RETURN_RAISE_OVERFLOW       8
117
/* Mark the stack as non-executable when supported */
#if defined(__linux__) && defined(__ELF__)
    .section .note.GNU-stack, "", @progbits
#endif

/*
 * CODE STARTS HERE
 */
    .text
127
/* CALL_EXTRA(reason): store the byte constant `reason` in the
   Arg_ReturnReason field of the argument vector (addressed through %rbp)
   and continue at CallTrapHandler. */
#define CALL_EXTRA(reason) \
        movb  $reason,Arg_ReturnReason(%rbp); \
        jmp   CallTrapHandler;
131
132
/*
 * X86AsmSwitchToPoly - enter ML code.
 *
 * In:  the address of the argument vector (see the Arg_* offsets), in %rdi,
 *      or in %rcx when Windows calling conventions are in use.
 *
 * The vector address is kept in %rbp.  The C callee-saved registers are
 * pushed, the resulting C stack pointer is stored in Arg_SaveCStack, and
 * then the ML stack pointer, the allocation pointer (%r15) and the saved
 * integer and floating point registers are loaded from the vector.
 * Control leaves by an indirect jump through the closure addressed by
 * %rdx; nothing in this routine returns to the caller.
 *
 * This is now only ever used to start a new thread.  It is probably
 * unnecessary to save the callee-save regs or load the ML regs.
 */
INLINE_ROUTINE(X86AsmSwitchToPoly)
    pushq   %rbp                            # Standard entry sequence
#ifdef _WIN32
    /* If we're compiling with Mingw we're using Windows calling conventions. */
    movq    %rcx,%rbp                       # %rbp = argument vector
#else
    movq    %rdi,%rbp                       # %rbp = argument vector
#endif

    /* Callee-saved registers common to both conventions. */
    pushq   %rbx
    pushq   %r12
    pushq   %r13
    pushq   %r14
    pushq   %r15
    /* The pushes made after %rbp plus the adjustment below always add up
       to Fr_Size bytes. */
#ifdef _WIN32
    pushq   %rdi                            # Callee save in Windows
    pushq   %rsi
    subq    $(Fr_Size-56),%rsp              # Seven registers pushed so far
#else
    subq    $(Fr_Size-40),%rsp              # Five registers pushed so far
#endif
    movq    %rsp,Arg_SaveCStack(%rbp)       # CallTrapHandler reloads this

    /* Switch to the ML stack and pick up the allocation pointer. */
    movq    Arg_StackPtr(%rbp),%rsp
    movq    Arg_LocalMpointer(%rbp),%r15

    /* Integer registers. */
    movq    Arg_SaveRAX(%rbp),%rax
    movq    Arg_SaveRBX(%rbp),%rbx
    movq    Arg_SaveRCX(%rbp),%rcx
    movq    Arg_SaveRDX(%rbp),%rdx
    movq    Arg_SaveRSI(%rbp),%rsi
    movq    Arg_SaveRDI(%rbp),%rdi
    movq    Arg_SaveR8(%rbp),%r8
    movq    Arg_SaveR9(%rbp),%r9
    movq    Arg_SaveR10(%rbp),%r10
    movq    Arg_SaveR11(%rbp),%r11
    movq    Arg_SaveR12(%rbp),%r12
    movq    Arg_SaveR13(%rbp),%r13
    movq    Arg_SaveR14(%rbp),%r14

    /* Floating point registers. */
    movsd   Arg_SaveXMM0(%rbp),%xmm0
    movsd   Arg_SaveXMM1(%rbp),%xmm1
    movsd   Arg_SaveXMM2(%rbp),%xmm2
    movsd   Arg_SaveXMM3(%rbp),%xmm3
    movsd   Arg_SaveXMM4(%rbp),%xmm4
    movsd   Arg_SaveXMM5(%rbp),%xmm5
    movsd   Arg_SaveXMM6(%rbp),%xmm6

    cld                                     # Clear this just in case

    /* Jump to the code address held in the first word of the closure. */
#ifdef POLYML32IN64
    /* NOTE(review): presumably %rdx is a 32-bit object index scaled by 4
       from a base held in %rbx - confirm against the 32-in-64 code
       generator. */
    jmp     *(%rbx,%rdx,4)
#else
    jmp     *(%rdx)
#endif
186
/* raisexLocal: load the handler register value from the argument vector
   into %rcx and jump to the address stored in the word it points at.
   CallTrapHandler branches here, with the packet in %rax, when
   Arg_ExceptionPacket is not 1.
   This is exactly the same as raisex but seems to be needed to work round
   a PIC problem. */
raisexLocal:
    movq    Arg_HandlerRegister(%rbp),%rcx  # Get next handler into %rcx
    jmpq    *(%rcx)
191
/*
 * CallTrapHandler - save the ML state, call the trap handler, then resume.
 *
 * Reached by a jump with %rbp addressing the argument vector.  Every ML
 * register is stored in the vector, the stack is switched to the C stack
 * saved in Arg_SaveCStack, and the function whose address is in
 * Arg_TrapHandlerEntry is called with Arg_ThreadId as its only argument.
 * Afterwards the registers are reloaded from the vector.  If
 * Arg_ExceptionPacket is not 1 control goes to raisexLocal with the packet
 * in %rax; otherwise %rax is reloaded and `ret` returns to the address on
 * top of the ML stack.
 *
 * It is probably unnecessary to save the FP state now.
 */
CallTrapHandler:
    /* ML stack pointer and heap pointer. */
    movq    %rsp,Arg_StackPtr(%rbp)
    movq    %r15,Arg_LocalMpointer(%rbp)        # Save back heap pointer

    /* Integer registers. */
    movq    %rax,Arg_SaveRAX(%rbp)
    movq    %rbx,Arg_SaveRBX(%rbp)
    movq    %rcx,Arg_SaveRCX(%rbp)
    movq    %rdx,Arg_SaveRDX(%rbp)
    movq    %rsi,Arg_SaveRSI(%rbp)
    movq    %rdi,Arg_SaveRDI(%rbp)
    movq    %r8,Arg_SaveR8(%rbp)
    movq    %r9,Arg_SaveR9(%rbp)
    movq    %r10,Arg_SaveR10(%rbp)
    movq    %r11,Arg_SaveR11(%rbp)
    movq    %r12,Arg_SaveR12(%rbp)
    movq    %r13,Arg_SaveR13(%rbp)
    movq    %r14,Arg_SaveR14(%rbp)

    /* Floating point registers. */
    movsd   %xmm0,Arg_SaveXMM0(%rbp)
    movsd   %xmm1,Arg_SaveXMM1(%rbp)
    movsd   %xmm2,Arg_SaveXMM2(%rbp)
    movsd   %xmm3,Arg_SaveXMM3(%rbp)
    movsd   %xmm4,Arg_SaveXMM4(%rbp)
    movsd   %xmm5,Arg_SaveXMM5(%rbp)
    movsd   %xmm6,Arg_SaveXMM6(%rbp)

    /* Call the handler on the C stack, passing the thread id. */
    movq    Arg_SaveCStack(%rbp),%rsp           # Restore C stack pointer
#ifdef _WIN32
    subq    $32,%rsp                            # Windows save area
    movq    Arg_ThreadId(%rbp),%rcx
#else
    movq    Arg_ThreadId(%rbp),%rdi
#endif
    callq   *Arg_TrapHandlerEntry(%rbp)
#ifdef _WIN32
    addq    $32,%rsp
#endif

    /* Back to the ML stack and heap pointer; the handler may have
       updated the values in the vector. */
    movq    Arg_StackPtr(%rbp),%rsp               # Set the new stack ptr
    movq    Arg_LocalMpointer(%rbp),%r15

    /* Reload everything except %rax, which is needed for the test below. */
    movq    Arg_SaveRBX(%rbp),%rbx
    movq    Arg_SaveRCX(%rbp),%rcx
    movq    Arg_SaveRDX(%rbp),%rdx
    movq    Arg_SaveRSI(%rbp),%rsi
    movq    Arg_SaveRDI(%rbp),%rdi
    movq    Arg_SaveR8(%rbp),%r8
    movq    Arg_SaveR9(%rbp),%r9
    movq    Arg_SaveR10(%rbp),%r10
    movq    Arg_SaveR11(%rbp),%r11
    movq    Arg_SaveR12(%rbp),%r12
    movq    Arg_SaveR13(%rbp),%r13
    movq    Arg_SaveR14(%rbp),%r14
    movsd   Arg_SaveXMM0(%rbp),%xmm0
    movsd   Arg_SaveXMM1(%rbp),%xmm1
    movsd   Arg_SaveXMM2(%rbp),%xmm2
    movsd   Arg_SaveXMM3(%rbp),%xmm3
    movsd   Arg_SaveXMM4(%rbp),%xmm4
    movsd   Arg_SaveXMM5(%rbp),%xmm5
    movsd   Arg_SaveXMM6(%rbp),%xmm6

    movq    Arg_ExceptionPacket(%rbp),%rax
    cmpq    $1,%rax                             # Did we raise an exception?
    jne     raisexLocal                         # Yes: packet is in %rax
    movq    Arg_SaveRAX(%rbp),%rax              # No: resume with the saved %rax
    cld                                     # Clear this just in case
    ret
255
256# Additional assembly code routines
257
/* Store the RETURN_HEAP_OVERFLOW reason code in the Arg_ReturnReason byte
   of the argument vector and continue at CallTrapHandler. */
INLINE_ROUTINE(X86AsmCallExtraRETURN_HEAP_OVERFLOW)
    movb    $RETURN_HEAP_OVERFLOW,Arg_ReturnReason(%rbp)
    jmp     CallTrapHandler
260
/* Store the RETURN_STACK_OVERFLOW reason code in the Arg_ReturnReason byte
   of the argument vector and continue at CallTrapHandler. */
INLINE_ROUTINE(X86AsmCallExtraRETURN_STACK_OVERFLOW)
    movb    $RETURN_STACK_OVERFLOW,Arg_ReturnReason(%rbp)
    jmp     CallTrapHandler
263
/* Store the RETURN_STACK_OVERFLOWEX reason code in the Arg_ReturnReason
   byte of the argument vector and continue at CallTrapHandler. */
INLINE_ROUTINE(X86AsmCallExtraRETURN_STACK_OVERFLOWEX)
    movb    $RETURN_STACK_OVERFLOWEX,Arg_ReturnReason(%rbp)
    jmp     CallTrapHandler
266
/*
 * X86AsmAtomicDecrement - atomic subtraction, done in the same way as
 * atomic_decrement.
 *
 * In:   address of the word to update (%rdi, or %rcx on Windows).
 * Out:  %rax = value of the word after 2 has been subtracted from it.
 *
 * A locked xadd adds -2 to the word and leaves its previous contents in
 * the register; subtracting 2 from that gives the new value.  With
 * POLYML32IN64 only the low 32 bits at the address are updated.
 * Only %rax and %rdx are modified, both volatile in either convention.
 * NOTE(review): the step of 2 presumably corresponds to 1 in the tagged
 * integer representation - confirm against the callers.
 */
INLINE_ROUTINE(X86AsmAtomicDecrement)
#ifdef _WIN32
    movq    %rcx,%rdx           # On Windows the argument is passed in %rcx
#else
    movq    %rdi,%rdx           # On X86_64 the argument is passed in %rdi
#endif
    movq    $-2,%rax            # Amount to add
#ifdef POLYML32IN64
    lock xaddl %eax,(%rdx)      # Rdx is an absolute address but this is only a word
#else
    lock xaddq %rax,(%rdx)
#endif
    subq    $2,%rax             # Old value - 2 = new value
    ret
284
285