/*
    Title:  Assembly code routines for the poly system.
    Author:    David Matthews
    Copyright (c) David C. J. Matthews 2000-2020

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License version 2.1 as published by the Free Software Foundation.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/

/*
   This is the 64-bit gas version of the assembly code file.
   There are separate versions of 32/64 and MASM (Intel syntax) and
   GCC (gas syntax).

   The file is written in AT&T syntax and is passed through the C
   preprocessor before being assembled (it uses #include, #define and
   #ifdef).  Both the System V and the Windows x64 calling conventions
   are supported; the Windows variants are selected by _WIN32.
*/

/*
 Registers used :-

  %rax: First argument to function.  Result of function call.
  %rbx: Second argument to function.
  %rcx: General register
  %rdx: Closure pointer in call.
  %rbp: Points to memory used for extra registers
  %rsi: General register.
  %rdi: General register.
  %rsp: Stack pointer.
  %r8:   Third argument to function
  %r9:   Fourth argument to function
  %r10:  Fifth argument to function
  %r11:  General register
  %r12:  General register
  %r13:  General register
  %r14:  General register
  %r15:  Memory allocation pointer
*/


#include "config.h"
/* EXTNAME(x) produces the external symbol name for x, adding a leading
   underscore when SYMBOLS_REQUIRE_UNDERSCORE is defined. */
#ifdef SYMBOLS_REQUIRE_UNDERSCORE
#define EXTNAME(x)  _##x
#else
#define EXTNAME(x)  x
#endif

/* Cygwin builds take the same code paths as _WIN32 builds in this file. */
#ifdef __CYGWIN__
#define _WIN32 1
#endif

/* Macro to begin the hand-coded functions */
#ifdef MACOSX
#define GLOBAL .globl
#else
#define GLOBAL .global
#endif

/* INLINE_ROUTINE(id) exports the symbol for id and defines its label.
   The two statements are separated by ';' because the macro expands
   onto a single line. */
#define INLINE_ROUTINE(id) \
GLOBAL EXTNAME(id); \
EXTNAME(id):

/* Extra entries on the C stack.
   X86AsmSwitchToPoly subtracts Fr_Size less the bytes it has already
   pushed after %rbp: 40 (five registers) normally, 56 (seven registers)
   on Windows. */
#define Fr_Size             64         /* Must be multiple of 16 to get alignment correct */

/* This is the argument vector passed to X86AsmSwitchToPoly.
   It is used to initialise the frame.  A few values are updated
   when ML returns.
   All the values below are byte offsets from %rbp, which is set to the
   address of the vector on entry to X86AsmSwitchToPoly.
   NOTE(review): the offsets presumably have to match the layout of a
   structure declared on the C/C++ side - confirm before changing any.
   Offsets 0x48 to 0x67 are not given names here. */
#define Arg_LocalMpointer       0x0
#define Arg_HandlerRegister     0x8
#define Arg_LocalMbottom        0x10
#define Arg_StackLimit          0x18
#define Arg_ExceptionPacket     0x20  /* Address of packet to raise */
#define Arg_RequestCode         0x28  /* Byte: Io function to call. */
#define Arg_ReturnReason        0x2a  /* Byte: Reason for returning from ML. */
#define Arg_UnusedRestore       0x2b  /* Byte: Full/partial restore */
#define Arg_SaveCStack          0x30  /* Current stack base */
#define Arg_ThreadId            0x38  /* My thread id */
#define Arg_StackPtr            0x40  /* Stack Pointer */
#define Arg_TrapHandlerEntry    0x68
/* Save slots for the general registers, eight bytes each. */
#define Arg_SaveRAX             0x70
#define Arg_SaveRBX             0x78
#define Arg_SaveRCX             0x80
#define Arg_SaveRDX             0x88
#define Arg_SaveRSI             0x90
#define Arg_SaveRDI             0x98
#define Arg_SaveR8              0xa0
#define Arg_SaveR9              0xa8
#define Arg_SaveR10             0xb0
#define Arg_SaveR11             0xb8
#define Arg_SaveR12             0xc0
#define Arg_SaveR13             0xc8
#define Arg_SaveR14             0xd0
/* Save slots for %xmm0-%xmm6.  They are eight bytes apart and are read
   and written with movsd, so only the low 64 bits of each register are
   kept. */
#define Arg_SaveXMM0            0xd8
#define Arg_SaveXMM1            0xe0
#define Arg_SaveXMM2            0xe8
#define Arg_SaveXMM3            0xf0
#define Arg_SaveXMM4            0xf8
#define Arg_SaveXMM5            0x100
#define Arg_SaveXMM6            0x108

/* IO function numbers.  These are functions that are called
   to handle special cases in this code */
#include "sys.h"

/* Reason codes stored in Arg_ReturnReason by CALL_EXTRA before the
   trap handler is entered. */
#define RETURN_HEAP_OVERFLOW        1
#define RETURN_STACK_OVERFLOW       2
#define RETURN_STACK_OVERFLOWEX     3
#define RETURN_RAISE_OVERFLOW       8

# Mark the stack as non-executable when supported
#if (defined(__linux__) && defined(__ELF__))
.section .note.GNU-stack, "", @progbits
#endif

#
# CODE STARTS HERE
#
    .text

/* CALL_EXTRA(index): store index as a byte in Arg_ReturnReason and jump
   to CallTrapHandler.  Only the flags and that byte are changed before
   the jump, so CallTrapHandler sees the caller's register values. */
#define CALL_EXTRA(index) \
        movb  $index,Arg_ReturnReason(%rbp); \
        jmp   CallTrapHandler;


/* Enter ML code.  This is now only ever used to start a new thread.
   It is probably unnecessary to save the callee-save regs or load the ML regs.

   In:  the address of the argument vector, in %rcx on Windows and in
        %rdi otherwise.
   The routine pushes the callee-save registers on the C stack, records
   the resulting C stack pointer in Arg_SaveCStack, loads the registers
   from the argument vector, switches %rsp to Arg_StackPtr and jumps
   through the closure pointer in %rdx.
   There is no epilogue in this part of the file: the registers pushed
   here are not popped again by any code visible here. */
INLINE_ROUTINE(X86AsmSwitchToPoly)
        pushq   %rbp                            # Standard entry sequence
/* If we're compiling with Mingw we're using Windows calling conventions. */
#ifdef _WIN32
        movq    %rcx,%rbp                       # Argument is in %rcx
#else
        movq    %rdi,%rbp                       # Argument is in %rdi
#endif
        /* From here on %rbp is the base for every Arg_ offset. */
        pushq   %rbx
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
#ifdef _WIN32
        pushq   %rdi                            # Callee save in Windows
        pushq   %rsi
        subq    $(Fr_Size-56),%rsp              # Argument is already in %rcx
#else
        subq    $(Fr_Size-40),%rsp
#endif
        /* NOTE(review): counting the return address and %rbp this appears
           to leave %rsp 16-byte aligned, assuming the usual ABI alignment
           at entry - confirm. */
        /* CallTrapHandler reloads %rsp from this slot before calling C. */
        movq    %rsp,Arg_SaveCStack(%rbp)

        movq    Arg_LocalMpointer(%rbp),%r15
        movq    Arg_StackPtr(%rbp),%rsp               # Set the new stack ptr

        movsd   Arg_SaveXMM0(%rbp),%xmm0              # Load the registers
        movsd   Arg_SaveXMM1(%rbp),%xmm1
        movsd   Arg_SaveXMM2(%rbp),%xmm2
        movsd   Arg_SaveXMM3(%rbp),%xmm3
        movsd   Arg_SaveXMM4(%rbp),%xmm4
        movsd   Arg_SaveXMM5(%rbp),%xmm5
        movsd   Arg_SaveXMM6(%rbp),%xmm6
        movq    Arg_SaveRBX(%rbp),%rbx
        movq    Arg_SaveRCX(%rbp),%rcx
        movq    Arg_SaveRDX(%rbp),%rdx
        movq    Arg_SaveRSI(%rbp),%rsi
        movq    Arg_SaveRDI(%rbp),%rdi
        movq    Arg_SaveR8(%rbp),%r8
        movq    Arg_SaveR9(%rbp),%r9
        movq    Arg_SaveR10(%rbp),%r10
        movq    Arg_SaveR11(%rbp),%r11
        movq    Arg_SaveR12(%rbp),%r12
        movq    Arg_SaveR13(%rbp),%r13
        movq    Arg_SaveR14(%rbp),%r14
        movq    Arg_SaveRAX(%rbp),%rax
        cld                                     # Clear this just in case
        /* Indirect jump: the target address is read from memory.
           NOTE(review): in the POLYML32IN64 case the address is read from
           %rbx + 4 * %rdx; presumably %rbx is a base address and %rdx a
           32-bit word index - confirm. */
#ifdef POLYML32IN64
        jmp     *(%rbx,%rdx,4)
#else
        jmp     *(%rdx)
#endif

/* This is exactly the same as raisex but seems to be needed to work round a PIC problem.
   It loads Arg_HandlerRegister into %rcx and jumps to the address stored
   in the word that %rcx points to.  When reached from CallTrapHandler
   %rax holds the value read from Arg_ExceptionPacket. */
raisexLocal:
        movq    Arg_HandlerRegister(%rbp),%rcx    # Get next handler into %rcx
        jmp     *(%rcx)

/* Save all the registers and enter the trap handler.
   It is probably unnecessary to save the FP state now.

   Entered by a jump from CALL_EXTRA with Arg_ReturnReason already set.
   The sequence is:
     1. store the general registers and %xmm0-%xmm6 in the save area,
        together with %rsp (Arg_StackPtr) and %r15 (Arg_LocalMpointer);
     2. switch to the C stack recorded in Arg_SaveCStack and call the
        function whose address is in Arg_TrapHandlerEntry, passing the
        value of Arg_ThreadId as its argument;
     3. reload everything from the save area, so any slot the handler
        changed takes effect, including Arg_StackPtr and
        Arg_LocalMpointer;
     4. if Arg_ExceptionPacket is not 1 jump to raisexLocal with the
        packet in %rax, otherwise restore %rax and return.
   %rbp is not saved: it has to survive the call because it is the base
   for the save area (it is callee-saved in both calling conventions). */
CallTrapHandler:
        movq    %rax,Arg_SaveRAX(%rbp)
        movq    %rbx,Arg_SaveRBX(%rbp)
        movq    %rcx,Arg_SaveRCX(%rbp)
        movq    %rdx,Arg_SaveRDX(%rbp)
        movq    %rsi,Arg_SaveRSI(%rbp)
        movq    %rdi,Arg_SaveRDI(%rbp)
        movsd   %xmm0,Arg_SaveXMM0(%rbp)
        movsd   %xmm1,Arg_SaveXMM1(%rbp)
        movsd   %xmm2,Arg_SaveXMM2(%rbp)
        movsd   %xmm3,Arg_SaveXMM3(%rbp)
        movsd   %xmm4,Arg_SaveXMM4(%rbp)
        movsd   %xmm5,Arg_SaveXMM5(%rbp)
        movsd   %xmm6,Arg_SaveXMM6(%rbp)
        movq    %r8,Arg_SaveR8(%rbp)
        movq    %r9,Arg_SaveR9(%rbp)
        movq    %r10,Arg_SaveR10(%rbp)
        movq    %r11,Arg_SaveR11(%rbp)
        movq    %r12,Arg_SaveR12(%rbp)
        movq    %r13,Arg_SaveR13(%rbp)
        movq    %r14,Arg_SaveR14(%rbp)
        movq    %rsp,Arg_StackPtr(%rbp)
        movq    %r15,Arg_LocalMpointer(%rbp)        # Save back heap pointer
        movq    Arg_SaveCStack(%rbp),%rsp           # Restore C stack pointer
#ifdef _WIN32
        subq    $32,%rsp                            # Windows save area
        movq    Arg_ThreadId(%rbp),%rcx
#else
        movq    Arg_ThreadId(%rbp),%rdi
#endif
        callq   *Arg_TrapHandlerEntry(%rbp)
#ifdef _WIN32
        addq    $32,%rsp
#endif
        movq    Arg_LocalMpointer(%rbp),%r15
        movq    Arg_StackPtr(%rbp),%rsp               # Set the new stack ptr
        movsd   Arg_SaveXMM0(%rbp),%xmm0              # Load the registers
        movsd   Arg_SaveXMM1(%rbp),%xmm1
        movsd   Arg_SaveXMM2(%rbp),%xmm2
        movsd   Arg_SaveXMM3(%rbp),%xmm3
        movsd   Arg_SaveXMM4(%rbp),%xmm4
        movsd   Arg_SaveXMM5(%rbp),%xmm5
        movsd   Arg_SaveXMM6(%rbp),%xmm6
        movq    Arg_SaveRBX(%rbp),%rbx
        movq    Arg_SaveRCX(%rbp),%rcx
        movq    Arg_SaveRDX(%rbp),%rdx
        movq    Arg_SaveRSI(%rbp),%rsi
        movq    Arg_SaveRDI(%rbp),%rdi
        movq    Arg_SaveR8(%rbp),%r8
        movq    Arg_SaveR9(%rbp),%r9
        movq    Arg_SaveR10(%rbp),%r10
        movq    Arg_SaveR11(%rbp),%r11
        movq    Arg_SaveR12(%rbp),%r12
        movq    Arg_SaveR13(%rbp),%r13
        movq    Arg_SaveR14(%rbp),%r14
        /* %rax is used for the test, so the saved %rax is only reloaded on
           the path that does not raise.
           NOTE(review): 1 is presumably the value meaning "no exception"
           (a tagged zero) - confirm. */
        movq    Arg_ExceptionPacket(%rbp),%rax
        cmpq    $1,%rax                             # Did we raise an exception?
        jnz     raisexLocal
        movq    Arg_SaveRAX(%rbp),%rax
        cld                                     # Clear this just in case
        /* %rsp was reloaded from Arg_StackPtr above, so the return address
           is popped from that stack.
           NOTE(review): presumably it was pushed when ML code called one
           of the X86AsmCallExtra entry points below - confirm. */
        ret

# Additional assembly code routines

/* Each of the next three entry points stores its reason code in
   Arg_ReturnReason and jumps to CallTrapHandler. */
INLINE_ROUTINE(X86AsmCallExtraRETURN_HEAP_OVERFLOW)
        CALL_EXTRA(RETURN_HEAP_OVERFLOW)

INLINE_ROUTINE(X86AsmCallExtraRETURN_STACK_OVERFLOW)
        CALL_EXTRA(RETURN_STACK_OVERFLOW)

INLINE_ROUTINE(X86AsmCallExtraRETURN_STACK_OVERFLOWEX)
        CALL_EXTRA(RETURN_STACK_OVERFLOWEX)

# This implements atomic subtraction in the same way as atomic_decrement
/* In:  the address of the word to update, in %rcx on Windows and in
        %rdi otherwise.
   Out: %rax holds the value of the word after the subtraction.
   Uses %rcx and the flags; it does not touch the stack.
   The locked xadd adds -2 to the word and leaves the previous value in
   %rcx; subtracting 2 from that gives the new value.
   NOTE(review): the step of 2 presumably corresponds to one in a tagged
   integer representation - confirm.
   With POLYML32IN64 only a 32-bit word is updated.  The previous value
   is zero-extended into %rcx by the 32-bit xadd and the final
   subtraction is still done on 64 bits. */
INLINE_ROUTINE(X86AsmAtomicDecrement)
#ifdef _WIN32
        movq    %rcx,%rax       # On Windows the argument is passed in %rcx
#else
        movq    %rdi,%rax       # On X86_64 the argument is passed in %rdi
#endif
# Use %rcx and %rax because they are volatile (unlike %rbx on X86/64/Unix)
        movq    $-2,%rcx
#ifdef POLYML32IN64
        lock xaddl %ecx,(%rax)      # Rax is an absolute address but this is only a word
#else
        lock xaddq %rcx,(%rax)
#endif
        subq    $2,%rcx
        movq    %rcx,%rax
        ret
