/*	$NetBSD: startprog64.S,v 1.5 2023/06/24 05:31:04 msaitoh Exp $	*/
/*	NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp */

/* starts program in protected mode / flat space
 with given stackframe
 needs global variables flatcodeseg and flatdataseg
 (gdt offsets)
  derived from: NetBSD:sys/arch/i386/boot/asm.S
 */

/*
 * Ported to boot 386BSD by Julian Elischer (julian@tfs.com) Sept 1992
 *
 * Mach Operating System
 * Copyright (c) 1992, 1991 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
  Copyright 1988, 1989, 1990, 1991, 1992
   by Intel Corporation, Santa Clara, California.

                All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appears in all
copies and that both the copyright notice and this permission notice
appear in supporting documentation, and that the name of Intel
not be used in advertising or publicity pertaining to distribution
of the software without specific, written prior permission.

INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#include <machine/asm.h>
#include <machine/specialreg.h>

/* Selectors into the private GDT below: entry 1 = code, entry 2 = data. */
#define CODE_SEGMENT	0x08
#define DATA_SEGMENT	0x10

	/*
	 * Relocated-trampoline bookkeeping consumed by the loader:
	 * startprog64 holds the address this blob has been copied to, and
	 * startprog64_size is the number of bytes to copy (code + GDT +
	 * embedded 8 KiB stack, i.e. everything up to startprog64_end).
	 */
	.align	16
	.globl _C_LABEL(startprog64)
_C_LABEL(startprog64):
	.quad 0

	.globl _C_LABEL(startprog64_size)
_C_LABEL(startprog64_size):
	.long startprog64_end - _C_LABEL(startprog64_start)

	.text
	.p2align 4,,15

/*
 * startprog64(loaddr,entry,stack,kern_load,kern_start,kern_size)
 *
 * Copy the loaded kernel image to its final physical destination
 * (memmove semantics: overlapping ranges are handled in either
 * direction), then leave long mode for 32-bit flat protected mode and
 * call the kernel's entry point.  Never returns.
 *
 * The whole blob is position independent: every internal reference is
 * computed as (label - start) relative to the address the blob was
 * copied to, so it can run from wherever the loader placed it.
 */
ENTRY(startprog64_start)
start:
	/*
	 * This function is to call the loaded kernel's start() with
	 * 32bit segment mode from x64 mode.
	 * %rdi: kernel start address
	 * %rsi: loaded kernel address
	 * %rdx: stack address
	 * %rcx: loaded kernel size
	 * %r8 : loaded start address
	 * %r9 : kernel entry address
	 */

	cld		/* LynxOS depends on it */

	cli		/* no interrupts while we rewrite memory and the GDT */

	/* skip copy if same source and destination */
	cmpq	%rdi,%rsi
	jz	.Lcopy_done

	/*
	 * Copy kernel: a memmove specialised for this trampoline.
	 * Register roles through the copy:
	 *   %r12 = total byte count, %r11 = dest (for alignment test),
	 *   %r13 = dest - src (unsigned: < size means overlap, copy back),
	 *   %r14 = address of last qword in dest, %r15 = last source qword
	 *          (fetched up front so the forward qword loop need not
	 *          handle the 1..7 byte tail separately).
	 */
	mov	%rcx, %r12	/* original kernel size */
	movq	%rdi, %r11	/* for misaligned check */

#if !defined(NO_OVERLAP)
	movq	%rdi, %r13
	subq	%rsi, %r13
#endif

	shrq	$3, %rcx	/* count for copy by words */
	jz	8f		/* j if less than 8 bytes */

	lea	-8(%rdi, %r12), %r14	/* target address of last 8 */
	mov	-8(%rsi, %r12), %r15	/* get last word */
#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	10f
#endif

/*
 * Non-overlaping, copy forwards.
 * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
 * if %ecx is more than 76.
 * AMD might do something similar some day.
 */
	and	$7, %r11	/* destination misaligned ? */
	jnz	2f
	rep
	movsq
	mov	%r15, (%r14)	/* write last word */
	jmp	.Lcopy_done

/*
 * Destination misaligned
 * AMD say it is better to align the destination (not the source).
 * This will also re-align copies if the source and dest are both
 * misaligned by the same amount)
 * (I think Nehalem will use its accelerated copy if the source
 * and destination have the same alignment.)
 */
2:
	/*
	 * Copy the first (unaligned) qword by hand, round src/dest up to
	 * the next 8-byte destination boundary, and rep-copy the rest.
	 * -9 = -8 (first qword done separately) - 1 (so the saved last
	 * qword store covers the tail after shr rounds down).
	 */
	lea	-9(%r11, %r12), %rcx	/* post re-alignment count */
	neg	%r11			/* now -1 .. -7 */
	mov	(%rsi), %r12		/* get first word */
	mov	%rdi, %r13		/* target for first word */
	lea	8(%rsi, %r11), %rsi
	lea	8(%rdi, %r11), %rdi
	shr	$3, %rcx
	rep
	movsq
	mov	%r12, (%r13)	/* write first word */
	mov	%r15, (%r14)	/* write last word */
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/* Must copy backwards.
 * Reverse copy is probably easy to code faster than 'rep movds'
 * since that requires (IIRC) an extra clock every 3 iterations (AMD).
 * However I don't suppose anything cares that much!
 * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
 * The copy is aligned with the buffer start (more likely to
 * be a multiple of 8 than the end).
 */
10:
	lea	-8(%rsi, %rcx, 8), %rsi
	lea	-8(%rdi, %rcx, 8), %rdi
	std
	rep
	movsq
	cld
	mov	%r15, (%r14)	/* write last bytes */
	jmp	.Lcopy_done
#endif

/* Less than 8 bytes to copy, copy by bytes */
/* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
 * For longer transfers it is 50+ !
 */
8:	mov	%r12, %rcx

#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	81f
#endif

	/* nope, copy forwards. */
	rep
	movsb
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/* Must copy backwards */
81:
	lea	-1(%rsi, %rcx), %rsi
	lea	-1(%rdi, %rcx), %rdi
	std
	rep
	movsb
	cld			/* restore direction flag for the kernel */
#endif
	/* End of copy kernel */
.Lcopy_done:

	mov	%r8, %rdi	/* %rdi: loaded start address */
	mov	%r9, %rsi	/* %rsi: kernel entry address */

	/*
	 * Prepare jump address: patch the offset half of the far pointer
	 * at start32r with the run-time address of start32a (all offsets
	 * are (label - start) relative to %rdi, the relocated base).
	 */
	lea	(start32a - start)(%rdi), %rax
	movl	%eax, (start32r - start)(%rdi)

	/* Setup GDT: store the run-time GDT base into the pseudo-descriptor,
	 * then load it.  lgdt reads the 2-byte limit at gdtr followed by the
	 * 8-byte base at gdtrr. */
	lea	(gdt - start)(%rdi), %rax
	mov	%rax, (gdtrr - start)(%rdi)
	lgdt	(gdtr - start)(%rdi)

	/* Jump to set %cs: far jump through the patched pointer loads the
	 * 32-bit code selector, dropping us into compatibility mode. */
	ljmp	*(start32r - start)(%rdi)

	.align	4
	.code32
start32a:
	/* Reload every data segment register with the flat 32-bit data
	 * selector from our private GDT. */
	movl	$DATA_SEGMENT, %eax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss

	movl	%edx, %esp	/* switch to the caller-supplied stack */

	/* Disable Paging in CR0.  Clearing CR0.PG while executing in a
	 * 32-bit segment deactivates IA-32e (long) mode per the Intel SDM;
	 * presumably this code runs identity-mapped so execution continues
	 * at the next instruction -- guaranteed by the bootloader. */
	movl	%cr0, %eax
	andl	$(~CR0_PG), %eax
	movl	%eax, %cr0

	/* Disable PAE in CR4 */
	movl	%cr4, %eax
	andl	$(~CR4_PAE), %eax
	movl	%eax, %cr4

	jmp	start32b	/* flush prefetch after the mode change */

	.align	4
start32b:
	xor	%eax, %eax
	call	*%esi		/* enter the kernel; does not return */

	/* Far-pointer operand for the ljmp above: 32-bit offset (patched
	 * at run time with the address of start32a) + code selector. */
	.align	16
start32r:
	.long	0
	.long	CODE_SEGMENT
	/* Minimal flat GDT: null descriptor, then 4 GiB base-0 32-bit
	 * code (0x9f/0xcf) and data (0x93/0xcf) descriptors. */
	.align	16
gdt:
	.long	0, 0
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00
gdtr:
	/* NOTE(review): the GDT limit is conventionally size-1 (here 23);
	 * gdtr - gdt yields 24.  Harmless (one byte slack) but worth
	 * confirming against the Intel SDM pseudo-descriptor format. */
	.word	gdtr - gdt
gdtrr:
	/* NOTE(review): a bare .quad emits no data in GNU as ("zero or
	 * more" operands); the 8-byte GDT base stored here at run time
	 * lands in the .align 16 padding before start32end.  It works,
	 * but `.quad 0` would reserve the slot explicitly -- confirm. */
	.quad
start32end:
	/* Space for the stack */
	.align	16
	.space	8192
startprog64_end: