1/* 2 * linux/arch/i386/kernel/sysenter.c 3 * 4 * (C) Copyright 2002 Linus Torvalds 5 * Portions based on the vdso-randomization code from exec-shield: 6 * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar 7 * 8 * This file contains the needed initializations to support sysenter. 9 */ 10 11#include <linux/init.h> 12#include <linux/smp.h> 13#include <linux/thread_info.h> 14#include <linux/sched.h> 15#include <linux/gfp.h> 16#include <linux/string.h> 17#include <linux/elf.h> 18#include <linux/mm.h> 19#include <linux/module.h> 20 21#include <asm/cpufeature.h> 22#include <asm/msr.h> 23#include <asm/pgtable.h> 24#include <asm/unistd.h> 25#include <asm/elf.h> 26#include <asm/tlbflush.h> 27 28enum { 29 VDSO_DISABLED = 0, 30 VDSO_ENABLED = 1, 31 VDSO_COMPAT = 2, 32}; 33 34#ifdef CONFIG_COMPAT_VDSO 35#define VDSO_DEFAULT VDSO_COMPAT 36#else 37#define VDSO_DEFAULT VDSO_ENABLED 38#endif 39 40/* 41 * Should the kernel map a VDSO page into processes and pass its 42 * address down to glibc upon exec()? 43 */ 44unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; 45 46EXPORT_SYMBOL_GPL(vdso_enabled); 47 48static int __init vdso_setup(char *s) 49{ 50 vdso_enabled = simple_strtoul(s, NULL, 0); 51 52 return 1; 53} 54 55__setup("vdso=", vdso_setup); 56 57extern asmlinkage void sysenter_entry(void); 58 59static __init void reloc_symtab(Elf32_Ehdr *ehdr, 60 unsigned offset, unsigned size) 61{ 62 Elf32_Sym *sym = (void *)ehdr + offset; 63 unsigned nsym = size / sizeof(*sym); 64 unsigned i; 65 66 for(i = 0; i < nsym; i++, sym++) { 67 if (sym->st_shndx == SHN_UNDEF || 68 sym->st_shndx == SHN_ABS) 69 continue; /* skip */ 70 71 if (sym->st_shndx > SHN_LORESERVE) { 72 printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", 73 sym->st_shndx); 74 continue; 75 } 76 77 switch(ELF_ST_TYPE(sym->st_info)) { 78 case STT_OBJECT: 79 case STT_FUNC: 80 case STT_SECTION: 81 case STT_FILE: 82 sym->st_value += VDSO_HIGH_BASE; 83 } 84 } 85} 86 87static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) 88{ 89 Elf32_Dyn *dyn = (void *)ehdr + offset; 90 91 for(; dyn->d_tag != DT_NULL; dyn++) 92 switch(dyn->d_tag) { 93 case DT_PLTGOT: 94 case DT_HASH: 95 case DT_STRTAB: 96 case DT_SYMTAB: 97 case DT_RELA: 98 case DT_INIT: 99 case DT_FINI: 100 case DT_REL: 101 case DT_DEBUG: 102 case DT_JMPREL: 103 case DT_VERSYM: 104 case DT_VERDEF: 105 case DT_VERNEED: 106 case DT_ADDRRNGLO ... DT_ADDRRNGHI: 107 /* definitely pointers needing relocation */ 108 dyn->d_un.d_ptr += VDSO_HIGH_BASE; 109 break; 110 111 case DT_ENCODING ... OLD_DT_LOOS-1: 112 case DT_LOOS ... DT_HIOS-1: 113 /* Tags above DT_ENCODING are pointers if 114 they're even */ 115 if (dyn->d_tag >= DT_ENCODING && 116 (dyn->d_tag & 1) == 0) 117 dyn->d_un.d_ptr += VDSO_HIGH_BASE; 118 break; 119 120 case DT_VERDEFNUM: 121 case DT_VERNEEDNUM: 122 case DT_FLAGS_1: 123 case DT_RELACOUNT: 124 case DT_RELCOUNT: 125 case DT_VALRNGLO ... DT_VALRNGHI: 126 /* definitely not pointers */ 127 break; 128 129 case OLD_DT_LOOS ... DT_LOOS-1: 130 case DT_HIOS ... DT_VALRNGLO-1: 131 default: 132 if (dyn->d_tag > DT_ENCODING) 133 printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", 134 dyn->d_tag); 135 break; 136 } 137} 138 139static __init void relocate_vdso(Elf32_Ehdr *ehdr) 140{ 141 Elf32_Phdr *phdr; 142 Elf32_Shdr *shdr; 143 int i; 144 145 BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || 146 !elf_check_arch(ehdr) || 147 ehdr->e_type != ET_DYN); 148 149 ehdr->e_entry += VDSO_HIGH_BASE; 150 151 /* rebase phdrs */ 152 phdr = (void *)ehdr + ehdr->e_phoff; 153 for (i = 0; i < ehdr->e_phnum; i++) { 154 phdr[i].p_vaddr += VDSO_HIGH_BASE; 155 156 /* relocate dynamic stuff */ 157 if (phdr[i].p_type == PT_DYNAMIC) 158 reloc_dyn(ehdr, phdr[i].p_offset); 159 } 160 161 /* rebase sections */ 162 shdr = (void *)ehdr + ehdr->e_shoff; 163 for(i = 0; i < ehdr->e_shnum; i++) { 164 if (!(shdr[i].sh_flags & SHF_ALLOC)) 165 continue; 166 167 shdr[i].sh_addr += VDSO_HIGH_BASE; 168 169 if (shdr[i].sh_type == SHT_SYMTAB || 170 shdr[i].sh_type == SHT_DYNSYM) 171 reloc_symtab(ehdr, shdr[i].sh_offset, 172 shdr[i].sh_size); 173 } 174} 175 176void enable_sep_cpu(void) 177{ 178 int cpu = get_cpu(); 179 struct tss_struct *tss = &per_cpu(init_tss, cpu); 180 181 if (!boot_cpu_has(X86_FEATURE_SEP)) { 182 put_cpu(); 183 return; 184 } 185 186 tss->x86_tss.ss1 = __KERNEL_CS; 187 tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss; 188 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); 189 wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0); 190 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); 191 put_cpu(); 192} 193 194static struct vm_area_struct gate_vma; 195 196static int __init gate_vma_init(void) 197{ 198 gate_vma.vm_mm = NULL; 199 gate_vma.vm_start = FIXADDR_USER_START; 200 gate_vma.vm_end = FIXADDR_USER_END; 201 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; 202 gate_vma.vm_page_prot = __P101; 203 /* 204 * Make sure the vDSO gets into every core dump. 205 * Dumping its contents makes post-mortem fully interpretable later 206 * without matching up the same kernel and hardware config to see 207 * what PC values meant. 208 */ 209 gate_vma.vm_flags |= VM_ALWAYSDUMP; 210 return 0; 211} 212 213/* 214 * These symbols are defined by vsyscall.o to mark the bounds 215 * of the ELF DSO images included therein. 216 */ 217extern const char vsyscall_int80_start, vsyscall_int80_end; 218extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; 219static struct page *syscall_pages[1]; 220 221static void map_compat_vdso(int map) 222{ 223 static int vdso_mapped; 224 225 if (map == vdso_mapped) 226 return; 227 228 vdso_mapped = map; 229 230 __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT, 231 map ? PAGE_READONLY_EXEC : PAGE_NONE); 232 233 /* flush stray tlbs */ 234 flush_tlb_all(); 235} 236 237int __init sysenter_setup(void) 238{ 239 void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); 240 const void *vsyscall; 241 size_t vsyscall_len; 242 243 syscall_pages[0] = virt_to_page(syscall_page); 244 245 gate_vma_init(); 246 247 printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); 248 249 if (!boot_cpu_has(X86_FEATURE_SEP)) { 250 vsyscall = &vsyscall_int80_start; 251 vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start; 252 } else { 253 vsyscall = &vsyscall_sysenter_start; 254 vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start; 255 } 256 257 memcpy(syscall_page, vsyscall, vsyscall_len); 258 relocate_vdso(syscall_page); 259 260 return 0; 261} 262 263/* Defined in vsyscall-sysenter.S */ 264extern void SYSENTER_RETURN; 265 266/* Setup a VMA at program startup for the vsyscall page */ 267int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) 268{ 269 struct mm_struct *mm = current->mm; 270 unsigned long addr; 271 int ret = 0; 272 bool compat; 273 274 down_write(&mm->mmap_sem); 275 276 /* Test compat mode once here, in case someone 277 changes it via sysctl */ 278 compat = (vdso_enabled == VDSO_COMPAT); 279 280 map_compat_vdso(compat); 281 282 if (compat) 283 addr = VDSO_HIGH_BASE; 284 else { 285 addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); 286 if (IS_ERR_VALUE(addr)) { 287 ret = addr; 288 goto up_fail; 289 } 290 291 /* 292 * MAYWRITE to allow gdb to COW and set breakpoints 293 * 294 * Make sure the vDSO gets into every core dump. 295 * Dumping its contents makes post-mortem fully 296 * interpretable later without matching up the same 297 * kernel and hardware config to see what PC values 298 * meant. 299 */ 300 ret = install_special_mapping(mm, addr, PAGE_SIZE, 301 VM_READ|VM_EXEC| 302 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 303 VM_ALWAYSDUMP, 304 syscall_pages); 305 306 if (ret) 307 goto up_fail; 308 } 309 310 current->mm->context.vdso = (void *)addr; 311 current_thread_info()->sysenter_return = 312 (void *)VDSO_SYM(&SYSENTER_RETURN); 313 314 up_fail: 315 up_write(&mm->mmap_sem); 316 317 return ret; 318} 319 320const char *arch_vma_name(struct vm_area_struct *vma) 321{ 322 if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) 323 return "[vdso]"; 324 return NULL; 325} 326 327struct vm_area_struct *get_gate_vma(struct task_struct *tsk) 328{ 329 struct mm_struct *mm = tsk->mm; 330 331 /* Check to see if this task was created in compat vdso mode */ 332 if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) 333 return &gate_vma; 334 return NULL; 335} 336 337int in_gate_area(struct task_struct *task, unsigned long addr) 338{ 339 return 0; 340} 341 342int in_gate_area_no_task(unsigned long addr) 343{ 344 return 0; 345} 346