1/* binfmt_elf_fdpic.c: FDPIC ELF binary format
2 *
3 * Copyright (C) 2003, 2004, 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 * Derived from binfmt_elf.c
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/module.h>
14
15#include <linux/fs.h>
16#include <linux/stat.h>
17#include <linux/sched.h>
18#include <linux/mm.h>
19#include <linux/mman.h>
20#include <linux/errno.h>
21#include <linux/signal.h>
22#include <linux/binfmts.h>
23#include <linux/string.h>
24#include <linux/file.h>
25#include <linux/fcntl.h>
26#include <linux/slab.h>
27#include <linux/pagemap.h>
28#include <linux/security.h>
29#include <linux/highmem.h>
30#include <linux/highuid.h>
31#include <linux/personality.h>
32#include <linux/ptrace.h>
33#include <linux/init.h>
34#include <linux/elf.h>
35#include <linux/elf-fdpic.h>
36#include <linux/elfcore.h>
37#include <linux/coredump.h>
38
39#include <asm/uaccess.h>
40#include <asm/param.h>
41#include <asm/pgalloc.h>
42
/* pointer type used for the argv[] and envp[] slots that
 * create_elf_fdpic_tables() writes onto the new process's stack */
typedef char *elf_caddr_t;

/* loader debugging output - defined here as an empty statement, so every
 * kdebug() call in this file compiles away to nothing */
#define kdebug(fmt, ...) do {} while(0)

/* second debugging channel, likewise compiled out
 * NOTE(review): presumably used by the core dump code, which is not visible
 * in this part of the file - confirm */
#define kdcore(fmt, ...) do {} while(0)
48
49MODULE_LICENSE("GPL");
50
/* binfmt entry point: load an FDPIC ELF executable (and its interpreter) */
static int load_elf_fdpic_binary(struct linux_binprm *, struct pt_regs *);
/* read a file's program header table into params->phdrs */
static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *, struct file *);
/* map a file's PT_LOAD segments and build its load map; the string names the
 * object ("executable"/"interpreter") in diagnostics */
static int elf_fdpic_map_file(struct elf_fdpic_params *, struct file *,
			      struct mm_struct *, const char *);

/* build the initial userspace stack contents; the two params arguments are
 * the executable's and the interpreter's, in that order */
static int create_elf_fdpic_tables(struct linux_binprm *, struct mm_struct *,
				   struct elf_fdpic_params *,
				   struct elf_fdpic_params *);

#ifndef CONFIG_MMU
/* helpers that only exist on no-MMU (uClinux) builds */
static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *,
					    unsigned long *);
static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *,
						   struct file *,
						   struct mm_struct *);
#endif

/* map each PT_LOAD segment with its own mmap */
static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
					     struct file *, struct mm_struct *);

#ifdef CONFIG_ELF_CORE
/* core dump writer - only built when ELF core dumps are configured */
static int elf_fdpic_core_dump(struct coredump_params *cprm);
#endif
74
/*
 * the binary format descriptor handed to register_binfmt()
 * - load_binary is always provided
 * - core_dump is only provided when CONFIG_ELF_CORE is enabled
 */
static struct linux_binfmt elf_fdpic_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_fdpic_binary,
#ifdef CONFIG_ELF_CORE
	.core_dump	= elf_fdpic_core_dump,
#endif
	.min_coredump	= ELF_EXEC_PAGESIZE,
};
83
84static int __init init_elf_fdpic_binfmt(void)
85{
86	return register_binfmt(&elf_fdpic_format);
87}
88
89static void __exit exit_elf_fdpic_binfmt(void)
90{
91	unregister_binfmt(&elf_fdpic_format);
92}
93
/* hook the register/unregister routines above into kernel initialisation and
 * module exit respectively */
core_initcall(init_elf_fdpic_binfmt);
module_exit(exit_elf_fdpic_binfmt);
96
97static int is_elf_fdpic(struct elfhdr *hdr, struct file *file)
98{
99	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0)
100		return 0;
101	if (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN)
102		return 0;
103	if (!elf_check_arch(hdr) || !elf_check_fdpic(hdr))
104		return 0;
105	if (!file->f_op || !file->f_op->mmap)
106		return 0;
107	return 1;
108}
109
110/*****************************************************************************/
111/*
112 * read the program headers table into memory
113 */
114static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params,
115				 struct file *file)
116{
117	struct elf32_phdr *phdr;
118	unsigned long size;
119	int retval, loop;
120
121	if (params->hdr.e_phentsize != sizeof(struct elf_phdr))
122		return -ENOMEM;
123	if (params->hdr.e_phnum > 65536U / sizeof(struct elf_phdr))
124		return -ENOMEM;
125
126	size = params->hdr.e_phnum * sizeof(struct elf_phdr);
127	params->phdrs = kmalloc(size, GFP_KERNEL);
128	if (!params->phdrs)
129		return -ENOMEM;
130
131	retval = kernel_read(file, params->hdr.e_phoff,
132			     (char *) params->phdrs, size);
133	if (unlikely(retval != size))
134		return retval < 0 ? retval : -ENOEXEC;
135
136	/* determine stack size for this binary */
137	phdr = params->phdrs;
138	for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
139		if (phdr->p_type != PT_GNU_STACK)
140			continue;
141
142		if (phdr->p_flags & PF_X)
143			params->flags |= ELF_FDPIC_FLAG_EXEC_STACK;
144		else
145			params->flags |= ELF_FDPIC_FLAG_NOEXEC_STACK;
146
147		params->stack_size = phdr->p_memsz;
148		break;
149	}
150
151	return 0;
152}
153
154/*****************************************************************************/
155/*
156 * load an fdpic binary into various bits of memory
157 */
158static int load_elf_fdpic_binary(struct linux_binprm *bprm,
159				 struct pt_regs *regs)
160{
161	struct elf_fdpic_params exec_params, interp_params;
162	struct elf_phdr *phdr;
163	unsigned long stack_size, entryaddr;
164#ifdef ELF_FDPIC_PLAT_INIT
165	unsigned long dynaddr;
166#endif
167#ifndef CONFIG_MMU
168	unsigned long stack_prot;
169#endif
170	struct file *interpreter = NULL; /* to shut gcc up */
171	char *interpreter_name = NULL;
172	int executable_stack;
173	int retval, i;
174
175	kdebug("____ LOAD %d ____", current->pid);
176
177	memset(&exec_params, 0, sizeof(exec_params));
178	memset(&interp_params, 0, sizeof(interp_params));
179
180	exec_params.hdr = *(struct elfhdr *) bprm->buf;
181	exec_params.flags = ELF_FDPIC_FLAG_PRESENT | ELF_FDPIC_FLAG_EXECUTABLE;
182
183	/* check that this is a binary we know how to deal with */
184	retval = -ENOEXEC;
185	if (!is_elf_fdpic(&exec_params.hdr, bprm->file))
186		goto error;
187
188	/* read the program header table */
189	retval = elf_fdpic_fetch_phdrs(&exec_params, bprm->file);
190	if (retval < 0)
191		goto error;
192
193	/* scan for a program header that specifies an interpreter */
194	phdr = exec_params.phdrs;
195
196	for (i = 0; i < exec_params.hdr.e_phnum; i++, phdr++) {
197		switch (phdr->p_type) {
198		case PT_INTERP:
199			retval = -ENOMEM;
200			if (phdr->p_filesz > PATH_MAX)
201				goto error;
202			retval = -ENOENT;
203			if (phdr->p_filesz < 2)
204				goto error;
205
206			/* read the name of the interpreter into memory */
207			interpreter_name = kmalloc(phdr->p_filesz, GFP_KERNEL);
208			if (!interpreter_name)
209				goto error;
210
211			retval = kernel_read(bprm->file,
212					     phdr->p_offset,
213					     interpreter_name,
214					     phdr->p_filesz);
215			if (unlikely(retval != phdr->p_filesz)) {
216				if (retval >= 0)
217					retval = -ENOEXEC;
218				goto error;
219			}
220
221			retval = -ENOENT;
222			if (interpreter_name[phdr->p_filesz - 1] != '\0')
223				goto error;
224
225			kdebug("Using ELF interpreter %s", interpreter_name);
226
227			/* replace the program with the interpreter */
228			interpreter = open_exec(interpreter_name);
229			retval = PTR_ERR(interpreter);
230			if (IS_ERR(interpreter)) {
231				interpreter = NULL;
232				goto error;
233			}
234
235			/*
236			 * If the binary is not readable then enforce
237			 * mm->dumpable = 0 regardless of the interpreter's
238			 * permissions.
239			 */
240			if (file_permission(interpreter, MAY_READ) < 0)
241				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
242
243			retval = kernel_read(interpreter, 0, bprm->buf,
244					     BINPRM_BUF_SIZE);
245			if (unlikely(retval != BINPRM_BUF_SIZE)) {
246				if (retval >= 0)
247					retval = -ENOEXEC;
248				goto error;
249			}
250
251			interp_params.hdr = *((struct elfhdr *) bprm->buf);
252			break;
253
254		case PT_LOAD:
255#ifdef CONFIG_MMU
256			if (exec_params.load_addr == 0)
257				exec_params.load_addr = phdr->p_vaddr;
258#endif
259			break;
260		}
261
262	}
263
264	if (elf_check_const_displacement(&exec_params.hdr))
265		exec_params.flags |= ELF_FDPIC_FLAG_CONSTDISP;
266
267	/* perform insanity checks on the interpreter */
268	if (interpreter_name) {
269		retval = -ELIBBAD;
270		if (!is_elf_fdpic(&interp_params.hdr, interpreter))
271			goto error;
272
273		interp_params.flags = ELF_FDPIC_FLAG_PRESENT;
274
275		/* read the interpreter's program header table */
276		retval = elf_fdpic_fetch_phdrs(&interp_params, interpreter);
277		if (retval < 0)
278			goto error;
279	}
280
281	stack_size = exec_params.stack_size;
282	if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
283		executable_stack = EXSTACK_ENABLE_X;
284	else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
285		executable_stack = EXSTACK_DISABLE_X;
286	else
287		executable_stack = EXSTACK_DEFAULT;
288
289	if (stack_size == 0) {
290		stack_size = interp_params.stack_size;
291		if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
292			executable_stack = EXSTACK_ENABLE_X;
293		else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
294			executable_stack = EXSTACK_DISABLE_X;
295		else
296			executable_stack = EXSTACK_DEFAULT;
297	}
298
299	retval = -ENOEXEC;
300	if (stack_size == 0)
301		goto error;
302
303	if (elf_check_const_displacement(&interp_params.hdr))
304		interp_params.flags |= ELF_FDPIC_FLAG_CONSTDISP;
305
306	/* flush all traces of the currently running executable */
307	retval = flush_old_exec(bprm);
308	if (retval)
309		goto error;
310
311	/* there's now no turning back... the old userspace image is dead,
312	 * defunct, deceased, etc. after this point we have to exit via
313	 * error_kill */
314	set_personality(PER_LINUX_FDPIC);
315	if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
316		current->personality |= READ_IMPLIES_EXEC;
317
318	setup_new_exec(bprm);
319
320	set_binfmt(&elf_fdpic_format);
321
322	current->mm->start_code = 0;
323	current->mm->end_code = 0;
324	current->mm->start_stack = 0;
325	current->mm->start_data = 0;
326	current->mm->end_data = 0;
327	current->mm->context.exec_fdpic_loadmap = 0;
328	current->mm->context.interp_fdpic_loadmap = 0;
329
330	current->flags &= ~PF_FORKNOEXEC;
331
332#ifdef CONFIG_MMU
333	elf_fdpic_arch_lay_out_mm(&exec_params,
334				  &interp_params,
335				  &current->mm->start_stack,
336				  &current->mm->start_brk);
337
338	retval = setup_arg_pages(bprm, current->mm->start_stack,
339				 executable_stack);
340	if (retval < 0) {
341		send_sig(SIGKILL, current, 0);
342		goto error_kill;
343	}
344#endif
345
346	/* load the executable and interpreter into memory */
347	retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm,
348				    "executable");
349	if (retval < 0)
350		goto error_kill;
351
352	if (interpreter_name) {
353		retval = elf_fdpic_map_file(&interp_params, interpreter,
354					    current->mm, "interpreter");
355		if (retval < 0) {
356			printk(KERN_ERR "Unable to load interpreter\n");
357			goto error_kill;
358		}
359
360		allow_write_access(interpreter);
361		fput(interpreter);
362		interpreter = NULL;
363	}
364
365#ifdef CONFIG_MMU
366	if (!current->mm->start_brk)
367		current->mm->start_brk = current->mm->end_data;
368
369	current->mm->brk = current->mm->start_brk =
370		PAGE_ALIGN(current->mm->start_brk);
371
372#else
373	/* create a stack and brk area big enough for everyone
374	 * - the brk heap starts at the bottom and works up
375	 * - the stack starts at the top and works down
376	 */
377	stack_size = (stack_size + PAGE_SIZE - 1) & PAGE_MASK;
378	if (stack_size < PAGE_SIZE * 2)
379		stack_size = PAGE_SIZE * 2;
380
381	stack_prot = PROT_READ | PROT_WRITE;
382	if (executable_stack == EXSTACK_ENABLE_X ||
383	    (executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
384		stack_prot |= PROT_EXEC;
385
386	down_write(&current->mm->mmap_sem);
387	current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
388					 MAP_PRIVATE | MAP_ANONYMOUS |
389					 MAP_UNINITIALIZED | MAP_GROWSDOWN,
390					 0);
391
392	if (IS_ERR_VALUE(current->mm->start_brk)) {
393		up_write(&current->mm->mmap_sem);
394		retval = current->mm->start_brk;
395		current->mm->start_brk = 0;
396		goto error_kill;
397	}
398
399	up_write(&current->mm->mmap_sem);
400
401	current->mm->brk = current->mm->start_brk;
402	current->mm->context.end_brk = current->mm->start_brk;
403	current->mm->context.end_brk +=
404		(stack_size > PAGE_SIZE) ? (stack_size - PAGE_SIZE) : 0;
405	current->mm->start_stack = current->mm->start_brk + stack_size;
406#endif
407
408	install_exec_creds(bprm);
409	current->flags &= ~PF_FORKNOEXEC;
410	if (create_elf_fdpic_tables(bprm, current->mm,
411				    &exec_params, &interp_params) < 0)
412		goto error_kill;
413
414	kdebug("- start_code  %lx", current->mm->start_code);
415	kdebug("- end_code    %lx", current->mm->end_code);
416	kdebug("- start_data  %lx", current->mm->start_data);
417	kdebug("- end_data    %lx", current->mm->end_data);
418	kdebug("- start_brk   %lx", current->mm->start_brk);
419	kdebug("- brk         %lx", current->mm->brk);
420	kdebug("- start_stack %lx", current->mm->start_stack);
421
422#ifdef ELF_FDPIC_PLAT_INIT
423	/*
424	 * The ABI may specify that certain registers be set up in special
425	 * ways (on i386 %edx is the address of a DT_FINI function, for
426	 * example.  This macro performs whatever initialization to
427	 * the regs structure is required.
428	 */
429	dynaddr = interp_params.dynamic_addr ?: exec_params.dynamic_addr;
430	ELF_FDPIC_PLAT_INIT(regs, exec_params.map_addr, interp_params.map_addr,
431			    dynaddr);
432#endif
433
434	/* everything is now ready... get the userspace context ready to roll */
435	entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
436	start_thread(regs, entryaddr, current->mm->start_stack);
437
438	retval = 0;
439
440error:
441	if (interpreter) {
442		allow_write_access(interpreter);
443		fput(interpreter);
444	}
445	kfree(interpreter_name);
446	kfree(exec_params.phdrs);
447	kfree(exec_params.loadmap);
448	kfree(interp_params.phdrs);
449	kfree(interp_params.loadmap);
450	return retval;
451
452	/* unrecoverable error - kill the process */
453error_kill:
454	send_sig(SIGSEGV, current, 0);
455	goto error;
456
457}
458
459/*****************************************************************************/
460
#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 *
 * The fallback below is NULL, in which case create_elf_fdpic_tables() emits
 * neither the string nor an AT_BASE_PLATFORM auxv entry.
 */
#define ELF_BASE_PLATFORM NULL
#endif
469
470/*
471 * present useful information to the program by shovelling it onto the new
472 * process's stack
473 */
474static int create_elf_fdpic_tables(struct linux_binprm *bprm,
475				   struct mm_struct *mm,
476				   struct elf_fdpic_params *exec_params,
477				   struct elf_fdpic_params *interp_params)
478{
479	const struct cred *cred = current_cred();
480	unsigned long sp, csp, nitems;
481	elf_caddr_t __user *argv, *envp;
482	size_t platform_len = 0, len;
483	char *k_platform, *k_base_platform;
484	char __user *u_platform, *u_base_platform, *p;
485	long hwcap;
486	int loop;
487	int nr;	/* reset for each csp adjustment */
488
489#ifdef CONFIG_MMU
490	/* In some cases (e.g. Hyper-Threading), we want to avoid L1 evictions
491	 * by the processes running on the same package. One thing we can do is
492	 * to shuffle the initial stack for them, so we give the architecture
493	 * an opportunity to do so here.
494	 */
495	sp = arch_align_stack(bprm->p);
496#else
497	sp = mm->start_stack;
498
499	/* stack the program arguments and environment */
500	if (elf_fdpic_transfer_args_to_stack(bprm, &sp) < 0)
501		return -EFAULT;
502#endif
503
504	hwcap = ELF_HWCAP;
505
506	/*
507	 * If this architecture has a platform capability string, copy it
508	 * to userspace.  In some cases (Sparc), this info is impossible
509	 * for userspace to get any other way, in others (i386) it is
510	 * merely difficult.
511	 */
512	k_platform = ELF_PLATFORM;
513	u_platform = NULL;
514
515	if (k_platform) {
516		platform_len = strlen(k_platform) + 1;
517		sp -= platform_len;
518		u_platform = (char __user *) sp;
519		if (__copy_to_user(u_platform, k_platform, platform_len) != 0)
520			return -EFAULT;
521	}
522
523	/*
524	 * If this architecture has a "base" platform capability
525	 * string, copy it to userspace.
526	 */
527	k_base_platform = ELF_BASE_PLATFORM;
528	u_base_platform = NULL;
529
530	if (k_base_platform) {
531		platform_len = strlen(k_base_platform) + 1;
532		sp -= platform_len;
533		u_base_platform = (char __user *) sp;
534		if (__copy_to_user(u_base_platform, k_base_platform, platform_len) != 0)
535			return -EFAULT;
536	}
537
538	sp &= ~7UL;
539
540	/* stack the load map(s) */
541	len = sizeof(struct elf32_fdpic_loadmap);
542	len += sizeof(struct elf32_fdpic_loadseg) * exec_params->loadmap->nsegs;
543	sp = (sp - len) & ~7UL;
544	exec_params->map_addr = sp;
545
546	if (copy_to_user((void __user *) sp, exec_params->loadmap, len) != 0)
547		return -EFAULT;
548
549	current->mm->context.exec_fdpic_loadmap = (unsigned long) sp;
550
551	if (interp_params->loadmap) {
552		len = sizeof(struct elf32_fdpic_loadmap);
553		len += sizeof(struct elf32_fdpic_loadseg) *
554			interp_params->loadmap->nsegs;
555		sp = (sp - len) & ~7UL;
556		interp_params->map_addr = sp;
557
558		if (copy_to_user((void __user *) sp, interp_params->loadmap,
559				 len) != 0)
560			return -EFAULT;
561
562		current->mm->context.interp_fdpic_loadmap = (unsigned long) sp;
563	}
564
565	/* force 16 byte _final_ alignment here for generality */
566#define DLINFO_ITEMS 15
567
568	nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) +
569		(k_base_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH;
570
571	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD)
572		nitems++;
573
574	csp = sp;
575	sp -= nitems * 2 * sizeof(unsigned long);
576	sp -= (bprm->envc + 1) * sizeof(char *);	/* envv[] */
577	sp -= (bprm->argc + 1) * sizeof(char *);	/* argv[] */
578	sp -= 1 * sizeof(unsigned long);		/* argc */
579
580	csp -= sp & 15UL;
581	sp -= sp & 15UL;
582
583	/* put the ELF interpreter info on the stack */
584#define NEW_AUX_ENT(id, val)						\
585	do {								\
586		struct { unsigned long _id, _val; } __user *ent;	\
587									\
588		ent = (void __user *) csp;				\
589		__put_user((id), &ent[nr]._id);				\
590		__put_user((val), &ent[nr]._val);			\
591		nr++;							\
592	} while (0)
593
594	nr = 0;
595	csp -= 2 * sizeof(unsigned long);
596	NEW_AUX_ENT(AT_NULL, 0);
597	if (k_platform) {
598		nr = 0;
599		csp -= 2 * sizeof(unsigned long);
600		NEW_AUX_ENT(AT_PLATFORM,
601			    (elf_addr_t) (unsigned long) u_platform);
602	}
603
604	if (k_base_platform) {
605		nr = 0;
606		csp -= 2 * sizeof(unsigned long);
607		NEW_AUX_ENT(AT_BASE_PLATFORM,
608			    (elf_addr_t) (unsigned long) u_base_platform);
609	}
610
611	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
612		nr = 0;
613		csp -= 2 * sizeof(unsigned long);
614		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
615	}
616
617	nr = 0;
618	csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long);
619	NEW_AUX_ENT(AT_HWCAP,	hwcap);
620	NEW_AUX_ENT(AT_PAGESZ,	PAGE_SIZE);
621	NEW_AUX_ENT(AT_CLKTCK,	CLOCKS_PER_SEC);
622	NEW_AUX_ENT(AT_PHDR,	exec_params->ph_addr);
623	NEW_AUX_ENT(AT_PHENT,	sizeof(struct elf_phdr));
624	NEW_AUX_ENT(AT_PHNUM,	exec_params->hdr.e_phnum);
625	NEW_AUX_ENT(AT_BASE,	interp_params->elfhdr_addr);
626	NEW_AUX_ENT(AT_FLAGS,	0);
627	NEW_AUX_ENT(AT_ENTRY,	exec_params->entry_addr);
628	NEW_AUX_ENT(AT_UID,	(elf_addr_t) cred->uid);
629	NEW_AUX_ENT(AT_EUID,	(elf_addr_t) cred->euid);
630	NEW_AUX_ENT(AT_GID,	(elf_addr_t) cred->gid);
631	NEW_AUX_ENT(AT_EGID,	(elf_addr_t) cred->egid);
632	NEW_AUX_ENT(AT_SECURE,	security_bprm_secureexec(bprm));
633	NEW_AUX_ENT(AT_EXECFN,	bprm->exec);
634
635#ifdef ARCH_DLINFO
636	nr = 0;
637	csp -= AT_VECTOR_SIZE_ARCH * 2 * sizeof(unsigned long);
638
639	/* ARCH_DLINFO must come last so platform specific code can enforce
640	 * special alignment requirements on the AUXV if necessary (eg. PPC).
641	 */
642	ARCH_DLINFO;
643#endif
644#undef NEW_AUX_ENT
645
646	/* allocate room for argv[] and envv[] */
647	csp -= (bprm->envc + 1) * sizeof(elf_caddr_t);
648	envp = (elf_caddr_t __user *) csp;
649	csp -= (bprm->argc + 1) * sizeof(elf_caddr_t);
650	argv = (elf_caddr_t __user *) csp;
651
652	/* stack argc */
653	csp -= sizeof(unsigned long);
654	__put_user(bprm->argc, (unsigned long __user *) csp);
655
656	BUG_ON(csp != sp);
657
658	/* fill in the argv[] array */
659#ifdef CONFIG_MMU
660	current->mm->arg_start = bprm->p;
661#else
662	current->mm->arg_start = current->mm->start_stack -
663		(MAX_ARG_PAGES * PAGE_SIZE - bprm->p);
664#endif
665
666	p = (char __user *) current->mm->arg_start;
667	for (loop = bprm->argc; loop > 0; loop--) {
668		__put_user((elf_caddr_t) p, argv++);
669		len = strnlen_user(p, MAX_ARG_STRLEN);
670		if (!len || len > MAX_ARG_STRLEN)
671			return -EINVAL;
672		p += len;
673	}
674	__put_user(NULL, argv);
675	current->mm->arg_end = (unsigned long) p;
676
677	/* fill in the envv[] array */
678	current->mm->env_start = (unsigned long) p;
679	for (loop = bprm->envc; loop > 0; loop--) {
680		__put_user((elf_caddr_t)(unsigned long) p, envp++);
681		len = strnlen_user(p, MAX_ARG_STRLEN);
682		if (!len || len > MAX_ARG_STRLEN)
683			return -EINVAL;
684		p += len;
685	}
686	__put_user(NULL, envp);
687	current->mm->env_end = (unsigned long) p;
688
689	mm->start_stack = (unsigned long) sp;
690	return 0;
691}
692
693/*****************************************************************************/
694/*
695 * transfer the program arguments and environment from the holding pages onto
696 * the stack
697 */
698#ifndef CONFIG_MMU
699static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm,
700					    unsigned long *_sp)
701{
702	unsigned long index, stop, sp;
703	char *src;
704	int ret = 0;
705
706	stop = bprm->p >> PAGE_SHIFT;
707	sp = *_sp;
708
709	for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
710		src = kmap(bprm->page[index]);
711		sp -= PAGE_SIZE;
712		if (copy_to_user((void *) sp, src, PAGE_SIZE) != 0)
713			ret = -EFAULT;
714		kunmap(bprm->page[index]);
715		if (ret < 0)
716			goto out;
717	}
718
719	*_sp = (*_sp - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p)) & ~15;
720
721out:
722	return ret;
723}
724#endif
725
726/*****************************************************************************/
727/*
728 * load the appropriate binary image (executable or interpreter) into memory
729 * - we assume no MMU is available
730 * - if no other PIC bits are set in params->hdr->e_flags
731 *   - we assume that the LOADable segments in the binary are independently relocatable
732 *   - we assume R/O executable segments are shareable
733 * - else
734 *   - we assume the loadable parts of the image to require fixed displacement
735 *   - the image is not shareable
736 */
737static int elf_fdpic_map_file(struct elf_fdpic_params *params,
738			      struct file *file,
739			      struct mm_struct *mm,
740			      const char *what)
741{
742	struct elf32_fdpic_loadmap *loadmap;
743#ifdef CONFIG_MMU
744	struct elf32_fdpic_loadseg *mseg;
745#endif
746	struct elf32_fdpic_loadseg *seg;
747	struct elf32_phdr *phdr;
748	unsigned long load_addr, stop;
749	unsigned nloads, tmp;
750	size_t size;
751	int loop, ret;
752
753	/* allocate a load map table */
754	nloads = 0;
755	for (loop = 0; loop < params->hdr.e_phnum; loop++)
756		if (params->phdrs[loop].p_type == PT_LOAD)
757			nloads++;
758
759	if (nloads == 0)
760		return -ELIBBAD;
761
762	size = sizeof(*loadmap) + nloads * sizeof(*seg);
763	loadmap = kzalloc(size, GFP_KERNEL);
764	if (!loadmap)
765		return -ENOMEM;
766
767	params->loadmap = loadmap;
768
769	loadmap->version = ELF32_FDPIC_LOADMAP_VERSION;
770	loadmap->nsegs = nloads;
771
772	load_addr = params->load_addr;
773	seg = loadmap->segs;
774
775	/* map the requested LOADs into the memory space */
776	switch (params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) {
777	case ELF_FDPIC_FLAG_CONSTDISP:
778	case ELF_FDPIC_FLAG_CONTIGUOUS:
779#ifndef CONFIG_MMU
780		ret = elf_fdpic_map_file_constdisp_on_uclinux(params, file, mm);
781		if (ret < 0)
782			return ret;
783		break;
784#endif
785	default:
786		ret = elf_fdpic_map_file_by_direct_mmap(params, file, mm);
787		if (ret < 0)
788			return ret;
789		break;
790	}
791
792	/* map the entry point */
793	if (params->hdr.e_entry) {
794		seg = loadmap->segs;
795		for (loop = loadmap->nsegs; loop > 0; loop--, seg++) {
796			if (params->hdr.e_entry >= seg->p_vaddr &&
797			    params->hdr.e_entry < seg->p_vaddr + seg->p_memsz) {
798				params->entry_addr =
799					(params->hdr.e_entry - seg->p_vaddr) +
800					seg->addr;
801				break;
802			}
803		}
804	}
805
806	/* determine where the program header table has wound up if mapped */
807	stop = params->hdr.e_phoff;
808	stop += params->hdr.e_phnum * sizeof (struct elf_phdr);
809	phdr = params->phdrs;
810
811	for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
812		if (phdr->p_type != PT_LOAD)
813			continue;
814
815		if (phdr->p_offset > params->hdr.e_phoff ||
816		    phdr->p_offset + phdr->p_filesz < stop)
817			continue;
818
819		seg = loadmap->segs;
820		for (loop = loadmap->nsegs; loop > 0; loop--, seg++) {
821			if (phdr->p_vaddr >= seg->p_vaddr &&
822			    phdr->p_vaddr + phdr->p_filesz <=
823			    seg->p_vaddr + seg->p_memsz) {
824				params->ph_addr =
825					(phdr->p_vaddr - seg->p_vaddr) +
826					seg->addr +
827					params->hdr.e_phoff - phdr->p_offset;
828				break;
829			}
830		}
831		break;
832	}
833
834	/* determine where the dynamic section has wound up if there is one */
835	phdr = params->phdrs;
836	for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
837		if (phdr->p_type != PT_DYNAMIC)
838			continue;
839
840		seg = loadmap->segs;
841		for (loop = loadmap->nsegs; loop > 0; loop--, seg++) {
842			if (phdr->p_vaddr >= seg->p_vaddr &&
843			    phdr->p_vaddr + phdr->p_memsz <=
844			    seg->p_vaddr + seg->p_memsz) {
845				params->dynamic_addr =
846					(phdr->p_vaddr - seg->p_vaddr) +
847					seg->addr;
848
849				/* check the dynamic section contains at least
850				 * one item, and that the last item is a NULL
851				 * entry */
852				if (phdr->p_memsz == 0 ||
853				    phdr->p_memsz % sizeof(Elf32_Dyn) != 0)
854					goto dynamic_error;
855
856				tmp = phdr->p_memsz / sizeof(Elf32_Dyn);
857				if (((Elf32_Dyn *)
858				     params->dynamic_addr)[tmp - 1].d_tag != 0)
859					goto dynamic_error;
860				break;
861			}
862		}
863		break;
864	}
865
866	/* now elide adjacent segments in the load map on MMU linux
867	 * - on uClinux the holes between may actually be filled with system
868	 *   stuff or stuff from other processes
869	 */
870#ifdef CONFIG_MMU
871	nloads = loadmap->nsegs;
872	mseg = loadmap->segs;
873	seg = mseg + 1;
874	for (loop = 1; loop < nloads; loop++) {
875		/* see if we have a candidate for merging */
876		if (seg->p_vaddr - mseg->p_vaddr == seg->addr - mseg->addr) {
877			load_addr = PAGE_ALIGN(mseg->addr + mseg->p_memsz);
878			if (load_addr == (seg->addr & PAGE_MASK)) {
879				mseg->p_memsz +=
880					load_addr -
881					(mseg->addr + mseg->p_memsz);
882				mseg->p_memsz += seg->addr & ~PAGE_MASK;
883				mseg->p_memsz += seg->p_memsz;
884				loadmap->nsegs--;
885				continue;
886			}
887		}
888
889		mseg++;
890		if (mseg != seg)
891			*mseg = *seg;
892	}
893#endif
894
895	kdebug("Mapped Object [%s]:", what);
896	kdebug("- elfhdr   : %lx", params->elfhdr_addr);
897	kdebug("- entry    : %lx", params->entry_addr);
898	kdebug("- PHDR[]   : %lx", params->ph_addr);
899	kdebug("- DYNAMIC[]: %lx", params->dynamic_addr);
900	seg = loadmap->segs;
901	for (loop = 0; loop < loadmap->nsegs; loop++, seg++)
902		kdebug("- LOAD[%d] : %08x-%08x [va=%x ms=%x]",
903		       loop,
904		       seg->addr, seg->addr + seg->p_memsz - 1,
905		       seg->p_vaddr, seg->p_memsz);
906
907	return 0;
908
909dynamic_error:
910	printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n",
911	       what, file->f_path.dentry->d_inode->i_ino);
912	return -ELIBBAD;
913}
914
915/*****************************************************************************/
916/*
917 * map a file with constant displacement under uClinux
918 */
919#ifndef CONFIG_MMU
920static int elf_fdpic_map_file_constdisp_on_uclinux(
921	struct elf_fdpic_params *params,
922	struct file *file,
923	struct mm_struct *mm)
924{
925	struct elf32_fdpic_loadseg *seg;
926	struct elf32_phdr *phdr;
927	unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0, mflags;
928	loff_t fpos;
929	int loop, ret;
930
931	load_addr = params->load_addr;
932	seg = params->loadmap->segs;
933
934	/* determine the bounds of the contiguous overall allocation we must
935	 * make */
936	phdr = params->phdrs;
937	for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
938		if (params->phdrs[loop].p_type != PT_LOAD)
939			continue;
940
941		if (base > phdr->p_vaddr)
942			base = phdr->p_vaddr;
943		if (top < phdr->p_vaddr + phdr->p_memsz)
944			top = phdr->p_vaddr + phdr->p_memsz;
945	}
946
947	/* allocate one big anon block for everything */
948	mflags = MAP_PRIVATE;
949	if (params->flags & ELF_FDPIC_FLAG_EXECUTABLE)
950		mflags |= MAP_EXECUTABLE;
951
952	down_write(&mm->mmap_sem);
953	maddr = do_mmap(NULL, load_addr, top - base,
954			PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0);
955	up_write(&mm->mmap_sem);
956	if (IS_ERR_VALUE(maddr))
957		return (int) maddr;
958
959	if (load_addr != 0)
960		load_addr += PAGE_ALIGN(top - base);
961
962	/* and then load the file segments into it */
963	phdr = params->phdrs;
964	for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
965		if (params->phdrs[loop].p_type != PT_LOAD)
966			continue;
967
968		fpos = phdr->p_offset;
969
970		seg->addr = maddr + (phdr->p_vaddr - base);
971		seg->p_vaddr = phdr->p_vaddr;
972		seg->p_memsz = phdr->p_memsz;
973
974		ret = file->f_op->read(file, (void *) seg->addr,
975				       phdr->p_filesz, &fpos);
976		if (ret < 0)
977			return ret;
978
979		/* map the ELF header address if in this segment */
980		if (phdr->p_offset == 0)
981			params->elfhdr_addr = seg->addr;
982
983		/* clear any space allocated but not loaded */
984		if (phdr->p_filesz < phdr->p_memsz) {
985			if (clear_user((void *) (seg->addr + phdr->p_filesz),
986				       phdr->p_memsz - phdr->p_filesz))
987				return -EFAULT;
988		}
989
990		if (mm) {
991			if (phdr->p_flags & PF_X) {
992				if (!mm->start_code) {
993					mm->start_code = seg->addr;
994					mm->end_code = seg->addr +
995						phdr->p_memsz;
996				}
997			} else if (!mm->start_data) {
998				mm->start_data = seg->addr;
999				mm->end_data = seg->addr + phdr->p_memsz;
1000			}
1001		}
1002
1003		seg++;
1004	}
1005
1006	return 0;
1007}
1008#endif
1009
1010/*****************************************************************************/
1011/*
1012 * map a binary by direct mmap() of the individual PT_LOAD segments
1013 */
1014static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
1015					     struct file *file,
1016					     struct mm_struct *mm)
1017{
1018	struct elf32_fdpic_loadseg *seg;
1019	struct elf32_phdr *phdr;
1020	unsigned long load_addr, delta_vaddr;
1021	int loop, dvset;
1022
1023	load_addr = params->load_addr;
1024	delta_vaddr = 0;
1025	dvset = 0;
1026
1027	seg = params->loadmap->segs;
1028
1029	/* deal with each load segment separately */
1030	phdr = params->phdrs;
1031	for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
1032		unsigned long maddr, disp, excess, excess1;
1033		int prot = 0, flags;
1034
1035		if (phdr->p_type != PT_LOAD)
1036			continue;
1037
1038		kdebug("[LOAD] va=%lx of=%lx fs=%lx ms=%lx",
1039		       (unsigned long) phdr->p_vaddr,
1040		       (unsigned long) phdr->p_offset,
1041		       (unsigned long) phdr->p_filesz,
1042		       (unsigned long) phdr->p_memsz);
1043
1044		/* determine the mapping parameters */
1045		if (phdr->p_flags & PF_R) prot |= PROT_READ;
1046		if (phdr->p_flags & PF_W) prot |= PROT_WRITE;
1047		if (phdr->p_flags & PF_X) prot |= PROT_EXEC;
1048
1049		flags = MAP_PRIVATE | MAP_DENYWRITE;
1050		if (params->flags & ELF_FDPIC_FLAG_EXECUTABLE)
1051			flags |= MAP_EXECUTABLE;
1052
1053		maddr = 0;
1054
1055		switch (params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) {
1056		case ELF_FDPIC_FLAG_INDEPENDENT:
1057			/* PT_LOADs are independently locatable */
1058			break;
1059
1060		case ELF_FDPIC_FLAG_HONOURVADDR:
1061			/* the specified virtual address must be honoured */
1062			maddr = phdr->p_vaddr;
1063			flags |= MAP_FIXED;
1064			break;
1065
1066		case ELF_FDPIC_FLAG_CONSTDISP:
1067			/* constant displacement
1068			 * - can be mapped anywhere, but must be mapped as a
1069			 *   unit
1070			 */
1071			if (!dvset) {
1072				maddr = load_addr;
1073				delta_vaddr = phdr->p_vaddr;
1074				dvset = 1;
1075			} else {
1076				maddr = load_addr + phdr->p_vaddr - delta_vaddr;
1077				flags |= MAP_FIXED;
1078			}
1079			break;
1080
1081		case ELF_FDPIC_FLAG_CONTIGUOUS:
1082			/* contiguity handled later */
1083			break;
1084
1085		default:
1086			BUG();
1087		}
1088
1089		maddr &= PAGE_MASK;
1090
1091		/* create the mapping */
1092		disp = phdr->p_vaddr & ~PAGE_MASK;
1093		down_write(&mm->mmap_sem);
1094		maddr = do_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
1095				phdr->p_offset - disp);
1096		up_write(&mm->mmap_sem);
1097
1098		kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx",
1099		       loop, phdr->p_memsz + disp, prot, flags,
1100		       phdr->p_offset - disp, maddr);
1101
1102		if (IS_ERR_VALUE(maddr))
1103			return (int) maddr;
1104
1105		if ((params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) ==
1106		    ELF_FDPIC_FLAG_CONTIGUOUS)
1107			load_addr += PAGE_ALIGN(phdr->p_memsz + disp);
1108
1109		seg->addr = maddr + disp;
1110		seg->p_vaddr = phdr->p_vaddr;
1111		seg->p_memsz = phdr->p_memsz;
1112
1113		/* map the ELF header address if in this segment */
1114		if (phdr->p_offset == 0)
1115			params->elfhdr_addr = seg->addr;
1116
1117		/* clear the bit between beginning of mapping and beginning of
1118		 * PT_LOAD */
1119		if (prot & PROT_WRITE && disp > 0) {
1120			kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
1121			if (clear_user((void __user *) maddr, disp))
1122				return -EFAULT;
1123			maddr += disp;
1124		}
1125
1126		/* clear any space allocated but not loaded
1127		 * - on uClinux we can just clear the lot
1128		 * - on MMU linux we'll get a SIGBUS beyond the last page
1129		 *   extant in the file
1130		 */
1131		excess = phdr->p_memsz - phdr->p_filesz;
1132		excess1 = PAGE_SIZE - ((maddr + phdr->p_filesz) & ~PAGE_MASK);
1133
1134#ifdef CONFIG_MMU
1135		if (excess > excess1) {
1136			unsigned long xaddr = maddr + phdr->p_filesz + excess1;
1137			unsigned long xmaddr;
1138
1139			flags |= MAP_FIXED | MAP_ANONYMOUS;
1140			down_write(&mm->mmap_sem);
1141			xmaddr = do_mmap(NULL, xaddr, excess - excess1,
1142					 prot, flags, 0);
1143			up_write(&mm->mmap_sem);
1144
1145			kdebug("mmap[%d] <anon>"
1146			       " ad=%lx sz=%lx pr=%x fl=%x of=0 --> %08lx",
1147			       loop, xaddr, excess - excess1, prot, flags,
1148			       xmaddr);
1149
1150			if (xmaddr != xaddr)
1151				return -ENOMEM;
1152		}
1153
1154		if (prot & PROT_WRITE && excess1 > 0) {
1155			kdebug("clear[%d] ad=%lx sz=%lx",
1156			       loop, maddr + phdr->p_filesz, excess1);
1157			if (clear_user((void __user *) maddr + phdr->p_filesz,
1158				       excess1))
1159				return -EFAULT;
1160		}
1161
1162#else
1163		if (excess > 0) {
1164			kdebug("clear[%d] ad=%lx sz=%lx",
1165			       loop, maddr + phdr->p_filesz, excess);
1166			if (clear_user((void *) maddr + phdr->p_filesz, excess))
1167				return -EFAULT;
1168		}
1169#endif
1170
1171		if (mm) {
1172			if (phdr->p_flags & PF_X) {
1173				if (!mm->start_code) {
1174					mm->start_code = maddr;
1175					mm->end_code = maddr + phdr->p_memsz;
1176				}
1177			} else if (!mm->start_data) {
1178				mm->start_data = maddr;
1179				mm->end_data = maddr + phdr->p_memsz;
1180			}
1181		}
1182
1183		seg++;
1184	}
1185
1186	return 0;
1187}
1188
1189/*****************************************************************************/
1190/*
1191 * ELF-FDPIC core dumper
1192 *
1193 * Modelled on fs/exec.c:aout_core_dump()
1194 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1195 *
1196 * Modelled on fs/binfmt_elf.c core dumper
1197 */
1198#ifdef CONFIG_ELF_CORE
1199
1200/*
1201 * Decide whether a segment is worth dumping; default is yes to be
1202 * sure (missing info is worse than too much; etc).
1203 * Personally I'd include everything, and use the coredump limit...
1204 *
1205 * I think we should skip something. But I am not sure how. H.J.
1206 */
1207static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
1208{
1209	int dump_ok;
1210
1211	/* Do not dump I/O mapped devices or special mappings */
1212	if (vma->vm_flags & (VM_IO | VM_RESERVED)) {
1213		kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags);
1214		return 0;
1215	}
1216
1217	/* If we may not read the contents, don't allow us to dump
1218	 * them either. "dump_write()" can't handle it anyway.
1219	 */
1220	if (!(vma->vm_flags & VM_READ)) {
1221		kdcore("%08lx: %08lx: no (!read)", vma->vm_start, vma->vm_flags);
1222		return 0;
1223	}
1224
1225	/* By default, dump shared memory if mapped from an anonymous file. */
1226	if (vma->vm_flags & VM_SHARED) {
1227		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0) {
1228			dump_ok = test_bit(MMF_DUMP_ANON_SHARED, &mm_flags);
1229			kdcore("%08lx: %08lx: %s (share)", vma->vm_start,
1230			       vma->vm_flags, dump_ok ? "yes" : "no");
1231			return dump_ok;
1232		}
1233
1234		dump_ok = test_bit(MMF_DUMP_MAPPED_SHARED, &mm_flags);
1235		kdcore("%08lx: %08lx: %s (share)", vma->vm_start,
1236		       vma->vm_flags, dump_ok ? "yes" : "no");
1237		return dump_ok;
1238	}
1239
1240#ifdef CONFIG_MMU
1241	/* By default, if it hasn't been written to, don't write it out */
1242	if (!vma->anon_vma) {
1243		dump_ok = test_bit(MMF_DUMP_MAPPED_PRIVATE, &mm_flags);
1244		kdcore("%08lx: %08lx: %s (!anon)", vma->vm_start,
1245		       vma->vm_flags, dump_ok ? "yes" : "no");
1246		return dump_ok;
1247	}
1248#endif
1249
1250	dump_ok = test_bit(MMF_DUMP_ANON_PRIVATE, &mm_flags);
1251	kdcore("%08lx: %08lx: %s", vma->vm_start, vma->vm_flags,
1252	       dump_ok ? "yes" : "no");
1253	return dump_ok;
1254}
1255
/* In-memory description of one ELF note that is to be written to the core */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* note type */
	unsigned int datasz;	/* size of the data in bytes */
	void *data;		/* the data itself */
};
1264
1265static int notesize(struct memelfnote *en)
1266{
1267	int sz;
1268
1269	sz = sizeof(struct elf_note);
1270	sz += roundup(strlen(en->name) + 1, 4);
1271	sz += roundup(en->datasz, 4);
1272
1273	return sz;
1274}
1275
1276/* #define DEBUG */
1277
1278#define DUMP_WRITE(addr, nr, foffset)	\
1279	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1280
1281static int alignfile(struct file *file, loff_t *foffset)
1282{
1283	static const char buf[4] = { 0, };
1284	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1285	return 1;
1286}
1287
1288static int writenote(struct memelfnote *men, struct file *file,
1289			loff_t *foffset)
1290{
1291	struct elf_note en;
1292	en.n_namesz = strlen(men->name) + 1;
1293	en.n_descsz = men->datasz;
1294	en.n_type = men->type;
1295
1296	DUMP_WRITE(&en, sizeof(en), foffset);
1297	DUMP_WRITE(men->name, en.n_namesz, foffset);
1298	if (!alignfile(file, foffset))
1299		return 0;
1300	DUMP_WRITE(men->data, men->datasz, foffset);
1301	if (!alignfile(file, foffset))
1302		return 0;
1303
1304	return 1;
1305}
1306#undef DUMP_WRITE
1307
1308static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
1309{
1310	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1311	elf->e_ident[EI_CLASS] = ELF_CLASS;
1312	elf->e_ident[EI_DATA] = ELF_DATA;
1313	elf->e_ident[EI_VERSION] = EV_CURRENT;
1314	elf->e_ident[EI_OSABI] = ELF_OSABI;
1315	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1316
1317	elf->e_type = ET_CORE;
1318	elf->e_machine = ELF_ARCH;
1319	elf->e_version = EV_CURRENT;
1320	elf->e_entry = 0;
1321	elf->e_phoff = sizeof(struct elfhdr);
1322	elf->e_shoff = 0;
1323	elf->e_flags = ELF_FDPIC_CORE_EFLAGS;
1324	elf->e_ehsize = sizeof(struct elfhdr);
1325	elf->e_phentsize = sizeof(struct elf_phdr);
1326	elf->e_phnum = segs;
1327	elf->e_shentsize = 0;
1328	elf->e_shnum = 0;
1329	elf->e_shstrndx = 0;
1330	return;
1331}
1332
1333static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1334{
1335	phdr->p_type = PT_NOTE;
1336	phdr->p_offset = offset;
1337	phdr->p_vaddr = 0;
1338	phdr->p_paddr = 0;
1339	phdr->p_filesz = sz;
1340	phdr->p_memsz = 0;
1341	phdr->p_flags = 0;
1342	phdr->p_align = 0;
1343	return;
1344}
1345
1346static inline void fill_note(struct memelfnote *note, const char *name, int type,
1347		unsigned int sz, void *data)
1348{
1349	note->name = name;
1350	note->type = type;
1351	note->datasz = sz;
1352	note->data = data;
1353	return;
1354}
1355
1356/*
1357 * fill up all the fields in prstatus from the given task struct, except
1358 * registers which need to be filled up separately.
1359 */
1360static void fill_prstatus(struct elf_prstatus *prstatus,
1361			  struct task_struct *p, long signr)
1362{
1363	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1364	prstatus->pr_sigpend = p->pending.signal.sig[0];
1365	prstatus->pr_sighold = p->blocked.sig[0];
1366	rcu_read_lock();
1367	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1368	rcu_read_unlock();
1369	prstatus->pr_pid = task_pid_vnr(p);
1370	prstatus->pr_pgrp = task_pgrp_vnr(p);
1371	prstatus->pr_sid = task_session_vnr(p);
1372	if (thread_group_leader(p)) {
1373		struct task_cputime cputime;
1374
1375		/*
1376		 * This is the record for the group leader.  It shows the
1377		 * group-wide total, not its individual thread total.
1378		 */
1379		thread_group_cputime(p, &cputime);
1380		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1381		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1382	} else {
1383		cputime_to_timeval(p->utime, &prstatus->pr_utime);
1384		cputime_to_timeval(p->stime, &prstatus->pr_stime);
1385	}
1386	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1387	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1388
1389	prstatus->pr_exec_fdpic_loadmap = p->mm->context.exec_fdpic_loadmap;
1390	prstatus->pr_interp_fdpic_loadmap = p->mm->context.interp_fdpic_loadmap;
1391}
1392
1393static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1394		       struct mm_struct *mm)
1395{
1396	const struct cred *cred;
1397	unsigned int i, len;
1398
1399	/* first copy the parameters from user space */
1400	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1401
1402	len = mm->arg_end - mm->arg_start;
1403	if (len >= ELF_PRARGSZ)
1404		len = ELF_PRARGSZ - 1;
1405	if (copy_from_user(&psinfo->pr_psargs,
1406		           (const char __user *) mm->arg_start, len))
1407		return -EFAULT;
1408	for (i = 0; i < len; i++)
1409		if (psinfo->pr_psargs[i] == 0)
1410			psinfo->pr_psargs[i] = ' ';
1411	psinfo->pr_psargs[len] = 0;
1412
1413	rcu_read_lock();
1414	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1415	rcu_read_unlock();
1416	psinfo->pr_pid = task_pid_vnr(p);
1417	psinfo->pr_pgrp = task_pgrp_vnr(p);
1418	psinfo->pr_sid = task_session_vnr(p);
1419
1420	i = p->state ? ffz(~p->state) + 1 : 0;
1421	psinfo->pr_state = i;
1422	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1423	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1424	psinfo->pr_nice = task_nice(p);
1425	psinfo->pr_flag = p->flags;
1426	rcu_read_lock();
1427	cred = __task_cred(p);
1428	SET_UID(psinfo->pr_uid, cred->uid);
1429	SET_GID(psinfo->pr_gid, cred->gid);
1430	rcu_read_unlock();
1431	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1432
1433	return 0;
1434}
1435
1436/* Here is the structure in which status of each thread is captured. */
1437struct elf_thread_status
1438{
1439	struct list_head list;
1440	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1441	elf_fpregset_t fpu;		/* NT_PRFPREG */
1442	struct task_struct *thread;
1443#ifdef ELF_CORE_COPY_XFPREGS
1444	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1445#endif
1446	struct memelfnote notes[3];
1447	int num_notes;
1448};
1449
1450/*
1451 * In order to add the specific thread information for the elf file format,
1452 * we need to keep a linked list of every thread's pr_status and then create
1453 * a single section for them in the final core file.
1454 */
1455static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1456{
1457	struct task_struct *p = t->thread;
1458	int sz = 0;
1459
1460	t->num_notes = 0;
1461
1462	fill_prstatus(&t->prstatus, p, signr);
1463	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1464
1465	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1466		  &t->prstatus);
1467	t->num_notes++;
1468	sz += notesize(&t->notes[0]);
1469
1470	t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu);
1471	if (t->prstatus.pr_fpvalid) {
1472		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1473			  &t->fpu);
1474		t->num_notes++;
1475		sz += notesize(&t->notes[1]);
1476	}
1477
1478#ifdef ELF_CORE_COPY_XFPREGS
1479	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1480		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1481			  sizeof(t->xfpu), &t->xfpu);
1482		t->num_notes++;
1483		sz += notesize(&t->notes[2]);
1484	}
1485#endif
1486	return sz;
1487}
1488
1489static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1490			     elf_addr_t e_shoff, int segs)
1491{
1492	elf->e_shoff = e_shoff;
1493	elf->e_shentsize = sizeof(*shdr4extnum);
1494	elf->e_shnum = 1;
1495	elf->e_shstrndx = SHN_UNDEF;
1496
1497	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1498
1499	shdr4extnum->sh_type = SHT_NULL;
1500	shdr4extnum->sh_size = elf->e_shnum;
1501	shdr4extnum->sh_link = elf->e_shstrndx;
1502	shdr4extnum->sh_info = segs;
1503}
1504
/*
 * dump the segments for an MMU process
 * - walks every dumpable VMA of the current process a page at a time
 * - a page that get_dump_page() returns is written out and charged to *size;
 *   otherwise the file position is moved on by a page instead
 * - returns 0 on success, -EFBIG if *limit would be exceeded or the seek
 *   fails, and -EIO if a write fails
 */
#ifdef CONFIG_MMU
static int elf_fdpic_dump_segments(struct file *file, size_t *size,
			   unsigned long *limit, unsigned long mm_flags)
{
	struct vm_area_struct *vma;

	for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
		unsigned long addr;

		if (!maydump(vma, mm_flags))
			continue;

		for (addr = vma->vm_start; addr < vma->vm_end;
							addr += PAGE_SIZE) {
			struct page *page = get_dump_page(addr);
			void *kaddr;
			int err = 0;

			if (!page) {
				/* no page to write: skip over it */
				if (!dump_seek(file, PAGE_SIZE))
					return -EFBIG;
				continue;
			}

			kaddr = kmap(page);
			*size += PAGE_SIZE;
			if (*size > *limit)
				err = -EFBIG;
			else if (!dump_write(file, kaddr, PAGE_SIZE))
				err = -EIO;
			kunmap(page);
			page_cache_release(page);

			if (err)
				return err;
		}
	}

	return 0;
}
#endif
1543
1544/*
1545 * dump the segments for a NOMMU process
1546 */
1547#ifndef CONFIG_MMU
1548static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1549			   unsigned long *limit, unsigned long mm_flags)
1550{
1551	struct vm_area_struct *vma;
1552
1553	for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
1554		if (!maydump(vma, mm_flags))
1555			continue;
1556
1557		if ((*size += PAGE_SIZE) > *limit)
1558			return -EFBIG;
1559
1560		if (!dump_write(file, (void *) vma->vm_start,
1561				vma->vm_end - vma->vm_start))
1562			return -EIO;
1563	}
1564
1565	return 0;
1566}
1567#endif
1568
1569static size_t elf_core_vma_data_size(unsigned long mm_flags)
1570{
1571	struct vm_area_struct *vma;
1572	size_t size = 0;
1573
1574	for (vma = current->mm->mmap; vma; vma = vma->vm_next)
1575		if (maydump(vma, mm_flags))
1576			size += vma->vm_end - vma->vm_start;
1577	return size;
1578}
1579
1580/*
1581 * Actual dumper
1582 *
1583 * This is a two-pass process; first we find the offsets of the bits,
1584 * and then they are actually written out.  If we run out of core limit
1585 * we just truncate.
1586 */
1587static int elf_fdpic_core_dump(struct coredump_params *cprm)
1588{
1589#define	NUM_NOTES	6
1590	int has_dumped = 0;
1591	mm_segment_t fs;
1592	int segs;
1593	size_t size = 0;
1594	int i;
1595	struct vm_area_struct *vma;
1596	struct elfhdr *elf = NULL;
1597	loff_t offset = 0, dataoff, foffset;
1598	int numnote;
1599	struct memelfnote *notes = NULL;
1600	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
1601	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
1602 	LIST_HEAD(thread_list);
1603 	struct list_head *t;
1604	elf_fpregset_t *fpu = NULL;
1605#ifdef ELF_CORE_COPY_XFPREGS
1606	elf_fpxregset_t *xfpu = NULL;
1607#endif
1608	int thread_status_size = 0;
1609	elf_addr_t *auxv;
1610	struct elf_phdr *phdr4note = NULL;
1611	struct elf_shdr *shdr4extnum = NULL;
1612	Elf_Half e_phnum;
1613	elf_addr_t e_shoff;
1614
1615	/*
1616	 * We no longer stop all VM operations.
1617	 *
1618	 * This is because those proceses that could possibly change map_count
1619	 * or the mmap / vma pages are now blocked in do_exit on current
1620	 * finishing this core dump.
1621	 *
1622	 * Only ptrace can touch these memory addresses, but it doesn't change
1623	 * the map_count or the pages allocated. So no possibility of crashing
1624	 * exists while dumping the mm->vm_next areas to the core file.
1625	 */
1626
1627	/* alloc memory for large data structures: too large to be on stack */
1628	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1629	if (!elf)
1630		goto cleanup;
1631	prstatus = kzalloc(sizeof(*prstatus), GFP_KERNEL);
1632	if (!prstatus)
1633		goto cleanup;
1634	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1635	if (!psinfo)
1636		goto cleanup;
1637	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1638	if (!notes)
1639		goto cleanup;
1640	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1641	if (!fpu)
1642		goto cleanup;
1643#ifdef ELF_CORE_COPY_XFPREGS
1644	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1645	if (!xfpu)
1646		goto cleanup;
1647#endif
1648
1649	if (cprm->signr) {
1650		struct core_thread *ct;
1651		struct elf_thread_status *tmp;
1652
1653		for (ct = current->mm->core_state->dumper.next;
1654						ct; ct = ct->next) {
1655			tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
1656			if (!tmp)
1657				goto cleanup;
1658
1659			tmp->thread = ct->task;
1660			list_add(&tmp->list, &thread_list);
1661		}
1662
1663		list_for_each(t, &thread_list) {
1664			struct elf_thread_status *tmp;
1665			int sz;
1666
1667			tmp = list_entry(t, struct elf_thread_status, list);
1668			sz = elf_dump_thread_status(cprm->signr, tmp);
1669			thread_status_size += sz;
1670		}
1671	}
1672
1673	/* now collect the dump for the current */
1674	fill_prstatus(prstatus, current, cprm->signr);
1675	elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
1676
1677	segs = current->mm->map_count;
1678	segs += elf_core_extra_phdrs();
1679
1680	/* for notes section */
1681	segs++;
1682
1683	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
1684	 * this, kernel supports extended numbering. Have a look at
1685	 * include/linux/elf.h for further information. */
1686	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
1687
1688	/* Set up header */
1689	fill_elf_fdpic_header(elf, e_phnum);
1690
1691	has_dumped = 1;
1692	current->flags |= PF_DUMPCORE;
1693
1694	/*
1695	 * Set up the notes in similar form to SVR4 core dumps made
1696	 * with info from their /proc.
1697	 */
1698
1699	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1700	fill_psinfo(psinfo, current->group_leader, current->mm);
1701	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1702
1703	numnote = 2;
1704
1705	auxv = (elf_addr_t *) current->mm->saved_auxv;
1706
1707	i = 0;
1708	do
1709		i += 2;
1710	while (auxv[i - 2] != AT_NULL);
1711	fill_note(&notes[numnote++], "CORE", NT_AUXV,
1712		  i * sizeof(elf_addr_t), auxv);
1713
1714  	/* Try to dump the FPU. */
1715	if ((prstatus->pr_fpvalid =
1716	     elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
1717		fill_note(notes + numnote++,
1718			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1719#ifdef ELF_CORE_COPY_XFPREGS
1720	if (elf_core_copy_task_xfpregs(current, xfpu))
1721		fill_note(notes + numnote++,
1722			  "LINUX", ELF_CORE_XFPREG_TYPE, sizeof(*xfpu), xfpu);
1723#endif
1724
1725	fs = get_fs();
1726	set_fs(KERNEL_DS);
1727
1728	offset += sizeof(*elf);				/* Elf header */
1729	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
1730	foffset = offset;
1731
1732	/* Write notes phdr entry */
1733	{
1734		int sz = 0;
1735
1736		for (i = 0; i < numnote; i++)
1737			sz += notesize(notes + i);
1738
1739		sz += thread_status_size;
1740
1741		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
1742		if (!phdr4note)
1743			goto end_coredump;
1744
1745		fill_elf_note_phdr(phdr4note, sz, offset);
1746		offset += sz;
1747	}
1748
1749	/* Page-align dumped data */
1750	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1751
1752	offset += elf_core_vma_data_size(cprm->mm_flags);
1753	offset += elf_core_extra_data_size();
1754	e_shoff = offset;
1755
1756	if (e_phnum == PN_XNUM) {
1757		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
1758		if (!shdr4extnum)
1759			goto end_coredump;
1760		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
1761	}
1762
1763	offset = dataoff;
1764
1765	size += sizeof(*elf);
1766	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
1767		goto end_coredump;
1768
1769	size += sizeof(*phdr4note);
1770	if (size > cprm->limit
1771	    || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
1772		goto end_coredump;
1773
1774	/* write program headers for segments dump */
1775	for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
1776		struct elf_phdr phdr;
1777		size_t sz;
1778
1779		sz = vma->vm_end - vma->vm_start;
1780
1781		phdr.p_type = PT_LOAD;
1782		phdr.p_offset = offset;
1783		phdr.p_vaddr = vma->vm_start;
1784		phdr.p_paddr = 0;
1785		phdr.p_filesz = maydump(vma, cprm->mm_flags) ? sz : 0;
1786		phdr.p_memsz = sz;
1787		offset += phdr.p_filesz;
1788		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1789		if (vma->vm_flags & VM_WRITE)
1790			phdr.p_flags |= PF_W;
1791		if (vma->vm_flags & VM_EXEC)
1792			phdr.p_flags |= PF_X;
1793		phdr.p_align = ELF_EXEC_PAGESIZE;
1794
1795		size += sizeof(phdr);
1796		if (size > cprm->limit
1797		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
1798			goto end_coredump;
1799	}
1800
1801	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
1802		goto end_coredump;
1803
1804 	/* write out the notes section */
1805	for (i = 0; i < numnote; i++)
1806		if (!writenote(notes + i, cprm->file, &foffset))
1807			goto end_coredump;
1808
1809	/* write out the thread status notes section */
1810	list_for_each(t, &thread_list) {
1811		struct elf_thread_status *tmp =
1812				list_entry(t, struct elf_thread_status, list);
1813
1814		for (i = 0; i < tmp->num_notes; i++)
1815			if (!writenote(&tmp->notes[i], cprm->file, &foffset))
1816				goto end_coredump;
1817	}
1818
1819	if (!dump_seek(cprm->file, dataoff - foffset))
1820		goto end_coredump;
1821
1822	if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
1823				    cprm->mm_flags) < 0)
1824		goto end_coredump;
1825
1826	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
1827		goto end_coredump;
1828
1829	if (e_phnum == PN_XNUM) {
1830		size += sizeof(*shdr4extnum);
1831		if (size > cprm->limit
1832		    || !dump_write(cprm->file, shdr4extnum,
1833				   sizeof(*shdr4extnum)))
1834			goto end_coredump;
1835	}
1836
1837	if (cprm->file->f_pos != offset) {
1838		/* Sanity check */
1839		printk(KERN_WARNING
1840		       "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n",
1841		       cprm->file->f_pos, offset);
1842	}
1843
1844end_coredump:
1845	set_fs(fs);
1846
1847cleanup:
1848	while (!list_empty(&thread_list)) {
1849		struct list_head *tmp = thread_list.next;
1850		list_del(tmp);
1851		kfree(list_entry(tmp, struct elf_thread_status, list));
1852	}
1853	kfree(phdr4note);
1854	kfree(elf);
1855	kfree(prstatus);
1856	kfree(psinfo);
1857	kfree(notes);
1858	kfree(fpu);
1859#ifdef ELF_CORE_COPY_XFPREGS
1860	kfree(xfpu);
1861#endif
1862	return has_dumped;
1863#undef NUM_NOTES
1864}
1865
1866#endif		/* CONFIG_ELF_CORE */
1867