/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX System V Release 4 Programmer's Guide: ANSI C and Programming
 * Support Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
			     int, int);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
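
/*
 * A worked example of the rounding macros above (a sketch, assuming
 * ELF_MIN_ALIGN == 0x1000, i.e. 4K pages):
 *
 *	ELF_PAGESTART(0x08048123)  == 0x08048000  (round down to page)
 *	ELF_PAGEOFFSET(0x08048123) == 0x123       (offset within page)
 *	ELF_PAGEALIGN(0x08048123)  == 0x08049000  (round up to page)
 *
 * elf_map() below relies on PAGESTART/PAGEOFFSET splitting an address
 * into a page-aligned base and an in-page delta that sum back to the
 * original value.
 */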

static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
	.hasvdso	= 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would otherwise
   contain junk from the file that should not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}
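
/*
 * For example (a sketch, assuming ELF_MIN_ALIGN == 0x1000): with
 * elf_bss == 0x0804a123, padzero() clears the 0xedd bytes from
 * 0x0804a123 up to the page boundary at 0x0804b000, so the tail of the
 * last file-backed page reads as zeros instead of file junk.
 */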

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
#endif
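
/*
 * Example of the stack macros on a downward-growing stack (a sketch,
 * assuming sizeof(elf_addr_t) == 4 and sp == 0xbffffff0):
 *
 *	STACK_ALLOC(sp, 16)	sp becomes 0xbfffffe0 and that new, lower
 *				address is returned for the caller to
 *				copy 16 bytes into;
 *	STACK_ADD(sp, 2)	yields 0xbfffffd8, i.e. room for two
 *				elf_addr_t items below sp;
 *	STACK_ROUND(sp, items)	yields the address of `items' entries
 *				below sp, rounded down to a 16-byte
 *				boundary.
 */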

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		int interp_aout, unsigned long load_addr,
		unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		/*
		 * In some cases (e.g. Hyper-Threading), we want to avoid L1
		 * evictions by the processes running on the same package. One
		 * thing we can do is to shuffle the initial stack for them.
		 */

		p = arch_align_stack(p);

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1);
	if (interp_aout) {
		items += 3; /* a.out interpreters require argv & envp too */
	} else {
		items += 1; /* ELF interpreters only put argc on the stack */
	}
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp;
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	if (interp_aout) {
		argv = sp + 2;
		envp = argv + argc + 1;
		if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
		    __put_user((elf_addr_t)(unsigned long)envp, sp++))
			return -EFAULT;
	} else {
		argv = sp;
		envp = argv + argc + 1;
	}

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
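
/*
 * After create_elf_tables() the new userspace stack looks roughly like
 * this (downward-growing case; a sketch of what the code above builds,
 * lowest address first, not architecture-exact):
 *
 *	argc
 *	argv[0] ... argv[argc - 1], NULL
 *	envp[0] ... envp[envc - 1], NULL
 *	auxv: { AT_*, value } pairs, ending with { AT_NULL, 0 }
 *	argument/environment strings and the platform string, higher up
 *
 * The process entry point receives sp pointing at argc.
 */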

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type)
{
	unsigned long map_addr;
	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);

	down_write(&current->mm->mmap_sem);
	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (eppnt->p_filesz + pageoffset)
		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
				   eppnt->p_filesz + pageoffset, prot, type,
				   eppnt->p_offset - pageoffset);
	else
		map_addr = ELF_PAGESTART(addr);
	up_write(&current->mm->mmap_sem);
	return map_addr;
}

#endif /* !elf_map */
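
/*
 * elf_map() arithmetic example (a sketch, assuming ELF_MIN_ALIGN ==
 * 0x1000): for a phdr with p_vaddr == 0x08048123 and p_offset == 0x123,
 * pageoffset is 0x123, so the call maps file offset 0 at address
 * 0x08048000 for p_filesz + 0x123 bytes.  p_vaddr then sees the right
 * file bytes because the address and the file offset are misaligned by
 * the same in-page delta.
 */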

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_load_addr)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
	    interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					   eppnt, elf_prot, elf_type);
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	*interp_load_addr = load_addr;
	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

static unsigned long load_aout_interp(struct exec *interp_ex,
		struct file *interpreter)
{
	unsigned long text_data, elf_entry = ~0UL;
	char __user *addr;
	loff_t offset;

	current->mm->end_code = interp_ex->a_text;
	text_data = interp_ex->a_text + interp_ex->a_data;
	current->mm->end_data = text_data;
	current->mm->brk = interp_ex->a_bss + text_data;

	switch (N_MAGIC(*interp_ex)) {
	case OMAGIC:
		offset = 32;
		addr = (char __user *)0;
		break;
	case ZMAGIC:
	case QMAGIC:
		offset = N_TXTOFF(*interp_ex);
		addr = (char __user *)N_TXTADDR(*interp_ex);
		break;
	default:
		goto out;
	}

	down_write(&current->mm->mmap_sem);
	do_brk(0, text_data);
	up_write(&current->mm->mmap_sem);
	if (!interpreter->f_op || !interpreter->f_op->read)
		goto out;
	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
		goto out;
	flush_icache_range((unsigned long)addr,
			   (unsigned long)addr + text_data);

	down_write(&current->mm->mmap_sem);
	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
		interp_ex->a_bss);
	up_write(&current->mm->mmap_sem);
	elf_entry = interp_ex->a_entry;

out:
	return elf_entry;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif
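
/*
 * With 4K pages (PAGE_SHIFT == 12) the default mask keeps 0x7ff == 2047
 * random pages, and randomize_stack_top() shifts that count left by
 * PAGE_SHIFT, giving just under 8MB of stack-top variation.  Larger
 * page sizes shift the mask down so the byte range stays the same.
 */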

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned int interpreter_type = INTERPRETER_NONE;
	unsigned char ibcs2_interpreter = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry, interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	char passed_fileno[6];
	struct files_struct *files;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
		struct exec interp_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
	    loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	files = current->files;	/* Refcounted so ok */
	retval = unshare_files();
	if (retval < 0)
		goto out_free_ph;
	if (files == current->files) {
		put_files_struct(files);
		files = NULL;
	}

	/* exec will make our files private anyway, but for the a.out
	   loader stuff we need to do it earlier */
	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_fh;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary.
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NUL-terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/* If the program interpreter is one of these two,
			 * then assume an iBCS2 image. Otherwise assume
			 * a native Linux image.
			 */
			if (strcmp(elf_interpreter, "/usr/lib/libc.so.1") == 0 ||
			    strcmp(elf_interpreter, "/usr/lib/ld.so.1") == 0)
				ibcs2_interpreter = 1;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			if (file_permission(interpreter, MAY_READ) < 0)
				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_ex = *((struct exec *)bprm->buf);
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

		/* Now figure out which format our binary is */
		if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != QMAGIC))
			interpreter_type = INTERPRETER_ELF;

		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			interpreter_type &= ~INTERPRETER_ELF;

		retval = -ELIBBAD;
		if (!interpreter_type)
			goto out_free_dentry;

		/* Make sure only one type was selected */
		if ((interpreter_type & INTERPRETER_ELF) &&
		     interpreter_type != INTERPRETER_ELF) {
			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
			interpreter_type = INTERPRETER_ELF;
		}
		/* Verify the interpreter has a valid arch */
		if ((interpreter_type == INTERPRETER_ELF) &&
		    !elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality */
		SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	}

	/* OK, we are done with that, now set up the arg stuff,
	   and then start this sucker up */
	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
		char *passed_p = passed_fileno;
		sprintf(passed_fileno, "%d", elf_exec_fileno);

		if (elf_interpreter) {
			retval = copy_strings_kernel(1, &passed_p, bprm);
			if (retval)
				goto out_free_dentry;
			bprm->argc++;
		}
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Discard our unneeded old files struct */
	if (files) {
		put_files_struct(files);
		files = NULL;
	}

	/* OK, This is the point of no return */
	current->mm->start_data = 0;
	current->mm->end_data = 0;
	current->mm->end_code = 0;
	current->mm->mmap = NULL;
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory.  At this point, we assume that
	   the image should be loaded at a fixed address, not at a variable
	   address. */
	for (i = 0, elf_ppnt = elf_phdata;
	     i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value.
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void *)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
					     ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
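
		/*
		 * ET_DYN bias example (a sketch, assuming ELF_ET_DYN_BASE ==
		 * 0x40000000 and a first PT_LOAD with p_vaddr == 0x1234): the
		 * initial request above is ELF_PAGESTART(0x40000000 - 0x1234)
		 * and, once the kernel has picked the real mapping address in
		 * `error', load_bias is fixed up so that load_bias + p_vaddr
		 * is where the segment actually landed; all later segments
		 * are then mapped MAP_FIXED relative to that bias.
		 */
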
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		if (interpreter_type == INTERPRETER_AOUT)
			elf_entry = load_aout_interp(&loc->interp_ex,
						     interpreter);
		else
			elf_entry = load_elf_interp(&loc->interp_elf_ex,
						    interpreter,
						    &interp_load_addr);
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	if (interpreter_type != INTERPRETER_AOUT)
		sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	create_elf_tables(bprm, &loc->elf_ex,
			  (interpreter_type == INTERPRETER_AOUT),
			  load_addr, interp_load_addr);
	/* N.B. passed_fileno might not be initialized? */
	if (interpreter_type == INTERPRETER_AOUT)
		current->mm->arg_start += strlen(passed_fileno) + 1;
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_fh:
	if (files)
		reset_files_struct(current, files);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
			return 0;
	} else {
		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
		if (!buf)
			return 0;
		while (off > 0) {
			unsigned long n = off;
			if (n > PAGE_SIZE)
				n = PAGE_SIZE;
			if (!dump_write(file, buf, n)) {
				/* don't leak the zero page on a short write */
				free_page((unsigned long)buf);
				return 0;
			}
			off -= n;
		}
		free_page((unsigned long)buf);
	}
	return 1;
}
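
/*
 * dump_seek() prefers a real llseek so skipped regions become sparse
 * holes in the core file; when the target has no usable llseek (e.g. a
 * pipe), it falls back to writing explicit zero pages, which costs I/O
 * but keeps the file offsets consistent with the program headers.
 */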

/*
 * Decide whether a segment is worth dumping; default is yes to be
 * sure (missing info is worse than too much; etc).
 * Personally I'd include everything, and use the coredump limit...
 *
 * I think we should skip something. But I am not sure how. H.J.
 */
static int maydump(struct vm_area_struct *vma)
{
	/* The vma can be set up to tell us the answer directly.  */
	if (vma->vm_flags & VM_ALWAYSDUMP)
		return 1;

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

	/* Dump shared memory only if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED)
		return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;

	/* If it hasn't been written to, don't write it out */
	if (!vma->anon_vma)
		return 0;

	return 1;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
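
/*
 * notesize() example: a "CORE"/NT_PRSTATUS note has n_namesz == 5
 * ("CORE" plus its NUL), padded to 8, so it occupies
 * sizeof(struct elf_note) + 8 + roundup(sizeof(struct elf_prstatus), 4)
 * bytes on disk.
 */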

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while (0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (!dump_seek(file, (off))) \
		goto end_coredump;

static void fill_elf_header(struct elfhdr *elf, int segs)
{
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);

	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
	elf->e_flags = ELF_CORE_EFLAGS;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;
	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	prstatus->pr_pid = p->pid;
	prstatus->pr_ppid = p->parent->pid;
	prstatus->pr_pgrp = process_group(p);
	prstatus->pr_sid = process_session(p);
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ - 1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = p->pid;
	psinfo->pr_ppid = p->parent->pid;
	psinfo->pr_pgrp = process_group(p);
	psinfo->pr_sid = process_session(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

/* Here is the structure in which the status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
			  &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret = tsk->mm->mmap;

	if (ret)
		return ret;
	return gate_vma;
}

/*
 * Helper function for iterating across a vma list.  It ensures that the caller
 * will visit `gate_vma' prior to terminating the search.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret;

	ret = this_vma->vm_next;
	if (ret)
		return ret;
	if (this_vma == gate_vma)
		return NULL;
	return gate_vma;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
{
#define	NUM_NOTES	6
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;
	int i;
	struct vm_area_struct *vma, *gate_vma;
	struct elfhdr *elf = NULL;
	loff_t offset = 0, dataoff, foffset;
	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
	int numnote;
	struct memelfnote *notes = NULL;
	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
	struct task_struct *g, *p;
	LIST_HEAD(thread_list);
	struct list_head *t;
	elf_fpregset_t *fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu = NULL;
#endif
	int thread_status_size = 0;
	elf_addr_t *auxv;
#ifdef ELF_CORE_WRITE_EXTRA_NOTES
	int extra_notes_size;
#endif

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto cleanup;
	prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
	if (!prstatus)
		goto cleanup;
	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (!psinfo)
		goto cleanup;
	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
	if (!notes)
		goto cleanup;
	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
	if (!fpu)
		goto cleanup;
#ifdef ELF_CORE_COPY_XFPREGS
	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
	if (!xfpu)
		goto cleanup;
#endif

	if (signr) {
		struct elf_thread_status *tmp;
		rcu_read_lock();
		do_each_thread(g, p)
			if (current->mm == p->mm && current != p) {
				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
				if (!tmp) {
					rcu_read_unlock();
					goto cleanup;
				}
				tmp->thread = p;
				list_add(&tmp->list, &thread_list);
			}
		while_each_thread(g, p);
		rcu_read_unlock();
		list_for_each(t, &thread_list) {
			struct elf_thread_status *tmp;
			int sz;

			tmp = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(signr, tmp);
			thread_status_size += sz;
		}
	}
	/* now collect the dump for the current task */
	memset(prstatus, 0, sizeof(*prstatus));
	fill_prstatus(prstatus, current, signr);
	elf_core_copy_regs(&prstatus->pr_reg, regs);

	segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
	segs += ELF_CORE_EXTRA_PHDRS;
#endif

	gate_vma = get_gate_vma(current);
	if (gate_vma != NULL)
		segs++;

	/* Set up header */
	fill_elf_header(elf, segs + 1);	/* including notes section */

	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
	fill_psinfo(psinfo, current->group_leader, current->mm);
	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	numnote = 2;

	auxv = (elf_addr_t *)current->mm->saved_auxv;

	/* count the auxv pairs, including the terminating AT_NULL entry */
	i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(&notes[numnote++], "CORE", NT_AUXV,
		  i * sizeof(elf_addr_t), auxv);

	/* Try to dump the FPU. */
	if ((prstatus->pr_fpvalid =
	     elf_core_copy_task_fpregs(current, regs, fpu)))
		fill_note(notes + numnote++,
			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, xfpu))
		fill_note(notes + numnote++,
			  "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
#endif

	fs = get_fs();
	set_fs(KERNEL_DS);

	DUMP_WRITE(elf, sizeof(*elf));
	offset += sizeof(*elf);				/* Elf header */
	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
	foffset = offset;

	/* Write notes phdr entry */
	{
		struct elf_phdr phdr;
		int sz = 0;

		for (i = 0; i < numnote; i++)
			sz += notesize(notes + i);

		sz += thread_status_size;

#ifdef ELF_CORE_WRITE_EXTRA_NOTES
		extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
		sz += extra_notes_size;
#endif

		fill_elf_note_phdr(&phdr, sz, offset);
		offset += sz;
		DUMP_WRITE(&phdr, sizeof(phdr));
	}

	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/* Write program headers for segments dump */
	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		struct elf_phdr phdr;
		size_t sz;

		sz = vma->vm_end - vma->vm_start;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = maydump(vma) ? sz : 0;
		phdr.p_memsz = sz;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		DUMP_WRITE(&phdr, sizeof(phdr));
	}

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
	ELF_CORE_WRITE_EXTRA_PHDRS;
#endif

	/* write out the notes section */
	for (i = 0; i < numnote; i++)
		if (!writenote(notes + i, file, &foffset))
			goto end_coredump;

#ifdef ELF_CORE_WRITE_EXTRA_NOTES
	ELF_CORE_WRITE_EXTRA_NOTES;
	foffset += extra_notes_size;
#endif

	/* write out the thread status notes section */
	list_for_each(t, &thread_list) {
		struct elf_thread_status *tmp =
				list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], file, &foffset))
				goto end_coredump;
	}

	/* Align to page */
	DUMP_SEEK(dataoff - foffset);

	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		unsigned long addr;

		if (!maydump(vma))
			continue;

		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE) {
			struct page *page;
			struct vm_area_struct *page_vma;

			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
						&page, &page_vma) <= 0) {
				DUMP_SEEK(PAGE_SIZE);
			} else {
				if (page == ZERO_PAGE(addr)) {
					if (!dump_seek(file, PAGE_SIZE)) {
						page_cache_release(page);
						goto end_coredump;
					}
				} else {
					void *kaddr;
					flush_cache_page(page_vma, addr,
							 page_to_pfn(page));
					kaddr = kmap(page);
					if ((size += PAGE_SIZE) > limit ||
					    !dump_write(file, kaddr,
					    PAGE_SIZE)) {
						kunmap(page);
						page_cache_release(page);
						goto end_coredump;
					}
					kunmap(page);
				}
				page_cache_release(page);
			}
		}
	}

#ifdef ELF_CORE_WRITE_EXTRA_DATA
	ELF_CORE_WRITE_EXTRA_DATA;
#endif

end_coredump:
	set_fs(fs);

cleanup:
	while (!list_empty(&thread_list)) {
		struct list_head *tmp = thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	kfree(elf);
	kfree(prstatus);
	kfree(psinfo);
	kfree(notes);
	kfree(fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(xfpu);
#endif
	return has_dumped;
#undef NUM_NOTES
}

#endif		/* USE_ELF_CORE_DUMP */

static int __init init_elf_binfmt(void)
{
	return register_binfmt(&elf_format);
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");