/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
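
/*
 * A quick worked example of the macros above (editor's sketch,
 * assuming ELF_MIN_ALIGN == 4096):
 *
 *	ELF_PAGESTART(0x1234)  == 0x1000	(round down to page start)
 *	ELF_PAGEOFFSET(0x1234) == 0x234		(offset within the page)
 *	ELF_PAGEALIGN(0x1234)  == 0x2000	(round up to the next page)
 */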

static struct linux_binfmt elf_format = {
		.module		= THIS_MODULE,
		.load_binary	= load_elf_binary,
		.load_shlib	= load_elf_library,
		.core_dump	= elf_core_dump,
		.min_coredump	= ELF_EXEC_PAGESIZE,
		.hasvdso	= 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/*
 * We need to explicitly zero any fractional pages after the data
 * section (i.e. bss).  These would otherwise contain junk from the
 * file that should not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}
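
/*
 * Worked example (editor's sketch, assuming ELF_MIN_ALIGN == 4096):
 * with elf_bss == 0x401234, ELF_PAGEOFFSET() gives 0x234, so the last
 * 4096 - 0x234 == 0xdcc bytes of that page (0x401234..0x401fff) get
 * cleared; a page-aligned elf_bss leaves nothing to clear.
 */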

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
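
/*
 * Example for the grows-down case (editor's sketch): with sp at
 * 0x7fff1000, STACK_ALLOC(sp, 16) moves sp to 0x7fff0ff0 and yields
 * that address for a 16-byte object; STACK_ADD(sp, 2) makes room for
 * two elf_addr_t entries below sp; STACK_ROUND() then drops the
 * result to a 16-byte boundary before the argv/envp/auxv arrays are
 * laid out.
 */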

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

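/*
 * Sketch of the initial stack that create_elf_tables() below builds
 * (editor's note; lowest address first, for the usual grows-down
 * case):
 *
 *	argc
 *	argv[0] ... argv[argc - 1], NULL
 *	envp[0] ... , NULL
 *	auxv (AT_* id/value pairs), terminated by an AT_NULL pair
 *	...
 *	argument and environment strings, platform strings,
 *	AT_RANDOM bytes
 */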
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, cred->uid);
	NEW_AUX_ENT(AT_EUID, cred->euid);
	NEW_AUX_ENT(AT_GID, cred->gid);
	NEW_AUX_ENT(AT_EGID, cred->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp;
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	argv = sp;
	envp = argv + argc + 1;

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	down_write(&current->mm->mmap_sem);
	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image. (since size < total_size)
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end. (which unmap is needed for ELF images with holes.)
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			do_munmap(current->mm, map_addr+size, total_size-size);
	} else
		map_addr = do_mmap(filep, addr, size, prot, type, off);

	up_write(&current->mm->mmap_sem);
	return map_addr;
}

#endif /* !elf_map */

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
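
/*
 * Example (editor's sketch): for two PT_LOAD entries with
 * p_vaddr/p_memsz of 0x400000/0x1000 and 0x600e00/0x300 this returns
 * 0x600e00 + 0x300 - ELF_PAGESTART(0x400000) == 0x201100 -- the span
 * the first mmap must reserve so the later segments land inside it.
 */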

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_map_addr,
		unsigned long no_base)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			if (!*interp_map_addr)
				*interp_map_addr = map_addr;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	if (last_bss > elf_bss) {
		/*
		 * Now fill out the bss section.  First pad the last page up
		 * to the page boundary, and then perform a mmap to make sure
		 * that there are zero-mapped pages up to and including the
		 * last bss page.
		 */
		if (padzero(elf_bss)) {
			error = -EFAULT;
			goto out_close;
		}

		/* What we have mapped so far */
		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

		/* Map the last of the bss segment */
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	error = load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
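
/*
 * Editor's note: with 4K pages STACK_RND_MASK is 0x7ff, so the stack
 * top moves by up to 0x7ff << 12 bytes -- just under 8MB of VA -- in
 * page-sized steps, and only when PF_RANDOMIZE is set and the
 * personality does not carry ADDR_NO_RANDOMIZE.
 */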

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
	    loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_ph;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_ph;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NUL-terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			if (file_permission(interpreter, MAY_READ) < 0)
				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* OK, This is the point of no return */
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
			load_bias = 0;
#else
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, 0);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void *)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		unsigned long uninitialized_var(interp_map_addr);

		elf_entry = load_elf_interp(&loc->interp_elf_ex,
					    interpreter,
					    &interp_map_addr,
					    load_bias);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += loc->interp_elf_ex.e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	install_exec_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	retval = create_elf_tables(bprm, &loc->elf_ex,
			  load_addr, interp_load_addr);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
	/* N.B. passed_fileno might not be initialized? */
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
		current->mm->brk = current->mm->start_brk =
			arch_randomize_brk(current->mm);
#endif

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))

	/* The vma can be set up to tell us the answer directly.  */
	if (vma->vm_flags & VM_ALWAYSDUMP)
		goto whole;

	/* Hugetlb memory check */
	if (vma->vm_flags & VM_HUGETLB) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
			goto whole;
	}

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to.  */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) &&
	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		mm_segment_t fs = get_fs();
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		/*
		 * Switch to the user "segment" for get_user(),
		 * then put back what elf_core_dump() had in place.
		 */
		set_fs(USER_DS);
		if (unlikely(get_user(word, header)))
			word = 0;
		set_fs(fs);
		if (word == magic.cmp)
			return PAGE_SIZE;
	}

#undef	FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
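
/*
 * Example (editor's sketch): an NT_PRSTATUS note named "CORE" costs
 * sizeof(struct elf_note) for the header, roundup(5, 4) == 8 bytes
 * for the name "CORE\0", and the prstatus payload rounded up to a
 * 4-byte boundary -- the three terms summed above, in exactly the
 * order writenote() emits them below.
 */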

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags, u8 osabi)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;

	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ - 1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, cred->uid);
	SET_GID(psinfo->pr_gid, cred->gid);
	rcu_read_unlock();
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
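	/*
	 * saved_auxv holds (id, value) pairs ending in an AT_NULL
	 * entry; count two elf_addr_t per pair, terminator included,
	 * so the note carries the vector exactly as userspace saw it.
	 */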
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[0];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote auxv;
	size_t size;
	int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
				       const struct user_regset *regset)
{
	if (regset->writeback)
		regset->writeback(task, regset, 1);
}

static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, size_t *total)
{
	unsigned int i;

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents.  We fill the rest in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus, t->task, signr);
	(void) view->regsets[0].get(t->task, &view->regsets[0],
				    0, sizeof(t->prstatus.pr_reg),
				    &t->prstatus.pr_reg, NULL);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
		  sizeof(t->prstatus), &t->prstatus);
	*total += notesize(&t->notes[0]);

	do_thread_regset_writeback(t->task, &view->regsets[0]);

	/*
	 * Each other regset might generate a note too.  For each regset
	 * that has no core_note_type or is inactive, we leave t->notes[i]
	 * all zero and we'll know to skip writing it later.
	 */
	for (i = 1; i < view->n; ++i) {
		const struct user_regset *regset = &view->regsets[i];
		do_thread_regset_writeback(t->task, regset);
		if (regset->core_note_type &&
		    (!regset->active || regset->active(t->task, regset))) {
			int ret;
			size_t size = regset->n * regset->size;
			void *data = kmalloc(size, GFP_KERNEL);
			if (unlikely(!data))
				return 0;
			ret = regset->get(t->task, regset,
					  0, size, data, NULL);
			if (unlikely(ret))
				kfree(data);
			else {
				if (regset->core_note_type != NT_PRFPREG)
					fill_note(&t->notes[i], "LINUX",
						  regset->core_note_type,
						  size, data);
				else {
					t->prstatus.pr_fpvalid = 1;
					fill_note(&t->notes[i], "CORE",
						  NT_PRFPREG, size, data);
				}
				*total += notesize(&t->notes[i]);
			}
		}
	}

	return 1;
}

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  long signr, struct pt_regs *regs)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view = task_user_regset_view(dump_task);
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;
	unsigned int i;

	info->size = 0;
	info->thread = NULL;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (psinfo == NULL)
		return 0;

	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs,
			view->e_machine, view->e_flags, view->ei_osabi);

	/*
	 * Allocate a structure for each thread.
	 */
	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
		t = kzalloc(offsetof(struct elf_thread_core_info,
				     notes[info->thread_notes]),
			    GFP_KERNEL);
		if (unlikely(!t))
			return 0;

		t->task = ct->task;
		if (ct->task == dump_task || !info->thread) {
			t->next = info->thread;
			info->thread = t;
		} else {
			/*
			 * Make sure to keep the original task at
			 * the head of the list.
			 */
			t->next = info->thread->next;
			info->thread->next = t;
		}
	}

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, signr, &info->size))
			return 0;

	/*
	 * Fill in the two process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
	return info->size;
}

/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
			   struct file *file, loff_t *foffset)
{
	bool first = true;
	struct elf_thread_core_info *t = info->thread;

	do {
		int i;

		if (!writenote(&t->notes[0], file, foffset))
			return 0;

		if (first && !writenote(&info->psinfo, file, foffset))
			return 0;
		if (first && !writenote(&info->auxv, file, foffset))
			return 0;

		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], file, foffset))
				return 0;

		first = false;
		t = t->next;
	} while (t);

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	struct elf_thread_core_info *threads = info->thread;
	while (threads) {
		unsigned int i;
		struct elf_thread_core_info *t = threads;
		threads = t->next;
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kfree(t->notes[i].data);
		kfree(t);
	}
	kfree(info->psinfo.data);
}

#else

/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(t->xfpu), &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

struct elf_note_info {
	struct memelfnote *notes;
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;
	elf_fpregset_t *fpu;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;
#endif
	int thread_status_size;
	int numnote;
};

static int elf_note_info_init(struct elf_note_info *info)
{
	memset(info, 0, sizeof(*info));
	INIT_LIST_HEAD(&info->thread_list);

	/* Allocate space for six ELF notes */
	info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
	if (!info->notes)
		return 0;
	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
	if (!info->psinfo)
		goto notes_free;
	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
	if (!info->prstatus)
		goto psinfo_free;
	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
	if (!info->fpu)
		goto prstatus_free;
#ifdef ELF_CORE_COPY_XFPREGS
	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
	if (!info->xfpu)
		goto fpu_free;
#endif
	return 1;
#ifdef ELF_CORE_COPY_XFPREGS
 fpu_free:
	kfree(info->fpu);
#endif
 prstatus_free:
	kfree(info->prstatus);
 psinfo_free:
	kfree(info->psinfo);
 notes_free:
	kfree(info->notes);
	return 0;
}

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  long signr, struct pt_regs *regs)
{
	struct list_head *t;

	if (!elf_note_info_init(info))
		return 0;

	if (signr) {
		struct core_thread *ct;
		struct elf_thread_status *ets;

		for (ct = current->mm->core_state->dumper.next;
						ct; ct = ct->next) {
			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
			if (!ets)
				return 0;

			ets->thread = ct->task;
			list_add(&ets->list, &info->thread_list);
		}

		list_for_each(t, &info->thread_list) {
			int sz;

			ets = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(signr, ets);
			info->thread_status_size += sz;
		}
	}
	/* now collect the dump for the current task */
	memset(info->prstatus, 0, sizeof(*info->prstatus));
	fill_prstatus(info->prstatus, current, signr);
	elf_core_copy_regs(&info->prstatus->pr_reg, regs);

	/* Set up header */
	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
		  sizeof(*info->prstatus), info->prstatus);
	fill_psinfo(info->psinfo, current->group_leader, current->mm);
	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
		  sizeof(*info->psinfo), info->psinfo);

	info->numnote = 2;

	fill_auxv_note(&info->notes[info->numnote++], current->mm);

	/* Try to dump the FPU. */
	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
							       info->fpu);
	if (info->prstatus->pr_fpvalid)
		fill_note(info->notes + info->numnote++,
			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, info->xfpu))
		fill_note(info->notes + info->numnote++,
			  "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(*info->xfpu), info->xfpu);
#endif

	return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
	int sz = 0;
	int i;

	for (i = 0; i < info->numnote; i++)
		sz += notesize(info->notes + i);

	sz += info->thread_status_size;

	return sz;
}

static int write_note_info(struct elf_note_info *info,
			   struct file *file, loff_t *foffset)
{
	int i;
	struct list_head *t;

	for (i = 0; i < info->numnote; i++)
		if (!writenote(info->notes + i, file, foffset))
			return 0;

	/* write out the thread status notes section */
	list_for_each(t, &info->thread_list) {
		struct elf_thread_status *tmp =
				list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], file, foffset))
				return 0;
	}

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	while (!list_empty(&info->thread_list)) {
		struct list_head *tmp = info->thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	kfree(info->prstatus);
	kfree(info->psinfo);
	kfree(info->notes);
	kfree(info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(info->xfpu);
#endif
}

#endif

static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret = tsk->mm->mmap;

	if (ret)
		return ret;
	return gate_vma;
}

/*
 * Helper function for iterating across a vma list.  It ensures that the caller
 * will visit `gate_vma' prior to terminating the search.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret;

	ret = this_vma->vm_next;
	if (ret)
		return ret;
	if (this_vma == gate_vma)
		return NULL;
	return gate_vma;
}

static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
			     elf_addr_t e_shoff, int segs)
{
	elf->e_shoff = e_shoff;
	elf->e_shentsize = sizeof(*shdr4extnum);
	elf->e_shnum = 1;
	elf->e_shstrndx = SHN_UNDEF;

	memset(shdr4extnum, 0, sizeof(*shdr4extnum));

	shdr4extnum->sh_type = SHT_NULL;
	shdr4extnum->sh_size = elf->e_shnum;
	shdr4extnum->sh_link = elf->e_shstrndx;
	shdr4extnum->sh_info = segs;
}
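
/*
 * Editor's note on extended numbering: once there are PN_XNUM (0xffff)
 * or more segments, the real count no longer fits in the 16-bit
 * e_phnum field, so it is stored in sh_info of this single SHT_NULL
 * section header instead -- the gABI fallback readers use whenever
 * e_phnum == PN_XNUM.
 */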

static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
				     unsigned long mm_flags)
{
	struct vm_area_struct *vma;
	size_t size = 0;

	for (vma = first_vma(current, gate_vma); vma != NULL;
	     vma = next_vma(vma, gate_vma))
		size += vma_dump_size(vma, mm_flags);
	return size;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
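/*
 * Rough layout of the resulting core file (editor's sketch):
 *
 *	ELF header
 *	program headers (one PT_NOTE, one PT_LOAD per vma, plus extras)
 *	note data
 *	<padding up to ELF_EXEC_PAGESIZE>
 *	per-vma memory contents, starting at dataoff
 *	extra section header, only when e_phnum == PN_XNUM
 */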
static int elf_core_dump(struct coredump_params *cprm)
{
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;
	struct vm_area_struct *vma, *gate_vma;
	struct elfhdr *elf = NULL;
	loff_t offset = 0, dataoff, foffset;
	struct elf_note_info info;
	struct elf_phdr *phdr4note = NULL;
	struct elf_shdr *shdr4extnum = NULL;
	Elf_Half e_phnum;
	elf_addr_t e_shoff;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto out;
	/*
	 * The number of segments is recorded in the ELF header as a 16-bit
	 * value. Please check DEFAULT_MAX_MAP_COUNT definition when you
	 * modify here.
	 */
	segs = current->mm->map_count;
	segs += elf_core_extra_phdrs();

	gate_vma = get_gate_vma(current);
	if (gate_vma != NULL)
		segs++;

	/* for notes section */
	segs++;

	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
	 * this, kernel supports extended numbering. Have a look at
	 * include/linux/elf.h for further information. */
	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

	/*
	 * Collect all the non-memory information about the process for the
	 * notes.  This also sets up the file header.
	 */
	if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
		goto cleanup;

	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	fs = get_fs();
	set_fs(KERNEL_DS);

	offset += sizeof(*elf);				/* Elf header */
	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
	foffset = offset;

	/* Write notes phdr entry */
	{
		size_t sz = get_note_info_size(&info);

		sz += elf_coredump_extra_notes_size();

		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
		if (!phdr4note)
			goto end_coredump;

		fill_elf_note_phdr(phdr4note, sz, offset);
		offset += sz;
	}

	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
	offset += elf_core_extra_data_size();
	e_shoff = offset;

	if (e_phnum == PN_XNUM) {
		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
		if (!shdr4extnum)
			goto end_coredump;
		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
	}

	offset = dataoff;

	size += sizeof(*elf);
	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
		goto end_coredump;

	size += sizeof(*phdr4note);
	if (size > cprm->limit
	    || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
		goto end_coredump;

	/* Write program headers for segments dump */
	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		struct elf_phdr phdr;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
		phdr.p_memsz = vma->vm_end - vma->vm_start;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		size += sizeof(phdr);
		if (size > cprm->limit
		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
			goto end_coredump;
	}

	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
		goto end_coredump;

	/* write out the notes section */
	if (!write_note_info(&info, cprm->file, &foffset))
		goto end_coredump;

	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
		goto end_coredump;

	/* Align to page */
	if (!dump_seek(cprm->file, dataoff - foffset))
		goto end_coredump;

	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		unsigned long addr;
		unsigned long end;

		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);

		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
			struct page *page;
			int stop;

			page = get_dump_page(addr);
			if (page) {
				void *kaddr = kmap(page);
				stop = ((size += PAGE_SIZE) > cprm->limit) ||
					!dump_write(cprm->file, kaddr,
						    PAGE_SIZE);
				kunmap(page);
				page_cache_release(page);
			} else
				stop = !dump_seek(cprm->file, PAGE_SIZE);
			if (stop)
				goto end_coredump;
		}
	}

	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
		goto end_coredump;

	if (e_phnum == PN_XNUM) {
		size += sizeof(*shdr4extnum);
		if (size > cprm->limit
		    || !dump_write(cprm->file, shdr4extnum,
				   sizeof(*shdr4extnum)))
			goto end_coredump;
	}

end_coredump:
	set_fs(fs);

cleanup:
	free_note_info(&info);
	kfree(shdr4extnum);
	kfree(phdr4note);
	kfree(elf);
out:
	return has_dumped;
}

#endif		/* CONFIG_ELF_CORE */

static int __init init_elf_binfmt(void)
{
	return register_binfmt(&elf_format);
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");