/*
 *  linux/fs/exec.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * #!-checking implemented by tytso.
 */
/*
 * Demand-loading implemented 01.12.91 - no need to read anything but
 * the header into memory. The inode of the executable is put into
 * "current->executable", and page faults do the actual loading. Clean.
 *
 * Once more I can proudly say that linux stood up to being changed: it
 * was less than 2 hours work to get demand-loading completely implemented.
 *
 * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
 * current->executable is only used by the procfs.  This allows a dispatch
 * table to check for several different types of binary formats.  We keep
 * trying until we recognize the file or we run out of supported binary
 * formats.
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/personality.h>
#define __NO_VERSION__
#include <linux/module.h>

#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>

#ifdef CONFIG_KMOD
#include <linux/kmod.h>
#endif

int core_uses_pid;

static struct linux_binfmt *formats;
static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED;

int register_binfmt(struct linux_binfmt * fmt)
{
	struct linux_binfmt ** tmp = &formats;

	if (!fmt)
		return -EINVAL;
	if (fmt->next)
		return -EBUSY;
	write_lock(&binfmt_lock);
	while (*tmp) {
		if (fmt == *tmp) {
			write_unlock(&binfmt_lock);
			return -EBUSY;
		}
		tmp = &(*tmp)->next;
	}
	fmt->next = formats;
	formats = fmt;
	write_unlock(&binfmt_lock);
	return 0;
}
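
/*
 * Illustrative sketch (not part of the original file): a binary-format
 * module would typically register itself from its init routine like
 * this.  The names example_format and load_example are hypothetical.
 */
#if 0
static int load_example(struct linux_binprm *bprm, struct pt_regs *regs);

static struct linux_binfmt example_format = {
	NULL, THIS_MODULE, load_example, NULL, NULL, 0
};

static int __init init_example_binfmt(void)
{
	return register_binfmt(&example_format);
}
#endif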

int unregister_binfmt(struct linux_binfmt * fmt)
{
	struct linux_binfmt ** tmp = &formats;

	write_lock(&binfmt_lock);
	while (*tmp) {
		if (fmt == *tmp) {
			*tmp = fmt->next;
			write_unlock(&binfmt_lock);
			return 0;
		}
		tmp = &(*tmp)->next;
	}
	write_unlock(&binfmt_lock);
	return -EINVAL;
}

static inline void put_binfmt(struct linux_binfmt * fmt)
{
	if (fmt->module)
		__MOD_DEC_USE_COUNT(fmt->module);
}

/*
 * Note that a shared library must be both readable and executable
 * for security reasons.
 *
 * Also note that we take the address to load from the file itself.
 */
asmlinkage long sys_uselib(const char * library)
{
	struct file * file;
	struct nameidata nd;
	int error;

	error = user_path_walk(library, &nd);
	if (error)
		goto out;

	error = -EINVAL;
	if (!S_ISREG(nd.dentry->d_inode->i_mode))
		goto exit;

	error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC);
	if (error)
		goto exit;

	file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
	error = PTR_ERR(file);
	if (IS_ERR(file))
		goto out;

	error = -ENOEXEC;
	if (file->f_op && file->f_op->read) {
		struct linux_binfmt * fmt;

		read_lock(&binfmt_lock);
		for (fmt = formats ; fmt ; fmt = fmt->next) {
			if (!fmt->load_shlib)
				continue;
			if (!try_inc_mod_count(fmt->module))
				continue;
			read_unlock(&binfmt_lock);
			error = fmt->load_shlib(file);
			read_lock(&binfmt_lock);
			put_binfmt(fmt);
			if (error != -ENOEXEC)
				break;
		}
		read_unlock(&binfmt_lock);
	}
	fput(file);
out:
	return error;
exit:
	path_release(&nd);
	goto out;
}

/*
 * count() counts the number of argument/environment pointers.
 */
static int count(char ** argv, int max)
{
	int i = 0;

	if (argv != NULL) {
		for (;;) {
			char * p;

			if (get_user(p, argv))
				return -EFAULT;
			if (!p)
				break;
			argv++;
			if (++i > max)
				return -E2BIG;
		}
	}
	return i;
}

/*
 * 'copy_strings()' copies argument/environment strings from user
 * memory to free pages in kernel mem. These are in a format ready
 * to be put directly into the top of new user memory.
 */
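/*
 * The strings are packed downward from the top of the argument area:
 * bprm->p starts at MAX_ARG_PAGES*PAGE_SIZE - sizeof(void *) and is
 * decremented by each string's length (including the trailing NUL)
 * before the copy, so it always points at the lowest byte in use and
 * argument pages are allocated lazily as the copies cross into them.
 */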
int copy_strings(int argc, char ** argv, struct linux_binprm *bprm)
{
	struct page *kmapped_page = NULL;
	char *kaddr = NULL;
	int ret;

	while (argc-- > 0) {
		char *str;
		int len;
		unsigned long pos;

		if (get_user(str, argv+argc) ||
				!(len = strnlen_user(str, bprm->p))) {
			ret = -EFAULT;
			goto out;
		}

		if (bprm->p < len) {
			ret = -E2BIG;
			goto out;
		}

		bprm->p -= len;
		pos = bprm->p;

		while (len > 0) {
			int i, new, err;
			int offset, bytes_to_copy;
			struct page *page;

			offset = pos % PAGE_SIZE;
			i = pos/PAGE_SIZE;
			page = bprm->page[i];
			new = 0;
			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				bprm->page[i] = page;
				if (!page) {
					ret = -ENOMEM;
					goto out;
				}
				new = 1;
			}

			if (page != kmapped_page) {
				if (kmapped_page)
					kunmap(kmapped_page);
				kmapped_page = page;
				kaddr = kmap(kmapped_page);
			}
			if (new && offset)
				memset(kaddr, 0, offset);
			bytes_to_copy = PAGE_SIZE - offset;
			if (bytes_to_copy > len) {
				bytes_to_copy = len;
				if (new)
					memset(kaddr+offset+len, 0,
						PAGE_SIZE-offset-len);
			}
			err = copy_from_user(kaddr+offset, str, bytes_to_copy);
			if (err) {
				ret = -EFAULT;
				goto out;
			}

			pos += bytes_to_copy;
			str += bytes_to_copy;
			len -= bytes_to_copy;
		}
	}
	ret = 0;
out:
	if (kmapped_page)
		kunmap(kmapped_page);
	return ret;
}

/*
 * Like copy_strings, but get argv and its values from kernel memory.
 * The temporary set_fs(KERNEL_DS) switch makes the user-access
 * routines inside copy_strings accept kernel-space pointers.
 */
int copy_strings_kernel(int argc, char ** argv, struct linux_binprm *bprm)
{
	int r;
	mm_segment_t oldfs = get_fs();
	set_fs(KERNEL_DS);
	r = copy_strings(argc, argv, bprm);
	set_fs(oldfs);
	return r;
}
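
/*
 * Illustrative use: do_execve() below calls
 * copy_strings_kernel(1, &bprm.filename, &bprm), since by that point
 * the filename already lives in kernel space (it is typically copied
 * there by getname() in the architecture's sys_execve wrapper).
 */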

/*
 * This routine is used to map a page into an address space: it is
 * needed by execve() for the initial stack and environment pages.
 *
 * tsk->mmap_sem is held for writing.
 */
void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address)
{
	pgd_t * pgd;
	pmd_t * pmd;
	pte_t * pte;

	if (page_count(page) != 1)
		printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", page, address);
	pgd = pgd_offset(tsk->mm, address);

	spin_lock(&tsk->mm->page_table_lock);
	pmd = pmd_alloc(tsk->mm, pgd, address);
	if (!pmd)
		goto out;
	pte = pte_alloc(tsk->mm, pmd, address);
	if (!pte)
		goto out;
	if (!pte_none(*pte))
		goto out;
	lru_cache_add(page);
	flush_dcache_page(page);
	flush_page_to_ram(page);
	set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY))));
	tsk->mm->rss++;
	spin_unlock(&tsk->mm->page_table_lock);

	/* no need for flush_tlb */
	return;
out:
	spin_unlock(&tsk->mm->page_table_lock);
	__free_page(page);
	force_sig(SIGKILL, tsk);
	return;
}

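/*
 * Note on the arithmetic below (assuming the usual MAX_ARG_PAGES of
 * 32 and 4KB pages, so a 128KB argument area at the top of the
 * stack): adding stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE
 * rebases bprm->p from an offset within the argument pages to the
 * user virtual address the strings will actually live at.
 */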
int setup_arg_pages(struct linux_binprm *bprm)
{
	unsigned long stack_base;
	struct vm_area_struct *mpnt;
	int i;

	stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;

	bprm->p += stack_base;
	if (bprm->loader)
		bprm->loader += stack_base;
	bprm->exec += stack_base;

	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!mpnt)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	{
		mpnt->vm_mm = current->mm;
		mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
		mpnt->vm_end = STACK_TOP;
		mpnt->vm_page_prot = PAGE_COPY;
		mpnt->vm_flags = VM_STACK_FLAGS;
		mpnt->vm_ops = NULL;
		mpnt->vm_pgoff = 0;
		mpnt->vm_file = NULL;
		mpnt->vm_private_data = (void *) 0;
		insert_vm_struct(current->mm, mpnt);
		current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
	}

	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
		struct page *page = bprm->page[i];
		if (page) {
			bprm->page[i] = NULL;
			put_dirty_page(current, page, stack_base);
		}
		stack_base += PAGE_SIZE;
	}
	up_write(&current->mm->mmap_sem);

	return 0;
}

struct file *open_exec(const char *name)
{
	struct nameidata nd;
	struct inode *inode;
	struct file *file;
	int err = 0;

	err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
	file = ERR_PTR(err);
	if (!err) {
		inode = nd.dentry->d_inode;
		file = ERR_PTR(-EACCES);
		if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
		    S_ISREG(inode->i_mode)) {
			int err = permission(inode, MAY_EXEC);
			if (!err && !(inode->i_mode & 0111))
				err = -EACCES;
			file = ERR_PTR(err);
			if (!err) {
				file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
				if (!IS_ERR(file)) {
					err = deny_write_access(file);
					if (err) {
						fput(file);
						file = ERR_PTR(err);
					}
				}
out:
				return file;
			}
		}
		path_release(&nd);
	}
	goto out;
}

394
395int kernel_read(struct file *file, unsigned long offset,
396	char * addr, unsigned long count)
397{
398	mm_segment_t old_fs;
399	loff_t pos = offset;
400	int result = -ENOSYS;
401
402	if (!file->f_op->read)
403		goto fail;
404	old_fs = get_fs();
405	set_fs(get_ds());
406	result = file->f_op->read(file, addr, count, &pos);
407	set_fs(old_fs);
408fail:
409	return result;
410}
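
/*
 * Illustrative fragment (hypothetical caller, not part of this file):
 * reading the first bytes of an already-opened file into a kernel
 * buffer, much as prepare_binprm() does below with BINPRM_BUF_SIZE.
 */
#if 0
	char buf[128];
	int n = kernel_read(file, 0, buf, sizeof(buf));
	if (n < 0)
		return n;	/* propagate the read error or -ENOSYS */
#endif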

/*
 * Install a fresh mm for the new image: if the old mm is unshared we
 * simply strip its mappings and reuse it; otherwise we allocate a new
 * mm, switch to it, and drop our reference to the old one.
 */
static int exec_mmap(void)
{
	struct mm_struct * mm, * old_mm;

	old_mm = current->mm;
	if (old_mm && atomic_read(&old_mm->mm_users) == 1) {
		mm_release();
		exit_mmap(old_mm);
		return 0;
	}

	mm = mm_alloc();
	if (mm) {
		struct mm_struct *active_mm;

		if (init_new_context(current, mm)) {
			mmdrop(mm);
			return -ENOMEM;
		}

		/* Add it to the list of mm's */
		spin_lock(&mmlist_lock);
		list_add(&mm->mmlist, &init_mm.mmlist);
		mmlist_nr++;
		spin_unlock(&mmlist_lock);

		task_lock(current);
		active_mm = current->active_mm;
		current->mm = mm;
		current->active_mm = mm;
		task_unlock(current);
		activate_mm(active_mm, mm);
		mm_release();
		if (old_mm) {
			if (active_mm != old_mm)
				BUG();
			mmput(old_mm);
			return 0;
		}
		mmdrop(active_mm);
		return 0;
	}
	return -ENOMEM;
}

/*
 * This function makes sure the current process has its own signal table,
 * so that flush_signal_handlers can later reset the handlers without
 * disturbing other processes.  (Other processes might share the signal
 * table via the CLONE_SIGNAL option to clone().)
 */

static inline int make_private_signals(void)
{
	struct signal_struct * newsig;

	if (atomic_read(&current->sig->count) <= 1)
		return 0;
	newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
	if (newsig == NULL)
		return -ENOMEM;
	spin_lock_init(&newsig->siglock);
	atomic_set(&newsig->count, 1);
	memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
	spin_lock_irq(&current->sigmask_lock);
	current->sig = newsig;
	spin_unlock_irq(&current->sigmask_lock);
	return 0;
}

/*
 * If make_private_signals() made a copy of the signal table, decrement the
 * refcount of the original table, and free it if necessary.
 * We don't do that in make_private_signals() so that we can back off
 * in flush_old_exec() if an error occurs after calling make_private_signals().
 */

static inline void release_old_signals(struct signal_struct * oldsig)
{
	if (current->sig == oldsig)
		return;
	if (atomic_dec_and_test(&oldsig->count))
		kmem_cache_free(sigact_cachep, oldsig);
}

/*
 * These functions flush out all traces of the currently running executable
 * so that a new one can be started.
 */

static inline void flush_old_files(struct files_struct * files)
{
	long j = -1;

	write_lock(&files->file_lock);
	for (;;) {
		unsigned long set, i;

		j++;
		i = j * __NFDBITS;
		if (i >= files->max_fds || i >= files->max_fdset)
			break;
		set = files->close_on_exec->fds_bits[j];
		if (!set)
			continue;
		files->close_on_exec->fds_bits[j] = 0;
		/* Drop the lock around sys_close(): it takes file_lock itself. */
		write_unlock(&files->file_lock);
		for ( ; set ; i++, set >>= 1) {
			if (set & 1)
				sys_close(i);
		}
		write_lock(&files->file_lock);
	}
	write_unlock(&files->file_lock);
}

/*
 * An execve() will automatically "de-thread" the process.
 * Note: we don't have to hold the tasklist_lock to test
 * whether we might need to do this. If we're not part of
 * a thread group, there is no way we can become one
 * dynamically. And if we are, we only need to protect the
 * unlink - even if we race with the last other thread exit,
 * at worst the list_del_init() might end up being a no-op.
 */
static inline void de_thread(struct task_struct *tsk)
{
	if (!list_empty(&tsk->thread_group)) {
		write_lock_irq(&tasklist_lock);
		list_del_init(&tsk->thread_group);
		write_unlock_irq(&tasklist_lock);
	}

	/* Minor oddity: this might stay the same. */
	tsk->tgid = tsk->pid;
}

int flush_old_exec(struct linux_binprm * bprm)
{
	char * name;
	int i, ch, retval;
	struct signal_struct * oldsig;

	/*
	 * Make sure we have a private signal table
	 */
	oldsig = current->sig;
	retval = make_private_signals();
	if (retval)
		goto flush_failed;

	/*
	 * Release all of the old mmap stuff
	 */
	retval = exec_mmap();
	if (retval)
		goto mmap_failed;

	/* This is the point of no return */
	release_old_signals(oldsig);

	current->sas_ss_sp = current->sas_ss_size = 0;

	if (current->euid == current->uid && current->egid == current->gid)
		current->mm->dumpable = 1;
	name = bprm->filename;
	for (i = 0; (ch = *(name++)) != '\0';) {
		if (ch == '/')
			i = 0;
		else
			if (i < 15)
				current->comm[i++] = ch;
	}
	current->comm[i] = '\0';

	flush_thread();

	de_thread(current);

	if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
	    permission(bprm->file->f_dentry->d_inode, MAY_READ))
		current->mm->dumpable = 0;

	/* An exec changes our domain. We are no longer part of the thread
	   group */

	current->self_exec_id++;

	flush_signal_handlers(current);
	flush_old_files(current->files);

	return 0;

mmap_failed:
flush_failed:
	spin_lock_irq(&current->sigmask_lock);
	if (current->sig != oldsig) {
		kmem_cache_free(sigact_cachep, current->sig);
		current->sig = oldsig;
	}
	spin_unlock_irq(&current->sigmask_lock);
	return retval;
}

/*
 * We mustn't allow tracing of suid binaries, unless
 * the tracer has the capability to trace anything..
 */
static inline int must_not_trace_exec(struct task_struct * p)
{
	return (p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP);
}

/*
 * Fill the binprm structure from the inode.
 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes.
 */
int prepare_binprm(struct linux_binprm *bprm)
{
	int mode;
	struct inode * inode = bprm->file->f_dentry->d_inode;

	mode = inode->i_mode;
	/*
	 * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
	 * vfs_permission lets a non-executable through
	 */
	if (!(mode & 0111))	/* with at least _one_ execute bit set */
		return -EACCES;
	if (bprm->file->f_op == NULL)
		return -EACCES;

	bprm->e_uid = current->euid;
	bprm->e_gid = current->egid;

	if (!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
		/* Set-uid? */
		if (mode & S_ISUID)
			bprm->e_uid = inode->i_uid;

		/* Set-gid? */
		/*
		 * If setgid is set but no group execute bit then this
		 * is a candidate for mandatory locking, not a setgid
		 * executable.
		 */
		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
			bprm->e_gid = inode->i_gid;
	}

	/* We don't have VFS support for capabilities yet */
	cap_clear(bprm->cap_inheritable);
	cap_clear(bprm->cap_permitted);
	cap_clear(bprm->cap_effective);

	/*
	 * To support inheritance of root-permissions and suid-root
	 * executables under compatibility mode, we raise all three
	 * capability sets for the file.
	 *
	 * If only the real uid is 0, we only raise the inheritable
	 * and permitted sets of the executable file.
	 */
	if (!issecure(SECURE_NOROOT)) {
		if (bprm->e_uid == 0 || current->uid == 0) {
			cap_set_full(bprm->cap_inheritable);
			cap_set_full(bprm->cap_permitted);
		}
		if (bprm->e_uid == 0)
			cap_set_full(bprm->cap_effective);
	}

	memset(bprm->buf, 0, BINPRM_BUF_SIZE);
	return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
}

/*
 * This function is used to produce the new IDs and capabilities
 * from the old ones and the file's capabilities.
 *
 * The formula used for evolving capabilities is:
 *
 *       pI' = pI
 * (***) pP' = (fP & X) | (fI & pI)
 *       pE' = pP' & fE          [NB. fE is 0 or ~0]
 *
 * I=Inheritable, P=Permitted, E=Effective // p=process, f=file
 * ' indicates post-exec(), and X is the global 'cap_bset'.
 */
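/*
 * Worked example (hypothetical values): with pI = {NET_RAW}, and a
 * file carrying fI = ~0, fP = {NET_BIND_SERVICE}, fE = ~0, under a
 * full bounding set X = ~0:
 *
 *   pP' = (fP & X) | (fI & pI) = {NET_BIND_SERVICE, NET_RAW}
 *   pE' = pP' & fE             = {NET_BIND_SERVICE, NET_RAW}
 */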

void compute_creds(struct linux_binprm *bprm)
{
	kernel_cap_t new_permitted, working;
	int do_unlock = 0;

	new_permitted = cap_intersect(bprm->cap_permitted, cap_bset);
	working = cap_intersect(bprm->cap_inheritable,
				current->cap_inheritable);
	new_permitted = cap_combine(new_permitted, working);

	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
	    !cap_issubset(new_permitted, current->cap_permitted)) {
		current->mm->dumpable = 0;

		lock_kernel();
		if (must_not_trace_exec(current)
		    || atomic_read(&current->fs->count) > 1
		    || atomic_read(&current->files->count) > 1
		    || atomic_read(&current->sig->count) > 1) {
			if (!capable(CAP_SETUID)) {
				bprm->e_uid = current->uid;
				bprm->e_gid = current->gid;
			}
			if (!capable(CAP_SETPCAP)) {
				new_permitted = cap_intersect(new_permitted,
							current->cap_permitted);
			}
		}
		do_unlock = 1;
	}

	/* For init, we want to retain the capabilities set
	 * in the init_task struct. Thus we skip the usual
	 * capability rules */
	if (current->pid != 1) {
		current->cap_permitted = new_permitted;
		current->cap_effective =
			cap_intersect(new_permitted, bprm->cap_effective);
	}

	/* AUD: Audit candidate if current->cap_effective is set */

	current->suid = current->euid = current->fsuid = bprm->e_uid;
	current->sgid = current->egid = current->fsgid = bprm->e_gid;

	if (do_unlock)
		unlock_kernel();
	current->keep_capabilities = 0;
}

/*
 * Drop argv[0] from the saved argument pages by advancing bprm->p
 * past the string's terminating NUL, mapping each argument page as
 * the scan crosses into it.
 */
void remove_arg_zero(struct linux_binprm *bprm)
{
	if (bprm->argc) {
		unsigned long offset;
		char * kaddr;
		struct page *page;

		offset = bprm->p % PAGE_SIZE;
		goto inside;

		while (bprm->p++, *(kaddr+offset++)) {
			if (offset != PAGE_SIZE)
				continue;
			offset = 0;
			kunmap(page);
inside:
			page = bprm->page[bprm->p/PAGE_SIZE];
			kaddr = kmap(page);
		}
		kunmap(page);
		bprm->argc--;
	}
}

/*
 * Cycle through the list of binary format handlers until one
 * recognizes the image.
 */
int search_binary_handler(struct linux_binprm *bprm, struct pt_regs *regs)
{
	int try, retval = 0;
	struct linux_binfmt *fmt;
#ifdef __alpha__
	/* handle /sbin/loader.. */
	{
	    struct exec * eh = (struct exec *) bprm->buf;

	    if (!bprm->loader && eh->fh.f_magic == 0x183 &&
		(eh->fh.f_flags & 0x3000) == 0x3000)
	    {
		struct file * file;
		unsigned long loader;

		allow_write_access(bprm->file);
		fput(bprm->file);
		bprm->file = NULL;

		loader = PAGE_SIZE*MAX_ARG_PAGES - sizeof(void *);

		file = open_exec("/sbin/loader");
		retval = PTR_ERR(file);
		if (IS_ERR(file))
			return retval;

		/* Remember if the application is TASO.  */
		bprm->sh_bang = eh->ah.entry < 0x100000000;

		bprm->file = file;
		bprm->loader = loader;
		retval = prepare_binprm(bprm);
		if (retval < 0)
			return retval;
		/* should call search_binary_handler recursively here,
		   but it does not matter */
	    }
	}
#endif
	/* Kernel module loader fixup: make sure we don't try to run
	   modprobe in kernel space. */
	set_fs(USER_DS);
	for (try = 0; try < 2; try++) {
		read_lock(&binfmt_lock);
		for (fmt = formats ; fmt ; fmt = fmt->next) {
			int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
			if (!fn)
				continue;
			if (!try_inc_mod_count(fmt->module))
				continue;
			read_unlock(&binfmt_lock);
			retval = fn(bprm, regs);
			if (retval >= 0) {
				put_binfmt(fmt);
				allow_write_access(bprm->file);
				if (bprm->file)
					fput(bprm->file);
				bprm->file = NULL;
				current->did_exec = 1;
				return retval;
			}
			read_lock(&binfmt_lock);
			put_binfmt(fmt);
			if (retval != -ENOEXEC)
				break;
			if (!bprm->file) {
				read_unlock(&binfmt_lock);
				return retval;
			}
		}
		read_unlock(&binfmt_lock);
		if (retval != -ENOEXEC) {
			break;
#ifdef CONFIG_KMOD
		} else {
#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
			char modname[20];
			if (printable(bprm->buf[0]) &&
			    printable(bprm->buf[1]) &&
			    printable(bprm->buf[2]) &&
			    printable(bprm->buf[3]))
				break; /* -ENOEXEC */
			sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
			request_module(modname);
#endif
		}
	}
	return retval;
}
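
/*
 * Note on the CONFIG_KMOD path above: bytes 2-3 of an unrecognized,
 * non-printable header become a module alias such as "binfmt-464c"
 * (an ELF image on a little-endian machine), which modules.conf can
 * map onto the right binary-format module.
 */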


/*
 * sys_execve() executes a new program.
 */
int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
{
	struct linux_binprm bprm;
	struct file *file;
	int retval;
	int i;

	file = open_exec(filename);

	retval = PTR_ERR(file);
	if (IS_ERR(file))
		return retval;

	bprm.p = PAGE_SIZE*MAX_ARG_PAGES - sizeof(void *);
	memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0]));

	bprm.file = file;
	bprm.filename = filename;
	bprm.sh_bang = 0;
	bprm.loader = 0;
	bprm.exec = 0;
	if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
		allow_write_access(file);
		fput(file);
		return bprm.argc;
	}

	if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
		allow_write_access(file);
		fput(file);
		return bprm.envc;
	}

	retval = prepare_binprm(&bprm);
	if (retval < 0)
		goto out;

	retval = copy_strings_kernel(1, &bprm.filename, &bprm);
	if (retval < 0)
		goto out;

	bprm.exec = bprm.p;
	retval = copy_strings(bprm.envc, envp, &bprm);
	if (retval < 0)
		goto out;

	retval = copy_strings(bprm.argc, argv, &bprm);
	if (retval < 0)
		goto out;

	retval = search_binary_handler(&bprm, regs);
	if (retval >= 0) {
#ifdef CONFIG_HND_BMIPS3300_PROF
		extern void sb1250_prof_mm_changed(struct task_struct *task, int sem);
		sb1250_prof_mm_changed(current, 0);
#endif	/* CONFIG_HND_BMIPS3300_PROF */
		/* execve success */
		return retval;
	}

out:
	/* Something went wrong: drop the file and free the argument pages. */
	allow_write_access(bprm.file);
	if (bprm.file)
		fput(bprm.file);

	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
		struct page * page = bprm.page[i];
		if (page)
			__free_page(page);
	}

	return retval;
}
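
/*
 * Illustrative sketch of how do_execve() is reached (modelled on the
 * i386 wrapper; the register layout is architecture-specific and the
 * real code lives in the arch tree, not in this file):
 */
#if 0
asmlinkage int sys_execve(struct pt_regs regs)
{
	int error;
	char * filename;

	filename = getname((char *) regs.ebx);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		goto out;
	error = do_execve(filename, (char **) regs.ecx,
			  (char **) regs.edx, &regs);
	putname(filename);
out:
	return error;
}
#endif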

void set_binfmt(struct linux_binfmt *new)
{
	struct linux_binfmt *old = current->binfmt;

	if (new && new->module)
		__MOD_INC_USE_COUNT(new->module);
	current->binfmt = new;
	if (old && old->module)
		__MOD_DEC_USE_COUNT(old->module);
}

/*
 * Write a core dump for the current process, named "core" or
 * "core.<pid>" depending on the core_uses_pid sysctl, using the
 * current binary format's core_dump handler.
 */
int do_coredump(long signr, struct pt_regs * regs)
{
	struct linux_binfmt * binfmt;
	char corename[6+sizeof(current->comm)+10];
	struct file * file;
	struct inode * inode;
	int retval = 0;

	lock_kernel();
	binfmt = current->binfmt;
	if (!binfmt || !binfmt->core_dump)
		goto fail;
	if (!current->mm->dumpable)
		goto fail;
	current->mm->dumpable = 0;
	if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
		goto fail;

	memcpy(corename, "core", 5); /* include trailing \0 */
	if (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)
		sprintf(&corename[4], ".%d", current->pid);
	file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600);	/* 2 == O_RDWR */
	if (IS_ERR(file))
		goto fail;
	inode = file->f_dentry->d_inode;
	if (inode->i_nlink > 1)
		goto close_fail;	/* multiple links - don't dump */
	if (d_unhashed(file->f_dentry))
		goto close_fail;

	if (!S_ISREG(inode->i_mode))
		goto close_fail;
	if (!file->f_op)
		goto close_fail;
	if (!file->f_op->write)
		goto close_fail;
	if (do_truncate(file->f_dentry, 0) != 0)
		goto close_fail;

	retval = binfmt->core_dump(signr, regs, file);

close_fail:
	filp_close(file, NULL);
fail:
	unlock_kernel();
	return retval;
}