1/*
2 * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Derived from sys_sparc32.c.
3 *
4 * Copyright (C) 2000		VA Linux Co
5 * Copyright (C) 2000		Don Dugger <n0ano@valinux.com>
6 * Copyright (C) 1999		Arun Sharma <arun.sharma@intel.com>
7 * Copyright (C) 1997,1998	Jakub Jelinek (jj@sunsite.mff.cuni.cz)
8 * Copyright (C) 1997		David S. Miller (davem@caip.rutgers.edu)
9 * Copyright (C) 2000-2003, 2005 Hewlett-Packard Co
10 *	David Mosberger-Tang <davidm@hpl.hp.com>
11 * Copyright (C) 2004		Gordon Jin <gordon.jin@intel.com>
12 *
13 * These routines maintain argument size conversion between 32bit and 64bit
14 * environment.
15 */
16
17#include <linux/kernel.h>
18#include <linux/syscalls.h>
19#include <linux/sysctl.h>
20#include <linux/sched.h>
21#include <linux/fs.h>
22#include <linux/file.h>
23#include <linux/signal.h>
24#include <linux/resource.h>
25#include <linux/times.h>
26#include <linux/utsname.h>
27#include <linux/smp.h>
28#include <linux/smp_lock.h>
29#include <linux/sem.h>
30#include <linux/msg.h>
31#include <linux/mm.h>
32#include <linux/shm.h>
33#include <linux/slab.h>
34#include <linux/uio.h>
35#include <linux/nfs_fs.h>
36#include <linux/quota.h>
37#include <linux/syscalls.h>
38#include <linux/sunrpc/svc.h>
39#include <linux/nfsd/nfsd.h>
40#include <linux/nfsd/cache.h>
41#include <linux/nfsd/xdr.h>
42#include <linux/nfsd/syscall.h>
43#include <linux/poll.h>
44#include <linux/eventpoll.h>
45#include <linux/personality.h>
46#include <linux/ptrace.h>
47#include <linux/stat.h>
48#include <linux/ipc.h>
49#include <linux/capability.h>
50#include <linux/compat.h>
51#include <linux/vfs.h>
52#include <linux/mman.h>
53#include <linux/mutex.h>
54
55#include <asm/intrinsics.h>
56#include <asm/types.h>
57#include <asm/uaccess.h>
58#include <asm/unistd.h>
59
60#include "ia32priv.h"
61
62#include <net/scm.h>
63#include <net/sock.h>
64
/* Set to a non-zero value to compile in the DBG() trace output. */
#define DEBUG	0

/* DBG() forwards to printk(KERN_DEBUG ...) when DEBUG is set and expands to nothing otherwise. */
#if DEBUG
# define DBG(fmt...)	printk(KERN_DEBUG fmt)
#else
# define DBG(fmt...)
#endif

/* Round x up to a multiple of a; the mask arithmetic only works when a is a power of two. */
#define ROUND_UP(x,a)	((__typeof__(x))(((unsigned long)(x) + ((a) - 1)) & ~((a) - 1)))

/* Low 12 bits of a, i.e. its offset within a 4KB unit. */
#define OFFSET4K(a)		((a) & 0xfff)
/* addr masked with PAGE_MASK, i.e. rounded down to a native page boundary. */
#define PAGE_START(addr)	((addr) & PAGE_MASK)
#define MINSIGSTKSZ_IA32	2048

/* IDs that do not fit in 16 bits are reported as 65534. */
#define high2lowuid(uid) ((uid) > 65535 ? 65534 : (uid))
#define high2lowgid(gid) ((gid) > 65535 ? 65534 : (gid))

/*
 * Anything that modifies or inspects ia32 user virtual memory must hold this mutex
 * while doing so.
 */
static DEFINE_MUTEX(ia32_mmap_mutex);
87
88asmlinkage long
89sys32_execve (char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp,
90	      struct pt_regs *regs)
91{
92	long error;
93	char *filename;
94	unsigned long old_map_base, old_task_size, tssd;
95
96	filename = getname(name);
97	error = PTR_ERR(filename);
98	if (IS_ERR(filename))
99		return error;
100
101	old_map_base  = current->thread.map_base;
102	old_task_size = current->thread.task_size;
103	tssd = ia64_get_kr(IA64_KR_TSSD);
104
105	/* we may be exec'ing a 64-bit process: reset map base, task-size, and io-base: */
106	current->thread.map_base  = DEFAULT_MAP_BASE;
107	current->thread.task_size = DEFAULT_TASK_SIZE;
108	ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
109	ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
110
111	error = compat_do_execve(filename, argv, envp, regs);
112	putname(filename);
113
114	if (error < 0) {
115		/* oops, execve failed, switch back to old values... */
116		ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE);
117		ia64_set_kr(IA64_KR_TSSD, tssd);
118		current->thread.map_base  = old_map_base;
119		current->thread.task_size = old_task_size;
120	}
121
122	return error;
123}
124
125int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
126{
127	compat_ino_t ino;
128	int err;
129
130	if ((u64) stat->size > MAX_NON_LFS ||
131	    !old_valid_dev(stat->dev) ||
132	    !old_valid_dev(stat->rdev))
133		return -EOVERFLOW;
134
135	ino = stat->ino;
136	if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
137		return -EOVERFLOW;
138
139	if (clear_user(ubuf, sizeof(*ubuf)))
140		return -EFAULT;
141
142	err  = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev);
143	err |= __put_user(ino, &ubuf->st_ino);
144	err |= __put_user(stat->mode, &ubuf->st_mode);
145	err |= __put_user(stat->nlink, &ubuf->st_nlink);
146	err |= __put_user(high2lowuid(stat->uid), &ubuf->st_uid);
147	err |= __put_user(high2lowgid(stat->gid), &ubuf->st_gid);
148	err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev);
149	err |= __put_user(stat->size, &ubuf->st_size);
150	err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime);
151	err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec);
152	err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime);
153	err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
154	err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
155	err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
156	err |= __put_user(stat->blksize, &ubuf->st_blksize);
157	err |= __put_user(stat->blocks, &ubuf->st_blocks);
158	return err;
159}
160
161#if PAGE_SHIFT > IA32_PAGE_SHIFT
162
163
164static int
165get_page_prot (struct vm_area_struct *vma, unsigned long addr)
166{
167	int prot = 0;
168
169	if (!vma || vma->vm_start > addr)
170		return 0;
171
172	if (vma->vm_flags & VM_READ)
173		prot |= PROT_READ;
174	if (vma->vm_flags & VM_WRITE)
175		prot |= PROT_WRITE;
176	if (vma->vm_flags & VM_EXEC)
177		prot |= PROT_EXEC;
178	return prot;
179}
180
/*
 * Map a subpage by creating an anonymous page that contains the union of the old page and
 * the subpage.
 *
 * @file:  file to read from; only dereferenced when MAP_ANONYMOUS is not set in @flags
 * @start: first address of the subpage range
 * @end:   end of the subpage range; (end - start) bytes are cleared or read
 * @prot:  requested protection bits
 * @flags: mmap flags
 * @off:   file offset passed to the file's read method
 *
 * Returns an error value encoded in an unsigned long (-EFAULT, -ENOMEM, -EINVAL, or
 * whatever do_mmap()/sys_mprotect() reported).  Otherwise returns the last
 * do_mmap()/sys_mprotect() result, or 0 on the anonymous fast path when no
 * sys_mprotect() call is needed.
 */
static unsigned long
mmap_subpage (struct file *file, unsigned long start, unsigned long end, int prot, int flags,
	      loff_t off)
{
	void *page = NULL;
	struct inode *inode;
	unsigned long ret = 0;
	struct vm_area_struct *vma = find_vma(current->mm, start);
	/* protection of whatever is currently mapped at start (0 if nothing is) */
	int old_prot = get_page_prot(vma, start);

	DBG("mmap_subpage(file=%p,start=0x%lx,end=0x%lx,prot=%x,flags=%x,off=0x%llx)\n",
	    file, start, end, prot, flags, off);


	/* Optimize the case where the old mmap and the new mmap are both anonymous */
	if ((old_prot & PROT_WRITE) && (flags & MAP_ANONYMOUS) && !vma->vm_file) {
		/* the existing page is writable: zeroing the range in place is enough */
		if (clear_user((void __user *) start, end - start)) {
			ret = -EFAULT;
			goto out;
		}
		goto skip_mmap;
	}

	/* bounce buffer used to carry the old page contents across the remap below */
	page = (void *) get_zeroed_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	/* NOTE(review): the copy_from_user() result is ignored here */
	if (old_prot)
		copy_from_user(page, (void __user *) PAGE_START(start), PAGE_SIZE);

	/* replace the whole native page with a writable anonymous mapping */
	down_write(&current->mm->mmap_sem);
	{
		ret = do_mmap(NULL, PAGE_START(start), PAGE_SIZE, prot | PROT_WRITE,
			      flags | MAP_FIXED | MAP_ANONYMOUS, 0);
	}
	up_write(&current->mm->mmap_sem);

	if (IS_ERR((void *) ret))
		goto out;

	if (old_prot) {
		/* copy back the old page contents.  */
		/* NOTE(review): the copy_to_user() results are ignored here */
		if (offset_in_page(start))
			copy_to_user((void __user *) PAGE_START(start), page,
				     offset_in_page(start));
		if (offset_in_page(end))
			copy_to_user((void __user *) end, page + offset_in_page(end),
				     PAGE_SIZE - offset_in_page(end));
	}

	if (!(flags & MAP_ANONYMOUS)) {
		/* read the file contents */
		inode = file->f_path.dentry->d_inode;
		if (!inode->i_fop || !file->f_op->read
		    || ((*file->f_op->read)(file, (char __user *) start, end - start, &off) < 0))
		{
			ret = -EINVAL;
			goto out;
		}
	}

 skip_mmap:
	/* the page was (or already is) writable; drop to the union of old and new protection */
	if (!(prot & PROT_WRITE))
		ret = sys_mprotect(PAGE_START(start), PAGE_SIZE, prot | old_prot);
  out:
	if (page)
		free_page((unsigned long) page);
	return ret;
}
254
/*
 * SLAB cache for partial_page structures.  The code in this file only allocates from and
 * frees to it; the cache itself is not created here.
 */
struct kmem_cache *partial_page_cachep;
257
258/*
259 * init partial_page_list.
260 * return 0 means kmalloc fail.
261 */
262struct partial_page_list*
263ia32_init_pp_list(void)
264{
265	struct partial_page_list *p;
266
267	if ((p = kmalloc(sizeof(*p), GFP_KERNEL)) == NULL)
268		return p;
269	p->pp_head = NULL;
270	p->ppl_rb = RB_ROOT;
271	p->pp_hint = NULL;
272	atomic_set(&p->pp_count, 1);
273	return p;
274}
275
276/*
277 * Search for the partial page with @start in partial page list @ppl.
278 * If finds the partial page, return the found partial page.
279 * Else, return 0 and provide @pprev, @rb_link, @rb_parent to
280 * be used by later __ia32_insert_pp().
281 */
282static struct partial_page *
283__ia32_find_pp(struct partial_page_list *ppl, unsigned int start,
284	struct partial_page **pprev, struct rb_node ***rb_link,
285	struct rb_node **rb_parent)
286{
287	struct partial_page *pp;
288	struct rb_node **__rb_link, *__rb_parent, *rb_prev;
289
290	pp = ppl->pp_hint;
291	if (pp && pp->base == start)
292		return pp;
293
294	__rb_link = &ppl->ppl_rb.rb_node;
295	rb_prev = __rb_parent = NULL;
296
297	while (*__rb_link) {
298		__rb_parent = *__rb_link;
299		pp = rb_entry(__rb_parent, struct partial_page, pp_rb);
300
301		if (pp->base == start) {
302			ppl->pp_hint = pp;
303			return pp;
304		} else if (pp->base < start) {
305			rb_prev = __rb_parent;
306			__rb_link = &__rb_parent->rb_right;
307		} else {
308			__rb_link = &__rb_parent->rb_left;
309		}
310	}
311
312	*rb_link = __rb_link;
313	*rb_parent = __rb_parent;
314	*pprev = NULL;
315	if (rb_prev)
316		*pprev = rb_entry(rb_prev, struct partial_page, pp_rb);
317	return NULL;
318}
319
320/*
321 * insert @pp into @ppl.
322 */
323static void
324__ia32_insert_pp(struct partial_page_list *ppl, struct partial_page *pp,
325	 struct partial_page *prev, struct rb_node **rb_link,
326	struct rb_node *rb_parent)
327{
328	/* link list */
329	if (prev) {
330		pp->next = prev->next;
331		prev->next = pp;
332	} else {
333		ppl->pp_head = pp;
334		if (rb_parent)
335			pp->next = rb_entry(rb_parent,
336				struct partial_page, pp_rb);
337		else
338			pp->next = NULL;
339	}
340
341	/* link rb */
342	rb_link_node(&pp->pp_rb, rb_parent, rb_link);
343	rb_insert_color(&pp->pp_rb, &ppl->ppl_rb);
344
345	ppl->pp_hint = pp;
346}
347
348/*
349 * delete @pp from partial page list @ppl.
350 */
351static void
352__ia32_delete_pp(struct partial_page_list *ppl, struct partial_page *pp,
353	struct partial_page *prev)
354{
355	if (prev) {
356		prev->next = pp->next;
357		if (ppl->pp_hint == pp)
358			ppl->pp_hint = prev;
359	} else {
360		ppl->pp_head = pp->next;
361		if (ppl->pp_hint == pp)
362			ppl->pp_hint = pp->next;
363	}
364	rb_erase(&pp->pp_rb, &ppl->ppl_rb);
365	kmem_cache_free(partial_page_cachep, pp);
366}
367
368static struct partial_page *
369__pp_prev(struct partial_page *pp)
370{
371	struct rb_node *prev = rb_prev(&pp->pp_rb);
372	if (prev)
373		return rb_entry(prev, struct partial_page, pp_rb);
374	else
375		return NULL;
376}
377
378/*
379 * Delete partial pages with address between @start and @end.
380 * @start and @end are page aligned.
381 */
382static void
383__ia32_delete_pp_range(unsigned int start, unsigned int end)
384{
385	struct partial_page *pp, *prev;
386	struct rb_node **rb_link, *rb_parent;
387
388	if (start >= end)
389		return;
390
391	pp = __ia32_find_pp(current->thread.ppl, start, &prev,
392					&rb_link, &rb_parent);
393	if (pp)
394		prev = __pp_prev(pp);
395	else {
396		if (prev)
397			pp = prev->next;
398		else
399			pp = current->thread.ppl->pp_head;
400	}
401
402	while (pp && pp->base < end) {
403		struct partial_page *tmp = pp->next;
404		__ia32_delete_pp(current->thread.ppl, pp, prev);
405		pp = tmp;
406	}
407}
408
409/*
410 * Set the range between @start and @end in bitmap.
411 * @start and @end should be IA32 page aligned and in the same IA64 page.
412 */
413static int
414__ia32_set_pp(unsigned int start, unsigned int end, int flags)
415{
416	struct partial_page *pp, *prev;
417	struct rb_node ** rb_link, *rb_parent;
418	unsigned int pstart, start_bit, end_bit, i;
419
420	pstart = PAGE_START(start);
421	start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
422	end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
423	if (end_bit == 0)
424		end_bit = PAGE_SIZE / IA32_PAGE_SIZE;
425	pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
426					&rb_link, &rb_parent);
427	if (pp) {
428		for (i = start_bit; i < end_bit; i++)
429			set_bit(i, &pp->bitmap);
430		/*
431		 * Check: if this partial page has been set to a full page,
432		 * then delete it.
433		 */
434		if (find_first_zero_bit(&pp->bitmap, sizeof(pp->bitmap)*8) >=
435				PAGE_SIZE/IA32_PAGE_SIZE) {
436			__ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp));
437		}
438		return 0;
439	}
440
441	/*
442	 * MAP_FIXED may lead to overlapping mmap.
443	 * In this case, the requested mmap area may already mmaped as a full
444	 * page. So check vma before adding a new partial page.
445	 */
446	if (flags & MAP_FIXED) {
447		struct vm_area_struct *vma = find_vma(current->mm, pstart);
448		if (vma && vma->vm_start <= pstart)
449			return 0;
450	}
451
452	/* new a partial_page */
453	pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
454	if (!pp)
455		return -ENOMEM;
456	pp->base = pstart;
457	pp->bitmap = 0;
458	for (i=start_bit; i<end_bit; i++)
459		set_bit(i, &(pp->bitmap));
460	pp->next = NULL;
461	__ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent);
462	return 0;
463}
464
465/*
466 * @start and @end should be IA32 page aligned, but don't need to be in the
467 * same IA64 page. Split @start and @end to make sure they're in the same IA64
468 * page, then call __ia32_set_pp().
469 */
470static void
471ia32_set_pp(unsigned int start, unsigned int end, int flags)
472{
473	down_write(&current->mm->mmap_sem);
474	if (flags & MAP_FIXED) {
475		/*
476		 * MAP_FIXED may lead to overlapping mmap. When this happens,
477		 * a series of complete IA64 pages results in deletion of
478		 * old partial pages in that range.
479		 */
480		__ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end));
481	}
482
483	if (end < PAGE_ALIGN(start)) {
484		__ia32_set_pp(start, end, flags);
485	} else {
486		if (offset_in_page(start))
487			__ia32_set_pp(start, PAGE_ALIGN(start), flags);
488		if (offset_in_page(end))
489			__ia32_set_pp(PAGE_START(end), end, flags);
490	}
491	up_write(&current->mm->mmap_sem);
492}
493
494/*
495 * Unset the range between @start and @end in bitmap.
496 * @start and @end should be IA32 page aligned and in the same IA64 page.
497 * After doing that, if the bitmap is 0, then free the page and return 1,
498 * 	else return 0;
499 * If not find the partial page in the list, then
500 * 	If the vma exists, then the full page is set to a partial page;
501 *	Else return -ENOMEM.
502 */
503static int
504__ia32_unset_pp(unsigned int start, unsigned int end)
505{
506	struct partial_page *pp, *prev;
507	struct rb_node ** rb_link, *rb_parent;
508	unsigned int pstart, start_bit, end_bit, i;
509	struct vm_area_struct *vma;
510
511	pstart = PAGE_START(start);
512	start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
513	end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
514	if (end_bit == 0)
515		end_bit = PAGE_SIZE / IA32_PAGE_SIZE;
516
517	pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
518					&rb_link, &rb_parent);
519	if (pp) {
520		for (i = start_bit; i < end_bit; i++)
521			clear_bit(i, &pp->bitmap);
522		if (pp->bitmap == 0) {
523			__ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp));
524			return 1;
525		}
526		return 0;
527	}
528
529	vma = find_vma(current->mm, pstart);
530	if (!vma || vma->vm_start > pstart) {
531		return -ENOMEM;
532	}
533
534	/* new a partial_page */
535	pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
536	if (!pp)
537		return -ENOMEM;
538	pp->base = pstart;
539	pp->bitmap = 0;
540	for (i = 0; i < start_bit; i++)
541		set_bit(i, &(pp->bitmap));
542	for (i = end_bit; i < PAGE_SIZE / IA32_PAGE_SIZE; i++)
543		set_bit(i, &(pp->bitmap));
544	pp->next = NULL;
545	__ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent);
546	return 0;
547}
548
549/*
550 * Delete pp between PAGE_ALIGN(start) and PAGE_START(end) by calling
551 * __ia32_delete_pp_range(). Unset possible partial pages by calling
552 * __ia32_unset_pp().
553 * The returned value see __ia32_unset_pp().
554 */
555static int
556ia32_unset_pp(unsigned int *startp, unsigned int *endp)
557{
558	unsigned int start = *startp, end = *endp;
559	int ret = 0;
560
561	down_write(&current->mm->mmap_sem);
562
563	__ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end));
564
565	if (end < PAGE_ALIGN(start)) {
566		ret = __ia32_unset_pp(start, end);
567		if (ret == 1) {
568			*startp = PAGE_START(start);
569			*endp = PAGE_ALIGN(end);
570		}
571		if (ret == 0) {
572			/* to shortcut sys_munmap() in sys32_munmap() */
573			*startp = PAGE_START(start);
574			*endp = PAGE_START(end);
575		}
576	} else {
577		if (offset_in_page(start)) {
578			ret = __ia32_unset_pp(start, PAGE_ALIGN(start));
579			if (ret == 1)
580				*startp = PAGE_START(start);
581			if (ret == 0)
582				*startp = PAGE_ALIGN(start);
583			if (ret < 0)
584				goto out;
585		}
586		if (offset_in_page(end)) {
587			ret = __ia32_unset_pp(PAGE_START(end), end);
588			if (ret == 1)
589				*endp = PAGE_ALIGN(end);
590			if (ret == 0)
591				*endp = PAGE_START(end);
592		}
593	}
594
595 out:
596	up_write(&current->mm->mmap_sem);
597	return ret;
598}
599
600/*
601 * Compare the range between @start and @end with bitmap in partial page.
602 * @start and @end should be IA32 page aligned and in the same IA64 page.
603 */
604static int
605__ia32_compare_pp(unsigned int start, unsigned int end)
606{
607	struct partial_page *pp, *prev;
608	struct rb_node ** rb_link, *rb_parent;
609	unsigned int pstart, start_bit, end_bit, size;
610	unsigned int first_bit, next_zero_bit;	/* the first range in bitmap */
611
612	pstart = PAGE_START(start);
613
614	pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
615					&rb_link, &rb_parent);
616	if (!pp)
617		return 1;
618
619	start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
620	end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
621	size = sizeof(pp->bitmap) * 8;
622	first_bit = find_first_bit(&pp->bitmap, size);
623	next_zero_bit = find_next_zero_bit(&pp->bitmap, size, first_bit);
624	if ((start_bit < first_bit) || (end_bit > next_zero_bit)) {
625		/* exceeds the first range in bitmap */
626		return -ENOMEM;
627	} else if ((start_bit == first_bit) && (end_bit == next_zero_bit)) {
628		first_bit = find_next_bit(&pp->bitmap, size, next_zero_bit);
629		if ((next_zero_bit < first_bit) && (first_bit < size))
630			return 1;	/* has next range */
631		else
632			return 0; 	/* no next range */
633	} else
634		return 1;
635}
636
637/*
638 * @start and @end should be IA32 page aligned, but don't need to be in the
639 * same IA64 page. Split @start and @end to make sure they're in the same IA64
640 * page, then call __ia32_compare_pp().
641 *
642 * Take this as example: the range is the 1st and 2nd 4K page.
643 * Return 0 if they fit bitmap exactly, i.e. bitmap = 00000011;
644 * Return 1 if the range doesn't cover whole bitmap, e.g. bitmap = 00001111;
645 * Return -ENOMEM if the range exceeds the bitmap, e.g. bitmap = 00000001 or
646 * 	bitmap = 00000101.
647 */
648static int
649ia32_compare_pp(unsigned int *startp, unsigned int *endp)
650{
651	unsigned int start = *startp, end = *endp;
652	int retval = 0;
653
654	down_write(&current->mm->mmap_sem);
655
656	if (end < PAGE_ALIGN(start)) {
657		retval = __ia32_compare_pp(start, end);
658		if (retval == 0) {
659			*startp = PAGE_START(start);
660			*endp = PAGE_ALIGN(end);
661		}
662	} else {
663		if (offset_in_page(start)) {
664			retval = __ia32_compare_pp(start,
665						   PAGE_ALIGN(start));
666			if (retval == 0)
667				*startp = PAGE_START(start);
668			if (retval < 0)
669				goto out;
670		}
671		if (offset_in_page(end)) {
672			retval = __ia32_compare_pp(PAGE_START(end), end);
673			if (retval == 0)
674				*endp = PAGE_ALIGN(end);
675		}
676	}
677
678 out:
679	up_write(&current->mm->mmap_sem);
680	return retval;
681}
682
683static void
684__ia32_drop_pp_list(struct partial_page_list *ppl)
685{
686	struct partial_page *pp = ppl->pp_head;
687
688	while (pp) {
689		struct partial_page *next = pp->next;
690		kmem_cache_free(partial_page_cachep, pp);
691		pp = next;
692	}
693
694	kfree(ppl);
695}
696
697void
698ia32_drop_partial_page_list(struct task_struct *task)
699{
700	struct partial_page_list* ppl = task->thread.ppl;
701
702	if (ppl && atomic_dec_and_test(&ppl->pp_count))
703		__ia32_drop_pp_list(ppl);
704}
705
706/*
707 * Copy current->thread.ppl to ppl (already initialized).
708 */
709static int
710__ia32_copy_pp_list(struct partial_page_list *ppl)
711{
712	struct partial_page *pp, *tmp, *prev;
713	struct rb_node **rb_link, *rb_parent;
714
715	ppl->pp_head = NULL;
716	ppl->pp_hint = NULL;
717	ppl->ppl_rb = RB_ROOT;
718	rb_link = &ppl->ppl_rb.rb_node;
719	rb_parent = NULL;
720	prev = NULL;
721
722	for (pp = current->thread.ppl->pp_head; pp; pp = pp->next) {
723		tmp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
724		if (!tmp)
725			return -ENOMEM;
726		*tmp = *pp;
727		__ia32_insert_pp(ppl, tmp, prev, rb_link, rb_parent);
728		prev = tmp;
729		rb_link = &tmp->pp_rb.rb_right;
730		rb_parent = &tmp->pp_rb;
731	}
732	return 0;
733}
734
735int
736ia32_copy_partial_page_list(struct task_struct *p, unsigned long clone_flags)
737{
738	int retval = 0;
739
740	if (clone_flags & CLONE_VM) {
741		atomic_inc(&current->thread.ppl->pp_count);
742		p->thread.ppl = current->thread.ppl;
743	} else {
744		p->thread.ppl = ia32_init_pp_list();
745		if (!p->thread.ppl)
746			return -ENOMEM;
747		down_write(&current->mm->mmap_sem);
748		{
749			retval = __ia32_copy_pp_list(p->thread.ppl);
750		}
751		up_write(&current->mm->mmap_sem);
752	}
753
754	return retval;
755}
756
/*
 * Emulate an ia32 mmap() of [start, start + len); only built when the native page size
 * is larger than the ia32 page size.
 *
 * For MAP_FIXED requests the partial-page bookkeeping is updated first; a head and/or
 * tail that does not fill a whole native page is handled by mmap_subpage(), and only the
 * remaining whole pages (if any) are passed to do_mmap().  Otherwise
 * arch_get_unmapped_area() picks the page-aligned region, and start/end are recomputed
 * when it returns something other than the requested page start.
 *
 * When the mapping is file-backed but the page-aligned file offset is not itself page
 * aligned ("not congruent"), the region is mapped anonymously and writable, and filled
 * through the file's read method instead.
 *
 * Returns the (possibly adjusted) start address, or an error value encoded in an
 * unsigned long.
 */
static unsigned long
emulate_mmap (struct file *file, unsigned long start, unsigned long len, int prot, int flags,
	      loff_t off)
{
	unsigned long tmp, end, pend, pstart, ret, is_congruent, fudge = 0;
	struct inode *inode;
	loff_t poff;

	/* [pstart, pend) is [start, end) rounded out to native page boundaries */
	end = start + len;
	pstart = PAGE_START(start);
	pend = PAGE_ALIGN(end);

	if (flags & MAP_FIXED) {
		ia32_set_pp((unsigned int)start, (unsigned int)end, flags);
		if (start > pstart) {
			/* partial head page */
			if (flags & MAP_SHARED)
				printk(KERN_INFO
				       "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n",
				       current->comm, current->pid, start);
			ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags,
					   off);
			if (IS_ERR((void *) ret))
				return ret;
			pstart += PAGE_SIZE;
			if (pstart >= pend)
				goto out;	/* done */
		}
		if (end < pend) {
			/* partial tail page */
			if (flags & MAP_SHARED)
				printk(KERN_INFO
				       "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n",
				       current->comm, current->pid, end);
			ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags,
					   (off + len) - offset_in_page(end));
			if (IS_ERR((void *) ret))
				return ret;
			pend -= PAGE_SIZE;
			if (pstart >= pend)
				goto out;	/* done */
		}
	} else {
		/*
		 * If a start address was specified, use it if the entire rounded out area
		 * is available.
		 */
		if (start && !pstart)
			fudge = 1;	/* handle case of mapping to range (0,PAGE_SIZE) */
		tmp = arch_get_unmapped_area(file, pstart - fudge, pend - pstart, 0, flags);
		if (tmp != pstart) {
			pstart = tmp;
			start = pstart + offset_in_page(off);	/* make start congruent with off */
			end = start + len;
			pend = PAGE_ALIGN(end);
		}
	}

	/* file offset that corresponds to pstart */
	poff = off + (pstart - start);	/* note: (pstart - start) may be negative */
	is_congruent = (flags & MAP_ANONYMOUS) || (offset_in_page(poff) == 0);

	if ((flags & MAP_SHARED) && !is_congruent)
		printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
		       "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off);

	DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend,
	    is_congruent ? "congruent" : "not congruent", poff);

	down_write(&current->mm->mmap_sem);
	{
		if (!(flags & MAP_ANONYMOUS) && is_congruent)
			ret = do_mmap(file, pstart, pend - pstart, prot, flags | MAP_FIXED, poff);
		else
			/* anonymous; forced writable when the file contents are read in below */
			ret = do_mmap(NULL, pstart, pend - pstart,
				      prot | ((flags & MAP_ANONYMOUS) ? 0 : PROT_WRITE),
				      flags | MAP_FIXED | MAP_ANONYMOUS, 0);
	}
	up_write(&current->mm->mmap_sem);

	if (IS_ERR((void *) ret))
		return ret;

	if (!is_congruent) {
		/* read the file contents */
		inode = file->f_path.dentry->d_inode;
		if (!inode->i_fop || !file->f_op->read
		    || ((*file->f_op->read)(file, (char __user *) pstart, pend - pstart, &poff)
			< 0))
		{
			sys_munmap(pstart, pend - pstart);
			return -EINVAL;
		}
		/* drop the temporary PROT_WRITE again if the caller did not ask for it */
		if (!(prot & PROT_WRITE) && sys_mprotect(pstart, pend - pstart, prot) < 0)
			return -EINVAL;
	}

	/* for MAP_FIXED the bookkeeping was already updated at the top */
	if (!(flags & MAP_FIXED))
		ia32_set_pp((unsigned int)start, (unsigned int)end, flags);
out:
	return start;
}
856
857#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */
858
/*
 * Widen an ia32 protection mask to what x86 hardware effectively grants:
 * any write access adds read and execute, and read or execute adds the other.
 * A mask with none of the three bits is returned unchanged.
 */
static inline unsigned int
get_prot32 (unsigned int prot)
{
	/* on x86, PROT_WRITE implies PROT_READ which implies PROT_EXEC */
	if (prot & PROT_WRITE)
		return prot | PROT_READ | PROT_WRITE | PROT_EXEC;

	/* on x86, there is no distinction between PROT_READ and PROT_EXEC */
	if (prot & (PROT_READ | PROT_EXEC))
		return prot | PROT_READ | PROT_EXEC;

	return prot;
}
871
872unsigned long
873ia32_do_mmap (struct file *file, unsigned long addr, unsigned long len, int prot, int flags,
874	      loff_t offset)
875{
876	DBG("ia32_do_mmap(file=%p,addr=0x%lx,len=0x%lx,prot=%x,flags=%x,offset=0x%llx)\n",
877	    file, addr, len, prot, flags, offset);
878
879	if (file && (!file->f_op || !file->f_op->mmap))
880		return -ENODEV;
881
882	len = IA32_PAGE_ALIGN(len);
883	if (len == 0)
884		return addr;
885
886	if (len > IA32_PAGE_OFFSET || addr > IA32_PAGE_OFFSET - len)
887	{
888		if (flags & MAP_FIXED)
889			return -ENOMEM;
890		else
891		return -EINVAL;
892	}
893
894	if (OFFSET4K(offset))
895		return -EINVAL;
896
897	prot = get_prot32(prot);
898
899#if PAGE_SHIFT > IA32_PAGE_SHIFT
900	mutex_lock(&ia32_mmap_mutex);
901	{
902		addr = emulate_mmap(file, addr, len, prot, flags, offset);
903	}
904	mutex_unlock(&ia32_mmap_mutex);
905#else
906	down_write(&current->mm->mmap_sem);
907	{
908		addr = do_mmap(file, addr, len, prot, flags, offset);
909	}
910	up_write(&current->mm->mmap_sem);
911#endif
912	DBG("ia32_do_mmap: returning 0x%lx\n", addr);
913	return addr;
914}
915
/*
 * Linux/i386 didn't use to be able to handle more than 4 system call parameters, so these
 * system calls used a memory block for parameter passing.
 */

/*
 * Parameter block read from user space by sys32_mmap().  All fields are 32 bits wide;
 * they are passed on to ia32_do_mmap() as addr, len, prot, flags and offset, with fd
 * looked up via fget() for non-anonymous mappings.
 */
struct mmap_arg_struct {
	unsigned int addr;
	unsigned int len;
	unsigned int prot;
	unsigned int flags;
	unsigned int fd;
	unsigned int offset;	/* must be 4KB aligned, see sys32_mmap() */
};
929
930asmlinkage long
931sys32_mmap (struct mmap_arg_struct __user *arg)
932{
933	struct mmap_arg_struct a;
934	struct file *file = NULL;
935	unsigned long addr;
936	int flags;
937
938	if (copy_from_user(&a, arg, sizeof(a)))
939		return -EFAULT;
940
941	if (OFFSET4K(a.offset))
942		return -EINVAL;
943
944	flags = a.flags;
945
946	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
947	if (!(flags & MAP_ANONYMOUS)) {
948		file = fget(a.fd);
949		if (!file)
950			return -EBADF;
951	}
952
953	addr = ia32_do_mmap(file, a.addr, a.len, a.prot, flags, a.offset);
954
955	if (file)
956		fput(file);
957	return addr;
958}
959
960asmlinkage long
961sys32_mmap2 (unsigned int addr, unsigned int len, unsigned int prot, unsigned int flags,
962	     unsigned int fd, unsigned int pgoff)
963{
964	struct file *file = NULL;
965	unsigned long retval;
966
967	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
968	if (!(flags & MAP_ANONYMOUS)) {
969		file = fget(fd);
970		if (!file)
971			return -EBADF;
972	}
973
974	retval = ia32_do_mmap(file, addr, len, prot, flags,
975			      (unsigned long) pgoff << IA32_PAGE_SHIFT);
976
977	if (file)
978		fput(file);
979	return retval;
980}
981
982asmlinkage long
983sys32_munmap (unsigned int start, unsigned int len)
984{
985	unsigned int end = start + len;
986	long ret;
987
988#if PAGE_SHIFT <= IA32_PAGE_SHIFT
989	ret = sys_munmap(start, end - start);
990#else
991	if (OFFSET4K(start))
992		return -EINVAL;
993
994	end = IA32_PAGE_ALIGN(end);
995	if (start >= end)
996		return -EINVAL;
997
998	ret = ia32_unset_pp(&start, &end);
999	if (ret < 0)
1000		return ret;
1001
1002	if (start >= end)
1003		return 0;
1004
1005	mutex_lock(&ia32_mmap_mutex);
1006	ret = sys_munmap(start, end - start);
1007	mutex_unlock(&ia32_mmap_mutex);
1008#endif
1009	return ret;
1010}
1011
1012#if PAGE_SHIFT > IA32_PAGE_SHIFT
1013
1014/*
1015 * When mprotect()ing a partial page, we set the permission to the union of the old
1016 * settings and the new settings.  In other words, it's only possible to make access to a
1017 * partial page less restrictive.
1018 */
1019static long
1020mprotect_subpage (unsigned long address, int new_prot)
1021{
1022	int old_prot;
1023	struct vm_area_struct *vma;
1024
1025	if (new_prot == PROT_NONE)
1026		return 0;		/* optimize case where nothing changes... */
1027	vma = find_vma(current->mm, address);
1028	old_prot = get_page_prot(vma, address);
1029	return sys_mprotect(address, PAGE_SIZE, new_prot | old_prot);
1030}
1031
1032#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */
1033
/*
 * ia32 mprotect().
 *
 * @prot is first widened with get_prot32().  When native pages are larger than ia32
 * pages, the range is checked against the partial-page bookkeeping with
 * ia32_compare_pp() (which may widen start/end to native page boundaries); a head or
 * tail that still does not fill a native page goes through mprotect_subpage(), and the
 * remaining whole pages go to sys_mprotect().
 *
 * Returns the sys_mprotect()/mprotect_subpage() result, -EINVAL for a misaligned
 * @start or a wrapped range, or the ia32_compare_pp() error.
 */
asmlinkage long
sys32_mprotect (unsigned int start, unsigned int len, int prot)
{
	unsigned int end = start + len;
#if PAGE_SHIFT > IA32_PAGE_SHIFT
	long retval = 0;
#endif

	prot = get_prot32(prot);

#if PAGE_SHIFT <= IA32_PAGE_SHIFT
	return sys_mprotect(start, end - start, prot);
#else
	if (OFFSET4K(start))
		return -EINVAL;

	end = IA32_PAGE_ALIGN(end);
	if (end < start)
		return -EINVAL;

	/* only a negative result is an error; 0 and 1 both continue below */
	retval = ia32_compare_pp(&start, &end);

	if (retval < 0)
		return retval;

	mutex_lock(&ia32_mmap_mutex);
	{
		if (offset_in_page(start)) {
			/* start address is 4KB aligned but not page aligned. */
			retval = mprotect_subpage(PAGE_START(start), prot);
			if (retval < 0)
				goto out;

			start = PAGE_ALIGN(start);
			if (start >= end)
				goto out;	/* retval is already zero... */
		}

		if (offset_in_page(end)) {
			/* end address is 4KB aligned but not page aligned. */
			retval = mprotect_subpage(PAGE_START(end), prot);
			if (retval < 0)
				goto out;

			end = PAGE_START(end);
		}
		/* whatever is left consists of whole native pages */
		retval = sys_mprotect(start, end - start, prot);
	}
  out:
	mutex_unlock(&ia32_mmap_mutex);
	return retval;
#endif
}
1087
/*
 * ia32 mremap().
 *
 * When native pages are larger than ia32 pages: lengths are rounded up to ia32 pages, a
 * shrinking request is handled by sys32_munmap() on the tail, and anything that still
 * needs sys_mremap() is widened to native page boundaries first.  After a successful
 * expansion the newly covered ia32 pages are recorded with ia32_set_pp().
 *
 * Returns the sys_mremap() result, @addr for a pure shrink (or same-size request) that
 * does not need to move, -EINVAL for misaligned addresses or a zero new length, or the
 * sys32_munmap() error.
 */
asmlinkage long
sys32_mremap (unsigned int addr, unsigned int old_len, unsigned int new_len,
		unsigned int flags, unsigned int new_addr)
{
	long ret;

#if PAGE_SHIFT <= IA32_PAGE_SHIFT
	ret = sys_mremap(addr, old_len, new_len, flags, new_addr);
#else
	unsigned int old_end, new_end;

	if (OFFSET4K(addr))
		return -EINVAL;

	old_len = IA32_PAGE_ALIGN(old_len);
	new_len = IA32_PAGE_ALIGN(new_len);
	old_end = addr + old_len;
	new_end = addr + new_len;

	if (!new_len)
		return -EINVAL;

	if ((flags & MREMAP_FIXED) && (OFFSET4K(new_addr)))
		return -EINVAL;

	if (old_len >= new_len) {
		/* shrinking (or same size): give back the tail first */
		ret = sys32_munmap(addr + new_len, old_len - new_len);
		/* a failure is only reported when there really was a tail to unmap */
		if (ret && old_len != new_len)
			return ret;
		ret = addr;
		/* done unless the caller also asked to move the mapping elsewhere */
		if (!(flags & MREMAP_FIXED) || (new_addr == addr))
			return ret;
		old_len = new_len;
	}

	/* widen to native page boundaries; note old_end/new_end keep their earlier values */
	addr = PAGE_START(addr);
	old_len = PAGE_ALIGN(old_end) - addr;
	new_len = PAGE_ALIGN(new_end) - addr;

	mutex_lock(&ia32_mmap_mutex);
	ret = sys_mremap(addr, old_len, new_len, flags, new_addr);
	mutex_unlock(&ia32_mmap_mutex);

	if ((ret >= 0) && (old_len < new_len)) {
		/* mremap expanded successfully */
		ia32_set_pp(old_end, new_end, flags);
	}
#endif
	return ret;
}
1138
1139asmlinkage long
1140sys32_pipe (int __user *fd)
1141{
1142	int retval;
1143	int fds[2];
1144
1145	retval = do_pipe(fds);
1146	if (retval)
1147		goto out;
1148	if (copy_to_user(fd, fds, sizeof(fds)))
1149		retval = -EFAULT;
1150  out:
1151	return retval;
1152}
1153
1154static inline long
1155get_tv32 (struct timeval *o, struct compat_timeval __user *i)
1156{
1157	return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
1158		(__get_user(o->tv_sec, &i->tv_sec) | __get_user(o->tv_usec, &i->tv_usec)));
1159}
1160
1161static inline long
1162put_tv32 (struct compat_timeval __user *o, struct timeval *i)
1163{
1164	return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
1165		(__put_user(i->tv_sec, &o->tv_sec) | __put_user(i->tv_usec, &o->tv_usec)));
1166}
1167
1168asmlinkage unsigned long
1169sys32_alarm (unsigned int seconds)
1170{
1171	return alarm_setitimer(seconds);
1172}
1173
/* Translations due to time_t size differences, which affect all
   sorts of things, like timeval and itimerval.  */
1176
1177extern struct timezone sys_tz;
1178
1179asmlinkage long
1180sys32_gettimeofday (struct compat_timeval __user *tv, struct timezone __user *tz)
1181{
1182	if (tv) {
1183		struct timeval ktv;
1184		do_gettimeofday(&ktv);
1185		if (put_tv32(tv, &ktv))
1186			return -EFAULT;
1187	}
1188	if (tz) {
1189		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
1190			return -EFAULT;
1191	}
1192	return 0;
1193}
1194
1195asmlinkage long
1196sys32_settimeofday (struct compat_timeval __user *tv, struct timezone __user *tz)
1197{
1198	struct timeval ktv;
1199	struct timespec kts;
1200	struct timezone ktz;
1201
1202	if (tv) {
1203		if (get_tv32(&ktv, tv))
1204			return -EFAULT;
1205		kts.tv_sec = ktv.tv_sec;
1206		kts.tv_nsec = ktv.tv_usec * 1000;
1207	}
1208	if (tz) {
1209		if (copy_from_user(&ktz, tz, sizeof(ktz)))
1210			return -EFAULT;
1211	}
1212
1213	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
1214}
1215
/*
 * State shared between sys32_getdents() and its filldir32() callback
 * while a directory is being read into a user buffer.
 */
struct getdents32_callback {
	struct compat_dirent __user *current_dir;	/* where the next entry will be written */
	struct compat_dirent __user *previous;		/* last entry written, NULL if none yet */
	int count;					/* bytes still available in the user buffer */
	int error;					/* status recorded by filldir32() */
};
1222
/*
 * State shared between sys32_readdir() and its fillonedir32() callback.
 */
struct readdir32_callback {
	struct old_linux32_dirent __user * dirent;	/* user buffer for the single entry */
	int count;					/* number of entries stored so far (0 or 1) */
};
1227
1228static int
1229filldir32 (void *__buf, const char *name, int namlen, loff_t offset, u64 ino,
1230	   unsigned int d_type)
1231{
1232	struct compat_dirent __user * dirent;
1233	struct getdents32_callback * buf = (struct getdents32_callback *) __buf;
1234	int reclen = ROUND_UP(offsetof(struct compat_dirent, d_name) + namlen + 1, 4);
1235	u32 d_ino;
1236
1237	buf->error = -EINVAL;	/* only used if we fail.. */
1238	if (reclen > buf->count)
1239		return -EINVAL;
1240	d_ino = ino;
1241	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
1242		return -EOVERFLOW;
1243	buf->error = -EFAULT;	/* only used if we fail.. */
1244	dirent = buf->previous;
1245	if (dirent)
1246		if (put_user(offset, &dirent->d_off))
1247			return -EFAULT;
1248	dirent = buf->current_dir;
1249	buf->previous = dirent;
1250	if (put_user(d_ino, &dirent->d_ino)
1251	    || put_user(reclen, &dirent->d_reclen)
1252	    || copy_to_user(dirent->d_name, name, namlen)
1253	    || put_user(0, dirent->d_name + namlen))
1254		return -EFAULT;
1255	dirent = (struct compat_dirent __user *) ((char __user *) dirent + reclen);
1256	buf->current_dir = dirent;
1257	buf->count -= reclen;
1258	return 0;
1259}
1260
1261asmlinkage long
1262sys32_getdents (unsigned int fd, struct compat_dirent __user *dirent, unsigned int count)
1263{
1264	struct file * file;
1265	struct compat_dirent __user * lastdirent;
1266	struct getdents32_callback buf;
1267	int error;
1268
1269	error = -EFAULT;
1270	if (!access_ok(VERIFY_WRITE, dirent, count))
1271		goto out;
1272
1273	error = -EBADF;
1274	file = fget(fd);
1275	if (!file)
1276		goto out;
1277
1278	buf.current_dir = dirent;
1279	buf.previous = NULL;
1280	buf.count = count;
1281	buf.error = 0;
1282
1283	error = vfs_readdir(file, filldir32, &buf);
1284	if (error < 0)
1285		goto out_putf;
1286	error = buf.error;
1287	lastdirent = buf.previous;
1288	if (lastdirent) {
1289		if (put_user(file->f_pos, &lastdirent->d_off))
1290			error = -EFAULT;
1291		else
1292			error = count - buf.count;
1293	}
1294
1295out_putf:
1296	fput(file);
1297out:
1298	return error;
1299}
1300
1301static int
1302fillonedir32 (void * __buf, const char * name, int namlen, loff_t offset, u64 ino,
1303	      unsigned int d_type)
1304{
1305	struct readdir32_callback * buf = (struct readdir32_callback *) __buf;
1306	struct old_linux32_dirent __user * dirent;
1307	u32 d_ino;
1308
1309	if (buf->count)
1310		return -EINVAL;
1311	d_ino = ino;
1312	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
1313		return -EOVERFLOW;
1314	buf->count++;
1315	dirent = buf->dirent;
1316	if (put_user(d_ino, &dirent->d_ino)
1317	    || put_user(offset, &dirent->d_offset)
1318	    || put_user(namlen, &dirent->d_namlen)
1319	    || copy_to_user(dirent->d_name, name, namlen)
1320	    || put_user(0, dirent->d_name + namlen))
1321		return -EFAULT;
1322	return 0;
1323}
1324
1325asmlinkage long
1326sys32_readdir (unsigned int fd, void __user *dirent, unsigned int count)
1327{
1328	int error;
1329	struct file * file;
1330	struct readdir32_callback buf;
1331
1332	error = -EBADF;
1333	file = fget(fd);
1334	if (!file)
1335		goto out;
1336
1337	buf.count = 0;
1338	buf.dirent = dirent;
1339
1340	error = vfs_readdir(file, fillonedir32, &buf);
1341	if (error >= 0)
1342		error = buf.count;
1343	fput(file);
1344out:
1345	return error;
1346}
1347
/*
 * Argument block read from user space by sys32_old_select().  Apart from
 * @n, each field is a 32-bit user address that is converted with
 * compat_ptr() before being passed to compat_sys_select().
 */
struct sel_arg_struct {
	unsigned int n;		/* first argument to compat_sys_select() */
	unsigned int inp;	/* 32-bit user pointer */
	unsigned int outp;	/* 32-bit user pointer */
	unsigned int exp;	/* 32-bit user pointer */
	unsigned int tvp;	/* 32-bit user pointer */
};
1355
1356asmlinkage long
1357sys32_old_select (struct sel_arg_struct __user *arg)
1358{
1359	struct sel_arg_struct a;
1360
1361	if (copy_from_user(&a, arg, sizeof(a)))
1362		return -EFAULT;
1363	return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
1364				 compat_ptr(a.exp), compat_ptr(a.tvp));
1365}
1366
/*
 * Sub-call numbers for sys32_ipc(): the low 16 bits of its "call"
 * argument are matched against these values.
 */
#define SEMOP		 1
#define SEMGET		 2
#define SEMCTL		 3
#define SEMTIMEDOP	 4
#define MSGSND		11
#define MSGRCV		12
#define MSGGET		13
#define MSGCTL		14
#define SHMAT		21
#define SHMDT		22
#define SHMGET		23
#define SHMCTL		24
1379
1380asmlinkage long
1381sys32_ipc(u32 call, int first, int second, int third, u32 ptr, u32 fifth)
1382{
1383	int version;
1384
1385	version = call >> 16; /* hack for backward compatibility */
1386	call &= 0xffff;
1387
1388	switch (call) {
1389	      case SEMTIMEDOP:
1390		if (fifth)
1391			return compat_sys_semtimedop(first, compat_ptr(ptr),
1392				second, compat_ptr(fifth));
1393		/* else fall through for normal semop() */
1394	      case SEMOP:
1395		/* struct sembuf is the same on 32 and 64bit :)) */
1396		return sys_semtimedop(first, compat_ptr(ptr), second,
1397				      NULL);
1398	      case SEMGET:
1399		return sys_semget(first, second, third);
1400	      case SEMCTL:
1401		return compat_sys_semctl(first, second, third, compat_ptr(ptr));
1402
1403	      case MSGSND:
1404		return compat_sys_msgsnd(first, second, third, compat_ptr(ptr));
1405	      case MSGRCV:
1406		return compat_sys_msgrcv(first, second, fifth, third, version, compat_ptr(ptr));
1407	      case MSGGET:
1408		return sys_msgget((key_t) first, second);
1409	      case MSGCTL:
1410		return compat_sys_msgctl(first, second, compat_ptr(ptr));
1411
1412	      case SHMAT:
1413		return compat_sys_shmat(first, second, third, version, compat_ptr(ptr));
1414		break;
1415	      case SHMDT:
1416		return sys_shmdt(compat_ptr(ptr));
1417	      case SHMGET:
1418		return sys_shmget(first, (unsigned)second, third);
1419	      case SHMCTL:
1420		return compat_sys_shmctl(first, second, compat_ptr(ptr));
1421
1422	      default:
1423		return -ENOSYS;
1424	}
1425	return -EINVAL;
1426}
1427
/* Forward declaration: defined outside this chunk, called by sys32_waitpid() below. */
asmlinkage long
compat_sys_wait4 (compat_pid_t pid, compat_uint_t * stat_addr, int options,
		 struct compat_rusage *ru);
1431
1432asmlinkage long
1433sys32_waitpid (int pid, unsigned int *stat_addr, int options)
1434{
1435	return compat_sys_wait4(pid, stat_addr, options, NULL);
1436}
1437
1438static unsigned int
1439ia32_peek (struct task_struct *child, unsigned long addr, unsigned int *val)
1440{
1441	size_t copied;
1442	unsigned int ret;
1443
1444	copied = access_process_vm(child, addr, val, sizeof(*val), 0);
1445	return (copied != sizeof(ret)) ? -EIO : 0;
1446}
1447
1448static unsigned int
1449ia32_poke (struct task_struct *child, unsigned long addr, unsigned int val)
1450{
1451
1452	if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val))
1453		return -EIO;
1454	return 0;
1455}
1456
/*
 *  The order in which registers are stored in the ptrace regs structure.
 *  getreg() and putreg() divide the byte offset they are given by
 *  sizeof(int) and match the result against these indices.
 */
#define PT_EBX	0
#define PT_ECX	1
#define PT_EDX	2
#define PT_ESI	3
#define PT_EDI	4
#define PT_EBP	5
#define PT_EAX	6
#define PT_DS	7
#define PT_ES	8
#define PT_FS	9
#define PT_GS	10
#define PT_ORIG_EAX 11
#define PT_EIP	12
#define PT_CS	13
#define PT_EFL	14
#define PT_UESP	15
#define PT_SS	16
1477
1478static unsigned int
1479getreg (struct task_struct *child, int regno)
1480{
1481	struct pt_regs *child_regs;
1482
1483	child_regs = task_pt_regs(child);
1484	switch (regno / sizeof(int)) {
1485	      case PT_EBX: return child_regs->r11;
1486	      case PT_ECX: return child_regs->r9;
1487	      case PT_EDX: return child_regs->r10;
1488	      case PT_ESI: return child_regs->r14;
1489	      case PT_EDI: return child_regs->r15;
1490	      case PT_EBP: return child_regs->r13;
1491	      case PT_EAX: return child_regs->r8;
1492	      case PT_ORIG_EAX: return child_regs->r1; /* see dispatch_to_ia32_handler() */
1493	      case PT_EIP: return child_regs->cr_iip;
1494	      case PT_UESP: return child_regs->r12;
1495	      case PT_EFL: return child->thread.eflag;
1496	      case PT_DS: case PT_ES: case PT_FS: case PT_GS: case PT_SS:
1497		return __USER_DS;
1498	      case PT_CS: return __USER_CS;
1499	      default:
1500		printk(KERN_ERR "ia32.getreg(): unknown register %d\n", regno);
1501		break;
1502	}
1503	return 0;
1504}
1505
1506static void
1507putreg (struct task_struct *child, int regno, unsigned int value)
1508{
1509	struct pt_regs *child_regs;
1510
1511	child_regs = task_pt_regs(child);
1512	switch (regno / sizeof(int)) {
1513	      case PT_EBX: child_regs->r11 = value; break;
1514	      case PT_ECX: child_regs->r9 = value; break;
1515	      case PT_EDX: child_regs->r10 = value; break;
1516	      case PT_ESI: child_regs->r14 = value; break;
1517	      case PT_EDI: child_regs->r15 = value; break;
1518	      case PT_EBP: child_regs->r13 = value; break;
1519	      case PT_EAX: child_regs->r8 = value; break;
1520	      case PT_ORIG_EAX: child_regs->r1 = value; break;
1521	      case PT_EIP: child_regs->cr_iip = value; break;
1522	      case PT_UESP: child_regs->r12 = value; break;
1523	      case PT_EFL: child->thread.eflag = value; break;
1524	      case PT_DS: case PT_ES: case PT_FS: case PT_GS: case PT_SS:
1525		if (value != __USER_DS)
1526			printk(KERN_ERR
1527			       "ia32.putreg: attempt to set invalid segment register %d = %x\n",
1528			       regno, value);
1529		break;
1530	      case PT_CS:
1531		if (value != __USER_CS)
1532			printk(KERN_ERR
1533			       "ia32.putreg: attempt to to set invalid segment register %d = %x\n",
1534			       regno, value);
1535		break;
1536	      default:
1537		printk(KERN_ERR "ia32.putreg: unknown register %d\n", regno);
1538		break;
1539	}
1540}
1541
1542static void
1543put_fpreg (int regno, struct _fpreg_ia32 __user *reg, struct pt_regs *ptp,
1544	   struct switch_stack *swp, int tos)
1545{
1546	struct _fpreg_ia32 *f;
1547	char buf[32];
1548
1549	f = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15);
1550	if ((regno += tos) >= 8)
1551		regno -= 8;
1552	switch (regno) {
1553	      case 0:
1554		ia64f2ia32f(f, &ptp->f8);
1555		break;
1556	      case 1:
1557		ia64f2ia32f(f, &ptp->f9);
1558		break;
1559	      case 2:
1560		ia64f2ia32f(f, &ptp->f10);
1561		break;
1562	      case 3:
1563		ia64f2ia32f(f, &ptp->f11);
1564		break;
1565	      case 4:
1566	      case 5:
1567	      case 6:
1568	      case 7:
1569		ia64f2ia32f(f, &swp->f12 + (regno - 4));
1570		break;
1571	}
1572	copy_to_user(reg, f, sizeof(*reg));
1573}
1574
1575static void
1576get_fpreg (int regno, struct _fpreg_ia32 __user *reg, struct pt_regs *ptp,
1577	   struct switch_stack *swp, int tos)
1578{
1579
1580	if ((regno += tos) >= 8)
1581		regno -= 8;
1582	switch (regno) {
1583	      case 0:
1584		copy_from_user(&ptp->f8, reg, sizeof(*reg));
1585		break;
1586	      case 1:
1587		copy_from_user(&ptp->f9, reg, sizeof(*reg));
1588		break;
1589	      case 2:
1590		copy_from_user(&ptp->f10, reg, sizeof(*reg));
1591		break;
1592	      case 3:
1593		copy_from_user(&ptp->f11, reg, sizeof(*reg));
1594		break;
1595	      case 4:
1596	      case 5:
1597	      case 6:
1598	      case 7:
1599		copy_from_user(&swp->f12 + (regno - 4), reg, sizeof(*reg));
1600		break;
1601	}
1602	return;
1603}
1604
1605int
1606save_ia32_fpstate (struct task_struct *tsk, struct ia32_user_i387_struct __user *save)
1607{
1608	struct switch_stack *swp;
1609	struct pt_regs *ptp;
1610	int i, tos;
1611
1612	if (!access_ok(VERIFY_WRITE, save, sizeof(*save)))
1613		return -EFAULT;
1614
1615	__put_user(tsk->thread.fcr & 0xffff, &save->cwd);
1616	__put_user(tsk->thread.fsr & 0xffff, &save->swd);
1617	__put_user((tsk->thread.fsr>>16) & 0xffff, &save->twd);
1618	__put_user(tsk->thread.fir, &save->fip);
1619	__put_user((tsk->thread.fir>>32) & 0xffff, &save->fcs);
1620	__put_user(tsk->thread.fdr, &save->foo);
1621	__put_user((tsk->thread.fdr>>32) & 0xffff, &save->fos);
1622
1623	/*
1624	 *  Stack frames start with 16-bytes of temp space
1625	 */
1626	swp = (struct switch_stack *)(tsk->thread.ksp + 16);
1627	ptp = task_pt_regs(tsk);
1628	tos = (tsk->thread.fsr >> 11) & 7;
1629	for (i = 0; i < 8; i++)
1630		put_fpreg(i, &save->st_space[i], ptp, swp, tos);
1631	return 0;
1632}
1633
1634static int
1635restore_ia32_fpstate (struct task_struct *tsk, struct ia32_user_i387_struct __user *save)
1636{
1637	struct switch_stack *swp;
1638	struct pt_regs *ptp;
1639	int i, tos;
1640	unsigned int fsrlo, fsrhi, num32;
1641
1642	if (!access_ok(VERIFY_READ, save, sizeof(*save)))
1643		return(-EFAULT);
1644
1645	__get_user(num32, (unsigned int __user *)&save->cwd);
1646	tsk->thread.fcr = (tsk->thread.fcr & (~0x1f3f)) | (num32 & 0x1f3f);
1647	__get_user(fsrlo, (unsigned int __user *)&save->swd);
1648	__get_user(fsrhi, (unsigned int __user *)&save->twd);
1649	num32 = (fsrhi << 16) | fsrlo;
1650	tsk->thread.fsr = (tsk->thread.fsr & (~0xffffffff)) | num32;
1651	__get_user(num32, (unsigned int __user *)&save->fip);
1652	tsk->thread.fir = (tsk->thread.fir & (~0xffffffff)) | num32;
1653	__get_user(num32, (unsigned int __user *)&save->foo);
1654	tsk->thread.fdr = (tsk->thread.fdr & (~0xffffffff)) | num32;
1655
1656	/*
1657	 *  Stack frames start with 16-bytes of temp space
1658	 */
1659	swp = (struct switch_stack *)(tsk->thread.ksp + 16);
1660	ptp = task_pt_regs(tsk);
1661	tos = (tsk->thread.fsr >> 11) & 7;
1662	for (i = 0; i < 8; i++)
1663		get_fpreg(i, &save->st_space[i], ptp, swp, tos);
1664	return 0;
1665}
1666
1667int
1668save_ia32_fpxstate (struct task_struct *tsk, struct ia32_user_fxsr_struct __user *save)
1669{
1670	struct switch_stack *swp;
1671	struct pt_regs *ptp;
1672	int i, tos;
1673	unsigned long mxcsr=0;
1674	unsigned long num128[2];
1675
1676	if (!access_ok(VERIFY_WRITE, save, sizeof(*save)))
1677		return -EFAULT;
1678
1679	__put_user(tsk->thread.fcr & 0xffff, &save->cwd);
1680	__put_user(tsk->thread.fsr & 0xffff, &save->swd);
1681	__put_user((tsk->thread.fsr>>16) & 0xffff, &save->twd);
1682	__put_user(tsk->thread.fir, &save->fip);
1683	__put_user((tsk->thread.fir>>32) & 0xffff, &save->fcs);
1684	__put_user(tsk->thread.fdr, &save->foo);
1685	__put_user((tsk->thread.fdr>>32) & 0xffff, &save->fos);
1686
1687        /*
1688         *  Stack frames start with 16-bytes of temp space
1689         */
1690        swp = (struct switch_stack *)(tsk->thread.ksp + 16);
1691        ptp = task_pt_regs(tsk);
1692	tos = (tsk->thread.fsr >> 11) & 7;
1693        for (i = 0; i < 8; i++)
1694		put_fpreg(i, (struct _fpreg_ia32 __user *)&save->st_space[4*i], ptp, swp, tos);
1695
1696	mxcsr = ((tsk->thread.fcr>>32) & 0xff80) | ((tsk->thread.fsr>>32) & 0x3f);
1697	__put_user(mxcsr & 0xffff, &save->mxcsr);
1698	for (i = 0; i < 8; i++) {
1699		memcpy(&(num128[0]), &(swp->f16) + i*2, sizeof(unsigned long));
1700		memcpy(&(num128[1]), &(swp->f17) + i*2, sizeof(unsigned long));
1701		copy_to_user(&save->xmm_space[0] + 4*i, num128, sizeof(struct _xmmreg_ia32));
1702	}
1703	return 0;
1704}
1705
1706static int
1707restore_ia32_fpxstate (struct task_struct *tsk, struct ia32_user_fxsr_struct __user *save)
1708{
1709	struct switch_stack *swp;
1710	struct pt_regs *ptp;
1711	int i, tos;
1712	unsigned int fsrlo, fsrhi, num32;
1713	int mxcsr;
1714	unsigned long num64;
1715	unsigned long num128[2];
1716
1717	if (!access_ok(VERIFY_READ, save, sizeof(*save)))
1718		return(-EFAULT);
1719
1720	__get_user(num32, (unsigned int __user *)&save->cwd);
1721	tsk->thread.fcr = (tsk->thread.fcr & (~0x1f3f)) | (num32 & 0x1f3f);
1722	__get_user(fsrlo, (unsigned int __user *)&save->swd);
1723	__get_user(fsrhi, (unsigned int __user *)&save->twd);
1724	num32 = (fsrhi << 16) | fsrlo;
1725	tsk->thread.fsr = (tsk->thread.fsr & (~0xffffffff)) | num32;
1726	__get_user(num32, (unsigned int __user *)&save->fip);
1727	tsk->thread.fir = (tsk->thread.fir & (~0xffffffff)) | num32;
1728	__get_user(num32, (unsigned int __user *)&save->foo);
1729	tsk->thread.fdr = (tsk->thread.fdr & (~0xffffffff)) | num32;
1730
1731	/*
1732	 *  Stack frames start with 16-bytes of temp space
1733	 */
1734	swp = (struct switch_stack *)(tsk->thread.ksp + 16);
1735	ptp = task_pt_regs(tsk);
1736	tos = (tsk->thread.fsr >> 11) & 7;
1737	for (i = 0; i < 8; i++)
1738	get_fpreg(i, (struct _fpreg_ia32 __user *)&save->st_space[4*i], ptp, swp, tos);
1739
1740	__get_user(mxcsr, (unsigned int __user *)&save->mxcsr);
1741	num64 = mxcsr & 0xff10;
1742	tsk->thread.fcr = (tsk->thread.fcr & (~0xff1000000000UL)) | (num64<<32);
1743	num64 = mxcsr & 0x3f;
1744	tsk->thread.fsr = (tsk->thread.fsr & (~0x3f00000000UL)) | (num64<<32);
1745
1746	for (i = 0; i < 8; i++) {
1747		copy_from_user(num128, &save->xmm_space[0] + 4*i, sizeof(struct _xmmreg_ia32));
1748		memcpy(&(swp->f16) + i*2, &(num128[0]), sizeof(unsigned long));
1749		memcpy(&(swp->f17) + i*2, &(num128[1]), sizeof(unsigned long));
1750	}
1751	return 0;
1752}
1753
1754asmlinkage long
1755sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data)
1756{
1757	struct task_struct *child;
1758	unsigned int value, tmp;
1759	long i, ret;
1760
1761	lock_kernel();
1762	if (request == PTRACE_TRACEME) {
1763		ret = ptrace_traceme();
1764		goto out;
1765	}
1766
1767	child = ptrace_get_task_struct(pid);
1768	if (IS_ERR(child)) {
1769		ret = PTR_ERR(child);
1770		goto out;
1771	}
1772
1773	if (request == PTRACE_ATTACH) {
1774		ret = sys_ptrace(request, pid, addr, data);
1775		goto out_tsk;
1776	}
1777
1778	ret = ptrace_check_attach(child, request == PTRACE_KILL);
1779	if (ret < 0)
1780		goto out_tsk;
1781
1782	switch (request) {
1783	      case PTRACE_PEEKTEXT:
1784	      case PTRACE_PEEKDATA:	/* read word at location addr */
1785		ret = ia32_peek(child, addr, &value);
1786		if (ret == 0)
1787			ret = put_user(value, (unsigned int __user *) compat_ptr(data));
1788		else
1789			ret = -EIO;
1790		goto out_tsk;
1791
1792	      case PTRACE_POKETEXT:
1793	      case PTRACE_POKEDATA:	/* write the word at location addr */
1794		ret = ia32_poke(child, addr, data);
1795		goto out_tsk;
1796
1797	      case PTRACE_PEEKUSR:	/* read word at addr in USER area */
1798		ret = -EIO;
1799		if ((addr & 3) || addr > 17*sizeof(int))
1800			break;
1801
1802		tmp = getreg(child, addr);
1803		if (!put_user(tmp, (unsigned int __user *) compat_ptr(data)))
1804			ret = 0;
1805		break;
1806
1807	      case PTRACE_POKEUSR:	/* write word at addr in USER area */
1808		ret = -EIO;
1809		if ((addr & 3) || addr > 17*sizeof(int))
1810			break;
1811
1812		putreg(child, addr, data);
1813		ret = 0;
1814		break;
1815
1816	      case IA32_PTRACE_GETREGS:
1817		if (!access_ok(VERIFY_WRITE, compat_ptr(data), 17*sizeof(int))) {
1818			ret = -EIO;
1819			break;
1820		}
1821		for (i = 0; i < (int) (17*sizeof(int)); i += sizeof(int) ) {
1822			put_user(getreg(child, i), (unsigned int __user *) compat_ptr(data));
1823			data += sizeof(int);
1824		}
1825		ret = 0;
1826		break;
1827
1828	      case IA32_PTRACE_SETREGS:
1829		if (!access_ok(VERIFY_READ, compat_ptr(data), 17*sizeof(int))) {
1830			ret = -EIO;
1831			break;
1832		}
1833		for (i = 0; i < (int) (17*sizeof(int)); i += sizeof(int) ) {
1834			get_user(tmp, (unsigned int __user *) compat_ptr(data));
1835			putreg(child, i, tmp);
1836			data += sizeof(int);
1837		}
1838		ret = 0;
1839		break;
1840
1841	      case IA32_PTRACE_GETFPREGS:
1842		ret = save_ia32_fpstate(child, (struct ia32_user_i387_struct __user *)
1843					compat_ptr(data));
1844		break;
1845
1846	      case IA32_PTRACE_GETFPXREGS:
1847		ret = save_ia32_fpxstate(child, (struct ia32_user_fxsr_struct __user *)
1848					 compat_ptr(data));
1849		break;
1850
1851	      case IA32_PTRACE_SETFPREGS:
1852		ret = restore_ia32_fpstate(child, (struct ia32_user_i387_struct __user *)
1853					   compat_ptr(data));
1854		break;
1855
1856	      case IA32_PTRACE_SETFPXREGS:
1857		ret = restore_ia32_fpxstate(child, (struct ia32_user_fxsr_struct __user *)
1858					    compat_ptr(data));
1859		break;
1860
1861	      case PTRACE_GETEVENTMSG:
1862		ret = put_user(child->ptrace_message, (unsigned int __user *) compat_ptr(data));
1863		break;
1864
1865	      case PTRACE_SYSCALL:	/* continue, stop after next syscall */
1866	      case PTRACE_CONT:		/* restart after signal. */
1867	      case PTRACE_KILL:
1868	      case PTRACE_SINGLESTEP:	/* execute chile for one instruction */
1869	      case PTRACE_DETACH:	/* detach a process */
1870		ret = sys_ptrace(request, pid, addr, data);
1871		break;
1872
1873	      default:
1874		ret = ptrace_request(child, request, addr, data);
1875		break;
1876
1877	}
1878  out_tsk:
1879	put_task_struct(child);
1880  out:
1881	unlock_kernel();
1882	return ret;
1883}
1884
/*
 * 32-bit layout of the signal-stack descriptor exchanged with user space
 * by sys32_sigaltstack().
 */
typedef struct {
	unsigned int	ss_sp;		/* stack base, as a 32-bit user address */
	unsigned int	ss_flags;
	unsigned int	ss_size;	/* stack size in bytes */
} ia32_stack_t;
1890
1891asmlinkage long
1892sys32_sigaltstack (ia32_stack_t __user *uss32, ia32_stack_t __user *uoss32,
1893		   long arg2, long arg3, long arg4, long arg5, long arg6,
1894		   long arg7, struct pt_regs pt)
1895{
1896	stack_t uss, uoss;
1897	ia32_stack_t buf32;
1898	int ret;
1899	mm_segment_t old_fs = get_fs();
1900
1901	if (uss32) {
1902		if (copy_from_user(&buf32, uss32, sizeof(ia32_stack_t)))
1903			return -EFAULT;
1904		uss.ss_sp = (void __user *) (long) buf32.ss_sp;
1905		uss.ss_flags = buf32.ss_flags;
1906		/* MINSIGSTKSZ is different for ia32 vs ia64. We lie here to pass the
1907	           check and set it to the user requested value later */
1908		if ((buf32.ss_flags != SS_DISABLE) && (buf32.ss_size < MINSIGSTKSZ_IA32)) {
1909			ret = -ENOMEM;
1910			goto out;
1911		}
1912		uss.ss_size = MINSIGSTKSZ;
1913	}
1914	set_fs(KERNEL_DS);
1915	ret = do_sigaltstack(uss32 ? (stack_t __user *) &uss : NULL,
1916			     (stack_t __user *) &uoss, pt.r12);
1917 	current->sas_ss_size = buf32.ss_size;
1918	set_fs(old_fs);
1919out:
1920	if (ret < 0)
1921		return(ret);
1922	if (uoss32) {
1923		buf32.ss_sp = (long __user) uoss.ss_sp;
1924		buf32.ss_flags = uoss.ss_flags;
1925		buf32.ss_size = uoss.ss_size;
1926		if (copy_to_user(uoss32, &buf32, sizeof(ia32_stack_t)))
1927			return -EFAULT;
1928	}
1929	return ret;
1930}
1931
/*
 * IA-32 pause(): mark the current task as interruptibly sleeping, call the
 * scheduler, and return -ERESTARTNOHAND once the task runs again.
 */
asmlinkage int
sys32_pause (void)
{
	current->state = TASK_INTERRUPTIBLE;
	schedule();
	return -ERESTARTNOHAND;
}
1939
1940asmlinkage int
1941sys32_msync (unsigned int start, unsigned int len, int flags)
1942{
1943	unsigned int addr;
1944
1945	if (OFFSET4K(start))
1946		return -EINVAL;
1947	addr = PAGE_START(start);
1948	return sys_msync(addr, len + (start - addr), flags);
1949}
1950
/*
 * 32-bit argument block read from user space by sys32_sysctl().  The
 * name, oldval, oldlenp and newval fields hold 32-bit user addresses that
 * are converted with compat_ptr() there.
 */
struct sysctl32 {
	unsigned int	name;		/* 32-bit user pointer */
	int		nlen;
	unsigned int	oldval;		/* 32-bit user pointer */
	unsigned int	oldlenp;	/* 32-bit user pointer to a 32-bit length */
	unsigned int	newval;		/* 32-bit user pointer */
	unsigned int	newlen;
	unsigned int	__unused[4];	/* not referenced by sys32_sysctl() */
};
1960
#ifdef CONFIG_SYSCTL_SYSCALL
/*
 * IA-32 sysctl().  The 32-bit argument block is read from user space, its
 * pointers are widened, and do_sysctl() is called.  The old-value length
 * is a 32-bit quantity in user space, so it is read into a kernel size_t
 * first and written back afterwards.
 *
 * Returns -EFAULT on a user-memory fault, otherwise the result of
 * do_sysctl().
 */
asmlinkage long
sys32_sysctl (struct sysctl32 __user *args)
{
	mm_segment_t saved_fs = get_fs ();
	struct sysctl32 a32;
	void __user *oldvalp, *newvalp;
	int __user *namep;
	int __user *oldlenp32;
	size_t oldlen;
	long ret;

	if (copy_from_user(&a32, args, sizeof(a32)))
		return -EFAULT;

	namep = (int __user *) compat_ptr(a32.name);
	oldvalp = compat_ptr(a32.oldval);
	newvalp = compat_ptr(a32.newval);
	oldlenp32 = (int __user *) compat_ptr(a32.oldlenp);

	/*
	 * Address checking is switched off around do_sysctl() because OLDLEN
	 * lives in kernel space, so the user pointers are validated up front.
	 * With 32-bit addresses access_ok() always succeeds, which makes this
	 * an expensive NOP, but so what...
	 */
	if (oldvalp && get_user(oldlen, oldlenp32))
		return -EFAULT;
	if (!access_ok(VERIFY_WRITE, namep, 0))
		return -EFAULT;
	if (!access_ok(VERIFY_WRITE, oldvalp, 0))
		return -EFAULT;
	if (!access_ok(VERIFY_WRITE, newvalp, 0))
		return -EFAULT;

	set_fs(KERNEL_DS);
	lock_kernel();
	ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *) &oldlen,
			newvalp, (size_t) a32.newlen);
	unlock_kernel();
	set_fs(saved_fs);

	/* hand the (possibly updated) old-value length back as 32 bits */
	if (oldvalp && put_user (oldlen, oldlenp32))
		return -EFAULT;

	return ret;
}
#endif
2005
2006asmlinkage long
2007sys32_newuname (struct new_utsname __user *name)
2008{
2009	int ret = sys_newuname(name);
2010
2011	if (!ret)
2012		if (copy_to_user(name->machine, "i686\0\0\0", 8))
2013			ret = -EFAULT;
2014	return ret;
2015}
2016
2017asmlinkage long
2018sys32_getresuid16 (u16 __user *ruid, u16 __user *euid, u16 __user *suid)
2019{
2020	uid_t a, b, c;
2021	int ret;
2022	mm_segment_t old_fs = get_fs();
2023
2024	set_fs(KERNEL_DS);
2025	ret = sys_getresuid((uid_t __user *) &a, (uid_t __user *) &b, (uid_t __user *) &c);
2026	set_fs(old_fs);
2027
2028	if (put_user(a, ruid) || put_user(b, euid) || put_user(c, suid))
2029		return -EFAULT;
2030	return ret;
2031}
2032
2033asmlinkage long
2034sys32_getresgid16 (u16 __user *rgid, u16 __user *egid, u16 __user *sgid)
2035{
2036	gid_t a, b, c;
2037	int ret;
2038	mm_segment_t old_fs = get_fs();
2039
2040	set_fs(KERNEL_DS);
2041	ret = sys_getresgid((gid_t __user *) &a, (gid_t __user *) &b, (gid_t __user *) &c);
2042	set_fs(old_fs);
2043
2044	if (ret)
2045		return ret;
2046
2047	return put_user(a, rgid) | put_user(b, egid) | put_user(c, sgid);
2048}
2049
2050asmlinkage long
2051sys32_lseek (unsigned int fd, int offset, unsigned int whence)
2052{
2053	/* Sign-extension of "offset" is important here... */
2054	return sys_lseek(fd, offset, whence);
2055}
2056
2057static int
2058groups16_to_user(short __user *grouplist, struct group_info *group_info)
2059{
2060	int i;
2061	short group;
2062
2063	for (i = 0; i < group_info->ngroups; i++) {
2064		group = (short)GROUP_AT(group_info, i);
2065		if (put_user(group, grouplist+i))
2066			return -EFAULT;
2067	}
2068
2069	return 0;
2070}
2071
2072static int
2073groups16_from_user(struct group_info *group_info, short __user *grouplist)
2074{
2075	int i;
2076	short group;
2077
2078	for (i = 0; i < group_info->ngroups; i++) {
2079		if (get_user(group, grouplist+i))
2080			return  -EFAULT;
2081		GROUP_AT(group_info, i) = (gid_t)group;
2082	}
2083
2084	return 0;
2085}
2086
2087asmlinkage long
2088sys32_getgroups16 (int gidsetsize, short __user *grouplist)
2089{
2090	int i;
2091
2092	if (gidsetsize < 0)
2093		return -EINVAL;
2094
2095	get_group_info(current->group_info);
2096	i = current->group_info->ngroups;
2097	if (gidsetsize) {
2098		if (i > gidsetsize) {
2099			i = -EINVAL;
2100			goto out;
2101		}
2102		if (groups16_to_user(grouplist, current->group_info)) {
2103			i = -EFAULT;
2104			goto out;
2105		}
2106	}
2107out:
2108	put_group_info(current->group_info);
2109	return i;
2110}
2111
2112asmlinkage long
2113sys32_setgroups16 (int gidsetsize, short __user *grouplist)
2114{
2115	struct group_info *group_info;
2116	int retval;
2117
2118	if (!capable(CAP_SETGID))
2119		return -EPERM;
2120	if ((unsigned)gidsetsize > NGROUPS_MAX)
2121		return -EINVAL;
2122
2123	group_info = groups_alloc(gidsetsize);
2124	if (!group_info)
2125		return -ENOMEM;
2126	retval = groups16_from_user(group_info, grouplist);
2127	if (retval) {
2128		put_group_info(group_info);
2129		return retval;
2130	}
2131
2132	retval = set_current_groups(group_info);
2133	put_group_info(group_info);
2134
2135	return retval;
2136}
2137
2138asmlinkage long
2139sys32_truncate64 (unsigned int path, unsigned int len_lo, unsigned int len_hi)
2140{
2141	return sys_truncate(compat_ptr(path), ((unsigned long) len_hi << 32) | len_lo);
2142}
2143
2144asmlinkage long
2145sys32_ftruncate64 (int fd, unsigned int len_lo, unsigned int len_hi)
2146{
2147	return sys_ftruncate(fd, ((unsigned long) len_hi << 32) | len_lo);
2148}
2149
2150static int
2151putstat64 (struct stat64 __user *ubuf, struct kstat *kbuf)
2152{
2153	int err;
2154	u64 hdev;
2155
2156	if (clear_user(ubuf, sizeof(*ubuf)))
2157		return -EFAULT;
2158
2159	hdev = huge_encode_dev(kbuf->dev);
2160	err  = __put_user(hdev, (u32 __user*)&ubuf->st_dev);
2161	err |= __put_user(hdev >> 32, ((u32 __user*)&ubuf->st_dev) + 1);
2162	err |= __put_user(kbuf->ino, &ubuf->__st_ino);
2163	err |= __put_user(kbuf->ino, &ubuf->st_ino_lo);
2164	err |= __put_user(kbuf->ino >> 32, &ubuf->st_ino_hi);
2165	err |= __put_user(kbuf->mode, &ubuf->st_mode);
2166	err |= __put_user(kbuf->nlink, &ubuf->st_nlink);
2167	err |= __put_user(kbuf->uid, &ubuf->st_uid);
2168	err |= __put_user(kbuf->gid, &ubuf->st_gid);
2169	hdev = huge_encode_dev(kbuf->rdev);
2170	err  = __put_user(hdev, (u32 __user*)&ubuf->st_rdev);
2171	err |= __put_user(hdev >> 32, ((u32 __user*)&ubuf->st_rdev) + 1);
2172	err |= __put_user(kbuf->size, &ubuf->st_size_lo);
2173	err |= __put_user((kbuf->size >> 32), &ubuf->st_size_hi);
2174	err |= __put_user(kbuf->atime.tv_sec, &ubuf->st_atime);
2175	err |= __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec);
2176	err |= __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime);
2177	err |= __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec);
2178	err |= __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime);
2179	err |= __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec);
2180	err |= __put_user(kbuf->blksize, &ubuf->st_blksize);
2181	err |= __put_user(kbuf->blocks, &ubuf->st_blocks);
2182	return err;
2183}
2184
2185asmlinkage long
2186sys32_stat64 (char __user *filename, struct stat64 __user *statbuf)
2187{
2188	struct kstat s;
2189	long ret = vfs_stat(filename, &s);
2190	if (!ret)
2191		ret = putstat64(statbuf, &s);
2192	return ret;
2193}
2194
2195asmlinkage long
2196sys32_lstat64 (char __user *filename, struct stat64 __user *statbuf)
2197{
2198	struct kstat s;
2199	long ret = vfs_lstat(filename, &s);
2200	if (!ret)
2201		ret = putstat64(statbuf, &s);
2202	return ret;
2203}
2204
2205asmlinkage long
2206sys32_fstat64 (unsigned int fd, struct stat64 __user *statbuf)
2207{
2208	struct kstat s;
2209	long ret = vfs_fstat(fd, &s);
2210	if (!ret)
2211		ret = putstat64(statbuf, &s);
2212	return ret;
2213}
2214
2215asmlinkage long
2216sys32_sched_rr_get_interval (pid_t pid, struct compat_timespec __user *interval)
2217{
2218	mm_segment_t old_fs = get_fs();
2219	struct timespec t;
2220	long ret;
2221
2222	set_fs(KERNEL_DS);
2223	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *) &t);
2224	set_fs(old_fs);
2225	if (put_compat_timespec(&t, interval))
2226		return -EFAULT;
2227	return ret;
2228}
2229
2230asmlinkage long
2231sys32_pread (unsigned int fd, void __user *buf, unsigned int count, u32 pos_lo, u32 pos_hi)
2232{
2233	return sys_pread64(fd, buf, count, ((unsigned long) pos_hi << 32) | pos_lo);
2234}
2235
2236asmlinkage long
2237sys32_pwrite (unsigned int fd, void __user *buf, unsigned int count, u32 pos_lo, u32 pos_hi)
2238{
2239	return sys_pwrite64(fd, buf, count, ((unsigned long) pos_hi << 32) | pos_lo);
2240}
2241
2242asmlinkage long
2243sys32_sendfile (int out_fd, int in_fd, int __user *offset, unsigned int count)
2244{
2245	mm_segment_t old_fs = get_fs();
2246	long ret;
2247	off_t of;
2248
2249	if (offset && get_user(of, offset))
2250		return -EFAULT;
2251
2252	set_fs(KERNEL_DS);
2253	ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *) &of : NULL, count);
2254	set_fs(old_fs);
2255
2256	if (offset && put_user(of, offset))
2257		return -EFAULT;
2258
2259	return ret;
2260}
2261
2262asmlinkage long
2263sys32_personality (unsigned int personality)
2264{
2265	long ret;
2266
2267	if (current->personality == PER_LINUX32 && personality == PER_LINUX)
2268		personality = PER_LINUX32;
2269	ret = sys_personality(personality);
2270	if (ret == PER_LINUX32)
2271		ret = PER_LINUX;
2272	return ret;
2273}
2274
2275asmlinkage unsigned long
2276sys32_brk (unsigned int brk)
2277{
2278	unsigned long ret, obrk;
2279	struct mm_struct *mm = current->mm;
2280
2281	obrk = mm->brk;
2282	ret = sys_brk(brk);
2283	if (ret < obrk)
2284		clear_user(compat_ptr(ret), PAGE_ALIGN(ret) - ret);
2285	return ret;
2286}
2287
/*
 * Structure for ia32 emulation on ia64: the epoll event layout exchanged
 * with 32-bit callers by sys32_epoll_ctl() and sys32_epoll_wait().  The
 * 64-bit epoll_event.data value is carried as two 32-bit words.
 */
struct epoll_event32
{
	u32 events;	/* copied to/from epoll_event.events */
	u32 data[2];	/* data[0]: low 32 bits, data[1]: high 32 bits of epoll_event.data */
};
2294
2295asmlinkage long
2296sys32_epoll_ctl(int epfd, int op, int fd, struct epoll_event32 __user *event)
2297{
2298	mm_segment_t old_fs = get_fs();
2299	struct epoll_event event64;
2300	int error;
2301	u32 data_halfword;
2302
2303	if (!access_ok(VERIFY_READ, event, sizeof(struct epoll_event32)))
2304		return -EFAULT;
2305
2306	__get_user(event64.events, &event->events);
2307	__get_user(data_halfword, &event->data[0]);
2308	event64.data = data_halfword;
2309	__get_user(data_halfword, &event->data[1]);
2310 	event64.data |= (u64)data_halfword << 32;
2311
2312	set_fs(KERNEL_DS);
2313	error = sys_epoll_ctl(epfd, op, fd, (struct epoll_event __user *) &event64);
2314	set_fs(old_fs);
2315
2316	return error;
2317}
2318
2319asmlinkage long
2320sys32_epoll_wait(int epfd, struct epoll_event32 __user * events, int maxevents,
2321		 int timeout)
2322{
2323	struct epoll_event *events64 = NULL;
2324	mm_segment_t old_fs = get_fs();
2325	int numevents, size;
2326	int evt_idx;
2327	int do_free_pages = 0;
2328
2329	if (maxevents <= 0) {
2330		return -EINVAL;
2331	}
2332
2333	/* Verify that the area passed by the user is writeable */
2334	if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event32)))
2335		return -EFAULT;
2336
2337	/*
2338 	 * Allocate space for the intermediate copy.  If the space needed
2339	 * is large enough to cause kmalloc to fail, then try again with
2340	 * __get_free_pages.
2341	 */
2342	size = maxevents * sizeof(struct epoll_event);
2343	events64 = kmalloc(size, GFP_KERNEL);
2344	if (events64 == NULL) {
2345		events64 = (struct epoll_event *)
2346				__get_free_pages(GFP_KERNEL, get_order(size));
2347		if (events64 == NULL)
2348			return -ENOMEM;
2349		do_free_pages = 1;
2350	}
2351
2352	/* Do the system call */
2353	set_fs(KERNEL_DS); /* copy_to/from_user should work on kernel mem*/
2354	numevents = sys_epoll_wait(epfd, (struct epoll_event __user *) events64,
2355				   maxevents, timeout);
2356	set_fs(old_fs);
2357
2358	/* Don't modify userspace memory if we're returning an error */
2359	if (numevents > 0) {
2360		/* Translate the 64-bit structures back into the 32-bit
2361		   structures */
2362		for (evt_idx = 0; evt_idx < numevents; evt_idx++) {
2363			__put_user(events64[evt_idx].events,
2364				   &events[evt_idx].events);
2365			__put_user((u32)events64[evt_idx].data,
2366				   &events[evt_idx].data[0]);
2367			__put_user((u32)(events64[evt_idx].data >> 32),
2368				   &events[evt_idx].data[1]);
2369		}
2370	}
2371
2372	if (do_free_pages)
2373		free_pages((unsigned long) events64, get_order(size));
2374	else
2375		kfree(events64);
2376	return numevents;
2377}
2378
2379/*
2380 * Get a yet unused TLS descriptor index.
2381 */
2382static int
2383get_free_idx (void)
2384{
2385	struct thread_struct *t = &current->thread;
2386	int idx;
2387
2388	for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
2389		if (desc_empty(t->tls_array + idx))
2390			return idx + GDT_ENTRY_TLS_MIN;
2391	return -ESRCH;
2392}
2393
2394/*
2395 * Set a given TLS descriptor:
2396 */
2397asmlinkage int
2398sys32_set_thread_area (struct ia32_user_desc __user *u_info)
2399{
2400	struct thread_struct *t = &current->thread;
2401	struct ia32_user_desc info;
2402	struct desc_struct *desc;
2403	int cpu, idx;
2404
2405	if (copy_from_user(&info, u_info, sizeof(info)))
2406		return -EFAULT;
2407	idx = info.entry_number;
2408
2409	/*
2410	 * index -1 means the kernel should try to find and allocate an empty descriptor:
2411	 */
2412	if (idx == -1) {
2413		idx = get_free_idx();
2414		if (idx < 0)
2415			return idx;
2416		if (put_user(idx, &u_info->entry_number))
2417			return -EFAULT;
2418	}
2419
2420	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
2421		return -EINVAL;
2422
2423	desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
2424
2425	cpu = smp_processor_id();
2426
2427	if (LDT_empty(&info)) {
2428		desc->a = 0;
2429		desc->b = 0;
2430	} else {
2431		desc->a = LDT_entry_a(&info);
2432		desc->b = LDT_entry_b(&info);
2433	}
2434	load_TLS(t, cpu);
2435	return 0;
2436}
2437
2438/*
2439 * Get the current Thread-Local Storage area:
2440 */
2441
/*
 * Field extractors for a descriptor given as a pointer to an object with
 * words "a" and "b".
 *
 * TLS_DESC_B_FIELD() pulls a (shift, mask) field out of word "b"; the
 * single- and two-bit getters below are expressed through it.
 */
#define TLS_DESC_B_FIELD(desc, shift, mask)	(((desc)->b >> (shift)) & (mask))

/* base: a[31:16] -> bits 15:0, b[7:0] -> bits 23:16, b[31:24] -> bits 31:24 */
#define GET_BASE(desc) (			\
	( (desc)->b        & 0xff000000) |	\
	(((desc)->b & 0x000000ff) << 16) |	\
	(((desc)->a >> 16) & 0x0000ffff)   )

/* limit: a[15:0] -> bits 15:0, b[19:16] -> bits 19:16 */
#define GET_LIMIT(desc) (			\
	((desc)->b & 0xf0000) |			\
	 ((desc)->a & 0x0ffff) )

#define GET_32BIT(desc)		TLS_DESC_B_FIELD(desc, 22, 1)
#define GET_CONTENTS(desc)	TLS_DESC_B_FIELD(desc, 10, 3)
#define GET_WRITABLE(desc)	TLS_DESC_B_FIELD(desc,  9, 1)
#define GET_LIMIT_PAGES(desc)	TLS_DESC_B_FIELD(desc, 23, 1)
#define GET_PRESENT(desc)	TLS_DESC_B_FIELD(desc, 15, 1)
#define GET_USEABLE(desc)	TLS_DESC_B_FIELD(desc, 20, 1)
2457
2458asmlinkage int
2459sys32_get_thread_area (struct ia32_user_desc __user *u_info)
2460{
2461	struct ia32_user_desc info;
2462	struct desc_struct *desc;
2463	int idx;
2464
2465	if (get_user(idx, &u_info->entry_number))
2466		return -EFAULT;
2467	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
2468		return -EINVAL;
2469
2470	desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
2471
2472	info.entry_number = idx;
2473	info.base_addr = GET_BASE(desc);
2474	info.limit = GET_LIMIT(desc);
2475	info.seg_32bit = GET_32BIT(desc);
2476	info.contents = GET_CONTENTS(desc);
2477	info.read_exec_only = !GET_WRITABLE(desc);
2478	info.limit_in_pages = GET_LIMIT_PAGES(desc);
2479	info.seg_not_present = !GET_PRESENT(desc);
2480	info.useable = GET_USEABLE(desc);
2481
2482	if (copy_to_user(u_info, &info, sizeof(info)))
2483		return -EFAULT;
2484	return 0;
2485}
2486
2487long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
2488			__u32 len_low, __u32 len_high, int advice)
2489{
2490	return sys_fadvise64_64(fd,
2491			       (((u64)offset_high)<<32) | offset_low,
2492			       (((u64)len_high)<<32) | len_low,
2493			       advice);
2494}
2495
2496#ifdef	NOTYET      /* UNTESTED FOR IA64 FROM HERE DOWN */
2497
2498asmlinkage long sys32_setreuid(compat_uid_t ruid, compat_uid_t euid)
2499{
2500	uid_t sruid, seuid;
2501
2502	sruid = (ruid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)ruid);
2503	seuid = (euid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)euid);
2504	return sys_setreuid(sruid, seuid);
2505}
2506
2507asmlinkage long
2508sys32_setresuid(compat_uid_t ruid, compat_uid_t euid,
2509		compat_uid_t suid)
2510{
2511	uid_t sruid, seuid, ssuid;
2512
2513	sruid = (ruid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)ruid);
2514	seuid = (euid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)euid);
2515	ssuid = (suid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)suid);
2516	return sys_setresuid(sruid, seuid, ssuid);
2517}
2518
2519asmlinkage long
2520sys32_setregid(compat_gid_t rgid, compat_gid_t egid)
2521{
2522	gid_t srgid, segid;
2523
2524	srgid = (rgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)rgid);
2525	segid = (egid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)egid);
2526	return sys_setregid(srgid, segid);
2527}
2528
2529asmlinkage long
2530sys32_setresgid(compat_gid_t rgid, compat_gid_t egid,
2531		compat_gid_t sgid)
2532{
2533	gid_t srgid, segid, ssgid;
2534
2535	srgid = (rgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)rgid);
2536	segid = (egid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)egid);
2537	ssgid = (sgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)sgid);
2538	return sys_setresgid(srgid, segid, ssgid);
2539}
2540#endif /* NOTYET */
2541