sys_pipe.c revision 121256
113675Sdyson/*
213675Sdyson * Copyright (c) 1996 John S. Dyson
313675Sdyson * All rights reserved.
413675Sdyson *
513675Sdyson * Redistribution and use in source and binary forms, with or without
613675Sdyson * modification, are permitted provided that the following conditions
713675Sdyson * are met:
813675Sdyson * 1. Redistributions of source code must retain the above copyright
913675Sdyson *    notice immediately at the beginning of the file, without modification,
1013675Sdyson *    this list of conditions, and the following disclaimer.
1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright
1213675Sdyson *    notice, this list of conditions and the following disclaimer in the
1313675Sdyson *    documentation and/or other materials provided with the distribution.
1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author
1513675Sdyson *    John S. Dyson.
1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions
1713675Sdyson *    are met.
1813675Sdyson */
1913675Sdyson
2013675Sdyson/*
2113675Sdyson * This file contains a high-performance replacement for the socket-based
2213675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
2313675Sdyson * all features of sockets, but does do everything that pipes normally
2413675Sdyson * do.
2513675Sdyson */
2613675Sdyson
2713907Sdyson/*
2813907Sdyson * This code has two modes of operation, a small write mode and a large
2913907Sdyson * write mode.  The small write mode acts like conventional pipes with
3013907Sdyson * a kernel buffer.  If the amount to be written is less than PIPE_MINDIRECT,
3113907Sdyson * the "normal" pipe buffering is done.  If the write is between PIPE_MINDIRECT
3213907Sdyson * and PIPE_SIZE in size, the writer's buffer is fully mapped and wired into
3313907Sdyson * the kernel, and the receiving process can copy the data directly from the
3413907Sdyson * pages of the sending process.
3513907Sdyson *
3613907Sdyson * If the sending process receives a signal, it is possible that it will
3713913Sdyson * go away, and certainly its address space can change, because control
3813907Sdyson * is returned to the user-mode side.  In that case, the pipe code
3913907Sdyson * arranges to copy the buffer supplied by the user process to a pageable
4013907Sdyson * kernel buffer, and the receiving process will grab the data from the
4113907Sdyson * pageable kernel buffer.  Since signals don't happen all that often,
4213907Sdyson * the copy operation is normally eliminated.
4313907Sdyson *
4413907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
4513907Sdyson * happen for small transfers so that the system will not spend all of
46118764Ssilby * its time context switching.
47117325Ssilby *
48118764Ssilby * In order to limit the resource use of pipes, two sysctls exist:
49117325Ssilby *
50118764Ssilby * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
51118764Ssilby * address space available to us in pipe_map.  Whenever the amount in use
52118764Ssilby * exceeds half of this value, all new pipes will be created with size
53118764Ssilby * SMALL_PIPE_SIZE, rather than PIPE_SIZE.  Big pipe creation will be limited
54118764Ssilby * as well.  This value is loader tunable only.
55117325Ssilby *
56117325Ssilby * kern.ipc.maxpipekvawired - This value limits the amount of memory that may
57117325Ssilby * be wired in order to facilitate direct copies using page flipping.
58117325Ssilby * Whenever this value is exceeded, pipes will fall back to using regular
59118764Ssilby * copies.  This value is sysctl controllable at all times.
60117325Ssilby *
61117325Ssilby * These values are autotuned in subr_param.c.
62117325Ssilby *
63117325Ssilby * Memory usage may be monitored through the sysctls
64117325Ssilby * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired.
65117325Ssilby *
6613907Sdyson */
6713907Sdyson
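/*
 * A minimal userland sketch of inspecting these limits and counters via
 * sysctl(3), assuming only the kern.ipc.* names declared further down in
 * this file:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int kva, wired;
 *		size_t len;
 *
 *		len = sizeof(kva);
 *		if (sysctlbyname("kern.ipc.pipekva", &kva, &len, NULL, 0) == 0)
 *			printf("pipe KVA in use: %d\n", kva);
 *		len = sizeof(wired);
 *		if (sysctlbyname("kern.ipc.pipekvawired", &wired, &len,
 *		    NULL, 0) == 0)
 *			printf("wired pipe KVA in use: %d\n", wired);
 *		return (0);
 *	}
 */
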
68116182Sobrien#include <sys/cdefs.h>
69116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 121256 2003-10-19 20:41:07Z dwmalone $");
70116182Sobrien
71101768Srwatson#include "opt_mac.h"
72101768Srwatson
7313675Sdyson#include <sys/param.h>
7413675Sdyson#include <sys/systm.h>
7524131Sbde#include <sys/fcntl.h>
7613675Sdyson#include <sys/file.h>
7713675Sdyson#include <sys/filedesc.h>
7824206Sbde#include <sys/filio.h>
7991372Salfred#include <sys/kernel.h>
8076166Smarkm#include <sys/lock.h>
81101768Srwatson#include <sys/mac.h>
8276827Salfred#include <sys/mutex.h>
8324206Sbde#include <sys/ttycom.h>
8413675Sdyson#include <sys/stat.h>
8591968Salfred#include <sys/malloc.h>
8629356Speter#include <sys/poll.h>
8770834Swollman#include <sys/selinfo.h>
8813675Sdyson#include <sys/signalvar.h>
89117325Ssilby#include <sys/sysctl.h>
9013675Sdyson#include <sys/sysproto.h>
9113675Sdyson#include <sys/pipe.h>
9276166Smarkm#include <sys/proc.h>
9355112Sbde#include <sys/vnode.h>
9434924Sbde#include <sys/uio.h>
9559288Sjlemon#include <sys/event.h>
9613675Sdyson
9713675Sdyson#include <vm/vm.h>
9813675Sdyson#include <vm/vm_param.h>
9913675Sdyson#include <vm/vm_object.h>
10013675Sdyson#include <vm/vm_kern.h>
10113675Sdyson#include <vm/vm_extern.h>
10213675Sdyson#include <vm/pmap.h>
10313675Sdyson#include <vm/vm_map.h>
10413907Sdyson#include <vm/vm_page.h>
10592751Sjeff#include <vm/uma.h>
10613675Sdyson
10714037Sdyson/*
10814037Sdyson * Use this define if you want to disable *fancy* VM things.  Expect an
10914037Sdyson * approx 30% decrease in transfer rate.  This could be useful for
11014037Sdyson * NetBSD or OpenBSD.
11114037Sdyson */
11214037Sdyson/* #define PIPE_NODIRECT */
11314037Sdyson
11414037Sdyson/*
11514037Sdyson * interfaces to the outside world
11614037Sdyson */
117108255Sphkstatic fo_rdwr_t	pipe_read;
118108255Sphkstatic fo_rdwr_t	pipe_write;
119108255Sphkstatic fo_ioctl_t	pipe_ioctl;
120108255Sphkstatic fo_poll_t	pipe_poll;
121108255Sphkstatic fo_kqfilter_t	pipe_kqfilter;
122108255Sphkstatic fo_stat_t	pipe_stat;
123108255Sphkstatic fo_close_t	pipe_close;
12413675Sdyson
12572521Sjlemonstatic struct fileops pipeops = {
126116546Sphk	.fo_read = pipe_read,
127116546Sphk	.fo_write = pipe_write,
128116546Sphk	.fo_ioctl = pipe_ioctl,
129116546Sphk	.fo_poll = pipe_poll,
130116546Sphk	.fo_kqfilter = pipe_kqfilter,
131116546Sphk	.fo_stat = pipe_stat,
132116546Sphk	.fo_close = pipe_close,
133116546Sphk	.fo_flags = DFLAG_PASSABLE
13472521Sjlemon};
13513675Sdyson
13659288Sjlemonstatic void	filt_pipedetach(struct knote *kn);
13759288Sjlemonstatic int	filt_piperead(struct knote *kn, long hint);
13859288Sjlemonstatic int	filt_pipewrite(struct knote *kn, long hint);
13959288Sjlemon
14072521Sjlemonstatic struct filterops pipe_rfiltops =
14172521Sjlemon	{ 1, NULL, filt_pipedetach, filt_piperead };
14272521Sjlemonstatic struct filterops pipe_wfiltops =
14372521Sjlemon	{ 1, NULL, filt_pipedetach, filt_pipewrite };
14459288Sjlemon
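/*
 * A minimal userland sketch of how the filters above are reached through
 * the standard kqueue(2)/kevent(2) interface, assuming "fds" came from
 * pipe(2):
 *
 *	#include <sys/types.h>
 *	#include <sys/event.h>
 *	#include <sys/time.h>
 *
 *	struct kevent kev;
 *	int kq;
 *
 *	kq = kqueue();
 *	EV_SET(&kev, fds[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * Registering EVFILT_READ (or EVFILT_WRITE) on a pipe descriptor ends up in
 * pipe_kqfilter() below, which attaches pipe_rfiltops or pipe_wfiltops.
 */
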
14513675Sdyson/*
14613675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe
14713675Sdyson * space is pageable.  The pipe code will try to maintain locality of
14813675Sdyson * reference for performance reasons, so small amounts of outstanding I/O
14913675Sdyson * will not wipe the cache.
15013675Sdyson */
15113907Sdyson#define MINPIPESIZE (PIPE_SIZE/3)
15213907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3)
15313675Sdyson
15413907Sdyson/*
15517163Sdyson * Limit the number of "big" pipes
15617163Sdyson */
15717163Sdyson#define LIMITBIGPIPES	32
15833181Seivindstatic int nbigpipe;
15917163Sdyson
160117325Ssilbystatic int amountpipes;
16117124Sbdestatic int amountpipekva;
162117325Ssilbystatic int amountpipekvawired;
16313907Sdyson
164117325SsilbySYSCTL_DECL(_kern_ipc);
165117325Ssilby
166118764SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD,
167117325Ssilby	   &maxpipekva, 0, "Pipe KVA limit");
168117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
169117325Ssilby	   &maxpipekvawired, 0, "Pipe KVA wired limit");
170117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
171117364Ssilby	   &amountpipes, 0, "Current # of pipes");
172117364SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
173117364Ssilby	   &nbigpipe, 0, "Current # of big pipes");
174117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
175117325Ssilby	   &amountpipekva, 0, "Pipe KVA usage");
176117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
177117325Ssilby	   &amountpipekvawired, 0, "Pipe wired KVA usage");
178117325Ssilby
17991413Salfredstatic void pipeinit(void *dummy __unused);
18091413Salfredstatic void pipeclose(struct pipe *cpipe);
18191413Salfredstatic void pipe_free_kmem(struct pipe *cpipe);
18291413Salfredstatic int pipe_create(struct pipe **cpipep);
18391413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch);
18491413Salfredstatic __inline void pipeunlock(struct pipe *cpipe);
18591413Salfredstatic __inline void pipeselwakeup(struct pipe *cpipe);
18614037Sdyson#ifndef PIPE_NODIRECT
18791413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
18891413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe);
18991413Salfredstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
19091413Salfredstatic void pipe_clone_write_buffer(struct pipe *wpipe);
19114037Sdyson#endif
19291413Salfredstatic int pipespace(struct pipe *cpipe, int size);
19313675Sdyson
19492751Sjeffstatic uma_zone_t pipe_zone;
19527899Sdyson
19691372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
19791372Salfred
19891372Salfredstatic void
19991372Salfredpipeinit(void *dummy __unused)
20091372Salfred{
201118880Salc
20292654Sjeff	pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL,
20392654Sjeff	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
204118880Salc	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
20591372Salfred}
20691372Salfred
20713675Sdyson/*
20813675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes
20913675Sdyson */
21013675Sdyson
21113675Sdyson/* ARGSUSED */
21213675Sdysonint
21383366Sjulianpipe(td, uap)
21483366Sjulian	struct thread *td;
21513675Sdyson	struct pipe_args /* {
21613675Sdyson		int	dummy;
21713675Sdyson	} */ *uap;
21813675Sdyson{
21983366Sjulian	struct filedesc *fdp = td->td_proc->p_fd;
22013675Sdyson	struct file *rf, *wf;
22113675Sdyson	struct pipe *rpipe, *wpipe;
22291968Salfred	struct mtx *pmtx;
22313675Sdyson	int fd, error;
22427899Sdyson
225111119Simp	pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO);
22691968Salfred
22776756Salfred	rpipe = wpipe = NULL;
22876364Salfred	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
22976364Salfred		pipeclose(rpipe);
23076364Salfred		pipeclose(wpipe);
23191968Salfred		free(pmtx, M_TEMP);
23276364Salfred		return (ENFILE);
23376364Salfred	}
23476364Salfred
23513907Sdyson	rpipe->pipe_state |= PIPE_DIRECTOK;
23613907Sdyson	wpipe->pipe_state |= PIPE_DIRECTOK;
23713675Sdyson
23883366Sjulian	error = falloc(td, &rf, &fd);
23970915Sdwmalone	if (error) {
24070915Sdwmalone		pipeclose(rpipe);
24170915Sdwmalone		pipeclose(wpipe);
24291968Salfred		free(pmtx, M_TEMP);
24370915Sdwmalone		return (error);
24470915Sdwmalone	}
245121256Sdwmalone	/* An extra reference on `rf' has been held for us by falloc(). */
24683366Sjulian	td->td_retval[0] = fd;
24770915Sdwmalone
24870803Sdwmalone	/*
24970803Sdwmalone	 * Warning: once we've gotten past allocation of the fd for the
25070803Sdwmalone	 * read-side, we can only drop the read side via fdrop() in order
25170803Sdwmalone	 * to avoid races against processes which manage to dup() the read
25270803Sdwmalone	 * side while we are blocked trying to allocate the write side.
25370803Sdwmalone	 */
25489306Salfred	FILE_LOCK(rf);
25513675Sdyson	rf->f_flag = FREAD | FWRITE;
25613675Sdyson	rf->f_type = DTYPE_PIPE;
257109153Sdillon	rf->f_data = rpipe;
25813675Sdyson	rf->f_ops = &pipeops;
25989306Salfred	FILE_UNLOCK(rf);
26083366Sjulian	error = falloc(td, &wf, &fd);
26170915Sdwmalone	if (error) {
26289306Salfred		FILEDESC_LOCK(fdp);
26383366Sjulian		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
26483366Sjulian			fdp->fd_ofiles[td->td_retval[0]] = NULL;
26589306Salfred			FILEDESC_UNLOCK(fdp);
26683366Sjulian			fdrop(rf, td);
26789306Salfred		} else
26889306Salfred			FILEDESC_UNLOCK(fdp);
26983366Sjulian		fdrop(rf, td);
27070915Sdwmalone		/* rpipe has been closed by fdrop(). */
27170915Sdwmalone		pipeclose(wpipe);
27291968Salfred		free(pmtx, M_TEMP);
27370915Sdwmalone		return (error);
27470915Sdwmalone	}
275121256Sdwmalone	/* An extra reference on `wf' has been held for us by falloc(). */
27689306Salfred	FILE_LOCK(wf);
27713675Sdyson	wf->f_flag = FREAD | FWRITE;
27813675Sdyson	wf->f_type = DTYPE_PIPE;
279109153Sdillon	wf->f_data = wpipe;
28013675Sdyson	wf->f_ops = &pipeops;
28189306Salfred	FILE_UNLOCK(wf);
282121256Sdwmalone	fdrop(wf, td);
28383366Sjulian	td->td_retval[1] = fd;
28413675Sdyson	rpipe->pipe_peer = wpipe;
28513675Sdyson	wpipe->pipe_peer = rpipe;
286101768Srwatson#ifdef MAC
287101768Srwatson	/*
288101768Srwatson	 * struct pipe represents a pipe endpoint.  The MAC label is shared
289101768Srwatson	 * between the connected endpoints.  As a result mac_init_pipe() and
290101768Srwatson	 * mac_create_pipe() should only be called on one of the endpoints
291101768Srwatson	 * after they have been connected.
292101768Srwatson	 */
293101768Srwatson	mac_init_pipe(rpipe);
294101768Srwatson	mac_create_pipe(td->td_ucred, rpipe);
295101768Srwatson#endif
29693818Sjhb	mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
29791968Salfred	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
29883366Sjulian	fdrop(rf, td);
29913675Sdyson
30013675Sdyson	return (0);
30113675Sdyson}
30213675Sdyson
30313909Sdyson/*
30413909Sdyson * Allocate kva for the pipe circular buffer; the space is pageable.
30576364Salfred * This routine will 'realloc' the size of a pipe safely; if it fails,
30676364Salfred * it will retain the old buffer and return ENOMEM.
30813909Sdyson */
30976364Salfredstatic int
31076364Salfredpipespace(cpipe, size)
31113675Sdyson	struct pipe *cpipe;
31276364Salfred	int size;
31313675Sdyson{
31476364Salfred	struct vm_object *object;
31576364Salfred	caddr_t buffer;
31613688Sdyson	int npages, error;
317117325Ssilby	static int curfail = 0;
318117325Ssilby	static struct timeval lastfail;
31913675Sdyson
32091412Salfred	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
32191412Salfred	       ("pipespace: pipe mutex locked"));
32279224Sdillon
323118764Ssilby	size = round_page(size);
324118764Ssilby	npages = size / PAGE_SIZE;
32513675Sdyson	/*
32613675Sdyson	 * Create an object, I don't like the idea of paging to/from
32713675Sdyson	 * kernel_object.
32814037Sdyson	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
32913675Sdyson	 */
33076364Salfred	object = vm_object_allocate(OBJT_DEFAULT, npages);
331118764Ssilby	buffer = (caddr_t) vm_map_min(pipe_map);
33213675Sdyson
33313675Sdyson	/*
33413675Sdyson	 * Insert the object into the kernel map, and allocate kva for it.
33513675Sdyson	 * The map entry is, by default, pageable.
33614037Sdyson	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
33713675Sdyson	 */
338118764Ssilby	error = vm_map_find(pipe_map, object, 0,
33976364Salfred		(vm_offset_t *) &buffer, size, 1,
34013688Sdyson		VM_PROT_ALL, VM_PROT_ALL, 0);
34113675Sdyson
34276364Salfred	if (error != KERN_SUCCESS) {
34376364Salfred		vm_object_deallocate(object);
344118764Ssilby		if (ppsratecheck(&lastfail, &curfail, 1))
345118764Ssilby			printf("kern.maxpipekva exceeded, please see tuning(7).\n");
34676364Salfred		return (ENOMEM);
34776364Salfred	}
34876364Salfred
34976364Salfred	/* free old resources if we're resizing */
35076364Salfred	pipe_free_kmem(cpipe);
35176364Salfred	cpipe->pipe_buffer.buffer = buffer;
35276364Salfred	cpipe->pipe_buffer.size = size;
35376364Salfred	cpipe->pipe_buffer.in = 0;
35476364Salfred	cpipe->pipe_buffer.out = 0;
35576364Salfred	cpipe->pipe_buffer.cnt = 0;
356117325Ssilby	atomic_add_int(&amountpipes, 1);
357110816Salc	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
35876364Salfred	return (0);
35913907Sdyson}
36013688Sdyson
36113907Sdyson/*
36213907Sdyson * initialize and allocate VM and memory for pipe
36313907Sdyson */
36476364Salfredstatic int
36576364Salfredpipe_create(cpipep)
36676364Salfred	struct pipe **cpipep;
36776364Salfred{
36813907Sdyson	struct pipe *cpipe;
36976364Salfred	int error;
37013907Sdyson
371111119Simp	*cpipep = uma_zalloc(pipe_zone, M_WAITOK);
37276364Salfred	if (*cpipep == NULL)
37376364Salfred		return (ENOMEM);
37417163Sdyson
37576364Salfred	cpipe = *cpipep;
37676364Salfred
37776364Salfred	/*
37876364Salfred	 * protect so pipeclose() doesn't follow a junk pointer
37976364Salfred	 * if pipespace() fails.
38076364Salfred	 */
38176754Salfred	bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
38213675Sdyson	cpipe->pipe_state = 0;
38313675Sdyson	cpipe->pipe_peer = NULL;
38413675Sdyson	cpipe->pipe_busy = 0;
38513907Sdyson
38614037Sdyson#ifndef PIPE_NODIRECT
38713907Sdyson	/*
38813907Sdyson	 * pipe data structure initializations to support direct pipe I/O
38913907Sdyson	 */
39013907Sdyson	cpipe->pipe_map.cnt = 0;
39113907Sdyson	cpipe->pipe_map.kva = 0;
39213907Sdyson	cpipe->pipe_map.pos = 0;
39313907Sdyson	cpipe->pipe_map.npages = 0;
39417124Sbde	/* cpipe->pipe_map.ms[] = invalid */
39514037Sdyson#endif
39676364Salfred
39791412Salfred	cpipe->pipe_mtxp = NULL;	/* avoid pipespace assertion */
398117325Ssilby	/*
399117325Ssilby	 * Reduce to 1/4th pipe size if we're over our global max.
400117325Ssilby	 */
401118764Ssilby	if (amountpipekva > maxpipekva / 2)
402117325Ssilby		error = pipespace(cpipe, SMALL_PIPE_SIZE);
403117325Ssilby	else
404117325Ssilby		error = pipespace(cpipe, PIPE_SIZE);
40576760Salfred	if (error)
40676364Salfred		return (error);
40776364Salfred
40876364Salfred	vfs_timestamp(&cpipe->pipe_ctime);
40976364Salfred	cpipe->pipe_atime = cpipe->pipe_ctime;
41076364Salfred	cpipe->pipe_mtime = cpipe->pipe_ctime;
41176364Salfred
41276364Salfred	return (0);
41313675Sdyson}
41413675Sdyson
41513675Sdyson
41613675Sdyson/*
41713675Sdyson * lock a pipe for I/O, blocking other access
41813675Sdyson */
41913675Sdysonstatic __inline int
42013907Sdysonpipelock(cpipe, catch)
42113675Sdyson	struct pipe *cpipe;
42213907Sdyson	int catch;
42313675Sdyson{
42413776Sdyson	int error;
42576364Salfred
42691362Salfred	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
42791362Salfred	while (cpipe->pipe_state & PIPE_LOCKFL) {
42813675Sdyson		cpipe->pipe_state |= PIPE_LWANT;
42991362Salfred		error = msleep(cpipe, PIPE_MTX(cpipe),
43091362Salfred		    catch ? (PRIBIO | PCATCH) : PRIBIO,
43176760Salfred		    "pipelk", 0);
43276760Salfred		if (error != 0)
43376760Salfred			return (error);
43413675Sdyson	}
43591362Salfred	cpipe->pipe_state |= PIPE_LOCKFL;
43676760Salfred	return (0);
43713675Sdyson}
43813675Sdyson
43913675Sdyson/*
44013675Sdyson * unlock a pipe I/O lock
44113675Sdyson */
44213675Sdysonstatic __inline void
44313675Sdysonpipeunlock(cpipe)
44413675Sdyson	struct pipe *cpipe;
44513675Sdyson{
44676364Salfred
44791362Salfred	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
44891362Salfred	cpipe->pipe_state &= ~PIPE_LOCKFL;
44913675Sdyson	if (cpipe->pipe_state & PIPE_LWANT) {
45013675Sdyson		cpipe->pipe_state &= ~PIPE_LWANT;
45114177Sdyson		wakeup(cpipe);
45213675Sdyson	}
45313675Sdyson}
45413675Sdyson
45514037Sdysonstatic __inline void
45614037Sdysonpipeselwakeup(cpipe)
45714037Sdyson	struct pipe *cpipe;
45814037Sdyson{
45976364Salfred
46014037Sdyson	if (cpipe->pipe_state & PIPE_SEL) {
46114037Sdyson		cpipe->pipe_state &= ~PIPE_SEL;
46214037Sdyson		selwakeup(&cpipe->pipe_sel);
46314037Sdyson	}
46441086Struckman	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
46595883Salfred		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
46659288Sjlemon	KNOTE(&cpipe->pipe_sel.si_note, 0);
46714037Sdyson}
46814037Sdyson
46913675Sdyson/* ARGSUSED */
47013675Sdysonstatic int
471101941Srwatsonpipe_read(fp, uio, active_cred, flags, td)
47213675Sdyson	struct file *fp;
47313675Sdyson	struct uio *uio;
474101941Srwatson	struct ucred *active_cred;
47583366Sjulian	struct thread *td;
47645311Sdt	int flags;
47713675Sdyson{
478109153Sdillon	struct pipe *rpipe = fp->f_data;
47947748Salc	int error;
48013675Sdyson	int nread = 0;
48118863Sdyson	u_int size;
48213675Sdyson
48391362Salfred	PIPE_LOCK(rpipe);
48413675Sdyson	++rpipe->pipe_busy;
48547748Salc	error = pipelock(rpipe, 1);
48647748Salc	if (error)
48747748Salc		goto unlocked_error;
48847748Salc
489101768Srwatson#ifdef MAC
490102115Srwatson	error = mac_check_pipe_read(active_cred, rpipe);
491101768Srwatson	if (error)
492101768Srwatson		goto locked_error;
493101768Srwatson#endif
494101768Srwatson
49513675Sdyson	while (uio->uio_resid) {
49613907Sdyson		/*
49713907Sdyson		 * normal pipe buffer receive
49813907Sdyson		 */
49913675Sdyson		if (rpipe->pipe_buffer.cnt > 0) {
50018863Sdyson			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
50113675Sdyson			if (size > rpipe->pipe_buffer.cnt)
50213675Sdyson				size = rpipe->pipe_buffer.cnt;
50318863Sdyson			if (size > (u_int) uio->uio_resid)
50418863Sdyson				size = (u_int) uio->uio_resid;
50547748Salc
50691362Salfred			PIPE_UNLOCK(rpipe);
507116127Smux			error = uiomove(
508116127Smux			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
509116127Smux			    size, uio);
51091362Salfred			PIPE_LOCK(rpipe);
51176760Salfred			if (error)
51213675Sdyson				break;
51376760Salfred
51413675Sdyson			rpipe->pipe_buffer.out += size;
51513675Sdyson			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
51613675Sdyson				rpipe->pipe_buffer.out = 0;
51713675Sdyson
51813675Sdyson			rpipe->pipe_buffer.cnt -= size;
51947748Salc
52047748Salc			/*
52147748Salc			 * If there is no more to read in the pipe, reset
52247748Salc			 * its pointers to the beginning.  This improves
52347748Salc			 * cache hit stats.
52447748Salc			 */
52547748Salc			if (rpipe->pipe_buffer.cnt == 0) {
52647748Salc				rpipe->pipe_buffer.in = 0;
52747748Salc				rpipe->pipe_buffer.out = 0;
52847748Salc			}
52913675Sdyson			nread += size;
53014037Sdyson#ifndef PIPE_NODIRECT
53113907Sdyson		/*
53213907Sdyson		 * Direct copy, bypassing a kernel buffer.
53313907Sdyson		 */
53413907Sdyson		} else if ((size = rpipe->pipe_map.cnt) &&
53547748Salc			   (rpipe->pipe_state & PIPE_DIRECTW)) {
53647748Salc			caddr_t	va;
53718863Sdyson			if (size > (u_int) uio->uio_resid)
53818863Sdyson				size = (u_int) uio->uio_resid;
53947748Salc
54076760Salfred			va = (caddr_t) rpipe->pipe_map.kva +
54176760Salfred			    rpipe->pipe_map.pos;
54291362Salfred			PIPE_UNLOCK(rpipe);
54347748Salc			error = uiomove(va, size, uio);
54491362Salfred			PIPE_LOCK(rpipe);
54513907Sdyson			if (error)
54613907Sdyson				break;
54713907Sdyson			nread += size;
54813907Sdyson			rpipe->pipe_map.pos += size;
54913907Sdyson			rpipe->pipe_map.cnt -= size;
55013907Sdyson			if (rpipe->pipe_map.cnt == 0) {
55113907Sdyson				rpipe->pipe_state &= ~PIPE_DIRECTW;
55213907Sdyson				wakeup(rpipe);
55313907Sdyson			}
55414037Sdyson#endif
55513675Sdyson		} else {
55613675Sdyson			/*
55713675Sdyson			 * detect EOF condition
55876760Salfred			 * read returns 0 on EOF, no need to set error
55913675Sdyson			 */
56076760Salfred			if (rpipe->pipe_state & PIPE_EOF)
56113675Sdyson				break;
56243623Sdillon
56313675Sdyson			/*
56413675Sdyson			 * If the "write-side" has been blocked, wake it up now.
56513675Sdyson			 */
56613675Sdyson			if (rpipe->pipe_state & PIPE_WANTW) {
56713675Sdyson				rpipe->pipe_state &= ~PIPE_WANTW;
56813675Sdyson				wakeup(rpipe);
56913675Sdyson			}
57043623Sdillon
57143623Sdillon			/*
57247748Salc			 * Break if some data was read.
57343623Sdillon			 */
57447748Salc			if (nread > 0)
57513675Sdyson				break;
57616960Sdyson
57743623Sdillon			/*
578116127Smux			 * Unlock the pipe buffer for our remaining processing.
579116127Smux			 * We will either break out with an error or we will
580116127Smux			 * sleep and relock to loop.
58143623Sdillon			 */
58247748Salc			pipeunlock(rpipe);
58343623Sdillon
58413675Sdyson			/*
58547748Salc			 * Handle non-blocking mode operation or
58647748Salc			 * wait for more data.
58713675Sdyson			 */
58876760Salfred			if (fp->f_flag & FNONBLOCK) {
58947748Salc				error = EAGAIN;
59076760Salfred			} else {
59147748Salc				rpipe->pipe_state |= PIPE_WANTR;
59291362Salfred				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
59391362Salfred				    PRIBIO | PCATCH,
59477140Salfred				    "piperd", 0)) == 0)
59547748Salc					error = pipelock(rpipe, 1);
59613675Sdyson			}
59747748Salc			if (error)
59847748Salc				goto unlocked_error;
59913675Sdyson		}
60013675Sdyson	}
601101768Srwatson#ifdef MAC
602101768Srwatsonlocked_error:
603101768Srwatson#endif
60447748Salc	pipeunlock(rpipe);
60513675Sdyson
60691362Salfred	/* XXX: should probably do this before getting any locks. */
60724101Sbde	if (error == 0)
60855112Sbde		vfs_timestamp(&rpipe->pipe_atime);
60947748Salcunlocked_error:
61047748Salc	--rpipe->pipe_busy;
61113913Sdyson
61247748Salc	/*
61347748Salc	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
61447748Salc	 */
61513675Sdyson	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
61613675Sdyson		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
61713675Sdyson		wakeup(rpipe);
61813675Sdyson	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
61913675Sdyson		/*
62047748Salc		 * Handle write blocking hysteresis.
62113675Sdyson		 */
62213675Sdyson		if (rpipe->pipe_state & PIPE_WANTW) {
62313675Sdyson			rpipe->pipe_state &= ~PIPE_WANTW;
62413675Sdyson			wakeup(rpipe);
62513675Sdyson		}
62613675Sdyson	}
62714037Sdyson
62814802Sdyson	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
62914037Sdyson		pipeselwakeup(rpipe);
63014037Sdyson
63191362Salfred	PIPE_UNLOCK(rpipe);
63276760Salfred	return (error);
63313675Sdyson}
63413675Sdyson
63514037Sdyson#ifndef PIPE_NODIRECT
63613907Sdyson/*
63713907Sdyson * Map the sending process's buffer into kernel space and wire it.
63813907Sdyson * This is similar to a physical write operation.
63913907Sdyson */
64013675Sdysonstatic int
64113907Sdysonpipe_build_write_buffer(wpipe, uio)
64213907Sdyson	struct pipe *wpipe;
64313675Sdyson	struct uio *uio;
64413675Sdyson{
645119872Salc	pmap_t pmap;
64618863Sdyson	u_int size;
647119872Salc	int i, j;
648112569Sjake	vm_offset_t addr, endaddr;
64913907Sdyson
65091412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
65179224Sdillon
65218863Sdyson	size = (u_int) uio->uio_iov->iov_len;
65313907Sdyson	if (size > wpipe->pipe_buffer.size)
65413907Sdyson		size = wpipe->pipe_buffer.size;
65513907Sdyson
656119872Salc	pmap = vmspace_pmap(curproc->p_vmspace);
65740286Sdg	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
65876760Salfred	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
65976760Salfred	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
66099899Salc		/*
66199899Salc		 * vm_fault_quick() can sleep.  Consequently,
66299899Salc		 * vm_page_lock_queue() and vm_page_unlock_queue()
66399899Salc		 * should not be performed outside of this loop.
66499899Salc		 */
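		/*
		 * The page can be unmapped or lose the required protection
		 * between the vm_fault_quick() below and the matching
		 * pmap_extract_and_hold(); if the hold fails we simply fault
		 * the page in again and retry.
		 */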
665119872Salc	race:
666119872Salc		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) {
66799899Salc			vm_page_lock_queues();
668119872Salc			for (j = 0; j < i; j++)
669118757Salc				vm_page_unhold(wpipe->pipe_map.ms[j]);
67099899Salc			vm_page_unlock_queues();
67176760Salfred			return (EFAULT);
67213907Sdyson		}
673120000Salc		wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr,
674120000Salc		    VM_PROT_READ);
675119872Salc		if (wpipe->pipe_map.ms[i] == NULL)
676119872Salc			goto race;
67713907Sdyson	}
67813907Sdyson
67913907Sdyson/*
68013907Sdyson * set up the control block
68113907Sdyson */
68213907Sdyson	wpipe->pipe_map.npages = i;
68376760Salfred	wpipe->pipe_map.pos =
68476760Salfred	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
68513907Sdyson	wpipe->pipe_map.cnt = size;
68613907Sdyson
68713907Sdyson/*
68813907Sdyson * and map the buffer
68913907Sdyson */
69013907Sdyson	if (wpipe->pipe_map.kva == 0) {
69113912Sdyson		/*
69213912Sdyson		 * We need to allocate space for an extra page because the
69313912Sdyson		 * address range might (will) span pages at times.
69413912Sdyson		 */
695118220Salc		wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map,
69613912Sdyson			wpipe->pipe_buffer.size + PAGE_SIZE);
697118764Ssilby		atomic_add_int(&amountpipekvawired,
698110816Salc		    wpipe->pipe_buffer.size + PAGE_SIZE);
69913907Sdyson	}
70013907Sdyson	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
70113907Sdyson		wpipe->pipe_map.npages);
70213907Sdyson
70313907Sdyson/*
70413907Sdyson * and update the uio data
70513907Sdyson */
70613907Sdyson
70713907Sdyson	uio->uio_iov->iov_len -= size;
708104908Smike	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
70913907Sdyson	if (uio->uio_iov->iov_len == 0)
71013907Sdyson		uio->uio_iov++;
71113907Sdyson	uio->uio_resid -= size;
71213907Sdyson	uio->uio_offset += size;
71376760Salfred	return (0);
71413907Sdyson}
71513907Sdyson
71613907Sdyson/*
71713907Sdyson * unmap and unwire the process buffer
71813907Sdyson */
71913907Sdysonstatic void
72013907Sdysonpipe_destroy_write_buffer(wpipe)
72176760Salfred	struct pipe *wpipe;
72213907Sdyson{
72313907Sdyson	int i;
72476364Salfred
72591412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
72617163Sdyson	if (wpipe->pipe_map.kva) {
72717163Sdyson		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
72813907Sdyson
729118764Ssilby		if (amountpipekvawired > maxpipekvawired / 2) {
730118764Ssilby			/* Conserve address space */
73113907Sdyson			vm_offset_t kva = wpipe->pipe_map.kva;
73213907Sdyson			wpipe->pipe_map.kva = 0;
73313907Sdyson			kmem_free(kernel_map, kva,
734119811Salc			    wpipe->pipe_buffer.size + PAGE_SIZE);
735118764Ssilby			atomic_subtract_int(&amountpipekvawired,
736110816Salc			    wpipe->pipe_buffer.size + PAGE_SIZE);
73713907Sdyson		}
73813907Sdyson	}
73999899Salc	vm_page_lock_queues();
740117325Ssilby	for (i = 0; i < wpipe->pipe_map.npages; i++) {
741118757Salc		vm_page_unhold(wpipe->pipe_map.ms[i]);
742117325Ssilby	}
74399899Salc	vm_page_unlock_queues();
74491653Stanimura	wpipe->pipe_map.npages = 0;
74513907Sdyson}
74613907Sdyson
74713907Sdyson/*
74813907Sdyson * In the case of a signal, the writing process might go away.  This
74913907Sdyson * code copies the data into the circular buffer so that the source
75013907Sdyson * pages can be freed without loss of data.
75113907Sdyson */
75213907Sdysonstatic void
75313907Sdysonpipe_clone_write_buffer(wpipe)
75476364Salfred	struct pipe *wpipe;
75513907Sdyson{
75613907Sdyson	int size;
75713907Sdyson	int pos;
75813907Sdyson
75991362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
76013907Sdyson	size = wpipe->pipe_map.cnt;
76113907Sdyson	pos = wpipe->pipe_map.pos;
76213907Sdyson
76313907Sdyson	wpipe->pipe_buffer.in = size;
76413907Sdyson	wpipe->pipe_buffer.out = 0;
76513907Sdyson	wpipe->pipe_buffer.cnt = size;
76613907Sdyson	wpipe->pipe_state &= ~PIPE_DIRECTW;
76713907Sdyson
768119811Salc	PIPE_UNLOCK(wpipe);
76992959Salfred	bcopy((caddr_t) wpipe->pipe_map.kva + pos,
770100527Salfred	    wpipe->pipe_buffer.buffer, size);
77113907Sdyson	pipe_destroy_write_buffer(wpipe);
772119811Salc	PIPE_LOCK(wpipe);
77313907Sdyson}
77413907Sdyson
77513907Sdyson/*
77613907Sdyson * This implements the pipe buffer write mechanism.  Note that only
77713907Sdyson * a direct write OR a normal pipe write can be pending at any given time.
77813907Sdyson * If there are any characters in the pipe buffer, the direct write will
77913907Sdyson * be deferred until the receiving process grabs all of the bytes from
78013907Sdyson * the pipe buffer.  Then the direct mapping write is set-up.
78113907Sdyson */
78213907Sdysonstatic int
78313907Sdysonpipe_direct_write(wpipe, uio)
78413907Sdyson	struct pipe *wpipe;
78513907Sdyson	struct uio *uio;
78613907Sdyson{
78713907Sdyson	int error;
78876364Salfred
78913951Sdysonretry:
79091362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
79113907Sdyson	while (wpipe->pipe_state & PIPE_DIRECTW) {
79276760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
79313951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
79413951Sdyson			wakeup(wpipe);
79513951Sdyson		}
79613992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
79791362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
79891362Salfred		    PRIBIO | PCATCH, "pipdww", 0);
79914802Sdyson		if (error)
80013907Sdyson			goto error1;
80114802Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
80214802Sdyson			error = EPIPE;
80314802Sdyson			goto error1;
80414802Sdyson		}
80513907Sdyson	}
80613907Sdyson	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
80713951Sdyson	if (wpipe->pipe_buffer.cnt > 0) {
80876760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
80913951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
81013951Sdyson			wakeup(wpipe);
81113951Sdyson		}
81213951Sdyson
81313992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
81491362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
81591362Salfred		    PRIBIO | PCATCH, "pipdwc", 0);
81614802Sdyson		if (error)
81713907Sdyson			goto error1;
81814802Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
81914802Sdyson			error = EPIPE;
82014802Sdyson			goto error1;
82113907Sdyson		}
82213951Sdyson		goto retry;
82313907Sdyson	}
82413907Sdyson
82513951Sdyson	wpipe->pipe_state |= PIPE_DIRECTW;
82613951Sdyson
82792305Salfred	pipelock(wpipe, 0);
828119872Salc	PIPE_UNLOCK(wpipe);
82913907Sdyson	error = pipe_build_write_buffer(wpipe, uio);
830119872Salc	PIPE_LOCK(wpipe);
83192305Salfred	pipeunlock(wpipe);
83213907Sdyson	if (error) {
83313907Sdyson		wpipe->pipe_state &= ~PIPE_DIRECTW;
83413907Sdyson		goto error1;
83513907Sdyson	}
83613907Sdyson
83713907Sdyson	error = 0;
83813907Sdyson	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
83913907Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
84013907Sdyson			pipelock(wpipe, 0);
841119811Salc			PIPE_UNLOCK(wpipe);
84213907Sdyson			pipe_destroy_write_buffer(wpipe);
843119811Salc			PIPE_LOCK(wpipe);
844112981Shsu			pipeselwakeup(wpipe);
84513907Sdyson			pipeunlock(wpipe);
84614802Sdyson			error = EPIPE;
84714802Sdyson			goto error1;
84813907Sdyson		}
84913992Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
85013992Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
85113992Sdyson			wakeup(wpipe);
85213992Sdyson		}
85314037Sdyson		pipeselwakeup(wpipe);
85491362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
85591362Salfred		    "pipdwt", 0);
85613907Sdyson	}
85713907Sdyson
85813907Sdyson	pipelock(wpipe,0);
85913907Sdyson	if (wpipe->pipe_state & PIPE_DIRECTW) {
86013907Sdyson		/*
86113907Sdyson		 * this bit of trickery substitutes a kernel buffer for
86213907Sdyson		 * the process that might be going away.
86313907Sdyson		 */
86413907Sdyson		pipe_clone_write_buffer(wpipe);
86513907Sdyson	} else {
866119811Salc		PIPE_UNLOCK(wpipe);
86713907Sdyson		pipe_destroy_write_buffer(wpipe);
868119811Salc		PIPE_LOCK(wpipe);
86913907Sdyson	}
87013907Sdyson	pipeunlock(wpipe);
87176760Salfred	return (error);
87213907Sdyson
87313907Sdysonerror1:
87413907Sdyson	wakeup(wpipe);
87576760Salfred	return (error);
87613907Sdyson}
87714037Sdyson#endif
87813907Sdyson
87916960Sdysonstatic int
880101941Srwatsonpipe_write(fp, uio, active_cred, flags, td)
88116960Sdyson	struct file *fp;
88213907Sdyson	struct uio *uio;
883101941Srwatson	struct ucred *active_cred;
88483366Sjulian	struct thread *td;
88545311Sdt	int flags;
88613907Sdyson{
88713675Sdyson	int error = 0;
88813913Sdyson	int orig_resid;
88916960Sdyson	struct pipe *wpipe, *rpipe;
89016960Sdyson
891109153Sdillon	rpipe = fp->f_data;
89216960Sdyson	wpipe = rpipe->pipe_peer;
89316960Sdyson
89491395Salfred	PIPE_LOCK(rpipe);
89513675Sdyson	/*
89613675Sdyson	 * detect loss of pipe read side, issue SIGPIPE if lost.
89713675Sdyson	 */
89816960Sdyson	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
89991395Salfred		PIPE_UNLOCK(rpipe);
90076760Salfred		return (EPIPE);
90113675Sdyson	}
902101768Srwatson#ifdef MAC
903102115Srwatson	error = mac_check_pipe_write(active_cred, wpipe);
904101768Srwatson	if (error) {
905101768Srwatson		PIPE_UNLOCK(rpipe);
906101768Srwatson		return (error);
907101768Srwatson	}
908101768Srwatson#endif
90977676Sdillon	++wpipe->pipe_busy;
91013675Sdyson
91117163Sdyson	/*
91217163Sdyson	 * If it is advantageous to resize the pipe buffer, do
91317163Sdyson	 * so.
91417163Sdyson	 */
91517163Sdyson	if ((uio->uio_resid > PIPE_SIZE) &&
916118764Ssilby		(amountpipekva < maxpipekva / 2) &&
91717163Sdyson		(nbigpipe < LIMITBIGPIPES) &&
91817163Sdyson		(wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
91917163Sdyson		(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
92017163Sdyson		(wpipe->pipe_buffer.cnt == 0)) {
92117163Sdyson
922105009Salfred		if ((error = pipelock(wpipe, 1)) == 0) {
923118799Salc			PIPE_UNLOCK(wpipe);
92476364Salfred			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
925117364Ssilby				atomic_add_int(&nbigpipe, 1);
926118799Salc			PIPE_LOCK(wpipe);
92713907Sdyson			pipeunlock(wpipe);
92813907Sdyson		}
92913907Sdyson	}
93077676Sdillon
93177676Sdillon	/*
93277676Sdillon	 * If an early error occurred, unbusy and return, waking up any pending
93377676Sdillon	 * readers.
93477676Sdillon	 */
93577676Sdillon	if (error) {
93677676Sdillon		--wpipe->pipe_busy;
93777676Sdillon		if ((wpipe->pipe_busy == 0) &&
93877676Sdillon		    (wpipe->pipe_state & PIPE_WANT)) {
93977676Sdillon			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
94077676Sdillon			wakeup(wpipe);
94177676Sdillon		}
94291395Salfred		PIPE_UNLOCK(rpipe);
94377676Sdillon		return(error);
94477676Sdillon	}
94576364Salfred
94613913Sdyson	orig_resid = uio->uio_resid;
94777676Sdillon
94813675Sdyson	while (uio->uio_resid) {
94913907Sdyson		int space;
95076760Salfred
95114037Sdyson#ifndef PIPE_NODIRECT
95213907Sdyson		/*
95313907Sdyson		 * If the transfer is large, we can gain performance if
95413907Sdyson		 * we do process-to-process copies directly.
95516416Sdyson		 * If the write is non-blocking, we don't use the
95616416Sdyson		 * direct write mechanism.
95758505Sdillon		 *
95858505Sdillon		 * The direct write mechanism will detect the reader going
95958505Sdillon		 * away on us.
96013907Sdyson		 */
96117163Sdyson		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
96217163Sdyson		    (fp->f_flag & FNONBLOCK) == 0 &&
963118764Ssilby		    amountpipekvawired + uio->uio_resid < maxpipekvawired) {
964105009Salfred			error = pipe_direct_write(wpipe, uio);
96576760Salfred			if (error)
96613907Sdyson				break;
96713907Sdyson			continue;
96891362Salfred		}
96914037Sdyson#endif
97013907Sdyson
97113907Sdyson		/*
97213907Sdyson		 * Pipe buffered writes cannot be coincident with
97313907Sdyson		 * direct writes.  We wait until the currently executing
97413907Sdyson		 * direct write is completed before we start filling the
97558505Sdillon		 * pipe buffer.  We break out if a signal occurs or the
97658505Sdillon		 * reader goes away.
97713907Sdyson		 */
97813907Sdyson	retrywrite:
97913907Sdyson		while (wpipe->pipe_state & PIPE_DIRECTW) {
98013992Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
98113992Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
98213992Sdyson				wakeup(wpipe);
98313992Sdyson			}
98491395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
98591362Salfred			    "pipbww", 0);
98658505Sdillon			if (wpipe->pipe_state & PIPE_EOF)
98758505Sdillon				break;
98813907Sdyson			if (error)
98913907Sdyson				break;
99013907Sdyson		}
99158505Sdillon		if (wpipe->pipe_state & PIPE_EOF) {
99258505Sdillon			error = EPIPE;
99358505Sdillon			break;
99458505Sdillon		}
99513907Sdyson
99613907Sdyson		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
99714644Sdyson
99814644Sdyson		/* Writes of size <= PIPE_BUF must be atomic. */
99913913Sdyson		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
100013913Sdyson			space = 0;
100113907Sdyson
1002118230Spb		if (space > 0) {
100313907Sdyson			if ((error = pipelock(wpipe,1)) == 0) {
100454534Stegge				int size;	/* Transfer size */
100554534Stegge				int segsize;	/* first segment to transfer */
100676760Salfred
100713907Sdyson				/*
100813907Sdyson				 * It is possible for a direct write to
100913907Sdyson				 * slip in on us... handle it here...
101013907Sdyson				 */
101113907Sdyson				if (wpipe->pipe_state & PIPE_DIRECTW) {
101213907Sdyson					pipeunlock(wpipe);
101313907Sdyson					goto retrywrite;
101413907Sdyson				}
101554534Stegge				/*
101654534Stegge				 * If a process blocked in uiomove, our
101754534Stegge				 * value for space might be bad.
101858505Sdillon				 *
101958505Sdillon				 * XXX will we be ok if the reader has gone
102058505Sdillon				 * away here?
102154534Stegge				 */
102254534Stegge				if (space > wpipe->pipe_buffer.size -
102354534Stegge				    wpipe->pipe_buffer.cnt) {
102454534Stegge					pipeunlock(wpipe);
102554534Stegge					goto retrywrite;
102654534Stegge				}
102754534Stegge
102854534Stegge				/*
102954534Stegge				 * Transfer size is minimum of uio transfer
103054534Stegge				 * and free space in pipe buffer.
103154534Stegge				 */
103254534Stegge				if (space > uio->uio_resid)
103354534Stegge					size = uio->uio_resid;
103454534Stegge				else
103554534Stegge					size = space;
103654534Stegge				/*
103754534Stegge				 * First segment to transfer is minimum of
103854534Stegge				 * transfer size and contiguous space in
103954534Stegge				 * pipe buffer.  If first segment to transfer
104054534Stegge				 * is less than the transfer size, we've got
104154534Stegge				 * a wraparound in the buffer.
104254534Stegge				 */
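				/*
				 * For example (hypothetical numbers): with a
				 * 1024 byte buffer, in == 900 and size == 300,
				 * segsize becomes 124; the first uiomove()
				 * fills bytes 900-1023 and the second fills
				 * bytes 0-175, leaving "in" at 176.
				 */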
104354534Stegge				segsize = wpipe->pipe_buffer.size -
104454534Stegge					wpipe->pipe_buffer.in;
104554534Stegge				if (segsize > size)
104654534Stegge					segsize = size;
104754534Stegge
104854534Stegge				/* Transfer first segment */
104954534Stegge
105091395Salfred				PIPE_UNLOCK(rpipe);
105154534Stegge				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
105254534Stegge						segsize, uio);
105391395Salfred				PIPE_LOCK(rpipe);
105454534Stegge
105554534Stegge				if (error == 0 && segsize < size) {
105654534Stegge					/*
105754534Stegge					 * Transfer remaining part now, to
105854534Stegge					 * support atomic writes.  Wraparound
105954534Stegge					 * happened.
106054534Stegge					 */
106154534Stegge					if (wpipe->pipe_buffer.in + segsize !=
106254534Stegge					    wpipe->pipe_buffer.size)
1063116127Smux						panic("Expected pipe buffer "
1064116127Smux						    "wraparound disappeared");
106554534Stegge
106691395Salfred					PIPE_UNLOCK(rpipe);
1067116127Smux					error = uiomove(
1068116127Smux					    &wpipe->pipe_buffer.buffer[0],
1069116127Smux				    	    size - segsize, uio);
107091395Salfred					PIPE_LOCK(rpipe);
107154534Stegge				}
107254534Stegge				if (error == 0) {
107354534Stegge					wpipe->pipe_buffer.in += size;
107454534Stegge					if (wpipe->pipe_buffer.in >=
107554534Stegge					    wpipe->pipe_buffer.size) {
1076116127Smux						if (wpipe->pipe_buffer.in !=
1077116127Smux						    size - segsize +
1078116127Smux						    wpipe->pipe_buffer.size)
1079116127Smux							panic("Expected "
1080116127Smux							    "wraparound bad");
1081116127Smux						wpipe->pipe_buffer.in = size -
1082116127Smux						    segsize;
108354534Stegge					}
108454534Stegge
108554534Stegge					wpipe->pipe_buffer.cnt += size;
1086116127Smux					if (wpipe->pipe_buffer.cnt >
1087116127Smux					    wpipe->pipe_buffer.size)
108854534Stegge						panic("Pipe buffer overflow");
108954534Stegge
109054534Stegge				}
109113675Sdyson				pipeunlock(wpipe);
109213675Sdyson			}
109313675Sdyson			if (error)
109413675Sdyson				break;
109513675Sdyson
109613675Sdyson		} else {
109713675Sdyson			/*
109813675Sdyson			 * If the "read-side" has been blocked, wake it up now.
109913675Sdyson			 */
110013675Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
110113675Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
110213675Sdyson				wakeup(wpipe);
110313675Sdyson			}
110414037Sdyson
110513675Sdyson			/*
110613675Sdyson			 * don't block on non-blocking I/O
110713675Sdyson			 */
110816960Sdyson			if (fp->f_flag & FNONBLOCK) {
110913907Sdyson				error = EAGAIN;
111013675Sdyson				break;
111113675Sdyson			}
111213907Sdyson
111314037Sdyson			/*
111414037Sdyson			 * We have no more space and have something to offer,
111529356Speter			 * wake up select/poll.
111614037Sdyson			 */
111714037Sdyson			pipeselwakeup(wpipe);
111814037Sdyson
111913675Sdyson			wpipe->pipe_state |= PIPE_WANTW;
112091395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe),
112191362Salfred			    PRIBIO | PCATCH, "pipewr", 0);
112276760Salfred			if (error != 0)
112313675Sdyson				break;
112413675Sdyson			/*
112513675Sdyson			 * If read side wants to go away, we just issue a signal
112613675Sdyson			 * to ourselves.
112713675Sdyson			 */
112813675Sdyson			if (wpipe->pipe_state & PIPE_EOF) {
112913774Sdyson				error = EPIPE;
113013907Sdyson				break;
113113675Sdyson			}
113213675Sdyson		}
113313675Sdyson	}
113413675Sdyson
113514644Sdyson	--wpipe->pipe_busy;
113677676Sdillon
113776760Salfred	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
113876760Salfred		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
113913675Sdyson		wakeup(wpipe);
114013675Sdyson	} else if (wpipe->pipe_buffer.cnt > 0) {
114113675Sdyson		/*
114213675Sdyson		 * If we have put any characters in the buffer, we wake up
114313675Sdyson		 * the reader.
114413675Sdyson		 */
114513675Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
114613675Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
114713675Sdyson			wakeup(wpipe);
114813675Sdyson		}
114913675Sdyson	}
115013909Sdyson
115113909Sdyson	/*
115213909Sdyson	 * Don't return EPIPE if I/O was successful
115313909Sdyson	 */
115413907Sdyson	if ((wpipe->pipe_buffer.cnt == 0) &&
115577676Sdillon	    (uio->uio_resid == 0) &&
115677676Sdillon	    (error == EPIPE)) {
115713907Sdyson		error = 0;
115877676Sdillon	}
115913913Sdyson
116024101Sbde	if (error == 0)
116155112Sbde		vfs_timestamp(&wpipe->pipe_mtime);
116224101Sbde
116314037Sdyson	/*
116414037Sdyson	 * We have something to offer,
116529356Speter	 * wake up select/poll.
116614037Sdyson	 */
116714177Sdyson	if (wpipe->pipe_buffer.cnt)
116814037Sdyson		pipeselwakeup(wpipe);
116913907Sdyson
117091395Salfred	PIPE_UNLOCK(rpipe);
117176760Salfred	return (error);
117213675Sdyson}
117313675Sdyson
117413675Sdyson/*
117513675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets.
117613675Sdyson */
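/*
 * For instance, a userland consumer can ask how much data is buffered with
 * FIONREAD; a minimal sketch, assuming "fd" is the read end of a pipe:
 *
 *	#include <sys/ioctl.h>
 *	#include <sys/filio.h>
 *	#include <stdio.h>
 *
 *	int nbytes;
 *
 *	if (ioctl(fd, FIONREAD, &nbytes) == 0)
 *		printf("%d bytes ready to read\n", nbytes);
 */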
1177104094Sphkstatic int
1178102003Srwatsonpipe_ioctl(fp, cmd, data, active_cred, td)
117913675Sdyson	struct file *fp;
118036735Sdfr	u_long cmd;
118199009Salfred	void *data;
1182102003Srwatson	struct ucred *active_cred;
118383366Sjulian	struct thread *td;
118413675Sdyson{
1185109153Sdillon	struct pipe *mpipe = fp->f_data;
1186101768Srwatson#ifdef MAC
1187101768Srwatson	int error;
1188104269Srwatson#endif
118913675Sdyson
1190104269Srwatson	PIPE_LOCK(mpipe);
1191104269Srwatson
1192104269Srwatson#ifdef MAC
1193102003Srwatson	error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data);
1194101768Srwatson	if (error) {
		/* Do not leak the pipe mutex acquired above. */
		PIPE_UNLOCK(mpipe);
1195101768Srwatson		return (error);
	}
1196101768Srwatson#endif
1197101768Srwatson
119813675Sdyson	switch (cmd) {
119913675Sdyson
120013675Sdyson	case FIONBIO:
1201104269Srwatson		PIPE_UNLOCK(mpipe);
120213675Sdyson		return (0);
120313675Sdyson
120413675Sdyson	case FIOASYNC:
120513675Sdyson		if (*(int *)data) {
120613675Sdyson			mpipe->pipe_state |= PIPE_ASYNC;
120713675Sdyson		} else {
120813675Sdyson			mpipe->pipe_state &= ~PIPE_ASYNC;
120913675Sdyson		}
121091362Salfred		PIPE_UNLOCK(mpipe);
121113675Sdyson		return (0);
121213675Sdyson
121313675Sdyson	case FIONREAD:
121414037Sdyson		if (mpipe->pipe_state & PIPE_DIRECTW)
121514037Sdyson			*(int *)data = mpipe->pipe_map.cnt;
121614037Sdyson		else
121714037Sdyson			*(int *)data = mpipe->pipe_buffer.cnt;
121891362Salfred		PIPE_UNLOCK(mpipe);
121913675Sdyson		return (0);
122013675Sdyson
122141086Struckman	case FIOSETOWN:
1222104269Srwatson		PIPE_UNLOCK(mpipe);
122341086Struckman		return (fsetown(*(int *)data, &mpipe->pipe_sigio));
122441086Struckman
122541086Struckman	case FIOGETOWN:
1226104269Srwatson		PIPE_UNLOCK(mpipe);
1227104393Struckman		*(int *)data = fgetown(&mpipe->pipe_sigio);
122813675Sdyson		return (0);
122913675Sdyson
123041086Struckman	/* This is deprecated, FIOSETOWN should be used instead. */
123141086Struckman	case TIOCSPGRP:
1232104269Srwatson		PIPE_UNLOCK(mpipe);
123341086Struckman		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));
123441086Struckman
123541086Struckman	/* This is deprecated, FIOGETOWN should be used instead. */
123618863Sdyson	case TIOCGPGRP:
1237104269Srwatson		PIPE_UNLOCK(mpipe);
1238104393Struckman		*(int *)data = -fgetown(&mpipe->pipe_sigio);
123913675Sdyson		return (0);
124013675Sdyson
124113675Sdyson	}
1242104269Srwatson	PIPE_UNLOCK(mpipe);
124317124Sbde	return (ENOTTY);
124413675Sdyson}
124513675Sdyson
1246104094Sphkstatic int
1247101983Srwatsonpipe_poll(fp, events, active_cred, td)
124813675Sdyson	struct file *fp;
124929356Speter	int events;
1250101983Srwatson	struct ucred *active_cred;
125183366Sjulian	struct thread *td;
125213675Sdyson{
1253109153Sdillon	struct pipe *rpipe = fp->f_data;
125413675Sdyson	struct pipe *wpipe;
125529356Speter	int revents = 0;
1256101768Srwatson#ifdef MAC
1257101768Srwatson	int error;
1258101768Srwatson#endif
125913675Sdyson
126013675Sdyson	wpipe = rpipe->pipe_peer;
126191362Salfred	PIPE_LOCK(rpipe);
1262101768Srwatson#ifdef MAC
1263102115Srwatson	error = mac_check_pipe_poll(active_cred, rpipe);
1264101768Srwatson	if (error)
1265101768Srwatson		goto locked_error;
1266101768Srwatson#endif
126729356Speter	if (events & (POLLIN | POLLRDNORM))
126829356Speter		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
126929356Speter		    (rpipe->pipe_buffer.cnt > 0) ||
127029356Speter		    (rpipe->pipe_state & PIPE_EOF))
127129356Speter			revents |= events & (POLLIN | POLLRDNORM);
127213675Sdyson
127329356Speter	if (events & (POLLOUT | POLLWRNORM))
127429356Speter		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
127543311Sdillon		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
127643311Sdillon		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
127729356Speter			revents |= events & (POLLOUT | POLLWRNORM);
127813675Sdyson
127929356Speter	if ((rpipe->pipe_state & PIPE_EOF) ||
128029356Speter	    (wpipe == NULL) ||
128129356Speter	    (wpipe->pipe_state & PIPE_EOF))
128229356Speter		revents |= POLLHUP;
128329356Speter
128429356Speter	if (revents == 0) {
128529356Speter		if (events & (POLLIN | POLLRDNORM)) {
128683805Sjhb			selrecord(td, &rpipe->pipe_sel);
128729356Speter			rpipe->pipe_state |= PIPE_SEL;
128813675Sdyson		}
128913675Sdyson
129029356Speter		if (events & (POLLOUT | POLLWRNORM)) {
129183805Sjhb			selrecord(td, &wpipe->pipe_sel);
129230164Speter			wpipe->pipe_state |= PIPE_SEL;
129313907Sdyson		}
129413675Sdyson	}
1295101768Srwatson#ifdef MAC
1296101768Srwatsonlocked_error:
1297101768Srwatson#endif
129891362Salfred	PIPE_UNLOCK(rpipe);
129929356Speter
130029356Speter	return (revents);
130113675Sdyson}
130213675Sdyson
130398989Salfred/*
130498989Salfred * We shouldn't need locks here: we only read fields that may change
130598989Salfred * underneath us, and a momentarily stale snapshot is harmless for stat.
130698989Salfred */
130752983Speterstatic int
1308101983Srwatsonpipe_stat(fp, ub, active_cred, td)
130952983Speter	struct file *fp;
131052983Speter	struct stat *ub;
1311101983Srwatson	struct ucred *active_cred;
131283366Sjulian	struct thread *td;
131313675Sdyson{
1314109153Sdillon	struct pipe *pipe = fp->f_data;
1315101768Srwatson#ifdef MAC
1316101768Srwatson	int error;
131752983Speter
1318104269Srwatson	PIPE_LOCK(pipe);
1319102115Srwatson	error = mac_check_pipe_stat(active_cred, pipe);
1320104269Srwatson	PIPE_UNLOCK(pipe);
1321101768Srwatson	if (error)
1322101768Srwatson		return (error);
1323101768Srwatson#endif
1324100527Salfred	bzero(ub, sizeof(*ub));
132517124Sbde	ub->st_mode = S_IFIFO;
132613907Sdyson	ub->st_blksize = pipe->pipe_buffer.size;
132713675Sdyson	ub->st_size = pipe->pipe_buffer.cnt;
132813675Sdyson	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
132934901Sphk	ub->st_atimespec = pipe->pipe_atime;
133034901Sphk	ub->st_mtimespec = pipe->pipe_mtime;
133134901Sphk	ub->st_ctimespec = pipe->pipe_ctime;
133260404Schris	ub->st_uid = fp->f_cred->cr_uid;
133360404Schris	ub->st_gid = fp->f_cred->cr_gid;
133417124Sbde	/*
133560404Schris	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
133617124Sbde	 * XXX (st_dev, st_ino) should be unique.
133717124Sbde	 */
133876760Salfred	return (0);
133913675Sdyson}
134013675Sdyson
134113675Sdyson/* ARGSUSED */
134213675Sdysonstatic int
134383366Sjulianpipe_close(fp, td)
134413675Sdyson	struct file *fp;
134583366Sjulian	struct thread *td;
134613675Sdyson{
1347109153Sdillon	struct pipe *cpipe = fp->f_data;
134816322Sgpalmer
134949413Sgreen	fp->f_ops = &badfileops;
1350109153Sdillon	fp->f_data = NULL;
135196122Salfred	funsetown(&cpipe->pipe_sigio);
135213675Sdyson	pipeclose(cpipe);
135376760Salfred	return (0);
135413675Sdyson}
135513675Sdyson
135676364Salfredstatic void
135776364Salfredpipe_free_kmem(cpipe)
135876364Salfred	struct pipe *cpipe;
135976364Salfred{
136091412Salfred
136191412Salfred	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
136291412Salfred	       ("pipespace: pipe mutex locked"));
136376364Salfred
136476364Salfred	if (cpipe->pipe_buffer.buffer != NULL) {
136576364Salfred		if (cpipe->pipe_buffer.size > PIPE_SIZE)
1366117364Ssilby			atomic_subtract_int(&nbigpipe, 1);
1367110816Salc		atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size);
1368117325Ssilby		atomic_subtract_int(&amountpipes, 1);
1369118764Ssilby		vm_map_remove(pipe_map,
1370118764Ssilby		    (vm_offset_t)cpipe->pipe_buffer.buffer,
1371118764Ssilby		    (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size);
137276364Salfred		cpipe->pipe_buffer.buffer = NULL;
137376364Salfred	}
137476364Salfred#ifndef PIPE_NODIRECT
1375102241Sarchie	if (cpipe->pipe_map.kva != 0) {
1376118764Ssilby		atomic_subtract_int(&amountpipekvawired,
1377110816Salc		    cpipe->pipe_buffer.size + PAGE_SIZE);
137876364Salfred		kmem_free(kernel_map,
137976364Salfred			cpipe->pipe_map.kva,
138076364Salfred			cpipe->pipe_buffer.size + PAGE_SIZE);
138176364Salfred		cpipe->pipe_map.cnt = 0;
138276364Salfred		cpipe->pipe_map.kva = 0;
138376364Salfred		cpipe->pipe_map.pos = 0;
138476364Salfred		cpipe->pipe_map.npages = 0;
138576364Salfred	}
138676364Salfred#endif
138776364Salfred}
138876364Salfred
138913675Sdyson/*
139013675Sdyson * shutdown the pipe
139113675Sdyson */
139213675Sdysonstatic void
139313675Sdysonpipeclose(cpipe)
139413675Sdyson	struct pipe *cpipe;
139513675Sdyson{
139613907Sdyson	struct pipe *ppipe;
139791968Salfred	int hadpeer;
139876364Salfred
139991968Salfred	if (cpipe == NULL)
140091968Salfred		return;
140191968Salfred
140291968Salfred	hadpeer = 0;
140391968Salfred
140491968Salfred	/* partially created pipes won't have a valid mutex. */
140591968Salfred	if (PIPE_MTX(cpipe) != NULL)
140691362Salfred		PIPE_LOCK(cpipe);
140713907Sdyson
140891968Salfred	pipeselwakeup(cpipe);
140913907Sdyson
141091968Salfred	/*
141191968Salfred	 * If the other side is blocked, wake it up saying that
141291968Salfred	 * we want to close it down.
141391968Salfred	 */
141491968Salfred	while (cpipe->pipe_busy) {
141591968Salfred		wakeup(cpipe);
141691968Salfred		cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
141791968Salfred		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
141891968Salfred	}
141913675Sdyson
1420101768Srwatson#ifdef MAC
1421101768Srwatson	if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
1422101768Srwatson		mac_destroy_pipe(cpipe);
1423101768Srwatson#endif
1424101768Srwatson
142591968Salfred	/*
142691968Salfred	 * Disconnect from peer
142791968Salfred	 */
142891968Salfred	if ((ppipe = cpipe->pipe_peer) != NULL) {
142991968Salfred		hadpeer++;
143091968Salfred		pipeselwakeup(ppipe);
143113907Sdyson
143291968Salfred		ppipe->pipe_state |= PIPE_EOF;
143391968Salfred		wakeup(ppipe);
143491968Salfred		KNOTE(&ppipe->pipe_sel.si_note, 0);
143591968Salfred		ppipe->pipe_peer = NULL;
143691968Salfred	}
143791968Salfred	/*
143891968Salfred	 * free resources
143991968Salfred	 */
144091968Salfred	if (PIPE_MTX(cpipe) != NULL) {
144191968Salfred		PIPE_UNLOCK(cpipe);
144291968Salfred		if (!hadpeer) {
144391968Salfred			mtx_destroy(PIPE_MTX(cpipe));
144491968Salfred			free(PIPE_MTX(cpipe), M_TEMP);
144513675Sdyson		}
144613675Sdyson	}
144791968Salfred	pipe_free_kmem(cpipe);
144892751Sjeff	uma_zfree(pipe_zone, cpipe);
144913675Sdyson}
145059288Sjlemon
145172521Sjlemon/*ARGSUSED*/
145259288Sjlemonstatic int
145372521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn)
145459288Sjlemon{
145589306Salfred	struct pipe *cpipe;
145659288Sjlemon
1457109153Sdillon	cpipe = kn->kn_fp->f_data;
145872521Sjlemon	switch (kn->kn_filter) {
145972521Sjlemon	case EVFILT_READ:
146072521Sjlemon		kn->kn_fop = &pipe_rfiltops;
146172521Sjlemon		break;
146272521Sjlemon	case EVFILT_WRITE:
146372521Sjlemon		kn->kn_fop = &pipe_wfiltops;
146478292Sjlemon		cpipe = cpipe->pipe_peer;
1465101382Sdes		if (cpipe == NULL)
1466101382Sdes			/* other end of pipe has been closed */
1467118929Sjmg			return (EPIPE);
146872521Sjlemon		break;
146972521Sjlemon	default:
147072521Sjlemon		return (1);
147172521Sjlemon	}
147278292Sjlemon
147391372Salfred	PIPE_LOCK(cpipe);
147478292Sjlemon	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
147591372Salfred	PIPE_UNLOCK(cpipe);
147659288Sjlemon	return (0);
147759288Sjlemon}
147859288Sjlemon
147959288Sjlemonstatic void
148059288Sjlemonfilt_pipedetach(struct knote *kn)
148159288Sjlemon{
1482121018Sjmg	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
148359288Sjlemon
1484121018Sjmg	if (kn->kn_filter == EVFILT_WRITE) {
1485121018Sjmg		if (cpipe->pipe_peer == NULL)
1486121018Sjmg			return;
1487121018Sjmg		cpipe = cpipe->pipe_peer;
1488121018Sjmg	}
1489121018Sjmg
149091372Salfred	PIPE_LOCK(cpipe);
149178292Sjlemon	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
149291372Salfred	PIPE_UNLOCK(cpipe);
149359288Sjlemon}
149459288Sjlemon
149559288Sjlemon/*ARGSUSED*/
149659288Sjlemonstatic int
149759288Sjlemonfilt_piperead(struct knote *kn, long hint)
149859288Sjlemon{
1499109153Sdillon	struct pipe *rpipe = kn->kn_fp->f_data;
150059288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
150159288Sjlemon
150291372Salfred	PIPE_LOCK(rpipe);
150359288Sjlemon	kn->kn_data = rpipe->pipe_buffer.cnt;
150459288Sjlemon	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
150559288Sjlemon		kn->kn_data = rpipe->pipe_map.cnt;
150659288Sjlemon
150759288Sjlemon	if ((rpipe->pipe_state & PIPE_EOF) ||
150859288Sjlemon	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
150991372Salfred		kn->kn_flags |= EV_EOF;
151091372Salfred		PIPE_UNLOCK(rpipe);
151159288Sjlemon		return (1);
151259288Sjlemon	}
151391372Salfred	PIPE_UNLOCK(rpipe);
151459288Sjlemon	return (kn->kn_data > 0);
151559288Sjlemon}
151659288Sjlemon
151759288Sjlemon/*ARGSUSED*/
151859288Sjlemonstatic int
151959288Sjlemonfilt_pipewrite(struct knote *kn, long hint)
152059288Sjlemon{
1521109153Sdillon	struct pipe *rpipe = kn->kn_fp->f_data;
152259288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
152359288Sjlemon
152491372Salfred	PIPE_LOCK(rpipe);
152559288Sjlemon	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
152659288Sjlemon		kn->kn_data = 0;
152759288Sjlemon		kn->kn_flags |= EV_EOF;
152891372Salfred		PIPE_UNLOCK(rpipe);
152959288Sjlemon		return (1);
153059288Sjlemon	}
153159288Sjlemon	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
153265855Sjlemon	if (wpipe->pipe_state & PIPE_DIRECTW)
153359288Sjlemon		kn->kn_data = 0;
153459288Sjlemon
153591372Salfred	PIPE_UNLOCK(rpipe);
153659288Sjlemon	return (kn->kn_data >= PIPE_BUF);
153759288Sjlemon}
1538