sys_pipe.c revision 118929
/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.
 *
 * In order to limit the resource use of pipes, two sysctls exist:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.  Whenever the amount in use
 * exceeds half of this value, all new pipes will be created with size
 * SMALL_PIPE_SIZE, rather than PIPE_SIZE.  Big pipe creation will be limited
 * as well.  This value is loader tunable only.
 *
 * kern.ipc.maxpipekvawired - This value limits the amount of memory that may
 * be wired in order to facilitate direct copies using page flipping.
 * Whenever this value is exceeded, pipes will fall back to using regular
 * copies.  This value is sysctl controllable at all times.
 *
 * These values are autotuned in subr_param.c.
 *
 * Memory usage may be monitored through the sysctls
 * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired.
 *
 */

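/*
 * Illustrative sketch, not part of the original file: the monitoring
 * sysctls named above are plain integers (see the SYSCTL_INT()
 * declarations further down), so a userland program can poll them with
 * sysctlbyname(3).  This standalone program is an assumed example only.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int pipes, pipekva, pipekvawired;
 *		size_t len;
 *
 *		len = sizeof(pipes);
 *		if (sysctlbyname("kern.ipc.pipes", &pipes, &len, NULL, 0) == -1)
 *			return (1);
 *		len = sizeof(pipekva);
 *		if (sysctlbyname("kern.ipc.pipekva", &pipekva, &len, NULL, 0) == -1)
 *			return (1);
 *		len = sizeof(pipekvawired);
 *		if (sysctlbyname("kern.ipc.pipekvawired", &pipekvawired, &len,
 *		    NULL, 0) == -1)
 *			return (1);
 *		printf("%d pipes, %d bytes of pipe KVA, %d bytes wired\n",
 *		    pipes, pipekva, pipekvawired);
 *		return (0);
 *	}
 */
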
68116182Sobrien#include <sys/cdefs.h>
69116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 118929 2003-08-15 04:31:01Z jmg $");
70116182Sobrien
71101768Srwatson#include "opt_mac.h"
72101768Srwatson
7313675Sdyson#include <sys/param.h>
7413675Sdyson#include <sys/systm.h>
7524131Sbde#include <sys/fcntl.h>
7613675Sdyson#include <sys/file.h>
7713675Sdyson#include <sys/filedesc.h>
7824206Sbde#include <sys/filio.h>
7991372Salfred#include <sys/kernel.h>
8076166Smarkm#include <sys/lock.h>
81101768Srwatson#include <sys/mac.h>
8276827Salfred#include <sys/mutex.h>
8324206Sbde#include <sys/ttycom.h>
8413675Sdyson#include <sys/stat.h>
8591968Salfred#include <sys/malloc.h>
8629356Speter#include <sys/poll.h>
8770834Swollman#include <sys/selinfo.h>
8813675Sdyson#include <sys/signalvar.h>
89117325Ssilby#include <sys/sysctl.h>
9013675Sdyson#include <sys/sysproto.h>
9113675Sdyson#include <sys/pipe.h>
9276166Smarkm#include <sys/proc.h>
9355112Sbde#include <sys/vnode.h>
9434924Sbde#include <sys/uio.h>
9559288Sjlemon#include <sys/event.h>
9613675Sdyson
9713675Sdyson#include <vm/vm.h>
9813675Sdyson#include <vm/vm_param.h>
9913675Sdyson#include <vm/vm_object.h>
10013675Sdyson#include <vm/vm_kern.h>
10113675Sdyson#include <vm/vm_extern.h>
10213675Sdyson#include <vm/pmap.h>
10313675Sdyson#include <vm/vm_map.h>
10413907Sdyson#include <vm/vm_page.h>
10592751Sjeff#include <vm/uma.h>
10613675Sdyson
10714037Sdyson/*
10814037Sdyson * Use this define if you want to disable *fancy* VM things.  Expect an
10914037Sdyson * approx 30% decrease in transfer rate.  This could be useful for
11014037Sdyson * NetBSD or OpenBSD.
11114037Sdyson */
11214037Sdyson/* #define PIPE_NODIRECT */
11314037Sdyson
11414037Sdyson/*
11514037Sdyson * interfaces to the outside world
11614037Sdyson */
117108255Sphkstatic fo_rdwr_t	pipe_read;
118108255Sphkstatic fo_rdwr_t	pipe_write;
119108255Sphkstatic fo_ioctl_t	pipe_ioctl;
120108255Sphkstatic fo_poll_t	pipe_poll;
121108255Sphkstatic fo_kqfilter_t	pipe_kqfilter;
122108255Sphkstatic fo_stat_t	pipe_stat;
123108255Sphkstatic fo_close_t	pipe_close;
12413675Sdyson
12572521Sjlemonstatic struct fileops pipeops = {
126116546Sphk	.fo_read = pipe_read,
127116546Sphk	.fo_write = pipe_write,
128116546Sphk	.fo_ioctl = pipe_ioctl,
129116546Sphk	.fo_poll = pipe_poll,
130116546Sphk	.fo_kqfilter = pipe_kqfilter,
131116546Sphk	.fo_stat = pipe_stat,
132116546Sphk	.fo_close = pipe_close,
133116546Sphk	.fo_flags = DFLAG_PASSABLE
13472521Sjlemon};
13513675Sdyson
13659288Sjlemonstatic void	filt_pipedetach(struct knote *kn);
13759288Sjlemonstatic int	filt_piperead(struct knote *kn, long hint);
13859288Sjlemonstatic int	filt_pipewrite(struct knote *kn, long hint);
13959288Sjlemon
14072521Sjlemonstatic struct filterops pipe_rfiltops =
14172521Sjlemon	{ 1, NULL, filt_pipedetach, filt_piperead };
14272521Sjlemonstatic struct filterops pipe_wfiltops =
14372521Sjlemon	{ 1, NULL, filt_pipedetach, filt_pipewrite };
14459288Sjlemon
14592305Salfred#define PIPE_GET_GIANT(pipe)						\
14691362Salfred	do {								\
14792305Salfred		KASSERT(((pipe)->pipe_state & PIPE_LOCKFL) != 0,	\
14892305Salfred		    ("%s:%d PIPE_GET_GIANT: line pipe not locked",	\
14992305Salfred		     __FILE__, __LINE__));				\
15092305Salfred		PIPE_UNLOCK(pipe);					\
15191362Salfred		mtx_lock(&Giant);					\
15291362Salfred	} while (0)
15372521Sjlemon
15491362Salfred#define PIPE_DROP_GIANT(pipe)						\
15591362Salfred	do {								\
15691362Salfred		mtx_unlock(&Giant);					\
15792305Salfred		PIPE_LOCK(pipe);					\
15891362Salfred	} while (0)
15991362Salfred
/*
 * Default pipe buffer size(s); this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
16613907Sdyson#define MINPIPESIZE (PIPE_SIZE/3)
16713907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3)
16813675Sdyson
16913907Sdyson/*
17017163Sdyson * Limit the number of "big" pipes
17117163Sdyson */
17217163Sdyson#define LIMITBIGPIPES	32
17333181Seivindstatic int nbigpipe;
17417163Sdyson
175117325Ssilbystatic int amountpipes;
17617124Sbdestatic int amountpipekva;
177117325Ssilbystatic int amountpipekvawired;
17813907Sdyson
179117325SsilbySYSCTL_DECL(_kern_ipc);
180117325Ssilby
181118764SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD,
182117325Ssilby	   &maxpipekva, 0, "Pipe KVA limit");
183117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
184117325Ssilby	   &maxpipekvawired, 0, "Pipe KVA wired limit");
185117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
186117364Ssilby	   &amountpipes, 0, "Current # of pipes");
187117364SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
188117364Ssilby	   &nbigpipe, 0, "Current # of big pipes");
189117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
190117325Ssilby	   &amountpipekva, 0, "Pipe KVA usage");
191117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
192117325Ssilby	   &amountpipekvawired, 0, "Pipe wired KVA usage");
193117325Ssilby
19491413Salfredstatic void pipeinit(void *dummy __unused);
19591413Salfredstatic void pipeclose(struct pipe *cpipe);
19691413Salfredstatic void pipe_free_kmem(struct pipe *cpipe);
19791413Salfredstatic int pipe_create(struct pipe **cpipep);
19891413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch);
19991413Salfredstatic __inline void pipeunlock(struct pipe *cpipe);
20091413Salfredstatic __inline void pipeselwakeup(struct pipe *cpipe);
20114037Sdyson#ifndef PIPE_NODIRECT
20291413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
20391413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe);
20491413Salfredstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
20591413Salfredstatic void pipe_clone_write_buffer(struct pipe *wpipe);
20614037Sdyson#endif
20791413Salfredstatic int pipespace(struct pipe *cpipe, int size);
20813675Sdyson
20992751Sjeffstatic uma_zone_t pipe_zone;
21027899Sdyson
21191372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
21291372Salfred
21391372Salfredstatic void
21491372Salfredpipeinit(void *dummy __unused)
21591372Salfred{
216118880Salc
21792654Sjeff	pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL,
21892654Sjeff	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
219118880Salc	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
22091372Salfred}
22191372Salfred
22213675Sdyson/*
22313675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes
22413675Sdyson */
22513675Sdyson
22613675Sdyson/* ARGSUSED */
22713675Sdysonint
22883366Sjulianpipe(td, uap)
22983366Sjulian	struct thread *td;
23013675Sdyson	struct pipe_args /* {
23113675Sdyson		int	dummy;
23213675Sdyson	} */ *uap;
23313675Sdyson{
23483366Sjulian	struct filedesc *fdp = td->td_proc->p_fd;
23513675Sdyson	struct file *rf, *wf;
23613675Sdyson	struct pipe *rpipe, *wpipe;
23791968Salfred	struct mtx *pmtx;
23813675Sdyson	int fd, error;
23927899Sdyson
240111119Simp	pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO);
24191968Salfred
24276756Salfred	rpipe = wpipe = NULL;
24376364Salfred	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
24476364Salfred		pipeclose(rpipe);
24576364Salfred		pipeclose(wpipe);
24691968Salfred		free(pmtx, M_TEMP);
24776364Salfred		return (ENFILE);
24876364Salfred	}
24976364Salfred
25013907Sdyson	rpipe->pipe_state |= PIPE_DIRECTOK;
25113907Sdyson	wpipe->pipe_state |= PIPE_DIRECTOK;
25213675Sdyson
25383366Sjulian	error = falloc(td, &rf, &fd);
25470915Sdwmalone	if (error) {
25570915Sdwmalone		pipeclose(rpipe);
25670915Sdwmalone		pipeclose(wpipe);
25791968Salfred		free(pmtx, M_TEMP);
25870915Sdwmalone		return (error);
25970915Sdwmalone	}
26070915Sdwmalone	fhold(rf);
26183366Sjulian	td->td_retval[0] = fd;
26270915Sdwmalone
26370803Sdwmalone	/*
26470803Sdwmalone	 * Warning: once we've gotten past allocation of the fd for the
26570803Sdwmalone	 * read-side, we can only drop the read side via fdrop() in order
26670803Sdwmalone	 * to avoid races against processes which manage to dup() the read
26770803Sdwmalone	 * side while we are blocked trying to allocate the write side.
26870803Sdwmalone	 */
26989306Salfred	FILE_LOCK(rf);
27013675Sdyson	rf->f_flag = FREAD | FWRITE;
27113675Sdyson	rf->f_type = DTYPE_PIPE;
272109153Sdillon	rf->f_data = rpipe;
27313675Sdyson	rf->f_ops = &pipeops;
27489306Salfred	FILE_UNLOCK(rf);
27583366Sjulian	error = falloc(td, &wf, &fd);
27670915Sdwmalone	if (error) {
27789306Salfred		FILEDESC_LOCK(fdp);
27883366Sjulian		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
27983366Sjulian			fdp->fd_ofiles[td->td_retval[0]] = NULL;
28089306Salfred			FILEDESC_UNLOCK(fdp);
28183366Sjulian			fdrop(rf, td);
28289306Salfred		} else
28389306Salfred			FILEDESC_UNLOCK(fdp);
28483366Sjulian		fdrop(rf, td);
28570915Sdwmalone		/* rpipe has been closed by fdrop(). */
28670915Sdwmalone		pipeclose(wpipe);
28791968Salfred		free(pmtx, M_TEMP);
28870915Sdwmalone		return (error);
28970915Sdwmalone	}
29089306Salfred	FILE_LOCK(wf);
29113675Sdyson	wf->f_flag = FREAD | FWRITE;
29213675Sdyson	wf->f_type = DTYPE_PIPE;
293109153Sdillon	wf->f_data = wpipe;
29413675Sdyson	wf->f_ops = &pipeops;
29589306Salfred	FILE_UNLOCK(wf);
29683366Sjulian	td->td_retval[1] = fd;
29713675Sdyson	rpipe->pipe_peer = wpipe;
29813675Sdyson	wpipe->pipe_peer = rpipe;
299101768Srwatson#ifdef MAC
300101768Srwatson	/*
301101768Srwatson	 * struct pipe represents a pipe endpoint.  The MAC label is shared
302101768Srwatson	 * between the connected endpoints.  As a result mac_init_pipe() and
303101768Srwatson	 * mac_create_pipe() should only be called on one of the endpoints
304101768Srwatson	 * after they have been connected.
305101768Srwatson	 */
306101768Srwatson	mac_init_pipe(rpipe);
307101768Srwatson	mac_create_pipe(td->td_ucred, rpipe);
308101768Srwatson#endif
30993818Sjhb	mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
31091968Salfred	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
31183366Sjulian	fdrop(rf, td);
31213675Sdyson
31313675Sdyson	return (0);
31413675Sdyson}
31513675Sdyson
/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if that fails,
 * it retains the old buffer and returns ENOMEM.
 */
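/*
 * Usage sketch (an assumed illustration mirroring pipe_write() below,
 * not original code): pipespace() asserts that the pipe mutex is not
 * held, so a caller that already holds it takes the long-term
 * pipelock() first and drops the mutex across the resize:
 *
 *	if ((error = pipelock(wpipe, 1)) == 0) {
 *		PIPE_UNLOCK(wpipe);
 *		if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
 *			atomic_add_int(&nbigpipe, 1);
 *		PIPE_LOCK(wpipe);
 *		pipeunlock(wpipe);
 *	}
 */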
32276364Salfredstatic int
32376364Salfredpipespace(cpipe, size)
32413675Sdyson	struct pipe *cpipe;
32576364Salfred	int size;
32613675Sdyson{
32776364Salfred	struct vm_object *object;
32876364Salfred	caddr_t buffer;
32913688Sdyson	int npages, error;
330117325Ssilby	static int curfail = 0;
331117325Ssilby	static struct timeval lastfail;
33213675Sdyson
33391412Salfred	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
33491412Salfred	       ("pipespace: pipe mutex locked"));
33579224Sdillon
336118764Ssilby	size = round_page(size);
337118764Ssilby	npages = size / PAGE_SIZE;
33813675Sdyson	/*
33913675Sdyson	 * Create an object, I don't like the idea of paging to/from
34013675Sdyson	 * kernel_object.
34114037Sdyson	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
34213675Sdyson	 */
34376364Salfred	object = vm_object_allocate(OBJT_DEFAULT, npages);
344118764Ssilby	buffer = (caddr_t) vm_map_min(pipe_map);
34513675Sdyson
34613675Sdyson	/*
34713675Sdyson	 * Insert the object into the kernel map, and allocate kva for it.
34813675Sdyson	 * The map entry is, by default, pageable.
34914037Sdyson	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
35013675Sdyson	 */
351118764Ssilby	error = vm_map_find(pipe_map, object, 0,
35276364Salfred		(vm_offset_t *) &buffer, size, 1,
35313688Sdyson		VM_PROT_ALL, VM_PROT_ALL, 0);
35413675Sdyson
35576364Salfred	if (error != KERN_SUCCESS) {
35676364Salfred		vm_object_deallocate(object);
357118764Ssilby		if (ppsratecheck(&lastfail, &curfail, 1))
358118764Ssilby			printf("kern.maxpipekva exceeded, please see tuning(7).\n");
35976364Salfred		return (ENOMEM);
36076364Salfred	}
36176364Salfred
36276364Salfred	/* free old resources if we're resizing */
36376364Salfred	pipe_free_kmem(cpipe);
36476364Salfred	cpipe->pipe_buffer.buffer = buffer;
36576364Salfred	cpipe->pipe_buffer.size = size;
36676364Salfred	cpipe->pipe_buffer.in = 0;
36776364Salfred	cpipe->pipe_buffer.out = 0;
36876364Salfred	cpipe->pipe_buffer.cnt = 0;
369117325Ssilby	atomic_add_int(&amountpipes, 1);
370110816Salc	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
37176364Salfred	return (0);
37213907Sdyson}
37313688Sdyson
37413907Sdyson/*
37513907Sdyson * initialize and allocate VM and memory for pipe
37613907Sdyson */
37776364Salfredstatic int
37876364Salfredpipe_create(cpipep)
37976364Salfred	struct pipe **cpipep;
38076364Salfred{
38113907Sdyson	struct pipe *cpipe;
38276364Salfred	int error;
38313907Sdyson
384111119Simp	*cpipep = uma_zalloc(pipe_zone, M_WAITOK);
38576364Salfred	if (*cpipep == NULL)
38676364Salfred		return (ENOMEM);
38717163Sdyson
38876364Salfred	cpipe = *cpipep;
38976364Salfred
39076364Salfred	/*
39176364Salfred	 * protect so pipeclose() doesn't follow a junk pointer
39276364Salfred	 * if pipespace() fails.
39376364Salfred	 */
39476754Salfred	bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
39513675Sdyson	cpipe->pipe_state = 0;
39613675Sdyson	cpipe->pipe_peer = NULL;
39713675Sdyson	cpipe->pipe_busy = 0;
39813907Sdyson
39914037Sdyson#ifndef PIPE_NODIRECT
40013907Sdyson	/*
40113907Sdyson	 * pipe data structure initializations to support direct pipe I/O
40213907Sdyson	 */
40313907Sdyson	cpipe->pipe_map.cnt = 0;
40413907Sdyson	cpipe->pipe_map.kva = 0;
40513907Sdyson	cpipe->pipe_map.pos = 0;
40613907Sdyson	cpipe->pipe_map.npages = 0;
40717124Sbde	/* cpipe->pipe_map.ms[] = invalid */
40814037Sdyson#endif
40976364Salfred
41091412Salfred	cpipe->pipe_mtxp = NULL;	/* avoid pipespace assertion */
411117325Ssilby	/*
412117325Ssilby	 * Reduce to 1/4th pipe size if we're over our global max.
413117325Ssilby	 */
414118764Ssilby	if (amountpipekva > maxpipekva / 2)
415117325Ssilby		error = pipespace(cpipe, SMALL_PIPE_SIZE);
416117325Ssilby	else
417117325Ssilby		error = pipespace(cpipe, PIPE_SIZE);
41876760Salfred	if (error)
41976364Salfred		return (error);
42076364Salfred
42176364Salfred	vfs_timestamp(&cpipe->pipe_ctime);
42276364Salfred	cpipe->pipe_atime = cpipe->pipe_ctime;
42376364Salfred	cpipe->pipe_mtime = cpipe->pipe_ctime;
42476364Salfred
42576364Salfred	return (0);
42613675Sdyson}
42713675Sdyson
42813675Sdyson
42913675Sdyson/*
43013675Sdyson * lock a pipe for I/O, blocking other access
43113675Sdyson */
43213675Sdysonstatic __inline int
43313907Sdysonpipelock(cpipe, catch)
43413675Sdyson	struct pipe *cpipe;
43513907Sdyson	int catch;
43613675Sdyson{
43713776Sdyson	int error;
43876364Salfred
43991362Salfred	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
44091362Salfred	while (cpipe->pipe_state & PIPE_LOCKFL) {
44113675Sdyson		cpipe->pipe_state |= PIPE_LWANT;
44291362Salfred		error = msleep(cpipe, PIPE_MTX(cpipe),
44391362Salfred		    catch ? (PRIBIO | PCATCH) : PRIBIO,
44476760Salfred		    "pipelk", 0);
44576760Salfred		if (error != 0)
44676760Salfred			return (error);
44713675Sdyson	}
44891362Salfred	cpipe->pipe_state |= PIPE_LOCKFL;
44976760Salfred	return (0);
45013675Sdyson}
45113675Sdyson
45213675Sdyson/*
45313675Sdyson * unlock a pipe I/O lock
45413675Sdyson */
45513675Sdysonstatic __inline void
45613675Sdysonpipeunlock(cpipe)
45713675Sdyson	struct pipe *cpipe;
45813675Sdyson{
45976364Salfred
46091362Salfred	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
46191362Salfred	cpipe->pipe_state &= ~PIPE_LOCKFL;
46213675Sdyson	if (cpipe->pipe_state & PIPE_LWANT) {
46313675Sdyson		cpipe->pipe_state &= ~PIPE_LWANT;
46414177Sdyson		wakeup(cpipe);
46513675Sdyson	}
46613675Sdyson}
46713675Sdyson
46814037Sdysonstatic __inline void
46914037Sdysonpipeselwakeup(cpipe)
47014037Sdyson	struct pipe *cpipe;
47114037Sdyson{
47276364Salfred
47314037Sdyson	if (cpipe->pipe_state & PIPE_SEL) {
47414037Sdyson		cpipe->pipe_state &= ~PIPE_SEL;
47514037Sdyson		selwakeup(&cpipe->pipe_sel);
47614037Sdyson	}
47741086Struckman	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
47895883Salfred		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
47959288Sjlemon	KNOTE(&cpipe->pipe_sel.si_note, 0);
48014037Sdyson}
48114037Sdyson
48213675Sdyson/* ARGSUSED */
48313675Sdysonstatic int
484101941Srwatsonpipe_read(fp, uio, active_cred, flags, td)
48513675Sdyson	struct file *fp;
48613675Sdyson	struct uio *uio;
487101941Srwatson	struct ucred *active_cred;
48883366Sjulian	struct thread *td;
48945311Sdt	int flags;
49013675Sdyson{
491109153Sdillon	struct pipe *rpipe = fp->f_data;
49247748Salc	int error;
49313675Sdyson	int nread = 0;
49418863Sdyson	u_int size;
49513675Sdyson
49691362Salfred	PIPE_LOCK(rpipe);
49713675Sdyson	++rpipe->pipe_busy;
49847748Salc	error = pipelock(rpipe, 1);
49947748Salc	if (error)
50047748Salc		goto unlocked_error;
50147748Salc
502101768Srwatson#ifdef MAC
503102115Srwatson	error = mac_check_pipe_read(active_cred, rpipe);
504101768Srwatson	if (error)
505101768Srwatson		goto locked_error;
506101768Srwatson#endif
507101768Srwatson
50813675Sdyson	while (uio->uio_resid) {
50913907Sdyson		/*
51013907Sdyson		 * normal pipe buffer receive
51113907Sdyson		 */
51213675Sdyson		if (rpipe->pipe_buffer.cnt > 0) {
51318863Sdyson			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
51413675Sdyson			if (size > rpipe->pipe_buffer.cnt)
51513675Sdyson				size = rpipe->pipe_buffer.cnt;
51618863Sdyson			if (size > (u_int) uio->uio_resid)
51718863Sdyson				size = (u_int) uio->uio_resid;
51847748Salc
51991362Salfred			PIPE_UNLOCK(rpipe);
520116127Smux			error = uiomove(
521116127Smux			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
522116127Smux			    size, uio);
52391362Salfred			PIPE_LOCK(rpipe);
52476760Salfred			if (error)
52513675Sdyson				break;
52676760Salfred
52713675Sdyson			rpipe->pipe_buffer.out += size;
52813675Sdyson			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
52913675Sdyson				rpipe->pipe_buffer.out = 0;
53013675Sdyson
53113675Sdyson			rpipe->pipe_buffer.cnt -= size;
53247748Salc
53347748Salc			/*
53447748Salc			 * If there is no more to read in the pipe, reset
53547748Salc			 * its pointers to the beginning.  This improves
53647748Salc			 * cache hit stats.
53747748Salc			 */
53847748Salc			if (rpipe->pipe_buffer.cnt == 0) {
53947748Salc				rpipe->pipe_buffer.in = 0;
54047748Salc				rpipe->pipe_buffer.out = 0;
54147748Salc			}
54213675Sdyson			nread += size;
54314037Sdyson#ifndef PIPE_NODIRECT
54413907Sdyson		/*
54513907Sdyson		 * Direct copy, bypassing a kernel buffer.
54613907Sdyson		 */
54713907Sdyson		} else if ((size = rpipe->pipe_map.cnt) &&
54847748Salc			   (rpipe->pipe_state & PIPE_DIRECTW)) {
54947748Salc			caddr_t	va;
55018863Sdyson			if (size > (u_int) uio->uio_resid)
55118863Sdyson				size = (u_int) uio->uio_resid;
55247748Salc
55376760Salfred			va = (caddr_t) rpipe->pipe_map.kva +
55476760Salfred			    rpipe->pipe_map.pos;
55591362Salfred			PIPE_UNLOCK(rpipe);
55647748Salc			error = uiomove(va, size, uio);
55791362Salfred			PIPE_LOCK(rpipe);
55813907Sdyson			if (error)
55913907Sdyson				break;
56013907Sdyson			nread += size;
56113907Sdyson			rpipe->pipe_map.pos += size;
56213907Sdyson			rpipe->pipe_map.cnt -= size;
56313907Sdyson			if (rpipe->pipe_map.cnt == 0) {
56413907Sdyson				rpipe->pipe_state &= ~PIPE_DIRECTW;
56513907Sdyson				wakeup(rpipe);
56613907Sdyson			}
56714037Sdyson#endif
56813675Sdyson		} else {
56913675Sdyson			/*
57013675Sdyson			 * detect EOF condition
57176760Salfred			 * read returns 0 on EOF, no need to set error
57213675Sdyson			 */
57376760Salfred			if (rpipe->pipe_state & PIPE_EOF)
57413675Sdyson				break;
57543623Sdillon
57613675Sdyson			/*
57713675Sdyson			 * If the "write-side" has been blocked, wake it up now.
57813675Sdyson			 */
57913675Sdyson			if (rpipe->pipe_state & PIPE_WANTW) {
58013675Sdyson				rpipe->pipe_state &= ~PIPE_WANTW;
58113675Sdyson				wakeup(rpipe);
58213675Sdyson			}
58343623Sdillon
58443623Sdillon			/*
58547748Salc			 * Break if some data was read.
58643623Sdillon			 */
58747748Salc			if (nread > 0)
58813675Sdyson				break;
58916960Sdyson
59043623Sdillon			/*
591116127Smux			 * Unlock the pipe buffer for our remaining processing.
592116127Smux			 * We will either break out with an error or we will
593116127Smux			 * sleep and relock to loop.
59443623Sdillon			 */
59547748Salc			pipeunlock(rpipe);
59643623Sdillon
59713675Sdyson			/*
59847748Salc			 * Handle non-blocking mode operation or
59947748Salc			 * wait for more data.
60013675Sdyson			 */
60176760Salfred			if (fp->f_flag & FNONBLOCK) {
60247748Salc				error = EAGAIN;
60376760Salfred			} else {
60447748Salc				rpipe->pipe_state |= PIPE_WANTR;
60591362Salfred				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
60691362Salfred				    PRIBIO | PCATCH,
60777140Salfred				    "piperd", 0)) == 0)
60847748Salc					error = pipelock(rpipe, 1);
60913675Sdyson			}
61047748Salc			if (error)
61147748Salc				goto unlocked_error;
61213675Sdyson		}
61313675Sdyson	}
614101768Srwatson#ifdef MAC
615101768Srwatsonlocked_error:
616101768Srwatson#endif
61747748Salc	pipeunlock(rpipe);
61813675Sdyson
61991362Salfred	/* XXX: should probably do this before getting any locks. */
62024101Sbde	if (error == 0)
62155112Sbde		vfs_timestamp(&rpipe->pipe_atime);
62247748Salcunlocked_error:
62347748Salc	--rpipe->pipe_busy;
62413913Sdyson
62547748Salc	/*
62647748Salc	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
62747748Salc	 */
62813675Sdyson	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
62913675Sdyson		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
63013675Sdyson		wakeup(rpipe);
63113675Sdyson	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
63213675Sdyson		/*
63347748Salc		 * Handle write blocking hysteresis.
63413675Sdyson		 */
63513675Sdyson		if (rpipe->pipe_state & PIPE_WANTW) {
63613675Sdyson			rpipe->pipe_state &= ~PIPE_WANTW;
63713675Sdyson			wakeup(rpipe);
63813675Sdyson		}
63913675Sdyson	}
64014037Sdyson
64114802Sdyson	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
64214037Sdyson		pipeselwakeup(rpipe);
64314037Sdyson
64491362Salfred	PIPE_UNLOCK(rpipe);
64576760Salfred	return (error);
64613675Sdyson}
64713675Sdyson
64814037Sdyson#ifndef PIPE_NODIRECT
/*
 * Map the sending process's buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
65313675Sdysonstatic int
65413907Sdysonpipe_build_write_buffer(wpipe, uio)
65513907Sdyson	struct pipe *wpipe;
65613675Sdyson	struct uio *uio;
65713675Sdyson{
65818863Sdyson	u_int size;
65994566Stmm	int i;
660112569Sjake	vm_offset_t addr, endaddr;
661112569Sjake	vm_paddr_t paddr;
66213907Sdyson
66379224Sdillon	GIANT_REQUIRED;
66491412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
66579224Sdillon
66618863Sdyson	size = (u_int) uio->uio_iov->iov_len;
66713907Sdyson	if (size > wpipe->pipe_buffer.size)
66813907Sdyson		size = wpipe->pipe_buffer.size;
66913907Sdyson
67040286Sdg	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
67176760Salfred	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
67276760Salfred	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
67394566Stmm		vm_page_t m;
67494566Stmm
		/*
		 * vm_fault_quick() can sleep.  Consequently,
		 * vm_page_lock_queues() and vm_page_unlock_queues()
		 * should not be performed outside of this loop.
		 */
68051474Sdillon		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
68194608Stmm		    (paddr = pmap_extract(vmspace_pmap(curproc->p_vmspace),
68294608Stmm		     addr)) == 0) {
68313907Sdyson			int j;
68476760Salfred
68599899Salc			vm_page_lock_queues();
686117325Ssilby			for (j = 0; j < i; j++) {
687118757Salc				vm_page_unhold(wpipe->pipe_map.ms[j]);
688117325Ssilby			}
68999899Salc			vm_page_unlock_queues();
69076760Salfred			return (EFAULT);
69113907Sdyson		}
69213907Sdyson
69394566Stmm		m = PHYS_TO_VM_PAGE(paddr);
69499899Salc		vm_page_lock_queues();
695118757Salc		vm_page_hold(m);
69699899Salc		vm_page_unlock_queues();
69713907Sdyson		wpipe->pipe_map.ms[i] = m;
69813907Sdyson	}
69913907Sdyson
70013907Sdyson/*
70113907Sdyson * set up the control block
70213907Sdyson */
70313907Sdyson	wpipe->pipe_map.npages = i;
70476760Salfred	wpipe->pipe_map.pos =
70576760Salfred	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
70613907Sdyson	wpipe->pipe_map.cnt = size;
70713907Sdyson
70813907Sdyson/*
70913907Sdyson * and map the buffer
71013907Sdyson */
71113907Sdyson	if (wpipe->pipe_map.kva == 0) {
71213912Sdyson		/*
71313912Sdyson		 * We need to allocate space for an extra page because the
71413912Sdyson		 * address range might (will) span pages at times.
71513912Sdyson		 */
716118220Salc		wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map,
71713912Sdyson			wpipe->pipe_buffer.size + PAGE_SIZE);
718118764Ssilby		atomic_add_int(&amountpipekvawired,
719110816Salc		    wpipe->pipe_buffer.size + PAGE_SIZE);
72013907Sdyson	}
72113907Sdyson	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
72213907Sdyson		wpipe->pipe_map.npages);
72313907Sdyson
72413907Sdyson/*
72513907Sdyson * and update the uio data
72613907Sdyson */
72713907Sdyson
72813907Sdyson	uio->uio_iov->iov_len -= size;
729104908Smike	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
73013907Sdyson	if (uio->uio_iov->iov_len == 0)
73113907Sdyson		uio->uio_iov++;
73213907Sdyson	uio->uio_resid -= size;
73313907Sdyson	uio->uio_offset += size;
73476760Salfred	return (0);
73513907Sdyson}
73613907Sdyson
73713907Sdyson/*
73813907Sdyson * unmap and unwire the process buffer
73913907Sdyson */
74013907Sdysonstatic void
74113907Sdysonpipe_destroy_write_buffer(wpipe)
74276760Salfred	struct pipe *wpipe;
74313907Sdyson{
74413907Sdyson	int i;
74576364Salfred
74679224Sdillon	GIANT_REQUIRED;
74791412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
74879224Sdillon
74917163Sdyson	if (wpipe->pipe_map.kva) {
75017163Sdyson		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
75113907Sdyson
752118764Ssilby		if (amountpipekvawired > maxpipekvawired / 2) {
753118764Ssilby			/* Conserve address space */
75413907Sdyson			vm_offset_t kva = wpipe->pipe_map.kva;
75513907Sdyson			wpipe->pipe_map.kva = 0;
75613907Sdyson			kmem_free(kernel_map, kva,
75713912Sdyson				wpipe->pipe_buffer.size + PAGE_SIZE);
758118764Ssilby			atomic_subtract_int(&amountpipekvawired,
759110816Salc			    wpipe->pipe_buffer.size + PAGE_SIZE);
76013907Sdyson		}
76113907Sdyson	}
76299899Salc	vm_page_lock_queues();
763117325Ssilby	for (i = 0; i < wpipe->pipe_map.npages; i++) {
764118757Salc		vm_page_unhold(wpipe->pipe_map.ms[i]);
765117325Ssilby	}
76699899Salc	vm_page_unlock_queues();
76791653Stanimura	wpipe->pipe_map.npages = 0;
76813907Sdyson}
76913907Sdyson
77013907Sdyson/*
77113907Sdyson * In the case of a signal, the writing process might go away.  This
77213907Sdyson * code copies the data into the circular buffer so that the source
77313907Sdyson * pages can be freed without loss of data.
77413907Sdyson */
77513907Sdysonstatic void
77613907Sdysonpipe_clone_write_buffer(wpipe)
77776364Salfred	struct pipe *wpipe;
77813907Sdyson{
77913907Sdyson	int size;
78013907Sdyson	int pos;
78113907Sdyson
78291362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
78313907Sdyson	size = wpipe->pipe_map.cnt;
78413907Sdyson	pos = wpipe->pipe_map.pos;
78513907Sdyson
78613907Sdyson	wpipe->pipe_buffer.in = size;
78713907Sdyson	wpipe->pipe_buffer.out = 0;
78813907Sdyson	wpipe->pipe_buffer.cnt = size;
78913907Sdyson	wpipe->pipe_state &= ~PIPE_DIRECTW;
79013907Sdyson
79191412Salfred	PIPE_GET_GIANT(wpipe);
79292959Salfred	bcopy((caddr_t) wpipe->pipe_map.kva + pos,
793100527Salfred	    wpipe->pipe_buffer.buffer, size);
79413907Sdyson	pipe_destroy_write_buffer(wpipe);
79591412Salfred	PIPE_DROP_GIANT(wpipe);
79613907Sdyson}
79713907Sdyson
/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set up.
 */
80513907Sdysonstatic int
80613907Sdysonpipe_direct_write(wpipe, uio)
80713907Sdyson	struct pipe *wpipe;
80813907Sdyson	struct uio *uio;
80913907Sdyson{
81013907Sdyson	int error;
81176364Salfred
81213951Sdysonretry:
81391362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
81413907Sdyson	while (wpipe->pipe_state & PIPE_DIRECTW) {
81576760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
81613951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
81713951Sdyson			wakeup(wpipe);
81813951Sdyson		}
81913992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
82091362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
82191362Salfred		    PRIBIO | PCATCH, "pipdww", 0);
82214802Sdyson		if (error)
82313907Sdyson			goto error1;
82414802Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
82514802Sdyson			error = EPIPE;
82614802Sdyson			goto error1;
82714802Sdyson		}
82813907Sdyson	}
82913907Sdyson	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
83013951Sdyson	if (wpipe->pipe_buffer.cnt > 0) {
83176760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
83213951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
83313951Sdyson			wakeup(wpipe);
83413951Sdyson		}
83513951Sdyson
83613992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
83791362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
83891362Salfred		    PRIBIO | PCATCH, "pipdwc", 0);
83914802Sdyson		if (error)
84013907Sdyson			goto error1;
84114802Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
84214802Sdyson			error = EPIPE;
84314802Sdyson			goto error1;
84413907Sdyson		}
84513951Sdyson		goto retry;
84613907Sdyson	}
84713907Sdyson
84813951Sdyson	wpipe->pipe_state |= PIPE_DIRECTW;
84913951Sdyson
85092305Salfred	pipelock(wpipe, 0);
85191362Salfred	PIPE_GET_GIANT(wpipe);
85213907Sdyson	error = pipe_build_write_buffer(wpipe, uio);
85391362Salfred	PIPE_DROP_GIANT(wpipe);
85492305Salfred	pipeunlock(wpipe);
85513907Sdyson	if (error) {
85613907Sdyson		wpipe->pipe_state &= ~PIPE_DIRECTW;
85713907Sdyson		goto error1;
85813907Sdyson	}
85913907Sdyson
86013907Sdyson	error = 0;
86113907Sdyson	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
86213907Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
86313907Sdyson			pipelock(wpipe, 0);
86491362Salfred			PIPE_GET_GIANT(wpipe);
86513907Sdyson			pipe_destroy_write_buffer(wpipe);
86691362Salfred			PIPE_DROP_GIANT(wpipe);
867112981Shsu			pipeselwakeup(wpipe);
86813907Sdyson			pipeunlock(wpipe);
86914802Sdyson			error = EPIPE;
87014802Sdyson			goto error1;
87113907Sdyson		}
87213992Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
87313992Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
87413992Sdyson			wakeup(wpipe);
87513992Sdyson		}
87614037Sdyson		pipeselwakeup(wpipe);
87791362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
87891362Salfred		    "pipdwt", 0);
87913907Sdyson	}
88013907Sdyson
88113907Sdyson	pipelock(wpipe,0);
88213907Sdyson	if (wpipe->pipe_state & PIPE_DIRECTW) {
88313907Sdyson		/*
88413907Sdyson		 * this bit of trickery substitutes a kernel buffer for
88513907Sdyson		 * the process that might be going away.
88613907Sdyson		 */
88713907Sdyson		pipe_clone_write_buffer(wpipe);
88813907Sdyson	} else {
88991412Salfred		PIPE_GET_GIANT(wpipe);
89013907Sdyson		pipe_destroy_write_buffer(wpipe);
89191412Salfred		PIPE_DROP_GIANT(wpipe);
89213907Sdyson	}
89313907Sdyson	pipeunlock(wpipe);
89476760Salfred	return (error);
89513907Sdyson
89613907Sdysonerror1:
89713907Sdyson	wakeup(wpipe);
89876760Salfred	return (error);
89913907Sdyson}
90014037Sdyson#endif
90113907Sdyson
90216960Sdysonstatic int
903101941Srwatsonpipe_write(fp, uio, active_cred, flags, td)
90416960Sdyson	struct file *fp;
90513907Sdyson	struct uio *uio;
906101941Srwatson	struct ucred *active_cred;
90783366Sjulian	struct thread *td;
90845311Sdt	int flags;
90913907Sdyson{
91013675Sdyson	int error = 0;
91113913Sdyson	int orig_resid;
91216960Sdyson	struct pipe *wpipe, *rpipe;
91316960Sdyson
914109153Sdillon	rpipe = fp->f_data;
91516960Sdyson	wpipe = rpipe->pipe_peer;
91616960Sdyson
91791395Salfred	PIPE_LOCK(rpipe);
91813675Sdyson	/*
91913675Sdyson	 * detect loss of pipe read side, issue SIGPIPE if lost.
92013675Sdyson	 */
92116960Sdyson	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
92291395Salfred		PIPE_UNLOCK(rpipe);
92376760Salfred		return (EPIPE);
92413675Sdyson	}
925101768Srwatson#ifdef MAC
926102115Srwatson	error = mac_check_pipe_write(active_cred, wpipe);
927101768Srwatson	if (error) {
928101768Srwatson		PIPE_UNLOCK(rpipe);
929101768Srwatson		return (error);
930101768Srwatson	}
931101768Srwatson#endif
93277676Sdillon	++wpipe->pipe_busy;
93313675Sdyson
93417163Sdyson	/*
93517163Sdyson	 * If it is advantageous to resize the pipe buffer, do
93617163Sdyson	 * so.
93717163Sdyson	 */
93817163Sdyson	if ((uio->uio_resid > PIPE_SIZE) &&
939118764Ssilby		(amountpipekva < maxpipekva / 2) &&
94017163Sdyson		(nbigpipe < LIMITBIGPIPES) &&
94117163Sdyson		(wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
94217163Sdyson		(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
94317163Sdyson		(wpipe->pipe_buffer.cnt == 0)) {
94417163Sdyson
945105009Salfred		if ((error = pipelock(wpipe, 1)) == 0) {
946118799Salc			PIPE_UNLOCK(wpipe);
94776364Salfred			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
948117364Ssilby				atomic_add_int(&nbigpipe, 1);
949118799Salc			PIPE_LOCK(wpipe);
95013907Sdyson			pipeunlock(wpipe);
95113907Sdyson		}
95213907Sdyson	}
95377676Sdillon
	/*
	 * If an early error occurred, unbusy and return, waking up any pending
	 * readers.
	 */
95877676Sdillon	if (error) {
95977676Sdillon		--wpipe->pipe_busy;
96077676Sdillon		if ((wpipe->pipe_busy == 0) &&
96177676Sdillon		    (wpipe->pipe_state & PIPE_WANT)) {
96277676Sdillon			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
96377676Sdillon			wakeup(wpipe);
96477676Sdillon		}
96591395Salfred		PIPE_UNLOCK(rpipe);
96677676Sdillon		return(error);
96777676Sdillon	}
96876364Salfred
96913913Sdyson	orig_resid = uio->uio_resid;
97077676Sdillon
97113675Sdyson	while (uio->uio_resid) {
97213907Sdyson		int space;
97376760Salfred
97414037Sdyson#ifndef PIPE_NODIRECT
97513907Sdyson		/*
97613907Sdyson		 * If the transfer is large, we can gain performance if
97713907Sdyson		 * we do process-to-process copies directly.
97816416Sdyson		 * If the write is non-blocking, we don't use the
97916416Sdyson		 * direct write mechanism.
98058505Sdillon		 *
98158505Sdillon		 * The direct write mechanism will detect the reader going
98258505Sdillon		 * away on us.
98313907Sdyson		 */
98417163Sdyson		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
98517163Sdyson		    (fp->f_flag & FNONBLOCK) == 0 &&
986118764Ssilby		    amountpipekvawired + uio->uio_resid < maxpipekvawired) {
987105009Salfred			error = pipe_direct_write(wpipe, uio);
98876760Salfred			if (error)
98913907Sdyson				break;
99013907Sdyson			continue;
99191362Salfred		}
99214037Sdyson#endif
99313907Sdyson
		/*
		 * Pipe buffered writes cannot be coincident with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
100113907Sdyson	retrywrite:
100213907Sdyson		while (wpipe->pipe_state & PIPE_DIRECTW) {
100313992Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
100413992Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
100513992Sdyson				wakeup(wpipe);
100613992Sdyson			}
100791395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
100891362Salfred			    "pipbww", 0);
100958505Sdillon			if (wpipe->pipe_state & PIPE_EOF)
101058505Sdillon				break;
101113907Sdyson			if (error)
101213907Sdyson				break;
101313907Sdyson		}
101458505Sdillon		if (wpipe->pipe_state & PIPE_EOF) {
101558505Sdillon			error = EPIPE;
101658505Sdillon			break;
101758505Sdillon		}
101813907Sdyson
101913907Sdyson		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
102014644Sdyson
102114644Sdyson		/* Writes of size <= PIPE_BUF must be atomic. */
102213913Sdyson		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
102313913Sdyson			space = 0;
102413907Sdyson
1025118230Spb		if (space > 0) {
102613907Sdyson			if ((error = pipelock(wpipe,1)) == 0) {
102754534Stegge				int size;	/* Transfer size */
102854534Stegge				int segsize;	/* first segment to transfer */
102976760Salfred
103013907Sdyson				/*
103113907Sdyson				 * It is possible for a direct write to
103213907Sdyson				 * slip in on us... handle it here...
103313907Sdyson				 */
103413907Sdyson				if (wpipe->pipe_state & PIPE_DIRECTW) {
103513907Sdyson					pipeunlock(wpipe);
103613907Sdyson					goto retrywrite;
103713907Sdyson				}
103854534Stegge				/*
103954534Stegge				 * If a process blocked in uiomove, our
104054534Stegge				 * value for space might be bad.
104158505Sdillon				 *
104258505Sdillon				 * XXX will we be ok if the reader has gone
104358505Sdillon				 * away here?
104454534Stegge				 */
104554534Stegge				if (space > wpipe->pipe_buffer.size -
104654534Stegge				    wpipe->pipe_buffer.cnt) {
104754534Stegge					pipeunlock(wpipe);
104854534Stegge					goto retrywrite;
104954534Stegge				}
105054534Stegge
105154534Stegge				/*
105254534Stegge				 * Transfer size is minimum of uio transfer
105354534Stegge				 * and free space in pipe buffer.
105454534Stegge				 */
105554534Stegge				if (space > uio->uio_resid)
105654534Stegge					size = uio->uio_resid;
105754534Stegge				else
105854534Stegge					size = space;
105954534Stegge				/*
106054534Stegge				 * First segment to transfer is minimum of
106154534Stegge				 * transfer size and contiguous space in
106254534Stegge				 * pipe buffer.  If first segment to transfer
106354534Stegge				 * is less than the transfer size, we've got
106454534Stegge				 * a wraparound in the buffer.
106554534Stegge				 */
106654534Stegge				segsize = wpipe->pipe_buffer.size -
106754534Stegge					wpipe->pipe_buffer.in;
106854534Stegge				if (segsize > size)
106954534Stegge					segsize = size;
107054534Stegge
107154534Stegge				/* Transfer first segment */
107254534Stegge
107391395Salfred				PIPE_UNLOCK(rpipe);
107454534Stegge				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
107554534Stegge						segsize, uio);
107691395Salfred				PIPE_LOCK(rpipe);
107754534Stegge
107854534Stegge				if (error == 0 && segsize < size) {
107954534Stegge					/*
108054534Stegge					 * Transfer remaining part now, to
108154534Stegge					 * support atomic writes.  Wraparound
108254534Stegge					 * happened.
108354534Stegge					 */
108454534Stegge					if (wpipe->pipe_buffer.in + segsize !=
108554534Stegge					    wpipe->pipe_buffer.size)
1086116127Smux						panic("Expected pipe buffer "
1087116127Smux						    "wraparound disappeared");
108854534Stegge
108991395Salfred					PIPE_UNLOCK(rpipe);
1090116127Smux					error = uiomove(
1091116127Smux					    &wpipe->pipe_buffer.buffer[0],
1092116127Smux				    	    size - segsize, uio);
109391395Salfred					PIPE_LOCK(rpipe);
109454534Stegge				}
109554534Stegge				if (error == 0) {
109654534Stegge					wpipe->pipe_buffer.in += size;
109754534Stegge					if (wpipe->pipe_buffer.in >=
109854534Stegge					    wpipe->pipe_buffer.size) {
1099116127Smux						if (wpipe->pipe_buffer.in !=
1100116127Smux						    size - segsize +
1101116127Smux						    wpipe->pipe_buffer.size)
1102116127Smux							panic("Expected "
1103116127Smux							    "wraparound bad");
1104116127Smux						wpipe->pipe_buffer.in = size -
1105116127Smux						    segsize;
110654534Stegge					}
110754534Stegge
110854534Stegge					wpipe->pipe_buffer.cnt += size;
1109116127Smux					if (wpipe->pipe_buffer.cnt >
1110116127Smux					    wpipe->pipe_buffer.size)
111154534Stegge						panic("Pipe buffer overflow");
111254534Stegge
111354534Stegge				}
111413675Sdyson				pipeunlock(wpipe);
111513675Sdyson			}
111613675Sdyson			if (error)
111713675Sdyson				break;
111813675Sdyson
111913675Sdyson		} else {
112013675Sdyson			/*
112113675Sdyson			 * If the "read-side" has been blocked, wake it up now.
112213675Sdyson			 */
112313675Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
112413675Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
112513675Sdyson				wakeup(wpipe);
112613675Sdyson			}
112714037Sdyson
112813675Sdyson			/*
112913675Sdyson			 * don't block on non-blocking I/O
113013675Sdyson			 */
113116960Sdyson			if (fp->f_flag & FNONBLOCK) {
113213907Sdyson				error = EAGAIN;
113313675Sdyson				break;
113413675Sdyson			}
113513907Sdyson
113614037Sdyson			/*
113714037Sdyson			 * We have no more space and have something to offer,
113829356Speter			 * wake up select/poll.
113914037Sdyson			 */
114014037Sdyson			pipeselwakeup(wpipe);
114114037Sdyson
114213675Sdyson			wpipe->pipe_state |= PIPE_WANTW;
114391395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe),
114491362Salfred			    PRIBIO | PCATCH, "pipewr", 0);
114576760Salfred			if (error != 0)
114613675Sdyson				break;
114713675Sdyson			/*
114813675Sdyson			 * If read side wants to go away, we just issue a signal
114913675Sdyson			 * to ourselves.
115013675Sdyson			 */
115113675Sdyson			if (wpipe->pipe_state & PIPE_EOF) {
115213774Sdyson				error = EPIPE;
115313907Sdyson				break;
115413675Sdyson			}
115513675Sdyson		}
115613675Sdyson	}
115713675Sdyson
115814644Sdyson	--wpipe->pipe_busy;
115977676Sdillon
116076760Salfred	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
116176760Salfred		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
116213675Sdyson		wakeup(wpipe);
116313675Sdyson	} else if (wpipe->pipe_buffer.cnt > 0) {
116413675Sdyson		/*
116513675Sdyson		 * If we have put any characters in the buffer, we wake up
116613675Sdyson		 * the reader.
116713675Sdyson		 */
116813675Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
116913675Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
117013675Sdyson			wakeup(wpipe);
117113675Sdyson		}
117213675Sdyson	}
117313909Sdyson
117413909Sdyson	/*
117513909Sdyson	 * Don't return EPIPE if I/O was successful
117613909Sdyson	 */
117713907Sdyson	if ((wpipe->pipe_buffer.cnt == 0) &&
117877676Sdillon	    (uio->uio_resid == 0) &&
117977676Sdillon	    (error == EPIPE)) {
118013907Sdyson		error = 0;
118177676Sdillon	}
118213913Sdyson
118324101Sbde	if (error == 0)
118455112Sbde		vfs_timestamp(&wpipe->pipe_mtime);
118524101Sbde
118614037Sdyson	/*
118714037Sdyson	 * We have something to offer,
118829356Speter	 * wake up select/poll.
118914037Sdyson	 */
119014177Sdyson	if (wpipe->pipe_buffer.cnt)
119114037Sdyson		pipeselwakeup(wpipe);
119213907Sdyson
119391395Salfred	PIPE_UNLOCK(rpipe);
119476760Salfred	return (error);
119513675Sdyson}
119613675Sdyson
119713675Sdyson/*
119813675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets.
119913675Sdyson */
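/*
 * Assumed userland illustration, not part of the original file: the
 * cases handled below back the generic ioctls on a pipe(2) descriptor;
 * for example, FIONREAD reports how many bytes are ready to be read.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/filio.h>
 *	#include <err.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		int fds[2], nready;
 *
 *		if (pipe(fds) == -1)
 *			err(1, "pipe");
 *		if (write(fds[1], "hi", 2) != 2)
 *			err(1, "write");
 *		if (ioctl(fds[0], FIONREAD, &nready) == -1)
 *			err(1, "FIONREAD");
 *		return (nready == 2 ? 0 : 1);
 *	}
 */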
1200104094Sphkstatic int
1201102003Srwatsonpipe_ioctl(fp, cmd, data, active_cred, td)
120213675Sdyson	struct file *fp;
120336735Sdfr	u_long cmd;
120499009Salfred	void *data;
1205102003Srwatson	struct ucred *active_cred;
120683366Sjulian	struct thread *td;
120713675Sdyson{
1208109153Sdillon	struct pipe *mpipe = fp->f_data;
1209101768Srwatson#ifdef MAC
1210101768Srwatson	int error;
1211104269Srwatson#endif
121213675Sdyson
1213104269Srwatson	PIPE_LOCK(mpipe);
1214104269Srwatson
1215104269Srwatson#ifdef MAC
1216102003Srwatson	error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data);
1217101768Srwatson	if (error) {
		PIPE_UNLOCK(mpipe);
1218101768Srwatson		return (error);
	}
1219101768Srwatson#endif
1220101768Srwatson
122113675Sdyson	switch (cmd) {
122213675Sdyson
122313675Sdyson	case FIONBIO:
1224104269Srwatson		PIPE_UNLOCK(mpipe);
122513675Sdyson		return (0);
122613675Sdyson
122713675Sdyson	case FIOASYNC:
122813675Sdyson		if (*(int *)data) {
122913675Sdyson			mpipe->pipe_state |= PIPE_ASYNC;
123013675Sdyson		} else {
123113675Sdyson			mpipe->pipe_state &= ~PIPE_ASYNC;
123213675Sdyson		}
123391362Salfred		PIPE_UNLOCK(mpipe);
123413675Sdyson		return (0);
123513675Sdyson
123613675Sdyson	case FIONREAD:
123714037Sdyson		if (mpipe->pipe_state & PIPE_DIRECTW)
123814037Sdyson			*(int *)data = mpipe->pipe_map.cnt;
123914037Sdyson		else
124014037Sdyson			*(int *)data = mpipe->pipe_buffer.cnt;
124191362Salfred		PIPE_UNLOCK(mpipe);
124213675Sdyson		return (0);
124313675Sdyson
124441086Struckman	case FIOSETOWN:
1245104269Srwatson		PIPE_UNLOCK(mpipe);
124641086Struckman		return (fsetown(*(int *)data, &mpipe->pipe_sigio));
124741086Struckman
124841086Struckman	case FIOGETOWN:
1249104269Srwatson		PIPE_UNLOCK(mpipe);
1250104393Struckman		*(int *)data = fgetown(&mpipe->pipe_sigio);
125113675Sdyson		return (0);
125213675Sdyson
125341086Struckman	/* This is deprecated, FIOSETOWN should be used instead. */
125441086Struckman	case TIOCSPGRP:
1255104269Srwatson		PIPE_UNLOCK(mpipe);
125641086Struckman		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));
125741086Struckman
125841086Struckman	/* This is deprecated, FIOGETOWN should be used instead. */
125918863Sdyson	case TIOCGPGRP:
1260104269Srwatson		PIPE_UNLOCK(mpipe);
1261104393Struckman		*(int *)data = -fgetown(&mpipe->pipe_sigio);
126213675Sdyson		return (0);
126313675Sdyson
126413675Sdyson	}
1265104269Srwatson	PIPE_UNLOCK(mpipe);
126617124Sbde	return (ENOTTY);
126713675Sdyson}
126813675Sdyson
1269104094Sphkstatic int
1270101983Srwatsonpipe_poll(fp, events, active_cred, td)
127113675Sdyson	struct file *fp;
127229356Speter	int events;
1273101983Srwatson	struct ucred *active_cred;
127483366Sjulian	struct thread *td;
127513675Sdyson{
1276109153Sdillon	struct pipe *rpipe = fp->f_data;
127713675Sdyson	struct pipe *wpipe;
127829356Speter	int revents = 0;
1279101768Srwatson#ifdef MAC
1280101768Srwatson	int error;
1281101768Srwatson#endif
128213675Sdyson
128313675Sdyson	wpipe = rpipe->pipe_peer;
128491362Salfred	PIPE_LOCK(rpipe);
1285101768Srwatson#ifdef MAC
1286102115Srwatson	error = mac_check_pipe_poll(active_cred, rpipe);
1287101768Srwatson	if (error)
1288101768Srwatson		goto locked_error;
1289101768Srwatson#endif
129029356Speter	if (events & (POLLIN | POLLRDNORM))
129129356Speter		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
129229356Speter		    (rpipe->pipe_buffer.cnt > 0) ||
129329356Speter		    (rpipe->pipe_state & PIPE_EOF))
129429356Speter			revents |= events & (POLLIN | POLLRDNORM);
129513675Sdyson
129629356Speter	if (events & (POLLOUT | POLLWRNORM))
129729356Speter		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
129843311Sdillon		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
129943311Sdillon		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
130029356Speter			revents |= events & (POLLOUT | POLLWRNORM);
130113675Sdyson
130229356Speter	if ((rpipe->pipe_state & PIPE_EOF) ||
130329356Speter	    (wpipe == NULL) ||
130429356Speter	    (wpipe->pipe_state & PIPE_EOF))
130529356Speter		revents |= POLLHUP;
130629356Speter
130729356Speter	if (revents == 0) {
130829356Speter		if (events & (POLLIN | POLLRDNORM)) {
130983805Sjhb			selrecord(td, &rpipe->pipe_sel);
131029356Speter			rpipe->pipe_state |= PIPE_SEL;
131113675Sdyson		}
131213675Sdyson
131329356Speter		if (events & (POLLOUT | POLLWRNORM)) {
131483805Sjhb			selrecord(td, &wpipe->pipe_sel);
131530164Speter			wpipe->pipe_state |= PIPE_SEL;
131613907Sdyson		}
131713675Sdyson	}
1318101768Srwatson#ifdef MAC
1319101768Srwatsonlocked_error:
1320101768Srwatson#endif
132191362Salfred	PIPE_UNLOCK(rpipe);
132229356Speter
132329356Speter	return (revents);
132413675Sdyson}
132513675Sdyson
132698989Salfred/*
132798989Salfred * We shouldn't need locks here as we're doing a read and this should
132898989Salfred * be a natural race.
132998989Salfred */
133052983Speterstatic int
1331101983Srwatsonpipe_stat(fp, ub, active_cred, td)
133252983Speter	struct file *fp;
133352983Speter	struct stat *ub;
1334101983Srwatson	struct ucred *active_cred;
133583366Sjulian	struct thread *td;
133613675Sdyson{
1337109153Sdillon	struct pipe *pipe = fp->f_data;
1338101768Srwatson#ifdef MAC
1339101768Srwatson	int error;
134052983Speter
1341104269Srwatson	PIPE_LOCK(pipe);
1342102115Srwatson	error = mac_check_pipe_stat(active_cred, pipe);
1343104269Srwatson	PIPE_UNLOCK(pipe);
1344101768Srwatson	if (error)
1345101768Srwatson		return (error);
1346101768Srwatson#endif
1347100527Salfred	bzero(ub, sizeof(*ub));
134817124Sbde	ub->st_mode = S_IFIFO;
134913907Sdyson	ub->st_blksize = pipe->pipe_buffer.size;
135013675Sdyson	ub->st_size = pipe->pipe_buffer.cnt;
135113675Sdyson	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
135234901Sphk	ub->st_atimespec = pipe->pipe_atime;
135334901Sphk	ub->st_mtimespec = pipe->pipe_mtime;
135434901Sphk	ub->st_ctimespec = pipe->pipe_ctime;
135560404Schris	ub->st_uid = fp->f_cred->cr_uid;
135660404Schris	ub->st_gid = fp->f_cred->cr_gid;
135717124Sbde	/*
135860404Schris	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
135917124Sbde	 * XXX (st_dev, st_ino) should be unique.
136017124Sbde	 */
136176760Salfred	return (0);
136213675Sdyson}
136313675Sdyson
136413675Sdyson/* ARGSUSED */
136513675Sdysonstatic int
136683366Sjulianpipe_close(fp, td)
136713675Sdyson	struct file *fp;
136883366Sjulian	struct thread *td;
136913675Sdyson{
1370109153Sdillon	struct pipe *cpipe = fp->f_data;
137116322Sgpalmer
137249413Sgreen	fp->f_ops = &badfileops;
1373109153Sdillon	fp->f_data = NULL;
137496122Salfred	funsetown(&cpipe->pipe_sigio);
137513675Sdyson	pipeclose(cpipe);
137676760Salfred	return (0);
137713675Sdyson}
137813675Sdyson
137976364Salfredstatic void
138076364Salfredpipe_free_kmem(cpipe)
138176364Salfred	struct pipe *cpipe;
138276364Salfred{
138391412Salfred
138491412Salfred	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
138591412Salfred	       ("pipespace: pipe mutex locked"));
138676364Salfred
138776364Salfred	if (cpipe->pipe_buffer.buffer != NULL) {
138876364Salfred		if (cpipe->pipe_buffer.size > PIPE_SIZE)
1389117364Ssilby			atomic_subtract_int(&nbigpipe, 1);
1390110816Salc		atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size);
1391117325Ssilby		atomic_subtract_int(&amountpipes, 1);
1392118764Ssilby		vm_map_remove(pipe_map,
1393118764Ssilby		    (vm_offset_t)cpipe->pipe_buffer.buffer,
1394118764Ssilby		    (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size);
139576364Salfred		cpipe->pipe_buffer.buffer = NULL;
139676364Salfred	}
139776364Salfred#ifndef PIPE_NODIRECT
1398102241Sarchie	if (cpipe->pipe_map.kva != 0) {
1399118764Ssilby		atomic_subtract_int(&amountpipekvawired,
1400110816Salc		    cpipe->pipe_buffer.size + PAGE_SIZE);
140176364Salfred		kmem_free(kernel_map,
140276364Salfred			cpipe->pipe_map.kva,
140376364Salfred			cpipe->pipe_buffer.size + PAGE_SIZE);
140476364Salfred		cpipe->pipe_map.cnt = 0;
140576364Salfred		cpipe->pipe_map.kva = 0;
140676364Salfred		cpipe->pipe_map.pos = 0;
140776364Salfred		cpipe->pipe_map.npages = 0;
140876364Salfred	}
140976364Salfred#endif
141076364Salfred}
141176364Salfred
141213675Sdyson/*
141313675Sdyson * shutdown the pipe
141413675Sdyson */
141513675Sdysonstatic void
141613675Sdysonpipeclose(cpipe)
141713675Sdyson	struct pipe *cpipe;
141813675Sdyson{
141913907Sdyson	struct pipe *ppipe;
142091968Salfred	int hadpeer;
142176364Salfred
142291968Salfred	if (cpipe == NULL)
142391968Salfred		return;
142491968Salfred
142591968Salfred	hadpeer = 0;
142691968Salfred
142791968Salfred	/* partially created pipes won't have a valid mutex. */
142891968Salfred	if (PIPE_MTX(cpipe) != NULL)
142991362Salfred		PIPE_LOCK(cpipe);
143013907Sdyson
143191968Salfred	pipeselwakeup(cpipe);
143213907Sdyson
143391968Salfred	/*
143491968Salfred	 * If the other side is blocked, wake it up saying that
143591968Salfred	 * we want to close it down.
143691968Salfred	 */
143791968Salfred	while (cpipe->pipe_busy) {
143891968Salfred		wakeup(cpipe);
143991968Salfred		cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
144091968Salfred		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
144191968Salfred	}
144213675Sdyson
1443101768Srwatson#ifdef MAC
1444101768Srwatson	if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
1445101768Srwatson		mac_destroy_pipe(cpipe);
1446101768Srwatson#endif
1447101768Srwatson
144891968Salfred	/*
144991968Salfred	 * Disconnect from peer
145091968Salfred	 */
145191968Salfred	if ((ppipe = cpipe->pipe_peer) != NULL) {
145291968Salfred		hadpeer++;
145391968Salfred		pipeselwakeup(ppipe);
145413907Sdyson
145591968Salfred		ppipe->pipe_state |= PIPE_EOF;
145691968Salfred		wakeup(ppipe);
145791968Salfred		KNOTE(&ppipe->pipe_sel.si_note, 0);
145891968Salfred		ppipe->pipe_peer = NULL;
145991968Salfred	}
146091968Salfred	/*
146191968Salfred	 * free resources
146291968Salfred	 */
146391968Salfred	if (PIPE_MTX(cpipe) != NULL) {
146491968Salfred		PIPE_UNLOCK(cpipe);
146591968Salfred		if (!hadpeer) {
146691968Salfred			mtx_destroy(PIPE_MTX(cpipe));
146791968Salfred			free(PIPE_MTX(cpipe), M_TEMP);
146813675Sdyson		}
146913675Sdyson	}
147091968Salfred	pipe_free_kmem(cpipe);
147192751Sjeff	uma_zfree(pipe_zone, cpipe);
147213675Sdyson}
147359288Sjlemon
147472521Sjlemon/*ARGSUSED*/
147559288Sjlemonstatic int
147672521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn)
147759288Sjlemon{
147889306Salfred	struct pipe *cpipe;
147959288Sjlemon
1480109153Sdillon	cpipe = kn->kn_fp->f_data;
148172521Sjlemon	switch (kn->kn_filter) {
148272521Sjlemon	case EVFILT_READ:
148372521Sjlemon		kn->kn_fop = &pipe_rfiltops;
148472521Sjlemon		break;
148572521Sjlemon	case EVFILT_WRITE:
148672521Sjlemon		kn->kn_fop = &pipe_wfiltops;
148778292Sjlemon		cpipe = cpipe->pipe_peer;
1488101382Sdes		if (cpipe == NULL)
1489101382Sdes			/* other end of pipe has been closed */
1490118929Sjmg			return (EPIPE);
149172521Sjlemon		break;
149272521Sjlemon	default:
149372521Sjlemon		return (1);
149472521Sjlemon	}
1495100527Salfred	kn->kn_hook = cpipe;
149678292Sjlemon
149791372Salfred	PIPE_LOCK(cpipe);
149878292Sjlemon	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
149991372Salfred	PIPE_UNLOCK(cpipe);
150059288Sjlemon	return (0);
150159288Sjlemon}
150259288Sjlemon
150359288Sjlemonstatic void
150459288Sjlemonfilt_pipedetach(struct knote *kn)
150559288Sjlemon{
150678292Sjlemon	struct pipe *cpipe = (struct pipe *)kn->kn_hook;
150759288Sjlemon
150891372Salfred	PIPE_LOCK(cpipe);
150978292Sjlemon	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
151091372Salfred	PIPE_UNLOCK(cpipe);
151159288Sjlemon}
151259288Sjlemon
151359288Sjlemon/*ARGSUSED*/
151459288Sjlemonstatic int
151559288Sjlemonfilt_piperead(struct knote *kn, long hint)
151659288Sjlemon{
1517109153Sdillon	struct pipe *rpipe = kn->kn_fp->f_data;
151859288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
151959288Sjlemon
152091372Salfred	PIPE_LOCK(rpipe);
152159288Sjlemon	kn->kn_data = rpipe->pipe_buffer.cnt;
152259288Sjlemon	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
152359288Sjlemon		kn->kn_data = rpipe->pipe_map.cnt;
152459288Sjlemon
152559288Sjlemon	if ((rpipe->pipe_state & PIPE_EOF) ||
152659288Sjlemon	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
152791372Salfred		kn->kn_flags |= EV_EOF;
152891372Salfred		PIPE_UNLOCK(rpipe);
152959288Sjlemon		return (1);
153059288Sjlemon	}
153191372Salfred	PIPE_UNLOCK(rpipe);
153259288Sjlemon	return (kn->kn_data > 0);
153359288Sjlemon}
153459288Sjlemon
153559288Sjlemon/*ARGSUSED*/
153659288Sjlemonstatic int
153759288Sjlemonfilt_pipewrite(struct knote *kn, long hint)
153859288Sjlemon{
1539109153Sdillon	struct pipe *rpipe = kn->kn_fp->f_data;
154059288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
154159288Sjlemon
154291372Salfred	PIPE_LOCK(rpipe);
154359288Sjlemon	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
154459288Sjlemon		kn->kn_data = 0;
154559288Sjlemon		kn->kn_flags |= EV_EOF;
154691372Salfred		PIPE_UNLOCK(rpipe);
154759288Sjlemon		return (1);
154859288Sjlemon	}
154959288Sjlemon	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
155065855Sjlemon	if (wpipe->pipe_state & PIPE_DIRECTW)
155159288Sjlemon		kn->kn_data = 0;
155259288Sjlemon
155391372Salfred	PIPE_UNLOCK(rpipe);
155459288Sjlemon	return (kn->kn_data >= PIPE_BUF);
155559288Sjlemon}