sys_pipe.c revision 119811
113675Sdyson/*
213675Sdyson * Copyright (c) 1996 John S. Dyson
313675Sdyson * All rights reserved.
413675Sdyson *
513675Sdyson * Redistribution and use in source and binary forms, with or without
613675Sdyson * modification, are permitted provided that the following conditions
713675Sdyson * are met:
813675Sdyson * 1. Redistributions of source code must retain the above copyright
913675Sdyson *    notice immediately at the beginning of the file, without modification,
1013675Sdyson *    this list of conditions, and the following disclaimer.
1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright
1213675Sdyson *    notice, this list of conditions and the following disclaimer in the
1313675Sdyson *    documentation and/or other materials provided with the distribution.
1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author
1513675Sdyson *    John S. Dyson.
1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions
1713675Sdyson *    are met.
1813675Sdyson */
1913675Sdyson
2013675Sdyson/*
2113675Sdyson * This file contains a high-performance replacement for the socket-based
2213675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
2313675Sdyson * all features of sockets, but does do everything that pipes normally
2413675Sdyson * do.
2513675Sdyson */
2613675Sdyson
2713907Sdyson/*
2813907Sdyson * This code has two modes of operation, a small write mode and a large
2913907Sdyson * write mode.  The small write mode acts like conventional pipes with
3013907Sdyson * a kernel buffer.  If the write is smaller than PIPE_MINDIRECT, then the
3113907Sdyson * "normal" pipe buffering is done.  If the write is between PIPE_MINDIRECT
3213907Sdyson * and PIPE_SIZE bytes, the writer's buffer is fully mapped and wired into
3313907Sdyson * the kernel, and the receiving process can copy the data directly from
3413907Sdyson * the pages of the sending process.
3513907Sdyson *
3613907Sdyson * If the sending process receives a signal, it is possible that it will
3713913Sdyson * go away, and certainly its address space can change, because control
3813907Sdyson * is returned to the user-mode side.  In that case, the pipe code
3913907Sdyson * arranges to copy the buffer supplied by the user process to a pageable
4013907Sdyson * kernel buffer, and the receiving process will grab the data from the
4113907Sdyson * pageable kernel buffer.  Since signals don't happen all that often,
4213907Sdyson * the copy operation is normally eliminated.
4313907Sdyson *
4413907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
4513907Sdyson * happen for small transfers so that the system will not spend all of
46118764Ssilby * its time context switching.
47117325Ssilby *
48118764Ssilby * In order to limit the resource use of pipes, two sysctls exist:
49117325Ssilby *
50118764Ssilby * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
51118764Ssilby * address space available to us in pipe_map.  Whenever the amount in use
52118764Ssilby * exceeds half of this value, all new pipes will be created with size
53118764Ssilby * SMALL_PIPE_SIZE, rather than PIPE_SIZE.  Big pipe creation will be limited
54118764Ssilby * as well.  This value is loader tunable only.
55117325Ssilby *
56117325Ssilby * kern.ipc.maxpipekvawired - This value limits the amount of memory that may
57117325Ssilby * be wired in order to facilitate direct copies using page flipping.
58117325Ssilby * Whenever this value is exceeded, pipes will fall back to using regular
59118764Ssilby * copies.  This value is sysctl controllable at all times.
60117325Ssilby *
61117325Ssilby * These values are autotuned in subr_param.c.
62117325Ssilby *
63117325Ssilby * Memory usage may be monitored through the sysctls
64117325Ssilby * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired.
65117325Ssilby *
6613907Sdyson */
6713907Sdyson
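/*
 * Illustrative sketch, not part of the kernel source: the limits and
 * usage counters described above can be watched from userland with
 * sysctlbyname(3).  The sysctl names are the ones declared by the
 * SYSCTL_INT() entries further down in this file; the small program
 * here is only an assumed example.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int pipekva, maxpipekva;
 *		size_t len;
 *
 *		len = sizeof(pipekva);
 *		sysctlbyname("kern.ipc.pipekva", &pipekva, &len, NULL, 0);
 *		len = sizeof(maxpipekva);
 *		sysctlbyname("kern.ipc.maxpipekva", &maxpipekva, &len, NULL, 0);
 *		printf("pipe KVA in use: %d of %d bytes\n",
 *		    pipekva, maxpipekva);
 *		return (0);
 *	}
 */
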
68116182Sobrien#include <sys/cdefs.h>
69116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 119811 2003-09-06 21:02:10Z alc $");
70116182Sobrien
71101768Srwatson#include "opt_mac.h"
72101768Srwatson
7313675Sdyson#include <sys/param.h>
7413675Sdyson#include <sys/systm.h>
7524131Sbde#include <sys/fcntl.h>
7613675Sdyson#include <sys/file.h>
7713675Sdyson#include <sys/filedesc.h>
7824206Sbde#include <sys/filio.h>
7991372Salfred#include <sys/kernel.h>
8076166Smarkm#include <sys/lock.h>
81101768Srwatson#include <sys/mac.h>
8276827Salfred#include <sys/mutex.h>
8324206Sbde#include <sys/ttycom.h>
8413675Sdyson#include <sys/stat.h>
8591968Salfred#include <sys/malloc.h>
8629356Speter#include <sys/poll.h>
8770834Swollman#include <sys/selinfo.h>
8813675Sdyson#include <sys/signalvar.h>
89117325Ssilby#include <sys/sysctl.h>
9013675Sdyson#include <sys/sysproto.h>
9113675Sdyson#include <sys/pipe.h>
9276166Smarkm#include <sys/proc.h>
9355112Sbde#include <sys/vnode.h>
9434924Sbde#include <sys/uio.h>
9559288Sjlemon#include <sys/event.h>
9613675Sdyson
9713675Sdyson#include <vm/vm.h>
9813675Sdyson#include <vm/vm_param.h>
9913675Sdyson#include <vm/vm_object.h>
10013675Sdyson#include <vm/vm_kern.h>
10113675Sdyson#include <vm/vm_extern.h>
10213675Sdyson#include <vm/pmap.h>
10313675Sdyson#include <vm/vm_map.h>
10413907Sdyson#include <vm/vm_page.h>
10592751Sjeff#include <vm/uma.h>
10613675Sdyson
10714037Sdyson/*
10814037Sdyson * Use this define if you want to disable *fancy* VM things.  Expect an
10914037Sdyson * approx 30% decrease in transfer rate.  This could be useful for
11014037Sdyson * NetBSD or OpenBSD.
11114037Sdyson */
11214037Sdyson/* #define PIPE_NODIRECT */
11314037Sdyson
11414037Sdyson/*
11514037Sdyson * interfaces to the outside world
11614037Sdyson */
117108255Sphkstatic fo_rdwr_t	pipe_read;
118108255Sphkstatic fo_rdwr_t	pipe_write;
119108255Sphkstatic fo_ioctl_t	pipe_ioctl;
120108255Sphkstatic fo_poll_t	pipe_poll;
121108255Sphkstatic fo_kqfilter_t	pipe_kqfilter;
122108255Sphkstatic fo_stat_t	pipe_stat;
123108255Sphkstatic fo_close_t	pipe_close;
12413675Sdyson
12572521Sjlemonstatic struct fileops pipeops = {
126116546Sphk	.fo_read = pipe_read,
127116546Sphk	.fo_write = pipe_write,
128116546Sphk	.fo_ioctl = pipe_ioctl,
129116546Sphk	.fo_poll = pipe_poll,
130116546Sphk	.fo_kqfilter = pipe_kqfilter,
131116546Sphk	.fo_stat = pipe_stat,
132116546Sphk	.fo_close = pipe_close,
133116546Sphk	.fo_flags = DFLAG_PASSABLE
13472521Sjlemon};
13513675Sdyson
13659288Sjlemonstatic void	filt_pipedetach(struct knote *kn);
13759288Sjlemonstatic int	filt_piperead(struct knote *kn, long hint);
13859288Sjlemonstatic int	filt_pipewrite(struct knote *kn, long hint);
13959288Sjlemon
14072521Sjlemonstatic struct filterops pipe_rfiltops =
14172521Sjlemon	{ 1, NULL, filt_pipedetach, filt_piperead };
14272521Sjlemonstatic struct filterops pipe_wfiltops =
14372521Sjlemon	{ 1, NULL, filt_pipedetach, filt_pipewrite };
14459288Sjlemon
14592305Salfred#define PIPE_GET_GIANT(pipe)						\
14691362Salfred	do {								\
14792305Salfred		KASSERT(((pipe)->pipe_state & PIPE_LOCKFL) != 0,	\
14892305Salfred		    ("%s:%d PIPE_GET_GIANT: line pipe not locked",	\
14992305Salfred		     __FILE__, __LINE__));				\
15092305Salfred		PIPE_UNLOCK(pipe);					\
15191362Salfred		mtx_lock(&Giant);					\
15291362Salfred	} while (0)
15372521Sjlemon
15491362Salfred#define PIPE_DROP_GIANT(pipe)						\
15591362Salfred	do {								\
15691362Salfred		mtx_unlock(&Giant);					\
15792305Salfred		PIPE_LOCK(pipe);					\
15891362Salfred	} while (0)
15991362Salfred
16013675Sdyson/*
16113675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe
16213675Sdyson * space is pageable.  The pipe code will try to maintain locality of
16313675Sdyson * reference for performance reasons, so small amounts of outstanding I/O
16413675Sdyson * will not wipe the cache.
16513675Sdyson */
16613907Sdyson#define MINPIPESIZE (PIPE_SIZE/3)
16713907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3)
16813675Sdyson
16913907Sdyson/*
17017163Sdyson * Limit the number of "big" pipes
17117163Sdyson */
17217163Sdyson#define LIMITBIGPIPES	32
17333181Seivindstatic int nbigpipe;
17417163Sdyson
175117325Ssilbystatic int amountpipes;
17617124Sbdestatic int amountpipekva;
177117325Ssilbystatic int amountpipekvawired;
17813907Sdyson
179117325SsilbySYSCTL_DECL(_kern_ipc);
180117325Ssilby
181118764SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD,
182117325Ssilby	   &maxpipekva, 0, "Pipe KVA limit");
183117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
184117325Ssilby	   &maxpipekvawired, 0, "Pipe KVA wired limit");
185117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
186117364Ssilby	   &amountpipes, 0, "Current # of pipes");
187117364SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
188117364Ssilby	   &nbigpipe, 0, "Current # of big pipes");
189117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
190117325Ssilby	   &amountpipekva, 0, "Pipe KVA usage");
191117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
192117325Ssilby	   &amountpipekvawired, 0, "Pipe wired KVA usage");
193117325Ssilby
19491413Salfredstatic void pipeinit(void *dummy __unused);
19591413Salfredstatic void pipeclose(struct pipe *cpipe);
19691413Salfredstatic void pipe_free_kmem(struct pipe *cpipe);
19791413Salfredstatic int pipe_create(struct pipe **cpipep);
19891413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch);
19991413Salfredstatic __inline void pipeunlock(struct pipe *cpipe);
20091413Salfredstatic __inline void pipeselwakeup(struct pipe *cpipe);
20114037Sdyson#ifndef PIPE_NODIRECT
20291413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
20391413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe);
20491413Salfredstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
20591413Salfredstatic void pipe_clone_write_buffer(struct pipe *wpipe);
20614037Sdyson#endif
20791413Salfredstatic int pipespace(struct pipe *cpipe, int size);
20813675Sdyson
20992751Sjeffstatic uma_zone_t pipe_zone;
21027899Sdyson
21191372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
21291372Salfred
21391372Salfredstatic void
21491372Salfredpipeinit(void *dummy __unused)
21591372Salfred{
216118880Salc
21792654Sjeff	pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL,
21892654Sjeff	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
219118880Salc	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
22091372Salfred}
22191372Salfred
22213675Sdyson/*
22313675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes
22413675Sdyson */
22513675Sdyson
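/*
 * Usage sketch (userland, illustrative only; not part of this file):
 * the interface implemented below is the ordinary pipe(2) system call,
 * so a minimal consumer looks like the assumed example here.
 *
 *	#include <err.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		int fd[2];
 *		char buf[5];
 *
 *		if (pipe(fd) == -1)
 *			err(1, "pipe");
 *		(void)write(fd[1], "hello", 5);
 *		(void)read(fd[0], buf, sizeof(buf));
 *		close(fd[0]);
 *		close(fd[1]);
 *		return (0);
 *	}
 */
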
22613675Sdyson/* ARGSUSED */
22713675Sdysonint
22883366Sjulianpipe(td, uap)
22983366Sjulian	struct thread *td;
23013675Sdyson	struct pipe_args /* {
23113675Sdyson		int	dummy;
23213675Sdyson	} */ *uap;
23313675Sdyson{
23483366Sjulian	struct filedesc *fdp = td->td_proc->p_fd;
23513675Sdyson	struct file *rf, *wf;
23613675Sdyson	struct pipe *rpipe, *wpipe;
23791968Salfred	struct mtx *pmtx;
23813675Sdyson	int fd, error;
23927899Sdyson
240111119Simp	pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO);
24191968Salfred
24276756Salfred	rpipe = wpipe = NULL;
24376364Salfred	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
24476364Salfred		pipeclose(rpipe);
24576364Salfred		pipeclose(wpipe);
24691968Salfred		free(pmtx, M_TEMP);
24776364Salfred		return (ENFILE);
24876364Salfred	}
24976364Salfred
25013907Sdyson	rpipe->pipe_state |= PIPE_DIRECTOK;
25113907Sdyson	wpipe->pipe_state |= PIPE_DIRECTOK;
25213675Sdyson
25383366Sjulian	error = falloc(td, &rf, &fd);
25470915Sdwmalone	if (error) {
25570915Sdwmalone		pipeclose(rpipe);
25670915Sdwmalone		pipeclose(wpipe);
25791968Salfred		free(pmtx, M_TEMP);
25870915Sdwmalone		return (error);
25970915Sdwmalone	}
26070915Sdwmalone	fhold(rf);
26183366Sjulian	td->td_retval[0] = fd;
26270915Sdwmalone
26370803Sdwmalone	/*
26470803Sdwmalone	 * Warning: once we've gotten past allocation of the fd for the
26570803Sdwmalone	 * read-side, we can only drop the read side via fdrop() in order
26670803Sdwmalone	 * to avoid races against processes which manage to dup() the read
26770803Sdwmalone	 * side while we are blocked trying to allocate the write side.
26870803Sdwmalone	 */
26989306Salfred	FILE_LOCK(rf);
27013675Sdyson	rf->f_flag = FREAD | FWRITE;
27113675Sdyson	rf->f_type = DTYPE_PIPE;
272109153Sdillon	rf->f_data = rpipe;
27313675Sdyson	rf->f_ops = &pipeops;
27489306Salfred	FILE_UNLOCK(rf);
27583366Sjulian	error = falloc(td, &wf, &fd);
27670915Sdwmalone	if (error) {
27789306Salfred		FILEDESC_LOCK(fdp);
27883366Sjulian		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
27983366Sjulian			fdp->fd_ofiles[td->td_retval[0]] = NULL;
28089306Salfred			FILEDESC_UNLOCK(fdp);
28183366Sjulian			fdrop(rf, td);
28289306Salfred		} else
28389306Salfred			FILEDESC_UNLOCK(fdp);
28483366Sjulian		fdrop(rf, td);
28570915Sdwmalone		/* rpipe has been closed by fdrop(). */
28670915Sdwmalone		pipeclose(wpipe);
28791968Salfred		free(pmtx, M_TEMP);
28870915Sdwmalone		return (error);
28970915Sdwmalone	}
29089306Salfred	FILE_LOCK(wf);
29113675Sdyson	wf->f_flag = FREAD | FWRITE;
29213675Sdyson	wf->f_type = DTYPE_PIPE;
293109153Sdillon	wf->f_data = wpipe;
29413675Sdyson	wf->f_ops = &pipeops;
29589306Salfred	FILE_UNLOCK(wf);
29683366Sjulian	td->td_retval[1] = fd;
29713675Sdyson	rpipe->pipe_peer = wpipe;
29813675Sdyson	wpipe->pipe_peer = rpipe;
299101768Srwatson#ifdef MAC
300101768Srwatson	/*
301101768Srwatson	 * struct pipe represents a pipe endpoint.  The MAC label is shared
302101768Srwatson	 * between the connected endpoints.  As a result mac_init_pipe() and
303101768Srwatson	 * mac_create_pipe() should only be called on one of the endpoints
304101768Srwatson	 * after they have been connected.
305101768Srwatson	 */
306101768Srwatson	mac_init_pipe(rpipe);
307101768Srwatson	mac_create_pipe(td->td_ucred, rpipe);
308101768Srwatson#endif
30993818Sjhb	mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
31091968Salfred	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
31183366Sjulian	fdrop(rf, td);
31213675Sdyson
31313675Sdyson	return (0);
31413675Sdyson}
31513675Sdyson
31613909Sdyson/*
31713909Sdyson * Allocate kva for the pipe circular buffer; the space is pageable.
31876364Salfred * This routine will 'realloc' the size of a pipe safely: if it fails,
31976364Salfred * it will retain the old buffer and return ENOMEM.
32113909Sdyson */
32276364Salfredstatic int
32376364Salfredpipespace(cpipe, size)
32413675Sdyson	struct pipe *cpipe;
32576364Salfred	int size;
32613675Sdyson{
32776364Salfred	struct vm_object *object;
32876364Salfred	caddr_t buffer;
32913688Sdyson	int npages, error;
330117325Ssilby	static int curfail = 0;
331117325Ssilby	static struct timeval lastfail;
33213675Sdyson
33391412Salfred	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
33491412Salfred	       ("pipespace: pipe mutex locked"));
33579224Sdillon
336118764Ssilby	size = round_page(size);
337118764Ssilby	npages = size / PAGE_SIZE;
33813675Sdyson	/*
33913675Sdyson	 * Create an object, I don't like the idea of paging to/from
34013675Sdyson	 * kernel_object.
34114037Sdyson	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
34213675Sdyson	 */
34376364Salfred	object = vm_object_allocate(OBJT_DEFAULT, npages);
344118764Ssilby	buffer = (caddr_t) vm_map_min(pipe_map);
34513675Sdyson
34613675Sdyson	/*
34713675Sdyson	 * Insert the object into the kernel map, and allocate kva for it.
34813675Sdyson	 * The map entry is, by default, pageable.
34914037Sdyson	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
35013675Sdyson	 */
351118764Ssilby	error = vm_map_find(pipe_map, object, 0,
35276364Salfred		(vm_offset_t *) &buffer, size, 1,
35313688Sdyson		VM_PROT_ALL, VM_PROT_ALL, 0);
35413675Sdyson
35576364Salfred	if (error != KERN_SUCCESS) {
35676364Salfred		vm_object_deallocate(object);
357118764Ssilby		if (ppsratecheck(&lastfail, &curfail, 1))
358118764Ssilby			printf("kern.maxpipekva exceeded, please see tuning(7).\n");
35976364Salfred		return (ENOMEM);
36076364Salfred	}
36176364Salfred
36276364Salfred	/* free old resources if we're resizing */
36376364Salfred	pipe_free_kmem(cpipe);
36476364Salfred	cpipe->pipe_buffer.buffer = buffer;
36576364Salfred	cpipe->pipe_buffer.size = size;
36676364Salfred	cpipe->pipe_buffer.in = 0;
36776364Salfred	cpipe->pipe_buffer.out = 0;
36876364Salfred	cpipe->pipe_buffer.cnt = 0;
369117325Ssilby	atomic_add_int(&amountpipes, 1);
370110816Salc	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
37176364Salfred	return (0);
37213907Sdyson}
37313688Sdyson
37413907Sdyson/*
37513907Sdyson * initialize and allocate VM and memory for pipe
37613907Sdyson */
37776364Salfredstatic int
37876364Salfredpipe_create(cpipep)
37976364Salfred	struct pipe **cpipep;
38076364Salfred{
38113907Sdyson	struct pipe *cpipe;
38276364Salfred	int error;
38313907Sdyson
384111119Simp	*cpipep = uma_zalloc(pipe_zone, M_WAITOK);
38576364Salfred	if (*cpipep == NULL)
38676364Salfred		return (ENOMEM);
38717163Sdyson
38876364Salfred	cpipe = *cpipep;
38976364Salfred
39076364Salfred	/*
39176364Salfred	 * protect so pipeclose() doesn't follow a junk pointer
39276364Salfred	 * if pipespace() fails.
39376364Salfred	 */
39476754Salfred	bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
39513675Sdyson	cpipe->pipe_state = 0;
39613675Sdyson	cpipe->pipe_peer = NULL;
39713675Sdyson	cpipe->pipe_busy = 0;
39813907Sdyson
39914037Sdyson#ifndef PIPE_NODIRECT
40013907Sdyson	/*
40113907Sdyson	 * pipe data structure initializations to support direct pipe I/O
40213907Sdyson	 */
40313907Sdyson	cpipe->pipe_map.cnt = 0;
40413907Sdyson	cpipe->pipe_map.kva = 0;
40513907Sdyson	cpipe->pipe_map.pos = 0;
40613907Sdyson	cpipe->pipe_map.npages = 0;
40717124Sbde	/* cpipe->pipe_map.ms[] = invalid */
40814037Sdyson#endif
40976364Salfred
41091412Salfred	cpipe->pipe_mtxp = NULL;	/* avoid pipespace assertion */
411117325Ssilby	/*
412117325Ssilby	 * Reduce to 1/4th pipe size if we're over our global max.
413117325Ssilby	 */
414118764Ssilby	if (amountpipekva > maxpipekva / 2)
415117325Ssilby		error = pipespace(cpipe, SMALL_PIPE_SIZE);
416117325Ssilby	else
417117325Ssilby		error = pipespace(cpipe, PIPE_SIZE);
41876760Salfred	if (error)
41976364Salfred		return (error);
42076364Salfred
42176364Salfred	vfs_timestamp(&cpipe->pipe_ctime);
42276364Salfred	cpipe->pipe_atime = cpipe->pipe_ctime;
42376364Salfred	cpipe->pipe_mtime = cpipe->pipe_ctime;
42476364Salfred
42576364Salfred	return (0);
42613675Sdyson}
42713675Sdyson
42813675Sdyson
42913675Sdyson/*
43013675Sdyson * lock a pipe for I/O, blocking other access
43113675Sdyson */
43213675Sdysonstatic __inline int
43313907Sdysonpipelock(cpipe, catch)
43413675Sdyson	struct pipe *cpipe;
43513907Sdyson	int catch;
43613675Sdyson{
43713776Sdyson	int error;
43876364Salfred
43991362Salfred	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
44091362Salfred	while (cpipe->pipe_state & PIPE_LOCKFL) {
44113675Sdyson		cpipe->pipe_state |= PIPE_LWANT;
44291362Salfred		error = msleep(cpipe, PIPE_MTX(cpipe),
44391362Salfred		    catch ? (PRIBIO | PCATCH) : PRIBIO,
44476760Salfred		    "pipelk", 0);
44576760Salfred		if (error != 0)
44676760Salfred			return (error);
44713675Sdyson	}
44891362Salfred	cpipe->pipe_state |= PIPE_LOCKFL;
44976760Salfred	return (0);
45013675Sdyson}
45113675Sdyson
45213675Sdyson/*
45313675Sdyson * unlock a pipe I/O lock
45413675Sdyson */
45513675Sdysonstatic __inline void
45613675Sdysonpipeunlock(cpipe)
45713675Sdyson	struct pipe *cpipe;
45813675Sdyson{
45976364Salfred
46091362Salfred	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
46191362Salfred	cpipe->pipe_state &= ~PIPE_LOCKFL;
46213675Sdyson	if (cpipe->pipe_state & PIPE_LWANT) {
46313675Sdyson		cpipe->pipe_state &= ~PIPE_LWANT;
46414177Sdyson		wakeup(cpipe);
46513675Sdyson	}
46613675Sdyson}
46713675Sdyson
46814037Sdysonstatic __inline void
46914037Sdysonpipeselwakeup(cpipe)
47014037Sdyson	struct pipe *cpipe;
47114037Sdyson{
47276364Salfred
47314037Sdyson	if (cpipe->pipe_state & PIPE_SEL) {
47414037Sdyson		cpipe->pipe_state &= ~PIPE_SEL;
47514037Sdyson		selwakeup(&cpipe->pipe_sel);
47614037Sdyson	}
47741086Struckman	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
47895883Salfred		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
47959288Sjlemon	KNOTE(&cpipe->pipe_sel.si_note, 0);
48014037Sdyson}
48114037Sdyson
48213675Sdyson/* ARGSUSED */
48313675Sdysonstatic int
484101941Srwatsonpipe_read(fp, uio, active_cred, flags, td)
48513675Sdyson	struct file *fp;
48613675Sdyson	struct uio *uio;
487101941Srwatson	struct ucred *active_cred;
48883366Sjulian	struct thread *td;
48945311Sdt	int flags;
49013675Sdyson{
491109153Sdillon	struct pipe *rpipe = fp->f_data;
49247748Salc	int error;
49313675Sdyson	int nread = 0;
49418863Sdyson	u_int size;
49513675Sdyson
49691362Salfred	PIPE_LOCK(rpipe);
49713675Sdyson	++rpipe->pipe_busy;
49847748Salc	error = pipelock(rpipe, 1);
49947748Salc	if (error)
50047748Salc		goto unlocked_error;
50147748Salc
502101768Srwatson#ifdef MAC
503102115Srwatson	error = mac_check_pipe_read(active_cred, rpipe);
504101768Srwatson	if (error)
505101768Srwatson		goto locked_error;
506101768Srwatson#endif
507101768Srwatson
50813675Sdyson	while (uio->uio_resid) {
50913907Sdyson		/*
51013907Sdyson		 * normal pipe buffer receive
51113907Sdyson		 */
51213675Sdyson		if (rpipe->pipe_buffer.cnt > 0) {
51318863Sdyson			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
51413675Sdyson			if (size > rpipe->pipe_buffer.cnt)
51513675Sdyson				size = rpipe->pipe_buffer.cnt;
51618863Sdyson			if (size > (u_int) uio->uio_resid)
51718863Sdyson				size = (u_int) uio->uio_resid;
51847748Salc
51991362Salfred			PIPE_UNLOCK(rpipe);
520116127Smux			error = uiomove(
521116127Smux			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
522116127Smux			    size, uio);
52391362Salfred			PIPE_LOCK(rpipe);
52476760Salfred			if (error)
52513675Sdyson				break;
52676760Salfred
52713675Sdyson			rpipe->pipe_buffer.out += size;
52813675Sdyson			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
52913675Sdyson				rpipe->pipe_buffer.out = 0;
53013675Sdyson
53113675Sdyson			rpipe->pipe_buffer.cnt -= size;
53247748Salc
53347748Salc			/*
53447748Salc			 * If there is no more to read in the pipe, reset
53547748Salc			 * its pointers to the beginning.  This improves
53647748Salc			 * cache hit stats.
53747748Salc			 */
53847748Salc			if (rpipe->pipe_buffer.cnt == 0) {
53947748Salc				rpipe->pipe_buffer.in = 0;
54047748Salc				rpipe->pipe_buffer.out = 0;
54147748Salc			}
54213675Sdyson			nread += size;
54314037Sdyson#ifndef PIPE_NODIRECT
54413907Sdyson		/*
54513907Sdyson		 * Direct copy, bypassing a kernel buffer.
54613907Sdyson		 */
54713907Sdyson		} else if ((size = rpipe->pipe_map.cnt) &&
54847748Salc			   (rpipe->pipe_state & PIPE_DIRECTW)) {
54947748Salc			caddr_t	va;
55018863Sdyson			if (size > (u_int) uio->uio_resid)
55118863Sdyson				size = (u_int) uio->uio_resid;
55247748Salc
55376760Salfred			va = (caddr_t) rpipe->pipe_map.kva +
55476760Salfred			    rpipe->pipe_map.pos;
55591362Salfred			PIPE_UNLOCK(rpipe);
55647748Salc			error = uiomove(va, size, uio);
55791362Salfred			PIPE_LOCK(rpipe);
55813907Sdyson			if (error)
55913907Sdyson				break;
56013907Sdyson			nread += size;
56113907Sdyson			rpipe->pipe_map.pos += size;
56213907Sdyson			rpipe->pipe_map.cnt -= size;
56313907Sdyson			if (rpipe->pipe_map.cnt == 0) {
56413907Sdyson				rpipe->pipe_state &= ~PIPE_DIRECTW;
56513907Sdyson				wakeup(rpipe);
56613907Sdyson			}
56714037Sdyson#endif
56813675Sdyson		} else {
56913675Sdyson			/*
57013675Sdyson			 * detect EOF condition
57176760Salfred			 * read returns 0 on EOF, no need to set error
57213675Sdyson			 */
57376760Salfred			if (rpipe->pipe_state & PIPE_EOF)
57413675Sdyson				break;
57543623Sdillon
57613675Sdyson			/*
57713675Sdyson			 * If the "write-side" has been blocked, wake it up now.
57813675Sdyson			 */
57913675Sdyson			if (rpipe->pipe_state & PIPE_WANTW) {
58013675Sdyson				rpipe->pipe_state &= ~PIPE_WANTW;
58113675Sdyson				wakeup(rpipe);
58213675Sdyson			}
58343623Sdillon
58443623Sdillon			/*
58547748Salc			 * Break if some data was read.
58643623Sdillon			 */
58747748Salc			if (nread > 0)
58813675Sdyson				break;
58916960Sdyson
59043623Sdillon			/*
591116127Smux			 * Unlock the pipe buffer for our remaining processing.
592116127Smux			 * We will either break out with an error or we will
593116127Smux			 * sleep and relock to loop.
59443623Sdillon			 */
59547748Salc			pipeunlock(rpipe);
59643623Sdillon
59713675Sdyson			/*
59847748Salc			 * Handle non-blocking mode operation or
59947748Salc			 * wait for more data.
60013675Sdyson			 */
60176760Salfred			if (fp->f_flag & FNONBLOCK) {
60247748Salc				error = EAGAIN;
60376760Salfred			} else {
60447748Salc				rpipe->pipe_state |= PIPE_WANTR;
60591362Salfred				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
60691362Salfred				    PRIBIO | PCATCH,
60777140Salfred				    "piperd", 0)) == 0)
60847748Salc					error = pipelock(rpipe, 1);
60913675Sdyson			}
61047748Salc			if (error)
61147748Salc				goto unlocked_error;
61213675Sdyson		}
61313675Sdyson	}
614101768Srwatson#ifdef MAC
615101768Srwatsonlocked_error:
616101768Srwatson#endif
61747748Salc	pipeunlock(rpipe);
61813675Sdyson
61991362Salfred	/* XXX: should probably do this before getting any locks. */
62024101Sbde	if (error == 0)
62155112Sbde		vfs_timestamp(&rpipe->pipe_atime);
62247748Salcunlocked_error:
62347748Salc	--rpipe->pipe_busy;
62413913Sdyson
62547748Salc	/*
62647748Salc	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
62747748Salc	 */
62813675Sdyson	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
62913675Sdyson		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
63013675Sdyson		wakeup(rpipe);
63113675Sdyson	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
63213675Sdyson		/*
63347748Salc		 * Handle write blocking hysteresis.
63413675Sdyson		 */
63513675Sdyson		if (rpipe->pipe_state & PIPE_WANTW) {
63613675Sdyson			rpipe->pipe_state &= ~PIPE_WANTW;
63713675Sdyson			wakeup(rpipe);
63813675Sdyson		}
63913675Sdyson	}
64014037Sdyson
64114802Sdyson	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
64214037Sdyson		pipeselwakeup(rpipe);
64314037Sdyson
64491362Salfred	PIPE_UNLOCK(rpipe);
64576760Salfred	return (error);
64613675Sdyson}
64713675Sdyson
64814037Sdyson#ifndef PIPE_NODIRECT
64913907Sdyson/*
64913907Sdyson * Map the sending process's buffer into kernel space and wire it.
65113907Sdyson * This is similar to a physical write operation.
65213907Sdyson */
65313675Sdysonstatic int
65413907Sdysonpipe_build_write_buffer(wpipe, uio)
65513907Sdyson	struct pipe *wpipe;
65613675Sdyson	struct uio *uio;
65713675Sdyson{
65818863Sdyson	u_int size;
65994566Stmm	int i;
660112569Sjake	vm_offset_t addr, endaddr;
661112569Sjake	vm_paddr_t paddr;
66213907Sdyson
66379224Sdillon	GIANT_REQUIRED;
66491412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
66579224Sdillon
66618863Sdyson	size = (u_int) uio->uio_iov->iov_len;
66713907Sdyson	if (size > wpipe->pipe_buffer.size)
66813907Sdyson		size = wpipe->pipe_buffer.size;
66913907Sdyson
67040286Sdg	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
67176760Salfred	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
67276760Salfred	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
67394566Stmm		vm_page_t m;
67494566Stmm
67599899Salc		/*
67699899Salc		 * vm_fault_quick() can sleep.  Consequently,
67799899Salc		 * vm_page_lock_queue() and vm_page_unlock_queue()
67899899Salc		 * should not be performed outside of this loop.
67999899Salc		 */
68051474Sdillon		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
68194608Stmm		    (paddr = pmap_extract(vmspace_pmap(curproc->p_vmspace),
68294608Stmm		     addr)) == 0) {
68313907Sdyson			int j;
68476760Salfred
68599899Salc			vm_page_lock_queues();
686117325Ssilby			for (j = 0; j < i; j++) {
687118757Salc				vm_page_unhold(wpipe->pipe_map.ms[j]);
688117325Ssilby			}
68999899Salc			vm_page_unlock_queues();
69076760Salfred			return (EFAULT);
69113907Sdyson		}
69213907Sdyson
69394566Stmm		m = PHYS_TO_VM_PAGE(paddr);
69499899Salc		vm_page_lock_queues();
695118757Salc		vm_page_hold(m);
69699899Salc		vm_page_unlock_queues();
69713907Sdyson		wpipe->pipe_map.ms[i] = m;
69813907Sdyson	}
69913907Sdyson
70013907Sdyson/*
70113907Sdyson * set up the control block
70213907Sdyson */
70313907Sdyson	wpipe->pipe_map.npages = i;
70476760Salfred	wpipe->pipe_map.pos =
70576760Salfred	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
70613907Sdyson	wpipe->pipe_map.cnt = size;
70713907Sdyson
70813907Sdyson/*
70913907Sdyson * and map the buffer
71013907Sdyson */
71113907Sdyson	if (wpipe->pipe_map.kva == 0) {
71213912Sdyson		/*
71313912Sdyson		 * We need to allocate space for an extra page because the
71413912Sdyson		 * address range might (will) span pages at times.
71513912Sdyson		 */
716118220Salc		wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map,
71713912Sdyson			wpipe->pipe_buffer.size + PAGE_SIZE);
718118764Ssilby		atomic_add_int(&amountpipekvawired,
719110816Salc		    wpipe->pipe_buffer.size + PAGE_SIZE);
72013907Sdyson	}
72113907Sdyson	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
72213907Sdyson		wpipe->pipe_map.npages);
72313907Sdyson
72413907Sdyson/*
72513907Sdyson * and update the uio data
72613907Sdyson */
72713907Sdyson
72813907Sdyson	uio->uio_iov->iov_len -= size;
729104908Smike	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
73013907Sdyson	if (uio->uio_iov->iov_len == 0)
73113907Sdyson		uio->uio_iov++;
73213907Sdyson	uio->uio_resid -= size;
73313907Sdyson	uio->uio_offset += size;
73476760Salfred	return (0);
73513907Sdyson}
73613907Sdyson
73713907Sdyson/*
73813907Sdyson * unmap and unwire the process buffer
73913907Sdyson */
74013907Sdysonstatic void
74113907Sdysonpipe_destroy_write_buffer(wpipe)
74276760Salfred	struct pipe *wpipe;
74313907Sdyson{
74413907Sdyson	int i;
74576364Salfred
74691412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
74717163Sdyson	if (wpipe->pipe_map.kva) {
74817163Sdyson		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
74913907Sdyson
750118764Ssilby		if (amountpipekvawired > maxpipekvawired / 2) {
751118764Ssilby			/* Conserve address space */
75213907Sdyson			vm_offset_t kva = wpipe->pipe_map.kva;
75313907Sdyson			wpipe->pipe_map.kva = 0;
75413907Sdyson			kmem_free(kernel_map, kva,
755119811Salc			    wpipe->pipe_buffer.size + PAGE_SIZE);
756118764Ssilby			atomic_subtract_int(&amountpipekvawired,
757110816Salc			    wpipe->pipe_buffer.size + PAGE_SIZE);
75813907Sdyson		}
75913907Sdyson	}
76099899Salc	vm_page_lock_queues();
761117325Ssilby	for (i = 0; i < wpipe->pipe_map.npages; i++) {
762118757Salc		vm_page_unhold(wpipe->pipe_map.ms[i]);
763117325Ssilby	}
76499899Salc	vm_page_unlock_queues();
76591653Stanimura	wpipe->pipe_map.npages = 0;
76613907Sdyson}
76713907Sdyson
76813907Sdyson/*
76913907Sdyson * In the case of a signal, the writing process might go away.  This
77013907Sdyson * code copies the data into the circular buffer so that the source
77113907Sdyson * pages can be freed without loss of data.
77213907Sdyson */
77313907Sdysonstatic void
77413907Sdysonpipe_clone_write_buffer(wpipe)
77576364Salfred	struct pipe *wpipe;
77613907Sdyson{
77713907Sdyson	int size;
77813907Sdyson	int pos;
77913907Sdyson
78091362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
78113907Sdyson	size = wpipe->pipe_map.cnt;
78213907Sdyson	pos = wpipe->pipe_map.pos;
78313907Sdyson
78413907Sdyson	wpipe->pipe_buffer.in = size;
78513907Sdyson	wpipe->pipe_buffer.out = 0;
78613907Sdyson	wpipe->pipe_buffer.cnt = size;
78713907Sdyson	wpipe->pipe_state &= ~PIPE_DIRECTW;
78813907Sdyson
789119811Salc	PIPE_UNLOCK(wpipe);
79092959Salfred	bcopy((caddr_t) wpipe->pipe_map.kva + pos,
791100527Salfred	    wpipe->pipe_buffer.buffer, size);
79213907Sdyson	pipe_destroy_write_buffer(wpipe);
793119811Salc	PIPE_LOCK(wpipe);
79413907Sdyson}
79513907Sdyson
79613907Sdyson/*
79713907Sdyson * This implements the pipe buffer write mechanism.  Note that only
79813907Sdyson * a direct write OR a normal pipe write can be pending at any given time.
79913907Sdyson * If there are any characters in the pipe buffer, the direct write will
80013907Sdyson * be deferred until the receiving process grabs all of the bytes from
80113907Sdyson * the pipe buffer.  Then the direct mapping write is set-up.
80213907Sdyson */
80313907Sdysonstatic int
80413907Sdysonpipe_direct_write(wpipe, uio)
80513907Sdyson	struct pipe *wpipe;
80613907Sdyson	struct uio *uio;
80713907Sdyson{
80813907Sdyson	int error;
80976364Salfred
81013951Sdysonretry:
81191362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
81213907Sdyson	while (wpipe->pipe_state & PIPE_DIRECTW) {
81376760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
81413951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
81513951Sdyson			wakeup(wpipe);
81613951Sdyson		}
81713992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
81891362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
81991362Salfred		    PRIBIO | PCATCH, "pipdww", 0);
82014802Sdyson		if (error)
82113907Sdyson			goto error1;
82214802Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
82314802Sdyson			error = EPIPE;
82414802Sdyson			goto error1;
82514802Sdyson		}
82613907Sdyson	}
82713907Sdyson	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
82813951Sdyson	if (wpipe->pipe_buffer.cnt > 0) {
82976760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
83013951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
83113951Sdyson			wakeup(wpipe);
83213951Sdyson		}
83313951Sdyson
83413992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
83591362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
83691362Salfred		    PRIBIO | PCATCH, "pipdwc", 0);
83714802Sdyson		if (error)
83813907Sdyson			goto error1;
83914802Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
84014802Sdyson			error = EPIPE;
84114802Sdyson			goto error1;
84213907Sdyson		}
84313951Sdyson		goto retry;
84413907Sdyson	}
84513907Sdyson
84613951Sdyson	wpipe->pipe_state |= PIPE_DIRECTW;
84713951Sdyson
84892305Salfred	pipelock(wpipe, 0);
84991362Salfred	PIPE_GET_GIANT(wpipe);
85013907Sdyson	error = pipe_build_write_buffer(wpipe, uio);
85191362Salfred	PIPE_DROP_GIANT(wpipe);
85292305Salfred	pipeunlock(wpipe);
85313907Sdyson	if (error) {
85413907Sdyson		wpipe->pipe_state &= ~PIPE_DIRECTW;
85513907Sdyson		goto error1;
85613907Sdyson	}
85713907Sdyson
85813907Sdyson	error = 0;
85913907Sdyson	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
86013907Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
86113907Sdyson			pipelock(wpipe, 0);
862119811Salc			PIPE_UNLOCK(wpipe);
86313907Sdyson			pipe_destroy_write_buffer(wpipe);
864119811Salc			PIPE_LOCK(wpipe);
865112981Shsu			pipeselwakeup(wpipe);
86613907Sdyson			pipeunlock(wpipe);
86714802Sdyson			error = EPIPE;
86814802Sdyson			goto error1;
86913907Sdyson		}
87013992Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
87113992Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
87213992Sdyson			wakeup(wpipe);
87313992Sdyson		}
87414037Sdyson		pipeselwakeup(wpipe);
87591362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
87691362Salfred		    "pipdwt", 0);
87713907Sdyson	}
87813907Sdyson
87913907Sdyson	pipelock(wpipe,0);
88013907Sdyson	if (wpipe->pipe_state & PIPE_DIRECTW) {
88113907Sdyson		/*
88213907Sdyson		 * this bit of trickery substitutes a kernel buffer for
88313907Sdyson		 * the process that might be going away.
88413907Sdyson		 */
88513907Sdyson		pipe_clone_write_buffer(wpipe);
88613907Sdyson	} else {
887119811Salc		PIPE_UNLOCK(wpipe);
88813907Sdyson		pipe_destroy_write_buffer(wpipe);
889119811Salc		PIPE_LOCK(wpipe);
89013907Sdyson	}
89113907Sdyson	pipeunlock(wpipe);
89276760Salfred	return (error);
89313907Sdyson
89413907Sdysonerror1:
89513907Sdyson	wakeup(wpipe);
89676760Salfred	return (error);
89713907Sdyson}
89814037Sdyson#endif
89913907Sdyson
90016960Sdysonstatic int
901101941Srwatsonpipe_write(fp, uio, active_cred, flags, td)
90216960Sdyson	struct file *fp;
90313907Sdyson	struct uio *uio;
904101941Srwatson	struct ucred *active_cred;
90583366Sjulian	struct thread *td;
90645311Sdt	int flags;
90713907Sdyson{
90813675Sdyson	int error = 0;
90913913Sdyson	int orig_resid;
91016960Sdyson	struct pipe *wpipe, *rpipe;
91116960Sdyson
912109153Sdillon	rpipe = fp->f_data;
91316960Sdyson	wpipe = rpipe->pipe_peer;
91416960Sdyson
91591395Salfred	PIPE_LOCK(rpipe);
91613675Sdyson	/*
91713675Sdyson	 * detect loss of pipe read side, issue SIGPIPE if lost.
91813675Sdyson	 */
91916960Sdyson	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
92091395Salfred		PIPE_UNLOCK(rpipe);
92176760Salfred		return (EPIPE);
92213675Sdyson	}
923101768Srwatson#ifdef MAC
924102115Srwatson	error = mac_check_pipe_write(active_cred, wpipe);
925101768Srwatson	if (error) {
926101768Srwatson		PIPE_UNLOCK(rpipe);
927101768Srwatson		return (error);
928101768Srwatson	}
929101768Srwatson#endif
93077676Sdillon	++wpipe->pipe_busy;
93113675Sdyson
93217163Sdyson	/*
93317163Sdyson	 * If it is advantageous to resize the pipe buffer, do
93417163Sdyson	 * so.
93517163Sdyson	 */
93617163Sdyson	if ((uio->uio_resid > PIPE_SIZE) &&
937118764Ssilby		(amountpipekva < maxpipekva / 2) &&
93817163Sdyson		(nbigpipe < LIMITBIGPIPES) &&
93917163Sdyson		(wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
94017163Sdyson		(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
94117163Sdyson		(wpipe->pipe_buffer.cnt == 0)) {
94217163Sdyson
943105009Salfred		if ((error = pipelock(wpipe, 1)) == 0) {
944118799Salc			PIPE_UNLOCK(wpipe);
94576364Salfred			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
946117364Ssilby				atomic_add_int(&nbigpipe, 1);
947118799Salc			PIPE_LOCK(wpipe);
94813907Sdyson			pipeunlock(wpipe);
94913907Sdyson		}
95013907Sdyson	}
95177676Sdillon
95277676Sdillon	/*
95377676Sdillon	 * If an early error occurred, unbusy and return, waking up any pending
95477676Sdillon	 * readers.
95577676Sdillon	 */
95677676Sdillon	if (error) {
95777676Sdillon		--wpipe->pipe_busy;
95877676Sdillon		if ((wpipe->pipe_busy == 0) &&
95977676Sdillon		    (wpipe->pipe_state & PIPE_WANT)) {
96077676Sdillon			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
96177676Sdillon			wakeup(wpipe);
96277676Sdillon		}
96391395Salfred		PIPE_UNLOCK(rpipe);
96477676Sdillon		return(error);
96577676Sdillon	}
96676364Salfred
96713913Sdyson	orig_resid = uio->uio_resid;
96877676Sdillon
96913675Sdyson	while (uio->uio_resid) {
97013907Sdyson		int space;
97176760Salfred
97214037Sdyson#ifndef PIPE_NODIRECT
97313907Sdyson		/*
97413907Sdyson		 * If the transfer is large, we can gain performance if
97513907Sdyson		 * we do process-to-process copies directly.
97616416Sdyson		 * If the write is non-blocking, we don't use the
97716416Sdyson		 * direct write mechanism.
97858505Sdillon		 *
97958505Sdillon		 * The direct write mechanism will detect the reader going
98058505Sdillon		 * away on us.
98113907Sdyson		 */
98217163Sdyson		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
98317163Sdyson		    (fp->f_flag & FNONBLOCK) == 0 &&
984118764Ssilby		    amountpipekvawired + uio->uio_resid < maxpipekvawired) {
985105009Salfred			error = pipe_direct_write(wpipe, uio);
98676760Salfred			if (error)
98713907Sdyson				break;
98813907Sdyson			continue;
98991362Salfred		}
99014037Sdyson#endif
99113907Sdyson
99213907Sdyson		/*
99313907Sdyson		 * Pipe buffered writes cannot be coincident with
99413907Sdyson		 * direct writes.  We wait until the currently executing
99513907Sdyson		 * direct write is completed before we start filling the
99658505Sdillon		 * pipe buffer.  We break out if a signal occurs or the
99758505Sdillon		 * reader goes away.
99813907Sdyson		 */
99913907Sdyson	retrywrite:
100013907Sdyson		while (wpipe->pipe_state & PIPE_DIRECTW) {
100113992Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
100213992Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
100313992Sdyson				wakeup(wpipe);
100413992Sdyson			}
100591395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
100691362Salfred			    "pipbww", 0);
100758505Sdillon			if (wpipe->pipe_state & PIPE_EOF)
100858505Sdillon				break;
100913907Sdyson			if (error)
101013907Sdyson				break;
101113907Sdyson		}
101258505Sdillon		if (wpipe->pipe_state & PIPE_EOF) {
101358505Sdillon			error = EPIPE;
101458505Sdillon			break;
101558505Sdillon		}
101613907Sdyson
101713907Sdyson		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
101814644Sdyson
101914644Sdyson		/* Writes of size <= PIPE_BUF must be atomic. */
102013913Sdyson		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
102113913Sdyson			space = 0;
102213907Sdyson
1023118230Spb		if (space > 0) {
102413907Sdyson			if ((error = pipelock(wpipe,1)) == 0) {
102554534Stegge				int size;	/* Transfer size */
102654534Stegge				int segsize;	/* first segment to transfer */
102776760Salfred
102813907Sdyson				/*
102913907Sdyson				 * It is possible for a direct write to
103013907Sdyson				 * slip in on us... handle it here...
103113907Sdyson				 */
103213907Sdyson				if (wpipe->pipe_state & PIPE_DIRECTW) {
103313907Sdyson					pipeunlock(wpipe);
103413907Sdyson					goto retrywrite;
103513907Sdyson				}
103654534Stegge				/*
103754534Stegge				 * If a process blocked in uiomove, our
103854534Stegge				 * value for space might be bad.
103958505Sdillon				 *
104058505Sdillon				 * XXX will we be ok if the reader has gone
104158505Sdillon				 * away here?
104254534Stegge				 */
104354534Stegge				if (space > wpipe->pipe_buffer.size -
104454534Stegge				    wpipe->pipe_buffer.cnt) {
104554534Stegge					pipeunlock(wpipe);
104654534Stegge					goto retrywrite;
104754534Stegge				}
104854534Stegge
104954534Stegge				/*
105054534Stegge				 * Transfer size is minimum of uio transfer
105154534Stegge				 * and free space in pipe buffer.
105254534Stegge				 */
105354534Stegge				if (space > uio->uio_resid)
105454534Stegge					size = uio->uio_resid;
105554534Stegge				else
105654534Stegge					size = space;
105754534Stegge				/*
105854534Stegge				 * First segment to transfer is minimum of
105954534Stegge				 * transfer size and contiguous space in
106054534Stegge				 * pipe buffer.  If first segment to transfer
106154534Stegge				 * is less than the transfer size, we've got
106254534Stegge				 * a wraparound in the buffer.
106354534Stegge				 */
106454534Stegge				segsize = wpipe->pipe_buffer.size -
106554534Stegge					wpipe->pipe_buffer.in;
106654534Stegge				if (segsize > size)
106754534Stegge					segsize = size;
106854534Stegge
106954534Stegge				/* Transfer first segment */
107054534Stegge
107191395Salfred				PIPE_UNLOCK(rpipe);
107254534Stegge				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
107354534Stegge						segsize, uio);
107491395Salfred				PIPE_LOCK(rpipe);
107554534Stegge
107654534Stegge				if (error == 0 && segsize < size) {
107754534Stegge					/*
107854534Stegge					 * Transfer remaining part now, to
107954534Stegge					 * support atomic writes.  Wraparound
108054534Stegge					 * happened.
108154534Stegge					 */
108254534Stegge					if (wpipe->pipe_buffer.in + segsize !=
108354534Stegge					    wpipe->pipe_buffer.size)
1084116127Smux						panic("Expected pipe buffer "
1085116127Smux						    "wraparound disappeared");
108654534Stegge
108791395Salfred					PIPE_UNLOCK(rpipe);
1088116127Smux					error = uiomove(
1089116127Smux					    &wpipe->pipe_buffer.buffer[0],
1090116127Smux				    	    size - segsize, uio);
109191395Salfred					PIPE_LOCK(rpipe);
109254534Stegge				}
109354534Stegge				if (error == 0) {
109454534Stegge					wpipe->pipe_buffer.in += size;
109554534Stegge					if (wpipe->pipe_buffer.in >=
109654534Stegge					    wpipe->pipe_buffer.size) {
1097116127Smux						if (wpipe->pipe_buffer.in !=
1098116127Smux						    size - segsize +
1099116127Smux						    wpipe->pipe_buffer.size)
1100116127Smux							panic("Expected "
1101116127Smux							    "wraparound bad");
1102116127Smux						wpipe->pipe_buffer.in = size -
1103116127Smux						    segsize;
110454534Stegge					}
110554534Stegge
110654534Stegge					wpipe->pipe_buffer.cnt += size;
1107116127Smux					if (wpipe->pipe_buffer.cnt >
1108116127Smux					    wpipe->pipe_buffer.size)
110954534Stegge						panic("Pipe buffer overflow");
111054534Stegge
111154534Stegge				}
111213675Sdyson				pipeunlock(wpipe);
111313675Sdyson			}
111413675Sdyson			if (error)
111513675Sdyson				break;
111613675Sdyson
111713675Sdyson		} else {
111813675Sdyson			/*
111913675Sdyson			 * If the "read-side" has been blocked, wake it up now.
112013675Sdyson			 */
112113675Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
112213675Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
112313675Sdyson				wakeup(wpipe);
112413675Sdyson			}
112514037Sdyson
112613675Sdyson			/*
112713675Sdyson			 * don't block on non-blocking I/O
112813675Sdyson			 */
112916960Sdyson			if (fp->f_flag & FNONBLOCK) {
113013907Sdyson				error = EAGAIN;
113113675Sdyson				break;
113213675Sdyson			}
113313907Sdyson
113414037Sdyson			/*
113514037Sdyson			 * We have no more space and have something to offer,
113629356Speter			 * wake up select/poll.
113714037Sdyson			 */
113814037Sdyson			pipeselwakeup(wpipe);
113914037Sdyson
114013675Sdyson			wpipe->pipe_state |= PIPE_WANTW;
114191395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe),
114291362Salfred			    PRIBIO | PCATCH, "pipewr", 0);
114376760Salfred			if (error != 0)
114413675Sdyson				break;
114513675Sdyson			/*
114613675Sdyson			 * If read side wants to go away, we just issue a signal
114713675Sdyson			 * to ourselves.
114813675Sdyson			 */
114913675Sdyson			if (wpipe->pipe_state & PIPE_EOF) {
115013774Sdyson				error = EPIPE;
115113907Sdyson				break;
115213675Sdyson			}
115313675Sdyson		}
115413675Sdyson	}
115513675Sdyson
115614644Sdyson	--wpipe->pipe_busy;
115777676Sdillon
115876760Salfred	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
115976760Salfred		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
116013675Sdyson		wakeup(wpipe);
116113675Sdyson	} else if (wpipe->pipe_buffer.cnt > 0) {
116213675Sdyson		/*
116313675Sdyson		 * If we have put any characters in the buffer, we wake up
116413675Sdyson		 * the reader.
116513675Sdyson		 */
116613675Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
116713675Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
116813675Sdyson			wakeup(wpipe);
116913675Sdyson		}
117013675Sdyson	}
117113909Sdyson
117213909Sdyson	/*
117313909Sdyson	 * Don't return EPIPE if I/O was successful
117413909Sdyson	 */
117513907Sdyson	if ((wpipe->pipe_buffer.cnt == 0) &&
117677676Sdillon	    (uio->uio_resid == 0) &&
117777676Sdillon	    (error == EPIPE)) {
117813907Sdyson		error = 0;
117977676Sdillon	}
118013913Sdyson
118124101Sbde	if (error == 0)
118255112Sbde		vfs_timestamp(&wpipe->pipe_mtime);
118324101Sbde
118414037Sdyson	/*
118514037Sdyson	 * We have something to offer,
118629356Speter	 * wake up select/poll.
118714037Sdyson	 */
118814177Sdyson	if (wpipe->pipe_buffer.cnt)
118914037Sdyson		pipeselwakeup(wpipe);
119013907Sdyson
119191395Salfred	PIPE_UNLOCK(rpipe);
119276760Salfred	return (error);
119313675Sdyson}
119413675Sdyson
119513675Sdyson/*
119613675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets.
119713675Sdyson */
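/*
 * Illustrative userland sketch (not part of this file): the handful of
 * ioctls handled below covers common cases such as asking how many bytes
 * are ready to read.  "fd" is assumed to be the read side of a pipe
 * obtained from pipe(2).
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/filio.h>
 *	#include <stdio.h>
 *
 *	int nread;
 *
 *	if (ioctl(fd, FIONREAD, &nread) == 0)
 *		printf("%d bytes available\n", nread);
 */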
1198104094Sphkstatic int
1199102003Srwatsonpipe_ioctl(fp, cmd, data, active_cred, td)
120013675Sdyson	struct file *fp;
120136735Sdfr	u_long cmd;
120299009Salfred	void *data;
1203102003Srwatson	struct ucred *active_cred;
120483366Sjulian	struct thread *td;
120513675Sdyson{
1206109153Sdillon	struct pipe *mpipe = fp->f_data;
1207101768Srwatson#ifdef MAC
1208101768Srwatson	int error;
1209104269Srwatson#endif
121013675Sdyson
1211104269Srwatson	PIPE_LOCK(mpipe);
1212104269Srwatson
1213104269Srwatson#ifdef MAC
1214102003Srwatson	error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data);
1215101768Srwatson	if (error) {
1215101768Srwatson		PIPE_UNLOCK(mpipe);
1216101768Srwatson		return (error);
1216101768Srwatson	}
1217101768Srwatson#endif
1218101768Srwatson
121913675Sdyson	switch (cmd) {
122013675Sdyson
122113675Sdyson	case FIONBIO:
1222104269Srwatson		PIPE_UNLOCK(mpipe);
122313675Sdyson		return (0);
122413675Sdyson
122513675Sdyson	case FIOASYNC:
122613675Sdyson		if (*(int *)data) {
122713675Sdyson			mpipe->pipe_state |= PIPE_ASYNC;
122813675Sdyson		} else {
122913675Sdyson			mpipe->pipe_state &= ~PIPE_ASYNC;
123013675Sdyson		}
123191362Salfred		PIPE_UNLOCK(mpipe);
123213675Sdyson		return (0);
123313675Sdyson
123413675Sdyson	case FIONREAD:
123514037Sdyson		if (mpipe->pipe_state & PIPE_DIRECTW)
123614037Sdyson			*(int *)data = mpipe->pipe_map.cnt;
123714037Sdyson		else
123814037Sdyson			*(int *)data = mpipe->pipe_buffer.cnt;
123991362Salfred		PIPE_UNLOCK(mpipe);
124013675Sdyson		return (0);
124113675Sdyson
124241086Struckman	case FIOSETOWN:
1243104269Srwatson		PIPE_UNLOCK(mpipe);
124441086Struckman		return (fsetown(*(int *)data, &mpipe->pipe_sigio));
124541086Struckman
124641086Struckman	case FIOGETOWN:
1247104269Srwatson		PIPE_UNLOCK(mpipe);
1248104393Struckman		*(int *)data = fgetown(&mpipe->pipe_sigio);
124913675Sdyson		return (0);
125013675Sdyson
125141086Struckman	/* This is deprecated, FIOSETOWN should be used instead. */
125241086Struckman	case TIOCSPGRP:
1253104269Srwatson		PIPE_UNLOCK(mpipe);
125441086Struckman		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));
125541086Struckman
125641086Struckman	/* This is deprecated, FIOGETOWN should be used instead. */
125718863Sdyson	case TIOCGPGRP:
1258104269Srwatson		PIPE_UNLOCK(mpipe);
1259104393Struckman		*(int *)data = -fgetown(&mpipe->pipe_sigio);
126013675Sdyson		return (0);
126113675Sdyson
126213675Sdyson	}
1263104269Srwatson	PIPE_UNLOCK(mpipe);
126417124Sbde	return (ENOTTY);
126513675Sdyson}
126613675Sdyson
1267104094Sphkstatic int
1268101983Srwatsonpipe_poll(fp, events, active_cred, td)
126913675Sdyson	struct file *fp;
127029356Speter	int events;
1271101983Srwatson	struct ucred *active_cred;
127283366Sjulian	struct thread *td;
127313675Sdyson{
1274109153Sdillon	struct pipe *rpipe = fp->f_data;
127513675Sdyson	struct pipe *wpipe;
127629356Speter	int revents = 0;
1277101768Srwatson#ifdef MAC
1278101768Srwatson	int error;
1279101768Srwatson#endif
128013675Sdyson
128113675Sdyson	wpipe = rpipe->pipe_peer;
128291362Salfred	PIPE_LOCK(rpipe);
1283101768Srwatson#ifdef MAC
1284102115Srwatson	error = mac_check_pipe_poll(active_cred, rpipe);
1285101768Srwatson	if (error)
1286101768Srwatson		goto locked_error;
1287101768Srwatson#endif
128829356Speter	if (events & (POLLIN | POLLRDNORM))
128929356Speter		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
129029356Speter		    (rpipe->pipe_buffer.cnt > 0) ||
129129356Speter		    (rpipe->pipe_state & PIPE_EOF))
129229356Speter			revents |= events & (POLLIN | POLLRDNORM);
129313675Sdyson
129429356Speter	if (events & (POLLOUT | POLLWRNORM))
129529356Speter		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
129643311Sdillon		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
129743311Sdillon		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
129829356Speter			revents |= events & (POLLOUT | POLLWRNORM);
129913675Sdyson
130029356Speter	if ((rpipe->pipe_state & PIPE_EOF) ||
130129356Speter	    (wpipe == NULL) ||
130229356Speter	    (wpipe->pipe_state & PIPE_EOF))
130329356Speter		revents |= POLLHUP;
130429356Speter
130529356Speter	if (revents == 0) {
130629356Speter		if (events & (POLLIN | POLLRDNORM)) {
130783805Sjhb			selrecord(td, &rpipe->pipe_sel);
130829356Speter			rpipe->pipe_state |= PIPE_SEL;
130913675Sdyson		}
131013675Sdyson
131129356Speter		if (events & (POLLOUT | POLLWRNORM)) {
131283805Sjhb			selrecord(td, &wpipe->pipe_sel);
131330164Speter			wpipe->pipe_state |= PIPE_SEL;
131413907Sdyson		}
131513675Sdyson	}
1316101768Srwatson#ifdef MAC
1317101768Srwatsonlocked_error:
1318101768Srwatson#endif
131991362Salfred	PIPE_UNLOCK(rpipe);
132029356Speter
132129356Speter	return (revents);
132213675Sdyson}
132313675Sdyson
132498989Salfred/*
132598989Salfred * We shouldn't need locks here as we're doing a read and this should
132698989Salfred * be a natural race.
132798989Salfred */
132852983Speterstatic int
1329101983Srwatsonpipe_stat(fp, ub, active_cred, td)
133052983Speter	struct file *fp;
133152983Speter	struct stat *ub;
1332101983Srwatson	struct ucred *active_cred;
133383366Sjulian	struct thread *td;
133413675Sdyson{
1335109153Sdillon	struct pipe *pipe = fp->f_data;
1336101768Srwatson#ifdef MAC
1337101768Srwatson	int error;
133852983Speter
1339104269Srwatson	PIPE_LOCK(pipe);
1340102115Srwatson	error = mac_check_pipe_stat(active_cred, pipe);
1341104269Srwatson	PIPE_UNLOCK(pipe);
1342101768Srwatson	if (error)
1343101768Srwatson		return (error);
1344101768Srwatson#endif
1345100527Salfred	bzero(ub, sizeof(*ub));
134617124Sbde	ub->st_mode = S_IFIFO;
134713907Sdyson	ub->st_blksize = pipe->pipe_buffer.size;
134813675Sdyson	ub->st_size = pipe->pipe_buffer.cnt;
134913675Sdyson	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
135034901Sphk	ub->st_atimespec = pipe->pipe_atime;
135134901Sphk	ub->st_mtimespec = pipe->pipe_mtime;
135234901Sphk	ub->st_ctimespec = pipe->pipe_ctime;
135360404Schris	ub->st_uid = fp->f_cred->cr_uid;
135460404Schris	ub->st_gid = fp->f_cred->cr_gid;
135517124Sbde	/*
135660404Schris	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
135717124Sbde	 * XXX (st_dev, st_ino) should be unique.
135817124Sbde	 */
135976760Salfred	return (0);
136013675Sdyson}
136113675Sdyson
136213675Sdyson/* ARGSUSED */
136313675Sdysonstatic int
136483366Sjulianpipe_close(fp, td)
136513675Sdyson	struct file *fp;
136683366Sjulian	struct thread *td;
136713675Sdyson{
1368109153Sdillon	struct pipe *cpipe = fp->f_data;
136916322Sgpalmer
137049413Sgreen	fp->f_ops = &badfileops;
1371109153Sdillon	fp->f_data = NULL;
137296122Salfred	funsetown(&cpipe->pipe_sigio);
137313675Sdyson	pipeclose(cpipe);
137476760Salfred	return (0);
137513675Sdyson}
137613675Sdyson
137776364Salfredstatic void
137876364Salfredpipe_free_kmem(cpipe)
137976364Salfred	struct pipe *cpipe;
138076364Salfred{
138191412Salfred
138291412Salfred	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
138391412Salfred	       ("pipe_free_kmem: pipe mutex locked"));
138476364Salfred
138576364Salfred	if (cpipe->pipe_buffer.buffer != NULL) {
138676364Salfred		if (cpipe->pipe_buffer.size > PIPE_SIZE)
1387117364Ssilby			atomic_subtract_int(&nbigpipe, 1);
1388110816Salc		atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size);
1389117325Ssilby		atomic_subtract_int(&amountpipes, 1);
1390118764Ssilby		vm_map_remove(pipe_map,
1391118764Ssilby		    (vm_offset_t)cpipe->pipe_buffer.buffer,
1392118764Ssilby		    (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size);
139376364Salfred		cpipe->pipe_buffer.buffer = NULL;
139476364Salfred	}
139576364Salfred#ifndef PIPE_NODIRECT
1396102241Sarchie	if (cpipe->pipe_map.kva != 0) {
1397118764Ssilby		atomic_subtract_int(&amountpipekvawired,
1398110816Salc		    cpipe->pipe_buffer.size + PAGE_SIZE);
139976364Salfred		kmem_free(kernel_map,
140076364Salfred			cpipe->pipe_map.kva,
140176364Salfred			cpipe->pipe_buffer.size + PAGE_SIZE);
140276364Salfred		cpipe->pipe_map.cnt = 0;
140376364Salfred		cpipe->pipe_map.kva = 0;
140476364Salfred		cpipe->pipe_map.pos = 0;
140576364Salfred		cpipe->pipe_map.npages = 0;
140676364Salfred	}
140776364Salfred#endif
140876364Salfred}
140976364Salfred
141013675Sdyson/*
141113675Sdyson * shutdown the pipe
141213675Sdyson */
141313675Sdysonstatic void
141413675Sdysonpipeclose(cpipe)
141513675Sdyson	struct pipe *cpipe;
141613675Sdyson{
141713907Sdyson	struct pipe *ppipe;
141891968Salfred	int hadpeer;
141976364Salfred
142091968Salfred	if (cpipe == NULL)
142191968Salfred		return;
142291968Salfred
142391968Salfred	hadpeer = 0;
142491968Salfred
142591968Salfred	/* partially created pipes won't have a valid mutex. */
142691968Salfred	if (PIPE_MTX(cpipe) != NULL)
142791362Salfred		PIPE_LOCK(cpipe);
142813907Sdyson
142991968Salfred	pipeselwakeup(cpipe);
143013907Sdyson
143191968Salfred	/*
143291968Salfred	 * If the other side is blocked, wake it up saying that
143391968Salfred	 * we want to close it down.
143491968Salfred	 */
143591968Salfred	while (cpipe->pipe_busy) {
143691968Salfred		wakeup(cpipe);
143791968Salfred		cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
143891968Salfred		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
143991968Salfred	}
144013675Sdyson
1441101768Srwatson#ifdef MAC
1442101768Srwatson	if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
1443101768Srwatson		mac_destroy_pipe(cpipe);
1444101768Srwatson#endif
1445101768Srwatson
144691968Salfred	/*
144791968Salfred	 * Disconnect from peer
144891968Salfred	 */
144991968Salfred	if ((ppipe = cpipe->pipe_peer) != NULL) {
145091968Salfred		hadpeer++;
145191968Salfred		pipeselwakeup(ppipe);
145213907Sdyson
145391968Salfred		ppipe->pipe_state |= PIPE_EOF;
145491968Salfred		wakeup(ppipe);
145591968Salfred		KNOTE(&ppipe->pipe_sel.si_note, 0);
145691968Salfred		ppipe->pipe_peer = NULL;
145791968Salfred	}
145891968Salfred	/*
145991968Salfred	 * free resources
146091968Salfred	 */
146191968Salfred	if (PIPE_MTX(cpipe) != NULL) {
146291968Salfred		PIPE_UNLOCK(cpipe);
146391968Salfred		if (!hadpeer) {
146491968Salfred			mtx_destroy(PIPE_MTX(cpipe));
146591968Salfred			free(PIPE_MTX(cpipe), M_TEMP);
146613675Sdyson		}
146713675Sdyson	}
146891968Salfred	pipe_free_kmem(cpipe);
146992751Sjeff	uma_zfree(pipe_zone, cpipe);
147013675Sdyson}
147159288Sjlemon
147272521Sjlemon/*ARGSUSED*/
147359288Sjlemonstatic int
147472521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn)
147559288Sjlemon{
147689306Salfred	struct pipe *cpipe;
147759288Sjlemon
1478109153Sdillon	cpipe = kn->kn_fp->f_data;
147972521Sjlemon	switch (kn->kn_filter) {
148072521Sjlemon	case EVFILT_READ:
148172521Sjlemon		kn->kn_fop = &pipe_rfiltops;
148272521Sjlemon		break;
148372521Sjlemon	case EVFILT_WRITE:
148472521Sjlemon		kn->kn_fop = &pipe_wfiltops;
148578292Sjlemon		cpipe = cpipe->pipe_peer;
1486101382Sdes		if (cpipe == NULL)
1487101382Sdes			/* other end of pipe has been closed */
1488118929Sjmg			return (EPIPE);
148972521Sjlemon		break;
149072521Sjlemon	default:
149172521Sjlemon		return (1);
149272521Sjlemon	}
1493100527Salfred	kn->kn_hook = cpipe;
149478292Sjlemon
149591372Salfred	PIPE_LOCK(cpipe);
149678292Sjlemon	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
149791372Salfred	PIPE_UNLOCK(cpipe);
149859288Sjlemon	return (0);
149959288Sjlemon}
150059288Sjlemon
150159288Sjlemonstatic void
150259288Sjlemonfilt_pipedetach(struct knote *kn)
150359288Sjlemon{
150478292Sjlemon	struct pipe *cpipe = (struct pipe *)kn->kn_hook;
150559288Sjlemon
150691372Salfred	PIPE_LOCK(cpipe);
150778292Sjlemon	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
150891372Salfred	PIPE_UNLOCK(cpipe);
150959288Sjlemon}
151059288Sjlemon
151159288Sjlemon/*ARGSUSED*/
151259288Sjlemonstatic int
151359288Sjlemonfilt_piperead(struct knote *kn, long hint)
151459288Sjlemon{
1515109153Sdillon	struct pipe *rpipe = kn->kn_fp->f_data;
151659288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
151759288Sjlemon
151891372Salfred	PIPE_LOCK(rpipe);
151959288Sjlemon	kn->kn_data = rpipe->pipe_buffer.cnt;
152059288Sjlemon	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
152159288Sjlemon		kn->kn_data = rpipe->pipe_map.cnt;
152259288Sjlemon
152359288Sjlemon	if ((rpipe->pipe_state & PIPE_EOF) ||
152459288Sjlemon	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
152591372Salfred		kn->kn_flags |= EV_EOF;
152691372Salfred		PIPE_UNLOCK(rpipe);
152759288Sjlemon		return (1);
152859288Sjlemon	}
152991372Salfred	PIPE_UNLOCK(rpipe);
153059288Sjlemon	return (kn->kn_data > 0);
153159288Sjlemon}
153259288Sjlemon
153359288Sjlemon/*ARGSUSED*/
153459288Sjlemonstatic int
153559288Sjlemonfilt_pipewrite(struct knote *kn, long hint)
153659288Sjlemon{
1537109153Sdillon	struct pipe *rpipe = kn->kn_fp->f_data;
153859288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
153959288Sjlemon
154091372Salfred	PIPE_LOCK(rpipe);
154159288Sjlemon	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
154259288Sjlemon		kn->kn_data = 0;
154359288Sjlemon		kn->kn_flags |= EV_EOF;
154491372Salfred		PIPE_UNLOCK(rpipe);
154559288Sjlemon		return (1);
154659288Sjlemon	}
154759288Sjlemon	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
154865855Sjlemon	if (wpipe->pipe_state & PIPE_DIRECTW)
154959288Sjlemon		kn->kn_data = 0;
155059288Sjlemon
155191372Salfred	PIPE_UNLOCK(rpipe);
155259288Sjlemon	return (kn->kn_data >= PIPE_BUF);
155359288Sjlemon}