sys_pipe.c revision 101941
113675Sdyson/*
213675Sdyson * Copyright (c) 1996 John S. Dyson
313675Sdyson * All rights reserved.
413675Sdyson *
513675Sdyson * Redistribution and use in source and binary forms, with or without
613675Sdyson * modification, are permitted provided that the following conditions
713675Sdyson * are met:
813675Sdyson * 1. Redistributions of source code must retain the above copyright
913675Sdyson *    notice immediately at the beginning of the file, without modification,
1013675Sdyson *    this list of conditions, and the following disclaimer.
1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright
1213675Sdyson *    notice, this list of conditions and the following disclaimer in the
1313675Sdyson *    documentation and/or other materials provided with the distribution.
1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author
1513675Sdyson *    John S. Dyson.
1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions
1713675Sdyson *    are met.
1813675Sdyson *
1950477Speter * $FreeBSD: head/sys/kern/sys_pipe.c 101941 2002-08-15 20:55:08Z rwatson $
2013675Sdyson */
2113675Sdyson
2213675Sdyson/*
2313675Sdyson * This file contains a high-performance replacement for the socket-based
2413675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
2513675Sdyson * all features of sockets, but does do everything that pipes normally
2613675Sdyson * do.
2713675Sdyson */
2813675Sdyson
/*
 * This code has two modes of operation: a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the write is smaller than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the write is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, the sender's buffer is fully mapped and wired
 * into the kernel, and the receiving process can copy it directly from
 * the pages in the sending process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
 */
5113907Sdyson
52101768Srwatson#include "opt_mac.h"
53101768Srwatson
5413675Sdyson#include <sys/param.h>
5513675Sdyson#include <sys/systm.h>
5624131Sbde#include <sys/fcntl.h>
5713675Sdyson#include <sys/file.h>
5813675Sdyson#include <sys/filedesc.h>
5924206Sbde#include <sys/filio.h>
6091372Salfred#include <sys/kernel.h>
6176166Smarkm#include <sys/lock.h>
62101768Srwatson#include <sys/mac.h>
6376827Salfred#include <sys/mutex.h>
6424206Sbde#include <sys/ttycom.h>
6513675Sdyson#include <sys/stat.h>
6691968Salfred#include <sys/malloc.h>
6729356Speter#include <sys/poll.h>
6870834Swollman#include <sys/selinfo.h>
6913675Sdyson#include <sys/signalvar.h>
7013675Sdyson#include <sys/sysproto.h>
7113675Sdyson#include <sys/pipe.h>
7276166Smarkm#include <sys/proc.h>
7355112Sbde#include <sys/vnode.h>
7434924Sbde#include <sys/uio.h>
7559288Sjlemon#include <sys/event.h>
7613675Sdyson
7713675Sdyson#include <vm/vm.h>
7813675Sdyson#include <vm/vm_param.h>
7913675Sdyson#include <vm/vm_object.h>
8013675Sdyson#include <vm/vm_kern.h>
8113675Sdyson#include <vm/vm_extern.h>
8213675Sdyson#include <vm/pmap.h>
8313675Sdyson#include <vm/vm_map.h>
8413907Sdyson#include <vm/vm_page.h>
8592751Sjeff#include <vm/uma.h>
8613675Sdyson
8714037Sdyson/*
8814037Sdyson * Use this define if you want to disable *fancy* VM things.  Expect an
8914037Sdyson * approx 30% decrease in transfer rate.  This could be useful for
9014037Sdyson * NetBSD or OpenBSD.
9114037Sdyson */
9214037Sdyson/* #define PIPE_NODIRECT */
9314037Sdyson
9414037Sdyson/*
9514037Sdyson * interfaces to the outside world
9614037Sdyson */
9791413Salfredstatic int pipe_read(struct file *fp, struct uio *uio,
98101941Srwatson		struct ucred *active_cred, int flags, struct thread *td);
9991413Salfredstatic int pipe_write(struct file *fp, struct uio *uio,
100101941Srwatson		struct ucred *active_cred, int flags, struct thread *td);
10191413Salfredstatic int pipe_close(struct file *fp, struct thread *td);
10291413Salfredstatic int pipe_poll(struct file *fp, int events, struct ucred *cred,
10391413Salfred		struct thread *td);
10491413Salfredstatic int pipe_kqfilter(struct file *fp, struct knote *kn);
10591413Salfredstatic int pipe_stat(struct file *fp, struct stat *sb, struct thread *td);
10699009Salfredstatic int pipe_ioctl(struct file *fp, u_long cmd, void *data,
10799009Salfred    struct thread *td);
10813675Sdyson
10972521Sjlemonstatic struct fileops pipeops = {
11072521Sjlemon	pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter,
11172521Sjlemon	pipe_stat, pipe_close
11272521Sjlemon};
11313675Sdyson
11459288Sjlemonstatic void	filt_pipedetach(struct knote *kn);
11559288Sjlemonstatic int	filt_piperead(struct knote *kn, long hint);
11659288Sjlemonstatic int	filt_pipewrite(struct knote *kn, long hint);
11759288Sjlemon
11872521Sjlemonstatic struct filterops pipe_rfiltops =
11972521Sjlemon	{ 1, NULL, filt_pipedetach, filt_piperead };
12072521Sjlemonstatic struct filterops pipe_wfiltops =
12172521Sjlemon	{ 1, NULL, filt_pipedetach, filt_pipewrite };
12259288Sjlemon
/*
 * PIPE_GET_GIANT: release the pipe mutex and then acquire Giant.  Asserts
 * that PIPE_LOCKFL is set in the pipe state before the mutex is dropped.
 */
#define	PIPE_GET_GIANT(pipe)						\
	do {								\
		KASSERT(((pipe)->pipe_state & PIPE_LOCKFL) != 0,	\
		    ("%s:%d PIPE_GET_GIANT: line pipe not locked",	\
		     __FILE__, __LINE__));				\
		PIPE_UNLOCK(pipe);					\
		mtx_lock(&Giant);					\
	} while (0)

/*
 * PIPE_DROP_GIANT: the inverse of PIPE_GET_GIANT; release Giant and then
 * re-acquire the pipe mutex.
 */
#define	PIPE_DROP_GIANT(pipe)						\
	do {								\
		mtx_unlock(&Giant);					\
		PIPE_LOCK(pipe);					\
	} while (0)
13791362Salfred
/*
 * Default pipe buffer size(s).  These can be fairly large now because pipe
 * space is pageable.  The pipe code tries to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define	MINPIPESIZE	(PIPE_SIZE / 3)
#define	MAXPIPESIZE	(2 * PIPE_SIZE / 3)

/*
 * Maximum amount of kva for pipes.  This is a soft limit; it exists so
 * that large systems do not exhaust kernel virtual address space.
 */
#define	MAXPIPEKVA	(8 * 1024 * 1024)

/*
 * Limit for direct transfers.  The amount of kva used by pipes in general
 * cannot, of course, be limited this way.
 */
#define	LIMITPIPEKVA	(16 * 1024 * 1024)

/*
 * Limit the number of "big" pipes.
 */
#define	LIMITBIGPIPES	32
static int nbigpipe;

/*
 * Running total of kva charged to pipes.  In this file it is adjusted by
 * pipespace(), pipe_build_write_buffer() and pipe_destroy_write_buffer().
 */
static int amountpipekva;
16613907Sdyson
16791413Salfredstatic void pipeinit(void *dummy __unused);
16891413Salfredstatic void pipeclose(struct pipe *cpipe);
16991413Salfredstatic void pipe_free_kmem(struct pipe *cpipe);
17091413Salfredstatic int pipe_create(struct pipe **cpipep);
17191413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch);
17291413Salfredstatic __inline void pipeunlock(struct pipe *cpipe);
17391413Salfredstatic __inline void pipeselwakeup(struct pipe *cpipe);
17414037Sdyson#ifndef PIPE_NODIRECT
17591413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
17691413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe);
17791413Salfredstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
17891413Salfredstatic void pipe_clone_write_buffer(struct pipe *wpipe);
17914037Sdyson#endif
18091413Salfredstatic int pipespace(struct pipe *cpipe, int size);
18113675Sdyson
18292751Sjeffstatic uma_zone_t pipe_zone;
18327899Sdyson
18491372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
18591372Salfred
18691372Salfredstatic void
18791372Salfredpipeinit(void *dummy __unused)
18891372Salfred{
18992654Sjeff	pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL,
19092654Sjeff	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
19191372Salfred}
19291372Salfred
19313675Sdyson/*
19413675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes
19513675Sdyson */
19613675Sdyson
19713675Sdyson/* ARGSUSED */
19813675Sdysonint
19983366Sjulianpipe(td, uap)
20083366Sjulian	struct thread *td;
20113675Sdyson	struct pipe_args /* {
20213675Sdyson		int	dummy;
20313675Sdyson	} */ *uap;
20413675Sdyson{
20583366Sjulian	struct filedesc *fdp = td->td_proc->p_fd;
20613675Sdyson	struct file *rf, *wf;
20713675Sdyson	struct pipe *rpipe, *wpipe;
20891968Salfred	struct mtx *pmtx;
20913675Sdyson	int fd, error;
21091362Salfred
21191372Salfred	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
21227899Sdyson
21391968Salfred	pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO);
21491968Salfred
21576756Salfred	rpipe = wpipe = NULL;
21676364Salfred	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
21776364Salfred		pipeclose(rpipe);
21876364Salfred		pipeclose(wpipe);
21991968Salfred		free(pmtx, M_TEMP);
22076364Salfred		return (ENFILE);
22176364Salfred	}
22276364Salfred
22313907Sdyson	rpipe->pipe_state |= PIPE_DIRECTOK;
22413907Sdyson	wpipe->pipe_state |= PIPE_DIRECTOK;
22513675Sdyson
22683366Sjulian	error = falloc(td, &rf, &fd);
22770915Sdwmalone	if (error) {
22870915Sdwmalone		pipeclose(rpipe);
22970915Sdwmalone		pipeclose(wpipe);
23091968Salfred		free(pmtx, M_TEMP);
23170915Sdwmalone		return (error);
23270915Sdwmalone	}
23370915Sdwmalone	fhold(rf);
23483366Sjulian	td->td_retval[0] = fd;
23570915Sdwmalone
23670803Sdwmalone	/*
23770803Sdwmalone	 * Warning: once we've gotten past allocation of the fd for the
23870803Sdwmalone	 * read-side, we can only drop the read side via fdrop() in order
23970803Sdwmalone	 * to avoid races against processes which manage to dup() the read
24070803Sdwmalone	 * side while we are blocked trying to allocate the write side.
24170803Sdwmalone	 */
24289306Salfred	FILE_LOCK(rf);
24313675Sdyson	rf->f_flag = FREAD | FWRITE;
24413675Sdyson	rf->f_type = DTYPE_PIPE;
245100527Salfred	rf->f_data = rpipe;
24613675Sdyson	rf->f_ops = &pipeops;
24789306Salfred	FILE_UNLOCK(rf);
24883366Sjulian	error = falloc(td, &wf, &fd);
24970915Sdwmalone	if (error) {
25089306Salfred		FILEDESC_LOCK(fdp);
25183366Sjulian		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
25283366Sjulian			fdp->fd_ofiles[td->td_retval[0]] = NULL;
25389306Salfred			FILEDESC_UNLOCK(fdp);
25483366Sjulian			fdrop(rf, td);
25589306Salfred		} else
25689306Salfred			FILEDESC_UNLOCK(fdp);
25783366Sjulian		fdrop(rf, td);
25870915Sdwmalone		/* rpipe has been closed by fdrop(). */
25970915Sdwmalone		pipeclose(wpipe);
26091968Salfred		free(pmtx, M_TEMP);
26170915Sdwmalone		return (error);
26270915Sdwmalone	}
26389306Salfred	FILE_LOCK(wf);
26413675Sdyson	wf->f_flag = FREAD | FWRITE;
26513675Sdyson	wf->f_type = DTYPE_PIPE;
266100527Salfred	wf->f_data = wpipe;
26713675Sdyson	wf->f_ops = &pipeops;
26889306Salfred	FILE_UNLOCK(wf);
26983366Sjulian	td->td_retval[1] = fd;
27013675Sdyson	rpipe->pipe_peer = wpipe;
27113675Sdyson	wpipe->pipe_peer = rpipe;
272101768Srwatson#ifdef MAC
273101768Srwatson	/*
274101768Srwatson	 * struct pipe represents a pipe endpoint.  The MAC label is shared
275101768Srwatson	 * between the connected endpoints.  As a result mac_init_pipe() and
276101768Srwatson	 * mac_create_pipe() should only be called on one of the endpoints
277101768Srwatson	 * after they have been connected.
278101768Srwatson	 */
279101768Srwatson	mac_init_pipe(rpipe);
280101768Srwatson	mac_create_pipe(td->td_ucred, rpipe);
281101768Srwatson#endif
28293818Sjhb	mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
28391968Salfred	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
28483366Sjulian	fdrop(rf, td);
28513675Sdyson
28613675Sdyson	return (0);
28713675Sdyson}
28813675Sdyson
28913909Sdyson/*
29013909Sdyson * Allocate kva for pipe circular buffer, the space is pageable
29176364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails
29276364Salfred * it will retain the old buffer.
29376364Salfred * If it fails it will return ENOMEM.
29413909Sdyson */
29576364Salfredstatic int
29676364Salfredpipespace(cpipe, size)
29713675Sdyson	struct pipe *cpipe;
29876364Salfred	int size;
29913675Sdyson{
30076364Salfred	struct vm_object *object;
30176364Salfred	caddr_t buffer;
30213688Sdyson	int npages, error;
30313675Sdyson
30479224Sdillon	GIANT_REQUIRED;
30591412Salfred	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
30691412Salfred	       ("pipespace: pipe mutex locked"));
30779224Sdillon
30876364Salfred	npages = round_page(size)/PAGE_SIZE;
30913675Sdyson	/*
31013675Sdyson	 * Create an object, I don't like the idea of paging to/from
31113675Sdyson	 * kernel_object.
31214037Sdyson	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
31313675Sdyson	 */
31476364Salfred	object = vm_object_allocate(OBJT_DEFAULT, npages);
31576364Salfred	buffer = (caddr_t) vm_map_min(kernel_map);
31613675Sdyson
31713675Sdyson	/*
31813675Sdyson	 * Insert the object into the kernel map, and allocate kva for it.
31913675Sdyson	 * The map entry is, by default, pageable.
32014037Sdyson	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
32113675Sdyson	 */
32276364Salfred	error = vm_map_find(kernel_map, object, 0,
32376364Salfred		(vm_offset_t *) &buffer, size, 1,
32413688Sdyson		VM_PROT_ALL, VM_PROT_ALL, 0);
32513675Sdyson
32676364Salfred	if (error != KERN_SUCCESS) {
32776364Salfred		vm_object_deallocate(object);
32876364Salfred		return (ENOMEM);
32976364Salfred	}
33076364Salfred
33176364Salfred	/* free old resources if we're resizing */
33276364Salfred	pipe_free_kmem(cpipe);
33376364Salfred	cpipe->pipe_buffer.object = object;
33476364Salfred	cpipe->pipe_buffer.buffer = buffer;
33576364Salfred	cpipe->pipe_buffer.size = size;
33676364Salfred	cpipe->pipe_buffer.in = 0;
33776364Salfred	cpipe->pipe_buffer.out = 0;
33876364Salfred	cpipe->pipe_buffer.cnt = 0;
33913907Sdyson	amountpipekva += cpipe->pipe_buffer.size;
34076364Salfred	return (0);
34113907Sdyson}
34213688Sdyson
34313907Sdyson/*
34413907Sdyson * initialize and allocate VM and memory for pipe
34513907Sdyson */
34676364Salfredstatic int
34776364Salfredpipe_create(cpipep)
34876364Salfred	struct pipe **cpipep;
34976364Salfred{
35013907Sdyson	struct pipe *cpipe;
35176364Salfred	int error;
35213907Sdyson
35392751Sjeff	*cpipep = uma_zalloc(pipe_zone, M_WAITOK);
35476364Salfred	if (*cpipep == NULL)
35576364Salfred		return (ENOMEM);
35617163Sdyson
35776364Salfred	cpipe = *cpipep;
35876364Salfred
35976364Salfred	/* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */
36076364Salfred	cpipe->pipe_buffer.object = NULL;
36176364Salfred#ifndef PIPE_NODIRECT
36276364Salfred	cpipe->pipe_map.kva = NULL;
36376364Salfred#endif
36476364Salfred	/*
36576364Salfred	 * protect so pipeclose() doesn't follow a junk pointer
36676364Salfred	 * if pipespace() fails.
36776364Salfred	 */
36876754Salfred	bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
36913675Sdyson	cpipe->pipe_state = 0;
37013675Sdyson	cpipe->pipe_peer = NULL;
37113675Sdyson	cpipe->pipe_busy = 0;
37213907Sdyson
37314037Sdyson#ifndef PIPE_NODIRECT
37413907Sdyson	/*
37513907Sdyson	 * pipe data structure initializations to support direct pipe I/O
37613907Sdyson	 */
37713907Sdyson	cpipe->pipe_map.cnt = 0;
37813907Sdyson	cpipe->pipe_map.kva = 0;
37913907Sdyson	cpipe->pipe_map.pos = 0;
38013907Sdyson	cpipe->pipe_map.npages = 0;
38117124Sbde	/* cpipe->pipe_map.ms[] = invalid */
38214037Sdyson#endif
38376364Salfred
38491412Salfred	cpipe->pipe_mtxp = NULL;	/* avoid pipespace assertion */
38576364Salfred	error = pipespace(cpipe, PIPE_SIZE);
38676760Salfred	if (error)
38776364Salfred		return (error);
38876364Salfred
38976364Salfred	vfs_timestamp(&cpipe->pipe_ctime);
39076364Salfred	cpipe->pipe_atime = cpipe->pipe_ctime;
39176364Salfred	cpipe->pipe_mtime = cpipe->pipe_ctime;
39276364Salfred
39376364Salfred	return (0);
39413675Sdyson}
39513675Sdyson
39613675Sdyson
39713675Sdyson/*
39813675Sdyson * lock a pipe for I/O, blocking other access
39913675Sdyson */
40013675Sdysonstatic __inline int
40113907Sdysonpipelock(cpipe, catch)
40213675Sdyson	struct pipe *cpipe;
40313907Sdyson	int catch;
40413675Sdyson{
40513776Sdyson	int error;
40676364Salfred
40791362Salfred	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
40891362Salfred	while (cpipe->pipe_state & PIPE_LOCKFL) {
40913675Sdyson		cpipe->pipe_state |= PIPE_LWANT;
41091362Salfred		error = msleep(cpipe, PIPE_MTX(cpipe),
41191362Salfred		    catch ? (PRIBIO | PCATCH) : PRIBIO,
41276760Salfred		    "pipelk", 0);
41376760Salfred		if (error != 0)
41476760Salfred			return (error);
41513675Sdyson	}
41691362Salfred	cpipe->pipe_state |= PIPE_LOCKFL;
41776760Salfred	return (0);
41813675Sdyson}
41913675Sdyson
42013675Sdyson/*
42113675Sdyson * unlock a pipe I/O lock
42213675Sdyson */
42313675Sdysonstatic __inline void
42413675Sdysonpipeunlock(cpipe)
42513675Sdyson	struct pipe *cpipe;
42613675Sdyson{
42776364Salfred
42891362Salfred	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
42991362Salfred	cpipe->pipe_state &= ~PIPE_LOCKFL;
43013675Sdyson	if (cpipe->pipe_state & PIPE_LWANT) {
43113675Sdyson		cpipe->pipe_state &= ~PIPE_LWANT;
43214177Sdyson		wakeup(cpipe);
43313675Sdyson	}
43413675Sdyson}
43513675Sdyson
43614037Sdysonstatic __inline void
43714037Sdysonpipeselwakeup(cpipe)
43814037Sdyson	struct pipe *cpipe;
43914037Sdyson{
44076364Salfred
44114037Sdyson	if (cpipe->pipe_state & PIPE_SEL) {
44214037Sdyson		cpipe->pipe_state &= ~PIPE_SEL;
44314037Sdyson		selwakeup(&cpipe->pipe_sel);
44414037Sdyson	}
44541086Struckman	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
44695883Salfred		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
44759288Sjlemon	KNOTE(&cpipe->pipe_sel.si_note, 0);
44814037Sdyson}
44914037Sdyson
45013675Sdyson/* ARGSUSED */
45113675Sdysonstatic int
452101941Srwatsonpipe_read(fp, uio, active_cred, flags, td)
45313675Sdyson	struct file *fp;
45413675Sdyson	struct uio *uio;
455101941Srwatson	struct ucred *active_cred;
45683366Sjulian	struct thread *td;
45745311Sdt	int flags;
45813675Sdyson{
45913675Sdyson	struct pipe *rpipe = (struct pipe *) fp->f_data;
46047748Salc	int error;
46113675Sdyson	int nread = 0;
46218863Sdyson	u_int size;
46313675Sdyson
46491362Salfred	PIPE_LOCK(rpipe);
46513675Sdyson	++rpipe->pipe_busy;
46647748Salc	error = pipelock(rpipe, 1);
46747748Salc	if (error)
46847748Salc		goto unlocked_error;
46947748Salc
470101768Srwatson#ifdef MAC
471101941Srwatson	error = mac_check_pipe_op(active_cred, rpipe, MAC_OP_PIPE_READ);
472101768Srwatson	if (error)
473101768Srwatson		goto locked_error;
474101768Srwatson#endif
475101768Srwatson
47613675Sdyson	while (uio->uio_resid) {
47713907Sdyson		/*
47813907Sdyson		 * normal pipe buffer receive
47913907Sdyson		 */
48013675Sdyson		if (rpipe->pipe_buffer.cnt > 0) {
48118863Sdyson			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
48213675Sdyson			if (size > rpipe->pipe_buffer.cnt)
48313675Sdyson				size = rpipe->pipe_buffer.cnt;
48418863Sdyson			if (size > (u_int) uio->uio_resid)
48518863Sdyson				size = (u_int) uio->uio_resid;
48647748Salc
48791362Salfred			PIPE_UNLOCK(rpipe);
48847748Salc			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
48913675Sdyson					size, uio);
49091362Salfred			PIPE_LOCK(rpipe);
49176760Salfred			if (error)
49213675Sdyson				break;
49376760Salfred
49413675Sdyson			rpipe->pipe_buffer.out += size;
49513675Sdyson			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
49613675Sdyson				rpipe->pipe_buffer.out = 0;
49713675Sdyson
49813675Sdyson			rpipe->pipe_buffer.cnt -= size;
49947748Salc
50047748Salc			/*
50147748Salc			 * If there is no more to read in the pipe, reset
50247748Salc			 * its pointers to the beginning.  This improves
50347748Salc			 * cache hit stats.
50447748Salc			 */
50547748Salc			if (rpipe->pipe_buffer.cnt == 0) {
50647748Salc				rpipe->pipe_buffer.in = 0;
50747748Salc				rpipe->pipe_buffer.out = 0;
50847748Salc			}
50913675Sdyson			nread += size;
51014037Sdyson#ifndef PIPE_NODIRECT
51113907Sdyson		/*
51213907Sdyson		 * Direct copy, bypassing a kernel buffer.
51313907Sdyson		 */
51413907Sdyson		} else if ((size = rpipe->pipe_map.cnt) &&
51547748Salc			   (rpipe->pipe_state & PIPE_DIRECTW)) {
51647748Salc			caddr_t	va;
51718863Sdyson			if (size > (u_int) uio->uio_resid)
51818863Sdyson				size = (u_int) uio->uio_resid;
51947748Salc
52076760Salfred			va = (caddr_t) rpipe->pipe_map.kva +
52176760Salfred			    rpipe->pipe_map.pos;
52291362Salfred			PIPE_UNLOCK(rpipe);
52347748Salc			error = uiomove(va, size, uio);
52491362Salfred			PIPE_LOCK(rpipe);
52513907Sdyson			if (error)
52613907Sdyson				break;
52713907Sdyson			nread += size;
52813907Sdyson			rpipe->pipe_map.pos += size;
52913907Sdyson			rpipe->pipe_map.cnt -= size;
53013907Sdyson			if (rpipe->pipe_map.cnt == 0) {
53113907Sdyson				rpipe->pipe_state &= ~PIPE_DIRECTW;
53213907Sdyson				wakeup(rpipe);
53313907Sdyson			}
53414037Sdyson#endif
53513675Sdyson		} else {
53613675Sdyson			/*
53713675Sdyson			 * detect EOF condition
53876760Salfred			 * read returns 0 on EOF, no need to set error
53913675Sdyson			 */
54076760Salfred			if (rpipe->pipe_state & PIPE_EOF)
54113675Sdyson				break;
54243623Sdillon
54313675Sdyson			/*
54413675Sdyson			 * If the "write-side" has been blocked, wake it up now.
54513675Sdyson			 */
54613675Sdyson			if (rpipe->pipe_state & PIPE_WANTW) {
54713675Sdyson				rpipe->pipe_state &= ~PIPE_WANTW;
54813675Sdyson				wakeup(rpipe);
54913675Sdyson			}
55043623Sdillon
55143623Sdillon			/*
55247748Salc			 * Break if some data was read.
55343623Sdillon			 */
55447748Salc			if (nread > 0)
55513675Sdyson				break;
55616960Sdyson
55743623Sdillon			/*
55847748Salc			 * Unlock the pipe buffer for our remaining processing.  We
55947748Salc			 * will either break out with an error or we will sleep and
56047748Salc			 * relock to loop.
56143623Sdillon			 */
56247748Salc			pipeunlock(rpipe);
56343623Sdillon
56413675Sdyson			/*
56547748Salc			 * Handle non-blocking mode operation or
56647748Salc			 * wait for more data.
56713675Sdyson			 */
56876760Salfred			if (fp->f_flag & FNONBLOCK) {
56947748Salc				error = EAGAIN;
57076760Salfred			} else {
57147748Salc				rpipe->pipe_state |= PIPE_WANTR;
57291362Salfred				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
57391362Salfred				    PRIBIO | PCATCH,
57477140Salfred				    "piperd", 0)) == 0)
57547748Salc					error = pipelock(rpipe, 1);
57613675Sdyson			}
57747748Salc			if (error)
57847748Salc				goto unlocked_error;
57913675Sdyson		}
58013675Sdyson	}
581101768Srwatson#ifdef MAC
582101768Srwatsonlocked_error:
583101768Srwatson#endif
58447748Salc	pipeunlock(rpipe);
58513675Sdyson
58691362Salfred	/* XXX: should probably do this before getting any locks. */
58724101Sbde	if (error == 0)
58855112Sbde		vfs_timestamp(&rpipe->pipe_atime);
58947748Salcunlocked_error:
59047748Salc	--rpipe->pipe_busy;
59113913Sdyson
59247748Salc	/*
59347748Salc	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
59447748Salc	 */
59513675Sdyson	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
59613675Sdyson		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
59713675Sdyson		wakeup(rpipe);
59813675Sdyson	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
59913675Sdyson		/*
60047748Salc		 * Handle write blocking hysteresis.
60113675Sdyson		 */
60213675Sdyson		if (rpipe->pipe_state & PIPE_WANTW) {
60313675Sdyson			rpipe->pipe_state &= ~PIPE_WANTW;
60413675Sdyson			wakeup(rpipe);
60513675Sdyson		}
60613675Sdyson	}
60714037Sdyson
60814802Sdyson	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
60914037Sdyson		pipeselwakeup(rpipe);
61014037Sdyson
61191362Salfred	PIPE_UNLOCK(rpipe);
61276760Salfred	return (error);
61313675Sdyson}
61413675Sdyson
61514037Sdyson#ifndef PIPE_NODIRECT
61613907Sdyson/*
61713907Sdyson * Map the sending processes' buffer into kernel space and wire it.
61813907Sdyson * This is similar to a physical write operation.
61913907Sdyson */
62013675Sdysonstatic int
62113907Sdysonpipe_build_write_buffer(wpipe, uio)
62213907Sdyson	struct pipe *wpipe;
62313675Sdyson	struct uio *uio;
62413675Sdyson{
62518863Sdyson	u_int size;
62694566Stmm	int i;
62794566Stmm	vm_offset_t addr, endaddr, paddr;
62813907Sdyson
62979224Sdillon	GIANT_REQUIRED;
63091412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
63179224Sdillon
63218863Sdyson	size = (u_int) uio->uio_iov->iov_len;
63313907Sdyson	if (size > wpipe->pipe_buffer.size)
63413907Sdyson		size = wpipe->pipe_buffer.size;
63513907Sdyson
63640286Sdg	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
63776760Salfred	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
63876760Salfred	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
63994566Stmm		vm_page_t m;
64094566Stmm
64199899Salc		/*
64299899Salc		 * vm_fault_quick() can sleep.  Consequently,
64399899Salc		 * vm_page_lock_queue() and vm_page_unlock_queue()
64499899Salc		 * should not be performed outside of this loop.
64599899Salc		 */
64651474Sdillon		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
64794608Stmm		    (paddr = pmap_extract(vmspace_pmap(curproc->p_vmspace),
64894608Stmm		     addr)) == 0) {
64913907Sdyson			int j;
65076760Salfred
65199899Salc			vm_page_lock_queues();
65276760Salfred			for (j = 0; j < i; j++)
65340700Sdg				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
65499899Salc			vm_page_unlock_queues();
65576760Salfred			return (EFAULT);
65613907Sdyson		}
65713907Sdyson
65894566Stmm		m = PHYS_TO_VM_PAGE(paddr);
65999899Salc		vm_page_lock_queues();
66013907Sdyson		vm_page_wire(m);
66199899Salc		vm_page_unlock_queues();
66213907Sdyson		wpipe->pipe_map.ms[i] = m;
66313907Sdyson	}
66413907Sdyson
66513907Sdyson/*
66613907Sdyson * set up the control block
66713907Sdyson */
66813907Sdyson	wpipe->pipe_map.npages = i;
66976760Salfred	wpipe->pipe_map.pos =
67076760Salfred	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
67113907Sdyson	wpipe->pipe_map.cnt = size;
67213907Sdyson
67313907Sdyson/*
67413907Sdyson * and map the buffer
67513907Sdyson */
67613907Sdyson	if (wpipe->pipe_map.kva == 0) {
67713912Sdyson		/*
67813912Sdyson		 * We need to allocate space for an extra page because the
67913912Sdyson		 * address range might (will) span pages at times.
68013912Sdyson		 */
68113907Sdyson		wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
68213912Sdyson			wpipe->pipe_buffer.size + PAGE_SIZE);
68313912Sdyson		amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE;
68413907Sdyson	}
68513907Sdyson	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
68613907Sdyson		wpipe->pipe_map.npages);
68713907Sdyson
68813907Sdyson/*
68913907Sdyson * and update the uio data
69013907Sdyson */
69113907Sdyson
69213907Sdyson	uio->uio_iov->iov_len -= size;
69313907Sdyson	uio->uio_iov->iov_base += size;
69413907Sdyson	if (uio->uio_iov->iov_len == 0)
69513907Sdyson		uio->uio_iov++;
69613907Sdyson	uio->uio_resid -= size;
69713907Sdyson	uio->uio_offset += size;
69876760Salfred	return (0);
69913907Sdyson}
70013907Sdyson
70113907Sdyson/*
70213907Sdyson * unmap and unwire the process buffer
70313907Sdyson */
70413907Sdysonstatic void
70513907Sdysonpipe_destroy_write_buffer(wpipe)
70676760Salfred	struct pipe *wpipe;
70713907Sdyson{
70813907Sdyson	int i;
70976364Salfred
71079224Sdillon	GIANT_REQUIRED;
71191412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
71279224Sdillon
71317163Sdyson	if (wpipe->pipe_map.kva) {
71417163Sdyson		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
71513907Sdyson
71613907Sdyson		if (amountpipekva > MAXPIPEKVA) {
71713907Sdyson			vm_offset_t kva = wpipe->pipe_map.kva;
71813907Sdyson			wpipe->pipe_map.kva = 0;
71913907Sdyson			kmem_free(kernel_map, kva,
72013912Sdyson				wpipe->pipe_buffer.size + PAGE_SIZE);
72113912Sdyson			amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
72213907Sdyson		}
72313907Sdyson	}
72499899Salc	vm_page_lock_queues();
72576760Salfred	for (i = 0; i < wpipe->pipe_map.npages; i++)
72640700Sdg		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
72799899Salc	vm_page_unlock_queues();
72891653Stanimura	wpipe->pipe_map.npages = 0;
72913907Sdyson}
73013907Sdyson
73113907Sdyson/*
73213907Sdyson * In the case of a signal, the writing process might go away.  This
73313907Sdyson * code copies the data into the circular buffer so that the source
73413907Sdyson * pages can be freed without loss of data.
73513907Sdyson */
73613907Sdysonstatic void
73713907Sdysonpipe_clone_write_buffer(wpipe)
73876364Salfred	struct pipe *wpipe;
73913907Sdyson{
74013907Sdyson	int size;
74113907Sdyson	int pos;
74213907Sdyson
74391362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
74413907Sdyson	size = wpipe->pipe_map.cnt;
74513907Sdyson	pos = wpipe->pipe_map.pos;
74613907Sdyson
74713907Sdyson	wpipe->pipe_buffer.in = size;
74813907Sdyson	wpipe->pipe_buffer.out = 0;
74913907Sdyson	wpipe->pipe_buffer.cnt = size;
75013907Sdyson	wpipe->pipe_state &= ~PIPE_DIRECTW;
75113907Sdyson
75291412Salfred	PIPE_GET_GIANT(wpipe);
75392959Salfred	bcopy((caddr_t) wpipe->pipe_map.kva + pos,
754100527Salfred	    wpipe->pipe_buffer.buffer, size);
75513907Sdyson	pipe_destroy_write_buffer(wpipe);
75691412Salfred	PIPE_DROP_GIANT(wpipe);
75713907Sdyson}
75813907Sdyson
75913907Sdyson/*
76013907Sdyson * This implements the pipe buffer write mechanism.  Note that only
76113907Sdyson * a direct write OR a normal pipe write can be pending at any given time.
76213907Sdyson * If there are any characters in the pipe buffer, the direct write will
76313907Sdyson * be deferred until the receiving process grabs all of the bytes from
76413907Sdyson * the pipe buffer.  Then the direct mapping write is set-up.
76513907Sdyson */
76613907Sdysonstatic int
76713907Sdysonpipe_direct_write(wpipe, uio)
76813907Sdyson	struct pipe *wpipe;
76913907Sdyson	struct uio *uio;
77013907Sdyson{
77113907Sdyson	int error;
77276364Salfred
77313951Sdysonretry:
77491362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
77513907Sdyson	while (wpipe->pipe_state & PIPE_DIRECTW) {
77676760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
77713951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
77813951Sdyson			wakeup(wpipe);
77913951Sdyson		}
78013992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
78191362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
78291362Salfred		    PRIBIO | PCATCH, "pipdww", 0);
78314802Sdyson		if (error)
78413907Sdyson			goto error1;
78514802Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
78614802Sdyson			error = EPIPE;
78714802Sdyson			goto error1;
78814802Sdyson		}
78913907Sdyson	}
79013907Sdyson	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
79113951Sdyson	if (wpipe->pipe_buffer.cnt > 0) {
79276760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
79313951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
79413951Sdyson			wakeup(wpipe);
79513951Sdyson		}
79613951Sdyson
79713992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
79891362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
79991362Salfred		    PRIBIO | PCATCH, "pipdwc", 0);
80014802Sdyson		if (error)
80113907Sdyson			goto error1;
80214802Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
80314802Sdyson			error = EPIPE;
80414802Sdyson			goto error1;
80513907Sdyson		}
80613951Sdyson		goto retry;
80713907Sdyson	}
80813907Sdyson
80913951Sdyson	wpipe->pipe_state |= PIPE_DIRECTW;
81013951Sdyson
81192305Salfred	pipelock(wpipe, 0);
81291362Salfred	PIPE_GET_GIANT(wpipe);
81313907Sdyson	error = pipe_build_write_buffer(wpipe, uio);
81491362Salfred	PIPE_DROP_GIANT(wpipe);
81592305Salfred	pipeunlock(wpipe);
81613907Sdyson	if (error) {
81713907Sdyson		wpipe->pipe_state &= ~PIPE_DIRECTW;
81813907Sdyson		goto error1;
81913907Sdyson	}
82013907Sdyson
82113907Sdyson	error = 0;
82213907Sdyson	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
82313907Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
82413907Sdyson			pipelock(wpipe, 0);
82591362Salfred			PIPE_GET_GIANT(wpipe);
82613907Sdyson			pipe_destroy_write_buffer(wpipe);
82791362Salfred			PIPE_DROP_GIANT(wpipe);
82813907Sdyson			pipeunlock(wpipe);
82914037Sdyson			pipeselwakeup(wpipe);
83014802Sdyson			error = EPIPE;
83114802Sdyson			goto error1;
83213907Sdyson		}
83313992Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
83413992Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
83513992Sdyson			wakeup(wpipe);
83613992Sdyson		}
83714037Sdyson		pipeselwakeup(wpipe);
83891362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
83991362Salfred		    "pipdwt", 0);
84013907Sdyson	}
84113907Sdyson
84213907Sdyson	pipelock(wpipe,0);
84313907Sdyson	if (wpipe->pipe_state & PIPE_DIRECTW) {
84413907Sdyson		/*
84513907Sdyson		 * this bit of trickery substitutes a kernel buffer for
84613907Sdyson		 * the process that might be going away.
84713907Sdyson		 */
84813907Sdyson		pipe_clone_write_buffer(wpipe);
84913907Sdyson	} else {
85091412Salfred		PIPE_GET_GIANT(wpipe);
85113907Sdyson		pipe_destroy_write_buffer(wpipe);
85291412Salfred		PIPE_DROP_GIANT(wpipe);
85313907Sdyson	}
85413907Sdyson	pipeunlock(wpipe);
85576760Salfred	return (error);
85613907Sdyson
85713907Sdysonerror1:
85813907Sdyson	wakeup(wpipe);
85976760Salfred	return (error);
86013907Sdyson}
86114037Sdyson#endif
86213907Sdyson
86316960Sdysonstatic int
864101941Srwatsonpipe_write(fp, uio, active_cred, flags, td)
86516960Sdyson	struct file *fp;
86613907Sdyson	struct uio *uio;
867101941Srwatson	struct ucred *active_cred;
86883366Sjulian	struct thread *td;
86945311Sdt	int flags;
87013907Sdyson{
87113675Sdyson	int error = 0;
87213913Sdyson	int orig_resid;
87316960Sdyson	struct pipe *wpipe, *rpipe;
87416960Sdyson
87516960Sdyson	rpipe = (struct pipe *) fp->f_data;
87616960Sdyson	wpipe = rpipe->pipe_peer;
87716960Sdyson
87891395Salfred	PIPE_LOCK(rpipe);
87913675Sdyson	/*
88013675Sdyson	 * detect loss of pipe read side, issue SIGPIPE if lost.
88113675Sdyson	 */
88216960Sdyson	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
88391395Salfred		PIPE_UNLOCK(rpipe);
88476760Salfred		return (EPIPE);
88513675Sdyson	}
886101768Srwatson#ifdef MAC
887101941Srwatson	error = mac_check_pipe_op(active_cred, wpipe, MAC_OP_PIPE_WRITE);
888101768Srwatson	if (error) {
889101768Srwatson		PIPE_UNLOCK(rpipe);
890101768Srwatson		return (error);
891101768Srwatson	}
892101768Srwatson#endif
89377676Sdillon	++wpipe->pipe_busy;
89413675Sdyson
89517163Sdyson	/*
89617163Sdyson	 * If it is advantageous to resize the pipe buffer, do
89717163Sdyson	 * so.
89817163Sdyson	 */
89917163Sdyson	if ((uio->uio_resid > PIPE_SIZE) &&
90017163Sdyson		(nbigpipe < LIMITBIGPIPES) &&
90117163Sdyson		(wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
90217163Sdyson		(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
90317163Sdyson		(wpipe->pipe_buffer.cnt == 0)) {
90417163Sdyson
90513907Sdyson		if ((error = pipelock(wpipe,1)) == 0) {
90692305Salfred			PIPE_GET_GIANT(wpipe);
90776364Salfred			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
90876364Salfred				nbigpipe++;
90992305Salfred			PIPE_DROP_GIANT(wpipe);
91013907Sdyson			pipeunlock(wpipe);
91113907Sdyson		}
91213907Sdyson	}
91377676Sdillon
91477676Sdillon	/*
91577676Sdillon	 * If an early error occured unbusy and return, waking up any pending
91677676Sdillon	 * readers.
91777676Sdillon	 */
91877676Sdillon	if (error) {
91977676Sdillon		--wpipe->pipe_busy;
92077676Sdillon		if ((wpipe->pipe_busy == 0) &&
92177676Sdillon		    (wpipe->pipe_state & PIPE_WANT)) {
92277676Sdillon			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
92377676Sdillon			wakeup(wpipe);
92477676Sdillon		}
92591395Salfred		PIPE_UNLOCK(rpipe);
92677676Sdillon		return(error);
92777676Sdillon	}
92876364Salfred
92976364Salfred	KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone"));
93013907Sdyson
93113913Sdyson	orig_resid = uio->uio_resid;
93277676Sdillon
93313675Sdyson	while (uio->uio_resid) {
93413907Sdyson		int space;
93576760Salfred
93614037Sdyson#ifndef PIPE_NODIRECT
93713907Sdyson		/*
93813907Sdyson		 * If the transfer is large, we can gain performance if
93913907Sdyson		 * we do process-to-process copies directly.
94016416Sdyson		 * If the write is non-blocking, we don't use the
94116416Sdyson		 * direct write mechanism.
94258505Sdillon		 *
94358505Sdillon		 * The direct write mechanism will detect the reader going
94458505Sdillon		 * away on us.
94513907Sdyson		 */
94617163Sdyson		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
94717163Sdyson		    (fp->f_flag & FNONBLOCK) == 0 &&
94817163Sdyson			(wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) &&
94913907Sdyson			(uio->uio_iov->iov_len >= PIPE_MINDIRECT)) {
95013907Sdyson			error = pipe_direct_write( wpipe, uio);
95176760Salfred			if (error)
95213907Sdyson				break;
95313907Sdyson			continue;
95491362Salfred		}
95514037Sdyson#endif
95613907Sdyson
95713907Sdyson		/*
95813907Sdyson		 * Pipe buffered writes cannot be coincidental with
95913907Sdyson		 * direct writes.  We wait until the currently executing
96013907Sdyson		 * direct write is completed before we start filling the
96158505Sdillon		 * pipe buffer.  We break out if a signal occurs or the
96258505Sdillon		 * reader goes away.
96313907Sdyson		 */
96413907Sdyson	retrywrite:
96513907Sdyson		while (wpipe->pipe_state & PIPE_DIRECTW) {
96613992Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
96713992Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
96813992Sdyson				wakeup(wpipe);
96913992Sdyson			}
97091395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
97191362Salfred			    "pipbww", 0);
97258505Sdillon			if (wpipe->pipe_state & PIPE_EOF)
97358505Sdillon				break;
97413907Sdyson			if (error)
97513907Sdyson				break;
97613907Sdyson		}
97758505Sdillon		if (wpipe->pipe_state & PIPE_EOF) {
97858505Sdillon			error = EPIPE;
97958505Sdillon			break;
98058505Sdillon		}
98113907Sdyson
98213907Sdyson		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
98314644Sdyson
98414644Sdyson		/* Writes of size <= PIPE_BUF must be atomic. */
98513913Sdyson		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
98613913Sdyson			space = 0;
98713907Sdyson
98817163Sdyson		if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) {
98913907Sdyson			if ((error = pipelock(wpipe,1)) == 0) {
99054534Stegge				int size;	/* Transfer size */
99154534Stegge				int segsize;	/* first segment to transfer */
99276760Salfred
99313907Sdyson				/*
99413907Sdyson				 * It is possible for a direct write to
99513907Sdyson				 * slip in on us... handle it here...
99613907Sdyson				 */
99713907Sdyson				if (wpipe->pipe_state & PIPE_DIRECTW) {
99813907Sdyson					pipeunlock(wpipe);
99913907Sdyson					goto retrywrite;
100013907Sdyson				}
100154534Stegge				/*
100254534Stegge				 * If a process blocked in uiomove, our
100354534Stegge				 * value for space might be bad.
100458505Sdillon				 *
100558505Sdillon				 * XXX will we be ok if the reader has gone
100658505Sdillon				 * away here?
100754534Stegge				 */
100854534Stegge				if (space > wpipe->pipe_buffer.size -
100954534Stegge				    wpipe->pipe_buffer.cnt) {
101054534Stegge					pipeunlock(wpipe);
101154534Stegge					goto retrywrite;
101254534Stegge				}
101354534Stegge
101454534Stegge				/*
101554534Stegge				 * Transfer size is minimum of uio transfer
101654534Stegge				 * and free space in pipe buffer.
101754534Stegge				 */
101854534Stegge				if (space > uio->uio_resid)
101954534Stegge					size = uio->uio_resid;
102054534Stegge				else
102154534Stegge					size = space;
102254534Stegge				/*
102354534Stegge				 * First segment to transfer is minimum of
102454534Stegge				 * transfer size and contiguous space in
102554534Stegge				 * pipe buffer.  If first segment to transfer
102654534Stegge				 * is less than the transfer size, we've got
102754534Stegge				 * a wraparound in the buffer.
102854534Stegge				 */
102954534Stegge				segsize = wpipe->pipe_buffer.size -
103054534Stegge					wpipe->pipe_buffer.in;
103154534Stegge				if (segsize > size)
103254534Stegge					segsize = size;
103354534Stegge
103454534Stegge				/* Transfer first segment */
103554534Stegge
103691395Salfred				PIPE_UNLOCK(rpipe);
103754534Stegge				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
103854534Stegge						segsize, uio);
103991395Salfred				PIPE_LOCK(rpipe);
104054534Stegge
104154534Stegge				if (error == 0 && segsize < size) {
104254534Stegge					/*
104354534Stegge					 * Transfer remaining part now, to
104454534Stegge					 * support atomic writes.  Wraparound
104554534Stegge					 * happened.
104654534Stegge					 */
104754534Stegge					if (wpipe->pipe_buffer.in + segsize !=
104854534Stegge					    wpipe->pipe_buffer.size)
104954534Stegge						panic("Expected pipe buffer wraparound disappeared");
105054534Stegge
105191395Salfred					PIPE_UNLOCK(rpipe);
105254534Stegge					error = uiomove(&wpipe->pipe_buffer.buffer[0],
105354534Stegge							size - segsize, uio);
105491395Salfred					PIPE_LOCK(rpipe);
105554534Stegge				}
105654534Stegge				if (error == 0) {
105754534Stegge					wpipe->pipe_buffer.in += size;
105854534Stegge					if (wpipe->pipe_buffer.in >=
105954534Stegge					    wpipe->pipe_buffer.size) {
106054534Stegge						if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
106154534Stegge							panic("Expected wraparound bad");
106254534Stegge						wpipe->pipe_buffer.in = size - segsize;
106354534Stegge					}
106454534Stegge
106554534Stegge					wpipe->pipe_buffer.cnt += size;
106654534Stegge					if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
106754534Stegge						panic("Pipe buffer overflow");
106854534Stegge
106954534Stegge				}
107013675Sdyson				pipeunlock(wpipe);
107113675Sdyson			}
107213675Sdyson			if (error)
107313675Sdyson				break;
107413675Sdyson
107513675Sdyson		} else {
107613675Sdyson			/*
107713675Sdyson			 * If the "read-side" has been blocked, wake it up now.
107813675Sdyson			 */
107913675Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
108013675Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
108113675Sdyson				wakeup(wpipe);
108213675Sdyson			}
108314037Sdyson
108413675Sdyson			/*
108513675Sdyson			 * don't block on non-blocking I/O
108613675Sdyson			 */
108716960Sdyson			if (fp->f_flag & FNONBLOCK) {
108813907Sdyson				error = EAGAIN;
108913675Sdyson				break;
109013675Sdyson			}
109113907Sdyson
109214037Sdyson			/*
109314037Sdyson			 * We have no more space and have something to offer,
109429356Speter			 * wake up select/poll.
109514037Sdyson			 */
109614037Sdyson			pipeselwakeup(wpipe);
109714037Sdyson
109813675Sdyson			wpipe->pipe_state |= PIPE_WANTW;
109991395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe),
110091362Salfred			    PRIBIO | PCATCH, "pipewr", 0);
110176760Salfred			if (error != 0)
110213675Sdyson				break;
110313675Sdyson			/*
110413675Sdyson			 * If read side wants to go away, we just issue a signal
110513675Sdyson			 * to ourselves.
110613675Sdyson			 */
110713675Sdyson			if (wpipe->pipe_state & PIPE_EOF) {
110813774Sdyson				error = EPIPE;
110913907Sdyson				break;
111013675Sdyson			}
111113675Sdyson		}
111213675Sdyson	}
111313675Sdyson
111414644Sdyson	--wpipe->pipe_busy;
111577676Sdillon
111676760Salfred	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
111776760Salfred		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
111813675Sdyson		wakeup(wpipe);
111913675Sdyson	} else if (wpipe->pipe_buffer.cnt > 0) {
112013675Sdyson		/*
112113675Sdyson		 * If we have put any characters in the buffer, we wake up
112213675Sdyson		 * the reader.
112313675Sdyson		 */
112413675Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
112513675Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
112613675Sdyson			wakeup(wpipe);
112713675Sdyson		}
112813675Sdyson	}
112913909Sdyson
113013909Sdyson	/*
113113909Sdyson	 * Don't return EPIPE if I/O was successful
113213909Sdyson	 */
113313907Sdyson	if ((wpipe->pipe_buffer.cnt == 0) &&
113477676Sdillon	    (uio->uio_resid == 0) &&
113577676Sdillon	    (error == EPIPE)) {
113613907Sdyson		error = 0;
113777676Sdillon	}
113813913Sdyson
113924101Sbde	if (error == 0)
114055112Sbde		vfs_timestamp(&wpipe->pipe_mtime);
114124101Sbde
114214037Sdyson	/*
114314037Sdyson	 * We have something to offer,
114429356Speter	 * wake up select/poll.
114514037Sdyson	 */
114614177Sdyson	if (wpipe->pipe_buffer.cnt)
114714037Sdyson		pipeselwakeup(wpipe);
114813907Sdyson
114991395Salfred	PIPE_UNLOCK(rpipe);
115076760Salfred	return (error);
115113675Sdyson}
115213675Sdyson
115313675Sdyson/*
115413675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets.
115513675Sdyson */
115613675Sdysonint
115783366Sjulianpipe_ioctl(fp, cmd, data, td)
115813675Sdyson	struct file *fp;
115936735Sdfr	u_long cmd;
116099009Salfred	void *data;
116183366Sjulian	struct thread *td;
116213675Sdyson{
116376364Salfred	struct pipe *mpipe = (struct pipe *)fp->f_data;
1164101768Srwatson#ifdef MAC
1165101768Srwatson	int error;
116613675Sdyson
1167101768Srwatson	/* XXXMAC: Pipe should be locked for this check. */
1168101768Srwatson	error = mac_check_pipe_ioctl(td->td_ucred, mpipe, cmd, data);
1169101768Srwatson	if (error)
1170101768Srwatson		return (error);
1171101768Srwatson#endif
1172101768Srwatson
117313675Sdyson	switch (cmd) {
117413675Sdyson
117513675Sdyson	case FIONBIO:
117613675Sdyson		return (0);
117713675Sdyson
117813675Sdyson	case FIOASYNC:
117991362Salfred		PIPE_LOCK(mpipe);
118013675Sdyson		if (*(int *)data) {
118113675Sdyson			mpipe->pipe_state |= PIPE_ASYNC;
118213675Sdyson		} else {
118313675Sdyson			mpipe->pipe_state &= ~PIPE_ASYNC;
118413675Sdyson		}
118591362Salfred		PIPE_UNLOCK(mpipe);
118613675Sdyson		return (0);
118713675Sdyson
118813675Sdyson	case FIONREAD:
118991362Salfred		PIPE_LOCK(mpipe);
119014037Sdyson		if (mpipe->pipe_state & PIPE_DIRECTW)
119114037Sdyson			*(int *)data = mpipe->pipe_map.cnt;
119214037Sdyson		else
119314037Sdyson			*(int *)data = mpipe->pipe_buffer.cnt;
119491362Salfred		PIPE_UNLOCK(mpipe);
119513675Sdyson		return (0);
119613675Sdyson
119741086Struckman	case FIOSETOWN:
119841086Struckman		return (fsetown(*(int *)data, &mpipe->pipe_sigio));
119941086Struckman
120041086Struckman	case FIOGETOWN:
120141086Struckman		*(int *)data = fgetown(mpipe->pipe_sigio);
120213675Sdyson		return (0);
120313675Sdyson
120441086Struckman	/* This is deprecated, FIOSETOWN should be used instead. */
120541086Struckman	case TIOCSPGRP:
120641086Struckman		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));
120741086Struckman
120841086Struckman	/* This is deprecated, FIOGETOWN should be used instead. */
120918863Sdyson	case TIOCGPGRP:
121041086Struckman		*(int *)data = -fgetown(mpipe->pipe_sigio);
121113675Sdyson		return (0);
121213675Sdyson
121313675Sdyson	}
121417124Sbde	return (ENOTTY);
121513675Sdyson}
121613675Sdyson
121713675Sdysonint
121883366Sjulianpipe_poll(fp, events, cred, td)
121913675Sdyson	struct file *fp;
122029356Speter	int events;
122129356Speter	struct ucred *cred;
122283366Sjulian	struct thread *td;
122313675Sdyson{
122476364Salfred	struct pipe *rpipe = (struct pipe *)fp->f_data;
122513675Sdyson	struct pipe *wpipe;
122629356Speter	int revents = 0;
1227101768Srwatson#ifdef MAC
1228101768Srwatson	int error;
1229101768Srwatson#endif
123013675Sdyson
123113675Sdyson	wpipe = rpipe->pipe_peer;
123291362Salfred	PIPE_LOCK(rpipe);
1233101768Srwatson#ifdef MAC
1234101768Srwatson	error = mac_check_pipe_op(td->td_ucred, rpipe, MAC_OP_PIPE_POLL);
1235101768Srwatson	if (error)
1236101768Srwatson		goto locked_error;
1237101768Srwatson#endif
123829356Speter	if (events & (POLLIN | POLLRDNORM))
123929356Speter		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
124029356Speter		    (rpipe->pipe_buffer.cnt > 0) ||
124129356Speter		    (rpipe->pipe_state & PIPE_EOF))
124229356Speter			revents |= events & (POLLIN | POLLRDNORM);
124313675Sdyson
124429356Speter	if (events & (POLLOUT | POLLWRNORM))
124529356Speter		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
124643311Sdillon		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
124743311Sdillon		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
124829356Speter			revents |= events & (POLLOUT | POLLWRNORM);
124913675Sdyson
125029356Speter	if ((rpipe->pipe_state & PIPE_EOF) ||
125129356Speter	    (wpipe == NULL) ||
125229356Speter	    (wpipe->pipe_state & PIPE_EOF))
125329356Speter		revents |= POLLHUP;
125429356Speter
125529356Speter	if (revents == 0) {
125629356Speter		if (events & (POLLIN | POLLRDNORM)) {
125783805Sjhb			selrecord(td, &rpipe->pipe_sel);
125829356Speter			rpipe->pipe_state |= PIPE_SEL;
125913675Sdyson		}
126013675Sdyson
126129356Speter		if (events & (POLLOUT | POLLWRNORM)) {
126283805Sjhb			selrecord(td, &wpipe->pipe_sel);
126330164Speter			wpipe->pipe_state |= PIPE_SEL;
126413907Sdyson		}
126513675Sdyson	}
1266101768Srwatson#ifdef MAC
1267101768Srwatsonlocked_error:
1268101768Srwatson#endif
126991362Salfred	PIPE_UNLOCK(rpipe);
127029356Speter
127129356Speter	return (revents);
127213675Sdyson}
127313675Sdyson
127498989Salfred/*
127598989Salfred * We shouldn't need locks here as we're doing a read and this should
127698989Salfred * be a natural race.
127798989Salfred */
127852983Speterstatic int
127983366Sjulianpipe_stat(fp, ub, td)
128052983Speter	struct file *fp;
128152983Speter	struct stat *ub;
128283366Sjulian	struct thread *td;
128313675Sdyson{
128452983Speter	struct pipe *pipe = (struct pipe *)fp->f_data;
1285101768Srwatson#ifdef MAC
1286101768Srwatson	int error;
128752983Speter
1288101768Srwatson	/* XXXMAC: Pipe should be locked for this check. */
1289101768Srwatson	error = mac_check_pipe_op(td->td_ucred, pipe, MAC_OP_PIPE_STAT);
1290101768Srwatson	if (error)
1291101768Srwatson		return (error);
1292101768Srwatson#endif
1293100527Salfred	bzero(ub, sizeof(*ub));
129417124Sbde	ub->st_mode = S_IFIFO;
129513907Sdyson	ub->st_blksize = pipe->pipe_buffer.size;
129613675Sdyson	ub->st_size = pipe->pipe_buffer.cnt;
129713675Sdyson	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
129834901Sphk	ub->st_atimespec = pipe->pipe_atime;
129934901Sphk	ub->st_mtimespec = pipe->pipe_mtime;
130034901Sphk	ub->st_ctimespec = pipe->pipe_ctime;
130160404Schris	ub->st_uid = fp->f_cred->cr_uid;
130260404Schris	ub->st_gid = fp->f_cred->cr_gid;
130317124Sbde	/*
130460404Schris	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
130517124Sbde	 * XXX (st_dev, st_ino) should be unique.
130617124Sbde	 */
130776760Salfred	return (0);
130813675Sdyson}
130913675Sdyson
131013675Sdyson/* ARGSUSED */
131113675Sdysonstatic int
131283366Sjulianpipe_close(fp, td)
131313675Sdyson	struct file *fp;
131483366Sjulian	struct thread *td;
131513675Sdyson{
131613675Sdyson	struct pipe *cpipe = (struct pipe *)fp->f_data;
131716322Sgpalmer
131849413Sgreen	fp->f_ops = &badfileops;
131949413Sgreen	fp->f_data = NULL;
132096122Salfred	funsetown(&cpipe->pipe_sigio);
132113675Sdyson	pipeclose(cpipe);
132276760Salfred	return (0);
132313675Sdyson}
132413675Sdyson
132576364Salfredstatic void
132676364Salfredpipe_free_kmem(cpipe)
132776364Salfred	struct pipe *cpipe;
132876364Salfred{
132991412Salfred
133079224Sdillon	GIANT_REQUIRED;
133191412Salfred	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
133291412Salfred	       ("pipespace: pipe mutex locked"));
133376364Salfred
133476364Salfred	if (cpipe->pipe_buffer.buffer != NULL) {
133576364Salfred		if (cpipe->pipe_buffer.size > PIPE_SIZE)
133676364Salfred			--nbigpipe;
133776364Salfred		amountpipekva -= cpipe->pipe_buffer.size;
133876364Salfred		kmem_free(kernel_map,
133976364Salfred			(vm_offset_t)cpipe->pipe_buffer.buffer,
134076364Salfred			cpipe->pipe_buffer.size);
134176364Salfred		cpipe->pipe_buffer.buffer = NULL;
134276364Salfred	}
134376364Salfred#ifndef PIPE_NODIRECT
134476364Salfred	if (cpipe->pipe_map.kva != NULL) {
134576364Salfred		amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE;
134676364Salfred		kmem_free(kernel_map,
134776364Salfred			cpipe->pipe_map.kva,
134876364Salfred			cpipe->pipe_buffer.size + PAGE_SIZE);
134976364Salfred		cpipe->pipe_map.cnt = 0;
135076364Salfred		cpipe->pipe_map.kva = 0;
135176364Salfred		cpipe->pipe_map.pos = 0;
135276364Salfred		cpipe->pipe_map.npages = 0;
135376364Salfred	}
135476364Salfred#endif
135576364Salfred}
135676364Salfred
135713675Sdyson/*
135813675Sdyson * shutdown the pipe
135913675Sdyson */
136013675Sdysonstatic void
136113675Sdysonpipeclose(cpipe)
136213675Sdyson	struct pipe *cpipe;
136313675Sdyson{
136413907Sdyson	struct pipe *ppipe;
136591968Salfred	int hadpeer;
136676364Salfred
136791968Salfred	if (cpipe == NULL)
136891968Salfred		return;
136991968Salfred
137091968Salfred	hadpeer = 0;
137191968Salfred
137291968Salfred	/* partially created pipes won't have a valid mutex. */
137391968Salfred	if (PIPE_MTX(cpipe) != NULL)
137491362Salfred		PIPE_LOCK(cpipe);
137513907Sdyson
137691968Salfred	pipeselwakeup(cpipe);
137713907Sdyson
137891968Salfred	/*
137991968Salfred	 * If the other side is blocked, wake it up saying that
138091968Salfred	 * we want to close it down.
138191968Salfred	 */
138291968Salfred	while (cpipe->pipe_busy) {
138391968Salfred		wakeup(cpipe);
138491968Salfred		cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
138591968Salfred		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
138691968Salfred	}
138713675Sdyson
1388101768Srwatson#ifdef MAC
1389101768Srwatson	if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
1390101768Srwatson		mac_destroy_pipe(cpipe);
1391101768Srwatson#endif
1392101768Srwatson
139391968Salfred	/*
139491968Salfred	 * Disconnect from peer
139591968Salfred	 */
139691968Salfred	if ((ppipe = cpipe->pipe_peer) != NULL) {
139791968Salfred		hadpeer++;
139891968Salfred		pipeselwakeup(ppipe);
139913907Sdyson
140091968Salfred		ppipe->pipe_state |= PIPE_EOF;
140191968Salfred		wakeup(ppipe);
140291968Salfred		KNOTE(&ppipe->pipe_sel.si_note, 0);
140391968Salfred		ppipe->pipe_peer = NULL;
140491968Salfred	}
140591968Salfred	/*
140691968Salfred	 * free resources
140791968Salfred	 */
140891968Salfred	if (PIPE_MTX(cpipe) != NULL) {
140991968Salfred		PIPE_UNLOCK(cpipe);
141091968Salfred		if (!hadpeer) {
141191968Salfred			mtx_destroy(PIPE_MTX(cpipe));
141291968Salfred			free(PIPE_MTX(cpipe), M_TEMP);
141313675Sdyson		}
141413675Sdyson	}
141591968Salfred	mtx_lock(&Giant);
141691968Salfred	pipe_free_kmem(cpipe);
141792751Sjeff	uma_zfree(pipe_zone, cpipe);
141891968Salfred	mtx_unlock(&Giant);
141913675Sdyson}
142059288Sjlemon
142172521Sjlemon/*ARGSUSED*/
142259288Sjlemonstatic int
142372521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn)
142459288Sjlemon{
142589306Salfred	struct pipe *cpipe;
142659288Sjlemon
142789306Salfred	cpipe = (struct pipe *)kn->kn_fp->f_data;
142872521Sjlemon	switch (kn->kn_filter) {
142972521Sjlemon	case EVFILT_READ:
143072521Sjlemon		kn->kn_fop = &pipe_rfiltops;
143172521Sjlemon		break;
143272521Sjlemon	case EVFILT_WRITE:
143372521Sjlemon		kn->kn_fop = &pipe_wfiltops;
143478292Sjlemon		cpipe = cpipe->pipe_peer;
1435101382Sdes		if (cpipe == NULL)
1436101382Sdes			/* other end of pipe has been closed */
1437101382Sdes			return (EBADF);
143872521Sjlemon		break;
143972521Sjlemon	default:
144072521Sjlemon		return (1);
144172521Sjlemon	}
1442100527Salfred	kn->kn_hook = cpipe;
144378292Sjlemon
144491372Salfred	PIPE_LOCK(cpipe);
144578292Sjlemon	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
144691372Salfred	PIPE_UNLOCK(cpipe);
144759288Sjlemon	return (0);
144859288Sjlemon}
144959288Sjlemon
145059288Sjlemonstatic void
145159288Sjlemonfilt_pipedetach(struct knote *kn)
145259288Sjlemon{
145378292Sjlemon	struct pipe *cpipe = (struct pipe *)kn->kn_hook;
145459288Sjlemon
145591372Salfred	PIPE_LOCK(cpipe);
145678292Sjlemon	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
145791372Salfred	PIPE_UNLOCK(cpipe);
145859288Sjlemon}
145959288Sjlemon
146059288Sjlemon/*ARGSUSED*/
146159288Sjlemonstatic int
146259288Sjlemonfilt_piperead(struct knote *kn, long hint)
146359288Sjlemon{
146459288Sjlemon	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
146559288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
146659288Sjlemon
146791372Salfred	PIPE_LOCK(rpipe);
146859288Sjlemon	kn->kn_data = rpipe->pipe_buffer.cnt;
146959288Sjlemon	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
147059288Sjlemon		kn->kn_data = rpipe->pipe_map.cnt;
147159288Sjlemon
147259288Sjlemon	if ((rpipe->pipe_state & PIPE_EOF) ||
147359288Sjlemon	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
147491372Salfred		kn->kn_flags |= EV_EOF;
147591372Salfred		PIPE_UNLOCK(rpipe);
147659288Sjlemon		return (1);
147759288Sjlemon	}
147891372Salfred	PIPE_UNLOCK(rpipe);
147959288Sjlemon	return (kn->kn_data > 0);
148059288Sjlemon}
148159288Sjlemon
148259288Sjlemon/*ARGSUSED*/
148359288Sjlemonstatic int
148459288Sjlemonfilt_pipewrite(struct knote *kn, long hint)
148559288Sjlemon{
148659288Sjlemon	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
148759288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
148859288Sjlemon
148991372Salfred	PIPE_LOCK(rpipe);
149059288Sjlemon	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
149159288Sjlemon		kn->kn_data = 0;
149259288Sjlemon		kn->kn_flags |= EV_EOF;
149391372Salfred		PIPE_UNLOCK(rpipe);
149459288Sjlemon		return (1);
149559288Sjlemon	}
149659288Sjlemon	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
149765855Sjlemon	if (wpipe->pipe_state & PIPE_DIRECTW)
149859288Sjlemon		kn->kn_data = 0;
149959288Sjlemon
150091372Salfred	PIPE_UNLOCK(rpipe);
150159288Sjlemon	return (kn->kn_data >= PIPE_BUF);
150259288Sjlemon}
1503