sys_pipe.c revision 102003
113675Sdyson/*
213675Sdyson * Copyright (c) 1996 John S. Dyson
313675Sdyson * All rights reserved.
413675Sdyson *
513675Sdyson * Redistribution and use in source and binary forms, with or without
613675Sdyson * modification, are permitted provided that the following conditions
713675Sdyson * are met:
813675Sdyson * 1. Redistributions of source code must retain the above copyright
913675Sdyson *    notice immediately at the beginning of the file, without modification,
1013675Sdyson *    this list of conditions, and the following disclaimer.
1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright
1213675Sdyson *    notice, this list of conditions and the following disclaimer in the
1313675Sdyson *    documentation and/or other materials provided with the distribution.
1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author
1513675Sdyson *    John S. Dyson.
1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions
1713675Sdyson *    are met.
1813675Sdyson *
1950477Speter * $FreeBSD: head/sys/kern/sys_pipe.c 102003 2002-08-17 02:36:16Z rwatson $
2013675Sdyson */
2113675Sdyson
2213675Sdyson/*
2313675Sdyson * This file contains a high-performance replacement for the socket-based
2413675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
2513675Sdyson * all features of sockets, but does do everything that pipes normally
2613675Sdyson * do.
2713675Sdyson */
2813675Sdyson
2913907Sdyson/*
3013907Sdyson * This code has two modes of operation, a small write mode and a large
3113907Sdyson * write mode.  The small write mode acts like conventional pipes with
3213907Sdyson * a kernel buffer.  If the write is smaller than PIPE_MINDIRECT, then the
3313907Sdyson * "normal" pipe buffering is done.  If the write is between PIPE_MINDIRECT
3413907Sdyson * and PIPE_SIZE in size, the writer's buffer is fully mapped and wired into the kernel, and
3513907Sdyson * the receiving process can copy it directly from the pages in the sending
3613907Sdyson * process.
3713907Sdyson *
3813907Sdyson * If the sending process receives a signal, it is possible that it will
3913913Sdyson * go away, and certainly its address space can change, because control
4013907Sdyson * is returned back to the user-mode side.  In that case, the pipe code
4113907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable
4213907Sdyson * kernel buffer, and the receiving process will grab the data from the
4313907Sdyson * pageable kernel buffer.  Since signals don't happen all that often,
4413907Sdyson * the copy operation is normally eliminated.
4513907Sdyson *
4613907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
4713907Sdyson * happen for small transfers so that the system will not spend all of
4813913Sdyson * its time context switching.  PIPE_SIZE is constrained by the
4913907Sdyson * amount of kernel virtual memory.
5013907Sdyson */
5113907Sdyson
52101768Srwatson#include "opt_mac.h"
53101768Srwatson
5413675Sdyson#include <sys/param.h>
5513675Sdyson#include <sys/systm.h>
5624131Sbde#include <sys/fcntl.h>
5713675Sdyson#include <sys/file.h>
5813675Sdyson#include <sys/filedesc.h>
5924206Sbde#include <sys/filio.h>
6091372Salfred#include <sys/kernel.h>
6176166Smarkm#include <sys/lock.h>
62101768Srwatson#include <sys/mac.h>
6376827Salfred#include <sys/mutex.h>
6424206Sbde#include <sys/ttycom.h>
6513675Sdyson#include <sys/stat.h>
6691968Salfred#include <sys/malloc.h>
6729356Speter#include <sys/poll.h>
6870834Swollman#include <sys/selinfo.h>
6913675Sdyson#include <sys/signalvar.h>
7013675Sdyson#include <sys/sysproto.h>
7113675Sdyson#include <sys/pipe.h>
7276166Smarkm#include <sys/proc.h>
7355112Sbde#include <sys/vnode.h>
7434924Sbde#include <sys/uio.h>
7559288Sjlemon#include <sys/event.h>
7613675Sdyson
7713675Sdyson#include <vm/vm.h>
7813675Sdyson#include <vm/vm_param.h>
7913675Sdyson#include <vm/vm_object.h>
8013675Sdyson#include <vm/vm_kern.h>
8113675Sdyson#include <vm/vm_extern.h>
8213675Sdyson#include <vm/pmap.h>
8313675Sdyson#include <vm/vm_map.h>
8413907Sdyson#include <vm/vm_page.h>
8592751Sjeff#include <vm/uma.h>
8613675Sdyson
8714037Sdyson/*
8814037Sdyson * Use this define if you want to disable *fancy* VM things.  Expect an
8914037Sdyson * approx 30% decrease in transfer rate.  This could be useful for
9014037Sdyson * NetBSD or OpenBSD.
9114037Sdyson */
9214037Sdyson/* #define PIPE_NODIRECT */
9314037Sdyson
9414037Sdyson/*
9514037Sdyson * interfaces to the outside world
 *
 * File-operation entry points for pipe descriptors.  They are gathered
 * into the pipeops table below, which pipe() installs as f_ops on both
 * of the files it creates.
9614037Sdyson */
9791413Salfredstatic int pipe_read(struct file *fp, struct uio *uio,
98101941Srwatson		struct ucred *active_cred, int flags, struct thread *td);
9991413Salfredstatic int pipe_write(struct file *fp, struct uio *uio,
100101941Srwatson		struct ucred *active_cred, int flags, struct thread *td);
10191413Salfredstatic int pipe_close(struct file *fp, struct thread *td);
102101983Srwatsonstatic int pipe_poll(struct file *fp, int events, struct ucred *active_cred,
10391413Salfred		struct thread *td);
10491413Salfredstatic int pipe_kqfilter(struct file *fp, struct knote *kn);
105101983Srwatsonstatic int pipe_stat(struct file *fp, struct stat *sb,
106101987Srwatson		struct ucred *active_cred, struct thread *td);
10799009Salfredstatic int pipe_ioctl(struct file *fp, u_long cmd, void *data,
108102003Srwatson		struct ucred *active_cred, struct thread *td);
10913675Sdyson
/*
 * Positional initializer; the entries supplied are, in order: read, write,
 * ioctl, poll, kqfilter, stat and close.
 */
11072521Sjlemonstatic struct fileops pipeops = {
11172521Sjlemon	pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter,
11272521Sjlemon	pipe_stat, pipe_close
11372521Sjlemon};
11413675Sdyson
/*
 * kqueue filter routines for pipes.  The read and write filter tables
 * share the same detach routine and differ only in the event routine
 * (filt_piperead vs. filt_pipewrite); neither supplies an entry for the
 * second slot.
 * NOTE(review): the leading 1 is presumably the "is a file descriptor"
 * flag of struct filterops -- confirm against sys/event.h.
 */
11559288Sjlemonstatic void	filt_pipedetach(struct knote *kn);
11659288Sjlemonstatic int	filt_piperead(struct knote *kn, long hint);
11759288Sjlemonstatic int	filt_pipewrite(struct knote *kn, long hint);
11859288Sjlemon
11972521Sjlemonstatic struct filterops pipe_rfiltops =
12072521Sjlemon	{ 1, NULL, filt_pipedetach, filt_piperead };
12172521Sjlemonstatic struct filterops pipe_wfiltops =
12272521Sjlemon	{ 1, NULL, filt_pipedetach, filt_pipewrite };
12359288Sjlemon
/*
 * PIPE_GET_GIANT(pipe): switch from holding the pipe mutex to holding
 * Giant.  The caller must already hold the pipe's I/O lock (PIPE_LOCKFL,
 * see pipelock()); this is asserted.  The pipe mutex is released before
 * Giant is acquired.
 */
12492305Salfred#define PIPE_GET_GIANT(pipe)						\
12591362Salfred	do {								\
12692305Salfred		KASSERT(((pipe)->pipe_state & PIPE_LOCKFL) != 0,	\
12792305Salfred		    ("%s:%d PIPE_GET_GIANT: line pipe not locked",	\
12892305Salfred		     __FILE__, __LINE__));				\
12992305Salfred		PIPE_UNLOCK(pipe);					\
13091362Salfred		mtx_lock(&Giant);					\
13191362Salfred	} while (0)
13272521Sjlemon
/*
 * PIPE_DROP_GIANT(pipe): inverse of PIPE_GET_GIANT -- release Giant and
 * then re-acquire the pipe mutex.
 */
13391362Salfred#define PIPE_DROP_GIANT(pipe)						\
13491362Salfred	do {								\
13591362Salfred		mtx_unlock(&Giant);					\
13692305Salfred		PIPE_LOCK(pipe);					\
13791362Salfred	} while (0)
13891362Salfred
13913675Sdyson/*
14013675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe
14113675Sdyson * space is pageable.  The pipe code will try to maintain locality of
14213675Sdyson * reference for performance reasons, so small amounts of outstanding I/O
14313675Sdyson * will not wipe the cache.
 *
 * MINPIPESIZE is the fill level below which pipe_read() wakes a writer
 * that is waiting for space (write-blocking hysteresis).
14413675Sdyson */
14513907Sdyson#define MINPIPESIZE (PIPE_SIZE/3)
14613907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3)
14713675Sdyson
14813907Sdyson/*
14913907Sdyson * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
15013907Sdyson * is there so that on large systems, we don't exhaust it.
 * pipe_destroy_write_buffer() gives a pipe's direct-write kva back once
 * amountpipekva exceeds this value.
15113907Sdyson */
15213907Sdyson#define MAXPIPEKVA (8*1024*1024)
15313907Sdyson
15413907Sdyson/*
15513907Sdyson * Limit for direct transfers.  We cannot, of course, limit
15613907Sdyson * the amount of kva used for pipes in general.
15713907Sdyson */
15813907Sdyson#define LIMITPIPEKVA (16*1024*1024)
15917163Sdyson
16017163Sdyson/*
16117163Sdyson * Limit the number of "big" pipes
 * (nbigpipe is presumably the current count checked against this limit;
 * the code that uses it is not in this part of the file.)
16217163Sdyson */
16317163Sdyson#define LIMITBIGPIPES	32
16433181Seivindstatic int nbigpipe;
16517163Sdyson
/*
 * Running total, in bytes, of kernel virtual address space charged to
 * pipes.  It is adjusted by pipespace(), pipe_build_write_buffer() and
 * pipe_destroy_write_buffer().
 */
16617124Sbdestatic int amountpipekva;
16713907Sdyson
/*
 * Forward declarations for the file-local helpers.  The four direct-write
 * helpers exist only when PIPE_NODIRECT is not defined.
 */
16891413Salfredstatic void pipeinit(void *dummy __unused);
16991413Salfredstatic void pipeclose(struct pipe *cpipe);
17091413Salfredstatic void pipe_free_kmem(struct pipe *cpipe);
17191413Salfredstatic int pipe_create(struct pipe **cpipep);
17291413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch);
17391413Salfredstatic __inline void pipeunlock(struct pipe *cpipe);
17491413Salfredstatic __inline void pipeselwakeup(struct pipe *cpipe);
17514037Sdyson#ifndef PIPE_NODIRECT
17691413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
17791413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe);
17891413Salfredstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
17991413Salfredstatic void pipe_clone_write_buffer(struct pipe *wpipe);
18014037Sdyson#endif
18191413Salfredstatic int pipespace(struct pipe *cpipe, int size);
18213675Sdyson
/* UMA zone from which pipe_create() allocates struct pipe endpoints. */
18392751Sjeffstatic uma_zone_t pipe_zone;
18427899Sdyson
/* Register pipeinit() with SYSINIT at SI_SUB_VFS / SI_ORDER_ANY. */
18591372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
18691372Salfred
/*
 * Create the "PIPE" UMA zone, sized for one struct pipe per item and
 * pointer-aligned.  No ctor/dtor/init/fini callbacks are supplied.
 * The argument is unused.
 */
18791372Salfredstatic void
18891372Salfredpipeinit(void *dummy __unused)
18991372Salfred{
19092654Sjeff	pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL,
19192654Sjeff	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
19291372Salfred}
19391372Salfred
19413675Sdyson/*
19513675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * Creates two pipe endpoints, wraps each in a newly allocated file, links
 * the endpoints as peers and returns the two descriptors in
 * td->td_retval[0] (file rf / endpoint rpipe) and td->td_retval[1]
 * (file wf / endpoint wpipe).  Both files are opened FREAD | FWRITE and
 * both endpoints share one mutex.
 *
 * Returns 0 on success, ENFILE if either pipe_create() call fails
 * (regardless of the error pipe_create() itself reported), or the error
 * returned by falloc().
19613675Sdyson */
19713675Sdyson
19813675Sdyson/* ARGSUSED */
19913675Sdysonint
20083366Sjulianpipe(td, uap)
20183366Sjulian	struct thread *td;
20213675Sdyson	struct pipe_args /* {
20313675Sdyson		int	dummy;
20413675Sdyson	} */ *uap;
20513675Sdyson{
20683366Sjulian	struct filedesc *fdp = td->td_proc->p_fd;
20713675Sdyson	struct file *rf, *wf;
20813675Sdyson	struct pipe *rpipe, *wpipe;
20991968Salfred	struct mtx *pmtx;
21013675Sdyson	int fd, error;
21191362Salfred
21291372Salfred	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
21327899Sdyson
	/*
	 * Storage for the mutex shared by both endpoints.  It is freed on
	 * every error path below and handed to the pipes on success.
	 */
21491968Salfred	pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO);
21591968Salfred
	/*
	 * Start from NULL so that pipeclose() is handed NULL for an
	 * endpoint that was never allocated (the second pipe_create() is
	 * skipped when the first one fails).
	 */
21676756Salfred	rpipe = wpipe = NULL;
21776364Salfred	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
21876364Salfred		pipeclose(rpipe);
21976364Salfred		pipeclose(wpipe);
22091968Salfred		free(pmtx, M_TEMP);
22176364Salfred		return (ENFILE);
22276364Salfred	}
22376364Salfred
22413907Sdyson	rpipe->pipe_state |= PIPE_DIRECTOK;
22513907Sdyson	wpipe->pipe_state |= PIPE_DIRECTOK;
22613675Sdyson
22783366Sjulian	error = falloc(td, &rf, &fd);
22870915Sdwmalone	if (error) {
22970915Sdwmalone		pipeclose(rpipe);
23070915Sdwmalone		pipeclose(wpipe);
23191968Salfred		free(pmtx, M_TEMP);
23270915Sdwmalone		return (error);
23370915Sdwmalone	}
	/* Extra reference on rf; released by the final fdrop() on each path. */
23470915Sdwmalone	fhold(rf);
23583366Sjulian	td->td_retval[0] = fd;
23670915Sdwmalone
23770803Sdwmalone	/*
23870803Sdwmalone	 * Warning: once we've gotten past allocation of the fd for the
23970803Sdwmalone	 * read-side, we can only drop the read side via fdrop() in order
24070803Sdwmalone	 * to avoid races against processes which manage to dup() the read
24170803Sdwmalone	 * side while we are blocked trying to allocate the write side.
24270803Sdwmalone	 */
24389306Salfred	FILE_LOCK(rf);
24413675Sdyson	rf->f_flag = FREAD | FWRITE;
24513675Sdyson	rf->f_type = DTYPE_PIPE;
246100527Salfred	rf->f_data = rpipe;
24713675Sdyson	rf->f_ops = &pipeops;
24889306Salfred	FILE_UNLOCK(rf);
24983366Sjulian	error = falloc(td, &wf, &fd);
25070915Sdwmalone	if (error) {
		/*
		 * Undo the read side: if the descriptor slot still refers
		 * to rf, clear it and drop one reference (presumably the
		 * one owned by the table); then drop the fhold() reference
		 * taken above.
		 */
25189306Salfred		FILEDESC_LOCK(fdp);
25283366Sjulian		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
25383366Sjulian			fdp->fd_ofiles[td->td_retval[0]] = NULL;
25489306Salfred			FILEDESC_UNLOCK(fdp);
25583366Sjulian			fdrop(rf, td);
25689306Salfred		} else
25789306Salfred			FILEDESC_UNLOCK(fdp);
25883366Sjulian		fdrop(rf, td);
25970915Sdwmalone		/* rpipe has been closed by fdrop(). */
26070915Sdwmalone		pipeclose(wpipe);
26191968Salfred		free(pmtx, M_TEMP);
26270915Sdwmalone		return (error);
26370915Sdwmalone	}
26489306Salfred	FILE_LOCK(wf);
26513675Sdyson	wf->f_flag = FREAD | FWRITE;
26613675Sdyson	wf->f_type = DTYPE_PIPE;
267100527Salfred	wf->f_data = wpipe;
26813675Sdyson	wf->f_ops = &pipeops;
26989306Salfred	FILE_UNLOCK(wf);
27083366Sjulian	td->td_retval[1] = fd;
	/* Connect the two endpoints to each other. */
27113675Sdyson	rpipe->pipe_peer = wpipe;
27213675Sdyson	wpipe->pipe_peer = rpipe;
273101768Srwatson#ifdef MAC
274101768Srwatson	/*
275101768Srwatson	 * struct pipe represents a pipe endpoint.  The MAC label is shared
276101768Srwatson	 * between the connected endpoints.  As a result mac_init_pipe() and
277101768Srwatson	 * mac_create_pipe() should only be called on one of the endpoints
278101768Srwatson	 * after they have been connected.
279101768Srwatson	 */
280101768Srwatson	mac_init_pipe(rpipe);
281101768Srwatson	mac_create_pipe(td->td_ucred, rpipe);
282101768Srwatson#endif
	/*
	 * NOTE(review): the shared mutex is initialised and attached only
	 * here, after both descriptors have been placed in the descriptor
	 * table.  Confirm that no other thread of the process can reach
	 * these files (and thus PIPE_LOCK()) before this point.
	 */
28393818Sjhb	mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
28491968Salfred	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
	/* Release the fhold() reference taken after the first falloc(). */
28583366Sjulian	fdrop(rf, td);
28613675Sdyson
28713675Sdyson	return (0);
28813675Sdyson}
28913675Sdyson
29013909Sdyson/*
29113909Sdyson * Allocate kva for pipe circular buffer, the space is pageable
29276364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails
29376364Salfred * it will retain the old buffer.
29476364Salfred * If it fails it will return ENOMEM.
 *
 * cpipe: pipe whose circular buffer is to be (re)allocated.
 * size:  requested buffer size in bytes.
 *
 * On success the previous buffer (if any) is released through
 * pipe_free_kmem(), the new buffer starts out empty (in, out and cnt are
 * reset to 0; nothing is copied from the old buffer), amountpipekva is
 * increased by size, and 0 is returned.
 *
 * Giant must be held and the pipe mutex, if one is attached, must not be
 * owned by the caller.
29513909Sdyson */
29676364Salfredstatic int
29776364Salfredpipespace(cpipe, size)
29813675Sdyson	struct pipe *cpipe;
29976364Salfred	int size;
30013675Sdyson{
30176364Salfred	struct vm_object *object;
30276364Salfred	caddr_t buffer;
30313688Sdyson	int npages, error;
30413675Sdyson
30579224Sdillon	GIANT_REQUIRED;
	/* pipe_mtxp is still NULL when called from pipe_create(). */
30691412Salfred	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
30791412Salfred	       ("pipespace: pipe mutex locked"));
30879224Sdillon
	/* Number of pages needed to hold size bytes, rounded up. */
30976364Salfred	npages = round_page(size)/PAGE_SIZE;
31013675Sdyson	/*
31113675Sdyson	 * Create an object, I don't like the idea of paging to/from
31213675Sdyson	 * kernel_object.
31314037Sdyson	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
31413675Sdyson	 */
31576364Salfred	object = vm_object_allocate(OBJT_DEFAULT, npages);
	/* Initial address handed to vm_map_find(), which updates buffer. */
31676364Salfred	buffer = (caddr_t) vm_map_min(kernel_map);
31713675Sdyson
31813675Sdyson	/*
31913675Sdyson	 * Insert the object into the kernel map, and allocate kva for it.
32013675Sdyson	 * The map entry is, by default, pageable.
32114037Sdyson	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
32213675Sdyson	 */
32376364Salfred	error = vm_map_find(kernel_map, object, 0,
32476364Salfred		(vm_offset_t *) &buffer, size, 1,
32513688Sdyson		VM_PROT_ALL, VM_PROT_ALL, 0);
32613675Sdyson
	/* Mapping failed: drop the new object; the pipe is left untouched. */
32776364Salfred	if (error != KERN_SUCCESS) {
32876364Salfred		vm_object_deallocate(object);
32976364Salfred		return (ENOMEM);
33076364Salfred	}
33176364Salfred
33276364Salfred	/* free old resources if we're resizing */
33376364Salfred	pipe_free_kmem(cpipe);
33476364Salfred	cpipe->pipe_buffer.object = object;
33576364Salfred	cpipe->pipe_buffer.buffer = buffer;
33676364Salfred	cpipe->pipe_buffer.size = size;
33776364Salfred	cpipe->pipe_buffer.in = 0;
33876364Salfred	cpipe->pipe_buffer.out = 0;
33976364Salfred	cpipe->pipe_buffer.cnt = 0;
34013907Sdyson	amountpipekva += cpipe->pipe_buffer.size;
34176364Salfred	return (0);
34213907Sdyson}
34313688Sdyson
34413907Sdyson/*
34513907Sdyson * initialize and allocate VM and memory for pipe
 *
 * Allocates a struct pipe from pipe_zone, stores it in *cpipep, puts the
 * fields inspected by the buffer and close paths into a known state,
 * gives the pipe a PIPE_SIZE buffer via pipespace() and sets the three
 * timestamps to the current time.
 *
 * Returns 0 on success, ENOMEM if the zone allocation returns NULL, or
 * the error from pipespace().  When pipespace() fails *cpipep is still
 * set, so the caller must dispose of the endpoint (pipe() does this with
 * pipeclose()).
34613907Sdyson */
34776364Salfredstatic int
34876364Salfredpipe_create(cpipep)
34976364Salfred	struct pipe **cpipep;
35076364Salfred{
35113907Sdyson	struct pipe *cpipe;
35276364Salfred	int error;
35313907Sdyson
35492751Sjeff	*cpipep = uma_zalloc(pipe_zone, M_WAITOK);
35576364Salfred	if (*cpipep == NULL)
35676364Salfred		return (ENOMEM);
35717163Sdyson
35876364Salfred	cpipe = *cpipep;
35976364Salfred
36076364Salfred	/* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */
36176364Salfred	cpipe->pipe_buffer.object = NULL;
36276364Salfred#ifndef PIPE_NODIRECT
	/*
	 * kva holds an address as an integer (it is compared with 0 and
	 * copied into a vm_offset_t elsewhere in this file), so clear it
	 * with 0 rather than with the pointer constant NULL.
	 */
36376364Salfred	cpipe->pipe_map.kva = 0;
36476364Salfred#endif
36576364Salfred	/*
36676364Salfred	 * protect so pipeclose() doesn't follow a junk pointer
36776364Salfred	 * if pipespace() fails.
36876364Salfred	 */
36976754Salfred	bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
37013675Sdyson	cpipe->pipe_state = 0;
37113675Sdyson	cpipe->pipe_peer = NULL;
37213675Sdyson	cpipe->pipe_busy = 0;
37313907Sdyson
37414037Sdyson#ifndef PIPE_NODIRECT
37513907Sdyson	/*
37613907Sdyson	 * pipe data structure initializations to support direct pipe I/O
37713907Sdyson	 */
37813907Sdyson	cpipe->pipe_map.cnt = 0;
37913907Sdyson	cpipe->pipe_map.kva = 0;
38013907Sdyson	cpipe->pipe_map.pos = 0;
38113907Sdyson	cpipe->pipe_map.npages = 0;
38217124Sbde	/* cpipe->pipe_map.ms[] = invalid */
38314037Sdyson#endif
38476364Salfred
38591412Salfred	cpipe->pipe_mtxp = NULL;	/* avoid pipespace assertion */
38676364Salfred	error = pipespace(cpipe, PIPE_SIZE);
38776760Salfred	if (error)
38876364Salfred		return (error);
38976364Salfred
	/* All three timestamps start out equal to the creation time. */
39076364Salfred	vfs_timestamp(&cpipe->pipe_ctime);
39176364Salfred	cpipe->pipe_atime = cpipe->pipe_ctime;
39276364Salfred	cpipe->pipe_mtime = cpipe->pipe_ctime;
39376364Salfred
39476364Salfred	return (0);
39513675Sdyson}
39613675Sdyson
39713675Sdyson
39813675Sdyson/*
39913675Sdyson * lock a pipe for I/O, blocking other access
 *
 * Takes the pipe's I/O lock, i.e. the PIPE_LOCKFL bit in pipe_state.
 * The pipe mutex must be held on entry.  While another holder has
 * PIPE_LOCKFL set, PIPE_LWANT is set and the caller sleeps on cpipe
 * via msleep() on the pipe mutex; pipeunlock() issues the wakeup.
 *
 * catch: non-zero adds PCATCH to the sleep priority.
 *
 * Returns 0 with PIPE_LOCKFL set, or the non-zero msleep() error with
 * the lock not taken.
40013675Sdyson */
40113675Sdysonstatic __inline int
40213907Sdysonpipelock(cpipe, catch)
40313675Sdyson	struct pipe *cpipe;
40413907Sdyson	int catch;
40513675Sdyson{
40613776Sdyson	int error;
40776364Salfred
40891362Salfred	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
40991362Salfred	while (cpipe->pipe_state & PIPE_LOCKFL) {
41013675Sdyson		cpipe->pipe_state |= PIPE_LWANT;
41191362Salfred		error = msleep(cpipe, PIPE_MTX(cpipe),
41291362Salfred		    catch ? (PRIBIO | PCATCH) : PRIBIO,
41376760Salfred		    "pipelk", 0);
41476760Salfred		if (error != 0)
41576760Salfred			return (error);
41613675Sdyson	}
41791362Salfred	cpipe->pipe_state |= PIPE_LOCKFL;
41876760Salfred	return (0);
41913675Sdyson}
42013675Sdyson
42113675Sdyson/*
42213675Sdyson * unlock a pipe I/O lock
 *
 * Clears PIPE_LOCKFL and, if a pipelock() caller recorded interest with
 * PIPE_LWANT, clears that flag and wakes the sleepers on cpipe.  The pipe
 * mutex must be held by the caller.
42313675Sdyson */
42413675Sdysonstatic __inline void
42513675Sdysonpipeunlock(cpipe)
42613675Sdyson	struct pipe *cpipe;
42713675Sdyson{
42876364Salfred
42991362Salfred	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
43091362Salfred	cpipe->pipe_state &= ~PIPE_LOCKFL;
43113675Sdyson	if (cpipe->pipe_state & PIPE_LWANT) {
43213675Sdyson		cpipe->pipe_state &= ~PIPE_LWANT;
43314177Sdyson		wakeup(cpipe);
43413675Sdyson	}
43513675Sdyson}
43613675Sdyson
/*
 * Notify everything that may be waiting for a state change on cpipe:
 *  - select/poll waiters, if PIPE_SEL is set (the flag is cleared first);
 *  - the SIGIO recipient, if PIPE_ASYNC is set and pipe_sigio is non-NULL;
 *  - any knotes attached to the pipe's selinfo (always).
 */
43714037Sdysonstatic __inline void
43814037Sdysonpipeselwakeup(cpipe)
43914037Sdyson	struct pipe *cpipe;
44014037Sdyson{
44176364Salfred
44214037Sdyson	if (cpipe->pipe_state & PIPE_SEL) {
44314037Sdyson		cpipe->pipe_state &= ~PIPE_SEL;
44414037Sdyson		selwakeup(&cpipe->pipe_sel);
44514037Sdyson	}
44641086Struckman	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
44795883Salfred		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
44859288Sjlemon	KNOTE(&cpipe->pipe_sel.si_note, 0);
44914037Sdyson}
45014037Sdyson
/*
 * Read entry point of pipeops.
 *
 * Copies data from the pipe attached to fp (fp->f_data) into uio until
 * uio is satisfied, the pipe runs dry after at least one byte has been
 * transferred, EOF is seen, or an error occurs.  Data comes either from
 * the pipe's circular buffer or, when a direct write is pending
 * (PIPE_DIRECTW with pipe_map.cnt != 0), straight from the writer's
 * mapped pages.
 *
 * Returns 0 on success (including EOF, where nothing more is copied),
 * EAGAIN when the pipe is empty and fp has FNONBLOCK set, or the error
 * from pipelock(), msleep(), uiomove() or the MAC check.
 *
 * The K&R parameter declarations below list td before flags; the actual
 * parameter order is the one in the identifier list.
 */
45113675Sdyson/* ARGSUSED */
45213675Sdysonstatic int
453101941Srwatsonpipe_read(fp, uio, active_cred, flags, td)
45413675Sdyson	struct file *fp;
45513675Sdyson	struct uio *uio;
456101941Srwatson	struct ucred *active_cred;
45783366Sjulian	struct thread *td;
45845311Sdt	int flags;
45913675Sdyson{
46013675Sdyson	struct pipe *rpipe = (struct pipe *) fp->f_data;
46147748Salc	int error;
46213675Sdyson	int nread = 0;
46318863Sdyson	u_int size;
46413675Sdyson
46591362Salfred	PIPE_LOCK(rpipe);
	/* Mark the pipe in use; undone at unlocked_error. */
46613675Sdyson	++rpipe->pipe_busy;
46747748Salc	error = pipelock(rpipe, 1);
46847748Salc	if (error)
46947748Salc		goto unlocked_error;
47047748Salc
471101768Srwatson#ifdef MAC
472101941Srwatson	error = mac_check_pipe_op(active_cred, rpipe, MAC_OP_PIPE_READ);
473101768Srwatson	if (error)
474101768Srwatson		goto locked_error;
475101768Srwatson#endif
476101768Srwatson
47713675Sdyson	while (uio->uio_resid) {
47813907Sdyson		/*
47913907Sdyson		 * normal pipe buffer receive
48013907Sdyson		 */
48113675Sdyson		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * Transfer at most the contiguous run from "out" to
			 * the end of the buffer, limited by the bytes
			 * available and by what the caller still wants.
			 */
48218863Sdyson			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
48313675Sdyson			if (size > rpipe->pipe_buffer.cnt)
48413675Sdyson				size = rpipe->pipe_buffer.cnt;
48518863Sdyson			if (size > (u_int) uio->uio_resid)
48618863Sdyson				size = (u_int) uio->uio_resid;
48747748Salc
			/*
			 * The pipe mutex is dropped around the copy;
			 * PIPE_LOCKFL (from pipelock()) remains set.
			 */
48891362Salfred			PIPE_UNLOCK(rpipe);
48947748Salc			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
49013675Sdyson					size, uio);
49191362Salfred			PIPE_LOCK(rpipe);
49276760Salfred			if (error)
49313675Sdyson				break;
49476760Salfred
			/* Advance the consumer index, wrapping at the end. */
49513675Sdyson			rpipe->pipe_buffer.out += size;
49613675Sdyson			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
49713675Sdyson				rpipe->pipe_buffer.out = 0;
49813675Sdyson
49913675Sdyson			rpipe->pipe_buffer.cnt -= size;
50047748Salc
50147748Salc			/*
50247748Salc			 * If there is no more to read in the pipe, reset
50347748Salc			 * its pointers to the beginning.  This improves
50447748Salc			 * cache hit stats.
50547748Salc			 */
50647748Salc			if (rpipe->pipe_buffer.cnt == 0) {
50747748Salc				rpipe->pipe_buffer.in = 0;
50847748Salc				rpipe->pipe_buffer.out = 0;
50947748Salc			}
51013675Sdyson			nread += size;
51114037Sdyson#ifndef PIPE_NODIRECT
51213907Sdyson		/*
51313907Sdyson		 * Direct copy, bypassing a kernel buffer.
51413907Sdyson		 */
51513907Sdyson		} else if ((size = rpipe->pipe_map.cnt) &&
51647748Salc			   (rpipe->pipe_state & PIPE_DIRECTW)) {
51747748Salc			caddr_t	va;
51818863Sdyson			if (size > (u_int) uio->uio_resid)
51918863Sdyson				size = (u_int) uio->uio_resid;
52047748Salc
			/* Source is the writer's pages mapped at pipe_map.kva. */
52176760Salfred			va = (caddr_t) rpipe->pipe_map.kva +
52276760Salfred			    rpipe->pipe_map.pos;
52391362Salfred			PIPE_UNLOCK(rpipe);
52447748Salc			error = uiomove(va, size, uio);
52591362Salfred			PIPE_LOCK(rpipe);
52613907Sdyson			if (error)
52713907Sdyson				break;
52813907Sdyson			nread += size;
52913907Sdyson			rpipe->pipe_map.pos += size;
53013907Sdyson			rpipe->pipe_map.cnt -= size;
			/*
			 * Whole direct write consumed: clear PIPE_DIRECTW and
			 * wake whoever sleeps on rpipe.
			 */
53113907Sdyson			if (rpipe->pipe_map.cnt == 0) {
53213907Sdyson				rpipe->pipe_state &= ~PIPE_DIRECTW;
53313907Sdyson				wakeup(rpipe);
53413907Sdyson			}
53514037Sdyson#endif
53613675Sdyson		} else {
53713675Sdyson			/*
53813675Sdyson			 * detect EOF condition
53976760Salfred			 * read returns 0 on EOF, no need to set error
54013675Sdyson			 */
54176760Salfred			if (rpipe->pipe_state & PIPE_EOF)
54213675Sdyson				break;
54343623Sdillon
54413675Sdyson			/*
54513675Sdyson			 * If the "write-side" has been blocked, wake it up now.
54613675Sdyson			 */
54713675Sdyson			if (rpipe->pipe_state & PIPE_WANTW) {
54813675Sdyson				rpipe->pipe_state &= ~PIPE_WANTW;
54913675Sdyson				wakeup(rpipe);
55013675Sdyson			}
55143623Sdillon
55243623Sdillon			/*
55347748Salc			 * Break if some data was read.
55443623Sdillon			 */
55547748Salc			if (nread > 0)
55613675Sdyson				break;
55716960Sdyson
55843623Sdillon			/*
55947748Salc			 * Unlock the pipe buffer for our remaining processing.  We
56047748Salc			 * will either break out with an error or we will sleep and
56147748Salc			 * relock to loop.
56243623Sdillon			 */
56347748Salc			pipeunlock(rpipe);
56443623Sdillon
56513675Sdyson			/*
56647748Salc			 * Handle non-blocking mode operation or
56747748Salc			 * wait for more data.
56813675Sdyson			 */
56976760Salfred			if (fp->f_flag & FNONBLOCK) {
57047748Salc				error = EAGAIN;
57176760Salfred			} else {
57247748Salc				rpipe->pipe_state |= PIPE_WANTR;
57391362Salfred				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
57491362Salfred				    PRIBIO | PCATCH,
57577140Salfred				    "piperd", 0)) == 0)
57647748Salc					error = pipelock(rpipe, 1);
57713675Sdyson			}
			/* The I/O lock is not held here, so skip pipeunlock(). */
57847748Salc			if (error)
57947748Salc				goto unlocked_error;
58013675Sdyson		}
58113675Sdyson	}
582101768Srwatson#ifdef MAC
583101768Srwatsonlocked_error:
584101768Srwatson#endif
	/* Reached with the I/O lock (PIPE_LOCKFL) held. */
58547748Salc	pipeunlock(rpipe);
58613675Sdyson
58791362Salfred	/* XXX: should probably do this before getting any locks. */
58824101Sbde	if (error == 0)
58955112Sbde		vfs_timestamp(&rpipe->pipe_atime);
59047748Salcunlocked_error:
	/* Reached with only the pipe mutex held. */
59147748Salc	--rpipe->pipe_busy;
59213913Sdyson
59347748Salc	/*
59447748Salc	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
59547748Salc	 */
59613675Sdyson	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
59713675Sdyson		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
59813675Sdyson		wakeup(rpipe);
59913675Sdyson	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
60013675Sdyson		/*
60147748Salc		 * Handle write blocking hysteresis.
60213675Sdyson		 */
60313675Sdyson		if (rpipe->pipe_state & PIPE_WANTW) {
60413675Sdyson			rpipe->pipe_state &= ~PIPE_WANTW;
60513675Sdyson			wakeup(rpipe);
60613675Sdyson		}
60713675Sdyson	}
60814037Sdyson
	/* At least PIPE_BUF bytes are free: notify select/SIGIO/kqueue waiters. */
60914802Sdyson	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
61014037Sdyson		pipeselwakeup(rpipe);
61114037Sdyson
61291362Salfred	PIPE_UNLOCK(rpipe);
61376760Salfred	return (error);
61413675Sdyson}
61513675Sdyson
61614037Sdyson#ifndef PIPE_NODIRECT
61713907Sdyson/*
61813907Sdyson * Map the sending processes' buffer into kernel space and wire it.
61913907Sdyson * This is similar to a physical write operation.
 *
 * Only the first iovec of uio is handled, and at most
 * wpipe->pipe_buffer.size bytes of it.  On success the wired pages are
 * recorded in pipe_map.ms[], mapped at pipe_map.kva (allocated on first
 * use), pipe_map.pos/cnt describe the data, and uio is advanced past the
 * bytes taken.
 *
 * Returns 0 on success or EFAULT if a page cannot be faulted in or has
 * no physical mapping; in that case pages wired so far are unwired and
 * uio is left unchanged.
 *
 * Giant must be held and the pipe mutex must not be owned.
62013907Sdyson */
62113675Sdysonstatic int
62213907Sdysonpipe_build_write_buffer(wpipe, uio)
62313907Sdyson	struct pipe *wpipe;
62413675Sdyson	struct uio *uio;
62513675Sdyson{
62618863Sdyson	u_int size;
62794566Stmm	int i;
62894566Stmm	vm_offset_t addr, endaddr, paddr;
62913907Sdyson
63079224Sdillon	GIANT_REQUIRED;
63191412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
63279224Sdillon
	/* Clamp the transfer to the pipe buffer size. */
63318863Sdyson	size = (u_int) uio->uio_iov->iov_len;
63413907Sdyson	if (size > wpipe->pipe_buffer.size)
63513907Sdyson		size = wpipe->pipe_buffer.size;
63613907Sdyson
	/* Walk every page touched by [iov_base, iov_base + size). */
63740286Sdg	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
63876760Salfred	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
63976760Salfred	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
64094566Stmm		vm_page_t m;
64194566Stmm
64299899Salc		/*
64399899Salc		 * vm_fault_quick() can sleep.  Consequently,
64499899Salc		 * vm_page_lock_queue() and vm_page_unlock_queue()
64599899Salc		 * should not be performed outside of this loop.
64699899Salc		 */
64751474Sdillon		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
64894608Stmm		    (paddr = pmap_extract(vmspace_pmap(curproc->p_vmspace),
64994608Stmm		     addr)) == 0) {
65013907Sdyson			int j;
65176760Salfred
			/* Undo: unwire the i pages wired by earlier iterations. */
65299899Salc			vm_page_lock_queues();
65376760Salfred			for (j = 0; j < i; j++)
65440700Sdg				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
65599899Salc			vm_page_unlock_queues();
65676760Salfred			return (EFAULT);
65713907Sdyson		}
65813907Sdyson
65994566Stmm		m = PHYS_TO_VM_PAGE(paddr);
66099899Salc		vm_page_lock_queues();
66113907Sdyson		vm_page_wire(m);
66299899Salc		vm_page_unlock_queues();
66313907Sdyson		wpipe->pipe_map.ms[i] = m;
66413907Sdyson	}
66513907Sdyson
66613907Sdyson/*
66713907Sdyson * set up the control block
 * (pos is the offset of the data within the first mapped page)
66813907Sdyson */
66913907Sdyson	wpipe->pipe_map.npages = i;
67076760Salfred	wpipe->pipe_map.pos =
67176760Salfred	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
67213907Sdyson	wpipe->pipe_map.cnt = size;
67313907Sdyson
67413907Sdyson/*
67513907Sdyson * and map the buffer
67613907Sdyson */
67713907Sdyson	if (wpipe->pipe_map.kva == 0) {
67813912Sdyson		/*
67913912Sdyson		 * We need to allocate space for an extra page because the
68013912Sdyson		 * address range might (will) span pages at times.
68113912Sdyson		 */
68213907Sdyson		wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
68313912Sdyson			wpipe->pipe_buffer.size + PAGE_SIZE);
68413912Sdyson		amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE;
68513907Sdyson	}
68613907Sdyson	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
68713907Sdyson		wpipe->pipe_map.npages);
68813907Sdyson
68913907Sdyson/*
69013907Sdyson * and update the uio data
69113907Sdyson */
69213907Sdyson
69313907Sdyson	uio->uio_iov->iov_len -= size;
69413907Sdyson	uio->uio_iov->iov_base += size;
	/* First iovec fully consumed: step to the next one. */
69513907Sdyson	if (uio->uio_iov->iov_len == 0)
69613907Sdyson		uio->uio_iov++;
69713907Sdyson	uio->uio_resid -= size;
69813907Sdyson	uio->uio_offset += size;
69976760Salfred	return (0);
70013907Sdyson}
70113907Sdyson
70213907Sdyson/*
70313907Sdyson * unmap and unwire the process buffer
 *
 * Removes the direct-write pages from the mapping at pipe_map.kva (if one
 * exists), unwires every page recorded in pipe_map.ms[] and resets
 * pipe_map.npages to 0.  The kva range itself is kept for reuse unless
 * amountpipekva is above MAXPIPEKVA, in which case it is freed and
 * pipe_map.kva is cleared.
 *
 * Giant must be held and the pipe mutex must not be owned.
70413907Sdyson */
70513907Sdysonstatic void
70613907Sdysonpipe_destroy_write_buffer(wpipe)
70776760Salfred	struct pipe *wpipe;
70813907Sdyson{
70913907Sdyson	int i;
71076364Salfred
71179224Sdillon	GIANT_REQUIRED;
71291412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
71379224Sdillon
71417163Sdyson	if (wpipe->pipe_map.kva) {
71517163Sdyson		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
71613907Sdyson
		/* Over the soft limit: give the address space back. */
71713907Sdyson		if (amountpipekva > MAXPIPEKVA) {
71813907Sdyson			vm_offset_t kva = wpipe->pipe_map.kva;
71913907Sdyson			wpipe->pipe_map.kva = 0;
72013907Sdyson			kmem_free(kernel_map, kva,
72113912Sdyson				wpipe->pipe_buffer.size + PAGE_SIZE);
72213912Sdyson			amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
72313907Sdyson		}
72413907Sdyson	}
	/* Pages are unwired whether or not a kva mapping existed. */
72599899Salc	vm_page_lock_queues();
72676760Salfred	for (i = 0; i < wpipe->pipe_map.npages; i++)
72740700Sdg		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
72899899Salc	vm_page_unlock_queues();
72991653Stanimura	wpipe->pipe_map.npages = 0;
73013907Sdyson}
73113907Sdyson
73213907Sdyson/*
73313907Sdyson * In the case of a signal, the writing process might go away.  This
73413907Sdyson * code copies the data into the circular buffer so that the source
73513907Sdyson * pages can be freed without loss of data.
 *
 * The pending direct-write bytes (pipe_map.cnt bytes starting at
 * pipe_map.kva + pipe_map.pos) become the entire contents of the circular
 * buffer, PIPE_DIRECTW is cleared, and the direct-write mapping is torn
 * down.
 *
 * Called with the pipe mutex held; PIPE_GET_GIANT additionally asserts
 * that the I/O lock (PIPE_LOCKFL) is held.  The pipe mutex is released
 * while Giant is held for the copy and teardown, and is held again on
 * return.
73613907Sdyson */
73713907Sdysonstatic void
73813907Sdysonpipe_clone_write_buffer(wpipe)
73976364Salfred	struct pipe *wpipe;
74013907Sdyson{
74113907Sdyson	int size;
74213907Sdyson	int pos;
74313907Sdyson
74491362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
74513907Sdyson	size = wpipe->pipe_map.cnt;
74613907Sdyson	pos = wpipe->pipe_map.pos;
74713907Sdyson
	/*
	 * The buffer bookkeeping is overwritten rather than appended to;
	 * the data is placed at offset 0 by the bcopy() below.
	 */
74813907Sdyson	wpipe->pipe_buffer.in = size;
74913907Sdyson	wpipe->pipe_buffer.out = 0;
75013907Sdyson	wpipe->pipe_buffer.cnt = size;
75113907Sdyson	wpipe->pipe_state &= ~PIPE_DIRECTW;
75213907Sdyson
75391412Salfred	PIPE_GET_GIANT(wpipe);
75492959Salfred	bcopy((caddr_t) wpipe->pipe_map.kva + pos,
755100527Salfred	    wpipe->pipe_buffer.buffer, size);
75613907Sdyson	pipe_destroy_write_buffer(wpipe);
75791412Salfred	PIPE_DROP_GIANT(wpipe);
75813907Sdyson}
75913907Sdyson
76013907Sdyson/*
76113907Sdyson * This implements the pipe buffer write mechanism.  Note that only
76213907Sdyson * a direct write OR a normal pipe write can be pending at any given time.
76313907Sdyson * If there are any characters in the pipe buffer, the direct write will
76413907Sdyson * be deferred until the receiving process grabs all of the bytes from
76513907Sdyson * the pipe buffer.  Then the direct mapping write is set-up.
 *
 * The pipe mutex must be held on entry (asserted at "retry").  Every wait
 * in this function is an interruptible msleep() on wpipe with the pipe
 * mutex passed as the interlock.
 *
 * wpipe: the pipe being written to.
 * uio:   describes the caller's data; handed to pipe_build_write_buffer().
 *
 * Returns 0 when PIPE_DIRECTW has been cleared without an error, EPIPE if
 * PIPE_EOF is seen on wpipe, or the error reported by msleep() or
 * pipe_build_write_buffer().
76613907Sdyson */
76713907Sdysonstatic int
76813907Sdysonpipe_direct_write(wpipe, uio)
76913907Sdyson	struct pipe *wpipe;
77013907Sdyson	struct uio *uio;
77113907Sdyson{
77213907Sdyson	int error;
77376364Salfred
77413951Sdysonretry:
77591362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	/*
	 * Wait for a direct write that is already in progress to finish,
	 * waking a reader that flagged PIPE_WANTR before each sleep.
	 */
77613907Sdyson	while (wpipe->pipe_state & PIPE_DIRECTW) {
77776760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
77813951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
77913951Sdyson			wakeup(wpipe);
78013951Sdyson		}
78113992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
78291362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
78391362Salfred		    PRIBIO | PCATCH, "pipdww", 0);
78414802Sdyson		if (error)
78513907Sdyson			goto error1;
78614802Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
78714802Sdyson			error = EPIPE;
78814802Sdyson			goto error1;
78914802Sdyson		}
79013907Sdyson	}
79113907Sdyson	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	/*
	 * Buffered data is still pending in the pipe: wake a waiting reader,
	 * sleep once, then re-evaluate everything from "retry".
	 */
79213951Sdyson	if (wpipe->pipe_buffer.cnt > 0) {
79376760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
79413951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
79513951Sdyson			wakeup(wpipe);
79613951Sdyson		}
79713951Sdyson
79813992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
79991362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
80091362Salfred		    PRIBIO | PCATCH, "pipdwc", 0);
80114802Sdyson		if (error)
80213907Sdyson			goto error1;
80314802Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
80414802Sdyson			error = EPIPE;
80514802Sdyson			goto error1;
80613907Sdyson		}
80713951Sdyson		goto retry;
80813907Sdyson	}
80913907Sdyson
	/*
	 * Mark the direct write as in progress, then build the write buffer
	 * with the pipe lock held, bracketed by PIPE_GET_GIANT/PIPE_DROP_GIANT.
	 */
81013951Sdyson	wpipe->pipe_state |= PIPE_DIRECTW;
81113951Sdyson
81292305Salfred	pipelock(wpipe, 0);
81391362Salfred	PIPE_GET_GIANT(wpipe);
81413907Sdyson	error = pipe_build_write_buffer(wpipe, uio);
81591362Salfred	PIPE_DROP_GIANT(wpipe);
81692305Salfred	pipeunlock(wpipe);
81713907Sdyson	if (error) {
		/* Building the buffer failed: withdraw the in-progress mark. */
81813907Sdyson		wpipe->pipe_state &= ~PIPE_DIRECTW;
81913907Sdyson		goto error1;
82013907Sdyson	}
82113907Sdyson
82213907Sdyson	error = 0;
	/*
	 * Sleep until PIPE_DIRECTW is cleared or a sleep fails.
	 * NOTE(review): presumably the reading side clears PIPE_DIRECTW once
	 * it has consumed the data; that code is not part of this function.
	 */
82313907Sdyson	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
82413907Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
			/* EOF flagged on the pipe: tear the buffer down, EPIPE. */
82513907Sdyson			pipelock(wpipe, 0);
82691362Salfred			PIPE_GET_GIANT(wpipe);
82713907Sdyson			pipe_destroy_write_buffer(wpipe);
82891362Salfred			PIPE_DROP_GIANT(wpipe);
82913907Sdyson			pipeunlock(wpipe);
83014037Sdyson			pipeselwakeup(wpipe);
83114802Sdyson			error = EPIPE;
83214802Sdyson			goto error1;
83313907Sdyson		}
83413992Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
83513992Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
83613992Sdyson			wakeup(wpipe);
83713992Sdyson		}
83814037Sdyson		pipeselwakeup(wpipe);
83991362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
84091362Salfred		    "pipdwt", 0);
84113907Sdyson	}
84213907Sdyson
	/*
	 * PIPE_DIRECTW can only still be set here if the loop above ended
	 * because msleep() returned an error.
	 */
84313907Sdyson	pipelock(wpipe,0);
84413907Sdyson	if (wpipe->pipe_state & PIPE_DIRECTW) {
84513907Sdyson		/*
84613907Sdyson		 * this bit of trickery substitutes a kernel buffer for
84713907Sdyson		 * the process that might be going away.
84813907Sdyson		 */
84913907Sdyson		pipe_clone_write_buffer(wpipe);
85013907Sdyson	} else {
85191412Salfred		PIPE_GET_GIANT(wpipe);
85213907Sdyson		pipe_destroy_write_buffer(wpipe);
85391412Salfred		PIPE_DROP_GIANT(wpipe);
85413907Sdyson	}
85513907Sdyson	pipeunlock(wpipe);
85676760Salfred	return (error);
85713907Sdyson
	/* Error exit: wake anything sleeping on wpipe before returning. */
85813907Sdysonerror1:
85913907Sdyson	wakeup(wpipe);
86076760Salfred	return (error);
86113907Sdyson}
86214037Sdyson#endif
86313907Sdyson
/*
 * Write the data described by uio into the peer of the pipe attached to fp.
 *
 * fp:          file whose f_data is this end's struct pipe; the data goes
 *              into that pipe's pipe_peer.
 * uio:         source data; uio_resid is the number of bytes still to move.
 * active_cred: credential handed to the MAC check (only used under MAC).
 * flags, td:   not referenced in this function.
 *
 * The mutex of fp's pipe is taken on entry and released before every
 * return; it is also dropped around each uiomove() call.
 *
 * Returns 0 on success, EPIPE when the peer is missing or has PIPE_EOF set,
 * EAGAIN when fp has FNONBLOCK set and no space is available, or the error
 * reported by the MAC check, pipelock(), msleep(), uiomove() or
 * pipe_direct_write().
 */
86416960Sdysonstatic int
865101941Srwatsonpipe_write(fp, uio, active_cred, flags, td)
86616960Sdyson	struct file *fp;
86713907Sdyson	struct uio *uio;
868101941Srwatson	struct ucred *active_cred;
86983366Sjulian	struct thread *td;
87045311Sdt	int flags;
87113907Sdyson{
87213675Sdyson	int error = 0;
87313913Sdyson	int orig_resid;
87416960Sdyson	struct pipe *wpipe, *rpipe;
87516960Sdyson
87616960Sdyson	rpipe = (struct pipe *) fp->f_data;
87716960Sdyson	wpipe = rpipe->pipe_peer;
87816960Sdyson
87991395Salfred	PIPE_LOCK(rpipe);
88013675Sdyson	/*
88113675Sdyson	 * detect loss of pipe read side, issue SIGPIPE if lost.
88213675Sdyson	 */
88316960Sdyson	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
88491395Salfred		PIPE_UNLOCK(rpipe);
88576760Salfred		return (EPIPE);
88613675Sdyson	}
887101768Srwatson#ifdef MAC
888101941Srwatson	error = mac_check_pipe_op(active_cred, wpipe, MAC_OP_PIPE_WRITE);
889101768Srwatson	if (error) {
890101768Srwatson		PIPE_UNLOCK(rpipe);
891101768Srwatson		return (error);
892101768Srwatson	}
893101768Srwatson#endif
	/* Held until the matching decrement on each exit path below. */
89477676Sdillon	++wpipe->pipe_busy;
89513675Sdyson
89617163Sdyson	/*
89717163Sdyson	 * If it is advantageous to resize the pipe buffer, do
89817163Sdyson	 * so.
89917163Sdyson	 */
90017163Sdyson	if ((uio->uio_resid > PIPE_SIZE) &&
90117163Sdyson		(nbigpipe < LIMITBIGPIPES) &&
90217163Sdyson		(wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
90317163Sdyson		(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
90417163Sdyson		(wpipe->pipe_buffer.cnt == 0)) {
90517163Sdyson
90613907Sdyson		if ((error = pipelock(wpipe,1)) == 0) {
90792305Salfred			PIPE_GET_GIANT(wpipe);
90876364Salfred			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
90976364Salfred				nbigpipe++;
91092305Salfred			PIPE_DROP_GIANT(wpipe);
91113907Sdyson			pipeunlock(wpipe);
91213907Sdyson		}
91313907Sdyson	}
91477676Sdillon
91577676Sdillon	/*
91677676Sdillon	 * If an early error occurred unbusy and return, waking up any pending
91777676Sdillon	 * readers.
91877676Sdillon	 */
91977676Sdillon	if (error) {
92077676Sdillon		--wpipe->pipe_busy;
92177676Sdillon		if ((wpipe->pipe_busy == 0) &&
92277676Sdillon		    (wpipe->pipe_state & PIPE_WANT)) {
92377676Sdillon			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
92477676Sdillon			wakeup(wpipe);
92577676Sdillon		}
92691395Salfred		PIPE_UNLOCK(rpipe);
92777676Sdillon		return(error);
92877676Sdillon	}
92976364Salfred
93076364Salfred	KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone"));
93113907Sdyson
	/* Remembered so the PIPE_BUF atomicity test sees the original size. */
93213913Sdyson	orig_resid = uio->uio_resid;
93377676Sdillon
93413675Sdyson	while (uio->uio_resid) {
93513907Sdyson		int space;
93676760Salfred
93714037Sdyson#ifndef PIPE_NODIRECT
93813907Sdyson		/*
93913907Sdyson		 * If the transfer is large, we can gain performance if
94013907Sdyson		 * we do process-to-process copies directly.
94116416Sdyson		 * If the write is non-blocking, we don't use the
94216416Sdyson		 * direct write mechanism.
94358505Sdillon		 *
94458505Sdillon		 * The direct write mechanism will detect the reader going
94558505Sdillon		 * away on us.
94613907Sdyson		 */
94717163Sdyson		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
94817163Sdyson		    (fp->f_flag & FNONBLOCK) == 0 &&
94917163Sdyson			(wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA))) {
95113907Sdyson			error = pipe_direct_write( wpipe, uio);
95276760Salfred			if (error)
95313907Sdyson				break;
95413907Sdyson			continue;
95591362Salfred		}
95614037Sdyson#endif
95713907Sdyson
95813907Sdyson		/*
95913907Sdyson		 * Pipe buffered writes cannot be coincidental with
96013907Sdyson		 * direct writes.  We wait until the currently executing
96113907Sdyson		 * direct write is completed before we start filling the
96258505Sdillon		 * pipe buffer.  We break out if a signal occurs or the
96358505Sdillon		 * reader goes away.
96413907Sdyson		 */
96513907Sdyson	retrywrite:
96613907Sdyson		while (wpipe->pipe_state & PIPE_DIRECTW) {
96713992Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
96813992Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
96913992Sdyson				wakeup(wpipe);
97013992Sdyson			}
97191395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
97291362Salfred			    "pipbww", 0);
97358505Sdillon			if (wpipe->pipe_state & PIPE_EOF)
97458505Sdillon				break;
97513907Sdyson			if (error)
97613907Sdyson				break;
97713907Sdyson		}
97858505Sdillon		if (wpipe->pipe_state & PIPE_EOF) {
97958505Sdillon			error = EPIPE;
98058505Sdillon			break;
98158505Sdillon		}
		/*
		 * The wait above only leaves its own loop when msleep()
		 * fails.  Stop the write here as well: falling through
		 * would let pipelock() below overwrite the error, and the
		 * PIPE_DIRECTW re-check would jump straight back to
		 * retrywrite while the direct write is still pending.
		 */
		if (error)
			break;
98213907Sdyson
98313907Sdyson		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
98414644Sdyson
98514644Sdyson		/* Writes of size <= PIPE_BUF must be atomic. */
98613913Sdyson		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
98713913Sdyson			space = 0;
98813907Sdyson
98917163Sdyson		if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) {
99013907Sdyson			if ((error = pipelock(wpipe,1)) == 0) {
99154534Stegge				int size;	/* Transfer size */
99254534Stegge				int segsize;	/* first segment to transfer */
99376760Salfred
99413907Sdyson				/*
99513907Sdyson				 * It is possible for a direct write to
99613907Sdyson				 * slip in on us... handle it here...
99713907Sdyson				 */
99813907Sdyson				if (wpipe->pipe_state & PIPE_DIRECTW) {
99913907Sdyson					pipeunlock(wpipe);
100013907Sdyson					goto retrywrite;
100113907Sdyson				}
100254534Stegge				/*
100354534Stegge				 * If a process blocked in uiomove, our
100454534Stegge				 * value for space might be bad.
100558505Sdillon				 *
100658505Sdillon				 * XXX will we be ok if the reader has gone
100758505Sdillon				 * away here?
100854534Stegge				 */
100954534Stegge				if (space > wpipe->pipe_buffer.size -
101054534Stegge				    wpipe->pipe_buffer.cnt) {
101154534Stegge					pipeunlock(wpipe);
101254534Stegge					goto retrywrite;
101354534Stegge				}
101454534Stegge
101554534Stegge				/*
101654534Stegge				 * Transfer size is minimum of uio transfer
101754534Stegge				 * and free space in pipe buffer.
101854534Stegge				 */
101954534Stegge				if (space > uio->uio_resid)
102054534Stegge					size = uio->uio_resid;
102154534Stegge				else
102254534Stegge					size = space;
102354534Stegge				/*
102454534Stegge				 * First segment to transfer is minimum of
102554534Stegge				 * transfer size and contiguous space in
102654534Stegge				 * pipe buffer.  If first segment to transfer
102754534Stegge				 * is less than the transfer size, we've got
102854534Stegge				 * a wraparound in the buffer.
102954534Stegge				 */
103054534Stegge				segsize = wpipe->pipe_buffer.size -
103154534Stegge					wpipe->pipe_buffer.in;
103254534Stegge				if (segsize > size)
103354534Stegge					segsize = size;
103454534Stegge
103554534Stegge				/* Transfer first segment */
103654534Stegge
				/* The mutex is dropped around the copy. */
103791395Salfred				PIPE_UNLOCK(rpipe);
103854534Stegge				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
103954534Stegge						segsize, uio);
104091395Salfred				PIPE_LOCK(rpipe);
104154534Stegge
104254534Stegge				if (error == 0 && segsize < size) {
104354534Stegge					/*
104454534Stegge					 * Transfer remaining part now, to
104554534Stegge					 * support atomic writes.  Wraparound
104654534Stegge					 * happened.
104754534Stegge					 */
104854534Stegge					if (wpipe->pipe_buffer.in + segsize !=
104954534Stegge					    wpipe->pipe_buffer.size)
105054534Stegge						panic("Expected pipe buffer wraparound disappeared");
105154534Stegge
105291395Salfred					PIPE_UNLOCK(rpipe);
105354534Stegge					error = uiomove(&wpipe->pipe_buffer.buffer[0],
105454534Stegge							size - segsize, uio);
105591395Salfred					PIPE_LOCK(rpipe);
105654534Stegge				}
				/* Only account for the data if every copy succeeded. */
105754534Stegge				if (error == 0) {
105854534Stegge					wpipe->pipe_buffer.in += size;
105954534Stegge					if (wpipe->pipe_buffer.in >=
106054534Stegge					    wpipe->pipe_buffer.size) {
106154534Stegge						if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
106254534Stegge							panic("Expected wraparound bad");
106354534Stegge						wpipe->pipe_buffer.in = size - segsize;
106454534Stegge					}
106554534Stegge
106654534Stegge					wpipe->pipe_buffer.cnt += size;
106754534Stegge					if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
106854534Stegge						panic("Pipe buffer overflow");
106954534Stegge
107054534Stegge				}
107113675Sdyson				pipeunlock(wpipe);
107213675Sdyson			}
107313675Sdyson			if (error)
107413675Sdyson				break;
107513675Sdyson
107613675Sdyson		} else {
107713675Sdyson			/*
107813675Sdyson			 * If the "read-side" has been blocked, wake it up now.
107913675Sdyson			 */
108013675Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
108113675Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
108213675Sdyson				wakeup(wpipe);
108313675Sdyson			}
108414037Sdyson
108513675Sdyson			/*
108613675Sdyson			 * don't block on non-blocking I/O
108713675Sdyson			 */
108816960Sdyson			if (fp->f_flag & FNONBLOCK) {
108913907Sdyson				error = EAGAIN;
109013675Sdyson				break;
109113675Sdyson			}
109213907Sdyson
109314037Sdyson			/*
109414037Sdyson			 * We have no more space and have something to offer,
109529356Speter			 * wake up select/poll.
109614037Sdyson			 */
109714037Sdyson			pipeselwakeup(wpipe);
109814037Sdyson
109913675Sdyson			wpipe->pipe_state |= PIPE_WANTW;
110091395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe),
110191362Salfred			    PRIBIO | PCATCH, "pipewr", 0);
110276760Salfred			if (error != 0)
110313675Sdyson				break;
110413675Sdyson			/*
110513675Sdyson			 * If read side wants to go away, we just issue a signal
110613675Sdyson			 * to ourselves.
110713675Sdyson			 */
110813675Sdyson			if (wpipe->pipe_state & PIPE_EOF) {
110913774Sdyson				error = EPIPE;
111013907Sdyson				break;
111113675Sdyson			}
111213675Sdyson		}
111313675Sdyson	}
111413675Sdyson
111514644Sdyson	--wpipe->pipe_busy;
111677676Sdillon
111776760Salfred	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
111876760Salfred		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
111913675Sdyson		wakeup(wpipe);
112013675Sdyson	} else if (wpipe->pipe_buffer.cnt > 0) {
112113675Sdyson		/*
112213675Sdyson		 * If we have put any characters in the buffer, we wake up
112313675Sdyson		 * the reader.
112413675Sdyson		 */
112513675Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
112613675Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
112713675Sdyson			wakeup(wpipe);
112813675Sdyson		}
112913675Sdyson	}
113013909Sdyson
113113909Sdyson	/*
113213909Sdyson	 * Don't return EPIPE if I/O was successful
113313909Sdyson	 */
113413907Sdyson	if ((wpipe->pipe_buffer.cnt == 0) &&
113577676Sdillon	    (uio->uio_resid == 0) &&
113677676Sdillon	    (error == EPIPE)) {
113713907Sdyson		error = 0;
113877676Sdillon	}
113913913Sdyson
114024101Sbde	if (error == 0)
114155112Sbde		vfs_timestamp(&wpipe->pipe_mtime);
114224101Sbde
114314037Sdyson	/*
114414037Sdyson	 * We have something to offer,
114529356Speter	 * wake up select/poll.
114614037Sdyson	 */
114714177Sdyson	if (wpipe->pipe_buffer.cnt)
114814037Sdyson		pipeselwakeup(wpipe);
114913907Sdyson
115091395Salfred	PIPE_UNLOCK(rpipe);
115176760Salfred	return (error);
115213675Sdyson}
115313675Sdyson
115413675Sdyson/*
115513675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets.
 *
 * fp:          file whose f_data is the pipe operated on.
 * cmd:         ioctl request; the handled values are the cases below.
 * data:        request argument, read or written as an int.
 * active_cred: credential handed to the MAC check (only used under MAC).
 * td:          not referenced in this function.
 *
 * Returns 0 on success, the result of fsetown() for the owner-setting
 * requests, the MAC check's error when it denies the call, and ENOTTY for
 * any request not listed.
115613675Sdyson */
115713675Sdysonint
1158102003Srwatsonpipe_ioctl(fp, cmd, data, active_cred, td)
115913675Sdyson	struct file *fp;
116036735Sdfr	u_long cmd;
116199009Salfred	void *data;
1162102003Srwatson	struct ucred *active_cred;
116383366Sjulian	struct thread *td;
116413675Sdyson{
116576364Salfred	struct pipe *mpipe = (struct pipe *)fp->f_data;
1166101768Srwatson#ifdef MAC
1167101768Srwatson	int error;
116813675Sdyson
1169101768Srwatson	/* XXXMAC: Pipe should be locked for this check. */
1170102003Srwatson	error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data);
1171101768Srwatson	if (error)
1172101768Srwatson		return (error);
1173101768Srwatson#endif
1174101768Srwatson
117513675Sdyson	switch (cmd) {
117613675Sdyson
	/* Accepted, but nothing is recorded on the pipe here. */
117713675Sdyson	case FIONBIO:
117813675Sdyson		return (0);
117913675Sdyson
	/* Set or clear PIPE_ASYNC according to the int argument. */
118013675Sdyson	case FIOASYNC:
118191362Salfred		PIPE_LOCK(mpipe);
118213675Sdyson		if (*(int *)data) {
118313675Sdyson			mpipe->pipe_state |= PIPE_ASYNC;
118413675Sdyson		} else {
118513675Sdyson			mpipe->pipe_state &= ~PIPE_ASYNC;
118613675Sdyson		}
118791362Salfred		PIPE_UNLOCK(mpipe);
118813675Sdyson		return (0);
118913675Sdyson
	/*
	 * Report the pending byte count: pipe_map.cnt while PIPE_DIRECTW is
	 * set, otherwise pipe_buffer.cnt.
	 */
119013675Sdyson	case FIONREAD:
119191362Salfred		PIPE_LOCK(mpipe);
119214037Sdyson		if (mpipe->pipe_state & PIPE_DIRECTW)
119314037Sdyson			*(int *)data = mpipe->pipe_map.cnt;
119414037Sdyson		else
119514037Sdyson			*(int *)data = mpipe->pipe_buffer.cnt;
119691362Salfred		PIPE_UNLOCK(mpipe);
119713675Sdyson		return (0);
119813675Sdyson
119941086Struckman	case FIOSETOWN:
120041086Struckman		return (fsetown(*(int *)data, &mpipe->pipe_sigio));
120141086Struckman
120241086Struckman	case FIOGETOWN:
120341086Struckman		*(int *)data = fgetown(mpipe->pipe_sigio);
120413675Sdyson		return (0);
120513675Sdyson
	/* The two TIOC requests use the negated owner value. */
120641086Struckman	/* This is deprecated, FIOSETOWN should be used instead. */
120741086Struckman	case TIOCSPGRP:
120841086Struckman		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));
120941086Struckman
121041086Struckman	/* This is deprecated, FIOGETOWN should be used instead. */
121118863Sdyson	case TIOCGPGRP:
121241086Struckman		*(int *)data = -fgetown(mpipe->pipe_sigio);
121313675Sdyson		return (0);
121413675Sdyson
121513675Sdyson	}
121617124Sbde	return (ENOTTY);
121713675Sdyson}
121813675Sdyson
/*
 * Poll handler for a pipe end.
 *
 * fp:          file whose f_data is this end's pipe (the read side here);
 *              its pipe_peer is treated as the write side.
 * events:      requested POLL* bits.
 * active_cred: credential handed to the MAC check (only used under MAC).
 * td:          thread recorded with selrecord() when nothing is ready.
 *
 * Returns the subset of events that is ready, plus POLLHUP when either
 * side has PIPE_EOF set or the peer is gone.  If the MAC check fails the
 * function jumps to the unlock path with revents still 0.
 */
121913675Sdysonint
1220101983Srwatsonpipe_poll(fp, events, active_cred, td)
122113675Sdyson	struct file *fp;
122229356Speter	int events;
1223101983Srwatson	struct ucred *active_cred;
122483366Sjulian	struct thread *td;
122513675Sdyson{
122676364Salfred	struct pipe *rpipe = (struct pipe *)fp->f_data;
122713675Sdyson	struct pipe *wpipe;
122829356Speter	int revents = 0;
1229101768Srwatson#ifdef MAC
1230101768Srwatson	int error;
1231101768Srwatson#endif
123213675Sdyson
	/* Note: the peer pointer is sampled before the mutex is taken. */
123313675Sdyson	wpipe = rpipe->pipe_peer;
123491362Salfred	PIPE_LOCK(rpipe);
1235101768Srwatson#ifdef MAC
1236101983Srwatson	error = mac_check_pipe_op(active_cred, rpipe, MAC_OP_PIPE_POLL);
1237101768Srwatson	if (error)
1238101768Srwatson		goto locked_error;
1239101768Srwatson#endif
	/* Readable: direct write pending, buffered bytes, or EOF. */
124029356Speter	if (events & (POLLIN | POLLRDNORM))
124129356Speter		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
124229356Speter		    (rpipe->pipe_buffer.cnt > 0) ||
124329356Speter		    (rpipe->pipe_state & PIPE_EOF))
124429356Speter			revents |= events & (POLLIN | POLLRDNORM);
124513675Sdyson
	/*
	 * Writable: peer gone or at EOF, or no direct write pending and at
	 * least PIPE_BUF bytes free in the peer's buffer.
	 */
124629356Speter	if (events & (POLLOUT | POLLWRNORM))
124729356Speter		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
124843311Sdillon		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
124943311Sdillon		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
125029356Speter			revents |= events & (POLLOUT | POLLWRNORM);
125113675Sdyson
125229356Speter	if ((rpipe->pipe_state & PIPE_EOF) ||
125329356Speter	    (wpipe == NULL) ||
125429356Speter	    (wpipe->pipe_state & PIPE_EOF))
125529356Speter		revents |= POLLHUP;
125629356Speter
	/*
	 * Nothing ready: register for wakeup on the relevant side(s).
	 * wpipe cannot be NULL here, since a missing peer sets POLLHUP above.
	 */
125729356Speter	if (revents == 0) {
125829356Speter		if (events & (POLLIN | POLLRDNORM)) {
125983805Sjhb			selrecord(td, &rpipe->pipe_sel);
126029356Speter			rpipe->pipe_state |= PIPE_SEL;
126113675Sdyson		}
126213675Sdyson
126329356Speter		if (events & (POLLOUT | POLLWRNORM)) {
126483805Sjhb			selrecord(td, &wpipe->pipe_sel);
126530164Speter			wpipe->pipe_state |= PIPE_SEL;
126613907Sdyson		}
126713675Sdyson	}
1268101768Srwatson#ifdef MAC
1269101768Srwatsonlocked_error:
1270101768Srwatson#endif
127191362Salfred	PIPE_UNLOCK(rpipe);
127229356Speter
127329356Speter	return (revents);
127413675Sdyson}
127513675Sdyson
127698989Salfred/*
127798989Salfred * We shouldn't need locks here as we're doing a read and this should
127898989Salfred * be a natural race.
 *
 * Fill *ub with stat information for the pipe attached to fp.
 *
 * fp:          file whose f_data is the pipe; its f_cred supplies uid/gid.
 * ub:          output; zeroed first, then filled field by field.
 * active_cred: credential handed to the MAC check (only used under MAC).
 * td:          not referenced in this function.
 *
 * Returns 0, or the MAC check's error when it denies the call.
127998989Salfred */
128052983Speterstatic int
1281101983Srwatsonpipe_stat(fp, ub, active_cred, td)
128252983Speter	struct file *fp;
128352983Speter	struct stat *ub;
1284101983Srwatson	struct ucred *active_cred;
128583366Sjulian	struct thread *td;
128613675Sdyson{
128752983Speter	struct pipe *pipe = (struct pipe *)fp->f_data;
1288101768Srwatson#ifdef MAC
1289101768Srwatson	int error;
129052983Speter
1291101768Srwatson	/* XXXMAC: Pipe should be locked for this check. */
1292101983Srwatson	error = mac_check_pipe_op(active_cred, pipe, MAC_OP_PIPE_STAT);
1293101768Srwatson	if (error)
1294101768Srwatson		return (error);
1295101768Srwatson#endif
1296100527Salfred	bzero(ub, sizeof(*ub));
129717124Sbde	ub->st_mode = S_IFIFO;
	/* Block size is the buffer capacity; size is the bytes buffered. */
129813907Sdyson	ub->st_blksize = pipe->pipe_buffer.size;
129913675Sdyson	ub->st_size = pipe->pipe_buffer.cnt;
	/* st_size rounded up to whole blocks. */
130013675Sdyson	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
130134901Sphk	ub->st_atimespec = pipe->pipe_atime;
130234901Sphk	ub->st_mtimespec = pipe->pipe_mtime;
130334901Sphk	ub->st_ctimespec = pipe->pipe_ctime;
130460404Schris	ub->st_uid = fp->f_cred->cr_uid;
130560404Schris	ub->st_gid = fp->f_cred->cr_gid;
130617124Sbde	/*
130760404Schris	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
130817124Sbde	 * XXX (st_dev, st_ino) should be unique.
130917124Sbde	 */
131076760Salfred	return (0);
131113675Sdyson}
131213675Sdyson
/*
 * Close handler: detach the pipe from fp and shut it down.
 *
 * fp: file being closed; its f_data is the pipe.
 * td: not referenced in this function.
 *
 * Always returns 0.
 */
131313675Sdyson/* ARGSUSED */
131413675Sdysonstatic int
131583366Sjulianpipe_close(fp, td)
131613675Sdyson	struct file *fp;
131783366Sjulian	struct thread *td;
131813675Sdyson{
131913675Sdyson	struct pipe *cpipe = (struct pipe *)fp->f_data;
132016322Sgpalmer
	/* Point fp at badfileops and drop its pipe reference before teardown. */
132149413Sgreen	fp->f_ops = &badfileops;
132249413Sgreen	fp->f_data = NULL;
132396122Salfred	funsetown(&cpipe->pipe_sigio);
132413675Sdyson	pipeclose(cpipe);
132576760Salfred	return (0);
132613675Sdyson}
132713675Sdyson
/*
 * Release the kernel memory attached to a pipe: its data buffer and,
 * unless PIPE_NODIRECT is defined, the kernel virtual address range
 * recorded in pipe_map.  The nbigpipe and amountpipekva counters are
 * adjusted to match, and the freed fields are reset.
 *
 * cpipe: pipe whose memory is released.  Giant is required, and the pipe
 *        mutex (if one exists) must not be owned by the caller.
 */
132876364Salfredstatic void
132976364Salfredpipe_free_kmem(cpipe)
133076364Salfred	struct pipe *cpipe;
133176364Salfred{
133291412Salfred
133379224Sdillon	GIANT_REQUIRED;
133491412Salfred	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
133591412Salfred	       ("pipe_free_kmem: pipe mutex locked"));
133676364Salfred
133776364Salfred	if (cpipe->pipe_buffer.buffer != NULL) {
		/* A buffer larger than PIPE_SIZE was counted as a big pipe. */
133876364Salfred		if (cpipe->pipe_buffer.size > PIPE_SIZE)
133976364Salfred			--nbigpipe;
134076364Salfred		amountpipekva -= cpipe->pipe_buffer.size;
134176364Salfred		kmem_free(kernel_map,
134276364Salfred			(vm_offset_t)cpipe->pipe_buffer.buffer,
134376364Salfred			cpipe->pipe_buffer.size);
134476364Salfred		cpipe->pipe_buffer.buffer = NULL;
134576364Salfred	}
134676364Salfred#ifndef PIPE_NODIRECT
	/* kva is an address value, not a pointer, so it is tested against 0. */
134776364Salfred	if (cpipe->pipe_map.kva != 0) {
134876364Salfred		amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE;
134976364Salfred		kmem_free(kernel_map,
135076364Salfred			cpipe->pipe_map.kva,
135176364Salfred			cpipe->pipe_buffer.size + PAGE_SIZE);
135276364Salfred		cpipe->pipe_map.cnt = 0;
135376364Salfred		cpipe->pipe_map.kva = 0;
135476364Salfred		cpipe->pipe_map.pos = 0;
135576364Salfred		cpipe->pipe_map.npages = 0;
135676364Salfred	}
135776364Salfred#endif
135876364Salfred}
135976364Salfred
136013675Sdyson/*
136113675Sdyson * shutdown the pipe
 *
 * Waits until nobody is busy in the pipe, disconnects it from its peer
 * (marking the peer PIPE_EOF and waking it), then frees the pipe's kernel
 * memory and the pipe structure itself.
 *
 * cpipe: pipe to shut down; NULL is accepted and ignored.  A partially
 *        created pipe without a mutex is handled as well.
136213675Sdyson */
136313675Sdysonstatic void
136413675Sdysonpipeclose(cpipe)
136513675Sdyson	struct pipe *cpipe;
136613675Sdyson{
136713907Sdyson	struct pipe *ppipe;
136891968Salfred	int hadpeer;
136976364Salfred
137091968Salfred	if (cpipe == NULL)
137191968Salfred		return;
137291968Salfred
137391968Salfred	hadpeer = 0;
137491968Salfred
137591968Salfred	/* partially created pipes won't have a valid mutex. */
137691968Salfred	if (PIPE_MTX(cpipe) != NULL)
137791362Salfred		PIPE_LOCK(cpipe);
137813907Sdyson
137991968Salfred	pipeselwakeup(cpipe);
138013907Sdyson
138191968Salfred	/*
138291968Salfred	 * If the other side is blocked, wake it up saying that
138391968Salfred	 * we want to close it down.
138491968Salfred	 */
138591968Salfred	while (cpipe->pipe_busy) {
138691968Salfred		wakeup(cpipe);
138791968Salfred		cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
138891968Salfred		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
138991968Salfred	}
139013675Sdyson
1391101768Srwatson#ifdef MAC
	/* The label is destroyed only by a pipe that no longer has a peer. */
1392101768Srwatson	if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
1393101768Srwatson		mac_destroy_pipe(cpipe);
1394101768Srwatson#endif
1395101768Srwatson
139691968Salfred	/*
139791968Salfred	 * Disconnect from peer
139891968Salfred	 */
139991968Salfred	if ((ppipe = cpipe->pipe_peer) != NULL) {
140091968Salfred		hadpeer++;
140191968Salfred		pipeselwakeup(ppipe);
140213907Sdyson
140391968Salfred		ppipe->pipe_state |= PIPE_EOF;
140491968Salfred		wakeup(ppipe);
140591968Salfred		KNOTE(&ppipe->pipe_sel.si_note, 0);
140691968Salfred		ppipe->pipe_peer = NULL;
140791968Salfred	}
140891968Salfred	/*
140991968Salfred	 * free resources
141091968Salfred	 */
141191968Salfred	if (PIPE_MTX(cpipe) != NULL) {
141291968Salfred		PIPE_UNLOCK(cpipe);
		/*
		 * The mutex is destroyed and freed only when there was no
		 * peer.  NOTE(review): presumably the mutex is shared with
		 * the peer, which would still need it — confirm against
		 * the pipe creation code.
		 */
141391968Salfred		if (!hadpeer) {
141491968Salfred			mtx_destroy(PIPE_MTX(cpipe));
141591968Salfred			free(PIPE_MTX(cpipe), M_TEMP);
141613675Sdyson		}
141713675Sdyson	}
	/* pipe_free_kmem() requires Giant. */
141891968Salfred	mtx_lock(&Giant);
141991968Salfred	pipe_free_kmem(cpipe);
142092751Sjeff	uma_zfree(pipe_zone, cpipe);
142191968Salfred	mtx_unlock(&Giant);
142213675Sdyson}
142359288Sjlemon
/*
 * Attach a knote to a pipe.
 *
 * fp: not referenced; the pipe is taken from kn->kn_fp instead.
 * kn: knote to attach.  EVFILT_READ attaches to this end with
 *     pipe_rfiltops; EVFILT_WRITE attaches to the peer with pipe_wfiltops.
 *
 * Returns 0 on success, EBADF for EVFILT_WRITE when the peer is gone, and
 * 1 for any other filter.
 * NOTE(review): the bare 1 for unsupported filters is not a named errno
 * constant — confirm what the caller expects.
 */
142472521Sjlemon/*ARGSUSED*/
142559288Sjlemonstatic int
142672521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn)
142759288Sjlemon{
142889306Salfred	struct pipe *cpipe;
142959288Sjlemon
143089306Salfred	cpipe = (struct pipe *)kn->kn_fp->f_data;
143172521Sjlemon	switch (kn->kn_filter) {
143272521Sjlemon	case EVFILT_READ:
143372521Sjlemon		kn->kn_fop = &pipe_rfiltops;
143472521Sjlemon		break;
143572521Sjlemon	case EVFILT_WRITE:
143672521Sjlemon		kn->kn_fop = &pipe_wfiltops;
		/* Write events are tracked on the peer pipe. */
143778292Sjlemon		cpipe = cpipe->pipe_peer;
1438101382Sdes		if (cpipe == NULL)
1439101382Sdes			/* other end of pipe has been closed */
1440101382Sdes			return (EBADF);
144172521Sjlemon		break;
144272521Sjlemon	default:
144372521Sjlemon		return (1);
144472521Sjlemon	}
	/* Remember which pipe holds the knote; filt_pipedetach() reads it. */
1445100527Salfred	kn->kn_hook = cpipe;
144678292Sjlemon
144791372Salfred	PIPE_LOCK(cpipe);
144878292Sjlemon	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
144991372Salfred	PIPE_UNLOCK(cpipe);
145059288Sjlemon	return (0);
145159288Sjlemon}
145259288Sjlemon
/*
 * Detach a knote: remove kn from the note list of the pipe stored in
 * kn->kn_hook, under that pipe's mutex.
 */
145359288Sjlemonstatic void
145459288Sjlemonfilt_pipedetach(struct knote *kn)
145559288Sjlemon{
145678292Sjlemon	struct pipe *cpipe = (struct pipe *)kn->kn_hook;
145759288Sjlemon
145891372Salfred	PIPE_LOCK(cpipe);
145978292Sjlemon	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
146091372Salfred	PIPE_UNLOCK(cpipe);
146159288Sjlemon}
146259288Sjlemon
/*
 * Read filter for a pipe knote.
 *
 * kn:   knote; kn_data receives the number of readable bytes and kn_flags
 *       gains EV_EOF at end of file.
 * hint: not referenced.
 *
 * Returns 1 when either side has PIPE_EOF set or the peer is gone;
 * otherwise non-zero exactly when kn_data is greater than 0.
 */
146359288Sjlemon/*ARGSUSED*/
146459288Sjlemonstatic int
146559288Sjlemonfilt_piperead(struct knote *kn, long hint)
146659288Sjlemon{
146759288Sjlemon	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
146859288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
146959288Sjlemon
147091372Salfred	PIPE_LOCK(rpipe);
	/* Buffered bytes first; fall back to a pending direct write's count. */
147159288Sjlemon	kn->kn_data = rpipe->pipe_buffer.cnt;
147259288Sjlemon	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
147359288Sjlemon		kn->kn_data = rpipe->pipe_map.cnt;
147459288Sjlemon
147559288Sjlemon	if ((rpipe->pipe_state & PIPE_EOF) ||
147659288Sjlemon	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
147791372Salfred		kn->kn_flags |= EV_EOF;
147891372Salfred		PIPE_UNLOCK(rpipe);
147959288Sjlemon		return (1);
148059288Sjlemon	}
148191372Salfred	PIPE_UNLOCK(rpipe);
148259288Sjlemon	return (kn->kn_data > 0);
148359288Sjlemon}
148459288Sjlemon
/*
 * Write filter for a pipe knote.
 *
 * kn:   knote; kn_data receives the free space in the peer's buffer, or 0
 *       while a direct write is pending or at end of file.  kn_flags gains
 *       EV_EOF at end of file.
 * hint: not referenced.
 *
 * Returns 1 when the peer is gone or has PIPE_EOF set; otherwise non-zero
 * exactly when kn_data is at least PIPE_BUF.
 */
148559288Sjlemon/*ARGSUSED*/
148659288Sjlemonstatic int
148759288Sjlemonfilt_pipewrite(struct knote *kn, long hint)
148859288Sjlemon{
148959288Sjlemon	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
149059288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
149159288Sjlemon
149291372Salfred	PIPE_LOCK(rpipe);
149359288Sjlemon	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
149459288Sjlemon		kn->kn_data = 0;
149559288Sjlemon		kn->kn_flags |= EV_EOF;
149691372Salfred		PIPE_UNLOCK(rpipe);
149759288Sjlemon		return (1);
149859288Sjlemon	}
149959288Sjlemon	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	/* No buffered write can start while a direct write is pending. */
150065855Sjlemon	if (wpipe->pipe_state & PIPE_DIRECTW)
150159288Sjlemon		kn->kn_data = 0;
150259288Sjlemon
150391372Salfred	PIPE_UNLOCK(rpipe);
150459288Sjlemon	return (kn->kn_data >= PIPE_BUF);
150559288Sjlemon}
1506