sys_pipe.c revision 116546
113675Sdyson/*
213675Sdyson * Copyright (c) 1996 John S. Dyson
313675Sdyson * All rights reserved.
413675Sdyson *
513675Sdyson * Redistribution and use in source and binary forms, with or without
613675Sdyson * modification, are permitted provided that the following conditions
713675Sdyson * are met:
813675Sdyson * 1. Redistributions of source code must retain the above copyright
913675Sdyson *    notice immediately at the beginning of the file, without modification,
1013675Sdyson *    this list of conditions, and the following disclaimer.
1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright
1213675Sdyson *    notice, this list of conditions and the following disclaimer in the
1313675Sdyson *    documentation and/or other materials provided with the distribution.
1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author
1513675Sdyson *    John S. Dyson.
1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions
1713675Sdyson *    are met.
1813675Sdyson */
1913675Sdyson
2013675Sdyson/*
2113675Sdyson * This file contains a high-performance replacement for the socket-based
2213675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
2313675Sdyson * all features of sockets, but does do everything that pipes normally
2413675Sdyson * do.
2513675Sdyson */
2613675Sdyson
2713907Sdyson/*
2813907Sdyson * This code has two modes of operation, a small write mode and a large
2913907Sdyson * write mode.  The small write mode acts like conventional pipes with
3013907Sdyson * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
3113907Sdyson * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
3213907Sdyson * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
3313907Sdyson * the receiving process can copy it directly from the pages in the sending
3413907Sdyson * process.
3513907Sdyson *
3613907Sdyson * If the sending process receives a signal, it is possible that it will
3713913Sdyson * go away, and certainly its address space can change, because control
3813907Sdyson * is returned back to the user-mode side.  In that case, the pipe code
3913907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable
4013907Sdyson * kernel buffer, and the receiving process will grab the data from the
4113907Sdyson * pageable kernel buffer.  Since signals don't happen all that often,
4213907Sdyson * the copy operation is normally eliminated.
4313907Sdyson *
4413907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
4513907Sdyson * happen for small transfers so that the system will not spend all of
4613913Sdyson * its time context switching.  PIPE_SIZE is constrained by the
4713907Sdyson * amount of kernel virtual memory.
4813907Sdyson */
4913907Sdyson
50116182Sobrien#include <sys/cdefs.h>
51116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 116546 2003-06-18 18:16:40Z phk $");
52116182Sobrien
53101768Srwatson#include "opt_mac.h"
54101768Srwatson
5513675Sdyson#include <sys/param.h>
5613675Sdyson#include <sys/systm.h>
5724131Sbde#include <sys/fcntl.h>
5813675Sdyson#include <sys/file.h>
5913675Sdyson#include <sys/filedesc.h>
6024206Sbde#include <sys/filio.h>
6191372Salfred#include <sys/kernel.h>
6276166Smarkm#include <sys/lock.h>
63101768Srwatson#include <sys/mac.h>
6476827Salfred#include <sys/mutex.h>
6524206Sbde#include <sys/ttycom.h>
6613675Sdyson#include <sys/stat.h>
6791968Salfred#include <sys/malloc.h>
6829356Speter#include <sys/poll.h>
6970834Swollman#include <sys/selinfo.h>
7013675Sdyson#include <sys/signalvar.h>
7113675Sdyson#include <sys/sysproto.h>
7213675Sdyson#include <sys/pipe.h>
7376166Smarkm#include <sys/proc.h>
7455112Sbde#include <sys/vnode.h>
7534924Sbde#include <sys/uio.h>
7659288Sjlemon#include <sys/event.h>
7713675Sdyson
7813675Sdyson#include <vm/vm.h>
7913675Sdyson#include <vm/vm_param.h>
8013675Sdyson#include <vm/vm_object.h>
8113675Sdyson#include <vm/vm_kern.h>
8213675Sdyson#include <vm/vm_extern.h>
8313675Sdyson#include <vm/pmap.h>
8413675Sdyson#include <vm/vm_map.h>
8513907Sdyson#include <vm/vm_page.h>
8692751Sjeff#include <vm/uma.h>
8713675Sdyson
8814037Sdyson/*
8914037Sdyson * Use this define if you want to disable *fancy* VM things.  Expect an
9014037Sdyson * approx 30% decrease in transfer rate.  This could be useful for
9114037Sdyson * NetBSD or OpenBSD.
9214037Sdyson */
9314037Sdyson/* #define PIPE_NODIRECT */
9414037Sdyson
9514037Sdyson/*
9614037Sdyson * interfaces to the outside world
9714037Sdyson */
/*
 * Forward declarations of the file-operation handlers that are installed
 * in the pipeops vector below.
 */
98108255Sphkstatic fo_rdwr_t	pipe_read;
99108255Sphkstatic fo_rdwr_t	pipe_write;
100108255Sphkstatic fo_ioctl_t	pipe_ioctl;
101108255Sphkstatic fo_poll_t	pipe_poll;
102108255Sphkstatic fo_kqfilter_t	pipe_kqfilter;
103108255Sphkstatic fo_stat_t	pipe_stat;
104108255Sphkstatic fo_close_t	pipe_close;
10513675Sdyson
/*
 * Operations vector for pipe descriptors.  pipe() stores its address in
 * f_ops of both the read-side and the write-side struct file.
 * NOTE(review): DFLAG_PASSABLE presumably marks the descriptor as one that
 * may be passed between processes -- confirm against sys/file.h.
 */
10672521Sjlemonstatic struct fileops pipeops = {
107116546Sphk	.fo_read = pipe_read,
108116546Sphk	.fo_write = pipe_write,
109116546Sphk	.fo_ioctl = pipe_ioctl,
110116546Sphk	.fo_poll = pipe_poll,
111116546Sphk	.fo_kqfilter = pipe_kqfilter,
112116546Sphk	.fo_stat = pipe_stat,
113116546Sphk	.fo_close = pipe_close,
114116546Sphk	.fo_flags = DFLAG_PASSABLE
11572521Sjlemon};
11613675Sdyson
/*
 * Event-filter callbacks for pipes: one shared detach routine and separate
 * read-side / write-side event routines.
 */
11759288Sjlemonstatic void	filt_pipedetach(struct knote *kn);
11859288Sjlemonstatic int	filt_piperead(struct knote *kn, long hint);
11959288Sjlemonstatic int	filt_pipewrite(struct knote *kn, long hint);
12059288Sjlemon
/*
 * Filter operation tables for the read and write filters.  The initializers
 * are positional.
 * NOTE(review): the slots are presumably { f_isfd, f_attach, f_detach,
 * f_event } -- confirm against struct filterops in sys/event.h.
 */
12172521Sjlemonstatic struct filterops pipe_rfiltops =
12272521Sjlemon	{ 1, NULL, filt_pipedetach, filt_piperead };
12372521Sjlemonstatic struct filterops pipe_wfiltops =
12472521Sjlemon	{ 1, NULL, filt_pipedetach, filt_pipewrite };
12559288Sjlemon
/*
 * PIPE_GET_GIANT(pipe): switch from holding the pipe mutex to holding Giant.
 * Asserts that the pipe's I/O lock flag (PIPE_LOCKFL, set by pipelock()) is
 * held, then releases the pipe mutex and acquires Giant.  Must be paired
 * with PIPE_DROP_GIANT().
 */
12692305Salfred#define PIPE_GET_GIANT(pipe)						\
12791362Salfred	do {								\
12892305Salfred		KASSERT(((pipe)->pipe_state & PIPE_LOCKFL) != 0,	\
12992305Salfred		    ("%s:%d PIPE_GET_GIANT: line pipe not locked",	\
13092305Salfred		     __FILE__, __LINE__));				\
13192305Salfred		PIPE_UNLOCK(pipe);					\
13291362Salfred		mtx_lock(&Giant);					\
13391362Salfred	} while (0)
13472521Sjlemon
/*
 * PIPE_DROP_GIANT(pipe): inverse of PIPE_GET_GIANT() -- releases Giant and
 * then re-acquires the pipe mutex.
 */
13591362Salfred#define PIPE_DROP_GIANT(pipe)						\
13691362Salfred	do {								\
13791362Salfred		mtx_unlock(&Giant);					\
13892305Salfred		PIPE_LOCK(pipe);					\
13991362Salfred	} while (0)
14091362Salfred
14113675Sdyson/*
14213675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe
14313675Sdyson * space is pageable.  The pipe code will try to maintain locality of
14413675Sdyson * reference for performance reasons, so small amounts of outstanding I/O
14513675Sdyson * will not wipe the cache.
14613675Sdyson */
/*
 * Thresholds expressed as fractions of PIPE_SIZE (one third and two
 * thirds).  In this part of the file MINPIPESIZE is the low-water mark
 * pipe_read() uses before waking a blocked writer.
 */
14713907Sdyson#define MINPIPESIZE (PIPE_SIZE/3)
14813907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3)
14913675Sdyson
15013907Sdyson/*
15113907Sdyson * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
15213907Sdyson * is there so that on large systems, we don't exhaust it.
15313907Sdyson */
/* 8 MB; compared against amountpipekva in pipe_destroy_write_buffer(). */
15413907Sdyson#define MAXPIPEKVA (8*1024*1024)
15513907Sdyson
15613907Sdyson/*
15713907Sdyson * Limit for direct transfers, we cannot, of course limit
15813907Sdyson * the amount of kva for pipes in general though.
15913907Sdyson */
/* 16 MB. */
16013907Sdyson#define LIMITPIPEKVA (16*1024*1024)
16117163Sdyson
16217163Sdyson/*
16317163Sdyson * Limit the number of "big" pipes
16417163Sdyson */
16517163Sdyson#define LIMITBIGPIPES	32
/* NOTE(review): presumably the current count of "big" pipes -- confirm at its users. */
16633181Seivindstatic int nbigpipe;
16717163Sdyson
/*
 * Running total of kernel virtual address space charged to pipes; updated
 * with atomic_add_int()/atomic_subtract_int() by the buffer routines.
 */
16817124Sbdestatic int amountpipekva;
16913907Sdyson
/*
 * Prototypes for the file-local helpers.  The direct-write helpers exist
 * only when PIPE_NODIRECT is not defined.
 */
17091413Salfredstatic void pipeinit(void *dummy __unused);
17191413Salfredstatic void pipeclose(struct pipe *cpipe);
17291413Salfredstatic void pipe_free_kmem(struct pipe *cpipe);
17391413Salfredstatic int pipe_create(struct pipe **cpipep);
17491413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch);
17591413Salfredstatic __inline void pipeunlock(struct pipe *cpipe);
17691413Salfredstatic __inline void pipeselwakeup(struct pipe *cpipe);
17714037Sdyson#ifndef PIPE_NODIRECT
17891413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
17991413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe);
18091413Salfredstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
18191413Salfredstatic void pipe_clone_write_buffer(struct pipe *wpipe);
18214037Sdyson#endif
18391413Salfredstatic int pipespace(struct pipe *cpipe, int size);
18413675Sdyson
/* UMA zone for struct pipe; created in pipeinit(), used by pipe_create(). */
18592751Sjeffstatic uma_zone_t pipe_zone;
18627899Sdyson
18791372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
18891372Salfred
18991372Salfredstatic void
19091372Salfredpipeinit(void *dummy __unused)
19191372Salfred{
19292654Sjeff	pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL,
19392654Sjeff	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
19491372Salfred}
19591372Salfred
19613675Sdyson/*
19713675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes
19813675Sdyson */
19913675Sdyson
/*
 * pipe() system call handler.
 *
 * Creates two pipe endpoints with pipe_create(), allocates a struct file
 * and descriptor for each with falloc(), and connects the endpoints through
 * pipe_peer.  Both descriptors are opened FREAD | FWRITE and both endpoints
 * share one mutex.
 *
 * td:  calling thread; the two descriptors are returned in td_retval[0]
 *      and td_retval[1].
 * uap: unused.
 *
 * Returns 0 on success, ENFILE if either endpoint cannot be created, or the
 * error reported by falloc().
 */
20013675Sdyson/* ARGSUSED */
20113675Sdysonint
20283366Sjulianpipe(td, uap)
20383366Sjulian	struct thread *td;
20413675Sdyson	struct pipe_args /* {
20513675Sdyson		int	dummy;
20613675Sdyson	} */ *uap;
20713675Sdyson{
20883366Sjulian	struct filedesc *fdp = td->td_proc->p_fd;
20913675Sdyson	struct file *rf, *wf;
21013675Sdyson	struct pipe *rpipe, *wpipe;
21191968Salfred	struct mtx *pmtx;
21213675Sdyson	int fd, error;
21391362Salfred
21491372Salfred	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
21527899Sdyson
	/*
	 * Storage for the mutex shared by both endpoints.  It is only
	 * mtx_init()ed at the very end, so every error path below releases
	 * it with a plain free().
	 */
216111119Simp	pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO);
21791968Salfred
	/*
	 * pipe_create() stores the new pipe through its argument even when it
	 * fails part-way, so both pointers are handed to pipeclose() on
	 * failure; wpipe is still NULL if the first pipe_create() failed.
	 */
21876756Salfred	rpipe = wpipe = NULL;
21976364Salfred	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
22076364Salfred		pipeclose(rpipe);
22176364Salfred		pipeclose(wpipe);
22291968Salfred		free(pmtx, M_TEMP);
22376364Salfred		return (ENFILE);
22476364Salfred	}
22576364Salfred
22613907Sdyson	rpipe->pipe_state |= PIPE_DIRECTOK;
22713907Sdyson	wpipe->pipe_state |= PIPE_DIRECTOK;
22813675Sdyson
22983366Sjulian	error = falloc(td, &rf, &fd);
23070915Sdwmalone	if (error) {
23170915Sdwmalone		pipeclose(rpipe);
23270915Sdwmalone		pipeclose(wpipe);
23391968Salfred		free(pmtx, M_TEMP);
23470915Sdwmalone		return (error);
23570915Sdwmalone	}
	/* Extra reference on rf for this function; dropped by the final fdrop(). */
23670915Sdwmalone	fhold(rf);
23783366Sjulian	td->td_retval[0] = fd;
23870915Sdwmalone
23970803Sdwmalone	/*
24070803Sdwmalone	 * Warning: once we've gotten past allocation of the fd for the
24170803Sdwmalone	 * read-side, we can only drop the read side via fdrop() in order
24270803Sdwmalone	 * to avoid races against processes which manage to dup() the read
24370803Sdwmalone	 * side while we are blocked trying to allocate the write side.
24470803Sdwmalone	 */
	/*
	 * NOTE(review): f_ops is set to pipeops here, but rpipe->pipe_mtxp
	 * and pipe_peer are only assigned further down.  Whether another
	 * thread sharing this descriptor table can reach the pipe in that
	 * window cannot be determined from this function -- confirm.
	 */
24589306Salfred	FILE_LOCK(rf);
24613675Sdyson	rf->f_flag = FREAD | FWRITE;
24713675Sdyson	rf->f_type = DTYPE_PIPE;
248109153Sdillon	rf->f_data = rpipe;
24913675Sdyson	rf->f_ops = &pipeops;
25089306Salfred	FILE_UNLOCK(rf);
25183366Sjulian	error = falloc(td, &wf, &fd);
25270915Sdwmalone	if (error) {
		/*
		 * Undo the read side: if the descriptor slot still refers to
		 * rf, clear it and drop one reference (presumably the one the
		 * table held); the second fdrop() below releases the
		 * reference taken with fhold() above.
		 */
25389306Salfred		FILEDESC_LOCK(fdp);
25483366Sjulian		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
25583366Sjulian			fdp->fd_ofiles[td->td_retval[0]] = NULL;
25689306Salfred			FILEDESC_UNLOCK(fdp);
25783366Sjulian			fdrop(rf, td);
25889306Salfred		} else
25989306Salfred			FILEDESC_UNLOCK(fdp);
26083366Sjulian		fdrop(rf, td);
26170915Sdwmalone		/* rpipe has been closed by fdrop(). */
26270915Sdwmalone		pipeclose(wpipe);
26391968Salfred		free(pmtx, M_TEMP);
26470915Sdwmalone		return (error);
26570915Sdwmalone	}
26689306Salfred	FILE_LOCK(wf);
26713675Sdyson	wf->f_flag = FREAD | FWRITE;
26813675Sdyson	wf->f_type = DTYPE_PIPE;
269109153Sdillon	wf->f_data = wpipe;
27013675Sdyson	wf->f_ops = &pipeops;
27189306Salfred	FILE_UNLOCK(wf);
27283366Sjulian	td->td_retval[1] = fd;
	/* Connect the two endpoints to each other. */
27313675Sdyson	rpipe->pipe_peer = wpipe;
27413675Sdyson	wpipe->pipe_peer = rpipe;
275101768Srwatson#ifdef MAC
276101768Srwatson	/*
277101768Srwatson	 * struct pipe represents a pipe endpoint.  The MAC label is shared
278101768Srwatson	 * between the connected endpoints.  As a result mac_init_pipe() and
279101768Srwatson	 * mac_create_pipe() should only be called on one of the endpoints
280101768Srwatson	 * after they have been connected.
281101768Srwatson	 */
282101768Srwatson	mac_init_pipe(rpipe);
283101768Srwatson	mac_create_pipe(td->td_ucred, rpipe);
284101768Srwatson#endif
	/* Initialize the shared mutex and publish it on both endpoints. */
28593818Sjhb	mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
28691968Salfred	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
	/* Release the reference taken with fhold(rf) above. */
28783366Sjulian	fdrop(rf, td);
28813675Sdyson
28913675Sdyson	return (0);
29013675Sdyson}
29113675Sdyson
29213909Sdyson/*
29313909Sdyson * Allocate kva for pipe circular buffer, the space is pageable
29476364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails
29576364Salfred * it will retain the old buffer.
29676364Salfred * If it fails it will return ENOMEM.
29713909Sdyson */
29876364Salfredstatic int
29976364Salfredpipespace(cpipe, size)
30013675Sdyson	struct pipe *cpipe;
30176364Salfred	int size;
30213675Sdyson{
30376364Salfred	struct vm_object *object;
30476364Salfred	caddr_t buffer;
30513688Sdyson	int npages, error;
30613675Sdyson
30779224Sdillon	GIANT_REQUIRED;
30891412Salfred	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
30991412Salfred	       ("pipespace: pipe mutex locked"));
31079224Sdillon
31176364Salfred	npages = round_page(size)/PAGE_SIZE;
31213675Sdyson	/*
31313675Sdyson	 * Create an object, I don't like the idea of paging to/from
31413675Sdyson	 * kernel_object.
31514037Sdyson	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
31613675Sdyson	 */
31776364Salfred	object = vm_object_allocate(OBJT_DEFAULT, npages);
31876364Salfred	buffer = (caddr_t) vm_map_min(kernel_map);
31913675Sdyson
32013675Sdyson	/*
32113675Sdyson	 * Insert the object into the kernel map, and allocate kva for it.
32213675Sdyson	 * The map entry is, by default, pageable.
32314037Sdyson	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
32413675Sdyson	 */
32576364Salfred	error = vm_map_find(kernel_map, object, 0,
32676364Salfred		(vm_offset_t *) &buffer, size, 1,
32713688Sdyson		VM_PROT_ALL, VM_PROT_ALL, 0);
32813675Sdyson
32976364Salfred	if (error != KERN_SUCCESS) {
33076364Salfred		vm_object_deallocate(object);
33176364Salfred		return (ENOMEM);
33276364Salfred	}
33376364Salfred
33476364Salfred	/* free old resources if we're resizing */
33576364Salfred	pipe_free_kmem(cpipe);
33676364Salfred	cpipe->pipe_buffer.object = object;
33776364Salfred	cpipe->pipe_buffer.buffer = buffer;
33876364Salfred	cpipe->pipe_buffer.size = size;
33976364Salfred	cpipe->pipe_buffer.in = 0;
34076364Salfred	cpipe->pipe_buffer.out = 0;
34176364Salfred	cpipe->pipe_buffer.cnt = 0;
342110816Salc	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
34376364Salfred	return (0);
34413907Sdyson}
34513688Sdyson
34613907Sdyson/*
34713907Sdyson * initialize and allocate VM and memory for pipe
34813907Sdyson */
34976364Salfredstatic int
35076364Salfredpipe_create(cpipep)
35176364Salfred	struct pipe **cpipep;
35276364Salfred{
35313907Sdyson	struct pipe *cpipe;
35476364Salfred	int error;
35513907Sdyson
356111119Simp	*cpipep = uma_zalloc(pipe_zone, M_WAITOK);
35776364Salfred	if (*cpipep == NULL)
35876364Salfred		return (ENOMEM);
35917163Sdyson
36076364Salfred	cpipe = *cpipep;
36176364Salfred
36276364Salfred	/* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */
36376364Salfred	cpipe->pipe_buffer.object = NULL;
36476364Salfred#ifndef PIPE_NODIRECT
365102241Sarchie	cpipe->pipe_map.kva = 0;
36676364Salfred#endif
36776364Salfred	/*
36876364Salfred	 * protect so pipeclose() doesn't follow a junk pointer
36976364Salfred	 * if pipespace() fails.
37076364Salfred	 */
37176754Salfred	bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
37213675Sdyson	cpipe->pipe_state = 0;
37313675Sdyson	cpipe->pipe_peer = NULL;
37413675Sdyson	cpipe->pipe_busy = 0;
37513907Sdyson
37614037Sdyson#ifndef PIPE_NODIRECT
37713907Sdyson	/*
37813907Sdyson	 * pipe data structure initializations to support direct pipe I/O
37913907Sdyson	 */
38013907Sdyson	cpipe->pipe_map.cnt = 0;
38113907Sdyson	cpipe->pipe_map.kva = 0;
38213907Sdyson	cpipe->pipe_map.pos = 0;
38313907Sdyson	cpipe->pipe_map.npages = 0;
38417124Sbde	/* cpipe->pipe_map.ms[] = invalid */
38514037Sdyson#endif
38676364Salfred
38791412Salfred	cpipe->pipe_mtxp = NULL;	/* avoid pipespace assertion */
38876364Salfred	error = pipespace(cpipe, PIPE_SIZE);
38976760Salfred	if (error)
39076364Salfred		return (error);
39176364Salfred
39276364Salfred	vfs_timestamp(&cpipe->pipe_ctime);
39376364Salfred	cpipe->pipe_atime = cpipe->pipe_ctime;
39476364Salfred	cpipe->pipe_mtime = cpipe->pipe_ctime;
39576364Salfred
39676364Salfred	return (0);
39713675Sdyson}
39813675Sdyson
39913675Sdyson
40013675Sdyson/*
40113675Sdyson * lock a pipe for I/O, blocking other access
40213675Sdyson */
40313675Sdysonstatic __inline int
40413907Sdysonpipelock(cpipe, catch)
40513675Sdyson	struct pipe *cpipe;
40613907Sdyson	int catch;
40713675Sdyson{
40813776Sdyson	int error;
40976364Salfred
41091362Salfred	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
41191362Salfred	while (cpipe->pipe_state & PIPE_LOCKFL) {
41213675Sdyson		cpipe->pipe_state |= PIPE_LWANT;
41391362Salfred		error = msleep(cpipe, PIPE_MTX(cpipe),
41491362Salfred		    catch ? (PRIBIO | PCATCH) : PRIBIO,
41576760Salfred		    "pipelk", 0);
41676760Salfred		if (error != 0)
41776760Salfred			return (error);
41813675Sdyson	}
41991362Salfred	cpipe->pipe_state |= PIPE_LOCKFL;
42076760Salfred	return (0);
42113675Sdyson}
42213675Sdyson
42313675Sdyson/*
42413675Sdyson * unlock a pipe I/O lock
42513675Sdyson */
42613675Sdysonstatic __inline void
42713675Sdysonpipeunlock(cpipe)
42813675Sdyson	struct pipe *cpipe;
42913675Sdyson{
43076364Salfred
43191362Salfred	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
43291362Salfred	cpipe->pipe_state &= ~PIPE_LOCKFL;
43313675Sdyson	if (cpipe->pipe_state & PIPE_LWANT) {
43413675Sdyson		cpipe->pipe_state &= ~PIPE_LWANT;
43514177Sdyson		wakeup(cpipe);
43613675Sdyson	}
43713675Sdyson}
43813675Sdyson
43914037Sdysonstatic __inline void
44014037Sdysonpipeselwakeup(cpipe)
44114037Sdyson	struct pipe *cpipe;
44214037Sdyson{
44376364Salfred
44414037Sdyson	if (cpipe->pipe_state & PIPE_SEL) {
44514037Sdyson		cpipe->pipe_state &= ~PIPE_SEL;
44614037Sdyson		selwakeup(&cpipe->pipe_sel);
44714037Sdyson	}
44841086Struckman	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
44995883Salfred		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
45059288Sjlemon	KNOTE(&cpipe->pipe_sel.si_note, 0);
45114037Sdyson}
45214037Sdyson
/*
 * fo_read handler for pipes.
 *
 * Copies data from the pipe stored in fp->f_data into uio until the request
 * is satisfied or no more data is immediately available.  Data is taken from
 * the kernel circular buffer when it holds any; otherwise, when a direct
 * write is pending (PIPE_DIRECTW with pipe_map.cnt != 0), straight from the
 * writer's mapped pages.  With nothing to read the call stops at EOF, stops
 * if some data was already transferred, fails with EAGAIN when the file is
 * FNONBLOCK, or sleeps until woken.
 *
 * fp:          file whose f_data is the read-side pipe.
 * uio:         destination description; uio_resid is the byte count wanted.
 * active_cred: credential passed to the MAC read check (MAC kernels only).
 * flags, td:   unused.
 *
 * Returns 0 on success (including EOF) or an errno value from the MAC
 * check, uiomove(), msleep()/pipelock(), or EAGAIN.
 */
45313675Sdyson/* ARGSUSED */
45413675Sdysonstatic int
455101941Srwatsonpipe_read(fp, uio, active_cred, flags, td)
45613675Sdyson	struct file *fp;
45713675Sdyson	struct uio *uio;
458101941Srwatson	struct ucred *active_cred;
45983366Sjulian	struct thread *td;
46045311Sdt	int flags;
46113675Sdyson{
462109153Sdillon	struct pipe *rpipe = fp->f_data;
46347748Salc	int error;
46413675Sdyson	int nread = 0;
46518863Sdyson	u_int size;
46613675Sdyson
	/*
	 * pipe_busy counts threads currently inside the pipe; it is
	 * decremented again at unlocked_error.  pipelock() then takes the
	 * I/O lock (PIPE_LOCKFL).
	 */
46791362Salfred	PIPE_LOCK(rpipe);
46813675Sdyson	++rpipe->pipe_busy;
46947748Salc	error = pipelock(rpipe, 1);
47047748Salc	if (error)
47147748Salc		goto unlocked_error;
47247748Salc
473101768Srwatson#ifdef MAC
474102115Srwatson	error = mac_check_pipe_read(active_cred, rpipe);
475101768Srwatson	if (error)
476101768Srwatson		goto locked_error;
477101768Srwatson#endif
478101768Srwatson
47913675Sdyson	while (uio->uio_resid) {
48013907Sdyson		/*
48113907Sdyson		 * normal pipe buffer receive
48213907Sdyson		 */
48313675Sdyson		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * Limit the copy to the contiguous run from "out" to
			 * the end of the buffer, to the bytes actually
			 * buffered, and to what the caller asked for.  Data
			 * that wraps around is picked up by the next
			 * iteration.
			 */
48418863Sdyson			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
48513675Sdyson			if (size > rpipe->pipe_buffer.cnt)
48613675Sdyson				size = rpipe->pipe_buffer.cnt;
48718863Sdyson			if (size > (u_int) uio->uio_resid)
48818863Sdyson				size = (u_int) uio->uio_resid;
48947748Salc
			/*
			 * The pipe mutex is released around uiomove(); the
			 * I/O lock (PIPE_LOCKFL) stays held throughout.
			 */
49091362Salfred			PIPE_UNLOCK(rpipe);
491116127Smux			error = uiomove(
492116127Smux			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
493116127Smux			    size, uio);
49491362Salfred			PIPE_LOCK(rpipe);
49576760Salfred			if (error)
49613675Sdyson				break;
49776760Salfred
49813675Sdyson			rpipe->pipe_buffer.out += size;
49913675Sdyson			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
50013675Sdyson				rpipe->pipe_buffer.out = 0;
50113675Sdyson
50213675Sdyson			rpipe->pipe_buffer.cnt -= size;
50347748Salc
50447748Salc			/*
50547748Salc			 * If there is no more to read in the pipe, reset
50647748Salc			 * its pointers to the beginning.  This improves
50747748Salc			 * cache hit stats.
50847748Salc			 */
50947748Salc			if (rpipe->pipe_buffer.cnt == 0) {
51047748Salc				rpipe->pipe_buffer.in = 0;
51147748Salc				rpipe->pipe_buffer.out = 0;
51247748Salc			}
51313675Sdyson			nread += size;
51414037Sdyson#ifndef PIPE_NODIRECT
51513907Sdyson		/*
51613907Sdyson		 * Direct copy, bypassing a kernel buffer.
51713907Sdyson		 */
51813907Sdyson		} else if ((size = rpipe->pipe_map.cnt) &&
51947748Salc			   (rpipe->pipe_state & PIPE_DIRECTW)) {
52047748Salc			caddr_t	va;
52118863Sdyson			if (size > (u_int) uio->uio_resid)
52218863Sdyson				size = (u_int) uio->uio_resid;
52347748Salc
			/* Source is the writer's pages as mapped at pipe_map.kva. */
52476760Salfred			va = (caddr_t) rpipe->pipe_map.kva +
52576760Salfred			    rpipe->pipe_map.pos;
52691362Salfred			PIPE_UNLOCK(rpipe);
52747748Salc			error = uiomove(va, size, uio);
52891362Salfred			PIPE_LOCK(rpipe);
52913907Sdyson			if (error)
53013907Sdyson				break;
53113907Sdyson			nread += size;
53213907Sdyson			rpipe->pipe_map.pos += size;
53313907Sdyson			rpipe->pipe_map.cnt -= size;
			/*
			 * Everything consumed: clear PIPE_DIRECTW and wake
			 * the writer, which sleeps in pipe_direct_write()
			 * while that flag is set.
			 */
53413907Sdyson			if (rpipe->pipe_map.cnt == 0) {
53513907Sdyson				rpipe->pipe_state &= ~PIPE_DIRECTW;
53613907Sdyson				wakeup(rpipe);
53713907Sdyson			}
53814037Sdyson#endif
53913675Sdyson		} else {
54013675Sdyson			/*
54113675Sdyson			 * detect EOF condition
54276760Salfred			 * read returns 0 on EOF, no need to set error
54313675Sdyson			 */
54476760Salfred			if (rpipe->pipe_state & PIPE_EOF)
54513675Sdyson				break;
54643623Sdillon
54713675Sdyson			/*
54813675Sdyson			 * If the "write-side" has been blocked, wake it up now.
54913675Sdyson			 */
55013675Sdyson			if (rpipe->pipe_state & PIPE_WANTW) {
55113675Sdyson				rpipe->pipe_state &= ~PIPE_WANTW;
55213675Sdyson				wakeup(rpipe);
55313675Sdyson			}
55443623Sdillon
55543623Sdillon			/*
55647748Salc			 * Break if some data was read.
55743623Sdillon			 */
55847748Salc			if (nread > 0)
55913675Sdyson				break;
56016960Sdyson
56143623Sdillon			/*
562116127Smux			 * Unlock the pipe buffer for our remaining processing.
563116127Smux			 * We will either break out with an error or we will
564116127Smux			 * sleep and relock to loop.
56543623Sdillon			 */
56647748Salc			pipeunlock(rpipe);
56743623Sdillon
56813675Sdyson			/*
56947748Salc			 * Handle non-blocking mode operation or
57047748Salc			 * wait for more data.
57113675Sdyson			 */
57276760Salfred			if (fp->f_flag & FNONBLOCK) {
57347748Salc				error = EAGAIN;
57476760Salfred			} else {
57547748Salc				rpipe->pipe_state |= PIPE_WANTR;
57691362Salfred				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
57791362Salfred				    PRIBIO | PCATCH,
57877140Salfred				    "piperd", 0)) == 0)
57947748Salc					error = pipelock(rpipe, 1);
58013675Sdyson			}
			/* The I/O lock is not held on this error exit. */
58147748Salc			if (error)
58247748Salc				goto unlocked_error;
58313675Sdyson		}
58413675Sdyson	}
585101768Srwatson#ifdef MAC
586101768Srwatsonlocked_error:
587101768Srwatson#endif
58847748Salc	pipeunlock(rpipe);
58913675Sdyson
59091362Salfred	/* XXX: should probably do this before getting any locks. */
59124101Sbde	if (error == 0)
59255112Sbde		vfs_timestamp(&rpipe->pipe_atime);
	/*
	 * Reached with the pipe mutex held and the I/O lock released (or
	 * never obtained).
	 */
59347748Salcunlocked_error:
59447748Salc	--rpipe->pipe_busy;
59513913Sdyson
59647748Salc	/*
59747748Salc	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
59847748Salc	 */
	/*
	 * NOTE(review): PIPE_WANT is set outside this function; presumably by
	 * a thread waiting for pipe_busy to reach zero -- confirm.
	 */
59913675Sdyson	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
60013675Sdyson		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
60113675Sdyson		wakeup(rpipe);
60213675Sdyson	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
60313675Sdyson		/*
60447748Salc		 * Handle write blocking hysteresis.
60513675Sdyson		 */
60613675Sdyson		if (rpipe->pipe_state & PIPE_WANTW) {
60713675Sdyson			rpipe->pipe_state &= ~PIPE_WANTW;
60813675Sdyson			wakeup(rpipe);
60913675Sdyson		}
61013675Sdyson	}
61114037Sdyson
	/* Notify select/kqueue/SIGIO watchers once at least PIPE_BUF bytes are free. */
61214802Sdyson	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
61314037Sdyson		pipeselwakeup(rpipe);
61414037Sdyson
61591362Salfred	PIPE_UNLOCK(rpipe);
61676760Salfred	return (error);
61713675Sdyson}
61813675Sdyson
61914037Sdyson#ifndef PIPE_NODIRECT
62013907Sdyson/*
62113907Sdyson * Map the sending processes' buffer into kernel space and wire it.
62213907Sdyson * This is similar to a physical write operation.
62313907Sdyson */
62413675Sdysonstatic int
62513907Sdysonpipe_build_write_buffer(wpipe, uio)
62613907Sdyson	struct pipe *wpipe;
62713675Sdyson	struct uio *uio;
62813675Sdyson{
62918863Sdyson	u_int size;
63094566Stmm	int i;
631112569Sjake	vm_offset_t addr, endaddr;
632112569Sjake	vm_paddr_t paddr;
63313907Sdyson
63479224Sdillon	GIANT_REQUIRED;
63591412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
63679224Sdillon
63718863Sdyson	size = (u_int) uio->uio_iov->iov_len;
63813907Sdyson	if (size > wpipe->pipe_buffer.size)
63913907Sdyson		size = wpipe->pipe_buffer.size;
64013907Sdyson
64140286Sdg	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
64276760Salfred	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
64376760Salfred	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
64494566Stmm		vm_page_t m;
64594566Stmm
64699899Salc		/*
64799899Salc		 * vm_fault_quick() can sleep.  Consequently,
64899899Salc		 * vm_page_lock_queue() and vm_page_unlock_queue()
64999899Salc		 * should not be performed outside of this loop.
65099899Salc		 */
65151474Sdillon		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
65294608Stmm		    (paddr = pmap_extract(vmspace_pmap(curproc->p_vmspace),
65394608Stmm		     addr)) == 0) {
65413907Sdyson			int j;
65576760Salfred
65699899Salc			vm_page_lock_queues();
65776760Salfred			for (j = 0; j < i; j++)
65840700Sdg				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
65999899Salc			vm_page_unlock_queues();
66076760Salfred			return (EFAULT);
66113907Sdyson		}
66213907Sdyson
66394566Stmm		m = PHYS_TO_VM_PAGE(paddr);
66499899Salc		vm_page_lock_queues();
66513907Sdyson		vm_page_wire(m);
66699899Salc		vm_page_unlock_queues();
66713907Sdyson		wpipe->pipe_map.ms[i] = m;
66813907Sdyson	}
66913907Sdyson
67013907Sdyson/*
67113907Sdyson * set up the control block
67213907Sdyson */
67313907Sdyson	wpipe->pipe_map.npages = i;
67476760Salfred	wpipe->pipe_map.pos =
67576760Salfred	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
67613907Sdyson	wpipe->pipe_map.cnt = size;
67713907Sdyson
67813907Sdyson/*
67913907Sdyson * and map the buffer
68013907Sdyson */
68113907Sdyson	if (wpipe->pipe_map.kva == 0) {
68213912Sdyson		/*
68313912Sdyson		 * We need to allocate space for an extra page because the
68413912Sdyson		 * address range might (will) span pages at times.
68513912Sdyson		 */
68613907Sdyson		wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
68713912Sdyson			wpipe->pipe_buffer.size + PAGE_SIZE);
688110816Salc		atomic_add_int(&amountpipekva,
689110816Salc		    wpipe->pipe_buffer.size + PAGE_SIZE);
69013907Sdyson	}
69113907Sdyson	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
69213907Sdyson		wpipe->pipe_map.npages);
69313907Sdyson
69413907Sdyson/*
69513907Sdyson * and update the uio data
69613907Sdyson */
69713907Sdyson
69813907Sdyson	uio->uio_iov->iov_len -= size;
699104908Smike	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
70013907Sdyson	if (uio->uio_iov->iov_len == 0)
70113907Sdyson		uio->uio_iov++;
70213907Sdyson	uio->uio_resid -= size;
70313907Sdyson	uio->uio_offset += size;
70476760Salfred	return (0);
70513907Sdyson}
70613907Sdyson
70713907Sdyson/*
70813907Sdyson * unmap and unwire the process buffer
70913907Sdyson */
71013907Sdysonstatic void
71113907Sdysonpipe_destroy_write_buffer(wpipe)
71276760Salfred	struct pipe *wpipe;
71313907Sdyson{
71413907Sdyson	int i;
71576364Salfred
71679224Sdillon	GIANT_REQUIRED;
71791412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
71879224Sdillon
71917163Sdyson	if (wpipe->pipe_map.kva) {
72017163Sdyson		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
72113907Sdyson
72213907Sdyson		if (amountpipekva > MAXPIPEKVA) {
72313907Sdyson			vm_offset_t kva = wpipe->pipe_map.kva;
72413907Sdyson			wpipe->pipe_map.kva = 0;
72513907Sdyson			kmem_free(kernel_map, kva,
72613912Sdyson				wpipe->pipe_buffer.size + PAGE_SIZE);
727110816Salc			atomic_subtract_int(&amountpipekva,
728110816Salc			    wpipe->pipe_buffer.size + PAGE_SIZE);
72913907Sdyson		}
73013907Sdyson	}
73199899Salc	vm_page_lock_queues();
73276760Salfred	for (i = 0; i < wpipe->pipe_map.npages; i++)
73340700Sdg		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
73499899Salc	vm_page_unlock_queues();
73591653Stanimura	wpipe->pipe_map.npages = 0;
73613907Sdyson}
73713907Sdyson
73813907Sdyson/*
73913907Sdyson * In the case of a signal, the writing process might go away.  This
74013907Sdyson * code copies the data into the circular buffer so that the source
74113907Sdyson * pages can be freed without loss of data.
74213907Sdyson */
74313907Sdysonstatic void
74413907Sdysonpipe_clone_write_buffer(wpipe)
74576364Salfred	struct pipe *wpipe;
74613907Sdyson{
74713907Sdyson	int size;
74813907Sdyson	int pos;
74913907Sdyson
75091362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
75113907Sdyson	size = wpipe->pipe_map.cnt;
75213907Sdyson	pos = wpipe->pipe_map.pos;
75313907Sdyson
75413907Sdyson	wpipe->pipe_buffer.in = size;
75513907Sdyson	wpipe->pipe_buffer.out = 0;
75613907Sdyson	wpipe->pipe_buffer.cnt = size;
75713907Sdyson	wpipe->pipe_state &= ~PIPE_DIRECTW;
75813907Sdyson
75991412Salfred	PIPE_GET_GIANT(wpipe);
76092959Salfred	bcopy((caddr_t) wpipe->pipe_map.kva + pos,
761100527Salfred	    wpipe->pipe_buffer.buffer, size);
76213907Sdyson	pipe_destroy_write_buffer(wpipe);
76391412Salfred	PIPE_DROP_GIANT(wpipe);
76413907Sdyson}
76513907Sdyson
76613907Sdyson/*
76713907Sdyson * This implements the pipe buffer write mechanism.  Note that only
76813907Sdyson * a direct write OR a normal pipe write can be pending at any given time.
76913907Sdyson * If there are any characters in the pipe buffer, the direct write will
77013907Sdyson * be deferred until the receiving process grabs all of the bytes from
77113907Sdyson * the pipe buffer.  Then the direct mapping write is set-up.
77213907Sdyson */
/*
 * Perform one direct (page-mapped) write on wpipe.
 *
 * wpipe: pipe to write to; its mutex must be held on entry and is held
 *        again on return.
 * uio:   source; the first iovec is consumed by pipe_build_write_buffer().
 *
 * Waits until no other direct write is pending and the circular buffer is
 * empty, maps the caller's pages, then sleeps until the data has been
 * consumed (PIPE_DIRECTW cleared), the sleep is interrupted, or EOF is seen.
 *
 * Returns 0 on success, EPIPE on EOF, EFAULT from
 * pipe_build_write_buffer(), or the msleep() error.
 */
77313907Sdysonstatic int
77413907Sdysonpipe_direct_write(wpipe, uio)
77513907Sdyson	struct pipe *wpipe;
77613907Sdyson	struct uio *uio;
77713907Sdyson{
77813907Sdyson	int error;
77976364Salfred
78013951Sdysonretry:
78191362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	/* Phase 1: wait for any earlier direct write to finish. */
78213907Sdyson	while (wpipe->pipe_state & PIPE_DIRECTW) {
78376760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
78413951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
78513951Sdyson			wakeup(wpipe);
78613951Sdyson		}
78713992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
78891362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
78991362Salfred		    PRIBIO | PCATCH, "pipdww", 0);
79014802Sdyson		if (error)
79113907Sdyson			goto error1;
79214802Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
79314802Sdyson			error = EPIPE;
79414802Sdyson			goto error1;
79514802Sdyson		}
79613907Sdyson	}
79713907Sdyson	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	/*
	 * Phase 2: buffered data must be consumed first.  Wake a waiting
	 * reader, sleep once, then re-check everything from the top.
	 */
79813951Sdyson	if (wpipe->pipe_buffer.cnt > 0) {
79976760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
80013951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
80113951Sdyson			wakeup(wpipe);
80213951Sdyson		}
80313951Sdyson
80413992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
80591362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
80691362Salfred		    PRIBIO | PCATCH, "pipdwc", 0);
80714802Sdyson		if (error)
80813907Sdyson			goto error1;
80914802Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
81014802Sdyson			error = EPIPE;
81114802Sdyson			goto error1;
81213907Sdyson		}
81313951Sdyson		goto retry;
81413907Sdyson	}
81513907Sdyson
	/*
	 * Phase 3: claim the direct-write slot and map the caller's pages.
	 * pipelock() is called with catch == 0 and its return value is
	 * ignored.  The I/O lock must be held because PIPE_GET_GIANT()
	 * asserts PIPE_LOCKFL.
	 */
81613951Sdyson	wpipe->pipe_state |= PIPE_DIRECTW;
81713951Sdyson
81892305Salfred	pipelock(wpipe, 0);
81991362Salfred	PIPE_GET_GIANT(wpipe);
82013907Sdyson	error = pipe_build_write_buffer(wpipe, uio);
82191362Salfred	PIPE_DROP_GIANT(wpipe);
82292305Salfred	pipeunlock(wpipe);
82313907Sdyson	if (error) {
82413907Sdyson		wpipe->pipe_state &= ~PIPE_DIRECTW;
82513907Sdyson		goto error1;
82613907Sdyson	}
82713907Sdyson
	/*
	 * Phase 4: wait for the reader.  pipe_read() clears PIPE_DIRECTW and
	 * issues a wakeup once pipe_map.cnt reaches zero.
	 */
82813907Sdyson	error = 0;
82913907Sdyson	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
83013907Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
83113907Sdyson			pipelock(wpipe, 0);
83291362Salfred			PIPE_GET_GIANT(wpipe);
83313907Sdyson			pipe_destroy_write_buffer(wpipe);
83491362Salfred			PIPE_DROP_GIANT(wpipe);
835112981Shsu			pipeselwakeup(wpipe);
83613907Sdyson			pipeunlock(wpipe);
83714802Sdyson			error = EPIPE;
83814802Sdyson			goto error1;
83913907Sdyson		}
84013992Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
84113992Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
84213992Sdyson			wakeup(wpipe);
84313992Sdyson		}
84414037Sdyson		pipeselwakeup(wpipe);
84591362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
84691362Salfred		    "pipdwt", 0);
84713907Sdyson	}
84813907Sdyson
	/*
	 * Phase 5: finish.  PIPE_DIRECTW still being set means the loop
	 * ended because the sleep returned an error before the data was
	 * fully consumed.
	 */
84913907Sdyson	pipelock(wpipe,0);
85013907Sdyson	if (wpipe->pipe_state & PIPE_DIRECTW) {
85113907Sdyson		/*
85213907Sdyson		 * this bit of trickery substitutes a kernel buffer for
85313907Sdyson		 * the process that might be going away.
85413907Sdyson		 */
85513907Sdyson		pipe_clone_write_buffer(wpipe);
85613907Sdyson	} else {
85791412Salfred		PIPE_GET_GIANT(wpipe);
85813907Sdyson		pipe_destroy_write_buffer(wpipe);
85991412Salfred		PIPE_DROP_GIANT(wpipe);
86013907Sdyson	}
86113907Sdyson	pipeunlock(wpipe);
86276760Salfred	return (error);
86313907Sdyson
	/* Failure exit: wake any thread sleeping on the pipe before returning. */
86413907Sdysonerror1:
86513907Sdyson	wakeup(wpipe);
86676760Salfred	return (error);
86713907Sdyson}
86814037Sdyson#endif
86913907Sdyson
/*
 * Write the data described by uio to the pipe behind fp.
 *
 * fp->f_data is this descriptor's own pipe end (rpipe); the data is queued
 * on its peer (wpipe).  The work is done under PIPE_LOCK(rpipe), which is
 * dropped only around the uiomove() copies.  active_cred is passed to the
 * MAC check; flags and td are not referenced.
 *
 * Returns 0 on success, EPIPE when the peer is missing or marked PIPE_EOF,
 * EAGAIN when FNONBLOCK is set and there is no room, or the error from the
 * MAC check, pipelock(), msleep(), uiomove() or pipe_direct_write().  An
 * EPIPE is converted to 0 when the whole request was consumed and the
 * pipe buffer is empty.
 */
87016960Sdysonstatic int
871101941Srwatsonpipe_write(fp, uio, active_cred, flags, td)
87216960Sdyson	struct file *fp;
87313907Sdyson	struct uio *uio;
874101941Srwatson	struct ucred *active_cred;
87583366Sjulian	struct thread *td;
87645311Sdt	int flags;
87713907Sdyson{
87813675Sdyson	int error = 0;
87913913Sdyson	int orig_resid;
88016960Sdyson	struct pipe *wpipe, *rpipe;
88116960Sdyson
	/*
	 * NOTE(review): pipe_peer is sampled before PIPE_LOCK() is taken,
	 * while pipeclose() clears it with the mutex held -- confirm this
	 * cannot yield a stale wpipe.
	 */
882109153Sdillon	rpipe = fp->f_data;
88316960Sdyson	wpipe = rpipe->pipe_peer;
88416960Sdyson
88591395Salfred	PIPE_LOCK(rpipe);
88613675Sdyson	/*
88713675Sdyson	 * detect loss of pipe read side, issue SIGPIPE if lost.
88813675Sdyson	 */
88916960Sdyson	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
89091395Salfred		PIPE_UNLOCK(rpipe);
89176760Salfred		return (EPIPE);
89213675Sdyson	}
893101768Srwatson#ifdef MAC
894102115Srwatson	error = mac_check_pipe_write(active_cred, wpipe);
895101768Srwatson	if (error) {
896101768Srwatson		PIPE_UNLOCK(rpipe);
897101768Srwatson		return (error);
898101768Srwatson	}
899101768Srwatson#endif
	/* pipeclose() sleeps while pipe_busy is non-zero. */
90077676Sdillon	++wpipe->pipe_busy;
90113675Sdyson
90217163Sdyson	/*
90317163Sdyson	 * If it is advantageous to resize the pipe buffer, do
90417163Sdyson	 * so.
90517163Sdyson	 */
90617163Sdyson	if ((uio->uio_resid > PIPE_SIZE) &&
90717163Sdyson		(nbigpipe < LIMITBIGPIPES) &&
90817163Sdyson		(wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
90917163Sdyson		(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
91017163Sdyson		(wpipe->pipe_buffer.cnt == 0)) {
91117163Sdyson
912105009Salfred		if ((error = pipelock(wpipe, 1)) == 0) {
91392305Salfred			PIPE_GET_GIANT(wpipe);
			/* A failed resize is not an error; only success is counted. */
91476364Salfred			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
91576364Salfred				nbigpipe++;
91692305Salfred			PIPE_DROP_GIANT(wpipe);
91713907Sdyson			pipeunlock(wpipe);
91813907Sdyson		}
91913907Sdyson	}
92077676Sdillon
92177676Sdillon	/*
92277676Sdillon	 * If an early error occurred, unbusy and return, waking up any pending
92377676Sdillon	 * readers.
92477676Sdillon	 */
92577676Sdillon	if (error) {
92677676Sdillon		--wpipe->pipe_busy;
92777676Sdillon		if ((wpipe->pipe_busy == 0) &&
92877676Sdillon		    (wpipe->pipe_state & PIPE_WANT)) {
92977676Sdillon			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
93077676Sdillon			wakeup(wpipe);
93177676Sdillon		}
93291395Salfred		PIPE_UNLOCK(rpipe);
93377676Sdillon		return(error);
93477676Sdillon	}
93576364Salfred
	/* Remembered so the PIPE_BUF atomicity test uses the full request size. */
93613913Sdyson	orig_resid = uio->uio_resid;
93777676Sdillon
93813675Sdyson	while (uio->uio_resid) {
93913907Sdyson		int space;
94076760Salfred
94114037Sdyson#ifndef PIPE_NODIRECT
94213907Sdyson		/*
94313907Sdyson		 * If the transfer is large, we can gain performance if
94413907Sdyson		 * we do process-to-process copies directly.
94516416Sdyson		 * If the write is non-blocking, we don't use the
94616416Sdyson		 * direct write mechanism.
94758505Sdillon		 *
94858505Sdillon		 * The direct write mechanism will detect the reader going
94958505Sdillon		 * away on us.
95013907Sdyson		 */
		/*
		 * NOTE(review): the iov_len >= PIPE_MINDIRECT test appears
		 * twice in this condition; the second one is redundant.
		 */
95117163Sdyson		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
95217163Sdyson		    (fp->f_flag & FNONBLOCK) == 0 &&
95317163Sdyson			(wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) &&
95413907Sdyson			(uio->uio_iov->iov_len >= PIPE_MINDIRECT)) {
955105009Salfred			error = pipe_direct_write(wpipe, uio);
95676760Salfred			if (error)
95713907Sdyson				break;
95813907Sdyson			continue;
95991362Salfred		}
96014037Sdyson#endif
96113907Sdyson
96213907Sdyson		/*
96313907Sdyson		 * Pipe buffered writes cannot be coincidental with
96413907Sdyson		 * direct writes.  We wait until the currently executing
96513907Sdyson		 * direct write is completed before we start filling the
96658505Sdillon		 * pipe buffer.  We break out if a signal occurs or the
96758505Sdillon		 * reader goes away.
96813907Sdyson		 */
96913907Sdyson	retrywrite:
97013907Sdyson		while (wpipe->pipe_state & PIPE_DIRECTW) {
97113992Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
97213992Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
97313992Sdyson				wakeup(wpipe);
97413992Sdyson			}
97591395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
97691362Salfred			    "pipbww", 0);
97758505Sdillon			if (wpipe->pipe_state & PIPE_EOF)
97858505Sdillon				break;
97913907Sdyson			if (error)
98013907Sdyson				break;
98113907Sdyson		}
		/*
		 * NOTE(review): when the loop above ends on an msleep()
		 * error without PIPE_EOF, only the EOF case is handled
		 * here and execution continues with that error pending.
		 */
98258505Sdillon		if (wpipe->pipe_state & PIPE_EOF) {
98358505Sdillon			error = EPIPE;
98458505Sdillon			break;
98558505Sdillon		}
98613907Sdyson
98713907Sdyson		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
98814644Sdyson
98914644Sdyson		/* Writes of size <= PIPE_BUF must be atomic. */
99013913Sdyson		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
99113913Sdyson			space = 0;
99213907Sdyson
99317163Sdyson		if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) {
99413907Sdyson			if ((error = pipelock(wpipe,1)) == 0) {
99554534Stegge				int size;	/* Transfer size */
99654534Stegge				int segsize;	/* first segment to transfer */
99776760Salfred
99813907Sdyson				/*
99913907Sdyson				 * It is possible for a direct write to
100013907Sdyson				 * slip in on us... handle it here...
100113907Sdyson				 */
100213907Sdyson				if (wpipe->pipe_state & PIPE_DIRECTW) {
100313907Sdyson					pipeunlock(wpipe);
100413907Sdyson					goto retrywrite;
100513907Sdyson				}
100654534Stegge				/*
100754534Stegge				 * If a process blocked in uiomove, our
100854534Stegge				 * value for space might be bad.
100958505Sdillon				 *
101058505Sdillon				 * XXX will we be ok if the reader has gone
101158505Sdillon				 * away here?
101254534Stegge				 */
101354534Stegge				if (space > wpipe->pipe_buffer.size -
101454534Stegge				    wpipe->pipe_buffer.cnt) {
101554534Stegge					pipeunlock(wpipe);
101654534Stegge					goto retrywrite;
101754534Stegge				}
101854534Stegge
101954534Stegge				/*
102054534Stegge				 * Transfer size is minimum of uio transfer
102154534Stegge				 * and free space in pipe buffer.
102254534Stegge				 */
102354534Stegge				if (space > uio->uio_resid)
102454534Stegge					size = uio->uio_resid;
102554534Stegge				else
102654534Stegge					size = space;
102754534Stegge				/*
102854534Stegge				 * First segment to transfer is minimum of
102954534Stegge				 * transfer size and contiguous space in
103054534Stegge				 * pipe buffer.  If first segment to transfer
103154534Stegge				 * is less than the transfer size, we've got
103254534Stegge				 * a wraparound in the buffer.
103354534Stegge				 */
103454534Stegge				segsize = wpipe->pipe_buffer.size -
103554534Stegge					wpipe->pipe_buffer.in;
103654534Stegge				if (segsize > size)
103754534Stegge					segsize = size;
103854534Stegge
103954534Stegge				/* Transfer first segment */
104054534Stegge
				/* The pipe mutex is not held across uiomove(). */
104191395Salfred				PIPE_UNLOCK(rpipe);
104254534Stegge				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
104354534Stegge						segsize, uio);
104491395Salfred				PIPE_LOCK(rpipe);
104554534Stegge
104654534Stegge				if (error == 0 && segsize < size) {
104754534Stegge					/*
104854534Stegge					 * Transfer remaining part now, to
104954534Stegge					 * support atomic writes.  Wraparound
105054534Stegge					 * happened.
105154534Stegge					 */
105254534Stegge					if (wpipe->pipe_buffer.in + segsize !=
105354534Stegge					    wpipe->pipe_buffer.size)
1054116127Smux						panic("Expected pipe buffer "
1055116127Smux						    "wraparound disappeared");
105654534Stegge
105791395Salfred					PIPE_UNLOCK(rpipe);
1058116127Smux					error = uiomove(
1059116127Smux					    &wpipe->pipe_buffer.buffer[0],
1060116127Smux				    	    size - segsize, uio);
106191395Salfred					PIPE_LOCK(rpipe);
106254534Stegge				}
				/* Only publish the new data once every copy succeeded. */
106354534Stegge				if (error == 0) {
106454534Stegge					wpipe->pipe_buffer.in += size;
106554534Stegge					if (wpipe->pipe_buffer.in >=
106654534Stegge					    wpipe->pipe_buffer.size) {
1067116127Smux						if (wpipe->pipe_buffer.in !=
1068116127Smux						    size - segsize +
1069116127Smux						    wpipe->pipe_buffer.size)
1070116127Smux							panic("Expected "
1071116127Smux							    "wraparound bad");
1072116127Smux						wpipe->pipe_buffer.in = size -
1073116127Smux						    segsize;
107454534Stegge					}
107554534Stegge
107654534Stegge					wpipe->pipe_buffer.cnt += size;
1077116127Smux					if (wpipe->pipe_buffer.cnt >
1078116127Smux					    wpipe->pipe_buffer.size)
107954534Stegge						panic("Pipe buffer overflow");
108054534Stegge
108154534Stegge				}
108213675Sdyson				pipeunlock(wpipe);
108313675Sdyson			}
108413675Sdyson			if (error)
108513675Sdyson				break;
108613675Sdyson
108713675Sdyson		} else {
108813675Sdyson			/*
108913675Sdyson			 * If the "read-side" has been blocked, wake it up now.
109013675Sdyson			 */
109113675Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
109213675Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
109313675Sdyson				wakeup(wpipe);
109413675Sdyson			}
109514037Sdyson
109613675Sdyson			/*
109713675Sdyson			 * don't block on non-blocking I/O
109813675Sdyson			 */
109916960Sdyson			if (fp->f_flag & FNONBLOCK) {
110013907Sdyson				error = EAGAIN;
110113675Sdyson				break;
110213675Sdyson			}
110313907Sdyson
110414037Sdyson			/*
110514037Sdyson			 * We have no more space and have something to offer,
110629356Speter			 * wake up select/poll.
110714037Sdyson			 */
110814037Sdyson			pipeselwakeup(wpipe);
110914037Sdyson
111013675Sdyson			wpipe->pipe_state |= PIPE_WANTW;
111191395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe),
111291362Salfred			    PRIBIO | PCATCH, "pipewr", 0);
111376760Salfred			if (error != 0)
111413675Sdyson				break;
111513675Sdyson			/*
111613675Sdyson			 * If read side wants to go away, we just issue a signal
111713675Sdyson			 * to ourselves.
111813675Sdyson			 */
111913675Sdyson			if (wpipe->pipe_state & PIPE_EOF) {
112013774Sdyson				error = EPIPE;
112113907Sdyson				break;
112213675Sdyson			}
112313675Sdyson		}
112413675Sdyson	}
112513675Sdyson
	/* Drop the busy reference taken before the transfer started. */
112614644Sdyson	--wpipe->pipe_busy;
112777676Sdillon
112876760Salfred	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
112976760Salfred		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
113013675Sdyson		wakeup(wpipe);
113113675Sdyson	} else if (wpipe->pipe_buffer.cnt > 0) {
113213675Sdyson		/*
113313675Sdyson		 * If we have put any characters in the buffer, we wake up
113413675Sdyson		 * the reader.
113513675Sdyson		 */
113613675Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
113713675Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
113813675Sdyson			wakeup(wpipe);
113913675Sdyson		}
114013675Sdyson	}
114113909Sdyson
114213909Sdyson	/*
114313909Sdyson	 * Don't return EPIPE if I/O was successful
114413909Sdyson	 */
114513907Sdyson	if ((wpipe->pipe_buffer.cnt == 0) &&
114677676Sdillon	    (uio->uio_resid == 0) &&
114777676Sdillon	    (error == EPIPE)) {
114813907Sdyson		error = 0;
114977676Sdillon	}
115013913Sdyson
	/* The modification time is only updated for a successful write. */
115124101Sbde	if (error == 0)
115255112Sbde		vfs_timestamp(&wpipe->pipe_mtime);
115324101Sbde
115414037Sdyson	/*
115514037Sdyson	 * We have something to offer,
115629356Speter	 * wake up select/poll.
115714037Sdyson	 */
115814177Sdyson	if (wpipe->pipe_buffer.cnt)
115914037Sdyson		pipeselwakeup(wpipe);
116013907Sdyson
116191395Salfred	PIPE_UNLOCK(rpipe);
116276760Salfred	return (error);
116313675Sdyson}
116413675Sdyson
116513675Sdyson/*
116613675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets.
116713675Sdyson */
1168104094Sphkstatic int
1169102003Srwatsonpipe_ioctl(fp, cmd, data, active_cred, td)
117013675Sdyson	struct file *fp;
117136735Sdfr	u_long cmd;
117299009Salfred	void *data;
1173102003Srwatson	struct ucred *active_cred;
117483366Sjulian	struct thread *td;
117513675Sdyson{
1176109153Sdillon	struct pipe *mpipe = fp->f_data;
1177101768Srwatson#ifdef MAC
1178101768Srwatson	int error;
1179104269Srwatson#endif
118013675Sdyson
1181104269Srwatson	PIPE_LOCK(mpipe);
1182104269Srwatson
1183104269Srwatson#ifdef MAC
1184102003Srwatson	error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data);
1185101768Srwatson	if (error)
1186101768Srwatson		return (error);
1187101768Srwatson#endif
1188101768Srwatson
118913675Sdyson	switch (cmd) {
119013675Sdyson
119113675Sdyson	case FIONBIO:
1192104269Srwatson		PIPE_UNLOCK(mpipe);
119313675Sdyson		return (0);
119413675Sdyson
119513675Sdyson	case FIOASYNC:
119613675Sdyson		if (*(int *)data) {
119713675Sdyson			mpipe->pipe_state |= PIPE_ASYNC;
119813675Sdyson		} else {
119913675Sdyson			mpipe->pipe_state &= ~PIPE_ASYNC;
120013675Sdyson		}
120191362Salfred		PIPE_UNLOCK(mpipe);
120213675Sdyson		return (0);
120313675Sdyson
120413675Sdyson	case FIONREAD:
120514037Sdyson		if (mpipe->pipe_state & PIPE_DIRECTW)
120614037Sdyson			*(int *)data = mpipe->pipe_map.cnt;
120714037Sdyson		else
120814037Sdyson			*(int *)data = mpipe->pipe_buffer.cnt;
120991362Salfred		PIPE_UNLOCK(mpipe);
121013675Sdyson		return (0);
121113675Sdyson
121241086Struckman	case FIOSETOWN:
1213104269Srwatson		PIPE_UNLOCK(mpipe);
121441086Struckman		return (fsetown(*(int *)data, &mpipe->pipe_sigio));
121541086Struckman
121641086Struckman	case FIOGETOWN:
1217104269Srwatson		PIPE_UNLOCK(mpipe);
1218104393Struckman		*(int *)data = fgetown(&mpipe->pipe_sigio);
121913675Sdyson		return (0);
122013675Sdyson
122141086Struckman	/* This is deprecated, FIOSETOWN should be used instead. */
122241086Struckman	case TIOCSPGRP:
1223104269Srwatson		PIPE_UNLOCK(mpipe);
122441086Struckman		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));
122541086Struckman
122641086Struckman	/* This is deprecated, FIOGETOWN should be used instead. */
122718863Sdyson	case TIOCGPGRP:
1228104269Srwatson		PIPE_UNLOCK(mpipe);
1229104393Struckman		*(int *)data = -fgetown(&mpipe->pipe_sigio);
123013675Sdyson		return (0);
123113675Sdyson
123213675Sdyson	}
1233104269Srwatson	PIPE_UNLOCK(mpipe);
123417124Sbde	return (ENOTTY);
123513675Sdyson}
123613675Sdyson
1237104094Sphkstatic int
1238101983Srwatsonpipe_poll(fp, events, active_cred, td)
123913675Sdyson	struct file *fp;
124029356Speter	int events;
1241101983Srwatson	struct ucred *active_cred;
124283366Sjulian	struct thread *td;
124313675Sdyson{
1244109153Sdillon	struct pipe *rpipe = fp->f_data;
124513675Sdyson	struct pipe *wpipe;
124629356Speter	int revents = 0;
1247101768Srwatson#ifdef MAC
1248101768Srwatson	int error;
1249101768Srwatson#endif
125013675Sdyson
125113675Sdyson	wpipe = rpipe->pipe_peer;
125291362Salfred	PIPE_LOCK(rpipe);
1253101768Srwatson#ifdef MAC
1254102115Srwatson	error = mac_check_pipe_poll(active_cred, rpipe);
1255101768Srwatson	if (error)
1256101768Srwatson		goto locked_error;
1257101768Srwatson#endif
125829356Speter	if (events & (POLLIN | POLLRDNORM))
125929356Speter		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
126029356Speter		    (rpipe->pipe_buffer.cnt > 0) ||
126129356Speter		    (rpipe->pipe_state & PIPE_EOF))
126229356Speter			revents |= events & (POLLIN | POLLRDNORM);
126313675Sdyson
126429356Speter	if (events & (POLLOUT | POLLWRNORM))
126529356Speter		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
126643311Sdillon		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
126743311Sdillon		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
126829356Speter			revents |= events & (POLLOUT | POLLWRNORM);
126913675Sdyson
127029356Speter	if ((rpipe->pipe_state & PIPE_EOF) ||
127129356Speter	    (wpipe == NULL) ||
127229356Speter	    (wpipe->pipe_state & PIPE_EOF))
127329356Speter		revents |= POLLHUP;
127429356Speter
127529356Speter	if (revents == 0) {
127629356Speter		if (events & (POLLIN | POLLRDNORM)) {
127783805Sjhb			selrecord(td, &rpipe->pipe_sel);
127829356Speter			rpipe->pipe_state |= PIPE_SEL;
127913675Sdyson		}
128013675Sdyson
128129356Speter		if (events & (POLLOUT | POLLWRNORM)) {
128283805Sjhb			selrecord(td, &wpipe->pipe_sel);
128330164Speter			wpipe->pipe_state |= PIPE_SEL;
128413907Sdyson		}
128513675Sdyson	}
1286101768Srwatson#ifdef MAC
1287101768Srwatsonlocked_error:
1288101768Srwatson#endif
128991362Salfred	PIPE_UNLOCK(rpipe);
129029356Speter
129129356Speter	return (revents);
129213675Sdyson}
129313675Sdyson
129498989Salfred/*
129598989Salfred * We shouldn't need locks here as we're doing a read and this should
129698989Salfred * be a natural race.
129798989Salfred */
129852983Speterstatic int
1299101983Srwatsonpipe_stat(fp, ub, active_cred, td)
130052983Speter	struct file *fp;
130152983Speter	struct stat *ub;
1302101983Srwatson	struct ucred *active_cred;
130383366Sjulian	struct thread *td;
130413675Sdyson{
1305109153Sdillon	struct pipe *pipe = fp->f_data;
1306101768Srwatson#ifdef MAC
1307101768Srwatson	int error;
130852983Speter
1309104269Srwatson	PIPE_LOCK(pipe);
1310102115Srwatson	error = mac_check_pipe_stat(active_cred, pipe);
1311104269Srwatson	PIPE_UNLOCK(pipe);
1312101768Srwatson	if (error)
1313101768Srwatson		return (error);
1314101768Srwatson#endif
1315100527Salfred	bzero(ub, sizeof(*ub));
131617124Sbde	ub->st_mode = S_IFIFO;
131713907Sdyson	ub->st_blksize = pipe->pipe_buffer.size;
131813675Sdyson	ub->st_size = pipe->pipe_buffer.cnt;
131913675Sdyson	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
132034901Sphk	ub->st_atimespec = pipe->pipe_atime;
132134901Sphk	ub->st_mtimespec = pipe->pipe_mtime;
132234901Sphk	ub->st_ctimespec = pipe->pipe_ctime;
132360404Schris	ub->st_uid = fp->f_cred->cr_uid;
132460404Schris	ub->st_gid = fp->f_cred->cr_gid;
132517124Sbde	/*
132660404Schris	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
132717124Sbde	 * XXX (st_dev, st_ino) should be unique.
132817124Sbde	 */
132976760Salfred	return (0);
133013675Sdyson}
133113675Sdyson
133213675Sdyson/* ARGSUSED */
133313675Sdysonstatic int
133483366Sjulianpipe_close(fp, td)
133513675Sdyson	struct file *fp;
133683366Sjulian	struct thread *td;
133713675Sdyson{
1338109153Sdillon	struct pipe *cpipe = fp->f_data;
133916322Sgpalmer
134049413Sgreen	fp->f_ops = &badfileops;
1341109153Sdillon	fp->f_data = NULL;
134296122Salfred	funsetown(&cpipe->pipe_sigio);
134313675Sdyson	pipeclose(cpipe);
134476760Salfred	return (0);
134513675Sdyson}
134613675Sdyson
134776364Salfredstatic void
134876364Salfredpipe_free_kmem(cpipe)
134976364Salfred	struct pipe *cpipe;
135076364Salfred{
135191412Salfred
135279224Sdillon	GIANT_REQUIRED;
135391412Salfred	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
135491412Salfred	       ("pipespace: pipe mutex locked"));
135576364Salfred
135676364Salfred	if (cpipe->pipe_buffer.buffer != NULL) {
135776364Salfred		if (cpipe->pipe_buffer.size > PIPE_SIZE)
135876364Salfred			--nbigpipe;
1359110816Salc		atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size);
136076364Salfred		kmem_free(kernel_map,
136176364Salfred			(vm_offset_t)cpipe->pipe_buffer.buffer,
136276364Salfred			cpipe->pipe_buffer.size);
136376364Salfred		cpipe->pipe_buffer.buffer = NULL;
136476364Salfred	}
136576364Salfred#ifndef PIPE_NODIRECT
1366102241Sarchie	if (cpipe->pipe_map.kva != 0) {
1367110816Salc		atomic_subtract_int(&amountpipekva,
1368110816Salc		    cpipe->pipe_buffer.size + PAGE_SIZE);
136976364Salfred		kmem_free(kernel_map,
137076364Salfred			cpipe->pipe_map.kva,
137176364Salfred			cpipe->pipe_buffer.size + PAGE_SIZE);
137276364Salfred		cpipe->pipe_map.cnt = 0;
137376364Salfred		cpipe->pipe_map.kva = 0;
137476364Salfred		cpipe->pipe_map.pos = 0;
137576364Salfred		cpipe->pipe_map.npages = 0;
137676364Salfred	}
137776364Salfred#endif
137876364Salfred}
137976364Salfred
138013675Sdyson/*
138113675Sdyson * shutdown the pipe
 *
 * Wakes select/poll waiters, waits for pipe_busy to drop to zero,
 * disconnects cpipe from its peer (marking the peer PIPE_EOF) and then
 * frees cpipe's memory.  A NULL cpipe is ignored.
138213675Sdyson */
138313675Sdysonstatic void
138413675Sdysonpipeclose(cpipe)
138513675Sdyson	struct pipe *cpipe;
138613675Sdyson{
138713907Sdyson	struct pipe *ppipe;
138891968Salfred	int hadpeer;
138976364Salfred
139091968Salfred	if (cpipe == NULL)
139191968Salfred		return;
139291968Salfred
	/* Set when a peer is found; the mutex is then left for the peer. */
139391968Salfred	hadpeer = 0;
139491968Salfred
139591968Salfred	/* partially created pipes won't have a valid mutex. */
139691968Salfred	if (PIPE_MTX(cpipe) != NULL)
139791362Salfred		PIPE_LOCK(cpipe);
139813907Sdyson
139991968Salfred	pipeselwakeup(cpipe);
140013907Sdyson
140191968Salfred	/*
140291968Salfred	 * If the other side is blocked, wake it up saying that
140391968Salfred	 * we want to close it down.
140491968Salfred	 */
140591968Salfred	while (cpipe->pipe_busy) {
140691968Salfred		wakeup(cpipe);
140791968Salfred		cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
140891968Salfred		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
140991968Salfred	}
141013675Sdyson
1411101768Srwatson#ifdef MAC
	/* The label is destroyed only when no peer remains. */
1412101768Srwatson	if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
1413101768Srwatson		mac_destroy_pipe(cpipe);
1414101768Srwatson#endif
1415101768Srwatson
141691968Salfred	/*
141791968Salfred	 * Disconnect from peer
141891968Salfred	 */
141991968Salfred	if ((ppipe = cpipe->pipe_peer) != NULL) {
142091968Salfred		hadpeer++;
142191968Salfred		pipeselwakeup(ppipe);
142213907Sdyson
142391968Salfred		ppipe->pipe_state |= PIPE_EOF;
142491968Salfred		wakeup(ppipe);
142591968Salfred		KNOTE(&ppipe->pipe_sel.si_note, 0);
		/* From here on the peer no longer refers to cpipe. */
142691968Salfred		ppipe->pipe_peer = NULL;
142791968Salfred	}
142891968Salfred	/*
142991968Salfred	 * free resources
143091968Salfred	 */
143191968Salfred	if (PIPE_MTX(cpipe) != NULL) {
143291968Salfred		PIPE_UNLOCK(cpipe);
		/* Without a peer nobody else uses the mutex: destroy it. */
143391968Salfred		if (!hadpeer) {
143491968Salfred			mtx_destroy(PIPE_MTX(cpipe));
143591968Salfred			free(PIPE_MTX(cpipe), M_TEMP);
143613675Sdyson		}
143713675Sdyson	}
	/* pipe_free_kmem() requires Giant; hold it across the frees. */
143891968Salfred	mtx_lock(&Giant);
143991968Salfred	pipe_free_kmem(cpipe);
144092751Sjeff	uma_zfree(pipe_zone, cpipe);
144191968Salfred	mtx_unlock(&Giant);
144213675Sdyson}
144359288Sjlemon
144472521Sjlemon/*ARGSUSED*/
144559288Sjlemonstatic int
144672521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn)
144759288Sjlemon{
144889306Salfred	struct pipe *cpipe;
144959288Sjlemon
1450109153Sdillon	cpipe = kn->kn_fp->f_data;
145172521Sjlemon	switch (kn->kn_filter) {
145272521Sjlemon	case EVFILT_READ:
145372521Sjlemon		kn->kn_fop = &pipe_rfiltops;
145472521Sjlemon		break;
145572521Sjlemon	case EVFILT_WRITE:
145672521Sjlemon		kn->kn_fop = &pipe_wfiltops;
145778292Sjlemon		cpipe = cpipe->pipe_peer;
1458101382Sdes		if (cpipe == NULL)
1459101382Sdes			/* other end of pipe has been closed */
1460101382Sdes			return (EBADF);
146172521Sjlemon		break;
146272521Sjlemon	default:
146372521Sjlemon		return (1);
146472521Sjlemon	}
1465100527Salfred	kn->kn_hook = cpipe;
146678292Sjlemon
146791372Salfred	PIPE_LOCK(cpipe);
146878292Sjlemon	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
146991372Salfred	PIPE_UNLOCK(cpipe);
147059288Sjlemon	return (0);
147159288Sjlemon}
147259288Sjlemon
147359288Sjlemonstatic void
147459288Sjlemonfilt_pipedetach(struct knote *kn)
147559288Sjlemon{
147678292Sjlemon	struct pipe *cpipe = (struct pipe *)kn->kn_hook;
147759288Sjlemon
147891372Salfred	PIPE_LOCK(cpipe);
147978292Sjlemon	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
148091372Salfred	PIPE_UNLOCK(cpipe);
148159288Sjlemon}
148259288Sjlemon
148359288Sjlemon/*ARGSUSED*/
148459288Sjlemonstatic int
148559288Sjlemonfilt_piperead(struct knote *kn, long hint)
148659288Sjlemon{
1487109153Sdillon	struct pipe *rpipe = kn->kn_fp->f_data;
148859288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
148959288Sjlemon
149091372Salfred	PIPE_LOCK(rpipe);
149159288Sjlemon	kn->kn_data = rpipe->pipe_buffer.cnt;
149259288Sjlemon	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
149359288Sjlemon		kn->kn_data = rpipe->pipe_map.cnt;
149459288Sjlemon
149559288Sjlemon	if ((rpipe->pipe_state & PIPE_EOF) ||
149659288Sjlemon	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
149791372Salfred		kn->kn_flags |= EV_EOF;
149891372Salfred		PIPE_UNLOCK(rpipe);
149959288Sjlemon		return (1);
150059288Sjlemon	}
150191372Salfred	PIPE_UNLOCK(rpipe);
150259288Sjlemon	return (kn->kn_data > 0);
150359288Sjlemon}
150459288Sjlemon
150559288Sjlemon/*ARGSUSED*/
150659288Sjlemonstatic int
150759288Sjlemonfilt_pipewrite(struct knote *kn, long hint)
150859288Sjlemon{
1509109153Sdillon	struct pipe *rpipe = kn->kn_fp->f_data;
151059288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
151159288Sjlemon
151291372Salfred	PIPE_LOCK(rpipe);
151359288Sjlemon	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
151459288Sjlemon		kn->kn_data = 0;
151559288Sjlemon		kn->kn_flags |= EV_EOF;
151691372Salfred		PIPE_UNLOCK(rpipe);
151759288Sjlemon		return (1);
151859288Sjlemon	}
151959288Sjlemon	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
152065855Sjlemon	if (wpipe->pipe_state & PIPE_DIRECTW)
152159288Sjlemon		kn->kn_data = 0;
152259288Sjlemon
152391372Salfred	PIPE_UNLOCK(rpipe);
152459288Sjlemon	return (kn->kn_data >= PIPE_BUF);
152559288Sjlemon}
1526