/*-
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.
 *
 * In order to limit the resource use of pipes, two sysctls exist:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.  This value is normally
 * autotuned, but may also be loader tuned.
 *
 * kern.ipc.pipekva - This read-only sysctl tracks the current amount of
 * memory in use by pipes.
 *
 * Based on how large pipekva is relative to maxpipekva, the following
 * will happen:
 *
 * 0% - 50%:
 *     New pipes are given 16K of memory backing, pipes may dynamically
 *     grow to as large as 64K where needed.
 * 50% - 75%:
 *     New pipes are given 4K (or PAGE_SIZE) of memory backing,
 *     existing pipes may NOT grow.
 * 75% - 100%:
 *     New pipes are given 4K (or PAGE_SIZE) of memory backing,
 *     existing pipes will be shrunk down to 4K whenever possible.
 *
 * Resizing may be disabled by setting kern.ipc.piperesizeallowed=0.  If
 * that is set, the only resize that will occur is the 0 -> SMALL_PIPE_SIZE
 * resize which MUST occur for reverse-direction pipes when they are
 * first used.
 *
 * Additional information about the current state of pipes may be obtained
 * from kern.ipc.pipes, kern.ipc.pipefragretry, kern.ipc.pipeallocfail,
 * and kern.ipc.piperesizefail.
 *
 * Locking rules:  There are two locks present here:  A mutex, used via
 * PIPE_LOCK, and a flag, used via pipelock().  All locking is done via
 * the flag, as mutexes can not persist over uiomove.  The mutex
 * exists only to guard access to the flag, and is not in itself a
 * locking mechanism.  Also note that there is only a single mutex for
 * both directions of a pipe.
 *
 * As pipelock() may have to sleep before it can acquire the flag, it
 * is important to reread all data after a call to pipelock(); everything
 * in the structure may have changed.
 */
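
/*
 * Illustrative example (userland sketch, not part of this file): the
 * direct-map path described above is only considered for writes of at
 * least PIPE_MINDIRECT bytes coming from user space on a blocking
 * descriptor; smaller writes always take the buffered copy path.
 *
 *	char big[64 * 1024], small[128];
 *	int fds[2];
 *
 *	pipe(fds);
 *	write(fds[1], big, sizeof(big));      may use the direct page mapping
 *	write(fds[1], small, sizeof(small));  always copied into the buffer
 *
 * The knobs described above can be inspected from userland with, e.g.,
 * "sysctl kern.ipc.maxpipekva kern.ipc.pipekva kern.ipc.pipes".
 */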

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 165347 2006-12-19 12:52:22Z pjd $");

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/event.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/uma.h>

/*
 * Use this define if you want to disable *fancy* VM things.  Expect an
 * approx 30% decrease in transfer rate.  This could be useful for
 * NetBSD or OpenBSD.
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 */
static fo_rdwr_t	pipe_read;
static fo_rdwr_t	pipe_write;
static fo_ioctl_t	pipe_ioctl;
static fo_poll_t	pipe_poll;
static fo_kqfilter_t	pipe_kqfilter;
static fo_stat_t	pipe_stat;
static fo_close_t	pipe_close;

static struct fileops pipeops = {
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_poll = pipe_poll,
	.fo_kqfilter = pipe_kqfilter,
	.fo_stat = pipe_stat,
	.fo_close = pipe_close,
	.fo_flags = DFLAG_PASSABLE
};

static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

static int amountpipes;
static int amountpipekva;
static int pipefragretry;
static int pipeallocfail;
static int piperesizefail;
static int piperesizeallowed = 1;

SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN,
	   &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
	   &amountpipes, 0, "Current # of pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
	   &amountpipekva, 0, "Pipe KVA usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipefragretry, CTLFLAG_RD,
	  &pipefragretry, 0, "Pipe allocation retries due to fragmentation");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipeallocfail, CTLFLAG_RD,
	  &pipeallocfail, 0, "Pipe allocation failures");
SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD,
	  &piperesizefail, 0, "Pipe resize failures");
SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW,
	  &piperesizeallowed, 0, "Pipe resizing allowed");
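
/*
 * Tuning examples (illustrative only): kern.ipc.maxpipekva is a boot-time
 * tunable (CTLFLAG_RDTUN), so a non-default limit would be set from
 * loader.conf, e.g. kern.ipc.maxpipekva="33554432", while
 * kern.ipc.piperesizeallowed is run-time writable (CTLFLAG_RW), e.g.
 * "sysctl kern.ipc.piperesizeallowed=0" disables resizing as described in
 * the comment at the top of this file.  The remaining entries are
 * read-only statistics.
 */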

static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipe_create(struct pipe *pipe, int backing);
static __inline int pipelock(struct pipe *cpipe, int catch);
static __inline void pipeunlock(struct pipe *cpipe);
static __inline void pipeselwakeup(struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
static void pipe_destroy_write_buffer(struct pipe *wpipe);
static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
static void pipe_clone_write_buffer(struct pipe *wpipe);
#endif
static int pipespace(struct pipe *cpipe, int size);
static int pipespace_new(struct pipe *cpipe, int size);

static int	pipe_zone_ctor(void *mem, int size, void *arg, int flags);
static void	pipe_zone_dtor(void *mem, int size, void *arg);
static int	pipe_zone_init(void *mem, int size, int flags);
static void	pipe_zone_fini(void *mem, int size);

static uma_zone_t pipe_zone;

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

static void
pipeinit(void *dummy __unused)
{

	pipe_zone = uma_zcreate("PIPE", sizeof(struct pipepair),
	    pipe_zone_ctor, pipe_zone_dtor, pipe_zone_init, pipe_zone_fini,
	    UMA_ALIGN_PTR, 0);
	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
}

static int
pipe_zone_ctor(void *mem, int size, void *arg, int flags)
{
	struct pipepair *pp;
	struct pipe *rpipe, *wpipe;

	KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size"));

	pp = (struct pipepair *)mem;

	/*
	 * We zero both pipe endpoints to make sure all the kmem pointers
	 * are NULL, flag fields are zero'd, etc.  We timestamp both
	 * endpoints with the same time.
	 */
	rpipe = &pp->pp_rpipe;
	bzero(rpipe, sizeof(*rpipe));
	vfs_timestamp(&rpipe->pipe_ctime);
	rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime;

	wpipe = &pp->pp_wpipe;
	bzero(wpipe, sizeof(*wpipe));
	wpipe->pipe_ctime = rpipe->pipe_ctime;
	wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime;

	rpipe->pipe_peer = wpipe;
	rpipe->pipe_pair = pp;
	wpipe->pipe_peer = rpipe;
	wpipe->pipe_pair = pp;

	/*
	 * Mark both endpoints as present; they will later get free'd
	 * one at a time.  When both are free'd, then the whole pair
	 * is released.
	 */
	rpipe->pipe_present = 1;
	wpipe->pipe_present = 1;

	/*
	 * Eventually, the MAC Framework may initialize the label
	 * in ctor or init, but for now we do it elsewhere to avoid
	 * blocking in ctor or init.
	 */
	pp->pp_label = NULL;

	atomic_add_int(&amountpipes, 2);
	return (0);
}

static void
pipe_zone_dtor(void *mem, int size, void *arg)
{
	struct pipepair *pp;

	KASSERT(size == sizeof(*pp), ("pipe_zone_dtor: wrong size"));

	pp = (struct pipepair *)mem;

	atomic_subtract_int(&amountpipes, 2);
}

static int
pipe_zone_init(void *mem, int size, int flags)
{
	struct pipepair *pp;

	KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size"));

	pp = (struct pipepair *)mem;

	mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
	return (0);
}

static void
pipe_zone_fini(void *mem, int size)
{
	struct pipepair *pp;

	KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size"));

	pp = (struct pipepair *)mem;

	mtx_destroy(&pp->pp_mtx);
}
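
/*
 * Note on the UMA callbacks above: the ctor/dtor pair runs on every
 * allocation and free of a pipe pair, while init/fini run only when an
 * item is imported into or released from the zone.  That is why the
 * persistent mutex is set up in pipe_zone_init() and torn down in
 * pipe_zone_fini(), while per-use state such as the timestamps and the
 * zeroing of both endpoints is redone each time in pipe_zone_ctor().
 */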

/*
 * The pipe system call for the DTYPE_PIPE type of pipes.  If we fail,
 * let the zone pick up the pieces via pipeclose().
 */

/* ARGSUSED */
int
pipe(td, uap)
	struct thread *td;
	struct pipe_args /* {
		int	dummy;
	} */ *uap;
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file *rf, *wf;
	struct pipepair *pp;
	struct pipe *rpipe, *wpipe;
	int fd, error;

	pp = uma_zalloc(pipe_zone, M_WAITOK);
#ifdef MAC
	/*
	 * The MAC label is shared between the connected endpoints.  As a
	 * result mac_init_pipe() and mac_create_pipe() are called once
	 * for the pair, and not on the endpoints.
	 */
	mac_init_pipe(pp);
	mac_create_pipe(td->td_ucred, pp);
#endif
	rpipe = &pp->pp_rpipe;
	wpipe = &pp->pp_wpipe;

	knlist_init(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe), NULL, NULL,
	    NULL);
	knlist_init(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe), NULL, NULL,
	    NULL);

	/* Only the forward direction pipe is backed by default */
	if ((error = pipe_create(rpipe, 1)) != 0 ||
	    (error = pipe_create(wpipe, 0)) != 0) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;

	error = falloc(td, &rf, &fd);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}
	/* An extra reference on `rf' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	FILE_LOCK(rf);
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;
	FILE_UNLOCK(rf);
	error = falloc(td, &wf, &fd);
	if (error) {
		fdclose(fdp, rf, td->td_retval[0], td);
		fdrop(rf, td);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		return (error);
	}
	/* An extra reference on `wf' has been held for us by falloc(). */
	FILE_LOCK(wf);
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;
	FILE_UNLOCK(wf);
	fdrop(wf, td);
	td->td_retval[1] = fd;
	fdrop(rf, td);

	return (0);
}
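
/*
 * Userland view of the call above (sketch only): td_retval[0] carries the
 * descriptor attached to the read endpoint (rpipe) and td_retval[1] the
 * one attached to the write endpoint (wpipe).
 *
 *	int fds[2];
 *	char c;
 *
 *	if (pipe(fds) == -1)
 *		err(1, "pipe");
 *	write(fds[1], "x", 1);		fds[1] is the write end
 *	read(fds[0], &c, 1);		fds[0] is the read end
 */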

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if it fails,
 * it will retain the old buffer and return ENOMEM.
 */
static int
pipespace_new(cpipe, size)
	struct pipe *cpipe;
	int size;
{
	caddr_t buffer;
	int error, cnt, firstseg;
	static int curfail = 0;
	static struct timeval lastfail;

	KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked"));
	KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW),
		("pipespace: resize of direct writes not allowed"));
retry:
	cnt = cpipe->pipe_buffer.cnt;
	if (cnt > size)
		size = cnt;

	size = round_page(size);
	buffer = (caddr_t) vm_map_min(pipe_map);

	error = vm_map_find(pipe_map, NULL, 0,
		(vm_offset_t *) &buffer, size, 1,
		VM_PROT_ALL, VM_PROT_ALL, 0);
	if (error != KERN_SUCCESS) {
		if ((cpipe->pipe_buffer.buffer == NULL) &&
			(size > SMALL_PIPE_SIZE)) {
			size = SMALL_PIPE_SIZE;
			pipefragretry++;
			goto retry;
		}
		if (cpipe->pipe_buffer.buffer == NULL) {
			pipeallocfail++;
			if (ppsratecheck(&lastfail, &curfail, 1))
				printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n");
		} else {
			piperesizefail++;
		}
		return (ENOMEM);
	}

	/* copy data, then free old resources if we're resizing */
	if (cnt > 0) {
		if (cpipe->pipe_buffer.in <= cpipe->pipe_buffer.out) {
			firstseg = cpipe->pipe_buffer.size - cpipe->pipe_buffer.out;
			bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out],
				buffer, firstseg);
			if ((cnt - firstseg) > 0)
				bcopy(cpipe->pipe_buffer.buffer, &buffer[firstseg],
					cpipe->pipe_buffer.in);
		} else {
			bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out],
				buffer, cnt);
		}
	}
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = cnt;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = cnt;
	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
	return (0);
}
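
/*
 * Note that a successful resize above linearizes the circular buffer:
 * whatever data was present is copied to the start of the new buffer,
 * so "out" is reset to 0 and "in" equals "cnt" afterwards.
 */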

/*
 * Wrapper for pipespace_new() that performs locking assertions.
 */
static int
pipespace(cpipe, size)
	struct pipe *cpipe;
	int size;
{

	KASSERT(cpipe->pipe_state & PIPE_LOCKFL,
		("Unlocked pipe passed to pipespace"));
	return (pipespace_new(cpipe, size));
}

/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(cpipe, catch)
	struct pipe *cpipe;
	int catch;
{
	int error;

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	while (cpipe->pipe_state & PIPE_LOCKFL) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = msleep(cpipe, PIPE_MTX(cpipe),
		    catch ? (PRIBIO | PCATCH) : PRIBIO,
		    "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCKFL;
	return (0);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(cpipe)
	struct pipe *cpipe;
{

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	KASSERT(cpipe->pipe_state & PIPE_LOCKFL,
		("Unlocked pipe passed to pipeunlock"));
	cpipe->pipe_state &= ~PIPE_LOCKFL;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}
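
/*
 * Typical use of the two-level locking scheme described at the top of
 * this file (sketch of the pattern used by pipe_read() and pipe_write();
 * error handling omitted).  The mutex is dropped around uiomove() while
 * the pipelock() flag stays held:
 *
 *	PIPE_LOCK(cpipe);
 *	error = pipelock(cpipe, 1);	may sleep; re-check state after
 *	PIPE_UNLOCK(cpipe);
 *	uiomove(..., uio);
 *	PIPE_LOCK(cpipe);
 *	pipeunlock(cpipe);
 *	PIPE_UNLOCK(cpipe);
 */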

static __inline void
pipeselwakeup(cpipe)
	struct pipe *cpipe;
{

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeuppri(&cpipe->pipe_sel, PSOCK);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
	KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0);
}

/*
 * Initialize and allocate VM and memory for pipe.  The structure
 * will start out zero'd from the ctor, so we just manage the kmem.
 */
static int
pipe_create(pipe, backing)
	struct pipe *pipe;
	int backing;
{
	int error;

	if (backing) {
		if (amountpipekva > maxpipekva / 2)
			error = pipespace_new(pipe, SMALL_PIPE_SIZE);
		else
			error = pipespace_new(pipe, PIPE_SIZE);
	} else {
		/* If we're not backing this pipe, no need to do anything. */
		error = 0;
	}
	return (error);
}

/* ARGSUSED */
static int
pipe_read(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	struct pipe *rpipe = fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	PIPE_LOCK(rpipe);
	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

#ifdef MAC
	error = mac_check_pipe_read(active_cred, rpipe->pipe_pair);
	if (error)
		goto locked_error;
#endif
	if (amountpipekva > (3 * maxpipekva) / 4) {
		if (!(rpipe->pipe_state & PIPE_DIRECTW) &&
			(rpipe->pipe_buffer.size > SMALL_PIPE_SIZE) &&
			(rpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) &&
			(piperesizeallowed == 1)) {
			PIPE_UNLOCK(rpipe);
			pipespace(rpipe, SMALL_PIPE_SIZE);
			PIPE_LOCK(rpipe);
		}
	}

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			PIPE_UNLOCK(rpipe);
			error = uiomove(
			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		/*
		 * Direct copy, bypassing a kernel buffer.
		 */
		} else if ((size = rpipe->pipe_map.cnt) &&
			   (rpipe->pipe_state & PIPE_DIRECTW)) {
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			PIPE_UNLOCK(rpipe);
			error = uiomove_fromphys(rpipe->pipe_map.ms,
			    rpipe->pipe_map.pos, size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			if (rpipe->pipe_map.cnt == 0) {
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
				    PRIBIO | PCATCH,
				    "piperd", 0)) == 0)
					error = pipelock(rpipe, 1);
			}
			if (error)
				goto unlocked_error;
		}
	}
#ifdef MAC
locked_error:
#endif
	pipeunlock(rpipe);

	/* XXX: should probably do this before getting any locks. */
	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);

	PIPE_UNLOCK(rpipe);
	return (error);
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending process's buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	pmap_t pmap;
	u_int size;
	int i, j;
	vm_offset_t addr, endaddr;

	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
	KASSERT(wpipe->pipe_state & PIPE_DIRECTW,
		("Clone attempt on non-direct write pipe!"));

	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	pmap = vmspace_pmap(curproc->p_vmspace);
	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
		/*
		 * vm_fault_quick() can sleep.  Consequently,
		 * vm_page_lock_queue() and vm_page_unlock_queue()
		 * should not be performed outside of this loop.
		 */
	race:
		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) {
			vm_page_lock_queues();
			for (j = 0; j < i; j++)
				vm_page_unhold(wpipe->pipe_map.ms[j]);
			vm_page_unlock_queues();
			return (EFAULT);
		}
		wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr,
		    VM_PROT_READ);
		if (wpipe->pipe_map.ms[i] == NULL)
			goto race;
	}

/*
 * set up the control block
 */
	wpipe->pipe_map.npages = i;
	wpipe->pipe_map.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_map.cnt = size;

/*
 * and update the uio data
 */

	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * unmap and unwire the process buffer
 */
static void
pipe_destroy_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int i;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	vm_page_lock_queues();
	for (i = 0; i < wpipe->pipe_map.npages; i++) {
		vm_page_unhold(wpipe->pipe_map.ms[i]);
	}
	vm_page_unlock_queues();
	wpipe->pipe_map.npages = 0;
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	struct uio uio;
	struct iovec iov;
	int size;
	int pos;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	PIPE_UNLOCK(wpipe);
	iov.iov_base = wpipe->pipe_buffer.buffer;
	iov.iov_len = size;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = size;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = curthread;
	uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio);
	PIPE_LOCK(wpipe);
	pipe_destroy_write_buffer(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	error = pipelock(wpipe, 1);
	if (wpipe->pipe_state & PIPE_EOF)
		error = EPIPE;
	if (error) {
		pipeunlock(wpipe);
		goto error1;
	}
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		else
			goto retry;
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		else
			goto retry;
	}

	wpipe->pipe_state |= PIPE_DIRECTW;

	PIPE_UNLOCK(wpipe);
	error = pipe_build_write_buffer(wpipe, uio);
	PIPE_LOCK(wpipe);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		pipeunlock(wpipe);
		goto error1;
	}

	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipe_destroy_write_buffer(wpipe);
			pipeselwakeup(wpipe);
			pipeunlock(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
		    "pipdwt", 0);
		pipelock(wpipe, 0);
	}

	if (wpipe->pipe_state & PIPE_EOF)
		error = EPIPE;
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		pipe_destroy_write_buffer(wpipe);
	}
	pipeunlock(wpipe);
	return (error);

error1:
	wakeup(wpipe);
	return (error);
}
#endif
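
/*
 * Summary of the direct-write handshake implemented above: the writer
 * sets PIPE_DIRECTW and wires its user pages into pipe_map.ms[]; the
 * reader in pipe_read() copies from those pages with uiomove_fromphys()
 * and clears PIPE_DIRECTW (waking the writer) once pipe_map.cnt reaches
 * zero.  If the writer is interrupted first, pipe_clone_write_buffer()
 * falls back to copying the data into the pageable kernel buffer.
 */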

static int
pipe_write(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	int error = 0;
	int desiredsize, orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = fp->f_data;
	wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	error = pipelock(wpipe, 1);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return (error);
	}
	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
		pipeunlock(wpipe);
		PIPE_UNLOCK(rpipe);
		return (EPIPE);
	}
#ifdef MAC
	error = mac_check_pipe_write(active_cred, wpipe->pipe_pair);
	if (error) {
		pipeunlock(wpipe);
		PIPE_UNLOCK(rpipe);
		return (error);
	}
#endif
	++wpipe->pipe_busy;

	/* Choose a larger size if it's advantageous */
	desiredsize = max(SMALL_PIPE_SIZE, wpipe->pipe_buffer.size);
	while (desiredsize < wpipe->pipe_buffer.cnt + uio->uio_resid) {
		if (piperesizeallowed != 1)
			break;
		if (amountpipekva > maxpipekva / 2)
			break;
		if (desiredsize == BIG_PIPE_SIZE)
			break;
		desiredsize = desiredsize * 2;
	}

	/* Choose a smaller size if we're in an OOM situation */
	if ((amountpipekva > (3 * maxpipekva) / 4) &&
		(wpipe->pipe_buffer.size > SMALL_PIPE_SIZE) &&
		(wpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) &&
		(piperesizeallowed == 1))
		desiredsize = SMALL_PIPE_SIZE;

	/* Resize if the above determined that a new size was necessary */
	if ((desiredsize != wpipe->pipe_buffer.size) &&
		((wpipe->pipe_state & PIPE_DIRECTW) == 0)) {
		PIPE_UNLOCK(wpipe);
		pipespace(wpipe, desiredsize);
		PIPE_LOCK(wpipe);
	}
	if (wpipe->pipe_buffer.size == 0) {
		/*
		 * This can only happen for reverse direction use of pipes
		 * in a complete OOM situation.
		 */
		error = ENOMEM;
		--wpipe->pipe_busy;
		pipeunlock(wpipe);
		PIPE_UNLOCK(wpipe);
		return (error);
	}

	pipeunlock(wpipe);

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

		pipelock(wpipe, 0);
		if (wpipe->pipe_state & PIPE_EOF) {
			pipeunlock(wpipe);
			error = EPIPE;
			break;
		}
#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if (uio->uio_segflg == UIO_USERSPACE &&
		    uio->uio_iov->iov_len >= PIPE_MINDIRECT &&
		    wpipe->pipe_buffer.size >= PIPE_MINDIRECT &&
		    (fp->f_flag & FNONBLOCK) == 0) {
			pipeunlock(wpipe);
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincident with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
		if (wpipe->pipe_state & PIPE_DIRECTW) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			pipeunlock(wpipe);
			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
			    "pipbww", 0);
			if (error)
				break;
			else
				continue;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {
			int size;	/* Transfer size */
			int segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
				wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			PIPE_UNLOCK(rpipe);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
					segsize, uio);
			PIPE_LOCK(rpipe);

			if (error == 0 && segsize < size) {
				KASSERT(wpipe->pipe_buffer.in + segsize ==
					wpipe->pipe_buffer.size,
					("Pipe buffer wraparound disappeared"));
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */

				PIPE_UNLOCK(rpipe);
				error = uiomove(
				    &wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				PIPE_LOCK(rpipe);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
					KASSERT(wpipe->pipe_buffer.in ==
						size - segsize +
						wpipe->pipe_buffer.size,
						("Expected wraparound bad"));
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
				KASSERT(wpipe->pipe_buffer.cnt <=
					wpipe->pipe_buffer.size,
					("Pipe buffer overflow"));
			}
			pipeunlock(wpipe);
			if (error != 0)
				break;
		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				pipeunlock(wpipe);
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			pipeunlock(wpipe);
			error = msleep(wpipe, PIPE_MTX(rpipe),
			    PRIBIO | PCATCH, "pipewr", 0);
			if (error != 0)
				break;
		}
	}

	pipelock(wpipe, 0);
	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	pipeunlock(wpipe);
	PIPE_UNLOCK(rpipe);
	return (error);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
static int
pipe_ioctl(fp, cmd, data, active_cred, td)
	struct file *fp;
	u_long cmd;
	void *data;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *mpipe = fp->f_data;
	int error;

	PIPE_LOCK(mpipe);

#ifdef MAC
	error = mac_check_pipe_ioctl(active_cred, mpipe->pipe_pair, cmd, data);
	if (error) {
		PIPE_UNLOCK(mpipe);
		return (error);
	}
#endif

	error = 0;
	switch (cmd) {

	case FIONBIO:
		break;

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		break;

	case FIONREAD:
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
			*(int *)data = mpipe->pipe_buffer.cnt;
		break;

	case FIOSETOWN:
		PIPE_UNLOCK(mpipe);
		error = fsetown(*(int *)data, &mpipe->pipe_sigio);
		goto out_unlocked;

	case FIOGETOWN:
		*(int *)data = fgetown(&mpipe->pipe_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		PIPE_UNLOCK(mpipe);
		error = fsetown(-(*(int *)data), &mpipe->pipe_sigio);
		goto out_unlocked;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(&mpipe->pipe_sigio);
		break;

	default:
		error = ENOTTY;
		break;
	}
	PIPE_UNLOCK(mpipe);
out_unlocked:
	return (error);
}
132913675Sdyson
1330104094Sphkstatic int
1331101983Srwatsonpipe_poll(fp, events, active_cred, td)
133213675Sdyson	struct file *fp;
133329356Speter	int events;
1334101983Srwatson	struct ucred *active_cred;
133583366Sjulian	struct thread *td;
133613675Sdyson{
1337109153Sdillon	struct pipe *rpipe = fp->f_data;
133813675Sdyson	struct pipe *wpipe;
133929356Speter	int revents = 0;
1340101768Srwatson#ifdef MAC
1341101768Srwatson	int error;
1342101768Srwatson#endif
134313675Sdyson
134413675Sdyson	wpipe = rpipe->pipe_peer;
134591362Salfred	PIPE_LOCK(rpipe);
1346101768Srwatson#ifdef MAC
1347125293Srwatson	error = mac_check_pipe_poll(active_cred, rpipe->pipe_pair);
1348101768Srwatson	if (error)
1349101768Srwatson		goto locked_error;
1350101768Srwatson#endif
135129356Speter	if (events & (POLLIN | POLLRDNORM))
135229356Speter		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
135329356Speter		    (rpipe->pipe_buffer.cnt > 0) ||
135429356Speter		    (rpipe->pipe_state & PIPE_EOF))
135529356Speter			revents |= events & (POLLIN | POLLRDNORM);
135613675Sdyson
135729356Speter	if (events & (POLLOUT | POLLWRNORM))
1358125364Srwatson		if (!wpipe->pipe_present || (wpipe->pipe_state & PIPE_EOF) ||
135943311Sdillon		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
136043311Sdillon		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
136129356Speter			revents |= events & (POLLOUT | POLLWRNORM);
136213675Sdyson
136329356Speter	if ((rpipe->pipe_state & PIPE_EOF) ||
1364125364Srwatson	    (!wpipe->pipe_present) ||
136529356Speter	    (wpipe->pipe_state & PIPE_EOF))
136629356Speter		revents |= POLLHUP;
136729356Speter
136829356Speter	if (revents == 0) {
136929356Speter		if (events & (POLLIN | POLLRDNORM)) {
137083805Sjhb			selrecord(td, &rpipe->pipe_sel);
137129356Speter			rpipe->pipe_state |= PIPE_SEL;
137213675Sdyson		}
137313675Sdyson
137429356Speter		if (events & (POLLOUT | POLLWRNORM)) {
137583805Sjhb			selrecord(td, &wpipe->pipe_sel);
137630164Speter			wpipe->pipe_state |= PIPE_SEL;
137713907Sdyson		}
137813675Sdyson	}
1379101768Srwatson#ifdef MAC
1380101768Srwatsonlocked_error:
1381101768Srwatson#endif
138291362Salfred	PIPE_UNLOCK(rpipe);
138329356Speter
138429356Speter	return (revents);
138513675Sdyson}
138613675Sdyson
138798989Salfred/*
138898989Salfred * We shouldn't need locks here as we're doing a read and this should
138998989Salfred * be a natural race.
139098989Salfred */
139152983Speterstatic int
1392101983Srwatsonpipe_stat(fp, ub, active_cred, td)
139352983Speter	struct file *fp;
139452983Speter	struct stat *ub;
1395101983Srwatson	struct ucred *active_cred;
139683366Sjulian	struct thread *td;
139713675Sdyson{
1398109153Sdillon	struct pipe *pipe = fp->f_data;
1399101768Srwatson#ifdef MAC
1400101768Srwatson	int error;
140152983Speter
1402104269Srwatson	PIPE_LOCK(pipe);
1403125293Srwatson	error = mac_check_pipe_stat(active_cred, pipe->pipe_pair);
1404104269Srwatson	PIPE_UNLOCK(pipe);
1405101768Srwatson	if (error)
1406101768Srwatson		return (error);
1407101768Srwatson#endif
1408100527Salfred	bzero(ub, sizeof(*ub));
140917124Sbde	ub->st_mode = S_IFIFO;
1410133790Ssilby	ub->st_blksize = PAGE_SIZE;
1411132436Ssilby	if (pipe->pipe_state & PIPE_DIRECTW)
1412132436Ssilby		ub->st_size = pipe->pipe_map.cnt;
1413132436Ssilby	else
1414132436Ssilby		ub->st_size = pipe->pipe_buffer.cnt;
141513675Sdyson	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
141634901Sphk	ub->st_atimespec = pipe->pipe_atime;
141734901Sphk	ub->st_mtimespec = pipe->pipe_mtime;
141834901Sphk	ub->st_ctimespec = pipe->pipe_ctime;
141960404Schris	ub->st_uid = fp->f_cred->cr_uid;
142060404Schris	ub->st_gid = fp->f_cred->cr_gid;
142117124Sbde	/*
142260404Schris	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
142317124Sbde	 * XXX (st_dev, st_ino) should be unique.
142417124Sbde	 */
142576760Salfred	return (0);
142613675Sdyson}
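
/*
 * Because st_size is filled from the bytes currently queued in the pipe,
 * a userland fstat(2) on either end reports the amount of unread data.
 * An illustrative sketch using only the standard API:
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct stat sb;
	int pfd[2];

	if (pipe(pfd) == -1)
		return (1);
	(void)write(pfd[1], "hello", 5);
	if (fstat(pfd[0], &sb) == -1)
		return (1);
	/* S_ISFIFO(sb.st_mode) holds; st_size is the 5 unread bytes. */
	printf("fifo=%d size=%jd blksize=%ld\n", S_ISFIFO(sb.st_mode),
	    (intmax_t)sb.st_size, (long)sb.st_blksize);
	return (0);
}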
142713675Sdyson
142813675Sdyson/* ARGSUSED */
142913675Sdysonstatic int
143083366Sjulianpipe_close(fp, td)
143113675Sdyson	struct file *fp;
143283366Sjulian	struct thread *td;
143313675Sdyson{
1434109153Sdillon	struct pipe *cpipe = fp->f_data;
143516322Sgpalmer
143649413Sgreen	fp->f_ops = &badfileops;
1437109153Sdillon	fp->f_data = NULL;
143896122Salfred	funsetown(&cpipe->pipe_sigio);
143913675Sdyson	pipeclose(cpipe);
144076760Salfred	return (0);
144113675Sdyson}
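
/*
 * The funsetown() above tears down any SIGIO ownership userland set on the
 * descriptor.  That ownership would have been established roughly as in
 * the sketch below; it assumes O_ASYNC/F_SETOWN are honoured for pipes,
 * via the FIOASYNC/FIOSETOWN handling in pipe_ioctl().
 */
#include <sys/types.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t got_sigio;

static void
sigio_handler(int sig)
{

	(void)sig;
	got_sigio = 1;
}

int
main(void)
{
	int pfd[2];

	if (pipe(pfd) == -1)
		return (1);
	signal(SIGIO, sigio_handler);

	/* Ask for SIGIO on the read end; closing the fd undoes this. */
	if (fcntl(pfd[0], F_SETOWN, getpid()) == -1 ||
	    fcntl(pfd[0], F_SETFL, O_ASYNC) == -1)
		return (1);

	(void)write(pfd[1], "x", 1);	/* should trigger SIGIO delivery */
	sleep(1);
	printf("got SIGIO: %d\n", (int)got_sigio);
	close(pfd[0]);			/* close path runs funsetown() */
	close(pfd[1]);
	return (0);
}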
144213675Sdyson
144376364Salfredstatic void
144476364Salfredpipe_free_kmem(cpipe)
144576364Salfred	struct pipe *cpipe;
144676364Salfred{
144791412Salfred
1448125293Srwatson	KASSERT(!mtx_owned(PIPE_MTX(cpipe)),
1449125293Srwatson	    ("pipe_free_kmem: pipe mutex locked"));
145076364Salfred
145176364Salfred	if (cpipe->pipe_buffer.buffer != NULL) {
1452110816Salc		atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size);
1453118764Ssilby		vm_map_remove(pipe_map,
1454118764Ssilby		    (vm_offset_t)cpipe->pipe_buffer.buffer,
1455118764Ssilby		    (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size);
145676364Salfred		cpipe->pipe_buffer.buffer = NULL;
145776364Salfred	}
145876364Salfred#ifndef PIPE_NODIRECT
1459127501Salc	{
146076364Salfred		cpipe->pipe_map.cnt = 0;
146176364Salfred		cpipe->pipe_map.pos = 0;
146276364Salfred		cpipe->pipe_map.npages = 0;
146376364Salfred	}
146476364Salfred#endif
146576364Salfred}
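
/*
 * The amountpipekva counter adjusted above backs the read-only
 * kern.ipc.pipekva sysctl, so pipe buffer usage can be watched from
 * userland.  A sketch; it assumes the counter is exported as a plain int,
 * as the atomic_subtract_int() above suggests:
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	size_t len;
	int before, after, pfd[2];

	len = sizeof(before);
	if (sysctlbyname("kern.ipc.pipekva", &before, &len, NULL, 0) == -1)
		return (1);
	if (pipe(pfd) == -1)
		return (1);
	len = sizeof(after);
	if (sysctlbyname("kern.ipc.pipekva", &after, &len, NULL, 0) == -1)
		return (1);
	/* "after" should exceed "before" by the new pipe's buffer size. */
	printf("pipekva: %d -> %d\n", before, after);
	close(pfd[0]);
	close(pfd[1]);
	return (0);
}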
146676364Salfred
146713675Sdyson/*
146813675Sdyson * Shut down the pipe.
146913675Sdyson */
147013675Sdysonstatic void
147113675Sdysonpipeclose(cpipe)
147213675Sdyson	struct pipe *cpipe;
147313675Sdyson{
1474125293Srwatson	struct pipepair *pp;
147513907Sdyson	struct pipe *ppipe;
147676364Salfred
1477125293Srwatson	KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL"));
147891968Salfred
1479125293Srwatson	PIPE_LOCK(cpipe);
1480133049Ssilby	pipelock(cpipe, 0);
1481125293Srwatson	pp = cpipe->pipe_pair;
148291968Salfred
148391968Salfred	pipeselwakeup(cpipe);
148413907Sdyson
148591968Salfred	/*
148691968Salfred	 * If the other side is blocked, wake it up saying that
148791968Salfred	 * we want to close it down.
148891968Salfred	 */
1489126131Sgreen	cpipe->pipe_state |= PIPE_EOF;
149091968Salfred	while (cpipe->pipe_busy) {
149191968Salfred		wakeup(cpipe);
1492126131Sgreen		cpipe->pipe_state |= PIPE_WANT;
1493133049Ssilby		pipeunlock(cpipe);
149491968Salfred		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
1495133049Ssilby		pipelock(cpipe, 0);
149691968Salfred	}
149713675Sdyson
149991968Salfred	/*
1500125293Srwatson	 * Disconnect from peer, if any.
150191968Salfred	 */
1502125293Srwatson	ppipe = cpipe->pipe_peer;
1503125293Srwatson	if (ppipe->pipe_present != 0) {
150491968Salfred		pipeselwakeup(ppipe);
150513907Sdyson
150691968Salfred		ppipe->pipe_state |= PIPE_EOF;
150791968Salfred		wakeup(ppipe);
1508133741Sjmg		KNOTE_LOCKED(&ppipe->pipe_sel.si_note, 0);
150991968Salfred	}
1510125293Srwatson
151191968Salfred	/*
1512125293Srwatson	 * Release kmem resources, then mark this endpoint as no
1513125293Srwatson	 * longer present.  The endpoint must not be marked unused
1514125293Srwatson	 * until the kmem is released, or the pipe pair could be
1515125293Srwatson	 * freed out from under us.
151691968Salfred	 */
1517125293Srwatson	PIPE_UNLOCK(cpipe);
1518125293Srwatson	pipe_free_kmem(cpipe);
1519125293Srwatson	PIPE_LOCK(cpipe);
1520125293Srwatson	cpipe->pipe_present = 0;
1521126131Sgreen	pipeunlock(cpipe);
1522133741Sjmg	knlist_clear(&cpipe->pipe_sel.si_note, 1);
1523133741Sjmg	knlist_destroy(&cpipe->pipe_sel.si_note);
1524125293Srwatson
1525125293Srwatson	/*
1526125293Srwatson	 * If both endpoints are now closed, release the memory for the
1527125293Srwatson	 * pipe pair.  If not, unlock.
1528125293Srwatson	 */
1529125293Srwatson	if (ppipe->pipe_present == 0) {
153091968Salfred		PIPE_UNLOCK(cpipe);
1531125293Srwatson#ifdef MAC
1532125293Srwatson		mac_destroy_pipe(pp);
1533125293Srwatson#endif
1534125293Srwatson		uma_zfree(pipe_zone, cpipe->pipe_pair);
1535125293Srwatson	} else
1536125293Srwatson		PIPE_UNLOCK(cpipe);
153713675Sdyson}
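
/*
 * The PIPE_EOF marking and peer wakeup above are what turn a close(2) of
 * the last write end into an immediate EOF for a blocked reader (and,
 * symmetrically, EPIPE/SIGPIPE for a writer whose readers are gone).  A
 * userland sketch of the reader side:
 */
#include <sys/types.h>
#include <sys/wait.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char buf[16];
	ssize_t n;
	pid_t pid;
	int pfd[2];

	if (pipe(pfd) == -1)
		return (1);
	if ((pid = fork()) == -1)
		return (1);
	if (pid == 0) {			/* child: the only writer, exits at once */
		close(pfd[0]);
		close(pfd[1]);		/* last writer closing wakes the reader */
		_exit(0);
	}
	close(pfd[1]);			/* parent keeps only the read end */
	n = read(pfd[0], buf, sizeof(buf));
	/* read() returns 0 (EOF) once every write end has been closed. */
	printf("read returned %zd\n", n);
	waitpid(pid, NULL, 0);
	return (0);
}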
153859288Sjlemon
153972521Sjlemon/*ARGSUSED*/
154059288Sjlemonstatic int
154172521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn)
154259288Sjlemon{
154389306Salfred	struct pipe *cpipe;
154459288Sjlemon
1545109153Sdillon	cpipe = kn->kn_fp->f_data;
1546126131Sgreen	PIPE_LOCK(cpipe);
154772521Sjlemon	switch (kn->kn_filter) {
154872521Sjlemon	case EVFILT_READ:
154972521Sjlemon		kn->kn_fop = &pipe_rfiltops;
155072521Sjlemon		break;
155172521Sjlemon	case EVFILT_WRITE:
155272521Sjlemon		kn->kn_fop = &pipe_wfiltops;
1553126131Sgreen		if (!cpipe->pipe_peer->pipe_present) {
1554101382Sdes			/* other end of pipe has been closed */
1555126131Sgreen			PIPE_UNLOCK(cpipe);
1556118929Sjmg			return (EPIPE);
1557126131Sgreen		}
1558126131Sgreen		cpipe = cpipe->pipe_peer;
155972521Sjlemon		break;
156072521Sjlemon	default:
1561126131Sgreen		PIPE_UNLOCK(cpipe);
1562133741Sjmg		return (EINVAL);
156372521Sjlemon	}
156478292Sjlemon
1565133741Sjmg	knlist_add(&cpipe->pipe_sel.si_note, kn, 1);
156691372Salfred	PIPE_UNLOCK(cpipe);
156759288Sjlemon	return (0);
156859288Sjlemon}
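
/*
 * Note that attaching EVFILT_WRITE fails with EPIPE once the peer endpoint
 * is gone, while EVFILT_READ always attaches to the read side.  From
 * userland the attach error surfaces via kevent(2), roughly as sketched
 * below (the errno check relies on passing an empty eventlist):
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent kev;
	int kq, pfd[2];

	if ((kq = kqueue()) == -1 || pipe(pfd) == -1)
		return (1);

	/* Registering for read events on the read end succeeds. */
	EV_SET(&kev, pfd[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		return (1);

	/* With the read end closed, an EVFILT_WRITE attach fails. */
	close(pfd[0]);
	EV_SET(&kev, pfd[1], EVFILT_WRITE, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		printf("EVFILT_WRITE attach failed: %s\n",
		    errno == EPIPE ? "EPIPE" : "unexpected errno");
	return (0);
}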
156959288Sjlemon
157059288Sjlemonstatic void
157159288Sjlemonfilt_pipedetach(struct knote *kn)
157259288Sjlemon{
1573121018Sjmg	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
157459288Sjlemon
1575126131Sgreen	PIPE_LOCK(cpipe);
1576121018Sjmg	if (kn->kn_filter == EVFILT_WRITE) {
1577126131Sgreen		if (!cpipe->pipe_peer->pipe_present) {
1578126131Sgreen			PIPE_UNLOCK(cpipe);
1579121018Sjmg			return;
1580126131Sgreen		}
1581121018Sjmg		cpipe = cpipe->pipe_peer;
1582121018Sjmg	}
1583133741Sjmg	knlist_remove(&cpipe->pipe_sel.si_note, kn, 1);
158491372Salfred	PIPE_UNLOCK(cpipe);
158559288Sjlemon}
158659288Sjlemon
158759288Sjlemon/*ARGSUSED*/
158859288Sjlemonstatic int
158959288Sjlemonfilt_piperead(struct knote *kn, long hint)
159059288Sjlemon{
1591109153Sdillon	struct pipe *rpipe = kn->kn_fp->f_data;
159259288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
1593133741Sjmg	int ret;
159459288Sjlemon
159591372Salfred	PIPE_LOCK(rpipe);
159659288Sjlemon	kn->kn_data = rpipe->pipe_buffer.cnt;
159759288Sjlemon	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
159859288Sjlemon		kn->kn_data = rpipe->pipe_map.cnt;
159959288Sjlemon
160059288Sjlemon	if ((rpipe->pipe_state & PIPE_EOF) ||
1601125364Srwatson	    (!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
160291372Salfred		kn->kn_flags |= EV_EOF;
160391372Salfred		PIPE_UNLOCK(rpipe);
160459288Sjlemon		return (1);
160559288Sjlemon	}
1606133741Sjmg	ret = kn->kn_data > 0;
160791372Salfred	PIPE_UNLOCK(rpipe);
1608133741Sjmg	return (ret);
160959288Sjlemon}
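
/*
 * For a kqueue consumer, kev.data carries the byte count computed above
 * and EV_EOF is reported once the writer has gone away.  A short sketch
 * of waiting on the read filter:
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent kev;
	int kq, pfd[2];

	if ((kq = kqueue()) == -1 || pipe(pfd) == -1)
		return (1);
	EV_SET(&kev, pfd[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		return (1);

	(void)write(pfd[1], "abc", 3);
	close(pfd[1]);			/* writer goes away after queueing data */

	if (kevent(kq, NULL, 0, &kev, 1, NULL) == 1)
		/* kev.data is the 3 buffered bytes; EV_EOF is also set. */
		printf("data=%jd eof=%d\n", (intmax_t)kev.data,
		    (kev.flags & EV_EOF) != 0);
	return (0);
}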
161059288Sjlemon
161159288Sjlemon/*ARGSUSED*/
161259288Sjlemonstatic int
161359288Sjlemonfilt_pipewrite(struct knote *kn, long hint)
161459288Sjlemon{
1615109153Sdillon	struct pipe *rpipe = kn->kn_fp->f_data;
161659288Sjlemon	struct pipe *wpipe = rpipe->pipe_peer;
161759288Sjlemon
161891372Salfred	PIPE_LOCK(rpipe);
1619125364Srwatson	if ((!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
162059288Sjlemon		kn->kn_data = 0;
1621124394Sdes		kn->kn_flags |= EV_EOF;
162291372Salfred		PIPE_UNLOCK(rpipe);
162359288Sjlemon		return (1);
162459288Sjlemon	}
162559288Sjlemon	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
162665855Sjlemon	if (wpipe->pipe_state & PIPE_DIRECTW)
162759288Sjlemon		kn->kn_data = 0;
162859288Sjlemon
162991372Salfred	PIPE_UNLOCK(rpipe);
163059288Sjlemon	return (kn->kn_data >= PIPE_BUF);
163159288Sjlemon}
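
/*
 * The write filter only reports the descriptor ready once at least
 * PIPE_BUF bytes of buffer space are free (and never while a direct
 * write is pending), with kev.data giving the space available.  An
 * illustrative sketch:
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent kev;
	struct timespec nowait = { 0, 0 };
	int kq, pfd[2];

	if ((kq = kqueue()) == -1 || pipe(pfd) == -1)
		return (1);
	EV_SET(&kev, pfd[1], EVFILT_WRITE, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		return (1);

	/* An empty pipe has well over PIPE_BUF bytes free, so this fires. */
	if (kevent(kq, NULL, 0, &kev, 1, &nowait) == 1)
		printf("writable, space=%jd bytes\n", (intmax_t)kev.data);
	return (0);
}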
1632