/* sys_pipe.c revision 54534 */
1163953Srrs/* 2185694Srrs * Copyright (c) 1996 John S. Dyson 3163953Srrs * All rights reserved. 4163953Srrs * 5163953Srrs * Redistribution and use in source and binary forms, with or without 6163953Srrs * modification, are permitted provided that the following conditions 7163953Srrs * are met: 8163953Srrs * 1. Redistributions of source code must retain the above copyright 9163953Srrs * notice immediately at the beginning of the file, without modification, 10163953Srrs * this list of conditions, and the following disclaimer. 11163953Srrs * 2. Redistributions in binary form must reproduce the above copyright 12163953Srrs * notice, this list of conditions and the following disclaimer in the 13163953Srrs * documentation and/or other materials provided with the distribution. 14163953Srrs * 3. Absolutely no warranty of function or purpose is made by the author 15163953Srrs * John S. Dyson. 16163953Srrs * 4. Modifications may be freely made to this file if the above conditions 17163953Srrs * are met. 18163953Srrs * 19163953Srrs * $FreeBSD: head/sys/kern/sys_pipe.c 54534 1999-12-13 02:55:47Z tegge $ 20163953Srrs */ 21163953Srrs 22163953Srrs/* 23163953Srrs * This file contains a high-performance replacement for the socket-based 24163953Srrs * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 25163953Srrs * all features of sockets, but does do everything that pipes normally 26163953Srrs * do. 27163953Srrs */ 28163953Srrs 29163953Srrs/* 30163953Srrs * This code has two modes of operation, a small write mode and a large 31163953Srrs * write mode. The small write mode acts like conventional pipes with 32163953Srrs * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 33163953Srrs * "normal" pipe buffering is done. 
If the buffer is between PIPE_MINDIRECT 34163953Srrs * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and 35166086Srrs * the receiving process can copy it directly from the pages in the sending 36163953Srrs * process. 37163953Srrs * 38163953Srrs * If the sending process receives a signal, it is possible that it will 39163953Srrs * go away, and certainly its address space can change, because control 40167695Srrs * is returned back to the user-mode side. In that case, the pipe code 41167695Srrs * arranges to copy the buffer supplied by the user process, to a pageable 42167598Srrs * kernel buffer, and the receiving process will grab the data from the 43163953Srrs * pageable kernel buffer. Since signals don't happen all that often, 44163953Srrs * the copy operation is normally eliminated. 45163953Srrs * 46163953Srrs * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 47163953Srrs * happen for small transfers so that the system will not spend all of 48163953Srrs * its time context switching. PIPE_SIZE is constrained by the 49163953Srrs * amount of kernel virtual memory. 
50170091Srrs */ 51185694Srrs 52164085Srrs#include <sys/param.h> 53163953Srrs#include <sys/systm.h> 54163953Srrs#include <sys/proc.h> 55217611Stuexen#include <sys/fcntl.h> 56217760Stuexen#include <sys/file.h> 57170091Srrs#include <sys/filedesc.h> 58163953Srrs#include <sys/filio.h> 59163953Srrs#include <sys/ttycom.h> 60163953Srrs#include <sys/stat.h> 61163953Srrs#include <sys/poll.h> 62163953Srrs#include <sys/select.h> 63179783Srrs#include <sys/signalvar.h> 64163953Srrs#include <sys/sysproto.h> 65179783Srrs#include <sys/pipe.h> 66179783Srrs#include <sys/uio.h> 67163953Srrs 68179783Srrs#include <vm/vm.h> 69163953Srrs#include <vm/vm_param.h> 70163953Srrs#include <sys/lock.h> 71163953Srrs#include <vm/vm_object.h> 72163953Srrs#include <vm/vm_kern.h> 73163953Srrs#include <vm/vm_extern.h> 74179783Srrs#include <vm/pmap.h> 75170056Srrs#include <vm/vm_map.h> 76163953Srrs#include <vm/vm_page.h> 77163953Srrs#include <vm/vm_zone.h> 78163953Srrs 79163953Srrs/* 80163953Srrs * Use this define if you want to disable *fancy* VM things. Expect an 81179783Srrs * approx 30% decrease in transfer rate. This could be useful for 82163953Srrs * NetBSD or OpenBSD. 
83179783Srrs */ 84179783Srrs/* #define PIPE_NODIRECT */ 85179783Srrs 86179783Srrs/* 87179783Srrs * interfaces to the outside world 88179783Srrs */ 89179783Srrsstatic int pipe_read __P((struct file *fp, struct uio *uio, 90179783Srrs struct ucred *cred, int flags, struct proc *p)); 91179783Srrsstatic int pipe_write __P((struct file *fp, struct uio *uio, 92179783Srrs struct ucred *cred, int flags, struct proc *p)); 93163953Srrsstatic int pipe_close __P((struct file *fp, struct proc *p)); 94163953Srrsstatic int pipe_poll __P((struct file *fp, int events, struct ucred *cred, 95179783Srrs struct proc *p)); 96179783Srrsstatic int pipe_stat __P((struct file *fp, struct stat *sb, struct proc *p)); 97179783Srrsstatic int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p)); 98179783Srrs 99179783Srrsstatic struct fileops pipeops = 100163953Srrs { pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_stat, pipe_close }; 101166023Srrs 102163953Srrs/* 103179157Srrs * Default pipe buffer size(s), this can be kind-of large now because pipe 104167695Srrs * space is pageable. The pipe code will try to maintain locality of 105163953Srrs * reference for performance reasons, so small amounts of outstanding I/O 106163953Srrs * will not wipe the cache. 107163953Srrs */ 108163953Srrs#define MINPIPESIZE (PIPE_SIZE/3) 109163953Srrs#define MAXPIPESIZE (2*PIPE_SIZE/3) 110197257Stuexen 111163953Srrs/* 112163953Srrs * Maximum amount of kva for pipes -- this is kind-of a soft limit, but 113163953Srrs * is there so that on large systems, we don't exhaust it. 114163953Srrs */ 115197257Stuexen#define MAXPIPEKVA (8*1024*1024) 116197257Stuexen 117197257Stuexen/* 118197257Stuexen * Limit for direct transfers, we cannot, of course limit 119163953Srrs * the amount of kva for pipes in general though. 
120197257Stuexen */ 121163953Srrs#define LIMITPIPEKVA (16*1024*1024) 122163953Srrs 123163953Srrs/* 124163953Srrs * Limit the number of "big" pipes 125197257Stuexen */ 126163953Srrs#define LIMITBIGPIPES 32 127163953Srrsstatic int nbigpipe; 128163953Srrs 129163953Srrsstatic int amountpipekva; 130163953Srrs 131190689Srrsstatic void pipeclose __P((struct pipe *cpipe)); 132190689Srrsstatic void pipeinit __P((struct pipe *cpipe)); 133190689Srrsstatic __inline int pipelock __P((struct pipe *cpipe, int catch)); 134190689Srrsstatic __inline void pipeunlock __P((struct pipe *cpipe)); 135163953Srrsstatic __inline void pipeselwakeup __P((struct pipe *cpipe)); 136163953Srrs#ifndef PIPE_NODIRECT 137163953Srrsstatic int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio)); 138163953Srrsstatic void pipe_destroy_write_buffer __P((struct pipe *wpipe)); 139163953Srrsstatic int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); 140179783Srrsstatic void pipe_clone_write_buffer __P((struct pipe *wpipe)); 141170744Srrs#endif 142170744Srrsstatic void pipespace __P((struct pipe *cpipe)); 143170744Srrs 144170744Srrsstatic vm_zone_t pipe_zone; 145170744Srrs 146170744Srrs/* 147163953Srrs * The pipe system call for the DTYPE_PIPE type of pipes 148163953Srrs */ 149163953Srrs 150163953Srrs/* ARGSUSED */ 151163953Srrsint 152163953Srrspipe(p, uap) 153163953Srrs struct proc *p; 154163953Srrs struct pipe_args /* { 155163953Srrs int dummy; 156163953Srrs } */ *uap; 157163953Srrs{ 158163953Srrs register struct filedesc *fdp = p->p_fd; 159163953Srrs struct file *rf, *wf; 160163953Srrs struct pipe *rpipe, *wpipe; 161163953Srrs int fd, error; 162163953Srrs 163163953Srrs if (pipe_zone == NULL) 164163953Srrs pipe_zone = zinit("PIPE", sizeof (struct pipe), 0, 0, 4); 165163953Srrs 166163953Srrs rpipe = zalloc( pipe_zone); 167169420Srrs pipeinit(rpipe); 168163953Srrs rpipe->pipe_state |= PIPE_DIRECTOK; 169169420Srrs wpipe = zalloc( pipe_zone); 170163953Srrs pipeinit(wpipe); 
171163953Srrs wpipe->pipe_state |= PIPE_DIRECTOK; 172163953Srrs 173163953Srrs error = falloc(p, &rf, &fd); 174163953Srrs if (error) 175163953Srrs goto free2; 176163953Srrs p->p_retval[0] = fd; 177163953Srrs rf->f_flag = FREAD | FWRITE; 178163953Srrs rf->f_type = DTYPE_PIPE; 179163953Srrs rf->f_data = (caddr_t)rpipe; 180163953Srrs rf->f_ops = &pipeops; 181163953Srrs error = falloc(p, &wf, &fd); 182163953Srrs if (error) 183163953Srrs goto free3; 184163953Srrs wf->f_flag = FREAD | FWRITE; 185163953Srrs wf->f_type = DTYPE_PIPE; 186163953Srrs wf->f_data = (caddr_t)wpipe; 187163953Srrs wf->f_ops = &pipeops; 188163953Srrs p->p_retval[1] = fd; 189163953Srrs 190163953Srrs rpipe->pipe_peer = wpipe; 191171943Srrs wpipe->pipe_peer = rpipe; 192163953Srrs 193163953Srrs return (0); 194163953Srrsfree3: 195163953Srrs fdp->fd_ofiles[p->p_retval[0]] = 0; 196163953Srrs ffree(rf); 197163953Srrsfree2: 198214939Stuexen (void)pipeclose(wpipe); 199163953Srrs (void)pipeclose(rpipe); 200163953Srrs return (error); 201165647Srrs} 202163953Srrs 203165220Srrs/* 204165220Srrs * Allocate kva for pipe circular buffer, the space is pageable 205163953Srrs */ 206163953Srrsstatic void 207163953Srrspipespace(cpipe) 208163953Srrs struct pipe *cpipe; 209185694Srrs{ 210185694Srrs int npages, error; 211185694Srrs 212163953Srrs npages = round_page(cpipe->pipe_buffer.size)/PAGE_SIZE; 213163953Srrs /* 214163953Srrs * Create an object, I don't like the idea of paging to/from 215167695Srrs * kernel_object. 216163953Srrs * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 217163953Srrs */ 218163953Srrs cpipe->pipe_buffer.object = vm_object_allocate(OBJT_DEFAULT, npages); 219163953Srrs cpipe->pipe_buffer.buffer = (caddr_t) vm_map_min(kernel_map); 220163953Srrs 221163953Srrs /* 222163953Srrs * Insert the object into the kernel map, and allocate kva for it. 223163953Srrs * The map entry is, by default, pageable. 224163953Srrs * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 
225163953Srrs */ 226172091Srrs error = vm_map_find(kernel_map, cpipe->pipe_buffer.object, 0, 227163953Srrs (vm_offset_t *) &cpipe->pipe_buffer.buffer, 228163953Srrs cpipe->pipe_buffer.size, 1, 229163953Srrs VM_PROT_ALL, VM_PROT_ALL, 0); 230163953Srrs 231163953Srrs if (error != KERN_SUCCESS) 232172090Srrs panic("pipeinit: cannot allocate pipe -- out of kvm -- code = %d", error); 233172090Srrs amountpipekva += cpipe->pipe_buffer.size; 234172090Srrs} 235172090Srrs 236163953Srrs/* 237172091Srrs * initialize and allocate VM and memory for pipe 238172091Srrs */ 239172091Srrsstatic void 240172091Srrspipeinit(cpipe) 241163953Srrs struct pipe *cpipe; 242163953Srrs{ 243172091Srrs 244172091Srrs cpipe->pipe_buffer.in = 0; 245163953Srrs cpipe->pipe_buffer.out = 0; 246163953Srrs cpipe->pipe_buffer.cnt = 0; 247163953Srrs cpipe->pipe_buffer.size = PIPE_SIZE; 248163953Srrs 249172091Srrs /* Buffer kva gets dynamically allocated */ 250163953Srrs cpipe->pipe_buffer.buffer = NULL; 251163953Srrs /* cpipe->pipe_buffer.object = invalid */ 252172091Srrs 253172091Srrs cpipe->pipe_state = 0; 254172091Srrs cpipe->pipe_peer = NULL; 255172091Srrs cpipe->pipe_busy = 0; 256172091Srrs getnanotime(&cpipe->pipe_ctime); 257172091Srrs cpipe->pipe_atime = cpipe->pipe_ctime; 258172091Srrs cpipe->pipe_mtime = cpipe->pipe_ctime; 259172091Srrs bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel); 260172091Srrs 261172091Srrs#ifndef PIPE_NODIRECT 262172091Srrs /* 263172091Srrs * pipe data structure initializations to support direct pipe I/O 264172091Srrs */ 265172091Srrs cpipe->pipe_map.cnt = 0; 266172091Srrs cpipe->pipe_map.kva = 0; 267163953Srrs cpipe->pipe_map.pos = 0; 268163953Srrs cpipe->pipe_map.npages = 0; 269163953Srrs /* cpipe->pipe_map.ms[] = invalid */ 270163953Srrs#endif 271163953Srrs} 272163953Srrs 273163953Srrs 274172091Srrs/* 275172091Srrs * lock a pipe for I/O, blocking other access 276172091Srrs */ 277172091Srrsstatic __inline int 278172091Srrspipelock(cpipe, catch) 279172091Srrs struct pipe 
*cpipe; 280167598Srrs int catch; 281172091Srrs{ 282172091Srrs int error; 283163953Srrs while (cpipe->pipe_state & PIPE_LOCK) { 284172091Srrs cpipe->pipe_state |= PIPE_LWANT; 285172091Srrs if ((error = tsleep( cpipe, 286172091Srrs catch?(PRIBIO|PCATCH):PRIBIO, "pipelk", 0)) != 0) { 287172091Srrs return error; 288172091Srrs } 289172091Srrs } 290163953Srrs cpipe->pipe_state |= PIPE_LOCK; 291172091Srrs return 0; 292172091Srrs} 293172091Srrs 294172091Srrs/* 295172091Srrs * unlock a pipe I/O lock 296216669Stuexen */ 297211944Stuexenstatic __inline void 298172091Srrspipeunlock(cpipe) 299172091Srrs struct pipe *cpipe; 300172091Srrs{ 301172091Srrs cpipe->pipe_state &= ~PIPE_LOCK; 302172091Srrs if (cpipe->pipe_state & PIPE_LWANT) { 303172091Srrs cpipe->pipe_state &= ~PIPE_LWANT; 304172091Srrs wakeup(cpipe); 305172091Srrs } 306172091Srrs} 307172091Srrs 308172091Srrsstatic __inline void 309172091Srrspipeselwakeup(cpipe) 310172091Srrs struct pipe *cpipe; 311172091Srrs{ 312172091Srrs if (cpipe->pipe_state & PIPE_SEL) { 313172091Srrs cpipe->pipe_state &= ~PIPE_SEL; 314172091Srrs selwakeup(&cpipe->pipe_sel); 315172091Srrs } 316172091Srrs if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 317172091Srrs pgsigio(cpipe->pipe_sigio, SIGIO, 0); 318172091Srrs} 319172090Srrs 320172091Srrs/* ARGSUSED */ 321172091Srrsstatic int 322172091Srrspipe_read(fp, uio, cred, flags, p) 323172091Srrs struct file *fp; 324172091Srrs struct uio *uio; 325172091Srrs struct ucred *cred; 326172090Srrs struct proc *p; 327172091Srrs int flags; 328172090Srrs{ 329172091Srrs 330172091Srrs struct pipe *rpipe = (struct pipe *) fp->f_data; 331172090Srrs int error; 332172091Srrs int nread = 0; 333163953Srrs u_int size; 334172091Srrs 335163953Srrs ++rpipe->pipe_busy; 336163953Srrs error = pipelock(rpipe, 1); 337163953Srrs if (error) 338163953Srrs goto unlocked_error; 339163953Srrs 340163953Srrs while (uio->uio_resid) { 341163953Srrs /* 342163953Srrs * normal pipe buffer receive 343163953Srrs */ 344163953Srrs if 
(rpipe->pipe_buffer.cnt > 0) { 345163953Srrs size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 346167598Srrs if (size > rpipe->pipe_buffer.cnt) 347163953Srrs size = rpipe->pipe_buffer.cnt; 348168299Srrs if (size > (u_int) uio->uio_resid) 349167598Srrs size = (u_int) uio->uio_resid; 350163953Srrs 351163953Srrs error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 352163953Srrs size, uio); 353163953Srrs if (error) { 354163953Srrs break; 355163953Srrs } 356163953Srrs rpipe->pipe_buffer.out += size; 357163953Srrs if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 358163953Srrs rpipe->pipe_buffer.out = 0; 359163953Srrs 360163953Srrs rpipe->pipe_buffer.cnt -= size; 361163953Srrs 362163953Srrs /* 363163953Srrs * If there is no more to read in the pipe, reset 364163953Srrs * its pointers to the beginning. This improves 365163953Srrs * cache hit stats. 366163953Srrs */ 367163953Srrs if (rpipe->pipe_buffer.cnt == 0) { 368163953Srrs rpipe->pipe_buffer.in = 0; 369163953Srrs rpipe->pipe_buffer.out = 0; 370163953Srrs } 371163953Srrs nread += size; 372163953Srrs#ifndef PIPE_NODIRECT 373163953Srrs /* 374163953Srrs * Direct copy, bypassing a kernel buffer. 
375163953Srrs */ 376163953Srrs } else if ((size = rpipe->pipe_map.cnt) && 377163953Srrs (rpipe->pipe_state & PIPE_DIRECTW)) { 378163953Srrs caddr_t va; 379163953Srrs if (size > (u_int) uio->uio_resid) 380163953Srrs size = (u_int) uio->uio_resid; 381163953Srrs 382167598Srrs va = (caddr_t) rpipe->pipe_map.kva + rpipe->pipe_map.pos; 383163953Srrs error = uiomove(va, size, uio); 384163953Srrs if (error) 385172091Srrs break; 386163953Srrs nread += size; 387163953Srrs rpipe->pipe_map.pos += size; 388163953Srrs rpipe->pipe_map.cnt -= size; 389163953Srrs if (rpipe->pipe_map.cnt == 0) { 390163953Srrs rpipe->pipe_state &= ~PIPE_DIRECTW; 391163953Srrs wakeup(rpipe); 392163953Srrs } 393163953Srrs#endif 394163953Srrs } else { 395163953Srrs /* 396163953Srrs * detect EOF condition 397163953Srrs */ 398163953Srrs if (rpipe->pipe_state & PIPE_EOF) { 399209029Srrs /* XXX error = ? */ 400209029Srrs break; 401209029Srrs } 402163953Srrs 403163953Srrs /* 404163953Srrs * If the "write-side" has been blocked, wake it up now. 405163953Srrs */ 406163953Srrs if (rpipe->pipe_state & PIPE_WANTW) { 407163953Srrs rpipe->pipe_state &= ~PIPE_WANTW; 408163953Srrs wakeup(rpipe); 409163953Srrs } 410164085Srrs 411163953Srrs /* 412163953Srrs * Break if some data was read. 413163953Srrs */ 414163953Srrs if (nread > 0) 415164085Srrs break; 416167598Srrs 417163953Srrs /* 418168299Srrs * Unlock the pipe buffer for our remaining processing. We 419167598Srrs * will either break out with an error or we will sleep and 420168299Srrs * relock to loop. 421170587Srwatson */ 422170587Srwatson pipeunlock(rpipe); 423163953Srrs 424163953Srrs /* 425164039Srwatson * Handle non-blocking mode operation or 426163953Srrs * wait for more data. 
427163953Srrs */ 428163953Srrs if (fp->f_flag & FNONBLOCK) 429163953Srrs error = EAGAIN; 430163953Srrs else { 431163953Srrs rpipe->pipe_state |= PIPE_WANTR; 432167598Srrs if ((error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) == 0) 433163953Srrs error = pipelock(rpipe, 1); 434163953Srrs } 435163953Srrs if (error) 436163953Srrs goto unlocked_error; 437163953Srrs } 438164085Srrs } 439163953Srrs pipeunlock(rpipe); 440171943Srrs 441163953Srrs if (error == 0) 442163953Srrs getnanotime(&rpipe->pipe_atime); 443163953Srrsunlocked_error: 444163953Srrs --rpipe->pipe_busy; 445164085Srrs 446164085Srrs /* 447164085Srrs * PIPE_WANT processing only makes sense if pipe_busy is 0. 448164085Srrs */ 449164085Srrs if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 450164085Srrs rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 451164085Srrs wakeup(rpipe); 452164085Srrs } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 453164085Srrs /* 454164085Srrs * Handle write blocking hysteresis. 455164085Srrs */ 456164085Srrs if (rpipe->pipe_state & PIPE_WANTW) { 457164085Srrs rpipe->pipe_state &= ~PIPE_WANTW; 458164085Srrs wakeup(rpipe); 459164085Srrs } 460164085Srrs } 461163953Srrs 462163953Srrs if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 463163953Srrs pipeselwakeup(rpipe); 464163953Srrs 465163953Srrs return error; 466163953Srrs} 467163953Srrs 468163953Srrs#ifndef PIPE_NODIRECT 469163953Srrs/* 470163953Srrs * Map the sending processes' buffer into kernel space and wire it. 471163953Srrs * This is similar to a physical write operation. 
472163953Srrs */ 473163953Srrsstatic int 474163953Srrspipe_build_write_buffer(wpipe, uio) 475163953Srrs struct pipe *wpipe; 476171943Srrs struct uio *uio; 477163953Srrs{ 478171943Srrs u_int size; 479163953Srrs int i; 480163953Srrs vm_offset_t addr, endaddr, paddr; 481163953Srrs 482163953Srrs size = (u_int) uio->uio_iov->iov_len; 483163953Srrs if (size > wpipe->pipe_buffer.size) 484163953Srrs size = wpipe->pipe_buffer.size; 485163953Srrs 486163953Srrs endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 487163953Srrs for(i = 0, addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 488163953Srrs addr < endaddr; 489169380Srrs addr += PAGE_SIZE, i+=1) { 490169380Srrs 491163953Srrs vm_page_t m; 492167695Srrs 493163953Srrs if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 || 494163953Srrs (paddr = pmap_kextract(addr)) == 0) { 495163953Srrs int j; 496163953Srrs for(j=0;j<i;j++) 497167695Srrs vm_page_unwire(wpipe->pipe_map.ms[j], 1); 498167695Srrs return EFAULT; 499167695Srrs } 500163953Srrs 501163953Srrs m = PHYS_TO_VM_PAGE(paddr); 502163953Srrs vm_page_wire(m); 503163953Srrs wpipe->pipe_map.ms[i] = m; 504163953Srrs } 505163953Srrs 506163953Srrs/* 507163953Srrs * set up the control block 508163953Srrs */ 509163953Srrs wpipe->pipe_map.npages = i; 510163953Srrs wpipe->pipe_map.pos = ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 511163953Srrs wpipe->pipe_map.cnt = size; 512163953Srrs 513163953Srrs/* 514163953Srrs * and map the buffer 515163953Srrs */ 516166086Srrs if (wpipe->pipe_map.kva == 0) { 517170205Srrs /* 518185694Srrs * We need to allocate space for an extra page because the 519171167Sgnn * address range might (will) span pages at times. 
520163953Srrs */ 521185694Srrs wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, 522185435Sbz wpipe->pipe_buffer.size + PAGE_SIZE); 523171440Srrs amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; 524163953Srrs } 525163953Srrs pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 526171943Srrs wpipe->pipe_map.npages); 527163953Srrs 528163953Srrs/* 529184030Srrs * and update the uio data 530184030Srrs */ 531184030Srrs 532184030Srrs uio->uio_iov->iov_len -= size; 533184030Srrs uio->uio_iov->iov_base += size; 534163953Srrs if (uio->uio_iov->iov_len == 0) 535170205Srrs uio->uio_iov++; 536163953Srrs uio->uio_resid -= size; 537163953Srrs uio->uio_offset += size; 538163953Srrs return 0; 539163953Srrs} 540163953Srrs 541163953Srrs/* 542163953Srrs * unmap and unwire the process buffer 543163953Srrs */ 544197288Srrsstatic void 545171167Sgnnpipe_destroy_write_buffer(wpipe) 546171133Sgnnstruct pipe *wpipe; 547163953Srrs{ 548163953Srrs int i; 549163953Srrs if (wpipe->pipe_map.kva) { 550163953Srrs pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 551202523Srrs 552163953Srrs if (amountpipekva > MAXPIPEKVA) { 553163953Srrs vm_offset_t kva = wpipe->pipe_map.kva; 554163953Srrs wpipe->pipe_map.kva = 0; 555163953Srrs kmem_free(kernel_map, kva, 556163953Srrs wpipe->pipe_buffer.size + PAGE_SIZE); 557163953Srrs amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 558169352Srrs } 559169380Srrs } 560169380Srrs for (i=0;i<wpipe->pipe_map.npages;i++) 561169254Srrs vm_page_unwire(wpipe->pipe_map.ms[i], 1); 562202523Srrs} 563202523Srrs 564202523Srrs/* 565202523Srrs * In the case of a signal, the writing process might go away. This 566202523Srrs * code copies the data into the circular buffer so that the source 567202523Srrs * pages can be freed without loss of data. 
568163953Srrs */ 569163953Srrsstatic void 570163953Srrspipe_clone_write_buffer(wpipe) 571171167Sgnnstruct pipe *wpipe; 572163953Srrs{ 573163953Srrs int size; 574163953Srrs int pos; 575163953Srrs 576163953Srrs size = wpipe->pipe_map.cnt; 577163953Srrs pos = wpipe->pipe_map.pos; 578163953Srrs bcopy((caddr_t) wpipe->pipe_map.kva+pos, 579171943Srrs (caddr_t) wpipe->pipe_buffer.buffer, 580166086Srrs size); 581163953Srrs 582163953Srrs wpipe->pipe_buffer.in = size; 583171943Srrs wpipe->pipe_buffer.out = 0; 584163953Srrs wpipe->pipe_buffer.cnt = size; 585171943Srrs wpipe->pipe_state &= ~PIPE_DIRECTW; 586163953Srrs 587171943Srrs pipe_destroy_write_buffer(wpipe); 588163953Srrs} 589170056Srrs 590171943Srrs/* 591170056Srrs * This implements the pipe buffer write mechanism. Note that only 592170056Srrs * a direct write OR a normal pipe write can be pending at any given time. 593163953Srrs * If there are any characters in the pipe buffer, the direct write will 594171943Srrs * be deferred until the receiving process grabs all of the bytes from 595171943Srrs * the pipe buffer. Then the direct mapping write is set-up. 
596163953Srrs */ 597171943Srrsstatic int 598171572Srrspipe_direct_write(wpipe, uio) 599163953Srrs struct pipe *wpipe; 600163953Srrs struct uio *uio; 601163953Srrs{ 602171990Srrs int error; 603163953Srrsretry: 604163953Srrs while (wpipe->pipe_state & PIPE_DIRECTW) { 605163953Srrs if ( wpipe->pipe_state & PIPE_WANTR) { 606163953Srrs wpipe->pipe_state &= ~PIPE_WANTR; 607163953Srrs wakeup(wpipe); 608163953Srrs } 609163953Srrs wpipe->pipe_state |= PIPE_WANTW; 610163953Srrs error = tsleep(wpipe, 611163953Srrs PRIBIO|PCATCH, "pipdww", 0); 612163953Srrs if (error) 613163953Srrs goto error1; 614163953Srrs if (wpipe->pipe_state & PIPE_EOF) { 615163953Srrs error = EPIPE; 616163953Srrs goto error1; 617163953Srrs } 618163953Srrs } 619163953Srrs wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 620163953Srrs if (wpipe->pipe_buffer.cnt > 0) { 621163953Srrs if ( wpipe->pipe_state & PIPE_WANTR) { 622163953Srrs wpipe->pipe_state &= ~PIPE_WANTR; 623163953Srrs wakeup(wpipe); 624163953Srrs } 625163953Srrs 626163953Srrs wpipe->pipe_state |= PIPE_WANTW; 627169380Srrs error = tsleep(wpipe, 628169380Srrs PRIBIO|PCATCH, "pipdwc", 0); 629163953Srrs if (error) 630163953Srrs goto error1; 631163953Srrs if (wpipe->pipe_state & PIPE_EOF) { 632163953Srrs error = EPIPE; 633169380Srrs goto error1; 634169380Srrs } 635163953Srrs goto retry; 636163953Srrs } 637163953Srrs 638163953Srrs wpipe->pipe_state |= PIPE_DIRECTW; 639163953Srrs 640163953Srrs error = pipe_build_write_buffer(wpipe, uio); 641167695Srrs if (error) { 642163953Srrs wpipe->pipe_state &= ~PIPE_DIRECTW; 643163953Srrs goto error1; 644163953Srrs } 645163953Srrs 646167695Srrs error = 0; 647167695Srrs while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 648167695Srrs if (wpipe->pipe_state & PIPE_EOF) { 649163953Srrs pipelock(wpipe, 0); 650163953Srrs pipe_destroy_write_buffer(wpipe); 651163953Srrs pipeunlock(wpipe); 652163953Srrs pipeselwakeup(wpipe); 653163953Srrs error = EPIPE; 654163953Srrs goto error1; 655163953Srrs } 656163953Srrs if 
(wpipe->pipe_state & PIPE_WANTR) { 657163953Srrs wpipe->pipe_state &= ~PIPE_WANTR; 658163953Srrs wakeup(wpipe); 659163953Srrs } 660163953Srrs pipeselwakeup(wpipe); 661163953Srrs error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwt", 0); 662163953Srrs } 663163953Srrs 664163953Srrs pipelock(wpipe,0); 665163953Srrs if (wpipe->pipe_state & PIPE_DIRECTW) { 666163953Srrs /* 667163953Srrs * this bit of trickery substitutes a kernel buffer for 668163953Srrs * the process that might be going away. 669163953Srrs */ 670163953Srrs pipe_clone_write_buffer(wpipe); 671163953Srrs } else { 672163953Srrs pipe_destroy_write_buffer(wpipe); 673163953Srrs } 674163953Srrs pipeunlock(wpipe); 675163953Srrs return error; 676163953Srrs 677163953Srrserror1: 678163953Srrs wakeup(wpipe); 679171943Srrs return error; 680163953Srrs} 681163953Srrs#endif 682163953Srrs 683163953Srrsstatic int 684163953Srrspipe_write(fp, uio, cred, flags, p) 685163953Srrs struct file *fp; 686163953Srrs struct uio *uio; 687163953Srrs struct ucred *cred; 688163953Srrs struct proc *p; 689163953Srrs int flags; 690171943Srrs{ 691163953Srrs int error = 0; 692163953Srrs int orig_resid; 693163953Srrs 694163953Srrs struct pipe *wpipe, *rpipe; 695163953Srrs 696163953Srrs rpipe = (struct pipe *) fp->f_data; 697163953Srrs wpipe = rpipe->pipe_peer; 698163953Srrs 699163953Srrs /* 700163953Srrs * detect loss of pipe read side, issue SIGPIPE if lost. 701163953Srrs */ 702171943Srrs if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 703163953Srrs return EPIPE; 704163953Srrs } 705163953Srrs 706163953Srrs /* 707163953Srrs * If it is advantageous to resize the pipe buffer, do 708163953Srrs * so. 
709171943Srrs */ 710163953Srrs if ((uio->uio_resid > PIPE_SIZE) && 711163953Srrs (nbigpipe < LIMITBIGPIPES) && 712163953Srrs (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 713163953Srrs (wpipe->pipe_buffer.size <= PIPE_SIZE) && 714163953Srrs (wpipe->pipe_buffer.cnt == 0)) { 715163953Srrs 716169420Srrs if (wpipe->pipe_buffer.buffer) { 717163953Srrs amountpipekva -= wpipe->pipe_buffer.size; 718163953Srrs kmem_free(kernel_map, 719163953Srrs (vm_offset_t)wpipe->pipe_buffer.buffer, 720163953Srrs wpipe->pipe_buffer.size); 721163953Srrs } 722163953Srrs 723163953Srrs#ifndef PIPE_NODIRECT 724165647Srrs if (wpipe->pipe_map.kva) { 725163953Srrs amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 726163953Srrs kmem_free(kernel_map, 727163953Srrs wpipe->pipe_map.kva, 728163953Srrs wpipe->pipe_buffer.size + PAGE_SIZE); 729163953Srrs } 730163953Srrs#endif 731163953Srrs 732163953Srrs wpipe->pipe_buffer.in = 0; 733163953Srrs wpipe->pipe_buffer.out = 0; 734163953Srrs wpipe->pipe_buffer.cnt = 0; 735163953Srrs wpipe->pipe_buffer.size = BIG_PIPE_SIZE; 736163953Srrs wpipe->pipe_buffer.buffer = NULL; 737163953Srrs ++nbigpipe; 738163953Srrs 739163953Srrs#ifndef PIPE_NODIRECT 740163953Srrs wpipe->pipe_map.cnt = 0; 741163953Srrs wpipe->pipe_map.kva = 0; 742163953Srrs wpipe->pipe_map.pos = 0; 743163953Srrs wpipe->pipe_map.npages = 0; 744163953Srrs#endif 745163953Srrs 746163953Srrs } 747163953Srrs 748163953Srrs 749163953Srrs if( wpipe->pipe_buffer.buffer == NULL) { 750163953Srrs if ((error = pipelock(wpipe,1)) == 0) { 751163953Srrs pipespace(wpipe); 752171990Srrs pipeunlock(wpipe); 753163953Srrs } else { 754163953Srrs return error; 755163953Srrs } 756163953Srrs } 757163953Srrs 758163953Srrs ++wpipe->pipe_busy; 759171943Srrs orig_resid = uio->uio_resid; 760163953Srrs while (uio->uio_resid) { 761163953Srrs int space; 762163953Srrs#ifndef PIPE_NODIRECT 763171745Srrs /* 764171745Srrs * If the transfer is large, we can gain performance if 765199437Stuexen * we do process-to-process copies directly. 
766163953Srrs * If the write is non-blocking, we don't use the 767163953Srrs * direct write mechanism. 768163953Srrs */ 769163953Srrs if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 770163953Srrs (fp->f_flag & FNONBLOCK) == 0 && 771163953Srrs (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && 772163953Srrs (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { 773163953Srrs error = pipe_direct_write( wpipe, uio); 774163953Srrs if (error) { 775163953Srrs break; 776171943Srrs } 777163953Srrs continue; 778163953Srrs } 779163953Srrs#endif 780163953Srrs 781163953Srrs /* 782163953Srrs * Pipe buffered writes cannot be coincidental with 783163953Srrs * direct writes. We wait until the currently executing 784163953Srrs * direct write is completed before we start filling the 785163953Srrs * pipe buffer. 786163953Srrs */ 787163953Srrs retrywrite: 788163953Srrs while (wpipe->pipe_state & PIPE_DIRECTW) { 789163953Srrs if (wpipe->pipe_state & PIPE_WANTR) { 790163953Srrs wpipe->pipe_state &= ~PIPE_WANTR; 791163953Srrs wakeup(wpipe); 792163953Srrs } 793163953Srrs error = tsleep(wpipe, 794163953Srrs PRIBIO|PCATCH, "pipbww", 0); 795163953Srrs if (error) 796163953Srrs break; 797163953Srrs } 798163953Srrs 799163953Srrs space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 800163953Srrs 801163953Srrs /* Writes of size <= PIPE_BUF must be atomic. */ 802163953Srrs if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 803163953Srrs space = 0; 804165647Srrs 805163953Srrs if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { 806165647Srrs if ((error = pipelock(wpipe,1)) == 0) { 807163953Srrs int size; /* Transfer size */ 808172396Srrs int segsize; /* first segment to transfer */ 809172396Srrs /* 810172396Srrs * It is possible for a direct write to 811172090Srrs * slip in on us... handle it here... 
812163953Srrs */ 813163953Srrs if (wpipe->pipe_state & PIPE_DIRECTW) { 814163953Srrs pipeunlock(wpipe); 815163953Srrs goto retrywrite; 816163953Srrs } 817163953Srrs /* 818163953Srrs * If a process blocked in uiomove, our 819171943Srrs * value for space might be bad. 820163953Srrs */ 821163953Srrs if (space > wpipe->pipe_buffer.size - 822163953Srrs wpipe->pipe_buffer.cnt) { 823163953Srrs pipeunlock(wpipe); 824163953Srrs goto retrywrite; 825163953Srrs } 826163953Srrs 827163953Srrs /* 828163953Srrs * Transfer size is minimum of uio transfer 829163953Srrs * and free space in pipe buffer. 830166675Srrs */ 831166675Srrs if (space > uio->uio_resid) 832163953Srrs size = uio->uio_resid; 833163953Srrs else 834163953Srrs size = space; 835163953Srrs /* 836172090Srrs * First segment to transfer is minimum of 837166675Srrs * transfer size and contiguous space in 838166675Srrs * pipe buffer. If first segment to transfer 839166675Srrs * is less than the transfer size, we've got 840166675Srrs * a wraparound in the buffer. 841171943Srrs */ 842172703Srrs segsize = wpipe->pipe_buffer.size - 843163953Srrs wpipe->pipe_buffer.in; 844163953Srrs if (segsize > size) 845163953Srrs segsize = size; 846163953Srrs 847163953Srrs /* Transfer first segment */ 848163953Srrs 849163953Srrs error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 850163953Srrs segsize, uio); 851163953Srrs 852163953Srrs if (error == 0 && segsize < size) { 853163953Srrs /* 854163953Srrs * Transfer remaining part now, to 855163953Srrs * support atomic writes. Wraparound 856163953Srrs * happened. 
857163953Srrs */ 858163953Srrs if (wpipe->pipe_buffer.in + segsize != 859163953Srrs wpipe->pipe_buffer.size) 860163953Srrs panic("Expected pipe buffer wraparound disappeared"); 861163953Srrs 862163953Srrs error = uiomove(&wpipe->pipe_buffer.buffer[0], 863163953Srrs size - segsize, uio); 864163953Srrs } 865163953Srrs if (error == 0) { 866163953Srrs wpipe->pipe_buffer.in += size; 867163953Srrs if (wpipe->pipe_buffer.in >= 868163953Srrs wpipe->pipe_buffer.size) { 869163953Srrs if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size) 870169420Srrs panic("Expected wraparound bad"); 871163953Srrs wpipe->pipe_buffer.in = size - segsize; 872163953Srrs } 873163953Srrs 874163953Srrs wpipe->pipe_buffer.cnt += size; 875163953Srrs if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size) 876163953Srrs panic("Pipe buffer overflow"); 877163953Srrs 878163953Srrs } 879163953Srrs pipeunlock(wpipe); 880163953Srrs } 881163953Srrs if (error) 882163953Srrs break; 883163953Srrs 884163953Srrs } else { 885163953Srrs /* 886163953Srrs * If the "read-side" has been blocked, wake it up now. 887163953Srrs */ 888163953Srrs if (wpipe->pipe_state & PIPE_WANTR) { 889163953Srrs wpipe->pipe_state &= ~PIPE_WANTR; 890163953Srrs wakeup(wpipe); 891163953Srrs } 892163953Srrs 893165647Srrs /* 894163953Srrs * don't block on non-blocking I/O 895163953Srrs */ 896163953Srrs if (fp->f_flag & FNONBLOCK) { 897163953Srrs error = EAGAIN; 898163953Srrs break; 899165647Srrs } 900163953Srrs 901165220Srrs /* 902163953Srrs * We have no more space and have something to offer, 903172396Srrs * wake up select/poll. 904172396Srrs */ 905172396Srrs pipeselwakeup(wpipe); 906172396Srrs 907165220Srrs wpipe->pipe_state |= PIPE_WANTW; 908172090Srrs if ((error = tsleep(wpipe, (PRIBIO+1)|PCATCH, "pipewr", 0)) != 0) { 909163953Srrs break; 910163953Srrs } 911163953Srrs /* 912163953Srrs * If read side wants to go away, we just issue a signal 913163953Srrs * to ourselves. 
914163953Srrs */ 915171943Srrs if (wpipe->pipe_state & PIPE_EOF) { 916163953Srrs error = EPIPE; 917171990Srrs break; 918172090Srrs } 919163953Srrs } 920163953Srrs } 921188067Srrs 922163953Srrs --wpipe->pipe_busy; 923163953Srrs if ((wpipe->pipe_busy == 0) && 924163953Srrs (wpipe->pipe_state & PIPE_WANT)) { 925163953Srrs wpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTR); 926163953Srrs wakeup(wpipe); 927163953Srrs } else if (wpipe->pipe_buffer.cnt > 0) { 928163953Srrs /* 929163953Srrs * If we have put any characters in the buffer, we wake up 930171943Srrs * the reader. 931163953Srrs */ 932163953Srrs if (wpipe->pipe_state & PIPE_WANTR) { 933163953Srrs wpipe->pipe_state &= ~PIPE_WANTR; 934163953Srrs wakeup(wpipe); 935163953Srrs } 936178202Srrs } 937178202Srrs 938178202Srrs /* 939178202Srrs * Don't return EPIPE if I/O was successful 940178202Srrs */ 941178202Srrs if ((wpipe->pipe_buffer.cnt == 0) && 942178202Srrs (uio->uio_resid == 0) && 943178202Srrs (error == EPIPE)) 944209289Stuexen error = 0; 945209289Stuexen 946209289Stuexen if (error == 0) 947209289Stuexen getnanotime(&wpipe->pipe_mtime); 948209289Stuexen 949209289Stuexen /* 950209289Stuexen * We have something to offer, 951209289Stuexen * wake up select/poll. 952209289Stuexen */ 953209289Stuexen if (wpipe->pipe_buffer.cnt) 954209289Stuexen pipeselwakeup(wpipe); 955209289Stuexen 956209289Stuexen return error; 957209289Stuexen} 958178202Srrs 959178202Srrs/* 960178202Srrs * we implement a very minimal set of ioctls for compatibility with sockets. 
 */
int
pipe_ioctl(fp, cmd, data, p)
	struct file *fp;
	u_long cmd;
	register caddr_t data;
	struct proc *p;
{
	register struct pipe *mpipe = (struct pipe *)fp->f_data;

	switch (cmd) {

	case FIONBIO:
		/* Non-blocking mode is tracked via fp->f_flag; nothing to do here. */
		return (0);

	case FIOASYNC:
		/* Enable/disable SIGIO-style async notification on this end. */
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		return (0);

	case FIONREAD:
		/*
		 * Bytes available for reading: if a direct (page-mapped)
		 * write is in progress, report the mapped byte count,
		 * otherwise the count in the ordinary kernel buffer.
		 */
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
			*(int *)data = mpipe->pipe_buffer.cnt;
		return (0);

	case FIOSETOWN:
		return (fsetown(*(int *)data, &mpipe->pipe_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(mpipe->pipe_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		/* TIOCSPGRP takes a pgrp id; fsetown expects it negated. */
		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(mpipe->pipe_sigio);
		return (0);

	}
	return (ENOTTY);
}

/*
 * Poll/select support: report readability, writability and hangup
 * for the pipe, recording a selection on each end we must wait for.
 */
int
pipe_poll(fp, events, cred, p)
	struct file *fp;
	int events;
	struct ucred *cred;
	struct proc *p;
{
	register struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int revents = 0;

	wpipe = rpipe->pipe_peer;
	/*
	 * Readable if a direct write is pending, buffered data exists,
	 * or the writer has gone away (EOF is readable as 0 bytes).
	 */
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	/*
	 * Writable if the peer is gone (write will fail immediately)
	 * or no direct write is in progress and at least PIPE_BUF
	 * bytes of buffer space remain (the atomic-write guarantee).
	 */
	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	/*
	 * POLLHUP whenever either side has seen EOF or the peer is gone.
	 * Note this also guarantees revents != 0 when wpipe == NULL, so
	 * the selrecord(&wpipe->pipe_sel) below can never see a NULL wpipe.
	 */
	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	if (revents == 0) {
		/* Nothing ready: register with select so we get woken. */
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(p, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}

	return (revents);
}

/*
 * fstat() on a pipe: synthesize a FIFO stat structure from the
 * pipe's buffer geometry and timestamps.
 */
static int
pipe_stat(fp, ub, p)
	struct file *fp;
	struct stat *ub;
	struct proc *p;
{
	struct pipe *pipe = (struct pipe *)fp->f_data;

	bzero((caddr_t)ub, sizeof (*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	/*
	 * Round the byte count up to whole "blocks".
	 * NOTE(review): divides by pipe_buffer.size — assumes the buffer
	 * was allocated (size > 0) by pipe creation; confirm against pipespace().
	 */
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev,
	 * st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return 0;
}

/*
 * close() on a pipe end: detach the file from the pipe, drop the
 * SIGIO ownership, and tear the pipe end down.
 */
/* ARGSUSED */
static int
pipe_close(fp, p)
	struct file *fp;
	struct proc *p;
{
	struct pipe *cpipe = (struct pipe *)fp->f_data;

	/* Neutralize the file first so no further ops reach the pipe. */
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(cpipe->pipe_sigio);
	pipeclose(cpipe);
	return 0;
}

/*
 * shutdown the pipe
 */
static void
pipeclose(cpipe)
	struct pipe *cpipe;
{
	struct pipe *ppipe;
	if (cpipe) {

		/* Wake any select/poll sleepers on this end. */
		pipeselwakeup(cpipe);

		/*
		 * If the other side is blocked, wake it up saying that
		 * we want to close it down.
		 */
		while (cpipe->pipe_busy) {
			/*
			 * Set PIPE_WANT|PIPE_EOF so sleepers see the close
			 * in progress, then sleep until pipe_busy drains;
			 * re-wake each iteration in case of missed wakeups.
			 */
			wakeup(cpipe);
			cpipe->pipe_state |= PIPE_WANT|PIPE_EOF;
			tsleep(cpipe, PRIBIO, "pipecl", 0);
		}

		/*
		 * Disconnect from peer
		 */
		if ((ppipe = cpipe->pipe_peer) != NULL) {
			pipeselwakeup(ppipe);

			/* Mark the peer EOF and unlink both directions. */
			ppipe->pipe_state |= PIPE_EOF;
			wakeup(ppipe);
			ppipe->pipe_peer = NULL;
		}

		/*
		 * free resources
		 */
		if (cpipe->pipe_buffer.buffer) {
			/* Big-pipe accounting only applies above PIPE_SIZE. */
			if (cpipe->pipe_buffer.size > PIPE_SIZE)
				--nbigpipe;
			amountpipekva -= cpipe->pipe_buffer.size;
			kmem_free(kernel_map,
				(vm_offset_t)cpipe->pipe_buffer.buffer,
				cpipe->pipe_buffer.size);
		}
#ifndef PIPE_NODIRECT
		/* Direct-write map reserves buffer size + one guard page of KVA. */
		if (cpipe->pipe_map.kva) {
			amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE;
			kmem_free(kernel_map,
				cpipe->pipe_map.kva,
				cpipe->pipe_buffer.size + PAGE_SIZE);
		}
#endif
		zfree(pipe_zone, cpipe);
	}
}