vfs_aio.c (90576) → vfs_aio.c (91140)
1/*
2 * Copyright (c) 1997 John S. Dyson. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. John S. Dyson's name may not be used to endorse or promote products
10 * derived from this software without specific prior written permission.
11 *
12 * DISCLAIMER: This code isn't warranted to do anything useful. Anything
13 * bad that happens because of using this software isn't the responsibility
14 * of the author. This software is distributed AS-IS.
15 *
16 * $FreeBSD: head/sys/kern/vfs_aio.c 90576 2002-02-12 17:40:41Z alc $
16 * $FreeBSD: head/sys/kern/vfs_aio.c 91140 2002-02-23 11:12:57Z tanimura $
17 */
18
19/*
20 * This file contains support for the POSIX 1003.1B AIO/LIO facility.
21 */
22
23#include <sys/param.h>
24#include <sys/systm.h>
25#include <sys/malloc.h>
25#include <sys/bio.h>
26#include <sys/buf.h>
27#include <sys/sysproto.h>
28#include <sys/filedesc.h>
29#include <sys/kernel.h>
30#include <sys/kthread.h>
31#include <sys/fcntl.h>
32#include <sys/file.h>
33#include <sys/lock.h>
34#include <sys/mutex.h>
35#include <sys/unistd.h>
36#include <sys/proc.h>
37#include <sys/resourcevar.h>
38#include <sys/signalvar.h>
39#include <sys/protosw.h>
40#include <sys/socketvar.h>
41#include <sys/syscall.h>
42#include <sys/sysent.h>
43#include <sys/sysctl.h>
44#include <sys/vnode.h>
45#include <sys/conf.h>
46#include <sys/event.h>
47
48#include <vm/vm.h>
49#include <vm/vm_extern.h>
50#include <vm/pmap.h>
51#include <vm/vm_map.h>
52#include <vm/vm_zone.h>
53#include <sys/aio.h>
54
55#include <machine/limits.h>
56
57#include "opt_vfs_aio.h"
58
59static long jobrefid;
60
61#define JOBST_NULL 0x0
62#define JOBST_JOBQGLOBAL 0x2
63#define JOBST_JOBRUNNING 0x3
64#define JOBST_JOBFINISHED 0x4
65#define JOBST_JOBQBUF 0x5
66#define JOBST_JOBBFINISHED 0x6
67
68#ifndef MAX_AIO_PER_PROC
69#define MAX_AIO_PER_PROC 32
70#endif
71
72#ifndef MAX_AIO_QUEUE_PER_PROC
73#define MAX_AIO_QUEUE_PER_PROC 256 /* Bigger than AIO_LISTIO_MAX */
74#endif
75
76#ifndef MAX_AIO_PROCS
77#define MAX_AIO_PROCS 32
78#endif
79
80#ifndef MAX_AIO_QUEUE
81#define MAX_AIO_QUEUE 1024 /* Bigger than AIO_LISTIO_MAX */
82#endif
83
84#ifndef TARGET_AIO_PROCS
85#define TARGET_AIO_PROCS 4
86#endif
87
88#ifndef MAX_BUF_AIO
89#define MAX_BUF_AIO 16
90#endif
91
92#ifndef AIOD_TIMEOUT_DEFAULT
93#define AIOD_TIMEOUT_DEFAULT (10 * hz)
94#endif
95
96#ifndef AIOD_LIFETIME_DEFAULT
97#define AIOD_LIFETIME_DEFAULT (30 * hz)
98#endif
99
100static int max_aio_procs = MAX_AIO_PROCS;
101static int num_aio_procs = 0;
102static int target_aio_procs = TARGET_AIO_PROCS;
103static int max_queue_count = MAX_AIO_QUEUE;
104static int num_queue_count = 0;
105static int num_buf_aio = 0;
106static int num_aio_resv_start = 0;
107static int aiod_timeout;
108static int aiod_lifetime;
109static int unloadable = 0;
110
111static int max_aio_per_proc = MAX_AIO_PER_PROC;
112static int max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC;
113static int max_buf_aio = MAX_BUF_AIO;
114
115SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "AIO mgmt");
116
117SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc,
118 CTLFLAG_RW, &max_aio_per_proc, 0, "");
119
120SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc,
121 CTLFLAG_RW, &max_aio_queue_per_proc, 0, "");
122
123SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs,
124 CTLFLAG_RW, &max_aio_procs, 0, "");
125
126SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs,
127 CTLFLAG_RD, &num_aio_procs, 0, "");
128
129SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count,
130 CTLFLAG_RD, &num_queue_count, 0, "");
131
132SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue,
133 CTLFLAG_RW, &max_queue_count, 0, "");
134
135SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs,
136 CTLFLAG_RW, &target_aio_procs, 0, "");
137
138SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio,
139 CTLFLAG_RW, &max_buf_aio, 0, "");
140
141SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio,
142 CTLFLAG_RD, &num_buf_aio, 0, "");
143
144SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime,
145 CTLFLAG_RW, &aiod_lifetime, 0, "");
146
147SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_timeout,
148 CTLFLAG_RW, &aiod_timeout, 0, "");
149
150SYSCTL_INT(_vfs_aio, OID_AUTO, unloadable, CTLFLAG_RW, &unloadable, 0,
151 "Allow unload of aio (not recommended)");
152
153struct aiocblist {
154 TAILQ_ENTRY(aiocblist) list; /* List of jobs */
155 TAILQ_ENTRY(aiocblist) plist; /* List of jobs for proc */
156 int jobflags;
157 int jobstate;
158 int inputcharge;
159 int outputcharge;
160 struct callout_handle timeouthandle;
161 struct buf *bp; /* Buffer pointer */
162 struct proc *userproc; /* User process */ /* Not td! */
163 struct file *fd_file; /* Pointer to file structure */
164 struct aiothreadlist *jobaiothread; /* AIO process descriptor */
165 struct aio_liojob *lio; /* Optional lio job */
166 struct aiocb *uuaiocb; /* Pointer in userspace of aiocb */
167 struct klist klist; /* list of knotes */
168 struct aiocb uaiocb; /* Kernel I/O control block */
169};
170
171/* jobflags */
172#define AIOCBLIST_RUNDOWN 0x4
173#define AIOCBLIST_ASYNCFREE 0x8
174#define AIOCBLIST_DONE 0x10
175
176/*
177 * AIO process info
178 */
179#define AIOP_FREE 0x1 /* proc on free queue */
180#define AIOP_SCHED 0x2 /* proc explicitly scheduled */
181
182struct aiothreadlist {
183 int aiothreadflags; /* AIO proc flags */
184 TAILQ_ENTRY(aiothreadlist) list; /* List of processes */
185 struct thread *aiothread; /* The AIO thread */
186};
187
188/*
189 * data-structure for lio signal management
190 */
191struct aio_liojob {
192 int lioj_flags;
193 int lioj_buffer_count;
194 int lioj_buffer_finished_count;
195 int lioj_queue_count;
196 int lioj_queue_finished_count;
197 struct sigevent lioj_signal; /* signal on all I/O done */
198 TAILQ_ENTRY(aio_liojob) lioj_list;
199 struct kaioinfo *lioj_ki;
200};
201#define LIOJ_SIGNAL 0x1 /* signal on all done (lio) */
202#define LIOJ_SIGNAL_POSTED 0x2 /* signal has been posted */
203
204/*
205 * per process aio data structure
206 */
207struct kaioinfo {
208 int kaio_flags; /* per process kaio flags */
209 int kaio_maxactive_count; /* maximum number of AIOs */
210 int kaio_active_count; /* number of currently used AIOs */
  211 	int	kaio_qallowed_count;	/* maximum size of AIO queue */
212 int kaio_queue_count; /* size of AIO queue */
213 int kaio_ballowed_count; /* maximum number of buffers */
214 int kaio_queue_finished_count; /* number of daemon jobs finished */
215 int kaio_buffer_count; /* number of physio buffers */
216 int kaio_buffer_finished_count; /* count of I/O done */
217 struct proc *kaio_p; /* process that uses this kaio block */
218 TAILQ_HEAD(,aio_liojob) kaio_liojoblist; /* list of lio jobs */
219 TAILQ_HEAD(,aiocblist) kaio_jobqueue; /* job queue for process */
220 TAILQ_HEAD(,aiocblist) kaio_jobdone; /* done queue for process */
221 TAILQ_HEAD(,aiocblist) kaio_bufqueue; /* buffer job queue for process */
222 TAILQ_HEAD(,aiocblist) kaio_bufdone; /* buffer done queue for process */
223 TAILQ_HEAD(,aiocblist) kaio_sockqueue; /* queue for aios waiting on sockets */
224};
225
226#define KAIO_RUNDOWN 0x1 /* process is being run down */
227#define KAIO_WAKEUP 0x2 /* wakeup process when there is a significant event */
228
229static TAILQ_HEAD(,aiothreadlist) aio_freeproc, aio_activeproc;
230static TAILQ_HEAD(,aiocblist) aio_jobs; /* Async job list */
231static TAILQ_HEAD(,aiocblist) aio_bufjobs; /* Phys I/O job list */
232
233static void aio_init_aioinfo(struct proc *p);
234static void aio_onceonly(void);
235static int aio_free_entry(struct aiocblist *aiocbe);
236static void aio_process(struct aiocblist *aiocbe);
237static int aio_newproc(void);
238static int aio_aqueue(struct thread *td, struct aiocb *job, int type);
239static void aio_physwakeup(struct buf *bp);
240static void aio_proc_rundown(struct proc *p);
241static int aio_fphysio(struct aiocblist *aiocbe);
242static int aio_qphysio(struct proc *p, struct aiocblist *iocb);
243static void aio_daemon(void *uproc);
244static void aio_swake_cb(struct socket *, struct sockbuf *);
245static int aio_unload(void);
246static void process_signal(void *aioj);
247static int filt_aioattach(struct knote *kn);
248static void filt_aiodetach(struct knote *kn);
249static int filt_aio(struct knote *kn, long hint);
250
251static vm_zone_t kaio_zone, aiop_zone, aiocb_zone, aiol_zone;
252static vm_zone_t aiolio_zone;
253
254static struct filterops aio_filtops =
255 { 0, filt_aioattach, filt_aiodetach, filt_aio };
256
257static int
258aio_modload(struct module *module, int cmd, void *arg)
259{
260 int error = 0;
261
262 switch (cmd) {
263 case MOD_LOAD:
264 aio_onceonly();
265 break;
266 case MOD_UNLOAD:
267 error = aio_unload();
268 break;
269 case MOD_SHUTDOWN:
270 break;
271 default:
272 error = EINVAL;
273 break;
274 }
275 return (error);
276}
277
278static moduledata_t aio_mod = {
279 "aio",
280 &aio_modload,
281 NULL
282};
283
284SYSCALL_MODULE_HELPER(aio_return);
285SYSCALL_MODULE_HELPER(aio_suspend);
286SYSCALL_MODULE_HELPER(aio_cancel);
287SYSCALL_MODULE_HELPER(aio_error);
288SYSCALL_MODULE_HELPER(aio_read);
289SYSCALL_MODULE_HELPER(aio_write);
290SYSCALL_MODULE_HELPER(aio_waitcomplete);
291SYSCALL_MODULE_HELPER(lio_listio);
292
293DECLARE_MODULE(aio, aio_mod,
294 SI_SUB_VFS, SI_ORDER_ANY);
295MODULE_VERSION(aio, 1);
296
297/*
298 * Startup initialization
299 */
300static void
301aio_onceonly(void)
302{
303
304 /* XXX: should probably just use so->callback */
305 aio_swake = &aio_swake_cb;
306 at_exit(aio_proc_rundown);
307 at_exec(aio_proc_rundown);
308 kqueue_add_filteropts(EVFILT_AIO, &aio_filtops);
309 TAILQ_INIT(&aio_freeproc);
310 TAILQ_INIT(&aio_activeproc);
311 TAILQ_INIT(&aio_jobs);
312 TAILQ_INIT(&aio_bufjobs);
313 kaio_zone = zinit("AIO", sizeof(struct kaioinfo), 0, 0, 1);
314 aiop_zone = zinit("AIOP", sizeof(struct aiothreadlist), 0, 0, 1);
315 aiocb_zone = zinit("AIOCB", sizeof(struct aiocblist), 0, 0, 1);
316 aiol_zone = zinit("AIOL", AIO_LISTIO_MAX*sizeof(intptr_t), 0, 0, 1);
317 aiolio_zone = zinit("AIOLIO", sizeof(struct aio_liojob), 0, 0, 1);
318 aiod_timeout = AIOD_TIMEOUT_DEFAULT;
319 aiod_lifetime = AIOD_LIFETIME_DEFAULT;
320 jobrefid = 1;
321}
322
323static int
324aio_unload(void)
325{
326
327 /*
328 * XXX: no unloads by default, it's too dangerous.
329 * perhaps we could do it if locked out callers and then
330 * did an aio_proc_rundown() on each process.
331 */
332 if (!unloadable)
333 return (EOPNOTSUPP);
334
335 aio_swake = NULL;
336 rm_at_exit(aio_proc_rundown);
337 rm_at_exec(aio_proc_rundown);
338 kqueue_del_filteropts(EVFILT_AIO);
339 return (0);
340}
341
342/*
343 * Init the per-process aioinfo structure. The aioinfo limits are set
344 * per-process for user limit (resource) management.
345 */
346static void
347aio_init_aioinfo(struct proc *p)
348{
349 struct kaioinfo *ki;
350 if (p->p_aioinfo == NULL) {
351 ki = zalloc(kaio_zone);
352 p->p_aioinfo = ki;
353 ki->kaio_flags = 0;
354 ki->kaio_maxactive_count = max_aio_per_proc;
355 ki->kaio_active_count = 0;
356 ki->kaio_qallowed_count = max_aio_queue_per_proc;
357 ki->kaio_queue_count = 0;
358 ki->kaio_ballowed_count = max_buf_aio;
359 ki->kaio_buffer_count = 0;
360 ki->kaio_buffer_finished_count = 0;
361 ki->kaio_p = p;
362 TAILQ_INIT(&ki->kaio_jobdone);
363 TAILQ_INIT(&ki->kaio_jobqueue);
364 TAILQ_INIT(&ki->kaio_bufdone);
365 TAILQ_INIT(&ki->kaio_bufqueue);
366 TAILQ_INIT(&ki->kaio_liojoblist);
367 TAILQ_INIT(&ki->kaio_sockqueue);
368 }
369
370 while (num_aio_procs < target_aio_procs)
371 aio_newproc();
372}
373
374/*
375 * Free a job entry. Wait for completion if it is currently active, but don't
376 * delay forever. If we delay, we return a flag that says that we have to
377 * restart the queue scan.
378 */
379static int
380aio_free_entry(struct aiocblist *aiocbe)
381{
382 struct kaioinfo *ki;
383 struct aio_liojob *lj;
384 struct proc *p;
385 int error;
386 int s;
387
388 if (aiocbe->jobstate == JOBST_NULL)
389 panic("aio_free_entry: freeing already free job");
390
391 p = aiocbe->userproc;
392 ki = p->p_aioinfo;
393 lj = aiocbe->lio;
394 if (ki == NULL)
395 panic("aio_free_entry: missing p->p_aioinfo");
396
397 while (aiocbe->jobstate == JOBST_JOBRUNNING) {
398 if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE)
399 return 0;
400 aiocbe->jobflags |= AIOCBLIST_RUNDOWN;
401 tsleep(aiocbe, PRIBIO, "jobwai", 0);
402 }
403 aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE;
404
405 if (aiocbe->bp == NULL) {
406 if (ki->kaio_queue_count <= 0)
407 panic("aio_free_entry: process queue size <= 0");
408 if (num_queue_count <= 0)
409 panic("aio_free_entry: system wide queue size <= 0");
410
411 if (lj) {
412 lj->lioj_queue_count--;
413 if (aiocbe->jobflags & AIOCBLIST_DONE)
414 lj->lioj_queue_finished_count--;
415 }
416 ki->kaio_queue_count--;
417 if (aiocbe->jobflags & AIOCBLIST_DONE)
418 ki->kaio_queue_finished_count--;
419 num_queue_count--;
420 } else {
421 if (lj) {
422 lj->lioj_buffer_count--;
423 if (aiocbe->jobflags & AIOCBLIST_DONE)
424 lj->lioj_buffer_finished_count--;
425 }
426 if (aiocbe->jobflags & AIOCBLIST_DONE)
427 ki->kaio_buffer_finished_count--;
428 ki->kaio_buffer_count--;
429 num_buf_aio--;
430 }
431
432 /* aiocbe is going away, we need to destroy any knotes */
433 /* XXXKSE Note the thread here is used to eventually find the
434 * owning process again, but it is also used to do a fo_close
435 * and that requires the thread. (but does it require the
436 * OWNING thread? (or maybe the running thread?)
437 * There is a semantic problem here...
438 */
439 knote_remove(FIRST_THREAD_IN_PROC(p), &aiocbe->klist); /* XXXKSE */
440
441 if ((ki->kaio_flags & KAIO_WAKEUP) || ((ki->kaio_flags & KAIO_RUNDOWN)
442 && ((ki->kaio_buffer_count == 0) && (ki->kaio_queue_count == 0)))) {
443 ki->kaio_flags &= ~KAIO_WAKEUP;
444 wakeup(p);
445 }
446
447 if (aiocbe->jobstate == JOBST_JOBQBUF) {
448 if ((error = aio_fphysio(aiocbe)) != 0)
449 return error;
450 if (aiocbe->jobstate != JOBST_JOBBFINISHED)
451 panic("aio_free_entry: invalid physio finish-up state");
452 s = splbio();
453 TAILQ_REMOVE(&ki->kaio_bufdone, aiocbe, plist);
454 splx(s);
455 } else if (aiocbe->jobstate == JOBST_JOBQGLOBAL) {
456 s = splnet();
457 TAILQ_REMOVE(&aio_jobs, aiocbe, list);
458 TAILQ_REMOVE(&ki->kaio_jobqueue, aiocbe, plist);
459 splx(s);
460 } else if (aiocbe->jobstate == JOBST_JOBFINISHED)
461 TAILQ_REMOVE(&ki->kaio_jobdone, aiocbe, plist);
462 else if (aiocbe->jobstate == JOBST_JOBBFINISHED) {
463 s = splbio();
464 TAILQ_REMOVE(&ki->kaio_bufdone, aiocbe, plist);
465 splx(s);
466 if (aiocbe->bp) {
467 vunmapbuf(aiocbe->bp);
468 relpbuf(aiocbe->bp, NULL);
469 aiocbe->bp = NULL;
470 }
471 }
472 if (lj && (lj->lioj_buffer_count == 0) && (lj->lioj_queue_count == 0)) {
473 TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
474 zfree(aiolio_zone, lj);
475 }
476 aiocbe->jobstate = JOBST_NULL;
477 untimeout(process_signal, aiocbe, aiocbe->timeouthandle);
478 zfree(aiocb_zone, aiocbe);
479 return 0;
480}
481
482/*
483 * Rundown the jobs for a given process.
484 */
485static void
486aio_proc_rundown(struct proc *p)
487{
488 int s;
489 struct kaioinfo *ki;
490 struct aio_liojob *lj, *ljn;
491 struct aiocblist *aiocbe, *aiocbn;
492 struct file *fp;
493 struct filedesc *fdp;
494 struct socket *so;
495
496 ki = p->p_aioinfo;
497 if (ki == NULL)
498 return;
499
500 ki->kaio_flags |= LIOJ_SIGNAL_POSTED;
501 while ((ki->kaio_active_count > 0) || (ki->kaio_buffer_count >
502 ki->kaio_buffer_finished_count)) {
503 ki->kaio_flags |= KAIO_RUNDOWN;
504 if (tsleep(p, PRIBIO, "kaiowt", aiod_timeout))
505 break;
506 }
507
508 /*
509 * Move any aio ops that are waiting on socket I/O to the normal job
510 * queues so they are cleaned up with any others.
511 */
512 fdp = p->p_fd;
513
514 s = splnet();
515 for (aiocbe = TAILQ_FIRST(&ki->kaio_sockqueue); aiocbe; aiocbe =
516 aiocbn) {
517 aiocbn = TAILQ_NEXT(aiocbe, plist);
518 fp = fdp->fd_ofiles[aiocbe->uaiocb.aio_fildes];
519
520 /*
521 * Under some circumstances, the aio_fildes and the file
522 * structure don't match. This would leave aiocbe's in the
523 * TAILQ associated with the socket and cause a panic later.
524 *
525 * Detect and fix.
526 */
527 if ((fp == NULL) || (fp != aiocbe->fd_file))
528 fp = aiocbe->fd_file;
529 if (fp) {
530 so = (struct socket *)fp->f_data;
531 TAILQ_REMOVE(&so->so_aiojobq, aiocbe, list);
532 if (TAILQ_EMPTY(&so->so_aiojobq)) {
533 so->so_snd.sb_flags &= ~SB_AIO;
534 so->so_rcv.sb_flags &= ~SB_AIO;
535 }
536 }
537 TAILQ_REMOVE(&ki->kaio_sockqueue, aiocbe, plist);
538 TAILQ_INSERT_HEAD(&aio_jobs, aiocbe, list);
539 TAILQ_INSERT_HEAD(&ki->kaio_jobqueue, aiocbe, plist);
540 }
541 splx(s);
542
543restart1:
544 for (aiocbe = TAILQ_FIRST(&ki->kaio_jobdone); aiocbe; aiocbe = aiocbn) {
545 aiocbn = TAILQ_NEXT(aiocbe, plist);
546 if (aio_free_entry(aiocbe))
547 goto restart1;
548 }
549
550restart2:
551 for (aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue); aiocbe; aiocbe =
552 aiocbn) {
553 aiocbn = TAILQ_NEXT(aiocbe, plist);
554 if (aio_free_entry(aiocbe))
555 goto restart2;
556 }
557
558/*
559 * Note the use of lots of splbio here, trying to avoid splbio for long chains
560 * of I/O. Probably unnecessary.
561 */
562restart3:
563 s = splbio();
564 while (TAILQ_FIRST(&ki->kaio_bufqueue)) {
565 ki->kaio_flags |= KAIO_WAKEUP;
566 tsleep(p, PRIBIO, "aioprn", 0);
567 splx(s);
568 goto restart3;
569 }
570 splx(s);
571
572restart4:
573 s = splbio();
574 for (aiocbe = TAILQ_FIRST(&ki->kaio_bufdone); aiocbe; aiocbe = aiocbn) {
575 aiocbn = TAILQ_NEXT(aiocbe, plist);
576 if (aio_free_entry(aiocbe)) {
577 splx(s);
578 goto restart4;
579 }
580 }
581 splx(s);
582
583 /*
584 * If we've slept, jobs might have moved from one queue to another.
585 * Retry rundown if we didn't manage to empty the queues.
586 */
587 if (TAILQ_FIRST(&ki->kaio_jobdone) != NULL ||
588 TAILQ_FIRST(&ki->kaio_jobqueue) != NULL ||
589 TAILQ_FIRST(&ki->kaio_bufqueue) != NULL ||
590 TAILQ_FIRST(&ki->kaio_bufdone) != NULL)
591 goto restart1;
592
593 for (lj = TAILQ_FIRST(&ki->kaio_liojoblist); lj; lj = ljn) {
594 ljn = TAILQ_NEXT(lj, lioj_list);
595 if ((lj->lioj_buffer_count == 0) && (lj->lioj_queue_count ==
596 0)) {
597 TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
598 zfree(aiolio_zone, lj);
599 } else {
600#ifdef DIAGNOSTIC
601 printf("LIO job not cleaned up: B:%d, BF:%d, Q:%d, "
602 "QF:%d\n", lj->lioj_buffer_count,
603 lj->lioj_buffer_finished_count,
604 lj->lioj_queue_count,
605 lj->lioj_queue_finished_count);
606#endif
607 }
608 }
609
610 zfree(kaio_zone, ki);
611 p->p_aioinfo = NULL;
612}
613
614/*
615 * Select a job to run (called by an AIO daemon).
616 */
617static struct aiocblist *
618aio_selectjob(struct aiothreadlist *aiop)
619{
620 int s;
621 struct aiocblist *aiocbe;
622 struct kaioinfo *ki;
623 struct proc *userp;
624
625 s = splnet();
626 for (aiocbe = TAILQ_FIRST(&aio_jobs); aiocbe; aiocbe =
627 TAILQ_NEXT(aiocbe, list)) {
628 userp = aiocbe->userproc;
629 ki = userp->p_aioinfo;
630
631 if (ki->kaio_active_count < ki->kaio_maxactive_count) {
632 TAILQ_REMOVE(&aio_jobs, aiocbe, list);
633 splx(s);
634 return aiocbe;
635 }
636 }
637 splx(s);
638
639 return NULL;
640}
641
642/*
643 * The AIO processing activity. This is the code that does the I/O request for
644 * the non-physio version of the operations. The normal vn operations are used,
645 * and this code should work in all instances for every type of file, including
646 * pipes, sockets, fifos, and regular files.
647 */
648static void
649aio_process(struct aiocblist *aiocbe)
650{
651 struct filedesc *fdp;
652 struct thread *td;
653 struct proc *userp;
654 struct proc *mycp;
655 struct aiocb *cb;
656 struct file *fp;
657 struct uio auio;
658 struct iovec aiov;
659 unsigned int fd;
660 int cnt;
661 int error;
662 off_t offset;
663 int oublock_st, oublock_end;
664 int inblock_st, inblock_end;
665
666 userp = aiocbe->userproc;
667 td = curthread;
668 mycp = td->td_proc;
669 cb = &aiocbe->uaiocb;
670
671 fdp = mycp->p_fd;
672 fd = cb->aio_fildes;
673 fp = fdp->fd_ofiles[fd];
674
675 if ((fp == NULL) || (fp != aiocbe->fd_file)) {
676 cb->_aiocb_private.error = EBADF;
677 cb->_aiocb_private.status = -1;
678 return;
679 }
680
681 aiov.iov_base = (void *)(uintptr_t)cb->aio_buf;
682 aiov.iov_len = cb->aio_nbytes;
683
684 auio.uio_iov = &aiov;
685 auio.uio_iovcnt = 1;
686 auio.uio_offset = offset = cb->aio_offset;
687 auio.uio_resid = cb->aio_nbytes;
688 cnt = cb->aio_nbytes;
689 auio.uio_segflg = UIO_USERSPACE;
690 auio.uio_td = td;
691
692 inblock_st = mycp->p_stats->p_ru.ru_inblock;
693 oublock_st = mycp->p_stats->p_ru.ru_oublock;
694 /*
695 * Temporarily bump the ref count while reading to avoid the
696 * descriptor being ripped out from under us.
697 */
698 fhold(fp);
699 if (cb->aio_lio_opcode == LIO_READ) {
700 auio.uio_rw = UIO_READ;
701 error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, td);
702 } else {
703 auio.uio_rw = UIO_WRITE;
704 error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, td);
705 }
706 fdrop(fp, td);
707 inblock_end = mycp->p_stats->p_ru.ru_inblock;
708 oublock_end = mycp->p_stats->p_ru.ru_oublock;
709
710 aiocbe->inputcharge = inblock_end - inblock_st;
711 aiocbe->outputcharge = oublock_end - oublock_st;
712
713 if ((error) && (auio.uio_resid != cnt)) {
714 if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
715 error = 0;
716 if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) {
717 PROC_LOCK(userp);
718 psignal(userp, SIGPIPE);
719 PROC_UNLOCK(userp);
720 }
721 }
722
723 cnt -= auio.uio_resid;
724 cb->_aiocb_private.error = error;
725 cb->_aiocb_private.status = cnt;
726}
727
728/*
  729 * The AIO daemon: most of the actual work is done in aio_process,
730 * but the setup (and address space mgmt) is done in this routine.
731 */
732static void
733aio_daemon(void *uproc)
734{
735 int s;
736 struct aio_liojob *lj;
737 struct aiocb *cb;
738 struct aiocblist *aiocbe;
739 struct aiothreadlist *aiop;
740 struct kaioinfo *ki;
741 struct proc *curcp, *mycp, *userp;
742 struct vmspace *myvm, *tmpvm;
743 struct thread *td = curthread;
745 struct pgrp *newpgrp;
746 struct session *newsess;
744
745 mtx_lock(&Giant);
746 /*
747 * Local copies of curproc (cp) and vmspace (myvm)
748 */
749 mycp = td->td_proc;
750 myvm = mycp->p_vmspace;
751
752 if (mycp->p_textvp) {
753 vrele(mycp->p_textvp);
754 mycp->p_textvp = NULL;
755 }
756
757 /*
758 * Allocate and ready the aio control info. There is one aiop structure
759 * per daemon.
760 */
761 aiop = zalloc(aiop_zone);
762 aiop->aiothread = td;
763 aiop->aiothreadflags |= AIOP_FREE;
764
765 s = splnet();
766
767 /*
768 * Place thread (lightweight process) onto the AIO free thread list.
769 */
770 if (TAILQ_EMPTY(&aio_freeproc))
771 wakeup(&aio_freeproc);
772 TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
773
774 splx(s);
775
776 /*
777 * Get rid of our current filedescriptors. AIOD's don't need any
778 * filedescriptors, except as temporarily inherited from the client.
779 */
780 fdfree(td);
781 mycp->p_fd = NULL;
782
783 /* The daemon resides in its own pgrp. */
  784 	enterpgrp(mycp, mycp->p_pid, 1);
  785
  787 	MALLOC(newpgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP, M_WAITOK | M_ZERO);
  788 	MALLOC(newsess, struct session *, sizeof(struct session), M_SESSION, M_WAITOK | M_ZERO);
  789
  790 	PGRPSESS_XLOCK();
  791 	enterpgrp(mycp, mycp->p_pid, newpgrp, newsess);
  792 	PGRPSESS_XUNLOCK();
  793
786 /* Mark special process type. */
787 mycp->p_flag |= P_SYSTEM;
788
789 /*
790 * Wakeup parent process. (Parent sleeps to keep from blasting away
791 * and creating too many daemons.)
792 */
793 wakeup(mycp);
794
795 for (;;) {
796 /*
797 * curcp is the current daemon process context.
798 * userp is the current user process context.
799 */
800 curcp = mycp;
801
802 /*
803 * Take daemon off of free queue
804 */
805 if (aiop->aiothreadflags & AIOP_FREE) {
806 s = splnet();
807 TAILQ_REMOVE(&aio_freeproc, aiop, list);
808 TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
809 aiop->aiothreadflags &= ~AIOP_FREE;
810 splx(s);
811 }
812 aiop->aiothreadflags &= ~AIOP_SCHED;
813
814 /*
815 * Check for jobs.
816 */
817 while ((aiocbe = aio_selectjob(aiop)) != NULL) {
818 cb = &aiocbe->uaiocb;
819 userp = aiocbe->userproc;
820
821 aiocbe->jobstate = JOBST_JOBRUNNING;
822
823 /*
824 * Connect to process address space for user program.
825 */
826 if (userp != curcp) {
827 /*
828 * Save the current address space that we are
829 * connected to.
830 */
831 tmpvm = mycp->p_vmspace;
832
833 /*
834 * Point to the new user address space, and
835 * refer to it.
836 */
837 mycp->p_vmspace = userp->p_vmspace;
838 mycp->p_vmspace->vm_refcnt++;
839
840 /* Activate the new mapping. */
841 pmap_activate(FIRST_THREAD_IN_PROC(mycp));
842
843 /*
  844 * If the old address space wasn't the daemon's
845 * own address space, then we need to remove the
846 * daemon's reference from the other process
847 * that it was acting on behalf of.
848 */
849 if (tmpvm != myvm) {
850 vmspace_free(tmpvm);
851 }
852
853 /*
854 * Disassociate from previous clients file
855 * descriptors, and associate to the new clients
856 * descriptors. Note that the daemon doesn't
  857 * need to worry about its original descriptors,
858 * because they were originally freed.
859 */
860 if (mycp->p_fd)
861 fdfree(td);
862 mycp->p_fd = fdshare(userp);
863 curcp = userp;
864 }
865
866 ki = userp->p_aioinfo;
867 lj = aiocbe->lio;
868
869 /* Account for currently active jobs. */
870 ki->kaio_active_count++;
871
872 /* Do the I/O function. */
873 aiocbe->jobaiothread = aiop;
874 aio_process(aiocbe);
875
876 /* Decrement the active job count. */
877 ki->kaio_active_count--;
878
879 /*
880 * Increment the completion count for wakeup/signal
881 * comparisons.
882 */
883 aiocbe->jobflags |= AIOCBLIST_DONE;
884 ki->kaio_queue_finished_count++;
885 if (lj)
886 lj->lioj_queue_finished_count++;
887 if ((ki->kaio_flags & KAIO_WAKEUP) || ((ki->kaio_flags
888 & KAIO_RUNDOWN) && (ki->kaio_active_count == 0))) {
889 ki->kaio_flags &= ~KAIO_WAKEUP;
890 wakeup(userp);
891 }
892
893 s = splbio();
894 if (lj && (lj->lioj_flags &
895 (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) == LIOJ_SIGNAL) {
896 if ((lj->lioj_queue_finished_count ==
897 lj->lioj_queue_count) &&
898 (lj->lioj_buffer_finished_count ==
899 lj->lioj_buffer_count)) {
900 PROC_LOCK(userp);
901 psignal(userp,
902 lj->lioj_signal.sigev_signo);
903 PROC_UNLOCK(userp);
904 lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
905 }
906 }
907 splx(s);
908
909 aiocbe->jobstate = JOBST_JOBFINISHED;
910
911 /*
912 * If the I/O request should be automatically rundown,
913 * do the needed cleanup. Otherwise, place the queue
914 * entry for the just finished I/O request into the done
915 * queue for the associated client.
916 */
917 s = splnet();
918 if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) {
919 aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE;
920 zfree(aiocb_zone, aiocbe);
921 } else {
922 TAILQ_REMOVE(&ki->kaio_jobqueue, aiocbe, plist);
923 TAILQ_INSERT_TAIL(&ki->kaio_jobdone, aiocbe,
924 plist);
925 }
926 splx(s);
927 KNOTE(&aiocbe->klist, 0);
928
929 if (aiocbe->jobflags & AIOCBLIST_RUNDOWN) {
930 wakeup(aiocbe);
931 aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN;
932 }
933
934 if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) {
935 PROC_LOCK(userp);
936 psignal(userp, cb->aio_sigevent.sigev_signo);
937 PROC_UNLOCK(userp);
938 }
939 }
940
941 /*
942 * Disconnect from user address space.
943 */
944 if (curcp != mycp) {
945 /* Get the user address space to disconnect from. */
946 tmpvm = mycp->p_vmspace;
947
948 /* Get original address space for daemon. */
949 mycp->p_vmspace = myvm;
950
951 /* Activate the daemon's address space. */
952 pmap_activate(FIRST_THREAD_IN_PROC(mycp));
953#ifdef DIAGNOSTIC
954 if (tmpvm == myvm) {
955 printf("AIOD: vmspace problem -- %d\n",
956 mycp->p_pid);
957 }
958#endif
959 /* Remove our vmspace reference. */
960 vmspace_free(tmpvm);
961
962 /*
963 * Disassociate from the user process's file
964 * descriptors.
965 */
966 if (mycp->p_fd)
967 fdfree(td);
968 mycp->p_fd = NULL;
969 curcp = mycp;
970 }
971
972 /*
973 * If we are the first to be put onto the free queue, wakeup
974 * anyone waiting for a daemon.
975 */
976 s = splnet();
977 TAILQ_REMOVE(&aio_activeproc, aiop, list);
978 if (TAILQ_EMPTY(&aio_freeproc))
979 wakeup(&aio_freeproc);
980 TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
981 aiop->aiothreadflags |= AIOP_FREE;
982 splx(s);
983
984 /*
985 * If daemon is inactive for a long time, allow it to exit,
986 * thereby freeing resources.
987 */
988 if ((aiop->aiothreadflags & AIOP_SCHED) == 0 &&
989 tsleep(aiop->aiothread, PRIBIO, "aiordy", aiod_lifetime)) {
990 s = splnet();
991 if (TAILQ_EMPTY(&aio_jobs)) {
992 if ((aiop->aiothreadflags & AIOP_FREE) &&
993 (num_aio_procs > target_aio_procs)) {
994 TAILQ_REMOVE(&aio_freeproc, aiop, list);
995 splx(s);
996 zfree(aiop_zone, aiop);
997 num_aio_procs--;
998#ifdef DIAGNOSTIC
999 if (mycp->p_vmspace->vm_refcnt <= 1) {
1000 printf("AIOD: bad vm refcnt for"
1001 " exiting daemon: %d\n",
1002 mycp->p_vmspace->vm_refcnt);
1003 }
1004#endif
1005 kthread_exit(0);
1006 }
1007 }
1008 splx(s);
1009 }
1010 }
1011}
1012
1013/*
1014 * Create a new AIO daemon. This is mostly a kernel-thread fork routine. The
1015 * AIO daemon modifies its environment itself.
1016 */
1017static int
1018aio_newproc()
1019{
1020 int error;
1021 struct proc *p;
1022
1023 error = kthread_create(aio_daemon, curproc, &p, RFNOWAIT, "aiod%d",
1024 num_aio_procs);
1025 if (error)
1026 return error;
1027
1028 /*
1029 * Wait until daemon is started, but continue on just in case to
1030 * handle error conditions.
1031 */
1032 error = tsleep(p, PZERO, "aiosta", aiod_timeout);
1033
1034 num_aio_procs++;
1035
1036 return error;
1037}
1038
1039/*
1040 * Try the high-performance, low-overhead physio method for eligible
1041 * VCHR devices. This method doesn't use an aio helper thread, and
1042 * thus has very low overhead.
1043 *
1044 * Assumes that the caller, _aio_aqueue(), has incremented the file
1045 * structure's reference count, preventing its deallocation for the
1046 * duration of this call.
1047 */
1048static int
1049aio_qphysio(struct proc *p, struct aiocblist *aiocbe)
1050{
1051 int error;
1052 struct aiocb *cb;
1053 struct file *fp;
1054 struct buf *bp;
1055 struct vnode *vp;
1056 struct kaioinfo *ki;
1057 struct aio_liojob *lj;
1058 int s;
1059 int notify;
1060
1061 cb = &aiocbe->uaiocb;
1062 fp = aiocbe->fd_file;
1063
1064 if (fp->f_type != DTYPE_VNODE)
1065 return (-1);
1066
1067 vp = (struct vnode *)fp->f_data;
1068
1069 /*
 1070 * If it's not a disk, we don't want to return a positive error.
1071 * It causes the aio code to not fall through to try the thread
1072 * way when you're talking to a regular file.
1073 */
1074 if (!vn_isdisk(vp, &error)) {
1075 if (error == ENOTBLK)
1076 return (-1);
1077 else
1078 return (error);
1079 }
1080
1081 if (cb->aio_nbytes % vp->v_rdev->si_bsize_phys)
1082 return (-1);
1083
1084 if (cb->aio_nbytes >
1085 MAXPHYS - (((vm_offset_t) cb->aio_buf) & PAGE_MASK))
1086 return (-1);
1087
1088 ki = p->p_aioinfo;
1089 if (ki->kaio_buffer_count >= ki->kaio_ballowed_count)
1090 return (-1);
1091
1092 ki->kaio_buffer_count++;
1093
1094 lj = aiocbe->lio;
1095 if (lj)
1096 lj->lioj_buffer_count++;
1097
1098 /* Create and build a buffer header for a transfer. */
1099 bp = (struct buf *)getpbuf(NULL);
1100 BUF_KERNPROC(bp);
1101
1102 /*
1103 * Get a copy of the kva from the physical buffer.
1104 */
1105 bp->b_caller1 = p;
1106 bp->b_dev = vp->v_rdev;
1107 error = bp->b_error = 0;
1108
1109 bp->b_bcount = cb->aio_nbytes;
1110 bp->b_bufsize = cb->aio_nbytes;
1111 bp->b_flags = B_PHYS;
1112 bp->b_iodone = aio_physwakeup;
1113 bp->b_saveaddr = bp->b_data;
1114 bp->b_data = (void *)(uintptr_t)cb->aio_buf;
1115 bp->b_blkno = btodb(cb->aio_offset);
1116
1117 if (cb->aio_lio_opcode == LIO_WRITE) {
1118 bp->b_iocmd = BIO_WRITE;
1119 if (!useracc(bp->b_data, bp->b_bufsize, VM_PROT_READ)) {
1120 error = EFAULT;
1121 goto doerror;
1122 }
1123 } else {
1124 bp->b_iocmd = BIO_READ;
1125 if (!useracc(bp->b_data, bp->b_bufsize, VM_PROT_WRITE)) {
1126 error = EFAULT;
1127 goto doerror;
1128 }
1129 }
1130
1131 /* Bring buffer into kernel space. */
1132 vmapbuf(bp);
1133
1134 s = splbio();
1135 aiocbe->bp = bp;
1136 bp->b_spc = (void *)aiocbe;
1137 TAILQ_INSERT_TAIL(&aio_bufjobs, aiocbe, list);
1138 TAILQ_INSERT_TAIL(&ki->kaio_bufqueue, aiocbe, plist);
1139 aiocbe->jobstate = JOBST_JOBQBUF;
1140 cb->_aiocb_private.status = cb->aio_nbytes;
1141 num_buf_aio++;
1142 bp->b_error = 0;
1143
1144 splx(s);
1145
1146 /* Perform transfer. */
1147 DEV_STRATEGY(bp, 0);
1148
1149 notify = 0;
1150 s = splbio();
1151
1152 /*
1153 * If we had an error invoking the request, or an error in processing
1154 * the request before we have returned, we process it as an error in
1155 * transfer. Note that such an I/O error is not indicated immediately,
1156 * but is returned using the aio_error mechanism. In this case,
1157 * aio_suspend will return immediately.
1158 */
1159 if (bp->b_error || (bp->b_ioflags & BIO_ERROR)) {
1160 struct aiocb *job = aiocbe->uuaiocb;
1161
1162 aiocbe->uaiocb._aiocb_private.status = 0;
1163 suword(&job->_aiocb_private.status, 0);
1164 aiocbe->uaiocb._aiocb_private.error = bp->b_error;
1165 suword(&job->_aiocb_private.error, bp->b_error);
1166
1167 ki->kaio_buffer_finished_count++;
1168
1169 if (aiocbe->jobstate != JOBST_JOBBFINISHED) {
1170 aiocbe->jobstate = JOBST_JOBBFINISHED;
1171 aiocbe->jobflags |= AIOCBLIST_DONE;
1172 TAILQ_REMOVE(&aio_bufjobs, aiocbe, list);
1173 TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist);
1174 TAILQ_INSERT_TAIL(&ki->kaio_bufdone, aiocbe, plist);
1175 notify = 1;
1176 }
1177 }
1178 splx(s);
1179 if (notify)
1180 KNOTE(&aiocbe->klist, 0);
1181 return 0;
1182
1183doerror:
1184 ki->kaio_buffer_count--;
1185 if (lj)
1186 lj->lioj_buffer_count--;
1187 aiocbe->bp = NULL;
1188 relpbuf(bp, NULL);
1189 return error;
1190}
1191
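/*
 * [Illustrative userland sketch -- not part of vfs_aio.c.]
 * An aio_read() that can take the aio_qphysio() fast path above: the
 * descriptor names a character disk device, the length is a multiple of
 * the device's physical block size, and the transfer fits the MAXPHYS
 * limit checked above.  The device path and the 512-byte sector size are
 * assumptions for the example only.
 */
#include <sys/types.h>
#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int
read_raw_blocks(const char *devpath, off_t offset)
{
        static char buf[65536];         /* well under MAXPHYS */
        const struct aiocb *list[1];
        struct aiocb iocb;
        int fd;

        fd = open(devpath, O_RDONLY);
        if (fd == -1)
                return (-1);

        memset(&iocb, 0, sizeof(iocb));
        iocb.aio_fildes = fd;
        iocb.aio_buf = buf;
        iocb.aio_nbytes = sizeof(buf);  /* multiple of the 512-byte sector size */
        iocb.aio_offset = offset;       /* must not be -1 (rejected at queue time) */

        if (aio_read(&iocb) == -1) {
                close(fd);
                return (-1);
        }

        /* Sleep until this one request has finished, then reap it. */
        list[0] = &iocb;
        while (aio_error(&iocb) == EINPROGRESS)
                aio_suspend(list, 1, NULL);
        printf("read %zd bytes\n", aio_return(&iocb));
        close(fd);
        return (0);
}
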
1192/*
1193 * This waits/tests physio completion.
1194 */
1195static int
1196aio_fphysio(struct aiocblist *iocb)
1197{
1198 int s;
1199 struct buf *bp;
1200 int error;
1201
1202 bp = iocb->bp;
1203
1204 s = splbio();
1205 while ((bp->b_flags & B_DONE) == 0) {
1206 if (tsleep(bp, PRIBIO, "physstr", aiod_timeout)) {
1207 if ((bp->b_flags & B_DONE) == 0) {
1208 splx(s);
1209 return EINPROGRESS;
1210 } else
1211 break;
1212 }
1213 }
1214 splx(s);
1215
1216 /* Release mapping into kernel space. */
1217 vunmapbuf(bp);
1218 iocb->bp = 0;
1219
1220 error = 0;
1221
1222 /* Check for an error. */
1223 if (bp->b_ioflags & BIO_ERROR)
1224 error = bp->b_error;
1225
1226 relpbuf(bp, NULL);
1227 return (error);
1228}
1229
1230/*
1231 * Wake up aio requests that may be serviceable now.
1232 */
1233static void
1234aio_swake_cb(struct socket *so, struct sockbuf *sb)
1235{
1236 struct aiocblist *cb,*cbn;
1237 struct proc *p;
1238 struct kaioinfo *ki = NULL;
1239 int opcode, wakecount = 0;
1240 struct aiothreadlist *aiop;
1241
1242 if (sb == &so->so_snd) {
1243 opcode = LIO_WRITE;
1244 so->so_snd.sb_flags &= ~SB_AIO;
1245 } else {
1246 opcode = LIO_READ;
1247 so->so_rcv.sb_flags &= ~SB_AIO;
1248 }
1249
1250 for (cb = TAILQ_FIRST(&so->so_aiojobq); cb; cb = cbn) {
1251 cbn = TAILQ_NEXT(cb, list);
1252 if (opcode == cb->uaiocb.aio_lio_opcode) {
1253 p = cb->userproc;
1254 ki = p->p_aioinfo;
1255 TAILQ_REMOVE(&so->so_aiojobq, cb, list);
1256 TAILQ_REMOVE(&ki->kaio_sockqueue, cb, plist);
1257 TAILQ_INSERT_TAIL(&aio_jobs, cb, list);
1258 TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, cb, plist);
1259 wakecount++;
1260 if (cb->jobstate != JOBST_JOBQGLOBAL)
1261 panic("invalid queue value");
1262 }
1263 }
1264
1265 while (wakecount--) {
1266 if ((aiop = TAILQ_FIRST(&aio_freeproc)) != 0) {
1267 TAILQ_REMOVE(&aio_freeproc, aiop, list);
1268 TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
1269 aiop->aiothreadflags &= ~AIOP_FREE;
1270 wakeup(aiop->aiothread);
1271 }
1272 }
1273}
1274
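/*
 * [Illustrative userland sketch -- not part of vfs_aio.c.]
 * Exercises the socket path serviced by aio_swake_cb() above: an aio_read()
 * on a socket with no data pending is parked on the socket's AIO queue and
 * only handed to an aio daemon once the socket becomes readable.  A
 * socketpair stands in for a real peer here.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <aio.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void
socket_aio_demo(void)
{
        const struct aiocb *list[1];
        struct aiocb iocb;
        char buf[64];
        int sv[2];

        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1)
                return;

        memset(&iocb, 0, sizeof(iocb));
        iocb.aio_fildes = sv[0];
        iocb.aio_buf = buf;
        iocb.aio_nbytes = sizeof(buf);

        /* Nothing readable yet, so the request waits on the socket queue. */
        if (aio_read(&iocb) == -1)
                return;

        /* Making the socket readable lets the kernel schedule the job. */
        write(sv[1], "hello", 5);

        list[0] = &iocb;
        while (aio_error(&iocb) == EINPROGRESS)
                aio_suspend(list, 1, NULL);
        printf("socket aio_read returned %zd bytes\n", aio_return(&iocb));
        close(sv[0]);
        close(sv[1]);
}
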
1275/*
1276 * Queue a new AIO request. The choice between the aiod-thread technique and
1277 * direct physio for VCHR devices is made in this code.
1278 */
1279static int
1280_aio_aqueue(struct thread *td, struct aiocb *job, struct aio_liojob *lj, int type)
1281{
1282 struct proc *p = td->td_proc;
1283 struct filedesc *fdp;
1284 struct file *fp;
1285 unsigned int fd;
1286 struct socket *so;
1287 int s;
1288 int error;
1289 int opcode;
1290 struct aiocblist *aiocbe;
1291 struct aiothreadlist *aiop;
1292 struct kaioinfo *ki;
1293 struct kevent kev;
1294 struct kqueue *kq;
1295 struct file *kq_fp;
1296
1297 aiocbe = zalloc(aiocb_zone);
1298 aiocbe->inputcharge = 0;
1299 aiocbe->outputcharge = 0;
1300 callout_handle_init(&aiocbe->timeouthandle);
1301 SLIST_INIT(&aiocbe->klist);
1302
1303 suword(&job->_aiocb_private.status, -1);
1304 suword(&job->_aiocb_private.error, 0);
1305 suword(&job->_aiocb_private.kernelinfo, -1);
1306
1307 error = copyin(job, &aiocbe->uaiocb, sizeof(aiocbe->uaiocb));
1308 if (error) {
1309 suword(&job->_aiocb_private.error, error);
1310 zfree(aiocb_zone, aiocbe);
1311 return error;
1312 }
1313 if (aiocbe->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL &&
1314 !_SIG_VALID(aiocbe->uaiocb.aio_sigevent.sigev_signo)) {
1315 zfree(aiocb_zone, aiocbe);
1316 return EINVAL;
1317 }
1318
1319 /* Save userspace address of the job info. */
1320 aiocbe->uuaiocb = job;
1321
1322 /* Get the opcode. */
1323 if (type != LIO_NOP)
1324 aiocbe->uaiocb.aio_lio_opcode = type;
1325 opcode = aiocbe->uaiocb.aio_lio_opcode;
1326
1327 /* Get the fd info for process. */
1328 fdp = p->p_fd;
1329
1330 /*
1331 * Range check file descriptor.
1332 */
1333 fd = aiocbe->uaiocb.aio_fildes;
1334 if (fd >= fdp->fd_nfiles) {
1335 zfree(aiocb_zone, aiocbe);
1336 if (type == 0)
1337 suword(&job->_aiocb_private.error, EBADF);
1338 return EBADF;
1339 }
1340
1341 fp = aiocbe->fd_file = fdp->fd_ofiles[fd];
1342 if ((fp == NULL) || ((opcode == LIO_WRITE) && ((fp->f_flag & FWRITE) ==
1343 0))) {
1344 zfree(aiocb_zone, aiocbe);
1345 if (type == 0)
1346 suword(&job->_aiocb_private.error, EBADF);
1347 return EBADF;
1348 }
1349
1350 if (aiocbe->uaiocb.aio_offset == -1LL) {
1351 zfree(aiocb_zone, aiocbe);
1352 if (type == 0)
1353 suword(&job->_aiocb_private.error, EINVAL);
1354 return EINVAL;
1355 }
1356
1357 error = suword(&job->_aiocb_private.kernelinfo, jobrefid);
1358 if (error) {
1359 zfree(aiocb_zone, aiocbe);
1360 if (type == 0)
1361 suword(&job->_aiocb_private.error, EINVAL);
1362 return error;
1363 }
1364
1365 aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jobrefid;
1366 if (jobrefid == LONG_MAX)
1367 jobrefid = 1;
1368 else
1369 jobrefid++;
1370
1371 if (opcode == LIO_NOP) {
1372 zfree(aiocb_zone, aiocbe);
1373 if (type == 0) {
1374 suword(&job->_aiocb_private.error, 0);
1375 suword(&job->_aiocb_private.status, 0);
1376 suword(&job->_aiocb_private.kernelinfo, 0);
1377 }
1378 return 0;
1379 }
1380
1381 if ((opcode != LIO_READ) && (opcode != LIO_WRITE)) {
1382 zfree(aiocb_zone, aiocbe);
1383 if (type == 0) {
1384 suword(&job->_aiocb_private.status, 0);
1385 suword(&job->_aiocb_private.error, EINVAL);
1386 }
1387 return EINVAL;
1388 }
1389
1390 fhold(fp);
1391
1392 if (aiocbe->uaiocb.aio_sigevent.sigev_notify == SIGEV_KEVENT) {
1393 kev.ident = aiocbe->uaiocb.aio_sigevent.sigev_notify_kqueue;
1394 kev.udata = aiocbe->uaiocb.aio_sigevent.sigev_value.sigval_ptr;
1395 }
1396 else {
1397 /*
1398 * This method for requesting kevent-based notification won't
1399 * work on the alpha, since we're passing in a pointer
1400 * via aio_lio_opcode, which is an int. Use the SIGEV_KEVENT-
1401 * based method instead.
1402 */
1403 struct kevent *kevp;
1404
1405 kevp = (struct kevent *)(uintptr_t)job->aio_lio_opcode;
1406 if (kevp == NULL)
1407 goto no_kqueue;
1408
1409 error = copyin(kevp, &kev, sizeof(kev));
1410 if (error)
1411 goto aqueue_fail;
1412 }
1413 if ((u_int)kev.ident >= fdp->fd_nfiles ||
1414 (kq_fp = fdp->fd_ofiles[kev.ident]) == NULL ||
1415 (kq_fp->f_type != DTYPE_KQUEUE)) {
1416 error = EBADF;
1417 goto aqueue_fail;
1418 }
1419 kq = (struct kqueue *)kq_fp->f_data;
1420 kev.ident = (uintptr_t)aiocbe;
1421 kev.filter = EVFILT_AIO;
1422 kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1;
1423 error = kqueue_register(kq, &kev, td);
1424aqueue_fail:
1425 if (error) {
1426 zfree(aiocb_zone, aiocbe);
1427 if (type == 0)
1428 suword(&job->_aiocb_private.error, error);
1429 goto done;
1430 }
1431no_kqueue:
1432
1433 suword(&job->_aiocb_private.error, EINPROGRESS);
1434 aiocbe->uaiocb._aiocb_private.error = EINPROGRESS;
1435 aiocbe->userproc = p;
1436 aiocbe->jobflags = 0;
1437 aiocbe->lio = lj;
1438 ki = p->p_aioinfo;
1439
1440 if (fp->f_type == DTYPE_SOCKET) {
1441 /*
1442 * Alternate queueing for socket ops: Reach down into the
1443 * descriptor to get the socket data. Then check to see if the
1444 * socket is ready to be read or written (based on the requested
1445 * operation).
1446 *
1447 * If it is not ready for I/O, then queue the aiocbe on the
1448 * socket, and set the flags so we get a call when sbnotify()
1449 * happens.
1450 */
1451 so = (struct socket *)fp->f_data;
1452 s = splnet();
1453 if (((opcode == LIO_READ) && (!soreadable(so))) || ((opcode ==
1454 LIO_WRITE) && (!sowriteable(so)))) {
1455 TAILQ_INSERT_TAIL(&so->so_aiojobq, aiocbe, list);
1456 TAILQ_INSERT_TAIL(&ki->kaio_sockqueue, aiocbe, plist);
1457 if (opcode == LIO_READ)
1458 so->so_rcv.sb_flags |= SB_AIO;
1459 else
1460 so->so_snd.sb_flags |= SB_AIO;
1461 aiocbe->jobstate = JOBST_JOBQGLOBAL; /* XXX */
1462 ki->kaio_queue_count++;
1463 num_queue_count++;
1464 splx(s);
1465 error = 0;
1466 goto done;
1467 }
1468 splx(s);
1469 }
1470
1471 if ((error = aio_qphysio(p, aiocbe)) == 0)
1472 goto done;
1473 if (error > 0) {
1474 suword(&job->_aiocb_private.status, 0);
1475 aiocbe->uaiocb._aiocb_private.error = error;
1476 suword(&job->_aiocb_private.error, error);
1477 goto done;
1478 }
1479
1480 /* No buffer for daemon I/O. */
1481 aiocbe->bp = NULL;
1482
1483 ki->kaio_queue_count++;
1484 if (lj)
1485 lj->lioj_queue_count++;
1486 s = splnet();
1487 TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist);
1488 TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list);
1489 splx(s);
1490 aiocbe->jobstate = JOBST_JOBQGLOBAL;
1491
1492 num_queue_count++;
1493 error = 0;
1494
1495 /*
1496 * If we don't have a free AIO process, and we are below our quota, then
1497 * start one. Otherwise, depend on the subsequent I/O completions to
1498 * pick up this job. If we don't successfully create the new process
1499 * (thread) due to resource issues, we return an error for now (EAGAIN),
1500 * which is likely not the correct thing to do.
1501 */
1502 s = splnet();
1503retryproc:
1504 if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) {
1505 TAILQ_REMOVE(&aio_freeproc, aiop, list);
1506 TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
1507 aiop->aiothreadflags &= ~AIOP_FREE;
1508 wakeup(aiop->aiothread);
1509 } else if (((num_aio_resv_start + num_aio_procs) < max_aio_procs) &&
1510 ((ki->kaio_active_count + num_aio_resv_start) <
1511 ki->kaio_maxactive_count)) {
1512 num_aio_resv_start++;
1513 if ((error = aio_newproc()) == 0) {
1514 num_aio_resv_start--;
1515 goto retryproc;
1516 }
1517 num_aio_resv_start--;
1518 }
1519 splx(s);
1520done:
1521 fdrop(fp, td);
1522 return error;
1523}
1524
1525/*
1526 * This routine queues an AIO request, checking for quotas.
1527 */
1528static int
1529aio_aqueue(struct thread *td, struct aiocb *job, int type)
1530{
1531 struct proc *p = td->td_proc;
1532 struct kaioinfo *ki;
1533
1534 if (p->p_aioinfo == NULL)
1535 aio_init_aioinfo(p);
1536
1537 if (num_queue_count >= max_queue_count)
1538 return EAGAIN;
1539
1540 ki = p->p_aioinfo;
1541 if (ki->kaio_queue_count >= ki->kaio_qallowed_count)
1542 return EAGAIN;
1543
1544 return _aio_aqueue(td, job, NULL, type);
1545}
1546
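/*
 * [Illustrative userland sketch -- not part of vfs_aio.c.]
 * aio_aqueue() above fails with EAGAIN once the system-wide or per-process
 * queue limits are reached, so a heavy submitter can back off and retry.
 * enqueue_with_retry() is a hypothetical helper name used only here.
 */
#include <aio.h>
#include <errno.h>
#include <unistd.h>

static int
enqueue_with_retry(struct aiocb *iocb)
{
        while (aio_read(iocb) == -1) {
                if (errno != EAGAIN)
                        return (-1);
                /* Queue full: give already-queued requests time to drain. */
                usleep(1000);
        }
        return (0);
}
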
1547/*
1548 * Support the aio_return system call; as a side effect, kernel resources
1549 * are released.
1550 */
1551int
1552aio_return(struct thread *td, struct aio_return_args *uap)
1553{
1554 struct proc *p = td->td_proc;
1555 int s;
1556 int jobref;
1557 struct aiocblist *cb, *ncb;
1558 struct aiocb *ujob;
1559 struct kaioinfo *ki;
1560
1561 ki = p->p_aioinfo;
1562 if (ki == NULL)
1563 return EINVAL;
1564
1565 ujob = uap->aiocbp;
1566
1567 jobref = fuword(&ujob->_aiocb_private.kernelinfo);
1568 if (jobref == -1 || jobref == 0)
1569 return EINVAL;
1570
1571 TAILQ_FOREACH(cb, &ki->kaio_jobdone, plist) {
1572 if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) ==
1573 jobref) {
1574 if (ujob == cb->uuaiocb) {
1575 td->td_retval[0] =
1576 cb->uaiocb._aiocb_private.status;
1577 } else
1578 td->td_retval[0] = EFAULT;
1579 if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) {
1580 p->p_stats->p_ru.ru_oublock +=
1581 cb->outputcharge;
1582 cb->outputcharge = 0;
1583 } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) {
1584 p->p_stats->p_ru.ru_inblock += cb->inputcharge;
1585 cb->inputcharge = 0;
1586 }
1587 aio_free_entry(cb);
1588 return 0;
1589 }
1590 }
1591 s = splbio();
1592 for (cb = TAILQ_FIRST(&ki->kaio_bufdone); cb; cb = ncb) {
1593 ncb = TAILQ_NEXT(cb, plist);
1594 if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo)
1595 == jobref) {
1596 splx(s);
1597 if (ujob == cb->uuaiocb) {
1598 td->td_retval[0] =
1599 cb->uaiocb._aiocb_private.status;
1600 } else
1601 td->td_retval[0] = EFAULT;
1602 aio_free_entry(cb);
1603 return 0;
1604 }
1605 }
1606 splx(s);
1607
1608 return (EINVAL);
1609}
1610
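/*
 * [Illustrative userland sketch -- not part of vfs_aio.c.]
 * The completion protocol implied by aio_return() above: poll aio_error()
 * until it stops returning EINPROGRESS, then call aio_return() exactly once,
 * which also releases the kernel-side bookkeeping for the request.
 */
#include <sys/types.h>
#include <aio.h>
#include <errno.h>

static ssize_t
wait_and_reap(struct aiocb *iocb)
{
        const struct aiocb *list[1];
        int err;

        list[0] = iocb;
        while ((err = aio_error(iocb)) == EINPROGRESS)
                aio_suspend(list, 1, NULL);     /* sleep instead of spinning */
        if (err != 0) {
                errno = err;
                return (-1);
        }
        return (aio_return(iocb));              /* valid exactly once per request */
}
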
1611/*
1612 * Allow a process to wake up when any of the I/O requests has completed.
1613 */
1614int
1615aio_suspend(struct thread *td, struct aio_suspend_args *uap)
1616{
1617 struct proc *p = td->td_proc;
1618 struct timeval atv;
1619 struct timespec ts;
1620 struct aiocb *const *cbptr, *cbp;
1621 struct kaioinfo *ki;
1622 struct aiocblist *cb;
1623 int i;
1624 int njoblist;
1625 int error, s, timo;
1626 int *ijoblist;
1627 struct aiocb **ujoblist;
1628
1629 if (uap->nent > AIO_LISTIO_MAX)
1630 return EINVAL;
1631
1632 timo = 0;
1633 if (uap->timeout) {
1634 /* Get timespec struct. */
1635 if ((error = copyin(uap->timeout, &ts, sizeof(ts))) != 0)
1636 return error;
1637
1638 if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
1639 return (EINVAL);
1640
1641 TIMESPEC_TO_TIMEVAL(&atv, &ts);
1642 if (itimerfix(&atv))
1643 return (EINVAL);
1644 timo = tvtohz(&atv);
1645 }
1646
1647 ki = p->p_aioinfo;
1648 if (ki == NULL)
1649 return EAGAIN;
1650
1651 njoblist = 0;
1652 ijoblist = zalloc(aiol_zone);
1653 ujoblist = zalloc(aiol_zone);
1654 cbptr = uap->aiocbp;
1655
1656 for (i = 0; i < uap->nent; i++) {
1657 cbp = (struct aiocb *)(intptr_t)fuword((caddr_t)&cbptr[i]);
1658 if (cbp == 0)
1659 continue;
1660 ujoblist[njoblist] = cbp;
1661 ijoblist[njoblist] = fuword(&cbp->_aiocb_private.kernelinfo);
1662 njoblist++;
1663 }
1664
1665 if (njoblist == 0) {
1666 zfree(aiol_zone, ijoblist);
1667 zfree(aiol_zone, ujoblist);
1668 return 0;
1669 }
1670
1671 error = 0;
1672 for (;;) {
1673 TAILQ_FOREACH(cb, &ki->kaio_jobdone, plist) {
1674 for (i = 0; i < njoblist; i++) {
1675 if (((intptr_t)
1676 cb->uaiocb._aiocb_private.kernelinfo) ==
1677 ijoblist[i]) {
1678 if (ujoblist[i] != cb->uuaiocb)
1679 error = EINVAL;
1680 zfree(aiol_zone, ijoblist);
1681 zfree(aiol_zone, ujoblist);
1682 return error;
1683 }
1684 }
1685 }
1686
1687 s = splbio();
1688 for (cb = TAILQ_FIRST(&ki->kaio_bufdone); cb; cb =
1689 TAILQ_NEXT(cb, plist)) {
1690 for (i = 0; i < njoblist; i++) {
1691 if (((intptr_t)
1692 cb->uaiocb._aiocb_private.kernelinfo) ==
1693 ijoblist[i]) {
1694 splx(s);
1695 if (ujoblist[i] != cb->uuaiocb)
1696 error = EINVAL;
1697 zfree(aiol_zone, ijoblist);
1698 zfree(aiol_zone, ujoblist);
1699 return error;
1700 }
1701 }
1702 }
1703
1704 ki->kaio_flags |= KAIO_WAKEUP;
1705 error = tsleep(p, PRIBIO | PCATCH, "aiospn", timo);
1706 splx(s);
1707
1708 if (error == ERESTART || error == EINTR) {
1709 zfree(aiol_zone, ijoblist);
1710 zfree(aiol_zone, ujoblist);
1711 return EINTR;
1712 } else if (error == EWOULDBLOCK) {
1713 zfree(aiol_zone, ijoblist);
1714 zfree(aiol_zone, ujoblist);
1715 return EAGAIN;
1716 }
1717 }
1718
1719/* NOTREACHED */
1720 return EINVAL;
1721}
1722
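/*
 * [Illustrative userland sketch -- not part of vfs_aio.c.]
 * Waiting on several outstanding requests with a bounded wait, matching the
 * EAGAIN-on-timeout and EINTR-on-signal behaviour of aio_suspend() above.
 */
#include <aio.h>
#include <errno.h>
#include <stdio.h>
#include <time.h>

static void
wait_for_any(const struct aiocb *list[], int nent)
{
        struct timespec ts;

        ts.tv_sec = 1;
        ts.tv_nsec = 0;
        if (aio_suspend(list, nent, &ts) == -1) {
                if (errno == EAGAIN)
                        printf("no request finished within 1s\n");
                else if (errno == EINTR)
                        printf("interrupted by a signal\n");
        } else
                printf("at least one request has completed\n");
}
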
1723/*
1724 * aio_cancel cancels any non-physio aio operations not currently in
1725 * progress.
1726 */
1727int
1728aio_cancel(struct thread *td, struct aio_cancel_args *uap)
1729{
1730 struct proc *p = td->td_proc;
1731 struct kaioinfo *ki;
1732 struct aiocblist *cbe, *cbn;
1733 struct file *fp;
1734 struct filedesc *fdp;
1735 struct socket *so;
1736 struct proc *po;
1737 int s, error;
1738 int cancelled = 0;
1739 int notcancelled = 0;
1740 struct vnode *vp;
1741
1742 fdp = p->p_fd;
1743 if ((u_int)uap->fd >= fdp->fd_nfiles ||
1744 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1745 return (EBADF);
1746
1747 if (fp->f_type == DTYPE_VNODE) {
1748 vp = (struct vnode *)fp->f_data;
1749
1750 if (vn_isdisk(vp,&error)) {
1751 td->td_retval[0] = AIO_NOTCANCELED;
1752 return 0;
1753 }
1754 } else if (fp->f_type == DTYPE_SOCKET) {
1755 so = (struct socket *)fp->f_data;
1756
1757 s = splnet();
1758
1759 for (cbe = TAILQ_FIRST(&so->so_aiojobq); cbe; cbe = cbn) {
1760 cbn = TAILQ_NEXT(cbe, list);
1761 if ((uap->aiocbp == NULL) ||
1762 (uap->aiocbp == cbe->uuaiocb) ) {
1763 po = cbe->userproc;
1764 ki = po->p_aioinfo;
1765 TAILQ_REMOVE(&so->so_aiojobq, cbe, list);
1766 TAILQ_REMOVE(&ki->kaio_sockqueue, cbe, plist);
1767 TAILQ_INSERT_TAIL(&ki->kaio_jobdone, cbe, plist);
1768 if (ki->kaio_flags & KAIO_WAKEUP) {
1769 wakeup(po);
1770 }
1771 cbe->jobstate = JOBST_JOBFINISHED;
1772 cbe->uaiocb._aiocb_private.status = -1;
1773 cbe->uaiocb._aiocb_private.error = ECANCELED;
1774 cancelled++;
1775/* XXX cancelled, knote? */
1776 if (cbe->uaiocb.aio_sigevent.sigev_notify ==
1777 SIGEV_SIGNAL) {
1778 PROC_LOCK(cbe->userproc);
1779 psignal(cbe->userproc, cbe->uaiocb.aio_sigevent.sigev_signo);
1780 PROC_UNLOCK(cbe->userproc);
1781 }
1782 if (uap->aiocbp)
1783 break;
1784 }
1785 }
1786 splx(s);
1787
1788 if ((cancelled) && (uap->aiocbp)) {
1789 td->td_retval[0] = AIO_CANCELED;
1790 return 0;
1791 }
1792 }
1793 ki = p->p_aioinfo;
1794 s = splnet();
1795
1796 for (cbe = TAILQ_FIRST(&ki->kaio_jobqueue); cbe; cbe = cbn) {
1797 cbn = TAILQ_NEXT(cbe, plist);
1798
1799 if ((uap->fd == cbe->uaiocb.aio_fildes) &&
1800 ((uap->aiocbp == NULL ) ||
1801 (uap->aiocbp == cbe->uuaiocb))) {
1802
1803 if (cbe->jobstate == JOBST_JOBQGLOBAL) {
1804 TAILQ_REMOVE(&aio_jobs, cbe, list);
1805 TAILQ_REMOVE(&ki->kaio_jobqueue, cbe, plist);
1806 TAILQ_INSERT_TAIL(&ki->kaio_jobdone, cbe,
1807 plist);
1808 cancelled++;
1809 ki->kaio_queue_finished_count++;
1810 cbe->jobstate = JOBST_JOBFINISHED;
1811 cbe->uaiocb._aiocb_private.status = -1;
1812 cbe->uaiocb._aiocb_private.error = ECANCELED;
1813/* XXX cancelled, knote? */
1814 if (cbe->uaiocb.aio_sigevent.sigev_notify ==
1815 SIGEV_SIGNAL) {
1816 PROC_LOCK(cbe->userproc);
1817 psignal(cbe->userproc, cbe->uaiocb.aio_sigevent.sigev_signo);
1818 PROC_UNLOCK(cbe->userproc);
1819 }
1820 } else {
1821 notcancelled++;
1822 }
1823 }
1824 }
1825 splx(s);
1826
1827 if (notcancelled) {
1828 td->td_retval[0] = AIO_NOTCANCELED;
1829 return 0;
1830 }
1831 if (cancelled) {
1832 td->td_retval[0] = AIO_CANCELED;
1833 return 0;
1834 }
1835 td->td_retval[0] = AIO_ALLDONE;
1836
1837 return 0;
1838}
1839
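/*
 * [Illustrative userland sketch -- not part of vfs_aio.c.]
 * Interpreting aio_cancel() as implemented above: jobs still sitting on a
 * queue are marked ECANCELED, raw-disk physio transfers are never cancelled,
 * and jobs that already finished leave nothing to cancel.
 */
#include <aio.h>
#include <stdio.h>

static void
cancel_one(int fd, struct aiocb *iocb)
{
        switch (aio_cancel(fd, iocb)) {
        case AIO_CANCELED:
                /* aio_error() now reports ECANCELED; still reap with aio_return(). */
                printf("cancelled\n");
                break;
        case AIO_NOTCANCELED:
                /* Typically a disk transfer already in flight; wait for it. */
                printf("still in progress\n");
                break;
        case AIO_ALLDONE:
                printf("already completed\n");
                break;
        default:
                perror("aio_cancel");
        }
}
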
1840/*
1841 * aio_error is implemented at the kernel level for compatibility purposes only.
1842 * For a user mode async implementation, it would be best to do it in a userland
1843 * subroutine.
1844 */
1845int
1846aio_error(struct thread *td, struct aio_error_args *uap)
1847{
1848 struct proc *p = td->td_proc;
1849 int s;
1850 struct aiocblist *cb;
1851 struct kaioinfo *ki;
1852 int jobref;
1853
1854 ki = p->p_aioinfo;
1855 if (ki == NULL)
1856 return EINVAL;
1857
1858 jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo);
1859 if ((jobref == -1) || (jobref == 0))
1860 return EINVAL;
1861
1862 TAILQ_FOREACH(cb, &ki->kaio_jobdone, plist) {
1863 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo) ==
1864 jobref) {
1865 td->td_retval[0] = cb->uaiocb._aiocb_private.error;
1866 return 0;
1867 }
1868 }
1869
1870 s = splnet();
1871
1872 for (cb = TAILQ_FIRST(&ki->kaio_jobqueue); cb; cb = TAILQ_NEXT(cb,
1873 plist)) {
1874 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo) ==
1875 jobref) {
1876 td->td_retval[0] = EINPROGRESS;
1877 splx(s);
1878 return 0;
1879 }
1880 }
1881
1882 for (cb = TAILQ_FIRST(&ki->kaio_sockqueue); cb; cb = TAILQ_NEXT(cb,
1883 plist)) {
1884 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo) ==
1885 jobref) {
1886 td->td_retval[0] = EINPROGRESS;
1887 splx(s);
1888 return 0;
1889 }
1890 }
1891 splx(s);
1892
1893 s = splbio();
1894 for (cb = TAILQ_FIRST(&ki->kaio_bufdone); cb; cb = TAILQ_NEXT(cb,
1895 plist)) {
1896 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo) ==
1897 jobref) {
1898 td->td_retval[0] = cb->uaiocb._aiocb_private.error;
1899 splx(s);
1900 return 0;
1901 }
1902 }
1903
1904 for (cb = TAILQ_FIRST(&ki->kaio_bufqueue); cb; cb = TAILQ_NEXT(cb,
1905 plist)) {
1906 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo) ==
1907 jobref) {
1908 td->td_retval[0] = EINPROGRESS;
1909 splx(s);
1910 return 0;
1911 }
1912 }
1913 splx(s);
1914
1915#if (0)
1916 /*
1917 * Hack for lio.
1918 */
1919 status = fuword(&uap->aiocbp->_aiocb_private.status);
1920 if (status == -1)
1921 return fuword(&uap->aiocbp->_aiocb_private.error);
1922#endif
1923 return EINVAL;
1924}
1925
1926int
1927aio_read(struct thread *td, struct aio_read_args *uap)
1928{
1929
1930 return aio_aqueue(td, uap->aiocbp, LIO_READ);
1931}
1932
1933int
1934aio_write(struct thread *td, struct aio_write_args *uap)
1935{
1936
1937 return aio_aqueue(td, uap->aiocbp, LIO_WRITE);
1938}
1939
1940int
1941lio_listio(struct thread *td, struct lio_listio_args *uap)
1942{
1943 struct proc *p = td->td_proc;
1944 int nent, nentqueued;
1945 struct aiocb *iocb, * const *cbptr;
1946 struct aiocblist *cb;
1947 struct kaioinfo *ki;
1948 struct aio_liojob *lj;
1949 int error, runningcode;
1950 int nerror;
1951 int i;
1952 int s;
1953
1954 if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
1955 return EINVAL;
1956
1957 nent = uap->nent;
1958 if (nent > AIO_LISTIO_MAX)
1959 return EINVAL;
1960
1961 if (p->p_aioinfo == NULL)
1962 aio_init_aioinfo(p);
1963
1964 if ((nent + num_queue_count) > max_queue_count)
1965 return EAGAIN;
1966
1967 ki = p->p_aioinfo;
1968 if ((nent + ki->kaio_queue_count) > ki->kaio_qallowed_count)
1969 return EAGAIN;
1970
1971 lj = zalloc(aiolio_zone);
1972 if (!lj)
1973 return EAGAIN;
1974
1975 lj->lioj_flags = 0;
1976 lj->lioj_buffer_count = 0;
1977 lj->lioj_buffer_finished_count = 0;
1978 lj->lioj_queue_count = 0;
1979 lj->lioj_queue_finished_count = 0;
1980 lj->lioj_ki = ki;
1981
1982 /*
1983 * Setup signal.
1984 */
1985 if (uap->sig && (uap->mode == LIO_NOWAIT)) {
1986 error = copyin(uap->sig, &lj->lioj_signal,
1987 sizeof(lj->lioj_signal));
1988 if (error) {
1989 zfree(aiolio_zone, lj);
1990 return error;
1991 }
1992 if (!_SIG_VALID(lj->lioj_signal.sigev_signo)) {
1993 zfree(aiolio_zone, lj);
1994 return EINVAL;
1995 }
1996 lj->lioj_flags |= LIOJ_SIGNAL;
1997 lj->lioj_flags &= ~LIOJ_SIGNAL_POSTED;
1998 } else
1999 lj->lioj_flags &= ~LIOJ_SIGNAL;
2000
2001 TAILQ_INSERT_TAIL(&ki->kaio_liojoblist, lj, lioj_list);
2002 /*
2003 * Get pointers to the list of I/O requests.
2004 */
2005 nerror = 0;
2006 nentqueued = 0;
2007 cbptr = uap->acb_list;
2008 for (i = 0; i < uap->nent; i++) {
2009 iocb = (struct aiocb *)(intptr_t)fuword((caddr_t)&cbptr[i]);
2010 if (((intptr_t)iocb != -1) && ((intptr_t)iocb != NULL)) {
2011 error = _aio_aqueue(td, iocb, lj, 0);
2012 if (error == 0)
2013 nentqueued++;
2014 else
2015 nerror++;
2016 }
2017 }
2018
2019 /*
2020 * If we haven't queued any, then just return error.
2021 */
2022 if (nentqueued == 0)
2023 return 0;
2024
2025 /*
2026 * Calculate the appropriate error return.
2027 */
2028 runningcode = 0;
2029 if (nerror)
2030 runningcode = EIO;
2031
2032 if (uap->mode == LIO_WAIT) {
2033 int command, found, jobref;
2034
2035 for (;;) {
2036 found = 0;
2037 for (i = 0; i < uap->nent; i++) {
2038 /*
2039 * Fetch address of the control buf pointer in
2040 * user space.
2041 */
2042 iocb = (struct aiocb *)(intptr_t)fuword((caddr_t)&cbptr[i]);
2043 if (((intptr_t)iocb == -1) || ((intptr_t)iocb
2044 == 0))
2045 continue;
2046
2047 /*
2048 * Fetch the associated command from user space.
2049 */
2050 command = fuword(&iocb->aio_lio_opcode);
2051 if (command == LIO_NOP) {
2052 found++;
2053 continue;
2054 }
2055
2056 jobref = fuword(&iocb->_aiocb_private.kernelinfo);
2057
2058 TAILQ_FOREACH(cb, &ki->kaio_jobdone, plist) {
2059 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo)
2060 == jobref) {
2061 if (cb->uaiocb.aio_lio_opcode
2062 == LIO_WRITE) {
2063 p->p_stats->p_ru.ru_oublock
2064 +=
2065 cb->outputcharge;
2066 cb->outputcharge = 0;
2067 } else if (cb->uaiocb.aio_lio_opcode
2068 == LIO_READ) {
2069 p->p_stats->p_ru.ru_inblock
2070 += cb->inputcharge;
2071 cb->inputcharge = 0;
2072 }
2073 found++;
2074 break;
2075 }
2076 }
2077
2078 s = splbio();
2079 TAILQ_FOREACH(cb, &ki->kaio_bufdone, plist) {
2080 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo)
2081 == jobref) {
2082 found++;
2083 break;
2084 }
2085 }
2086 splx(s);
2087 }
2088
2089 /*
2090 * If all I/Os have been disposed of, then we can
2091 * return.
2092 */
2093 if (found == nentqueued)
2094 return runningcode;
2095
2096 ki->kaio_flags |= KAIO_WAKEUP;
2097 error = tsleep(p, PRIBIO | PCATCH, "aiospn", 0);
2098
2099 if (error == EINTR)
2100 return EINTR;
2101 else if (error == EWOULDBLOCK)
2102 return EAGAIN;
2103 }
2104 }
2105
2106 return runningcode;
2107}
2108
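/*
 * [Illustrative userland sketch -- not part of vfs_aio.c.]
 * Submitting a batch through lio_listio() as implemented above: LIO_WAIT
 * blocks until every successfully queued entry has completed, and entries
 * may mix LIO_READ, LIO_WRITE and LIO_NOP opcodes.
 */
#include <aio.h>
#include <stdio.h>
#include <string.h>

static int
batch_read_write(int fd_in, int fd_out, char *inbuf, char *outbuf, size_t len)
{
        struct aiocb rd, wr;
        struct aiocb *list[2];

        memset(&rd, 0, sizeof(rd));
        rd.aio_fildes = fd_in;
        rd.aio_buf = inbuf;
        rd.aio_nbytes = len;
        rd.aio_lio_opcode = LIO_READ;

        memset(&wr, 0, sizeof(wr));
        wr.aio_fildes = fd_out;
        wr.aio_buf = outbuf;
        wr.aio_nbytes = len;
        wr.aio_lio_opcode = LIO_WRITE;

        list[0] = &rd;
        list[1] = &wr;

        /* LIO_WAIT: do not return until both transfers are done. */
        if (lio_listio(LIO_WAIT, list, 2, NULL) == -1) {
                perror("lio_listio");
                return (-1);
        }
        printf("read %zd, wrote %zd bytes\n", aio_return(&rd), aio_return(&wr));
        return (0);
}
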
2109/*
2110 * This is a weird hack so that we can post a signal. It is safe to do so from
2111 * a timeout routine, but *not* from an interrupt routine.
2112 */
2113static void
2114process_signal(void *aioj)
2115{
2116 struct aiocblist *aiocbe = aioj;
2117 struct aio_liojob *lj = aiocbe->lio;
2118 struct aiocb *cb = &aiocbe->uaiocb;
2119
2120 if ((lj) && (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL) &&
2121 (lj->lioj_queue_count == lj->lioj_queue_finished_count)) {
2122 PROC_LOCK(lj->lioj_ki->kaio_p);
2123 psignal(lj->lioj_ki->kaio_p, lj->lioj_signal.sigev_signo);
2124 PROC_UNLOCK(lj->lioj_ki->kaio_p);
2125 lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
2126 }
2127
2128 if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) {
2129 PROC_LOCK(aiocbe->userproc);
2130 psignal(aiocbe->userproc, cb->aio_sigevent.sigev_signo);
2131 PROC_UNLOCK(aiocbe->userproc);
2132 }
2133}
2134
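/*
 * [Illustrative userland sketch -- not part of vfs_aio.c.]
 * The SIGEV_SIGNAL notification driven by process_signal() above: the caller
 * installs a handler and asks for a signal when the request completes.  This
 * revision posts a plain signal, so no siginfo value is relied upon here.
 */
#include <sys/types.h>
#include <aio.h>
#include <signal.h>
#include <string.h>
#include <unistd.h>

static volatile sig_atomic_t aio_done;

static void
on_aio_signal(int sig)
{
        (void)sig;
        aio_done = 1;           /* async-signal-safe: just set a flag */
}

static ssize_t
read_with_signal(int fd, char *buf, size_t len)
{
        struct sigaction sa;
        struct aiocb iocb;
        sigset_t block, waitmask;

        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = on_aio_signal;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGUSR1, &sa, NULL);

        /* Block SIGUSR1 so the completion signal cannot race sigsuspend(). */
        sigemptyset(&block);
        sigaddset(&block, SIGUSR1);
        sigprocmask(SIG_BLOCK, &block, &waitmask);
        sigdelset(&waitmask, SIGUSR1);

        aio_done = 0;
        memset(&iocb, 0, sizeof(iocb));
        iocb.aio_fildes = fd;
        iocb.aio_buf = buf;
        iocb.aio_nbytes = len;
        iocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
        iocb.aio_sigevent.sigev_signo = SIGUSR1;

        if (aio_read(&iocb) == -1)
                return (-1);
        while (!aio_done)
                sigsuspend(&waitmask);  /* atomically unblock and wait */
        return (aio_return(&iocb));
}
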
2135/*
2136 * Interrupt handler for physio; performs the necessary process wakeups and
2137 * signals.
2138 */
2139static void
2140aio_physwakeup(struct buf *bp)
2141{
2142 struct aiocblist *aiocbe;
2143 struct proc *p;
2144 struct kaioinfo *ki;
2145 struct aio_liojob *lj;
2146
2147 wakeup(bp);
2148
2149 aiocbe = (struct aiocblist *)bp->b_spc;
2150 if (aiocbe) {
2151 p = bp->b_caller1;
2152
2153 aiocbe->jobstate = JOBST_JOBBFINISHED;
2154 aiocbe->uaiocb._aiocb_private.status -= bp->b_resid;
2155 aiocbe->uaiocb._aiocb_private.error = 0;
2156 aiocbe->jobflags |= AIOCBLIST_DONE;
2157
2158 if (bp->b_ioflags & BIO_ERROR)
2159 aiocbe->uaiocb._aiocb_private.error = bp->b_error;
2160
2161 lj = aiocbe->lio;
2162 if (lj) {
2163 lj->lioj_buffer_finished_count++;
2164
2165 /*
2166 * wakeup/signal if all of the interrupt jobs are done.
2167 */
2168 if (lj->lioj_buffer_finished_count ==
2169 lj->lioj_buffer_count) {
2170 /*
2171 * Post a signal if it is called for.
2172 */
2173 if ((lj->lioj_flags &
2174 (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) ==
2175 LIOJ_SIGNAL) {
2176 lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
2177 aiocbe->timeouthandle =
2178 timeout(process_signal,
2179 aiocbe, 0);
2180 }
2181 }
2182 }
2183
2184 ki = p->p_aioinfo;
2185 if (ki) {
2186 ki->kaio_buffer_finished_count++;
2187 TAILQ_REMOVE(&aio_bufjobs, aiocbe, list);
2188 TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist);
2189 TAILQ_INSERT_TAIL(&ki->kaio_bufdone, aiocbe, plist);
2190
2191 KNOTE(&aiocbe->klist, 0);
2192 /* Do the wakeup. */
2193 if (ki->kaio_flags & (KAIO_RUNDOWN|KAIO_WAKEUP)) {
2194 ki->kaio_flags &= ~KAIO_WAKEUP;
2195 wakeup(p);
2196 }
2197 }
2198
2199 if (aiocbe->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL)
2200 aiocbe->timeouthandle =
2201 timeout(process_signal, aiocbe, 0);
2202 }
2203}
2204
2205int
2206aio_waitcomplete(struct thread *td, struct aio_waitcomplete_args *uap)
2207{
2208 struct proc *p = td->td_proc;
2209 struct timeval atv;
2210 struct timespec ts;
2211 struct aiocb **cbptr;
2212 struct kaioinfo *ki;
2213 struct aiocblist *cb = NULL;
2214 int error, s, timo;
2215
2216 suword(uap->aiocbp, (int)NULL);
2217
2218 timo = 0;
2219 if (uap->timeout) {
2220 /* Get timespec struct. */
2221 error = copyin(uap->timeout, &ts, sizeof(ts));
2222 if (error)
2223 return error;
2224
2225 if ((ts.tv_nsec < 0) || (ts.tv_nsec >= 1000000000))
2226 return (EINVAL);
2227
2228 TIMESPEC_TO_TIMEVAL(&atv, &ts);
2229 if (itimerfix(&atv))
2230 return (EINVAL);
2231 timo = tvtohz(&atv);
2232 }
2233
2234 ki = p->p_aioinfo;
2235 if (ki == NULL)
2236 return EAGAIN;
2237
2238 cbptr = uap->aiocbp;
2239
2240 for (;;) {
2241 if ((cb = TAILQ_FIRST(&ki->kaio_jobdone)) != 0) {
2242 suword(uap->aiocbp, (uintptr_t)cb->uuaiocb);
2243 td->td_retval[0] = cb->uaiocb._aiocb_private.status;
2244 if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) {
2245 p->p_stats->p_ru.ru_oublock +=
2246 cb->outputcharge;
2247 cb->outputcharge = 0;
2248 } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) {
2249 p->p_stats->p_ru.ru_inblock += cb->inputcharge;
2250 cb->inputcharge = 0;
2251 }
2252 aio_free_entry(cb);
2253 return cb->uaiocb._aiocb_private.error;
2254 }
2255
2256 s = splbio();
2257 if ((cb = TAILQ_FIRST(&ki->kaio_bufdone)) != 0 ) {
2258 splx(s);
2259 suword(uap->aiocbp, (uintptr_t)cb->uuaiocb);
2260 td->td_retval[0] = cb->uaiocb._aiocb_private.status;
2261 aio_free_entry(cb);
2262 return cb->uaiocb._aiocb_private.error;
2263 }
2264
2265 ki->kaio_flags |= KAIO_WAKEUP;
2266 error = tsleep(p, PRIBIO | PCATCH, "aiowc", timo);
2267 splx(s);
2268
2269 if (error == ERESTART)
2270 return EINTR;
2271 else if (error < 0)
2272 return error;
2273 else if (error == EINTR)
2274 return EINTR;
2275 else if (error == EWOULDBLOCK)
2276 return EAGAIN;
2277 }
2278}
2279
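/*
 * [Illustrative userland sketch -- not part of vfs_aio.c.]
 * aio_waitcomplete() is the FreeBSD-specific "hand me whatever finishes
 * next" interface implemented above; it waits and reaps in one step, so no
 * separate aio_return() call is needed.  The userland prototype assumed here
 * is aio_waitcomplete(struct aiocb **, struct timespec *).
 */
#include <sys/types.h>
#include <aio.h>
#include <errno.h>
#include <stdio.h>
#include <time.h>

static void
reap_next_completion(void)
{
        struct aiocb *done;
        struct timespec ts;
        ssize_t n;

        ts.tv_sec = 5;
        ts.tv_nsec = 0;
        n = aio_waitcomplete(&done, &ts);
        if (n == -1) {
                if (errno == EAGAIN)
                        printf("nothing completed within 5 seconds\n");
                else
                        perror("aio_waitcomplete");
                return;
        }
        printf("request %p transferred %zd bytes\n", (void *)done, n);
}
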
2280static int
2281filt_aioattach(struct knote *kn)
2282{
2283 struct aiocblist *aiocbe = (struct aiocblist *)kn->kn_id;
2284
2285 /*
2286 * The aiocbe pointer must be validated before using it, so
2287 * registration is restricted to the kernel; the user cannot
2288 * set EV_FLAG1.
2289 */
2290 if ((kn->kn_flags & EV_FLAG1) == 0)
2291 return (EPERM);
2292 kn->kn_flags &= ~EV_FLAG1;
2293
2294 SLIST_INSERT_HEAD(&aiocbe->klist, kn, kn_selnext);
2295
2296 return (0);
2297}
2298
2299static void
2300filt_aiodetach(struct knote *kn)
2301{
2302 struct aiocblist *aiocbe = (struct aiocblist *)kn->kn_id;
2303
2304 SLIST_REMOVE(&aiocbe->klist, kn, knote, kn_selnext);
2305}
2306
2307/*ARGSUSED*/
2308static int
2309filt_aio(struct knote *kn, long hint)
2310{
2311 struct aiocblist *aiocbe = (struct aiocblist *)kn->kn_id;
2312
2313 kn->kn_data = aiocbe->uaiocb._aiocb_private.error;
2314 if (aiocbe->jobstate != JOBST_JOBFINISHED &&
2315 aiocbe->jobstate != JOBST_JOBBFINISHED)
2316 return (0);
2317 kn->kn_flags |= EV_EOF;
2318 return (1);
2319}
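/*
 * [Illustrative userland sketch -- not part of vfs_aio.c.]
 * Kevent-based completion as wired up by the EVFILT_AIO filter above: the
 * request carries SIGEV_KEVENT plus the target kqueue descriptor, and the
 * udata of the delivered event is the sigev_value pointer stored at
 * submission time.  Assumption: the sigval member is spelled sival_ptr in
 * current headers (older FreeBSD headers spelled it sigval_ptr).
 */
#include <sys/types.h>
#include <sys/event.h>
#include <aio.h>
#include <stdio.h>
#include <string.h>

static void
kqueue_aio_demo(int fd, char *buf, size_t len)
{
        struct aiocb iocb, *done;
        struct kevent ev;
        int kq;

        kq = kqueue();
        if (kq == -1)
                return;

        memset(&iocb, 0, sizeof(iocb));
        iocb.aio_fildes = fd;
        iocb.aio_buf = buf;
        iocb.aio_nbytes = len;
        iocb.aio_sigevent.sigev_notify = SIGEV_KEVENT;
        iocb.aio_sigevent.sigev_notify_kqueue = kq;
        iocb.aio_sigevent.sigev_value.sival_ptr = &iocb;

        if (aio_read(&iocb) == -1)
                return;

        /* Block until the EVFILT_AIO event for this request fires. */
        if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1) {
                done = ev.udata;        /* sigev_value round-trips here */
                printf("aio done, %zd bytes\n", aio_return(done));
        }
}
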
794 /* Mark special process type. */
795 mycp->p_flag |= P_SYSTEM;
796
797 /*
798 * Wake up the parent process. (Parent sleeps to keep from blasting away
799 * and creating too many daemons.)
800 */
801 wakeup(mycp);
802
803 for (;;) {
804 /*
805 * curcp is the current daemon process context.
806 * userp is the current user process context.
807 */
808 curcp = mycp;
809
810 /*
811 * Take daemon off of free queue
812 */
813 if (aiop->aiothreadflags & AIOP_FREE) {
814 s = splnet();
815 TAILQ_REMOVE(&aio_freeproc, aiop, list);
816 TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
817 aiop->aiothreadflags &= ~AIOP_FREE;
818 splx(s);
819 }
820 aiop->aiothreadflags &= ~AIOP_SCHED;
821
822 /*
823 * Check for jobs.
824 */
825 while ((aiocbe = aio_selectjob(aiop)) != NULL) {
826 cb = &aiocbe->uaiocb;
827 userp = aiocbe->userproc;
828
829 aiocbe->jobstate = JOBST_JOBRUNNING;
830
831 /*
832 * Connect to process address space for user program.
833 */
834 if (userp != curcp) {
835 /*
836 * Save the current address space that we are
837 * connected to.
838 */
839 tmpvm = mycp->p_vmspace;
840
841 /*
842 * Point to the new user address space, and
843 * refer to it.
844 */
845 mycp->p_vmspace = userp->p_vmspace;
846 mycp->p_vmspace->vm_refcnt++;
847
848 /* Activate the new mapping. */
849 pmap_activate(FIRST_THREAD_IN_PROC(mycp));
850
851 /*
852 * If the old address space wasn't the daemon's
853 * own address space, then we need to remove the
854 * daemon's reference from the other process
855 * that it was acting on behalf of.
856 */
857 if (tmpvm != myvm) {
858 vmspace_free(tmpvm);
859 }
860
861 /*
862 * Disassociate from the previous client's file
863 * descriptors, and associate with the new client's
864 * descriptors. Note that the daemon doesn't
865 * need to worry about its original descriptors,
866 * because they were originally freed.
867 */
868 if (mycp->p_fd)
869 fdfree(td);
870 mycp->p_fd = fdshare(userp);
871 curcp = userp;
872 }
873
874 ki = userp->p_aioinfo;
875 lj = aiocbe->lio;
876
877 /* Account for currently active jobs. */
878 ki->kaio_active_count++;
879
880 /* Do the I/O function. */
881 aiocbe->jobaiothread = aiop;
882 aio_process(aiocbe);
883
884 /* Decrement the active job count. */
885 ki->kaio_active_count--;
886
887 /*
888 * Increment the completion count for wakeup/signal
889 * comparisons.
890 */
891 aiocbe->jobflags |= AIOCBLIST_DONE;
892 ki->kaio_queue_finished_count++;
893 if (lj)
894 lj->lioj_queue_finished_count++;
895 if ((ki->kaio_flags & KAIO_WAKEUP) || ((ki->kaio_flags
896 & KAIO_RUNDOWN) && (ki->kaio_active_count == 0))) {
897 ki->kaio_flags &= ~KAIO_WAKEUP;
898 wakeup(userp);
899 }
900
901 s = splbio();
902 if (lj && (lj->lioj_flags &
903 (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) == LIOJ_SIGNAL) {
904 if ((lj->lioj_queue_finished_count ==
905 lj->lioj_queue_count) &&
906 (lj->lioj_buffer_finished_count ==
907 lj->lioj_buffer_count)) {
908 PROC_LOCK(userp);
909 psignal(userp,
910 lj->lioj_signal.sigev_signo);
911 PROC_UNLOCK(userp);
912 lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
913 }
914 }
915 splx(s);
916
917 aiocbe->jobstate = JOBST_JOBFINISHED;
918
919 /*
920 * If the I/O request should be automatically rundown,
921 * do the needed cleanup. Otherwise, place the queue
922 * entry for the just finished I/O request into the done
923 * queue for the associated client.
924 */
925 s = splnet();
926 if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) {
927 aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE;
928 zfree(aiocb_zone, aiocbe);
929 } else {
930 TAILQ_REMOVE(&ki->kaio_jobqueue, aiocbe, plist);
931 TAILQ_INSERT_TAIL(&ki->kaio_jobdone, aiocbe,
932 plist);
933 }
934 splx(s);
935 KNOTE(&aiocbe->klist, 0);
936
937 if (aiocbe->jobflags & AIOCBLIST_RUNDOWN) {
938 wakeup(aiocbe);
939 aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN;
940 }
941
942 if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) {
943 PROC_LOCK(userp);
944 psignal(userp, cb->aio_sigevent.sigev_signo);
945 PROC_UNLOCK(userp);
946 }
947 }
948
949 /*
950 * Disconnect from user address space.
951 */
952 if (curcp != mycp) {
953 /* Get the user address space to disconnect from. */
954 tmpvm = mycp->p_vmspace;
955
956 /* Get original address space for daemon. */
957 mycp->p_vmspace = myvm;
958
959 /* Activate the daemon's address space. */
960 pmap_activate(FIRST_THREAD_IN_PROC(mycp));
961#ifdef DIAGNOSTIC
962 if (tmpvm == myvm) {
963 printf("AIOD: vmspace problem -- %d\n",
964 mycp->p_pid);
965 }
966#endif
967 /* Remove our vmspace reference. */
968 vmspace_free(tmpvm);
969
970 /*
971 * Disassociate from the user process's file
972 * descriptors.
973 */
974 if (mycp->p_fd)
975 fdfree(td);
976 mycp->p_fd = NULL;
977 curcp = mycp;
978 }
979
980 /*
981 * If we are the first to be put onto the free queue, wake up
982 * anyone waiting for a daemon.
983 */
984 s = splnet();
985 TAILQ_REMOVE(&aio_activeproc, aiop, list);
986 if (TAILQ_EMPTY(&aio_freeproc))
987 wakeup(&aio_freeproc);
988 TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
989 aiop->aiothreadflags |= AIOP_FREE;
990 splx(s);
991
992 /*
993 * If daemon is inactive for a long time, allow it to exit,
994 * thereby freeing resources.
995 */
996 if ((aiop->aiothreadflags & AIOP_SCHED) == 0 &&
997 tsleep(aiop->aiothread, PRIBIO, "aiordy", aiod_lifetime)) {
998 s = splnet();
999 if (TAILQ_EMPTY(&aio_jobs)) {
1000 if ((aiop->aiothreadflags & AIOP_FREE) &&
1001 (num_aio_procs > target_aio_procs)) {
1002 TAILQ_REMOVE(&aio_freeproc, aiop, list);
1003 splx(s);
1004 zfree(aiop_zone, aiop);
1005 num_aio_procs--;
1006#ifdef DIAGNOSTIC
1007 if (mycp->p_vmspace->vm_refcnt <= 1) {
1008 printf("AIOD: bad vm refcnt for"
1009 " exiting daemon: %d\n",
1010 mycp->p_vmspace->vm_refcnt);
1011 }
1012#endif
1013 kthread_exit(0);
1014 }
1015 }
1016 splx(s);
1017 }
1018 }
1019}
1020
1021/*
1022 * Create a new AIO daemon. This is mostly a kernel-thread fork routine. The
1023 * AIO daemon modifies its environment itself.
1024 */
1025static int
1026aio_newproc()
1027{
1028 int error;
1029 struct proc *p;
1030
1031 error = kthread_create(aio_daemon, curproc, &p, RFNOWAIT, "aiod%d",
1032 num_aio_procs);
1033 if (error)
1034 return error;
1035
1036 /*
1037 * Wait until the daemon has started, but continue on anyway so that
1038 * error conditions are still handled.
1039 */
1040 error = tsleep(p, PZERO, "aiosta", aiod_timeout);
1041
1042 num_aio_procs++;
1043
1044 return error;
1045}
1046
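/*
 * [Illustrative userland sketch -- not part of vfs_aio.c.]
 * The aiod pool grown by aio_newproc() above is bounded by the tunables
 * near the top of this file.  The vfs.aio.num_aio_procs and
 * vfs.aio.max_aio_procs sysctl names used below are assumptions and may
 * differ by release.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

static void
show_aiod_pool(void)
{
        int cur, max;
        size_t len;

        len = sizeof(cur);
        if (sysctlbyname("vfs.aio.num_aio_procs", &cur, &len, NULL, 0) != 0)
                return;
        len = sizeof(max);
        if (sysctlbyname("vfs.aio.max_aio_procs", &max, &len, NULL, 0) != 0)
                return;
        printf("aio daemons: %d active, limit %d\n", cur, max);
}
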
1047/*
1048 * Try the high-performance, low-overhead physio method for eligible
1049 * VCHR devices. This method doesn't use an aio helper thread, and
1050 * thus has very low overhead.
1051 *
1052 * Assumes that the caller, _aio_aqueue(), has incremented the file
1053 * structure's reference count, preventing its deallocation for the
1054 * duration of this call.
1055 */
1056static int
1057aio_qphysio(struct proc *p, struct aiocblist *aiocbe)
1058{
1059 int error;
1060 struct aiocb *cb;
1061 struct file *fp;
1062 struct buf *bp;
1063 struct vnode *vp;
1064 struct kaioinfo *ki;
1065 struct aio_liojob *lj;
1066 int s;
1067 int notify;
1068
1069 cb = &aiocbe->uaiocb;
1070 fp = aiocbe->fd_file;
1071
1072 if (fp->f_type != DTYPE_VNODE)
1073 return (-1);
1074
1075 vp = (struct vnode *)fp->f_data;
1076
1077 /*
1078 * If it's not a disk, we don't want to return a positive error.
1079 * A positive error would keep the aio code from falling through to
1080 * the thread-based path when the target is a regular file.
1081 */
1082 if (!vn_isdisk(vp, &error)) {
1083 if (error == ENOTBLK)
1084 return (-1);
1085 else
1086 return (error);
1087 }
1088
1089 if (cb->aio_nbytes % vp->v_rdev->si_bsize_phys)
1090 return (-1);
1091
1092 if (cb->aio_nbytes >
1093 MAXPHYS - (((vm_offset_t) cb->aio_buf) & PAGE_MASK))
1094 return (-1);
1095
1096 ki = p->p_aioinfo;
1097 if (ki->kaio_buffer_count >= ki->kaio_ballowed_count)
1098 return (-1);
1099
1100 ki->kaio_buffer_count++;
1101
1102 lj = aiocbe->lio;
1103 if (lj)
1104 lj->lioj_buffer_count++;
1105
1106 /* Create and build a buffer header for a transfer. */
1107 bp = (struct buf *)getpbuf(NULL);
1108 BUF_KERNPROC(bp);
1109
1110 /*
1111 * Get a copy of the kva from the physical buffer.
1112 */
1113 bp->b_caller1 = p;
1114 bp->b_dev = vp->v_rdev;
1115 error = bp->b_error = 0;
1116
1117 bp->b_bcount = cb->aio_nbytes;
1118 bp->b_bufsize = cb->aio_nbytes;
1119 bp->b_flags = B_PHYS;
1120 bp->b_iodone = aio_physwakeup;
1121 bp->b_saveaddr = bp->b_data;
1122 bp->b_data = (void *)(uintptr_t)cb->aio_buf;
1123 bp->b_blkno = btodb(cb->aio_offset);
1124
1125 if (cb->aio_lio_opcode == LIO_WRITE) {
1126 bp->b_iocmd = BIO_WRITE;
1127 if (!useracc(bp->b_data, bp->b_bufsize, VM_PROT_READ)) {
1128 error = EFAULT;
1129 goto doerror;
1130 }
1131 } else {
1132 bp->b_iocmd = BIO_READ;
1133 if (!useracc(bp->b_data, bp->b_bufsize, VM_PROT_WRITE)) {
1134 error = EFAULT;
1135 goto doerror;
1136 }
1137 }
1138
1139 /* Bring buffer into kernel space. */
1140 vmapbuf(bp);
1141
1142 s = splbio();
1143 aiocbe->bp = bp;
1144 bp->b_spc = (void *)aiocbe;
1145 TAILQ_INSERT_TAIL(&aio_bufjobs, aiocbe, list);
1146 TAILQ_INSERT_TAIL(&ki->kaio_bufqueue, aiocbe, plist);
1147 aiocbe->jobstate = JOBST_JOBQBUF;
1148 cb->_aiocb_private.status = cb->aio_nbytes;
1149 num_buf_aio++;
1150 bp->b_error = 0;
1151
1152 splx(s);
1153
1154 /* Perform transfer. */
1155 DEV_STRATEGY(bp, 0);
1156
1157 notify = 0;
1158 s = splbio();
1159
1160 /*
1161 * If we had an error invoking the request, or an error in processing
1162 * the request before we have returned, we process it as an error in
1163 * transfer. Note that such an I/O error is not indicated immediately,
1164 * but is returned using the aio_error mechanism. In this case,
1165 * aio_suspend will return immediately.
1166 */
1167 if (bp->b_error || (bp->b_ioflags & BIO_ERROR)) {
1168 struct aiocb *job = aiocbe->uuaiocb;
1169
1170 aiocbe->uaiocb._aiocb_private.status = 0;
1171 suword(&job->_aiocb_private.status, 0);
1172 aiocbe->uaiocb._aiocb_private.error = bp->b_error;
1173 suword(&job->_aiocb_private.error, bp->b_error);
1174
1175 ki->kaio_buffer_finished_count++;
1176
1177 if (aiocbe->jobstate != JOBST_JOBBFINISHED) {
1178 aiocbe->jobstate = JOBST_JOBBFINISHED;
1179 aiocbe->jobflags |= AIOCBLIST_DONE;
1180 TAILQ_REMOVE(&aio_bufjobs, aiocbe, list);
1181 TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist);
1182 TAILQ_INSERT_TAIL(&ki->kaio_bufdone, aiocbe, plist);
1183 notify = 1;
1184 }
1185 }
1186 splx(s);
1187 if (notify)
1188 KNOTE(&aiocbe->klist, 0);
1189 return 0;
1190
1191doerror:
1192 ki->kaio_buffer_count--;
1193 if (lj)
1194 lj->lioj_buffer_count--;
1195 aiocbe->bp = NULL;
1196 relpbuf(bp, NULL);
1197 return error;
1198}
1199
1200/*
1201 * This waits/tests physio completion.
1202 */
1203static int
1204aio_fphysio(struct aiocblist *iocb)
1205{
1206 int s;
1207 struct buf *bp;
1208 int error;
1209
1210 bp = iocb->bp;
1211
1212 s = splbio();
1213 while ((bp->b_flags & B_DONE) == 0) {
1214 if (tsleep(bp, PRIBIO, "physstr", aiod_timeout)) {
1215 if ((bp->b_flags & B_DONE) == 0) {
1216 splx(s);
1217 return EINPROGRESS;
1218 } else
1219 break;
1220 }
1221 }
1222 splx(s);
1223
1224 /* Release mapping into kernel space. */
1225 vunmapbuf(bp);
1226 iocb->bp = 0;
1227
1228 error = 0;
1229
1230 /* Check for an error. */
1231 if (bp->b_ioflags & BIO_ERROR)
1232 error = bp->b_error;
1233
1234 relpbuf(bp, NULL);
1235 return (error);
1236}
1237
1238/*
1239 * Wake up aio requests that may be serviceable now.
1240 */
1241static void
1242aio_swake_cb(struct socket *so, struct sockbuf *sb)
1243{
1244 struct aiocblist *cb,*cbn;
1245 struct proc *p;
1246 struct kaioinfo *ki = NULL;
1247 int opcode, wakecount = 0;
1248 struct aiothreadlist *aiop;
1249
1250 if (sb == &so->so_snd) {
1251 opcode = LIO_WRITE;
1252 so->so_snd.sb_flags &= ~SB_AIO;
1253 } else {
1254 opcode = LIO_READ;
1255 so->so_rcv.sb_flags &= ~SB_AIO;
1256 }
1257
1258 for (cb = TAILQ_FIRST(&so->so_aiojobq); cb; cb = cbn) {
1259 cbn = TAILQ_NEXT(cb, list);
1260 if (opcode == cb->uaiocb.aio_lio_opcode) {
1261 p = cb->userproc;
1262 ki = p->p_aioinfo;
1263 TAILQ_REMOVE(&so->so_aiojobq, cb, list);
1264 TAILQ_REMOVE(&ki->kaio_sockqueue, cb, plist);
1265 TAILQ_INSERT_TAIL(&aio_jobs, cb, list);
1266 TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, cb, plist);
1267 wakecount++;
1268 if (cb->jobstate != JOBST_JOBQGLOBAL)
1269 panic("invalid queue value");
1270 }
1271 }
1272
1273 while (wakecount--) {
1274 if ((aiop = TAILQ_FIRST(&aio_freeproc)) != 0) {
1275 TAILQ_REMOVE(&aio_freeproc, aiop, list);
1276 TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
1277 aiop->aiothreadflags &= ~AIOP_FREE;
1278 wakeup(aiop->aiothread);
1279 }
1280 }
1281}
1282
1283/*
1284 * Queue a new AIO request. The choice between the aiod-thread technique and
1285 * direct physio for VCHR devices is made in this code.
1286 */
1287static int
1288_aio_aqueue(struct thread *td, struct aiocb *job, struct aio_liojob *lj, int type)
1289{
1290 struct proc *p = td->td_proc;
1291 struct filedesc *fdp;
1292 struct file *fp;
1293 unsigned int fd;
1294 struct socket *so;
1295 int s;
1296 int error;
1297 int opcode;
1298 struct aiocblist *aiocbe;
1299 struct aiothreadlist *aiop;
1300 struct kaioinfo *ki;
1301 struct kevent kev;
1302 struct kqueue *kq;
1303 struct file *kq_fp;
1304
1305 aiocbe = zalloc(aiocb_zone);
1306 aiocbe->inputcharge = 0;
1307 aiocbe->outputcharge = 0;
1308 callout_handle_init(&aiocbe->timeouthandle);
1309 SLIST_INIT(&aiocbe->klist);
1310
1311 suword(&job->_aiocb_private.status, -1);
1312 suword(&job->_aiocb_private.error, 0);
1313 suword(&job->_aiocb_private.kernelinfo, -1);
1314
1315 error = copyin(job, &aiocbe->uaiocb, sizeof(aiocbe->uaiocb));
1316 if (error) {
1317 suword(&job->_aiocb_private.error, error);
1318 zfree(aiocb_zone, aiocbe);
1319 return error;
1320 }
1321 if (aiocbe->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL &&
1322 !_SIG_VALID(aiocbe->uaiocb.aio_sigevent.sigev_signo)) {
1323 zfree(aiocb_zone, aiocbe);
1324 return EINVAL;
1325 }
1326
1327 /* Save userspace address of the job info. */
1328 aiocbe->uuaiocb = job;
1329
1330 /* Get the opcode. */
1331 if (type != LIO_NOP)
1332 aiocbe->uaiocb.aio_lio_opcode = type;
1333 opcode = aiocbe->uaiocb.aio_lio_opcode;
1334
1335 /* Get the fd info for process. */
1336 fdp = p->p_fd;
1337
1338 /*
1339 * Range check file descriptor.
1340 */
1341 fd = aiocbe->uaiocb.aio_fildes;
1342 if (fd >= fdp->fd_nfiles) {
1343 zfree(aiocb_zone, aiocbe);
1344 if (type == 0)
1345 suword(&job->_aiocb_private.error, EBADF);
1346 return EBADF;
1347 }
1348
1349 fp = aiocbe->fd_file = fdp->fd_ofiles[fd];
1350 if ((fp == NULL) || ((opcode == LIO_WRITE) && ((fp->f_flag & FWRITE) ==
1351 0))) {
1352 zfree(aiocb_zone, aiocbe);
1353 if (type == 0)
1354 suword(&job->_aiocb_private.error, EBADF);
1355 return EBADF;
1356 }
1357
1358 if (aiocbe->uaiocb.aio_offset == -1LL) {
1359 zfree(aiocb_zone, aiocbe);
1360 if (type == 0)
1361 suword(&job->_aiocb_private.error, EINVAL);
1362 return EINVAL;
1363 }
1364
1365 error = suword(&job->_aiocb_private.kernelinfo, jobrefid);
1366 if (error) {
1367 zfree(aiocb_zone, aiocbe);
1368 if (type == 0)
1369 suword(&job->_aiocb_private.error, EINVAL);
1370 return error;
1371 }
1372
1373 aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jobrefid;
1374 if (jobrefid == LONG_MAX)
1375 jobrefid = 1;
1376 else
1377 jobrefid++;
1378
1379 if (opcode == LIO_NOP) {
1380 zfree(aiocb_zone, aiocbe);
1381 if (type == 0) {
1382 suword(&job->_aiocb_private.error, 0);
1383 suword(&job->_aiocb_private.status, 0);
1384 suword(&job->_aiocb_private.kernelinfo, 0);
1385 }
1386 return 0;
1387 }
1388
1389 if ((opcode != LIO_READ) && (opcode != LIO_WRITE)) {
1390 zfree(aiocb_zone, aiocbe);
1391 if (type == 0) {
1392 suword(&job->_aiocb_private.status, 0);
1393 suword(&job->_aiocb_private.error, EINVAL);
1394 }
1395 return EINVAL;
1396 }
1397
1398 fhold(fp);
1399
1400 if (aiocbe->uaiocb.aio_sigevent.sigev_notify == SIGEV_KEVENT) {
1401 kev.ident = aiocbe->uaiocb.aio_sigevent.sigev_notify_kqueue;
1402 kev.udata = aiocbe->uaiocb.aio_sigevent.sigev_value.sigval_ptr;
1403 }
1404 else {
1405 /*
1406 * This method for requesting kevent-based notification won't
1407 * work on the alpha, since we're passing in a pointer
1408 * via aio_lio_opcode, which is an int. Use the SIGEV_KEVENT-
1409 * based method instead.
1410 */
1411 struct kevent *kevp;
1412
1413 kevp = (struct kevent *)(uintptr_t)job->aio_lio_opcode;
1414 if (kevp == NULL)
1415 goto no_kqueue;
1416
1417 error = copyin(kevp, &kev, sizeof(kev));
1418 if (error)
1419 goto aqueue_fail;
1420 }
1421 if ((u_int)kev.ident >= fdp->fd_nfiles ||
1422 (kq_fp = fdp->fd_ofiles[kev.ident]) == NULL ||
1423 (kq_fp->f_type != DTYPE_KQUEUE)) {
1424 error = EBADF;
1425 goto aqueue_fail;
1426 }
1427 kq = (struct kqueue *)kq_fp->f_data;
1428 kev.ident = (uintptr_t)aiocbe;
1429 kev.filter = EVFILT_AIO;
1430 kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1;
1431 error = kqueue_register(kq, &kev, td);
1432aqueue_fail:
1433 if (error) {
1434 zfree(aiocb_zone, aiocbe);
1435 if (type == 0)
1436 suword(&job->_aiocb_private.error, error);
1437 goto done;
1438 }
1439no_kqueue:
1440
1441 suword(&job->_aiocb_private.error, EINPROGRESS);
1442 aiocbe->uaiocb._aiocb_private.error = EINPROGRESS;
1443 aiocbe->userproc = p;
1444 aiocbe->jobflags = 0;
1445 aiocbe->lio = lj;
1446 ki = p->p_aioinfo;
1447
1448 if (fp->f_type == DTYPE_SOCKET) {
1449 /*
1450 * Alternate queueing for socket ops: Reach down into the
1451 * descriptor to get the socket data. Then check to see if the
1452 * socket is ready to be read or written (based on the requested
1453 * operation).
1454 *
1455 * If it is not ready for I/O, then queue the aiocbe on the
1456 * socket, and set the flags so we get a call when sbnotify()
1457 * happens.
1458 */
1459 so = (struct socket *)fp->f_data;
1460 s = splnet();
1461 if (((opcode == LIO_READ) && (!soreadable(so))) || ((opcode ==
1462 LIO_WRITE) && (!sowriteable(so)))) {
1463 TAILQ_INSERT_TAIL(&so->so_aiojobq, aiocbe, list);
1464 TAILQ_INSERT_TAIL(&ki->kaio_sockqueue, aiocbe, plist);
1465 if (opcode == LIO_READ)
1466 so->so_rcv.sb_flags |= SB_AIO;
1467 else
1468 so->so_snd.sb_flags |= SB_AIO;
1469 aiocbe->jobstate = JOBST_JOBQGLOBAL; /* XXX */
1470 ki->kaio_queue_count++;
1471 num_queue_count++;
1472 splx(s);
1473 error = 0;
1474 goto done;
1475 }
1476 splx(s);
1477 }
1478
1479 if ((error = aio_qphysio(p, aiocbe)) == 0)
1480 goto done;
1481 if (error > 0) {
1482 suword(&job->_aiocb_private.status, 0);
1483 aiocbe->uaiocb._aiocb_private.error = error;
1484 suword(&job->_aiocb_private.error, error);
1485 goto done;
1486 }
1487
1488 /* No buffer for daemon I/O. */
1489 aiocbe->bp = NULL;
1490
1491 ki->kaio_queue_count++;
1492 if (lj)
1493 lj->lioj_queue_count++;
1494 s = splnet();
1495 TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist);
1496 TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list);
1497 splx(s);
1498 aiocbe->jobstate = JOBST_JOBQGLOBAL;
1499
1500 num_queue_count++;
1501 error = 0;
1502
1503 /*
1504 * If we don't have a free AIO process, and we are below our quota, then
1505 * start one. Otherwise, depend on the subsequent I/O completions to
1506 * pick up this job. If we don't successfully create the new process
1507 * (thread) due to resource issues, we return an error for now (EAGAIN),
1508 * which is likely not the correct thing to do.
1509 */
1510 s = splnet();
1511retryproc:
1512 if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) {
1513 TAILQ_REMOVE(&aio_freeproc, aiop, list);
1514 TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
1515 aiop->aiothreadflags &= ~AIOP_FREE;
1516 wakeup(aiop->aiothread);
1517 } else if (((num_aio_resv_start + num_aio_procs) < max_aio_procs) &&
1518 ((ki->kaio_active_count + num_aio_resv_start) <
1519 ki->kaio_maxactive_count)) {
1520 num_aio_resv_start++;
1521 if ((error = aio_newproc()) == 0) {
1522 num_aio_resv_start--;
1523 goto retryproc;
1524 }
1525 num_aio_resv_start--;
1526 }
1527 splx(s);
1528done:
1529 fdrop(fp, td);
1530 return error;
1531}
1532
1533/*
1534 * This routine queues an AIO request, checking for quotas.
1535 */
1536static int
1537aio_aqueue(struct thread *td, struct aiocb *job, int type)
1538{
1539 struct proc *p = td->td_proc;
1540 struct kaioinfo *ki;
1541
1542 if (p->p_aioinfo == NULL)
1543 aio_init_aioinfo(p);
1544
1545 if (num_queue_count >= max_queue_count)
1546 return EAGAIN;
1547
1548 ki = p->p_aioinfo;
1549 if (ki->kaio_queue_count >= ki->kaio_qallowed_count)
1550 return EAGAIN;
1551
1552 return _aio_aqueue(td, job, NULL, type);
1553}
1554
1555/*
1556 * Support the aio_return system call; as a side effect, kernel resources
1557 * are released.
1558 */
1559int
1560aio_return(struct thread *td, struct aio_return_args *uap)
1561{
1562 struct proc *p = td->td_proc;
1563 int s;
1564 int jobref;
1565 struct aiocblist *cb, *ncb;
1566 struct aiocb *ujob;
1567 struct kaioinfo *ki;
1568
1569 ki = p->p_aioinfo;
1570 if (ki == NULL)
1571 return EINVAL;
1572
1573 ujob = uap->aiocbp;
1574
1575 jobref = fuword(&ujob->_aiocb_private.kernelinfo);
1576 if (jobref == -1 || jobref == 0)
1577 return EINVAL;
1578
1579 TAILQ_FOREACH(cb, &ki->kaio_jobdone, plist) {
1580 if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) ==
1581 jobref) {
1582 if (ujob == cb->uuaiocb) {
1583 td->td_retval[0] =
1584 cb->uaiocb._aiocb_private.status;
1585 } else
1586 td->td_retval[0] = EFAULT;
1587 if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) {
1588 p->p_stats->p_ru.ru_oublock +=
1589 cb->outputcharge;
1590 cb->outputcharge = 0;
1591 } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) {
1592 p->p_stats->p_ru.ru_inblock += cb->inputcharge;
1593 cb->inputcharge = 0;
1594 }
1595 aio_free_entry(cb);
1596 return 0;
1597 }
1598 }
1599 s = splbio();
1600 for (cb = TAILQ_FIRST(&ki->kaio_bufdone); cb; cb = ncb) {
1601 ncb = TAILQ_NEXT(cb, plist);
1602 if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo)
1603 == jobref) {
1604 splx(s);
1605 if (ujob == cb->uuaiocb) {
1606 td->td_retval[0] =
1607 cb->uaiocb._aiocb_private.status;
1608 } else
1609 td->td_retval[0] = EFAULT;
1610 aio_free_entry(cb);
1611 return 0;
1612 }
1613 }
1614 splx(s);
1615
1616 return (EINVAL);
1617}
1618
1619/*
1620 * Allow a process to wake up when any of the I/O requests has completed.
1621 */
1622int
1623aio_suspend(struct thread *td, struct aio_suspend_args *uap)
1624{
1625 struct proc *p = td->td_proc;
1626 struct timeval atv;
1627 struct timespec ts;
1628 struct aiocb *const *cbptr, *cbp;
1629 struct kaioinfo *ki;
1630 struct aiocblist *cb;
1631 int i;
1632 int njoblist;
1633 int error, s, timo;
1634 int *ijoblist;
1635 struct aiocb **ujoblist;
1636
1637 if (uap->nent > AIO_LISTIO_MAX)
1638 return EINVAL;
1639
1640 timo = 0;
1641 if (uap->timeout) {
1642 /* Get timespec struct. */
1643 if ((error = copyin(uap->timeout, &ts, sizeof(ts))) != 0)
1644 return error;
1645
1646 if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
1647 return (EINVAL);
1648
1649 TIMESPEC_TO_TIMEVAL(&atv, &ts);
1650 if (itimerfix(&atv))
1651 return (EINVAL);
1652 timo = tvtohz(&atv);
1653 }
1654
1655 ki = p->p_aioinfo;
1656 if (ki == NULL)
1657 return EAGAIN;
1658
1659 njoblist = 0;
1660 ijoblist = zalloc(aiol_zone);
1661 ujoblist = zalloc(aiol_zone);
1662 cbptr = uap->aiocbp;
1663
1664 for (i = 0; i < uap->nent; i++) {
1665 cbp = (struct aiocb *)(intptr_t)fuword((caddr_t)&cbptr[i]);
1666 if (cbp == 0)
1667 continue;
1668 ujoblist[njoblist] = cbp;
1669 ijoblist[njoblist] = fuword(&cbp->_aiocb_private.kernelinfo);
1670 njoblist++;
1671 }
1672
1673 if (njoblist == 0) {
1674 zfree(aiol_zone, ijoblist);
1675 zfree(aiol_zone, ujoblist);
1676 return 0;
1677 }
1678
1679 error = 0;
1680 for (;;) {
1681 TAILQ_FOREACH(cb, &ki->kaio_jobdone, plist) {
1682 for (i = 0; i < njoblist; i++) {
1683 if (((intptr_t)
1684 cb->uaiocb._aiocb_private.kernelinfo) ==
1685 ijoblist[i]) {
1686 if (ujoblist[i] != cb->uuaiocb)
1687 error = EINVAL;
1688 zfree(aiol_zone, ijoblist);
1689 zfree(aiol_zone, ujoblist);
1690 return error;
1691 }
1692 }
1693 }
1694
1695 s = splbio();
1696 for (cb = TAILQ_FIRST(&ki->kaio_bufdone); cb; cb =
1697 TAILQ_NEXT(cb, plist)) {
1698 for (i = 0; i < njoblist; i++) {
1699 if (((intptr_t)
1700 cb->uaiocb._aiocb_private.kernelinfo) ==
1701 ijoblist[i]) {
1702 splx(s);
1703 if (ujoblist[i] != cb->uuaiocb)
1704 error = EINVAL;
1705 zfree(aiol_zone, ijoblist);
1706 zfree(aiol_zone, ujoblist);
1707 return error;
1708 }
1709 }
1710 }
1711
1712 ki->kaio_flags |= KAIO_WAKEUP;
1713 error = tsleep(p, PRIBIO | PCATCH, "aiospn", timo);
1714 splx(s);
1715
1716 if (error == ERESTART || error == EINTR) {
1717 zfree(aiol_zone, ijoblist);
1718 zfree(aiol_zone, ujoblist);
1719 return EINTR;
1720 } else if (error == EWOULDBLOCK) {
1721 zfree(aiol_zone, ijoblist);
1722 zfree(aiol_zone, ujoblist);
1723 return EAGAIN;
1724 }
1725 }
1726
1727/* NOTREACHED */
1728 return EINVAL;
1729}
1730
1731/*
1732 * aio_cancel cancels any non-physio aio operations not currently in
1733 * progress.
1734 */
1735int
1736aio_cancel(struct thread *td, struct aio_cancel_args *uap)
1737{
1738 struct proc *p = td->td_proc;
1739 struct kaioinfo *ki;
1740 struct aiocblist *cbe, *cbn;
1741 struct file *fp;
1742 struct filedesc *fdp;
1743 struct socket *so;
1744 struct proc *po;
1745 int s, error;
1746 int cancelled = 0;
1747 int notcancelled = 0;
1748 struct vnode *vp;
1749
1750 fdp = p->p_fd;
1751 if ((u_int)uap->fd >= fdp->fd_nfiles ||
1752 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1753 return (EBADF);
1754
1755 if (fp->f_type == DTYPE_VNODE) {
1756 vp = (struct vnode *)fp->f_data;
1757
1758 if (vn_isdisk(vp,&error)) {
1759 td->td_retval[0] = AIO_NOTCANCELED;
1760 return 0;
1761 }
1762 } else if (fp->f_type == DTYPE_SOCKET) {
1763 so = (struct socket *)fp->f_data;
1764
1765 s = splnet();
1766
1767 for (cbe = TAILQ_FIRST(&so->so_aiojobq); cbe; cbe = cbn) {
1768 cbn = TAILQ_NEXT(cbe, list);
1769 if ((uap->aiocbp == NULL) ||
1770 (uap->aiocbp == cbe->uuaiocb) ) {
1771 po = cbe->userproc;
1772 ki = po->p_aioinfo;
1773 TAILQ_REMOVE(&so->so_aiojobq, cbe, list);
1774 TAILQ_REMOVE(&ki->kaio_sockqueue, cbe, plist);
1775 TAILQ_INSERT_TAIL(&ki->kaio_jobdone, cbe, plist);
1776 if (ki->kaio_flags & KAIO_WAKEUP) {
1777 wakeup(po);
1778 }
1779 cbe->jobstate = JOBST_JOBFINISHED;
1780 cbe->uaiocb._aiocb_private.status = -1;
1781 cbe->uaiocb._aiocb_private.error = ECANCELED;
1782 cancelled++;
1783/* XXX cancelled, knote? */
1784 if (cbe->uaiocb.aio_sigevent.sigev_notify ==
1785 SIGEV_SIGNAL) {
1786 PROC_LOCK(cbe->userproc);
1787 psignal(cbe->userproc, cbe->uaiocb.aio_sigevent.sigev_signo);
1788 PROC_UNLOCK(cbe->userproc);
1789 }
1790 if (uap->aiocbp)
1791 break;
1792 }
1793 }
1794 splx(s);
1795
1796 if ((cancelled) && (uap->aiocbp)) {
1797 td->td_retval[0] = AIO_CANCELED;
1798 return 0;
1799 }
1800 }
1801 ki = p->p_aioinfo;
1802 s = splnet();
1803
1804 for (cbe = TAILQ_FIRST(&ki->kaio_jobqueue); cbe; cbe = cbn) {
1805 cbn = TAILQ_NEXT(cbe, plist);
1806
1807 if ((uap->fd == cbe->uaiocb.aio_fildes) &&
1808 ((uap->aiocbp == NULL ) ||
1809 (uap->aiocbp == cbe->uuaiocb))) {
1810
1811 if (cbe->jobstate == JOBST_JOBQGLOBAL) {
1812 TAILQ_REMOVE(&aio_jobs, cbe, list);
1813 TAILQ_REMOVE(&ki->kaio_jobqueue, cbe, plist);
1814 TAILQ_INSERT_TAIL(&ki->kaio_jobdone, cbe,
1815 plist);
1816 cancelled++;
1817 ki->kaio_queue_finished_count++;
1818 cbe->jobstate = JOBST_JOBFINISHED;
1819 cbe->uaiocb._aiocb_private.status = -1;
1820 cbe->uaiocb._aiocb_private.error = ECANCELED;
1821/* XXX cancelled, knote? */
1822 if (cbe->uaiocb.aio_sigevent.sigev_notify ==
1823 SIGEV_SIGNAL) {
1824 PROC_LOCK(cbe->userproc);
1825 psignal(cbe->userproc, cbe->uaiocb.aio_sigevent.sigev_signo);
1826 PROC_UNLOCK(cbe->userproc);
1827 }
1828 } else {
1829 notcancelled++;
1830 }
1831 }
1832 }
1833 splx(s);
1834
1835 if (notcancelled) {
1836 td->td_retval[0] = AIO_NOTCANCELED;
1837 return 0;
1838 }
1839 if (cancelled) {
1840 td->td_retval[0] = AIO_CANCELED;
1841 return 0;
1842 }
1843 td->td_retval[0] = AIO_ALLDONE;
1844
1845 return 0;
1846}
1847
1848/*
1849 * aio_error is implemented in the kernel for compatibility purposes only.
1850 * For a userland asynchronous I/O implementation, it would be best to handle
1851 * this in a userland subroutine.
1852 */
1853int
1854aio_error(struct thread *td, struct aio_error_args *uap)
1855{
1856 struct proc *p = td->td_proc;
1857 int s;
1858 struct aiocblist *cb;
1859 struct kaioinfo *ki;
1860 int jobref;
1861
1862 ki = p->p_aioinfo;
1863 if (ki == NULL)
1864 return EINVAL;
1865
1866 jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo);
1867 if ((jobref == -1) || (jobref == 0))
1868 return EINVAL;
1869
1870 TAILQ_FOREACH(cb, &ki->kaio_jobdone, plist) {
1871 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo) ==
1872 jobref) {
1873 td->td_retval[0] = cb->uaiocb._aiocb_private.error;
1874 return 0;
1875 }
1876 }
1877
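/*
 * Not on the done queue; check the in-progress job and socket queues, and
 * then the physio buffer queues, before giving up.
 */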
1878 s = splnet();
1879
1880 for (cb = TAILQ_FIRST(&ki->kaio_jobqueue); cb; cb = TAILQ_NEXT(cb,
1881 plist)) {
1882 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo) ==
1883 jobref) {
1884 td->td_retval[0] = EINPROGRESS;
1885 splx(s);
1886 return 0;
1887 }
1888 }
1889
1890 for (cb = TAILQ_FIRST(&ki->kaio_sockqueue); cb; cb = TAILQ_NEXT(cb,
1891 plist)) {
1892 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo) ==
1893 jobref) {
1894 td->td_retval[0] = EINPROGRESS;
1895 splx(s);
1896 return 0;
1897 }
1898 }
1899 splx(s);
1900
1901 s = splbio();
1902 for (cb = TAILQ_FIRST(&ki->kaio_bufdone); cb; cb = TAILQ_NEXT(cb,
1903 plist)) {
1904 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo) ==
1905 jobref) {
1906 td->td_retval[0] = cb->uaiocb._aiocb_private.error;
1907 splx(s);
1908 return 0;
1909 }
1910 }
1911
1912 for (cb = TAILQ_FIRST(&ki->kaio_bufqueue); cb; cb = TAILQ_NEXT(cb,
1913 plist)) {
1914 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo) ==
1915 jobref) {
1916 td->td_retval[0] = EINPROGRESS;
1917 splx(s);
1918 return 0;
1919 }
1920 }
1921 splx(s);
1922
1923#if (0)
1924 /*
1925 * Hack for lio.
1926 */
1927 status = fuword(&uap->aiocbp->_aiocb_private.status);
1928 if (status == -1)
1929 return fuword(&uap->aiocbp->_aiocb_private.error);
1930#endif
1931 return EINVAL;
1932}
1933
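/*
 * The aio_read() and aio_write() system calls are thin wrappers: each simply
 * queues the request via aio_aqueue() with the corresponding LIO opcode.
 */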
1934int
1935aio_read(struct thread *td, struct aio_read_args *uap)
1936{
1937
1938 return aio_aqueue(td, uap->aiocbp, LIO_READ);
1939}
1940
1941int
1942aio_write(struct thread *td, struct aio_write_args *uap)
1943{
1944
1945 return aio_aqueue(td, uap->aiocbp, LIO_WRITE);
1946}
1947
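/*
 * lio_listio: queue a list of I/O requests.  With LIO_NOWAIT the call returns
 * as soon as the requests have been queued; with LIO_WAIT it does not return
 * until every successfully queued request has completed.
 */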
1948int
1949lio_listio(struct thread *td, struct lio_listio_args *uap)
1950{
1951 struct proc *p = td->td_proc;
1952 int nent, nentqueued;
1953 struct aiocb *iocb, * const *cbptr;
1954 struct aiocblist *cb;
1955 struct kaioinfo *ki;
1956 struct aio_liojob *lj;
1957 int error, runningcode;
1958 int nerror;
1959 int i;
1960 int s;
1961
1962 if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
1963 return EINVAL;
1964
1965 nent = uap->nent;
1966 if (nent > AIO_LISTIO_MAX)
1967 return EINVAL;
1968
1969 if (p->p_aioinfo == NULL)
1970 aio_init_aioinfo(p);
1971
1972 if ((nent + num_queue_count) > max_queue_count)
1973 return EAGAIN;
1974
1975 ki = p->p_aioinfo;
1976 if ((nent + ki->kaio_queue_count) > ki->kaio_qallowed_count)
1977 return EAGAIN;
1978
1979 lj = zalloc(aiolio_zone);
1980 if (!lj)
1981 return EAGAIN;
1982
1983 lj->lioj_flags = 0;
1984 lj->lioj_buffer_count = 0;
1985 lj->lioj_buffer_finished_count = 0;
1986 lj->lioj_queue_count = 0;
1987 lj->lioj_queue_finished_count = 0;
1988 lj->lioj_ki = ki;
1989
1990 /*
1991 * Setup signal.
1992 */
1993 if (uap->sig && (uap->mode == LIO_NOWAIT)) {
1994 error = copyin(uap->sig, &lj->lioj_signal,
1995 sizeof(lj->lioj_signal));
1996 if (error) {
1997 zfree(aiolio_zone, lj);
1998 return error;
1999 }
2000 if (!_SIG_VALID(lj->lioj_signal.sigev_signo)) {
2001 zfree(aiolio_zone, lj);
2002 return EINVAL;
2003 }
2004 lj->lioj_flags |= LIOJ_SIGNAL;
2005 lj->lioj_flags &= ~LIOJ_SIGNAL_POSTED;
2006 } else
2007 lj->lioj_flags &= ~LIOJ_SIGNAL;
2008
2009 TAILQ_INSERT_TAIL(&ki->kaio_liojoblist, lj, lioj_list);
2010 /*
2011 * Get pointers to the list of I/O requests.
2012 */
2013 nerror = 0;
2014 nentqueued = 0;
2015 cbptr = uap->acb_list;
2016 for (i = 0; i < uap->nent; i++) {
2017 iocb = (struct aiocb *)(intptr_t)fuword((caddr_t)&cbptr[i]);
2018 if (((intptr_t)iocb != -1) && ((intptr_t)iocb != 0)) {
2019 error = _aio_aqueue(td, iocb, lj, 0);
2020 if (error == 0)
2021 nentqueued++;
2022 else
2023 nerror++;
2024 }
2025 }
2026
2027 /*
2028 * If we haven't queued any, then just return without waiting.
2029 */
2030 if (nentqueued == 0)
2031 return 0;
2032
2033 /*
2034 * Calculate the appropriate error return.
2035 */
2036 runningcode = 0;
2037 if (nerror)
2038 runningcode = EIO;
2039
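/*
 * For LIO_WAIT, repeatedly walk the submitted control blocks, checking the
 * done queues for each one, and sleep until every queued request has been
 * disposed of.
 */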
2040 if (uap->mode == LIO_WAIT) {
2041 int command, found, jobref;
2042
2043 for (;;) {
2044 found = 0;
2045 for (i = 0; i < uap->nent; i++) {
2046 /*
2047 * Fetch the pointer to the control buf from the
2048 * user-space list.
2049 */
2050 iocb = (struct aiocb *)(intptr_t)fuword((caddr_t)&cbptr[i]);
2051 if (((intptr_t)iocb == -1) || ((intptr_t)iocb
2052 == 0))
2053 continue;
2054
2055 /*
2056 * Fetch the associated command from user space.
2057 */
2058 command = fuword(&iocb->aio_lio_opcode);
2059 if (command == LIO_NOP) {
2060 found++;
2061 continue;
2062 }
2063
2064 jobref = fuword(&iocb->_aiocb_private.kernelinfo);
2065
2066 TAILQ_FOREACH(cb, &ki->kaio_jobdone, plist) {
2067 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo)
2068 == jobref) {
2069 if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) {
2070 p->p_stats->p_ru.ru_oublock += cb->outputcharge;
2071 cb->outputcharge = 0;
2072 } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) {
2073 p->p_stats->p_ru.ru_inblock += cb->inputcharge;
2074 cb->inputcharge = 0;
2075 }
2081 found++;
2082 break;
2083 }
2084 }
2085
2086 s = splbio();
2087 TAILQ_FOREACH(cb, &ki->kaio_bufdone, plist) {
2088 if (((intptr_t)cb->uaiocb._aiocb_private.kernelinfo)
2089 == jobref) {
2090 found++;
2091 break;
2092 }
2093 }
2094 splx(s);
2095 }
2096
2097 /*
2098 * If all I/Os have been disposed of, then we can
2099 * return.
2100 */
2101 if (found == nentqueued)
2102 return runningcode;
2103
2104 ki->kaio_flags |= KAIO_WAKEUP;
2105 error = tsleep(p, PRIBIO | PCATCH, "aiospn", 0);
2106
2107 if (error == EINTR)
2108 return EINTR;
2109 else if (error == EWOULDBLOCK)
2110 return EAGAIN;
2111 }
2112 }
2113
2114 return runningcode;
2115}
2116
2117/*
2118 * This is a weird hack so that we can post a signal. It is safe to do so from
2119 * a timeout routine, but *not* from an interrupt routine.
2120 */
2121static void
2122process_signal(void *aioj)
2123{
2124 struct aiocblist *aiocbe = aioj;
2125 struct aio_liojob *lj = aiocbe->lio;
2126 struct aiocb *cb = &aiocbe->uaiocb;
2127
2128 if ((lj) && (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL) &&
2129 (lj->lioj_queue_count == lj->lioj_queue_finished_count)) {
2130 PROC_LOCK(lj->lioj_ki->kaio_p);
2131 psignal(lj->lioj_ki->kaio_p, lj->lioj_signal.sigev_signo);
2132 PROC_UNLOCK(lj->lioj_ki->kaio_p);
2133 lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
2134 }
2135
2136 if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) {
2137 PROC_LOCK(aiocbe->userproc);
2138 psignal(aiocbe->userproc, cb->aio_sigevent.sigev_signo);
2139 PROC_UNLOCK(aiocbe->userproc);
2140 }
2141}
2142
2143/*
2144 * Interrupt handler for physio; it performs the necessary process wakeups
2145 * and signals.
2146 */
2147static void
2148aio_physwakeup(struct buf *bp)
2149{
2150 struct aiocblist *aiocbe;
2151 struct proc *p;
2152 struct kaioinfo *ki;
2153 struct aio_liojob *lj;
2154
2155 wakeup(bp);
2156
2157 aiocbe = (struct aiocblist *)bp->b_spc;
2158 if (aiocbe) {
2159 p = bp->b_caller1;
2160
2161 aiocbe->jobstate = JOBST_JOBBFINISHED;
2162 aiocbe->uaiocb._aiocb_private.status -= bp->b_resid;
2163 aiocbe->uaiocb._aiocb_private.error = 0;
2164 aiocbe->jobflags |= AIOCBLIST_DONE;
2165
2166 if (bp->b_ioflags & BIO_ERROR)
2167 aiocbe->uaiocb._aiocb_private.error = bp->b_error;
2168
2169 lj = aiocbe->lio;
2170 if (lj) {
2171 lj->lioj_buffer_finished_count++;
2172
2173 /*
2174 * wakeup/signal if all of the interrupt jobs are done.
2175 */
2176 if (lj->lioj_buffer_finished_count ==
2177 lj->lioj_buffer_count) {
2178 /*
2179 * Post a signal if it is called for.
2180 */
2181 if ((lj->lioj_flags &
2182 (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) ==
2183 LIOJ_SIGNAL) {
2184 lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
2185 aiocbe->timeouthandle =
2186 timeout(process_signal,
2187 aiocbe, 0);
2188 }
2189 }
2190 }
2191
2192 ki = p->p_aioinfo;
2193 if (ki) {
2194 ki->kaio_buffer_finished_count++;
2195 TAILQ_REMOVE(&aio_bufjobs, aiocbe, list);
2196 TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist);
2197 TAILQ_INSERT_TAIL(&ki->kaio_bufdone, aiocbe, plist);
2198
2199 KNOTE(&aiocbe->klist, 0);
2200 /* Do the wakeup. */
2201 if (ki->kaio_flags & (KAIO_RUNDOWN|KAIO_WAKEUP)) {
2202 ki->kaio_flags &= ~KAIO_WAKEUP;
2203 wakeup(p);
2204 }
2205 }
2206
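/*
 * Defer signal delivery to a timeout handler; psignal() cannot safely be
 * called from an interrupt routine (see process_signal() above).
 */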
2207 if (aiocbe->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL)
2208 aiocbe->timeouthandle =
2209 timeout(process_signal, aiocbe, 0);
2210 }
2211}
2212
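/*
 * aio_waitcomplete: wait for the next request of this process to complete,
 * copy the user-visible aiocb pointer out through *aiocbp, and return the
 * job's error status (with its status value passed back in td_retval[0]).
 * Completed jobs may come from either the ordinary done queue or the physio
 * done queue.
 */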
2213int
2214aio_waitcomplete(struct thread *td, struct aio_waitcomplete_args *uap)
2215{
2216 struct proc *p = td->td_proc;
2217 struct timeval atv;
2218 struct timespec ts;
2219 struct aiocb **cbptr;
2220 struct kaioinfo *ki;
2221 struct aiocblist *cb = NULL;
2222 int error, s, timo;
2223
2224 suword(uap->aiocbp, (long)NULL);
2225
2226 timo = 0;
2227 if (uap->timeout) {
2228 /* Get timespec struct. */
2229 error = copyin(uap->timeout, &ts, sizeof(ts));
2230 if (error)
2231 return error;
2232
2233 if ((ts.tv_nsec < 0) || (ts.tv_nsec >= 1000000000))
2234 return (EINVAL);
2235
2236 TIMESPEC_TO_TIMEVAL(&atv, &ts);
2237 if (itimerfix(&atv))
2238 return (EINVAL);
2239 timo = tvtohz(&atv);
2240 }
2241
2242 ki = p->p_aioinfo;
2243 if (ki == NULL)
2244 return EAGAIN;
2245
2246 cbptr = uap->aiocbp;
2247
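/*
 * Check the done queues first; if nothing has completed yet, sleep (with the
 * caller's timeout, if any) and look again on wakeup.
 */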
2248 for (;;) {
2249 if ((cb = TAILQ_FIRST(&ki->kaio_jobdone)) != 0) {
2250 suword(uap->aiocbp, (uintptr_t)cb->uuaiocb);
2251 td->td_retval[0] = cb->uaiocb._aiocb_private.status;
2252 if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) {
2253 p->p_stats->p_ru.ru_oublock += cb->outputcharge;
2254 cb->outputcharge = 0;
2255 } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) {
2256 p->p_stats->p_ru.ru_inblock += cb->inputcharge;
2257 cb->inputcharge = 0;
2258 }
2259 error = cb->uaiocb._aiocb_private.error;
2260 aio_free_entry(cb);
2261 return error;
2262 }
2263
2264 s = splbio();
2265 if ((cb = TAILQ_FIRST(&ki->kaio_bufdone)) != 0) {
2266 splx(s);
2267 suword(uap->aiocbp, (uintptr_t)cb->uuaiocb);
2268 td->td_retval[0] = cb->uaiocb._aiocb_private.status;
2269 error = cb->uaiocb._aiocb_private.error;
2270 aio_free_entry(cb);
2271 return error;
2272 }
2272
2273 ki->kaio_flags |= KAIO_WAKEUP;
2274 error = tsleep(p, PRIBIO | PCATCH, "aiowc", timo);
2275 splx(s);
2276
2277 if (error == ERESTART)
2278 return EINTR;
2279 else if (error < 0)
2280 return error;
2281 else if (error == EINTR)
2282 return EINTR;
2283 else if (error == EWOULDBLOCK)
2284 return EAGAIN;
2285 }
2286}
2287
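/*
 * Kqueue filter routines for EVFILT_AIO.  Attaching links the knote onto the
 * aio control block's knote list; only the kernel may register these knotes,
 * which is why EV_FLAG1 is required.
 */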
2288static int
2289filt_aioattach(struct knote *kn)
2290{
2291 struct aiocblist *aiocbe = (struct aiocblist *)kn->kn_id;
2292
2293 /*
2294 * The aiocbe pointer must be validated before using it, so
2295 * registration is restricted to the kernel; the user cannot
2296 * set EV_FLAG1.
2297 */
2298 if ((kn->kn_flags & EV_FLAG1) == 0)
2299 return (EPERM);
2300 kn->kn_flags &= ~EV_FLAG1;
2301
2302 SLIST_INSERT_HEAD(&aiocbe->klist, kn, kn_selnext);
2303
2304 return (0);
2305}
2306
2307static void
2308filt_aiodetach(struct knote *kn)
2309{
2310 struct aiocblist *aiocbe = (struct aiocblist *)kn->kn_id;
2311
2312 SLIST_REMOVE(&aiocbe->klist, kn, knote, kn_selnext);
2313}
2314
2315/*ARGSUSED*/
2316static int
2317filt_aio(struct knote *kn, long hint)
2318{
2319 struct aiocblist *aiocbe = (struct aiocblist *)kn->kn_id;
2320
2321 kn->kn_data = aiocbe->uaiocb._aiocb_private.error;
2322 if (aiocbe->jobstate != JOBST_JOBFINISHED &&
2323 aiocbe->jobstate != JOBST_JOBBFINISHED)
2324 return (0);
2325 kn->kn_flags |= EV_EOF;
2326 return (1);
2327}